cocoindex 0.1.49__tar.gz → 0.1.50__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cocoindex-0.1.49 → cocoindex-0.1.50}/Cargo.lock +79 -28
- {cocoindex-0.1.49 → cocoindex-0.1.50}/Cargo.toml +4 -3
- {cocoindex-0.1.49 → cocoindex-0.1.50}/PKG-INFO +1 -1
- {cocoindex-0.1.49 → cocoindex-0.1.50}/docs/docs/core/data_types.mdx +3 -2
- {cocoindex-0.1.49 → cocoindex-0.1.50}/docs/docs/getting_started/overview.md +2 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/docs/docs/getting_started/quickstart.md +11 -6
- {cocoindex-0.1.49 → cocoindex-0.1.50}/docs/docs/query.mdx +3 -3
- cocoindex-0.1.50/docs/static/img/incremental-etl.gif +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/fastapi_server_docker/main.py +16 -10
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/text_embedding/Text_Embedding.ipynb +8 -4
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/text_embedding/main.py +6 -2
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/text_embedding/pyproject.toml +2 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/python/cocoindex/__init__.py +1 -1
- {cocoindex-0.1.49 → cocoindex-0.1.50}/python/cocoindex/convert.py +36 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/python/cocoindex/functions.py +6 -4
- {cocoindex-0.1.49 → cocoindex-0.1.50}/python/cocoindex/lib.py +1 -2
- {cocoindex-0.1.49 → cocoindex-0.1.50}/python/cocoindex/tests/test_convert.py +280 -52
- cocoindex-0.1.50/python/cocoindex/tests/test_typing.py +499 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/python/cocoindex/typing.py +88 -13
- {cocoindex-0.1.49 → cocoindex-0.1.50}/src/base/value.rs +10 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/src/builder/analyzer.rs +3 -4
- {cocoindex-0.1.49 → cocoindex-0.1.50}/src/builder/plan.rs +0 -1
- {cocoindex-0.1.49 → cocoindex-0.1.50}/src/execution/mod.rs +0 -1
- {cocoindex-0.1.49 → cocoindex-0.1.50}/src/lib_context.rs +2 -18
- {cocoindex-0.1.49 → cocoindex-0.1.50}/src/ops/factory_bases.rs +3 -12
- {cocoindex-0.1.49 → cocoindex-0.1.50}/src/ops/interface.rs +2 -65
- {cocoindex-0.1.49 → cocoindex-0.1.50}/src/ops/py_factory.rs +4 -5
- {cocoindex-0.1.49 → cocoindex-0.1.50}/src/ops/sources/google_drive.rs +31 -46
- {cocoindex-0.1.49 → cocoindex-0.1.50}/src/ops/storages/kuzu.rs +1 -7
- {cocoindex-0.1.49 → cocoindex-0.1.50}/src/ops/storages/neo4j.rs +7 -8
- {cocoindex-0.1.49 → cocoindex-0.1.50}/src/ops/storages/postgres.rs +5 -197
- {cocoindex-0.1.49 → cocoindex-0.1.50}/src/ops/storages/qdrant.rs +13 -42
- {cocoindex-0.1.49 → cocoindex-0.1.50}/src/py/mod.rs +16 -81
- {cocoindex-0.1.49 → cocoindex-0.1.50}/src/server.rs +8 -12
- {cocoindex-0.1.49 → cocoindex-0.1.50}/src/service/error.rs +12 -4
- {cocoindex-0.1.49 → cocoindex-0.1.50}/src/service/flows.rs +26 -7
- {cocoindex-0.1.49 → cocoindex-0.1.50}/src/service/mod.rs +0 -1
- cocoindex-0.1.49/python/cocoindex/query.py +0 -115
- cocoindex-0.1.49/src/execution/query.rs +0 -124
- cocoindex-0.1.49/src/service/search.rs +0 -58
- {cocoindex-0.1.49 → cocoindex-0.1.50}/.cargo/config.toml +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/.env.lib_debug +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/.github/ISSUE_TEMPLATE//360/237/220/233-bug-report.md" +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/.github/ISSUE_TEMPLATE//360/237/222/241-feature-request.md" +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/.github/scripts/update_version.sh +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/.github/workflows/CI.yml +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/.github/workflows/_test.yml +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/.github/workflows/docs.yml +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/.github/workflows/release.yml +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/.gitignore +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/.vscode/settings.json +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/CODE_OF_CONDUCT.md +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/CONTRIBUTING.md +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/LICENSE +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/README.md +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/dev/neo4j.yaml +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/dev/postgres.yaml +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/docs/.gitignore +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/docs/README.md +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/docs/docs/about/community.md +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/docs/docs/about/contributing.md +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/docs/docs/ai/llm.mdx +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/docs/docs/core/basics.md +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/docs/docs/core/cli.mdx +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/docs/docs/core/custom_function.mdx +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/docs/docs/core/data_example.svg +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/docs/docs/core/flow_def.mdx +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/docs/docs/core/flow_example.svg +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/docs/docs/core/flow_methods.mdx +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/docs/docs/core/settings.mdx +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/docs/docs/getting_started/installation.md +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/docs/docs/getting_started/markdown_files.zip +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/docs/docs/ops/functions.md +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/docs/docs/ops/sources.md +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/docs/docs/ops/storages.md +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/docs/docusaurus.config.ts +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/docs/package.json +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/docs/sidebars.ts +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/docs/src/components/HomepageFeatures/index.tsx +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/docs/src/components/HomepageFeatures/styles.module.css +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/docs/src/css/custom.css +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/docs/src/theme/Root.js +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/docs/static/.nojekyll +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/docs/static/img/docusaurus.png +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/docs/static/img/favicon.ico +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/docs/static/img/icon.svg +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/docs/static/robots.txt +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/docs/tsconfig.json +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/docs/yarn.lock +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/amazon_s3_embedding/.env.example +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/amazon_s3_embedding/.gitignore +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/amazon_s3_embedding/README.md +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/amazon_s3_embedding/main.py +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/amazon_s3_embedding/pyproject.toml +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/code_embedding/.env +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/code_embedding/README.md +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/code_embedding/main.py +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/code_embedding/pyproject.toml +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/docs_to_knowledge_graph/.env +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/docs_to_knowledge_graph/README.md +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/docs_to_knowledge_graph/main.py +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/docs_to_knowledge_graph/pyproject.toml +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/fastapi_server_docker/.dockerignore +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/fastapi_server_docker/.env +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/fastapi_server_docker/README.md +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/fastapi_server_docker/compose.yaml +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/fastapi_server_docker/dockerfile +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/fastapi_server_docker/files/1810.04805v2.md +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/fastapi_server_docker/requirements.txt +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/gdrive_text_embedding/.env.example +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/gdrive_text_embedding/.gitignore +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/gdrive_text_embedding/README.md +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/gdrive_text_embedding/main.py +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/gdrive_text_embedding/pyproject.toml +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/image_search/.env +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/image_search/README.md +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/image_search/frontend/.gitignore +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/image_search/frontend/index.html +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/image_search/frontend/package-lock.json +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/image_search/frontend/package.json +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/image_search/frontend/src/App.jsx +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/image_search/frontend/src/main.jsx +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/image_search/frontend/src/style.css +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/image_search/frontend/vite.config.js +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/image_search/img/cat1.jpeg +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/image_search/img/dog1.jpeg +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/image_search/img/elephant1.jpg +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/image_search/img/giraffe.jpg +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/image_search/main.py +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/image_search/pyproject.toml +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/image_search/requirements.txt +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/manuals_llm_extraction/.env +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/manuals_llm_extraction/README.md +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/manuals_llm_extraction/main.py +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/manuals_llm_extraction/manuals/array.pdf +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/manuals_llm_extraction/manuals/base64.pdf +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/manuals_llm_extraction/manuals/copy.pdf +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/manuals_llm_extraction/manuals/glob.pdf +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/manuals_llm_extraction/pyproject.toml +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/pdf_embedding/.env +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/pdf_embedding/README.md +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/pdf_embedding/main.py +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/pdf_embedding/pdf_files/1706.03762v7.pdf +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/pdf_embedding/pdf_files/1810.04805v2.pdf +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/pdf_embedding/pdf_files/rfc8259.pdf +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/pdf_embedding/pyproject.toml +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/product_recommendation/.env +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/product_recommendation/README.md +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/product_recommendation/img/cocoinsight.png +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/product_recommendation/img/neo4j.png +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/product_recommendation/main.py +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/product_recommendation/products/p1.json +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/product_recommendation/products/p2.json +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/product_recommendation/products/p3.json +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/product_recommendation/products/p4.json +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/product_recommendation/products/p5.json +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/product_recommendation/products/p6.json +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/product_recommendation/products/p7.json +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/product_recommendation/products/p8.json +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/product_recommendation/products/p9.json +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/product_recommendation/pyproject.toml +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/text_embedding/.env +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/text_embedding/README.md +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/text_embedding/markdown_files/1706.03762v7.md +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/text_embedding/markdown_files/1810.04805v2.md +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/text_embedding/markdown_files/rfc8259.md +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/text_embedding_qdrant/.env +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/text_embedding_qdrant/README.md +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/text_embedding_qdrant/main.py +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/text_embedding_qdrant/markdown_files/rfc8259.md +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/text_embedding_qdrant/pyproject.toml +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/pyproject.toml +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/python/cocoindex/auth_registry.py +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/python/cocoindex/cli.py +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/python/cocoindex/flow.py +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/python/cocoindex/index.py +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/python/cocoindex/llm.py +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/python/cocoindex/op.py +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/python/cocoindex/py.typed +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/python/cocoindex/runtime.py +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/python/cocoindex/setting.py +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/python/cocoindex/setup.py +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/python/cocoindex/sources.py +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/python/cocoindex/storages.py +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/python/cocoindex/tests/__init__.py +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/python/cocoindex/utils.py +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/ruff.toml +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/src/base/duration.rs +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/src/base/field_attrs.rs +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/src/base/json_schema.rs +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/src/base/mod.rs +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/src/base/schema.rs +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/src/base/spec.rs +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/src/builder/analyzed_flow.rs +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/src/builder/flow_builder.rs +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/src/builder/mod.rs +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/src/execution/db_tracking.rs +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/src/execution/db_tracking_setup.rs +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/src/execution/dumper.rs +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/src/execution/evaluator.rs +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/src/execution/indexing_status.rs +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/src/execution/live_updater.rs +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/src/execution/memoization.rs +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/src/execution/row_indexer.rs +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/src/execution/source_indexer.rs +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/src/execution/stats.rs +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/src/lib.rs +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/src/llm/anthropic.rs +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/src/llm/gemini.rs +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/src/llm/mod.rs +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/src/llm/ollama.rs +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/src/llm/openai.rs +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/src/ops/functions/extract_by_llm.rs +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/src/ops/functions/mod.rs +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/src/ops/functions/parse_json.rs +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/src/ops/functions/split_recursively.rs +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/src/ops/mod.rs +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/src/ops/registration.rs +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/src/ops/registry.rs +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/src/ops/sdk.rs +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/src/ops/sources/amazon_s3.rs +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/src/ops/sources/local_file.rs +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/src/ops/sources/mod.rs +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/src/ops/storages/mod.rs +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/src/ops/storages/shared/mod.rs +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/src/ops/storages/shared/property_graph.rs +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/src/ops/storages/shared/table_columns.rs +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/src/prelude.rs +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/src/py/convert.rs +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/src/settings.rs +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/src/setup/auth_registry.rs +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/src/setup/components.rs +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/src/setup/db_metadata.rs +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/src/setup/driver.rs +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/src/setup/mod.rs +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/src/setup/states.rs +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/src/utils/db.rs +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/src/utils/fingerprint.rs +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/src/utils/immutable.rs +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/src/utils/mod.rs +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/src/utils/retryable.rs +0 -0
- {cocoindex-0.1.49 → cocoindex-0.1.50}/src/utils/yaml_ser.rs +0 -0
@@ -676,16 +676,43 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
676
676
|
checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f"
|
677
677
|
dependencies = [
|
678
678
|
"async-trait",
|
679
|
-
"axum-core",
|
679
|
+
"axum-core 0.4.5",
|
680
680
|
"bytes",
|
681
681
|
"futures-util",
|
682
682
|
"http 1.3.1",
|
683
683
|
"http-body 1.0.1",
|
684
684
|
"http-body-util",
|
685
|
+
"itoa",
|
686
|
+
"matchit 0.7.3",
|
687
|
+
"memchr",
|
688
|
+
"mime",
|
689
|
+
"percent-encoding",
|
690
|
+
"pin-project-lite",
|
691
|
+
"rustversion",
|
692
|
+
"serde",
|
693
|
+
"sync_wrapper",
|
694
|
+
"tower 0.5.2",
|
695
|
+
"tower-layer",
|
696
|
+
"tower-service",
|
697
|
+
]
|
698
|
+
|
699
|
+
[[package]]
|
700
|
+
name = "axum"
|
701
|
+
version = "0.8.4"
|
702
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
703
|
+
checksum = "021e862c184ae977658b36c4500f7feac3221ca5da43e3f25bd04ab6c79a29b5"
|
704
|
+
dependencies = [
|
705
|
+
"axum-core 0.5.2",
|
706
|
+
"bytes",
|
707
|
+
"form_urlencoded",
|
708
|
+
"futures-util",
|
709
|
+
"http 1.3.1",
|
710
|
+
"http-body 1.0.1",
|
711
|
+
"http-body-util",
|
685
712
|
"hyper 1.6.0",
|
686
713
|
"hyper-util",
|
687
714
|
"itoa",
|
688
|
-
"matchit",
|
715
|
+
"matchit 0.8.4",
|
689
716
|
"memchr",
|
690
717
|
"mime",
|
691
718
|
"percent-encoding",
|
@@ -721,28 +748,48 @@ dependencies = [
|
|
721
748
|
"sync_wrapper",
|
722
749
|
"tower-layer",
|
723
750
|
"tower-service",
|
751
|
+
]
|
752
|
+
|
753
|
+
[[package]]
|
754
|
+
name = "axum-core"
|
755
|
+
version = "0.5.2"
|
756
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
757
|
+
checksum = "68464cd0412f486726fb3373129ef5d2993f90c34bc2bc1c1e9943b2f4fc7ca6"
|
758
|
+
dependencies = [
|
759
|
+
"bytes",
|
760
|
+
"futures-core",
|
761
|
+
"http 1.3.1",
|
762
|
+
"http-body 1.0.1",
|
763
|
+
"http-body-util",
|
764
|
+
"mime",
|
765
|
+
"pin-project-lite",
|
766
|
+
"rustversion",
|
767
|
+
"sync_wrapper",
|
768
|
+
"tower-layer",
|
769
|
+
"tower-service",
|
724
770
|
"tracing",
|
725
771
|
]
|
726
772
|
|
727
773
|
[[package]]
|
728
774
|
name = "axum-extra"
|
729
|
-
version = "0.
|
775
|
+
version = "0.10.1"
|
730
776
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
731
|
-
checksum = "
|
777
|
+
checksum = "45bf463831f5131b7d3c756525b305d40f1185b688565648a92e1392ca35713d"
|
732
778
|
dependencies = [
|
733
|
-
"axum",
|
734
|
-
"axum-core",
|
779
|
+
"axum 0.8.4",
|
780
|
+
"axum-core 0.5.2",
|
735
781
|
"bytes",
|
736
|
-
"
|
782
|
+
"form_urlencoded",
|
737
783
|
"futures-util",
|
738
784
|
"http 1.3.1",
|
739
785
|
"http-body 1.0.1",
|
740
786
|
"http-body-util",
|
741
787
|
"mime",
|
742
|
-
"multer",
|
743
788
|
"pin-project-lite",
|
789
|
+
"rustversion",
|
744
790
|
"serde",
|
745
791
|
"serde_html_form",
|
792
|
+
"serde_path_to_error",
|
746
793
|
"tower 0.5.2",
|
747
794
|
"tower-layer",
|
748
795
|
"tower-service",
|
@@ -993,7 +1040,7 @@ dependencies = [
|
|
993
1040
|
|
994
1041
|
[[package]]
|
995
1042
|
name = "cocoindex"
|
996
|
-
version = "0.1.
|
1043
|
+
version = "0.1.50"
|
997
1044
|
dependencies = [
|
998
1045
|
"anyhow",
|
999
1046
|
"async-openai",
|
@@ -1002,7 +1049,7 @@ dependencies = [
|
|
1002
1049
|
"aws-config",
|
1003
1050
|
"aws-sdk-s3",
|
1004
1051
|
"aws-sdk-sqs",
|
1005
|
-
"axum",
|
1052
|
+
"axum 0.8.4",
|
1006
1053
|
"axum-extra",
|
1007
1054
|
"base64 0.22.1",
|
1008
1055
|
"blake2",
|
@@ -1028,6 +1075,7 @@ dependencies = [
|
|
1028
1075
|
"neo4rs",
|
1029
1076
|
"owo-colors",
|
1030
1077
|
"pgvector",
|
1078
|
+
"phf",
|
1031
1079
|
"pyo3",
|
1032
1080
|
"pyo3-async-runtimes",
|
1033
1081
|
"pythonize",
|
@@ -2610,6 +2658,12 @@ version = "0.7.3"
|
|
2610
2658
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
2611
2659
|
checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94"
|
2612
2660
|
|
2661
|
+
[[package]]
|
2662
|
+
name = "matchit"
|
2663
|
+
version = "0.8.4"
|
2664
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
2665
|
+
checksum = "47e1ffaa40ddd1f3ed91f717a33c8c0ee23fff369e3aa8772b9605cc1d22f4c3"
|
2666
|
+
|
2613
2667
|
[[package]]
|
2614
2668
|
name = "md-5"
|
2615
2669
|
version = "0.10.6"
|
@@ -2677,23 +2731,6 @@ dependencies = [
|
|
2677
2731
|
"windows-sys 0.52.0",
|
2678
2732
|
]
|
2679
2733
|
|
2680
|
-
[[package]]
|
2681
|
-
name = "multer"
|
2682
|
-
version = "3.1.0"
|
2683
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
2684
|
-
checksum = "83e87776546dc87511aa5ee218730c92b666d7264ab6ed41f9d215af9cd5224b"
|
2685
|
-
dependencies = [
|
2686
|
-
"bytes",
|
2687
|
-
"encoding_rs",
|
2688
|
-
"futures-util",
|
2689
|
-
"http 1.3.1",
|
2690
|
-
"httparse",
|
2691
|
-
"memchr",
|
2692
|
-
"mime",
|
2693
|
-
"spin",
|
2694
|
-
"version_check",
|
2695
|
-
]
|
2696
|
-
|
2697
2734
|
[[package]]
|
2698
2735
|
name = "neo4rs"
|
2699
2736
|
version = "0.8.0"
|
@@ -2993,6 +3030,7 @@ version = "0.11.3"
|
|
2993
3030
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
2994
3031
|
checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078"
|
2995
3032
|
dependencies = [
|
3033
|
+
"phf_macros",
|
2996
3034
|
"phf_shared",
|
2997
3035
|
]
|
2998
3036
|
|
@@ -3016,6 +3054,19 @@ dependencies = [
|
|
3016
3054
|
"rand 0.8.5",
|
3017
3055
|
]
|
3018
3056
|
|
3057
|
+
[[package]]
|
3058
|
+
name = "phf_macros"
|
3059
|
+
version = "0.11.3"
|
3060
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
3061
|
+
checksum = "f84ac04429c13a7ff43785d75ad27569f2951ce0ffd30a3321230db2fc727216"
|
3062
|
+
dependencies = [
|
3063
|
+
"phf_generator",
|
3064
|
+
"phf_shared",
|
3065
|
+
"proc-macro2",
|
3066
|
+
"quote",
|
3067
|
+
"syn 2.0.101",
|
3068
|
+
]
|
3069
|
+
|
3019
3070
|
[[package]]
|
3020
3071
|
name = "phf_shared"
|
3021
3072
|
version = "0.11.3"
|
@@ -4653,7 +4704,7 @@ checksum = "877c5b330756d856ffcc4553ab34a5684481ade925ecc54bcd1bf02b1d0d4d52"
|
|
4653
4704
|
dependencies = [
|
4654
4705
|
"async-stream",
|
4655
4706
|
"async-trait",
|
4656
|
-
"axum",
|
4707
|
+
"axum 0.7.9",
|
4657
4708
|
"base64 0.22.1",
|
4658
4709
|
"bytes",
|
4659
4710
|
"flate2",
|
@@ -2,7 +2,7 @@
|
|
2
2
|
name = "cocoindex"
|
3
3
|
# Version used for local development is always higher than others to take precedence.
|
4
4
|
# Will be overridden for specific release versions.
|
5
|
-
version = "0.1.
|
5
|
+
version = "0.1.50"
|
6
6
|
edition = "2024"
|
7
7
|
|
8
8
|
[profile.release]
|
@@ -20,8 +20,8 @@ pyo3-async-runtimes = { version = "0.24.0", features = ["tokio-runtime"] }
|
|
20
20
|
|
21
21
|
anyhow = { version = "1.0.97", features = ["std"] }
|
22
22
|
async-trait = "0.1.88"
|
23
|
-
axum = "0.
|
24
|
-
axum-extra = { version = "0.
|
23
|
+
axum = "0.8.4"
|
24
|
+
axum-extra = { version = "0.10.1", features = ["query"] }
|
25
25
|
base64 = "0.22.1"
|
26
26
|
chrono = "0.4.40"
|
27
27
|
config = "0.14.1"
|
@@ -50,6 +50,7 @@ tower-http = { version = "0.6.2", features = ["cors", "trace"] }
|
|
50
50
|
indexmap = { version = "2.8.0", features = ["serde"] }
|
51
51
|
blake2 = "0.10.6"
|
52
52
|
pgvector = { version = "0.4.0", features = ["sqlx"] }
|
53
|
+
phf = { version = "0.11.3", features = ["macros"] }
|
53
54
|
indenter = "0.3.3"
|
54
55
|
itertools = "0.14.0"
|
55
56
|
derivative = "2.2.0"
|
@@ -36,16 +36,17 @@ This is the list of all basic types supported by CocoIndex:
|
|
36
36
|
| LocalDatetime | Date and time without timezone | `cocoindex.LocalDateTime` | `datetime.datetime` |
|
37
37
|
| OffsetDatetime | Date and time with a timezone offset | `cocoindex.OffsetDateTime` | `datetime.datetime` |
|
38
38
|
| TimeDelta | A duration of time | `datetime.timedelta` | `datetime.timedelta` |
|
39
|
-
| Vector[*T*, *Dim*?] | *T* must be basic type. *Dim* is a positive integer and optional. |`cocoindex.Vector[T]` or `cocoindex.Vector[T, Dim]` | `list[T]` |
|
40
39
|
| Json | | `cocoindex.Json` | Any data convertible to JSON by `json` package |
|
40
|
+
| Vector[*T*, *Dim*?] | *T* can be a basic type or a numeric type. *Dim* is a positive integer and optional. | `cocoindex.Vector[T]` or `cocoindex.Vector[T, Dim]` | `numpy.typing.NDArray[T]` or `list[T]` |
|
41
41
|
|
42
42
|
Values of all data types can be represented by values in Python's native types (as described under the Native Python Type column).
|
43
43
|
However, the underlying execution engine and some storage systems (like Postgres) have finer distinctions for some types, specifically:
|
44
44
|
|
45
45
|
* *Float32* and *Float64* for `float`, with different precision.
|
46
46
|
* *LocalDateTime* and *OffsetDateTime* for `datetime.datetime`, with different timezone awareness.
|
47
|
-
* *Vector* has optional dimension information.
|
48
47
|
* *Range* and *Json* provide a clear tag for the type, to clearly distinguish the type in CocoIndex.
|
48
|
+
* *Vector* holds elements of type *T*. If *T* is numeric (e.g., `np.float32` or `np.float64`), it's represented as `NDArray[T]`; otherwise, as `list[T]`.
|
49
|
+
* *Vector* also has optional dimension information.
|
49
50
|
|
50
51
|
The native Python type is always more permissive and can represent a superset of possible values.
|
51
52
|
* Only when you annotate the return type of a custom function, you should use the specific type,
|
@@ -9,6 +9,8 @@ CocoIndex is an ultra-performant real-time data transformation framework for AI,
|
|
9
9
|
|
10
10
|
As a data framework, CocoIndex takes it to the next level on data freshness. **Incremental processing** is one of the core values provided by CocoIndex.
|
11
11
|
|
12
|
+

|
13
|
+
|
12
14
|
## Programming Model
|
13
15
|
CocoIndex follows the idea of [Dataflow programming](https://en.wikipedia.org/wiki/Dataflow_programming) model. Each transformation creates a new field solely based on input fields, without hidden states and value mutation. All data before/after each transformation is observable, with lineage out of the box.
|
14
16
|
|
@@ -154,11 +154,11 @@ The goal of transforming your data is usually to query against it.
|
|
154
154
|
Once you already have your index built, you can directly access the transformed data in the target database.
|
155
155
|
CocoIndex also provides utilities for you to do this more seamlessly.
|
156
156
|
|
157
|
-
In this example, we'll use the [`psycopg` library](https://www.psycopg.org/) to connect to the database and run queries.
|
158
|
-
Please make sure
|
157
|
+
In this example, we'll use the [`psycopg` library](https://www.psycopg.org/) along with pgvector to connect to the database and run queries on vector data.
|
158
|
+
Please make sure the required packages are installed:
|
159
159
|
|
160
160
|
```bash
|
161
|
-
pip install psycopg[binary,pool]
|
161
|
+
pip install numpy psycopg[binary,pool] pgvector
|
162
162
|
```
|
163
163
|
|
164
164
|
### Step 4.1: Extract common transformations
|
@@ -169,8 +169,11 @@ i.e. they should use exactly the same embedding model and parameters.
|
|
169
169
|
Let's extract that into a function:
|
170
170
|
|
171
171
|
```python title="quickstart.py"
|
172
|
+
from numpy.typing import NDArray
|
173
|
+
import numpy as np
|
174
|
+
|
172
175
|
@cocoindex.transform_flow()
|
173
|
-
def text_to_embedding(text: cocoindex.DataSlice[str]) -> cocoindex.DataSlice[
|
176
|
+
def text_to_embedding(text: cocoindex.DataSlice[str]) -> cocoindex.DataSlice[NDArray[np.float32]]:
|
174
177
|
return text.transform(
|
175
178
|
cocoindex.functions.SentenceTransformerEmbed(
|
176
179
|
model="sentence-transformers/all-MiniLM-L6-v2"))
|
@@ -207,6 +210,7 @@ Now we can create a function to query the index upon a given input query:
|
|
207
210
|
|
208
211
|
```python title="quickstart.py"
|
209
212
|
from psycopg_pool import ConnectionPool
|
213
|
+
from pgvector.psycopg import register_vector
|
210
214
|
|
211
215
|
def search(pool: ConnectionPool, query: str, top_k: int = 5):
|
212
216
|
# Get the table name, for the export target in the text_embedding_flow above.
|
@@ -215,9 +219,10 @@ def search(pool: ConnectionPool, query: str, top_k: int = 5):
|
|
215
219
|
query_vector = text_to_embedding.eval(query)
|
216
220
|
# Run the query and get the results.
|
217
221
|
with pool.connection() as conn:
|
222
|
+
register_vector(conn)
|
218
223
|
with conn.cursor() as cur:
|
219
224
|
cur.execute(f"""
|
220
|
-
SELECT filename, text, embedding <=> %s
|
225
|
+
SELECT filename, text, embedding <=> %s AS distance
|
221
226
|
FROM {table_name} ORDER BY distance LIMIT %s
|
222
227
|
""", (query_vector, top_k))
|
223
228
|
return [
|
@@ -236,7 +241,7 @@ There're two CocoIndex-specific logic:
|
|
236
241
|
|
237
242
|
2. Evaluate the transform flow defined above with the input query, to get the embedding.
|
238
243
|
It's done by the `eval()` method of the transform flow `text_to_embedding`.
|
239
|
-
The return type of this method is `
|
244
|
+
The return type of this method is `NDArray[np.float32]` as declared in the `text_to_embedding()` function (`cocoindex.DataSlice[NDArray[np.float32]]`).
|
240
245
|
|
241
246
|
### Step 4.3: Add the main script logic
|
242
247
|
|
@@ -41,7 +41,7 @@ The [quickstart](getting_started/quickstart#step-41-extract-common-transformatio
|
|
41
41
|
|
42
42
|
```python
|
43
43
|
@cocoindex.transform_flow()
|
44
|
-
def text_to_embedding(text: cocoindex.DataSlice[str]) -> cocoindex.DataSlice[
|
44
|
+
def text_to_embedding(text: cocoindex.DataSlice[str]) -> cocoindex.DataSlice[NDArray[np.float32]]:
|
45
45
|
return text.transform(
|
46
46
|
cocoindex.functions.SentenceTransformerEmbed(
|
47
47
|
model="sentence-transformers/all-MiniLM-L6-v2"))
|
@@ -61,7 +61,7 @@ with doc["chunks"].row() as chunk:
|
|
61
61
|
chunk["embedding"] = chunk["text"].call(text_to_embedding)
|
62
62
|
```
|
63
63
|
|
64
|
-
Any time, you can call the `eval()` method with specific string, which will return a `
|
64
|
+
At any time, you can call the `eval()` method with a specific string, which will return an `NDArray[np.float32]`:
|
65
65
|
|
66
66
|
```python
|
67
67
|
print(text_to_embedding.eval("Hello, world!"))
|
@@ -93,7 +93,7 @@ For example:
|
|
93
93
|
|
94
94
|
```python
|
95
95
|
table_name = cocoindex.utils.get_target_storage_default_name(text_embedding_flow, "doc_embeddings")
|
96
|
-
query = f"SELECT filename, text FROM {table_name} ORDER BY embedding <=> %s
|
96
|
+
query = f"SELECT filename, text FROM {table_name} ORDER BY embedding <=> %s LIMIT 5"
|
97
97
|
...
|
98
98
|
```
|
99
99
|
|
Binary file
|
@@ -2,7 +2,9 @@ import cocoindex
|
|
2
2
|
import uvicorn
|
3
3
|
from dotenv import load_dotenv
|
4
4
|
from fastapi import FastAPI, Query
|
5
|
+
from fastapi import Request
|
5
6
|
from psycopg_pool import ConnectionPool
|
7
|
+
from contextlib import asynccontextmanager
|
6
8
|
import os
|
7
9
|
|
8
10
|
|
@@ -86,27 +88,31 @@ def search(pool: ConnectionPool, query: str, top_k: int = 5):
|
|
86
88
|
]
|
87
89
|
|
88
90
|
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
@fastapi_app.on_event("startup")
|
93
|
-
def startup_event():
|
91
|
+
@asynccontextmanager
|
92
|
+
async def lifespan(app: FastAPI):
|
94
93
|
load_dotenv()
|
95
94
|
cocoindex.init()
|
96
|
-
|
97
|
-
|
95
|
+
pool = ConnectionPool(os.getenv("COCOINDEX_DATABASE_URL"))
|
96
|
+
app.state.pool = pool
|
97
|
+
try:
|
98
|
+
yield
|
99
|
+
finally:
|
100
|
+
pool.close()
|
101
|
+
|
102
|
+
|
103
|
+
fastapi_app = FastAPI(lifespan=lifespan)
|
98
104
|
|
99
105
|
|
100
106
|
@fastapi_app.get("/search")
|
101
107
|
def search_endpoint(
|
108
|
+
request: Request,
|
102
109
|
q: str = Query(..., description="Search query"),
|
103
110
|
limit: int = Query(5, description="Number of results"),
|
104
111
|
):
|
105
|
-
|
112
|
+
pool = request.app.state.pool
|
113
|
+
results = search(pool, q, limit)
|
106
114
|
return {"results": results}
|
107
115
|
|
108
116
|
|
109
117
|
if __name__ == "__main__":
|
110
|
-
load_dotenv()
|
111
|
-
cocoindex.init()
|
112
118
|
uvicorn.run(fastapi_app, host="0.0.0.0", port=8080)
|
@@ -45,7 +45,7 @@
|
|
45
45
|
},
|
46
46
|
"outputs": [],
|
47
47
|
"source": [
|
48
|
-
"%pip install cocoindex python-dotenv psycopg[binary,pool]"
|
48
|
+
"%pip install cocoindex numpy python-dotenv psycopg[binary,pool] pgvector"
|
49
49
|
]
|
50
50
|
},
|
51
51
|
{
|
@@ -164,7 +164,10 @@
|
|
164
164
|
"from dotenv import load_dotenv\n",
|
165
165
|
"import os\n",
|
166
166
|
"from psycopg_pool import ConnectionPool\n",
|
167
|
-
"import
|
167
|
+
"from pgvector.psycopg import register_vector\n",
|
168
|
+
"import cocoindex\n",
|
169
|
+
"from numpy.typing import NDArray\n",
|
170
|
+
"import numpy as np\n"
|
168
171
|
]
|
169
172
|
},
|
170
173
|
{
|
@@ -187,7 +190,7 @@
|
|
187
190
|
"%%writefile -a main.py\n",
|
188
191
|
"\n",
|
189
192
|
"@cocoindex.transform_flow()\n",
|
190
|
-
"def text_to_embedding(text: cocoindex.DataSlice[str]) -> cocoindex.DataSlice[
|
193
|
+
"def text_to_embedding(text: cocoindex.DataSlice[str]) -> cocoindex.DataSlice[NDArray[np.float32]]:\n",
|
191
194
|
" \"\"\"\n",
|
192
195
|
" Embed the text using a SentenceTransformer model.\n",
|
193
196
|
" This is shared logic between indexing and querying.\n",
|
@@ -274,9 +277,10 @@
|
|
274
277
|
" query_vector = text_to_embedding.eval(query)\n",
|
275
278
|
" # Run the query and get the results.\n",
|
276
279
|
" with pool.connection() as conn:\n",
|
280
|
+
" register_vector(conn)\n",
|
277
281
|
" with conn.cursor() as cur:\n",
|
278
282
|
" cur.execute(f\"\"\"\n",
|
279
|
-
" SELECT filename, text, embedding <=> %s
|
283
|
+
" SELECT filename, text, embedding <=> %s AS distance\n",
|
280
284
|
" FROM {table_name} ORDER BY distance LIMIT %s\n",
|
281
285
|
" \"\"\", (query_vector, top_k))\n",
|
282
286
|
" return [\n",
|
@@ -1,13 +1,16 @@
|
|
1
1
|
from dotenv import load_dotenv
|
2
2
|
from psycopg_pool import ConnectionPool
|
3
|
+
from pgvector.psycopg import register_vector
|
3
4
|
import cocoindex
|
4
5
|
import os
|
6
|
+
from numpy.typing import NDArray
|
7
|
+
import numpy as np
|
5
8
|
|
6
9
|
|
7
10
|
@cocoindex.transform_flow()
|
8
11
|
def text_to_embedding(
|
9
12
|
text: cocoindex.DataSlice[str],
|
10
|
-
) -> cocoindex.DataSlice[
|
13
|
+
) -> cocoindex.DataSlice[NDArray[np.float32]]:
|
11
14
|
"""
|
12
15
|
Embed the text using a SentenceTransformer model.
|
13
16
|
This is shared logic between indexing and querying, so extract it as a function.
|
@@ -71,10 +74,11 @@ def search(pool: ConnectionPool, query: str, top_k: int = 5):
|
|
71
74
|
query_vector = text_to_embedding.eval(query)
|
72
75
|
# Run the query and get the results.
|
73
76
|
with pool.connection() as conn:
|
77
|
+
register_vector(conn)
|
74
78
|
with conn.cursor() as cur:
|
75
79
|
cur.execute(
|
76
80
|
f"""
|
77
|
-
SELECT filename, text, embedding <=> %s
|
81
|
+
SELECT filename, text, embedding <=> %s AS distance
|
78
82
|
FROM {table_name} ORDER BY distance LIMIT %s
|
79
83
|
""",
|
80
84
|
(query_vector, top_k),
|
@@ -2,7 +2,7 @@
|
|
2
2
|
Cocoindex is a framework for building and running indexing pipelines.
|
3
3
|
"""
|
4
4
|
|
5
|
-
from . import functions,
|
5
|
+
from . import functions, sources, storages, cli, utils
|
6
6
|
|
7
7
|
from .auth_registry import AuthEntryReference, add_auth_entry, ref_auth_entry
|
8
8
|
from .flow import FlowBuilder, DataScope, DataSlice, Flow, transform_flow
|
@@ -6,6 +6,7 @@ import dataclasses
|
|
6
6
|
import datetime
|
7
7
|
import inspect
|
8
8
|
import uuid
|
9
|
+
import numpy as np
|
9
10
|
|
10
11
|
from enum import Enum
|
11
12
|
from typing import Any, Callable, get_origin, Mapping
|
@@ -15,6 +16,7 @@ from .typing import (
|
|
15
16
|
is_namedtuple_type,
|
16
17
|
TABLE_TYPES,
|
17
18
|
KEY_FIELD_NAME,
|
19
|
+
DtypeRegistry,
|
18
20
|
)
|
19
21
|
|
20
22
|
|
@@ -27,6 +29,8 @@ def encode_engine_value(value: Any) -> Any:
|
|
27
29
|
]
|
28
30
|
if is_namedtuple_type(type(value)):
|
29
31
|
return [encode_engine_value(getattr(value, name)) for name in value._fields]
|
32
|
+
if isinstance(value, np.ndarray):
|
33
|
+
return value
|
30
34
|
if isinstance(value, (list, tuple)):
|
31
35
|
return [encode_engine_value(v) for v in value]
|
32
36
|
if isinstance(value, dict):
|
@@ -122,6 +126,38 @@ def make_engine_value_decoder(
|
|
122
126
|
if src_type_kind == "Uuid":
|
123
127
|
return lambda value: uuid.UUID(bytes=value)
|
124
128
|
|
129
|
+
if src_type_kind == "Vector":
|
130
|
+
elem_coco_type_info = analyze_type_info(dst_type_info.elem_type)
|
131
|
+
dtype_info = DtypeRegistry.get_by_kind(elem_coco_type_info.kind)
|
132
|
+
|
133
|
+
def decode_vector(value: Any) -> Any | None:
|
134
|
+
if value is None:
|
135
|
+
if dst_type_info.nullable:
|
136
|
+
return None
|
137
|
+
raise ValueError(
|
138
|
+
f"Received null for non-nullable vector `{''.join(field_path)}`"
|
139
|
+
)
|
140
|
+
|
141
|
+
if not isinstance(value, (np.ndarray, list)):
|
142
|
+
raise TypeError(
|
143
|
+
f"Expected NDArray or list for vector `{''.join(field_path)}`, got {type(value)}"
|
144
|
+
)
|
145
|
+
expected_dim = (
|
146
|
+
dst_type_info.vector_info.dim if dst_type_info.vector_info else None
|
147
|
+
)
|
148
|
+
if expected_dim is not None and len(value) != expected_dim:
|
149
|
+
raise ValueError(
|
150
|
+
f"Vector dimension mismatch for `{''.join(field_path)}`: "
|
151
|
+
f"expected {expected_dim}, got {len(value)}"
|
152
|
+
)
|
153
|
+
|
154
|
+
# Use NDArray for supported numeric dtypes, else return list
|
155
|
+
if dtype_info is not None:
|
156
|
+
return np.array(value, dtype=dtype_info.numpy_dtype)
|
157
|
+
return value
|
158
|
+
|
159
|
+
return decode_vector
|
160
|
+
|
125
161
|
return lambda value: value
|
126
162
|
|
127
163
|
|
@@ -1,6 +1,8 @@
|
|
1
1
|
"""All builtin functions."""
|
2
2
|
|
3
|
-
from typing import Annotated, Any, TYPE_CHECKING
|
3
|
+
from typing import Annotated, Any, TYPE_CHECKING, Literal
|
4
|
+
import numpy as np
|
5
|
+
from numpy.typing import NDArray
|
4
6
|
import dataclasses
|
5
7
|
|
6
8
|
from .typing import Float32, Vector, TypeAttr
|
@@ -66,11 +68,11 @@ class SentenceTransformerEmbedExecutor:
|
|
66
68
|
self._model = sentence_transformers.SentenceTransformer(self.spec.model, **args)
|
67
69
|
dim = self._model.get_sentence_embedding_dimension()
|
68
70
|
result: type = Annotated[
|
69
|
-
Vector[
|
71
|
+
Vector[np.float32, Literal[dim]], # type: ignore
|
70
72
|
TypeAttr("cocoindex.io/vector_origin_text", text.analyzed_value),
|
71
73
|
]
|
72
74
|
return result
|
73
75
|
|
74
|
-
def __call__(self, text: str) ->
|
75
|
-
result:
|
76
|
+
def __call__(self, text: str) -> NDArray[np.float32]:
|
77
|
+
result: NDArray[np.float32] = self._model.encode(text, convert_to_numpy=True)
|
76
78
|
return result
|
@@ -6,7 +6,7 @@ import warnings
|
|
6
6
|
from typing import Callable, Any
|
7
7
|
|
8
8
|
from . import _engine # type: ignore
|
9
|
-
from . import flow,
|
9
|
+
from . import flow, setting
|
10
10
|
from .convert import dump_engine_object
|
11
11
|
|
12
12
|
|
@@ -24,7 +24,6 @@ def init(settings: setting.Settings | None = None) -> None:
|
|
24
24
|
def start_server(settings: setting.ServerSettings) -> None:
|
25
25
|
"""Start the cocoindex server."""
|
26
26
|
flow.ensure_all_flows_built()
|
27
|
-
query.ensure_all_handlers_built()
|
28
27
|
_engine.start_server(settings.__dict__)
|
29
28
|
|
30
29
|
|