cocoindex 0.1.48__tar.gz → 0.1.50__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cocoindex-0.1.48 → cocoindex-0.1.50}/Cargo.lock +79 -28
- {cocoindex-0.1.48 → cocoindex-0.1.50}/Cargo.toml +4 -3
- {cocoindex-0.1.48 → cocoindex-0.1.50}/PKG-INFO +1 -1
- {cocoindex-0.1.48 → cocoindex-0.1.50}/docs/docs/core/data_types.mdx +3 -2
- cocoindex-0.1.50/docs/docs/getting_started/overview.md +36 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/docs/docs/getting_started/quickstart.md +11 -6
- {cocoindex-0.1.48 → cocoindex-0.1.50}/docs/docs/ops/functions.md +21 -3
- {cocoindex-0.1.48 → cocoindex-0.1.50}/docs/docs/ops/storages.md +52 -40
- {cocoindex-0.1.48 → cocoindex-0.1.50}/docs/docs/query.mdx +3 -3
- cocoindex-0.1.50/docs/static/img/incremental-etl.gif +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/docs_to_knowledge_graph/README.md +7 -10
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/docs_to_knowledge_graph/main.py +26 -23
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/fastapi_server_docker/main.py +16 -10
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/image_search/README.md +0 -15
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/product_recommendation/README.md +7 -7
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/product_recommendation/main.py +32 -28
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/text_embedding/Text_Embedding.ipynb +8 -4
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/text_embedding/main.py +6 -2
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/text_embedding/pyproject.toml +2 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/text_embedding_qdrant/README.md +3 -19
- {cocoindex-0.1.48 → cocoindex-0.1.50}/python/cocoindex/__init__.py +1 -1
- {cocoindex-0.1.48 → cocoindex-0.1.50}/python/cocoindex/convert.py +36 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/python/cocoindex/functions.py +18 -4
- {cocoindex-0.1.48 → cocoindex-0.1.50}/python/cocoindex/lib.py +1 -2
- {cocoindex-0.1.48 → cocoindex-0.1.50}/python/cocoindex/tests/test_convert.py +280 -52
- cocoindex-0.1.50/python/cocoindex/tests/test_typing.py +499 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/python/cocoindex/typing.py +88 -13
- {cocoindex-0.1.48 → cocoindex-0.1.50}/src/base/value.rs +10 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/src/builder/analyzer.rs +3 -4
- {cocoindex-0.1.48 → cocoindex-0.1.50}/src/builder/plan.rs +0 -1
- {cocoindex-0.1.48 → cocoindex-0.1.50}/src/execution/mod.rs +0 -1
- {cocoindex-0.1.48 → cocoindex-0.1.50}/src/lib_context.rs +2 -18
- {cocoindex-0.1.48 → cocoindex-0.1.50}/src/ops/factory_bases.rs +3 -12
- {cocoindex-0.1.48 → cocoindex-0.1.50}/src/ops/functions/split_recursively.rs +292 -203
- {cocoindex-0.1.48 → cocoindex-0.1.50}/src/ops/interface.rs +2 -65
- {cocoindex-0.1.48 → cocoindex-0.1.50}/src/ops/py_factory.rs +4 -5
- {cocoindex-0.1.48 → cocoindex-0.1.50}/src/ops/registration.rs +1 -1
- {cocoindex-0.1.48 → cocoindex-0.1.50}/src/ops/sources/google_drive.rs +31 -46
- {cocoindex-0.1.48 → cocoindex-0.1.50}/src/ops/storages/kuzu.rs +1 -7
- {cocoindex-0.1.48 → cocoindex-0.1.50}/src/ops/storages/neo4j.rs +7 -8
- {cocoindex-0.1.48 → cocoindex-0.1.50}/src/ops/storages/postgres.rs +5 -197
- {cocoindex-0.1.48 → cocoindex-0.1.50}/src/ops/storages/qdrant.rs +13 -42
- {cocoindex-0.1.48 → cocoindex-0.1.50}/src/py/mod.rs +16 -81
- {cocoindex-0.1.48 → cocoindex-0.1.50}/src/server.rs +8 -12
- {cocoindex-0.1.48 → cocoindex-0.1.50}/src/service/error.rs +12 -4
- {cocoindex-0.1.48 → cocoindex-0.1.50}/src/service/flows.rs +26 -7
- {cocoindex-0.1.48 → cocoindex-0.1.50}/src/service/mod.rs +0 -1
- cocoindex-0.1.48/docs/docs/getting_started/overview.md +0 -14
- cocoindex-0.1.48/python/cocoindex/query.py +0 -115
- cocoindex-0.1.48/src/execution/query.rs +0 -124
- cocoindex-0.1.48/src/service/search.rs +0 -58
- {cocoindex-0.1.48 → cocoindex-0.1.50}/.cargo/config.toml +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/.env.lib_debug +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/.github/ISSUE_TEMPLATE//360/237/220/233-bug-report.md" +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/.github/ISSUE_TEMPLATE//360/237/222/241-feature-request.md" +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/.github/scripts/update_version.sh +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/.github/workflows/CI.yml +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/.github/workflows/_test.yml +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/.github/workflows/docs.yml +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/.github/workflows/release.yml +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/.gitignore +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/.vscode/settings.json +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/CODE_OF_CONDUCT.md +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/CONTRIBUTING.md +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/LICENSE +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/README.md +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/dev/neo4j.yaml +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/dev/postgres.yaml +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/docs/.gitignore +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/docs/README.md +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/docs/docs/about/community.md +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/docs/docs/about/contributing.md +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/docs/docs/ai/llm.mdx +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/docs/docs/core/basics.md +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/docs/docs/core/cli.mdx +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/docs/docs/core/custom_function.mdx +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/docs/docs/core/data_example.svg +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/docs/docs/core/flow_def.mdx +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/docs/docs/core/flow_example.svg +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/docs/docs/core/flow_methods.mdx +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/docs/docs/core/settings.mdx +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/docs/docs/getting_started/installation.md +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/docs/docs/getting_started/markdown_files.zip +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/docs/docs/ops/sources.md +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/docs/docusaurus.config.ts +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/docs/package.json +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/docs/sidebars.ts +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/docs/src/components/HomepageFeatures/index.tsx +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/docs/src/components/HomepageFeatures/styles.module.css +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/docs/src/css/custom.css +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/docs/src/theme/Root.js +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/docs/static/.nojekyll +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/docs/static/img/docusaurus.png +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/docs/static/img/favicon.ico +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/docs/static/img/icon.svg +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/docs/static/robots.txt +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/docs/tsconfig.json +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/docs/yarn.lock +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/amazon_s3_embedding/.env.example +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/amazon_s3_embedding/.gitignore +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/amazon_s3_embedding/README.md +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/amazon_s3_embedding/main.py +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/amazon_s3_embedding/pyproject.toml +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/code_embedding/.env +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/code_embedding/README.md +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/code_embedding/main.py +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/code_embedding/pyproject.toml +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/docs_to_knowledge_graph/.env +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/docs_to_knowledge_graph/pyproject.toml +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/fastapi_server_docker/.dockerignore +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/fastapi_server_docker/.env +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/fastapi_server_docker/README.md +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/fastapi_server_docker/compose.yaml +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/fastapi_server_docker/dockerfile +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/fastapi_server_docker/files/1810.04805v2.md +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/fastapi_server_docker/requirements.txt +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/gdrive_text_embedding/.env.example +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/gdrive_text_embedding/.gitignore +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/gdrive_text_embedding/README.md +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/gdrive_text_embedding/main.py +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/gdrive_text_embedding/pyproject.toml +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/image_search/.env +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/image_search/frontend/.gitignore +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/image_search/frontend/index.html +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/image_search/frontend/package-lock.json +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/image_search/frontend/package.json +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/image_search/frontend/src/App.jsx +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/image_search/frontend/src/main.jsx +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/image_search/frontend/src/style.css +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/image_search/frontend/vite.config.js +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/image_search/img/cat1.jpeg +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/image_search/img/dog1.jpeg +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/image_search/img/elephant1.jpg +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/image_search/img/giraffe.jpg +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/image_search/main.py +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/image_search/pyproject.toml +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/image_search/requirements.txt +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/manuals_llm_extraction/.env +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/manuals_llm_extraction/README.md +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/manuals_llm_extraction/main.py +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/manuals_llm_extraction/manuals/array.pdf +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/manuals_llm_extraction/manuals/base64.pdf +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/manuals_llm_extraction/manuals/copy.pdf +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/manuals_llm_extraction/manuals/glob.pdf +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/manuals_llm_extraction/pyproject.toml +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/pdf_embedding/.env +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/pdf_embedding/README.md +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/pdf_embedding/main.py +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/pdf_embedding/pdf_files/1706.03762v7.pdf +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/pdf_embedding/pdf_files/1810.04805v2.pdf +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/pdf_embedding/pdf_files/rfc8259.pdf +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/pdf_embedding/pyproject.toml +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/product_recommendation/.env +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/product_recommendation/img/cocoinsight.png +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/product_recommendation/img/neo4j.png +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/product_recommendation/products/p1.json +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/product_recommendation/products/p2.json +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/product_recommendation/products/p3.json +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/product_recommendation/products/p4.json +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/product_recommendation/products/p5.json +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/product_recommendation/products/p6.json +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/product_recommendation/products/p7.json +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/product_recommendation/products/p8.json +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/product_recommendation/products/p9.json +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/product_recommendation/pyproject.toml +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/text_embedding/.env +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/text_embedding/README.md +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/text_embedding/markdown_files/1706.03762v7.md +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/text_embedding/markdown_files/1810.04805v2.md +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/text_embedding/markdown_files/rfc8259.md +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/text_embedding_qdrant/.env +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/text_embedding_qdrant/main.py +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/text_embedding_qdrant/markdown_files/rfc8259.md +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/text_embedding_qdrant/pyproject.toml +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/pyproject.toml +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/python/cocoindex/auth_registry.py +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/python/cocoindex/cli.py +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/python/cocoindex/flow.py +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/python/cocoindex/index.py +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/python/cocoindex/llm.py +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/python/cocoindex/op.py +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/python/cocoindex/py.typed +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/python/cocoindex/runtime.py +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/python/cocoindex/setting.py +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/python/cocoindex/setup.py +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/python/cocoindex/sources.py +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/python/cocoindex/storages.py +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/python/cocoindex/tests/__init__.py +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/python/cocoindex/utils.py +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/ruff.toml +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/src/base/duration.rs +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/src/base/field_attrs.rs +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/src/base/json_schema.rs +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/src/base/mod.rs +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/src/base/schema.rs +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/src/base/spec.rs +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/src/builder/analyzed_flow.rs +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/src/builder/flow_builder.rs +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/src/builder/mod.rs +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/src/execution/db_tracking.rs +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/src/execution/db_tracking_setup.rs +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/src/execution/dumper.rs +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/src/execution/evaluator.rs +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/src/execution/indexing_status.rs +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/src/execution/live_updater.rs +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/src/execution/memoization.rs +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/src/execution/row_indexer.rs +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/src/execution/source_indexer.rs +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/src/execution/stats.rs +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/src/lib.rs +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/src/llm/anthropic.rs +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/src/llm/gemini.rs +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/src/llm/mod.rs +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/src/llm/ollama.rs +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/src/llm/openai.rs +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/src/ops/functions/extract_by_llm.rs +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/src/ops/functions/mod.rs +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/src/ops/functions/parse_json.rs +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/src/ops/mod.rs +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/src/ops/registry.rs +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/src/ops/sdk.rs +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/src/ops/sources/amazon_s3.rs +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/src/ops/sources/local_file.rs +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/src/ops/sources/mod.rs +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/src/ops/storages/mod.rs +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/src/ops/storages/shared/mod.rs +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/src/ops/storages/shared/property_graph.rs +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/src/ops/storages/shared/table_columns.rs +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/src/prelude.rs +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/src/py/convert.rs +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/src/settings.rs +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/src/setup/auth_registry.rs +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/src/setup/components.rs +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/src/setup/db_metadata.rs +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/src/setup/driver.rs +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/src/setup/mod.rs +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/src/setup/states.rs +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/src/utils/db.rs +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/src/utils/fingerprint.rs +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/src/utils/immutable.rs +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/src/utils/mod.rs +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/src/utils/retryable.rs +0 -0
- {cocoindex-0.1.48 → cocoindex-0.1.50}/src/utils/yaml_ser.rs +0 -0
@@ -676,16 +676,43 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
676
676
|
checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f"
|
677
677
|
dependencies = [
|
678
678
|
"async-trait",
|
679
|
-
"axum-core",
|
679
|
+
"axum-core 0.4.5",
|
680
680
|
"bytes",
|
681
681
|
"futures-util",
|
682
682
|
"http 1.3.1",
|
683
683
|
"http-body 1.0.1",
|
684
684
|
"http-body-util",
|
685
|
+
"itoa",
|
686
|
+
"matchit 0.7.3",
|
687
|
+
"memchr",
|
688
|
+
"mime",
|
689
|
+
"percent-encoding",
|
690
|
+
"pin-project-lite",
|
691
|
+
"rustversion",
|
692
|
+
"serde",
|
693
|
+
"sync_wrapper",
|
694
|
+
"tower 0.5.2",
|
695
|
+
"tower-layer",
|
696
|
+
"tower-service",
|
697
|
+
]
|
698
|
+
|
699
|
+
[[package]]
|
700
|
+
name = "axum"
|
701
|
+
version = "0.8.4"
|
702
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
703
|
+
checksum = "021e862c184ae977658b36c4500f7feac3221ca5da43e3f25bd04ab6c79a29b5"
|
704
|
+
dependencies = [
|
705
|
+
"axum-core 0.5.2",
|
706
|
+
"bytes",
|
707
|
+
"form_urlencoded",
|
708
|
+
"futures-util",
|
709
|
+
"http 1.3.1",
|
710
|
+
"http-body 1.0.1",
|
711
|
+
"http-body-util",
|
685
712
|
"hyper 1.6.0",
|
686
713
|
"hyper-util",
|
687
714
|
"itoa",
|
688
|
-
"matchit",
|
715
|
+
"matchit 0.8.4",
|
689
716
|
"memchr",
|
690
717
|
"mime",
|
691
718
|
"percent-encoding",
|
@@ -721,28 +748,48 @@ dependencies = [
|
|
721
748
|
"sync_wrapper",
|
722
749
|
"tower-layer",
|
723
750
|
"tower-service",
|
751
|
+
]
|
752
|
+
|
753
|
+
[[package]]
|
754
|
+
name = "axum-core"
|
755
|
+
version = "0.5.2"
|
756
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
757
|
+
checksum = "68464cd0412f486726fb3373129ef5d2993f90c34bc2bc1c1e9943b2f4fc7ca6"
|
758
|
+
dependencies = [
|
759
|
+
"bytes",
|
760
|
+
"futures-core",
|
761
|
+
"http 1.3.1",
|
762
|
+
"http-body 1.0.1",
|
763
|
+
"http-body-util",
|
764
|
+
"mime",
|
765
|
+
"pin-project-lite",
|
766
|
+
"rustversion",
|
767
|
+
"sync_wrapper",
|
768
|
+
"tower-layer",
|
769
|
+
"tower-service",
|
724
770
|
"tracing",
|
725
771
|
]
|
726
772
|
|
727
773
|
[[package]]
|
728
774
|
name = "axum-extra"
|
729
|
-
version = "0.
|
775
|
+
version = "0.10.1"
|
730
776
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
731
|
-
checksum = "
|
777
|
+
checksum = "45bf463831f5131b7d3c756525b305d40f1185b688565648a92e1392ca35713d"
|
732
778
|
dependencies = [
|
733
|
-
"axum",
|
734
|
-
"axum-core",
|
779
|
+
"axum 0.8.4",
|
780
|
+
"axum-core 0.5.2",
|
735
781
|
"bytes",
|
736
|
-
"
|
782
|
+
"form_urlencoded",
|
737
783
|
"futures-util",
|
738
784
|
"http 1.3.1",
|
739
785
|
"http-body 1.0.1",
|
740
786
|
"http-body-util",
|
741
787
|
"mime",
|
742
|
-
"multer",
|
743
788
|
"pin-project-lite",
|
789
|
+
"rustversion",
|
744
790
|
"serde",
|
745
791
|
"serde_html_form",
|
792
|
+
"serde_path_to_error",
|
746
793
|
"tower 0.5.2",
|
747
794
|
"tower-layer",
|
748
795
|
"tower-service",
|
@@ -993,7 +1040,7 @@ dependencies = [
|
|
993
1040
|
|
994
1041
|
[[package]]
|
995
1042
|
name = "cocoindex"
|
996
|
-
version = "0.1.48"
|
1043
|
+
version = "0.1.50"
|
997
1044
|
dependencies = [
|
998
1045
|
"anyhow",
|
999
1046
|
"async-openai",
|
@@ -1002,7 +1049,7 @@ dependencies = [
|
|
1002
1049
|
"aws-config",
|
1003
1050
|
"aws-sdk-s3",
|
1004
1051
|
"aws-sdk-sqs",
|
1005
|
-
"axum",
|
1052
|
+
"axum 0.8.4",
|
1006
1053
|
"axum-extra",
|
1007
1054
|
"base64 0.22.1",
|
1008
1055
|
"blake2",
|
@@ -1028,6 +1075,7 @@ dependencies = [
|
|
1028
1075
|
"neo4rs",
|
1029
1076
|
"owo-colors",
|
1030
1077
|
"pgvector",
|
1078
|
+
"phf",
|
1031
1079
|
"pyo3",
|
1032
1080
|
"pyo3-async-runtimes",
|
1033
1081
|
"pythonize",
|
@@ -2610,6 +2658,12 @@ version = "0.7.3"
|
|
2610
2658
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
2611
2659
|
checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94"
|
2612
2660
|
|
2661
|
+
[[package]]
|
2662
|
+
name = "matchit"
|
2663
|
+
version = "0.8.4"
|
2664
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
2665
|
+
checksum = "47e1ffaa40ddd1f3ed91f717a33c8c0ee23fff369e3aa8772b9605cc1d22f4c3"
|
2666
|
+
|
2613
2667
|
[[package]]
|
2614
2668
|
name = "md-5"
|
2615
2669
|
version = "0.10.6"
|
@@ -2677,23 +2731,6 @@ dependencies = [
|
|
2677
2731
|
"windows-sys 0.52.0",
|
2678
2732
|
]
|
2679
2733
|
|
2680
|
-
[[package]]
|
2681
|
-
name = "multer"
|
2682
|
-
version = "3.1.0"
|
2683
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
2684
|
-
checksum = "83e87776546dc87511aa5ee218730c92b666d7264ab6ed41f9d215af9cd5224b"
|
2685
|
-
dependencies = [
|
2686
|
-
"bytes",
|
2687
|
-
"encoding_rs",
|
2688
|
-
"futures-util",
|
2689
|
-
"http 1.3.1",
|
2690
|
-
"httparse",
|
2691
|
-
"memchr",
|
2692
|
-
"mime",
|
2693
|
-
"spin",
|
2694
|
-
"version_check",
|
2695
|
-
]
|
2696
|
-
|
2697
2734
|
[[package]]
|
2698
2735
|
name = "neo4rs"
|
2699
2736
|
version = "0.8.0"
|
@@ -2993,6 +3030,7 @@ version = "0.11.3"
|
|
2993
3030
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
2994
3031
|
checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078"
|
2995
3032
|
dependencies = [
|
3033
|
+
"phf_macros",
|
2996
3034
|
"phf_shared",
|
2997
3035
|
]
|
2998
3036
|
|
@@ -3016,6 +3054,19 @@ dependencies = [
|
|
3016
3054
|
"rand 0.8.5",
|
3017
3055
|
]
|
3018
3056
|
|
3057
|
+
[[package]]
|
3058
|
+
name = "phf_macros"
|
3059
|
+
version = "0.11.3"
|
3060
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
3061
|
+
checksum = "f84ac04429c13a7ff43785d75ad27569f2951ce0ffd30a3321230db2fc727216"
|
3062
|
+
dependencies = [
|
3063
|
+
"phf_generator",
|
3064
|
+
"phf_shared",
|
3065
|
+
"proc-macro2",
|
3066
|
+
"quote",
|
3067
|
+
"syn 2.0.101",
|
3068
|
+
]
|
3069
|
+
|
3019
3070
|
[[package]]
|
3020
3071
|
name = "phf_shared"
|
3021
3072
|
version = "0.11.3"
|
@@ -4653,7 +4704,7 @@ checksum = "877c5b330756d856ffcc4553ab34a5684481ade925ecc54bcd1bf02b1d0d4d52"
|
|
4653
4704
|
dependencies = [
|
4654
4705
|
"async-stream",
|
4655
4706
|
"async-trait",
|
4656
|
-
"axum",
|
4707
|
+
"axum 0.7.9",
|
4657
4708
|
"base64 0.22.1",
|
4658
4709
|
"bytes",
|
4659
4710
|
"flate2",
|
@@ -2,7 +2,7 @@
|
|
2
2
|
name = "cocoindex"
|
3
3
|
# Version used for local development is always higher than others to take precedence.
|
4
4
|
# Will be overridden for specific release versions.
|
5
|
-
version = "0.1.48"
|
5
|
+
version = "0.1.50"
|
6
6
|
edition = "2024"
|
7
7
|
|
8
8
|
[profile.release]
|
@@ -20,8 +20,8 @@ pyo3-async-runtimes = { version = "0.24.0", features = ["tokio-runtime"] }
|
|
20
20
|
|
21
21
|
anyhow = { version = "1.0.97", features = ["std"] }
|
22
22
|
async-trait = "0.1.88"
|
23
|
-
axum = "0.
|
24
|
-
axum-extra = { version = "0.
|
23
|
+
axum = "0.8.4"
|
24
|
+
axum-extra = { version = "0.10.1", features = ["query"] }
|
25
25
|
base64 = "0.22.1"
|
26
26
|
chrono = "0.4.40"
|
27
27
|
config = "0.14.1"
|
@@ -50,6 +50,7 @@ tower-http = { version = "0.6.2", features = ["cors", "trace"] }
|
|
50
50
|
indexmap = { version = "2.8.0", features = ["serde"] }
|
51
51
|
blake2 = "0.10.6"
|
52
52
|
pgvector = { version = "0.4.0", features = ["sqlx"] }
|
53
|
+
phf = { version = "0.11.3", features = ["macros"] }
|
53
54
|
indenter = "0.3.3"
|
54
55
|
itertools = "0.14.0"
|
55
56
|
derivative = "2.2.0"
|
@@ -36,16 +36,17 @@ This is the list of all basic types supported by CocoIndex:
|
|
36
36
|
| LocalDatetime | Date and time without timezone | `cocoindex.LocalDateTime` | `datetime.datetime` |
|
37
37
|
| OffsetDatetime | Date and time with a timezone offset | `cocoindex.OffsetDateTime` | `datetime.datetime` |
|
38
38
|
| TimeDelta | A duration of time | `datetime.timedelta` | `datetime.timedelta` |
|
39
|
-
| Vector[*T*, *Dim*?] | *T* must be basic type. *Dim* is a positive integer and optional. |`cocoindex.Vector[T]` or `cocoindex.Vector[T, Dim]` | `list[T]` |
|
40
39
|
| Json | | `cocoindex.Json` | Any data convertible to JSON by `json` package |
|
40
|
+
| Vector[*T*, *Dim*?] | *T* can be a basic type or a numeric type. *Dim* is a positive integer and optional. | `cocoindex.Vector[T]` or `cocoindex.Vector[T, Dim]` | `numpy.typing.NDArray[T]` or `list[T]` |
|
41
41
|
|
42
42
|
Values of all data types can be represented by values in Python's native types (as described under the Native Python Type column).
|
43
43
|
However, the underlying execution engine and some storage systems (like Postgres) have finer distinctions for some types, specifically:
|
44
44
|
|
45
45
|
* *Float32* and *Float64* for `float`, with different precision.
|
46
46
|
* *LocalDateTime* and *OffsetDateTime* for `datetime.datetime`, with different timezone awareness.
|
47
|
-
* *Vector* has optional dimension information.
|
48
47
|
* *Range* and *Json* provide a clear tag for the type, to clearly distinguish the type in CocoIndex.
|
48
|
+
* *Vector* holds elements of type *T*. If *T* is numeric (e.g., `np.float32` or `np.float64`), it's represented as `NDArray[T]`; otherwise, as `list[T]`.
|
49
|
+
* *Vector* also has optional dimension information.
|
49
50
|
|
50
51
|
The native Python type is always more permissive and can represent a superset of possible values.
|
51
52
|
* Only when you annotate the return type of a custom function, you should use the specific type,
|
@@ -0,0 +1,36 @@
|
|
1
|
+
---
|
2
|
+
title: Overview
|
3
|
+
slug: /
|
4
|
+
---
|
5
|
+
|
6
|
+
# Welcome to CocoIndex
|
7
|
+
|
8
|
+
CocoIndex is an ultra-performant real-time data transformation framework for AI, with incremental processing.
|
9
|
+
|
10
|
+
As a data framework, CocoIndex takes data freshness to the next level. **Incremental processing** is one of the core values provided by CocoIndex.
|
11
|
+
|
12
|
+

|
13
|
+
|
14
|
+
## Programming Model
|
15
|
+
CocoIndex follows the [Dataflow programming](https://en.wikipedia.org/wiki/Dataflow_programming) model. Each transformation creates a new field solely based on input fields, without hidden states and value mutation. All data before/after each transformation is observable, with lineage out of the box.
|
16
|
+
|
17
|
+
The gist of an example data transformation:
|
18
|
+
```python
|
19
|
+
# import
|
20
|
+
data['content'] = flow_builder.add_source(...)
|
21
|
+
|
22
|
+
# transform
|
23
|
+
data['out'] = data['content']
|
24
|
+
.transform(...)
|
25
|
+
.transform(...)
|
26
|
+
|
27
|
+
# collect data
|
28
|
+
collector.collect(...)
|
29
|
+
|
30
|
+
# export to db, vector db, graph db ...
|
31
|
+
collector.export(...)
|
32
|
+
```
|
33
|
+
|
34
|
+
Get Started:
|
35
|
+
- [Quick Start](https://cocoindex.io/docs/getting_started/quickstart)
|
36
|
+
|
@@ -154,11 +154,11 @@ The goal of transforming your data is usually to query against it.
|
|
154
154
|
Once you already have your index built, you can directly access the transformed data in the target database.
|
155
155
|
CocoIndex also provides utilities for you to do this more seamlessly.
|
156
156
|
|
157
|
-
In this example, we'll use the [`psycopg` library](https://www.psycopg.org/) to connect to the database and run queries.
|
158
|
-
Please make sure
|
157
|
+
In this example, we'll use the [`psycopg` library](https://www.psycopg.org/) along with pgvector to connect to the database and run queries on vector data.
|
158
|
+
Please make sure the required packages are installed:
|
159
159
|
|
160
160
|
```bash
|
161
|
-
pip install psycopg[binary,pool]
|
161
|
+
pip install numpy psycopg[binary,pool] pgvector
|
162
162
|
```
|
163
163
|
|
164
164
|
### Step 4.1: Extract common transformations
|
@@ -169,8 +169,11 @@ i.e. they should use exactly the same embedding model and parameters.
|
|
169
169
|
Let's extract that into a function:
|
170
170
|
|
171
171
|
```python title="quickstart.py"
|
172
|
+
from numpy.typing import NDArray
|
173
|
+
import numpy as np
|
174
|
+
|
172
175
|
@cocoindex.transform_flow()
|
173
|
-
def text_to_embedding(text: cocoindex.DataSlice[str]) -> cocoindex.DataSlice[
|
176
|
+
def text_to_embedding(text: cocoindex.DataSlice[str]) -> cocoindex.DataSlice[NDArray[np.float32]]:
|
174
177
|
return text.transform(
|
175
178
|
cocoindex.functions.SentenceTransformerEmbed(
|
176
179
|
model="sentence-transformers/all-MiniLM-L6-v2"))
|
@@ -207,6 +210,7 @@ Now we can create a function to query the index upon a given input query:
|
|
207
210
|
|
208
211
|
```python title="quickstart.py"
|
209
212
|
from psycopg_pool import ConnectionPool
|
213
|
+
from pgvector.psycopg import register_vector
|
210
214
|
|
211
215
|
def search(pool: ConnectionPool, query: str, top_k: int = 5):
|
212
216
|
# Get the table name, for the export target in the text_embedding_flow above.
|
@@ -215,9 +219,10 @@ def search(pool: ConnectionPool, query: str, top_k: int = 5):
|
|
215
219
|
query_vector = text_to_embedding.eval(query)
|
216
220
|
# Run the query and get the results.
|
217
221
|
with pool.connection() as conn:
|
222
|
+
register_vector(conn)
|
218
223
|
with conn.cursor() as cur:
|
219
224
|
cur.execute(f"""
|
220
|
-
SELECT filename, text, embedding <=> %s
|
225
|
+
SELECT filename, text, embedding <=> %s AS distance
|
221
226
|
FROM {table_name} ORDER BY distance LIMIT %s
|
222
227
|
""", (query_vector, top_k))
|
223
228
|
return [
|
@@ -236,7 +241,7 @@ There're two CocoIndex-specific logic:
|
|
236
241
|
|
237
242
|
2. Evaluate the transform flow defined above with the input query, to get the embedding.
|
238
243
|
It's done by the `eval()` method of the transform flow `text_to_embedding`.
|
239
|
-
The return type of this method is `
|
244
|
+
The return type of this method is `NDArray[np.float32]` as declared in the `text_to_embedding()` function (`cocoindex.DataSlice[NDArray[np.float32]]`).
|
240
245
|
|
241
246
|
### Step 4.3: Add the main script logic
|
242
247
|
|
@@ -39,9 +39,27 @@ Input data:
|
|
39
39
|
|
40
40
|
* `chunk_overlap` (type: `int`, optional): The maximum overlap size between adjacent chunks, in bytes.
|
41
41
|
* `language` (type: `str`, optional): The language of the document.
|
42
|
-
Can be a
|
43
|
-
|
44
|
-
|
42
|
+
Can be a language name (e.g. `Python`, `Javascript`, `Markdown`) or a file extension (e.g. `.py`, `.js`, `.md`).
|
43
|
+
|
44
|
+
* `custom_languages` (type: `list[CustomLanguageSpec]`, optional): This allows you to customize how specific languages are chunked, using regular expressions. Each `CustomLanguageSpec` is a dict with the following fields:
|
45
|
+
* `language_name` (type: `str`, required): Name of the language.
|
46
|
+
* `aliases` (type: `list[str]`, optional): A list of aliases for the language.
|
47
|
+
It's an error if any language name or alias is duplicated.
|
48
|
+
|
49
|
+
* `separators_regex` (type: `list[str]`, required): A list of regex patterns to split the text.
|
50
|
+
Higher-level boundaries should come first, and lower-level should be listed later. e.g. `[r"\n# ", r"\n## ", r"\n\n", r"\. "]`.
|
51
|
+
See [regex Syntax](https://docs.rs/regex/latest/regex/#syntax) for supported regular expression syntax.
|
52
|
+
|
53
|
+
:::note
|
54
|
+
|
55
|
+
We use the `language` field to determine how to split the input text, following these rules:
|
56
|
+
|
57
|
+
* We'll match the input `language` field against the `language_name` or `aliases` of each custom language specification, and use the matched one. If the value of `language` is null, it'll be treated as an empty string when matching `language_name` or `aliases`.
|
58
|
+
* If no match is found, we'll match the `language` field against the builtin language configurations.
|
59
|
+
For all supported builtin language names and aliases (extensions), see [the code](https://github.com/search?q=org%3Acocoindex-io+lang%3Arust++%22static+TREE_SITTER_LANGUAGE_BY_LANG%22&type=code).
|
60
|
+
* If no match is found, the input will be treated as plain text.
|
61
|
+
|
62
|
+
:::
|
45
63
|
|
46
64
|
Return type: [KTable](/docs/core/data_types#ktable), each row represents a chunk, with the following sub fields:
|
47
65
|
|
@@ -54,34 +54,21 @@ Here's how CocoIndex data elements map to Qdrant elements during export:
|
|
54
54
|
|-------------------|------------------|
|
55
55
|
| an export target | a unique collection |
|
56
56
|
| a collected row | a point |
|
57
|
-
| a field | a named vector
|
57
|
+
| a field | a named vector, if it fits into a Qdrant vector; or a field within the payload otherwise |
|
58
|
+
|
59
|
+
A vector with `Float32`, `Float64` or `Int64` type, and with a fixed dimension, fits into a Qdrant vector.
|
58
60
|
|
59
61
|
#### Spec
|
60
62
|
|
61
63
|
The spec takes the following fields:
|
62
64
|
|
63
|
-
* `
|
64
|
-
|
65
|
-
* `
|
66
|
-
|
67
|
-
* `api_key` (type: `str`, optional). API key to authenticate requests with.
|
65
|
+
* `connection` (type: [auth reference](../core/flow_def#auth-registry) to `QdrantConnection`, optional): The connection to the Qdrant instance. `QdrantConnection` has the following fields:
|
66
|
+
* `grpc_url` (type: `str`): The [gRPC URL](https://qdrant.tech/documentation/interfaces/#grpc-interface) of the Qdrant instance, e.g. `http://localhost:6334/`.
|
67
|
+
* `api_key` (type: `str`, optional). API key to authenticate requests with.
|
68
68
|
|
69
|
-
|
69
|
+
If `connection` is not provided, the local Qdrant instance at `http://localhost:6334/` will be used by default.
|
70
70
|
|
71
|
-
|
72
|
-
|
73
|
-
```python
|
74
|
-
doc_embeddings.export(
|
75
|
-
"doc_embeddings",
|
76
|
-
cocoindex.storages.Qdrant(
|
77
|
-
collection_name="cocoindex",
|
78
|
-
grpc_url="https://xyz-example.cloud-region.cloud-provider.cloud.qdrant.io:6334/",
|
79
|
-
api_key="<your-api-key-here>",
|
80
|
-
),
|
81
|
-
primary_key_fields=["id_field"],
|
82
|
-
setup_by_user=True,
|
83
|
-
)
|
84
|
-
```
|
71
|
+
* `collection_name` (type: `str`, required): The name of the collection to export the data to.
|
85
72
|
|
86
73
|
You can find an end-to-end example [here](https://github.com/cocoindex-io/cocoindex/tree/main/examples/text_embedding_qdrant).
|
87
74
|
|
@@ -399,19 +386,7 @@ You can find end-to-end examples fitting into any of supported property graphs i
|
|
399
386
|
|
400
387
|
### Neo4j
|
401
388
|
|
402
|
-
|
403
|
-
|
404
|
-
```bash
|
405
|
-
docker compose -f <(curl -L https://raw.githubusercontent.com/cocoindex-io/cocoindex/refs/heads/main/dev/neo4j.yaml) up -d
|
406
|
-
```
|
407
|
-
|
408
|
-
:::warning
|
409
|
-
|
410
|
-
The docker compose config above will start a Neo4j Enterprise instance under the [Evaluation License](https://neo4j.com/terms/enterprise_us/),
|
411
|
-
with 30 days trial period.
|
412
|
-
Please read and agree the license before starting the instance.
|
413
|
-
|
414
|
-
:::
|
389
|
+
#### Spec
|
415
390
|
|
416
391
|
The `Neo4j` target spec takes the following fields:
|
417
392
|
|
@@ -430,17 +405,32 @@ Neo4j also provides a declaration spec `Neo4jDeclaration`, to configure indexing
|
|
430
405
|
* `primary_key_fields` (required)
|
431
406
|
* `vector_indexes` (optional)
|
432
407
|
|
433
|
-
|
408
|
+
#### Neo4j dev instance
|
434
409
|
|
435
|
-
|
436
|
-
You can bring up a Kuzu API server locally by running:
|
410
|
+
If you don't have a Neo4j database, you can start a Neo4j database using our docker compose config:
|
437
411
|
|
438
412
|
```bash
|
439
|
-
|
440
|
-
KUZU_PORT=8123
|
441
|
-
docker run -d --name kuzu -p ${KUZU_PORT}:8000 -v ${KUZU_DB_DIR}:/database kuzudb/api-server:latest
|
413
|
+
docker compose -f <(curl -L https://raw.githubusercontent.com/cocoindex-io/cocoindex/refs/heads/main/dev/neo4j.yaml) up -d
|
442
414
|
```
|
443
415
|
|
416
|
+
It will bring up a Neo4j instance, which can be accessed with username `neo4j` and password `cocoindex`.
|
417
|
+
You can access the Neo4j browser at [http://localhost:7474](http://localhost:7474).
|
418
|
+
|
419
|
+
:::warning
|
420
|
+
|
421
|
+
The docker compose config above will start a Neo4j Enterprise instance under the [Evaluation License](https://neo4j.com/terms/enterprise_us/),
|
422
|
+
with 30 days trial period.
|
423
|
+
Please read and agree to the license before starting the instance.
|
424
|
+
|
425
|
+
:::
|
426
|
+
|
427
|
+
|
428
|
+
### Kuzu
|
429
|
+
|
430
|
+
#### Spec
|
431
|
+
|
432
|
+
CocoIndex supports talking to Kuzu through its [API server](https://github.com/kuzudb/api-server).
|
433
|
+
|
444
434
|
The `Kuzu` target spec takes the following fields:
|
445
435
|
|
446
436
|
* `connection` (type: [auth reference](../core/flow_def#auth-registry) to `KuzuConnectionSpec`): The connection to the Kuzu database. `KuzuConnectionSpec` has the following fields:
|
@@ -453,3 +443,25 @@ Kuzu also provides a declaration spec `KuzuDeclaration`, to configure indexing o
|
|
453
443
|
* Fields for [nodes to declare](#declare-extra-node-labels), including
|
454
444
|
* `nodes_label` (required)
|
455
445
|
* `primary_key_fields` (required)
|
446
|
+
|
447
|
+
#### Kuzu dev instance
|
448
|
+
|
449
|
+
If you don't have a Kuzu instance yet, you can bring up a Kuzu API server locally by running:
|
450
|
+
|
451
|
+
```bash
|
452
|
+
KUZU_DB_DIR=$HOME/.kuzudb
|
453
|
+
KUZU_PORT=8123
|
454
|
+
docker run -d --name kuzu -p ${KUZU_PORT}:8000 -v ${KUZU_DB_DIR}:/database kuzudb/api-server:latest
|
455
|
+
```
|
456
|
+
|
457
|
+
To explore the graph you built with Kuzu, you can use the [Kuzu Explorer](https://github.com/kuzudb/explorer).
|
458
|
+
Currently Kuzu API server and the explorer cannot be up at the same time. So you need to stop the API server before running the explorer.
|
459
|
+
|
460
|
+
To start the instance of the explorer, run:
|
461
|
+
|
462
|
+
```bash
|
463
|
+
KUZU_EXPLORER_PORT=8124
|
464
|
+
docker run -d --name kuzu-explorer -p ${KUZU_EXPLORER_PORT}:8000 -v ${KUZU_DB_DIR}:/database -e MODE=READ_ONLY kuzudb/explorer:latest
|
465
|
+
```
|
466
|
+
|
467
|
+
You can then access the explorer at [http://localhost:8124](http://localhost:8124).
|
@@ -41,7 +41,7 @@ The [quickstart](getting_started/quickstart#step-41-extract-common-transformatio
|
|
41
41
|
|
42
42
|
```python
|
43
43
|
@cocoindex.transform_flow()
|
44
|
-
def text_to_embedding(text: cocoindex.DataSlice[str]) -> cocoindex.DataSlice[
|
44
|
+
def text_to_embedding(text: cocoindex.DataSlice[str]) -> cocoindex.DataSlice[NDArray[np.float32]]:
|
45
45
|
return text.transform(
|
46
46
|
cocoindex.functions.SentenceTransformerEmbed(
|
47
47
|
model="sentence-transformers/all-MiniLM-L6-v2"))
|
@@ -61,7 +61,7 @@ with doc["chunks"].row() as chunk:
|
|
61
61
|
chunk["embedding"] = chunk["text"].call(text_to_embedding)
|
62
62
|
```
|
63
63
|
|
64
|
-
Any time, you can call the `eval()` method with specific string, which will return a `
|
64
|
+
At any time, you can call the `eval()` method with a specific string, which will return an `NDArray[np.float32]`:
|
65
65
|
|
66
66
|
```python
|
67
67
|
print(text_to_embedding.eval("Hello, world!"))
|
@@ -93,7 +93,7 @@ For example:
|
|
93
93
|
|
94
94
|
```python
|
95
95
|
table_name = cocoindex.utils.get_target_storage_default_name(text_embedding_flow, "doc_embeddings")
|
96
|
-
query = f"SELECT filename, text FROM {table_name} ORDER BY embedding <=> %s
|
96
|
+
query = f"SELECT filename, text FROM {table_name} ORDER BY embedding <=> %s DESC LIMIT 5"
|
97
97
|
...
|
98
98
|
```
|
99
99
|
|
Binary file
|
@@ -12,10 +12,10 @@ Please drop [Cocoindex on Github](https://github.com/cocoindex-io/cocoindex) a s
|
|
12
12
|
|
13
13
|

|
14
14
|
|
15
|
-
|
16
15
|
## Prerequisite
|
17
16
|
* [Install Postgres](https://cocoindex.io/docs/getting_started/installation#-install-postgres) if you don't have one.
|
18
|
-
*
|
17
|
+
* Install [Neo4j](https://cocoindex.io/docs/ops/storages#neo4j-dev-instance) or [Kuzu](https://cocoindex.io/docs/ops/storages#kuzu-dev-instance) if you don't have one.
|
18
|
+
* The example uses Neo4j by default for now. If you want to use Kuzu, find the "SELECT ONE GRAPH DATABASE TO USE" section in `main.py` and switch the active branch.
|
19
19
|
* [Configure your OpenAI API key](https://cocoindex.io/docs/ai/llm#openai).
|
20
20
|
|
21
21
|
## Documentation
|
@@ -45,21 +45,18 @@ cocoindex update main.py
|
|
45
45
|
|
46
46
|
### Browse the knowledge graph
|
47
47
|
|
48
|
-
After the knowledge graph is
|
48
|
+
After the knowledge graph is built, you can explore the knowledge graph.
|
49
49
|
|
50
|
-
|
51
|
-
-
|
52
|
-
- password: `cocoindex`
|
53
|
-
which is pre-configured in the our docker compose [config.yaml](https://raw.githubusercontent.com/cocoindex-io/cocoindex/refs/heads/main/dev/neo4j.yaml).
|
50
|
+
* If you're using Neo4j, you can open the explorer at [http://localhost:7474](http://localhost:7474), with username `neo4j` and password `cocoindex`.
|
51
|
+
* If you're using Kuzu, you can start a Kuzu explorer locally. See [Kuzu dev instance](https://cocoindex.io/docs/ops/storages#kuzu-dev-instance) for more details.
|
54
52
|
|
55
|
-
You can
|
53
|
+
You can run the following Cypher query to get all relationships:
|
56
54
|
|
57
55
|
```cypher
|
58
56
|
MATCH p=()-->() RETURN p
|
59
57
|
```
|
60
|
-
<img width="1366" alt="neo4j-for-coco-docs" src="https://github.com/user-attachments/assets/3c8b6329-6fee-4533-9480-571399b57e57" />
|
61
|
-
|
62
58
|
|
59
|
+
<img width="1366" alt="neo4j-for-coco-docs" src="https://github.com/user-attachments/assets/3c8b6329-6fee-4533-9480-571399b57e57" />
|
63
60
|
|
64
61
|
## CocoInsight
|
65
62
|
I used CocoInsight (Free beta now) to troubleshoot the index generation and understand the data lineage of the pipeline.
|
@@ -5,27 +5,6 @@ This example shows how to extract relationships from documents and build a knowl
|
|
5
5
|
import dataclasses
|
6
6
|
import cocoindex
|
7
7
|
|
8
|
-
|
9
|
-
@dataclasses.dataclass
|
10
|
-
class DocumentSummary:
|
11
|
-
"""Describe a summary of a document."""
|
12
|
-
|
13
|
-
title: str
|
14
|
-
summary: str
|
15
|
-
|
16
|
-
|
17
|
-
@dataclasses.dataclass
|
18
|
-
class Relationship:
|
19
|
-
"""
|
20
|
-
Describe a relationship between two entities.
|
21
|
-
Subject and object should be Core CocoIndex concepts only, should be nouns. For example, `CocoIndex`, `Incremental Processing`, `ETL`, `Data` etc.
|
22
|
-
"""
|
23
|
-
|
24
|
-
subject: str
|
25
|
-
predicate: str
|
26
|
-
object: str
|
27
|
-
|
28
|
-
|
29
8
|
neo4j_conn_spec = cocoindex.add_auth_entry(
|
30
9
|
"Neo4jConnection",
|
31
10
|
cocoindex.storages.Neo4jConnection(
|
@@ -41,19 +20,43 @@ kuzu_conn_spec = cocoindex.add_auth_entry(
|
|
41
20
|
),
|
42
21
|
)
|
43
22
|
|
44
|
-
#
|
23
|
+
# SELECT ONE GRAPH DATABASE TO USE
|
24
|
+
# This example can use either Neo4j or Kuzu as the graph database.
|
25
|
+
# Please make sure only one branch is live and others are commented out.
|
26
|
+
|
27
|
+
# Use Neo4j
|
45
28
|
GraphDbSpec = cocoindex.storages.Neo4j
|
46
29
|
GraphDbConnection = cocoindex.storages.Neo4jConnection
|
47
30
|
GraphDbDeclaration = cocoindex.storages.Neo4jDeclaration
|
48
31
|
conn_spec = neo4j_conn_spec
|
49
32
|
|
50
|
-
# Use Kuzu
|
33
|
+
# Use Kuzu
|
51
34
|
# GraphDbSpec = cocoindex.storages.Kuzu
|
52
35
|
# GraphDbConnection = cocoindex.storages.KuzuConnection
|
53
36
|
# GraphDbDeclaration = cocoindex.storages.KuzuDeclaration
|
54
37
|
# conn_spec = kuzu_conn_spec
|
55
38
|
|
56
39
|
|
40
|
+
@dataclasses.dataclass
|
41
|
+
class DocumentSummary:
|
42
|
+
"""Describe a summary of a document."""
|
43
|
+
|
44
|
+
title: str
|
45
|
+
summary: str
|
46
|
+
|
47
|
+
|
48
|
+
@dataclasses.dataclass
|
49
|
+
class Relationship:
|
50
|
+
"""
|
51
|
+
Describe a relationship between two entities.
|
52
|
+
Subject and object should be Core CocoIndex concepts only, should be nouns. For example, `CocoIndex`, `Incremental Processing`, `ETL`, `Data` etc.
|
53
|
+
"""
|
54
|
+
|
55
|
+
subject: str
|
56
|
+
predicate: str
|
57
|
+
object: str
|
58
|
+
|
59
|
+
|
57
60
|
@cocoindex.flow_def(name="DocsToKG")
|
58
61
|
def docs_to_kg_flow(
|
59
62
|
flow_builder: cocoindex.FlowBuilder, data_scope: cocoindex.DataScope
|