cocoindex 0.1.50__tar.gz → 0.1.52__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cocoindex-0.1.52/.github/workflows/_doc_release.yml +31 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/.github/workflows/_test.yml +3 -2
- cocoindex-0.1.52/.github/workflows/docs.yml +34 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/.github/workflows/release.yml +6 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/.gitignore +1 -1
- {cocoindex-0.1.50 → cocoindex-0.1.52}/Cargo.lock +72 -16
- {cocoindex-0.1.50 → cocoindex-0.1.52}/Cargo.toml +6 -4
- {cocoindex-0.1.50 → cocoindex-0.1.52}/PKG-INFO +2 -2
- {cocoindex-0.1.50 → cocoindex-0.1.52}/README.md +1 -1
- {cocoindex-0.1.50 → cocoindex-0.1.52}/docs/docs/core/basics.md +7 -7
- {cocoindex-0.1.50 → cocoindex-0.1.52}/docs/docs/core/cli.mdx +1 -1
- {cocoindex-0.1.50 → cocoindex-0.1.52}/docs/docs/core/data_types.mdx +3 -3
- {cocoindex-0.1.50 → cocoindex-0.1.52}/docs/docs/core/flow_def.mdx +18 -18
- {cocoindex-0.1.50 → cocoindex-0.1.52}/docs/docs/core/flow_methods.mdx +6 -6
- {cocoindex-0.1.50 → cocoindex-0.1.52}/docs/docs/getting_started/quickstart.md +4 -4
- cocoindex-0.1.50/docs/docs/ops/storages.md → cocoindex-0.1.52/docs/docs/ops/targets.md +21 -21
- {cocoindex-0.1.50 → cocoindex-0.1.52}/docs/docs/query.mdx +2 -2
- {cocoindex-0.1.50 → cocoindex-0.1.52}/docs/docusaurus.config.ts +4 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/docs/sidebars.ts +1 -1
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/code_embedding/pyproject.toml +1 -1
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/docs_to_knowledge_graph/pyproject.toml +1 -1
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/manuals_llm_extraction/pyproject.toml +1 -1
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/pdf_embedding/pyproject.toml +1 -1
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/product_recommendation/pyproject.toml +1 -1
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/text_embedding/pyproject.toml +1 -1
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/text_embedding_qdrant/pyproject.toml +1 -1
- {cocoindex-0.1.50 → cocoindex-0.1.52}/pyproject.toml +0 -1
- cocoindex-0.1.52/python/cocoindex/__init__.py +71 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/python/cocoindex/cli.py +23 -6
- {cocoindex-0.1.50 → cocoindex-0.1.52}/python/cocoindex/convert.py +7 -3
- {cocoindex-0.1.50 → cocoindex-0.1.52}/python/cocoindex/flow.py +2 -2
- {cocoindex-0.1.50 → cocoindex-0.1.52}/python/cocoindex/op.py +3 -3
- {cocoindex-0.1.50 → cocoindex-0.1.52}/python/cocoindex/setting.py +10 -6
- cocoindex-0.1.50/python/cocoindex/storages.py → cocoindex-0.1.52/python/cocoindex/targets.py +8 -8
- {cocoindex-0.1.50 → cocoindex-0.1.52}/python/cocoindex/tests/test_convert.py +151 -104
- cocoindex-0.1.52/python/cocoindex/tests/test_optional_database.py +249 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/python/cocoindex/tests/test_typing.py +62 -56
- {cocoindex-0.1.50 → cocoindex-0.1.52}/python/cocoindex/typing.py +19 -19
- cocoindex-0.1.52/python/cocoindex/utils.py +20 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/src/base/value.rs +8 -6
- {cocoindex-0.1.50 → cocoindex-0.1.52}/src/builder/flow_builder.rs +10 -6
- {cocoindex-0.1.50 → cocoindex-0.1.52}/src/execution/db_tracking_setup.rs +3 -4
- {cocoindex-0.1.50 → cocoindex-0.1.52}/src/execution/live_updater.rs +2 -2
- {cocoindex-0.1.50 → cocoindex-0.1.52}/src/lib_context.rs +90 -9
- {cocoindex-0.1.50 → cocoindex-0.1.52}/src/ops/factory_bases.rs +1 -3
- {cocoindex-0.1.50 → cocoindex-0.1.52}/src/ops/mod.rs +1 -1
- {cocoindex-0.1.50 → cocoindex-0.1.52}/src/ops/registration.rs +5 -5
- {cocoindex-0.1.50/src/ops/storages → cocoindex-0.1.52/src/ops/targets}/kuzu.rs +1 -3
- {cocoindex-0.1.50/src/ops/storages → cocoindex-0.1.52/src/ops/targets}/neo4j.rs +1 -3
- {cocoindex-0.1.50/src/ops/storages → cocoindex-0.1.52/src/ops/targets}/postgres.rs +2 -4
- {cocoindex-0.1.50/src/ops/storages → cocoindex-0.1.52/src/ops/targets}/qdrant.rs +1 -3
- cocoindex-0.1.52/src/py/convert.rs +518 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/src/py/mod.rs +9 -6
- {cocoindex-0.1.50 → cocoindex-0.1.52}/src/service/flows.rs +3 -3
- cocoindex-0.1.52/src/settings.rs +99 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/src/setup/auth_registry.rs +1 -1
- {cocoindex-0.1.50 → cocoindex-0.1.52}/src/setup/components.rs +0 -8
- {cocoindex-0.1.50 → cocoindex-0.1.52}/src/setup/db_metadata.rs +3 -4
- {cocoindex-0.1.50 → cocoindex-0.1.52}/src/setup/states.rs +2 -14
- cocoindex-0.1.50/.github/workflows/docs.yml +0 -74
- cocoindex-0.1.50/python/cocoindex/__init__.py +0 -17
- cocoindex-0.1.50/python/cocoindex/utils.py +0 -17
- cocoindex-0.1.50/src/py/convert.rs +0 -232
- cocoindex-0.1.50/src/settings.rs +0 -13
- {cocoindex-0.1.50 → cocoindex-0.1.52}/.cargo/config.toml +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/.env.lib_debug +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/.github/ISSUE_TEMPLATE/🐛-bug-report.md +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/.github/ISSUE_TEMPLATE/💡-feature-request.md +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/.github/scripts/update_version.sh +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/.github/workflows/CI.yml +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/.vscode/settings.json +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/CODE_OF_CONDUCT.md +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/CONTRIBUTING.md +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/LICENSE +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/dev/neo4j.yaml +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/dev/postgres.yaml +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/docs/.gitignore +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/docs/README.md +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/docs/docs/about/community.md +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/docs/docs/about/contributing.md +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/docs/docs/ai/llm.mdx +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/docs/docs/core/custom_function.mdx +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/docs/docs/core/data_example.svg +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/docs/docs/core/flow_example.svg +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/docs/docs/core/settings.mdx +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/docs/docs/getting_started/installation.md +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/docs/docs/getting_started/markdown_files.zip +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/docs/docs/getting_started/overview.md +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/docs/docs/ops/functions.md +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/docs/docs/ops/sources.md +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/docs/package.json +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/docs/src/components/HomepageFeatures/index.tsx +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/docs/src/components/HomepageFeatures/styles.module.css +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/docs/src/css/custom.css +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/docs/src/theme/Root.js +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/docs/static/.nojekyll +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/docs/static/img/docusaurus.png +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/docs/static/img/favicon.ico +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/docs/static/img/icon.svg +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/docs/static/img/incremental-etl.gif +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/docs/static/robots.txt +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/docs/tsconfig.json +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/docs/yarn.lock +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/amazon_s3_embedding/.env.example +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/amazon_s3_embedding/.gitignore +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/amazon_s3_embedding/README.md +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/amazon_s3_embedding/main.py +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/amazon_s3_embedding/pyproject.toml +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/code_embedding/.env +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/code_embedding/README.md +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/code_embedding/main.py +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/docs_to_knowledge_graph/.env +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/docs_to_knowledge_graph/README.md +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/docs_to_knowledge_graph/main.py +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/fastapi_server_docker/.dockerignore +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/fastapi_server_docker/.env +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/fastapi_server_docker/README.md +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/fastapi_server_docker/compose.yaml +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/fastapi_server_docker/dockerfile +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/fastapi_server_docker/files/1810.04805v2.md +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/fastapi_server_docker/main.py +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/fastapi_server_docker/requirements.txt +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/gdrive_text_embedding/.env.example +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/gdrive_text_embedding/.gitignore +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/gdrive_text_embedding/README.md +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/gdrive_text_embedding/main.py +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/gdrive_text_embedding/pyproject.toml +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/image_search/.env +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/image_search/README.md +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/image_search/frontend/.gitignore +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/image_search/frontend/index.html +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/image_search/frontend/package-lock.json +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/image_search/frontend/package.json +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/image_search/frontend/src/App.jsx +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/image_search/frontend/src/main.jsx +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/image_search/frontend/src/style.css +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/image_search/frontend/vite.config.js +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/image_search/img/cat1.jpeg +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/image_search/img/dog1.jpeg +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/image_search/img/elephant1.jpg +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/image_search/img/giraffe.jpg +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/image_search/main.py +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/image_search/pyproject.toml +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/image_search/requirements.txt +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/manuals_llm_extraction/.env +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/manuals_llm_extraction/README.md +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/manuals_llm_extraction/main.py +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/manuals_llm_extraction/manuals/array.pdf +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/manuals_llm_extraction/manuals/base64.pdf +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/manuals_llm_extraction/manuals/copy.pdf +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/manuals_llm_extraction/manuals/glob.pdf +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/pdf_embedding/.env +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/pdf_embedding/README.md +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/pdf_embedding/main.py +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/pdf_embedding/pdf_files/1706.03762v7.pdf +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/pdf_embedding/pdf_files/1810.04805v2.pdf +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/pdf_embedding/pdf_files/rfc8259.pdf +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/product_recommendation/.env +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/product_recommendation/README.md +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/product_recommendation/img/cocoinsight.png +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/product_recommendation/img/neo4j.png +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/product_recommendation/main.py +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/product_recommendation/products/p1.json +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/product_recommendation/products/p2.json +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/product_recommendation/products/p3.json +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/product_recommendation/products/p4.json +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/product_recommendation/products/p5.json +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/product_recommendation/products/p6.json +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/product_recommendation/products/p7.json +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/product_recommendation/products/p8.json +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/product_recommendation/products/p9.json +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/text_embedding/.env +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/text_embedding/README.md +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/text_embedding/Text_Embedding.ipynb +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/text_embedding/main.py +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/text_embedding/markdown_files/1706.03762v7.md +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/text_embedding/markdown_files/1810.04805v2.md +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/text_embedding/markdown_files/rfc8259.md +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/text_embedding_qdrant/.env +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/text_embedding_qdrant/README.md +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/text_embedding_qdrant/main.py +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/examples/text_embedding_qdrant/markdown_files/rfc8259.md +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/python/cocoindex/auth_registry.py +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/python/cocoindex/functions.py +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/python/cocoindex/index.py +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/python/cocoindex/lib.py +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/python/cocoindex/llm.py +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/python/cocoindex/py.typed +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/python/cocoindex/runtime.py +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/python/cocoindex/setup.py +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/python/cocoindex/sources.py +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/python/cocoindex/tests/__init__.py +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/ruff.toml +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/src/base/duration.rs +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/src/base/field_attrs.rs +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/src/base/json_schema.rs +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/src/base/mod.rs +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/src/base/schema.rs +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/src/base/spec.rs +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/src/builder/analyzed_flow.rs +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/src/builder/analyzer.rs +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/src/builder/mod.rs +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/src/builder/plan.rs +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/src/execution/db_tracking.rs +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/src/execution/dumper.rs +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/src/execution/evaluator.rs +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/src/execution/indexing_status.rs +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/src/execution/memoization.rs +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/src/execution/mod.rs +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/src/execution/row_indexer.rs +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/src/execution/source_indexer.rs +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/src/execution/stats.rs +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/src/lib.rs +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/src/llm/anthropic.rs +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/src/llm/gemini.rs +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/src/llm/mod.rs +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/src/llm/ollama.rs +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/src/llm/openai.rs +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/src/ops/functions/extract_by_llm.rs +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/src/ops/functions/mod.rs +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/src/ops/functions/parse_json.rs +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/src/ops/functions/split_recursively.rs +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/src/ops/interface.rs +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/src/ops/py_factory.rs +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/src/ops/registry.rs +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/src/ops/sdk.rs +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/src/ops/sources/amazon_s3.rs +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/src/ops/sources/google_drive.rs +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/src/ops/sources/local_file.rs +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/src/ops/sources/mod.rs +0 -0
- {cocoindex-0.1.50/src/ops/storages → cocoindex-0.1.52/src/ops/targets}/mod.rs +0 -0
- {cocoindex-0.1.50/src/ops/storages → cocoindex-0.1.52/src/ops/targets}/shared/mod.rs +0 -0
- {cocoindex-0.1.50/src/ops/storages → cocoindex-0.1.52/src/ops/targets}/shared/property_graph.rs +0 -0
- {cocoindex-0.1.50/src/ops/storages → cocoindex-0.1.52/src/ops/targets}/shared/table_columns.rs +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/src/prelude.rs +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/src/server.rs +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/src/service/error.rs +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/src/service/mod.rs +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/src/setup/driver.rs +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/src/setup/mod.rs +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/src/utils/db.rs +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/src/utils/fingerprint.rs +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/src/utils/immutable.rs +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/src/utils/mod.rs +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/src/utils/retryable.rs +0 -0
- {cocoindex-0.1.50 → cocoindex-0.1.52}/src/utils/yaml_ser.rs +0 -0
@@ -0,0 +1,31 @@
|
|
1
|
+
name: Release Docs
|
2
|
+
|
3
|
+
on:
|
4
|
+
workflow_call:
|
5
|
+
|
6
|
+
jobs:
|
7
|
+
deploy:
|
8
|
+
runs-on: ubuntu-latest
|
9
|
+
environment: docs-release
|
10
|
+
steps:
|
11
|
+
- uses: actions/checkout@v4
|
12
|
+
- uses: actions/setup-node@v4
|
13
|
+
with:
|
14
|
+
node-version: 18
|
15
|
+
cache: yarn
|
16
|
+
cache-dependency-path: docs/yarn.lock
|
17
|
+
- uses: webfactory/ssh-agent@v0.5.0
|
18
|
+
with:
|
19
|
+
ssh-private-key: ${{ secrets.GH_PAGES_DEPLOY }}
|
20
|
+
- name: Deploy to GitHub Pages
|
21
|
+
env:
|
22
|
+
USE_SSH: true
|
23
|
+
run: |
|
24
|
+
export COCOINDEX_DOCS_POSTHOG_API_KEY=${{ vars.COCOINDEX_DOCS_POSTHOG_API_KEY }}
|
25
|
+
export COCOINDEX_DOCS_MIXPANEL_API_KEY=${{ vars.COCOINDEX_DOCS_MIXPANEL_API_KEY }}
|
26
|
+
export COCOINDEX_DOCS_ALGOLIA_APP_ID=${{ vars.COCOINDEX_DOCS_ALGOLIA_APP_ID }}
|
27
|
+
export COCOINDEX_DOCS_ALGOLIA_API_KEY=${{ vars.COCOINDEX_DOCS_ALGOLIA_API_KEY }}
|
28
|
+
git config --global user.email "${{ vars.COCOINDEX_DOCS_DEPLOY_USER_EMAIL }}"
|
29
|
+
git config --global user.name "${{ vars.COCOINDEX_DOCS_DEPLOY_USER_NAME }}"
|
30
|
+
yarn --cwd docs install --frozen-lockfile
|
31
|
+
yarn --cwd docs deploy
|
@@ -27,15 +27,16 @@ jobs:
|
|
27
27
|
run: cargo test --verbose
|
28
28
|
|
29
29
|
- uses: actions/setup-python@v5
|
30
|
+
id: setup_python
|
30
31
|
with:
|
31
32
|
python-version: ${{ matrix.python-version }}
|
32
33
|
cache: 'pip'
|
33
34
|
- uses: actions/cache@v4
|
34
35
|
with:
|
35
36
|
path: .venv
|
36
|
-
key: ${{ runner.os }}-
|
37
|
+
key: ${{ runner.os }}-pyenv-${{ steps.setup_python.outputs.python-version }}-${{ hashFiles('pyproject.toml') }}
|
37
38
|
restore-keys: |
|
38
|
-
${{ runner.os }}-
|
39
|
+
${{ runner.os }}-pyenv-${{ steps.setup_python.outputs.python-version }}-
|
39
40
|
- name: Setup venv
|
40
41
|
run: |
|
41
42
|
python -m venv .venv
|
@@ -0,0 +1,34 @@
|
|
1
|
+
name: docs
|
2
|
+
|
3
|
+
on:
|
4
|
+
pull_request:
|
5
|
+
branches: [main]
|
6
|
+
paths:
|
7
|
+
- docs/**
|
8
|
+
- ".github/workflows/docs.yml"
|
9
|
+
workflow_dispatch:
|
10
|
+
|
11
|
+
permissions:
|
12
|
+
contents: write
|
13
|
+
|
14
|
+
jobs:
|
15
|
+
test-deploy:
|
16
|
+
if: github.event_name == 'pull_request'
|
17
|
+
runs-on: ubuntu-latest
|
18
|
+
steps:
|
19
|
+
- uses: actions/checkout@v4
|
20
|
+
- uses: actions/setup-node@v4
|
21
|
+
with:
|
22
|
+
node-version: 18
|
23
|
+
cache: yarn
|
24
|
+
cache-dependency-path: docs/yarn.lock
|
25
|
+
- name: Install dependencies
|
26
|
+
run: yarn --cwd docs install --frozen-lockfile
|
27
|
+
- name: Test build website
|
28
|
+
run: yarn --cwd docs build
|
29
|
+
|
30
|
+
deploy:
|
31
|
+
name: Release Docs
|
32
|
+
if: ${{ github.event_name == 'workflow_dispatch' }}
|
33
|
+
uses: ./.github/workflows/_doc_release.yml
|
34
|
+
secrets: inherit
|
@@ -1040,7 +1040,7 @@ dependencies = [
|
|
1040
1040
|
|
1041
1041
|
[[package]]
|
1042
1042
|
name = "cocoindex"
|
1043
|
-
version = "0.1.50"
|
1043
|
+
version = "0.1.52"
|
1044
1044
|
dependencies = [
|
1045
1045
|
"anyhow",
|
1046
1046
|
"async-openai",
|
@@ -1073,6 +1073,7 @@ dependencies = [
|
|
1073
1073
|
"json5",
|
1074
1074
|
"log",
|
1075
1075
|
"neo4rs",
|
1076
|
+
"numpy",
|
1076
1077
|
"owo-colors",
|
1077
1078
|
"pgvector",
|
1078
1079
|
"phf",
|
@@ -2664,6 +2665,16 @@ version = "0.8.4"
|
|
2664
2665
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
2665
2666
|
checksum = "47e1ffaa40ddd1f3ed91f717a33c8c0ee23fff369e3aa8772b9605cc1d22f4c3"
|
2666
2667
|
|
2668
|
+
[[package]]
|
2669
|
+
name = "matrixmultiply"
|
2670
|
+
version = "0.3.10"
|
2671
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
2672
|
+
checksum = "a06de3016e9fae57a36fd14dba131fccf49f74b40b7fbdb472f96e361ec71a08"
|
2673
|
+
dependencies = [
|
2674
|
+
"autocfg",
|
2675
|
+
"rawpointer",
|
2676
|
+
]
|
2677
|
+
|
2667
2678
|
[[package]]
|
2668
2679
|
name = "md-5"
|
2669
2680
|
version = "0.10.6"
|
@@ -2731,6 +2742,21 @@ dependencies = [
|
|
2731
2742
|
"windows-sys 0.52.0",
|
2732
2743
|
]
|
2733
2744
|
|
2745
|
+
[[package]]
|
2746
|
+
name = "ndarray"
|
2747
|
+
version = "0.16.1"
|
2748
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
2749
|
+
checksum = "882ed72dce9365842bf196bdeedf5055305f11fc8c03dee7bb0194a6cad34841"
|
2750
|
+
dependencies = [
|
2751
|
+
"matrixmultiply",
|
2752
|
+
"num-complex",
|
2753
|
+
"num-integer",
|
2754
|
+
"num-traits",
|
2755
|
+
"portable-atomic",
|
2756
|
+
"portable-atomic-util",
|
2757
|
+
"rawpointer",
|
2758
|
+
]
|
2759
|
+
|
2734
2760
|
[[package]]
|
2735
2761
|
name = "neo4rs"
|
2736
2762
|
version = "0.8.0"
|
@@ -2796,6 +2822,15 @@ dependencies = [
|
|
2796
2822
|
"zeroize",
|
2797
2823
|
]
|
2798
2824
|
|
2825
|
+
[[package]]
|
2826
|
+
name = "num-complex"
|
2827
|
+
version = "0.4.6"
|
2828
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
2829
|
+
checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495"
|
2830
|
+
dependencies = [
|
2831
|
+
"num-traits",
|
2832
|
+
]
|
2833
|
+
|
2799
2834
|
[[package]]
|
2800
2835
|
name = "num-conv"
|
2801
2836
|
version = "0.1.0"
|
@@ -2851,6 +2886,22 @@ dependencies = [
|
|
2851
2886
|
"libc",
|
2852
2887
|
]
|
2853
2888
|
|
2889
|
+
[[package]]
|
2890
|
+
name = "numpy"
|
2891
|
+
version = "0.25.0"
|
2892
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
2893
|
+
checksum = "29f1dee9aa8d3f6f8e8b9af3803006101bb3653866ef056d530d53ae68587191"
|
2894
|
+
dependencies = [
|
2895
|
+
"libc",
|
2896
|
+
"ndarray",
|
2897
|
+
"num-complex",
|
2898
|
+
"num-integer",
|
2899
|
+
"num-traits",
|
2900
|
+
"pyo3",
|
2901
|
+
"pyo3-build-config",
|
2902
|
+
"rustc-hash 2.1.1",
|
2903
|
+
]
|
2904
|
+
|
2854
2905
|
[[package]]
|
2855
2906
|
name = "object"
|
2856
2907
|
version = "0.36.7"
|
@@ -3228,11 +3279,10 @@ dependencies = [
|
|
3228
3279
|
|
3229
3280
|
[[package]]
|
3230
3281
|
name = "pyo3"
|
3231
|
-
version = "0.
|
3282
|
+
version = "0.25.0"
|
3232
3283
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
3233
|
-
checksum = "
|
3284
|
+
checksum = "f239d656363bcee73afef85277f1b281e8ac6212a1d42aa90e55b90ed43c47a4"
|
3234
3285
|
dependencies = [
|
3235
|
-
"cfg-if",
|
3236
3286
|
"chrono",
|
3237
3287
|
"indoc",
|
3238
3288
|
"libc",
|
@@ -3247,9 +3297,9 @@ dependencies = [
|
|
3247
3297
|
|
3248
3298
|
[[package]]
|
3249
3299
|
name = "pyo3-async-runtimes"
|
3250
|
-
version = "0.
|
3300
|
+
version = "0.25.0"
|
3251
3301
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
3252
|
-
checksum = "
|
3302
|
+
checksum = "d73cc6b1b7d8b3cef02101d37390dbdfe7e450dfea14921cae80a9534ba59ef2"
|
3253
3303
|
dependencies = [
|
3254
3304
|
"futures",
|
3255
3305
|
"once_cell",
|
@@ -3260,9 +3310,9 @@ dependencies = [
|
|
3260
3310
|
|
3261
3311
|
[[package]]
|
3262
3312
|
name = "pyo3-build-config"
|
3263
|
-
version = "0.
|
3313
|
+
version = "0.25.0"
|
3264
3314
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
3265
|
-
checksum = "
|
3315
|
+
checksum = "755ea671a1c34044fa165247aaf6f419ca39caa6003aee791a0df2713d8f1b6d"
|
3266
3316
|
dependencies = [
|
3267
3317
|
"once_cell",
|
3268
3318
|
"target-lexicon",
|
@@ -3270,9 +3320,9 @@ dependencies = [
|
|
3270
3320
|
|
3271
3321
|
[[package]]
|
3272
3322
|
name = "pyo3-ffi"
|
3273
|
-
version = "0.
|
3323
|
+
version = "0.25.0"
|
3274
3324
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
3275
|
-
checksum = "
|
3325
|
+
checksum = "fc95a2e67091e44791d4ea300ff744be5293f394f1bafd9f78c080814d35956e"
|
3276
3326
|
dependencies = [
|
3277
3327
|
"libc",
|
3278
3328
|
"pyo3-build-config",
|
@@ -3280,9 +3330,9 @@ dependencies = [
|
|
3280
3330
|
|
3281
3331
|
[[package]]
|
3282
3332
|
name = "pyo3-macros"
|
3283
|
-
version = "0.
|
3333
|
+
version = "0.25.0"
|
3284
3334
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
3285
|
-
checksum = "
|
3335
|
+
checksum = "a179641d1b93920829a62f15e87c0ed791b6c8db2271ba0fd7c2686090510214"
|
3286
3336
|
dependencies = [
|
3287
3337
|
"proc-macro2",
|
3288
3338
|
"pyo3-macros-backend",
|
@@ -3292,9 +3342,9 @@ dependencies = [
|
|
3292
3342
|
|
3293
3343
|
[[package]]
|
3294
3344
|
name = "pyo3-macros-backend"
|
3295
|
-
version = "0.
|
3345
|
+
version = "0.25.0"
|
3296
3346
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
3297
|
-
checksum = "
|
3347
|
+
checksum = "9dff85ebcaab8c441b0e3f7ae40a6963ecea8a9f5e74f647e33fcf5ec9a1e89e"
|
3298
3348
|
dependencies = [
|
3299
3349
|
"heck",
|
3300
3350
|
"proc-macro2",
|
@@ -3305,9 +3355,9 @@ dependencies = [
|
|
3305
3355
|
|
3306
3356
|
[[package]]
|
3307
3357
|
name = "pythonize"
|
3308
|
-
version = "0.
|
3358
|
+
version = "0.25.0"
|
3309
3359
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
3310
|
-
checksum = "
|
3360
|
+
checksum = "597907139a488b22573158793aa7539df36ae863eba300c75f3a0d65fc475e27"
|
3311
3361
|
dependencies = [
|
3312
3362
|
"pyo3",
|
3313
3363
|
"serde",
|
@@ -3462,6 +3512,12 @@ dependencies = [
|
|
3462
3512
|
"getrandom 0.3.2",
|
3463
3513
|
]
|
3464
3514
|
|
3515
|
+
[[package]]
|
3516
|
+
name = "rawpointer"
|
3517
|
+
version = "0.2.1"
|
3518
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
3519
|
+
checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3"
|
3520
|
+
|
3465
3521
|
[[package]]
|
3466
3522
|
name = "redox_syscall"
|
3467
3523
|
version = "0.5.11"
|
@@ -2,8 +2,9 @@
|
|
2
2
|
name = "cocoindex"
|
3
3
|
# Version used for local development is always higher than others to take precedence.
|
4
4
|
# Will be overridden for specific release versions.
|
5
|
-
version = "0.1.50"
|
5
|
+
version = "0.1.52"
|
6
6
|
edition = "2024"
|
7
|
+
rust-version = "1.86"
|
7
8
|
|
8
9
|
[profile.release]
|
9
10
|
codegen-units = 1
|
@@ -14,9 +15,9 @@ name = "cocoindex_engine"
|
|
14
15
|
crate-type = ["cdylib"]
|
15
16
|
|
16
17
|
[dependencies]
|
17
|
-
pyo3 = { version = "0.
|
18
|
-
pythonize = "0.
|
19
|
-
pyo3-async-runtimes = { version = "0.
|
18
|
+
pyo3 = { version = "0.25.0", features = ["chrono", "auto-initialize"] }
|
19
|
+
pythonize = "0.25.0"
|
20
|
+
pyo3-async-runtimes = { version = "0.25.0", features = ["tokio-runtime"] }
|
20
21
|
|
21
22
|
anyhow = { version = "1.0.97", features = ["std"] }
|
22
23
|
async-trait = "0.1.88"
|
@@ -113,3 +114,4 @@ json5 = "0.4.1"
|
|
113
114
|
aws-config = "1.6.2"
|
114
115
|
aws-sdk-s3 = "1.85.0"
|
115
116
|
aws-sdk-sqs = "1.67.0"
|
117
|
+
numpy = "0.25.0"
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: cocoindex
|
3
|
-
Version: 0.1.50
|
3
|
+
Version: 0.1.52
|
4
4
|
Requires-Dist: sentence-transformers>=3.3.1
|
5
5
|
Requires-Dist: click>=8.1.8
|
6
6
|
Requires-Dist: rich>=14.0.0
|
@@ -130,7 +130,7 @@ def text_embedding_flow(flow_builder: cocoindex.FlowBuilder, data_scope: cocoind
|
|
130
130
|
# Export collected data to a vector index.
|
131
131
|
doc_embeddings.export(
|
132
132
|
"doc_embeddings",
|
133
|
-
cocoindex.storages.Postgres(),
|
133
|
+
cocoindex.targets.Postgres(),
|
134
134
|
primary_key_fields=["filename", "location"],
|
135
135
|
vector_indexes=[
|
136
136
|
cocoindex.VectorIndexDef(
|
@@ -111,7 +111,7 @@ def text_embedding_flow(flow_builder: cocoindex.FlowBuilder, data_scope: cocoind
|
|
111
111
|
# Export collected data to a vector index.
|
112
112
|
doc_embeddings.export(
|
113
113
|
"doc_embeddings",
|
114
|
-
cocoindex.storages.Postgres(),
|
114
|
+
cocoindex.targets.Postgres(),
|
115
115
|
primary_key_fields=["filename", "location"],
|
116
116
|
vector_indexes=[
|
117
117
|
cocoindex.VectorIndexDef(
|
@@ -9,7 +9,7 @@ An **index** is a collection of data stored in a way that is easy for retrieval.
|
|
9
9
|
|
10
10
|
CocoIndex is an ETL framework for building indexes from specified data sources, a.k.a. **indexing**. It also offers utilities for users to retrieve data from the indexes.
|
11
11
|
|
12
|
-
An **indexing flow** extracts data from specified data sources, upon specified transformations, and puts the transformed data into specified
|
12
|
+
An **indexing flow** extracts data from specified data sources, upon specified transformations, and puts the transformed data into specified target for later retrieval.
|
13
13
|
|
14
14
|
## Indexing flow elements
|
15
15
|
|
@@ -17,7 +17,7 @@ An indexing flow has two aspects: data and operations on data.
|
|
17
17
|
|
18
18
|
### Data
|
19
19
|
|
20
|
-
An indexing flow involves source data and transformed data (either as an intermediate result or the final result to be put into
|
20
|
+
An indexing flow involves source data and transformed data (either as an intermediate result or the final result to be put into targets). All data within the indexing flow has **schema** determined at flow definition time.
|
21
21
|
|
22
22
|
Each piece of data has a **data type**, falling into one of the following categories:
|
23
23
|
|
@@ -36,8 +36,8 @@ An **operation** in an indexing flow defines a step in the flow. An operation is
|
|
36
36
|
* **Action**, which defines the behavior of the operation, e.g. *import*, *transform*, *for each*, *collect* and *export*.
|
37
37
|
See [Flow Definition](flow_def) for more details for each action.
|
38
38
|
|
39
|
-
* Some actions (i.e. "import", "transform" and "export") require an **Operation Spec**, which describes the specific behavior of the operation, e.g. a source to import from, a function describing the transformation behavior, a target
|
40
|
-
* Each operation spec has a **operation type**, e.g. `LocalFile` (data source), `SplitRecursively` (function), `SentenceTransformerEmbed` (function), `Postgres` (
|
39
|
+
* Some actions (i.e. "import", "transform" and "export") require an **Operation Spec**, which describes the specific behavior of the operation, e.g. a source to import from, a function describing the transformation behavior, a target to export to (as an index).
|
40
|
+
* Each operation spec has an **operation type**, e.g. `LocalFile` (data source), `SplitRecursively` (function), `SentenceTransformerEmbed` (function), `Postgres` (target).
|
41
41
|
* CocoIndex framework maintains a set of supported operation types. Users can also implement their own.
|
42
42
|
|
43
43
|
"import" and "transform" operations produce output data, whose data type is determined based on the operation spec and data types of input data (for "transform" operation only).
|
@@ -62,11 +62,11 @@ This shows schema and example data for the indexing flow:
|
|
62
62
|
|
63
63
|
## Life cycle of an indexing flow
|
64
64
|
|
65
|
-
An indexing flow, once set up, maintains a long-lived relationship between data source and
|
65
|
+
An indexing flow, once set up, maintains a long-lived relationship between data source and target. This means:
|
66
66
|
|
67
|
-
1. The target
|
67
|
+
1. The target created by the flow remains available for querying at any time
|
68
68
|
|
69
|
-
2. As source data changes (new data added, existing data updated or deleted), data in the target
|
69
|
+
2. As source data changes (new data added, existing data updated or deleted), data in the target are updated to reflect those changes,
|
70
70
|
at a certain pace, according to the update mode:
|
71
71
|
|
72
72
|
* **One time update**: Once triggered, CocoIndex updates the target data to reflect the version of source data up to the current moment.
|
@@ -61,7 +61,7 @@ The following subcommands are available:
|
|
61
61
|
| ---------- | ----------- |
|
62
62
|
| `ls` | List all flows present in the given file/module. Or list all persisted flows under the current app namespace if no file/module specified. |
|
63
63
|
| `show` | Show the spec and schema for a specific flow. |
|
64
|
-
| `setup` | Check and apply backend setup changes for flows, including the internal and target
|
64
|
+
| `setup` | Check and apply backend setup changes for flows, including the internal storage and target (to export). |
|
65
65
|
| `drop` | Drop the backend setup for specified flows. |
|
66
66
|
| `update` | Update the index defined by the flow. |
|
67
67
|
| `evaluate` | Evaluate the flow and dump flow outputs to files. Instead of updating the index, it dumps what should be indexed to files. Mainly used for evaluation purpose. |
|
@@ -13,7 +13,7 @@ This makes schema of data processed by CocoIndex clear, and easily determine the
|
|
13
13
|
|
14
14
|
You don't need to spell out data types in CocoIndex, when you define the flow using existing operations (source, function, etc).
|
15
15
|
These operations decide data types of fields produced by them based on the spec and input data types.
|
16
|
-
All you need to do is to make sure the data passed to functions and
|
16
|
+
All you need to do is to make sure the data passed to functions and targets are accepted by them.
|
17
17
|
|
18
18
|
When you define [custom functions](/docs/core/custom_function), you need to specify the data types of arguments and return values.
|
19
19
|
|
@@ -40,7 +40,7 @@ This is the list of all basic types supported by CocoIndex:
|
|
40
40
|
| Vector[*T*, *Dim*?] | *T* can be a basic type or a numeric type. *Dim* is a positive integer and optional. | `cocoindex.Vector[T]` or `cocoindex.Vector[T, Dim]` | `numpy.typing.NDArray[T]` or `list[T]` |
|
41
41
|
|
42
42
|
Values of all data types can be represented by values in Python's native types (as described under the Native Python Type column).
|
43
|
-
However, the underlying execution engine
|
43
|
+
However, the underlying execution engine has finer distinctions for some types, specifically:
|
44
44
|
|
45
45
|
* *Float32* and *Float64* for `float`, with different precision.
|
46
46
|
* *LocalDateTime* and *OffsetDateTime* for `datetime.datetime`, with different timezone awareness.
|
@@ -50,7 +50,7 @@ However, the underlying execution engine and some storage system (like Postgres)
|
|
50
50
|
|
51
51
|
The native Python type is always more permissive and can represent a superset of possible values.
|
52
52
|
* Only when you annotate the return type of a custom function, you should use the specific type,
|
53
|
-
so that CocoIndex will have information about the precise type to be used in the execution engine and
|
53
|
+
so that CocoIndex will have information about the precise type to be used in the execution engine and target.
|
54
54
|
* For all other purposes, e.g. to provide annotation for argument types of a custom function, or used internally in your custom function,
|
55
55
|
you can choose whatever to use.
|
56
56
|
The native Python type is usually simpler.
|
@@ -1,6 +1,6 @@
|
|
1
1
|
---
|
2
2
|
title: Flow Definition
|
3
|
-
description: Define a CocoIndex flow, by specifying source, transformations and
|
3
|
+
description: Define a CocoIndex flow, by specifying source, transformations and targets, and connect input/output data of them.
|
4
4
|
---
|
5
5
|
|
6
6
|
import Tabs from '@theme/Tabs';
|
@@ -8,7 +8,7 @@ import TabItem from '@theme/TabItem';
|
|
8
8
|
|
9
9
|
# CocoIndex Flow Definition
|
10
10
|
|
11
|
-
In CocoIndex, to define an indexing flow, you provide a function to import source, transform data and put them into
|
11
|
+
In CocoIndex, to define an indexing flow, you provide a function to import source, transform data and put them into targets.
|
12
12
|
You connect input/output of these operations with fields of data scopes.
|
13
13
|
|
14
14
|
## Entry Point
|
@@ -246,14 +246,14 @@ and generates a `id` field with UUID and remains stable when `filename` and `sum
|
|
246
246
|
|
247
247
|
### Export
|
248
248
|
|
249
|
-
The `export()` method exports the collected data to an external
|
249
|
+
The `export()` method exports the collected data to an external target.
|
250
250
|
|
251
|
-
A *
|
251
|
+
A *target spec* needs to be provided for any export operation, to describe the target and parameters related to the target.
|
252
252
|
|
253
253
|
Export must happen at the top level of a flow, i.e. not within any child scopes created by "for each row". It takes the following arguments:
|
254
254
|
|
255
255
|
* `name`: the name to identify the export target.
|
256
|
-
* `target_spec`: the
|
256
|
+
* `target_spec`: the target spec as the export target.
|
257
257
|
* `setup_by_user` (optional):
|
258
258
|
whether the export target is setup by user.
|
259
259
|
By default, CocoIndex is managing the target setup (surfaced by the `cocoindex setup` CLI subcommand), e.g. create related tables/collections/etc. with compatible schema, and update them upon change.
|
@@ -270,7 +270,7 @@ def demo_flow(flow_builder: cocoindex.FlowBuilder, data_scope: cocoindex.DataSco
|
|
270
270
|
demo_collector = data_scope.add_collector()
|
271
271
|
...
|
272
272
|
demo_collector.export(
|
273
|
-
"
|
273
|
+
"demo_target", DemoTargetSpec(...),
|
274
274
|
primary_key_fields=["field1"],
|
275
275
|
vector_indexes=[cocoindex.VectorIndexDef("field2", cocoindex.VectorSimilarityMetric.COSINE_SIMILARITY)])
|
276
276
|
```
|
@@ -278,14 +278,14 @@ def demo_flow(flow_builder: cocoindex.FlowBuilder, data_scope: cocoindex.DataSco
|
|
278
278
|
</TabItem>
|
279
279
|
</Tabs>
|
280
280
|
|
281
|
-
The target
|
282
|
-
The `name` for the same
|
283
|
-
If it changes, CocoIndex will treat it as an old
|
281
|
+
The target is managed by CocoIndex, i.e. it'll be created by [CocoIndex CLI](/docs/core/cli) when you run `cocoindex setup`, and the data will be automatically updated (including stale data removal) when updating the index.
|
282
|
+
The `name` for the same target should remain stable across different runs.
|
283
|
+
If it changes, CocoIndex will treat it as an old target removed and a new one created, and perform setup changes and reindexing accordingly.
|
284
284
|
|
285
285
|
## Storage Indexes
|
286
286
|
|
287
|
-
Many storage
|
288
|
-
CocoIndex provides a common way to configure indexes for various
|
287
|
+
Many targets are storage systems supporting indexes, to boost efficiency in retrieving data.
|
288
|
+
CocoIndex provides a common way to configure indexes for various targets.
|
289
289
|
|
290
290
|
### Primary Key
|
291
291
|
|
@@ -330,7 +330,7 @@ For example,
|
|
330
330
|
```python
|
331
331
|
doc_embeddings.export(
|
332
332
|
"doc_embeddings",
|
333
|
-
cocoindex.
|
333
|
+
cocoindex.targets.Qdrant(
|
334
334
|
collection_name=cocoindex.get_app_namespace(trailing_delimiter='__') + "doc_embeddings",
|
335
335
|
...
|
336
336
|
),
|
@@ -345,8 +345,8 @@ It will use `Staging__doc_embeddings` as the collection name if the current app
|
|
345
345
|
|
346
346
|
### Target Declarations
|
347
347
|
|
348
|
-
Most time a target
|
349
|
-
Occasionally, you may need to specify some configurations for target
|
348
|
+
Most of the time, a target is created by calling the `export()` method on a collector, and this `export()` call comes with configurations needed for the target, e.g. options for storage indexes.
|
349
|
+
Occasionally, you may need to specify some configurations for the target out of the context of any specific data collector.
|
350
350
|
|
351
351
|
For example, for graph database targets like `Neo4j` and `Kuzu`, you may have a data collector to export data to relationships, which will create nodes referenced by various relationships in turn.
|
352
352
|
These nodes don't directly come from any specific data collector (consider relationships from different data collectors may share the same nodes).
|
@@ -359,7 +359,7 @@ To specify configurations for these nodes, you can *declare* spec for related no
|
|
359
359
|
|
360
360
|
```python
|
361
361
|
flow_builder.declare(
|
362
|
-
cocoindex.
|
362
|
+
cocoindex.targets.Neo4jDeclarations(...)
|
363
363
|
)
|
364
364
|
```
|
365
365
|
|
@@ -389,7 +389,7 @@ You can add an auth entry by `cocoindex.add_auth_entry()` function, which return
|
|
389
389
|
```python
|
390
390
|
my_graph_conn = cocoindex.add_auth_entry(
|
391
391
|
"my_graph_conn",
|
392
|
-
cocoindex.
|
392
|
+
cocoindex.targets.Neo4jConnectionSpec(
|
393
393
|
uri="bolt://localhost:7687",
|
394
394
|
user="neo4j",
|
395
395
|
password="cocoindex",
|
@@ -403,7 +403,7 @@ Then reference it when building a spec that takes an auth entry:
|
|
403
403
|
```python
|
404
404
|
demo_collector.export(
|
405
405
|
"MyGraph",
|
406
|
-
cocoindex.
|
406
|
+
cocoindex.targets.Neo4jRelationship(connection=my_graph_conn, ...)
|
407
407
|
)
|
408
408
|
```
|
409
409
|
|
@@ -412,7 +412,7 @@ Then reference it when building a spec that takes an auth entry:
|
|
412
412
|
```python
|
413
413
|
demo_collector.export(
|
414
414
|
"MyGraph",
|
415
|
-
cocoindex.
|
415
|
+
cocoindex.targets.Neo4jRelationship(connection=cocoindex.ref_auth_entry("my_graph_conn"), ...))
|
416
416
|
```
|
417
417
|
|
418
418
|
</TabItem>
|
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
title: Run a Flow
|
3
3
|
toc_max_heading_level: 4
|
4
|
-
description: Run a CocoIndex Flow, including build / update data in the target
|
4
|
+
description: Run a CocoIndex Flow, including build / update data in the target and evaluate the flow without changing the target.
|
5
5
|
---
|
6
6
|
|
7
7
|
import Tabs from '@theme/Tabs';
|
@@ -37,7 +37,7 @@ It creates a `demo_flow` object in `cocoindex.Flow` type.
|
|
37
37
|
|
38
38
|
## Build / update target data
|
39
39
|
|
40
|
-
The major goal of a flow is to perform the transformations on source data and build / update data in the target
|
40
|
+
The major goal of a flow is to perform the transformations on source data and build / update data in the target.
|
41
41
|
This action has two modes:
|
42
42
|
|
43
43
|
* **One time update.**
|
@@ -53,7 +53,7 @@ This action has two modes:
|
|
53
53
|
:::info
|
54
54
|
|
55
55
|
For both modes, CocoIndex is performing *incremental processing*,
|
56
|
-
i.e. we only perform computations and
|
56
|
+
i.e. we only perform computations and target mutations for source data that has changed, or when the flow itself has changed.
|
57
57
|
This is to achieve best efficiency.
|
58
58
|
|
59
59
|
:::
|
@@ -63,7 +63,7 @@ This is to achieve best efficiency.
|
|
63
63
|
|
64
64
|
#### CLI
|
65
65
|
|
66
|
-
The `cocoindex update` subcommand creates/updates data in the target
|
66
|
+
The `cocoindex update` subcommand creates/updates data in the target.
|
67
67
|
|
68
68
|
Once it's done, the target data is fresh up to the moment when the command is invoked.
|
69
69
|
|
@@ -76,7 +76,7 @@ cocoindex update main.py
|
|
76
76
|
<Tabs>
|
77
77
|
<TabItem value="python" label="Python">
|
78
78
|
|
79
|
-
The `update()` async method creates/updates data in the target
|
79
|
+
The `update()` async method creates/updates data in the target.
|
80
80
|
|
81
81
|
Once the function returns, the target data is fresh up to the moment when the function is called.
|
82
82
|
|
@@ -207,7 +207,7 @@ CocoIndex also provides asynchronous versions of APIs for blocking operations, i
|
|
207
207
|
|
208
208
|
## Evaluate the flow
|
209
209
|
|
210
|
-
CocoIndex allows you to run the transformations defined by the flow without updating the target
|
210
|
+
CocoIndex allows you to run the transformations defined by the flow without updating the target.
|
211
211
|
|
212
212
|
### CLI
|
213
213
|
|
@@ -87,7 +87,7 @@ def text_embedding_flow(flow_builder: cocoindex.FlowBuilder, data_scope: cocoind
|
|
87
87
|
# Export collected data to a vector index.
|
88
88
|
doc_embeddings.export(
|
89
89
|
"doc_embeddings",
|
90
|
-
cocoindex.
|
90
|
+
cocoindex.targets.Postgres(),
|
91
91
|
primary_key_fields=["filename", "location"],
|
92
92
|
vector_indexes=[
|
93
93
|
cocoindex.VectorIndexDef(
|
@@ -158,7 +158,7 @@ In this example, we'll use the [`psycopg` library](https://www.psycopg.org/) alo
|
|
158
158
|
Please make sure the required packages are installed:
|
159
159
|
|
160
160
|
```bash
|
161
|
-
pip install numpy psycopg[binary,pool] pgvector
|
161
|
+
pip install numpy "psycopg[binary,pool]" pgvector
|
162
162
|
```
|
163
163
|
|
164
164
|
### Step 4.1: Extract common transformations
|
@@ -214,7 +214,7 @@ from pgvector.psycopg import register_vector
|
|
214
214
|
|
215
215
|
def search(pool: ConnectionPool, query: str, top_k: int = 5):
|
216
216
|
# Get the table name, for the export target in the text_embedding_flow above.
|
217
|
-
table_name = cocoindex.utils.
|
217
|
+
table_name = cocoindex.utils.get_target_default_name(text_embedding_flow, "doc_embeddings")
|
218
218
|
# Evaluate the transform flow defined above with the input query, to get the embedding.
|
219
219
|
query_vector = text_to_embedding.eval(query)
|
220
220
|
# Run the query and get the results.
|
@@ -237,7 +237,7 @@ There're two CocoIndex-specific logic:
|
|
237
237
|
1. Get the table name from the export target in the `text_embedding_flow` above.
|
238
238
|
Since the table name for the `Postgres` target is not explicitly specified in the `export()` call,
|
239
239
|
CocoIndex uses a default name.
|
240
|
-
`cocoindex.utils.
|
240
|
+
`cocoindex.utils.get_target_default_name()` is a utility function to get the default table name for this case.
|
241
241
|
|
242
242
|
2. Evaluate the transform flow defined above with the input query, to get the embedding.
|
243
243
|
It's done by the `eval()` method of the transform flow `text_to_embedding`.
|