cocoindex 0.1.67__tar.gz → 0.1.69__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cocoindex-0.1.67 → cocoindex-0.1.69}/.pre-commit-config.yaml +1 -1
- {cocoindex-0.1.67 → cocoindex-0.1.69}/Cargo.lock +12 -1
- {cocoindex-0.1.67 → cocoindex-0.1.69}/Cargo.toml +4 -1
- {cocoindex-0.1.67 → cocoindex-0.1.69}/PKG-INFO +25 -5
- {cocoindex-0.1.67 → cocoindex-0.1.69}/README.md +22 -2
- {cocoindex-0.1.67 → cocoindex-0.1.69}/docs/docs/about/community.md +1 -1
- {cocoindex-0.1.67 → cocoindex-0.1.69}/docs/docs/about/contributing.md +4 -4
- {cocoindex-0.1.67 → cocoindex-0.1.69}/docs/docs/core/data_types.mdx +0 -2
- {cocoindex-0.1.67 → cocoindex-0.1.69}/docs/docs/core/flow_def.mdx +3 -4
- {cocoindex-0.1.67 → cocoindex-0.1.69}/docs/docs/core/flow_methods.mdx +57 -14
- {cocoindex-0.1.67 → cocoindex-0.1.69}/docs/docs/getting_started/quickstart.md +6 -6
- {cocoindex-0.1.67 → cocoindex-0.1.69}/docs/docs/ops/functions.md +43 -4
- {cocoindex-0.1.67 → cocoindex-0.1.69}/docs/docs/ops/sources.md +7 -8
- {cocoindex-0.1.67 → cocoindex-0.1.69}/docs/docs/ops/targets.md +1 -1
- {cocoindex-0.1.67 → cocoindex-0.1.69}/docs/src/css/custom.css +1 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/amazon_s3_embedding/main.py +7 -1
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/amazon_s3_embedding/pyproject.toml +1 -1
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/azure_blob_embedding/pyproject.toml +1 -1
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/code_embedding/pyproject.toml +1 -1
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/docs_to_knowledge_graph/pyproject.toml +1 -1
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/gdrive_text_embedding/pyproject.toml +1 -1
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/image_search/pyproject.toml +1 -1
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/manuals_llm_extraction/pyproject.toml +1 -1
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/paper_metadata/pyproject.toml +1 -1
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/patient_intake_extraction/pyproject.toml +2 -2
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/pdf_embedding/pyproject.toml +1 -1
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/product_recommendation/pyproject.toml +1 -1
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/text_embedding/pyproject.toml +1 -1
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/text_embedding_qdrant/pyproject.toml +1 -1
- {cocoindex-0.1.67 → cocoindex-0.1.69}/pyproject.toml +1 -2
- {cocoindex-0.1.67 → cocoindex-0.1.69}/python/cocoindex/__init__.py +2 -1
- {cocoindex-0.1.67 → cocoindex-0.1.69}/python/cocoindex/flow.py +35 -7
- {cocoindex-0.1.67 → cocoindex-0.1.69}/python/cocoindex/runtime.py +5 -2
- cocoindex-0.1.69/src/execution/live_updater.rs +399 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/execution/source_indexer.rs +57 -55
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/llm/mod.rs +4 -5
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/llm/ollama.rs +75 -1
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/ops/functions/split_recursively.rs +8 -1
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/ops/sources/amazon_s3.rs +13 -1
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/py/mod.rs +33 -15
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/service/flows.rs +1 -1
- cocoindex-0.1.67/src/execution/live_updater.rs +0 -269
- {cocoindex-0.1.67 → cocoindex-0.1.69}/.cargo/config.toml +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/.env.lib_debug +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/.github/ISSUE_TEMPLATE/🐛-bug-report.md +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/.github/ISSUE_TEMPLATE/💡-feature-request.md +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/.github/scripts/update_version.sh +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/.github/workflows/CI.yml +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/.github/workflows/_doc_release.yml +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/.github/workflows/_test.yml +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/.github/workflows/docs.yml +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/.github/workflows/format.yml +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/.github/workflows/release.yml +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/.gitignore +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/CODE_OF_CONDUCT.md +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/CONTRIBUTING.md +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/LICENSE +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/dev/neo4j.yaml +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/dev/postgres.yaml +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/docs/.gitignore +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/docs/README.md +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/docs/docs/ai/llm.mdx +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/docs/docs/core/basics.md +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/docs/docs/core/cli.mdx +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/docs/docs/core/custom_function.mdx +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/docs/docs/core/data_example.svg +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/docs/docs/core/flow_example.svg +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/docs/docs/core/settings.mdx +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/docs/docs/getting_started/installation.md +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/docs/docs/getting_started/markdown_files.zip +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/docs/docs/getting_started/overview.md +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/docs/docs/query.mdx +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/docs/docusaurus.config.ts +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/docs/package.json +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/docs/sidebars.ts +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/docs/src/components/HomepageFeatures/index.tsx +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/docs/src/components/HomepageFeatures/styles.module.css +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/docs/src/theme/Root.js +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/docs/static/.nojekyll +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/docs/static/img/docusaurus.png +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/docs/static/img/favicon.ico +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/docs/static/img/icon.svg +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/docs/static/img/incremental-etl.gif +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/docs/static/robots.txt +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/docs/tsconfig.json +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/docs/yarn.lock +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/amazon_s3_embedding/.env.example +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/amazon_s3_embedding/.gitignore +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/amazon_s3_embedding/README.md +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/azure_blob_embedding/.env.example +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/azure_blob_embedding/.gitignore +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/azure_blob_embedding/README.md +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/azure_blob_embedding/main.py +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/code_embedding/.env +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/code_embedding/README.md +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/code_embedding/main.py +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/docs_to_knowledge_graph/.env +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/docs_to_knowledge_graph/README.md +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/docs_to_knowledge_graph/main.py +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/fastapi_server_docker/.dockerignore +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/fastapi_server_docker/.env +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/fastapi_server_docker/README.md +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/fastapi_server_docker/compose.yaml +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/fastapi_server_docker/dockerfile +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/fastapi_server_docker/files/1810.04805v2.md +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/fastapi_server_docker/main.py +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/fastapi_server_docker/requirements.txt +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/gdrive_text_embedding/.env.example +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/gdrive_text_embedding/.gitignore +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/gdrive_text_embedding/README.md +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/gdrive_text_embedding/main.py +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/image_search/.env +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/image_search/README.md +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/image_search/frontend/.gitignore +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/image_search/frontend/index.html +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/image_search/frontend/package-lock.json +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/image_search/frontend/package.json +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/image_search/frontend/src/App.jsx +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/image_search/frontend/src/main.jsx +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/image_search/frontend/src/style.css +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/image_search/frontend/vite.config.js +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/image_search/img/cat1.jpeg +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/image_search/img/dog1.jpeg +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/image_search/img/elephant1.jpg +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/image_search/img/giraffe.jpg +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/image_search/main.py +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/manuals_llm_extraction/.env +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/manuals_llm_extraction/README.md +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/manuals_llm_extraction/main.py +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/manuals_llm_extraction/manuals/array.pdf +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/manuals_llm_extraction/manuals/base64.pdf +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/manuals_llm_extraction/manuals/copy.pdf +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/manuals_llm_extraction/manuals/glob.pdf +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/paper_metadata/.env.example +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/paper_metadata/.gitignore +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/paper_metadata/README.md +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/paper_metadata/main.py +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/paper_metadata/papers/1706.03762v7.pdf +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/paper_metadata/papers/1810.04805v2.pdf +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/paper_metadata/papers/2502.06786v3.pdf +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/paper_metadata/papers/2502.20346v1.pdf +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/patient_intake_extraction/.env.example +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/patient_intake_extraction/README.md +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/patient_intake_extraction/data/README.md +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/patient_intake_extraction/data/patient_forms/Patient_Intake_Form_David_Artificial.docx +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/patient_intake_extraction/data/patient_forms/Patient_Intake_Form_Emily_Artificial.pdf +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/patient_intake_extraction/data/patient_forms/Patient_Intake_Form_Joe_Artificial.pdf +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/patient_intake_extraction/data/patient_forms/Patient_Intake_From_Jane_Artificial.docx +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/patient_intake_extraction/main.py +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/pdf_embedding/.env +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/pdf_embedding/README.md +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/pdf_embedding/main.py +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/pdf_embedding/pdf_files/1706.03762v7.pdf +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/pdf_embedding/pdf_files/1810.04805v2.pdf +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/pdf_embedding/pdf_files/rfc8259.pdf +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/product_recommendation/.env +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/product_recommendation/README.md +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/product_recommendation/img/cocoinsight.png +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/product_recommendation/img/neo4j.png +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/product_recommendation/main.py +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/product_recommendation/products/p1.json +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/product_recommendation/products/p2.json +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/product_recommendation/products/p3.json +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/product_recommendation/products/p4.json +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/product_recommendation/products/p5.json +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/product_recommendation/products/p6.json +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/product_recommendation/products/p7.json +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/product_recommendation/products/p8.json +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/product_recommendation/products/p9.json +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/text_embedding/.env +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/text_embedding/README.md +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/text_embedding/Text_Embedding.ipynb +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/text_embedding/main.py +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/text_embedding/markdown_files/1706.03762v7.md +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/text_embedding/markdown_files/1810.04805v2.md +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/text_embedding/markdown_files/rfc8259.md +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/text_embedding_qdrant/.env +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/text_embedding_qdrant/README.md +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/text_embedding_qdrant/main.py +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/examples/text_embedding_qdrant/markdown_files/rfc8259.md +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/python/cocoindex/auth_registry.py +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/python/cocoindex/cli.py +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/python/cocoindex/convert.py +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/python/cocoindex/functions.py +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/python/cocoindex/index.py +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/python/cocoindex/lib.py +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/python/cocoindex/llm.py +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/python/cocoindex/op.py +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/python/cocoindex/py.typed +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/python/cocoindex/setting.py +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/python/cocoindex/setup.py +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/python/cocoindex/sources.py +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/python/cocoindex/targets.py +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/python/cocoindex/tests/__init__.py +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/python/cocoindex/tests/test_convert.py +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/python/cocoindex/tests/test_optional_database.py +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/python/cocoindex/tests/test_typing.py +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/python/cocoindex/typing.py +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/python/cocoindex/utils.py +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/ruff.toml +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/base/duration.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/base/field_attrs.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/base/json_schema.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/base/mod.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/base/schema.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/base/spec.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/base/value.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/builder/analyzed_flow.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/builder/analyzer.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/builder/exec_ctx.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/builder/flow_builder.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/builder/mod.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/builder/plan.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/execution/db_tracking.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/execution/db_tracking_setup.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/execution/dumper.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/execution/evaluator.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/execution/indexing_status.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/execution/memoization.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/execution/mod.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/execution/row_indexer.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/execution/stats.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/lib.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/lib_context.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/llm/anthropic.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/llm/gemini.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/llm/litellm.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/llm/openai.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/llm/openrouter.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/llm/vertex_ai.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/llm/vllm.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/llm/voyage.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/ops/factory_bases.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/ops/functions/embed_text.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/ops/functions/extract_by_llm.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/ops/functions/mod.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/ops/functions/parse_json.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/ops/functions/test_utils.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/ops/interface.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/ops/mod.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/ops/py_factory.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/ops/registration.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/ops/registry.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/ops/sdk.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/ops/sources/azure_blob.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/ops/sources/google_drive.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/ops/sources/local_file.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/ops/sources/mod.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/ops/targets/kuzu.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/ops/targets/mod.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/ops/targets/neo4j.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/ops/targets/postgres.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/ops/targets/qdrant.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/ops/targets/shared/mod.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/ops/targets/shared/property_graph.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/ops/targets/shared/table_columns.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/prelude.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/py/convert.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/server.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/service/error.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/service/mod.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/settings.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/setup/auth_registry.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/setup/components.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/setup/db_metadata.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/setup/driver.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/setup/mod.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/setup/states.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/utils/concur_control.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/utils/db.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/utils/fingerprint.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/utils/immutable.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/utils/mod.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/utils/retryable.rs +0 -0
- {cocoindex-0.1.67 → cocoindex-0.1.69}/src/utils/yaml_ser.rs +0 -0
@@ -1297,7 +1297,7 @@ dependencies = [
|
|
1297
1297
|
|
1298
1298
|
[[package]]
|
1299
1299
|
name = "cocoindex"
|
1300
|
-
version = "0.1.67"
|
1300
|
+
version = "0.1.69"
|
1301
1301
|
dependencies = [
|
1302
1302
|
"anyhow",
|
1303
1303
|
"async-openai",
|
@@ -1369,6 +1369,7 @@ dependencies = [
|
|
1369
1369
|
"tree-sitter-java",
|
1370
1370
|
"tree-sitter-javascript",
|
1371
1371
|
"tree-sitter-json",
|
1372
|
+
"tree-sitter-kotlin-ng",
|
1372
1373
|
"tree-sitter-language",
|
1373
1374
|
"tree-sitter-md",
|
1374
1375
|
"tree-sitter-pascal",
|
@@ -5724,6 +5725,16 @@ dependencies = [
|
|
5724
5725
|
"tree-sitter-language",
|
5725
5726
|
]
|
5726
5727
|
|
5728
|
+
[[package]]
|
5729
|
+
name = "tree-sitter-kotlin-ng"
|
5730
|
+
version = "1.1.0"
|
5731
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
5732
|
+
checksum = "e800ebbda938acfbf224f4d2c34947a31994b1295ee6e819b65226c7b51b4450"
|
5733
|
+
dependencies = [
|
5734
|
+
"cc",
|
5735
|
+
"tree-sitter-language",
|
5736
|
+
]
|
5737
|
+
|
5727
5738
|
[[package]]
|
5728
5739
|
name = "tree-sitter-language"
|
5729
5740
|
version = "0.1.5"
|
@@ -2,7 +2,7 @@
|
|
2
2
|
name = "cocoindex"
|
3
3
|
# Version used for local development is always higher than others to take precedence.
|
4
4
|
# Will be overridden for specific release versions.
|
5
|
-
version = "0.1.67"
|
5
|
+
version = "0.1.69"
|
6
6
|
edition = "2024"
|
7
7
|
rust-version = "1.88"
|
8
8
|
|
@@ -63,6 +63,7 @@ reqwest = { version = "0.12.15", default-features = false, features = [
|
|
63
63
|
"rustls-tls",
|
64
64
|
] }
|
65
65
|
async-openai = "0.28.0"
|
66
|
+
|
66
67
|
tree-sitter = "0.25.3"
|
67
68
|
tree-sitter-language = "0.1.5"
|
68
69
|
# Per language tree-sitter parsers
|
@@ -76,6 +77,8 @@ tree-sitter-html = "0.23.2"
|
|
76
77
|
tree-sitter-java = "0.23.5"
|
77
78
|
tree-sitter-javascript = "0.23.1"
|
78
79
|
tree-sitter-json = "0.24.8"
|
80
|
+
# The other more popular crate tree-sitter-kotlin requires tree-sitter < 0.23 for now
|
81
|
+
tree-sitter-kotlin-ng = "1.1.0"
|
79
82
|
tree-sitter-md = "0.3.2"
|
80
83
|
tree-sitter-pascal = "0.10.0"
|
81
84
|
tree-sitter-php = "0.23.11"
|
@@ -1,17 +1,17 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: cocoindex
|
3
|
-
Version: 0.1.67
|
3
|
+
Version: 0.1.69
|
4
4
|
Requires-Dist: click>=8.1.8
|
5
5
|
Requires-Dist: rich>=14.0.0
|
6
6
|
Requires-Dist: python-dotenv>=1.1.0
|
7
7
|
Requires-Dist: watchfiles>=1.1.0
|
8
8
|
Requires-Dist: numpy>=1.23.2
|
9
|
-
Requires-Dist: pytest ; extra == 'test'
|
9
|
+
Requires-Dist: pytest ; extra == 'dev'
|
10
10
|
Requires-Dist: ruff ; extra == 'dev'
|
11
|
+
Requires-Dist: mypy ; extra == 'dev'
|
11
12
|
Requires-Dist: pre-commit ; extra == 'dev'
|
12
13
|
Requires-Dist: sentence-transformers>=3.3.1 ; extra == 'embeddings'
|
13
14
|
Requires-Dist: cocoindex[embeddings] ; extra == 'all'
|
14
|
-
Provides-Extra: test
|
15
15
|
Provides-Extra: dev
|
16
16
|
Provides-Extra: embeddings
|
17
17
|
Provides-Extra: all
|
@@ -36,16 +36,36 @@ Project-URL: Homepage, https://cocoindex.io/
|
|
36
36
|
[](https://opensource.org/licenses/Apache-2.0)
|
37
37
|
[](https://pypi.org/project/cocoindex/)
|
38
38
|
[](https://pypistats.org/packages/cocoindex)
|
39
|
-
|
40
39
|
[](https://github.com/cocoindex-io/cocoindex/actions/workflows/CI.yml)
|
41
40
|
[](https://github.com/cocoindex-io/cocoindex/actions/workflows/release.yml)
|
42
41
|
[](https://discord.com/invite/zpA9S2DR7s)
|
42
|
+
|
43
|
+
</div>
|
44
|
+
|
45
|
+
<div align="center">
|
46
|
+
<a href="https://trendshift.io/repositories/13939" target="_blank"><img src="https://trendshift.io/api/badge/repositories/13939" alt="cocoindex-io%2Fcocoindex | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
|
43
47
|
</div>
|
44
48
|
|
49
|
+
|
45
50
|
Ultra performant data transformation framework for AI, with core engine written in Rust. Support incremental processing and data lineage out-of-box. Exceptional developer velocity. Production-ready at day 0.
|
46
51
|
|
47
52
|
⭐ Drop a star to help us grow!
|
48
53
|
|
54
|
+
<div align="center">
|
55
|
+
|
56
|
+
<!-- Keep these links. Translations will automatically update with the README. -->
|
57
|
+
[Deutsch](https://readme-i18n.com/cocoindex-io/cocoindex?lang=de) |
|
58
|
+
[English](https://readme-i18n.com/cocoindex-io/cocoindex?lang=en) |
|
59
|
+
[Español](https://readme-i18n.com/cocoindex-io/cocoindex?lang=es) |
|
60
|
+
[français](https://readme-i18n.com/cocoindex-io/cocoindex?lang=fr) |
|
61
|
+
[日本語](https://readme-i18n.com/cocoindex-io/cocoindex?lang=ja) |
|
62
|
+
[한국어](https://readme-i18n.com/cocoindex-io/cocoindex?lang=ko) |
|
63
|
+
[Português](https://readme-i18n.com/cocoindex-io/cocoindex?lang=pt) |
|
64
|
+
[Русский](https://readme-i18n.com/cocoindex-io/cocoindex?lang=ru) |
|
65
|
+
[中文](https://readme-i18n.com/cocoindex-io/cocoindex?lang=zh)
|
66
|
+
|
67
|
+
</div>
|
68
|
+
|
49
69
|
</br>
|
50
70
|
|
51
71
|
<p align="center">
|
@@ -59,7 +79,7 @@ CocoIndex makes it super easy to transform data with AI workloads, and keep sour
|
|
59
79
|
</br>
|
60
80
|
|
61
81
|
<p align="center">
|
62
|
-
<img src="https://cocoindex.io/images/venn-features.png" alt="CocoIndex Features" width='
|
82
|
+
<img src="https://cocoindex.io/images/venn-features.png" alt="CocoIndex Features" width='400'>
|
63
83
|
</p>
|
64
84
|
|
65
85
|
</br>
|
@@ -11,16 +11,36 @@
|
|
11
11
|
[](https://opensource.org/licenses/Apache-2.0)
|
12
12
|
[](https://pypi.org/project/cocoindex/)
|
13
13
|
[](https://pypistats.org/packages/cocoindex)
|
14
|
-
|
15
14
|
[](https://github.com/cocoindex-io/cocoindex/actions/workflows/CI.yml)
|
16
15
|
[](https://github.com/cocoindex-io/cocoindex/actions/workflows/release.yml)
|
17
16
|
[](https://discord.com/invite/zpA9S2DR7s)
|
17
|
+
|
18
|
+
</div>
|
19
|
+
|
20
|
+
<div align="center">
|
21
|
+
<a href="https://trendshift.io/repositories/13939" target="_blank"><img src="https://trendshift.io/api/badge/repositories/13939" alt="cocoindex-io%2Fcocoindex | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
|
18
22
|
</div>
|
19
23
|
|
24
|
+
|
20
25
|
Ultra performant data transformation framework for AI, with core engine written in Rust. Support incremental processing and data lineage out-of-box. Exceptional developer velocity. Production-ready at day 0.
|
21
26
|
|
22
27
|
⭐ Drop a star to help us grow!
|
23
28
|
|
29
|
+
<div align="center">
|
30
|
+
|
31
|
+
<!-- Keep these links. Translations will automatically update with the README. -->
|
32
|
+
[Deutsch](https://readme-i18n.com/cocoindex-io/cocoindex?lang=de) |
|
33
|
+
[English](https://readme-i18n.com/cocoindex-io/cocoindex?lang=en) |
|
34
|
+
[Español](https://readme-i18n.com/cocoindex-io/cocoindex?lang=es) |
|
35
|
+
[français](https://readme-i18n.com/cocoindex-io/cocoindex?lang=fr) |
|
36
|
+
[日本語](https://readme-i18n.com/cocoindex-io/cocoindex?lang=ja) |
|
37
|
+
[한국어](https://readme-i18n.com/cocoindex-io/cocoindex?lang=ko) |
|
38
|
+
[Português](https://readme-i18n.com/cocoindex-io/cocoindex?lang=pt) |
|
39
|
+
[Русский](https://readme-i18n.com/cocoindex-io/cocoindex?lang=ru) |
|
40
|
+
[中文](https://readme-i18n.com/cocoindex-io/cocoindex?lang=zh)
|
41
|
+
|
42
|
+
</div>
|
43
|
+
|
24
44
|
</br>
|
25
45
|
|
26
46
|
<p align="center">
|
@@ -34,7 +54,7 @@ CocoIndex makes it super easy to transform data with AI workloads, and keep sour
|
|
34
54
|
</br>
|
35
55
|
|
36
56
|
<p align="center">
|
37
|
-
<img src="https://cocoindex.io/images/venn-features.png" alt="CocoIndex Features" width='
|
57
|
+
<img src="https://cocoindex.io/images/venn-features.png" alt="CocoIndex Features" width='400'>
|
38
58
|
</p>
|
39
59
|
|
40
60
|
</br>
|
@@ -9,7 +9,7 @@ Welcome with a huge coconut hug 🥥⋆。˚🤗.
|
|
9
9
|
|
10
10
|
We are super excited for community contributions of all kinds - whether it's code improvements, documentation updates, issue reports, feature requests on [GitHub](https://github.com/cocoindex-io/cocoindex), and discussions in our [Discord](https://discord.com/invite/zpA9S2DR7s).
|
11
11
|
|
12
|
-
We would love to
|
12
|
+
We would love to foster an inclusive, welcoming, and supportive environment. Contributing to CocoIndex should feel collaborative, friendly and enjoyable for everyone. Together, we can build better AI applications through robust data infrastructure.
|
13
13
|
|
14
14
|
:::tip Start hacking CocoIndex
|
15
15
|
Check out our [Contributing guide](./contributing) to get started!
|
@@ -18,10 +18,10 @@ We tag issues with the ["good first issue"](https://github.com/cocoindex-io/coco
|
|
18
18
|
## How to Contribute
|
19
19
|
- If you decide to work on an issue, unless the PR can be sent immediately (e.g. just a few lines of code), we recommend you to leave a comment on the issue like **`I'm working on it`** or **`Can I work on this issue?`** to avoid duplicating work.
|
20
20
|
- For larger features, we recommend you to discuss with us first in our [Discord server](https://discord.com/invite/zpA9S2DR7s) to coordinate the design and work.
|
21
|
-
- Our [Discord server](https://discord.com/invite/zpA9S2DR7s)
|
21
|
+
- Our [Discord server](https://discord.com/invite/zpA9S2DR7s) is constantly open. If you are unsure about anything, it is a good place to discuss! We'd love to collaborate and will always be friendly.
|
22
22
|
|
23
23
|
## Start hacking! Setting Up Development Environment
|
24
|
-
|
24
|
+
Follow the steps below to get cocoindex built on the latest codebase locally - if you are making changes to cocoindex functionality and want to test it out.
|
25
25
|
|
26
26
|
- 🦀 [Install Rust](https://rust-lang.org/tools/install)
|
27
27
|
|
@@ -46,12 +46,12 @@ Following the steps below to get cocoindex build on latest codebase locally - if
|
|
46
46
|
|
47
47
|
- Install required tools:
|
48
48
|
```sh
|
49
|
-
pip install maturin
|
49
|
+
pip install maturin
|
50
50
|
```
|
51
51
|
|
52
52
|
- Build the library. Run at the root of cocoindex directory:
|
53
53
|
```sh
|
54
|
-
maturin develop
|
54
|
+
maturin develop -E all,dev
|
55
55
|
```
|
56
56
|
|
57
57
|
- Install and enable pre-commit hooks. This ensures all checks run automatically before each commit:
|
@@ -28,8 +28,6 @@ When you define a [custom function](/docs/core/custom_function), you need to ann
|
|
28
28
|
we use the type annotation as a guidance to construct the Python value.
|
29
29
|
Type annotation is optional for basic types and struct types, and required for table types.
|
30
30
|
|
31
|
-
* When you define a [custom function](/docs/core/custom_function), type annotation is required for arguments and return values.
|
32
|
-
|
33
31
|
### Basic Types
|
34
32
|
|
35
33
|
#### Primitive Types
|
@@ -57,8 +57,7 @@ After it's called, `demo_flow` becomes an invalid object, and you should not cal
|
|
57
57
|
:::note
|
58
58
|
|
59
59
|
This only removes states of the flow from the current process, and it won't affect the persistent states.
|
60
|
-
|
61
|
-
If you w
|
60
|
+
See [Setup / drop flow](/docs/core/flow_methods#setup--drop-flow) if you want to clean up the persistent states.
|
62
61
|
|
63
62
|
:::
|
64
63
|
|
@@ -469,7 +468,7 @@ Then reference it when building a spec that takes an auth entry:
|
|
469
468
|
Note that CocoIndex backends use the key of an auth entry to identify the backend.
|
470
469
|
|
471
470
|
* Keep the key stable.
|
472
|
-
If the key doesn't change, it's considered to be the same backend (even if the underlying way to connect/authenticate
|
471
|
+
If the key doesn't change, it's considered to be the same backend (even if the underlying way to connect/authenticate changes).
|
473
472
|
|
474
473
|
* If a key is no longer referenced in any operation spec, keep it until the next flow setup / drop action,
|
475
|
-
so that
|
474
|
+
so that CocoIndex will be able to clean up the backends.
|
@@ -44,9 +44,9 @@ For a flow, its persistent backends need to be ready before it can run, includin
|
|
44
44
|
The desired state of the backends for a flow is derived based on the flow definition itself.
|
45
45
|
CocoIndex supports two types of actions to manage the persistent backends automatically:
|
46
46
|
|
47
|
-
* *Setup* a flow, which will change the backends owned by the flow to
|
47
|
+
* *Setup* a flow, which will change the backends owned by the flow to the desired state, e.g. create new tables for a new flow, drop an existing table if the corresponding target is gone, add a new column to a target table if a new field is collected, etc. It's a no-op if the backend states are already in the desired state.
|
48
48
|
|
49
|
-
* *Drop* a flow, which will drop all backends owned by the flow. It's no-op if there
|
49
|
+
* *Drop* a flow, which will drop all backends owned by the flow. It's a no-op if there are no existing backends owned by the flow (e.g. never setup or already dropped).
|
50
50
|
|
51
51
|
### CLI
|
52
52
|
|
@@ -138,7 +138,7 @@ This is to achieve best efficiency.
|
|
138
138
|
|
139
139
|
The `cocoindex update` subcommand creates/updates data in the target.
|
140
140
|
|
141
|
-
Once it's done, the target data is fresh up to the moment when the
|
141
|
+
Once it's done, the target data is fresh up to the moment when the command is called.
|
142
142
|
|
143
143
|
```sh
|
144
144
|
cocoindex update main.py
|
@@ -203,7 +203,7 @@ To perform live update, run the `cocoindex update` subcommand with `-L` option:
|
|
203
203
|
cocoindex update main.py -L
|
204
204
|
```
|
205
205
|
|
206
|
-
If there's at least one data source with change capture mechanism enabled, it will keep running until
|
206
|
+
If there's at least one data source with change capture mechanism enabled, it will keep running until aborted (e.g. by `Ctrl-C`).
|
207
207
|
Otherwise, it falls back to the same behavior as one time update, and will finish after a one-time update is done.
|
208
208
|
|
209
209
|
With a `--setup` option, it will also setup the flow first if needed.
|
@@ -241,13 +241,27 @@ A `FlowLiveUpdater` object supports the following methods:
|
|
241
241
|
|
242
242
|
* `start()`: Start the updater.
|
243
243
|
CocoIndex will continuously capture changes from the source data and update the target data accordingly in background threads managed by the engine.
|
244
|
+
|
244
245
|
* `abort()`: Abort the updater.
|
246
|
+
|
245
247
|
* `wait()`: Wait for the updater to finish. It only unblocks in one of the following cases:
|
246
248
|
* The updater was aborted.
|
247
249
|
* A one time update is done, and live update is not enabled:
|
248
250
|
either `live_mode` is `False`, or all data sources have no change capture mechanisms enabled.
|
251
|
+
|
252
|
+
* `next_status_updates()`: Get the next status updates.
|
253
|
+
It blocks until there is a new status update, including when the processing finishes for a bunch of source updates, and when the live updater stops (aborted, or no more sources to process).
|
254
|
+
You can continuously call this method in a loop to get the latest status updates and react accordingly.
|
255
|
+
|
256
|
+
It returns a `cocoindex.FlowUpdaterStatusUpdates` object, with the following properties:
|
257
|
+
* `active_sources`: Names of sources that are still active, i.e. not stopped processing. If it's empty, it means the updater is stopped.
|
258
|
+
* `updated_sources`: Names of sources with updates since last time.
|
259
|
+
You can check this to see which sources have recent updates that got processed.
|
260
|
+
|
249
261
|
* `update_stats()`: It returns the stats of the updater.
|
250
262
|
|
263
|
+
This snippet shows the lifecycle of a live updater:
|
264
|
+
|
251
265
|
```python
|
252
266
|
my_updater = cocoindex.FlowLiveUpdater(demo_flow)
|
253
267
|
# Start the updater.
|
@@ -256,14 +270,37 @@ my_updater.start()
|
|
256
270
|
# Perform your own logic (e.g. a query loop).
|
257
271
|
...
|
258
272
|
|
259
|
-
|
260
|
-
print(my_updater.update_stats())
|
261
|
-
# Abort the updater.
|
262
|
-
my_updater.abort()
|
273
|
+
...
|
263
274
|
# Wait for the updater to finish.
|
264
275
|
my_updater.wait()
|
276
|
+
# Print the update stats.
|
277
|
+
print(my_updater.update_stats())
|
265
278
|
```
|
266
279
|
|
280
|
+
Somewhere (in the same or other threads) you can also continuously call `next_status_updates()` to get the latest status updates and react accordingly, e.g.
|
281
|
+
|
282
|
+
```python
|
283
|
+
while True:
|
284
|
+
updates = my_updater.next_status_updates()
|
285
|
+
|
286
|
+
for source in updates.updated_sources:
|
287
|
+
# Perform downstream operations on the target of the source.
|
288
|
+
run_your_downstream_operations_for(source)
|
289
|
+
|
290
|
+
# Break the loop if there are no more active sources.
|
291
|
+
if not updates.active_sources:
|
292
|
+
break
|
293
|
+
```
|
294
|
+
|
295
|
+
:::info
|
296
|
+
|
297
|
+
`next_status_updates()` automatically combines multiple status updates if more than one arrives between two calls,
|
298
|
+
e.g. your downstream operations may take more time, or you don't need to process too frequently (in which case you can explicitly sleep for a while).
|
299
|
+
|
300
|
+
So you don't need to worry about the status updates piling up.
|
301
|
+
|
302
|
+
:::
|
303
|
+
|
267
304
|
Python SDK also allows you to use the updater as a context manager.
|
268
305
|
It will automatically start the updater during the context entry, and abort and wait for the updater to finish automatically when the context is exited.
|
269
306
|
The following code is equivalent to the code above (if no early return happens):
|
@@ -272,7 +309,6 @@ The following code is equivalent to the code above (if no early return happens):
|
|
272
309
|
with cocoindex.FlowLiveUpdater(demo_flow) as my_updater:
|
273
310
|
# Perform your own logic (e.g. a query loop).
|
274
311
|
...
|
275
|
-
print(my_updater.update_stats())
|
276
312
|
```
|
277
313
|
|
278
314
|
CocoIndex also provides asynchronous versions of APIs for blocking operations, including:
|
@@ -287,20 +323,27 @@ CocoIndex also provides asynchronous versions of APIs for blocking operations, i
|
|
287
323
|
# Perform your own logic (e.g. a query loop).
|
288
324
|
...
|
289
325
|
|
290
|
-
# Print the update stats.
|
291
|
-
print(my_updater.update_stats())
|
292
|
-
# Abort the updater.
|
293
|
-
my_updater.abort()
|
294
326
|
# Wait for the updater to finish.
|
295
327
|
await my_updater.wait_async()
|
328
|
+
# Print the update stats.
|
329
|
+
print(my_updater.update_stats())
|
330
|
+
```
|
331
|
+
|
332
|
+
* `next_status_updates_async()`, e.g.
|
333
|
+
|
334
|
+
```python
|
335
|
+
while True:
|
336
|
+
updates = await my_updater.next_status_updates_async()
|
337
|
+
|
338
|
+
...
|
296
339
|
```
|
340
|
+
|
297
341
|
* Async context manager, e.g.
|
298
342
|
|
299
343
|
```python
|
300
344
|
async with cocoindex.FlowLiveUpdater(demo_flow) as my_updater:
|
301
345
|
# Perform your own logic (e.g. a query loop).
|
302
346
|
...
|
303
|
-
print(my_updater.update_stats())
|
304
347
|
```
|
305
348
|
|
306
349
|
</TabItem>
|
@@ -7,10 +7,10 @@ import ReactPlayer from 'react-player'
|
|
7
7
|
|
8
8
|
# Build your first CocoIndex project
|
9
9
|
|
10
|
-
This guide will help you get up and running with CocoIndex in just a few minutes
|
10
|
+
This guide will help you get up and running with CocoIndex in just a few minutes. We'll build a project that does:
|
11
11
|
* Read files from a directory
|
12
12
|
* Perform basic chunking and embedding
|
13
|
-
*
|
13
|
+
* Load the data into a vector store (PG Vector)
|
14
14
|
|
15
15
|
<ReactPlayer controls url='https://www.youtube.com/watch?v=gv5R8nOXsWU' />
|
16
16
|
|
@@ -107,11 +107,11 @@ Notes:
|
|
107
107
|
3. A *data source* extracts data from an external source.
|
108
108
|
In this example, the `LocalFile` data source imports local files as a KTable (table with a key field, see [KTable](../core/data_types#ktable) for details), each row has `"filename"` and `"content"` fields.
|
109
109
|
|
110
|
-
4. After defining the KTable, we
|
110
|
+
4. After defining the KTable, we extend a new field `"chunks"` to each row by *transforming* the `"content"` field using `SplitRecursively`. The output of the `SplitRecursively` is also a KTable representing each chunk of the document, with `"location"` and `"text"` fields.
|
111
111
|
|
112
|
-
5. After defining the KTable, we
|
112
|
+
5. After defining the KTable, we extend a new field `"embedding"` to each row by *transforming* the `"text"` field using `SentenceTransformerEmbed`.
|
113
113
|
|
114
|
-
6. In CocoIndex, a *collector* collects multiple entries of data together. In this example, the `doc_embeddings` collector collects data from all `chunk`s across all `doc`s, and
|
114
|
+
6. In CocoIndex, a *collector* collects multiple entries of data together. In this example, the `doc_embeddings` collector collects data from all `chunk`s across all `doc`s, and uses the collected data to build a vector index `"doc_embeddings"`, using `Postgres`.
|
115
115
|
|
116
116
|
## Step 3: Run the indexing pipeline and queries
|
117
117
|
|
@@ -271,7 +271,7 @@ Now we can run the same Python file, which will run the new added main logic:
|
|
271
271
|
python quickstart.py
|
272
272
|
```
|
273
273
|
|
274
|
-
It will ask you to enter a query and it will return the top
|
274
|
+
It will ask you to enter a query and it will return the top 5 results.
|
275
275
|
|
276
276
|
## Next Steps
|
277
277
|
|
@@ -31,7 +31,7 @@ The spec takes the following fields:
|
|
31
31
|
|
32
32
|
* `separators_regex` (`list[str]`): A list of regex patterns to split the text.
|
33
33
|
Higher-level boundaries should come first, and lower-level should be listed later. e.g. `[r"\n# ", r"\n## ", r"\n\n", r"\. "]`.
|
34
|
-
See [regex
|
34
|
+
See [regex syntax](https://docs.rs/regex/latest/regex/#syntax) for supported regular expression syntax.
|
35
35
|
|
36
36
|
Input data:
|
37
37
|
|
@@ -57,9 +57,12 @@ Input data:
|
|
57
57
|
|
58
58
|
We use the `language` field to determine how to split the input text, following these rules:
|
59
59
|
|
60
|
-
* We
|
61
|
-
|
62
|
-
|
60
|
+
* We match the input `language` field against the following registries in the following order:
|
61
|
+
* `custom_languages` in the spec, against the `language_name` or `aliases` field of each entry.
|
62
|
+
* Builtin languages (see [Supported Languages](#supported-languages) section below), against the language, aliases or file extensions of each entry.
|
63
|
+
|
64
|
+
All matching is case-insensitive. If the value of `language` is null, it'll be treated as an empty string.
|
65
|
+
|
63
66
|
* If no match is found, the input will be treated as plain text.
|
64
67
|
|
65
68
|
:::
|
@@ -73,6 +76,42 @@ Return: [*KTable*](/docs/core/data_types#ktable), each row represents a chunk, w
|
|
73
76
|
* `line` (*Int64*): The line number of the position. Starting from 1.
|
74
77
|
* `column` (*Int64*): The column number of the position. Starting from 1.
|
75
78
|
|
79
|
+
### Supported Languages
|
80
|
+
|
81
|
+
Currently, `SplitRecursively` supports the following languages:
|
82
|
+
|
83
|
+
| Language | Aliases | File Extensions |
|
84
|
+
|----------|---------|-----------------|
|
85
|
+
| C | | `.c` |
|
86
|
+
| C++ | CPP | `.cpp`, `.cc`, `.cxx`, `.h`, `.hpp` |
|
87
|
+
| C# | CSharp, CS | `.cs` |
|
88
|
+
| CSS | | `.css`, `.scss` |
|
89
|
+
| DTD | | `.dtd` |
|
90
|
+
| Fortran | F, F90, F95, F03 | `.f`, `.f90`, `.f95`, `.f03` |
|
91
|
+
| Go | Golang | `.go` |
|
92
|
+
| HTML | | `.html`, `.htm` |
|
93
|
+
| Java | | `.java` |
|
94
|
+
| JavaScript | JS | `.js` |
|
95
|
+
| JSON | | `.json` |
|
96
|
+
| Kotlin | | `.kt`, `.kts` |
|
97
|
+
| Markdown | MD | `.md`, `.mdx` |
|
98
|
+
| Pascal | PAS, DPR, Delphi | `.pas`, `.dpr` |
|
99
|
+
| PHP | | `.php` |
|
100
|
+
| Python | | `.py` |
|
101
|
+
| R | | `.r` |
|
102
|
+
| Ruby | | `.rb` |
|
103
|
+
| Rust | RS | `.rs` |
|
104
|
+
| Scala | | `.scala` |
|
105
|
+
| SQL | | `.sql` |
|
106
|
+
| Swift | | `.swift` |
|
107
|
+
| TOML | | `.toml` |
|
108
|
+
| TSX | | `.tsx` |
|
109
|
+
| TypeScript | TS | `.ts` |
|
110
|
+
| XML | | `.xml` |
|
111
|
+
| YAML | | `.yaml`, `.yml` |
|
112
|
+
|
113
|
+
|
114
|
+
|
76
115
|
## SentenceTransformerEmbed
|
77
116
|
|
78
117
|
`SentenceTransformerEmbed` embeds a text into a vector space using the [SentenceTransformer](https://huggingface.co/sentence-transformers) library.
|
@@ -111,10 +111,9 @@ This is how to setup:
|
|
111
111
|
|
112
112
|
* In the [Amazon S3 Console](https://s3.console.aws.amazon.com/s3/home), open your S3 bucket. Under *Properties* tab, click *Create event notification*.
|
113
113
|
* Fill in an arbitrary event name, e.g. `S3ChangeNotifications`.
|
114
|
-
* If you want your AmazonS3 data source expose a subset of files sharing a prefix, set the same prefix here. Otherwise, leave it empty.
|
114
|
+
* If you want your AmazonS3 data source to expose a subset of files sharing a prefix, set the same prefix here. Otherwise, leave it empty.
|
115
115
|
* Select the following event types: *All object create events*, *All object removal events*.
|
116
116
|
* Select *SQS queue* as the destination, and specify the SQS queue you created above.
|
117
|
-
and enable *Change Event Notifications* for your bucket, and specify the SQS queue as the destination.
|
118
117
|
|
119
118
|
AWS's [Guide of Configuring a Bucket for Notifications](https://docs.aws.amazon.com/AmazonS3/latest/userguide/ways-to-add-notification-config-to-bucket.html#step1-create-sqs-queue-for-notification) provides more details.
|
120
119
|
|
@@ -141,7 +140,7 @@ The spec takes the following fields:
|
|
141
140
|
:::info
|
142
141
|
|
143
142
|
We will delete messages from the queue after they're processed.
|
144
|
-
If there
|
143
|
+
If there are unrelated messages in the queue (e.g. test messages that SQS will send automatically on queue creation, messages for a different bucket, for non-included files, etc.), we will delete the message upon receiving it, to avoid repeatedly receiving irrelevant messages after they're redelivered.
|
145
144
|
|
146
145
|
:::
|
147
146
|
|
@@ -167,7 +166,7 @@ These are actions you need to take:
|
|
167
166
|
* Create a storage account in the [Azure Portal](https://portal.azure.com/).
|
168
167
|
* Create a container in the storage account.
|
169
168
|
* Upload your files to the container.
|
170
|
-
* Grant the user / identity / service principal (depends on your authentication method, see below) access to the
|
169
|
+
* Grant the user / identity / service principal (depends on your authentication method, see below) access to the storage account. At minimum, a **Storage Blob Data Reader** role is needed. See [this doc](https://learn.microsoft.com/en-us/azure/storage/blobs/authorize-data-operations-portal) for reference.
|
171
170
|
|
172
171
|
#### Authentication
|
173
172
|
|
@@ -253,12 +252,12 @@ The spec takes the following fields:
|
|
253
252
|
it's typically cheaper than a full refresh by setting the [refresh interval](../core/flow_def#refresh-interval) especially when the folder contains a large number of files.
|
254
253
|
So you can usually set it with a smaller value compared to the `refresh_interval`.
|
255
254
|
|
256
|
-
On the other hand, this only detects changes for files still
|
257
|
-
If the file is deleted (or the current account no longer has access to), this change will not be detected by this change stream.
|
255
|
+
On the other hand, this only detects changes for files that still exist.
|
256
|
+
If the file is deleted (or the current account no longer has access to it), this change will not be detected by this change stream.
|
258
257
|
|
259
|
-
So when a `GoogleDrive` source
|
258
|
+
So when a `GoogleDrive` source has `recent_changes_poll_interval` enabled, it's still recommended to set a `refresh_interval`, with a larger value.
|
260
259
|
So that most changes can be covered by polling recent changes (with low latency, like 10 seconds), and remaining changes (files that no longer exist or are no longer accessible) will still be covered (with a higher latency, like 5 minutes, and should be larger if you have a huge number of files like 1M).
|
261
|
-
In reality, configure them based on your requirement: how
|
260
|
+
In reality, configure them based on your requirement: how fresh do you need the target index to be?
|
262
261
|
|
263
262
|
:::
|
264
263
|
|
@@ -413,7 +413,7 @@ If you don't have a Neo4j database, you can start a Neo4j database using our doc
|
|
413
413
|
docker compose -f <(curl -L https://raw.githubusercontent.com/cocoindex-io/cocoindex/refs/heads/main/dev/neo4j.yaml) up -d
|
414
414
|
```
|
415
415
|
|
416
|
-
|
416
|
+
This will bring up a Neo4j instance, which can be accessed by username `neo4j` and password `cocoindex`.
|
417
417
|
You can access the Neo4j browser at [http://localhost:7474](http://localhost:7474).
|
418
418
|
|
419
419
|
:::warning
|
@@ -101,7 +101,13 @@ def _main() -> None:
|
|
101
101
|
pool = ConnectionPool(os.getenv("COCOINDEX_DATABASE_URL"))
|
102
102
|
|
103
103
|
amazon_s3_text_embedding_flow.setup()
|
104
|
-
with cocoindex.FlowLiveUpdater(amazon_s3_text_embedding_flow):
|
104
|
+
with cocoindex.FlowLiveUpdater(amazon_s3_text_embedding_flow) as updater:
|
105
|
+
while True:
|
106
|
+
updates = updater.next_status_updates()
|
107
|
+
print(f"Updates: {updates}")
|
108
|
+
if not updates.active_sources:
|
109
|
+
break
|
110
|
+
|
105
111
|
# Run queries in a loop to demonstrate the query capabilities.
|
106
112
|
while True:
|
107
113
|
query = input("Enter search query (or Enter to quit): ")
|
@@ -3,7 +3,7 @@ name = "amazon-s3-text-embedding"
|
|
3
3
|
version = "0.1.0"
|
4
4
|
description = "Simple example for cocoindex: build embedding index based on Amazon S3 files."
|
5
5
|
requires-python = ">=3.11"
|
6
|
-
dependencies = ["cocoindex[embeddings]>=0.1.
|
6
|
+
dependencies = ["cocoindex[embeddings]>=0.1.67", "python-dotenv>=1.0.1"]
|
7
7
|
|
8
8
|
[tool.setuptools]
|
9
9
|
packages = []
|
@@ -3,7 +3,7 @@ name = "azure-blob-text-embedding"
|
|
3
3
|
version = "0.1.0"
|
4
4
|
description = "Simple example for cocoindex: build embedding index based on Azure Blob Storage files."
|
5
5
|
requires-python = ">=3.11"
|
6
|
-
dependencies = ["cocoindex[embeddings]>=0.1.
|
6
|
+
dependencies = ["cocoindex[embeddings]>=0.1.67", "python-dotenv>=1.0.1"]
|
7
7
|
|
8
8
|
[tool.setuptools]
|
9
9
|
packages = []
|
@@ -3,7 +3,7 @@ name = "code-embedding"
|
|
3
3
|
version = "0.1.0"
|
4
4
|
description = "Simple example for cocoindex: build embedding index based on source code."
|
5
5
|
requires-python = ">=3.11"
|
6
|
-
dependencies = ["cocoindex[embeddings]>=0.1.
|
6
|
+
dependencies = ["cocoindex[embeddings]>=0.1.67", "python-dotenv>=1.0.1"]
|
7
7
|
|
8
8
|
[tool.setuptools]
|
9
9
|
packages = []
|
@@ -3,7 +3,7 @@ name = "manuals-to-kg"
|
|
3
3
|
version = "0.1.0"
|
4
4
|
description = "Simple example for cocoindex: extract triples from files and build knowledge graph."
|
5
5
|
requires-python = ">=3.11"
|
6
|
-
dependencies = ["cocoindex>=0.1.
|
6
|
+
dependencies = ["cocoindex>=0.1.67"]
|
7
7
|
|
8
8
|
[tool.setuptools]
|
9
9
|
packages = []
|
@@ -3,7 +3,7 @@ name = "gdrive-text-embedding"
|
|
3
3
|
version = "0.1.0"
|
4
4
|
description = "Simple example for cocoindex: build embedding index based on Google Drive files."
|
5
5
|
requires-python = ">=3.11"
|
6
|
-
dependencies = ["cocoindex[embeddings]>=0.1.
|
6
|
+
dependencies = ["cocoindex[embeddings]>=0.1.67", "python-dotenv>=1.0.1"]
|
7
7
|
|
8
8
|
[tool.setuptools]
|
9
9
|
packages = []
|