cocoindex 0.2.13__tar.gz → 0.2.15__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cocoindex-0.2.13 → cocoindex-0.2.15}/Cargo.lock +1 -1
- {cocoindex-0.2.13 → cocoindex-0.2.15}/Cargo.toml +1 -1
- {cocoindex-0.2.13 → cocoindex-0.2.15}/PKG-INFO +1 -1
- {cocoindex-0.2.13 → cocoindex-0.2.15}/THIRD_PARTY_NOTICES.html +1 -1
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/core/flow_def.mdx +1 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/examples/examples/simple_vector_index.md +10 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/python/cocoindex/__init__.py +12 -1
- {cocoindex-0.2.13 → cocoindex-0.2.15}/python/cocoindex/functions.py +20 -2
- {cocoindex-0.2.13 → cocoindex-0.2.15}/python/cocoindex/index.py +22 -1
- {cocoindex-0.2.13 → cocoindex-0.2.15}/python/cocoindex/targets/lancedb.py +6 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/base/spec.rs +58 -1
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/lib_context.rs +7 -11
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/llm/gemini.rs +30 -10
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/ops/functions/mod.rs +1 -0
- cocoindex-0.2.15/src/ops/functions/split_by_separators.rs +254 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/ops/functions/split_recursively.rs +4 -125
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/ops/registration.rs +1 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/ops/shared/mod.rs +1 -0
- cocoindex-0.2.15/src/ops/shared/split.rs +142 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/ops/targets/kuzu.rs +3 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/ops/targets/neo4j.rs +3 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/ops/targets/postgres.rs +34 -4
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/ops/targets/qdrant.rs +3 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/py/mod.rs +6 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/.cargo/config.toml +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/.env.lib_debug +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/.github/ISSUE_TEMPLATE/🐛-bug-report.md +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/.github/ISSUE_TEMPLATE/💡-feature-request.md +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/.github/SECURITY.md +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/.github/scripts/update_version.sh +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/.github/workflows/CI.yml +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/.github/workflows/_docs_release.yml +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/.github/workflows/_test.yml +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/.github/workflows/docs_release.yml +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/.github/workflows/docs_test.yml +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/.github/workflows/format.yml +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/.github/workflows/release.yml +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/.gitignore +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/.pre-commit-config.yaml +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/CODE_OF_CONDUCT.md +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/CONTRIBUTING.md +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/LICENSE +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/README.md +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/about.hbs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/about.toml +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/dev/neo4j.yaml +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/dev/postgres.yaml +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/.gitignore +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/README.md +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/about/community.md +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/ai/llm.mdx +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/contributing/guide.md +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/contributing/new_built_in_target.mdx +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/contributing/setup_dev_environment.md +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/core/basics.md +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/core/cli.mdx +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/core/data_example.svg +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/core/data_types.mdx +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/core/flow_example.svg +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/core/flow_methods.mdx +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/core/settings.mdx +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/custom_ops/custom_functions.mdx +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/custom_ops/custom_targets.mdx +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/examples/examples/academic_papers_index.md +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/examples/examples/codebase_index.md +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/examples/examples/custom_targets.md +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/examples/examples/docs_to_knowledge_graph.md +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/examples/examples/document_ai.md +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/examples/examples/image_search.md +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/examples/examples/manual_extraction.md +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/examples/examples/multi_format_index.md +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/examples/examples/patient_form_extraction.md +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/examples/examples/photo_search.md +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/examples/examples/postgres_source.md +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/examples/examples/product_recommendation.md +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/examples/index.md +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/getting_started/installation.md +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/getting_started/markdown_files.zip +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/getting_started/overview.md +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/getting_started/quickstart.md +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/ops/functions.md +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/ops/sources.md +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/ops/targets.md +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/query.mdx +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/tutorials/live_updates.md +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/tutorials/manage_flow_dynamically.mdx +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docusaurus.config.ts +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/package.json +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/sidebars.ts +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/src/components/GitHubButton/index.tsx +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/src/css/custom.css +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/src/theme/DocCard/index.tsx +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/src/theme/DocCard/styles.module.css +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/src/theme/DocCardList/index.tsx +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/src/theme/DocCardList/styles.module.css +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/src/theme/Root.js +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/.nojekyll +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/docusaurus.png +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/academic_papers_index/abstract_chunks.png +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/academic_papers_index/basic_info.png +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/academic_papers_index/chunk_embedding.png +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/academic_papers_index/cover.png +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/academic_papers_index/first_page.png +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/academic_papers_index/flow.png +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/academic_papers_index/metadata.png +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/codebase_index/chunk.png +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/codebase_index/cover.png +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/codebase_index/flow.png +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/custom_targets/convert.png +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/custom_targets/cover.png +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/docs_to_knowledge_graph/cover.png +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/docs_to_knowledge_graph/dedupe.png +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/docs_to_knowledge_graph/export_document.png +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/docs_to_knowledge_graph/export_relationship.png +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/docs_to_knowledge_graph/extract_relationship.png +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/docs_to_knowledge_graph/flow.png +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/docs_to_knowledge_graph/relationship.png +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/docs_to_knowledge_graph/summary.png +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/document_ai/cover.png +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/document_ai/document_ai.png +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/document_ai/processor.png +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/image_search/cover.png +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/image_search/embedding.png +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/image_search/flow.png +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/image_search/multi_modal_architecture.png +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/image_search/result.png +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/manual_extraction/cover.png +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/manual_extraction/extraction.png +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/manual_extraction/flow.png +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/manual_extraction/summary.png +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/multi_format_index/colpali_architecture.png +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/multi_format_index/cover.png +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/multi_format_index/embed.png +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/multi_format_index/flow.png +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/multi_format_index/pages.png +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/patient_form_extraction/cover.png +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/patient_form_extraction/extraction.png +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/patient_form_extraction/fields.png +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/patient_form_extraction/flow.png +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/patient_form_extraction/tomarkdown.png +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/photo_search/cover.png +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/photo_search/extraction.png +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/photo_search/flow.png +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/postgres_source/collector.png +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/postgres_source/cover.png +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/postgres_source/description.png +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/postgres_source/embed.png +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/postgres_source/flow.png +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/postgres_source/lineage.png +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/postgres_source/price.png +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/postgres_source/source.png +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/product_recommendation/cover.png +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/product_recommendation/dedupe.png +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/product_recommendation/export_all.png +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/product_recommendation/export_product.png +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/product_recommendation/export_taxonomy.png +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/product_recommendation/extract_product.png +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/product_recommendation/extract_taxonomy.png +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/product_recommendation/neo4j.png +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/product_recommendation/parse_json.png +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/product_recommendation/taxonomy.png +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/simple_vector_index/chunk.png +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/simple_vector_index/cover.png +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/simple_vector_index/embed.png +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/simple_vector_index/flow.png +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/favicon.ico +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/icon.svg +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/incremental-etl.gif +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/robots.txt +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/tsconfig.json +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/yarn.lock +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/amazon_s3_embedding/.env.example +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/amazon_s3_embedding/.gitignore +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/amazon_s3_embedding/README.md +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/amazon_s3_embedding/main.py +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/amazon_s3_embedding/pyproject.toml +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/azure_blob_embedding/.env.example +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/azure_blob_embedding/.gitignore +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/azure_blob_embedding/README.md +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/azure_blob_embedding/main.py +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/azure_blob_embedding/pyproject.toml +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/code_embedding/.env +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/code_embedding/README.md +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/code_embedding/main.py +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/code_embedding/pyproject.toml +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/custom_output_files/.env +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/custom_output_files/.gitignore +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/custom_output_files/README.md +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/custom_output_files/data/bizarre_animals.md +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/custom_output_files/data/chunk_norris.md +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/custom_output_files/main.py +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/custom_output_files/pyproject.toml +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/docs_to_knowledge_graph/.env +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/docs_to_knowledge_graph/README.md +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/docs_to_knowledge_graph/main.py +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/docs_to_knowledge_graph/pyproject.toml +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/face_recognition/.env +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/face_recognition/README.md +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/face_recognition/images/Carter_welcomes_Reagan.jpg +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/face_recognition/images/Solvay_conference_1927.jpg +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/face_recognition/images/Steve_Jobs_and_Bill_Gates_(522695099).jpg +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/face_recognition/images/einplanck3.jpg +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/face_recognition/main.py +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/face_recognition/pyproject.toml +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/fastapi_server_docker/.dockerignore +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/fastapi_server_docker/.env +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/fastapi_server_docker/README.md +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/fastapi_server_docker/compose.yaml +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/fastapi_server_docker/dockerfile +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/fastapi_server_docker/files/1810.04805v2.md +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/fastapi_server_docker/main.py +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/fastapi_server_docker/requirements.txt +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/gdrive_text_embedding/.env.example +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/gdrive_text_embedding/.gitignore +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/gdrive_text_embedding/README.md +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/gdrive_text_embedding/main.py +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/gdrive_text_embedding/pyproject.toml +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/image_search/.env +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/image_search/README.md +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/image_search/colpali_main.py +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/image_search/frontend/.gitignore +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/image_search/frontend/index.html +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/image_search/frontend/package-lock.json +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/image_search/frontend/package.json +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/image_search/frontend/src/App.jsx +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/image_search/frontend/src/main.jsx +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/image_search/frontend/src/style.css +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/image_search/frontend/vite.config.js +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/image_search/img/cat1.jpeg +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/image_search/img/dog1.jpeg +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/image_search/img/elephant1.jpg +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/image_search/img/giraffe.jpg +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/image_search/main.py +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/image_search/pyproject.toml +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/live_updates/.env +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/live_updates/README.md +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/live_updates/data/bizarre_animals.md +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/live_updates/data/chunk_norris.md +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/live_updates/main.py +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/live_updates/pyproject.toml +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/manuals_llm_extraction/.env +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/manuals_llm_extraction/README.md +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/manuals_llm_extraction/main.py +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/manuals_llm_extraction/manuals/array.pdf +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/manuals_llm_extraction/manuals/base64.pdf +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/manuals_llm_extraction/manuals/copy.pdf +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/manuals_llm_extraction/manuals/glob.pdf +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/manuals_llm_extraction/pyproject.toml +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/multi_format_indexing/.env +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/multi_format_indexing/README.md +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/multi_format_indexing/main.py +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/multi_format_indexing/pyproject.toml +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/multi_format_indexing/source_files/1706.03762v7.pdf +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/multi_format_indexing/source_files/1810.04805v2.pdf +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/multi_format_indexing/source_files/2502.06786v3.pdf +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/multi_format_indexing/source_files/healthcare_industry_test_p101.jpg +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/multi_format_indexing/source_files/healthcare_industry_test_p86.jpg +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/multi_format_indexing/source_files/healthcare_industry_test_p9.jpg +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/multi_format_indexing/source_files/restaurant_brands_international_2023.jpg +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/multi_format_indexing/source_files/sweetgreen_2023.jpg +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/paper_metadata/.env.example +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/paper_metadata/.gitignore +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/paper_metadata/README.md +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/paper_metadata/main.py +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/paper_metadata/papers/1706.03762v7.pdf +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/paper_metadata/papers/1810.04805v2.pdf +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/paper_metadata/papers/2502.06786v3.pdf +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/paper_metadata/papers/2502.20346v1.pdf +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/paper_metadata/pyproject.toml +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/patient_intake_extraction/.env.example +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/patient_intake_extraction/README.md +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/patient_intake_extraction/data/README.md +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/patient_intake_extraction/data/patient_forms/Patient_Intake_Form_David_Artificial.docx +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/patient_intake_extraction/data/patient_forms/Patient_Intake_Form_Emily_Artificial.pdf +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/patient_intake_extraction/data/patient_forms/Patient_Intake_Form_Joe_Artificial.pdf +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/patient_intake_extraction/data/patient_forms/Patient_Intake_From_Jane_Artificial.docx +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/patient_intake_extraction/main.py +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/patient_intake_extraction/pyproject.toml +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/pdf_embedding/.env +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/pdf_embedding/README.md +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/pdf_embedding/main.py +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/pdf_embedding/pdf_files/1706.03762v7.pdf +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/pdf_embedding/pdf_files/1810.04805v2.pdf +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/pdf_embedding/pdf_files/rfc8259.pdf +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/pdf_embedding/pyproject.toml +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/postgres_source/.env +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/postgres_source/README.md +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/postgres_source/main.py +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/postgres_source/prepare_source_data.sql +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/postgres_source/pyproject.toml +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/product_recommendation/.env.example +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/product_recommendation/.gitignore +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/product_recommendation/README.md +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/product_recommendation/img/cocoinsight.png +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/product_recommendation/img/neo4j.png +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/product_recommendation/main.py +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/product_recommendation/products/p1.json +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/product_recommendation/products/p2.json +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/product_recommendation/products/p3.json +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/product_recommendation/products/p4.json +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/product_recommendation/products/p5.json +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/product_recommendation/products/p6.json +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/product_recommendation/products/p7.json +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/product_recommendation/products/p8.json +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/product_recommendation/products/p9.json +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/product_recommendation/pyproject.toml +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/text_embedding/.env +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/text_embedding/README.md +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/text_embedding/Text_Embedding.ipynb +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/text_embedding/main.py +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/text_embedding/markdown_files/1706.03762v7.md +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/text_embedding/markdown_files/1810.04805v2.md +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/text_embedding/markdown_files/rfc8259.md +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/text_embedding/pyproject.toml +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/text_embedding_lancedb/.env +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/text_embedding_lancedb/.gitignore +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/text_embedding_lancedb/README.md +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/text_embedding_lancedb/main.py +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/text_embedding_lancedb/markdown_files/rfc8259.md +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/text_embedding_lancedb/pyproject.toml +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/text_embedding_qdrant/.env +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/text_embedding_qdrant/README.md +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/text_embedding_qdrant/main.py +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/text_embedding_qdrant/markdown_files/rfc8259.md +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/text_embedding_qdrant/pyproject.toml +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/pyproject.toml +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/python/cocoindex/auth_registry.py +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/python/cocoindex/cli.py +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/python/cocoindex/convert.py +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/python/cocoindex/flow.py +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/python/cocoindex/lib.py +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/python/cocoindex/llm.py +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/python/cocoindex/op.py +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/python/cocoindex/py.typed +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/python/cocoindex/query_handler.py +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/python/cocoindex/runtime.py +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/python/cocoindex/setting.py +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/python/cocoindex/setup.py +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/python/cocoindex/sources.py +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/python/cocoindex/subprocess_exec.py +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/python/cocoindex/targets/__init__.py +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/python/cocoindex/targets/_engine_builtin_specs.py +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/python/cocoindex/tests/__init__.py +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/python/cocoindex/tests/test_convert.py +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/python/cocoindex/tests/test_load_convert.py +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/python/cocoindex/tests/test_optional_database.py +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/python/cocoindex/tests/test_transform_flow.py +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/python/cocoindex/tests/test_typing.py +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/python/cocoindex/tests/test_validation.py +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/python/cocoindex/typing.py +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/python/cocoindex/user_app_loader.py +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/python/cocoindex/utils.py +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/python/cocoindex/validation.py +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/ruff.toml +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/base/duration.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/base/field_attrs.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/base/json_schema.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/base/mod.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/base/schema.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/base/value.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/builder/analyzed_flow.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/builder/analyzer.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/builder/exec_ctx.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/builder/flow_builder.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/builder/mod.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/builder/plan.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/execution/db_tracking.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/execution/db_tracking_setup.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/execution/dumper.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/execution/evaluator.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/execution/indexing_status.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/execution/live_updater.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/execution/memoization.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/execution/mod.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/execution/row_indexer.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/execution/source_indexer.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/execution/stats.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/lib.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/llm/anthropic.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/llm/litellm.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/llm/mod.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/llm/ollama.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/llm/openai.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/llm/openrouter.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/llm/vllm.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/llm/voyage.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/ops/factory_bases.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/ops/functions/embed_text.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/ops/functions/extract_by_llm.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/ops/functions/parse_json.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/ops/functions/test_utils.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/ops/interface.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/ops/mod.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/ops/py_factory.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/ops/registry.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/ops/sdk.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/ops/shared/postgres.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/ops/sources/amazon_s3.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/ops/sources/azure_blob.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/ops/sources/google_drive.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/ops/sources/local_file.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/ops/sources/mod.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/ops/sources/postgres.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/ops/sources/shared/mod.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/ops/sources/shared/pattern_matcher.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/ops/targets/mod.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/ops/targets/shared/mod.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/ops/targets/shared/property_graph.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/ops/targets/shared/table_columns.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/prelude.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/py/convert.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/server.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/service/error.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/service/flows.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/service/mod.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/service/query_handler.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/settings.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/setup/auth_registry.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/setup/components.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/setup/db_metadata.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/setup/driver.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/setup/flow_features.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/setup/mod.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/setup/states.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/utils/concur_control.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/utils/db.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/utils/deser.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/utils/fingerprint.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/utils/immutable.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/utils/mod.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/utils/retryable.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/utils/str_sanitize.rs +0 -0
- {cocoindex-0.2.13 → cocoindex-0.2.15}/src/utils/yaml_ser.rs +0 -0
@@ -2,7 +2,7 @@
|
|
2
2
|
name = "cocoindex"
|
3
3
|
# Version used for local development is always higher than others to take precedence.
|
4
4
|
# Will be overridden for specific release versions.
|
5
|
-
version = "0.2.13"
|
5
|
+
version = "0.2.15"
|
6
6
|
edition = "2024"
|
7
7
|
rust-version = "1.89"
|
8
8
|
license = "Apache-2.0"
|
@@ -2428,7 +2428,7 @@ Software.
|
|
2428
2428
|
<h3 id="Apache-2.0">Apache License 2.0</h3>
|
2429
2429
|
<h4>Used by:</h4>
|
2430
2430
|
<ul class="license-used-by">
|
2431
|
-
<li><a href=" https://crates.io/crates/cocoindex ">cocoindex 0.2.13</a></li>
|
2431
|
+
<li><a href=" https://crates.io/crates/cocoindex ">cocoindex 0.2.15</a></li>
|
2432
2432
|
<li><a href=" https://github.com/awesomized/crc-fast-rust ">crc-fast 1.3.0</a></li>
|
2433
2433
|
<li><a href=" https://github.com/qdrant/rust-client ">qdrant-client 1.15.0</a></li>
|
2434
2434
|
</ul>
|
@@ -313,6 +313,7 @@ Types of the fields must be key types. See [Key Types](data_types#key-types) for
|
|
313
313
|
|
314
314
|
* `field_name`: the field to create vector index.
|
315
315
|
* `metric`: the similarity metric to use.
|
316
|
+
* `method` (optional): the index algorithm and optional tuning parameters. Leave unset to use the target default (HNSW for Postgres). Use `cocoindex.HnswVectorIndexMethod()` or `cocoindex.IvfFlatVectorIndexMethod()` to customize the method and its parameters.
|
316
317
|
|
317
318
|
#### Similarity Metrics
|
318
319
|
|
@@ -105,6 +105,16 @@ doc_embeddings.export(
|
|
105
105
|
CocoIndex supports other vector databases as well, with 1-line switch.
|
106
106
|
<DocumentationButton url="https://cocoindex.io/docs/ops/targets" text="Targets" />
|
107
107
|
|
108
|
+
Need IVFFlat or custom HNSW parameters? Pass a method, for example:
|
109
|
+
|
110
|
+
```python
|
111
|
+
cocoindex.VectorIndexDef(
|
112
|
+
field_name="embedding",
|
113
|
+
metric=cocoindex.VectorSimilarityMetric.COSINE_SIMILARITY,
|
114
|
+
method=cocoindex.IvfFlatVectorIndexMethod(lists=200),
|
115
|
+
)
|
116
|
+
```
|
117
|
+
|
108
118
|
## Query the index
|
109
119
|
|
110
120
|
### Define a shared flow for both indexing and querying
|
@@ -2,6 +2,7 @@
|
|
2
2
|
Cocoindex is a framework for building and running indexing pipelines.
|
3
3
|
"""
|
4
4
|
|
5
|
+
from . import _engine # type: ignore
|
5
6
|
from . import functions, sources, targets, cli, utils
|
6
7
|
|
7
8
|
from . import targets as storages # Deprecated: Use targets instead
|
@@ -21,7 +22,13 @@ from .flow import add_flow_def, remove_flow # DEPRECATED
|
|
21
22
|
from .flow import update_all_flows_async, setup_all_flows, drop_all_flows
|
22
23
|
from .lib import settings, init, start_server, stop
|
23
24
|
from .llm import LlmSpec, LlmApiType
|
24
|
-
from .index import VectorSimilarityMetric, VectorIndexDef, IndexOptions
|
25
|
+
from .index import (
|
26
|
+
VectorSimilarityMetric,
|
27
|
+
VectorIndexDef,
|
28
|
+
IndexOptions,
|
29
|
+
HnswVectorIndexMethod,
|
30
|
+
IvfFlatVectorIndexMethod,
|
31
|
+
)
|
25
32
|
from .setting import DatabaseConnectionSpec, Settings, ServerSettings
|
26
33
|
from .setting import get_app_namespace
|
27
34
|
from .query_handler import QueryHandlerResultFields, QueryInfo, QueryOutput
|
@@ -36,6 +43,8 @@ from .typing import (
|
|
36
43
|
Json,
|
37
44
|
)
|
38
45
|
|
46
|
+
_engine.init_pyo3_runtime()
|
47
|
+
|
39
48
|
__all__ = [
|
40
49
|
# Submodules
|
41
50
|
"_engine",
|
@@ -82,6 +91,8 @@ __all__ = [
|
|
82
91
|
"VectorSimilarityMetric",
|
83
92
|
"VectorIndexDef",
|
84
93
|
"IndexOptions",
|
94
|
+
"HnswVectorIndexMethod",
|
95
|
+
"IvfFlatVectorIndexMethod",
|
85
96
|
# Settings
|
86
97
|
"DatabaseConnectionSpec",
|
87
98
|
"Settings",
|
@@ -2,13 +2,13 @@
|
|
2
2
|
|
3
3
|
import dataclasses
|
4
4
|
import functools
|
5
|
-
from typing import
|
5
|
+
from typing import Any, Literal
|
6
6
|
|
7
7
|
import numpy as np
|
8
8
|
from numpy.typing import NDArray
|
9
9
|
|
10
10
|
from . import llm, op
|
11
|
-
from .typing import
|
11
|
+
from .typing import Vector
|
12
12
|
|
13
13
|
|
14
14
|
class ParseJson(op.FunctionSpec):
|
@@ -40,6 +40,24 @@ class SplitRecursively(op.FunctionSpec):
|
|
40
40
|
custom_languages: list[CustomLanguageSpec] = dataclasses.field(default_factory=list)
|
41
41
|
|
42
42
|
|
43
|
+
class SplitBySeparators(op.FunctionSpec):
|
44
|
+
"""
|
45
|
+
Split text by specified regex separators only.
|
46
|
+
Output schema matches SplitRecursively for drop-in compatibility:
|
47
|
+
KTable rows with fields: location (Range), text (Str), start, end.
|
48
|
+
Args:
|
49
|
+
separators_regex: list[str] # e.g., [r"\\n\\n+"]
|
50
|
+
keep_separator: Literal["NONE", "LEFT", "RIGHT"] = "NONE"
|
51
|
+
include_empty: bool = False
|
52
|
+
trim: bool = True
|
53
|
+
"""
|
54
|
+
|
55
|
+
separators_regex: list[str] = dataclasses.field(default_factory=list)
|
56
|
+
keep_separator: Literal["NONE", "LEFT", "RIGHT"] = "NONE"
|
57
|
+
include_empty: bool = False
|
58
|
+
trim: bool = True
|
59
|
+
|
60
|
+
|
43
61
|
class EmbedText(op.FunctionSpec):
|
44
62
|
"""Embed a text into a vector space."""
|
45
63
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
from enum import Enum
|
2
2
|
from dataclasses import dataclass
|
3
|
-
from typing import Sequence
|
3
|
+
from typing import Sequence, Union
|
4
4
|
|
5
5
|
|
6
6
|
class VectorSimilarityMetric(Enum):
|
@@ -9,6 +9,26 @@ class VectorSimilarityMetric(Enum):
|
|
9
9
|
INNER_PRODUCT = "InnerProduct"
|
10
10
|
|
11
11
|
|
12
|
+
@dataclass
|
13
|
+
class HnswVectorIndexMethod:
|
14
|
+
"""HNSW vector index parameters."""
|
15
|
+
|
16
|
+
kind: str = "Hnsw"
|
17
|
+
m: int | None = None
|
18
|
+
ef_construction: int | None = None
|
19
|
+
|
20
|
+
|
21
|
+
@dataclass
|
22
|
+
class IvfFlatVectorIndexMethod:
|
23
|
+
"""IVFFlat vector index parameters."""
|
24
|
+
|
25
|
+
kind: str = "IvfFlat"
|
26
|
+
lists: int | None = None
|
27
|
+
|
28
|
+
|
29
|
+
VectorIndexMethod = Union[HnswVectorIndexMethod, IvfFlatVectorIndexMethod]
|
30
|
+
|
31
|
+
|
12
32
|
@dataclass
|
13
33
|
class VectorIndexDef:
|
14
34
|
"""
|
@@ -17,6 +37,7 @@ class VectorIndexDef:
|
|
17
37
|
|
18
38
|
field_name: str
|
19
39
|
metric: VectorSimilarityMetric
|
40
|
+
method: VectorIndexMethod | None = None
|
20
41
|
|
21
42
|
|
22
43
|
@dataclass
|
@@ -296,6 +296,12 @@ class _Connector:
|
|
296
296
|
) -> _State:
|
297
297
|
if len(key_fields_schema) != 1:
|
298
298
|
raise ValueError("LanceDB only supports a single key field")
|
299
|
+
if index_options.vector_indexes is not None:
|
300
|
+
for vector_index in index_options.vector_indexes:
|
301
|
+
if vector_index.method is not None:
|
302
|
+
raise ValueError(
|
303
|
+
"Vector index method is not configurable for LanceDB yet"
|
304
|
+
)
|
299
305
|
return _State(
|
300
306
|
key_field_schema=key_fields_schema[0],
|
301
307
|
value_fields_schema=value_fields_schema,
|
@@ -384,15 +384,72 @@ impl fmt::Display for VectorSimilarityMetric {
|
|
384
384
|
}
|
385
385
|
}
|
386
386
|
|
387
|
+
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
|
388
|
+
#[serde(tag = "kind")]
|
389
|
+
pub enum VectorIndexMethod {
|
390
|
+
Hnsw {
|
391
|
+
#[serde(default, skip_serializing_if = "Option::is_none")]
|
392
|
+
m: Option<u32>,
|
393
|
+
#[serde(default, skip_serializing_if = "Option::is_none")]
|
394
|
+
ef_construction: Option<u32>,
|
395
|
+
},
|
396
|
+
IvfFlat {
|
397
|
+
#[serde(default, skip_serializing_if = "Option::is_none")]
|
398
|
+
lists: Option<u32>,
|
399
|
+
},
|
400
|
+
}
|
401
|
+
|
402
|
+
impl VectorIndexMethod {
|
403
|
+
pub fn kind(&self) -> &'static str {
|
404
|
+
match self {
|
405
|
+
Self::Hnsw { .. } => "Hnsw",
|
406
|
+
Self::IvfFlat { .. } => "IvfFlat",
|
407
|
+
}
|
408
|
+
}
|
409
|
+
}
|
410
|
+
|
411
|
+
impl fmt::Display for VectorIndexMethod {
|
412
|
+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
413
|
+
match self {
|
414
|
+
Self::Hnsw { m, ef_construction } => {
|
415
|
+
let mut parts = Vec::new();
|
416
|
+
if let Some(m) = m {
|
417
|
+
parts.push(format!("m={}", m));
|
418
|
+
}
|
419
|
+
if let Some(ef) = ef_construction {
|
420
|
+
parts.push(format!("ef_construction={}", ef));
|
421
|
+
}
|
422
|
+
if parts.is_empty() {
|
423
|
+
write!(f, "Hnsw")
|
424
|
+
} else {
|
425
|
+
write!(f, "Hnsw({})", parts.join(","))
|
426
|
+
}
|
427
|
+
}
|
428
|
+
Self::IvfFlat { lists } => {
|
429
|
+
if let Some(lists) = lists {
|
430
|
+
write!(f, "IvfFlat(lists={lists})")
|
431
|
+
} else {
|
432
|
+
write!(f, "IvfFlat")
|
433
|
+
}
|
434
|
+
}
|
435
|
+
}
|
436
|
+
}
|
437
|
+
}
|
438
|
+
|
387
439
|
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
|
388
440
|
pub struct VectorIndexDef {
|
389
441
|
pub field_name: FieldName,
|
390
442
|
pub metric: VectorSimilarityMetric,
|
443
|
+
#[serde(default, skip_serializing_if = "Option::is_none")]
|
444
|
+
pub method: Option<VectorIndexMethod>,
|
391
445
|
}
|
392
446
|
|
393
447
|
impl fmt::Display for VectorIndexDef {
|
394
448
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
395
|
-
|
449
|
+
match &self.method {
|
450
|
+
None => write!(f, "{}:{}", self.field_name, self.metric),
|
451
|
+
Some(method) => write!(f, "{}:{}:{}", self.field_name, self.metric, method),
|
452
|
+
}
|
396
453
|
}
|
397
454
|
}
|
398
455
|
|
@@ -162,6 +162,13 @@ impl FlowContext {
|
|
162
162
|
static TOKIO_RUNTIME: LazyLock<Runtime> = LazyLock::new(|| Runtime::new().unwrap());
|
163
163
|
static AUTH_REGISTRY: LazyLock<Arc<AuthRegistry>> = LazyLock::new(|| Arc::new(AuthRegistry::new()));
|
164
164
|
|
165
|
+
pub fn get_runtime() -> &'static Runtime {
|
166
|
+
&TOKIO_RUNTIME
|
167
|
+
}
|
168
|
+
pub fn get_auth_registry() -> &'static Arc<AuthRegistry> {
|
169
|
+
&AUTH_REGISTRY
|
170
|
+
}
|
171
|
+
|
165
172
|
type PoolKey = (String, Option<String>);
|
166
173
|
type PoolValue = Arc<tokio::sync::OnceCell<PgPool>>;
|
167
174
|
|
@@ -271,21 +278,10 @@ impl LibContext {
|
|
271
278
|
}
|
272
279
|
}
|
273
280
|
|
274
|
-
pub fn get_runtime() -> &'static Runtime {
|
275
|
-
&TOKIO_RUNTIME
|
276
|
-
}
|
277
|
-
|
278
|
-
pub fn get_auth_registry() -> &'static Arc<AuthRegistry> {
|
279
|
-
&AUTH_REGISTRY
|
280
|
-
}
|
281
|
-
|
282
281
|
static LIB_INIT: OnceLock<()> = OnceLock::new();
|
283
282
|
pub async fn create_lib_context(settings: settings::Settings) -> Result<LibContext> {
|
284
283
|
LIB_INIT.get_or_init(|| {
|
285
284
|
let _ = env_logger::try_init();
|
286
|
-
|
287
|
-
pyo3_async_runtimes::tokio::init_with_runtime(get_runtime()).unwrap();
|
288
|
-
|
289
285
|
let _ = rustls::crypto::aws_lc_rs::default_provider().install_default();
|
290
286
|
});
|
291
287
|
|
@@ -74,6 +74,30 @@ impl AiStudioClient {
|
|
74
74
|
}
|
75
75
|
}
|
76
76
|
|
77
|
+
fn build_embed_payload(
|
78
|
+
model: &str,
|
79
|
+
text: &str,
|
80
|
+
task_type: Option<&str>,
|
81
|
+
output_dimension: Option<u32>,
|
82
|
+
) -> serde_json::Value {
|
83
|
+
let mut payload = serde_json::json!({
|
84
|
+
"model": model,
|
85
|
+
"content": { "parts": [{ "text": text }] },
|
86
|
+
});
|
87
|
+
if let Some(task_type) = task_type {
|
88
|
+
payload["taskType"] = serde_json::Value::String(task_type.to_string());
|
89
|
+
}
|
90
|
+
if let Some(output_dimension) = output_dimension {
|
91
|
+
payload["outputDimensionality"] = serde_json::json!(output_dimension);
|
92
|
+
if model.starts_with("gemini-embedding-") {
|
93
|
+
payload["config"] = serde_json::json!({
|
94
|
+
"outputDimensionality": output_dimension,
|
95
|
+
});
|
96
|
+
}
|
97
|
+
}
|
98
|
+
payload
|
99
|
+
}
|
100
|
+
|
77
101
|
#[async_trait]
|
78
102
|
impl LlmGenerationClient for AiStudioClient {
|
79
103
|
async fn generate<'req>(
|
@@ -174,16 +198,12 @@ impl LlmEmbeddingClient for AiStudioClient {
|
|
174
198
|
request: super::LlmEmbeddingRequest<'req>,
|
175
199
|
) -> Result<super::LlmEmbeddingResponse> {
|
176
200
|
let url = self.get_api_url(request.model, "embedContent");
|
177
|
-
let
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
}
|
184
|
-
if let Some(output_dimension) = request.output_dimension {
|
185
|
-
payload["outputDimensionality"] = serde_json::Value::Number(output_dimension.into());
|
186
|
-
}
|
201
|
+
let payload = build_embed_payload(
|
202
|
+
request.model,
|
203
|
+
request.text.as_ref(),
|
204
|
+
request.task_type.as_deref(),
|
205
|
+
request.output_dimension,
|
206
|
+
);
|
187
207
|
let resp = retryable::run(
|
188
208
|
|| async {
|
189
209
|
self.client
|
@@ -0,0 +1,254 @@
|
|
1
|
+
use anyhow::{Context, Result};
|
2
|
+
use regex::Regex;
|
3
|
+
use std::sync::Arc;
|
4
|
+
|
5
|
+
use crate::ops::registry::ExecutorFactoryRegistry;
|
6
|
+
use crate::ops::shared::split::{Position, make_common_chunk_schema, set_output_positions};
|
7
|
+
use crate::{fields_value, ops::sdk::*};
|
8
|
+
|
9
|
+
#[derive(Serialize, Deserialize, Clone, Copy, PartialEq, Eq)]
|
10
|
+
#[serde(rename_all = "UPPERCASE")]
|
11
|
+
enum KeepSep {
|
12
|
+
Left,
|
13
|
+
Right,
|
14
|
+
}
|
15
|
+
|
16
|
+
#[derive(Serialize, Deserialize)]
|
17
|
+
struct Spec {
|
18
|
+
// Python SDK provides defaults/values.
|
19
|
+
separators_regex: Vec<String>,
|
20
|
+
keep_separator: Option<KeepSep>,
|
21
|
+
include_empty: bool,
|
22
|
+
trim: bool,
|
23
|
+
}
|
24
|
+
|
25
|
+
struct Args {
|
26
|
+
text: ResolvedOpArg,
|
27
|
+
}
|
28
|
+
|
29
|
+
struct Executor {
|
30
|
+
spec: Spec,
|
31
|
+
regex: Option<Regex>,
|
32
|
+
args: Args,
|
33
|
+
}
|
34
|
+
|
35
|
+
impl Executor {
|
36
|
+
fn new(args: Args, spec: Spec) -> Result<Self> {
|
37
|
+
let regex = if spec.separators_regex.is_empty() {
|
38
|
+
None
|
39
|
+
} else {
|
40
|
+
// OR-join all separators, multiline
|
41
|
+
let pattern = format!(
|
42
|
+
"(?m){}",
|
43
|
+
spec.separators_regex
|
44
|
+
.iter()
|
45
|
+
.map(|s| format!("(?:{s})"))
|
46
|
+
.collect::<Vec<_>>()
|
47
|
+
.join("|")
|
48
|
+
);
|
49
|
+
Some(Regex::new(&pattern).context("failed to compile separators_regex")?)
|
50
|
+
};
|
51
|
+
Ok(Self { args, spec, regex })
|
52
|
+
}
|
53
|
+
}
|
54
|
+
|
55
|
+
struct ChunkOutput<'s> {
|
56
|
+
start_pos: Position,
|
57
|
+
end_pos: Position,
|
58
|
+
text: &'s str,
|
59
|
+
}
|
60
|
+
|
61
|
+
#[async_trait]
|
62
|
+
impl SimpleFunctionExecutor for Executor {
|
63
|
+
async fn evaluate(&self, input: Vec<Value>) -> Result<Value> {
|
64
|
+
let full_text = self.args.text.value(&input)?.as_str()?;
|
65
|
+
let bytes = full_text.as_bytes();
|
66
|
+
|
67
|
+
// add_range applies trim/include_empty and records the text slice
|
68
|
+
let mut chunks: Vec<ChunkOutput<'_>> = Vec::new();
|
69
|
+
let mut add_range = |mut s: usize, mut e: usize| {
|
70
|
+
if self.spec.trim {
|
71
|
+
while s < e && bytes[s].is_ascii_whitespace() {
|
72
|
+
s += 1;
|
73
|
+
}
|
74
|
+
while e > s && bytes[e - 1].is_ascii_whitespace() {
|
75
|
+
e -= 1;
|
76
|
+
}
|
77
|
+
}
|
78
|
+
if self.spec.include_empty || e > s {
|
79
|
+
chunks.push(ChunkOutput {
|
80
|
+
start_pos: Position::new(s),
|
81
|
+
end_pos: Position::new(e),
|
82
|
+
text: &full_text[s..e],
|
83
|
+
});
|
84
|
+
}
|
85
|
+
};
|
86
|
+
|
87
|
+
if let Some(re) = &self.regex {
|
88
|
+
let mut start = 0usize;
|
89
|
+
for m in re.find_iter(full_text) {
|
90
|
+
let end = match self.spec.keep_separator {
|
91
|
+
Some(KeepSep::Left) => m.end(),
|
92
|
+
Some(KeepSep::Right) | None => m.start(),
|
93
|
+
};
|
94
|
+
add_range(start, end);
|
95
|
+
start = match self.spec.keep_separator {
|
96
|
+
Some(KeepSep::Right) => m.start(),
|
97
|
+
_ => m.end(),
|
98
|
+
};
|
99
|
+
}
|
100
|
+
add_range(start, full_text.len());
|
101
|
+
} else {
|
102
|
+
// No separators: emit whole text
|
103
|
+
add_range(0, full_text.len());
|
104
|
+
}
|
105
|
+
|
106
|
+
set_output_positions(
|
107
|
+
full_text,
|
108
|
+
chunks.iter_mut().flat_map(|c| {
|
109
|
+
std::iter::once(&mut c.start_pos).chain(std::iter::once(&mut c.end_pos))
|
110
|
+
}),
|
111
|
+
);
|
112
|
+
|
113
|
+
let table = chunks
|
114
|
+
.into_iter()
|
115
|
+
.map(|c| {
|
116
|
+
let s = c.start_pos.output.unwrap();
|
117
|
+
let e = c.end_pos.output.unwrap();
|
118
|
+
(
|
119
|
+
KeyValue::from_single_part(RangeValue::new(s.char_offset, e.char_offset)),
|
120
|
+
fields_value!(Arc::<str>::from(c.text), s.into_output(), e.into_output())
|
121
|
+
.into(),
|
122
|
+
)
|
123
|
+
})
|
124
|
+
.collect();
|
125
|
+
|
126
|
+
Ok(Value::KTable(table))
|
127
|
+
}
|
128
|
+
}
|
129
|
+
|
130
|
+
struct Factory;
|
131
|
+
|
132
|
+
#[async_trait]
|
133
|
+
impl SimpleFunctionFactoryBase for Factory {
|
134
|
+
type Spec = Spec;
|
135
|
+
type ResolvedArgs = Args;
|
136
|
+
|
137
|
+
fn name(&self) -> &str {
|
138
|
+
"SplitBySeparators"
|
139
|
+
}
|
140
|
+
|
141
|
+
async fn resolve_schema<'a>(
|
142
|
+
&'a self,
|
143
|
+
_spec: &'a Spec,
|
144
|
+
args_resolver: &mut OpArgsResolver<'a>,
|
145
|
+
_context: &FlowInstanceContext,
|
146
|
+
) -> Result<(Args, EnrichedValueType)> {
|
147
|
+
// one required arg: text: Str
|
148
|
+
let args = Args {
|
149
|
+
text: args_resolver
|
150
|
+
.next_arg("text")?
|
151
|
+
.expect_type(&ValueType::Basic(BasicValueType::Str))?
|
152
|
+
.required()?,
|
153
|
+
};
|
154
|
+
|
155
|
+
let output_schema = make_common_chunk_schema(args_resolver, &args.text)?;
|
156
|
+
Ok((args, output_schema))
|
157
|
+
}
|
158
|
+
|
159
|
+
async fn build_executor(
|
160
|
+
self: Arc<Self>,
|
161
|
+
spec: Spec,
|
162
|
+
args: Args,
|
163
|
+
_context: Arc<FlowInstanceContext>,
|
164
|
+
) -> Result<impl SimpleFunctionExecutor> {
|
165
|
+
Executor::new(args, spec)
|
166
|
+
}
|
167
|
+
}
|
168
|
+
|
169
|
+
pub fn register(registry: &mut ExecutorFactoryRegistry) -> Result<()> {
|
170
|
+
Factory.register(registry)
|
171
|
+
}
|
172
|
+
|
173
|
+
#[cfg(test)]
|
174
|
+
mod tests {
|
175
|
+
use super::*;
|
176
|
+
use crate::ops::functions::test_utils::test_flow_function;
|
177
|
+
|
178
|
+
#[tokio::test]
|
179
|
+
async fn test_split_by_separators_paragraphs() {
|
180
|
+
let spec = Spec {
|
181
|
+
separators_regex: vec![r"\n\n+".to_string()],
|
182
|
+
keep_separator: None,
|
183
|
+
include_empty: false,
|
184
|
+
trim: true,
|
185
|
+
};
|
186
|
+
let factory = Arc::new(Factory);
|
187
|
+
let text = "Para1\n\nPara2\n\n\nPara3";
|
188
|
+
|
189
|
+
let input_arg_schemas = &[(
|
190
|
+
Some("text"),
|
191
|
+
make_output_type(BasicValueType::Str).with_nullable(true),
|
192
|
+
)];
|
193
|
+
|
194
|
+
let result = test_flow_function(
|
195
|
+
&factory,
|
196
|
+
&spec,
|
197
|
+
input_arg_schemas,
|
198
|
+
vec![text.to_string().into()],
|
199
|
+
)
|
200
|
+
.await
|
201
|
+
.unwrap();
|
202
|
+
|
203
|
+
match result {
|
204
|
+
Value::KTable(table) => {
|
205
|
+
// Expected ranges after trimming whitespace:
|
206
|
+
let expected = vec![
|
207
|
+
(RangeValue::new(0, 5), "Para1"),
|
208
|
+
(RangeValue::new(7, 12), "Para2"),
|
209
|
+
(RangeValue::new(15, 20), "Para3"),
|
210
|
+
];
|
211
|
+
for (range, expected_text) in expected {
|
212
|
+
let key = KeyValue::from_single_part(range);
|
213
|
+
let row = table.get(&key).unwrap();
|
214
|
+
let chunk_text = row.0.fields[0].as_str().unwrap();
|
215
|
+
assert_eq!(**chunk_text, *expected_text);
|
216
|
+
}
|
217
|
+
}
|
218
|
+
other => panic!("Expected KTable, got {other:?}"),
|
219
|
+
}
|
220
|
+
}
|
221
|
+
|
222
|
+
#[tokio::test]
|
223
|
+
async fn test_split_by_separators_keep_right() {
|
224
|
+
let spec = Spec {
|
225
|
+
separators_regex: vec![r"\.".to_string()],
|
226
|
+
keep_separator: Some(KeepSep::Right),
|
227
|
+
include_empty: false,
|
228
|
+
trim: true,
|
229
|
+
};
|
230
|
+
let factory = Arc::new(Factory);
|
231
|
+
let text = "A. B. C.";
|
232
|
+
|
233
|
+
let input_arg_schemas = &[(
|
234
|
+
Some("text"),
|
235
|
+
make_output_type(BasicValueType::Str).with_nullable(true),
|
236
|
+
)];
|
237
|
+
|
238
|
+
let result = test_flow_function(
|
239
|
+
&factory,
|
240
|
+
&spec,
|
241
|
+
input_arg_schemas,
|
242
|
+
vec![text.to_string().into()],
|
243
|
+
)
|
244
|
+
.await
|
245
|
+
.unwrap();
|
246
|
+
|
247
|
+
match result {
|
248
|
+
Value::KTable(table) => {
|
249
|
+
assert!(table.len() >= 3);
|
250
|
+
}
|
251
|
+
_ => panic!("KTable expected"),
|
252
|
+
}
|
253
|
+
}
|
254
|
+
}
|