cocoindex 0.1.41__tar.gz → 0.1.42__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cocoindex-0.1.41 → cocoindex-0.1.42}/Cargo.lock +1 -1
- {cocoindex-0.1.41 → cocoindex-0.1.42}/Cargo.toml +1 -1
- {cocoindex-0.1.41 → cocoindex-0.1.42}/PKG-INFO +2 -1
- {cocoindex-0.1.41 → cocoindex-0.1.42}/docs/docs/core/data_types.mdx +1 -0
- cocoindex-0.1.42/examples/code_embedding/README.md +71 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/code_embedding/main.py +27 -12
- cocoindex-0.1.42/examples/text_embedding/README.md +63 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/text_embedding/main.py +11 -17
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/text_embedding/pyproject.toml +1 -1
- cocoindex-0.1.42/examples/text_embedding_qdrant/README.md +87 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/text_embedding_qdrant/main.py +28 -19
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/text_embedding_qdrant/pyproject.toml +1 -1
- {cocoindex-0.1.41 → cocoindex-0.1.42}/pyproject.toml +4 -1
- cocoindex-0.1.42/python/cocoindex/cli.py +437 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/python/cocoindex/flow.py +2 -2
- cocoindex-0.1.42/python/cocoindex/lib.py +71 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/python/cocoindex/typing.py +2 -0
- cocoindex-0.1.42/src/base/duration.rs +674 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/src/base/json_schema.rs +11 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/src/base/mod.rs +1 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/src/base/schema.rs +4 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/src/base/value.rs +16 -1
- {cocoindex-0.1.41 → cocoindex-0.1.42}/src/execution/query.rs +2 -1
- {cocoindex-0.1.41 → cocoindex-0.1.42}/src/ops/storages/neo4j.rs +14 -4
- {cocoindex-0.1.41 → cocoindex-0.1.42}/src/ops/storages/postgres.rs +12 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/src/ops/storages/qdrant.rs +9 -2
- {cocoindex-0.1.41 → cocoindex-0.1.42}/src/py/convert.rs +6 -2
- cocoindex-0.1.41/examples/code_embedding/README.md +0 -52
- cocoindex-0.1.41/examples/text_embedding/README.md +0 -46
- cocoindex-0.1.41/examples/text_embedding_qdrant/README.md +0 -69
- cocoindex-0.1.41/python/cocoindex/cli.py +0 -238
- cocoindex-0.1.41/python/cocoindex/lib.py +0 -78
- {cocoindex-0.1.41 → cocoindex-0.1.42}/.cargo/config.toml +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/.env.lib_debug +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/.github/ISSUE_TEMPLATE//360/237/220/233-bug-report.md" +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/.github/ISSUE_TEMPLATE//360/237/222/241-feature-request.md" +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/.github/scripts/update_version.sh +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/.github/workflows/CI.yml +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/.github/workflows/_test.yml +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/.github/workflows/docs.yml +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/.github/workflows/release.yml +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/.gitignore +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/.vscode/settings.json +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/CODE_OF_CONDUCT.md +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/CONTRIBUTING.md +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/LICENSE +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/README.md +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/dev/neo4j.yaml +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/dev/postgres.yaml +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/docs/.gitignore +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/docs/README.md +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/docs/docs/about/community.md +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/docs/docs/about/contributing.md +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/docs/docs/ai/llm.mdx +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/docs/docs/core/basics.md +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/docs/docs/core/cli.mdx +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/docs/docs/core/custom_function.mdx +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/docs/docs/core/data_example.svg +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/docs/docs/core/flow_def.mdx +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/docs/docs/core/flow_example.svg +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/docs/docs/core/flow_methods.mdx +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/docs/docs/core/initialization.mdx +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/docs/docs/getting_started/installation.md +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/docs/docs/getting_started/markdown_files.zip +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/docs/docs/getting_started/overview.md +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/docs/docs/getting_started/quickstart.md +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/docs/docs/ops/functions.md +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/docs/docs/ops/sources.md +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/docs/docs/ops/storages.md +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/docs/docs/query.mdx +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/docs/docusaurus.config.ts +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/docs/package.json +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/docs/sidebars.ts +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/docs/src/components/HomepageFeatures/index.tsx +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/docs/src/components/HomepageFeatures/styles.module.css +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/docs/src/css/custom.css +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/docs/src/theme/Root.js +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/docs/static/.nojekyll +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/docs/static/img/docusaurus.png +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/docs/static/img/favicon.ico +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/docs/static/img/icon.svg +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/docs/static/robots.txt +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/docs/tsconfig.json +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/docs/yarn.lock +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/amazon_s3_embedding/.env.example +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/amazon_s3_embedding/.gitignore +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/amazon_s3_embedding/README.md +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/amazon_s3_embedding/main.py +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/amazon_s3_embedding/pyproject.toml +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/code_embedding/.env +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/code_embedding/pyproject.toml +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/docs_to_knowledge_graph/.env +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/docs_to_knowledge_graph/README.md +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/docs_to_knowledge_graph/main.py +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/docs_to_knowledge_graph/pyproject.toml +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/fastapi_server_docker/.dockerignore +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/fastapi_server_docker/.env +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/fastapi_server_docker/README.md +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/fastapi_server_docker/compose.yaml +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/fastapi_server_docker/dockerfile +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/fastapi_server_docker/main.py +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/fastapi_server_docker/requirements.txt +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/fastapi_server_docker/sample_code/main.py +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/fastapi_server_docker/src/cocoindex_funs.py +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/gdrive_text_embedding/.env.example +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/gdrive_text_embedding/.gitignore +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/gdrive_text_embedding/README.md +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/gdrive_text_embedding/main.py +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/gdrive_text_embedding/pyproject.toml +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/image_search_example/.env +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/image_search_example/README.md +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/image_search_example/frontend/.gitignore +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/image_search_example/frontend/index.html +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/image_search_example/frontend/package-lock.json +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/image_search_example/frontend/package.json +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/image_search_example/frontend/src/App.jsx +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/image_search_example/frontend/src/main.jsx +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/image_search_example/frontend/src/style.css +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/image_search_example/frontend/vite.config.js +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/image_search_example/img/cat1.jpeg +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/image_search_example/img/dog1.jpeg +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/image_search_example/img/elephant1.jpg +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/image_search_example/img/giraffe.jpg +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/image_search_example/main.py +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/image_search_example/requirements.txt +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/manuals_llm_extraction/.env +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/manuals_llm_extraction/README.md +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/manuals_llm_extraction/main.py +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/manuals_llm_extraction/manuals/array.pdf +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/manuals_llm_extraction/manuals/base64.pdf +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/manuals_llm_extraction/manuals/copy.pdf +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/manuals_llm_extraction/manuals/glob.pdf +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/manuals_llm_extraction/pyproject.toml +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/pdf_embedding/.env +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/pdf_embedding/README.md +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/pdf_embedding/main.py +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/pdf_embedding/pdf_files/1706.03762v7.pdf +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/pdf_embedding/pdf_files/1810.04805v2.pdf +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/pdf_embedding/pdf_files/rfc8259.pdf +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/pdf_embedding/pyproject.toml +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/product_recommendation/.env +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/product_recommendation/README.md +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/product_recommendation/img/cocoinsight.png +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/product_recommendation/img/neo4j.png +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/product_recommendation/main.py +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/product_recommendation/products/p1.json +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/product_recommendation/products/p2.json +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/product_recommendation/products/p3.json +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/product_recommendation/products/p4.json +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/product_recommendation/products/p5.json +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/product_recommendation/products/p6.json +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/product_recommendation/products/p7.json +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/product_recommendation/products/p8.json +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/product_recommendation/products/p9.json +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/product_recommendation/pyproject.toml +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/text_embedding/.env +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/text_embedding/Text_Embedding.ipynb +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/text_embedding/markdown_files/1706.03762v7.md +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/text_embedding/markdown_files/1810.04805v2.md +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/text_embedding/markdown_files/rfc8259.md +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/text_embedding_qdrant/.env +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/examples/text_embedding_qdrant/markdown_files/rfc8259.md +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/python/cocoindex/__init__.py +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/python/cocoindex/auth_registry.py +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/python/cocoindex/convert.py +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/python/cocoindex/functions.py +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/python/cocoindex/index.py +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/python/cocoindex/llm.py +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/python/cocoindex/op.py +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/python/cocoindex/py.typed +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/python/cocoindex/query.py +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/python/cocoindex/runtime.py +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/python/cocoindex/setting.py +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/python/cocoindex/setup.py +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/python/cocoindex/sources.py +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/python/cocoindex/storages.py +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/python/cocoindex/tests/__init__.py +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/python/cocoindex/tests/test_convert.py +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/python/cocoindex/utils.py +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/src/base/field_attrs.rs +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/src/base/spec.rs +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/src/builder/analyzed_flow.rs +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/src/builder/analyzer.rs +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/src/builder/flow_builder.rs +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/src/builder/mod.rs +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/src/builder/plan.rs +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/src/execution/db_tracking.rs +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/src/execution/db_tracking_setup.rs +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/src/execution/dumper.rs +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/src/execution/evaluator.rs +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/src/execution/indexing_status.rs +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/src/execution/live_updater.rs +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/src/execution/memoization.rs +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/src/execution/mod.rs +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/src/execution/row_indexer.rs +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/src/execution/source_indexer.rs +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/src/execution/stats.rs +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/src/lib.rs +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/src/lib_context.rs +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/src/llm/anthropic.rs +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/src/llm/gemini.rs +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/src/llm/mod.rs +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/src/llm/ollama.rs +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/src/llm/openai.rs +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/src/ops/factory_bases.rs +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/src/ops/functions/extract_by_llm.rs +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/src/ops/functions/mod.rs +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/src/ops/functions/parse_json.rs +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/src/ops/functions/split_recursively.rs +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/src/ops/interface.rs +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/src/ops/mod.rs +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/src/ops/py_factory.rs +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/src/ops/registration.rs +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/src/ops/registry.rs +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/src/ops/sdk.rs +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/src/ops/sources/amazon_s3.rs +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/src/ops/sources/google_drive.rs +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/src/ops/sources/local_file.rs +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/src/ops/sources/mod.rs +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/src/ops/storages/mod.rs +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/src/ops/storages/spec.rs +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/src/prelude.rs +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/src/py/mod.rs +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/src/server.rs +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/src/service/error.rs +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/src/service/flows.rs +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/src/service/mod.rs +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/src/service/search.rs +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/src/settings.rs +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/src/setup/auth_registry.rs +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/src/setup/components.rs +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/src/setup/db_metadata.rs +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/src/setup/driver.rs +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/src/setup/mod.rs +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/src/setup/states.rs +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/src/utils/db.rs +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/src/utils/fingerprint.rs +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/src/utils/immutable.rs +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/src/utils/mod.rs +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/src/utils/retryable.rs +0 -0
- {cocoindex-0.1.41 → cocoindex-0.1.42}/src/utils/yaml_ser.rs +0 -0
@@ -1,9 +1,10 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: cocoindex
|
3
|
-
Version: 0.1.41
|
3
|
+
Version: 0.1.42
|
4
4
|
Requires-Dist: sentence-transformers>=3.3.1
|
5
5
|
Requires-Dist: click>=8.1.8
|
6
6
|
Requires-Dist: rich>=14.0.0
|
7
|
+
Requires-Dist: python-dotenv>=1.1.0
|
7
8
|
Requires-Dist: pytest ; extra == 'test'
|
8
9
|
Provides-Extra: test
|
9
10
|
License-File: LICENSE
|
@@ -35,6 +35,7 @@ This is the list of all basic types supported by CocoIndex:
|
|
35
35
|
| Time | | `datetime.time` | `datetime.time` |
|
36
36
|
| LocalDatetime | Date and time without timezone | `cocoindex.LocalDateTime` | `datetime.datetime` |
|
37
37
|
| OffsetDatetime | Date and time with a timezone offset | `cocoindex.OffsetDateTime` | `datetime.datetime` |
|
38
|
+
| TimeDelta | A duration of time | `datetime.timedelta` | `datetime.timedelta` |
|
38
39
|
| Vector[*T*, *Dim*?] | *T* must be basic type. *Dim* is a positive integer and optional. |`cocoindex.Vector[T]` or `cocoindex.Vector[T, Dim]` | `list[T]` |
|
39
40
|
| Json | | `cocoindex.Json` | Any data convertible to JSON by `json` package |
|
40
41
|
|
@@ -0,0 +1,71 @@
|
|
1
|
+
# Build real-time index for codebase
|
2
|
+
[](https://github.com/cocoindex-io/cocoindex)
|
3
|
+
|
4
|
+
CocoIndex provides built-in support for codebase chunking, using Tree-sitter to respect syntax boundaries. In this example, we will build a real-time index for a codebase using CocoIndex.
|
5
|
+
|
6
|
+
We appreciate a star ⭐ at [CocoIndex Github](https://github.com/cocoindex-io/cocoindex) if this is helpful.
|
7
|
+
|
8
|
+

|
9
|
+
|
10
|
+
[Tree-sitter](https://en.wikipedia.org/wiki/Tree-sitter_%28parser_generator%29) is a parser generator tool and an incremental parsing library. It is available in Rust 🦀 - [GitHub](https://github.com/tree-sitter/tree-sitter). CocoIndex has built-in Rust integration with Tree-sitter to efficiently parse code and extract syntax trees for various programming languages. Check out the list of supported languages [here](https://cocoindex.io/docs/ops/functions#splitrecursively) - in the `language` section.
|
11
|
+
|
12
|
+
|
13
|
+
## Tutorials
|
14
|
+
- Step by step tutorial - Check out the [blog](https://cocoindex.io/blogs/index-code-base-for-rag).
|
15
|
+
- Video tutorial - [Youtube](https://youtu.be/G3WstvhHO24?si=Bnxu67Ax5Lv8b-J2).
|
16
|
+
|
17
|
+
## Steps
|
18
|
+
|
19
|
+
### Indexing Flow
|
20
|
+
<p align='center'>
|
21
|
+
<img width="434" alt="Screenshot 2025-05-19 at 10 14 36 PM" src="https://github.com/user-attachments/assets/3a506034-698f-480a-b653-22184dae4e14" />
|
22
|
+
</p>
|
23
|
+
|
24
|
+
1. We will ingest CocoIndex codebase.
|
25
|
+
2. For each file, perform chunking (Tree-sitter) and then embedding.
|
26
|
+
3. We will save the embeddings and the metadata in Postgres with PGVector.
|
27
|
+
|
28
|
+
### Query:
|
29
|
+
We will match against user-provided text by a SQL query, reusing the embedding operation in the indexing flow.
|
30
|
+
|
31
|
+
|
32
|
+
## Prerequisite
|
33
|
+
[Install Postgres](https://cocoindex.io/docs/getting_started/installation#-install-postgres) if you don't have one.
|
34
|
+
|
35
|
+
## Run
|
36
|
+
|
37
|
+
- Install dependencies:
|
38
|
+
```bash
|
39
|
+
pip install -e .
|
40
|
+
```
|
41
|
+
|
42
|
+
- Setup:
|
43
|
+
|
44
|
+
```bash
|
45
|
+
python main.py cocoindex setup
|
46
|
+
```
|
47
|
+
|
48
|
+
- Update index:
|
49
|
+
|
50
|
+
```bash
|
51
|
+
python main.py cocoindex update
|
52
|
+
```
|
53
|
+
|
54
|
+
- Run:
|
55
|
+
|
56
|
+
```bash
|
57
|
+
python main.py
|
58
|
+
```
|
59
|
+
|
60
|
+
## CocoInsight
|
61
|
+
I used CocoInsight (Free beta now) to troubleshoot the index generation and understand the data lineage of the pipeline.
|
62
|
+
It just connects to your local CocoIndex server, with Zero pipeline data retention. Run the following command to start CocoInsight:
|
63
|
+
|
64
|
+
```
|
65
|
+
python main.py cocoindex server -ci
|
66
|
+
```
|
67
|
+
|
68
|
+
Then open the CocoInsight UI at [https://cocoindex.io/cocoinsight](https://cocoindex.io/cocoinsight).
|
69
|
+
|
70
|
+
<img width="1305" alt="Chunking Visualization" src="https://github.com/user-attachments/assets/8e83b9a4-2bed-456b-83e5-b5381b28b84a" />
|
71
|
+
|
@@ -1,5 +1,5 @@
|
|
1
1
|
from dotenv import load_dotenv
|
2
|
-
|
2
|
+
from psycopg_pool import ConnectionPool
|
3
3
|
import cocoindex
|
4
4
|
import os
|
5
5
|
|
@@ -8,7 +8,8 @@ def extract_extension(filename: str) -> str:
|
|
8
8
|
"""Extract the extension of a filename."""
|
9
9
|
return os.path.splitext(filename)[1]
|
10
10
|
|
11
|
-
|
11
|
+
@cocoindex.transform_flow()
|
12
|
+
def code_to_embedding(text: cocoindex.DataSlice[str]) -> cocoindex.DataSlice[list[float]]:
|
12
13
|
"""
|
13
14
|
Embed the text using a SentenceTransformer model.
|
14
15
|
"""
|
@@ -24,7 +25,7 @@ def code_embedding_flow(flow_builder: cocoindex.FlowBuilder, data_scope: cocoind
|
|
24
25
|
data_scope["files"] = flow_builder.add_source(
|
25
26
|
cocoindex.sources.LocalFile(path="../..",
|
26
27
|
included_patterns=["*.py", "*.rs", "*.toml", "*.md", "*.mdx"],
|
27
|
-
excluded_patterns=["
|
28
|
+
excluded_patterns=["**/.*", "target", "**/node_modules"]))
|
28
29
|
code_embeddings = data_scope.add_collector()
|
29
30
|
|
30
31
|
with data_scope["files"].row() as file:
|
@@ -47,26 +48,40 @@ def code_embedding_flow(flow_builder: cocoindex.FlowBuilder, data_scope: cocoind
|
|
47
48
|
metric=cocoindex.VectorSimilarityMetric.COSINE_SIMILARITY)])
|
48
49
|
|
49
50
|
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
51
|
+
|
52
|
+
def search(pool: ConnectionPool, query: str, top_k: int = 5):
|
53
|
+
# Get the table name, for the export target in the code_embedding_flow above.
|
54
|
+
table_name = cocoindex.utils.get_target_storage_default_name(code_embedding_flow, "code_embeddings")
|
55
|
+
# Evaluate the transform flow defined above with the input query, to get the embedding.
|
56
|
+
query_vector = code_to_embedding.eval(query)
|
57
|
+
# Run the query and get the results.
|
58
|
+
with pool.connection() as conn:
|
59
|
+
with conn.cursor() as cur:
|
60
|
+
cur.execute(f"""
|
61
|
+
SELECT filename, code, embedding <=> %s::vector AS distance
|
62
|
+
FROM {table_name} ORDER BY distance LIMIT %s
|
63
|
+
""", (query_vector, top_k))
|
64
|
+
return [
|
65
|
+
{"filename": row[0], "code": row[1], "score": 1.0 - row[2]}
|
66
|
+
for row in cur.fetchall()
|
67
|
+
]
|
56
68
|
|
57
69
|
@cocoindex.main_fn()
|
58
70
|
def _run():
|
71
|
+
# Initialize the database connection pool.
|
72
|
+
pool = ConnectionPool(os.getenv("COCOINDEX_DATABASE_URL"))
|
59
73
|
# Run queries in a loop to demonstrate the query capabilities.
|
60
74
|
while True:
|
61
75
|
try:
|
62
76
|
query = input("Enter search query (or Enter to quit): ")
|
63
77
|
if query == '':
|
64
78
|
break
|
65
|
-
|
79
|
+
# Run the query function with the database connection pool and the query.
|
80
|
+
results = search(pool, query)
|
66
81
|
print("\nSearch results:")
|
67
82
|
for result in results:
|
68
|
-
print(f"[{result
|
69
|
-
print(f" {result
|
83
|
+
print(f"[{result['score']:.3f}] {result['filename']}")
|
84
|
+
print(f" {result['code']}")
|
70
85
|
print("---")
|
71
86
|
print()
|
72
87
|
except KeyboardInterrupt:
|
@@ -0,0 +1,63 @@
|
|
1
|
+
# Build text embedding and semantic search 🔍
|
2
|
+
[](https://colab.research.google.com/github/cocoindex-io/cocoindex/blob/main/examples/text_embedding/Text_Embedding.ipynb)
|
3
|
+
[](https://github.com/cocoindex-io/cocoindex)
|
4
|
+
|
5
|
+
In this example, we will build an index flow for text embedding from local markdown files, and query the index.
|
6
|
+
|
7
|
+
We appreciate a star ⭐ at [CocoIndex Github](https://github.com/cocoindex-io/cocoindex) if this is helpful.
|
8
|
+
|
9
|
+
## Steps
|
10
|
+
🌱 A detailed step by step tutorial can be found here: [Get Started Documentation](https://cocoindex.io/docs/getting_started/quickstart)
|
11
|
+
|
12
|
+
### Indexing Flow
|
13
|
+
<img width="461" alt="Screenshot 2025-05-19 at 5 48 28 PM" src="https://github.com/user-attachments/assets/b6825302-a0c7-4b86-9a2d-52da8286b4bd" />
|
14
|
+
|
15
|
+
1. We will ingest a list of local files.
|
16
|
+
2. For each file, perform chunking (recursively split) and then embedding.
|
17
|
+
3. We will save the embeddings and the metadata in Postgres with PGVector.
|
18
|
+
|
19
|
+
### Query
|
20
|
+
We will match against user-provided text by a SQL query, and reuse the embedding operation in the indexing flow.
|
21
|
+
|
22
|
+
|
23
|
+
## Prerequisite
|
24
|
+
|
25
|
+
[Install Postgres](https://cocoindex.io/docs/getting_started/installation#-install-postgres) if you don't have one.
|
26
|
+
|
27
|
+
## Run
|
28
|
+
|
29
|
+
Install dependencies:
|
30
|
+
|
31
|
+
```bash
|
32
|
+
pip install -e .
|
33
|
+
```
|
34
|
+
|
35
|
+
Setup:
|
36
|
+
|
37
|
+
```bash
|
38
|
+
python main.py cocoindex setup
|
39
|
+
```
|
40
|
+
|
41
|
+
Update index:
|
42
|
+
|
43
|
+
```bash
|
44
|
+
python main.py cocoindex update
|
45
|
+
```
|
46
|
+
|
47
|
+
Run:
|
48
|
+
|
49
|
+
```bash
|
50
|
+
python main.py
|
51
|
+
```
|
52
|
+
|
53
|
+
## CocoInsight
|
54
|
+
|
55
|
+
I used CocoInsight (Free beta now) to troubleshoot the index generation and understand the data lineage of the pipeline.
|
56
|
+
It just connects to your local CocoIndex server, with Zero pipeline data retention. Run the following command to start CocoInsight:
|
57
|
+
|
58
|
+
```
|
59
|
+
python main.py cocoindex server -ci
|
60
|
+
```
|
61
|
+
|
62
|
+
Then open the CocoInsight UI at [https://cocoindex.io/cocoinsight](https://cocoindex.io/cocoinsight).
|
63
|
+
|
@@ -1,8 +1,7 @@
|
|
1
|
-
import os
|
2
1
|
from dotenv import load_dotenv
|
3
2
|
from psycopg_pool import ConnectionPool
|
4
|
-
|
5
3
|
import cocoindex
|
4
|
+
import os
|
6
5
|
|
7
6
|
@cocoindex.transform_flow()
|
8
7
|
def text_to_embedding(text: cocoindex.DataSlice[str]) -> cocoindex.DataSlice[list[float]]:
|
@@ -20,7 +19,7 @@ def text_embedding_flow(flow_builder: cocoindex.FlowBuilder, data_scope: cocoind
|
|
20
19
|
Define an example flow that embeds text into a vector database.
|
21
20
|
"""
|
22
21
|
data_scope["documents"] = flow_builder.add_source(
|
23
|
-
cocoindex.sources.LocalFile(path="markdown_files"
|
22
|
+
cocoindex.sources.LocalFile(path="markdown_files"))
|
24
23
|
|
25
24
|
doc_embeddings = data_scope.add_collector()
|
26
25
|
|
@@ -43,33 +42,27 @@ def text_embedding_flow(flow_builder: cocoindex.FlowBuilder, data_scope: cocoind
|
|
43
42
|
field_name="embedding",
|
44
43
|
metric=cocoindex.VectorSimilarityMetric.COSINE_SIMILARITY)])
|
45
44
|
|
46
|
-
# Keep for now to allow CocoInsight to query.
|
47
|
-
# Will be removed later after we expose `search()` below as a query function (https://github.com/cocoindex-io/cocoindex/issues/502).
|
48
|
-
cocoindex.query.SimpleSemanticsQueryHandler(
|
49
|
-
name="SemanticsSearch",
|
50
|
-
flow=text_embedding_flow,
|
51
|
-
target_name="doc_embeddings",
|
52
|
-
query_transform_flow=text_to_embedding,
|
53
|
-
default_similarity_metric=cocoindex.VectorSimilarityMetric.COSINE_SIMILARITY)
|
54
45
|
|
55
46
|
def search(pool: ConnectionPool, query: str, top_k: int = 5):
|
47
|
+
# Get the table name, for the export target in the text_embedding_flow above.
|
56
48
|
table_name = cocoindex.utils.get_target_storage_default_name(text_embedding_flow, "doc_embeddings")
|
49
|
+
# Evaluate the transform flow defined above with the input query, to get the embedding.
|
57
50
|
query_vector = text_to_embedding.eval(query)
|
51
|
+
# Run the query and get the results.
|
58
52
|
with pool.connection() as conn:
|
59
53
|
with conn.cursor() as cur:
|
60
54
|
cur.execute(f"""
|
61
|
-
SELECT filename,
|
62
|
-
FROM {table_name}
|
63
|
-
ORDER BY distance
|
64
|
-
LIMIT %s
|
55
|
+
SELECT filename, text, embedding <=> %s::vector AS distance
|
56
|
+
FROM {table_name} ORDER BY distance LIMIT %s
|
65
57
|
""", (query_vector, top_k))
|
66
58
|
return [
|
67
|
-
{"filename": row[0], "
|
59
|
+
{"filename": row[0], "text": row[1], "score": 1.0 - row[2]}
|
68
60
|
for row in cur.fetchall()
|
69
61
|
]
|
70
62
|
|
71
63
|
@cocoindex.main_fn()
|
72
64
|
def _run():
|
65
|
+
# Initialize the database connection pool.
|
73
66
|
pool = ConnectionPool(os.getenv("COCOINDEX_DATABASE_URL"))
|
74
67
|
# Run queries in a loop to demonstrate the query capabilities.
|
75
68
|
while True:
|
@@ -77,10 +70,11 @@ def _run():
|
|
77
70
|
query = input("Enter search query (or Enter to quit): ")
|
78
71
|
if query == '':
|
79
72
|
break
|
73
|
+
# Run the query function with the database connection pool and the query.
|
80
74
|
results = search(pool, query)
|
81
75
|
print("\nSearch results:")
|
82
76
|
for result in results:
|
83
|
-
print(f"[{result['score']:.3f}] {result['filename']}
|
77
|
+
print(f"[{result['score']:.3f}] {result['filename']}")
|
84
78
|
print(f" {result['text']}")
|
85
79
|
print("---")
|
86
80
|
print()
|
@@ -0,0 +1,87 @@
|
|
1
|
+
# Build text embedding and semantic search 🔍 with Qdrant
|
2
|
+
|
3
|
+
[](https://github.com/cocoindex-io/cocoindex)
|
4
|
+
|
5
|
+
CocoIndex supports Qdrant natively - [documentation](https://cocoindex.io/docs/ops/storages#qdrant). In this example, we will build an index flow for text embedding from local markdown files, and query the index. We will use **Qdrant** as the vector database.
|
6
|
+
|
7
|
+
We appreciate a star ⭐ at [CocoIndex GitHub](https://github.com/cocoindex-io/cocoindex) if this is helpful.
|
8
|
+
|
9
|
+
<img width="860" alt="CocoIndex supports Qdrant" src="https://github.com/user-attachments/assets/a9deecfa-dd94-4b97-a1b1-90488d8178df" />
|
10
|
+
|
11
|
+
## Steps
|
12
|
+
### Indexing Flow
|
13
|
+
<img width="480" alt="Index flow for text embedding" src="https://github.com/user-attachments/assets/44d47b5e-b49b-4f05-9a00-dcb8027602a1" />
|
14
|
+
|
15
|
+
1. We will ingest a list of local files.
|
16
|
+
2. For each file, perform chunking (recursively split) and then embedding.
|
17
|
+
3. We will save the embeddings and the metadata in Qdrant.
|
18
|
+
|
19
|
+
### Query
|
20
|
+
We use the Qdrant client to query the index, reusing the embedding operation from the indexing flow.
|
21
|
+
|
22
|
+
|
23
|
+
## Pre-requisites
|
24
|
+
|
25
|
+
- [Install Postgres](https://cocoindex.io/docs/getting_started/installation#-install-postgres) if you don't have one. Although the target store is Qdrant, CocoIndex uses Postgres to track the data lineage for incremental processing.
|
26
|
+
|
27
|
+
- Run Qdrant.
|
28
|
+
|
29
|
+
```bash
|
30
|
+
docker run -d -p 6334:6334 -p 6333:6333 qdrant/qdrant
|
31
|
+
```
|
32
|
+
|
33
|
+
- [Create a collection](https://qdrant.tech/documentation/concepts/vectors/#named-vectors) to export the embeddings to.
|
34
|
+
|
35
|
+
```bash
|
36
|
+
curl -X PUT \
|
37
|
+
'http://localhost:6333/collections/cocoindex' \
|
38
|
+
--header 'Content-Type: application/json' \
|
39
|
+
--data-raw '{
|
40
|
+
"vectors": {
|
41
|
+
"text_embedding": {
|
42
|
+
"size": 384,
|
43
|
+
"distance": "Cosine"
|
44
|
+
}
|
45
|
+
}
|
46
|
+
}'
|
47
|
+
```
|
48
|
+
|
49
|
+
You can view the collections and data with the Qdrant dashboard at <http://localhost:6333/dashboard>.
|
50
|
+
|
51
|
+
## Run
|
52
|
+
|
53
|
+
- Install dependencies:
|
54
|
+
|
55
|
+
```bash
|
56
|
+
pip install -e .
|
57
|
+
```
|
58
|
+
|
59
|
+
- Setup:
|
60
|
+
|
61
|
+
```bash
|
62
|
+
python main.py cocoindex setup
|
63
|
+
```
|
64
|
+
|
65
|
+
- Update index:
|
66
|
+
|
67
|
+
```bash
|
68
|
+
python main.py cocoindex update
|
69
|
+
```
|
70
|
+
|
71
|
+
- Run:
|
72
|
+
|
73
|
+
```bash
|
74
|
+
python main.py
|
75
|
+
```
|
76
|
+
|
77
|
+
## CocoInsight
|
78
|
+
I used CocoInsight (Free beta now) to troubleshoot the index generation and understand the data lineage of the pipeline.
|
79
|
+
It just connects to your local CocoIndex server, with zero pipeline data retention. Run the following command to start CocoInsight:
|
80
|
+
|
81
|
+
```bash
|
82
|
+
python main.py cocoindex server -ci
|
83
|
+
```
|
84
|
+
|
85
|
+
Open the CocoInsight UI at [https://cocoindex.io/cocoinsight](https://cocoindex.io/cocoinsight).
|
86
|
+
|
87
|
+
|
@@ -1,21 +1,26 @@
|
|
1
1
|
from dotenv import load_dotenv
|
2
|
+
from qdrant_client import QdrantClient
|
3
|
+
from qdrant_client.http.models import Filter, FieldCondition, MatchValue
|
2
4
|
|
3
5
|
import cocoindex
|
4
6
|
|
7
|
+
# Define Qdrant connection constants
|
8
|
+
QDRANT_GRPC_URL = "http://localhost:6334"
|
9
|
+
QDRANT_COLLECTION = "cocoindex"
|
5
10
|
|
6
|
-
|
11
|
+
|
12
|
+
@cocoindex.transform_flow()
|
13
|
+
def text_to_embedding(text: cocoindex.DataSlice[str]) -> cocoindex.DataSlice[list[float]]:
|
7
14
|
"""
|
8
15
|
Embed the text using a SentenceTransformer model.
|
9
16
|
This is a shared logic between indexing and querying, so extract it as a function.
|
10
17
|
"""
|
11
18
|
return text.transform(
|
12
19
|
cocoindex.functions.SentenceTransformerEmbed(
|
13
|
-
model="sentence-transformers/all-MiniLM-L6-v2"
|
14
|
-
)
|
15
|
-
)
|
20
|
+
model="sentence-transformers/all-MiniLM-L6-v2"))
|
16
21
|
|
17
22
|
|
18
|
-
@cocoindex.flow_def(name="
|
23
|
+
@cocoindex.flow_def(name="TextEmbeddingWithQdrant")
|
19
24
|
def text_embedding_flow(
|
20
25
|
flow_builder: cocoindex.FlowBuilder, data_scope: cocoindex.DataScope
|
21
26
|
):
|
@@ -50,35 +55,39 @@ def text_embedding_flow(
|
|
50
55
|
doc_embeddings.export(
|
51
56
|
"doc_embeddings",
|
52
57
|
cocoindex.storages.Qdrant(
|
53
|
-
collection_name=
|
58
|
+
collection_name=QDRANT_COLLECTION, grpc_url=QDRANT_GRPC_URL
|
54
59
|
),
|
55
60
|
primary_key_fields=["id"],
|
56
61
|
setup_by_user=True,
|
57
62
|
)
|
58
63
|
|
59
64
|
|
60
|
-
query_handler = cocoindex.query.SimpleSemanticsQueryHandler(
|
61
|
-
name="SemanticsSearch",
|
62
|
-
flow=text_embedding_flow,
|
63
|
-
target_name="doc_embeddings",
|
64
|
-
query_transform_flow=text_to_embedding,
|
65
|
-
default_similarity_metric=cocoindex.VectorSimilarityMetric.COSINE_SIMILARITY,
|
66
|
-
)
|
67
|
-
|
68
|
-
|
69
65
|
@cocoindex.main_fn()
|
70
66
|
def _run():
|
67
|
+
# Initialize Qdrant client
|
68
|
+
client = QdrantClient(url=QDRANT_GRPC_URL, prefer_grpc=True)
|
69
|
+
|
71
70
|
# Run queries in a loop to demonstrate the query capabilities.
|
72
71
|
while True:
|
73
72
|
try:
|
74
73
|
query = input("Enter search query (or Enter to quit): ")
|
75
74
|
if query == "":
|
76
75
|
break
|
77
|
-
|
76
|
+
|
77
|
+
# Get the embedding for the query
|
78
|
+
query_embedding = text_to_embedding.eval(query)
|
79
|
+
|
80
|
+
search_results = client.search(
|
81
|
+
collection_name=QDRANT_COLLECTION,
|
82
|
+
query_vector=("text_embedding", query_embedding),
|
83
|
+
limit=10
|
84
|
+
)
|
78
85
|
print("\nSearch results:")
|
79
|
-
for result in
|
80
|
-
|
81
|
-
|
86
|
+
for result in search_results:
|
87
|
+
score = result.score
|
88
|
+
payload = result.payload
|
89
|
+
print(f"[{score:.3f}] {payload['filename']}")
|
90
|
+
print(f" {payload['text']}")
|
82
91
|
print("---")
|
83
92
|
print()
|
84
93
|
except KeyboardInterrupt:
|
@@ -3,7 +3,7 @@ name = "text-embedding-qdrant"
|
|
3
3
|
version = "0.1.0"
|
4
4
|
description = "Simple example for cocoindex: build embedding index based on local text files."
|
5
5
|
requires-python = ">=3.10"
|
6
|
-
dependencies = ["cocoindex>=0.1.39", "python-dotenv>=1.0.1"]
|
6
|
+
dependencies = ["cocoindex>=0.1.39", "python-dotenv>=1.0.1", "qdrant-client>=1.6.0"]
|
7
7
|
|
8
8
|
[tool.setuptools]
|
9
9
|
packages = []
|
@@ -9,10 +9,13 @@ description = "With CocoIndex, users declare the transformation, CocoIndex creat
|
|
9
9
|
authors = [{ name = "CocoIndex", email = "cocoindex.io@gmail.com" }]
|
10
10
|
readme = "README.md"
|
11
11
|
requires-python = ">=3.11"
|
12
|
-
dependencies = ["sentence-transformers>=3.3.1", "click>=8.1.8", "rich>=14.0.0"]
|
12
|
+
dependencies = ["sentence-transformers>=3.3.1", "click>=8.1.8", "rich>=14.0.0", "python-dotenv>=1.1.0"]
|
13
13
|
license = "Apache-2.0"
|
14
14
|
urls = { Homepage = "https://cocoindex.io/" }
|
15
15
|
|
16
|
+
[project.scripts]
|
17
|
+
cocoindex = "cocoindex.cli:cli"
|
18
|
+
|
16
19
|
[tool.maturin]
|
17
20
|
bindings = "pyo3"
|
18
21
|
python-source = "python"
|