cocoindex 0.1.44__tar.gz → 0.1.45__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cocoindex-0.1.45/.github/workflows/CI.yml +46 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/.github/workflows/_test.yml +14 -2
- {cocoindex-0.1.44 → cocoindex-0.1.45}/.vscode/settings.json +3 -1
- {cocoindex-0.1.44 → cocoindex-0.1.45}/Cargo.lock +1 -1
- {cocoindex-0.1.44 → cocoindex-0.1.45}/Cargo.toml +1 -1
- {cocoindex-0.1.44 → cocoindex-0.1.45}/PKG-INFO +3 -1
- {cocoindex-0.1.44 → cocoindex-0.1.45}/docs/docs/core/settings.mdx +1 -1
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/amazon_s3_embedding/main.py +41 -21
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/code_embedding/main.py +52 -26
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/docs_to_knowledge_graph/main.py +82 -37
- cocoindex-0.1.45/examples/fastapi_server_docker/.env +5 -0
- cocoindex-0.1.45/examples/fastapi_server_docker/README.md +65 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/fastapi_server_docker/compose.yaml +3 -1
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/fastapi_server_docker/dockerfile +6 -0
- cocoindex-0.1.45/examples/fastapi_server_docker/main.py +112 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/fastapi_server_docker/requirements.txt +3 -1
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/gdrive_text_embedding/main.py +50 -25
- cocoindex-0.1.45/examples/image_search/README.md +58 -0
- cocoindex-0.1.45/examples/image_search/main.py +130 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/image_search/pyproject.toml +7 -1
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/manuals_llm_extraction/main.py +30 -11
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/pdf_embedding/main.py +43 -21
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/product_recommendation/main.py +86 -42
- cocoindex-0.1.45/examples/text_embedding/Text_Embedding.ipynb +408 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/text_embedding/main.py +47 -24
- cocoindex-0.1.45/examples/text_embedding/markdown_files/1810.04805v2.md +530 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/text_embedding_qdrant/main.py +27 -25
- {cocoindex-0.1.44 → cocoindex-0.1.45}/pyproject.toml +14 -2
- {cocoindex-0.1.44 → cocoindex-0.1.45}/python/cocoindex/__init__.py +2 -1
- {cocoindex-0.1.44 → cocoindex-0.1.45}/python/cocoindex/auth_registry.py +7 -3
- {cocoindex-0.1.44 → cocoindex-0.1.45}/python/cocoindex/cli.py +185 -66
- {cocoindex-0.1.44 → cocoindex-0.1.45}/python/cocoindex/convert.py +93 -52
- {cocoindex-0.1.44 → cocoindex-0.1.45}/python/cocoindex/flow.py +302 -131
- {cocoindex-0.1.44 → cocoindex-0.1.45}/python/cocoindex/functions.py +17 -4
- {cocoindex-0.1.44 → cocoindex-0.1.45}/python/cocoindex/index.py +6 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/python/cocoindex/lib.py +14 -9
- {cocoindex-0.1.44 → cocoindex-0.1.45}/python/cocoindex/llm.py +4 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/python/cocoindex/op.py +126 -61
- {cocoindex-0.1.44 → cocoindex-0.1.45}/python/cocoindex/query.py +40 -17
- {cocoindex-0.1.44 → cocoindex-0.1.45}/python/cocoindex/runtime.py +9 -4
- {cocoindex-0.1.44 → cocoindex-0.1.45}/python/cocoindex/setting.py +35 -12
- {cocoindex-0.1.44 → cocoindex-0.1.45}/python/cocoindex/setup.py +7 -3
- {cocoindex-0.1.44 → cocoindex-0.1.45}/python/cocoindex/sources.py +3 -1
- {cocoindex-0.1.44 → cocoindex-0.1.45}/python/cocoindex/storages.py +50 -7
- {cocoindex-0.1.44 → cocoindex-0.1.45}/python/cocoindex/tests/test_convert.py +255 -63
- {cocoindex-0.1.44 → cocoindex-0.1.45}/python/cocoindex/typing.py +116 -70
- {cocoindex-0.1.44 → cocoindex-0.1.45}/python/cocoindex/utils.py +10 -2
- cocoindex-0.1.45/ruff.toml +5 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/src/base/schema.rs +6 -6
- {cocoindex-0.1.44 → cocoindex-0.1.45}/src/base/spec.rs +10 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/src/base/value.rs +21 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/src/builder/analyzer.rs +3 -2
- {cocoindex-0.1.44 → cocoindex-0.1.45}/src/builder/plan.rs +2 -1
- {cocoindex-0.1.44 → cocoindex-0.1.45}/src/execution/db_tracking.rs +76 -7
- {cocoindex-0.1.44 → cocoindex-0.1.45}/src/execution/row_indexer.rs +78 -49
- {cocoindex-0.1.44 → cocoindex-0.1.45}/src/ops/factory_bases.rs +45 -7
- {cocoindex-0.1.44 → cocoindex-0.1.45}/src/ops/interface.rs +23 -3
- {cocoindex-0.1.44 → cocoindex-0.1.45}/src/ops/registration.rs +3 -0
- cocoindex-0.1.45/src/ops/storages/kuzu.rs +1119 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/src/ops/storages/mod.rs +3 -1
- {cocoindex-0.1.44 → cocoindex-0.1.45}/src/ops/storages/neo4j.rs +187 -438
- {cocoindex-0.1.44 → cocoindex-0.1.45}/src/ops/storages/postgres.rs +139 -283
- {cocoindex-0.1.44 → cocoindex-0.1.45}/src/ops/storages/qdrant.rs +4 -3
- cocoindex-0.1.45/src/ops/storages/shared/mod.rs +2 -0
- cocoindex-0.1.45/src/ops/storages/shared/property_graph.rs +562 -0
- cocoindex-0.1.45/src/ops/storages/shared/table_columns.rs +185 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/src/prelude.rs +2 -2
- {cocoindex-0.1.44 → cocoindex-0.1.45}/src/service/error.rs +4 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/src/setup/driver.rs +26 -9
- cocoindex-0.1.44/.github/workflows/CI.yml +0 -51
- cocoindex-0.1.44/examples/fastapi_server_docker/.env +0 -1
- cocoindex-0.1.44/examples/fastapi_server_docker/README.md +0 -10
- cocoindex-0.1.44/examples/fastapi_server_docker/main.py +0 -27
- cocoindex-0.1.44/examples/fastapi_server_docker/sample_code/main.py +0 -113
- cocoindex-0.1.44/examples/fastapi_server_docker/src/cocoindex_funs.py +0 -45
- cocoindex-0.1.44/examples/image_search/README.md +0 -60
- cocoindex-0.1.44/examples/image_search/main.py +0 -130
- cocoindex-0.1.44/examples/text_embedding/Text_Embedding.ipynb +0 -392
- cocoindex-0.1.44/src/ops/storages/spec.rs +0 -50
- {cocoindex-0.1.44 → cocoindex-0.1.45}/.cargo/config.toml +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/.env.lib_debug +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/.github/ISSUE_TEMPLATE/🐛-bug-report.md +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/.github/ISSUE_TEMPLATE/💡-feature-request.md +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/.github/scripts/update_version.sh +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/.github/workflows/docs.yml +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/.github/workflows/release.yml +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/.gitignore +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/CODE_OF_CONDUCT.md +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/CONTRIBUTING.md +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/LICENSE +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/README.md +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/dev/neo4j.yaml +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/dev/postgres.yaml +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/docs/.gitignore +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/docs/README.md +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/docs/docs/about/community.md +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/docs/docs/about/contributing.md +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/docs/docs/ai/llm.mdx +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/docs/docs/core/basics.md +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/docs/docs/core/cli.mdx +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/docs/docs/core/custom_function.mdx +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/docs/docs/core/data_example.svg +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/docs/docs/core/data_types.mdx +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/docs/docs/core/flow_def.mdx +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/docs/docs/core/flow_example.svg +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/docs/docs/core/flow_methods.mdx +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/docs/docs/getting_started/installation.md +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/docs/docs/getting_started/markdown_files.zip +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/docs/docs/getting_started/overview.md +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/docs/docs/getting_started/quickstart.md +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/docs/docs/ops/functions.md +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/docs/docs/ops/sources.md +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/docs/docs/ops/storages.md +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/docs/docs/query.mdx +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/docs/docusaurus.config.ts +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/docs/package.json +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/docs/sidebars.ts +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/docs/src/components/HomepageFeatures/index.tsx +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/docs/src/components/HomepageFeatures/styles.module.css +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/docs/src/css/custom.css +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/docs/src/theme/Root.js +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/docs/static/.nojekyll +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/docs/static/img/docusaurus.png +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/docs/static/img/favicon.ico +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/docs/static/img/icon.svg +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/docs/static/robots.txt +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/docs/tsconfig.json +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/docs/yarn.lock +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/amazon_s3_embedding/.env.example +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/amazon_s3_embedding/.gitignore +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/amazon_s3_embedding/README.md +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/amazon_s3_embedding/pyproject.toml +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/code_embedding/.env +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/code_embedding/README.md +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/code_embedding/pyproject.toml +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/docs_to_knowledge_graph/.env +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/docs_to_knowledge_graph/README.md +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/docs_to_knowledge_graph/pyproject.toml +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/fastapi_server_docker/.dockerignore +0 -0
- {cocoindex-0.1.44/examples/text_embedding/markdown_files → cocoindex-0.1.45/examples/fastapi_server_docker/files}/1810.04805v2.md +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/gdrive_text_embedding/.env.example +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/gdrive_text_embedding/.gitignore +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/gdrive_text_embedding/README.md +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/gdrive_text_embedding/pyproject.toml +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/image_search/.env +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/image_search/frontend/.gitignore +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/image_search/frontend/index.html +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/image_search/frontend/package-lock.json +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/image_search/frontend/package.json +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/image_search/frontend/src/App.jsx +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/image_search/frontend/src/main.jsx +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/image_search/frontend/src/style.css +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/image_search/frontend/vite.config.js +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/image_search/img/cat1.jpeg +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/image_search/img/dog1.jpeg +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/image_search/img/elephant1.jpg +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/image_search/img/giraffe.jpg +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/image_search/requirements.txt +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/manuals_llm_extraction/.env +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/manuals_llm_extraction/README.md +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/manuals_llm_extraction/manuals/array.pdf +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/manuals_llm_extraction/manuals/base64.pdf +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/manuals_llm_extraction/manuals/copy.pdf +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/manuals_llm_extraction/manuals/glob.pdf +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/manuals_llm_extraction/pyproject.toml +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/pdf_embedding/.env +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/pdf_embedding/README.md +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/pdf_embedding/pdf_files/1706.03762v7.pdf +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/pdf_embedding/pdf_files/1810.04805v2.pdf +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/pdf_embedding/pdf_files/rfc8259.pdf +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/pdf_embedding/pyproject.toml +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/product_recommendation/.env +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/product_recommendation/README.md +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/product_recommendation/img/cocoinsight.png +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/product_recommendation/img/neo4j.png +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/product_recommendation/products/p1.json +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/product_recommendation/products/p2.json +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/product_recommendation/products/p3.json +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/product_recommendation/products/p4.json +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/product_recommendation/products/p5.json +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/product_recommendation/products/p6.json +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/product_recommendation/products/p7.json +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/product_recommendation/products/p8.json +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/product_recommendation/products/p9.json +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/product_recommendation/pyproject.toml +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/text_embedding/.env +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/text_embedding/README.md +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/text_embedding/markdown_files/1706.03762v7.md +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/text_embedding/markdown_files/rfc8259.md +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/text_embedding/pyproject.toml +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/text_embedding_qdrant/.env +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/text_embedding_qdrant/README.md +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/text_embedding_qdrant/markdown_files/rfc8259.md +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/examples/text_embedding_qdrant/pyproject.toml +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/python/cocoindex/py.typed +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/python/cocoindex/tests/__init__.py +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/src/base/duration.rs +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/src/base/field_attrs.rs +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/src/base/json_schema.rs +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/src/base/mod.rs +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/src/builder/analyzed_flow.rs +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/src/builder/flow_builder.rs +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/src/builder/mod.rs +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/src/execution/db_tracking_setup.rs +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/src/execution/dumper.rs +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/src/execution/evaluator.rs +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/src/execution/indexing_status.rs +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/src/execution/live_updater.rs +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/src/execution/memoization.rs +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/src/execution/mod.rs +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/src/execution/query.rs +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/src/execution/source_indexer.rs +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/src/execution/stats.rs +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/src/lib.rs +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/src/lib_context.rs +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/src/llm/anthropic.rs +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/src/llm/gemini.rs +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/src/llm/mod.rs +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/src/llm/ollama.rs +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/src/llm/openai.rs +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/src/ops/functions/extract_by_llm.rs +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/src/ops/functions/mod.rs +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/src/ops/functions/parse_json.rs +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/src/ops/functions/split_recursively.rs +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/src/ops/mod.rs +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/src/ops/py_factory.rs +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/src/ops/registry.rs +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/src/ops/sdk.rs +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/src/ops/sources/amazon_s3.rs +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/src/ops/sources/google_drive.rs +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/src/ops/sources/local_file.rs +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/src/ops/sources/mod.rs +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/src/py/convert.rs +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/src/py/mod.rs +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/src/server.rs +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/src/service/flows.rs +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/src/service/mod.rs +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/src/service/search.rs +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/src/settings.rs +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/src/setup/auth_registry.rs +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/src/setup/components.rs +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/src/setup/db_metadata.rs +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/src/setup/mod.rs +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/src/setup/states.rs +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/src/utils/db.rs +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/src/utils/fingerprint.rs +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/src/utils/immutable.rs +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/src/utils/mod.rs +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/src/utils/retryable.rs +0 -0
- {cocoindex-0.1.44 → cocoindex-0.1.45}/src/utils/yaml_ser.rs +0 -0
@@ -0,0 +1,46 @@
|
|
1
|
+
# This file is autogenerated by maturin v1.8.1
|
2
|
+
# To update, run
|
3
|
+
#
|
4
|
+
# maturin generate-ci github
|
5
|
+
#
|
6
|
+
name: CI
|
7
|
+
|
8
|
+
on:
|
9
|
+
pull_request:
|
10
|
+
branches: [main]
|
11
|
+
paths:
|
12
|
+
- src/**
|
13
|
+
- python/**
|
14
|
+
- "*.toml"
|
15
|
+
- ".github/workflows/*.yml"
|
16
|
+
push:
|
17
|
+
branches: [main]
|
18
|
+
paths:
|
19
|
+
- src/**
|
20
|
+
- python/**
|
21
|
+
- "*.toml"
|
22
|
+
- ".github/workflows/*.yml"
|
23
|
+
workflow_dispatch:
|
24
|
+
|
25
|
+
permissions:
|
26
|
+
contents: read
|
27
|
+
|
28
|
+
jobs:
|
29
|
+
format-check:
|
30
|
+
name: Check Python formatting
|
31
|
+
runs-on: ubuntu-latest
|
32
|
+
steps:
|
33
|
+
- uses: actions/checkout@v4
|
34
|
+
- uses: actions/setup-python@v5
|
35
|
+
with:
|
36
|
+
python-version: 3.11
|
37
|
+
- name: Install Ruff
|
38
|
+
run: |
|
39
|
+
pip install ruff
|
40
|
+
- name: Check Python formatting
|
41
|
+
run: |
|
42
|
+
ruff format --check .
|
43
|
+
|
44
|
+
test:
|
45
|
+
name: Run test
|
46
|
+
uses: ./.github/workflows/_test.yml
|
@@ -36,10 +36,22 @@ jobs:
|
|
36
36
|
key: ${{ runner.os }}-pythonenv-${{ matrix.python-version }}-${{ hashFiles('pyproject.toml') }}
|
37
37
|
restore-keys: |
|
38
38
|
${{ runner.os }}-pythonenv-${{ matrix.python-version }}-
|
39
|
-
- name:
|
39
|
+
- name: Setup venv
|
40
40
|
run: |
|
41
41
|
python -m venv .venv
|
42
|
+
- name: Install Python toolchains
|
43
|
+
run: |
|
44
|
+
source .venv/bin/activate
|
45
|
+
pip install maturin pytest mypy
|
46
|
+
- name: Python build
|
47
|
+
run: |
|
42
48
|
source .venv/bin/activate
|
43
|
-
pip install maturin pytest
|
44
49
|
maturin develop
|
50
|
+
- name: Python type check (mypy)
|
51
|
+
run: |
|
52
|
+
source .venv/bin/activate
|
53
|
+
mypy python
|
54
|
+
- name: Python tests
|
55
|
+
run: |
|
56
|
+
source .venv/bin/activate
|
45
57
|
pytest python/cocoindex/tests
|
@@ -1,12 +1,14 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: cocoindex
|
3
|
-
Version: 0.1.44
|
3
|
+
Version: 0.1.45
|
4
4
|
Requires-Dist: sentence-transformers>=3.3.1
|
5
5
|
Requires-Dist: click>=8.1.8
|
6
6
|
Requires-Dist: rich>=14.0.0
|
7
7
|
Requires-Dist: python-dotenv>=1.1.0
|
8
8
|
Requires-Dist: pytest ; extra == 'test'
|
9
|
+
Requires-Dist: ruff ; extra == 'dev'
|
9
10
|
Provides-Extra: test
|
11
|
+
Provides-Extra: dev
|
10
12
|
License-File: LICENSE
|
11
13
|
Summary: With CocoIndex, users declare the transformation, CocoIndex creates & maintains an index, and keeps the derived index up to date based on source update, with minimal computation and changes.
|
12
14
|
Author-email: CocoIndex <cocoindex.io@gmail.com>
|
@@ -6,7 +6,7 @@ description: Provide settings for CocoIndex, e.g. database connection, app names
|
|
6
6
|
import Tabs from '@theme/Tabs';
|
7
7
|
import TabItem from '@theme/TabItem';
|
8
8
|
|
9
|
-
# CocoIndex
|
9
|
+
# CocoIndex Setting
|
10
10
|
|
11
11
|
Certain settings need to be provided for CocoIndex to work, e.g. database connections, app namespace, etc.
|
12
12
|
|
@@ -3,8 +3,11 @@ from dotenv import load_dotenv
|
|
3
3
|
import cocoindex
|
4
4
|
import os
|
5
5
|
|
6
|
+
|
6
7
|
@cocoindex.flow_def(name="AmazonS3TextEmbedding")
|
7
|
-
def amazon_s3_text_embedding_flow(
|
8
|
+
def amazon_s3_text_embedding_flow(
|
9
|
+
flow_builder: cocoindex.FlowBuilder, data_scope: cocoindex.DataScope
|
10
|
+
):
|
8
11
|
"""
|
9
12
|
Define an example flow that embeds text from Amazon S3 into a vector database.
|
10
13
|
"""
|
@@ -18,21 +21,32 @@ def amazon_s3_text_embedding_flow(flow_builder: cocoindex.FlowBuilder, data_scop
|
|
18
21
|
prefix=prefix,
|
19
22
|
included_patterns=["*.md", "*.txt", "*.docx"],
|
20
23
|
binary=False,
|
21
|
-
sqs_queue_url=sqs_queue_url
|
24
|
+
sqs_queue_url=sqs_queue_url,
|
25
|
+
)
|
26
|
+
)
|
22
27
|
|
23
28
|
doc_embeddings = data_scope.add_collector()
|
24
29
|
|
25
30
|
with data_scope["documents"].row() as doc:
|
26
31
|
doc["chunks"] = doc["content"].transform(
|
27
32
|
cocoindex.functions.SplitRecursively(),
|
28
|
-
language="markdown",
|
33
|
+
language="markdown",
|
34
|
+
chunk_size=2000,
|
35
|
+
chunk_overlap=500,
|
36
|
+
)
|
29
37
|
|
30
38
|
with doc["chunks"].row() as chunk:
|
31
39
|
chunk["embedding"] = chunk["text"].transform(
|
32
40
|
cocoindex.functions.SentenceTransformerEmbed(
|
33
|
-
|
34
|
-
|
35
|
-
|
41
|
+
model="sentence-transformers/all-MiniLM-L6-v2"
|
42
|
+
)
|
43
|
+
)
|
44
|
+
doc_embeddings.collect(
|
45
|
+
filename=doc["filename"],
|
46
|
+
location=chunk["location"],
|
47
|
+
text=chunk["text"],
|
48
|
+
embedding=chunk["embedding"],
|
49
|
+
)
|
36
50
|
|
37
51
|
doc_embeddings.export(
|
38
52
|
"doc_embeddings",
|
@@ -41,7 +55,11 @@ def amazon_s3_text_embedding_flow(flow_builder: cocoindex.FlowBuilder, data_scop
|
|
41
55
|
vector_indexes=[
|
42
56
|
cocoindex.VectorIndexDef(
|
43
57
|
field_name="embedding",
|
44
|
-
metric=cocoindex.VectorSimilarityMetric.COSINE_SIMILARITY
|
58
|
+
metric=cocoindex.VectorSimilarityMetric.COSINE_SIMILARITY,
|
59
|
+
)
|
60
|
+
],
|
61
|
+
)
|
62
|
+
|
45
63
|
|
46
64
|
query_handler = cocoindex.query.SimpleSemanticsQueryHandler(
|
47
65
|
name="SemanticsSearch",
|
@@ -49,27 +67,29 @@ query_handler = cocoindex.query.SimpleSemanticsQueryHandler(
|
|
49
67
|
target_name="doc_embeddings",
|
50
68
|
query_transform_flow=lambda text: text.transform(
|
51
69
|
cocoindex.functions.SentenceTransformerEmbed(
|
52
|
-
model="sentence-transformers/all-MiniLM-L6-v2"
|
53
|
-
|
70
|
+
model="sentence-transformers/all-MiniLM-L6-v2"
|
71
|
+
)
|
72
|
+
),
|
73
|
+
default_similarity_metric=cocoindex.VectorSimilarityMetric.COSINE_SIMILARITY,
|
74
|
+
)
|
75
|
+
|
54
76
|
|
55
77
|
def _main():
|
56
78
|
# Use a `FlowLiveUpdater` to keep the flow data updated.
|
57
79
|
with cocoindex.FlowLiveUpdater(amazon_s3_text_embedding_flow):
|
58
80
|
# Run queries in a loop to demonstrate the query capabilities.
|
59
81
|
while True:
|
60
|
-
|
61
|
-
|
62
|
-
if query == '':
|
63
|
-
break
|
64
|
-
results, _ = query_handler.search(query, 10)
|
65
|
-
print("\nSearch results:")
|
66
|
-
for result in results:
|
67
|
-
print(f"[{result.score:.3f}] {result.data['filename']}")
|
68
|
-
print(f" {result.data['text']}")
|
69
|
-
print("---")
|
70
|
-
print()
|
71
|
-
except KeyboardInterrupt:
|
82
|
+
query = input("Enter search query (or Enter to quit): ")
|
83
|
+
if query == "":
|
72
84
|
break
|
85
|
+
results, _ = query_handler.search(query, 10)
|
86
|
+
print("\nSearch results:")
|
87
|
+
for result in results:
|
88
|
+
print(f"[{result.score:.3f}] {result.data['filename']}")
|
89
|
+
print(f" {result.data['text']}")
|
90
|
+
print("---")
|
91
|
+
print()
|
92
|
+
|
73
93
|
|
74
94
|
if __name__ == "__main__":
|
75
95
|
load_dotenv()
|
@@ -3,40 +3,59 @@ from psycopg_pool import ConnectionPool
|
|
3
3
|
import cocoindex
|
4
4
|
import os
|
5
5
|
|
6
|
+
|
6
7
|
@cocoindex.op.function()
|
7
8
|
def extract_extension(filename: str) -> str:
|
8
9
|
"""Extract the extension of a filename."""
|
9
10
|
return os.path.splitext(filename)[1]
|
10
11
|
|
12
|
+
|
11
13
|
@cocoindex.transform_flow()
|
12
|
-
def code_to_embedding(
|
14
|
+
def code_to_embedding(
|
15
|
+
text: cocoindex.DataSlice[str],
|
16
|
+
) -> cocoindex.DataSlice[list[float]]:
|
13
17
|
"""
|
14
18
|
Embed the text using a SentenceTransformer model.
|
15
19
|
"""
|
16
20
|
return text.transform(
|
17
21
|
cocoindex.functions.SentenceTransformerEmbed(
|
18
|
-
model="sentence-transformers/all-MiniLM-L6-v2"
|
22
|
+
model="sentence-transformers/all-MiniLM-L6-v2"
|
23
|
+
)
|
24
|
+
)
|
25
|
+
|
19
26
|
|
20
27
|
@cocoindex.flow_def(name="CodeEmbedding")
|
21
|
-
def code_embedding_flow(
|
28
|
+
def code_embedding_flow(
|
29
|
+
flow_builder: cocoindex.FlowBuilder, data_scope: cocoindex.DataScope
|
30
|
+
):
|
22
31
|
"""
|
23
32
|
Define an example flow that embeds files into a vector database.
|
24
33
|
"""
|
25
34
|
data_scope["files"] = flow_builder.add_source(
|
26
|
-
cocoindex.sources.LocalFile(
|
27
|
-
|
28
|
-
|
35
|
+
cocoindex.sources.LocalFile(
|
36
|
+
path="../..",
|
37
|
+
included_patterns=["*.py", "*.rs", "*.toml", "*.md", "*.mdx"],
|
38
|
+
excluded_patterns=["**/.*", "target", "**/node_modules"],
|
39
|
+
)
|
40
|
+
)
|
29
41
|
code_embeddings = data_scope.add_collector()
|
30
42
|
|
31
43
|
with data_scope["files"].row() as file:
|
32
44
|
file["extension"] = file["filename"].transform(extract_extension)
|
33
45
|
file["chunks"] = file["content"].transform(
|
34
46
|
cocoindex.functions.SplitRecursively(),
|
35
|
-
language=file["extension"],
|
47
|
+
language=file["extension"],
|
48
|
+
chunk_size=1000,
|
49
|
+
chunk_overlap=300,
|
50
|
+
)
|
36
51
|
with file["chunks"].row() as chunk:
|
37
52
|
chunk["embedding"] = chunk["text"].call(code_to_embedding)
|
38
|
-
code_embeddings.collect(
|
39
|
-
|
53
|
+
code_embeddings.collect(
|
54
|
+
filename=file["filename"],
|
55
|
+
location=chunk["location"],
|
56
|
+
code=chunk["text"],
|
57
|
+
embedding=chunk["embedding"],
|
58
|
+
)
|
40
59
|
|
41
60
|
code_embeddings.export(
|
42
61
|
"code_embeddings",
|
@@ -45,26 +64,35 @@ def code_embedding_flow(flow_builder: cocoindex.FlowBuilder, data_scope: cocoind
|
|
45
64
|
vector_indexes=[
|
46
65
|
cocoindex.VectorIndexDef(
|
47
66
|
field_name="embedding",
|
48
|
-
metric=cocoindex.VectorSimilarityMetric.COSINE_SIMILARITY
|
67
|
+
metric=cocoindex.VectorSimilarityMetric.COSINE_SIMILARITY,
|
68
|
+
)
|
69
|
+
],
|
70
|
+
)
|
49
71
|
|
50
72
|
|
51
73
|
def search(pool: ConnectionPool, query: str, top_k: int = 5):
|
52
74
|
# Get the table name, for the export target in the code_embedding_flow above.
|
53
|
-
table_name = cocoindex.utils.get_target_storage_default_name(
|
75
|
+
table_name = cocoindex.utils.get_target_storage_default_name(
|
76
|
+
code_embedding_flow, "code_embeddings"
|
77
|
+
)
|
54
78
|
# Evaluate the transform flow defined above with the input query, to get the embedding.
|
55
79
|
query_vector = code_to_embedding.eval(query)
|
56
80
|
# Run the query and get the results.
|
57
81
|
with pool.connection() as conn:
|
58
82
|
with conn.cursor() as cur:
|
59
|
-
cur.execute(
|
83
|
+
cur.execute(
|
84
|
+
f"""
|
60
85
|
SELECT filename, code, embedding <=> %s::vector AS distance
|
61
86
|
FROM {table_name} ORDER BY distance LIMIT %s
|
62
|
-
""",
|
87
|
+
""",
|
88
|
+
(query_vector, top_k),
|
89
|
+
)
|
63
90
|
return [
|
64
91
|
{"filename": row[0], "code": row[1], "score": 1.0 - row[2]}
|
65
92
|
for row in cur.fetchall()
|
66
93
|
]
|
67
94
|
|
95
|
+
|
68
96
|
def _main():
|
69
97
|
# Make sure the flow is built and up-to-date.
|
70
98
|
stats = code_embedding_flow.update()
|
@@ -74,20 +102,18 @@ def _main():
|
|
74
102
|
pool = ConnectionPool(os.getenv("COCOINDEX_DATABASE_URL"))
|
75
103
|
# Run queries in a loop to demonstrate the query capabilities.
|
76
104
|
while True:
|
77
|
-
|
78
|
-
|
79
|
-
if query == '':
|
80
|
-
break
|
81
|
-
# Run the query function with the database connection pool and the query.
|
82
|
-
results = search(pool, query)
|
83
|
-
print("\nSearch results:")
|
84
|
-
for result in results:
|
85
|
-
print(f"[{result['score']:.3f}] {result['filename']}")
|
86
|
-
print(f" {result['code']}")
|
87
|
-
print("---")
|
88
|
-
print()
|
89
|
-
except KeyboardInterrupt:
|
105
|
+
query = input("Enter search query (or Enter to quit): ")
|
106
|
+
if query == "":
|
90
107
|
break
|
108
|
+
# Run the query function with the database connection pool and the query.
|
109
|
+
results = search(pool, query)
|
110
|
+
print("\nSearch results:")
|
111
|
+
for result in results:
|
112
|
+
print(f"[{result['score']:.3f}] {result['filename']}")
|
113
|
+
print(f" {result['code']}")
|
114
|
+
print("---")
|
115
|
+
print()
|
116
|
+
|
91
117
|
|
92
118
|
if __name__ == "__main__":
|
93
119
|
load_dotenv()
|
@@ -1,42 +1,71 @@
|
|
1
1
|
"""
|
2
2
|
This example shows how to extract relationships from documents and build a knowledge graph.
|
3
3
|
"""
|
4
|
+
|
4
5
|
import dataclasses
|
5
6
|
import cocoindex
|
6
7
|
|
8
|
+
|
7
9
|
@dataclasses.dataclass
|
8
10
|
class DocumentSummary:
|
9
11
|
"""Describe a summary of a document."""
|
12
|
+
|
10
13
|
title: str
|
11
14
|
summary: str
|
12
15
|
|
16
|
+
|
13
17
|
@dataclasses.dataclass
|
14
18
|
class Relationship:
|
15
19
|
"""
|
16
20
|
Describe a relationship between two entities.
|
17
21
|
Subject and object should be Core CocoIndex concepts only, should be nouns. For example, `CocoIndex`, `Incremental Processing`, `ETL`, `Data` etc.
|
18
22
|
"""
|
23
|
+
|
19
24
|
subject: str
|
20
25
|
predicate: str
|
21
26
|
object: str
|
22
27
|
|
28
|
+
|
29
|
+
neo4j_conn_spec = cocoindex.add_auth_entry(
|
30
|
+
"Neo4jConnection",
|
31
|
+
cocoindex.storages.Neo4jConnection(
|
32
|
+
uri="bolt://localhost:7687",
|
33
|
+
user="neo4j",
|
34
|
+
password="cocoindex",
|
35
|
+
),
|
36
|
+
)
|
37
|
+
kuzu_conn_spec = cocoindex.add_auth_entry(
|
38
|
+
"KuzuConnection",
|
39
|
+
cocoindex.storages.KuzuConnection(
|
40
|
+
api_server_url="http://localhost:8123",
|
41
|
+
),
|
42
|
+
)
|
43
|
+
|
44
|
+
# Use Neo4j as the graph database
|
45
|
+
GraphDbSpec = cocoindex.storages.Neo4j
|
46
|
+
GraphDbConnection = cocoindex.storages.Neo4jConnection
|
47
|
+
GraphDbDeclaration = cocoindex.storages.Neo4jDeclaration
|
48
|
+
conn_spec = neo4j_conn_spec
|
49
|
+
|
50
|
+
# Use Kuzu as the graph database
|
51
|
+
# GraphDbSpec = cocoindex.storages.Kuzu
|
52
|
+
# GraphDbConnection = cocoindex.storages.KuzuConnection
|
53
|
+
# GraphDbDeclaration = cocoindex.storages.KuzuDeclaration
|
54
|
+
# conn_spec = kuzu_conn_spec
|
55
|
+
|
56
|
+
|
23
57
|
@cocoindex.flow_def(name="DocsToKG")
|
24
|
-
def docs_to_kg_flow(
|
58
|
+
def docs_to_kg_flow(
|
59
|
+
flow_builder: cocoindex.FlowBuilder, data_scope: cocoindex.DataScope
|
60
|
+
) -> None:
|
25
61
|
"""
|
26
62
|
Define an example flow that extracts relationship from files and build knowledge graph.
|
27
63
|
"""
|
28
|
-
# configure neo4j connection
|
29
|
-
conn_spec = cocoindex.add_auth_entry(
|
30
|
-
"Neo4jConnection",
|
31
|
-
cocoindex.storages.Neo4jConnection(
|
32
|
-
uri="bolt://localhost:7687",
|
33
|
-
user="neo4j",
|
34
|
-
password="cocoindex",
|
35
|
-
))
|
36
|
-
|
37
64
|
data_scope["documents"] = flow_builder.add_source(
|
38
|
-
cocoindex.sources.LocalFile(
|
39
|
-
|
65
|
+
cocoindex.sources.LocalFile(
|
66
|
+
path="../../docs/docs/core", included_patterns=["*.md", "*.mdx"]
|
67
|
+
)
|
68
|
+
)
|
40
69
|
|
41
70
|
document_node = data_scope.add_collector()
|
42
71
|
entity_relationship = data_scope.add_collector()
|
@@ -48,24 +77,34 @@ def docs_to_kg_flow(flow_builder: cocoindex.FlowBuilder, data_scope: cocoindex.D
|
|
48
77
|
cocoindex.functions.ExtractByLlm(
|
49
78
|
llm_spec=cocoindex.LlmSpec(
|
50
79
|
# Supported LLM: https://cocoindex.io/docs/ai/llm
|
51
|
-
api_type=cocoindex.LlmApiType.OPENAI,
|
80
|
+
api_type=cocoindex.LlmApiType.OPENAI,
|
81
|
+
model="gpt-4o",
|
82
|
+
),
|
52
83
|
output_type=DocumentSummary,
|
53
|
-
instruction="Please summarize the content of the document."
|
84
|
+
instruction="Please summarize the content of the document.",
|
85
|
+
)
|
86
|
+
)
|
54
87
|
document_node.collect(
|
55
|
-
filename=doc["filename"],
|
56
|
-
|
88
|
+
filename=doc["filename"],
|
89
|
+
title=doc["summary"]["title"],
|
90
|
+
summary=doc["summary"]["summary"],
|
91
|
+
)
|
57
92
|
|
58
93
|
# extract relationships from document
|
59
94
|
doc["relationships"] = doc["content"].transform(
|
60
95
|
cocoindex.functions.ExtractByLlm(
|
61
96
|
llm_spec=cocoindex.LlmSpec(
|
62
97
|
# Supported LLM: https://cocoindex.io/docs/ai/llm
|
63
|
-
api_type=cocoindex.LlmApiType.OPENAI,
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
98
|
+
api_type=cocoindex.LlmApiType.OPENAI,
|
99
|
+
model="gpt-4o",
|
100
|
+
),
|
101
|
+
output_type=list[Relationship],
|
102
|
+
instruction=(
|
103
|
+
"Please extract relationships from CocoIndex documents. "
|
104
|
+
"Focus on concepts and ignore examples and code. "
|
105
|
+
),
|
106
|
+
)
|
107
|
+
)
|
69
108
|
|
70
109
|
with doc["relationships"].row() as relationship:
|
71
110
|
# relationship between two entities
|
@@ -77,27 +116,28 @@ def docs_to_kg_flow(flow_builder: cocoindex.FlowBuilder, data_scope: cocoindex.D
|
|
77
116
|
)
|
78
117
|
# mention of an entity in a document, for subject
|
79
118
|
entity_mention.collect(
|
80
|
-
id=cocoindex.GeneratedField.UUID,
|
119
|
+
id=cocoindex.GeneratedField.UUID,
|
120
|
+
entity=relationship["subject"],
|
81
121
|
filename=doc["filename"],
|
82
122
|
)
|
83
123
|
# mention of an entity in a document, for object
|
84
124
|
entity_mention.collect(
|
85
|
-
id=cocoindex.GeneratedField.UUID,
|
125
|
+
id=cocoindex.GeneratedField.UUID,
|
126
|
+
entity=relationship["object"],
|
86
127
|
filename=doc["filename"],
|
87
128
|
)
|
88
129
|
|
89
|
-
|
90
130
|
# export to neo4j
|
91
131
|
document_node.export(
|
92
132
|
"document_node",
|
93
|
-
|
94
|
-
connection=conn_spec,
|
95
|
-
|
133
|
+
GraphDbSpec(
|
134
|
+
connection=conn_spec, mapping=cocoindex.storages.Nodes(label="Document")
|
135
|
+
),
|
96
136
|
primary_key_fields=["filename"],
|
97
137
|
)
|
98
138
|
# Declare reference Node to reference entity node in a relationship
|
99
139
|
flow_builder.declare(
|
100
|
-
|
140
|
+
GraphDbDeclaration(
|
101
141
|
connection=conn_spec,
|
102
142
|
nodes_label="Entity",
|
103
143
|
primary_key_fields=["value"],
|
@@ -105,7 +145,7 @@ def docs_to_kg_flow(flow_builder: cocoindex.FlowBuilder, data_scope: cocoindex.D
|
|
105
145
|
)
|
106
146
|
entity_relationship.export(
|
107
147
|
"entity_relationship",
|
108
|
-
|
148
|
+
GraphDbSpec(
|
109
149
|
connection=conn_spec,
|
110
150
|
mapping=cocoindex.storages.Relationships(
|
111
151
|
rel_type="RELATIONSHIP",
|
@@ -113,15 +153,17 @@ def docs_to_kg_flow(flow_builder: cocoindex.FlowBuilder, data_scope: cocoindex.D
|
|
113
153
|
label="Entity",
|
114
154
|
fields=[
|
115
155
|
cocoindex.storages.TargetFieldMapping(
|
116
|
-
source="subject", target="value"
|
117
|
-
|
156
|
+
source="subject", target="value"
|
157
|
+
),
|
158
|
+
],
|
118
159
|
),
|
119
160
|
target=cocoindex.storages.NodeFromFields(
|
120
161
|
label="Entity",
|
121
162
|
fields=[
|
122
163
|
cocoindex.storages.TargetFieldMapping(
|
123
|
-
source="object", target="value"
|
124
|
-
|
164
|
+
source="object", target="value"
|
165
|
+
),
|
166
|
+
],
|
125
167
|
),
|
126
168
|
),
|
127
169
|
),
|
@@ -129,7 +171,7 @@ def docs_to_kg_flow(flow_builder: cocoindex.FlowBuilder, data_scope: cocoindex.D
|
|
129
171
|
)
|
130
172
|
entity_mention.export(
|
131
173
|
"entity_mention",
|
132
|
-
|
174
|
+
GraphDbSpec(
|
133
175
|
connection=conn_spec,
|
134
176
|
mapping=cocoindex.storages.Relationships(
|
135
177
|
rel_type="MENTION",
|
@@ -139,8 +181,11 @@ def docs_to_kg_flow(flow_builder: cocoindex.FlowBuilder, data_scope: cocoindex.D
|
|
139
181
|
),
|
140
182
|
target=cocoindex.storages.NodeFromFields(
|
141
183
|
label="Entity",
|
142
|
-
fields=[
|
143
|
-
|
184
|
+
fields=[
|
185
|
+
cocoindex.storages.TargetFieldMapping(
|
186
|
+
source="entity", target="value"
|
187
|
+
)
|
188
|
+
],
|
144
189
|
),
|
145
190
|
),
|
146
191
|
),
|
@@ -0,0 +1,65 @@
|
|
1
|
+
## Run a Docker container with a simple query endpoint via FastAPI
|
2
|
+
|
3
|
+
In this example, we will build an index for text embeddings from local markdown files and provide a simple query endpoint via FastAPI.
|
4
|
+
We provide a simple Docker container setup, using Docker Compose, to build pgvector17 along with a simple Python FastAPI script.
|
5
|
+
|
6
|
+
We would appreciate a star ⭐ at [CocoIndex GitHub](https://github.com/cocoindex-io/cocoindex) if this is helpful.
|
7
|
+
|
8
|
+
|
9
|
+
## Run locally without docker
|
10
|
+
|
11
|
+
In the `.env` file, use the local Postgres URL:
|
12
|
+
|
13
|
+
```
|
14
|
+
# For local testing
|
15
|
+
COCOINDEX_DATABASE_URL=postgres://cocoindex:cocoindex@localhost/cocoindex
|
16
|
+
```
|
17
|
+
|
18
|
+
- Install dependencies:
|
19
|
+
|
20
|
+
```bash
|
21
|
+
pip install -e .
|
22
|
+
```
|
23
|
+
|
24
|
+
- Setup:
|
25
|
+
|
26
|
+
```bash
|
27
|
+
cocoindex setup main.py
|
28
|
+
```
|
29
|
+
|
30
|
+
- Update index:
|
31
|
+
|
32
|
+
```bash
|
33
|
+
cocoindex update main.py
|
34
|
+
```
|
35
|
+
|
36
|
+
- Run:
|
37
|
+
|
38
|
+
```bash
|
39
|
+
uvicorn main:fastapi_app --reload --host 0.0.0.0 --port 8000
|
40
|
+
```
|
41
|
+
|
42
|
+
## Query the endpoint
|
43
|
+
|
44
|
+
```bash
|
45
|
+
curl "http://localhost:8000/search?q=model&limit=3"
|
46
|
+
```
|
47
|
+
|
48
|
+
|
49
|
+
## Run Docker
|
50
|
+
|
51
|
+
In the `.env` file, use the Docker Postgres URL:
|
52
|
+
|
53
|
+
```
|
54
|
+
COCOINDEX_DATABASE_URL=postgres://cocoindex:cocoindex@coco_db:5436/cocoindex
|
55
|
+
```
|
56
|
+
|
57
|
+
Build and start the Docker containers via:
|
58
|
+
```bash
|
59
|
+
docker compose up --build
|
60
|
+
```
|
61
|
+
|
62
|
+
Test the endpoint:
|
63
|
+
```bash
|
64
|
+
curl "http://0.0.0.0:8080/search?q=model&limit=3"
|
65
|
+
```
|