cocoindex 0.1.69__tar.gz → 0.1.71__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cocoindex-0.1.69 → cocoindex-0.1.71}/Cargo.lock +1 -1
- {cocoindex-0.1.69 → cocoindex-0.1.71}/Cargo.toml +1 -1
- {cocoindex-0.1.69 → cocoindex-0.1.71}/PKG-INFO +12 -11
- {cocoindex-0.1.69 → cocoindex-0.1.71}/README.md +11 -10
- {cocoindex-0.1.69 → cocoindex-0.1.71}/docs/docs/core/custom_function.mdx +11 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/docs/docs/core/data_types.mdx +1 -1
- {cocoindex-0.1.69 → cocoindex-0.1.71}/docs/docs/core/flow_def.mdx +39 -8
- {cocoindex-0.1.69 → cocoindex-0.1.71}/docs/docs/ops/sources.md +29 -16
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/amazon_s3_embedding/main.py +0 -6
- cocoindex-0.1.71/examples/face_recognition/README.md +51 -0
- cocoindex-0.1.71/examples/face_recognition/images/Carter_welcomes_Reagan.jpg +0 -0
- cocoindex-0.1.71/examples/face_recognition/images/Solvay_conference_1927.jpg +0 -0
- cocoindex-0.1.71/examples/face_recognition/images/Steve_Jobs_and_Bill_Gates_(522695099).jpg +0 -0
- cocoindex-0.1.71/examples/face_recognition/images/einplanck3.jpg +0 -0
- cocoindex-0.1.71/examples/face_recognition/main.py +115 -0
- cocoindex-0.1.71/examples/face_recognition/pyproject.toml +14 -0
- cocoindex-0.1.71/examples/text_embedding_qdrant/.env +2 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/python/cocoindex/__init__.py +3 -1
- cocoindex-0.1.71/python/cocoindex/auth_registry.py +51 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/python/cocoindex/convert.py +79 -4
- {cocoindex-0.1.69 → cocoindex-0.1.71}/python/cocoindex/functions.py +8 -7
- {cocoindex-0.1.69 → cocoindex-0.1.71}/python/cocoindex/op.py +33 -4
- {cocoindex-0.1.69 → cocoindex-0.1.71}/python/cocoindex/sources.py +9 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/python/cocoindex/tests/test_convert.py +127 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/base/field_attrs.rs +1 -1
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/execution/live_updater.rs +53 -29
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/ops/sources/azure_blob.rs +20 -8
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/py/mod.rs +3 -3
- cocoindex-0.1.69/python/cocoindex/auth_registry.py +0 -29
- {cocoindex-0.1.69 → cocoindex-0.1.71}/.cargo/config.toml +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/.env.lib_debug +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/.github/ISSUE_TEMPLATE//360/237/220/233-bug-report.md" +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/.github/ISSUE_TEMPLATE//360/237/222/241-feature-request.md" +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/.github/scripts/update_version.sh +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/.github/workflows/CI.yml +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/.github/workflows/_doc_release.yml +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/.github/workflows/_test.yml +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/.github/workflows/docs.yml +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/.github/workflows/format.yml +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/.github/workflows/release.yml +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/.gitignore +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/.pre-commit-config.yaml +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/CODE_OF_CONDUCT.md +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/CONTRIBUTING.md +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/LICENSE +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/dev/neo4j.yaml +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/dev/postgres.yaml +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/docs/.gitignore +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/docs/README.md +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/docs/docs/about/community.md +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/docs/docs/about/contributing.md +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/docs/docs/ai/llm.mdx +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/docs/docs/core/basics.md +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/docs/docs/core/cli.mdx +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/docs/docs/core/data_example.svg +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/docs/docs/core/flow_example.svg +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/docs/docs/core/flow_methods.mdx +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/docs/docs/core/settings.mdx +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/docs/docs/getting_started/installation.md +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/docs/docs/getting_started/markdown_files.zip +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/docs/docs/getting_started/overview.md +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/docs/docs/getting_started/quickstart.md +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/docs/docs/ops/functions.md +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/docs/docs/ops/targets.md +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/docs/docs/query.mdx +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/docs/docusaurus.config.ts +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/docs/package.json +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/docs/sidebars.ts +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/docs/src/components/HomepageFeatures/index.tsx +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/docs/src/components/HomepageFeatures/styles.module.css +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/docs/src/css/custom.css +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/docs/src/theme/Root.js +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/docs/static/.nojekyll +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/docs/static/img/docusaurus.png +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/docs/static/img/favicon.ico +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/docs/static/img/icon.svg +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/docs/static/img/incremental-etl.gif +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/docs/static/robots.txt +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/docs/tsconfig.json +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/docs/yarn.lock +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/amazon_s3_embedding/.env.example +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/amazon_s3_embedding/.gitignore +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/amazon_s3_embedding/README.md +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/amazon_s3_embedding/pyproject.toml +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/azure_blob_embedding/.env.example +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/azure_blob_embedding/.gitignore +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/azure_blob_embedding/README.md +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/azure_blob_embedding/main.py +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/azure_blob_embedding/pyproject.toml +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/code_embedding/.env +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/code_embedding/README.md +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/code_embedding/main.py +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/code_embedding/pyproject.toml +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/docs_to_knowledge_graph/.env +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/docs_to_knowledge_graph/README.md +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/docs_to_knowledge_graph/main.py +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/docs_to_knowledge_graph/pyproject.toml +0 -0
- {cocoindex-0.1.69/examples/manuals_llm_extraction → cocoindex-0.1.71/examples/face_recognition}/.env +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/fastapi_server_docker/.dockerignore +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/fastapi_server_docker/.env +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/fastapi_server_docker/README.md +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/fastapi_server_docker/compose.yaml +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/fastapi_server_docker/dockerfile +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/fastapi_server_docker/files/1810.04805v2.md +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/fastapi_server_docker/main.py +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/fastapi_server_docker/requirements.txt +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/gdrive_text_embedding/.env.example +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/gdrive_text_embedding/.gitignore +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/gdrive_text_embedding/README.md +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/gdrive_text_embedding/main.py +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/gdrive_text_embedding/pyproject.toml +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/image_search/.env +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/image_search/README.md +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/image_search/frontend/.gitignore +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/image_search/frontend/index.html +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/image_search/frontend/package-lock.json +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/image_search/frontend/package.json +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/image_search/frontend/src/App.jsx +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/image_search/frontend/src/main.jsx +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/image_search/frontend/src/style.css +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/image_search/frontend/vite.config.js +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/image_search/img/cat1.jpeg +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/image_search/img/dog1.jpeg +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/image_search/img/elephant1.jpg +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/image_search/img/giraffe.jpg +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/image_search/main.py +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/image_search/pyproject.toml +0 -0
- {cocoindex-0.1.69/examples/pdf_embedding → cocoindex-0.1.71/examples/manuals_llm_extraction}/.env +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/manuals_llm_extraction/README.md +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/manuals_llm_extraction/main.py +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/manuals_llm_extraction/manuals/array.pdf +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/manuals_llm_extraction/manuals/base64.pdf +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/manuals_llm_extraction/manuals/copy.pdf +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/manuals_llm_extraction/manuals/glob.pdf +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/manuals_llm_extraction/pyproject.toml +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/paper_metadata/.env.example +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/paper_metadata/.gitignore +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/paper_metadata/README.md +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/paper_metadata/main.py +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/paper_metadata/papers/1706.03762v7.pdf +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/paper_metadata/papers/1810.04805v2.pdf +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/paper_metadata/papers/2502.06786v3.pdf +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/paper_metadata/papers/2502.20346v1.pdf +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/paper_metadata/pyproject.toml +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/patient_intake_extraction/.env.example +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/patient_intake_extraction/README.md +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/patient_intake_extraction/data/README.md +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/patient_intake_extraction/data/patient_forms/Patient_Intake_Form_David_Artificial.docx +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/patient_intake_extraction/data/patient_forms/Patient_Intake_Form_Emily_Artificial.pdf +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/patient_intake_extraction/data/patient_forms/Patient_Intake_Form_Joe_Artificial.pdf +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/patient_intake_extraction/data/patient_forms/Patient_Intake_From_Jane_Artificial.docx +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/patient_intake_extraction/main.py +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/patient_intake_extraction/pyproject.toml +0 -0
- {cocoindex-0.1.69/examples/product_recommendation → cocoindex-0.1.71/examples/pdf_embedding}/.env +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/pdf_embedding/README.md +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/pdf_embedding/main.py +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/pdf_embedding/pdf_files/1706.03762v7.pdf +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/pdf_embedding/pdf_files/1810.04805v2.pdf +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/pdf_embedding/pdf_files/rfc8259.pdf +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/pdf_embedding/pyproject.toml +0 -0
- {cocoindex-0.1.69/examples/text_embedding → cocoindex-0.1.71/examples/product_recommendation}/.env +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/product_recommendation/README.md +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/product_recommendation/img/cocoinsight.png +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/product_recommendation/img/neo4j.png +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/product_recommendation/main.py +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/product_recommendation/products/p1.json +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/product_recommendation/products/p2.json +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/product_recommendation/products/p3.json +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/product_recommendation/products/p4.json +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/product_recommendation/products/p5.json +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/product_recommendation/products/p6.json +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/product_recommendation/products/p7.json +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/product_recommendation/products/p8.json +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/product_recommendation/products/p9.json +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/product_recommendation/pyproject.toml +0 -0
- {cocoindex-0.1.69/examples/text_embedding_qdrant → cocoindex-0.1.71/examples/text_embedding}/.env +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/text_embedding/README.md +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/text_embedding/Text_Embedding.ipynb +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/text_embedding/main.py +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/text_embedding/markdown_files/1706.03762v7.md +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/text_embedding/markdown_files/1810.04805v2.md +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/text_embedding/markdown_files/rfc8259.md +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/text_embedding/pyproject.toml +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/text_embedding_qdrant/README.md +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/text_embedding_qdrant/main.py +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/text_embedding_qdrant/markdown_files/rfc8259.md +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/examples/text_embedding_qdrant/pyproject.toml +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/pyproject.toml +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/python/cocoindex/cli.py +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/python/cocoindex/flow.py +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/python/cocoindex/index.py +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/python/cocoindex/lib.py +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/python/cocoindex/llm.py +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/python/cocoindex/py.typed +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/python/cocoindex/runtime.py +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/python/cocoindex/setting.py +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/python/cocoindex/setup.py +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/python/cocoindex/targets.py +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/python/cocoindex/tests/__init__.py +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/python/cocoindex/tests/test_optional_database.py +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/python/cocoindex/tests/test_typing.py +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/python/cocoindex/typing.py +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/python/cocoindex/utils.py +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/ruff.toml +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/base/duration.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/base/json_schema.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/base/mod.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/base/schema.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/base/spec.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/base/value.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/builder/analyzed_flow.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/builder/analyzer.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/builder/exec_ctx.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/builder/flow_builder.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/builder/mod.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/builder/plan.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/execution/db_tracking.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/execution/db_tracking_setup.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/execution/dumper.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/execution/evaluator.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/execution/indexing_status.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/execution/memoization.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/execution/mod.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/execution/row_indexer.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/execution/source_indexer.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/execution/stats.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/lib.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/lib_context.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/llm/anthropic.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/llm/gemini.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/llm/litellm.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/llm/mod.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/llm/ollama.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/llm/openai.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/llm/openrouter.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/llm/vertex_ai.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/llm/vllm.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/llm/voyage.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/ops/factory_bases.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/ops/functions/embed_text.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/ops/functions/extract_by_llm.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/ops/functions/mod.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/ops/functions/parse_json.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/ops/functions/split_recursively.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/ops/functions/test_utils.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/ops/interface.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/ops/mod.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/ops/py_factory.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/ops/registration.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/ops/registry.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/ops/sdk.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/ops/sources/amazon_s3.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/ops/sources/google_drive.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/ops/sources/local_file.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/ops/sources/mod.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/ops/targets/kuzu.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/ops/targets/mod.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/ops/targets/neo4j.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/ops/targets/postgres.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/ops/targets/qdrant.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/ops/targets/shared/mod.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/ops/targets/shared/property_graph.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/ops/targets/shared/table_columns.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/prelude.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/py/convert.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/server.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/service/error.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/service/flows.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/service/mod.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/settings.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/setup/auth_registry.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/setup/components.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/setup/db_metadata.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/setup/driver.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/setup/mod.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/setup/states.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/utils/concur_control.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/utils/db.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/utils/fingerprint.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/utils/immutable.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/utils/mod.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/utils/retryable.rs +0 -0
- {cocoindex-0.1.69 → cocoindex-0.1.71}/src/utils/yaml_ser.rs +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: cocoindex
|
3
|
-
Version: 0.1.69
|
3
|
+
Version: 0.1.71
|
4
4
|
Requires-Dist: click>=8.1.8
|
5
5
|
Requires-Dist: rich>=14.0.0
|
6
6
|
Requires-Dist: python-dotenv>=1.1.0
|
@@ -52,18 +52,18 @@ Ultra performant data transformation framework for AI, with core engine written
|
|
52
52
|
⭐ Drop a star to help us grow!
|
53
53
|
|
54
54
|
<div align="center">
|
55
|
-
|
55
|
+
|
56
56
|
<!-- Keep these links. Translations will automatically update with the README. -->
|
57
|
-
[Deutsch](https://readme-i18n.com/cocoindex-io/cocoindex?lang=de) |
|
58
|
-
[English](https://readme-i18n.com/cocoindex-io/cocoindex?lang=en) |
|
59
|
-
[Español](https://readme-i18n.com/cocoindex-io/cocoindex?lang=es) |
|
60
|
-
[français](https://readme-i18n.com/cocoindex-io/cocoindex?lang=fr) |
|
61
|
-
[日本語](https://readme-i18n.com/cocoindex-io/cocoindex?lang=ja) |
|
62
|
-
[한국어](https://readme-i18n.com/cocoindex-io/cocoindex?lang=ko) |
|
63
|
-
[Português](https://readme-i18n.com/cocoindex-io/cocoindex?lang=pt) |
|
64
|
-
[Русский](https://readme-i18n.com/cocoindex-io/cocoindex?lang=ru) |
|
57
|
+
[Deutsch](https://readme-i18n.com/cocoindex-io/cocoindex?lang=de) |
|
58
|
+
[English](https://readme-i18n.com/cocoindex-io/cocoindex?lang=en) |
|
59
|
+
[Español](https://readme-i18n.com/cocoindex-io/cocoindex?lang=es) |
|
60
|
+
[français](https://readme-i18n.com/cocoindex-io/cocoindex?lang=fr) |
|
61
|
+
[日本語](https://readme-i18n.com/cocoindex-io/cocoindex?lang=ja) |
|
62
|
+
[한국어](https://readme-i18n.com/cocoindex-io/cocoindex?lang=ko) |
|
63
|
+
[Português](https://readme-i18n.com/cocoindex-io/cocoindex?lang=pt) |
|
64
|
+
[Русский](https://readme-i18n.com/cocoindex-io/cocoindex?lang=ru) |
|
65
65
|
[中文](https://readme-i18n.com/cocoindex-io/cocoindex?lang=zh)
|
66
|
-
|
66
|
+
|
67
67
|
</div>
|
68
68
|
|
69
69
|
</br>
|
@@ -208,6 +208,7 @@ It defines an index flow like this:
|
|
208
208
|
| [FastAPI Server with Docker](examples/fastapi_server_docker) | Run the semantic search server in a Dockerized FastAPI setup |
|
209
209
|
| [Product Recommendation](examples/product_recommendation) | Build real-time product recommendations with LLM and graph database|
|
210
210
|
| [Image Search with Vision API](examples/image_search) | Generates detailed captions for images using a vision model, embeds them, enables live-updating semantic search via FastAPI and served on a React frontend|
|
211
|
+
| [Face Recognition](examples/face_recognition) | Recognize faces in images and build embedding index |
|
211
212
|
| [Paper Metadata](examples/paper_metadata) | Index papers in PDF files, and build metadata tables for each paper |
|
212
213
|
|
213
214
|
More coming and stay tuned 👀!
|
@@ -27,18 +27,18 @@ Ultra performant data transformation framework for AI, with core engine written
|
|
27
27
|
⭐ Drop a star to help us grow!
|
28
28
|
|
29
29
|
<div align="center">
|
30
|
-
|
30
|
+
|
31
31
|
<!-- Keep these links. Translations will automatically update with the README. -->
|
32
|
-
[Deutsch](https://readme-i18n.com/cocoindex-io/cocoindex?lang=de) |
|
33
|
-
[English](https://readme-i18n.com/cocoindex-io/cocoindex?lang=en) |
|
34
|
-
[Español](https://readme-i18n.com/cocoindex-io/cocoindex?lang=es) |
|
35
|
-
[français](https://readme-i18n.com/cocoindex-io/cocoindex?lang=fr) |
|
36
|
-
[日本語](https://readme-i18n.com/cocoindex-io/cocoindex?lang=ja) |
|
37
|
-
[한국어](https://readme-i18n.com/cocoindex-io/cocoindex?lang=ko) |
|
38
|
-
[Português](https://readme-i18n.com/cocoindex-io/cocoindex?lang=pt) |
|
39
|
-
[Русский](https://readme-i18n.com/cocoindex-io/cocoindex?lang=ru) |
|
32
|
+
[Deutsch](https://readme-i18n.com/cocoindex-io/cocoindex?lang=de) |
|
33
|
+
[English](https://readme-i18n.com/cocoindex-io/cocoindex?lang=en) |
|
34
|
+
[Español](https://readme-i18n.com/cocoindex-io/cocoindex?lang=es) |
|
35
|
+
[français](https://readme-i18n.com/cocoindex-io/cocoindex?lang=fr) |
|
36
|
+
[日本語](https://readme-i18n.com/cocoindex-io/cocoindex?lang=ja) |
|
37
|
+
[한국어](https://readme-i18n.com/cocoindex-io/cocoindex?lang=ko) |
|
38
|
+
[Português](https://readme-i18n.com/cocoindex-io/cocoindex?lang=pt) |
|
39
|
+
[Русский](https://readme-i18n.com/cocoindex-io/cocoindex?lang=ru) |
|
40
40
|
[中文](https://readme-i18n.com/cocoindex-io/cocoindex?lang=zh)
|
41
|
-
|
41
|
+
|
42
42
|
</div>
|
43
43
|
|
44
44
|
</br>
|
@@ -183,6 +183,7 @@ It defines an index flow like this:
|
|
183
183
|
| [FastAPI Server with Docker](examples/fastapi_server_docker) | Run the semantic search server in a Dockerized FastAPI setup |
|
184
184
|
| [Product Recommendation](examples/product_recommendation) | Build real-time product recommendations with LLM and graph database|
|
185
185
|
| [Image Search with Vision API](examples/image_search) | Generates detailed captions for images using a vision model, embeds them, enables live-updating semantic search via FastAPI and served on a React frontend|
|
186
|
+
| [Face Recognition](examples/face_recognition) | Recognize faces in images and build embedding index |
|
186
187
|
| [Paper Metadata](examples/paper_metadata) | Index papers in PDF files, and build metadata tables for each paper |
|
187
188
|
|
188
189
|
More coming and stay tuned 👀!
|
@@ -148,6 +148,17 @@ Custom functions take the following additional parameters:
|
|
148
148
|
When the version is changed, the function will be re-executed even if cache is enabled.
|
149
149
|
It's required to be set if `cache` is `True`.
|
150
150
|
|
151
|
+
* `arg_relationship: tuple[ArgRelationship, str]`: It specifies the relationship between an input argument and the output,
|
152
|
+
e.g. `(ArgRelationship.CHUNKS_BASE_TEXT, "content")` means the output is chunks for the text represented by the
|
153
|
+
input argument with name `content`.
|
154
|
+
This provides metadata for tools, e.g. CocoInsight.
|
155
|
+
Currently the following attributes are supported:
|
156
|
+
|
157
|
+
* `ArgRelationship.CHUNKS_BASE_TEXT`:
|
158
|
+
The output is chunks for the text represented by the input argument. In this case, the output is expected to be a *Table* in which each row represents a text chunk, and the first column has type *Range*, representing the range of the text chunk.
|
159
|
+
* `ArgRelationship.EMBEDDING_ORIGIN_TEXT`: The output is embedding vector for the text represented by the input argument. The output is expected to be a *Vector*.
|
160
|
+
* `ArgRelationship.RECTS_BASE_IMAGE`: The output is rectangles for the image represented by the input argument. The output is expected to be a *Table* in which each row represents a rectangle, and the first column has type *Struct*, with fields `min_x`, `min_y`, `max_x`, `max_y` to represent the coordinates of the rectangle.
|
161
|
+
|
151
162
|
For example:
|
152
163
|
|
153
164
|
<Tabs>
|
@@ -86,7 +86,7 @@ Optionally, it can have a fixed dimension. Noted as *Vector[Type]* or *Vector[Ty
|
|
86
86
|
|
87
87
|
It supports the following Python types:
|
88
88
|
|
89
|
-
* `cocoindex.Vector[T]` or `cocoindex.Vector[T, typing.Literal[Dim]]`, e.g. `cocoindex.Vector[cocoindex.Float32]` or `cocoindex.Vector[cocoindex.Float32, 384]`
|
89
|
+
* `cocoindex.Vector[T]` or `cocoindex.Vector[T, typing.Literal[Dim]]`, e.g. `cocoindex.Vector[cocoindex.Float32]` or `cocoindex.Vector[cocoindex.Float32, typing.Literal[384]]`
|
90
90
|
* The underlying Python type is `numpy.typing.NDArray[T]` where `T` is a numpy numeric type (`numpy.int64`, `numpy.float32` or `numpy.float64`), or `list[T]` otherwise
|
91
91
|
* `numpy.typing.NDArray[T]` where `T` is a numpy numeric type
|
92
92
|
* `list[T]`
|
@@ -416,22 +416,28 @@ flow_builder.declare(
|
|
416
416
|
### Auth Registry
|
417
417
|
|
418
418
|
CocoIndex manages an auth registry. It's an in-memory key-value store, mainly to store authentication information for a backend.
|
419
|
+
It's usually used for targets, where key stability is important for backend cleanup.
|
419
420
|
|
420
|
-
Operation spec is the default way to configure
|
421
|
+
Operation spec is the default way to configure sources, functions and targets. But it has the following limitations:
|
421
422
|
|
422
423
|
* The spec isn't supposed to contain secret information, and it's frequently shown in various places, e.g. `cocoindex show`.
|
423
|
-
*
|
424
|
-
But we still need to be able to drop the backend (e.g. a table) when [setup / drop flow](/docs/core/flow_methods#setup--drop-flow).
|
424
|
+
* For targets, once an operation is removed after flow definition code change, the spec is also gone.
|
425
|
+
But we still need to be able to drop the persistent backend (e.g. a table) when [setup / drop flow](/docs/core/flow_methods#setup--drop-flow).
|
425
426
|
|
426
|
-
Auth registry is introduced to solve the problems above.
|
427
|
+
Auth registry is introduced to solve the problems above.
|
427
428
|
|
428
|
-
|
429
|
-
|
429
|
+
|
430
|
+
#### Auth Entry
|
431
|
+
|
432
|
+
An auth entry is an entry in the auth registry with an explicit key.
|
433
|
+
|
434
|
+
* You can create a new *auth entry* with a key and a value.
|
435
|
+
* You can reference the entry by the key, and pass it as part of spec for certain operations. e.g. `Neo4j` takes `connection` field in the form of auth entry reference.
|
430
436
|
|
431
437
|
<Tabs>
|
432
438
|
<TabItem value="python" label="Python" default>
|
433
439
|
|
434
|
-
You can add an auth entry by `cocoindex.add_auth_entry()` function, which returns a `cocoindex.AuthEntryReference`:
|
440
|
+
You can add an auth entry by `cocoindex.add_auth_entry()` function, which returns a `cocoindex.AuthEntryReference[T]`:
|
435
441
|
|
436
442
|
```python
|
437
443
|
my_graph_conn = cocoindex.add_auth_entry(
|
@@ -445,7 +451,7 @@ my_graph_conn = cocoindex.add_auth_entry(
|
|
445
451
|
|
446
452
|
Then reference it when building a spec that takes an auth entry:
|
447
453
|
|
448
|
-
* You can either reference by the `AuthEntryReference` object directly:
|
454
|
+
* You can either reference by the `AuthEntryReference[T]` object directly:
|
449
455
|
|
450
456
|
```python
|
451
457
|
demo_collector.export(
|
@@ -472,3 +478,28 @@ Note that CocoIndex backends use the key of an auth entry to identify the backen
|
|
472
478
|
|
473
479
|
* If a key is no longer referenced in any operation spec, keep it until the next flow setup / drop action,
|
474
480
|
so that CocoIndex will be able to clean up the backends.
|
481
|
+
|
482
|
+
#### Transient Auth Entry
|
483
|
+
|
484
|
+
A transient auth entry is an entry in the auth registry with an automatically generated key.
|
485
|
+
It's usually used for sources and functions, where key stability is not important.
|
486
|
+
|
487
|
+
<Tabs>
|
488
|
+
<TabItem value="python" label="Python" default>
|
489
|
+
|
490
|
+
You can create a new *transient auth entry* with the `cocoindex.add_transient_auth_entry()` function, which returns a `cocoindex.TransientAuthEntryReference[T]`, and pass it to a source or function spec that takes it, e.g.
|
491
|
+
|
492
|
+
```python
|
493
|
+
flow_builder.add_source(
|
494
|
+
cocoindex.sources.AzureBlob(
|
495
|
+
...
|
496
|
+
sas_token=cocoindex.add_transient_auth_entry("...")
|
497
|
+
)
|
498
|
+
)
|
499
|
+
```
|
500
|
+
|
501
|
+
|
502
|
+
</TabItem>
|
503
|
+
</Tabs>
|
504
|
+
|
505
|
+
Whenever a `TransientAuthEntryReference[T]` is expected, you can also pass an `AuthEntryReference[T]` instead, as `AuthEntryReference[T]` is a subtype of `TransientAuthEntryReference[T]`.
|
@@ -170,22 +170,33 @@ These are actions you need to take:
|
|
170
170
|
|
171
171
|
#### Authentication
|
172
172
|
|
173
|
-
We
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
* `
|
173
|
+
We support the following authentication methods:
|
174
|
+
|
175
|
+
* Shared access signature (SAS) tokens.
|
176
|
+
You can generate it from the Azure Portal in the settings for a specific container.
|
177
|
+
You need to provide at least *List* and *Read* permissions when generating the SAS token.
|
178
|
+
It's a query string in the form of
|
179
|
+
`sp=rl&st=2025-07-19T09:33:00Z&se=2025-07-20T09:48:53Z&sv=2024-11-04&sr=c&sig=i3FDjsadfklj3%23adsfkk`.
|
180
|
+
|
181
|
+
* Storage account access key. You can find it in the Azure Portal in the settings for a specific storage account.
|
182
|
+
|
183
|
+
* Default credential. When none of the above is provided, it will use the default credential.
|
184
|
+
|
185
|
+
This allows you to connect to Azure services without putting any secrets in the code or flow spec.
|
186
|
+
It automatically chooses the best authentication method based on your environment:
|
187
|
+
|
188
|
+
* On your local machine: uses your Azure CLI login (`az login`) or environment variables.
|
189
|
+
|
190
|
+
```sh
|
191
|
+
az login
|
192
|
+
# Optional: Set a default subscription if you have more than one
|
193
|
+
az account set --subscription "<YOUR_SUBSCRIPTION_NAME_OR_ID>"
|
194
|
+
```
|
195
|
+
* In Azure (VM, App Service, AKS, etc.): uses the resource’s Managed Identity.
|
196
|
+
* In automated environments: supports Service Principals via environment variables
|
197
|
+
* `AZURE_CLIENT_ID`
|
198
|
+
* `AZURE_TENANT_ID`
|
199
|
+
* `AZURE_CLIENT_SECRET`
|
189
200
|
|
190
201
|
You can refer to [this doc](https://learn.microsoft.com/en-us/azure/developer/python/sdk/authentication/overview) for more details.
|
191
202
|
|
@@ -202,6 +213,8 @@ The spec takes the following fields:
|
|
202
213
|
* `excluded_patterns` (`list[str]`, optional): a list of glob patterns to exclude files, e.g. `["*.tmp", "**/*.log"]`.
|
203
214
|
Any file or directory matching these patterns will be excluded even if they match `included_patterns`.
|
204
215
|
If not specified, no files will be excluded.
|
216
|
+
* `sas_token` (`cocoindex.TransientAuthEntryReference[str]`, optional): a SAS token for authentication.
|
217
|
+
* `account_access_key` (`cocoindex.TransientAuthEntryReference[str]`, optional): an account access key for authentication.
|
205
218
|
|
206
219
|
:::info
|
207
220
|
|
@@ -102,12 +102,6 @@ def _main() -> None:
|
|
102
102
|
|
103
103
|
amazon_s3_text_embedding_flow.setup()
|
104
104
|
with cocoindex.FlowLiveUpdater(amazon_s3_text_embedding_flow) as updater:
|
105
|
-
while True:
|
106
|
-
updates = updater.next_status_updates()
|
107
|
-
print(f"Updates: {updates}")
|
108
|
-
if not updates.active_sources:
|
109
|
-
break
|
110
|
-
|
111
105
|
# Run queries in a loop to demonstrate the query capabilities.
|
112
106
|
while True:
|
113
107
|
query = input("Enter search query (or Enter to quit): ")
|
@@ -0,0 +1,51 @@
|
|
1
|
+
# Recognize faces in images and build embedding index
|
2
|
+
[](https://github.com/cocoindex-io/cocoindex)
|
3
|
+
|
4
|
+
|
5
|
+
In this example, we will recognize faces in images and build an embedding index.
|
6
|
+
|
7
|
+
We appreciate a star ⭐ at [CocoIndex GitHub](https://github.com/cocoindex-io/cocoindex) if this is helpful.
|
8
|
+
|
9
|
+
## Steps
|
10
|
+
### Indexing Flow
|
11
|
+
|
12
|
+
1. We will ingest a list of images.
|
13
|
+
2. For each image, we:
|
14
|
+
- Extract faces from the image.
|
15
|
+
- Compute embeddings for each face.
|
16
|
+
3. We will export to the following tables in Postgres with PGVector:
|
17
|
+
- Filename, rect, embedding for each face.
|
18
|
+
|
19
|
+
|
20
|
+
## Prerequisite
|
21
|
+
|
22
|
+
1. [Install Postgres](https://cocoindex.io/docs/getting_started/installation#-install-postgres) if you don't have one.
|
23
|
+
|
24
|
+
2. Install dependencies:
|
25
|
+
|
26
|
+
```bash
|
27
|
+
pip install -e .
|
28
|
+
```
|
29
|
+
|
30
|
+
## Run
|
31
|
+
|
32
|
+
Update the index, which will also set up the tables the first time it runs:
|
33
|
+
|
34
|
+
```bash
|
35
|
+
cocoindex update --setup main.py
|
36
|
+
```
|
37
|
+
|
38
|
+
You can also run the command with `-L`, which will watch for file changes and update the index automatically.
|
39
|
+
|
40
|
+
```bash
|
41
|
+
cocoindex update --setup -L main.py
|
42
|
+
```
|
43
|
+
|
44
|
+
## CocoInsight
|
45
|
+
I used CocoInsight (free beta now) to troubleshoot the index generation and understand the data lineage of the pipeline. It just connects to your local CocoIndex server, with zero pipeline data retention. Run the following command to start CocoInsight:
|
46
|
+
|
47
|
+
```
|
48
|
+
cocoindex server -ci main.py
|
49
|
+
```
|
50
|
+
|
51
|
+
Then open the CocoInsight UI at [https://cocoindex.io/cocoinsight](https://cocoindex.io/cocoinsight).
|
Binary file
|
Binary file
|
@@ -0,0 +1,115 @@
|
|
1
|
+
import cocoindex
|
2
|
+
import io
|
3
|
+
import dataclasses
|
4
|
+
import datetime
|
5
|
+
import typing
|
6
|
+
|
7
|
+
import face_recognition
|
8
|
+
from PIL import Image
|
9
|
+
import numpy as np
|
10
|
+
|
11
|
+
|
12
|
+
@dataclasses.dataclass
class ImageRect:
    """A rectangle in an image, described by the integer positions of its four edges."""

    top: int
    left: int
    bottom: int
    right: int
|
18
|
+
|
19
|
+
|
20
|
+
@dataclasses.dataclass
class FaceBase:
    """A face in an image."""

    # Where the face sits in the image.
    rect: ImageRect
    # Encoded image bytes of the face itself.
    image: bytes
|
26
|
+
|
27
|
+
|
28
|
+
# Images wider than this many pixels are downscaled before face detection.
MAX_IMAGE_WIDTH = 1280
|
29
|
+
|
30
|
+
|
31
|
+
@cocoindex.op.function(cache=True, behavior_version=1, gpu=True)
def extract_faces(content: bytes) -> list[FaceBase]:
    """Detect the faces in an encoded image and return each one as a cropped PNG.

    Args:
        content: Raw bytes of an image file.

    Returns:
        One `FaceBase` per detected face. Its `rect` is expressed in the pixel
        coordinates of the original (un-resized) image, and its `image` is the
        PNG-encoded crop of that rectangle taken from the original image.
    """
    orig_img = Image.open(io.BytesIO(content)).convert("RGB")

    # The model is too slow on large images, so detection runs on a downscaled
    # copy; `ratio` maps detected coordinates back onto the original image.
    ratio = 1.0
    img = orig_img
    if orig_img.width > MAX_IMAGE_WIDTH:
        ratio = orig_img.width * 1.0 / MAX_IMAGE_WIDTH
        img = orig_img.resize(
            (MAX_IMAGE_WIDTH, int(orig_img.height / ratio)),
            resample=Image.Resampling.BICUBIC,
        )

    def _crop_face(location: tuple[int, int, int, int]) -> FaceBase:
        """Scale one (top, right, bottom, left) location and crop it as a PNG."""
        top, right, bottom, left = location
        rect = ImageRect(
            top=int(top * ratio),
            left=int(left * ratio),
            bottom=int(bottom * ratio),
            right=int(right * ratio),
        )
        png_buffer = io.BytesIO()
        cropped = orig_img.crop((rect.left, rect.top, rect.right, rect.bottom))
        cropped.save(png_buffer, format="PNG")
        return FaceBase(rect=rect, image=png_buffer.getvalue())

    # Locations come back in (top, right, bottom, left) order.
    locations = face_recognition.face_locations(np.array(img), model="cnn")
    return [_crop_face(location) for location in locations]
|
68
|
+
|
69
|
+
|
70
|
+
@cocoindex.op.function(cache=True, behavior_version=1, gpu=True)
def extract_face_embedding(
    face: bytes,
) -> cocoindex.Vector[cocoindex.Float32, typing.Literal[128]]:
    """Compute the embedding of an already-cropped face image.

    Args:
        face: Encoded image bytes containing a single face.

    Returns:
        The first encoding returned by `face_recognition.face_encodings`.
    """
    img = Image.open(io.BytesIO(face)).convert("RGB")
    pixels = np.array(img)

    # The input is already a face crop, so the whole image is passed as the
    # single known face location, in (top, right, bottom, left) order.
    whole_image = (0, img.width - 1, img.height - 1, 0)
    encodings = face_recognition.face_encodings(
        pixels,
        known_face_locations=[whole_image],
    )
    return encodings[0]
|
81
|
+
|
82
|
+
|
83
|
+
@cocoindex.flow_def(name="FaceRecognition")
def face_recognition_flow(
    flow_builder: cocoindex.FlowBuilder, data_scope: cocoindex.DataScope
) -> None:
    """
    Define a flow that extracts faces from local images and indexes their embeddings.

    Images are read as binary files from the `images` directory. Each image is
    passed through `extract_faces`, each face through `extract_face_embedding`,
    and one row per face (filename, rect, embedding) is collected and exported
    to the `face_embeddings` Postgres target.
    """
    data_scope["images"] = flow_builder.add_source(
        cocoindex.sources.LocalFile(path="images", binary=True),
        refresh_interval=datetime.timedelta(seconds=10),
    )

    face_embeddings = data_scope.add_collector()

    with data_scope["images"].row() as image:
        # Extract faces
        image["faces"] = image["content"].transform(extract_faces)

        with image["faces"].row() as face:
            face["embedding"] = face["image"].transform(extract_face_embedding)

            # Collect embeddings
            face_embeddings.collect(
                filename=image["filename"],
                rect=face["rect"],
                embedding=face["embedding"],
            )

    # A face is identified by the file it came from plus its rectangle.
    face_embeddings.export(
        "face_embeddings",
        cocoindex.targets.Postgres(),
        primary_key_fields=["filename", "rect"],
    )
|
@@ -0,0 +1,14 @@
|
|
1
|
+
[project]
|
2
|
+
name = "cocoindex-face-recognition-example"
|
3
|
+
version = "0.1.0"
|
4
|
+
description = "Recognize faces in images and build an embedding index"
|
5
|
+
requires-python = ">=3.11"
|
6
|
+
dependencies = [
|
7
|
+
"cocoindex>=0.1.67",
|
8
|
+
"face-recognition>=1.3.0",
|
9
|
+
"pillow>=10.0.0",
|
10
|
+
"numpy>=1.26.0",
|
11
|
+
]
|
12
|
+
|
13
|
+
[tool.setuptools]
|
14
|
+
packages = []
|
@@ -6,7 +6,7 @@ from . import functions, sources, targets, cli, utils
|
|
6
6
|
|
7
7
|
from . import targets as storages # Deprecated: Use targets instead
|
8
8
|
|
9
|
-
from .auth_registry import AuthEntryReference, add_auth_entry,
|
9
|
+
from .auth_registry import AuthEntryReference, add_auth_entry, add_transient_auth_entry
|
10
10
|
from .flow import FlowBuilder, DataScope, DataSlice, Flow, transform_flow
|
11
11
|
from .flow import flow_def
|
12
12
|
from .flow import EvaluateAndDumpOptions, GeneratedField
|
@@ -38,10 +38,12 @@ __all__ = [
|
|
38
38
|
"targets",
|
39
39
|
"storages",
|
40
40
|
"cli",
|
41
|
+
"op",
|
41
42
|
"utils",
|
42
43
|
# Auth registry
|
43
44
|
"AuthEntryReference",
|
44
45
|
"add_auth_entry",
|
46
|
+
"add_transient_auth_entry",
|
45
47
|
"ref_auth_entry",
|
46
48
|
# Flow
|
47
49
|
"FlowBuilder",
|
@@ -0,0 +1,51 @@
|
|
1
|
+
"""
|
2
|
+
Auth registry is used to register and reference auth entries.
|
3
|
+
"""
|
4
|
+
|
5
|
+
from dataclasses import dataclass
|
6
|
+
from typing import Generic, TypeVar
|
7
|
+
import threading
|
8
|
+
|
9
|
+
from . import _engine # type: ignore
|
10
|
+
from .convert import dump_engine_object
|
11
|
+
|
12
|
+
T = TypeVar("T")
|
13
|
+
|
14
|
+
# Global atomic counter for generating unique auth entry keys
|
15
|
+
_counter_lock = threading.Lock()
|
16
|
+
_auth_key_counter = 0
|
17
|
+
|
18
|
+
|
19
|
+
def _generate_auth_key() -> str:
    """Return a fresh auth entry key of the form ``__auth_<n>``.

    The module-level counter is incremented while holding `_counter_lock`, so
    concurrent callers each receive a distinct key.
    """
    global _auth_key_counter  # pylint: disable=global-statement
    with _counter_lock:
        _auth_key_counter += 1
        # Capture the value under the lock; formatting can happen outside it.
        sequence_number = _auth_key_counter
    return f"__auth_{sequence_number}"
|
25
|
+
|
26
|
+
|
27
|
+
@dataclass
class TransientAuthEntryReference(Generic[T]):
    """Reference an auth entry, may or may not have a stable key."""

    # Key identifying the referenced entry in the auth registry.
    key: str
|
32
|
+
|
33
|
+
|
34
|
+
class AuthEntryReference(TransientAuthEntryReference[T]):
    """Reference an auth entry registered under an explicit, caller-supplied key.

    Unlike the automatically generated keys of transient entries, this key is
    chosen by the caller, so it can be kept stable.
    """
|
36
|
+
|
37
|
+
|
38
|
+
def add_transient_auth_entry(value: T) -> TransientAuthEntryReference[T]:
    """Add an auth entry under an automatically generated key.

    Args:
        value: The value to store in the auth registry.

    Returns:
        A reference to the newly added entry.
    """
    generated_key = _generate_auth_key()
    return add_auth_entry(generated_key, value)
|
41
|
+
|
42
|
+
|
43
|
+
def add_auth_entry(key: str, value: T) -> AuthEntryReference[T]:
    """Add an auth entry to the registry under an explicit key.

    Args:
        key: The key to register the entry under.
        value: The value to store; it is dumped with `dump_engine_object`
            before being handed to the engine.

    Returns:
        A reference to the entry, carrying `key`.
    """
    engine_value = dump_engine_object(value)
    _engine.add_auth_entry(key, engine_value)
    return AuthEntryReference(key)
|
47
|
+
|
48
|
+
|
49
|
+
def ref_auth_entry(key: str) -> AuthEntryReference[T]:
    """Build a reference to the auth entry identified by `key`.

    This only constructs the reference object; nothing is added to the registry.
    """
    reference: AuthEntryReference[T] = AuthEntryReference(key)
    return reference
|
@@ -92,10 +92,14 @@ def make_engine_value_decoder(
|
|
92
92
|
if src_type_kind == "Struct":
|
93
93
|
return _make_engine_struct_to_dict_decoder(field_path, src_type["fields"])
|
94
94
|
if src_type_kind in TABLE_TYPES:
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
95
|
+
if src_type_kind == "LTable":
|
96
|
+
return _make_engine_ltable_to_list_dict_decoder(
|
97
|
+
field_path, src_type["row"]["fields"]
|
98
|
+
)
|
99
|
+
elif src_type_kind == "KTable":
|
100
|
+
return _make_engine_ktable_to_dict_dict_decoder(
|
101
|
+
field_path, src_type["row"]["fields"]
|
102
|
+
)
|
99
103
|
return lambda value: value
|
100
104
|
|
101
105
|
# Handle struct -> dict binding for explicit dict annotations
|
@@ -340,6 +344,77 @@ def _make_engine_struct_to_dict_decoder(
|
|
340
344
|
return decode_to_dict
|
341
345
|
|
342
346
|
|
347
|
+
def _make_engine_ltable_to_list_dict_decoder(
    field_path: list[str],
    src_fields: list[dict[str, Any]],
) -> Callable[[list[Any] | None], list[dict[str, Any]] | None]:
    """Build a decoder that turns engine LTable values into a list of dicts.

    Args:
        field_path: Path of the field being decoded, forwarded to the row decoder.
        src_fields: Field schemas of one table row.

    Returns:
        A callable mapping ``None`` to ``None`` and a list of engine rows to a
        list with one dict per row. It raises `ValueError` if any row decodes
        to ``None``.
    """
    # Every row is a struct, so the struct -> dict decoder is reused per row.
    row_decoder = _make_engine_struct_to_dict_decoder(field_path, src_fields)

    def _decode_row(index: int, raw_row: Any) -> dict[str, Any]:
        """Decode a single row, rejecting rows that come back as None."""
        row = row_decoder(raw_row)
        if row is None:
            raise ValueError(
                f"LTable row at index {index} decoded to None, which is not allowed."
            )
        return row

    def decode_to_list_dict(values: list[Any] | None) -> list[dict[str, Any]] | None:
        if values is None:
            return None
        return [_decode_row(index, raw_row) for index, raw_row in enumerate(values)]

    return decode_to_list_dict
|
370
|
+
|
371
|
+
|
372
|
+
def _make_engine_ktable_to_dict_dict_decoder(
    field_path: list[str],
    src_fields: list[dict[str, Any]],
) -> Callable[[list[Any] | None], dict[Any, dict[str, Any]] | None]:
    """Build a decoder that turns engine KTable values into a dict of dicts.

    The first field of each row is the key; the remaining fields form the value.

    Args:
        field_path: Path of the field being decoded. The key field's name is
            appended while the key decoder is built and removed afterwards.
        src_fields: Field schemas of one table row, key field first.

    Returns:
        A callable mapping ``None`` to ``None`` and a list of engine rows to a
        dict keyed by each row's decoded key. A key that decodes to a dict is
        replaced by the tuple of its values before being used as the key.

    Raises:
        ValueError: If `src_fields` is empty. The returned callable raises
            `ValueError` for an empty row.
    """
    if not src_fields:
        raise ValueError("KTable must have at least one field for the key")

    # Split the row schema: leading key field, trailing value fields.
    key_schema = src_fields[0]
    value_schemas = src_fields[1:]

    # The key decoder is built with the key's name temporarily on the path.
    key_name = key_schema.get("name", KEY_FIELD_NAME)
    field_path.append(f".{key_name}")
    key_decoder = make_engine_value_decoder(field_path, key_schema["type"], Any)
    field_path.pop()

    value_decoder = _make_engine_struct_to_dict_decoder(field_path, value_schemas)

    def decode_to_dict_dict(
        values: list[Any] | None,
    ) -> dict[Any, dict[str, Any]] | None:
        if values is None:
            return None

        decoded: dict[Any, dict[str, Any]] = {}
        for raw_row in values:
            if not raw_row:
                raise ValueError("KTable row must have at least 1 value (the key)")

            row_key = key_decoder(raw_row[0])

            # A key-only row, or a value that decodes to None, yields an empty dict.
            payload = value_decoder(raw_row[1:]) if len(raw_row) != 1 else None
            row_value: dict[str, Any] = {} if payload is None else payload

            # A dict cannot be used as a dict key, so use its values as a tuple.
            if isinstance(row_key, dict):
                row_key = tuple(row_key.values())
            decoded[row_key] = row_value
        return decoded

    return decode_to_dict_dict
|
416
|
+
|
417
|
+
|
343
418
|
def dump_engine_object(v: Any) -> Any:
|
344
419
|
"""Recursively dump an object for engine. Engine side uses `Pythonized` to catch."""
|
345
420
|
if v is None:
|