cocoindex 0.1.76__tar.gz → 0.1.77__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cocoindex-0.1.76 → cocoindex-0.1.77}/Cargo.lock +1 -1
- {cocoindex-0.1.76 → cocoindex-0.1.77}/Cargo.toml +1 -1
- {cocoindex-0.1.76 → cocoindex-0.1.77}/PKG-INFO +2 -1
- {cocoindex-0.1.76 → cocoindex-0.1.77}/README.md +1 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/docs/core/data_types.mdx +20 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/docs/ops/functions.md +9 -9
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/image_search/pyproject.toml +1 -1
- cocoindex-0.1.77/examples/multi_format_indexing/README.md +71 -0
- cocoindex-0.1.77/examples/multi_format_indexing/main.py +135 -0
- cocoindex-0.1.77/examples/multi_format_indexing/pyproject.toml +14 -0
- cocoindex-0.1.77/examples/multi_format_indexing/source_files/cat1.jpeg +0 -0
- cocoindex-0.1.77/examples/multi_format_indexing/source_files/dog1.jpeg +0 -0
- cocoindex-0.1.77/examples/multi_format_indexing/source_files/elephant1.jpg +0 -0
- cocoindex-0.1.77/examples/multi_format_indexing/source_files/giraffe.jpg +0 -0
- cocoindex-0.1.77/examples/pdf_embedding/pdf_files/1706.03762v7.pdf +0 -0
- cocoindex-0.1.77/examples/pdf_embedding/pdf_files/1810.04805v2.pdf +0 -0
- cocoindex-0.1.77/examples/pdf_embedding/pdf_files/rfc8259.pdf +0 -0
- cocoindex-0.1.77/examples/text_embedding_qdrant/.env +2 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/python/cocoindex/op.py +65 -39
- {cocoindex-0.1.76 → cocoindex-0.1.77}/python/cocoindex/tests/test_transform_flow.py +41 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/execution/evaluator.rs +11 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/.cargo/config.toml +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/.env.lib_debug +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/.github/ISSUE_TEMPLATE//360/237/220/233-bug-report.md" +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/.github/ISSUE_TEMPLATE//360/237/222/241-feature-request.md" +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/.github/scripts/update_version.sh +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/.github/workflows/CI.yml +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/.github/workflows/_doc_release.yml +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/.github/workflows/_test.yml +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/.github/workflows/docs.yml +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/.github/workflows/format.yml +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/.github/workflows/release.yml +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/.gitignore +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/.pre-commit-config.yaml +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/CODE_OF_CONDUCT.md +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/CONTRIBUTING.md +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/LICENSE +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/dev/neo4j.yaml +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/dev/postgres.yaml +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/.gitignore +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/README.md +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/docs/about/community.md +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/docs/about/contributing.md +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/docs/ai/llm.mdx +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/docs/core/basics.md +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/docs/core/cli.mdx +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/docs/core/data_example.svg +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/docs/core/flow_def.mdx +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/docs/core/flow_example.svg +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/docs/core/flow_methods.mdx +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/docs/core/settings.mdx +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/docs/custom_ops/custom_functions.mdx +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/docs/custom_ops/custom_targets.mdx +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/docs/getting_started/installation.md +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/docs/getting_started/markdown_files.zip +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/docs/getting_started/overview.md +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/docs/getting_started/quickstart.md +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/docs/ops/sources.md +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/docs/ops/targets.md +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/docs/query.mdx +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/docs/tutorials/live_updates.md +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/docs/tutorials/manage_flow_dynamically.mdx +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/docusaurus.config.ts +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/package.json +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/sidebars.ts +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/src/components/HomepageFeatures/index.tsx +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/src/components/HomepageFeatures/styles.module.css +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/src/css/custom.css +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/src/theme/Root.js +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/static/.nojekyll +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/static/img/docusaurus.png +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/static/img/favicon.ico +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/static/img/icon.svg +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/static/img/incremental-etl.gif +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/static/robots.txt +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/tsconfig.json +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/yarn.lock +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/amazon_s3_embedding/.env.example +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/amazon_s3_embedding/.gitignore +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/amazon_s3_embedding/README.md +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/amazon_s3_embedding/main.py +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/amazon_s3_embedding/pyproject.toml +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/azure_blob_embedding/.env.example +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/azure_blob_embedding/.gitignore +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/azure_blob_embedding/README.md +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/azure_blob_embedding/main.py +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/azure_blob_embedding/pyproject.toml +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/code_embedding/.env +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/code_embedding/README.md +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/code_embedding/main.py +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/code_embedding/pyproject.toml +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/custom_output_files/.env +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/custom_output_files/.gitignore +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/custom_output_files/README.md +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/custom_output_files/data/bizarre_animals.md +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/custom_output_files/data/chunk_norris.md +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/custom_output_files/main.py +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/custom_output_files/pyproject.toml +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/docs_to_knowledge_graph/.env +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/docs_to_knowledge_graph/README.md +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/docs_to_knowledge_graph/main.py +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/docs_to_knowledge_graph/pyproject.toml +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/face_recognition/.env +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/face_recognition/README.md +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/face_recognition/images/Carter_welcomes_Reagan.jpg +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/face_recognition/images/Solvay_conference_1927.jpg +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/face_recognition/images/Steve_Jobs_and_Bill_Gates_(522695099).jpg +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/face_recognition/images/einplanck3.jpg +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/face_recognition/main.py +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/face_recognition/pyproject.toml +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/fastapi_server_docker/.dockerignore +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/fastapi_server_docker/.env +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/fastapi_server_docker/README.md +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/fastapi_server_docker/compose.yaml +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/fastapi_server_docker/dockerfile +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/fastapi_server_docker/files/1810.04805v2.md +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/fastapi_server_docker/main.py +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/fastapi_server_docker/requirements.txt +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/gdrive_text_embedding/.env.example +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/gdrive_text_embedding/.gitignore +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/gdrive_text_embedding/README.md +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/gdrive_text_embedding/main.py +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/gdrive_text_embedding/pyproject.toml +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/image_search/.env +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/image_search/README.md +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/image_search/colpali_main.py +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/image_search/frontend/.gitignore +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/image_search/frontend/index.html +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/image_search/frontend/package-lock.json +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/image_search/frontend/package.json +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/image_search/frontend/src/App.jsx +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/image_search/frontend/src/main.jsx +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/image_search/frontend/src/style.css +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/image_search/frontend/vite.config.js +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/image_search/img/cat1.jpeg +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/image_search/img/dog1.jpeg +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/image_search/img/elephant1.jpg +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/image_search/img/giraffe.jpg +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/image_search/main.py +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/live_updates/.env +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/live_updates/README.md +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/live_updates/data/bizarre_animals.md +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/live_updates/data/chunk_norris.md +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/live_updates/main.py +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/live_updates/pyproject.toml +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/manuals_llm_extraction/.env +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/manuals_llm_extraction/README.md +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/manuals_llm_extraction/main.py +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/manuals_llm_extraction/manuals/array.pdf +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/manuals_llm_extraction/manuals/base64.pdf +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/manuals_llm_extraction/manuals/copy.pdf +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/manuals_llm_extraction/manuals/glob.pdf +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/manuals_llm_extraction/pyproject.toml +0 -0
- {cocoindex-0.1.76/examples/pdf_embedding → cocoindex-0.1.77/examples/multi_format_indexing}/.env +0 -0
- {cocoindex-0.1.76/examples/paper_metadata/papers → cocoindex-0.1.77/examples/multi_format_indexing/source_files}/1706.03762v7.pdf +0 -0
- {cocoindex-0.1.76/examples/paper_metadata/papers → cocoindex-0.1.77/examples/multi_format_indexing/source_files}/1810.04805v2.pdf +0 -0
- {cocoindex-0.1.76/examples/pdf_embedding/pdf_files → cocoindex-0.1.77/examples/multi_format_indexing/source_files}/rfc8259.pdf +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/paper_metadata/.env.example +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/paper_metadata/.gitignore +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/paper_metadata/README.md +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/paper_metadata/main.py +0 -0
- {cocoindex-0.1.76/examples/pdf_embedding/pdf_files → cocoindex-0.1.77/examples/paper_metadata/papers}/1706.03762v7.pdf +0 -0
- {cocoindex-0.1.76/examples/pdf_embedding/pdf_files → cocoindex-0.1.77/examples/paper_metadata/papers}/1810.04805v2.pdf +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/paper_metadata/papers/2502.06786v3.pdf +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/paper_metadata/papers/2502.20346v1.pdf +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/paper_metadata/pyproject.toml +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/patient_intake_extraction/.env.example +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/patient_intake_extraction/README.md +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/patient_intake_extraction/data/README.md +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/patient_intake_extraction/data/patient_forms/Patient_Intake_Form_David_Artificial.docx +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/patient_intake_extraction/data/patient_forms/Patient_Intake_Form_Emily_Artificial.pdf +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/patient_intake_extraction/data/patient_forms/Patient_Intake_Form_Joe_Artificial.pdf +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/patient_intake_extraction/data/patient_forms/Patient_Intake_From_Jane_Artificial.docx +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/patient_intake_extraction/main.py +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/patient_intake_extraction/pyproject.toml +0 -0
- {cocoindex-0.1.76/examples/product_recommendation → cocoindex-0.1.77/examples/pdf_embedding}/.env +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/pdf_embedding/README.md +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/pdf_embedding/main.py +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/pdf_embedding/pyproject.toml +0 -0
- {cocoindex-0.1.76/examples/text_embedding → cocoindex-0.1.77/examples/product_recommendation}/.env +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/product_recommendation/README.md +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/product_recommendation/img/cocoinsight.png +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/product_recommendation/img/neo4j.png +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/product_recommendation/main.py +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/product_recommendation/products/p1.json +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/product_recommendation/products/p2.json +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/product_recommendation/products/p3.json +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/product_recommendation/products/p4.json +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/product_recommendation/products/p5.json +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/product_recommendation/products/p6.json +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/product_recommendation/products/p7.json +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/product_recommendation/products/p8.json +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/product_recommendation/products/p9.json +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/product_recommendation/pyproject.toml +0 -0
- {cocoindex-0.1.76/examples/text_embedding_qdrant → cocoindex-0.1.77/examples/text_embedding}/.env +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/text_embedding/README.md +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/text_embedding/Text_Embedding.ipynb +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/text_embedding/main.py +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/text_embedding/markdown_files/1706.03762v7.md +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/text_embedding/markdown_files/1810.04805v2.md +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/text_embedding/markdown_files/rfc8259.md +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/text_embedding/pyproject.toml +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/text_embedding_qdrant/README.md +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/text_embedding_qdrant/main.py +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/text_embedding_qdrant/markdown_files/rfc8259.md +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/text_embedding_qdrant/pyproject.toml +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/pyproject.toml +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/python/cocoindex/__init__.py +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/python/cocoindex/auth_registry.py +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/python/cocoindex/cli.py +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/python/cocoindex/convert.py +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/python/cocoindex/flow.py +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/python/cocoindex/functions.py +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/python/cocoindex/index.py +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/python/cocoindex/lib.py +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/python/cocoindex/llm.py +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/python/cocoindex/py.typed +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/python/cocoindex/runtime.py +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/python/cocoindex/setting.py +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/python/cocoindex/setup.py +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/python/cocoindex/sources.py +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/python/cocoindex/targets.py +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/python/cocoindex/tests/__init__.py +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/python/cocoindex/tests/test_convert.py +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/python/cocoindex/tests/test_optional_database.py +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/python/cocoindex/tests/test_typing.py +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/python/cocoindex/tests/test_validation.py +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/python/cocoindex/typing.py +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/python/cocoindex/utils.py +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/python/cocoindex/validation.py +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/ruff.toml +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/base/duration.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/base/field_attrs.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/base/json_schema.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/base/mod.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/base/schema.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/base/spec.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/base/value.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/builder/analyzed_flow.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/builder/analyzer.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/builder/exec_ctx.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/builder/flow_builder.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/builder/mod.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/builder/plan.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/execution/db_tracking.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/execution/db_tracking_setup.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/execution/dumper.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/execution/indexing_status.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/execution/live_updater.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/execution/memoization.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/execution/mod.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/execution/row_indexer.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/execution/source_indexer.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/execution/stats.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/lib.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/lib_context.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/llm/anthropic.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/llm/gemini.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/llm/litellm.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/llm/mod.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/llm/ollama.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/llm/openai.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/llm/openrouter.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/llm/vllm.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/llm/voyage.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/ops/factory_bases.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/ops/functions/embed_text.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/ops/functions/extract_by_llm.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/ops/functions/mod.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/ops/functions/parse_json.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/ops/functions/split_recursively.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/ops/functions/test_utils.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/ops/interface.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/ops/mod.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/ops/py_factory.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/ops/registration.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/ops/registry.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/ops/sdk.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/ops/sources/amazon_s3.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/ops/sources/azure_blob.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/ops/sources/google_drive.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/ops/sources/local_file.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/ops/sources/mod.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/ops/targets/kuzu.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/ops/targets/mod.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/ops/targets/neo4j.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/ops/targets/postgres.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/ops/targets/qdrant.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/ops/targets/shared/mod.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/ops/targets/shared/property_graph.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/ops/targets/shared/table_columns.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/prelude.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/py/convert.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/py/mod.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/server.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/service/error.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/service/flows.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/service/mod.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/settings.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/setup/auth_registry.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/setup/components.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/setup/db_metadata.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/setup/driver.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/setup/mod.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/setup/states.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/utils/concur_control.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/utils/db.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/utils/fingerprint.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/utils/immutable.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/utils/mod.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/utils/retryable.rs +0 -0
- {cocoindex-0.1.76 → cocoindex-0.1.77}/src/utils/yaml_ser.rs +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: cocoindex
|
3
|
-
Version: 0.1.76
|
3
|
+
Version: 0.1.77
|
4
4
|
Requires-Dist: click>=8.1.8
|
5
5
|
Requires-Dist: rich>=14.0.0
|
6
6
|
Requires-Dist: python-dotenv>=1.1.0
|
@@ -214,6 +214,7 @@ It defines an index flow like this:
|
|
214
214
|
| [Image Search with Vision API](examples/image_search) | Generates detailed captions for images using a vision model, embeds them, enables live-updating semantic search via FastAPI and served on a React frontend|
|
215
215
|
| [Face Recognition](examples/face_recognition) | Recognize faces in images and build embedding index |
|
216
216
|
| [Paper Metadata](examples/paper_metadata) | Index papers in PDF files, and build metadata tables for each paper |
|
217
|
+
| [Multi Format Indexing](examples/multi_format_indexing) | Build visual document index from PDFs and images with ColPali for semantic search |
|
217
218
|
| [Custom Output Files](examples/custom_output_files) | Convert markdown files to HTML files and save them to a local directory, using *CocoIndex Custom Targets* |
|
218
219
|
|
219
220
|
More coming and stay tuned 👀!
|
@@ -185,6 +185,7 @@ It defines an index flow like this:
|
|
185
185
|
| [Image Search with Vision API](examples/image_search) | Generates detailed captions for images using a vision model, embeds them, enables live-updating semantic search via FastAPI and served on a React frontend|
|
186
186
|
| [Face Recognition](examples/face_recognition) | Recognize faces in images and build embedding index |
|
187
187
|
| [Paper Metadata](examples/paper_metadata) | Index papers in PDF files, and build metadata tables for each paper |
|
188
|
+
| [Multi Format Indexing](examples/multi_format_indexing) | Build visual document index from PDFs and images with ColPali for semantic search |
|
188
189
|
| [Custom Output Files](examples/custom_output_files) | Convert markdown files to HTML files and save them to a local directory, using *CocoIndex Custom Targets* |
|
189
190
|
|
190
191
|
More coming and stay tuned 👀!
|
@@ -199,3 +199,23 @@ Currently, the following types are key types
|
|
199
199
|
- *Uuid*
|
200
200
|
- *Date*
|
201
201
|
- *Struct* with all fields being key types (using `@dataclass(frozen=True)` or `NamedTuple`)
|
202
|
+
|
203
|
+
## *Null* Values
|
204
|
+
|
205
|
+
CocoIndex supports *Null* values. A *Null* value represents the absence of data or an unknown value, distinct from empty strings, zero numbers, or false boolean values.
|
206
|
+
|
207
|
+
### Nullable Type
|
208
|
+
|
209
|
+
For any data (e.g. a field of a *Struct*, an argument or return value of a CocoIndex function), if it is nullable, it means its value can be *Null*.
|
210
|
+
We use a `?` suffix to indicate a nullable type, e.g. *Str?*, *Person?*.
|
211
|
+
|
212
|
+
In Python, *Null* is represented as `None`, so a nullable type can be represented by `T | None` or `typing.Optional[T]`.
|
213
|
+
|
214
|
+
### *Null* propagating on CocoIndex functions
|
215
|
+
|
216
|
+
A function may specify whether each input argument is nullable or not.
|
217
|
+
Non-nullable argument means the function needs a known value for the argument to work.
|
218
|
+
However, it doesn't forbid the argument from being *Null* at runtime.
|
219
|
+
When a non-nullable argument receives a *Null* value, the function execution is skipped and the result is *Null*.
|
220
|
+
|
221
|
+
For example, for [`SplitRecursively` function](/docs/ops/functions#splitrecursively), the `text` and `chunk_size` arguments are not nullable. If the input value of either of them is *Null*, the function will return *Null*.
|
@@ -9,12 +9,12 @@ description: CocoIndex Built-in Functions
|
|
9
9
|
|
10
10
|
`ParseJson` parses a given text to JSON.
|
11
11
|
|
12
|
-
|
12
|
+
Input data:
|
13
13
|
|
14
|
-
* `text` (
|
15
|
-
* `language` (`
|
14
|
+
* `text` (*Str*): The source text to parse.
|
15
|
+
* `language` (*Str?*, default: `"json"`): The language of the source text. Only `json` is supported now.
|
16
16
|
|
17
|
-
Return: *Json
|
17
|
+
Return: *Json*, the parsed JSON object.
|
18
18
|
|
19
19
|
## SplitRecursively
|
20
20
|
|
@@ -37,7 +37,7 @@ Input data:
|
|
37
37
|
|
38
38
|
* `text` (*Str*): The text to split.
|
39
39
|
* `chunk_size` (*Int64*): The maximum size of each chunk, in bytes.
|
40
|
-
* `min_chunk_size` (*Int64*,
|
40
|
+
* `min_chunk_size` (*Int64*, default: `chunk_size / 2`): The minimum size of each chunk, in bytes.
|
41
41
|
|
42
42
|
:::note
|
43
43
|
|
@@ -48,8 +48,8 @@ Input data:
|
|
48
48
|
|
49
49
|
:::
|
50
50
|
|
51
|
-
* `chunk_overlap` (*Int64
|
52
|
-
* `language` (*Str*,
|
51
|
+
* `chunk_overlap` (*Int64?*, default: *Null*): The maximum overlap size between adjacent chunks, in bytes.
|
52
|
+
* `language` (*Str*, default: `""`): The language of the document.
|
53
53
|
Can be a language name (e.g. `Python`, `Javascript`, `Markdown`) or a file extension (e.g. `.py`, `.js`, `.md`).
|
54
54
|
|
55
55
|
|
@@ -61,7 +61,7 @@ Input data:
|
|
61
61
|
* `custom_languages` in the spec, against the `language_name` or `aliases` field of each entry.
|
62
62
|
* Builtin languages (see [Supported Languages](#supported-languages) section below), against the language, aliases or file extensions of each entry.
|
63
63
|
|
64
|
-
All matches are in a case-insensitive manner.
|
64
|
+
All matches are in a case-insensitive manner.
|
65
65
|
|
66
66
|
* If no match is found, the input will be treated as plain text.
|
67
67
|
|
@@ -185,7 +185,7 @@ Not all LLM APIs support text embedding. See the [LLM API Types table](/docs/ai/
|
|
185
185
|
|
186
186
|
Input data:
|
187
187
|
|
188
|
-
* `text` (*Str
|
188
|
+
* `text` (*Str*): The text to embed.
|
189
189
|
|
190
190
|
Return: *Vector[Float32, N]*, where *N* is the dimension of the embedding vector determined by the model.
|
191
191
|
|
@@ -4,7 +4,7 @@ version = "0.1.0"
|
|
4
4
|
description = "Image search examples for cocoindex: CLIP and ColPali-based embedding."
|
5
5
|
requires-python = ">=3.11"
|
6
6
|
dependencies = [
|
7
|
-
"cocoindex[colpali]>=0.1.
|
7
|
+
"cocoindex[colpali]>=0.1.76",
|
8
8
|
"python-dotenv>=1.0.1",
|
9
9
|
"fastapi>=0.100.0",
|
10
10
|
"torch>=2.0.0",
|
@@ -0,0 +1,71 @@
|
|
1
|
+
# Build visual document index from PDFs and images with ColPali
|
2
|
+
[CocoIndex on GitHub](https://github.com/cocoindex-io/cocoindex)
|
3
|
+
|
4
|
+
|
5
|
+
In this example, we build a visual document indexing flow using ColPali for embedding PDFs and images, and query the index with natural language.
|
6
|
+
|
7
|
+
We appreciate a star ⭐ at [CocoIndex GitHub](https://github.com/cocoindex-io/cocoindex) if this is helpful.
|
8
|
+
|
9
|
+
## Steps
|
10
|
+
### Indexing Flow
|
11
|
+
|
12
|
+
1. We ingest a list of PDF files and image files from the `source_files` directory.
|
13
|
+
2. For each file:
|
14
|
+
- **PDF files**: convert each page to a high-resolution image (300 DPI)
|
15
|
+
- **Image files**: use the image directly
|
16
|
+
- Generate visual embeddings for each page/image using the ColPali model
|
17
|
+
3. We will save the embeddings and metadata in the Qdrant vector database.
|
18
|
+
|
19
|
+
### Query
|
20
|
+
We will match against user-provided natural language text using ColPali's text-to-visual embedding capability, enabling semantic search across visual document content.
|
21
|
+
|
22
|
+
|
23
|
+
|
24
|
+
## Prerequisite
|
25
|
+
[Install Qdrant](https://qdrant.tech/documentation/guides/installation/) if you don't have one running locally.
|
26
|
+
|
27
|
+
You can start Qdrant with Docker:
|
28
|
+
```bash
|
29
|
+
docker run -p 6333:6333 -p 6334:6334 qdrant/qdrant
|
30
|
+
```
|
31
|
+
|
32
|
+
## Run
|
33
|
+
|
34
|
+
Install dependencies:
|
35
|
+
|
36
|
+
```bash
|
37
|
+
pip install -e .
|
38
|
+
```
|
39
|
+
|
40
|
+
Setup:
|
41
|
+
|
42
|
+
```bash
|
43
|
+
cocoindex setup main.py
|
44
|
+
```
|
45
|
+
|
46
|
+
Update index:
|
47
|
+
|
48
|
+
```bash
|
49
|
+
cocoindex update main.py
|
50
|
+
```
|
51
|
+
|
52
|
+
Run:
|
53
|
+
|
54
|
+
```bash
|
55
|
+
python main.py
|
56
|
+
```
|
57
|
+
|
58
|
+
## About ColPali
|
59
|
+
This example uses [ColPali](https://github.com/illuin-tech/colpali), a state-of-the-art vision-language model that enables:
|
60
|
+
- Direct visual understanding of document layouts, tables, and figures
|
61
|
+
- Natural language queries against visual document content
|
62
|
+
- No need for OCR or text extraction - works directly with document images
|
63
|
+
|
64
|
+
## CocoInsight
|
65
|
+
I used CocoInsight (free beta now) to troubleshoot the index generation and understand the data lineage of the pipeline. It just connects to your local CocoIndex server, with zero pipeline data retention. Run the following command to start CocoInsight:
|
66
|
+
|
67
|
+
```
|
68
|
+
cocoindex server -ci main.py
|
69
|
+
```
|
70
|
+
|
71
|
+
Then open the CocoInsight UI at [https://cocoindex.io/cocoinsight](https://cocoindex.io/cocoinsight).
|
@@ -0,0 +1,135 @@
|
|
1
|
+
import cocoindex
|
2
|
+
import os
|
3
|
+
import mimetypes
|
4
|
+
|
5
|
+
from dotenv import load_dotenv
|
6
|
+
from dataclasses import dataclass
|
7
|
+
from pdf2image import convert_from_bytes
|
8
|
+
from io import BytesIO
|
9
|
+
|
10
|
+
from qdrant_client import QdrantClient
|
11
|
+
|
12
|
+
QDRANT_GRPC_URL = "http://localhost:6334"
|
13
|
+
QDRANT_COLLECTION = "MultiFormatIndexings"
|
14
|
+
COLPALI_MODEL_NAME = os.getenv("COLPALI_MODEL", "vidore/colpali-v1.2")
|
15
|
+
|
16
|
+
|
17
|
+
@dataclass
|
18
|
+
class Page:
|
19
|
+
page_number: int | None
|
20
|
+
image: bytes
|
21
|
+
|
22
|
+
|
23
|
+
@cocoindex.op.function()
|
24
|
+
def file_to_pages(filename: str, content: bytes) -> list[Page]:
|
25
|
+
"""
|
26
|
+
Classify file content based on MIME type detection.
|
27
|
+
Returns a list of `Page` objects: one per page for PDF files, a single page (with no page number) for image files, and an empty list for any other file type.
|
28
|
+
"""
|
29
|
+
# Guess the MIME type based on the filename
|
30
|
+
mime_type, _ = mimetypes.guess_type(filename)
|
31
|
+
|
32
|
+
if mime_type == "application/pdf":
|
33
|
+
images = convert_from_bytes(content, dpi=300)
|
34
|
+
pages = []
|
35
|
+
for i, image in enumerate(images):
|
36
|
+
with BytesIO() as buffer:
|
37
|
+
image.save(buffer, format="PNG")
|
38
|
+
pages.append(Page(page_number=i + 1, image=buffer.getvalue()))
|
39
|
+
return pages
|
40
|
+
elif mime_type and mime_type.startswith("image/"):
|
41
|
+
return [Page(page_number=None, image=content)]
|
42
|
+
else:
|
43
|
+
return []
|
44
|
+
|
45
|
+
|
46
|
+
qdrant_connection = cocoindex.add_auth_entry(
|
47
|
+
"qdrant_connection",
|
48
|
+
cocoindex.targets.QdrantConnection(grpc_url=QDRANT_GRPC_URL),
|
49
|
+
)
|
50
|
+
|
51
|
+
|
52
|
+
@cocoindex.flow_def(name="MultiFormatIndexing")
|
53
|
+
def multi_format_indexing_flow(
|
54
|
+
flow_builder: cocoindex.FlowBuilder, data_scope: cocoindex.DataScope
|
55
|
+
) -> None:
|
56
|
+
"""
|
57
|
+
Define an example flow that embeds files into a vector database.
|
58
|
+
"""
|
59
|
+
data_scope["documents"] = flow_builder.add_source(
|
60
|
+
cocoindex.sources.LocalFile(path="source_files", binary=True)
|
61
|
+
)
|
62
|
+
|
63
|
+
output_embeddings = data_scope.add_collector()
|
64
|
+
|
65
|
+
with data_scope["documents"].row() as doc:
|
66
|
+
doc["pages"] = flow_builder.transform(
|
67
|
+
file_to_pages, filename=doc["filename"], content=doc["content"]
|
68
|
+
)
|
69
|
+
with doc["pages"].row() as page:
|
70
|
+
page["embedding"] = page["image"].transform(
|
71
|
+
cocoindex.functions.ColPaliEmbedImage(model=COLPALI_MODEL_NAME)
|
72
|
+
)
|
73
|
+
output_embeddings.collect(
|
74
|
+
id=cocoindex.GeneratedField.UUID,
|
75
|
+
filename=doc["filename"],
|
76
|
+
page=page["page_number"],
|
77
|
+
embedding=page["embedding"],
|
78
|
+
)
|
79
|
+
|
80
|
+
output_embeddings.export(
|
81
|
+
"multi_format_indexings",
|
82
|
+
cocoindex.targets.Qdrant(
|
83
|
+
connection=qdrant_connection,
|
84
|
+
collection_name=QDRANT_COLLECTION,
|
85
|
+
),
|
86
|
+
primary_key_fields=["id"],
|
87
|
+
)
|
88
|
+
|
89
|
+
|
90
|
+
@cocoindex.transform_flow()
|
91
|
+
def query_to_colpali_embedding(
|
92
|
+
text: cocoindex.DataSlice[str],
|
93
|
+
) -> cocoindex.DataSlice[list[list[float]]]:
|
94
|
+
return text.transform(
|
95
|
+
cocoindex.functions.ColPaliEmbedQuery(model=COLPALI_MODEL_NAME)
|
96
|
+
)
|
97
|
+
|
98
|
+
|
99
|
+
def _main() -> None:
|
100
|
+
# Initialize Qdrant client
|
101
|
+
client = QdrantClient(url=QDRANT_GRPC_URL, prefer_grpc=True)
|
102
|
+
|
103
|
+
# Run queries in a loop to demonstrate the query capabilities.
|
104
|
+
while True:
|
105
|
+
query = input("Enter search query (or Enter to quit): ")
|
106
|
+
if query == "":
|
107
|
+
break
|
108
|
+
|
109
|
+
# Get the embedding for the query
|
110
|
+
query_embedding = query_to_colpali_embedding.eval(query)
|
111
|
+
|
112
|
+
search_results = client.query_points(
|
113
|
+
collection_name=QDRANT_COLLECTION,
|
114
|
+
query=query_embedding, # Multi-vector format: list[list[float]]
|
115
|
+
using="embedding", # Specify the vector field name
|
116
|
+
limit=5,
|
117
|
+
with_payload=True,
|
118
|
+
)
|
119
|
+
print("\nSearch results:")
|
120
|
+
for result in search_results.points:
|
121
|
+
score = result.score
|
122
|
+
payload = result.payload
|
123
|
+
if payload is None:
|
124
|
+
continue
|
125
|
+
page_number = payload["page"]
|
126
|
+
page_number_str = f"Page:{page_number}" if page_number is not None else ""
|
127
|
+
print(f"[{score:.3f}] {payload['filename']} {page_number_str}")
|
128
|
+
print("---")
|
129
|
+
print()
|
130
|
+
|
131
|
+
|
132
|
+
if __name__ == "__main__":
|
133
|
+
load_dotenv()
|
134
|
+
cocoindex.init()
|
135
|
+
_main()
|
@@ -0,0 +1,14 @@
|
|
1
|
+
[project]
|
2
|
+
name = "pdf-embedding"
|
3
|
+
version = "0.1.0"
|
4
|
+
description = "Simple example for cocoindex: build embedding index based on local PDF files."
|
5
|
+
requires-python = ">=3.11"
|
6
|
+
dependencies = [
|
7
|
+
"cocoindex[colpali]>=0.1.76",
|
8
|
+
"python-dotenv>=1.0.1",
|
9
|
+
"pdf2image>=1.17.0",
|
10
|
+
"qdrant-client>=1.15.0",
|
11
|
+
]
|
12
|
+
|
13
|
+
[tool.setuptools]
|
14
|
+
packages = []
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
@@ -114,8 +114,8 @@ class _FunctionExecutorFactory:
|
|
114
114
|
) -> tuple[dict[str, Any], Executor]:
|
115
115
|
spec = _load_spec_from_engine(self._spec_cls, spec)
|
116
116
|
executor = self._executor_cls(spec)
|
117
|
-
result_type = executor.
|
118
|
-
return (
|
117
|
+
result_type = executor.analyze_schema(*args, **kwargs)
|
118
|
+
return (result_type, executor)
|
119
119
|
|
120
120
|
|
121
121
|
_gpu_dispatch_lock = asyncio.Lock()
|
@@ -156,6 +156,12 @@ def _to_async_call(call: Callable[..., Any]) -> Callable[..., Awaitable[Any]]:
|
|
156
156
|
return lambda *args, **kwargs: asyncio.to_thread(lambda: call(*args, **kwargs))
|
157
157
|
|
158
158
|
|
159
|
+
@dataclasses.dataclass
|
160
|
+
class _ArgInfo:
|
161
|
+
decoder: Callable[[Any], Any]
|
162
|
+
is_required: bool
|
163
|
+
|
164
|
+
|
159
165
|
def _register_op_factory(
|
160
166
|
category: OpCategory,
|
161
167
|
expected_args: list[tuple[str, inspect.Parameter]],
|
@@ -176,8 +182,8 @@ def _register_op_factory(
|
|
176
182
|
return op_args.behavior_version
|
177
183
|
|
178
184
|
class _WrappedClass(executor_cls, _Fallback): # type: ignore[misc]
|
179
|
-
|
180
|
-
|
185
|
+
_args_info: list[_ArgInfo]
|
186
|
+
_kwargs_info: dict[str, _ArgInfo]
|
181
187
|
_acall: Callable[..., Awaitable[Any]]
|
182
188
|
|
183
189
|
def __init__(self, spec: Any) -> None:
|
@@ -185,28 +191,45 @@ def _register_op_factory(
|
|
185
191
|
self.spec = spec
|
186
192
|
self._acall = _to_async_call(super().__call__)
|
187
193
|
|
188
|
-
def
|
194
|
+
def analyze_schema(
|
189
195
|
self, *args: _engine.OpArgSchema, **kwargs: _engine.OpArgSchema
|
190
196
|
) -> Any:
|
191
197
|
"""
|
192
198
|
Analyze the spec and arguments. In this phase, argument types should be validated.
|
193
199
|
It should return the expected result type for the current op.
|
194
200
|
"""
|
195
|
-
self.
|
196
|
-
self.
|
201
|
+
self._args_info = []
|
202
|
+
self._kwargs_info = {}
|
197
203
|
attributes = []
|
198
|
-
|
199
|
-
|
204
|
+
potentially_missing_required_arg = False
|
205
|
+
|
206
|
+
def process_arg(
|
207
|
+
arg_name: str,
|
208
|
+
arg_param: inspect.Parameter,
|
209
|
+
actual_arg: _engine.OpArgSchema,
|
210
|
+
) -> _ArgInfo:
|
211
|
+
nonlocal potentially_missing_required_arg
|
200
212
|
if op_args.arg_relationship is not None:
|
201
213
|
related_attr, related_arg_name = op_args.arg_relationship
|
202
214
|
if related_arg_name == arg_name:
|
203
215
|
attributes.append(
|
204
|
-
TypeAttr(related_attr.value,
|
216
|
+
TypeAttr(related_attr.value, actual_arg.analyzed_value)
|
205
217
|
)
|
218
|
+
type_info = analyze_type_info(arg_param.annotation)
|
219
|
+
decoder = make_engine_value_decoder(
|
220
|
+
[arg_name], actual_arg.value_type["type"], type_info
|
221
|
+
)
|
222
|
+
is_required = not type_info.nullable
|
223
|
+
if is_required and actual_arg.value_type.get("nullable", False):
|
224
|
+
potentially_missing_required_arg = True
|
225
|
+
return _ArgInfo(
|
226
|
+
decoder=decoder,
|
227
|
+
is_required=is_required,
|
228
|
+
)
|
206
229
|
|
207
230
|
# Match arguments with parameters.
|
208
231
|
next_param_idx = 0
|
209
|
-
for
|
232
|
+
for actual_arg in args:
|
210
233
|
if next_param_idx >= len(expected_args):
|
211
234
|
raise ValueError(
|
212
235
|
f"Too many arguments passed in: {len(args)} > {len(expected_args)}"
|
@@ -219,20 +242,13 @@ def _register_op_factory(
|
|
219
242
|
raise ValueError(
|
220
243
|
f"Too many positional arguments passed in: {len(args)} > {next_param_idx}"
|
221
244
|
)
|
222
|
-
self.
|
223
|
-
make_engine_value_decoder(
|
224
|
-
[arg_name],
|
225
|
-
arg.value_type["type"],
|
226
|
-
analyze_type_info(arg_param.annotation),
|
227
|
-
)
|
228
|
-
)
|
229
|
-
process_attribute(arg_name, arg)
|
245
|
+
self._args_info.append(process_arg(arg_name, arg_param, actual_arg))
|
230
246
|
if arg_param.kind != inspect.Parameter.VAR_POSITIONAL:
|
231
247
|
next_param_idx += 1
|
232
248
|
|
233
249
|
expected_kwargs = expected_args[next_param_idx:]
|
234
250
|
|
235
|
-
for kwarg_name,
|
251
|
+
for kwarg_name, actual_arg in kwargs.items():
|
236
252
|
expected_arg = next(
|
237
253
|
(
|
238
254
|
arg
|
@@ -254,12 +270,9 @@ def _register_op_factory(
|
|
254
270
|
f"Unexpected keyword argument passed in: {kwarg_name}"
|
255
271
|
)
|
256
272
|
arg_param = expected_arg[1]
|
257
|
-
self.
|
258
|
-
|
259
|
-
kwarg.value_type["type"],
|
260
|
-
analyze_type_info(arg_param.annotation),
|
273
|
+
self._kwargs_info[kwarg_name] = process_arg(
|
274
|
+
kwarg_name, arg_param, actual_arg
|
261
275
|
)
|
262
|
-
process_attribute(kwarg_name, kwarg)
|
263
276
|
|
264
277
|
missing_args = [
|
265
278
|
name
|
@@ -280,32 +293,45 @@ def _register_op_factory(
|
|
280
293
|
if len(missing_args) > 0:
|
281
294
|
raise ValueError(f"Missing arguments: {', '.join(missing_args)}")
|
282
295
|
|
283
|
-
|
284
|
-
if
|
285
|
-
result =
|
296
|
+
base_analyze_method = getattr(self, "analyze", None)
|
297
|
+
if base_analyze_method is not None:
|
298
|
+
result = base_analyze_method(*args, **kwargs)
|
286
299
|
else:
|
287
300
|
result = expected_return
|
288
301
|
if len(attributes) > 0:
|
289
302
|
result = Annotated[result, *attributes]
|
290
|
-
|
303
|
+
|
304
|
+
encoded_type = encode_enriched_type(result)
|
305
|
+
if potentially_missing_required_arg:
|
306
|
+
encoded_type["nullable"] = True
|
307
|
+
return encoded_type
|
291
308
|
|
292
309
|
async def prepare(self) -> None:
|
293
310
|
"""
|
294
311
|
Prepare for execution.
|
295
312
|
It's executed after `analyze` and before any `__call__` execution.
|
296
313
|
"""
|
297
|
-
|
298
|
-
if
|
299
|
-
await _to_async_call(
|
314
|
+
prepare_method = getattr(super(), "prepare", None)
|
315
|
+
if prepare_method is not None:
|
316
|
+
await _to_async_call(prepare_method)()
|
300
317
|
|
301
318
|
async def __call__(self, *args: Any, **kwargs: Any) -> Any:
|
302
|
-
decoded_args =
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
}
|
319
|
+
decoded_args = []
|
320
|
+
for arg_info, arg in zip(self._args_info, args):
|
321
|
+
if arg_info.is_required and arg is None:
|
322
|
+
return None
|
323
|
+
decoded_args.append(arg_info.decoder(arg))
|
324
|
+
|
325
|
+
decoded_kwargs = {}
|
326
|
+
for kwarg_name, arg in kwargs.items():
|
327
|
+
kwarg_info = self._kwargs_info.get(kwarg_name)
|
328
|
+
if kwarg_info is None:
|
329
|
+
raise ValueError(
|
330
|
+
f"Unexpected keyword argument passed in: {kwarg_name}"
|
331
|
+
)
|
332
|
+
if kwarg_info.is_required and arg is None:
|
333
|
+
return None
|
334
|
+
decoded_kwargs[kwarg_name] = kwarg_info.decoder(arg)
|
309
335
|
|
310
336
|
if op_args.gpu:
|
311
337
|
# For GPU executions, data-level parallelism is applied, so we don't want to
|
@@ -101,3 +101,44 @@ async def test_for_each_transform_flow_async() -> None:
|
|
101
101
|
}
|
102
102
|
|
103
103
|
assert result == expected, f"Expected {expected}, got {result}"
|
104
|
+
|
105
|
+
|
106
|
+
def test_none_arg_yield_none_result() -> None:
|
107
|
+
"""Test that None arguments yield None results."""
|
108
|
+
|
109
|
+
@cocoindex.op.function()
|
110
|
+
def custom_fn(
|
111
|
+
required_arg: int,
|
112
|
+
optional_arg: int | None,
|
113
|
+
required_kwarg: int,
|
114
|
+
optional_kwarg: int | None,
|
115
|
+
) -> int:
|
116
|
+
return (
|
117
|
+
required_arg + (optional_arg or 0) + required_kwarg + (optional_kwarg or 0)
|
118
|
+
)
|
119
|
+
|
120
|
+
@cocoindex.transform_flow()
|
121
|
+
def transform_flow(
|
122
|
+
required_arg: cocoindex.DataSlice[int | None],
|
123
|
+
optional_arg: cocoindex.DataSlice[int | None],
|
124
|
+
required_kwarg: cocoindex.DataSlice[int | None],
|
125
|
+
optional_kwarg: cocoindex.DataSlice[int | None],
|
126
|
+
) -> cocoindex.DataSlice[int | None]:
|
127
|
+
return required_arg.transform(
|
128
|
+
custom_fn,
|
129
|
+
optional_arg,
|
130
|
+
required_kwarg=required_kwarg,
|
131
|
+
optional_kwarg=optional_kwarg,
|
132
|
+
)
|
133
|
+
|
134
|
+
result = transform_flow.eval(1, 2, 4, 8)
|
135
|
+
assert result == 15, f"Expected 15, got {result}"
|
136
|
+
|
137
|
+
result = transform_flow.eval(1, None, 4, None)
|
138
|
+
assert result == 5, f"Expected 5, got {result}"
|
139
|
+
|
140
|
+
result = transform_flow.eval(None, 2, 4, 8)
|
141
|
+
assert result is None, f"Expected None, got {result}"
|
142
|
+
|
143
|
+
result = transform_flow.eval(1, 2, None, None)
|
144
|
+
assert result is None, f"Expected None, got {result}"
|
@@ -530,6 +530,17 @@ pub async fn evaluate_source_entry(
|
|
530
530
|
value::Value::KTable(BTreeMap::from([(src_eval_ctx.key.clone(), scope_value)])),
|
531
531
|
)?;
|
532
532
|
|
533
|
+
// Fill other source fields with empty tables
|
534
|
+
for import_op in src_eval_ctx.plan.import_ops.iter() {
|
535
|
+
let field_idx = import_op.output.field_idx;
|
536
|
+
if field_idx != src_eval_ctx.import_op.output.field_idx {
|
537
|
+
root_scope_entry.define_field(
|
538
|
+
&AnalyzedOpOutput { field_idx },
|
539
|
+
&value::Value::KTable(BTreeMap::new()),
|
540
|
+
)?;
|
541
|
+
}
|
542
|
+
}
|
543
|
+
|
533
544
|
evaluate_op_scope(
|
534
545
|
&src_eval_ctx.plan.op_scope,
|
535
546
|
RefList::Nil.prepend(&root_scope_entry),
|
File without changes
|
File without changes
|
{cocoindex-0.1.76 → cocoindex-0.1.77}/.github/ISSUE_TEMPLATE//360/237/220/233-bug-report.md"
RENAMED
File without changes
|
{cocoindex-0.1.76 → cocoindex-0.1.77}/.github/ISSUE_TEMPLATE//360/237/222/241-feature-request.md"
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|