cocoindex 0.1.76__tar.gz → 0.1.77__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (312) hide show
  1. {cocoindex-0.1.76 → cocoindex-0.1.77}/Cargo.lock +1 -1
  2. {cocoindex-0.1.76 → cocoindex-0.1.77}/Cargo.toml +1 -1
  3. {cocoindex-0.1.76 → cocoindex-0.1.77}/PKG-INFO +2 -1
  4. {cocoindex-0.1.76 → cocoindex-0.1.77}/README.md +1 -0
  5. {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/docs/core/data_types.mdx +20 -0
  6. {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/docs/ops/functions.md +9 -9
  7. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/image_search/pyproject.toml +1 -1
  8. cocoindex-0.1.77/examples/multi_format_indexing/README.md +71 -0
  9. cocoindex-0.1.77/examples/multi_format_indexing/main.py +135 -0
  10. cocoindex-0.1.77/examples/multi_format_indexing/pyproject.toml +14 -0
  11. cocoindex-0.1.77/examples/multi_format_indexing/source_files/cat1.jpeg +0 -0
  12. cocoindex-0.1.77/examples/multi_format_indexing/source_files/dog1.jpeg +0 -0
  13. cocoindex-0.1.77/examples/multi_format_indexing/source_files/elephant1.jpg +0 -0
  14. cocoindex-0.1.77/examples/multi_format_indexing/source_files/giraffe.jpg +0 -0
  15. cocoindex-0.1.77/examples/pdf_embedding/pdf_files/1706.03762v7.pdf +0 -0
  16. cocoindex-0.1.77/examples/pdf_embedding/pdf_files/1810.04805v2.pdf +0 -0
  17. cocoindex-0.1.77/examples/pdf_embedding/pdf_files/rfc8259.pdf +0 -0
  18. cocoindex-0.1.77/examples/text_embedding_qdrant/.env +2 -0
  19. {cocoindex-0.1.76 → cocoindex-0.1.77}/python/cocoindex/op.py +65 -39
  20. {cocoindex-0.1.76 → cocoindex-0.1.77}/python/cocoindex/tests/test_transform_flow.py +41 -0
  21. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/execution/evaluator.rs +11 -0
  22. {cocoindex-0.1.76 → cocoindex-0.1.77}/.cargo/config.toml +0 -0
  23. {cocoindex-0.1.76 → cocoindex-0.1.77}/.env.lib_debug +0 -0
  24. {cocoindex-0.1.76 → cocoindex-0.1.77}/.github/ISSUE_TEMPLATE/🐛-bug-report.md +0 -0
  25. {cocoindex-0.1.76 → cocoindex-0.1.77}/.github/ISSUE_TEMPLATE/💡-feature-request.md +0 -0
  26. {cocoindex-0.1.76 → cocoindex-0.1.77}/.github/scripts/update_version.sh +0 -0
  27. {cocoindex-0.1.76 → cocoindex-0.1.77}/.github/workflows/CI.yml +0 -0
  28. {cocoindex-0.1.76 → cocoindex-0.1.77}/.github/workflows/_doc_release.yml +0 -0
  29. {cocoindex-0.1.76 → cocoindex-0.1.77}/.github/workflows/_test.yml +0 -0
  30. {cocoindex-0.1.76 → cocoindex-0.1.77}/.github/workflows/docs.yml +0 -0
  31. {cocoindex-0.1.76 → cocoindex-0.1.77}/.github/workflows/format.yml +0 -0
  32. {cocoindex-0.1.76 → cocoindex-0.1.77}/.github/workflows/release.yml +0 -0
  33. {cocoindex-0.1.76 → cocoindex-0.1.77}/.gitignore +0 -0
  34. {cocoindex-0.1.76 → cocoindex-0.1.77}/.pre-commit-config.yaml +0 -0
  35. {cocoindex-0.1.76 → cocoindex-0.1.77}/CODE_OF_CONDUCT.md +0 -0
  36. {cocoindex-0.1.76 → cocoindex-0.1.77}/CONTRIBUTING.md +0 -0
  37. {cocoindex-0.1.76 → cocoindex-0.1.77}/LICENSE +0 -0
  38. {cocoindex-0.1.76 → cocoindex-0.1.77}/dev/neo4j.yaml +0 -0
  39. {cocoindex-0.1.76 → cocoindex-0.1.77}/dev/postgres.yaml +0 -0
  40. {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/.gitignore +0 -0
  41. {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/README.md +0 -0
  42. {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/docs/about/community.md +0 -0
  43. {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/docs/about/contributing.md +0 -0
  44. {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/docs/ai/llm.mdx +0 -0
  45. {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/docs/core/basics.md +0 -0
  46. {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/docs/core/cli.mdx +0 -0
  47. {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/docs/core/data_example.svg +0 -0
  48. {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/docs/core/flow_def.mdx +0 -0
  49. {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/docs/core/flow_example.svg +0 -0
  50. {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/docs/core/flow_methods.mdx +0 -0
  51. {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/docs/core/settings.mdx +0 -0
  52. {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/docs/custom_ops/custom_functions.mdx +0 -0
  53. {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/docs/custom_ops/custom_targets.mdx +0 -0
  54. {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/docs/getting_started/installation.md +0 -0
  55. {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/docs/getting_started/markdown_files.zip +0 -0
  56. {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/docs/getting_started/overview.md +0 -0
  57. {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/docs/getting_started/quickstart.md +0 -0
  58. {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/docs/ops/sources.md +0 -0
  59. {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/docs/ops/targets.md +0 -0
  60. {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/docs/query.mdx +0 -0
  61. {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/docs/tutorials/live_updates.md +0 -0
  62. {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/docs/tutorials/manage_flow_dynamically.mdx +0 -0
  63. {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/docusaurus.config.ts +0 -0
  64. {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/package.json +0 -0
  65. {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/sidebars.ts +0 -0
  66. {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/src/components/HomepageFeatures/index.tsx +0 -0
  67. {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/src/components/HomepageFeatures/styles.module.css +0 -0
  68. {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/src/css/custom.css +0 -0
  69. {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/src/theme/Root.js +0 -0
  70. {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/static/.nojekyll +0 -0
  71. {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/static/img/docusaurus.png +0 -0
  72. {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/static/img/favicon.ico +0 -0
  73. {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/static/img/icon.svg +0 -0
  74. {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/static/img/incremental-etl.gif +0 -0
  75. {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/static/robots.txt +0 -0
  76. {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/tsconfig.json +0 -0
  77. {cocoindex-0.1.76 → cocoindex-0.1.77}/docs/yarn.lock +0 -0
  78. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/amazon_s3_embedding/.env.example +0 -0
  79. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/amazon_s3_embedding/.gitignore +0 -0
  80. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/amazon_s3_embedding/README.md +0 -0
  81. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/amazon_s3_embedding/main.py +0 -0
  82. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/amazon_s3_embedding/pyproject.toml +0 -0
  83. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/azure_blob_embedding/.env.example +0 -0
  84. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/azure_blob_embedding/.gitignore +0 -0
  85. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/azure_blob_embedding/README.md +0 -0
  86. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/azure_blob_embedding/main.py +0 -0
  87. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/azure_blob_embedding/pyproject.toml +0 -0
  88. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/code_embedding/.env +0 -0
  89. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/code_embedding/README.md +0 -0
  90. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/code_embedding/main.py +0 -0
  91. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/code_embedding/pyproject.toml +0 -0
  92. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/custom_output_files/.env +0 -0
  93. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/custom_output_files/.gitignore +0 -0
  94. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/custom_output_files/README.md +0 -0
  95. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/custom_output_files/data/bizarre_animals.md +0 -0
  96. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/custom_output_files/data/chunk_norris.md +0 -0
  97. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/custom_output_files/main.py +0 -0
  98. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/custom_output_files/pyproject.toml +0 -0
  99. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/docs_to_knowledge_graph/.env +0 -0
  100. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/docs_to_knowledge_graph/README.md +0 -0
  101. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/docs_to_knowledge_graph/main.py +0 -0
  102. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/docs_to_knowledge_graph/pyproject.toml +0 -0
  103. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/face_recognition/.env +0 -0
  104. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/face_recognition/README.md +0 -0
  105. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/face_recognition/images/Carter_welcomes_Reagan.jpg +0 -0
  106. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/face_recognition/images/Solvay_conference_1927.jpg +0 -0
  107. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/face_recognition/images/Steve_Jobs_and_Bill_Gates_(522695099).jpg +0 -0
  108. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/face_recognition/images/einplanck3.jpg +0 -0
  109. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/face_recognition/main.py +0 -0
  110. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/face_recognition/pyproject.toml +0 -0
  111. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/fastapi_server_docker/.dockerignore +0 -0
  112. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/fastapi_server_docker/.env +0 -0
  113. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/fastapi_server_docker/README.md +0 -0
  114. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/fastapi_server_docker/compose.yaml +0 -0
  115. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/fastapi_server_docker/dockerfile +0 -0
  116. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/fastapi_server_docker/files/1810.04805v2.md +0 -0
  117. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/fastapi_server_docker/main.py +0 -0
  118. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/fastapi_server_docker/requirements.txt +0 -0
  119. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/gdrive_text_embedding/.env.example +0 -0
  120. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/gdrive_text_embedding/.gitignore +0 -0
  121. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/gdrive_text_embedding/README.md +0 -0
  122. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/gdrive_text_embedding/main.py +0 -0
  123. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/gdrive_text_embedding/pyproject.toml +0 -0
  124. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/image_search/.env +0 -0
  125. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/image_search/README.md +0 -0
  126. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/image_search/colpali_main.py +0 -0
  127. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/image_search/frontend/.gitignore +0 -0
  128. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/image_search/frontend/index.html +0 -0
  129. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/image_search/frontend/package-lock.json +0 -0
  130. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/image_search/frontend/package.json +0 -0
  131. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/image_search/frontend/src/App.jsx +0 -0
  132. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/image_search/frontend/src/main.jsx +0 -0
  133. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/image_search/frontend/src/style.css +0 -0
  134. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/image_search/frontend/vite.config.js +0 -0
  135. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/image_search/img/cat1.jpeg +0 -0
  136. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/image_search/img/dog1.jpeg +0 -0
  137. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/image_search/img/elephant1.jpg +0 -0
  138. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/image_search/img/giraffe.jpg +0 -0
  139. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/image_search/main.py +0 -0
  140. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/live_updates/.env +0 -0
  141. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/live_updates/README.md +0 -0
  142. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/live_updates/data/bizarre_animals.md +0 -0
  143. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/live_updates/data/chunk_norris.md +0 -0
  144. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/live_updates/main.py +0 -0
  145. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/live_updates/pyproject.toml +0 -0
  146. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/manuals_llm_extraction/.env +0 -0
  147. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/manuals_llm_extraction/README.md +0 -0
  148. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/manuals_llm_extraction/main.py +0 -0
  149. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/manuals_llm_extraction/manuals/array.pdf +0 -0
  150. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/manuals_llm_extraction/manuals/base64.pdf +0 -0
  151. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/manuals_llm_extraction/manuals/copy.pdf +0 -0
  152. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/manuals_llm_extraction/manuals/glob.pdf +0 -0
  153. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/manuals_llm_extraction/pyproject.toml +0 -0
  154. {cocoindex-0.1.76/examples/pdf_embedding → cocoindex-0.1.77/examples/multi_format_indexing}/.env +0 -0
  155. {cocoindex-0.1.76/examples/paper_metadata/papers → cocoindex-0.1.77/examples/multi_format_indexing/source_files}/1706.03762v7.pdf +0 -0
  156. {cocoindex-0.1.76/examples/paper_metadata/papers → cocoindex-0.1.77/examples/multi_format_indexing/source_files}/1810.04805v2.pdf +0 -0
  157. {cocoindex-0.1.76/examples/pdf_embedding/pdf_files → cocoindex-0.1.77/examples/multi_format_indexing/source_files}/rfc8259.pdf +0 -0
  158. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/paper_metadata/.env.example +0 -0
  159. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/paper_metadata/.gitignore +0 -0
  160. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/paper_metadata/README.md +0 -0
  161. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/paper_metadata/main.py +0 -0
  162. {cocoindex-0.1.76/examples/pdf_embedding/pdf_files → cocoindex-0.1.77/examples/paper_metadata/papers}/1706.03762v7.pdf +0 -0
  163. {cocoindex-0.1.76/examples/pdf_embedding/pdf_files → cocoindex-0.1.77/examples/paper_metadata/papers}/1810.04805v2.pdf +0 -0
  164. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/paper_metadata/papers/2502.06786v3.pdf +0 -0
  165. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/paper_metadata/papers/2502.20346v1.pdf +0 -0
  166. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/paper_metadata/pyproject.toml +0 -0
  167. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/patient_intake_extraction/.env.example +0 -0
  168. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/patient_intake_extraction/README.md +0 -0
  169. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/patient_intake_extraction/data/README.md +0 -0
  170. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/patient_intake_extraction/data/patient_forms/Patient_Intake_Form_David_Artificial.docx +0 -0
  171. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/patient_intake_extraction/data/patient_forms/Patient_Intake_Form_Emily_Artificial.pdf +0 -0
  172. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/patient_intake_extraction/data/patient_forms/Patient_Intake_Form_Joe_Artificial.pdf +0 -0
  173. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/patient_intake_extraction/data/patient_forms/Patient_Intake_From_Jane_Artificial.docx +0 -0
  174. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/patient_intake_extraction/main.py +0 -0
  175. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/patient_intake_extraction/pyproject.toml +0 -0
  176. {cocoindex-0.1.76/examples/product_recommendation → cocoindex-0.1.77/examples/pdf_embedding}/.env +0 -0
  177. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/pdf_embedding/README.md +0 -0
  178. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/pdf_embedding/main.py +0 -0
  179. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/pdf_embedding/pyproject.toml +0 -0
  180. {cocoindex-0.1.76/examples/text_embedding → cocoindex-0.1.77/examples/product_recommendation}/.env +0 -0
  181. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/product_recommendation/README.md +0 -0
  182. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/product_recommendation/img/cocoinsight.png +0 -0
  183. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/product_recommendation/img/neo4j.png +0 -0
  184. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/product_recommendation/main.py +0 -0
  185. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/product_recommendation/products/p1.json +0 -0
  186. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/product_recommendation/products/p2.json +0 -0
  187. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/product_recommendation/products/p3.json +0 -0
  188. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/product_recommendation/products/p4.json +0 -0
  189. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/product_recommendation/products/p5.json +0 -0
  190. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/product_recommendation/products/p6.json +0 -0
  191. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/product_recommendation/products/p7.json +0 -0
  192. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/product_recommendation/products/p8.json +0 -0
  193. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/product_recommendation/products/p9.json +0 -0
  194. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/product_recommendation/pyproject.toml +0 -0
  195. {cocoindex-0.1.76/examples/text_embedding_qdrant → cocoindex-0.1.77/examples/text_embedding}/.env +0 -0
  196. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/text_embedding/README.md +0 -0
  197. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/text_embedding/Text_Embedding.ipynb +0 -0
  198. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/text_embedding/main.py +0 -0
  199. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/text_embedding/markdown_files/1706.03762v7.md +0 -0
  200. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/text_embedding/markdown_files/1810.04805v2.md +0 -0
  201. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/text_embedding/markdown_files/rfc8259.md +0 -0
  202. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/text_embedding/pyproject.toml +0 -0
  203. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/text_embedding_qdrant/README.md +0 -0
  204. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/text_embedding_qdrant/main.py +0 -0
  205. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/text_embedding_qdrant/markdown_files/rfc8259.md +0 -0
  206. {cocoindex-0.1.76 → cocoindex-0.1.77}/examples/text_embedding_qdrant/pyproject.toml +0 -0
  207. {cocoindex-0.1.76 → cocoindex-0.1.77}/pyproject.toml +0 -0
  208. {cocoindex-0.1.76 → cocoindex-0.1.77}/python/cocoindex/__init__.py +0 -0
  209. {cocoindex-0.1.76 → cocoindex-0.1.77}/python/cocoindex/auth_registry.py +0 -0
  210. {cocoindex-0.1.76 → cocoindex-0.1.77}/python/cocoindex/cli.py +0 -0
  211. {cocoindex-0.1.76 → cocoindex-0.1.77}/python/cocoindex/convert.py +0 -0
  212. {cocoindex-0.1.76 → cocoindex-0.1.77}/python/cocoindex/flow.py +0 -0
  213. {cocoindex-0.1.76 → cocoindex-0.1.77}/python/cocoindex/functions.py +0 -0
  214. {cocoindex-0.1.76 → cocoindex-0.1.77}/python/cocoindex/index.py +0 -0
  215. {cocoindex-0.1.76 → cocoindex-0.1.77}/python/cocoindex/lib.py +0 -0
  216. {cocoindex-0.1.76 → cocoindex-0.1.77}/python/cocoindex/llm.py +0 -0
  217. {cocoindex-0.1.76 → cocoindex-0.1.77}/python/cocoindex/py.typed +0 -0
  218. {cocoindex-0.1.76 → cocoindex-0.1.77}/python/cocoindex/runtime.py +0 -0
  219. {cocoindex-0.1.76 → cocoindex-0.1.77}/python/cocoindex/setting.py +0 -0
  220. {cocoindex-0.1.76 → cocoindex-0.1.77}/python/cocoindex/setup.py +0 -0
  221. {cocoindex-0.1.76 → cocoindex-0.1.77}/python/cocoindex/sources.py +0 -0
  222. {cocoindex-0.1.76 → cocoindex-0.1.77}/python/cocoindex/targets.py +0 -0
  223. {cocoindex-0.1.76 → cocoindex-0.1.77}/python/cocoindex/tests/__init__.py +0 -0
  224. {cocoindex-0.1.76 → cocoindex-0.1.77}/python/cocoindex/tests/test_convert.py +0 -0
  225. {cocoindex-0.1.76 → cocoindex-0.1.77}/python/cocoindex/tests/test_optional_database.py +0 -0
  226. {cocoindex-0.1.76 → cocoindex-0.1.77}/python/cocoindex/tests/test_typing.py +0 -0
  227. {cocoindex-0.1.76 → cocoindex-0.1.77}/python/cocoindex/tests/test_validation.py +0 -0
  228. {cocoindex-0.1.76 → cocoindex-0.1.77}/python/cocoindex/typing.py +0 -0
  229. {cocoindex-0.1.76 → cocoindex-0.1.77}/python/cocoindex/utils.py +0 -0
  230. {cocoindex-0.1.76 → cocoindex-0.1.77}/python/cocoindex/validation.py +0 -0
  231. {cocoindex-0.1.76 → cocoindex-0.1.77}/ruff.toml +0 -0
  232. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/base/duration.rs +0 -0
  233. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/base/field_attrs.rs +0 -0
  234. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/base/json_schema.rs +0 -0
  235. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/base/mod.rs +0 -0
  236. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/base/schema.rs +0 -0
  237. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/base/spec.rs +0 -0
  238. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/base/value.rs +0 -0
  239. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/builder/analyzed_flow.rs +0 -0
  240. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/builder/analyzer.rs +0 -0
  241. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/builder/exec_ctx.rs +0 -0
  242. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/builder/flow_builder.rs +0 -0
  243. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/builder/mod.rs +0 -0
  244. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/builder/plan.rs +0 -0
  245. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/execution/db_tracking.rs +0 -0
  246. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/execution/db_tracking_setup.rs +0 -0
  247. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/execution/dumper.rs +0 -0
  248. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/execution/indexing_status.rs +0 -0
  249. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/execution/live_updater.rs +0 -0
  250. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/execution/memoization.rs +0 -0
  251. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/execution/mod.rs +0 -0
  252. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/execution/row_indexer.rs +0 -0
  253. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/execution/source_indexer.rs +0 -0
  254. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/execution/stats.rs +0 -0
  255. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/lib.rs +0 -0
  256. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/lib_context.rs +0 -0
  257. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/llm/anthropic.rs +0 -0
  258. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/llm/gemini.rs +0 -0
  259. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/llm/litellm.rs +0 -0
  260. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/llm/mod.rs +0 -0
  261. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/llm/ollama.rs +0 -0
  262. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/llm/openai.rs +0 -0
  263. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/llm/openrouter.rs +0 -0
  264. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/llm/vllm.rs +0 -0
  265. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/llm/voyage.rs +0 -0
  266. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/ops/factory_bases.rs +0 -0
  267. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/ops/functions/embed_text.rs +0 -0
  268. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/ops/functions/extract_by_llm.rs +0 -0
  269. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/ops/functions/mod.rs +0 -0
  270. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/ops/functions/parse_json.rs +0 -0
  271. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/ops/functions/split_recursively.rs +0 -0
  272. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/ops/functions/test_utils.rs +0 -0
  273. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/ops/interface.rs +0 -0
  274. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/ops/mod.rs +0 -0
  275. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/ops/py_factory.rs +0 -0
  276. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/ops/registration.rs +0 -0
  277. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/ops/registry.rs +0 -0
  278. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/ops/sdk.rs +0 -0
  279. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/ops/sources/amazon_s3.rs +0 -0
  280. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/ops/sources/azure_blob.rs +0 -0
  281. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/ops/sources/google_drive.rs +0 -0
  282. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/ops/sources/local_file.rs +0 -0
  283. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/ops/sources/mod.rs +0 -0
  284. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/ops/targets/kuzu.rs +0 -0
  285. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/ops/targets/mod.rs +0 -0
  286. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/ops/targets/neo4j.rs +0 -0
  287. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/ops/targets/postgres.rs +0 -0
  288. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/ops/targets/qdrant.rs +0 -0
  289. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/ops/targets/shared/mod.rs +0 -0
  290. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/ops/targets/shared/property_graph.rs +0 -0
  291. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/ops/targets/shared/table_columns.rs +0 -0
  292. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/prelude.rs +0 -0
  293. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/py/convert.rs +0 -0
  294. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/py/mod.rs +0 -0
  295. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/server.rs +0 -0
  296. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/service/error.rs +0 -0
  297. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/service/flows.rs +0 -0
  298. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/service/mod.rs +0 -0
  299. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/settings.rs +0 -0
  300. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/setup/auth_registry.rs +0 -0
  301. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/setup/components.rs +0 -0
  302. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/setup/db_metadata.rs +0 -0
  303. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/setup/driver.rs +0 -0
  304. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/setup/mod.rs +0 -0
  305. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/setup/states.rs +0 -0
  306. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/utils/concur_control.rs +0 -0
  307. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/utils/db.rs +0 -0
  308. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/utils/fingerprint.rs +0 -0
  309. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/utils/immutable.rs +0 -0
  310. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/utils/mod.rs +0 -0
  311. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/utils/retryable.rs +0 -0
  312. {cocoindex-0.1.76 → cocoindex-0.1.77}/src/utils/yaml_ser.rs +0 -0
@@ -1297,7 +1297,7 @@ dependencies = [
1297
1297
 
1298
1298
  [[package]]
1299
1299
  name = "cocoindex"
1300
- version = "0.1.76"
1300
+ version = "0.1.77"
1301
1301
  dependencies = [
1302
1302
  "anyhow",
1303
1303
  "async-openai",
@@ -2,7 +2,7 @@
2
2
  name = "cocoindex"
3
3
  # Version used for local development is always higher than others to take precedence.
4
4
  # Will be overridden for specific release versions.
5
- version = "0.1.76"
5
+ version = "0.1.77"
6
6
  edition = "2024"
7
7
  rust-version = "1.88"
8
8
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cocoindex
3
- Version: 0.1.76
3
+ Version: 0.1.77
4
4
  Requires-Dist: click>=8.1.8
5
5
  Requires-Dist: rich>=14.0.0
6
6
  Requires-Dist: python-dotenv>=1.1.0
@@ -214,6 +214,7 @@ It defines an index flow like this:
214
214
  | [Image Search with Vision API](examples/image_search) | Generates detailed captions for images using a vision model, embeds them, and enables live-updating semantic search via FastAPI, served on a React frontend |
215
215
  | [Face Recognition](examples/face_recognition) | Recognize faces in images and build embedding index |
216
216
  | [Paper Metadata](examples/paper_metadata) | Index papers in PDF files, and build metadata tables for each paper |
217
+ | [Multi Format Indexing](examples/multi_format_indexing) | Build visual document index from PDFs and images with ColPali for semantic search |
217
218
  | [Custom Output Files](examples/custom_output_files) | Convert markdown files to HTML files and save them to a local directory, using *CocoIndex Custom Targets* |
218
219
 
219
220
  More coming and stay tuned 👀!
@@ -185,6 +185,7 @@ It defines an index flow like this:
185
185
  | [Image Search with Vision API](examples/image_search) | Generates detailed captions for images using a vision model, embeds them, and enables live-updating semantic search via FastAPI, served on a React frontend |
186
186
  | [Face Recognition](examples/face_recognition) | Recognize faces in images and build embedding index |
187
187
  | [Paper Metadata](examples/paper_metadata) | Index papers in PDF files, and build metadata tables for each paper |
188
+ | [Multi Format Indexing](examples/multi_format_indexing) | Build visual document index from PDFs and images with ColPali for semantic search |
188
189
  | [Custom Output Files](examples/custom_output_files) | Convert markdown files to HTML files and save them to a local directory, using *CocoIndex Custom Targets* |
189
190
 
190
191
  More coming and stay tuned 👀!
@@ -199,3 +199,23 @@ Currently, the following types are key types
199
199
  - *Uuid*
200
200
  - *Date*
201
201
  - *Struct* with all fields being key types (using `@dataclass(frozen=True)` or `NamedTuple`)
202
+
203
+ ## *Null* Values
204
+
205
+ CocoIndex supports *Null* values. A *Null* value represents the absence of data or an unknown value, distinct from empty strings, zero numbers, or false boolean values.
206
+
207
+ ### Nullable Type
208
+
209
+ Any piece of data (e.g. a field of a *Struct*, or an argument or return value of a CocoIndex function) can be nullable, which means its value can be *Null*.
210
+ We use a `?` suffix to indicate a nullable type, e.g. *Str?*, *Person?*.
211
+
212
+ In Python, *Null* is represented as `None`, so a nullable type can be represented by `T | None` or `typing.Optional[T]`.
213
+
214
+ ### *Null* propagation in CocoIndex functions
215
+
216
+ A function may specify whether each input argument is nullable or not.
217
+ A non-nullable argument means the function needs a known value for that argument in order to work.
218
+ However, it doesn't forbid the argument from being *Null* at runtime.
219
+ When a non-nullable argument receives a *Null* value, the function execution is skipped and the result is *Null*.
220
+
221
+ For example, for [`SplitRecursively` function](/docs/ops/functions#splitrecursively), the `text` and `chunk_size` arguments are not nullable. If the input value of either of them is *Null*, the function will return *Null*.
@@ -9,12 +9,12 @@ description: CocoIndex Built-in Functions
9
9
 
10
10
  `ParseJson` parses a given text to JSON.
11
11
 
12
- The spec takes the following fields:
12
+ Input data:
13
13
 
14
- * `text` (`str`): The source text to parse.
15
- * `language` (`str`, optional): The language of the source text. Only `json` is supported now. Default to `json`.
14
+ * `text` (*Str*): The source text to parse.
15
+ * `language` (*Str?*, default: `"json"`): The language of the source text. Only `json` is supported now.
16
16
 
17
- Return: *Json*
17
+ Return: *Json*, the parsed JSON object.
18
18
 
19
19
  ## SplitRecursively
20
20
 
@@ -37,7 +37,7 @@ Input data:
37
37
 
38
38
  * `text` (*Str*): The text to split.
39
39
  * `chunk_size` (*Int64*): The maximum size of each chunk, in bytes.
40
- * `min_chunk_size` (*Int64*, optional): The minimum size of each chunk, in bytes. If not provided, default to `chunk_size / 2`.
40
+ * `min_chunk_size` (*Int64*, default: `chunk_size / 2`): The minimum size of each chunk, in bytes.
41
41
 
42
42
  :::note
43
43
 
@@ -48,8 +48,8 @@ Input data:
48
48
 
49
49
  :::
50
50
 
51
- * `chunk_overlap` (*Int64*, optional): The maximum overlap size between adjacent chunks, in bytes.
52
- * `language` (*Str*, optional): The language of the document.
51
+ * `chunk_overlap` (*Int64?*, default: *Null*): The maximum overlap size between adjacent chunks, in bytes.
52
+ * `language` (*Str*, default: `""`): The language of the document.
53
53
  Can be a language name (e.g. `Python`, `Javascript`, `Markdown`) or a file extension (e.g. `.py`, `.js`, `.md`).
54
54
 
55
55
 
@@ -61,7 +61,7 @@ Input data:
61
61
  * `custom_languages` in the spec, against the `language_name` or `aliases` field of each entry.
62
62
  * Builtin languages (see [Supported Languages](#supported-languages) section below), against the language, aliases or file extensions of each entry.
63
63
 
64
- All matches are in a case-insensitive manner. If the value of `language` is null, it'll be treated as empty string.
64
+ All matches are in a case-insensitive manner.
65
65
 
66
66
  * If no match is found, the input will be treated as plain text.
67
67
 
@@ -185,7 +185,7 @@ Not all LLM APIs support text embedding. See the [LLM API Types table](/docs/ai/
185
185
 
186
186
  Input data:
187
187
 
188
- * `text` (*Str*, required): The text to embed.
188
+ * `text` (*Str*): The text to embed.
189
189
 
190
190
  Return: *Vector[Float32, N]*, where *N* is the dimension of the embedding vector determined by the model.
191
191
 
@@ -4,7 +4,7 @@ version = "0.1.0"
4
4
  description = "Image search examples for cocoindex: CLIP and ColPali-based embedding."
5
5
  requires-python = ">=3.11"
6
6
  dependencies = [
7
- "cocoindex[colpali]>=0.1.75",
7
+ "cocoindex[colpali]>=0.1.76",
8
8
  "python-dotenv>=1.0.1",
9
9
  "fastapi>=0.100.0",
10
10
  "torch>=2.0.0",
@@ -0,0 +1,71 @@
1
+ # Build visual document index from PDFs and images with ColPali
2
+ [![GitHub](https://img.shields.io/github/stars/cocoindex-io/cocoindex?color=5B5BD6)](https://github.com/cocoindex-io/cocoindex)
3
+
4
+
5
+ In this example, we build a visual document indexing flow using ColPali for embedding PDFs and images, and query the index with natural language.
6
+
7
+ We appreciate a star ⭐ at [CocoIndex Github](https://github.com/cocoindex-io/cocoindex) if this is helpful.
8
+
9
+ ## Steps
10
+ ### Indexing Flow
11
+
12
+ 1. We ingest a list of PDF files and image files from the `source_files` directory.
13
+ 2. For each file:
14
+ - **PDF files**: convert each page to a high-resolution image (300 DPI)
15
+ - **Image files**: use the image directly
16
+ - Generate visual embeddings for each page/image using the ColPali model
17
+ 3. We will save the embeddings and metadata in the Qdrant vector database.
18
+
19
+ ### Query
20
+ We will match against user-provided natural language text using ColPali's text-to-visual embedding capability, enabling semantic search across visual document content.
21
+
22
+
23
+
24
+ ## Prerequisite
25
+ [Install Qdrant](https://qdrant.tech/documentation/guides/installation/) if you don't have one running locally.
26
+
27
+ You can start Qdrant with Docker:
28
+ ```bash
29
+ docker run -p 6333:6333 -p 6334:6334 qdrant/qdrant
30
+ ```
31
+
32
+ ## Run
33
+
34
+ Install dependencies:
35
+
36
+ ```bash
37
+ pip install -e .
38
+ ```
39
+
40
+ Setup:
41
+
42
+ ```bash
43
+ cocoindex setup main.py
44
+ ```
45
+
46
+ Update index:
47
+
48
+ ```bash
49
+ cocoindex update main.py
50
+ ```
51
+
52
+ Run:
53
+
54
+ ```bash
55
+ python main.py
56
+ ```
57
+
58
+ ## About ColPali
59
+ This example uses [ColPali](https://github.com/illuin-tech/colpali), a state-of-the-art vision-language model that enables:
60
+ - Direct visual understanding of document layouts, tables, and figures
61
+ - Natural language queries against visual document content
62
+ - No need for OCR or text extraction - works directly with document images
63
+
64
+ ## CocoInsight
65
+ I used CocoInsight (free beta now) to troubleshoot the index generation and understand the data lineage of the pipeline. It just connects to your local CocoIndex server, with zero pipeline data retention. Run the following command to start CocoInsight:
66
+
67
+ ```
68
+ cocoindex server -ci main.py
69
+ ```
70
+
71
+ Then open the CocoInsight UI at [https://cocoindex.io/cocoinsight](https://cocoindex.io/cocoinsight).
@@ -0,0 +1,135 @@
1
+ import cocoindex
2
+ import os
3
+ import mimetypes
4
+
5
+ from dotenv import load_dotenv
6
+ from dataclasses import dataclass
7
+ from pdf2image import convert_from_bytes
8
+ from io import BytesIO
9
+
10
+ from qdrant_client import QdrantClient
11
+
12
+ QDRANT_GRPC_URL = "http://localhost:6334"
13
+ QDRANT_COLLECTION = "MultiFormatIndexings"
14
+ COLPALI_MODEL_NAME = os.getenv("COLPALI_MODEL", "vidore/colpali-v1.2")
15
+
16
+
17
+ @dataclass
18
+ class Page:
19
+ page_number: int | None
20
+ image: bytes
21
+
22
+
23
+ @cocoindex.op.function()
24
+ def file_to_pages(filename: str, content: bytes) -> list[Page]:
25
+ """
26
+ Classify file content based on MIME type detection.
27
+ Returns ClassifiedFileContent with appropriate field populated based on file type.
28
+ """
29
+ # Guess the MIME type based on the filename
30
+ mime_type, _ = mimetypes.guess_type(filename)
31
+
32
+ if mime_type == "application/pdf":
33
+ images = convert_from_bytes(content, dpi=300)
34
+ pages = []
35
+ for i, image in enumerate(images):
36
+ with BytesIO() as buffer:
37
+ image.save(buffer, format="PNG")
38
+ pages.append(Page(page_number=i + 1, image=buffer.getvalue()))
39
+ return pages
40
+ elif mime_type and mime_type.startswith("image/"):
41
+ return [Page(page_number=None, image=content)]
42
+ else:
43
+ return []
44
+
45
+
46
+ qdrant_connection = cocoindex.add_auth_entry(
47
+ "qdrant_connection",
48
+ cocoindex.targets.QdrantConnection(grpc_url=QDRANT_GRPC_URL),
49
+ )
50
+
51
+
52
+ @cocoindex.flow_def(name="MultiFormatIndexing")
53
+ def multi_format_indexing_flow(
54
+ flow_builder: cocoindex.FlowBuilder, data_scope: cocoindex.DataScope
55
+ ) -> None:
56
+ """
57
+ Define an example flow that embeds files into a vector database.
58
+ """
59
+ data_scope["documents"] = flow_builder.add_source(
60
+ cocoindex.sources.LocalFile(path="source_files", binary=True)
61
+ )
62
+
63
+ output_embeddings = data_scope.add_collector()
64
+
65
+ with data_scope["documents"].row() as doc:
66
+ doc["pages"] = flow_builder.transform(
67
+ file_to_pages, filename=doc["filename"], content=doc["content"]
68
+ )
69
+ with doc["pages"].row() as page:
70
+ page["embedding"] = page["image"].transform(
71
+ cocoindex.functions.ColPaliEmbedImage(model=COLPALI_MODEL_NAME)
72
+ )
73
+ output_embeddings.collect(
74
+ id=cocoindex.GeneratedField.UUID,
75
+ filename=doc["filename"],
76
+ page=page["page_number"],
77
+ embedding=page["embedding"],
78
+ )
79
+
80
+ output_embeddings.export(
81
+ "multi_format_indexings",
82
+ cocoindex.targets.Qdrant(
83
+ connection=qdrant_connection,
84
+ collection_name=QDRANT_COLLECTION,
85
+ ),
86
+ primary_key_fields=["id"],
87
+ )
88
+
89
+
90
+ @cocoindex.transform_flow()
91
+ def query_to_colpali_embedding(
92
+ text: cocoindex.DataSlice[str],
93
+ ) -> cocoindex.DataSlice[list[list[float]]]:
94
+ return text.transform(
95
+ cocoindex.functions.ColPaliEmbedQuery(model=COLPALI_MODEL_NAME)
96
+ )
97
+
98
+
99
+ def _main() -> None:
100
+ # Initialize Qdrant client
101
+ client = QdrantClient(url=QDRANT_GRPC_URL, prefer_grpc=True)
102
+
103
+ # Run queries in a loop to demonstrate the query capabilities.
104
+ while True:
105
+ query = input("Enter search query (or Enter to quit): ")
106
+ if query == "":
107
+ break
108
+
109
+ # Get the embedding for the query
110
+ query_embedding = query_to_colpali_embedding.eval(query)
111
+
112
+ search_results = client.query_points(
113
+ collection_name=QDRANT_COLLECTION,
114
+ query=query_embedding, # Multi-vector format: list[list[float]]
115
+ using="embedding", # Specify the vector field name
116
+ limit=5,
117
+ with_payload=True,
118
+ )
119
+ print("\nSearch results:")
120
+ for result in search_results.points:
121
+ score = result.score
122
+ payload = result.payload
123
+ if payload is None:
124
+ continue
125
+ page_number = payload["page"]
126
+ page_number_str = f"Page:{page_number}" if page_number is not None else ""
127
+ print(f"[{score:.3f}] {payload['filename']} {page_number_str}")
128
+ print("---")
129
+ print()
130
+
131
+
132
+ if __name__ == "__main__":
133
+ load_dotenv()
134
+ cocoindex.init()
135
+ _main()
@@ -0,0 +1,14 @@
1
+ [project]
2
+ name = "pdf-embedding"
3
+ version = "0.1.0"
4
+ description = "Simple example for cocoindex: build embedding index based on local PDF files."
5
+ requires-python = ">=3.11"
6
+ dependencies = [
7
+ "cocoindex[colpali]>=0.1.76",
8
+ "python-dotenv>=1.0.1",
9
+ "pdf2image>=1.17.0",
10
+ "qdrant-client>=1.15.0",
11
+ ]
12
+
13
+ [tool.setuptools]
14
+ packages = []
@@ -0,0 +1,2 @@
1
+ # Postgres database address for cocoindex
2
+ COCOINDEX_DATABASE_URL=postgres://cocoindex:cocoindex@localhost/cocoindex
@@ -114,8 +114,8 @@ class _FunctionExecutorFactory:
114
114
  ) -> tuple[dict[str, Any], Executor]:
115
115
  spec = _load_spec_from_engine(self._spec_cls, spec)
116
116
  executor = self._executor_cls(spec)
117
- result_type = executor.analyze(*args, **kwargs)
118
- return (encode_enriched_type(result_type), executor)
117
+ result_type = executor.analyze_schema(*args, **kwargs)
118
+ return (result_type, executor)
119
119
 
120
120
 
121
121
  _gpu_dispatch_lock = asyncio.Lock()
@@ -156,6 +156,12 @@ def _to_async_call(call: Callable[..., Any]) -> Callable[..., Awaitable[Any]]:
156
156
  return lambda *args, **kwargs: asyncio.to_thread(lambda: call(*args, **kwargs))
157
157
 
158
158
 
159
+ @dataclasses.dataclass
160
+ class _ArgInfo:
161
+ decoder: Callable[[Any], Any]
162
+ is_required: bool
163
+
164
+
159
165
  def _register_op_factory(
160
166
  category: OpCategory,
161
167
  expected_args: list[tuple[str, inspect.Parameter]],
@@ -176,8 +182,8 @@ def _register_op_factory(
176
182
  return op_args.behavior_version
177
183
 
178
184
  class _WrappedClass(executor_cls, _Fallback): # type: ignore[misc]
179
- _args_decoders: list[Callable[[Any], Any]]
180
- _kwargs_decoders: dict[str, Callable[[Any], Any]]
185
+ _args_info: list[_ArgInfo]
186
+ _kwargs_info: dict[str, _ArgInfo]
181
187
  _acall: Callable[..., Awaitable[Any]]
182
188
 
183
189
  def __init__(self, spec: Any) -> None:
@@ -185,28 +191,45 @@ def _register_op_factory(
185
191
  self.spec = spec
186
192
  self._acall = _to_async_call(super().__call__)
187
193
 
188
- def analyze(
194
+ def analyze_schema(
189
195
  self, *args: _engine.OpArgSchema, **kwargs: _engine.OpArgSchema
190
196
  ) -> Any:
191
197
  """
192
198
  Analyze the spec and arguments. In this phase, argument types should be validated.
193
199
  It should return the expected result type for the current op.
194
200
  """
195
- self._args_decoders = []
196
- self._kwargs_decoders = {}
201
+ self._args_info = []
202
+ self._kwargs_info = {}
197
203
  attributes = []
198
-
199
- def process_attribute(arg_name: str, arg: _engine.OpArgSchema) -> None:
204
+ potentially_missing_required_arg = False
205
+
206
+ def process_arg(
207
+ arg_name: str,
208
+ arg_param: inspect.Parameter,
209
+ actual_arg: _engine.OpArgSchema,
210
+ ) -> _ArgInfo:
211
+ nonlocal potentially_missing_required_arg
200
212
  if op_args.arg_relationship is not None:
201
213
  related_attr, related_arg_name = op_args.arg_relationship
202
214
  if related_arg_name == arg_name:
203
215
  attributes.append(
204
- TypeAttr(related_attr.value, arg.analyzed_value)
216
+ TypeAttr(related_attr.value, actual_arg.analyzed_value)
205
217
  )
218
+ type_info = analyze_type_info(arg_param.annotation)
219
+ decoder = make_engine_value_decoder(
220
+ [arg_name], actual_arg.value_type["type"], type_info
221
+ )
222
+ is_required = not type_info.nullable
223
+ if is_required and actual_arg.value_type.get("nullable", False):
224
+ potentially_missing_required_arg = True
225
+ return _ArgInfo(
226
+ decoder=decoder,
227
+ is_required=is_required,
228
+ )
206
229
 
207
230
  # Match arguments with parameters.
208
231
  next_param_idx = 0
209
- for arg in args:
232
+ for actual_arg in args:
210
233
  if next_param_idx >= len(expected_args):
211
234
  raise ValueError(
212
235
  f"Too many arguments passed in: {len(args)} > {len(expected_args)}"
@@ -219,20 +242,13 @@ def _register_op_factory(
219
242
  raise ValueError(
220
243
  f"Too many positional arguments passed in: {len(args)} > {next_param_idx}"
221
244
  )
222
- self._args_decoders.append(
223
- make_engine_value_decoder(
224
- [arg_name],
225
- arg.value_type["type"],
226
- analyze_type_info(arg_param.annotation),
227
- )
228
- )
229
- process_attribute(arg_name, arg)
245
+ self._args_info.append(process_arg(arg_name, arg_param, actual_arg))
230
246
  if arg_param.kind != inspect.Parameter.VAR_POSITIONAL:
231
247
  next_param_idx += 1
232
248
 
233
249
  expected_kwargs = expected_args[next_param_idx:]
234
250
 
235
- for kwarg_name, kwarg in kwargs.items():
251
+ for kwarg_name, actual_arg in kwargs.items():
236
252
  expected_arg = next(
237
253
  (
238
254
  arg
@@ -254,12 +270,9 @@ def _register_op_factory(
254
270
  f"Unexpected keyword argument passed in: {kwarg_name}"
255
271
  )
256
272
  arg_param = expected_arg[1]
257
- self._kwargs_decoders[kwarg_name] = make_engine_value_decoder(
258
- [kwarg_name],
259
- kwarg.value_type["type"],
260
- analyze_type_info(arg_param.annotation),
273
+ self._kwargs_info[kwarg_name] = process_arg(
274
+ kwarg_name, arg_param, actual_arg
261
275
  )
262
- process_attribute(kwarg_name, kwarg)
263
276
 
264
277
  missing_args = [
265
278
  name
@@ -280,32 +293,45 @@ def _register_op_factory(
280
293
  if len(missing_args) > 0:
281
294
  raise ValueError(f"Missing arguments: {', '.join(missing_args)}")
282
295
 
283
- prepare_method = getattr(executor_cls, "analyze", None)
284
- if prepare_method is not None:
285
- result = prepare_method(self, *args, **kwargs)
296
+ base_analyze_method = getattr(self, "analyze", None)
297
+ if base_analyze_method is not None:
298
+ result = base_analyze_method(*args, **kwargs)
286
299
  else:
287
300
  result = expected_return
288
301
  if len(attributes) > 0:
289
302
  result = Annotated[result, *attributes]
290
- return result
303
+
304
+ encoded_type = encode_enriched_type(result)
305
+ if potentially_missing_required_arg:
306
+ encoded_type["nullable"] = True
307
+ return encoded_type
291
308
 
292
309
  async def prepare(self) -> None:
293
310
  """
294
311
  Prepare for execution.
295
312
  It's executed after `analyze` and before any `__call__` execution.
296
313
  """
297
- setup_method = getattr(super(), "prepare", None)
298
- if setup_method is not None:
299
- await _to_async_call(setup_method)()
314
+ prepare_method = getattr(super(), "prepare", None)
315
+ if prepare_method is not None:
316
+ await _to_async_call(prepare_method)()
300
317
 
301
318
  async def __call__(self, *args: Any, **kwargs: Any) -> Any:
302
- decoded_args = (
303
- decoder(arg) for decoder, arg in zip(self._args_decoders, args)
304
- )
305
- decoded_kwargs = {
306
- arg_name: self._kwargs_decoders[arg_name](arg)
307
- for arg_name, arg in kwargs.items()
308
- }
319
+ decoded_args = []
320
+ for arg_info, arg in zip(self._args_info, args):
321
+ if arg_info.is_required and arg is None:
322
+ return None
323
+ decoded_args.append(arg_info.decoder(arg))
324
+
325
+ decoded_kwargs = {}
326
+ for kwarg_name, arg in kwargs.items():
327
+ kwarg_info = self._kwargs_info.get(kwarg_name)
328
+ if kwarg_info is None:
329
+ raise ValueError(
330
+ f"Unexpected keyword argument passed in: {kwarg_name}"
331
+ )
332
+ if kwarg_info.is_required and arg is None:
333
+ return None
334
+ decoded_kwargs[kwarg_name] = kwarg_info.decoder(arg)
309
335
 
310
336
  if op_args.gpu:
311
337
  # For GPU executions, data-level parallelism is applied, so we don't want to
@@ -101,3 +101,44 @@ async def test_for_each_transform_flow_async() -> None:
101
101
  }
102
102
 
103
103
  assert result == expected, f"Expected {expected}, got {result}"
104
+
105
+
106
+ def test_none_arg_yield_none_result() -> None:
107
+ """Test that None arguments yield None results."""
108
+
109
+ @cocoindex.op.function()
110
+ def custom_fn(
111
+ required_arg: int,
112
+ optional_arg: int | None,
113
+ required_kwarg: int,
114
+ optional_kwarg: int | None,
115
+ ) -> int:
116
+ return (
117
+ required_arg + (optional_arg or 0) + required_kwarg + (optional_kwarg or 0)
118
+ )
119
+
120
+ @cocoindex.transform_flow()
121
+ def transform_flow(
122
+ required_arg: cocoindex.DataSlice[int | None],
123
+ optional_arg: cocoindex.DataSlice[int | None],
124
+ required_kwarg: cocoindex.DataSlice[int | None],
125
+ optional_kwarg: cocoindex.DataSlice[int | None],
126
+ ) -> cocoindex.DataSlice[int | None]:
127
+ return required_arg.transform(
128
+ custom_fn,
129
+ optional_arg,
130
+ required_kwarg=required_kwarg,
131
+ optional_kwarg=optional_kwarg,
132
+ )
133
+
134
+ result = transform_flow.eval(1, 2, 4, 8)
135
+ assert result == 15, f"Expected 15, got {result}"
136
+
137
+ result = transform_flow.eval(1, None, 4, None)
138
+ assert result == 5, f"Expected 5, got {result}"
139
+
140
+ result = transform_flow.eval(None, 2, 4, 8)
141
+ assert result is None, f"Expected None, got {result}"
142
+
143
+ result = transform_flow.eval(1, 2, None, None)
144
+ assert result is None, f"Expected None, got {result}"
@@ -530,6 +530,17 @@ pub async fn evaluate_source_entry(
530
530
  value::Value::KTable(BTreeMap::from([(src_eval_ctx.key.clone(), scope_value)])),
531
531
  )?;
532
532
 
533
+ // Fill other source fields with empty tables
534
+ for import_op in src_eval_ctx.plan.import_ops.iter() {
535
+ let field_idx = import_op.output.field_idx;
536
+ if field_idx != src_eval_ctx.import_op.output.field_idx {
537
+ root_scope_entry.define_field(
538
+ &AnalyzedOpOutput { field_idx },
539
+ &value::Value::KTable(BTreeMap::new()),
540
+ )?;
541
+ }
542
+ }
543
+
533
544
  evaluate_op_scope(
534
545
  &src_eval_ctx.plan.op_scope,
535
546
  RefList::Nil.prepend(&root_scope_entry),
File without changes
File without changes