cocoindex 0.1.76__tar.gz → 0.1.78__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (312) hide show
  1. {cocoindex-0.1.76 → cocoindex-0.1.78}/Cargo.lock +1 -1
  2. {cocoindex-0.1.76 → cocoindex-0.1.78}/Cargo.toml +1 -1
  3. {cocoindex-0.1.76 → cocoindex-0.1.78}/PKG-INFO +2 -1
  4. {cocoindex-0.1.76 → cocoindex-0.1.78}/README.md +1 -0
  5. {cocoindex-0.1.76 → cocoindex-0.1.78}/docs/docs/core/data_types.mdx +20 -0
  6. {cocoindex-0.1.76 → cocoindex-0.1.78}/docs/docs/core/flow_def.mdx +28 -9
  7. {cocoindex-0.1.76 → cocoindex-0.1.78}/docs/docs/core/settings.mdx +22 -11
  8. {cocoindex-0.1.76 → cocoindex-0.1.78}/docs/docs/ops/functions.md +9 -9
  9. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/image_search/pyproject.toml +1 -1
  10. cocoindex-0.1.78/examples/multi_format_indexing/README.md +71 -0
  11. cocoindex-0.1.78/examples/multi_format_indexing/main.py +135 -0
  12. cocoindex-0.1.78/examples/multi_format_indexing/pyproject.toml +14 -0
  13. cocoindex-0.1.78/examples/multi_format_indexing/source_files/cat1.jpeg +0 -0
  14. cocoindex-0.1.78/examples/multi_format_indexing/source_files/dog1.jpeg +0 -0
  15. cocoindex-0.1.78/examples/multi_format_indexing/source_files/elephant1.jpg +0 -0
  16. cocoindex-0.1.78/examples/multi_format_indexing/source_files/giraffe.jpg +0 -0
  17. cocoindex-0.1.78/examples/pdf_embedding/pdf_files/1706.03762v7.pdf +0 -0
  18. cocoindex-0.1.78/examples/pdf_embedding/pdf_files/1810.04805v2.pdf +0 -0
  19. cocoindex-0.1.78/examples/pdf_embedding/pdf_files/rfc8259.pdf +0 -0
  20. cocoindex-0.1.78/examples/text_embedding_qdrant/.env +2 -0
  21. {cocoindex-0.1.76 → cocoindex-0.1.78}/python/cocoindex/op.py +65 -39
  22. {cocoindex-0.1.76 → cocoindex-0.1.78}/python/cocoindex/setting.py +16 -2
  23. {cocoindex-0.1.76 → cocoindex-0.1.78}/python/cocoindex/tests/test_transform_flow.py +41 -0
  24. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/execution/evaluator.rs +11 -0
  25. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/lib_context.rs +28 -2
  26. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/settings.rs +17 -3
  27. {cocoindex-0.1.76 → cocoindex-0.1.78}/.cargo/config.toml +0 -0
  28. {cocoindex-0.1.76 → cocoindex-0.1.78}/.env.lib_debug +0 -0
  29. {cocoindex-0.1.76 → cocoindex-0.1.78}/.github/ISSUE_TEMPLATE/🐛-bug-report.md +0 -0
  30. {cocoindex-0.1.76 → cocoindex-0.1.78}/.github/ISSUE_TEMPLATE/💡-feature-request.md +0 -0
  31. {cocoindex-0.1.76 → cocoindex-0.1.78}/.github/scripts/update_version.sh +0 -0
  32. {cocoindex-0.1.76 → cocoindex-0.1.78}/.github/workflows/CI.yml +0 -0
  33. {cocoindex-0.1.76 → cocoindex-0.1.78}/.github/workflows/_doc_release.yml +0 -0
  34. {cocoindex-0.1.76 → cocoindex-0.1.78}/.github/workflows/_test.yml +0 -0
  35. {cocoindex-0.1.76 → cocoindex-0.1.78}/.github/workflows/docs.yml +0 -0
  36. {cocoindex-0.1.76 → cocoindex-0.1.78}/.github/workflows/format.yml +0 -0
  37. {cocoindex-0.1.76 → cocoindex-0.1.78}/.github/workflows/release.yml +0 -0
  38. {cocoindex-0.1.76 → cocoindex-0.1.78}/.gitignore +0 -0
  39. {cocoindex-0.1.76 → cocoindex-0.1.78}/.pre-commit-config.yaml +0 -0
  40. {cocoindex-0.1.76 → cocoindex-0.1.78}/CODE_OF_CONDUCT.md +0 -0
  41. {cocoindex-0.1.76 → cocoindex-0.1.78}/CONTRIBUTING.md +0 -0
  42. {cocoindex-0.1.76 → cocoindex-0.1.78}/LICENSE +0 -0
  43. {cocoindex-0.1.76 → cocoindex-0.1.78}/dev/neo4j.yaml +0 -0
  44. {cocoindex-0.1.76 → cocoindex-0.1.78}/dev/postgres.yaml +0 -0
  45. {cocoindex-0.1.76 → cocoindex-0.1.78}/docs/.gitignore +0 -0
  46. {cocoindex-0.1.76 → cocoindex-0.1.78}/docs/README.md +0 -0
  47. {cocoindex-0.1.76 → cocoindex-0.1.78}/docs/docs/about/community.md +0 -0
  48. {cocoindex-0.1.76 → cocoindex-0.1.78}/docs/docs/about/contributing.md +0 -0
  49. {cocoindex-0.1.76 → cocoindex-0.1.78}/docs/docs/ai/llm.mdx +0 -0
  50. {cocoindex-0.1.76 → cocoindex-0.1.78}/docs/docs/core/basics.md +0 -0
  51. {cocoindex-0.1.76 → cocoindex-0.1.78}/docs/docs/core/cli.mdx +0 -0
  52. {cocoindex-0.1.76 → cocoindex-0.1.78}/docs/docs/core/data_example.svg +0 -0
  53. {cocoindex-0.1.76 → cocoindex-0.1.78}/docs/docs/core/flow_example.svg +0 -0
  54. {cocoindex-0.1.76 → cocoindex-0.1.78}/docs/docs/core/flow_methods.mdx +0 -0
  55. {cocoindex-0.1.76 → cocoindex-0.1.78}/docs/docs/custom_ops/custom_functions.mdx +0 -0
  56. {cocoindex-0.1.76 → cocoindex-0.1.78}/docs/docs/custom_ops/custom_targets.mdx +0 -0
  57. {cocoindex-0.1.76 → cocoindex-0.1.78}/docs/docs/getting_started/installation.md +0 -0
  58. {cocoindex-0.1.76 → cocoindex-0.1.78}/docs/docs/getting_started/markdown_files.zip +0 -0
  59. {cocoindex-0.1.76 → cocoindex-0.1.78}/docs/docs/getting_started/overview.md +0 -0
  60. {cocoindex-0.1.76 → cocoindex-0.1.78}/docs/docs/getting_started/quickstart.md +0 -0
  61. {cocoindex-0.1.76 → cocoindex-0.1.78}/docs/docs/ops/sources.md +0 -0
  62. {cocoindex-0.1.76 → cocoindex-0.1.78}/docs/docs/ops/targets.md +0 -0
  63. {cocoindex-0.1.76 → cocoindex-0.1.78}/docs/docs/query.mdx +0 -0
  64. {cocoindex-0.1.76 → cocoindex-0.1.78}/docs/docs/tutorials/live_updates.md +0 -0
  65. {cocoindex-0.1.76 → cocoindex-0.1.78}/docs/docs/tutorials/manage_flow_dynamically.mdx +0 -0
  66. {cocoindex-0.1.76 → cocoindex-0.1.78}/docs/docusaurus.config.ts +0 -0
  67. {cocoindex-0.1.76 → cocoindex-0.1.78}/docs/package.json +0 -0
  68. {cocoindex-0.1.76 → cocoindex-0.1.78}/docs/sidebars.ts +0 -0
  69. {cocoindex-0.1.76 → cocoindex-0.1.78}/docs/src/components/HomepageFeatures/index.tsx +0 -0
  70. {cocoindex-0.1.76 → cocoindex-0.1.78}/docs/src/components/HomepageFeatures/styles.module.css +0 -0
  71. {cocoindex-0.1.76 → cocoindex-0.1.78}/docs/src/css/custom.css +0 -0
  72. {cocoindex-0.1.76 → cocoindex-0.1.78}/docs/src/theme/Root.js +0 -0
  73. {cocoindex-0.1.76 → cocoindex-0.1.78}/docs/static/.nojekyll +0 -0
  74. {cocoindex-0.1.76 → cocoindex-0.1.78}/docs/static/img/docusaurus.png +0 -0
  75. {cocoindex-0.1.76 → cocoindex-0.1.78}/docs/static/img/favicon.ico +0 -0
  76. {cocoindex-0.1.76 → cocoindex-0.1.78}/docs/static/img/icon.svg +0 -0
  77. {cocoindex-0.1.76 → cocoindex-0.1.78}/docs/static/img/incremental-etl.gif +0 -0
  78. {cocoindex-0.1.76 → cocoindex-0.1.78}/docs/static/robots.txt +0 -0
  79. {cocoindex-0.1.76 → cocoindex-0.1.78}/docs/tsconfig.json +0 -0
  80. {cocoindex-0.1.76 → cocoindex-0.1.78}/docs/yarn.lock +0 -0
  81. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/amazon_s3_embedding/.env.example +0 -0
  82. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/amazon_s3_embedding/.gitignore +0 -0
  83. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/amazon_s3_embedding/README.md +0 -0
  84. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/amazon_s3_embedding/main.py +0 -0
  85. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/amazon_s3_embedding/pyproject.toml +0 -0
  86. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/azure_blob_embedding/.env.example +0 -0
  87. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/azure_blob_embedding/.gitignore +0 -0
  88. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/azure_blob_embedding/README.md +0 -0
  89. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/azure_blob_embedding/main.py +0 -0
  90. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/azure_blob_embedding/pyproject.toml +0 -0
  91. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/code_embedding/.env +0 -0
  92. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/code_embedding/README.md +0 -0
  93. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/code_embedding/main.py +0 -0
  94. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/code_embedding/pyproject.toml +0 -0
  95. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/custom_output_files/.env +0 -0
  96. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/custom_output_files/.gitignore +0 -0
  97. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/custom_output_files/README.md +0 -0
  98. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/custom_output_files/data/bizarre_animals.md +0 -0
  99. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/custom_output_files/data/chunk_norris.md +0 -0
  100. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/custom_output_files/main.py +0 -0
  101. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/custom_output_files/pyproject.toml +0 -0
  102. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/docs_to_knowledge_graph/.env +0 -0
  103. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/docs_to_knowledge_graph/README.md +0 -0
  104. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/docs_to_knowledge_graph/main.py +0 -0
  105. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/docs_to_knowledge_graph/pyproject.toml +0 -0
  106. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/face_recognition/.env +0 -0
  107. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/face_recognition/README.md +0 -0
  108. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/face_recognition/images/Carter_welcomes_Reagan.jpg +0 -0
  109. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/face_recognition/images/Solvay_conference_1927.jpg +0 -0
  110. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/face_recognition/images/Steve_Jobs_and_Bill_Gates_(522695099).jpg +0 -0
  111. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/face_recognition/images/einplanck3.jpg +0 -0
  112. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/face_recognition/main.py +0 -0
  113. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/face_recognition/pyproject.toml +0 -0
  114. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/fastapi_server_docker/.dockerignore +0 -0
  115. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/fastapi_server_docker/.env +0 -0
  116. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/fastapi_server_docker/README.md +0 -0
  117. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/fastapi_server_docker/compose.yaml +0 -0
  118. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/fastapi_server_docker/dockerfile +0 -0
  119. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/fastapi_server_docker/files/1810.04805v2.md +0 -0
  120. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/fastapi_server_docker/main.py +0 -0
  121. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/fastapi_server_docker/requirements.txt +0 -0
  122. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/gdrive_text_embedding/.env.example +0 -0
  123. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/gdrive_text_embedding/.gitignore +0 -0
  124. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/gdrive_text_embedding/README.md +0 -0
  125. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/gdrive_text_embedding/main.py +0 -0
  126. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/gdrive_text_embedding/pyproject.toml +0 -0
  127. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/image_search/.env +0 -0
  128. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/image_search/README.md +0 -0
  129. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/image_search/colpali_main.py +0 -0
  130. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/image_search/frontend/.gitignore +0 -0
  131. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/image_search/frontend/index.html +0 -0
  132. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/image_search/frontend/package-lock.json +0 -0
  133. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/image_search/frontend/package.json +0 -0
  134. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/image_search/frontend/src/App.jsx +0 -0
  135. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/image_search/frontend/src/main.jsx +0 -0
  136. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/image_search/frontend/src/style.css +0 -0
  137. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/image_search/frontend/vite.config.js +0 -0
  138. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/image_search/img/cat1.jpeg +0 -0
  139. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/image_search/img/dog1.jpeg +0 -0
  140. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/image_search/img/elephant1.jpg +0 -0
  141. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/image_search/img/giraffe.jpg +0 -0
  142. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/image_search/main.py +0 -0
  143. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/live_updates/.env +0 -0
  144. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/live_updates/README.md +0 -0
  145. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/live_updates/data/bizarre_animals.md +0 -0
  146. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/live_updates/data/chunk_norris.md +0 -0
  147. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/live_updates/main.py +0 -0
  148. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/live_updates/pyproject.toml +0 -0
  149. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/manuals_llm_extraction/.env +0 -0
  150. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/manuals_llm_extraction/README.md +0 -0
  151. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/manuals_llm_extraction/main.py +0 -0
  152. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/manuals_llm_extraction/manuals/array.pdf +0 -0
  153. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/manuals_llm_extraction/manuals/base64.pdf +0 -0
  154. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/manuals_llm_extraction/manuals/copy.pdf +0 -0
  155. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/manuals_llm_extraction/manuals/glob.pdf +0 -0
  156. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/manuals_llm_extraction/pyproject.toml +0 -0
  157. {cocoindex-0.1.76/examples/pdf_embedding → cocoindex-0.1.78/examples/multi_format_indexing}/.env +0 -0
  158. {cocoindex-0.1.76/examples/paper_metadata/papers → cocoindex-0.1.78/examples/multi_format_indexing/source_files}/1706.03762v7.pdf +0 -0
  159. {cocoindex-0.1.76/examples/paper_metadata/papers → cocoindex-0.1.78/examples/multi_format_indexing/source_files}/1810.04805v2.pdf +0 -0
  160. {cocoindex-0.1.76/examples/pdf_embedding/pdf_files → cocoindex-0.1.78/examples/multi_format_indexing/source_files}/rfc8259.pdf +0 -0
  161. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/paper_metadata/.env.example +0 -0
  162. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/paper_metadata/.gitignore +0 -0
  163. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/paper_metadata/README.md +0 -0
  164. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/paper_metadata/main.py +0 -0
  165. {cocoindex-0.1.76/examples/pdf_embedding/pdf_files → cocoindex-0.1.78/examples/paper_metadata/papers}/1706.03762v7.pdf +0 -0
  166. {cocoindex-0.1.76/examples/pdf_embedding/pdf_files → cocoindex-0.1.78/examples/paper_metadata/papers}/1810.04805v2.pdf +0 -0
  167. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/paper_metadata/papers/2502.06786v3.pdf +0 -0
  168. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/paper_metadata/papers/2502.20346v1.pdf +0 -0
  169. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/paper_metadata/pyproject.toml +0 -0
  170. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/patient_intake_extraction/.env.example +0 -0
  171. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/patient_intake_extraction/README.md +0 -0
  172. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/patient_intake_extraction/data/README.md +0 -0
  173. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/patient_intake_extraction/data/patient_forms/Patient_Intake_Form_David_Artificial.docx +0 -0
  174. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/patient_intake_extraction/data/patient_forms/Patient_Intake_Form_Emily_Artificial.pdf +0 -0
  175. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/patient_intake_extraction/data/patient_forms/Patient_Intake_Form_Joe_Artificial.pdf +0 -0
  176. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/patient_intake_extraction/data/patient_forms/Patient_Intake_From_Jane_Artificial.docx +0 -0
  177. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/patient_intake_extraction/main.py +0 -0
  178. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/patient_intake_extraction/pyproject.toml +0 -0
  179. {cocoindex-0.1.76/examples/product_recommendation → cocoindex-0.1.78/examples/pdf_embedding}/.env +0 -0
  180. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/pdf_embedding/README.md +0 -0
  181. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/pdf_embedding/main.py +0 -0
  182. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/pdf_embedding/pyproject.toml +0 -0
  183. {cocoindex-0.1.76/examples/text_embedding → cocoindex-0.1.78/examples/product_recommendation}/.env +0 -0
  184. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/product_recommendation/README.md +0 -0
  185. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/product_recommendation/img/cocoinsight.png +0 -0
  186. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/product_recommendation/img/neo4j.png +0 -0
  187. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/product_recommendation/main.py +0 -0
  188. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/product_recommendation/products/p1.json +0 -0
  189. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/product_recommendation/products/p2.json +0 -0
  190. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/product_recommendation/products/p3.json +0 -0
  191. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/product_recommendation/products/p4.json +0 -0
  192. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/product_recommendation/products/p5.json +0 -0
  193. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/product_recommendation/products/p6.json +0 -0
  194. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/product_recommendation/products/p7.json +0 -0
  195. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/product_recommendation/products/p8.json +0 -0
  196. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/product_recommendation/products/p9.json +0 -0
  197. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/product_recommendation/pyproject.toml +0 -0
  198. {cocoindex-0.1.76/examples/text_embedding_qdrant → cocoindex-0.1.78/examples/text_embedding}/.env +0 -0
  199. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/text_embedding/README.md +0 -0
  200. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/text_embedding/Text_Embedding.ipynb +0 -0
  201. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/text_embedding/main.py +0 -0
  202. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/text_embedding/markdown_files/1706.03762v7.md +0 -0
  203. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/text_embedding/markdown_files/1810.04805v2.md +0 -0
  204. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/text_embedding/markdown_files/rfc8259.md +0 -0
  205. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/text_embedding/pyproject.toml +0 -0
  206. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/text_embedding_qdrant/README.md +0 -0
  207. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/text_embedding_qdrant/main.py +0 -0
  208. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/text_embedding_qdrant/markdown_files/rfc8259.md +0 -0
  209. {cocoindex-0.1.76 → cocoindex-0.1.78}/examples/text_embedding_qdrant/pyproject.toml +0 -0
  210. {cocoindex-0.1.76 → cocoindex-0.1.78}/pyproject.toml +0 -0
  211. {cocoindex-0.1.76 → cocoindex-0.1.78}/python/cocoindex/__init__.py +0 -0
  212. {cocoindex-0.1.76 → cocoindex-0.1.78}/python/cocoindex/auth_registry.py +0 -0
  213. {cocoindex-0.1.76 → cocoindex-0.1.78}/python/cocoindex/cli.py +0 -0
  214. {cocoindex-0.1.76 → cocoindex-0.1.78}/python/cocoindex/convert.py +0 -0
  215. {cocoindex-0.1.76 → cocoindex-0.1.78}/python/cocoindex/flow.py +0 -0
  216. {cocoindex-0.1.76 → cocoindex-0.1.78}/python/cocoindex/functions.py +0 -0
  217. {cocoindex-0.1.76 → cocoindex-0.1.78}/python/cocoindex/index.py +0 -0
  218. {cocoindex-0.1.76 → cocoindex-0.1.78}/python/cocoindex/lib.py +0 -0
  219. {cocoindex-0.1.76 → cocoindex-0.1.78}/python/cocoindex/llm.py +0 -0
  220. {cocoindex-0.1.76 → cocoindex-0.1.78}/python/cocoindex/py.typed +0 -0
  221. {cocoindex-0.1.76 → cocoindex-0.1.78}/python/cocoindex/runtime.py +0 -0
  222. {cocoindex-0.1.76 → cocoindex-0.1.78}/python/cocoindex/setup.py +0 -0
  223. {cocoindex-0.1.76 → cocoindex-0.1.78}/python/cocoindex/sources.py +0 -0
  224. {cocoindex-0.1.76 → cocoindex-0.1.78}/python/cocoindex/targets.py +0 -0
  225. {cocoindex-0.1.76 → cocoindex-0.1.78}/python/cocoindex/tests/__init__.py +0 -0
  226. {cocoindex-0.1.76 → cocoindex-0.1.78}/python/cocoindex/tests/test_convert.py +0 -0
  227. {cocoindex-0.1.76 → cocoindex-0.1.78}/python/cocoindex/tests/test_optional_database.py +0 -0
  228. {cocoindex-0.1.76 → cocoindex-0.1.78}/python/cocoindex/tests/test_typing.py +0 -0
  229. {cocoindex-0.1.76 → cocoindex-0.1.78}/python/cocoindex/tests/test_validation.py +0 -0
  230. {cocoindex-0.1.76 → cocoindex-0.1.78}/python/cocoindex/typing.py +0 -0
  231. {cocoindex-0.1.76 → cocoindex-0.1.78}/python/cocoindex/utils.py +0 -0
  232. {cocoindex-0.1.76 → cocoindex-0.1.78}/python/cocoindex/validation.py +0 -0
  233. {cocoindex-0.1.76 → cocoindex-0.1.78}/ruff.toml +0 -0
  234. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/base/duration.rs +0 -0
  235. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/base/field_attrs.rs +0 -0
  236. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/base/json_schema.rs +0 -0
  237. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/base/mod.rs +0 -0
  238. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/base/schema.rs +0 -0
  239. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/base/spec.rs +0 -0
  240. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/base/value.rs +0 -0
  241. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/builder/analyzed_flow.rs +0 -0
  242. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/builder/analyzer.rs +0 -0
  243. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/builder/exec_ctx.rs +0 -0
  244. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/builder/flow_builder.rs +0 -0
  245. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/builder/mod.rs +0 -0
  246. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/builder/plan.rs +0 -0
  247. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/execution/db_tracking.rs +0 -0
  248. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/execution/db_tracking_setup.rs +0 -0
  249. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/execution/dumper.rs +0 -0
  250. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/execution/indexing_status.rs +0 -0
  251. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/execution/live_updater.rs +0 -0
  252. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/execution/memoization.rs +0 -0
  253. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/execution/mod.rs +0 -0
  254. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/execution/row_indexer.rs +0 -0
  255. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/execution/source_indexer.rs +0 -0
  256. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/execution/stats.rs +0 -0
  257. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/lib.rs +0 -0
  258. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/llm/anthropic.rs +0 -0
  259. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/llm/gemini.rs +0 -0
  260. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/llm/litellm.rs +0 -0
  261. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/llm/mod.rs +0 -0
  262. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/llm/ollama.rs +0 -0
  263. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/llm/openai.rs +0 -0
  264. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/llm/openrouter.rs +0 -0
  265. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/llm/vllm.rs +0 -0
  266. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/llm/voyage.rs +0 -0
  267. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/ops/factory_bases.rs +0 -0
  268. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/ops/functions/embed_text.rs +0 -0
  269. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/ops/functions/extract_by_llm.rs +0 -0
  270. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/ops/functions/mod.rs +0 -0
  271. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/ops/functions/parse_json.rs +0 -0
  272. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/ops/functions/split_recursively.rs +0 -0
  273. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/ops/functions/test_utils.rs +0 -0
  274. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/ops/interface.rs +0 -0
  275. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/ops/mod.rs +0 -0
  276. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/ops/py_factory.rs +0 -0
  277. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/ops/registration.rs +0 -0
  278. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/ops/registry.rs +0 -0
  279. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/ops/sdk.rs +0 -0
  280. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/ops/sources/amazon_s3.rs +0 -0
  281. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/ops/sources/azure_blob.rs +0 -0
  282. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/ops/sources/google_drive.rs +0 -0
  283. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/ops/sources/local_file.rs +0 -0
  284. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/ops/sources/mod.rs +0 -0
  285. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/ops/targets/kuzu.rs +0 -0
  286. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/ops/targets/mod.rs +0 -0
  287. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/ops/targets/neo4j.rs +0 -0
  288. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/ops/targets/postgres.rs +0 -0
  289. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/ops/targets/qdrant.rs +0 -0
  290. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/ops/targets/shared/mod.rs +0 -0
  291. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/ops/targets/shared/property_graph.rs +0 -0
  292. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/ops/targets/shared/table_columns.rs +0 -0
  293. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/prelude.rs +0 -0
  294. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/py/convert.rs +0 -0
  295. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/py/mod.rs +0 -0
  296. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/server.rs +0 -0
  297. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/service/error.rs +0 -0
  298. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/service/flows.rs +0 -0
  299. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/service/mod.rs +0 -0
  300. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/setup/auth_registry.rs +0 -0
  301. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/setup/components.rs +0 -0
  302. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/setup/db_metadata.rs +0 -0
  303. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/setup/driver.rs +0 -0
  304. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/setup/mod.rs +0 -0
  305. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/setup/states.rs +0 -0
  306. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/utils/concur_control.rs +0 -0
  307. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/utils/db.rs +0 -0
  308. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/utils/fingerprint.rs +0 -0
  309. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/utils/immutable.rs +0 -0
  310. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/utils/mod.rs +0 -0
  311. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/utils/retryable.rs +0 -0
  312. {cocoindex-0.1.76 → cocoindex-0.1.78}/src/utils/yaml_ser.rs +0 -0
@@ -1297,7 +1297,7 @@ dependencies = [
1297
1297
 
1298
1298
  [[package]]
1299
1299
  name = "cocoindex"
1300
- version = "0.1.76"
1300
+ version = "0.1.78"
1301
1301
  dependencies = [
1302
1302
  "anyhow",
1303
1303
  "async-openai",
@@ -2,7 +2,7 @@
2
2
  name = "cocoindex"
3
3
  # Version used for local development is always higher than others to take precedence.
4
4
  # Will be overridden for specific release versions.
5
- version = "0.1.76"
5
+ version = "0.1.78"
6
6
  edition = "2024"
7
7
  rust-version = "1.88"
8
8
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cocoindex
3
- Version: 0.1.76
3
+ Version: 0.1.78
4
4
  Requires-Dist: click>=8.1.8
5
5
  Requires-Dist: rich>=14.0.0
6
6
  Requires-Dist: python-dotenv>=1.1.0
@@ -214,6 +214,7 @@ It defines an index flow like this:
214
214
  | [Image Search with Vision API](examples/image_search) | Generates detailed captions for images using a vision model, embeds them, enables live-updating semantic search via FastAPI and served on a React frontend|
215
215
  | [Face Recognition](examples/face_recognition) | Recognize faces in images and build embedding index |
216
216
  | [Paper Metadata](examples/paper_metadata) | Index papers in PDF files, and build metadata tables for each paper |
217
+ | [Multi Format Indexing](examples/multi_format_indexing) | Build visual document index from PDFs and images with ColPali for semantic search |
217
218
  | [Custom Output Files](examples/custom_output_files) | Convert markdown files to HTML files and save them to a local directory, using *CocoIndex Custom Targets* |
218
219
 
219
220
  More coming and stay tuned 👀!
@@ -185,6 +185,7 @@ It defines an index flow like this:
185
185
  | [Image Search with Vision API](examples/image_search) | Generates detailed captions for images using a vision model, embeds them, enables live-updating semantic search via FastAPI and served on a React frontend|
186
186
  | [Face Recognition](examples/face_recognition) | Recognize faces in images and build embedding index |
187
187
  | [Paper Metadata](examples/paper_metadata) | Index papers in PDF files, and build metadata tables for each paper |
188
+ | [Multi Format Indexing](examples/multi_format_indexing) | Build visual document index from PDFs and images with ColPali for semantic search |
188
189
  | [Custom Output Files](examples/custom_output_files) | Convert markdown files to HTML files and save them to a local directory, using *CocoIndex Custom Targets* |
189
190
 
190
191
  More coming and stay tuned 👀!
@@ -199,3 +199,23 @@ Currently, the following types are key types
199
199
  - *Uuid*
200
200
  - *Date*
201
201
  - *Struct* with all fields being key types (using `@dataclass(frozen=True)` or `NamedTuple`)
202
+
203
+ ## *Null* Values
204
+
205
+ CocoIndex supports *Null* values. A *Null* value represents the absence of data or an unknown value, distinct from empty strings, zero numbers, or false boolean values.
206
+
207
+ ### Nullable Type
208
+
209
+ For any data (e.g. a field of a *Struct*, an argument or return value of a CocoIndex function), if it is nullable, it means its value can be *Null*.
210
+ We use a `?` suffix to indicate a nullable type, e.g. *Str?*, *Person?*.
211
+
212
+ In Python, *Null* is represented as `None`, so a nullable type can be represented by `T | None` or `typing.Optional[T]`.
213
+
214
+ ### *Null* propagation in CocoIndex functions
215
+
216
+ A function may specify whether each input argument is nullable or not.
217
+ A non-nullable argument means the function needs a known value for the argument to work.
218
+ However, this doesn't forbid the argument from being *Null* at runtime.
219
+ When a non-nullable argument receives a *Null* value, the function execution is skipped and the result is *Null*.
220
+
221
+ For example, for [`SplitRecursively` function](/docs/ops/functions#splitrecursively), the `text` and `chunk_size` arguments are not nullable. If the input value of either of them is *Null*, the function will return *Null*.
@@ -360,21 +360,40 @@ It will use `Staging__doc_embeddings` as the collection name if the current app
360
360
 
361
361
  ### Control Processing Concurrency
362
362
 
363
- You can control the concurrency of the processing by setting the following options:
363
+ CocoIndex processes data in parallel to maximize throughput, but unconstrained parallelism can overwhelm your system.
364
+ Processing too many items simultaneously can lead to:
364
365
 
365
- * `max_inflight_rows`: the maximum number of concurrent inflight requests for the processing.
366
- * `max_inflight_bytes`: the maximum number of concurrent inflight bytes for the processing.
366
+ - **Memory exhaustion**: Large datasets loaded concurrently can consume excessive RAM
367
+ - **Resource contention**: Too many parallel operations competing for CPU, disk I/O, or network bandwidth
368
+ - **System instability**: High concurrency can cause timeouts, crashes, or degraded performance
367
369
 
368
- These options can be passed in to the following APIs:
370
+ To prevent these issues, CocoIndex provides concurrency controls that limit how many data items are processed simultaneously.
369
371
 
370
- * [`FlowBuilder.add_source()`](#import-from-source): The options above control the processing concurrency of multiple rows from a source. New rows will not be loaded in memory if it'll be over the limit.
372
+ #### Concurrency Options
371
373
 
372
- Besides, global limits on overall processing concurrency of all sources from all flows can be specified by [`GlobalExecutionOptions`](/docs/core/settings#globalexecutionoptions) or corresponding [environment variables](/docs/core/settings#list-of-environment-variables).
373
- If both global and per-source limits are specified, both need to be satisfied to admit additional source rows.
374
+ You can control processing concurrency using these options:
374
375
 
375
- * [`DataSlice.row()`](#for-each-row): The options above provides a finer-grained control, to limit the processing concurrency of multiple rows within a table at any level.
376
+ * `max_inflight_rows`: Limits the maximum number of data rows being processed concurrently
377
+ * `max_inflight_bytes`: Limits the total memory footprint of data being processed concurrently (measured in bytes)
376
378
 
377
- `max_inflight_bytes` only counts the number of bytes already existing in the current row before any further processing.
379
+ When these limits are reached, CocoIndex will pause loading new data until some of the current processing completes, ensuring your system remains stable.
380
+
381
+ #### Where to Apply Concurrency Controls
382
+
383
+ These concurrency options can be configured at different levels:
384
+
385
+ * **Source level** via [`FlowBuilder.add_source()`](#import-from-source): Controls how many rows from a data source are processed simultaneously. This prevents overwhelming your system when ingesting large datasets.
386
+
387
+ You can also set global limits across all sources and flows using [`GlobalExecutionOptions`](/docs/core/settings#globalexecutionoptions) or environment variables [`COCOINDEX_SOURCE_MAX_INFLIGHT_ROWS`](/docs/core/settings#list-of-environment-variables)/[`COCOINDEX_SOURCE_MAX_INFLIGHT_BYTES`](/docs/core/settings#list-of-environment-variables).
388
+ When both global and per-source limits are specified, both limits are enforced independently - a new row can only be processed if there's available capacity in both the global budget (shared across all sources) and the per-source budget (specific to that source).
389
+
390
+ * **Row iteration level** via [`DataSlice.row()`](#for-each-row): Provides fine-grained control over parallel processing within nested data structures, allowing you to tune concurrency at any level of your data hierarchy.
391
+
392
+ :::note
393
+
394
+ The `max_inflight_bytes` limit only counts the size of data that already exists in memory before any transformations are applied. It doesn't include the memory used by intermediate processing results.
395
+
396
+ :::
378
397
 
379
398
  For example:
380
399
 
@@ -77,24 +77,33 @@ If not set, all flows are in a default unnamed namespace.
77
77
 
78
78
  `DatabaseConnectionSpec` configures the connection to a database. Only Postgres is supported for now. It has the following fields:
79
79
 
80
- * `url` (type: `str`, required): The URL of the Postgres database to use as the internal storage, e.g. `postgres://cocoindex:cocoindex@localhost/cocoindex`.
80
+ * `url` (type: `str`): The URL of the Postgres database to use as the internal storage, e.g. `postgres://cocoindex:cocoindex@localhost/cocoindex`.
81
81
 
82
82
  *Environment variable* for `Settings.database.url`: `COCOINDEX_DATABASE_URL`
83
83
 
84
- * `user` (type: `str`, optional): The username for the Postgres database. If not provided, username will come from `url`.
84
+ * `user` (type: `str | None`, default: `None`): The username for the Postgres database. If not provided, username will come from `url`.
85
85
 
86
86
  *Environment variable* for `Settings.database.user`: `COCOINDEX_DATABASE_USER`
87
87
 
88
- * `password` (type: `str`, optional): The password for the Postgres database. If not provided, password will come from `url`.
88
+ * `password` (type: `str | None`, default: `None`): The password for the Postgres database. If not provided, password will come from `url`.
89
89
 
90
90
  *Environment variable* for `Settings.database.password`: `COCOINDEX_DATABASE_PASSWORD`
91
91
 
92
- :::tip
92
+ :::tip
93
93
 
94
- Please be careful that all values in `url` needs to be url-encoded if they contain special characters.
95
- For this reason, prefer to use the separated `user` and `password` fields for username and password.
94
+ Please be careful that all values in `url` need to be URL-encoded if they contain special characters.
95
+ For this reason, prefer to use the separated `user` and `password` fields for username and password.
96
+
97
+ :::
98
+
99
+ * `max_connections` (type: `int`, default: `64`): The maximum number of connections to keep in the pool.
100
+
101
+ *Environment variable* for `Settings.database.max_connections`: `COCOINDEX_DATABASE_MAX_CONNECTIONS`
102
+
103
+ * `min_connections` (type: `int`, default: `16`): The minimum number of connections to keep in the pool.
104
+
105
+ *Environment variable* for `Settings.database.min_connections`: `COCOINDEX_DATABASE_MIN_CONNECTIONS`
96
106
 
97
- :::
98
107
 
99
108
  :::info
100
109
 
@@ -109,10 +118,10 @@ If you use the Postgres database hosted by [Supabase](https://supabase.com/), pl
109
118
 
110
119
  `GlobalExecutionOptions` is used to configure the global execution options shared by all flows. It has the following fields:
111
120
 
112
- * `source_max_inflight_rows` (type: `int`, optional): The maximum number of concurrent inflight requests for all source operations.
113
- * `source_max_inflight_bytes` (type: `int`, optional): The maximum number of concurrent inflight bytes for all source operations.
121
+ * `source_max_inflight_rows` (type: `int | None`, default: `256`): The maximum number of concurrent inflight rows for all source operations.
122
+ * `source_max_inflight_bytes` (type: `int | None`, default: `None`): The maximum number of concurrent inflight bytes for all source operations.
114
123
 
115
- See also [flow definition docs](/docs/core/flow_def#control-processing-concurrency) to control processing concurrency on per-source basis.
124
+ See also [flow definition docs](/docs/core/flow_def#control-processing-concurrency) about why it's necessary to control processing concurrency, and how to configure it on a per-source basis.
116
125
  If both global and per-source limits are specified, both need to be satisfied to admit additional source rows.
117
126
 
118
127
  ## List of Environment Variables
@@ -125,5 +134,7 @@ This is the list of environment variables, each of which has a corresponding fie
125
134
  | `COCOINDEX_DATABASE_URL` | `database.url` | Yes |
126
135
  | `COCOINDEX_DATABASE_USER` | `database.user` | No |
127
136
  | `COCOINDEX_DATABASE_PASSWORD` | `database.password` | No |
128
- | `COCOINDEX_SOURCE_MAX_INFLIGHT_ROWS` | `global_execution_options.source_max_inflight_rows` | No |
137
+ | `COCOINDEX_DATABASE_MAX_CONNECTIONS` | `database.max_connections` | No (default: `64`) |
138
+ | `COCOINDEX_DATABASE_MIN_CONNECTIONS` | `database.min_connections` | No (default: `16`) |
139
+ | `COCOINDEX_SOURCE_MAX_INFLIGHT_ROWS` | `global_execution_options.source_max_inflight_rows` | No (default: `256`) |
129
140
  | `COCOINDEX_SOURCE_MAX_INFLIGHT_BYTES` | `global_execution_options.source_max_inflight_bytes` | No |
@@ -9,12 +9,12 @@ description: CocoIndex Built-in Functions
9
9
 
10
10
  `ParseJson` parses a given text to JSON.
11
11
 
12
- The spec takes the following fields:
12
+ Input data:
13
13
 
14
- * `text` (`str`): The source text to parse.
15
- * `language` (`str`, optional): The language of the source text. Only `json` is supported now. Default to `json`.
14
+ * `text` (*Str*): The source text to parse.
15
+ * `language` (*Str?*, default: `"json"`): The language of the source text. Only `json` is supported now.
16
16
 
17
- Return: *Json*
17
+ Return: *Json*, the parsed JSON object.
18
18
 
19
19
  ## SplitRecursively
20
20
 
@@ -37,7 +37,7 @@ Input data:
37
37
 
38
38
  * `text` (*Str*): The text to split.
39
39
  * `chunk_size` (*Int64*): The maximum size of each chunk, in bytes.
40
- * `min_chunk_size` (*Int64*, optional): The minimum size of each chunk, in bytes. If not provided, default to `chunk_size / 2`.
40
+ * `min_chunk_size` (*Int64*, default: `chunk_size / 2`): The minimum size of each chunk, in bytes.
41
41
 
42
42
  :::note
43
43
 
@@ -48,8 +48,8 @@ Input data:
48
48
 
49
49
  :::
50
50
 
51
- * `chunk_overlap` (*Int64*, optional): The maximum overlap size between adjacent chunks, in bytes.
52
- * `language` (*Str*, optional): The language of the document.
51
+ * `chunk_overlap` (*Int64?*, default: *Null*): The maximum overlap size between adjacent chunks, in bytes.
52
+ * `language` (*Str*, default: `""`): The language of the document.
53
53
  Can be a language name (e.g. `Python`, `Javascript`, `Markdown`) or a file extension (e.g. `.py`, `.js`, `.md`).
54
54
 
55
55
 
@@ -61,7 +61,7 @@ Input data:
61
61
  * `custom_languages` in the spec, against the `language_name` or `aliases` field of each entry.
62
62
  * Builtin languages (see [Supported Languages](#supported-languages) section below), against the language, aliases or file extensions of each entry.
63
63
 
64
- All matches are in a case-insensitive manner. If the value of `language` is null, it'll be treated as empty string.
64
+ All matches are in a case-insensitive manner.
65
65
 
66
66
  * If no match is found, the input will be treated as plain text.
67
67
 
@@ -185,7 +185,7 @@ Not all LLM APIs support text embedding. See the [LLM API Types table](/docs/ai/
185
185
 
186
186
  Input data:
187
187
 
188
- * `text` (*Str*, required): The text to embed.
188
+ * `text` (*Str*): The text to embed.
189
189
 
190
190
  Return: *Vector[Float32, N]*, where *N* is the dimension of the embedding vector determined by the model.
191
191
 
@@ -4,7 +4,7 @@ version = "0.1.0"
4
4
  description = "Image search examples for cocoindex: CLIP and ColPali-based embedding."
5
5
  requires-python = ">=3.11"
6
6
  dependencies = [
7
- "cocoindex[colpali]>=0.1.75",
7
+ "cocoindex[colpali]>=0.1.76",
8
8
  "python-dotenv>=1.0.1",
9
9
  "fastapi>=0.100.0",
10
10
  "torch>=2.0.0",
@@ -0,0 +1,71 @@
1
+ # Build visual document index from PDFs and images with ColPali
2
+ [![GitHub](https://img.shields.io/github/stars/cocoindex-io/cocoindex?color=5B5BD6)](https://github.com/cocoindex-io/cocoindex)
3
+
4
+
5
+ In this example, we build a visual document indexing flow using ColPali for embedding PDFs and images, and query the index with natural language.
6
+
7
+ We appreciate a star ⭐ at [CocoIndex Github](https://github.com/cocoindex-io/cocoindex) if this is helpful.
8
+
9
+ ## Steps
10
+ ### Indexing Flow
11
+
12
+ 1. We ingest a list of PDF files and image files from the `source_files` directory.
13
+ 2. For each file:
14
+ - **PDF files**: convert each page to a high-resolution image (300 DPI)
15
+ - **Image files**: use the image directly
16
+ - Generate visual embeddings for each page/image using ColPali model
17
+ 3. We will save the embeddings and metadata in the Qdrant vector database.
18
+
19
+ ### Query
20
+ We will match against user-provided natural language text using ColPali's text-to-visual embedding capability, enabling semantic search across visual document content.
21
+
22
+
23
+
24
+ ## Prerequisite
25
+ [Install Qdrant](https://qdrant.tech/documentation/guides/installation/) if you don't have one running locally.
26
+
27
+ You can start Qdrant with Docker:
28
+ ```bash
29
+ docker run -p 6333:6333 -p 6334:6334 qdrant/qdrant
30
+ ```
31
+
32
+ ## Run
33
+
34
+ Install dependencies:
35
+
36
+ ```bash
37
+ pip install -e .
38
+ ```
39
+
40
+ Setup:
41
+
42
+ ```bash
43
+ cocoindex setup main.py
44
+ ```
45
+
46
+ Update index:
47
+
48
+ ```bash
49
+ cocoindex update main.py
50
+ ```
51
+
52
+ Run:
53
+
54
+ ```bash
55
+ python main.py
56
+ ```
57
+
58
+ ## About ColPali
59
+ This example uses [ColPali](https://github.com/illuin-tech/colpali), a state-of-the-art vision-language model that enables:
60
+ - Direct visual understanding of document layouts, tables, and figures
61
+ - Natural language queries against visual document content
62
+ - No need for OCR or text extraction - works directly with document images
63
+
64
+ ## CocoInsight
65
+ I used CocoInsight (Free beta now) to troubleshoot the index generation and understand the data lineage of the pipeline. It just connects to your local CocoIndex server, with zero pipeline data retention. Run the following command to start CocoInsight:
66
+
67
+ ```
68
+ cocoindex server -ci main.py
69
+ ```
70
+
71
+ Then open the CocoInsight UI at [https://cocoindex.io/cocoinsight](https://cocoindex.io/cocoinsight).
@@ -0,0 +1,135 @@
1
+ import cocoindex
2
+ import os
3
+ import mimetypes
4
+
5
+ from dotenv import load_dotenv
6
+ from dataclasses import dataclass
7
+ from pdf2image import convert_from_bytes
8
+ from io import BytesIO
9
+
10
+ from qdrant_client import QdrantClient
11
+
12
+ QDRANT_GRPC_URL = "http://localhost:6334"
13
+ QDRANT_COLLECTION = "MultiFormatIndexings"  # collection used by both the export target and the query client
14
+ COLPALI_MODEL_NAME = os.getenv("COLPALI_MODEL", "vidore/colpali-v1.2")  # override via the COLPALI_MODEL environment variable
15
+
16
+
17
+ @dataclass
18
+ class Page:
19
+ page_number: int | None  # 1-based page number for PDF pages; None for image files
20
+ image: bytes  # PNG bytes of a rendered PDF page, or the original file content for an image file
21
+
22
+
23
+ @cocoindex.op.function()
24
+ def file_to_pages(filename: str, content: bytes) -> list[Page]:
25
+ """
26
+ Convert a file into pages, based on the MIME type guessed from its filename.
27
+ Returns one PNG-encoded Page per PDF page (numbered from 1), a single Page without a page number for an image file, or an empty list for any other file type.
28
+ """
29
+ # Guess the MIME type based on the filename
30
+ mime_type, _ = mimetypes.guess_type(filename)
31
+
32
+ if mime_type == "application/pdf":
33
+ images = convert_from_bytes(content, dpi=300)  # one image per PDF page, rendered at 300 DPI
34
+ pages = []
35
+ for i, image in enumerate(images):
36
+ with BytesIO() as buffer:
37
+ image.save(buffer, format="PNG")
38
+ pages.append(Page(page_number=i + 1, image=buffer.getvalue()))
39
+ return pages
40
+ elif mime_type and mime_type.startswith("image/"):
41
+ return [Page(page_number=None, image=content)]  # image bytes are passed through unchanged
42
+ else:
43
+ return []
44
+
45
+
46
+ qdrant_connection = cocoindex.add_auth_entry(  # named auth entry, passed as `connection` to the Qdrant export target
47
+ "qdrant_connection",
48
+ cocoindex.targets.QdrantConnection(grpc_url=QDRANT_GRPC_URL),
49
+ )
50
+
51
+
52
+ @cocoindex.flow_def(name="MultiFormatIndexing")
53
+ def multi_format_indexing_flow(
54
+ flow_builder: cocoindex.FlowBuilder, data_scope: cocoindex.DataScope
55
+ ) -> None:
56
+ """
57
+ Define an example flow that reads files from `source_files`, converts each file into page images, embeds each page with ColPali, and exports the embeddings to Qdrant.
58
+ """
59
+ data_scope["documents"] = flow_builder.add_source(
60
+ cocoindex.sources.LocalFile(path="source_files", binary=True)
61
+ )
62
+
63
+ output_embeddings = data_scope.add_collector()
64
+
65
+ with data_scope["documents"].row() as doc:  # per-document processing
66
+ doc["pages"] = flow_builder.transform(
67
+ file_to_pages, filename=doc["filename"], content=doc["content"]
68
+ )
69
+ with doc["pages"].row() as page:  # per-page processing within each document
70
+ page["embedding"] = page["image"].transform(
71
+ cocoindex.functions.ColPaliEmbedImage(model=COLPALI_MODEL_NAME)
72
+ )
73
+ output_embeddings.collect(
74
+ id=cocoindex.GeneratedField.UUID,
75
+ filename=doc["filename"],
76
+ page=page["page_number"],  # None for image files
77
+ embedding=page["embedding"],
78
+ )
79
+
80
+ output_embeddings.export(
81
+ "multi_format_indexings",
82
+ cocoindex.targets.Qdrant(
83
+ connection=qdrant_connection,
84
+ collection_name=QDRANT_COLLECTION,
85
+ ),
86
+ primary_key_fields=["id"],
87
+ )
88
+
89
+
90
+ @cocoindex.transform_flow()
91
+ def query_to_colpali_embedding(
92
+ text: cocoindex.DataSlice[str],
93
+ ) -> cocoindex.DataSlice[list[list[float]]]:
94
+ return text.transform(  # embeds the query text with the same model name the indexing flow uses for images
95
+ cocoindex.functions.ColPaliEmbedQuery(model=COLPALI_MODEL_NAME)
96
+ )
97
+
98
+
99
+ def _main() -> None:
100
+ # Initialize Qdrant client
101
+ client = QdrantClient(url=QDRANT_GRPC_URL, prefer_grpc=True)
102
+
103
+ # Run queries in a loop to demonstrate the query capabilities.
104
+ while True:
105
+ query = input("Enter search query (or Enter to quit): ")
106
+ if query == "":
107
+ break
108
+
109
+ # Get the embedding for the query
110
+ query_embedding = query_to_colpali_embedding.eval(query)
111
+
112
+ search_results = client.query_points(
113
+ collection_name=QDRANT_COLLECTION,
114
+ query=query_embedding, # Multi-vector format: list[list[float]]
115
+ using="embedding", # Specify the vector field name
116
+ limit=5,
117
+ with_payload=True,
118
+ )
119
+ print("\nSearch results:")
120
+ for result in search_results.points:
121
+ score = result.score
122
+ payload = result.payload
123
+ if payload is None:  # nothing to display for a point without payload
124
+ continue
125
+ page_number = payload["page"]
126
+ page_number_str = f"Page:{page_number}" if page_number is not None else ""  # image files are indexed with page=None
127
+ print(f"[{score:.3f}] {payload['filename']} {page_number_str}")
128
+ print("---")
129
+ print()
130
+
131
+
132
+ if __name__ == "__main__":
133
+ load_dotenv()  # load environment variables from a .env file before initializing cocoindex
134
+ cocoindex.init()
135
+ _main()
@@ -0,0 +1,14 @@
1
+ [project]
2
+ name = "multi-format-indexing"
3
+ version = "0.1.0"
4
+ description = "Simple example for cocoindex: build a visual document index from local PDF and image files with ColPali."
5
+ requires-python = ">=3.11"
6
+ dependencies = [
7
+ "cocoindex[colpali]>=0.1.76",
8
+ "python-dotenv>=1.0.1",
9
+ "pdf2image>=1.17.0",
10
+ "qdrant-client>=1.15.0",
11
+ ]
12
+
13
+ [tool.setuptools]
14
+ packages = []
@@ -0,0 +1,2 @@
1
+ # Postgres database address for cocoindex
2
+ COCOINDEX_DATABASE_URL=postgres://cocoindex:cocoindex@localhost/cocoindex