cocoindex 0.1.74__tar.gz → 0.1.76__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (304)
  1. {cocoindex-0.1.74 → cocoindex-0.1.76}/.github/workflows/_doc_release.yml +0 -1
  2. {cocoindex-0.1.74 → cocoindex-0.1.76}/.github/workflows/_test.yml +1 -1
  3. {cocoindex-0.1.74 → cocoindex-0.1.76}/.github/workflows/docs.yml +0 -1
  4. {cocoindex-0.1.74 → cocoindex-0.1.76}/Cargo.lock +1 -1
  5. {cocoindex-0.1.74 → cocoindex-0.1.76}/Cargo.toml +1 -1
  6. {cocoindex-0.1.74 → cocoindex-0.1.76}/PKG-INFO +7 -2
  7. {cocoindex-0.1.74 → cocoindex-0.1.76}/README.md +1 -0
  8. {cocoindex-0.1.74 → cocoindex-0.1.76}/docs/docs/core/flow_def.mdx +4 -4
  9. {cocoindex-0.1.74 → cocoindex-0.1.76}/docs/docs/core/flow_methods.mdx +5 -5
  10. {cocoindex-0.1.74 → cocoindex-0.1.76}/docs/docs/custom_ops/custom_targets.mdx +12 -7
  11. {cocoindex-0.1.74 → cocoindex-0.1.76}/docs/docs/ops/functions.md +50 -0
  12. cocoindex-0.1.76/docs/docs/tutorials/manage_flow_dynamically.mdx +302 -0
  13. {cocoindex-0.1.74 → cocoindex-0.1.76}/docs/sidebars.ts +1 -0
  14. {cocoindex-0.1.74 → cocoindex-0.1.76}/docs/yarn.lock +190 -190
  15. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/amazon_s3_embedding/pyproject.toml +5 -1
  16. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/azure_blob_embedding/pyproject.toml +5 -1
  17. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/code_embedding/pyproject.toml +6 -1
  18. cocoindex-0.1.76/examples/custom_output_files/.gitignore +1 -0
  19. cocoindex-0.1.76/examples/custom_output_files/README.md +53 -0
  20. cocoindex-0.1.76/examples/custom_output_files/main.py +123 -0
  21. cocoindex-0.1.76/examples/custom_output_files/pyproject.toml +9 -0
  22. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/docs_to_knowledge_graph/pyproject.toml +1 -1
  23. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/face_recognition/pyproject.toml +1 -1
  24. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/fastapi_server_docker/requirements.txt +1 -1
  25. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/gdrive_text_embedding/pyproject.toml +5 -1
  26. cocoindex-0.1.76/examples/image_search/.env +1 -0
  27. cocoindex-0.1.76/examples/image_search/README.md +105 -0
  28. cocoindex-0.1.76/examples/image_search/colpali_main.py +161 -0
  29. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/image_search/frontend/src/App.jsx +2 -2
  30. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/image_search/frontend/vite.config.js +1 -0
  31. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/image_search/pyproject.toml +3 -3
  32. cocoindex-0.1.76/examples/live_updates/data/bizarre_animals.md +21 -0
  33. cocoindex-0.1.76/examples/live_updates/data/chunk_norris.md +19 -0
  34. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/live_updates/pyproject.toml +1 -4
  35. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/manuals_llm_extraction/pyproject.toml +1 -1
  36. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/paper_metadata/pyproject.toml +1 -1
  37. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/patient_intake_extraction/pyproject.toml +1 -1
  38. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/pdf_embedding/main.py +0 -1
  39. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/pdf_embedding/pyproject.toml +3 -1
  40. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/product_recommendation/pyproject.toml +1 -1
  41. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/text_embedding/pyproject.toml +1 -1
  42. cocoindex-0.1.76/examples/text_embedding_qdrant/.env +2 -0
  43. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/text_embedding_qdrant/pyproject.toml +1 -1
  44. {cocoindex-0.1.74 → cocoindex-0.1.76}/pyproject.toml +3 -2
  45. {cocoindex-0.1.74 → cocoindex-0.1.76}/python/cocoindex/__init__.py +5 -3
  46. {cocoindex-0.1.74 → cocoindex-0.1.76}/python/cocoindex/convert.py +56 -87
  47. {cocoindex-0.1.74 → cocoindex-0.1.76}/python/cocoindex/flow.py +27 -11
  48. cocoindex-0.1.76/python/cocoindex/functions.py +298 -0
  49. {cocoindex-0.1.74 → cocoindex-0.1.76}/python/cocoindex/op.py +3 -2
  50. {cocoindex-0.1.74 → cocoindex-0.1.76}/python/cocoindex/tests/test_convert.py +111 -24
  51. cocoindex-0.1.76/python/cocoindex/tests/test_transform_flow.py +103 -0
  52. {cocoindex-0.1.74 → cocoindex-0.1.76}/python/cocoindex/typing.py +4 -4
  53. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/base/schema.rs +5 -0
  54. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/base/spec.rs +0 -10
  55. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/builder/analyzer.rs +11 -19
  56. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/builder/flow_builder.rs +61 -54
  57. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/ops/factory_bases.rs +55 -8
  58. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/ops/functions/embed_text.rs +6 -5
  59. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/ops/functions/extract_by_llm.rs +58 -19
  60. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/ops/functions/parse_json.rs +5 -4
  61. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/ops/functions/split_recursively.rs +104 -47
  62. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/ops/functions/test_utils.rs +15 -27
  63. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/ops/sdk.rs +1 -1
  64. cocoindex-0.1.74/examples/image_search/.env +0 -1
  65. cocoindex-0.1.74/examples/image_search/README.md +0 -52
  66. cocoindex-0.1.74/python/cocoindex/functions.py +0 -101
  67. {cocoindex-0.1.74 → cocoindex-0.1.76}/.cargo/config.toml +0 -0
  68. {cocoindex-0.1.74 → cocoindex-0.1.76}/.env.lib_debug +0 -0
  69. {cocoindex-0.1.74 → cocoindex-0.1.76}/.github/ISSUE_TEMPLATE/🐛-bug-report.md +0 -0
  70. {cocoindex-0.1.74 → cocoindex-0.1.76}/.github/ISSUE_TEMPLATE/💡-feature-request.md +0 -0
  71. {cocoindex-0.1.74 → cocoindex-0.1.76}/.github/scripts/update_version.sh +0 -0
  72. {cocoindex-0.1.74 → cocoindex-0.1.76}/.github/workflows/CI.yml +0 -0
  73. {cocoindex-0.1.74 → cocoindex-0.1.76}/.github/workflows/format.yml +0 -0
  74. {cocoindex-0.1.74 → cocoindex-0.1.76}/.github/workflows/release.yml +0 -0
  75. {cocoindex-0.1.74 → cocoindex-0.1.76}/.gitignore +0 -0
  76. {cocoindex-0.1.74 → cocoindex-0.1.76}/.pre-commit-config.yaml +0 -0
  77. {cocoindex-0.1.74 → cocoindex-0.1.76}/CODE_OF_CONDUCT.md +0 -0
  78. {cocoindex-0.1.74 → cocoindex-0.1.76}/CONTRIBUTING.md +0 -0
  79. {cocoindex-0.1.74 → cocoindex-0.1.76}/LICENSE +0 -0
  80. {cocoindex-0.1.74 → cocoindex-0.1.76}/dev/neo4j.yaml +0 -0
  81. {cocoindex-0.1.74 → cocoindex-0.1.76}/dev/postgres.yaml +0 -0
  82. {cocoindex-0.1.74 → cocoindex-0.1.76}/docs/.gitignore +0 -0
  83. {cocoindex-0.1.74 → cocoindex-0.1.76}/docs/README.md +0 -0
  84. {cocoindex-0.1.74 → cocoindex-0.1.76}/docs/docs/about/community.md +0 -0
  85. {cocoindex-0.1.74 → cocoindex-0.1.76}/docs/docs/about/contributing.md +0 -0
  86. {cocoindex-0.1.74 → cocoindex-0.1.76}/docs/docs/ai/llm.mdx +0 -0
  87. {cocoindex-0.1.74 → cocoindex-0.1.76}/docs/docs/core/basics.md +0 -0
  88. {cocoindex-0.1.74 → cocoindex-0.1.76}/docs/docs/core/cli.mdx +0 -0
  89. {cocoindex-0.1.74 → cocoindex-0.1.76}/docs/docs/core/data_example.svg +0 -0
  90. {cocoindex-0.1.74 → cocoindex-0.1.76}/docs/docs/core/data_types.mdx +0 -0
  91. {cocoindex-0.1.74 → cocoindex-0.1.76}/docs/docs/core/flow_example.svg +0 -0
  92. {cocoindex-0.1.74 → cocoindex-0.1.76}/docs/docs/core/settings.mdx +0 -0
  93. {cocoindex-0.1.74 → cocoindex-0.1.76}/docs/docs/custom_ops/custom_functions.mdx +0 -0
  94. {cocoindex-0.1.74 → cocoindex-0.1.76}/docs/docs/getting_started/installation.md +0 -0
  95. {cocoindex-0.1.74 → cocoindex-0.1.76}/docs/docs/getting_started/markdown_files.zip +0 -0
  96. {cocoindex-0.1.74 → cocoindex-0.1.76}/docs/docs/getting_started/overview.md +0 -0
  97. {cocoindex-0.1.74 → cocoindex-0.1.76}/docs/docs/getting_started/quickstart.md +0 -0
  98. {cocoindex-0.1.74 → cocoindex-0.1.76}/docs/docs/ops/sources.md +0 -0
  99. {cocoindex-0.1.74 → cocoindex-0.1.76}/docs/docs/ops/targets.md +0 -0
  100. {cocoindex-0.1.74 → cocoindex-0.1.76}/docs/docs/query.mdx +0 -0
  101. {cocoindex-0.1.74 → cocoindex-0.1.76}/docs/docs/tutorials/live_updates.md +0 -0
  102. {cocoindex-0.1.74 → cocoindex-0.1.76}/docs/docusaurus.config.ts +0 -0
  103. {cocoindex-0.1.74 → cocoindex-0.1.76}/docs/package.json +0 -0
  104. {cocoindex-0.1.74 → cocoindex-0.1.76}/docs/src/components/HomepageFeatures/index.tsx +0 -0
  105. {cocoindex-0.1.74 → cocoindex-0.1.76}/docs/src/components/HomepageFeatures/styles.module.css +0 -0
  106. {cocoindex-0.1.74 → cocoindex-0.1.76}/docs/src/css/custom.css +0 -0
  107. {cocoindex-0.1.74 → cocoindex-0.1.76}/docs/src/theme/Root.js +0 -0
  108. {cocoindex-0.1.74 → cocoindex-0.1.76}/docs/static/.nojekyll +0 -0
  109. {cocoindex-0.1.74 → cocoindex-0.1.76}/docs/static/img/docusaurus.png +0 -0
  110. {cocoindex-0.1.74 → cocoindex-0.1.76}/docs/static/img/favicon.ico +0 -0
  111. {cocoindex-0.1.74 → cocoindex-0.1.76}/docs/static/img/icon.svg +0 -0
  112. {cocoindex-0.1.74 → cocoindex-0.1.76}/docs/static/img/incremental-etl.gif +0 -0
  113. {cocoindex-0.1.74 → cocoindex-0.1.76}/docs/static/robots.txt +0 -0
  114. {cocoindex-0.1.74 → cocoindex-0.1.76}/docs/tsconfig.json +0 -0
  115. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/amazon_s3_embedding/.env.example +0 -0
  116. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/amazon_s3_embedding/.gitignore +0 -0
  117. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/amazon_s3_embedding/README.md +0 -0
  118. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/amazon_s3_embedding/main.py +0 -0
  119. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/azure_blob_embedding/.env.example +0 -0
  120. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/azure_blob_embedding/.gitignore +0 -0
  121. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/azure_blob_embedding/README.md +0 -0
  122. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/azure_blob_embedding/main.py +0 -0
  123. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/code_embedding/.env +0 -0
  124. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/code_embedding/README.md +0 -0
  125. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/code_embedding/main.py +0 -0
  126. {cocoindex-0.1.74/examples/docs_to_knowledge_graph → cocoindex-0.1.76/examples/custom_output_files}/.env +0 -0
  127. {cocoindex-0.1.74/examples/live_updates → cocoindex-0.1.76/examples/custom_output_files}/data/bizarre_animals.md +0 -0
  128. {cocoindex-0.1.74/examples/live_updates → cocoindex-0.1.76/examples/custom_output_files}/data/chunk_norris.md +0 -0
  129. {cocoindex-0.1.74/examples/face_recognition → cocoindex-0.1.76/examples/docs_to_knowledge_graph}/.env +0 -0
  130. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/docs_to_knowledge_graph/README.md +0 -0
  131. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/docs_to_knowledge_graph/main.py +0 -0
  132. {cocoindex-0.1.74/examples/manuals_llm_extraction → cocoindex-0.1.76/examples/face_recognition}/.env +0 -0
  133. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/face_recognition/README.md +0 -0
  134. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/face_recognition/images/Carter_welcomes_Reagan.jpg +0 -0
  135. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/face_recognition/images/Solvay_conference_1927.jpg +0 -0
  136. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/face_recognition/images/Steve_Jobs_and_Bill_Gates_(522695099).jpg +0 -0
  137. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/face_recognition/images/einplanck3.jpg +0 -0
  138. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/face_recognition/main.py +0 -0
  139. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/fastapi_server_docker/.dockerignore +0 -0
  140. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/fastapi_server_docker/.env +0 -0
  141. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/fastapi_server_docker/README.md +0 -0
  142. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/fastapi_server_docker/compose.yaml +0 -0
  143. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/fastapi_server_docker/dockerfile +0 -0
  144. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/fastapi_server_docker/files/1810.04805v2.md +0 -0
  145. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/fastapi_server_docker/main.py +0 -0
  146. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/gdrive_text_embedding/.env.example +0 -0
  147. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/gdrive_text_embedding/.gitignore +0 -0
  148. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/gdrive_text_embedding/README.md +0 -0
  149. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/gdrive_text_embedding/main.py +0 -0
  150. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/image_search/frontend/.gitignore +0 -0
  151. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/image_search/frontend/index.html +0 -0
  152. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/image_search/frontend/package-lock.json +0 -0
  153. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/image_search/frontend/package.json +0 -0
  154. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/image_search/frontend/src/main.jsx +0 -0
  155. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/image_search/frontend/src/style.css +0 -0
  156. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/image_search/img/cat1.jpeg +0 -0
  157. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/image_search/img/dog1.jpeg +0 -0
  158. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/image_search/img/elephant1.jpg +0 -0
  159. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/image_search/img/giraffe.jpg +0 -0
  160. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/image_search/main.py +0 -0
  161. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/live_updates/.env +0 -0
  162. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/live_updates/README.md +0 -0
  163. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/live_updates/main.py +0 -0
  164. {cocoindex-0.1.74/examples/pdf_embedding → cocoindex-0.1.76/examples/manuals_llm_extraction}/.env +0 -0
  165. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/manuals_llm_extraction/README.md +0 -0
  166. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/manuals_llm_extraction/main.py +0 -0
  167. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/manuals_llm_extraction/manuals/array.pdf +0 -0
  168. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/manuals_llm_extraction/manuals/base64.pdf +0 -0
  169. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/manuals_llm_extraction/manuals/copy.pdf +0 -0
  170. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/manuals_llm_extraction/manuals/glob.pdf +0 -0
  171. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/paper_metadata/.env.example +0 -0
  172. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/paper_metadata/.gitignore +0 -0
  173. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/paper_metadata/README.md +0 -0
  174. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/paper_metadata/main.py +0 -0
  175. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/paper_metadata/papers/1706.03762v7.pdf +0 -0
  176. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/paper_metadata/papers/1810.04805v2.pdf +0 -0
  177. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/paper_metadata/papers/2502.06786v3.pdf +0 -0
  178. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/paper_metadata/papers/2502.20346v1.pdf +0 -0
  179. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/patient_intake_extraction/.env.example +0 -0
  180. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/patient_intake_extraction/README.md +0 -0
  181. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/patient_intake_extraction/data/README.md +0 -0
  182. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/patient_intake_extraction/data/patient_forms/Patient_Intake_Form_David_Artificial.docx +0 -0
  183. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/patient_intake_extraction/data/patient_forms/Patient_Intake_Form_Emily_Artificial.pdf +0 -0
  184. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/patient_intake_extraction/data/patient_forms/Patient_Intake_Form_Joe_Artificial.pdf +0 -0
  185. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/patient_intake_extraction/data/patient_forms/Patient_Intake_From_Jane_Artificial.docx +0 -0
  186. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/patient_intake_extraction/main.py +0 -0
  187. {cocoindex-0.1.74/examples/product_recommendation → cocoindex-0.1.76/examples/pdf_embedding}/.env +0 -0
  188. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/pdf_embedding/README.md +0 -0
  189. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/pdf_embedding/pdf_files/1706.03762v7.pdf +0 -0
  190. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/pdf_embedding/pdf_files/1810.04805v2.pdf +0 -0
  191. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/pdf_embedding/pdf_files/rfc8259.pdf +0 -0
  192. {cocoindex-0.1.74/examples/text_embedding → cocoindex-0.1.76/examples/product_recommendation}/.env +0 -0
  193. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/product_recommendation/README.md +0 -0
  194. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/product_recommendation/img/cocoinsight.png +0 -0
  195. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/product_recommendation/img/neo4j.png +0 -0
  196. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/product_recommendation/main.py +0 -0
  197. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/product_recommendation/products/p1.json +0 -0
  198. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/product_recommendation/products/p2.json +0 -0
  199. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/product_recommendation/products/p3.json +0 -0
  200. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/product_recommendation/products/p4.json +0 -0
  201. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/product_recommendation/products/p5.json +0 -0
  202. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/product_recommendation/products/p6.json +0 -0
  203. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/product_recommendation/products/p7.json +0 -0
  204. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/product_recommendation/products/p8.json +0 -0
  205. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/product_recommendation/products/p9.json +0 -0
  206. {cocoindex-0.1.74/examples/text_embedding_qdrant → cocoindex-0.1.76/examples/text_embedding}/.env +0 -0
  207. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/text_embedding/README.md +0 -0
  208. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/text_embedding/Text_Embedding.ipynb +0 -0
  209. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/text_embedding/main.py +0 -0
  210. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/text_embedding/markdown_files/1706.03762v7.md +0 -0
  211. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/text_embedding/markdown_files/1810.04805v2.md +0 -0
  212. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/text_embedding/markdown_files/rfc8259.md +0 -0
  213. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/text_embedding_qdrant/README.md +0 -0
  214. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/text_embedding_qdrant/main.py +0 -0
  215. {cocoindex-0.1.74 → cocoindex-0.1.76}/examples/text_embedding_qdrant/markdown_files/rfc8259.md +0 -0
  216. {cocoindex-0.1.74 → cocoindex-0.1.76}/python/cocoindex/auth_registry.py +0 -0
  217. {cocoindex-0.1.74 → cocoindex-0.1.76}/python/cocoindex/cli.py +0 -0
  218. {cocoindex-0.1.74 → cocoindex-0.1.76}/python/cocoindex/index.py +0 -0
  219. {cocoindex-0.1.74 → cocoindex-0.1.76}/python/cocoindex/lib.py +0 -0
  220. {cocoindex-0.1.74 → cocoindex-0.1.76}/python/cocoindex/llm.py +0 -0
  221. {cocoindex-0.1.74 → cocoindex-0.1.76}/python/cocoindex/py.typed +0 -0
  222. {cocoindex-0.1.74 → cocoindex-0.1.76}/python/cocoindex/runtime.py +0 -0
  223. {cocoindex-0.1.74 → cocoindex-0.1.76}/python/cocoindex/setting.py +0 -0
  224. {cocoindex-0.1.74 → cocoindex-0.1.76}/python/cocoindex/setup.py +0 -0
  225. {cocoindex-0.1.74 → cocoindex-0.1.76}/python/cocoindex/sources.py +0 -0
  226. {cocoindex-0.1.74 → cocoindex-0.1.76}/python/cocoindex/targets.py +0 -0
  227. {cocoindex-0.1.74 → cocoindex-0.1.76}/python/cocoindex/tests/__init__.py +0 -0
  228. {cocoindex-0.1.74 → cocoindex-0.1.76}/python/cocoindex/tests/test_optional_database.py +0 -0
  229. {cocoindex-0.1.74 → cocoindex-0.1.76}/python/cocoindex/tests/test_typing.py +0 -0
  230. {cocoindex-0.1.74 → cocoindex-0.1.76}/python/cocoindex/tests/test_validation.py +0 -0
  231. {cocoindex-0.1.74 → cocoindex-0.1.76}/python/cocoindex/utils.py +0 -0
  232. {cocoindex-0.1.74 → cocoindex-0.1.76}/python/cocoindex/validation.py +0 -0
  233. {cocoindex-0.1.74 → cocoindex-0.1.76}/ruff.toml +0 -0
  234. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/base/duration.rs +0 -0
  235. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/base/field_attrs.rs +0 -0
  236. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/base/json_schema.rs +0 -0
  237. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/base/mod.rs +0 -0
  238. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/base/value.rs +0 -0
  239. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/builder/analyzed_flow.rs +0 -0
  240. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/builder/exec_ctx.rs +0 -0
  241. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/builder/mod.rs +0 -0
  242. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/builder/plan.rs +0 -0
  243. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/execution/db_tracking.rs +0 -0
  244. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/execution/db_tracking_setup.rs +0 -0
  245. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/execution/dumper.rs +0 -0
  246. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/execution/evaluator.rs +0 -0
  247. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/execution/indexing_status.rs +0 -0
  248. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/execution/live_updater.rs +0 -0
  249. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/execution/memoization.rs +0 -0
  250. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/execution/mod.rs +0 -0
  251. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/execution/row_indexer.rs +0 -0
  252. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/execution/source_indexer.rs +0 -0
  253. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/execution/stats.rs +0 -0
  254. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/lib.rs +0 -0
  255. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/lib_context.rs +0 -0
  256. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/llm/anthropic.rs +0 -0
  257. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/llm/gemini.rs +0 -0
  258. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/llm/litellm.rs +0 -0
  259. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/llm/mod.rs +0 -0
  260. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/llm/ollama.rs +0 -0
  261. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/llm/openai.rs +0 -0
  262. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/llm/openrouter.rs +0 -0
  263. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/llm/vllm.rs +0 -0
  264. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/llm/voyage.rs +0 -0
  265. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/ops/functions/mod.rs +0 -0
  266. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/ops/interface.rs +0 -0
  267. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/ops/mod.rs +0 -0
  268. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/ops/py_factory.rs +0 -0
  269. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/ops/registration.rs +0 -0
  270. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/ops/registry.rs +0 -0
  271. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/ops/sources/amazon_s3.rs +0 -0
  272. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/ops/sources/azure_blob.rs +0 -0
  273. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/ops/sources/google_drive.rs +0 -0
  274. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/ops/sources/local_file.rs +0 -0
  275. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/ops/sources/mod.rs +0 -0
  276. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/ops/targets/kuzu.rs +0 -0
  277. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/ops/targets/mod.rs +0 -0
  278. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/ops/targets/neo4j.rs +0 -0
  279. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/ops/targets/postgres.rs +0 -0
  280. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/ops/targets/qdrant.rs +0 -0
  281. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/ops/targets/shared/mod.rs +0 -0
  282. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/ops/targets/shared/property_graph.rs +0 -0
  283. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/ops/targets/shared/table_columns.rs +0 -0
  284. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/prelude.rs +0 -0
  285. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/py/convert.rs +0 -0
  286. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/py/mod.rs +0 -0
  287. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/server.rs +0 -0
  288. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/service/error.rs +0 -0
  289. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/service/flows.rs +0 -0
  290. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/service/mod.rs +0 -0
  291. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/settings.rs +0 -0
  292. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/setup/auth_registry.rs +0 -0
  293. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/setup/components.rs +0 -0
  294. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/setup/db_metadata.rs +0 -0
  295. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/setup/driver.rs +0 -0
  296. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/setup/mod.rs +0 -0
  297. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/setup/states.rs +0 -0
  298. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/utils/concur_control.rs +0 -0
  299. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/utils/db.rs +0 -0
  300. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/utils/fingerprint.rs +0 -0
  301. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/utils/immutable.rs +0 -0
  302. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/utils/mod.rs +0 -0
  303. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/utils/retryable.rs +0 -0
  304. {cocoindex-0.1.74 → cocoindex-0.1.76}/src/utils/yaml_ser.rs +0 -0
@@ -11,7 +11,6 @@ jobs:
11
11
  - uses: actions/checkout@v4
12
12
  - uses: actions/setup-node@v4
13
13
  with:
14
- node-version: 18
15
14
  cache: yarn
16
15
  cache-dependency-path: docs/yarn.lock
17
16
  - uses: webfactory/ssh-agent@v0.5.0
@@ -43,7 +43,7 @@ jobs:
43
43
  - name: Install Python toolchains
44
44
  run: |
45
45
  source .venv/bin/activate
46
- pip install maturin pytest mypy
46
+ pip install maturin mypy pytest pytest-asyncio
47
47
  - name: Python build
48
48
  run: |
49
49
  source .venv/bin/activate
@@ -19,7 +19,6 @@ jobs:
19
19
  - uses: actions/checkout@v4
20
20
  - uses: actions/setup-node@v4
21
21
  with:
22
- node-version: 18
23
22
  cache: yarn
24
23
  cache-dependency-path: docs/yarn.lock
25
24
  - name: Install dependencies
@@ -1297,7 +1297,7 @@ dependencies = [
1297
1297
 
1298
1298
  [[package]]
1299
1299
  name = "cocoindex"
1300
- version = "0.1.74"
1300
+ version = "0.1.76"
1301
1301
  dependencies = [
1302
1302
  "anyhow",
1303
1303
  "async-openai",
@@ -2,7 +2,7 @@
2
2
  name = "cocoindex"
3
3
  # Version used for local development is always higher than others to take precedence.
4
4
  # Will be overridden for specific release versions.
5
- version = "0.1.74"
5
+ version = "0.1.76"
6
6
  edition = "2024"
7
7
  rust-version = "1.88"
8
8
 
@@ -1,24 +1,28 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cocoindex
3
- Version: 0.1.74
3
+ Version: 0.1.76
4
4
  Requires-Dist: click>=8.1.8
5
5
  Requires-Dist: rich>=14.0.0
6
6
  Requires-Dist: python-dotenv>=1.1.0
7
7
  Requires-Dist: watchfiles>=1.1.0
8
8
  Requires-Dist: numpy>=1.23.2
9
9
  Requires-Dist: pytest ; extra == 'dev'
10
+ Requires-Dist: pytest-asyncio ; extra == 'dev'
10
11
  Requires-Dist: ruff ; extra == 'dev'
11
12
  Requires-Dist: mypy ; extra == 'dev'
12
13
  Requires-Dist: pre-commit ; extra == 'dev'
13
14
  Requires-Dist: sentence-transformers>=3.3.1 ; extra == 'embeddings'
15
+ Requires-Dist: colpali-engine ; extra == 'colpali'
14
16
  Requires-Dist: sentence-transformers>=3.3.1 ; extra == 'all'
17
+ Requires-Dist: colpali-engine ; extra == 'all'
15
18
  Provides-Extra: dev
16
19
  Provides-Extra: embeddings
20
+ Provides-Extra: colpali
17
21
  Provides-Extra: all
18
22
  License-File: LICENSE
19
23
  Summary: With CocoIndex, users declare the transformation, CocoIndex creates & maintains an index, and keeps the derived index up to date based on source update, with minimal computation and changes.
20
24
  Author-email: CocoIndex <cocoindex.io@gmail.com>
21
- License: Apache-2.0
25
+ License-Expression: Apache-2.0
22
26
  Requires-Python: >=3.11
23
27
  Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
24
28
  Project-URL: Homepage, https://cocoindex.io/
@@ -210,6 +214,7 @@ It defines an index flow like this:
210
214
  | [Image Search with Vision API](examples/image_search) | Generates detailed captions for images using a vision model, embeds them, enables live-updating semantic search via FastAPI and served on a React frontend|
211
215
  | [Face Recognition](examples/face_recognition) | Recognize faces in images and build embedding index |
212
216
  | [Paper Metadata](examples/paper_metadata) | Index papers in PDF files, and build metadata tables for each paper |
217
+ | [Custom Output Files](examples/custom_output_files) | Convert markdown files to HTML files and save them to a local directory, using *CocoIndex Custom Targets* |
213
218
 
214
219
  More coming and stay tuned 👀!
215
220
 
@@ -185,6 +185,7 @@ It defines an index flow like this:
185
185
  | [Image Search with Vision API](examples/image_search) | Generates detailed captions for images using a vision model, embeds them, enables live-updating semantic search via FastAPI and served on a React frontend|
186
186
  | [Face Recognition](examples/face_recognition) | Recognize faces in images and build embedding index |
187
187
  | [Paper Metadata](examples/paper_metadata) | Index papers in PDF files, and build metadata tables for each paper |
188
+ | [Custom Output Files](examples/custom_output_files) | Convert markdown files to HTML files and save them to a local directory, using *CocoIndex Custom Targets* |
188
189
 
189
190
  More coming and stay tuned 👀!
190
191
 
@@ -33,23 +33,23 @@ It takes two arguments:
33
33
  * `flow_builder`: a `FlowBuilder` object to help build the flow.
34
34
  * `data_scope`: a `DataScope` object, representing the top-level data scope. Any data created by the flow should be added to it.
35
35
 
36
- Alternatively, for more flexibility (e.g. you want to do this conditionally or generate dynamic name), you can explicitly call the `cocoindex.add_flow_def()` method:
36
+ Alternatively, for more flexibility (e.g. you want to do this conditionally or generate dynamic name), you can explicitly call the `cocoindex.open_flow()` method:
37
37
 
38
38
  ```python
39
39
  def demo_flow_def(flow_builder: cocoindex.FlowBuilder, data_scope: cocoindex.DataScope):
40
40
  ...
41
41
 
42
42
  # Add the flow definition to the flow registry.
43
- demo_flow = cocoindex.add_flow_def("DemoFlow", demo_flow_def)
43
+ demo_flow = cocoindex.open_flow("DemoFlow", demo_flow_def)
44
44
  ```
45
45
 
46
46
  In both cases, `demo_flow` will be an object with `cocoindex.Flow` class type.
47
47
  See [Flow Running](/docs/core/flow_methods) for more details on it.
48
48
 
49
- Sometimes you no longer want to keep states of the flow in memory. We provide a `cocoindex.remove_flow()` method for this purpose:
49
+ Sometimes you no longer want to keep states of the flow in memory. We provide a `close()` method for this purpose:
50
50
 
51
51
  ```python
52
- cocoindex.remove_flow(demo_flow)
52
+ demo_flow.close()
53
53
  ```
54
54
 
55
55
  After it's called, `demo_flow` becomes an invalid object, and you should not call any methods of it.
@@ -1,13 +1,13 @@
1
1
  ---
2
- title: Run a Flow
2
+ title: Operate a Flow
3
3
  toc_max_heading_level: 4
4
- description: Run a CocoIndex Flow, including build / update data in the target and evaluate the flow without changing the target.
4
+ description: Operate a CocoIndex Flow, including build / update data in the target and evaluate the flow without changing the target.
5
5
  ---
6
6
 
7
7
  import Tabs from '@theme/Tabs';
8
8
  import TabItem from '@theme/TabItem';
9
9
 
10
- # Run a CocoIndex Flow
10
+ # Operate a CocoIndex Flow
11
11
 
12
12
  After a flow is defined as discussed in [Flow Definition](/docs/core/flow_def), you can start to transform data with it.
13
13
 
@@ -39,7 +39,7 @@ It creates a `demo_flow` object in `cocoindex.Flow` type.
39
39
  For a flow, its persistent backends need to be ready before it can run, including:
40
40
 
41
41
  * [Internal storage](/docs/core/basics#internal-storage) for CocoIndex.
42
- * Backend entities for targets exported by the flow, e.g. a table (in relational databases), a collection (in some vector databases), etc.
42
+ * Backend resources for targets exported by the flow, e.g. a table (in relational databases), a collection (in some vector databases), etc.
43
43
 
44
44
  The desired state of the backends for a flow is derived based on the flow definition itself.
45
45
  CocoIndex supports two types of actions to manage the persistent backends automatically:
@@ -104,7 +104,7 @@ cocoindex.drop_all_flows(report_to_stdout=True)
104
104
 
105
105
  After dropping the flow, the in-memory `cocoindex.Flow` instance is still valid, and you can call setup methods on it again.
106
106
 
107
- If you want to remove the flow from the current process, you can call `cocoindex.remove_flow(demo_flow)` to do so (see [related doc](/docs/core/flow_def#entry-point)).
107
+ If you want to remove the flow from the current process, you can call `demo_flow.close()` to do so (see [related doc](/docs/core/flow_def#entry-point)).
108
108
 
109
109
  :::
110
110
 
@@ -114,13 +114,6 @@ This method should be implemented to:
114
114
  - Update configuration when a target spec changes
115
115
  - Clean up resources when a target is removed (`current` is `None`)
116
116
 
117
- :::note Best practice: Keep all actions idempotent
118
-
119
- Ideally this method should be idempotent, i.e. when calling this with the same arguments multiple times, the effect should remain the same.
120
- For example, if the target is a directory, it should be a no-op if we try to create it (`previous` is `None`) when the directory already exists, and also a no-op if we try to delete it (`current` is `None`) when the directory does not exist.
121
-
122
- :::
123
-
124
117
  #### `describe(key) -> str` (Optional)
125
118
 
126
119
  Returns a human-readable description of the target for logging and debugging purposes.
@@ -165,6 +158,18 @@ def prepare(spec: CustomTarget) -> PreparedCustomTarget:
165
158
 
166
159
  If not provided, the original spec will be passed directly to `mutate`.
167
160
 
161
+ ## Best Practices
162
+
163
+ ### Idempotency of Methods with Side Effects
164
+
165
+ `apply_setup_change()` and `mutate()` are the two methods that are expected to produce side effects.
166
+ We expect them to be idempotent, i.e. when calling them with the same arguments multiple times, the effect should remain the same.
167
+
168
+ For example,
169
+ - For `apply_setup_change()`, if the target is a directory, it should be a no-op if we try to create it (`previous` is `None`) when the directory already exists, and also a no-op if we try to delete it (`current` is `None`) when the directory does not exist.
170
+ - For `mutate()`, if a mutation is a deletion, it should be a no-op if the row does not exist.
171
+
172
+ This is to make sure that when the system is left in an intermediate state, e.g. interrupted between the moment a change is made and the moment CocoIndex records that the change is completed, the targets can still be gracefully rolled forward to the desired states after the system is resumed.
168
173
 
169
174
  ## Examples
170
175
 
@@ -188,3 +188,53 @@ Input data:
188
188
  * `text` (*Str*, required): The text to embed.
189
189
 
190
190
  Return: *Vector[Float32, N]*, where *N* is the dimension of the embedding vector determined by the model.
191
+
192
+ ## ColPaliEmbedImage
193
+
194
+ `ColPaliEmbedImage` embeds images using the ColPali multimodal model.
195
+
196
+ ColPali (Contextual Late-interaction over Patches) uses late interaction between image patch embeddings and text token embeddings for retrieval.
197
+
198
+ :::note Optional Dependency Required
199
+
200
+ This function requires the `colpali-engine` library, which is an optional dependency. Install CocoIndex with:
201
+
202
+ ```bash
203
+ pip install 'cocoindex[colpali]'
204
+ ```
205
+ :::
206
+
207
+ The spec takes the following fields:
208
+
209
+ * `model` (`str`): The ColPali model name to use (e.g., "vidore/colpali-v1.2")
210
+
211
+ Input data:
212
+
213
+ * `img_bytes` (*Bytes*): The image data in bytes format.
214
+
215
+ Return: *Vector[Vector[Float32, N]]*, where *N* is the hidden dimension determined by the model. This returns a multi-vector format with variable patches and fixed hidden dimension.
216
+
217
+ ## ColPaliEmbedQuery
218
+
219
+ `ColPaliEmbedQuery` embeds text queries using the ColPali multimodal model.
220
+
221
+ This produces query embeddings compatible with ColPali image embeddings for late interaction scoring (MaxSim).
222
+
223
+ :::note Optional Dependency Required
224
+
225
+ This function requires the `colpali-engine` library, which is an optional dependency. Install CocoIndex with:
226
+
227
+ ```bash
228
+ pip install 'cocoindex[colpali]'
229
+ ```
230
+ :::
231
+
232
+ The spec takes the following fields:
233
+
234
+ * `model` (`str`): The ColPali model name to use (e.g., "vidore/colpali-v1.2")
235
+
236
+ Input data:
237
+
238
+ * `query` (*Str*): The text query to embed.
239
+
240
+ Return: *Vector[Vector[Float32, N]]*, where *N* is the hidden dimension determined by the model. This returns a multi-vector format with variable tokens and fixed hidden dimension.
@@ -0,0 +1,302 @@
1
+ ---
2
+ title: Manage Flows Dynamically
3
+ description: "Learn how to dynamically manage multiple flow instances in CocoIndex. Create parameterized data indexing pipelines, handle persistent resources, perform updates, and implement memory-efficient caching for scalable applications."
4
+ ---
5
+
6
+ # Manage Flows Dynamically
7
+
8
+ You write a function, a.k.a. *flow definition*, to define indexing logic.
9
+ Sometimes you want to reuse the same flow definition for multiple *flow instances* (a.k.a. *flow*), e.g. each takes input from different sources, exports to different targets, and even with slightly different parameters for transformation logic.
10
+
11
+ ## States of a flow instance
12
+
13
+ A flow instance has states from two aspects:
14
+
15
+ * *In-process object*, of type `cocoindex.Flow`.
16
+ * *Persistent resource*, including states in the [internal storage](/docs/core/basics#internal-storage) and backend resources that are owned by the flow instance.
17
+
18
+ A flow instance is ultimately a persistent resource. Its in-process object is a handle to operate on it. Consider file handles and database connections.
19
+ CocoIndex provides APIs to *open* and *close* flow instances, and *setup* and *drop* the persistent resource.
20
+
21
+ ## Parameterize the flow definition
22
+
23
+ In the example from the [Quickstart Guide](/docs/getting_started/quickstart), we decorate the flow definition function with a `@cocoindex.flow_def(name="TextEmbedding")` decorator:
24
+
25
+ ```python title="Example in Quickstart Guide"
26
+ @cocoindex.flow_def(name="TextEmbedding")
27
+ def text_embedding_flow(flow_builder: cocoindex.FlowBuilder, data_scope: cocoindex.DataScope):
28
+ ...
29
+ ```
30
+
31
+ This immediately creates the in-process object of the flow instance, using the given function as the flow definition.
32
+ This is a shortcut of:
33
+
34
+ ```python
35
+ def _text_embedding_flow(flow_builder: cocoindex.FlowBuilder, data_scope: cocoindex.DataScope):
36
+ ...
37
+
38
+ text_embedding_flow = cocoindex.open_flow("TextEmbedding", _text_embedding_flow)
39
+ ```
40
+
41
+ Here, `cocoindex.open_flow()` is the function that creates the in-process object of the flow instance, with the given name and flow definition function.
42
+ You can directly call it dynamically with a flow name created programmatically.
43
+
44
+ Oftentimes, you also want to parameterize the flow definition function.
45
+ For example, we may have a dataclass like this to hold the parameters of the flow:
46
+
47
+ ```python
48
+ @dataclass
49
+ class TextEmbeddingFlowParameters:
50
+ source_path: str
51
+ target_table_name: str
52
+ ```
53
+
54
+ And consider we have a registry of parameters for all flow instances somewhere.
55
+ For simplicity, we use a hardcoded `dict` here, and provide a simple function to get the parameters for a given flow name.
56
+ In reality, the source of truth may come from a configuration file, a database, etc., and the function can be replaced by your own implementation.
57
+
58
+ ```python
59
+ FLOW_PARAMETERS: dict[str, TextEmbeddingFlowParameters] = {
60
+ "foo": TextEmbeddingFlowParameters(source_path="/path/to/foo", target_table_name="foo_embeddings"),
61
+ "bar": TextEmbeddingFlowParameters(source_path="/path/to/bar", target_table_name="bar_embeddings"),
62
+ }
63
+
64
+ def get_flow_parameters(name: str) -> TextEmbeddingFlowParameters:
65
+ return FLOW_PARAMETERS[name]
66
+ ```
67
+
68
+ Then you can have a function that returns the flow definition function for the given parameters:
69
+
70
+ ```python
71
+ def text_embedding_flow_def(params: TextEmbeddingFlowParameters):
72
+ def _flow_def(flow_builder: cocoindex.FlowBuilder, data_scope: cocoindex.DataScope):
73
+ # Add a data source to read files from the specified directory
74
+ data_scope["documents"] = flow_builder.add_source(
75
+ cocoindex.sources.LocalFile(path=params.source_path))
76
+
77
+ doc_embeddings = data_scope.add_collector()
78
+ ...
79
+
80
+ # Export the collected data to a Postgres table, with the specified table name.
81
+ doc_embeddings.export(
82
+ "doc_embeddings",
83
+ cocoindex.targets.Postgres(table_name=params.target_table_name),
84
+ primary_key_fields=["filename", "location"],
85
+ )
86
+
87
+ return _flow_def
88
+ ```
89
+
90
+ With this, you can open flow instances dynamically with their parameters:
91
+
92
+ ```python
93
+ text_embedding_flows: dict[str, cocoindex.Flow] = {}
94
+
95
+ def get_text_embedding_flow(name: str) -> cocoindex.Flow:
96
+ flow = text_embedding_flows.get(name)
97
+
98
+ if flow is None:
99
+ params = get_flow_parameters(name)
100
+ flow = text_embedding_flows[name] = cocoindex.open_flow(f"TextEmbedding_{name}", text_embedding_flow_def(params))
101
+
102
+ return flow
103
+ ```
104
+
105
+ ## Operate on the flow instances
106
+
107
+ ### Setup the persistent resource
108
+
109
+ After you have instantiated and opened flow instances dynamically, before you can perform any data updates, you need to make sure the persistent resource is ready.
110
+ You can use the `setup()` method, e.g. modify the above code to:
111
+
112
+ ```python
113
+ text_embedding_flows: dict[str, cocoindex.Flow] = {}
114
+
115
+ def get_text_embedding_flow(name: str) -> cocoindex.Flow:
116
+ flow = text_embedding_flows.get(name)
117
+
118
+ if flow is None:
119
+ params = get_flow_parameters(name)
120
+ flow = text_embedding_flows[name] = cocoindex.open_flow(f"TextEmbedding_{name}", text_embedding_flow_def(params))
121
+ flow.setup(report_to_stdout=True)
122
+
123
+ return flow
124
+ ```
125
+
126
+ `setup()` method synchronizes the persistent resource to a state that is consistent with the in-process object. For example,
127
+ * If the persistent resource is not there yet, it will create the backend resources for new targets.
128
+ * If your flow definition changed and a new target has been added since the last time of setup, it will create the backend resources for new targets.
129
+ * If an existing target is removed from the flow definition, it will drop the backend resources for the removed target.
130
+ * If nothing has changed since the last setup, it will be a no-op, i.e. the `setup()` method is idempotent.
131
+
132
+ `setup()` takes a `report_to_stdout` parameter to control whether to print the setup progress to the standard output.
133
+
134
+ `setup()` takes care of all scenarios and makes sure the persistent resource is in the right state.
135
+ It's generally safe to call it after you open a flow instance, even if you don't know whether the persistent resource already exists.
136
+
137
+
138
+ ### Perform data updates
139
+
140
+ After you make sure the persistent resource is ready, you can perform data updates using the flow.
141
+
142
+ The `update()` method updates the target defined by the flow.
143
+
144
+ ```python
145
+ flow.update()
146
+ ```
147
+
148
+ This performs a one-time data update. After the function returns, the target is up-to-date as of the moment when the function is called. For example, we can call `update()` to update the target after the flow is set up:
149
+
150
+ ```python
151
+ def update_text_embedding_index(name: str):
152
+ flow = get_text_embedding_flow(name)
153
+ flow.update()
154
+ ```
155
+
156
+ You can also do a live update.
157
+ See the [Live Updates](/docs/tutorials/live_updates) tutorial for more details.
158
+
159
+
160
+ ### Close the flow object
161
+
162
+ Sometimes you don't want to hold the in-process object forever.
163
+ You can free up the memory resources by closing the flow instances with the `close()` method.
164
+
165
+ For example, the `dict` we managed above behaves like a cache to hold the flow instances.
166
+ If a specific flow isn't used for a while, we may close it.
167
+ The `TTLCache` from [`cachetools`](https://pypi.org/project/cachetools/) package provides exactly this functionality.
168
+ We can rewrite the above code a little bit.
169
+ First, we bring in necessary imports:
170
+
171
+ ```python
172
+ from cachetools import cached, TTLCache
173
+ ```
174
+
175
+ Then we define our own version of `TTLCache` to make it call the `close()` method when the flow instance is evicted from the cache:
176
+
177
+ ```python
178
+ class MyTTLCache(TTLCache):
179
+ def popitem(self):
180
+ # Close the flow instance when it is evicted from the cache
181
+ key, flow = super().popitem()
182
+ flow.close()
183
+ return key, flow
184
+ ```
185
+
186
+ With this, we can modify our `get_text_embedding_flow()` function to use `MyTTLCache` to cache the flow instances, instead of managing our own `dict`:
187
+
188
+ ```python
189
+ @cached(cache=MyTTLCache(maxsize=20, ttl=600))
190
+ def get_text_embedding_flow(name: str) -> cocoindex.Flow:
191
+ params = get_flow_parameters(name)
192
+ flow = cocoindex.open_flow(f"TextEmbedding_{name}", text_embedding_flow_def(params))
193
+ flow.setup(report_to_stdout=True)
194
+ return flow
195
+ ```
196
+
197
+ The `@cached()` decorator from `cachetools` package automatically manages the cache for us (and it also offers thread safety!).
198
+ Once a flow is not touched for 10 minutes, it will call the `popitem()` method, which will close the in-memory flow object.
199
+
200
+
201
+ ### Drop the persistent resource
202
+
203
+ Occasionally, you may want to drop the persistent resource of a flow.
204
+ The `drop()` method is for this purpose.
205
+
206
+ ```python
207
+ def drop_text_embedding_index(name: str):
208
+ flow = get_text_embedding_flow(name)
209
+ flow.drop()
210
+ ```
211
+
212
+ This will drop the persistent resource of the flow.
213
+ The in-memory flow object is still alive, and can be reused until it's closed.
214
+ For example, you can still call `setup()` again.
215
+
216
+ ## Put it all together
217
+
218
+ ```python
219
+ import cocoindex
220
+ from cachetools import cached, TTLCache
221
+ from dataclasses import dataclass
222
+
223
+ @dataclass
224
+ class TextEmbeddingFlowParameters:
225
+ source_path: str
226
+ target_table_name: str
227
+
228
+ FLOW_PARAMETERS: dict[str, TextEmbeddingFlowParameters] = {
229
+ "foo": TextEmbeddingFlowParameters(source_path="/path/to/foo", target_table_name="foo_embeddings"),
230
+ "bar": TextEmbeddingFlowParameters(source_path="/path/to/bar", target_table_name="bar_embeddings"),
231
+ }
232
+
233
+ # Placeholder to get the parameters for a given flow name. You can replace this with your own implementation.
234
+ def get_flow_parameters(name: str) -> TextEmbeddingFlowParameters:
235
+ return FLOW_PARAMETERS[name]
236
+
237
+
238
+ def text_embedding_flow_def(params: TextEmbeddingFlowParameters):
239
+ def _flow_def(flow_builder: cocoindex.FlowBuilder, data_scope: cocoindex.DataScope):
240
+ # Add a data source to read files from a directory
241
+ data_scope["documents"] = flow_builder.add_source(
242
+ cocoindex.sources.LocalFile(path=params.source_path))
243
+
244
+ doc_embeddings = data_scope.add_collector()
245
+ ...
246
+
247
+ # Export the collected data to a Postgres table, with the specified table name.
248
+ doc_embeddings.export(
249
+ "doc_embeddings",
250
+ cocoindex.targets.Postgres(table_name=params.target_table_name),
251
+ primary_key_fields=["filename", "location"],
252
+ )
253
+
254
+ return _flow_def
255
+
256
+ class MyTTLCache(TTLCache):
257
+ def popitem(self):
258
+ # Close the flow instance when it is evicted from the cache
259
+ key, flow = super().popitem()
260
+ flow.close()
261
+ return key, flow
262
+
263
+ @cached(cache=MyTTLCache(maxsize=20, ttl=600))
264
+ def get_text_embedding_flow(name: str) -> cocoindex.Flow:
265
+ params = get_flow_parameters(name)
266
+ flow = cocoindex.open_flow(f"TextEmbedding_{name}", text_embedding_flow_def(params))
267
+ flow.setup(report_to_stdout=True)
268
+ return flow
269
+
270
+ def update_text_embedding_index(name: str):
271
+ flow = get_text_embedding_flow(name)
272
+ flow.update()
273
+
274
+ def drop_text_embedding_index(name: str):
275
+ flow = get_text_embedding_flow(name)
276
+ flow.drop()
277
+ ```
278
+
279
+ This provides a skeleton.
280
+ With this, you can trigger `update_text_embedding_index()` and `drop_text_embedding_index()` from your application, e.g. a web server API.
281
+
282
+ ## Takeaways
283
+
284
+ In this tutorial, we walked through major flow management / operation APIs provided by CocoIndex. These APIs can be categorized into three aspects:
285
+
286
+ | Aspect | APIs | Description |
287
+ |--------|------|-------------|
288
+ | Life of in-process flow object | `open_flow()`, `Flow.close()` | Create and destroy the in-memory handle to operate on flow instances |
289
+ | Life of persistent resource | `Flow.setup()`, `Flow.drop()` | Create and destroy the backend resources and internal storage |
290
+ | Data updates | `Flow.update()`, `FlowLiveUpdater` | Execute the indexing logic to update targets. *Requires persistent resource to be up-to-date first.* |
291
+
292
+
293
+ For simplicity, we use an in-memory `dict` as source of truth for the flow parameters.
294
+ You can replace it with your own mechanism, e.g. table from a database, a configuration file, etc.
295
+ You can trigger these APIs from your applications specific to your use case, e.g. from a specific API endpoint of a web server.
296
+
297
+ ## Further readings
298
+
299
+ You can see the following documents for more details:
300
+
301
+ * [CocoIndex Flow Definition: Entry Point](/docs/core/flow_def#entry-point)
302
+ * [Operate a Flow](/docs/core/flow_methods)
@@ -18,6 +18,7 @@ const sidebars: SidebarsConfig = {
18
18
  collapsed: false,
19
19
  items: [
20
20
  'tutorials/live_updates',
21
+ 'tutorials/manage_flow_dynamically',
21
22
  ],
22
23
  },
23
24
  {