cocoindex 0.1.77__tar.gz → 0.1.79__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (312) hide show
  1. {cocoindex-0.1.77 → cocoindex-0.1.79}/Cargo.lock +1 -1
  2. {cocoindex-0.1.77 → cocoindex-0.1.79}/Cargo.toml +2 -1
  3. {cocoindex-0.1.77 → cocoindex-0.1.79}/PKG-INFO +1 -1
  4. {cocoindex-0.1.77 → cocoindex-0.1.79}/docs/docs/ai/llm.mdx +5 -1
  5. {cocoindex-0.1.77 → cocoindex-0.1.79}/docs/docs/core/flow_def.mdx +28 -9
  6. {cocoindex-0.1.77 → cocoindex-0.1.79}/docs/docs/core/settings.mdx +22 -11
  7. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/image_search/colpali_main.py +3 -30
  8. {cocoindex-0.1.77 → cocoindex-0.1.79}/python/cocoindex/llm.py +11 -1
  9. {cocoindex-0.1.77 → cocoindex-0.1.79}/python/cocoindex/setting.py +16 -2
  10. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/lib_context.rs +28 -2
  11. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/llm/mod.rs +9 -2
  12. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/llm/openai.rs +18 -4
  13. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/settings.rs +17 -3
  14. {cocoindex-0.1.77 → cocoindex-0.1.79}/.cargo/config.toml +0 -0
  15. {cocoindex-0.1.77 → cocoindex-0.1.79}/.env.lib_debug +0 -0
  16. {cocoindex-0.1.77 → cocoindex-0.1.79}/.github/ISSUE_TEMPLATE/🐛-bug-report.md +0 -0
  17. {cocoindex-0.1.77 → cocoindex-0.1.79}/.github/ISSUE_TEMPLATE/💡-feature-request.md +0 -0
  18. {cocoindex-0.1.77 → cocoindex-0.1.79}/.github/scripts/update_version.sh +0 -0
  19. {cocoindex-0.1.77 → cocoindex-0.1.79}/.github/workflows/CI.yml +0 -0
  20. {cocoindex-0.1.77 → cocoindex-0.1.79}/.github/workflows/_doc_release.yml +0 -0
  21. {cocoindex-0.1.77 → cocoindex-0.1.79}/.github/workflows/_test.yml +0 -0
  22. {cocoindex-0.1.77 → cocoindex-0.1.79}/.github/workflows/docs.yml +0 -0
  23. {cocoindex-0.1.77 → cocoindex-0.1.79}/.github/workflows/format.yml +0 -0
  24. {cocoindex-0.1.77 → cocoindex-0.1.79}/.github/workflows/release.yml +0 -0
  25. {cocoindex-0.1.77 → cocoindex-0.1.79}/.gitignore +0 -0
  26. {cocoindex-0.1.77 → cocoindex-0.1.79}/.pre-commit-config.yaml +0 -0
  27. {cocoindex-0.1.77 → cocoindex-0.1.79}/CODE_OF_CONDUCT.md +0 -0
  28. {cocoindex-0.1.77 → cocoindex-0.1.79}/CONTRIBUTING.md +0 -0
  29. {cocoindex-0.1.77 → cocoindex-0.1.79}/LICENSE +0 -0
  30. {cocoindex-0.1.77 → cocoindex-0.1.79}/README.md +0 -0
  31. {cocoindex-0.1.77 → cocoindex-0.1.79}/dev/neo4j.yaml +0 -0
  32. {cocoindex-0.1.77 → cocoindex-0.1.79}/dev/postgres.yaml +0 -0
  33. {cocoindex-0.1.77 → cocoindex-0.1.79}/docs/.gitignore +0 -0
  34. {cocoindex-0.1.77 → cocoindex-0.1.79}/docs/README.md +0 -0
  35. {cocoindex-0.1.77 → cocoindex-0.1.79}/docs/docs/about/community.md +0 -0
  36. {cocoindex-0.1.77 → cocoindex-0.1.79}/docs/docs/about/contributing.md +0 -0
  37. {cocoindex-0.1.77 → cocoindex-0.1.79}/docs/docs/core/basics.md +0 -0
  38. {cocoindex-0.1.77 → cocoindex-0.1.79}/docs/docs/core/cli.mdx +0 -0
  39. {cocoindex-0.1.77 → cocoindex-0.1.79}/docs/docs/core/data_example.svg +0 -0
  40. {cocoindex-0.1.77 → cocoindex-0.1.79}/docs/docs/core/data_types.mdx +0 -0
  41. {cocoindex-0.1.77 → cocoindex-0.1.79}/docs/docs/core/flow_example.svg +0 -0
  42. {cocoindex-0.1.77 → cocoindex-0.1.79}/docs/docs/core/flow_methods.mdx +0 -0
  43. {cocoindex-0.1.77 → cocoindex-0.1.79}/docs/docs/custom_ops/custom_functions.mdx +0 -0
  44. {cocoindex-0.1.77 → cocoindex-0.1.79}/docs/docs/custom_ops/custom_targets.mdx +0 -0
  45. {cocoindex-0.1.77 → cocoindex-0.1.79}/docs/docs/getting_started/installation.md +0 -0
  46. {cocoindex-0.1.77 → cocoindex-0.1.79}/docs/docs/getting_started/markdown_files.zip +0 -0
  47. {cocoindex-0.1.77 → cocoindex-0.1.79}/docs/docs/getting_started/overview.md +0 -0
  48. {cocoindex-0.1.77 → cocoindex-0.1.79}/docs/docs/getting_started/quickstart.md +0 -0
  49. {cocoindex-0.1.77 → cocoindex-0.1.79}/docs/docs/ops/functions.md +0 -0
  50. {cocoindex-0.1.77 → cocoindex-0.1.79}/docs/docs/ops/sources.md +0 -0
  51. {cocoindex-0.1.77 → cocoindex-0.1.79}/docs/docs/ops/targets.md +0 -0
  52. {cocoindex-0.1.77 → cocoindex-0.1.79}/docs/docs/query.mdx +0 -0
  53. {cocoindex-0.1.77 → cocoindex-0.1.79}/docs/docs/tutorials/live_updates.md +0 -0
  54. {cocoindex-0.1.77 → cocoindex-0.1.79}/docs/docs/tutorials/manage_flow_dynamically.mdx +0 -0
  55. {cocoindex-0.1.77 → cocoindex-0.1.79}/docs/docusaurus.config.ts +0 -0
  56. {cocoindex-0.1.77 → cocoindex-0.1.79}/docs/package.json +0 -0
  57. {cocoindex-0.1.77 → cocoindex-0.1.79}/docs/sidebars.ts +0 -0
  58. {cocoindex-0.1.77 → cocoindex-0.1.79}/docs/src/components/HomepageFeatures/index.tsx +0 -0
  59. {cocoindex-0.1.77 → cocoindex-0.1.79}/docs/src/components/HomepageFeatures/styles.module.css +0 -0
  60. {cocoindex-0.1.77 → cocoindex-0.1.79}/docs/src/css/custom.css +0 -0
  61. {cocoindex-0.1.77 → cocoindex-0.1.79}/docs/src/theme/Root.js +0 -0
  62. {cocoindex-0.1.77 → cocoindex-0.1.79}/docs/static/.nojekyll +0 -0
  63. {cocoindex-0.1.77 → cocoindex-0.1.79}/docs/static/img/docusaurus.png +0 -0
  64. {cocoindex-0.1.77 → cocoindex-0.1.79}/docs/static/img/favicon.ico +0 -0
  65. {cocoindex-0.1.77 → cocoindex-0.1.79}/docs/static/img/icon.svg +0 -0
  66. {cocoindex-0.1.77 → cocoindex-0.1.79}/docs/static/img/incremental-etl.gif +0 -0
  67. {cocoindex-0.1.77 → cocoindex-0.1.79}/docs/static/robots.txt +0 -0
  68. {cocoindex-0.1.77 → cocoindex-0.1.79}/docs/tsconfig.json +0 -0
  69. {cocoindex-0.1.77 → cocoindex-0.1.79}/docs/yarn.lock +0 -0
  70. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/amazon_s3_embedding/.env.example +0 -0
  71. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/amazon_s3_embedding/.gitignore +0 -0
  72. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/amazon_s3_embedding/README.md +0 -0
  73. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/amazon_s3_embedding/main.py +0 -0
  74. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/amazon_s3_embedding/pyproject.toml +0 -0
  75. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/azure_blob_embedding/.env.example +0 -0
  76. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/azure_blob_embedding/.gitignore +0 -0
  77. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/azure_blob_embedding/README.md +0 -0
  78. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/azure_blob_embedding/main.py +0 -0
  79. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/azure_blob_embedding/pyproject.toml +0 -0
  80. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/code_embedding/.env +0 -0
  81. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/code_embedding/README.md +0 -0
  82. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/code_embedding/main.py +0 -0
  83. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/code_embedding/pyproject.toml +0 -0
  84. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/custom_output_files/.env +0 -0
  85. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/custom_output_files/.gitignore +0 -0
  86. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/custom_output_files/README.md +0 -0
  87. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/custom_output_files/data/bizarre_animals.md +0 -0
  88. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/custom_output_files/data/chunk_norris.md +0 -0
  89. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/custom_output_files/main.py +0 -0
  90. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/custom_output_files/pyproject.toml +0 -0
  91. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/docs_to_knowledge_graph/.env +0 -0
  92. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/docs_to_knowledge_graph/README.md +0 -0
  93. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/docs_to_knowledge_graph/main.py +0 -0
  94. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/docs_to_knowledge_graph/pyproject.toml +0 -0
  95. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/face_recognition/.env +0 -0
  96. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/face_recognition/README.md +0 -0
  97. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/face_recognition/images/Carter_welcomes_Reagan.jpg +0 -0
  98. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/face_recognition/images/Solvay_conference_1927.jpg +0 -0
  99. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/face_recognition/images/Steve_Jobs_and_Bill_Gates_(522695099).jpg +0 -0
  100. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/face_recognition/images/einplanck3.jpg +0 -0
  101. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/face_recognition/main.py +0 -0
  102. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/face_recognition/pyproject.toml +0 -0
  103. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/fastapi_server_docker/.dockerignore +0 -0
  104. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/fastapi_server_docker/.env +0 -0
  105. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/fastapi_server_docker/README.md +0 -0
  106. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/fastapi_server_docker/compose.yaml +0 -0
  107. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/fastapi_server_docker/dockerfile +0 -0
  108. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/fastapi_server_docker/files/1810.04805v2.md +0 -0
  109. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/fastapi_server_docker/main.py +0 -0
  110. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/fastapi_server_docker/requirements.txt +0 -0
  111. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/gdrive_text_embedding/.env.example +0 -0
  112. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/gdrive_text_embedding/.gitignore +0 -0
  113. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/gdrive_text_embedding/README.md +0 -0
  114. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/gdrive_text_embedding/main.py +0 -0
  115. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/gdrive_text_embedding/pyproject.toml +0 -0
  116. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/image_search/.env +0 -0
  117. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/image_search/README.md +0 -0
  118. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/image_search/frontend/.gitignore +0 -0
  119. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/image_search/frontend/index.html +0 -0
  120. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/image_search/frontend/package-lock.json +0 -0
  121. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/image_search/frontend/package.json +0 -0
  122. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/image_search/frontend/src/App.jsx +0 -0
  123. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/image_search/frontend/src/main.jsx +0 -0
  124. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/image_search/frontend/src/style.css +0 -0
  125. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/image_search/frontend/vite.config.js +0 -0
  126. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/image_search/img/cat1.jpeg +0 -0
  127. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/image_search/img/dog1.jpeg +0 -0
  128. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/image_search/img/elephant1.jpg +0 -0
  129. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/image_search/img/giraffe.jpg +0 -0
  130. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/image_search/main.py +0 -0
  131. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/image_search/pyproject.toml +0 -0
  132. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/live_updates/.env +0 -0
  133. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/live_updates/README.md +0 -0
  134. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/live_updates/data/bizarre_animals.md +0 -0
  135. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/live_updates/data/chunk_norris.md +0 -0
  136. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/live_updates/main.py +0 -0
  137. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/live_updates/pyproject.toml +0 -0
  138. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/manuals_llm_extraction/.env +0 -0
  139. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/manuals_llm_extraction/README.md +0 -0
  140. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/manuals_llm_extraction/main.py +0 -0
  141. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/manuals_llm_extraction/manuals/array.pdf +0 -0
  142. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/manuals_llm_extraction/manuals/base64.pdf +0 -0
  143. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/manuals_llm_extraction/manuals/copy.pdf +0 -0
  144. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/manuals_llm_extraction/manuals/glob.pdf +0 -0
  145. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/manuals_llm_extraction/pyproject.toml +0 -0
  146. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/multi_format_indexing/.env +0 -0
  147. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/multi_format_indexing/README.md +0 -0
  148. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/multi_format_indexing/main.py +0 -0
  149. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/multi_format_indexing/pyproject.toml +0 -0
  150. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/multi_format_indexing/source_files/1706.03762v7.pdf +0 -0
  151. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/multi_format_indexing/source_files/1810.04805v2.pdf +0 -0
  152. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/multi_format_indexing/source_files/cat1.jpeg +0 -0
  153. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/multi_format_indexing/source_files/dog1.jpeg +0 -0
  154. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/multi_format_indexing/source_files/elephant1.jpg +0 -0
  155. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/multi_format_indexing/source_files/giraffe.jpg +0 -0
  156. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/multi_format_indexing/source_files/rfc8259.pdf +0 -0
  157. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/paper_metadata/.env.example +0 -0
  158. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/paper_metadata/.gitignore +0 -0
  159. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/paper_metadata/README.md +0 -0
  160. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/paper_metadata/main.py +0 -0
  161. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/paper_metadata/papers/1706.03762v7.pdf +0 -0
  162. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/paper_metadata/papers/1810.04805v2.pdf +0 -0
  163. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/paper_metadata/papers/2502.06786v3.pdf +0 -0
  164. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/paper_metadata/papers/2502.20346v1.pdf +0 -0
  165. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/paper_metadata/pyproject.toml +0 -0
  166. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/patient_intake_extraction/.env.example +0 -0
  167. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/patient_intake_extraction/README.md +0 -0
  168. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/patient_intake_extraction/data/README.md +0 -0
  169. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/patient_intake_extraction/data/patient_forms/Patient_Intake_Form_David_Artificial.docx +0 -0
  170. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/patient_intake_extraction/data/patient_forms/Patient_Intake_Form_Emily_Artificial.pdf +0 -0
  171. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/patient_intake_extraction/data/patient_forms/Patient_Intake_Form_Joe_Artificial.pdf +0 -0
  172. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/patient_intake_extraction/data/patient_forms/Patient_Intake_From_Jane_Artificial.docx +0 -0
  173. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/patient_intake_extraction/main.py +0 -0
  174. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/patient_intake_extraction/pyproject.toml +0 -0
  175. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/pdf_embedding/.env +0 -0
  176. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/pdf_embedding/README.md +0 -0
  177. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/pdf_embedding/main.py +0 -0
  178. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/pdf_embedding/pdf_files/1706.03762v7.pdf +0 -0
  179. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/pdf_embedding/pdf_files/1810.04805v2.pdf +0 -0
  180. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/pdf_embedding/pdf_files/rfc8259.pdf +0 -0
  181. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/pdf_embedding/pyproject.toml +0 -0
  182. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/product_recommendation/.env +0 -0
  183. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/product_recommendation/README.md +0 -0
  184. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/product_recommendation/img/cocoinsight.png +0 -0
  185. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/product_recommendation/img/neo4j.png +0 -0
  186. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/product_recommendation/main.py +0 -0
  187. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/product_recommendation/products/p1.json +0 -0
  188. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/product_recommendation/products/p2.json +0 -0
  189. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/product_recommendation/products/p3.json +0 -0
  190. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/product_recommendation/products/p4.json +0 -0
  191. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/product_recommendation/products/p5.json +0 -0
  192. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/product_recommendation/products/p6.json +0 -0
  193. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/product_recommendation/products/p7.json +0 -0
  194. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/product_recommendation/products/p8.json +0 -0
  195. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/product_recommendation/products/p9.json +0 -0
  196. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/product_recommendation/pyproject.toml +0 -0
  197. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/text_embedding/.env +0 -0
  198. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/text_embedding/README.md +0 -0
  199. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/text_embedding/Text_Embedding.ipynb +0 -0
  200. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/text_embedding/main.py +0 -0
  201. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/text_embedding/markdown_files/1706.03762v7.md +0 -0
  202. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/text_embedding/markdown_files/1810.04805v2.md +0 -0
  203. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/text_embedding/markdown_files/rfc8259.md +0 -0
  204. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/text_embedding/pyproject.toml +0 -0
  205. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/text_embedding_qdrant/.env +0 -0
  206. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/text_embedding_qdrant/README.md +0 -0
  207. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/text_embedding_qdrant/main.py +0 -0
  208. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/text_embedding_qdrant/markdown_files/rfc8259.md +0 -0
  209. {cocoindex-0.1.77 → cocoindex-0.1.79}/examples/text_embedding_qdrant/pyproject.toml +0 -0
  210. {cocoindex-0.1.77 → cocoindex-0.1.79}/pyproject.toml +0 -0
  211. {cocoindex-0.1.77 → cocoindex-0.1.79}/python/cocoindex/__init__.py +0 -0
  212. {cocoindex-0.1.77 → cocoindex-0.1.79}/python/cocoindex/auth_registry.py +0 -0
  213. {cocoindex-0.1.77 → cocoindex-0.1.79}/python/cocoindex/cli.py +0 -0
  214. {cocoindex-0.1.77 → cocoindex-0.1.79}/python/cocoindex/convert.py +0 -0
  215. {cocoindex-0.1.77 → cocoindex-0.1.79}/python/cocoindex/flow.py +0 -0
  216. {cocoindex-0.1.77 → cocoindex-0.1.79}/python/cocoindex/functions.py +0 -0
  217. {cocoindex-0.1.77 → cocoindex-0.1.79}/python/cocoindex/index.py +0 -0
  218. {cocoindex-0.1.77 → cocoindex-0.1.79}/python/cocoindex/lib.py +0 -0
  219. {cocoindex-0.1.77 → cocoindex-0.1.79}/python/cocoindex/op.py +0 -0
  220. {cocoindex-0.1.77 → cocoindex-0.1.79}/python/cocoindex/py.typed +0 -0
  221. {cocoindex-0.1.77 → cocoindex-0.1.79}/python/cocoindex/runtime.py +0 -0
  222. {cocoindex-0.1.77 → cocoindex-0.1.79}/python/cocoindex/setup.py +0 -0
  223. {cocoindex-0.1.77 → cocoindex-0.1.79}/python/cocoindex/sources.py +0 -0
  224. {cocoindex-0.1.77 → cocoindex-0.1.79}/python/cocoindex/targets.py +0 -0
  225. {cocoindex-0.1.77 → cocoindex-0.1.79}/python/cocoindex/tests/__init__.py +0 -0
  226. {cocoindex-0.1.77 → cocoindex-0.1.79}/python/cocoindex/tests/test_convert.py +0 -0
  227. {cocoindex-0.1.77 → cocoindex-0.1.79}/python/cocoindex/tests/test_optional_database.py +0 -0
  228. {cocoindex-0.1.77 → cocoindex-0.1.79}/python/cocoindex/tests/test_transform_flow.py +0 -0
  229. {cocoindex-0.1.77 → cocoindex-0.1.79}/python/cocoindex/tests/test_typing.py +0 -0
  230. {cocoindex-0.1.77 → cocoindex-0.1.79}/python/cocoindex/tests/test_validation.py +0 -0
  231. {cocoindex-0.1.77 → cocoindex-0.1.79}/python/cocoindex/typing.py +0 -0
  232. {cocoindex-0.1.77 → cocoindex-0.1.79}/python/cocoindex/utils.py +0 -0
  233. {cocoindex-0.1.77 → cocoindex-0.1.79}/python/cocoindex/validation.py +0 -0
  234. {cocoindex-0.1.77 → cocoindex-0.1.79}/ruff.toml +0 -0
  235. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/base/duration.rs +0 -0
  236. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/base/field_attrs.rs +0 -0
  237. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/base/json_schema.rs +0 -0
  238. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/base/mod.rs +0 -0
  239. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/base/schema.rs +0 -0
  240. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/base/spec.rs +0 -0
  241. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/base/value.rs +0 -0
  242. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/builder/analyzed_flow.rs +0 -0
  243. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/builder/analyzer.rs +0 -0
  244. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/builder/exec_ctx.rs +0 -0
  245. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/builder/flow_builder.rs +0 -0
  246. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/builder/mod.rs +0 -0
  247. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/builder/plan.rs +0 -0
  248. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/execution/db_tracking.rs +0 -0
  249. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/execution/db_tracking_setup.rs +0 -0
  250. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/execution/dumper.rs +0 -0
  251. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/execution/evaluator.rs +0 -0
  252. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/execution/indexing_status.rs +0 -0
  253. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/execution/live_updater.rs +0 -0
  254. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/execution/memoization.rs +0 -0
  255. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/execution/mod.rs +0 -0
  256. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/execution/row_indexer.rs +0 -0
  257. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/execution/source_indexer.rs +0 -0
  258. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/execution/stats.rs +0 -0
  259. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/lib.rs +0 -0
  260. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/llm/anthropic.rs +0 -0
  261. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/llm/gemini.rs +0 -0
  262. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/llm/litellm.rs +0 -0
  263. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/llm/ollama.rs +0 -0
  264. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/llm/openrouter.rs +0 -0
  265. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/llm/vllm.rs +0 -0
  266. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/llm/voyage.rs +0 -0
  267. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/ops/factory_bases.rs +0 -0
  268. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/ops/functions/embed_text.rs +0 -0
  269. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/ops/functions/extract_by_llm.rs +0 -0
  270. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/ops/functions/mod.rs +0 -0
  271. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/ops/functions/parse_json.rs +0 -0
  272. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/ops/functions/split_recursively.rs +0 -0
  273. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/ops/functions/test_utils.rs +0 -0
  274. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/ops/interface.rs +0 -0
  275. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/ops/mod.rs +0 -0
  276. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/ops/py_factory.rs +0 -0
  277. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/ops/registration.rs +0 -0
  278. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/ops/registry.rs +0 -0
  279. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/ops/sdk.rs +0 -0
  280. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/ops/sources/amazon_s3.rs +0 -0
  281. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/ops/sources/azure_blob.rs +0 -0
  282. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/ops/sources/google_drive.rs +0 -0
  283. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/ops/sources/local_file.rs +0 -0
  284. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/ops/sources/mod.rs +0 -0
  285. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/ops/targets/kuzu.rs +0 -0
  286. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/ops/targets/mod.rs +0 -0
  287. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/ops/targets/neo4j.rs +0 -0
  288. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/ops/targets/postgres.rs +0 -0
  289. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/ops/targets/qdrant.rs +0 -0
  290. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/ops/targets/shared/mod.rs +0 -0
  291. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/ops/targets/shared/property_graph.rs +0 -0
  292. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/ops/targets/shared/table_columns.rs +0 -0
  293. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/prelude.rs +0 -0
  294. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/py/convert.rs +0 -0
  295. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/py/mod.rs +0 -0
  296. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/server.rs +0 -0
  297. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/service/error.rs +0 -0
  298. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/service/flows.rs +0 -0
  299. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/service/mod.rs +0 -0
  300. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/setup/auth_registry.rs +0 -0
  301. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/setup/components.rs +0 -0
  302. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/setup/db_metadata.rs +0 -0
  303. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/setup/driver.rs +0 -0
  304. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/setup/mod.rs +0 -0
  305. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/setup/states.rs +0 -0
  306. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/utils/concur_control.rs +0 -0
  307. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/utils/db.rs +0 -0
  308. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/utils/fingerprint.rs +0 -0
  309. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/utils/immutable.rs +0 -0
  310. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/utils/mod.rs +0 -0
  311. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/utils/retryable.rs +0 -0
  312. {cocoindex-0.1.77 → cocoindex-0.1.79}/src/utils/yaml_ser.rs +0 -0
@@ -1297,7 +1297,7 @@ dependencies = [
1297
1297
 
1298
1298
  [[package]]
1299
1299
  name = "cocoindex"
1300
- version = "0.1.77"
1300
+ version = "0.1.79"
1301
1301
  dependencies = [
1302
1302
  "anyhow",
1303
1303
  "async-openai",
@@ -2,9 +2,10 @@
2
2
  name = "cocoindex"
3
3
  # Version used for local development is always higher than others to take precedence.
4
4
  # Will be overridden for specific release versions.
5
- version = "0.1.77"
5
+ version = "0.1.79"
6
6
  edition = "2024"
7
7
  rust-version = "1.88"
8
+ readme = "README.md"
8
9
 
9
10
  [profile.release]
10
11
  codegen-units = 1
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cocoindex
3
- Version: 0.1.77
3
+ Version: 0.1.79
4
4
  Requires-Dist: click>=8.1.8
5
5
  Requires-Dist: rich>=14.0.0
6
6
  Requires-Dist: python-dotenv>=1.1.0
@@ -75,7 +75,11 @@ CocoIndex integrates with various LLM APIs for these functions.
75
75
  To use the OpenAI LLM API, you need to set the environment variable `OPENAI_API_KEY`.
76
76
  You can generate the API key from [OpenAI Dashboard](https://platform.openai.com/api-keys).
77
77
 
78
- Currently we don't support custom address for OpenAI API.
78
+ If you want to use a custom address, you can either provide the `address` parameter in `LlmSpec` / `EmbedText`, or set the environment variable `OPENAI_API_BASE`. The `address` parameter takes precedence over the environment variable.
79
+
80
+ The spec for OpenAI takes an additional `api_config` field of type `cocoindex.llm.OpenAiConfig`, with the following fields:
81
+ - `org_id` (type: `str`, optional): The organization ID of the OpenAI account.
82
+ - `project_id` (type: `str`, optional): The project ID of the OpenAI account.
79
83
 
80
84
  You can find the full list of models supported by OpenAI [here](https://platform.openai.com/docs/models).
81
85
 
@@ -360,21 +360,40 @@ It will use `Staging__doc_embeddings` as the collection name if the current app
360
360
 
361
361
  ### Control Processing Concurrency
362
362
 
363
- You can control the concurrency of the processing by setting the following options:
363
+ CocoIndex processes data in parallel to maximize throughput, but unconstrained parallelism can overwhelm your system.
364
+ Processing too many items simultaneously can lead to:
364
365
 
365
- * `max_inflight_rows`: the maximum number of concurrent inflight requests for the processing.
366
- * `max_inflight_bytes`: the maximum number of concurrent inflight bytes for the processing.
366
+ - **Memory exhaustion**: Large datasets loaded concurrently can consume excessive RAM
367
+ - **Resource contention**: Too many parallel operations competing for CPU, disk I/O, or network bandwidth
368
+ - **System instability**: High concurrency can cause timeouts, crashes, or degraded performance
367
369
 
368
- These options can be passed in to the following APIs:
370
+ To prevent these issues, CocoIndex provides concurrency controls that limit how many data items are processed simultaneously.
369
371
 
370
- * [`FlowBuilder.add_source()`](#import-from-source): The options above control the processing concurrency of multiple rows from a source. New rows will not be loaded in memory if it'll be over the limit.
372
+ #### Concurrency Options
371
373
 
372
- Besides, global limits on overall processing concurrency of all sources from all flows can be specified by [`GlobalExecutionOptions`](/docs/core/settings#globalexecutionoptions) or corresponding [environment variables](/docs/core/settings#list-of-environment-variables).
373
- If both global and per-source limits are specified, both need to be satisfied to admit additional source rows.
374
+ You can control processing concurrency using these options:
374
375
 
375
- * [`DataSlice.row()`](#for-each-row): The options above provides a finer-grained control, to limit the processing concurrency of multiple rows within a table at any level.
376
+ * `max_inflight_rows`: Limits the maximum number of data rows being processed concurrently
377
+ * `max_inflight_bytes`: Limits the total memory footprint of data being processed concurrently (measured in bytes)
376
378
 
377
- `max_inflight_bytes` only counts the number of bytes already existing in the current row before any further processing.
379
+ When these limits are reached, CocoIndex will pause loading new data until some of the current processing completes, ensuring your system remains stable.
380
+
381
+ #### Where to Apply Concurrency Controls
382
+
383
+ These concurrency options can be configured at different levels:
384
+
385
+ * **Source level** via [`FlowBuilder.add_source()`](#import-from-source): Controls how many rows from a data source are processed simultaneously. This prevents overwhelming your system when ingesting large datasets.
386
+
387
+ You can also set global limits across all sources and flows using [`GlobalExecutionOptions`](/docs/core/settings#globalexecutionoptions) or environment variables [`COCOINDEX_SOURCE_MAX_INFLIGHT_ROWS`](/docs/core/settings#list-of-environment-variables)/[`COCOINDEX_SOURCE_MAX_INFLIGHT_BYTES`](/docs/core/settings#list-of-environment-variables).
388
+ When both global and per-source limits are specified, both limits are enforced independently - a new row can only be processed if there's available capacity in both the global budget (shared across all sources) and the per-source budget (specific to that source).
389
+
390
+ * **Row iteration level** via [`DataSlice.row()`](#for-each-row): Provides fine-grained control over parallel processing within nested data structures, allowing you to tune concurrency at any level of your data hierarchy.
391
+
392
+ :::note
393
+
394
+ The `max_inflight_bytes` limit only counts the size of data that already exists in memory before any transformations are applied. It doesn't include the memory used by intermediate processing results.
395
+
396
+ :::
378
397
 
379
398
  For example:
380
399
 
@@ -77,24 +77,33 @@ If not set, all flows are in a default unnamed namespace.
77
77
 
78
78
  `DatabaseConnectionSpec` configures the connection to a database. Only Postgres is supported for now. It has the following fields:
79
79
 
80
- * `url` (type: `str`, required): The URL of the Postgres database to use as the internal storage, e.g. `postgres://cocoindex:cocoindex@localhost/cocoindex`.
80
+ * `url` (type: `str`): The URL of the Postgres database to use as the internal storage, e.g. `postgres://cocoindex:cocoindex@localhost/cocoindex`.
81
81
 
82
82
  *Environment variable* for `Settings.database.url`: `COCOINDEX_DATABASE_URL`
83
83
 
84
- * `user` (type: `str`, optional): The username for the Postgres database. If not provided, username will come from `url`.
84
+ * `user` (type: `str | None`, default: `None`): The username for the Postgres database. If not provided, username will come from `url`.
85
85
 
86
86
  *Environment variable* for `Settings.database.user`: `COCOINDEX_DATABASE_USER`
87
87
 
88
- * `password` (type: `str`, optional): The password for the Postgres database. If not provided, password will come from `url`.
88
+ * `password` (type: `str | None`, default: `None`): The password for the Postgres database. If not provided, password will come from `url`.
89
89
 
90
90
  *Environment variable* for `Settings.database.password`: `COCOINDEX_DATABASE_PASSWORD`
91
91
 
92
- :::tip
92
+ :::tip
93
93
 
94
- Please be careful that all values in `url` needs to be url-encoded if they contain special characters.
95
- For this reason, prefer to use the separated `user` and `password` fields for username and password.
94
+ Please be careful that all values in `url` need to be URL-encoded if they contain special characters.
95
+ For this reason, prefer to use the separated `user` and `password` fields for username and password.
96
+
97
+ :::
98
+
99
+ * `max_connections` (type: `int`, default: `64`): The maximum number of connections to keep in the pool.
100
+
101
+ *Environment variable* for `Settings.database.max_connections`: `COCOINDEX_DATABASE_MAX_CONNECTIONS`
102
+
103
+ * `min_connections` (type: `int`, default: `16`): The minimum number of connections to keep in the pool.
104
+
105
+ *Environment variable* for `Settings.database.min_connections`: `COCOINDEX_DATABASE_MIN_CONNECTIONS`
96
106
 
97
- :::
98
107
 
99
108
  :::info
100
109
 
@@ -109,10 +118,10 @@ If you use the Postgres database hosted by [Supabase](https://supabase.com/), pl
109
118
 
110
119
  `GlobalExecutionOptions` is used to configure the global execution options shared by all flows. It has the following fields:
111
120
 
112
- * `source_max_inflight_rows` (type: `int`, optional): The maximum number of concurrent inflight requests for all source operations.
113
- * `source_max_inflight_bytes` (type: `int`, optional): The maximum number of concurrent inflight bytes for all source operations.
121
+ * `source_max_inflight_rows` (type: `int | None`, default: `1024`): The maximum number of concurrent inflight rows for all source operations.
122
+ * `source_max_inflight_bytes` (type: `int | None`, default: `None`): The maximum number of concurrent inflight bytes for all source operations.
114
123
 
115
- See also [flow definition docs](/docs/core/flow_def#control-processing-concurrency) to control processing concurrency on per-source basis.
124
+ See also [flow definition docs](/docs/core/flow_def#control-processing-concurrency) about why it's necessary to control processing concurrency, and how to configure it on a per-source basis.
116
125
  If both global and per-source limits are specified, both need to be satisfied to admit additional source rows.
117
126
 
118
127
  ## List of Environment Variables
@@ -125,5 +134,7 @@ This is the list of environment variables, each of which has a corresponding fie
125
134
  | `COCOINDEX_DATABASE_URL` | `database.url` | Yes |
126
135
  | `COCOINDEX_DATABASE_USER` | `database.user` | No |
127
136
  | `COCOINDEX_DATABASE_PASSWORD` | `database.password` | No |
128
- | `COCOINDEX_SOURCE_MAX_INFLIGHT_ROWS` | `global_execution_options.source_max_inflight_rows` | No |
137
+ | `COCOINDEX_DATABASE_MAX_CONNECTIONS` | `database.max_connections` | No (default: `64`) |
138
+ | `COCOINDEX_DATABASE_MIN_CONNECTIONS` | `database.min_connections` | No (default: `16`) |
139
+ | `COCOINDEX_SOURCE_MAX_INFLIGHT_ROWS` | `global_execution_options.source_max_inflight_rows` | No (default: `1024`) |
129
140
  | `COCOINDEX_SOURCE_MAX_INFLIGHT_BYTES` | `global_execution_options.source_max_inflight_bytes` | No |
@@ -27,10 +27,6 @@ COLPALI_MODEL_NAME = os.getenv("COLPALI_MODEL", "vidore/colpali-v1.2")
27
27
  print(f"📐 Using ColPali model {COLPALI_MODEL_NAME}")
28
28
 
29
29
 
30
- # Create ColPali embedding function using the class-based pattern
31
- colpali_embed = cocoindex.functions.ColPaliEmbedImage(model=COLPALI_MODEL_NAME)
32
-
33
-
34
30
  @cocoindex.transform_flow()
35
31
  def text_to_colpali_embedding(
36
32
  text: cocoindex.DataSlice[str],
@@ -56,38 +52,15 @@ def image_object_embedding_flow(
56
52
  )
57
53
  img_embeddings = data_scope.add_collector()
58
54
  with data_scope["images"].row() as img:
59
- ollama_model_name = os.getenv("OLLAMA_MODEL")
60
- if ollama_model_name is not None:
61
- # If an Ollama model is specified, generate an image caption
62
- img["caption"] = flow_builder.transform(
63
- cocoindex.functions.ExtractByLlm(
64
- llm_spec=cocoindex.llm.LlmSpec(
65
- api_type=cocoindex.LlmApiType.OLLAMA, model=ollama_model_name
66
- ),
67
- instruction=(
68
- "Describe the image in one detailed sentence. "
69
- "Name all visible animal species, objects, and the main scene. "
70
- "Be specific about type, color, and notable features. "
71
- "Mention what each animal is doing."
72
- ),
73
- output_type=str,
74
- ),
75
- image=img["content"],
76
- )
77
- img["embedding"] = img["content"].transform(colpali_embed)
55
+ img["embedding"] = img["content"].transform(
56
+ cocoindex.functions.ColPaliEmbedImage(model=COLPALI_MODEL_NAME)
57
+ )
78
58
 
79
59
  collect_fields = {
80
60
  "id": cocoindex.GeneratedField.UUID,
81
61
  "filename": img["filename"],
82
62
  "embedding": img["embedding"],
83
63
  }
84
-
85
- if ollama_model_name is not None:
86
- print(f"Using Ollama model '{ollama_model_name}' for captioning.")
87
- collect_fields["caption"] = img["caption"]
88
- else:
89
- print(f"No Ollama model '{ollama_model_name}' found — skipping captioning.")
90
-
91
64
  img_embeddings.collect(**collect_fields)
92
65
 
93
66
  img_embeddings.export(
@@ -26,6 +26,16 @@ class VertexAiConfig:
26
26
  region: str | None = None
27
27
 
28
28
 
29
+ @dataclass
30
+ class OpenAiConfig:
31
+ """A specification for an OpenAI LLM."""
32
+
33
+ kind = "OpenAi"
34
+
35
+ org_id: str | None = None
36
+ project_id: str | None = None
37
+
38
+
29
39
  @dataclass
30
40
  class LlmSpec:
31
41
  """A specification for a LLM."""
@@ -33,4 +43,4 @@ class LlmSpec:
33
43
  api_type: LlmApiType
34
44
  model: str
35
45
  address: str | None = None
36
- api_config: VertexAiConfig | None = None
46
+ api_config: VertexAiConfig | OpenAiConfig | None = None
@@ -44,6 +44,8 @@ class DatabaseConnectionSpec:
44
44
  url: str
45
45
  user: str | None = None
46
46
  password: str | None = None
47
+ max_connections: int = 64
48
+ min_connections: int = 16
47
49
 
48
50
 
49
51
  @dataclass
@@ -51,7 +53,7 @@ class GlobalExecutionOptions:
51
53
  """Global execution options."""
52
54
 
53
55
  # The maximum number of concurrent inflight requests, shared among all sources from all flows.
54
- source_max_inflight_rows: int | None = None
56
+ source_max_inflight_rows: int | None = 1024
55
57
  source_max_inflight_bytes: int | None = None
56
58
 
57
59
 
@@ -92,10 +94,22 @@ class Settings:
92
94
 
93
95
  database_url = os.getenv("COCOINDEX_DATABASE_URL")
94
96
  if database_url is not None:
95
- db_kwargs: dict[str, str] = dict()
97
+ db_kwargs: dict[str, Any] = dict()
96
98
  _load_field(db_kwargs, "url", "COCOINDEX_DATABASE_URL", required=True)
97
99
  _load_field(db_kwargs, "user", "COCOINDEX_DATABASE_USER")
98
100
  _load_field(db_kwargs, "password", "COCOINDEX_DATABASE_PASSWORD")
101
+ _load_field(
102
+ db_kwargs,
103
+ "max_connections",
104
+ "COCOINDEX_DATABASE_MAX_CONNECTIONS",
105
+ parse=int,
106
+ )
107
+ _load_field(
108
+ db_kwargs,
109
+ "min_connections",
110
+ "COCOINDEX_DATABASE_MIN_CONNECTIONS",
111
+ parse=int,
112
+ )
99
113
  database = DatabaseConnectionSpec(**db_kwargs)
100
114
  else:
101
115
  database = None
@@ -1,3 +1,5 @@
1
+ use std::time::Duration;
2
+
1
3
  use crate::prelude::*;
2
4
 
3
5
  use crate::builder::AnalyzedFlow;
@@ -7,7 +9,7 @@ use crate::settings;
7
9
  use crate::setup::ObjectSetupStatus;
8
10
  use axum::http::StatusCode;
9
11
  use sqlx::PgPool;
10
- use sqlx::postgres::PgConnectOptions;
12
+ use sqlx::postgres::{PgConnectOptions, PgPoolOptions};
11
13
  use tokio::runtime::Runtime;
12
14
 
13
15
  pub struct FlowExecutionContext {
@@ -176,7 +178,29 @@ impl DbPools {
176
178
  if let Some(password) = &conn_spec.password {
177
179
  pg_options = pg_options.password(password);
178
180
  }
179
- let pool = PgPool::connect_with(pg_options)
181
+
182
+ // Try to connect to the database with a low timeout first.
183
+ {
184
+ let pool_options = PgPoolOptions::new()
185
+ .max_connections(1)
186
+ .min_connections(1)
187
+ .acquire_timeout(Duration::from_secs(30));
188
+ let pool = pool_options
189
+ .connect_with(pg_options.clone())
190
+ .await
191
+ .context(format!("Failed to connect to database {}", conn_spec.url))?;
192
+ let _ = pool.acquire().await?;
193
+ }
194
+
195
+ // Now create the actual pool.
196
+ let pool_options = PgPoolOptions::new()
197
+ .max_connections(conn_spec.max_connections)
198
+ .min_connections(conn_spec.min_connections)
199
+ .acquire_timeout(Duration::from_secs(5 * 60))
200
+ .idle_timeout(Duration::from_secs(10 * 60))
201
+ .max_lifetime(Duration::from_secs(60 * 60));
202
+ let pool = pool_options
203
+ .connect_with(pg_options)
180
204
  .await
181
205
  .context("Failed to connect to database")?;
182
206
  anyhow::Ok(pool)
@@ -330,6 +354,8 @@ mod tests {
330
354
  url: "postgresql://test".to_string(),
331
355
  user: None,
332
356
  password: None,
357
+ max_connections: 10,
358
+ min_connections: 1,
333
359
  }),
334
360
  ..Default::default()
335
361
  };
@@ -26,10 +26,17 @@ pub struct VertexAiConfig {
26
26
  pub region: Option<String>,
27
27
  }
28
28
 
29
+ #[derive(Debug, Clone, Serialize, Deserialize, Default)]
30
+ pub struct OpenAiConfig {
31
+ pub org_id: Option<String>,
32
+ pub project_id: Option<String>,
33
+ }
34
+
29
35
  #[derive(Debug, Clone, Serialize, Deserialize)]
30
36
  #[serde(tag = "kind")]
31
37
  pub enum LlmApiConfig {
32
38
  VertexAi(VertexAiConfig),
39
+ OpenAi(OpenAiConfig),
33
40
  }
34
41
 
35
42
  #[derive(Debug, Clone, Serialize, Deserialize)]
@@ -113,7 +120,7 @@ pub async fn new_llm_generation_client(
113
120
  Box::new(ollama::Client::new(address).await?) as Box<dyn LlmGenerationClient>
114
121
  }
115
122
  LlmApiType::OpenAi => {
116
- Box::new(openai::Client::new(address)?) as Box<dyn LlmGenerationClient>
123
+ Box::new(openai::Client::new(address, api_config)?) as Box<dyn LlmGenerationClient>
117
124
  }
118
125
  LlmApiType::Gemini => {
119
126
  Box::new(gemini::AiStudioClient::new(address)?) as Box<dyn LlmGenerationClient>
@@ -151,7 +158,7 @@ pub async fn new_llm_embedding_client(
151
158
  Box::new(gemini::AiStudioClient::new(address)?) as Box<dyn LlmEmbeddingClient>
152
159
  }
153
160
  LlmApiType::OpenAi => {
154
- Box::new(openai::Client::new(address)?) as Box<dyn LlmEmbeddingClient>
161
+ Box::new(openai::Client::new(address, api_config)?) as Box<dyn LlmEmbeddingClient>
155
162
  }
156
163
  LlmApiType::Voyage => {
157
164
  Box::new(voyage::Client::new(address)?) as Box<dyn LlmEmbeddingClient>
@@ -33,17 +33,31 @@ impl Client {
33
33
  Self { client }
34
34
  }
35
35
 
36
- pub fn new(address: Option<String>) -> Result<Self> {
36
+ pub fn new(address: Option<String>, api_config: Option<super::LlmApiConfig>) -> Result<Self> {
37
+ let config = match api_config {
38
+ Some(super::LlmApiConfig::OpenAi(config)) => config,
39
+ Some(_) => api_bail!("unexpected config type, expected OpenAiConfig"),
40
+ None => super::OpenAiConfig::default(),
41
+ };
42
+
43
+ let mut openai_config = OpenAIConfig::new();
37
44
  if let Some(address) = address {
38
- api_bail!("OpenAI doesn't support custom API address: {address}");
45
+ openai_config = openai_config.with_api_base(address);
46
+ }
47
+ if let Some(org_id) = config.org_id {
48
+ openai_config = openai_config.with_org_id(org_id);
39
49
  }
50
+ if let Some(project_id) = config.project_id {
51
+ openai_config = openai_config.with_project_id(project_id);
52
+ }
53
+
40
54
  // Verify API key is set
41
55
  if std::env::var("OPENAI_API_KEY").is_err() {
42
56
  api_bail!("OPENAI_API_KEY environment variable must be set");
43
57
  }
44
58
  Ok(Self {
45
- // OpenAI client will use OPENAI_API_KEY env variable by default
46
- client: OpenAIClient::new(),
59
+ // OpenAI client will use OPENAI_API_KEY and OPENAI_API_BASE env variables by default
60
+ client: OpenAIClient::with_config(openai_config),
47
61
  })
48
62
  }
49
63
  }
@@ -5,6 +5,8 @@ pub struct DatabaseConnectionSpec {
5
5
  pub url: String,
6
6
  pub user: Option<String>,
7
7
  pub password: Option<String>,
8
+ pub max_connections: u32,
9
+ pub min_connections: u32,
8
10
  }
9
11
 
10
12
  #[derive(Deserialize, Debug, Default)]
@@ -34,7 +36,9 @@ mod tests {
34
36
  "database": {
35
37
  "url": "postgresql://localhost:5432/test",
36
38
  "user": "testuser",
37
- "password": "testpass"
39
+ "password": "testpass",
40
+ "min_connections": 1,
41
+ "max_connections": 10
38
42
  },
39
43
  "app_namespace": "test_app"
40
44
  }"#;
@@ -46,6 +50,8 @@ mod tests {
46
50
  assert_eq!(db.url, "postgresql://localhost:5432/test");
47
51
  assert_eq!(db.user, Some("testuser".to_string()));
48
52
  assert_eq!(db.password, Some("testpass".to_string()));
53
+ assert_eq!(db.min_connections, 1);
54
+ assert_eq!(db.max_connections, 10);
49
55
  assert_eq!(settings.app_namespace, "test_app");
50
56
  }
51
57
 
@@ -75,7 +81,9 @@ mod tests {
75
81
  fn test_settings_deserialize_database_without_user_password() {
76
82
  let json = r#"{
77
83
  "database": {
78
- "url": "postgresql://localhost:5432/test"
84
+ "url": "postgresql://localhost:5432/test",
85
+ "min_connections": 1,
86
+ "max_connections": 10
79
87
  }
80
88
  }"#;
81
89
 
@@ -86,6 +94,8 @@ mod tests {
86
94
  assert_eq!(db.url, "postgresql://localhost:5432/test");
87
95
  assert_eq!(db.user, None);
88
96
  assert_eq!(db.password, None);
97
+ assert_eq!(db.min_connections, 1);
98
+ assert_eq!(db.max_connections, 10);
89
99
  assert_eq!(settings.app_namespace, "");
90
100
  }
91
101
 
@@ -94,7 +104,9 @@ mod tests {
94
104
  let json = r#"{
95
105
  "url": "postgresql://localhost:5432/test",
96
106
  "user": "testuser",
97
- "password": "testpass"
107
+ "password": "testpass",
108
+ "min_connections": 1,
109
+ "max_connections": 10
98
110
  }"#;
99
111
 
100
112
  let db_spec: DatabaseConnectionSpec = serde_json::from_str(json).unwrap();
@@ -102,5 +114,7 @@ mod tests {
102
114
  assert_eq!(db_spec.url, "postgresql://localhost:5432/test");
103
115
  assert_eq!(db_spec.user, Some("testuser".to_string()));
104
116
  assert_eq!(db_spec.password, Some("testpass".to_string()));
117
+ assert_eq!(db_spec.min_connections, 1);
118
+ assert_eq!(db_spec.max_connections, 10);
105
119
  }
106
120
  }
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes