cocoindex 0.2.13__tar.gz → 0.2.15__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (433) hide show
  1. {cocoindex-0.2.13 → cocoindex-0.2.15}/Cargo.lock +1 -1
  2. {cocoindex-0.2.13 → cocoindex-0.2.15}/Cargo.toml +1 -1
  3. {cocoindex-0.2.13 → cocoindex-0.2.15}/PKG-INFO +1 -1
  4. {cocoindex-0.2.13 → cocoindex-0.2.15}/THIRD_PARTY_NOTICES.html +1 -1
  5. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/core/flow_def.mdx +1 -0
  6. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/examples/examples/simple_vector_index.md +10 -0
  7. {cocoindex-0.2.13 → cocoindex-0.2.15}/python/cocoindex/__init__.py +12 -1
  8. {cocoindex-0.2.13 → cocoindex-0.2.15}/python/cocoindex/functions.py +20 -2
  9. {cocoindex-0.2.13 → cocoindex-0.2.15}/python/cocoindex/index.py +22 -1
  10. {cocoindex-0.2.13 → cocoindex-0.2.15}/python/cocoindex/targets/lancedb.py +6 -0
  11. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/base/spec.rs +58 -1
  12. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/lib_context.rs +7 -11
  13. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/llm/gemini.rs +30 -10
  14. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/ops/functions/mod.rs +1 -0
  15. cocoindex-0.2.15/src/ops/functions/split_by_separators.rs +254 -0
  16. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/ops/functions/split_recursively.rs +4 -125
  17. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/ops/registration.rs +1 -0
  18. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/ops/shared/mod.rs +1 -0
  19. cocoindex-0.2.15/src/ops/shared/split.rs +142 -0
  20. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/ops/targets/kuzu.rs +3 -0
  21. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/ops/targets/neo4j.rs +3 -0
  22. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/ops/targets/postgres.rs +34 -4
  23. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/ops/targets/qdrant.rs +3 -0
  24. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/py/mod.rs +6 -0
  25. {cocoindex-0.2.13 → cocoindex-0.2.15}/.cargo/config.toml +0 -0
  26. {cocoindex-0.2.13 → cocoindex-0.2.15}/.env.lib_debug +0 -0
  27. {cocoindex-0.2.13 → cocoindex-0.2.15}/.github/ISSUE_TEMPLATE/🐛-bug-report.md +0 -0
  28. {cocoindex-0.2.13 → cocoindex-0.2.15}/.github/ISSUE_TEMPLATE/💡-feature-request.md +0 -0
  29. {cocoindex-0.2.13 → cocoindex-0.2.15}/.github/SECURITY.md +0 -0
  30. {cocoindex-0.2.13 → cocoindex-0.2.15}/.github/scripts/update_version.sh +0 -0
  31. {cocoindex-0.2.13 → cocoindex-0.2.15}/.github/workflows/CI.yml +0 -0
  32. {cocoindex-0.2.13 → cocoindex-0.2.15}/.github/workflows/_docs_release.yml +0 -0
  33. {cocoindex-0.2.13 → cocoindex-0.2.15}/.github/workflows/_test.yml +0 -0
  34. {cocoindex-0.2.13 → cocoindex-0.2.15}/.github/workflows/docs_release.yml +0 -0
  35. {cocoindex-0.2.13 → cocoindex-0.2.15}/.github/workflows/docs_test.yml +0 -0
  36. {cocoindex-0.2.13 → cocoindex-0.2.15}/.github/workflows/format.yml +0 -0
  37. {cocoindex-0.2.13 → cocoindex-0.2.15}/.github/workflows/release.yml +0 -0
  38. {cocoindex-0.2.13 → cocoindex-0.2.15}/.gitignore +0 -0
  39. {cocoindex-0.2.13 → cocoindex-0.2.15}/.pre-commit-config.yaml +0 -0
  40. {cocoindex-0.2.13 → cocoindex-0.2.15}/CODE_OF_CONDUCT.md +0 -0
  41. {cocoindex-0.2.13 → cocoindex-0.2.15}/CONTRIBUTING.md +0 -0
  42. {cocoindex-0.2.13 → cocoindex-0.2.15}/LICENSE +0 -0
  43. {cocoindex-0.2.13 → cocoindex-0.2.15}/README.md +0 -0
  44. {cocoindex-0.2.13 → cocoindex-0.2.15}/about.hbs +0 -0
  45. {cocoindex-0.2.13 → cocoindex-0.2.15}/about.toml +0 -0
  46. {cocoindex-0.2.13 → cocoindex-0.2.15}/dev/neo4j.yaml +0 -0
  47. {cocoindex-0.2.13 → cocoindex-0.2.15}/dev/postgres.yaml +0 -0
  48. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/.gitignore +0 -0
  49. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/README.md +0 -0
  50. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/about/community.md +0 -0
  51. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/ai/llm.mdx +0 -0
  52. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/contributing/guide.md +0 -0
  53. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/contributing/new_built_in_target.mdx +0 -0
  54. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/contributing/setup_dev_environment.md +0 -0
  55. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/core/basics.md +0 -0
  56. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/core/cli.mdx +0 -0
  57. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/core/data_example.svg +0 -0
  58. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/core/data_types.mdx +0 -0
  59. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/core/flow_example.svg +0 -0
  60. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/core/flow_methods.mdx +0 -0
  61. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/core/settings.mdx +0 -0
  62. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/custom_ops/custom_functions.mdx +0 -0
  63. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/custom_ops/custom_targets.mdx +0 -0
  64. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/examples/examples/academic_papers_index.md +0 -0
  65. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/examples/examples/codebase_index.md +0 -0
  66. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/examples/examples/custom_targets.md +0 -0
  67. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/examples/examples/docs_to_knowledge_graph.md +0 -0
  68. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/examples/examples/document_ai.md +0 -0
  69. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/examples/examples/image_search.md +0 -0
  70. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/examples/examples/manual_extraction.md +0 -0
  71. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/examples/examples/multi_format_index.md +0 -0
  72. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/examples/examples/patient_form_extraction.md +0 -0
  73. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/examples/examples/photo_search.md +0 -0
  74. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/examples/examples/postgres_source.md +0 -0
  75. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/examples/examples/product_recommendation.md +0 -0
  76. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/examples/index.md +0 -0
  77. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/getting_started/installation.md +0 -0
  78. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/getting_started/markdown_files.zip +0 -0
  79. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/getting_started/overview.md +0 -0
  80. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/getting_started/quickstart.md +0 -0
  81. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/ops/functions.md +0 -0
  82. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/ops/sources.md +0 -0
  83. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/ops/targets.md +0 -0
  84. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/query.mdx +0 -0
  85. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/tutorials/live_updates.md +0 -0
  86. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docs/tutorials/manage_flow_dynamically.mdx +0 -0
  87. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/docusaurus.config.ts +0 -0
  88. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/package.json +0 -0
  89. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/sidebars.ts +0 -0
  90. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/src/components/GitHubButton/index.tsx +0 -0
  91. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/src/css/custom.css +0 -0
  92. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/src/theme/DocCard/index.tsx +0 -0
  93. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/src/theme/DocCard/styles.module.css +0 -0
  94. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/src/theme/DocCardList/index.tsx +0 -0
  95. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/src/theme/DocCardList/styles.module.css +0 -0
  96. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/src/theme/Root.js +0 -0
  97. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/.nojekyll +0 -0
  98. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/docusaurus.png +0 -0
  99. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/academic_papers_index/abstract_chunks.png +0 -0
  100. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/academic_papers_index/basic_info.png +0 -0
  101. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/academic_papers_index/chunk_embedding.png +0 -0
  102. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/academic_papers_index/cover.png +0 -0
  103. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/academic_papers_index/first_page.png +0 -0
  104. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/academic_papers_index/flow.png +0 -0
  105. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/academic_papers_index/metadata.png +0 -0
  106. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/codebase_index/chunk.png +0 -0
  107. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/codebase_index/cover.png +0 -0
  108. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/codebase_index/flow.png +0 -0
  109. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/custom_targets/convert.png +0 -0
  110. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/custom_targets/cover.png +0 -0
  111. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/docs_to_knowledge_graph/cover.png +0 -0
  112. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/docs_to_knowledge_graph/dedupe.png +0 -0
  113. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/docs_to_knowledge_graph/export_document.png +0 -0
  114. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/docs_to_knowledge_graph/export_relationship.png +0 -0
  115. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/docs_to_knowledge_graph/extract_relationship.png +0 -0
  116. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/docs_to_knowledge_graph/flow.png +0 -0
  117. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/docs_to_knowledge_graph/relationship.png +0 -0
  118. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/docs_to_knowledge_graph/summary.png +0 -0
  119. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/document_ai/cover.png +0 -0
  120. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/document_ai/document_ai.png +0 -0
  121. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/document_ai/processor.png +0 -0
  122. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/image_search/cover.png +0 -0
  123. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/image_search/embedding.png +0 -0
  124. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/image_search/flow.png +0 -0
  125. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/image_search/multi_modal_architecture.png +0 -0
  126. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/image_search/result.png +0 -0
  127. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/manual_extraction/cover.png +0 -0
  128. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/manual_extraction/extraction.png +0 -0
  129. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/manual_extraction/flow.png +0 -0
  130. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/manual_extraction/summary.png +0 -0
  131. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/multi_format_index/colpali_architecture.png +0 -0
  132. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/multi_format_index/cover.png +0 -0
  133. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/multi_format_index/embed.png +0 -0
  134. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/multi_format_index/flow.png +0 -0
  135. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/multi_format_index/pages.png +0 -0
  136. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/patient_form_extraction/cover.png +0 -0
  137. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/patient_form_extraction/extraction.png +0 -0
  138. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/patient_form_extraction/fields.png +0 -0
  139. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/patient_form_extraction/flow.png +0 -0
  140. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/patient_form_extraction/tomarkdown.png +0 -0
  141. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/photo_search/cover.png +0 -0
  142. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/photo_search/extraction.png +0 -0
  143. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/photo_search/flow.png +0 -0
  144. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/postgres_source/collector.png +0 -0
  145. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/postgres_source/cover.png +0 -0
  146. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/postgres_source/description.png +0 -0
  147. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/postgres_source/embed.png +0 -0
  148. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/postgres_source/flow.png +0 -0
  149. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/postgres_source/lineage.png +0 -0
  150. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/postgres_source/price.png +0 -0
  151. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/postgres_source/source.png +0 -0
  152. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/product_recommendation/cover.png +0 -0
  153. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/product_recommendation/dedupe.png +0 -0
  154. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/product_recommendation/export_all.png +0 -0
  155. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/product_recommendation/export_product.png +0 -0
  156. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/product_recommendation/export_taxonomy.png +0 -0
  157. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/product_recommendation/extract_product.png +0 -0
  158. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/product_recommendation/extract_taxonomy.png +0 -0
  159. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/product_recommendation/neo4j.png +0 -0
  160. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/product_recommendation/parse_json.png +0 -0
  161. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/product_recommendation/taxonomy.png +0 -0
  162. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/simple_vector_index/chunk.png +0 -0
  163. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/simple_vector_index/cover.png +0 -0
  164. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/simple_vector_index/embed.png +0 -0
  165. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/examples/simple_vector_index/flow.png +0 -0
  166. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/favicon.ico +0 -0
  167. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/icon.svg +0 -0
  168. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/img/incremental-etl.gif +0 -0
  169. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/static/robots.txt +0 -0
  170. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/tsconfig.json +0 -0
  171. {cocoindex-0.2.13 → cocoindex-0.2.15}/docs/yarn.lock +0 -0
  172. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/amazon_s3_embedding/.env.example +0 -0
  173. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/amazon_s3_embedding/.gitignore +0 -0
  174. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/amazon_s3_embedding/README.md +0 -0
  175. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/amazon_s3_embedding/main.py +0 -0
  176. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/amazon_s3_embedding/pyproject.toml +0 -0
  177. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/azure_blob_embedding/.env.example +0 -0
  178. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/azure_blob_embedding/.gitignore +0 -0
  179. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/azure_blob_embedding/README.md +0 -0
  180. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/azure_blob_embedding/main.py +0 -0
  181. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/azure_blob_embedding/pyproject.toml +0 -0
  182. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/code_embedding/.env +0 -0
  183. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/code_embedding/README.md +0 -0
  184. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/code_embedding/main.py +0 -0
  185. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/code_embedding/pyproject.toml +0 -0
  186. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/custom_output_files/.env +0 -0
  187. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/custom_output_files/.gitignore +0 -0
  188. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/custom_output_files/README.md +0 -0
  189. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/custom_output_files/data/bizarre_animals.md +0 -0
  190. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/custom_output_files/data/chunk_norris.md +0 -0
  191. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/custom_output_files/main.py +0 -0
  192. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/custom_output_files/pyproject.toml +0 -0
  193. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/docs_to_knowledge_graph/.env +0 -0
  194. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/docs_to_knowledge_graph/README.md +0 -0
  195. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/docs_to_knowledge_graph/main.py +0 -0
  196. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/docs_to_knowledge_graph/pyproject.toml +0 -0
  197. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/face_recognition/.env +0 -0
  198. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/face_recognition/README.md +0 -0
  199. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/face_recognition/images/Carter_welcomes_Reagan.jpg +0 -0
  200. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/face_recognition/images/Solvay_conference_1927.jpg +0 -0
  201. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/face_recognition/images/Steve_Jobs_and_Bill_Gates_(522695099).jpg +0 -0
  202. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/face_recognition/images/einplanck3.jpg +0 -0
  203. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/face_recognition/main.py +0 -0
  204. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/face_recognition/pyproject.toml +0 -0
  205. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/fastapi_server_docker/.dockerignore +0 -0
  206. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/fastapi_server_docker/.env +0 -0
  207. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/fastapi_server_docker/README.md +0 -0
  208. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/fastapi_server_docker/compose.yaml +0 -0
  209. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/fastapi_server_docker/dockerfile +0 -0
  210. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/fastapi_server_docker/files/1810.04805v2.md +0 -0
  211. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/fastapi_server_docker/main.py +0 -0
  212. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/fastapi_server_docker/requirements.txt +0 -0
  213. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/gdrive_text_embedding/.env.example +0 -0
  214. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/gdrive_text_embedding/.gitignore +0 -0
  215. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/gdrive_text_embedding/README.md +0 -0
  216. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/gdrive_text_embedding/main.py +0 -0
  217. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/gdrive_text_embedding/pyproject.toml +0 -0
  218. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/image_search/.env +0 -0
  219. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/image_search/README.md +0 -0
  220. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/image_search/colpali_main.py +0 -0
  221. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/image_search/frontend/.gitignore +0 -0
  222. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/image_search/frontend/index.html +0 -0
  223. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/image_search/frontend/package-lock.json +0 -0
  224. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/image_search/frontend/package.json +0 -0
  225. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/image_search/frontend/src/App.jsx +0 -0
  226. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/image_search/frontend/src/main.jsx +0 -0
  227. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/image_search/frontend/src/style.css +0 -0
  228. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/image_search/frontend/vite.config.js +0 -0
  229. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/image_search/img/cat1.jpeg +0 -0
  230. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/image_search/img/dog1.jpeg +0 -0
  231. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/image_search/img/elephant1.jpg +0 -0
  232. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/image_search/img/giraffe.jpg +0 -0
  233. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/image_search/main.py +0 -0
  234. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/image_search/pyproject.toml +0 -0
  235. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/live_updates/.env +0 -0
  236. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/live_updates/README.md +0 -0
  237. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/live_updates/data/bizarre_animals.md +0 -0
  238. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/live_updates/data/chunk_norris.md +0 -0
  239. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/live_updates/main.py +0 -0
  240. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/live_updates/pyproject.toml +0 -0
  241. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/manuals_llm_extraction/.env +0 -0
  242. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/manuals_llm_extraction/README.md +0 -0
  243. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/manuals_llm_extraction/main.py +0 -0
  244. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/manuals_llm_extraction/manuals/array.pdf +0 -0
  245. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/manuals_llm_extraction/manuals/base64.pdf +0 -0
  246. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/manuals_llm_extraction/manuals/copy.pdf +0 -0
  247. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/manuals_llm_extraction/manuals/glob.pdf +0 -0
  248. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/manuals_llm_extraction/pyproject.toml +0 -0
  249. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/multi_format_indexing/.env +0 -0
  250. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/multi_format_indexing/README.md +0 -0
  251. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/multi_format_indexing/main.py +0 -0
  252. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/multi_format_indexing/pyproject.toml +0 -0
  253. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/multi_format_indexing/source_files/1706.03762v7.pdf +0 -0
  254. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/multi_format_indexing/source_files/1810.04805v2.pdf +0 -0
  255. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/multi_format_indexing/source_files/2502.06786v3.pdf +0 -0
  256. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/multi_format_indexing/source_files/healthcare_industry_test_p101.jpg +0 -0
  257. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/multi_format_indexing/source_files/healthcare_industry_test_p86.jpg +0 -0
  258. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/multi_format_indexing/source_files/healthcare_industry_test_p9.jpg +0 -0
  259. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/multi_format_indexing/source_files/restaurant_brands_international_2023.jpg +0 -0
  260. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/multi_format_indexing/source_files/sweetgreen_2023.jpg +0 -0
  261. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/paper_metadata/.env.example +0 -0
  262. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/paper_metadata/.gitignore +0 -0
  263. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/paper_metadata/README.md +0 -0
  264. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/paper_metadata/main.py +0 -0
  265. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/paper_metadata/papers/1706.03762v7.pdf +0 -0
  266. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/paper_metadata/papers/1810.04805v2.pdf +0 -0
  267. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/paper_metadata/papers/2502.06786v3.pdf +0 -0
  268. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/paper_metadata/papers/2502.20346v1.pdf +0 -0
  269. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/paper_metadata/pyproject.toml +0 -0
  270. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/patient_intake_extraction/.env.example +0 -0
  271. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/patient_intake_extraction/README.md +0 -0
  272. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/patient_intake_extraction/data/README.md +0 -0
  273. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/patient_intake_extraction/data/patient_forms/Patient_Intake_Form_David_Artificial.docx +0 -0
  274. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/patient_intake_extraction/data/patient_forms/Patient_Intake_Form_Emily_Artificial.pdf +0 -0
  275. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/patient_intake_extraction/data/patient_forms/Patient_Intake_Form_Joe_Artificial.pdf +0 -0
  276. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/patient_intake_extraction/data/patient_forms/Patient_Intake_From_Jane_Artificial.docx +0 -0
  277. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/patient_intake_extraction/main.py +0 -0
  278. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/patient_intake_extraction/pyproject.toml +0 -0
  279. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/pdf_embedding/.env +0 -0
  280. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/pdf_embedding/README.md +0 -0
  281. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/pdf_embedding/main.py +0 -0
  282. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/pdf_embedding/pdf_files/1706.03762v7.pdf +0 -0
  283. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/pdf_embedding/pdf_files/1810.04805v2.pdf +0 -0
  284. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/pdf_embedding/pdf_files/rfc8259.pdf +0 -0
  285. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/pdf_embedding/pyproject.toml +0 -0
  286. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/postgres_source/.env +0 -0
  287. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/postgres_source/README.md +0 -0
  288. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/postgres_source/main.py +0 -0
  289. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/postgres_source/prepare_source_data.sql +0 -0
  290. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/postgres_source/pyproject.toml +0 -0
  291. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/product_recommendation/.env.example +0 -0
  292. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/product_recommendation/.gitignore +0 -0
  293. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/product_recommendation/README.md +0 -0
  294. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/product_recommendation/img/cocoinsight.png +0 -0
  295. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/product_recommendation/img/neo4j.png +0 -0
  296. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/product_recommendation/main.py +0 -0
  297. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/product_recommendation/products/p1.json +0 -0
  298. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/product_recommendation/products/p2.json +0 -0
  299. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/product_recommendation/products/p3.json +0 -0
  300. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/product_recommendation/products/p4.json +0 -0
  301. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/product_recommendation/products/p5.json +0 -0
  302. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/product_recommendation/products/p6.json +0 -0
  303. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/product_recommendation/products/p7.json +0 -0
  304. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/product_recommendation/products/p8.json +0 -0
  305. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/product_recommendation/products/p9.json +0 -0
  306. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/product_recommendation/pyproject.toml +0 -0
  307. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/text_embedding/.env +0 -0
  308. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/text_embedding/README.md +0 -0
  309. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/text_embedding/Text_Embedding.ipynb +0 -0
  310. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/text_embedding/main.py +0 -0
  311. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/text_embedding/markdown_files/1706.03762v7.md +0 -0
  312. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/text_embedding/markdown_files/1810.04805v2.md +0 -0
  313. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/text_embedding/markdown_files/rfc8259.md +0 -0
  314. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/text_embedding/pyproject.toml +0 -0
  315. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/text_embedding_lancedb/.env +0 -0
  316. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/text_embedding_lancedb/.gitignore +0 -0
  317. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/text_embedding_lancedb/README.md +0 -0
  318. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/text_embedding_lancedb/main.py +0 -0
  319. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/text_embedding_lancedb/markdown_files/rfc8259.md +0 -0
  320. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/text_embedding_lancedb/pyproject.toml +0 -0
  321. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/text_embedding_qdrant/.env +0 -0
  322. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/text_embedding_qdrant/README.md +0 -0
  323. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/text_embedding_qdrant/main.py +0 -0
  324. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/text_embedding_qdrant/markdown_files/rfc8259.md +0 -0
  325. {cocoindex-0.2.13 → cocoindex-0.2.15}/examples/text_embedding_qdrant/pyproject.toml +0 -0
  326. {cocoindex-0.2.13 → cocoindex-0.2.15}/pyproject.toml +0 -0
  327. {cocoindex-0.2.13 → cocoindex-0.2.15}/python/cocoindex/auth_registry.py +0 -0
  328. {cocoindex-0.2.13 → cocoindex-0.2.15}/python/cocoindex/cli.py +0 -0
  329. {cocoindex-0.2.13 → cocoindex-0.2.15}/python/cocoindex/convert.py +0 -0
  330. {cocoindex-0.2.13 → cocoindex-0.2.15}/python/cocoindex/flow.py +0 -0
  331. {cocoindex-0.2.13 → cocoindex-0.2.15}/python/cocoindex/lib.py +0 -0
  332. {cocoindex-0.2.13 → cocoindex-0.2.15}/python/cocoindex/llm.py +0 -0
  333. {cocoindex-0.2.13 → cocoindex-0.2.15}/python/cocoindex/op.py +0 -0
  334. {cocoindex-0.2.13 → cocoindex-0.2.15}/python/cocoindex/py.typed +0 -0
  335. {cocoindex-0.2.13 → cocoindex-0.2.15}/python/cocoindex/query_handler.py +0 -0
  336. {cocoindex-0.2.13 → cocoindex-0.2.15}/python/cocoindex/runtime.py +0 -0
  337. {cocoindex-0.2.13 → cocoindex-0.2.15}/python/cocoindex/setting.py +0 -0
  338. {cocoindex-0.2.13 → cocoindex-0.2.15}/python/cocoindex/setup.py +0 -0
  339. {cocoindex-0.2.13 → cocoindex-0.2.15}/python/cocoindex/sources.py +0 -0
  340. {cocoindex-0.2.13 → cocoindex-0.2.15}/python/cocoindex/subprocess_exec.py +0 -0
  341. {cocoindex-0.2.13 → cocoindex-0.2.15}/python/cocoindex/targets/__init__.py +0 -0
  342. {cocoindex-0.2.13 → cocoindex-0.2.15}/python/cocoindex/targets/_engine_builtin_specs.py +0 -0
  343. {cocoindex-0.2.13 → cocoindex-0.2.15}/python/cocoindex/tests/__init__.py +0 -0
  344. {cocoindex-0.2.13 → cocoindex-0.2.15}/python/cocoindex/tests/test_convert.py +0 -0
  345. {cocoindex-0.2.13 → cocoindex-0.2.15}/python/cocoindex/tests/test_load_convert.py +0 -0
  346. {cocoindex-0.2.13 → cocoindex-0.2.15}/python/cocoindex/tests/test_optional_database.py +0 -0
  347. {cocoindex-0.2.13 → cocoindex-0.2.15}/python/cocoindex/tests/test_transform_flow.py +0 -0
  348. {cocoindex-0.2.13 → cocoindex-0.2.15}/python/cocoindex/tests/test_typing.py +0 -0
  349. {cocoindex-0.2.13 → cocoindex-0.2.15}/python/cocoindex/tests/test_validation.py +0 -0
  350. {cocoindex-0.2.13 → cocoindex-0.2.15}/python/cocoindex/typing.py +0 -0
  351. {cocoindex-0.2.13 → cocoindex-0.2.15}/python/cocoindex/user_app_loader.py +0 -0
  352. {cocoindex-0.2.13 → cocoindex-0.2.15}/python/cocoindex/utils.py +0 -0
  353. {cocoindex-0.2.13 → cocoindex-0.2.15}/python/cocoindex/validation.py +0 -0
  354. {cocoindex-0.2.13 → cocoindex-0.2.15}/ruff.toml +0 -0
  355. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/base/duration.rs +0 -0
  356. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/base/field_attrs.rs +0 -0
  357. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/base/json_schema.rs +0 -0
  358. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/base/mod.rs +0 -0
  359. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/base/schema.rs +0 -0
  360. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/base/value.rs +0 -0
  361. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/builder/analyzed_flow.rs +0 -0
  362. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/builder/analyzer.rs +0 -0
  363. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/builder/exec_ctx.rs +0 -0
  364. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/builder/flow_builder.rs +0 -0
  365. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/builder/mod.rs +0 -0
  366. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/builder/plan.rs +0 -0
  367. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/execution/db_tracking.rs +0 -0
  368. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/execution/db_tracking_setup.rs +0 -0
  369. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/execution/dumper.rs +0 -0
  370. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/execution/evaluator.rs +0 -0
  371. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/execution/indexing_status.rs +0 -0
  372. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/execution/live_updater.rs +0 -0
  373. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/execution/memoization.rs +0 -0
  374. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/execution/mod.rs +0 -0
  375. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/execution/row_indexer.rs +0 -0
  376. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/execution/source_indexer.rs +0 -0
  377. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/execution/stats.rs +0 -0
  378. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/lib.rs +0 -0
  379. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/llm/anthropic.rs +0 -0
  380. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/llm/litellm.rs +0 -0
  381. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/llm/mod.rs +0 -0
  382. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/llm/ollama.rs +0 -0
  383. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/llm/openai.rs +0 -0
  384. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/llm/openrouter.rs +0 -0
  385. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/llm/vllm.rs +0 -0
  386. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/llm/voyage.rs +0 -0
  387. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/ops/factory_bases.rs +0 -0
  388. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/ops/functions/embed_text.rs +0 -0
  389. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/ops/functions/extract_by_llm.rs +0 -0
  390. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/ops/functions/parse_json.rs +0 -0
  391. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/ops/functions/test_utils.rs +0 -0
  392. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/ops/interface.rs +0 -0
  393. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/ops/mod.rs +0 -0
  394. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/ops/py_factory.rs +0 -0
  395. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/ops/registry.rs +0 -0
  396. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/ops/sdk.rs +0 -0
  397. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/ops/shared/postgres.rs +0 -0
  398. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/ops/sources/amazon_s3.rs +0 -0
  399. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/ops/sources/azure_blob.rs +0 -0
  400. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/ops/sources/google_drive.rs +0 -0
  401. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/ops/sources/local_file.rs +0 -0
  402. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/ops/sources/mod.rs +0 -0
  403. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/ops/sources/postgres.rs +0 -0
  404. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/ops/sources/shared/mod.rs +0 -0
  405. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/ops/sources/shared/pattern_matcher.rs +0 -0
  406. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/ops/targets/mod.rs +0 -0
  407. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/ops/targets/shared/mod.rs +0 -0
  408. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/ops/targets/shared/property_graph.rs +0 -0
  409. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/ops/targets/shared/table_columns.rs +0 -0
  410. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/prelude.rs +0 -0
  411. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/py/convert.rs +0 -0
  412. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/server.rs +0 -0
  413. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/service/error.rs +0 -0
  414. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/service/flows.rs +0 -0
  415. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/service/mod.rs +0 -0
  416. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/service/query_handler.rs +0 -0
  417. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/settings.rs +0 -0
  418. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/setup/auth_registry.rs +0 -0
  419. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/setup/components.rs +0 -0
  420. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/setup/db_metadata.rs +0 -0
  421. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/setup/driver.rs +0 -0
  422. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/setup/flow_features.rs +0 -0
  423. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/setup/mod.rs +0 -0
  424. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/setup/states.rs +0 -0
  425. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/utils/concur_control.rs +0 -0
  426. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/utils/db.rs +0 -0
  427. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/utils/deser.rs +0 -0
  428. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/utils/fingerprint.rs +0 -0
  429. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/utils/immutable.rs +0 -0
  430. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/utils/mod.rs +0 -0
  431. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/utils/retryable.rs +0 -0
  432. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/utils/str_sanitize.rs +0 -0
  433. {cocoindex-0.2.13 → cocoindex-0.2.15}/src/utils/yaml_ser.rs +0 -0
@@ -1283,7 +1283,7 @@ dependencies = [
1283
1283
 
1284
1284
  [[package]]
1285
1285
  name = "cocoindex"
1286
- version = "0.2.13"
1286
+ version = "0.2.15"
1287
1287
  dependencies = [
1288
1288
  "anyhow",
1289
1289
  "async-openai",
@@ -2,7 +2,7 @@
2
2
  name = "cocoindex"
3
3
  # Version used for local development is always higher than others to take precedence.
4
4
  # Will be overridden for specific release versions.
5
- version = "0.2.13"
5
+ version = "0.2.15"
6
6
  edition = "2024"
7
7
  rust-version = "1.89"
8
8
  license = "Apache-2.0"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cocoindex
3
- Version: 0.2.13
3
+ Version: 0.2.15
4
4
  Classifier: Development Status :: 3 - Alpha
5
5
  Classifier: License :: OSI Approved :: Apache Software License
6
6
  Classifier: Operating System :: OS Independent
@@ -2428,7 +2428,7 @@ Software.
2428
2428
  <h3 id="Apache-2.0">Apache License 2.0</h3>
2429
2429
  <h4>Used by:</h4>
2430
2430
  <ul class="license-used-by">
2431
- <li><a href=" https://crates.io/crates/cocoindex ">cocoindex 0.2.13</a></li>
2431
+ <li><a href=" https://crates.io/crates/cocoindex ">cocoindex 0.2.15</a></li>
2432
2432
  <li><a href=" https://github.com/awesomized/crc-fast-rust ">crc-fast 1.3.0</a></li>
2433
2433
  <li><a href=" https://github.com/qdrant/rust-client ">qdrant-client 1.15.0</a></li>
2434
2434
  </ul>
@@ -313,6 +313,7 @@ Types of the fields must be key types. See [Key Types](data_types#key-types) for
313
313
 
314
314
  * `field_name`: the field to create vector index.
315
315
  * `metric`: the similarity metric to use.
316
+ * `method` (optional): the index algorithm and optional tuning parameters. Leave unset to use the target default (HNSW for Postgres). Use `cocoindex.HnswVectorIndexMethod()` or `cocoindex.IvfFlatVectorIndexMethod()` to customize the method and its parameters.
316
317
 
317
318
  #### Similarity Metrics
318
319
 
@@ -105,6 +105,16 @@ doc_embeddings.export(
105
105
  CocoIndex supports other vector databases as well, with 1-line switch.
106
106
  <DocumentationButton url="https://cocoindex.io/docs/ops/targets" text="Targets" />
107
107
 
108
+ Need IVFFlat or custom HNSW parameters? Pass a method, for example:
109
+
110
+ ```python
111
+ cocoindex.VectorIndexDef(
112
+ field_name="embedding",
113
+ metric=cocoindex.VectorSimilarityMetric.COSINE_SIMILARITY,
114
+ method=cocoindex.IvfFlatVectorIndexMethod(lists=200),
115
+ )
116
+ ```
117
+
108
118
  ## Query the index
109
119
 
110
120
  ### Define a shared flow for both indexing and querying
@@ -2,6 +2,7 @@
2
2
  Cocoindex is a framework for building and running indexing pipelines.
3
3
  """
4
4
 
5
+ from . import _engine # type: ignore
5
6
  from . import functions, sources, targets, cli, utils
6
7
 
7
8
  from . import targets as storages # Deprecated: Use targets instead
@@ -21,7 +22,13 @@ from .flow import add_flow_def, remove_flow # DEPRECATED
21
22
  from .flow import update_all_flows_async, setup_all_flows, drop_all_flows
22
23
  from .lib import settings, init, start_server, stop
23
24
  from .llm import LlmSpec, LlmApiType
24
- from .index import VectorSimilarityMetric, VectorIndexDef, IndexOptions
25
+ from .index import (
26
+ VectorSimilarityMetric,
27
+ VectorIndexDef,
28
+ IndexOptions,
29
+ HnswVectorIndexMethod,
30
+ IvfFlatVectorIndexMethod,
31
+ )
25
32
  from .setting import DatabaseConnectionSpec, Settings, ServerSettings
26
33
  from .setting import get_app_namespace
27
34
  from .query_handler import QueryHandlerResultFields, QueryInfo, QueryOutput
@@ -36,6 +43,8 @@ from .typing import (
36
43
  Json,
37
44
  )
38
45
 
46
+ _engine.init_pyo3_runtime()
47
+
39
48
  __all__ = [
40
49
  # Submodules
41
50
  "_engine",
@@ -82,6 +91,8 @@ __all__ = [
82
91
  "VectorSimilarityMetric",
83
92
  "VectorIndexDef",
84
93
  "IndexOptions",
94
+ "HnswVectorIndexMethod",
95
+ "IvfFlatVectorIndexMethod",
85
96
  # Settings
86
97
  "DatabaseConnectionSpec",
87
98
  "Settings",
@@ -2,13 +2,13 @@
2
2
 
3
3
  import dataclasses
4
4
  import functools
5
- from typing import Annotated, Any, Literal
5
+ from typing import Any, Literal
6
6
 
7
7
  import numpy as np
8
8
  from numpy.typing import NDArray
9
9
 
10
10
  from . import llm, op
11
- from .typing import TypeAttr, Vector
11
+ from .typing import Vector
12
12
 
13
13
 
14
14
  class ParseJson(op.FunctionSpec):
@@ -40,6 +40,24 @@ class SplitRecursively(op.FunctionSpec):
40
40
  custom_languages: list[CustomLanguageSpec] = dataclasses.field(default_factory=list)
41
41
 
42
42
 
43
+ class SplitBySeparators(op.FunctionSpec):
44
+ """
45
+ Split text by specified regex separators only.
46
+ Output schema matches SplitRecursively for drop-in compatibility:
47
+ KTable rows with fields: location (Range), text (Str), start, end.
48
+ Args:
49
+ separators_regex: list[str] # e.g., [r"\\n\\n+"]
50
+ keep_separator: Literal["NONE", "LEFT", "RIGHT"] = "NONE"
51
+ include_empty: bool = False
52
+ trim: bool = True
53
+ """
54
+
55
+ separators_regex: list[str] = dataclasses.field(default_factory=list)
56
+ keep_separator: Literal["NONE", "LEFT", "RIGHT"] = "NONE"
57
+ include_empty: bool = False
58
+ trim: bool = True
59
+
60
+
43
61
  class EmbedText(op.FunctionSpec):
44
62
  """Embed a text into a vector space."""
45
63
 
@@ -1,6 +1,6 @@
1
1
  from enum import Enum
2
2
  from dataclasses import dataclass
3
- from typing import Sequence
3
+ from typing import Sequence, Union
4
4
 
5
5
 
6
6
  class VectorSimilarityMetric(Enum):
@@ -9,6 +9,26 @@ class VectorSimilarityMetric(Enum):
9
9
  INNER_PRODUCT = "InnerProduct"
10
10
 
11
11
 
12
+ @dataclass
13
+ class HnswVectorIndexMethod:
14
+ """HNSW vector index parameters."""
15
+
16
+ kind: str = "Hnsw"
17
+ m: int | None = None
18
+ ef_construction: int | None = None
19
+
20
+
21
+ @dataclass
22
+ class IvfFlatVectorIndexMethod:
23
+ """IVFFlat vector index parameters."""
24
+
25
+ kind: str = "IvfFlat"
26
+ lists: int | None = None
27
+
28
+
29
+ VectorIndexMethod = Union[HnswVectorIndexMethod, IvfFlatVectorIndexMethod]
30
+
31
+
12
32
  @dataclass
13
33
  class VectorIndexDef:
14
34
  """
@@ -17,6 +37,7 @@ class VectorIndexDef:
17
37
 
18
38
  field_name: str
19
39
  metric: VectorSimilarityMetric
40
+ method: VectorIndexMethod | None = None
20
41
 
21
42
 
22
43
  @dataclass
@@ -296,6 +296,12 @@ class _Connector:
296
296
  ) -> _State:
297
297
  if len(key_fields_schema) != 1:
298
298
  raise ValueError("LanceDB only supports a single key field")
299
+ if index_options.vector_indexes is not None:
300
+ for vector_index in index_options.vector_indexes:
301
+ if vector_index.method is not None:
302
+ raise ValueError(
303
+ "Vector index method is not configurable for LanceDB yet"
304
+ )
299
305
  return _State(
300
306
  key_field_schema=key_fields_schema[0],
301
307
  value_fields_schema=value_fields_schema,
@@ -384,15 +384,72 @@ impl fmt::Display for VectorSimilarityMetric {
384
384
  }
385
385
  }
386
386
 
387
+ #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
388
+ #[serde(tag = "kind")]
389
+ pub enum VectorIndexMethod {
390
+ Hnsw {
391
+ #[serde(default, skip_serializing_if = "Option::is_none")]
392
+ m: Option<u32>,
393
+ #[serde(default, skip_serializing_if = "Option::is_none")]
394
+ ef_construction: Option<u32>,
395
+ },
396
+ IvfFlat {
397
+ #[serde(default, skip_serializing_if = "Option::is_none")]
398
+ lists: Option<u32>,
399
+ },
400
+ }
401
+
402
+ impl VectorIndexMethod {
403
+ pub fn kind(&self) -> &'static str {
404
+ match self {
405
+ Self::Hnsw { .. } => "Hnsw",
406
+ Self::IvfFlat { .. } => "IvfFlat",
407
+ }
408
+ }
409
+ }
410
+
411
+ impl fmt::Display for VectorIndexMethod {
412
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
413
+ match self {
414
+ Self::Hnsw { m, ef_construction } => {
415
+ let mut parts = Vec::new();
416
+ if let Some(m) = m {
417
+ parts.push(format!("m={}", m));
418
+ }
419
+ if let Some(ef) = ef_construction {
420
+ parts.push(format!("ef_construction={}", ef));
421
+ }
422
+ if parts.is_empty() {
423
+ write!(f, "Hnsw")
424
+ } else {
425
+ write!(f, "Hnsw({})", parts.join(","))
426
+ }
427
+ }
428
+ Self::IvfFlat { lists } => {
429
+ if let Some(lists) = lists {
430
+ write!(f, "IvfFlat(lists={lists})")
431
+ } else {
432
+ write!(f, "IvfFlat")
433
+ }
434
+ }
435
+ }
436
+ }
437
+ }
438
+
387
439
  #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
388
440
  pub struct VectorIndexDef {
389
441
  pub field_name: FieldName,
390
442
  pub metric: VectorSimilarityMetric,
443
+ #[serde(default, skip_serializing_if = "Option::is_none")]
444
+ pub method: Option<VectorIndexMethod>,
391
445
  }
392
446
 
393
447
  impl fmt::Display for VectorIndexDef {
394
448
  fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
395
- write!(f, "{}:{}", self.field_name, self.metric)
449
+ match &self.method {
450
+ None => write!(f, "{}:{}", self.field_name, self.metric),
451
+ Some(method) => write!(f, "{}:{}:{}", self.field_name, self.metric, method),
452
+ }
396
453
  }
397
454
  }
398
455
 
@@ -162,6 +162,13 @@ impl FlowContext {
162
162
  static TOKIO_RUNTIME: LazyLock<Runtime> = LazyLock::new(|| Runtime::new().unwrap());
163
163
  static AUTH_REGISTRY: LazyLock<Arc<AuthRegistry>> = LazyLock::new(|| Arc::new(AuthRegistry::new()));
164
164
 
165
+ pub fn get_runtime() -> &'static Runtime {
166
+ &TOKIO_RUNTIME
167
+ }
168
+ pub fn get_auth_registry() -> &'static Arc<AuthRegistry> {
169
+ &AUTH_REGISTRY
170
+ }
171
+
165
172
  type PoolKey = (String, Option<String>);
166
173
  type PoolValue = Arc<tokio::sync::OnceCell<PgPool>>;
167
174
 
@@ -271,21 +278,10 @@ impl LibContext {
271
278
  }
272
279
  }
273
280
 
274
- pub fn get_runtime() -> &'static Runtime {
275
- &TOKIO_RUNTIME
276
- }
277
-
278
- pub fn get_auth_registry() -> &'static Arc<AuthRegistry> {
279
- &AUTH_REGISTRY
280
- }
281
-
282
281
  static LIB_INIT: OnceLock<()> = OnceLock::new();
283
282
  pub async fn create_lib_context(settings: settings::Settings) -> Result<LibContext> {
284
283
  LIB_INIT.get_or_init(|| {
285
284
  let _ = env_logger::try_init();
286
-
287
- pyo3_async_runtimes::tokio::init_with_runtime(get_runtime()).unwrap();
288
-
289
285
  let _ = rustls::crypto::aws_lc_rs::default_provider().install_default();
290
286
  });
291
287
 
@@ -74,6 +74,30 @@ impl AiStudioClient {
74
74
  }
75
75
  }
76
76
 
77
+ fn build_embed_payload(
78
+ model: &str,
79
+ text: &str,
80
+ task_type: Option<&str>,
81
+ output_dimension: Option<u32>,
82
+ ) -> serde_json::Value {
83
+ let mut payload = serde_json::json!({
84
+ "model": model,
85
+ "content": { "parts": [{ "text": text }] },
86
+ });
87
+ if let Some(task_type) = task_type {
88
+ payload["taskType"] = serde_json::Value::String(task_type.to_string());
89
+ }
90
+ if let Some(output_dimension) = output_dimension {
91
+ payload["outputDimensionality"] = serde_json::json!(output_dimension);
92
+ if model.starts_with("gemini-embedding-") {
93
+ payload["config"] = serde_json::json!({
94
+ "outputDimensionality": output_dimension,
95
+ });
96
+ }
97
+ }
98
+ payload
99
+ }
100
+
77
101
  #[async_trait]
78
102
  impl LlmGenerationClient for AiStudioClient {
79
103
  async fn generate<'req>(
@@ -174,16 +198,12 @@ impl LlmEmbeddingClient for AiStudioClient {
174
198
  request: super::LlmEmbeddingRequest<'req>,
175
199
  ) -> Result<super::LlmEmbeddingResponse> {
176
200
  let url = self.get_api_url(request.model, "embedContent");
177
- let mut payload = serde_json::json!({
178
- "model": request.model,
179
- "content": { "parts": [{ "text": request.text }] },
180
- });
181
- if let Some(task_type) = request.task_type {
182
- payload["taskType"] = serde_json::Value::String(task_type.into());
183
- }
184
- if let Some(output_dimension) = request.output_dimension {
185
- payload["outputDimensionality"] = serde_json::Value::Number(output_dimension.into());
186
- }
201
+ let payload = build_embed_payload(
202
+ request.model,
203
+ request.text.as_ref(),
204
+ request.task_type.as_deref(),
205
+ request.output_dimension,
206
+ );
187
207
  let resp = retryable::run(
188
208
  || async {
189
209
  self.client
@@ -1,6 +1,7 @@
1
1
  pub mod embed_text;
2
2
  pub mod extract_by_llm;
3
3
  pub mod parse_json;
4
+ pub mod split_by_separators;
4
5
  pub mod split_recursively;
5
6
 
6
7
  #[cfg(test)]
@@ -0,0 +1,254 @@
1
+ use anyhow::{Context, Result};
2
+ use regex::Regex;
3
+ use std::sync::Arc;
4
+
5
+ use crate::ops::registry::ExecutorFactoryRegistry;
6
+ use crate::ops::shared::split::{Position, make_common_chunk_schema, set_output_positions};
7
+ use crate::{fields_value, ops::sdk::*};
8
+
9
+ #[derive(Serialize, Deserialize, Clone, Copy, PartialEq, Eq)]
10
+ #[serde(rename_all = "UPPERCASE")]
11
+ enum KeepSep {
12
+ Left,
13
+ Right,
14
+ }
15
+
16
+ #[derive(Serialize, Deserialize)]
17
+ struct Spec {
18
+ // Python SDK provides defaults/values.
19
+ separators_regex: Vec<String>,
20
+ keep_separator: Option<KeepSep>,
21
+ include_empty: bool,
22
+ trim: bool,
23
+ }
24
+
25
+ struct Args {
26
+ text: ResolvedOpArg,
27
+ }
28
+
29
+ struct Executor {
30
+ spec: Spec,
31
+ regex: Option<Regex>,
32
+ args: Args,
33
+ }
34
+
35
+ impl Executor {
36
+ fn new(args: Args, spec: Spec) -> Result<Self> {
37
+ let regex = if spec.separators_regex.is_empty() {
38
+ None
39
+ } else {
40
+ // OR-join all separators, multiline
41
+ let pattern = format!(
42
+ "(?m){}",
43
+ spec.separators_regex
44
+ .iter()
45
+ .map(|s| format!("(?:{s})"))
46
+ .collect::<Vec<_>>()
47
+ .join("|")
48
+ );
49
+ Some(Regex::new(&pattern).context("failed to compile separators_regex")?)
50
+ };
51
+ Ok(Self { args, spec, regex })
52
+ }
53
+ }
54
+
55
+ struct ChunkOutput<'s> {
56
+ start_pos: Position,
57
+ end_pos: Position,
58
+ text: &'s str,
59
+ }
60
+
61
+ #[async_trait]
62
+ impl SimpleFunctionExecutor for Executor {
63
+ async fn evaluate(&self, input: Vec<Value>) -> Result<Value> {
64
+ let full_text = self.args.text.value(&input)?.as_str()?;
65
+ let bytes = full_text.as_bytes();
66
+
67
+ // add_range applies trim/include_empty and records the text slice
68
+ let mut chunks: Vec<ChunkOutput<'_>> = Vec::new();
69
+ let mut add_range = |mut s: usize, mut e: usize| {
70
+ if self.spec.trim {
71
+ while s < e && bytes[s].is_ascii_whitespace() {
72
+ s += 1;
73
+ }
74
+ while e > s && bytes[e - 1].is_ascii_whitespace() {
75
+ e -= 1;
76
+ }
77
+ }
78
+ if self.spec.include_empty || e > s {
79
+ chunks.push(ChunkOutput {
80
+ start_pos: Position::new(s),
81
+ end_pos: Position::new(e),
82
+ text: &full_text[s..e],
83
+ });
84
+ }
85
+ };
86
+
87
+ if let Some(re) = &self.regex {
88
+ let mut start = 0usize;
89
+ for m in re.find_iter(full_text) {
90
+ let end = match self.spec.keep_separator {
91
+ Some(KeepSep::Left) => m.end(),
92
+ Some(KeepSep::Right) | None => m.start(),
93
+ };
94
+ add_range(start, end);
95
+ start = match self.spec.keep_separator {
96
+ Some(KeepSep::Right) => m.start(),
97
+ _ => m.end(),
98
+ };
99
+ }
100
+ add_range(start, full_text.len());
101
+ } else {
102
+ // No separators: emit whole text
103
+ add_range(0, full_text.len());
104
+ }
105
+
106
+ set_output_positions(
107
+ full_text,
108
+ chunks.iter_mut().flat_map(|c| {
109
+ std::iter::once(&mut c.start_pos).chain(std::iter::once(&mut c.end_pos))
110
+ }),
111
+ );
112
+
113
+ let table = chunks
114
+ .into_iter()
115
+ .map(|c| {
116
+ let s = c.start_pos.output.unwrap();
117
+ let e = c.end_pos.output.unwrap();
118
+ (
119
+ KeyValue::from_single_part(RangeValue::new(s.char_offset, e.char_offset)),
120
+ fields_value!(Arc::<str>::from(c.text), s.into_output(), e.into_output())
121
+ .into(),
122
+ )
123
+ })
124
+ .collect();
125
+
126
+ Ok(Value::KTable(table))
127
+ }
128
+ }
129
+
130
+ struct Factory;
131
+
132
+ #[async_trait]
133
+ impl SimpleFunctionFactoryBase for Factory {
134
+ type Spec = Spec;
135
+ type ResolvedArgs = Args;
136
+
137
+ fn name(&self) -> &str {
138
+ "SplitBySeparators"
139
+ }
140
+
141
+ async fn resolve_schema<'a>(
142
+ &'a self,
143
+ _spec: &'a Spec,
144
+ args_resolver: &mut OpArgsResolver<'a>,
145
+ _context: &FlowInstanceContext,
146
+ ) -> Result<(Args, EnrichedValueType)> {
147
+ // one required arg: text: Str
148
+ let args = Args {
149
+ text: args_resolver
150
+ .next_arg("text")?
151
+ .expect_type(&ValueType::Basic(BasicValueType::Str))?
152
+ .required()?,
153
+ };
154
+
155
+ let output_schema = make_common_chunk_schema(args_resolver, &args.text)?;
156
+ Ok((args, output_schema))
157
+ }
158
+
159
+ async fn build_executor(
160
+ self: Arc<Self>,
161
+ spec: Spec,
162
+ args: Args,
163
+ _context: Arc<FlowInstanceContext>,
164
+ ) -> Result<impl SimpleFunctionExecutor> {
165
+ Executor::new(args, spec)
166
+ }
167
+ }
168
+
169
+ pub fn register(registry: &mut ExecutorFactoryRegistry) -> Result<()> {
170
+ Factory.register(registry)
171
+ }
172
+
173
+ #[cfg(test)]
174
+ mod tests {
175
+ use super::*;
176
+ use crate::ops::functions::test_utils::test_flow_function;
177
+
178
+ #[tokio::test]
179
+ async fn test_split_by_separators_paragraphs() {
180
+ let spec = Spec {
181
+ separators_regex: vec![r"\n\n+".to_string()],
182
+ keep_separator: None,
183
+ include_empty: false,
184
+ trim: true,
185
+ };
186
+ let factory = Arc::new(Factory);
187
+ let text = "Para1\n\nPara2\n\n\nPara3";
188
+
189
+ let input_arg_schemas = &[(
190
+ Some("text"),
191
+ make_output_type(BasicValueType::Str).with_nullable(true),
192
+ )];
193
+
194
+ let result = test_flow_function(
195
+ &factory,
196
+ &spec,
197
+ input_arg_schemas,
198
+ vec![text.to_string().into()],
199
+ )
200
+ .await
201
+ .unwrap();
202
+
203
+ match result {
204
+ Value::KTable(table) => {
205
+ // Expected ranges after trimming whitespace:
206
+ let expected = vec![
207
+ (RangeValue::new(0, 5), "Para1"),
208
+ (RangeValue::new(7, 12), "Para2"),
209
+ (RangeValue::new(15, 20), "Para3"),
210
+ ];
211
+ for (range, expected_text) in expected {
212
+ let key = KeyValue::from_single_part(range);
213
+ let row = table.get(&key).unwrap();
214
+ let chunk_text = row.0.fields[0].as_str().unwrap();
215
+ assert_eq!(**chunk_text, *expected_text);
216
+ }
217
+ }
218
+ other => panic!("Expected KTable, got {other:?}"),
219
+ }
220
+ }
221
+
222
+ #[tokio::test]
223
+ async fn test_split_by_separators_keep_right() {
224
+ let spec = Spec {
225
+ separators_regex: vec![r"\.".to_string()],
226
+ keep_separator: Some(KeepSep::Right),
227
+ include_empty: false,
228
+ trim: true,
229
+ };
230
+ let factory = Arc::new(Factory);
231
+ let text = "A. B. C.";
232
+
233
+ let input_arg_schemas = &[(
234
+ Some("text"),
235
+ make_output_type(BasicValueType::Str).with_nullable(true),
236
+ )];
237
+
238
+ let result = test_flow_function(
239
+ &factory,
240
+ &spec,
241
+ input_arg_schemas,
242
+ vec![text.to_string().into()],
243
+ )
244
+ .await
245
+ .unwrap();
246
+
247
+ match result {
248
+ Value::KTable(table) => {
249
+ assert!(table.len() >= 3);
250
+ }
251
+ _ => panic!("KTable expected"),
252
+ }
253
+ }
254
+ }