cocoindex 0.1.59__tar.gz → 0.1.61__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (261) hide show
  1. {cocoindex-0.1.59 → cocoindex-0.1.61}/.gitignore +2 -0
  2. {cocoindex-0.1.59 → cocoindex-0.1.61}/Cargo.lock +1 -1
  3. {cocoindex-0.1.59 → cocoindex-0.1.61}/Cargo.toml +1 -1
  4. {cocoindex-0.1.59 → cocoindex-0.1.61}/PKG-INFO +1 -1
  5. {cocoindex-0.1.59 → cocoindex-0.1.61}/docs/docs/core/cli.mdx +0 -6
  6. cocoindex-0.1.61/docs/docs/core/data_types.mdx +188 -0
  7. {cocoindex-0.1.59 → cocoindex-0.1.61}/python/cocoindex/__init__.py +12 -3
  8. {cocoindex-0.1.59 → cocoindex-0.1.61}/python/cocoindex/flow.py +10 -1
  9. cocoindex-0.1.61/python/cocoindex/lib.py +32 -0
  10. {cocoindex-0.1.59 → cocoindex-0.1.61}/python/cocoindex/tests/test_convert.py +69 -19
  11. {cocoindex-0.1.59 → cocoindex-0.1.61}/python/cocoindex/typing.py +1 -0
  12. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/base/spec.rs +8 -0
  13. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/builder/analyzer.rs +3 -0
  14. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/builder/flow_builder.rs +5 -1
  15. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/builder/plan.rs +1 -0
  16. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/execution/source_indexer.rs +1 -0
  17. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/ops/functions/split_recursively.rs +5 -7
  18. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/py/convert.rs +19 -5
  19. cocoindex-0.1.61/src/utils/concur_control.rs +30 -0
  20. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/utils/mod.rs +3 -0
  21. cocoindex-0.1.59/.vscode/settings.json +0 -9
  22. cocoindex-0.1.59/docs/docs/core/data_types.mdx +0 -146
  23. cocoindex-0.1.59/python/cocoindex/lib.py +0 -75
  24. {cocoindex-0.1.59 → cocoindex-0.1.61}/.cargo/config.toml +0 -0
  25. {cocoindex-0.1.59 → cocoindex-0.1.61}/.env.lib_debug +0 -0
  26. {cocoindex-0.1.59 → cocoindex-0.1.61}/.github/ISSUE_TEMPLATE//360/237/220/233-bug-report.md" +0 -0
  27. {cocoindex-0.1.59 → cocoindex-0.1.61}/.github/ISSUE_TEMPLATE//360/237/222/241-feature-request.md" +0 -0
  28. {cocoindex-0.1.59 → cocoindex-0.1.61}/.github/scripts/update_version.sh +0 -0
  29. {cocoindex-0.1.59 → cocoindex-0.1.61}/.github/workflows/CI.yml +0 -0
  30. {cocoindex-0.1.59 → cocoindex-0.1.61}/.github/workflows/_doc_release.yml +0 -0
  31. {cocoindex-0.1.59 → cocoindex-0.1.61}/.github/workflows/_test.yml +0 -0
  32. {cocoindex-0.1.59 → cocoindex-0.1.61}/.github/workflows/docs.yml +0 -0
  33. {cocoindex-0.1.59 → cocoindex-0.1.61}/.github/workflows/format.yml +0 -0
  34. {cocoindex-0.1.59 → cocoindex-0.1.61}/.github/workflows/release.yml +0 -0
  35. {cocoindex-0.1.59 → cocoindex-0.1.61}/.pre-commit-config.yaml +0 -0
  36. {cocoindex-0.1.59 → cocoindex-0.1.61}/CODE_OF_CONDUCT.md +0 -0
  37. {cocoindex-0.1.59 → cocoindex-0.1.61}/CONTRIBUTING.md +0 -0
  38. {cocoindex-0.1.59 → cocoindex-0.1.61}/LICENSE +0 -0
  39. {cocoindex-0.1.59 → cocoindex-0.1.61}/README.md +0 -0
  40. {cocoindex-0.1.59 → cocoindex-0.1.61}/dev/neo4j.yaml +0 -0
  41. {cocoindex-0.1.59 → cocoindex-0.1.61}/dev/postgres.yaml +0 -0
  42. {cocoindex-0.1.59 → cocoindex-0.1.61}/docs/.gitignore +0 -0
  43. {cocoindex-0.1.59 → cocoindex-0.1.61}/docs/README.md +0 -0
  44. {cocoindex-0.1.59 → cocoindex-0.1.61}/docs/docs/about/community.md +0 -0
  45. {cocoindex-0.1.59 → cocoindex-0.1.61}/docs/docs/about/contributing.md +0 -0
  46. {cocoindex-0.1.59 → cocoindex-0.1.61}/docs/docs/ai/llm.mdx +0 -0
  47. {cocoindex-0.1.59 → cocoindex-0.1.61}/docs/docs/core/basics.md +0 -0
  48. {cocoindex-0.1.59 → cocoindex-0.1.61}/docs/docs/core/custom_function.mdx +0 -0
  49. {cocoindex-0.1.59 → cocoindex-0.1.61}/docs/docs/core/data_example.svg +0 -0
  50. {cocoindex-0.1.59 → cocoindex-0.1.61}/docs/docs/core/flow_def.mdx +0 -0
  51. {cocoindex-0.1.59 → cocoindex-0.1.61}/docs/docs/core/flow_example.svg +0 -0
  52. {cocoindex-0.1.59 → cocoindex-0.1.61}/docs/docs/core/flow_methods.mdx +0 -0
  53. {cocoindex-0.1.59 → cocoindex-0.1.61}/docs/docs/core/settings.mdx +0 -0
  54. {cocoindex-0.1.59 → cocoindex-0.1.61}/docs/docs/getting_started/installation.md +0 -0
  55. {cocoindex-0.1.59 → cocoindex-0.1.61}/docs/docs/getting_started/markdown_files.zip +0 -0
  56. {cocoindex-0.1.59 → cocoindex-0.1.61}/docs/docs/getting_started/overview.md +0 -0
  57. {cocoindex-0.1.59 → cocoindex-0.1.61}/docs/docs/getting_started/quickstart.md +0 -0
  58. {cocoindex-0.1.59 → cocoindex-0.1.61}/docs/docs/ops/functions.md +0 -0
  59. {cocoindex-0.1.59 → cocoindex-0.1.61}/docs/docs/ops/sources.md +0 -0
  60. {cocoindex-0.1.59 → cocoindex-0.1.61}/docs/docs/ops/targets.md +0 -0
  61. {cocoindex-0.1.59 → cocoindex-0.1.61}/docs/docs/query.mdx +0 -0
  62. {cocoindex-0.1.59 → cocoindex-0.1.61}/docs/docusaurus.config.ts +0 -0
  63. {cocoindex-0.1.59 → cocoindex-0.1.61}/docs/package.json +0 -0
  64. {cocoindex-0.1.59 → cocoindex-0.1.61}/docs/sidebars.ts +0 -0
  65. {cocoindex-0.1.59 → cocoindex-0.1.61}/docs/src/components/HomepageFeatures/index.tsx +0 -0
  66. {cocoindex-0.1.59 → cocoindex-0.1.61}/docs/src/components/HomepageFeatures/styles.module.css +0 -0
  67. {cocoindex-0.1.59 → cocoindex-0.1.61}/docs/src/css/custom.css +0 -0
  68. {cocoindex-0.1.59 → cocoindex-0.1.61}/docs/src/theme/Root.js +0 -0
  69. {cocoindex-0.1.59 → cocoindex-0.1.61}/docs/static/.nojekyll +0 -0
  70. {cocoindex-0.1.59 → cocoindex-0.1.61}/docs/static/img/docusaurus.png +0 -0
  71. {cocoindex-0.1.59 → cocoindex-0.1.61}/docs/static/img/favicon.ico +0 -0
  72. {cocoindex-0.1.59 → cocoindex-0.1.61}/docs/static/img/icon.svg +0 -0
  73. {cocoindex-0.1.59 → cocoindex-0.1.61}/docs/static/img/incremental-etl.gif +0 -0
  74. {cocoindex-0.1.59 → cocoindex-0.1.61}/docs/static/robots.txt +0 -0
  75. {cocoindex-0.1.59 → cocoindex-0.1.61}/docs/tsconfig.json +0 -0
  76. {cocoindex-0.1.59 → cocoindex-0.1.61}/docs/yarn.lock +0 -0
  77. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/amazon_s3_embedding/.env.example +0 -0
  78. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/amazon_s3_embedding/.gitignore +0 -0
  79. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/amazon_s3_embedding/README.md +0 -0
  80. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/amazon_s3_embedding/main.py +0 -0
  81. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/amazon_s3_embedding/pyproject.toml +0 -0
  82. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/code_embedding/.env +0 -0
  83. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/code_embedding/README.md +0 -0
  84. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/code_embedding/main.py +0 -0
  85. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/code_embedding/pyproject.toml +0 -0
  86. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/docs_to_knowledge_graph/.env +0 -0
  87. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/docs_to_knowledge_graph/README.md +0 -0
  88. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/docs_to_knowledge_graph/main.py +0 -0
  89. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/docs_to_knowledge_graph/pyproject.toml +0 -0
  90. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/fastapi_server_docker/.dockerignore +0 -0
  91. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/fastapi_server_docker/.env +0 -0
  92. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/fastapi_server_docker/README.md +0 -0
  93. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/fastapi_server_docker/compose.yaml +0 -0
  94. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/fastapi_server_docker/dockerfile +0 -0
  95. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/fastapi_server_docker/files/1810.04805v2.md +0 -0
  96. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/fastapi_server_docker/main.py +0 -0
  97. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/fastapi_server_docker/requirements.txt +0 -0
  98. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/gdrive_text_embedding/.env.example +0 -0
  99. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/gdrive_text_embedding/.gitignore +0 -0
  100. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/gdrive_text_embedding/README.md +0 -0
  101. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/gdrive_text_embedding/main.py +0 -0
  102. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/gdrive_text_embedding/pyproject.toml +0 -0
  103. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/image_search/.env +0 -0
  104. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/image_search/README.md +0 -0
  105. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/image_search/frontend/.gitignore +0 -0
  106. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/image_search/frontend/index.html +0 -0
  107. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/image_search/frontend/package-lock.json +0 -0
  108. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/image_search/frontend/package.json +0 -0
  109. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/image_search/frontend/src/App.jsx +0 -0
  110. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/image_search/frontend/src/main.jsx +0 -0
  111. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/image_search/frontend/src/style.css +0 -0
  112. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/image_search/frontend/vite.config.js +0 -0
  113. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/image_search/img/cat1.jpeg +0 -0
  114. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/image_search/img/dog1.jpeg +0 -0
  115. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/image_search/img/elephant1.jpg +0 -0
  116. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/image_search/img/giraffe.jpg +0 -0
  117. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/image_search/main.py +0 -0
  118. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/image_search/pyproject.toml +0 -0
  119. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/image_search/requirements.txt +0 -0
  120. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/manuals_llm_extraction/.env +0 -0
  121. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/manuals_llm_extraction/README.md +0 -0
  122. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/manuals_llm_extraction/main.py +0 -0
  123. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/manuals_llm_extraction/manuals/array.pdf +0 -0
  124. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/manuals_llm_extraction/manuals/base64.pdf +0 -0
  125. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/manuals_llm_extraction/manuals/copy.pdf +0 -0
  126. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/manuals_llm_extraction/manuals/glob.pdf +0 -0
  127. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/manuals_llm_extraction/pyproject.toml +0 -0
  128. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/patient_intake_extraction/.env.example +0 -0
  129. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/patient_intake_extraction/README.md +0 -0
  130. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/patient_intake_extraction/data/README.md +0 -0
  131. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/patient_intake_extraction/data/patient_forms/Patient_Intake_Form_David_Artificial.docx +0 -0
  132. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/patient_intake_extraction/data/patient_forms/Patient_Intake_Form_Emily_Artificial.pdf +0 -0
  133. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/patient_intake_extraction/data/patient_forms/Patient_Intake_Form_Joe_Artificial.pdf +0 -0
  134. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/patient_intake_extraction/data/patient_forms/Patient_Intake_From_Jane_Artificial.docx +0 -0
  135. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/patient_intake_extraction/main.py +0 -0
  136. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/patient_intake_extraction/pyproject.toml +0 -0
  137. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/pdf_embedding/.env +0 -0
  138. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/pdf_embedding/README.md +0 -0
  139. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/pdf_embedding/main.py +0 -0
  140. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/pdf_embedding/pdf_files/1706.03762v7.pdf +0 -0
  141. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/pdf_embedding/pdf_files/1810.04805v2.pdf +0 -0
  142. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/pdf_embedding/pdf_files/rfc8259.pdf +0 -0
  143. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/pdf_embedding/pyproject.toml +0 -0
  144. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/product_recommendation/.env +0 -0
  145. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/product_recommendation/README.md +0 -0
  146. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/product_recommendation/img/cocoinsight.png +0 -0
  147. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/product_recommendation/img/neo4j.png +0 -0
  148. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/product_recommendation/main.py +0 -0
  149. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/product_recommendation/products/p1.json +0 -0
  150. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/product_recommendation/products/p2.json +0 -0
  151. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/product_recommendation/products/p3.json +0 -0
  152. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/product_recommendation/products/p4.json +0 -0
  153. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/product_recommendation/products/p5.json +0 -0
  154. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/product_recommendation/products/p6.json +0 -0
  155. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/product_recommendation/products/p7.json +0 -0
  156. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/product_recommendation/products/p8.json +0 -0
  157. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/product_recommendation/products/p9.json +0 -0
  158. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/product_recommendation/pyproject.toml +0 -0
  159. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/text_embedding/.env +0 -0
  160. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/text_embedding/README.md +0 -0
  161. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/text_embedding/Text_Embedding.ipynb +0 -0
  162. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/text_embedding/main.py +0 -0
  163. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/text_embedding/markdown_files/1706.03762v7.md +0 -0
  164. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/text_embedding/markdown_files/1810.04805v2.md +0 -0
  165. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/text_embedding/markdown_files/rfc8259.md +0 -0
  166. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/text_embedding/pyproject.toml +0 -0
  167. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/text_embedding_qdrant/.env +0 -0
  168. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/text_embedding_qdrant/README.md +0 -0
  169. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/text_embedding_qdrant/main.py +0 -0
  170. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/text_embedding_qdrant/markdown_files/rfc8259.md +0 -0
  171. {cocoindex-0.1.59 → cocoindex-0.1.61}/examples/text_embedding_qdrant/pyproject.toml +0 -0
  172. {cocoindex-0.1.59 → cocoindex-0.1.61}/pyproject.toml +0 -0
  173. {cocoindex-0.1.59 → cocoindex-0.1.61}/python/cocoindex/auth_registry.py +0 -0
  174. {cocoindex-0.1.59 → cocoindex-0.1.61}/python/cocoindex/cli.py +0 -0
  175. {cocoindex-0.1.59 → cocoindex-0.1.61}/python/cocoindex/convert.py +0 -0
  176. {cocoindex-0.1.59 → cocoindex-0.1.61}/python/cocoindex/functions.py +0 -0
  177. {cocoindex-0.1.59 → cocoindex-0.1.61}/python/cocoindex/index.py +0 -0
  178. {cocoindex-0.1.59 → cocoindex-0.1.61}/python/cocoindex/llm.py +0 -0
  179. {cocoindex-0.1.59 → cocoindex-0.1.61}/python/cocoindex/op.py +0 -0
  180. {cocoindex-0.1.59 → cocoindex-0.1.61}/python/cocoindex/py.typed +0 -0
  181. {cocoindex-0.1.59 → cocoindex-0.1.61}/python/cocoindex/runtime.py +0 -0
  182. {cocoindex-0.1.59 → cocoindex-0.1.61}/python/cocoindex/setting.py +0 -0
  183. {cocoindex-0.1.59 → cocoindex-0.1.61}/python/cocoindex/setup.py +0 -0
  184. {cocoindex-0.1.59 → cocoindex-0.1.61}/python/cocoindex/sources.py +0 -0
  185. {cocoindex-0.1.59 → cocoindex-0.1.61}/python/cocoindex/targets.py +0 -0
  186. {cocoindex-0.1.59 → cocoindex-0.1.61}/python/cocoindex/tests/__init__.py +0 -0
  187. {cocoindex-0.1.59 → cocoindex-0.1.61}/python/cocoindex/tests/test_optional_database.py +0 -0
  188. {cocoindex-0.1.59 → cocoindex-0.1.61}/python/cocoindex/tests/test_typing.py +0 -0
  189. {cocoindex-0.1.59 → cocoindex-0.1.61}/python/cocoindex/utils.py +0 -0
  190. {cocoindex-0.1.59 → cocoindex-0.1.61}/ruff.toml +0 -0
  191. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/base/duration.rs +0 -0
  192. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/base/field_attrs.rs +0 -0
  193. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/base/json_schema.rs +0 -0
  194. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/base/mod.rs +0 -0
  195. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/base/schema.rs +0 -0
  196. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/base/value.rs +0 -0
  197. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/builder/analyzed_flow.rs +0 -0
  198. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/builder/exec_ctx.rs +0 -0
  199. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/builder/mod.rs +0 -0
  200. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/execution/db_tracking.rs +0 -0
  201. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/execution/db_tracking_setup.rs +0 -0
  202. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/execution/dumper.rs +0 -0
  203. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/execution/evaluator.rs +0 -0
  204. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/execution/indexing_status.rs +0 -0
  205. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/execution/live_updater.rs +0 -0
  206. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/execution/memoization.rs +0 -0
  207. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/execution/mod.rs +0 -0
  208. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/execution/row_indexer.rs +0 -0
  209. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/execution/stats.rs +0 -0
  210. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/lib.rs +0 -0
  211. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/lib_context.rs +0 -0
  212. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/llm/anthropic.rs +0 -0
  213. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/llm/gemini.rs +0 -0
  214. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/llm/litellm.rs +0 -0
  215. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/llm/mod.rs +0 -0
  216. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/llm/ollama.rs +0 -0
  217. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/llm/openai.rs +0 -0
  218. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/llm/openrouter.rs +0 -0
  219. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/llm/vllm.rs +0 -0
  220. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/llm/voyage.rs +0 -0
  221. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/ops/factory_bases.rs +0 -0
  222. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/ops/functions/embed_text.rs +0 -0
  223. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/ops/functions/extract_by_llm.rs +0 -0
  224. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/ops/functions/mod.rs +0 -0
  225. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/ops/functions/parse_json.rs +0 -0
  226. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/ops/interface.rs +0 -0
  227. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/ops/mod.rs +0 -0
  228. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/ops/py_factory.rs +0 -0
  229. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/ops/registration.rs +0 -0
  230. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/ops/registry.rs +0 -0
  231. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/ops/sdk.rs +0 -0
  232. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/ops/sources/amazon_s3.rs +0 -0
  233. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/ops/sources/google_drive.rs +0 -0
  234. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/ops/sources/local_file.rs +0 -0
  235. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/ops/sources/mod.rs +0 -0
  236. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/ops/targets/kuzu.rs +0 -0
  237. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/ops/targets/mod.rs +0 -0
  238. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/ops/targets/neo4j.rs +0 -0
  239. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/ops/targets/postgres.rs +0 -0
  240. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/ops/targets/qdrant.rs +0 -0
  241. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/ops/targets/shared/mod.rs +0 -0
  242. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/ops/targets/shared/property_graph.rs +0 -0
  243. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/ops/targets/shared/table_columns.rs +0 -0
  244. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/prelude.rs +0 -0
  245. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/py/mod.rs +0 -0
  246. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/server.rs +0 -0
  247. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/service/error.rs +0 -0
  248. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/service/flows.rs +0 -0
  249. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/service/mod.rs +0 -0
  250. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/settings.rs +0 -0
  251. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/setup/auth_registry.rs +0 -0
  252. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/setup/components.rs +0 -0
  253. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/setup/db_metadata.rs +0 -0
  254. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/setup/driver.rs +0 -0
  255. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/setup/mod.rs +0 -0
  256. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/setup/states.rs +0 -0
  257. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/utils/db.rs +0 -0
  258. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/utils/fingerprint.rs +0 -0
  259. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/utils/immutable.rs +0 -0
  260. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/utils/retryable.rs +0 -0
  261. {cocoindex-0.1.59 → cocoindex-0.1.61}/src/utils/yaml_ser.rs +0 -0
@@ -18,3 +18,5 @@ dist/
18
18
 
19
19
  # Output of `cocoindex eval`
20
20
  examples/**/eval_*
21
+
22
+ /.vscode
@@ -1040,7 +1040,7 @@ dependencies = [
1040
1040
 
1041
1041
  [[package]]
1042
1042
  name = "cocoindex"
1043
- version = "0.1.59"
1043
+ version = "0.1.61"
1044
1044
  dependencies = [
1045
1045
  "anyhow",
1046
1046
  "async-openai",
@@ -2,7 +2,7 @@
2
2
  name = "cocoindex"
3
3
  # Version used for local development is always higher than others to take precedence.
4
4
  # Will be overridden for specific release versions.
5
- version = "0.1.59"
5
+ version = "0.1.61"
6
6
  edition = "2024"
7
7
  rust-version = "1.86"
8
8
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cocoindex
3
- Version: 0.1.59
3
+ Version: 0.1.61
4
4
  Requires-Dist: click>=8.1.8
5
5
  Requires-Dist: rich>=14.0.0
6
6
  Requires-Dist: python-dotenv>=1.1.0
@@ -47,12 +47,6 @@ CocoIndex CLI supports the following global options:
47
47
  * `--version`: Show the CocoIndex version and exit.
48
48
  * `--help`: Show the main help message and exit.
49
49
 
50
- :::caution Deprecated Usage
51
-
52
- The old method of invoking the CLI using `python main.py cocoindex ...` via the `@cocoindex.main_fn()` decorator is now deprecated. Please remove `@cocoindex.main_fn()` from your scripts and use the standalone cocoindex command as described.
53
-
54
- :::
55
-
56
50
  ## Subcommands
57
51
 
58
52
  The following subcommands are available:
@@ -0,0 +1,188 @@
1
+ ---
2
+ title: Data Types
3
+ description: Data Types in CocoIndex
4
+ toc_max_heading_level: 4
5
+ ---
6
+
7
+ # Data Types in CocoIndex
8
+
9
+ In CocoIndex, all data processed by the flow have a type determined when the flow is defined, before any actual data is processed at runtime.
10
+
11
+ This makes the schema of data processed by CocoIndex clear, and makes it easy to determine the schema of your index.
12
+
13
+ ## Data Types
14
+
15
+ As an engine written in Rust and designed to be used from different languages, with data that is always serializable, CocoIndex defines a type system independent of any specific programming language.
16
+
17
+ CocoIndex automatically infers data types of the output created by CocoIndex sources and functions.
18
+ You don't need to spell out any data type explicitly when you define the flow.
19
+ All you need to do is to make sure the data passed to functions and targets are compatible with them.
20
+
21
+ Each type in CocoIndex type system is mapped to one or multiple types in Python.
22
+ When you define a [custom function](/docs/core/custom_function), you need to annotate the data types of arguments and return values.
23
+
24
+ * For return values, type annotation is required, because it provides the ground truth that defines the type of the output of the custom function.
25
+ * For arguments, type annotation is only used to enable the conversion from data values already existing in CocoIndex engine to Python value.
26
+ Type annotation is optional for basic types. When not specified, CocoIndex will use the *default Python type* for the argument.
27
+ Type annotation is required for arguments of struct types and table types.
28
+
29
+ ### Basic Types
30
+
31
+ #### Primitive Types
32
+
33
+ Primitive types are basic types that are not composed of other types.
34
+ This is the list of all primitive types supported by CocoIndex:
35
+
36
+ | CocoIndex Type | Python Types | Convertible to | Explanation |
37
+ |------|-------------|--------------|----------------|
38
+ | *Bytes* | `bytes` | | |
39
+ | *Str* | `str` | | |
40
+ | *Bool* | `bool` | | |
41
+ | *Int64* | `cocoindex.Int64`, `int`, `numpy.int64` | | |
42
+ | *Float32* | `cocoindex.Float32`, `numpy.float32` | *Float64* | |
43
+ | *Float64* | `cocoindex.Float64`, `float`, `numpy.float64` | | |
44
+ | *Range* | `cocoindex.Range` | | |
45
+ | *Uuid* | `uuid.UUID` | | |
46
+ | *Date* | `datetime.date` | | |
47
+ | *Time* | `datetime.time` | | |
48
+ | *LocalDatetime* | `cocoindex.LocalDateTime` | *OffsetDatetime* | without timezone |
49
+ | *OffsetDatetime* | `cocoindex.OffsetDateTime`, `datetime.datetime` | | with timezone |
50
+ | *TimeDelta* | `datetime.timedelta` | | |
51
+
52
+ Notes:
53
+
54
+ * For some CocoIndex types, we support multiple Python types. You can annotate with any of these Python types.
55
+ The first one is the *default Python type*, which means CocoIndex will create a value with this type when you don't annotate the type in function arguments.
56
+
57
+ * All Python types starting with `cocoindex.` are type aliases exported by CocoIndex. They're annotated types based on certain Python types:
58
+
59
+ * `cocoindex.Int64`: `int`
60
+ * `cocoindex.Float64`: `float`
61
+ * `cocoindex.Float32`: `float`
62
+ * `cocoindex.Range`: `tuple[int, int]`, i.e. a start offset (inclusive) and an end offset (exclusive)
63
+ * `cocoindex.OffsetDateTime`: `datetime.datetime`
64
+ * `cocoindex.LocalDateTime`: `datetime.datetime`
65
+
66
+ These aliases provide a non-ambiguous way to represent a specific type in CocoIndex, given their base Python types can represent a superset of possible values.
67
+
68
+ * When we say a CocoIndex type is *convertible to* another type, it means Python types for the second type can also be used to bind to a value of the first type.
69
+ For example, *Float32* is convertible to *Float64*, so you can bind a value of *Float32* to a Python value of `float` or `numpy.float64` types.
70
+ For *LocalDatetime*, when you use `cocoindex.OffsetDateTime` or `datetime.datetime` as the annotation to bind its value, the timezone will be set to UTC.
71
+
72
+
73
+ #### Json Type
74
+
75
+ *Json* type can hold any data convertible to JSON by `json` package.
76
+ In Python, it's represented by `cocoindex.Json`.
77
+ It's useful to hold data without fixed schema known at flow definition time.
78
+
79
+
80
+ #### Vector Types
81
+
82
+ A vector type is a collection of elements of the same basic type.
83
+ Optionally, it can have a fixed dimension. Noted as *Vector[Type]* or *Vector[Type, Dim]*, e.g. *Vector[Float32]* or *Vector[Float32, 384]*.
84
+
85
+ It supports the following Python types:
86
+
87
+ * `cocoindex.Vector[T]` or `cocoindex.Vector[T, typing.Literal[Dim]]`, e.g. `cocoindex.Vector[cocoindex.Float32]` or `cocoindex.Vector[cocoindex.Float32, typing.Literal[384]]`
88
+ * The underlying Python type is `numpy.typing.NDArray[T]` where `T` is a numpy numeric type (`numpy.int64`, `numpy.float32` or `numpy.float64`), or `list[T]` otherwise
89
+ * `numpy.typing.NDArray[T]` where `T` is a numpy numeric type
90
+ * `list[T]`
91
+
92
+
93
+ #### Union Types
94
+
95
+ A union type is a type that can represent values in one of multiple basic types.
96
+ Noted as *Type1* | *Type2* | ..., e.g. *Int64* | *Float32* | *Float64*.
97
+
98
+ The Python type is `T1 | T2 | ...`, e.g. `cocoindex.Int64 | cocoindex.Float32 | cocoindex.Float64`, `int | float` (equivalent to `cocoindex.Int64 | cocoindex.Float64`)
99
+
100
+
101
+ ### Struct Types
102
+
103
+ A *Struct* has a bunch of fields, each with a name and a type.
104
+
105
+ In Python, a *Struct* type is represented by either a [dataclass](https://docs.python.org/3/library/dataclasses.html)
106
+ or a [NamedTuple](https://docs.python.org/3/library/typing.html#typing.NamedTuple), with all fields annotated with a specific type.
107
+ Both options define a structured type with named fields, but they differ slightly:
108
+
109
+ - **Dataclass**: A flexible class-based structure, mutable by default, defined using the `@dataclass` decorator.
110
+ - **NamedTuple**: An immutable tuple-based structure, defined using `typing.NamedTuple`.
111
+
112
+ For example:
113
+
114
+ ```python
115
+ from dataclasses import dataclass
116
+ from typing import NamedTuple
117
+ import datetime
118
+
119
+ # Using dataclass
120
+ @dataclass
121
+ class Person:
122
+ first_name: str
123
+ last_name: str
124
+ dob: datetime.date
125
+
126
+ # Using NamedTuple
127
+ class PersonTuple(NamedTuple):
128
+ first_name: str
129
+ last_name: str
130
+ dob: datetime.date
131
+ ```
132
+
133
+ Both `Person` and `PersonTuple` are valid Struct types in CocoIndex, with identical schemas (three fields: `first_name` (Str), `last_name` (Str), `dob` (Date)).
134
+ Choose `dataclass` for mutable objects or when you need additional methods, and `NamedTuple` for immutable, lightweight structures.
135
+
136
+ ### Table Types
137
+
138
+ A *Table* type models a collection of rows, each with multiple columns.
139
+ Each column of a table has a specific type.
140
+
141
+ We have two specific types of *Table* types: *KTable* and *LTable*.
142
+
143
+ #### KTable
144
+
145
+ *KTable* is a *Table* type whose first column serves as the key.
146
+ The row order of a *KTable* is not preserved.
147
+ Type of the first column (key column) must be a [key type](#key-types).
148
+
149
+ In Python, a *KTable* type is represented by `dict[K, V]`.
150
+ The `V` should be a *Struct* type, either a `dataclass` or `NamedTuple`, representing the value fields of each row.
151
+ For example, you can use `dict[str, Person]` or `dict[str, PersonTuple]` to represent a *KTable*, with 4 columns: key (*Str*), `first_name` (*Str*), `last_name` (*Str*), `dob` (*Date*).
152
+
153
+ Note that if you want to use a *Struct* as the key, you need to ensure its value in Python is immutable. For `dataclass`, annotate it with `@dataclass(frozen=True)`. For `NamedTuple`, immutability is built-in.
154
+ For example:
155
+
156
+ ```python
157
+ @dataclass(frozen=True)
158
+ class PersonKey:
159
+ id_kind: str
160
+ id: str
161
+
162
+ class PersonKeyTuple(NamedTuple):
163
+ id_kind: str
164
+ id: str
165
+ ```
166
+
167
+ Then you can use `dict[PersonKey, Person]` or `dict[PersonKeyTuple, PersonTuple]` to represent a KTable keyed by `PersonKey` or `PersonKeyTuple`.
168
+
169
+
170
+ #### LTable
171
+
172
+ *LTable* is a *Table* type whose row order is preserved. *LTable* has no key column.
173
+
174
+ In Python, a *LTable* type is represented by `list[R]`, where `R` is a dataclass representing a row.
175
+ For example, you can use `list[Person]` to represent a *LTable* with 3 columns: `first_name` (*Str*), `last_name` (*Str*), `dob` (*Date*).
176
+
177
+ ## Key Types
178
+
179
+ Currently, the following types are key types:
180
+
181
+ - *Bytes*
182
+ - *Str*
183
+ - *Bool*
184
+ - *Int64*
185
+ - *Range*
186
+ - *Uuid*
187
+ - *Date*
188
+ - *Struct* with all fields being key types (using `@dataclass(frozen=True)` or `NamedTuple`)
@@ -12,12 +12,21 @@ from .flow import flow_def
12
12
  from .flow import EvaluateAndDumpOptions, GeneratedField
13
13
  from .flow import FlowLiveUpdater, FlowLiveUpdaterOptions
14
14
  from .flow import update_all_flows_async, setup_all_flows, drop_all_flows
15
- from .lib import init, start_server, stop, main_fn
15
+ from .lib import init, start_server, stop
16
16
  from .llm import LlmSpec, LlmApiType
17
17
  from .index import VectorSimilarityMetric, VectorIndexDef, IndexOptions
18
18
  from .setting import DatabaseConnectionSpec, Settings, ServerSettings
19
19
  from .setting import get_app_namespace
20
- from .typing import Float32, Float64, LocalDateTime, OffsetDateTime, Range, Vector, Json
20
+ from .typing import (
21
+ Int64,
22
+ Float32,
23
+ Float64,
24
+ LocalDateTime,
25
+ OffsetDateTime,
26
+ Range,
27
+ Vector,
28
+ Json,
29
+ )
21
30
 
22
31
  __all__ = [
23
32
  # Submodules
@@ -50,7 +59,6 @@ __all__ = [
50
59
  "init",
51
60
  "start_server",
52
61
  "stop",
53
- "main_fn",
54
62
  # LLM
55
63
  "LlmSpec",
56
64
  "LlmApiType",
@@ -64,6 +72,7 @@ __all__ = [
64
72
  "ServerSettings",
65
73
  "get_app_namespace",
66
74
  # Typing
75
+ "Int64",
67
76
  "Float32",
68
77
  "Float64",
69
78
  "LocalDateTime",
@@ -416,6 +416,11 @@ class _SourceRefreshOptions:
416
416
  refresh_interval: datetime.timedelta | None = None
417
417
 
418
418
 
419
+ @dataclass
420
+ class _ExecutionOptions:
421
+ max_inflight_count: int | None = None
422
+
423
+
419
424
  class FlowBuilder:
420
425
  """
421
426
  A flow builder is used to build a flow.
@@ -439,6 +444,7 @@ class FlowBuilder:
439
444
  *,
440
445
  name: str | None = None,
441
446
  refresh_interval: datetime.timedelta | None = None,
447
+ max_inflight_count: int | None = None,
442
448
  ) -> DataSlice[T]:
443
449
  """
444
450
  Import a source to the flow.
@@ -454,9 +460,12 @@ class FlowBuilder:
454
460
  self._state.field_name_builder.build_name(
455
461
  name, prefix=_to_snake_case(_spec_kind(spec)) + "_"
456
462
  ),
457
- dump_engine_object(
463
+ refresh_options=dump_engine_object(
458
464
  _SourceRefreshOptions(refresh_interval=refresh_interval)
459
465
  ),
466
+ execution_options=dump_engine_object(
467
+ _ExecutionOptions(max_inflight_count=max_inflight_count)
468
+ ),
460
469
  ),
461
470
  name,
462
471
  )
@@ -0,0 +1,32 @@
1
+ """
2
+ Library level functions and states.
3
+ """
4
+
5
+ import warnings
6
+ from typing import Callable, Any
7
+
8
+ from . import _engine # type: ignore
9
+ from . import flow, setting
10
+ from .convert import dump_engine_object
11
+
12
+
13
+ def init(settings: setting.Settings | None = None) -> None:
14
+ """
15
+ Initialize the cocoindex library.
16
+
17
+ If the settings are not provided, they are loaded from the environment variables.
18
+ """
19
+ settings = settings or setting.Settings.from_env()
20
+ _engine.init(dump_engine_object(settings))
21
+ setting.set_app_namespace(settings.app_namespace)
22
+
23
+
24
+ def start_server(settings: setting.ServerSettings) -> None:
25
+ """Start the cocoindex server."""
26
+ flow.ensure_all_flows_built()
27
+ _engine.start_server(settings.__dict__)
28
+
29
+
30
+ def stop() -> None:
31
+ """Stop the cocoindex library."""
32
+ _engine.stop()
@@ -78,16 +78,15 @@ def build_engine_value_decoder(
78
78
  return make_engine_value_decoder([], engine_type, python_type or engine_type_in_py)
79
79
 
80
80
 
81
- def validate_full_roundtrip(
81
+ def validate_full_roundtrip_to(
82
82
  value: Any,
83
- value_type: Any = None,
84
- *other_decoded_values: tuple[Any, Any],
83
+ value_type: Any,
84
+ *decoded_values: tuple[Any, Any],
85
85
  ) -> None:
86
86
  """
87
- Validate the given value doesn't change after encoding, sending to engine (using output_type), receiving back and decoding (using input_type).
87
+ Validate the given value becomes specific values after encoding, sending to engine (using output_type), receiving back and decoding (using input_type).
88
88
 
89
- `other_decoded_values` is a tuple of (value, type) pairs.
90
- If provided, also validate the value can be decoded to the other types.
89
+ `decoded_values` is a tuple of (value, type) pairs.
91
90
  """
92
91
  from cocoindex import _engine # type: ignore
93
92
 
@@ -102,15 +101,27 @@ def validate_full_roundtrip(
102
101
  value_from_engine = _engine.testutil.seder_roundtrip(
103
102
  encoded_value, encoded_output_type
104
103
  )
105
- decoder = make_engine_value_decoder([], encoded_output_type, value_type)
106
- decoded_value = decoder(value_from_engine)
107
- assert eq(decoded_value, value), f"{decoded_value} != {value}"
108
104
 
109
- if other_decoded_values is not None:
110
- for other_value, other_type in other_decoded_values:
111
- decoder = make_engine_value_decoder([], encoded_output_type, other_type)
112
- other_decoded_value = decoder(value_from_engine)
113
- assert eq(other_decoded_value, other_value)
105
+ for other_value, other_type in decoded_values:
106
+ decoder = make_engine_value_decoder([], encoded_output_type, other_type)
107
+ other_decoded_value = decoder(value_from_engine)
108
+ assert eq(other_decoded_value, other_value)
109
+
110
+
111
+ def validate_full_roundtrip(
112
+ value: Any,
113
+ value_type: Any,
114
+ *other_decoded_values: tuple[Any, Any],
115
+ ) -> None:
116
+ """
117
+ Validate the given value doesn't change after encoding, sending to engine (using output_type), receiving back and decoding (using input_type).
118
+
119
+ `other_decoded_values` is a tuple of (value, type) pairs.
120
+ If provided, also validate the value can be decoded to the other types.
121
+ """
122
+ validate_full_roundtrip_to(
123
+ value, value_type, (value, value_type), *other_decoded_values
124
+ )
114
125
 
115
126
 
116
127
  def test_encode_engine_value_basic_types() -> None:
@@ -218,17 +229,33 @@ def test_encode_engine_value_none() -> None:
218
229
 
219
230
 
220
231
  def test_roundtrip_basic_types() -> None:
221
- validate_full_roundtrip(42, int, (42, None))
222
- validate_full_roundtrip(3.25, float, (3.25, Float64))
223
232
  validate_full_roundtrip(
224
- 3.25, Float64, (3.25, float), (np.float64(3.25), np.float64)
233
+ 42, cocoindex.Int64, (42, int), (np.int64(42), np.int64), (42, None)
225
234
  )
235
+ validate_full_roundtrip(42, int, (42, cocoindex.Int64))
236
+ validate_full_roundtrip(np.int64(42), np.int64, (42, cocoindex.Int64))
237
+
226
238
  validate_full_roundtrip(
227
- 3.25, Float32, (3.25, float), (np.float32(3.25), np.float32)
239
+ 3.25, Float64, (3.25, float), (np.float64(3.25), np.float64), (3.25, None)
228
240
  )
241
+ validate_full_roundtrip(3.25, float, (3.25, Float64))
242
+ validate_full_roundtrip(np.float64(3.25), np.float64, (3.25, Float64))
243
+
244
+ validate_full_roundtrip(
245
+ 3.25,
246
+ Float32,
247
+ (3.25, float),
248
+ (np.float32(3.25), np.float32),
249
+ (np.float64(3.25), np.float64),
250
+ (3.25, Float64),
251
+ (3.25, None),
252
+ )
253
+ validate_full_roundtrip(np.float32(3.25), np.float32, (3.25, Float32))
254
+
229
255
  validate_full_roundtrip("hello", str, ("hello", None))
230
256
  validate_full_roundtrip(True, bool, (True, None))
231
257
  validate_full_roundtrip(False, bool, (False, None))
258
+ validate_full_roundtrip((1, 2), cocoindex.Range, ((1, 2), None))
232
259
  validate_full_roundtrip(
233
260
  datetime.date(2025, 1, 1), datetime.date, (datetime.date(2025, 1, 1), None)
234
261
  )
@@ -238,14 +265,37 @@ def test_roundtrip_basic_types() -> None:
238
265
  cocoindex.LocalDateTime,
239
266
  (datetime.datetime(2025, 1, 2, 3, 4, 5, 123456), datetime.datetime),
240
267
  )
268
+
269
+ tz = datetime.timezone(datetime.timedelta(hours=5))
241
270
  validate_full_roundtrip(
242
- datetime.datetime(2025, 1, 2, 3, 4, 5, 123456, datetime.UTC),
271
+ datetime.datetime(2025, 1, 2, 3, 4, 5, 123456, tz),
272
+ cocoindex.OffsetDateTime,
273
+ (
274
+ datetime.datetime(2025, 1, 2, 3, 4, 5, 123456, tz),
275
+ datetime.datetime,
276
+ ),
277
+ )
278
+ validate_full_roundtrip(
279
+ datetime.datetime(2025, 1, 2, 3, 4, 5, 123456, tz),
280
+ datetime.datetime,
281
+ (datetime.datetime(2025, 1, 2, 3, 4, 5, 123456, tz), cocoindex.OffsetDateTime),
282
+ )
283
+ validate_full_roundtrip_to(
284
+ datetime.datetime(2025, 1, 2, 3, 4, 5, 123456),
243
285
  cocoindex.OffsetDateTime,
244
286
  (
245
287
  datetime.datetime(2025, 1, 2, 3, 4, 5, 123456, datetime.UTC),
246
288
  datetime.datetime,
247
289
  ),
248
290
  )
291
+ validate_full_roundtrip_to(
292
+ datetime.datetime(2025, 1, 2, 3, 4, 5, 123456),
293
+ datetime.datetime,
294
+ (
295
+ datetime.datetime(2025, 1, 2, 3, 4, 5, 123456, datetime.UTC),
296
+ cocoindex.OffsetDateTime,
297
+ ),
298
+ )
249
299
 
250
300
  uuid_value = uuid.uuid4()
251
301
  validate_full_roundtrip(uuid_value, uuid.UUID, (uuid_value, None))
@@ -40,6 +40,7 @@ class TypeAttr:
40
40
 
41
41
  Annotation = TypeKind | TypeAttr | VectorInfo
42
42
 
43
+ Int64 = Annotated[int, TypeKind("Int64")]
43
44
  Float32 = Annotated[float, TypeKind("Float32")]
44
45
  Float64 = Annotated[float, TypeKind("Float64")]
45
46
  Range = Annotated[tuple[int, int], TypeKind("Range")]
@@ -253,6 +253,11 @@ impl SpecFormatter for OpSpec {
253
253
  }
254
254
  }
255
255
 
256
+ #[derive(Debug, Clone, Serialize, Deserialize, Default)]
257
+ pub struct ExecutionOptions {
258
+ pub max_inflight_count: Option<u32>,
259
+ }
260
+
256
261
  #[derive(Debug, Clone, Serialize, Deserialize, Default)]
257
262
  pub struct SourceRefreshOptions {
258
263
  pub refresh_interval: Option<std::time::Duration>,
@@ -274,6 +279,9 @@ pub struct ImportOpSpec {
274
279
 
275
280
  #[serde(default)]
276
281
  pub refresh_options: SourceRefreshOptions,
282
+
283
+ #[serde(default)]
284
+ pub execution_options: ExecutionOptions,
277
285
  }
278
286
 
279
287
  impl SpecFormatter for ImportOpSpec {
@@ -697,6 +697,9 @@ impl AnalyzerContext {
697
697
  primary_key_type,
698
698
  name: op_name,
699
699
  refresh_options: import_op.spec.refresh_options,
700
+ concurrency_controller: utils::ConcurrencyController::new(
701
+ import_op.spec.execution_options.max_inflight_count,
702
+ ),
700
703
  })
701
704
  };
702
705
  Ok(result_fut)
@@ -288,7 +288,7 @@ impl FlowBuilder {
288
288
  OpScopeRef(self.root_op_scope.clone())
289
289
  }
290
290
 
291
- #[pyo3(signature = (kind, op_spec, target_scope, name, refresh_options=None))]
291
+ #[pyo3(signature = (kind, op_spec, target_scope, name, refresh_options=None, execution_options=None))]
292
292
  pub fn add_source(
293
293
  &mut self,
294
294
  py: Python<'_>,
@@ -297,6 +297,7 @@ impl FlowBuilder {
297
297
  target_scope: Option<OpScopeRef>,
298
298
  name: String,
299
299
  refresh_options: Option<py::Pythonized<spec::SourceRefreshOptions>>,
300
+ execution_options: Option<py::Pythonized<spec::ExecutionOptions>>,
300
301
  ) -> PyResult<DataSlice> {
301
302
  if let Some(target_scope) = target_scope {
302
303
  if *target_scope != self.root_op_scope {
@@ -313,6 +314,9 @@ impl FlowBuilder {
313
314
  spec: op_spec.into_inner(),
314
315
  },
315
316
  refresh_options: refresh_options.map(|o| o.into_inner()).unwrap_or_default(),
317
+ execution_options: execution_options
318
+ .map(|o| o.into_inner())
319
+ .unwrap_or_default(),
316
320
  },
317
321
  };
318
322
  let analyzer_ctx = AnalyzerContext {
@@ -56,6 +56,7 @@ pub struct AnalyzedImportOp {
56
56
  pub output: AnalyzedOpOutput,
57
57
  pub primary_key_type: schema::ValueType,
58
58
  pub refresh_options: spec::SourceRefreshOptions,
59
+ pub concurrency_controller: utils::ConcurrencyController,
59
60
  }
60
61
 
61
62
  pub struct AnalyzedFunctionExecInfo {
@@ -282,6 +282,7 @@ impl SourceIndexingContext {
282
282
  state.scan_generation
283
283
  };
284
284
  while let Some(row) = rows_stream.next().await {
285
+ let _ = import_op.concurrency_controller.acquire().await?;
285
286
  for row in row? {
286
287
  self.process_source_key_if_newer(
287
288
  row.key,
@@ -918,16 +918,14 @@ impl SimpleFunctionExecutor for Executor {
918
918
  let table = output
919
919
  .into_iter()
920
920
  .map(|chunk_output| {
921
+ let output_start = chunk_output.start_pos.output.unwrap();
922
+ let output_end = chunk_output.end_pos.output.unwrap();
921
923
  (
922
- RangeValue::new(
923
- chunk_output.start_pos.byte_offset,
924
- chunk_output.end_pos.byte_offset,
925
- )
926
- .into(),
924
+ RangeValue::new(output_start.char_offset, output_end.char_offset).into(),
927
925
  fields_value!(
928
926
  Arc::<str>::from(chunk_output.text),
929
- chunk_output.start_pos.output.unwrap().into_output(),
930
- chunk_output.end_pos.output.unwrap().into_output()
927
+ output_start.into_output(),
928
+ output_end.into_output()
931
929
  )
932
930
  .into(),
933
931
  )
@@ -1,3 +1,5 @@
1
+ use crate::prelude::*;
2
+
1
3
  use bytes::Bytes;
2
4
  use numpy::{PyArray1, PyArrayDyn, PyArrayMethods};
3
5
  use pyo3::IntoPyObjectExt;
@@ -6,14 +8,10 @@ use pyo3::types::PyAny;
6
8
  use pyo3::types::{PyList, PyTuple};
7
9
  use pyo3::{exceptions::PyException, prelude::*};
8
10
  use pythonize::{depythonize, pythonize};
9
- use serde::Serialize;
10
11
  use serde::de::DeserializeOwned;
11
- use std::collections::BTreeMap;
12
12
  use std::ops::Deref;
13
- use std::sync::Arc;
14
13
 
15
14
  use super::IntoPyResult;
16
- use crate::base::{schema, value};
17
15
 
18
16
  #[derive(Debug)]
19
17
  pub struct Pythonized<T>(pub T);
@@ -143,7 +141,23 @@ fn basic_value_from_py_object<'py>(
143
141
  value::BasicValue::LocalDateTime(v.extract::<chrono::NaiveDateTime>()?)
144
142
  }
145
143
  schema::BasicValueType::OffsetDateTime => {
146
- value::BasicValue::OffsetDateTime(v.extract::<chrono::DateTime<chrono::FixedOffset>>()?)
144
+ if v.getattr_opt("tzinfo")?
145
+ .ok_or_else(|| {
146
+ PyErr::new::<PyTypeError, _>(format!(
147
+ "expecting a datetime.datetime value, got {}",
148
+ v.get_type()
149
+ ))
150
+ })?
151
+ .is_none()
152
+ {
153
+ value::BasicValue::OffsetDateTime(
154
+ v.extract::<chrono::NaiveDateTime>()?.and_utc().into(),
155
+ )
156
+ } else {
157
+ value::BasicValue::OffsetDateTime(
158
+ v.extract::<chrono::DateTime<chrono::FixedOffset>>()?,
159
+ )
160
+ }
147
161
  }
148
162
  schema::BasicValueType::TimeDelta => {
149
163
  value::BasicValue::TimeDelta(v.extract::<chrono::TimeDelta>()?)
@@ -0,0 +1,30 @@
1
+ use crate::prelude::*;
2
+
3
+ use tokio::sync::{Semaphore, SemaphorePermit};
4
+
5
+ pub struct ConcurrencyController {
6
+ inflight_count_sem: Option<Semaphore>,
7
+ }
8
+
9
+ pub struct ConcurrencyControllerPermit<'a> {
10
+ _inflight_count_permit: Option<SemaphorePermit<'a>>,
11
+ }
12
+
13
+ impl ConcurrencyController {
14
+ pub fn new(max_inflight_count: Option<u32>) -> Self {
15
+ Self {
16
+ inflight_count_sem: max_inflight_count.map(|max| Semaphore::new(max as usize)),
17
+ }
18
+ }
19
+
20
+ pub async fn acquire<'a>(&'a self) -> Result<ConcurrencyControllerPermit<'a>> {
21
+ let inflight_count_permit = if let Some(sem) = &self.inflight_count_sem {
22
+ Some(sem.acquire().await?)
23
+ } else {
24
+ None
25
+ };
26
+ Ok(ConcurrencyControllerPermit {
27
+ _inflight_count_permit: inflight_count_permit,
28
+ })
29
+ }
30
+ }
@@ -3,3 +3,6 @@ pub mod fingerprint;
3
3
  pub mod immutable;
4
4
  pub mod retryable;
5
5
  pub mod yaml_ser;
6
+
7
+ mod concur_control;
8
+ pub use concur_control::ConcurrencyController;