cocoindex 0.1.49__tar.gz → 0.1.50__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (242) hide show
  1. {cocoindex-0.1.49 → cocoindex-0.1.50}/Cargo.lock +79 -28
  2. {cocoindex-0.1.49 → cocoindex-0.1.50}/Cargo.toml +4 -3
  3. {cocoindex-0.1.49 → cocoindex-0.1.50}/PKG-INFO +1 -1
  4. {cocoindex-0.1.49 → cocoindex-0.1.50}/docs/docs/core/data_types.mdx +3 -2
  5. {cocoindex-0.1.49 → cocoindex-0.1.50}/docs/docs/getting_started/overview.md +2 -0
  6. {cocoindex-0.1.49 → cocoindex-0.1.50}/docs/docs/getting_started/quickstart.md +11 -6
  7. {cocoindex-0.1.49 → cocoindex-0.1.50}/docs/docs/query.mdx +3 -3
  8. cocoindex-0.1.50/docs/static/img/incremental-etl.gif +0 -0
  9. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/fastapi_server_docker/main.py +16 -10
  10. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/text_embedding/Text_Embedding.ipynb +8 -4
  11. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/text_embedding/main.py +6 -2
  12. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/text_embedding/pyproject.toml +2 -0
  13. {cocoindex-0.1.49 → cocoindex-0.1.50}/python/cocoindex/__init__.py +1 -1
  14. {cocoindex-0.1.49 → cocoindex-0.1.50}/python/cocoindex/convert.py +36 -0
  15. {cocoindex-0.1.49 → cocoindex-0.1.50}/python/cocoindex/functions.py +6 -4
  16. {cocoindex-0.1.49 → cocoindex-0.1.50}/python/cocoindex/lib.py +1 -2
  17. {cocoindex-0.1.49 → cocoindex-0.1.50}/python/cocoindex/tests/test_convert.py +280 -52
  18. cocoindex-0.1.50/python/cocoindex/tests/test_typing.py +499 -0
  19. {cocoindex-0.1.49 → cocoindex-0.1.50}/python/cocoindex/typing.py +88 -13
  20. {cocoindex-0.1.49 → cocoindex-0.1.50}/src/base/value.rs +10 -0
  21. {cocoindex-0.1.49 → cocoindex-0.1.50}/src/builder/analyzer.rs +3 -4
  22. {cocoindex-0.1.49 → cocoindex-0.1.50}/src/builder/plan.rs +0 -1
  23. {cocoindex-0.1.49 → cocoindex-0.1.50}/src/execution/mod.rs +0 -1
  24. {cocoindex-0.1.49 → cocoindex-0.1.50}/src/lib_context.rs +2 -18
  25. {cocoindex-0.1.49 → cocoindex-0.1.50}/src/ops/factory_bases.rs +3 -12
  26. {cocoindex-0.1.49 → cocoindex-0.1.50}/src/ops/interface.rs +2 -65
  27. {cocoindex-0.1.49 → cocoindex-0.1.50}/src/ops/py_factory.rs +4 -5
  28. {cocoindex-0.1.49 → cocoindex-0.1.50}/src/ops/sources/google_drive.rs +31 -46
  29. {cocoindex-0.1.49 → cocoindex-0.1.50}/src/ops/storages/kuzu.rs +1 -7
  30. {cocoindex-0.1.49 → cocoindex-0.1.50}/src/ops/storages/neo4j.rs +7 -8
  31. {cocoindex-0.1.49 → cocoindex-0.1.50}/src/ops/storages/postgres.rs +5 -197
  32. {cocoindex-0.1.49 → cocoindex-0.1.50}/src/ops/storages/qdrant.rs +13 -42
  33. {cocoindex-0.1.49 → cocoindex-0.1.50}/src/py/mod.rs +16 -81
  34. {cocoindex-0.1.49 → cocoindex-0.1.50}/src/server.rs +8 -12
  35. {cocoindex-0.1.49 → cocoindex-0.1.50}/src/service/error.rs +12 -4
  36. {cocoindex-0.1.49 → cocoindex-0.1.50}/src/service/flows.rs +26 -7
  37. {cocoindex-0.1.49 → cocoindex-0.1.50}/src/service/mod.rs +0 -1
  38. cocoindex-0.1.49/python/cocoindex/query.py +0 -115
  39. cocoindex-0.1.49/src/execution/query.rs +0 -124
  40. cocoindex-0.1.49/src/service/search.rs +0 -58
  41. {cocoindex-0.1.49 → cocoindex-0.1.50}/.cargo/config.toml +0 -0
  42. {cocoindex-0.1.49 → cocoindex-0.1.50}/.env.lib_debug +0 -0
  43. {cocoindex-0.1.49 → cocoindex-0.1.50}/.github/ISSUE_TEMPLATE/🐛-bug-report.md +0 -0
  44. {cocoindex-0.1.49 → cocoindex-0.1.50}/.github/ISSUE_TEMPLATE/💡-feature-request.md +0 -0
  45. {cocoindex-0.1.49 → cocoindex-0.1.50}/.github/scripts/update_version.sh +0 -0
  46. {cocoindex-0.1.49 → cocoindex-0.1.50}/.github/workflows/CI.yml +0 -0
  47. {cocoindex-0.1.49 → cocoindex-0.1.50}/.github/workflows/_test.yml +0 -0
  48. {cocoindex-0.1.49 → cocoindex-0.1.50}/.github/workflows/docs.yml +0 -0
  49. {cocoindex-0.1.49 → cocoindex-0.1.50}/.github/workflows/release.yml +0 -0
  50. {cocoindex-0.1.49 → cocoindex-0.1.50}/.gitignore +0 -0
  51. {cocoindex-0.1.49 → cocoindex-0.1.50}/.vscode/settings.json +0 -0
  52. {cocoindex-0.1.49 → cocoindex-0.1.50}/CODE_OF_CONDUCT.md +0 -0
  53. {cocoindex-0.1.49 → cocoindex-0.1.50}/CONTRIBUTING.md +0 -0
  54. {cocoindex-0.1.49 → cocoindex-0.1.50}/LICENSE +0 -0
  55. {cocoindex-0.1.49 → cocoindex-0.1.50}/README.md +0 -0
  56. {cocoindex-0.1.49 → cocoindex-0.1.50}/dev/neo4j.yaml +0 -0
  57. {cocoindex-0.1.49 → cocoindex-0.1.50}/dev/postgres.yaml +0 -0
  58. {cocoindex-0.1.49 → cocoindex-0.1.50}/docs/.gitignore +0 -0
  59. {cocoindex-0.1.49 → cocoindex-0.1.50}/docs/README.md +0 -0
  60. {cocoindex-0.1.49 → cocoindex-0.1.50}/docs/docs/about/community.md +0 -0
  61. {cocoindex-0.1.49 → cocoindex-0.1.50}/docs/docs/about/contributing.md +0 -0
  62. {cocoindex-0.1.49 → cocoindex-0.1.50}/docs/docs/ai/llm.mdx +0 -0
  63. {cocoindex-0.1.49 → cocoindex-0.1.50}/docs/docs/core/basics.md +0 -0
  64. {cocoindex-0.1.49 → cocoindex-0.1.50}/docs/docs/core/cli.mdx +0 -0
  65. {cocoindex-0.1.49 → cocoindex-0.1.50}/docs/docs/core/custom_function.mdx +0 -0
  66. {cocoindex-0.1.49 → cocoindex-0.1.50}/docs/docs/core/data_example.svg +0 -0
  67. {cocoindex-0.1.49 → cocoindex-0.1.50}/docs/docs/core/flow_def.mdx +0 -0
  68. {cocoindex-0.1.49 → cocoindex-0.1.50}/docs/docs/core/flow_example.svg +0 -0
  69. {cocoindex-0.1.49 → cocoindex-0.1.50}/docs/docs/core/flow_methods.mdx +0 -0
  70. {cocoindex-0.1.49 → cocoindex-0.1.50}/docs/docs/core/settings.mdx +0 -0
  71. {cocoindex-0.1.49 → cocoindex-0.1.50}/docs/docs/getting_started/installation.md +0 -0
  72. {cocoindex-0.1.49 → cocoindex-0.1.50}/docs/docs/getting_started/markdown_files.zip +0 -0
  73. {cocoindex-0.1.49 → cocoindex-0.1.50}/docs/docs/ops/functions.md +0 -0
  74. {cocoindex-0.1.49 → cocoindex-0.1.50}/docs/docs/ops/sources.md +0 -0
  75. {cocoindex-0.1.49 → cocoindex-0.1.50}/docs/docs/ops/storages.md +0 -0
  76. {cocoindex-0.1.49 → cocoindex-0.1.50}/docs/docusaurus.config.ts +0 -0
  77. {cocoindex-0.1.49 → cocoindex-0.1.50}/docs/package.json +0 -0
  78. {cocoindex-0.1.49 → cocoindex-0.1.50}/docs/sidebars.ts +0 -0
  79. {cocoindex-0.1.49 → cocoindex-0.1.50}/docs/src/components/HomepageFeatures/index.tsx +0 -0
  80. {cocoindex-0.1.49 → cocoindex-0.1.50}/docs/src/components/HomepageFeatures/styles.module.css +0 -0
  81. {cocoindex-0.1.49 → cocoindex-0.1.50}/docs/src/css/custom.css +0 -0
  82. {cocoindex-0.1.49 → cocoindex-0.1.50}/docs/src/theme/Root.js +0 -0
  83. {cocoindex-0.1.49 → cocoindex-0.1.50}/docs/static/.nojekyll +0 -0
  84. {cocoindex-0.1.49 → cocoindex-0.1.50}/docs/static/img/docusaurus.png +0 -0
  85. {cocoindex-0.1.49 → cocoindex-0.1.50}/docs/static/img/favicon.ico +0 -0
  86. {cocoindex-0.1.49 → cocoindex-0.1.50}/docs/static/img/icon.svg +0 -0
  87. {cocoindex-0.1.49 → cocoindex-0.1.50}/docs/static/robots.txt +0 -0
  88. {cocoindex-0.1.49 → cocoindex-0.1.50}/docs/tsconfig.json +0 -0
  89. {cocoindex-0.1.49 → cocoindex-0.1.50}/docs/yarn.lock +0 -0
  90. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/amazon_s3_embedding/.env.example +0 -0
  91. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/amazon_s3_embedding/.gitignore +0 -0
  92. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/amazon_s3_embedding/README.md +0 -0
  93. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/amazon_s3_embedding/main.py +0 -0
  94. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/amazon_s3_embedding/pyproject.toml +0 -0
  95. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/code_embedding/.env +0 -0
  96. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/code_embedding/README.md +0 -0
  97. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/code_embedding/main.py +0 -0
  98. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/code_embedding/pyproject.toml +0 -0
  99. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/docs_to_knowledge_graph/.env +0 -0
  100. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/docs_to_knowledge_graph/README.md +0 -0
  101. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/docs_to_knowledge_graph/main.py +0 -0
  102. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/docs_to_knowledge_graph/pyproject.toml +0 -0
  103. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/fastapi_server_docker/.dockerignore +0 -0
  104. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/fastapi_server_docker/.env +0 -0
  105. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/fastapi_server_docker/README.md +0 -0
  106. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/fastapi_server_docker/compose.yaml +0 -0
  107. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/fastapi_server_docker/dockerfile +0 -0
  108. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/fastapi_server_docker/files/1810.04805v2.md +0 -0
  109. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/fastapi_server_docker/requirements.txt +0 -0
  110. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/gdrive_text_embedding/.env.example +0 -0
  111. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/gdrive_text_embedding/.gitignore +0 -0
  112. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/gdrive_text_embedding/README.md +0 -0
  113. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/gdrive_text_embedding/main.py +0 -0
  114. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/gdrive_text_embedding/pyproject.toml +0 -0
  115. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/image_search/.env +0 -0
  116. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/image_search/README.md +0 -0
  117. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/image_search/frontend/.gitignore +0 -0
  118. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/image_search/frontend/index.html +0 -0
  119. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/image_search/frontend/package-lock.json +0 -0
  120. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/image_search/frontend/package.json +0 -0
  121. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/image_search/frontend/src/App.jsx +0 -0
  122. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/image_search/frontend/src/main.jsx +0 -0
  123. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/image_search/frontend/src/style.css +0 -0
  124. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/image_search/frontend/vite.config.js +0 -0
  125. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/image_search/img/cat1.jpeg +0 -0
  126. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/image_search/img/dog1.jpeg +0 -0
  127. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/image_search/img/elephant1.jpg +0 -0
  128. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/image_search/img/giraffe.jpg +0 -0
  129. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/image_search/main.py +0 -0
  130. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/image_search/pyproject.toml +0 -0
  131. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/image_search/requirements.txt +0 -0
  132. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/manuals_llm_extraction/.env +0 -0
  133. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/manuals_llm_extraction/README.md +0 -0
  134. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/manuals_llm_extraction/main.py +0 -0
  135. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/manuals_llm_extraction/manuals/array.pdf +0 -0
  136. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/manuals_llm_extraction/manuals/base64.pdf +0 -0
  137. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/manuals_llm_extraction/manuals/copy.pdf +0 -0
  138. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/manuals_llm_extraction/manuals/glob.pdf +0 -0
  139. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/manuals_llm_extraction/pyproject.toml +0 -0
  140. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/pdf_embedding/.env +0 -0
  141. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/pdf_embedding/README.md +0 -0
  142. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/pdf_embedding/main.py +0 -0
  143. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/pdf_embedding/pdf_files/1706.03762v7.pdf +0 -0
  144. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/pdf_embedding/pdf_files/1810.04805v2.pdf +0 -0
  145. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/pdf_embedding/pdf_files/rfc8259.pdf +0 -0
  146. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/pdf_embedding/pyproject.toml +0 -0
  147. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/product_recommendation/.env +0 -0
  148. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/product_recommendation/README.md +0 -0
  149. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/product_recommendation/img/cocoinsight.png +0 -0
  150. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/product_recommendation/img/neo4j.png +0 -0
  151. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/product_recommendation/main.py +0 -0
  152. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/product_recommendation/products/p1.json +0 -0
  153. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/product_recommendation/products/p2.json +0 -0
  154. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/product_recommendation/products/p3.json +0 -0
  155. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/product_recommendation/products/p4.json +0 -0
  156. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/product_recommendation/products/p5.json +0 -0
  157. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/product_recommendation/products/p6.json +0 -0
  158. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/product_recommendation/products/p7.json +0 -0
  159. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/product_recommendation/products/p8.json +0 -0
  160. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/product_recommendation/products/p9.json +0 -0
  161. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/product_recommendation/pyproject.toml +0 -0
  162. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/text_embedding/.env +0 -0
  163. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/text_embedding/README.md +0 -0
  164. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/text_embedding/markdown_files/1706.03762v7.md +0 -0
  165. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/text_embedding/markdown_files/1810.04805v2.md +0 -0
  166. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/text_embedding/markdown_files/rfc8259.md +0 -0
  167. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/text_embedding_qdrant/.env +0 -0
  168. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/text_embedding_qdrant/README.md +0 -0
  169. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/text_embedding_qdrant/main.py +0 -0
  170. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/text_embedding_qdrant/markdown_files/rfc8259.md +0 -0
  171. {cocoindex-0.1.49 → cocoindex-0.1.50}/examples/text_embedding_qdrant/pyproject.toml +0 -0
  172. {cocoindex-0.1.49 → cocoindex-0.1.50}/pyproject.toml +0 -0
  173. {cocoindex-0.1.49 → cocoindex-0.1.50}/python/cocoindex/auth_registry.py +0 -0
  174. {cocoindex-0.1.49 → cocoindex-0.1.50}/python/cocoindex/cli.py +0 -0
  175. {cocoindex-0.1.49 → cocoindex-0.1.50}/python/cocoindex/flow.py +0 -0
  176. {cocoindex-0.1.49 → cocoindex-0.1.50}/python/cocoindex/index.py +0 -0
  177. {cocoindex-0.1.49 → cocoindex-0.1.50}/python/cocoindex/llm.py +0 -0
  178. {cocoindex-0.1.49 → cocoindex-0.1.50}/python/cocoindex/op.py +0 -0
  179. {cocoindex-0.1.49 → cocoindex-0.1.50}/python/cocoindex/py.typed +0 -0
  180. {cocoindex-0.1.49 → cocoindex-0.1.50}/python/cocoindex/runtime.py +0 -0
  181. {cocoindex-0.1.49 → cocoindex-0.1.50}/python/cocoindex/setting.py +0 -0
  182. {cocoindex-0.1.49 → cocoindex-0.1.50}/python/cocoindex/setup.py +0 -0
  183. {cocoindex-0.1.49 → cocoindex-0.1.50}/python/cocoindex/sources.py +0 -0
  184. {cocoindex-0.1.49 → cocoindex-0.1.50}/python/cocoindex/storages.py +0 -0
  185. {cocoindex-0.1.49 → cocoindex-0.1.50}/python/cocoindex/tests/__init__.py +0 -0
  186. {cocoindex-0.1.49 → cocoindex-0.1.50}/python/cocoindex/utils.py +0 -0
  187. {cocoindex-0.1.49 → cocoindex-0.1.50}/ruff.toml +0 -0
  188. {cocoindex-0.1.49 → cocoindex-0.1.50}/src/base/duration.rs +0 -0
  189. {cocoindex-0.1.49 → cocoindex-0.1.50}/src/base/field_attrs.rs +0 -0
  190. {cocoindex-0.1.49 → cocoindex-0.1.50}/src/base/json_schema.rs +0 -0
  191. {cocoindex-0.1.49 → cocoindex-0.1.50}/src/base/mod.rs +0 -0
  192. {cocoindex-0.1.49 → cocoindex-0.1.50}/src/base/schema.rs +0 -0
  193. {cocoindex-0.1.49 → cocoindex-0.1.50}/src/base/spec.rs +0 -0
  194. {cocoindex-0.1.49 → cocoindex-0.1.50}/src/builder/analyzed_flow.rs +0 -0
  195. {cocoindex-0.1.49 → cocoindex-0.1.50}/src/builder/flow_builder.rs +0 -0
  196. {cocoindex-0.1.49 → cocoindex-0.1.50}/src/builder/mod.rs +0 -0
  197. {cocoindex-0.1.49 → cocoindex-0.1.50}/src/execution/db_tracking.rs +0 -0
  198. {cocoindex-0.1.49 → cocoindex-0.1.50}/src/execution/db_tracking_setup.rs +0 -0
  199. {cocoindex-0.1.49 → cocoindex-0.1.50}/src/execution/dumper.rs +0 -0
  200. {cocoindex-0.1.49 → cocoindex-0.1.50}/src/execution/evaluator.rs +0 -0
  201. {cocoindex-0.1.49 → cocoindex-0.1.50}/src/execution/indexing_status.rs +0 -0
  202. {cocoindex-0.1.49 → cocoindex-0.1.50}/src/execution/live_updater.rs +0 -0
  203. {cocoindex-0.1.49 → cocoindex-0.1.50}/src/execution/memoization.rs +0 -0
  204. {cocoindex-0.1.49 → cocoindex-0.1.50}/src/execution/row_indexer.rs +0 -0
  205. {cocoindex-0.1.49 → cocoindex-0.1.50}/src/execution/source_indexer.rs +0 -0
  206. {cocoindex-0.1.49 → cocoindex-0.1.50}/src/execution/stats.rs +0 -0
  207. {cocoindex-0.1.49 → cocoindex-0.1.50}/src/lib.rs +0 -0
  208. {cocoindex-0.1.49 → cocoindex-0.1.50}/src/llm/anthropic.rs +0 -0
  209. {cocoindex-0.1.49 → cocoindex-0.1.50}/src/llm/gemini.rs +0 -0
  210. {cocoindex-0.1.49 → cocoindex-0.1.50}/src/llm/mod.rs +0 -0
  211. {cocoindex-0.1.49 → cocoindex-0.1.50}/src/llm/ollama.rs +0 -0
  212. {cocoindex-0.1.49 → cocoindex-0.1.50}/src/llm/openai.rs +0 -0
  213. {cocoindex-0.1.49 → cocoindex-0.1.50}/src/ops/functions/extract_by_llm.rs +0 -0
  214. {cocoindex-0.1.49 → cocoindex-0.1.50}/src/ops/functions/mod.rs +0 -0
  215. {cocoindex-0.1.49 → cocoindex-0.1.50}/src/ops/functions/parse_json.rs +0 -0
  216. {cocoindex-0.1.49 → cocoindex-0.1.50}/src/ops/functions/split_recursively.rs +0 -0
  217. {cocoindex-0.1.49 → cocoindex-0.1.50}/src/ops/mod.rs +0 -0
  218. {cocoindex-0.1.49 → cocoindex-0.1.50}/src/ops/registration.rs +0 -0
  219. {cocoindex-0.1.49 → cocoindex-0.1.50}/src/ops/registry.rs +0 -0
  220. {cocoindex-0.1.49 → cocoindex-0.1.50}/src/ops/sdk.rs +0 -0
  221. {cocoindex-0.1.49 → cocoindex-0.1.50}/src/ops/sources/amazon_s3.rs +0 -0
  222. {cocoindex-0.1.49 → cocoindex-0.1.50}/src/ops/sources/local_file.rs +0 -0
  223. {cocoindex-0.1.49 → cocoindex-0.1.50}/src/ops/sources/mod.rs +0 -0
  224. {cocoindex-0.1.49 → cocoindex-0.1.50}/src/ops/storages/mod.rs +0 -0
  225. {cocoindex-0.1.49 → cocoindex-0.1.50}/src/ops/storages/shared/mod.rs +0 -0
  226. {cocoindex-0.1.49 → cocoindex-0.1.50}/src/ops/storages/shared/property_graph.rs +0 -0
  227. {cocoindex-0.1.49 → cocoindex-0.1.50}/src/ops/storages/shared/table_columns.rs +0 -0
  228. {cocoindex-0.1.49 → cocoindex-0.1.50}/src/prelude.rs +0 -0
  229. {cocoindex-0.1.49 → cocoindex-0.1.50}/src/py/convert.rs +0 -0
  230. {cocoindex-0.1.49 → cocoindex-0.1.50}/src/settings.rs +0 -0
  231. {cocoindex-0.1.49 → cocoindex-0.1.50}/src/setup/auth_registry.rs +0 -0
  232. {cocoindex-0.1.49 → cocoindex-0.1.50}/src/setup/components.rs +0 -0
  233. {cocoindex-0.1.49 → cocoindex-0.1.50}/src/setup/db_metadata.rs +0 -0
  234. {cocoindex-0.1.49 → cocoindex-0.1.50}/src/setup/driver.rs +0 -0
  235. {cocoindex-0.1.49 → cocoindex-0.1.50}/src/setup/mod.rs +0 -0
  236. {cocoindex-0.1.49 → cocoindex-0.1.50}/src/setup/states.rs +0 -0
  237. {cocoindex-0.1.49 → cocoindex-0.1.50}/src/utils/db.rs +0 -0
  238. {cocoindex-0.1.49 → cocoindex-0.1.50}/src/utils/fingerprint.rs +0 -0
  239. {cocoindex-0.1.49 → cocoindex-0.1.50}/src/utils/immutable.rs +0 -0
  240. {cocoindex-0.1.49 → cocoindex-0.1.50}/src/utils/mod.rs +0 -0
  241. {cocoindex-0.1.49 → cocoindex-0.1.50}/src/utils/retryable.rs +0 -0
  242. {cocoindex-0.1.49 → cocoindex-0.1.50}/src/utils/yaml_ser.rs +0 -0
@@ -676,16 +676,43 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
676
676
  checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f"
677
677
  dependencies = [
678
678
  "async-trait",
679
- "axum-core",
679
+ "axum-core 0.4.5",
680
680
  "bytes",
681
681
  "futures-util",
682
682
  "http 1.3.1",
683
683
  "http-body 1.0.1",
684
684
  "http-body-util",
685
+ "itoa",
686
+ "matchit 0.7.3",
687
+ "memchr",
688
+ "mime",
689
+ "percent-encoding",
690
+ "pin-project-lite",
691
+ "rustversion",
692
+ "serde",
693
+ "sync_wrapper",
694
+ "tower 0.5.2",
695
+ "tower-layer",
696
+ "tower-service",
697
+ ]
698
+
699
+ [[package]]
700
+ name = "axum"
701
+ version = "0.8.4"
702
+ source = "registry+https://github.com/rust-lang/crates.io-index"
703
+ checksum = "021e862c184ae977658b36c4500f7feac3221ca5da43e3f25bd04ab6c79a29b5"
704
+ dependencies = [
705
+ "axum-core 0.5.2",
706
+ "bytes",
707
+ "form_urlencoded",
708
+ "futures-util",
709
+ "http 1.3.1",
710
+ "http-body 1.0.1",
711
+ "http-body-util",
685
712
  "hyper 1.6.0",
686
713
  "hyper-util",
687
714
  "itoa",
688
- "matchit",
715
+ "matchit 0.8.4",
689
716
  "memchr",
690
717
  "mime",
691
718
  "percent-encoding",
@@ -721,28 +748,48 @@ dependencies = [
721
748
  "sync_wrapper",
722
749
  "tower-layer",
723
750
  "tower-service",
751
+ ]
752
+
753
+ [[package]]
754
+ name = "axum-core"
755
+ version = "0.5.2"
756
+ source = "registry+https://github.com/rust-lang/crates.io-index"
757
+ checksum = "68464cd0412f486726fb3373129ef5d2993f90c34bc2bc1c1e9943b2f4fc7ca6"
758
+ dependencies = [
759
+ "bytes",
760
+ "futures-core",
761
+ "http 1.3.1",
762
+ "http-body 1.0.1",
763
+ "http-body-util",
764
+ "mime",
765
+ "pin-project-lite",
766
+ "rustversion",
767
+ "sync_wrapper",
768
+ "tower-layer",
769
+ "tower-service",
724
770
  "tracing",
725
771
  ]
726
772
 
727
773
  [[package]]
728
774
  name = "axum-extra"
729
- version = "0.9.6"
775
+ version = "0.10.1"
730
776
  source = "registry+https://github.com/rust-lang/crates.io-index"
731
- checksum = "c794b30c904f0a1c2fb7740f7df7f7972dfaa14ef6f57cb6178dc63e5dca2f04"
777
+ checksum = "45bf463831f5131b7d3c756525b305d40f1185b688565648a92e1392ca35713d"
732
778
  dependencies = [
733
- "axum",
734
- "axum-core",
779
+ "axum 0.8.4",
780
+ "axum-core 0.5.2",
735
781
  "bytes",
736
- "fastrand",
782
+ "form_urlencoded",
737
783
  "futures-util",
738
784
  "http 1.3.1",
739
785
  "http-body 1.0.1",
740
786
  "http-body-util",
741
787
  "mime",
742
- "multer",
743
788
  "pin-project-lite",
789
+ "rustversion",
744
790
  "serde",
745
791
  "serde_html_form",
792
+ "serde_path_to_error",
746
793
  "tower 0.5.2",
747
794
  "tower-layer",
748
795
  "tower-service",
@@ -993,7 +1040,7 @@ dependencies = [
993
1040
 
994
1041
  [[package]]
995
1042
  name = "cocoindex"
996
- version = "0.1.49"
1043
+ version = "0.1.50"
997
1044
  dependencies = [
998
1045
  "anyhow",
999
1046
  "async-openai",
@@ -1002,7 +1049,7 @@ dependencies = [
1002
1049
  "aws-config",
1003
1050
  "aws-sdk-s3",
1004
1051
  "aws-sdk-sqs",
1005
- "axum",
1052
+ "axum 0.8.4",
1006
1053
  "axum-extra",
1007
1054
  "base64 0.22.1",
1008
1055
  "blake2",
@@ -1028,6 +1075,7 @@ dependencies = [
1028
1075
  "neo4rs",
1029
1076
  "owo-colors",
1030
1077
  "pgvector",
1078
+ "phf",
1031
1079
  "pyo3",
1032
1080
  "pyo3-async-runtimes",
1033
1081
  "pythonize",
@@ -2610,6 +2658,12 @@ version = "0.7.3"
2610
2658
  source = "registry+https://github.com/rust-lang/crates.io-index"
2611
2659
  checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94"
2612
2660
 
2661
+ [[package]]
2662
+ name = "matchit"
2663
+ version = "0.8.4"
2664
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2665
+ checksum = "47e1ffaa40ddd1f3ed91f717a33c8c0ee23fff369e3aa8772b9605cc1d22f4c3"
2666
+
2613
2667
  [[package]]
2614
2668
  name = "md-5"
2615
2669
  version = "0.10.6"
@@ -2677,23 +2731,6 @@ dependencies = [
2677
2731
  "windows-sys 0.52.0",
2678
2732
  ]
2679
2733
 
2680
- [[package]]
2681
- name = "multer"
2682
- version = "3.1.0"
2683
- source = "registry+https://github.com/rust-lang/crates.io-index"
2684
- checksum = "83e87776546dc87511aa5ee218730c92b666d7264ab6ed41f9d215af9cd5224b"
2685
- dependencies = [
2686
- "bytes",
2687
- "encoding_rs",
2688
- "futures-util",
2689
- "http 1.3.1",
2690
- "httparse",
2691
- "memchr",
2692
- "mime",
2693
- "spin",
2694
- "version_check",
2695
- ]
2696
-
2697
2734
  [[package]]
2698
2735
  name = "neo4rs"
2699
2736
  version = "0.8.0"
@@ -2993,6 +3030,7 @@ version = "0.11.3"
2993
3030
  source = "registry+https://github.com/rust-lang/crates.io-index"
2994
3031
  checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078"
2995
3032
  dependencies = [
3033
+ "phf_macros",
2996
3034
  "phf_shared",
2997
3035
  ]
2998
3036
 
@@ -3016,6 +3054,19 @@ dependencies = [
3016
3054
  "rand 0.8.5",
3017
3055
  ]
3018
3056
 
3057
+ [[package]]
3058
+ name = "phf_macros"
3059
+ version = "0.11.3"
3060
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3061
+ checksum = "f84ac04429c13a7ff43785d75ad27569f2951ce0ffd30a3321230db2fc727216"
3062
+ dependencies = [
3063
+ "phf_generator",
3064
+ "phf_shared",
3065
+ "proc-macro2",
3066
+ "quote",
3067
+ "syn 2.0.101",
3068
+ ]
3069
+
3019
3070
  [[package]]
3020
3071
  name = "phf_shared"
3021
3072
  version = "0.11.3"
@@ -4653,7 +4704,7 @@ checksum = "877c5b330756d856ffcc4553ab34a5684481ade925ecc54bcd1bf02b1d0d4d52"
4653
4704
  dependencies = [
4654
4705
  "async-stream",
4655
4706
  "async-trait",
4656
- "axum",
4707
+ "axum 0.7.9",
4657
4708
  "base64 0.22.1",
4658
4709
  "bytes",
4659
4710
  "flate2",
@@ -2,7 +2,7 @@
2
2
  name = "cocoindex"
3
3
  # Version used for local development is always higher than others to take precedence.
4
4
  # Will be overridden for specific release versions.
5
- version = "0.1.49"
5
+ version = "0.1.50"
6
6
  edition = "2024"
7
7
 
8
8
  [profile.release]
@@ -20,8 +20,8 @@ pyo3-async-runtimes = { version = "0.24.0", features = ["tokio-runtime"] }
20
20
 
21
21
  anyhow = { version = "1.0.97", features = ["std"] }
22
22
  async-trait = "0.1.88"
23
- axum = "0.7.9"
24
- axum-extra = { version = "0.9.6", features = ["query"] }
23
+ axum = "0.8.4"
24
+ axum-extra = { version = "0.10.1", features = ["query"] }
25
25
  base64 = "0.22.1"
26
26
  chrono = "0.4.40"
27
27
  config = "0.14.1"
@@ -50,6 +50,7 @@ tower-http = { version = "0.6.2", features = ["cors", "trace"] }
50
50
  indexmap = { version = "2.8.0", features = ["serde"] }
51
51
  blake2 = "0.10.6"
52
52
  pgvector = { version = "0.4.0", features = ["sqlx"] }
53
+ phf = { version = "0.11.3", features = ["macros"] }
53
54
  indenter = "0.3.3"
54
55
  itertools = "0.14.0"
55
56
  derivative = "2.2.0"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cocoindex
3
- Version: 0.1.49
3
+ Version: 0.1.50
4
4
  Requires-Dist: sentence-transformers>=3.3.1
5
5
  Requires-Dist: click>=8.1.8
6
6
  Requires-Dist: rich>=14.0.0
@@ -36,16 +36,17 @@ This is the list of all basic types supported by CocoIndex:
36
36
  | LocalDatetime | Date and time without timezone | `cocoindex.LocalDateTime` | `datetime.datetime` |
37
37
  | OffsetDatetime | Date and time with a timezone offset | `cocoindex.OffsetDateTime` | `datetime.datetime` |
38
38
  | TimeDelta | A duration of time | `datetime.timedelta` | `datetime.timedelta` |
39
- | Vector[*T*, *Dim*?] | *T* must be basic type. *Dim* is a positive integer and optional. |`cocoindex.Vector[T]` or `cocoindex.Vector[T, Dim]` | `list[T]` |
40
39
  | Json | | `cocoindex.Json` | Any data convertible to JSON by `json` package |
40
+ | Vector[*T*, *Dim*?] | *T* can be a basic type or a numeric type. *Dim* is a positive integer and optional. | `cocoindex.Vector[T]` or `cocoindex.Vector[T, Dim]` | `numpy.typing.NDArray[T]` or `list[T]` |
41
41
 
42
42
  Values of all data types can be represented by values in Python's native types (as described under the Native Python Type column).
43
43
  However, the underlying execution engine and some storage systems (like Postgres) have finer distinctions for some types, specifically:
44
44
 
45
45
  * *Float32* and *Float64* for `float`, with different precision.
46
46
  * *LocalDateTime* and *OffsetDateTime* for `datetime.datetime`, with different timezone awareness.
47
- * *Vector* has optional dimension information.
48
47
  * *Range* and *Json* provide a clear tag for the type, to clearly distinguish the type in CocoIndex.
48
+ * *Vector* holds elements of type *T*. If *T* is numeric (e.g., `np.float32` or `np.float64`), it's represented as `NDArray[T]`; otherwise, as `list[T]`.
49
+ * *Vector* also has optional dimension information.
49
50
 
50
51
  The native Python type is always more permissive and can represent a superset of possible values.
51
52
  * Only when you annotate the return type of a custom function, you should use the specific type,
@@ -9,6 +9,8 @@ CocoIndex is an ultra-performant real-time data transformation framework for AI,
9
9
 
10
10
  As a data framework, CocoIndex takes it to the next level on data freshness. **Incremental processing** is one of the core values provided by CocoIndex.
11
11
 
12
+ ![Incremental Processing](/img/incremental-etl.gif)
13
+
12
14
  ## Programming Model
13
15
  CocoIndex follows the idea of [Dataflow programming](https://en.wikipedia.org/wiki/Dataflow_programming) model. Each transformation creates a new field solely based on input fields, without hidden states and value mutation. All data before/after each transformation is observable, with lineage out of the box.
14
16
 
@@ -154,11 +154,11 @@ The goal of transforming your data is usually to query against it.
154
154
  Once you already have your index built, you can directly access the transformed data in the target database.
155
155
  CocoIndex also provides utilities for you to do this more seamlessly.
156
156
 
157
- In this example, we'll use the [`psycopg` library](https://www.psycopg.org/) to connect to the database and run queries.
158
- Please make sure it's installed:
157
+ In this example, we'll use the [`psycopg` library](https://www.psycopg.org/) along with pgvector to connect to the database and run queries on vector data.
158
+ Please make sure the required packages are installed:
159
159
 
160
160
  ```bash
161
- pip install psycopg[binary,pool]
161
+ pip install numpy psycopg[binary,pool] pgvector
162
162
  ```
163
163
 
164
164
  ### Step 4.1: Extract common transformations
@@ -169,8 +169,11 @@ i.e. they should use exactly the same embedding model and parameters.
169
169
  Let's extract that into a function:
170
170
 
171
171
  ```python title="quickstart.py"
172
+ from numpy.typing import NDArray
173
+ import numpy as np
174
+
172
175
  @cocoindex.transform_flow()
173
- def text_to_embedding(text: cocoindex.DataSlice[str]) -> cocoindex.DataSlice[list[float]]:
176
+ def text_to_embedding(text: cocoindex.DataSlice[str]) -> cocoindex.DataSlice[NDArray[np.float32]]:
174
177
  return text.transform(
175
178
  cocoindex.functions.SentenceTransformerEmbed(
176
179
  model="sentence-transformers/all-MiniLM-L6-v2"))
@@ -207,6 +210,7 @@ Now we can create a function to query the index upon a given input query:
207
210
 
208
211
  ```python title="quickstart.py"
209
212
  from psycopg_pool import ConnectionPool
213
+ from pgvector.psycopg import register_vector
210
214
 
211
215
  def search(pool: ConnectionPool, query: str, top_k: int = 5):
212
216
  # Get the table name, for the export target in the text_embedding_flow above.
@@ -215,9 +219,10 @@ def search(pool: ConnectionPool, query: str, top_k: int = 5):
215
219
  query_vector = text_to_embedding.eval(query)
216
220
  # Run the query and get the results.
217
221
  with pool.connection() as conn:
222
+ register_vector(conn)
218
223
  with conn.cursor() as cur:
219
224
  cur.execute(f"""
220
- SELECT filename, text, embedding <=> %s::vector AS distance
225
+ SELECT filename, text, embedding <=> %s AS distance
221
226
  FROM {table_name} ORDER BY distance LIMIT %s
222
227
  """, (query_vector, top_k))
223
228
  return [
@@ -236,7 +241,7 @@ There're two CocoIndex-specific logic:
236
241
 
237
242
  2. Evaluate the transform flow defined above with the input query, to get the embedding.
238
243
  It's done by the `eval()` method of the transform flow `text_to_embedding`.
239
- The return type of this method is `list[float]` as declared in the `text_to_embedding()` function (`cocoindex.DataSlice[list[float]]`).
244
+ The return type of this method is `NDArray[np.float32]` as declared in the `text_to_embedding()` function (`cocoindex.DataSlice[NDArray[np.float32]]`).
240
245
 
241
246
  ### Step 4.3: Add the main script logic
242
247
 
@@ -41,7 +41,7 @@ The [quickstart](getting_started/quickstart#step-41-extract-common-transformatio
41
41
 
42
42
  ```python
43
43
  @cocoindex.transform_flow()
44
- def text_to_embedding(text: cocoindex.DataSlice[str]) -> cocoindex.DataSlice[list[float]]:
44
+ def text_to_embedding(text: cocoindex.DataSlice[str]) -> cocoindex.DataSlice[NDArray[np.float32]]:
45
45
  return text.transform(
46
46
  cocoindex.functions.SentenceTransformerEmbed(
47
47
  model="sentence-transformers/all-MiniLM-L6-v2"))
@@ -61,7 +61,7 @@ with doc["chunks"].row() as chunk:
61
61
  chunk["embedding"] = chunk["text"].call(text_to_embedding)
62
62
  ```
63
63
 
64
- Any time, you can call the `eval()` method with specific string, which will return a `list[float]`:
64
+ At any time, you can call the `eval()` method with a specific string, which will return an `NDArray[np.float32]`:
65
65
 
66
66
  ```python
67
67
  print(text_to_embedding.eval("Hello, world!"))
@@ -93,7 +93,7 @@ For example:
93
93
 
94
94
  ```python
95
95
  table_name = cocoindex.utils.get_target_storage_default_name(text_embedding_flow, "doc_embeddings")
96
- query = f"SELECT filename, text FROM {table_name} ORDER BY embedding <=> %s::vector DESC LIMIT 5"
96
+ query = f"SELECT filename, text FROM {table_name} ORDER BY embedding <=> %s DESC LIMIT 5"
97
97
  ...
98
98
  ```
99
99
 
@@ -2,7 +2,9 @@ import cocoindex
2
2
  import uvicorn
3
3
  from dotenv import load_dotenv
4
4
  from fastapi import FastAPI, Query
5
+ from fastapi import Request
5
6
  from psycopg_pool import ConnectionPool
7
+ from contextlib import asynccontextmanager
6
8
  import os
7
9
 
8
10
 
@@ -86,27 +88,31 @@ def search(pool: ConnectionPool, query: str, top_k: int = 5):
86
88
  ]
87
89
 
88
90
 
89
- fastapi_app = FastAPI()
90
-
91
-
92
- @fastapi_app.on_event("startup")
93
- def startup_event():
91
+ @asynccontextmanager
92
+ def lifespan(app: FastAPI):
94
93
  load_dotenv()
95
94
  cocoindex.init()
96
- # Initialize database connection pool
97
- fastapi_app.state.pool = ConnectionPool(os.getenv("COCOINDEX_DATABASE_URL"))
95
+ pool = ConnectionPool(os.getenv("COCOINDEX_DATABASE_URL"))
96
+ app.state.pool = pool
97
+ try:
98
+ yield
99
+ finally:
100
+ pool.close()
101
+
102
+
103
+ fastapi_app = FastAPI(lifespan=lifespan)
98
104
 
99
105
 
100
106
  @fastapi_app.get("/search")
101
107
  def search_endpoint(
108
+ request: Request,
102
109
  q: str = Query(..., description="Search query"),
103
110
  limit: int = Query(5, description="Number of results"),
104
111
  ):
105
- results = search(fastapi_app.state.pool, q, limit)
112
+ pool = request.app.state.pool
113
+ results = search(pool, q, limit)
106
114
  return {"results": results}
107
115
 
108
116
 
109
117
  if __name__ == "__main__":
110
- load_dotenv()
111
- cocoindex.init()
112
118
  uvicorn.run(fastapi_app, host="0.0.0.0", port=8080)
@@ -45,7 +45,7 @@
45
45
  },
46
46
  "outputs": [],
47
47
  "source": [
48
- "%pip install cocoindex python-dotenv psycopg[binary,pool]"
48
+ "%pip install cocoindex numpy python-dotenv psycopg[binary,pool] pgvector"
49
49
  ]
50
50
  },
51
51
  {
@@ -164,7 +164,10 @@
164
164
  "from dotenv import load_dotenv\n",
165
165
  "import os\n",
166
166
  "from psycopg_pool import ConnectionPool\n",
167
- "import cocoindex\n"
167
+ "from pgvector.psycopg import register_vector\n",
168
+ "import cocoindex\n",
169
+ "from numpy.typing import NDArray\n",
170
+ "import numpy as np\n"
168
171
  ]
169
172
  },
170
173
  {
@@ -187,7 +190,7 @@
187
190
  "%%writefile -a main.py\n",
188
191
  "\n",
189
192
  "@cocoindex.transform_flow()\n",
190
- "def text_to_embedding(text: cocoindex.DataSlice[str]) -> cocoindex.DataSlice[list[float]]:\n",
193
+ "def text_to_embedding(text: cocoindex.DataSlice[str]) -> cocoindex.DataSlice[NDArray[np.float32]]:\n",
191
194
  " \"\"\"\n",
192
195
  " Embed the text using a SentenceTransformer model.\n",
193
196
  " This is shared logic between indexing and querying.\n",
@@ -274,9 +277,10 @@
274
277
  " query_vector = text_to_embedding.eval(query)\n",
275
278
  " # Run the query and get the results.\n",
276
279
  " with pool.connection() as conn:\n",
280
+ " register_vector(conn)\n",
277
281
  " with conn.cursor() as cur:\n",
278
282
  " cur.execute(f\"\"\"\n",
279
- " SELECT filename, text, embedding <=> %s::vector AS distance\n",
283
+ " SELECT filename, text, embedding <=> %s AS distance\n",
280
284
  " FROM {table_name} ORDER BY distance LIMIT %s\n",
281
285
  " \"\"\", (query_vector, top_k))\n",
282
286
  " return [\n",
@@ -1,13 +1,16 @@
1
1
  from dotenv import load_dotenv
2
2
  from psycopg_pool import ConnectionPool
3
+ from pgvector.psycopg import register_vector
3
4
  import cocoindex
4
5
  import os
6
+ from numpy.typing import NDArray
7
+ import numpy as np
5
8
 
6
9
 
7
10
  @cocoindex.transform_flow()
8
11
  def text_to_embedding(
9
12
  text: cocoindex.DataSlice[str],
10
- ) -> cocoindex.DataSlice[list[float]]:
13
+ ) -> cocoindex.DataSlice[NDArray[np.float32]]:
11
14
  """
12
15
  Embed the text using a SentenceTransformer model.
13
16
  This is a shared logic between indexing and querying, so extract it as a function.
@@ -71,10 +74,11 @@ def search(pool: ConnectionPool, query: str, top_k: int = 5):
71
74
  query_vector = text_to_embedding.eval(query)
72
75
  # Run the query and get the results.
73
76
  with pool.connection() as conn:
77
+ register_vector(conn)
74
78
  with conn.cursor() as cur:
75
79
  cur.execute(
76
80
  f"""
77
- SELECT filename, text, embedding <=> %s::vector AS distance
81
+ SELECT filename, text, embedding <=> %s AS distance
78
82
  FROM {table_name} ORDER BY distance LIMIT %s
79
83
  """,
80
84
  (query_vector, top_k),
@@ -6,7 +6,9 @@ requires-python = ">=3.10"
6
6
  dependencies = [
7
7
  "cocoindex>=0.1.42",
8
8
  "python-dotenv>=1.0.1",
9
+ "pgvector>=0.4.1",
9
10
  "psycopg[binary,pool]",
11
+ "numpy",
10
12
  ]
11
13
 
12
14
  [tool.setuptools]
@@ -2,7 +2,7 @@
2
2
  Cocoindex is a framework for building and running indexing pipelines.
3
3
  """
4
4
 
5
- from . import functions, query, sources, storages, cli, utils
5
+ from . import functions, sources, storages, cli, utils
6
6
 
7
7
  from .auth_registry import AuthEntryReference, add_auth_entry, ref_auth_entry
8
8
  from .flow import FlowBuilder, DataScope, DataSlice, Flow, transform_flow
@@ -6,6 +6,7 @@ import dataclasses
6
6
  import datetime
7
7
  import inspect
8
8
  import uuid
9
+ import numpy as np
9
10
 
10
11
  from enum import Enum
11
12
  from typing import Any, Callable, get_origin, Mapping
@@ -15,6 +16,7 @@ from .typing import (
15
16
  is_namedtuple_type,
16
17
  TABLE_TYPES,
17
18
  KEY_FIELD_NAME,
19
+ DtypeRegistry,
18
20
  )
19
21
 
20
22
 
@@ -27,6 +29,8 @@ def encode_engine_value(value: Any) -> Any:
27
29
  ]
28
30
  if is_namedtuple_type(type(value)):
29
31
  return [encode_engine_value(getattr(value, name)) for name in value._fields]
32
+ if isinstance(value, np.ndarray):
33
+ return value
30
34
  if isinstance(value, (list, tuple)):
31
35
  return [encode_engine_value(v) for v in value]
32
36
  if isinstance(value, dict):
@@ -122,6 +126,38 @@ def make_engine_value_decoder(
122
126
  if src_type_kind == "Uuid":
123
127
  return lambda value: uuid.UUID(bytes=value)
124
128
 
129
+ if src_type_kind == "Vector":
130
+ elem_coco_type_info = analyze_type_info(dst_type_info.elem_type)
131
+ dtype_info = DtypeRegistry.get_by_kind(elem_coco_type_info.kind)
132
+
133
+ def decode_vector(value: Any) -> Any | None:
134
+ if value is None:
135
+ if dst_type_info.nullable:
136
+ return None
137
+ raise ValueError(
138
+ f"Received null for non-nullable vector `{''.join(field_path)}`"
139
+ )
140
+
141
+ if not isinstance(value, (np.ndarray, list)):
142
+ raise TypeError(
143
+ f"Expected NDArray or list for vector `{''.join(field_path)}`, got {type(value)}"
144
+ )
145
+ expected_dim = (
146
+ dst_type_info.vector_info.dim if dst_type_info.vector_info else None
147
+ )
148
+ if expected_dim is not None and len(value) != expected_dim:
149
+ raise ValueError(
150
+ f"Vector dimension mismatch for `{''.join(field_path)}`: "
151
+ f"expected {expected_dim}, got {len(value)}"
152
+ )
153
+
154
+ # Use NDArray for supported numeric dtypes, else return list
155
+ if dtype_info is not None:
156
+ return np.array(value, dtype=dtype_info.numpy_dtype)
157
+ return value
158
+
159
+ return decode_vector
160
+
125
161
  return lambda value: value
126
162
 
127
163
 
@@ -1,6 +1,8 @@
1
1
  """All builtin functions."""
2
2
 
3
- from typing import Annotated, Any, TYPE_CHECKING
3
+ from typing import Annotated, Any, TYPE_CHECKING, Literal
4
+ import numpy as np
5
+ from numpy.typing import NDArray
4
6
  import dataclasses
5
7
 
6
8
  from .typing import Float32, Vector, TypeAttr
@@ -66,11 +68,11 @@ class SentenceTransformerEmbedExecutor:
66
68
  self._model = sentence_transformers.SentenceTransformer(self.spec.model, **args)
67
69
  dim = self._model.get_sentence_embedding_dimension()
68
70
  result: type = Annotated[
69
- Vector[Float32, dim], # type: ignore
71
+ Vector[np.float32, Literal[dim]], # type: ignore
70
72
  TypeAttr("cocoindex.io/vector_origin_text", text.analyzed_value),
71
73
  ]
72
74
  return result
73
75
 
74
- def __call__(self, text: str) -> list[Float32]:
75
- result: list[Float32] = self._model.encode(text).tolist()
76
+ def __call__(self, text: str) -> NDArray[np.float32]:
77
+ result: NDArray[np.float32] = self._model.encode(text, convert_to_numpy=True)
76
78
  return result
@@ -6,7 +6,7 @@ import warnings
6
6
  from typing import Callable, Any
7
7
 
8
8
  from . import _engine # type: ignore
9
- from . import flow, query, setting
9
+ from . import flow, setting
10
10
  from .convert import dump_engine_object
11
11
 
12
12
 
@@ -24,7 +24,6 @@ def init(settings: setting.Settings | None = None) -> None:
24
24
  def start_server(settings: setting.ServerSettings) -> None:
25
25
  """Start the cocoindex server."""
26
26
  flow.ensure_all_flows_built()
27
- query.ensure_all_handlers_built()
28
27
  _engine.start_server(settings.__dict__)
29
28
 
30
29