cocoindex 0.1.48__tar.gz → 0.1.50__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (243) hide show
  1. {cocoindex-0.1.48 → cocoindex-0.1.50}/Cargo.lock +79 -28
  2. {cocoindex-0.1.48 → cocoindex-0.1.50}/Cargo.toml +4 -3
  3. {cocoindex-0.1.48 → cocoindex-0.1.50}/PKG-INFO +1 -1
  4. {cocoindex-0.1.48 → cocoindex-0.1.50}/docs/docs/core/data_types.mdx +3 -2
  5. cocoindex-0.1.50/docs/docs/getting_started/overview.md +36 -0
  6. {cocoindex-0.1.48 → cocoindex-0.1.50}/docs/docs/getting_started/quickstart.md +11 -6
  7. {cocoindex-0.1.48 → cocoindex-0.1.50}/docs/docs/ops/functions.md +21 -3
  8. {cocoindex-0.1.48 → cocoindex-0.1.50}/docs/docs/ops/storages.md +52 -40
  9. {cocoindex-0.1.48 → cocoindex-0.1.50}/docs/docs/query.mdx +3 -3
  10. cocoindex-0.1.50/docs/static/img/incremental-etl.gif +0 -0
  11. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/docs_to_knowledge_graph/README.md +7 -10
  12. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/docs_to_knowledge_graph/main.py +26 -23
  13. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/fastapi_server_docker/main.py +16 -10
  14. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/image_search/README.md +0 -15
  15. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/product_recommendation/README.md +7 -7
  16. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/product_recommendation/main.py +32 -28
  17. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/text_embedding/Text_Embedding.ipynb +8 -4
  18. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/text_embedding/main.py +6 -2
  19. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/text_embedding/pyproject.toml +2 -0
  20. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/text_embedding_qdrant/README.md +3 -19
  21. {cocoindex-0.1.48 → cocoindex-0.1.50}/python/cocoindex/__init__.py +1 -1
  22. {cocoindex-0.1.48 → cocoindex-0.1.50}/python/cocoindex/convert.py +36 -0
  23. {cocoindex-0.1.48 → cocoindex-0.1.50}/python/cocoindex/functions.py +18 -4
  24. {cocoindex-0.1.48 → cocoindex-0.1.50}/python/cocoindex/lib.py +1 -2
  25. {cocoindex-0.1.48 → cocoindex-0.1.50}/python/cocoindex/tests/test_convert.py +280 -52
  26. cocoindex-0.1.50/python/cocoindex/tests/test_typing.py +499 -0
  27. {cocoindex-0.1.48 → cocoindex-0.1.50}/python/cocoindex/typing.py +88 -13
  28. {cocoindex-0.1.48 → cocoindex-0.1.50}/src/base/value.rs +10 -0
  29. {cocoindex-0.1.48 → cocoindex-0.1.50}/src/builder/analyzer.rs +3 -4
  30. {cocoindex-0.1.48 → cocoindex-0.1.50}/src/builder/plan.rs +0 -1
  31. {cocoindex-0.1.48 → cocoindex-0.1.50}/src/execution/mod.rs +0 -1
  32. {cocoindex-0.1.48 → cocoindex-0.1.50}/src/lib_context.rs +2 -18
  33. {cocoindex-0.1.48 → cocoindex-0.1.50}/src/ops/factory_bases.rs +3 -12
  34. {cocoindex-0.1.48 → cocoindex-0.1.50}/src/ops/functions/split_recursively.rs +292 -203
  35. {cocoindex-0.1.48 → cocoindex-0.1.50}/src/ops/interface.rs +2 -65
  36. {cocoindex-0.1.48 → cocoindex-0.1.50}/src/ops/py_factory.rs +4 -5
  37. {cocoindex-0.1.48 → cocoindex-0.1.50}/src/ops/registration.rs +1 -1
  38. {cocoindex-0.1.48 → cocoindex-0.1.50}/src/ops/sources/google_drive.rs +31 -46
  39. {cocoindex-0.1.48 → cocoindex-0.1.50}/src/ops/storages/kuzu.rs +1 -7
  40. {cocoindex-0.1.48 → cocoindex-0.1.50}/src/ops/storages/neo4j.rs +7 -8
  41. {cocoindex-0.1.48 → cocoindex-0.1.50}/src/ops/storages/postgres.rs +5 -197
  42. {cocoindex-0.1.48 → cocoindex-0.1.50}/src/ops/storages/qdrant.rs +13 -42
  43. {cocoindex-0.1.48 → cocoindex-0.1.50}/src/py/mod.rs +16 -81
  44. {cocoindex-0.1.48 → cocoindex-0.1.50}/src/server.rs +8 -12
  45. {cocoindex-0.1.48 → cocoindex-0.1.50}/src/service/error.rs +12 -4
  46. {cocoindex-0.1.48 → cocoindex-0.1.50}/src/service/flows.rs +26 -7
  47. {cocoindex-0.1.48 → cocoindex-0.1.50}/src/service/mod.rs +0 -1
  48. cocoindex-0.1.48/docs/docs/getting_started/overview.md +0 -14
  49. cocoindex-0.1.48/python/cocoindex/query.py +0 -115
  50. cocoindex-0.1.48/src/execution/query.rs +0 -124
  51. cocoindex-0.1.48/src/service/search.rs +0 -58
  52. {cocoindex-0.1.48 → cocoindex-0.1.50}/.cargo/config.toml +0 -0
  53. {cocoindex-0.1.48 → cocoindex-0.1.50}/.env.lib_debug +0 -0
  54. {cocoindex-0.1.48 → cocoindex-0.1.50}/.github/ISSUE_TEMPLATE//360/237/220/233-bug-report.md" +0 -0
  55. {cocoindex-0.1.48 → cocoindex-0.1.50}/.github/ISSUE_TEMPLATE//360/237/222/241-feature-request.md" +0 -0
  56. {cocoindex-0.1.48 → cocoindex-0.1.50}/.github/scripts/update_version.sh +0 -0
  57. {cocoindex-0.1.48 → cocoindex-0.1.50}/.github/workflows/CI.yml +0 -0
  58. {cocoindex-0.1.48 → cocoindex-0.1.50}/.github/workflows/_test.yml +0 -0
  59. {cocoindex-0.1.48 → cocoindex-0.1.50}/.github/workflows/docs.yml +0 -0
  60. {cocoindex-0.1.48 → cocoindex-0.1.50}/.github/workflows/release.yml +0 -0
  61. {cocoindex-0.1.48 → cocoindex-0.1.50}/.gitignore +0 -0
  62. {cocoindex-0.1.48 → cocoindex-0.1.50}/.vscode/settings.json +0 -0
  63. {cocoindex-0.1.48 → cocoindex-0.1.50}/CODE_OF_CONDUCT.md +0 -0
  64. {cocoindex-0.1.48 → cocoindex-0.1.50}/CONTRIBUTING.md +0 -0
  65. {cocoindex-0.1.48 → cocoindex-0.1.50}/LICENSE +0 -0
  66. {cocoindex-0.1.48 → cocoindex-0.1.50}/README.md +0 -0
  67. {cocoindex-0.1.48 → cocoindex-0.1.50}/dev/neo4j.yaml +0 -0
  68. {cocoindex-0.1.48 → cocoindex-0.1.50}/dev/postgres.yaml +0 -0
  69. {cocoindex-0.1.48 → cocoindex-0.1.50}/docs/.gitignore +0 -0
  70. {cocoindex-0.1.48 → cocoindex-0.1.50}/docs/README.md +0 -0
  71. {cocoindex-0.1.48 → cocoindex-0.1.50}/docs/docs/about/community.md +0 -0
  72. {cocoindex-0.1.48 → cocoindex-0.1.50}/docs/docs/about/contributing.md +0 -0
  73. {cocoindex-0.1.48 → cocoindex-0.1.50}/docs/docs/ai/llm.mdx +0 -0
  74. {cocoindex-0.1.48 → cocoindex-0.1.50}/docs/docs/core/basics.md +0 -0
  75. {cocoindex-0.1.48 → cocoindex-0.1.50}/docs/docs/core/cli.mdx +0 -0
  76. {cocoindex-0.1.48 → cocoindex-0.1.50}/docs/docs/core/custom_function.mdx +0 -0
  77. {cocoindex-0.1.48 → cocoindex-0.1.50}/docs/docs/core/data_example.svg +0 -0
  78. {cocoindex-0.1.48 → cocoindex-0.1.50}/docs/docs/core/flow_def.mdx +0 -0
  79. {cocoindex-0.1.48 → cocoindex-0.1.50}/docs/docs/core/flow_example.svg +0 -0
  80. {cocoindex-0.1.48 → cocoindex-0.1.50}/docs/docs/core/flow_methods.mdx +0 -0
  81. {cocoindex-0.1.48 → cocoindex-0.1.50}/docs/docs/core/settings.mdx +0 -0
  82. {cocoindex-0.1.48 → cocoindex-0.1.50}/docs/docs/getting_started/installation.md +0 -0
  83. {cocoindex-0.1.48 → cocoindex-0.1.50}/docs/docs/getting_started/markdown_files.zip +0 -0
  84. {cocoindex-0.1.48 → cocoindex-0.1.50}/docs/docs/ops/sources.md +0 -0
  85. {cocoindex-0.1.48 → cocoindex-0.1.50}/docs/docusaurus.config.ts +0 -0
  86. {cocoindex-0.1.48 → cocoindex-0.1.50}/docs/package.json +0 -0
  87. {cocoindex-0.1.48 → cocoindex-0.1.50}/docs/sidebars.ts +0 -0
  88. {cocoindex-0.1.48 → cocoindex-0.1.50}/docs/src/components/HomepageFeatures/index.tsx +0 -0
  89. {cocoindex-0.1.48 → cocoindex-0.1.50}/docs/src/components/HomepageFeatures/styles.module.css +0 -0
  90. {cocoindex-0.1.48 → cocoindex-0.1.50}/docs/src/css/custom.css +0 -0
  91. {cocoindex-0.1.48 → cocoindex-0.1.50}/docs/src/theme/Root.js +0 -0
  92. {cocoindex-0.1.48 → cocoindex-0.1.50}/docs/static/.nojekyll +0 -0
  93. {cocoindex-0.1.48 → cocoindex-0.1.50}/docs/static/img/docusaurus.png +0 -0
  94. {cocoindex-0.1.48 → cocoindex-0.1.50}/docs/static/img/favicon.ico +0 -0
  95. {cocoindex-0.1.48 → cocoindex-0.1.50}/docs/static/img/icon.svg +0 -0
  96. {cocoindex-0.1.48 → cocoindex-0.1.50}/docs/static/robots.txt +0 -0
  97. {cocoindex-0.1.48 → cocoindex-0.1.50}/docs/tsconfig.json +0 -0
  98. {cocoindex-0.1.48 → cocoindex-0.1.50}/docs/yarn.lock +0 -0
  99. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/amazon_s3_embedding/.env.example +0 -0
  100. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/amazon_s3_embedding/.gitignore +0 -0
  101. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/amazon_s3_embedding/README.md +0 -0
  102. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/amazon_s3_embedding/main.py +0 -0
  103. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/amazon_s3_embedding/pyproject.toml +0 -0
  104. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/code_embedding/.env +0 -0
  105. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/code_embedding/README.md +0 -0
  106. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/code_embedding/main.py +0 -0
  107. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/code_embedding/pyproject.toml +0 -0
  108. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/docs_to_knowledge_graph/.env +0 -0
  109. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/docs_to_knowledge_graph/pyproject.toml +0 -0
  110. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/fastapi_server_docker/.dockerignore +0 -0
  111. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/fastapi_server_docker/.env +0 -0
  112. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/fastapi_server_docker/README.md +0 -0
  113. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/fastapi_server_docker/compose.yaml +0 -0
  114. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/fastapi_server_docker/dockerfile +0 -0
  115. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/fastapi_server_docker/files/1810.04805v2.md +0 -0
  116. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/fastapi_server_docker/requirements.txt +0 -0
  117. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/gdrive_text_embedding/.env.example +0 -0
  118. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/gdrive_text_embedding/.gitignore +0 -0
  119. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/gdrive_text_embedding/README.md +0 -0
  120. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/gdrive_text_embedding/main.py +0 -0
  121. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/gdrive_text_embedding/pyproject.toml +0 -0
  122. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/image_search/.env +0 -0
  123. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/image_search/frontend/.gitignore +0 -0
  124. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/image_search/frontend/index.html +0 -0
  125. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/image_search/frontend/package-lock.json +0 -0
  126. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/image_search/frontend/package.json +0 -0
  127. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/image_search/frontend/src/App.jsx +0 -0
  128. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/image_search/frontend/src/main.jsx +0 -0
  129. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/image_search/frontend/src/style.css +0 -0
  130. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/image_search/frontend/vite.config.js +0 -0
  131. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/image_search/img/cat1.jpeg +0 -0
  132. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/image_search/img/dog1.jpeg +0 -0
  133. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/image_search/img/elephant1.jpg +0 -0
  134. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/image_search/img/giraffe.jpg +0 -0
  135. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/image_search/main.py +0 -0
  136. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/image_search/pyproject.toml +0 -0
  137. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/image_search/requirements.txt +0 -0
  138. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/manuals_llm_extraction/.env +0 -0
  139. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/manuals_llm_extraction/README.md +0 -0
  140. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/manuals_llm_extraction/main.py +0 -0
  141. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/manuals_llm_extraction/manuals/array.pdf +0 -0
  142. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/manuals_llm_extraction/manuals/base64.pdf +0 -0
  143. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/manuals_llm_extraction/manuals/copy.pdf +0 -0
  144. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/manuals_llm_extraction/manuals/glob.pdf +0 -0
  145. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/manuals_llm_extraction/pyproject.toml +0 -0
  146. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/pdf_embedding/.env +0 -0
  147. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/pdf_embedding/README.md +0 -0
  148. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/pdf_embedding/main.py +0 -0
  149. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/pdf_embedding/pdf_files/1706.03762v7.pdf +0 -0
  150. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/pdf_embedding/pdf_files/1810.04805v2.pdf +0 -0
  151. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/pdf_embedding/pdf_files/rfc8259.pdf +0 -0
  152. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/pdf_embedding/pyproject.toml +0 -0
  153. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/product_recommendation/.env +0 -0
  154. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/product_recommendation/img/cocoinsight.png +0 -0
  155. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/product_recommendation/img/neo4j.png +0 -0
  156. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/product_recommendation/products/p1.json +0 -0
  157. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/product_recommendation/products/p2.json +0 -0
  158. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/product_recommendation/products/p3.json +0 -0
  159. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/product_recommendation/products/p4.json +0 -0
  160. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/product_recommendation/products/p5.json +0 -0
  161. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/product_recommendation/products/p6.json +0 -0
  162. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/product_recommendation/products/p7.json +0 -0
  163. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/product_recommendation/products/p8.json +0 -0
  164. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/product_recommendation/products/p9.json +0 -0
  165. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/product_recommendation/pyproject.toml +0 -0
  166. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/text_embedding/.env +0 -0
  167. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/text_embedding/README.md +0 -0
  168. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/text_embedding/markdown_files/1706.03762v7.md +0 -0
  169. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/text_embedding/markdown_files/1810.04805v2.md +0 -0
  170. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/text_embedding/markdown_files/rfc8259.md +0 -0
  171. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/text_embedding_qdrant/.env +0 -0
  172. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/text_embedding_qdrant/main.py +0 -0
  173. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/text_embedding_qdrant/markdown_files/rfc8259.md +0 -0
  174. {cocoindex-0.1.48 → cocoindex-0.1.50}/examples/text_embedding_qdrant/pyproject.toml +0 -0
  175. {cocoindex-0.1.48 → cocoindex-0.1.50}/pyproject.toml +0 -0
  176. {cocoindex-0.1.48 → cocoindex-0.1.50}/python/cocoindex/auth_registry.py +0 -0
  177. {cocoindex-0.1.48 → cocoindex-0.1.50}/python/cocoindex/cli.py +0 -0
  178. {cocoindex-0.1.48 → cocoindex-0.1.50}/python/cocoindex/flow.py +0 -0
  179. {cocoindex-0.1.48 → cocoindex-0.1.50}/python/cocoindex/index.py +0 -0
  180. {cocoindex-0.1.48 → cocoindex-0.1.50}/python/cocoindex/llm.py +0 -0
  181. {cocoindex-0.1.48 → cocoindex-0.1.50}/python/cocoindex/op.py +0 -0
  182. {cocoindex-0.1.48 → cocoindex-0.1.50}/python/cocoindex/py.typed +0 -0
  183. {cocoindex-0.1.48 → cocoindex-0.1.50}/python/cocoindex/runtime.py +0 -0
  184. {cocoindex-0.1.48 → cocoindex-0.1.50}/python/cocoindex/setting.py +0 -0
  185. {cocoindex-0.1.48 → cocoindex-0.1.50}/python/cocoindex/setup.py +0 -0
  186. {cocoindex-0.1.48 → cocoindex-0.1.50}/python/cocoindex/sources.py +0 -0
  187. {cocoindex-0.1.48 → cocoindex-0.1.50}/python/cocoindex/storages.py +0 -0
  188. {cocoindex-0.1.48 → cocoindex-0.1.50}/python/cocoindex/tests/__init__.py +0 -0
  189. {cocoindex-0.1.48 → cocoindex-0.1.50}/python/cocoindex/utils.py +0 -0
  190. {cocoindex-0.1.48 → cocoindex-0.1.50}/ruff.toml +0 -0
  191. {cocoindex-0.1.48 → cocoindex-0.1.50}/src/base/duration.rs +0 -0
  192. {cocoindex-0.1.48 → cocoindex-0.1.50}/src/base/field_attrs.rs +0 -0
  193. {cocoindex-0.1.48 → cocoindex-0.1.50}/src/base/json_schema.rs +0 -0
  194. {cocoindex-0.1.48 → cocoindex-0.1.50}/src/base/mod.rs +0 -0
  195. {cocoindex-0.1.48 → cocoindex-0.1.50}/src/base/schema.rs +0 -0
  196. {cocoindex-0.1.48 → cocoindex-0.1.50}/src/base/spec.rs +0 -0
  197. {cocoindex-0.1.48 → cocoindex-0.1.50}/src/builder/analyzed_flow.rs +0 -0
  198. {cocoindex-0.1.48 → cocoindex-0.1.50}/src/builder/flow_builder.rs +0 -0
  199. {cocoindex-0.1.48 → cocoindex-0.1.50}/src/builder/mod.rs +0 -0
  200. {cocoindex-0.1.48 → cocoindex-0.1.50}/src/execution/db_tracking.rs +0 -0
  201. {cocoindex-0.1.48 → cocoindex-0.1.50}/src/execution/db_tracking_setup.rs +0 -0
  202. {cocoindex-0.1.48 → cocoindex-0.1.50}/src/execution/dumper.rs +0 -0
  203. {cocoindex-0.1.48 → cocoindex-0.1.50}/src/execution/evaluator.rs +0 -0
  204. {cocoindex-0.1.48 → cocoindex-0.1.50}/src/execution/indexing_status.rs +0 -0
  205. {cocoindex-0.1.48 → cocoindex-0.1.50}/src/execution/live_updater.rs +0 -0
  206. {cocoindex-0.1.48 → cocoindex-0.1.50}/src/execution/memoization.rs +0 -0
  207. {cocoindex-0.1.48 → cocoindex-0.1.50}/src/execution/row_indexer.rs +0 -0
  208. {cocoindex-0.1.48 → cocoindex-0.1.50}/src/execution/source_indexer.rs +0 -0
  209. {cocoindex-0.1.48 → cocoindex-0.1.50}/src/execution/stats.rs +0 -0
  210. {cocoindex-0.1.48 → cocoindex-0.1.50}/src/lib.rs +0 -0
  211. {cocoindex-0.1.48 → cocoindex-0.1.50}/src/llm/anthropic.rs +0 -0
  212. {cocoindex-0.1.48 → cocoindex-0.1.50}/src/llm/gemini.rs +0 -0
  213. {cocoindex-0.1.48 → cocoindex-0.1.50}/src/llm/mod.rs +0 -0
  214. {cocoindex-0.1.48 → cocoindex-0.1.50}/src/llm/ollama.rs +0 -0
  215. {cocoindex-0.1.48 → cocoindex-0.1.50}/src/llm/openai.rs +0 -0
  216. {cocoindex-0.1.48 → cocoindex-0.1.50}/src/ops/functions/extract_by_llm.rs +0 -0
  217. {cocoindex-0.1.48 → cocoindex-0.1.50}/src/ops/functions/mod.rs +0 -0
  218. {cocoindex-0.1.48 → cocoindex-0.1.50}/src/ops/functions/parse_json.rs +0 -0
  219. {cocoindex-0.1.48 → cocoindex-0.1.50}/src/ops/mod.rs +0 -0
  220. {cocoindex-0.1.48 → cocoindex-0.1.50}/src/ops/registry.rs +0 -0
  221. {cocoindex-0.1.48 → cocoindex-0.1.50}/src/ops/sdk.rs +0 -0
  222. {cocoindex-0.1.48 → cocoindex-0.1.50}/src/ops/sources/amazon_s3.rs +0 -0
  223. {cocoindex-0.1.48 → cocoindex-0.1.50}/src/ops/sources/local_file.rs +0 -0
  224. {cocoindex-0.1.48 → cocoindex-0.1.50}/src/ops/sources/mod.rs +0 -0
  225. {cocoindex-0.1.48 → cocoindex-0.1.50}/src/ops/storages/mod.rs +0 -0
  226. {cocoindex-0.1.48 → cocoindex-0.1.50}/src/ops/storages/shared/mod.rs +0 -0
  227. {cocoindex-0.1.48 → cocoindex-0.1.50}/src/ops/storages/shared/property_graph.rs +0 -0
  228. {cocoindex-0.1.48 → cocoindex-0.1.50}/src/ops/storages/shared/table_columns.rs +0 -0
  229. {cocoindex-0.1.48 → cocoindex-0.1.50}/src/prelude.rs +0 -0
  230. {cocoindex-0.1.48 → cocoindex-0.1.50}/src/py/convert.rs +0 -0
  231. {cocoindex-0.1.48 → cocoindex-0.1.50}/src/settings.rs +0 -0
  232. {cocoindex-0.1.48 → cocoindex-0.1.50}/src/setup/auth_registry.rs +0 -0
  233. {cocoindex-0.1.48 → cocoindex-0.1.50}/src/setup/components.rs +0 -0
  234. {cocoindex-0.1.48 → cocoindex-0.1.50}/src/setup/db_metadata.rs +0 -0
  235. {cocoindex-0.1.48 → cocoindex-0.1.50}/src/setup/driver.rs +0 -0
  236. {cocoindex-0.1.48 → cocoindex-0.1.50}/src/setup/mod.rs +0 -0
  237. {cocoindex-0.1.48 → cocoindex-0.1.50}/src/setup/states.rs +0 -0
  238. {cocoindex-0.1.48 → cocoindex-0.1.50}/src/utils/db.rs +0 -0
  239. {cocoindex-0.1.48 → cocoindex-0.1.50}/src/utils/fingerprint.rs +0 -0
  240. {cocoindex-0.1.48 → cocoindex-0.1.50}/src/utils/immutable.rs +0 -0
  241. {cocoindex-0.1.48 → cocoindex-0.1.50}/src/utils/mod.rs +0 -0
  242. {cocoindex-0.1.48 → cocoindex-0.1.50}/src/utils/retryable.rs +0 -0
  243. {cocoindex-0.1.48 → cocoindex-0.1.50}/src/utils/yaml_ser.rs +0 -0
@@ -676,16 +676,43 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
676
676
  checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f"
677
677
  dependencies = [
678
678
  "async-trait",
679
- "axum-core",
679
+ "axum-core 0.4.5",
680
680
  "bytes",
681
681
  "futures-util",
682
682
  "http 1.3.1",
683
683
  "http-body 1.0.1",
684
684
  "http-body-util",
685
+ "itoa",
686
+ "matchit 0.7.3",
687
+ "memchr",
688
+ "mime",
689
+ "percent-encoding",
690
+ "pin-project-lite",
691
+ "rustversion",
692
+ "serde",
693
+ "sync_wrapper",
694
+ "tower 0.5.2",
695
+ "tower-layer",
696
+ "tower-service",
697
+ ]
698
+
699
+ [[package]]
700
+ name = "axum"
701
+ version = "0.8.4"
702
+ source = "registry+https://github.com/rust-lang/crates.io-index"
703
+ checksum = "021e862c184ae977658b36c4500f7feac3221ca5da43e3f25bd04ab6c79a29b5"
704
+ dependencies = [
705
+ "axum-core 0.5.2",
706
+ "bytes",
707
+ "form_urlencoded",
708
+ "futures-util",
709
+ "http 1.3.1",
710
+ "http-body 1.0.1",
711
+ "http-body-util",
685
712
  "hyper 1.6.0",
686
713
  "hyper-util",
687
714
  "itoa",
688
- "matchit",
715
+ "matchit 0.8.4",
689
716
  "memchr",
690
717
  "mime",
691
718
  "percent-encoding",
@@ -721,28 +748,48 @@ dependencies = [
721
748
  "sync_wrapper",
722
749
  "tower-layer",
723
750
  "tower-service",
751
+ ]
752
+
753
+ [[package]]
754
+ name = "axum-core"
755
+ version = "0.5.2"
756
+ source = "registry+https://github.com/rust-lang/crates.io-index"
757
+ checksum = "68464cd0412f486726fb3373129ef5d2993f90c34bc2bc1c1e9943b2f4fc7ca6"
758
+ dependencies = [
759
+ "bytes",
760
+ "futures-core",
761
+ "http 1.3.1",
762
+ "http-body 1.0.1",
763
+ "http-body-util",
764
+ "mime",
765
+ "pin-project-lite",
766
+ "rustversion",
767
+ "sync_wrapper",
768
+ "tower-layer",
769
+ "tower-service",
724
770
  "tracing",
725
771
  ]
726
772
 
727
773
  [[package]]
728
774
  name = "axum-extra"
729
- version = "0.9.6"
775
+ version = "0.10.1"
730
776
  source = "registry+https://github.com/rust-lang/crates.io-index"
731
- checksum = "c794b30c904f0a1c2fb7740f7df7f7972dfaa14ef6f57cb6178dc63e5dca2f04"
777
+ checksum = "45bf463831f5131b7d3c756525b305d40f1185b688565648a92e1392ca35713d"
732
778
  dependencies = [
733
- "axum",
734
- "axum-core",
779
+ "axum 0.8.4",
780
+ "axum-core 0.5.2",
735
781
  "bytes",
736
- "fastrand",
782
+ "form_urlencoded",
737
783
  "futures-util",
738
784
  "http 1.3.1",
739
785
  "http-body 1.0.1",
740
786
  "http-body-util",
741
787
  "mime",
742
- "multer",
743
788
  "pin-project-lite",
789
+ "rustversion",
744
790
  "serde",
745
791
  "serde_html_form",
792
+ "serde_path_to_error",
746
793
  "tower 0.5.2",
747
794
  "tower-layer",
748
795
  "tower-service",
@@ -993,7 +1040,7 @@ dependencies = [
993
1040
 
994
1041
  [[package]]
995
1042
  name = "cocoindex"
996
- version = "0.1.48"
1043
+ version = "0.1.50"
997
1044
  dependencies = [
998
1045
  "anyhow",
999
1046
  "async-openai",
@@ -1002,7 +1049,7 @@ dependencies = [
1002
1049
  "aws-config",
1003
1050
  "aws-sdk-s3",
1004
1051
  "aws-sdk-sqs",
1005
- "axum",
1052
+ "axum 0.8.4",
1006
1053
  "axum-extra",
1007
1054
  "base64 0.22.1",
1008
1055
  "blake2",
@@ -1028,6 +1075,7 @@ dependencies = [
1028
1075
  "neo4rs",
1029
1076
  "owo-colors",
1030
1077
  "pgvector",
1078
+ "phf",
1031
1079
  "pyo3",
1032
1080
  "pyo3-async-runtimes",
1033
1081
  "pythonize",
@@ -2610,6 +2658,12 @@ version = "0.7.3"
2610
2658
  source = "registry+https://github.com/rust-lang/crates.io-index"
2611
2659
  checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94"
2612
2660
 
2661
+ [[package]]
2662
+ name = "matchit"
2663
+ version = "0.8.4"
2664
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2665
+ checksum = "47e1ffaa40ddd1f3ed91f717a33c8c0ee23fff369e3aa8772b9605cc1d22f4c3"
2666
+
2613
2667
  [[package]]
2614
2668
  name = "md-5"
2615
2669
  version = "0.10.6"
@@ -2677,23 +2731,6 @@ dependencies = [
2677
2731
  "windows-sys 0.52.0",
2678
2732
  ]
2679
2733
 
2680
- [[package]]
2681
- name = "multer"
2682
- version = "3.1.0"
2683
- source = "registry+https://github.com/rust-lang/crates.io-index"
2684
- checksum = "83e87776546dc87511aa5ee218730c92b666d7264ab6ed41f9d215af9cd5224b"
2685
- dependencies = [
2686
- "bytes",
2687
- "encoding_rs",
2688
- "futures-util",
2689
- "http 1.3.1",
2690
- "httparse",
2691
- "memchr",
2692
- "mime",
2693
- "spin",
2694
- "version_check",
2695
- ]
2696
-
2697
2734
  [[package]]
2698
2735
  name = "neo4rs"
2699
2736
  version = "0.8.0"
@@ -2993,6 +3030,7 @@ version = "0.11.3"
2993
3030
  source = "registry+https://github.com/rust-lang/crates.io-index"
2994
3031
  checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078"
2995
3032
  dependencies = [
3033
+ "phf_macros",
2996
3034
  "phf_shared",
2997
3035
  ]
2998
3036
 
@@ -3016,6 +3054,19 @@ dependencies = [
3016
3054
  "rand 0.8.5",
3017
3055
  ]
3018
3056
 
3057
+ [[package]]
3058
+ name = "phf_macros"
3059
+ version = "0.11.3"
3060
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3061
+ checksum = "f84ac04429c13a7ff43785d75ad27569f2951ce0ffd30a3321230db2fc727216"
3062
+ dependencies = [
3063
+ "phf_generator",
3064
+ "phf_shared",
3065
+ "proc-macro2",
3066
+ "quote",
3067
+ "syn 2.0.101",
3068
+ ]
3069
+
3019
3070
  [[package]]
3020
3071
  name = "phf_shared"
3021
3072
  version = "0.11.3"
@@ -4653,7 +4704,7 @@ checksum = "877c5b330756d856ffcc4553ab34a5684481ade925ecc54bcd1bf02b1d0d4d52"
4653
4704
  dependencies = [
4654
4705
  "async-stream",
4655
4706
  "async-trait",
4656
- "axum",
4707
+ "axum 0.7.9",
4657
4708
  "base64 0.22.1",
4658
4709
  "bytes",
4659
4710
  "flate2",
@@ -2,7 +2,7 @@
2
2
  name = "cocoindex"
3
3
  # Version used for local development is always higher than others to take precedence.
4
4
  # Will be overridden for specific release versions.
5
- version = "0.1.48"
5
+ version = "0.1.50"
6
6
  edition = "2024"
7
7
 
8
8
  [profile.release]
@@ -20,8 +20,8 @@ pyo3-async-runtimes = { version = "0.24.0", features = ["tokio-runtime"] }
20
20
 
21
21
  anyhow = { version = "1.0.97", features = ["std"] }
22
22
  async-trait = "0.1.88"
23
- axum = "0.7.9"
24
- axum-extra = { version = "0.9.6", features = ["query"] }
23
+ axum = "0.8.4"
24
+ axum-extra = { version = "0.10.1", features = ["query"] }
25
25
  base64 = "0.22.1"
26
26
  chrono = "0.4.40"
27
27
  config = "0.14.1"
@@ -50,6 +50,7 @@ tower-http = { version = "0.6.2", features = ["cors", "trace"] }
50
50
  indexmap = { version = "2.8.0", features = ["serde"] }
51
51
  blake2 = "0.10.6"
52
52
  pgvector = { version = "0.4.0", features = ["sqlx"] }
53
+ phf = { version = "0.11.3", features = ["macros"] }
53
54
  indenter = "0.3.3"
54
55
  itertools = "0.14.0"
55
56
  derivative = "2.2.0"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cocoindex
3
- Version: 0.1.48
3
+ Version: 0.1.50
4
4
  Requires-Dist: sentence-transformers>=3.3.1
5
5
  Requires-Dist: click>=8.1.8
6
6
  Requires-Dist: rich>=14.0.0
@@ -36,16 +36,17 @@ This is the list of all basic types supported by CocoIndex:
36
36
  | LocalDatetime | Date and time without timezone | `cocoindex.LocalDateTime` | `datetime.datetime` |
37
37
  | OffsetDatetime | Date and time with a timezone offset | `cocoindex.OffsetDateTime` | `datetime.datetime` |
38
38
  | TimeDelta | A duration of time | `datetime.timedelta` | `datetime.timedelta` |
39
- | Vector[*T*, *Dim*?] | *T* must be basic type. *Dim* is a positive integer and optional. |`cocoindex.Vector[T]` or `cocoindex.Vector[T, Dim]` | `list[T]` |
40
39
  | Json | | `cocoindex.Json` | Any data convertible to JSON by `json` package |
40
+ | Vector[*T*, *Dim*?] | *T* can be a basic type or a numeric type. *Dim* is a positive integer and optional. | `cocoindex.Vector[T]` or `cocoindex.Vector[T, Dim]` | `numpy.typing.NDArray[T]` or `list[T]` |
41
41
 
42
42
  Values of all data types can be represented by values in Python's native types (as described under the Native Python Type column).
43
43
  However, the underlying execution engine and some storage systems (like Postgres) have finer distinctions for some types, specifically:
44
44
 
45
45
  * *Float32* and *Float64* for `float`, with different precision.
46
46
  * *LocalDateTime* and *OffsetDateTime* for `datetime.datetime`, with different timezone awareness.
47
- * *Vector* has optional dimension information.
48
47
  * *Range* and *Json* provide a clear tag for the type, to clearly distinguish the type in CocoIndex.
48
+ * *Vector* holds elements of type *T*. If *T* is numeric (e.g., `np.float32` or `np.float64`), it's represented as `NDArray[T]`; otherwise, as `list[T]`.
49
+ * *Vector* also has optional dimension information.
49
50
 
50
51
  The native Python type is always more permissive and can represent a superset of possible values.
51
52
  * Only when you annotate the return type of a custom function, you should use the specific type,
@@ -0,0 +1,36 @@
1
+ ---
2
+ title: Overview
3
+ slug: /
4
+ ---
5
+
6
+ # Welcome to CocoIndex
7
+
8
+ CocoIndex is an ultra-performant real-time data transformation framework for AI, with incremental processing.
9
+
10
+ As a data framework, CocoIndex takes data freshness to the next level. **Incremental processing** is one of the core values provided by CocoIndex.
11
+
12
+ ![Incremental Processing](/img/incremental-etl.gif)
13
+
14
+ ## Programming Model
15
+ CocoIndex follows the [Dataflow programming](https://en.wikipedia.org/wiki/Dataflow_programming) model. Each transformation creates a new field solely based on input fields, without hidden state or value mutation. All data before/after each transformation is observable, with lineage out of the box.
16
+
17
+ The gist of an example data transformation:
18
+ ```python
19
+ # import
20
+ data['content'] = flow_builder.add_source(...)
21
+
22
+ # transform
23
+ data['out'] = data['content']
24
+ .transform(...)
25
+ .transform(...)
26
+
27
+ # collect data
28
+ collector.collect(...)
29
+
30
+ # export to db, vector db, graph db ...
31
+ collector.export(...)
32
+ ```
33
+
34
+ Get Started:
35
+ - [Quick Start](https://cocoindex.io/docs/getting_started/quickstart)
36
+
@@ -154,11 +154,11 @@ The goal of transforming your data is usually to query against it.
154
154
  Once you already have your index built, you can directly access the transformed data in the target database.
155
155
  CocoIndex also provides utilities for you to do this more seamlessly.
156
156
 
157
- In this example, we'll use the [`psycopg` library](https://www.psycopg.org/) to connect to the database and run queries.
158
- Please make sure it's installed:
157
+ In this example, we'll use the [`psycopg` library](https://www.psycopg.org/) along with pgvector to connect to the database and run queries on vector data.
158
+ Please make sure the required packages are installed:
159
159
 
160
160
  ```bash
161
- pip install psycopg[binary,pool]
161
+ pip install numpy psycopg[binary,pool] pgvector
162
162
  ```
163
163
 
164
164
  ### Step 4.1: Extract common transformations
@@ -169,8 +169,11 @@ i.e. they should use exactly the same embedding model and parameters.
169
169
  Let's extract that into a function:
170
170
 
171
171
  ```python title="quickstart.py"
172
+ from numpy.typing import NDArray
173
+ import numpy as np
174
+
172
175
  @cocoindex.transform_flow()
173
- def text_to_embedding(text: cocoindex.DataSlice[str]) -> cocoindex.DataSlice[list[float]]:
176
+ def text_to_embedding(text: cocoindex.DataSlice[str]) -> cocoindex.DataSlice[NDArray[np.float32]]:
174
177
  return text.transform(
175
178
  cocoindex.functions.SentenceTransformerEmbed(
176
179
  model="sentence-transformers/all-MiniLM-L6-v2"))
@@ -207,6 +210,7 @@ Now we can create a function to query the index upon a given input query:
207
210
 
208
211
  ```python title="quickstart.py"
209
212
  from psycopg_pool import ConnectionPool
213
+ from pgvector.psycopg import register_vector
210
214
 
211
215
  def search(pool: ConnectionPool, query: str, top_k: int = 5):
212
216
  # Get the table name, for the export target in the text_embedding_flow above.
@@ -215,9 +219,10 @@ def search(pool: ConnectionPool, query: str, top_k: int = 5):
215
219
  query_vector = text_to_embedding.eval(query)
216
220
  # Run the query and get the results.
217
221
  with pool.connection() as conn:
222
+ register_vector(conn)
218
223
  with conn.cursor() as cur:
219
224
  cur.execute(f"""
220
- SELECT filename, text, embedding <=> %s::vector AS distance
225
+ SELECT filename, text, embedding <=> %s AS distance
221
226
  FROM {table_name} ORDER BY distance LIMIT %s
222
227
  """, (query_vector, top_k))
223
228
  return [
@@ -236,7 +241,7 @@ There're two CocoIndex-specific logic:
236
241
 
237
242
  2. Evaluate the transform flow defined above with the input query, to get the embedding.
238
243
  It's done by the `eval()` method of the transform flow `text_to_embedding`.
239
- The return type of this method is `list[float]` as declared in the `text_to_embedding()` function (`cocoindex.DataSlice[list[float]]`).
244
+ The return type of this method is `NDArray[np.float32]` as declared in the `text_to_embedding()` function (`cocoindex.DataSlice[NDArray[np.float32]]`).
240
245
 
241
246
  ### Step 4.3: Add the main script logic
242
247
 
@@ -39,9 +39,27 @@ Input data:
39
39
 
40
40
  * `chunk_overlap` (type: `int`, optional): The maximum overlap size between adjacent chunks, in bytes.
41
41
  * `language` (type: `str`, optional): The language of the document.
42
- Can be a langauge name (e.g. `Python`, `Javascript`, `Markdown`) or a file extension (e.g. `.py`, `.js`, `.md`).
43
- To see all supported language names and extensions, see [the code](https://github.com/search?q=org%3Acocoindex-io+lang%3Arust++%22static+TREE_SITTER_LANGUAGE_BY_LANG%22&type=code).
44
- If it's unspecified or the specified language is not supported, it will be treated as plain text.
42
+ Can be a language name (e.g. `Python`, `Javascript`, `Markdown`) or a file extension (e.g. `.py`, `.js`, `.md`).
43
+
44
+ * `custom_languages` (type: `list[CustomLanguageSpec]`, optional): This allows you to customize how specific languages are chunked, using regular expressions. Each `CustomLanguageSpec` is a dict with the following fields:
45
+ * `language_name` (type: `str`, required): Name of the language.
46
+ * `aliases` (type: `list[str]`, optional): A list of aliases for the language.
47
+ It's an error if any language name or alias is duplicated.
48
+
49
+ * `separators_regex` (type: `list[str]`, required): A list of regex patterns to split the text.
50
+ Higher-level boundaries should come first, and lower-level should be listed later. e.g. `[r"\n# ", r"\n## ", r"\n\n", r"\. "]`.
51
+ See [regex Syntax](https://docs.rs/regex/latest/regex/#syntax) for supported regular expression syntax.
52
+
53
+ :::note
54
+
55
+ We use the `language` field to determine how to split the input text, following these rules:
56
+
57
+ * We'll match the input `language` field against the `language_name` or `aliases` of each custom language specification, and use the matched one. If the value of `language` is null, it'll be treated as an empty string when matching `language_name` or `aliases`.
58
+ * If no match is found, we'll match the `language` field against the builtin language configurations.
59
+ For all supported builtin language names and aliases (extensions), see [the code](https://github.com/search?q=org%3Acocoindex-io+lang%3Arust++%22static+TREE_SITTER_LANGUAGE_BY_LANG%22&type=code).
60
+ * If no match is found, the input will be treated as plain text.
61
+
62
+ :::
45
63
 
46
64
  Return type: [KTable](/docs/core/data_types#ktable), each row represents a chunk, with the following sub fields:
47
65
 
@@ -54,34 +54,21 @@ Here's how CocoIndex data elements map to Qdrant elements during export:
54
54
  |-------------------|------------------|
55
55
  | an export target | a unique collection |
56
56
  | a collected row | a point |
57
- | a field | a named vector (for fields with vector type); a field within payload (otherwise) |
57
+ | a field | a named vector, if it fits into a Qdrant vector; or a field within the payload otherwise |
58
+
59
+ A vector with `Float32`, `Float64` or `Int64` type, and with a fixed dimension, fits into a Qdrant vector.
58
60
 
59
61
  #### Spec
60
62
 
61
63
  The spec takes the following fields:
62
64
 
63
- * `collection_name` (type: `str`, required): The name of the collection to export the data to.
64
-
65
- * `grpc_url` (type: `str`, optional): The [gRPC URL](https://qdrant.tech/documentation/interfaces/#grpc-interface) of the Qdrant instance. Defaults to `http://localhost:6334/`.
66
-
67
- * `api_key` (type: `str`, optional). API key to authenticate requests with.
65
+ * `connection` (type: [auth reference](../core/flow_def#auth-registry) to `QdrantConnection`, optional): The connection to the Qdrant instance. `QdrantConnection` has the following fields:
66
+ * `grpc_url` (type: `str`): The [gRPC URL](https://qdrant.tech/documentation/interfaces/#grpc-interface) of the Qdrant instance, e.g. `http://localhost:6334/`.
67
+ * `api_key` (type: `str`, optional): API key to authenticate requests with.
68
68
 
69
- Before exporting, you must create a collection with a [vector name](https://qdrant.tech/documentation/concepts/vectors/#named-vectors) that matches the vector field name in CocoIndex, and set `setup_by_user=True` during export.
69
+ If `connection` is not provided, the local Qdrant instance at `http://localhost:6334/` will be used by default.
70
70
 
71
- Example:
72
-
73
- ```python
74
- doc_embeddings.export(
75
- "doc_embeddings",
76
- cocoindex.storages.Qdrant(
77
- collection_name="cocoindex",
78
- grpc_url="https://xyz-example.cloud-region.cloud-provider.cloud.qdrant.io:6334/",
79
- api_key="<your-api-key-here>",
80
- ),
81
- primary_key_fields=["id_field"],
82
- setup_by_user=True,
83
- )
84
- ```
71
+ * `collection_name` (type: `str`, required): The name of the collection to export the data to.
85
72
 
86
73
  You can find an end-to-end example [here](https://github.com/cocoindex-io/cocoindex/tree/main/examples/text_embedding_qdrant).
87
74
 
@@ -399,19 +386,7 @@ You can find end-to-end examples fitting into any of supported property graphs i
399
386
 
400
387
  ### Neo4j
401
388
 
402
- If you don't have a Neo4j database, you can start a Neo4j database using our docker compose config:
403
-
404
- ```bash
405
- docker compose -f <(curl -L https://raw.githubusercontent.com/cocoindex-io/cocoindex/refs/heads/main/dev/neo4j.yaml) up -d
406
- ```
407
-
408
- :::warning
409
-
410
- The docker compose config above will start a Neo4j Enterprise instance under the [Evaluation License](https://neo4j.com/terms/enterprise_us/),
411
- with 30 days trial period.
412
- Please read and agree the license before starting the instance.
413
-
414
- :::
389
+ #### Spec
415
390
 
416
391
  The `Neo4j` target spec takes the following fields:
417
392
 
@@ -430,17 +405,32 @@ Neo4j also provides a declaration spec `Neo4jDeclaration`, to configure indexing
430
405
  * `primary_key_fields` (required)
431
406
  * `vector_indexes` (optional)
432
407
 
433
- ### Kuzu
408
+ #### Neo4j dev instance
434
409
 
435
- CocoIndex supports talking to Kuzu through its [API server](https://github.com/kuzudb/api-server).
436
- You can bring up a Kuzu API server locally by running:
410
+ If you don't have a Neo4j database, you can start a Neo4j database using our docker compose config:
437
411
 
438
412
  ```bash
439
- KUZU_DB_DIR=$HOME/.kuzudb
440
- KUZU_PORT=8123
441
- docker run -d --name kuzu -p ${KUZU_PORT}:8000 -v ${KUZU_DB_DIR}:/database kuzudb/api-server:latest
413
+ docker compose -f <(curl -L https://raw.githubusercontent.com/cocoindex-io/cocoindex/refs/heads/main/dev/neo4j.yaml) up -d
442
414
  ```
443
415
 
416
+ It will bring up a Neo4j instance, which can be accessed by username `neo4j` and password `cocoindex`.
417
+ You can access the Neo4j browser at [http://localhost:7474](http://localhost:7474).
418
+
419
+ :::warning
420
+
421
+ The docker compose config above will start a Neo4j Enterprise instance under the [Evaluation License](https://neo4j.com/terms/enterprise_us/),
422
+ with 30 days trial period.
423
+ Please read and agree to the license before starting the instance.
424
+
425
+ :::
426
+
427
+
428
+ ### Kuzu
429
+
430
+ #### Spec
431
+
432
+ CocoIndex supports talking to Kuzu through its [API server](https://github.com/kuzudb/api-server).
433
+
444
434
  The `Kuzu` target spec takes the following fields:
445
435
 
446
436
  * `connection` (type: [auth reference](../core/flow_def#auth-registry) to `KuzuConnectionSpec`): The connection to the Kuzu database. `KuzuConnectionSpec` has the following fields:
@@ -453,3 +443,25 @@ Kuzu also provides a declaration spec `KuzuDeclaration`, to configure indexing o
453
443
  * Fields for [nodes to declare](#declare-extra-node-labels), including
454
444
  * `nodes_label` (required)
455
445
  * `primary_key_fields` (required)
446
+
447
+ #### Kuzu dev instance
448
+
449
+ If you don't have a Kuzu instance yet, you can bring up a Kuzu API server locally by running:
450
+
451
+ ```bash
452
+ KUZU_DB_DIR=$HOME/.kuzudb
453
+ KUZU_PORT=8123
454
+ docker run -d --name kuzu -p ${KUZU_PORT}:8000 -v ${KUZU_DB_DIR}:/database kuzudb/api-server:latest
455
+ ```
456
+
457
+ To explore the graph you built with Kuzu, you can use the [Kuzu Explorer](https://github.com/kuzudb/explorer).
458
+ Currently Kuzu API server and the explorer cannot be up at the same time. So you need to stop the API server before running the explorer.
459
+
460
+ To start the instance of the explorer, run:
461
+
462
+ ```bash
463
+ KUZU_EXPLORER_PORT=8124
464
+ docker run -d --name kuzu-explorer -p ${KUZU_EXPLORER_PORT}:8000 -v ${KUZU_DB_DIR}:/database -e MODE=READ_ONLY kuzudb/explorer:latest
465
+ ```
466
+
467
+ You can then access the explorer at [http://localhost:8124](http://localhost:8124).
@@ -41,7 +41,7 @@ The [quickstart](getting_started/quickstart#step-41-extract-common-transformatio
41
41
 
42
42
  ```python
43
43
  @cocoindex.transform_flow()
44
- def text_to_embedding(text: cocoindex.DataSlice[str]) -> cocoindex.DataSlice[list[float]]:
44
+ def text_to_embedding(text: cocoindex.DataSlice[str]) -> cocoindex.DataSlice[NDArray[np.float32]]:
45
45
  return text.transform(
46
46
  cocoindex.functions.SentenceTransformerEmbed(
47
47
  model="sentence-transformers/all-MiniLM-L6-v2"))
@@ -61,7 +61,7 @@ with doc["chunks"].row() as chunk:
61
61
  chunk["embedding"] = chunk["text"].call(text_to_embedding)
62
62
  ```
63
63
 
64
- Any time, you can call the `eval()` method with specific string, which will return a `list[float]`:
64
+ Any time, you can call the `eval()` method with a specific string, which will return an `NDArray[np.float32]`:
65
65
 
66
66
  ```python
67
67
  print(text_to_embedding.eval("Hello, world!"))
@@ -93,7 +93,7 @@ For example:
93
93
 
94
94
  ```python
95
95
  table_name = cocoindex.utils.get_target_storage_default_name(text_embedding_flow, "doc_embeddings")
96
- query = f"SELECT filename, text FROM {table_name} ORDER BY embedding <=> %s::vector DESC LIMIT 5"
96
+ query = f"SELECT filename, text FROM {table_name} ORDER BY embedding <=> %s DESC LIMIT 5"
97
97
  ...
98
98
  ```
99
99
 
@@ -12,10 +12,10 @@ Please drop [Cocoindex on Github](https://github.com/cocoindex-io/cocoindex) a s
12
12
 
13
13
  ![example-explanation](https://github.com/user-attachments/assets/07ddbd60-106f-427f-b7cc-16b73b142d27)
14
14
 
15
-
16
15
  ## Prerequisite
17
16
  * [Install Postgres](https://cocoindex.io/docs/getting_started/installation#-install-postgres) if you don't have one.
18
- * [Install Neo4j](https://cocoindex.io/docs/ops/storages#neo4j) if you don't have one.
17
+ * Install [Neo4j](https://cocoindex.io/docs/ops/storages#neo4j-dev-instance) or [Kuzu](https://cocoindex.io/docs/ops/storages#kuzu-dev-instance) if you don't have one.
18
+ * The example uses Neo4j by default for now. If you want to use Kuzu, find the "SELECT ONE GRAPH DATABASE TO USE" section and switch the active branch.
19
19
  * [Configure your OpenAI API key](https://cocoindex.io/docs/ai/llm#openai).
20
20
 
21
21
  ## Documentation
@@ -45,21 +45,18 @@ cocoindex update main.py
45
45
 
46
46
  ### Browse the knowledge graph
47
47
 
48
- After the knowledge graph is build, you can explore the knowledge graph you built in Neo4j Browser.
48
+ After the knowledge graph is built, you can explore the knowledge graph.
49
49
 
50
- For the dev enviroment, you can connect neo4j browser using credentials:
51
- - username: `neo4j`
52
- - password: `cocoindex`
53
- which is pre-configured in the our docker compose [config.yaml](https://raw.githubusercontent.com/cocoindex-io/cocoindex/refs/heads/main/dev/neo4j.yaml).
50
+ * If you're using Neo4j, you can open the explorer at [http://localhost:7474](http://localhost:7474), with username `neo4j` and password `cocoindex`.
51
+ * If you're using Kuzu, you can start a Kuzu explorer locally. See [Kuzu dev instance](https://cocoindex.io/docs/ops/storages#kuzu-dev-instance) for more details.
54
52
 
55
- You can open it at [http://localhost:7474](http://localhost:7474), and run the following Cypher query to get all relationships:
53
+ You can run the following Cypher query to get all relationships:
56
54
 
57
55
  ```cypher
58
56
  MATCH p=()-->() RETURN p
59
57
  ```
60
- <img width="1366" alt="neo4j-for-coco-docs" src="https://github.com/user-attachments/assets/3c8b6329-6fee-4533-9480-571399b57e57" />
61
-
62
58
 
59
+ <img width="1366" alt="neo4j-for-coco-docs" src="https://github.com/user-attachments/assets/3c8b6329-6fee-4533-9480-571399b57e57" />
63
60
 
64
61
  ## CocoInsight
65
62
  I used CocoInsight (Free beta now) to troubleshoot the index generation and understand the data lineage of the pipeline.
@@ -5,27 +5,6 @@ This example shows how to extract relationships from documents and build a knowl
5
5
  import dataclasses
6
6
  import cocoindex
7
7
 
8
-
9
- @dataclasses.dataclass
10
- class DocumentSummary:
11
- """Describe a summary of a document."""
12
-
13
- title: str
14
- summary: str
15
-
16
-
17
- @dataclasses.dataclass
18
- class Relationship:
19
- """
20
- Describe a relationship between two entities.
21
- Subject and object should be Core CocoIndex concepts only, should be nouns. For example, `CocoIndex`, `Incremental Processing`, `ETL`, `Data` etc.
22
- """
23
-
24
- subject: str
25
- predicate: str
26
- object: str
27
-
28
-
29
8
  neo4j_conn_spec = cocoindex.add_auth_entry(
30
9
  "Neo4jConnection",
31
10
  cocoindex.storages.Neo4jConnection(
@@ -41,19 +20,43 @@ kuzu_conn_spec = cocoindex.add_auth_entry(
41
20
  ),
42
21
  )
43
22
 
44
- # Use Neo4j as the graph database
23
+ # SELECT ONE GRAPH DATABASE TO USE
24
+ # This example can use either Neo4j or Kuzu as the graph database.
25
+ # Please make sure only one branch is live and others are commented out.
26
+
27
+ # Use Neo4j
45
28
  GraphDbSpec = cocoindex.storages.Neo4j
46
29
  GraphDbConnection = cocoindex.storages.Neo4jConnection
47
30
  GraphDbDeclaration = cocoindex.storages.Neo4jDeclaration
48
31
  conn_spec = neo4j_conn_spec
49
32
 
50
- # Use Kuzu as the graph database
33
+ # Use Kuzu
51
34
  # GraphDbSpec = cocoindex.storages.Kuzu
52
35
  # GraphDbConnection = cocoindex.storages.KuzuConnection
53
36
  # GraphDbDeclaration = cocoindex.storages.KuzuDeclaration
54
37
  # conn_spec = kuzu_conn_spec
55
38
 
56
39
 
40
+ @dataclasses.dataclass
41
+ class DocumentSummary:
42
+ """Describe a summary of a document."""
43
+
44
+ title: str
45
+ summary: str
46
+
47
+
48
+ @dataclasses.dataclass
49
+ class Relationship:
50
+ """
51
+ Describe a relationship between two entities.
52
+ Subject and object should be Core CocoIndex concepts only, should be nouns. For example, `CocoIndex`, `Incremental Processing`, `ETL`, `Data` etc.
53
+ """
54
+
55
+ subject: str
56
+ predicate: str
57
+ object: str
58
+
59
+
57
60
  @cocoindex.flow_def(name="DocsToKG")
58
61
  def docs_to_kg_flow(
59
62
  flow_builder: cocoindex.FlowBuilder, data_scope: cocoindex.DataScope