cocoindex 0.1.38__tar.gz → 0.1.40__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (235) hide show
  1. {cocoindex-0.1.38 → cocoindex-0.1.40}/Cargo.lock +1 -1
  2. {cocoindex-0.1.38 → cocoindex-0.1.40}/Cargo.toml +1 -1
  3. {cocoindex-0.1.38 → cocoindex-0.1.40}/PKG-INFO +2 -2
  4. {cocoindex-0.1.38 → cocoindex-0.1.40}/README.md +1 -1
  5. {cocoindex-0.1.38 → cocoindex-0.1.40}/docs/docs/core/flow_def.mdx +34 -1
  6. {cocoindex-0.1.38 → cocoindex-0.1.40}/docs/docs/core/flow_methods.mdx +1 -1
  7. {cocoindex-0.1.38 → cocoindex-0.1.40}/docs/docs/core/initialization.mdx +3 -2
  8. {cocoindex-0.1.38 → cocoindex-0.1.40}/docs/docs/ops/storages.md +1 -1
  9. {cocoindex-0.1.38 → cocoindex-0.1.40}/examples/amazon_s3_embedding/pyproject.toml +4 -1
  10. {cocoindex-0.1.38 → cocoindex-0.1.40}/examples/code_embedding/pyproject.toml +4 -1
  11. {cocoindex-0.1.38 → cocoindex-0.1.40}/examples/docs_to_knowledge_graph/pyproject.toml +4 -1
  12. {cocoindex-0.1.38 → cocoindex-0.1.40}/examples/gdrive_text_embedding/pyproject.toml +4 -1
  13. {cocoindex-0.1.38 → cocoindex-0.1.40}/examples/manuals_llm_extraction/pyproject.toml +4 -1
  14. {cocoindex-0.1.38 → cocoindex-0.1.40}/examples/pdf_embedding/pyproject.toml +4 -1
  15. {cocoindex-0.1.38/examples/product_taxonomy_knowledge_graph → cocoindex-0.1.40/examples/product_recommendation}/README.md +4 -2
  16. {cocoindex-0.1.38/examples/product_taxonomy_knowledge_graph → cocoindex-0.1.40/examples/product_recommendation}/pyproject.toml +4 -1
  17. {cocoindex-0.1.38 → cocoindex-0.1.40}/examples/text_embedding/main.py +1 -0
  18. {cocoindex-0.1.38 → cocoindex-0.1.40}/examples/text_embedding/pyproject.toml +4 -1
  19. cocoindex-0.1.40/examples/text_embedding_qdrant/.env +2 -0
  20. {cocoindex-0.1.38 → cocoindex-0.1.40}/examples/text_embedding_qdrant/pyproject.toml +4 -1
  21. {cocoindex-0.1.38 → cocoindex-0.1.40}/python/cocoindex/__init__.py +4 -3
  22. {cocoindex-0.1.38 → cocoindex-0.1.40}/python/cocoindex/convert.py +1 -1
  23. {cocoindex-0.1.38 → cocoindex-0.1.40}/python/cocoindex/flow.py +130 -29
  24. {cocoindex-0.1.38 → cocoindex-0.1.40}/python/cocoindex/op.py +12 -12
  25. {cocoindex-0.1.38 → cocoindex-0.1.40}/python/cocoindex/query.py +1 -1
  26. {cocoindex-0.1.38 → cocoindex-0.1.40}/python/cocoindex/setting.py +1 -1
  27. cocoindex-0.1.40/python/cocoindex/utils.py +9 -0
  28. {cocoindex-0.1.38 → cocoindex-0.1.40}/src/builder/analyzed_flow.rs +1 -2
  29. {cocoindex-0.1.38 → cocoindex-0.1.40}/src/builder/analyzer.rs +5 -2
  30. {cocoindex-0.1.38 → cocoindex-0.1.40}/src/builder/flow_builder.rs +23 -18
  31. {cocoindex-0.1.38 → cocoindex-0.1.40}/src/py/mod.rs +23 -0
  32. cocoindex-0.1.38/examples/docs_to_knowledge_graph/.env +0 -3
  33. {cocoindex-0.1.38 → cocoindex-0.1.40}/.cargo/config.toml +0 -0
  34. {cocoindex-0.1.38 → cocoindex-0.1.40}/.env.lib_debug +0 -0
  35. {cocoindex-0.1.38 → cocoindex-0.1.40}/.github/ISSUE_TEMPLATE/🐛-bug-report.md +0 -0
  36. {cocoindex-0.1.38 → cocoindex-0.1.40}/.github/ISSUE_TEMPLATE/💡-feature-request.md +0 -0
  37. {cocoindex-0.1.38 → cocoindex-0.1.40}/.github/scripts/update_version.sh +0 -0
  38. {cocoindex-0.1.38 → cocoindex-0.1.40}/.github/workflows/CI.yml +0 -0
  39. {cocoindex-0.1.38 → cocoindex-0.1.40}/.github/workflows/_test.yml +0 -0
  40. {cocoindex-0.1.38 → cocoindex-0.1.40}/.github/workflows/docs.yml +0 -0
  41. {cocoindex-0.1.38 → cocoindex-0.1.40}/.github/workflows/release.yml +0 -0
  42. {cocoindex-0.1.38 → cocoindex-0.1.40}/.gitignore +0 -0
  43. {cocoindex-0.1.38 → cocoindex-0.1.40}/.vscode/settings.json +0 -0
  44. {cocoindex-0.1.38 → cocoindex-0.1.40}/CODE_OF_CONDUCT.md +0 -0
  45. {cocoindex-0.1.38 → cocoindex-0.1.40}/CONTRIBUTING.md +0 -0
  46. {cocoindex-0.1.38 → cocoindex-0.1.40}/LICENSE +0 -0
  47. {cocoindex-0.1.38 → cocoindex-0.1.40}/dev/neo4j.yaml +0 -0
  48. {cocoindex-0.1.38 → cocoindex-0.1.40}/dev/postgres.yaml +0 -0
  49. {cocoindex-0.1.38 → cocoindex-0.1.40}/docs/.gitignore +0 -0
  50. {cocoindex-0.1.38 → cocoindex-0.1.40}/docs/README.md +0 -0
  51. {cocoindex-0.1.38 → cocoindex-0.1.40}/docs/docs/about/community.md +0 -0
  52. {cocoindex-0.1.38 → cocoindex-0.1.40}/docs/docs/about/contributing.md +0 -0
  53. {cocoindex-0.1.38 → cocoindex-0.1.40}/docs/docs/ai/llm.mdx +0 -0
  54. {cocoindex-0.1.38 → cocoindex-0.1.40}/docs/docs/core/basics.md +0 -0
  55. {cocoindex-0.1.38 → cocoindex-0.1.40}/docs/docs/core/cli.mdx +0 -0
  56. {cocoindex-0.1.38 → cocoindex-0.1.40}/docs/docs/core/custom_function.mdx +0 -0
  57. {cocoindex-0.1.38 → cocoindex-0.1.40}/docs/docs/core/data_example.svg +0 -0
  58. {cocoindex-0.1.38 → cocoindex-0.1.40}/docs/docs/core/data_types.mdx +0 -0
  59. {cocoindex-0.1.38 → cocoindex-0.1.40}/docs/docs/core/flow_example.svg +0 -0
  60. {cocoindex-0.1.38 → cocoindex-0.1.40}/docs/docs/getting_started/installation.md +0 -0
  61. {cocoindex-0.1.38 → cocoindex-0.1.40}/docs/docs/getting_started/markdown_files.zip +0 -0
  62. {cocoindex-0.1.38 → cocoindex-0.1.40}/docs/docs/getting_started/overview.md +0 -0
  63. {cocoindex-0.1.38 → cocoindex-0.1.40}/docs/docs/getting_started/quickstart.md +0 -0
  64. {cocoindex-0.1.38 → cocoindex-0.1.40}/docs/docs/ops/functions.md +0 -0
  65. {cocoindex-0.1.38 → cocoindex-0.1.40}/docs/docs/ops/sources.md +0 -0
  66. {cocoindex-0.1.38 → cocoindex-0.1.40}/docs/docusaurus.config.ts +0 -0
  67. {cocoindex-0.1.38 → cocoindex-0.1.40}/docs/package.json +0 -0
  68. {cocoindex-0.1.38 → cocoindex-0.1.40}/docs/sidebars.ts +0 -0
  69. {cocoindex-0.1.38 → cocoindex-0.1.40}/docs/src/components/HomepageFeatures/index.tsx +0 -0
  70. {cocoindex-0.1.38 → cocoindex-0.1.40}/docs/src/components/HomepageFeatures/styles.module.css +0 -0
  71. {cocoindex-0.1.38 → cocoindex-0.1.40}/docs/src/css/custom.css +0 -0
  72. {cocoindex-0.1.38 → cocoindex-0.1.40}/docs/src/theme/Root.js +0 -0
  73. {cocoindex-0.1.38 → cocoindex-0.1.40}/docs/static/.nojekyll +0 -0
  74. {cocoindex-0.1.38 → cocoindex-0.1.40}/docs/static/img/docusaurus.png +0 -0
  75. {cocoindex-0.1.38 → cocoindex-0.1.40}/docs/static/img/favicon.ico +0 -0
  76. {cocoindex-0.1.38 → cocoindex-0.1.40}/docs/static/img/icon.svg +0 -0
  77. {cocoindex-0.1.38 → cocoindex-0.1.40}/docs/static/robots.txt +0 -0
  78. {cocoindex-0.1.38 → cocoindex-0.1.40}/docs/tsconfig.json +0 -0
  79. {cocoindex-0.1.38 → cocoindex-0.1.40}/docs/yarn.lock +0 -0
  80. {cocoindex-0.1.38 → cocoindex-0.1.40}/examples/amazon_s3_embedding/.env.example +0 -0
  81. {cocoindex-0.1.38 → cocoindex-0.1.40}/examples/amazon_s3_embedding/.gitignore +0 -0
  82. {cocoindex-0.1.38 → cocoindex-0.1.40}/examples/amazon_s3_embedding/README.md +0 -0
  83. {cocoindex-0.1.38 → cocoindex-0.1.40}/examples/amazon_s3_embedding/main.py +0 -0
  84. {cocoindex-0.1.38 → cocoindex-0.1.40}/examples/code_embedding/.env +0 -0
  85. {cocoindex-0.1.38 → cocoindex-0.1.40}/examples/code_embedding/README.md +0 -0
  86. {cocoindex-0.1.38 → cocoindex-0.1.40}/examples/code_embedding/main.py +0 -0
  87. {cocoindex-0.1.38/examples/manuals_llm_extraction → cocoindex-0.1.40/examples/docs_to_knowledge_graph}/.env +0 -0
  88. {cocoindex-0.1.38 → cocoindex-0.1.40}/examples/docs_to_knowledge_graph/README.md +0 -0
  89. {cocoindex-0.1.38 → cocoindex-0.1.40}/examples/docs_to_knowledge_graph/main.py +0 -0
  90. {cocoindex-0.1.38 → cocoindex-0.1.40}/examples/fastapi_server_docker/.dockerignore +0 -0
  91. {cocoindex-0.1.38 → cocoindex-0.1.40}/examples/fastapi_server_docker/.env +0 -0
  92. {cocoindex-0.1.38 → cocoindex-0.1.40}/examples/fastapi_server_docker/README.md +0 -0
  93. {cocoindex-0.1.38 → cocoindex-0.1.40}/examples/fastapi_server_docker/compose.yaml +0 -0
  94. {cocoindex-0.1.38 → cocoindex-0.1.40}/examples/fastapi_server_docker/dockerfile +0 -0
  95. {cocoindex-0.1.38 → cocoindex-0.1.40}/examples/fastapi_server_docker/main.py +0 -0
  96. {cocoindex-0.1.38 → cocoindex-0.1.40}/examples/fastapi_server_docker/requirements.txt +0 -0
  97. {cocoindex-0.1.38 → cocoindex-0.1.40}/examples/fastapi_server_docker/sample_code/main.py +0 -0
  98. {cocoindex-0.1.38 → cocoindex-0.1.40}/examples/fastapi_server_docker/src/cocoindex_funs.py +0 -0
  99. {cocoindex-0.1.38 → cocoindex-0.1.40}/examples/gdrive_text_embedding/.env.example +0 -0
  100. {cocoindex-0.1.38 → cocoindex-0.1.40}/examples/gdrive_text_embedding/.gitignore +0 -0
  101. {cocoindex-0.1.38 → cocoindex-0.1.40}/examples/gdrive_text_embedding/README.md +0 -0
  102. {cocoindex-0.1.38 → cocoindex-0.1.40}/examples/gdrive_text_embedding/main.py +0 -0
  103. {cocoindex-0.1.38 → cocoindex-0.1.40}/examples/image_search_example/.env +0 -0
  104. {cocoindex-0.1.38 → cocoindex-0.1.40}/examples/image_search_example/README.md +0 -0
  105. {cocoindex-0.1.38 → cocoindex-0.1.40}/examples/image_search_example/frontend/.gitignore +0 -0
  106. {cocoindex-0.1.38 → cocoindex-0.1.40}/examples/image_search_example/frontend/index.html +0 -0
  107. {cocoindex-0.1.38 → cocoindex-0.1.40}/examples/image_search_example/frontend/package-lock.json +0 -0
  108. {cocoindex-0.1.38 → cocoindex-0.1.40}/examples/image_search_example/frontend/package.json +0 -0
  109. {cocoindex-0.1.38 → cocoindex-0.1.40}/examples/image_search_example/frontend/src/App.jsx +0 -0
  110. {cocoindex-0.1.38 → cocoindex-0.1.40}/examples/image_search_example/frontend/src/main.jsx +0 -0
  111. {cocoindex-0.1.38 → cocoindex-0.1.40}/examples/image_search_example/frontend/src/style.css +0 -0
  112. {cocoindex-0.1.38 → cocoindex-0.1.40}/examples/image_search_example/frontend/vite.config.js +0 -0
  113. {cocoindex-0.1.38 → cocoindex-0.1.40}/examples/image_search_example/img/cat1.jpeg +0 -0
  114. {cocoindex-0.1.38 → cocoindex-0.1.40}/examples/image_search_example/img/dog1.jpeg +0 -0
  115. {cocoindex-0.1.38 → cocoindex-0.1.40}/examples/image_search_example/img/elephant1.jpg +0 -0
  116. {cocoindex-0.1.38 → cocoindex-0.1.40}/examples/image_search_example/img/giraffe.jpg +0 -0
  117. {cocoindex-0.1.38 → cocoindex-0.1.40}/examples/image_search_example/main.py +0 -0
  118. {cocoindex-0.1.38 → cocoindex-0.1.40}/examples/image_search_example/requirements.txt +0 -0
  119. {cocoindex-0.1.38/examples/pdf_embedding → cocoindex-0.1.40/examples/manuals_llm_extraction}/.env +0 -0
  120. {cocoindex-0.1.38 → cocoindex-0.1.40}/examples/manuals_llm_extraction/README.md +0 -0
  121. {cocoindex-0.1.38 → cocoindex-0.1.40}/examples/manuals_llm_extraction/main.py +0 -0
  122. {cocoindex-0.1.38 → cocoindex-0.1.40}/examples/manuals_llm_extraction/manuals/array.pdf +0 -0
  123. {cocoindex-0.1.38 → cocoindex-0.1.40}/examples/manuals_llm_extraction/manuals/base64.pdf +0 -0
  124. {cocoindex-0.1.38 → cocoindex-0.1.40}/examples/manuals_llm_extraction/manuals/copy.pdf +0 -0
  125. {cocoindex-0.1.38 → cocoindex-0.1.40}/examples/manuals_llm_extraction/manuals/glob.pdf +0 -0
  126. {cocoindex-0.1.38/examples/text_embedding → cocoindex-0.1.40/examples/pdf_embedding}/.env +0 -0
  127. {cocoindex-0.1.38 → cocoindex-0.1.40}/examples/pdf_embedding/README.md +0 -0
  128. {cocoindex-0.1.38 → cocoindex-0.1.40}/examples/pdf_embedding/main.py +0 -0
  129. {cocoindex-0.1.38 → cocoindex-0.1.40}/examples/pdf_embedding/pdf_files/1706.03762v7.pdf +0 -0
  130. {cocoindex-0.1.38 → cocoindex-0.1.40}/examples/pdf_embedding/pdf_files/1810.04805v2.pdf +0 -0
  131. {cocoindex-0.1.38 → cocoindex-0.1.40}/examples/pdf_embedding/pdf_files/rfc8259.pdf +0 -0
  132. {cocoindex-0.1.38/examples/product_taxonomy_knowledge_graph → cocoindex-0.1.40/examples/product_recommendation}/.env +0 -0
  133. {cocoindex-0.1.38/examples/product_taxonomy_knowledge_graph → cocoindex-0.1.40/examples/product_recommendation}/img/cocoinsight.png +0 -0
  134. {cocoindex-0.1.38/examples/product_taxonomy_knowledge_graph → cocoindex-0.1.40/examples/product_recommendation}/img/neo4j.png +0 -0
  135. {cocoindex-0.1.38/examples/product_taxonomy_knowledge_graph → cocoindex-0.1.40/examples/product_recommendation}/main.py +0 -0
  136. {cocoindex-0.1.38/examples/product_taxonomy_knowledge_graph → cocoindex-0.1.40/examples/product_recommendation}/products/p1.json +0 -0
  137. {cocoindex-0.1.38/examples/product_taxonomy_knowledge_graph → cocoindex-0.1.40/examples/product_recommendation}/products/p2.json +0 -0
  138. {cocoindex-0.1.38/examples/product_taxonomy_knowledge_graph → cocoindex-0.1.40/examples/product_recommendation}/products/p3.json +0 -0
  139. {cocoindex-0.1.38/examples/product_taxonomy_knowledge_graph → cocoindex-0.1.40/examples/product_recommendation}/products/p4.json +0 -0
  140. {cocoindex-0.1.38/examples/product_taxonomy_knowledge_graph → cocoindex-0.1.40/examples/product_recommendation}/products/p5.json +0 -0
  141. {cocoindex-0.1.38/examples/product_taxonomy_knowledge_graph → cocoindex-0.1.40/examples/product_recommendation}/products/p6.json +0 -0
  142. {cocoindex-0.1.38/examples/product_taxonomy_knowledge_graph → cocoindex-0.1.40/examples/product_recommendation}/products/p7.json +0 -0
  143. {cocoindex-0.1.38/examples/product_taxonomy_knowledge_graph → cocoindex-0.1.40/examples/product_recommendation}/products/p8.json +0 -0
  144. {cocoindex-0.1.38/examples/product_taxonomy_knowledge_graph → cocoindex-0.1.40/examples/product_recommendation}/products/p9.json +0 -0
  145. {cocoindex-0.1.38/examples/text_embedding_qdrant → cocoindex-0.1.40/examples/text_embedding}/.env +0 -0
  146. {cocoindex-0.1.38 → cocoindex-0.1.40}/examples/text_embedding/README.md +0 -0
  147. {cocoindex-0.1.38 → cocoindex-0.1.40}/examples/text_embedding/Text_Embedding.ipynb +0 -0
  148. {cocoindex-0.1.38 → cocoindex-0.1.40}/examples/text_embedding/markdown_files/1706.03762v7.md +0 -0
  149. {cocoindex-0.1.38 → cocoindex-0.1.40}/examples/text_embedding/markdown_files/1810.04805v2.md +0 -0
  150. {cocoindex-0.1.38 → cocoindex-0.1.40}/examples/text_embedding/markdown_files/rfc8259.md +0 -0
  151. {cocoindex-0.1.38 → cocoindex-0.1.40}/examples/text_embedding_qdrant/README.md +0 -0
  152. {cocoindex-0.1.38 → cocoindex-0.1.40}/examples/text_embedding_qdrant/main.py +0 -0
  153. {cocoindex-0.1.38 → cocoindex-0.1.40}/examples/text_embedding_qdrant/markdown_files/rfc8259.md +0 -0
  154. {cocoindex-0.1.38 → cocoindex-0.1.40}/pyproject.toml +0 -0
  155. {cocoindex-0.1.38 → cocoindex-0.1.40}/python/cocoindex/auth_registry.py +0 -0
  156. {cocoindex-0.1.38 → cocoindex-0.1.40}/python/cocoindex/cli.py +0 -0
  157. {cocoindex-0.1.38 → cocoindex-0.1.40}/python/cocoindex/functions.py +0 -0
  158. {cocoindex-0.1.38 → cocoindex-0.1.40}/python/cocoindex/index.py +0 -0
  159. {cocoindex-0.1.38 → cocoindex-0.1.40}/python/cocoindex/lib.py +0 -0
  160. {cocoindex-0.1.38 → cocoindex-0.1.40}/python/cocoindex/llm.py +0 -0
  161. {cocoindex-0.1.38 → cocoindex-0.1.40}/python/cocoindex/py.typed +0 -0
  162. {cocoindex-0.1.38 → cocoindex-0.1.40}/python/cocoindex/runtime.py +0 -0
  163. {cocoindex-0.1.38 → cocoindex-0.1.40}/python/cocoindex/setup.py +0 -0
  164. {cocoindex-0.1.38 → cocoindex-0.1.40}/python/cocoindex/sources.py +0 -0
  165. {cocoindex-0.1.38 → cocoindex-0.1.40}/python/cocoindex/storages.py +0 -0
  166. {cocoindex-0.1.38 → cocoindex-0.1.40}/python/cocoindex/tests/__init__.py +0 -0
  167. {cocoindex-0.1.38 → cocoindex-0.1.40}/python/cocoindex/tests/test_convert.py +0 -0
  168. {cocoindex-0.1.38 → cocoindex-0.1.40}/python/cocoindex/typing.py +0 -0
  169. {cocoindex-0.1.38 → cocoindex-0.1.40}/src/base/field_attrs.rs +0 -0
  170. {cocoindex-0.1.38 → cocoindex-0.1.40}/src/base/json_schema.rs +0 -0
  171. {cocoindex-0.1.38 → cocoindex-0.1.40}/src/base/mod.rs +0 -0
  172. {cocoindex-0.1.38 → cocoindex-0.1.40}/src/base/schema.rs +0 -0
  173. {cocoindex-0.1.38 → cocoindex-0.1.40}/src/base/spec.rs +0 -0
  174. {cocoindex-0.1.38 → cocoindex-0.1.40}/src/base/value.rs +0 -0
  175. {cocoindex-0.1.38 → cocoindex-0.1.40}/src/builder/mod.rs +0 -0
  176. {cocoindex-0.1.38 → cocoindex-0.1.40}/src/builder/plan.rs +0 -0
  177. {cocoindex-0.1.38 → cocoindex-0.1.40}/src/execution/db_tracking.rs +0 -0
  178. {cocoindex-0.1.38 → cocoindex-0.1.40}/src/execution/db_tracking_setup.rs +0 -0
  179. {cocoindex-0.1.38 → cocoindex-0.1.40}/src/execution/dumper.rs +0 -0
  180. {cocoindex-0.1.38 → cocoindex-0.1.40}/src/execution/evaluator.rs +0 -0
  181. {cocoindex-0.1.38 → cocoindex-0.1.40}/src/execution/indexing_status.rs +0 -0
  182. {cocoindex-0.1.38 → cocoindex-0.1.40}/src/execution/live_updater.rs +0 -0
  183. {cocoindex-0.1.38 → cocoindex-0.1.40}/src/execution/memoization.rs +0 -0
  184. {cocoindex-0.1.38 → cocoindex-0.1.40}/src/execution/mod.rs +0 -0
  185. {cocoindex-0.1.38 → cocoindex-0.1.40}/src/execution/query.rs +0 -0
  186. {cocoindex-0.1.38 → cocoindex-0.1.40}/src/execution/row_indexer.rs +0 -0
  187. {cocoindex-0.1.38 → cocoindex-0.1.40}/src/execution/source_indexer.rs +0 -0
  188. {cocoindex-0.1.38 → cocoindex-0.1.40}/src/execution/stats.rs +0 -0
  189. {cocoindex-0.1.38 → cocoindex-0.1.40}/src/lib.rs +0 -0
  190. {cocoindex-0.1.38 → cocoindex-0.1.40}/src/lib_context.rs +0 -0
  191. {cocoindex-0.1.38 → cocoindex-0.1.40}/src/llm/anthropic.rs +0 -0
  192. {cocoindex-0.1.38 → cocoindex-0.1.40}/src/llm/gemini.rs +0 -0
  193. {cocoindex-0.1.38 → cocoindex-0.1.40}/src/llm/mod.rs +0 -0
  194. {cocoindex-0.1.38 → cocoindex-0.1.40}/src/llm/ollama.rs +0 -0
  195. {cocoindex-0.1.38 → cocoindex-0.1.40}/src/llm/openai.rs +0 -0
  196. {cocoindex-0.1.38 → cocoindex-0.1.40}/src/ops/factory_bases.rs +0 -0
  197. {cocoindex-0.1.38 → cocoindex-0.1.40}/src/ops/functions/extract_by_llm.rs +0 -0
  198. {cocoindex-0.1.38 → cocoindex-0.1.40}/src/ops/functions/mod.rs +0 -0
  199. {cocoindex-0.1.38 → cocoindex-0.1.40}/src/ops/functions/parse_json.rs +0 -0
  200. {cocoindex-0.1.38 → cocoindex-0.1.40}/src/ops/functions/split_recursively.rs +0 -0
  201. {cocoindex-0.1.38 → cocoindex-0.1.40}/src/ops/interface.rs +0 -0
  202. {cocoindex-0.1.38 → cocoindex-0.1.40}/src/ops/mod.rs +0 -0
  203. {cocoindex-0.1.38 → cocoindex-0.1.40}/src/ops/py_factory.rs +0 -0
  204. {cocoindex-0.1.38 → cocoindex-0.1.40}/src/ops/registration.rs +0 -0
  205. {cocoindex-0.1.38 → cocoindex-0.1.40}/src/ops/registry.rs +0 -0
  206. {cocoindex-0.1.38 → cocoindex-0.1.40}/src/ops/sdk.rs +0 -0
  207. {cocoindex-0.1.38 → cocoindex-0.1.40}/src/ops/sources/amazon_s3.rs +0 -0
  208. {cocoindex-0.1.38 → cocoindex-0.1.40}/src/ops/sources/google_drive.rs +0 -0
  209. {cocoindex-0.1.38 → cocoindex-0.1.40}/src/ops/sources/local_file.rs +0 -0
  210. {cocoindex-0.1.38 → cocoindex-0.1.40}/src/ops/sources/mod.rs +0 -0
  211. {cocoindex-0.1.38 → cocoindex-0.1.40}/src/ops/storages/mod.rs +0 -0
  212. {cocoindex-0.1.38 → cocoindex-0.1.40}/src/ops/storages/neo4j.rs +0 -0
  213. {cocoindex-0.1.38 → cocoindex-0.1.40}/src/ops/storages/postgres.rs +0 -0
  214. {cocoindex-0.1.38 → cocoindex-0.1.40}/src/ops/storages/qdrant.rs +0 -0
  215. {cocoindex-0.1.38 → cocoindex-0.1.40}/src/ops/storages/spec.rs +0 -0
  216. {cocoindex-0.1.38 → cocoindex-0.1.40}/src/prelude.rs +0 -0
  217. {cocoindex-0.1.38 → cocoindex-0.1.40}/src/py/convert.rs +0 -0
  218. {cocoindex-0.1.38 → cocoindex-0.1.40}/src/server.rs +0 -0
  219. {cocoindex-0.1.38 → cocoindex-0.1.40}/src/service/error.rs +0 -0
  220. {cocoindex-0.1.38 → cocoindex-0.1.40}/src/service/flows.rs +0 -0
  221. {cocoindex-0.1.38 → cocoindex-0.1.40}/src/service/mod.rs +0 -0
  222. {cocoindex-0.1.38 → cocoindex-0.1.40}/src/service/search.rs +0 -0
  223. {cocoindex-0.1.38 → cocoindex-0.1.40}/src/settings.rs +0 -0
  224. {cocoindex-0.1.38 → cocoindex-0.1.40}/src/setup/auth_registry.rs +0 -0
  225. {cocoindex-0.1.38 → cocoindex-0.1.40}/src/setup/components.rs +0 -0
  226. {cocoindex-0.1.38 → cocoindex-0.1.40}/src/setup/db_metadata.rs +0 -0
  227. {cocoindex-0.1.38 → cocoindex-0.1.40}/src/setup/driver.rs +0 -0
  228. {cocoindex-0.1.38 → cocoindex-0.1.40}/src/setup/mod.rs +0 -0
  229. {cocoindex-0.1.38 → cocoindex-0.1.40}/src/setup/states.rs +0 -0
  230. {cocoindex-0.1.38 → cocoindex-0.1.40}/src/utils/db.rs +0 -0
  231. {cocoindex-0.1.38 → cocoindex-0.1.40}/src/utils/fingerprint.rs +0 -0
  232. {cocoindex-0.1.38 → cocoindex-0.1.40}/src/utils/immutable.rs +0 -0
  233. {cocoindex-0.1.38 → cocoindex-0.1.40}/src/utils/mod.rs +0 -0
  234. {cocoindex-0.1.38 → cocoindex-0.1.40}/src/utils/retryable.rs +0 -0
  235. {cocoindex-0.1.38 → cocoindex-0.1.40}/src/utils/yaml_ser.rs +0 -0
@@ -993,7 +993,7 @@ dependencies = [
993
993
 
994
994
  [[package]]
995
995
  name = "cocoindex"
996
- version = "0.1.38"
996
+ version = "0.1.40"
997
997
  dependencies = [
998
998
  "anyhow",
999
999
  "async-openai",
@@ -2,7 +2,7 @@
2
2
  name = "cocoindex"
3
3
  # Version used for local development is always higher than others to take precedence.
4
4
  # Will be overridden for specific release versions.
5
- version = "0.1.38"
5
+ version = "0.1.40"
6
6
  edition = "2024"
7
7
 
8
8
  [profile.release]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cocoindex
3
- Version: 0.1.38
3
+ Version: 0.1.40
4
4
  Requires-Dist: sentence-transformers>=3.3.1
5
5
  Requires-Dist: click>=8.1.8
6
6
  Requires-Dist: rich>=14.0.0
@@ -153,7 +153,7 @@ It defines an index flow like this:
153
153
  | [Docs to Knowledge Graph](examples/docs_to_knowledge_graph) | Extract relationships from Markdown documents and build a knowledge graph |
154
154
  | [Embeddings to Qdrant](examples/text_embedding_qdrant) | Index documents in a Qdrant collection for semantic search |
155
155
  | [FastAPI Server with Docker](examples/fastapi_server_docker) | Run the semantic search server in a Dockerized FastAPI setup |
156
- | [Product_Taxonomy_Knowledge_Graph](examples/product_taxonomy_knowledge_graph) | Build knowledge graph for product recommendations |
156
+ | [Product Recommendation](examples/product_recommendation) | Build real-time product recommendations with LLM and graph database|
157
157
  | [Image Search with Vision API](examples/image_search_example) | Generates detailed captions for images using a vision model, embeds them, enables live-updating semantic search via FastAPI and served on a React frontend|
158
158
 
159
159
  More coming and stay tuned 👀!
@@ -137,7 +137,7 @@ It defines an index flow like this:
137
137
  | [Docs to Knowledge Graph](examples/docs_to_knowledge_graph) | Extract relationships from Markdown documents and build a knowledge graph |
138
138
  | [Embeddings to Qdrant](examples/text_embedding_qdrant) | Index documents in a Qdrant collection for semantic search |
139
139
  | [FastAPI Server with Docker](examples/fastapi_server_docker) | Run the semantic search server in a Dockerized FastAPI setup |
140
- | [Product_Taxonomy_Knowledge_Graph](examples/product_taxonomy_knowledge_graph) | Build knowledge graph for product recommendations |
140
+ | [Product Recommendation](examples/product_recommendation) | Build real-time product recommendations with LLM and graph database|
141
141
  | [Image Search with Vision API](examples/image_search_example) | Generates detailed captions for images using a vision model, embeds them, enables live-updating semantic search via FastAPI and served on a React frontend|
142
142
 
143
143
  More coming and stay tuned 👀!
@@ -146,8 +146,9 @@ def demo_flow(flow_builder: cocoindex.FlowBuilder, data_scope: cocoindex.DataSco
146
146
 
147
147
  :::info
148
148
 
149
- In live update mode, for each refresh, CocoIndex will traverse the data source to figure out the changes,
149
+ In live update mode, for each refresh, CocoIndex will list rows in the data source to figure out the changes based on metadata such as last modified time,
150
150
  and only perform transformations on changed source keys.
151
+ If nothing changed during the last refresh cycle, only list operations will be performed, which is usually cheap for most data sources.
151
152
 
152
153
  :::
153
154
 
@@ -311,6 +312,38 @@ Following metrics are supported:
311
312
 
312
313
  ## Miscellaneous
313
314
 
315
+ ### Getting App Namespace
316
+
317
+ You can use the [`app_namespace` setting](initialization#app-namespace) or `COCOINDEX_APP_NAMESPACE` environment variable to specify the app namespace,
318
+ to organize flows across different environments (e.g., dev, staging, production), team members, etc.
319
+
320
+ In the code, you can call `cocoindex.get_app_namespace()` to get the app namespace, and use it to name certain backends. It takes the following arguments:
321
+
322
+ * `trailing_delimiter` (optional): a string to append to the app namespace when it's not empty.
323
+
324
+ e.g. when the current app namespace is `Staging`, `cocoindex.get_app_namespace(trailing_delimiter='.')` will return `Staging.`.
325
+
326
+ For example,
327
+
328
+ <Tabs>
329
+ <TabItem value="python" label="Python" default>
330
+
331
+ ```python
332
+ doc_embeddings.export(
333
+ "doc_embeddings",
334
+ cocoindex.storages.Qdrant(
335
+ collection_name=cocoindex.get_app_namespace(trailing_delimiter='__') + "doc_embeddings",
336
+ ...
337
+ ),
338
+ ...
339
+ )
340
+ ```
341
+
342
+ </TabItem>
343
+ </Tabs>
344
+
345
+ It will use `Staging__doc_embeddings` as the collection name if the current app namespace is `Staging`, and use `doc_embeddings` if the app namespace is empty.
346
+
314
347
  ### Target Declarations
315
348
 
316
349
  Most of the time, a target storage is created by calling the `export()` method on a collector, and this `export()` call comes with the configurations needed for the target storage, e.g. options for storage indexes.
@@ -105,7 +105,7 @@ A data source may enable one or multiple *change capture mechanisms*:
105
105
  * Configured with a [refresh interval](flow_def#refresh-interval), which is generally applicable to all data sources.
106
106
 
107
107
  * Specific data sources also provide their specific change capture mechanisms.
108
- For example, [`GoogleDrive` source](../ops/sources#googledrive) allows polling recent modified files.
108
+ For example, [`AmazonS3` source](../ops/sources/#amazons3) watches the S3 bucket's change events, and [`GoogleDrive` source](../ops/sources#googledrive) allows polling recently modified files.
109
109
  See the documentation for specific data sources.
110
110
 
111
111
  Change capture mechanisms enable CocoIndex to continuously capture changes from the source data and update the target data accordingly, under live update mode.
@@ -88,9 +88,10 @@ if __name__ == "__main__":
88
88
 
89
89
  ### App Namespace
90
90
 
91
- The `app_namespace` field helps organize flows across different environments (e.g., testing, production) or teams. When set, it prefixes flow names with the namespace.
91
+ The `app_namespace` field helps organize flows across different environments (e.g., dev, staging, production), team members, etc. When set, it prefixes flow names with the namespace.
92
92
 
93
- For example, if the namespace is "Staging", for a flow with name specified as `Flow1` in code, the full name of the flow will be `Staging.Flow1`.
93
+ For example, if the namespace is `Staging`, for a flow with name specified as `Flow1` in code, the full name of the flow will be `Staging.Flow1`.
94
+ You can also get the current app namespace by calling `cocoindex.get_app_namespace()` (see [Getting App Namespace](flow_def#getting-app-namespace) for more details).
94
95
 
95
96
  If not set, all flows are in a default unnamed namespace.
96
97
 
@@ -40,7 +40,7 @@ The spec takes the following fields:
40
40
  See [DatabaseConnectionSpec](../core/initialization#databaseconnectionspec) for its specific fields.
41
41
  If not provided, will use the same database as the [internal storage](/docs/core/basics#internal-storage).
42
42
 
43
- * `table_name` (type: `str`, optional): The name of the table to store to. If unspecified, will generate a new automatically. We recommend specifying a name explicitly if you want to directly query the table. It can be omitted if you want to use CocoIndex's query handlers to query the table.
43
+ * `table_name` (type: `str`, optional): The name of the table to store to. If unspecified, will use the table name `[${AppNamespace}__]${FlowName}__${TargetName}`, e.g. `DemoFlow__doc_embeddings` or `Staging__DemoFlow__doc_embeddings`.
44
44
 
45
45
  ### Qdrant
46
46
 
@@ -3,4 +3,7 @@ name = "amazon-s3-text-embedding"
3
3
  version = "0.1.0"
4
4
  description = "Simple example for cocoindex: build embedding index based on Amazon S3 files."
5
5
  requires-python = ">=3.11"
6
- dependencies = ["cocoindex>=0.1.35", "python-dotenv>=1.0.1"]
6
+ dependencies = ["cocoindex>=0.1.39", "python-dotenv>=1.0.1"]
7
+
8
+ [tool.setuptools]
9
+ packages = []
@@ -3,4 +3,7 @@ name = "code-embedding"
3
3
  version = "0.1.0"
4
4
  description = "Simple example for cocoindex: build embedding index based on source code."
5
5
  requires-python = ">=3.10"
6
- dependencies = ["cocoindex>=0.1.35", "python-dotenv>=1.0.1"]
6
+ dependencies = ["cocoindex>=0.1.39", "python-dotenv>=1.0.1"]
7
+
8
+ [tool.setuptools]
9
+ packages = []
@@ -3,4 +3,7 @@ name = "manuals-to-kg"
3
3
  version = "0.1.0"
4
4
  description = "Simple example for cocoindex: extract triples from files and build knowledge graph."
5
5
  requires-python = ">=3.10"
6
- dependencies = ["cocoindex>=0.1.35", "python-dotenv>=1.0.1"]
6
+ dependencies = ["cocoindex>=0.1.39", "python-dotenv>=1.0.1"]
7
+
8
+ [tool.setuptools]
9
+ packages = []
@@ -3,4 +3,7 @@ name = "gdrive-text-embedding"
3
3
  version = "0.1.0"
4
4
  description = "Simple example for cocoindex: build embedding index based on Google Drive files."
5
5
  requires-python = ">=3.11"
6
- dependencies = ["cocoindex>=0.1.35", "python-dotenv>=1.0.1"]
6
+ dependencies = ["cocoindex>=0.1.39", "python-dotenv>=1.0.1"]
7
+
8
+ [tool.setuptools]
9
+ packages = []
@@ -4,7 +4,10 @@ version = "0.1.0"
4
4
  description = "Simple example for cocoindex: extract structured information from a Markdown file using LLM."
5
5
  requires-python = ">=3.10"
6
6
  dependencies = [
7
- "cocoindex>=0.1.35",
7
+ "cocoindex>=0.1.39",
8
8
  "python-dotenv>=1.0.1",
9
9
  "marker-pdf>=1.5.2",
10
10
  ]
11
+
12
+ [tool.setuptools]
13
+ packages = []
@@ -4,7 +4,10 @@ version = "0.1.0"
4
4
  description = "Simple example for cocoindex: build embedding index based on local PDF files."
5
5
  requires-python = ">=3.10"
6
6
  dependencies = [
7
- "cocoindex>=0.1.35",
7
+ "cocoindex>=0.1.39",
8
8
  "python-dotenv>=1.0.1",
9
9
  "marker-pdf>=1.5.2",
10
10
  ]
11
+
12
+ [tool.setuptools]
13
+ packages = []
@@ -1,6 +1,8 @@
1
- # Build Real-Time Product Recommendation based on LLM Taxonomy Extraction and Knowledge Graph
1
+ # Build Real-Time Recommendation Engine with LLM and Graph Database
2
2
 
3
- We will process a list of products and use LLM to extract the taxonomy and complimentary taxonomy for each product.
3
+ We will build a real-time product recommendation engine with LLM and graph database. In particular, we will use LLM to understand the category (taxonomy) of a product. In addition, we will use LLM to enumerate the complementary products that users are likely to buy together with the current product (e.g., pencil and notebook).
4
+
5
+ We will use Graph to explore the relationships between products that can be further used for product recommendations or labeling.
4
6
 
5
7
  Please drop [CocoIndex on Github](https://github.com/cocoindex-io/cocoindex) a star to support us and stay tuned for more updates. Thank you so much 🥥🤗. [![GitHub](https://img.shields.io/github/stars/cocoindex-io/cocoindex?color=5B5BD6)](https://github.com/cocoindex-io/cocoindex)
6
8
 
@@ -3,4 +3,7 @@ name = "cocoindex-ecommerce-taxonomy"
3
3
  version = "0.1.0"
4
4
  description = "Simple example for CocoIndex: extract taxonomy from e-commerce products and build knowledge graph."
5
5
  requires-python = ">=3.10"
6
- dependencies = ["cocoindex>=0.1.35", "python-dotenv>=1.0.1", "jinja2>=3.1.6"]
6
+ dependencies = ["cocoindex>=0.1.39", "python-dotenv>=1.0.1", "jinja2>=3.1.6"]
7
+
8
+ [tool.setuptools]
9
+ packages = []
@@ -2,6 +2,7 @@ from dotenv import load_dotenv
2
2
 
3
3
  import cocoindex
4
4
 
5
+ @cocoindex.transform_flow()
5
6
  def text_to_embedding(text: cocoindex.DataSlice) -> cocoindex.DataSlice:
6
7
  """
7
8
  Embed the text using a SentenceTransformer model.
@@ -3,4 +3,7 @@ name = "text-embedding"
3
3
  version = "0.1.0"
4
4
  description = "Simple example for cocoindex: build embedding index based on local text files."
5
5
  requires-python = ">=3.10"
6
- dependencies = ["cocoindex>=0.1.35", "python-dotenv>=1.0.1"]
6
+ dependencies = ["cocoindex>=0.1.39", "python-dotenv>=1.0.1"]
7
+
8
+ [tool.setuptools]
9
+ packages = []
@@ -0,0 +1,2 @@
1
+ # Postgres database address for cocoindex
2
+ COCOINDEX_DATABASE_URL=postgres://cocoindex:cocoindex@localhost/cocoindex
@@ -3,4 +3,7 @@ name = "text-embedding-qdrant"
3
3
  version = "0.1.0"
4
4
  description = "Simple example for cocoindex: build embedding index based on local text files."
5
5
  requires-python = ">=3.10"
6
- dependencies = ["cocoindex>=0.1.35", "python-dotenv>=1.0.1"]
6
+ dependencies = ["cocoindex>=0.1.39", "python-dotenv>=1.0.1"]
7
+
8
+ [tool.setuptools]
9
+ packages = []
@@ -1,14 +1,15 @@
1
1
  """
2
2
  Cocoindex is a framework for building and running indexing pipelines.
3
3
  """
4
- from . import functions, query, sources, storages, cli
5
- from .flow import FlowBuilder, DataScope, DataSlice, Flow, flow_def
4
+ from . import functions, query, sources, storages, cli, utils
5
+ from .flow import FlowBuilder, DataScope, DataSlice, Flow, flow_def, transform_flow
6
6
  from .flow import EvaluateAndDumpOptions, GeneratedField
7
7
  from .flow import update_all_flows_async, FlowLiveUpdater, FlowLiveUpdaterOptions
8
8
  from .llm import LlmSpec, LlmApiType
9
9
  from .index import VectorSimilarityMetric, VectorIndexDef, IndexOptions
10
10
  from .auth_registry import AuthEntryReference, add_auth_entry, ref_auth_entry
11
11
  from .lib import *
12
- from .setting import *
12
+ from .setting import DatabaseConnectionSpec, Settings, ServerSettings
13
+ from .setting import get_app_namespace
13
14
  from ._engine import OpArgSchema
14
15
  from .typing import Float32, Float64, LocalDateTime, OffsetDateTime, Range, Vector, Json
@@ -44,7 +44,7 @@ def make_engine_value_decoder(
44
44
 
45
45
  src_type_kind = src_type['kind']
46
46
 
47
- if dst_annotation is inspect.Parameter.empty:
47
+ if dst_annotation is None or dst_annotation is inspect.Parameter.empty or dst_annotation is Any:
48
48
  if src_type_kind == 'Struct' or src_type_kind in TABLE_TYPES:
49
49
  raise ValueError(f"Missing type annotation for `{''.join(field_path)}`."
50
50
  f"It's required for {src_type_kind} type.")
@@ -8,8 +8,9 @@ import asyncio
8
8
  import re
9
9
  import inspect
10
10
  import datetime
11
+ import functools
11
12
 
12
- from typing import Any, Callable, Sequence, TypeVar
13
+ from typing import Any, Callable, Sequence, TypeVar, Generic, get_args, get_origin, Type, NamedTuple
13
14
  from threading import Lock
14
15
  from enum import Enum
15
16
  from dataclasses import dataclass
@@ -20,7 +21,7 @@ from . import _engine
20
21
  from . import index
21
22
  from . import op
22
23
  from . import setting
23
- from .convert import dump_engine_object
24
+ from .convert import dump_engine_object, encode_engine_value, make_engine_value_decoder
24
25
  from .typing import encode_enriched_type
25
26
  from .runtime import execution_context
26
27
 
@@ -123,7 +124,7 @@ class _DataSliceState:
123
124
  # TODO: We'll support this by an identity transformer or "aliasing" in the future.
124
125
  raise ValueError("DataSlice is already attached to a field")
125
126
 
126
- class DataSlice:
127
+ class DataSlice(Generic[T]):
127
128
  """A data slice represents a slice of data in a flow. It's readonly."""
128
129
 
129
130
  _state: _DataSliceState
@@ -183,11 +184,11 @@ class DataSlice:
183
184
  name, prefix=_to_snake_case(_spec_kind(fn_spec))+'_'),
184
185
  ))
185
186
 
186
- def call(self, func: Callable[[DataSlice], T]) -> T:
187
+ def call(self, func: Callable[[DataSlice], T], *args, **kwargs) -> T:
187
188
  """
188
189
  Call a function with the data slice.
189
190
  """
190
- return func(self)
191
+ return func(self, *args, **kwargs)
191
192
 
192
193
  def _data_slice_state(data_slice: DataSlice) -> _DataSliceState:
193
194
  return data_slice._state # pylint: disable=protected-access
@@ -309,9 +310,8 @@ class _FlowBuilderState:
309
310
  engine_flow_builder: _engine.FlowBuilder
310
311
  field_name_builder: _NameBuilder
311
312
 
312
- def __init__(self, /, name: str | None = None):
313
- flow_name = _flow_name_builder.build_name(name, prefix="_flow_")
314
- self.engine_flow_builder = _engine.FlowBuilder(get_full_flow_name(flow_name))
313
+ def __init__(self, full_name: str):
314
+ self.engine_flow_builder = _engine.FlowBuilder(full_name)
315
315
  self.field_name_builder = _NameBuilder()
316
316
 
317
317
  def get_data_slice(self, v: Any) -> _engine.DataSlice:
@@ -463,9 +463,13 @@ class Flow:
463
463
  """
464
464
  A flow describes an indexing pipeline.
465
465
  """
466
+ _name: str
467
+ _full_name: str
466
468
  _lazy_engine_flow: Callable[[], _engine.Flow]
467
469
 
468
- def __init__(self, engine_flow_creator: Callable[[], _engine.Flow]):
470
+ def __init__(self, name: str, full_name: str, engine_flow_creator: Callable[[], _engine.Flow]):
471
+ self._name = name
472
+ self._full_name = full_name
469
473
  engine_flow = None
470
474
  lock = Lock()
471
475
  def _lazy_engine_flow() -> _engine.Flow:
@@ -496,7 +500,7 @@ class Flow:
496
500
  tree.children.append(section_node)
497
501
  return tree
498
502
 
499
- def _get_spec(self, verbose: bool = False) -> list[tuple[str, str, int]]:
503
+ def _get_spec(self, verbose: bool = False) -> _engine.RenderedSpec:
500
504
  return self._lazy_engine_flow().get_spec(output_mode="verbose" if verbose else "concise")
501
505
 
502
506
  def _get_schema(self) -> list[tuple[str, str, str]]:
@@ -508,12 +512,19 @@ class Flow:
508
512
  def __repr__(self):
509
513
  return repr(self._lazy_engine_flow())
510
514
 
515
+ @property
516
+ def name(self) -> str:
517
+ """
518
+ Get the name of the flow.
519
+ """
520
+ return self._name
521
+
511
522
  @property
512
523
  def full_name(self) -> str:
513
524
  """
514
525
  Get the full name of the flow.
515
526
  """
516
- return self._lazy_engine_flow().name()
527
+ return self._full_name
517
528
 
518
529
  def update(self) -> _engine.IndexUpdateInfo:
519
530
  """
@@ -554,14 +565,16 @@ def _create_lazy_flow(name: str | None, fl_def: Callable[[FlowBuilder, DataScope
554
565
  Create a flow without really building it yet.
555
566
  The flow will be built the first time when it's really needed.
556
567
  """
568
+ flow_name = _flow_name_builder.build_name(name, prefix="_flow_")
569
+ flow_full_name = get_full_flow_name(flow_name)
557
570
  def _create_engine_flow() -> _engine.Flow:
558
- flow_builder_state = _FlowBuilderState(name=name)
571
+ flow_builder_state = _FlowBuilderState(flow_full_name)
559
572
  root_scope = DataScope(
560
573
  flow_builder_state, flow_builder_state.engine_flow_builder.root_scope())
561
574
  fl_def(FlowBuilder(flow_builder_state), root_scope)
562
575
  return flow_builder_state.engine_flow_builder.build_flow(execution_context.event_loop)
563
576
 
564
- return Flow(_create_engine_flow)
577
+ return Flow(flow_name, flow_full_name, _create_engine_flow)
565
578
 
566
579
 
567
580
  _flows_lock = Lock()
@@ -642,27 +655,67 @@ async def update_all_flows_async(options: FlowLiveUpdaterOptions) -> dict[str, _
642
655
  all_stats = await asyncio.gather(*(_update_flow(name, fl) for (name, fl) in fls.items()))
643
656
  return dict(all_stats)
644
657
 
645
- _transient_flow_name_builder = _NameBuilder()
646
- class TransientFlow:
658
+ def _get_data_slice_annotation_type(data_slice_type: Type[DataSlice[T]]) -> Type[T] | None:
659
+ type_args = get_args(data_slice_type)
660
+ if data_slice_type is DataSlice:
661
+ return None
662
+ if get_origin(data_slice_type) != DataSlice or len(type_args) != 1:
663
+ raise ValueError(f"Expect a DataSlice[T] type, but got {data_slice_type}")
664
+ return type_args[0]
665
+
666
+ _transform_flow_name_builder = _NameBuilder()
667
+
668
+ class TransformFlowInfo(NamedTuple):
669
+ engine_flow: _engine.TransientFlow
670
+ result_decoder: Callable[[Any], T]
671
+
672
+ class TransformFlow(Generic[T]):
647
673
  """
648
674
  A transient transformation flow that transforms in-memory data.
649
675
  """
650
- _engine_flow: _engine.TransientFlow
676
+ _flow_fn: Callable[..., DataSlice[T]]
677
+ _flow_name: str
678
+ _flow_arg_types: list[Any]
679
+ _param_names: list[str]
680
+
681
+ _lazy_lock: asyncio.Lock
682
+ _lazy_flow_info: TransformFlowInfo | None = None
651
683
 
652
684
  def __init__(
653
- self, flow_fn: Callable[..., DataSlice],
685
+ self, flow_fn: Callable[..., DataSlice[T]],
654
686
  flow_arg_types: Sequence[Any], /, name: str | None = None):
687
+ self._flow_fn = flow_fn
688
+ self._flow_name = _transform_flow_name_builder.build_name(name, prefix="_transform_flow_")
689
+ self._flow_arg_types = list(flow_arg_types)
690
+ self._lazy_lock = asyncio.Lock()
691
+
692
+ def __call__(self, *args, **kwargs) -> DataSlice[T]:
693
+ return self._flow_fn(*args, **kwargs)
655
694
 
656
- flow_builder_state = _FlowBuilderState(
657
- name=_transient_flow_name_builder.build_name(name, prefix="_transient_flow_"))
658
- sig = inspect.signature(flow_fn)
659
- if len(sig.parameters) != len(flow_arg_types):
695
+ @property
696
+ def _flow_info(self) -> TransformFlowInfo:
697
+ if self._lazy_flow_info is not None:
698
+ return self._lazy_flow_info
699
+ return execution_context.run(self._flow_info_async())
700
+
701
+ async def _flow_info_async(self) -> TransformFlowInfo:
702
+ if self._lazy_flow_info is not None:
703
+ return self._lazy_flow_info
704
+ async with self._lazy_lock:
705
+ if self._lazy_flow_info is None:
706
+ self._lazy_flow_info = await self._build_flow_info_async()
707
+ return self._lazy_flow_info
708
+
709
+ async def _build_flow_info_async(self) -> TransformFlowInfo:
710
+ flow_builder_state = _FlowBuilderState(self._flow_name)
711
+ sig = inspect.signature(self._flow_fn)
712
+ if len(sig.parameters) != len(self._flow_arg_types):
660
713
  raise ValueError(
661
714
  f"Number of parameters in the flow function ({len(sig.parameters)}) "
662
- "does not match the number of argument types ({len(flow_arg_types)})")
715
+ f"does not match the number of argument types ({len(self._flow_arg_types)})")
663
716
 
664
717
  kwargs: dict[str, DataSlice] = {}
665
- for (param_name, param), param_type in zip(sig.parameters.items(), flow_arg_types):
718
+ for (param_name, param), param_type in zip(sig.parameters.items(), self._flow_arg_types):
666
719
  if param.kind not in (inspect.Parameter.POSITIONAL_OR_KEYWORD,
667
720
  inspect.Parameter.KEYWORD_ONLY):
668
721
  raise ValueError(f"Parameter {param_name} is not a parameter can be passed by name")
@@ -670,20 +723,68 @@ class TransientFlow:
670
723
  param_name, encode_enriched_type(param_type))
671
724
  kwargs[param_name] = DataSlice(_DataSliceState(flow_builder_state, engine_ds))
672
725
 
673
- output = flow_fn(**kwargs)
726
+ output = self._flow_fn(**kwargs)
674
727
  flow_builder_state.engine_flow_builder.set_direct_output(
675
728
  _data_slice_state(output).engine_data_slice)
676
- self._engine_flow = flow_builder_state.engine_flow_builder.build_transient_flow(
677
- execution_context.event_loop)
729
+ engine_flow = await flow_builder_state.engine_flow_builder.build_transient_flow_async(execution_context.event_loop)
730
+ self._param_names = list(sig.parameters.keys())
731
+
732
+ engine_return_type = _data_slice_state(output).engine_data_slice.data_type().schema()
733
+ python_return_type = _get_data_slice_annotation_type(sig.return_annotation)
734
+ result_decoder = make_engine_value_decoder([], engine_return_type['type'], python_return_type)
735
+
736
+ return TransformFlowInfo(engine_flow, result_decoder)
678
737
 
679
738
  def __str__(self):
680
- return str(self._engine_flow)
739
+ return str(self._flow_info.engine_flow)
681
740
 
682
741
  def __repr__(self):
683
- return repr(self._engine_flow)
742
+ return repr(self._flow_info.engine_flow)
684
743
 
685
744
  def internal_flow(self) -> _engine.TransientFlow:
686
745
  """
687
746
  Get the internal flow.
688
747
  """
689
- return self._engine_flow
748
+ return self._flow_info.engine_flow
749
+
750
+ def eval(self, *args, **kwargs) -> T:
751
+ """
752
+ Evaluate the transform flow.
753
+ """
754
+ return execution_context.run(self.eval_async(*args, **kwargs))
755
+
756
+ async def eval_async(self, *args, **kwargs) -> T:
757
+ """
758
+ Evaluate the transform flow.
759
+ """
760
+ flow_info = await self._flow_info_async()
761
+ params = []
762
+ for i, arg in enumerate(self._param_names):
763
+ if i < len(args):
764
+ params.append(encode_engine_value(args[i]))
765
+ elif arg in kwargs:
766
+ params.append(encode_engine_value(kwargs[arg]))
767
+ else:
768
+ raise ValueError(f"Parameter {arg} is not provided")
769
+ engine_result = await flow_info.engine_flow.evaluate_async(params)
770
+ return flow_info.result_decoder(engine_result)
771
+
772
+
773
+ def transform_flow() -> Callable[[Callable[..., DataSlice[T]]], TransformFlow[T]]:
774
+ """
775
+ A decorator to wrap the transform function.
776
+ """
777
+ def _transform_flow_wrapper(fn: Callable[..., DataSlice[T]]):
778
+ sig = inspect.signature(fn)
779
+ arg_types = []
780
+ for (param_name, param) in sig.parameters.items():
781
+ if param.kind not in (inspect.Parameter.POSITIONAL_OR_KEYWORD,
782
+ inspect.Parameter.KEYWORD_ONLY):
783
+ raise ValueError(f"Parameter {param_name} is not a parameter can be passed by name")
784
+ arg_types.append(_get_data_slice_annotation_type(param.annotation))
785
+
786
+ _transform_flow = TransformFlow(fn, arg_types)
787
+ functools.update_wrapper(_transform_flow, fn)
788
+ return _transform_flow
789
+
790
+ return _transform_flow_wrapper
@@ -100,8 +100,8 @@ def _register_op_factory(
100
100
  return op_args.behavior_version
101
101
 
102
102
  class _WrappedClass(executor_cls, _Fallback):
103
- _args_converters: list[Callable[[Any], Any]]
104
- _kwargs_converters: dict[str, Callable[[str, Any], Any]]
103
+ _args_decoders: list[Callable[[Any], Any]]
104
+ _kwargs_decoders: dict[str, Callable[[str, Any], Any]]
105
105
  _acall: Callable
106
106
 
107
107
  def __init__(self, spec):
@@ -109,17 +109,17 @@ def _register_op_factory(
109
109
  self.spec = spec
110
110
  self._acall = _to_async_call(super().__call__)
111
111
 
112
- def analyze(self, *args, **kwargs):
112
+ def analyze(self, *args: _engine.OpArgSchema, **kwargs: _engine.OpArgSchema):
113
113
  """
114
114
  Analyze the spec and arguments. In this phase, argument types should be validated.
115
115
  It should return the expected result type for the current op.
116
116
  """
117
- self._args_converters = []
118
- self._kwargs_converters = {}
117
+ self._args_decoders = []
118
+ self._kwargs_decoders = {}
119
119
 
120
120
  # Match arguments with parameters.
121
121
  next_param_idx = 0
122
- for arg in args:
122
+ for arg in args:
123
123
  if next_param_idx >= len(expected_args):
124
124
  raise ValueError(
125
125
  f"Too many arguments passed in: {len(args)} > {len(expected_args)}")
@@ -128,7 +128,7 @@ def _register_op_factory(
128
128
  inspect.Parameter.KEYWORD_ONLY, inspect.Parameter.VAR_KEYWORD):
129
129
  raise ValueError(
130
130
  f"Too many positional arguments passed in: {len(args)} > {next_param_idx}")
131
- self._args_converters.append(
131
+ self._args_decoders.append(
132
132
  make_engine_value_decoder(
133
133
  [arg_name], arg.value_type['type'], arg_param.annotation))
134
134
  if arg_param.kind != inspect.Parameter.VAR_POSITIONAL:
@@ -146,7 +146,7 @@ def _register_op_factory(
146
146
  if expected_arg is None:
147
147
  raise ValueError(f"Unexpected keyword argument passed in: {kwarg_name}")
148
148
  arg_param = expected_arg[1]
149
- self._kwargs_converters[kwarg_name] = make_engine_value_decoder(
149
+ self._kwargs_decoders[kwarg_name] = make_engine_value_decoder(
150
150
  [kwarg_name], kwarg.value_type['type'], arg_param.annotation)
151
151
 
152
152
  missing_args = [name for (name, arg) in expected_kwargs
@@ -174,8 +174,8 @@ def _register_op_factory(
174
174
  await _to_async_call(setup_method)()
175
175
 
176
176
  async def __call__(self, *args, **kwargs):
177
- converted_args = (converter(arg) for converter, arg in zip(self._args_converters, args))
178
- converted_kwargs = {arg_name: self._kwargs_converters[arg_name](arg)
177
+ decoded_args = (decoder(arg) for decoder, arg in zip(self._args_decoders, args))
178
+ decoded_kwargs = {arg_name: self._kwargs_decoders[arg_name](arg)
179
179
  for arg_name, arg in kwargs.items()}
180
180
 
181
181
  if op_args.gpu:
@@ -185,9 +185,9 @@ def _register_op_factory(
185
185
  # For now, we use a lock to ensure only one task is executed at a time.
186
186
  # TODO: Implement multi-processing dispatching.
187
187
  async with _gpu_dispatch_lock:
188
- output = await self._acall(*converted_args, **converted_kwargs)
188
+ output = await self._acall(*decoded_args, **decoded_kwargs)
189
189
  else:
190
- output = await self._acall(*converted_args, **converted_kwargs)
190
+ output = await self._acall(*decoded_args, **decoded_kwargs)
191
191
  return encode_engine_value(output)
192
192
 
193
193
  _WrappedClass.__name__ = executor_cls.__name__
@@ -50,7 +50,7 @@ class SimpleSemanticsQueryHandler:
50
50
  if engine_handler is None:
51
51
  engine_handler = _engine.SimpleSemanticsQueryHandler(
52
52
  flow.internal_flow(), target_name,
53
- fl.TransientFlow(query_transform_flow, [str]).internal_flow(),
53
+ fl.TransformFlow(query_transform_flow, [str]).internal_flow(),
54
54
  default_similarity_metric.value)
55
55
  engine_handler.register_query_handler(name)
56
56
  return engine_handler