cocoindex 0.1.53__tar.gz → 0.1.55__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (256) hide show
  1. {cocoindex-0.1.53 → cocoindex-0.1.55}/.github/ISSUE_TEMPLATE/💡-feature-request.md +1 -1
  2. {cocoindex-0.1.53 → cocoindex-0.1.55}/.github/scripts/update_version.sh +1 -1
  3. {cocoindex-0.1.53 → cocoindex-0.1.55}/.github/workflows/CI.yml +13 -1
  4. cocoindex-0.1.55/.pre-commit-config.yaml +71 -0
  5. {cocoindex-0.1.53 → cocoindex-0.1.55}/.vscode/settings.json +1 -1
  6. {cocoindex-0.1.53 → cocoindex-0.1.55}/CONTRIBUTING.md +1 -1
  7. {cocoindex-0.1.53 → cocoindex-0.1.55}/Cargo.lock +2 -1
  8. {cocoindex-0.1.53 → cocoindex-0.1.55}/Cargo.toml +2 -2
  9. {cocoindex-0.1.53 → cocoindex-0.1.55}/PKG-INFO +12 -10
  10. {cocoindex-0.1.53 → cocoindex-0.1.55}/README.md +9 -9
  11. {cocoindex-0.1.53 → cocoindex-0.1.55}/docs/docs/about/contributing.md +19 -10
  12. cocoindex-0.1.55/docs/docs/ai/llm.mdx +309 -0
  13. {cocoindex-0.1.53 → cocoindex-0.1.55}/docs/docs/core/basics.md +2 -2
  14. {cocoindex-0.1.53 → cocoindex-0.1.55}/docs/docs/core/cli.mdx +1 -1
  15. {cocoindex-0.1.53 → cocoindex-0.1.55}/docs/docs/core/custom_function.mdx +1 -1
  16. {cocoindex-0.1.53 → cocoindex-0.1.55}/docs/docs/core/flow_def.mdx +1 -1
  17. {cocoindex-0.1.53 → cocoindex-0.1.55}/docs/docs/core/flow_methods.mdx +3 -3
  18. {cocoindex-0.1.53 → cocoindex-0.1.55}/docs/docs/core/settings.mdx +1 -1
  19. {cocoindex-0.1.53 → cocoindex-0.1.55}/docs/docs/getting_started/installation.md +2 -3
  20. {cocoindex-0.1.53 → cocoindex-0.1.55}/docs/docs/getting_started/overview.md +3 -4
  21. {cocoindex-0.1.53 → cocoindex-0.1.55}/docs/docs/getting_started/quickstart.md +3 -3
  22. cocoindex-0.1.55/docs/docs/ops/functions.md +142 -0
  23. {cocoindex-0.1.53 → cocoindex-0.1.55}/docs/docs/ops/sources.md +29 -29
  24. {cocoindex-0.1.53 → cocoindex-0.1.55}/docs/docs/ops/targets.md +26 -26
  25. {cocoindex-0.1.53 → cocoindex-0.1.55}/docs/docs/query.mdx +0 -1
  26. {cocoindex-0.1.53 → cocoindex-0.1.55}/docs/sidebars.ts +1 -1
  27. {cocoindex-0.1.53 → cocoindex-0.1.55}/docs/src/css/custom.css +7 -7
  28. {cocoindex-0.1.53 → cocoindex-0.1.55}/docs/src/theme/Root.js +1 -1
  29. {cocoindex-0.1.53 → cocoindex-0.1.55}/docs/static/robots.txt +1 -1
  30. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/amazon_s3_embedding/.env.example +1 -1
  31. cocoindex-0.1.55/examples/amazon_s3_embedding/.gitignore +1 -0
  32. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/amazon_s3_embedding/README.md +2 -2
  33. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/code_embedding/README.md +6 -7
  34. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/code_embedding/main.py +16 -4
  35. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/docs_to_knowledge_graph/README.md +4 -6
  36. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/fastapi_server_docker/README.md +2 -2
  37. {cocoindex-0.1.53/examples/text_embedding/markdown_files → cocoindex-0.1.55/examples/fastapi_server_docker/files}/1810.04805v2.md +1 -1
  38. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/fastapi_server_docker/requirements.txt +1 -1
  39. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/gdrive_text_embedding/.env.example +2 -2
  40. cocoindex-0.1.55/examples/gdrive_text_embedding/.gitignore +1 -0
  41. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/gdrive_text_embedding/README.md +4 -4
  42. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/image_search/.env +1 -1
  43. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/image_search/README.md +0 -1
  44. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/image_search/main.py +28 -26
  45. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/image_search/pyproject.toml +2 -0
  46. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/image_search/requirements.txt +1 -1
  47. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/manuals_llm_extraction/README.md +1 -1
  48. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/pdf_embedding/README.md +1 -1
  49. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/product_recommendation/README.md +4 -6
  50. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/product_recommendation/products/p1.json +1 -1
  51. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/product_recommendation/products/p2.json +1 -1
  52. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/product_recommendation/products/p3.json +1 -1
  53. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/product_recommendation/products/p4.json +1 -1
  54. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/product_recommendation/products/p6.json +1 -1
  55. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/product_recommendation/products/p7.json +1 -1
  56. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/product_recommendation/products/p8.json +1 -1
  57. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/product_recommendation/products/p9.json +1 -1
  58. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/text_embedding/README.md +3 -4
  59. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/text_embedding/main.py +11 -3
  60. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/text_embedding/markdown_files/1706.03762v7.md +1 -1
  61. {cocoindex-0.1.53/examples/fastapi_server_docker/files → cocoindex-0.1.55/examples/text_embedding/markdown_files}/1810.04805v2.md +1 -1
  62. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/text_embedding/markdown_files/rfc8259.md +1 -1
  63. cocoindex-0.1.55/examples/text_embedding_qdrant/.env +2 -0
  64. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/text_embedding_qdrant/README.md +3 -5
  65. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/text_embedding_qdrant/markdown_files/rfc8259.md +1 -1
  66. {cocoindex-0.1.53 → cocoindex-0.1.55}/pyproject.toml +3 -1
  67. {cocoindex-0.1.53 → cocoindex-0.1.55}/python/cocoindex/cli.py +90 -11
  68. {cocoindex-0.1.53 → cocoindex-0.1.55}/python/cocoindex/convert.py +77 -62
  69. {cocoindex-0.1.53 → cocoindex-0.1.55}/python/cocoindex/flow.py +3 -2
  70. {cocoindex-0.1.53 → cocoindex-0.1.55}/python/cocoindex/functions.py +10 -0
  71. {cocoindex-0.1.53 → cocoindex-0.1.55}/python/cocoindex/llm.py +1 -0
  72. cocoindex-0.1.55/python/cocoindex/tests/__init__.py +0 -0
  73. {cocoindex-0.1.53 → cocoindex-0.1.55}/python/cocoindex/tests/test_convert.py +137 -38
  74. {cocoindex-0.1.53 → cocoindex-0.1.55}/python/cocoindex/tests/test_typing.py +26 -7
  75. {cocoindex-0.1.53 → cocoindex-0.1.55}/python/cocoindex/typing.py +31 -12
  76. {cocoindex-0.1.53 → cocoindex-0.1.55}/src/base/json_schema.rs +12 -0
  77. {cocoindex-0.1.53 → cocoindex-0.1.55}/src/base/schema.rs +20 -1
  78. {cocoindex-0.1.53 → cocoindex-0.1.55}/src/base/value.rs +60 -3
  79. cocoindex-0.1.55/src/builder/analyzed_flow.rs +66 -0
  80. {cocoindex-0.1.53 → cocoindex-0.1.55}/src/builder/analyzer.rs +108 -296
  81. cocoindex-0.1.55/src/builder/exec_ctx.rs +275 -0
  82. {cocoindex-0.1.53 → cocoindex-0.1.55}/src/builder/flow_builder.rs +41 -25
  83. {cocoindex-0.1.53 → cocoindex-0.1.55}/src/builder/mod.rs +2 -0
  84. {cocoindex-0.1.53 → cocoindex-0.1.55}/src/builder/plan.rs +0 -4
  85. {cocoindex-0.1.53 → cocoindex-0.1.55}/src/execution/db_tracking.rs +23 -2
  86. {cocoindex-0.1.53 → cocoindex-0.1.55}/src/execution/db_tracking_setup.rs +4 -3
  87. {cocoindex-0.1.53 → cocoindex-0.1.55}/src/execution/dumper.rs +24 -11
  88. {cocoindex-0.1.53 → cocoindex-0.1.55}/src/execution/evaluator.rs +3 -3
  89. {cocoindex-0.1.53 → cocoindex-0.1.55}/src/execution/indexing_status.rs +3 -2
  90. {cocoindex-0.1.53 → cocoindex-0.1.55}/src/execution/live_updater.rs +4 -4
  91. {cocoindex-0.1.53 → cocoindex-0.1.55}/src/execution/memoization.rs +8 -1
  92. {cocoindex-0.1.53 → cocoindex-0.1.55}/src/execution/row_indexer.rs +375 -46
  93. {cocoindex-0.1.53 → cocoindex-0.1.55}/src/execution/source_indexer.rs +33 -21
  94. {cocoindex-0.1.53 → cocoindex-0.1.55}/src/lib_context.rs +57 -16
  95. {cocoindex-0.1.53 → cocoindex-0.1.55}/src/llm/anthropic.rs +13 -6
  96. {cocoindex-0.1.53 → cocoindex-0.1.55}/src/llm/gemini.rs +81 -15
  97. {cocoindex-0.1.53 → cocoindex-0.1.55}/src/llm/litellm.rs +4 -4
  98. cocoindex-0.1.55/src/llm/mod.rs +137 -0
  99. {cocoindex-0.1.53 → cocoindex-0.1.55}/src/llm/ollama.rs +13 -10
  100. {cocoindex-0.1.53 → cocoindex-0.1.55}/src/llm/openai.rs +46 -10
  101. {cocoindex-0.1.53 → cocoindex-0.1.55}/src/llm/openrouter.rs +4 -4
  102. cocoindex-0.1.55/src/llm/voyage.rs +109 -0
  103. {cocoindex-0.1.53 → cocoindex-0.1.55}/src/ops/factory_bases.rs +14 -10
  104. cocoindex-0.1.55/src/ops/functions/embed_text.rs +97 -0
  105. {cocoindex-0.1.53 → cocoindex-0.1.55}/src/ops/functions/extract_by_llm.rs +9 -5
  106. {cocoindex-0.1.53 → cocoindex-0.1.55}/src/ops/functions/mod.rs +1 -0
  107. {cocoindex-0.1.53 → cocoindex-0.1.55}/src/ops/functions/parse_json.rs +4 -4
  108. {cocoindex-0.1.53 → cocoindex-0.1.55}/src/ops/functions/split_recursively.rs +189 -52
  109. {cocoindex-0.1.53 → cocoindex-0.1.55}/src/ops/interface.rs +5 -3
  110. {cocoindex-0.1.53 → cocoindex-0.1.55}/src/ops/py_factory.rs +2 -1
  111. {cocoindex-0.1.53 → cocoindex-0.1.55}/src/ops/registration.rs +10 -3
  112. {cocoindex-0.1.53 → cocoindex-0.1.55}/src/ops/sdk.rs +1 -0
  113. {cocoindex-0.1.53 → cocoindex-0.1.55}/src/ops/sources/amazon_s3.rs +1 -1
  114. {cocoindex-0.1.53 → cocoindex-0.1.55}/src/ops/sources/google_drive.rs +1 -1
  115. {cocoindex-0.1.53 → cocoindex-0.1.55}/src/ops/sources/local_file.rs +1 -1
  116. {cocoindex-0.1.53 → cocoindex-0.1.55}/src/ops/targets/kuzu.rs +3 -3
  117. {cocoindex-0.1.53 → cocoindex-0.1.55}/src/ops/targets/neo4j.rs +12 -1
  118. {cocoindex-0.1.53 → cocoindex-0.1.55}/src/ops/targets/postgres.rs +8 -1
  119. {cocoindex-0.1.53 → cocoindex-0.1.55}/src/ops/targets/qdrant.rs +1 -1
  120. {cocoindex-0.1.53 → cocoindex-0.1.55}/src/prelude.rs +1 -1
  121. {cocoindex-0.1.53 → cocoindex-0.1.55}/src/py/convert.rs +27 -4
  122. {cocoindex-0.1.53 → cocoindex-0.1.55}/src/py/mod.rs +2 -0
  123. {cocoindex-0.1.53 → cocoindex-0.1.55}/src/service/flows.rs +6 -0
  124. {cocoindex-0.1.53 → cocoindex-0.1.55}/src/setup/driver.rs +17 -12
  125. cocoindex-0.1.53/check.sh +0 -12
  126. cocoindex-0.1.53/docs/docs/ai/llm.mdx +0 -209
  127. cocoindex-0.1.53/docs/docs/ops/functions.md +0 -107
  128. cocoindex-0.1.53/examples/amazon_s3_embedding/.gitignore +0 -1
  129. cocoindex-0.1.53/examples/gdrive_text_embedding/.gitignore +0 -1
  130. cocoindex-0.1.53/examples/product_recommendation/.env +0 -3
  131. cocoindex-0.1.53/python/cocoindex/tests/__init__.py +0 -1
  132. cocoindex-0.1.53/src/builder/analyzed_flow.rs +0 -90
  133. cocoindex-0.1.53/src/llm/mod.rs +0 -88
  134. {cocoindex-0.1.53 → cocoindex-0.1.55}/.cargo/config.toml +0 -0
  135. {cocoindex-0.1.53 → cocoindex-0.1.55}/.env.lib_debug +0 -0
  136. {cocoindex-0.1.53 → cocoindex-0.1.55}/.github/ISSUE_TEMPLATE/🐛-bug-report.md +0 -0
  137. {cocoindex-0.1.53 → cocoindex-0.1.55}/.github/workflows/_doc_release.yml +0 -0
  138. {cocoindex-0.1.53 → cocoindex-0.1.55}/.github/workflows/_test.yml +0 -0
  139. {cocoindex-0.1.53 → cocoindex-0.1.55}/.github/workflows/docs.yml +0 -0
  140. {cocoindex-0.1.53 → cocoindex-0.1.55}/.github/workflows/release.yml +0 -0
  141. {cocoindex-0.1.53 → cocoindex-0.1.55}/.gitignore +0 -0
  142. {cocoindex-0.1.53 → cocoindex-0.1.55}/CODE_OF_CONDUCT.md +0 -0
  143. {cocoindex-0.1.53 → cocoindex-0.1.55}/LICENSE +0 -0
  144. {cocoindex-0.1.53 → cocoindex-0.1.55}/dev/neo4j.yaml +0 -0
  145. {cocoindex-0.1.53 → cocoindex-0.1.55}/dev/postgres.yaml +0 -0
  146. {cocoindex-0.1.53 → cocoindex-0.1.55}/docs/.gitignore +0 -0
  147. {cocoindex-0.1.53 → cocoindex-0.1.55}/docs/README.md +0 -0
  148. {cocoindex-0.1.53 → cocoindex-0.1.55}/docs/docs/about/community.md +0 -0
  149. {cocoindex-0.1.53 → cocoindex-0.1.55}/docs/docs/core/data_example.svg +0 -0
  150. {cocoindex-0.1.53 → cocoindex-0.1.55}/docs/docs/core/data_types.mdx +0 -0
  151. {cocoindex-0.1.53 → cocoindex-0.1.55}/docs/docs/core/flow_example.svg +0 -0
  152. {cocoindex-0.1.53 → cocoindex-0.1.55}/docs/docs/getting_started/markdown_files.zip +0 -0
  153. {cocoindex-0.1.53 → cocoindex-0.1.55}/docs/docusaurus.config.ts +0 -0
  154. {cocoindex-0.1.53 → cocoindex-0.1.55}/docs/package.json +0 -0
  155. {cocoindex-0.1.53 → cocoindex-0.1.55}/docs/src/components/HomepageFeatures/index.tsx +0 -0
  156. {cocoindex-0.1.53 → cocoindex-0.1.55}/docs/src/components/HomepageFeatures/styles.module.css +0 -0
  157. {cocoindex-0.1.53 → cocoindex-0.1.55}/docs/static/.nojekyll +0 -0
  158. {cocoindex-0.1.53 → cocoindex-0.1.55}/docs/static/img/docusaurus.png +0 -0
  159. {cocoindex-0.1.53 → cocoindex-0.1.55}/docs/static/img/favicon.ico +0 -0
  160. {cocoindex-0.1.53 → cocoindex-0.1.55}/docs/static/img/icon.svg +0 -0
  161. {cocoindex-0.1.53 → cocoindex-0.1.55}/docs/static/img/incremental-etl.gif +0 -0
  162. {cocoindex-0.1.53 → cocoindex-0.1.55}/docs/tsconfig.json +0 -0
  163. {cocoindex-0.1.53 → cocoindex-0.1.55}/docs/yarn.lock +0 -0
  164. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/amazon_s3_embedding/main.py +0 -0
  165. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/amazon_s3_embedding/pyproject.toml +0 -0
  166. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/code_embedding/.env +0 -0
  167. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/code_embedding/pyproject.toml +0 -0
  168. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/docs_to_knowledge_graph/.env +0 -0
  169. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/docs_to_knowledge_graph/main.py +0 -0
  170. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/docs_to_knowledge_graph/pyproject.toml +0 -0
  171. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/fastapi_server_docker/.dockerignore +0 -0
  172. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/fastapi_server_docker/.env +0 -0
  173. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/fastapi_server_docker/compose.yaml +0 -0
  174. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/fastapi_server_docker/dockerfile +0 -0
  175. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/fastapi_server_docker/main.py +0 -0
  176. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/gdrive_text_embedding/main.py +0 -0
  177. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/gdrive_text_embedding/pyproject.toml +0 -0
  178. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/image_search/frontend/.gitignore +0 -0
  179. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/image_search/frontend/index.html +0 -0
  180. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/image_search/frontend/package-lock.json +0 -0
  181. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/image_search/frontend/package.json +0 -0
  182. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/image_search/frontend/src/App.jsx +0 -0
  183. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/image_search/frontend/src/main.jsx +0 -0
  184. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/image_search/frontend/src/style.css +0 -0
  185. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/image_search/frontend/vite.config.js +0 -0
  186. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/image_search/img/cat1.jpeg +0 -0
  187. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/image_search/img/dog1.jpeg +0 -0
  188. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/image_search/img/elephant1.jpg +0 -0
  189. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/image_search/img/giraffe.jpg +0 -0
  190. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/manuals_llm_extraction/.env +0 -0
  191. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/manuals_llm_extraction/main.py +0 -0
  192. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/manuals_llm_extraction/manuals/array.pdf +0 -0
  193. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/manuals_llm_extraction/manuals/base64.pdf +0 -0
  194. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/manuals_llm_extraction/manuals/copy.pdf +0 -0
  195. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/manuals_llm_extraction/manuals/glob.pdf +0 -0
  196. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/manuals_llm_extraction/pyproject.toml +0 -0
  197. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/pdf_embedding/.env +0 -0
  198. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/pdf_embedding/main.py +0 -0
  199. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/pdf_embedding/pdf_files/1706.03762v7.pdf +0 -0
  200. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/pdf_embedding/pdf_files/1810.04805v2.pdf +0 -0
  201. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/pdf_embedding/pdf_files/rfc8259.pdf +0 -0
  202. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/pdf_embedding/pyproject.toml +0 -0
  203. {cocoindex-0.1.53/examples/text_embedding → cocoindex-0.1.55/examples/product_recommendation}/.env +0 -0
  204. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/product_recommendation/img/cocoinsight.png +0 -0
  205. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/product_recommendation/img/neo4j.png +0 -0
  206. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/product_recommendation/main.py +0 -0
  207. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/product_recommendation/products/p5.json +0 -0
  208. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/product_recommendation/pyproject.toml +0 -0
  209. {cocoindex-0.1.53/examples/text_embedding_qdrant → cocoindex-0.1.55/examples/text_embedding}/.env +0 -0
  210. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/text_embedding/Text_Embedding.ipynb +0 -0
  211. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/text_embedding/pyproject.toml +0 -0
  212. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/text_embedding_qdrant/main.py +0 -0
  213. {cocoindex-0.1.53 → cocoindex-0.1.55}/examples/text_embedding_qdrant/pyproject.toml +0 -0
  214. {cocoindex-0.1.53 → cocoindex-0.1.55}/python/cocoindex/__init__.py +0 -0
  215. {cocoindex-0.1.53 → cocoindex-0.1.55}/python/cocoindex/auth_registry.py +0 -0
  216. {cocoindex-0.1.53 → cocoindex-0.1.55}/python/cocoindex/index.py +0 -0
  217. {cocoindex-0.1.53 → cocoindex-0.1.55}/python/cocoindex/lib.py +0 -0
  218. {cocoindex-0.1.53 → cocoindex-0.1.55}/python/cocoindex/op.py +0 -0
  219. {cocoindex-0.1.53 → cocoindex-0.1.55}/python/cocoindex/py.typed +0 -0
  220. {cocoindex-0.1.53 → cocoindex-0.1.55}/python/cocoindex/runtime.py +0 -0
  221. {cocoindex-0.1.53 → cocoindex-0.1.55}/python/cocoindex/setting.py +0 -0
  222. {cocoindex-0.1.53 → cocoindex-0.1.55}/python/cocoindex/setup.py +0 -0
  223. {cocoindex-0.1.53 → cocoindex-0.1.55}/python/cocoindex/sources.py +0 -0
  224. {cocoindex-0.1.53 → cocoindex-0.1.55}/python/cocoindex/targets.py +0 -0
  225. {cocoindex-0.1.53 → cocoindex-0.1.55}/python/cocoindex/tests/test_optional_database.py +0 -0
  226. {cocoindex-0.1.53 → cocoindex-0.1.55}/python/cocoindex/utils.py +0 -0
  227. {cocoindex-0.1.53 → cocoindex-0.1.55}/ruff.toml +0 -0
  228. {cocoindex-0.1.53 → cocoindex-0.1.55}/src/base/duration.rs +0 -0
  229. {cocoindex-0.1.53 → cocoindex-0.1.55}/src/base/field_attrs.rs +0 -0
  230. {cocoindex-0.1.53 → cocoindex-0.1.55}/src/base/mod.rs +0 -0
  231. {cocoindex-0.1.53 → cocoindex-0.1.55}/src/base/spec.rs +0 -0
  232. {cocoindex-0.1.53 → cocoindex-0.1.55}/src/execution/mod.rs +0 -0
  233. {cocoindex-0.1.53 → cocoindex-0.1.55}/src/execution/stats.rs +0 -0
  234. {cocoindex-0.1.53 → cocoindex-0.1.55}/src/lib.rs +0 -0
  235. {cocoindex-0.1.53 → cocoindex-0.1.55}/src/ops/mod.rs +0 -0
  236. {cocoindex-0.1.53 → cocoindex-0.1.55}/src/ops/registry.rs +0 -0
  237. {cocoindex-0.1.53 → cocoindex-0.1.55}/src/ops/sources/mod.rs +0 -0
  238. {cocoindex-0.1.53 → cocoindex-0.1.55}/src/ops/targets/mod.rs +0 -0
  239. {cocoindex-0.1.53 → cocoindex-0.1.55}/src/ops/targets/shared/mod.rs +0 -0
  240. {cocoindex-0.1.53 → cocoindex-0.1.55}/src/ops/targets/shared/property_graph.rs +0 -0
  241. {cocoindex-0.1.53 → cocoindex-0.1.55}/src/ops/targets/shared/table_columns.rs +0 -0
  242. {cocoindex-0.1.53 → cocoindex-0.1.55}/src/server.rs +0 -0
  243. {cocoindex-0.1.53 → cocoindex-0.1.55}/src/service/error.rs +0 -0
  244. {cocoindex-0.1.53 → cocoindex-0.1.55}/src/service/mod.rs +0 -0
  245. {cocoindex-0.1.53 → cocoindex-0.1.55}/src/settings.rs +0 -0
  246. {cocoindex-0.1.53 → cocoindex-0.1.55}/src/setup/auth_registry.rs +0 -0
  247. {cocoindex-0.1.53 → cocoindex-0.1.55}/src/setup/components.rs +0 -0
  248. {cocoindex-0.1.53 → cocoindex-0.1.55}/src/setup/db_metadata.rs +0 -0
  249. {cocoindex-0.1.53 → cocoindex-0.1.55}/src/setup/mod.rs +0 -0
  250. {cocoindex-0.1.53 → cocoindex-0.1.55}/src/setup/states.rs +0 -0
  251. {cocoindex-0.1.53 → cocoindex-0.1.55}/src/utils/db.rs +0 -0
  252. {cocoindex-0.1.53 → cocoindex-0.1.55}/src/utils/fingerprint.rs +0 -0
  253. {cocoindex-0.1.53 → cocoindex-0.1.55}/src/utils/immutable.rs +0 -0
  254. {cocoindex-0.1.53 → cocoindex-0.1.55}/src/utils/mod.rs +0 -0
  255. {cocoindex-0.1.53 → cocoindex-0.1.55}/src/utils/retryable.rs +0 -0
  256. {cocoindex-0.1.53 → cocoindex-0.1.55}/src/utils/yaml_ser.rs +0 -0
@@ -17,4 +17,4 @@ assignees: ''
17
17
 
18
18
  ---
19
19
  ❤️ Contributors, please refer to 📙[Contributing Guide](https://cocoindex.io/docs/about/contributing).
20
- Unless the PR can be sent immediately (e.g. just a few lines of code), we recommend you to leave a comment on the issue like **`I'm working on it`** or **`Can I work on this issue?`** to avoid duplicating work. Our [Discord server](https://discord.com/invite/zpA9S2DR7s) is always open and friendly.
20
+ Unless the PR can be sent immediately (e.g. just a few lines of code), we recommend you to leave a comment on the issue like **`I'm working on it`** or **`Can I work on this issue?`** to avoid duplicating work. Our [Discord server](https://discord.com/invite/zpA9S2DR7s) is always open and friendly.
@@ -19,4 +19,4 @@ else
19
19
  fi
20
20
 
21
21
  # Update Cargo.toml
22
- sed "${SED_INLINE[@]}" "s/^version = .*/version = \"$VERSION\"/" Cargo.toml
22
+ sed "${SED_INLINE[@]}" "s/^version = .*/version = \"$VERSION\"/" Cargo.toml
@@ -26,7 +26,19 @@ permissions:
26
26
  contents: read
27
27
 
28
28
  jobs:
29
- format-check:
29
+ rust-format-check:
30
+ name: Check Rust formatting
31
+ runs-on: ubuntu-latest
32
+ steps:
33
+ - uses: actions/checkout@v4
34
+ - uses: dtolnay/rust-toolchain@stable
35
+ with:
36
+ components: rustfmt
37
+ - name: Check Rust formatting
38
+ run: |
39
+ cargo fmt --check
40
+
41
+ python-format-check:
30
42
  name: Check Python formatting
31
43
  runs-on: ubuntu-latest
32
44
  steps:
@@ -0,0 +1,71 @@
1
+ ci:
2
+ autofix_prs: false
3
+ autoupdate_schedule: 'monthly'
4
+
5
+ repos:
6
+ - repo: https://github.com/pre-commit/pre-commit-hooks
7
+ rev: v5.0.0
8
+ hooks:
9
+ - id: check-case-conflict
10
+ # Check for files with names that would conflict on a case-insensitive
11
+ # filesystem like MacOS HFS+ or Windows FAT.
12
+ - id: check-merge-conflict
13
+ # Check for files that contain merge conflict strings.
14
+ - id: check-symlinks
15
+ # Checks for symlinks which do not point to anything.
16
+ exclude: ".*(.github.*)$"
17
+ - id: detect-private-key
18
+ # Checks for the existence of private keys.
19
+ - id: end-of-file-fixer
20
+ # Makes sure files end in a newline and only a newline.
21
+ exclude: ".*(data.*|licenses.*|_static.*|\\.ya?ml|\\.jpe?g|\\.png|\\.svg|\\.webp)$"
22
+ - id: trailing-whitespace
23
+ # Trims trailing whitespace.
24
+ exclude_types: [python] # Covered by Ruff W291.
25
+ exclude: ".*(data.*|licenses.*|_static.*|\\.ya?ml|\\.jpe?g|\\.png|\\.svg|\\.webp)$"
26
+
27
+ - repo: local
28
+ hooks:
29
+ - id: maturin-develop
30
+ name: maturin develop
31
+ entry: maturin develop
32
+ language: system
33
+ files: ^(python/|src/|Cargo\.toml|pyproject\.toml)
34
+ pass_filenames: false
35
+
36
+ - id: cargo-fmt
37
+ name: cargo fmt
38
+ entry: cargo fmt
39
+ language: system
40
+ types: [rust]
41
+ pass_filenames: false
42
+
43
+ - id: cargo-test
44
+ name: cargo test
45
+ entry: cargo test
46
+ language: system
47
+ types: [rust]
48
+ pass_filenames: false
49
+
50
+ - id: mypy-check
51
+ name: mypy type check
52
+ entry: mypy
53
+ language: system
54
+ types: [python]
55
+ pass_filenames: false
56
+
57
+ - repo: https://github.com/astral-sh/ruff-pre-commit
58
+ rev: v0.12.0
59
+ hooks:
60
+ - id: ruff-format
61
+ types: [python]
62
+ pass_filenames: true
63
+
64
+ - repo: https://github.com/christophmeissner/pytest-pre-commit
65
+ rev: 1.0.0
66
+ hooks:
67
+ - id: pytest
68
+ language: system
69
+ types: [python]
70
+ pass_filenames: false
71
+ always_run: false
@@ -6,4 +6,4 @@
6
6
  ],
7
7
  "editor.formatOnSave": true,
8
8
  "python.formatting.provider": "ruff"
9
- }
9
+ }
@@ -1 +1 @@
1
- We love contributions from our community ❤️. Please check out our [contributing guide](https://cocoindex.io/docs/about/contributing).
1
+ We love contributions from our community ❤️. Please check out our [contributing guide](https://cocoindex.io/docs/about/contributing).
@@ -1040,7 +1040,7 @@ dependencies = [
1040
1040
 
1041
1041
  [[package]]
1042
1042
  name = "cocoindex"
1043
- version = "0.1.53"
1043
+ version = "0.1.55"
1044
1044
  dependencies = [
1045
1045
  "anyhow",
1046
1046
  "async-openai",
@@ -3293,6 +3293,7 @@ dependencies = [
3293
3293
  "pyo3-ffi",
3294
3294
  "pyo3-macros",
3295
3295
  "unindent",
3296
+ "uuid",
3296
3297
  ]
3297
3298
 
3298
3299
  [[package]]
@@ -2,7 +2,7 @@
2
2
  name = "cocoindex"
3
3
  # Version used for local development is always higher than others to take precedence.
4
4
  # Will be overridden for specific release versions.
5
- version = "0.1.53"
5
+ version = "0.1.55"
6
6
  edition = "2024"
7
7
  rust-version = "1.86"
8
8
 
@@ -15,7 +15,7 @@ name = "cocoindex_engine"
15
15
  crate-type = ["cdylib"]
16
16
 
17
17
  [dependencies]
18
- pyo3 = { version = "0.25.0", features = ["chrono", "auto-initialize"] }
18
+ pyo3 = { version = "0.25.0", features = ["chrono", "auto-initialize", "uuid"] }
19
19
  pythonize = "0.25.0"
20
20
  pyo3-async-runtimes = { version = "0.25.0", features = ["tokio-runtime"] }
21
21
 
@@ -1,12 +1,14 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cocoindex
3
- Version: 0.1.53
3
+ Version: 0.1.55
4
4
  Requires-Dist: sentence-transformers>=3.3.1
5
5
  Requires-Dist: click>=8.1.8
6
6
  Requires-Dist: rich>=14.0.0
7
7
  Requires-Dist: python-dotenv>=1.1.0
8
+ Requires-Dist: watchfiles>=1.1.0
8
9
  Requires-Dist: pytest ; extra == 'test'
9
10
  Requires-Dist: ruff ; extra == 'dev'
11
+ Requires-Dist: pre-commit ; extra == 'dev'
10
12
  Provides-Extra: test
11
13
  Provides-Extra: dev
12
14
  License-File: LICENSE
@@ -51,10 +53,10 @@ Unlike a workflow orchestration framework where data is usually opaque, in CocoI
51
53
 
52
54
  ```python
53
55
  # import
54
- data['content'] = flow_builder.add_source(...)
56
+ data['content'] = flow_builder.add_source(...)
55
57
 
56
58
  # transform
57
- data['out'] = data['content']
59
+ data['out'] = data['content']
58
60
  .transform(...)
59
61
  .transform(...)
60
62
 
@@ -75,17 +77,17 @@ As a data framework, CocoIndex takes it to the next level on data freshness. **I
75
77
  The frameworks takes care of
76
78
  - Change data capture.
77
79
  - Figure out what exactly needs to be updated, and only updating that without having to recompute everything.
78
-
80
+
79
81
  This makes it fast to reflect any source updates to the target store. If you have concerns with surfacing stale data to AI agents and are spending lots of efforts working on infra piece to optimize the latency, the framework actually handles it for you.
80
82
 
81
83
 
82
84
  ## Quick Start:
83
- If you're new to CocoIndex, we recommend checking out
85
+ If you're new to CocoIndex, we recommend checking out
84
86
  - 📖 [Documentation](https://cocoindex.io/docs)
85
87
  - ⚡ [Quick Start Guide](https://cocoindex.io/docs/getting_started/quickstart)
86
- - 🎬 [Quick Start Video Tutorial](https://youtu.be/gv5R8nOXsWU?si=9ioeKYkMEnYevTXT)
88
+ - 🎬 [Quick Start Video Tutorial](https://youtu.be/gv5R8nOXsWU?si=9ioeKYkMEnYevTXT)
87
89
 
88
- ### Setup
90
+ ### Setup
89
91
 
90
92
  1. Install CocoIndex Python library
91
93
 
@@ -155,8 +157,8 @@ It defines an index flow like this:
155
157
  | [Google Drive Text Embedding](examples/gdrive_text_embedding) | Index text documents from Google Drive |
156
158
  | [Docs to Knowledge Graph](examples/docs_to_knowledge_graph) | Extract relationships from Markdown documents and build a knowledge graph |
157
159
  | [Embeddings to Qdrant](examples/text_embedding_qdrant) | Index documents in a Qdrant collection for semantic search |
158
- | [FastAPI Server with Docker](examples/fastapi_server_docker) | Run the semantic search server in a Dockerized FastAPI setup |
159
- | [Product Recommendation](examples/product_recommendation) | Build real-time product recommendations with LLM and graph database|
160
+ | [FastAPI Server with Docker](examples/fastapi_server_docker) | Run the semantic search server in a Dockerized FastAPI setup |
161
+ | [Product Recommendation](examples/product_recommendation) | Build real-time product recommendations with LLM and graph database|
160
162
  | [Image Search with Vision API](examples/image_search) | Generates detailed captions for images using a vision model, embeds them, enables live-updating semantic search via FastAPI and served on a React frontend|
161
163
 
162
164
  More coming and stay tuned 👀!
@@ -178,7 +180,7 @@ Join our community here:
178
180
  - 📜 [Read our blog posts](https://cocoindex.io/blogs/)
179
181
 
180
182
  ## Support us:
181
- We are constantly improving, and more features and examples are coming soon. If you love this project, please drop us a star ⭐ at GitHub repo [![GitHub](https://img.shields.io/github/stars/cocoindex-io/cocoindex?color=5B5BD6)](https://github.com/cocoindex-io/cocoindex) to stay tuned and help us grow.
183
+ We are constantly improving, and more features and examples are coming soon. If you love this project, please drop us a star ⭐ at GitHub repo [![GitHub](https://img.shields.io/github/stars/cocoindex-io/cocoindex?color=5B5BD6)](https://github.com/cocoindex-io/cocoindex) to stay tuned and help us grow.
182
184
 
183
185
  ## License
184
186
  CocoIndex is Apache 2.0 licensed.
@@ -32,10 +32,10 @@ Unlike a workflow orchestration framework where data is usually opaque, in CocoI
32
32
 
33
33
  ```python
34
34
  # import
35
- data['content'] = flow_builder.add_source(...)
35
+ data['content'] = flow_builder.add_source(...)
36
36
 
37
37
  # transform
38
- data['out'] = data['content']
38
+ data['out'] = data['content']
39
39
  .transform(...)
40
40
  .transform(...)
41
41
 
@@ -56,17 +56,17 @@ As a data framework, CocoIndex takes it to the next level on data freshness. **I
56
56
  The frameworks takes care of
57
57
  - Change data capture.
58
58
  - Figure out what exactly needs to be updated, and only updating that without having to recompute everything.
59
-
59
+
60
60
  This makes it fast to reflect any source updates to the target store. If you have concerns with surfacing stale data to AI agents and are spending lots of efforts working on infra piece to optimize the latency, the framework actually handles it for you.
61
61
 
62
62
 
63
63
  ## Quick Start:
64
- If you're new to CocoIndex, we recommend checking out
64
+ If you're new to CocoIndex, we recommend checking out
65
65
  - 📖 [Documentation](https://cocoindex.io/docs)
66
66
  - ⚡ [Quick Start Guide](https://cocoindex.io/docs/getting_started/quickstart)
67
- - 🎬 [Quick Start Video Tutorial](https://youtu.be/gv5R8nOXsWU?si=9ioeKYkMEnYevTXT)
67
+ - 🎬 [Quick Start Video Tutorial](https://youtu.be/gv5R8nOXsWU?si=9ioeKYkMEnYevTXT)
68
68
 
69
- ### Setup
69
+ ### Setup
70
70
 
71
71
  1. Install CocoIndex Python library
72
72
 
@@ -136,8 +136,8 @@ It defines an index flow like this:
136
136
  | [Google Drive Text Embedding](examples/gdrive_text_embedding) | Index text documents from Google Drive |
137
137
  | [Docs to Knowledge Graph](examples/docs_to_knowledge_graph) | Extract relationships from Markdown documents and build a knowledge graph |
138
138
  | [Embeddings to Qdrant](examples/text_embedding_qdrant) | Index documents in a Qdrant collection for semantic search |
139
- | [FastAPI Server with Docker](examples/fastapi_server_docker) | Run the semantic search server in a Dockerized FastAPI setup |
140
- | [Product Recommendation](examples/product_recommendation) | Build real-time product recommendations with LLM and graph database|
139
+ | [FastAPI Server with Docker](examples/fastapi_server_docker) | Run the semantic search server in a Dockerized FastAPI setup |
140
+ | [Product Recommendation](examples/product_recommendation) | Build real-time product recommendations with LLM and graph database|
141
141
  | [Image Search with Vision API](examples/image_search) | Generates detailed captions for images using a vision model, embeds them, enables live-updating semantic search via FastAPI and served on a React frontend|
142
142
 
143
143
  More coming and stay tuned 👀!
@@ -159,7 +159,7 @@ Join our community here:
159
159
  - 📜 [Read our blog posts](https://cocoindex.io/blogs/)
160
160
 
161
161
  ## Support us:
162
- We are constantly improving, and more features and examples are coming soon. If you love this project, please drop us a star ⭐ at GitHub repo [![GitHub](https://img.shields.io/github/stars/cocoindex-io/cocoindex?color=5B5BD6)](https://github.com/cocoindex-io/cocoindex) to stay tuned and help us grow.
162
+ We are constantly improving, and more features and examples are coming soon. If you love this project, please drop us a star ⭐ at GitHub repo [![GitHub](https://img.shields.io/github/stars/cocoindex-io/cocoindex?color=5B5BD6)](https://github.com/cocoindex-io/cocoindex) to stay tuned and help us grow.
163
163
 
164
164
  ## License
165
165
  CocoIndex is Apache 2.0 licensed.
@@ -15,22 +15,22 @@ We use [GitHub Issues](https://github.com/cocoindex-io/cocoindex/issues) to trac
15
15
 
16
16
  We tag issues with the ["good first issue"](https://github.com/cocoindex-io/cocoindex/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22) label for beginner contributors.
17
17
 
18
- ## How to Contribute
18
+ ## How to Contribute
19
19
  - If you decide to work on an issue, unless the PR can be sent immediately (e.g. just a few lines of code), we recommend you to leave a comment on the issue like **`I'm working on it`** or **`Can I work on this issue?`** to avoid duplicating work.
20
20
  - For larger features, we recommend you to discuss with us first in our [Discord server](https://discord.com/invite/zpA9S2DR7s) to coordinate the design and work.
21
21
  - Our [Discord server](https://discord.com/invite/zpA9S2DR7s) is always open. If you are unsure about anything, it is a good place to discuss! We'd love to collaborate and will always be friendly.
22
22
 
23
- ## Start hacking! Setting Up Development Environment
23
+ ## Start hacking! Setting Up Development Environment
24
24
  Follow the steps below to get cocoindex built on the latest codebase locally - if you are making changes to cocoindex functionality and want to test it out.
25
25
 
26
26
  - 🦀 [Install Rust](https://rust-lang.org/tools/install)
27
-
27
+
28
28
  If you don't have Rust installed, run
29
29
  ```sh
30
30
  curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
31
31
  ```
32
- Already have Rust? Make sure it's up to date
33
- ```sh
32
+ Already have Rust? Make sure it's up to date
33
+ ```sh
34
34
  rustup update
35
35
  ```
36
36
 
@@ -46,7 +46,7 @@ Following the steps below to get cocoindex build on latest codebase locally - if
46
46
 
47
47
  - Install required tools:
48
48
  ```sh
49
- pip install maturin mypy ruff
49
+ pip install maturin mypy pre-commit
50
50
  ```
51
51
 
52
52
  - Build the library. Run at the root of cocoindex directory:
@@ -54,6 +54,11 @@ Following the steps below to get cocoindex build on latest codebase locally - if
54
54
  maturin develop
55
55
  ```
56
56
 
57
+ - Install and enable pre-commit hooks. This ensures all checks run automatically before each commit:
58
+ ```sh
59
+ pre-commit install
60
+ ```
61
+
57
62
  - Before running a specific example, set extra environment variables, for exposing extra traces, allowing dev UI, etc.
58
63
  ```sh
59
64
  . ./.env.lib_debug
@@ -67,10 +72,14 @@ To submit your code:
67
72
  1. Fork the [CocoIndex repository](https://github.com/cocoindex-io/cocoindex)
68
73
  2. [Create a new branch](https://docs.github.com/en/desktop/making-changes-in-a-branch/managing-branches-in-github-desktop) on your fork
69
74
  3. Make your changes
70
- 4. Make sure all tests and linting pass by running
71
- ```sh
72
- ./check.sh
73
- ```
75
+ 4. Run the pre-commit checks (automatically triggered on `git commit`)
76
+
77
+ :::tip
78
+ To run them manually (same as CI):
79
+ ```sh
80
+ pre-commit run --all-files
81
+ ```
82
+ :::
74
83
 
75
84
  5. [Open a Pull Request (PR)](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request-from-a-fork) when your work is ready for review
76
85
 
@@ -0,0 +1,309 @@
1
+ ---
2
+ title: LLM Support
3
+ description: LLMs integrated with CocoIndex for various built-in functions
4
+ ---
5
+
6
+ import Tabs from '@theme/Tabs';
7
+ import TabItem from '@theme/TabItem';
8
+
9
+ CocoIndex provides builtin functions integrating with various LLM APIs, for various inference tasks:
10
+ * [Text Generation](#text-generation): use LLM to generate text.
11
+ * [Text Embedding](#text-embedding): embed text into a vector space.
12
+
13
+ ## LLM API Types
14
+
15
+ We support integrating with LLMs through different types of APIs.
16
+ Each LLM API type is specified by a `cocoindex.LlmApiType` enum.
17
+
18
+ We support the following types of LLM APIs:
19
+
20
+ | API Name | `LlmApiType` enum | Text Generation | Text Embedding |
21
+ |----------|---------------------|--------------------|--------------------|
22
+ | [OpenAI](#openai) | `LlmApiType.OPENAI` | ✅ | ✅ |
23
+ | [Ollama](#ollama) | `LlmApiType.OLLAMA` | ✅ | ❌ |
24
+ | [Google Gemini](#google-gemini) | `LlmApiType.GEMINI` | ✅ | ✅ |
25
+ | [Anthropic](#anthropic) | `LlmApiType.ANTHROPIC` | ✅ | ❌ |
26
+ | [Voyage](#voyage) | `LlmApiType.VOYAGE` | ❌ | ✅ |
27
+ | [LiteLLM](#litellm) | `LlmApiType.LITE_LLM` | ✅ | ❌ |
28
+ | [OpenRouter](#openrouter) | `LlmApiType.OPEN_ROUTER` | ✅ | ❌ |
29
+
30
+ ## LLM Tasks
31
+
32
+ ### Text Generation
33
+
34
+ Generation is used as a building block for certain CocoIndex functions that process data using LLM generation.
35
+
36
+ We have one builtin function using LLM generation for now:
37
+
38
+ * [`ExtractByLlm`](/docs/ops/functions#extractbyllm): it extracts information from input text.
39
+
40
+ #### LLM Spec
41
+
42
+ When calling a CocoIndex function that uses LLM generation, you need to provide a `cocoindex.LlmSpec` dataclass, to configure the LLM you want to use in these functions.
43
+ It has the following fields:
44
+
45
+ * `api_type` (type: [`cocoindex.LlmApiType`](/docs/ai/llm#llm-api-types), required): The type of integrated LLM API to use, e.g. `cocoindex.LlmApiType.OPENAI` or `cocoindex.LlmApiType.OLLAMA`.
46
+ See supported LLM APIs in the [LLM API integrations](#llm-api-integrations) section below.
47
+ * `model` (type: `str`, required): The name of the LLM model to use.
48
+ * `address` (type: `str`, optional): The address of the LLM API.
49
+
50
+
51
+ ### Text Embedding
52
+
53
+ Embedding means converting text into a vector space, usually for similarity matching.
54
+
55
+ We provide a builtin function [`EmbedText`](/docs/ops/functions#embedtext) that converts a given text into a vector space.
56
+ The spec takes the following fields:
57
+
58
+ * `api_type` (type: `cocoindex.LlmApiType`, required)
59
+ * `model` (type: `str`, required)
60
+ * `address` (type: `str`, optional)
61
+ * `output_dimension` (type: `int`, optional)
62
+ * `task_type` (type: `str`, optional)
63
+
64
+ See documentation for [`EmbedText`](/docs/ops/functions#embedtext) for more details about these fields.
65
+
66
+ ## LLM API Integrations
67
+
68
+ CocoIndex integrates with various LLM APIs for these functions.
69
+
70
+ ### OpenAI
71
+
72
+ To use the OpenAI LLM API, you need to set the environment variable `OPENAI_API_KEY`.
73
+ You can generate the API key from [OpenAI Dashboard](https://platform.openai.com/api-keys).
74
+
75
+ Currently we don't support a custom address for the OpenAI API.
76
+
77
+ You can find the full list of models supported by OpenAI [here](https://platform.openai.com/docs/models).
78
+
79
+ For text generation, a spec for OpenAI looks like this:
80
+
81
+ <Tabs>
82
+ <TabItem value="python" label="Python" default>
83
+
84
+ ```python
85
+ cocoindex.LlmSpec(
86
+ api_type=cocoindex.LlmApiType.OPENAI,
87
+ model="gpt-4o",
88
+ )
89
+ ```
90
+
91
+ </TabItem>
92
+ </Tabs>
93
+
94
+ For text embedding, a spec for OpenAI looks like this:
95
+
96
+ <Tabs>
97
+ <TabItem value="python" label="Python" default>
98
+
99
+ ```python
100
+ cocoindex.functions.EmbedText(
101
+ api_type=cocoindex.LlmApiType.OPENAI,
102
+ model="text-embedding-3-small",
103
+ )
104
+ ```
105
+
106
+ </TabItem>
107
+ </Tabs>
108
+
109
+ ### Ollama
110
+
111
+ [Ollama](https://ollama.com/) allows you to run LLM models on your local machine easily. To get started:
112
+
113
+ * [Download](https://ollama.com/download) and install Ollama.
114
+ * Pull your favorite LLM models by the `ollama pull` command, e.g.
115
+ ```bash
116
+ ollama pull llama3.2
117
+ ```
118
+ You can find the [list of models](https://ollama.com/library) supported by Ollama.
119
+
120
+ A spec for Ollama looks like this:
121
+
122
+ <Tabs>
123
+ <TabItem value="python" label="Python" default>
124
+
125
+ ```python
126
+ cocoindex.LlmSpec(
127
+ api_type=cocoindex.LlmApiType.OLLAMA,
128
+ model="llama3.2:latest",
129
+ # Optional, use Ollama's default port (11434) on localhost if not specified
130
+ address="http://localhost:11434",
131
+ )
132
+ ```
133
+
134
+ </TabItem>
135
+ </Tabs>
136
+
137
+ ### Google Gemini
138
+
139
+ To use the Gemini LLM API, you need to set the environment variable `GEMINI_API_KEY`.
140
+ You can generate the API key from [Google AI Studio](https://aistudio.google.com/apikey).
141
+
142
+ You can find the full list of models supported by Gemini [here](https://ai.google.dev/gemini-api/docs/models).
143
+
144
+ For text generation, a spec looks like this:
145
+
146
+ <Tabs>
147
+ <TabItem value="python" label="Python" default>
148
+
149
+ ```python
150
+ cocoindex.LlmSpec(
151
+ api_type=cocoindex.LlmApiType.GEMINI,
152
+ model="gemini-2.0-flash",
153
+ )
154
+ ```
155
+
156
+ </TabItem>
157
+ </Tabs>
158
+
159
+ For text embedding, a spec looks like this:
160
+
161
+ <Tabs>
162
+ <TabItem value="python" label="Python" default>
163
+
164
+ ```python
165
+ cocoindex.functions.EmbedText(
166
+ api_type=cocoindex.LlmApiType.GEMINI,
167
+ model="text-embedding-004",
168
+ task_type="SEMANTIC_SIMILARITY",
169
+ )
170
+ ```
171
+
172
+ All supported embedding models can be found [here](https://ai.google.dev/gemini-api/docs/embeddings#embeddings-models).
173
+ Gemini supports task type (optional), which can be found [here](https://ai.google.dev/gemini-api/docs/embeddings#supported-task-types).
174
+
175
+
176
+ </TabItem>
177
+ </Tabs>
178
+
179
+ ### Anthropic
180
+
181
+ To use the Anthropic LLM API, you need to set the environment variable `ANTHROPIC_API_KEY`.
182
+ You can generate the API key from [Anthropic API](https://console.anthropic.com/settings/keys).
183
+
184
+ A text generation spec for Anthropic looks like this:
185
+
186
+ <Tabs>
187
+ <TabItem value="python" label="Python" default>
188
+
189
+ ```python
190
+ cocoindex.LlmSpec(
191
+ api_type=cocoindex.LlmApiType.ANTHROPIC,
192
+ model="claude-3-5-sonnet-latest",
193
+ )
194
+ ```
195
+
196
+ </TabItem>
197
+ </Tabs>
198
+
199
+ You can find the full list of models supported by Anthropic [here](https://docs.anthropic.com/en/docs/about-claude/models/all-models).
200
+
201
+ ### Voyage
202
+
203
+ To use the Voyage LLM API, you need to set the environment variable `VOYAGE_API_KEY`.
204
+ You can generate the API key from [Voyage dashboard](https://dashboard.voyageai.com/organization/api-keys).
205
+
206
+ A text embedding spec for Voyage looks like this:
207
+
208
+ <Tabs>
209
+ <TabItem value="python" label="Python" default>
210
+
211
+ ```python
212
+ cocoindex.functions.EmbedText(
213
+ api_type=cocoindex.LlmApiType.VOYAGE,
214
+ model="voyage-code-3",
215
+ task_type="document",
216
+ )
217
+ ```
218
+
219
+ </TabItem>
220
+ </Tabs>
221
+
222
+ Voyage API supports `document` and `query` as task types (optional, a.k.a. `input_type` in Voyage API, see [Voyage API documentation](https://docs.voyageai.com/reference/embeddings-api) for details).
223
+
224
+ ### LiteLLM
225
+
226
+ To use the LiteLLM API, you need to set the environment variable `LITELLM_API_KEY`.
227
+
228
+ #### 1. Install LiteLLM Proxy
229
+
230
+ ```bash
231
+ pip install 'litellm[proxy]'
232
+ ```
233
+
234
+ #### 2. Create a `config.yml` for LiteLLM
235
+
236
+ **Example for DeepSeek:**
237
+
238
+ Use this in your `config.yml`:
239
+
240
+ ```yaml
241
+ model_list:
242
+ - model_name: deepseek-chat
243
+ litellm_params:
244
+ model: deepseek/deepseek-chat
245
+ api_key: os.environ/DEEPSEEK_API_KEY
246
+ ```
247
+
248
+ You need to set the environment variable `DEEPSEEK_API_KEY` to your DeepSeek API key.
249
+
250
+ **Example for Groq:**
251
+
252
+ Use this in your `config.yml`:
253
+
254
+ ```yaml
255
+ model_list:
256
+ - model_name: groq-llama-3.3-70b-versatile
257
+ litellm_params:
258
+ model: groq/llama-3.3-70b-versatile
259
+ api_key: "os.environ/GROQ_API_KEY"
260
+ ```
261
+
262
+ You need to set the environment variable `GROQ_API_KEY` to your Groq API key.
263
+
264
+
265
+ #### 3. Run LiteLLM Proxy
266
+
267
+ ```bash
268
+ litellm --config config.yml
269
+ ```
270
+
271
+ #### 4. A Spec for LiteLLM will look like this:
272
+
273
+ <Tabs>
274
+ <TabItem value="python" label="Python" default>
275
+
276
+ ```python
277
+ cocoindex.LlmSpec(
278
+ api_type=cocoindex.LlmApiType.LITE_LLM,
279
+ model="deepseek-chat",
280
+ address="http://127.0.0.1:4000", # default url of LiteLLM
281
+ )
282
+ ```
283
+
284
+ </TabItem>
285
+ </Tabs>
286
+
287
+ You can find the full list of models supported by LiteLLM [here](https://docs.litellm.ai/docs/providers).
288
+
289
+ ### OpenRouter
290
+
291
+ To use the OpenRouter API, you need to set the environment variable `OPENROUTER_API_KEY`.
292
+ You can generate the API key from [here](https://openrouter.ai/settings/keys).
293
+
294
+ A spec for OpenRouter looks like this:
295
+
296
+ <Tabs>
297
+ <TabItem value="python" label="Python" default>
298
+
299
+ ```python
300
+ cocoindex.LlmSpec(
301
+ api_type=cocoindex.LlmApiType.OPEN_ROUTER,
302
+ model="deepseek/deepseek-r1:free",
303
+ )
304
+ ```
305
+
306
+ </TabItem>
307
+ </Tabs>
308
+
309
+ You can find the full list of models supported by OpenRouter [here](https://openrouter.ai/models).
@@ -71,7 +71,7 @@ An indexing flow, once set up, maintains a long-lived relationship between data
71
71
 
72
72
  * **One time update**: Once triggered, CocoIndex updates the target data to reflect the version of source data up to the current moment.
73
73
  * **Live update**: CocoIndex continuously reacts to changes of source data and updates the target data accordingly, based on various **change capture mechanisms** for the source.
74
-
74
+
75
75
  See more details in the [build / update target data](flow_methods#build--update-target-data) section.
76
76
 
77
77
  3. CocoIndex intelligently reprocesses to propagate source changes to target by:
@@ -101,4 +101,4 @@ As an indexing flow is long-lived, it needs to store intermediate data to keep t
101
101
  CocoIndex uses internal storage for this purpose.
102
102
 
103
103
  Currently, CocoIndex uses Postgres database as the internal storage.
104
- See [Settings](settings#databaseconnectionspec) for configuring its location, and `cocoindex setup` CLI command (see [CocoIndex CLI](cli)) creates tables for the internal storage.
104
+ See [Settings](settings#databaseconnectionspec) for configuring its location, and `cocoindex setup` CLI command (see [CocoIndex CLI](cli)) creates tables for the internal storage.
@@ -72,4 +72,4 @@ Use `--help` to see the full list of subcommands, and `subcommand --help` to see
72
72
  ```sh
73
73
  cocoindex --help # Show all subcommands
74
74
  cocoindex show --help # Show usage of "show" subcommand
75
- ```
75
+ ```