cocoindex 0.1.68__tar.gz → 0.1.70__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (275) hide show
  1. {cocoindex-0.1.68 → cocoindex-0.1.70}/Cargo.lock +12 -1
  2. {cocoindex-0.1.68 → cocoindex-0.1.70}/Cargo.toml +4 -1
  3. {cocoindex-0.1.68 → cocoindex-0.1.70}/PKG-INFO +1 -1
  4. {cocoindex-0.1.68 → cocoindex-0.1.70}/docs/docs/about/community.md +1 -1
  5. {cocoindex-0.1.68 → cocoindex-0.1.70}/docs/docs/about/contributing.md +2 -2
  6. {cocoindex-0.1.68 → cocoindex-0.1.70}/docs/docs/core/flow_def.mdx +41 -10
  7. {cocoindex-0.1.68 → cocoindex-0.1.70}/docs/docs/core/flow_methods.mdx +4 -4
  8. {cocoindex-0.1.68 → cocoindex-0.1.70}/docs/docs/getting_started/quickstart.md +6 -6
  9. {cocoindex-0.1.68 → cocoindex-0.1.70}/docs/docs/ops/functions.md +43 -4
  10. {cocoindex-0.1.68 → cocoindex-0.1.70}/docs/docs/ops/sources.md +33 -21
  11. {cocoindex-0.1.68 → cocoindex-0.1.70}/docs/docs/ops/targets.md +1 -1
  12. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/amazon_s3_embedding/main.py +0 -6
  13. {cocoindex-0.1.68 → cocoindex-0.1.70}/python/cocoindex/__init__.py +2 -1
  14. cocoindex-0.1.70/python/cocoindex/auth_registry.py +51 -0
  15. {cocoindex-0.1.68 → cocoindex-0.1.70}/python/cocoindex/sources.py +9 -0
  16. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/execution/live_updater.rs +53 -29
  17. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/llm/mod.rs +4 -5
  18. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/llm/ollama.rs +75 -1
  19. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/ops/functions/split_recursively.rs +8 -1
  20. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/ops/sources/azure_blob.rs +20 -8
  21. cocoindex-0.1.68/python/cocoindex/auth_registry.py +0 -29
  22. {cocoindex-0.1.68 → cocoindex-0.1.70}/.cargo/config.toml +0 -0
  23. {cocoindex-0.1.68 → cocoindex-0.1.70}/.env.lib_debug +0 -0
  24. {cocoindex-0.1.68 → cocoindex-0.1.70}/.github/ISSUE_TEMPLATE/🐛-bug-report.md +0 -0
  25. {cocoindex-0.1.68 → cocoindex-0.1.70}/.github/ISSUE_TEMPLATE/💡-feature-request.md +0 -0
  26. {cocoindex-0.1.68 → cocoindex-0.1.70}/.github/scripts/update_version.sh +0 -0
  27. {cocoindex-0.1.68 → cocoindex-0.1.70}/.github/workflows/CI.yml +0 -0
  28. {cocoindex-0.1.68 → cocoindex-0.1.70}/.github/workflows/_doc_release.yml +0 -0
  29. {cocoindex-0.1.68 → cocoindex-0.1.70}/.github/workflows/_test.yml +0 -0
  30. {cocoindex-0.1.68 → cocoindex-0.1.70}/.github/workflows/docs.yml +0 -0
  31. {cocoindex-0.1.68 → cocoindex-0.1.70}/.github/workflows/format.yml +0 -0
  32. {cocoindex-0.1.68 → cocoindex-0.1.70}/.github/workflows/release.yml +0 -0
  33. {cocoindex-0.1.68 → cocoindex-0.1.70}/.gitignore +0 -0
  34. {cocoindex-0.1.68 → cocoindex-0.1.70}/.pre-commit-config.yaml +0 -0
  35. {cocoindex-0.1.68 → cocoindex-0.1.70}/CODE_OF_CONDUCT.md +0 -0
  36. {cocoindex-0.1.68 → cocoindex-0.1.70}/CONTRIBUTING.md +0 -0
  37. {cocoindex-0.1.68 → cocoindex-0.1.70}/LICENSE +0 -0
  38. {cocoindex-0.1.68 → cocoindex-0.1.70}/README.md +0 -0
  39. {cocoindex-0.1.68 → cocoindex-0.1.70}/dev/neo4j.yaml +0 -0
  40. {cocoindex-0.1.68 → cocoindex-0.1.70}/dev/postgres.yaml +0 -0
  41. {cocoindex-0.1.68 → cocoindex-0.1.70}/docs/.gitignore +0 -0
  42. {cocoindex-0.1.68 → cocoindex-0.1.70}/docs/README.md +0 -0
  43. {cocoindex-0.1.68 → cocoindex-0.1.70}/docs/docs/ai/llm.mdx +0 -0
  44. {cocoindex-0.1.68 → cocoindex-0.1.70}/docs/docs/core/basics.md +0 -0
  45. {cocoindex-0.1.68 → cocoindex-0.1.70}/docs/docs/core/cli.mdx +0 -0
  46. {cocoindex-0.1.68 → cocoindex-0.1.70}/docs/docs/core/custom_function.mdx +0 -0
  47. {cocoindex-0.1.68 → cocoindex-0.1.70}/docs/docs/core/data_example.svg +0 -0
  48. {cocoindex-0.1.68 → cocoindex-0.1.70}/docs/docs/core/data_types.mdx +0 -0
  49. {cocoindex-0.1.68 → cocoindex-0.1.70}/docs/docs/core/flow_example.svg +0 -0
  50. {cocoindex-0.1.68 → cocoindex-0.1.70}/docs/docs/core/settings.mdx +0 -0
  51. {cocoindex-0.1.68 → cocoindex-0.1.70}/docs/docs/getting_started/installation.md +0 -0
  52. {cocoindex-0.1.68 → cocoindex-0.1.70}/docs/docs/getting_started/markdown_files.zip +0 -0
  53. {cocoindex-0.1.68 → cocoindex-0.1.70}/docs/docs/getting_started/overview.md +0 -0
  54. {cocoindex-0.1.68 → cocoindex-0.1.70}/docs/docs/query.mdx +0 -0
  55. {cocoindex-0.1.68 → cocoindex-0.1.70}/docs/docusaurus.config.ts +0 -0
  56. {cocoindex-0.1.68 → cocoindex-0.1.70}/docs/package.json +0 -0
  57. {cocoindex-0.1.68 → cocoindex-0.1.70}/docs/sidebars.ts +0 -0
  58. {cocoindex-0.1.68 → cocoindex-0.1.70}/docs/src/components/HomepageFeatures/index.tsx +0 -0
  59. {cocoindex-0.1.68 → cocoindex-0.1.70}/docs/src/components/HomepageFeatures/styles.module.css +0 -0
  60. {cocoindex-0.1.68 → cocoindex-0.1.70}/docs/src/css/custom.css +0 -0
  61. {cocoindex-0.1.68 → cocoindex-0.1.70}/docs/src/theme/Root.js +0 -0
  62. {cocoindex-0.1.68 → cocoindex-0.1.70}/docs/static/.nojekyll +0 -0
  63. {cocoindex-0.1.68 → cocoindex-0.1.70}/docs/static/img/docusaurus.png +0 -0
  64. {cocoindex-0.1.68 → cocoindex-0.1.70}/docs/static/img/favicon.ico +0 -0
  65. {cocoindex-0.1.68 → cocoindex-0.1.70}/docs/static/img/icon.svg +0 -0
  66. {cocoindex-0.1.68 → cocoindex-0.1.70}/docs/static/img/incremental-etl.gif +0 -0
  67. {cocoindex-0.1.68 → cocoindex-0.1.70}/docs/static/robots.txt +0 -0
  68. {cocoindex-0.1.68 → cocoindex-0.1.70}/docs/tsconfig.json +0 -0
  69. {cocoindex-0.1.68 → cocoindex-0.1.70}/docs/yarn.lock +0 -0
  70. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/amazon_s3_embedding/.env.example +0 -0
  71. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/amazon_s3_embedding/.gitignore +0 -0
  72. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/amazon_s3_embedding/README.md +0 -0
  73. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/amazon_s3_embedding/pyproject.toml +0 -0
  74. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/azure_blob_embedding/.env.example +0 -0
  75. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/azure_blob_embedding/.gitignore +0 -0
  76. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/azure_blob_embedding/README.md +0 -0
  77. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/azure_blob_embedding/main.py +0 -0
  78. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/azure_blob_embedding/pyproject.toml +0 -0
  79. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/code_embedding/.env +0 -0
  80. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/code_embedding/README.md +0 -0
  81. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/code_embedding/main.py +0 -0
  82. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/code_embedding/pyproject.toml +0 -0
  83. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/docs_to_knowledge_graph/.env +0 -0
  84. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/docs_to_knowledge_graph/README.md +0 -0
  85. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/docs_to_knowledge_graph/main.py +0 -0
  86. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/docs_to_knowledge_graph/pyproject.toml +0 -0
  87. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/fastapi_server_docker/.dockerignore +0 -0
  88. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/fastapi_server_docker/.env +0 -0
  89. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/fastapi_server_docker/README.md +0 -0
  90. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/fastapi_server_docker/compose.yaml +0 -0
  91. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/fastapi_server_docker/dockerfile +0 -0
  92. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/fastapi_server_docker/files/1810.04805v2.md +0 -0
  93. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/fastapi_server_docker/main.py +0 -0
  94. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/fastapi_server_docker/requirements.txt +0 -0
  95. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/gdrive_text_embedding/.env.example +0 -0
  96. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/gdrive_text_embedding/.gitignore +0 -0
  97. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/gdrive_text_embedding/README.md +0 -0
  98. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/gdrive_text_embedding/main.py +0 -0
  99. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/gdrive_text_embedding/pyproject.toml +0 -0
  100. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/image_search/.env +0 -0
  101. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/image_search/README.md +0 -0
  102. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/image_search/frontend/.gitignore +0 -0
  103. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/image_search/frontend/index.html +0 -0
  104. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/image_search/frontend/package-lock.json +0 -0
  105. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/image_search/frontend/package.json +0 -0
  106. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/image_search/frontend/src/App.jsx +0 -0
  107. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/image_search/frontend/src/main.jsx +0 -0
  108. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/image_search/frontend/src/style.css +0 -0
  109. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/image_search/frontend/vite.config.js +0 -0
  110. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/image_search/img/cat1.jpeg +0 -0
  111. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/image_search/img/dog1.jpeg +0 -0
  112. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/image_search/img/elephant1.jpg +0 -0
  113. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/image_search/img/giraffe.jpg +0 -0
  114. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/image_search/main.py +0 -0
  115. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/image_search/pyproject.toml +0 -0
  116. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/manuals_llm_extraction/.env +0 -0
  117. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/manuals_llm_extraction/README.md +0 -0
  118. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/manuals_llm_extraction/main.py +0 -0
  119. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/manuals_llm_extraction/manuals/array.pdf +0 -0
  120. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/manuals_llm_extraction/manuals/base64.pdf +0 -0
  121. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/manuals_llm_extraction/manuals/copy.pdf +0 -0
  122. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/manuals_llm_extraction/manuals/glob.pdf +0 -0
  123. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/manuals_llm_extraction/pyproject.toml +0 -0
  124. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/paper_metadata/.env.example +0 -0
  125. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/paper_metadata/.gitignore +0 -0
  126. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/paper_metadata/README.md +0 -0
  127. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/paper_metadata/main.py +0 -0
  128. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/paper_metadata/papers/1706.03762v7.pdf +0 -0
  129. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/paper_metadata/papers/1810.04805v2.pdf +0 -0
  130. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/paper_metadata/papers/2502.06786v3.pdf +0 -0
  131. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/paper_metadata/papers/2502.20346v1.pdf +0 -0
  132. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/paper_metadata/pyproject.toml +0 -0
  133. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/patient_intake_extraction/.env.example +0 -0
  134. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/patient_intake_extraction/README.md +0 -0
  135. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/patient_intake_extraction/data/README.md +0 -0
  136. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/patient_intake_extraction/data/patient_forms/Patient_Intake_Form_David_Artificial.docx +0 -0
  137. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/patient_intake_extraction/data/patient_forms/Patient_Intake_Form_Emily_Artificial.pdf +0 -0
  138. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/patient_intake_extraction/data/patient_forms/Patient_Intake_Form_Joe_Artificial.pdf +0 -0
  139. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/patient_intake_extraction/data/patient_forms/Patient_Intake_From_Jane_Artificial.docx +0 -0
  140. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/patient_intake_extraction/main.py +0 -0
  141. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/patient_intake_extraction/pyproject.toml +0 -0
  142. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/pdf_embedding/.env +0 -0
  143. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/pdf_embedding/README.md +0 -0
  144. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/pdf_embedding/main.py +0 -0
  145. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/pdf_embedding/pdf_files/1706.03762v7.pdf +0 -0
  146. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/pdf_embedding/pdf_files/1810.04805v2.pdf +0 -0
  147. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/pdf_embedding/pdf_files/rfc8259.pdf +0 -0
  148. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/pdf_embedding/pyproject.toml +0 -0
  149. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/product_recommendation/.env +0 -0
  150. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/product_recommendation/README.md +0 -0
  151. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/product_recommendation/img/cocoinsight.png +0 -0
  152. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/product_recommendation/img/neo4j.png +0 -0
  153. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/product_recommendation/main.py +0 -0
  154. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/product_recommendation/products/p1.json +0 -0
  155. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/product_recommendation/products/p2.json +0 -0
  156. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/product_recommendation/products/p3.json +0 -0
  157. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/product_recommendation/products/p4.json +0 -0
  158. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/product_recommendation/products/p5.json +0 -0
  159. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/product_recommendation/products/p6.json +0 -0
  160. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/product_recommendation/products/p7.json +0 -0
  161. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/product_recommendation/products/p8.json +0 -0
  162. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/product_recommendation/products/p9.json +0 -0
  163. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/product_recommendation/pyproject.toml +0 -0
  164. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/text_embedding/.env +0 -0
  165. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/text_embedding/README.md +0 -0
  166. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/text_embedding/Text_Embedding.ipynb +0 -0
  167. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/text_embedding/main.py +0 -0
  168. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/text_embedding/markdown_files/1706.03762v7.md +0 -0
  169. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/text_embedding/markdown_files/1810.04805v2.md +0 -0
  170. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/text_embedding/markdown_files/rfc8259.md +0 -0
  171. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/text_embedding/pyproject.toml +0 -0
  172. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/text_embedding_qdrant/.env +0 -0
  173. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/text_embedding_qdrant/README.md +0 -0
  174. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/text_embedding_qdrant/main.py +0 -0
  175. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/text_embedding_qdrant/markdown_files/rfc8259.md +0 -0
  176. {cocoindex-0.1.68 → cocoindex-0.1.70}/examples/text_embedding_qdrant/pyproject.toml +0 -0
  177. {cocoindex-0.1.68 → cocoindex-0.1.70}/pyproject.toml +0 -0
  178. {cocoindex-0.1.68 → cocoindex-0.1.70}/python/cocoindex/cli.py +0 -0
  179. {cocoindex-0.1.68 → cocoindex-0.1.70}/python/cocoindex/convert.py +0 -0
  180. {cocoindex-0.1.68 → cocoindex-0.1.70}/python/cocoindex/flow.py +0 -0
  181. {cocoindex-0.1.68 → cocoindex-0.1.70}/python/cocoindex/functions.py +0 -0
  182. {cocoindex-0.1.68 → cocoindex-0.1.70}/python/cocoindex/index.py +0 -0
  183. {cocoindex-0.1.68 → cocoindex-0.1.70}/python/cocoindex/lib.py +0 -0
  184. {cocoindex-0.1.68 → cocoindex-0.1.70}/python/cocoindex/llm.py +0 -0
  185. {cocoindex-0.1.68 → cocoindex-0.1.70}/python/cocoindex/op.py +0 -0
  186. {cocoindex-0.1.68 → cocoindex-0.1.70}/python/cocoindex/py.typed +0 -0
  187. {cocoindex-0.1.68 → cocoindex-0.1.70}/python/cocoindex/runtime.py +0 -0
  188. {cocoindex-0.1.68 → cocoindex-0.1.70}/python/cocoindex/setting.py +0 -0
  189. {cocoindex-0.1.68 → cocoindex-0.1.70}/python/cocoindex/setup.py +0 -0
  190. {cocoindex-0.1.68 → cocoindex-0.1.70}/python/cocoindex/targets.py +0 -0
  191. {cocoindex-0.1.68 → cocoindex-0.1.70}/python/cocoindex/tests/__init__.py +0 -0
  192. {cocoindex-0.1.68 → cocoindex-0.1.70}/python/cocoindex/tests/test_convert.py +0 -0
  193. {cocoindex-0.1.68 → cocoindex-0.1.70}/python/cocoindex/tests/test_optional_database.py +0 -0
  194. {cocoindex-0.1.68 → cocoindex-0.1.70}/python/cocoindex/tests/test_typing.py +0 -0
  195. {cocoindex-0.1.68 → cocoindex-0.1.70}/python/cocoindex/typing.py +0 -0
  196. {cocoindex-0.1.68 → cocoindex-0.1.70}/python/cocoindex/utils.py +0 -0
  197. {cocoindex-0.1.68 → cocoindex-0.1.70}/ruff.toml +0 -0
  198. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/base/duration.rs +0 -0
  199. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/base/field_attrs.rs +0 -0
  200. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/base/json_schema.rs +0 -0
  201. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/base/mod.rs +0 -0
  202. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/base/schema.rs +0 -0
  203. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/base/spec.rs +0 -0
  204. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/base/value.rs +0 -0
  205. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/builder/analyzed_flow.rs +0 -0
  206. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/builder/analyzer.rs +0 -0
  207. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/builder/exec_ctx.rs +0 -0
  208. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/builder/flow_builder.rs +0 -0
  209. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/builder/mod.rs +0 -0
  210. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/builder/plan.rs +0 -0
  211. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/execution/db_tracking.rs +0 -0
  212. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/execution/db_tracking_setup.rs +0 -0
  213. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/execution/dumper.rs +0 -0
  214. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/execution/evaluator.rs +0 -0
  215. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/execution/indexing_status.rs +0 -0
  216. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/execution/memoization.rs +0 -0
  217. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/execution/mod.rs +0 -0
  218. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/execution/row_indexer.rs +0 -0
  219. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/execution/source_indexer.rs +0 -0
  220. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/execution/stats.rs +0 -0
  221. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/lib.rs +0 -0
  222. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/lib_context.rs +0 -0
  223. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/llm/anthropic.rs +0 -0
  224. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/llm/gemini.rs +0 -0
  225. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/llm/litellm.rs +0 -0
  226. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/llm/openai.rs +0 -0
  227. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/llm/openrouter.rs +0 -0
  228. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/llm/vertex_ai.rs +0 -0
  229. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/llm/vllm.rs +0 -0
  230. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/llm/voyage.rs +0 -0
  231. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/ops/factory_bases.rs +0 -0
  232. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/ops/functions/embed_text.rs +0 -0
  233. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/ops/functions/extract_by_llm.rs +0 -0
  234. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/ops/functions/mod.rs +0 -0
  235. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/ops/functions/parse_json.rs +0 -0
  236. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/ops/functions/test_utils.rs +0 -0
  237. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/ops/interface.rs +0 -0
  238. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/ops/mod.rs +0 -0
  239. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/ops/py_factory.rs +0 -0
  240. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/ops/registration.rs +0 -0
  241. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/ops/registry.rs +0 -0
  242. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/ops/sdk.rs +0 -0
  243. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/ops/sources/amazon_s3.rs +0 -0
  244. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/ops/sources/google_drive.rs +0 -0
  245. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/ops/sources/local_file.rs +0 -0
  246. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/ops/sources/mod.rs +0 -0
  247. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/ops/targets/kuzu.rs +0 -0
  248. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/ops/targets/mod.rs +0 -0
  249. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/ops/targets/neo4j.rs +0 -0
  250. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/ops/targets/postgres.rs +0 -0
  251. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/ops/targets/qdrant.rs +0 -0
  252. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/ops/targets/shared/mod.rs +0 -0
  253. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/ops/targets/shared/property_graph.rs +0 -0
  254. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/ops/targets/shared/table_columns.rs +0 -0
  255. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/prelude.rs +0 -0
  256. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/py/convert.rs +0 -0
  257. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/py/mod.rs +0 -0
  258. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/server.rs +0 -0
  259. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/service/error.rs +0 -0
  260. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/service/flows.rs +0 -0
  261. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/service/mod.rs +0 -0
  262. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/settings.rs +0 -0
  263. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/setup/auth_registry.rs +0 -0
  264. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/setup/components.rs +0 -0
  265. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/setup/db_metadata.rs +0 -0
  266. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/setup/driver.rs +0 -0
  267. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/setup/mod.rs +0 -0
  268. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/setup/states.rs +0 -0
  269. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/utils/concur_control.rs +0 -0
  270. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/utils/db.rs +0 -0
  271. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/utils/fingerprint.rs +0 -0
  272. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/utils/immutable.rs +0 -0
  273. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/utils/mod.rs +0 -0
  274. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/utils/retryable.rs +0 -0
  275. {cocoindex-0.1.68 → cocoindex-0.1.70}/src/utils/yaml_ser.rs +0 -0
@@ -1297,7 +1297,7 @@ dependencies = [
1297
1297
 
1298
1298
  [[package]]
1299
1299
  name = "cocoindex"
1300
- version = "0.1.68"
1300
+ version = "0.1.70"
1301
1301
  dependencies = [
1302
1302
  "anyhow",
1303
1303
  "async-openai",
@@ -1369,6 +1369,7 @@ dependencies = [
1369
1369
  "tree-sitter-java",
1370
1370
  "tree-sitter-javascript",
1371
1371
  "tree-sitter-json",
1372
+ "tree-sitter-kotlin-ng",
1372
1373
  "tree-sitter-language",
1373
1374
  "tree-sitter-md",
1374
1375
  "tree-sitter-pascal",
@@ -5724,6 +5725,16 @@ dependencies = [
5724
5725
  "tree-sitter-language",
5725
5726
  ]
5726
5727
 
5728
+ [[package]]
5729
+ name = "tree-sitter-kotlin-ng"
5730
+ version = "1.1.0"
5731
+ source = "registry+https://github.com/rust-lang/crates.io-index"
5732
+ checksum = "e800ebbda938acfbf224f4d2c34947a31994b1295ee6e819b65226c7b51b4450"
5733
+ dependencies = [
5734
+ "cc",
5735
+ "tree-sitter-language",
5736
+ ]
5737
+
5727
5738
  [[package]]
5728
5739
  name = "tree-sitter-language"
5729
5740
  version = "0.1.5"
@@ -2,7 +2,7 @@
2
2
  name = "cocoindex"
3
3
  # Version used for local development is always higher than others to take precedence.
4
4
  # Will be overridden for specific release versions.
5
- version = "0.1.68"
5
+ version = "0.1.70"
6
6
  edition = "2024"
7
7
  rust-version = "1.88"
8
8
 
@@ -63,6 +63,7 @@ reqwest = { version = "0.12.15", default-features = false, features = [
63
63
  "rustls-tls",
64
64
  ] }
65
65
  async-openai = "0.28.0"
66
+
66
67
  tree-sitter = "0.25.3"
67
68
  tree-sitter-language = "0.1.5"
68
69
  # Per language tree-sitter parsers
@@ -76,6 +77,8 @@ tree-sitter-html = "0.23.2"
76
77
  tree-sitter-java = "0.23.5"
77
78
  tree-sitter-javascript = "0.23.1"
78
79
  tree-sitter-json = "0.24.8"
80
+ # The other more popular crate tree-sitter-kotlin requires tree-sitter < 0.23 for now
81
+ tree-sitter-kotlin-ng = "1.1.0"
79
82
  tree-sitter-md = "0.3.2"
80
83
  tree-sitter-pascal = "0.10.0"
81
84
  tree-sitter-php = "0.23.11"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cocoindex
3
- Version: 0.1.68
3
+ Version: 0.1.70
4
4
  Requires-Dist: click>=8.1.8
5
5
  Requires-Dist: rich>=14.0.0
6
6
  Requires-Dist: python-dotenv>=1.1.0
@@ -9,7 +9,7 @@ Welcome with a huge coconut hug 🥥⋆。˚🤗.
9
9
 
10
10
  We are super excited for community contributions of all kinds - whether it's code improvements, documentation updates, issue reports, feature requests on [GitHub](https://github.com/cocoindex-io/cocoindex), and discussions in our [Discord](https://discord.com/invite/zpA9S2DR7s).
11
11
 
12
- We would love to fostering an inclusive, welcoming, and supportive environment. Contributing to CocoIndex should feel collaborative, friendly and enjoyable for everyone. Together, we can build better AI applications through robust data infrastructure.
12
+ We would love to foster an inclusive, welcoming, and supportive environment. Contributing to CocoIndex should feel collaborative, friendly and enjoyable for everyone. Together, we can build better AI applications through robust data infrastructure.
13
13
 
14
14
  :::tip Start hacking CocoIndex
15
15
  Check out our [Contributing guide](./contributing) to get started!
@@ -18,10 +18,10 @@ We tag issues with the ["good first issue"](https://github.com/cocoindex-io/coco
18
18
  ## How to Contribute
19
19
  - If you decide to work on an issue, unless the PR can be sent immediately (e.g. just a few lines of code), we recommend you to leave a comment on the issue like **`I'm working on it`** or **`Can I work on this issue?`** to avoid duplicating work.
20
20
  - For larger features, we recommend you to discuss with us first in our [Discord server](https://discord.com/invite/zpA9S2DR7s) to coordinate the design and work.
21
- - Our [Discord server](https://discord.com/invite/zpA9S2DR7s) are constantly open. If you are unsure about anything, it is a good place to discuss! We'd love to collaborate and will always be friendly.
21
+ - Our [Discord server](https://discord.com/invite/zpA9S2DR7s) is constantly open. If you are unsure about anything, it is a good place to discuss! We'd love to collaborate and will always be friendly.
22
22
 
23
23
  ## Start hacking! Setting Up Development Environment
24
- Following the steps below to get cocoindex build on latest codebase locally - if you are making changes to cocoindex funcionality and want to test it out.
24
+ Follow the steps below to get cocoindex built on the latest codebase locally - if you are making changes to cocoindex functionality and want to test it out.
25
25
 
26
26
  - 🦀 [Install Rust](https://rust-lang.org/tools/install)
27
27
 
@@ -416,22 +416,28 @@ flow_builder.declare(
416
416
  ### Auth Registry
417
417
 
418
418
  CocoIndex manages an auth registry. It's an in-memory key-value store, mainly to store authentication information for a backend.
419
+ It's usually used for targets, where key stability is important for backend cleanup.
419
420
 
420
- Operation spec is the default way to configure a persistent backend. But it has the following limitations:
421
+ Operation spec is the default way to configure sources, functions and targets. But it has the following limitations:
421
422
 
422
423
  * The spec isn't supposed to contain secret information, and it's frequently shown in various places, e.g. `cocoindex show`.
423
- * Once an operation is removed after flow definition code change, the spec is also gone.
424
- But we still need to be able to drop the backend (e.g. a table) when [setup / drop flow](/docs/core/flow_methods#setup--drop-flow).
424
+ * For targets, once an operation is removed after flow definition code change, the spec is also gone.
425
+ But we still need to be able to drop the persistent backend (e.g. a table) when [setup / drop flow](/docs/core/flow_methods#setup--drop-flow).
425
426
 
426
- Auth registry is introduced to solve the problems above. It works as follows:
427
+ Auth registry is introduced to solve the problems above.
427
428
 
428
- * You can create new **auth entry** by a key and a value.
429
- * You can references the entry by the key, and pass it as part of spec for certain operations. e.g. `Neo4j` takes `connection` field in the form of auth entry reference.
429
+
430
+ #### Auth Entry
431
+
432
+ An auth entry is an entry in the auth registry with an explicit key.
433
+
434
+ * You can create a new *auth entry* by a key and a value.
435
+ * You can reference the entry by the key, and pass it as part of spec for certain operations. e.g. `Neo4j` takes `connection` field in the form of auth entry reference.
430
436
 
431
437
  <Tabs>
432
438
  <TabItem value="python" label="Python" default>
433
439
 
434
- You can add an auth entry by `cocoindex.add_auth_entry()` function, which returns a `cocoindex.AuthEntryReference`:
440
+ You can add an auth entry by `cocoindex.add_auth_entry()` function, which returns a `cocoindex.AuthEntryReference[T]`:
435
441
 
436
442
  ```python
437
443
  my_graph_conn = cocoindex.add_auth_entry(
@@ -445,7 +451,7 @@ my_graph_conn = cocoindex.add_auth_entry(
445
451
 
446
452
  Then reference it when building a spec that takes an auth entry:
447
453
 
448
- * You can either reference by the `AuthEntryReference` object directly:
454
+ * You can either reference by the `AuthEntryReference[T]` object directly:
449
455
 
450
456
  ```python
451
457
  demo_collector.export(
@@ -468,7 +474,32 @@ Then reference it when building a spec that takes an auth entry:
468
474
  Note that CocoIndex backends use the key of an auth entry to identify the backend.
469
475
 
470
476
  * Keep the key stable.
471
- If the key doesn't change, it's considered to be the same backend (even if the underlying way to connect/authenticate change).
477
+ If the key doesn't change, it's considered to be the same backend (even if the underlying way to connect/authenticate changes).
472
478
 
473
479
  * If a key is no longer referenced in any operation spec, keep it until the next flow setup / drop action,
474
- so that cocoindex will be able to clean up the backends.
480
+ so that CocoIndex will be able to clean up the backends.
481
+
482
+ #### Transient Auth Entry
483
+
484
+ A transient auth entry is an entry in the auth registry with an automatically generated key.
485
+ It's usually used for sources and functions, where key stability is not important.
486
+
487
+ <Tabs>
488
+ <TabItem value="python" label="Python" default>
489
+
490
+ You can create a new *transient auth entry* by `cocoindex.add_transient_auth_entry()` function, which returns a `cocoindex.TransientAuthEntryReference[T]`, and pass it to a source or function spec that takes it, e.g.
491
+
492
+ ```python
493
+ flow_builder.add_source(
494
+ cocoindex.sources.AzureBlob(
495
+ ...
496
+ sas_token=cocoindex.add_transient_auth_entry("...")
497
+ )
498
+ )
499
+ ```
500
+
501
+
502
+ </TabItem>
503
+ </Tabs>
504
+
505
+ Whenever a `TransientAuthEntryReference[T]` is expected, you can also pass an `AuthEntryReference[T]` instead, as `AuthEntryReference[T]` is a subtype of `TransientAuthEntryReference[T]`.
@@ -44,9 +44,9 @@ For a flow, its persistent backends need to be ready before it can run, includin
44
44
  The desired state of the backends for a flow is derived based on the flow definition itself.
45
45
  CocoIndex supports two types of actions to manage the persistent backends automatically:
46
46
 
47
- * *Setup* a flow, which will change the backends owned by the flow to a state to the desired state, e.g. create new tables for new flow, drop an existing table if the corresponding target is gone, add new column to a target table if a new field is collected, etc. It's no-op if the backend states are already in the desired state.
47
+ * *Setup* a flow, which will change the backends owned by the flow to the desired state, e.g. create new tables for a new flow, drop an existing table if the corresponding target is gone, add a new column to a target table if a new field is collected, etc. It's a no-op if the backend states are already in the desired state.
48
48
 
49
- * *Drop* a flow, which will drop all backends owned by the flow. It's no-op if there's no existing backends owned by the flow (e.g. never setup or already dropped).
49
+ * *Drop* a flow, which will drop all backends owned by the flow. It's a no-op if there are no existing backends owned by the flow (e.g. never set up or already dropped).
50
50
 
51
51
  ### CLI
52
52
 
@@ -138,7 +138,7 @@ This is to achieve best efficiency.
138
138
 
139
139
  The `cocoindex update` subcommand creates/updates data in the target.
140
140
 
141
- Once it's done, the target data is fresh up to the moment when the function is called.
141
+ Once it's done, the target data is fresh up to the moment when the command is called.
142
142
 
143
143
  ```sh
144
144
  cocoindex update main.py
@@ -203,7 +203,7 @@ To perform live update, run the `cocoindex update` subcommand with `-L` option:
203
203
  cocoindex update main.py -L
204
204
  ```
205
205
 
206
- If there's at least one data source with change capture mechanism enabled, it will keep running until the aborted (e.g. by `Ctrl-C`).
206
+ If there's at least one data source with change capture mechanism enabled, it will keep running until aborted (e.g. by `Ctrl-C`).
207
207
  Otherwise, it falls back to the same behavior as a one-time update, and will finish after a one-time update is done.
208
208
 
209
209
  With a `--setup` option, it will also set up the flow first if needed.
@@ -7,10 +7,10 @@ import ReactPlayer from 'react-player'
7
7
 
8
8
  # Build your first CocoIndex project
9
9
 
10
- This guide will help you get up and running with CocoIndex in just a few minutes, that does:
10
+ This guide will help you get up and running with CocoIndex in just a few minutes. We'll build a project that does:
11
11
  * Read files from a directory
12
12
  * Perform basic chunking and embedding
13
- * loads the data into a vector store (PG Vector)
13
+ * Load the data into a vector store (PG Vector)
14
14
 
15
15
  <ReactPlayer controls url='https://www.youtube.com/watch?v=gv5R8nOXsWU' />
16
16
 
@@ -107,11 +107,11 @@ Notes:
107
107
  3. A *data source* extracts data from an external source.
108
108
  In this example, the `LocalFile` data source imports local files as a KTable (table with a key field, see [KTable](../core/data_types#ktable) for details), each row has `"filename"` and `"content"` fields.
109
109
 
110
- 4. After defining the KTable, we extended a new field `"chunks"` to each row by *transforming* the `"content"` field using `SplitRecursively`. The output of the `SplitRecursively` is also a KTable representing each chunk of the document, with `"location"` and `"text"` fields.
110
+ 4. After defining the KTable, we extend a new field `"chunks"` to each row by *transforming* the `"content"` field using `SplitRecursively`. The output of the `SplitRecursively` is also a KTable representing each chunk of the document, with `"location"` and `"text"` fields.
111
111
 
112
- 5. After defining the KTable, we extended a new field `"embedding"` to each row by *transforming* the `"text"` field using `SentenceTransformerEmbed`.
112
+ 5. After defining the KTable, we extend a new field `"embedding"` to each row by *transforming* the `"text"` field using `SentenceTransformerEmbed`.
113
113
 
114
- 6. In CocoIndex, a *collector* collects multiple entries of data together. In this example, the `doc_embeddings` collector collects data from all `chunk`s across all `doc`s, and using the collected data to build a vector index `"doc_embeddings"`, using `Postgres`.
114
+ 6. In CocoIndex, a *collector* collects multiple entries of data together. In this example, the `doc_embeddings` collector collects data from all `chunk`s across all `doc`s, and uses the collected data to build a vector index `"doc_embeddings"`, using `Postgres`.
115
115
 
116
116
  ## Step 3: Run the indexing pipeline and queries
117
117
 
@@ -271,7 +271,7 @@ Now we can run the same Python file, which will run the new added main logic:
271
271
  python quickstart.py
272
272
  ```
273
273
 
274
- It will ask you to enter a query and it will return the top 10 results.
274
+ It will ask you to enter a query and it will return the top 5 results.
275
275
 
276
276
  ## Next Steps
277
277
 
@@ -31,7 +31,7 @@ The spec takes the following fields:
31
31
 
32
32
  * `separators_regex` (`list[str]`): A list of regex patterns to split the text.
33
33
  Higher-level boundaries should come first, and lower-level should be listed later. e.g. `[r"\n# ", r"\n## ", r"\n\n", r"\. "]`.
34
- See [regex Syntax](https://docs.rs/regex/latest/regex/#syntax) for supported regular expression syntax.
34
+ See [regex syntax](https://docs.rs/regex/latest/regex/#syntax) for supported regular expression syntax.
35
35
 
36
36
  Input data:
37
37
 
@@ -57,9 +57,12 @@ Input data:
57
57
 
58
58
  We use the `language` field to determine how to split the input text, following these rules:
59
59
 
60
- * We'll match the input `language` field against the `language_name` or `aliases` of each element of `custom_languages`, and use the matched one. If value of `language` is null, it'll be treated as empty string when matching `language_name` or `aliases`.
61
- * If no match is found, we'll match the `language` field against the builtin language configurations.
62
- For all supported builtin language names and aliases (extensions), see [the code](https://github.com/search?q=org%3Acocoindex-io+lang%3Arust++%22static+TREE_SITTER_LANGUAGE_BY_LANG%22&type=code).
60
+ * We match the input `language` field against the following registries in the following order:
61
+ * `custom_languages` in the spec, against the `language_name` or `aliases` field of each entry.
62
+ * Builtin languages (see [Supported Languages](#supported-languages) section below), against the language, aliases or file extensions of each entry.
63
+
64
+ All matches are case-insensitive. If the value of `language` is null, it'll be treated as an empty string.
65
+
63
66
  * If no match is found, the input will be treated as plain text.
64
67
 
65
68
  :::
@@ -73,6 +76,42 @@ Return: [*KTable*](/docs/core/data_types#ktable), each row represents a chunk, w
73
76
  * `line` (*Int64*): The line number of the position. Starting from 1.
74
77
  * `column` (*Int64*): The column number of the position. Starting from 1.
75
78
 
79
+ ### Supported Languages
80
+
81
+ Currently, `SplitRecursively` supports the following languages:
82
+
83
+ | Language | Aliases | File Extensions |
84
+ |----------|---------|-----------------|
85
+ | C | | `.c` |
86
+ | C++ | CPP | `.cpp`, `.cc`, `.cxx`, `.h`, `.hpp` |
87
+ | C# | CSharp, CS | `.cs` |
88
+ | CSS | | `.css`, `.scss` |
89
+ | DTD | | `.dtd` |
90
+ | Fortran | F, F90, F95, F03 | `.f`, `.f90`, `.f95`, `.f03` |
91
+ | Go | Golang | `.go` |
92
+ | HTML | | `.html`, `.htm` |
93
+ | Java | | `.java` |
94
+ | JavaScript | JS | `.js` |
95
+ | JSON | | `.json` |
96
+ | Kotlin | | `.kt`, `.kts` |
97
+ | Markdown | MD | `.md`, `.mdx` |
98
+ | Pascal | PAS, DPR, Delphi | `.pas`, `.dpr` |
99
+ | PHP | | `.php` |
100
+ | Python | | `.py` |
101
+ | R | | `.r` |
102
+ | Ruby | | `.rb` |
103
+ | Rust | RS | `.rs` |
104
+ | Scala | | `.scala` |
105
+ | SQL | | `.sql` |
106
+ | Swift | | `.swift` |
107
+ | TOML | | `.toml` |
108
+ | TSX | | `.tsx` |
109
+ | TypeScript | TS | `.ts` |
110
+ | XML | | `.xml` |
111
+ | YAML | | `.yaml`, `.yml` |
112
+
113
+
114
+
76
115
  ## SentenceTransformerEmbed
77
116
 
78
117
  `SentenceTransformerEmbed` embeds a text into a vector space using the [SentenceTransformer](https://huggingface.co/sentence-transformers) library.
@@ -111,10 +111,9 @@ This is how to setup:
111
111
 
112
112
  * In the [Amazon S3 Console](https://s3.console.aws.amazon.com/s3/home), open your S3 bucket. Under *Properties* tab, click *Create event notification*.
113
113
  * Fill in an arbitrary event name, e.g. `S3ChangeNotifications`.
114
- * If you want your AmazonS3 data source expose a subset of files sharing a prefix, set the same prefix here. Otherwise, leave it empty.
114
+ * If you want your AmazonS3 data source to expose a subset of files sharing a prefix, set the same prefix here. Otherwise, leave it empty.
115
115
  * Select the following event types: *All object create events*, *All object removal events*.
116
116
  * Select *SQS queue* as the destination, and specify the SQS queue you created above.
117
- and enable *Change Event Notifications* for your bucket, and specify the SQS queue as the destination.
118
117
 
119
118
  AWS's [Guide of Configuring a Bucket for Notifications](https://docs.aws.amazon.com/AmazonS3/latest/userguide/ways-to-add-notification-config-to-bucket.html#step1-create-sqs-queue-for-notification) provides more details.
120
119
 
@@ -141,7 +140,7 @@ The spec takes the following fields:
141
140
  :::info
142
141
 
143
142
  We will delete messages from the queue after they're processed.
144
- If there're unrelated messages in the queue (e.g. test messages that SQS will send automatically on queue creation, messages for a different bucket, for non-included files, etc.), we will delete the message upon receiving it, to avoid keeping receiving irrelevant messages again and again after they're redelivered.
143
+ If there are unrelated messages in the queue (e.g. test messages that SQS will send automatically on queue creation, messages for a different bucket, for non-included files, etc.), we will delete the message upon receiving it, to avoid repeatedly receiving irrelevant messages after they're redelivered.
145
144
 
146
145
  :::
147
146
 
@@ -171,22 +170,33 @@ These are actions you need to take:
171
170
 
172
171
  #### Authentication
173
172
 
174
- We use Azure’s **Default Credential** system (DefaultAzureCredential) for secure and flexible authentication.
175
- This allows you to connect to Azure services without putting any secrets in the code or flow spec.
176
- It automatically chooses the best authentication method based on your environment:
173
+ We support the following authentication methods:
177
174
 
178
- * On your local machine: uses your Azure CLI login (`az login`) or environment variables.
175
+ * Shared access signature (SAS) tokens.
176
+ You can generate it from the Azure Portal in the settings for a specific container.
177
+ You need to provide at least *List* and *Read* permissions when generating the SAS token.
178
+ It's a query string in the form of
179
+ `sp=rl&st=2025-07-20T09:33:00Z&se=2025-07-20T09:48:53Z&sv=2024-11-04&sr=c&sig=i3FDjsadfklj3%23adsfkk`.
179
180
 
180
- ```sh
181
- az login
182
- # Optional: Set a default subscription if you have more than one
183
- az account set --subscription "<YOUR_SUBSCRIPTION_NAME_OR_ID>"
184
- ```
185
- * In Azure (VM, App Service, AKS, etc.): uses the resource’s Managed Identity.
186
- * In automated environments: supports Service Principals via environment variables
187
- * `AZURE_CLIENT_ID`
188
- * `AZURE_TENANT_ID`
189
- * `AZURE_CLIENT_SECRET`
181
+ * Storage account access key. You can find it in the Azure Portal in the settings for a specific storage account.
182
+
183
+ * Default credential. When none of the above is provided, it will use the default credential.
184
+
185
+ This allows you to connect to Azure services without putting any secrets in the code or flow spec.
186
+ It automatically chooses the best authentication method based on your environment:
187
+
188
+ * On your local machine: uses your Azure CLI login (`az login`) or environment variables.
189
+
190
+ ```sh
191
+ az login
192
+ # Optional: Set a default subscription if you have more than one
193
+ az account set --subscription "<YOUR_SUBSCRIPTION_NAME_OR_ID>"
194
+ ```
195
+ * In Azure (VM, App Service, AKS, etc.): uses the resource’s Managed Identity.
196
+ * In automated environments: supports Service Principals via environment variables
197
+ * `AZURE_CLIENT_ID`
198
+ * `AZURE_TENANT_ID`
199
+ * `AZURE_CLIENT_SECRET`
190
200
 
191
201
  You can refer to [this doc](https://learn.microsoft.com/en-us/azure/developer/python/sdk/authentication/overview) for more details.
192
202
 
@@ -203,6 +213,8 @@ The spec takes the following fields:
203
213
  * `excluded_patterns` (`list[str]`, optional): a list of glob patterns to exclude files, e.g. `["*.tmp", "**/*.log"]`.
204
214
  Any file or directory matching these patterns will be excluded even if they match `included_patterns`.
205
215
  If not specified, no files will be excluded.
216
+ * `sas_token` (`cocoindex.TransientAuthEntryReference[str]`, optional): a SAS token for authentication.
217
+ * `account_access_key` (`cocoindex.TransientAuthEntryReference[str]`, optional): an account access key for authentication.
206
218
 
207
219
  :::info
208
220
 
@@ -253,12 +265,12 @@ The spec takes the following fields:
253
265
  it's typically cheaper than a full refresh by setting the [refresh interval](../core/flow_def#refresh-interval) especially when the folder contains a large number of files.
254
266
  So you can usually set it with a smaller value compared to the `refresh_interval`.
255
267
 
256
- On the other hand, this only detects changes for files still exists.
257
- If the file is deleted (or the current account no longer has access to), this change will not be detected by this change stream.
268
+ On the other hand, this only detects changes for files that still exist.
269
+ If the file is deleted (or the current account no longer has access to it), this change will not be detected by this change stream.
258
270
 
259
- So when a `GoogleDrive` source enabled `recent_changes_poll_interval`, it's still recommended to set a `refresh_interval`, with a larger value.
271
+ So when a `GoogleDrive` source has `recent_changes_poll_interval` enabled, it's still recommended to set a `refresh_interval`, with a larger value.
260
272
  So that most changes can be covered by polling recent changes (with low latency, like 10 seconds), and remaining changes (files that no longer exist or are no longer accessible) will still be covered (with a higher latency, like 5 minutes, and should be larger if you have a huge number of files like 1M).
261
- In reality, configure them based on your requirement: how freshness do you need to target index to be?
273
+ In reality, configure them based on your requirement: how fresh do you need the target index to be?
262
274
 
263
275
  :::
264
276
 
@@ -413,7 +413,7 @@ If you don't have a Neo4j database, you can start a Neo4j database using our doc
413
413
  docker compose -f <(curl -L https://raw.githubusercontent.com/cocoindex-io/cocoindex/refs/heads/main/dev/neo4j.yaml) up -d
414
414
  ```
415
415
 
416
- If will bring up a Neo4j instance, which can be accessed by username `neo4j` and password `cocoindex`.
416
+ This will bring up a Neo4j instance, which can be accessed by username `neo4j` and password `cocoindex`.
417
417
  You can access the Neo4j browser at [http://localhost:7474](http://localhost:7474).
418
418
 
419
419
  :::warning
@@ -102,12 +102,6 @@ def _main() -> None:
102
102
 
103
103
  amazon_s3_text_embedding_flow.setup()
104
104
  with cocoindex.FlowLiveUpdater(amazon_s3_text_embedding_flow) as updater:
105
- while True:
106
- updates = updater.next_status_updates()
107
- print(f"Updates: {updates}")
108
- if not updates.active_sources:
109
- break
110
-
111
105
  # Run queries in a loop to demonstrate the query capabilities.
112
106
  while True:
113
107
  query = input("Enter search query (or Enter to quit): ")
@@ -6,7 +6,7 @@ from . import functions, sources, targets, cli, utils
6
6
 
7
7
  from . import targets as storages # Deprecated: Use targets instead
8
8
 
9
- from .auth_registry import AuthEntryReference, add_auth_entry, ref_auth_entry
9
+ from .auth_registry import AuthEntryReference, add_auth_entry, add_transient_auth_entry
10
10
  from .flow import FlowBuilder, DataScope, DataSlice, Flow, transform_flow
11
11
  from .flow import flow_def
12
12
  from .flow import EvaluateAndDumpOptions, GeneratedField
@@ -42,6 +42,7 @@ __all__ = [
42
42
  # Auth registry
43
43
  "AuthEntryReference",
44
44
  "add_auth_entry",
45
+ "add_transient_auth_entry",
45
46
  "ref_auth_entry",
46
47
  # Flow
47
48
  "FlowBuilder",
@@ -0,0 +1,51 @@
1
+ """
2
+ Auth registry is used to register and reference auth entries.
3
+ """
4
+
5
+ from dataclasses import dataclass
6
+ from typing import Generic, TypeVar
7
+ import threading
8
+
9
+ from . import _engine # type: ignore
10
+ from .convert import dump_engine_object
11
+
12
+ T = TypeVar("T")
13
+
14
+ # Global atomic counter for generating unique auth entry keys
15
+ _counter_lock = threading.Lock()
16
+ _auth_key_counter = 0
17
+
18
+
19
+ def _generate_auth_key() -> str:
20
+ """Generate a unique auth entry key using a global atomic counter."""
21
+ global _auth_key_counter # pylint: disable=global-statement
22
+ with _counter_lock:
23
+ _auth_key_counter += 1
24
+ return f"__auth_{_auth_key_counter}"
25
+
26
+
27
+ @dataclass
28
+ class TransientAuthEntryReference(Generic[T]):
29
+ """Reference an auth entry, may or may not have a stable key."""
30
+
31
+ key: str
32
+
33
+
34
+ class AuthEntryReference(TransientAuthEntryReference[T]):
35
+ """Reference an auth entry, with a stable key."""
36
+
37
+
38
+ def add_transient_auth_entry(value: T) -> TransientAuthEntryReference[T]:
39
+ """Add an auth entry to the registry with an automatically generated key. Returns its reference."""
40
+ return add_auth_entry(_generate_auth_key(), value)
41
+
42
+
43
+ def add_auth_entry(key: str, value: T) -> AuthEntryReference[T]:
44
+ """Add an auth entry to the registry. Returns its reference."""
45
+ _engine.add_auth_entry(key, dump_engine_object(value))
46
+ return AuthEntryReference(key)
47
+
48
+
49
+ def ref_auth_entry(key: str) -> AuthEntryReference[T]:
50
+ """Reference an auth entry by its key."""
51
+ return AuthEntryReference(key)
@@ -1,6 +1,7 @@
1
1
  """All builtin sources."""
2
2
 
3
3
  from . import op
4
+ from .auth_registry import TransientAuthEntryReference
4
5
  import datetime
5
6
 
6
7
 
@@ -48,6 +49,11 @@ class AmazonS3(op.SourceSpec):
48
49
  class AzureBlob(op.SourceSpec):
49
50
  """
50
51
  Import data from an Azure Blob Storage container. Supports optional prefix and file filtering by glob patterns.
52
+
53
+ Authentication mechanisms are chosen in the following order of precedence:
54
+ - SAS token (if provided)
55
+ - Account access key (if provided)
56
+ - Default Azure credential
51
57
  """
52
58
 
53
59
  _op_category = op.OpCategory.SOURCE
@@ -58,3 +64,6 @@ class AzureBlob(op.SourceSpec):
58
64
  binary: bool = False
59
65
  included_patterns: list[str] | None = None
60
66
  excluded_patterns: list[str] | None = None
67
+
68
+ sas_token: TransientAuthEntryReference[str] | None = None
69
+ account_access_key: TransientAuthEntryReference[str] | None = None