cocoindex 0.1.58__tar.gz → 0.1.60__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (259) hide show
  1. cocoindex-0.1.60/.github/workflows/CI.yml +31 -0
  2. cocoindex-0.1.58/.github/workflows/CI.yml → cocoindex-0.1.60/.github/workflows/format.yml +4 -10
  3. {cocoindex-0.1.58 → cocoindex-0.1.60}/Cargo.lock +1 -1
  4. {cocoindex-0.1.58 → cocoindex-0.1.60}/Cargo.toml +1 -1
  5. {cocoindex-0.1.58 → cocoindex-0.1.60}/PKG-INFO +1 -1
  6. {cocoindex-0.1.58 → cocoindex-0.1.60}/docs/docs/ai/llm.mdx +32 -0
  7. cocoindex-0.1.60/docs/docs/core/data_types.mdx +188 -0
  8. cocoindex-0.1.60/examples/patient_intake_extraction/.env.example +4 -0
  9. cocoindex-0.1.60/examples/patient_intake_extraction/README.md +58 -0
  10. cocoindex-0.1.60/examples/patient_intake_extraction/data/README.md +4 -0
  11. cocoindex-0.1.60/examples/patient_intake_extraction/data/patient_forms/Patient_Intake_Form_David_Artificial.docx +0 -0
  12. cocoindex-0.1.60/examples/patient_intake_extraction/data/patient_forms/Patient_Intake_Form_Emily_Artificial.pdf +0 -0
  13. cocoindex-0.1.60/examples/patient_intake_extraction/data/patient_forms/Patient_Intake_Form_Joe_Artificial.pdf +0 -0
  14. cocoindex-0.1.60/examples/patient_intake_extraction/data/patient_forms/Patient_Intake_From_Jane_Artificial.docx +0 -0
  15. cocoindex-0.1.60/examples/patient_intake_extraction/main.py +148 -0
  16. cocoindex-0.1.60/examples/patient_intake_extraction/pyproject.toml +11 -0
  17. {cocoindex-0.1.58 → cocoindex-0.1.60}/python/cocoindex/__init__.py +11 -1
  18. {cocoindex-0.1.58 → cocoindex-0.1.60}/python/cocoindex/convert.py +43 -20
  19. {cocoindex-0.1.58 → cocoindex-0.1.60}/python/cocoindex/llm.py +1 -0
  20. {cocoindex-0.1.58 → cocoindex-0.1.60}/python/cocoindex/tests/test_convert.py +81 -19
  21. {cocoindex-0.1.58 → cocoindex-0.1.60}/python/cocoindex/typing.py +1 -0
  22. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/llm/mod.rs +6 -0
  23. cocoindex-0.1.60/src/llm/vllm.rs +16 -0
  24. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/py/convert.rs +19 -5
  25. cocoindex-0.1.58/docs/docs/core/data_types.mdx +0 -145
  26. {cocoindex-0.1.58 → cocoindex-0.1.60}/.cargo/config.toml +0 -0
  27. {cocoindex-0.1.58 → cocoindex-0.1.60}/.env.lib_debug +0 -0
  28. {cocoindex-0.1.58 → cocoindex-0.1.60}/.github/ISSUE_TEMPLATE//360/237/220/233-bug-report.md" +0 -0
  29. {cocoindex-0.1.58 → cocoindex-0.1.60}/.github/ISSUE_TEMPLATE//360/237/222/241-feature-request.md" +0 -0
  30. {cocoindex-0.1.58 → cocoindex-0.1.60}/.github/scripts/update_version.sh +0 -0
  31. {cocoindex-0.1.58 → cocoindex-0.1.60}/.github/workflows/_doc_release.yml +0 -0
  32. {cocoindex-0.1.58 → cocoindex-0.1.60}/.github/workflows/_test.yml +0 -0
  33. {cocoindex-0.1.58 → cocoindex-0.1.60}/.github/workflows/docs.yml +0 -0
  34. {cocoindex-0.1.58 → cocoindex-0.1.60}/.github/workflows/release.yml +0 -0
  35. {cocoindex-0.1.58 → cocoindex-0.1.60}/.gitignore +0 -0
  36. {cocoindex-0.1.58 → cocoindex-0.1.60}/.pre-commit-config.yaml +0 -0
  37. {cocoindex-0.1.58 → cocoindex-0.1.60}/.vscode/settings.json +0 -0
  38. {cocoindex-0.1.58 → cocoindex-0.1.60}/CODE_OF_CONDUCT.md +0 -0
  39. {cocoindex-0.1.58 → cocoindex-0.1.60}/CONTRIBUTING.md +0 -0
  40. {cocoindex-0.1.58 → cocoindex-0.1.60}/LICENSE +0 -0
  41. {cocoindex-0.1.58 → cocoindex-0.1.60}/README.md +0 -0
  42. {cocoindex-0.1.58 → cocoindex-0.1.60}/dev/neo4j.yaml +0 -0
  43. {cocoindex-0.1.58 → cocoindex-0.1.60}/dev/postgres.yaml +0 -0
  44. {cocoindex-0.1.58 → cocoindex-0.1.60}/docs/.gitignore +0 -0
  45. {cocoindex-0.1.58 → cocoindex-0.1.60}/docs/README.md +0 -0
  46. {cocoindex-0.1.58 → cocoindex-0.1.60}/docs/docs/about/community.md +0 -0
  47. {cocoindex-0.1.58 → cocoindex-0.1.60}/docs/docs/about/contributing.md +0 -0
  48. {cocoindex-0.1.58 → cocoindex-0.1.60}/docs/docs/core/basics.md +0 -0
  49. {cocoindex-0.1.58 → cocoindex-0.1.60}/docs/docs/core/cli.mdx +0 -0
  50. {cocoindex-0.1.58 → cocoindex-0.1.60}/docs/docs/core/custom_function.mdx +0 -0
  51. {cocoindex-0.1.58 → cocoindex-0.1.60}/docs/docs/core/data_example.svg +0 -0
  52. {cocoindex-0.1.58 → cocoindex-0.1.60}/docs/docs/core/flow_def.mdx +0 -0
  53. {cocoindex-0.1.58 → cocoindex-0.1.60}/docs/docs/core/flow_example.svg +0 -0
  54. {cocoindex-0.1.58 → cocoindex-0.1.60}/docs/docs/core/flow_methods.mdx +0 -0
  55. {cocoindex-0.1.58 → cocoindex-0.1.60}/docs/docs/core/settings.mdx +0 -0
  56. {cocoindex-0.1.58 → cocoindex-0.1.60}/docs/docs/getting_started/installation.md +0 -0
  57. {cocoindex-0.1.58 → cocoindex-0.1.60}/docs/docs/getting_started/markdown_files.zip +0 -0
  58. {cocoindex-0.1.58 → cocoindex-0.1.60}/docs/docs/getting_started/overview.md +0 -0
  59. {cocoindex-0.1.58 → cocoindex-0.1.60}/docs/docs/getting_started/quickstart.md +0 -0
  60. {cocoindex-0.1.58 → cocoindex-0.1.60}/docs/docs/ops/functions.md +0 -0
  61. {cocoindex-0.1.58 → cocoindex-0.1.60}/docs/docs/ops/sources.md +0 -0
  62. {cocoindex-0.1.58 → cocoindex-0.1.60}/docs/docs/ops/targets.md +0 -0
  63. {cocoindex-0.1.58 → cocoindex-0.1.60}/docs/docs/query.mdx +0 -0
  64. {cocoindex-0.1.58 → cocoindex-0.1.60}/docs/docusaurus.config.ts +0 -0
  65. {cocoindex-0.1.58 → cocoindex-0.1.60}/docs/package.json +0 -0
  66. {cocoindex-0.1.58 → cocoindex-0.1.60}/docs/sidebars.ts +0 -0
  67. {cocoindex-0.1.58 → cocoindex-0.1.60}/docs/src/components/HomepageFeatures/index.tsx +0 -0
  68. {cocoindex-0.1.58 → cocoindex-0.1.60}/docs/src/components/HomepageFeatures/styles.module.css +0 -0
  69. {cocoindex-0.1.58 → cocoindex-0.1.60}/docs/src/css/custom.css +0 -0
  70. {cocoindex-0.1.58 → cocoindex-0.1.60}/docs/src/theme/Root.js +0 -0
  71. {cocoindex-0.1.58 → cocoindex-0.1.60}/docs/static/.nojekyll +0 -0
  72. {cocoindex-0.1.58 → cocoindex-0.1.60}/docs/static/img/docusaurus.png +0 -0
  73. {cocoindex-0.1.58 → cocoindex-0.1.60}/docs/static/img/favicon.ico +0 -0
  74. {cocoindex-0.1.58 → cocoindex-0.1.60}/docs/static/img/icon.svg +0 -0
  75. {cocoindex-0.1.58 → cocoindex-0.1.60}/docs/static/img/incremental-etl.gif +0 -0
  76. {cocoindex-0.1.58 → cocoindex-0.1.60}/docs/static/robots.txt +0 -0
  77. {cocoindex-0.1.58 → cocoindex-0.1.60}/docs/tsconfig.json +0 -0
  78. {cocoindex-0.1.58 → cocoindex-0.1.60}/docs/yarn.lock +0 -0
  79. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/amazon_s3_embedding/.env.example +0 -0
  80. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/amazon_s3_embedding/.gitignore +0 -0
  81. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/amazon_s3_embedding/README.md +0 -0
  82. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/amazon_s3_embedding/main.py +0 -0
  83. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/amazon_s3_embedding/pyproject.toml +0 -0
  84. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/code_embedding/.env +0 -0
  85. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/code_embedding/README.md +0 -0
  86. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/code_embedding/main.py +0 -0
  87. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/code_embedding/pyproject.toml +0 -0
  88. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/docs_to_knowledge_graph/.env +0 -0
  89. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/docs_to_knowledge_graph/README.md +0 -0
  90. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/docs_to_knowledge_graph/main.py +0 -0
  91. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/docs_to_knowledge_graph/pyproject.toml +0 -0
  92. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/fastapi_server_docker/.dockerignore +0 -0
  93. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/fastapi_server_docker/.env +0 -0
  94. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/fastapi_server_docker/README.md +0 -0
  95. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/fastapi_server_docker/compose.yaml +0 -0
  96. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/fastapi_server_docker/dockerfile +0 -0
  97. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/fastapi_server_docker/files/1810.04805v2.md +0 -0
  98. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/fastapi_server_docker/main.py +0 -0
  99. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/fastapi_server_docker/requirements.txt +0 -0
  100. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/gdrive_text_embedding/.env.example +0 -0
  101. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/gdrive_text_embedding/.gitignore +0 -0
  102. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/gdrive_text_embedding/README.md +0 -0
  103. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/gdrive_text_embedding/main.py +0 -0
  104. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/gdrive_text_embedding/pyproject.toml +0 -0
  105. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/image_search/.env +0 -0
  106. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/image_search/README.md +0 -0
  107. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/image_search/frontend/.gitignore +0 -0
  108. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/image_search/frontend/index.html +0 -0
  109. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/image_search/frontend/package-lock.json +0 -0
  110. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/image_search/frontend/package.json +0 -0
  111. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/image_search/frontend/src/App.jsx +0 -0
  112. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/image_search/frontend/src/main.jsx +0 -0
  113. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/image_search/frontend/src/style.css +0 -0
  114. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/image_search/frontend/vite.config.js +0 -0
  115. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/image_search/img/cat1.jpeg +0 -0
  116. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/image_search/img/dog1.jpeg +0 -0
  117. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/image_search/img/elephant1.jpg +0 -0
  118. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/image_search/img/giraffe.jpg +0 -0
  119. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/image_search/main.py +0 -0
  120. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/image_search/pyproject.toml +0 -0
  121. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/image_search/requirements.txt +0 -0
  122. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/manuals_llm_extraction/.env +0 -0
  123. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/manuals_llm_extraction/README.md +0 -0
  124. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/manuals_llm_extraction/main.py +0 -0
  125. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/manuals_llm_extraction/manuals/array.pdf +0 -0
  126. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/manuals_llm_extraction/manuals/base64.pdf +0 -0
  127. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/manuals_llm_extraction/manuals/copy.pdf +0 -0
  128. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/manuals_llm_extraction/manuals/glob.pdf +0 -0
  129. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/manuals_llm_extraction/pyproject.toml +0 -0
  130. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/pdf_embedding/.env +0 -0
  131. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/pdf_embedding/README.md +0 -0
  132. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/pdf_embedding/main.py +0 -0
  133. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/pdf_embedding/pdf_files/1706.03762v7.pdf +0 -0
  134. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/pdf_embedding/pdf_files/1810.04805v2.pdf +0 -0
  135. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/pdf_embedding/pdf_files/rfc8259.pdf +0 -0
  136. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/pdf_embedding/pyproject.toml +0 -0
  137. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/product_recommendation/.env +0 -0
  138. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/product_recommendation/README.md +0 -0
  139. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/product_recommendation/img/cocoinsight.png +0 -0
  140. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/product_recommendation/img/neo4j.png +0 -0
  141. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/product_recommendation/main.py +0 -0
  142. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/product_recommendation/products/p1.json +0 -0
  143. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/product_recommendation/products/p2.json +0 -0
  144. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/product_recommendation/products/p3.json +0 -0
  145. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/product_recommendation/products/p4.json +0 -0
  146. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/product_recommendation/products/p5.json +0 -0
  147. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/product_recommendation/products/p6.json +0 -0
  148. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/product_recommendation/products/p7.json +0 -0
  149. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/product_recommendation/products/p8.json +0 -0
  150. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/product_recommendation/products/p9.json +0 -0
  151. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/product_recommendation/pyproject.toml +0 -0
  152. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/text_embedding/.env +0 -0
  153. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/text_embedding/README.md +0 -0
  154. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/text_embedding/Text_Embedding.ipynb +0 -0
  155. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/text_embedding/main.py +0 -0
  156. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/text_embedding/markdown_files/1706.03762v7.md +0 -0
  157. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/text_embedding/markdown_files/1810.04805v2.md +0 -0
  158. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/text_embedding/markdown_files/rfc8259.md +0 -0
  159. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/text_embedding/pyproject.toml +0 -0
  160. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/text_embedding_qdrant/.env +0 -0
  161. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/text_embedding_qdrant/README.md +0 -0
  162. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/text_embedding_qdrant/main.py +0 -0
  163. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/text_embedding_qdrant/markdown_files/rfc8259.md +0 -0
  164. {cocoindex-0.1.58 → cocoindex-0.1.60}/examples/text_embedding_qdrant/pyproject.toml +0 -0
  165. {cocoindex-0.1.58 → cocoindex-0.1.60}/pyproject.toml +0 -0
  166. {cocoindex-0.1.58 → cocoindex-0.1.60}/python/cocoindex/auth_registry.py +0 -0
  167. {cocoindex-0.1.58 → cocoindex-0.1.60}/python/cocoindex/cli.py +0 -0
  168. {cocoindex-0.1.58 → cocoindex-0.1.60}/python/cocoindex/flow.py +0 -0
  169. {cocoindex-0.1.58 → cocoindex-0.1.60}/python/cocoindex/functions.py +0 -0
  170. {cocoindex-0.1.58 → cocoindex-0.1.60}/python/cocoindex/index.py +0 -0
  171. {cocoindex-0.1.58 → cocoindex-0.1.60}/python/cocoindex/lib.py +0 -0
  172. {cocoindex-0.1.58 → cocoindex-0.1.60}/python/cocoindex/op.py +0 -0
  173. {cocoindex-0.1.58 → cocoindex-0.1.60}/python/cocoindex/py.typed +0 -0
  174. {cocoindex-0.1.58 → cocoindex-0.1.60}/python/cocoindex/runtime.py +0 -0
  175. {cocoindex-0.1.58 → cocoindex-0.1.60}/python/cocoindex/setting.py +0 -0
  176. {cocoindex-0.1.58 → cocoindex-0.1.60}/python/cocoindex/setup.py +0 -0
  177. {cocoindex-0.1.58 → cocoindex-0.1.60}/python/cocoindex/sources.py +0 -0
  178. {cocoindex-0.1.58 → cocoindex-0.1.60}/python/cocoindex/targets.py +0 -0
  179. {cocoindex-0.1.58 → cocoindex-0.1.60}/python/cocoindex/tests/__init__.py +0 -0
  180. {cocoindex-0.1.58 → cocoindex-0.1.60}/python/cocoindex/tests/test_optional_database.py +0 -0
  181. {cocoindex-0.1.58 → cocoindex-0.1.60}/python/cocoindex/tests/test_typing.py +0 -0
  182. {cocoindex-0.1.58 → cocoindex-0.1.60}/python/cocoindex/utils.py +0 -0
  183. {cocoindex-0.1.58 → cocoindex-0.1.60}/ruff.toml +0 -0
  184. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/base/duration.rs +0 -0
  185. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/base/field_attrs.rs +0 -0
  186. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/base/json_schema.rs +0 -0
  187. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/base/mod.rs +0 -0
  188. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/base/schema.rs +0 -0
  189. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/base/spec.rs +0 -0
  190. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/base/value.rs +0 -0
  191. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/builder/analyzed_flow.rs +0 -0
  192. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/builder/analyzer.rs +0 -0
  193. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/builder/exec_ctx.rs +0 -0
  194. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/builder/flow_builder.rs +0 -0
  195. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/builder/mod.rs +0 -0
  196. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/builder/plan.rs +0 -0
  197. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/execution/db_tracking.rs +0 -0
  198. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/execution/db_tracking_setup.rs +0 -0
  199. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/execution/dumper.rs +0 -0
  200. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/execution/evaluator.rs +0 -0
  201. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/execution/indexing_status.rs +0 -0
  202. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/execution/live_updater.rs +0 -0
  203. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/execution/memoization.rs +0 -0
  204. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/execution/mod.rs +0 -0
  205. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/execution/row_indexer.rs +0 -0
  206. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/execution/source_indexer.rs +0 -0
  207. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/execution/stats.rs +0 -0
  208. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/lib.rs +0 -0
  209. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/lib_context.rs +0 -0
  210. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/llm/anthropic.rs +0 -0
  211. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/llm/gemini.rs +0 -0
  212. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/llm/litellm.rs +0 -0
  213. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/llm/ollama.rs +0 -0
  214. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/llm/openai.rs +0 -0
  215. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/llm/openrouter.rs +0 -0
  216. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/llm/voyage.rs +0 -0
  217. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/ops/factory_bases.rs +0 -0
  218. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/ops/functions/embed_text.rs +0 -0
  219. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/ops/functions/extract_by_llm.rs +0 -0
  220. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/ops/functions/mod.rs +0 -0
  221. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/ops/functions/parse_json.rs +0 -0
  222. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/ops/functions/split_recursively.rs +0 -0
  223. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/ops/interface.rs +0 -0
  224. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/ops/mod.rs +0 -0
  225. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/ops/py_factory.rs +0 -0
  226. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/ops/registration.rs +0 -0
  227. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/ops/registry.rs +0 -0
  228. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/ops/sdk.rs +0 -0
  229. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/ops/sources/amazon_s3.rs +0 -0
  230. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/ops/sources/google_drive.rs +0 -0
  231. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/ops/sources/local_file.rs +0 -0
  232. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/ops/sources/mod.rs +0 -0
  233. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/ops/targets/kuzu.rs +0 -0
  234. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/ops/targets/mod.rs +0 -0
  235. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/ops/targets/neo4j.rs +0 -0
  236. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/ops/targets/postgres.rs +0 -0
  237. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/ops/targets/qdrant.rs +0 -0
  238. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/ops/targets/shared/mod.rs +0 -0
  239. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/ops/targets/shared/property_graph.rs +0 -0
  240. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/ops/targets/shared/table_columns.rs +0 -0
  241. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/prelude.rs +0 -0
  242. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/py/mod.rs +0 -0
  243. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/server.rs +0 -0
  244. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/service/error.rs +0 -0
  245. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/service/flows.rs +0 -0
  246. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/service/mod.rs +0 -0
  247. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/settings.rs +0 -0
  248. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/setup/auth_registry.rs +0 -0
  249. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/setup/components.rs +0 -0
  250. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/setup/db_metadata.rs +0 -0
  251. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/setup/driver.rs +0 -0
  252. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/setup/mod.rs +0 -0
  253. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/setup/states.rs +0 -0
  254. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/utils/db.rs +0 -0
  255. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/utils/fingerprint.rs +0 -0
  256. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/utils/immutable.rs +0 -0
  257. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/utils/mod.rs +0 -0
  258. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/utils/retryable.rs +0 -0
  259. {cocoindex-0.1.58 → cocoindex-0.1.60}/src/utils/yaml_ser.rs +0 -0
@@ -0,0 +1,31 @@
1
+ # This file is autogenerated by maturin v1.8.1
2
+ # To update, run
3
+ #
4
+ # maturin generate-ci github
5
+ #
6
+ name: CI
7
+
8
+ on:
9
+ pull_request:
10
+ branches: [main]
11
+ paths:
12
+ - src/**
13
+ - python/**
14
+ - "*.toml"
15
+ - ".github/workflows/*.yml"
16
+ push:
17
+ branches: [main]
18
+ paths:
19
+ - src/**
20
+ - python/**
21
+ - "*.toml"
22
+ - ".github/workflows/*.yml"
23
+ workflow_dispatch:
24
+
25
+ permissions:
26
+ contents: read
27
+
28
+ jobs:
29
+ test:
30
+ name: Run test
31
+ uses: ./.github/workflows/_test.yml
@@ -3,7 +3,7 @@
3
3
  #
4
4
  # maturin generate-ci github
5
5
  #
6
- name: CI
6
+ name: format check
7
7
 
8
8
  on:
9
9
  pull_request:
@@ -11,15 +11,13 @@ on:
11
11
  paths:
12
12
  - src/**
13
13
  - python/**
14
- - "*.toml"
15
- - ".github/workflows/*.yml"
14
+ - examples/**
16
15
  push:
17
16
  branches: [main]
18
17
  paths:
19
18
  - src/**
20
19
  - python/**
21
- - "*.toml"
22
- - ".github/workflows/*.yml"
20
+ - examples/**
23
21
  workflow_dispatch:
24
22
 
25
23
  permissions:
@@ -51,8 +49,4 @@ jobs:
51
49
  pip install ruff
52
50
  - name: Check Python formatting
53
51
  run: |
54
- ruff format --check .
55
-
56
- test:
57
- name: Run test
58
- uses: ./.github/workflows/_test.yml
52
+ ruff format --check .
@@ -1040,7 +1040,7 @@ dependencies = [
1040
1040
 
1041
1041
  [[package]]
1042
1042
  name = "cocoindex"
1043
- version = "0.1.58"
1043
+ version = "0.1.60"
1044
1044
  dependencies = [
1045
1045
  "anyhow",
1046
1046
  "async-openai",
@@ -2,7 +2,7 @@
2
2
  name = "cocoindex"
3
3
  # Version used for local development is always higher than others to take precedence.
4
4
  # Will be overridden for specific release versions.
5
- version = "0.1.58"
5
+ version = "0.1.60"
6
6
  edition = "2024"
7
7
  rust-version = "1.86"
8
8
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cocoindex
3
- Version: 0.1.58
3
+ Version: 0.1.60
4
4
  Requires-Dist: click>=8.1.8
5
5
  Requires-Dist: rich>=14.0.0
6
6
  Requires-Dist: python-dotenv>=1.1.0
@@ -26,6 +26,7 @@ We support the following types of LLM APIs:
26
26
  | [Voyage](#voyage) | `LlmApiType.VOYAGE` | ❌ | ✅ |
27
27
  | [LiteLLM](#litellm) | `LlmApiType.LITE_LLM` | ✅ | ❌ |
28
28
  | [OpenRouter](#openrouter) | `LlmApiType.OPEN_ROUTER` | ✅ | ❌ |
29
+ | [vLLM](#vllm) | `LlmApiType.VLLM` | ✅ | ❌ |
29
30
 
30
31
  ## LLM Tasks
31
32
 
@@ -307,3 +308,34 @@ cocoindex.LlmSpec(
307
308
  </Tabs>
308
309
 
309
310
  You can find the full list of models supported by OpenRouter [here](https://openrouter.ai/models).
311
+
312
+ ### vLLM
313
+
314
+ Install vLLM:
315
+
316
+ ```bash
317
+ pip install vllm
318
+ ```
319
+
320
+ Run vLLM Server:
321
+
322
+ ```bash
323
+ vllm serve deepseek-ai/deepseek-coder-1.3b-instruct
324
+ ```
325
+
326
+
327
+ A spec for vLLM looks like this:
328
+
329
+ <Tabs>
330
+ <TabItem value="python" label="Python" default>
331
+
332
+ ```python
333
+ cocoindex.LlmSpec(
334
+ api_type=cocoindex.LlmApiType.VLLM,
335
+ model="deepseek-ai/deepseek-coder-1.3b-instruct",
336
+ address="http://127.0.0.1:8000/v1",
337
+ )
338
+ ```
339
+
340
+ </TabItem>
341
+ </Tabs>
@@ -0,0 +1,188 @@
1
+ ---
2
+ title: Data Types
3
+ description: Data Types in CocoIndex
4
+ toc_max_heading_level: 4
5
+ ---
6
+
7
+ # Data Types in CocoIndex
8
+
9
+ In CocoIndex, all data processed by the flow have a type determined when the flow is defined, before any actual data is processed at runtime.
10
+
11
+ This makes the schema of data processed by CocoIndex clear, and makes it easy to determine the schema of your index.
12
+
13
+ ## Data Types
14
+
15
+ As an engine written in Rust and designed to be used from different languages, with data that is always serializable, CocoIndex defines a type system independent of any specific programming language.
16
+
17
+ CocoIndex automatically infers data types of the output created by CocoIndex sources and functions.
18
+ You don't need to spell out any data type explicitly when you define the flow.
19
+ All you need to do is to make sure the data passed to functions and targets are compatible with them.
20
+
21
+ Each type in CocoIndex type system is mapped to one or multiple types in Python.
22
+ When you define a [custom function](/docs/core/custom_function), you need to annotate the data types of arguments and return values.
23
+
24
+ * For return values, type annotation is required, because it provides the ground truth for the type of the output of the custom function.
25
+ * For arguments, type annotation is only used to enable the conversion from data values already existing in CocoIndex engine to Python value.
26
+ Type annotation is optional for basic types. When not specified, CocoIndex will use the *default Python type* for the argument.
27
+ Type annotation is required for arguments of struct types and table types.
28
+
29
+ ### Basic Types
30
+
31
+ #### Primitive Types
32
+
33
+ Primitive types are basic types that are not composed of other types.
34
+ This is the list of all primitive types supported by CocoIndex:
35
+
36
+ | CocoIndex Type | Python Types | Convertible to | Explanation |
37
+ |------|-------------|--------------|----------------|
38
+ | *Bytes* | `bytes` | | |
39
+ | *Str* | `str` | | |
40
+ | *Bool* | `bool` | | |
41
+ | *Int64* | `cocoindex.Int64`, `int`, `numpy.int64` | | |
42
+ | *Float32* | `cocoindex.Float32`, `numpy.float32` | *Float64* | |
43
+ | *Float64* | `cocoindex.Float64`, `float`, `numpy.float64` | | |
44
+ | *Range* | `cocoindex.Range` | | |
45
+ | *Uuid* | `uuid.UUID` | | |
46
+ | *Date* | `datetime.date` | | |
47
+ | *Time* | `datetime.time` | | |
48
+ | *LocalDatetime* | `cocoindex.LocalDateTime` | *OffsetDatetime* | without timezone |
49
+ | *OffsetDatetime* | `cocoindex.OffsetDateTime`, `datetime.datetime` | | with timezone |
50
+ | *TimeDelta* | `datetime.timedelta` | | |
51
+
52
+ Notes:
53
+
54
+ * For some CocoIndex types, we support multiple Python types. You can annotate with any of these Python types.
55
+ The first one is the *default Python type*, which means CocoIndex will create a value with this type when you don't annotate the type in function arguments.
56
+
57
+ * All Python types starting with `cocoindex.` are type aliases exported by CocoIndex. They're annotated types based on certain Python types:
58
+
59
+ * `cocoindex.Int64`: `int`
60
+ * `cocoindex.Float64`: `float`
61
+ * `cocoindex.Float32`: `float`
62
+ * `cocoindex.Range`: `tuple[int, int]`, i.e. a start offset (inclusive) and an end offset (exclusive)
63
+ * `cocoindex.OffsetDateTime`: `datetime.datetime`
64
+ * `cocoindex.LocalDateTime`: `datetime.datetime`
65
+
66
+ These aliases provide a non-ambiguous way to represent a specific type in CocoIndex, given their base Python types can represent a superset of possible values.
67
+
68
+ * When we say a CocoIndex type is *convertible to* another type, it means Python types for the second type can also be used to bind to a value of the first type.
69
+ For example, *Float32* is convertible to *Float64*, so you can bind a value of *Float32* to a Python value of `float` or `numpy.float64` types.
70
+ For *LocalDatetime*, when you use `cocoindex.OffsetDateTime` or `datetime.datetime` as the annotation to bind its value, the timezone will be set to UTC.
71
+
72
+
73
+ #### Json Type
74
+
75
+ *Json* type can hold any data convertible to JSON by `json` package.
76
+ In Python, it's represented by `cocoindex.Json`.
77
+ It's useful to hold data without fixed schema known at flow definition time.
78
+
79
+
80
+ #### Vector Types
81
+
82
+ A vector type is a collection of elements of the same basic type.
83
+ Optionally, it can have a fixed dimension. Noted as *Vector[Type]* or *Vector[Type, Dim]*, e.g. *Vector[Float32]* or *Vector[Float32, 384]*.
84
+
85
+ It supports the following Python types:
86
+
87
+ * `cocoindex.Vector[T]` or `cocoindex.Vector[T, typing.Literal[Dim]]`, e.g. `cocoindex.Vector[cocoindex.Float32]` or `cocoindex.Vector[cocoindex.Float32, typing.Literal[384]]`
88
+ * The underlying Python type is `numpy.typing.NDArray[T]` where `T` is a numpy numeric type (`numpy.int64`, `numpy.float32` or `numpy.float64`), or `list[T]` otherwise
89
+ * `numpy.typing.NDArray[T]` where `T` is a numpy numeric type
90
+ * `list[T]`
91
+
92
+
93
+ #### Union Types
94
+
95
+ A union type is a type that can represent values in one of multiple basic types.
96
+ Noted as *Type1* | *Type2* | ..., e.g. *Int64* | *Float32* | *Float64*.
97
+
98
+ The Python type is `T1 | T2 | ...`, e.g. `cocoindex.Int64 | cocoindex.Float32 | cocoindex.Float64`, `int | float` (equivalent to `cocoindex.Int64 | cocoindex.Float64`)
99
+
100
+
101
+ ### Struct Types
102
+
103
+ A *Struct* has a bunch of fields, each with a name and a type.
104
+
105
+ In Python, a *Struct* type is represented by either a [dataclass](https://docs.python.org/3/library/dataclasses.html)
106
+ or a [NamedTuple](https://docs.python.org/3/library/typing.html#typing.NamedTuple), with all fields annotated with a specific type.
107
+ Both options define a structured type with named fields, but they differ slightly:
108
+
109
+ - **Dataclass**: A flexible class-based structure, mutable by default, defined using the `@dataclass` decorator.
110
+ - **NamedTuple**: An immutable tuple-based structure, defined using `typing.NamedTuple`.
111
+
112
+ For example:
113
+
114
+ ```python
115
+ from dataclasses import dataclass
116
+ from typing import NamedTuple
117
+ import datetime
118
+
119
+ # Using dataclass
120
+ @dataclass
121
+ class Person:
122
+ first_name: str
123
+ last_name: str
124
+ dob: datetime.date
125
+
126
+ # Using NamedTuple
127
+ class PersonTuple(NamedTuple):
128
+ first_name: str
129
+ last_name: str
130
+ dob: datetime.date
131
+ ```
132
+
133
+ Both `Person` and `PersonTuple` are valid Struct types in CocoIndex, with identical schemas (three fields: `first_name` (Str), `last_name` (Str), `dob` (Date)).
134
+ Choose `dataclass` for mutable objects or when you need additional methods, and `NamedTuple` for immutable, lightweight structures.
135
+
136
+ ### Table Types
137
+
138
+ A *Table* type models a collection of rows, each with multiple columns.
139
+ Each column of a table has a specific type.
140
+
141
+ We have two specific types of *Table* types: *KTable* and *LTable*.
142
+
143
+ #### KTable
144
+
145
+ *KTable* is a *Table* type whose first column serves as the key.
146
+ The row order of a *KTable* is not preserved.
147
+ Type of the first column (key column) must be a [key type](#key-types).
148
+
149
+ In Python, a *KTable* type is represented by `dict[K, V]`.
150
+ The `V` should be a *Struct* type, either a `dataclass` or `NamedTuple`, representing the value fields of each row.
151
+ For example, you can use `dict[str, Person]` or `dict[str, PersonTuple]` to represent a *KTable*, with 4 columns: key (*Str*), `first_name` (*Str*), `last_name` (*Str*), `dob` (*Date*).
152
+
153
+ Note that if you want to use a *Struct* as the key, you need to ensure its value in Python is immutable. For `dataclass`, annotate it with `@dataclass(frozen=True)`. For `NamedTuple`, immutability is built-in.
154
+ For example:
155
+
156
+ ```python
157
+ @dataclass(frozen=True)
158
+ class PersonKey:
159
+ id_kind: str
160
+ id: str
161
+
162
+ class PersonKeyTuple(NamedTuple):
163
+ id_kind: str
164
+ id: str
165
+ ```
166
+
167
+ Then you can use `dict[PersonKey, Person]` or `dict[PersonKeyTuple, PersonTuple]` to represent a KTable keyed by `PersonKey` or `PersonKeyTuple`.
168
+
169
+
170
+ #### LTable
171
+
172
+ *LTable* is a *Table* type whose row order is preserved. *LTable* has no key column.
173
+
174
+ In Python, an *LTable* type is represented by `list[R]`, where `R` is a dataclass representing a row.
175
+ For example, you can use `list[Person]` to represent a *LTable* with 3 columns: `first_name` (*Str*), `last_name` (*Str*), `dob` (*Date*).
176
+
177
+ ## Key Types
178
+
179
+ Currently, the following types are key types:
180
+
181
+ - *Bytes*
182
+ - *Str*
183
+ - *Bool*
184
+ - *Int64*
185
+ - *Range*
186
+ - *Uuid*
187
+ - *Date*
188
+ - *Struct* with all fields being key types (using `@dataclass(frozen=True)` or `NamedTuple`)
@@ -0,0 +1,4 @@
1
+ # Postgres database address for cocoindex
2
+ COCOINDEX_DATABASE_URL=postgres://cocoindex:cocoindex@localhost/cocoindex
3
+
4
+ OPENAI_API_KEY=
@@ -0,0 +1,58 @@
1
+ # Extract structured data from patient intake forms with LLM
2
+ [![GitHub](https://img.shields.io/github/stars/cocoindex-io/cocoindex?color=5B5BD6)](https://github.com/cocoindex-io/cocoindex)
3
+
4
+
5
+ This example shows how to use the OpenAI API to extract structured data from patient intake forms in different formats (PDF, Docx, etc.) stored in a local directory (`data/patient_forms`).
6
+
7
+ We appreciate a star ⭐ at [CocoIndex GitHub](https://github.com/cocoindex-io/cocoindex) if this is helpful.
8
+
9
+ ![Structured Data From Patient Intake Forms](https://github.com/user-attachments/assets/1f6afb69-d26d-4a08-8774-13982d6aec1e)
10
+
11
+
12
+ ## Tutorials
13
+ - Step by step tutorial - Check out the [blog](https://cocoindex.io/blogs/patient-intake-form-extraction-with-llm).
14
+ - Video tutorial - [YouTube](https://youtu.be/_mjlwVtnBn0?si=cpH-4kkOAYm2HhK6).
15
+
16
+ ## Prerequisites
17
+ 1. [Install Postgres](https://cocoindex.io/docs/getting_started/installation#-install-postgres) if you don't have one.
18
+
19
+ 2. Install CocoIndex
20
+ ```bash
21
+ pip install -U cocoindex
22
+ ```
23
+
24
+ 3. Install MarkItDown
25
+ ```bash
26
+ pip install 'markitdown[all]'
27
+ ```
28
+ 4. Create a `.env` file from `.env.example`, and fill in `OPENAI_API_KEY`.
29
+
30
+ ## Run
31
+
32
+ Set up index:
33
+
34
+ ```bash
35
+ cocoindex setup main.py
36
+ ```
37
+
38
+ Update index:
39
+
40
+ ```bash
41
+ cocoindex update main.py
42
+ ```
43
+
44
+ Run query:
45
+
46
+ ```bash
47
+ python main.py
48
+ ```
49
+
50
+ Run with CocoInsight:
51
+ ```bash
52
+ cocoindex server -ci main.py
53
+ ```
54
+ <img width="1405" alt="Screenshot 2025-07-02 at 11 59 24 AM" src="https://github.com/user-attachments/assets/6f5154cd-8a53-4baa-b914-cd60ffecf3d4" />
55
+
56
+
57
+
58
+ View results at https://cocoindex.io/cocoinsight
@@ -0,0 +1,4 @@
1
+ ## Note:
2
+ Example files here are purely artificial and not real, for testing purposes only.
3
+ Please do not use these examples for any other purpose.
4
+
@@ -0,0 +1,148 @@
1
+ import datetime
2
+ import tempfile
3
+ import dataclasses
4
+ import os
5
+
6
+ from markitdown import MarkItDown
7
+ from openai import OpenAI
8
+
9
+ import cocoindex
10
+
11
+
12
+ @dataclasses.dataclass
13
+ class Contact:
14
+ name: str
15
+ phone: str
16
+ relationship: str
17
+
18
+
19
+ @dataclasses.dataclass
20
+ class Address:
21
+ street: str
22
+ city: str
23
+ state: str
24
+ zip_code: str
25
+
26
+
27
+ @dataclasses.dataclass
28
+ class Pharmacy:
29
+ name: str
30
+ phone: str
31
+ address: Address
32
+
33
+
34
+ @dataclasses.dataclass
35
+ class Insurance:
36
+ provider: str
37
+ policy_number: str
38
+ group_number: str | None
39
+ policyholder_name: str
40
+ relationship_to_patient: str
41
+
42
+
43
+ @dataclasses.dataclass
44
+ class Condition:
45
+ name: str
46
+ diagnosed: bool
47
+
48
+
49
+ @dataclasses.dataclass
50
+ class Medication:
51
+ name: str
52
+ dosage: str
53
+
54
+
55
+ @dataclasses.dataclass
56
+ class Allergy:
57
+ name: str
58
+
59
+
60
+ @dataclasses.dataclass
61
+ class Surgery:
62
+ name: str
63
+ date: str
64
+
65
+
66
+ @dataclasses.dataclass
67
+ class Patient:
68
+ name: str
69
+ dob: datetime.date
70
+ gender: str
71
+ address: Address
72
+ phone: str
73
+ email: str
74
+ preferred_contact_method: str
75
+ emergency_contact: Contact
76
+ insurance: Insurance | None
77
+ reason_for_visit: str
78
+ symptoms_duration: str
79
+ past_conditions: list[Condition]
80
+ current_medications: list[Medication]
81
+ allergies: list[Allergy]
82
+ surgeries: list[Surgery]
83
+ occupation: str | None
84
+ pharmacy: Pharmacy | None
85
+ consent_given: bool
86
+ consent_date: datetime.date | None
87
+
88
+
89
+ class ToMarkdown(cocoindex.op.FunctionSpec):
90
+ """Convert a document to markdown."""
91
+
92
+
93
+ @cocoindex.op.executor_class(gpu=True, cache=True, behavior_version=1)
94
+ class ToMarkdownExecutor:
95
+ """Executor for ToMarkdown."""
96
+
97
+ spec: ToMarkdown
98
+ _converter: MarkItDown
99
+
100
+ def prepare(self):
101
+ client = OpenAI()
102
+ self._converter = MarkItDown(llm_client=client, llm_model="gpt-4o")
103
+
104
+ def __call__(self, content: bytes, filename: str) -> str:
105
+ suffix = os.path.splitext(filename)[1]
106
+ with tempfile.NamedTemporaryFile(delete=True, suffix=suffix) as temp_file:
107
+ temp_file.write(content)
108
+ temp_file.flush()
109
+ text = self._converter.convert(temp_file.name).text_content
110
+ return text
111
+
112
+
113
+ @cocoindex.flow_def(name="PatientIntakeExtraction")
114
+ def patient_intake_extraction_flow(
115
+ flow_builder: cocoindex.FlowBuilder, data_scope: cocoindex.DataScope
116
+ ):
117
+ """
118
+ Define a flow that extracts patient information from intake forms.
119
+ """
120
+ data_scope["documents"] = flow_builder.add_source(
121
+ cocoindex.sources.LocalFile(path="data/patient_forms", binary=True)
122
+ )
123
+
124
+ patients_index = data_scope.add_collector()
125
+
126
+ with data_scope["documents"].row() as doc:
127
+ doc["markdown"] = doc["content"].transform(
128
+ ToMarkdown(), filename=doc["filename"]
129
+ )
130
+ doc["patient_info"] = doc["markdown"].transform(
131
+ cocoindex.functions.ExtractByLlm(
132
+ llm_spec=cocoindex.LlmSpec(
133
+ api_type=cocoindex.LlmApiType.OPENAI, model="gpt-4o"
134
+ ),
135
+ output_type=Patient,
136
+ instruction="Please extract patient information from the intake form.",
137
+ )
138
+ )
139
+ patients_index.collect(
140
+ filename=doc["filename"],
141
+ patient_info=doc["patient_info"],
142
+ )
143
+
144
+ patients_index.export(
145
+ "patients",
146
+ cocoindex.storages.Postgres(table_name="patients_info"),
147
+ primary_key_fields=["filename"],
148
+ )
@@ -0,0 +1,11 @@
1
+ [project]
2
+ name = "patient-intake-extraction"
3
+ version = "0.1.0"
4
+ description = "Extract structured information from patient intake forms using LLM."
5
+ requires-python = ">=3.10"
6
+ dependencies = [
7
+ "cocoindex>=0.1.45",
8
+ "python-dotenv>=1.0.1",
9
+ "markitdown>=0.1.2",
10
+ "openai>=1.68.2"
11
+ ]
@@ -17,7 +17,16 @@ from .llm import LlmSpec, LlmApiType
17
17
  from .index import VectorSimilarityMetric, VectorIndexDef, IndexOptions
18
18
  from .setting import DatabaseConnectionSpec, Settings, ServerSettings
19
19
  from .setting import get_app_namespace
20
- from .typing import Float32, Float64, LocalDateTime, OffsetDateTime, Range, Vector, Json
20
+ from .typing import (
21
+ Int64,
22
+ Float32,
23
+ Float64,
24
+ LocalDateTime,
25
+ OffsetDateTime,
26
+ Range,
27
+ Vector,
28
+ Json,
29
+ )
21
30
 
22
31
  __all__ = [
23
32
  # Submodules
@@ -64,6 +73,7 @@ __all__ = [
64
73
  "ServerSettings",
65
74
  "get_app_namespace",
66
75
  # Typing
76
+ "Int64",
67
77
  "Float32",
68
78
  "Float64",
69
79
  "LocalDateTime",