datachain 0.30.4__tar.gz → 0.30.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (420) hide show
  1. {datachain-0.30.4 → datachain-0.30.5}/PKG-INFO +3 -3
  2. datachain-0.30.5/examples/get_started/nested_datamodel.py +70 -0
  3. {datachain-0.30.4 → datachain-0.30.5}/pyproject.toml +2 -2
  4. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/data_storage/warehouse.py +2 -2
  5. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/arrow.py +2 -2
  6. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/dc/datachain.py +9 -7
  7. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/model_store.py +12 -0
  8. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/query/dispatch.py +5 -0
  9. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/sql/sqlite/base.py +12 -11
  10. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/sql/sqlite/types.py +8 -13
  11. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/sql/types.py +3 -3
  12. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/utils.py +1 -1
  13. {datachain-0.30.4 → datachain-0.30.5}/src/datachain.egg-info/PKG-INFO +3 -3
  14. {datachain-0.30.4 → datachain-0.30.5}/src/datachain.egg-info/SOURCES.txt +1 -0
  15. {datachain-0.30.4 → datachain-0.30.5}/src/datachain.egg-info/requires.txt +2 -2
  16. {datachain-0.30.4 → datachain-0.30.5}/tests/func/functions/test_array.py +82 -4
  17. {datachain-0.30.4 → datachain-0.30.5}/tests/func/test_data_storage.py +2 -2
  18. {datachain-0.30.4 → datachain-0.30.5}/tests/unit/lib/test_datachain.py +29 -0
  19. datachain-0.30.5/tests/unit/sql/sqlite/test_types.py +40 -0
  20. datachain-0.30.4/tests/unit/sql/sqlite/test_types.py +0 -19
  21. {datachain-0.30.4 → datachain-0.30.5}/.cruft.json +0 -0
  22. {datachain-0.30.4 → datachain-0.30.5}/.gitattributes +0 -0
  23. {datachain-0.30.4 → datachain-0.30.5}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  24. {datachain-0.30.4 → datachain-0.30.5}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
  25. {datachain-0.30.4 → datachain-0.30.5}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  26. {datachain-0.30.4 → datachain-0.30.5}/.github/codecov.yaml +0 -0
  27. {datachain-0.30.4 → datachain-0.30.5}/.github/dependabot.yml +0 -0
  28. {datachain-0.30.4 → datachain-0.30.5}/.github/workflows/benchmarks.yml +0 -0
  29. {datachain-0.30.4 → datachain-0.30.5}/.github/workflows/release.yml +0 -0
  30. {datachain-0.30.4 → datachain-0.30.5}/.github/workflows/tests-studio.yml +0 -0
  31. {datachain-0.30.4 → datachain-0.30.5}/.github/workflows/tests.yml +0 -0
  32. {datachain-0.30.4 → datachain-0.30.5}/.github/workflows/update-template.yaml +0 -0
  33. {datachain-0.30.4 → datachain-0.30.5}/.gitignore +0 -0
  34. {datachain-0.30.4 → datachain-0.30.5}/.pre-commit-config.yaml +0 -0
  35. {datachain-0.30.4 → datachain-0.30.5}/CODE_OF_CONDUCT.rst +0 -0
  36. {datachain-0.30.4 → datachain-0.30.5}/LICENSE +0 -0
  37. {datachain-0.30.4 → datachain-0.30.5}/README.rst +0 -0
  38. {datachain-0.30.4 → datachain-0.30.5}/docs/assets/captioned_cartoons.png +0 -0
  39. {datachain-0.30.4 → datachain-0.30.5}/docs/assets/datachain-white.svg +0 -0
  40. {datachain-0.30.4 → datachain-0.30.5}/docs/assets/datachain.svg +0 -0
  41. {datachain-0.30.4 → datachain-0.30.5}/docs/commands/auth/login.md +0 -0
  42. {datachain-0.30.4 → datachain-0.30.5}/docs/commands/auth/logout.md +0 -0
  43. {datachain-0.30.4 → datachain-0.30.5}/docs/commands/auth/team.md +0 -0
  44. {datachain-0.30.4 → datachain-0.30.5}/docs/commands/auth/token.md +0 -0
  45. {datachain-0.30.4 → datachain-0.30.5}/docs/commands/index.md +0 -0
  46. {datachain-0.30.4 → datachain-0.30.5}/docs/commands/job/cancel.md +0 -0
  47. {datachain-0.30.4 → datachain-0.30.5}/docs/commands/job/clusters.md +0 -0
  48. {datachain-0.30.4 → datachain-0.30.5}/docs/commands/job/logs.md +0 -0
  49. {datachain-0.30.4 → datachain-0.30.5}/docs/commands/job/ls.md +0 -0
  50. {datachain-0.30.4 → datachain-0.30.5}/docs/commands/job/run.md +0 -0
  51. {datachain-0.30.4 → datachain-0.30.5}/docs/contributing.md +0 -0
  52. {datachain-0.30.4 → datachain-0.30.5}/docs/css/github-permalink-style.css +0 -0
  53. {datachain-0.30.4 → datachain-0.30.5}/docs/examples.md +0 -0
  54. {datachain-0.30.4 → datachain-0.30.5}/docs/guide/db_migrations.md +0 -0
  55. {datachain-0.30.4 → datachain-0.30.5}/docs/guide/delta.md +0 -0
  56. {datachain-0.30.4 → datachain-0.30.5}/docs/guide/env.md +0 -0
  57. {datachain-0.30.4 → datachain-0.30.5}/docs/guide/index.md +0 -0
  58. {datachain-0.30.4 → datachain-0.30.5}/docs/guide/namespaces.md +0 -0
  59. {datachain-0.30.4 → datachain-0.30.5}/docs/guide/processing.md +0 -0
  60. {datachain-0.30.4 → datachain-0.30.5}/docs/guide/remotes.md +0 -0
  61. {datachain-0.30.4 → datachain-0.30.5}/docs/guide/retry.md +0 -0
  62. {datachain-0.30.4 → datachain-0.30.5}/docs/index.md +0 -0
  63. {datachain-0.30.4 → datachain-0.30.5}/docs/overrides/main.html +0 -0
  64. {datachain-0.30.4 → datachain-0.30.5}/docs/quick-start.md +0 -0
  65. {datachain-0.30.4 → datachain-0.30.5}/docs/references/data-types/arrowrow.md +0 -0
  66. {datachain-0.30.4 → datachain-0.30.5}/docs/references/data-types/bbox.md +0 -0
  67. {datachain-0.30.4 → datachain-0.30.5}/docs/references/data-types/file.md +0 -0
  68. {datachain-0.30.4 → datachain-0.30.5}/docs/references/data-types/imagefile.md +0 -0
  69. {datachain-0.30.4 → datachain-0.30.5}/docs/references/data-types/index.md +0 -0
  70. {datachain-0.30.4 → datachain-0.30.5}/docs/references/data-types/pose.md +0 -0
  71. {datachain-0.30.4 → datachain-0.30.5}/docs/references/data-types/segment.md +0 -0
  72. {datachain-0.30.4 → datachain-0.30.5}/docs/references/data-types/tarvfile.md +0 -0
  73. {datachain-0.30.4 → datachain-0.30.5}/docs/references/data-types/textfile.md +0 -0
  74. {datachain-0.30.4 → datachain-0.30.5}/docs/references/data-types/videofile.md +0 -0
  75. {datachain-0.30.4 → datachain-0.30.5}/docs/references/datachain.md +0 -0
  76. {datachain-0.30.4 → datachain-0.30.5}/docs/references/func.md +0 -0
  77. {datachain-0.30.4 → datachain-0.30.5}/docs/references/functions/aggregate.md +0 -0
  78. {datachain-0.30.4 → datachain-0.30.5}/docs/references/functions/array.md +0 -0
  79. {datachain-0.30.4 → datachain-0.30.5}/docs/references/functions/conditional.md +0 -0
  80. {datachain-0.30.4 → datachain-0.30.5}/docs/references/functions/numeric.md +0 -0
  81. {datachain-0.30.4 → datachain-0.30.5}/docs/references/functions/path.md +0 -0
  82. {datachain-0.30.4 → datachain-0.30.5}/docs/references/functions/random.md +0 -0
  83. {datachain-0.30.4 → datachain-0.30.5}/docs/references/functions/string.md +0 -0
  84. {datachain-0.30.4 → datachain-0.30.5}/docs/references/functions/window.md +0 -0
  85. {datachain-0.30.4 → datachain-0.30.5}/docs/references/index.md +0 -0
  86. {datachain-0.30.4 → datachain-0.30.5}/docs/references/toolkit.md +0 -0
  87. {datachain-0.30.4 → datachain-0.30.5}/docs/references/torch.md +0 -0
  88. {datachain-0.30.4 → datachain-0.30.5}/docs/references/udf.md +0 -0
  89. {datachain-0.30.4 → datachain-0.30.5}/docs/tutorials.md +0 -0
  90. {datachain-0.30.4 → datachain-0.30.5}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
  91. {datachain-0.30.4 → datachain-0.30.5}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
  92. {datachain-0.30.4 → datachain-0.30.5}/examples/computer_vision/openimage-detect.py +0 -0
  93. {datachain-0.30.4 → datachain-0.30.5}/examples/computer_vision/ultralytics-bbox.py +0 -0
  94. {datachain-0.30.4 → datachain-0.30.5}/examples/computer_vision/ultralytics-pose.py +0 -0
  95. {datachain-0.30.4 → datachain-0.30.5}/examples/computer_vision/ultralytics-segment.py +0 -0
  96. {datachain-0.30.4 → datachain-0.30.5}/examples/get_started/common_sql_functions.py +0 -0
  97. {datachain-0.30.4 → datachain-0.30.5}/examples/get_started/json-csv-reader.py +0 -0
  98. {datachain-0.30.4 → datachain-0.30.5}/examples/get_started/torch-loader.py +0 -0
  99. {datachain-0.30.4 → datachain-0.30.5}/examples/get_started/udfs/parallel.py +0 -0
  100. {datachain-0.30.4 → datachain-0.30.5}/examples/get_started/udfs/simple.py +0 -0
  101. {datachain-0.30.4 → datachain-0.30.5}/examples/get_started/udfs/stateful.py +0 -0
  102. {datachain-0.30.4 → datachain-0.30.5}/examples/incremental_processing/delta.py +0 -0
  103. {datachain-0.30.4 → datachain-0.30.5}/examples/incremental_processing/retry.py +0 -0
  104. {datachain-0.30.4 → datachain-0.30.5}/examples/incremental_processing/utils.py +0 -0
  105. {datachain-0.30.4 → datachain-0.30.5}/examples/llm_and_nlp/claude-query.py +0 -0
  106. {datachain-0.30.4 → datachain-0.30.5}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
  107. {datachain-0.30.4 → datachain-0.30.5}/examples/multimodal/audio-to-text.py +0 -0
  108. {datachain-0.30.4 → datachain-0.30.5}/examples/multimodal/clip_inference.py +0 -0
  109. {datachain-0.30.4 → datachain-0.30.5}/examples/multimodal/hf_pipeline.py +0 -0
  110. {datachain-0.30.4 → datachain-0.30.5}/examples/multimodal/openai_image_desc_lib.py +0 -0
  111. {datachain-0.30.4 → datachain-0.30.5}/examples/multimodal/wds.py +0 -0
  112. {datachain-0.30.4 → datachain-0.30.5}/examples/multimodal/wds_filtered.py +0 -0
  113. {datachain-0.30.4 → datachain-0.30.5}/mkdocs.yml +0 -0
  114. {datachain-0.30.4 → datachain-0.30.5}/noxfile.py +0 -0
  115. {datachain-0.30.4 → datachain-0.30.5}/setup.cfg +0 -0
  116. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/__init__.py +0 -0
  117. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/__main__.py +0 -0
  118. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/asyn.py +0 -0
  119. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/cache.py +0 -0
  120. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/catalog/__init__.py +0 -0
  121. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/catalog/catalog.py +0 -0
  122. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/catalog/datasource.py +0 -0
  123. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/catalog/loader.py +0 -0
  124. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/cli/__init__.py +0 -0
  125. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/cli/commands/__init__.py +0 -0
  126. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/cli/commands/datasets.py +0 -0
  127. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/cli/commands/du.py +0 -0
  128. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/cli/commands/index.py +0 -0
  129. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/cli/commands/ls.py +0 -0
  130. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/cli/commands/misc.py +0 -0
  131. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/cli/commands/query.py +0 -0
  132. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/cli/commands/show.py +0 -0
  133. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/cli/parser/__init__.py +0 -0
  134. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/cli/parser/job.py +0 -0
  135. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/cli/parser/studio.py +0 -0
  136. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/cli/parser/utils.py +0 -0
  137. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/cli/utils.py +0 -0
  138. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/client/__init__.py +0 -0
  139. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/client/azure.py +0 -0
  140. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/client/fileslice.py +0 -0
  141. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/client/fsspec.py +0 -0
  142. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/client/gcs.py +0 -0
  143. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/client/hf.py +0 -0
  144. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/client/local.py +0 -0
  145. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/client/s3.py +0 -0
  146. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/config.py +0 -0
  147. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/data_storage/__init__.py +0 -0
  148. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/data_storage/db_engine.py +0 -0
  149. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/data_storage/job.py +0 -0
  150. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/data_storage/metastore.py +0 -0
  151. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/data_storage/schema.py +0 -0
  152. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/data_storage/serializer.py +0 -0
  153. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/data_storage/sqlite.py +0 -0
  154. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/dataset.py +0 -0
  155. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/delta.py +0 -0
  156. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/diff/__init__.py +0 -0
  157. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/error.py +0 -0
  158. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/fs/__init__.py +0 -0
  159. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/fs/reference.py +0 -0
  160. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/fs/utils.py +0 -0
  161. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/func/__init__.py +0 -0
  162. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/func/aggregate.py +0 -0
  163. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/func/array.py +0 -0
  164. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/func/base.py +0 -0
  165. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/func/conditional.py +0 -0
  166. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/func/func.py +0 -0
  167. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/func/numeric.py +0 -0
  168. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/func/path.py +0 -0
  169. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/func/random.py +0 -0
  170. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/func/string.py +0 -0
  171. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/func/window.py +0 -0
  172. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/job.py +0 -0
  173. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/__init__.py +0 -0
  174. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/audio.py +0 -0
  175. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/clip.py +0 -0
  176. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/convert/__init__.py +0 -0
  177. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/convert/flatten.py +0 -0
  178. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/convert/python_to_sql.py +0 -0
  179. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/convert/sql_to_python.py +0 -0
  180. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/convert/unflatten.py +0 -0
  181. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/convert/values_to_tuples.py +0 -0
  182. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/data_model.py +0 -0
  183. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/dataset_info.py +0 -0
  184. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/dc/__init__.py +0 -0
  185. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/dc/csv.py +0 -0
  186. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/dc/database.py +0 -0
  187. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/dc/datasets.py +0 -0
  188. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/dc/hf.py +0 -0
  189. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/dc/json.py +0 -0
  190. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/dc/listings.py +0 -0
  191. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/dc/pandas.py +0 -0
  192. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/dc/parquet.py +0 -0
  193. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/dc/records.py +0 -0
  194. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/dc/storage.py +0 -0
  195. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/dc/utils.py +0 -0
  196. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/dc/values.py +0 -0
  197. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/file.py +0 -0
  198. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/hf.py +0 -0
  199. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/image.py +0 -0
  200. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/listing.py +0 -0
  201. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/listing_info.py +0 -0
  202. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/meta_formats.py +0 -0
  203. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/namespaces.py +0 -0
  204. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/projects.py +0 -0
  205. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/pytorch.py +0 -0
  206. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/settings.py +0 -0
  207. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/signal_schema.py +0 -0
  208. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/tar.py +0 -0
  209. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/text.py +0 -0
  210. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/udf.py +0 -0
  211. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/udf_signature.py +0 -0
  212. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/utils.py +0 -0
  213. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/video.py +0 -0
  214. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/webdataset.py +0 -0
  215. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/webdataset_laion.py +0 -0
  216. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/listing.py +0 -0
  217. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/model/__init__.py +0 -0
  218. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/model/bbox.py +0 -0
  219. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/model/pose.py +0 -0
  220. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/model/segment.py +0 -0
  221. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/model/ultralytics/__init__.py +0 -0
  222. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/model/ultralytics/bbox.py +0 -0
  223. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/model/ultralytics/pose.py +0 -0
  224. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/model/ultralytics/segment.py +0 -0
  225. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/model/utils.py +0 -0
  226. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/namespace.py +0 -0
  227. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/node.py +0 -0
  228. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/nodes_fetcher.py +0 -0
  229. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/nodes_thread_pool.py +0 -0
  230. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/progress.py +0 -0
  231. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/project.py +0 -0
  232. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/py.typed +0 -0
  233. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/query/__init__.py +0 -0
  234. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/query/batch.py +0 -0
  235. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/query/dataset.py +0 -0
  236. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/query/metrics.py +0 -0
  237. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/query/params.py +0 -0
  238. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/query/queue.py +0 -0
  239. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/query/schema.py +0 -0
  240. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/query/session.py +0 -0
  241. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/query/udf.py +0 -0
  242. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/query/utils.py +0 -0
  243. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/remote/__init__.py +0 -0
  244. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/remote/studio.py +0 -0
  245. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/script_meta.py +0 -0
  246. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/semver.py +0 -0
  247. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/sql/__init__.py +0 -0
  248. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/sql/default/__init__.py +0 -0
  249. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/sql/default/base.py +0 -0
  250. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/sql/functions/__init__.py +0 -0
  251. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/sql/functions/aggregate.py +0 -0
  252. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/sql/functions/array.py +0 -0
  253. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/sql/functions/conditional.py +0 -0
  254. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/sql/functions/numeric.py +0 -0
  255. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/sql/functions/path.py +0 -0
  256. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/sql/functions/random.py +0 -0
  257. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/sql/functions/string.py +0 -0
  258. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/sql/postgresql_dialect.py +0 -0
  259. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/sql/postgresql_types.py +0 -0
  260. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/sql/selectable.py +0 -0
  261. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/sql/sqlite/__init__.py +0 -0
  262. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/sql/sqlite/vector.py +0 -0
  263. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/sql/utils.py +0 -0
  264. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/studio.py +0 -0
  265. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/telemetry.py +0 -0
  266. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/toolkit/__init__.py +0 -0
  267. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/toolkit/split.py +0 -0
  268. {datachain-0.30.4 → datachain-0.30.5}/src/datachain/torch/__init__.py +0 -0
  269. {datachain-0.30.4 → datachain-0.30.5}/src/datachain.egg-info/dependency_links.txt +0 -0
  270. {datachain-0.30.4 → datachain-0.30.5}/src/datachain.egg-info/entry_points.txt +0 -0
  271. {datachain-0.30.4 → datachain-0.30.5}/src/datachain.egg-info/top_level.txt +0 -0
  272. {datachain-0.30.4 → datachain-0.30.5}/tests/__init__.py +0 -0
  273. {datachain-0.30.4 → datachain-0.30.5}/tests/benchmarks/__init__.py +0 -0
  274. {datachain-0.30.4 → datachain-0.30.5}/tests/benchmarks/conftest.py +0 -0
  275. {datachain-0.30.4 → datachain-0.30.5}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
  276. {datachain-0.30.4 → datachain-0.30.5}/tests/benchmarks/datasets/.dvc/config +0 -0
  277. {datachain-0.30.4 → datachain-0.30.5}/tests/benchmarks/datasets/.gitignore +0 -0
  278. {datachain-0.30.4 → datachain-0.30.5}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
  279. {datachain-0.30.4 → datachain-0.30.5}/tests/benchmarks/test_datachain.py +0 -0
  280. {datachain-0.30.4 → datachain-0.30.5}/tests/benchmarks/test_ls.py +0 -0
  281. {datachain-0.30.4 → datachain-0.30.5}/tests/benchmarks/test_version.py +0 -0
  282. {datachain-0.30.4 → datachain-0.30.5}/tests/conftest.py +0 -0
  283. {datachain-0.30.4 → datachain-0.30.5}/tests/data.py +0 -0
  284. {datachain-0.30.4 → datachain-0.30.5}/tests/examples/__init__.py +0 -0
  285. {datachain-0.30.4 → datachain-0.30.5}/tests/examples/test_examples.py +0 -0
  286. {datachain-0.30.4 → datachain-0.30.5}/tests/examples/test_wds_e2e.py +0 -0
  287. {datachain-0.30.4 → datachain-0.30.5}/tests/examples/wds_data.py +0 -0
  288. {datachain-0.30.4 → datachain-0.30.5}/tests/func/__init__.py +0 -0
  289. {datachain-0.30.4 → datachain-0.30.5}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
  290. {datachain-0.30.4 → datachain-0.30.5}/tests/func/data/lena.jpg +0 -0
  291. {datachain-0.30.4 → datachain-0.30.5}/tests/func/fake-service-account-credentials.json +0 -0
  292. {datachain-0.30.4 → datachain-0.30.5}/tests/func/functions/__init__.py +0 -0
  293. {datachain-0.30.4 → datachain-0.30.5}/tests/func/functions/test_aggregate.py +0 -0
  294. {datachain-0.30.4 → datachain-0.30.5}/tests/func/functions/test_conditional.py +0 -0
  295. {datachain-0.30.4 → datachain-0.30.5}/tests/func/functions/test_numeric.py +0 -0
  296. {datachain-0.30.4 → datachain-0.30.5}/tests/func/functions/test_path.py +0 -0
  297. {datachain-0.30.4 → datachain-0.30.5}/tests/func/functions/test_random.py +0 -0
  298. {datachain-0.30.4 → datachain-0.30.5}/tests/func/functions/test_string.py +0 -0
  299. {datachain-0.30.4 → datachain-0.30.5}/tests/func/model/__init__.py +0 -0
  300. {datachain-0.30.4 → datachain-0.30.5}/tests/func/model/data/running-mask0.png +0 -0
  301. {datachain-0.30.4 → datachain-0.30.5}/tests/func/model/data/running-mask1.png +0 -0
  302. {datachain-0.30.4 → datachain-0.30.5}/tests/func/model/data/running.jpg +0 -0
  303. {datachain-0.30.4 → datachain-0.30.5}/tests/func/model/data/ships.jpg +0 -0
  304. {datachain-0.30.4 → datachain-0.30.5}/tests/func/model/test_yolo.py +0 -0
  305. {datachain-0.30.4 → datachain-0.30.5}/tests/func/test_audio.py +0 -0
  306. {datachain-0.30.4 → datachain-0.30.5}/tests/func/test_batching.py +0 -0
  307. {datachain-0.30.4 → datachain-0.30.5}/tests/func/test_catalog.py +0 -0
  308. {datachain-0.30.4 → datachain-0.30.5}/tests/func/test_client.py +0 -0
  309. {datachain-0.30.4 → datachain-0.30.5}/tests/func/test_cloud_transfer.py +0 -0
  310. {datachain-0.30.4 → datachain-0.30.5}/tests/func/test_datachain.py +0 -0
  311. {datachain-0.30.4 → datachain-0.30.5}/tests/func/test_datachain_merge.py +0 -0
  312. {datachain-0.30.4 → datachain-0.30.5}/tests/func/test_dataset_query.py +0 -0
  313. {datachain-0.30.4 → datachain-0.30.5}/tests/func/test_datasets.py +0 -0
  314. {datachain-0.30.4 → datachain-0.30.5}/tests/func/test_delta.py +0 -0
  315. {datachain-0.30.4 → datachain-0.30.5}/tests/func/test_feature_pickling.py +0 -0
  316. {datachain-0.30.4 → datachain-0.30.5}/tests/func/test_file.py +0 -0
  317. {datachain-0.30.4 → datachain-0.30.5}/tests/func/test_hf.py +0 -0
  318. {datachain-0.30.4 → datachain-0.30.5}/tests/func/test_hidden_field.py +0 -0
  319. {datachain-0.30.4 → datachain-0.30.5}/tests/func/test_image.py +0 -0
  320. {datachain-0.30.4 → datachain-0.30.5}/tests/func/test_listing.py +0 -0
  321. {datachain-0.30.4 → datachain-0.30.5}/tests/func/test_ls.py +0 -0
  322. {datachain-0.30.4 → datachain-0.30.5}/tests/func/test_meta_formats.py +0 -0
  323. {datachain-0.30.4 → datachain-0.30.5}/tests/func/test_metastore.py +0 -0
  324. {datachain-0.30.4 → datachain-0.30.5}/tests/func/test_metrics.py +0 -0
  325. {datachain-0.30.4 → datachain-0.30.5}/tests/func/test_mutate.py +0 -0
  326. {datachain-0.30.4 → datachain-0.30.5}/tests/func/test_pull.py +0 -0
  327. {datachain-0.30.4 → datachain-0.30.5}/tests/func/test_pytorch.py +0 -0
  328. {datachain-0.30.4 → datachain-0.30.5}/tests/func/test_query.py +0 -0
  329. {datachain-0.30.4 → datachain-0.30.5}/tests/func/test_read_database.py +0 -0
  330. {datachain-0.30.4 → datachain-0.30.5}/tests/func/test_read_dataset_remote.py +0 -0
  331. {datachain-0.30.4 → datachain-0.30.5}/tests/func/test_read_dataset_version_specifiers.py +0 -0
  332. {datachain-0.30.4 → datachain-0.30.5}/tests/func/test_retry.py +0 -0
  333. {datachain-0.30.4 → datachain-0.30.5}/tests/func/test_session.py +0 -0
  334. {datachain-0.30.4 → datachain-0.30.5}/tests/func/test_studio_datetime_parsing.py +0 -0
  335. {datachain-0.30.4 → datachain-0.30.5}/tests/func/test_to_database.py +0 -0
  336. {datachain-0.30.4 → datachain-0.30.5}/tests/func/test_toolkit.py +0 -0
  337. {datachain-0.30.4 → datachain-0.30.5}/tests/func/test_video.py +0 -0
  338. {datachain-0.30.4 → datachain-0.30.5}/tests/func/test_warehouse.py +0 -0
  339. {datachain-0.30.4 → datachain-0.30.5}/tests/scripts/feature_class.py +0 -0
  340. {datachain-0.30.4 → datachain-0.30.5}/tests/scripts/feature_class_exception.py +0 -0
  341. {datachain-0.30.4 → datachain-0.30.5}/tests/scripts/feature_class_parallel.py +0 -0
  342. {datachain-0.30.4 → datachain-0.30.5}/tests/scripts/feature_class_parallel_data_model.py +0 -0
  343. {datachain-0.30.4 → datachain-0.30.5}/tests/scripts/name_len_slow.py +0 -0
  344. {datachain-0.30.4 → datachain-0.30.5}/tests/test_atomicity.py +0 -0
  345. {datachain-0.30.4 → datachain-0.30.5}/tests/test_cli_e2e.py +0 -0
  346. {datachain-0.30.4 → datachain-0.30.5}/tests/test_cli_studio.py +0 -0
  347. {datachain-0.30.4 → datachain-0.30.5}/tests/test_import_time.py +0 -0
  348. {datachain-0.30.4 → datachain-0.30.5}/tests/test_query_e2e.py +0 -0
  349. {datachain-0.30.4 → datachain-0.30.5}/tests/test_telemetry.py +0 -0
  350. {datachain-0.30.4 → datachain-0.30.5}/tests/unit/__init__.py +0 -0
  351. {datachain-0.30.4 → datachain-0.30.5}/tests/unit/lib/__init__.py +0 -0
  352. {datachain-0.30.4 → datachain-0.30.5}/tests/unit/lib/conftest.py +0 -0
  353. {datachain-0.30.4 → datachain-0.30.5}/tests/unit/lib/test_arrow.py +0 -0
  354. {datachain-0.30.4 → datachain-0.30.5}/tests/unit/lib/test_audio.py +0 -0
  355. {datachain-0.30.4 → datachain-0.30.5}/tests/unit/lib/test_clip.py +0 -0
  356. {datachain-0.30.4 → datachain-0.30.5}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
  357. {datachain-0.30.4 → datachain-0.30.5}/tests/unit/lib/test_datachain_merge.py +0 -0
  358. {datachain-0.30.4 → datachain-0.30.5}/tests/unit/lib/test_diff.py +0 -0
  359. {datachain-0.30.4 → datachain-0.30.5}/tests/unit/lib/test_feature.py +0 -0
  360. {datachain-0.30.4 → datachain-0.30.5}/tests/unit/lib/test_feature_utils.py +0 -0
  361. {datachain-0.30.4 → datachain-0.30.5}/tests/unit/lib/test_file.py +0 -0
  362. {datachain-0.30.4 → datachain-0.30.5}/tests/unit/lib/test_hf.py +0 -0
  363. {datachain-0.30.4 → datachain-0.30.5}/tests/unit/lib/test_image.py +0 -0
  364. {datachain-0.30.4 → datachain-0.30.5}/tests/unit/lib/test_listing_info.py +0 -0
  365. {datachain-0.30.4 → datachain-0.30.5}/tests/unit/lib/test_namespace.py +0 -0
  366. {datachain-0.30.4 → datachain-0.30.5}/tests/unit/lib/test_partition_by.py +0 -0
  367. {datachain-0.30.4 → datachain-0.30.5}/tests/unit/lib/test_project.py +0 -0
  368. {datachain-0.30.4 → datachain-0.30.5}/tests/unit/lib/test_python_to_sql.py +0 -0
  369. {datachain-0.30.4 → datachain-0.30.5}/tests/unit/lib/test_schema.py +0 -0
  370. {datachain-0.30.4 → datachain-0.30.5}/tests/unit/lib/test_settings.py +0 -0
  371. {datachain-0.30.4 → datachain-0.30.5}/tests/unit/lib/test_signal_schema.py +0 -0
  372. {datachain-0.30.4 → datachain-0.30.5}/tests/unit/lib/test_sql_to_python.py +0 -0
  373. {datachain-0.30.4 → datachain-0.30.5}/tests/unit/lib/test_text.py +0 -0
  374. {datachain-0.30.4 → datachain-0.30.5}/tests/unit/lib/test_udf.py +0 -0
  375. {datachain-0.30.4 → datachain-0.30.5}/tests/unit/lib/test_udf_signature.py +0 -0
  376. {datachain-0.30.4 → datachain-0.30.5}/tests/unit/lib/test_utils.py +0 -0
  377. {datachain-0.30.4 → datachain-0.30.5}/tests/unit/lib/test_webdataset.py +0 -0
  378. {datachain-0.30.4 → datachain-0.30.5}/tests/unit/model/__init__.py +0 -0
  379. {datachain-0.30.4 → datachain-0.30.5}/tests/unit/model/test_bbox.py +0 -0
  380. {datachain-0.30.4 → datachain-0.30.5}/tests/unit/model/test_pose.py +0 -0
  381. {datachain-0.30.4 → datachain-0.30.5}/tests/unit/model/test_segment.py +0 -0
  382. {datachain-0.30.4 → datachain-0.30.5}/tests/unit/model/test_utils.py +0 -0
  383. {datachain-0.30.4 → datachain-0.30.5}/tests/unit/sql/__init__.py +0 -0
  384. {datachain-0.30.4 → datachain-0.30.5}/tests/unit/sql/sqlite/__init__.py +0 -0
  385. {datachain-0.30.4 → datachain-0.30.5}/tests/unit/sql/sqlite/test_utils.py +0 -0
  386. {datachain-0.30.4 → datachain-0.30.5}/tests/unit/sql/test_array.py +0 -0
  387. {datachain-0.30.4 → datachain-0.30.5}/tests/unit/sql/test_conditional.py +0 -0
  388. {datachain-0.30.4 → datachain-0.30.5}/tests/unit/sql/test_path.py +0 -0
  389. {datachain-0.30.4 → datachain-0.30.5}/tests/unit/sql/test_random.py +0 -0
  390. {datachain-0.30.4 → datachain-0.30.5}/tests/unit/sql/test_selectable.py +0 -0
  391. {datachain-0.30.4 → datachain-0.30.5}/tests/unit/sql/test_string.py +0 -0
  392. {datachain-0.30.4 → datachain-0.30.5}/tests/unit/test_asyn.py +0 -0
  393. {datachain-0.30.4 → datachain-0.30.5}/tests/unit/test_cache.py +0 -0
  394. {datachain-0.30.4 → datachain-0.30.5}/tests/unit/test_catalog.py +0 -0
  395. {datachain-0.30.4 → datachain-0.30.5}/tests/unit/test_catalog_loader.py +0 -0
  396. {datachain-0.30.4 → datachain-0.30.5}/tests/unit/test_cli_parsing.py +0 -0
  397. {datachain-0.30.4 → datachain-0.30.5}/tests/unit/test_client.py +0 -0
  398. {datachain-0.30.4 → datachain-0.30.5}/tests/unit/test_client_gcs.py +0 -0
  399. {datachain-0.30.4 → datachain-0.30.5}/tests/unit/test_client_s3.py +0 -0
  400. {datachain-0.30.4 → datachain-0.30.5}/tests/unit/test_config.py +0 -0
  401. {datachain-0.30.4 → datachain-0.30.5}/tests/unit/test_data_storage.py +0 -0
  402. {datachain-0.30.4 → datachain-0.30.5}/tests/unit/test_database_engine.py +0 -0
  403. {datachain-0.30.4 → datachain-0.30.5}/tests/unit/test_dataset.py +0 -0
  404. {datachain-0.30.4 → datachain-0.30.5}/tests/unit/test_dispatch.py +0 -0
  405. {datachain-0.30.4 → datachain-0.30.5}/tests/unit/test_fileslice.py +0 -0
  406. {datachain-0.30.4 → datachain-0.30.5}/tests/unit/test_func.py +0 -0
  407. {datachain-0.30.4 → datachain-0.30.5}/tests/unit/test_listing.py +0 -0
  408. {datachain-0.30.4 → datachain-0.30.5}/tests/unit/test_metastore.py +0 -0
  409. {datachain-0.30.4 → datachain-0.30.5}/tests/unit/test_module_exports.py +0 -0
  410. {datachain-0.30.4 → datachain-0.30.5}/tests/unit/test_pytorch.py +0 -0
  411. {datachain-0.30.4 → datachain-0.30.5}/tests/unit/test_query.py +0 -0
  412. {datachain-0.30.4 → datachain-0.30.5}/tests/unit/test_query_metrics.py +0 -0
  413. {datachain-0.30.4 → datachain-0.30.5}/tests/unit/test_query_params.py +0 -0
  414. {datachain-0.30.4 → datachain-0.30.5}/tests/unit/test_script_meta.py +0 -0
  415. {datachain-0.30.4 → datachain-0.30.5}/tests/unit/test_semver.py +0 -0
  416. {datachain-0.30.4 → datachain-0.30.5}/tests/unit/test_serializer.py +0 -0
  417. {datachain-0.30.4 → datachain-0.30.5}/tests/unit/test_session.py +0 -0
  418. {datachain-0.30.4 → datachain-0.30.5}/tests/unit/test_utils.py +0 -0
  419. {datachain-0.30.4 → datachain-0.30.5}/tests/unit/test_warehouse.py +0 -0
  420. {datachain-0.30.4 → datachain-0.30.5}/tests/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.30.4
3
+ Version: 0.30.5
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -22,6 +22,7 @@ Requires-Dist: tomlkit
22
22
  Requires-Dist: tqdm
23
23
  Requires-Dist: numpy<3,>=1
24
24
  Requires-Dist: pandas>=2.0.0
25
+ Requires-Dist: ujson>=5.10.0
25
26
  Requires-Dist: packaging
26
27
  Requires-Dist: pyarrow
27
28
  Requires-Dist: typing-extensions
@@ -38,7 +39,6 @@ Requires-Dist: shtab<2,>=1.3.4
38
39
  Requires-Dist: sqlalchemy>=2
39
40
  Requires-Dist: multiprocess==0.70.16
40
41
  Requires-Dist: cloudpickle
41
- Requires-Dist: orjson>=3.10.5
42
42
  Requires-Dist: pydantic
43
43
  Requires-Dist: jmespath>=1.0
44
44
  Requires-Dist: datamodel-code-generator>=0.25
@@ -92,7 +92,7 @@ Requires-Dist: pytest-mock>=3.12.0; extra == "tests"
92
92
  Requires-Dist: pytest-servers[all]>=0.5.9; extra == "tests"
93
93
  Requires-Dist: pytest-benchmark[histogram]; extra == "tests"
94
94
  Requires-Dist: pytest-xdist>=3.3.1; extra == "tests"
95
- Requires-Dist: pytest-env>=1.1.0; extra == "tests"
95
+ Requires-Dist: pytest-dotenv; extra == "tests"
96
96
  Requires-Dist: virtualenv; extra == "tests"
97
97
  Requires-Dist: dulwich; extra == "tests"
98
98
  Requires-Dist: hypothesis; extra == "tests"
@@ -0,0 +1,70 @@
1
+ """Example: Nested DataModels with parallel execution.
2
+
3
+ Demonstrates mapping a function that returns a nested DataModel (a DataModel
4
+ containing other DataModels).
5
+
6
+ The example keeps things minimal: we persist a tiny dataset, run a parallel map
7
+ that returns a nested DataModel, and display the result.
8
+ """
9
+
10
+ from typing import Optional
11
+
12
+ from pydantic import Field
13
+
14
+ import datachain as dc
15
+
16
+
17
+ class Metric(dc.DataModel):
18
+ """Represents a single computed metric with quality metadata."""
19
+
20
+ value: Optional[float] = Field(default=None, description="Computed metric value")
21
+ confidence: Optional[float] = Field(
22
+ default=None, description="Confidence / quality score"
23
+ )
24
+ status: Optional[str] = Field(default=None, description="Processing status label")
25
+ metric_error: Optional[str] = Field(
26
+ default=None, description="Error message if metric computation failed"
27
+ )
28
+
29
+
30
+ class SampleMetrics(dc.DataModel):
31
+ """Container for two illustrative nested metrics.
32
+
33
+ Each sub-field is its own DataModel instance to demonstrate nested schemas
34
+ """
35
+
36
+ metric_primary: Metric = Field(
37
+ default_factory=lambda: Metric(), description="Primary metric"
38
+ )
39
+ metric_secondary: Metric = Field(
40
+ default_factory=lambda: Metric(), description="Secondary metric"
41
+ )
42
+
43
+
44
+ def generate_sample_metrics() -> SampleMetrics:
45
+ """Synthesize a pair of metrics.
46
+
47
+ In real scenarios you'd compute these values; here we just return constants
48
+ to keep the example deterministic.
49
+ """
50
+
51
+ return SampleMetrics(
52
+ metric_primary=Metric(value=50.0, confidence=0.95, status="ok"),
53
+ )
54
+
55
+
56
+ def main():
57
+ (
58
+ dc.read_values(record_id=[1, 2])
59
+ .settings(parallel=2) # Keep it parallel to test serialization
60
+ .map(metrics=generate_sample_metrics)
61
+ .save("nested_datamodel")
62
+ )
63
+
64
+ dc.read_dataset("nested_datamodel").show()
65
+
66
+ print(dc.read_dataset("nested_datamodel").to_values("metrics"))
67
+
68
+
69
+ if __name__ == "__main__":
70
+ main()
@@ -26,6 +26,7 @@ dependencies = [
26
26
  "tqdm",
27
27
  "numpy>=1,<3",
28
28
  "pandas>=2.0.0",
29
+ "ujson>=5.10.0",
29
30
  "packaging",
30
31
  "pyarrow",
31
32
  "typing-extensions",
@@ -42,7 +43,6 @@ dependencies = [
42
43
  "sqlalchemy>=2",
43
44
  "multiprocess==0.70.16",
44
45
  "cloudpickle",
45
- "orjson>=3.10.5",
46
46
  "pydantic",
47
47
  "jmespath>=1.0",
48
48
  "datamodel-code-generator>=0.25",
@@ -108,7 +108,7 @@ tests = [
108
108
  "pytest-servers[all]>=0.5.9",
109
109
  "pytest-benchmark[histogram]",
110
110
  "pytest-xdist>=3.3.1",
111
- "pytest-env>=1.1.0",
111
+ "pytest-dotenv",
112
112
  "virtualenv",
113
113
  "dulwich",
114
114
  "hypothesis",
@@ -1,5 +1,4 @@
1
1
  import glob
2
- import json
3
2
  import logging
4
3
  import posixpath
5
4
  import random
@@ -11,6 +10,7 @@ from urllib.parse import urlparse
11
10
 
12
11
  import attrs
13
12
  import sqlalchemy as sa
13
+ import ujson as json
14
14
  from sqlalchemy.sql.expression import true
15
15
 
16
16
  from datachain.client import Client
@@ -122,7 +122,7 @@ class AbstractWarehouse(ABC, Serializable):
122
122
  if value_type is str:
123
123
  return val
124
124
  if value_type in (dict, list):
125
- return json.dumps(val)
125
+ return json.dumps(val, ensure_ascii=False)
126
126
  raise ValueError(
127
127
  f"Cannot convert value {val!r} with type {value_type} to JSON"
128
128
  )
@@ -2,8 +2,8 @@ from collections.abc import Sequence
2
2
  from itertools import islice
3
3
  from typing import TYPE_CHECKING, Any, Optional
4
4
 
5
- import orjson
6
5
  import pyarrow as pa
6
+ import ujson as json
7
7
  from pyarrow._csv import ParseOptions
8
8
  from pyarrow.dataset import CsvFileFormat, dataset
9
9
  from tqdm.auto import tqdm
@@ -269,7 +269,7 @@ def _get_hf_schema(
269
269
  def _get_datachain_schema(schema: "pa.Schema") -> Optional[SignalSchema]:
270
270
  """Return a restored SignalSchema from parquet metadata, if any is found."""
271
271
  if schema.metadata and DATACHAIN_SIGNAL_SCHEMA_PARQUET_KEY in schema.metadata:
272
- serialized_signal_schema = orjson.loads(
272
+ serialized_signal_schema = json.loads(
273
273
  schema.metadata[DATACHAIN_SIGNAL_SCHEMA_PARQUET_KEY]
274
274
  )
275
275
  return SignalSchema.deserialize(serialized_signal_schema)
@@ -19,8 +19,8 @@ from typing import (
19
19
  overload,
20
20
  )
21
21
 
22
- import orjson
23
22
  import sqlalchemy
23
+ import ujson as json
24
24
  from pydantic import BaseModel
25
25
  from sqlalchemy.sql.elements import ColumnElement
26
26
  from tqdm import tqdm
@@ -462,8 +462,6 @@ class DataChain:
462
462
  Returns:
463
463
  DataChain: A new DataChain instance with the new set of columns.
464
464
  """
465
- import json
466
-
467
465
  import pyarrow as pa
468
466
 
469
467
  from datachain.lib.arrow import schema_to_output
@@ -2129,9 +2127,9 @@ class DataChain:
2129
2127
  fsspec_fs = client.create_fs(**fs_kwargs)
2130
2128
 
2131
2129
  _partition_cols = list(partition_cols) if partition_cols else None
2132
- signal_schema_metadata = orjson.dumps(
2133
- self._effective_signals_schema.serialize()
2134
- )
2130
+ signal_schema_metadata = json.dumps(
2131
+ self._effective_signals_schema.serialize(), ensure_ascii=False
2132
+ ).encode("utf-8")
2135
2133
 
2136
2134
  column_names, column_chunks = self.to_columnar_data_with_names(chunk_size)
2137
2135
 
@@ -2278,7 +2276,11 @@ class DataChain:
2278
2276
  f.write(b"\n")
2279
2277
  else:
2280
2278
  is_first = False
2281
- f.write(orjson.dumps(row_to_nested_dict(headers, row)))
2279
+ f.write(
2280
+ json.dumps(
2281
+ row_to_nested_dict(headers, row), ensure_ascii=False
2282
+ ).encode("utf-8")
2283
+ )
2282
2284
  if include_outer_list:
2283
2285
  # This makes the file JSON instead of JSON lines.
2284
2286
  f.write(b"\n]\n")
@@ -89,3 +89,15 @@ class ModelStore:
89
89
  and ModelStore.is_pydantic(parent_type)
90
90
  and "@" in ModelStore.get_name(parent_type)
91
91
  )
92
+
93
+ @classmethod
94
+ def rebuild_all(cls) -> None:
95
+ """Ensure pydantic schemas are (re)built for all registered models.
96
+
97
+ Uses ``force=True`` to avoid subtle cases where a deserialized class
98
+ (e.g. from by-value cloudpickle in workers) reports built state but
99
+ nested model field schemas aren't fully resolved yet.
100
+ """
101
+ for versions in cls.store.values():
102
+ for model in versions.values():
103
+ model.model_rebuild(force=True)
@@ -13,6 +13,7 @@ from multiprocess import get_context
13
13
  from datachain.catalog import Catalog
14
14
  from datachain.catalog.catalog import clone_catalog_with_cache
15
15
  from datachain.catalog.loader import DISTRIBUTED_IMPORT_PATH, get_udf_distributor_class
16
+ from datachain.lib.model_store import ModelStore
16
17
  from datachain.lib.udf import _get_cache
17
18
  from datachain.query.dataset import (
18
19
  get_download_callback,
@@ -130,6 +131,8 @@ class UDFDispatcher:
130
131
 
131
132
  def _create_worker(self) -> "UDFWorker":
132
133
  udf: UDFAdapter = loads(self.udf_data)
134
+ # Ensure all registered DataModels have rebuilt schemas in worker processes.
135
+ ModelStore.rebuild_all()
133
136
  return UDFWorker(
134
137
  self.catalog,
135
138
  udf,
@@ -196,6 +199,8 @@ class UDFDispatcher:
196
199
  generated_cb: Callback = DEFAULT_CALLBACK,
197
200
  ) -> None:
198
201
  udf: UDFAdapter = loads(self.udf_data)
202
+ # Rebuild schemas in single process too for consistency (cheap, idempotent).
203
+ ModelStore.rebuild_all()
199
204
 
200
205
  if ids_only and not self.is_batching:
201
206
  input_rows = flatten(input_rows)
@@ -8,8 +8,8 @@ from functools import cache
8
8
  from types import MappingProxyType
9
9
  from typing import Callable, Optional
10
10
 
11
- import orjson
12
11
  import sqlalchemy as sa
12
+ import ujson as json
13
13
  from sqlalchemy.dialects import sqlite
14
14
  from sqlalchemy.ext.compiler import compiles
15
15
  from sqlalchemy.sql.elements import literal
@@ -182,7 +182,7 @@ def missing_vector_function(name, exc):
182
182
 
183
183
 
184
184
  def sqlite_string_split(string: str, sep: str, maxsplit: int = -1) -> str:
185
- return orjson.dumps(string.split(sep, maxsplit)).decode("utf-8")
185
+ return json.dumps(string.split(sep, maxsplit), ensure_ascii=False)
186
186
 
187
187
 
188
188
  def sqlite_int_hash_64(x: int) -> int:
@@ -453,17 +453,17 @@ def compile_byte_hamming_distance(element, compiler, **kwargs):
453
453
 
454
454
 
455
455
  def py_json_array_length(arr):
456
- return len(orjson.loads(arr))
456
+ return len(json.loads(arr))
457
457
 
458
458
 
459
459
  def py_json_array_contains(arr, value, is_json):
460
460
  if is_json:
461
- value = orjson.loads(value)
462
- return value in orjson.loads(arr)
461
+ value = json.loads(value)
462
+ return value in json.loads(arr)
463
463
 
464
464
 
465
465
  def py_json_array_get_element(val, idx):
466
- arr = orjson.loads(val)
466
+ arr = json.loads(val)
467
467
  try:
468
468
  return arr[idx]
469
469
  except IndexError:
@@ -471,17 +471,18 @@ def py_json_array_get_element(val, idx):
471
471
 
472
472
 
473
473
  def py_json_array_slice(val, offset: int, length: Optional[int] = None):
474
- arr = orjson.loads(val)
474
+ arr = json.loads(val)
475
475
  try:
476
- return orjson.dumps(
477
- list(arr[offset : offset + length] if length is not None else arr[offset:])
478
- ).decode("utf-8")
476
+ return json.dumps(
477
+ list(arr[offset : offset + length] if length is not None else arr[offset:]),
478
+ ensure_ascii=False,
479
+ )
479
480
  except IndexError:
480
481
  return None
481
482
 
482
483
 
483
484
  def py_json_array_join(val, sep: str):
484
- return sep.join(orjson.loads(val))
485
+ return sep.join(json.loads(val))
485
486
 
486
487
 
487
488
  def compile_array_get_element(element, compiler, **kwargs):
@@ -1,6 +1,6 @@
1
1
  import sqlite3
2
2
 
3
- import orjson
3
+ import ujson as json
4
4
  from sqlalchemy import types
5
5
 
6
6
  from datachain.sql.types import TypeConverter, TypeReadConverter
@@ -28,26 +28,21 @@ class Array(types.UserDefinedType):
28
28
 
29
29
 
30
30
  def adapt_array(arr):
31
- return orjson.dumps(arr).decode("utf-8")
31
+ return json.dumps(arr, ensure_ascii=False)
32
32
 
33
33
 
34
34
  def adapt_dict(dct):
35
- return orjson.dumps(dct).decode("utf-8")
35
+ return json.dumps(dct, ensure_ascii=False)
36
36
 
37
37
 
38
38
  def convert_array(arr):
39
- return orjson.loads(arr)
39
+ return json.loads(arr)
40
40
 
41
41
 
42
42
  def adapt_np_array(arr):
43
- def _json_serialize(obj):
44
- if isinstance(obj, np.ndarray):
45
- return obj.tolist()
46
- return obj
47
-
48
- return orjson.dumps(
49
- arr, option=orjson.OPT_SERIALIZE_NUMPY, default=_json_serialize
50
- ).decode("utf-8")
43
+ # Primarily needed for UDF numpy results (e.g. WDS)
44
+ # tolist() gives nested Python lists + native scalars; ujson.dumps handles NaN/Inf.
45
+ return json.dumps(arr.tolist(), ensure_ascii=False)
51
46
 
52
47
 
53
48
  def adapt_np_generic(val):
@@ -74,5 +69,5 @@ class SQLiteTypeConverter(TypeConverter):
74
69
  class SQLiteTypeReadConverter(TypeReadConverter):
75
70
  def array(self, value, item_type, dialect):
76
71
  if isinstance(value, str):
77
- value = orjson.loads(value)
72
+ value = json.loads(value)
78
73
  return super().array(value, item_type, dialect)
@@ -16,8 +16,8 @@ from datetime import datetime
16
16
  from types import MappingProxyType
17
17
  from typing import Any, Union
18
18
 
19
- import orjson
20
19
  import sqlalchemy as sa
20
+ import ujson as jsonlib
21
21
  from sqlalchemy import TypeDecorator, types
22
22
 
23
23
  from datachain.lib.data_model import StandardType
@@ -352,7 +352,7 @@ class Array(SQLType):
352
352
  def on_read_convert(self, value, dialect):
353
353
  r = read_converter(dialect).array(value, self.item_type, dialect)
354
354
  if isinstance(self.item_type, JSON):
355
- r = [orjson.loads(item) if isinstance(item, str) else item for item in r]
355
+ r = [jsonlib.loads(item) if isinstance(item, str) else item for item in r]
356
356
  return r
357
357
 
358
358
 
@@ -466,7 +466,7 @@ class TypeReadConverter:
466
466
  if isinstance(value, str):
467
467
  if value == "":
468
468
  return {}
469
- return orjson.loads(value)
469
+ return jsonlib.loads(value)
470
470
  return value
471
471
 
472
472
  def datetime(self, value):
@@ -417,7 +417,7 @@ class JSONSerialize(json.JSONEncoder):
417
417
 
418
418
  def inside_colab() -> bool:
419
419
  try:
420
- from google import colab # noqa: F401
420
+ from google import colab # type: ignore[attr-defined] # noqa: F401
421
421
  except ImportError:
422
422
  return False
423
423
  return True
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.30.4
3
+ Version: 0.30.5
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -22,6 +22,7 @@ Requires-Dist: tomlkit
22
22
  Requires-Dist: tqdm
23
23
  Requires-Dist: numpy<3,>=1
24
24
  Requires-Dist: pandas>=2.0.0
25
+ Requires-Dist: ujson>=5.10.0
25
26
  Requires-Dist: packaging
26
27
  Requires-Dist: pyarrow
27
28
  Requires-Dist: typing-extensions
@@ -38,7 +39,6 @@ Requires-Dist: shtab<2,>=1.3.4
38
39
  Requires-Dist: sqlalchemy>=2
39
40
  Requires-Dist: multiprocess==0.70.16
40
41
  Requires-Dist: cloudpickle
41
- Requires-Dist: orjson>=3.10.5
42
42
  Requires-Dist: pydantic
43
43
  Requires-Dist: jmespath>=1.0
44
44
  Requires-Dist: datamodel-code-generator>=0.25
@@ -92,7 +92,7 @@ Requires-Dist: pytest-mock>=3.12.0; extra == "tests"
92
92
  Requires-Dist: pytest-servers[all]>=0.5.9; extra == "tests"
93
93
  Requires-Dist: pytest-benchmark[histogram]; extra == "tests"
94
94
  Requires-Dist: pytest-xdist>=3.3.1; extra == "tests"
95
- Requires-Dist: pytest-env>=1.1.0; extra == "tests"
95
+ Requires-Dist: pytest-dotenv; extra == "tests"
96
96
  Requires-Dist: virtualenv; extra == "tests"
97
97
  Requires-Dist: dulwich; extra == "tests"
98
98
  Requires-Dist: hypothesis; extra == "tests"
@@ -78,6 +78,7 @@ examples/computer_vision/ultralytics-pose.py
78
78
  examples/computer_vision/ultralytics-segment.py
79
79
  examples/get_started/common_sql_functions.py
80
80
  examples/get_started/json-csv-reader.py
81
+ examples/get_started/nested_datamodel.py
81
82
  examples/get_started/torch-loader.py
82
83
  examples/get_started/udfs/parallel.py
83
84
  examples/get_started/udfs/simple.py
@@ -3,6 +3,7 @@ tomlkit
3
3
  tqdm
4
4
  numpy<3,>=1
5
5
  pandas>=2.0.0
6
+ ujson>=5.10.0
6
7
  packaging
7
8
  pyarrow
8
9
  typing-extensions
@@ -19,7 +20,6 @@ shtab<2,>=1.3.4
19
20
  sqlalchemy>=2
20
21
  multiprocess==0.70.16
21
22
  cloudpickle
22
- orjson>=3.10.5
23
23
  pydantic
24
24
  jmespath>=1.0
25
25
  datamodel-code-generator>=0.25
@@ -92,7 +92,7 @@ pytest-mock>=3.12.0
92
92
  pytest-servers[all]>=0.5.9
93
93
  pytest-benchmark[histogram]
94
94
  pytest-xdist>=3.3.1
95
- pytest-env>=1.1.0
95
+ pytest-dotenv
96
96
  virtualenv
97
97
  dulwich
98
98
  hypothesis
@@ -356,26 +356,32 @@ def test_array_contains(test_session):
356
356
 
357
357
  ds = list(
358
358
  dc.read_values(
359
- id=(1, 2, 3),
359
+ id=(1, 2, 3, 4),
360
360
  arr=(
361
361
  Arr(i=[10, 20, 30], f=[1.0, 2.0, 3.0], s=["a", "b", "c"]),
362
362
  Arr(i=[40, 50, 60], f=[4.0, 5.0, 6.0], s=["d", "e", "f"]),
363
363
  Arr(i=[50], f=[5.0], s=["g"]),
364
+ # New row with NaN/Inf values for testing
365
+ Arr(i=[100], f=[float("nan"), float("inf"), float("-inf")], s=["h"]),
364
366
  ),
365
367
  ii=(
366
368
  [20, 30, 50, 80],
367
369
  [10],
368
370
  [],
371
+ [200],
369
372
  ),
370
373
  ff=(
371
374
  [2.0, 3.0, 5.0, 7.0],
372
375
  [4.0],
373
376
  [],
377
+ # Test array with special float values
378
+ [float("inf"), float("-inf"), 1.5],
374
379
  ),
375
380
  ss=(
376
381
  ["b", "c", "e", "f"],
377
382
  ["d"],
378
383
  [],
384
+ ["i"],
379
385
  ),
380
386
  session=test_session,
381
387
  )
@@ -395,6 +401,14 @@ def test_array_contains(test_session):
395
401
  t13=func.array.contains([1, 2, 3, 4, 5], 3),
396
402
  t14=func.array.contains([1, 2, 3, 4, 5], 7),
397
403
  t15=func.array.contains([], 1),
404
+ # Test NaN/Inf handling with contains
405
+ t16=func.array.contains("arr.f", float("inf")), # Should find inf in row 4
406
+ # Should find -inf in row 4
407
+ t17=func.array.contains("arr.f", float("-inf")),
408
+ # Should NOT find nan (NaN != NaN)
409
+ t18=func.array.contains("arr.f", float("nan")),
410
+ t19=func.array.contains("ff", float("inf")), # Should find inf in row 4
411
+ t20=func.array.contains("ff", float("-inf")), # Should find -inf in row 4
398
412
  )
399
413
  .order_by("id")
400
414
  .to_list(
@@ -413,11 +427,75 @@ def test_array_contains(test_session):
413
427
  "t13",
414
428
  "t14",
415
429
  "t15",
430
+ "t16",
431
+ "t17",
432
+ "t18",
433
+ "t19",
434
+ "t20",
416
435
  )
417
436
  )
418
437
 
419
438
  assert ds == [
420
- (1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0),
421
- (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0),
422
- (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0),
439
+ # Row 1: Regular values
440
+ (1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0),
441
+ # Row 2: Regular values
442
+ (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0),
443
+ # Row 3: Regular values
444
+ (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0),
445
+ # Row 4: Contains NaN/Inf values - inf/-inf should be found, NaN should not
446
+ (0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1),
423
447
  ]
448
+
449
+
450
+ def test_array_functions_with_nan_inf(test_session):
451
+ class ArrWithSpecial(dc.DataModel):
452
+ f: list[float] # Will contain NaN and Infinity values
453
+
454
+ ds = list(
455
+ dc.read_values(
456
+ id=(1, 2, 3),
457
+ arr=(
458
+ ArrWithSpecial(f=[1.0, float("nan"), 3.0]),
459
+ ArrWithSpecial(f=[float("inf"), 2.0, float("-inf")]),
460
+ ArrWithSpecial(f=[float("nan"), float("inf")]),
461
+ ),
462
+ special_floats=(
463
+ [1.0, float("nan"), float("inf")],
464
+ [float("-inf"), 2.0],
465
+ [float("nan")],
466
+ ),
467
+ session=test_session,
468
+ )
469
+ .mutate(
470
+ # Test array.length with NaN/INF arrays
471
+ len1=func.array.length("arr.f"),
472
+ len2=func.array.length("special_floats"),
473
+ # Test array.slice with NaN/INF arrays
474
+ slice1=func.array.slice("arr.f", 0, 2),
475
+ slice2=func.array.slice("special_floats", 1),
476
+ # Test array.get_element with NaN/INF arrays
477
+ elem1=func.array.get_element("arr.f", 0),
478
+ elem2=func.array.get_element("special_floats", 0),
479
+ )
480
+ .order_by("id")
481
+ .to_list("len1", "len2", "slice1", "slice2", "elem1", "elem2")
482
+ )
483
+
484
+ # Verify lengths are correct
485
+ assert ds[0][0] == 3 # [1.0, nan, 3.0]
486
+ assert ds[0][1] == 3 # [1.0, nan, inf]
487
+ assert ds[1][0] == 3 # [inf, 2.0, -inf]
488
+ assert ds[1][1] == 2 # [-inf, 2.0]
489
+ assert ds[2][0] == 2 # [nan, inf]
490
+ assert ds[2][1] == 1 # [nan]
491
+
492
+ # Verify slices preserve NaN/INF
493
+ assert len(ds[0][2]) == 2 # slice of [1.0, nan, 3.0]
494
+ assert ds[0][2][0] == 1.0
495
+ assert math.isnan(ds[0][2][1])
496
+
497
+ # Verify get_element preserves NaN/INF
498
+ assert ds[0][4] == 1.0 # arr.f[0] for first row
499
+ # special_floats[0] for second row (-inf)
500
+ assert math.isinf(ds[1][5]) and ds[1][5] < 0
501
+ assert ds[1][4] == float("inf") # arr.f[0] for second row
@@ -135,8 +135,8 @@ def test_convert_type(cloud_test_catalog):
135
135
 
136
136
  # JSON Tests
137
137
  assert run_convert_type('{"a": 1}', JSON()) == '{"a": 1}'
138
- assert run_convert_type({"a": 1}, JSON()) == '{"a": 1}'
139
- assert run_convert_type([{"a": 1}], JSON()) == '[{"a": 1}]'
138
+ assert run_convert_type({"a": 1}, JSON()) == '{"a":1}'
139
+ assert run_convert_type([{"a": 1}], JSON()) == '[{"a":1}]'
140
140
  with pytest.raises(ValueError):
141
141
  run_convert_type(0.5, JSON())
142
142
 
@@ -3230,6 +3230,35 @@ def test_read_csv_nan_inf(tmp_dir, test_session):
3230
3230
  assert any(r for r in res if np.isneginf(r))
3231
3231
 
3232
3232
 
3233
+ def test_dicts_nan_inf(test_session):
3234
+ metrics_data = [
3235
+ {"accuracy": 0.95, "loss": 0.1, "precision": 0.92},
3236
+ {"accuracy": float("nan"), "loss": float("inf"), "precision": 0.88},
3237
+ {"accuracy": 0.87, "loss": float("-inf"), "precision": float("nan")},
3238
+ ]
3239
+
3240
+ dc.read_values(
3241
+ id=[1, 2, 3],
3242
+ metrics=metrics_data,
3243
+ session=test_session,
3244
+ ).save("test_dicts_nan_inf")
3245
+
3246
+ res = dc.read_dataset("test_dicts_nan_inf").order_by("id").to_values("metrics")
3247
+ assert len(res) == 3
3248
+
3249
+ assert res[0]["accuracy"] == 0.95
3250
+ assert res[0]["loss"] == 0.1
3251
+ assert res[0]["precision"] == 0.92
3252
+
3253
+ assert math.isnan(res[1]["accuracy"])
3254
+ assert math.isinf(res[1]["loss"]) and res[1]["loss"] > 0
3255
+ assert res[1]["precision"] == 0.88
3256
+
3257
+ assert res[2]["accuracy"] == 0.87
3258
+ assert math.isinf(res[2]["loss"]) and res[2]["loss"] < 0
3259
+ assert math.isnan(res[2]["precision"])
3260
+
3261
+
3233
3262
  def test_group_by_int(test_session):
3234
3263
  from datachain import func
3235
3264