datachain 0.18.3__tar.gz → 0.18.5__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (382)
  1. {datachain-0.18.3 → datachain-0.18.5}/.github/codecov.yaml +7 -0
  2. {datachain-0.18.3 → datachain-0.18.5}/.pre-commit-config.yaml +1 -1
  3. {datachain-0.18.3/src/datachain.egg-info → datachain-0.18.5}/PKG-INFO +3 -3
  4. {datachain-0.18.3 → datachain-0.18.5}/pyproject.toml +2 -2
  5. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/catalog/catalog.py +36 -22
  6. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/client/azure.py +1 -1
  7. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/client/gcs.py +1 -1
  8. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/client/s3.py +5 -3
  9. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/data_storage/metastore.py +87 -42
  10. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/dataset.py +1 -1
  11. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/func/aggregate.py +64 -38
  12. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/func/array.py +102 -73
  13. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/func/conditional.py +71 -51
  14. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/func/func.py +1 -1
  15. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/func/numeric.py +55 -36
  16. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/func/path.py +32 -20
  17. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/func/random.py +2 -2
  18. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/func/string.py +59 -37
  19. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/func/window.py +7 -8
  20. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/dc/datachain.py +9 -0
  21. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/listing.py +2 -3
  22. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/model/ultralytics/bbox.py +6 -4
  23. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/model/ultralytics/pose.py +6 -4
  24. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/model/ultralytics/segment.py +6 -4
  25. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/remote/studio.py +4 -2
  26. {datachain-0.18.3 → datachain-0.18.5/src/datachain.egg-info}/PKG-INFO +3 -3
  27. {datachain-0.18.3 → datachain-0.18.5}/src/datachain.egg-info/SOURCES.txt +9 -1
  28. {datachain-0.18.3 → datachain-0.18.5}/src/datachain.egg-info/requires.txt +2 -2
  29. datachain-0.18.5/tests/func/functions/test_aggregate.py +173 -0
  30. datachain-0.18.5/tests/func/functions/test_array.py +424 -0
  31. datachain-0.18.5/tests/func/functions/test_conditional.py +178 -0
  32. datachain-0.18.5/tests/func/functions/test_numeric.py +127 -0
  33. datachain-0.18.5/tests/func/functions/test_path.py +108 -0
  34. datachain-0.18.5/tests/func/functions/test_random.py +20 -0
  35. datachain-0.18.5/tests/func/functions/test_string.py +213 -0
  36. {datachain-0.18.3 → datachain-0.18.5}/tests/func/model/test_yolo.py +73 -0
  37. {datachain-0.18.3 → datachain-0.18.5}/tests/func/test_catalog.py +1 -1
  38. {datachain-0.18.3 → datachain-0.18.5}/tests/func/test_client.py +6 -0
  39. {datachain-0.18.3 → datachain-0.18.5}/tests/func/test_datachain.py +6 -9
  40. {datachain-0.18.3 → datachain-0.18.5}/tests/func/test_dataset_query.py +4 -12
  41. {datachain-0.18.3 → datachain-0.18.5}/tests/func/test_datasets.py +2 -2
  42. {datachain-0.18.3 → datachain-0.18.5}/tests/func/test_listing.py +2 -8
  43. datachain-0.18.5/tests/func/test_metastore.py +889 -0
  44. datachain-0.18.5/tests/unit/sql/sqlite/__init__.py +0 -0
  45. datachain-0.18.3/tests/func/test_func.py +0 -223
  46. {datachain-0.18.3 → datachain-0.18.5}/.cruft.json +0 -0
  47. {datachain-0.18.3 → datachain-0.18.5}/.gitattributes +0 -0
  48. {datachain-0.18.3 → datachain-0.18.5}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  49. {datachain-0.18.3 → datachain-0.18.5}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
  50. {datachain-0.18.3 → datachain-0.18.5}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  51. {datachain-0.18.3 → datachain-0.18.5}/.github/dependabot.yml +0 -0
  52. {datachain-0.18.3 → datachain-0.18.5}/.github/workflows/benchmarks.yml +0 -0
  53. {datachain-0.18.3 → datachain-0.18.5}/.github/workflows/release.yml +0 -0
  54. {datachain-0.18.3 → datachain-0.18.5}/.github/workflows/tests-studio.yml +0 -0
  55. {datachain-0.18.3 → datachain-0.18.5}/.github/workflows/tests.yml +0 -0
  56. {datachain-0.18.3 → datachain-0.18.5}/.github/workflows/update-template.yaml +0 -0
  57. {datachain-0.18.3 → datachain-0.18.5}/.gitignore +0 -0
  58. {datachain-0.18.3 → datachain-0.18.5}/CODE_OF_CONDUCT.rst +0 -0
  59. {datachain-0.18.3 → datachain-0.18.5}/LICENSE +0 -0
  60. {datachain-0.18.3 → datachain-0.18.5}/README.rst +0 -0
  61. {datachain-0.18.3 → datachain-0.18.5}/docs/assets/captioned_cartoons.png +0 -0
  62. {datachain-0.18.3 → datachain-0.18.5}/docs/assets/datachain-white.svg +0 -0
  63. {datachain-0.18.3 → datachain-0.18.5}/docs/assets/datachain.svg +0 -0
  64. {datachain-0.18.3 → datachain-0.18.5}/docs/commands/auth/login.md +0 -0
  65. {datachain-0.18.3 → datachain-0.18.5}/docs/commands/auth/logout.md +0 -0
  66. {datachain-0.18.3 → datachain-0.18.5}/docs/commands/auth/team.md +0 -0
  67. {datachain-0.18.3 → datachain-0.18.5}/docs/commands/auth/token.md +0 -0
  68. {datachain-0.18.3 → datachain-0.18.5}/docs/commands/index.md +0 -0
  69. {datachain-0.18.3 → datachain-0.18.5}/docs/commands/job/cancel.md +0 -0
  70. {datachain-0.18.3 → datachain-0.18.5}/docs/commands/job/logs.md +0 -0
  71. {datachain-0.18.3 → datachain-0.18.5}/docs/commands/job/ls.md +0 -0
  72. {datachain-0.18.3 → datachain-0.18.5}/docs/commands/job/run.md +0 -0
  73. {datachain-0.18.3 → datachain-0.18.5}/docs/contributing.md +0 -0
  74. {datachain-0.18.3 → datachain-0.18.5}/docs/css/github-permalink-style.css +0 -0
  75. {datachain-0.18.3 → datachain-0.18.5}/docs/examples.md +0 -0
  76. {datachain-0.18.3 → datachain-0.18.5}/docs/index.md +0 -0
  77. {datachain-0.18.3 → datachain-0.18.5}/docs/overrides/main.html +0 -0
  78. {datachain-0.18.3 → datachain-0.18.5}/docs/quick-start.md +0 -0
  79. {datachain-0.18.3 → datachain-0.18.5}/docs/references/data-types/arrowrow.md +0 -0
  80. {datachain-0.18.3 → datachain-0.18.5}/docs/references/data-types/bbox.md +0 -0
  81. {datachain-0.18.3 → datachain-0.18.5}/docs/references/data-types/file.md +0 -0
  82. {datachain-0.18.3 → datachain-0.18.5}/docs/references/data-types/imagefile.md +0 -0
  83. {datachain-0.18.3 → datachain-0.18.5}/docs/references/data-types/index.md +0 -0
  84. {datachain-0.18.3 → datachain-0.18.5}/docs/references/data-types/pose.md +0 -0
  85. {datachain-0.18.3 → datachain-0.18.5}/docs/references/data-types/segment.md +0 -0
  86. {datachain-0.18.3 → datachain-0.18.5}/docs/references/data-types/tarvfile.md +0 -0
  87. {datachain-0.18.3 → datachain-0.18.5}/docs/references/data-types/textfile.md +0 -0
  88. {datachain-0.18.3 → datachain-0.18.5}/docs/references/data-types/videofile.md +0 -0
  89. {datachain-0.18.3 → datachain-0.18.5}/docs/references/datachain.md +0 -0
  90. {datachain-0.18.3 → datachain-0.18.5}/docs/references/func.md +0 -0
  91. {datachain-0.18.3 → datachain-0.18.5}/docs/references/index.md +0 -0
  92. {datachain-0.18.3 → datachain-0.18.5}/docs/references/remotes.md +0 -0
  93. {datachain-0.18.3 → datachain-0.18.5}/docs/references/toolkit.md +0 -0
  94. {datachain-0.18.3 → datachain-0.18.5}/docs/references/torch.md +0 -0
  95. {datachain-0.18.3 → datachain-0.18.5}/docs/references/udf.md +0 -0
  96. {datachain-0.18.3 → datachain-0.18.5}/docs/tutorials.md +0 -0
  97. {datachain-0.18.3 → datachain-0.18.5}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
  98. {datachain-0.18.3 → datachain-0.18.5}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
  99. {datachain-0.18.3 → datachain-0.18.5}/examples/computer_vision/openimage-detect.py +0 -0
  100. {datachain-0.18.3 → datachain-0.18.5}/examples/computer_vision/ultralytics-bbox.py +0 -0
  101. {datachain-0.18.3 → datachain-0.18.5}/examples/computer_vision/ultralytics-pose.py +0 -0
  102. {datachain-0.18.3 → datachain-0.18.5}/examples/computer_vision/ultralytics-segment.py +0 -0
  103. {datachain-0.18.3 → datachain-0.18.5}/examples/get_started/common_sql_functions.py +0 -0
  104. {datachain-0.18.3 → datachain-0.18.5}/examples/get_started/json-csv-reader.py +0 -0
  105. {datachain-0.18.3 → datachain-0.18.5}/examples/get_started/torch-loader.py +0 -0
  106. {datachain-0.18.3 → datachain-0.18.5}/examples/get_started/udfs/parallel.py +0 -0
  107. {datachain-0.18.3 → datachain-0.18.5}/examples/get_started/udfs/simple.py +0 -0
  108. {datachain-0.18.3 → datachain-0.18.5}/examples/get_started/udfs/stateful.py +0 -0
  109. {datachain-0.18.3 → datachain-0.18.5}/examples/incremental_processing/delta.py +0 -0
  110. {datachain-0.18.3 → datachain-0.18.5}/examples/incremental_processing/utils.py +0 -0
  111. {datachain-0.18.3 → datachain-0.18.5}/examples/llm_and_nlp/claude-query.py +0 -0
  112. {datachain-0.18.3 → datachain-0.18.5}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
  113. {datachain-0.18.3 → datachain-0.18.5}/examples/multimodal/clip_inference.py +0 -0
  114. {datachain-0.18.3 → datachain-0.18.5}/examples/multimodal/hf_pipeline.py +0 -0
  115. {datachain-0.18.3 → datachain-0.18.5}/examples/multimodal/openai_image_desc_lib.py +0 -0
  116. {datachain-0.18.3 → datachain-0.18.5}/examples/multimodal/wds.py +0 -0
  117. {datachain-0.18.3 → datachain-0.18.5}/examples/multimodal/wds_filtered.py +0 -0
  118. {datachain-0.18.3 → datachain-0.18.5}/mkdocs.yml +0 -0
  119. {datachain-0.18.3 → datachain-0.18.5}/noxfile.py +0 -0
  120. {datachain-0.18.3 → datachain-0.18.5}/setup.cfg +0 -0
  121. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/__init__.py +0 -0
  122. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/__main__.py +0 -0
  123. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/asyn.py +0 -0
  124. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/cache.py +0 -0
  125. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/catalog/__init__.py +0 -0
  126. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/catalog/datasource.py +0 -0
  127. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/catalog/loader.py +0 -0
  128. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/cli/__init__.py +0 -0
  129. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/cli/commands/__init__.py +0 -0
  130. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/cli/commands/datasets.py +0 -0
  131. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/cli/commands/du.py +0 -0
  132. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/cli/commands/index.py +0 -0
  133. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/cli/commands/ls.py +0 -0
  134. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/cli/commands/misc.py +0 -0
  135. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/cli/commands/query.py +0 -0
  136. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/cli/commands/show.py +0 -0
  137. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/cli/parser/__init__.py +0 -0
  138. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/cli/parser/job.py +0 -0
  139. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/cli/parser/studio.py +0 -0
  140. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/cli/parser/utils.py +0 -0
  141. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/cli/utils.py +0 -0
  142. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/client/__init__.py +0 -0
  143. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/client/fileslice.py +0 -0
  144. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/client/fsspec.py +0 -0
  145. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/client/hf.py +0 -0
  146. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/client/local.py +0 -0
  147. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/config.py +0 -0
  148. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/data_storage/__init__.py +0 -0
  149. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/data_storage/db_engine.py +0 -0
  150. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/data_storage/job.py +0 -0
  151. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/data_storage/schema.py +0 -0
  152. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/data_storage/serializer.py +0 -0
  153. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/data_storage/sqlite.py +0 -0
  154. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/data_storage/warehouse.py +0 -0
  155. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/delta.py +0 -0
  156. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/diff/__init__.py +0 -0
  157. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/error.py +0 -0
  158. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/fs/__init__.py +0 -0
  159. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/fs/reference.py +0 -0
  160. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/fs/utils.py +0 -0
  161. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/func/__init__.py +0 -0
  162. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/func/base.py +0 -0
  163. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/job.py +0 -0
  164. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/__init__.py +0 -0
  165. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/arrow.py +0 -0
  166. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/clip.py +0 -0
  167. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/convert/__init__.py +0 -0
  168. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/convert/flatten.py +0 -0
  169. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/convert/python_to_sql.py +0 -0
  170. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/convert/sql_to_python.py +0 -0
  171. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/convert/unflatten.py +0 -0
  172. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/convert/values_to_tuples.py +0 -0
  173. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/data_model.py +0 -0
  174. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/dataset_info.py +0 -0
  175. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/dc/__init__.py +0 -0
  176. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/dc/csv.py +0 -0
  177. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/dc/database.py +0 -0
  178. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/dc/datasets.py +0 -0
  179. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/dc/hf.py +0 -0
  180. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/dc/json.py +0 -0
  181. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/dc/listings.py +0 -0
  182. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/dc/pandas.py +0 -0
  183. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/dc/parquet.py +0 -0
  184. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/dc/records.py +0 -0
  185. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/dc/storage.py +0 -0
  186. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/dc/utils.py +0 -0
  187. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/dc/values.py +0 -0
  188. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/file.py +0 -0
  189. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/hf.py +0 -0
  190. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/image.py +0 -0
  191. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/listing_info.py +0 -0
  192. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/meta_formats.py +0 -0
  193. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/model_store.py +0 -0
  194. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/pytorch.py +0 -0
  195. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/settings.py +0 -0
  196. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/signal_schema.py +0 -0
  197. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/tar.py +0 -0
  198. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/text.py +0 -0
  199. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/udf.py +0 -0
  200. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/udf_signature.py +0 -0
  201. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/utils.py +0 -0
  202. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/video.py +0 -0
  203. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/webdataset.py +0 -0
  204. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/webdataset_laion.py +0 -0
  205. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/listing.py +0 -0
  206. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/model/__init__.py +0 -0
  207. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/model/bbox.py +0 -0
  208. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/model/pose.py +0 -0
  209. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/model/segment.py +0 -0
  210. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/model/ultralytics/__init__.py +0 -0
  211. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/model/utils.py +0 -0
  212. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/node.py +0 -0
  213. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/nodes_fetcher.py +0 -0
  214. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/nodes_thread_pool.py +0 -0
  215. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/progress.py +0 -0
  216. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/py.typed +0 -0
  217. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/query/__init__.py +0 -0
  218. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/query/batch.py +0 -0
  219. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/query/dataset.py +0 -0
  220. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/query/dispatch.py +0 -0
  221. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/query/metrics.py +0 -0
  222. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/query/params.py +0 -0
  223. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/query/queue.py +0 -0
  224. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/query/schema.py +0 -0
  225. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/query/session.py +0 -0
  226. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/query/udf.py +0 -0
  227. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/query/utils.py +0 -0
  228. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/remote/__init__.py +0 -0
  229. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/script_meta.py +0 -0
  230. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/semver.py +0 -0
  231. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/sql/__init__.py +0 -0
  232. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/sql/default/__init__.py +0 -0
  233. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/sql/default/base.py +0 -0
  234. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/sql/functions/__init__.py +0 -0
  235. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/sql/functions/aggregate.py +0 -0
  236. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/sql/functions/array.py +0 -0
  237. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/sql/functions/conditional.py +0 -0
  238. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/sql/functions/numeric.py +0 -0
  239. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/sql/functions/path.py +0 -0
  240. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/sql/functions/random.py +0 -0
  241. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/sql/functions/string.py +0 -0
  242. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/sql/selectable.py +0 -0
  243. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/sql/sqlite/__init__.py +0 -0
  244. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/sql/sqlite/base.py +0 -0
  245. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/sql/sqlite/types.py +0 -0
  246. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/sql/sqlite/vector.py +0 -0
  247. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/sql/types.py +0 -0
  248. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/sql/utils.py +0 -0
  249. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/studio.py +0 -0
  250. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/telemetry.py +0 -0
  251. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/toolkit/__init__.py +0 -0
  252. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/toolkit/split.py +0 -0
  253. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/torch/__init__.py +0 -0
  254. {datachain-0.18.3 → datachain-0.18.5}/src/datachain/utils.py +0 -0
  255. {datachain-0.18.3 → datachain-0.18.5}/src/datachain.egg-info/dependency_links.txt +0 -0
  256. {datachain-0.18.3 → datachain-0.18.5}/src/datachain.egg-info/entry_points.txt +0 -0
  257. {datachain-0.18.3 → datachain-0.18.5}/src/datachain.egg-info/top_level.txt +0 -0
  258. {datachain-0.18.3 → datachain-0.18.5}/tests/__init__.py +0 -0
  259. {datachain-0.18.3 → datachain-0.18.5}/tests/benchmarks/__init__.py +0 -0
  260. {datachain-0.18.3 → datachain-0.18.5}/tests/benchmarks/conftest.py +0 -0
  261. {datachain-0.18.3 → datachain-0.18.5}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
  262. {datachain-0.18.3 → datachain-0.18.5}/tests/benchmarks/datasets/.dvc/config +0 -0
  263. {datachain-0.18.3 → datachain-0.18.5}/tests/benchmarks/datasets/.gitignore +0 -0
  264. {datachain-0.18.3 → datachain-0.18.5}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
  265. {datachain-0.18.3 → datachain-0.18.5}/tests/benchmarks/test_datachain.py +0 -0
  266. {datachain-0.18.3 → datachain-0.18.5}/tests/benchmarks/test_ls.py +0 -0
  267. {datachain-0.18.3 → datachain-0.18.5}/tests/benchmarks/test_version.py +0 -0
  268. {datachain-0.18.3 → datachain-0.18.5}/tests/conftest.py +0 -0
  269. {datachain-0.18.3 → datachain-0.18.5}/tests/data.py +0 -0
  270. {datachain-0.18.3 → datachain-0.18.5}/tests/examples/__init__.py +0 -0
  271. {datachain-0.18.3 → datachain-0.18.5}/tests/examples/test_examples.py +0 -0
  272. {datachain-0.18.3 → datachain-0.18.5}/tests/examples/test_wds_e2e.py +0 -0
  273. {datachain-0.18.3 → datachain-0.18.5}/tests/examples/wds_data.py +0 -0
  274. {datachain-0.18.3 → datachain-0.18.5}/tests/func/__init__.py +0 -0
  275. {datachain-0.18.3 → datachain-0.18.5}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
  276. {datachain-0.18.3 → datachain-0.18.5}/tests/func/data/lena.jpg +0 -0
  277. {datachain-0.18.3 → datachain-0.18.5}/tests/func/fake-service-account-credentials.json +0 -0
  278. {datachain-0.18.3/tests/func/model → datachain-0.18.5/tests/func/functions}/__init__.py +0 -0
  279. {datachain-0.18.3/tests/unit → datachain-0.18.5/tests/func/model}/__init__.py +0 -0
  280. {datachain-0.18.3 → datachain-0.18.5}/tests/func/model/data/running-mask0.png +0 -0
  281. {datachain-0.18.3 → datachain-0.18.5}/tests/func/model/data/running-mask1.png +0 -0
  282. {datachain-0.18.3 → datachain-0.18.5}/tests/func/model/data/running.jpg +0 -0
  283. {datachain-0.18.3 → datachain-0.18.5}/tests/func/model/data/ships.jpg +0 -0
  284. {datachain-0.18.3 → datachain-0.18.5}/tests/func/test_batching.py +0 -0
  285. {datachain-0.18.3 → datachain-0.18.5}/tests/func/test_cloud_transfer.py +0 -0
  286. {datachain-0.18.3 → datachain-0.18.5}/tests/func/test_data_storage.py +0 -0
  287. {datachain-0.18.3 → datachain-0.18.5}/tests/func/test_datachain_merge.py +0 -0
  288. {datachain-0.18.3 → datachain-0.18.5}/tests/func/test_delta.py +0 -0
  289. {datachain-0.18.3 → datachain-0.18.5}/tests/func/test_feature_pickling.py +0 -0
  290. {datachain-0.18.3 → datachain-0.18.5}/tests/func/test_file.py +0 -0
  291. {datachain-0.18.3 → datachain-0.18.5}/tests/func/test_hf.py +0 -0
  292. {datachain-0.18.3 → datachain-0.18.5}/tests/func/test_hidden_field.py +0 -0
  293. {datachain-0.18.3 → datachain-0.18.5}/tests/func/test_image.py +0 -0
  294. {datachain-0.18.3 → datachain-0.18.5}/tests/func/test_ls.py +0 -0
  295. {datachain-0.18.3 → datachain-0.18.5}/tests/func/test_meta_formats.py +0 -0
  296. {datachain-0.18.3 → datachain-0.18.5}/tests/func/test_metrics.py +0 -0
  297. {datachain-0.18.3 → datachain-0.18.5}/tests/func/test_pull.py +0 -0
  298. {datachain-0.18.3 → datachain-0.18.5}/tests/func/test_pytorch.py +0 -0
  299. {datachain-0.18.3 → datachain-0.18.5}/tests/func/test_query.py +0 -0
  300. {datachain-0.18.3 → datachain-0.18.5}/tests/func/test_read_database.py +0 -0
  301. {datachain-0.18.3 → datachain-0.18.5}/tests/func/test_session.py +0 -0
  302. {datachain-0.18.3 → datachain-0.18.5}/tests/func/test_toolkit.py +0 -0
  303. {datachain-0.18.3 → datachain-0.18.5}/tests/func/test_video.py +0 -0
  304. {datachain-0.18.3 → datachain-0.18.5}/tests/func/test_warehouse.py +0 -0
  305. {datachain-0.18.3 → datachain-0.18.5}/tests/scripts/feature_class.py +0 -0
  306. {datachain-0.18.3 → datachain-0.18.5}/tests/scripts/feature_class_exception.py +0 -0
  307. {datachain-0.18.3 → datachain-0.18.5}/tests/scripts/feature_class_parallel.py +0 -0
  308. {datachain-0.18.3 → datachain-0.18.5}/tests/scripts/feature_class_parallel_data_model.py +0 -0
  309. {datachain-0.18.3 → datachain-0.18.5}/tests/scripts/name_len_slow.py +0 -0
  310. {datachain-0.18.3 → datachain-0.18.5}/tests/test_atomicity.py +0 -0
  311. {datachain-0.18.3 → datachain-0.18.5}/tests/test_cli_e2e.py +0 -0
  312. {datachain-0.18.3 → datachain-0.18.5}/tests/test_cli_studio.py +0 -0
  313. {datachain-0.18.3 → datachain-0.18.5}/tests/test_import_time.py +0 -0
  314. {datachain-0.18.3 → datachain-0.18.5}/tests/test_query_e2e.py +0 -0
  315. {datachain-0.18.3 → datachain-0.18.5}/tests/test_telemetry.py +0 -0
  316. {datachain-0.18.3/tests/unit/lib → datachain-0.18.5/tests/unit}/__init__.py +0 -0
  317. {datachain-0.18.3/tests/unit/model → datachain-0.18.5/tests/unit/lib}/__init__.py +0 -0
  318. {datachain-0.18.3 → datachain-0.18.5}/tests/unit/lib/conftest.py +0 -0
  319. {datachain-0.18.3 → datachain-0.18.5}/tests/unit/lib/test_arrow.py +0 -0
  320. {datachain-0.18.3 → datachain-0.18.5}/tests/unit/lib/test_clip.py +0 -0
  321. {datachain-0.18.3 → datachain-0.18.5}/tests/unit/lib/test_datachain.py +0 -0
  322. {datachain-0.18.3 → datachain-0.18.5}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
  323. {datachain-0.18.3 → datachain-0.18.5}/tests/unit/lib/test_datachain_merge.py +0 -0
  324. {datachain-0.18.3 → datachain-0.18.5}/tests/unit/lib/test_diff.py +0 -0
  325. {datachain-0.18.3 → datachain-0.18.5}/tests/unit/lib/test_feature.py +0 -0
  326. {datachain-0.18.3 → datachain-0.18.5}/tests/unit/lib/test_feature_utils.py +0 -0
  327. {datachain-0.18.3 → datachain-0.18.5}/tests/unit/lib/test_file.py +0 -0
  328. {datachain-0.18.3 → datachain-0.18.5}/tests/unit/lib/test_hf.py +0 -0
  329. {datachain-0.18.3 → datachain-0.18.5}/tests/unit/lib/test_image.py +0 -0
  330. {datachain-0.18.3 → datachain-0.18.5}/tests/unit/lib/test_listing_info.py +0 -0
  331. {datachain-0.18.3 → datachain-0.18.5}/tests/unit/lib/test_python_to_sql.py +0 -0
  332. {datachain-0.18.3 → datachain-0.18.5}/tests/unit/lib/test_schema.py +0 -0
  333. {datachain-0.18.3 → datachain-0.18.5}/tests/unit/lib/test_signal_schema.py +0 -0
  334. {datachain-0.18.3 → datachain-0.18.5}/tests/unit/lib/test_sql_to_python.py +0 -0
  335. {datachain-0.18.3 → datachain-0.18.5}/tests/unit/lib/test_text.py +0 -0
  336. {datachain-0.18.3 → datachain-0.18.5}/tests/unit/lib/test_udf.py +0 -0
  337. {datachain-0.18.3 → datachain-0.18.5}/tests/unit/lib/test_udf_signature.py +0 -0
  338. {datachain-0.18.3 → datachain-0.18.5}/tests/unit/lib/test_utils.py +0 -0
  339. {datachain-0.18.3 → datachain-0.18.5}/tests/unit/lib/test_webdataset.py +0 -0
  340. {datachain-0.18.3/tests/unit/sql → datachain-0.18.5/tests/unit/model}/__init__.py +0 -0
  341. {datachain-0.18.3 → datachain-0.18.5}/tests/unit/model/test_bbox.py +0 -0
  342. {datachain-0.18.3 → datachain-0.18.5}/tests/unit/model/test_pose.py +0 -0
  343. {datachain-0.18.3 → datachain-0.18.5}/tests/unit/model/test_segment.py +0 -0
  344. {datachain-0.18.3 → datachain-0.18.5}/tests/unit/model/test_utils.py +0 -0
  345. {datachain-0.18.3/tests/unit/sql/sqlite → datachain-0.18.5/tests/unit/sql}/__init__.py +0 -0
  346. {datachain-0.18.3 → datachain-0.18.5}/tests/unit/sql/sqlite/test_types.py +0 -0
  347. {datachain-0.18.3 → datachain-0.18.5}/tests/unit/sql/sqlite/test_utils.py +0 -0
  348. {datachain-0.18.3 → datachain-0.18.5}/tests/unit/sql/test_array.py +0 -0
  349. {datachain-0.18.3 → datachain-0.18.5}/tests/unit/sql/test_conditional.py +0 -0
  350. {datachain-0.18.3 → datachain-0.18.5}/tests/unit/sql/test_path.py +0 -0
  351. {datachain-0.18.3 → datachain-0.18.5}/tests/unit/sql/test_random.py +0 -0
  352. {datachain-0.18.3 → datachain-0.18.5}/tests/unit/sql/test_selectable.py +0 -0
  353. {datachain-0.18.3 → datachain-0.18.5}/tests/unit/sql/test_string.py +0 -0
  354. {datachain-0.18.3 → datachain-0.18.5}/tests/unit/test_asyn.py +0 -0
  355. {datachain-0.18.3 → datachain-0.18.5}/tests/unit/test_cache.py +0 -0
  356. {datachain-0.18.3 → datachain-0.18.5}/tests/unit/test_catalog.py +0 -0
  357. {datachain-0.18.3 → datachain-0.18.5}/tests/unit/test_catalog_loader.py +0 -0
  358. {datachain-0.18.3 → datachain-0.18.5}/tests/unit/test_cli_parsing.py +0 -0
  359. {datachain-0.18.3 → datachain-0.18.5}/tests/unit/test_client.py +0 -0
  360. {datachain-0.18.3 → datachain-0.18.5}/tests/unit/test_client_gcs.py +0 -0
  361. {datachain-0.18.3 → datachain-0.18.5}/tests/unit/test_client_s3.py +0 -0
  362. {datachain-0.18.3 → datachain-0.18.5}/tests/unit/test_config.py +0 -0
  363. {datachain-0.18.3 → datachain-0.18.5}/tests/unit/test_data_storage.py +0 -0
  364. {datachain-0.18.3 → datachain-0.18.5}/tests/unit/test_database_engine.py +0 -0
  365. {datachain-0.18.3 → datachain-0.18.5}/tests/unit/test_dataset.py +0 -0
  366. {datachain-0.18.3 → datachain-0.18.5}/tests/unit/test_dispatch.py +0 -0
  367. {datachain-0.18.3 → datachain-0.18.5}/tests/unit/test_fileslice.py +0 -0
  368. {datachain-0.18.3 → datachain-0.18.5}/tests/unit/test_func.py +0 -0
  369. {datachain-0.18.3 → datachain-0.18.5}/tests/unit/test_listing.py +0 -0
  370. {datachain-0.18.3 → datachain-0.18.5}/tests/unit/test_metastore.py +0 -0
  371. {datachain-0.18.3 → datachain-0.18.5}/tests/unit/test_module_exports.py +0 -0
  372. {datachain-0.18.3 → datachain-0.18.5}/tests/unit/test_pytorch.py +0 -0
  373. {datachain-0.18.3 → datachain-0.18.5}/tests/unit/test_query.py +0 -0
  374. {datachain-0.18.3 → datachain-0.18.5}/tests/unit/test_query_metrics.py +0 -0
  375. {datachain-0.18.3 → datachain-0.18.5}/tests/unit/test_query_params.py +0 -0
  376. {datachain-0.18.3 → datachain-0.18.5}/tests/unit/test_script_meta.py +0 -0
  377. {datachain-0.18.3 → datachain-0.18.5}/tests/unit/test_semver.py +0 -0
  378. {datachain-0.18.3 → datachain-0.18.5}/tests/unit/test_serializer.py +0 -0
  379. {datachain-0.18.3 → datachain-0.18.5}/tests/unit/test_session.py +0 -0
  380. {datachain-0.18.3 → datachain-0.18.5}/tests/unit/test_utils.py +0 -0
  381. {datachain-0.18.3 → datachain-0.18.5}/tests/unit/test_warehouse.py +0 -0
  382. {datachain-0.18.3 → datachain-0.18.5}/tests/utils.py +0 -0
@@ -9,6 +9,13 @@ coverage:
9
9
  threshold: 10%
10
10
  # non-blocking status checks
11
11
  informational: true
12
+ only_pulls: true
13
+
14
+ comment:
15
+ # sections shown in the PR comment
16
+ layout: "reach, diff, flags, files"
17
+ # updates the comment on PRs when coverage changes
18
+ behavior: default
12
19
 
13
20
  flags:
14
21
  datachain:
@@ -24,7 +24,7 @@ repos:
24
24
  - id: trailing-whitespace
25
25
  exclude: '^LICENSES/'
26
26
  - repo: https://github.com/astral-sh/ruff-pre-commit
27
- rev: 'v0.11.10'
27
+ rev: 'v0.11.11'
28
28
  hooks:
29
29
  - id: ruff
30
30
  args: [--fix, --exit-non-zero-on-fix]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.18.3
3
+ Version: 0.18.5
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -23,7 +23,7 @@ Requires-Dist: tqdm
23
23
  Requires-Dist: numpy<3,>=1
24
24
  Requires-Dist: pandas>=2.0.0
25
25
  Requires-Dist: packaging
26
- Requires-Dist: pyarrow<20
26
+ Requires-Dist: pyarrow
27
27
  Requires-Dist: typing-extensions
28
28
  Requires-Dist: python-dateutil>=2
29
29
  Requires-Dist: attrs>=21.3.0
@@ -38,7 +38,7 @@ Requires-Dist: sqlalchemy>=2
38
38
  Requires-Dist: multiprocess==0.70.16
39
39
  Requires-Dist: cloudpickle
40
40
  Requires-Dist: orjson>=3.10.5
41
- Requires-Dist: pydantic<2.11,>=2
41
+ Requires-Dist: pydantic
42
42
  Requires-Dist: jmespath>=1.0
43
43
  Requires-Dist: datamodel-code-generator>=0.25
44
44
  Requires-Dist: Pillow<12,>=10.0.0
@@ -27,7 +27,7 @@ dependencies = [
27
27
  "numpy>=1,<3",
28
28
  "pandas>=2.0.0",
29
29
  "packaging",
30
- "pyarrow<20",
30
+ "pyarrow",
31
31
  "typing-extensions",
32
32
  "python-dateutil>=2",
33
33
  "attrs>=21.3.0",
@@ -42,7 +42,7 @@ dependencies = [
42
42
  "multiprocess==0.70.16",
43
43
  "cloudpickle",
44
44
  "orjson>=3.10.5",
45
- "pydantic>=2,<2.11",
45
+ "pydantic",
46
46
  "jmespath>=1.0",
47
47
  "datamodel-code-generator>=0.25",
48
48
  "Pillow>=10.0.0,<12",
@@ -66,6 +66,7 @@ if TYPE_CHECKING:
66
66
  )
67
67
  from datachain.dataset import DatasetListVersion
68
68
  from datachain.job import Job
69
+ from datachain.lib.listing_info import ListingInfo
69
70
  from datachain.listing import Listing
70
71
 
71
72
  logger = logging.getLogger("datachain")
@@ -910,11 +911,7 @@ class Catalog:
910
911
  values["num_objects"] = None
911
912
  values["size"] = None
912
913
  values["preview"] = None
913
- self.metastore.update_dataset_version(
914
- dataset,
915
- version,
916
- **values,
917
- )
914
+ self.metastore.update_dataset_version(dataset, version, **values)
918
915
  return
919
916
 
920
917
  if not dataset_version.num_objects:
@@ -934,11 +931,7 @@ class Catalog:
934
931
  if not values:
935
932
  return
936
933
 
937
- self.metastore.update_dataset_version(
938
- dataset,
939
- version,
940
- **values,
941
- )
934
+ self.metastore.update_dataset_version(dataset, version, **values)
942
935
 
943
936
  def update_dataset(
944
937
  self, dataset: DatasetRecord, conn=None, **kwargs
@@ -1116,13 +1109,16 @@ class Catalog:
1116
1109
  return direct_dependencies
1117
1110
 
1118
1111
  def ls_datasets(
1119
- self, include_listing: bool = False, studio: bool = False
1112
+ self,
1113
+ prefix: Optional[str] = None,
1114
+ include_listing: bool = False,
1115
+ studio: bool = False,
1120
1116
  ) -> Iterator[DatasetListRecord]:
1121
1117
  from datachain.remote.studio import StudioClient
1122
1118
 
1123
1119
  if studio:
1124
1120
  client = StudioClient()
1125
- response = client.ls_datasets()
1121
+ response = client.ls_datasets(prefix=prefix)
1126
1122
  if not response.ok:
1127
1123
  raise DataChainError(response.message)
1128
1124
  if not response.data:
@@ -1133,6 +1129,8 @@ class Catalog:
1133
1129
  for d in response.data
1134
1130
  if not d.get("name", "").startswith(QUERY_DATASET_PREFIX)
1135
1131
  )
1132
+ elif prefix:
1133
+ datasets = self.metastore.list_datasets_by_prefix(prefix)
1136
1134
  else:
1137
1135
  datasets = self.metastore.list_datasets()
1138
1136
 
@@ -1142,39 +1140,55 @@ class Catalog:
1142
1140
 
1143
1141
  def list_datasets_versions(
1144
1142
  self,
1143
+ prefix: Optional[str] = None,
1145
1144
  include_listing: bool = False,
1145
+ with_job: bool = True,
1146
1146
  studio: bool = False,
1147
1147
  ) -> Iterator[tuple[DatasetListRecord, "DatasetListVersion", Optional["Job"]]]:
1148
1148
  """Iterate over all dataset versions with related jobs."""
1149
1149
  datasets = list(
1150
- self.ls_datasets(include_listing=include_listing, studio=studio)
1150
+ self.ls_datasets(
1151
+ prefix=prefix, include_listing=include_listing, studio=studio
1152
+ )
1151
1153
  )
1152
1154
 
1153
1155
  # preselect dataset versions jobs from db to avoid multiple queries
1154
- jobs_ids: set[str] = {
1155
- v.job_id for ds in datasets for v in ds.versions if v.job_id
1156
- }
1157
1156
  jobs: dict[str, Job] = {}
1158
- if jobs_ids:
1159
- jobs = {j.id: j for j in self.metastore.list_jobs_by_ids(list(jobs_ids))}
1157
+ if with_job:
1158
+ jobs_ids: set[str] = {
1159
+ v.job_id for ds in datasets for v in ds.versions if v.job_id
1160
+ }
1161
+ if jobs_ids:
1162
+ jobs = {
1163
+ j.id: j for j in self.metastore.list_jobs_by_ids(list(jobs_ids))
1164
+ }
1160
1165
 
1161
1166
  for d in datasets:
1162
1167
  yield from (
1163
- (d, v, jobs.get(str(v.job_id)) if v.job_id else None)
1168
+ (d, v, jobs.get(str(v.job_id)) if with_job and v.job_id else None)
1164
1169
  for v in d.versions
1165
1170
  )
1166
1171
 
1167
- def listings(self):
1172
+ def listings(self, prefix: Optional[str] = None) -> list["ListingInfo"]:
1168
1173
  """
1169
1174
  Returns list of ListingInfo objects which are representing specific
1170
1175
  storage listing datasets
1171
1176
  """
1172
- from datachain.lib.listing import is_listing_dataset
1177
+ from datachain.lib.listing import LISTING_PREFIX, is_listing_dataset
1173
1178
  from datachain.lib.listing_info import ListingInfo
1174
1179
 
1180
+ if prefix and not prefix.startswith(LISTING_PREFIX):
1181
+ prefix = LISTING_PREFIX + prefix
1182
+
1183
+ listing_datasets_versions = self.list_datasets_versions(
1184
+ prefix=prefix,
1185
+ include_listing=True,
1186
+ with_job=False,
1187
+ )
1188
+
1175
1189
  return [
1176
1190
  ListingInfo.from_models(d, v, j)
1177
- for d, v, j in self.list_datasets_versions(include_listing=True)
1191
+ for d, v, j in listing_datasets_versions
1178
1192
  if is_listing_dataset(d.name)
1179
1193
  ]
1180
1194
 
@@ -65,7 +65,7 @@ class AzureClient(Client):
65
65
  if entries:
66
66
  await result_queue.put(entries)
67
67
  pbar.update(len(entries))
68
- if not found:
68
+ if not found and prefix:
69
69
  raise FileNotFoundError(
70
70
  f"Unable to resolve remote path: {prefix}"
71
71
  )
@@ -74,7 +74,7 @@ class GCSClient(Client):
74
74
  try:
75
75
  await self._get_pages(prefix, page_queue)
76
76
  found = await consumer
77
- if not found:
77
+ if not found and prefix:
78
78
  raise FileNotFoundError(f"Unable to resolve remote path: {prefix}")
79
79
  finally:
80
80
  consumer.cancel() # In case _get_pages() raised
@@ -80,7 +80,7 @@ class ClientS3(Client):
80
80
  finally:
81
81
  await page_queue.put(None)
82
82
 
83
- async def process_pages(page_queue, result_queue):
83
+ async def process_pages(page_queue, result_queue, prefix):
84
84
  found = False
85
85
  with tqdm(desc=f"Listing {self.uri}", unit=" objects", leave=False) as pbar:
86
86
  while (res := await page_queue.get()) is not None:
@@ -94,7 +94,7 @@ class ClientS3(Client):
94
94
  if entries:
95
95
  await result_queue.put(entries)
96
96
  pbar.update(len(entries))
97
- if not found:
97
+ if not found and prefix:
98
98
  raise FileNotFoundError(f"Unable to resolve remote path: {prefix}")
99
99
 
100
100
  try:
@@ -118,7 +118,9 @@ class ClientS3(Client):
118
118
  Delimiter="",
119
119
  )
120
120
  page_queue: asyncio.Queue[list] = asyncio.Queue(2)
121
- consumer = asyncio.create_task(process_pages(page_queue, result_queue))
121
+ consumer = asyncio.create_task(
122
+ process_pages(page_queue, result_queue, prefix)
123
+ )
122
124
  try:
123
125
  await get_pages(it, page_queue)
124
126
  await consumer
@@ -36,6 +36,7 @@ from datachain.dataset import (
36
36
  )
37
37
  from datachain.error import (
38
38
  DatasetNotFoundError,
39
+ DatasetVersionNotFoundError,
39
40
  TableMissingError,
40
41
  )
41
42
  from datachain.job import Job
@@ -273,7 +274,6 @@ class AbstractMetastore(ABC, Serializable):
273
274
  self,
274
275
  job_id: str,
275
276
  status: Optional[JobStatus] = None,
276
- exit_code: Optional[int] = None,
277
277
  error_message: Optional[str] = None,
278
278
  error_stack: Optional[str] = None,
279
279
  finished_at: Optional[datetime] = None,
@@ -620,22 +620,36 @@ class AbstractDBMetastore(AbstractMetastore):
620
620
  self, dataset: DatasetRecord, conn=None, **kwargs
621
621
  ) -> DatasetRecord:
622
622
  """Updates dataset fields."""
623
- values = {}
624
- dataset_values = {}
623
+ values: dict[str, Any] = {}
624
+ dataset_values: dict[str, Any] = {}
625
625
  for field, value in kwargs.items():
626
- if field in self._dataset_fields[1:]:
627
- if field in ["attrs", "schema"]:
628
- values[field] = json.dumps(value) if value else None
626
+ if field in ("id", "created_at") or field not in self._dataset_fields:
627
+ continue # these fields are read-only or not applicable
628
+
629
+ if value is None and field in ("name", "status", "sources", "query_script"):
630
+ raise ValueError(f"Field {field} cannot be None")
631
+ if field == "name" and not value:
632
+ raise ValueError("name cannot be empty")
633
+
634
+ if field == "attrs":
635
+ if value is None:
636
+ values[field] = None
629
637
  else:
630
- values[field] = value
631
- if field == "schema":
632
- dataset_values[field] = DatasetRecord.parse_schema(value)
638
+ values[field] = json.dumps(value)
639
+ dataset_values[field] = value
640
+ elif field == "schema":
641
+ if value is None:
642
+ values[field] = None
643
+ dataset_values[field] = None
633
644
  else:
634
- dataset_values[field] = value
645
+ values[field] = json.dumps(value)
646
+ dataset_values[field] = DatasetRecord.parse_schema(value)
647
+ else:
648
+ values[field] = value
649
+ dataset_values[field] = value
635
650
 
636
651
  if not values:
637
- # Nothing to update
638
- return dataset
652
+ return dataset # nothing to update
639
653
 
640
654
  d = self._datasets
641
655
  self.db.execute(
@@ -651,36 +665,70 @@ class AbstractDBMetastore(AbstractMetastore):
651
665
  self, dataset: DatasetRecord, version: str, conn=None, **kwargs
652
666
  ) -> DatasetVersion:
653
667
  """Updates dataset fields."""
654
- dataset_version = dataset.get_version(version)
655
-
656
- values = {}
657
- version_values: dict = {}
668
+ values: dict[str, Any] = {}
669
+ version_values: dict[str, Any] = {}
658
670
  for field, value in kwargs.items():
659
- if field in self._dataset_version_fields[1:]:
660
- if field == "schema":
661
- values[field] = json.dumps(value) if value else None
662
- version_values[field] = DatasetRecord.parse_schema(value)
663
- elif field == "feature_schema":
664
- values[field] = json.dumps(value) if value else None
665
- version_values[field] = value
666
- elif field == "preview" and isinstance(value, list):
667
- values[field] = json.dumps(value, cls=JSONSerialize)
668
- version_values[field] = value
671
+ if (
672
+ field in ("id", "created_at")
673
+ or field not in self._dataset_version_fields
674
+ ):
675
+ continue # these fields are read-only or not applicable
676
+
677
+ if value is None and field in (
678
+ "status",
679
+ "sources",
680
+ "query_script",
681
+ "error_message",
682
+ "error_stack",
683
+ "script_output",
684
+ "uuid",
685
+ ):
686
+ raise ValueError(f"Field {field} cannot be None")
687
+
688
+ if field == "schema":
689
+ values[field] = json.dumps(value) if value else None
690
+ version_values[field] = (
691
+ DatasetRecord.parse_schema(value) if value else None
692
+ )
693
+ elif field == "feature_schema":
694
+ if value is None:
695
+ values[field] = None
696
+ else:
697
+ values[field] = json.dumps(value)
698
+ version_values[field] = value
699
+ elif field == "preview":
700
+ if value is None:
701
+ values[field] = None
702
+ elif not isinstance(value, list):
703
+ raise ValueError(
704
+ f"Field '{field}' must be a list, got {type(value).__name__}"
705
+ )
669
706
  else:
670
- values[field] = value
671
- version_values[field] = value
707
+ values[field] = json.dumps(value, cls=JSONSerialize)
708
+ version_values["_preview_data"] = value
709
+ else:
710
+ values[field] = value
711
+ version_values[field] = value
672
712
 
673
- if values:
674
- dv = self._datasets_versions
675
- self.db.execute(
676
- self._datasets_versions_update()
677
- .where(dv.c.dataset_id == dataset.id, dv.c.version == version)
678
- .values(values),
679
- conn=conn,
680
- ) # type: ignore [attr-defined]
681
- dataset_version.update(**version_values)
713
+ if not values:
714
+ return dataset.get_version(version)
715
+
716
+ dv = self._datasets_versions
717
+ self.db.execute(
718
+ self._datasets_versions_update()
719
+ .where(dv.c.dataset_id == dataset.id, dv.c.version == version)
720
+ .values(values),
721
+ conn=conn,
722
+ ) # type: ignore [attr-defined]
723
+
724
+ for v in dataset.versions:
725
+ if v.version == version:
726
+ v.update(**version_values)
727
+ return v
682
728
 
683
- return dataset_version
729
+ raise DatasetVersionNotFoundError(
730
+ f"Dataset {dataset.name} does not have version {version}"
731
+ )
684
732
 
685
733
  def _parse_dataset(self, rows) -> Optional[DatasetRecord]:
686
734
  versions = [self.dataset_class.parse(*r) for r in rows]
@@ -812,7 +860,7 @@ class AbstractDBMetastore(AbstractMetastore):
812
860
  update_data["error_message"] = error_message
813
861
  update_data["error_stack"] = error_stack
814
862
 
815
- self.update_dataset(dataset, conn=conn, **update_data)
863
+ dataset = self.update_dataset(dataset, conn=conn, **update_data)
816
864
 
817
865
  if version:
818
866
  self.update_dataset_version(dataset, version, conn=conn, **update_data)
@@ -1064,7 +1112,6 @@ class AbstractDBMetastore(AbstractMetastore):
1064
1112
  self,
1065
1113
  job_id: str,
1066
1114
  status: Optional[JobStatus] = None,
1067
- exit_code: Optional[int] = None,
1068
1115
  error_message: Optional[str] = None,
1069
1116
  error_stack: Optional[str] = None,
1070
1117
  finished_at: Optional[datetime] = None,
@@ -1075,8 +1122,6 @@ class AbstractDBMetastore(AbstractMetastore):
1075
1122
  values: dict = {}
1076
1123
  if status is not None:
1077
1124
  values["status"] = status
1078
- if exit_code is not None:
1079
- values["exit_code"] = exit_code
1080
1125
  if error_message is not None:
1081
1126
  values["error_message"] = error_message
1082
1127
  if error_stack is not None:
@@ -93,7 +93,7 @@ class DatasetDependency:
93
93
  if self.type == DatasetDependencyType.DATASET:
94
94
  return self.name
95
95
 
96
- list_dataset_name, _, _ = parse_listing_uri(self.name.strip("/"), {})
96
+ list_dataset_name, _, _ = parse_listing_uri(self.name.strip("/"))
97
97
  assert list_dataset_name
98
98
  return list_dataset_name
99
99