datachain 0.18.2__tar.gz → 0.18.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (373)
  1. {datachain-0.18.2 → datachain-0.18.4}/.github/codecov.yaml +7 -0
  2. {datachain-0.18.2/src/datachain.egg-info → datachain-0.18.4}/PKG-INFO +1 -1
  3. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/catalog/catalog.py +34 -12
  4. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/dataset.py +1 -1
  5. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/lib/listing.py +2 -3
  6. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/query/dataset.py +18 -4
  7. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/remote/studio.py +4 -2
  8. {datachain-0.18.2 → datachain-0.18.4/src/datachain.egg-info}/PKG-INFO +1 -1
  9. {datachain-0.18.2 → datachain-0.18.4}/tests/func/test_catalog.py +1 -1
  10. {datachain-0.18.2 → datachain-0.18.4}/tests/func/test_datachain.py +24 -8
  11. {datachain-0.18.2 → datachain-0.18.4}/tests/func/test_dataset_query.py +4 -12
  12. {datachain-0.18.2 → datachain-0.18.4}/tests/func/test_datasets.py +2 -2
  13. {datachain-0.18.2 → datachain-0.18.4}/tests/func/test_listing.py +2 -8
  14. {datachain-0.18.2 → datachain-0.18.4}/.cruft.json +0 -0
  15. {datachain-0.18.2 → datachain-0.18.4}/.gitattributes +0 -0
  16. {datachain-0.18.2 → datachain-0.18.4}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  17. {datachain-0.18.2 → datachain-0.18.4}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
  18. {datachain-0.18.2 → datachain-0.18.4}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  19. {datachain-0.18.2 → datachain-0.18.4}/.github/dependabot.yml +0 -0
  20. {datachain-0.18.2 → datachain-0.18.4}/.github/workflows/benchmarks.yml +0 -0
  21. {datachain-0.18.2 → datachain-0.18.4}/.github/workflows/release.yml +0 -0
  22. {datachain-0.18.2 → datachain-0.18.4}/.github/workflows/tests-studio.yml +0 -0
  23. {datachain-0.18.2 → datachain-0.18.4}/.github/workflows/tests.yml +0 -0
  24. {datachain-0.18.2 → datachain-0.18.4}/.github/workflows/update-template.yaml +0 -0
  25. {datachain-0.18.2 → datachain-0.18.4}/.gitignore +0 -0
  26. {datachain-0.18.2 → datachain-0.18.4}/.pre-commit-config.yaml +0 -0
  27. {datachain-0.18.2 → datachain-0.18.4}/CODE_OF_CONDUCT.rst +0 -0
  28. {datachain-0.18.2 → datachain-0.18.4}/LICENSE +0 -0
  29. {datachain-0.18.2 → datachain-0.18.4}/README.rst +0 -0
  30. {datachain-0.18.2 → datachain-0.18.4}/docs/assets/captioned_cartoons.png +0 -0
  31. {datachain-0.18.2 → datachain-0.18.4}/docs/assets/datachain-white.svg +0 -0
  32. {datachain-0.18.2 → datachain-0.18.4}/docs/assets/datachain.svg +0 -0
  33. {datachain-0.18.2 → datachain-0.18.4}/docs/commands/auth/login.md +0 -0
  34. {datachain-0.18.2 → datachain-0.18.4}/docs/commands/auth/logout.md +0 -0
  35. {datachain-0.18.2 → datachain-0.18.4}/docs/commands/auth/team.md +0 -0
  36. {datachain-0.18.2 → datachain-0.18.4}/docs/commands/auth/token.md +0 -0
  37. {datachain-0.18.2 → datachain-0.18.4}/docs/commands/index.md +0 -0
  38. {datachain-0.18.2 → datachain-0.18.4}/docs/commands/job/cancel.md +0 -0
  39. {datachain-0.18.2 → datachain-0.18.4}/docs/commands/job/logs.md +0 -0
  40. {datachain-0.18.2 → datachain-0.18.4}/docs/commands/job/ls.md +0 -0
  41. {datachain-0.18.2 → datachain-0.18.4}/docs/commands/job/run.md +0 -0
  42. {datachain-0.18.2 → datachain-0.18.4}/docs/contributing.md +0 -0
  43. {datachain-0.18.2 → datachain-0.18.4}/docs/css/github-permalink-style.css +0 -0
  44. {datachain-0.18.2 → datachain-0.18.4}/docs/examples.md +0 -0
  45. {datachain-0.18.2 → datachain-0.18.4}/docs/index.md +0 -0
  46. {datachain-0.18.2 → datachain-0.18.4}/docs/overrides/main.html +0 -0
  47. {datachain-0.18.2 → datachain-0.18.4}/docs/quick-start.md +0 -0
  48. {datachain-0.18.2 → datachain-0.18.4}/docs/references/data-types/arrowrow.md +0 -0
  49. {datachain-0.18.2 → datachain-0.18.4}/docs/references/data-types/bbox.md +0 -0
  50. {datachain-0.18.2 → datachain-0.18.4}/docs/references/data-types/file.md +0 -0
  51. {datachain-0.18.2 → datachain-0.18.4}/docs/references/data-types/imagefile.md +0 -0
  52. {datachain-0.18.2 → datachain-0.18.4}/docs/references/data-types/index.md +0 -0
  53. {datachain-0.18.2 → datachain-0.18.4}/docs/references/data-types/pose.md +0 -0
  54. {datachain-0.18.2 → datachain-0.18.4}/docs/references/data-types/segment.md +0 -0
  55. {datachain-0.18.2 → datachain-0.18.4}/docs/references/data-types/tarvfile.md +0 -0
  56. {datachain-0.18.2 → datachain-0.18.4}/docs/references/data-types/textfile.md +0 -0
  57. {datachain-0.18.2 → datachain-0.18.4}/docs/references/data-types/videofile.md +0 -0
  58. {datachain-0.18.2 → datachain-0.18.4}/docs/references/datachain.md +0 -0
  59. {datachain-0.18.2 → datachain-0.18.4}/docs/references/func.md +0 -0
  60. {datachain-0.18.2 → datachain-0.18.4}/docs/references/index.md +0 -0
  61. {datachain-0.18.2 → datachain-0.18.4}/docs/references/remotes.md +0 -0
  62. {datachain-0.18.2 → datachain-0.18.4}/docs/references/toolkit.md +0 -0
  63. {datachain-0.18.2 → datachain-0.18.4}/docs/references/torch.md +0 -0
  64. {datachain-0.18.2 → datachain-0.18.4}/docs/references/udf.md +0 -0
  65. {datachain-0.18.2 → datachain-0.18.4}/docs/tutorials.md +0 -0
  66. {datachain-0.18.2 → datachain-0.18.4}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
  67. {datachain-0.18.2 → datachain-0.18.4}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
  68. {datachain-0.18.2 → datachain-0.18.4}/examples/computer_vision/openimage-detect.py +0 -0
  69. {datachain-0.18.2 → datachain-0.18.4}/examples/computer_vision/ultralytics-bbox.py +0 -0
  70. {datachain-0.18.2 → datachain-0.18.4}/examples/computer_vision/ultralytics-pose.py +0 -0
  71. {datachain-0.18.2 → datachain-0.18.4}/examples/computer_vision/ultralytics-segment.py +0 -0
  72. {datachain-0.18.2 → datachain-0.18.4}/examples/get_started/common_sql_functions.py +0 -0
  73. {datachain-0.18.2 → datachain-0.18.4}/examples/get_started/json-csv-reader.py +0 -0
  74. {datachain-0.18.2 → datachain-0.18.4}/examples/get_started/torch-loader.py +0 -0
  75. {datachain-0.18.2 → datachain-0.18.4}/examples/get_started/udfs/parallel.py +0 -0
  76. {datachain-0.18.2 → datachain-0.18.4}/examples/get_started/udfs/simple.py +0 -0
  77. {datachain-0.18.2 → datachain-0.18.4}/examples/get_started/udfs/stateful.py +0 -0
  78. {datachain-0.18.2 → datachain-0.18.4}/examples/incremental_processing/delta.py +0 -0
  79. {datachain-0.18.2 → datachain-0.18.4}/examples/incremental_processing/utils.py +0 -0
  80. {datachain-0.18.2 → datachain-0.18.4}/examples/llm_and_nlp/claude-query.py +0 -0
  81. {datachain-0.18.2 → datachain-0.18.4}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
  82. {datachain-0.18.2 → datachain-0.18.4}/examples/multimodal/clip_inference.py +0 -0
  83. {datachain-0.18.2 → datachain-0.18.4}/examples/multimodal/hf_pipeline.py +0 -0
  84. {datachain-0.18.2 → datachain-0.18.4}/examples/multimodal/openai_image_desc_lib.py +0 -0
  85. {datachain-0.18.2 → datachain-0.18.4}/examples/multimodal/wds.py +0 -0
  86. {datachain-0.18.2 → datachain-0.18.4}/examples/multimodal/wds_filtered.py +0 -0
  87. {datachain-0.18.2 → datachain-0.18.4}/mkdocs.yml +0 -0
  88. {datachain-0.18.2 → datachain-0.18.4}/noxfile.py +0 -0
  89. {datachain-0.18.2 → datachain-0.18.4}/pyproject.toml +0 -0
  90. {datachain-0.18.2 → datachain-0.18.4}/setup.cfg +0 -0
  91. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/__init__.py +0 -0
  92. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/__main__.py +0 -0
  93. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/asyn.py +0 -0
  94. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/cache.py +0 -0
  95. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/catalog/__init__.py +0 -0
  96. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/catalog/datasource.py +0 -0
  97. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/catalog/loader.py +0 -0
  98. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/cli/__init__.py +0 -0
  99. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/cli/commands/__init__.py +0 -0
  100. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/cli/commands/datasets.py +0 -0
  101. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/cli/commands/du.py +0 -0
  102. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/cli/commands/index.py +0 -0
  103. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/cli/commands/ls.py +0 -0
  104. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/cli/commands/misc.py +0 -0
  105. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/cli/commands/query.py +0 -0
  106. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/cli/commands/show.py +0 -0
  107. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/cli/parser/__init__.py +0 -0
  108. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/cli/parser/job.py +0 -0
  109. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/cli/parser/studio.py +0 -0
  110. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/cli/parser/utils.py +0 -0
  111. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/cli/utils.py +0 -0
  112. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/client/__init__.py +0 -0
  113. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/client/azure.py +0 -0
  114. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/client/fileslice.py +0 -0
  115. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/client/fsspec.py +0 -0
  116. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/client/gcs.py +0 -0
  117. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/client/hf.py +0 -0
  118. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/client/local.py +0 -0
  119. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/client/s3.py +0 -0
  120. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/config.py +0 -0
  121. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/data_storage/__init__.py +0 -0
  122. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/data_storage/db_engine.py +0 -0
  123. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/data_storage/job.py +0 -0
  124. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/data_storage/metastore.py +0 -0
  125. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/data_storage/schema.py +0 -0
  126. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/data_storage/serializer.py +0 -0
  127. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/data_storage/sqlite.py +0 -0
  128. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/data_storage/warehouse.py +0 -0
  129. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/delta.py +0 -0
  130. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/diff/__init__.py +0 -0
  131. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/error.py +0 -0
  132. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/fs/__init__.py +0 -0
  133. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/fs/reference.py +0 -0
  134. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/fs/utils.py +0 -0
  135. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/func/__init__.py +0 -0
  136. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/func/aggregate.py +0 -0
  137. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/func/array.py +0 -0
  138. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/func/base.py +0 -0
  139. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/func/conditional.py +0 -0
  140. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/func/func.py +0 -0
  141. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/func/numeric.py +0 -0
  142. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/func/path.py +0 -0
  143. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/func/random.py +0 -0
  144. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/func/string.py +0 -0
  145. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/func/window.py +0 -0
  146. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/job.py +0 -0
  147. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/lib/__init__.py +0 -0
  148. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/lib/arrow.py +0 -0
  149. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/lib/clip.py +0 -0
  150. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/lib/convert/__init__.py +0 -0
  151. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/lib/convert/flatten.py +0 -0
  152. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/lib/convert/python_to_sql.py +0 -0
  153. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/lib/convert/sql_to_python.py +0 -0
  154. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/lib/convert/unflatten.py +0 -0
  155. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/lib/convert/values_to_tuples.py +0 -0
  156. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/lib/data_model.py +0 -0
  157. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/lib/dataset_info.py +0 -0
  158. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/lib/dc/__init__.py +0 -0
  159. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/lib/dc/csv.py +0 -0
  160. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/lib/dc/database.py +0 -0
  161. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/lib/dc/datachain.py +0 -0
  162. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/lib/dc/datasets.py +0 -0
  163. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/lib/dc/hf.py +0 -0
  164. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/lib/dc/json.py +0 -0
  165. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/lib/dc/listings.py +0 -0
  166. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/lib/dc/pandas.py +0 -0
  167. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/lib/dc/parquet.py +0 -0
  168. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/lib/dc/records.py +0 -0
  169. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/lib/dc/storage.py +0 -0
  170. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/lib/dc/utils.py +0 -0
  171. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/lib/dc/values.py +0 -0
  172. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/lib/file.py +0 -0
  173. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/lib/hf.py +0 -0
  174. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/lib/image.py +0 -0
  175. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/lib/listing_info.py +0 -0
  176. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/lib/meta_formats.py +0 -0
  177. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/lib/model_store.py +0 -0
  178. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/lib/pytorch.py +0 -0
  179. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/lib/settings.py +0 -0
  180. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/lib/signal_schema.py +0 -0
  181. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/lib/tar.py +0 -0
  182. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/lib/text.py +0 -0
  183. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/lib/udf.py +0 -0
  184. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/lib/udf_signature.py +0 -0
  185. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/lib/utils.py +0 -0
  186. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/lib/video.py +0 -0
  187. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/lib/webdataset.py +0 -0
  188. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/lib/webdataset_laion.py +0 -0
  189. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/listing.py +0 -0
  190. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/model/__init__.py +0 -0
  191. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/model/bbox.py +0 -0
  192. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/model/pose.py +0 -0
  193. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/model/segment.py +0 -0
  194. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/model/ultralytics/__init__.py +0 -0
  195. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/model/ultralytics/bbox.py +0 -0
  196. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/model/ultralytics/pose.py +0 -0
  197. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/model/ultralytics/segment.py +0 -0
  198. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/model/utils.py +0 -0
  199. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/node.py +0 -0
  200. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/nodes_fetcher.py +0 -0
  201. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/nodes_thread_pool.py +0 -0
  202. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/progress.py +0 -0
  203. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/py.typed +0 -0
  204. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/query/__init__.py +0 -0
  205. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/query/batch.py +0 -0
  206. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/query/dispatch.py +0 -0
  207. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/query/metrics.py +0 -0
  208. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/query/params.py +0 -0
  209. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/query/queue.py +0 -0
  210. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/query/schema.py +0 -0
  211. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/query/session.py +0 -0
  212. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/query/udf.py +0 -0
  213. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/query/utils.py +0 -0
  214. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/remote/__init__.py +0 -0
  215. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/script_meta.py +0 -0
  216. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/semver.py +0 -0
  217. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/sql/__init__.py +0 -0
  218. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/sql/default/__init__.py +0 -0
  219. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/sql/default/base.py +0 -0
  220. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/sql/functions/__init__.py +0 -0
  221. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/sql/functions/aggregate.py +0 -0
  222. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/sql/functions/array.py +0 -0
  223. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/sql/functions/conditional.py +0 -0
  224. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/sql/functions/numeric.py +0 -0
  225. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/sql/functions/path.py +0 -0
  226. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/sql/functions/random.py +0 -0
  227. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/sql/functions/string.py +0 -0
  228. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/sql/selectable.py +0 -0
  229. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/sql/sqlite/__init__.py +0 -0
  230. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/sql/sqlite/base.py +0 -0
  231. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/sql/sqlite/types.py +0 -0
  232. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/sql/sqlite/vector.py +0 -0
  233. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/sql/types.py +0 -0
  234. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/sql/utils.py +0 -0
  235. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/studio.py +0 -0
  236. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/telemetry.py +0 -0
  237. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/toolkit/__init__.py +0 -0
  238. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/toolkit/split.py +0 -0
  239. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/torch/__init__.py +0 -0
  240. {datachain-0.18.2 → datachain-0.18.4}/src/datachain/utils.py +0 -0
  241. {datachain-0.18.2 → datachain-0.18.4}/src/datachain.egg-info/SOURCES.txt +0 -0
  242. {datachain-0.18.2 → datachain-0.18.4}/src/datachain.egg-info/dependency_links.txt +0 -0
  243. {datachain-0.18.2 → datachain-0.18.4}/src/datachain.egg-info/entry_points.txt +0 -0
  244. {datachain-0.18.2 → datachain-0.18.4}/src/datachain.egg-info/requires.txt +0 -0
  245. {datachain-0.18.2 → datachain-0.18.4}/src/datachain.egg-info/top_level.txt +0 -0
  246. {datachain-0.18.2 → datachain-0.18.4}/tests/__init__.py +0 -0
  247. {datachain-0.18.2 → datachain-0.18.4}/tests/benchmarks/__init__.py +0 -0
  248. {datachain-0.18.2 → datachain-0.18.4}/tests/benchmarks/conftest.py +0 -0
  249. {datachain-0.18.2 → datachain-0.18.4}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
  250. {datachain-0.18.2 → datachain-0.18.4}/tests/benchmarks/datasets/.dvc/config +0 -0
  251. {datachain-0.18.2 → datachain-0.18.4}/tests/benchmarks/datasets/.gitignore +0 -0
  252. {datachain-0.18.2 → datachain-0.18.4}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
  253. {datachain-0.18.2 → datachain-0.18.4}/tests/benchmarks/test_datachain.py +0 -0
  254. {datachain-0.18.2 → datachain-0.18.4}/tests/benchmarks/test_ls.py +0 -0
  255. {datachain-0.18.2 → datachain-0.18.4}/tests/benchmarks/test_version.py +0 -0
  256. {datachain-0.18.2 → datachain-0.18.4}/tests/conftest.py +0 -0
  257. {datachain-0.18.2 → datachain-0.18.4}/tests/data.py +0 -0
  258. {datachain-0.18.2 → datachain-0.18.4}/tests/examples/__init__.py +0 -0
  259. {datachain-0.18.2 → datachain-0.18.4}/tests/examples/test_examples.py +0 -0
  260. {datachain-0.18.2 → datachain-0.18.4}/tests/examples/test_wds_e2e.py +0 -0
  261. {datachain-0.18.2 → datachain-0.18.4}/tests/examples/wds_data.py +0 -0
  262. {datachain-0.18.2 → datachain-0.18.4}/tests/func/__init__.py +0 -0
  263. {datachain-0.18.2 → datachain-0.18.4}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
  264. {datachain-0.18.2 → datachain-0.18.4}/tests/func/data/lena.jpg +0 -0
  265. {datachain-0.18.2 → datachain-0.18.4}/tests/func/fake-service-account-credentials.json +0 -0
  266. {datachain-0.18.2 → datachain-0.18.4}/tests/func/model/__init__.py +0 -0
  267. {datachain-0.18.2 → datachain-0.18.4}/tests/func/model/data/running-mask0.png +0 -0
  268. {datachain-0.18.2 → datachain-0.18.4}/tests/func/model/data/running-mask1.png +0 -0
  269. {datachain-0.18.2 → datachain-0.18.4}/tests/func/model/data/running.jpg +0 -0
  270. {datachain-0.18.2 → datachain-0.18.4}/tests/func/model/data/ships.jpg +0 -0
  271. {datachain-0.18.2 → datachain-0.18.4}/tests/func/model/test_yolo.py +0 -0
  272. {datachain-0.18.2 → datachain-0.18.4}/tests/func/test_batching.py +0 -0
  273. {datachain-0.18.2 → datachain-0.18.4}/tests/func/test_client.py +0 -0
  274. {datachain-0.18.2 → datachain-0.18.4}/tests/func/test_cloud_transfer.py +0 -0
  275. {datachain-0.18.2 → datachain-0.18.4}/tests/func/test_data_storage.py +0 -0
  276. {datachain-0.18.2 → datachain-0.18.4}/tests/func/test_datachain_merge.py +0 -0
  277. {datachain-0.18.2 → datachain-0.18.4}/tests/func/test_delta.py +0 -0
  278. {datachain-0.18.2 → datachain-0.18.4}/tests/func/test_feature_pickling.py +0 -0
  279. {datachain-0.18.2 → datachain-0.18.4}/tests/func/test_file.py +0 -0
  280. {datachain-0.18.2 → datachain-0.18.4}/tests/func/test_func.py +0 -0
  281. {datachain-0.18.2 → datachain-0.18.4}/tests/func/test_hf.py +0 -0
  282. {datachain-0.18.2 → datachain-0.18.4}/tests/func/test_hidden_field.py +0 -0
  283. {datachain-0.18.2 → datachain-0.18.4}/tests/func/test_image.py +0 -0
  284. {datachain-0.18.2 → datachain-0.18.4}/tests/func/test_ls.py +0 -0
  285. {datachain-0.18.2 → datachain-0.18.4}/tests/func/test_meta_formats.py +0 -0
  286. {datachain-0.18.2 → datachain-0.18.4}/tests/func/test_metrics.py +0 -0
  287. {datachain-0.18.2 → datachain-0.18.4}/tests/func/test_pull.py +0 -0
  288. {datachain-0.18.2 → datachain-0.18.4}/tests/func/test_pytorch.py +0 -0
  289. {datachain-0.18.2 → datachain-0.18.4}/tests/func/test_query.py +0 -0
  290. {datachain-0.18.2 → datachain-0.18.4}/tests/func/test_read_database.py +0 -0
  291. {datachain-0.18.2 → datachain-0.18.4}/tests/func/test_session.py +0 -0
  292. {datachain-0.18.2 → datachain-0.18.4}/tests/func/test_toolkit.py +0 -0
  293. {datachain-0.18.2 → datachain-0.18.4}/tests/func/test_video.py +0 -0
  294. {datachain-0.18.2 → datachain-0.18.4}/tests/func/test_warehouse.py +0 -0
  295. {datachain-0.18.2 → datachain-0.18.4}/tests/scripts/feature_class.py +0 -0
  296. {datachain-0.18.2 → datachain-0.18.4}/tests/scripts/feature_class_exception.py +0 -0
  297. {datachain-0.18.2 → datachain-0.18.4}/tests/scripts/feature_class_parallel.py +0 -0
  298. {datachain-0.18.2 → datachain-0.18.4}/tests/scripts/feature_class_parallel_data_model.py +0 -0
  299. {datachain-0.18.2 → datachain-0.18.4}/tests/scripts/name_len_slow.py +0 -0
  300. {datachain-0.18.2 → datachain-0.18.4}/tests/test_atomicity.py +0 -0
  301. {datachain-0.18.2 → datachain-0.18.4}/tests/test_cli_e2e.py +0 -0
  302. {datachain-0.18.2 → datachain-0.18.4}/tests/test_cli_studio.py +0 -0
  303. {datachain-0.18.2 → datachain-0.18.4}/tests/test_import_time.py +0 -0
  304. {datachain-0.18.2 → datachain-0.18.4}/tests/test_query_e2e.py +0 -0
  305. {datachain-0.18.2 → datachain-0.18.4}/tests/test_telemetry.py +0 -0
  306. {datachain-0.18.2 → datachain-0.18.4}/tests/unit/__init__.py +0 -0
  307. {datachain-0.18.2 → datachain-0.18.4}/tests/unit/lib/__init__.py +0 -0
  308. {datachain-0.18.2 → datachain-0.18.4}/tests/unit/lib/conftest.py +0 -0
  309. {datachain-0.18.2 → datachain-0.18.4}/tests/unit/lib/test_arrow.py +0 -0
  310. {datachain-0.18.2 → datachain-0.18.4}/tests/unit/lib/test_clip.py +0 -0
  311. {datachain-0.18.2 → datachain-0.18.4}/tests/unit/lib/test_datachain.py +0 -0
  312. {datachain-0.18.2 → datachain-0.18.4}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
  313. {datachain-0.18.2 → datachain-0.18.4}/tests/unit/lib/test_datachain_merge.py +0 -0
  314. {datachain-0.18.2 → datachain-0.18.4}/tests/unit/lib/test_diff.py +0 -0
  315. {datachain-0.18.2 → datachain-0.18.4}/tests/unit/lib/test_feature.py +0 -0
  316. {datachain-0.18.2 → datachain-0.18.4}/tests/unit/lib/test_feature_utils.py +0 -0
  317. {datachain-0.18.2 → datachain-0.18.4}/tests/unit/lib/test_file.py +0 -0
  318. {datachain-0.18.2 → datachain-0.18.4}/tests/unit/lib/test_hf.py +0 -0
  319. {datachain-0.18.2 → datachain-0.18.4}/tests/unit/lib/test_image.py +0 -0
  320. {datachain-0.18.2 → datachain-0.18.4}/tests/unit/lib/test_listing_info.py +0 -0
  321. {datachain-0.18.2 → datachain-0.18.4}/tests/unit/lib/test_python_to_sql.py +0 -0
  322. {datachain-0.18.2 → datachain-0.18.4}/tests/unit/lib/test_schema.py +0 -0
  323. {datachain-0.18.2 → datachain-0.18.4}/tests/unit/lib/test_signal_schema.py +0 -0
  324. {datachain-0.18.2 → datachain-0.18.4}/tests/unit/lib/test_sql_to_python.py +0 -0
  325. {datachain-0.18.2 → datachain-0.18.4}/tests/unit/lib/test_text.py +0 -0
  326. {datachain-0.18.2 → datachain-0.18.4}/tests/unit/lib/test_udf.py +0 -0
  327. {datachain-0.18.2 → datachain-0.18.4}/tests/unit/lib/test_udf_signature.py +0 -0
  328. {datachain-0.18.2 → datachain-0.18.4}/tests/unit/lib/test_utils.py +0 -0
  329. {datachain-0.18.2 → datachain-0.18.4}/tests/unit/lib/test_webdataset.py +0 -0
  330. {datachain-0.18.2 → datachain-0.18.4}/tests/unit/model/__init__.py +0 -0
  331. {datachain-0.18.2 → datachain-0.18.4}/tests/unit/model/test_bbox.py +0 -0
  332. {datachain-0.18.2 → datachain-0.18.4}/tests/unit/model/test_pose.py +0 -0
  333. {datachain-0.18.2 → datachain-0.18.4}/tests/unit/model/test_segment.py +0 -0
  334. {datachain-0.18.2 → datachain-0.18.4}/tests/unit/model/test_utils.py +0 -0
  335. {datachain-0.18.2 → datachain-0.18.4}/tests/unit/sql/__init__.py +0 -0
  336. {datachain-0.18.2 → datachain-0.18.4}/tests/unit/sql/sqlite/__init__.py +0 -0
  337. {datachain-0.18.2 → datachain-0.18.4}/tests/unit/sql/sqlite/test_types.py +0 -0
  338. {datachain-0.18.2 → datachain-0.18.4}/tests/unit/sql/sqlite/test_utils.py +0 -0
  339. {datachain-0.18.2 → datachain-0.18.4}/tests/unit/sql/test_array.py +0 -0
  340. {datachain-0.18.2 → datachain-0.18.4}/tests/unit/sql/test_conditional.py +0 -0
  341. {datachain-0.18.2 → datachain-0.18.4}/tests/unit/sql/test_path.py +0 -0
  342. {datachain-0.18.2 → datachain-0.18.4}/tests/unit/sql/test_random.py +0 -0
  343. {datachain-0.18.2 → datachain-0.18.4}/tests/unit/sql/test_selectable.py +0 -0
  344. {datachain-0.18.2 → datachain-0.18.4}/tests/unit/sql/test_string.py +0 -0
  345. {datachain-0.18.2 → datachain-0.18.4}/tests/unit/test_asyn.py +0 -0
  346. {datachain-0.18.2 → datachain-0.18.4}/tests/unit/test_cache.py +0 -0
  347. {datachain-0.18.2 → datachain-0.18.4}/tests/unit/test_catalog.py +0 -0
  348. {datachain-0.18.2 → datachain-0.18.4}/tests/unit/test_catalog_loader.py +0 -0
  349. {datachain-0.18.2 → datachain-0.18.4}/tests/unit/test_cli_parsing.py +0 -0
  350. {datachain-0.18.2 → datachain-0.18.4}/tests/unit/test_client.py +0 -0
  351. {datachain-0.18.2 → datachain-0.18.4}/tests/unit/test_client_gcs.py +0 -0
  352. {datachain-0.18.2 → datachain-0.18.4}/tests/unit/test_client_s3.py +0 -0
  353. {datachain-0.18.2 → datachain-0.18.4}/tests/unit/test_config.py +0 -0
  354. {datachain-0.18.2 → datachain-0.18.4}/tests/unit/test_data_storage.py +0 -0
  355. {datachain-0.18.2 → datachain-0.18.4}/tests/unit/test_database_engine.py +0 -0
  356. {datachain-0.18.2 → datachain-0.18.4}/tests/unit/test_dataset.py +0 -0
  357. {datachain-0.18.2 → datachain-0.18.4}/tests/unit/test_dispatch.py +0 -0
  358. {datachain-0.18.2 → datachain-0.18.4}/tests/unit/test_fileslice.py +0 -0
  359. {datachain-0.18.2 → datachain-0.18.4}/tests/unit/test_func.py +0 -0
  360. {datachain-0.18.2 → datachain-0.18.4}/tests/unit/test_listing.py +0 -0
  361. {datachain-0.18.2 → datachain-0.18.4}/tests/unit/test_metastore.py +0 -0
  362. {datachain-0.18.2 → datachain-0.18.4}/tests/unit/test_module_exports.py +0 -0
  363. {datachain-0.18.2 → datachain-0.18.4}/tests/unit/test_pytorch.py +0 -0
  364. {datachain-0.18.2 → datachain-0.18.4}/tests/unit/test_query.py +0 -0
  365. {datachain-0.18.2 → datachain-0.18.4}/tests/unit/test_query_metrics.py +0 -0
  366. {datachain-0.18.2 → datachain-0.18.4}/tests/unit/test_query_params.py +0 -0
  367. {datachain-0.18.2 → datachain-0.18.4}/tests/unit/test_script_meta.py +0 -0
  368. {datachain-0.18.2 → datachain-0.18.4}/tests/unit/test_semver.py +0 -0
  369. {datachain-0.18.2 → datachain-0.18.4}/tests/unit/test_serializer.py +0 -0
  370. {datachain-0.18.2 → datachain-0.18.4}/tests/unit/test_session.py +0 -0
  371. {datachain-0.18.2 → datachain-0.18.4}/tests/unit/test_utils.py +0 -0
  372. {datachain-0.18.2 → datachain-0.18.4}/tests/unit/test_warehouse.py +0 -0
  373. {datachain-0.18.2 → datachain-0.18.4}/tests/utils.py +0 -0
@@ -9,6 +9,13 @@ coverage:
9
9
  threshold: 10%
10
10
  # non-blocking status checks
11
11
  informational: true
12
+ only_pulls: true
13
+
14
+ comment:
15
+ # sections shown in the PR comment
16
+ layout: "reach, diff, flags, files"
17
+ # updates the comment on PRs when coverage changes
18
+ behavior: default
12
19
 
13
20
  flags:
14
21
  datachain:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.18.2
3
+ Version: 0.18.4
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -66,6 +66,7 @@ if TYPE_CHECKING:
66
66
  )
67
67
  from datachain.dataset import DatasetListVersion
68
68
  from datachain.job import Job
69
+ from datachain.lib.listing_info import ListingInfo
69
70
  from datachain.listing import Listing
70
71
 
71
72
  logger = logging.getLogger("datachain")
@@ -1116,13 +1117,16 @@ class Catalog:
1116
1117
  return direct_dependencies
1117
1118
 
1118
1119
  def ls_datasets(
1119
- self, include_listing: bool = False, studio: bool = False
1120
+ self,
1121
+ prefix: Optional[str] = None,
1122
+ include_listing: bool = False,
1123
+ studio: bool = False,
1120
1124
  ) -> Iterator[DatasetListRecord]:
1121
1125
  from datachain.remote.studio import StudioClient
1122
1126
 
1123
1127
  if studio:
1124
1128
  client = StudioClient()
1125
- response = client.ls_datasets()
1129
+ response = client.ls_datasets(prefix=prefix)
1126
1130
  if not response.ok:
1127
1131
  raise DataChainError(response.message)
1128
1132
  if not response.data:
@@ -1133,6 +1137,8 @@ class Catalog:
1133
1137
  for d in response.data
1134
1138
  if not d.get("name", "").startswith(QUERY_DATASET_PREFIX)
1135
1139
  )
1140
+ elif prefix:
1141
+ datasets = self.metastore.list_datasets_by_prefix(prefix)
1136
1142
  else:
1137
1143
  datasets = self.metastore.list_datasets()
1138
1144
 
@@ -1142,39 +1148,55 @@ class Catalog:
1142
1148
 
1143
1149
  def list_datasets_versions(
1144
1150
  self,
1151
+ prefix: Optional[str] = None,
1145
1152
  include_listing: bool = False,
1153
+ with_job: bool = True,
1146
1154
  studio: bool = False,
1147
1155
  ) -> Iterator[tuple[DatasetListRecord, "DatasetListVersion", Optional["Job"]]]:
1148
1156
  """Iterate over all dataset versions with related jobs."""
1149
1157
  datasets = list(
1150
- self.ls_datasets(include_listing=include_listing, studio=studio)
1158
+ self.ls_datasets(
1159
+ prefix=prefix, include_listing=include_listing, studio=studio
1160
+ )
1151
1161
  )
1152
1162
 
1153
1163
  # preselect dataset versions jobs from db to avoid multiple queries
1154
- jobs_ids: set[str] = {
1155
- v.job_id for ds in datasets for v in ds.versions if v.job_id
1156
- }
1157
1164
  jobs: dict[str, Job] = {}
1158
- if jobs_ids:
1159
- jobs = {j.id: j for j in self.metastore.list_jobs_by_ids(list(jobs_ids))}
1165
+ if with_job:
1166
+ jobs_ids: set[str] = {
1167
+ v.job_id for ds in datasets for v in ds.versions if v.job_id
1168
+ }
1169
+ if jobs_ids:
1170
+ jobs = {
1171
+ j.id: j for j in self.metastore.list_jobs_by_ids(list(jobs_ids))
1172
+ }
1160
1173
 
1161
1174
  for d in datasets:
1162
1175
  yield from (
1163
- (d, v, jobs.get(str(v.job_id)) if v.job_id else None)
1176
+ (d, v, jobs.get(str(v.job_id)) if with_job and v.job_id else None)
1164
1177
  for v in d.versions
1165
1178
  )
1166
1179
 
1167
- def listings(self):
1180
+ def listings(self, prefix: Optional[str] = None) -> list["ListingInfo"]:
1168
1181
  """
1169
1182
  Returns list of ListingInfo objects which are representing specific
1170
1183
  storage listing datasets
1171
1184
  """
1172
- from datachain.lib.listing import is_listing_dataset
1185
+ from datachain.lib.listing import LISTING_PREFIX, is_listing_dataset
1173
1186
  from datachain.lib.listing_info import ListingInfo
1174
1187
 
1188
+ if prefix and not prefix.startswith(LISTING_PREFIX):
1189
+ prefix = LISTING_PREFIX + prefix
1190
+
1191
+ listing_datasets_versions = self.list_datasets_versions(
1192
+ prefix=prefix,
1193
+ include_listing=True,
1194
+ with_job=False,
1195
+ )
1196
+
1175
1197
  return [
1176
1198
  ListingInfo.from_models(d, v, j)
1177
- for d, v, j in self.list_datasets_versions(include_listing=True)
1199
+ for d, v, j in listing_datasets_versions
1178
1200
  if is_listing_dataset(d.name)
1179
1201
  ]
1180
1202
 
@@ -93,7 +93,7 @@ class DatasetDependency:
93
93
  if self.type == DatasetDependencyType.DATASET:
94
94
  return self.name
95
95
 
96
- list_dataset_name, _, _ = parse_listing_uri(self.name.strip("/"), {})
96
+ list_dataset_name, _, _ = parse_listing_uri(self.name.strip("/"))
97
97
  assert list_dataset_name
98
98
  return list_dataset_name
99
99
 
@@ -107,11 +107,10 @@ def ls(
107
107
  return dc.filter(pathfunc.parent(_file_c("path")) == path.lstrip("/").rstrip("/*"))
108
108
 
109
109
 
110
- def parse_listing_uri(uri: str, client_config) -> tuple[str, str, str]:
110
+ def parse_listing_uri(uri: str) -> tuple[str, str, str]:
111
111
  """
112
112
  Parsing uri and returns listing dataset name, listing uri and listing path
113
113
  """
114
- client_config = client_config or {}
115
114
  storage_uri, path = Client.parse_url(uri)
116
115
  if uses_glob(path):
117
116
  lst_uri_path = posixpath.dirname(path)
@@ -175,7 +174,7 @@ def get_listing(
175
174
  _, path = Client.parse_url(uri)
176
175
  return None, uri, path, False
177
176
 
178
- ds_name, list_uri, list_path = parse_listing_uri(uri, client_config)
177
+ ds_name, list_uri, list_path = parse_listing_uri(uri)
179
178
  listing = None
180
179
  listings = [
181
180
  ls for ls in catalog.listings() if not ls.is_expired and ls.contains(ds_name)
@@ -1675,13 +1675,27 @@ class DatasetQuery:
1675
1675
  return query
1676
1676
 
1677
1677
  def _add_dependencies(self, dataset: "DatasetRecord", version: str):
1678
- for dependency in self.dependencies:
1679
- ds_dependency_name, ds_dependency_version = dependency
1678
+ dependencies: set[DatasetDependencyType] = set()
1679
+ for dep_name, dep_version in self.dependencies:
1680
+ if Session.is_temp_dataset(dep_name):
1681
+ # temp dataset are created for optimization and they will be removed
1682
+ # afterwards. Therefore, we should not put them as dependencies, but
1683
+ # their own direct dependencies
1684
+ for dep in self.catalog.get_dataset_dependencies(
1685
+ dep_name, dep_version, indirect=False
1686
+ ):
1687
+ if dep:
1688
+ dependencies.add((dep.name, dep.version))
1689
+ else:
1690
+ dependencies.add((dep_name, dep_version))
1691
+
1692
+ for dep_name, dep_version in dependencies:
1693
+ # ds_dependency_name, ds_dependency_version = dependency
1680
1694
  self.catalog.metastore.add_dataset_dependency(
1681
1695
  dataset.name,
1682
1696
  version,
1683
- ds_dependency_name,
1684
- ds_dependency_version,
1697
+ dep_name,
1698
+ dep_version,
1685
1699
  )
1686
1700
 
1687
1701
  def exec(self) -> "Self":
@@ -282,8 +282,10 @@ class StudioClient:
282
282
  response = self._send_request_msgpack("datachain/ls", {"source": path})
283
283
  yield path, response
284
284
 
285
- def ls_datasets(self) -> Response[LsData]:
286
- return self._send_request("datachain/datasets", {}, method="GET")
285
+ def ls_datasets(self, prefix: Optional[str] = None) -> Response[LsData]:
286
+ return self._send_request(
287
+ "datachain/datasets", {"prefix": prefix}, method="GET"
288
+ )
287
289
 
288
290
  def edit_dataset(
289
291
  self,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.18.2
3
+ Version: 0.18.4
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -15,7 +15,7 @@ from tests.utils import DEFAULT_TREE, skip_if_not_sqlite, tree_from_path
15
15
 
16
16
 
17
17
  def listing_stats(uri, catalog):
18
- list_dataset_name, _, _ = parse_listing_uri(uri, catalog.client_config)
18
+ list_dataset_name, _, _ = parse_listing_uri(uri)
19
19
  dataset = catalog.get_dataset(list_dataset_name)
20
20
  dataset_version = dataset.get_version(dataset.latest_version)
21
21
  return dataset_version.num_objects, dataset_version.size
@@ -114,12 +114,11 @@ def test_read_storage_reindex(tmp_dir, test_session):
114
114
 
115
115
 
116
116
  def test_read_storage_reindex_expired(tmp_dir, test_session):
117
- catalog = test_session.catalog
118
117
  tmp_dir = tmp_dir / "parquets"
119
118
  os.mkdir(tmp_dir)
120
119
  uri = tmp_dir.as_uri()
121
120
 
122
- lst_ds_name = parse_listing_uri(uri, catalog.client_config)[0]
121
+ lst_ds_name = parse_listing_uri(uri)[0]
123
122
 
124
123
  pd.DataFrame({"name": ["Alice", "Bob"]}).to_parquet(tmp_dir / "test1.parquet")
125
124
  assert dc.read_storage(uri, session=test_session).count() == 1
@@ -144,10 +143,9 @@ def test_read_storage_partials(cloud_test_catalog):
144
143
  ctc = cloud_test_catalog
145
144
  src_uri = ctc.src_uri
146
145
  session = ctc.session
147
- catalog = session.catalog
148
146
 
149
147
  def _list_dataset_name(uri: str) -> str:
150
- name = parse_listing_uri(uri, catalog.client_config)[0]
148
+ name = parse_listing_uri(uri)[0]
151
149
  assert name
152
150
  return name
153
151
 
@@ -188,10 +186,9 @@ def test_read_storage_partials_with_update(cloud_test_catalog):
188
186
  ctc = cloud_test_catalog
189
187
  src_uri = ctc.src_uri
190
188
  session = ctc.session
191
- catalog = session.catalog
192
189
 
193
190
  def _list_dataset_name(uri: str) -> str:
194
- name = parse_listing_uri(uri, catalog.client_config)[0]
191
+ name = parse_listing_uri(uri)[0]
195
192
  assert name
196
193
  return name
197
194
 
@@ -222,7 +219,7 @@ def test_read_storage_listing_happens_once(cloud_test_catalog, cloud_type):
222
219
  dc_dogs = chain.filter(dc.C("file.path").glob("dogs*"))
223
220
  dc_cats.union(dc_dogs).save(ds_name)
224
221
 
225
- lst_ds_name = parse_listing_uri(uri, ctc.session.catalog.client_config)[0]
222
+ lst_ds_name = parse_listing_uri(uri)[0]
226
223
  assert _get_listing_datasets(ctc.session) == [f"{lst_ds_name}@v1.0.0"]
227
224
 
228
225
 
@@ -230,7 +227,7 @@ def test_read_storage_dependencies(cloud_test_catalog, cloud_type):
230
227
  ctc = cloud_test_catalog
231
228
  src_uri = ctc.src_uri
232
229
  uri = f"{src_uri}/cats"
233
- dep_name, _, _ = parse_listing_uri(uri, ctc.catalog.client_config)
230
+ dep_name, _, _ = parse_listing_uri(uri)
234
231
  ds_name = "dep"
235
232
  dc.read_storage(uri, session=ctc.session).save(ds_name)
236
233
  dependencies = ctc.session.catalog.get_dataset_dependencies(ds_name, "1.0.0")
@@ -239,6 +236,25 @@ def test_read_storage_dependencies(cloud_test_catalog, cloud_type):
239
236
  assert dependencies[0].name == dep_name
240
237
 
241
238
 
239
+ def test_persist_not_affects_dependencies(tmp_dir, test_session):
240
+ for i in range(4):
241
+ (tmp_dir / f"file{i}.txt").write_text(f"file{i}")
242
+
243
+ uri = tmp_dir.as_uri()
244
+ dep_name, _, _ = parse_listing_uri(uri)
245
+ chain = dc.read_storage(uri, session=test_session) # .persist()
246
+ # calling multiple persists to create temp datasets
247
+ chain = chain.persist()
248
+ chain = chain.persist()
249
+ chain = chain.persist()
250
+ chain.save("test-data")
251
+ dependencies = test_session.catalog.get_dataset_dependencies("test-data", "1.0.0")
252
+
253
+ assert len(dependencies) == 1
254
+ assert dependencies[0].name == dep_name
255
+ assert dependencies[0].type == DatasetDependencyType.STORAGE
256
+
257
+
242
258
  @pytest.mark.parametrize("use_cache", [True, False])
243
259
  @pytest.mark.parametrize("prefetch", [0, 2])
244
260
  def test_map_file(cloud_test_catalog, use_cache, prefetch, monkeypatch):
@@ -965,9 +965,7 @@ def test_dataset_dependencies_one_storage_as_dependency(
965
965
  ds_name = uuid.uuid4().hex
966
966
  catalog = cloud_test_catalog.catalog
967
967
  listing = catalog.listings()[0]
968
- dep_name, _, _ = parse_listing_uri(
969
- cloud_test_catalog.src_uri, catalog.client_config
970
- )
968
+ dep_name, _, _ = parse_listing_uri(cloud_test_catalog.src_uri)
971
969
 
972
970
  DatasetQuery(cats_dataset.name, catalog=catalog).save(ds_name)
973
971
 
@@ -996,9 +994,7 @@ def test_dataset_dependencies_one_registered_dataset_as_dependency(
996
994
  catalog = cloud_test_catalog.catalog
997
995
  listing = catalog.listings()[0]
998
996
 
999
- dep_name, _, _ = parse_listing_uri(
1000
- cloud_test_catalog.src_uri, catalog.client_config
1001
- )
997
+ dep_name, _, _ = parse_listing_uri(cloud_test_catalog.src_uri)
1002
998
 
1003
999
  DatasetQuery(name=dogs_dataset.name, catalog=catalog).save(ds_name)
1004
1000
 
@@ -1044,9 +1040,7 @@ def test_dataset_dependencies_multiple_direct_dataset_dependencies(
1044
1040
  ds_name = uuid.uuid4().hex
1045
1041
  catalog = cloud_test_catalog.catalog
1046
1042
  listing = catalog.listings()[0]
1047
- dep_name, _, _ = parse_listing_uri(
1048
- cloud_test_catalog.src_uri, catalog.client_config
1049
- )
1043
+ dep_name, _, _ = parse_listing_uri(cloud_test_catalog.src_uri)
1050
1044
 
1051
1045
  dogs = DatasetQuery(name=dogs_dataset.name, version="1.0.0", catalog=catalog)
1052
1046
  cats = DatasetQuery(name=cats_dataset.name, version="1.0.0", catalog=catalog)
@@ -1116,9 +1110,7 @@ def test_dataset_dependencies_multiple_union(
1116
1110
  ds_name = uuid.uuid4().hex
1117
1111
  catalog = cloud_test_catalog.catalog
1118
1112
  listing = catalog.listings()[0]
1119
- dep_name, _, _ = parse_listing_uri(
1120
- cloud_test_catalog.src_uri, catalog.client_config
1121
- )
1113
+ dep_name, _, _ = parse_listing_uri(cloud_test_catalog.src_uri)
1122
1114
 
1123
1115
  dogs = DatasetQuery(name=dogs_dataset.name, version="1.0.0", catalog=catalog)
1124
1116
  cats = DatasetQuery(name=cats_dataset.name, version="1.0.0", catalog=catalog)
@@ -666,12 +666,12 @@ def test_dataset_storage_dependencies(cloud_test_catalog, cloud_type, indirect):
666
666
  session = ctc.session
667
667
  catalog = session.catalog
668
668
  uri = cloud_test_catalog.src_uri
669
- dep_name, _, _ = parse_listing_uri(ctc.src_uri, catalog.client_config)
669
+ dep_name, _, _ = parse_listing_uri(ctc.src_uri)
670
670
 
671
671
  ds_name = "some_ds"
672
672
  dc.read_storage(uri, session=session).save(ds_name)
673
673
 
674
- lst_ds_name, _, _ = parse_listing_uri(uri, catalog.client_config)
674
+ lst_ds_name, _, _ = parse_listing_uri(uri)
675
675
  lst_dataset = catalog.metastore.get_dataset(lst_ds_name)
676
676
 
677
677
  assert [
@@ -36,10 +36,7 @@ def test_listing_generator(cloud_test_catalog, cloud_type):
36
36
  )
37
37
  def test_parse_listing_uri(cloud_test_catalog, cloud_type):
38
38
  ctc = cloud_test_catalog
39
- catalog = ctc.catalog
40
- dataset_name, listing_uri, listing_path = parse_listing_uri(
41
- f"{ctc.src_uri}/dogs", catalog.client_config
42
- )
39
+ dataset_name, listing_uri, listing_path = parse_listing_uri(f"{ctc.src_uri}/dogs")
43
40
  assert dataset_name == f"lst__{ctc.src_uri}/dogs/"
44
41
  assert listing_uri == f"{ctc.src_uri}/dogs/"
45
42
  if cloud_type == "file":
@@ -55,10 +52,7 @@ def test_parse_listing_uri(cloud_test_catalog, cloud_type):
55
52
  )
56
53
  def test_parse_listing_uri_with_glob(cloud_test_catalog):
57
54
  ctc = cloud_test_catalog
58
- catalog = ctc.catalog
59
- dataset_name, listing_uri, listing_path = parse_listing_uri(
60
- f"{ctc.src_uri}/dogs/*", catalog.client_config
61
- )
55
+ dataset_name, listing_uri, listing_path = parse_listing_uri(f"{ctc.src_uri}/dogs/*")
62
56
  assert dataset_name == f"lst__{ctc.src_uri}/dogs/"
63
57
  assert listing_uri == f"{ctc.src_uri}/dogs"
64
58
  assert listing_path == "dogs/*"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes