datachain 0.6.2__tar.gz → 0.6.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (263)
  1. {datachain-0.6.2 → datachain-0.6.4}/.github/workflows/tests-studio.yml +1 -1
  2. {datachain-0.6.2 → datachain-0.6.4}/.pre-commit-config.yaml +1 -1
  3. {datachain-0.6.2/src/datachain.egg-info → datachain-0.6.4}/PKG-INFO +2 -2
  4. {datachain-0.6.2 → datachain-0.6.4}/noxfile.py +1 -2
  5. {datachain-0.6.2 → datachain-0.6.4}/pyproject.toml +4 -5
  6. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/catalog/catalog.py +3 -25
  7. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/cli.py +0 -8
  8. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/client/fsspec.py +10 -5
  9. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/client/local.py +7 -3
  10. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/data_storage/metastore.py +11 -478
  11. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/data_storage/sqlite.py +9 -41
  12. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/data_storage/warehouse.py +1 -2
  13. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/dataset.py +12 -10
  14. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/error.py +0 -4
  15. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/lib/arrow.py +2 -15
  16. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/lib/data_model.py +10 -2
  17. datachain-0.6.4/src/datachain/lib/utils.py +60 -0
  18. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/node.py +1 -1
  19. {datachain-0.6.2 → datachain-0.6.4/src/datachain.egg-info}/PKG-INFO +2 -2
  20. {datachain-0.6.2 → datachain-0.6.4}/src/datachain.egg-info/SOURCES.txt +0 -2
  21. {datachain-0.6.2 → datachain-0.6.4}/src/datachain.egg-info/requires.txt +1 -1
  22. datachain-0.6.4/tests/benchmarks/conftest.py +8 -0
  23. {datachain-0.6.2 → datachain-0.6.4}/tests/benchmarks/test_datachain.py +0 -3
  24. datachain-0.6.4/tests/benchmarks/test_ls.py +6 -0
  25. datachain-0.6.4/tests/benchmarks/test_version.py +7 -0
  26. {datachain-0.6.2 → datachain-0.6.4}/tests/func/test_catalog.py +0 -5
  27. {datachain-0.6.2 → datachain-0.6.4}/tests/func/test_datachain.py +2 -3
  28. {datachain-0.6.2 → datachain-0.6.4}/tests/func/test_dataset_query.py +20 -35
  29. {datachain-0.6.2 → datachain-0.6.4}/tests/func/test_datasets.py +0 -1
  30. {datachain-0.6.2 → datachain-0.6.4}/tests/unit/lib/test_arrow.py +11 -3
  31. {datachain-0.6.2 → datachain-0.6.4}/tests/unit/lib/test_datachain.py +34 -1
  32. datachain-0.6.4/tests/unit/lib/test_utils.py +128 -0
  33. {datachain-0.6.2 → datachain-0.6.4}/tests/unit/test_catalog_loader.py +3 -8
  34. {datachain-0.6.2 → datachain-0.6.4}/tests/unit/test_metastore.py +2 -6
  35. datachain-0.6.2/src/datachain/lib/utils.py +0 -30
  36. datachain-0.6.2/src/datachain/storage.py +0 -136
  37. datachain-0.6.2/tests/benchmarks/conftest.py +0 -137
  38. datachain-0.6.2/tests/benchmarks/test_ls.py +0 -2
  39. datachain-0.6.2/tests/benchmarks/test_version.py +0 -2
  40. datachain-0.6.2/tests/unit/lib/test_utils.py +0 -58
  41. datachain-0.6.2/tests/unit/test_storage.py +0 -188
  42. {datachain-0.6.2 → datachain-0.6.4}/.cruft.json +0 -0
  43. {datachain-0.6.2 → datachain-0.6.4}/.gitattributes +0 -0
  44. {datachain-0.6.2 → datachain-0.6.4}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  45. {datachain-0.6.2 → datachain-0.6.4}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
  46. {datachain-0.6.2 → datachain-0.6.4}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  47. {datachain-0.6.2 → datachain-0.6.4}/.github/codecov.yaml +0 -0
  48. {datachain-0.6.2 → datachain-0.6.4}/.github/dependabot.yml +0 -0
  49. {datachain-0.6.2 → datachain-0.6.4}/.github/workflows/benchmarks.yml +0 -0
  50. {datachain-0.6.2 → datachain-0.6.4}/.github/workflows/release.yml +0 -0
  51. {datachain-0.6.2 → datachain-0.6.4}/.github/workflows/tests.yml +0 -0
  52. {datachain-0.6.2 → datachain-0.6.4}/.github/workflows/update-template.yaml +0 -0
  53. {datachain-0.6.2 → datachain-0.6.4}/.gitignore +0 -0
  54. {datachain-0.6.2 → datachain-0.6.4}/CODE_OF_CONDUCT.rst +0 -0
  55. {datachain-0.6.2 → datachain-0.6.4}/CONTRIBUTING.rst +0 -0
  56. {datachain-0.6.2 → datachain-0.6.4}/LICENSE +0 -0
  57. {datachain-0.6.2 → datachain-0.6.4}/README.rst +0 -0
  58. {datachain-0.6.2 → datachain-0.6.4}/docs/assets/captioned_cartoons.png +0 -0
  59. {datachain-0.6.2 → datachain-0.6.4}/docs/assets/datachain-white.svg +0 -0
  60. {datachain-0.6.2 → datachain-0.6.4}/docs/assets/datachain.svg +0 -0
  61. {datachain-0.6.2 → datachain-0.6.4}/docs/assets/flowchart.png +0 -0
  62. {datachain-0.6.2 → datachain-0.6.4}/docs/index.md +0 -0
  63. {datachain-0.6.2 → datachain-0.6.4}/docs/references/datachain.md +0 -0
  64. {datachain-0.6.2 → datachain-0.6.4}/docs/references/datatype.md +0 -0
  65. {datachain-0.6.2 → datachain-0.6.4}/docs/references/file.md +0 -0
  66. {datachain-0.6.2 → datachain-0.6.4}/docs/references/index.md +0 -0
  67. {datachain-0.6.2 → datachain-0.6.4}/docs/references/sql.md +0 -0
  68. {datachain-0.6.2 → datachain-0.6.4}/docs/references/torch.md +0 -0
  69. {datachain-0.6.2 → datachain-0.6.4}/docs/references/udf.md +0 -0
  70. {datachain-0.6.2 → datachain-0.6.4}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
  71. {datachain-0.6.2 → datachain-0.6.4}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
  72. {datachain-0.6.2 → datachain-0.6.4}/examples/computer_vision/openimage-detect.py +0 -0
  73. {datachain-0.6.2 → datachain-0.6.4}/examples/get_started/common_sql_functions.py +0 -0
  74. {datachain-0.6.2 → datachain-0.6.4}/examples/get_started/json-csv-reader.py +0 -0
  75. {datachain-0.6.2 → datachain-0.6.4}/examples/get_started/torch-loader.py +0 -0
  76. {datachain-0.6.2 → datachain-0.6.4}/examples/get_started/udfs/parallel.py +0 -0
  77. {datachain-0.6.2 → datachain-0.6.4}/examples/get_started/udfs/simple.py +0 -0
  78. {datachain-0.6.2 → datachain-0.6.4}/examples/get_started/udfs/stateful.py +0 -0
  79. {datachain-0.6.2 → datachain-0.6.4}/examples/llm_and_nlp/claude-query.py +0 -0
  80. {datachain-0.6.2 → datachain-0.6.4}/examples/llm_and_nlp/unstructured-embeddings-gen.py +0 -0
  81. {datachain-0.6.2 → datachain-0.6.4}/examples/llm_and_nlp/unstructured-summary-map.py +0 -0
  82. {datachain-0.6.2 → datachain-0.6.4}/examples/multimodal/clip_inference.py +0 -0
  83. {datachain-0.6.2 → datachain-0.6.4}/examples/multimodal/hf_pipeline.py +0 -0
  84. {datachain-0.6.2 → datachain-0.6.4}/examples/multimodal/openai_image_desc_lib.py +0 -0
  85. {datachain-0.6.2 → datachain-0.6.4}/examples/multimodal/wds.py +0 -0
  86. {datachain-0.6.2 → datachain-0.6.4}/examples/multimodal/wds_filtered.py +0 -0
  87. {datachain-0.6.2 → datachain-0.6.4}/mkdocs.yml +0 -0
  88. {datachain-0.6.2 → datachain-0.6.4}/overrides/main.html +0 -0
  89. {datachain-0.6.2 → datachain-0.6.4}/setup.cfg +0 -0
  90. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/__init__.py +0 -0
  91. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/__main__.py +0 -0
  92. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/asyn.py +0 -0
  93. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/cache.py +0 -0
  94. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/catalog/__init__.py +0 -0
  95. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/catalog/datasource.py +0 -0
  96. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/catalog/loader.py +0 -0
  97. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/cli_utils.py +0 -0
  98. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/client/__init__.py +0 -0
  99. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/client/azure.py +0 -0
  100. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/client/fileslice.py +0 -0
  101. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/client/gcs.py +0 -0
  102. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/client/hf.py +0 -0
  103. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/client/s3.py +0 -0
  104. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/config.py +0 -0
  105. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/data_storage/__init__.py +0 -0
  106. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/data_storage/db_engine.py +0 -0
  107. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/data_storage/id_generator.py +0 -0
  108. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/data_storage/job.py +0 -0
  109. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/data_storage/schema.py +0 -0
  110. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/data_storage/serializer.py +0 -0
  111. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/job.py +0 -0
  112. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/lib/__init__.py +0 -0
  113. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/lib/clip.py +0 -0
  114. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/lib/convert/__init__.py +0 -0
  115. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/lib/convert/flatten.py +0 -0
  116. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/lib/convert/python_to_sql.py +0 -0
  117. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/lib/convert/sql_to_python.py +0 -0
  118. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/lib/convert/unflatten.py +0 -0
  119. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/lib/convert/values_to_tuples.py +0 -0
  120. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/lib/dataset_info.py +0 -0
  121. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/lib/dc.py +0 -0
  122. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/lib/file.py +0 -0
  123. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/lib/func/__init__.py +0 -0
  124. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/lib/func/aggregate.py +0 -0
  125. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/lib/func/func.py +0 -0
  126. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/lib/hf.py +0 -0
  127. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/lib/image.py +0 -0
  128. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/lib/listing.py +0 -0
  129. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/lib/listing_info.py +0 -0
  130. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/lib/meta_formats.py +0 -0
  131. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/lib/model_store.py +0 -0
  132. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/lib/pytorch.py +0 -0
  133. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/lib/settings.py +0 -0
  134. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/lib/signal_schema.py +0 -0
  135. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/lib/tar.py +0 -0
  136. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/lib/text.py +0 -0
  137. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/lib/udf.py +0 -0
  138. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/lib/udf_signature.py +0 -0
  139. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/lib/vfile.py +0 -0
  140. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/lib/webdataset.py +0 -0
  141. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/lib/webdataset_laion.py +0 -0
  142. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/listing.py +0 -0
  143. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/nodes_fetcher.py +0 -0
  144. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/nodes_thread_pool.py +0 -0
  145. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/progress.py +0 -0
  146. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/py.typed +0 -0
  147. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/query/__init__.py +0 -0
  148. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/query/batch.py +0 -0
  149. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/query/dataset.py +0 -0
  150. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/query/dispatch.py +0 -0
  151. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/query/metrics.py +0 -0
  152. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/query/params.py +0 -0
  153. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/query/queue.py +0 -0
  154. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/query/schema.py +0 -0
  155. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/query/session.py +0 -0
  156. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/remote/__init__.py +0 -0
  157. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/remote/studio.py +0 -0
  158. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/sql/__init__.py +0 -0
  159. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/sql/default/__init__.py +0 -0
  160. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/sql/default/base.py +0 -0
  161. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/sql/functions/__init__.py +0 -0
  162. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/sql/functions/aggregate.py +0 -0
  163. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/sql/functions/array.py +0 -0
  164. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/sql/functions/conditional.py +0 -0
  165. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/sql/functions/path.py +0 -0
  166. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/sql/functions/random.py +0 -0
  167. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/sql/functions/string.py +0 -0
  168. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/sql/selectable.py +0 -0
  169. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/sql/sqlite/__init__.py +0 -0
  170. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/sql/sqlite/base.py +0 -0
  171. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/sql/sqlite/types.py +0 -0
  172. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/sql/sqlite/vector.py +0 -0
  173. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/sql/types.py +0 -0
  174. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/sql/utils.py +0 -0
  175. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/studio.py +0 -0
  176. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/telemetry.py +0 -0
  177. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/torch/__init__.py +0 -0
  178. {datachain-0.6.2 → datachain-0.6.4}/src/datachain/utils.py +0 -0
  179. {datachain-0.6.2 → datachain-0.6.4}/src/datachain.egg-info/dependency_links.txt +0 -0
  180. {datachain-0.6.2 → datachain-0.6.4}/src/datachain.egg-info/entry_points.txt +0 -0
  181. {datachain-0.6.2 → datachain-0.6.4}/src/datachain.egg-info/top_level.txt +0 -0
  182. {datachain-0.6.2 → datachain-0.6.4}/tests/__init__.py +0 -0
  183. {datachain-0.6.2 → datachain-0.6.4}/tests/benchmarks/__init__.py +0 -0
  184. {datachain-0.6.2 → datachain-0.6.4}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
  185. {datachain-0.6.2 → datachain-0.6.4}/tests/benchmarks/datasets/.dvc/config +0 -0
  186. {datachain-0.6.2 → datachain-0.6.4}/tests/benchmarks/datasets/.gitignore +0 -0
  187. {datachain-0.6.2 → datachain-0.6.4}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
  188. {datachain-0.6.2 → datachain-0.6.4}/tests/conftest.py +0 -0
  189. {datachain-0.6.2 → datachain-0.6.4}/tests/data.py +0 -0
  190. {datachain-0.6.2 → datachain-0.6.4}/tests/examples/__init__.py +0 -0
  191. {datachain-0.6.2 → datachain-0.6.4}/tests/examples/test_examples.py +0 -0
  192. {datachain-0.6.2 → datachain-0.6.4}/tests/examples/test_wds_e2e.py +0 -0
  193. {datachain-0.6.2 → datachain-0.6.4}/tests/examples/wds_data.py +0 -0
  194. {datachain-0.6.2 → datachain-0.6.4}/tests/func/__init__.py +0 -0
  195. {datachain-0.6.2 → datachain-0.6.4}/tests/func/test_client.py +0 -0
  196. {datachain-0.6.2 → datachain-0.6.4}/tests/func/test_feature_pickling.py +0 -0
  197. {datachain-0.6.2 → datachain-0.6.4}/tests/func/test_listing.py +0 -0
  198. {datachain-0.6.2 → datachain-0.6.4}/tests/func/test_ls.py +0 -0
  199. {datachain-0.6.2 → datachain-0.6.4}/tests/func/test_meta_formats.py +0 -0
  200. {datachain-0.6.2 → datachain-0.6.4}/tests/func/test_metrics.py +0 -0
  201. {datachain-0.6.2 → datachain-0.6.4}/tests/func/test_pull.py +0 -0
  202. {datachain-0.6.2 → datachain-0.6.4}/tests/func/test_pytorch.py +0 -0
  203. {datachain-0.6.2 → datachain-0.6.4}/tests/func/test_query.py +0 -0
  204. {datachain-0.6.2 → datachain-0.6.4}/tests/scripts/feature_class.py +0 -0
  205. {datachain-0.6.2 → datachain-0.6.4}/tests/scripts/feature_class_exception.py +0 -0
  206. {datachain-0.6.2 → datachain-0.6.4}/tests/scripts/feature_class_parallel.py +0 -0
  207. {datachain-0.6.2 → datachain-0.6.4}/tests/scripts/feature_class_parallel_data_model.py +0 -0
  208. {datachain-0.6.2 → datachain-0.6.4}/tests/scripts/name_len_slow.py +0 -0
  209. {datachain-0.6.2 → datachain-0.6.4}/tests/test_atomicity.py +0 -0
  210. {datachain-0.6.2 → datachain-0.6.4}/tests/test_cli_e2e.py +0 -0
  211. {datachain-0.6.2 → datachain-0.6.4}/tests/test_cli_studio.py +0 -0
  212. {datachain-0.6.2 → datachain-0.6.4}/tests/test_query_e2e.py +0 -0
  213. {datachain-0.6.2 → datachain-0.6.4}/tests/test_telemetry.py +0 -0
  214. {datachain-0.6.2 → datachain-0.6.4}/tests/unit/__init__.py +0 -0
  215. {datachain-0.6.2 → datachain-0.6.4}/tests/unit/lib/__init__.py +0 -0
  216. {datachain-0.6.2 → datachain-0.6.4}/tests/unit/lib/conftest.py +0 -0
  217. {datachain-0.6.2 → datachain-0.6.4}/tests/unit/lib/test_clip.py +0 -0
  218. {datachain-0.6.2 → datachain-0.6.4}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
  219. {datachain-0.6.2 → datachain-0.6.4}/tests/unit/lib/test_datachain_merge.py +0 -0
  220. {datachain-0.6.2 → datachain-0.6.4}/tests/unit/lib/test_feature.py +0 -0
  221. {datachain-0.6.2 → datachain-0.6.4}/tests/unit/lib/test_feature_utils.py +0 -0
  222. {datachain-0.6.2 → datachain-0.6.4}/tests/unit/lib/test_file.py +0 -0
  223. {datachain-0.6.2 → datachain-0.6.4}/tests/unit/lib/test_hf.py +0 -0
  224. {datachain-0.6.2 → datachain-0.6.4}/tests/unit/lib/test_image.py +0 -0
  225. {datachain-0.6.2 → datachain-0.6.4}/tests/unit/lib/test_listing_info.py +0 -0
  226. {datachain-0.6.2 → datachain-0.6.4}/tests/unit/lib/test_schema.py +0 -0
  227. {datachain-0.6.2 → datachain-0.6.4}/tests/unit/lib/test_signal_schema.py +0 -0
  228. {datachain-0.6.2 → datachain-0.6.4}/tests/unit/lib/test_sql_to_python.py +0 -0
  229. {datachain-0.6.2 → datachain-0.6.4}/tests/unit/lib/test_text.py +0 -0
  230. {datachain-0.6.2 → datachain-0.6.4}/tests/unit/lib/test_udf_signature.py +0 -0
  231. {datachain-0.6.2 → datachain-0.6.4}/tests/unit/lib/test_webdataset.py +0 -0
  232. {datachain-0.6.2 → datachain-0.6.4}/tests/unit/sql/__init__.py +0 -0
  233. {datachain-0.6.2 → datachain-0.6.4}/tests/unit/sql/sqlite/__init__.py +0 -0
  234. {datachain-0.6.2 → datachain-0.6.4}/tests/unit/sql/sqlite/test_utils.py +0 -0
  235. {datachain-0.6.2 → datachain-0.6.4}/tests/unit/sql/test_array.py +0 -0
  236. {datachain-0.6.2 → datachain-0.6.4}/tests/unit/sql/test_conditional.py +0 -0
  237. {datachain-0.6.2 → datachain-0.6.4}/tests/unit/sql/test_path.py +0 -0
  238. {datachain-0.6.2 → datachain-0.6.4}/tests/unit/sql/test_random.py +0 -0
  239. {datachain-0.6.2 → datachain-0.6.4}/tests/unit/sql/test_selectable.py +0 -0
  240. {datachain-0.6.2 → datachain-0.6.4}/tests/unit/sql/test_string.py +0 -0
  241. {datachain-0.6.2 → datachain-0.6.4}/tests/unit/test_asyn.py +0 -0
  242. {datachain-0.6.2 → datachain-0.6.4}/tests/unit/test_cache.py +0 -0
  243. {datachain-0.6.2 → datachain-0.6.4}/tests/unit/test_catalog.py +0 -0
  244. {datachain-0.6.2 → datachain-0.6.4}/tests/unit/test_cli_parsing.py +0 -0
  245. {datachain-0.6.2 → datachain-0.6.4}/tests/unit/test_client.py +0 -0
  246. {datachain-0.6.2 → datachain-0.6.4}/tests/unit/test_client_s3.py +0 -0
  247. {datachain-0.6.2 → datachain-0.6.4}/tests/unit/test_config.py +0 -0
  248. {datachain-0.6.2 → datachain-0.6.4}/tests/unit/test_data_storage.py +0 -0
  249. {datachain-0.6.2 → datachain-0.6.4}/tests/unit/test_database_engine.py +0 -0
  250. {datachain-0.6.2 → datachain-0.6.4}/tests/unit/test_dataset.py +0 -0
  251. {datachain-0.6.2 → datachain-0.6.4}/tests/unit/test_dispatch.py +0 -0
  252. {datachain-0.6.2 → datachain-0.6.4}/tests/unit/test_fileslice.py +0 -0
  253. {datachain-0.6.2 → datachain-0.6.4}/tests/unit/test_id_generator.py +0 -0
  254. {datachain-0.6.2 → datachain-0.6.4}/tests/unit/test_listing.py +0 -0
  255. {datachain-0.6.2 → datachain-0.6.4}/tests/unit/test_module_exports.py +0 -0
  256. {datachain-0.6.2 → datachain-0.6.4}/tests/unit/test_query.py +0 -0
  257. {datachain-0.6.2 → datachain-0.6.4}/tests/unit/test_query_metrics.py +0 -0
  258. {datachain-0.6.2 → datachain-0.6.4}/tests/unit/test_query_params.py +0 -0
  259. {datachain-0.6.2 → datachain-0.6.4}/tests/unit/test_serializer.py +0 -0
  260. {datachain-0.6.2 → datachain-0.6.4}/tests/unit/test_session.py +0 -0
  261. {datachain-0.6.2 → datachain-0.6.4}/tests/unit/test_utils.py +0 -0
  262. {datachain-0.6.2 → datachain-0.6.4}/tests/unit/test_warehouse.py +0 -0
  263. {datachain-0.6.2 → datachain-0.6.4}/tests/utils.py +0 -0
@@ -101,6 +101,6 @@ jobs:
101
101
  pytest
102
102
  --config-file=pyproject.toml -rs
103
103
  --splits=6 --group=${{ matrix.group }} --durations-path=../../.github/.test_durations
104
- -m 'not benchmark'
104
+ --benchmark-skip
105
105
  tests ../datachain/tests
106
106
  working-directory: backend/datachain_server
@@ -24,7 +24,7 @@ repos:
24
24
  - id: trailing-whitespace
25
25
  exclude: '^LICENSES/'
26
26
  - repo: https://github.com/astral-sh/ruff-pre-commit
27
- rev: 'v0.7.0'
27
+ rev: 'v0.7.1'
28
28
  hooks:
29
29
  - id: ruff
30
30
  args: [--fix, --exit-non-zero-on-fix]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datachain
3
- Version: 0.6.2
3
+ Version: 0.6.4
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License: Apache-2.0
@@ -82,7 +82,7 @@ Requires-Dist: requests-mock; extra == "tests"
82
82
  Requires-Dist: scipy; extra == "tests"
83
83
  Provides-Extra: dev
84
84
  Requires-Dist: datachain[docs,tests]; extra == "dev"
85
- Requires-Dist: mypy==1.12.1; extra == "dev"
85
+ Requires-Dist: mypy==1.13.0; extra == "dev"
86
86
  Requires-Dist: types-python-dateutil; extra == "dev"
87
87
  Requires-Dist: types-pytz; extra == "dev"
88
88
  Requires-Dist: types-PyYAML; extra == "dev"
@@ -22,8 +22,7 @@ def bench(session: nox.Session) -> None:
22
22
  session.install(".[tests]")
23
23
  session.run(
24
24
  "pytest",
25
- "-m",
26
- "benchmark",
25
+ "--benchmark-only",
27
26
  "--benchmark-group-by",
28
27
  "func",
29
28
  *session.posargs,
@@ -94,7 +94,7 @@ tests = [
94
94
  ]
95
95
  dev = [
96
96
  "datachain[docs,tests]",
97
- "mypy==1.12.1",
97
+ "mypy==1.13.0",
98
98
  "types-python-dateutil",
99
99
  "types-pytz",
100
100
  "types-PyYAML",
@@ -127,9 +127,8 @@ namespaces = false
127
127
  [tool.setuptools_scm]
128
128
 
129
129
  [tool.pytest.ini_options]
130
- addopts = "-rfEs -m 'not benchmark and not examples'"
130
+ addopts = "-rfEs -m 'not examples' --benchmark-skip"
131
131
  markers = [
132
- "benchmark: benchmarks.",
133
132
  "e2e: End-to-end tests",
134
133
  "examples: All examples",
135
134
  "computer_vision: Computer vision examples",
@@ -214,6 +213,7 @@ ignore = [
214
213
  select = [
215
214
  "B", # flake8-bugbear
216
215
  "C4", # flake8-comprehensions
216
+ "C420", # unnecessary-dict-comprehension-for-iterable
217
217
  "C90", # mccabe
218
218
  "W", # pycodestyle - Warning
219
219
  "E", # pycodestyle - Error
@@ -252,11 +252,10 @@ select = [
252
252
  "NPY", # numpy
253
253
  "TRY004", # type-check-without-type-error
254
254
  "TRY201", # verbose-raise
255
- "TRY302", # useless-try-except
255
+ "TRY203", # useless-try-except
256
256
  "TRY401", # verbose-log-message
257
257
  "RUF022", # unsorted-dunder-all
258
258
  "RUF023", # unsorted-dunder-slots
259
- "RUF025", # unnecessary-dict-comprehension-for-iterable
260
259
  "RUF027", # missing-f-string-syntax
261
260
  "RUF030", # assert-with-print-message
262
261
  "RUF101", # redirected-noqa
@@ -42,6 +42,7 @@ from datachain.dataset import (
42
42
  DatasetStats,
43
43
  DatasetStatus,
44
44
  RowDict,
45
+ StorageURI,
45
46
  create_dataset_uri,
46
47
  parse_dataset_uri,
47
48
  )
@@ -58,7 +59,6 @@ from datachain.node import DirType, Node, NodeWithPath
58
59
  from datachain.nodes_thread_pool import NodesThreadPool
59
60
  from datachain.remote.studio import StudioClient
60
61
  from datachain.sql.types import DateTime, SQLType, String
61
- from datachain.storage import StorageURI
62
62
  from datachain.utils import (
63
63
  DataChainDir,
64
64
  batched,
@@ -1702,31 +1702,9 @@ class Catalog:
1702
1702
  *,
1703
1703
  client_config=None,
1704
1704
  ) -> None:
1705
- root_sources = [
1706
- src for src in sources if Client.get_implementation(src).is_root_url(src)
1707
- ]
1708
- non_root_sources = [
1709
- src
1710
- for src in sources
1711
- if not Client.get_implementation(src).is_root_url(src)
1712
- ]
1713
-
1714
- client_config = client_config or self.client_config
1715
-
1716
- # for root sources (e.g s3://) we are just getting all buckets and
1717
- # saving them as storages, without further indexing in each bucket
1718
- for source in root_sources:
1719
- for bucket in Client.get_implementation(source).ls_buckets(**client_config):
1720
- client = self.get_client(bucket.uri, **client_config)
1721
- print(f"Registering storage {client.uri}")
1722
- self.metastore.create_storage_if_not_registered(client.uri)
1723
-
1724
1705
  self.enlist_sources(
1725
- non_root_sources,
1706
+ sources,
1726
1707
  update,
1727
- client_config=client_config,
1708
+ client_config=client_config or self.client_config,
1728
1709
  only_index=True,
1729
1710
  )
1730
-
1731
- def find_stale_storages(self) -> None:
1732
- self.metastore.find_stale_storages()
@@ -568,12 +568,6 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
568
568
  )
569
569
  add_sources_arg(parse_index)
570
570
 
571
- subp.add_parser(
572
- "find-stale-storages",
573
- parents=[parent_parser],
574
- description="Finds and marks stale storages",
575
- )
576
-
577
571
  show_parser = subp.add_parser(
578
572
  "show",
579
573
  parents=[parent_parser],
@@ -1100,8 +1094,6 @@ def main(argv: Optional[list[str]] = None) -> int: # noqa: C901, PLR0912, PLR09
1100
1094
  )
1101
1095
  elif args.command == "completion":
1102
1096
  print(completion(args.shell))
1103
- elif args.command == "find-stale-storages":
1104
- catalog.find_stale_storages()
1105
1097
  elif args.command == "query":
1106
1098
  query(
1107
1099
  catalog,
@@ -31,11 +31,12 @@ from datachain.error import ClientError as DataChainClientError
31
31
  from datachain.lib.file import File
32
32
  from datachain.nodes_fetcher import NodesFetcher
33
33
  from datachain.nodes_thread_pool import NodeChunk
34
- from datachain.storage import StorageURI
35
34
 
36
35
  if TYPE_CHECKING:
37
36
  from fsspec.spec import AbstractFileSystem
38
37
 
38
+ from datachain.dataset import StorageURI
39
+
39
40
 
40
41
  logger = logging.getLogger("datachain")
41
42
 
@@ -63,7 +64,7 @@ def _is_win_local_path(uri: str) -> bool:
63
64
 
64
65
  class Bucket(NamedTuple):
65
66
  name: str
66
- uri: StorageURI
67
+ uri: "StorageURI"
67
68
  created: Optional[datetime]
68
69
 
69
70
 
@@ -115,7 +116,7 @@ class Client(ABC):
115
116
  return DATA_SOURCE_URI_PATTERN.match(name) is not None
116
117
 
117
118
  @staticmethod
118
- def parse_url(source: str) -> tuple[StorageURI, str]:
119
+ def parse_url(source: str) -> tuple["StorageURI", str]:
119
120
  cls = Client.get_implementation(source)
120
121
  storage_name, rel_path = cls.split_url(source)
121
122
  return cls.get_uri(storage_name), rel_path
@@ -148,7 +149,7 @@ class Client(ABC):
148
149
  @classmethod
149
150
  def from_source(
150
151
  cls,
151
- uri: StorageURI,
152
+ uri: "StorageURI",
152
153
  cache: DataChainCache,
153
154
  **kwargs,
154
155
  ) -> "Client":
@@ -156,6 +157,8 @@ class Client(ABC):
156
157
 
157
158
  @classmethod
158
159
  def ls_buckets(cls, **kwargs) -> Iterator[Bucket]:
160
+ from datachain.dataset import StorageURI
161
+
159
162
  for entry in cls.create_fs(**kwargs).ls(cls.PREFIX, detail=True):
160
163
  name = entry["name"].rstrip("/")
161
164
  yield Bucket(
@@ -169,7 +172,9 @@ class Client(ABC):
169
172
  return url == cls.PREFIX
170
173
 
171
174
  @classmethod
172
- def get_uri(cls, name) -> StorageURI:
175
+ def get_uri(cls, name) -> "StorageURI":
176
+ from datachain.dataset import StorageURI
177
+
173
178
  return StorageURI(f"{cls.PREFIX}{name}")
174
179
 
175
180
  @classmethod
@@ -2,16 +2,18 @@ import os
2
2
  import posixpath
3
3
  from datetime import datetime, timezone
4
4
  from pathlib import Path
5
- from typing import Any
5
+ from typing import TYPE_CHECKING, Any
6
6
  from urllib.parse import urlparse
7
7
 
8
8
  from fsspec.implementations.local import LocalFileSystem
9
9
 
10
10
  from datachain.lib.file import File
11
- from datachain.storage import StorageURI
12
11
 
13
12
  from .fsspec import Client
14
13
 
14
+ if TYPE_CHECKING:
15
+ from datachain.dataset import StorageURI
16
+
15
17
 
16
18
  class FileClient(Client):
17
19
  FS_CLASS = LocalFileSystem
@@ -28,7 +30,9 @@ class FileClient(Client):
28
30
  raise TypeError("Signed urls are not implemented for local file system")
29
31
 
30
32
  @classmethod
31
- def get_uri(cls, name) -> StorageURI:
33
+ def get_uri(cls, name) -> "StorageURI":
34
+ from datachain.dataset import StorageURI
35
+
32
36
  return StorageURI(f'{cls.PREFIX}/{name.removeprefix("/")}')
33
37
 
34
38
  @classmethod