datachain 0.3.11__tar.gz → 0.3.13__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (247)
  1. {datachain-0.3.11 → datachain-0.3.13}/.pre-commit-config.yaml +1 -1
  2. {datachain-0.3.11/src/datachain.egg-info → datachain-0.3.13}/PKG-INFO +6 -7
  3. {datachain-0.3.11 → datachain-0.3.13}/README.rst +5 -5
  4. datachain-0.3.13/docs/assets/datachain-white.svg +1 -0
  5. {datachain-0.3.11 → datachain-0.3.13}/examples/get_started/udfs/stateful.py +4 -0
  6. {datachain-0.3.11 → datachain-0.3.13}/examples/multimodal/wds.py +1 -1
  7. {datachain-0.3.11 → datachain-0.3.13}/mkdocs.yml +2 -2
  8. {datachain-0.3.11 → datachain-0.3.13}/pyproject.toml +4 -2
  9. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/cache.py +0 -1
  10. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/catalog/catalog.py +50 -153
  11. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/cli.py +4 -6
  12. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/client/fsspec.py +0 -1
  13. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/client/s3.py +0 -4
  14. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/data_storage/schema.py +4 -8
  15. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/data_storage/warehouse.py +6 -17
  16. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/error.py +0 -4
  17. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/lib/clip.py +1 -1
  18. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/lib/dc.py +17 -5
  19. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/lib/file.py +9 -11
  20. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/lib/image.py +1 -1
  21. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/lib/meta_formats.py +4 -8
  22. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/lib/model_store.py +6 -1
  23. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/lib/text.py +1 -1
  24. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/lib/webdataset.py +13 -0
  25. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/lib/webdataset_laion.py +13 -0
  26. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/listing.py +2 -2
  27. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/node.py +4 -26
  28. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/query/builtins.py +0 -14
  29. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/query/schema.py +1 -16
  30. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/utils.py +0 -3
  31. {datachain-0.3.11 → datachain-0.3.13/src/datachain.egg-info}/PKG-INFO +6 -7
  32. {datachain-0.3.11 → datachain-0.3.13}/src/datachain.egg-info/SOURCES.txt +1 -1
  33. {datachain-0.3.11 → datachain-0.3.13}/src/datachain.egg-info/requires.txt +0 -1
  34. {datachain-0.3.11 → datachain-0.3.13}/tests/conftest.py +0 -3
  35. {datachain-0.3.11 → datachain-0.3.13}/tests/data.py +0 -20
  36. {datachain-0.3.11 → datachain-0.3.13}/tests/func/test_catalog.py +21 -43
  37. {datachain-0.3.11 → datachain-0.3.13}/tests/func/test_datachain.py +0 -1
  38. {datachain-0.3.11 → datachain-0.3.13}/tests/func/test_dataset_query.py +17 -42
  39. {datachain-0.3.11 → datachain-0.3.13}/tests/func/test_datasets.py +0 -2
  40. {datachain-0.3.11 → datachain-0.3.13}/tests/func/test_ls.py +0 -15
  41. {datachain-0.3.11 → datachain-0.3.13}/tests/func/test_meta_formats.py +0 -1
  42. {datachain-0.3.11 → datachain-0.3.13}/tests/func/test_pull.py +1 -10
  43. {datachain-0.3.11 → datachain-0.3.13}/tests/func/test_pytorch.py +10 -3
  44. datachain-0.3.13/tests/func/test_query.py +173 -0
  45. {datachain-0.3.11 → datachain-0.3.13}/tests/unit/lib/test_datachain.py +0 -1
  46. {datachain-0.3.11 → datachain-0.3.13}/tests/unit/lib/test_file.py +3 -7
  47. {datachain-0.3.11 → datachain-0.3.13}/tests/unit/test_cache.py +3 -7
  48. {datachain-0.3.11 → datachain-0.3.13}/tests/unit/test_client_s3.py +0 -1
  49. {datachain-0.3.11 → datachain-0.3.13}/tests/unit/test_data_storage.py +28 -32
  50. {datachain-0.3.11 → datachain-0.3.13}/tests/unit/test_dataset.py +0 -6
  51. {datachain-0.3.11 → datachain-0.3.13}/tests/unit/test_udf.py +0 -3
  52. {datachain-0.3.11 → datachain-0.3.13}/tests/utils.py +1 -15
  53. datachain-0.3.11/docs/assets/datachain_logotype.svg +0 -33
  54. datachain-0.3.11/tests/func/test_query.py +0 -377
  55. {datachain-0.3.11 → datachain-0.3.13}/.cruft.json +0 -0
  56. {datachain-0.3.11 → datachain-0.3.13}/.gitattributes +0 -0
  57. {datachain-0.3.11 → datachain-0.3.13}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  58. {datachain-0.3.11 → datachain-0.3.13}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
  59. {datachain-0.3.11 → datachain-0.3.13}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  60. {datachain-0.3.11 → datachain-0.3.13}/.github/codecov.yaml +0 -0
  61. {datachain-0.3.11 → datachain-0.3.13}/.github/dependabot.yml +0 -0
  62. {datachain-0.3.11 → datachain-0.3.13}/.github/workflows/benchmarks.yml +0 -0
  63. {datachain-0.3.11 → datachain-0.3.13}/.github/workflows/release.yml +0 -0
  64. {datachain-0.3.11 → datachain-0.3.13}/.github/workflows/tests-studio.yml +0 -0
  65. {datachain-0.3.11 → datachain-0.3.13}/.github/workflows/tests.yml +0 -0
  66. {datachain-0.3.11 → datachain-0.3.13}/.github/workflows/update-template.yaml +0 -0
  67. {datachain-0.3.11 → datachain-0.3.13}/.gitignore +0 -0
  68. {datachain-0.3.11 → datachain-0.3.13}/CODE_OF_CONDUCT.rst +0 -0
  69. {datachain-0.3.11 → datachain-0.3.13}/CONTRIBUTING.rst +0 -0
  70. {datachain-0.3.11 → datachain-0.3.13}/LICENSE +0 -0
  71. {datachain-0.3.11 → datachain-0.3.13}/docs/assets/captioned_cartoons.png +0 -0
  72. {datachain-0.3.11 → datachain-0.3.13}/docs/assets/datachain.svg +0 -0
  73. {datachain-0.3.11 → datachain-0.3.13}/docs/assets/flowchart.png +0 -0
  74. {datachain-0.3.11 → datachain-0.3.13}/docs/index.md +0 -0
  75. {datachain-0.3.11 → datachain-0.3.13}/docs/references/datachain.md +0 -0
  76. {datachain-0.3.11 → datachain-0.3.13}/docs/references/datatype.md +0 -0
  77. {datachain-0.3.11 → datachain-0.3.13}/docs/references/file.md +0 -0
  78. {datachain-0.3.11 → datachain-0.3.13}/docs/references/index.md +0 -0
  79. {datachain-0.3.11 → datachain-0.3.13}/docs/references/sql.md +0 -0
  80. {datachain-0.3.11 → datachain-0.3.13}/docs/references/torch.md +0 -0
  81. {datachain-0.3.11 → datachain-0.3.13}/docs/references/udf.md +0 -0
  82. {datachain-0.3.11 → datachain-0.3.13}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
  83. {datachain-0.3.11 → datachain-0.3.13}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
  84. {datachain-0.3.11 → datachain-0.3.13}/examples/computer_vision/openimage-detect.py +0 -0
  85. {datachain-0.3.11 → datachain-0.3.13}/examples/get_started/common_sql_functions.py +0 -0
  86. {datachain-0.3.11 → datachain-0.3.13}/examples/get_started/json-csv-reader.py +0 -0
  87. {datachain-0.3.11 → datachain-0.3.13}/examples/get_started/torch-loader.py +0 -0
  88. {datachain-0.3.11 → datachain-0.3.13}/examples/get_started/udfs/parallel.py +0 -0
  89. {datachain-0.3.11 → datachain-0.3.13}/examples/get_started/udfs/simple.py +0 -0
  90. {datachain-0.3.11 → datachain-0.3.13}/examples/llm_and_nlp/claude-query.py +0 -0
  91. {datachain-0.3.11 → datachain-0.3.13}/examples/llm_and_nlp/unstructured-text.py +0 -0
  92. {datachain-0.3.11 → datachain-0.3.13}/examples/multimodal/clip_inference.py +0 -0
  93. {datachain-0.3.11 → datachain-0.3.13}/examples/multimodal/hf_pipeline.py +0 -0
  94. {datachain-0.3.11 → datachain-0.3.13}/examples/multimodal/openai_image_desc_lib.py +0 -0
  95. {datachain-0.3.11 → datachain-0.3.13}/examples/multimodal/wds_filtered.py +0 -0
  96. {datachain-0.3.11 → datachain-0.3.13}/noxfile.py +0 -0
  97. {datachain-0.3.11 → datachain-0.3.13}/setup.cfg +0 -0
  98. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/__init__.py +0 -0
  99. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/__main__.py +0 -0
  100. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/asyn.py +0 -0
  101. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/catalog/__init__.py +0 -0
  102. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/catalog/datasource.py +0 -0
  103. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/catalog/loader.py +0 -0
  104. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/cli_utils.py +0 -0
  105. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/client/__init__.py +0 -0
  106. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/client/azure.py +0 -0
  107. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/client/fileslice.py +0 -0
  108. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/client/gcs.py +0 -0
  109. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/client/hf.py +0 -0
  110. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/client/local.py +0 -0
  111. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/config.py +0 -0
  112. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/data_storage/__init__.py +0 -0
  113. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/data_storage/db_engine.py +0 -0
  114. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/data_storage/id_generator.py +0 -0
  115. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/data_storage/job.py +0 -0
  116. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/data_storage/metastore.py +0 -0
  117. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/data_storage/serializer.py +0 -0
  118. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/data_storage/sqlite.py +0 -0
  119. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/dataset.py +0 -0
  120. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/job.py +0 -0
  121. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/lib/__init__.py +0 -0
  122. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/lib/arrow.py +0 -0
  123. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/lib/convert/__init__.py +0 -0
  124. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/lib/convert/flatten.py +0 -0
  125. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/lib/convert/python_to_sql.py +0 -0
  126. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/lib/convert/sql_to_python.py +0 -0
  127. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/lib/convert/unflatten.py +0 -0
  128. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/lib/convert/values_to_tuples.py +0 -0
  129. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/lib/data_model.py +0 -0
  130. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/lib/dataset_info.py +0 -0
  131. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/lib/hf.py +0 -0
  132. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/lib/listing.py +0 -0
  133. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/lib/listing_info.py +0 -0
  134. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/lib/pytorch.py +0 -0
  135. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/lib/settings.py +0 -0
  136. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/lib/signal_schema.py +0 -0
  137. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/lib/udf.py +0 -0
  138. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/lib/udf_signature.py +0 -0
  139. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/lib/utils.py +0 -0
  140. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/lib/vfile.py +0 -0
  141. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/nodes_fetcher.py +0 -0
  142. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/nodes_thread_pool.py +0 -0
  143. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/progress.py +0 -0
  144. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/py.typed +0 -0
  145. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/query/__init__.py +0 -0
  146. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/query/batch.py +0 -0
  147. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/query/dataset.py +0 -0
  148. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/query/dispatch.py +0 -0
  149. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/query/metrics.py +0 -0
  150. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/query/params.py +0 -0
  151. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/query/queue.py +0 -0
  152. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/query/session.py +0 -0
  153. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/query/udf.py +0 -0
  154. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/remote/__init__.py +0 -0
  155. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/remote/studio.py +0 -0
  156. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/sql/__init__.py +0 -0
  157. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/sql/default/__init__.py +0 -0
  158. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/sql/default/base.py +0 -0
  159. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/sql/functions/__init__.py +0 -0
  160. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/sql/functions/array.py +0 -0
  161. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/sql/functions/conditional.py +0 -0
  162. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/sql/functions/path.py +0 -0
  163. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/sql/functions/random.py +0 -0
  164. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/sql/functions/string.py +0 -0
  165. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/sql/selectable.py +0 -0
  166. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/sql/sqlite/__init__.py +0 -0
  167. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/sql/sqlite/base.py +0 -0
  168. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/sql/sqlite/types.py +0 -0
  169. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/sql/sqlite/vector.py +0 -0
  170. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/sql/types.py +0 -0
  171. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/sql/utils.py +0 -0
  172. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/storage.py +0 -0
  173. {datachain-0.3.11 → datachain-0.3.13}/src/datachain/torch/__init__.py +0 -0
  174. {datachain-0.3.11 → datachain-0.3.13}/src/datachain.egg-info/dependency_links.txt +0 -0
  175. {datachain-0.3.11 → datachain-0.3.13}/src/datachain.egg-info/entry_points.txt +0 -0
  176. {datachain-0.3.11 → datachain-0.3.13}/src/datachain.egg-info/top_level.txt +0 -0
  177. {datachain-0.3.11 → datachain-0.3.13}/tests/__init__.py +0 -0
  178. {datachain-0.3.11 → datachain-0.3.13}/tests/benchmarks/__init__.py +0 -0
  179. {datachain-0.3.11 → datachain-0.3.13}/tests/benchmarks/conftest.py +0 -0
  180. {datachain-0.3.11 → datachain-0.3.13}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
  181. {datachain-0.3.11 → datachain-0.3.13}/tests/benchmarks/datasets/.dvc/config +0 -0
  182. {datachain-0.3.11 → datachain-0.3.13}/tests/benchmarks/datasets/.gitignore +0 -0
  183. {datachain-0.3.11 → datachain-0.3.13}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
  184. {datachain-0.3.11 → datachain-0.3.13}/tests/benchmarks/test_datachain.py +0 -0
  185. {datachain-0.3.11 → datachain-0.3.13}/tests/benchmarks/test_ls.py +0 -0
  186. {datachain-0.3.11 → datachain-0.3.13}/tests/benchmarks/test_version.py +0 -0
  187. {datachain-0.3.11 → datachain-0.3.13}/tests/examples/__init__.py +0 -0
  188. {datachain-0.3.11 → datachain-0.3.13}/tests/examples/test_examples.py +0 -0
  189. {datachain-0.3.11 → datachain-0.3.13}/tests/examples/test_wds_e2e.py +0 -0
  190. {datachain-0.3.11 → datachain-0.3.13}/tests/examples/wds_data.py +0 -0
  191. {datachain-0.3.11 → datachain-0.3.13}/tests/func/__init__.py +0 -0
  192. {datachain-0.3.11 → datachain-0.3.13}/tests/func/test_client.py +0 -0
  193. {datachain-0.3.11 → datachain-0.3.13}/tests/func/test_feature_pickling.py +0 -0
  194. {datachain-0.3.11 → datachain-0.3.13}/tests/func/test_listing.py +0 -0
  195. {datachain-0.3.11 → datachain-0.3.13}/tests/func/test_metrics.py +0 -0
  196. {datachain-0.3.11 → datachain-0.3.13}/tests/scripts/feature_class.py +0 -0
  197. {datachain-0.3.11 → datachain-0.3.13}/tests/scripts/feature_class_parallel.py +0 -0
  198. {datachain-0.3.11 → datachain-0.3.13}/tests/scripts/feature_class_parallel_data_model.py +0 -0
  199. {datachain-0.3.11 → datachain-0.3.13}/tests/scripts/name_len_slow.py +0 -0
  200. {datachain-0.3.11 → datachain-0.3.13}/tests/test_cli_e2e.py +0 -0
  201. {datachain-0.3.11 → datachain-0.3.13}/tests/test_query_e2e.py +0 -0
  202. {datachain-0.3.11 → datachain-0.3.13}/tests/unit/__init__.py +0 -0
  203. {datachain-0.3.11 → datachain-0.3.13}/tests/unit/lib/__init__.py +0 -0
  204. {datachain-0.3.11 → datachain-0.3.13}/tests/unit/lib/conftest.py +0 -0
  205. {datachain-0.3.11 → datachain-0.3.13}/tests/unit/lib/test_arrow.py +0 -0
  206. {datachain-0.3.11 → datachain-0.3.13}/tests/unit/lib/test_clip.py +0 -0
  207. {datachain-0.3.11 → datachain-0.3.13}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
  208. {datachain-0.3.11 → datachain-0.3.13}/tests/unit/lib/test_datachain_merge.py +0 -0
  209. {datachain-0.3.11 → datachain-0.3.13}/tests/unit/lib/test_feature.py +0 -0
  210. {datachain-0.3.11 → datachain-0.3.13}/tests/unit/lib/test_feature_utils.py +0 -0
  211. {datachain-0.3.11 → datachain-0.3.13}/tests/unit/lib/test_hf.py +0 -0
  212. {datachain-0.3.11 → datachain-0.3.13}/tests/unit/lib/test_image.py +0 -0
  213. {datachain-0.3.11 → datachain-0.3.13}/tests/unit/lib/test_schema.py +0 -0
  214. {datachain-0.3.11 → datachain-0.3.13}/tests/unit/lib/test_signal_schema.py +0 -0
  215. {datachain-0.3.11 → datachain-0.3.13}/tests/unit/lib/test_sql_to_python.py +0 -0
  216. {datachain-0.3.11 → datachain-0.3.13}/tests/unit/lib/test_text.py +0 -0
  217. {datachain-0.3.11 → datachain-0.3.13}/tests/unit/lib/test_udf_signature.py +0 -0
  218. {datachain-0.3.11 → datachain-0.3.13}/tests/unit/lib/test_utils.py +0 -0
  219. {datachain-0.3.11 → datachain-0.3.13}/tests/unit/lib/test_webdataset.py +0 -0
  220. {datachain-0.3.11 → datachain-0.3.13}/tests/unit/sql/__init__.py +0 -0
  221. {datachain-0.3.11 → datachain-0.3.13}/tests/unit/sql/sqlite/__init__.py +0 -0
  222. {datachain-0.3.11 → datachain-0.3.13}/tests/unit/sql/sqlite/test_utils.py +0 -0
  223. {datachain-0.3.11 → datachain-0.3.13}/tests/unit/sql/test_array.py +0 -0
  224. {datachain-0.3.11 → datachain-0.3.13}/tests/unit/sql/test_conditional.py +0 -0
  225. {datachain-0.3.11 → datachain-0.3.13}/tests/unit/sql/test_path.py +0 -0
  226. {datachain-0.3.11 → datachain-0.3.13}/tests/unit/sql/test_random.py +0 -0
  227. {datachain-0.3.11 → datachain-0.3.13}/tests/unit/sql/test_selectable.py +0 -0
  228. {datachain-0.3.11 → datachain-0.3.13}/tests/unit/sql/test_string.py +0 -0
  229. {datachain-0.3.11 → datachain-0.3.13}/tests/unit/test_asyn.py +0 -0
  230. {datachain-0.3.11 → datachain-0.3.13}/tests/unit/test_catalog.py +0 -0
  231. {datachain-0.3.11 → datachain-0.3.13}/tests/unit/test_catalog_loader.py +0 -0
  232. {datachain-0.3.11 → datachain-0.3.13}/tests/unit/test_cli_parsing.py +0 -0
  233. {datachain-0.3.11 → datachain-0.3.13}/tests/unit/test_client.py +0 -0
  234. {datachain-0.3.11 → datachain-0.3.13}/tests/unit/test_database_engine.py +0 -0
  235. {datachain-0.3.11 → datachain-0.3.13}/tests/unit/test_dispatch.py +0 -0
  236. {datachain-0.3.11 → datachain-0.3.13}/tests/unit/test_fileslice.py +0 -0
  237. {datachain-0.3.11 → datachain-0.3.13}/tests/unit/test_id_generator.py +0 -0
  238. {datachain-0.3.11 → datachain-0.3.13}/tests/unit/test_listing.py +0 -0
  239. {datachain-0.3.11 → datachain-0.3.13}/tests/unit/test_metastore.py +0 -0
  240. {datachain-0.3.11 → datachain-0.3.13}/tests/unit/test_module_exports.py +0 -0
  241. {datachain-0.3.11 → datachain-0.3.13}/tests/unit/test_query_metrics.py +0 -0
  242. {datachain-0.3.11 → datachain-0.3.13}/tests/unit/test_query_params.py +0 -0
  243. {datachain-0.3.11 → datachain-0.3.13}/tests/unit/test_serializer.py +0 -0
  244. {datachain-0.3.11 → datachain-0.3.13}/tests/unit/test_session.py +0 -0
  245. {datachain-0.3.11 → datachain-0.3.13}/tests/unit/test_storage.py +0 -0
  246. {datachain-0.3.11 → datachain-0.3.13}/tests/unit/test_utils.py +0 -0
  247. {datachain-0.3.11 → datachain-0.3.13}/tests/unit/test_warehouse.py +0 -0
@@ -24,7 +24,7 @@ repos:
24
24
  - id: trailing-whitespace
25
25
  exclude: '^LICENSES/'
26
26
  - repo: https://github.com/astral-sh/ruff-pre-commit
27
- rev: 'v0.6.3'
27
+ rev: 'v0.6.4'
28
28
  hooks:
29
29
  - id: ruff
30
30
  args: [--fix, --exit-non-zero-on-fix]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datachain
3
- Version: 0.3.11
3
+ Version: 0.3.13
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License: Apache-2.0
@@ -71,7 +71,6 @@ Requires-Dist: pytest-cov>=4.1.0; extra == "tests"
71
71
  Requires-Dist: pytest-mock>=3.12.0; extra == "tests"
72
72
  Requires-Dist: pytest-servers[all]>=0.5.5; extra == "tests"
73
73
  Requires-Dist: pytest-benchmark[histogram]; extra == "tests"
74
- Requires-Dist: pytest-asyncio>=0.23.2; extra == "tests"
75
74
  Requires-Dist: pytest-xdist>=3.3.1; extra == "tests"
76
75
  Requires-Dist: virtualenv; extra == "tests"
77
76
  Requires-Dist: dulwich; extra == "tests"
@@ -96,12 +95,14 @@ Requires-Dist: unstructured[pdf]; extra == "examples"
96
95
  Requires-Dist: pdfplumber==0.11.4; extra == "examples"
97
96
  Requires-Dist: huggingface_hub[hf_transfer]; extra == "examples"
98
97
 
99
- .. image:: docs/assets/datachain_logotype.svg
100
- :height: 48
101
- :alt: DataChain logo
98
+ ================
99
+ |logo| DataChain
100
+ ================
102
101
 
103
102
  |PyPI| |Python Version| |Codecov| |Tests|
104
103
 
104
+ .. |logo| image:: docs/assets/datachain.svg
105
+ :height: 24
105
106
  .. |PyPI| image:: https://img.shields.io/pypi/v/datachain.svg
106
107
  :target: https://pypi.org/project/datachain/
107
108
  :alt: PyPI
@@ -115,8 +116,6 @@ Requires-Dist: huggingface_hub[hf_transfer]; extra == "examples"
115
116
  :target: https://github.com/iterative/datachain/actions/workflows/tests.yml
116
117
  :alt: Tests
117
118
 
118
- ----------------
119
-
120
119
  DataChain is a modern Pythonic data-frame library designed for artificial intelligence.
121
120
  It is made to organize your unstructured data into datasets and wrangle it at scale on
122
121
  your local machine. Datachain does not abstract or hide the AI models and API calls, but helps to integrate them into the postmodern data stack.
@@ -1,9 +1,11 @@
1
- .. image:: docs/assets/datachain_logotype.svg
2
- :height: 48
3
- :alt: DataChain logo
1
+ ================
2
+ |logo| DataChain
3
+ ================
4
4
 
5
5
  |PyPI| |Python Version| |Codecov| |Tests|
6
6
 
7
+ .. |logo| image:: docs/assets/datachain.svg
8
+ :height: 24
7
9
  .. |PyPI| image:: https://img.shields.io/pypi/v/datachain.svg
8
10
  :target: https://pypi.org/project/datachain/
9
11
  :alt: PyPI
@@ -17,8 +19,6 @@
17
19
  :target: https://github.com/iterative/datachain/actions/workflows/tests.yml
18
20
  :alt: Tests
19
21
 
20
- ----------------
21
-
22
22
  DataChain is a modern Pythonic data-frame library designed for artificial intelligence.
23
23
  It is made to organize your unstructured data into datasets and wrangle it at scale on
24
24
  your local machine. Datachain does not abstract or hide the AI models and API calls, but helps to integrate them into the postmodern data stack.
@@ -0,0 +1 @@
1
+ <svg width="180" height="33" fill="none" xmlns="http://www.w3.org/2000/svg"><style>.prefix__logo-fill{fill:#fff}</style><path fill-rule="evenodd" clip-rule="evenodd" d="M23.997 24.53l2.34-2.342 3.14 3.135-2.357 2.342a5.533 5.533 0 01-7.822 0l-4.704-4.7a5.536 5.536 0 010-7.823l4.76-4.763 3.124 3.14-4.745 4.747a1.106 1.106 0 000 1.57l4.699 4.694a1.107 1.107 0 001.565 0z" fill="url(#prefix__paint0_linear_449_28)"/><path fill-rule="evenodd" clip-rule="evenodd" d="M37.733 10.65a1.184 1.184 0 01-.234.357L26.253 22.255l3.13 3.135 11.234-11.242a5.536 5.536 0 000-7.824l-4.699-4.705a5.534 5.534 0 00-7.822 0l-3.278 3.263 3.134 3.135 3.268-3.268a1.107 1.107 0 011.564 0l4.694 4.694a1.108 1.108 0 01.244 1.208h.011z" fill="url(#prefix__paint1_linear_449_28)"/><path d="M24.54 14.722L22.2 17.063v.016l-2.405 2.388 3.14 3.134 4.741-4.75a5.534 5.534 0 000-7.822l-4.704-4.704a5.535 5.535 0 00-7.824 0l-5.955 5.954 3.14 3.13 5.944-5.945a1.107 1.107 0 011.565 0l4.7 4.694a1.107 1.107 0 010 1.564z" fill="url(#prefix__paint2_linear_449_28)"/><path d="M4.514 22.335c.054-.133.139-.256.24-.357L7.1 19.632l-.005-.011 3.14-3.129 2.147-2.135-3.135-3.14-7.629 7.638a5.534 5.534 0 000 7.822l4.705 4.704a5.536 5.536 0 007.824 0l3.175-3.18-3.134-3.13-3.165 3.165a1.106 1.106 0 01-1.57 0l-4.7-4.693a1.107 1.107 0 01-.24-1.208z" fill="url(#prefix__paint3_linear_449_28)"/><path d="M55.645 26.613c-.994 0-1.908-.182-2.745-.547a6.407 6.407 0 01-2.169-1.538 7.037 7.037 0 01-1.41-2.294 8.126 8.126 0 01-.497-2.867v-.547c0-1.008.157-1.955.47-2.841a7.478 7.478 0 011.36-2.32 6.201 6.201 0 012.116-1.538c.836-.382 1.76-.573 2.77-.573 1.115 0 2.09.243 2.927.73.854.469 1.533 1.181 2.038 2.137.506.956.784 2.155.837 3.597L60.27 16.76V7.117h3.633v19.027h-2.875v-6.02h.627c-.052 1.441-.348 2.649-.888 3.622-.54.956-1.255 1.677-2.143 2.163-.871.47-1.864.704-2.98.704zm.81-3.05c.714 0 1.367-.156 1.96-.469.592-.33 1.063-.799 1.41-1.407.367-.626.55-1.355.55-2.19v-1.042c0-.834-.183-1.53-.55-2.085a3.572 3.572 0 00-1.436-1.303 4.078 
4.078 0 00-1.934-.47c-.784 0-1.481.192-2.091.574-.592.365-1.063.886-1.411 1.564-.331.678-.497 1.468-.497 2.372 0 .903.174 1.694.523 2.372.348.66.819 1.172 1.411 1.537.61.365 1.298.548 2.065.548zM76.635 26.144v-4.196h-.6v-4.666c0-.817-.201-1.425-.602-1.824-.4-.4-1.019-.6-1.855-.6a68.629 68.629 0 00-3.423.104c-.61.018-1.16.044-1.647.079v-3.076c.4-.035.854-.07 1.359-.104.505-.035 1.02-.052 1.542-.052.54-.018 1.045-.026 1.515-.026 1.464 0 2.675.19 3.633.573.976.382 1.707.982 2.195 1.799.505.816.758 1.885.758 3.205v8.784h-2.875zm-4.573.365c-1.028 0-1.934-.183-2.718-.547a4.274 4.274 0 01-1.803-1.564c-.418-.678-.627-1.495-.627-2.45 0-1.043.252-1.894.758-2.555.522-.66 1.245-1.155 2.169-1.485.94-.33 2.038-.495 3.293-.495h3.292v2.163h-3.345c-.836 0-1.48.208-1.934.625-.435.4-.653.921-.653 1.564s.218 1.164.653 1.564c.453.4 1.098.6 1.934.6.505 0 .967-.087 1.385-.261a2.413 2.413 0 001.072-.938c.296-.452.462-1.06.496-1.825l.889 1.017c-.087.99-.331 1.824-.732 2.502a3.899 3.899 0 01-1.62 1.564c-.68.347-1.516.52-2.509.52zM89.569 26.326c-1.307 0-2.387-.165-3.24-.495a3.635 3.635 0 01-1.882-1.72c-.419-.817-.628-1.911-.628-3.284l.026-12.824h3.398l-.026 13.058c0 .695.183 1.234.548 1.616.384.365.924.548 1.62.548h2.222v3.101h-2.038zM81.572 14.65V11.99h10.035v2.659H81.572zM103.203 26.144v-4.196h-.601v-4.666c0-.817-.201-1.425-.601-1.824-.401-.4-1.02-.6-1.856-.6a68.629 68.629 0 00-3.423.104c-.61.018-1.159.044-1.647.079v-3.076c.4-.035.854-.07 1.36-.104.504-.035 1.018-.052 1.541-.052.54-.018 1.045-.026 1.516-.026 1.463 0 2.674.19 3.632.573.976.382 1.708.982 2.196 1.799.505.816.757 1.885.757 3.205v8.784h-2.874zm-4.574.365c-1.028 0-1.934-.183-2.718-.547a4.274 4.274 0 01-1.803-1.564c-.418-.678-.627-1.495-.627-2.45 0-1.043.253-1.894.758-2.555.523-.66 1.246-1.155 2.169-1.485.94-.33 2.038-.495 3.293-.495h3.293v2.163h-3.345c-.837 0-1.481.208-1.934.625-.436.4-.654.921-.654 1.564s.218 1.164.654 1.564c.453.4 1.097.6 1.934.6.505 0 .966-.087 1.385-.261a2.417 2.417 0 
001.071-.938c.296-.452.462-1.06.497-1.825l.888 1.017c-.087.99-.331 1.824-.732 2.502a3.897 3.897 0 01-1.62 1.564c-.679.347-1.516.52-2.509.52zM116.267 26.64c-1.237 0-2.309-.21-3.215-.626a6.773 6.773 0 01-2.247-1.668A7.117 7.117 0 01109.472 22a8.19 8.19 0 01-.444-2.659v-.495c0-.956.148-1.868.444-2.737a6.905 6.905 0 011.385-2.346 6.488 6.488 0 012.247-1.642c.906-.417 1.952-.625 3.136-.625 1.237 0 2.344.243 3.319.73.976.469 1.751 1.13 2.326 1.98.593.852.924 1.843.993 2.972h-3.528a2.824 2.824 0 00-.941-1.825c-.522-.486-1.245-.73-2.169-.73-.801 0-1.472.192-2.012.574-.523.382-.915.912-1.176 1.59-.261.66-.392 1.425-.392 2.294 0 .834.122 1.59.366 2.267.261.678.653 1.208 1.176 1.59.54.382 1.228.574 2.065.574.627 0 1.167-.114 1.62-.34.453-.225.81-.538 1.071-.938.279-.4.453-.851.523-1.355h3.528c-.07 1.147-.409 2.155-1.019 3.023-.593.852-1.385 1.52-2.378 2.007-.976.487-2.091.73-3.345.73zM125.919 26.144V7.117h3.633v11.104h-.628c0-1.425.183-2.633.549-3.623.366-.99.906-1.747 1.62-2.268.732-.521 1.656-.782 2.771-.782h.156c1.621 0 2.849.556 3.685 1.668.836 1.112 1.255 2.728 1.255 4.848v8.08h-3.633v-8.419c0-.903-.261-1.616-.784-2.137-.505-.521-1.176-.782-2.012-.782-.889 0-1.612.296-2.169.886-.54.574-.81 1.33-.81 2.268v8.184h-3.633zM151.463 26.144v-4.196h-.601v-4.666c0-.817-.201-1.425-.601-1.824-.401-.4-1.02-.6-1.856-.6a68.524 68.524 0 00-3.423.104c-.61.018-1.159.044-1.647.079v-3.076c.401-.035.854-.07 1.359-.104a22.491 22.491 0 011.542-.052c.54-.018 1.045-.026 1.516-.026 1.463 0 2.674.19 3.632.573.976.382 1.708.982 2.196 1.799.505.816.757 1.885.757 3.205v8.784h-2.874zm-4.574.365c-1.027 0-1.933-.183-2.717-.547a4.277 4.277 0 01-1.804-1.564c-.418-.678-.627-1.495-.627-2.45 0-1.043.253-1.894.758-2.555.523-.66 1.246-1.155 2.169-1.485.941-.33 2.038-.495 3.293-.495h3.293v2.163h-3.345c-.837 0-1.481.208-1.934.625-.436.4-.654.921-.654 1.564s.218 1.164.654 1.564c.453.4 1.097.6 1.934.6.505 0 .967-.087 1.385-.261a2.417 2.417 0 001.071-.938c.296-.452.462-1.06.497-1.825l.888 1.017c-.087.99-.331 
1.824-.731 2.502a3.905 3.905 0 01-1.621 1.564c-.679.347-1.516.52-2.509.52zM158.908 26.144V11.99h3.632v14.153h-3.632zm-1.986-11.442v-2.71h5.618v2.71h-5.618zm3.319-4.405c-.715 0-1.246-.183-1.594-.547-.331-.383-.497-.86-.497-1.434 0-.573.166-1.042.497-1.407.348-.365.879-.548 1.594-.548.714 0 1.237.183 1.568.548.331.365.496.834.496 1.407 0 .574-.165 1.051-.496 1.434-.331.364-.854.547-1.568.547zM166.727 26.144V11.99h2.875v6.073h-.262c0-1.442.192-2.641.575-3.597.384-.973.95-1.703 1.699-2.19.766-.486 1.716-.729 2.848-.729h.157c1.69 0 2.971.547 3.842 1.642.871 1.077 1.307 2.693 1.307 4.848v8.106h-3.633v-8.419c0-.869-.253-1.572-.758-2.11-.488-.54-1.167-.809-2.038-.809-.889 0-1.612.278-2.169.834-.54.539-.811 1.269-.811 2.19v8.314h-3.632z" class="prefix__logo-fill"/><defs><linearGradient id="prefix__paint0_linear_449_28" x1="36.032" y1="5.404" x2="18.067" y2="23.054" gradientUnits="userSpaceOnUse"><stop stop-color="#F46837"/><stop offset="1" stop-color="#945DD6"/></linearGradient><linearGradient id="prefix__paint1_linear_449_28" x1="36.045" y1="5.607" x2="18.067" y2="23.363" gradientUnits="userSpaceOnUse"><stop stop-color="#F46837"/><stop offset="1" stop-color="#945DD6"/></linearGradient><linearGradient id="prefix__paint2_linear_449_28" x1="5.924" y1="27.432" x2="23.883" y2="10.239" gradientUnits="userSpaceOnUse"><stop stop-color="#13ADC7"/><stop offset="1" stop-color="#945DD6"/></linearGradient><linearGradient id="prefix__paint3_linear_449_28" x1="5.77" y1="27.586" x2="23.574" y2="9.776" gradientUnits="userSpaceOnUse"><stop stop-color="#13ADC7"/><stop offset="1" stop-color="#945DD6"/></linearGradient></defs></svg>
@@ -5,6 +5,10 @@ To install dependencies:
5
5
 
6
6
  """
7
7
 
8
+ import os
9
+
10
+ os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
11
+
8
12
  import open_clip
9
13
 
10
14
  from datachain import C, DataChain, Mapper
@@ -16,7 +16,7 @@ NPZ_METADATA = os.getenv(
16
16
  )
17
17
 
18
18
  wds_images = (
19
- DataChain.from_storage(IMAGE_TARS)
19
+ DataChain.from_storage(IMAGE_TARS, type="image")
20
20
  .settings(cache=True)
21
21
  .gen(laion=process_webdataset(spec=WDSLaion), params="file")
22
22
  )
@@ -1,5 +1,5 @@
1
1
  site_name: ''
2
- site_url: https://datachain.dvc.ai
2
+ site_url: https://docs.datachain.ai
3
3
  site_description: Wrangle unstructured AI data at scale
4
4
 
5
5
  repo_url: "https://github.com/iterative/datachain"
@@ -15,7 +15,7 @@ validation:
15
15
 
16
16
  theme:
17
17
  name: material
18
- logo: assets/datachain_logotype.svg
18
+ logo: assets/datachain-white.svg
19
19
  favicon: assets/datachain.svg
20
20
  icon:
21
21
  repo: fontawesome/brands/github
@@ -82,7 +82,6 @@ tests = [
82
82
  "pytest-mock>=3.12.0",
83
83
  "pytest-servers[all]>=0.5.5",
84
84
  "pytest-benchmark[histogram]",
85
- "pytest-asyncio>=0.23.2",
86
85
  "pytest-xdist>=3.3.1",
87
86
  "virtualenv",
88
87
  "dulwich",
@@ -136,13 +135,16 @@ markers = [
136
135
  "llm_and_nlp: LLM and NLP examples",
137
136
  "multimodal: Multimodal examples"
138
137
  ]
139
- asyncio_mode = "auto"
140
138
  filterwarnings = [
141
139
  "error::pandas.errors.PerformanceWarning",
142
140
  "error::pydantic.warnings.PydanticDeprecatedSince20",
143
141
  "error::pytest_mock.PytestMockWarning",
144
142
  "error::pytest.PytestCollectionWarning",
145
143
  "error::sqlalchemy.exc.SADeprecationWarning",
144
+ "ignore::DeprecationWarning:timm.*",
145
+ "ignore::DeprecationWarning:botocore.auth",
146
+ "ignore::DeprecationWarning:datasets.utils._dill",
147
+ "ignore::DeprecationWarning:librosa.core.intervals",
146
148
  "ignore:Field name .* shadows an attribute in parent:UserWarning" # datachain.lib.feature
147
149
  ]
148
150
 
@@ -29,7 +29,6 @@ class UniqueId:
29
29
  etag: str
30
30
  version: str = ""
31
31
  is_latest: bool = True
32
- vtype: str = ""
33
32
  location: Optional[str] = None
34
33
  last_modified: datetime = TIME_ZERO
35
34
 
@@ -12,7 +12,6 @@ import sys
12
12
  import time
13
13
  import traceback
14
14
  from collections.abc import Iterable, Iterator, Mapping, Sequence
15
- from contextlib import contextmanager, nullcontext
16
15
  from copy import copy
17
16
  from dataclasses import dataclass
18
17
  from functools import cached_property, reduce
@@ -23,7 +22,6 @@ from typing import (
23
22
  TYPE_CHECKING,
24
23
  Any,
25
24
  Callable,
26
- NamedTuple,
27
25
  NoReturn,
28
26
  Optional,
29
27
  Union,
@@ -58,14 +56,13 @@ from datachain.error import (
58
56
  PendingIndexingError,
59
57
  QueryScriptCancelError,
60
58
  QueryScriptCompileError,
61
- QueryScriptDatasetNotFound,
62
59
  QueryScriptRunError,
63
60
  )
64
61
  from datachain.listing import Listing
65
62
  from datachain.node import DirType, Node, NodeWithPath
66
63
  from datachain.nodes_thread_pool import NodesThreadPool
67
64
  from datachain.remote.studio import StudioClient
68
- from datachain.sql.types import JSON, Boolean, DateTime, Int, Int64, SQLType, String
65
+ from datachain.sql.types import JSON, Boolean, DateTime, Int64, SQLType, String
69
66
  from datachain.storage import Storage, StorageStatus, StorageURI
70
67
  from datachain.utils import (
71
68
  DataChainDir,
@@ -115,44 +112,19 @@ def noop(_: str):
115
112
  pass
116
113
 
117
114
 
118
- @contextmanager
119
- def print_and_capture(
120
- stream: "IO[bytes]|IO[str]", callback: Callable[[str], None] = noop
121
- ) -> "Iterator[list[str]]":
122
- lines: list[str] = []
123
- append = lines.append
115
+ def _process_stream(stream: "IO[bytes]", callback: Callable[[str], None]) -> None:
116
+ buffer = b""
117
+ while byt := stream.read(1): # Read one byte at a time
118
+ buffer += byt
124
119
 
125
- def loop() -> None:
126
- buffer = b""
127
- while byt := stream.read(1): # Read one byte at a time
128
- buffer += byt.encode("utf-8") if isinstance(byt, str) else byt
129
-
130
- if byt in (b"\n", b"\r"): # Check for newline or carriage return
131
- line = buffer.decode("utf-8")
132
- print(line, end="")
133
- callback(line)
134
- append(line)
135
- buffer = b"" # Clear buffer for next line
136
-
137
- if buffer: # Handle any remaining data in the buffer
120
+ if byt in (b"\n", b"\r"): # Check for newline or carriage return
138
121
  line = buffer.decode("utf-8")
139
- print(line, end="")
140
122
  callback(line)
141
- append(line)
142
-
143
- thread = Thread(target=loop, daemon=True)
144
- thread.start()
145
-
146
- try:
147
- yield lines
148
- finally:
149
- thread.join()
150
-
123
+ buffer = b"" # Clear buffer for next line
151
124
 
152
- class QueryResult(NamedTuple):
153
- dataset: Optional[DatasetRecord]
154
- version: Optional[int]
155
- output: str
125
+ if buffer: # Handle any remaining data in the buffer
126
+ line = buffer.decode("utf-8")
127
+ callback(line)
156
128
 
157
129
 
158
130
  class DatasetRowsFetcher(NodesThreadPool):
@@ -541,8 +513,6 @@ def find_column_to_str( # noqa: PLR0911
541
513
  )
542
514
  if column == "name":
543
515
  return posixpath.basename(row[field_lookup["path"]]) or ""
544
- if column == "owner":
545
- return row[field_lookup["owner_name"]] or ""
546
516
  if column == "path":
547
517
  is_dir = row[field_lookup["dir_type"]] == DirType.DIR
548
518
  path = row[field_lookup["path"]]
@@ -651,11 +621,6 @@ class Catalog:
651
621
  code_ast.body[-1:] = new_expressions
652
622
  return code_ast
653
623
 
654
- def compile_query_script(self, script: str) -> str:
655
- code_ast = ast.parse(script)
656
- code_ast = self.attach_query_wrapper(code_ast)
657
- return ast.unparse(code_ast)
658
-
659
624
  def parse_url(self, uri: str, **config: Any) -> tuple[Client, str]:
660
625
  config = config or self.client_config
661
626
  return Client.parse_url(uri, self.cache, **config)
@@ -699,16 +664,12 @@ class Catalog:
699
664
  source_metastore = self.metastore.clone(client.uri)
700
665
 
701
666
  columns = [
702
- Column("vtype", String),
703
- Column("dir_type", Int),
704
667
  Column("path", String),
705
668
  Column("etag", String),
706
669
  Column("version", String),
707
670
  Column("is_latest", Boolean),
708
671
  Column("last_modified", DateTime(timezone=True)),
709
672
  Column("size", Int64),
710
- Column("owner_name", String),
711
- Column("owner_id", String),
712
673
  Column("location", JSON),
713
674
  Column("source", String),
714
675
  ]
@@ -1549,7 +1510,6 @@ class Catalog:
1549
1510
  row["etag"],
1550
1511
  row["version"],
1551
1512
  row["is_latest"],
1552
- row["vtype"],
1553
1513
  row["location"],
1554
1514
  row["last_modified"],
1555
1515
  )
@@ -1805,14 +1765,15 @@ class Catalog:
1805
1765
  def query(
1806
1766
  self,
1807
1767
  query_script: str,
1808
- envs: Optional[Mapping[str, str]] = None,
1809
- python_executable: Optional[str] = None,
1768
+ env: Optional[Mapping[str, str]] = None,
1769
+ python_executable: str = sys.executable,
1810
1770
  save: bool = False,
1811
1771
  capture_output: bool = True,
1812
1772
  output_hook: Callable[[str], None] = noop,
1813
1773
  params: Optional[dict[str, str]] = None,
1814
1774
  job_id: Optional[str] = None,
1815
- ) -> QueryResult:
1775
+ _execute_last_expression: bool = False,
1776
+ ) -> None:
1816
1777
  """
1817
1778
  Method to run custom user Python script to run a query and, as result,
1818
1779
  creates new dataset from the results of a query.
@@ -1835,92 +1796,21 @@ class Catalog:
1835
1796
  C.size > 1000
1836
1797
  )
1837
1798
  """
1838
- if not job_id:
1839
- python_version = f"{sys.version_info.major}.{sys.version_info.minor}"
1840
- job_id = self.metastore.create_job(
1841
- name="",
1842
- query=query_script,
1843
- params=params,
1844
- python_version=python_version,
1845
- )
1846
-
1847
- lines, proc = self.run_query(
1848
- python_executable or sys.executable,
1849
- query_script,
1850
- envs,
1851
- capture_output,
1852
- output_hook,
1853
- params,
1854
- save,
1855
- job_id,
1856
- )
1857
- output = "".join(lines)
1858
-
1859
- if proc.returncode:
1860
- if proc.returncode == QUERY_SCRIPT_CANCELED_EXIT_CODE:
1861
- raise QueryScriptCancelError(
1862
- "Query script was canceled by user",
1863
- return_code=proc.returncode,
1864
- output=output,
1865
- )
1866
- raise QueryScriptRunError(
1867
- f"Query script exited with error code {proc.returncode}",
1868
- return_code=proc.returncode,
1869
- output=output,
1870
- )
1871
-
1872
- def _get_dataset_versions_by_job_id():
1873
- for dr, dv, job in self.list_datasets_versions():
1874
- if job and str(job.id) == job_id:
1875
- yield dr, dv
1876
-
1877
- try:
1878
- dr, dv = max(
1879
- _get_dataset_versions_by_job_id(), key=lambda x: x[1].created_at
1880
- )
1881
- except ValueError as e:
1882
- if not save:
1883
- return QueryResult(dataset=None, version=None, output=output)
1884
-
1885
- raise QueryScriptDatasetNotFound(
1886
- "No dataset found after running Query script",
1887
- output=output,
1888
- ) from e
1889
-
1890
- dr = self.update_dataset(
1891
- dr,
1892
- script_output=output,
1893
- query_script=query_script,
1894
- )
1895
- self.update_dataset_version_with_warehouse_info(
1896
- dr,
1897
- dv.version,
1898
- script_output=output,
1899
- query_script=query_script,
1900
- job_id=job_id,
1901
- is_job_result=True,
1902
- )
1903
- return QueryResult(dataset=dr, version=dv.version, output=output)
1799
+ if _execute_last_expression:
1800
+ try:
1801
+ code_ast = ast.parse(query_script)
1802
+ code_ast = self.attach_query_wrapper(code_ast)
1803
+ query_script_compiled = ast.unparse(code_ast)
1804
+ except Exception as exc:
1805
+ raise QueryScriptCompileError(
1806
+ f"Query script failed to compile, reason: {exc}"
1807
+ ) from exc
1808
+ else:
1809
+ query_script_compiled = query_script
1810
+ assert not save
1904
1811
 
1905
- def run_query(
1906
- self,
1907
- python_executable: str,
1908
- query_script: str,
1909
- envs: Optional[Mapping[str, str]],
1910
- capture_output: bool,
1911
- output_hook: Callable[[str], None],
1912
- params: Optional[dict[str, str]],
1913
- save: bool,
1914
- job_id: Optional[str],
1915
- ) -> tuple[list[str], subprocess.Popen]:
1916
- try:
1917
- query_script_compiled = self.compile_query_script(query_script)
1918
- except Exception as exc:
1919
- raise QueryScriptCompileError(
1920
- f"Query script failed to compile, reason: {exc}"
1921
- ) from exc
1922
- envs = dict(envs or os.environ)
1923
- envs.update(
1812
+ env = dict(env or os.environ)
1813
+ env.update(
1924
1814
  {
1925
1815
  "DATACHAIN_QUERY_PARAMS": json.dumps(params or {}),
1926
1816
  "PYTHONPATH": os.getcwd(), # For local imports
@@ -1929,19 +1819,28 @@ class Catalog:
1929
1819
  "DATACHAIN_JOB_ID": job_id or "",
1930
1820
  },
1931
1821
  )
1932
- with subprocess.Popen( # noqa: S603
1933
- [python_executable, "-c", query_script_compiled],
1934
- env=envs,
1935
- stdout=subprocess.PIPE if capture_output else None,
1936
- stderr=subprocess.STDOUT if capture_output else None,
1937
- bufsize=1,
1938
- text=False,
1939
- ) as proc:
1940
- out = proc.stdout
1941
- _lines: list[str] = []
1942
- ctx = print_and_capture(out, output_hook) if out else nullcontext(_lines)
1943
- with ctx as lines:
1944
- return lines, proc
1822
+ popen_kwargs = {}
1823
+ if capture_output:
1824
+ popen_kwargs = {"stdout": subprocess.PIPE, "stderr": subprocess.STDOUT}
1825
+
1826
+ cmd = [python_executable, "-c", query_script_compiled]
1827
+ with subprocess.Popen(cmd, env=env, **popen_kwargs) as proc: # type: ignore[call-overload] # noqa: S603
1828
+ if capture_output:
1829
+ args = (proc.stdout, output_hook)
1830
+ thread = Thread(target=_process_stream, args=args, daemon=True)
1831
+ thread.start()
1832
+ thread.join() # wait for the reader thread
1833
+
1834
+ if proc.returncode == QUERY_SCRIPT_CANCELED_EXIT_CODE:
1835
+ raise QueryScriptCancelError(
1836
+ "Query script was canceled by user",
1837
+ return_code=proc.returncode,
1838
+ )
1839
+ if proc.returncode:
1840
+ raise QueryScriptRunError(
1841
+ f"Query script exited with error code {proc.returncode}",
1842
+ return_code=proc.returncode,
1843
+ )
1945
1844
 
1946
1845
  def cp(
1947
1846
  self,
@@ -2081,8 +1980,6 @@ class Catalog:
2081
1980
  field_set.add("path")
2082
1981
  elif column == "name":
2083
1982
  field_set.add("path")
2084
- elif column == "owner":
2085
- field_set.add("owner_name")
2086
1983
  elif column == "path":
2087
1984
  field_set.add("dir_type")
2088
1985
  field_set.add("path")
@@ -24,7 +24,7 @@ logger = logging.getLogger("datachain")
24
24
 
25
25
  TTL_HUMAN = "4h"
26
26
  TTL_INT = 4 * 60 * 60
27
- FIND_COLUMNS = ["du", "name", "owner", "path", "size", "type"]
27
+ FIND_COLUMNS = ["du", "name", "path", "size", "type"]
28
28
 
29
29
 
30
30
  def human_time_type(value_str: str, can_be_none: bool = False) -> Optional[int]:
@@ -579,9 +579,8 @@ def _node_data_to_ls_values(row, long_format=False):
579
579
  value = name + ending
580
580
  if long_format:
581
581
  last_modified = row[2]
582
- owner_name = row[3]
583
582
  timestamp = last_modified if not is_dir else None
584
- return long_line_str(value, timestamp, owner_name)
583
+ return long_line_str(value, timestamp)
585
584
  return value
586
585
 
587
586
 
@@ -599,7 +598,7 @@ def _ls_urls_flat(
599
598
  if client_cls.is_root_url(source):
600
599
  buckets = client_cls.ls_buckets(**catalog.client_config)
601
600
  if long:
602
- values = (long_line_str(b.name, b.created, "") for b in buckets)
601
+ values = (long_line_str(b.name, b.created) for b in buckets)
603
602
  else:
604
603
  values = (b.name for b in buckets)
605
604
  yield source, values
@@ -607,7 +606,7 @@ def _ls_urls_flat(
607
606
  found = False
608
607
  fields = ["name", "dir_type"]
609
608
  if long:
610
- fields.extend(["last_modified", "owner_name"])
609
+ fields.append("last_modified")
611
610
  for data_source, results in catalog.ls([source], fields=fields, **kwargs):
612
611
  values = (_node_data_to_ls_values(r, long) for r in results)
613
612
  found = True
@@ -683,7 +682,6 @@ def ls_remote(
683
682
  entry = long_line_str(
684
683
  row["name"] + ("/" if row["dir_type"] else ""),
685
684
  row["last_modified"],
686
- row["owner_name"],
687
685
  )
688
686
  print(format_ls_entry(entry))
689
687
  else:
@@ -363,7 +363,6 @@ class Client(ABC):
363
363
  parent["path"],
364
364
  parent["size"],
365
365
  parent["etag"],
366
- vtype=parent["vtype"],
367
366
  location=parent["location"],
368
367
  )
369
368
  f = self.open_object(parent_uid, use_cache=use_cache)
@@ -119,8 +119,6 @@ class ClientS3(Client):
119
119
  is_latest=v.get("IsLatest", True),
120
120
  last_modified=v.get("LastModified", ""),
121
121
  size=v["Size"],
122
- owner_name=v.get("Owner", {}).get("DisplayName", ""),
123
- owner_id=v.get("Owner", {}).get("ID", ""),
124
122
  )
125
123
 
126
124
  async def _fetch_dir(
@@ -165,8 +163,6 @@ class ClientS3(Client):
165
163
  is_latest=v.get("IsLatest", True),
166
164
  last_modified=v.get("LastModified", ""),
167
165
  size=v["size"],
168
- owner_name=v.get("Owner", {}).get("DisplayName", ""),
169
- owner_id=v.get("Owner", {}).get("ID", ""),
170
166
  )
171
167
 
172
168
  def info_to_file(self, v: dict[str, Any], path: str) -> File:
@@ -10,9 +10,8 @@ from typing import (
10
10
 
11
11
  import sqlalchemy as sa
12
12
  from sqlalchemy.sql import func as f
13
- from sqlalchemy.sql.expression import null, true
13
+ from sqlalchemy.sql.expression import false, null, true
14
14
 
15
- from datachain.node import DirType
16
15
  from datachain.sql.functions import path
17
16
  from datachain.sql.types import Int, SQLType, UInt64
18
17
 
@@ -81,8 +80,7 @@ class DirExpansion:
81
80
  def base_select(q):
82
81
  return sa.select(
83
82
  q.c.sys__id,
84
- q.c.vtype,
85
- (q.c.dir_type == DirType.DIR).label("is_dir"),
83
+ false().label("is_dir"),
86
84
  q.c.source,
87
85
  q.c.path,
88
86
  q.c.version,
@@ -94,7 +92,6 @@ class DirExpansion:
94
92
  return (
95
93
  sa.select(
96
94
  f.min(q.c.sys__id).label("sys__id"),
97
- q.c.vtype,
98
95
  q.c.is_dir,
99
96
  q.c.source,
100
97
  q.c.path,
@@ -102,8 +99,8 @@ class DirExpansion:
102
99
  f.max(q.c.location).label("location"),
103
100
  )
104
101
  .select_from(q)
105
- .group_by(q.c.source, q.c.path, q.c.vtype, q.c.is_dir, q.c.version)
106
- .order_by(q.c.source, q.c.path, q.c.vtype, q.c.is_dir, q.c.version)
102
+ .group_by(q.c.source, q.c.path, q.c.is_dir, q.c.version)
103
+ .order_by(q.c.source, q.c.path, q.c.is_dir, q.c.version)
107
104
  )
108
105
 
109
106
  @classmethod
@@ -113,7 +110,6 @@ class DirExpansion:
113
110
  q = q.union_all(
114
111
  sa.select(
115
112
  sa.literal(-1).label("sys__id"),
116
- sa.literal("").label("vtype"),
117
113
  true().label("is_dir"),
118
114
  q.c.source,
119
115
  parent.label("path"),