datachain 0.3.18__tar.gz → 0.3.19__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (249) hide show
  1. {datachain-0.3.18 → datachain-0.3.19}/.github/workflows/benchmarks.yml +11 -8
  2. {datachain-0.3.18 → datachain-0.3.19}/.github/workflows/release.yml +6 -7
  3. {datachain-0.3.18 → datachain-0.3.19}/.github/workflows/tests-studio.yml +8 -5
  4. {datachain-0.3.18 → datachain-0.3.19}/.github/workflows/tests.yml +27 -18
  5. {datachain-0.3.18/src/datachain.egg-info → datachain-0.3.19}/PKG-INFO +1 -2
  6. {datachain-0.3.18 → datachain-0.3.19}/docs/references/file.md +2 -2
  7. {datachain-0.3.18 → datachain-0.3.19}/noxfile.py +2 -2
  8. {datachain-0.3.18 → datachain-0.3.19}/pyproject.toml +0 -1
  9. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/__init__.py +5 -2
  10. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/catalog/catalog.py +9 -79
  11. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/cli.py +0 -1
  12. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/dataset.py +7 -2
  13. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/error.py +6 -4
  14. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/lib/arrow.py +8 -3
  15. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/lib/dc.py +2 -2
  16. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/lib/file.py +23 -5
  17. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/query/dataset.py +0 -22
  18. {datachain-0.3.18 → datachain-0.3.19/src/datachain.egg-info}/PKG-INFO +1 -2
  19. {datachain-0.3.18 → datachain-0.3.19}/src/datachain.egg-info/SOURCES.txt +1 -0
  20. {datachain-0.3.18 → datachain-0.3.19}/src/datachain.egg-info/requires.txt +0 -1
  21. {datachain-0.3.18 → datachain-0.3.19}/tests/conftest.py +1 -0
  22. {datachain-0.3.18 → datachain-0.3.19}/tests/examples/test_examples.py +38 -30
  23. {datachain-0.3.18 → datachain-0.3.19}/tests/func/test_catalog.py +2 -108
  24. {datachain-0.3.18 → datachain-0.3.19}/tests/func/test_datachain.py +43 -1
  25. {datachain-0.3.18 → datachain-0.3.19}/tests/func/test_dataset_query.py +6 -2
  26. {datachain-0.3.18 → datachain-0.3.19}/tests/func/test_datasets.py +6 -2
  27. datachain-0.3.19/tests/func/test_query.py +112 -0
  28. {datachain-0.3.18 → datachain-0.3.19}/tests/unit/lib/test_arrow.py +8 -9
  29. {datachain-0.3.18 → datachain-0.3.19}/tests/unit/test_module_exports.py +2 -2
  30. datachain-0.3.19/tests/unit/test_query.py +65 -0
  31. datachain-0.3.18/tests/func/test_query.py +0 -182
  32. {datachain-0.3.18 → datachain-0.3.19}/.cruft.json +0 -0
  33. {datachain-0.3.18 → datachain-0.3.19}/.gitattributes +0 -0
  34. {datachain-0.3.18 → datachain-0.3.19}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  35. {datachain-0.3.18 → datachain-0.3.19}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
  36. {datachain-0.3.18 → datachain-0.3.19}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  37. {datachain-0.3.18 → datachain-0.3.19}/.github/codecov.yaml +0 -0
  38. {datachain-0.3.18 → datachain-0.3.19}/.github/dependabot.yml +0 -0
  39. {datachain-0.3.18 → datachain-0.3.19}/.github/workflows/update-template.yaml +0 -0
  40. {datachain-0.3.18 → datachain-0.3.19}/.gitignore +0 -0
  41. {datachain-0.3.18 → datachain-0.3.19}/.pre-commit-config.yaml +0 -0
  42. {datachain-0.3.18 → datachain-0.3.19}/CODE_OF_CONDUCT.rst +0 -0
  43. {datachain-0.3.18 → datachain-0.3.19}/CONTRIBUTING.rst +0 -0
  44. {datachain-0.3.18 → datachain-0.3.19}/LICENSE +0 -0
  45. {datachain-0.3.18 → datachain-0.3.19}/README.rst +0 -0
  46. {datachain-0.3.18 → datachain-0.3.19}/docs/assets/captioned_cartoons.png +0 -0
  47. {datachain-0.3.18 → datachain-0.3.19}/docs/assets/datachain-white.svg +0 -0
  48. {datachain-0.3.18 → datachain-0.3.19}/docs/assets/datachain.svg +0 -0
  49. {datachain-0.3.18 → datachain-0.3.19}/docs/assets/flowchart.png +0 -0
  50. {datachain-0.3.18 → datachain-0.3.19}/docs/index.md +0 -0
  51. {datachain-0.3.18 → datachain-0.3.19}/docs/references/datachain.md +0 -0
  52. {datachain-0.3.18 → datachain-0.3.19}/docs/references/datatype.md +0 -0
  53. {datachain-0.3.18 → datachain-0.3.19}/docs/references/index.md +0 -0
  54. {datachain-0.3.18 → datachain-0.3.19}/docs/references/sql.md +0 -0
  55. {datachain-0.3.18 → datachain-0.3.19}/docs/references/torch.md +0 -0
  56. {datachain-0.3.18 → datachain-0.3.19}/docs/references/udf.md +0 -0
  57. {datachain-0.3.18 → datachain-0.3.19}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
  58. {datachain-0.3.18 → datachain-0.3.19}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
  59. {datachain-0.3.18 → datachain-0.3.19}/examples/computer_vision/openimage-detect.py +0 -0
  60. {datachain-0.3.18 → datachain-0.3.19}/examples/get_started/common_sql_functions.py +0 -0
  61. {datachain-0.3.18 → datachain-0.3.19}/examples/get_started/json-csv-reader.py +0 -0
  62. {datachain-0.3.18 → datachain-0.3.19}/examples/get_started/torch-loader.py +0 -0
  63. {datachain-0.3.18 → datachain-0.3.19}/examples/get_started/udfs/parallel.py +0 -0
  64. {datachain-0.3.18 → datachain-0.3.19}/examples/get_started/udfs/simple.py +0 -0
  65. {datachain-0.3.18 → datachain-0.3.19}/examples/get_started/udfs/stateful.py +0 -0
  66. {datachain-0.3.18 → datachain-0.3.19}/examples/llm_and_nlp/claude-query.py +0 -0
  67. {datachain-0.3.18 → datachain-0.3.19}/examples/llm_and_nlp/unstructured-embeddings-gen.py +0 -0
  68. {datachain-0.3.18 → datachain-0.3.19}/examples/llm_and_nlp/unstructured-summary-map.py +0 -0
  69. {datachain-0.3.18 → datachain-0.3.19}/examples/multimodal/clip_inference.py +0 -0
  70. {datachain-0.3.18 → datachain-0.3.19}/examples/multimodal/hf_pipeline.py +0 -0
  71. {datachain-0.3.18 → datachain-0.3.19}/examples/multimodal/openai_image_desc_lib.py +0 -0
  72. {datachain-0.3.18 → datachain-0.3.19}/examples/multimodal/wds.py +0 -0
  73. {datachain-0.3.18 → datachain-0.3.19}/examples/multimodal/wds_filtered.py +0 -0
  74. {datachain-0.3.18 → datachain-0.3.19}/mkdocs.yml +0 -0
  75. {datachain-0.3.18 → datachain-0.3.19}/setup.cfg +0 -0
  76. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/__main__.py +0 -0
  77. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/asyn.py +0 -0
  78. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/cache.py +0 -0
  79. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/catalog/__init__.py +0 -0
  80. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/catalog/datasource.py +0 -0
  81. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/catalog/loader.py +0 -0
  82. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/cli_utils.py +0 -0
  83. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/client/__init__.py +0 -0
  84. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/client/azure.py +0 -0
  85. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/client/fileslice.py +0 -0
  86. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/client/fsspec.py +0 -0
  87. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/client/gcs.py +0 -0
  88. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/client/hf.py +0 -0
  89. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/client/local.py +0 -0
  90. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/client/s3.py +0 -0
  91. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/config.py +0 -0
  92. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/data_storage/__init__.py +0 -0
  93. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/data_storage/db_engine.py +0 -0
  94. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/data_storage/id_generator.py +0 -0
  95. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/data_storage/job.py +0 -0
  96. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/data_storage/metastore.py +0 -0
  97. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/data_storage/schema.py +0 -0
  98. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/data_storage/serializer.py +0 -0
  99. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/data_storage/sqlite.py +0 -0
  100. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/data_storage/warehouse.py +0 -0
  101. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/job.py +0 -0
  102. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/lib/__init__.py +0 -0
  103. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/lib/clip.py +0 -0
  104. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/lib/convert/__init__.py +0 -0
  105. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/lib/convert/flatten.py +0 -0
  106. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/lib/convert/python_to_sql.py +0 -0
  107. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/lib/convert/sql_to_python.py +0 -0
  108. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/lib/convert/unflatten.py +0 -0
  109. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/lib/convert/values_to_tuples.py +0 -0
  110. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/lib/data_model.py +0 -0
  111. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/lib/dataset_info.py +0 -0
  112. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/lib/hf.py +0 -0
  113. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/lib/image.py +0 -0
  114. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/lib/listing.py +0 -0
  115. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/lib/listing_info.py +0 -0
  116. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/lib/meta_formats.py +0 -0
  117. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/lib/model_store.py +0 -0
  118. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/lib/pytorch.py +0 -0
  119. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/lib/settings.py +0 -0
  120. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/lib/signal_schema.py +0 -0
  121. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/lib/tar.py +0 -0
  122. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/lib/text.py +0 -0
  123. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/lib/udf.py +0 -0
  124. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/lib/udf_signature.py +0 -0
  125. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/lib/utils.py +0 -0
  126. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/lib/vfile.py +0 -0
  127. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/lib/webdataset.py +0 -0
  128. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/lib/webdataset_laion.py +0 -0
  129. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/listing.py +0 -0
  130. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/node.py +0 -0
  131. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/nodes_fetcher.py +0 -0
  132. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/nodes_thread_pool.py +0 -0
  133. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/progress.py +0 -0
  134. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/py.typed +0 -0
  135. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/query/__init__.py +0 -0
  136. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/query/batch.py +0 -0
  137. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/query/dispatch.py +0 -0
  138. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/query/metrics.py +0 -0
  139. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/query/params.py +0 -0
  140. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/query/queue.py +0 -0
  141. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/query/schema.py +0 -0
  142. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/query/session.py +0 -0
  143. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/query/udf.py +0 -0
  144. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/remote/__init__.py +0 -0
  145. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/remote/studio.py +0 -0
  146. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/sql/__init__.py +0 -0
  147. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/sql/default/__init__.py +0 -0
  148. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/sql/default/base.py +0 -0
  149. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/sql/functions/__init__.py +0 -0
  150. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/sql/functions/array.py +0 -0
  151. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/sql/functions/conditional.py +0 -0
  152. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/sql/functions/path.py +0 -0
  153. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/sql/functions/random.py +0 -0
  154. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/sql/functions/string.py +0 -0
  155. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/sql/selectable.py +0 -0
  156. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/sql/sqlite/__init__.py +0 -0
  157. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/sql/sqlite/base.py +0 -0
  158. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/sql/sqlite/types.py +0 -0
  159. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/sql/sqlite/vector.py +0 -0
  160. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/sql/types.py +0 -0
  161. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/sql/utils.py +0 -0
  162. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/storage.py +0 -0
  163. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/telemetry.py +0 -0
  164. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/torch/__init__.py +0 -0
  165. {datachain-0.3.18 → datachain-0.3.19}/src/datachain/utils.py +0 -0
  166. {datachain-0.3.18 → datachain-0.3.19}/src/datachain.egg-info/dependency_links.txt +0 -0
  167. {datachain-0.3.18 → datachain-0.3.19}/src/datachain.egg-info/entry_points.txt +0 -0
  168. {datachain-0.3.18 → datachain-0.3.19}/src/datachain.egg-info/top_level.txt +0 -0
  169. {datachain-0.3.18 → datachain-0.3.19}/tests/__init__.py +0 -0
  170. {datachain-0.3.18 → datachain-0.3.19}/tests/benchmarks/__init__.py +0 -0
  171. {datachain-0.3.18 → datachain-0.3.19}/tests/benchmarks/conftest.py +0 -0
  172. {datachain-0.3.18 → datachain-0.3.19}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
  173. {datachain-0.3.18 → datachain-0.3.19}/tests/benchmarks/datasets/.dvc/config +0 -0
  174. {datachain-0.3.18 → datachain-0.3.19}/tests/benchmarks/datasets/.gitignore +0 -0
  175. {datachain-0.3.18 → datachain-0.3.19}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
  176. {datachain-0.3.18 → datachain-0.3.19}/tests/benchmarks/test_datachain.py +0 -0
  177. {datachain-0.3.18 → datachain-0.3.19}/tests/benchmarks/test_ls.py +0 -0
  178. {datachain-0.3.18 → datachain-0.3.19}/tests/benchmarks/test_version.py +0 -0
  179. {datachain-0.3.18 → datachain-0.3.19}/tests/data.py +0 -0
  180. {datachain-0.3.18 → datachain-0.3.19}/tests/examples/__init__.py +0 -0
  181. {datachain-0.3.18 → datachain-0.3.19}/tests/examples/test_wds_e2e.py +0 -0
  182. {datachain-0.3.18 → datachain-0.3.19}/tests/examples/wds_data.py +0 -0
  183. {datachain-0.3.18 → datachain-0.3.19}/tests/func/__init__.py +0 -0
  184. {datachain-0.3.18 → datachain-0.3.19}/tests/func/test_client.py +0 -0
  185. {datachain-0.3.18 → datachain-0.3.19}/tests/func/test_feature_pickling.py +0 -0
  186. {datachain-0.3.18 → datachain-0.3.19}/tests/func/test_listing.py +0 -0
  187. {datachain-0.3.18 → datachain-0.3.19}/tests/func/test_ls.py +0 -0
  188. {datachain-0.3.18 → datachain-0.3.19}/tests/func/test_meta_formats.py +0 -0
  189. {datachain-0.3.18 → datachain-0.3.19}/tests/func/test_metrics.py +0 -0
  190. {datachain-0.3.18 → datachain-0.3.19}/tests/func/test_pull.py +0 -0
  191. {datachain-0.3.18 → datachain-0.3.19}/tests/func/test_pytorch.py +0 -0
  192. {datachain-0.3.18 → datachain-0.3.19}/tests/scripts/feature_class.py +0 -0
  193. {datachain-0.3.18 → datachain-0.3.19}/tests/scripts/feature_class_parallel.py +0 -0
  194. {datachain-0.3.18 → datachain-0.3.19}/tests/scripts/feature_class_parallel_data_model.py +0 -0
  195. {datachain-0.3.18 → datachain-0.3.19}/tests/scripts/name_len_slow.py +0 -0
  196. {datachain-0.3.18 → datachain-0.3.19}/tests/test_cli_e2e.py +0 -0
  197. {datachain-0.3.18 → datachain-0.3.19}/tests/test_query_e2e.py +0 -0
  198. {datachain-0.3.18 → datachain-0.3.19}/tests/test_telemetry.py +0 -0
  199. {datachain-0.3.18 → datachain-0.3.19}/tests/unit/__init__.py +0 -0
  200. {datachain-0.3.18 → datachain-0.3.19}/tests/unit/lib/__init__.py +0 -0
  201. {datachain-0.3.18 → datachain-0.3.19}/tests/unit/lib/conftest.py +0 -0
  202. {datachain-0.3.18 → datachain-0.3.19}/tests/unit/lib/test_clip.py +0 -0
  203. {datachain-0.3.18 → datachain-0.3.19}/tests/unit/lib/test_datachain.py +0 -0
  204. {datachain-0.3.18 → datachain-0.3.19}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
  205. {datachain-0.3.18 → datachain-0.3.19}/tests/unit/lib/test_datachain_merge.py +0 -0
  206. {datachain-0.3.18 → datachain-0.3.19}/tests/unit/lib/test_feature.py +0 -0
  207. {datachain-0.3.18 → datachain-0.3.19}/tests/unit/lib/test_feature_utils.py +0 -0
  208. {datachain-0.3.18 → datachain-0.3.19}/tests/unit/lib/test_file.py +0 -0
  209. {datachain-0.3.18 → datachain-0.3.19}/tests/unit/lib/test_hf.py +0 -0
  210. {datachain-0.3.18 → datachain-0.3.19}/tests/unit/lib/test_image.py +0 -0
  211. {datachain-0.3.18 → datachain-0.3.19}/tests/unit/lib/test_schema.py +0 -0
  212. {datachain-0.3.18 → datachain-0.3.19}/tests/unit/lib/test_signal_schema.py +0 -0
  213. {datachain-0.3.18 → datachain-0.3.19}/tests/unit/lib/test_sql_to_python.py +0 -0
  214. {datachain-0.3.18 → datachain-0.3.19}/tests/unit/lib/test_text.py +0 -0
  215. {datachain-0.3.18 → datachain-0.3.19}/tests/unit/lib/test_udf_signature.py +0 -0
  216. {datachain-0.3.18 → datachain-0.3.19}/tests/unit/lib/test_utils.py +0 -0
  217. {datachain-0.3.18 → datachain-0.3.19}/tests/unit/lib/test_webdataset.py +0 -0
  218. {datachain-0.3.18 → datachain-0.3.19}/tests/unit/sql/__init__.py +0 -0
  219. {datachain-0.3.18 → datachain-0.3.19}/tests/unit/sql/sqlite/__init__.py +0 -0
  220. {datachain-0.3.18 → datachain-0.3.19}/tests/unit/sql/sqlite/test_utils.py +0 -0
  221. {datachain-0.3.18 → datachain-0.3.19}/tests/unit/sql/test_array.py +0 -0
  222. {datachain-0.3.18 → datachain-0.3.19}/tests/unit/sql/test_conditional.py +0 -0
  223. {datachain-0.3.18 → datachain-0.3.19}/tests/unit/sql/test_path.py +0 -0
  224. {datachain-0.3.18 → datachain-0.3.19}/tests/unit/sql/test_random.py +0 -0
  225. {datachain-0.3.18 → datachain-0.3.19}/tests/unit/sql/test_selectable.py +0 -0
  226. {datachain-0.3.18 → datachain-0.3.19}/tests/unit/sql/test_string.py +0 -0
  227. {datachain-0.3.18 → datachain-0.3.19}/tests/unit/test_asyn.py +0 -0
  228. {datachain-0.3.18 → datachain-0.3.19}/tests/unit/test_cache.py +0 -0
  229. {datachain-0.3.18 → datachain-0.3.19}/tests/unit/test_catalog.py +0 -0
  230. {datachain-0.3.18 → datachain-0.3.19}/tests/unit/test_catalog_loader.py +0 -0
  231. {datachain-0.3.18 → datachain-0.3.19}/tests/unit/test_cli_parsing.py +0 -0
  232. {datachain-0.3.18 → datachain-0.3.19}/tests/unit/test_client.py +0 -0
  233. {datachain-0.3.18 → datachain-0.3.19}/tests/unit/test_client_s3.py +0 -0
  234. {datachain-0.3.18 → datachain-0.3.19}/tests/unit/test_data_storage.py +0 -0
  235. {datachain-0.3.18 → datachain-0.3.19}/tests/unit/test_database_engine.py +0 -0
  236. {datachain-0.3.18 → datachain-0.3.19}/tests/unit/test_dataset.py +0 -0
  237. {datachain-0.3.18 → datachain-0.3.19}/tests/unit/test_dispatch.py +0 -0
  238. {datachain-0.3.18 → datachain-0.3.19}/tests/unit/test_fileslice.py +0 -0
  239. {datachain-0.3.18 → datachain-0.3.19}/tests/unit/test_id_generator.py +0 -0
  240. {datachain-0.3.18 → datachain-0.3.19}/tests/unit/test_listing.py +0 -0
  241. {datachain-0.3.18 → datachain-0.3.19}/tests/unit/test_metastore.py +0 -0
  242. {datachain-0.3.18 → datachain-0.3.19}/tests/unit/test_query_metrics.py +0 -0
  243. {datachain-0.3.18 → datachain-0.3.19}/tests/unit/test_query_params.py +0 -0
  244. {datachain-0.3.18 → datachain-0.3.19}/tests/unit/test_serializer.py +0 -0
  245. {datachain-0.3.18 → datachain-0.3.19}/tests/unit/test_session.py +0 -0
  246. {datachain-0.3.18 → datachain-0.3.19}/tests/unit/test_storage.py +0 -0
  247. {datachain-0.3.18 → datachain-0.3.19}/tests/unit/test_utils.py +0 -0
  248. {datachain-0.3.18 → datachain-0.3.19}/tests/unit/test_warehouse.py +0 -0
  249. {datachain-0.3.18 → datachain-0.3.19}/tests/utils.py +0 -0
@@ -23,15 +23,18 @@ jobs:
23
23
  uses: actions/setup-python@v5
24
24
  with:
25
25
  python-version: '3.12'
26
- cache: 'pip'
27
26
 
28
- - name: Upgrade nox and uv
29
- run: |
30
- python -m pip install --upgrade 'nox[uv]'
31
- nox --version
32
- uv --version
27
+ - name: Setup uv
28
+ uses: astral-sh/setup-uv@v3
29
+ with:
30
+ enable-cache: true
31
+ cache-suffix: benchmarks
32
+ cache-dependency-glob: pyproject.toml
33
+
34
+ - name: Install nox and dvc
35
+ run: uv pip install dvc[gs] nox --system
33
36
 
34
- - run: uv pip install dvc[gs] --system
35
- - run: dvc --cd tests/benchmarks/datasets pull
37
+ - name: Pull dataset
38
+ run: dvc --cd tests/benchmarks/datasets pull
36
39
  - name: Run benchmarks
37
40
  run: nox -s bench
@@ -21,17 +21,16 @@ jobs:
21
21
  with:
22
22
  fetch-depth: 0
23
23
 
24
- - name: Set up Python 3.10
24
+ - name: Set up Python 3.12
25
25
  uses: actions/setup-python@v5
26
26
  with:
27
- python-version: '3.10'
27
+ python-version: '3.12'
28
28
 
29
- - name: Upgrade nox and uv
30
- run: |
31
- python -m pip install --upgrade 'nox[uv]'
32
- nox --version
33
- uv --version
29
+ - name: Setup uv
30
+ uses: astral-sh/setup-uv@v3
34
31
 
32
+ - name: Install nox
33
+ run: uv pip install nox --system
35
34
  - name: Build package
36
35
  run: nox -s build
37
36
 
@@ -82,12 +82,15 @@ jobs:
82
82
  uses: actions/setup-python@v5
83
83
  with:
84
84
  python-version: ${{ matrix.pyv }}
85
- cache: 'pip'
86
85
 
87
- - name: Install uv
88
- run: |
89
- python -m pip install --upgrade uv
90
- uv --version
86
+ - name: Setup uv
87
+ uses: astral-sh/setup-uv@v3
88
+ with:
89
+ enable-cache: true
90
+ cache-suffix: studio
91
+ cache-dependency-glob: |
92
+ backend/datachain_server/pyproject.toml
93
+ backend/datachain/pyproject.toml
91
94
 
92
95
  - name: Install dependencies
93
96
  run: uv pip install --system ./backend/datachain_server[tests] ./backend/datachain[tests]
@@ -26,13 +26,16 @@ jobs:
26
26
  uses: actions/setup-python@v5
27
27
  with:
28
28
  python-version: '3.9'
29
- cache: 'pip'
30
29
 
31
- - name: Upgrade nox and uv
32
- run: |
33
- python -m pip install --upgrade 'nox[uv]'
34
- nox --version
35
- uv --version
30
+ - name: Setup uv
31
+ uses: astral-sh/setup-uv@v3
32
+ with:
33
+ enable-cache: true
34
+ cache-suffix: lint
35
+ cache-dependency-glob: pyproject.toml
36
+
37
+ - name: Install nox
38
+ run: uv pip install nox --system
36
39
 
37
40
  - name: Cache mypy
38
41
  uses: actions/cache@v4
@@ -77,13 +80,16 @@ jobs:
77
80
  uses: actions/setup-python@v5
78
81
  with:
79
82
  python-version: ${{ matrix.pyv }}
80
- cache: 'pip'
81
83
 
82
- - name: Upgrade nox and uv
83
- run: |
84
- python -m pip install --upgrade 'nox[uv]'
85
- nox --version
86
- uv --version
84
+ - name: Setup uv
85
+ uses: astral-sh/setup-uv@v3
86
+ with:
87
+ enable-cache: true
88
+ cache-suffix: tests-${{ matrix.pyv }}
89
+ cache-dependency-glob: pyproject.toml
90
+
91
+ - name: Install nox
92
+ run: uv pip install nox --system
87
93
 
88
94
  - name: Skip flaky azure, gs remotes on macOS
89
95
  if: runner.os == 'macOS'
@@ -134,13 +140,16 @@ jobs:
134
140
  uses: actions/setup-python@v5
135
141
  with:
136
142
  python-version: ${{ matrix.pyv }}
137
- cache: 'pip'
138
143
 
139
- - name: Upgrade nox and uv
140
- run: |
141
- python -m pip install --upgrade 'nox[uv]'
142
- nox --version
143
- uv --version
144
+ - name: Setup uv
145
+ uses: astral-sh/setup-uv@v3
146
+ with:
147
+ enable-cache: true
148
+ cache-suffix: examples-${{ matrix.pyv }}
149
+ cache-dependency-glob: pyproject.toml
150
+
151
+ - name: Install nox
152
+ run: uv pip install nox --system
144
153
 
145
154
  - name: Run examples
146
155
  run: nox -s examples -p ${{ matrix.pyv }} -- -m "${{ matrix.group }}"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datachain
3
- Version: 0.3.18
3
+ Version: 0.3.19
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License: Apache-2.0
@@ -34,7 +34,6 @@ Requires-Dist: dvc-objects<6,>=4
34
34
  Requires-Dist: shtab<2,>=1.3.4
35
35
  Requires-Dist: sqlalchemy>=2
36
36
  Requires-Dist: multiprocess==0.70.16
37
- Requires-Dist: dill==0.3.8
38
37
  Requires-Dist: cloudpickle
39
38
  Requires-Dist: orjson>=3.10.5
40
39
  Requires-Dist: pydantic<3,>=2
@@ -7,6 +7,8 @@ automatically when creating a `DataChain` from files, like in
7
7
  classes include various metadata fields about the underlying file as well as methods to
8
8
  read from the files and otherwise work with the file contents.
9
9
 
10
+ ::: datachain.lib.file.ArrowRow
11
+
10
12
  ::: datachain.lib.file.ExportPlacement
11
13
 
12
14
  ::: datachain.lib.file.File
@@ -15,8 +17,6 @@ read from the files and otherwise work with the file contents.
15
17
 
16
18
  ::: datachain.lib.file.ImageFile
17
19
 
18
- ::: datachain.lib.file.IndexedFile
19
-
20
20
  ::: datachain.lib.file.TarVFile
21
21
 
22
22
  ::: datachain.lib.file.TextFile
@@ -57,8 +57,8 @@ def lint(session: nox.Session) -> None:
57
57
 
58
58
  @nox.session
59
59
  def build(session: nox.Session) -> None:
60
- session.install("build", "twine", "uv")
61
- session.run("python", "-m", "build", "--installer", "uv")
60
+ session.install("twine", "uv")
61
+ session.run("uv", "build")
62
62
  dists = glob.glob("dist/*")
63
63
  session.run("twine", "check", *dists, silent=True)
64
64
 
@@ -37,7 +37,6 @@ dependencies = [
37
37
  "shtab>=1.3.4,<2",
38
38
  "sqlalchemy>=2",
39
39
  "multiprocess==0.70.16",
40
- "dill==0.3.8",
41
40
  "cloudpickle",
42
41
  "orjson>=3.10.5",
43
42
  "pydantic>=2,<3",
@@ -1,21 +1,23 @@
1
1
  from datachain.lib.data_model import DataModel, DataType, is_chain_type
2
2
  from datachain.lib.dc import C, Column, DataChain, Sys
3
3
  from datachain.lib.file import (
4
+ ArrowRow,
4
5
  File,
5
6
  FileError,
6
7
  ImageFile,
7
- IndexedFile,
8
8
  TarVFile,
9
9
  TextFile,
10
10
  )
11
11
  from datachain.lib.model_store import ModelStore
12
12
  from datachain.lib.udf import Aggregator, Generator, Mapper
13
13
  from datachain.lib.utils import AbstractUDF, DataChainError
14
+ from datachain.query import metrics, param
14
15
  from datachain.query.session import Session
15
16
 
16
17
  __all__ = [
17
18
  "AbstractUDF",
18
19
  "Aggregator",
20
+ "ArrowRow",
19
21
  "C",
20
22
  "Column",
21
23
  "DataChain",
@@ -26,7 +28,6 @@ __all__ = [
26
28
  "FileError",
27
29
  "Generator",
28
30
  "ImageFile",
29
- "IndexedFile",
30
31
  "Mapper",
31
32
  "ModelStore",
32
33
  "Session",
@@ -34,4 +35,6 @@ __all__ = [
34
35
  "TarVFile",
35
36
  "TextFile",
36
37
  "is_chain_type",
38
+ "metrics",
39
+ "param",
37
40
  ]
@@ -1,4 +1,3 @@
1
- import ast
2
1
  import glob
3
2
  import io
4
3
  import json
@@ -53,9 +52,9 @@ from datachain.error import (
53
52
  DataChainError,
54
53
  DatasetInvalidVersionError,
55
54
  DatasetNotFoundError,
55
+ DatasetVersionNotFoundError,
56
56
  PendingIndexingError,
57
57
  QueryScriptCancelError,
58
- QueryScriptCompileError,
59
58
  QueryScriptRunError,
60
59
  )
61
60
  from datachain.listing import Listing
@@ -588,37 +587,6 @@ class Catalog:
588
587
  def generate_query_dataset_name(cls) -> str:
589
588
  return f"{QUERY_DATASET_PREFIX}_{uuid4().hex}"
590
589
 
591
- def attach_query_wrapper(self, code_ast):
592
- if code_ast.body:
593
- last_expr = code_ast.body[-1]
594
- if isinstance(last_expr, ast.Expr):
595
- new_expressions = [
596
- ast.Import(
597
- names=[ast.alias(name="datachain.query.dataset", asname=None)]
598
- ),
599
- ast.Expr(
600
- value=ast.Call(
601
- func=ast.Attribute(
602
- value=ast.Attribute(
603
- value=ast.Attribute(
604
- value=ast.Name(id="datachain", ctx=ast.Load()),
605
- attr="query",
606
- ctx=ast.Load(),
607
- ),
608
- attr="dataset",
609
- ctx=ast.Load(),
610
- ),
611
- attr="query_wrapper",
612
- ctx=ast.Load(),
613
- ),
614
- args=[last_expr],
615
- keywords=[],
616
- )
617
- ),
618
- ]
619
- code_ast.body[-1:] = new_expressions
620
- return code_ast
621
-
622
590
  def get_client(self, uri: str, **config: Any) -> Client:
623
591
  """
624
592
  Return the client corresponding to the given source `uri`.
@@ -1218,7 +1186,9 @@ class Catalog:
1218
1186
 
1219
1187
  dataset_version = dataset.get_version(version)
1220
1188
  if not dataset_version:
1221
- raise ValueError(f"Dataset {dataset.name} does not have version {version}")
1189
+ raise DatasetVersionNotFoundError(
1190
+ f"Dataset {dataset.name} does not have version {version}"
1191
+ )
1222
1192
 
1223
1193
  if not dataset_version.is_final_status():
1224
1194
  raise ValueError("Cannot register dataset version in non final status")
@@ -1581,7 +1551,7 @@ class Catalog:
1581
1551
 
1582
1552
  try:
1583
1553
  remote_dataset_version = remote_dataset.get_version(version)
1584
- except (ValueError, StopIteration) as exc:
1554
+ except (DatasetVersionNotFoundError, StopIteration) as exc:
1585
1555
  raise DataChainError(
1586
1556
  f"Dataset {remote_dataset_name} doesn't have version {version}"
1587
1557
  " on server"
@@ -1722,64 +1692,24 @@ class Catalog:
1722
1692
  query_script: str,
1723
1693
  env: Optional[Mapping[str, str]] = None,
1724
1694
  python_executable: str = sys.executable,
1725
- save: bool = False,
1726
- capture_output: bool = True,
1695
+ capture_output: bool = False,
1727
1696
  output_hook: Callable[[str], None] = noop,
1728
1697
  params: Optional[dict[str, str]] = None,
1729
1698
  job_id: Optional[str] = None,
1730
- _execute_last_expression: bool = False,
1731
1699
  ) -> None:
1732
- """
1733
- Method to run custom user Python script to run a query and, as result,
1734
- creates new dataset from the results of a query.
1735
- Returns tuple of result dataset and script output.
1736
-
1737
- Constraints on query script:
1738
- 1. datachain.query.DatasetQuery should be used in order to create query
1739
- for a dataset
1740
- 2. There should not be any .save() call on DatasetQuery since the idea
1741
- is to create only one dataset as the outcome of the script
1742
- 3. Last statement must be an instance of DatasetQuery
1743
-
1744
- If save is set to True, we are creating new dataset with results
1745
- from dataset query. If it's set to False, we will just print results
1746
- without saving anything
1747
-
1748
- Example of query script:
1749
- from datachain.query import DatasetQuery, C
1750
- DatasetQuery('s3://ldb-public/remote/datasets/mnist-tiny/').filter(
1751
- C.size > 1000
1752
- )
1753
- """
1754
- if _execute_last_expression:
1755
- try:
1756
- code_ast = ast.parse(query_script)
1757
- code_ast = self.attach_query_wrapper(code_ast)
1758
- query_script_compiled = ast.unparse(code_ast)
1759
- except Exception as exc:
1760
- raise QueryScriptCompileError(
1761
- f"Query script failed to compile, reason: {exc}"
1762
- ) from exc
1763
- else:
1764
- query_script_compiled = query_script
1765
- assert not save
1766
-
1700
+ cmd = [python_executable, "-c", query_script]
1767
1701
  env = dict(env or os.environ)
1768
1702
  env.update(
1769
1703
  {
1770
1704
  "DATACHAIN_QUERY_PARAMS": json.dumps(params or {}),
1771
- "PYTHONPATH": os.getcwd(), # For local imports
1772
- "DATACHAIN_QUERY_SAVE": "1" if save else "",
1773
- "PYTHONUNBUFFERED": "1",
1774
1705
  "DATACHAIN_JOB_ID": job_id or "",
1775
1706
  },
1776
1707
  )
1777
- popen_kwargs = {}
1708
+ popen_kwargs: dict[str, Any] = {}
1778
1709
  if capture_output:
1779
1710
  popen_kwargs = {"stdout": subprocess.PIPE, "stderr": subprocess.STDOUT}
1780
1711
 
1781
- cmd = [python_executable, "-c", query_script_compiled]
1782
- with subprocess.Popen(cmd, env=env, **popen_kwargs) as proc: # type: ignore[call-overload] # noqa: S603
1712
+ with subprocess.Popen(cmd, env=env, **popen_kwargs) as proc: # noqa: S603
1783
1713
  if capture_output:
1784
1714
  args = (proc.stdout, output_hook)
1785
1715
  thread = Thread(target=_process_stream, args=args, daemon=True)
@@ -804,7 +804,6 @@ def query(
804
804
  catalog.query(
805
805
  script_content,
806
806
  python_executable=python_executable,
807
- capture_output=False,
808
807
  params=params,
809
808
  job_id=job_id,
810
809
  )
@@ -12,6 +12,7 @@ from typing import (
12
12
  from urllib.parse import urlparse
13
13
 
14
14
  from datachain.client import Client
15
+ from datachain.error import DatasetVersionNotFoundError
15
16
  from datachain.sql.types import NAME_TYPES_MAPPING, SQLType
16
17
 
17
18
  if TYPE_CHECKING:
@@ -417,7 +418,9 @@ class DatasetRecord:
417
418
 
418
419
  def get_version(self, version: int) -> DatasetVersion:
419
420
  if not self.has_version(version):
420
- raise ValueError(f"Dataset {self.name} does not have version {version}")
421
+ raise DatasetVersionNotFoundError(
422
+ f"Dataset {self.name} does not have version {version}"
423
+ )
421
424
  return next(
422
425
  v
423
426
  for v in self.versions # type: ignore [union-attr]
@@ -435,7 +438,9 @@ class DatasetRecord:
435
438
  Get identifier in the form my-dataset@v3
436
439
  """
437
440
  if not self.has_version(version):
438
- raise ValueError(f"Dataset {self.name} doesn't have a version {version}")
441
+ raise DatasetVersionNotFoundError(
442
+ f"Dataset {self.name} doesn't have a version {version}"
443
+ )
439
444
  return f"{self.name}@v{version}"
440
445
 
441
446
  def uri(self, version: int) -> str:
@@ -10,6 +10,10 @@ class DatasetNotFoundError(NotFoundError):
10
10
  pass
11
11
 
12
12
 
13
+ class DatasetVersionNotFoundError(NotFoundError):
14
+ pass
15
+
16
+
13
17
  class DatasetInvalidVersionError(Exception):
14
18
  pass
15
19
 
@@ -32,14 +36,12 @@ class QueryScriptRunError(Exception):
32
36
  Attributes:
33
37
  message Explanation of the error
34
38
  return_code Code returned by the subprocess
35
- output STDOUT + STDERR output of the subprocess
36
39
  """
37
40
 
38
- def __init__(self, message: str, return_code: int = 0, output: str = ""):
41
+ def __init__(self, message: str, return_code: int = 0):
39
42
  self.message = message
40
43
  self.return_code = return_code
41
- self.output = output
42
- super().__init__(self.message)
44
+ super().__init__(message)
43
45
 
44
46
 
45
47
  class QueryScriptCancelError(QueryScriptRunError):
@@ -4,11 +4,11 @@ from tempfile import NamedTemporaryFile
4
4
  from typing import TYPE_CHECKING, Optional
5
5
 
6
6
  import pyarrow as pa
7
- from pyarrow.dataset import dataset
7
+ from pyarrow.dataset import CsvFileFormat, dataset
8
8
  from tqdm import tqdm
9
9
 
10
10
  from datachain.lib.data_model import dict_to_data_model
11
- from datachain.lib.file import File, IndexedFile
11
+ from datachain.lib.file import ArrowRow, File
12
12
  from datachain.lib.model_store import ModelStore
13
13
  from datachain.lib.udf import Generator
14
14
 
@@ -84,7 +84,12 @@ class ArrowGenerator(Generator):
84
84
  vals_dict[field] = val
85
85
  vals = [self.output_schema(**vals_dict)]
86
86
  if self.source:
87
- yield [IndexedFile(file=file, index=index), *vals]
87
+ kwargs: dict = self.kwargs
88
+ # Can't serialize CsvFileFormat; may lose formatting options.
89
+ if isinstance(kwargs.get("format"), CsvFileFormat):
90
+ kwargs["format"] = "csv"
91
+ arrow_file = ArrowRow(file=file, index=index, kwargs=kwargs)
92
+ yield [arrow_file, *vals]
88
93
  else:
89
94
  yield vals
90
95
  index += 1
@@ -26,8 +26,8 @@ from datachain.lib.convert.python_to_sql import python_to_sql
26
26
  from datachain.lib.convert.values_to_tuples import values_to_tuples
27
27
  from datachain.lib.data_model import DataModel, DataType, dict_to_data_model
28
28
  from datachain.lib.dataset_info import DatasetInfo
29
+ from datachain.lib.file import ArrowRow, File, get_file_type
29
30
  from datachain.lib.file import ExportPlacement as FileExportPlacement
30
- from datachain.lib.file import File, IndexedFile, get_file_type
31
31
  from datachain.lib.listing import (
32
32
  is_listing_dataset,
33
33
  is_listing_expired,
@@ -1614,7 +1614,7 @@ class DataChain(DatasetQuery):
1614
1614
  for name, info in output.model_fields.items()
1615
1615
  }
1616
1616
  if source:
1617
- output = {"source": IndexedFile} | output # type: ignore[assignment,operator]
1617
+ output = {"source": ArrowRow} | output # type: ignore[assignment,operator]
1618
1618
  return self.gen(
1619
1619
  ArrowGenerator(schema, model, source, nrows, **kwargs), output=output
1620
1620
  )
@@ -17,6 +17,7 @@ from urllib.request import url2pathname
17
17
 
18
18
  from fsspec.callbacks import DEFAULT_CALLBACK, Callback
19
19
  from PIL import Image
20
+ from pyarrow.dataset import dataset
20
21
  from pydantic import Field, field_validator
21
22
 
22
23
  if TYPE_CHECKING:
@@ -439,14 +440,31 @@ class ImageFile(File):
439
440
  self.read().save(destination)
440
441
 
441
442
 
442
- class IndexedFile(DataModel):
443
- """Metadata indexed from tabular files.
444
-
445
- Includes `file` and `index` signals.
446
- """
443
+ class ArrowRow(DataModel):
444
+ """`DataModel` for reading row from Arrow-supported file."""
447
445
 
448
446
  file: File
449
447
  index: int
448
+ kwargs: dict
449
+
450
+ @contextmanager
451
+ def open(self):
452
+ """Stream row contents from indexed file."""
453
+ if self.file._caching_enabled:
454
+ self.file.ensure_cached()
455
+ path = self.file.get_local_path()
456
+ ds = dataset(path, **self.kwargs)
457
+
458
+ else:
459
+ path = self.file.get_path()
460
+ ds = dataset(path, filesystem=self.file.get_fs(), **self.kwargs)
461
+
462
+ return ds.take([self.index]).to_reader()
463
+
464
+ def read(self):
465
+ """Returns row contents as dict."""
466
+ with self.open() as record_batch:
467
+ return record_batch.to_pylist()[0]
450
468
 
451
469
 
452
470
  def get_file_type(type_: Literal["binary", "text", "image"] = "binary") -> type[File]:
@@ -1604,25 +1604,3 @@ class DatasetQuery:
1604
1604
  finally:
1605
1605
  self.cleanup()
1606
1606
  return self.__class__(name=name, version=version, catalog=self.catalog)
1607
-
1608
-
1609
- def query_wrapper(dataset_query: Any) -> Any:
1610
- """
1611
- Wrapper function that wraps the last statement of user query script.
1612
- Last statement MUST be instance of DatasetQuery, otherwise script exits with
1613
- error code 10
1614
- """
1615
- if not isinstance(dataset_query, DatasetQuery):
1616
- return dataset_query
1617
-
1618
- catalog = dataset_query.catalog
1619
- save = bool(os.getenv("DATACHAIN_QUERY_SAVE"))
1620
-
1621
- is_session_temp_dataset = dataset_query.name and dataset_query.name.startswith(
1622
- dataset_query.session.get_temp_prefix()
1623
- )
1624
-
1625
- if save and (is_session_temp_dataset or not dataset_query.attached):
1626
- name = catalog.generate_query_dataset_name()
1627
- dataset_query = dataset_query.save(name)
1628
- return dataset_query
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datachain
3
- Version: 0.3.18
3
+ Version: 0.3.19
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License: Apache-2.0
@@ -34,7 +34,6 @@ Requires-Dist: dvc-objects<6,>=4
34
34
  Requires-Dist: shtab<2,>=1.3.4
35
35
  Requires-Dist: sqlalchemy>=2
36
36
  Requires-Dist: multiprocess==0.70.16
37
- Requires-Dist: dill==0.3.8
38
37
  Requires-Dist: cloudpickle
39
38
  Requires-Dist: orjson>=3.10.5
40
39
  Requires-Dist: pydantic<3,>=2
@@ -208,6 +208,7 @@ tests/unit/test_id_generator.py
208
208
  tests/unit/test_listing.py
209
209
  tests/unit/test_metastore.py
210
210
  tests/unit/test_module_exports.py
211
+ tests/unit/test_query.py
211
212
  tests/unit/test_query_metrics.py
212
213
  tests/unit/test_query_params.py
213
214
  tests/unit/test_serializer.py
@@ -15,7 +15,6 @@ dvc-objects<6,>=4
15
15
  shtab<2,>=1.3.4
16
16
  sqlalchemy>=2
17
17
  multiprocess==0.70.16
18
- dill==0.3.8
19
18
  cloudpickle
20
19
  orjson>=3.10.5
21
20
  pydantic<3,>=2
@@ -492,6 +492,7 @@ def cloud_server(request, tmp_upath_factory, cloud_type, version_aware, tree):
492
492
  def datachain_job_id(monkeypatch):
493
493
  job_id = uuid.uuid4().hex
494
494
  monkeypatch.setenv("DATACHAIN_JOB_ID", job_id)
495
+ return job_id
495
496
 
496
497
 
497
498
  @pytest.fixture