datachain 0.14.3__tar.gz → 0.14.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (353) hide show
  1. {datachain-0.14.3 → datachain-0.14.5}/.pre-commit-config.yaml +1 -1
  2. {datachain-0.14.3/src/datachain.egg-info → datachain-0.14.5}/PKG-INFO +3 -3
  3. {datachain-0.14.3 → datachain-0.14.5}/README.rst +2 -2
  4. {datachain-0.14.3 → datachain-0.14.5}/docs/quick-start.md +4 -4
  5. {datachain-0.14.3 → datachain-0.14.5}/examples/get_started/json-csv-reader.py +2 -2
  6. {datachain-0.14.3 → datachain-0.14.5}/examples/multimodal/hf_pipeline.py +1 -1
  7. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/catalog/catalog.py +3 -5
  8. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/data_storage/schema.py +21 -23
  9. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/data_storage/sqlite.py +1 -1
  10. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/data_storage/warehouse.py +6 -8
  11. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/lib/dataset_info.py +5 -0
  12. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/lib/dc/csv.py +3 -3
  13. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/lib/dc/datachain.py +10 -10
  14. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/lib/dc/datasets.py +28 -9
  15. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/lib/dc/hf.py +5 -5
  16. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/lib/dc/json.py +7 -7
  17. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/lib/dc/listings.py +3 -3
  18. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/lib/dc/pandas.py +5 -5
  19. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/lib/dc/parquet.py +3 -3
  20. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/lib/dc/storage.py +6 -6
  21. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/lib/dc/values.py +3 -3
  22. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/lib/listing.py +2 -2
  23. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/lib/signal_schema.py +24 -9
  24. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/lib/udf.py +3 -3
  25. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/listing.py +4 -4
  26. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/query/dispatch.py +56 -46
  27. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/query/session.py +4 -0
  28. {datachain-0.14.3 → datachain-0.14.5/src/datachain.egg-info}/PKG-INFO +3 -3
  29. {datachain-0.14.3 → datachain-0.14.5}/tests/func/test_data_storage.py +1 -1
  30. {datachain-0.14.3 → datachain-0.14.5}/tests/func/test_datachain.py +14 -3
  31. {datachain-0.14.3 → datachain-0.14.5}/tests/unit/lib/test_datachain.py +36 -32
  32. {datachain-0.14.3 → datachain-0.14.5}/tests/unit/lib/test_signal_schema.py +2 -1
  33. {datachain-0.14.3 → datachain-0.14.5}/tests/unit/test_listing.py +1 -1
  34. {datachain-0.14.3 → datachain-0.14.5}/tests/unit/test_session.py +12 -0
  35. {datachain-0.14.3 → datachain-0.14.5}/.cruft.json +0 -0
  36. {datachain-0.14.3 → datachain-0.14.5}/.gitattributes +0 -0
  37. {datachain-0.14.3 → datachain-0.14.5}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  38. {datachain-0.14.3 → datachain-0.14.5}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
  39. {datachain-0.14.3 → datachain-0.14.5}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  40. {datachain-0.14.3 → datachain-0.14.5}/.github/codecov.yaml +0 -0
  41. {datachain-0.14.3 → datachain-0.14.5}/.github/dependabot.yml +0 -0
  42. {datachain-0.14.3 → datachain-0.14.5}/.github/workflows/benchmarks.yml +0 -0
  43. {datachain-0.14.3 → datachain-0.14.5}/.github/workflows/release.yml +0 -0
  44. {datachain-0.14.3 → datachain-0.14.5}/.github/workflows/tests-studio.yml +0 -0
  45. {datachain-0.14.3 → datachain-0.14.5}/.github/workflows/tests.yml +0 -0
  46. {datachain-0.14.3 → datachain-0.14.5}/.github/workflows/update-template.yaml +0 -0
  47. {datachain-0.14.3 → datachain-0.14.5}/.gitignore +0 -0
  48. {datachain-0.14.3 → datachain-0.14.5}/CODE_OF_CONDUCT.rst +0 -0
  49. {datachain-0.14.3 → datachain-0.14.5}/LICENSE +0 -0
  50. {datachain-0.14.3 → datachain-0.14.5}/docs/assets/captioned_cartoons.png +0 -0
  51. {datachain-0.14.3 → datachain-0.14.5}/docs/assets/datachain-white.svg +0 -0
  52. {datachain-0.14.3 → datachain-0.14.5}/docs/assets/datachain.svg +0 -0
  53. {datachain-0.14.3 → datachain-0.14.5}/docs/contributing.md +0 -0
  54. {datachain-0.14.3 → datachain-0.14.5}/docs/css/github-permalink-style.css +0 -0
  55. {datachain-0.14.3 → datachain-0.14.5}/docs/examples.md +0 -0
  56. {datachain-0.14.3 → datachain-0.14.5}/docs/index.md +0 -0
  57. {datachain-0.14.3 → datachain-0.14.5}/docs/overrides/main.html +0 -0
  58. {datachain-0.14.3 → datachain-0.14.5}/docs/references/data-types/arrowrow.md +0 -0
  59. {datachain-0.14.3 → datachain-0.14.5}/docs/references/data-types/bbox.md +0 -0
  60. {datachain-0.14.3 → datachain-0.14.5}/docs/references/data-types/file.md +0 -0
  61. {datachain-0.14.3 → datachain-0.14.5}/docs/references/data-types/imagefile.md +0 -0
  62. {datachain-0.14.3 → datachain-0.14.5}/docs/references/data-types/index.md +0 -0
  63. {datachain-0.14.3 → datachain-0.14.5}/docs/references/data-types/pose.md +0 -0
  64. {datachain-0.14.3 → datachain-0.14.5}/docs/references/data-types/segment.md +0 -0
  65. {datachain-0.14.3 → datachain-0.14.5}/docs/references/data-types/tarvfile.md +0 -0
  66. {datachain-0.14.3 → datachain-0.14.5}/docs/references/data-types/textfile.md +0 -0
  67. {datachain-0.14.3 → datachain-0.14.5}/docs/references/data-types/videofile.md +0 -0
  68. {datachain-0.14.3 → datachain-0.14.5}/docs/references/datachain.md +0 -0
  69. {datachain-0.14.3 → datachain-0.14.5}/docs/references/func.md +0 -0
  70. {datachain-0.14.3 → datachain-0.14.5}/docs/references/index.md +0 -0
  71. {datachain-0.14.3 → datachain-0.14.5}/docs/references/remotes.md +0 -0
  72. {datachain-0.14.3 → datachain-0.14.5}/docs/references/toolkit.md +0 -0
  73. {datachain-0.14.3 → datachain-0.14.5}/docs/references/torch.md +0 -0
  74. {datachain-0.14.3 → datachain-0.14.5}/docs/references/udf.md +0 -0
  75. {datachain-0.14.3 → datachain-0.14.5}/docs/tutorials.md +0 -0
  76. {datachain-0.14.3 → datachain-0.14.5}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
  77. {datachain-0.14.3 → datachain-0.14.5}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
  78. {datachain-0.14.3 → datachain-0.14.5}/examples/computer_vision/openimage-detect.py +0 -0
  79. {datachain-0.14.3 → datachain-0.14.5}/examples/computer_vision/ultralytics-bbox.py +0 -0
  80. {datachain-0.14.3 → datachain-0.14.5}/examples/computer_vision/ultralytics-pose.py +0 -0
  81. {datachain-0.14.3 → datachain-0.14.5}/examples/computer_vision/ultralytics-segment.py +0 -0
  82. {datachain-0.14.3 → datachain-0.14.5}/examples/get_started/common_sql_functions.py +0 -0
  83. {datachain-0.14.3 → datachain-0.14.5}/examples/get_started/torch-loader.py +0 -0
  84. {datachain-0.14.3 → datachain-0.14.5}/examples/get_started/udfs/parallel.py +0 -0
  85. {datachain-0.14.3 → datachain-0.14.5}/examples/get_started/udfs/simple.py +0 -0
  86. {datachain-0.14.3 → datachain-0.14.5}/examples/get_started/udfs/stateful.py +0 -0
  87. {datachain-0.14.3 → datachain-0.14.5}/examples/llm_and_nlp/claude-query.py +0 -0
  88. {datachain-0.14.3 → datachain-0.14.5}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
  89. {datachain-0.14.3 → datachain-0.14.5}/examples/multimodal/clip_inference.py +0 -0
  90. {datachain-0.14.3 → datachain-0.14.5}/examples/multimodal/openai_image_desc_lib.py +0 -0
  91. {datachain-0.14.3 → datachain-0.14.5}/examples/multimodal/wds.py +0 -0
  92. {datachain-0.14.3 → datachain-0.14.5}/examples/multimodal/wds_filtered.py +0 -0
  93. {datachain-0.14.3 → datachain-0.14.5}/mkdocs.yml +0 -0
  94. {datachain-0.14.3 → datachain-0.14.5}/noxfile.py +0 -0
  95. {datachain-0.14.3 → datachain-0.14.5}/pyproject.toml +0 -0
  96. {datachain-0.14.3 → datachain-0.14.5}/setup.cfg +0 -0
  97. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/__init__.py +0 -0
  98. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/__main__.py +0 -0
  99. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/asyn.py +0 -0
  100. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/cache.py +0 -0
  101. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/catalog/__init__.py +0 -0
  102. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/catalog/datasource.py +0 -0
  103. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/catalog/loader.py +0 -0
  104. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/cli/__init__.py +0 -0
  105. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/cli/commands/__init__.py +0 -0
  106. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/cli/commands/datasets.py +0 -0
  107. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/cli/commands/du.py +0 -0
  108. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/cli/commands/index.py +0 -0
  109. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/cli/commands/ls.py +0 -0
  110. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/cli/commands/misc.py +0 -0
  111. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/cli/commands/query.py +0 -0
  112. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/cli/commands/show.py +0 -0
  113. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/cli/parser/__init__.py +0 -0
  114. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/cli/parser/job.py +0 -0
  115. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/cli/parser/studio.py +0 -0
  116. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/cli/parser/utils.py +0 -0
  117. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/cli/utils.py +0 -0
  118. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/client/__init__.py +0 -0
  119. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/client/azure.py +0 -0
  120. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/client/fileslice.py +0 -0
  121. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/client/fsspec.py +0 -0
  122. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/client/gcs.py +0 -0
  123. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/client/hf.py +0 -0
  124. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/client/local.py +0 -0
  125. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/client/s3.py +0 -0
  126. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/config.py +0 -0
  127. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/data_storage/__init__.py +0 -0
  128. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/data_storage/db_engine.py +0 -0
  129. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/data_storage/job.py +0 -0
  130. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/data_storage/metastore.py +0 -0
  131. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/data_storage/serializer.py +0 -0
  132. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/dataset.py +0 -0
  133. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/diff/__init__.py +0 -0
  134. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/error.py +0 -0
  135. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/fs/__init__.py +0 -0
  136. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/fs/reference.py +0 -0
  137. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/fs/utils.py +0 -0
  138. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/func/__init__.py +0 -0
  139. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/func/aggregate.py +0 -0
  140. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/func/array.py +0 -0
  141. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/func/base.py +0 -0
  142. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/func/conditional.py +0 -0
  143. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/func/func.py +0 -0
  144. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/func/numeric.py +0 -0
  145. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/func/path.py +0 -0
  146. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/func/random.py +0 -0
  147. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/func/string.py +0 -0
  148. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/func/window.py +0 -0
  149. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/job.py +0 -0
  150. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/lib/__init__.py +0 -0
  151. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/lib/arrow.py +0 -0
  152. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/lib/clip.py +0 -0
  153. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/lib/convert/__init__.py +0 -0
  154. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/lib/convert/flatten.py +0 -0
  155. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/lib/convert/python_to_sql.py +0 -0
  156. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/lib/convert/sql_to_python.py +0 -0
  157. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/lib/convert/unflatten.py +0 -0
  158. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/lib/convert/values_to_tuples.py +0 -0
  159. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/lib/data_model.py +0 -0
  160. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/lib/dc/__init__.py +0 -0
  161. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/lib/dc/records.py +0 -0
  162. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/lib/dc/utils.py +0 -0
  163. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/lib/file.py +0 -0
  164. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/lib/hf.py +0 -0
  165. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/lib/image.py +0 -0
  166. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/lib/listing_info.py +0 -0
  167. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/lib/meta_formats.py +0 -0
  168. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/lib/model_store.py +0 -0
  169. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/lib/pytorch.py +0 -0
  170. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/lib/settings.py +0 -0
  171. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/lib/tar.py +0 -0
  172. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/lib/text.py +0 -0
  173. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/lib/udf_signature.py +0 -0
  174. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/lib/utils.py +0 -0
  175. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/lib/video.py +0 -0
  176. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/lib/webdataset.py +0 -0
  177. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/lib/webdataset_laion.py +0 -0
  178. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/model/__init__.py +0 -0
  179. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/model/bbox.py +0 -0
  180. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/model/pose.py +0 -0
  181. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/model/segment.py +0 -0
  182. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/model/ultralytics/__init__.py +0 -0
  183. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/model/ultralytics/bbox.py +0 -0
  184. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/model/ultralytics/pose.py +0 -0
  185. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/model/ultralytics/segment.py +0 -0
  186. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/model/utils.py +0 -0
  187. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/node.py +0 -0
  188. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/nodes_fetcher.py +0 -0
  189. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/nodes_thread_pool.py +0 -0
  190. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/progress.py +0 -0
  191. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/py.typed +0 -0
  192. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/query/__init__.py +0 -0
  193. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/query/batch.py +0 -0
  194. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/query/dataset.py +0 -0
  195. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/query/metrics.py +0 -0
  196. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/query/params.py +0 -0
  197. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/query/queue.py +0 -0
  198. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/query/schema.py +0 -0
  199. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/query/udf.py +0 -0
  200. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/query/utils.py +0 -0
  201. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/remote/__init__.py +0 -0
  202. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/remote/studio.py +0 -0
  203. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/script_meta.py +0 -0
  204. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/sql/__init__.py +0 -0
  205. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/sql/default/__init__.py +0 -0
  206. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/sql/default/base.py +0 -0
  207. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/sql/functions/__init__.py +0 -0
  208. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/sql/functions/aggregate.py +0 -0
  209. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/sql/functions/array.py +0 -0
  210. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/sql/functions/conditional.py +0 -0
  211. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/sql/functions/numeric.py +0 -0
  212. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/sql/functions/path.py +0 -0
  213. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/sql/functions/random.py +0 -0
  214. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/sql/functions/string.py +0 -0
  215. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/sql/selectable.py +0 -0
  216. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/sql/sqlite/__init__.py +0 -0
  217. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/sql/sqlite/base.py +0 -0
  218. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/sql/sqlite/types.py +0 -0
  219. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/sql/sqlite/vector.py +0 -0
  220. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/sql/types.py +0 -0
  221. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/sql/utils.py +0 -0
  222. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/studio.py +0 -0
  223. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/telemetry.py +0 -0
  224. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/toolkit/__init__.py +0 -0
  225. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/toolkit/split.py +0 -0
  226. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/torch/__init__.py +0 -0
  227. {datachain-0.14.3 → datachain-0.14.5}/src/datachain/utils.py +0 -0
  228. {datachain-0.14.3 → datachain-0.14.5}/src/datachain.egg-info/SOURCES.txt +0 -0
  229. {datachain-0.14.3 → datachain-0.14.5}/src/datachain.egg-info/dependency_links.txt +0 -0
  230. {datachain-0.14.3 → datachain-0.14.5}/src/datachain.egg-info/entry_points.txt +0 -0
  231. {datachain-0.14.3 → datachain-0.14.5}/src/datachain.egg-info/requires.txt +0 -0
  232. {datachain-0.14.3 → datachain-0.14.5}/src/datachain.egg-info/top_level.txt +0 -0
  233. {datachain-0.14.3 → datachain-0.14.5}/tests/__init__.py +0 -0
  234. {datachain-0.14.3 → datachain-0.14.5}/tests/benchmarks/__init__.py +0 -0
  235. {datachain-0.14.3 → datachain-0.14.5}/tests/benchmarks/conftest.py +0 -0
  236. {datachain-0.14.3 → datachain-0.14.5}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
  237. {datachain-0.14.3 → datachain-0.14.5}/tests/benchmarks/datasets/.dvc/config +0 -0
  238. {datachain-0.14.3 → datachain-0.14.5}/tests/benchmarks/datasets/.gitignore +0 -0
  239. {datachain-0.14.3 → datachain-0.14.5}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
  240. {datachain-0.14.3 → datachain-0.14.5}/tests/benchmarks/test_datachain.py +0 -0
  241. {datachain-0.14.3 → datachain-0.14.5}/tests/benchmarks/test_ls.py +0 -0
  242. {datachain-0.14.3 → datachain-0.14.5}/tests/benchmarks/test_version.py +0 -0
  243. {datachain-0.14.3 → datachain-0.14.5}/tests/conftest.py +0 -0
  244. {datachain-0.14.3 → datachain-0.14.5}/tests/data.py +0 -0
  245. {datachain-0.14.3 → datachain-0.14.5}/tests/examples/__init__.py +0 -0
  246. {datachain-0.14.3 → datachain-0.14.5}/tests/examples/test_examples.py +0 -0
  247. {datachain-0.14.3 → datachain-0.14.5}/tests/examples/test_wds_e2e.py +0 -0
  248. {datachain-0.14.3 → datachain-0.14.5}/tests/examples/wds_data.py +0 -0
  249. {datachain-0.14.3 → datachain-0.14.5}/tests/func/__init__.py +0 -0
  250. {datachain-0.14.3 → datachain-0.14.5}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
  251. {datachain-0.14.3 → datachain-0.14.5}/tests/func/data/lena.jpg +0 -0
  252. {datachain-0.14.3 → datachain-0.14.5}/tests/func/fake-service-account-credentials.json +0 -0
  253. {datachain-0.14.3 → datachain-0.14.5}/tests/func/model/__init__.py +0 -0
  254. {datachain-0.14.3 → datachain-0.14.5}/tests/func/model/data/running-mask0.png +0 -0
  255. {datachain-0.14.3 → datachain-0.14.5}/tests/func/model/data/running-mask1.png +0 -0
  256. {datachain-0.14.3 → datachain-0.14.5}/tests/func/model/data/running.jpg +0 -0
  257. {datachain-0.14.3 → datachain-0.14.5}/tests/func/model/data/ships.jpg +0 -0
  258. {datachain-0.14.3 → datachain-0.14.5}/tests/func/model/test_yolo.py +0 -0
  259. {datachain-0.14.3 → datachain-0.14.5}/tests/func/test_catalog.py +0 -0
  260. {datachain-0.14.3 → datachain-0.14.5}/tests/func/test_client.py +0 -0
  261. {datachain-0.14.3 → datachain-0.14.5}/tests/func/test_cloud_transfer.py +0 -0
  262. {datachain-0.14.3 → datachain-0.14.5}/tests/func/test_datachain_merge.py +0 -0
  263. {datachain-0.14.3 → datachain-0.14.5}/tests/func/test_dataset_query.py +0 -0
  264. {datachain-0.14.3 → datachain-0.14.5}/tests/func/test_datasets.py +0 -0
  265. {datachain-0.14.3 → datachain-0.14.5}/tests/func/test_feature_pickling.py +0 -0
  266. {datachain-0.14.3 → datachain-0.14.5}/tests/func/test_file.py +0 -0
  267. {datachain-0.14.3 → datachain-0.14.5}/tests/func/test_hf.py +0 -0
  268. {datachain-0.14.3 → datachain-0.14.5}/tests/func/test_hidden_field.py +0 -0
  269. {datachain-0.14.3 → datachain-0.14.5}/tests/func/test_image.py +0 -0
  270. {datachain-0.14.3 → datachain-0.14.5}/tests/func/test_listing.py +0 -0
  271. {datachain-0.14.3 → datachain-0.14.5}/tests/func/test_ls.py +0 -0
  272. {datachain-0.14.3 → datachain-0.14.5}/tests/func/test_meta_formats.py +0 -0
  273. {datachain-0.14.3 → datachain-0.14.5}/tests/func/test_metrics.py +0 -0
  274. {datachain-0.14.3 → datachain-0.14.5}/tests/func/test_pull.py +0 -0
  275. {datachain-0.14.3 → datachain-0.14.5}/tests/func/test_pytorch.py +0 -0
  276. {datachain-0.14.3 → datachain-0.14.5}/tests/func/test_query.py +0 -0
  277. {datachain-0.14.3 → datachain-0.14.5}/tests/func/test_session.py +0 -0
  278. {datachain-0.14.3 → datachain-0.14.5}/tests/func/test_toolkit.py +0 -0
  279. {datachain-0.14.3 → datachain-0.14.5}/tests/func/test_video.py +0 -0
  280. {datachain-0.14.3 → datachain-0.14.5}/tests/func/test_warehouse.py +0 -0
  281. {datachain-0.14.3 → datachain-0.14.5}/tests/scripts/feature_class.py +0 -0
  282. {datachain-0.14.3 → datachain-0.14.5}/tests/scripts/feature_class_exception.py +0 -0
  283. {datachain-0.14.3 → datachain-0.14.5}/tests/scripts/feature_class_parallel.py +0 -0
  284. {datachain-0.14.3 → datachain-0.14.5}/tests/scripts/feature_class_parallel_data_model.py +0 -0
  285. {datachain-0.14.3 → datachain-0.14.5}/tests/scripts/name_len_slow.py +0 -0
  286. {datachain-0.14.3 → datachain-0.14.5}/tests/test_atomicity.py +0 -0
  287. {datachain-0.14.3 → datachain-0.14.5}/tests/test_cli_e2e.py +0 -0
  288. {datachain-0.14.3 → datachain-0.14.5}/tests/test_cli_studio.py +0 -0
  289. {datachain-0.14.3 → datachain-0.14.5}/tests/test_import_time.py +0 -0
  290. {datachain-0.14.3 → datachain-0.14.5}/tests/test_query_e2e.py +0 -0
  291. {datachain-0.14.3 → datachain-0.14.5}/tests/test_telemetry.py +0 -0
  292. {datachain-0.14.3 → datachain-0.14.5}/tests/unit/__init__.py +0 -0
  293. {datachain-0.14.3 → datachain-0.14.5}/tests/unit/lib/__init__.py +0 -0
  294. {datachain-0.14.3 → datachain-0.14.5}/tests/unit/lib/conftest.py +0 -0
  295. {datachain-0.14.3 → datachain-0.14.5}/tests/unit/lib/test_arrow.py +0 -0
  296. {datachain-0.14.3 → datachain-0.14.5}/tests/unit/lib/test_clip.py +0 -0
  297. {datachain-0.14.3 → datachain-0.14.5}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
  298. {datachain-0.14.3 → datachain-0.14.5}/tests/unit/lib/test_datachain_merge.py +0 -0
  299. {datachain-0.14.3 → datachain-0.14.5}/tests/unit/lib/test_diff.py +0 -0
  300. {datachain-0.14.3 → datachain-0.14.5}/tests/unit/lib/test_feature.py +0 -0
  301. {datachain-0.14.3 → datachain-0.14.5}/tests/unit/lib/test_feature_utils.py +0 -0
  302. {datachain-0.14.3 → datachain-0.14.5}/tests/unit/lib/test_file.py +0 -0
  303. {datachain-0.14.3 → datachain-0.14.5}/tests/unit/lib/test_hf.py +0 -0
  304. {datachain-0.14.3 → datachain-0.14.5}/tests/unit/lib/test_image.py +0 -0
  305. {datachain-0.14.3 → datachain-0.14.5}/tests/unit/lib/test_listing_info.py +0 -0
  306. {datachain-0.14.3 → datachain-0.14.5}/tests/unit/lib/test_python_to_sql.py +0 -0
  307. {datachain-0.14.3 → datachain-0.14.5}/tests/unit/lib/test_schema.py +0 -0
  308. {datachain-0.14.3 → datachain-0.14.5}/tests/unit/lib/test_sql_to_python.py +0 -0
  309. {datachain-0.14.3 → datachain-0.14.5}/tests/unit/lib/test_text.py +0 -0
  310. {datachain-0.14.3 → datachain-0.14.5}/tests/unit/lib/test_udf_signature.py +0 -0
  311. {datachain-0.14.3 → datachain-0.14.5}/tests/unit/lib/test_utils.py +0 -0
  312. {datachain-0.14.3 → datachain-0.14.5}/tests/unit/lib/test_webdataset.py +0 -0
  313. {datachain-0.14.3 → datachain-0.14.5}/tests/unit/model/__init__.py +0 -0
  314. {datachain-0.14.3 → datachain-0.14.5}/tests/unit/model/test_bbox.py +0 -0
  315. {datachain-0.14.3 → datachain-0.14.5}/tests/unit/model/test_pose.py +0 -0
  316. {datachain-0.14.3 → datachain-0.14.5}/tests/unit/model/test_segment.py +0 -0
  317. {datachain-0.14.3 → datachain-0.14.5}/tests/unit/model/test_utils.py +0 -0
  318. {datachain-0.14.3 → datachain-0.14.5}/tests/unit/sql/__init__.py +0 -0
  319. {datachain-0.14.3 → datachain-0.14.5}/tests/unit/sql/sqlite/__init__.py +0 -0
  320. {datachain-0.14.3 → datachain-0.14.5}/tests/unit/sql/sqlite/test_types.py +0 -0
  321. {datachain-0.14.3 → datachain-0.14.5}/tests/unit/sql/sqlite/test_utils.py +0 -0
  322. {datachain-0.14.3 → datachain-0.14.5}/tests/unit/sql/test_array.py +0 -0
  323. {datachain-0.14.3 → datachain-0.14.5}/tests/unit/sql/test_conditional.py +0 -0
  324. {datachain-0.14.3 → datachain-0.14.5}/tests/unit/sql/test_path.py +0 -0
  325. {datachain-0.14.3 → datachain-0.14.5}/tests/unit/sql/test_random.py +0 -0
  326. {datachain-0.14.3 → datachain-0.14.5}/tests/unit/sql/test_selectable.py +0 -0
  327. {datachain-0.14.3 → datachain-0.14.5}/tests/unit/sql/test_string.py +0 -0
  328. {datachain-0.14.3 → datachain-0.14.5}/tests/unit/test_asyn.py +0 -0
  329. {datachain-0.14.3 → datachain-0.14.5}/tests/unit/test_cache.py +0 -0
  330. {datachain-0.14.3 → datachain-0.14.5}/tests/unit/test_catalog.py +0 -0
  331. {datachain-0.14.3 → datachain-0.14.5}/tests/unit/test_catalog_loader.py +0 -0
  332. {datachain-0.14.3 → datachain-0.14.5}/tests/unit/test_cli_parsing.py +0 -0
  333. {datachain-0.14.3 → datachain-0.14.5}/tests/unit/test_client.py +0 -0
  334. {datachain-0.14.3 → datachain-0.14.5}/tests/unit/test_client_gcs.py +0 -0
  335. {datachain-0.14.3 → datachain-0.14.5}/tests/unit/test_client_s3.py +0 -0
  336. {datachain-0.14.3 → datachain-0.14.5}/tests/unit/test_config.py +0 -0
  337. {datachain-0.14.3 → datachain-0.14.5}/tests/unit/test_data_storage.py +0 -0
  338. {datachain-0.14.3 → datachain-0.14.5}/tests/unit/test_database_engine.py +0 -0
  339. {datachain-0.14.3 → datachain-0.14.5}/tests/unit/test_dataset.py +0 -0
  340. {datachain-0.14.3 → datachain-0.14.5}/tests/unit/test_dispatch.py +0 -0
  341. {datachain-0.14.3 → datachain-0.14.5}/tests/unit/test_fileslice.py +0 -0
  342. {datachain-0.14.3 → datachain-0.14.5}/tests/unit/test_func.py +0 -0
  343. {datachain-0.14.3 → datachain-0.14.5}/tests/unit/test_metastore.py +0 -0
  344. {datachain-0.14.3 → datachain-0.14.5}/tests/unit/test_module_exports.py +0 -0
  345. {datachain-0.14.3 → datachain-0.14.5}/tests/unit/test_pytorch.py +0 -0
  346. {datachain-0.14.3 → datachain-0.14.5}/tests/unit/test_query.py +0 -0
  347. {datachain-0.14.3 → datachain-0.14.5}/tests/unit/test_query_metrics.py +0 -0
  348. {datachain-0.14.3 → datachain-0.14.5}/tests/unit/test_query_params.py +0 -0
  349. {datachain-0.14.3 → datachain-0.14.5}/tests/unit/test_script_meta.py +0 -0
  350. {datachain-0.14.3 → datachain-0.14.5}/tests/unit/test_serializer.py +0 -0
  351. {datachain-0.14.3 → datachain-0.14.5}/tests/unit/test_utils.py +0 -0
  352. {datachain-0.14.3 → datachain-0.14.5}/tests/unit/test_warehouse.py +0 -0
  353. {datachain-0.14.3 → datachain-0.14.5}/tests/utils.py +0 -0
@@ -24,7 +24,7 @@ repos:
24
24
  - id: trailing-whitespace
25
25
  exclude: '^LICENSES/'
26
26
  - repo: https://github.com/astral-sh/ruff-pre-commit
27
- rev: 'v0.11.2'
27
+ rev: 'v0.11.4'
28
28
  hooks:
29
29
  - id: ruff
30
30
  args: [--fix, --exit-non-zero-on-fix]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.14.3
3
+ Version: 0.14.5
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -171,7 +171,7 @@ high confidence scores.
171
171
 
172
172
  import datachain as dc
173
173
 
174
- meta = dc.read_json("gs://datachain-demo/dogs-and-cats/*json", object_name="meta", anon=True)
174
+ meta = dc.read_json("gs://datachain-demo/dogs-and-cats/*json", column="meta", anon=True)
175
175
  images = dc.read_storage("gs://datachain-demo/dogs-and-cats/*jpg", anon=True)
176
176
 
177
177
  images_id = images.map(id=lambda file: file.path.split('.')[-2])
@@ -213,7 +213,7 @@ Python code:
213
213
  return result.lower().startswith("success")
214
214
 
215
215
  chain = (
216
- dc.read_storage("gs://datachain-demo/chatbot-KiT/", object_name="file", anon=True)
216
+ dc.read_storage("gs://datachain-demo/chatbot-KiT/", column="file", anon=True)
217
217
  .settings(parallel=4, cache=True)
218
218
  .map(is_success=eval_dialogue)
219
219
  .save("mistral_files")
@@ -60,7 +60,7 @@ high confidence scores.
60
60
 
61
61
  import datachain as dc
62
62
 
63
- meta = dc.read_json("gs://datachain-demo/dogs-and-cats/*json", object_name="meta", anon=True)
63
+ meta = dc.read_json("gs://datachain-demo/dogs-and-cats/*json", column="meta", anon=True)
64
64
  images = dc.read_storage("gs://datachain-demo/dogs-and-cats/*jpg", anon=True)
65
65
 
66
66
  images_id = images.map(id=lambda file: file.path.split('.')[-2])
@@ -102,7 +102,7 @@ Python code:
102
102
  return result.lower().startswith("success")
103
103
 
104
104
  chain = (
105
- dc.read_storage("gs://datachain-demo/chatbot-KiT/", object_name="file", anon=True)
105
+ dc.read_storage("gs://datachain-demo/chatbot-KiT/", column="file", anon=True)
106
106
  .settings(parallel=4, cache=True)
107
107
  .map(is_success=eval_dialogue)
108
108
  .save("mistral_files")
@@ -39,7 +39,7 @@ using JSON metadata:
39
39
  ``` py
40
40
  import datachain as dc
41
41
 
42
- meta = dc.read_json("gs://datachain-demo/dogs-and-cats/*json", object_name="meta", anon=True)
42
+ meta = dc.read_json("gs://datachain-demo/dogs-and-cats/*json", column="meta", anon=True)
43
43
  images = dc.read_storage("gs://datachain-demo/dogs-and-cats/*jpg", anon=True)
44
44
 
45
45
  images_id = images.map(id=lambda file: file.path.split('.')[-2])
@@ -78,7 +78,7 @@ def is_positive_dialogue_ending(file) -> bool:
78
78
 
79
79
  chain = (
80
80
  dc.read_storage("gs://datachain-demo/chatbot-KiT/",
81
- object_name="file", type="text", anon=True)
81
+ column="file", type="text", anon=True)
82
82
  .settings(parallel=8, cache=True)
83
83
  .map(is_positive=is_positive_dialogue_ending)
84
84
  .save("file_response")
@@ -132,7 +132,7 @@ def eval_dialogue(file: dc.File) -> bool:
132
132
  return result.lower().startswith("success")
133
133
 
134
134
  chain = (
135
- dc.read_storage("gs://datachain-demo/chatbot-KiT/", object_name="file", anon=True)
135
+ dc.read_storage("gs://datachain-demo/chatbot-KiT/", column="file", anon=True)
136
136
  .map(is_success=eval_dialogue)
137
137
  .save("mistral_files")
138
138
  )
@@ -177,7 +177,7 @@ def eval_dialog(file: dc.File) -> ChatCompletionResponse:
177
177
  {"role": "user", "content": file.read()}])
178
178
 
179
179
  chain = (
180
- dc.read_storage("gs://datachain-demo/chatbot-KiT/", object_name="file", anon=True)
180
+ dc.read_storage("gs://datachain-demo/chatbot-KiT/", column="file", anon=True)
181
181
  .settings(parallel=4, cache=True)
182
182
  .map(response=eval_dialog)
183
183
  .map(status=lambda response: response.choices[0].message.content.lower()[:7])
@@ -63,13 +63,13 @@ def main():
63
63
 
64
64
  # Static CSV with header schema test parsing 3.5K objects
65
65
  uri = "gs://datachain-demo/chatbot-csv/"
66
- static_csv_ds = dc.read_csv(uri, output=ChatDialog, object_name="chat", anon="True")
66
+ static_csv_ds = dc.read_csv(uri, output=ChatDialog, column="chat", anon="True")
67
67
  static_csv_ds.print_schema()
68
68
  static_csv_ds.show()
69
69
 
70
70
  # Dynamic CSV with header schema test parsing 3/3M objects
71
71
  uri = "gs://datachain-demo/laion-aesthetics-csv/laion_aesthetics_1024_33M_1.csv"
72
- dynamic_csv_ds = dc.read_csv(uri, object_name="laion", nrows=3, anon="True")
72
+ dynamic_csv_ds = dc.read_csv(uri, column="laion", nrows=3, anon="True")
73
73
  dynamic_csv_ds.print_schema()
74
74
  dynamic_csv_ds.show()
75
75
 
@@ -93,7 +93,7 @@ if __name__ == "__main__":
93
93
 
94
94
  print("\nAudio emotion classification:")
95
95
  try:
96
- subprocess.run(["ffmpeg", "-L"], check=True) # noqa: S603, S607
96
+ subprocess.run(["ffmpeg", "-L"], check=True) # noqa: S607
97
97
  (
98
98
  dc.read_storage(
99
99
  audio_source,
@@ -580,15 +580,13 @@ class Catalog:
580
580
  source: str,
581
581
  update=False,
582
582
  client_config=None,
583
- object_name="file",
583
+ column="file",
584
584
  skip_indexing=False,
585
585
  ) -> tuple[Optional["Listing"], "Client", str]:
586
586
  from datachain import read_storage
587
587
  from datachain.listing import Listing
588
588
 
589
- read_storage(
590
- source, session=self.session, update=update, object_name=object_name
591
- ).exec()
589
+ read_storage(source, session=self.session, update=update, column=column).exec()
592
590
 
593
591
  list_ds_name, list_uri, list_path, _ = get_listing(
594
592
  source, self.session, update=update
@@ -602,7 +600,7 @@ class Catalog:
602
600
  self.warehouse.clone(),
603
601
  client,
604
602
  dataset_name=list_ds_name,
605
- object_name=object_name,
603
+ column=column,
606
604
  )
607
605
 
608
606
  return lst, client, list_path
@@ -30,8 +30,8 @@ if TYPE_CHECKING:
30
30
  DEFAULT_DELIMITER = "__"
31
31
 
32
32
 
33
- def col_name(name: str, object_name: str = "file") -> str:
34
- return f"{object_name}{DEFAULT_DELIMITER}{name}"
33
+ def col_name(name: str, column: str = "file") -> str:
34
+ return f"{column}{DEFAULT_DELIMITER}{name}"
35
35
 
36
36
 
37
37
  def dedup_columns(columns: Iterable[sa.Column]) -> list[sa.Column]:
@@ -84,19 +84,19 @@ def convert_rows_custom_column_types(
84
84
 
85
85
 
86
86
  class DirExpansion:
87
- def __init__(self, object_name: str):
88
- self.object_name = object_name
87
+ def __init__(self, column: str):
88
+ self.column = column
89
89
 
90
- def col_name(self, name: str, object_name: Optional[str] = None) -> str:
91
- object_name = object_name or self.object_name
92
- return col_name(name, object_name)
90
+ def col_name(self, name: str, column: Optional[str] = None) -> str:
91
+ column = column or self.column
92
+ return col_name(name, column)
93
93
 
94
- def c(self, query, name: str, object_name: Optional[str] = None) -> str:
95
- return getattr(query.c, self.col_name(name, object_name=object_name))
94
+ def c(self, query, name: str, column: Optional[str] = None) -> str:
95
+ return getattr(query.c, self.col_name(name, column=column))
96
96
 
97
97
  def base_select(self, q):
98
98
  return sa.select(
99
- self.c(q, "id", object_name="sys"),
99
+ self.c(q, "id", column="sys"),
100
100
  false().label(self.col_name("is_dir")),
101
101
  self.c(q, "source"),
102
102
  self.c(q, "path"),
@@ -153,12 +153,12 @@ class DataTable:
153
153
  name: str,
154
154
  engine: "DatabaseEngine",
155
155
  column_types: Optional[dict[str, SQLType]] = None,
156
- object_name: str = "file",
156
+ column: str = "file",
157
157
  ):
158
158
  self.name: str = name
159
159
  self.engine = engine
160
160
  self.column_types: dict[str, SQLType] = column_types or {}
161
- self.object_name = object_name
161
+ self.column = column
162
162
 
163
163
  @staticmethod
164
164
  def copy_column(
@@ -224,18 +224,16 @@ class DataTable:
224
224
  def columns(self) -> "ReadOnlyColumnCollection[str, sa.Column[Any]]":
225
225
  return self.table.columns
226
226
 
227
- def col_name(self, name: str, object_name: Optional[str] = None) -> str:
228
- object_name = object_name or self.object_name
229
- return col_name(name, object_name)
227
+ def col_name(self, name: str, column: Optional[str] = None) -> str:
228
+ column = column or self.column
229
+ return col_name(name, column)
230
230
 
231
- def without_object(
232
- self, column_name: str, object_name: Optional[str] = None
233
- ) -> str:
234
- object_name = object_name or self.object_name
235
- return column_name.removeprefix(f"{object_name}{DEFAULT_DELIMITER}")
231
+ def without_object(self, column_name: str, column: Optional[str] = None) -> str:
232
+ column = column or self.column
233
+ return column_name.removeprefix(f"{column}{DEFAULT_DELIMITER}")
236
234
 
237
- def c(self, name: str, object_name: Optional[str] = None):
238
- return getattr(self.columns, self.col_name(name, object_name=object_name))
235
+ def c(self, name: str, column: Optional[str] = None):
236
+ return getattr(self.columns, self.col_name(name, column=column))
239
237
 
240
238
  @property
241
239
  def table(self) -> "sa.Table":
@@ -275,7 +273,7 @@ class DataTable:
275
273
  ]
276
274
 
277
275
  def dir_expansion(self):
278
- return DirExpansion(self.object_name)
276
+ return DirExpansion(self.column)
279
277
 
280
278
 
281
279
  PARTITION_COLUMN_ID = "partition_id"
@@ -489,7 +489,7 @@ class SQLiteWarehouse(AbstractWarehouse):
489
489
  self, dataset: DatasetRecord, version: int
490
490
  ) -> list[StorageURI]:
491
491
  dr = self.dataset_rows(dataset, version)
492
- query = dr.select(dr.c("source", object_name="file")).distinct()
492
+ query = dr.select(dr.c("source", column="file")).distinct()
493
493
  cur = self.db.cursor()
494
494
  cur.row_factory = sqlite3.Row # type: ignore[assignment]
495
495
 
@@ -179,7 +179,7 @@ class AbstractWarehouse(ABC, Serializable):
179
179
  self,
180
180
  dataset: DatasetRecord,
181
181
  version: Optional[int] = None,
182
- object_name: str = "file",
182
+ column: str = "file",
183
183
  ):
184
184
  version = version or dataset.latest_version
185
185
 
@@ -188,7 +188,7 @@ class AbstractWarehouse(ABC, Serializable):
188
188
  table_name,
189
189
  self.db,
190
190
  dataset.get_schema(version),
191
- object_name=object_name,
191
+ column=column,
192
192
  )
193
193
 
194
194
  @property
@@ -487,7 +487,7 @@ class AbstractWarehouse(ABC, Serializable):
487
487
  dataset_rows: "DataTable",
488
488
  path_list: list[str],
489
489
  glob_name: str,
490
- object_name="file",
490
+ column="file",
491
491
  ) -> Iterator[Node]:
492
492
  """Finds all Nodes that correspond to GLOB like path pattern."""
493
493
  dr = dataset_rows
@@ -521,7 +521,7 @@ class AbstractWarehouse(ABC, Serializable):
521
521
  de = dr.dir_expansion()
522
522
  q = de.query(
523
523
  dr.select().where(dr.c("is_latest") == true()).subquery(),
524
- object_name=dr.object_name,
524
+ column=dr.column,
525
525
  ).subquery()
526
526
  q = self.expand_query(de, q, dr)
527
527
 
@@ -597,12 +597,10 @@ class AbstractWarehouse(ABC, Serializable):
597
597
  with_default(dr.c("is_latest")),
598
598
  dr.c("last_modified"),
599
599
  with_default(dr.c("size")),
600
- with_default(dr.c("rand", object_name="sys")),
600
+ with_default(dr.c("rand", column="sys")),
601
601
  dr.c("location"),
602
602
  de.c(q, "source"),
603
- ).select_from(
604
- q.outerjoin(dr.table, q.c.sys__id == dr.c("id", object_name="sys"))
605
- )
603
+ ).select_from(q.outerjoin(dr.table, q.c.sys__id == dr.c("id", column="sys")))
606
604
 
607
605
  def get_node_by_path(self, dataset_rows: "DataTable", path: str) -> Node:
608
606
  """Gets node that corresponds to some path"""
@@ -12,6 +12,7 @@ from datachain.dataset import (
12
12
  )
13
13
  from datachain.job import Job
14
14
  from datachain.lib.data_model import DataModel
15
+ from datachain.query.session import Session
15
16
  from datachain.utils import TIME_ZERO
16
17
 
17
18
  if TYPE_CHECKING:
@@ -32,6 +33,10 @@ class DatasetInfo(DataModel):
32
33
  error_message: str = Field(default="")
33
34
  error_stack: str = Field(default="")
34
35
 
36
+ @property
37
+ def is_temp(self) -> bool:
38
+ return Session.is_temp_dataset(self.name)
39
+
35
40
  @staticmethod
36
41
  def _validate_dict(
37
42
  v: Optional[Union[str, dict]],
@@ -21,7 +21,7 @@ def read_csv(
21
21
  delimiter: Optional[str] = None,
22
22
  header: bool = True,
23
23
  output: OutputType = None,
24
- object_name: str = "",
24
+ column: str = "",
25
25
  model_name: str = "",
26
26
  source: bool = True,
27
27
  nrows=None,
@@ -42,7 +42,7 @@ def read_csv(
42
42
  output : Dictionary or feature class defining column names and their
43
43
  corresponding types. List of column names is also accepted, in which
44
44
  case types will be inferred.
45
- object_name : Created object column name.
45
+ column : Created column name.
46
46
  model_name : Generated model name.
47
47
  source : Whether to include info about the source file.
48
48
  nrows : Optional row limit.
@@ -119,7 +119,7 @@ def read_csv(
119
119
  )
120
120
  return chain.parse_tabular(
121
121
  output=output,
122
- object_name=object_name,
122
+ column=column,
123
123
  model_name=model_name,
124
124
  source=source,
125
125
  nrows=nrows,
@@ -357,7 +357,7 @@ class DataChain:
357
357
  self,
358
358
  col: str,
359
359
  model_name: Optional[str] = None,
360
- object_name: Optional[str] = None,
360
+ column: Optional[str] = None,
361
361
  schema_sample_size: int = 1,
362
362
  ) -> "DataChain":
363
363
  """Explodes a column containing JSON objects (dict or str DataChain type) into
@@ -368,7 +368,7 @@ class DataChain:
368
368
  col: the name of the column containing JSON to be exploded.
369
369
  model_name: optional generated model name. By default generates the name
370
370
  automatically.
371
- object_name: optional generated object column name. By default generates the
371
+ column: optional generated column name. By default generates the
372
372
  name automatically.
373
373
  schema_sample_size: the number of rows to use for inferring the schema of
374
374
  the JSON (in case some fields are optional and it's not enough to
@@ -406,10 +406,10 @@ class DataChain:
406
406
  )
407
407
  return model.model_validate(json_dict)
408
408
 
409
- if not object_name:
410
- object_name = f"{col}_expl"
409
+ if not column:
410
+ column = f"{col}_expl"
411
411
 
412
- return self.map(json_to_model, params=col, output={object_name: model})
412
+ return self.map(json_to_model, params=col, output={column: model})
413
413
 
414
414
  @classmethod
415
415
  def datasets(
@@ -1588,7 +1588,7 @@ class DataChain:
1588
1588
  def parse_tabular(
1589
1589
  self,
1590
1590
  output: OutputType = None,
1591
- object_name: str = "",
1591
+ column: str = "",
1592
1592
  model_name: str = "",
1593
1593
  source: bool = True,
1594
1594
  nrows: Optional[int] = None,
@@ -1600,7 +1600,7 @@ class DataChain:
1600
1600
  output : Dictionary or feature class defining column names and their
1601
1601
  corresponding types. List of column names is also accepted, in which
1602
1602
  case types will be inferred.
1603
- object_name : Generated object column name.
1603
+ column : Generated column name.
1604
1604
  model_name : Generated model name.
1605
1605
  source : Whether to include info about the source file.
1606
1606
  nrows : Optional row limit.
@@ -1651,14 +1651,14 @@ class DataChain:
1651
1651
  raise DatasetPrepareError(self.name, e) from e
1652
1652
 
1653
1653
  if isinstance(output, dict):
1654
- model_name = model_name or object_name or ""
1654
+ model_name = model_name or column or ""
1655
1655
  model = dict_to_data_model(model_name, output)
1656
1656
  output = model
1657
1657
  else:
1658
1658
  model = output # type: ignore[assignment]
1659
1659
 
1660
- if object_name:
1661
- output = {object_name: model} # type: ignore[dict-item]
1660
+ if column:
1661
+ output = {column: model} # type: ignore[dict-item]
1662
1662
  elif isinstance(output, type(BaseModel)):
1663
1663
  output = {
1664
1664
  name: info.annotation # type: ignore[misc]
@@ -1,7 +1,4 @@
1
- from typing import (
2
- TYPE_CHECKING,
3
- Optional,
4
- )
1
+ from typing import TYPE_CHECKING, Optional, get_origin, get_type_hints
5
2
 
6
3
  from datachain.lib.dataset_info import DatasetInfo
7
4
  from datachain.lib.file import (
@@ -102,7 +99,7 @@ def datasets(
102
99
  session: Optional[Session] = None,
103
100
  settings: Optional[dict] = None,
104
101
  in_memory: bool = False,
105
- object_name: str = "dataset",
102
+ column: Optional[str] = None,
106
103
  include_listing: bool = False,
107
104
  studio: bool = False,
108
105
  ) -> "DataChain":
@@ -112,7 +109,8 @@ def datasets(
112
109
  session: Optional session instance. If not provided, uses default session.
113
110
  settings: Optional dictionary of settings to configure the chain.
114
111
  in_memory: If True, creates an in-memory session. Defaults to False.
115
- object_name: Name of the output object in the chain. Defaults to "dataset".
112
+ column: Name of the output column in the chain. Defaults to None which
113
+ means no top level column will be created.
116
114
  include_listing: If True, includes listing datasets. Defaults to False.
117
115
  studio: If True, returns datasets from Studio only,
118
116
  otherwise returns all local datasets. Defaults to False.
@@ -124,7 +122,7 @@ def datasets(
124
122
  ```py
125
123
  import datachain as dc
126
124
 
127
- chain = dc.datasets()
125
+ chain = dc.datasets(column="dataset")
128
126
  for ds in chain.collect("dataset"):
129
127
  print(f"{ds.name}@v{ds.version}")
130
128
  ```
@@ -139,11 +137,32 @@ def datasets(
139
137
  include_listing=include_listing, studio=studio
140
138
  )
141
139
  ]
140
+ datasets_values = [d for d in datasets_values if not d.is_temp]
141
+
142
+ if not column:
143
+ # flattening dataset fields
144
+ schema = {
145
+ k: get_origin(v) if get_origin(v) is dict else v
146
+ for k, v in get_type_hints(DatasetInfo).items()
147
+ if k in DatasetInfo.model_fields
148
+ }
149
+ data = {k: [] for k in DatasetInfo.model_fields} # type: ignore[var-annotated]
150
+ for d in [d.model_dump() for d in datasets_values]:
151
+ for field, value in d.items():
152
+ data[field].append(value)
153
+
154
+ return read_values(
155
+ session=session,
156
+ settings=settings,
157
+ in_memory=in_memory,
158
+ output=schema,
159
+ **data, # type: ignore[arg-type]
160
+ )
142
161
 
143
162
  return read_values(
144
163
  session=session,
145
164
  settings=settings,
146
165
  in_memory=in_memory,
147
- output={object_name: DatasetInfo},
148
- **{object_name: datasets_values}, # type: ignore[arg-type]
166
+ output={column: DatasetInfo},
167
+ **{column: datasets_values}, # type: ignore[arg-type]
149
168
  )
@@ -23,7 +23,7 @@ def read_hf(
23
23
  *args,
24
24
  session: Optional[Session] = None,
25
25
  settings: Optional[dict] = None,
26
- object_name: str = "",
26
+ column: str = "",
27
27
  model_name: str = "",
28
28
  **kwargs,
29
29
  ) -> "DataChain":
@@ -34,7 +34,7 @@ def read_hf(
34
34
  or an instance of `datasets.Dataset`-like object.
35
35
  session : Session to use for the chain.
36
36
  settings : Settings to use for the chain.
37
- object_name : Generated object column name.
37
+ column : Generated object column name.
38
38
  model_name : Generated model name.
39
39
  kwargs : Parameters to pass to datasets.load_dataset.
40
40
 
@@ -62,12 +62,12 @@ def read_hf(
62
62
  if len(ds_dict) > 1:
63
63
  output = {"split": str}
64
64
 
65
- model_name = model_name or object_name or ""
65
+ model_name = model_name or column or ""
66
66
  hf_features = next(iter(ds_dict.values())).features
67
67
  output = output | get_output_schema(hf_features)
68
68
  model = dict_to_data_model(model_name, output)
69
- if object_name:
70
- output = {object_name: model}
69
+ if column:
70
+ output = {column: model}
71
71
 
72
72
  chain = read_values(split=list(ds_dict.keys()), session=session, settings=settings)
73
73
  return chain.gen(HFGenerator(dataset, model, *args, **kwargs), output=output)
@@ -28,7 +28,7 @@ def read_json(
28
28
  spec: Optional[DataType] = None,
29
29
  schema_from: Optional[str] = "auto",
30
30
  jmespath: Optional[str] = None,
31
- object_name: Optional[str] = "",
31
+ column: Optional[str] = "",
32
32
  model_name: Optional[str] = None,
33
33
  format: Optional[str] = "json",
34
34
  nrows=None,
@@ -42,7 +42,7 @@ def read_json(
42
42
  type : read file as "binary", "text", or "image" data. Default is "text".
43
43
  spec : optional Data Model
44
44
  schema_from : path to sample to infer spec (if schema not provided)
45
- object_name : generated object column name
45
+ column : generated column name
46
46
  model_name : optional generated model name
47
47
  format: "json", "jsonl"
48
48
  jmespath : optional JMESPATH expression to reduce JSON
@@ -70,13 +70,13 @@ def read_json(
70
70
  name_end = re.search(r"\W", s).start() if re.search(r"\W", s) else len(s) # type: ignore[union-attr]
71
71
  return s[:name_end]
72
72
 
73
- if (not object_name) and jmespath:
74
- object_name = jmespath_to_name(jmespath)
75
- if not object_name:
76
- object_name = format
73
+ if (not column) and jmespath:
74
+ column = jmespath_to_name(jmespath)
75
+ if not column:
76
+ column = format
77
77
  chain = read_storage(uri=path, type=type, **kwargs)
78
78
  signal_dict = {
79
- object_name: read_meta(
79
+ column: read_meta(
80
80
  schema_from=schema_from,
81
81
  format=format,
82
82
  spec=spec,
@@ -19,7 +19,7 @@ if TYPE_CHECKING:
19
19
  def listings(
20
20
  session: Optional[Session] = None,
21
21
  in_memory: bool = False,
22
- object_name: str = "listing",
22
+ column: str = "listing",
23
23
  **kwargs,
24
24
  ) -> "DataChain":
25
25
  """Generate chain with list of cached listings.
@@ -38,6 +38,6 @@ def listings(
38
38
  return read_values(
39
39
  session=session,
40
40
  in_memory=in_memory,
41
- output={object_name: ListingInfo},
42
- **{object_name: catalog.listings()}, # type: ignore[arg-type]
41
+ output={column: ListingInfo},
42
+ **{column: catalog.listings()}, # type: ignore[arg-type]
43
43
  )
@@ -22,7 +22,7 @@ def read_pandas( # type: ignore[override]
22
22
  session: Optional[Session] = None,
23
23
  settings: Optional[dict] = None,
24
24
  in_memory: bool = False,
25
- object_name: str = "",
25
+ column: str = "",
26
26
  ) -> "DataChain":
27
27
  """Generate chain from pandas data-frame.
28
28
 
@@ -39,18 +39,18 @@ def read_pandas( # type: ignore[override]
39
39
 
40
40
  fr_map = {col.lower(): df[col].tolist() for col in df.columns}
41
41
 
42
- for column in fr_map:
43
- if not column.isidentifier():
42
+ for c in fr_map:
43
+ if not c.isidentifier():
44
44
  raise DatasetPrepareError(
45
45
  name,
46
- f"import from pandas error - '{column}' cannot be a column name",
46
+ f"import from pandas error - '{c}' cannot be a column name",
47
47
  )
48
48
 
49
49
  return read_values(
50
50
  name,
51
51
  session,
52
52
  settings=settings,
53
- object_name=object_name,
53
+ column=column,
54
54
  in_memory=in_memory,
55
55
  **fr_map,
56
56
  )
@@ -19,7 +19,7 @@ def read_parquet(
19
19
  path,
20
20
  partitioning: Any = "hive",
21
21
  output: Optional[dict[str, DataType]] = None,
22
- object_name: str = "",
22
+ column: str = "",
23
23
  model_name: str = "",
24
24
  source: bool = True,
25
25
  session: Optional[Session] = None,
@@ -33,7 +33,7 @@ def read_parquet(
33
33
  as `s3://`, `gs://`, `az://` or "file:///".
34
34
  partitioning : Any pyarrow partitioning schema.
35
35
  output : Dictionary defining column names and their corresponding types.
36
- object_name : Created object column name.
36
+ column : Created column name.
37
37
  model_name : Generated model name.
38
38
  source : Whether to include info about the source file.
39
39
  session : Session to use for the chain.
@@ -57,7 +57,7 @@ def read_parquet(
57
57
  chain = read_storage(path, session=session, settings=settings, **kwargs)
58
58
  return chain.parse_tabular(
59
59
  output=output,
60
- object_name=object_name,
60
+ column=column,
61
61
  model_name=model_name,
62
62
  source=source,
63
63
  format="parquet",