datachain 0.31.3__tar.gz → 0.31.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (428)
  1. {datachain-0.31.3 → datachain-0.31.4}/PKG-INFO +1 -1
  2. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/catalog/catalog.py +22 -58
  3. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/file.py +95 -18
  4. {datachain-0.31.3 → datachain-0.31.4}/src/datachain.egg-info/PKG-INFO +1 -1
  5. {datachain-0.31.3 → datachain-0.31.4}/tests/func/test_file.py +68 -0
  6. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/test_query.py +3 -22
  7. {datachain-0.31.3 → datachain-0.31.4}/.cruft.json +0 -0
  8. {datachain-0.31.3 → datachain-0.31.4}/.gitattributes +0 -0
  9. {datachain-0.31.3 → datachain-0.31.4}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  10. {datachain-0.31.3 → datachain-0.31.4}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
  11. {datachain-0.31.3 → datachain-0.31.4}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  12. {datachain-0.31.3 → datachain-0.31.4}/.github/codecov.yaml +0 -0
  13. {datachain-0.31.3 → datachain-0.31.4}/.github/dependabot.yml +0 -0
  14. {datachain-0.31.3 → datachain-0.31.4}/.github/workflows/benchmarks.yml +0 -0
  15. {datachain-0.31.3 → datachain-0.31.4}/.github/workflows/release.yml +0 -0
  16. {datachain-0.31.3 → datachain-0.31.4}/.github/workflows/tests-studio.yml +0 -0
  17. {datachain-0.31.3 → datachain-0.31.4}/.github/workflows/tests.yml +0 -0
  18. {datachain-0.31.3 → datachain-0.31.4}/.github/workflows/update-template.yaml +0 -0
  19. {datachain-0.31.3 → datachain-0.31.4}/.gitignore +0 -0
  20. {datachain-0.31.3 → datachain-0.31.4}/.pre-commit-config.yaml +0 -0
  21. {datachain-0.31.3 → datachain-0.31.4}/CODE_OF_CONDUCT.rst +0 -0
  22. {datachain-0.31.3 → datachain-0.31.4}/LICENSE +0 -0
  23. {datachain-0.31.3 → datachain-0.31.4}/README.rst +0 -0
  24. {datachain-0.31.3 → datachain-0.31.4}/docs/api_hooks.py +0 -0
  25. {datachain-0.31.3 → datachain-0.31.4}/docs/assets/captioned_cartoons.png +0 -0
  26. {datachain-0.31.3 → datachain-0.31.4}/docs/assets/datachain-white.svg +0 -0
  27. {datachain-0.31.3 → datachain-0.31.4}/docs/assets/datachain.svg +0 -0
  28. {datachain-0.31.3 → datachain-0.31.4}/docs/commands/auth/login.md +0 -0
  29. {datachain-0.31.3 → datachain-0.31.4}/docs/commands/auth/logout.md +0 -0
  30. {datachain-0.31.3 → datachain-0.31.4}/docs/commands/auth/team.md +0 -0
  31. {datachain-0.31.3 → datachain-0.31.4}/docs/commands/auth/token.md +0 -0
  32. {datachain-0.31.3 → datachain-0.31.4}/docs/commands/index.md +0 -0
  33. {datachain-0.31.3 → datachain-0.31.4}/docs/commands/job/cancel.md +0 -0
  34. {datachain-0.31.3 → datachain-0.31.4}/docs/commands/job/clusters.md +0 -0
  35. {datachain-0.31.3 → datachain-0.31.4}/docs/commands/job/logs.md +0 -0
  36. {datachain-0.31.3 → datachain-0.31.4}/docs/commands/job/ls.md +0 -0
  37. {datachain-0.31.3 → datachain-0.31.4}/docs/commands/job/run.md +0 -0
  38. {datachain-0.31.3 → datachain-0.31.4}/docs/contributing.md +0 -0
  39. {datachain-0.31.3 → datachain-0.31.4}/docs/css/github-permalink-style.css +0 -0
  40. {datachain-0.31.3 → datachain-0.31.4}/docs/examples.md +0 -0
  41. {datachain-0.31.3 → datachain-0.31.4}/docs/guide/db_migrations.md +0 -0
  42. {datachain-0.31.3 → datachain-0.31.4}/docs/guide/delta.md +0 -0
  43. {datachain-0.31.3 → datachain-0.31.4}/docs/guide/env.md +0 -0
  44. {datachain-0.31.3 → datachain-0.31.4}/docs/guide/index.md +0 -0
  45. {datachain-0.31.3 → datachain-0.31.4}/docs/guide/namespaces.md +0 -0
  46. {datachain-0.31.3 → datachain-0.31.4}/docs/guide/processing.md +0 -0
  47. {datachain-0.31.3 → datachain-0.31.4}/docs/guide/remotes.md +0 -0
  48. {datachain-0.31.3 → datachain-0.31.4}/docs/guide/retry.md +0 -0
  49. {datachain-0.31.3 → datachain-0.31.4}/docs/index.md +0 -0
  50. {datachain-0.31.3 → datachain-0.31.4}/docs/overrides/main.html +0 -0
  51. {datachain-0.31.3 → datachain-0.31.4}/docs/quick-start.md +0 -0
  52. {datachain-0.31.3 → datachain-0.31.4}/docs/references/data-types/arrowrow.md +0 -0
  53. {datachain-0.31.3 → datachain-0.31.4}/docs/references/data-types/bbox.md +0 -0
  54. {datachain-0.31.3 → datachain-0.31.4}/docs/references/data-types/file.md +0 -0
  55. {datachain-0.31.3 → datachain-0.31.4}/docs/references/data-types/imagefile.md +0 -0
  56. {datachain-0.31.3 → datachain-0.31.4}/docs/references/data-types/index.md +0 -0
  57. {datachain-0.31.3 → datachain-0.31.4}/docs/references/data-types/pose.md +0 -0
  58. {datachain-0.31.3 → datachain-0.31.4}/docs/references/data-types/segment.md +0 -0
  59. {datachain-0.31.3 → datachain-0.31.4}/docs/references/data-types/tarvfile.md +0 -0
  60. {datachain-0.31.3 → datachain-0.31.4}/docs/references/data-types/textfile.md +0 -0
  61. {datachain-0.31.3 → datachain-0.31.4}/docs/references/data-types/videofile.md +0 -0
  62. {datachain-0.31.3 → datachain-0.31.4}/docs/references/datachain.md +0 -0
  63. {datachain-0.31.3 → datachain-0.31.4}/docs/references/func.md +0 -0
  64. {datachain-0.31.3 → datachain-0.31.4}/docs/references/functions/aggregate.md +0 -0
  65. {datachain-0.31.3 → datachain-0.31.4}/docs/references/functions/array.md +0 -0
  66. {datachain-0.31.3 → datachain-0.31.4}/docs/references/functions/conditional.md +0 -0
  67. {datachain-0.31.3 → datachain-0.31.4}/docs/references/functions/numeric.md +0 -0
  68. {datachain-0.31.3 → datachain-0.31.4}/docs/references/functions/path.md +0 -0
  69. {datachain-0.31.3 → datachain-0.31.4}/docs/references/functions/random.md +0 -0
  70. {datachain-0.31.3 → datachain-0.31.4}/docs/references/functions/string.md +0 -0
  71. {datachain-0.31.3 → datachain-0.31.4}/docs/references/functions/window.md +0 -0
  72. {datachain-0.31.3 → datachain-0.31.4}/docs/references/index.md +0 -0
  73. {datachain-0.31.3 → datachain-0.31.4}/docs/references/toolkit.md +0 -0
  74. {datachain-0.31.3 → datachain-0.31.4}/docs/references/torch.md +0 -0
  75. {datachain-0.31.3 → datachain-0.31.4}/docs/references/udf.md +0 -0
  76. {datachain-0.31.3 → datachain-0.31.4}/docs/studio/api/.gitkeep +0 -0
  77. {datachain-0.31.3 → datachain-0.31.4}/docs/templates/main.dot +0 -0
  78. {datachain-0.31.3 → datachain-0.31.4}/docs/templates/operation.dot +0 -0
  79. {datachain-0.31.3 → datachain-0.31.4}/docs/templates/responses.def +0 -0
  80. {datachain-0.31.3 → datachain-0.31.4}/docs/tutorials.md +0 -0
  81. {datachain-0.31.3 → datachain-0.31.4}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
  82. {datachain-0.31.3 → datachain-0.31.4}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
  83. {datachain-0.31.3 → datachain-0.31.4}/examples/computer_vision/openimage-detect.py +0 -0
  84. {datachain-0.31.3 → datachain-0.31.4}/examples/computer_vision/ultralytics-bbox.py +0 -0
  85. {datachain-0.31.3 → datachain-0.31.4}/examples/computer_vision/ultralytics-pose.py +0 -0
  86. {datachain-0.31.3 → datachain-0.31.4}/examples/computer_vision/ultralytics-segment.py +0 -0
  87. {datachain-0.31.3 → datachain-0.31.4}/examples/get_started/common_sql_functions.py +0 -0
  88. {datachain-0.31.3 → datachain-0.31.4}/examples/get_started/json-csv-reader.py +0 -0
  89. {datachain-0.31.3 → datachain-0.31.4}/examples/get_started/nested_datamodel.py +0 -0
  90. {datachain-0.31.3 → datachain-0.31.4}/examples/get_started/torch-loader.py +0 -0
  91. {datachain-0.31.3 → datachain-0.31.4}/examples/get_started/udfs/parallel.py +0 -0
  92. {datachain-0.31.3 → datachain-0.31.4}/examples/get_started/udfs/simple.py +0 -0
  93. {datachain-0.31.3 → datachain-0.31.4}/examples/get_started/udfs/stateful.py +0 -0
  94. {datachain-0.31.3 → datachain-0.31.4}/examples/incremental_processing/delta.py +0 -0
  95. {datachain-0.31.3 → datachain-0.31.4}/examples/incremental_processing/retry.py +0 -0
  96. {datachain-0.31.3 → datachain-0.31.4}/examples/incremental_processing/utils.py +0 -0
  97. {datachain-0.31.3 → datachain-0.31.4}/examples/llm_and_nlp/claude-query.py +0 -0
  98. {datachain-0.31.3 → datachain-0.31.4}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
  99. {datachain-0.31.3 → datachain-0.31.4}/examples/multimodal/audio-to-text.py +0 -0
  100. {datachain-0.31.3 → datachain-0.31.4}/examples/multimodal/clip_inference.py +0 -0
  101. {datachain-0.31.3 → datachain-0.31.4}/examples/multimodal/hf_pipeline.py +0 -0
  102. {datachain-0.31.3 → datachain-0.31.4}/examples/multimodal/openai_image_desc_lib.py +0 -0
  103. {datachain-0.31.3 → datachain-0.31.4}/examples/multimodal/wds.py +0 -0
  104. {datachain-0.31.3 → datachain-0.31.4}/examples/multimodal/wds_filtered.py +0 -0
  105. {datachain-0.31.3 → datachain-0.31.4}/mkdocs.yml +0 -0
  106. {datachain-0.31.3 → datachain-0.31.4}/noxfile.py +0 -0
  107. {datachain-0.31.3 → datachain-0.31.4}/pyproject.toml +0 -0
  108. {datachain-0.31.3 → datachain-0.31.4}/setup.cfg +0 -0
  109. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/__init__.py +0 -0
  110. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/__main__.py +0 -0
  111. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/asyn.py +0 -0
  112. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/cache.py +0 -0
  113. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/catalog/__init__.py +0 -0
  114. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/catalog/datasource.py +0 -0
  115. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/catalog/loader.py +0 -0
  116. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/cli/__init__.py +0 -0
  117. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/cli/commands/__init__.py +0 -0
  118. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/cli/commands/datasets.py +0 -0
  119. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/cli/commands/du.py +0 -0
  120. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/cli/commands/index.py +0 -0
  121. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/cli/commands/ls.py +0 -0
  122. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/cli/commands/misc.py +0 -0
  123. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/cli/commands/query.py +0 -0
  124. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/cli/commands/show.py +0 -0
  125. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/cli/parser/__init__.py +0 -0
  126. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/cli/parser/job.py +0 -0
  127. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/cli/parser/studio.py +0 -0
  128. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/cli/parser/utils.py +0 -0
  129. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/cli/utils.py +0 -0
  130. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/client/__init__.py +0 -0
  131. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/client/azure.py +0 -0
  132. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/client/fileslice.py +0 -0
  133. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/client/fsspec.py +0 -0
  134. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/client/gcs.py +0 -0
  135. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/client/hf.py +0 -0
  136. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/client/local.py +0 -0
  137. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/client/s3.py +0 -0
  138. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/config.py +0 -0
  139. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/data_storage/__init__.py +0 -0
  140. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/data_storage/db_engine.py +0 -0
  141. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/data_storage/job.py +0 -0
  142. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/data_storage/metastore.py +0 -0
  143. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/data_storage/schema.py +0 -0
  144. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/data_storage/serializer.py +0 -0
  145. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/data_storage/sqlite.py +0 -0
  146. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/data_storage/warehouse.py +0 -0
  147. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/dataset.py +0 -0
  148. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/delta.py +0 -0
  149. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/diff/__init__.py +0 -0
  150. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/error.py +0 -0
  151. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/fs/__init__.py +0 -0
  152. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/fs/reference.py +0 -0
  153. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/fs/utils.py +0 -0
  154. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/func/__init__.py +0 -0
  155. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/func/aggregate.py +0 -0
  156. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/func/array.py +0 -0
  157. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/func/base.py +0 -0
  158. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/func/conditional.py +0 -0
  159. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/func/func.py +0 -0
  160. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/func/numeric.py +0 -0
  161. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/func/path.py +0 -0
  162. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/func/random.py +0 -0
  163. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/func/string.py +0 -0
  164. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/func/window.py +0 -0
  165. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/job.py +0 -0
  166. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/__init__.py +0 -0
  167. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/arrow.py +0 -0
  168. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/audio.py +0 -0
  169. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/clip.py +0 -0
  170. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/convert/__init__.py +0 -0
  171. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/convert/flatten.py +0 -0
  172. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/convert/python_to_sql.py +0 -0
  173. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/convert/sql_to_python.py +0 -0
  174. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/convert/unflatten.py +0 -0
  175. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/convert/values_to_tuples.py +0 -0
  176. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/data_model.py +0 -0
  177. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/dataset_info.py +0 -0
  178. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/dc/__init__.py +0 -0
  179. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/dc/csv.py +0 -0
  180. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/dc/database.py +0 -0
  181. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/dc/datachain.py +0 -0
  182. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/dc/datasets.py +0 -0
  183. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/dc/hf.py +0 -0
  184. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/dc/json.py +0 -0
  185. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/dc/listings.py +0 -0
  186. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/dc/pandas.py +0 -0
  187. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/dc/parquet.py +0 -0
  188. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/dc/records.py +0 -0
  189. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/dc/storage.py +0 -0
  190. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/dc/storage_pattern.py +0 -0
  191. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/dc/utils.py +0 -0
  192. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/dc/values.py +0 -0
  193. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/hf.py +0 -0
  194. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/image.py +0 -0
  195. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/listing.py +0 -0
  196. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/listing_info.py +0 -0
  197. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/meta_formats.py +0 -0
  198. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/model_store.py +0 -0
  199. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/namespaces.py +0 -0
  200. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/projects.py +0 -0
  201. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/pytorch.py +0 -0
  202. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/settings.py +0 -0
  203. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/signal_schema.py +0 -0
  204. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/tar.py +0 -0
  205. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/text.py +0 -0
  206. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/udf.py +0 -0
  207. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/udf_signature.py +0 -0
  208. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/utils.py +0 -0
  209. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/video.py +0 -0
  210. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/webdataset.py +0 -0
  211. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/webdataset_laion.py +0 -0
  212. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/listing.py +0 -0
  213. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/model/__init__.py +0 -0
  214. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/model/bbox.py +0 -0
  215. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/model/pose.py +0 -0
  216. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/model/segment.py +0 -0
  217. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/model/ultralytics/__init__.py +0 -0
  218. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/model/ultralytics/bbox.py +0 -0
  219. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/model/ultralytics/pose.py +0 -0
  220. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/model/ultralytics/segment.py +0 -0
  221. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/model/utils.py +0 -0
  222. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/namespace.py +0 -0
  223. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/node.py +0 -0
  224. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/nodes_fetcher.py +0 -0
  225. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/nodes_thread_pool.py +0 -0
  226. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/progress.py +0 -0
  227. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/project.py +0 -0
  228. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/py.typed +0 -0
  229. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/query/__init__.py +0 -0
  230. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/query/batch.py +0 -0
  231. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/query/dataset.py +0 -0
  232. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/query/dispatch.py +0 -0
  233. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/query/metrics.py +0 -0
  234. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/query/params.py +0 -0
  235. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/query/queue.py +0 -0
  236. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/query/schema.py +0 -0
  237. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/query/session.py +0 -0
  238. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/query/udf.py +0 -0
  239. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/query/utils.py +0 -0
  240. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/remote/__init__.py +0 -0
  241. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/remote/studio.py +0 -0
  242. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/script_meta.py +0 -0
  243. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/semver.py +0 -0
  244. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/sql/__init__.py +0 -0
  245. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/sql/default/__init__.py +0 -0
  246. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/sql/default/base.py +0 -0
  247. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/sql/functions/__init__.py +0 -0
  248. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/sql/functions/aggregate.py +0 -0
  249. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/sql/functions/array.py +0 -0
  250. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/sql/functions/conditional.py +0 -0
  251. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/sql/functions/numeric.py +0 -0
  252. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/sql/functions/path.py +0 -0
  253. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/sql/functions/random.py +0 -0
  254. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/sql/functions/string.py +0 -0
  255. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/sql/postgresql_dialect.py +0 -0
  256. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/sql/postgresql_types.py +0 -0
  257. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/sql/selectable.py +0 -0
  258. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/sql/sqlite/__init__.py +0 -0
  259. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/sql/sqlite/base.py +0 -0
  260. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/sql/sqlite/types.py +0 -0
  261. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/sql/sqlite/vector.py +0 -0
  262. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/sql/types.py +0 -0
  263. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/sql/utils.py +0 -0
  264. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/studio.py +0 -0
  265. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/telemetry.py +0 -0
  266. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/toolkit/__init__.py +0 -0
  267. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/toolkit/split.py +0 -0
  268. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/torch/__init__.py +0 -0
  269. {datachain-0.31.3 → datachain-0.31.4}/src/datachain/utils.py +0 -0
  270. {datachain-0.31.3 → datachain-0.31.4}/src/datachain.egg-info/SOURCES.txt +0 -0
  271. {datachain-0.31.3 → datachain-0.31.4}/src/datachain.egg-info/dependency_links.txt +0 -0
  272. {datachain-0.31.3 → datachain-0.31.4}/src/datachain.egg-info/entry_points.txt +0 -0
  273. {datachain-0.31.3 → datachain-0.31.4}/src/datachain.egg-info/requires.txt +0 -0
  274. {datachain-0.31.3 → datachain-0.31.4}/src/datachain.egg-info/top_level.txt +0 -0
  275. {datachain-0.31.3 → datachain-0.31.4}/tests/__init__.py +0 -0
  276. {datachain-0.31.3 → datachain-0.31.4}/tests/benchmarks/__init__.py +0 -0
  277. {datachain-0.31.3 → datachain-0.31.4}/tests/benchmarks/conftest.py +0 -0
  278. {datachain-0.31.3 → datachain-0.31.4}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
  279. {datachain-0.31.3 → datachain-0.31.4}/tests/benchmarks/datasets/.dvc/config +0 -0
  280. {datachain-0.31.3 → datachain-0.31.4}/tests/benchmarks/datasets/.gitignore +0 -0
  281. {datachain-0.31.3 → datachain-0.31.4}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
  282. {datachain-0.31.3 → datachain-0.31.4}/tests/benchmarks/test_datachain.py +0 -0
  283. {datachain-0.31.3 → datachain-0.31.4}/tests/benchmarks/test_ls.py +0 -0
  284. {datachain-0.31.3 → datachain-0.31.4}/tests/benchmarks/test_version.py +0 -0
  285. {datachain-0.31.3 → datachain-0.31.4}/tests/conftest.py +0 -0
  286. {datachain-0.31.3 → datachain-0.31.4}/tests/data.py +0 -0
  287. {datachain-0.31.3 → datachain-0.31.4}/tests/examples/__init__.py +0 -0
  288. {datachain-0.31.3 → datachain-0.31.4}/tests/examples/test_examples.py +0 -0
  289. {datachain-0.31.3 → datachain-0.31.4}/tests/examples/test_wds_e2e.py +0 -0
  290. {datachain-0.31.3 → datachain-0.31.4}/tests/examples/wds_data.py +0 -0
  291. {datachain-0.31.3 → datachain-0.31.4}/tests/func/__init__.py +0 -0
  292. {datachain-0.31.3 → datachain-0.31.4}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
  293. {datachain-0.31.3 → datachain-0.31.4}/tests/func/data/lena.jpg +0 -0
  294. {datachain-0.31.3 → datachain-0.31.4}/tests/func/fake-service-account-credentials.json +0 -0
  295. {datachain-0.31.3 → datachain-0.31.4}/tests/func/functions/__init__.py +0 -0
  296. {datachain-0.31.3 → datachain-0.31.4}/tests/func/functions/test_aggregate.py +0 -0
  297. {datachain-0.31.3 → datachain-0.31.4}/tests/func/functions/test_array.py +0 -0
  298. {datachain-0.31.3 → datachain-0.31.4}/tests/func/functions/test_conditional.py +0 -0
  299. {datachain-0.31.3 → datachain-0.31.4}/tests/func/functions/test_numeric.py +0 -0
  300. {datachain-0.31.3 → datachain-0.31.4}/tests/func/functions/test_path.py +0 -0
  301. {datachain-0.31.3 → datachain-0.31.4}/tests/func/functions/test_random.py +0 -0
  302. {datachain-0.31.3 → datachain-0.31.4}/tests/func/functions/test_string.py +0 -0
  303. {datachain-0.31.3 → datachain-0.31.4}/tests/func/model/__init__.py +0 -0
  304. {datachain-0.31.3 → datachain-0.31.4}/tests/func/model/data/running-mask0.png +0 -0
  305. {datachain-0.31.3 → datachain-0.31.4}/tests/func/model/data/running-mask1.png +0 -0
  306. {datachain-0.31.3 → datachain-0.31.4}/tests/func/model/data/running.jpg +0 -0
  307. {datachain-0.31.3 → datachain-0.31.4}/tests/func/model/data/ships.jpg +0 -0
  308. {datachain-0.31.3 → datachain-0.31.4}/tests/func/model/test_yolo.py +0 -0
  309. {datachain-0.31.3 → datachain-0.31.4}/tests/func/test_audio.py +0 -0
  310. {datachain-0.31.3 → datachain-0.31.4}/tests/func/test_batching.py +0 -0
  311. {datachain-0.31.3 → datachain-0.31.4}/tests/func/test_catalog.py +0 -0
  312. {datachain-0.31.3 → datachain-0.31.4}/tests/func/test_client.py +0 -0
  313. {datachain-0.31.3 → datachain-0.31.4}/tests/func/test_cloud_transfer.py +0 -0
  314. {datachain-0.31.3 → datachain-0.31.4}/tests/func/test_data_storage.py +0 -0
  315. {datachain-0.31.3 → datachain-0.31.4}/tests/func/test_datachain.py +0 -0
  316. {datachain-0.31.3 → datachain-0.31.4}/tests/func/test_datachain_merge.py +0 -0
  317. {datachain-0.31.3 → datachain-0.31.4}/tests/func/test_dataset_query.py +0 -0
  318. {datachain-0.31.3 → datachain-0.31.4}/tests/func/test_datasets.py +0 -0
  319. {datachain-0.31.3 → datachain-0.31.4}/tests/func/test_delta.py +0 -0
  320. {datachain-0.31.3 → datachain-0.31.4}/tests/func/test_feature_pickling.py +0 -0
  321. {datachain-0.31.3 → datachain-0.31.4}/tests/func/test_hf.py +0 -0
  322. {datachain-0.31.3 → datachain-0.31.4}/tests/func/test_hidden_field.py +0 -0
  323. {datachain-0.31.3 → datachain-0.31.4}/tests/func/test_image.py +0 -0
  324. {datachain-0.31.3 → datachain-0.31.4}/tests/func/test_listing.py +0 -0
  325. {datachain-0.31.3 → datachain-0.31.4}/tests/func/test_ls.py +0 -0
  326. {datachain-0.31.3 → datachain-0.31.4}/tests/func/test_meta_formats.py +0 -0
  327. {datachain-0.31.3 → datachain-0.31.4}/tests/func/test_metastore.py +0 -0
  328. {datachain-0.31.3 → datachain-0.31.4}/tests/func/test_metrics.py +0 -0
  329. {datachain-0.31.3 → datachain-0.31.4}/tests/func/test_mutate.py +0 -0
  330. {datachain-0.31.3 → datachain-0.31.4}/tests/func/test_pull.py +0 -0
  331. {datachain-0.31.3 → datachain-0.31.4}/tests/func/test_pytorch.py +0 -0
  332. {datachain-0.31.3 → datachain-0.31.4}/tests/func/test_query.py +0 -0
  333. {datachain-0.31.3 → datachain-0.31.4}/tests/func/test_read_database.py +0 -0
  334. {datachain-0.31.3 → datachain-0.31.4}/tests/func/test_read_dataset_remote.py +0 -0
  335. {datachain-0.31.3 → datachain-0.31.4}/tests/func/test_read_dataset_version_specifiers.py +0 -0
  336. {datachain-0.31.3 → datachain-0.31.4}/tests/func/test_retry.py +0 -0
  337. {datachain-0.31.3 → datachain-0.31.4}/tests/func/test_session.py +0 -0
  338. {datachain-0.31.3 → datachain-0.31.4}/tests/func/test_storage_pattern.py +0 -0
  339. {datachain-0.31.3 → datachain-0.31.4}/tests/func/test_studio_datetime_parsing.py +0 -0
  340. {datachain-0.31.3 → datachain-0.31.4}/tests/func/test_to_database.py +0 -0
  341. {datachain-0.31.3 → datachain-0.31.4}/tests/func/test_toolkit.py +0 -0
  342. {datachain-0.31.3 → datachain-0.31.4}/tests/func/test_video.py +0 -0
  343. {datachain-0.31.3 → datachain-0.31.4}/tests/func/test_warehouse.py +0 -0
  344. {datachain-0.31.3 → datachain-0.31.4}/tests/scripts/feature_class.py +0 -0
  345. {datachain-0.31.3 → datachain-0.31.4}/tests/scripts/feature_class_exception.py +0 -0
  346. {datachain-0.31.3 → datachain-0.31.4}/tests/scripts/feature_class_parallel.py +0 -0
  347. {datachain-0.31.3 → datachain-0.31.4}/tests/scripts/feature_class_parallel_data_model.py +0 -0
  348. {datachain-0.31.3 → datachain-0.31.4}/tests/scripts/name_len_slow.py +0 -0
  349. {datachain-0.31.3 → datachain-0.31.4}/tests/test_atomicity.py +0 -0
  350. {datachain-0.31.3 → datachain-0.31.4}/tests/test_cli_e2e.py +0 -0
  351. {datachain-0.31.3 → datachain-0.31.4}/tests/test_cli_studio.py +0 -0
  352. {datachain-0.31.3 → datachain-0.31.4}/tests/test_import_time.py +0 -0
  353. {datachain-0.31.3 → datachain-0.31.4}/tests/test_query_e2e.py +0 -0
  354. {datachain-0.31.3 → datachain-0.31.4}/tests/test_telemetry.py +0 -0
  355. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/__init__.py +0 -0
  356. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/lib/__init__.py +0 -0
  357. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/lib/conftest.py +0 -0
  358. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/lib/test_arrow.py +0 -0
  359. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/lib/test_audio.py +0 -0
  360. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/lib/test_clip.py +0 -0
  361. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/lib/test_datachain.py +0 -0
  362. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
  363. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/lib/test_datachain_merge.py +0 -0
  364. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/lib/test_diff.py +0 -0
  365. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/lib/test_feature.py +0 -0
  366. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/lib/test_feature_utils.py +0 -0
  367. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/lib/test_file.py +0 -0
  368. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/lib/test_hf.py +0 -0
  369. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/lib/test_image.py +0 -0
  370. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/lib/test_listing_info.py +0 -0
  371. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/lib/test_namespace.py +0 -0
  372. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/lib/test_partition_by.py +0 -0
  373. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/lib/test_project.py +0 -0
  374. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/lib/test_python_to_sql.py +0 -0
  375. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/lib/test_schema.py +0 -0
  376. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/lib/test_settings.py +0 -0
  377. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/lib/test_signal_schema.py +0 -0
  378. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/lib/test_sql_to_python.py +0 -0
  379. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/lib/test_storage_pattern.py +0 -0
  380. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/lib/test_text.py +0 -0
  381. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/lib/test_udf.py +0 -0
  382. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/lib/test_udf_signature.py +0 -0
  383. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/lib/test_utils.py +0 -0
  384. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/lib/test_webdataset.py +0 -0
  385. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/model/__init__.py +0 -0
  386. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/model/test_bbox.py +0 -0
  387. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/model/test_pose.py +0 -0
  388. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/model/test_segment.py +0 -0
  389. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/model/test_utils.py +0 -0
  390. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/sql/__init__.py +0 -0
  391. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/sql/sqlite/__init__.py +0 -0
  392. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/sql/sqlite/test_types.py +0 -0
  393. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/sql/sqlite/test_utils.py +0 -0
  394. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/sql/test_array.py +0 -0
  395. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/sql/test_conditional.py +0 -0
  396. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/sql/test_path.py +0 -0
  397. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/sql/test_random.py +0 -0
  398. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/sql/test_selectable.py +0 -0
  399. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/sql/test_string.py +0 -0
  400. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/test_asyn.py +0 -0
  401. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/test_cache.py +0 -0
  402. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/test_catalog.py +0 -0
  403. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/test_catalog_loader.py +0 -0
  404. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/test_cli_datasets.py +0 -0
  405. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/test_cli_parsing.py +0 -0
  406. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/test_client.py +0 -0
  407. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/test_client_gcs.py +0 -0
  408. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/test_client_s3.py +0 -0
  409. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/test_config.py +0 -0
  410. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/test_data_storage.py +0 -0
  411. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/test_database_engine.py +0 -0
  412. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/test_dataset.py +0 -0
  413. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/test_dispatch.py +0 -0
  414. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/test_fileslice.py +0 -0
  415. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/test_func.py +0 -0
  416. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/test_listing.py +0 -0
  417. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/test_metastore.py +0 -0
  418. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/test_module_exports.py +0 -0
  419. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/test_pytorch.py +0 -0
  420. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/test_query_metrics.py +0 -0
  421. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/test_query_params.py +0 -0
  422. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/test_script_meta.py +0 -0
  423. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/test_semver.py +0 -0
  424. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/test_serializer.py +0 -0
  425. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/test_session.py +0 -0
  426. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/test_utils.py +0 -0
  427. {datachain-0.31.3 → datachain-0.31.4}/tests/unit/test_warehouse.py +0 -0
  428. {datachain-0.31.3 → datachain-0.31.4}/tests/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.31.3
3
+ Version: 0.31.4
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -144,26 +144,19 @@ def shutdown_process(
144
144
  return proc.wait()
145
145
 
146
146
 
147
- def process_output(stream: IO[bytes], callback: Callable[[str], None]) -> None:
147
+ def _process_stream(stream: "IO[bytes]", callback: Callable[[str], None]) -> None:
148
148
  buffer = b""
149
+ while byt := stream.read(1): # Read one byte at a time
150
+ buffer += byt
149
151
 
150
- try:
151
- while byt := stream.read(1): # Read one byte at a time
152
- buffer += byt
153
-
154
- if byt in (b"\n", b"\r"): # Check for newline or carriage return
155
- line = buffer.decode("utf-8", errors="replace")
156
- callback(line)
157
- buffer = b"" # Clear buffer for the next line
158
-
159
- if buffer: # Handle any remaining data in the buffer
160
- line = buffer.decode("utf-8", errors="replace")
152
+ if byt in (b"\n", b"\r"): # Check for newline or carriage return
153
+ line = buffer.decode("utf-8")
161
154
  callback(line)
162
- finally:
163
- try:
164
- stream.close() # Ensure output is closed
165
- except Exception: # noqa: BLE001, S110
166
- pass
155
+ buffer = b"" # Clear buffer for next line
156
+
157
+ if buffer: # Handle any remaining data in the buffer
158
+ line = buffer.decode("utf-8")
159
+ callback(line)
167
160
 
168
161
 
169
162
  class DatasetRowsFetcher(NodesThreadPool):
@@ -1767,13 +1760,13 @@ class Catalog:
1767
1760
  recursive=recursive,
1768
1761
  )
1769
1762
 
1770
- @staticmethod
1771
1763
  def query(
1764
+ self,
1772
1765
  query_script: str,
1773
1766
  env: Optional[Mapping[str, str]] = None,
1774
1767
  python_executable: str = sys.executable,
1775
- stdout_callback: Optional[Callable[[str], None]] = None,
1776
- stderr_callback: Optional[Callable[[str], None]] = None,
1768
+ capture_output: bool = False,
1769
+ output_hook: Callable[[str], None] = noop,
1777
1770
  params: Optional[dict[str, str]] = None,
1778
1771
  job_id: Optional[str] = None,
1779
1772
  interrupt_timeout: Optional[int] = None,
@@ -1788,18 +1781,13 @@ class Catalog:
1788
1781
  },
1789
1782
  )
1790
1783
  popen_kwargs: dict[str, Any] = {}
1791
-
1792
- if stdout_callback is not None:
1793
- popen_kwargs = {"stdout": subprocess.PIPE}
1794
- if stderr_callback is not None:
1795
- popen_kwargs["stderr"] = subprocess.PIPE
1784
+ if capture_output:
1785
+ popen_kwargs = {"stdout": subprocess.PIPE, "stderr": subprocess.STDOUT}
1796
1786
 
1797
1787
  def raise_termination_signal(sig: int, _: Any) -> NoReturn:
1798
1788
  raise TerminationSignal(sig)
1799
1789
 
1800
- stdout_thread: Optional[Thread] = None
1801
- stderr_thread: Optional[Thread] = None
1802
-
1790
+ thread: Optional[Thread] = None
1803
1791
  with subprocess.Popen(cmd, env=env, **popen_kwargs) as proc: # noqa: S603
1804
1792
  logger.info("Starting process %s", proc.pid)
1805
1793
 
@@ -1813,20 +1801,10 @@ class Catalog:
1813
1801
  orig_sigterm_handler = signal.getsignal(signal.SIGTERM)
1814
1802
  signal.signal(signal.SIGTERM, raise_termination_signal)
1815
1803
  try:
1816
- if stdout_callback is not None:
1817
- stdout_thread = Thread(
1818
- target=process_output,
1819
- args=(proc.stdout, stdout_callback),
1820
- daemon=True,
1821
- )
1822
- stdout_thread.start()
1823
- if stderr_callback is not None:
1824
- stderr_thread = Thread(
1825
- target=process_output,
1826
- args=(proc.stderr, stderr_callback),
1827
- daemon=True,
1828
- )
1829
- stderr_thread.start()
1804
+ if capture_output:
1805
+ args = (proc.stdout, output_hook)
1806
+ thread = Thread(target=_process_stream, args=args, daemon=True)
1807
+ thread.start()
1830
1808
 
1831
1809
  proc.wait()
1832
1810
  except TerminationSignal as exc:
@@ -1844,22 +1822,8 @@ class Catalog:
1844
1822
  finally:
1845
1823
  signal.signal(signal.SIGTERM, orig_sigterm_handler)
1846
1824
  signal.signal(signal.SIGINT, orig_sigint_handler)
1847
- # wait for the reader thread
1848
- thread_join_timeout_seconds = 30
1849
- if stdout_thread is not None:
1850
- stdout_thread.join(timeout=thread_join_timeout_seconds)
1851
- if stdout_thread.is_alive():
1852
- logger.warning(
1853
- "stdout thread is still alive after %s seconds",
1854
- thread_join_timeout_seconds,
1855
- )
1856
- if stderr_thread is not None:
1857
- stderr_thread.join(timeout=thread_join_timeout_seconds)
1858
- if stderr_thread.is_alive():
1859
- logger.warning(
1860
- "stderr thread is still alive after %s seconds",
1861
- thread_join_timeout_seconds,
1862
- )
1825
+ if thread:
1826
+ thread.join() # wait for the reader thread
1863
1827
 
1864
1828
  logger.info("Process %s exited with return code %s", proc.pid, proc.returncode)
1865
1829
  if proc.returncode in (
@@ -35,6 +35,7 @@ if TYPE_CHECKING:
35
35
  from datachain.catalog import Catalog
36
36
  from datachain.client.fsspec import Client
37
37
  from datachain.dataset import RowDict
38
+ from datachain.query.session import Session
38
39
 
39
40
  sha256 = partial(hashlib.sha256, usedforsecurity=False)
40
41
 
@@ -252,6 +253,15 @@ class File(DataModel):
252
253
  "last_modified",
253
254
  ]
254
255
 
256
+ # Allowed kwargs we forward to TextIOWrapper
257
+ _TEXT_WRAPPER_ALLOWED: ClassVar[tuple[str, ...]] = (
258
+ "encoding",
259
+ "errors",
260
+ "newline",
261
+ "line_buffering",
262
+ "write_through",
263
+ )
264
+
255
265
  @staticmethod
256
266
  def _validate_dict(
257
267
  v: Optional[Union[str, dict, list[dict]]],
@@ -328,7 +338,6 @@ class File(DataModel):
328
338
  from datachain.catalog.loader import get_catalog
329
339
 
330
340
  catalog = get_catalog()
331
-
332
341
  from datachain.client.fsspec import Client
333
342
 
334
343
  client_cls = Client.get_implementation(path)
@@ -341,6 +350,27 @@ class File(DataModel):
341
350
  file._set_stream(catalog)
342
351
  return file
343
352
 
353
+ @classmethod
354
+ def at(cls, uri: str, session: Optional["Session"] = None) -> "Self":
355
+ """Construct a File from a full URI in one call.
356
+
357
+ Example:
358
+ file = File.at("s3://bucket/path/to/output.png")
359
+ with file.open("wb") as f: ...
360
+ """
361
+ from datachain.client.fsspec import Client
362
+ from datachain.query.session import Session
363
+
364
+ if session is None:
365
+ session = Session.get()
366
+ catalog = session.catalog
367
+
368
+ client_cls = Client.get_implementation(uri)
369
+ source, rel_path = client_cls.split_url(uri)
370
+ file = cls(source=client_cls.get_uri(source), path=rel_path)
371
+ file._set_stream(catalog)
372
+ return file
373
+
344
374
  @classmethod
345
375
  def _from_row(cls, row: "RowDict") -> "Self":
346
376
  return cls(**{key: row[key] for key in cls._datachain_column_types})
@@ -354,28 +384,70 @@ class File(DataModel):
354
384
  return str(PurePosixPath(self.path).parent)
355
385
 
356
386
  @contextmanager
357
- def open(self, mode: Literal["rb", "r"] = "rb") -> Iterator[Any]:
358
- """Open the file and return a file object."""
359
- if self.location:
360
- with VFileRegistry.open(self, self.location) as f: # type: ignore[arg-type]
361
- yield f
387
+ def open(self, mode: str = "rb", **open_kwargs) -> Iterator[Any]:
388
+ """Open the file and return a file-like object.
362
389
 
363
- else:
390
+ Supports both read ("rb", "r") and write modes (e.g. "wb", "w", "ab").
391
+ When opened in a write mode, metadata is refreshed after closing.
392
+ """
393
+ writing = any(ch in mode for ch in "wax+")
394
+ if self.location and writing:
395
+ raise VFileError(
396
+ "Writing to virtual file is not supported",
397
+ self.source,
398
+ self.path,
399
+ )
400
+
401
+ if self._catalog is None:
402
+ raise RuntimeError("Cannot open file: catalog is not set")
403
+
404
+ client: Client = self._catalog.get_client(self.source)
405
+
406
+ if not writing:
407
+ if self.location:
408
+ with VFileRegistry.open(self, self.location) as f: # type: ignore[arg-type]
409
+ yield self._wrap_text(f, mode, open_kwargs)
410
+ return
364
411
  if self._caching_enabled:
365
412
  self.ensure_cached()
366
- client: Client = self._catalog.get_client(self.source)
367
413
  with client.open_object(
368
414
  self, use_cache=self._caching_enabled, cb=self._download_cb
369
415
  ) as f:
370
- yield io.TextIOWrapper(f) if mode == "r" else f
416
+ yield self._wrap_text(f, mode, open_kwargs)
417
+ return
418
+
419
+ # write path
420
+ full_path = client.get_full_path(self.get_path_normalized())
421
+ with client.fs.open(full_path, mode, **open_kwargs) as f:
422
+ yield self._wrap_text(f, mode, open_kwargs)
423
+
424
+ # refresh metadata
425
+ info = client.fs.info(full_path)
426
+ refreshed = client.info_to_file(info, self.get_path_normalized())
427
+ for k, v in refreshed.model_dump().items():
428
+ setattr(self, k, v)
429
+
430
+ def _wrap_text(self, f: Any, mode: str, open_kwargs: dict[str, Any]) -> Any:
431
+ """Return stream possibly wrapped for text."""
432
+ if "b" in mode or isinstance(f, io.TextIOBase):
433
+ return f
434
+ filtered = {
435
+ k: open_kwargs[k] for k in self._TEXT_WRAPPER_ALLOWED if k in open_kwargs
436
+ }
437
+ return io.TextIOWrapper(f, **filtered)
371
438
 
372
439
  def read_bytes(self, length: int = -1):
373
440
  """Returns file contents as bytes."""
374
441
  with self.open() as stream:
375
442
  return stream.read(length)
376
443
 
377
- def read_text(self):
378
- """Returns file contents as text."""
444
+ def read_text(self, **open_kwargs):
445
+ """Return file contents decoded as text.
446
+
447
+ **open_kwargs : Any
448
+ Extra keyword arguments forwarded to ``open(mode="r", ...)``
449
+ (e.g. ``encoding="utf-8"``, ``errors="ignore"``)
450
+ """
379
451
  if self.location:
380
452
  raise VFileError(
381
453
  "Reading text from virtual file is not supported",
@@ -383,7 +455,7 @@ class File(DataModel):
383
455
  self.path,
384
456
  )
385
457
 
386
- with self.open(mode="r") as stream:
458
+ with self.open(mode="r", **open_kwargs) as stream:
387
459
  return stream.read()
388
460
 
389
461
  def read(self, length: int = -1):
@@ -701,14 +773,19 @@ class TextFile(File):
701
773
  """`DataModel` for reading text files."""
702
774
 
703
775
  @contextmanager
704
- def open(self, mode: Literal["rb", "r"] = "r"):
705
- """Open the file and return a file object (default to text mode)."""
706
- with super().open(mode=mode) as stream:
776
+ def open(self, mode: str = "r", **open_kwargs) -> Iterator[Any]:
777
+ """Open the file and return a file-like object.
778
+ Default to text mode"""
779
+ with super().open(mode=mode, **open_kwargs) as stream:
707
780
  yield stream
708
781
 
709
- def read_text(self):
710
- """Returns file contents as text."""
711
- with self.open() as stream:
782
+ def read_text(self, **open_kwargs):
783
+ """Return file contents as text.
784
+
785
+ **open_kwargs : Any
786
+ Extra keyword arguments forwarded to ``open()`` (e.g. encoding).
787
+ """
788
+ with self.open(**open_kwargs) as stream:
712
789
  return stream.read()
713
790
 
714
791
  def save(self, destination: str, client_config: Optional[dict] = None):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.31.3
3
+ Version: 0.31.4
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -1,9 +1,12 @@
1
+ import io
2
+
1
3
  import pytest
2
4
  import pytz
3
5
 
4
6
  import datachain as dc
5
7
  from datachain.data_storage.sqlite import SQLiteWarehouse
6
8
  from datachain.lib.file import File, FileError
9
+ from datachain.query import C
7
10
  from datachain.utils import TIME_ZERO
8
11
 
9
12
 
@@ -91,3 +94,68 @@ def test_upload(cloud_test_catalog):
91
94
  assert f.read() == img_bytes
92
95
 
93
96
  client.fs.rm(dest, recursive=True)
97
+
98
+
99
+ def test_open_write_binary(cloud_test_catalog):
100
+ ctc = cloud_test_catalog
101
+ catalog = ctc.catalog
102
+ src_uri = ctc.src_uri
103
+ data = b"hello via open()"
104
+ file_path = f"{src_uri}/test-open-write-bytes.bin"
105
+
106
+ file = File.at(file_path, ctc.session)
107
+ with file.open("wb") as f:
108
+ f.write(data)
109
+
110
+ assert file.size == len(data)
111
+ assert file.read() == data
112
+
113
+ # Query storage for exactly that relative path.
114
+ # Metadata already refreshed by open() write path.
115
+ rel_path = file.path
116
+ chain = dc.read_storage(src_uri, session=ctc.session).filter(
117
+ C("file.path") == rel_path
118
+ )
119
+ results = list(chain.to_values("file"))
120
+ assert len(results) == 1
121
+ match = results[0]
122
+ for field_name in File.model_fields:
123
+ if field_name == "last_modified":
124
+ # Allow up to 1s difference across backends
125
+ # (some backends don't keep microsecond precision, we keep it simple here)
126
+ assert match.last_modified.timestamp() == pytest.approx(
127
+ file.last_modified.timestamp(), abs=1
128
+ )
129
+ else:
130
+ assert getattr(match, field_name) == getattr(file, field_name), (
131
+ f"Mismatch in field '{field_name}'"
132
+ )
133
+
134
+ catalog.get_client(src_uri).fs.rm(file_path)
135
+
136
+
137
+ def test_open_write_text(cloud_test_catalog):
138
+ ctc = cloud_test_catalog
139
+ catalog = ctc.catalog
140
+ src_uri = ctc.src_uri
141
+ file_path = f"{src_uri}/test-open-write-text.txt"
142
+ # Unicode content to exercise non-default (utf-16) encoding round trip
143
+ content = "Привет Мир\nSecond line"
144
+
145
+ file = File.at(file_path, ctc.session)
146
+ with file.open("w", encoding="utf-16-le") as f:
147
+ written_chars = f.write(content)
148
+
149
+ assert written_chars == len(content)
150
+ assert file.read_text(encoding="utf-16-le") == content
151
+
152
+ # Compute expected byte size using identical TextIOWrapper logic
153
+ buf = io.BytesIO()
154
+ tw = io.TextIOWrapper(buf, encoding="utf-16-le")
155
+ tw.write(content)
156
+ tw.flush()
157
+ expected_size = len(buf.getvalue())
158
+ tw.close()
159
+ assert file.size == expected_size
160
+
161
+ catalog.get_client(src_uri).fs.rm(file_path)
@@ -42,31 +42,12 @@ def test_args(catalog, mock_popen):
42
42
  mock_popen.assert_called_once_with(["mypython", "-c", "pass"], env=expected_env)
43
43
 
44
44
 
45
- def test_capture_stdout(catalog, mock_popen):
46
- mock_popen.stdout = io.BytesIO(b"Hello, World!\rLorem Ipsum\nDolor Sit Amet\nconse")
47
- stdout = []
48
-
49
- catalog.query("pass", stdout_callback=stdout.append)
50
- assert stdout == ["Hello, World!\r", "Lorem Ipsum\n", "Dolor Sit Amet\n", "conse"]
51
-
52
-
53
- def test_capture_stderr(catalog, mock_popen):
54
- mock_popen.stderr = io.BytesIO(b"Hello, World!\rLorem Ipsum\nDolor Sit Amet\nconse")
55
- stderr = []
56
-
57
- catalog.query("pass", stderr_callback=stderr.append)
58
- assert stderr == ["Hello, World!\r", "Lorem Ipsum\n", "Dolor Sit Amet\n", "conse"]
59
-
60
-
61
45
  def test_capture_output(catalog, mock_popen):
62
46
  mock_popen.stdout = io.BytesIO(b"Hello, World!\rLorem Ipsum\nDolor Sit Amet\nconse")
63
- mock_popen.stderr = io.BytesIO(b"foo\nbar")
64
- stdout = []
65
- stderr = []
47
+ lines = []
66
48
 
67
- catalog.query("pass", stdout_callback=stdout.append, stderr_callback=stderr.append)
68
- assert stdout == ["Hello, World!\r", "Lorem Ipsum\n", "Dolor Sit Amet\n", "conse"]
69
- assert stderr == ["foo\n", "bar"]
49
+ catalog.query("pass", capture_output=True, output_hook=lines.append)
50
+ assert lines == ["Hello, World!\r", "Lorem Ipsum\n", "Dolor Sit Amet\n", "conse"]
70
51
 
71
52
 
72
53
  def test_canceled_by_user(catalog, mock_popen):
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes