datachain 0.37.7__tar.gz → 0.37.8__tar.gz

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (491)
  1. {datachain-0.37.7 → datachain-0.37.8}/PKG-INFO +1 -1
  2. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/lib/dc/datachain.py +19 -3
  3. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/query/dataset.py +22 -5
  4. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/toolkit/split.py +30 -8
  5. {datachain-0.37.7 → datachain-0.37.8}/src/datachain.egg-info/PKG-INFO +1 -1
  6. {datachain-0.37.7 → datachain-0.37.8}/tests/func/test_toolkit.py +34 -4
  7. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/lib/test_datachain.py +14 -0
  8. {datachain-0.37.7 → datachain-0.37.8}/.cruft.json +0 -0
  9. {datachain-0.37.7 → datachain-0.37.8}/.gitattributes +0 -0
  10. {datachain-0.37.7 → datachain-0.37.8}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  11. {datachain-0.37.7 → datachain-0.37.8}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
  12. {datachain-0.37.7 → datachain-0.37.8}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  13. {datachain-0.37.7 → datachain-0.37.8}/.github/codecov.yaml +0 -0
  14. {datachain-0.37.7 → datachain-0.37.8}/.github/dependabot.yml +0 -0
  15. {datachain-0.37.7 → datachain-0.37.8}/.github/workflows/benchmarks.yml +0 -0
  16. {datachain-0.37.7 → datachain-0.37.8}/.github/workflows/release.yml +0 -0
  17. {datachain-0.37.7 → datachain-0.37.8}/.github/workflows/tests-studio.yml +0 -0
  18. {datachain-0.37.7 → datachain-0.37.8}/.github/workflows/tests.yml +0 -0
  19. {datachain-0.37.7 → datachain-0.37.8}/.github/workflows/update-template.yaml +0 -0
  20. {datachain-0.37.7 → datachain-0.37.8}/.gitignore +0 -0
  21. {datachain-0.37.7 → datachain-0.37.8}/.pre-commit-config.yaml +0 -0
  22. {datachain-0.37.7 → datachain-0.37.8}/CODE_OF_CONDUCT.rst +0 -0
  23. {datachain-0.37.7 → datachain-0.37.8}/LICENSE +0 -0
  24. {datachain-0.37.7 → datachain-0.37.8}/README.rst +0 -0
  25. {datachain-0.37.7 → datachain-0.37.8}/docs/api_hooks.py +0 -0
  26. {datachain-0.37.7 → datachain-0.37.8}/docs/assets/captioned_cartoons.png +0 -0
  27. {datachain-0.37.7 → datachain-0.37.8}/docs/assets/datachain-white.svg +0 -0
  28. {datachain-0.37.7 → datachain-0.37.8}/docs/assets/datachain.svg +0 -0
  29. {datachain-0.37.7 → datachain-0.37.8}/docs/assets/webhook_dialog.png +0 -0
  30. {datachain-0.37.7 → datachain-0.37.8}/docs/assets/webhook_list.png +0 -0
  31. {datachain-0.37.7 → datachain-0.37.8}/docs/commands/auth/login.md +0 -0
  32. {datachain-0.37.7 → datachain-0.37.8}/docs/commands/auth/logout.md +0 -0
  33. {datachain-0.37.7 → datachain-0.37.8}/docs/commands/auth/team.md +0 -0
  34. {datachain-0.37.7 → datachain-0.37.8}/docs/commands/auth/token.md +0 -0
  35. {datachain-0.37.7 → datachain-0.37.8}/docs/commands/index.md +0 -0
  36. {datachain-0.37.7 → datachain-0.37.8}/docs/commands/job/cancel.md +0 -0
  37. {datachain-0.37.7 → datachain-0.37.8}/docs/commands/job/clusters.md +0 -0
  38. {datachain-0.37.7 → datachain-0.37.8}/docs/commands/job/logs.md +0 -0
  39. {datachain-0.37.7 → datachain-0.37.8}/docs/commands/job/ls.md +0 -0
  40. {datachain-0.37.7 → datachain-0.37.8}/docs/commands/job/run.md +0 -0
  41. {datachain-0.37.7 → datachain-0.37.8}/docs/contributing.md +0 -0
  42. {datachain-0.37.7 → datachain-0.37.8}/docs/css/github-permalink-style.css +0 -0
  43. {datachain-0.37.7 → datachain-0.37.8}/docs/examples.md +0 -0
  44. {datachain-0.37.7 → datachain-0.37.8}/docs/guide/checkpoints.md +0 -0
  45. {datachain-0.37.7 → datachain-0.37.8}/docs/guide/db_migrations.md +0 -0
  46. {datachain-0.37.7 → datachain-0.37.8}/docs/guide/delta.md +0 -0
  47. {datachain-0.37.7 → datachain-0.37.8}/docs/guide/env.md +0 -0
  48. {datachain-0.37.7 → datachain-0.37.8}/docs/guide/index.md +0 -0
  49. {datachain-0.37.7 → datachain-0.37.8}/docs/guide/namespaces.md +0 -0
  50. {datachain-0.37.7 → datachain-0.37.8}/docs/guide/processing.md +0 -0
  51. {datachain-0.37.7 → datachain-0.37.8}/docs/guide/remotes.md +0 -0
  52. {datachain-0.37.7 → datachain-0.37.8}/docs/guide/retry.md +0 -0
  53. {datachain-0.37.7 → datachain-0.37.8}/docs/index.md +0 -0
  54. {datachain-0.37.7 → datachain-0.37.8}/docs/overrides/main.html +0 -0
  55. {datachain-0.37.7 → datachain-0.37.8}/docs/quick-start.md +0 -0
  56. {datachain-0.37.7 → datachain-0.37.8}/docs/references/data-types/arrowrow.md +0 -0
  57. {datachain-0.37.7 → datachain-0.37.8}/docs/references/data-types/bbox.md +0 -0
  58. {datachain-0.37.7 → datachain-0.37.8}/docs/references/data-types/file.md +0 -0
  59. {datachain-0.37.7 → datachain-0.37.8}/docs/references/data-types/imagefile.md +0 -0
  60. {datachain-0.37.7 → datachain-0.37.8}/docs/references/data-types/index.md +0 -0
  61. {datachain-0.37.7 → datachain-0.37.8}/docs/references/data-types/pose.md +0 -0
  62. {datachain-0.37.7 → datachain-0.37.8}/docs/references/data-types/segment.md +0 -0
  63. {datachain-0.37.7 → datachain-0.37.8}/docs/references/data-types/tarvfile.md +0 -0
  64. {datachain-0.37.7 → datachain-0.37.8}/docs/references/data-types/textfile.md +0 -0
  65. {datachain-0.37.7 → datachain-0.37.8}/docs/references/data-types/videofile.md +0 -0
  66. {datachain-0.37.7 → datachain-0.37.8}/docs/references/datachain.md +0 -0
  67. {datachain-0.37.7 → datachain-0.37.8}/docs/references/func.md +0 -0
  68. {datachain-0.37.7 → datachain-0.37.8}/docs/references/functions/aggregate.md +0 -0
  69. {datachain-0.37.7 → datachain-0.37.8}/docs/references/functions/array.md +0 -0
  70. {datachain-0.37.7 → datachain-0.37.8}/docs/references/functions/conditional.md +0 -0
  71. {datachain-0.37.7 → datachain-0.37.8}/docs/references/functions/numeric.md +0 -0
  72. {datachain-0.37.7 → datachain-0.37.8}/docs/references/functions/path.md +0 -0
  73. {datachain-0.37.7 → datachain-0.37.8}/docs/references/functions/random.md +0 -0
  74. {datachain-0.37.7 → datachain-0.37.8}/docs/references/functions/string.md +0 -0
  75. {datachain-0.37.7 → datachain-0.37.8}/docs/references/functions/window.md +0 -0
  76. {datachain-0.37.7 → datachain-0.37.8}/docs/references/index.md +0 -0
  77. {datachain-0.37.7 → datachain-0.37.8}/docs/references/toolkit.md +0 -0
  78. {datachain-0.37.7 → datachain-0.37.8}/docs/references/torch.md +0 -0
  79. {datachain-0.37.7 → datachain-0.37.8}/docs/references/udf.md +0 -0
  80. {datachain-0.37.7 → datachain-0.37.8}/docs/studio/api/.gitkeep +0 -0
  81. {datachain-0.37.7 → datachain-0.37.8}/docs/studio/index.md +0 -0
  82. {datachain-0.37.7 → datachain-0.37.8}/docs/studio/self-hosting/configuration/ca-certificates.md +0 -0
  83. {datachain-0.37.7 → datachain-0.37.8}/docs/studio/self-hosting/configuration/git-forges/bitbucket.md +0 -0
  84. {datachain-0.37.7 → datachain-0.37.8}/docs/studio/self-hosting/configuration/git-forges/github.md +0 -0
  85. {datachain-0.37.7 → datachain-0.37.8}/docs/studio/self-hosting/configuration/git-forges/gitlab.md +0 -0
  86. {datachain-0.37.7 → datachain-0.37.8}/docs/studio/self-hosting/configuration/git-forges/index.md +0 -0
  87. {datachain-0.37.7 → datachain-0.37.8}/docs/studio/self-hosting/configuration/index.md +0 -0
  88. {datachain-0.37.7 → datachain-0.37.8}/docs/studio/self-hosting/configuration/ssl-tls.md +0 -0
  89. {datachain-0.37.7 → datachain-0.37.8}/docs/studio/self-hosting/index.md +0 -0
  90. {datachain-0.37.7 → datachain-0.37.8}/docs/studio/self-hosting/installation/aws-ami.md +0 -0
  91. {datachain-0.37.7 → datachain-0.37.8}/docs/studio/self-hosting/installation/index.md +0 -0
  92. {datachain-0.37.7 → datachain-0.37.8}/docs/studio/self-hosting/installation/k8s-helm.md +0 -0
  93. {datachain-0.37.7 → datachain-0.37.8}/docs/studio/self-hosting/troubleshooting/502-errors.md +0 -0
  94. {datachain-0.37.7 → datachain-0.37.8}/docs/studio/self-hosting/troubleshooting/index.md +0 -0
  95. {datachain-0.37.7 → datachain-0.37.8}/docs/studio/self-hosting/troubleshooting/support-bundle.md +0 -0
  96. {datachain-0.37.7 → datachain-0.37.8}/docs/studio/self-hosting/upgrading/airgap-procedure.md +0 -0
  97. {datachain-0.37.7 → datachain-0.37.8}/docs/studio/self-hosting/upgrading/index.md +0 -0
  98. {datachain-0.37.7 → datachain-0.37.8}/docs/studio/self-hosting/upgrading/regular-procedure.md +0 -0
  99. {datachain-0.37.7 → datachain-0.37.8}/docs/studio/user-guide/account-management.md +0 -0
  100. {datachain-0.37.7 → datachain-0.37.8}/docs/studio/user-guide/authentication/openid-connect.md +0 -0
  101. {datachain-0.37.7 → datachain-0.37.8}/docs/studio/user-guide/authentication/single-sign-on.md +0 -0
  102. {datachain-0.37.7 → datachain-0.37.8}/docs/studio/user-guide/experiments/configure-a-project.md +0 -0
  103. {datachain-0.37.7 → datachain-0.37.8}/docs/studio/user-guide/experiments/create-a-project.md +0 -0
  104. {datachain-0.37.7 → datachain-0.37.8}/docs/studio/user-guide/experiments/explore-ml-experiments.md +0 -0
  105. {datachain-0.37.7 → datachain-0.37.8}/docs/studio/user-guide/experiments/index.md +0 -0
  106. {datachain-0.37.7 → datachain-0.37.8}/docs/studio/user-guide/experiments/live-metrics-and-plots.md +0 -0
  107. {datachain-0.37.7 → datachain-0.37.8}/docs/studio/user-guide/experiments/run-experiments.md +0 -0
  108. {datachain-0.37.7 → datachain-0.37.8}/docs/studio/user-guide/experiments/share-a-project.md +0 -0
  109. {datachain-0.37.7 → datachain-0.37.8}/docs/studio/user-guide/experiments/visualize-and-compare.md +0 -0
  110. {datachain-0.37.7 → datachain-0.37.8}/docs/studio/user-guide/git-connections/custom-gitlab-server.md +0 -0
  111. {datachain-0.37.7 → datachain-0.37.8}/docs/studio/user-guide/git-connections/github-app.md +0 -0
  112. {datachain-0.37.7 → datachain-0.37.8}/docs/studio/user-guide/git-connections/index.md +0 -0
  113. {datachain-0.37.7 → datachain-0.37.8}/docs/studio/user-guide/index.md +0 -0
  114. {datachain-0.37.7 → datachain-0.37.8}/docs/studio/user-guide/jobs/create-and-run.md +0 -0
  115. {datachain-0.37.7 → datachain-0.37.8}/docs/studio/user-guide/jobs/index.md +0 -0
  116. {datachain-0.37.7 → datachain-0.37.8}/docs/studio/user-guide/jobs/monitor-jobs.md +0 -0
  117. {datachain-0.37.7 → datachain-0.37.8}/docs/studio/user-guide/model-registry/add-a-model.md +0 -0
  118. {datachain-0.37.7 → datachain-0.37.8}/docs/studio/user-guide/model-registry/assign-stage.md +0 -0
  119. {datachain-0.37.7 → datachain-0.37.8}/docs/studio/user-guide/model-registry/register-version.md +0 -0
  120. {datachain-0.37.7 → datachain-0.37.8}/docs/studio/user-guide/model-registry/remove-a-model-or-its-details.md +0 -0
  121. {datachain-0.37.7 → datachain-0.37.8}/docs/studio/user-guide/model-registry/use-models.md +0 -0
  122. {datachain-0.37.7 → datachain-0.37.8}/docs/studio/user-guide/model-registry/view-and-compare-models.md +0 -0
  123. {datachain-0.37.7 → datachain-0.37.8}/docs/studio/user-guide/team-collaboration.md +0 -0
  124. {datachain-0.37.7 → datachain-0.37.8}/docs/studio/user-guide/troubleshooting.md +0 -0
  125. {datachain-0.37.7 → datachain-0.37.8}/docs/studio/webhooks.md +0 -0
  126. {datachain-0.37.7 → datachain-0.37.8}/docs/templates/main.dot +0 -0
  127. {datachain-0.37.7 → datachain-0.37.8}/docs/templates/operation.dot +0 -0
  128. {datachain-0.37.7 → datachain-0.37.8}/docs/templates/responses.def +0 -0
  129. {datachain-0.37.7 → datachain-0.37.8}/docs/tutorials.md +0 -0
  130. {datachain-0.37.7 → datachain-0.37.8}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
  131. {datachain-0.37.7 → datachain-0.37.8}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
  132. {datachain-0.37.7 → datachain-0.37.8}/examples/computer_vision/openimage-detect.py +0 -0
  133. {datachain-0.37.7 → datachain-0.37.8}/examples/computer_vision/ultralytics-bbox.py +0 -0
  134. {datachain-0.37.7 → datachain-0.37.8}/examples/computer_vision/ultralytics-pose.py +0 -0
  135. {datachain-0.37.7 → datachain-0.37.8}/examples/computer_vision/ultralytics-segment.py +0 -0
  136. {datachain-0.37.7 → datachain-0.37.8}/examples/get_started/common_sql_functions.py +0 -0
  137. {datachain-0.37.7 → datachain-0.37.8}/examples/get_started/json-csv-reader.py +0 -0
  138. {datachain-0.37.7 → datachain-0.37.8}/examples/get_started/nested_datamodel.py +0 -0
  139. {datachain-0.37.7 → datachain-0.37.8}/examples/get_started/torch-loader.py +0 -0
  140. {datachain-0.37.7 → datachain-0.37.8}/examples/get_started/udfs/parallel.py +0 -0
  141. {datachain-0.37.7 → datachain-0.37.8}/examples/get_started/udfs/simple.py +0 -0
  142. {datachain-0.37.7 → datachain-0.37.8}/examples/get_started/udfs/stateful.py +0 -0
  143. {datachain-0.37.7 → datachain-0.37.8}/examples/incremental_processing/delta.py +0 -0
  144. {datachain-0.37.7 → datachain-0.37.8}/examples/incremental_processing/retry.py +0 -0
  145. {datachain-0.37.7 → datachain-0.37.8}/examples/incremental_processing/utils.py +0 -0
  146. {datachain-0.37.7 → datachain-0.37.8}/examples/llm_and_nlp/claude-query.py +0 -0
  147. {datachain-0.37.7 → datachain-0.37.8}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
  148. {datachain-0.37.7 → datachain-0.37.8}/examples/multimodal/audio-to-text.py +0 -0
  149. {datachain-0.37.7 → datachain-0.37.8}/examples/multimodal/clip_inference.py +0 -0
  150. {datachain-0.37.7 → datachain-0.37.8}/examples/multimodal/hf_pipeline.py +0 -0
  151. {datachain-0.37.7 → datachain-0.37.8}/examples/multimodal/openai_image_desc_lib.py +0 -0
  152. {datachain-0.37.7 → datachain-0.37.8}/examples/multimodal/wds.py +0 -0
  153. {datachain-0.37.7 → datachain-0.37.8}/examples/multimodal/wds_filtered.py +0 -0
  154. {datachain-0.37.7 → datachain-0.37.8}/mkdocs.yml +0 -0
  155. {datachain-0.37.7 → datachain-0.37.8}/noxfile.py +0 -0
  156. {datachain-0.37.7 → datachain-0.37.8}/pyproject.toml +0 -0
  157. {datachain-0.37.7 → datachain-0.37.8}/setup.cfg +0 -0
  158. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/__init__.py +0 -0
  159. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/__main__.py +0 -0
  160. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/asyn.py +0 -0
  161. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/cache.py +0 -0
  162. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/catalog/__init__.py +0 -0
  163. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/catalog/catalog.py +0 -0
  164. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/catalog/datasource.py +0 -0
  165. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/catalog/dependency.py +0 -0
  166. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/catalog/loader.py +0 -0
  167. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/checkpoint.py +0 -0
  168. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/cli/__init__.py +0 -0
  169. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/cli/commands/__init__.py +0 -0
  170. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/cli/commands/datasets.py +0 -0
  171. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/cli/commands/du.py +0 -0
  172. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/cli/commands/index.py +0 -0
  173. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/cli/commands/ls.py +0 -0
  174. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/cli/commands/misc.py +0 -0
  175. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/cli/commands/query.py +0 -0
  176. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/cli/commands/show.py +0 -0
  177. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/cli/parser/__init__.py +0 -0
  178. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/cli/parser/job.py +0 -0
  179. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/cli/parser/studio.py +0 -0
  180. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/cli/parser/utils.py +0 -0
  181. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/cli/utils.py +0 -0
  182. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/client/__init__.py +0 -0
  183. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/client/azure.py +0 -0
  184. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/client/fileslice.py +0 -0
  185. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/client/fsspec.py +0 -0
  186. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/client/gcs.py +0 -0
  187. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/client/hf.py +0 -0
  188. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/client/http.py +0 -0
  189. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/client/local.py +0 -0
  190. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/client/s3.py +0 -0
  191. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/config.py +0 -0
  192. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/data_storage/__init__.py +0 -0
  193. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/data_storage/db_engine.py +0 -0
  194. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/data_storage/job.py +0 -0
  195. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/data_storage/metastore.py +0 -0
  196. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/data_storage/schema.py +0 -0
  197. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/data_storage/serializer.py +0 -0
  198. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/data_storage/sqlite.py +0 -0
  199. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/data_storage/warehouse.py +0 -0
  200. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/dataset.py +0 -0
  201. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/delta.py +0 -0
  202. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/diff/__init__.py +0 -0
  203. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/error.py +0 -0
  204. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/fs/__init__.py +0 -0
  205. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/fs/reference.py +0 -0
  206. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/fs/utils.py +0 -0
  207. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/func/__init__.py +0 -0
  208. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/func/aggregate.py +0 -0
  209. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/func/array.py +0 -0
  210. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/func/base.py +0 -0
  211. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/func/conditional.py +0 -0
  212. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/func/func.py +0 -0
  213. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/func/numeric.py +0 -0
  214. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/func/path.py +0 -0
  215. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/func/random.py +0 -0
  216. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/func/string.py +0 -0
  217. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/func/window.py +0 -0
  218. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/hash_utils.py +0 -0
  219. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/job.py +0 -0
  220. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/lib/__init__.py +0 -0
  221. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/lib/arrow.py +0 -0
  222. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/lib/audio.py +0 -0
  223. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/lib/clip.py +0 -0
  224. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/lib/convert/__init__.py +0 -0
  225. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/lib/convert/flatten.py +0 -0
  226. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/lib/convert/python_to_sql.py +0 -0
  227. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/lib/convert/sql_to_python.py +0 -0
  228. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/lib/convert/unflatten.py +0 -0
  229. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/lib/convert/values_to_tuples.py +0 -0
  230. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/lib/data_model.py +0 -0
  231. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/lib/dataset_info.py +0 -0
  232. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/lib/dc/__init__.py +0 -0
  233. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/lib/dc/csv.py +0 -0
  234. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/lib/dc/database.py +0 -0
  235. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/lib/dc/datasets.py +0 -0
  236. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/lib/dc/hf.py +0 -0
  237. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/lib/dc/json.py +0 -0
  238. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/lib/dc/listings.py +0 -0
  239. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/lib/dc/pandas.py +0 -0
  240. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/lib/dc/parquet.py +0 -0
  241. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/lib/dc/records.py +0 -0
  242. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/lib/dc/storage.py +0 -0
  243. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/lib/dc/storage_pattern.py +0 -0
  244. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/lib/dc/utils.py +0 -0
  245. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/lib/dc/values.py +0 -0
  246. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/lib/file.py +0 -0
  247. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/lib/hf.py +0 -0
  248. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/lib/image.py +0 -0
  249. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/lib/listing.py +0 -0
  250. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/lib/listing_info.py +0 -0
  251. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/lib/meta_formats.py +0 -0
  252. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/lib/model_store.py +0 -0
  253. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/lib/namespaces.py +0 -0
  254. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/lib/projects.py +0 -0
  255. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/lib/pytorch.py +0 -0
  256. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/lib/settings.py +0 -0
  257. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/lib/signal_schema.py +0 -0
  258. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/lib/tar.py +0 -0
  259. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/lib/text.py +0 -0
  260. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/lib/udf.py +0 -0
  261. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/lib/udf_signature.py +0 -0
  262. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/lib/utils.py +0 -0
  263. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/lib/video.py +0 -0
  264. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/lib/webdataset.py +0 -0
  265. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/lib/webdataset_laion.py +0 -0
  266. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/listing.py +0 -0
  267. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/model/__init__.py +0 -0
  268. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/model/bbox.py +0 -0
  269. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/model/pose.py +0 -0
  270. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/model/segment.py +0 -0
  271. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/model/ultralytics/__init__.py +0 -0
  272. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/model/ultralytics/bbox.py +0 -0
  273. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/model/ultralytics/pose.py +0 -0
  274. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/model/ultralytics/segment.py +0 -0
  275. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/model/utils.py +0 -0
  276. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/namespace.py +0 -0
  277. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/node.py +0 -0
  278. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/nodes_fetcher.py +0 -0
  279. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/nodes_thread_pool.py +0 -0
  280. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/plugins.py +0 -0
  281. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/progress.py +0 -0
  282. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/project.py +0 -0
  283. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/py.typed +0 -0
  284. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/query/__init__.py +0 -0
  285. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/query/batch.py +0 -0
  286. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/query/dispatch.py +0 -0
  287. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/query/metrics.py +0 -0
  288. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/query/params.py +0 -0
  289. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/query/queue.py +0 -0
  290. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/query/schema.py +0 -0
  291. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/query/session.py +0 -0
  292. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/query/udf.py +0 -0
  293. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/remote/__init__.py +0 -0
  294. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/remote/studio.py +0 -0
  295. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/script_meta.py +0 -0
  296. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/semver.py +0 -0
  297. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/sql/__init__.py +0 -0
  298. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/sql/default/__init__.py +0 -0
  299. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/sql/default/base.py +0 -0
  300. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/sql/functions/__init__.py +0 -0
  301. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/sql/functions/aggregate.py +0 -0
  302. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/sql/functions/array.py +0 -0
  303. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/sql/functions/conditional.py +0 -0
  304. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/sql/functions/numeric.py +0 -0
  305. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/sql/functions/path.py +0 -0
  306. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/sql/functions/random.py +0 -0
  307. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/sql/functions/string.py +0 -0
  308. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/sql/postgresql_dialect.py +0 -0
  309. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/sql/postgresql_types.py +0 -0
  310. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/sql/selectable.py +0 -0
  311. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/sql/sqlite/__init__.py +0 -0
  312. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/sql/sqlite/base.py +0 -0
  313. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/sql/sqlite/types.py +0 -0
  314. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/sql/sqlite/vector.py +0 -0
  315. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/sql/types.py +0 -0
  316. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/sql/utils.py +0 -0
  317. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/studio.py +0 -0
  318. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/telemetry.py +0 -0
  319. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/toolkit/__init__.py +0 -0
  320. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/torch/__init__.py +0 -0
  321. {datachain-0.37.7 → datachain-0.37.8}/src/datachain/utils.py +0 -0
  322. {datachain-0.37.7 → datachain-0.37.8}/src/datachain.egg-info/SOURCES.txt +0 -0
  323. {datachain-0.37.7 → datachain-0.37.8}/src/datachain.egg-info/dependency_links.txt +0 -0
  324. {datachain-0.37.7 → datachain-0.37.8}/src/datachain.egg-info/entry_points.txt +0 -0
  325. {datachain-0.37.7 → datachain-0.37.8}/src/datachain.egg-info/requires.txt +0 -0
  326. {datachain-0.37.7 → datachain-0.37.8}/src/datachain.egg-info/top_level.txt +0 -0
  327. {datachain-0.37.7 → datachain-0.37.8}/tests/__init__.py +0 -0
  328. {datachain-0.37.7 → datachain-0.37.8}/tests/benchmarks/__init__.py +0 -0
  329. {datachain-0.37.7 → datachain-0.37.8}/tests/benchmarks/conftest.py +0 -0
  330. {datachain-0.37.7 → datachain-0.37.8}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
  331. {datachain-0.37.7 → datachain-0.37.8}/tests/benchmarks/datasets/.dvc/config +0 -0
  332. {datachain-0.37.7 → datachain-0.37.8}/tests/benchmarks/datasets/.gitignore +0 -0
  333. {datachain-0.37.7 → datachain-0.37.8}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
  334. {datachain-0.37.7 → datachain-0.37.8}/tests/benchmarks/test_datachain.py +0 -0
  335. {datachain-0.37.7 → datachain-0.37.8}/tests/benchmarks/test_ls.py +0 -0
  336. {datachain-0.37.7 → datachain-0.37.8}/tests/benchmarks/test_version.py +0 -0
  337. {datachain-0.37.7 → datachain-0.37.8}/tests/conftest.py +0 -0
  338. {datachain-0.37.7 → datachain-0.37.8}/tests/data.py +0 -0
  339. {datachain-0.37.7 → datachain-0.37.8}/tests/examples/__init__.py +0 -0
  340. {datachain-0.37.7 → datachain-0.37.8}/tests/examples/test_examples.py +0 -0
  341. {datachain-0.37.7 → datachain-0.37.8}/tests/examples/test_wds_e2e.py +0 -0
  342. {datachain-0.37.7 → datachain-0.37.8}/tests/examples/wds_data.py +0 -0
  343. {datachain-0.37.7 → datachain-0.37.8}/tests/func/__init__.py +0 -0
  344. {datachain-0.37.7 → datachain-0.37.8}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
  345. {datachain-0.37.7 → datachain-0.37.8}/tests/func/data/lena.jpg +0 -0
  346. {datachain-0.37.7 → datachain-0.37.8}/tests/func/fake-service-account-credentials.json +0 -0
  347. {datachain-0.37.7 → datachain-0.37.8}/tests/func/functions/__init__.py +0 -0
  348. {datachain-0.37.7 → datachain-0.37.8}/tests/func/functions/test_aggregate.py +0 -0
  349. {datachain-0.37.7 → datachain-0.37.8}/tests/func/functions/test_array.py +0 -0
  350. {datachain-0.37.7 → datachain-0.37.8}/tests/func/functions/test_conditional.py +0 -0
  351. {datachain-0.37.7 → datachain-0.37.8}/tests/func/functions/test_numeric.py +0 -0
  352. {datachain-0.37.7 → datachain-0.37.8}/tests/func/functions/test_path.py +0 -0
  353. {datachain-0.37.7 → datachain-0.37.8}/tests/func/functions/test_random.py +0 -0
  354. {datachain-0.37.7 → datachain-0.37.8}/tests/func/functions/test_string.py +0 -0
  355. {datachain-0.37.7 → datachain-0.37.8}/tests/func/model/__init__.py +0 -0
  356. {datachain-0.37.7 → datachain-0.37.8}/tests/func/model/data/running-mask0.png +0 -0
  357. {datachain-0.37.7 → datachain-0.37.8}/tests/func/model/data/running-mask1.png +0 -0
  358. {datachain-0.37.7 → datachain-0.37.8}/tests/func/model/data/running.jpg +0 -0
  359. {datachain-0.37.7 → datachain-0.37.8}/tests/func/model/data/ships.jpg +0 -0
  360. {datachain-0.37.7 → datachain-0.37.8}/tests/func/model/test_yolo.py +0 -0
  361. {datachain-0.37.7 → datachain-0.37.8}/tests/func/test_audio.py +0 -0
  362. {datachain-0.37.7 → datachain-0.37.8}/tests/func/test_catalog.py +0 -0
  363. {datachain-0.37.7 → datachain-0.37.8}/tests/func/test_checkpoints.py +0 -0
  364. {datachain-0.37.7 → datachain-0.37.8}/tests/func/test_client.py +0 -0
  365. {datachain-0.37.7 → datachain-0.37.8}/tests/func/test_cloud_transfer.py +0 -0
  366. {datachain-0.37.7 → datachain-0.37.8}/tests/func/test_data_storage.py +0 -0
  367. {datachain-0.37.7 → datachain-0.37.8}/tests/func/test_datachain.py +0 -0
  368. {datachain-0.37.7 → datachain-0.37.8}/tests/func/test_datachain_merge.py +0 -0
  369. {datachain-0.37.7 → datachain-0.37.8}/tests/func/test_dataset_query.py +0 -0
  370. {datachain-0.37.7 → datachain-0.37.8}/tests/func/test_datasets.py +0 -0
  371. {datachain-0.37.7 → datachain-0.37.8}/tests/func/test_delta.py +0 -0
  372. {datachain-0.37.7 → datachain-0.37.8}/tests/func/test_feature_pickling.py +0 -0
  373. {datachain-0.37.7 → datachain-0.37.8}/tests/func/test_file.py +0 -0
  374. {datachain-0.37.7 → datachain-0.37.8}/tests/func/test_hf.py +0 -0
  375. {datachain-0.37.7 → datachain-0.37.8}/tests/func/test_hidden_field.py +0 -0
  376. {datachain-0.37.7 → datachain-0.37.8}/tests/func/test_image.py +0 -0
  377. {datachain-0.37.7 → datachain-0.37.8}/tests/func/test_listing.py +0 -0
  378. {datachain-0.37.7 → datachain-0.37.8}/tests/func/test_ls.py +0 -0
  379. {datachain-0.37.7 → datachain-0.37.8}/tests/func/test_meta_formats.py +0 -0
  380. {datachain-0.37.7 → datachain-0.37.8}/tests/func/test_metastore.py +0 -0
  381. {datachain-0.37.7 → datachain-0.37.8}/tests/func/test_metrics.py +0 -0
  382. {datachain-0.37.7 → datachain-0.37.8}/tests/func/test_mutate.py +0 -0
  383. {datachain-0.37.7 → datachain-0.37.8}/tests/func/test_pull.py +0 -0
  384. {datachain-0.37.7 → datachain-0.37.8}/tests/func/test_pytorch.py +0 -0
  385. {datachain-0.37.7 → datachain-0.37.8}/tests/func/test_query.py +0 -0
  386. {datachain-0.37.7 → datachain-0.37.8}/tests/func/test_read_database.py +0 -0
  387. {datachain-0.37.7 → datachain-0.37.8}/tests/func/test_read_dataset_remote.py +0 -0
  388. {datachain-0.37.7 → datachain-0.37.8}/tests/func/test_read_dataset_version_specifiers.py +0 -0
  389. {datachain-0.37.7 → datachain-0.37.8}/tests/func/test_retry.py +0 -0
  390. {datachain-0.37.7 → datachain-0.37.8}/tests/func/test_session.py +0 -0
  391. {datachain-0.37.7 → datachain-0.37.8}/tests/func/test_storage_pattern.py +0 -0
  392. {datachain-0.37.7 → datachain-0.37.8}/tests/func/test_studio_datetime_parsing.py +0 -0
  393. {datachain-0.37.7 → datachain-0.37.8}/tests/func/test_temp_table_tracking.py +0 -0
  394. {datachain-0.37.7 → datachain-0.37.8}/tests/func/test_to_database.py +0 -0
  395. {datachain-0.37.7 → datachain-0.37.8}/tests/func/test_udf.py +0 -0
  396. {datachain-0.37.7 → datachain-0.37.8}/tests/func/test_union.py +0 -0
  397. {datachain-0.37.7 → datachain-0.37.8}/tests/func/test_video.py +0 -0
  398. {datachain-0.37.7 → datachain-0.37.8}/tests/func/test_warehouse.py +0 -0
  399. {datachain-0.37.7 → datachain-0.37.8}/tests/scripts/feature_class.py +0 -0
  400. {datachain-0.37.7 → datachain-0.37.8}/tests/scripts/feature_class_exception.py +0 -0
  401. {datachain-0.37.7 → datachain-0.37.8}/tests/scripts/feature_class_parallel.py +0 -0
  402. {datachain-0.37.7 → datachain-0.37.8}/tests/scripts/feature_class_parallel_data_model.py +0 -0
  403. {datachain-0.37.7 → datachain-0.37.8}/tests/scripts/name_len_slow.py +0 -0
  404. {datachain-0.37.7 → datachain-0.37.8}/tests/test_atomicity.py +0 -0
  405. {datachain-0.37.7 → datachain-0.37.8}/tests/test_cli_e2e.py +0 -0
  406. {datachain-0.37.7 → datachain-0.37.8}/tests/test_cli_studio.py +0 -0
  407. {datachain-0.37.7 → datachain-0.37.8}/tests/test_import_time.py +0 -0
  408. {datachain-0.37.7 → datachain-0.37.8}/tests/test_job_management_e2e.py +0 -0
  409. {datachain-0.37.7 → datachain-0.37.8}/tests/test_query_e2e.py +0 -0
  410. {datachain-0.37.7 → datachain-0.37.8}/tests/test_telemetry.py +0 -0
  411. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/__init__.py +0 -0
  412. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/lib/__init__.py +0 -0
  413. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/lib/conftest.py +0 -0
  414. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/lib/test_arrow.py +0 -0
  415. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/lib/test_audio.py +0 -0
  416. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/lib/test_checkpoints.py +0 -0
  417. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/lib/test_clip.py +0 -0
  418. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
  419. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/lib/test_datachain_merge.py +0 -0
  420. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/lib/test_diff.py +0 -0
  421. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/lib/test_feature.py +0 -0
  422. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/lib/test_feature_utils.py +0 -0
  423. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/lib/test_file.py +0 -0
  424. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/lib/test_hf.py +0 -0
  425. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/lib/test_image.py +0 -0
  426. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/lib/test_listing_info.py +0 -0
  427. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/lib/test_namespace.py +0 -0
  428. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/lib/test_partition_by.py +0 -0
  429. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/lib/test_project.py +0 -0
  430. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/lib/test_python_to_sql.py +0 -0
  431. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/lib/test_schema.py +0 -0
  432. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/lib/test_settings.py +0 -0
  433. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/lib/test_signal_schema.py +0 -0
  434. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/lib/test_sql_to_python.py +0 -0
  435. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/lib/test_storage_pattern.py +0 -0
  436. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/lib/test_text.py +0 -0
  437. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/lib/test_udf.py +0 -0
  438. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/lib/test_udf_signature.py +0 -0
  439. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/lib/test_utils.py +0 -0
  440. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/lib/test_webdataset.py +0 -0
  441. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/model/__init__.py +0 -0
  442. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/model/test_bbox.py +0 -0
  443. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/model/test_pose.py +0 -0
  444. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/model/test_segment.py +0 -0
  445. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/model/test_utils.py +0 -0
  446. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/sql/__init__.py +0 -0
  447. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/sql/sqlite/__init__.py +0 -0
  448. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/sql/sqlite/test_types.py +0 -0
  449. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/sql/sqlite/test_utils.py +0 -0
  450. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/sql/test_array.py +0 -0
  451. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/sql/test_conditional.py +0 -0
  452. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/sql/test_path.py +0 -0
  453. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/sql/test_random.py +0 -0
  454. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/sql/test_selectable.py +0 -0
  455. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/sql/test_string.py +0 -0
  456. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/test_asyn.py +0 -0
  457. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/test_batching.py +0 -0
  458. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/test_cache.py +0 -0
  459. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/test_catalog.py +0 -0
  460. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/test_catalog_loader.py +0 -0
  461. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/test_cli_datasets.py +0 -0
  462. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/test_cli_parsing.py +0 -0
  463. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/test_client.py +0 -0
  464. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/test_client_gcs.py +0 -0
  465. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/test_client_http.py +0 -0
  466. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/test_client_s3.py +0 -0
  467. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/test_config.py +0 -0
  468. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/test_data_storage.py +0 -0
  469. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/test_database_engine.py +0 -0
  470. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/test_datachain_hash.py +0 -0
  471. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/test_dataset.py +0 -0
  472. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/test_dispatch.py +0 -0
  473. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/test_fileslice.py +0 -0
  474. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/test_func.py +0 -0
  475. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/test_hash_utils.py +0 -0
  476. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/test_job_management.py +0 -0
  477. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/test_listing.py +0 -0
  478. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/test_metastore.py +0 -0
  479. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/test_module_exports.py +0 -0
  480. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/test_pytorch.py +0 -0
  481. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/test_query.py +0 -0
  482. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/test_query_metrics.py +0 -0
  483. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/test_query_params.py +0 -0
  484. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/test_query_steps_hash.py +0 -0
  485. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/test_script_meta.py +0 -0
  486. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/test_semver.py +0 -0
  487. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/test_serializer.py +0 -0
  488. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/test_session.py +0 -0
  489. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/test_utils.py +0 -0
  490. {datachain-0.37.7 → datachain-0.37.8}/tests/unit/test_warehouse.py +0 -0
  491. {datachain-0.37.7 → datachain-0.37.8}/tests/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.37.7
3
+ Version: 0.37.8
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -52,7 +52,11 @@ from datachain.lib.udf_signature import UdfSignature
52
52
  from datachain.lib.utils import DataChainColumnError, DataChainParamsError
53
53
  from datachain.project import Project
54
54
  from datachain.query import Session
55
- from datachain.query.dataset import DatasetQuery, PartitionByType
55
+ from datachain.query.dataset import (
56
+ DatasetQuery,
57
+ PartitionByType,
58
+ RegenerateSystemColumns,
59
+ )
56
60
  from datachain.query.schema import DEFAULT_DELIMITER, Column
57
61
  from datachain.sql.functions import path as pathfunc
58
62
  from datachain.utils import batched_it, env2bool, inside_notebook, row_to_nested_dict
@@ -2740,8 +2744,20 @@ class DataChain:
2740
2744
  )
2741
2745
 
2742
2746
  def shuffle(self) -> "Self":
2743
- """Shuffle the rows of the chain deterministically."""
2744
- return self.order_by("sys.rand")
2747
+ """Shuffle rows with a best-effort deterministic ordering.
2748
+
2749
+ This produces repeatable shuffles. Merge and union operations can
2750
+ lead to non-deterministic results. Use order by or save a dataset
2751
+ afterward to guarantee the same result.
2752
+ """
2753
+ query = self._query.clone(new_table=False)
2754
+ query.steps.append(RegenerateSystemColumns(self._query.catalog))
2755
+
2756
+ chain = self._evolve(
2757
+ query=query,
2758
+ signal_schema=SignalSchema({"sys": Sys}) | self.signals_schema,
2759
+ )
2760
+ return chain.order_by("sys.rand")
2745
2761
 
2746
2762
  def sample(self, n: int) -> "Self":
2747
2763
  """Return a random sample from the chain.
@@ -786,10 +786,31 @@ class SQLClause(Step, ABC):
786
786
  return tuple(c.get_column() if isinstance(c, Function) else c for c in cols)
787
787
 
788
788
  @abstractmethod
789
- def apply_sql_clause(self, query):
789
+ def apply_sql_clause(self, query: Any) -> Any:
790
790
  pass
791
791
 
792
792
 
793
+ @frozen
794
+ class RegenerateSystemColumns(Step):
795
+ catalog: "Catalog"
796
+
797
+ def hash_inputs(self) -> str:
798
+ return hashlib.sha256(b"regenerate_system_columns").hexdigest()
799
+
800
+ def apply(
801
+ self, query_generator: QueryGenerator, temp_tables: list[str]
802
+ ) -> StepResult:
803
+ query = query_generator.select()
804
+ new_query = self.catalog.warehouse._regenerate_system_columns(
805
+ query, keep_existing_columns=True
806
+ )
807
+
808
+ def q(*columns):
809
+ return new_query.with_only_columns(*columns)
810
+
811
+ return step_result(q, new_query.selected_columns)
812
+
813
+
793
814
  @frozen
794
815
  class SQLSelect(SQLClause):
795
816
  args: tuple[Function | ColumnElement, ...]
@@ -1488,10 +1509,6 @@ class DatasetQuery:
1488
1509
  finally:
1489
1510
  self.cleanup()
1490
1511
 
1491
- def shuffle(self) -> "Self":
1492
- # ToDo: implement shaffle based on seed and/or generating random column
1493
- return self.order_by(C.sys__rand)
1494
-
1495
1512
  def sample(self, n) -> "Self":
1496
1513
  """
1497
1514
  Return a random sample from the dataset.
@@ -1,6 +1,7 @@
1
1
  import random
2
2
 
3
3
  from datachain import C, DataChain
4
+ from datachain.lib.signal_schema import SignalResolvingError
4
5
 
5
6
  RESOLUTION = 2**31 - 1 # Maximum positive value for a 32-bit signed integer.
6
7
 
@@ -59,7 +60,10 @@ def train_test_split(
59
60
  ```
60
61
 
61
62
  Note:
62
- The splits are random but deterministic, based on Dataset `sys__rand` field.
63
+ Splits reuse the same best-effort shuffle used by `DataChain.shuffle`. Results
64
+ are typically repeatable, but earlier operations such as `merge`, `union`, or
65
+ custom SQL that reshuffle rows can change the outcome between runs. Add order by
66
+ stable keys first when you need strict reproducibility.
63
67
  """
64
68
  if len(weights) < 2:
65
69
  raise ValueError("Weights should have at least two elements")
@@ -68,16 +72,34 @@ def train_test_split(
68
72
 
69
73
  weights_normalized = [weight / sum(weights) for weight in weights]
70
74
 
75
+ try:
76
+ dc.signals_schema.resolve("sys.rand")
77
+ except SignalResolvingError:
78
+ dc = dc.persist()
79
+
71
80
  rand_col = C("sys.rand")
72
81
  if seed is not None:
73
82
  uniform_seed = random.Random(seed).randrange(1, RESOLUTION) # noqa: S311
74
83
  rand_col = (rand_col % RESOLUTION) * uniform_seed # type: ignore[assignment]
75
84
  rand_col = rand_col % RESOLUTION # type: ignore[assignment]
76
85
 
77
- return [
78
- dc.filter(
79
- rand_col >= round(sum(weights_normalized[:index]) * (RESOLUTION - 1)),
80
- rand_col < round(sum(weights_normalized[: index + 1]) * (RESOLUTION - 1)),
81
- )
82
- for index, _ in enumerate(weights_normalized)
83
- ]
86
+ boundaries: list[int] = [0]
87
+ cumulative = 0.0
88
+ for weight in weights_normalized[:-1]:
89
+ cumulative += weight
90
+ boundary = round(cumulative * RESOLUTION)
91
+ boundaries.append(min(boundary, RESOLUTION))
92
+ boundaries.append(RESOLUTION)
93
+
94
+ splits: list[DataChain] = []
95
+ last_index = len(weights_normalized) - 1
96
+ for index in range(len(weights_normalized)):
97
+ lower = boundaries[index]
98
+ if index == last_index:
99
+ condition = rand_col >= lower
100
+ else:
101
+ upper = boundaries[index + 1]
102
+ condition = (rand_col >= lower) & (rand_col < upper)
103
+ splits.append(dc.filter(condition))
104
+
105
+ return splits
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.37.7
3
+ Version: 0.37.8
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -1,5 +1,6 @@
1
1
  import pytest
2
2
 
3
+ import datachain as dc
3
4
  from datachain.toolkit import train_test_split
4
5
 
5
6
 
@@ -18,8 +19,8 @@ def test_train_test_split_not_random(not_random_ds, seed, weights, expected):
18
19
  res = train_test_split(not_random_ds, weights, seed=seed)
19
20
  assert len(res) == len(expected)
20
21
 
21
- for i, dc in enumerate(res):
22
- assert dc.to_values("sys.id") == expected[i]
22
+ for i, chain in enumerate(res):
23
+ assert chain.to_values("sys.id") == expected[i]
23
24
 
24
25
 
25
26
  @pytest.mark.parametrize(
@@ -40,8 +41,8 @@ def test_train_test_split_random(pseudo_random_ds, seed, weights, expected):
40
41
  res = train_test_split(pseudo_random_ds, weights, seed=seed)
41
42
  assert len(res) == len(expected)
42
43
 
43
- for i, dc in enumerate(res):
44
- assert dc.to_values("sys.id") == expected[i]
44
+ for i, chain in enumerate(res):
45
+ assert chain.to_values("sys.id") == expected[i]
45
46
 
46
47
 
47
48
  def test_train_test_split_errors(not_random_ds):
@@ -49,3 +50,32 @@ def test_train_test_split_errors(not_random_ds):
49
50
  train_test_split(not_random_ds, [0.5])
50
51
  with pytest.raises(ValueError, match="Weights should be non-negative"):
51
52
  train_test_split(not_random_ds, [-1, 1])
53
+
54
+
55
+ def test_split_after_merge(test_session):
56
+ left = dc.read_values(ids=[1, 2, 3, 4], session=test_session)
57
+ right = dc.read_values(
58
+ ids=[1, 2, 3, 4],
59
+ extra=["a", "b", "c", "d"],
60
+ session=test_session,
61
+ )
62
+
63
+ merged = left.merge(right, on="ids")
64
+
65
+ train, test = train_test_split(merged, [0.5, 0.5])
66
+
67
+ for split in (train, test):
68
+ sys_schema = split.signals_schema.resolve("sys.id", "sys.rand").values
69
+ assert sys_schema["sys.id"] is int
70
+ assert sys_schema["sys.rand"] is int
71
+
72
+ combined_rows = set(train.to_list("ids", "extra")) | set(
73
+ test.to_list("ids", "extra")
74
+ )
75
+
76
+ assert combined_rows == {
77
+ (1, "a"),
78
+ (2, "b"),
79
+ (3, "c"),
80
+ (4, "d"),
81
+ }
@@ -1235,6 +1235,20 @@ def test_persist_restores_sys_signals_after_merge(test_session):
1235
1235
  assert sys_schema["sys.rand"] is int
1236
1236
 
1237
1237
 
1238
+ def test_shuffle_after_merge(test_session):
1239
+ left = dc.read_values(ids=[1, 2], session=test_session)
1240
+ right = dc.read_values(ids=[1, 2], extra=["x", "y"], session=test_session)
1241
+
1242
+ shuffled = left.merge(right, on="ids").shuffle()
1243
+
1244
+ sys_schema = shuffled.signals_schema.resolve("sys.id", "sys.rand").values
1245
+ assert sys_schema["sys.id"] is int
1246
+ assert sys_schema["sys.rand"] is int
1247
+
1248
+ rows = set(shuffled.to_list("ids", "extra"))
1249
+ assert rows == {(1, "x"), (2, "y")}
1250
+
1251
+
1238
1252
  def test_unsupported_output_type(test_session):
1239
1253
  vector = [3.14, 2.72, 1.62]
1240
1254
 
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes