datachain 0.13.0__tar.gz → 0.13.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (341)
  1. {datachain-0.13.0 → datachain-0.13.1}/PKG-INFO +3 -2
  2. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/catalog/catalog.py +13 -0
  3. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/diff/__init__.py +8 -5
  4. {datachain-0.13.0 → datachain-0.13.1}/src/datachain.egg-info/PKG-INFO +3 -2
  5. {datachain-0.13.0 → datachain-0.13.1}/tests/func/test_datachain.py +10 -1
  6. {datachain-0.13.0 → datachain-0.13.1}/tests/unit/lib/test_diff.py +89 -56
  7. {datachain-0.13.0 → datachain-0.13.1}/.cruft.json +0 -0
  8. {datachain-0.13.0 → datachain-0.13.1}/.gitattributes +0 -0
  9. {datachain-0.13.0 → datachain-0.13.1}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  10. {datachain-0.13.0 → datachain-0.13.1}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
  11. {datachain-0.13.0 → datachain-0.13.1}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  12. {datachain-0.13.0 → datachain-0.13.1}/.github/codecov.yaml +0 -0
  13. {datachain-0.13.0 → datachain-0.13.1}/.github/dependabot.yml +0 -0
  14. {datachain-0.13.0 → datachain-0.13.1}/.github/workflows/benchmarks.yml +0 -0
  15. {datachain-0.13.0 → datachain-0.13.1}/.github/workflows/release.yml +0 -0
  16. {datachain-0.13.0 → datachain-0.13.1}/.github/workflows/tests-studio.yml +0 -0
  17. {datachain-0.13.0 → datachain-0.13.1}/.github/workflows/tests.yml +0 -0
  18. {datachain-0.13.0 → datachain-0.13.1}/.github/workflows/update-template.yaml +0 -0
  19. {datachain-0.13.0 → datachain-0.13.1}/.gitignore +0 -0
  20. {datachain-0.13.0 → datachain-0.13.1}/.pre-commit-config.yaml +0 -0
  21. {datachain-0.13.0 → datachain-0.13.1}/CODE_OF_CONDUCT.rst +0 -0
  22. {datachain-0.13.0 → datachain-0.13.1}/LICENSE +0 -0
  23. {datachain-0.13.0 → datachain-0.13.1}/README.rst +0 -0
  24. {datachain-0.13.0 → datachain-0.13.1}/docs/assets/captioned_cartoons.png +0 -0
  25. {datachain-0.13.0 → datachain-0.13.1}/docs/assets/datachain-white.svg +0 -0
  26. {datachain-0.13.0 → datachain-0.13.1}/docs/assets/datachain.svg +0 -0
  27. {datachain-0.13.0 → datachain-0.13.1}/docs/contributing.md +0 -0
  28. {datachain-0.13.0 → datachain-0.13.1}/docs/css/github-permalink-style.css +0 -0
  29. {datachain-0.13.0 → datachain-0.13.1}/docs/examples.md +0 -0
  30. {datachain-0.13.0 → datachain-0.13.1}/docs/index.md +0 -0
  31. {datachain-0.13.0 → datachain-0.13.1}/docs/overrides/main.html +0 -0
  32. {datachain-0.13.0 → datachain-0.13.1}/docs/quick-start.md +0 -0
  33. {datachain-0.13.0 → datachain-0.13.1}/docs/references/data-types/arrowrow.md +0 -0
  34. {datachain-0.13.0 → datachain-0.13.1}/docs/references/data-types/bbox.md +0 -0
  35. {datachain-0.13.0 → datachain-0.13.1}/docs/references/data-types/file.md +0 -0
  36. {datachain-0.13.0 → datachain-0.13.1}/docs/references/data-types/imagefile.md +0 -0
  37. {datachain-0.13.0 → datachain-0.13.1}/docs/references/data-types/index.md +0 -0
  38. {datachain-0.13.0 → datachain-0.13.1}/docs/references/data-types/pose.md +0 -0
  39. {datachain-0.13.0 → datachain-0.13.1}/docs/references/data-types/segment.md +0 -0
  40. {datachain-0.13.0 → datachain-0.13.1}/docs/references/data-types/tarvfile.md +0 -0
  41. {datachain-0.13.0 → datachain-0.13.1}/docs/references/data-types/textfile.md +0 -0
  42. {datachain-0.13.0 → datachain-0.13.1}/docs/references/data-types/videofile.md +0 -0
  43. {datachain-0.13.0 → datachain-0.13.1}/docs/references/datachain.md +0 -0
  44. {datachain-0.13.0 → datachain-0.13.1}/docs/references/func.md +0 -0
  45. {datachain-0.13.0 → datachain-0.13.1}/docs/references/index.md +0 -0
  46. {datachain-0.13.0 → datachain-0.13.1}/docs/references/remotes.md +0 -0
  47. {datachain-0.13.0 → datachain-0.13.1}/docs/references/toolkit.md +0 -0
  48. {datachain-0.13.0 → datachain-0.13.1}/docs/references/torch.md +0 -0
  49. {datachain-0.13.0 → datachain-0.13.1}/docs/references/udf.md +0 -0
  50. {datachain-0.13.0 → datachain-0.13.1}/docs/tutorials.md +0 -0
  51. {datachain-0.13.0 → datachain-0.13.1}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
  52. {datachain-0.13.0 → datachain-0.13.1}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
  53. {datachain-0.13.0 → datachain-0.13.1}/examples/computer_vision/openimage-detect.py +0 -0
  54. {datachain-0.13.0 → datachain-0.13.1}/examples/computer_vision/ultralytics-bbox.py +0 -0
  55. {datachain-0.13.0 → datachain-0.13.1}/examples/computer_vision/ultralytics-pose.py +0 -0
  56. {datachain-0.13.0 → datachain-0.13.1}/examples/computer_vision/ultralytics-segment.py +0 -0
  57. {datachain-0.13.0 → datachain-0.13.1}/examples/get_started/common_sql_functions.py +0 -0
  58. {datachain-0.13.0 → datachain-0.13.1}/examples/get_started/json-csv-reader.py +0 -0
  59. {datachain-0.13.0 → datachain-0.13.1}/examples/get_started/torch-loader.py +0 -0
  60. {datachain-0.13.0 → datachain-0.13.1}/examples/get_started/udfs/parallel.py +0 -0
  61. {datachain-0.13.0 → datachain-0.13.1}/examples/get_started/udfs/simple.py +0 -0
  62. {datachain-0.13.0 → datachain-0.13.1}/examples/get_started/udfs/stateful.py +0 -0
  63. {datachain-0.13.0 → datachain-0.13.1}/examples/llm_and_nlp/claude-query.py +0 -0
  64. {datachain-0.13.0 → datachain-0.13.1}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
  65. {datachain-0.13.0 → datachain-0.13.1}/examples/multimodal/clip_inference.py +0 -0
  66. {datachain-0.13.0 → datachain-0.13.1}/examples/multimodal/hf_pipeline.py +0 -0
  67. {datachain-0.13.0 → datachain-0.13.1}/examples/multimodal/openai_image_desc_lib.py +0 -0
  68. {datachain-0.13.0 → datachain-0.13.1}/examples/multimodal/wds.py +0 -0
  69. {datachain-0.13.0 → datachain-0.13.1}/examples/multimodal/wds_filtered.py +0 -0
  70. {datachain-0.13.0 → datachain-0.13.1}/mkdocs.yml +0 -0
  71. {datachain-0.13.0 → datachain-0.13.1}/noxfile.py +0 -0
  72. {datachain-0.13.0 → datachain-0.13.1}/pyproject.toml +0 -0
  73. {datachain-0.13.0 → datachain-0.13.1}/setup.cfg +0 -0
  74. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/__init__.py +0 -0
  75. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/__main__.py +0 -0
  76. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/asyn.py +0 -0
  77. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/cache.py +0 -0
  78. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/catalog/__init__.py +0 -0
  79. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/catalog/datasource.py +0 -0
  80. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/catalog/loader.py +0 -0
  81. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/cli/__init__.py +0 -0
  82. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/cli/commands/__init__.py +0 -0
  83. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/cli/commands/datasets.py +0 -0
  84. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/cli/commands/du.py +0 -0
  85. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/cli/commands/index.py +0 -0
  86. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/cli/commands/ls.py +0 -0
  87. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/cli/commands/misc.py +0 -0
  88. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/cli/commands/query.py +0 -0
  89. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/cli/commands/show.py +0 -0
  90. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/cli/parser/__init__.py +0 -0
  91. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/cli/parser/job.py +0 -0
  92. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/cli/parser/studio.py +0 -0
  93. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/cli/parser/utils.py +0 -0
  94. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/cli/utils.py +0 -0
  95. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/client/__init__.py +0 -0
  96. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/client/azure.py +0 -0
  97. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/client/fileslice.py +0 -0
  98. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/client/fsspec.py +0 -0
  99. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/client/gcs.py +0 -0
  100. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/client/hf.py +0 -0
  101. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/client/local.py +0 -0
  102. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/client/s3.py +0 -0
  103. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/config.py +0 -0
  104. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/data_storage/__init__.py +0 -0
  105. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/data_storage/db_engine.py +0 -0
  106. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/data_storage/job.py +0 -0
  107. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/data_storage/metastore.py +0 -0
  108. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/data_storage/schema.py +0 -0
  109. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/data_storage/serializer.py +0 -0
  110. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/data_storage/sqlite.py +0 -0
  111. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/data_storage/warehouse.py +0 -0
  112. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/dataset.py +0 -0
  113. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/error.py +0 -0
  114. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/fs/__init__.py +0 -0
  115. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/fs/reference.py +0 -0
  116. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/fs/utils.py +0 -0
  117. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/func/__init__.py +0 -0
  118. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/func/aggregate.py +0 -0
  119. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/func/array.py +0 -0
  120. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/func/base.py +0 -0
  121. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/func/conditional.py +0 -0
  122. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/func/func.py +0 -0
  123. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/func/numeric.py +0 -0
  124. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/func/path.py +0 -0
  125. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/func/random.py +0 -0
  126. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/func/string.py +0 -0
  127. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/func/window.py +0 -0
  128. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/job.py +0 -0
  129. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/lib/__init__.py +0 -0
  130. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/lib/arrow.py +0 -0
  131. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/lib/clip.py +0 -0
  132. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/lib/convert/__init__.py +0 -0
  133. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/lib/convert/flatten.py +0 -0
  134. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/lib/convert/python_to_sql.py +0 -0
  135. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/lib/convert/sql_to_python.py +0 -0
  136. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/lib/convert/unflatten.py +0 -0
  137. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/lib/convert/values_to_tuples.py +0 -0
  138. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/lib/data_model.py +0 -0
  139. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/lib/dataset_info.py +0 -0
  140. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/lib/dc.py +0 -0
  141. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/lib/file.py +0 -0
  142. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/lib/hf.py +0 -0
  143. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/lib/image.py +0 -0
  144. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/lib/listing.py +0 -0
  145. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/lib/listing_info.py +0 -0
  146. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/lib/meta_formats.py +0 -0
  147. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/lib/model_store.py +0 -0
  148. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/lib/pytorch.py +0 -0
  149. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/lib/settings.py +0 -0
  150. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/lib/signal_schema.py +0 -0
  151. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/lib/tar.py +0 -0
  152. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/lib/text.py +0 -0
  153. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/lib/udf.py +0 -0
  154. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/lib/udf_signature.py +0 -0
  155. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/lib/utils.py +0 -0
  156. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/lib/video.py +0 -0
  157. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/lib/webdataset.py +0 -0
  158. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/lib/webdataset_laion.py +0 -0
  159. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/listing.py +0 -0
  160. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/model/__init__.py +0 -0
  161. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/model/bbox.py +0 -0
  162. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/model/pose.py +0 -0
  163. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/model/segment.py +0 -0
  164. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/model/ultralytics/__init__.py +0 -0
  165. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/model/ultralytics/bbox.py +0 -0
  166. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/model/ultralytics/pose.py +0 -0
  167. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/model/ultralytics/segment.py +0 -0
  168. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/model/utils.py +0 -0
  169. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/node.py +0 -0
  170. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/nodes_fetcher.py +0 -0
  171. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/nodes_thread_pool.py +0 -0
  172. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/progress.py +0 -0
  173. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/py.typed +0 -0
  174. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/query/__init__.py +0 -0
  175. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/query/batch.py +0 -0
  176. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/query/dataset.py +0 -0
  177. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/query/dispatch.py +0 -0
  178. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/query/metrics.py +0 -0
  179. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/query/params.py +0 -0
  180. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/query/queue.py +0 -0
  181. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/query/schema.py +0 -0
  182. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/query/session.py +0 -0
  183. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/query/udf.py +0 -0
  184. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/query/utils.py +0 -0
  185. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/remote/__init__.py +0 -0
  186. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/remote/studio.py +0 -0
  187. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/script_meta.py +0 -0
  188. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/sql/__init__.py +0 -0
  189. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/sql/default/__init__.py +0 -0
  190. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/sql/default/base.py +0 -0
  191. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/sql/functions/__init__.py +0 -0
  192. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/sql/functions/aggregate.py +0 -0
  193. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/sql/functions/array.py +0 -0
  194. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/sql/functions/conditional.py +0 -0
  195. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/sql/functions/numeric.py +0 -0
  196. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/sql/functions/path.py +0 -0
  197. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/sql/functions/random.py +0 -0
  198. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/sql/functions/string.py +0 -0
  199. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/sql/selectable.py +0 -0
  200. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/sql/sqlite/__init__.py +0 -0
  201. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/sql/sqlite/base.py +0 -0
  202. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/sql/sqlite/types.py +0 -0
  203. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/sql/sqlite/vector.py +0 -0
  204. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/sql/types.py +0 -0
  205. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/sql/utils.py +0 -0
  206. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/studio.py +0 -0
  207. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/telemetry.py +0 -0
  208. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/toolkit/__init__.py +0 -0
  209. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/toolkit/split.py +0 -0
  210. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/torch/__init__.py +0 -0
  211. {datachain-0.13.0 → datachain-0.13.1}/src/datachain/utils.py +0 -0
  212. {datachain-0.13.0 → datachain-0.13.1}/src/datachain.egg-info/SOURCES.txt +0 -0
  213. {datachain-0.13.0 → datachain-0.13.1}/src/datachain.egg-info/dependency_links.txt +0 -0
  214. {datachain-0.13.0 → datachain-0.13.1}/src/datachain.egg-info/entry_points.txt +0 -0
  215. {datachain-0.13.0 → datachain-0.13.1}/src/datachain.egg-info/requires.txt +0 -0
  216. {datachain-0.13.0 → datachain-0.13.1}/src/datachain.egg-info/top_level.txt +0 -0
  217. {datachain-0.13.0 → datachain-0.13.1}/tests/__init__.py +0 -0
  218. {datachain-0.13.0 → datachain-0.13.1}/tests/benchmarks/__init__.py +0 -0
  219. {datachain-0.13.0 → datachain-0.13.1}/tests/benchmarks/conftest.py +0 -0
  220. {datachain-0.13.0 → datachain-0.13.1}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
  221. {datachain-0.13.0 → datachain-0.13.1}/tests/benchmarks/datasets/.dvc/config +0 -0
  222. {datachain-0.13.0 → datachain-0.13.1}/tests/benchmarks/datasets/.gitignore +0 -0
  223. {datachain-0.13.0 → datachain-0.13.1}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
  224. {datachain-0.13.0 → datachain-0.13.1}/tests/benchmarks/test_datachain.py +0 -0
  225. {datachain-0.13.0 → datachain-0.13.1}/tests/benchmarks/test_ls.py +0 -0
  226. {datachain-0.13.0 → datachain-0.13.1}/tests/benchmarks/test_version.py +0 -0
  227. {datachain-0.13.0 → datachain-0.13.1}/tests/conftest.py +0 -0
  228. {datachain-0.13.0 → datachain-0.13.1}/tests/data.py +0 -0
  229. {datachain-0.13.0 → datachain-0.13.1}/tests/examples/__init__.py +0 -0
  230. {datachain-0.13.0 → datachain-0.13.1}/tests/examples/test_examples.py +0 -0
  231. {datachain-0.13.0 → datachain-0.13.1}/tests/examples/test_wds_e2e.py +0 -0
  232. {datachain-0.13.0 → datachain-0.13.1}/tests/examples/wds_data.py +0 -0
  233. {datachain-0.13.0 → datachain-0.13.1}/tests/func/__init__.py +0 -0
  234. {datachain-0.13.0 → datachain-0.13.1}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
  235. {datachain-0.13.0 → datachain-0.13.1}/tests/func/data/lena.jpg +0 -0
  236. {datachain-0.13.0 → datachain-0.13.1}/tests/func/fake-service-account-credentials.json +0 -0
  237. {datachain-0.13.0 → datachain-0.13.1}/tests/func/model/__init__.py +0 -0
  238. {datachain-0.13.0 → datachain-0.13.1}/tests/func/model/data/running-mask0.png +0 -0
  239. {datachain-0.13.0 → datachain-0.13.1}/tests/func/model/data/running-mask1.png +0 -0
  240. {datachain-0.13.0 → datachain-0.13.1}/tests/func/model/data/running.jpg +0 -0
  241. {datachain-0.13.0 → datachain-0.13.1}/tests/func/model/data/ships.jpg +0 -0
  242. {datachain-0.13.0 → datachain-0.13.1}/tests/func/model/test_yolo.py +0 -0
  243. {datachain-0.13.0 → datachain-0.13.1}/tests/func/test_catalog.py +0 -0
  244. {datachain-0.13.0 → datachain-0.13.1}/tests/func/test_client.py +0 -0
  245. {datachain-0.13.0 → datachain-0.13.1}/tests/func/test_cloud_transfer.py +0 -0
  246. {datachain-0.13.0 → datachain-0.13.1}/tests/func/test_data_storage.py +0 -0
  247. {datachain-0.13.0 → datachain-0.13.1}/tests/func/test_datachain_merge.py +0 -0
  248. {datachain-0.13.0 → datachain-0.13.1}/tests/func/test_dataset_query.py +0 -0
  249. {datachain-0.13.0 → datachain-0.13.1}/tests/func/test_datasets.py +0 -0
  250. {datachain-0.13.0 → datachain-0.13.1}/tests/func/test_feature_pickling.py +0 -0
  251. {datachain-0.13.0 → datachain-0.13.1}/tests/func/test_file.py +0 -0
  252. {datachain-0.13.0 → datachain-0.13.1}/tests/func/test_hf.py +0 -0
  253. {datachain-0.13.0 → datachain-0.13.1}/tests/func/test_hidden_field.py +0 -0
  254. {datachain-0.13.0 → datachain-0.13.1}/tests/func/test_image.py +0 -0
  255. {datachain-0.13.0 → datachain-0.13.1}/tests/func/test_listing.py +0 -0
  256. {datachain-0.13.0 → datachain-0.13.1}/tests/func/test_ls.py +0 -0
  257. {datachain-0.13.0 → datachain-0.13.1}/tests/func/test_meta_formats.py +0 -0
  258. {datachain-0.13.0 → datachain-0.13.1}/tests/func/test_metrics.py +0 -0
  259. {datachain-0.13.0 → datachain-0.13.1}/tests/func/test_pull.py +0 -0
  260. {datachain-0.13.0 → datachain-0.13.1}/tests/func/test_pytorch.py +0 -0
  261. {datachain-0.13.0 → datachain-0.13.1}/tests/func/test_query.py +0 -0
  262. {datachain-0.13.0 → datachain-0.13.1}/tests/func/test_session.py +0 -0
  263. {datachain-0.13.0 → datachain-0.13.1}/tests/func/test_toolkit.py +0 -0
  264. {datachain-0.13.0 → datachain-0.13.1}/tests/func/test_video.py +0 -0
  265. {datachain-0.13.0 → datachain-0.13.1}/tests/func/test_warehouse.py +0 -0
  266. {datachain-0.13.0 → datachain-0.13.1}/tests/scripts/feature_class.py +0 -0
  267. {datachain-0.13.0 → datachain-0.13.1}/tests/scripts/feature_class_exception.py +0 -0
  268. {datachain-0.13.0 → datachain-0.13.1}/tests/scripts/feature_class_parallel.py +0 -0
  269. {datachain-0.13.0 → datachain-0.13.1}/tests/scripts/feature_class_parallel_data_model.py +0 -0
  270. {datachain-0.13.0 → datachain-0.13.1}/tests/scripts/name_len_slow.py +0 -0
  271. {datachain-0.13.0 → datachain-0.13.1}/tests/test_atomicity.py +0 -0
  272. {datachain-0.13.0 → datachain-0.13.1}/tests/test_cli_e2e.py +0 -0
  273. {datachain-0.13.0 → datachain-0.13.1}/tests/test_cli_studio.py +0 -0
  274. {datachain-0.13.0 → datachain-0.13.1}/tests/test_import_time.py +0 -0
  275. {datachain-0.13.0 → datachain-0.13.1}/tests/test_query_e2e.py +0 -0
  276. {datachain-0.13.0 → datachain-0.13.1}/tests/test_telemetry.py +0 -0
  277. {datachain-0.13.0 → datachain-0.13.1}/tests/unit/__init__.py +0 -0
  278. {datachain-0.13.0 → datachain-0.13.1}/tests/unit/lib/__init__.py +0 -0
  279. {datachain-0.13.0 → datachain-0.13.1}/tests/unit/lib/conftest.py +0 -0
  280. {datachain-0.13.0 → datachain-0.13.1}/tests/unit/lib/test_arrow.py +0 -0
  281. {datachain-0.13.0 → datachain-0.13.1}/tests/unit/lib/test_clip.py +0 -0
  282. {datachain-0.13.0 → datachain-0.13.1}/tests/unit/lib/test_datachain.py +0 -0
  283. {datachain-0.13.0 → datachain-0.13.1}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
  284. {datachain-0.13.0 → datachain-0.13.1}/tests/unit/lib/test_datachain_merge.py +0 -0
  285. {datachain-0.13.0 → datachain-0.13.1}/tests/unit/lib/test_feature.py +0 -0
  286. {datachain-0.13.0 → datachain-0.13.1}/tests/unit/lib/test_feature_utils.py +0 -0
  287. {datachain-0.13.0 → datachain-0.13.1}/tests/unit/lib/test_file.py +0 -0
  288. {datachain-0.13.0 → datachain-0.13.1}/tests/unit/lib/test_hf.py +0 -0
  289. {datachain-0.13.0 → datachain-0.13.1}/tests/unit/lib/test_image.py +0 -0
  290. {datachain-0.13.0 → datachain-0.13.1}/tests/unit/lib/test_listing_info.py +0 -0
  291. {datachain-0.13.0 → datachain-0.13.1}/tests/unit/lib/test_python_to_sql.py +0 -0
  292. {datachain-0.13.0 → datachain-0.13.1}/tests/unit/lib/test_schema.py +0 -0
  293. {datachain-0.13.0 → datachain-0.13.1}/tests/unit/lib/test_signal_schema.py +0 -0
  294. {datachain-0.13.0 → datachain-0.13.1}/tests/unit/lib/test_sql_to_python.py +0 -0
  295. {datachain-0.13.0 → datachain-0.13.1}/tests/unit/lib/test_text.py +0 -0
  296. {datachain-0.13.0 → datachain-0.13.1}/tests/unit/lib/test_udf_signature.py +0 -0
  297. {datachain-0.13.0 → datachain-0.13.1}/tests/unit/lib/test_utils.py +0 -0
  298. {datachain-0.13.0 → datachain-0.13.1}/tests/unit/lib/test_webdataset.py +0 -0
  299. {datachain-0.13.0 → datachain-0.13.1}/tests/unit/model/__init__.py +0 -0
  300. {datachain-0.13.0 → datachain-0.13.1}/tests/unit/model/test_bbox.py +0 -0
  301. {datachain-0.13.0 → datachain-0.13.1}/tests/unit/model/test_pose.py +0 -0
  302. {datachain-0.13.0 → datachain-0.13.1}/tests/unit/model/test_segment.py +0 -0
  303. {datachain-0.13.0 → datachain-0.13.1}/tests/unit/model/test_utils.py +0 -0
  304. {datachain-0.13.0 → datachain-0.13.1}/tests/unit/sql/__init__.py +0 -0
  305. {datachain-0.13.0 → datachain-0.13.1}/tests/unit/sql/sqlite/__init__.py +0 -0
  306. {datachain-0.13.0 → datachain-0.13.1}/tests/unit/sql/sqlite/test_types.py +0 -0
  307. {datachain-0.13.0 → datachain-0.13.1}/tests/unit/sql/sqlite/test_utils.py +0 -0
  308. {datachain-0.13.0 → datachain-0.13.1}/tests/unit/sql/test_array.py +0 -0
  309. {datachain-0.13.0 → datachain-0.13.1}/tests/unit/sql/test_conditional.py +0 -0
  310. {datachain-0.13.0 → datachain-0.13.1}/tests/unit/sql/test_path.py +0 -0
  311. {datachain-0.13.0 → datachain-0.13.1}/tests/unit/sql/test_random.py +0 -0
  312. {datachain-0.13.0 → datachain-0.13.1}/tests/unit/sql/test_selectable.py +0 -0
  313. {datachain-0.13.0 → datachain-0.13.1}/tests/unit/sql/test_string.py +0 -0
  314. {datachain-0.13.0 → datachain-0.13.1}/tests/unit/test_asyn.py +0 -0
  315. {datachain-0.13.0 → datachain-0.13.1}/tests/unit/test_cache.py +0 -0
  316. {datachain-0.13.0 → datachain-0.13.1}/tests/unit/test_catalog.py +0 -0
  317. {datachain-0.13.0 → datachain-0.13.1}/tests/unit/test_catalog_loader.py +0 -0
  318. {datachain-0.13.0 → datachain-0.13.1}/tests/unit/test_cli_parsing.py +0 -0
  319. {datachain-0.13.0 → datachain-0.13.1}/tests/unit/test_client.py +0 -0
  320. {datachain-0.13.0 → datachain-0.13.1}/tests/unit/test_client_gcs.py +0 -0
  321. {datachain-0.13.0 → datachain-0.13.1}/tests/unit/test_client_s3.py +0 -0
  322. {datachain-0.13.0 → datachain-0.13.1}/tests/unit/test_config.py +0 -0
  323. {datachain-0.13.0 → datachain-0.13.1}/tests/unit/test_data_storage.py +0 -0
  324. {datachain-0.13.0 → datachain-0.13.1}/tests/unit/test_database_engine.py +0 -0
  325. {datachain-0.13.0 → datachain-0.13.1}/tests/unit/test_dataset.py +0 -0
  326. {datachain-0.13.0 → datachain-0.13.1}/tests/unit/test_dispatch.py +0 -0
  327. {datachain-0.13.0 → datachain-0.13.1}/tests/unit/test_fileslice.py +0 -0
  328. {datachain-0.13.0 → datachain-0.13.1}/tests/unit/test_func.py +0 -0
  329. {datachain-0.13.0 → datachain-0.13.1}/tests/unit/test_listing.py +0 -0
  330. {datachain-0.13.0 → datachain-0.13.1}/tests/unit/test_metastore.py +0 -0
  331. {datachain-0.13.0 → datachain-0.13.1}/tests/unit/test_module_exports.py +0 -0
  332. {datachain-0.13.0 → datachain-0.13.1}/tests/unit/test_pytorch.py +0 -0
  333. {datachain-0.13.0 → datachain-0.13.1}/tests/unit/test_query.py +0 -0
  334. {datachain-0.13.0 → datachain-0.13.1}/tests/unit/test_query_metrics.py +0 -0
  335. {datachain-0.13.0 → datachain-0.13.1}/tests/unit/test_query_params.py +0 -0
  336. {datachain-0.13.0 → datachain-0.13.1}/tests/unit/test_script_meta.py +0 -0
  337. {datachain-0.13.0 → datachain-0.13.1}/tests/unit/test_serializer.py +0 -0
  338. {datachain-0.13.0 → datachain-0.13.1}/tests/unit/test_session.py +0 -0
  339. {datachain-0.13.0 → datachain-0.13.1}/tests/unit/test_utils.py +0 -0
  340. {datachain-0.13.0 → datachain-0.13.1}/tests/unit/test_warehouse.py +0 -0
  341. {datachain-0.13.0 → datachain-0.13.1}/tests/utils.py +0 -0
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.13.0
3
+ Version: 0.13.1
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License: Apache-2.0
@@ -107,6 +107,7 @@ Requires-Dist: accelerate; extra == "examples"
107
107
  Requires-Dist: huggingface_hub[hf_transfer]; extra == "examples"
108
108
  Requires-Dist: ultralytics; extra == "examples"
109
109
  Requires-Dist: open_clip_torch; extra == "examples"
110
+ Dynamic: license-file
110
111
 
111
112
  ================
112
113
  |logo| DataChain
@@ -795,6 +795,19 @@ class Catalog:
795
795
  try:
796
796
  dataset = self.get_dataset(name)
797
797
  default_version = dataset.next_version
798
+
799
+ if (description or labels) and (
800
+ dataset.description != description or dataset.labels != labels
801
+ ):
802
+ description = description or dataset.description
803
+ labels = labels or dataset.labels
804
+
805
+ self.update_dataset(
806
+ dataset,
807
+ description=description,
808
+ labels=labels,
809
+ )
810
+
798
811
  except DatasetNotFoundError:
799
812
  schema = {
800
813
  c.name: c.type.to_dict() for c in columns if isinstance(c.type, SQLType)
@@ -74,6 +74,7 @@ def _compare( # noqa: C901
74
74
  # all left and right columns
75
75
  cols = left.signals_schema.clone_without_sys_signals().db_signals()
76
76
  right_cols = right.signals_schema.clone_without_sys_signals().db_signals()
77
+ cols_select = list(left.signals_schema.clone_without_sys_signals().values.keys())
77
78
 
78
79
  # getting correct on and right_on column names
79
80
  on = left.signals_schema.resolve(*on).db_signals() # type: ignore[assignment]
@@ -131,10 +132,12 @@ def _compare( # noqa: C901
131
132
  # when the row is deleted, we need to take column values from the right chain
132
133
  .mutate(
133
134
  **{
134
- f"{c}": ifelse(
135
- C(diff_col) == CompareStatus.DELETED, C(f"{rname}{c}"), C(c)
135
+ f"{l_on}": ifelse(
136
+ C(diff_col) == CompareStatus.DELETED,
137
+ C(f"{rname + l_on if on == right_on else r_on}"),
138
+ C(l_on),
136
139
  )
137
- for c in [c for c in cols if c in right_cols]
140
+ for l_on, r_on in zip(on, right_on) # type: ignore[arg-type]
138
141
  }
139
142
  )
140
143
  .select_except(ldiff_col, rdiff_col)
@@ -150,9 +153,9 @@ def _compare( # noqa: C901
150
153
  dc_diff = dc_diff.filter(C(diff_col) != CompareStatus.DELETED)
151
154
 
152
155
  if status_col:
153
- cols.append(diff_col) # type: ignore[arg-type]
156
+ cols_select.append(diff_col)
154
157
 
155
- dc_diff = dc_diff.select(*cols)
158
+ dc_diff = dc_diff.select(*cols_select)
156
159
 
157
160
  # final schema is schema from the left chain with status column added if needed
158
161
  dc_diff.signals_schema = (
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.13.0
3
+ Version: 0.13.1
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License: Apache-2.0
@@ -107,6 +107,7 @@ Requires-Dist: accelerate; extra == "examples"
107
107
  Requires-Dist: huggingface_hub[hf_transfer]; extra == "examples"
108
108
  Requires-Dist: ultralytics; extra == "examples"
109
109
  Requires-Dist: open_clip_torch; extra == "examples"
110
+ Dynamic: license-file
110
111
 
111
112
  ================
112
113
  |logo| DataChain
@@ -461,6 +461,16 @@ def test_save(test_session):
461
461
  assert ds.description == "new description"
462
462
  assert ds.labels == ["new_label", "old_label"]
463
463
 
464
+ chain.save(
465
+ name="new_name",
466
+ description="updated description",
467
+ labels=["new_label", "old_label", "new_label2"],
468
+ )
469
+ ds = test_session.catalog.get_dataset("new_name")
470
+ assert ds.name == "new_name"
471
+ assert ds.description == "updated description"
472
+ assert ds.labels == ["new_label", "old_label", "new_label2"]
473
+
464
474
 
465
475
  def test_show_nested_empty(capsys, test_session):
466
476
  files = [File(size=s, path=p) for p, s in zip(list("abcde"), range(5))]
@@ -1000,7 +1010,6 @@ def test_udf_distributed_interrupt(cloud_test_catalog_tmpfile, capfd, datachain_
1000
1010
  with pytest.raises(RuntimeError, match=r"Worker Killed \(KeyboardInterrupt\)"):
1001
1011
  dc.show()
1002
1012
  captured = capfd.readouterr()
1003
- assert "KeyboardInterrupt" in captured.err
1004
1013
  assert "semaphore" not in captured.err
1005
1014
 
1006
1015
 
@@ -6,7 +6,7 @@ from pydantic import BaseModel
6
6
  from datachain.diff import CompareStatus, compare_and_split
7
7
  from datachain.lib.dc import DataChain
8
8
  from datachain.lib.file import File
9
- from datachain.sql.types import Int64, String
9
+ from datachain.sql.types import Int, String
10
10
  from tests.utils import sorted_dicts
11
11
 
12
12
 
@@ -14,11 +14,21 @@ def _as_utc(d):
14
14
  return d.replace(tzinfo=timezone.utc)
15
15
 
16
16
 
17
+ @pytest.fixture
18
+ def str_default(test_session):
19
+ return String.default_value(test_session.catalog.warehouse.db.dialect)
20
+
21
+
22
+ @pytest.fixture
23
+ def int_default(test_session):
24
+ return Int.default_value(test_session.catalog.warehouse.db.dialect)
25
+
26
+
17
27
  @pytest.mark.parametrize("added", (True, False))
18
28
  @pytest.mark.parametrize("deleted", (True, False))
19
29
  @pytest.mark.parametrize("modified", (True, False))
20
30
  @pytest.mark.parametrize("same", (True, False))
21
- def test_compare(test_session, added, deleted, modified, same):
31
+ def test_compare(test_session, str_default, added, deleted, modified, same):
22
32
  ds1 = DataChain.from_values(
23
33
  id=[1, 2, 4],
24
34
  name=["John1", "Doe", "Andy"],
@@ -76,7 +86,7 @@ def test_compare(test_session, added, deleted, modified, same):
76
86
 
77
87
  if deleted:
78
88
  assert "diff" not in chains[CompareStatus.DELETED].signals_schema.db_signals()
79
- expected.append((CompareStatus.DELETED, 3, "Mark"))
89
+ expected.append((CompareStatus.DELETED, 3, str_default))
80
90
 
81
91
  if same:
82
92
  assert "diff" not in chains[CompareStatus.SAME].signals_schema.db_signals()
@@ -85,7 +95,7 @@ def test_compare(test_session, added, deleted, modified, same):
85
95
  assert list(diff.order_by("id").collect("diff", "id", "name")) == expected
86
96
 
87
97
 
88
- def test_compare_no_status_col(test_session):
98
+ def test_compare_no_status_col(test_session, str_default):
89
99
  ds1 = DataChain.from_values(
90
100
  id=[1, 2, 4],
91
101
  name=["John1", "Doe", "Andy"],
@@ -108,14 +118,14 @@ def test_compare_no_status_col(test_session):
108
118
  expected = [
109
119
  (1, "John1"),
110
120
  (2, "Doe"),
111
- (3, "Mark"),
121
+ (3, str_default),
112
122
  (4, "Andy"),
113
123
  ]
114
124
 
115
125
  assert list(diff.order_by("id").collect()) == expected
116
126
 
117
127
 
118
- def test_compare_from_datasets(test_session):
128
+ def test_compare_from_datasets(test_session, str_default):
119
129
  ds1 = DataChain.from_values(
120
130
  id=[1, 2, 4],
121
131
  name=["John1", "Doe", "Andy"],
@@ -137,13 +147,13 @@ def test_compare_from_datasets(test_session):
137
147
  assert list(diff.order_by("id").collect("diff", "id", "name")) == [
138
148
  (CompareStatus.MODIFIED, 1, "John1"),
139
149
  (CompareStatus.ADDED, 2, "Doe"),
140
- (CompareStatus.DELETED, 3, "Mark"),
150
+ (CompareStatus.DELETED, 3, str_default),
141
151
  (CompareStatus.SAME, 4, "Andy"),
142
152
  ]
143
153
 
144
154
 
145
155
  @pytest.mark.parametrize("right_name", ("other_name", "name"))
146
- def test_compare_with_explicit_compare_fields(test_session, right_name):
156
+ def test_compare_with_explicit_compare_fields(test_session, str_default, right_name):
147
157
  ds1 = DataChain.from_values(
148
158
  id=[1, 2, 4],
149
159
  name=["John1", "Doe", "Andy"],
@@ -169,17 +179,10 @@ def test_compare_with_explicit_compare_fields(test_session, right_name):
169
179
  status_col="diff",
170
180
  )
171
181
 
172
- string_default = String.default_value(test_session.catalog.warehouse.db.dialect)
173
-
174
182
  expected = [
175
183
  (CompareStatus.MODIFIED, 1, "John1", "New York"),
176
184
  (CompareStatus.ADDED, 2, "Doe", "Boston"),
177
- (
178
- CompareStatus.DELETED,
179
- 3,
180
- string_default if right_name == "other_name" else "Mark",
181
- "Seattle",
182
- ),
185
+ (CompareStatus.DELETED, 3, str_default, str_default),
183
186
  (CompareStatus.SAME, 4, "Andy", "San Francisco"),
184
187
  ]
185
188
 
@@ -187,7 +190,7 @@ def test_compare_with_explicit_compare_fields(test_session, right_name):
187
190
  assert list(diff.order_by("id").collect(*collect_fields)) == expected
188
191
 
189
192
 
190
- def test_compare_different_left_right_on_columns(test_session):
193
+ def test_compare_different_left_right_on_columns(test_session, str_default):
191
194
  ds1 = DataChain.from_values(
192
195
  id=[1, 2, 4],
193
196
  name=["John1", "Doe", "Andy"],
@@ -208,17 +211,15 @@ def test_compare_different_left_right_on_columns(test_session):
208
211
  status_col="diff",
209
212
  )
210
213
 
211
- int_default = Int64.default_value(test_session.catalog.warehouse.db.dialect)
212
-
213
214
  expected = [
214
- (CompareStatus.SAME, 4, "Andy"),
215
- (CompareStatus.ADDED, 2, "Doe"),
216
215
  (CompareStatus.MODIFIED, 1, "John1"),
217
- (CompareStatus.DELETED, int_default, "Mark"),
216
+ (CompareStatus.ADDED, 2, "Doe"),
217
+ (CompareStatus.DELETED, 3, str_default),
218
+ (CompareStatus.SAME, 4, "Andy"),
218
219
  ]
219
220
 
220
221
  collect_fields = ["diff", "id", "name"]
221
- assert list(diff.order_by("name").collect(*collect_fields)) == expected
222
+ assert list(diff.order_by("id").collect(*collect_fields)) == expected
222
223
 
223
224
 
224
225
  @pytest.mark.parametrize("on_self", (True, False))
@@ -255,7 +256,7 @@ def test_compare_on_equal_datasets(test_session, on_self):
255
256
  assert list(diff.order_by("id").collect(*collect_fields)) == expected
256
257
 
257
258
 
258
- def test_compare_multiple_columns(test_session):
259
+ def test_compare_multiple_columns(test_session, str_default):
259
260
  ds1 = DataChain.from_values(
260
261
  id=[1, 2, 4],
261
262
  name=["John", "Doe", "Andy"],
@@ -275,14 +276,19 @@ def test_compare_multiple_columns(test_session):
275
276
  [
276
277
  {"diff": CompareStatus.MODIFIED, "id": 1, "name": "John", "city": "London"},
277
278
  {"diff": CompareStatus.ADDED, "id": 2, "name": "Doe", "city": "New York"},
278
- {"diff": CompareStatus.DELETED, "id": 3, "name": "Mark", "city": "Berlin"},
279
+ {
280
+ "diff": CompareStatus.DELETED,
281
+ "id": 3,
282
+ "name": str_default,
283
+ "city": str_default,
284
+ },
279
285
  {"diff": CompareStatus.SAME, "id": 4, "name": "Andy", "city": "Tokyo"},
280
286
  ],
281
287
  "id",
282
288
  )
283
289
 
284
290
 
285
- def test_compare_multiple_match_columns(test_session):
291
+ def test_compare_multiple_match_columns(test_session, str_default):
286
292
  ds1 = DataChain.from_values(
287
293
  id=[1, 2, 4],
288
294
  name=["John", "Doe", "Andy"],
@@ -302,14 +308,19 @@ def test_compare_multiple_match_columns(test_session):
302
308
  [
303
309
  {"diff": CompareStatus.MODIFIED, "id": 1, "name": "John", "city": "London"},
304
310
  {"diff": CompareStatus.ADDED, "id": 2, "name": "Doe", "city": "New York"},
305
- {"diff": CompareStatus.DELETED, "id": 3, "name": "John", "city": "Berlin"},
311
+ {
312
+ "diff": CompareStatus.DELETED,
313
+ "id": 3,
314
+ "name": "John",
315
+ "city": str_default,
316
+ },
306
317
  {"diff": CompareStatus.SAME, "id": 4, "name": "Andy", "city": "Tokyo"},
307
318
  ],
308
319
  "id",
309
320
  )
310
321
 
311
322
 
312
- def test_compare_additional_column_on_left(test_session):
323
+ def test_compare_additional_column_on_left(test_session, str_default):
313
324
  ds1 = DataChain.from_values(
314
325
  id=[1, 2, 4],
315
326
  name=["John", "Doe", "Andy"],
@@ -322,8 +333,6 @@ def test_compare_additional_column_on_left(test_session):
322
333
  session=test_session,
323
334
  ).save("ds2")
324
335
 
325
- string_default = String.default_value(test_session.catalog.warehouse.db.dialect)
326
-
327
336
  diff = ds1.compare(ds2, same=True, on=["id"], status_col="diff")
328
337
 
329
338
  assert sorted_dicts(diff.to_records(), "id") == sorted_dicts(
@@ -333,8 +342,8 @@ def test_compare_additional_column_on_left(test_session):
333
342
  {
334
343
  "diff": CompareStatus.DELETED,
335
344
  "id": 3,
336
- "name": "Mark",
337
- "city": string_default,
345
+ "name": str_default,
346
+ "city": str_default,
338
347
  },
339
348
  {"diff": CompareStatus.MODIFIED, "id": 4, "name": "Andy", "city": "Tokyo"},
340
349
  ],
@@ -342,7 +351,7 @@ def test_compare_additional_column_on_left(test_session):
342
351
  )
343
352
 
344
353
 
345
- def test_compare_additional_column_on_right(test_session):
354
+ def test_compare_additional_column_on_right(test_session, str_default):
346
355
  ds1 = DataChain.from_values(
347
356
  id=[1, 2, 4],
348
357
  name=["John", "Doe", "Andy"],
@@ -361,7 +370,7 @@ def test_compare_additional_column_on_right(test_session):
361
370
  [
362
371
  {"diff": CompareStatus.MODIFIED, "id": 1, "name": "John"},
363
372
  {"diff": CompareStatus.ADDED, "id": 2, "name": "Doe"},
364
- {"diff": CompareStatus.DELETED, "id": 3, "name": "Mark"},
373
+ {"diff": CompareStatus.DELETED, "id": 3, "name": str_default},
365
374
  {"diff": CompareStatus.MODIFIED, "id": 4, "name": "Andy"},
366
375
  ],
367
376
  "id",
@@ -413,7 +422,8 @@ def test_compare_right_compare_wrong_length(test_session):
413
422
 
414
423
 
415
424
  @pytest.mark.parametrize("status_col", ("diff", None))
416
- def test_diff(test_session, status_col):
425
+ @pytest.mark.parametrize("right_on", ("file2", None))
426
+ def test_diff(test_session, str_default, int_default, status_col, right_on):
417
427
  fs1 = File(source="s1", path="p1", version="2", etag="e2")
418
428
  fs1_updated = File(source="s1", path="p1", version="1", etag="e1")
419
429
  fs2 = File(source="s2", path="p2", version="1", etag="e1")
@@ -421,43 +431,53 @@ def test_diff(test_session, status_col):
421
431
  fs4 = File(source="s4", path="p4", version="1", etag="e1")
422
432
 
423
433
  ds1 = DataChain.from_values(
424
- file=[fs1_updated, fs2, fs4], score=[1, 2, 4], session=test_session
425
- )
426
- ds2 = DataChain.from_values(
427
- file=[fs1, fs3, fs4], score=[1, 3, 4], session=test_session
434
+ file1=[fs1_updated, fs2, fs4], score=[1, 2, 4], session=test_session
428
435
  )
429
436
 
437
+ if right_on:
438
+ ds2 = DataChain.from_values(
439
+ file2=[fs1, fs3, fs4], score=[1, 3, 4], session=test_session
440
+ )
441
+ else:
442
+ ds2 = DataChain.from_values(
443
+ file1=[fs1, fs3, fs4], score=[1, 3, 4], session=test_session
444
+ )
445
+
430
446
  diff = ds1.diff(
431
447
  ds2,
432
448
  added=True,
433
449
  deleted=True,
434
450
  modified=True,
435
451
  same=True,
436
- on="file",
452
+ on="file1",
453
+ right_on=right_on,
437
454
  status_col=status_col,
438
455
  )
439
456
 
440
457
  expected = [
441
- (CompareStatus.MODIFIED, fs1_updated, 1),
442
- (CompareStatus.ADDED, fs2, 2),
443
- (CompareStatus.DELETED, fs3, 3),
444
- (CompareStatus.SAME, fs4, 4),
458
+ (CompareStatus.MODIFIED, "s1", "p1", "1", "e1", 1),
459
+ (CompareStatus.ADDED, "s2", "p2", "1", "e1", 2),
460
+ (CompareStatus.DELETED, "s3", "p3", str_default, str_default, int_default),
461
+ (CompareStatus.SAME, "s4", "p4", "1", "e1", 4),
445
462
  ]
446
463
 
447
- collect_fields = ["diff", "file", "score"]
464
+ collect_fields = [
465
+ "diff",
466
+ "file1.source",
467
+ "file1.path",
468
+ "file1.version",
469
+ "file1.etag",
470
+ "score",
471
+ ]
448
472
  if not status_col:
449
473
  expected = [row[1:] for row in expected]
450
474
  collect_fields = collect_fields[1:]
451
475
 
452
- res = list(diff.order_by("file.source").collect(*collect_fields))
453
- for r in res:
454
- r[-2].last_modified = _as_utc(r[-2].last_modified)
455
-
456
- assert res == expected
476
+ assert list(diff.order_by("file1.source").collect(*collect_fields)) == expected
457
477
 
458
478
 
459
479
  @pytest.mark.parametrize("status_col", ("diff", None))
460
- def test_diff_nested(test_session, status_col):
480
+ def test_diff_nested(test_session, str_default, int_default, status_col):
461
481
  class Nested(BaseModel):
462
482
  file: File
463
483
 
@@ -491,12 +511,25 @@ def test_diff_nested(test_session, status_col):
491
511
  (CompareStatus.SAME, fs4, 4),
492
512
  ]
493
513
 
494
- collect_fields = ["diff", "nested", "score"]
514
+ expected = [
515
+ (CompareStatus.MODIFIED, "s1", "p1", "1", "e1", 1),
516
+ (CompareStatus.ADDED, "s2", "p2", "1", "e1", 2),
517
+ (CompareStatus.DELETED, "s3", "p3", str_default, str_default, int_default),
518
+ (CompareStatus.SAME, "s4", "p4", "1", "e1", 4),
519
+ ]
520
+
521
+ collect_fields = [
522
+ "diff",
523
+ "nested.file.source",
524
+ "nested.file.path",
525
+ "nested.file.version",
526
+ "nested.file.etag",
527
+ "score",
528
+ ]
495
529
  if not status_col:
496
530
  expected = [row[1:] for row in expected]
497
531
  collect_fields = collect_fields[1:]
498
532
 
499
- res = list(diff.order_by("nested.file.source").collect(*collect_fields))
500
- for r in res:
501
- r[-2].file.last_modified = _as_utc(r[-2].file.last_modified)
502
- assert res == expected
533
+ assert (
534
+ list(diff.order_by("nested.file.source").collect(*collect_fields)) == expected
535
+ )
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes