datachain 0.1.11__tar.gz → 0.1.13__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (233) hide show
  1. {datachain-0.1.11/src/datachain.egg-info → datachain-0.1.13}/PKG-INFO +1 -1
  2. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/_version.py +2 -2
  3. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/lib/dc.py +34 -7
  4. {datachain-0.1.11 → datachain-0.1.13/src/datachain.egg-info}/PKG-INFO +1 -1
  5. {datachain-0.1.11 → datachain-0.1.13}/tests/func/test_catalog.py +3 -0
  6. {datachain-0.1.11 → datachain-0.1.13}/tests/test_cli_e2e.py +8 -4
  7. {datachain-0.1.11 → datachain-0.1.13}/tests/unit/lib/test_datachain.py +66 -20
  8. {datachain-0.1.11 → datachain-0.1.13}/tests/unit/lib/test_datachain_merge.py +23 -9
  9. {datachain-0.1.11 → datachain-0.1.13}/tests/unit/lib/test_feature_utils.py +2 -2
  10. {datachain-0.1.11 → datachain-0.1.13}/tests/utils.py +15 -0
  11. {datachain-0.1.11 → datachain-0.1.13}/.cruft.json +0 -0
  12. {datachain-0.1.11 → datachain-0.1.13}/.gitattributes +0 -0
  13. {datachain-0.1.11 → datachain-0.1.13}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  14. {datachain-0.1.11 → datachain-0.1.13}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
  15. {datachain-0.1.11 → datachain-0.1.13}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  16. {datachain-0.1.11 → datachain-0.1.13}/.github/codecov.yaml +0 -0
  17. {datachain-0.1.11 → datachain-0.1.13}/.github/dependabot.yml +0 -0
  18. {datachain-0.1.11 → datachain-0.1.13}/.github/workflows/benchmarks.yml +0 -0
  19. {datachain-0.1.11 → datachain-0.1.13}/.github/workflows/release.yml +0 -0
  20. {datachain-0.1.11 → datachain-0.1.13}/.github/workflows/tests.yml +0 -0
  21. {datachain-0.1.11 → datachain-0.1.13}/.github/workflows/update-template.yaml +0 -0
  22. {datachain-0.1.11 → datachain-0.1.13}/.gitignore +0 -0
  23. {datachain-0.1.11 → datachain-0.1.13}/.pre-commit-config.yaml +0 -0
  24. {datachain-0.1.11 → datachain-0.1.13}/.reuse/dep5 +0 -0
  25. {datachain-0.1.11 → datachain-0.1.13}/CODE_OF_CONDUCT.rst +0 -0
  26. {datachain-0.1.11 → datachain-0.1.13}/CONTRIBUTING.rst +0 -0
  27. {datachain-0.1.11 → datachain-0.1.13}/LICENSE +0 -0
  28. {datachain-0.1.11 → datachain-0.1.13}/LICENSES/Apache-2.0.txt +0 -0
  29. {datachain-0.1.11 → datachain-0.1.13}/LICENSES/BSD-3-Clause.txt +0 -0
  30. {datachain-0.1.11 → datachain-0.1.13}/LICENSES/Python-2.0.txt +0 -0
  31. {datachain-0.1.11 → datachain-0.1.13}/README.rst +0 -0
  32. {datachain-0.1.11 → datachain-0.1.13}/docs/cv_intro.md +0 -0
  33. {datachain-0.1.11 → datachain-0.1.13}/docs/udfs.md +0 -0
  34. {datachain-0.1.11 → datachain-0.1.13}/examples/blip2_image_desc_lib.py +0 -0
  35. {datachain-0.1.11 → datachain-0.1.13}/examples/clip.py +0 -0
  36. {datachain-0.1.11 → datachain-0.1.13}/examples/common_sql_functions.py +0 -0
  37. {datachain-0.1.11 → datachain-0.1.13}/examples/dir_expansion.py +0 -0
  38. {datachain-0.1.11 → datachain-0.1.13}/examples/hf_pipeline.py +0 -0
  39. {datachain-0.1.11 → datachain-0.1.13}/examples/iptc_exif_xmp_lib.py +0 -0
  40. {datachain-0.1.11 → datachain-0.1.13}/examples/json-csv-reader.py +0 -0
  41. {datachain-0.1.11 → datachain-0.1.13}/examples/llava2_image_desc_lib.py +0 -0
  42. {datachain-0.1.11 → datachain-0.1.13}/examples/llm-claude-aggregate-query.py +0 -0
  43. {datachain-0.1.11 → datachain-0.1.13}/examples/llm-claude-simple-query.py +0 -0
  44. {datachain-0.1.11 → datachain-0.1.13}/examples/llm-claude.py +0 -0
  45. {datachain-0.1.11 → datachain-0.1.13}/examples/loader.py +0 -0
  46. {datachain-0.1.11 → datachain-0.1.13}/examples/neurips/README +0 -0
  47. {datachain-0.1.11 → datachain-0.1.13}/examples/neurips/distance_to_query.py +0 -0
  48. {datachain-0.1.11 → datachain-0.1.13}/examples/neurips/llm_chat.py +0 -0
  49. {datachain-0.1.11 → datachain-0.1.13}/examples/neurips/requirements.txt +0 -0
  50. {datachain-0.1.11 → datachain-0.1.13}/examples/neurips/single_query.py +0 -0
  51. {datachain-0.1.11 → datachain-0.1.13}/examples/neurips/text_loaders.py +0 -0
  52. {datachain-0.1.11 → datachain-0.1.13}/examples/notebooks/clip_fine_tuning.ipynb +0 -0
  53. {datachain-0.1.11 → datachain-0.1.13}/examples/openai_image_desc_lib.py +0 -0
  54. {datachain-0.1.11 → datachain-0.1.13}/examples/openimage-detect.py +0 -0
  55. {datachain-0.1.11 → datachain-0.1.13}/examples/pose_detection.py +0 -0
  56. {datachain-0.1.11 → datachain-0.1.13}/examples/torch-loader.py +0 -0
  57. {datachain-0.1.11 → datachain-0.1.13}/examples/udfs/batching.py +0 -0
  58. {datachain-0.1.11 → datachain-0.1.13}/examples/udfs/image_transformation.py +0 -0
  59. {datachain-0.1.11 → datachain-0.1.13}/examples/udfs/parallel.py +0 -0
  60. {datachain-0.1.11 → datachain-0.1.13}/examples/udfs/simple.py +0 -0
  61. {datachain-0.1.11 → datachain-0.1.13}/examples/udfs/stateful.py +0 -0
  62. {datachain-0.1.11 → datachain-0.1.13}/examples/udfs/stateful_similarity.py +0 -0
  63. {datachain-0.1.11 → datachain-0.1.13}/examples/unstructured-text.py +0 -0
  64. {datachain-0.1.11 → datachain-0.1.13}/examples/wds.py +0 -0
  65. {datachain-0.1.11 → datachain-0.1.13}/examples/wds_filtered.py +0 -0
  66. {datachain-0.1.11 → datachain-0.1.13}/examples/zalando/zalando_clip.py +0 -0
  67. {datachain-0.1.11 → datachain-0.1.13}/examples/zalando/zalando_dir_as_class.py +0 -0
  68. {datachain-0.1.11 → datachain-0.1.13}/examples/zalando/zalando_splits_and_classes_ds.py +0 -0
  69. {datachain-0.1.11 → datachain-0.1.13}/examples/zalando/zalando_splits_and_classes_output.py +0 -0
  70. {datachain-0.1.11 → datachain-0.1.13}/noxfile.py +0 -0
  71. {datachain-0.1.11 → datachain-0.1.13}/pyproject.toml +0 -0
  72. {datachain-0.1.11 → datachain-0.1.13}/setup.cfg +0 -0
  73. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/__init__.py +0 -0
  74. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/__main__.py +0 -0
  75. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/asyn.py +0 -0
  76. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/cache.py +0 -0
  77. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/catalog/__init__.py +0 -0
  78. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/catalog/catalog.py +0 -0
  79. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/catalog/datasource.py +0 -0
  80. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/catalog/loader.py +0 -0
  81. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/catalog/subclass.py +0 -0
  82. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/cli.py +0 -0
  83. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/cli_utils.py +0 -0
  84. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/client/__init__.py +0 -0
  85. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/client/azure.py +0 -0
  86. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/client/fileslice.py +0 -0
  87. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/client/fsspec.py +0 -0
  88. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/client/gcs.py +0 -0
  89. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/client/local.py +0 -0
  90. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/client/s3.py +0 -0
  91. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/config.py +0 -0
  92. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/data_storage/__init__.py +0 -0
  93. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/data_storage/db_engine.py +0 -0
  94. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/data_storage/id_generator.py +0 -0
  95. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/data_storage/job.py +0 -0
  96. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/data_storage/metastore.py +0 -0
  97. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/data_storage/schema.py +0 -0
  98. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/data_storage/serializer.py +0 -0
  99. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/data_storage/sqlite.py +0 -0
  100. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/data_storage/warehouse.py +0 -0
  101. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/dataset.py +0 -0
  102. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/error.py +0 -0
  103. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/lib/__init__.py +0 -0
  104. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/lib/cached_stream.py +0 -0
  105. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/lib/claude.py +0 -0
  106. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/lib/feature.py +0 -0
  107. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/lib/feature_registry.py +0 -0
  108. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/lib/feature_utils.py +0 -0
  109. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/lib/file.py +0 -0
  110. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/lib/gpt4_vision.py +0 -0
  111. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/lib/hf_image_to_text.py +0 -0
  112. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/lib/hf_pipeline.py +0 -0
  113. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/lib/image.py +0 -0
  114. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/lib/image_transform.py +0 -0
  115. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/lib/iptc_exif_xmp.py +0 -0
  116. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/lib/meta_formats.py +0 -0
  117. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/lib/parquet.py +0 -0
  118. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/lib/pytorch.py +0 -0
  119. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/lib/reader.py +0 -0
  120. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/lib/settings.py +0 -0
  121. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/lib/signal_schema.py +0 -0
  122. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/lib/text.py +0 -0
  123. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/lib/udf.py +0 -0
  124. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/lib/udf_signature.py +0 -0
  125. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/lib/unstructured.py +0 -0
  126. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/lib/utils.py +0 -0
  127. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/lib/vfile.py +0 -0
  128. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/lib/webdataset.py +0 -0
  129. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/lib/webdataset_laion.py +0 -0
  130. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/listing.py +0 -0
  131. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/node.py +0 -0
  132. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/nodes_fetcher.py +0 -0
  133. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/nodes_thread_pool.py +0 -0
  134. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/progress.py +0 -0
  135. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/py.typed +0 -0
  136. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/query/__init__.py +0 -0
  137. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/query/batch.py +0 -0
  138. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/query/builtins.py +0 -0
  139. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/query/dataset.py +0 -0
  140. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/query/dispatch.py +0 -0
  141. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/query/params.py +0 -0
  142. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/query/schema.py +0 -0
  143. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/query/session.py +0 -0
  144. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/query/udf.py +0 -0
  145. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/remote/__init__.py +0 -0
  146. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/remote/studio.py +0 -0
  147. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/sql/__init__.py +0 -0
  148. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/sql/default/__init__.py +0 -0
  149. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/sql/default/base.py +0 -0
  150. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/sql/functions/__init__.py +0 -0
  151. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/sql/functions/array.py +0 -0
  152. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/sql/functions/conditional.py +0 -0
  153. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/sql/functions/path.py +0 -0
  154. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/sql/functions/random.py +0 -0
  155. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/sql/functions/string.py +0 -0
  156. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/sql/selectable.py +0 -0
  157. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/sql/sqlite/__init__.py +0 -0
  158. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/sql/sqlite/base.py +0 -0
  159. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/sql/sqlite/types.py +0 -0
  160. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/sql/sqlite/vector.py +0 -0
  161. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/sql/types.py +0 -0
  162. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/sql/utils.py +0 -0
  163. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/storage.py +0 -0
  164. {datachain-0.1.11 → datachain-0.1.13}/src/datachain/utils.py +0 -0
  165. {datachain-0.1.11 → datachain-0.1.13}/src/datachain.egg-info/SOURCES.txt +0 -0
  166. {datachain-0.1.11 → datachain-0.1.13}/src/datachain.egg-info/dependency_links.txt +0 -0
  167. {datachain-0.1.11 → datachain-0.1.13}/src/datachain.egg-info/entry_points.txt +0 -0
  168. {datachain-0.1.11 → datachain-0.1.13}/src/datachain.egg-info/requires.txt +0 -0
  169. {datachain-0.1.11 → datachain-0.1.13}/src/datachain.egg-info/top_level.txt +0 -0
  170. {datachain-0.1.11 → datachain-0.1.13}/tests/__init__.py +0 -0
  171. {datachain-0.1.11 → datachain-0.1.13}/tests/benchmarks/__init__.py +0 -0
  172. {datachain-0.1.11 → datachain-0.1.13}/tests/benchmarks/conftest.py +0 -0
  173. {datachain-0.1.11 → datachain-0.1.13}/tests/benchmarks/test_ls.py +0 -0
  174. {datachain-0.1.11 → datachain-0.1.13}/tests/benchmarks/test_version.py +0 -0
  175. {datachain-0.1.11 → datachain-0.1.13}/tests/conftest.py +0 -0
  176. {datachain-0.1.11 → datachain-0.1.13}/tests/data.py +0 -0
  177. {datachain-0.1.11 → datachain-0.1.13}/tests/func/__init__.py +0 -0
  178. {datachain-0.1.11 → datachain-0.1.13}/tests/func/test_client.py +0 -0
  179. {datachain-0.1.11 → datachain-0.1.13}/tests/func/test_dataset_query.py +0 -0
  180. {datachain-0.1.11 → datachain-0.1.13}/tests/func/test_datasets.py +0 -0
  181. {datachain-0.1.11 → datachain-0.1.13}/tests/func/test_ls.py +0 -0
  182. {datachain-0.1.11 → datachain-0.1.13}/tests/func/test_pull.py +0 -0
  183. {datachain-0.1.11 → datachain-0.1.13}/tests/func/test_pytorch.py +0 -0
  184. {datachain-0.1.11 → datachain-0.1.13}/tests/func/test_query.py +0 -0
  185. {datachain-0.1.11 → datachain-0.1.13}/tests/scripts/feature_class.py +0 -0
  186. {datachain-0.1.11 → datachain-0.1.13}/tests/scripts/feature_class_parallel.py +0 -0
  187. {datachain-0.1.11 → datachain-0.1.13}/tests/scripts/name_len_normal.py +0 -0
  188. {datachain-0.1.11 → datachain-0.1.13}/tests/scripts/name_len_slow.py +0 -0
  189. {datachain-0.1.11 → datachain-0.1.13}/tests/test_query_e2e.py +0 -0
  190. {datachain-0.1.11 → datachain-0.1.13}/tests/unit/__init__.py +0 -0
  191. {datachain-0.1.11 → datachain-0.1.13}/tests/unit/lib/__init__.py +0 -0
  192. {datachain-0.1.11 → datachain-0.1.13}/tests/unit/lib/test_cached_stream.py +0 -0
  193. {datachain-0.1.11 → datachain-0.1.13}/tests/unit/lib/test_feature.py +0 -0
  194. {datachain-0.1.11 → datachain-0.1.13}/tests/unit/lib/test_file.py +0 -0
  195. {datachain-0.1.11 → datachain-0.1.13}/tests/unit/lib/test_image.py +0 -0
  196. {datachain-0.1.11 → datachain-0.1.13}/tests/unit/lib/test_parquet.py +0 -0
  197. {datachain-0.1.11 → datachain-0.1.13}/tests/unit/lib/test_reader.py +0 -0
  198. {datachain-0.1.11 → datachain-0.1.13}/tests/unit/lib/test_signal_schema.py +0 -0
  199. {datachain-0.1.11 → datachain-0.1.13}/tests/unit/lib/test_text.py +0 -0
  200. {datachain-0.1.11 → datachain-0.1.13}/tests/unit/lib/test_udf_signature.py +0 -0
  201. {datachain-0.1.11 → datachain-0.1.13}/tests/unit/lib/test_utils.py +0 -0
  202. {datachain-0.1.11 → datachain-0.1.13}/tests/unit/lib/test_webdataset.py +0 -0
  203. {datachain-0.1.11 → datachain-0.1.13}/tests/unit/sql/__init__.py +0 -0
  204. {datachain-0.1.11 → datachain-0.1.13}/tests/unit/sql/sqlite/__init__.py +0 -0
  205. {datachain-0.1.11 → datachain-0.1.13}/tests/unit/sql/sqlite/test_utils.py +0 -0
  206. {datachain-0.1.11 → datachain-0.1.13}/tests/unit/sql/test_array.py +0 -0
  207. {datachain-0.1.11 → datachain-0.1.13}/tests/unit/sql/test_conditional.py +0 -0
  208. {datachain-0.1.11 → datachain-0.1.13}/tests/unit/sql/test_path.py +0 -0
  209. {datachain-0.1.11 → datachain-0.1.13}/tests/unit/sql/test_random.py +0 -0
  210. {datachain-0.1.11 → datachain-0.1.13}/tests/unit/sql/test_selectable.py +0 -0
  211. {datachain-0.1.11 → datachain-0.1.13}/tests/unit/sql/test_string.py +0 -0
  212. {datachain-0.1.11 → datachain-0.1.13}/tests/unit/test_asyn.py +0 -0
  213. {datachain-0.1.11 → datachain-0.1.13}/tests/unit/test_cache.py +0 -0
  214. {datachain-0.1.11 → datachain-0.1.13}/tests/unit/test_catalog.py +0 -0
  215. {datachain-0.1.11 → datachain-0.1.13}/tests/unit/test_catalog_loader.py +0 -0
  216. {datachain-0.1.11 → datachain-0.1.13}/tests/unit/test_cli_parsing.py +0 -0
  217. {datachain-0.1.11 → datachain-0.1.13}/tests/unit/test_client.py +0 -0
  218. {datachain-0.1.11 → datachain-0.1.13}/tests/unit/test_client_s3.py +0 -0
  219. {datachain-0.1.11 → datachain-0.1.13}/tests/unit/test_data_storage.py +0 -0
  220. {datachain-0.1.11 → datachain-0.1.13}/tests/unit/test_database_engine.py +0 -0
  221. {datachain-0.1.11 → datachain-0.1.13}/tests/unit/test_dataset.py +0 -0
  222. {datachain-0.1.11 → datachain-0.1.13}/tests/unit/test_dispatch.py +0 -0
  223. {datachain-0.1.11 → datachain-0.1.13}/tests/unit/test_fileslice.py +0 -0
  224. {datachain-0.1.11 → datachain-0.1.13}/tests/unit/test_id_generator.py +0 -0
  225. {datachain-0.1.11 → datachain-0.1.13}/tests/unit/test_listing.py +0 -0
  226. {datachain-0.1.11 → datachain-0.1.13}/tests/unit/test_metastore.py +0 -0
  227. {datachain-0.1.11 → datachain-0.1.13}/tests/unit/test_query_params.py +0 -0
  228. {datachain-0.1.11 → datachain-0.1.13}/tests/unit/test_serializer.py +0 -0
  229. {datachain-0.1.11 → datachain-0.1.13}/tests/unit/test_session.py +0 -0
  230. {datachain-0.1.11 → datachain-0.1.13}/tests/unit/test_storage.py +0 -0
  231. {datachain-0.1.11 → datachain-0.1.13}/tests/unit/test_udf.py +0 -0
  232. {datachain-0.1.11 → datachain-0.1.13}/tests/unit/test_utils.py +0 -0
  233. {datachain-0.1.11 → datachain-0.1.13}/tests/unit/test_warehouse.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datachain
3
- Version: 0.1.11
3
+ Version: 0.1.13
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License: Apache-2.0
@@ -12,5 +12,5 @@ __version__: str
12
12
  __version_tuple__: VERSION_TUPLE
13
13
  version_tuple: VERSION_TUPLE
14
14
 
15
- __version__ = version = '0.1.11'
16
- __version_tuple__ = version_tuple = (0, 1, 11)
15
+ __version__ = version = '0.1.13'
16
+ __version_tuple__ = version_tuple = (0, 1, 13)
@@ -492,16 +492,43 @@ class DataChain(DatasetQuery):
492
492
  chain.signals_schema = new_schema
493
493
  return chain
494
494
 
495
- def get_values(self) -> Iterator[list]:
496
- """Iterate over rows, getting feature values and applying reader calls."""
497
- for features in self.iterate():
498
- yield [fr.get_value() if isinstance(fr, Feature) else fr for fr in features]
495
+ def get_values(self, *cols: str) -> Iterator[list]:
496
+ """Iterate over rows, getting feature values and applying reader calls.
497
+ If columns are specified - limit them to specified columns.
498
+ """
499
+ for features in self.iterate(*cols):
500
+ yield [fr.get_value() if isinstance(fr, Feature) else fr for fr in features] # type: ignore[union-attr,call-arg]
501
+
502
+ def get_one_value(self, col: str) -> Iterator:
503
+ for item in self.get_values(col):
504
+ yield item[0]
499
505
 
500
- def iterate(self) -> Iterator[Sequence[Feature]]:
501
- db_signals = self.signals_schema.db_signals()
506
+ def iterate(self, *cols: str) -> Iterator[list[FeatureType]]:
507
+ """Iterate over rows. If columns are specified - limit them to specified
508
+ columns.
509
+ """
510
+ chain = self.select(*cols) if cols else self
511
+
512
+ db_signals = chain.signals_schema.db_signals()
502
513
  with super().select(*db_signals).as_iterable() as rows_iter:
503
514
  for row in rows_iter:
504
- yield self.signals_schema.row_to_features(row, self.session.catalog)
515
+ yield chain.signals_schema.row_to_features(row, chain.session.catalog)
516
+
517
+ def iterate_one(self, col: str) -> Iterator[FeatureType]:
518
+ for item in self.iterate(col):
519
+ yield item[0]
520
+
521
+ def collect(self, *cols: str) -> list[list[FeatureType]]:
522
+ return list(self.iterate(*cols))
523
+
524
+ def collect_one(self, col: str) -> list[FeatureType]:
525
+ return list(self.iterate_one(col))
526
+
527
+ def collect_values(self, *cols: str) -> list[list]:
528
+ return list(self.get_values(*cols))
529
+
530
+ def collect_one_value(self, col: str) -> list:
531
+ return list(self.get_one_value(col))
505
532
 
506
533
  def to_pytorch(self, **kwargs):
507
534
  """Convert to pytorch dataset format."""
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datachain
3
- Version: 0.1.11
3
+ Version: 0.1.13
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License: Apache-2.0
@@ -1078,6 +1078,7 @@ def test_garbage_collect(cloud_test_catalog, from_cli, capsys):
1078
1078
 
1079
1079
 
1080
1080
  def test_get_file_signals(cloud_test_catalog, dogs_dataset):
1081
+ skip_if_not_sqlite()
1081
1082
  catalog = cloud_test_catalog.catalog
1082
1083
  catalog.metastore.update_dataset_version(
1083
1084
  dogs_dataset,
@@ -1105,6 +1106,7 @@ def test_get_file_signals(cloud_test_catalog, dogs_dataset):
1105
1106
 
1106
1107
 
1107
1108
  def test_get_file_signals_no_signals(cloud_test_catalog, dogs_dataset):
1109
+ skip_if_not_sqlite()
1108
1110
  catalog = cloud_test_catalog.catalog
1109
1111
  catalog.metastore.update_dataset_version(
1110
1112
  dogs_dataset,
@@ -1123,6 +1125,7 @@ def test_get_file_signals_no_signals(cloud_test_catalog, dogs_dataset):
1123
1125
 
1124
1126
 
1125
1127
  def test_open_object_no_file_signals(cloud_test_catalog, dogs_dataset):
1128
+ skip_if_not_sqlite()
1126
1129
  catalog = cloud_test_catalog.catalog
1127
1130
  catalog.metastore.update_dataset_version(
1128
1131
  dogs_dataset,
@@ -139,11 +139,11 @@ E2E_STEPS = (
139
139
  },
140
140
  {
141
141
  "command": ("datachain", "ls-datasets"),
142
- "expected": "mnt (v1)\n",
142
+ "expected_in": "mnt (v1)\n",
143
143
  },
144
144
  {
145
145
  "command": ("datachain", "ls-datasets"),
146
- "expected": "mnt (v1)\n",
146
+ "expected_in": "mnt (v1)\n",
147
147
  },
148
148
  {
149
149
  "command": ("datachain", "edit-dataset", "mnt", "--new-name", "mnt-new"),
@@ -151,7 +151,7 @@ E2E_STEPS = (
151
151
  },
152
152
  {
153
153
  "command": ("datachain", "ls-datasets"),
154
- "expected": "mnt-new (v1)\n",
154
+ "expected_in": "mnt-new (v1)\n",
155
155
  },
156
156
  {
157
157
  "command": ("datachain", "rm-dataset", "mnt-new", "--version", "1"),
@@ -159,7 +159,7 @@ E2E_STEPS = (
159
159
  },
160
160
  {
161
161
  "command": ("datachain", "ls-datasets"),
162
- "expected": "",
162
+ "expected_not_in": "mnt-new (v1)\n",
163
163
  },
164
164
  {
165
165
  "command": ("datachain", "gc"),
@@ -193,6 +193,10 @@ def run_step(step):
193
193
  assert sorted(result.stdout.split("\n")) == sorted(
194
194
  step["expected"].lstrip("\n").split("\n")
195
195
  )
196
+ elif step.get("expected_in"):
197
+ assert step["expected_in"].lstrip("\n") in result.stdout
198
+ elif step.get("expected_not_in"):
199
+ assert step["expected_not_in"].lstrip("\n") not in result.stdout
196
200
  else:
197
201
  assert result.stdout == step["expected"].lstrip("\n")
198
202
  if step.get("listing"):
@@ -17,6 +17,7 @@ from datachain.lib.signal_schema import (
17
17
  )
18
18
  from datachain.lib.udf_signature import UdfSignatureError
19
19
  from datachain.lib.utils import DataChainParamsError
20
+ from tests.utils import assert_vector_is_close
20
21
 
21
22
  DF_DATA = {
22
23
  "first_name": ["Alice", "Bob", "Charlie", "David", "Eva"],
@@ -212,8 +213,7 @@ def test_gen(catalog):
212
213
  df = ds.to_pandas()
213
214
 
214
215
  assert df["x__my_name"].tolist() == ["n1", "n2", "n1"]
215
- for actual_sqrt, expected in zip(df["x__sqrt"], [3, 5, 1]):
216
- assert math.isclose(actual_sqrt, math.sqrt(expected), rel_tol=1e-7)
216
+ assert_vector_is_close(df["x__sqrt"], [3, 5, 1], expected_callback=math.sqrt)
217
217
  with pytest.raises(KeyError):
218
218
  df["x__t1__nnn"]
219
219
 
@@ -235,8 +235,7 @@ def test_map(catalog):
235
235
  ).to_pandas()
236
236
 
237
237
  assert df["x__my_name"].tolist() == ["n1_suf", "n2_suf", "n1_suf"]
238
- for actual_sqrt, expected in zip(df["x__sqrt"], [3, 5, 1]):
239
- assert math.isclose(actual_sqrt, math.sqrt(expected), rel_tol=1e-7)
238
+ assert_vector_is_close(df["x__sqrt"], [3, 5, 1], expected_callback=math.sqrt)
240
239
 
241
240
 
242
241
  def test_agg(catalog):
@@ -384,7 +383,7 @@ def test_agg_tuple_result_generator(catalog):
384
383
  assert df["x_1__size"].tolist() == [10, 5]
385
384
 
386
385
 
387
- def test_iterate():
386
+ def test_iterate(catalog):
388
387
  dc = DataChain.from_features(f1=features, num=range(len(features)))
389
388
 
390
389
  n = 0
@@ -402,7 +401,7 @@ def test_iterate():
402
401
  assert n == len(features)
403
402
 
404
403
 
405
- def test_iterate_nested_feature():
404
+ def test_iterate_nested_feature(catalog):
406
405
  dc = DataChain.from_features(sign1=features_nested)
407
406
 
408
407
  for n, sample in enumerate(dc.iterate()):
@@ -413,7 +412,7 @@ def test_iterate_nested_feature():
413
412
  assert nested == features_nested[n]
414
413
 
415
414
 
416
- def test_select_feature():
415
+ def test_select_feature(catalog):
417
416
  dc = DataChain.from_features(my_n=features_nested)
418
417
 
419
418
  samples = dc.select("my_n").iterate()
@@ -440,7 +439,7 @@ def test_select_feature():
440
439
  assert n == len(features_nested)
441
440
 
442
441
 
443
- def test_select_columns_intersection():
442
+ def test_select_columns_intersection(catalog):
444
443
  dc = DataChain.from_features(my_n=features_nested)
445
444
 
446
445
  samples = dc.select("my_n.fr", "my_n.fr.count").iterate()
@@ -453,7 +452,7 @@ def test_select_columns_intersection():
453
452
  assert n == len(features_nested)
454
453
 
455
454
 
456
- def test_select_except():
455
+ def test_select_except(catalog):
457
456
  dc = DataChain.from_features(fr1=features_nested, fr2=features)
458
457
 
459
458
  samples = dc.select_except("fr2").iterate()
@@ -465,7 +464,7 @@ def test_select_except():
465
464
  assert n == len(features_nested)
466
465
 
467
466
 
468
- def test_select_wrong_type():
467
+ def test_select_wrong_type(catalog):
469
468
  dc = DataChain.from_features(fr1=features_nested, fr2=features)
470
469
 
471
470
  with pytest.raises(SignalResolvingTypeError):
@@ -475,7 +474,7 @@ def test_select_wrong_type():
475
474
  list(dc.select_except(features[0]).iterate())
476
475
 
477
476
 
478
- def test_select_except_error():
477
+ def test_select_except_error(catalog):
479
478
  dc = DataChain.from_features(fr1=features_nested, fr2=features)
480
479
 
481
480
  with pytest.raises(SignalResolvingError):
@@ -485,7 +484,7 @@ def test_select_except_error():
485
484
  list(dc.select_except("fr1.label", "file").iterate())
486
485
 
487
486
 
488
- def test_select_restore_from_saving():
487
+ def test_select_restore_from_saving(catalog):
489
488
  dc = DataChain.from_features(my_n=features_nested)
490
489
 
491
490
  name = "test_test_select_save"
@@ -493,13 +492,15 @@ def test_select_restore_from_saving():
493
492
 
494
493
  restored = DataChain.from_dataset(name)
495
494
  n = 0
496
- for sample in restored.iterate():
497
- assert sample[0] == features[n]
495
+ restored_sorted = sorted(restored.iterate(), key=lambda x: x[0].count)
496
+ features_sorted = sorted(features, key=lambda x: x.count)
497
+ for sample in restored_sorted:
498
+ assert sample[0] == features_sorted[n]
498
499
  n += 1
499
500
  assert n == len(features_nested)
500
501
 
501
502
 
502
- def test_chain_of_maps():
503
+ def test_chain_of_maps(catalog):
503
504
  dc = (
504
505
  DataChain.from_features(my_n=features_nested)
505
506
  .map(full_name=lambda my_n: my_n.label + "-" + my_n.fr.nnn, output=str)
@@ -516,7 +517,7 @@ def test_chain_of_maps():
516
517
  assert signal in preserved.signals_schema.values
517
518
 
518
519
 
519
- def test_vector():
520
+ def test_vector(catalog):
520
521
  vector = [3.14, 2.72, 1.62]
521
522
 
522
523
  def get_vector(key) -> list[float]:
@@ -525,10 +526,10 @@ def test_vector():
525
526
  ds = DataChain.from_features(key=[123]).map(emd=get_vector)
526
527
 
527
528
  df = ds.to_pandas()
528
- assert df["emd"].tolist()[0] == vector
529
+ assert_vector_is_close(df["emd"].tolist()[0], vector)
529
530
 
530
531
 
531
- def test_vector_of_vectors():
532
+ def test_vector_of_vectors(catalog):
532
533
  vector = [[3.14, 2.72, 1.62], [1.0, 2.0, 3.0]]
533
534
 
534
535
  def get_vector(key) -> list[list[float]]:
@@ -537,10 +538,13 @@ def test_vector_of_vectors():
537
538
  ds = DataChain.from_features(key=[123]).map(emd_list=get_vector)
538
539
 
539
540
  df = ds.to_pandas()
540
- assert df["emd_list"].tolist()[0] == vector
541
+ actual = df["emd_list"].tolist()[0]
542
+ assert len(actual) == 2
543
+ assert_vector_is_close(actual[0], vector[0])
544
+ assert_vector_is_close(actual[1], vector[1])
541
545
 
542
546
 
543
- def test_unsupported_output_type():
547
+ def test_unsupported_output_type(catalog):
544
548
  vector = [3.14, 2.72, 1.62]
545
549
 
546
550
  def get_vector(key) -> list[np.float64]:
@@ -548,3 +552,45 @@ def test_unsupported_output_type():
548
552
 
549
553
  with pytest.raises(SignalSchemaError):
550
554
  DataChain.from_features(key=[123]).map(emd=get_vector)
555
+
556
+
557
+ def test_collect_one(catalog):
558
+ names = ["f1.jpg", "f1.json", "f1.txt", "f2.jpg", "f2.json"]
559
+ sizes = [1, 2, 3, 4, 5]
560
+ files = [File(name=name, size=size) for name, size in zip(names, sizes)]
561
+
562
+ scores = [0.1, 0.2, 0.3, 0.4, 0.5]
563
+
564
+ chain = DataChain.from_features(file=files, score=scores)
565
+
566
+ assert chain.collect_one("file") == files
567
+ assert chain.collect_one("file.name") == names
568
+ assert chain.collect_one("file.size") == sizes
569
+ assert chain.collect_one("file.source") == [""] * len(names)
570
+ assert_vector_is_close(chain.collect_one("score"), scores)
571
+
572
+ for actual, expected in zip(
573
+ chain.collect("file.size", "score"), [[x, y] for x, y in zip(sizes, scores)]
574
+ ):
575
+ assert len(actual) == 2
576
+ assert actual[0] == expected[0]
577
+ assert math.isclose(actual[1], expected[1], rel_tol=1e-7)
578
+
579
+
580
+ def test_get_values(catalog):
581
+ class _MyFr(Feature):
582
+ num: int
583
+
584
+ def get_value(self):
585
+ return self.num * 2
586
+
587
+ lst1 = [_MyFr(num=i) for i in range(6)]
588
+ lst2 = [_MyFr(num=-i) for i in range(6)]
589
+ chain = DataChain.from_features(fr1=lst1, fr2=lst2)
590
+
591
+ assert list(chain.get_one_value("fr1")) == [i * 2 for i in range(6)]
592
+ assert chain.collect_one_value("fr1") == [i * 2 for i in range(6)]
593
+
594
+ assert chain.collect_values("fr1", "fr2") == [
595
+ [x.get_value(), y.get_value()] for x, y in zip(lst1, lst2)
596
+ ]
@@ -1,3 +1,4 @@
1
+ import math
1
2
  from typing import Optional
2
3
 
3
4
  import pytest
@@ -5,6 +6,7 @@ import pytest
5
6
  from datachain.lib.dc import DataChain, DatasetMergeError
6
7
  from datachain.lib.feature import Feature
7
8
  from datachain.lib.signal_schema import SignalResolvingError
9
+ from datachain.sql.types import Float, String
8
10
 
9
11
 
10
12
  class TestUser(Feature):
@@ -42,11 +44,14 @@ team = [
42
44
  ]
43
45
 
44
46
 
45
- def test_merge_objects():
47
+ def test_merge_objects(catalog):
46
48
  ch1 = DataChain.from_features(emp=employees)
47
49
  ch2 = DataChain.from_features(team=team)
48
50
  ch = ch1.merge(ch2, "emp.person.name", "team.player")
49
51
 
52
+ str_default = String.default_value(catalog.warehouse.db.dialect)
53
+ float_default = Float.default_value(catalog.warehouse.db.dialect)
54
+
50
55
  i = 0
51
56
  j = 0
52
57
  for items in ch.iterate():
@@ -59,16 +64,22 @@ def test_merge_objects():
59
64
 
60
65
  assert isinstance(player, TestTeamMember)
61
66
  if empl.person.name != "Bob":
62
- assert player == team[j]
67
+ assert player.player == team[j].player
68
+ assert player.sport == team[j].sport
69
+ assert math.isclose(player.weight, team[j].weight, rel_tol=1e-7)
70
+ assert math.isclose(player.height, team[j].height, rel_tol=1e-7)
63
71
  j += 1
64
72
  else:
65
- assert player == TestTeamMember()
73
+ assert player.player == str_default
74
+ assert player.sport == str_default
75
+ assert player.weight == float_default
76
+ assert player.height == float_default
66
77
 
67
78
  assert i == len(employees)
68
79
  assert j == len(team)
69
80
 
70
81
 
71
- def test_merge_similar_objects():
82
+ def test_merge_similar_objects(catalog):
72
83
  new_employees = [
73
84
  TestEmployee(id=152, person=TestUser(name="Bob", age=27)),
74
85
  TestEmployee(id=201, person=TestUser(name="Karl", age=18)),
@@ -91,13 +102,15 @@ def test_merge_similar_objects():
91
102
  assert len(list(ch_inner.iterate())) == 2
92
103
 
93
104
 
94
- def test_merge_values():
105
+ def test_merge_values(catalog):
95
106
  order_ids = [11, 22, 33, 44]
96
107
  order_descr = ["water", "water", "paper", "water"]
97
108
 
98
109
  delivery_ids = [11, 44]
99
110
  delivery_time = [24.0, 16.5]
100
111
 
112
+ float_default = Float.default_value(catalog.warehouse.db.dialect)
113
+
101
114
  ch1 = DataChain.from_features(id=order_ids, descr=order_descr)
102
115
  ch2 = DataChain.from_features(id=delivery_ids, time=delivery_time)
103
116
 
@@ -107,7 +120,8 @@ def test_merge_values():
107
120
 
108
121
  i = 0
109
122
  j = 0
110
- for items in ch.iterate():
123
+ sorted_items_list = sorted(ch.iterate(), key=lambda x: x[0])
124
+ for items in sorted_items_list:
111
125
  assert len(items) == 4
112
126
  id, name, right_id, time = items
113
127
 
@@ -115,7 +129,7 @@ def test_merge_values():
115
129
  assert name == order_descr[i]
116
130
  i += 1
117
131
 
118
- if time is not None:
132
+ if time != float_default:
119
133
  assert id == delivery_ids[j]
120
134
  assert time == delivery_time[j]
121
135
  j += 1
@@ -124,7 +138,7 @@ def test_merge_values():
124
138
  assert j == len(delivery_ids)
125
139
 
126
140
 
127
- def test_merge_multi_conditions():
141
+ def test_merge_multi_conditions(catalog):
128
142
  order_ids = [11, 22, 33, 44]
129
143
  order_name = ["water", "water", "paper", "water"]
130
144
  order_descr = ["still water", "still water", "white paper", "sparkling water"]
@@ -151,7 +165,7 @@ def test_merge_multi_conditions():
151
165
  assert success_ids == {11}
152
166
 
153
167
 
154
- def test_merge_errors():
168
+ def test_merge_errors(catalog):
155
169
  ch1 = DataChain.from_features(emp=employees)
156
170
  ch2 = DataChain.from_features(team=team)
157
171
 
@@ -21,7 +21,7 @@ def test_basic():
21
21
  assert vals[-1] == (fib[-1], values[-1])
22
22
 
23
23
 
24
- def test_e2e():
24
+ def test_e2e(catalog):
25
25
  fib = [1, 1, 2, 3, 5, 8]
26
26
  values = ["odd" if num % 2 else "even" for num in fib]
27
27
 
@@ -44,7 +44,7 @@ def test_single_value():
44
44
  assert vals == fib
45
45
 
46
46
 
47
- def test_single_e2e():
47
+ def test_single_e2e(catalog):
48
48
  fib = [1, 1, 2, 3, 5, 8]
49
49
 
50
50
  dc = DataChain.from_features(fib=fib)
@@ -3,6 +3,7 @@ import io
3
3
  import math
4
4
  import os
5
5
  import tarfile
6
+ from collections.abc import Iterable
6
7
  from string import printable
7
8
  from tarfile import DIRTYPE, TarInfo
8
9
  from time import sleep, time
@@ -282,3 +283,17 @@ def assert_row_names(
282
283
  == {r.get("name") for r in preview}
283
284
  == expected_names
284
285
  )
286
+
287
+
288
+ def assert_vector_is_close(
289
+ actual: Iterable,
290
+ expected: Iterable,
291
+ expected_callback: Optional[Callable] = None,
292
+ rel_tol: float = 1e-7,
293
+ ) -> None:
294
+ """Asserts that two vectors of floating point numbers are equivalent, by ensuring
295
+ that all values are close together (within floating point tolerances)."""
296
+ for act, expect in zip(actual, expected):
297
+ if expected_callback:
298
+ expect = expected_callback(expect)
299
+ assert math.isclose(act, expect, rel_tol=rel_tol)
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes