datachain 0.3.19__tar.gz → 0.3.20__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (248)
  1. {datachain-0.3.19/src/datachain.egg-info → datachain-0.3.20}/PKG-INFO +1 -1
  2. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/catalog/catalog.py +19 -49
  3. {datachain-0.3.19 → datachain-0.3.20/src/datachain.egg-info}/PKG-INFO +1 -1
  4. {datachain-0.3.19 → datachain-0.3.20}/tests/func/test_catalog.py +20 -34
  5. {datachain-0.3.19 → datachain-0.3.20}/.cruft.json +0 -0
  6. {datachain-0.3.19 → datachain-0.3.20}/.gitattributes +0 -0
  7. {datachain-0.3.19 → datachain-0.3.20}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  8. {datachain-0.3.19 → datachain-0.3.20}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
  9. {datachain-0.3.19 → datachain-0.3.20}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  10. {datachain-0.3.19 → datachain-0.3.20}/.github/codecov.yaml +0 -0
  11. {datachain-0.3.19 → datachain-0.3.20}/.github/dependabot.yml +0 -0
  12. {datachain-0.3.19 → datachain-0.3.20}/.github/workflows/benchmarks.yml +0 -0
  13. {datachain-0.3.19 → datachain-0.3.20}/.github/workflows/release.yml +0 -0
  14. {datachain-0.3.19 → datachain-0.3.20}/.github/workflows/tests-studio.yml +0 -0
  15. {datachain-0.3.19 → datachain-0.3.20}/.github/workflows/tests.yml +0 -0
  16. {datachain-0.3.19 → datachain-0.3.20}/.github/workflows/update-template.yaml +0 -0
  17. {datachain-0.3.19 → datachain-0.3.20}/.gitignore +0 -0
  18. {datachain-0.3.19 → datachain-0.3.20}/.pre-commit-config.yaml +0 -0
  19. {datachain-0.3.19 → datachain-0.3.20}/CODE_OF_CONDUCT.rst +0 -0
  20. {datachain-0.3.19 → datachain-0.3.20}/CONTRIBUTING.rst +0 -0
  21. {datachain-0.3.19 → datachain-0.3.20}/LICENSE +0 -0
  22. {datachain-0.3.19 → datachain-0.3.20}/README.rst +0 -0
  23. {datachain-0.3.19 → datachain-0.3.20}/docs/assets/captioned_cartoons.png +0 -0
  24. {datachain-0.3.19 → datachain-0.3.20}/docs/assets/datachain-white.svg +0 -0
  25. {datachain-0.3.19 → datachain-0.3.20}/docs/assets/datachain.svg +0 -0
  26. {datachain-0.3.19 → datachain-0.3.20}/docs/assets/flowchart.png +0 -0
  27. {datachain-0.3.19 → datachain-0.3.20}/docs/index.md +0 -0
  28. {datachain-0.3.19 → datachain-0.3.20}/docs/references/datachain.md +0 -0
  29. {datachain-0.3.19 → datachain-0.3.20}/docs/references/datatype.md +0 -0
  30. {datachain-0.3.19 → datachain-0.3.20}/docs/references/file.md +0 -0
  31. {datachain-0.3.19 → datachain-0.3.20}/docs/references/index.md +0 -0
  32. {datachain-0.3.19 → datachain-0.3.20}/docs/references/sql.md +0 -0
  33. {datachain-0.3.19 → datachain-0.3.20}/docs/references/torch.md +0 -0
  34. {datachain-0.3.19 → datachain-0.3.20}/docs/references/udf.md +0 -0
  35. {datachain-0.3.19 → datachain-0.3.20}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
  36. {datachain-0.3.19 → datachain-0.3.20}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
  37. {datachain-0.3.19 → datachain-0.3.20}/examples/computer_vision/openimage-detect.py +0 -0
  38. {datachain-0.3.19 → datachain-0.3.20}/examples/get_started/common_sql_functions.py +0 -0
  39. {datachain-0.3.19 → datachain-0.3.20}/examples/get_started/json-csv-reader.py +0 -0
  40. {datachain-0.3.19 → datachain-0.3.20}/examples/get_started/torch-loader.py +0 -0
  41. {datachain-0.3.19 → datachain-0.3.20}/examples/get_started/udfs/parallel.py +0 -0
  42. {datachain-0.3.19 → datachain-0.3.20}/examples/get_started/udfs/simple.py +0 -0
  43. {datachain-0.3.19 → datachain-0.3.20}/examples/get_started/udfs/stateful.py +0 -0
  44. {datachain-0.3.19 → datachain-0.3.20}/examples/llm_and_nlp/claude-query.py +0 -0
  45. {datachain-0.3.19 → datachain-0.3.20}/examples/llm_and_nlp/unstructured-embeddings-gen.py +0 -0
  46. {datachain-0.3.19 → datachain-0.3.20}/examples/llm_and_nlp/unstructured-summary-map.py +0 -0
  47. {datachain-0.3.19 → datachain-0.3.20}/examples/multimodal/clip_inference.py +0 -0
  48. {datachain-0.3.19 → datachain-0.3.20}/examples/multimodal/hf_pipeline.py +0 -0
  49. {datachain-0.3.19 → datachain-0.3.20}/examples/multimodal/openai_image_desc_lib.py +0 -0
  50. {datachain-0.3.19 → datachain-0.3.20}/examples/multimodal/wds.py +0 -0
  51. {datachain-0.3.19 → datachain-0.3.20}/examples/multimodal/wds_filtered.py +0 -0
  52. {datachain-0.3.19 → datachain-0.3.20}/mkdocs.yml +0 -0
  53. {datachain-0.3.19 → datachain-0.3.20}/noxfile.py +0 -0
  54. {datachain-0.3.19 → datachain-0.3.20}/pyproject.toml +0 -0
  55. {datachain-0.3.19 → datachain-0.3.20}/setup.cfg +0 -0
  56. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/__init__.py +0 -0
  57. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/__main__.py +0 -0
  58. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/asyn.py +0 -0
  59. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/cache.py +0 -0
  60. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/catalog/__init__.py +0 -0
  61. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/catalog/datasource.py +0 -0
  62. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/catalog/loader.py +0 -0
  63. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/cli.py +0 -0
  64. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/cli_utils.py +0 -0
  65. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/client/__init__.py +0 -0
  66. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/client/azure.py +0 -0
  67. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/client/fileslice.py +0 -0
  68. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/client/fsspec.py +0 -0
  69. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/client/gcs.py +0 -0
  70. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/client/hf.py +0 -0
  71. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/client/local.py +0 -0
  72. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/client/s3.py +0 -0
  73. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/config.py +0 -0
  74. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/data_storage/__init__.py +0 -0
  75. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/data_storage/db_engine.py +0 -0
  76. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/data_storage/id_generator.py +0 -0
  77. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/data_storage/job.py +0 -0
  78. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/data_storage/metastore.py +0 -0
  79. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/data_storage/schema.py +0 -0
  80. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/data_storage/serializer.py +0 -0
  81. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/data_storage/sqlite.py +0 -0
  82. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/data_storage/warehouse.py +0 -0
  83. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/dataset.py +0 -0
  84. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/error.py +0 -0
  85. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/job.py +0 -0
  86. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/lib/__init__.py +0 -0
  87. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/lib/arrow.py +0 -0
  88. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/lib/clip.py +0 -0
  89. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/lib/convert/__init__.py +0 -0
  90. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/lib/convert/flatten.py +0 -0
  91. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/lib/convert/python_to_sql.py +0 -0
  92. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/lib/convert/sql_to_python.py +0 -0
  93. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/lib/convert/unflatten.py +0 -0
  94. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/lib/convert/values_to_tuples.py +0 -0
  95. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/lib/data_model.py +0 -0
  96. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/lib/dataset_info.py +0 -0
  97. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/lib/dc.py +0 -0
  98. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/lib/file.py +0 -0
  99. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/lib/hf.py +0 -0
  100. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/lib/image.py +0 -0
  101. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/lib/listing.py +0 -0
  102. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/lib/listing_info.py +0 -0
  103. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/lib/meta_formats.py +0 -0
  104. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/lib/model_store.py +0 -0
  105. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/lib/pytorch.py +0 -0
  106. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/lib/settings.py +0 -0
  107. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/lib/signal_schema.py +0 -0
  108. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/lib/tar.py +0 -0
  109. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/lib/text.py +0 -0
  110. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/lib/udf.py +0 -0
  111. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/lib/udf_signature.py +0 -0
  112. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/lib/utils.py +0 -0
  113. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/lib/vfile.py +0 -0
  114. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/lib/webdataset.py +0 -0
  115. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/lib/webdataset_laion.py +0 -0
  116. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/listing.py +0 -0
  117. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/node.py +0 -0
  118. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/nodes_fetcher.py +0 -0
  119. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/nodes_thread_pool.py +0 -0
  120. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/progress.py +0 -0
  121. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/py.typed +0 -0
  122. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/query/__init__.py +0 -0
  123. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/query/batch.py +0 -0
  124. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/query/dataset.py +0 -0
  125. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/query/dispatch.py +0 -0
  126. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/query/metrics.py +0 -0
  127. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/query/params.py +0 -0
  128. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/query/queue.py +0 -0
  129. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/query/schema.py +0 -0
  130. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/query/session.py +0 -0
  131. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/query/udf.py +0 -0
  132. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/remote/__init__.py +0 -0
  133. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/remote/studio.py +0 -0
  134. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/sql/__init__.py +0 -0
  135. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/sql/default/__init__.py +0 -0
  136. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/sql/default/base.py +0 -0
  137. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/sql/functions/__init__.py +0 -0
  138. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/sql/functions/array.py +0 -0
  139. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/sql/functions/conditional.py +0 -0
  140. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/sql/functions/path.py +0 -0
  141. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/sql/functions/random.py +0 -0
  142. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/sql/functions/string.py +0 -0
  143. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/sql/selectable.py +0 -0
  144. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/sql/sqlite/__init__.py +0 -0
  145. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/sql/sqlite/base.py +0 -0
  146. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/sql/sqlite/types.py +0 -0
  147. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/sql/sqlite/vector.py +0 -0
  148. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/sql/types.py +0 -0
  149. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/sql/utils.py +0 -0
  150. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/storage.py +0 -0
  151. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/telemetry.py +0 -0
  152. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/torch/__init__.py +0 -0
  153. {datachain-0.3.19 → datachain-0.3.20}/src/datachain/utils.py +0 -0
  154. {datachain-0.3.19 → datachain-0.3.20}/src/datachain.egg-info/SOURCES.txt +0 -0
  155. {datachain-0.3.19 → datachain-0.3.20}/src/datachain.egg-info/dependency_links.txt +0 -0
  156. {datachain-0.3.19 → datachain-0.3.20}/src/datachain.egg-info/entry_points.txt +0 -0
  157. {datachain-0.3.19 → datachain-0.3.20}/src/datachain.egg-info/requires.txt +0 -0
  158. {datachain-0.3.19 → datachain-0.3.20}/src/datachain.egg-info/top_level.txt +0 -0
  159. {datachain-0.3.19 → datachain-0.3.20}/tests/__init__.py +0 -0
  160. {datachain-0.3.19 → datachain-0.3.20}/tests/benchmarks/__init__.py +0 -0
  161. {datachain-0.3.19 → datachain-0.3.20}/tests/benchmarks/conftest.py +0 -0
  162. {datachain-0.3.19 → datachain-0.3.20}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
  163. {datachain-0.3.19 → datachain-0.3.20}/tests/benchmarks/datasets/.dvc/config +0 -0
  164. {datachain-0.3.19 → datachain-0.3.20}/tests/benchmarks/datasets/.gitignore +0 -0
  165. {datachain-0.3.19 → datachain-0.3.20}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
  166. {datachain-0.3.19 → datachain-0.3.20}/tests/benchmarks/test_datachain.py +0 -0
  167. {datachain-0.3.19 → datachain-0.3.20}/tests/benchmarks/test_ls.py +0 -0
  168. {datachain-0.3.19 → datachain-0.3.20}/tests/benchmarks/test_version.py +0 -0
  169. {datachain-0.3.19 → datachain-0.3.20}/tests/conftest.py +0 -0
  170. {datachain-0.3.19 → datachain-0.3.20}/tests/data.py +0 -0
  171. {datachain-0.3.19 → datachain-0.3.20}/tests/examples/__init__.py +0 -0
  172. {datachain-0.3.19 → datachain-0.3.20}/tests/examples/test_examples.py +0 -0
  173. {datachain-0.3.19 → datachain-0.3.20}/tests/examples/test_wds_e2e.py +0 -0
  174. {datachain-0.3.19 → datachain-0.3.20}/tests/examples/wds_data.py +0 -0
  175. {datachain-0.3.19 → datachain-0.3.20}/tests/func/__init__.py +0 -0
  176. {datachain-0.3.19 → datachain-0.3.20}/tests/func/test_client.py +0 -0
  177. {datachain-0.3.19 → datachain-0.3.20}/tests/func/test_datachain.py +0 -0
  178. {datachain-0.3.19 → datachain-0.3.20}/tests/func/test_dataset_query.py +0 -0
  179. {datachain-0.3.19 → datachain-0.3.20}/tests/func/test_datasets.py +0 -0
  180. {datachain-0.3.19 → datachain-0.3.20}/tests/func/test_feature_pickling.py +0 -0
  181. {datachain-0.3.19 → datachain-0.3.20}/tests/func/test_listing.py +0 -0
  182. {datachain-0.3.19 → datachain-0.3.20}/tests/func/test_ls.py +0 -0
  183. {datachain-0.3.19 → datachain-0.3.20}/tests/func/test_meta_formats.py +0 -0
  184. {datachain-0.3.19 → datachain-0.3.20}/tests/func/test_metrics.py +0 -0
  185. {datachain-0.3.19 → datachain-0.3.20}/tests/func/test_pull.py +0 -0
  186. {datachain-0.3.19 → datachain-0.3.20}/tests/func/test_pytorch.py +0 -0
  187. {datachain-0.3.19 → datachain-0.3.20}/tests/func/test_query.py +0 -0
  188. {datachain-0.3.19 → datachain-0.3.20}/tests/scripts/feature_class.py +0 -0
  189. {datachain-0.3.19 → datachain-0.3.20}/tests/scripts/feature_class_parallel.py +0 -0
  190. {datachain-0.3.19 → datachain-0.3.20}/tests/scripts/feature_class_parallel_data_model.py +0 -0
  191. {datachain-0.3.19 → datachain-0.3.20}/tests/scripts/name_len_slow.py +0 -0
  192. {datachain-0.3.19 → datachain-0.3.20}/tests/test_cli_e2e.py +0 -0
  193. {datachain-0.3.19 → datachain-0.3.20}/tests/test_query_e2e.py +0 -0
  194. {datachain-0.3.19 → datachain-0.3.20}/tests/test_telemetry.py +0 -0
  195. {datachain-0.3.19 → datachain-0.3.20}/tests/unit/__init__.py +0 -0
  196. {datachain-0.3.19 → datachain-0.3.20}/tests/unit/lib/__init__.py +0 -0
  197. {datachain-0.3.19 → datachain-0.3.20}/tests/unit/lib/conftest.py +0 -0
  198. {datachain-0.3.19 → datachain-0.3.20}/tests/unit/lib/test_arrow.py +0 -0
  199. {datachain-0.3.19 → datachain-0.3.20}/tests/unit/lib/test_clip.py +0 -0
  200. {datachain-0.3.19 → datachain-0.3.20}/tests/unit/lib/test_datachain.py +0 -0
  201. {datachain-0.3.19 → datachain-0.3.20}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
  202. {datachain-0.3.19 → datachain-0.3.20}/tests/unit/lib/test_datachain_merge.py +0 -0
  203. {datachain-0.3.19 → datachain-0.3.20}/tests/unit/lib/test_feature.py +0 -0
  204. {datachain-0.3.19 → datachain-0.3.20}/tests/unit/lib/test_feature_utils.py +0 -0
  205. {datachain-0.3.19 → datachain-0.3.20}/tests/unit/lib/test_file.py +0 -0
  206. {datachain-0.3.19 → datachain-0.3.20}/tests/unit/lib/test_hf.py +0 -0
  207. {datachain-0.3.19 → datachain-0.3.20}/tests/unit/lib/test_image.py +0 -0
  208. {datachain-0.3.19 → datachain-0.3.20}/tests/unit/lib/test_schema.py +0 -0
  209. {datachain-0.3.19 → datachain-0.3.20}/tests/unit/lib/test_signal_schema.py +0 -0
  210. {datachain-0.3.19 → datachain-0.3.20}/tests/unit/lib/test_sql_to_python.py +0 -0
  211. {datachain-0.3.19 → datachain-0.3.20}/tests/unit/lib/test_text.py +0 -0
  212. {datachain-0.3.19 → datachain-0.3.20}/tests/unit/lib/test_udf_signature.py +0 -0
  213. {datachain-0.3.19 → datachain-0.3.20}/tests/unit/lib/test_utils.py +0 -0
  214. {datachain-0.3.19 → datachain-0.3.20}/tests/unit/lib/test_webdataset.py +0 -0
  215. {datachain-0.3.19 → datachain-0.3.20}/tests/unit/sql/__init__.py +0 -0
  216. {datachain-0.3.19 → datachain-0.3.20}/tests/unit/sql/sqlite/__init__.py +0 -0
  217. {datachain-0.3.19 → datachain-0.3.20}/tests/unit/sql/sqlite/test_utils.py +0 -0
  218. {datachain-0.3.19 → datachain-0.3.20}/tests/unit/sql/test_array.py +0 -0
  219. {datachain-0.3.19 → datachain-0.3.20}/tests/unit/sql/test_conditional.py +0 -0
  220. {datachain-0.3.19 → datachain-0.3.20}/tests/unit/sql/test_path.py +0 -0
  221. {datachain-0.3.19 → datachain-0.3.20}/tests/unit/sql/test_random.py +0 -0
  222. {datachain-0.3.19 → datachain-0.3.20}/tests/unit/sql/test_selectable.py +0 -0
  223. {datachain-0.3.19 → datachain-0.3.20}/tests/unit/sql/test_string.py +0 -0
  224. {datachain-0.3.19 → datachain-0.3.20}/tests/unit/test_asyn.py +0 -0
  225. {datachain-0.3.19 → datachain-0.3.20}/tests/unit/test_cache.py +0 -0
  226. {datachain-0.3.19 → datachain-0.3.20}/tests/unit/test_catalog.py +0 -0
  227. {datachain-0.3.19 → datachain-0.3.20}/tests/unit/test_catalog_loader.py +0 -0
  228. {datachain-0.3.19 → datachain-0.3.20}/tests/unit/test_cli_parsing.py +0 -0
  229. {datachain-0.3.19 → datachain-0.3.20}/tests/unit/test_client.py +0 -0
  230. {datachain-0.3.19 → datachain-0.3.20}/tests/unit/test_client_s3.py +0 -0
  231. {datachain-0.3.19 → datachain-0.3.20}/tests/unit/test_data_storage.py +0 -0
  232. {datachain-0.3.19 → datachain-0.3.20}/tests/unit/test_database_engine.py +0 -0
  233. {datachain-0.3.19 → datachain-0.3.20}/tests/unit/test_dataset.py +0 -0
  234. {datachain-0.3.19 → datachain-0.3.20}/tests/unit/test_dispatch.py +0 -0
  235. {datachain-0.3.19 → datachain-0.3.20}/tests/unit/test_fileslice.py +0 -0
  236. {datachain-0.3.19 → datachain-0.3.20}/tests/unit/test_id_generator.py +0 -0
  237. {datachain-0.3.19 → datachain-0.3.20}/tests/unit/test_listing.py +0 -0
  238. {datachain-0.3.19 → datachain-0.3.20}/tests/unit/test_metastore.py +0 -0
  239. {datachain-0.3.19 → datachain-0.3.20}/tests/unit/test_module_exports.py +0 -0
  240. {datachain-0.3.19 → datachain-0.3.20}/tests/unit/test_query.py +0 -0
  241. {datachain-0.3.19 → datachain-0.3.20}/tests/unit/test_query_metrics.py +0 -0
  242. {datachain-0.3.19 → datachain-0.3.20}/tests/unit/test_query_params.py +0 -0
  243. {datachain-0.3.19 → datachain-0.3.20}/tests/unit/test_serializer.py +0 -0
  244. {datachain-0.3.19 → datachain-0.3.20}/tests/unit/test_session.py +0 -0
  245. {datachain-0.3.19 → datachain-0.3.20}/tests/unit/test_storage.py +0 -0
  246. {datachain-0.3.19 → datachain-0.3.20}/tests/unit/test_utils.py +0 -0
  247. {datachain-0.3.19 → datachain-0.3.20}/tests/unit/test_warehouse.py +0 -0
  248. {datachain-0.3.19 → datachain-0.3.20}/tests/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datachain
3
- Version: 0.3.19
3
+ Version: 0.3.20
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License: Apache-2.0
@@ -79,6 +79,7 @@ if TYPE_CHECKING:
79
79
  )
80
80
  from datachain.dataset import DatasetVersion
81
81
  from datachain.job import Job
82
+ from datachain.lib.file import File
82
83
 
83
84
  logger = logging.getLogger("datachain")
84
85
 
@@ -1399,65 +1400,34 @@ class Catalog:
1399
1400
  dataset = self.get_dataset(name)
1400
1401
  return self.update_dataset(dataset, **update_data)
1401
1402
 
1402
- def get_file_signals(
1403
- self, dataset_name: str, dataset_version: int, row: RowDict
1404
- ) -> Optional[RowDict]:
1403
+ def get_file_from_row(
1404
+ self, dataset_name: str, dataset_version: int, row: RowDict, signal_name: str
1405
+ ) -> "File":
1405
1406
  """
1406
- Function that returns file signals from dataset row.
1407
- Note that signal names are without prefix, so if there was 'laion__file__source'
1408
- in original row, result will have just 'source'
1409
- Example output:
1410
- {
1411
- "source": "s3://ldb-public",
1412
- "path": "animals/dogs/dog.jpg",
1413
- ...
1414
- }
1407
+ Function that returns specific file signal from dataset row by name.
1415
1408
  """
1416
1409
  from datachain.lib.file import File
1417
1410
  from datachain.lib.signal_schema import DEFAULT_DELIMITER, SignalSchema
1418
1411
 
1419
1412
  version = self.get_dataset(dataset_name).get_version(dataset_version)
1420
-
1421
- file_signals_values = RowDict()
1422
-
1423
1413
  schema = SignalSchema.deserialize(version.feature_schema)
1424
- for file_signals in schema.get_signals(File):
1425
- prefix = file_signals.replace(".", DEFAULT_DELIMITER) + DEFAULT_DELIMITER
1426
- file_signals_values[file_signals] = {
1427
- c_name.removeprefix(prefix): c_value
1428
- for c_name, c_value in row.items()
1429
- if c_name.startswith(prefix)
1430
- and DEFAULT_DELIMITER not in c_name.removeprefix(prefix)
1431
- }
1432
1414
 
1433
- if not file_signals_values:
1434
- return None
1435
-
1436
- # there can be multiple file signals in a schema, but taking the first
1437
- # one for now. In future we might add ability to choose from which one
1438
- # to open object
1439
- return next(iter(file_signals_values.values()))
1440
-
1441
- def open_object(
1442
- self,
1443
- dataset_name: str,
1444
- dataset_version: int,
1445
- row: RowDict,
1446
- use_cache: bool = True,
1447
- **config: Any,
1448
- ):
1449
- from datachain.lib.file import File
1415
+ if signal_name not in schema.get_signals(File):
1416
+ raise RuntimeError(
1417
+ f"File signal with path {signal_name} not found in ",
1418
+ f"dataset {dataset_name}@v{dataset_version} signals schema",
1419
+ )
1450
1420
 
1451
- file_signals = self.get_file_signals(dataset_name, dataset_version, row)
1452
- if not file_signals:
1453
- raise RuntimeError("Cannot open object without file signals")
1421
+ prefix = signal_name.replace(".", DEFAULT_DELIMITER) + DEFAULT_DELIMITER
1422
+ file_signals = {
1423
+ c_name.removeprefix(prefix): c_value
1424
+ for c_name, c_value in row.items()
1425
+ if c_name.startswith(prefix)
1426
+ and DEFAULT_DELIMITER not in c_name.removeprefix(prefix)
1427
+ and c_name.removeprefix(prefix) in File.model_fields
1428
+ }
1454
1429
 
1455
- config = config or self.client_config
1456
- client = self.get_client(file_signals["source"], **config)
1457
- return client.open_object(
1458
- File._from_row(file_signals),
1459
- use_cache=use_cache,
1460
- )
1430
+ return File(**file_signals)
1461
1431
 
1462
1432
  def ls(
1463
1433
  self,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datachain
3
- Version: 0.3.19
3
+ Version: 0.3.20
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License: Apache-2.0
@@ -847,7 +847,7 @@ def test_garbage_collect(cloud_test_catalog, from_cli, capsys):
847
847
  assert catalog.get_temp_table_names() == []
848
848
 
849
849
 
850
- def test_get_file_signals(cloud_test_catalog, dogs_dataset):
850
+ def test_get_file_from_row(cloud_test_catalog, dogs_dataset):
851
851
  catalog = cloud_test_catalog.catalog
852
852
  catalog.metastore.update_dataset_version(
853
853
  dogs_dataset,
@@ -863,18 +863,22 @@ def test_get_file_signals(cloud_test_catalog, dogs_dataset):
863
863
  "name": "Jon",
864
864
  "age": 25,
865
865
  "f1__source": "s3://first_bucket",
866
- "f1__name": "image1.jpg",
866
+ "f1__path": "image1.jpg",
867
867
  "f2__source": "s3://second_bucket",
868
- "f2__name": "image2.jpg",
868
+ "f2__path": "image2.jpg",
869
869
  }
870
870
 
871
- assert catalog.get_file_signals(dogs_dataset.name, 1, row) == {
872
- "source": "s3://first_bucket",
873
- "name": "image1.jpg",
874
- }
871
+ assert catalog.get_file_from_row(dogs_dataset.name, 1, row, "f1") == File(
872
+ source="s3://first_bucket",
873
+ path="image1.jpg",
874
+ )
875
+ assert catalog.get_file_from_row(dogs_dataset.name, 1, row, "f2") == File(
876
+ source="s3://second_bucket",
877
+ path="image2.jpg",
878
+ )
875
879
 
876
880
 
877
- def test_get_file_signals_with_custom_types(cloud_test_catalog, dogs_dataset):
881
+ def test_get_file_from_row_with_custom_types(cloud_test_catalog, dogs_dataset):
878
882
  catalog = cloud_test_catalog.catalog
879
883
  catalog.metastore.update_dataset_version(
880
884
  dogs_dataset,
@@ -885,7 +889,7 @@ def test_get_file_signals_with_custom_types(cloud_test_catalog, dogs_dataset):
885
889
  "f1": "File@v1",
886
890
  "f2": "File@v1",
887
891
  "_custom_types": {
888
- "File@v1": {"source": "str", "name": "str"},
892
+ "File@v1": {"source": "str", "path": "str"},
889
893
  },
890
894
  },
891
895
  )
@@ -893,36 +897,18 @@ def test_get_file_signals_with_custom_types(cloud_test_catalog, dogs_dataset):
893
897
  "name": "Jon",
894
898
  "age": 25,
895
899
  "f1__source": "s3://first_bucket",
896
- "f1__name": "image1.jpg",
900
+ "f1__path": "image1.jpg",
897
901
  "f2__source": "s3://second_bucket",
898
- "f2__name": "image2.jpg",
899
- }
900
-
901
- assert catalog.get_file_signals(dogs_dataset.name, 1, row) == {
902
- "source": "s3://first_bucket",
903
- "name": "image1.jpg",
902
+ "f2__path": "image2.jpg",
904
903
  }
905
904
 
906
-
907
- def test_get_file_signals_no_signals(cloud_test_catalog, dogs_dataset):
908
- catalog = cloud_test_catalog.catalog
909
- catalog.metastore.update_dataset_version(
910
- dogs_dataset,
911
- 1,
912
- feature_schema={
913
- "name": "str",
914
- "age": "str",
915
- },
905
+ assert catalog.get_file_from_row(dogs_dataset.name, 1, row, "f1") == File(
906
+ source="s3://first_bucket",
907
+ path="image1.jpg",
916
908
  )
917
- row = {
918
- "name": "Jon",
919
- "age": 25,
920
- }
921
-
922
- assert catalog.get_file_signals(dogs_dataset.name, 1, row) is None
923
909
 
924
910
 
925
- def test_open_object_no_file_signals(cloud_test_catalog, dogs_dataset):
911
+ def test_get_file_from_row_no_signals(cloud_test_catalog, dogs_dataset):
926
912
  catalog = cloud_test_catalog.catalog
927
913
  catalog.metastore.update_dataset_version(
928
914
  dogs_dataset,
@@ -938,4 +924,4 @@ def test_open_object_no_file_signals(cloud_test_catalog, dogs_dataset):
938
924
  }
939
925
 
940
926
  with pytest.raises(RuntimeError):
941
- assert catalog.open_object(dogs_dataset.name, 1, row)
927
+ assert catalog.get_file_from_row(dogs_dataset.name, 1, row, "missing")
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes