datachain 0.2.2__tar.gz → 0.2.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (260)
  1. {datachain-0.2.2/src/datachain.egg-info → datachain-0.2.4}/PKG-INFO +1 -1
  2. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/catalog/catalog.py +13 -4
  3. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/lib/signal_schema.py +1 -45
  4. {datachain-0.2.2 → datachain-0.2.4/src/datachain.egg-info}/PKG-INFO +1 -1
  5. {datachain-0.2.2 → datachain-0.2.4}/tests/unit/lib/test_datachain.py +1 -1
  6. {datachain-0.2.2 → datachain-0.2.4}/tests/unit/lib/test_datachain_bootstrap.py +3 -3
  7. {datachain-0.2.2 → datachain-0.2.4}/tests/unit/lib/test_signal_schema.py +0 -30
  8. {datachain-0.2.2 → datachain-0.2.4}/.cruft.json +0 -0
  9. {datachain-0.2.2 → datachain-0.2.4}/.gitattributes +0 -0
  10. {datachain-0.2.2 → datachain-0.2.4}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  11. {datachain-0.2.2 → datachain-0.2.4}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
  12. {datachain-0.2.2 → datachain-0.2.4}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  13. {datachain-0.2.2 → datachain-0.2.4}/.github/codecov.yaml +0 -0
  14. {datachain-0.2.2 → datachain-0.2.4}/.github/dependabot.yml +0 -0
  15. {datachain-0.2.2 → datachain-0.2.4}/.github/workflows/benchmarks.yml +0 -0
  16. {datachain-0.2.2 → datachain-0.2.4}/.github/workflows/release.yml +0 -0
  17. {datachain-0.2.2 → datachain-0.2.4}/.github/workflows/tests.yml +0 -0
  18. {datachain-0.2.2 → datachain-0.2.4}/.github/workflows/update-template.yaml +0 -0
  19. {datachain-0.2.2 → datachain-0.2.4}/.gitignore +0 -0
  20. {datachain-0.2.2 → datachain-0.2.4}/.pre-commit-config.yaml +0 -0
  21. {datachain-0.2.2 → datachain-0.2.4}/.reuse/dep5 +0 -0
  22. {datachain-0.2.2 → datachain-0.2.4}/CODE_OF_CONDUCT.rst +0 -0
  23. {datachain-0.2.2 → datachain-0.2.4}/CONTRIBUTING.rst +0 -0
  24. {datachain-0.2.2 → datachain-0.2.4}/LICENSE +0 -0
  25. {datachain-0.2.2 → datachain-0.2.4}/LICENSES/Apache-2.0.txt +0 -0
  26. {datachain-0.2.2 → datachain-0.2.4}/LICENSES/BSD-3-Clause.txt +0 -0
  27. {datachain-0.2.2 → datachain-0.2.4}/LICENSES/Python-2.0.txt +0 -0
  28. {datachain-0.2.2 → datachain-0.2.4}/README.rst +0 -0
  29. {datachain-0.2.2 → datachain-0.2.4}/docs/assets/datachain.png +0 -0
  30. {datachain-0.2.2 → datachain-0.2.4}/docs/index.md +0 -0
  31. {datachain-0.2.2 → datachain-0.2.4}/docs/references/catalog.md +0 -0
  32. {datachain-0.2.2 → datachain-0.2.4}/docs/references/datachain.md +0 -0
  33. {datachain-0.2.2 → datachain-0.2.4}/docs/tutorials/cv_intro.md +0 -0
  34. {datachain-0.2.2 → datachain-0.2.4}/docs/tutorials/udfs.md +0 -0
  35. {datachain-0.2.2 → datachain-0.2.4}/examples/blip2_image_desc_lib.py +0 -0
  36. {datachain-0.2.2 → datachain-0.2.4}/examples/clip.py +0 -0
  37. {datachain-0.2.2 → datachain-0.2.4}/examples/common_sql_functions.py +0 -0
  38. {datachain-0.2.2 → datachain-0.2.4}/examples/computer_vision/fashion_product_images/.gitignore +0 -0
  39. {datachain-0.2.2 → datachain-0.2.4}/examples/computer_vision/fashion_product_images/1-quick-start.ipynb +0 -0
  40. {datachain-0.2.2 → datachain-0.2.4}/examples/computer_vision/fashion_product_images/2-working-with-image-datachains.ipynb +0 -0
  41. {datachain-0.2.2 → datachain-0.2.4}/examples/computer_vision/fashion_product_images/README.md +0 -0
  42. {datachain-0.2.2 → datachain-0.2.4}/examples/computer_vision/fashion_product_images/requirements.txt +0 -0
  43. {datachain-0.2.2 → datachain-0.2.4}/examples/computer_vision/fashion_product_images/scripts/1-quick-start.py +0 -0
  44. {datachain-0.2.2 → datachain-0.2.4}/examples/computer_vision/fashion_product_images/scripts/2-basic-operations.py +0 -0
  45. {datachain-0.2.2 → datachain-0.2.4}/examples/computer_vision/fashion_product_images/scripts/2-embeddings.py +0 -0
  46. {datachain-0.2.2 → datachain-0.2.4}/examples/computer_vision/fashion_product_images/scripts/3-split-train-test.py +0 -0
  47. {datachain-0.2.2 → datachain-0.2.4}/examples/computer_vision/fashion_product_images/src/clustering.py +0 -0
  48. {datachain-0.2.2 → datachain-0.2.4}/examples/computer_vision/fashion_product_images/static/images/basic-operations.png +0 -0
  49. {datachain-0.2.2 → datachain-0.2.4}/examples/computer_vision/fashion_product_images/static/images/core-concepts.png +0 -0
  50. {datachain-0.2.2 → datachain-0.2.4}/examples/computer_vision/fashion_product_images/static/images/datachain-logo.png +0 -0
  51. {datachain-0.2.2 → datachain-0.2.4}/examples/computer_vision/fashion_product_images/static/images/datachain-overview.png +0 -0
  52. {datachain-0.2.2 → datachain-0.2.4}/examples/computer_vision/fashion_product_images/static/images/dataset-1.png +0 -0
  53. {datachain-0.2.2 → datachain-0.2.4}/examples/computer_vision/fashion_product_images/static/images/dataset-2.png +0 -0
  54. {datachain-0.2.2 → datachain-0.2.4}/examples/computer_vision/fashion_product_images/static/images/dataset-3.png +0 -0
  55. {datachain-0.2.2 → datachain-0.2.4}/examples/computer_vision/fashion_product_images/static/images/studio.png +0 -0
  56. {datachain-0.2.2 → datachain-0.2.4}/examples/get_started/clip_fine_tuning.ipynb +0 -0
  57. {datachain-0.2.2 → datachain-0.2.4}/examples/hf_pipeline.py +0 -0
  58. {datachain-0.2.2 → datachain-0.2.4}/examples/iptc_exif_xmp_lib.py +0 -0
  59. {datachain-0.2.2 → datachain-0.2.4}/examples/json-csv-reader.py +0 -0
  60. {datachain-0.2.2 → datachain-0.2.4}/examples/llava2_image_desc_lib.py +0 -0
  61. {datachain-0.2.2 → datachain-0.2.4}/examples/llm-claude-aggregate-query.py +0 -0
  62. {datachain-0.2.2 → datachain-0.2.4}/examples/llm-claude-simple-query.py +0 -0
  63. {datachain-0.2.2 → datachain-0.2.4}/examples/llm-claude.py +0 -0
  64. {datachain-0.2.2 → datachain-0.2.4}/examples/loader.py +0 -0
  65. {datachain-0.2.2 → datachain-0.2.4}/examples/neurips/README +0 -0
  66. {datachain-0.2.2 → datachain-0.2.4}/examples/neurips/distance_to_query.py +0 -0
  67. {datachain-0.2.2 → datachain-0.2.4}/examples/neurips/llm_chat.py +0 -0
  68. {datachain-0.2.2 → datachain-0.2.4}/examples/neurips/requirements.txt +0 -0
  69. {datachain-0.2.2 → datachain-0.2.4}/examples/neurips/single_query.py +0 -0
  70. {datachain-0.2.2 → datachain-0.2.4}/examples/neurips/text_loaders.py +0 -0
  71. {datachain-0.2.2 → datachain-0.2.4}/examples/openai_image_desc_lib.py +0 -0
  72. {datachain-0.2.2 → datachain-0.2.4}/examples/openimage-detect.py +0 -0
  73. {datachain-0.2.2 → datachain-0.2.4}/examples/pose_detection.py +0 -0
  74. {datachain-0.2.2 → datachain-0.2.4}/examples/torch-loader.py +0 -0
  75. {datachain-0.2.2 → datachain-0.2.4}/examples/udfs/batching.py +0 -0
  76. {datachain-0.2.2 → datachain-0.2.4}/examples/udfs/image_transformation.py +0 -0
  77. {datachain-0.2.2 → datachain-0.2.4}/examples/udfs/parallel.py +0 -0
  78. {datachain-0.2.2 → datachain-0.2.4}/examples/udfs/simple.py +0 -0
  79. {datachain-0.2.2 → datachain-0.2.4}/examples/udfs/stateful.py +0 -0
  80. {datachain-0.2.2 → datachain-0.2.4}/examples/udfs/stateful_similarity.py +0 -0
  81. {datachain-0.2.2 → datachain-0.2.4}/examples/unstructured-text.py +0 -0
  82. {datachain-0.2.2 → datachain-0.2.4}/examples/wds.py +0 -0
  83. {datachain-0.2.2 → datachain-0.2.4}/examples/wds_filtered.py +0 -0
  84. {datachain-0.2.2 → datachain-0.2.4}/examples/zalando/zalando_clip.py +0 -0
  85. {datachain-0.2.2 → datachain-0.2.4}/examples/zalando/zalando_dir_as_class.py +0 -0
  86. {datachain-0.2.2 → datachain-0.2.4}/examples/zalando/zalando_splits_and_classes_ds.py +0 -0
  87. {datachain-0.2.2 → datachain-0.2.4}/examples/zalando/zalando_splits_and_classes_output.py +0 -0
  88. {datachain-0.2.2 → datachain-0.2.4}/mkdocs.yml +0 -0
  89. {datachain-0.2.2 → datachain-0.2.4}/noxfile.py +0 -0
  90. {datachain-0.2.2 → datachain-0.2.4}/pyproject.toml +0 -0
  91. {datachain-0.2.2 → datachain-0.2.4}/setup.cfg +0 -0
  92. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/__init__.py +0 -0
  93. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/__main__.py +0 -0
  94. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/asyn.py +0 -0
  95. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/cache.py +0 -0
  96. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/catalog/__init__.py +0 -0
  97. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/catalog/datasource.py +0 -0
  98. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/catalog/loader.py +0 -0
  99. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/catalog/subclass.py +0 -0
  100. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/cli.py +0 -0
  101. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/cli_utils.py +0 -0
  102. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/client/__init__.py +0 -0
  103. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/client/azure.py +0 -0
  104. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/client/fileslice.py +0 -0
  105. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/client/fsspec.py +0 -0
  106. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/client/gcs.py +0 -0
  107. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/client/local.py +0 -0
  108. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/client/s3.py +0 -0
  109. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/config.py +0 -0
  110. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/data_storage/__init__.py +0 -0
  111. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/data_storage/db_engine.py +0 -0
  112. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/data_storage/id_generator.py +0 -0
  113. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/data_storage/job.py +0 -0
  114. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/data_storage/metastore.py +0 -0
  115. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/data_storage/schema.py +0 -0
  116. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/data_storage/serializer.py +0 -0
  117. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/data_storage/sqlite.py +0 -0
  118. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/data_storage/warehouse.py +0 -0
  119. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/dataset.py +0 -0
  120. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/error.py +0 -0
  121. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/lib/__init__.py +0 -0
  122. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/lib/arrow.py +0 -0
  123. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/lib/cached_stream.py +0 -0
  124. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/lib/claude.py +0 -0
  125. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/lib/clip.py +0 -0
  126. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/lib/dc.py +0 -0
  127. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/lib/feature.py +0 -0
  128. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/lib/feature_registry.py +0 -0
  129. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/lib/feature_utils.py +0 -0
  130. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/lib/file.py +0 -0
  131. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/lib/gpt4_vision.py +0 -0
  132. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/lib/hf_image_to_text.py +0 -0
  133. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/lib/hf_pipeline.py +0 -0
  134. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/lib/image.py +0 -0
  135. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/lib/image_transform.py +0 -0
  136. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/lib/iptc_exif_xmp.py +0 -0
  137. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/lib/meta_formats.py +0 -0
  138. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/lib/pytorch.py +0 -0
  139. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/lib/settings.py +0 -0
  140. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/lib/text.py +0 -0
  141. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/lib/udf.py +0 -0
  142. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/lib/udf_signature.py +0 -0
  143. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/lib/unstructured.py +0 -0
  144. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/lib/utils.py +0 -0
  145. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/lib/vfile.py +0 -0
  146. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/lib/webdataset.py +0 -0
  147. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/lib/webdataset_laion.py +0 -0
  148. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/listing.py +0 -0
  149. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/node.py +0 -0
  150. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/nodes_fetcher.py +0 -0
  151. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/nodes_thread_pool.py +0 -0
  152. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/progress.py +0 -0
  153. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/py.typed +0 -0
  154. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/query/__init__.py +0 -0
  155. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/query/batch.py +0 -0
  156. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/query/builtins.py +0 -0
  157. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/query/dataset.py +0 -0
  158. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/query/dispatch.py +0 -0
  159. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/query/metrics.py +0 -0
  160. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/query/params.py +0 -0
  161. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/query/schema.py +0 -0
  162. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/query/session.py +0 -0
  163. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/query/udf.py +0 -0
  164. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/remote/__init__.py +0 -0
  165. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/remote/studio.py +0 -0
  166. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/sql/__init__.py +0 -0
  167. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/sql/default/__init__.py +0 -0
  168. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/sql/default/base.py +0 -0
  169. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/sql/functions/__init__.py +0 -0
  170. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/sql/functions/array.py +0 -0
  171. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/sql/functions/conditional.py +0 -0
  172. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/sql/functions/path.py +0 -0
  173. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/sql/functions/random.py +0 -0
  174. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/sql/functions/string.py +0 -0
  175. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/sql/selectable.py +0 -0
  176. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/sql/sqlite/__init__.py +0 -0
  177. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/sql/sqlite/base.py +0 -0
  178. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/sql/sqlite/types.py +0 -0
  179. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/sql/sqlite/vector.py +0 -0
  180. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/sql/types.py +0 -0
  181. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/sql/utils.py +0 -0
  182. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/storage.py +0 -0
  183. {datachain-0.2.2 → datachain-0.2.4}/src/datachain/utils.py +0 -0
  184. {datachain-0.2.2 → datachain-0.2.4}/src/datachain.egg-info/SOURCES.txt +0 -0
  185. {datachain-0.2.2 → datachain-0.2.4}/src/datachain.egg-info/dependency_links.txt +0 -0
  186. {datachain-0.2.2 → datachain-0.2.4}/src/datachain.egg-info/entry_points.txt +0 -0
  187. {datachain-0.2.2 → datachain-0.2.4}/src/datachain.egg-info/requires.txt +0 -0
  188. {datachain-0.2.2 → datachain-0.2.4}/src/datachain.egg-info/top_level.txt +0 -0
  189. {datachain-0.2.2 → datachain-0.2.4}/tests/__init__.py +0 -0
  190. {datachain-0.2.2 → datachain-0.2.4}/tests/benchmarks/__init__.py +0 -0
  191. {datachain-0.2.2 → datachain-0.2.4}/tests/benchmarks/conftest.py +0 -0
  192. {datachain-0.2.2 → datachain-0.2.4}/tests/benchmarks/test_ls.py +0 -0
  193. {datachain-0.2.2 → datachain-0.2.4}/tests/benchmarks/test_version.py +0 -0
  194. {datachain-0.2.2 → datachain-0.2.4}/tests/conftest.py +0 -0
  195. {datachain-0.2.2 → datachain-0.2.4}/tests/data.py +0 -0
  196. {datachain-0.2.2 → datachain-0.2.4}/tests/examples/__init__.py +0 -0
  197. {datachain-0.2.2 → datachain-0.2.4}/tests/examples/test_wds_e2e.py +0 -0
  198. {datachain-0.2.2 → datachain-0.2.4}/tests/examples/wds_data.py +0 -0
  199. {datachain-0.2.2 → datachain-0.2.4}/tests/func/__init__.py +0 -0
  200. {datachain-0.2.2 → datachain-0.2.4}/tests/func/test_catalog.py +0 -0
  201. {datachain-0.2.2 → datachain-0.2.4}/tests/func/test_client.py +0 -0
  202. {datachain-0.2.2 → datachain-0.2.4}/tests/func/test_datachain.py +0 -0
  203. {datachain-0.2.2 → datachain-0.2.4}/tests/func/test_dataset_query.py +0 -0
  204. {datachain-0.2.2 → datachain-0.2.4}/tests/func/test_datasets.py +0 -0
  205. {datachain-0.2.2 → datachain-0.2.4}/tests/func/test_ls.py +0 -0
  206. {datachain-0.2.2 → datachain-0.2.4}/tests/func/test_pull.py +0 -0
  207. {datachain-0.2.2 → datachain-0.2.4}/tests/func/test_pytorch.py +0 -0
  208. {datachain-0.2.2 → datachain-0.2.4}/tests/func/test_query.py +0 -0
  209. {datachain-0.2.2 → datachain-0.2.4}/tests/scripts/feature_class.py +0 -0
  210. {datachain-0.2.2 → datachain-0.2.4}/tests/scripts/feature_class_parallel.py +0 -0
  211. {datachain-0.2.2 → datachain-0.2.4}/tests/scripts/name_len_normal.py +0 -0
  212. {datachain-0.2.2 → datachain-0.2.4}/tests/scripts/name_len_slow.py +0 -0
  213. {datachain-0.2.2 → datachain-0.2.4}/tests/test_cli_e2e.py +0 -0
  214. {datachain-0.2.2 → datachain-0.2.4}/tests/test_query_e2e.py +0 -0
  215. {datachain-0.2.2 → datachain-0.2.4}/tests/unit/__init__.py +0 -0
  216. {datachain-0.2.2 → datachain-0.2.4}/tests/unit/lib/__init__.py +0 -0
  217. {datachain-0.2.2 → datachain-0.2.4}/tests/unit/lib/test_arrow.py +0 -0
  218. {datachain-0.2.2 → datachain-0.2.4}/tests/unit/lib/test_clip.py +0 -0
  219. {datachain-0.2.2 → datachain-0.2.4}/tests/unit/lib/test_datachain_merge.py +0 -0
  220. {datachain-0.2.2 → datachain-0.2.4}/tests/unit/lib/test_feature.py +0 -0
  221. {datachain-0.2.2 → datachain-0.2.4}/tests/unit/lib/test_feature_utils.py +0 -0
  222. {datachain-0.2.2 → datachain-0.2.4}/tests/unit/lib/test_file.py +0 -0
  223. {datachain-0.2.2 → datachain-0.2.4}/tests/unit/lib/test_image.py +0 -0
  224. {datachain-0.2.2 → datachain-0.2.4}/tests/unit/lib/test_text.py +0 -0
  225. {datachain-0.2.2 → datachain-0.2.4}/tests/unit/lib/test_udf_signature.py +0 -0
  226. {datachain-0.2.2 → datachain-0.2.4}/tests/unit/lib/test_utils.py +0 -0
  227. {datachain-0.2.2 → datachain-0.2.4}/tests/unit/lib/test_webdataset.py +0 -0
  228. {datachain-0.2.2 → datachain-0.2.4}/tests/unit/sql/__init__.py +0 -0
  229. {datachain-0.2.2 → datachain-0.2.4}/tests/unit/sql/sqlite/__init__.py +0 -0
  230. {datachain-0.2.2 → datachain-0.2.4}/tests/unit/sql/sqlite/test_utils.py +0 -0
  231. {datachain-0.2.2 → datachain-0.2.4}/tests/unit/sql/test_array.py +0 -0
  232. {datachain-0.2.2 → datachain-0.2.4}/tests/unit/sql/test_conditional.py +0 -0
  233. {datachain-0.2.2 → datachain-0.2.4}/tests/unit/sql/test_path.py +0 -0
  234. {datachain-0.2.2 → datachain-0.2.4}/tests/unit/sql/test_random.py +0 -0
  235. {datachain-0.2.2 → datachain-0.2.4}/tests/unit/sql/test_selectable.py +0 -0
  236. {datachain-0.2.2 → datachain-0.2.4}/tests/unit/sql/test_string.py +0 -0
  237. {datachain-0.2.2 → datachain-0.2.4}/tests/unit/test_asyn.py +0 -0
  238. {datachain-0.2.2 → datachain-0.2.4}/tests/unit/test_cache.py +0 -0
  239. {datachain-0.2.2 → datachain-0.2.4}/tests/unit/test_catalog.py +0 -0
  240. {datachain-0.2.2 → datachain-0.2.4}/tests/unit/test_catalog_loader.py +0 -0
  241. {datachain-0.2.2 → datachain-0.2.4}/tests/unit/test_cli_parsing.py +0 -0
  242. {datachain-0.2.2 → datachain-0.2.4}/tests/unit/test_client.py +0 -0
  243. {datachain-0.2.2 → datachain-0.2.4}/tests/unit/test_client_s3.py +0 -0
  244. {datachain-0.2.2 → datachain-0.2.4}/tests/unit/test_data_storage.py +0 -0
  245. {datachain-0.2.2 → datachain-0.2.4}/tests/unit/test_database_engine.py +0 -0
  246. {datachain-0.2.2 → datachain-0.2.4}/tests/unit/test_dataset.py +0 -0
  247. {datachain-0.2.2 → datachain-0.2.4}/tests/unit/test_dispatch.py +0 -0
  248. {datachain-0.2.2 → datachain-0.2.4}/tests/unit/test_fileslice.py +0 -0
  249. {datachain-0.2.2 → datachain-0.2.4}/tests/unit/test_id_generator.py +0 -0
  250. {datachain-0.2.2 → datachain-0.2.4}/tests/unit/test_listing.py +0 -0
  251. {datachain-0.2.2 → datachain-0.2.4}/tests/unit/test_metastore.py +0 -0
  252. {datachain-0.2.2 → datachain-0.2.4}/tests/unit/test_query_metrics.py +0 -0
  253. {datachain-0.2.2 → datachain-0.2.4}/tests/unit/test_query_params.py +0 -0
  254. {datachain-0.2.2 → datachain-0.2.4}/tests/unit/test_serializer.py +0 -0
  255. {datachain-0.2.2 → datachain-0.2.4}/tests/unit/test_session.py +0 -0
  256. {datachain-0.2.2 → datachain-0.2.4}/tests/unit/test_storage.py +0 -0
  257. {datachain-0.2.2 → datachain-0.2.4}/tests/unit/test_udf.py +0 -0
  258. {datachain-0.2.2 → datachain-0.2.4}/tests/unit/test_utils.py +0 -0
  259. {datachain-0.2.2 → datachain-0.2.4}/tests/unit/test_warehouse.py +0 -0
  260. {datachain-0.2.2 → datachain-0.2.4}/tests/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datachain
3
- Version: 0.2.2
3
+ Version: 0.2.4
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License: Apache-2.0
@@ -1609,13 +1609,22 @@ class Catalog:
1609
1609
  ...
1610
1610
  }
1611
1611
  """
1612
- from datachain.lib.signal_schema import SignalSchema
1612
+ from datachain.lib.signal_schema import DEFAULT_DELIMITER, SignalSchema
1613
1613
 
1614
1614
  version = self.get_dataset(dataset_name).get_version(dataset_version)
1615
1615
 
1616
- file_signals_values = SignalSchema.deserialize(
1617
- version.feature_schema
1618
- ).get_file_signals_values(row)
1616
+ file_signals_values = {}
1617
+
1618
+ schema = SignalSchema.deserialize(version.feature_schema)
1619
+ for file_signals in schema.get_file_signals():
1620
+ prefix = file_signals.replace(".", DEFAULT_DELIMITER) + DEFAULT_DELIMITER
1621
+ file_signals_values[file_signals] = {
1622
+ c_name.removeprefix(prefix): c_value
1623
+ for c_name, c_value in row.items()
1624
+ if c_name.startswith(prefix)
1625
+ and DEFAULT_DELIMITER not in c_name.removeprefix(prefix)
1626
+ }
1627
+
1619
1628
  if not file_signals_values:
1620
1629
  return None
1621
1630
 
@@ -13,17 +13,13 @@ from datachain.lib.feature import (
13
13
  convert_type_to_datachain,
14
14
  )
15
15
  from datachain.lib.feature_registry import Registry
16
- from datachain.lib.file import File, IndexedFile, TextFile
17
- from datachain.lib.image import ImageFile
16
+ from datachain.lib.file import File
18
17
  from datachain.lib.utils import DataChainParamsError
19
- from datachain.lib.webdataset import TarStream, WDSAllFile, WDSBasic
20
- from datachain.lib.webdataset_laion import Laion, WDSLaion
21
18
 
22
19
  if TYPE_CHECKING:
23
20
  from datachain.catalog import Catalog
24
21
 
25
22
 
26
- # TODO fix hardcoded Feature class names with://github.com/iterative/dvcx/issues/1625
27
23
  NAMES_TO_TYPES = {
28
24
  "int": int,
29
25
  "str": str,
@@ -33,15 +29,6 @@ NAMES_TO_TYPES = {
33
29
  "dict": dict,
34
30
  "bytes": bytes,
35
31
  "datetime": datetime,
36
- "WDSLaion": WDSLaion,
37
- "Laion": Laion,
38
- "Source": IndexedFile,
39
- "File": File,
40
- "ImageFile": ImageFile,
41
- "TextFile": TextFile,
42
- "TarStream": TarStream,
43
- "WDSBasic": WDSBasic,
44
- "WDSAllFile": WDSAllFile,
45
32
  }
46
33
 
47
34
 
@@ -239,37 +226,6 @@ class SignalSchema:
239
226
  if has_subtree and issubclass(type_, File):
240
227
  yield ".".join(path)
241
228
 
242
- def get_file_signals_values(self, row: dict[str, Any]) -> dict[str, Any]:
243
- """
244
- Method that returns values with clean field names (without prefix) for
245
- all file signals found in this schema for some row
246
- Output example:
247
- {
248
- laion.file: {
249
- "source": "s3://ldb-public",
250
- "name": "dog.jpg",
251
- ...
252
- },
253
- meta.file: {
254
- "source": "s3://datacomp",
255
- "name": "cat.jpg",
256
- ...
257
- }
258
- }
259
- """
260
- res = {}
261
-
262
- for file_signals in self.get_file_signals():
263
- prefix = file_signals.replace(".", DEFAULT_DELIMITER) + DEFAULT_DELIMITER
264
- res[file_signals] = {
265
- c_name.removeprefix(prefix): c_value
266
- for c_name, c_value in row.items()
267
- if c_name.startswith(prefix)
268
- and DEFAULT_DELIMITER not in c_name.removeprefix(prefix)
269
- }
270
-
271
- return res
272
-
273
229
  def create_model(self, name: str) -> type[Feature]:
274
230
  fields = {key: (value, None) for key, value in self.values.items()}
275
231
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datachain
3
- Version: 0.2.2
3
+ Version: 0.2.4
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License: Apache-2.0
@@ -767,7 +767,7 @@ def test_parse_parquet_filter_partitions(tmp_dir, catalog):
767
767
 
768
768
 
769
769
  @pytest.mark.parametrize("processes", [False, 2, True])
770
- def test_parallel(processes):
770
+ def test_parallel(processes, catalog):
771
771
  prefix = "t & "
772
772
  vals = ["a", "b", "c", "d", "e", "f", "g", "h", "i"]
773
773
 
@@ -24,7 +24,7 @@ class MyMapper(Mapper):
24
24
  self.value = MyMapper.TEARDOWN_VALUE
25
25
 
26
26
 
27
- def test_udf():
27
+ def test_udf(catalog):
28
28
  vals = ["a", "b", "c", "d", "e", "f"]
29
29
  chain = DataChain.from_features(key=vals)
30
30
 
@@ -36,7 +36,7 @@ def test_udf():
36
36
 
37
37
 
38
38
  @pytest.mark.skip(reason="Skip until tests module will be importer for unit-tests")
39
- def test_udf_parallel():
39
+ def test_udf_parallel(catalog):
40
40
  vals = ["a", "b", "c", "d", "e", "f"]
41
41
  chain = DataChain.from_features(key=vals)
42
42
 
@@ -45,7 +45,7 @@ def test_udf_parallel():
45
45
  assert res == [MyMapper.BOOTSTRAP_VALUE] * len(vals)
46
46
 
47
47
 
48
- def test_no_bootstrap_for_callable():
48
+ def test_no_bootstrap_for_callable(catalog):
49
49
  class MyMapper:
50
50
  def __init__(self):
51
51
  self._had_bootstrap = False
@@ -197,36 +197,6 @@ def test_get_file_signals_nested(nested_file_schema):
197
197
  assert files == ["f", "my_f", "my_f.nested_file"]
198
198
 
199
199
 
200
- def test_get_file_signals_values(nested_file_schema):
201
- row = {
202
- "name": "Jon",
203
- "age": 25,
204
- "f__source": "s3://first_bucket",
205
- "f__name": "image1.jpeg",
206
- "my_f__source": "s3://second_bucket",
207
- "my_f__name": "image2.jpeg",
208
- "my_f__ref": "reference",
209
- "my_f__nested_file__source": "s3://third_bucket",
210
- "my_f__nested_file__name": "image3.jpeg",
211
- }
212
-
213
- assert nested_file_schema.get_file_signals_values(row) == {
214
- "f": {"source": "s3://first_bucket", "name": "image1.jpeg"},
215
- "my_f": {
216
- "source": "s3://second_bucket",
217
- "name": "image2.jpeg",
218
- "ref": "reference",
219
- },
220
- "my_f.nested_file": {"source": "s3://third_bucket", "name": "image3.jpeg"},
221
- }
222
-
223
-
224
- def test_get_file_signals_values_no_files():
225
- schema = {"name": str, "age": float}
226
- row = {"name": "Jon", "age": 25}
227
- assert SignalSchema(schema).get_file_signals_values(row) == {}
228
-
229
-
230
200
  def test_create_model():
231
201
  class MyFr(Feature):
232
202
  count: int
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes