datachain 0.2.2__tar.gz → 0.2.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (260)
  1. {datachain-0.2.2/src/datachain.egg-info → datachain-0.2.3}/PKG-INFO +1 -1
  2. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/catalog/catalog.py +13 -4
  3. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/lib/signal_schema.py +1 -45
  4. {datachain-0.2.2 → datachain-0.2.3/src/datachain.egg-info}/PKG-INFO +1 -1
  5. {datachain-0.2.2 → datachain-0.2.3}/tests/unit/lib/test_signal_schema.py +0 -30
  6. {datachain-0.2.2 → datachain-0.2.3}/.cruft.json +0 -0
  7. {datachain-0.2.2 → datachain-0.2.3}/.gitattributes +0 -0
  8. {datachain-0.2.2 → datachain-0.2.3}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  9. {datachain-0.2.2 → datachain-0.2.3}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
  10. {datachain-0.2.2 → datachain-0.2.3}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  11. {datachain-0.2.2 → datachain-0.2.3}/.github/codecov.yaml +0 -0
  12. {datachain-0.2.2 → datachain-0.2.3}/.github/dependabot.yml +0 -0
  13. {datachain-0.2.2 → datachain-0.2.3}/.github/workflows/benchmarks.yml +0 -0
  14. {datachain-0.2.2 → datachain-0.2.3}/.github/workflows/release.yml +0 -0
  15. {datachain-0.2.2 → datachain-0.2.3}/.github/workflows/tests.yml +0 -0
  16. {datachain-0.2.2 → datachain-0.2.3}/.github/workflows/update-template.yaml +0 -0
  17. {datachain-0.2.2 → datachain-0.2.3}/.gitignore +0 -0
  18. {datachain-0.2.2 → datachain-0.2.3}/.pre-commit-config.yaml +0 -0
  19. {datachain-0.2.2 → datachain-0.2.3}/.reuse/dep5 +0 -0
  20. {datachain-0.2.2 → datachain-0.2.3}/CODE_OF_CONDUCT.rst +0 -0
  21. {datachain-0.2.2 → datachain-0.2.3}/CONTRIBUTING.rst +0 -0
  22. {datachain-0.2.2 → datachain-0.2.3}/LICENSE +0 -0
  23. {datachain-0.2.2 → datachain-0.2.3}/LICENSES/Apache-2.0.txt +0 -0
  24. {datachain-0.2.2 → datachain-0.2.3}/LICENSES/BSD-3-Clause.txt +0 -0
  25. {datachain-0.2.2 → datachain-0.2.3}/LICENSES/Python-2.0.txt +0 -0
  26. {datachain-0.2.2 → datachain-0.2.3}/README.rst +0 -0
  27. {datachain-0.2.2 → datachain-0.2.3}/docs/assets/datachain.png +0 -0
  28. {datachain-0.2.2 → datachain-0.2.3}/docs/index.md +0 -0
  29. {datachain-0.2.2 → datachain-0.2.3}/docs/references/catalog.md +0 -0
  30. {datachain-0.2.2 → datachain-0.2.3}/docs/references/datachain.md +0 -0
  31. {datachain-0.2.2 → datachain-0.2.3}/docs/tutorials/cv_intro.md +0 -0
  32. {datachain-0.2.2 → datachain-0.2.3}/docs/tutorials/udfs.md +0 -0
  33. {datachain-0.2.2 → datachain-0.2.3}/examples/blip2_image_desc_lib.py +0 -0
  34. {datachain-0.2.2 → datachain-0.2.3}/examples/clip.py +0 -0
  35. {datachain-0.2.2 → datachain-0.2.3}/examples/common_sql_functions.py +0 -0
  36. {datachain-0.2.2 → datachain-0.2.3}/examples/computer_vision/fashion_product_images/.gitignore +0 -0
  37. {datachain-0.2.2 → datachain-0.2.3}/examples/computer_vision/fashion_product_images/1-quick-start.ipynb +0 -0
  38. {datachain-0.2.2 → datachain-0.2.3}/examples/computer_vision/fashion_product_images/2-working-with-image-datachains.ipynb +0 -0
  39. {datachain-0.2.2 → datachain-0.2.3}/examples/computer_vision/fashion_product_images/README.md +0 -0
  40. {datachain-0.2.2 → datachain-0.2.3}/examples/computer_vision/fashion_product_images/requirements.txt +0 -0
  41. {datachain-0.2.2 → datachain-0.2.3}/examples/computer_vision/fashion_product_images/scripts/1-quick-start.py +0 -0
  42. {datachain-0.2.2 → datachain-0.2.3}/examples/computer_vision/fashion_product_images/scripts/2-basic-operations.py +0 -0
  43. {datachain-0.2.2 → datachain-0.2.3}/examples/computer_vision/fashion_product_images/scripts/2-embeddings.py +0 -0
  44. {datachain-0.2.2 → datachain-0.2.3}/examples/computer_vision/fashion_product_images/scripts/3-split-train-test.py +0 -0
  45. {datachain-0.2.2 → datachain-0.2.3}/examples/computer_vision/fashion_product_images/src/clustering.py +0 -0
  46. {datachain-0.2.2 → datachain-0.2.3}/examples/computer_vision/fashion_product_images/static/images/basic-operations.png +0 -0
  47. {datachain-0.2.2 → datachain-0.2.3}/examples/computer_vision/fashion_product_images/static/images/core-concepts.png +0 -0
  48. {datachain-0.2.2 → datachain-0.2.3}/examples/computer_vision/fashion_product_images/static/images/datachain-logo.png +0 -0
  49. {datachain-0.2.2 → datachain-0.2.3}/examples/computer_vision/fashion_product_images/static/images/datachain-overview.png +0 -0
  50. {datachain-0.2.2 → datachain-0.2.3}/examples/computer_vision/fashion_product_images/static/images/dataset-1.png +0 -0
  51. {datachain-0.2.2 → datachain-0.2.3}/examples/computer_vision/fashion_product_images/static/images/dataset-2.png +0 -0
  52. {datachain-0.2.2 → datachain-0.2.3}/examples/computer_vision/fashion_product_images/static/images/dataset-3.png +0 -0
  53. {datachain-0.2.2 → datachain-0.2.3}/examples/computer_vision/fashion_product_images/static/images/studio.png +0 -0
  54. {datachain-0.2.2 → datachain-0.2.3}/examples/get_started/clip_fine_tuning.ipynb +0 -0
  55. {datachain-0.2.2 → datachain-0.2.3}/examples/hf_pipeline.py +0 -0
  56. {datachain-0.2.2 → datachain-0.2.3}/examples/iptc_exif_xmp_lib.py +0 -0
  57. {datachain-0.2.2 → datachain-0.2.3}/examples/json-csv-reader.py +0 -0
  58. {datachain-0.2.2 → datachain-0.2.3}/examples/llava2_image_desc_lib.py +0 -0
  59. {datachain-0.2.2 → datachain-0.2.3}/examples/llm-claude-aggregate-query.py +0 -0
  60. {datachain-0.2.2 → datachain-0.2.3}/examples/llm-claude-simple-query.py +0 -0
  61. {datachain-0.2.2 → datachain-0.2.3}/examples/llm-claude.py +0 -0
  62. {datachain-0.2.2 → datachain-0.2.3}/examples/loader.py +0 -0
  63. {datachain-0.2.2 → datachain-0.2.3}/examples/neurips/README +0 -0
  64. {datachain-0.2.2 → datachain-0.2.3}/examples/neurips/distance_to_query.py +0 -0
  65. {datachain-0.2.2 → datachain-0.2.3}/examples/neurips/llm_chat.py +0 -0
  66. {datachain-0.2.2 → datachain-0.2.3}/examples/neurips/requirements.txt +0 -0
  67. {datachain-0.2.2 → datachain-0.2.3}/examples/neurips/single_query.py +0 -0
  68. {datachain-0.2.2 → datachain-0.2.3}/examples/neurips/text_loaders.py +0 -0
  69. {datachain-0.2.2 → datachain-0.2.3}/examples/openai_image_desc_lib.py +0 -0
  70. {datachain-0.2.2 → datachain-0.2.3}/examples/openimage-detect.py +0 -0
  71. {datachain-0.2.2 → datachain-0.2.3}/examples/pose_detection.py +0 -0
  72. {datachain-0.2.2 → datachain-0.2.3}/examples/torch-loader.py +0 -0
  73. {datachain-0.2.2 → datachain-0.2.3}/examples/udfs/batching.py +0 -0
  74. {datachain-0.2.2 → datachain-0.2.3}/examples/udfs/image_transformation.py +0 -0
  75. {datachain-0.2.2 → datachain-0.2.3}/examples/udfs/parallel.py +0 -0
  76. {datachain-0.2.2 → datachain-0.2.3}/examples/udfs/simple.py +0 -0
  77. {datachain-0.2.2 → datachain-0.2.3}/examples/udfs/stateful.py +0 -0
  78. {datachain-0.2.2 → datachain-0.2.3}/examples/udfs/stateful_similarity.py +0 -0
  79. {datachain-0.2.2 → datachain-0.2.3}/examples/unstructured-text.py +0 -0
  80. {datachain-0.2.2 → datachain-0.2.3}/examples/wds.py +0 -0
  81. {datachain-0.2.2 → datachain-0.2.3}/examples/wds_filtered.py +0 -0
  82. {datachain-0.2.2 → datachain-0.2.3}/examples/zalando/zalando_clip.py +0 -0
  83. {datachain-0.2.2 → datachain-0.2.3}/examples/zalando/zalando_dir_as_class.py +0 -0
  84. {datachain-0.2.2 → datachain-0.2.3}/examples/zalando/zalando_splits_and_classes_ds.py +0 -0
  85. {datachain-0.2.2 → datachain-0.2.3}/examples/zalando/zalando_splits_and_classes_output.py +0 -0
  86. {datachain-0.2.2 → datachain-0.2.3}/mkdocs.yml +0 -0
  87. {datachain-0.2.2 → datachain-0.2.3}/noxfile.py +0 -0
  88. {datachain-0.2.2 → datachain-0.2.3}/pyproject.toml +0 -0
  89. {datachain-0.2.2 → datachain-0.2.3}/setup.cfg +0 -0
  90. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/__init__.py +0 -0
  91. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/__main__.py +0 -0
  92. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/asyn.py +0 -0
  93. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/cache.py +0 -0
  94. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/catalog/__init__.py +0 -0
  95. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/catalog/datasource.py +0 -0
  96. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/catalog/loader.py +0 -0
  97. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/catalog/subclass.py +0 -0
  98. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/cli.py +0 -0
  99. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/cli_utils.py +0 -0
  100. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/client/__init__.py +0 -0
  101. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/client/azure.py +0 -0
  102. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/client/fileslice.py +0 -0
  103. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/client/fsspec.py +0 -0
  104. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/client/gcs.py +0 -0
  105. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/client/local.py +0 -0
  106. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/client/s3.py +0 -0
  107. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/config.py +0 -0
  108. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/data_storage/__init__.py +0 -0
  109. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/data_storage/db_engine.py +0 -0
  110. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/data_storage/id_generator.py +0 -0
  111. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/data_storage/job.py +0 -0
  112. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/data_storage/metastore.py +0 -0
  113. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/data_storage/schema.py +0 -0
  114. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/data_storage/serializer.py +0 -0
  115. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/data_storage/sqlite.py +0 -0
  116. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/data_storage/warehouse.py +0 -0
  117. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/dataset.py +0 -0
  118. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/error.py +0 -0
  119. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/lib/__init__.py +0 -0
  120. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/lib/arrow.py +0 -0
  121. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/lib/cached_stream.py +0 -0
  122. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/lib/claude.py +0 -0
  123. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/lib/clip.py +0 -0
  124. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/lib/dc.py +0 -0
  125. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/lib/feature.py +0 -0
  126. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/lib/feature_registry.py +0 -0
  127. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/lib/feature_utils.py +0 -0
  128. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/lib/file.py +0 -0
  129. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/lib/gpt4_vision.py +0 -0
  130. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/lib/hf_image_to_text.py +0 -0
  131. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/lib/hf_pipeline.py +0 -0
  132. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/lib/image.py +0 -0
  133. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/lib/image_transform.py +0 -0
  134. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/lib/iptc_exif_xmp.py +0 -0
  135. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/lib/meta_formats.py +0 -0
  136. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/lib/pytorch.py +0 -0
  137. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/lib/settings.py +0 -0
  138. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/lib/text.py +0 -0
  139. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/lib/udf.py +0 -0
  140. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/lib/udf_signature.py +0 -0
  141. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/lib/unstructured.py +0 -0
  142. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/lib/utils.py +0 -0
  143. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/lib/vfile.py +0 -0
  144. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/lib/webdataset.py +0 -0
  145. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/lib/webdataset_laion.py +0 -0
  146. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/listing.py +0 -0
  147. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/node.py +0 -0
  148. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/nodes_fetcher.py +0 -0
  149. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/nodes_thread_pool.py +0 -0
  150. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/progress.py +0 -0
  151. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/py.typed +0 -0
  152. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/query/__init__.py +0 -0
  153. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/query/batch.py +0 -0
  154. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/query/builtins.py +0 -0
  155. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/query/dataset.py +0 -0
  156. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/query/dispatch.py +0 -0
  157. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/query/metrics.py +0 -0
  158. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/query/params.py +0 -0
  159. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/query/schema.py +0 -0
  160. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/query/session.py +0 -0
  161. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/query/udf.py +0 -0
  162. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/remote/__init__.py +0 -0
  163. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/remote/studio.py +0 -0
  164. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/sql/__init__.py +0 -0
  165. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/sql/default/__init__.py +0 -0
  166. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/sql/default/base.py +0 -0
  167. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/sql/functions/__init__.py +0 -0
  168. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/sql/functions/array.py +0 -0
  169. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/sql/functions/conditional.py +0 -0
  170. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/sql/functions/path.py +0 -0
  171. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/sql/functions/random.py +0 -0
  172. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/sql/functions/string.py +0 -0
  173. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/sql/selectable.py +0 -0
  174. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/sql/sqlite/__init__.py +0 -0
  175. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/sql/sqlite/base.py +0 -0
  176. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/sql/sqlite/types.py +0 -0
  177. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/sql/sqlite/vector.py +0 -0
  178. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/sql/types.py +0 -0
  179. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/sql/utils.py +0 -0
  180. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/storage.py +0 -0
  181. {datachain-0.2.2 → datachain-0.2.3}/src/datachain/utils.py +0 -0
  182. {datachain-0.2.2 → datachain-0.2.3}/src/datachain.egg-info/SOURCES.txt +0 -0
  183. {datachain-0.2.2 → datachain-0.2.3}/src/datachain.egg-info/dependency_links.txt +0 -0
  184. {datachain-0.2.2 → datachain-0.2.3}/src/datachain.egg-info/entry_points.txt +0 -0
  185. {datachain-0.2.2 → datachain-0.2.3}/src/datachain.egg-info/requires.txt +0 -0
  186. {datachain-0.2.2 → datachain-0.2.3}/src/datachain.egg-info/top_level.txt +0 -0
  187. {datachain-0.2.2 → datachain-0.2.3}/tests/__init__.py +0 -0
  188. {datachain-0.2.2 → datachain-0.2.3}/tests/benchmarks/__init__.py +0 -0
  189. {datachain-0.2.2 → datachain-0.2.3}/tests/benchmarks/conftest.py +0 -0
  190. {datachain-0.2.2 → datachain-0.2.3}/tests/benchmarks/test_ls.py +0 -0
  191. {datachain-0.2.2 → datachain-0.2.3}/tests/benchmarks/test_version.py +0 -0
  192. {datachain-0.2.2 → datachain-0.2.3}/tests/conftest.py +0 -0
  193. {datachain-0.2.2 → datachain-0.2.3}/tests/data.py +0 -0
  194. {datachain-0.2.2 → datachain-0.2.3}/tests/examples/__init__.py +0 -0
  195. {datachain-0.2.2 → datachain-0.2.3}/tests/examples/test_wds_e2e.py +0 -0
  196. {datachain-0.2.2 → datachain-0.2.3}/tests/examples/wds_data.py +0 -0
  197. {datachain-0.2.2 → datachain-0.2.3}/tests/func/__init__.py +0 -0
  198. {datachain-0.2.2 → datachain-0.2.3}/tests/func/test_catalog.py +0 -0
  199. {datachain-0.2.2 → datachain-0.2.3}/tests/func/test_client.py +0 -0
  200. {datachain-0.2.2 → datachain-0.2.3}/tests/func/test_datachain.py +0 -0
  201. {datachain-0.2.2 → datachain-0.2.3}/tests/func/test_dataset_query.py +0 -0
  202. {datachain-0.2.2 → datachain-0.2.3}/tests/func/test_datasets.py +0 -0
  203. {datachain-0.2.2 → datachain-0.2.3}/tests/func/test_ls.py +0 -0
  204. {datachain-0.2.2 → datachain-0.2.3}/tests/func/test_pull.py +0 -0
  205. {datachain-0.2.2 → datachain-0.2.3}/tests/func/test_pytorch.py +0 -0
  206. {datachain-0.2.2 → datachain-0.2.3}/tests/func/test_query.py +0 -0
  207. {datachain-0.2.2 → datachain-0.2.3}/tests/scripts/feature_class.py +0 -0
  208. {datachain-0.2.2 → datachain-0.2.3}/tests/scripts/feature_class_parallel.py +0 -0
  209. {datachain-0.2.2 → datachain-0.2.3}/tests/scripts/name_len_normal.py +0 -0
  210. {datachain-0.2.2 → datachain-0.2.3}/tests/scripts/name_len_slow.py +0 -0
  211. {datachain-0.2.2 → datachain-0.2.3}/tests/test_cli_e2e.py +0 -0
  212. {datachain-0.2.2 → datachain-0.2.3}/tests/test_query_e2e.py +0 -0
  213. {datachain-0.2.2 → datachain-0.2.3}/tests/unit/__init__.py +0 -0
  214. {datachain-0.2.2 → datachain-0.2.3}/tests/unit/lib/__init__.py +0 -0
  215. {datachain-0.2.2 → datachain-0.2.3}/tests/unit/lib/test_arrow.py +0 -0
  216. {datachain-0.2.2 → datachain-0.2.3}/tests/unit/lib/test_clip.py +0 -0
  217. {datachain-0.2.2 → datachain-0.2.3}/tests/unit/lib/test_datachain.py +0 -0
  218. {datachain-0.2.2 → datachain-0.2.3}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
  219. {datachain-0.2.2 → datachain-0.2.3}/tests/unit/lib/test_datachain_merge.py +0 -0
  220. {datachain-0.2.2 → datachain-0.2.3}/tests/unit/lib/test_feature.py +0 -0
  221. {datachain-0.2.2 → datachain-0.2.3}/tests/unit/lib/test_feature_utils.py +0 -0
  222. {datachain-0.2.2 → datachain-0.2.3}/tests/unit/lib/test_file.py +0 -0
  223. {datachain-0.2.2 → datachain-0.2.3}/tests/unit/lib/test_image.py +0 -0
  224. {datachain-0.2.2 → datachain-0.2.3}/tests/unit/lib/test_text.py +0 -0
  225. {datachain-0.2.2 → datachain-0.2.3}/tests/unit/lib/test_udf_signature.py +0 -0
  226. {datachain-0.2.2 → datachain-0.2.3}/tests/unit/lib/test_utils.py +0 -0
  227. {datachain-0.2.2 → datachain-0.2.3}/tests/unit/lib/test_webdataset.py +0 -0
  228. {datachain-0.2.2 → datachain-0.2.3}/tests/unit/sql/__init__.py +0 -0
  229. {datachain-0.2.2 → datachain-0.2.3}/tests/unit/sql/sqlite/__init__.py +0 -0
  230. {datachain-0.2.2 → datachain-0.2.3}/tests/unit/sql/sqlite/test_utils.py +0 -0
  231. {datachain-0.2.2 → datachain-0.2.3}/tests/unit/sql/test_array.py +0 -0
  232. {datachain-0.2.2 → datachain-0.2.3}/tests/unit/sql/test_conditional.py +0 -0
  233. {datachain-0.2.2 → datachain-0.2.3}/tests/unit/sql/test_path.py +0 -0
  234. {datachain-0.2.2 → datachain-0.2.3}/tests/unit/sql/test_random.py +0 -0
  235. {datachain-0.2.2 → datachain-0.2.3}/tests/unit/sql/test_selectable.py +0 -0
  236. {datachain-0.2.2 → datachain-0.2.3}/tests/unit/sql/test_string.py +0 -0
  237. {datachain-0.2.2 → datachain-0.2.3}/tests/unit/test_asyn.py +0 -0
  238. {datachain-0.2.2 → datachain-0.2.3}/tests/unit/test_cache.py +0 -0
  239. {datachain-0.2.2 → datachain-0.2.3}/tests/unit/test_catalog.py +0 -0
  240. {datachain-0.2.2 → datachain-0.2.3}/tests/unit/test_catalog_loader.py +0 -0
  241. {datachain-0.2.2 → datachain-0.2.3}/tests/unit/test_cli_parsing.py +0 -0
  242. {datachain-0.2.2 → datachain-0.2.3}/tests/unit/test_client.py +0 -0
  243. {datachain-0.2.2 → datachain-0.2.3}/tests/unit/test_client_s3.py +0 -0
  244. {datachain-0.2.2 → datachain-0.2.3}/tests/unit/test_data_storage.py +0 -0
  245. {datachain-0.2.2 → datachain-0.2.3}/tests/unit/test_database_engine.py +0 -0
  246. {datachain-0.2.2 → datachain-0.2.3}/tests/unit/test_dataset.py +0 -0
  247. {datachain-0.2.2 → datachain-0.2.3}/tests/unit/test_dispatch.py +0 -0
  248. {datachain-0.2.2 → datachain-0.2.3}/tests/unit/test_fileslice.py +0 -0
  249. {datachain-0.2.2 → datachain-0.2.3}/tests/unit/test_id_generator.py +0 -0
  250. {datachain-0.2.2 → datachain-0.2.3}/tests/unit/test_listing.py +0 -0
  251. {datachain-0.2.2 → datachain-0.2.3}/tests/unit/test_metastore.py +0 -0
  252. {datachain-0.2.2 → datachain-0.2.3}/tests/unit/test_query_metrics.py +0 -0
  253. {datachain-0.2.2 → datachain-0.2.3}/tests/unit/test_query_params.py +0 -0
  254. {datachain-0.2.2 → datachain-0.2.3}/tests/unit/test_serializer.py +0 -0
  255. {datachain-0.2.2 → datachain-0.2.3}/tests/unit/test_session.py +0 -0
  256. {datachain-0.2.2 → datachain-0.2.3}/tests/unit/test_storage.py +0 -0
  257. {datachain-0.2.2 → datachain-0.2.3}/tests/unit/test_udf.py +0 -0
  258. {datachain-0.2.2 → datachain-0.2.3}/tests/unit/test_utils.py +0 -0
  259. {datachain-0.2.2 → datachain-0.2.3}/tests/unit/test_warehouse.py +0 -0
  260. {datachain-0.2.2 → datachain-0.2.3}/tests/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datachain
3
- Version: 0.2.2
3
+ Version: 0.2.3
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License: Apache-2.0
@@ -1609,13 +1609,22 @@ class Catalog:
1609
1609
  ...
1610
1610
  }
1611
1611
  """
1612
- from datachain.lib.signal_schema import SignalSchema
1612
+ from datachain.lib.signal_schema import DEFAULT_DELIMITER, SignalSchema
1613
1613
 
1614
1614
  version = self.get_dataset(dataset_name).get_version(dataset_version)
1615
1615
 
1616
- file_signals_values = SignalSchema.deserialize(
1617
- version.feature_schema
1618
- ).get_file_signals_values(row)
1616
+ file_signals_values = {}
1617
+
1618
+ schema = SignalSchema.deserialize(version.feature_schema)
1619
+ for file_signals in schema.get_file_signals():
1620
+ prefix = file_signals.replace(".", DEFAULT_DELIMITER) + DEFAULT_DELIMITER
1621
+ file_signals_values[file_signals] = {
1622
+ c_name.removeprefix(prefix): c_value
1623
+ for c_name, c_value in row.items()
1624
+ if c_name.startswith(prefix)
1625
+ and DEFAULT_DELIMITER not in c_name.removeprefix(prefix)
1626
+ }
1627
+
1619
1628
  if not file_signals_values:
1620
1629
  return None
1621
1630
 
@@ -13,17 +13,13 @@ from datachain.lib.feature import (
13
13
  convert_type_to_datachain,
14
14
  )
15
15
  from datachain.lib.feature_registry import Registry
16
- from datachain.lib.file import File, IndexedFile, TextFile
17
- from datachain.lib.image import ImageFile
16
+ from datachain.lib.file import File
18
17
  from datachain.lib.utils import DataChainParamsError
19
- from datachain.lib.webdataset import TarStream, WDSAllFile, WDSBasic
20
- from datachain.lib.webdataset_laion import Laion, WDSLaion
21
18
 
22
19
  if TYPE_CHECKING:
23
20
  from datachain.catalog import Catalog
24
21
 
25
22
 
26
- # TODO fix hardcoded Feature class names with https://github.com/iterative/dvcx/issues/1625
27
23
  NAMES_TO_TYPES = {
28
24
  "int": int,
29
25
  "str": str,
@@ -33,15 +29,6 @@ NAMES_TO_TYPES = {
33
29
  "dict": dict,
34
30
  "bytes": bytes,
35
31
  "datetime": datetime,
36
- "WDSLaion": WDSLaion,
37
- "Laion": Laion,
38
- "Source": IndexedFile,
39
- "File": File,
40
- "ImageFile": ImageFile,
41
- "TextFile": TextFile,
42
- "TarStream": TarStream,
43
- "WDSBasic": WDSBasic,
44
- "WDSAllFile": WDSAllFile,
45
32
  }
46
33
 
47
34
 
@@ -239,37 +226,6 @@ class SignalSchema:
239
226
  if has_subtree and issubclass(type_, File):
240
227
  yield ".".join(path)
241
228
 
242
- def get_file_signals_values(self, row: dict[str, Any]) -> dict[str, Any]:
243
- """
244
- Method that returns values with clean field names (without prefix) for
245
- all file signals found in this schema for some row
246
- Output example:
247
- {
248
- laion.file: {
249
- "source": "s3://ldb-public",
250
- "name": "dog.jpg",
251
- ...
252
- },
253
- meta.file: {
254
- "source": "s3://datacomp",
255
- "name": "cat.jpg",
256
- ...
257
- }
258
- }
259
- """
260
- res = {}
261
-
262
- for file_signals in self.get_file_signals():
263
- prefix = file_signals.replace(".", DEFAULT_DELIMITER) + DEFAULT_DELIMITER
264
- res[file_signals] = {
265
- c_name.removeprefix(prefix): c_value
266
- for c_name, c_value in row.items()
267
- if c_name.startswith(prefix)
268
- and DEFAULT_DELIMITER not in c_name.removeprefix(prefix)
269
- }
270
-
271
- return res
272
-
273
229
  def create_model(self, name: str) -> type[Feature]:
274
230
  fields = {key: (value, None) for key, value in self.values.items()}
275
231
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datachain
3
- Version: 0.2.2
3
+ Version: 0.2.3
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License: Apache-2.0
@@ -197,36 +197,6 @@ def test_get_file_signals_nested(nested_file_schema):
197
197
  assert files == ["f", "my_f", "my_f.nested_file"]
198
198
 
199
199
 
200
- def test_get_file_signals_values(nested_file_schema):
201
- row = {
202
- "name": "Jon",
203
- "age": 25,
204
- "f__source": "s3://first_bucket",
205
- "f__name": "image1.jpeg",
206
- "my_f__source": "s3://second_bucket",
207
- "my_f__name": "image2.jpeg",
208
- "my_f__ref": "reference",
209
- "my_f__nested_file__source": "s3://third_bucket",
210
- "my_f__nested_file__name": "image3.jpeg",
211
- }
212
-
213
- assert nested_file_schema.get_file_signals_values(row) == {
214
- "f": {"source": "s3://first_bucket", "name": "image1.jpeg"},
215
- "my_f": {
216
- "source": "s3://second_bucket",
217
- "name": "image2.jpeg",
218
- "ref": "reference",
219
- },
220
- "my_f.nested_file": {"source": "s3://third_bucket", "name": "image3.jpeg"},
221
- }
222
-
223
-
224
- def test_get_file_signals_values_no_files():
225
- schema = {"name": str, "age": float}
226
- row = {"name": "Jon", "age": 25}
227
- assert SignalSchema(schema).get_file_signals_values(row) == {}
228
-
229
-
230
200
  def test_create_model():
231
201
  class MyFr(Feature):
232
202
  count: int
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes