datachain 0.2.6__tar.gz → 0.2.7__tar.gz

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (264)
  1. {datachain-0.2.6/src/datachain.egg-info → datachain-0.2.7}/PKG-INFO +1 -1
  2. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/__init__.py +0 -5
  3. datachain-0.2.7/src/datachain/image/__init__.py +3 -0
  4. datachain-0.2.7/src/datachain/text/__init__.py +3 -0
  5. {datachain-0.2.6 → datachain-0.2.7/src/datachain.egg-info}/PKG-INFO +1 -1
  6. {datachain-0.2.6 → datachain-0.2.7}/src/datachain.egg-info/SOURCES.txt +3 -0
  7. {datachain-0.2.6 → datachain-0.2.7}/tests/func/test_pytorch.py +15 -13
  8. datachain-0.2.7/tests/unit/lib/conftest.py +21 -0
  9. {datachain-0.2.6 → datachain-0.2.7}/tests/unit/lib/test_clip.py +4 -8
  10. {datachain-0.2.6 → datachain-0.2.7}/tests/unit/lib/test_text.py +2 -5
  11. {datachain-0.2.6 → datachain-0.2.7}/tests/unit/test_module_exports.py +2 -3
  12. {datachain-0.2.6 → datachain-0.2.7}/.cruft.json +0 -0
  13. {datachain-0.2.6 → datachain-0.2.7}/.gitattributes +0 -0
  14. {datachain-0.2.6 → datachain-0.2.7}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  15. {datachain-0.2.6 → datachain-0.2.7}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
  16. {datachain-0.2.6 → datachain-0.2.7}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  17. {datachain-0.2.6 → datachain-0.2.7}/.github/codecov.yaml +0 -0
  18. {datachain-0.2.6 → datachain-0.2.7}/.github/dependabot.yml +0 -0
  19. {datachain-0.2.6 → datachain-0.2.7}/.github/workflows/benchmarks.yml +0 -0
  20. {datachain-0.2.6 → datachain-0.2.7}/.github/workflows/release.yml +0 -0
  21. {datachain-0.2.6 → datachain-0.2.7}/.github/workflows/tests.yml +0 -0
  22. {datachain-0.2.6 → datachain-0.2.7}/.github/workflows/update-template.yaml +0 -0
  23. {datachain-0.2.6 → datachain-0.2.7}/.gitignore +0 -0
  24. {datachain-0.2.6 → datachain-0.2.7}/.pre-commit-config.yaml +0 -0
  25. {datachain-0.2.6 → datachain-0.2.7}/.reuse/dep5 +0 -0
  26. {datachain-0.2.6 → datachain-0.2.7}/CODE_OF_CONDUCT.rst +0 -0
  27. {datachain-0.2.6 → datachain-0.2.7}/CONTRIBUTING.rst +0 -0
  28. {datachain-0.2.6 → datachain-0.2.7}/LICENSE +0 -0
  29. {datachain-0.2.6 → datachain-0.2.7}/LICENSES/Apache-2.0.txt +0 -0
  30. {datachain-0.2.6 → datachain-0.2.7}/LICENSES/BSD-3-Clause.txt +0 -0
  31. {datachain-0.2.6 → datachain-0.2.7}/LICENSES/Python-2.0.txt +0 -0
  32. {datachain-0.2.6 → datachain-0.2.7}/README.rst +0 -0
  33. {datachain-0.2.6 → datachain-0.2.7}/docs/assets/datachain.png +0 -0
  34. {datachain-0.2.6 → datachain-0.2.7}/docs/index.md +0 -0
  35. {datachain-0.2.6 → datachain-0.2.7}/docs/references/catalog.md +0 -0
  36. {datachain-0.2.6 → datachain-0.2.7}/docs/references/datachain.md +0 -0
  37. {datachain-0.2.6 → datachain-0.2.7}/docs/tutorials/cv_intro.md +0 -0
  38. {datachain-0.2.6 → datachain-0.2.7}/docs/tutorials/udfs.md +0 -0
  39. {datachain-0.2.6 → datachain-0.2.7}/examples/blip2_image_desc_lib.py +0 -0
  40. {datachain-0.2.6 → datachain-0.2.7}/examples/clip.py +0 -0
  41. {datachain-0.2.6 → datachain-0.2.7}/examples/common_sql_functions.py +0 -0
  42. {datachain-0.2.6 → datachain-0.2.7}/examples/computer_vision/fashion_product_images/.gitignore +0 -0
  43. {datachain-0.2.6 → datachain-0.2.7}/examples/computer_vision/fashion_product_images/1-quick-start.ipynb +0 -0
  44. {datachain-0.2.6 → datachain-0.2.7}/examples/computer_vision/fashion_product_images/2-working-with-image-datachains.ipynb +0 -0
  45. {datachain-0.2.6 → datachain-0.2.7}/examples/computer_vision/fashion_product_images/README.md +0 -0
  46. {datachain-0.2.6 → datachain-0.2.7}/examples/computer_vision/fashion_product_images/requirements.txt +0 -0
  47. {datachain-0.2.6 → datachain-0.2.7}/examples/computer_vision/fashion_product_images/scripts/1-quick-start.py +0 -0
  48. {datachain-0.2.6 → datachain-0.2.7}/examples/computer_vision/fashion_product_images/scripts/2-basic-operations.py +0 -0
  49. {datachain-0.2.6 → datachain-0.2.7}/examples/computer_vision/fashion_product_images/scripts/2-embeddings.py +0 -0
  50. {datachain-0.2.6 → datachain-0.2.7}/examples/computer_vision/fashion_product_images/scripts/3-split-train-test.py +0 -0
  51. {datachain-0.2.6 → datachain-0.2.7}/examples/computer_vision/fashion_product_images/src/clustering.py +0 -0
  52. {datachain-0.2.6 → datachain-0.2.7}/examples/computer_vision/fashion_product_images/static/images/basic-operations.png +0 -0
  53. {datachain-0.2.6 → datachain-0.2.7}/examples/computer_vision/fashion_product_images/static/images/core-concepts.png +0 -0
  54. {datachain-0.2.6 → datachain-0.2.7}/examples/computer_vision/fashion_product_images/static/images/datachain-logo.png +0 -0
  55. {datachain-0.2.6 → datachain-0.2.7}/examples/computer_vision/fashion_product_images/static/images/datachain-overview.png +0 -0
  56. {datachain-0.2.6 → datachain-0.2.7}/examples/computer_vision/fashion_product_images/static/images/dataset-1.png +0 -0
  57. {datachain-0.2.6 → datachain-0.2.7}/examples/computer_vision/fashion_product_images/static/images/dataset-2.png +0 -0
  58. {datachain-0.2.6 → datachain-0.2.7}/examples/computer_vision/fashion_product_images/static/images/dataset-3.png +0 -0
  59. {datachain-0.2.6 → datachain-0.2.7}/examples/computer_vision/fashion_product_images/static/images/studio.png +0 -0
  60. {datachain-0.2.6 → datachain-0.2.7}/examples/hf_pipeline.py +0 -0
  61. {datachain-0.2.6 → datachain-0.2.7}/examples/iptc_exif_xmp_lib.py +0 -0
  62. {datachain-0.2.6 → datachain-0.2.7}/examples/json-csv-reader.py +0 -0
  63. {datachain-0.2.6 → datachain-0.2.7}/examples/llava2_image_desc_lib.py +0 -0
  64. {datachain-0.2.6 → datachain-0.2.7}/examples/llm-claude-aggregate-query.py +0 -0
  65. {datachain-0.2.6 → datachain-0.2.7}/examples/llm-claude-simple-query.py +0 -0
  66. {datachain-0.2.6 → datachain-0.2.7}/examples/llm-claude.py +0 -0
  67. {datachain-0.2.6 → datachain-0.2.7}/examples/loader.py +0 -0
  68. {datachain-0.2.6 → datachain-0.2.7}/examples/multimodal/clip_fine_tuning.ipynb +0 -0
  69. {datachain-0.2.6 → datachain-0.2.7}/examples/neurips/README +0 -0
  70. {datachain-0.2.6 → datachain-0.2.7}/examples/neurips/distance_to_query.py +0 -0
  71. {datachain-0.2.6 → datachain-0.2.7}/examples/neurips/llm_chat.py +0 -0
  72. {datachain-0.2.6 → datachain-0.2.7}/examples/neurips/requirements.txt +0 -0
  73. {datachain-0.2.6 → datachain-0.2.7}/examples/neurips/single_query.py +0 -0
  74. {datachain-0.2.6 → datachain-0.2.7}/examples/neurips/text_loaders.py +0 -0
  75. {datachain-0.2.6 → datachain-0.2.7}/examples/openai_image_desc_lib.py +0 -0
  76. {datachain-0.2.6 → datachain-0.2.7}/examples/openimage-detect.py +0 -0
  77. {datachain-0.2.6 → datachain-0.2.7}/examples/pose_detection.py +0 -0
  78. {datachain-0.2.6 → datachain-0.2.7}/examples/torch-loader.py +0 -0
  79. {datachain-0.2.6 → datachain-0.2.7}/examples/udfs/batching.py +0 -0
  80. {datachain-0.2.6 → datachain-0.2.7}/examples/udfs/image_transformation.py +0 -0
  81. {datachain-0.2.6 → datachain-0.2.7}/examples/udfs/parallel.py +0 -0
  82. {datachain-0.2.6 → datachain-0.2.7}/examples/udfs/simple.py +0 -0
  83. {datachain-0.2.6 → datachain-0.2.7}/examples/udfs/stateful.py +0 -0
  84. {datachain-0.2.6 → datachain-0.2.7}/examples/udfs/stateful_similarity.py +0 -0
  85. {datachain-0.2.6 → datachain-0.2.7}/examples/unstructured-text.py +0 -0
  86. {datachain-0.2.6 → datachain-0.2.7}/examples/wds.py +0 -0
  87. {datachain-0.2.6 → datachain-0.2.7}/examples/wds_filtered.py +0 -0
  88. {datachain-0.2.6 → datachain-0.2.7}/examples/zalando/zalando_clip.py +0 -0
  89. {datachain-0.2.6 → datachain-0.2.7}/examples/zalando/zalando_dir_as_class.py +0 -0
  90. {datachain-0.2.6 → datachain-0.2.7}/examples/zalando/zalando_splits_and_classes_ds.py +0 -0
  91. {datachain-0.2.6 → datachain-0.2.7}/examples/zalando/zalando_splits_and_classes_output.py +0 -0
  92. {datachain-0.2.6 → datachain-0.2.7}/mkdocs.yml +0 -0
  93. {datachain-0.2.6 → datachain-0.2.7}/noxfile.py +0 -0
  94. {datachain-0.2.6 → datachain-0.2.7}/pyproject.toml +0 -0
  95. {datachain-0.2.6 → datachain-0.2.7}/setup.cfg +0 -0
  96. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/__main__.py +0 -0
  97. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/asyn.py +0 -0
  98. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/cache.py +0 -0
  99. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/catalog/__init__.py +0 -0
  100. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/catalog/catalog.py +0 -0
  101. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/catalog/datasource.py +0 -0
  102. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/catalog/loader.py +0 -0
  103. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/catalog/subclass.py +0 -0
  104. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/cli.py +0 -0
  105. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/cli_utils.py +0 -0
  106. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/client/__init__.py +0 -0
  107. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/client/azure.py +0 -0
  108. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/client/fileslice.py +0 -0
  109. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/client/fsspec.py +0 -0
  110. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/client/gcs.py +0 -0
  111. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/client/local.py +0 -0
  112. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/client/s3.py +0 -0
  113. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/config.py +0 -0
  114. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/data_storage/__init__.py +0 -0
  115. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/data_storage/db_engine.py +0 -0
  116. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/data_storage/id_generator.py +0 -0
  117. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/data_storage/job.py +0 -0
  118. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/data_storage/metastore.py +0 -0
  119. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/data_storage/schema.py +0 -0
  120. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/data_storage/serializer.py +0 -0
  121. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/data_storage/sqlite.py +0 -0
  122. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/data_storage/warehouse.py +0 -0
  123. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/dataset.py +0 -0
  124. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/error.py +0 -0
  125. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/lib/__init__.py +0 -0
  126. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/lib/arrow.py +0 -0
  127. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/lib/cached_stream.py +0 -0
  128. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/lib/claude.py +0 -0
  129. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/lib/clip.py +0 -0
  130. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/lib/dc.py +0 -0
  131. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/lib/feature.py +0 -0
  132. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/lib/feature_registry.py +0 -0
  133. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/lib/feature_utils.py +0 -0
  134. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/lib/file.py +0 -0
  135. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/lib/gpt4_vision.py +0 -0
  136. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/lib/hf_image_to_text.py +0 -0
  137. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/lib/hf_pipeline.py +0 -0
  138. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/lib/image.py +0 -0
  139. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/lib/image_transform.py +0 -0
  140. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/lib/iptc_exif_xmp.py +0 -0
  141. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/lib/meta_formats.py +0 -0
  142. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/lib/pytorch.py +0 -0
  143. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/lib/settings.py +0 -0
  144. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/lib/signal_schema.py +0 -0
  145. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/lib/text.py +0 -0
  146. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/lib/udf.py +0 -0
  147. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/lib/udf_signature.py +0 -0
  148. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/lib/unstructured.py +0 -0
  149. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/lib/utils.py +0 -0
  150. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/lib/vfile.py +0 -0
  151. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/lib/webdataset.py +0 -0
  152. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/lib/webdataset_laion.py +0 -0
  153. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/listing.py +0 -0
  154. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/node.py +0 -0
  155. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/nodes_fetcher.py +0 -0
  156. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/nodes_thread_pool.py +0 -0
  157. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/progress.py +0 -0
  158. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/py.typed +0 -0
  159. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/query/__init__.py +0 -0
  160. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/query/batch.py +0 -0
  161. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/query/builtins.py +0 -0
  162. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/query/dataset.py +0 -0
  163. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/query/dispatch.py +0 -0
  164. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/query/metrics.py +0 -0
  165. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/query/params.py +0 -0
  166. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/query/schema.py +0 -0
  167. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/query/session.py +0 -0
  168. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/query/udf.py +0 -0
  169. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/remote/__init__.py +0 -0
  170. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/remote/studio.py +0 -0
  171. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/sql/__init__.py +0 -0
  172. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/sql/default/__init__.py +0 -0
  173. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/sql/default/base.py +0 -0
  174. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/sql/functions/__init__.py +0 -0
  175. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/sql/functions/array.py +0 -0
  176. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/sql/functions/conditional.py +0 -0
  177. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/sql/functions/path.py +0 -0
  178. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/sql/functions/random.py +0 -0
  179. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/sql/functions/string.py +0 -0
  180. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/sql/selectable.py +0 -0
  181. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/sql/sqlite/__init__.py +0 -0
  182. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/sql/sqlite/base.py +0 -0
  183. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/sql/sqlite/types.py +0 -0
  184. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/sql/sqlite/vector.py +0 -0
  185. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/sql/types.py +0 -0
  186. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/sql/utils.py +0 -0
  187. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/storage.py +0 -0
  188. {datachain-0.2.6 → datachain-0.2.7}/src/datachain/utils.py +0 -0
  189. {datachain-0.2.6 → datachain-0.2.7}/src/datachain.egg-info/dependency_links.txt +0 -0
  190. {datachain-0.2.6 → datachain-0.2.7}/src/datachain.egg-info/entry_points.txt +0 -0
  191. {datachain-0.2.6 → datachain-0.2.7}/src/datachain.egg-info/requires.txt +0 -0
  192. {datachain-0.2.6 → datachain-0.2.7}/src/datachain.egg-info/top_level.txt +0 -0
  193. {datachain-0.2.6 → datachain-0.2.7}/tests/__init__.py +0 -0
  194. {datachain-0.2.6 → datachain-0.2.7}/tests/benchmarks/__init__.py +0 -0
  195. {datachain-0.2.6 → datachain-0.2.7}/tests/benchmarks/conftest.py +0 -0
  196. {datachain-0.2.6 → datachain-0.2.7}/tests/benchmarks/test_ls.py +0 -0
  197. {datachain-0.2.6 → datachain-0.2.7}/tests/benchmarks/test_version.py +0 -0
  198. {datachain-0.2.6 → datachain-0.2.7}/tests/conftest.py +0 -0
  199. {datachain-0.2.6 → datachain-0.2.7}/tests/data.py +0 -0
  200. {datachain-0.2.6 → datachain-0.2.7}/tests/examples/__init__.py +0 -0
  201. {datachain-0.2.6 → datachain-0.2.7}/tests/examples/test_wds_e2e.py +0 -0
  202. {datachain-0.2.6 → datachain-0.2.7}/tests/examples/wds_data.py +0 -0
  203. {datachain-0.2.6 → datachain-0.2.7}/tests/func/__init__.py +0 -0
  204. {datachain-0.2.6 → datachain-0.2.7}/tests/func/test_catalog.py +0 -0
  205. {datachain-0.2.6 → datachain-0.2.7}/tests/func/test_client.py +0 -0
  206. {datachain-0.2.6 → datachain-0.2.7}/tests/func/test_datachain.py +0 -0
  207. {datachain-0.2.6 → datachain-0.2.7}/tests/func/test_dataset_query.py +0 -0
  208. {datachain-0.2.6 → datachain-0.2.7}/tests/func/test_datasets.py +0 -0
  209. {datachain-0.2.6 → datachain-0.2.7}/tests/func/test_ls.py +0 -0
  210. {datachain-0.2.6 → datachain-0.2.7}/tests/func/test_pull.py +0 -0
  211. {datachain-0.2.6 → datachain-0.2.7}/tests/func/test_query.py +0 -0
  212. {datachain-0.2.6 → datachain-0.2.7}/tests/scripts/feature_class.py +0 -0
  213. {datachain-0.2.6 → datachain-0.2.7}/tests/scripts/feature_class_parallel.py +0 -0
  214. {datachain-0.2.6 → datachain-0.2.7}/tests/scripts/name_len_normal.py +0 -0
  215. {datachain-0.2.6 → datachain-0.2.7}/tests/scripts/name_len_slow.py +0 -0
  216. {datachain-0.2.6 → datachain-0.2.7}/tests/test_cli_e2e.py +0 -0
  217. {datachain-0.2.6 → datachain-0.2.7}/tests/test_query_e2e.py +0 -0
  218. {datachain-0.2.6 → datachain-0.2.7}/tests/unit/__init__.py +0 -0
  219. {datachain-0.2.6 → datachain-0.2.7}/tests/unit/lib/__init__.py +0 -0
  220. {datachain-0.2.6 → datachain-0.2.7}/tests/unit/lib/test_arrow.py +0 -0
  221. {datachain-0.2.6 → datachain-0.2.7}/tests/unit/lib/test_datachain.py +0 -0
  222. {datachain-0.2.6 → datachain-0.2.7}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
  223. {datachain-0.2.6 → datachain-0.2.7}/tests/unit/lib/test_datachain_merge.py +0 -0
  224. {datachain-0.2.6 → datachain-0.2.7}/tests/unit/lib/test_feature.py +0 -0
  225. {datachain-0.2.6 → datachain-0.2.7}/tests/unit/lib/test_feature_utils.py +0 -0
  226. {datachain-0.2.6 → datachain-0.2.7}/tests/unit/lib/test_file.py +0 -0
  227. {datachain-0.2.6 → datachain-0.2.7}/tests/unit/lib/test_image.py +0 -0
  228. {datachain-0.2.6 → datachain-0.2.7}/tests/unit/lib/test_signal_schema.py +0 -0
  229. {datachain-0.2.6 → datachain-0.2.7}/tests/unit/lib/test_udf_signature.py +0 -0
  230. {datachain-0.2.6 → datachain-0.2.7}/tests/unit/lib/test_utils.py +0 -0
  231. {datachain-0.2.6 → datachain-0.2.7}/tests/unit/lib/test_webdataset.py +0 -0
  232. {datachain-0.2.6 → datachain-0.2.7}/tests/unit/sql/__init__.py +0 -0
  233. {datachain-0.2.6 → datachain-0.2.7}/tests/unit/sql/sqlite/__init__.py +0 -0
  234. {datachain-0.2.6 → datachain-0.2.7}/tests/unit/sql/sqlite/test_utils.py +0 -0
  235. {datachain-0.2.6 → datachain-0.2.7}/tests/unit/sql/test_array.py +0 -0
  236. {datachain-0.2.6 → datachain-0.2.7}/tests/unit/sql/test_conditional.py +0 -0
  237. {datachain-0.2.6 → datachain-0.2.7}/tests/unit/sql/test_path.py +0 -0
  238. {datachain-0.2.6 → datachain-0.2.7}/tests/unit/sql/test_random.py +0 -0
  239. {datachain-0.2.6 → datachain-0.2.7}/tests/unit/sql/test_selectable.py +0 -0
  240. {datachain-0.2.6 → datachain-0.2.7}/tests/unit/sql/test_string.py +0 -0
  241. {datachain-0.2.6 → datachain-0.2.7}/tests/unit/test_asyn.py +0 -0
  242. {datachain-0.2.6 → datachain-0.2.7}/tests/unit/test_cache.py +0 -0
  243. {datachain-0.2.6 → datachain-0.2.7}/tests/unit/test_catalog.py +0 -0
  244. {datachain-0.2.6 → datachain-0.2.7}/tests/unit/test_catalog_loader.py +0 -0
  245. {datachain-0.2.6 → datachain-0.2.7}/tests/unit/test_cli_parsing.py +0 -0
  246. {datachain-0.2.6 → datachain-0.2.7}/tests/unit/test_client.py +0 -0
  247. {datachain-0.2.6 → datachain-0.2.7}/tests/unit/test_client_s3.py +0 -0
  248. {datachain-0.2.6 → datachain-0.2.7}/tests/unit/test_data_storage.py +0 -0
  249. {datachain-0.2.6 → datachain-0.2.7}/tests/unit/test_database_engine.py +0 -0
  250. {datachain-0.2.6 → datachain-0.2.7}/tests/unit/test_dataset.py +0 -0
  251. {datachain-0.2.6 → datachain-0.2.7}/tests/unit/test_dispatch.py +0 -0
  252. {datachain-0.2.6 → datachain-0.2.7}/tests/unit/test_fileslice.py +0 -0
  253. {datachain-0.2.6 → datachain-0.2.7}/tests/unit/test_id_generator.py +0 -0
  254. {datachain-0.2.6 → datachain-0.2.7}/tests/unit/test_listing.py +0 -0
  255. {datachain-0.2.6 → datachain-0.2.7}/tests/unit/test_metastore.py +0 -0
  256. {datachain-0.2.6 → datachain-0.2.7}/tests/unit/test_query_metrics.py +0 -0
  257. {datachain-0.2.6 → datachain-0.2.7}/tests/unit/test_query_params.py +0 -0
  258. {datachain-0.2.6 → datachain-0.2.7}/tests/unit/test_serializer.py +0 -0
  259. {datachain-0.2.6 → datachain-0.2.7}/tests/unit/test_session.py +0 -0
  260. {datachain-0.2.6 → datachain-0.2.7}/tests/unit/test_storage.py +0 -0
  261. {datachain-0.2.6 → datachain-0.2.7}/tests/unit/test_udf.py +0 -0
  262. {datachain-0.2.6 → datachain-0.2.7}/tests/unit/test_utils.py +0 -0
  263. {datachain-0.2.6 → datachain-0.2.7}/tests/unit/test_warehouse.py +0 -0
  264. {datachain-0.2.6 → datachain-0.2.7}/tests/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datachain
3
- Version: 0.2.6
3
+ Version: 0.2.7
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License: Apache-2.0
@@ -2,8 +2,6 @@ from datachain.lib.dc import C, DataChain
2
2
  from datachain.lib.feature import Feature
3
3
  from datachain.lib.feature_utils import pydantic_to_feature
4
4
  from datachain.lib.file import File, FileError, FileFeature, IndexedFile, TarVFile
5
- from datachain.lib.image import ImageFile, convert_images
6
- from datachain.lib.text import convert_text
7
5
  from datachain.lib.udf import Aggregator, Generator, Mapper
8
6
  from datachain.lib.utils import AbstractUDF, DataChainError
9
7
  from datachain.query.dataset import UDF as BaseUDF # noqa: N811
@@ -23,12 +21,9 @@ __all__ = [
23
21
  "FileError",
24
22
  "FileFeature",
25
23
  "Generator",
26
- "ImageFile",
27
24
  "IndexedFile",
28
25
  "Mapper",
29
26
  "Session",
30
27
  "TarVFile",
31
- "convert_images",
32
- "convert_text",
33
28
  "pydantic_to_feature",
34
29
  ]
@@ -0,0 +1,3 @@
1
+ from datachain.lib.image import ImageFile, convert_images
2
+
3
+ __all__ = ["ImageFile", "convert_images"]
@@ -0,0 +1,3 @@
1
+ from datachain.lib.text import convert_text
2
+
3
+ __all__ = ["convert_text"]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datachain
3
- Version: 0.2.6
3
+ Version: 0.2.7
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License: Apache-2.0
@@ -125,6 +125,7 @@ src/datachain/data_storage/schema.py
125
125
  src/datachain/data_storage/serializer.py
126
126
  src/datachain/data_storage/sqlite.py
127
127
  src/datachain/data_storage/warehouse.py
128
+ src/datachain/image/__init__.py
128
129
  src/datachain/lib/__init__.py
129
130
  src/datachain/lib/arrow.py
130
131
  src/datachain/lib/cached_stream.py
@@ -181,6 +182,7 @@ src/datachain/sql/sqlite/__init__.py
181
182
  src/datachain/sql/sqlite/base.py
182
183
  src/datachain/sql/sqlite/types.py
183
184
  src/datachain/sql/sqlite/vector.py
185
+ src/datachain/text/__init__.py
184
186
  tests/__init__.py
185
187
  tests/conftest.py
186
188
  tests/data.py
@@ -234,6 +236,7 @@ tests/unit/test_udf.py
234
236
  tests/unit/test_utils.py
235
237
  tests/unit/test_warehouse.py
236
238
  tests/unit/lib/__init__.py
239
+ tests/unit/lib/conftest.py
237
240
  tests/unit/lib/test_arrow.py
238
241
  tests/unit/lib/test_clip.py
239
242
  tests/unit/lib/test_datachain.py
@@ -1,3 +1,5 @@
1
+ from pathlib import Path
2
+
1
3
  import open_clip
2
4
  import pytest
3
5
  from torch import Size, Tensor
@@ -8,10 +10,10 @@ from datachain.lib.dc import DataChain
8
10
  from datachain.lib.pytorch import PytorchDataset
9
11
 
10
12
 
11
- @pytest.fixture
12
- def fake_dataset(tmp_path, catalog):
13
+ @pytest.fixture(scope="module")
14
+ def fake_dataset(tmpdir_factory):
13
15
  # Create fake images in labeled dirs
14
- data_path = tmp_path / "data" / ""
16
+ data_path = Path(tmpdir_factory.mktemp("data"))
15
17
  for i, (img, label) in enumerate(FakeData()):
16
18
  label = str(label)
17
19
  (data_path / label).mkdir(parents=True, exist_ok=True)
@@ -37,11 +39,11 @@ def test_pytorch_dataset(fake_dataset):
37
39
  transform=transform,
38
40
  tokenizer=tokenizer,
39
41
  )
40
- for img, text, label in pt_dataset:
41
- assert isinstance(img, Tensor)
42
- assert isinstance(text, Tensor)
43
- assert isinstance(label, int)
44
- assert img.size() == Size([3, 64, 64])
42
+ img, text, label = next(iter(pt_dataset))
43
+ assert isinstance(img, Tensor)
44
+ assert isinstance(text, Tensor)
45
+ assert isinstance(label, int)
46
+ assert img.size() == Size([3, 64, 64])
45
47
 
46
48
 
47
49
  def test_pytorch_dataset_sample(fake_dataset):
@@ -62,8 +64,8 @@ def test_to_pytorch(fake_dataset):
62
64
  tokenizer = open_clip.get_tokenizer("ViT-B-32")
63
65
  pt_dataset = fake_dataset.to_pytorch(transform=transform, tokenizer=tokenizer)
64
66
  assert isinstance(pt_dataset, IterableDataset)
65
- for img, text, label in pt_dataset:
66
- assert isinstance(img, Tensor)
67
- assert isinstance(text, Tensor)
68
- assert isinstance(label, int)
69
- assert img.size() == Size([3, 64, 64])
67
+ img, text, label = next(iter(pt_dataset))
68
+ assert isinstance(img, Tensor)
69
+ assert isinstance(text, Tensor)
70
+ assert isinstance(label, int)
71
+ assert img.size() == Size([3, 64, 64])
@@ -0,0 +1,21 @@
1
+ import pytest
2
+ import torch
3
+ from torch import float32
4
+ from torchvision.transforms import v2
5
+
6
+
7
+ @pytest.fixture(scope="session")
8
+ def fake_clip_model():
9
+ class Model:
10
+ def encode_image(self, tensor):
11
+ return torch.randn(len(tensor), 512)
12
+
13
+ def encode_text(self, tensor):
14
+ return torch.randn(len(tensor), 512)
15
+
16
+ def tokenizer(tensor, context_length=77):
17
+ return torch.randn(len(tensor), context_length)
18
+
19
+ model = Model()
20
+ preprocess = v2.ToDtype(float32, scale=True)
21
+ return model, preprocess, tokenizer
@@ -1,4 +1,3 @@
1
- import open_clip
2
1
  import pytest
3
2
  from PIL import Image
4
3
  from transformers import CLIPModel, CLIPProcessor
@@ -7,10 +6,6 @@ from datachain.lib.clip import similarity_scores
7
6
 
8
7
  IMAGES = [Image.new(mode="RGB", size=(64, 64)), Image.new(mode="RGB", size=(32, 32))]
9
8
  TEXTS = ["text1", "text2"]
10
- MODEL, _, PREPROCESS = open_clip.create_model_and_transforms(
11
- "ViT-B-32", pretrained="laion2b_s34b_b79k"
12
- )
13
- TOKENIZER = open_clip.get_tokenizer("ViT-B-32")
14
9
 
15
10
 
16
11
  @pytest.mark.parametrize(
@@ -20,15 +15,16 @@ TOKENIZER = open_clip.get_tokenizer("ViT-B-32")
20
15
  @pytest.mark.parametrize("text", [None, "text", TEXTS])
21
16
  @pytest.mark.parametrize("prob", [True, False])
22
17
  @pytest.mark.parametrize("image_to_text", [True, False])
23
- def test_similarity_scores(images, text, prob, image_to_text):
18
+ def test_similarity_scores(fake_clip_model, images, text, prob, image_to_text):
19
+ model, preprocess, tokenizer = fake_clip_model
24
20
  if not (images or text):
25
21
  with pytest.raises(ValueError):
26
22
  scores = similarity_scores(
27
- images, text, MODEL, PREPROCESS, TOKENIZER, prob, image_to_text
23
+ images, text, model, preprocess, tokenizer, prob, image_to_text
28
24
  )
29
25
  else:
30
26
  scores = similarity_scores(
31
- images, text, MODEL, PREPROCESS, TOKENIZER, prob, image_to_text
27
+ images, text, model, preprocess, tokenizer, prob, image_to_text
32
28
  )
33
29
  assert isinstance(scores, list)
34
30
  if not images:
@@ -1,4 +1,3 @@
1
- import open_clip
2
1
  import torch
3
2
  from transformers import CLIPModel, CLIPProcessor
4
3
 
@@ -6,10 +5,9 @@ from datachain.lib.file import TextFile
6
5
  from datachain.lib.text import convert_text
7
6
 
8
7
 
9
- def test_convert_text():
8
+ def test_convert_text(fake_clip_model):
10
9
  text = "thisismytext"
11
- tokenizer_model = "ViT-B-32"
12
- tokenizer = open_clip.get_tokenizer(tokenizer_model)
10
+ model, _, tokenizer = fake_clip_model
13
11
  converted_text = convert_text(text, tokenizer=tokenizer)
14
12
  assert isinstance(converted_text, torch.Tensor)
15
13
 
@@ -22,7 +20,6 @@ def test_convert_text():
22
20
  converted_text = convert_text(
23
21
  text, tokenizer=tokenizer, tokenizer_kwargs=tokenizer_kwargs
24
22
  )
25
- model, _, _ = open_clip.create_model_and_transforms(tokenizer_model)
26
23
  converted_text = convert_text(text, tokenizer=tokenizer, encoder=model.encode_text)
27
24
  assert converted_text.dtype == torch.float32
28
25
 
@@ -18,14 +18,13 @@ def test_module_exports():
18
18
  FileError,
19
19
  FileFeature,
20
20
  Generator,
21
- ImageFile,
22
21
  IndexedFile,
23
22
  Mapper,
24
23
  Session,
25
24
  TarVFile,
26
- convert_images,
27
- convert_text,
28
25
  pydantic_to_feature,
29
26
  )
27
+ from datachain.image import ImageFile, convert_images
28
+ from datachain.text import convert_text
30
29
  except Exception as e: # noqa: BLE001
31
30
  pytest.fail(f"Importing raised an exception: {e}")
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes