datachain 0.8.1__tar.gz → 0.8.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (291)
  1. {datachain-0.8.1/src/datachain.egg-info → datachain-0.8.2}/PKG-INFO +83 -1
  2. {datachain-0.8.1 → datachain-0.8.2}/README.rst +82 -0
  3. {datachain-0.8.1 → datachain-0.8.2}/docs/quick-start.md +6 -6
  4. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/client/gcs.py +8 -7
  5. {datachain-0.8.1 → datachain-0.8.2/src/datachain.egg-info}/PKG-INFO +83 -1
  6. datachain-0.8.2/tests/unit/test_client_gcs.py +6 -0
  7. datachain-0.8.1/tests/unit/test_client_gcs.py +0 -17
  8. {datachain-0.8.1 → datachain-0.8.2}/.cruft.json +0 -0
  9. {datachain-0.8.1 → datachain-0.8.2}/.gitattributes +0 -0
  10. {datachain-0.8.1 → datachain-0.8.2}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  11. {datachain-0.8.1 → datachain-0.8.2}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
  12. {datachain-0.8.1 → datachain-0.8.2}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  13. {datachain-0.8.1 → datachain-0.8.2}/.github/codecov.yaml +0 -0
  14. {datachain-0.8.1 → datachain-0.8.2}/.github/dependabot.yml +0 -0
  15. {datachain-0.8.1 → datachain-0.8.2}/.github/workflows/benchmarks.yml +0 -0
  16. {datachain-0.8.1 → datachain-0.8.2}/.github/workflows/release.yml +0 -0
  17. {datachain-0.8.1 → datachain-0.8.2}/.github/workflows/tests-studio.yml +0 -0
  18. {datachain-0.8.1 → datachain-0.8.2}/.github/workflows/tests.yml +0 -0
  19. {datachain-0.8.1 → datachain-0.8.2}/.github/workflows/update-template.yaml +0 -0
  20. {datachain-0.8.1 → datachain-0.8.2}/.gitignore +0 -0
  21. {datachain-0.8.1 → datachain-0.8.2}/.pre-commit-config.yaml +0 -0
  22. {datachain-0.8.1 → datachain-0.8.2}/CODE_OF_CONDUCT.rst +0 -0
  23. {datachain-0.8.1 → datachain-0.8.2}/LICENSE +0 -0
  24. {datachain-0.8.1 → datachain-0.8.2}/docs/assets/captioned_cartoons.png +0 -0
  25. {datachain-0.8.1 → datachain-0.8.2}/docs/assets/datachain-white.svg +0 -0
  26. {datachain-0.8.1 → datachain-0.8.2}/docs/assets/datachain.svg +0 -0
  27. {datachain-0.8.1 → datachain-0.8.2}/docs/contributing.md +0 -0
  28. {datachain-0.8.1 → datachain-0.8.2}/docs/css/github-permalink-style.css +0 -0
  29. {datachain-0.8.1 → datachain-0.8.2}/docs/examples.md +0 -0
  30. {datachain-0.8.1 → datachain-0.8.2}/docs/index.md +0 -0
  31. {datachain-0.8.1 → datachain-0.8.2}/docs/overrides/main.html +0 -0
  32. {datachain-0.8.1 → datachain-0.8.2}/docs/references/datachain.md +0 -0
  33. {datachain-0.8.1 → datachain-0.8.2}/docs/references/datatype.md +0 -0
  34. {datachain-0.8.1 → datachain-0.8.2}/docs/references/file.md +0 -0
  35. {datachain-0.8.1 → datachain-0.8.2}/docs/references/index.md +0 -0
  36. {datachain-0.8.1 → datachain-0.8.2}/docs/references/sql.md +0 -0
  37. {datachain-0.8.1 → datachain-0.8.2}/docs/references/torch.md +0 -0
  38. {datachain-0.8.1 → datachain-0.8.2}/docs/references/udf.md +0 -0
  39. {datachain-0.8.1 → datachain-0.8.2}/docs/tutorials.md +0 -0
  40. {datachain-0.8.1 → datachain-0.8.2}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
  41. {datachain-0.8.1 → datachain-0.8.2}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
  42. {datachain-0.8.1 → datachain-0.8.2}/examples/computer_vision/openimage-detect.py +0 -0
  43. {datachain-0.8.1 → datachain-0.8.2}/examples/computer_vision/ultralytics-bbox.py +0 -0
  44. {datachain-0.8.1 → datachain-0.8.2}/examples/computer_vision/ultralytics-pose.py +0 -0
  45. {datachain-0.8.1 → datachain-0.8.2}/examples/computer_vision/ultralytics-segment.py +0 -0
  46. {datachain-0.8.1 → datachain-0.8.2}/examples/get_started/common_sql_functions.py +0 -0
  47. {datachain-0.8.1 → datachain-0.8.2}/examples/get_started/json-csv-reader.py +0 -0
  48. {datachain-0.8.1 → datachain-0.8.2}/examples/get_started/torch-loader.py +0 -0
  49. {datachain-0.8.1 → datachain-0.8.2}/examples/get_started/udfs/parallel.py +0 -0
  50. {datachain-0.8.1 → datachain-0.8.2}/examples/get_started/udfs/simple.py +0 -0
  51. {datachain-0.8.1 → datachain-0.8.2}/examples/get_started/udfs/stateful.py +0 -0
  52. {datachain-0.8.1 → datachain-0.8.2}/examples/llm_and_nlp/claude-query.py +0 -0
  53. {datachain-0.8.1 → datachain-0.8.2}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
  54. {datachain-0.8.1 → datachain-0.8.2}/examples/llm_and_nlp/unstructured-embeddings-gen.py +0 -0
  55. {datachain-0.8.1 → datachain-0.8.2}/examples/llm_and_nlp/unstructured-summary-map.py +0 -0
  56. {datachain-0.8.1 → datachain-0.8.2}/examples/multimodal/clip_inference.py +0 -0
  57. {datachain-0.8.1 → datachain-0.8.2}/examples/multimodal/hf_pipeline.py +0 -0
  58. {datachain-0.8.1 → datachain-0.8.2}/examples/multimodal/openai_image_desc_lib.py +0 -0
  59. {datachain-0.8.1 → datachain-0.8.2}/examples/multimodal/wds.py +0 -0
  60. {datachain-0.8.1 → datachain-0.8.2}/examples/multimodal/wds_filtered.py +0 -0
  61. {datachain-0.8.1 → datachain-0.8.2}/mkdocs.yml +0 -0
  62. {datachain-0.8.1 → datachain-0.8.2}/noxfile.py +0 -0
  63. {datachain-0.8.1 → datachain-0.8.2}/pyproject.toml +0 -0
  64. {datachain-0.8.1 → datachain-0.8.2}/setup.cfg +0 -0
  65. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/__init__.py +0 -0
  66. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/__main__.py +0 -0
  67. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/asyn.py +0 -0
  68. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/cache.py +0 -0
  69. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/catalog/__init__.py +0 -0
  70. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/catalog/catalog.py +0 -0
  71. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/catalog/datasource.py +0 -0
  72. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/catalog/loader.py +0 -0
  73. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/cli.py +0 -0
  74. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/cli_utils.py +0 -0
  75. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/client/__init__.py +0 -0
  76. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/client/azure.py +0 -0
  77. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/client/fileslice.py +0 -0
  78. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/client/fsspec.py +0 -0
  79. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/client/hf.py +0 -0
  80. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/client/local.py +0 -0
  81. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/client/s3.py +0 -0
  82. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/config.py +0 -0
  83. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/data_storage/__init__.py +0 -0
  84. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/data_storage/db_engine.py +0 -0
  85. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/data_storage/job.py +0 -0
  86. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/data_storage/metastore.py +0 -0
  87. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/data_storage/schema.py +0 -0
  88. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/data_storage/serializer.py +0 -0
  89. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/data_storage/sqlite.py +0 -0
  90. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/data_storage/warehouse.py +0 -0
  91. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/dataset.py +0 -0
  92. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/error.py +0 -0
  93. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/func/__init__.py +0 -0
  94. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/func/aggregate.py +0 -0
  95. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/func/array.py +0 -0
  96. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/func/base.py +0 -0
  97. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/func/conditional.py +0 -0
  98. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/func/func.py +0 -0
  99. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/func/numeric.py +0 -0
  100. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/func/path.py +0 -0
  101. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/func/random.py +0 -0
  102. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/func/string.py +0 -0
  103. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/func/window.py +0 -0
  104. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/job.py +0 -0
  105. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/lib/__init__.py +0 -0
  106. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/lib/arrow.py +0 -0
  107. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/lib/clip.py +0 -0
  108. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/lib/convert/__init__.py +0 -0
  109. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/lib/convert/flatten.py +0 -0
  110. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/lib/convert/python_to_sql.py +0 -0
  111. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/lib/convert/sql_to_python.py +0 -0
  112. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/lib/convert/unflatten.py +0 -0
  113. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/lib/convert/values_to_tuples.py +0 -0
  114. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/lib/data_model.py +0 -0
  115. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/lib/dataset_info.py +0 -0
  116. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/lib/dc.py +0 -0
  117. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/lib/diff.py +0 -0
  118. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/lib/file.py +0 -0
  119. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/lib/hf.py +0 -0
  120. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/lib/image.py +0 -0
  121. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/lib/listing.py +0 -0
  122. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/lib/listing_info.py +0 -0
  123. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/lib/meta_formats.py +0 -0
  124. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/lib/model_store.py +0 -0
  125. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/lib/pytorch.py +0 -0
  126. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/lib/settings.py +0 -0
  127. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/lib/signal_schema.py +0 -0
  128. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/lib/tar.py +0 -0
  129. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/lib/text.py +0 -0
  130. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/lib/udf.py +0 -0
  131. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/lib/udf_signature.py +0 -0
  132. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/lib/utils.py +0 -0
  133. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/lib/vfile.py +0 -0
  134. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/lib/webdataset.py +0 -0
  135. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/lib/webdataset_laion.py +0 -0
  136. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/listing.py +0 -0
  137. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/model/__init__.py +0 -0
  138. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/model/bbox.py +0 -0
  139. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/model/pose.py +0 -0
  140. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/model/segment.py +0 -0
  141. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/model/ultralytics/__init__.py +0 -0
  142. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/model/ultralytics/bbox.py +0 -0
  143. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/model/ultralytics/pose.py +0 -0
  144. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/model/ultralytics/segment.py +0 -0
  145. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/node.py +0 -0
  146. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/nodes_fetcher.py +0 -0
  147. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/nodes_thread_pool.py +0 -0
  148. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/progress.py +0 -0
  149. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/py.typed +0 -0
  150. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/query/__init__.py +0 -0
  151. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/query/batch.py +0 -0
  152. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/query/dataset.py +0 -0
  153. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/query/dispatch.py +0 -0
  154. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/query/metrics.py +0 -0
  155. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/query/params.py +0 -0
  156. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/query/queue.py +0 -0
  157. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/query/schema.py +0 -0
  158. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/query/session.py +0 -0
  159. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/query/udf.py +0 -0
  160. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/query/utils.py +0 -0
  161. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/remote/__init__.py +0 -0
  162. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/remote/studio.py +0 -0
  163. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/sql/__init__.py +0 -0
  164. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/sql/default/__init__.py +0 -0
  165. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/sql/default/base.py +0 -0
  166. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/sql/functions/__init__.py +0 -0
  167. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/sql/functions/aggregate.py +0 -0
  168. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/sql/functions/array.py +0 -0
  169. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/sql/functions/conditional.py +0 -0
  170. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/sql/functions/numeric.py +0 -0
  171. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/sql/functions/path.py +0 -0
  172. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/sql/functions/random.py +0 -0
  173. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/sql/functions/string.py +0 -0
  174. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/sql/selectable.py +0 -0
  175. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/sql/sqlite/__init__.py +0 -0
  176. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/sql/sqlite/base.py +0 -0
  177. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/sql/sqlite/types.py +0 -0
  178. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/sql/sqlite/vector.py +0 -0
  179. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/sql/types.py +0 -0
  180. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/sql/utils.py +0 -0
  181. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/studio.py +0 -0
  182. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/telemetry.py +0 -0
  183. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/toolkit/__init__.py +0 -0
  184. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/toolkit/split.py +0 -0
  185. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/torch/__init__.py +0 -0
  186. {datachain-0.8.1 → datachain-0.8.2}/src/datachain/utils.py +0 -0
  187. {datachain-0.8.1 → datachain-0.8.2}/src/datachain.egg-info/SOURCES.txt +0 -0
  188. {datachain-0.8.1 → datachain-0.8.2}/src/datachain.egg-info/dependency_links.txt +0 -0
  189. {datachain-0.8.1 → datachain-0.8.2}/src/datachain.egg-info/entry_points.txt +0 -0
  190. {datachain-0.8.1 → datachain-0.8.2}/src/datachain.egg-info/requires.txt +0 -0
  191. {datachain-0.8.1 → datachain-0.8.2}/src/datachain.egg-info/top_level.txt +0 -0
  192. {datachain-0.8.1 → datachain-0.8.2}/tests/__init__.py +0 -0
  193. {datachain-0.8.1 → datachain-0.8.2}/tests/benchmarks/__init__.py +0 -0
  194. {datachain-0.8.1 → datachain-0.8.2}/tests/benchmarks/conftest.py +0 -0
  195. {datachain-0.8.1 → datachain-0.8.2}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
  196. {datachain-0.8.1 → datachain-0.8.2}/tests/benchmarks/datasets/.dvc/config +0 -0
  197. {datachain-0.8.1 → datachain-0.8.2}/tests/benchmarks/datasets/.gitignore +0 -0
  198. {datachain-0.8.1 → datachain-0.8.2}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
  199. {datachain-0.8.1 → datachain-0.8.2}/tests/benchmarks/test_datachain.py +0 -0
  200. {datachain-0.8.1 → datachain-0.8.2}/tests/benchmarks/test_ls.py +0 -0
  201. {datachain-0.8.1 → datachain-0.8.2}/tests/benchmarks/test_version.py +0 -0
  202. {datachain-0.8.1 → datachain-0.8.2}/tests/conftest.py +0 -0
  203. {datachain-0.8.1 → datachain-0.8.2}/tests/data.py +0 -0
  204. {datachain-0.8.1 → datachain-0.8.2}/tests/examples/__init__.py +0 -0
  205. {datachain-0.8.1 → datachain-0.8.2}/tests/examples/test_examples.py +0 -0
  206. {datachain-0.8.1 → datachain-0.8.2}/tests/examples/test_wds_e2e.py +0 -0
  207. {datachain-0.8.1 → datachain-0.8.2}/tests/examples/wds_data.py +0 -0
  208. {datachain-0.8.1 → datachain-0.8.2}/tests/func/__init__.py +0 -0
  209. {datachain-0.8.1 → datachain-0.8.2}/tests/func/test_catalog.py +0 -0
  210. {datachain-0.8.1 → datachain-0.8.2}/tests/func/test_client.py +0 -0
  211. {datachain-0.8.1 → datachain-0.8.2}/tests/func/test_datachain.py +0 -0
  212. {datachain-0.8.1 → datachain-0.8.2}/tests/func/test_dataset_query.py +0 -0
  213. {datachain-0.8.1 → datachain-0.8.2}/tests/func/test_datasets.py +0 -0
  214. {datachain-0.8.1 → datachain-0.8.2}/tests/func/test_feature_pickling.py +0 -0
  215. {datachain-0.8.1 → datachain-0.8.2}/tests/func/test_listing.py +0 -0
  216. {datachain-0.8.1 → datachain-0.8.2}/tests/func/test_ls.py +0 -0
  217. {datachain-0.8.1 → datachain-0.8.2}/tests/func/test_meta_formats.py +0 -0
  218. {datachain-0.8.1 → datachain-0.8.2}/tests/func/test_metrics.py +0 -0
  219. {datachain-0.8.1 → datachain-0.8.2}/tests/func/test_pull.py +0 -0
  220. {datachain-0.8.1 → datachain-0.8.2}/tests/func/test_pytorch.py +0 -0
  221. {datachain-0.8.1 → datachain-0.8.2}/tests/func/test_query.py +0 -0
  222. {datachain-0.8.1 → datachain-0.8.2}/tests/func/test_session.py +0 -0
  223. {datachain-0.8.1 → datachain-0.8.2}/tests/func/test_toolkit.py +0 -0
  224. {datachain-0.8.1 → datachain-0.8.2}/tests/scripts/feature_class.py +0 -0
  225. {datachain-0.8.1 → datachain-0.8.2}/tests/scripts/feature_class_exception.py +0 -0
  226. {datachain-0.8.1 → datachain-0.8.2}/tests/scripts/feature_class_parallel.py +0 -0
  227. {datachain-0.8.1 → datachain-0.8.2}/tests/scripts/feature_class_parallel_data_model.py +0 -0
  228. {datachain-0.8.1 → datachain-0.8.2}/tests/scripts/name_len_slow.py +0 -0
  229. {datachain-0.8.1 → datachain-0.8.2}/tests/test_atomicity.py +0 -0
  230. {datachain-0.8.1 → datachain-0.8.2}/tests/test_cli_e2e.py +0 -0
  231. {datachain-0.8.1 → datachain-0.8.2}/tests/test_cli_studio.py +0 -0
  232. {datachain-0.8.1 → datachain-0.8.2}/tests/test_query_e2e.py +0 -0
  233. {datachain-0.8.1 → datachain-0.8.2}/tests/test_telemetry.py +0 -0
  234. {datachain-0.8.1 → datachain-0.8.2}/tests/unit/__init__.py +0 -0
  235. {datachain-0.8.1 → datachain-0.8.2}/tests/unit/lib/__init__.py +0 -0
  236. {datachain-0.8.1 → datachain-0.8.2}/tests/unit/lib/conftest.py +0 -0
  237. {datachain-0.8.1 → datachain-0.8.2}/tests/unit/lib/test_arrow.py +0 -0
  238. {datachain-0.8.1 → datachain-0.8.2}/tests/unit/lib/test_clip.py +0 -0
  239. {datachain-0.8.1 → datachain-0.8.2}/tests/unit/lib/test_datachain.py +0 -0
  240. {datachain-0.8.1 → datachain-0.8.2}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
  241. {datachain-0.8.1 → datachain-0.8.2}/tests/unit/lib/test_datachain_merge.py +0 -0
  242. {datachain-0.8.1 → datachain-0.8.2}/tests/unit/lib/test_diff.py +0 -0
  243. {datachain-0.8.1 → datachain-0.8.2}/tests/unit/lib/test_feature.py +0 -0
  244. {datachain-0.8.1 → datachain-0.8.2}/tests/unit/lib/test_feature_utils.py +0 -0
  245. {datachain-0.8.1 → datachain-0.8.2}/tests/unit/lib/test_file.py +0 -0
  246. {datachain-0.8.1 → datachain-0.8.2}/tests/unit/lib/test_hf.py +0 -0
  247. {datachain-0.8.1 → datachain-0.8.2}/tests/unit/lib/test_image.py +0 -0
  248. {datachain-0.8.1 → datachain-0.8.2}/tests/unit/lib/test_listing_info.py +0 -0
  249. {datachain-0.8.1 → datachain-0.8.2}/tests/unit/lib/test_models.py +0 -0
  250. {datachain-0.8.1 → datachain-0.8.2}/tests/unit/lib/test_schema.py +0 -0
  251. {datachain-0.8.1 → datachain-0.8.2}/tests/unit/lib/test_signal_schema.py +0 -0
  252. {datachain-0.8.1 → datachain-0.8.2}/tests/unit/lib/test_sql_to_python.py +0 -0
  253. {datachain-0.8.1 → datachain-0.8.2}/tests/unit/lib/test_text.py +0 -0
  254. {datachain-0.8.1 → datachain-0.8.2}/tests/unit/lib/test_udf_signature.py +0 -0
  255. {datachain-0.8.1 → datachain-0.8.2}/tests/unit/lib/test_utils.py +0 -0
  256. {datachain-0.8.1 → datachain-0.8.2}/tests/unit/lib/test_webdataset.py +0 -0
  257. {datachain-0.8.1 → datachain-0.8.2}/tests/unit/sql/__init__.py +0 -0
  258. {datachain-0.8.1 → datachain-0.8.2}/tests/unit/sql/sqlite/__init__.py +0 -0
  259. {datachain-0.8.1 → datachain-0.8.2}/tests/unit/sql/sqlite/test_types.py +0 -0
  260. {datachain-0.8.1 → datachain-0.8.2}/tests/unit/sql/sqlite/test_utils.py +0 -0
  261. {datachain-0.8.1 → datachain-0.8.2}/tests/unit/sql/test_array.py +0 -0
  262. {datachain-0.8.1 → datachain-0.8.2}/tests/unit/sql/test_conditional.py +0 -0
  263. {datachain-0.8.1 → datachain-0.8.2}/tests/unit/sql/test_path.py +0 -0
  264. {datachain-0.8.1 → datachain-0.8.2}/tests/unit/sql/test_random.py +0 -0
  265. {datachain-0.8.1 → datachain-0.8.2}/tests/unit/sql/test_selectable.py +0 -0
  266. {datachain-0.8.1 → datachain-0.8.2}/tests/unit/sql/test_string.py +0 -0
  267. {datachain-0.8.1 → datachain-0.8.2}/tests/unit/test_asyn.py +0 -0
  268. {datachain-0.8.1 → datachain-0.8.2}/tests/unit/test_cache.py +0 -0
  269. {datachain-0.8.1 → datachain-0.8.2}/tests/unit/test_catalog.py +0 -0
  270. {datachain-0.8.1 → datachain-0.8.2}/tests/unit/test_catalog_loader.py +0 -0
  271. {datachain-0.8.1 → datachain-0.8.2}/tests/unit/test_cli_parsing.py +0 -0
  272. {datachain-0.8.1 → datachain-0.8.2}/tests/unit/test_client.py +0 -0
  273. {datachain-0.8.1 → datachain-0.8.2}/tests/unit/test_client_s3.py +0 -0
  274. {datachain-0.8.1 → datachain-0.8.2}/tests/unit/test_config.py +0 -0
  275. {datachain-0.8.1 → datachain-0.8.2}/tests/unit/test_data_storage.py +0 -0
  276. {datachain-0.8.1 → datachain-0.8.2}/tests/unit/test_database_engine.py +0 -0
  277. {datachain-0.8.1 → datachain-0.8.2}/tests/unit/test_dataset.py +0 -0
  278. {datachain-0.8.1 → datachain-0.8.2}/tests/unit/test_dispatch.py +0 -0
  279. {datachain-0.8.1 → datachain-0.8.2}/tests/unit/test_fileslice.py +0 -0
  280. {datachain-0.8.1 → datachain-0.8.2}/tests/unit/test_func.py +0 -0
  281. {datachain-0.8.1 → datachain-0.8.2}/tests/unit/test_listing.py +0 -0
  282. {datachain-0.8.1 → datachain-0.8.2}/tests/unit/test_metastore.py +0 -0
  283. {datachain-0.8.1 → datachain-0.8.2}/tests/unit/test_module_exports.py +0 -0
  284. {datachain-0.8.1 → datachain-0.8.2}/tests/unit/test_query.py +0 -0
  285. {datachain-0.8.1 → datachain-0.8.2}/tests/unit/test_query_metrics.py +0 -0
  286. {datachain-0.8.1 → datachain-0.8.2}/tests/unit/test_query_params.py +0 -0
  287. {datachain-0.8.1 → datachain-0.8.2}/tests/unit/test_serializer.py +0 -0
  288. {datachain-0.8.1 → datachain-0.8.2}/tests/unit/test_session.py +0 -0
  289. {datachain-0.8.1 → datachain-0.8.2}/tests/unit/test_utils.py +0 -0
  290. {datachain-0.8.1 → datachain-0.8.2}/tests/unit/test_warehouse.py +0 -0
  291. {datachain-0.8.1 → datachain-0.8.2}/tests/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datachain
3
- Version: 0.8.1
3
+ Version: 0.8.2
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License: Apache-2.0
@@ -145,6 +145,88 @@ Getting Started
145
145
  Visit `Quick Start <https://docs.datachain.ai/quick-start>`_ and `Docs <https://docs.datachain.ai/>`_
146
146
  to get started with `DataChain` and learn more.
147
147
 
148
+ .. code:: bash
149
+
150
+ pip install datachain
151
+
152
+
153
+ Example: download subset of files based on metadata
154
+ ---------------------------------------------------
155
+
156
+ Sometimes users only need to download a specific subset of files from cloud storage,
157
+ rather than the entire dataset.
158
+ For example, you could use a JSON file's metadata to download just cat images with
159
+ high confidence scores.
160
+
161
+
162
+ .. code:: py
163
+
164
+ from datachain import Column, DataChain
165
+
166
+ meta = DataChain.from_json("gs://datachain-demo/dogs-and-cats/*json", object_name="meta", anon=True)
167
+ images = DataChain.from_storage("gs://datachain-demo/dogs-and-cats/*jpg", anon=True)
168
+
169
+ images_id = images.map(id=lambda file: file.path.split('.')[-2])
170
+ annotated = images_id.merge(meta, on="id", right_on="meta.id")
171
+
172
+ likely_cats = annotated.filter((Column("meta.inference.confidence") > 0.93) \
173
+ & (Column("meta.inference.class_") == "cat"))
174
+ likely_cats.export_files("high-confidence-cats/", signal="file")
175
+
176
+
177
+ Example: LLM based text-file evaluation
178
+ ---------------------------------------
179
+
180
+ In this example, we evaluate chatbot conversations stored in text files
181
+ using LLM based evaluation.
182
+
183
+ .. code:: shell
184
+
185
+ $ pip install mistralai # Requires version >=1.0.0
186
+ $ export MISTRAL_API_KEY=_your_key_
187
+
188
+ Python code:
189
+
190
+ .. code:: py
191
+
192
+ from mistralai import Mistral
193
+ from datachain import File, DataChain, Column
194
+
195
+ PROMPT = "Was this dialog successful? Answer in a single word: Success or Failure."
196
+
197
+ def eval_dialogue(file: File) -> bool:
198
+ client = Mistral()
199
+ response = client.chat.complete(
200
+ model="open-mixtral-8x22b",
201
+ messages=[{"role": "system", "content": PROMPT},
202
+ {"role": "user", "content": file.read()}])
203
+ result = response.choices[0].message.content
204
+ return result.lower().startswith("success")
205
+
206
+ chain = (
207
+ DataChain.from_storage("gs://datachain-demo/chatbot-KiT/", object_name="file", anon=True)
208
+ .settings(parallel=4, cache=True)
209
+ .map(is_success=eval_dialogue)
210
+ .save("mistral_files")
211
+ )
212
+
213
+ successful_chain = chain.filter(Column("is_success") == True)
214
+ successful_chain.export_files("./output_mistral")
215
+
216
+ print(f"{successful_chain.count()} files were exported")
217
+
218
+
219
+
220
+ With the instruction above, the Mistral model considers 31/50 files to hold the successful dialogues:
221
+
222
+ .. code:: shell
223
+
224
+ $ ls output_mistral/datachain-demo/chatbot-KiT/
225
+ 1.txt 15.txt 18.txt 2.txt 22.txt 25.txt 28.txt 33.txt 37.txt 4.txt 41.txt ...
226
+ $ ls output_mistral/datachain-demo/chatbot-KiT/ | wc -l
227
+ 31
228
+
229
+
148
230
  Key Features
149
231
  ============
150
232
 
@@ -42,6 +42,88 @@ Getting Started
42
42
  Visit `Quick Start <https://docs.datachain.ai/quick-start>`_ and `Docs <https://docs.datachain.ai/>`_
43
43
  to get started with `DataChain` and learn more.
44
44
 
45
+ .. code:: bash
46
+
47
+ pip install datachain
48
+
49
+
50
+ Example: download subset of files based on metadata
51
+ ---------------------------------------------------
52
+
53
+ Sometimes users only need to download a specific subset of files from cloud storage,
54
+ rather than the entire dataset.
55
+ For example, you could use a JSON file's metadata to download just cat images with
56
+ high confidence scores.
57
+
58
+
59
+ .. code:: py
60
+
61
+ from datachain import Column, DataChain
62
+
63
+ meta = DataChain.from_json("gs://datachain-demo/dogs-and-cats/*json", object_name="meta", anon=True)
64
+ images = DataChain.from_storage("gs://datachain-demo/dogs-and-cats/*jpg", anon=True)
65
+
66
+ images_id = images.map(id=lambda file: file.path.split('.')[-2])
67
+ annotated = images_id.merge(meta, on="id", right_on="meta.id")
68
+
69
+ likely_cats = annotated.filter((Column("meta.inference.confidence") > 0.93) \
70
+ & (Column("meta.inference.class_") == "cat"))
71
+ likely_cats.export_files("high-confidence-cats/", signal="file")
72
+
73
+
74
+ Example: LLM based text-file evaluation
75
+ ---------------------------------------
76
+
77
+ In this example, we evaluate chatbot conversations stored in text files
78
+ using LLM based evaluation.
79
+
80
+ .. code:: shell
81
+
82
+ $ pip install mistralai # Requires version >=1.0.0
83
+ $ export MISTRAL_API_KEY=_your_key_
84
+
85
+ Python code:
86
+
87
+ .. code:: py
88
+
89
+ from mistralai import Mistral
90
+ from datachain import File, DataChain, Column
91
+
92
+ PROMPT = "Was this dialog successful? Answer in a single word: Success or Failure."
93
+
94
+ def eval_dialogue(file: File) -> bool:
95
+ client = Mistral()
96
+ response = client.chat.complete(
97
+ model="open-mixtral-8x22b",
98
+ messages=[{"role": "system", "content": PROMPT},
99
+ {"role": "user", "content": file.read()}])
100
+ result = response.choices[0].message.content
101
+ return result.lower().startswith("success")
102
+
103
+ chain = (
104
+ DataChain.from_storage("gs://datachain-demo/chatbot-KiT/", object_name="file", anon=True)
105
+ .settings(parallel=4, cache=True)
106
+ .map(is_success=eval_dialogue)
107
+ .save("mistral_files")
108
+ )
109
+
110
+ successful_chain = chain.filter(Column("is_success") == True)
111
+ successful_chain.export_files("./output_mistral")
112
+
113
+ print(f"{successful_chain.count()} files were exported")
114
+
115
+
116
+
117
+ With the instruction above, the Mistral model considers 31/50 files to hold the successful dialogues:
118
+
119
+ .. code:: shell
120
+
121
+ $ ls output_mistral/datachain-demo/chatbot-KiT/
122
+ 1.txt 15.txt 18.txt 2.txt 22.txt 25.txt 28.txt 33.txt 37.txt 4.txt 41.txt ...
123
+ $ ls output_mistral/datachain-demo/chatbot-KiT/ | wc -l
124
+ 31
125
+
126
+
45
127
  Key Features
46
128
  ============
47
129
 
@@ -39,8 +39,8 @@ using JSON metadata:
39
39
  ``` py
40
40
  from datachain import Column, DataChain
41
41
 
42
- meta = DataChain.from_json("gs://datachain-demo/dogs-and-cats/*json", object_name="meta")
43
- images = DataChain.from_storage("gs://datachain-demo/dogs-and-cats/*jpg")
42
+ meta = DataChain.from_json("gs://datachain-demo/dogs-and-cats/*json", object_name="meta", anon=True)
43
+ images = DataChain.from_storage("gs://datachain-demo/dogs-and-cats/*jpg", anon=True)
44
44
 
45
45
  images_id = images.map(id=lambda file: file.path.split('.')[-2])
46
46
  annotated = images_id.merge(meta, on="id", right_on="meta.id")
@@ -78,7 +78,7 @@ def is_positive_dialogue_ending(file) -> bool:
78
78
 
79
79
  chain = (
80
80
  DataChain.from_storage("gs://datachain-demo/chatbot-KiT/",
81
- object_name="file", type="text")
81
+ object_name="file", type="text", anon=True)
82
82
  .settings(parallel=8, cache=True)
83
83
  .map(is_positive=is_positive_dialogue_ending)
84
84
  .save("file_response")
@@ -132,7 +132,7 @@ def eval_dialogue(file: File) -> bool:
132
132
  return result.lower().startswith("success")
133
133
 
134
134
  chain = (
135
- DataChain.from_storage("gs://datachain-demo/chatbot-KiT/", object_name="file")
135
+ DataChain.from_storage("gs://datachain-demo/chatbot-KiT/", object_name="file", anon=True)
136
136
  .map(is_success=eval_dialogue)
137
137
  .save("mistral_files")
138
138
  )
@@ -177,7 +177,7 @@ def eval_dialog(file: File) -> ChatCompletionResponse:
177
177
  {"role": "user", "content": file.read()}])
178
178
 
179
179
  chain = (
180
- DataChain.from_storage("gs://datachain-demo/chatbot-KiT/", object_name="file")
180
+ DataChain.from_storage("gs://datachain-demo/chatbot-KiT/", object_name="file", anon=True)
181
181
  .settings(parallel=4, cache=True)
182
182
  .map(response=eval_dialog)
183
183
  .map(status=lambda response: response.choices[0].message.content.lower()[:7])
@@ -273,7 +273,7 @@ from datachain import C, DataChain
273
273
  processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
274
274
 
275
275
  chain = (
276
- DataChain.from_storage("gs://datachain-demo/dogs-and-cats/", type="image")
276
+ DataChain.from_storage("gs://datachain-demo/dogs-and-cats/", type="image", anon=True)
277
277
  .map(label=lambda name: name.split(".")[0], params=["file.name"])
278
278
  .select("file", "label").to_pytorch(
279
279
  transform=processor.image_processor,
@@ -33,13 +33,14 @@ class GCSClient(Client):
33
33
  return cast(GCSFileSystem, super().create_fs(**kwargs))
34
34
 
35
35
  def url(self, path: str, expires: int = 3600, **kwargs) -> str:
36
- try:
37
- return self.fs.sign(self.get_full_path(path), expiration=expires, **kwargs)
38
- except AttributeError as exc:
39
- is_anon = self.fs.storage_options.get("token") == "anon"
40
- if is_anon and "you need a private key to sign credentials" in str(exc):
41
- return f"https://storage.googleapis.com/{self.name}/{path}"
42
- raise
36
+ """
37
+ Generate a signed URL for the given path.
38
+ If the client is anonymous, a public URL is returned instead
39
+ (see https://cloud.google.com/storage/docs/access-public-data#api-link).
40
+ """
41
+ if self.fs.storage_options.get("token") == "anon":
42
+ return f"https://storage.googleapis.com/{self.name}/{path}"
43
+ return self.fs.sign(self.get_full_path(path), expiration=expires, **kwargs)
43
44
 
44
45
  @staticmethod
45
46
  def parse_timestamp(timestamp: str) -> datetime:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datachain
3
- Version: 0.8.1
3
+ Version: 0.8.2
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License: Apache-2.0
@@ -145,6 +145,88 @@ Getting Started
145
145
  Visit `Quick Start <https://docs.datachain.ai/quick-start>`_ and `Docs <https://docs.datachain.ai/>`_
146
146
  to get started with `DataChain` and learn more.
147
147
 
148
+ .. code:: bash
149
+
150
+ pip install datachain
151
+
152
+
153
+ Example: download subset of files based on metadata
154
+ ---------------------------------------------------
155
+
156
+ Sometimes users only need to download a specific subset of files from cloud storage,
157
+ rather than the entire dataset.
158
+ For example, you could use a JSON file's metadata to download just cat images with
159
+ high confidence scores.
160
+
161
+
162
+ .. code:: py
163
+
164
+ from datachain import Column, DataChain
165
+
166
+ meta = DataChain.from_json("gs://datachain-demo/dogs-and-cats/*json", object_name="meta", anon=True)
167
+ images = DataChain.from_storage("gs://datachain-demo/dogs-and-cats/*jpg", anon=True)
168
+
169
+ images_id = images.map(id=lambda file: file.path.split('.')[-2])
170
+ annotated = images_id.merge(meta, on="id", right_on="meta.id")
171
+
172
+ likely_cats = annotated.filter((Column("meta.inference.confidence") > 0.93) \
173
+ & (Column("meta.inference.class_") == "cat"))
174
+ likely_cats.export_files("high-confidence-cats/", signal="file")
175
+
176
+
177
+ Example: LLM-based text-file evaluation
178
+ ---------------------------------------
179
+
180
+ In this example, we evaluate chatbot conversations stored in text files
181
+ using LLM-based evaluation.
182
+
183
+ .. code:: shell
184
+
185
+ $ pip install mistralai # Requires version >=1.0.0
186
+ $ export MISTRAL_API_KEY=_your_key_
187
+
188
+ Python code:
189
+
190
+ .. code:: py
191
+
192
+ from mistralai import Mistral
193
+ from datachain import File, DataChain, Column
194
+
195
+ PROMPT = "Was this dialog successful? Answer in a single word: Success or Failure."
196
+
197
+ def eval_dialogue(file: File) -> bool:
198
+ client = Mistral()
199
+ response = client.chat.complete(
200
+ model="open-mixtral-8x22b",
201
+ messages=[{"role": "system", "content": PROMPT},
202
+ {"role": "user", "content": file.read()}])
203
+ result = response.choices[0].message.content
204
+ return result.lower().startswith("success")
205
+
206
+ chain = (
207
+ DataChain.from_storage("gs://datachain-demo/chatbot-KiT/", object_name="file", anon=True)
208
+ .settings(parallel=4, cache=True)
209
+ .map(is_success=eval_dialogue)
210
+ .save("mistral_files")
211
+ )
212
+
213
+ successful_chain = chain.filter(Column("is_success") == True)
214
+ successful_chain.export_files("./output_mistral")
215
+
216
+ print(f"{successful_chain.count()} files were exported")
217
+
218
+
219
+
220
+ With the instruction above, the Mistral model judges 31 of the 50 files to contain successful dialogues:
221
+
222
+ .. code:: shell
223
+
224
+ $ ls output_mistral/datachain-demo/chatbot-KiT/
225
+ 1.txt 15.txt 18.txt 2.txt 22.txt 25.txt 28.txt 33.txt 37.txt 4.txt 41.txt ...
226
+ $ ls output_mistral/datachain-demo/chatbot-KiT/ | wc -l
227
+ 31
228
+
229
+
148
230
  Key Features
149
231
  ============
150
232
 
@@ -0,0 +1,6 @@
1
+ from datachain.client import Client
2
+
3
+
4
+ def test_anon_url():
5
+ client = Client.get_client("gs://foo", None, anon=True)
6
+ assert client.url("bar") == "https://storage.googleapis.com/foo/bar"
@@ -1,17 +0,0 @@
1
- from datachain.client import Client
2
-
3
-
4
- def test_anon_url(mocker):
5
- def sign(*args, **kwargs):
6
- raise AttributeError(
7
- "you need a private key to sign credentials."
8
- "the credentials you are currently using"
9
- " <class 'google.oauth2.credentials.Credentials'> just contains a token."
10
- " see https://googleapis.dev/python/google-api-core/latest/auth.html"
11
- "#setting-up-a-service-account for more details."
12
- )
13
-
14
- mocker.patch("gcsfs.GCSFileSystem.sign", side_effect=sign)
15
-
16
- client = Client.get_client("gs://foo", None, anon=True)
17
- assert client.url("bar") == "https://storage.googleapis.com/foo/bar"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes