datachain 0.3.20__tar.gz → 0.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (249)
  1. {datachain-0.3.20 → datachain-0.5.0}/.pre-commit-config.yaml +1 -1
  2. {datachain-0.3.20/src/datachain.egg-info → datachain-0.5.0}/PKG-INFO +1 -1
  3. {datachain-0.3.20 → datachain-0.5.0}/mkdocs.yml +2 -0
  4. datachain-0.5.0/overrides/main.html +12 -0
  5. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/catalog/catalog.py +0 -3
  6. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/cli.py +3 -2
  7. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/data_storage/metastore.py +8 -12
  8. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/data_storage/warehouse.py +1 -3
  9. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/dataset.py +0 -8
  10. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/lib/dc.py +197 -113
  11. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/lib/listing.py +5 -3
  12. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/lib/pytorch.py +5 -1
  13. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/query/dataset.py +1 -1
  14. {datachain-0.3.20 → datachain-0.5.0/src/datachain.egg-info}/PKG-INFO +1 -1
  15. {datachain-0.3.20 → datachain-0.5.0}/src/datachain.egg-info/SOURCES.txt +1 -0
  16. {datachain-0.3.20 → datachain-0.5.0}/tests/conftest.py +0 -1
  17. {datachain-0.3.20 → datachain-0.5.0}/tests/func/test_catalog.py +5 -2
  18. {datachain-0.3.20 → datachain-0.5.0}/tests/func/test_datachain.py +4 -4
  19. {datachain-0.3.20 → datachain-0.5.0}/tests/func/test_pull.py +0 -1
  20. {datachain-0.3.20 → datachain-0.5.0}/tests/unit/lib/test_datachain.py +21 -25
  21. {datachain-0.3.20 → datachain-0.5.0}/tests/unit/lib/test_datachain_merge.py +1 -1
  22. {datachain-0.3.20 → datachain-0.5.0}/tests/unit/test_query.py +1 -1
  23. {datachain-0.3.20 → datachain-0.5.0}/tests/unit/test_warehouse.py +0 -2
  24. {datachain-0.3.20 → datachain-0.5.0}/.cruft.json +0 -0
  25. {datachain-0.3.20 → datachain-0.5.0}/.gitattributes +0 -0
  26. {datachain-0.3.20 → datachain-0.5.0}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  27. {datachain-0.3.20 → datachain-0.5.0}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
  28. {datachain-0.3.20 → datachain-0.5.0}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  29. {datachain-0.3.20 → datachain-0.5.0}/.github/codecov.yaml +0 -0
  30. {datachain-0.3.20 → datachain-0.5.0}/.github/dependabot.yml +0 -0
  31. {datachain-0.3.20 → datachain-0.5.0}/.github/workflows/benchmarks.yml +0 -0
  32. {datachain-0.3.20 → datachain-0.5.0}/.github/workflows/release.yml +0 -0
  33. {datachain-0.3.20 → datachain-0.5.0}/.github/workflows/tests-studio.yml +0 -0
  34. {datachain-0.3.20 → datachain-0.5.0}/.github/workflows/tests.yml +0 -0
  35. {datachain-0.3.20 → datachain-0.5.0}/.github/workflows/update-template.yaml +0 -0
  36. {datachain-0.3.20 → datachain-0.5.0}/.gitignore +0 -0
  37. {datachain-0.3.20 → datachain-0.5.0}/CODE_OF_CONDUCT.rst +0 -0
  38. {datachain-0.3.20 → datachain-0.5.0}/CONTRIBUTING.rst +0 -0
  39. {datachain-0.3.20 → datachain-0.5.0}/LICENSE +0 -0
  40. {datachain-0.3.20 → datachain-0.5.0}/README.rst +0 -0
  41. {datachain-0.3.20 → datachain-0.5.0}/docs/assets/captioned_cartoons.png +0 -0
  42. {datachain-0.3.20 → datachain-0.5.0}/docs/assets/datachain-white.svg +0 -0
  43. {datachain-0.3.20 → datachain-0.5.0}/docs/assets/datachain.svg +0 -0
  44. {datachain-0.3.20 → datachain-0.5.0}/docs/assets/flowchart.png +0 -0
  45. {datachain-0.3.20 → datachain-0.5.0}/docs/index.md +0 -0
  46. {datachain-0.3.20 → datachain-0.5.0}/docs/references/datachain.md +0 -0
  47. {datachain-0.3.20 → datachain-0.5.0}/docs/references/datatype.md +0 -0
  48. {datachain-0.3.20 → datachain-0.5.0}/docs/references/file.md +0 -0
  49. {datachain-0.3.20 → datachain-0.5.0}/docs/references/index.md +0 -0
  50. {datachain-0.3.20 → datachain-0.5.0}/docs/references/sql.md +0 -0
  51. {datachain-0.3.20 → datachain-0.5.0}/docs/references/torch.md +0 -0
  52. {datachain-0.3.20 → datachain-0.5.0}/docs/references/udf.md +0 -0
  53. {datachain-0.3.20 → datachain-0.5.0}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
  54. {datachain-0.3.20 → datachain-0.5.0}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
  55. {datachain-0.3.20 → datachain-0.5.0}/examples/computer_vision/openimage-detect.py +0 -0
  56. {datachain-0.3.20 → datachain-0.5.0}/examples/get_started/common_sql_functions.py +0 -0
  57. {datachain-0.3.20 → datachain-0.5.0}/examples/get_started/json-csv-reader.py +0 -0
  58. {datachain-0.3.20 → datachain-0.5.0}/examples/get_started/torch-loader.py +0 -0
  59. {datachain-0.3.20 → datachain-0.5.0}/examples/get_started/udfs/parallel.py +0 -0
  60. {datachain-0.3.20 → datachain-0.5.0}/examples/get_started/udfs/simple.py +0 -0
  61. {datachain-0.3.20 → datachain-0.5.0}/examples/get_started/udfs/stateful.py +0 -0
  62. {datachain-0.3.20 → datachain-0.5.0}/examples/llm_and_nlp/claude-query.py +0 -0
  63. {datachain-0.3.20 → datachain-0.5.0}/examples/llm_and_nlp/unstructured-embeddings-gen.py +0 -0
  64. {datachain-0.3.20 → datachain-0.5.0}/examples/llm_and_nlp/unstructured-summary-map.py +0 -0
  65. {datachain-0.3.20 → datachain-0.5.0}/examples/multimodal/clip_inference.py +0 -0
  66. {datachain-0.3.20 → datachain-0.5.0}/examples/multimodal/hf_pipeline.py +0 -0
  67. {datachain-0.3.20 → datachain-0.5.0}/examples/multimodal/openai_image_desc_lib.py +0 -0
  68. {datachain-0.3.20 → datachain-0.5.0}/examples/multimodal/wds.py +0 -0
  69. {datachain-0.3.20 → datachain-0.5.0}/examples/multimodal/wds_filtered.py +0 -0
  70. {datachain-0.3.20 → datachain-0.5.0}/noxfile.py +0 -0
  71. {datachain-0.3.20 → datachain-0.5.0}/pyproject.toml +0 -0
  72. {datachain-0.3.20 → datachain-0.5.0}/setup.cfg +0 -0
  73. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/__init__.py +0 -0
  74. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/__main__.py +0 -0
  75. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/asyn.py +0 -0
  76. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/cache.py +0 -0
  77. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/catalog/__init__.py +0 -0
  78. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/catalog/datasource.py +0 -0
  79. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/catalog/loader.py +0 -0
  80. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/cli_utils.py +0 -0
  81. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/client/__init__.py +0 -0
  82. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/client/azure.py +0 -0
  83. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/client/fileslice.py +0 -0
  84. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/client/fsspec.py +0 -0
  85. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/client/gcs.py +0 -0
  86. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/client/hf.py +0 -0
  87. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/client/local.py +0 -0
  88. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/client/s3.py +0 -0
  89. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/config.py +0 -0
  90. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/data_storage/__init__.py +0 -0
  91. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/data_storage/db_engine.py +0 -0
  92. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/data_storage/id_generator.py +0 -0
  93. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/data_storage/job.py +0 -0
  94. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/data_storage/schema.py +0 -0
  95. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/data_storage/serializer.py +0 -0
  96. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/data_storage/sqlite.py +0 -0
  97. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/error.py +0 -0
  98. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/job.py +0 -0
  99. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/lib/__init__.py +0 -0
  100. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/lib/arrow.py +0 -0
  101. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/lib/clip.py +0 -0
  102. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/lib/convert/__init__.py +0 -0
  103. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/lib/convert/flatten.py +0 -0
  104. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/lib/convert/python_to_sql.py +0 -0
  105. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/lib/convert/sql_to_python.py +0 -0
  106. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/lib/convert/unflatten.py +0 -0
  107. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/lib/convert/values_to_tuples.py +0 -0
  108. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/lib/data_model.py +0 -0
  109. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/lib/dataset_info.py +0 -0
  110. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/lib/file.py +0 -0
  111. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/lib/hf.py +0 -0
  112. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/lib/image.py +0 -0
  113. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/lib/listing_info.py +0 -0
  114. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/lib/meta_formats.py +0 -0
  115. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/lib/model_store.py +0 -0
  116. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/lib/settings.py +0 -0
  117. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/lib/signal_schema.py +0 -0
  118. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/lib/tar.py +0 -0
  119. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/lib/text.py +0 -0
  120. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/lib/udf.py +0 -0
  121. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/lib/udf_signature.py +0 -0
  122. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/lib/utils.py +0 -0
  123. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/lib/vfile.py +0 -0
  124. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/lib/webdataset.py +0 -0
  125. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/lib/webdataset_laion.py +0 -0
  126. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/listing.py +0 -0
  127. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/node.py +0 -0
  128. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/nodes_fetcher.py +0 -0
  129. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/nodes_thread_pool.py +0 -0
  130. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/progress.py +0 -0
  131. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/py.typed +0 -0
  132. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/query/__init__.py +0 -0
  133. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/query/batch.py +0 -0
  134. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/query/dispatch.py +0 -0
  135. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/query/metrics.py +0 -0
  136. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/query/params.py +0 -0
  137. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/query/queue.py +0 -0
  138. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/query/schema.py +0 -0
  139. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/query/session.py +0 -0
  140. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/query/udf.py +0 -0
  141. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/remote/__init__.py +0 -0
  142. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/remote/studio.py +0 -0
  143. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/sql/__init__.py +0 -0
  144. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/sql/default/__init__.py +0 -0
  145. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/sql/default/base.py +0 -0
  146. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/sql/functions/__init__.py +0 -0
  147. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/sql/functions/array.py +0 -0
  148. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/sql/functions/conditional.py +0 -0
  149. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/sql/functions/path.py +0 -0
  150. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/sql/functions/random.py +0 -0
  151. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/sql/functions/string.py +0 -0
  152. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/sql/selectable.py +0 -0
  153. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/sql/sqlite/__init__.py +0 -0
  154. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/sql/sqlite/base.py +0 -0
  155. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/sql/sqlite/types.py +0 -0
  156. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/sql/sqlite/vector.py +0 -0
  157. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/sql/types.py +0 -0
  158. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/sql/utils.py +0 -0
  159. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/storage.py +0 -0
  160. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/telemetry.py +0 -0
  161. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/torch/__init__.py +0 -0
  162. {datachain-0.3.20 → datachain-0.5.0}/src/datachain/utils.py +0 -0
  163. {datachain-0.3.20 → datachain-0.5.0}/src/datachain.egg-info/dependency_links.txt +0 -0
  164. {datachain-0.3.20 → datachain-0.5.0}/src/datachain.egg-info/entry_points.txt +0 -0
  165. {datachain-0.3.20 → datachain-0.5.0}/src/datachain.egg-info/requires.txt +0 -0
  166. {datachain-0.3.20 → datachain-0.5.0}/src/datachain.egg-info/top_level.txt +0 -0
  167. {datachain-0.3.20 → datachain-0.5.0}/tests/__init__.py +0 -0
  168. {datachain-0.3.20 → datachain-0.5.0}/tests/benchmarks/__init__.py +0 -0
  169. {datachain-0.3.20 → datachain-0.5.0}/tests/benchmarks/conftest.py +0 -0
  170. {datachain-0.3.20 → datachain-0.5.0}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
  171. {datachain-0.3.20 → datachain-0.5.0}/tests/benchmarks/datasets/.dvc/config +0 -0
  172. {datachain-0.3.20 → datachain-0.5.0}/tests/benchmarks/datasets/.gitignore +0 -0
  173. {datachain-0.3.20 → datachain-0.5.0}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
  174. {datachain-0.3.20 → datachain-0.5.0}/tests/benchmarks/test_datachain.py +0 -0
  175. {datachain-0.3.20 → datachain-0.5.0}/tests/benchmarks/test_ls.py +0 -0
  176. {datachain-0.3.20 → datachain-0.5.0}/tests/benchmarks/test_version.py +0 -0
  177. {datachain-0.3.20 → datachain-0.5.0}/tests/data.py +0 -0
  178. {datachain-0.3.20 → datachain-0.5.0}/tests/examples/__init__.py +0 -0
  179. {datachain-0.3.20 → datachain-0.5.0}/tests/examples/test_examples.py +0 -0
  180. {datachain-0.3.20 → datachain-0.5.0}/tests/examples/test_wds_e2e.py +0 -0
  181. {datachain-0.3.20 → datachain-0.5.0}/tests/examples/wds_data.py +0 -0
  182. {datachain-0.3.20 → datachain-0.5.0}/tests/func/__init__.py +0 -0
  183. {datachain-0.3.20 → datachain-0.5.0}/tests/func/test_client.py +0 -0
  184. {datachain-0.3.20 → datachain-0.5.0}/tests/func/test_dataset_query.py +0 -0
  185. {datachain-0.3.20 → datachain-0.5.0}/tests/func/test_datasets.py +0 -0
  186. {datachain-0.3.20 → datachain-0.5.0}/tests/func/test_feature_pickling.py +0 -0
  187. {datachain-0.3.20 → datachain-0.5.0}/tests/func/test_listing.py +0 -0
  188. {datachain-0.3.20 → datachain-0.5.0}/tests/func/test_ls.py +0 -0
  189. {datachain-0.3.20 → datachain-0.5.0}/tests/func/test_meta_formats.py +0 -0
  190. {datachain-0.3.20 → datachain-0.5.0}/tests/func/test_metrics.py +0 -0
  191. {datachain-0.3.20 → datachain-0.5.0}/tests/func/test_pytorch.py +0 -0
  192. {datachain-0.3.20 → datachain-0.5.0}/tests/func/test_query.py +0 -0
  193. {datachain-0.3.20 → datachain-0.5.0}/tests/scripts/feature_class.py +0 -0
  194. {datachain-0.3.20 → datachain-0.5.0}/tests/scripts/feature_class_parallel.py +0 -0
  195. {datachain-0.3.20 → datachain-0.5.0}/tests/scripts/feature_class_parallel_data_model.py +0 -0
  196. {datachain-0.3.20 → datachain-0.5.0}/tests/scripts/name_len_slow.py +0 -0
  197. {datachain-0.3.20 → datachain-0.5.0}/tests/test_cli_e2e.py +0 -0
  198. {datachain-0.3.20 → datachain-0.5.0}/tests/test_query_e2e.py +0 -0
  199. {datachain-0.3.20 → datachain-0.5.0}/tests/test_telemetry.py +0 -0
  200. {datachain-0.3.20 → datachain-0.5.0}/tests/unit/__init__.py +0 -0
  201. {datachain-0.3.20 → datachain-0.5.0}/tests/unit/lib/__init__.py +0 -0
  202. {datachain-0.3.20 → datachain-0.5.0}/tests/unit/lib/conftest.py +0 -0
  203. {datachain-0.3.20 → datachain-0.5.0}/tests/unit/lib/test_arrow.py +0 -0
  204. {datachain-0.3.20 → datachain-0.5.0}/tests/unit/lib/test_clip.py +0 -0
  205. {datachain-0.3.20 → datachain-0.5.0}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
  206. {datachain-0.3.20 → datachain-0.5.0}/tests/unit/lib/test_feature.py +0 -0
  207. {datachain-0.3.20 → datachain-0.5.0}/tests/unit/lib/test_feature_utils.py +0 -0
  208. {datachain-0.3.20 → datachain-0.5.0}/tests/unit/lib/test_file.py +0 -0
  209. {datachain-0.3.20 → datachain-0.5.0}/tests/unit/lib/test_hf.py +0 -0
  210. {datachain-0.3.20 → datachain-0.5.0}/tests/unit/lib/test_image.py +0 -0
  211. {datachain-0.3.20 → datachain-0.5.0}/tests/unit/lib/test_schema.py +0 -0
  212. {datachain-0.3.20 → datachain-0.5.0}/tests/unit/lib/test_signal_schema.py +0 -0
  213. {datachain-0.3.20 → datachain-0.5.0}/tests/unit/lib/test_sql_to_python.py +0 -0
  214. {datachain-0.3.20 → datachain-0.5.0}/tests/unit/lib/test_text.py +0 -0
  215. {datachain-0.3.20 → datachain-0.5.0}/tests/unit/lib/test_udf_signature.py +0 -0
  216. {datachain-0.3.20 → datachain-0.5.0}/tests/unit/lib/test_utils.py +0 -0
  217. {datachain-0.3.20 → datachain-0.5.0}/tests/unit/lib/test_webdataset.py +0 -0
  218. {datachain-0.3.20 → datachain-0.5.0}/tests/unit/sql/__init__.py +0 -0
  219. {datachain-0.3.20 → datachain-0.5.0}/tests/unit/sql/sqlite/__init__.py +0 -0
  220. {datachain-0.3.20 → datachain-0.5.0}/tests/unit/sql/sqlite/test_utils.py +0 -0
  221. {datachain-0.3.20 → datachain-0.5.0}/tests/unit/sql/test_array.py +0 -0
  222. {datachain-0.3.20 → datachain-0.5.0}/tests/unit/sql/test_conditional.py +0 -0
  223. {datachain-0.3.20 → datachain-0.5.0}/tests/unit/sql/test_path.py +0 -0
  224. {datachain-0.3.20 → datachain-0.5.0}/tests/unit/sql/test_random.py +0 -0
  225. {datachain-0.3.20 → datachain-0.5.0}/tests/unit/sql/test_selectable.py +0 -0
  226. {datachain-0.3.20 → datachain-0.5.0}/tests/unit/sql/test_string.py +0 -0
  227. {datachain-0.3.20 → datachain-0.5.0}/tests/unit/test_asyn.py +0 -0
  228. {datachain-0.3.20 → datachain-0.5.0}/tests/unit/test_cache.py +0 -0
  229. {datachain-0.3.20 → datachain-0.5.0}/tests/unit/test_catalog.py +0 -0
  230. {datachain-0.3.20 → datachain-0.5.0}/tests/unit/test_catalog_loader.py +0 -0
  231. {datachain-0.3.20 → datachain-0.5.0}/tests/unit/test_cli_parsing.py +0 -0
  232. {datachain-0.3.20 → datachain-0.5.0}/tests/unit/test_client.py +0 -0
  233. {datachain-0.3.20 → datachain-0.5.0}/tests/unit/test_client_s3.py +0 -0
  234. {datachain-0.3.20 → datachain-0.5.0}/tests/unit/test_data_storage.py +0 -0
  235. {datachain-0.3.20 → datachain-0.5.0}/tests/unit/test_database_engine.py +0 -0
  236. {datachain-0.3.20 → datachain-0.5.0}/tests/unit/test_dataset.py +0 -0
  237. {datachain-0.3.20 → datachain-0.5.0}/tests/unit/test_dispatch.py +0 -0
  238. {datachain-0.3.20 → datachain-0.5.0}/tests/unit/test_fileslice.py +0 -0
  239. {datachain-0.3.20 → datachain-0.5.0}/tests/unit/test_id_generator.py +0 -0
  240. {datachain-0.3.20 → datachain-0.5.0}/tests/unit/test_listing.py +0 -0
  241. {datachain-0.3.20 → datachain-0.5.0}/tests/unit/test_metastore.py +0 -0
  242. {datachain-0.3.20 → datachain-0.5.0}/tests/unit/test_module_exports.py +0 -0
  243. {datachain-0.3.20 → datachain-0.5.0}/tests/unit/test_query_metrics.py +0 -0
  244. {datachain-0.3.20 → datachain-0.5.0}/tests/unit/test_query_params.py +0 -0
  245. {datachain-0.3.20 → datachain-0.5.0}/tests/unit/test_serializer.py +0 -0
  246. {datachain-0.3.20 → datachain-0.5.0}/tests/unit/test_session.py +0 -0
  247. {datachain-0.3.20 → datachain-0.5.0}/tests/unit/test_storage.py +0 -0
  248. {datachain-0.3.20 → datachain-0.5.0}/tests/unit/test_utils.py +0 -0
  249. {datachain-0.3.20 → datachain-0.5.0}/tests/utils.py +0 -0
@@ -24,7 +24,7 @@ repos:
24
24
  - id: trailing-whitespace
25
25
  exclude: '^LICENSES/'
26
26
  - repo: https://github.com/astral-sh/ruff-pre-commit
27
- rev: 'v0.6.5'
27
+ rev: 'v0.6.7'
28
28
  hooks:
29
29
  - id: ruff
30
30
  args: [--fix, --exit-non-zero-on-fix]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datachain
3
- Version: 0.3.20
3
+ Version: 0.5.0
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License: Apache-2.0
@@ -15,6 +15,7 @@ validation:
15
15
 
16
16
  theme:
17
17
  name: material
18
+ custom_dir: overrides
18
19
  logo: assets/datachain-white.svg
19
20
  favicon: assets/datachain.svg
20
21
  icon:
@@ -71,6 +72,7 @@ nav:
71
72
  - references/udf.md
72
73
  - references/torch.md
73
74
  - references/sql.md
75
+ - DataChain Website: https://datachain.ai" target="_blank"
74
76
 
75
77
  markdown_extensions:
76
78
  - abbr
@@ -0,0 +1,12 @@
1
+ {% extends "base.html" %}
2
+
3
+ {% block scripts %}
4
+
5
+ {{ super() }}
6
+
7
+ <script type="text/javascript">
8
+ !function () { var e, t, n; e = "14ffd92a6cbf5f2", t = function () { Reo.init({ clientID: "14ffd92a6cbf5f2" }) }, (n = document.createElement("script")).src = "https://static.reo.dev/" + e + "/reo.js", n.async = !0, n.onload = t, document.head.appendChild(n) }();
9
+ </script>
10
+
11
+
12
+ {% endblock %}
@@ -979,7 +979,6 @@ class Catalog:
979
979
  script_output="",
980
980
  create_rows_table=True,
981
981
  job_id: Optional[str] = None,
982
- is_job_result: bool = False,
983
982
  ) -> DatasetRecord:
984
983
  """
985
984
  Creates dataset version if it doesn't exist.
@@ -1001,7 +1000,6 @@ class Catalog:
1001
1000
  script_output=script_output,
1002
1001
  schema=schema,
1003
1002
  job_id=job_id,
1004
- is_job_result=is_job_result,
1005
1003
  ignore_if_exists=True,
1006
1004
  )
1007
1005
 
@@ -1211,7 +1209,6 @@ class Catalog:
1211
1209
  size=dataset_version.size,
1212
1210
  preview=dataset_version.preview,
1213
1211
  job_id=dataset_version.job_id,
1214
- is_job_result=dataset_version.is_job_result,
1215
1212
  )
1216
1213
  # to avoid re-creating rows table, we are just renaming it for a new version
1217
1214
  # of target dataset
@@ -12,7 +12,7 @@ from typing import TYPE_CHECKING, Optional, Union
12
12
 
13
13
  import shtab
14
14
 
15
- from datachain import utils
15
+ from datachain import Session, utils
16
16
  from datachain.cli_utils import BooleanOptionalAction, CommaSeparatedArgs, KeyValueArgs
17
17
  from datachain.lib.dc import DataChain
18
18
  from datachain.telemetry import telemetry
@@ -770,7 +770,8 @@ def show(
770
770
  show_records(records, collapse_columns=not no_collapse)
771
771
  if schema and dataset_version.feature_schema:
772
772
  print("\nSchema:")
773
- dc = DataChain(name=name, version=version, catalog=catalog)
773
+ session = Session.get(catalog=catalog)
774
+ dc = DataChain.from_dataset(name=name, version=version, session=session)
774
775
  dc.print_schema()
775
776
 
776
777
 
@@ -15,7 +15,6 @@ from uuid import uuid4
15
15
  from sqlalchemy import (
16
16
  JSON,
17
17
  BigInteger,
18
- Boolean,
19
18
  Column,
20
19
  DateTime,
21
20
  ForeignKey,
@@ -228,7 +227,7 @@ class AbstractMetastore(ABC, Serializable):
228
227
  self,
229
228
  dataset: DatasetRecord,
230
229
  version: int,
231
- status: int = DatasetStatus.CREATED,
230
+ status: int,
232
231
  sources: str = "",
233
232
  feature_schema: Optional[dict] = None,
234
233
  query_script: str = "",
@@ -243,7 +242,6 @@ class AbstractMetastore(ABC, Serializable):
243
242
  size: Optional[int] = None,
244
243
  preview: Optional[list[dict]] = None,
245
244
  job_id: Optional[str] = None,
246
- is_job_result: bool = False,
247
245
  ) -> DatasetRecord:
248
246
  """Creates new dataset version."""
249
247
 
@@ -449,7 +447,6 @@ class AbstractDBMetastore(AbstractMetastore):
449
447
  Column("name", Text, nullable=False),
450
448
  Column("description", Text),
451
449
  Column("labels", JSON, nullable=True),
452
- Column("shadow", Boolean, nullable=False),
453
450
  Column("status", Integer, nullable=False),
454
451
  Column("feature_schema", JSON, nullable=True),
455
452
  Column("created_at", DateTime(timezone=True)),
@@ -482,8 +479,11 @@ class AbstractDBMetastore(AbstractMetastore):
482
479
  nullable=False,
483
480
  ),
484
481
  Column("version", Integer, nullable=False),
485
- # adding default for now until we fully remove shadow datasets
486
- Column("status", Integer, nullable=False, default=DatasetStatus.COMPLETE),
482
+ Column(
483
+ "status",
484
+ Integer,
485
+ nullable=False,
486
+ ),
487
487
  Column("feature_schema", JSON, nullable=True),
488
488
  Column("created_at", DateTime(timezone=True)),
489
489
  Column("finished_at", DateTime(timezone=True)),
@@ -497,7 +497,6 @@ class AbstractDBMetastore(AbstractMetastore):
497
497
  Column("query_script", Text, nullable=False, default=""),
498
498
  Column("schema", JSON, nullable=True),
499
499
  Column("job_id", Text, nullable=True),
500
- Column("is_job_result", Boolean, nullable=False, default=False),
501
500
  UniqueConstraint("dataset_id", "version"),
502
501
  ]
503
502
 
@@ -971,7 +970,6 @@ class AbstractDBMetastore(AbstractMetastore):
971
970
  # TODO abstract this method and add registered = True based on kwargs
972
971
  query = self._datasets_insert().values(
973
972
  name=name,
974
- shadow=False,
975
973
  status=status,
976
974
  feature_schema=json.dumps(feature_schema or {}),
977
975
  created_at=datetime.now(timezone.utc),
@@ -994,7 +992,7 @@ class AbstractDBMetastore(AbstractMetastore):
994
992
  self,
995
993
  dataset: DatasetRecord,
996
994
  version: int,
997
- status: int = DatasetStatus.CREATED,
995
+ status: int,
998
996
  sources: str = "",
999
997
  feature_schema: Optional[dict] = None,
1000
998
  query_script: str = "",
@@ -1009,7 +1007,6 @@ class AbstractDBMetastore(AbstractMetastore):
1009
1007
  size: Optional[int] = None,
1010
1008
  preview: Optional[list[dict]] = None,
1011
1009
  job_id: Optional[str] = None,
1012
- is_job_result: bool = False,
1013
1010
  conn=None,
1014
1011
  ) -> DatasetRecord:
1015
1012
  """Creates new dataset version."""
@@ -1021,7 +1018,7 @@ class AbstractDBMetastore(AbstractMetastore):
1021
1018
  query = self._datasets_versions_insert().values(
1022
1019
  dataset_id=dataset.id,
1023
1020
  version=version,
1024
- status=status, # for now until we remove shadow datasets
1021
+ status=status,
1025
1022
  feature_schema=json.dumps(feature_schema or {}),
1026
1023
  created_at=created_at or datetime.now(timezone.utc),
1027
1024
  finished_at=finished_at,
@@ -1035,7 +1032,6 @@ class AbstractDBMetastore(AbstractMetastore):
1035
1032
  size=size,
1036
1033
  preview=json.dumps(preview or []),
1037
1034
  job_id=job_id or os.getenv("DATACHAIN_JOB_ID"),
1038
- is_job_result=is_job_result,
1039
1035
  )
1040
1036
  if ignore_if_exists and hasattr(query, "on_conflict_do_nothing"):
1041
1037
  # SQLite and PostgreSQL both support 'on_conflict_do_nothing',
@@ -919,9 +919,7 @@ class AbstractWarehouse(ABC, Serializable):
919
919
  def is_temp_table_name(self, name: str) -> bool:
920
920
  """Returns if the given table name refers to a temporary
921
921
  or no longer needed table."""
922
- return name.startswith(
923
- (self.TMP_TABLE_NAME_PREFIX, self.UDF_TABLE_NAME_PREFIX, "ds_shadow_")
924
- ) or name.endswith("_shadow")
922
+ return name.startswith((self.TMP_TABLE_NAME_PREFIX, self.UDF_TABLE_NAME_PREFIX))
925
923
 
926
924
  def get_temp_table_names(self) -> list[str]:
927
925
  return [
@@ -179,7 +179,6 @@ class DatasetVersion:
179
179
  sources: str = ""
180
180
  query_script: str = ""
181
181
  job_id: Optional[str] = None
182
- is_job_result: bool = False
183
182
 
184
183
  @classmethod
185
184
  def parse( # noqa: PLR0913
@@ -201,7 +200,6 @@ class DatasetVersion:
201
200
  sources: str = "",
202
201
  query_script: str = "",
203
202
  job_id: Optional[str] = None,
204
- is_job_result: bool = False,
205
203
  ):
206
204
  return cls(
207
205
  id,
@@ -221,7 +219,6 @@ class DatasetVersion:
221
219
  sources,
222
220
  query_script,
223
221
  job_id,
224
- is_job_result,
225
222
  )
226
223
 
227
224
  def __eq__(self, other):
@@ -270,7 +267,6 @@ class DatasetRecord:
270
267
  name: str
271
268
  description: Optional[str]
272
269
  labels: list[str]
273
- shadow: bool
274
270
  schema: dict[str, Union[SQLType, type[SQLType]]]
275
271
  feature_schema: dict
276
272
  versions: list[DatasetVersion]
@@ -299,7 +295,6 @@ class DatasetRecord:
299
295
  name: str,
300
296
  description: Optional[str],
301
297
  labels: str,
302
- shadow: int,
303
298
  status: int,
304
299
  feature_schema: Optional[str],
305
300
  created_at: datetime,
@@ -327,7 +322,6 @@ class DatasetRecord:
327
322
  version_query_script: Optional[str],
328
323
  version_schema: str,
329
324
  version_job_id: Optional[str] = None,
330
- version_is_job_result: bool = False,
331
325
  ) -> "DatasetRecord":
332
326
  labels_lst: list[str] = json.loads(labels) if labels else []
333
327
  schema_dct: dict[str, Any] = json.loads(schema) if schema else {}
@@ -353,7 +347,6 @@ class DatasetRecord:
353
347
  version_sources, # type: ignore[arg-type]
354
348
  version_query_script, # type: ignore[arg-type]
355
349
  version_job_id,
356
- version_is_job_result,
357
350
  )
358
351
 
359
352
  return cls(
@@ -361,7 +354,6 @@ class DatasetRecord:
361
354
  name,
362
355
  description,
363
356
  labels_lst,
364
- bool(shadow),
365
357
  cls.parse_schema(schema_dct), # type: ignore[arg-type]
366
358
  json.loads(feature_schema) if feature_schema else {},
367
359
  [dataset_version],