datachain 0.8.9__tar.gz → 0.8.10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (306)
  1. {datachain-0.8.9 → datachain-0.8.10}/PKG-INFO +1 -1
  2. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/cli/__init__.py +2 -3
  3. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/cli/parser/__init__.py +20 -15
  4. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/cli/parser/job.py +1 -1
  5. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/cli/parser/studio.py +29 -29
  6. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/cli/parser/utils.py +1 -1
  7. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/cli/utils.py +1 -1
  8. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/data_storage/sqlite.py +38 -7
  9. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/data_storage/warehouse.py +2 -2
  10. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/lib/convert/python_to_sql.py +15 -3
  11. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/lib/dc.py +3 -1
  12. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/query/dataset.py +8 -4
  13. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/remote/studio.py +2 -2
  14. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/sql/sqlite/base.py +35 -14
  15. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/studio.py +7 -7
  16. {datachain-0.8.9 → datachain-0.8.10}/src/datachain.egg-info/PKG-INFO +1 -1
  17. {datachain-0.8.9 → datachain-0.8.10}/src/datachain.egg-info/SOURCES.txt +1 -0
  18. {datachain-0.8.9 → datachain-0.8.10}/tests/test_cli_studio.py +9 -9
  19. {datachain-0.8.9 → datachain-0.8.10}/tests/unit/lib/test_datachain_merge.py +48 -1
  20. datachain-0.8.10/tests/unit/lib/test_python_to_sql.py +45 -0
  21. {datachain-0.8.9 → datachain-0.8.10}/tests/unit/lib/test_utils.py +3 -19
  22. {datachain-0.8.9 → datachain-0.8.10}/.cruft.json +0 -0
  23. {datachain-0.8.9 → datachain-0.8.10}/.gitattributes +0 -0
  24. {datachain-0.8.9 → datachain-0.8.10}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  25. {datachain-0.8.9 → datachain-0.8.10}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
  26. {datachain-0.8.9 → datachain-0.8.10}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  27. {datachain-0.8.9 → datachain-0.8.10}/.github/codecov.yaml +0 -0
  28. {datachain-0.8.9 → datachain-0.8.10}/.github/dependabot.yml +0 -0
  29. {datachain-0.8.9 → datachain-0.8.10}/.github/workflows/benchmarks.yml +0 -0
  30. {datachain-0.8.9 → datachain-0.8.10}/.github/workflows/release.yml +0 -0
  31. {datachain-0.8.9 → datachain-0.8.10}/.github/workflows/tests-studio.yml +0 -0
  32. {datachain-0.8.9 → datachain-0.8.10}/.github/workflows/tests.yml +0 -0
  33. {datachain-0.8.9 → datachain-0.8.10}/.github/workflows/update-template.yaml +0 -0
  34. {datachain-0.8.9 → datachain-0.8.10}/.gitignore +0 -0
  35. {datachain-0.8.9 → datachain-0.8.10}/.pre-commit-config.yaml +0 -0
  36. {datachain-0.8.9 → datachain-0.8.10}/CODE_OF_CONDUCT.rst +0 -0
  37. {datachain-0.8.9 → datachain-0.8.10}/LICENSE +0 -0
  38. {datachain-0.8.9 → datachain-0.8.10}/README.rst +0 -0
  39. {datachain-0.8.9 → datachain-0.8.10}/docs/assets/captioned_cartoons.png +0 -0
  40. {datachain-0.8.9 → datachain-0.8.10}/docs/assets/datachain-white.svg +0 -0
  41. {datachain-0.8.9 → datachain-0.8.10}/docs/assets/datachain.svg +0 -0
  42. {datachain-0.8.9 → datachain-0.8.10}/docs/contributing.md +0 -0
  43. {datachain-0.8.9 → datachain-0.8.10}/docs/css/github-permalink-style.css +0 -0
  44. {datachain-0.8.9 → datachain-0.8.10}/docs/examples.md +0 -0
  45. {datachain-0.8.9 → datachain-0.8.10}/docs/index.md +0 -0
  46. {datachain-0.8.9 → datachain-0.8.10}/docs/overrides/main.html +0 -0
  47. {datachain-0.8.9 → datachain-0.8.10}/docs/quick-start.md +0 -0
  48. {datachain-0.8.9 → datachain-0.8.10}/docs/references/datachain.md +0 -0
  49. {datachain-0.8.9 → datachain-0.8.10}/docs/references/datatype.md +0 -0
  50. {datachain-0.8.9 → datachain-0.8.10}/docs/references/file.md +0 -0
  51. {datachain-0.8.9 → datachain-0.8.10}/docs/references/index.md +0 -0
  52. {datachain-0.8.9 → datachain-0.8.10}/docs/references/sql.md +0 -0
  53. {datachain-0.8.9 → datachain-0.8.10}/docs/references/torch.md +0 -0
  54. {datachain-0.8.9 → datachain-0.8.10}/docs/references/udf.md +0 -0
  55. {datachain-0.8.9 → datachain-0.8.10}/docs/tutorials.md +0 -0
  56. {datachain-0.8.9 → datachain-0.8.10}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
  57. {datachain-0.8.9 → datachain-0.8.10}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
  58. {datachain-0.8.9 → datachain-0.8.10}/examples/computer_vision/openimage-detect.py +0 -0
  59. {datachain-0.8.9 → datachain-0.8.10}/examples/computer_vision/ultralytics-bbox.py +0 -0
  60. {datachain-0.8.9 → datachain-0.8.10}/examples/computer_vision/ultralytics-pose.py +0 -0
  61. {datachain-0.8.9 → datachain-0.8.10}/examples/computer_vision/ultralytics-segment.py +0 -0
  62. {datachain-0.8.9 → datachain-0.8.10}/examples/get_started/common_sql_functions.py +0 -0
  63. {datachain-0.8.9 → datachain-0.8.10}/examples/get_started/json-csv-reader.py +0 -0
  64. {datachain-0.8.9 → datachain-0.8.10}/examples/get_started/torch-loader.py +0 -0
  65. {datachain-0.8.9 → datachain-0.8.10}/examples/get_started/udfs/parallel.py +0 -0
  66. {datachain-0.8.9 → datachain-0.8.10}/examples/get_started/udfs/simple.py +0 -0
  67. {datachain-0.8.9 → datachain-0.8.10}/examples/get_started/udfs/stateful.py +0 -0
  68. {datachain-0.8.9 → datachain-0.8.10}/examples/llm_and_nlp/claude-query.py +0 -0
  69. {datachain-0.8.9 → datachain-0.8.10}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
  70. {datachain-0.8.9 → datachain-0.8.10}/examples/llm_and_nlp/unstructured-embeddings-gen.py +0 -0
  71. {datachain-0.8.9 → datachain-0.8.10}/examples/llm_and_nlp/unstructured-summary-map.py +0 -0
  72. {datachain-0.8.9 → datachain-0.8.10}/examples/multimodal/clip_inference.py +0 -0
  73. {datachain-0.8.9 → datachain-0.8.10}/examples/multimodal/hf_pipeline.py +0 -0
  74. {datachain-0.8.9 → datachain-0.8.10}/examples/multimodal/openai_image_desc_lib.py +0 -0
  75. {datachain-0.8.9 → datachain-0.8.10}/examples/multimodal/wds.py +0 -0
  76. {datachain-0.8.9 → datachain-0.8.10}/examples/multimodal/wds_filtered.py +0 -0
  77. {datachain-0.8.9 → datachain-0.8.10}/mkdocs.yml +0 -0
  78. {datachain-0.8.9 → datachain-0.8.10}/noxfile.py +0 -0
  79. {datachain-0.8.9 → datachain-0.8.10}/pyproject.toml +0 -0
  80. {datachain-0.8.9 → datachain-0.8.10}/setup.cfg +0 -0
  81. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/__init__.py +0 -0
  82. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/__main__.py +0 -0
  83. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/asyn.py +0 -0
  84. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/cache.py +0 -0
  85. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/catalog/__init__.py +0 -0
  86. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/catalog/catalog.py +0 -0
  87. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/catalog/datasource.py +0 -0
  88. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/catalog/loader.py +0 -0
  89. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/cli/commands/__init__.py +0 -0
  90. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/cli/commands/datasets.py +0 -0
  91. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/cli/commands/du.py +0 -0
  92. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/cli/commands/index.py +0 -0
  93. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/cli/commands/ls.py +0 -0
  94. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/cli/commands/misc.py +0 -0
  95. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/cli/commands/query.py +0 -0
  96. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/cli/commands/show.py +0 -0
  97. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/client/__init__.py +0 -0
  98. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/client/azure.py +0 -0
  99. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/client/fileslice.py +0 -0
  100. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/client/fsspec.py +0 -0
  101. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/client/gcs.py +0 -0
  102. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/client/hf.py +0 -0
  103. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/client/local.py +0 -0
  104. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/client/s3.py +0 -0
  105. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/config.py +0 -0
  106. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/data_storage/__init__.py +0 -0
  107. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/data_storage/db_engine.py +0 -0
  108. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/data_storage/job.py +0 -0
  109. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/data_storage/metastore.py +0 -0
  110. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/data_storage/schema.py +0 -0
  111. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/data_storage/serializer.py +0 -0
  112. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/dataset.py +0 -0
  113. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/diff/__init__.py +0 -0
  114. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/error.py +0 -0
  115. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/func/__init__.py +0 -0
  116. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/func/aggregate.py +0 -0
  117. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/func/array.py +0 -0
  118. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/func/base.py +0 -0
  119. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/func/conditional.py +0 -0
  120. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/func/func.py +0 -0
  121. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/func/numeric.py +0 -0
  122. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/func/path.py +0 -0
  123. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/func/random.py +0 -0
  124. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/func/string.py +0 -0
  125. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/func/window.py +0 -0
  126. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/job.py +0 -0
  127. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/lib/__init__.py +0 -0
  128. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/lib/arrow.py +0 -0
  129. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/lib/clip.py +0 -0
  130. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/lib/convert/__init__.py +0 -0
  131. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/lib/convert/flatten.py +0 -0
  132. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/lib/convert/sql_to_python.py +0 -0
  133. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/lib/convert/unflatten.py +0 -0
  134. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/lib/convert/values_to_tuples.py +0 -0
  135. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/lib/data_model.py +0 -0
  136. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/lib/dataset_info.py +0 -0
  137. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/lib/file.py +0 -0
  138. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/lib/hf.py +0 -0
  139. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/lib/image.py +0 -0
  140. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/lib/listing.py +0 -0
  141. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/lib/listing_info.py +0 -0
  142. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/lib/meta_formats.py +0 -0
  143. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/lib/model_store.py +0 -0
  144. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/lib/pytorch.py +0 -0
  145. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/lib/settings.py +0 -0
  146. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/lib/signal_schema.py +0 -0
  147. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/lib/tar.py +0 -0
  148. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/lib/text.py +0 -0
  149. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/lib/udf.py +0 -0
  150. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/lib/udf_signature.py +0 -0
  151. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/lib/utils.py +0 -0
  152. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/lib/vfile.py +0 -0
  153. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/lib/webdataset.py +0 -0
  154. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/lib/webdataset_laion.py +0 -0
  155. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/listing.py +0 -0
  156. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/model/__init__.py +0 -0
  157. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/model/bbox.py +0 -0
  158. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/model/pose.py +0 -0
  159. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/model/segment.py +0 -0
  160. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/model/ultralytics/__init__.py +0 -0
  161. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/model/ultralytics/bbox.py +0 -0
  162. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/model/ultralytics/pose.py +0 -0
  163. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/model/ultralytics/segment.py +0 -0
  164. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/node.py +0 -0
  165. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/nodes_fetcher.py +0 -0
  166. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/nodes_thread_pool.py +0 -0
  167. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/progress.py +0 -0
  168. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/py.typed +0 -0
  169. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/query/__init__.py +0 -0
  170. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/query/batch.py +0 -0
  171. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/query/dispatch.py +0 -0
  172. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/query/metrics.py +0 -0
  173. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/query/params.py +0 -0
  174. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/query/queue.py +0 -0
  175. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/query/schema.py +0 -0
  176. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/query/session.py +0 -0
  177. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/query/udf.py +0 -0
  178. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/query/utils.py +0 -0
  179. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/remote/__init__.py +0 -0
  180. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/sql/__init__.py +0 -0
  181. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/sql/default/__init__.py +0 -0
  182. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/sql/default/base.py +0 -0
  183. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/sql/functions/__init__.py +0 -0
  184. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/sql/functions/aggregate.py +0 -0
  185. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/sql/functions/array.py +0 -0
  186. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/sql/functions/conditional.py +0 -0
  187. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/sql/functions/numeric.py +0 -0
  188. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/sql/functions/path.py +0 -0
  189. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/sql/functions/random.py +0 -0
  190. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/sql/functions/string.py +0 -0
  191. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/sql/selectable.py +0 -0
  192. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/sql/sqlite/__init__.py +0 -0
  193. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/sql/sqlite/types.py +0 -0
  194. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/sql/sqlite/vector.py +0 -0
  195. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/sql/types.py +0 -0
  196. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/sql/utils.py +0 -0
  197. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/telemetry.py +0 -0
  198. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/toolkit/__init__.py +0 -0
  199. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/toolkit/split.py +0 -0
  200. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/torch/__init__.py +0 -0
  201. {datachain-0.8.9 → datachain-0.8.10}/src/datachain/utils.py +0 -0
  202. {datachain-0.8.9 → datachain-0.8.10}/src/datachain.egg-info/dependency_links.txt +0 -0
  203. {datachain-0.8.9 → datachain-0.8.10}/src/datachain.egg-info/entry_points.txt +0 -0
  204. {datachain-0.8.9 → datachain-0.8.10}/src/datachain.egg-info/requires.txt +0 -0
  205. {datachain-0.8.9 → datachain-0.8.10}/src/datachain.egg-info/top_level.txt +0 -0
  206. {datachain-0.8.9 → datachain-0.8.10}/tests/__init__.py +0 -0
  207. {datachain-0.8.9 → datachain-0.8.10}/tests/benchmarks/__init__.py +0 -0
  208. {datachain-0.8.9 → datachain-0.8.10}/tests/benchmarks/conftest.py +0 -0
  209. {datachain-0.8.9 → datachain-0.8.10}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
  210. {datachain-0.8.9 → datachain-0.8.10}/tests/benchmarks/datasets/.dvc/config +0 -0
  211. {datachain-0.8.9 → datachain-0.8.10}/tests/benchmarks/datasets/.gitignore +0 -0
  212. {datachain-0.8.9 → datachain-0.8.10}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
  213. {datachain-0.8.9 → datachain-0.8.10}/tests/benchmarks/test_datachain.py +0 -0
  214. {datachain-0.8.9 → datachain-0.8.10}/tests/benchmarks/test_ls.py +0 -0
  215. {datachain-0.8.9 → datachain-0.8.10}/tests/benchmarks/test_version.py +0 -0
  216. {datachain-0.8.9 → datachain-0.8.10}/tests/conftest.py +0 -0
  217. {datachain-0.8.9 → datachain-0.8.10}/tests/data.py +0 -0
  218. {datachain-0.8.9 → datachain-0.8.10}/tests/examples/__init__.py +0 -0
  219. {datachain-0.8.9 → datachain-0.8.10}/tests/examples/test_examples.py +0 -0
  220. {datachain-0.8.9 → datachain-0.8.10}/tests/examples/test_wds_e2e.py +0 -0
  221. {datachain-0.8.9 → datachain-0.8.10}/tests/examples/wds_data.py +0 -0
  222. {datachain-0.8.9 → datachain-0.8.10}/tests/func/__init__.py +0 -0
  223. {datachain-0.8.9 → datachain-0.8.10}/tests/func/fake-service-account-credentials.json +0 -0
  224. {datachain-0.8.9 → datachain-0.8.10}/tests/func/test_catalog.py +0 -0
  225. {datachain-0.8.9 → datachain-0.8.10}/tests/func/test_client.py +0 -0
  226. {datachain-0.8.9 → datachain-0.8.10}/tests/func/test_datachain.py +0 -0
  227. {datachain-0.8.9 → datachain-0.8.10}/tests/func/test_dataset_query.py +0 -0
  228. {datachain-0.8.9 → datachain-0.8.10}/tests/func/test_datasets.py +0 -0
  229. {datachain-0.8.9 → datachain-0.8.10}/tests/func/test_feature_pickling.py +0 -0
  230. {datachain-0.8.9 → datachain-0.8.10}/tests/func/test_listing.py +0 -0
  231. {datachain-0.8.9 → datachain-0.8.10}/tests/func/test_ls.py +0 -0
  232. {datachain-0.8.9 → datachain-0.8.10}/tests/func/test_meta_formats.py +0 -0
  233. {datachain-0.8.9 → datachain-0.8.10}/tests/func/test_metrics.py +0 -0
  234. {datachain-0.8.9 → datachain-0.8.10}/tests/func/test_pull.py +0 -0
  235. {datachain-0.8.9 → datachain-0.8.10}/tests/func/test_pytorch.py +0 -0
  236. {datachain-0.8.9 → datachain-0.8.10}/tests/func/test_query.py +0 -0
  237. {datachain-0.8.9 → datachain-0.8.10}/tests/func/test_session.py +0 -0
  238. {datachain-0.8.9 → datachain-0.8.10}/tests/func/test_toolkit.py +0 -0
  239. {datachain-0.8.9 → datachain-0.8.10}/tests/scripts/feature_class.py +0 -0
  240. {datachain-0.8.9 → datachain-0.8.10}/tests/scripts/feature_class_exception.py +0 -0
  241. {datachain-0.8.9 → datachain-0.8.10}/tests/scripts/feature_class_parallel.py +0 -0
  242. {datachain-0.8.9 → datachain-0.8.10}/tests/scripts/feature_class_parallel_data_model.py +0 -0
  243. {datachain-0.8.9 → datachain-0.8.10}/tests/scripts/name_len_slow.py +0 -0
  244. {datachain-0.8.9 → datachain-0.8.10}/tests/test_atomicity.py +0 -0
  245. {datachain-0.8.9 → datachain-0.8.10}/tests/test_cli_e2e.py +0 -0
  246. {datachain-0.8.9 → datachain-0.8.10}/tests/test_query_e2e.py +0 -0
  247. {datachain-0.8.9 → datachain-0.8.10}/tests/test_telemetry.py +0 -0
  248. {datachain-0.8.9 → datachain-0.8.10}/tests/unit/__init__.py +0 -0
  249. {datachain-0.8.9 → datachain-0.8.10}/tests/unit/lib/__init__.py +0 -0
  250. {datachain-0.8.9 → datachain-0.8.10}/tests/unit/lib/conftest.py +0 -0
  251. {datachain-0.8.9 → datachain-0.8.10}/tests/unit/lib/test_arrow.py +0 -0
  252. {datachain-0.8.9 → datachain-0.8.10}/tests/unit/lib/test_clip.py +0 -0
  253. {datachain-0.8.9 → datachain-0.8.10}/tests/unit/lib/test_datachain.py +0 -0
  254. {datachain-0.8.9 → datachain-0.8.10}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
  255. {datachain-0.8.9 → datachain-0.8.10}/tests/unit/lib/test_diff.py +0 -0
  256. {datachain-0.8.9 → datachain-0.8.10}/tests/unit/lib/test_feature.py +0 -0
  257. {datachain-0.8.9 → datachain-0.8.10}/tests/unit/lib/test_feature_utils.py +0 -0
  258. {datachain-0.8.9 → datachain-0.8.10}/tests/unit/lib/test_file.py +0 -0
  259. {datachain-0.8.9 → datachain-0.8.10}/tests/unit/lib/test_hf.py +0 -0
  260. {datachain-0.8.9 → datachain-0.8.10}/tests/unit/lib/test_image.py +0 -0
  261. {datachain-0.8.9 → datachain-0.8.10}/tests/unit/lib/test_listing_info.py +0 -0
  262. {datachain-0.8.9 → datachain-0.8.10}/tests/unit/lib/test_models.py +0 -0
  263. {datachain-0.8.9 → datachain-0.8.10}/tests/unit/lib/test_schema.py +0 -0
  264. {datachain-0.8.9 → datachain-0.8.10}/tests/unit/lib/test_signal_schema.py +0 -0
  265. {datachain-0.8.9 → datachain-0.8.10}/tests/unit/lib/test_sql_to_python.py +0 -0
  266. {datachain-0.8.9 → datachain-0.8.10}/tests/unit/lib/test_text.py +0 -0
  267. {datachain-0.8.9 → datachain-0.8.10}/tests/unit/lib/test_udf_signature.py +0 -0
  268. {datachain-0.8.9 → datachain-0.8.10}/tests/unit/lib/test_webdataset.py +0 -0
  269. {datachain-0.8.9 → datachain-0.8.10}/tests/unit/sql/__init__.py +0 -0
  270. {datachain-0.8.9 → datachain-0.8.10}/tests/unit/sql/sqlite/__init__.py +0 -0
  271. {datachain-0.8.9 → datachain-0.8.10}/tests/unit/sql/sqlite/test_types.py +0 -0
  272. {datachain-0.8.9 → datachain-0.8.10}/tests/unit/sql/sqlite/test_utils.py +0 -0
  273. {datachain-0.8.9 → datachain-0.8.10}/tests/unit/sql/test_array.py +0 -0
  274. {datachain-0.8.9 → datachain-0.8.10}/tests/unit/sql/test_conditional.py +0 -0
  275. {datachain-0.8.9 → datachain-0.8.10}/tests/unit/sql/test_path.py +0 -0
  276. {datachain-0.8.9 → datachain-0.8.10}/tests/unit/sql/test_random.py +0 -0
  277. {datachain-0.8.9 → datachain-0.8.10}/tests/unit/sql/test_selectable.py +0 -0
  278. {datachain-0.8.9 → datachain-0.8.10}/tests/unit/sql/test_string.py +0 -0
  279. {datachain-0.8.9 → datachain-0.8.10}/tests/unit/test_asyn.py +0 -0
  280. {datachain-0.8.9 → datachain-0.8.10}/tests/unit/test_cache.py +0 -0
  281. {datachain-0.8.9 → datachain-0.8.10}/tests/unit/test_catalog.py +0 -0
  282. {datachain-0.8.9 → datachain-0.8.10}/tests/unit/test_catalog_loader.py +0 -0
  283. {datachain-0.8.9 → datachain-0.8.10}/tests/unit/test_cli_parsing.py +0 -0
  284. {datachain-0.8.9 → datachain-0.8.10}/tests/unit/test_client.py +0 -0
  285. {datachain-0.8.9 → datachain-0.8.10}/tests/unit/test_client_gcs.py +0 -0
  286. {datachain-0.8.9 → datachain-0.8.10}/tests/unit/test_client_s3.py +0 -0
  287. {datachain-0.8.9 → datachain-0.8.10}/tests/unit/test_config.py +0 -0
  288. {datachain-0.8.9 → datachain-0.8.10}/tests/unit/test_data_storage.py +0 -0
  289. {datachain-0.8.9 → datachain-0.8.10}/tests/unit/test_database_engine.py +0 -0
  290. {datachain-0.8.9 → datachain-0.8.10}/tests/unit/test_dataset.py +0 -0
  291. {datachain-0.8.9 → datachain-0.8.10}/tests/unit/test_diff.py +0 -0
  292. {datachain-0.8.9 → datachain-0.8.10}/tests/unit/test_dispatch.py +0 -0
  293. {datachain-0.8.9 → datachain-0.8.10}/tests/unit/test_fileslice.py +0 -0
  294. {datachain-0.8.9 → datachain-0.8.10}/tests/unit/test_func.py +0 -0
  295. {datachain-0.8.9 → datachain-0.8.10}/tests/unit/test_listing.py +0 -0
  296. {datachain-0.8.9 → datachain-0.8.10}/tests/unit/test_metastore.py +0 -0
  297. {datachain-0.8.9 → datachain-0.8.10}/tests/unit/test_module_exports.py +0 -0
  298. {datachain-0.8.9 → datachain-0.8.10}/tests/unit/test_pytorch.py +0 -0
  299. {datachain-0.8.9 → datachain-0.8.10}/tests/unit/test_query.py +0 -0
  300. {datachain-0.8.9 → datachain-0.8.10}/tests/unit/test_query_metrics.py +0 -0
  301. {datachain-0.8.9 → datachain-0.8.10}/tests/unit/test_query_params.py +0 -0
  302. {datachain-0.8.9 → datachain-0.8.10}/tests/unit/test_serializer.py +0 -0
  303. {datachain-0.8.9 → datachain-0.8.10}/tests/unit/test_session.py +0 -0
  304. {datachain-0.8.9 → datachain-0.8.10}/tests/unit/test_utils.py +0 -0
  305. {datachain-0.8.9 → datachain-0.8.10}/tests/unit/test_warehouse.py +0 -0
  306. {datachain-0.8.9 → datachain-0.8.10}/tests/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: datachain
3
- Version: 0.8.9
3
+ Version: 0.8.10
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License: Apache-2.0
@@ -48,7 +48,6 @@ def main(argv: Optional[list[str]] = None) -> int:
48
48
  logger.setLevel(logging_level)
49
49
 
50
50
  client_config = {
51
- "aws_endpoint_url": args.aws_endpoint_url,
52
51
  "anon": args.anon,
53
52
  }
54
53
 
@@ -73,7 +72,7 @@ def main(argv: Optional[list[str]] = None) -> int:
73
72
 
74
73
  def handle_command(args, catalog, client_config) -> int:
75
74
  """Handle the different CLI commands."""
76
- from datachain.studio import process_jobs_args, process_studio_cli_args
75
+ from datachain.studio import process_auth_cli_args, process_jobs_args
77
76
 
78
77
  command_handlers = {
79
78
  "cp": lambda: handle_cp_command(args, catalog),
@@ -89,7 +88,7 @@ def handle_command(args, catalog, client_config) -> int:
89
88
  "query": lambda: handle_query_command(args, catalog),
90
89
  "clear-cache": lambda: clear_cache(catalog),
91
90
  "gc": lambda: garbage_collect(catalog),
92
- "studio": lambda: process_studio_cli_args(args),
91
+ "auth": lambda: process_auth_cli_args(args),
93
92
  "job": lambda: process_jobs_args(args),
94
93
  }
95
94
 
@@ -7,7 +7,7 @@ import shtab
7
7
  from datachain.cli.utils import BooleanOptionalAction, KeyValueArgs
8
8
 
9
9
  from .job import add_jobs_parser
10
- from .studio import add_studio_parser
10
+ from .studio import add_auth_parser
11
11
  from .utils import FIND_COLUMNS, add_show_args, add_sources_arg, find_columns_type
12
12
 
13
13
 
@@ -26,24 +26,25 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
26
26
 
27
27
  parent_parser = ArgumentParser(add_help=False)
28
28
  parent_parser.add_argument(
29
- "--aws-endpoint-url",
30
- type=str,
31
- help="AWS endpoint URL",
32
- )
33
- parent_parser.add_argument(
34
- "--anon",
35
- action="store_true",
36
- help="anon flag for remote storage (like awscli's --no-sign-request)",
29
+ "-v", "--verbose", action="count", default=0, help="Be verbose"
37
30
  )
38
31
  parent_parser.add_argument(
39
- "-u", "--update", action="count", default=0, help="Update cache"
32
+ "-q", "--quiet", action="count", default=0, help="Be quiet"
40
33
  )
34
+
41
35
  parent_parser.add_argument(
42
- "-v", "--verbose", action="count", default=0, help="Be verbose"
36
+ "--anon",
37
+ action="store_true",
38
+ help="Use anonymous access to storage",
43
39
  )
44
40
  parent_parser.add_argument(
45
- "-q", "--quiet", action="count", default=0, help="Be quiet"
41
+ "-u",
42
+ "--update",
43
+ action="count",
44
+ default=0,
45
+ help="Update cached list of files for the sources",
46
46
  )
47
+
47
48
  parent_parser.add_argument(
48
49
  "--debug-sql",
49
50
  action="store_true",
@@ -67,7 +68,9 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
67
68
  "cp", parents=[parent_parser], description="Copy data files from the cloud."
68
69
  )
69
70
  add_sources_arg(parse_cp).complete = shtab.DIR # type: ignore[attr-defined]
70
- parse_cp.add_argument("output", type=str, help="Output")
71
+ parse_cp.add_argument(
72
+ "output", type=str, help="Path to a directory or file to put data to"
73
+ )
71
74
  parse_cp.add_argument(
72
75
  "-f",
73
76
  "--force",
@@ -94,7 +97,9 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
94
97
  "clone", parents=[parent_parser], description="Copy data files from the cloud."
95
98
  )
96
99
  add_sources_arg(parse_clone).complete = shtab.DIR # type: ignore[attr-defined]
97
- parse_clone.add_argument("output", type=str, help="Output")
100
+ parse_clone.add_argument(
101
+ "output", type=str, help="Path to a directory or file to put data to"
102
+ )
98
103
  parse_clone.add_argument(
99
104
  "-f",
100
105
  "--force",
@@ -123,7 +128,7 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
123
128
  help="Do not copy files, just create a dataset",
124
129
  )
125
130
 
126
- add_studio_parser(subp, parent_parser)
131
+ add_auth_parser(subp, parent_parser)
127
132
  add_jobs_parser(subp, parent_parser)
128
133
 
129
134
  datasets_parser = subp.add_parser(
@@ -6,7 +6,7 @@ def add_jobs_parser(subparsers, parent_parser) -> None:
6
6
  )
7
7
  jobs_subparser = jobs_parser.add_subparsers(
8
8
  dest="cmd",
9
- help="Use `datachain studio CMD --help` to display command-specific help",
9
+ help="Use `datachain auth CMD --help` to display command-specific help",
10
10
  )
11
11
 
12
12
  studio_run_help = "Run a job in Studio"
@@ -1,31 +1,31 @@
1
- def add_studio_parser(subparsers, parent_parser) -> None:
2
- studio_help = "Manage Studio authentication"
3
- studio_description = (
1
+ def add_auth_parser(subparsers, parent_parser) -> None:
2
+ auth_help = "Manage Studio authentication"
3
+ auth_description = (
4
4
  "Manage authentication and settings for Studio. "
5
5
  "Configure tokens for sharing datasets and using Studio features."
6
6
  )
7
7
 
8
- studio_parser = subparsers.add_parser(
9
- "studio",
8
+ auth_parser = subparsers.add_parser(
9
+ "auth",
10
10
  parents=[parent_parser],
11
- description=studio_description,
12
- help=studio_help,
11
+ description=auth_description,
12
+ help=auth_help,
13
13
  )
14
- studio_subparser = studio_parser.add_subparsers(
14
+ auth_subparser = auth_parser.add_subparsers(
15
15
  dest="cmd",
16
- help="Use `datachain studio CMD --help` to display command-specific help",
16
+ help="Use `datachain auth CMD --help` to display command-specific help",
17
17
  )
18
18
 
19
- studio_login_help = "Authenticate with Studio"
20
- studio_login_description = (
19
+ auth_login_help = "Authenticate with Studio"
20
+ auth_login_description = (
21
21
  "Authenticate with Studio using default scopes. "
22
22
  "A random name will be assigned as the token name if not specified."
23
23
  )
24
- login_parser = studio_subparser.add_parser(
24
+ login_parser = auth_subparser.add_parser(
25
25
  "login",
26
26
  parents=[parent_parser],
27
- description=studio_login_description,
28
- help=studio_login_help,
27
+ description=auth_login_description,
28
+ help=auth_login_help,
29
29
  )
30
30
 
31
31
  login_parser.add_argument(
@@ -58,26 +58,26 @@ def add_studio_parser(subparsers, parent_parser) -> None:
58
58
  help="Use code-based authentication without browser",
59
59
  )
60
60
 
61
- studio_logout_help = "Log out from Studio"
62
- studio_logout_description = (
61
+ auth_logout_help = "Log out from Studio"
62
+ auth_logout_description = (
63
63
  "Remove the Studio authentication token from global config."
64
64
  )
65
65
 
66
- studio_subparser.add_parser(
66
+ auth_subparser.add_parser(
67
67
  "logout",
68
68
  parents=[parent_parser],
69
- description=studio_logout_description,
70
- help=studio_logout_help,
69
+ description=auth_logout_description,
70
+ help=auth_logout_help,
71
71
  )
72
72
 
73
- studio_team_help = "Set default team for Studio operations"
74
- studio_team_description = "Set the default team for Studio operations."
73
+ auth_team_help = "Set default team for Studio operations"
74
+ auth_team_description = "Set the default team for Studio operations."
75
75
 
76
- team_parser = studio_subparser.add_parser(
76
+ team_parser = auth_subparser.add_parser(
77
77
  "team",
78
78
  parents=[parent_parser],
79
- description=studio_team_description,
80
- help=studio_team_help,
79
+ description=auth_team_description,
80
+ help=auth_team_help,
81
81
  )
82
82
  team_parser.add_argument(
83
83
  "team_name",
@@ -91,12 +91,12 @@ def add_studio_parser(subparsers, parent_parser) -> None:
91
91
  help="Set team globally for all projects",
92
92
  )
93
93
 
94
- studio_token_help = "View Studio authentication token" # noqa: S105
95
- studio_token_description = "Display the current authentication token for Studio." # noqa: S105
94
+ auth_token_help = "View Studio authentication token" # noqa: S105
95
+ auth_token_description = "Display the current authentication token for Studio." # noqa: S105
96
96
 
97
- studio_subparser.add_parser(
97
+ auth_subparser.add_parser(
98
98
  "token",
99
99
  parents=[parent_parser],
100
- description=studio_token_description,
101
- help=studio_token_help,
100
+ description=auth_token_description,
101
+ help=auth_token_help,
102
102
  )
@@ -30,7 +30,7 @@ def add_sources_arg(parser: ArgumentParser, nargs: Union[str, int] = "+") -> Act
30
30
  "sources",
31
31
  type=str,
32
32
  nargs=nargs,
33
- help="Data sources - paths to cloud storage directories",
33
+ help="Data sources - paths to source storage directories or files",
34
34
  )
35
35
 
36
36
 
@@ -87,7 +87,7 @@ def get_logging_level(args: Namespace) -> int:
87
87
  def determine_flavors(studio: bool, local: bool, all: bool, token: Optional[str]):
88
88
  if studio and not token:
89
89
  raise DataChainError(
90
- "Not logged in to Studio. Log in with 'datachain studio login'."
90
+ "Not logged in to Studio. Log in with 'datachain auth login'."
91
91
  )
92
92
 
93
93
  if local or studio:
@@ -19,6 +19,7 @@ from sqlalchemy import MetaData, Table, UniqueConstraint, exists, select
19
19
  from sqlalchemy.dialects import sqlite
20
20
  from sqlalchemy.schema import CreateIndex, CreateTable, DropTable
21
21
  from sqlalchemy.sql import func
22
+ from sqlalchemy.sql.elements import BinaryExpression, BooleanClauseList
22
23
  from sqlalchemy.sql.expression import bindparam, cast
23
24
  from sqlalchemy.sql.selectable import Select
24
25
  from tqdm.auto import tqdm
@@ -40,7 +41,6 @@ if TYPE_CHECKING:
40
41
  from sqlalchemy.schema import SchemaItem
41
42
  from sqlalchemy.sql._typing import _FromClauseArgument, _OnClauseArgument
42
43
  from sqlalchemy.sql.elements import ColumnElement
43
- from sqlalchemy.sql.selectable import Join
44
44
  from sqlalchemy.types import TypeEngine
45
45
 
46
46
  from datachain.lib.file import File
@@ -654,16 +654,47 @@ class SQLiteWarehouse(AbstractWarehouse):
654
654
  right: "_FromClauseArgument",
655
655
  onclause: "_OnClauseArgument",
656
656
  inner: bool = True,
657
- ) -> "Join":
657
+ full: bool = False,
658
+ columns=None,
659
+ ) -> "Select":
658
660
  """
659
661
  Join two tables together.
660
662
  """
661
- return sqlalchemy.join(
662
- left,
663
- right,
664
- onclause,
665
- isouter=not inner,
663
+ if not full:
664
+ join_query = sqlalchemy.join(
665
+ left,
666
+ right,
667
+ onclause,
668
+ isouter=not inner,
669
+ )
670
+ return sqlalchemy.select(*columns).select_from(join_query)
671
+
672
+ left_right_join = sqlalchemy.select(*columns).select_from(
673
+ sqlalchemy.join(left, right, onclause, isouter=True)
666
674
  )
675
+ right_left_join = sqlalchemy.select(*columns).select_from(
676
+ sqlalchemy.join(right, left, onclause, isouter=True)
677
+ )
678
+
679
+ def add_left_rows_filter(exp: BinaryExpression):
680
+ """
681
+ Adds filter to right_left_join to remove unmatched left table rows by
682
+ getting column names that need to be NULL from BinaryExpressions in onclause
683
+ """
684
+ return right_left_join.where(
685
+ getattr(left.c, exp.left.name) == None # type: ignore[union-attr] # noqa: E711
686
+ )
687
+
688
+ if isinstance(onclause, BinaryExpression):
689
+ right_left_join = add_left_rows_filter(onclause)
690
+
691
+ if isinstance(onclause, BooleanClauseList):
692
+ for c in onclause.get_children():
693
+ if isinstance(c, BinaryExpression):
694
+ right_left_join = add_left_rows_filter(c)
695
+
696
+ union = sqlalchemy.union(left_right_join, right_left_join).subquery()
697
+ return sqlalchemy.select(*union.c).select_from(union)
667
698
 
668
699
  def create_pre_udf_table(self, query: "Select") -> "Table":
669
700
  """
@@ -31,7 +31,7 @@ if TYPE_CHECKING:
31
31
  _FromClauseArgument,
32
32
  _OnClauseArgument,
33
33
  )
34
- from sqlalchemy.sql.selectable import Join, Select
34
+ from sqlalchemy.sql.selectable import Select
35
35
  from sqlalchemy.types import TypeEngine
36
36
 
37
37
  from datachain.data_storage import schema
@@ -873,7 +873,7 @@ class AbstractWarehouse(ABC, Serializable):
873
873
  right: "_FromClauseArgument",
874
874
  onclause: "_OnClauseArgument",
875
875
  inner: bool = True,
876
- ) -> "Join":
876
+ ) -> "Select":
877
877
  """
878
878
  Join two tables together.
879
879
  """
@@ -52,15 +52,15 @@ def python_to_sql(typ): # noqa: PLR0911
52
52
 
53
53
  args = get_args(typ)
54
54
  if inspect.isclass(orig) and (issubclass(list, orig) or issubclass(tuple, orig)):
55
- if args is None or len(args) != 1:
55
+ if args is None:
56
56
  raise TypeError(f"Cannot resolve type '{typ}' for flattening features")
57
57
 
58
58
  args0 = args[0]
59
59
  if ModelStore.is_pydantic(args0):
60
60
  return Array(JSON())
61
61
 
62
- next_type = python_to_sql(args0)
63
- return Array(next_type)
62
+ list_type = list_of_args_to_type(args)
63
+ return Array(list_type)
64
64
 
65
65
  if orig is Annotated:
66
66
  # Ignoring annotations
@@ -82,6 +82,18 @@ def python_to_sql(typ): # noqa: PLR0911
82
82
  raise TypeError(f"Cannot recognize type {typ}")
83
83
 
84
84
 
85
+ def list_of_args_to_type(args) -> SQLType:
86
+ first_type = python_to_sql(args[0])
87
+ for next_arg in args[1:]:
88
+ try:
89
+ next_type = python_to_sql(next_arg)
90
+ if next_type != first_type:
91
+ return JSON()
92
+ except TypeError:
93
+ return JSON()
94
+ return first_type
95
+
96
+
85
97
  def _is_json_inside_union(orig, args) -> bool:
86
98
  if orig == Union and len(args) >= 2:
87
99
  # List in JSON: Union[dict, list[dict]]
@@ -1332,6 +1332,7 @@ class DataChain:
1332
1332
  on: Union[MergeColType, Sequence[MergeColType]],
1333
1333
  right_on: Optional[Union[MergeColType, Sequence[MergeColType]]] = None,
1334
1334
  inner=False,
1335
+ full=False,
1335
1336
  rname="right_",
1336
1337
  ) -> "Self":
1337
1338
  """Merge two chains based on the specified criteria.
@@ -1345,6 +1346,7 @@ class DataChain:
1345
1346
  right_on: Optional predicate or list of Predicates for the `right_ds`
1346
1347
  to join.
1347
1348
  inner (bool): Whether to run inner join or outer join.
1349
+ full (bool): Whether to run full outer join.
1348
1350
  rname (str): Name prefix for conflicting signal names.
1349
1351
 
1350
1352
  Examples:
@@ -1419,7 +1421,7 @@ class DataChain:
1419
1421
  )
1420
1422
 
1421
1423
  query = self._query.join(
1422
- right_ds._query, sqlalchemy.and_(*ops), inner, rname + "{name}"
1424
+ right_ds._query, sqlalchemy.and_(*ops), inner, full, rname + "{name}"
1423
1425
  )
1424
1426
  query.feature_schema = None
1425
1427
  ds = self._evolve(query=query)
@@ -875,6 +875,7 @@ class SQLJoin(Step):
875
875
  query2: "DatasetQuery"
876
876
  predicates: Union[JoinPredicateType, tuple[JoinPredicateType, ...]]
877
877
  inner: bool
878
+ full: bool
878
879
  rname: str
879
880
 
880
881
  def get_query(self, dq: "DatasetQuery", temp_tables: list[str]) -> sa.Subquery:
@@ -977,14 +978,14 @@ class SQLJoin(Step):
977
978
  self.validate_expression(join_expression, q1, q2)
978
979
 
979
980
  def q(*columns):
980
- join_query = self.catalog.warehouse.join(
981
+ return self.catalog.warehouse.join(
981
982
  q1,
982
983
  q2,
983
984
  join_expression,
984
985
  inner=self.inner,
986
+ full=self.full,
987
+ columns=columns,
985
988
  )
986
- return sqlalchemy.select(*columns).select_from(join_query)
987
- # return sqlalchemy.select(*subquery.c).select_from(subquery)
988
989
 
989
990
  return step_result(
990
991
  q,
@@ -1489,6 +1490,7 @@ class DatasetQuery:
1489
1490
  dataset_query: "DatasetQuery",
1490
1491
  predicates: Union[JoinPredicateType, Sequence[JoinPredicateType]],
1491
1492
  inner=False,
1493
+ full=False,
1492
1494
  rname="{name}_right",
1493
1495
  ) -> "Self":
1494
1496
  left = self.clone(new_table=False)
@@ -1504,7 +1506,9 @@ class DatasetQuery:
1504
1506
  if isinstance(predicates, (str, ColumnClause, ColumnElement))
1505
1507
  else tuple(predicates)
1506
1508
  )
1507
- new_query.steps = [SQLJoin(self.catalog, left, right, predicates, inner, rname)]
1509
+ new_query.steps = [
1510
+ SQLJoin(self.catalog, left, right, predicates, inner, full, rname)
1511
+ ]
1508
1512
  return new_query
1509
1513
 
1510
1514
  @detach
@@ -75,7 +75,7 @@ class StudioClient:
75
75
 
76
76
  if not token:
77
77
  raise DataChainError(
78
- "Studio token is not set. Use `datachain studio login` "
78
+ "Studio token is not set. Use `datachain auth login` "
79
79
  "or environment variable `DVC_STUDIO_TOKEN` to set it."
80
80
  )
81
81
 
@@ -105,7 +105,7 @@ class StudioClient:
105
105
  if not team:
106
106
  raise DataChainError(
107
107
  "Studio team is not set. "
108
- "Use `datachain studio team <team_name>` "
108
+ "Use `datachain auth team <team_name>` "
109
109
  "or environment variable `DVC_STUDIO_TEAM` to set it."
110
110
  "You can also set it in the config file as team under studio."
111
111
  )
@@ -4,6 +4,7 @@ import sqlite3
4
4
  import warnings
5
5
  from collections.abc import Iterable
6
6
  from datetime import MAXYEAR, MINYEAR, datetime, timezone
7
+ from functools import cache
7
8
  from types import MappingProxyType
8
9
  from typing import Callable, Optional
9
10
 
@@ -526,24 +527,44 @@ def compile_collect(element, compiler, **kwargs):
526
527
  return compiler.process(func.json_group_array(*element.clauses.clauses), **kwargs)
527
528
 
528
529
 
529
- def load_usearch_extension(conn: sqlite3.Connection) -> bool:
530
+ @cache
531
+ def usearch_sqlite_path() -> Optional[str]:
530
532
  try:
531
- # usearch is part of the vector optional dependencies
532
- # we use the extension's cosine and euclidean distance functions
533
- from usearch import sqlite_path
533
+ import usearch
534
+ except ImportError:
535
+ return None
534
536
 
535
- conn.enable_load_extension(True)
537
+ with warnings.catch_warnings():
538
+ # usearch binary is not available for Windows, see: https://github.com/unum-cloud/usearch/issues/427.
539
+ # and, sometimes fail to download the binary in other platforms
540
+ # triggering UserWarning.
536
541
 
537
- with warnings.catch_warnings():
538
- # usearch binary is not available for Windows, see: https://github.com/unum-cloud/usearch/issues/427.
539
- # and, sometimes fail to download the binary in other platforms
540
- # triggering UserWarning.
542
+ warnings.filterwarnings("ignore", category=UserWarning, module="usearch")
541
543
 
542
- warnings.filterwarnings("ignore", category=UserWarning, module="usearch")
543
- conn.load_extension(sqlite_path())
544
+ try:
545
+ return usearch.sqlite_path()
546
+ except FileNotFoundError:
547
+ return None
544
548
 
545
- conn.enable_load_extension(False)
546
- return True
547
549
 
548
- except Exception: # noqa: BLE001
550
+ def load_usearch_extension(conn: sqlite3.Connection) -> bool:
551
+ # usearch is part of the vector optional dependencies
552
+ # we use the extension's cosine and euclidean distance functions
553
+ ext_path = usearch_sqlite_path()
554
+ if ext_path is None:
555
+ return False
556
+
557
+ try:
558
+ conn.enable_load_extension(True)
559
+ except AttributeError:
560
+ # sqlite3 module is not built with loadable extension support by default.
561
+ return False
562
+
563
+ try:
564
+ conn.load_extension(ext_path)
565
+ except sqlite3.OperationalError:
549
566
  return False
567
+ else:
568
+ return True
569
+ finally:
570
+ conn.enable_load_extension(False)
@@ -47,7 +47,7 @@ def process_jobs_args(args: "Namespace"):
47
47
  raise DataChainError(f"Unknown command '{args.cmd}'.")
48
48
 
49
49
 
50
- def process_studio_cli_args(args: "Namespace"):
50
+ def process_auth_cli_args(args: "Namespace"):
51
51
  if args.cmd is None:
52
52
  print(
53
53
  f"Use 'datachain {args.command} --help' to see available options",
@@ -95,7 +95,7 @@ def login(args: "Namespace"):
95
95
  raise DataChainError(
96
96
  "Token already exists. "
97
97
  "To login with a different token, "
98
- "logout using `datachain studio logout`."
98
+ "logout using `datachain auth logout`."
99
99
  )
100
100
 
101
101
  open_browser = not args.no_open
@@ -121,12 +121,12 @@ def logout():
121
121
  token = conf.get("studio", {}).get("token")
122
122
  if not token:
123
123
  raise DataChainError(
124
- "Not logged in to Studio. Log in with 'datachain studio login'."
124
+ "Not logged in to Studio. Log in with 'datachain auth login'."
125
125
  )
126
126
 
127
127
  del conf["studio"]["token"]
128
128
 
129
- print("Logged out from Studio. (you can log back in with 'datachain studio login')")
129
+ print("Logged out from Studio. (you can log back in with 'datachain auth login')")
130
130
 
131
131
 
132
132
  def token():
@@ -134,7 +134,7 @@ def token():
134
134
  token = config.get("token")
135
135
  if not token:
136
136
  raise DataChainError(
137
- "Not logged in to Studio. Log in with 'datachain studio login'."
137
+ "Not logged in to Studio. Log in with 'datachain auth login'."
138
138
  )
139
139
 
140
140
  print(token)
@@ -299,7 +299,7 @@ def cancel_job(job_id: str, team_name: Optional[str]):
299
299
  token = Config().read().get("studio", {}).get("token")
300
300
  if not token:
301
301
  raise DataChainError(
302
- "Not logged in to Studio. Log in with 'datachain studio login'."
302
+ "Not logged in to Studio. Log in with 'datachain auth login'."
303
303
  )
304
304
 
305
305
  client = StudioClient(team=team_name)
@@ -314,7 +314,7 @@ def show_job_logs(job_id: str, team_name: Optional[str]):
314
314
  token = Config().read().get("studio", {}).get("token")
315
315
  if not token:
316
316
  raise DataChainError(
317
- "Not logged in to Studio. Log in with 'datachain studio login'."
317
+ "Not logged in to Studio. Log in with 'datachain auth login'."
318
318
  )
319
319
 
320
320
  client = StudioClient(team=team_name)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: datachain
3
- Version: 0.8.9
3
+ Version: 0.8.10
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License: Apache-2.0
@@ -284,6 +284,7 @@ tests/unit/lib/test_hf.py
284
284
  tests/unit/lib/test_image.py
285
285
  tests/unit/lib/test_listing_info.py
286
286
  tests/unit/lib/test_models.py
287
+ tests/unit/lib/test_python_to_sql.py
287
288
  tests/unit/lib/test_schema.py
288
289
  tests/unit/lib/test_signal_schema.py
289
290
  tests/unit/lib/test_sql_to_python.py