datachain 0.7.6__tar.gz → 0.7.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (278) hide show
  1. {datachain-0.7.6/src/datachain.egg-info → datachain-0.7.7}/PKG-INFO +1 -1
  2. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/func/__init__.py +2 -1
  3. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/func/func.py +7 -2
  4. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/lib/dc.py +4 -4
  5. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/query/dataset.py +0 -2
  6. {datachain-0.7.6 → datachain-0.7.7/src/datachain.egg-info}/PKG-INFO +1 -1
  7. {datachain-0.7.6 → datachain-0.7.7}/tests/unit/lib/test_datachain.py +71 -8
  8. {datachain-0.7.6 → datachain-0.7.7}/.cruft.json +0 -0
  9. {datachain-0.7.6 → datachain-0.7.7}/.gitattributes +0 -0
  10. {datachain-0.7.6 → datachain-0.7.7}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  11. {datachain-0.7.6 → datachain-0.7.7}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
  12. {datachain-0.7.6 → datachain-0.7.7}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  13. {datachain-0.7.6 → datachain-0.7.7}/.github/codecov.yaml +0 -0
  14. {datachain-0.7.6 → datachain-0.7.7}/.github/dependabot.yml +0 -0
  15. {datachain-0.7.6 → datachain-0.7.7}/.github/workflows/benchmarks.yml +0 -0
  16. {datachain-0.7.6 → datachain-0.7.7}/.github/workflows/release.yml +0 -0
  17. {datachain-0.7.6 → datachain-0.7.7}/.github/workflows/tests-studio.yml +0 -0
  18. {datachain-0.7.6 → datachain-0.7.7}/.github/workflows/tests.yml +0 -0
  19. {datachain-0.7.6 → datachain-0.7.7}/.github/workflows/update-template.yaml +0 -0
  20. {datachain-0.7.6 → datachain-0.7.7}/.gitignore +0 -0
  21. {datachain-0.7.6 → datachain-0.7.7}/.pre-commit-config.yaml +0 -0
  22. {datachain-0.7.6 → datachain-0.7.7}/CODE_OF_CONDUCT.rst +0 -0
  23. {datachain-0.7.6 → datachain-0.7.7}/CONTRIBUTING.rst +0 -0
  24. {datachain-0.7.6 → datachain-0.7.7}/LICENSE +0 -0
  25. {datachain-0.7.6 → datachain-0.7.7}/README.rst +0 -0
  26. {datachain-0.7.6 → datachain-0.7.7}/docs/assets/captioned_cartoons.png +0 -0
  27. {datachain-0.7.6 → datachain-0.7.7}/docs/assets/datachain-white.svg +0 -0
  28. {datachain-0.7.6 → datachain-0.7.7}/docs/assets/datachain.svg +0 -0
  29. {datachain-0.7.6 → datachain-0.7.7}/docs/index.md +0 -0
  30. {datachain-0.7.6 → datachain-0.7.7}/docs/overrides/main.html +0 -0
  31. {datachain-0.7.6 → datachain-0.7.7}/docs/references/datachain.md +0 -0
  32. {datachain-0.7.6 → datachain-0.7.7}/docs/references/datatype.md +0 -0
  33. {datachain-0.7.6 → datachain-0.7.7}/docs/references/file.md +0 -0
  34. {datachain-0.7.6 → datachain-0.7.7}/docs/references/index.md +0 -0
  35. {datachain-0.7.6 → datachain-0.7.7}/docs/references/sql.md +0 -0
  36. {datachain-0.7.6 → datachain-0.7.7}/docs/references/torch.md +0 -0
  37. {datachain-0.7.6 → datachain-0.7.7}/docs/references/udf.md +0 -0
  38. {datachain-0.7.6 → datachain-0.7.7}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
  39. {datachain-0.7.6 → datachain-0.7.7}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
  40. {datachain-0.7.6 → datachain-0.7.7}/examples/computer_vision/openimage-detect.py +0 -0
  41. {datachain-0.7.6 → datachain-0.7.7}/examples/computer_vision/ultralytics-bbox.py +0 -0
  42. {datachain-0.7.6 → datachain-0.7.7}/examples/computer_vision/ultralytics-pose.py +0 -0
  43. {datachain-0.7.6 → datachain-0.7.7}/examples/computer_vision/ultralytics-segment.py +0 -0
  44. {datachain-0.7.6 → datachain-0.7.7}/examples/get_started/common_sql_functions.py +0 -0
  45. {datachain-0.7.6 → datachain-0.7.7}/examples/get_started/json-csv-reader.py +0 -0
  46. {datachain-0.7.6 → datachain-0.7.7}/examples/get_started/torch-loader.py +0 -0
  47. {datachain-0.7.6 → datachain-0.7.7}/examples/get_started/udfs/parallel.py +0 -0
  48. {datachain-0.7.6 → datachain-0.7.7}/examples/get_started/udfs/simple.py +0 -0
  49. {datachain-0.7.6 → datachain-0.7.7}/examples/get_started/udfs/stateful.py +0 -0
  50. {datachain-0.7.6 → datachain-0.7.7}/examples/llm_and_nlp/claude-query.py +0 -0
  51. {datachain-0.7.6 → datachain-0.7.7}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
  52. {datachain-0.7.6 → datachain-0.7.7}/examples/llm_and_nlp/unstructured-embeddings-gen.py +0 -0
  53. {datachain-0.7.6 → datachain-0.7.7}/examples/llm_and_nlp/unstructured-summary-map.py +0 -0
  54. {datachain-0.7.6 → datachain-0.7.7}/examples/multimodal/clip_inference.py +0 -0
  55. {datachain-0.7.6 → datachain-0.7.7}/examples/multimodal/hf_pipeline.py +0 -0
  56. {datachain-0.7.6 → datachain-0.7.7}/examples/multimodal/openai_image_desc_lib.py +0 -0
  57. {datachain-0.7.6 → datachain-0.7.7}/examples/multimodal/wds.py +0 -0
  58. {datachain-0.7.6 → datachain-0.7.7}/examples/multimodal/wds_filtered.py +0 -0
  59. {datachain-0.7.6 → datachain-0.7.7}/mkdocs.yml +0 -0
  60. {datachain-0.7.6 → datachain-0.7.7}/noxfile.py +0 -0
  61. {datachain-0.7.6 → datachain-0.7.7}/pyproject.toml +0 -0
  62. {datachain-0.7.6 → datachain-0.7.7}/setup.cfg +0 -0
  63. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/__init__.py +0 -0
  64. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/__main__.py +0 -0
  65. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/asyn.py +0 -0
  66. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/cache.py +0 -0
  67. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/catalog/__init__.py +0 -0
  68. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/catalog/catalog.py +0 -0
  69. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/catalog/datasource.py +0 -0
  70. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/catalog/loader.py +0 -0
  71. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/cli.py +0 -0
  72. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/cli_utils.py +0 -0
  73. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/client/__init__.py +0 -0
  74. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/client/azure.py +0 -0
  75. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/client/fileslice.py +0 -0
  76. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/client/fsspec.py +0 -0
  77. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/client/gcs.py +0 -0
  78. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/client/hf.py +0 -0
  79. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/client/local.py +0 -0
  80. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/client/s3.py +0 -0
  81. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/config.py +0 -0
  82. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/data_storage/__init__.py +0 -0
  83. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/data_storage/db_engine.py +0 -0
  84. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/data_storage/job.py +0 -0
  85. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/data_storage/metastore.py +0 -0
  86. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/data_storage/schema.py +0 -0
  87. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/data_storage/serializer.py +0 -0
  88. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/data_storage/sqlite.py +0 -0
  89. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/data_storage/warehouse.py +0 -0
  90. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/dataset.py +0 -0
  91. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/error.py +0 -0
  92. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/func/aggregate.py +0 -0
  93. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/func/array.py +0 -0
  94. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/func/base.py +0 -0
  95. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/func/conditional.py +0 -0
  96. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/func/path.py +0 -0
  97. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/func/random.py +0 -0
  98. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/func/string.py +0 -0
  99. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/func/window.py +0 -0
  100. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/job.py +0 -0
  101. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/lib/__init__.py +0 -0
  102. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/lib/arrow.py +0 -0
  103. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/lib/clip.py +0 -0
  104. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/lib/convert/__init__.py +0 -0
  105. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/lib/convert/flatten.py +0 -0
  106. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/lib/convert/python_to_sql.py +0 -0
  107. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/lib/convert/sql_to_python.py +0 -0
  108. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/lib/convert/unflatten.py +0 -0
  109. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/lib/convert/values_to_tuples.py +0 -0
  110. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/lib/data_model.py +0 -0
  111. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/lib/dataset_info.py +0 -0
  112. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/lib/file.py +0 -0
  113. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/lib/hf.py +0 -0
  114. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/lib/image.py +0 -0
  115. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/lib/listing.py +0 -0
  116. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/lib/listing_info.py +0 -0
  117. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/lib/meta_formats.py +0 -0
  118. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/lib/model_store.py +0 -0
  119. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/lib/pytorch.py +0 -0
  120. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/lib/settings.py +0 -0
  121. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/lib/signal_schema.py +0 -0
  122. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/lib/tar.py +0 -0
  123. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/lib/text.py +0 -0
  124. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/lib/udf.py +0 -0
  125. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/lib/udf_signature.py +0 -0
  126. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/lib/utils.py +0 -0
  127. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/lib/vfile.py +0 -0
  128. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/lib/webdataset.py +0 -0
  129. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/lib/webdataset_laion.py +0 -0
  130. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/listing.py +0 -0
  131. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/model/__init__.py +0 -0
  132. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/model/bbox.py +0 -0
  133. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/model/pose.py +0 -0
  134. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/model/segment.py +0 -0
  135. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/model/ultralytics/__init__.py +0 -0
  136. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/model/ultralytics/bbox.py +0 -0
  137. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/model/ultralytics/pose.py +0 -0
  138. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/model/ultralytics/segment.py +0 -0
  139. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/node.py +0 -0
  140. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/nodes_fetcher.py +0 -0
  141. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/nodes_thread_pool.py +0 -0
  142. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/progress.py +0 -0
  143. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/py.typed +0 -0
  144. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/query/__init__.py +0 -0
  145. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/query/batch.py +0 -0
  146. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/query/dispatch.py +0 -0
  147. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/query/metrics.py +0 -0
  148. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/query/params.py +0 -0
  149. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/query/queue.py +0 -0
  150. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/query/schema.py +0 -0
  151. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/query/session.py +0 -0
  152. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/remote/__init__.py +0 -0
  153. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/remote/studio.py +0 -0
  154. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/sql/__init__.py +0 -0
  155. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/sql/default/__init__.py +0 -0
  156. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/sql/default/base.py +0 -0
  157. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/sql/functions/__init__.py +0 -0
  158. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/sql/functions/aggregate.py +0 -0
  159. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/sql/functions/array.py +0 -0
  160. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/sql/functions/conditional.py +0 -0
  161. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/sql/functions/path.py +0 -0
  162. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/sql/functions/random.py +0 -0
  163. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/sql/functions/string.py +0 -0
  164. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/sql/selectable.py +0 -0
  165. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/sql/sqlite/__init__.py +0 -0
  166. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/sql/sqlite/base.py +0 -0
  167. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/sql/sqlite/types.py +0 -0
  168. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/sql/sqlite/vector.py +0 -0
  169. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/sql/types.py +0 -0
  170. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/sql/utils.py +0 -0
  171. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/studio.py +0 -0
  172. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/telemetry.py +0 -0
  173. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/toolkit/__init__.py +0 -0
  174. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/toolkit/split.py +0 -0
  175. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/torch/__init__.py +0 -0
  176. {datachain-0.7.6 → datachain-0.7.7}/src/datachain/utils.py +0 -0
  177. {datachain-0.7.6 → datachain-0.7.7}/src/datachain.egg-info/SOURCES.txt +0 -0
  178. {datachain-0.7.6 → datachain-0.7.7}/src/datachain.egg-info/dependency_links.txt +0 -0
  179. {datachain-0.7.6 → datachain-0.7.7}/src/datachain.egg-info/entry_points.txt +0 -0
  180. {datachain-0.7.6 → datachain-0.7.7}/src/datachain.egg-info/requires.txt +0 -0
  181. {datachain-0.7.6 → datachain-0.7.7}/src/datachain.egg-info/top_level.txt +0 -0
  182. {datachain-0.7.6 → datachain-0.7.7}/tests/__init__.py +0 -0
  183. {datachain-0.7.6 → datachain-0.7.7}/tests/benchmarks/__init__.py +0 -0
  184. {datachain-0.7.6 → datachain-0.7.7}/tests/benchmarks/conftest.py +0 -0
  185. {datachain-0.7.6 → datachain-0.7.7}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
  186. {datachain-0.7.6 → datachain-0.7.7}/tests/benchmarks/datasets/.dvc/config +0 -0
  187. {datachain-0.7.6 → datachain-0.7.7}/tests/benchmarks/datasets/.gitignore +0 -0
  188. {datachain-0.7.6 → datachain-0.7.7}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
  189. {datachain-0.7.6 → datachain-0.7.7}/tests/benchmarks/test_datachain.py +0 -0
  190. {datachain-0.7.6 → datachain-0.7.7}/tests/benchmarks/test_ls.py +0 -0
  191. {datachain-0.7.6 → datachain-0.7.7}/tests/benchmarks/test_version.py +0 -0
  192. {datachain-0.7.6 → datachain-0.7.7}/tests/conftest.py +0 -0
  193. {datachain-0.7.6 → datachain-0.7.7}/tests/data.py +0 -0
  194. {datachain-0.7.6 → datachain-0.7.7}/tests/examples/__init__.py +0 -0
  195. {datachain-0.7.6 → datachain-0.7.7}/tests/examples/test_examples.py +0 -0
  196. {datachain-0.7.6 → datachain-0.7.7}/tests/examples/test_wds_e2e.py +0 -0
  197. {datachain-0.7.6 → datachain-0.7.7}/tests/examples/wds_data.py +0 -0
  198. {datachain-0.7.6 → datachain-0.7.7}/tests/func/__init__.py +0 -0
  199. {datachain-0.7.6 → datachain-0.7.7}/tests/func/test_catalog.py +0 -0
  200. {datachain-0.7.6 → datachain-0.7.7}/tests/func/test_client.py +0 -0
  201. {datachain-0.7.6 → datachain-0.7.7}/tests/func/test_datachain.py +0 -0
  202. {datachain-0.7.6 → datachain-0.7.7}/tests/func/test_dataset_query.py +0 -0
  203. {datachain-0.7.6 → datachain-0.7.7}/tests/func/test_datasets.py +0 -0
  204. {datachain-0.7.6 → datachain-0.7.7}/tests/func/test_feature_pickling.py +0 -0
  205. {datachain-0.7.6 → datachain-0.7.7}/tests/func/test_listing.py +0 -0
  206. {datachain-0.7.6 → datachain-0.7.7}/tests/func/test_ls.py +0 -0
  207. {datachain-0.7.6 → datachain-0.7.7}/tests/func/test_meta_formats.py +0 -0
  208. {datachain-0.7.6 → datachain-0.7.7}/tests/func/test_metrics.py +0 -0
  209. {datachain-0.7.6 → datachain-0.7.7}/tests/func/test_pull.py +0 -0
  210. {datachain-0.7.6 → datachain-0.7.7}/tests/func/test_pytorch.py +0 -0
  211. {datachain-0.7.6 → datachain-0.7.7}/tests/func/test_query.py +0 -0
  212. {datachain-0.7.6 → datachain-0.7.7}/tests/func/test_toolkit.py +0 -0
  213. {datachain-0.7.6 → datachain-0.7.7}/tests/scripts/feature_class.py +0 -0
  214. {datachain-0.7.6 → datachain-0.7.7}/tests/scripts/feature_class_exception.py +0 -0
  215. {datachain-0.7.6 → datachain-0.7.7}/tests/scripts/feature_class_parallel.py +0 -0
  216. {datachain-0.7.6 → datachain-0.7.7}/tests/scripts/feature_class_parallel_data_model.py +0 -0
  217. {datachain-0.7.6 → datachain-0.7.7}/tests/scripts/name_len_slow.py +0 -0
  218. {datachain-0.7.6 → datachain-0.7.7}/tests/test_atomicity.py +0 -0
  219. {datachain-0.7.6 → datachain-0.7.7}/tests/test_cli_e2e.py +0 -0
  220. {datachain-0.7.6 → datachain-0.7.7}/tests/test_cli_studio.py +0 -0
  221. {datachain-0.7.6 → datachain-0.7.7}/tests/test_query_e2e.py +0 -0
  222. {datachain-0.7.6 → datachain-0.7.7}/tests/test_telemetry.py +0 -0
  223. {datachain-0.7.6 → datachain-0.7.7}/tests/unit/__init__.py +0 -0
  224. {datachain-0.7.6 → datachain-0.7.7}/tests/unit/lib/__init__.py +0 -0
  225. {datachain-0.7.6 → datachain-0.7.7}/tests/unit/lib/conftest.py +0 -0
  226. {datachain-0.7.6 → datachain-0.7.7}/tests/unit/lib/test_arrow.py +0 -0
  227. {datachain-0.7.6 → datachain-0.7.7}/tests/unit/lib/test_clip.py +0 -0
  228. {datachain-0.7.6 → datachain-0.7.7}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
  229. {datachain-0.7.6 → datachain-0.7.7}/tests/unit/lib/test_datachain_merge.py +0 -0
  230. {datachain-0.7.6 → datachain-0.7.7}/tests/unit/lib/test_feature.py +0 -0
  231. {datachain-0.7.6 → datachain-0.7.7}/tests/unit/lib/test_feature_utils.py +0 -0
  232. {datachain-0.7.6 → datachain-0.7.7}/tests/unit/lib/test_file.py +0 -0
  233. {datachain-0.7.6 → datachain-0.7.7}/tests/unit/lib/test_hf.py +0 -0
  234. {datachain-0.7.6 → datachain-0.7.7}/tests/unit/lib/test_image.py +0 -0
  235. {datachain-0.7.6 → datachain-0.7.7}/tests/unit/lib/test_listing_info.py +0 -0
  236. {datachain-0.7.6 → datachain-0.7.7}/tests/unit/lib/test_models.py +0 -0
  237. {datachain-0.7.6 → datachain-0.7.7}/tests/unit/lib/test_schema.py +0 -0
  238. {datachain-0.7.6 → datachain-0.7.7}/tests/unit/lib/test_signal_schema.py +0 -0
  239. {datachain-0.7.6 → datachain-0.7.7}/tests/unit/lib/test_sql_to_python.py +0 -0
  240. {datachain-0.7.6 → datachain-0.7.7}/tests/unit/lib/test_text.py +0 -0
  241. {datachain-0.7.6 → datachain-0.7.7}/tests/unit/lib/test_udf_signature.py +0 -0
  242. {datachain-0.7.6 → datachain-0.7.7}/tests/unit/lib/test_utils.py +0 -0
  243. {datachain-0.7.6 → datachain-0.7.7}/tests/unit/lib/test_webdataset.py +0 -0
  244. {datachain-0.7.6 → datachain-0.7.7}/tests/unit/sql/__init__.py +0 -0
  245. {datachain-0.7.6 → datachain-0.7.7}/tests/unit/sql/sqlite/__init__.py +0 -0
  246. {datachain-0.7.6 → datachain-0.7.7}/tests/unit/sql/sqlite/test_types.py +0 -0
  247. {datachain-0.7.6 → datachain-0.7.7}/tests/unit/sql/sqlite/test_utils.py +0 -0
  248. {datachain-0.7.6 → datachain-0.7.7}/tests/unit/sql/test_array.py +0 -0
  249. {datachain-0.7.6 → datachain-0.7.7}/tests/unit/sql/test_conditional.py +0 -0
  250. {datachain-0.7.6 → datachain-0.7.7}/tests/unit/sql/test_path.py +0 -0
  251. {datachain-0.7.6 → datachain-0.7.7}/tests/unit/sql/test_random.py +0 -0
  252. {datachain-0.7.6 → datachain-0.7.7}/tests/unit/sql/test_selectable.py +0 -0
  253. {datachain-0.7.6 → datachain-0.7.7}/tests/unit/sql/test_string.py +0 -0
  254. {datachain-0.7.6 → datachain-0.7.7}/tests/unit/test_asyn.py +0 -0
  255. {datachain-0.7.6 → datachain-0.7.7}/tests/unit/test_cache.py +0 -0
  256. {datachain-0.7.6 → datachain-0.7.7}/tests/unit/test_catalog.py +0 -0
  257. {datachain-0.7.6 → datachain-0.7.7}/tests/unit/test_catalog_loader.py +0 -0
  258. {datachain-0.7.6 → datachain-0.7.7}/tests/unit/test_cli_parsing.py +0 -0
  259. {datachain-0.7.6 → datachain-0.7.7}/tests/unit/test_client.py +0 -0
  260. {datachain-0.7.6 → datachain-0.7.7}/tests/unit/test_client_s3.py +0 -0
  261. {datachain-0.7.6 → datachain-0.7.7}/tests/unit/test_config.py +0 -0
  262. {datachain-0.7.6 → datachain-0.7.7}/tests/unit/test_data_storage.py +0 -0
  263. {datachain-0.7.6 → datachain-0.7.7}/tests/unit/test_database_engine.py +0 -0
  264. {datachain-0.7.6 → datachain-0.7.7}/tests/unit/test_dataset.py +0 -0
  265. {datachain-0.7.6 → datachain-0.7.7}/tests/unit/test_dispatch.py +0 -0
  266. {datachain-0.7.6 → datachain-0.7.7}/tests/unit/test_fileslice.py +0 -0
  267. {datachain-0.7.6 → datachain-0.7.7}/tests/unit/test_func.py +0 -0
  268. {datachain-0.7.6 → datachain-0.7.7}/tests/unit/test_listing.py +0 -0
  269. {datachain-0.7.6 → datachain-0.7.7}/tests/unit/test_metastore.py +0 -0
  270. {datachain-0.7.6 → datachain-0.7.7}/tests/unit/test_module_exports.py +0 -0
  271. {datachain-0.7.6 → datachain-0.7.7}/tests/unit/test_query.py +0 -0
  272. {datachain-0.7.6 → datachain-0.7.7}/tests/unit/test_query_metrics.py +0 -0
  273. {datachain-0.7.6 → datachain-0.7.7}/tests/unit/test_query_params.py +0 -0
  274. {datachain-0.7.6 → datachain-0.7.7}/tests/unit/test_serializer.py +0 -0
  275. {datachain-0.7.6 → datachain-0.7.7}/tests/unit/test_session.py +0 -0
  276. {datachain-0.7.6 → datachain-0.7.7}/tests/unit/test_utils.py +0 -0
  277. {datachain-0.7.6 → datachain-0.7.7}/tests/unit/test_warehouse.py +0 -0
  278. {datachain-0.7.6 → datachain-0.7.7}/tests/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datachain
3
- Version: 0.7.6
3
+ Version: 0.7.7
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License: Apache-2.0
@@ -1,4 +1,4 @@
1
- from sqlalchemy import literal
1
+ from sqlalchemy import case, literal
2
2
 
3
3
  from . import array, path, random, string
4
4
  from .aggregate import (
@@ -24,6 +24,7 @@ __all__ = [
24
24
  "any_value",
25
25
  "array",
26
26
  "avg",
27
+ "case",
27
28
  "collect",
28
29
  "concat",
29
30
  "cosine_distance",
@@ -2,9 +2,11 @@ import inspect
2
2
  from collections.abc import Sequence
3
3
  from typing import TYPE_CHECKING, Any, Callable, Optional, Union
4
4
 
5
- from sqlalchemy import BindParameter, ColumnElement, desc
5
+ from sqlalchemy import BindParameter, Case, ColumnElement, desc
6
+ from sqlalchemy.ext.hybrid import Comparator
6
7
 
7
8
  from datachain.lib.convert.python_to_sql import python_to_sql
9
+ from datachain.lib.convert.sql_to_python import sql_to_python
8
10
  from datachain.lib.utils import DataChainColumnError, DataChainParamsError
9
11
  from datachain.query.schema import Column, ColumnMeta
10
12
 
@@ -71,7 +73,7 @@ class Func(Function):
71
73
  return (
72
74
  [
73
75
  col
74
- if isinstance(col, (Func, BindParameter))
76
+ if isinstance(col, (Func, BindParameter, Case, Comparator))
75
77
  else ColumnMeta.to_db_name(
76
78
  col.name if isinstance(col, ColumnElement) else col
77
79
  )
@@ -273,6 +275,9 @@ def get_db_col_type(signals_schema: "SignalSchema", col: ColT) -> "DataType":
273
275
  if isinstance(col, Func):
274
276
  return col.get_result_type(signals_schema)
275
277
 
278
+ if isinstance(col, ColumnElement) and not hasattr(col, "name"):
279
+ return sql_to_python(col)
280
+
276
281
  return signals_schema.get_column_type(
277
282
  col.name if isinstance(col, ColumnElement) else col
278
283
  )
@@ -1150,7 +1150,7 @@ class DataChain:
1150
1150
  def group_by(
1151
1151
  self,
1152
1152
  *,
1153
- partition_by: Union[str, Func, Sequence[Union[str, Func]]],
1153
+ partition_by: Optional[Union[str, Func, Sequence[Union[str, Func]]]] = None,
1154
1154
  **kwargs: Func,
1155
1155
  ) -> "Self":
1156
1156
  """Group rows by specified set of signals and return new signals
@@ -1167,10 +1167,10 @@ class DataChain:
1167
1167
  )
1168
1168
  ```
1169
1169
  """
1170
- if isinstance(partition_by, (str, Func)):
1170
+ if partition_by is None:
1171
+ partition_by = []
1172
+ elif isinstance(partition_by, (str, Func)):
1171
1173
  partition_by = [partition_by]
1172
- if not partition_by:
1173
- raise ValueError("At least one column should be provided for partition_by")
1174
1174
 
1175
1175
  partition_by_columns: list[Column] = []
1176
1176
  signal_columns: list[Column] = []
@@ -966,8 +966,6 @@ class SQLGroupBy(SQLClause):
966
966
  def apply_sql_clause(self, query) -> Select:
967
967
  if not self.cols:
968
968
  raise ValueError("No columns to select")
969
- if not self.group_by:
970
- raise ValueError("No columns to group by")
971
969
 
972
970
  subquery = query.subquery()
973
971
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datachain
3
- Version: 0.7.6
3
+ Version: 0.7.7
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License: Apache-2.0
@@ -2731,6 +2731,49 @@ def test_group_by_multiple_partition_by(test_session):
2731
2731
  )
2732
2732
 
2733
2733
 
2734
+ def test_group_by_no_partition_by(test_session):
2735
+ from datachain import func
2736
+
2737
+ ds = (
2738
+ DataChain.from_values(
2739
+ col1=["a", "a", "b", "b", "b", "c"],
2740
+ col2=[1, 2, 1, 2, 1, 2],
2741
+ col3=[1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
2742
+ col4=["1", "2", "3", "4", "5", "6"],
2743
+ session=test_session,
2744
+ )
2745
+ .order_by("col4")
2746
+ .group_by(
2747
+ cnt=func.count(),
2748
+ cnt_col=func.count("col2"),
2749
+ sum=func.sum("col3"),
2750
+ concat=func.concat("col4"),
2751
+ value=func.any_value("col3"),
2752
+ collect=func.collect("col3"),
2753
+ )
2754
+ .save("my-ds")
2755
+ )
2756
+
2757
+ assert ds.signals_schema.serialize() == {
2758
+ "cnt": "int",
2759
+ "cnt_col": "int",
2760
+ "sum": "float",
2761
+ "concat": "str",
2762
+ "value": "float",
2763
+ "collect": "list[float]",
2764
+ }
2765
+ assert ds.to_records() == [
2766
+ {
2767
+ "cnt": 6,
2768
+ "cnt_col": 6,
2769
+ "sum": 21.0,
2770
+ "concat": "123456",
2771
+ "value": 1.0,
2772
+ "collect": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
2773
+ },
2774
+ ]
2775
+
2776
+
2734
2777
  def test_group_by_error(test_session):
2735
2778
  from datachain import func
2736
2779
 
@@ -2740,14 +2783,6 @@ def test_group_by_error(test_session):
2740
2783
  session=test_session,
2741
2784
  )
2742
2785
 
2743
- with pytest.raises(TypeError):
2744
- dc.group_by(cnt=func.count())
2745
-
2746
- with pytest.raises(
2747
- ValueError, match="At least one column should be provided for partition_by"
2748
- ):
2749
- dc.group_by(cnt=func.count(), partition_by=())
2750
-
2751
2786
  with pytest.raises(
2752
2787
  ValueError, match="At least one column should be provided for group_by"
2753
2788
  ):
@@ -2770,6 +2805,34 @@ def test_group_by_error(test_session):
2770
2805
  dc.group_by(foo=func.sum("col2"), partition_by="col3")
2771
2806
 
2772
2807
 
2808
+ def test_group_by_case(test_session):
2809
+ from datachain import func
2810
+
2811
+ ds = (
2812
+ DataChain.from_values(
2813
+ col1=[1.0, 0.0, 3.2, 0.1, 5.9, -1.0],
2814
+ col2=[0.0, 6.1, -0.05, 3.7, 0.1, -3.0],
2815
+ session=test_session,
2816
+ )
2817
+ .group_by(
2818
+ col1=func.sum(func.case((C("col1") > 0.1, 1), else_=0)),
2819
+ col2=func.sum(func.case((C("col2") < 0.0, 1), else_=0)),
2820
+ )
2821
+ .save("my-ds")
2822
+ )
2823
+
2824
+ assert ds.signals_schema.serialize() == {
2825
+ "col1": "int",
2826
+ "col2": "int",
2827
+ }
2828
+ assert ds.to_records() == [
2829
+ {
2830
+ "col1": 3,
2831
+ "col2": 2,
2832
+ }
2833
+ ]
2834
+
2835
+
2773
2836
  @pytest.mark.parametrize("desc", [True, False])
2774
2837
  def test_window_functions(test_session, desc):
2775
2838
  from datachain import func
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes