datachain 0.7.6__py3-none-any.whl → 0.7.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

datachain/func/__init__.py CHANGED
@@ -1,4 +1,4 @@
1
- from sqlalchemy import literal
1
+ from sqlalchemy import case, literal
2
2
 
3
3
  from . import array, path, random, string
4
4
  from .aggregate import (
@@ -24,6 +24,7 @@ __all__ = [
24
24
  "any_value",
25
25
  "array",
26
26
  "avg",
27
+ "case",
27
28
  "collect",
28
29
  "concat",
29
30
  "cosine_distance",
datachain/func/func.py CHANGED
@@ -2,9 +2,11 @@ import inspect
2
2
  from collections.abc import Sequence
3
3
  from typing import TYPE_CHECKING, Any, Callable, Optional, Union
4
4
 
5
- from sqlalchemy import BindParameter, ColumnElement, desc
5
+ from sqlalchemy import BindParameter, Case, ColumnElement, desc
6
+ from sqlalchemy.ext.hybrid import Comparator
6
7
 
7
8
  from datachain.lib.convert.python_to_sql import python_to_sql
9
+ from datachain.lib.convert.sql_to_python import sql_to_python
8
10
  from datachain.lib.utils import DataChainColumnError, DataChainParamsError
9
11
  from datachain.query.schema import Column, ColumnMeta
10
12
 
@@ -71,7 +73,7 @@ class Func(Function):
71
73
  return (
72
74
  [
73
75
  col
74
- if isinstance(col, (Func, BindParameter))
76
+ if isinstance(col, (Func, BindParameter, Case, Comparator))
75
77
  else ColumnMeta.to_db_name(
76
78
  col.name if isinstance(col, ColumnElement) else col
77
79
  )
@@ -273,6 +275,9 @@ def get_db_col_type(signals_schema: "SignalSchema", col: ColT) -> "DataType":
273
275
  if isinstance(col, Func):
274
276
  return col.get_result_type(signals_schema)
275
277
 
278
+ if isinstance(col, ColumnElement) and not hasattr(col, "name"):
279
+ return sql_to_python(col)
280
+
276
281
  return signals_schema.get_column_type(
277
282
  col.name if isinstance(col, ColumnElement) else col
278
283
  )
datachain/lib/dc.py CHANGED
@@ -1150,7 +1150,7 @@ class DataChain:
1150
1150
  def group_by(
1151
1151
  self,
1152
1152
  *,
1153
- partition_by: Union[str, Func, Sequence[Union[str, Func]]],
1153
+ partition_by: Optional[Union[str, Func, Sequence[Union[str, Func]]]] = None,
1154
1154
  **kwargs: Func,
1155
1155
  ) -> "Self":
1156
1156
  """Group rows by specified set of signals and return new signals
@@ -1167,10 +1167,10 @@ class DataChain:
1167
1167
  )
1168
1168
  ```
1169
1169
  """
1170
- if isinstance(partition_by, (str, Func)):
1170
+ if partition_by is None:
1171
+ partition_by = []
1172
+ elif isinstance(partition_by, (str, Func)):
1171
1173
  partition_by = [partition_by]
1172
- if not partition_by:
1173
- raise ValueError("At least one column should be provided for partition_by")
1174
1174
 
1175
1175
  partition_by_columns: list[Column] = []
1176
1176
  signal_columns: list[Column] = []
datachain/query/dataset.py CHANGED
@@ -966,8 +966,6 @@ class SQLGroupBy(SQLClause):
966
966
  def apply_sql_clause(self, query) -> Select:
967
967
  if not self.cols:
968
968
  raise ValueError("No columns to select")
969
- if not self.group_by:
970
- raise ValueError("No columns to group by")
971
969
 
972
970
  subquery = query.subquery()
973
971
 
datachain-0.7.7.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datachain
3
- Version: 0.7.6
3
+ Version: 0.7.7
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License: Apache-2.0
datachain-0.7.7.dist-info/RECORD CHANGED
@@ -37,12 +37,12 @@ datachain/data_storage/schema.py,sha256=-QVlRvD0dfu-ZFUxylEoSnLJLnleMEjVlcAb2OGu
37
37
  datachain/data_storage/serializer.py,sha256=6G2YtOFqqDzJf1KbvZraKGXl2XHZyVml2krunWUum5o,927
38
38
  datachain/data_storage/sqlite.py,sha256=D_ZQ0PHmZzHO2dinv4naVJocUDIZUwV4WAz692C1cyk,22521
39
39
  datachain/data_storage/warehouse.py,sha256=tjIkU-5JywBR0apCyqTcwSyaRtGxhu2L7IVjrz-55uc,30802
40
- datachain/func/__init__.py,sha256=4VUt5BaLdBAl_BnAku0Jb8plqd7kDOiYrQTMG3pN0c4,794
40
+ datachain/func/__init__.py,sha256=VAN7N2-eCHgidMCFI-fJTkCwdI1U_NIuCOgYc4sfYUQ,812
41
41
  datachain/func/aggregate.py,sha256=7_IPrIwb2XSs3zG4iOr1eTvzn6kNVe2mkzvNzjusDHk,10942
42
42
  datachain/func/array.py,sha256=zHDNWuWLA7HVa9FEvQeHhVi00_xqenyleTqcLwkXWBI,5477
43
43
  datachain/func/base.py,sha256=wA0sBQAVyN9LPxoo7Ox83peS0zUVnyuKxukwAcjGLfY,534
44
44
  datachain/func/conditional.py,sha256=mQroxsoExpBW84Zm5dAYP4OpBblWmzfnF2qJq9rba54,2223
45
- datachain/func/func.py,sha256=9wqdxxisoDL0w8qKGQmL6sNdgJeIOzotEUPlxu9t2IQ,12326
45
+ datachain/func/func.py,sha256=GykhTvNbACFSwaSXsgVlDnqR48kpP_GNAxm3bcq1RYg,12560
46
46
  datachain/func/path.py,sha256=mqN_mfkwv44z2II7DMTp_fGGw95hmTCNls_TOFNpr4k,3155
47
47
  datachain/func/random.py,sha256=pENOLj9rSmWfGCnOsUIaCsVC5486zQb66qfQvXaz9Z4,452
48
48
  datachain/func/string.py,sha256=NQzaXXYu7yb72HPADy4WrFlcgvTS77L9x7-qvCKJtnk,4522
@@ -52,7 +52,7 @@ datachain/lib/arrow.py,sha256=b5efxAUaNNYVwtXVJqj07D3zf5KC-BPlLCxKEZbEG6w,9429
52
52
  datachain/lib/clip.py,sha256=lm5CzVi4Cj1jVLEKvERKArb-egb9j1Ls-fwTItT6vlI,6150
53
53
  datachain/lib/data_model.py,sha256=zS4lmXHVBXc9ntcyea2a1CRLXGSAN_0glXcF88CohgY,2685
54
54
  datachain/lib/dataset_info.py,sha256=IjdF1E0TQNOq9YyynfWiCFTeZpbyGfyJvxgJY4YN810,2493
55
- datachain/lib/dc.py,sha256=J7liATKQBJCkeHanVLr0s3d1t5wxiiiSJuSbuxKBbLg,89527
55
+ datachain/lib/dc.py,sha256=t5y5tsYyU7uuk3gEPPhhcDSZ1tL1aHkKG2W54eHiUq8,89492
56
56
  datachain/lib/file.py,sha256=-XMkL6ED1sE7TMhWoMRTEuOXswZJw8X6AEmJDONFP74,15019
57
57
  datachain/lib/hf.py,sha256=a-zFpDmZIR4r8dlNNTjfpAKSnuJ9xyRXlgcdENiXt3E,5864
58
58
  datachain/lib/image.py,sha256=AMXYwQsmarZjRbPCZY3M1jDsM2WAB_b3cTY4uOIuXNU,2675
@@ -87,7 +87,7 @@ datachain/model/ultralytics/pose.py,sha256=71KBTcoST2wcEtsyGXqLVpvUtqbp9gwZGA15p
87
87
  datachain/model/ultralytics/segment.py,sha256=Z1ab0tZRJubSYNH4KkFlzhYeGNTfAyC71KmkQcToHDQ,2760
88
88
  datachain/query/__init__.py,sha256=7DhEIjAA8uZJfejruAVMZVcGFmvUpffuZJwgRqNwe-c,263
89
89
  datachain/query/batch.py,sha256=5fEhORFe7li12SdYddaSK3LyqksMfCHhwN1_A6TfsA4,3485
90
- datachain/query/dataset.py,sha256=o9Ssa47t1IM78qcaoCeTL-rp4fZCpYfR7XFjw2hGWeY,54632
90
+ datachain/query/dataset.py,sha256=J6SbCLnFlZgCxRchc3tVk5tcC7xo1Hp616JGlEZXCDo,54547
91
91
  datachain/query/dispatch.py,sha256=fZ0TgGFRcsrYh1iXQoZVjkUl4Xetom9PSHoeDes3IRs,11606
92
92
  datachain/query/metrics.py,sha256=r5b0ygYhokbXp8Mg3kCH8iFSRw0jxzyeBe-C-J_bKFc,938
93
93
  datachain/query/params.py,sha256=O_j89mjYRLOwWNhYZl-z7mi-rkdP7WyFmaDufsdTryE,863
@@ -116,9 +116,9 @@ datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR
116
116
  datachain/toolkit/__init__.py,sha256=eQ58Q5Yf_Fgv1ZG0IO5dpB4jmP90rk8YxUWmPc1M2Bo,68
117
117
  datachain/toolkit/split.py,sha256=ZgDcrNiKiPXZmKD591_1z9qRIXitu5zwAsoVPB7ykiU,2508
118
118
  datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
119
- datachain-0.7.6.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
120
- datachain-0.7.6.dist-info/METADATA,sha256=KMChqSG7d_lMaF9BYNIgmijvnxZbDm5gCEg980gUGOA,18006
121
- datachain-0.7.6.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
122
- datachain-0.7.6.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
123
- datachain-0.7.6.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
124
- datachain-0.7.6.dist-info/RECORD,,
119
+ datachain-0.7.7.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
120
+ datachain-0.7.7.dist-info/METADATA,sha256=laxYaz9f-PIJ30f3krSjRu45CjyfbnBM8Q4kddXa9dM,18006
121
+ datachain-0.7.7.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
122
+ datachain-0.7.7.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
123
+ datachain-0.7.7.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
124
+ datachain-0.7.7.dist-info/RECORD,,