datachain 0.34.4__py3-none-any.whl → 0.34.6__py3-none-any.whl

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

datachain/lib/udf.py CHANGED
@@ -160,9 +160,15 @@ class UDFBase(AbstractUDF):
160
160
  """
161
161
  Creates SHA hash of this UDF function. It takes into account function,
162
162
  inputs and outputs.
163
+
164
+ For function-based UDFs, hashes self._func.
165
+ For class-based UDFs, hashes the process method.
163
166
  """
167
+ # Hash user code: either _func (function-based) or process method (class-based)
168
+ func_to_hash = self._func if self._func else self.process
169
+
164
170
  parts = [
165
- hash_callable(self._func),
171
+ hash_callable(func_to_hash),
166
172
  self.params.hash() if self.params else "",
167
173
  self.output.hash(),
168
174
  ]
@@ -982,18 +982,26 @@ class SQLUnion(Step):
982
982
 
983
983
  columns1, columns2 = _order_columns(q1.columns, q2.columns)
984
984
 
985
+ union_select = sqlalchemy.select(*columns1).union_all(
986
+ sqlalchemy.select(*columns2)
987
+ )
988
+ union_cte = union_select.cte()
989
+ regenerated = self.query1.catalog.warehouse._regenerate_system_columns(
990
+ union_cte
991
+ )
992
+ result_columns = tuple(regenerated.selected_columns)
993
+
985
994
  def q(*columns):
986
- names = {c.name for c in columns}
987
- col1 = [c for c in columns1 if c.name in names]
988
- col2 = [c for c in columns2 if c.name in names]
989
- res = sqlalchemy.select(*col1).union_all(sqlalchemy.select(*col2))
995
+ if not columns:
996
+ return regenerated
990
997
 
991
- subquery = res.subquery()
992
- return sqlalchemy.select(*subquery.c).select_from(subquery)
998
+ names = {c.name for c in columns}
999
+ selected = [c for c in result_columns if c.name in names]
1000
+ return regenerated.with_only_columns(*selected)
993
1001
 
994
1002
  return step_result(
995
1003
  q,
996
- columns1,
1004
+ result_columns,
997
1005
  dependencies=self.query1.dependencies | self.query2.dependencies,
998
1006
  )
999
1007
 
datachain/query/utils.py CHANGED
@@ -2,12 +2,16 @@ from typing import Optional, Union
2
2
 
3
3
  import sqlalchemy as sa
4
4
 
5
- ColT = Union[sa.Column, sa.ColumnElement, sa.TextClause, sa.Label]
5
+ ColT = Union[sa.ColumnClause, sa.Column, sa.ColumnElement, sa.TextClause, sa.Label]
6
6
 
7
7
 
8
8
  def column_name(col: ColT) -> str:
9
9
  """Returns column name from column element."""
10
- return col.name if isinstance(col, (sa.Column, sa.Label)) else str(col)
10
+ return (
11
+ col.name
12
+ if isinstance(col, (sa.ColumnClause, sa.Column, sa.Label))
13
+ else str(col)
14
+ )
11
15
 
12
16
 
13
17
  def get_query_column(query: sa.Select, name: str) -> Optional[ColT]:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.34.4
3
+ Version: 0.34.6
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -93,7 +93,7 @@ datachain/lib/settings.py,sha256=xBQEPZfgaYKhHIFLd0u5CBTYDcJS8ZHCm47x7GJErFU,766
93
93
  datachain/lib/signal_schema.py,sha256=NsL2ISnSRN-lKRpXzB9CtsUj2tVKcoAe73TaaZKMT-0,40774
94
94
  datachain/lib/tar.py,sha256=MLcVjzIgBqRuJacCNpZ6kwSZNq1i2tLyROc8PVprHsA,999
95
95
  datachain/lib/text.py,sha256=UNHm8fhidk7wdrWqacEWaA6I9ykfYqarQ2URby7jc7M,1261
96
- datachain/lib/udf.py,sha256=DdUxGBo9Y7Jz6aTBKgwex7YfK1RNaGm1JUlXCqs7qnw,18122
96
+ datachain/lib/udf.py,sha256=bU_7xj6Mz4SsajFJ6tmrZm9Ygoi1ESoOyh1Q00W2zX4,18389
97
97
  datachain/lib/udf_signature.py,sha256=Yz20iJ-WF1pijT3hvcDIKFzgWV9gFxZM73KZRx3NbPk,7560
98
98
  datachain/lib/utils.py,sha256=RLji1gHnfDXtJCnBo8BcNu1obndFpVsXJ_1Vb-FQ9Qo,4554
99
99
  datachain/lib/video.py,sha256=ddVstiMkfxyBPDsnjCKY0d_93bw-DcMqGqN60yzsZoo,6851
@@ -131,7 +131,7 @@ datachain/model/ultralytics/pose.py,sha256=pvoXrWWUSWT_UBaMwUb5MBHAY57Co2HFDPigF
131
131
  datachain/model/ultralytics/segment.py,sha256=v9_xDxd5zw_I8rXsbl7yQXgEdTs2T38zyY_Y4XGN8ok,3194
132
132
  datachain/query/__init__.py,sha256=7DhEIjAA8uZJfejruAVMZVcGFmvUpffuZJwgRqNwe-c,263
133
133
  datachain/query/batch.py,sha256=ocPeNgrJM6Y_6SYCx3O2cwlCFAhNMfoYgB99GP6A1Bg,4294
134
- datachain/query/dataset.py,sha256=RQLNc368vrKI6EdsugbXWFbJobl430yXV-Ks1i4sdfo,67893
134
+ datachain/query/dataset.py,sha256=I55ubMnoWpjoc4Ntw8zbp-i-49w0I95J7hCk_OCU6IU,68110
135
135
  datachain/query/dispatch.py,sha256=pygp7xg3lUDKlYHhecKxW5fB3zOSX1fPJfZBU4dfijk,16067
136
136
  datachain/query/metrics.py,sha256=DOK5HdNVaRugYPjl8qnBONvTkwjMloLqAr7Mi3TjCO0,858
137
137
  datachain/query/params.py,sha256=O_j89mjYRLOwWNhYZl-z7mi-rkdP7WyFmaDufsdTryE,863
@@ -139,7 +139,7 @@ datachain/query/queue.py,sha256=v0UeK4ilmdiRoJ5OdjB5qpnHTYDxRP4vhVp5Iw_toaI,3512
139
139
  datachain/query/schema.py,sha256=qLpEyvnzKlNCOrThQiTNpUKTUEsVIHT9trt-0UMt6ko,6704
140
140
  datachain/query/session.py,sha256=gKblltJAVQAVSTswAgWGDgGbpmFlFzFVkIQojDCjgXM,6809
141
141
  datachain/query/udf.py,sha256=jqutTpvkT6eHl96ZEgYiiTMAhI7vmTQA6JH9y4WCibI,1405
142
- datachain/query/utils.py,sha256=a2PTBZ3qsG6XlUcp9XsoGiQfKkca4Q3m-VzFgiGQPAc,1230
142
+ datachain/query/utils.py,sha256=K2yECxgc6ywcSV-XenzpFL9csZbtbRtoDn36_Sni2M0,1296
143
143
  datachain/remote/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
144
144
  datachain/remote/studio.py,sha256=amjcV0B8qumsVBnxPQnt8oSrnfMK2vAdOurVMA9L_zA,16868
145
145
  datachain/sql/__init__.py,sha256=8D2omsBiATt8bjLjGo6jBEtaKEkOlnlNFWhVryHMDv0,388
@@ -165,9 +165,9 @@ datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR
165
165
  datachain/toolkit/__init__.py,sha256=eQ58Q5Yf_Fgv1ZG0IO5dpB4jmP90rk8YxUWmPc1M2Bo,68
166
166
  datachain/toolkit/split.py,sha256=ktGWzY4kyzjWyR86dhvzw-Zhl0lVk_LOX3NciTac6qo,2914
167
167
  datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
168
- datachain-0.34.4.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
169
- datachain-0.34.4.dist-info/METADATA,sha256=pjivvNYJPbaTLyOpWYRJiaaoyC8k-LUaDl-dczGFUQc,13655
170
- datachain-0.34.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
171
- datachain-0.34.4.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
172
- datachain-0.34.4.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
173
- datachain-0.34.4.dist-info/RECORD,,
168
+ datachain-0.34.6.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
169
+ datachain-0.34.6.dist-info/METADATA,sha256=Bp94LT21H__KZ3gAyTsqd-xt-oM-4o8qht42-wsf9CQ,13655
170
+ datachain-0.34.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
171
+ datachain-0.34.6.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
172
+ datachain-0.34.6.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
173
+ datachain-0.34.6.dist-info/RECORD,,