datachain 0.34.3__py3-none-any.whl → 0.34.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- datachain/lib/signal_schema.py +35 -4
- datachain/lib/udf.py +7 -1
- datachain/query/dataset.py +18 -8
- {datachain-0.34.3.dist-info → datachain-0.34.5.dist-info}/METADATA +1 -1
- {datachain-0.34.3.dist-info → datachain-0.34.5.dist-info}/RECORD +9 -9
- {datachain-0.34.3.dist-info → datachain-0.34.5.dist-info}/WHEEL +0 -0
- {datachain-0.34.3.dist-info → datachain-0.34.5.dist-info}/entry_points.txt +0 -0
- {datachain-0.34.3.dist-info → datachain-0.34.5.dist-info}/licenses/LICENSE +0 -0
- {datachain-0.34.3.dist-info → datachain-0.34.5.dist-info}/top_level.txt +0 -0
datachain/lib/signal_schema.py
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
import copy
|
|
2
2
|
import hashlib
|
|
3
3
|
import json
|
|
4
|
+
import logging
|
|
5
|
+
import math
|
|
4
6
|
import warnings
|
|
5
7
|
from collections.abc import Iterator, Sequence
|
|
6
8
|
from dataclasses import dataclass
|
|
@@ -23,7 +25,7 @@ from typing import ( # noqa: UP035
|
|
|
23
25
|
get_origin,
|
|
24
26
|
)
|
|
25
27
|
|
|
26
|
-
from pydantic import BaseModel, Field, create_model
|
|
28
|
+
from pydantic import BaseModel, Field, ValidationError, create_model
|
|
27
29
|
from sqlalchemy import ColumnElement
|
|
28
30
|
from typing_extensions import Literal as LiteralEx
|
|
29
31
|
|
|
@@ -43,6 +45,8 @@ if TYPE_CHECKING:
|
|
|
43
45
|
from datachain.catalog import Catalog
|
|
44
46
|
|
|
45
47
|
|
|
48
|
+
logger = logging.getLogger(__name__)
|
|
49
|
+
|
|
46
50
|
NAMES_TO_TYPES = {
|
|
47
51
|
"int": int,
|
|
48
52
|
"str": str,
|
|
@@ -463,12 +467,32 @@ class SignalSchema:
|
|
|
463
467
|
objs.append(self.setup_values.get(name))
|
|
464
468
|
elif (fr := ModelStore.to_pydantic(fr_type)) is not None:
|
|
465
469
|
j, pos = unflatten_to_json_pos(fr, row, pos)
|
|
466
|
-
|
|
470
|
+
try:
|
|
471
|
+
obj = fr(**j)
|
|
472
|
+
except ValidationError as e:
|
|
473
|
+
if self._all_values_none(j):
|
|
474
|
+
logger.debug("Failed to create input for %s: %s", name, e)
|
|
475
|
+
obj = None
|
|
476
|
+
else:
|
|
477
|
+
raise
|
|
478
|
+
objs.append(obj)
|
|
467
479
|
else:
|
|
468
480
|
objs.append(row[pos])
|
|
469
481
|
pos += 1
|
|
470
482
|
return objs
|
|
471
483
|
|
|
484
|
+
@staticmethod
|
|
485
|
+
def _all_values_none(value: Any) -> bool:
|
|
486
|
+
if isinstance(value, dict):
|
|
487
|
+
return all(SignalSchema._all_values_none(v) for v in value.values())
|
|
488
|
+
if isinstance(value, (list, tuple, set)):
|
|
489
|
+
return all(SignalSchema._all_values_none(v) for v in value)
|
|
490
|
+
if isinstance(value, float):
|
|
491
|
+
# NaN is used to represent NULL and NaN float values in datachain
|
|
492
|
+
# Since SQLite does not have a separate NULL type, we need to check for NaN
|
|
493
|
+
return math.isnan(value) or value is None
|
|
494
|
+
return value is None
|
|
495
|
+
|
|
472
496
|
def get_file_signal(self) -> Optional[str]:
|
|
473
497
|
for signal_name, signal_type in self.values.items():
|
|
474
498
|
if (fr := ModelStore.to_pydantic(signal_type)) is not None and issubclass(
|
|
@@ -546,8 +570,15 @@ class SignalSchema:
|
|
|
546
570
|
pos += 1
|
|
547
571
|
else:
|
|
548
572
|
json, pos = unflatten_to_json_pos(fr, row, pos) # type: ignore[union-attr]
|
|
549
|
-
|
|
550
|
-
|
|
573
|
+
try:
|
|
574
|
+
obj = fr(**json)
|
|
575
|
+
SignalSchema._set_file_stream(obj, catalog, cache)
|
|
576
|
+
except ValidationError as e:
|
|
577
|
+
if self._all_values_none(json):
|
|
578
|
+
logger.debug("Failed to create feature for %s: %s", fr_cls, e)
|
|
579
|
+
obj = None
|
|
580
|
+
else:
|
|
581
|
+
raise
|
|
551
582
|
res.append(obj)
|
|
552
583
|
return res
|
|
553
584
|
|
datachain/lib/udf.py
CHANGED
|
@@ -160,9 +160,15 @@ class UDFBase(AbstractUDF):
|
|
|
160
160
|
"""
|
|
161
161
|
Creates SHA hash of this UDF function. It takes into account function,
|
|
162
162
|
inputs and outputs.
|
|
163
|
+
|
|
164
|
+
For function-based UDFs, hashes self._func.
|
|
165
|
+
For class-based UDFs, hashes the process method.
|
|
163
166
|
"""
|
|
167
|
+
# Hash user code: either _func (function-based) or process method (class-based)
|
|
168
|
+
func_to_hash = self._func if self._func else self.process
|
|
169
|
+
|
|
164
170
|
parts = [
|
|
165
|
-
hash_callable(
|
|
171
|
+
hash_callable(func_to_hash),
|
|
166
172
|
self.params.hash() if self.params else "",
|
|
167
173
|
self.output.hash(),
|
|
168
174
|
]
|
datachain/query/dataset.py
CHANGED
|
@@ -982,18 +982,26 @@ class SQLUnion(Step):
|
|
|
982
982
|
|
|
983
983
|
columns1, columns2 = _order_columns(q1.columns, q2.columns)
|
|
984
984
|
|
|
985
|
+
union_select = sqlalchemy.select(*columns1).union_all(
|
|
986
|
+
sqlalchemy.select(*columns2)
|
|
987
|
+
)
|
|
988
|
+
union_cte = union_select.cte()
|
|
989
|
+
regenerated = self.query1.catalog.warehouse._regenerate_system_columns(
|
|
990
|
+
union_cte
|
|
991
|
+
)
|
|
992
|
+
result_columns = tuple(regenerated.selected_columns)
|
|
993
|
+
|
|
985
994
|
def q(*columns):
|
|
986
|
-
|
|
987
|
-
|
|
988
|
-
col2 = [c for c in columns2 if c.name in names]
|
|
989
|
-
res = sqlalchemy.select(*col1).union_all(sqlalchemy.select(*col2))
|
|
995
|
+
if not columns:
|
|
996
|
+
return regenerated
|
|
990
997
|
|
|
991
|
-
|
|
992
|
-
|
|
998
|
+
names = {c.name for c in columns}
|
|
999
|
+
selected = [c for c in result_columns if c.name in names]
|
|
1000
|
+
return regenerated.with_only_columns(*selected)
|
|
993
1001
|
|
|
994
1002
|
return step_result(
|
|
995
1003
|
q,
|
|
996
|
-
|
|
1004
|
+
result_columns,
|
|
997
1005
|
dependencies=self.query1.dependencies | self.query2.dependencies,
|
|
998
1006
|
)
|
|
999
1007
|
|
|
@@ -1009,7 +1017,9 @@ class SQLJoin(Step):
|
|
|
1009
1017
|
rname: str
|
|
1010
1018
|
|
|
1011
1019
|
def hash_inputs(self) -> str:
|
|
1012
|
-
predicates =
|
|
1020
|
+
predicates = (
|
|
1021
|
+
ensure_sequence(self.predicates) if self.predicates is not None else []
|
|
1022
|
+
)
|
|
1013
1023
|
|
|
1014
1024
|
parts = [
|
|
1015
1025
|
bytes.fromhex(self.query1.hash()),
|
|
@@ -90,10 +90,10 @@ datachain/lib/namespaces.py,sha256=ZyIYUa3WMrv6R5HrSoLsmLiEbvUQDl8sBINLUmWOYG0,3
|
|
|
90
90
|
datachain/lib/projects.py,sha256=_YeU9PPcH_pC8-sbX-47XtWSdl1ltVKnALY8azWLJkM,4112
|
|
91
91
|
datachain/lib/pytorch.py,sha256=S-st2SAczYut13KMf6eSqP_OQ8otWI5TRmzhK5fN3k0,7828
|
|
92
92
|
datachain/lib/settings.py,sha256=xBQEPZfgaYKhHIFLd0u5CBTYDcJS8ZHCm47x7GJErFU,7666
|
|
93
|
-
datachain/lib/signal_schema.py,sha256=
|
|
93
|
+
datachain/lib/signal_schema.py,sha256=NsL2ISnSRN-lKRpXzB9CtsUj2tVKcoAe73TaaZKMT-0,40774
|
|
94
94
|
datachain/lib/tar.py,sha256=MLcVjzIgBqRuJacCNpZ6kwSZNq1i2tLyROc8PVprHsA,999
|
|
95
95
|
datachain/lib/text.py,sha256=UNHm8fhidk7wdrWqacEWaA6I9ykfYqarQ2URby7jc7M,1261
|
|
96
|
-
datachain/lib/udf.py,sha256=
|
|
96
|
+
datachain/lib/udf.py,sha256=bU_7xj6Mz4SsajFJ6tmrZm9Ygoi1ESoOyh1Q00W2zX4,18389
|
|
97
97
|
datachain/lib/udf_signature.py,sha256=Yz20iJ-WF1pijT3hvcDIKFzgWV9gFxZM73KZRx3NbPk,7560
|
|
98
98
|
datachain/lib/utils.py,sha256=RLji1gHnfDXtJCnBo8BcNu1obndFpVsXJ_1Vb-FQ9Qo,4554
|
|
99
99
|
datachain/lib/video.py,sha256=ddVstiMkfxyBPDsnjCKY0d_93bw-DcMqGqN60yzsZoo,6851
|
|
@@ -131,7 +131,7 @@ datachain/model/ultralytics/pose.py,sha256=pvoXrWWUSWT_UBaMwUb5MBHAY57Co2HFDPigF
|
|
|
131
131
|
datachain/model/ultralytics/segment.py,sha256=v9_xDxd5zw_I8rXsbl7yQXgEdTs2T38zyY_Y4XGN8ok,3194
|
|
132
132
|
datachain/query/__init__.py,sha256=7DhEIjAA8uZJfejruAVMZVcGFmvUpffuZJwgRqNwe-c,263
|
|
133
133
|
datachain/query/batch.py,sha256=ocPeNgrJM6Y_6SYCx3O2cwlCFAhNMfoYgB99GP6A1Bg,4294
|
|
134
|
-
datachain/query/dataset.py,sha256=
|
|
134
|
+
datachain/query/dataset.py,sha256=I55ubMnoWpjoc4Ntw8zbp-i-49w0I95J7hCk_OCU6IU,68110
|
|
135
135
|
datachain/query/dispatch.py,sha256=pygp7xg3lUDKlYHhecKxW5fB3zOSX1fPJfZBU4dfijk,16067
|
|
136
136
|
datachain/query/metrics.py,sha256=DOK5HdNVaRugYPjl8qnBONvTkwjMloLqAr7Mi3TjCO0,858
|
|
137
137
|
datachain/query/params.py,sha256=O_j89mjYRLOwWNhYZl-z7mi-rkdP7WyFmaDufsdTryE,863
|
|
@@ -165,9 +165,9 @@ datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR
|
|
|
165
165
|
datachain/toolkit/__init__.py,sha256=eQ58Q5Yf_Fgv1ZG0IO5dpB4jmP90rk8YxUWmPc1M2Bo,68
|
|
166
166
|
datachain/toolkit/split.py,sha256=ktGWzY4kyzjWyR86dhvzw-Zhl0lVk_LOX3NciTac6qo,2914
|
|
167
167
|
datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
|
|
168
|
-
datachain-0.34.3.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
|
|
169
|
-
datachain-0.34.
|
|
170
|
-
datachain-0.34.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
171
|
-
datachain-0.34.3.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
|
|
172
|
-
datachain-0.34.3.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
|
|
173
|
-
datachain-0.34.3.dist-info/RECORD,,
|
|
168
|
+
datachain-0.34.5.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
|
|
169
|
+
datachain-0.34.5.dist-info/METADATA,sha256=aovavI1WF3QvHC23Vcp4HxUykcEFNypIK3LlKo_wrDA,13655
|
|
170
|
+
datachain-0.34.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
171
|
+
datachain-0.34.5.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
|
|
172
|
+
datachain-0.34.5.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
|
|
173
|
+
datachain-0.34.5.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|