datachain 0.34.3__py3-none-any.whl → 0.34.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- datachain/lib/signal_schema.py +35 -4
- datachain/query/dataset.py +3 -1
- {datachain-0.34.3.dist-info → datachain-0.34.4.dist-info}/METADATA +1 -1
- {datachain-0.34.3.dist-info → datachain-0.34.4.dist-info}/RECORD +8 -8
- {datachain-0.34.3.dist-info → datachain-0.34.4.dist-info}/WHEEL +0 -0
- {datachain-0.34.3.dist-info → datachain-0.34.4.dist-info}/entry_points.txt +0 -0
- {datachain-0.34.3.dist-info → datachain-0.34.4.dist-info}/licenses/LICENSE +0 -0
- {datachain-0.34.3.dist-info → datachain-0.34.4.dist-info}/top_level.txt +0 -0
datachain/lib/signal_schema.py
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
import copy
|
|
2
2
|
import hashlib
|
|
3
3
|
import json
|
|
4
|
+
import logging
|
|
5
|
+
import math
|
|
4
6
|
import warnings
|
|
5
7
|
from collections.abc import Iterator, Sequence
|
|
6
8
|
from dataclasses import dataclass
|
|
@@ -23,7 +25,7 @@ from typing import ( # noqa: UP035
|
|
|
23
25
|
get_origin,
|
|
24
26
|
)
|
|
25
27
|
|
|
26
|
-
from pydantic import BaseModel, Field, create_model
|
|
28
|
+
from pydantic import BaseModel, Field, ValidationError, create_model
|
|
27
29
|
from sqlalchemy import ColumnElement
|
|
28
30
|
from typing_extensions import Literal as LiteralEx
|
|
29
31
|
|
|
@@ -43,6 +45,8 @@ if TYPE_CHECKING:
|
|
|
43
45
|
from datachain.catalog import Catalog
|
|
44
46
|
|
|
45
47
|
|
|
48
|
+
logger = logging.getLogger(__name__)
|
|
49
|
+
|
|
46
50
|
NAMES_TO_TYPES = {
|
|
47
51
|
"int": int,
|
|
48
52
|
"str": str,
|
|
@@ -463,12 +467,32 @@ class SignalSchema:
|
|
|
463
467
|
objs.append(self.setup_values.get(name))
|
|
464
468
|
elif (fr := ModelStore.to_pydantic(fr_type)) is not None:
|
|
465
469
|
j, pos = unflatten_to_json_pos(fr, row, pos)
|
|
466
|
-
|
|
470
|
+
try:
|
|
471
|
+
obj = fr(**j)
|
|
472
|
+
except ValidationError as e:
|
|
473
|
+
if self._all_values_none(j):
|
|
474
|
+
logger.debug("Failed to create input for %s: %s", name, e)
|
|
475
|
+
obj = None
|
|
476
|
+
else:
|
|
477
|
+
raise
|
|
478
|
+
objs.append(obj)
|
|
467
479
|
else:
|
|
468
480
|
objs.append(row[pos])
|
|
469
481
|
pos += 1
|
|
470
482
|
return objs
|
|
471
483
|
|
|
484
|
+
@staticmethod
|
|
485
|
+
def _all_values_none(value: Any) -> bool:
|
|
486
|
+
if isinstance(value, dict):
|
|
487
|
+
return all(SignalSchema._all_values_none(v) for v in value.values())
|
|
488
|
+
if isinstance(value, (list, tuple, set)):
|
|
489
|
+
return all(SignalSchema._all_values_none(v) for v in value)
|
|
490
|
+
if isinstance(value, float):
|
|
491
|
+
# NaN is used to represent NULL and NaN float values in datachain
|
|
492
|
+
# Since SQLite does not have a separate NULL type, we need to check for NaN
|
|
493
|
+
return math.isnan(value) or value is None
|
|
494
|
+
return value is None
|
|
495
|
+
|
|
472
496
|
def get_file_signal(self) -> Optional[str]:
|
|
473
497
|
for signal_name, signal_type in self.values.items():
|
|
474
498
|
if (fr := ModelStore.to_pydantic(signal_type)) is not None and issubclass(
|
|
@@ -546,8 +570,15 @@ class SignalSchema:
|
|
|
546
570
|
pos += 1
|
|
547
571
|
else:
|
|
548
572
|
json, pos = unflatten_to_json_pos(fr, row, pos) # type: ignore[union-attr]
|
|
549
|
-
|
|
550
|
-
|
|
573
|
+
try:
|
|
574
|
+
obj = fr(**json)
|
|
575
|
+
SignalSchema._set_file_stream(obj, catalog, cache)
|
|
576
|
+
except ValidationError as e:
|
|
577
|
+
if self._all_values_none(json):
|
|
578
|
+
logger.debug("Failed to create feature for %s: %s", fr_cls, e)
|
|
579
|
+
obj = None
|
|
580
|
+
else:
|
|
581
|
+
raise
|
|
551
582
|
res.append(obj)
|
|
552
583
|
return res
|
|
553
584
|
|
datachain/query/dataset.py
CHANGED
|
@@ -1009,7 +1009,9 @@ class SQLJoin(Step):
|
|
|
1009
1009
|
rname: str
|
|
1010
1010
|
|
|
1011
1011
|
def hash_inputs(self) -> str:
|
|
1012
|
-
predicates =
|
|
1012
|
+
predicates = (
|
|
1013
|
+
ensure_sequence(self.predicates) if self.predicates is not None else []
|
|
1014
|
+
)
|
|
1013
1015
|
|
|
1014
1016
|
parts = [
|
|
1015
1017
|
bytes.fromhex(self.query1.hash()),
|
|
@@ -90,7 +90,7 @@ datachain/lib/namespaces.py,sha256=ZyIYUa3WMrv6R5HrSoLsmLiEbvUQDl8sBINLUmWOYG0,3
|
|
|
90
90
|
datachain/lib/projects.py,sha256=_YeU9PPcH_pC8-sbX-47XtWSdl1ltVKnALY8azWLJkM,4112
|
|
91
91
|
datachain/lib/pytorch.py,sha256=S-st2SAczYut13KMf6eSqP_OQ8otWI5TRmzhK5fN3k0,7828
|
|
92
92
|
datachain/lib/settings.py,sha256=xBQEPZfgaYKhHIFLd0u5CBTYDcJS8ZHCm47x7GJErFU,7666
|
|
93
|
-
datachain/lib/signal_schema.py,sha256=
|
|
93
|
+
datachain/lib/signal_schema.py,sha256=NsL2ISnSRN-lKRpXzB9CtsUj2tVKcoAe73TaaZKMT-0,40774
|
|
94
94
|
datachain/lib/tar.py,sha256=MLcVjzIgBqRuJacCNpZ6kwSZNq1i2tLyROc8PVprHsA,999
|
|
95
95
|
datachain/lib/text.py,sha256=UNHm8fhidk7wdrWqacEWaA6I9ykfYqarQ2URby7jc7M,1261
|
|
96
96
|
datachain/lib/udf.py,sha256=DdUxGBo9Y7Jz6aTBKgwex7YfK1RNaGm1JUlXCqs7qnw,18122
|
|
@@ -131,7 +131,7 @@ datachain/model/ultralytics/pose.py,sha256=pvoXrWWUSWT_UBaMwUb5MBHAY57Co2HFDPigF
|
|
|
131
131
|
datachain/model/ultralytics/segment.py,sha256=v9_xDxd5zw_I8rXsbl7yQXgEdTs2T38zyY_Y4XGN8ok,3194
|
|
132
132
|
datachain/query/__init__.py,sha256=7DhEIjAA8uZJfejruAVMZVcGFmvUpffuZJwgRqNwe-c,263
|
|
133
133
|
datachain/query/batch.py,sha256=ocPeNgrJM6Y_6SYCx3O2cwlCFAhNMfoYgB99GP6A1Bg,4294
|
|
134
|
-
datachain/query/dataset.py,sha256=
|
|
134
|
+
datachain/query/dataset.py,sha256=RQLNc368vrKI6EdsugbXWFbJobl430yXV-Ks1i4sdfo,67893
|
|
135
135
|
datachain/query/dispatch.py,sha256=pygp7xg3lUDKlYHhecKxW5fB3zOSX1fPJfZBU4dfijk,16067
|
|
136
136
|
datachain/query/metrics.py,sha256=DOK5HdNVaRugYPjl8qnBONvTkwjMloLqAr7Mi3TjCO0,858
|
|
137
137
|
datachain/query/params.py,sha256=O_j89mjYRLOwWNhYZl-z7mi-rkdP7WyFmaDufsdTryE,863
|
|
@@ -165,9 +165,9 @@ datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR
|
|
|
165
165
|
datachain/toolkit/__init__.py,sha256=eQ58Q5Yf_Fgv1ZG0IO5dpB4jmP90rk8YxUWmPc1M2Bo,68
|
|
166
166
|
datachain/toolkit/split.py,sha256=ktGWzY4kyzjWyR86dhvzw-Zhl0lVk_LOX3NciTac6qo,2914
|
|
167
167
|
datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
|
|
168
|
-
datachain-0.34.
|
|
169
|
-
datachain-0.34.
|
|
170
|
-
datachain-0.34.
|
|
171
|
-
datachain-0.34.
|
|
172
|
-
datachain-0.34.
|
|
173
|
-
datachain-0.34.
|
|
168
|
+
datachain-0.34.4.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
|
|
169
|
+
datachain-0.34.4.dist-info/METADATA,sha256=pjivvNYJPbaTLyOpWYRJiaaoyC8k-LUaDl-dczGFUQc,13655
|
|
170
|
+
datachain-0.34.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
171
|
+
datachain-0.34.4.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
|
|
172
|
+
datachain-0.34.4.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
|
|
173
|
+
datachain-0.34.4.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|