datachain 0.34.3__py3-none-any.whl → 0.34.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

@@ -1,6 +1,8 @@
1
1
  import copy
2
2
  import hashlib
3
3
  import json
4
+ import logging
5
+ import math
4
6
  import warnings
5
7
  from collections.abc import Iterator, Sequence
6
8
  from dataclasses import dataclass
@@ -23,7 +25,7 @@ from typing import ( # noqa: UP035
23
25
  get_origin,
24
26
  )
25
27
 
26
- from pydantic import BaseModel, Field, create_model
28
+ from pydantic import BaseModel, Field, ValidationError, create_model
27
29
  from sqlalchemy import ColumnElement
28
30
  from typing_extensions import Literal as LiteralEx
29
31
 
@@ -43,6 +45,8 @@ if TYPE_CHECKING:
43
45
  from datachain.catalog import Catalog
44
46
 
45
47
 
48
+ logger = logging.getLogger(__name__)
49
+
46
50
  NAMES_TO_TYPES = {
47
51
  "int": int,
48
52
  "str": str,
@@ -463,12 +467,32 @@ class SignalSchema:
463
467
  objs.append(self.setup_values.get(name))
464
468
  elif (fr := ModelStore.to_pydantic(fr_type)) is not None:
465
469
  j, pos = unflatten_to_json_pos(fr, row, pos)
466
- objs.append(fr(**j))
470
+ try:
471
+ obj = fr(**j)
472
+ except ValidationError as e:
473
+ if self._all_values_none(j):
474
+ logger.debug("Failed to create input for %s: %s", name, e)
475
+ obj = None
476
+ else:
477
+ raise
478
+ objs.append(obj)
467
479
  else:
468
480
  objs.append(row[pos])
469
481
  pos += 1
470
482
  return objs
471
483
 
484
+ @staticmethod
485
+ def _all_values_none(value: Any) -> bool:
486
+ if isinstance(value, dict):
487
+ return all(SignalSchema._all_values_none(v) for v in value.values())
488
+ if isinstance(value, (list, tuple, set)):
489
+ return all(SignalSchema._all_values_none(v) for v in value)
490
+ if isinstance(value, float):
491
+ # NaN is used to represent NULL and NaN float values in datachain
492
+ # Since SQLite does not have a separate NULL type, we need to check for NaN
493
+ return math.isnan(value) or value is None
494
+ return value is None
495
+
472
496
  def get_file_signal(self) -> Optional[str]:
473
497
  for signal_name, signal_type in self.values.items():
474
498
  if (fr := ModelStore.to_pydantic(signal_type)) is not None and issubclass(
@@ -546,8 +570,15 @@ class SignalSchema:
546
570
  pos += 1
547
571
  else:
548
572
  json, pos = unflatten_to_json_pos(fr, row, pos) # type: ignore[union-attr]
549
- obj = fr(**json)
550
- SignalSchema._set_file_stream(obj, catalog, cache)
573
+ try:
574
+ obj = fr(**json)
575
+ SignalSchema._set_file_stream(obj, catalog, cache)
576
+ except ValidationError as e:
577
+ if self._all_values_none(json):
578
+ logger.debug("Failed to create feature for %s: %s", fr_cls, e)
579
+ obj = None
580
+ else:
581
+ raise
551
582
  res.append(obj)
552
583
  return res
553
584
 
@@ -1009,7 +1009,9 @@ class SQLJoin(Step):
1009
1009
  rname: str
1010
1010
 
1011
1011
  def hash_inputs(self) -> str:
1012
- predicates = ensure_sequence(self.predicates or [])
1012
+ predicates = (
1013
+ ensure_sequence(self.predicates) if self.predicates is not None else []
1014
+ )
1013
1015
 
1014
1016
  parts = [
1015
1017
  bytes.fromhex(self.query1.hash()),
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.34.3
3
+ Version: 0.34.4
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -90,7 +90,7 @@ datachain/lib/namespaces.py,sha256=ZyIYUa3WMrv6R5HrSoLsmLiEbvUQDl8sBINLUmWOYG0,3
90
90
  datachain/lib/projects.py,sha256=_YeU9PPcH_pC8-sbX-47XtWSdl1ltVKnALY8azWLJkM,4112
91
91
  datachain/lib/pytorch.py,sha256=S-st2SAczYut13KMf6eSqP_OQ8otWI5TRmzhK5fN3k0,7828
92
92
  datachain/lib/settings.py,sha256=xBQEPZfgaYKhHIFLd0u5CBTYDcJS8ZHCm47x7GJErFU,7666
93
- datachain/lib/signal_schema.py,sha256=WDFLbzXEOhgv865TePcFpLQHxsKQHtn8kTzaQGUG_XA,39479
93
+ datachain/lib/signal_schema.py,sha256=NsL2ISnSRN-lKRpXzB9CtsUj2tVKcoAe73TaaZKMT-0,40774
94
94
  datachain/lib/tar.py,sha256=MLcVjzIgBqRuJacCNpZ6kwSZNq1i2tLyROc8PVprHsA,999
95
95
  datachain/lib/text.py,sha256=UNHm8fhidk7wdrWqacEWaA6I9ykfYqarQ2URby7jc7M,1261
96
96
  datachain/lib/udf.py,sha256=DdUxGBo9Y7Jz6aTBKgwex7YfK1RNaGm1JUlXCqs7qnw,18122
@@ -131,7 +131,7 @@ datachain/model/ultralytics/pose.py,sha256=pvoXrWWUSWT_UBaMwUb5MBHAY57Co2HFDPigF
131
131
  datachain/model/ultralytics/segment.py,sha256=v9_xDxd5zw_I8rXsbl7yQXgEdTs2T38zyY_Y4XGN8ok,3194
132
132
  datachain/query/__init__.py,sha256=7DhEIjAA8uZJfejruAVMZVcGFmvUpffuZJwgRqNwe-c,263
133
133
  datachain/query/batch.py,sha256=ocPeNgrJM6Y_6SYCx3O2cwlCFAhNMfoYgB99GP6A1Bg,4294
134
- datachain/query/dataset.py,sha256=P7pyRiWc9G3AfzxvyB2yToKW3bXoUCrfFOtFdiVbCrU,67836
134
+ datachain/query/dataset.py,sha256=RQLNc368vrKI6EdsugbXWFbJobl430yXV-Ks1i4sdfo,67893
135
135
  datachain/query/dispatch.py,sha256=pygp7xg3lUDKlYHhecKxW5fB3zOSX1fPJfZBU4dfijk,16067
136
136
  datachain/query/metrics.py,sha256=DOK5HdNVaRugYPjl8qnBONvTkwjMloLqAr7Mi3TjCO0,858
137
137
  datachain/query/params.py,sha256=O_j89mjYRLOwWNhYZl-z7mi-rkdP7WyFmaDufsdTryE,863
@@ -165,9 +165,9 @@ datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR
165
165
  datachain/toolkit/__init__.py,sha256=eQ58Q5Yf_Fgv1ZG0IO5dpB4jmP90rk8YxUWmPc1M2Bo,68
166
166
  datachain/toolkit/split.py,sha256=ktGWzY4kyzjWyR86dhvzw-Zhl0lVk_LOX3NciTac6qo,2914
167
167
  datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
168
- datachain-0.34.3.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
169
- datachain-0.34.3.dist-info/METADATA,sha256=l1d5np6lvB4K8ohVibIbhzlNobGtlglmBhK0VcQqV-U,13655
170
- datachain-0.34.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
171
- datachain-0.34.3.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
172
- datachain-0.34.3.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
173
- datachain-0.34.3.dist-info/RECORD,,
168
+ datachain-0.34.4.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
169
+ datachain-0.34.4.dist-info/METADATA,sha256=pjivvNYJPbaTLyOpWYRJiaaoyC8k-LUaDl-dczGFUQc,13655
170
+ datachain-0.34.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
171
+ datachain-0.34.4.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
172
+ datachain-0.34.4.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
173
+ datachain-0.34.4.dist-info/RECORD,,