datachain 0.37.1__py3-none-any.whl → 0.37.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- datachain/delta.py +3 -34
- datachain/lib/dc/datachain.py +2 -3
- datachain/lib/dc/datasets.py +4 -0
- datachain/lib/dc/storage.py +6 -9
- datachain/query/dataset.py +1 -1
- {datachain-0.37.1.dist-info → datachain-0.37.2.dist-info}/METADATA +1 -1
- {datachain-0.37.1.dist-info → datachain-0.37.2.dist-info}/RECORD +11 -11
- {datachain-0.37.1.dist-info → datachain-0.37.2.dist-info}/WHEEL +0 -0
- {datachain-0.37.1.dist-info → datachain-0.37.2.dist-info}/entry_points.txt +0 -0
- {datachain-0.37.1.dist-info → datachain-0.37.2.dist-info}/licenses/LICENSE +0 -0
- {datachain-0.37.1.dist-info → datachain-0.37.2.dist-info}/top_level.txt +0 -0
datachain/delta.py
CHANGED
|
@@ -1,16 +1,12 @@
|
|
|
1
|
-
import hashlib
|
|
2
1
|
from collections.abc import Sequence
|
|
3
2
|
from copy import copy
|
|
4
3
|
from functools import wraps
|
|
5
4
|
from typing import TYPE_CHECKING, TypeVar
|
|
6
5
|
|
|
7
|
-
from attrs import frozen
|
|
8
|
-
|
|
9
6
|
import datachain
|
|
10
7
|
from datachain.dataset import DatasetDependency, DatasetRecord
|
|
11
8
|
from datachain.error import DatasetNotFoundError
|
|
12
9
|
from datachain.project import Project
|
|
13
|
-
from datachain.query.dataset import Step, step_result
|
|
14
10
|
|
|
15
11
|
if TYPE_CHECKING:
|
|
16
12
|
from collections.abc import Callable
|
|
@@ -18,9 +14,7 @@ if TYPE_CHECKING:
|
|
|
18
14
|
|
|
19
15
|
from typing_extensions import ParamSpec
|
|
20
16
|
|
|
21
|
-
from datachain.catalog import Catalog
|
|
22
17
|
from datachain.lib.dc import DataChain
|
|
23
|
-
from datachain.query.dataset import QueryGenerator
|
|
24
18
|
|
|
25
19
|
P = ParamSpec("P")
|
|
26
20
|
|
|
@@ -49,38 +43,11 @@ def delta_disabled(
|
|
|
49
43
|
return _inner
|
|
50
44
|
|
|
51
45
|
|
|
52
|
-
@frozen
|
|
53
|
-
class _RegenerateSystemColumnsStep(Step):
|
|
54
|
-
catalog: "Catalog"
|
|
55
|
-
|
|
56
|
-
def hash_inputs(self) -> str:
|
|
57
|
-
return hashlib.sha256(b"regenerate_sys_columns").hexdigest()
|
|
58
|
-
|
|
59
|
-
def apply(self, query_generator: "QueryGenerator", temp_tables: list[str]):
|
|
60
|
-
selectable = query_generator.select()
|
|
61
|
-
regenerated = self.catalog.warehouse._regenerate_system_columns(
|
|
62
|
-
selectable,
|
|
63
|
-
keep_existing_columns=True,
|
|
64
|
-
regenerate_columns=None,
|
|
65
|
-
)
|
|
66
|
-
|
|
67
|
-
def q(*columns):
|
|
68
|
-
return regenerated.with_only_columns(*columns)
|
|
69
|
-
|
|
70
|
-
return step_result(q, regenerated.selected_columns)
|
|
71
|
-
|
|
72
|
-
|
|
73
46
|
def _append_steps(dc: "DataChain", other: "DataChain"):
|
|
74
47
|
"""Returns cloned chain with appended steps from other chain.
|
|
75
48
|
Steps are all those modification methods applied like filters, mappers etc.
|
|
76
49
|
"""
|
|
77
50
|
dc = dc.clone()
|
|
78
|
-
dc._query.steps.append(
|
|
79
|
-
_RegenerateSystemColumnsStep(
|
|
80
|
-
catalog=dc.session.catalog,
|
|
81
|
-
)
|
|
82
|
-
)
|
|
83
|
-
|
|
84
51
|
dc._query.steps += other._query.steps.copy()
|
|
85
52
|
dc.signals_schema = other.signals_schema
|
|
86
53
|
return dc
|
|
@@ -150,7 +117,9 @@ def _get_retry_chain(
|
|
|
150
117
|
error_records = result_dataset.filter(C(delta_retry) != "")
|
|
151
118
|
error_source_records = source_dc.merge(
|
|
152
119
|
error_records, on=on, right_on=right_on, inner=True
|
|
153
|
-
).select(
|
|
120
|
+
).select(
|
|
121
|
+
*list(source_dc.signals_schema.clone_without_sys_signals().values.keys())
|
|
122
|
+
)
|
|
154
123
|
retry_chain = error_source_records
|
|
155
124
|
|
|
156
125
|
# Handle missing records if delta_retry is True
|
datachain/lib/dc/datachain.py
CHANGED
|
@@ -1697,14 +1697,13 @@ class DataChain:
|
|
|
1697
1697
|
query.feature_schema = None
|
|
1698
1698
|
ds = self._evolve(query=query)
|
|
1699
1699
|
|
|
1700
|
+
# Note: merge drops sys signals from both sides, make sure to not include it
|
|
1701
|
+
# in the resulting schema
|
|
1700
1702
|
signals_schema = self.signals_schema.clone_without_sys_signals()
|
|
1701
1703
|
right_signals_schema = right_ds.signals_schema.clone_without_sys_signals()
|
|
1702
1704
|
|
|
1703
1705
|
ds.signals_schema = signals_schema.merge(right_signals_schema, rname)
|
|
1704
1706
|
|
|
1705
|
-
if not full:
|
|
1706
|
-
ds.signals_schema = SignalSchema({"sys": Sys}) | ds.signals_schema
|
|
1707
|
-
|
|
1708
1707
|
return ds
|
|
1709
1708
|
|
|
1710
1709
|
@delta_disabled
|
datachain/lib/dc/datasets.py
CHANGED
|
@@ -200,6 +200,10 @@ def read_dataset(
|
|
|
200
200
|
signals_schema |= SignalSchema.deserialize(query.feature_schema)
|
|
201
201
|
else:
|
|
202
202
|
signals_schema |= SignalSchema.from_column_types(query.column_types or {})
|
|
203
|
+
|
|
204
|
+
if delta:
|
|
205
|
+
signals_schema = signals_schema.clone_without_sys_signals()
|
|
206
|
+
|
|
203
207
|
chain = DataChain(query, _settings, signals_schema)
|
|
204
208
|
|
|
205
209
|
if delta:
|
datachain/lib/dc/storage.py
CHANGED
|
@@ -187,6 +187,12 @@ def read_storage(
|
|
|
187
187
|
project=listing_project_name,
|
|
188
188
|
session=session,
|
|
189
189
|
settings=settings,
|
|
190
|
+
delta=delta,
|
|
191
|
+
delta_on=delta_on,
|
|
192
|
+
delta_result_on=delta_result_on,
|
|
193
|
+
delta_compare=delta_compare,
|
|
194
|
+
delta_retry=delta_retry,
|
|
195
|
+
delta_unsafe=delta_unsafe,
|
|
190
196
|
)
|
|
191
197
|
dc._query.update = update
|
|
192
198
|
dc.signals_schema = dc.signals_schema.mutate({f"{column}": file_type})
|
|
@@ -252,13 +258,4 @@ def read_storage(
|
|
|
252
258
|
|
|
253
259
|
assert storage_chain is not None
|
|
254
260
|
|
|
255
|
-
if delta:
|
|
256
|
-
storage_chain = storage_chain._as_delta(
|
|
257
|
-
on=delta_on,
|
|
258
|
-
right_on=delta_result_on,
|
|
259
|
-
compare=delta_compare,
|
|
260
|
-
delta_retry=delta_retry,
|
|
261
|
-
delta_unsafe=delta_unsafe,
|
|
262
|
-
)
|
|
263
|
-
|
|
264
261
|
return storage_chain
|
datachain/query/dataset.py
CHANGED
|
@@ -1065,7 +1065,7 @@ class SQLJoin(Step):
|
|
|
1065
1065
|
q1 = self.get_query(self.query1, temp_tables)
|
|
1066
1066
|
q2 = self.get_query(self.query2, temp_tables)
|
|
1067
1067
|
|
|
1068
|
-
q1_columns = _drop_system_columns(q1.c)
|
|
1068
|
+
q1_columns = _drop_system_columns(q1.c)
|
|
1069
1069
|
q1_column_names = {c.name for c in q1_columns}
|
|
1070
1070
|
|
|
1071
1071
|
q2_columns = []
|
|
@@ -5,7 +5,7 @@ datachain/cache.py,sha256=Klkc7iL_KvryeZk-UNjtByTFk7URbpb60XblalqHoYI,3604
|
|
|
5
5
|
datachain/checkpoint.py,sha256=AOMqN_2fNuEBJDAsmc-P4L7FU444eQxTU4MCgr-XEH8,1121
|
|
6
6
|
datachain/config.py,sha256=KPXef6P4NAZiEbSDMUcFwuNVTul2fZBs5xrCbyRl6Tg,4193
|
|
7
7
|
datachain/dataset.py,sha256=PQwgeFPmEyN8xucaU41q371VJ1EAFXdMVbeQOVeCPFQ,24995
|
|
8
|
-
datachain/delta.py,sha256=
|
|
8
|
+
datachain/delta.py,sha256=8Bj7v4NQbH2ufNwAm1wYWrA-7vGJuLFVd4Mie1mowQs,10711
|
|
9
9
|
datachain/error.py,sha256=P_5KXlfVIsW4E42JJCoFhGsgvY8la-6jXBEWbHbgqKo,1846
|
|
10
10
|
datachain/hash_utils.py,sha256=FHzZS8WC4Qr_e-kZeQlfl-ilZ78IXWxj-xMZOqm8Ies,4455
|
|
11
11
|
datachain/job.py,sha256=vCcHJHKRo5uZTpmUYt_1oVkeawFF0x8jbnm-XZYaKfI,1358
|
|
@@ -109,15 +109,15 @@ datachain/lib/convert/values_to_tuples.py,sha256=Sxj0ojeMSpAwM_NNoXa1dMR_2L_cQ6X
|
|
|
109
109
|
datachain/lib/dc/__init__.py,sha256=UrUzmDH6YyVl8fxM5iXTSFtl5DZTUzEYm1MaazK4vdQ,900
|
|
110
110
|
datachain/lib/dc/csv.py,sha256=fIfj5-2Ix4z5D5yZueagd5WUWw86pusJ9JJKD-U3KGg,4407
|
|
111
111
|
datachain/lib/dc/database.py,sha256=Wqob3dQc9Mol_0vagzVEXzteCKS9M0E3U5130KVmQKg,14629
|
|
112
|
-
datachain/lib/dc/datachain.py,sha256=
|
|
113
|
-
datachain/lib/dc/datasets.py,sha256=
|
|
112
|
+
datachain/lib/dc/datachain.py,sha256=RYhinLQ6CMU3tudLpiJGh-vfCL24KDKbKM3Q1EsWoAE,104072
|
|
113
|
+
datachain/lib/dc/datasets.py,sha256=oY1t8QBAaZdhjwR439zZT74hMOspewVCrgdwy6juXng,15321
|
|
114
114
|
datachain/lib/dc/hf.py,sha256=FeruEO176L2qQ1Mnx0QmK4kV0GuQ4xtj717N8fGJrBI,2849
|
|
115
115
|
datachain/lib/dc/json.py,sha256=iJ6G0jwTKz8xtfh1eICShnWk_bAMWjF5bFnOXLHaTlw,2683
|
|
116
116
|
datachain/lib/dc/listings.py,sha256=0XTZERQZ2ErP3LSVg9lF9i3alKebqA1Kip2Zf15unUM,4507
|
|
117
117
|
datachain/lib/dc/pandas.py,sha256=o9rTcZf27-3mCEaDdX1ZzM0I4bSOsu-4mA2zK6rWoS4,1460
|
|
118
118
|
datachain/lib/dc/parquet.py,sha256=wa_VazXotY5RZ8ypC0_M9Qo30tamzXmYeVE6P-NcQ1Y,2375
|
|
119
119
|
datachain/lib/dc/records.py,sha256=WvbaLhMqM9e54gJLLeG54QX5ZXkkBIK3FokojLTSbZc,2974
|
|
120
|
-
datachain/lib/dc/storage.py,sha256=
|
|
120
|
+
datachain/lib/dc/storage.py,sha256=zfVMkYqwmhI4bnOqyO6bW5gg_DfdYPM7ltWLTHDjGZo,9737
|
|
121
121
|
datachain/lib/dc/storage_pattern.py,sha256=TqaDb5yq050W9IxpESz9iotjs0R__i5ngRtVo5BmJ-8,7645
|
|
122
122
|
datachain/lib/dc/utils.py,sha256=kTpzS8CBQmle1A0XZzu4b5zAHo8piFiBSP1lS8ztkQU,4088
|
|
123
123
|
datachain/lib/dc/values.py,sha256=-EI3xYUNzfwzogbW8WdHX0XbWev-je6_5-CnDsLRcF4,1399
|
|
@@ -132,7 +132,7 @@ datachain/model/ultralytics/pose.py,sha256=pvoXrWWUSWT_UBaMwUb5MBHAY57Co2HFDPigF
|
|
|
132
132
|
datachain/model/ultralytics/segment.py,sha256=v9_xDxd5zw_I8rXsbl7yQXgEdTs2T38zyY_Y4XGN8ok,3194
|
|
133
133
|
datachain/query/__init__.py,sha256=7DhEIjAA8uZJfejruAVMZVcGFmvUpffuZJwgRqNwe-c,263
|
|
134
134
|
datachain/query/batch.py,sha256=ugTlSFqh_kxMcG6vJ5XrEzG9jBXRdb7KRAEEsFWiPew,4190
|
|
135
|
-
datachain/query/dataset.py,sha256=
|
|
135
|
+
datachain/query/dataset.py,sha256=vYSbtpVaobIC7uCWdx-wgPOh9_T8oRQoVw1ZC_9ArKE,67527
|
|
136
136
|
datachain/query/dispatch.py,sha256=Tg73zB6vDnYYYAvtlS9l7BI3sI1EfRCbDjiasvNxz2s,16385
|
|
137
137
|
datachain/query/metrics.py,sha256=qOMHiYPTMtVs2zI-mUSy8OPAVwrg4oJtVF85B9tdQyM,810
|
|
138
138
|
datachain/query/params.py,sha256=JkVz6IKUIpF58JZRkUXFT8DAHX2yfaULbhVaGmHKFLc,826
|
|
@@ -165,9 +165,9 @@ datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR
|
|
|
165
165
|
datachain/toolkit/__init__.py,sha256=eQ58Q5Yf_Fgv1ZG0IO5dpB4jmP90rk8YxUWmPc1M2Bo,68
|
|
166
166
|
datachain/toolkit/split.py,sha256=xQzzmvQRKsPteDKbpgOxd4r971BnFaK33mcOl0FuGeI,2883
|
|
167
167
|
datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
|
|
168
|
-
datachain-0.37.
|
|
169
|
-
datachain-0.37.
|
|
170
|
-
datachain-0.37.
|
|
171
|
-
datachain-0.37.
|
|
172
|
-
datachain-0.37.
|
|
173
|
-
datachain-0.37.
|
|
168
|
+
datachain-0.37.2.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
|
|
169
|
+
datachain-0.37.2.dist-info/METADATA,sha256=TysOdteujF1qCj3q8AwEJicm-CqwufUUZapZo0ADMNQ,13762
|
|
170
|
+
datachain-0.37.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
171
|
+
datachain-0.37.2.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
|
|
172
|
+
datachain-0.37.2.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
|
|
173
|
+
datachain-0.37.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|