datachain 0.36.5__py3-none-any.whl → 0.36.6__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

datachain/delta.py CHANGED
@@ -1,12 +1,16 @@
1
+ import hashlib
1
2
  from collections.abc import Sequence
2
3
  from copy import copy
3
4
  from functools import wraps
4
5
  from typing import TYPE_CHECKING, TypeVar
5
6
 
7
+ from attrs import frozen
8
+
6
9
  import datachain
7
10
  from datachain.dataset import DatasetDependency, DatasetRecord
8
11
  from datachain.error import DatasetNotFoundError
9
12
  from datachain.project import Project
13
+ from datachain.query.dataset import Step, step_result
10
14
 
11
15
  if TYPE_CHECKING:
12
16
  from collections.abc import Callable
@@ -14,7 +18,9 @@ if TYPE_CHECKING:
14
18
 
15
19
  from typing_extensions import ParamSpec
16
20
 
21
+ from datachain.catalog import Catalog
17
22
  from datachain.lib.dc import DataChain
23
+ from datachain.query.dataset import QueryGenerator
18
24
 
19
25
  P = ParamSpec("P")
20
26
 
@@ -43,11 +49,38 @@ def delta_disabled(
43
49
  return _inner
44
50
 
45
51
 
52
+ @frozen
53
+ class _RegenerateSystemColumnsStep(Step):
54
+ catalog: "Catalog"
55
+
56
+ def hash_inputs(self) -> str:
57
+ return hashlib.sha256(b"regenerate_sys_columns").hexdigest()
58
+
59
+ def apply(self, query_generator: "QueryGenerator", temp_tables: list[str]):
60
+ selectable = query_generator.select()
61
+ regenerated = self.catalog.warehouse._regenerate_system_columns(
62
+ selectable,
63
+ keep_existing_columns=True,
64
+ regenerate_columns=None,
65
+ )
66
+
67
+ def q(*columns):
68
+ return regenerated.with_only_columns(*columns)
69
+
70
+ return step_result(q, regenerated.selected_columns)
71
+
72
+
46
73
  def _append_steps(dc: "DataChain", other: "DataChain"):
47
74
  """Returns cloned chain with appended steps from other chain.
48
75
  Steps are all those modification methods applied like filters, mappers etc.
49
76
  """
50
77
  dc = dc.clone()
78
+ dc._query.steps.append(
79
+ _RegenerateSystemColumnsStep(
80
+ catalog=dc.session.catalog,
81
+ )
82
+ )
83
+
51
84
  dc._query.steps += other._query.steps.copy()
52
85
  dc.signals_schema = other.signals_schema
53
86
  return dc
datachain-0.36.6.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.36.5
3
+ Version: 0.36.6
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
datachain-0.36.6.dist-info/RECORD CHANGED
@@ -5,7 +5,7 @@ datachain/cache.py,sha256=Klkc7iL_KvryeZk-UNjtByTFk7URbpb60XblalqHoYI,3604
5
5
  datachain/checkpoint.py,sha256=AOMqN_2fNuEBJDAsmc-P4L7FU444eQxTU4MCgr-XEH8,1121
6
6
  datachain/config.py,sha256=KPXef6P4NAZiEbSDMUcFwuNVTul2fZBs5xrCbyRl6Tg,4193
7
7
  datachain/dataset.py,sha256=PQwgeFPmEyN8xucaU41q371VJ1EAFXdMVbeQOVeCPFQ,24995
8
- datachain/delta.py,sha256=RJ0KtI8M7Jlrlp8sA6Zcnv6INaOrsbju-9e91b_zxlE,10634
8
+ datachain/delta.py,sha256=rqQAPlNFdjpVpRyVDJ2jA_2WJ6uWEgmZZBgIRatoefY,11585
9
9
  datachain/error.py,sha256=P_5KXlfVIsW4E42JJCoFhGsgvY8la-6jXBEWbHbgqKo,1846
10
10
  datachain/hash_utils.py,sha256=FHzZS8WC4Qr_e-kZeQlfl-ilZ78IXWxj-xMZOqm8Ies,4455
11
11
  datachain/job.py,sha256=Mix3Nc56nh1iER4p_nH2h2zIymIVknn_gD-RaKPiIz4,1326
@@ -165,9 +165,9 @@ datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR
165
165
  datachain/toolkit/__init__.py,sha256=eQ58Q5Yf_Fgv1ZG0IO5dpB4jmP90rk8YxUWmPc1M2Bo,68
166
166
  datachain/toolkit/split.py,sha256=xQzzmvQRKsPteDKbpgOxd4r971BnFaK33mcOl0FuGeI,2883
167
167
  datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
168
- datachain-0.36.5.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
169
- datachain-0.36.5.dist-info/METADATA,sha256=ZgoI7ZqtJ43SPJ6svwlk44SgmJy0YAx4BiF_hwFYopU,13762
170
- datachain-0.36.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
171
- datachain-0.36.5.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
172
- datachain-0.36.5.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
173
- datachain-0.36.5.dist-info/RECORD,,
168
+ datachain-0.36.6.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
169
+ datachain-0.36.6.dist-info/METADATA,sha256=GALJIHTdB1xuW7KltCGtl1wusxhZHgZH6IKd_ipzMr8,13762
170
+ datachain-0.36.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
171
+ datachain-0.36.6.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
172
+ datachain-0.36.6.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
173
+ datachain-0.36.6.dist-info/RECORD,,