datachain 0.36.2__py3-none-any.whl → 0.36.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

@@ -231,8 +231,9 @@ class DatasetDiffOperation(Step):
231
231
 
232
232
  def apply(self, query_generator, temp_tables: list[str]) -> "StepResult":
233
233
  source_query = query_generator.exclude(("sys__id",))
234
+ right_before = len(self.dq.temp_table_names)
234
235
  target_query = self.dq.apply_steps().select()
235
- temp_tables.extend(self.dq.temp_table_names)
236
+ temp_tables.extend(self.dq.temp_table_names[right_before:])
236
237
 
237
238
  # creating temp table that will hold subtract results
238
239
  temp_table_name = self.catalog.warehouse.temp_table_name()
@@ -951,10 +952,12 @@ class SQLUnion(Step):
951
952
  def apply(
952
953
  self, query_generator: QueryGenerator, temp_tables: list[str]
953
954
  ) -> StepResult:
955
+ left_before = len(self.query1.temp_table_names)
954
956
  q1 = self.query1.apply_steps().select().subquery()
955
- temp_tables.extend(self.query1.temp_table_names)
957
+ temp_tables.extend(self.query1.temp_table_names[left_before:])
958
+ right_before = len(self.query2.temp_table_names)
956
959
  q2 = self.query2.apply_steps().select().subquery()
957
- temp_tables.extend(self.query2.temp_table_names)
960
+ temp_tables.extend(self.query2.temp_table_names[right_before:])
958
961
 
959
962
  columns1 = _drop_system_columns(q1.columns)
960
963
  columns2 = _drop_system_columns(q2.columns)
@@ -1004,8 +1007,9 @@ class SQLJoin(Step):
1004
1007
  return hashlib.sha256(b"".join(parts)).hexdigest()
1005
1008
 
1006
1009
  def get_query(self, dq: "DatasetQuery", temp_tables: list[str]) -> sa.Subquery:
1010
+ temp_tables_before = len(dq.temp_table_names)
1007
1011
  query = dq.apply_steps().select()
1008
- temp_tables.extend(dq.temp_table_names)
1012
+ temp_tables.extend(dq.temp_table_names[temp_tables_before:])
1009
1013
 
1010
1014
  if not any(isinstance(step, (SQLJoin, SQLUnion)) for step in dq.steps):
1011
1015
  return query.subquery(dq.table.name)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.36.2
3
+ Version: 0.36.4
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -76,6 +76,7 @@ Requires-Dist: datasets[vision]>=4.0.0; extra == "hf"
76
76
  Requires-Dist: datasets[audio]>=4.0.0; (sys_platform == "linux" or sys_platform == "darwin") and extra == "hf"
77
77
  Requires-Dist: fsspec>=2024.12.0; extra == "hf"
78
78
  Requires-Dist: torch<2.9.0; extra == "hf"
79
+ Requires-Dist: torchcodec<0.8.0; (sys_platform == "linux" or sys_platform == "darwin") and extra == "hf"
79
80
  Provides-Extra: video
80
81
  Requires-Dist: ffmpeg-python; extra == "video"
81
82
  Requires-Dist: imageio[ffmpeg,pyav]>=2.37.0; extra == "video"
@@ -132,7 +132,7 @@ datachain/model/ultralytics/pose.py,sha256=pvoXrWWUSWT_UBaMwUb5MBHAY57Co2HFDPigF
132
132
  datachain/model/ultralytics/segment.py,sha256=v9_xDxd5zw_I8rXsbl7yQXgEdTs2T38zyY_Y4XGN8ok,3194
133
133
  datachain/query/__init__.py,sha256=7DhEIjAA8uZJfejruAVMZVcGFmvUpffuZJwgRqNwe-c,263
134
134
  datachain/query/batch.py,sha256=ugTlSFqh_kxMcG6vJ5XrEzG9jBXRdb7KRAEEsFWiPew,4190
135
- datachain/query/dataset.py,sha256=Pu8FC11VcIj8ewXJxe0mjJpr4HBr2-gvCtMk4GQCva0,67419
135
+ datachain/query/dataset.py,sha256=vyjlToDBkIglAIY9nu89rx8JOSY6GdV9nJhz0Md7MfY,67704
136
136
  datachain/query/dispatch.py,sha256=Tg73zB6vDnYYYAvtlS9l7BI3sI1EfRCbDjiasvNxz2s,16385
137
137
  datachain/query/metrics.py,sha256=qOMHiYPTMtVs2zI-mUSy8OPAVwrg4oJtVF85B9tdQyM,810
138
138
  datachain/query/params.py,sha256=JkVz6IKUIpF58JZRkUXFT8DAHX2yfaULbhVaGmHKFLc,826
@@ -165,9 +165,9 @@ datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR
165
165
  datachain/toolkit/__init__.py,sha256=eQ58Q5Yf_Fgv1ZG0IO5dpB4jmP90rk8YxUWmPc1M2Bo,68
166
166
  datachain/toolkit/split.py,sha256=xQzzmvQRKsPteDKbpgOxd4r971BnFaK33mcOl0FuGeI,2883
167
167
  datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
168
- datachain-0.36.2.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
169
- datachain-0.36.2.dist-info/METADATA,sha256=yuMCOoIfCTY3nwDSkXV8W63z3IwwCpgcA1jIrE3ba0s,13657
170
- datachain-0.36.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
171
- datachain-0.36.2.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
172
- datachain-0.36.2.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
173
- datachain-0.36.2.dist-info/RECORD,,
168
+ datachain-0.36.4.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
169
+ datachain-0.36.4.dist-info/METADATA,sha256=BrCzzQa80tx5dIpcBIuV8D7q6A3Zw7PSLHdboLYPWvk,13762
170
+ datachain-0.36.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
171
+ datachain-0.36.4.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
172
+ datachain-0.36.4.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
173
+ datachain-0.36.4.dist-info/RECORD,,