datachain 0.37.4__py3-none-any.whl → 0.37.6__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

@@ -426,8 +426,10 @@ class UDFStep(Step, ABC):
426
426
  """Method that creates a table where temp udf results will be saved"""
427
427
 
428
428
  def process_input_query(self, query: Select) -> tuple[Select, list["Table"]]:
429
- """Apply any necessary processing to the input query"""
430
- return query, []
429
+ """Materialize inputs, ensure sys columns are available, needed for checkpoints,
430
+ needed for map to work (merge results)"""
431
+ table = self.catalog.warehouse.create_pre_udf_table(query)
432
+ return sqlalchemy.select(*table.c), [table]
431
433
 
432
434
  @abstractmethod
433
435
  def create_result_query(
@@ -628,23 +630,18 @@ class UDFStep(Step, ABC):
628
630
  def apply(
629
631
  self, query_generator: QueryGenerator, temp_tables: list[str]
630
632
  ) -> "StepResult":
631
- _query = query = query_generator.select()
633
+ query, tables = self.process_input_query(query_generator.select())
634
+ _query = query
632
635
 
633
636
  # Apply partitioning if needed.
634
637
  if self.partition_by is not None:
635
- _query = query = self.catalog.warehouse._regenerate_system_columns(
636
- query_generator.select(),
637
- keep_existing_columns=True,
638
- regenerate_columns=["sys__id"],
639
- )
640
638
  partition_tbl = self.create_partitions_table(query)
641
- temp_tables.append(partition_tbl.name)
642
639
  query = query.outerjoin(
643
640
  partition_tbl,
644
641
  partition_tbl.c.sys__id == query.selected_columns.sys__id,
645
642
  ).add_columns(*partition_columns())
643
+ tables = [*tables, partition_tbl]
646
644
 
647
- query, tables = self.process_input_query(query)
648
645
  temp_tables.extend(t.name for t in tables)
649
646
  udf_table = self.create_udf_table(_query)
650
647
  temp_tables.append(udf_table.name)
@@ -675,13 +672,6 @@ class UDFSignal(UDFStep):
675
672
 
676
673
  return self.catalog.warehouse.create_udf_table(udf_output_columns)
677
674
 
678
- def process_input_query(self, query: Select) -> tuple[Select, list["Table"]]:
679
- if os.getenv("DATACHAIN_DISABLE_QUERY_CACHE", "") not in ("", "0"):
680
- return query, []
681
- table = self.catalog.warehouse.create_pre_udf_table(query)
682
- q: Select = sqlalchemy.select(*table.c)
683
- return q, [table]
684
-
685
675
  def create_result_query(
686
676
  self, udf_table, query
687
677
  ) -> tuple[QueryGeneratorFunc, list["sqlalchemy.Column"]]:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.37.4
3
+ Version: 0.37.6
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -132,7 +132,7 @@ datachain/model/ultralytics/pose.py,sha256=pvoXrWWUSWT_UBaMwUb5MBHAY57Co2HFDPigF
132
132
  datachain/model/ultralytics/segment.py,sha256=v9_xDxd5zw_I8rXsbl7yQXgEdTs2T38zyY_Y4XGN8ok,3194
133
133
  datachain/query/__init__.py,sha256=7DhEIjAA8uZJfejruAVMZVcGFmvUpffuZJwgRqNwe-c,263
134
134
  datachain/query/batch.py,sha256=ugTlSFqh_kxMcG6vJ5XrEzG9jBXRdb7KRAEEsFWiPew,4190
135
- datachain/query/dataset.py,sha256=vYSbtpVaobIC7uCWdx-wgPOh9_T8oRQoVw1ZC_9ArKE,67527
135
+ datachain/query/dataset.py,sha256=kfNh6B6pYSz3batUpwW_6vJ7XRLwLfC08hKOZUMjf3o,67126
136
136
  datachain/query/dispatch.py,sha256=Tg73zB6vDnYYYAvtlS9l7BI3sI1EfRCbDjiasvNxz2s,16385
137
137
  datachain/query/metrics.py,sha256=qOMHiYPTMtVs2zI-mUSy8OPAVwrg4oJtVF85B9tdQyM,810
138
138
  datachain/query/params.py,sha256=JkVz6IKUIpF58JZRkUXFT8DAHX2yfaULbhVaGmHKFLc,826
@@ -165,9 +165,9 @@ datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR
165
165
  datachain/toolkit/__init__.py,sha256=eQ58Q5Yf_Fgv1ZG0IO5dpB4jmP90rk8YxUWmPc1M2Bo,68
166
166
  datachain/toolkit/split.py,sha256=xQzzmvQRKsPteDKbpgOxd4r971BnFaK33mcOl0FuGeI,2883
167
167
  datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
168
- datachain-0.37.4.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
169
- datachain-0.37.4.dist-info/METADATA,sha256=00fv6-Lg7pJ2yBxYEaRtnMDF4C_Y5SUp5tsPwS8hUXk,13762
170
- datachain-0.37.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
171
- datachain-0.37.4.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
172
- datachain-0.37.4.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
173
- datachain-0.37.4.dist-info/RECORD,,
168
+ datachain-0.37.6.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
169
+ datachain-0.37.6.dist-info/METADATA,sha256=zBPCt_CUJzcP3rNzpykwH9v9A388r273Huo6Hp_f0Jk,13762
170
+ datachain-0.37.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
171
+ datachain-0.37.6.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
172
+ datachain-0.37.6.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
173
+ datachain-0.37.6.dist-info/RECORD,,