datachain 0.34.7__py3-none-any.whl → 0.35.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- datachain/data_storage/warehouse.py +34 -18
- datachain/lib/file.py +6 -2
- datachain/query/batch.py +1 -2
- datachain/query/dataset.py +12 -22
- datachain/query/dispatch.py +25 -35
- {datachain-0.34.7.dist-info → datachain-0.35.1.dist-info}/METADATA +1 -1
- {datachain-0.34.7.dist-info → datachain-0.35.1.dist-info}/RECORD +11 -12
- datachain/query/utils.py +0 -38
- {datachain-0.34.7.dist-info → datachain-0.35.1.dist-info}/WHEEL +0 -0
- {datachain-0.34.7.dist-info → datachain-0.35.1.dist-info}/entry_points.txt +0 -0
- {datachain-0.34.7.dist-info → datachain-0.35.1.dist-info}/licenses/LICENSE +0 -0
- {datachain-0.34.7.dist-info → datachain-0.35.1.dist-info}/top_level.txt +0 -0
datachain/data_storage/warehouse.py
CHANGED

```diff
@@ -22,7 +22,6 @@ from datachain.lib.signal_schema import SignalSchema
 from datachain.node import DirType, DirTypeGroup, Node, NodeWithPath, get_path
 from datachain.query.batch import RowsOutput
 from datachain.query.schema import ColumnMeta
-from datachain.query.utils import get_query_id_column
 from datachain.sql.functions import path as pathfunc
 from datachain.sql.types import Int, SQLType
 from datachain.utils import sql_escape_like
@@ -228,7 +227,8 @@ class AbstractWarehouse(ABC, Serializable):
         while True:
             if limit is not None:
                 limit -= num_yielded
-                if limit <= 0:
+                num_yielded = 0
+                if limit <= 0:
                     break
                 if limit < page_size:
                     paginated_query = paginated_query.limit(None).limit(limit)
@@ -246,32 +246,48 @@ class AbstractWarehouse(ABC, Serializable):
             break  # no more results
         offset += page_size
 
-    def _regenerate_system_columns(
-
+    def _regenerate_system_columns(
+        self,
+        selectable: sa.Select | sa.CTE,
+        keep_existing_columns: bool = False,
+    ) -> sa.Select:
+        """
+        Return a SELECT that regenerates sys__id and sys__rand deterministically.
 
+        If keep_existing_columns is True, existing sys__id and sys__rand columns
+        will be kept as-is if they exist in the input selectable.
+        """
         base = selectable.subquery() if hasattr(selectable, "subquery") else selectable
 
+        result_columns: dict[str, sa.ColumnElement] = {}
+        for col in base.c:
+            if col.name in result_columns:
+                raise ValueError(f"Duplicate column name {col.name} in SELECT")
+            if col.name in ("sys__id", "sys__rand"):
+                if keep_existing_columns:
+                    result_columns[col.name] = col
+            else:
+                result_columns[col.name] = col
+
         system_types: dict[str, sa.types.TypeEngine] = {
             sys_col.name: sys_col.type
             for sys_col in self.schema.dataset_row_cls.sys_columns()
         }
 
-
-
-
-
-
-
-
-
-
-                result_columns.append(expr.label("sys__rand"))
-            else:
-                result_columns.append(col)
+        # Add missing system columns if needed
+        if "sys__id" not in result_columns:
+            expr = self._system_row_number_expr()
+            expr = sa.cast(expr, system_types["sys__id"])
+            result_columns["sys__id"] = expr.label("sys__id")
+        if "sys__rand" not in result_columns:
+            expr = self._system_random_expr()
+            expr = sa.cast(expr, system_types["sys__rand"])
+            result_columns["sys__rand"] = expr.label("sys__rand")
 
         # Wrap in subquery to materialize window functions, then wrap again in SELECT
         # This ensures window functions are computed before INSERT...FROM SELECT
-
+        columns = list(result_columns.values())
+        inner = sa.select(*columns).select_from(base).subquery()
         return sa.select(*inner.c).select_from(inner)
 
     def _system_row_number_expr(self):
@@ -380,7 +396,7 @@ class AbstractWarehouse(ABC, Serializable):
         """
         Fetch dataset rows from database using a list of IDs.
         """
-        if (id_col := get_query_id_column(query)) is None:
+        if (id_col := query.selected_columns.get("sys__id")) is None:
            raise RuntimeError("sys__id column not found in query")
 
        query = query._clone().offset(None).limit(None).order_by(None)
```
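The rewritten `_regenerate_system_columns` collects the non-system columns, re-derives `sys__id`/`sys__rand` when absent, and double-wraps the result so window functions are materialized before an `INSERT ... FROM SELECT`. A minimal, standalone sketch of that wrapping pattern; the `rows` table, the `regenerate_sys_id` name, and the `row_number()` expression are illustrative stand-ins (the latter for datachain's `_system_row_number_expr`), not datachain's actual schema or helpers:

```python
import sqlalchemy as sa
from sqlalchemy.dialects import sqlite

metadata = sa.MetaData()
rows = sa.Table(
    "rows",
    metadata,
    sa.Column("sys__id", sa.Integer),
    sa.Column("name", sa.String),
)


def regenerate_sys_id(selectable: sa.Select) -> sa.Select:
    base = selectable.subquery()
    # keep everything except the system column we are about to regenerate
    columns = [c for c in base.c if c.name != "sys__id"]
    # a deterministic per-row id from a window function (stand-in expression)
    sys_id = sa.func.row_number().over(order_by=columns[0]).label("sys__id")
    # inner subquery materializes the window function before any outer use
    inner = sa.select(sys_id, *columns).select_from(base).subquery()
    return sa.select(*inner.c).select_from(inner)


print(regenerate_sys_id(sa.select(rows)).compile(dialect=sqlite.dialect()))
```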
datachain/lib/file.py
CHANGED

```diff
@@ -681,7 +681,7 @@ class File(DataModel):
             normalized_path = self.get_path_normalized()
             info = client.fs.info(client.get_full_path(normalized_path))
             converted_info = client.info_to_file(info, normalized_path)
-            return type(self)(
+            res = type(self)(
                 path=self.path,
                 source=self.source,
                 size=converted_info.size,
@@ -691,6 +691,8 @@
                 last_modified=converted_info.last_modified,
                 location=self.location,
             )
+            res._set_stream(self._catalog)
+            return res
         except FileError as e:
             logger.warning(
                 "File error when resolving %s/%s: %s", self.source, self.path, str(e)
@@ -703,7 +705,7 @@
                 str(e),
             )
 
-        return type(self)(
+        res = type(self)(
             path=self.path,
             source=self.source,
             size=0,
@@ -713,6 +715,8 @@
             last_modified=TIME_ZERO,
             location=self.location,
         )
+        res._set_stream(self._catalog)
+        return res
 
     def rebase(
         self,
```
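Both hunks apply the same fix: the method builds a fresh `File` instance, and pydantic does not carry private attributes over to a newly constructed object, so the catalog stream has to be re-attached before returning. A simplified, hypothetical stand-in (`FileStub` is not datachain's `File` class) showing why the added `_set_stream` call matters:

```python
from typing import Any, Optional

from pydantic import BaseModel, PrivateAttr


class FileStub(BaseModel):
    path: str
    _catalog: Optional[Any] = PrivateAttr(default=None)

    def _set_stream(self, catalog: Any) -> None:
        self._catalog = catalog

    def resolve(self) -> "FileStub":
        # constructing a new instance drops private attrs such as _catalog
        res = type(self)(path=self.path)
        res._set_stream(self._catalog)  # the re-attachment the diff adds
        return res


f = FileStub(path="a.txt")
f._set_stream("catalog")
assert f.resolve()._catalog == "catalog"  # without _set_stream this is None
```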
datachain/query/batch.py
CHANGED

```diff
@@ -6,7 +6,6 @@ from collections.abc import Callable, Generator, Sequence
 import sqlalchemy as sa
 
 from datachain.data_storage.schema import PARTITION_COLUMN_ID
-from datachain.query.utils import get_query_column
 
 RowsOutputBatch = Sequence[Sequence]
 RowsOutput = Sequence | RowsOutputBatch
@@ -106,7 +105,7 @@ class Partition(BatchingStrategy):
         query: sa.Select,
         id_col: sa.ColumnElement | None = None,
     ) -> Generator[RowsOutput, None, None]:
-        if (partition_col := get_query_column(query, PARTITION_COLUMN_ID)) is None:
+        if (partition_col := query.selected_columns.get(PARTITION_COLUMN_ID)) is None:
             raise RuntimeError("partition column not found in query")
 
         ids_only = False
```
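This hunk is the whole refactor in miniature: SQLAlchemy's `Select.selected_columns` is a dict-like `ColumnCollection`, so its `.get()` and `in` checks can stand in for the deleted `get_query_column`/`get_query_id_column` helpers. A quick sketch with an illustrative table:

```python
import sqlalchemy as sa

t = sa.table("t", sa.column("sys__id"), sa.column("value"))
query = sa.select(t)

# dict-like lookups on the SELECT's column collection
assert query.selected_columns.get("sys__id") is not None
assert query.selected_columns.get("missing") is None
assert "sys__id" in query.selected_columns  # used by the new guard clauses
```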
datachain/query/dataset.py
CHANGED

```diff
@@ -438,6 +438,9 @@ class UDFStep(Step, ABC):
     """
 
     def populate_udf_table(self, udf_table: "Table", query: Select) -> None:
+        if "sys__id" not in query.selected_columns:
+            raise RuntimeError("Query must have sys__id column to run UDF")
+
         if (rows_total := self.catalog.warehouse.query_count(query)) == 0:
             return
 
@@ -580,13 +583,10 @@
         """
         Create temporary table with group by partitions.
         """
-
-
-
-
-        assert any(c.name == "sys__id" for c in query.selected_columns), (
-            "Query must have sys__id column to use partitioning."
-        )
+        if self.partition_by is None:
+            raise RuntimeError("Query must have partition_by set to use partitioning")
+        if (id_col := query.selected_columns.get("sys__id")) is None:
+            raise RuntimeError("Query must have sys__id column to use partitioning")
 
         if isinstance(self.partition_by, (list, tuple, GeneratorType)):
             list_partition_by = list(self.partition_by)
@@ -602,7 +602,7 @@
 
         # fill table with partitions
         cols = [
-
+            id_col,
             f.dense_rank().over(order_by=partition_by).label(PARTITION_COLUMN_ID),
         ]
         self.catalog.warehouse.db.execute(
@@ -634,21 +634,11 @@
 
         # Apply partitioning if needed.
         if self.partition_by is not None:
-            if
-
-
-
-            columns = [
-                c if isinstance(c, Column) else Column(c.name, c.type)
-                for c in query.subquery().columns
-            ]
-            temp_table = self.catalog.warehouse.create_dataset_rows_table(
-                self.catalog.warehouse.temp_table_name(),
-                columns=columns,
+            if "sys__id" not in query.selected_columns:
+                _query = query = self.catalog.warehouse._regenerate_system_columns(
+                    query,
+                    keep_existing_columns=True,
             )
-            temp_tables.append(temp_table.name)
-            self.catalog.warehouse.copy_table(temp_table, query)
-            _query = query = temp_table.select()
 
             partition_tbl = self.create_partitions_table(query)
             temp_tables.append(partition_tbl.name)
```
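For context on the partition fill above: `dense_rank()` over the partition-by columns assigns every row sharing a key the same partition id, which is what lands in `PARTITION_COLUMN_ID`. A minimal sketch; the `rows` table, `label` column, and `partition_id` name are illustrative, not datachain's schema:

```python
import sqlalchemy as sa

rows = sa.table("rows", sa.column("sys__id"), sa.column("label"))
partition_by = [rows.c.label]

cols = [
    rows.c.sys__id,
    # rows with equal `label` get the same dense_rank value
    sa.func.dense_rank().over(order_by=partition_by).label("partition_id"),
]
print(sa.select(*cols).compile())
```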
datachain/query/dispatch.py
CHANGED

```diff
@@ -22,7 +22,6 @@ from datachain.query.dataset import (
 )
 from datachain.query.queue import get_from_queue, put_into_queue
 from datachain.query.udf import UdfInfo
-from datachain.query.utils import get_query_id_column
 from datachain.utils import batched, flatten, safe_closing
 
 if TYPE_CHECKING:
@@ -55,6 +54,9 @@ def udf_entrypoint() -> int:
     udf_info: UdfInfo = load(stdin.buffer)
 
     query = udf_info["query"]
+    if "sys__id" not in query.selected_columns:
+        raise RuntimeError("sys__id column is required in UDF query")
+
     batching = udf_info["batching"]
     is_generator = udf_info["is_generator"]
 
@@ -65,15 +67,16 @@
     wh_cls, wh_args, wh_kwargs = udf_info["warehouse_clone_params"]
     warehouse: AbstractWarehouse = wh_cls(*wh_args, **wh_kwargs)
 
-    id_col = get_query_id_column(query)
-
     with contextlib.closing(
-        batching(
+        batching(
+            warehouse.dataset_select_paginated,
+            query,
+            id_col=query.selected_columns.sys__id,
+        )
     ) as udf_inputs:
         try:
             UDFDispatcher(udf_info).run_udf(
                 udf_inputs,
-                ids_only=id_col is not None,
                 download_cb=download_cb,
                 processed_cb=processed_cb,
                 generated_cb=generated_cb,
@@ -147,10 +150,10 @@ class UDFDispatcher:
             self.udf_fields,
         )
 
-    def _run_worker(self, ids_only: bool) -> None:
+    def _run_worker(self) -> None:
         try:
             worker = self._create_worker()
-            worker.run(ids_only)
+            worker.run()
         except (Exception, KeyboardInterrupt) as e:
             if self.done_queue:
                 put_into_queue(
@@ -164,7 +167,6 @@
     def run_udf(
         self,
         input_rows: Iterable["RowsOutput"],
-        ids_only: bool,
         download_cb: Callback = DEFAULT_CALLBACK,
         processed_cb: Callback = DEFAULT_CALLBACK,
         generated_cb: Callback = DEFAULT_CALLBACK,
@@ -178,9 +180,7 @@
 
         if n_workers == 1:
             # no need to spawn worker processes if we are running in a single process
-            self.run_udf_single(
-                input_rows, ids_only, download_cb, processed_cb, generated_cb
-            )
+            self.run_udf_single(input_rows, download_cb, processed_cb, generated_cb)
         else:
             if self.buffer_size < n_workers:
                 raise RuntimeError(
@@ -189,13 +189,12 @@
                 )
 
             self.run_udf_parallel(
-                n_workers, input_rows, ids_only, download_cb, processed_cb, generated_cb
+                n_workers, input_rows, download_cb, processed_cb, generated_cb
             )
 
     def run_udf_single(
         self,
         input_rows: Iterable["RowsOutput"],
-        ids_only: bool,
         download_cb: Callback = DEFAULT_CALLBACK,
         processed_cb: Callback = DEFAULT_CALLBACK,
         generated_cb: Callback = DEFAULT_CALLBACK,
@@ -204,18 +203,15 @@
         # Rebuild schemas in single process too for consistency (cheap, idempotent).
         ModelStore.rebuild_all()
 
-        if
+        if not self.is_batching:
             input_rows = flatten(input_rows)
 
         def get_inputs() -> Iterable["RowsOutput"]:
             warehouse = self.catalog.warehouse.clone()
-
-
-
-
-                )
-            else:
-                yield from input_rows
+            for ids in batched(input_rows, DEFAULT_BATCH_SIZE):
+                yield from warehouse.dataset_rows_select_from_ids(
+                    self.query, ids, self.is_batching
+                )
 
         prefetch = udf.prefetch
         with _get_cache(self.catalog.cache, prefetch, use_cache=self.cache) as _cache:
@@ -249,7 +245,6 @@
         self,
         n_workers: int,
         input_rows: Iterable["RowsOutput"],
-        ids_only: bool,
         download_cb: Callback = DEFAULT_CALLBACK,
         processed_cb: Callback = DEFAULT_CALLBACK,
         generated_cb: Callback = DEFAULT_CALLBACK,
@@ -258,9 +253,7 @@
         self.done_queue = self.ctx.Queue()
 
         pool = [
-            self.ctx.Process(
-                name=f"Worker-UDF-{i}", target=self._run_worker, args=[ids_only]
-            )
+            self.ctx.Process(name=f"Worker-UDF-{i}", target=self._run_worker)
             for i in range(n_workers)
         ]
         for p in pool:
@@ -406,13 +399,13 @@ class UDFWorker:
         self.processed_cb = ProcessedCallback("processed", self.done_queue)
         self.generated_cb = ProcessedCallback("generated", self.done_queue)
 
-    def run(self, ids_only: bool) -> None:
+    def run(self) -> None:
         prefetch = self.udf.prefetch
         with _get_cache(self.catalog.cache, prefetch, use_cache=self.cache) as _cache:
             catalog = clone_catalog_with_cache(self.catalog, _cache)
             udf_results = self.udf.run(
                 self.udf_fields,
-                self.get_inputs(ids_only),
+                self.get_inputs(),
                 catalog,
                 self.cache,
                 download_cb=self.download_cb,
@@ -434,13 +427,10 @@
         put_into_queue(self.done_queue, {"status": OK_STATUS})
         yield row
 
-    def get_inputs(self, ids_only: bool) -> Iterable["RowsOutput"]:
+    def get_inputs(self) -> Iterable["RowsOutput"]:
         warehouse = self.catalog.warehouse.clone()
         while (batch := get_from_queue(self.task_queue)) != STOP_SIGNAL:
-
-
-
-
-                )
-            else:
-                yield from batch
+            for ids in batched(batch, DEFAULT_BATCH_SIZE):
+                yield from warehouse.dataset_rows_select_from_ids(
+                    self.query, ids, self.is_batching
+                )
```
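The dispatch changes drop the `ids_only` flag and settle on one input shape: workers always receive chunks of `sys__id` values and expand each chunk into full rows via `dataset_rows_select_from_ids`. A self-contained sketch of that shape; `batched` here mirrors `datachain.utils.batched`, the fetch function is a stand-in for the warehouse call, and the batch size of 3 is illustrative, not datachain's real default:

```python
from collections.abc import Iterable, Iterator, Sequence
from itertools import islice


def batched(it: Iterable[int], n: int) -> Iterator[tuple[int, ...]]:
    it = iter(it)
    while chunk := tuple(islice(it, n)):
        yield chunk


def select_rows_by_ids(ids: Sequence[int]) -> list[dict]:
    # stand-in for warehouse.dataset_rows_select_from_ids: one SELECT per batch
    return [{"sys__id": i, "value": i * 10} for i in ids]


def get_inputs(id_stream: Iterable[int], batch_size: int = 3) -> Iterator[dict]:
    for ids in batched(id_stream, batch_size):
        yield from select_rows_by_ids(ids)


print(list(get_inputs(range(7))))  # two full batches plus a remainder of one
```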
{datachain-0.34.7.dist-info → datachain-0.35.1.dist-info}/RECORD
CHANGED

```diff
@@ -57,7 +57,7 @@ datachain/data_storage/metastore.py,sha256=uh8oFO9NeYN8tosi5F2QhWpdXR8dzDyfN2rrD
 datachain/data_storage/schema.py,sha256=4FZZFgPTI9e3gUFdlm1smPdES7FHctwXQNdNfY69tj8,9807
 datachain/data_storage/serializer.py,sha256=oL8i8smyAeVUyDepk8Xhf3lFOGOEHMoZjA5GdFzvfGI,3862
 datachain/data_storage/sqlite.py,sha256=xQZ944neP57K_25HSetIy35IakAcyA0cUKVe-xeIEgQ,31168
-datachain/data_storage/warehouse.py,sha256=
+datachain/data_storage/warehouse.py,sha256=rNz2wFlFA-pyBAuy14RL6lRIFhrNEnX02c9SgGs4v58,34994
 datachain/diff/__init__.py,sha256=pixXOnbOcoxfkBvbaiDNGPhJMEyTiHb9EIFxR7QqY5A,9533
 datachain/fs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datachain/fs/reference.py,sha256=A8McpXF0CqbXPqanXuvpKu50YLB3a2ZXA3YAPxtBXSM,914
@@ -79,7 +79,7 @@ datachain/lib/audio.py,sha256=3QWQ7PHuRnen7al8EjgjWuKbRKe4SvrbWELJ1T_Cin0,7545
 datachain/lib/clip.py,sha256=nF8-N6Uz0MbAsPJBY2iXEYa3DPLo80OOer5SRNAtcGM,6149
 datachain/lib/data_model.py,sha256=H-bagx24-cLlC7ngSP6Dby4mB6kSxxV7KDiHxQjzwlg,3798
 datachain/lib/dataset_info.py,sha256=Ym7yYcGpfUmPLrfdxueijCVRP2Go6KbyuLk_fmzYgDU,3273
-datachain/lib/file.py,sha256=
+datachain/lib/file.py,sha256=YO4QUaZVZ0TVW9fahERZ3HJXPNXjB4oYzvLQntQYT9s,47501
 datachain/lib/hf.py,sha256=jmyqRDXdksojUJCiU_2XFSIoMzzDJAZQs9xr-sEwEJc,7281
 datachain/lib/image.py,sha256=xKyVsFKi1Shji7oluvd4Ibr3Atiz-Q0MNJhIsXeGcMI,3197
 datachain/lib/listing.py,sha256=pXRzHCUxX0b1sZrFWPN77bHY69Hrn6rFwr5IzSxuhvI,7060
@@ -130,16 +130,15 @@ datachain/model/ultralytics/bbox.py,sha256=C-aDiBhVa_ML2oERWvksRkyMU1XuYSpb6eItH
 datachain/model/ultralytics/pose.py,sha256=pvoXrWWUSWT_UBaMwUb5MBHAY57Co2HFDPigFYNZWUA,3392
 datachain/model/ultralytics/segment.py,sha256=v9_xDxd5zw_I8rXsbl7yQXgEdTs2T38zyY_Y4XGN8ok,3194
 datachain/query/__init__.py,sha256=7DhEIjAA8uZJfejruAVMZVcGFmvUpffuZJwgRqNwe-c,263
-datachain/query/batch.py,sha256=
-datachain/query/dataset.py,sha256=
-datachain/query/dispatch.py,sha256=
+datachain/query/batch.py,sha256=ugTlSFqh_kxMcG6vJ5XrEzG9jBXRdb7KRAEEsFWiPew,4190
+datachain/query/dataset.py,sha256=lv5Ta7FjFZWQRUTz9_97oeoT5OvD62unRoNLgEueWUU,67384
+datachain/query/dispatch.py,sha256=B0sxnyN6unU8VFc35eWa_pe_TX6JfHDDbzyIQtp8AoM,15665
 datachain/query/metrics.py,sha256=qOMHiYPTMtVs2zI-mUSy8OPAVwrg4oJtVF85B9tdQyM,810
 datachain/query/params.py,sha256=JkVz6IKUIpF58JZRkUXFT8DAHX2yfaULbhVaGmHKFLc,826
 datachain/query/queue.py,sha256=v0UeK4ilmdiRoJ5OdjB5qpnHTYDxRP4vhVp5Iw_toaI,3512
 datachain/query/schema.py,sha256=Cn1keXjktptAbEDbHlxSzdoCu5H6h_Vzp_DtNpMSr5w,6697
 datachain/query/session.py,sha256=lbwMDvxjZ2BS2rA9qk7MVBRzlsSrwH92yJ_waP3uvDc,6781
 datachain/query/udf.py,sha256=SLLLNLz3QmtaM04ZVTu7K6jo58I-1j5Jf7Lb4ORv4tQ,1385
-datachain/query/utils.py,sha256=UbsyU2QVJCHLnm3dAYOjacXOiwa0-tSOawwMb8SrRdY,1251
 datachain/remote/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datachain/remote/studio.py,sha256=4voPFVDXAU6BSBHDAvB_LTYiCACA6Zr0IfYnDjrnN6s,16737
 datachain/sql/__init__.py,sha256=8D2omsBiATt8bjLjGo6jBEtaKEkOlnlNFWhVryHMDv0,388
@@ -165,9 +164,9 @@ datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR
 datachain/toolkit/__init__.py,sha256=eQ58Q5Yf_Fgv1ZG0IO5dpB4jmP90rk8YxUWmPc1M2Bo,68
 datachain/toolkit/split.py,sha256=xQzzmvQRKsPteDKbpgOxd4r971BnFaK33mcOl0FuGeI,2883
 datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
-datachain-0.34.7.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
-datachain-0.34.7.dist-info/METADATA,sha256=
-datachain-0.34.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-datachain-0.34.7.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
-datachain-0.34.7.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
-datachain-0.34.7.dist-info/RECORD,,
+datachain-0.35.1.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
+datachain-0.35.1.dist-info/METADATA,sha256=269z2Y2d1NZiTqvHExCQMAtcEcz2qYEb7RiIvvAZnKw,13606
+datachain-0.35.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+datachain-0.35.1.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
+datachain-0.35.1.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
+datachain-0.35.1.dist-info/RECORD,,
```
datachain/query/utils.py
DELETED

```diff
@@ -1,38 +0,0 @@
-import sqlalchemy as sa
-
-ColT = sa.ColumnClause | sa.Column | sa.ColumnElement | sa.TextClause | sa.Label
-
-
-def column_name(col: ColT) -> str:
-    """Returns column name from column element."""
-    return (
-        col.name
-        if isinstance(col, (sa.ColumnClause, sa.Column, sa.Label))
-        else str(col)
-    )
-
-
-def get_query_column(query: sa.Select, name: str) -> ColT | None:
-    """Returns column element from query by name or None if column not found."""
-    return next((col for col in query.inner_columns if column_name(col) == name), None)
-
-
-def get_query_id_column(query: sa.Select) -> sa.ColumnElement | None:
-    """Returns ID column element from query or None if column not found."""
-    col = get_query_column(query, "sys__id")
-    return col if col is not None and isinstance(col, sa.ColumnElement) else None
-
-
-def select_only_columns(query: sa.Select, *names: str) -> sa.Select:
-    """Returns query selecting defined columns only."""
-    if not names:
-        return query
-
-    cols: list[ColT] = []
-    for name in names:
-        col = get_query_column(query, name)
-        if col is None:
-            raise ValueError(f"Column '{name}' not found in query")
-        cols.append(col)
-
-    return query.with_only_columns(*cols)
```
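The deleted helpers map onto standard SQLAlchemy calls, which the new code now uses inline. A sketch of the equivalents, assuming plain `Select` objects and an illustrative table:

```python
import sqlalchemy as sa

t = sa.table("t", sa.column("sys__id"), sa.column("a"), sa.column("b"))
query = sa.select(t)

# get_query_column(query, "a") / get_query_id_column(query) become:
col = query.selected_columns.get("a")
id_col = query.selected_columns.get("sys__id")

# select_only_columns(query, "sys__id", "a") becomes:
slim = query.with_only_columns(id_col, col)
print([c.name for c in slim.selected_columns])  # ['sys__id', 'a']
```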