PyPI - datachain - Versions diffs - 0.20.3__py3-none-any.whl → 0.21.0__py3-none-any.whl - Mend

datachain 0.20.3__py3-none-any.whl → 0.21.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of datachain might be problematic. Click here for more details.

Files changed (47) hide show

datachain/__init__.py +0 -2
datachain/cache.py +2 -2
datachain/catalog/catalog.py +65 -180
datachain/cli/__init__.py +7 -0
datachain/cli/commands/datasets.py +28 -43
datachain/cli/commands/ls.py +2 -2
datachain/cli/parser/__init__.py +35 -1
datachain/client/fsspec.py +3 -5
datachain/client/hf.py +0 -10
datachain/client/local.py +4 -4
datachain/data_storage/metastore.py +37 -403
datachain/data_storage/sqlite.py +7 -139
datachain/data_storage/warehouse.py +7 -26
datachain/dataset.py +12 -126
datachain/delta.py +7 -11
datachain/error.py +0 -36
datachain/func/func.py +1 -1
datachain/lib/arrow.py +3 -3
datachain/lib/dataset_info.py +0 -4
datachain/lib/dc/datachain.py +92 -259
datachain/lib/dc/datasets.py +49 -87
datachain/lib/dc/listings.py +3 -3
datachain/lib/dc/records.py +0 -1
datachain/lib/dc/storage.py +40 -38
datachain/lib/file.py +23 -77
datachain/lib/listing.py +1 -3
datachain/lib/meta_formats.py +1 -1
datachain/lib/pytorch.py +1 -1
datachain/lib/settings.py +0 -10
datachain/lib/tar.py +2 -1
datachain/lib/udf_signature.py +1 -1
datachain/lib/webdataset.py +20 -30
datachain/listing.py +1 -3
datachain/query/dataset.py +46 -71
datachain/query/session.py +1 -1
datachain/remote/studio.py +26 -61
datachain/studio.py +7 -23
{datachain-0.20.3.dist-info → datachain-0.21.0.dist-info}/METADATA +2 -2
{datachain-0.20.3.dist-info → datachain-0.21.0.dist-info}/RECORD +43 -47
datachain/lib/namespaces.py +0 -71
datachain/lib/projects.py +0 -86
datachain/namespace.py +0 -65
datachain/project.py +0 -78
{datachain-0.20.3.dist-info → datachain-0.21.0.dist-info}/WHEEL +0 -0
{datachain-0.20.3.dist-info → datachain-0.21.0.dist-info}/entry_points.txt +0 -0
{datachain-0.20.3.dist-info → datachain-0.21.0.dist-info}/licenses/LICENSE +0 -0
{datachain-0.20.3.dist-info → datachain-0.21.0.dist-info}/top_level.txt +0 -0

datachain/func/func.py CHANGED Viewed

@@ -25,7 +25,7 @@ if TYPE_CHECKING:
 ColT = Union[str, Column, ColumnElement, "Func", tuple]
-class Func(Function):  # noqa: PLW1641
+class Func(Function):
     """Represents a function to be applied to a column in a SQL query."""
     def __init__(

datachain/lib/arrow.py CHANGED Viewed

@@ -76,7 +76,7 @@ class ArrowGenerator(Generator):
             fs_path = file.path
             fs = ReferenceFileSystem({fs_path: [cache_path]})
         else:
-            fs, fs_path = file.get_fs(), file.get_fs_path()
+            fs, fs_path = file.get_fs(), file.get_path()
         kwargs = self.kwargs
         if format := kwargs.get("format"):
@@ -160,8 +160,8 @@ def infer_schema(chain: "DataChain", **kwargs) -> pa.Schema:
         kwargs["format"] = fix_pyarrow_format(format, parse_options)
     schemas = []
-    for (file,) in chain.to_iter("file"):
-        ds = dataset(file.get_fs_path(), filesystem=file.get_fs(), **kwargs)  # type: ignore[union-attr]
+    for file in chain.collect("file"):
+        ds = dataset(file.get_path(), filesystem=file.get_fs(), **kwargs)  # type: ignore[union-attr]
         schemas.append(ds.schema)
     if not schemas:
         raise ValueError(

datachain/lib/dataset_info.py CHANGED Viewed

@@ -22,8 +22,6 @@ if TYPE_CHECKING:
 class DatasetInfo(DataModel):
     name: str
-    namespace: str
-    project: str
     uuid: str = Field(default=str(uuid4()))
     version: str = Field(default=DEFAULT_DATASET_VERSION)
     status: int = Field(default=DatasetStatus.CREATED)
@@ -93,8 +91,6 @@ class DatasetInfo(DataModel):
         return cls(
             uuid=version.uuid,
             name=dataset.name,
-            namespace=dataset.project.namespace.name,
-            project=dataset.project.name,
             version=version.version,
             status=version.status,
             created_at=version.created_at,

datachain/lib/dc/datachain.py CHANGED Viewed

@@ -24,9 +24,8 @@ from pydantic import BaseModel
 from tqdm import tqdm
 from datachain import semver
-from datachain.dataset import DatasetRecord, parse_dataset_name
+from datachain.dataset import DatasetRecord
 from datachain.delta import delta_disabled
-from datachain.error import ProjectCreateNotAllowedError, ProjectNotFoundError
 from datachain.func import literal
 from datachain.func.base import Function
 from datachain.func.func import Func
@@ -38,7 +37,6 @@ from datachain.lib.file import (
     FileExporter,
 )
 from datachain.lib.file import ExportPlacement as FileExportPlacement
-from datachain.lib.projects import get as get_project
 from datachain.lib.settings import Settings
 from datachain.lib.signal_schema import SignalSchema
 from datachain.lib.udf import Aggregator, BatchMapper, Generator, Mapper, UDFBase
@@ -263,7 +261,7 @@ class DataChain:
         """Underlying dataset, if there is one."""
         if not self.name:
             return None
-        return self.session.catalog.get_dataset(self.name, self._query.project)
+        return self.session.catalog.get_dataset(self.name)
     def __or__(self, other: "Self") -> "Self":
         """Return `self.union(other)`."""
@@ -314,8 +312,6 @@ class DataChain:
         min_task_size=None,
         prefetch: Optional[int] = None,
         sys: Optional[bool] = None,
-        namespace: Optional[str] = None,
-        project: Optional[str] = None,
     ) -> "Self":
         """Change settings for chain.
@@ -331,8 +327,6 @@ class DataChain:
             prefetch: number of workers to use for downloading files in advance.
                       This is enabled by default and uses 2 workers.
                       To disable prefetching, set it to 0.
-            namespace: namespace name.
-            project: project name.
         Example:
             ```py
@@ -346,11 +340,7 @@ class DataChain:
         if sys is None:
             sys = self._sys
         settings = copy.copy(self._settings)
-        settings.add(
-            Settings(
-                cache, parallel, workers, min_task_size, prefetch, namespace, project
-            )
-        )
+        settings.add(Settings(cache, parallel, workers, min_task_size, prefetch))
         return self._evolve(settings=settings, _sys=sys)
     def reset_settings(self, settings: Optional[Settings] = None) -> "Self":
@@ -440,10 +430,10 @@ class DataChain:
         from datachain.lib.arrow import schema_to_output
-        json_values = self.limit(schema_sample_size).to_list(col)
+        json_values = list(self.limit(schema_sample_size).collect(col))
         json_dicts = [
             json.loads(json_value) if isinstance(json_value, str) else json_value
-            for (json_value,) in json_values
+            for json_value in json_values
         ]
         if any(not isinstance(json_dict, dict) for json_dict in json_dicts):
@@ -500,22 +490,6 @@ class DataChain:
         )
         return listings(*args, **kwargs)
-    @property
-    def namespace_name(self) -> str:
-        """Current namespace name in which the chain is running"""
-        return (
-            self._settings.namespace
-            or self.session.catalog.metastore.default_namespace_name
-        )
-    @property
-    def project_name(self) -> str:
-        """Current project name in which the chain is running"""
-        return (
-            self._settings.project
-            or self.session.catalog.metastore.default_project_name
-        )
     def persist(self) -> "Self":
         """Saves temporary chain that will be removed after the process ends.
         Temporary datasets are useful for optimization, for example when we have
@@ -525,12 +499,7 @@ class DataChain:
         It returns the chain itself.
         """
         schema = self.signals_schema.clone_without_sys_signals().serialize()
-        project = get_project(
-            self.project_name, self.namespace_name, session=self.session
-        )
-        return self._evolve(
-            query=self._query.save(project=project, feature_schema=schema)
-        )
+        return self._evolve(query=self._query.save(feature_schema=schema))
     def save(  # type: ignore[override]
         self,
@@ -544,10 +513,7 @@ class DataChain:
         """Save to a Dataset. It returns the chain itself.
         Parameters:
-            name : dataset name. It can be full name consisting of namespace and
-                project, but it can also be just a regular dataset name in which
-                case we are taking namespace and project from settings, if they
-                are defined there, or default ones instead.
+            name : dataset name.
             version : version of a dataset. If version is not specified and dataset
                 already exists, version patch increment will happen e.g 1.2.1 -> 1.2.2.
             description : description of a dataset.
@@ -569,29 +535,6 @@ class DataChain:
                 " patch"
             )
-        namespace_name, project_name, name = parse_dataset_name(name)
-        namespace_name = (
-            namespace_name
-            or self._settings.namespace
-            or self.session.catalog.metastore.default_namespace_name
-        )
-        project_name = (
-            project_name
-            or self._settings.project
-            or self.session.catalog.metastore.default_project_name
-        )
-        try:
-            project = self.session.catalog.metastore.get_project(
-                project_name,
-                namespace_name,
-                create=self.session.catalog.metastore.project_allowed_to_create,
-            )
-        except ProjectNotFoundError as e:
-            # not being able to create it as creation is not allowed
-            raise ProjectCreateNotAllowedError("Creating project is not allowed") from e
         schema = self.signals_schema.clone_without_sys_signals().serialize()
         # Handle retry and delta functionality
@@ -615,7 +558,6 @@ class DataChain:
                     query=result_ds._query.save(
                         name=name,
                         version=version,
-                        project=project,
                         feature_schema=schema,
                         dependencies=dependencies,
                         **kwargs,
@@ -635,7 +577,6 @@ class DataChain:
             query=self._query.save(
                 name=name,
                 version=version,
-                project=project,
                 description=description,
                 attrs=attrs,
                 feature_schema=schema,
@@ -902,7 +843,7 @@ class DataChain:
             Order is not guaranteed when steps are added after an `order_by` statement.
             I.e. when using `read_dataset` an `order_by` statement should be used if
             the order of the records in the chain is important.
-            Using `order_by` directly before `limit`, `to_list` and similar methods
+            Using `order_by` directly before `limit`, `collect` and `collect_flatten`
             will give expected results.
             See https://github.com/iterative/datachain/issues/477 for further details.
         """
@@ -1107,32 +1048,32 @@ class DataChain:
     @property
     def _effective_signals_schema(self) -> "SignalSchema":
-        """Effective schema used for user-facing API like to_list, to_pandas, etc."""
+        """Effective schema used for user-facing API like collect, to_pandas, etc."""
         signals_schema = self.signals_schema
         if not self._sys:
             return signals_schema.clone_without_sys_signals()
         return signals_schema
     @overload
-    def _leaf_values(self) -> Iterator[tuple[Any, ...]]: ...
+    def collect_flatten(self) -> Iterator[tuple[Any, ...]]: ...
     @overload
-    def _leaf_values(self, *, include_hidden: bool) -> Iterator[tuple[Any, ...]]: ...
+    def collect_flatten(self, *, include_hidden: bool) -> Iterator[tuple[Any, ...]]: ...
     @overload
-    def _leaf_values(
+    def collect_flatten(
         self, *, row_factory: Callable[[list[str], tuple[Any, ...]], _T]
     ) -> Iterator[_T]: ...
     @overload
-    def _leaf_values(
+    def collect_flatten(
         self,
         *,
         row_factory: Callable[[list[str], tuple[Any, ...]], _T],
         include_hidden: bool,
     ) -> Iterator[_T]: ...
-    def _leaf_values(self, *, row_factory=None, include_hidden: bool = True):
+    def collect_flatten(self, *, row_factory=None, include_hidden: bool = True):
         """Yields flattened rows of values as a tuple.
         Args:
@@ -1160,7 +1101,7 @@ class DataChain:
         headers, _ = self._effective_signals_schema.get_headers_with_length()
         column_names = [".".join(filter(None, header)) for header in headers]
-        results_iter = self._leaf_values()
+        results_iter = self.collect_flatten()
         def column_chunks() -> Iterator[list[list[Any]]]:
             for chunk_iter in batched_it(results_iter, chunk_size):
@@ -1193,9 +1134,9 @@ class DataChain:
     def results(self, *, row_factory=None, include_hidden=True):
         if row_factory is None:
-            return list(self._leaf_values(include_hidden=include_hidden))
+            return list(self.collect_flatten(include_hidden=include_hidden))
         return list(
-            self._leaf_values(row_factory=row_factory, include_hidden=include_hidden)
+            self.collect_flatten(row_factory=row_factory, include_hidden=include_hidden)
         )
     def to_records(self) -> list[dict[str, Any]]:
@@ -1206,38 +1147,42 @@ class DataChain:
         return self.results(row_factory=to_dict)
-    def to_iter(self, *cols: str) -> Iterator[tuple[DataValue, ...]]:
+    @overload
+    def collect(self) -> Iterator[tuple[DataValue, ...]]: ...
+    @overload
+    def collect(self, col: str) -> Iterator[DataValue]: ...
+    @overload
+    def collect(self, *cols: str) -> Iterator[tuple[DataValue, ...]]: ...
+    def collect(self, *cols: str) -> Iterator[Union[DataValue, tuple[DataValue, ...]]]:  # type: ignore[overload-overlap,misc]
         """Yields rows of values, optionally limited to the specified columns.
         Args:
             *cols: Limit to the specified columns. By default, all columns are selected.
         Yields:
-            (tuple[DataType, ...]): Yields a tuple of items for each row.
+            (DataType): Yields a single item if a column is selected.
+            (tuple[DataType, ...]): Yields a tuple of items if multiple columns are
+                selected.
         Example:
             Iterating over all rows:
             ```py
-            for row in ds.to_iter():
-                print(row)
-            ```
-            DataChain is iterable and can be used in a for loop directly which is
-            equivalent to `ds.to_iter()`:
-            ```py
-            for row in ds:
+            for row in dc.collect():
                 print(row)
             ```
             Iterating over all rows with selected columns:
             ```py
-            for name, size in ds.to_iter("file.path", "file.size"):
+            for name, size in dc.collect("file.path", "file.size"):
                 print(name, size)
             ```
             Iterating over a single column:
             ```py
-            for (file,) in ds.to_iter("file.path"):
+            for file in dc.collect("file.path"):
                 print(file)
             ```
         """
@@ -1249,31 +1194,7 @@ class DataChain:
                 ret = signals_schema.row_to_features(
                     row, catalog=chain.session.catalog, cache=chain._settings.cache
                 )
-                yield tuple(ret)
-    @overload
-    def collect(self) -> Iterator[tuple[DataValue, ...]]: ...
-    @overload
-    def collect(self, col: str) -> Iterator[DataValue]: ...
-    @overload
-    def collect(self, *cols: str) -> Iterator[tuple[DataValue, ...]]: ...
-    def collect(self, *cols: str) -> Iterator[Union[DataValue, tuple[DataValue, ...]]]:  # type: ignore[overload-overlap,misc]
-        """
-        Deprecated. Use `to_iter` method instead.
-        """
-        warnings.warn(
-            "Method `collect` is deprecated. Use `to_iter` method instead.",
-            DeprecationWarning,
-            stacklevel=2,
-        )
-        if len(cols) == 1:
-            yield from [item[0] for item in self.to_iter(*cols)]
-        else:
-            yield from self.to_iter(*cols)
+                yield ret[0] if len(cols) == 1 else tuple(ret)
     def to_pytorch(
         self,
@@ -1508,7 +1429,7 @@ class DataChain:
             )
         return self._evolve(query=self._query.subtract(other._query, signals))  # type: ignore[arg-type]
-    def diff(
+    def compare(
         self,
         other: "DataChain",
         on: Union[str, Sequence[str]],
@@ -1521,33 +1442,41 @@ class DataChain:
         same: bool = False,
         status_col: Optional[str] = None,
     ) -> "DataChain":
-        """Calculate differences between two chains.
-        This method identifies records that are added, deleted, modified, or unchanged
-        between two chains. It adds a status column with values: A=added, D=deleted,
-        M=modified, S=same.
+        """Comparing two chains by identifying rows that are added, deleted, modified
+        or same. Result is the new chain that has additional column with possible
+        values: `A`, `D`, `M`, `U` representing added, deleted, modified and same
+        rows respectively. Note that if only one "status" is asked, by setting proper
+        flags, this additional column is not created as it would have only one value
+        for all rows. Beside additional diff column, new chain has schema of the chain
+        on which method was called.
         Parameters:
-            other: Chain to compare against.
-            on: Column(s) to match records between chains.
-            right_on: Column(s) in the other chain to match against. Defaults to `on`.
-            compare: Column(s) to check for changes.
-                     If not specified,all columns are used.
-            right_compare: Column(s) in the other chain to compare against.
-                     Defaults to values of `compare`.
-            added (bool): Include records that exist in this chain but not in the other.
-            deleted (bool): Include records that exist only in the other chain.
-            modified (bool): Include records that exist in both
-                     but have different values.
-            same (bool): Include records that are identical in both chains.
-            status_col (str): Name for the status column showing differences.
-        Default behavior: By default, shows added, deleted, and modified records,
-        but excludes unchanged records (same=False). Status column is not created.
+            other: Chain to calculate diff from.
+            on: Column or list of columns to match on. If both chains have the
+                same columns then this column is enough for the match. Otherwise,
+                `right_on` parameter has to specify the columns for the other chain.
+                This value is used to find corresponding row in other dataset. If not
+                found there, row is considered as added (or removed if vice versa), and
+                if found then row can be either modified or same.
+            right_on: Optional column or list of columns
+                for the `other` to match.
+            compare: Column or list of columns to compare on. If both chains have
+                the same columns then this column is enough for the compare. Otherwise,
+                `right_compare` parameter has to specify the columns for the other
+                chain. This value is used to see if row is modified or same. If
+                not set, all columns will be used for comparison
+            right_compare: Optional column or list of columns
+                    for the `other` to compare to.
+            added (bool): Whether to return added rows in resulting chain.
+            deleted (bool): Whether to return deleted rows in resulting chain.
+            modified (bool): Whether to return modified rows in resulting chain.
+            same (bool): Whether to return unchanged rows in resulting chain.
+            status_col (str): Name of the new column that is created in resulting chain
+                representing diff status.
         Example:
             ```py
-            res = persons.diff(
+            res = persons.compare(
                 new_persons,
                 on=["id"],
                 right_on=["other_id"],
@@ -1576,7 +1505,7 @@ class DataChain:
             status_col=status_col,
         )
-    def file_diff(
+    def diff(
         self,
         other: "DataChain",
         on: str = "file",
@@ -1587,29 +1516,31 @@ class DataChain:
         same: bool = False,
         status_col: Optional[str] = None,
     ) -> "DataChain":
-        """Calculate differences between two chains containing files.
-        This method is specifically designed for file chains. It uses file `source`
-        and `path` to match files, and file `version` and `etag` to detect changes.
+        """Similar to `.compare()`, which is more generic method to calculate difference
+        between two chains. Unlike `.compare()`, this method works only on those chains
+        that have `File` object, or it's derivatives, in it. File `source` and `path`
+        are used for matching, and file `version` and `etag` for comparing, while in
+        `.compare()` user needs to provide arbitrary columns for matching and comparing.
         Parameters:
-            other: Chain to compare against.
-            on: File column name in this chain. Default is "file".
-            right_on: File column name in the other chain. Defaults to `on`.
-            added (bool): Include files that exist in this chain but not in the other.
-            deleted (bool): Include files that exist only in the other chain.
-            modified (bool): Include files that exist in both but have different
-                             versions/etags.
-            same (bool): Include files that are identical in both chains.
-            status_col (str): Name for the status column showing differences
-                              (A=added, D=deleted, M=modified, S=same).
-        Default behavior: By default, includes only new files (added=True and
-        modified=True). This is useful for incremental processing.
+            other: Chain to calculate diff from.
+            on: File signal to match on. If both chains have the
+                same file signal then this column is enough for the match. Otherwise,
+                `right_on` parameter has to specify the file signal for the other chain.
+                This value is used to find corresponding row in other dataset. If not
+                found there, row is considered as added (or removed if vice versa), and
+                if found then row can be either modified or same.
+            right_on: Optional file signal for the `other` to match.
+            added (bool): Whether to return added rows in resulting chain.
+            deleted (bool): Whether to return deleted rows in resulting chain.
+            modified (bool): Whether to return modified rows in resulting chain.
+            same (bool): Whether to return unchanged rows in resulting chain.
+            status_col (str): Optional name of the new column that is created in
+                resulting chain representing diff status.
         Example:
             ```py
-            diff = images.file_diff(
+            diff = images.diff(
                 new_images,
                 on="file",
                 right_on="other_file",
@@ -1634,7 +1565,7 @@ class DataChain:
         compare_cols = get_file_signals(on, compare_file_signals)
         right_compare_cols = get_file_signals(right_on, compare_file_signals)
-        return self.diff(
+        return self.compare(
             other,
             on_cols,
             right_on=right_on_cols,
@@ -2046,7 +1977,7 @@ class DataChain:
         headers, _ = self._effective_signals_schema.get_headers_with_length()
         column_names = [".".join(filter(None, header)) for header in headers]
-        results_iter = self._leaf_values()
+        results_iter = self.collect_flatten()
         with opener(path, "w", newline="") as f:
             writer = csv.writer(f, delimiter=delimiter, **kwargs)
@@ -2098,7 +2029,7 @@ class DataChain:
             if include_outer_list:
                 # This makes the file JSON instead of JSON lines.
                 f.write(b"[\n")
-            for row in self._leaf_values():
+            for row in self.collect_flatten():
                 if not is_first:
                     if include_outer_list:
                         # This makes the file JSON instead of JSON lines.
@@ -2263,7 +2194,7 @@ class DataChain:
             max_threads=num_threads or 1,
             client_config=client_config,
         )
-        file_exporter.run(self.to_values(signal), progress_bar)
+        file_exporter.run(self.collect(signal), progress_bar)
     def shuffle(self) -> "Self":
         """Shuffle the rows of the chain deterministically."""
@@ -2308,45 +2239,16 @@ class DataChain:
             Combining filters with "or"
             ```py
-            dc.filter(
-                C("file.path").glob("cat*") |
-                C("file.path").glob("dog*")
-            )
-            ```
-            ```py
-            dc.filter(dc.func.or_(
-                C("file.path").glob("cat*"),
-                C("file.path").glob("dog*")
-            ))
+            dc.filter(C("file.path").glob("cat*") | C("file.path").glob("dog*))
             ```
             Combining filters with "and"
             ```py
             dc.filter(
-                C("file.path").glob("*.jpg"),
-                string.length(C("file.path")) > 5
-            )
-            ```
-            ```py
-            dc.filter(
-                C("file.path").glob("*.jpg") &
+                C("file.path").glob("*.jpg) &
                 (string.length(C("file.path")) > 5)
             )
             ```
-            ```py
-            dc.filter(dc.func.and_(
-                C("file.path").glob("*.jpg"),
-                string.length(C("file.path")) > 5
-            ))
-            ```
-            Combining filters with "not"
-            ```py
-            dc.filter(~(C("file.path").glob("*.jpg")))
-            ```
         """
         return self._evolve(query=self._query.filter(*args))
@@ -2397,72 +2299,3 @@ class DataChain:
             Use 0/3, 1/3 and 2/3, not 1/3, 2/3 and 3/3.
         """
         return self._evolve(query=self._query.chunk(index, total))
-    def to_list(self, *cols: str) -> list[tuple[DataValue, ...]]:
-        """Returns a list of rows of values, optionally limited to the specified
-        columns.
-        Args:
-            *cols: Limit to the specified columns. By default, all columns are selected.
-        Returns:
-            list[tuple[DataType, ...]]: Returns a list of tuples of items for each row.
-        Example:
-            Getting all rows as a list:
-            ```py
-            rows = dc.to_list()
-            print(rows)
-            ```
-            Getting all rows with selected columns as a list:
-            ```py
-            name_size_pairs = dc.to_list("file.path", "file.size")
-            print(name_size_pairs)
-            ```
-            Getting a single column as a list:
-            ```py
-            files = dc.to_list("file.path")
-            print(files)  # Returns list of 1-tuples
-            ```
-        """
-        return list(self.to_iter(*cols))
-    def to_values(self, col: str) -> list[DataValue]:
-        """Returns a flat list of values from a single column.
-        Args:
-            col: The name of the column to extract values from.
-        Returns:
-            list[DataValue]: Returns a flat list of values from the specified column.
-        Example:
-            Getting all values from a single column:
-            ```py
-            file_paths = dc.to_values("file.path")
-            print(file_paths)  # Returns list of strings
-            ```
-            Getting all file sizes:
-            ```py
-            sizes = dc.to_values("file.size")
-            print(sizes)  # Returns list of integers
-            ```
-        """
-        return [row[0] for row in self.to_list(col)]
-    def __iter__(self) -> Iterator[tuple[DataValue, ...]]:
-        """Make DataChain objects iterable.
-        Yields:
-            (tuple[DataValue, ...]): Yields tuples of all column values for each row.
-        Example:
-            ```py
-            for row in chain:
-                print(row)
-            ```
-        """
-        return self.to_iter()

datachain 0.20.3__py3-none-any.whl → 0.21.0__py3-none-any.whl

Potentially problematic release.

datachain 0.20.3__py3-none-any.whl → 0.21.0__py3-none-any.whl