datachain 0.15.0__tar.gz → 0.16.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- {datachain-0.15.0/src/datachain.egg-info → datachain-0.16.1}/PKG-INFO +1 -1
- {datachain-0.15.0 → datachain-0.16.1}/docs/examples.md +5 -5
- {datachain-0.15.0 → datachain-0.16.1}/docs/quick-start.md +3 -3
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/catalog/catalog.py +9 -9
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/cli/__init__.py +1 -1
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/cli/commands/datasets.py +3 -3
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/cli/commands/show.py +2 -2
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/cli/parser/__init__.py +2 -2
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/data_storage/metastore.py +5 -5
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/dataset.py +8 -8
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/func/aggregate.py +3 -3
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/lib/convert/values_to_tuples.py +6 -8
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/lib/dataset_info.py +18 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/lib/dc/datachain.py +20 -13
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/lib/dc/datasets.py +9 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/lib/dc/records.py +16 -10
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/lib/dc/utils.py +2 -2
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/lib/signal_schema.py +1 -10
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/lib/udf.py +2 -1
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/query/dataset.py +15 -8
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/query/schema.py +1 -4
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/remote/studio.py +2 -2
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/studio.py +2 -2
- {datachain-0.15.0 → datachain-0.16.1/src/datachain.egg-info}/PKG-INFO +1 -1
- {datachain-0.15.0 → datachain-0.16.1}/tests/conftest.py +7 -7
- {datachain-0.15.0 → datachain-0.16.1}/tests/func/test_datachain.py +4 -4
- {datachain-0.15.0 → datachain-0.16.1}/tests/func/test_datasets.py +7 -7
- {datachain-0.15.0 → datachain-0.16.1}/tests/func/test_pull.py +1 -1
- {datachain-0.15.0 → datachain-0.16.1}/tests/func/test_read_database.py +31 -17
- {datachain-0.15.0 → datachain-0.16.1}/tests/test_cli_studio.py +4 -4
- {datachain-0.15.0 → datachain-0.16.1}/tests/unit/lib/test_datachain.py +35 -0
- {datachain-0.15.0 → datachain-0.16.1}/.cruft.json +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/.gitattributes +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/.github/codecov.yaml +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/.github/dependabot.yml +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/.github/workflows/benchmarks.yml +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/.github/workflows/release.yml +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/.github/workflows/tests-studio.yml +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/.github/workflows/tests.yml +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/.github/workflows/update-template.yaml +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/.gitignore +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/.pre-commit-config.yaml +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/CODE_OF_CONDUCT.rst +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/LICENSE +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/README.rst +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/docs/assets/captioned_cartoons.png +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/docs/assets/datachain-white.svg +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/docs/assets/datachain.svg +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/docs/contributing.md +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/docs/css/github-permalink-style.css +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/docs/index.md +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/docs/overrides/main.html +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/docs/references/data-types/arrowrow.md +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/docs/references/data-types/bbox.md +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/docs/references/data-types/file.md +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/docs/references/data-types/imagefile.md +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/docs/references/data-types/index.md +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/docs/references/data-types/pose.md +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/docs/references/data-types/segment.md +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/docs/references/data-types/tarvfile.md +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/docs/references/data-types/textfile.md +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/docs/references/data-types/videofile.md +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/docs/references/datachain.md +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/docs/references/func.md +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/docs/references/index.md +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/docs/references/remotes.md +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/docs/references/toolkit.md +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/docs/references/torch.md +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/docs/references/udf.md +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/docs/tutorials.md +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/examples/computer_vision/openimage-detect.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/examples/computer_vision/ultralytics-bbox.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/examples/computer_vision/ultralytics-pose.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/examples/computer_vision/ultralytics-segment.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/examples/get_started/common_sql_functions.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/examples/get_started/json-csv-reader.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/examples/get_started/torch-loader.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/examples/get_started/udfs/parallel.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/examples/get_started/udfs/simple.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/examples/get_started/udfs/stateful.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/examples/llm_and_nlp/claude-query.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/examples/multimodal/clip_inference.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/examples/multimodal/hf_pipeline.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/examples/multimodal/openai_image_desc_lib.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/examples/multimodal/wds.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/examples/multimodal/wds_filtered.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/mkdocs.yml +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/noxfile.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/pyproject.toml +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/setup.cfg +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/__init__.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/__main__.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/asyn.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/cache.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/catalog/__init__.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/catalog/datasource.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/catalog/loader.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/cli/commands/__init__.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/cli/commands/du.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/cli/commands/index.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/cli/commands/ls.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/cli/commands/misc.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/cli/commands/query.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/cli/parser/job.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/cli/parser/studio.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/cli/parser/utils.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/cli/utils.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/client/__init__.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/client/azure.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/client/fileslice.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/client/fsspec.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/client/gcs.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/client/hf.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/client/local.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/client/s3.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/config.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/data_storage/__init__.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/data_storage/db_engine.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/data_storage/job.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/data_storage/schema.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/data_storage/serializer.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/data_storage/sqlite.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/data_storage/warehouse.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/diff/__init__.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/error.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/fs/__init__.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/fs/reference.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/fs/utils.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/func/__init__.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/func/array.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/func/base.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/func/conditional.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/func/func.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/func/numeric.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/func/path.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/func/random.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/func/string.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/func/window.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/job.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/lib/__init__.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/lib/arrow.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/lib/clip.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/lib/convert/__init__.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/lib/convert/flatten.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/lib/convert/python_to_sql.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/lib/convert/sql_to_python.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/lib/convert/unflatten.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/lib/data_model.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/lib/dc/__init__.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/lib/dc/csv.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/lib/dc/database.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/lib/dc/hf.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/lib/dc/json.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/lib/dc/listings.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/lib/dc/pandas.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/lib/dc/parquet.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/lib/dc/storage.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/lib/dc/values.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/lib/file.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/lib/hf.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/lib/image.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/lib/listing.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/lib/listing_info.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/lib/meta_formats.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/lib/model_store.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/lib/pytorch.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/lib/settings.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/lib/tar.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/lib/text.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/lib/udf_signature.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/lib/utils.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/lib/video.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/lib/webdataset.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/lib/webdataset_laion.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/listing.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/model/__init__.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/model/bbox.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/model/pose.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/model/segment.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/model/ultralytics/__init__.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/model/ultralytics/bbox.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/model/ultralytics/pose.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/model/ultralytics/segment.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/model/utils.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/node.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/nodes_fetcher.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/nodes_thread_pool.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/progress.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/py.typed +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/query/__init__.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/query/batch.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/query/dispatch.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/query/metrics.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/query/params.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/query/queue.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/query/session.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/query/udf.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/query/utils.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/remote/__init__.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/script_meta.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/sql/__init__.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/sql/default/__init__.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/sql/default/base.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/sql/functions/__init__.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/sql/functions/aggregate.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/sql/functions/array.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/sql/functions/conditional.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/sql/functions/numeric.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/sql/functions/path.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/sql/functions/random.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/sql/functions/string.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/sql/selectable.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/sql/sqlite/__init__.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/sql/sqlite/base.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/sql/sqlite/types.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/sql/sqlite/vector.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/sql/types.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/sql/utils.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/telemetry.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/toolkit/__init__.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/toolkit/split.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/torch/__init__.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain/utils.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain.egg-info/SOURCES.txt +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain.egg-info/dependency_links.txt +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain.egg-info/entry_points.txt +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain.egg-info/requires.txt +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/src/datachain.egg-info/top_level.txt +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/__init__.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/benchmarks/__init__.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/benchmarks/conftest.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/benchmarks/datasets/.dvc/config +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/benchmarks/datasets/.gitignore +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/benchmarks/test_datachain.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/benchmarks/test_ls.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/benchmarks/test_version.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/data.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/examples/__init__.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/examples/test_examples.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/examples/test_wds_e2e.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/examples/wds_data.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/func/__init__.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/func/data/lena.jpg +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/func/fake-service-account-credentials.json +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/func/model/__init__.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/func/model/data/running-mask0.png +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/func/model/data/running-mask1.png +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/func/model/data/running.jpg +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/func/model/data/ships.jpg +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/func/model/test_yolo.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/func/test_catalog.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/func/test_client.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/func/test_cloud_transfer.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/func/test_data_storage.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/func/test_datachain_merge.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/func/test_dataset_query.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/func/test_feature_pickling.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/func/test_file.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/func/test_hf.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/func/test_hidden_field.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/func/test_image.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/func/test_listing.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/func/test_ls.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/func/test_meta_formats.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/func/test_metrics.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/func/test_pytorch.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/func/test_query.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/func/test_session.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/func/test_toolkit.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/func/test_video.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/func/test_warehouse.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/scripts/feature_class.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/scripts/feature_class_exception.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/scripts/feature_class_parallel.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/scripts/feature_class_parallel_data_model.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/scripts/name_len_slow.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/test_atomicity.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/test_cli_e2e.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/test_import_time.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/test_query_e2e.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/test_telemetry.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/unit/__init__.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/unit/lib/__init__.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/unit/lib/conftest.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/unit/lib/test_arrow.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/unit/lib/test_clip.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/unit/lib/test_datachain_merge.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/unit/lib/test_diff.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/unit/lib/test_feature.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/unit/lib/test_feature_utils.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/unit/lib/test_file.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/unit/lib/test_hf.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/unit/lib/test_image.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/unit/lib/test_listing_info.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/unit/lib/test_python_to_sql.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/unit/lib/test_schema.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/unit/lib/test_signal_schema.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/unit/lib/test_sql_to_python.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/unit/lib/test_text.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/unit/lib/test_udf_signature.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/unit/lib/test_utils.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/unit/lib/test_webdataset.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/unit/model/__init__.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/unit/model/test_bbox.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/unit/model/test_pose.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/unit/model/test_segment.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/unit/model/test_utils.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/unit/sql/__init__.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/unit/sql/sqlite/__init__.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/unit/sql/sqlite/test_types.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/unit/sql/sqlite/test_utils.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/unit/sql/test_array.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/unit/sql/test_conditional.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/unit/sql/test_path.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/unit/sql/test_random.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/unit/sql/test_selectable.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/unit/sql/test_string.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/unit/test_asyn.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/unit/test_cache.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/unit/test_catalog.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/unit/test_catalog_loader.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/unit/test_cli_parsing.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/unit/test_client.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/unit/test_client_gcs.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/unit/test_client_s3.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/unit/test_config.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/unit/test_data_storage.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/unit/test_database_engine.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/unit/test_dataset.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/unit/test_dispatch.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/unit/test_fileslice.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/unit/test_func.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/unit/test_listing.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/unit/test_metastore.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/unit/test_module_exports.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/unit/test_pytorch.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/unit/test_query.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/unit/test_query_metrics.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/unit/test_query_params.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/unit/test_script_meta.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/unit/test_serializer.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/unit/test_session.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/unit/test_utils.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/unit/test_warehouse.py +0 -0
- {datachain-0.15.0 → datachain-0.16.1}/tests/utils.py +0 -0
|
@@ -94,7 +94,7 @@ dc.DataModel.register(MistralModel)
|
|
|
94
94
|
chain = (
|
|
95
95
|
dc
|
|
96
96
|
.read_storage("gs://datachain-demo/chatbot-KiT/", type="text")
|
|
97
|
-
.filter(dc.Column("file.
|
|
97
|
+
.filter(dc.Column("file.path").glob("*.txt"))
|
|
98
98
|
.limit(5)
|
|
99
99
|
.settings(parallel=4, cache=True)
|
|
100
100
|
.map(
|
|
@@ -228,7 +228,7 @@ Here is an example from MS COCO “captions” JSON which employs separate secti
|
|
|
228
228
|
|
|
229
229
|
Note how complicated the setup is. Every image is referenced by name, and the metadata for this file is keyed by the “id” field. This same field is referenced later in the “annotations” array, which is present in JSON files describing captions and the detected instances. The categories for the instances are stored in the “categories” array.
|
|
230
230
|
|
|
231
|
-
However,
|
|
231
|
+
However, DataChain can easily parse the entire COCO structure via several reading and merging operators:
|
|
232
232
|
|
|
233
233
|
```python
|
|
234
234
|
import datachain as dc
|
|
@@ -240,7 +240,7 @@ images = dc.read_storage(images_uri)
|
|
|
240
240
|
meta = dc.read_json(captions_uri, jmespath="images")
|
|
241
241
|
captions = dc.read_json(captions_uri, jmespath="annotations")
|
|
242
242
|
|
|
243
|
-
images_meta = images.merge(meta, on="file.
|
|
243
|
+
images_meta = images.merge(meta, on="file.path", right_on="images.file_name")
|
|
244
244
|
captioned_images = images_meta.merge(captions, on="images.id", right_on="annotations.image_id")
|
|
245
245
|
```
|
|
246
246
|
|
|
@@ -248,12 +248,12 @@ The resulting dataset has image entries as files decorated with all the metadata
|
|
|
248
248
|
|
|
249
249
|
```python
|
|
250
250
|
images_with_dogs = captioned_images.filter(dc.Column("annotations.caption").glob("*dog*"))
|
|
251
|
-
images_with_dogs.select("annotations", "file.
|
|
251
|
+
images_with_dogs.select("annotations", "file.path").show()
|
|
252
252
|
```
|
|
253
253
|
|
|
254
254
|
```
|
|
255
255
|
captions captions captions file
|
|
256
|
-
image_id id caption
|
|
256
|
+
image_id id caption path
|
|
257
257
|
0 17029 778902 a dog jumping to catch a frisbee in a yard 000000017029.jpg
|
|
258
258
|
1 17029 779838 A dog jumping to catch a red frisbee in a garden 000000017029.jpg
|
|
259
259
|
2 17029 781941 The dog is catching the Frisbee in mid air in ... 000000017029.jpg
|
|
@@ -184,7 +184,7 @@ chain = (
|
|
|
184
184
|
.save("response")
|
|
185
185
|
)
|
|
186
186
|
|
|
187
|
-
chain.select("file.
|
|
187
|
+
chain.select("file.path", "status", "response.usage").show(5)
|
|
188
188
|
|
|
189
189
|
success_rate = chain.filter(dc.Column("status") == "success").count() / chain.count()
|
|
190
190
|
print(f"{100*success_rate:.1f}% dialogs were successful")
|
|
@@ -194,7 +194,7 @@ Output:
|
|
|
194
194
|
|
|
195
195
|
``` shell
|
|
196
196
|
file status response response response
|
|
197
|
-
|
|
197
|
+
path usage usage usage
|
|
198
198
|
prompt_tokens total_tokens completion_tokens
|
|
199
199
|
0 1.txt success 547 548 1
|
|
200
200
|
1 10.txt failure 3576 3578 2
|
|
@@ -277,7 +277,7 @@ processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
|
|
|
277
277
|
|
|
278
278
|
chain = (
|
|
279
279
|
dc.read_storage("gs://datachain-demo/dogs-and-cats/", type="image", anon=True)
|
|
280
|
-
.map(label=lambda name: name.split(".")[0], params=["file.
|
|
280
|
+
.map(label=lambda name: name.split(".")[0], params=["file.path"])
|
|
281
281
|
.select("file", "label").to_pytorch(
|
|
282
282
|
transform=processor.image_processor,
|
|
283
283
|
tokenizer=processor.tokenizer,
|
|
@@ -776,7 +776,7 @@ class Catalog:
|
|
|
776
776
|
listing: Optional[bool] = False,
|
|
777
777
|
uuid: Optional[str] = None,
|
|
778
778
|
description: Optional[str] = None,
|
|
779
|
-
|
|
779
|
+
attrs: Optional[list[str]] = None,
|
|
780
780
|
) -> "DatasetRecord":
|
|
781
781
|
"""
|
|
782
782
|
Creates new dataset of a specific version.
|
|
@@ -794,16 +794,16 @@ class Catalog:
|
|
|
794
794
|
dataset = self.get_dataset(name)
|
|
795
795
|
default_version = dataset.next_version
|
|
796
796
|
|
|
797
|
-
if (description or
|
|
798
|
-
dataset.description != description or dataset.
|
|
797
|
+
if (description or attrs) and (
|
|
798
|
+
dataset.description != description or dataset.attrs != attrs
|
|
799
799
|
):
|
|
800
800
|
description = description or dataset.description
|
|
801
|
-
|
|
801
|
+
attrs = attrs or dataset.attrs
|
|
802
802
|
|
|
803
803
|
self.update_dataset(
|
|
804
804
|
dataset,
|
|
805
805
|
description=description,
|
|
806
|
-
|
|
806
|
+
attrs=attrs,
|
|
807
807
|
)
|
|
808
808
|
|
|
809
809
|
except DatasetNotFoundError:
|
|
@@ -817,7 +817,7 @@ class Catalog:
|
|
|
817
817
|
schema=schema,
|
|
818
818
|
ignore_if_exists=True,
|
|
819
819
|
description=description,
|
|
820
|
-
|
|
820
|
+
attrs=attrs,
|
|
821
821
|
)
|
|
822
822
|
|
|
823
823
|
version = version or default_version
|
|
@@ -1334,15 +1334,15 @@ class Catalog:
|
|
|
1334
1334
|
name: str,
|
|
1335
1335
|
new_name: Optional[str] = None,
|
|
1336
1336
|
description: Optional[str] = None,
|
|
1337
|
-
|
|
1337
|
+
attrs: Optional[list[str]] = None,
|
|
1338
1338
|
) -> DatasetRecord:
|
|
1339
1339
|
update_data = {}
|
|
1340
1340
|
if new_name:
|
|
1341
1341
|
update_data["name"] = new_name
|
|
1342
1342
|
if description is not None:
|
|
1343
1343
|
update_data["description"] = description
|
|
1344
|
-
if
|
|
1345
|
-
update_data["
|
|
1344
|
+
if attrs is not None:
|
|
1345
|
+
update_data["attrs"] = attrs # type: ignore[assignment]
|
|
1346
1346
|
|
|
1347
1347
|
dataset = self.get_dataset(name)
|
|
1348
1348
|
return self.update_dataset(dataset, **update_data)
|
|
@@ -154,7 +154,7 @@ def edit_dataset(
|
|
|
154
154
|
name: str,
|
|
155
155
|
new_name: Optional[str] = None,
|
|
156
156
|
description: Optional[str] = None,
|
|
157
|
-
|
|
157
|
+
attrs: Optional[list[str]] = None,
|
|
158
158
|
studio: bool = False,
|
|
159
159
|
local: bool = False,
|
|
160
160
|
all: bool = True,
|
|
@@ -167,9 +167,9 @@ def edit_dataset(
|
|
|
167
167
|
|
|
168
168
|
if all or local:
|
|
169
169
|
try:
|
|
170
|
-
catalog.edit_dataset(name, new_name, description,
|
|
170
|
+
catalog.edit_dataset(name, new_name, description, attrs)
|
|
171
171
|
except DatasetNotFoundError:
|
|
172
172
|
print("Dataset not found in local", file=sys.stderr)
|
|
173
173
|
|
|
174
174
|
if (all or studio) and token:
|
|
175
|
-
edit_studio_dataset(team, name, new_name, description,
|
|
175
|
+
edit_studio_dataset(team, name, new_name, description, attrs)
|
|
@@ -42,8 +42,8 @@ def show(
|
|
|
42
42
|
print("Name: ", name)
|
|
43
43
|
if dataset.description:
|
|
44
44
|
print("Description: ", dataset.description)
|
|
45
|
-
if dataset.
|
|
46
|
-
print("
|
|
45
|
+
if dataset.attrs:
|
|
46
|
+
print("Attributes: ", ",".join(dataset.attrs))
|
|
47
47
|
print("\n")
|
|
48
48
|
|
|
49
49
|
show_records(records, collapse_columns=not no_collapse, hidden_fields=hidden_fields)
|
|
@@ -217,9 +217,9 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
|
|
|
217
217
|
help="Dataset description",
|
|
218
218
|
)
|
|
219
219
|
parse_edit_dataset.add_argument(
|
|
220
|
-
"--
|
|
220
|
+
"--attrs",
|
|
221
221
|
nargs="+",
|
|
222
|
-
help="Dataset
|
|
222
|
+
help="Dataset attributes",
|
|
223
223
|
)
|
|
224
224
|
parse_edit_dataset.add_argument(
|
|
225
225
|
"--studio",
|
|
@@ -120,7 +120,7 @@ class AbstractMetastore(ABC, Serializable):
|
|
|
120
120
|
schema: Optional[dict[str, Any]] = None,
|
|
121
121
|
ignore_if_exists: bool = False,
|
|
122
122
|
description: Optional[str] = None,
|
|
123
|
-
|
|
123
|
+
attrs: Optional[list[str]] = None,
|
|
124
124
|
) -> DatasetRecord:
|
|
125
125
|
"""Creates new dataset."""
|
|
126
126
|
|
|
@@ -326,7 +326,7 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
326
326
|
Column("id", Integer, primary_key=True),
|
|
327
327
|
Column("name", Text, nullable=False),
|
|
328
328
|
Column("description", Text),
|
|
329
|
-
Column("
|
|
329
|
+
Column("attrs", JSON, nullable=True),
|
|
330
330
|
Column("status", Integer, nullable=False),
|
|
331
331
|
Column("feature_schema", JSON, nullable=True),
|
|
332
332
|
Column("created_at", DateTime(timezone=True)),
|
|
@@ -521,7 +521,7 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
521
521
|
schema: Optional[dict[str, Any]] = None,
|
|
522
522
|
ignore_if_exists: bool = False,
|
|
523
523
|
description: Optional[str] = None,
|
|
524
|
-
|
|
524
|
+
attrs: Optional[list[str]] = None,
|
|
525
525
|
**kwargs, # TODO registered = True / False
|
|
526
526
|
) -> DatasetRecord:
|
|
527
527
|
"""Creates new dataset."""
|
|
@@ -538,7 +538,7 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
538
538
|
query_script=query_script,
|
|
539
539
|
schema=json.dumps(schema or {}),
|
|
540
540
|
description=description,
|
|
541
|
-
|
|
541
|
+
attrs=json.dumps(attrs or []),
|
|
542
542
|
)
|
|
543
543
|
if ignore_if_exists and hasattr(query, "on_conflict_do_nothing"):
|
|
544
544
|
# SQLite and PostgreSQL both support 'on_conflict_do_nothing',
|
|
@@ -621,7 +621,7 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
621
621
|
dataset_values = {}
|
|
622
622
|
for field, value in kwargs.items():
|
|
623
623
|
if field in self._dataset_fields[1:]:
|
|
624
|
-
if field in ["
|
|
624
|
+
if field in ["attrs", "schema"]:
|
|
625
625
|
values[field] = json.dumps(value) if value else None
|
|
626
626
|
else:
|
|
627
627
|
values[field] = value
|
|
@@ -329,7 +329,7 @@ class DatasetRecord:
|
|
|
329
329
|
id: int
|
|
330
330
|
name: str
|
|
331
331
|
description: Optional[str]
|
|
332
|
-
|
|
332
|
+
attrs: list[str]
|
|
333
333
|
schema: dict[str, Union[SQLType, type[SQLType]]]
|
|
334
334
|
feature_schema: dict
|
|
335
335
|
versions: list[DatasetVersion]
|
|
@@ -357,7 +357,7 @@ class DatasetRecord:
|
|
|
357
357
|
id: int,
|
|
358
358
|
name: str,
|
|
359
359
|
description: Optional[str],
|
|
360
|
-
|
|
360
|
+
attrs: str,
|
|
361
361
|
status: int,
|
|
362
362
|
feature_schema: Optional[str],
|
|
363
363
|
created_at: datetime,
|
|
@@ -387,7 +387,7 @@ class DatasetRecord:
|
|
|
387
387
|
version_schema: str,
|
|
388
388
|
version_job_id: Optional[str] = None,
|
|
389
389
|
) -> "DatasetRecord":
|
|
390
|
-
|
|
390
|
+
attrs_lst: list[str] = json.loads(attrs) if attrs else []
|
|
391
391
|
schema_dct: dict[str, Any] = json.loads(schema) if schema else {}
|
|
392
392
|
version_schema_dct: dict[str, str] = (
|
|
393
393
|
json.loads(version_schema) if version_schema else {}
|
|
@@ -418,7 +418,7 @@ class DatasetRecord:
|
|
|
418
418
|
id,
|
|
419
419
|
name,
|
|
420
420
|
description,
|
|
421
|
-
|
|
421
|
+
attrs_lst,
|
|
422
422
|
cls.parse_schema(schema_dct), # type: ignore[arg-type]
|
|
423
423
|
json.loads(feature_schema) if feature_schema else {},
|
|
424
424
|
[dataset_version],
|
|
@@ -562,7 +562,7 @@ class DatasetListRecord:
|
|
|
562
562
|
id: int
|
|
563
563
|
name: str
|
|
564
564
|
description: Optional[str]
|
|
565
|
-
|
|
565
|
+
attrs: list[str]
|
|
566
566
|
versions: list[DatasetListVersion]
|
|
567
567
|
created_at: Optional[datetime] = None
|
|
568
568
|
|
|
@@ -572,7 +572,7 @@ class DatasetListRecord:
|
|
|
572
572
|
id: int,
|
|
573
573
|
name: str,
|
|
574
574
|
description: Optional[str],
|
|
575
|
-
|
|
575
|
+
attrs: str,
|
|
576
576
|
created_at: datetime,
|
|
577
577
|
version_id: int,
|
|
578
578
|
version_uuid: str,
|
|
@@ -588,7 +588,7 @@ class DatasetListRecord:
|
|
|
588
588
|
version_query_script: Optional[str],
|
|
589
589
|
version_job_id: Optional[str] = None,
|
|
590
590
|
) -> "DatasetListRecord":
|
|
591
|
-
|
|
591
|
+
attrs_lst: list[str] = json.loads(attrs) if attrs else []
|
|
592
592
|
|
|
593
593
|
dataset_version = DatasetListVersion.parse(
|
|
594
594
|
version_id,
|
|
@@ -610,7 +610,7 @@ class DatasetListRecord:
|
|
|
610
610
|
id,
|
|
611
611
|
name,
|
|
612
612
|
description,
|
|
613
|
-
|
|
613
|
+
attrs_lst,
|
|
614
614
|
[dataset_version],
|
|
615
615
|
created_at,
|
|
616
616
|
)
|
|
@@ -165,7 +165,7 @@ def any_value(col: str) -> Func:
|
|
|
165
165
|
Example:
|
|
166
166
|
```py
|
|
167
167
|
dc.group_by(
|
|
168
|
-
file_example=func.any_value("file.
|
|
168
|
+
file_example=func.any_value("file.path"),
|
|
169
169
|
partition_by="signal.category",
|
|
170
170
|
)
|
|
171
171
|
```
|
|
@@ -227,7 +227,7 @@ def concat(col: str, separator="") -> Func:
|
|
|
227
227
|
Example:
|
|
228
228
|
```py
|
|
229
229
|
dc.group_by(
|
|
230
|
-
files=func.concat("file.
|
|
230
|
+
files=func.concat("file.path", separator=", "),
|
|
231
231
|
partition_by="signal.category",
|
|
232
232
|
)
|
|
233
233
|
```
|
|
@@ -343,7 +343,7 @@ def first(col: str) -> Func:
|
|
|
343
343
|
```py
|
|
344
344
|
window = func.window(partition_by="signal.category", order_by="created_at")
|
|
345
345
|
dc.mutate(
|
|
346
|
-
first_file=func.first("file.
|
|
346
|
+
first_file=func.first("file.path").over(window),
|
|
347
347
|
)
|
|
348
348
|
```
|
|
349
349
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import itertools
|
|
2
2
|
from collections.abc import Sequence
|
|
3
|
-
from typing import Any,
|
|
3
|
+
from typing import Any, Union
|
|
4
4
|
|
|
5
5
|
from datachain.lib.data_model import (
|
|
6
6
|
DataType,
|
|
@@ -71,14 +71,13 @@ def values_to_tuples( # noqa: C901, PLR0912
|
|
|
71
71
|
# If a non-None value appears early, it won't check the remaining items for
|
|
72
72
|
# `None` values.
|
|
73
73
|
try:
|
|
74
|
-
|
|
75
|
-
itertools.dropwhile(lambda
|
|
74
|
+
first_not_none_element = next(
|
|
75
|
+
itertools.dropwhile(lambda i: i is None, v)
|
|
76
76
|
)
|
|
77
77
|
except StopIteration:
|
|
78
|
-
|
|
79
|
-
|
|
78
|
+
# set default type to `str` if column is empty or all values are `None`
|
|
79
|
+
typ = str
|
|
80
80
|
else:
|
|
81
|
-
nullable = pos > 0
|
|
82
81
|
typ = type(first_not_none_element) # type: ignore[assignment]
|
|
83
82
|
if not is_chain_type(typ):
|
|
84
83
|
raise ValuesToTupleError(
|
|
@@ -88,8 +87,7 @@ def values_to_tuples( # noqa: C901, PLR0912
|
|
|
88
87
|
)
|
|
89
88
|
if isinstance(first_not_none_element, list):
|
|
90
89
|
typ = list[type(first_not_none_element[0])] # type: ignore[assignment, misc]
|
|
91
|
-
|
|
92
|
-
types_map[k] = Optional[typ] if nullable else typ # type: ignore[assignment]
|
|
90
|
+
types_map[k] = typ
|
|
93
91
|
|
|
94
92
|
if length < 0:
|
|
95
93
|
length = len_
|
|
@@ -32,11 +32,28 @@ class DatasetInfo(DataModel):
|
|
|
32
32
|
metrics: dict[str, Any] = Field(default={})
|
|
33
33
|
error_message: str = Field(default="")
|
|
34
34
|
error_stack: str = Field(default="")
|
|
35
|
+
attrs: list[str] = Field(default=[])
|
|
35
36
|
|
|
36
37
|
@property
|
|
37
38
|
def is_temp(self) -> bool:
|
|
38
39
|
return Session.is_temp_dataset(self.name)
|
|
39
40
|
|
|
41
|
+
def has_attr(self, attr: str) -> bool:
|
|
42
|
+
s = attr.split("=")
|
|
43
|
+
if len(s) == 1:
|
|
44
|
+
return attr in self.attrs
|
|
45
|
+
|
|
46
|
+
name = s[0]
|
|
47
|
+
value = s[1]
|
|
48
|
+
for a in self.attrs:
|
|
49
|
+
s = a.split("=")
|
|
50
|
+
if value == "*" and s[0] == name:
|
|
51
|
+
return True
|
|
52
|
+
if len(s) == 2 and s[0] == name and s[1] == value:
|
|
53
|
+
return True
|
|
54
|
+
|
|
55
|
+
return False
|
|
56
|
+
|
|
40
57
|
@staticmethod
|
|
41
58
|
def _validate_dict(
|
|
42
59
|
v: Optional[Union[str, dict]],
|
|
@@ -83,4 +100,5 @@ class DatasetInfo(DataModel):
|
|
|
83
100
|
metrics=job.metrics if job else {},
|
|
84
101
|
error_message=version.error_message,
|
|
85
102
|
error_stack=version.error_stack,
|
|
103
|
+
attrs=dataset.attrs,
|
|
86
104
|
)
|
|
@@ -459,7 +459,7 @@ class DataChain:
|
|
|
459
459
|
name: str,
|
|
460
460
|
version: Optional[int] = None,
|
|
461
461
|
description: Optional[str] = None,
|
|
462
|
-
|
|
462
|
+
attrs: Optional[list[str]] = None,
|
|
463
463
|
**kwargs,
|
|
464
464
|
) -> "Self":
|
|
465
465
|
"""Save to a Dataset. It returns the chain itself.
|
|
@@ -468,7 +468,8 @@ class DataChain:
|
|
|
468
468
|
name : dataset name.
|
|
469
469
|
version : version of a dataset. Default - the last version that exist.
|
|
470
470
|
description : description of a dataset.
|
|
471
|
-
|
|
471
|
+
attrs : attributes of a dataset. They can be without value, e.g "NLP",
|
|
472
|
+
or with a value, e.g "location=US".
|
|
472
473
|
"""
|
|
473
474
|
schema = self.signals_schema.clone_without_sys_signals().serialize()
|
|
474
475
|
return self._evolve(
|
|
@@ -476,7 +477,7 @@ class DataChain:
|
|
|
476
477
|
name=name,
|
|
477
478
|
version=version,
|
|
478
479
|
description=description,
|
|
479
|
-
|
|
480
|
+
attrs=attrs,
|
|
480
481
|
feature_schema=schema,
|
|
481
482
|
**kwargs,
|
|
482
483
|
)
|
|
@@ -755,7 +756,7 @@ class DataChain:
|
|
|
755
756
|
|
|
756
757
|
Example:
|
|
757
758
|
```py
|
|
758
|
-
dc.distinct("file.
|
|
759
|
+
dc.distinct("file.path")
|
|
759
760
|
```
|
|
760
761
|
"""
|
|
761
762
|
return self._evolve(
|
|
@@ -881,7 +882,7 @@ class DataChain:
|
|
|
881
882
|
```py
|
|
882
883
|
dc.mutate(
|
|
883
884
|
area=Column("image.height") * Column("image.width"),
|
|
884
|
-
extension=file_ext(Column("file.
|
|
885
|
+
extension=file_ext(Column("file.path")),
|
|
885
886
|
dist=cosine_distance(embedding_text, embedding_image)
|
|
886
887
|
)
|
|
887
888
|
```
|
|
@@ -1070,13 +1071,13 @@ class DataChain:
|
|
|
1070
1071
|
|
|
1071
1072
|
Iterating over all rows with selected columns:
|
|
1072
1073
|
```py
|
|
1073
|
-
for name, size in dc.collect("file.
|
|
1074
|
+
for name, size in dc.collect("file.path", "file.size"):
|
|
1074
1075
|
print(name, size)
|
|
1075
1076
|
```
|
|
1076
1077
|
|
|
1077
1078
|
Iterating over a single column:
|
|
1078
1079
|
```py
|
|
1079
|
-
for file in dc.collect("file.
|
|
1080
|
+
for file in dc.collect("file.path"):
|
|
1080
1081
|
print(file)
|
|
1081
1082
|
```
|
|
1082
1083
|
"""
|
|
@@ -1629,7 +1630,7 @@ class DataChain:
|
|
|
1629
1630
|
import datachain as dc
|
|
1630
1631
|
|
|
1631
1632
|
chain = dc.read_storage("s3://mybucket")
|
|
1632
|
-
chain = chain.filter(dc.C("file.
|
|
1633
|
+
chain = chain.filter(dc.C("file.path").glob("*.jsonl"))
|
|
1633
1634
|
chain = chain.parse_tabular(format="json")
|
|
1634
1635
|
```
|
|
1635
1636
|
"""
|
|
@@ -2088,25 +2089,31 @@ class DataChain:
|
|
|
2088
2089
|
|
|
2089
2090
|
Using glob to match patterns
|
|
2090
2091
|
```py
|
|
2091
|
-
dc.filter(C("file.
|
|
2092
|
+
dc.filter(C("file.path").glob("*.jpg"))
|
|
2093
|
+
```
|
|
2094
|
+
|
|
2095
|
+
Using in to match lists
|
|
2096
|
+
```py
|
|
2097
|
+
ids = [1,2,3]
|
|
2098
|
+
dc.filter(C("experiment_id").in_(ids))
|
|
2092
2099
|
```
|
|
2093
2100
|
|
|
2094
2101
|
Using `datachain.func`
|
|
2095
2102
|
```py
|
|
2096
2103
|
from datachain.func import string
|
|
2097
|
-
dc.filter(string.length(C("file.
|
|
2104
|
+
dc.filter(string.length(C("file.path")) > 5)
|
|
2098
2105
|
```
|
|
2099
2106
|
|
|
2100
2107
|
Combining filters with "or"
|
|
2101
2108
|
```py
|
|
2102
|
-
dc.filter(C("file.
|
|
2109
|
+
dc.filter(C("file.path").glob("cat*") | C("file.path").glob("dog*))
|
|
2103
2110
|
```
|
|
2104
2111
|
|
|
2105
2112
|
Combining filters with "and"
|
|
2106
2113
|
```py
|
|
2107
2114
|
dc.filter(
|
|
2108
|
-
C("file.
|
|
2109
|
-
(string.length(C("file.
|
|
2115
|
+
C("file.path").glob("*.jpg) &
|
|
2116
|
+
(string.length(C("file.path")) > 5)
|
|
2110
2117
|
)
|
|
2111
2118
|
```
|
|
2112
2119
|
"""
|
|
@@ -102,6 +102,7 @@ def datasets(
|
|
|
102
102
|
column: Optional[str] = None,
|
|
103
103
|
include_listing: bool = False,
|
|
104
104
|
studio: bool = False,
|
|
105
|
+
attrs: Optional[list[str]] = None,
|
|
105
106
|
) -> "DataChain":
|
|
106
107
|
"""Generate chain with list of registered datasets.
|
|
107
108
|
|
|
@@ -114,6 +115,10 @@ def datasets(
|
|
|
114
115
|
include_listing: If True, includes listing datasets. Defaults to False.
|
|
115
116
|
studio: If True, returns datasets from Studio only,
|
|
116
117
|
otherwise returns all local datasets. Defaults to False.
|
|
118
|
+
attrs: Optional list of attributes to filter datasets on. It can be just
|
|
119
|
+
attribute without value e.g "NLP", or attribute with value
|
|
120
|
+
e.g "location=US". Attribute with value can also accept "*" to target
|
|
121
|
+
all that have specific name e.g "location=*"
|
|
117
122
|
|
|
118
123
|
Returns:
|
|
119
124
|
DataChain: A new DataChain instance containing dataset information.
|
|
@@ -139,6 +144,10 @@ def datasets(
|
|
|
139
144
|
]
|
|
140
145
|
datasets_values = [d for d in datasets_values if not d.is_temp]
|
|
141
146
|
|
|
147
|
+
if attrs:
|
|
148
|
+
for attr in attrs:
|
|
149
|
+
datasets_values = [d for d in datasets_values if d.has_attr(attr)]
|
|
150
|
+
|
|
142
151
|
if not column:
|
|
143
152
|
# flattening dataset fields
|
|
144
153
|
schema = {
|
|
@@ -4,12 +4,9 @@ from typing import TYPE_CHECKING, Optional, Union
|
|
|
4
4
|
import sqlalchemy
|
|
5
5
|
|
|
6
6
|
from datachain.lib.data_model import DataType
|
|
7
|
-
from datachain.lib.file import
|
|
8
|
-
File,
|
|
9
|
-
)
|
|
7
|
+
from datachain.lib.file import File
|
|
10
8
|
from datachain.lib.signal_schema import SignalSchema
|
|
11
9
|
from datachain.query import Session
|
|
12
|
-
from datachain.query.schema import Column
|
|
13
10
|
|
|
14
11
|
if TYPE_CHECKING:
|
|
15
12
|
from typing_extensions import ParamSpec
|
|
@@ -41,6 +38,9 @@ def read_records(
|
|
|
41
38
|
single_record = dc.read_records(dc.DEFAULT_FILE_RECORD)
|
|
42
39
|
```
|
|
43
40
|
"""
|
|
41
|
+
from datachain.query.dataset import adjust_outputs, get_col_types
|
|
42
|
+
from datachain.sql.types import SQLType
|
|
43
|
+
|
|
44
44
|
from .datasets import read_dataset
|
|
45
45
|
|
|
46
46
|
session = Session.get(session, in_memory=in_memory)
|
|
@@ -52,11 +52,10 @@ def read_records(
|
|
|
52
52
|
|
|
53
53
|
if schema:
|
|
54
54
|
signal_schema = SignalSchema(schema)
|
|
55
|
-
columns = [
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
columns.append(sqlalchemy.Column(c.name, c.type, **kw))
|
|
55
|
+
columns = [
|
|
56
|
+
sqlalchemy.Column(c.name, c.type) # type: ignore[union-attr]
|
|
57
|
+
for c in signal_schema.db_signals(as_columns=True)
|
|
58
|
+
]
|
|
60
59
|
else:
|
|
61
60
|
columns = [
|
|
62
61
|
sqlalchemy.Column(name, typ)
|
|
@@ -83,6 +82,13 @@ def read_records(
|
|
|
83
82
|
warehouse = catalog.warehouse
|
|
84
83
|
dr = warehouse.dataset_rows(dsr)
|
|
85
84
|
table = dr.get_table()
|
|
86
|
-
|
|
85
|
+
|
|
86
|
+
# Optimization: Compute row types once, rather than for every row.
|
|
87
|
+
col_types = get_col_types(
|
|
88
|
+
warehouse,
|
|
89
|
+
{c.name: c.type for c in columns if isinstance(c.type, SQLType)},
|
|
90
|
+
)
|
|
91
|
+
records = (adjust_outputs(warehouse, record, col_types) for record in to_insert)
|
|
92
|
+
warehouse.insert_rows(table, records)
|
|
87
93
|
warehouse.insert_rows_done(table)
|
|
88
94
|
return read_dataset(name=dsr.name, session=session, settings=settings)
|
|
@@ -31,8 +31,8 @@ def resolve_columns(
|
|
|
31
31
|
) -> "Callable[Concatenate[D, P], D]":
|
|
32
32
|
"""Decorator that resolvs input column names to their actual DB names. This is
|
|
33
33
|
specially important for nested columns as user works with them by using dot
|
|
34
|
-
notation e.g (file.
|
|
35
|
-
in DB, e.g
|
|
34
|
+
notation e.g (file.path) but are actually defined with default delimiter
|
|
35
|
+
in DB, e.g file__path.
|
|
36
36
|
If there are any sql functions in arguments, they will just be transferred as is
|
|
37
37
|
to a method.
|
|
38
38
|
"""
|
|
@@ -581,11 +581,7 @@ class SignalSchema:
|
|
|
581
581
|
signals = [
|
|
582
582
|
DEFAULT_DELIMITER.join(path)
|
|
583
583
|
if not as_columns
|
|
584
|
-
else Column(
|
|
585
|
-
DEFAULT_DELIMITER.join(path),
|
|
586
|
-
python_to_sql(_type),
|
|
587
|
-
nullable=is_optional(_type),
|
|
588
|
-
)
|
|
584
|
+
else Column(DEFAULT_DELIMITER.join(path), python_to_sql(_type))
|
|
589
585
|
for path, _type, has_subtree, _ in self.get_flat_tree(
|
|
590
586
|
include_hidden=include_hidden
|
|
591
587
|
)
|
|
@@ -994,8 +990,3 @@ class SignalSchema:
|
|
|
994
990
|
}
|
|
995
991
|
|
|
996
992
|
return SignalSchema.deserialize(schema)
|
|
997
|
-
|
|
998
|
-
|
|
999
|
-
def is_optional(type_: Any) -> bool:
|
|
1000
|
-
"""Check if a type is Optional."""
|
|
1001
|
-
return get_origin(type_) is Union and type(None) in get_args(type_)
|
|
@@ -474,8 +474,9 @@ class Generator(UDFBase):
|
|
|
474
474
|
remove_prefetched=bool(self.prefetch) and not cache,
|
|
475
475
|
)
|
|
476
476
|
with closing(prepared_inputs):
|
|
477
|
-
for row in
|
|
477
|
+
for row in prepared_inputs:
|
|
478
478
|
yield _process_row(row)
|
|
479
|
+
processed_cb.relative_update(1)
|
|
479
480
|
|
|
480
481
|
self.teardown()
|
|
481
482
|
|