PyPI - datachain - Versions diffs - 0.3.6__py3-none-any.whl → 0.3.8__py3-none-any.whl - Mend

datachain 0.3.6__py3-none-any.whl → 0.3.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of datachain might be problematic. Click here for more details.

Files changed (24) hide show

datachain/catalog/catalog.py +0 -81
datachain/cli.py +0 -37
datachain/data_storage/schema.py +1 -1
datachain/data_storage/sqlite.py +1 -10
datachain/data_storage/warehouse.py +12 -5
datachain/lib/arrow.py +4 -4
datachain/lib/clip.py +14 -3
datachain/lib/convert/python_to_sql.py +9 -0
datachain/lib/data_model.py +10 -1
datachain/lib/dc.py +95 -30
datachain/lib/hf.py +166 -0
datachain/lib/image.py +9 -1
datachain/lib/pytorch.py +1 -2
datachain/lib/signal_schema.py +124 -20
datachain/lib/text.py +4 -0
datachain/lib/udf.py +14 -20
datachain/query/dataset.py +10 -3
datachain/query/session.py +5 -3
{datachain-0.3.6.dist-info → datachain-0.3.8.dist-info}/METADATA +8 -3
{datachain-0.3.6.dist-info → datachain-0.3.8.dist-info}/RECORD +24 -23
{datachain-0.3.6.dist-info → datachain-0.3.8.dist-info}/WHEEL +1 -1
{datachain-0.3.6.dist-info → datachain-0.3.8.dist-info}/LICENSE +0 -0
{datachain-0.3.6.dist-info → datachain-0.3.8.dist-info}/entry_points.txt +0 -0
{datachain-0.3.6.dist-info → datachain-0.3.8.dist-info}/top_level.txt +0 -0

datachain/catalog/catalog.py CHANGED Viewed

@@ -1540,87 +1540,6 @@ class Catalog:
         dataset = self.get_dataset(name)
         return self.update_dataset(dataset, **update_data)
-    def merge_datasets(
-        self,
-        src: DatasetRecord,
-        dst: DatasetRecord,
-        src_version: int,
-        dst_version: Optional[int] = None,
-    ) -> DatasetRecord:
-        """
-        Merges records from source to destination dataset.
-        It will create new version
-        of a dataset with records merged from old version and the source, unless
-        existing version is specified for destination in which case it must
-        be in non final status as datasets are immutable
-        """
-        if (
-            dst_version
-            and not dst.is_valid_next_version(dst_version)
-            and dst.get_version(dst_version).is_final_status()
-        ):
-            raise DatasetInvalidVersionError(
-                f"Version {dst_version} must be higher than the current latest one"
-            )
-        src_dep = self.get_dataset_dependencies(src.name, src_version)
-        dst_dep = self.get_dataset_dependencies(
-            dst.name,
-            dst.latest_version,  # type: ignore[arg-type]
-        )
-        if dst.has_version(dst_version):  # type: ignore[arg-type]
-            # case where we don't create new version, but append to the existing one
-            self.warehouse.merge_dataset_rows(
-                src,
-                dst,
-                src_version,
-                dst_version=dst_version,  # type: ignore[arg-type]
-            )
-            merged_schema = src.serialized_schema | dst.serialized_schema
-            self.update_dataset(dst, schema=merged_schema)
-            self.update_dataset_version_with_warehouse_info(
-                dst,
-                dst_version,  # type: ignore[arg-type]
-                schema=merged_schema,
-            )
-            for dep in src_dep:
-                if dep and dep not in dst_dep:
-                    self.metastore.add_dependency(
-                        dep,
-                        dst.name,
-                        dst_version,  # type: ignore[arg-type]
-                    )
-        else:
-            # case where we create new version of merged results
-            src_dr = self.warehouse.dataset_rows(src, src_version)
-            dst_dr = self.warehouse.dataset_rows(dst)
-            merge_result_columns = list(
-                {
-                    c.name: c for c in list(src_dr.table.c) + list(dst_dr.table.c)
-                }.values()
-            )
-            dst_version = dst_version or dst.next_version
-            dst = self.create_new_dataset_version(
-                dst,
-                dst_version,
-                columns=merge_result_columns,
-            )
-            self.warehouse.merge_dataset_rows(
-                src,
-                dst,
-                src_version,
-                dst_version,
-            )
-            self.update_dataset_version_with_warehouse_info(dst, dst_version)
-            for dep in set(src_dep + dst_dep):
-                if dep:
-                    self.metastore.add_dependency(dep, dst.name, dst_version)
-        return dst
     def get_file_signals(
         self, dataset_name: str, dataset_version: int, row: RowDict
     ) -> Optional[dict]:

datachain/cli.py CHANGED Viewed

@@ -336,36 +336,6 @@ def get_parser() -> ArgumentParser:  # noqa: PLR0915
         help="Display size using powers of 1000 not 1024",
     )
-    parse_merge_datasets = subp.add_parser(
-        "merge-datasets", parents=[parent_parser], description="Merges datasets"
-    )
-    parse_merge_datasets.add_argument(
-        "--src",
-        action="store",
-        default=None,
-        help="Source dataset name",
-    )
-    parse_merge_datasets.add_argument(
-        "--dst",
-        action="store",
-        default=None,
-        help="Destination dataset name",
-    )
-    parse_merge_datasets.add_argument(
-        "--src-version",
-        action="store",
-        default=None,
-        type=int,
-        help="Source dataset version",
-    )
-    parse_merge_datasets.add_argument(
-        "--dst-version",
-        action="store",
-        default=None,
-        type=int,
-        help="Destination dataset version",
-    )
     parse_ls = subp.add_parser(
         "ls", parents=[parent_parser], description="List storage contents"
     )
@@ -996,13 +966,6 @@ def main(argv: Optional[list[str]] = None) -> int:  # noqa: C901, PLR0912, PLR09
                 new_name=args.new_name,
                 labels=args.labels,
             )
-        elif args.command == "merge-datasets":
-            catalog.merge_datasets(
-                catalog.get_dataset(args.src),
-                catalog.get_dataset(args.dst),
-                args.src_version,
-                dst_version=args.dst_version,
-            )
         elif args.command == "ls":
             ls(
                 args.sources,

datachain/data_storage/schema.py CHANGED Viewed

@@ -50,7 +50,7 @@ def convert_rows_custom_column_types(
     columns: "ColumnCollection[str, ColumnElement[Any]]",
     rows: Iterator[tuple[Any, ...]],
     dialect: "Dialect",
-):
+) -> Iterator[tuple[Any, ...]]:
     """
     This function converts values of rows columns based on their types which are
     defined in columns. We are only converting column values for which types are

datachain/data_storage/sqlite.py CHANGED Viewed

@@ -27,10 +27,7 @@ import datachain.sql.sqlite
 from datachain.data_storage import AbstractDBMetastore, AbstractWarehouse
 from datachain.data_storage.db_engine import DatabaseEngine
 from datachain.data_storage.id_generator import AbstractDBIDGenerator
-from datachain.data_storage.schema import (
-    DefaultSchema,
-    convert_rows_custom_column_types,
-)
+from datachain.data_storage.schema import DefaultSchema
 from datachain.dataset import DatasetRecord
 from datachain.error import DataChainError
 from datachain.sql.sqlite import create_user_defined_sql_functions, sqlite_dialect
@@ -651,12 +648,6 @@ class SQLiteWarehouse(AbstractWarehouse):
         self.db.create_table(table, if_not_exists=if_not_exists)
         return table
-    def dataset_rows_select(self, select_query: Select, **kwargs):
-        rows = self.db.execute(select_query, **kwargs)
-        yield from convert_rows_custom_column_types(
-            select_query.selected_columns, rows, sqlite_dialect
-        )
     def get_dataset_sources(
         self, dataset: DatasetRecord, version: int
     ) -> list[StorageURI]:

datachain/data_storage/warehouse.py CHANGED Viewed

@@ -17,6 +17,7 @@ from sqlalchemy.sql.expression import true
 from tqdm import tqdm
 from datachain.client import Client
+from datachain.data_storage.schema import convert_rows_custom_column_types
 from datachain.data_storage.serializer import Serializable
 from datachain.dataset import DatasetRecord
 from datachain.node import DirType, DirTypeGroup, Entry, Node, NodeWithPath, get_path
@@ -226,7 +227,7 @@ class AbstractWarehouse(ABC, Serializable):
                     if limit < page_size:
                         paginated_query = paginated_query.limit(None).limit(limit)
-                results = self.db.execute(paginated_query.offset(offset))
+                results = self.dataset_rows_select(paginated_query.offset(offset))
                 processed = False
                 for row in results:
@@ -309,12 +310,18 @@ class AbstractWarehouse(ABC, Serializable):
         Merge results should not contain duplicates.
         """
-    @abstractmethod
-    def dataset_rows_select(self, select_query: sa.sql.selectable.Select, **kwargs):
+    def dataset_rows_select(
+        self,
+        query: sa.sql.selectable.Select,
+        **kwargs,
+    ) -> Iterator[tuple[Any, ...]]:
         """
-        Method for fetching dataset rows from database. This is abstract since
-        in some DBs we need to use special settings
+        Fetch dataset rows from database.
         """
+        rows = self.db.execute(query, **kwargs)
+        yield from convert_rows_custom_column_types(
+            query.selected_columns, rows, self.db.dialect
+        )
     @abstractmethod
     def get_dataset_sources(

datachain/lib/arrow.py CHANGED Viewed

@@ -95,7 +95,7 @@ def schema_to_output(schema: pa.Schema, col_names: Optional[Sequence[str]] = Non
         if not column:
             column = f"c{default_column}"
             default_column += 1
-        dtype = _arrow_type_mapper(field.type)  # type: ignore[assignment]
+        dtype = arrow_type_mapper(field.type)  # type: ignore[assignment]
         if field.nullable:
             dtype = Optional[dtype]  # type: ignore[assignment]
         output[column] = dtype
@@ -103,7 +103,7 @@ def schema_to_output(schema: pa.Schema, col_names: Optional[Sequence[str]] = Non
     return output
-def _arrow_type_mapper(col_type: pa.DataType) -> type:  # noqa: PLR0911
+def arrow_type_mapper(col_type: pa.DataType) -> type:  # noqa: PLR0911
     """Convert pyarrow types to basic types."""
     from datetime import datetime
@@ -122,11 +122,11 @@ def _arrow_type_mapper(col_type: pa.DataType) -> type:  # noqa: PLR0911
     if pa.types.is_string(col_type) or pa.types.is_large_string(col_type):
         return str
     if pa.types.is_list(col_type):
-        return list[_arrow_type_mapper(col_type.value_type)]  # type: ignore[return-value, misc]
+        return list[arrow_type_mapper(col_type.value_type)]  # type: ignore[return-value, misc]
     if pa.types.is_struct(col_type) or pa.types.is_map(col_type):
         return dict
     if isinstance(col_type, pa.lib.DictionaryType):
-        return _arrow_type_mapper(col_type.value_type)  # type: ignore[return-value]
+        return arrow_type_mapper(col_type.value_type)  # type: ignore[return-value]
     raise TypeError(f"{col_type!r} datatypes not supported")

datachain/lib/clip.py CHANGED Viewed

@@ -1,5 +1,5 @@
 import inspect
-from typing import TYPE_CHECKING, Any, Callable, Literal, Union
+from typing import TYPE_CHECKING, Any, Callable, Literal, Optional, Union
 import torch
 from transformers.modeling_utils import PreTrainedModel
@@ -39,6 +39,7 @@ def clip_similarity_scores(
     tokenizer: Callable,
     prob: bool = False,
     image_to_text: bool = True,
+    device: Optional[Union[str, torch.device]] = None,
 ) -> list[list[float]]:
     """
     Calculate CLIP similarity scores between one or more images and/or text.
@@ -52,6 +53,7 @@ def clip_similarity_scores(
         prob : Compute softmax probabilities.
         image_to_text : Whether to compute for image-to-text or text-to-image. Ignored
             if only one of images or text provided.
+        device : Device to use. Defaults is None - use model's device.
     Example:
@@ -130,17 +132,26 @@ def clip_similarity_scores(
         ```
     """
+    if device is None:
+        if hasattr(model, "device"):
+            device = model.device
+        else:
+            device = next(model.parameters()).device
+    else:
+        model = model.to(device)
     with torch.no_grad():
         if images is not None:
             encoder = _get_encoder(model, "image")
             image_features = convert_images(
-                images, transform=preprocess, encoder=encoder
+                images, transform=preprocess, encoder=encoder, device=device
             )
             image_features /= image_features.norm(dim=-1, keepdim=True)  # type: ignore[union-attr]
         if text is not None:
             encoder = _get_encoder(model, "text")
-            text_features = convert_text(text, tokenizer, encoder=encoder)
+            text_features = convert_text(
+                text, tokenizer, encoder=encoder, device=device
+            )
             text_features /= text_features.norm(dim=-1, keepdim=True)  # type: ignore[union-attr]
         if images is not None and text is not None:

datachain/lib/convert/python_to_sql.py CHANGED Viewed

@@ -73,6 +73,9 @@ def python_to_sql(typ):  # noqa: PLR0911
         if len(args) == 2 and (type(None) in args):
             return python_to_sql(args[0])
+        if _is_union_str_literal(orig, args):
+            return String
         if _is_json_inside_union(orig, args):
             return JSON
@@ -94,3 +97,9 @@ def _is_json_inside_union(orig, args) -> bool:
         if any(inspect.isclass(arg) and issubclass(arg, BaseModel) for arg in args):
             return True
     return False
+def _is_union_str_literal(orig, args) -> bool:
+    if orig != Union:
+        return False
+    return all(arg is str or get_origin(arg) in (Literal, LiteralEx) for arg in args)

datachain/lib/data_model.py CHANGED Viewed

@@ -2,7 +2,7 @@ from collections.abc import Sequence
 from datetime import datetime
 from typing import ClassVar, Union, get_args, get_origin
-from pydantic import BaseModel
+from pydantic import BaseModel, create_model
 from datachain.lib.model_store import ModelStore
@@ -57,3 +57,12 @@ def is_chain_type(t: type) -> bool:
         return is_chain_type(args[0])
     return False
+def dict_to_data_model(name: str, data_dict: dict[str, DataType]) -> type[BaseModel]:
+    fields = {name: (anno, ...) for name, anno in data_dict.items()}
+    return create_model(
+        name,
+        __base__=(DataModel,),  # type: ignore[call-overload]
+        **fields,
+    )  # type: ignore[call-overload]

datachain/lib/dc.py CHANGED Viewed

@@ -18,14 +18,13 @@ from typing import (
 import pandas as pd
 import sqlalchemy
-from pydantic import BaseModel, create_model
+from pydantic import BaseModel
 from sqlalchemy.sql.functions import GenericFunction
 from sqlalchemy.sql.sqltypes import NullType
-from datachain import DataModel
 from datachain.lib.convert.python_to_sql import python_to_sql
 from datachain.lib.convert.values_to_tuples import values_to_tuples
-from datachain.lib.data_model import DataType
+from datachain.lib.data_model import DataModel, DataType, dict_to_data_model
 from datachain.lib.dataset_info import DatasetInfo
 from datachain.lib.file import ExportPlacement as FileExportPlacement
 from datachain.lib.file import File, IndexedFile, get_file
@@ -55,6 +54,8 @@ from datachain.utils import inside_notebook
 if TYPE_CHECKING:
     from typing_extensions import Concatenate, ParamSpec, Self
+    from datachain.lib.hf import HFDatasetType
     P = ParamSpec("P")
 C = Column
@@ -77,12 +78,12 @@ def resolve_columns(
     @wraps(method)
     def _inner(self: D, *args: "P.args", **kwargs: "P.kwargs") -> D:
         resolved_args = self.signals_schema.resolve(
-            *[arg for arg in args if not isinstance(arg, GenericFunction)]
+            *[arg for arg in args if not isinstance(arg, GenericFunction)]  # type: ignore[arg-type]
         ).db_signals()
         for idx, arg in enumerate(args):
             if isinstance(arg, GenericFunction):
-                resolved_args.insert(idx, arg)
+                resolved_args.insert(idx, arg)  # type: ignore[arg-type]
         return method(self, *resolved_args, **kwargs)
@@ -208,23 +209,28 @@ class DataChain(DatasetQuery):
         "size": 0,
     }
-    def __init__(self, *args, **kwargs):
+    def __init__(self, *args, settings: Optional[dict] = None, **kwargs):
         """This method needs to be redefined as a part of Dataset and DataChain
         decoupling.
         """
-        super().__init__(
+        super().__init__(  # type: ignore[misc]
             *args,
             **kwargs,
             indexing_column_types=File._datachain_column_types,
         )
-        self._settings = Settings()
-        self._setup = {}
+        if settings:
+            self._settings = Settings(**settings)
+        else:
+            self._settings = Settings()
+        self._setup: dict = {}
         self.signals_schema = SignalSchema({"sys": Sys})
         if self.feature_schema:
             self.signals_schema |= SignalSchema.deserialize(self.feature_schema)
         else:
-            self.signals_schema |= SignalSchema.from_column_types(self.column_types)
+            self.signals_schema |= SignalSchema.from_column_types(
+                self.column_types or {}
+            )
         self._sys = False
@@ -309,6 +315,7 @@ class DataChain(DatasetQuery):
         *,
         type: Literal["binary", "text", "image"] = "binary",
         session: Optional[Session] = None,
+        settings: Optional[dict] = None,
         in_memory: bool = False,
         recursive: Optional[bool] = True,
         object_name: str = "file",
@@ -336,6 +343,7 @@ class DataChain(DatasetQuery):
             cls(
                 path,
                 session=session,
+                settings=settings,
                 recursive=recursive,
                 update=update,
                 in_memory=in_memory,
@@ -489,6 +497,7 @@ class DataChain(DatasetQuery):
     def datasets(
         cls,
         session: Optional[Session] = None,
+        settings: Optional[dict] = None,
         in_memory: bool = False,
         object_name: str = "dataset",
     ) -> "DataChain":
@@ -513,6 +522,7 @@ class DataChain(DatasetQuery):
         return cls.from_values(
             session=session,
+            settings=settings,
             in_memory=in_memory,
             output={object_name: DatasetInfo},
             **{object_name: datasets},  # type: ignore[arg-type]
@@ -895,7 +905,7 @@ class DataChain(DatasetQuery):
             if isinstance(value, Column):
                 # renaming existing column
                 for signal in schema.db_signals(name=value.name, as_columns=True):
-                    mutated[signal.name.replace(value.name, name, 1)] = signal
+                    mutated[signal.name.replace(value.name, name, 1)] = signal  # type: ignore[union-attr]
             else:
                 # adding new signal
                 mutated[name] = value
@@ -1086,7 +1096,7 @@ class DataChain(DatasetQuery):
             )
         signals_schema = self.signals_schema.clone_without_sys_signals()
-        on_columns = signals_schema.resolve(*on).db_signals()
+        on_columns: list[str] = signals_schema.resolve(*on).db_signals()  # type: ignore[assignment]
         right_signals_schema = right_ds.signals_schema.clone_without_sys_signals()
         if right_on is not None:
@@ -1105,7 +1115,9 @@ class DataChain(DatasetQuery):
                     on, right_on, "'on' and 'right_on' must have the same length'"
                 )
-            right_on_columns = right_signals_schema.resolve(*right_on).db_signals()
+            right_on_columns: list[str] = right_signals_schema.resolve(
+                *right_on
+            ).db_signals()  # type: ignore[assignment]
             if len(right_on_columns) != len(on_columns):
                 on_str = ", ".join(right_on_columns)
@@ -1169,7 +1181,7 @@ class DataChain(DatasetQuery):
                 "'on' cannot be empty",
             )
         else:
-            signals = self.signals_schema.resolve(*on).db_signals()
+            signals = self.signals_schema.resolve(*on).db_signals()  # type: ignore[assignment]
         return super()._subtract(other, signals)  # type: ignore[arg-type]
     @classmethod
@@ -1177,6 +1189,7 @@ class DataChain(DatasetQuery):
         cls,
         ds_name: str = "",
         session: Optional[Session] = None,
+        settings: Optional[dict] = None,
         in_memory: bool = False,
         output: OutputType = None,
         object_name: str = "",
@@ -1195,10 +1208,13 @@ class DataChain(DatasetQuery):
             yield from tuples
         chain = DataChain.from_records(
-            DataChain.DEFAULT_FILE_RECORD, session=session, in_memory=in_memory
+            DataChain.DEFAULT_FILE_RECORD,
+            session=session,
+            settings=settings,
+            in_memory=in_memory,
         )
         if object_name:
-            output = {object_name: DataChain._dict_to_data_model(object_name, output)}  # type: ignore[arg-type]
+            output = {object_name: dict_to_data_model(object_name, output)}  # type: ignore[arg-type]
         return chain.gen(_func_fr, output=output)
     @classmethod
@@ -1207,6 +1223,7 @@ class DataChain(DatasetQuery):
         df: "pd.DataFrame",
         name: str = "",
         session: Optional[Session] = None,
+        settings: Optional[dict] = None,
         in_memory: bool = False,
         object_name: str = "",
     ) -> "DataChain":
@@ -1236,7 +1253,12 @@ class DataChain(DatasetQuery):
                 )
         return cls.from_values(
-            name, session, object_name=object_name, in_memory=in_memory, **fr_map
+            name,
+            session,
+            settings=settings,
+            object_name=object_name,
+            in_memory=in_memory,
+            **fr_map,
         )
     def to_pandas(self, flatten=False) -> "pd.DataFrame":
@@ -1306,6 +1328,59 @@ class DataChain(DatasetQuery):
         if len(df) == limit:
             print(f"\n[Limited by {len(df)} rows]")
+    @classmethod
+    def from_hf(
+        cls,
+        dataset: Union[str, "HFDatasetType"],
+        *args,
+        session: Optional[Session] = None,
+        settings: Optional[dict] = None,
+        object_name: str = "",
+        model_name: str = "",
+        **kwargs,
+    ) -> "DataChain":
+        """Generate chain from huggingface hub dataset.
+        Parameters:
+            dataset : Path or name of the dataset to read from Hugging Face Hub,
+                or an instance of `datasets.Dataset`-like object.
+            session : Session to use for the chain.
+            settings : Settings to use for the chain.
+            object_name : Generated object column name.
+            model_name : Generated model name.
+            kwargs : Parameters to pass to datasets.load_dataset.
+        Example:
+            Load from Hugging Face Hub:
+            ```py
+            DataChain.from_hf("beans", split="train")
+            ```
+            Generate chain from loaded dataset:
+            ```py
+            from datasets import load_dataset
+            ds = load_dataset("beans", split="train")
+            DataChain.from_hf(ds)
+            ```
+        """
+        from datachain.lib.hf import HFGenerator, get_output_schema, stream_splits
+        output: dict[str, DataType] = {}
+        ds_dict = stream_splits(dataset, *args, **kwargs)
+        if len(ds_dict) > 1:
+            output = {"split": str}
+        model_name = model_name or object_name or ""
+        output = output | get_output_schema(next(iter(ds_dict.values())), model_name)
+        model = dict_to_data_model(model_name, output)
+        if object_name:
+            output = {object_name: model}
+        chain = DataChain.from_values(
+            split=list(ds_dict.keys()), session=session, settings=settings
+        )
+        return chain.gen(HFGenerator(dataset, model, *args, **kwargs), output=output)
     def parse_tabular(
         self,
         output: OutputType = None,
@@ -1367,7 +1442,7 @@ class DataChain(DatasetQuery):
         if isinstance(output, dict):
             model_name = model_name or object_name or ""
-            model = DataChain._dict_to_data_model(model_name, output)
+            model = dict_to_data_model(model_name, output)
         else:
             model = output  # type: ignore[assignment]
@@ -1384,17 +1459,6 @@ class DataChain(DatasetQuery):
             ArrowGenerator(schema, model, source, nrows, **kwargs), output=output
         )
-    @staticmethod
-    def _dict_to_data_model(
-        name: str, data_dict: dict[str, DataType]
-    ) -> type[BaseModel]:
-        fields = {name: (anno, ...) for name, anno in data_dict.items()}
-        return create_model(
-            name,
-            __base__=(DataModel,),  # type: ignore[call-overload]
-            **fields,
-        )  # type: ignore[call-overload]
     @classmethod
     def from_csv(
         cls,
@@ -1543,6 +1607,7 @@ class DataChain(DatasetQuery):
         cls,
         to_insert: Optional[Union[dict, list[dict]]],
         session: Optional[Session] = None,
+        settings: Optional[dict] = None,
         in_memory: bool = False,
         schema: Optional[dict[str, DataType]] = None,
     ) -> "DataChain":
@@ -1597,7 +1662,7 @@ class DataChain(DatasetQuery):
         insert_q = dr.get_table().insert()
         for record in to_insert:
             db.execute(insert_q.values(**record))
-        return DataChain(name=dsr.name)
+        return DataChain(name=dsr.name, settings=settings)
     def sum(self, fr: DataType):  # type: ignore[override]
         """Compute the sum of a column."""

datachain 0.3.6__py3-none-any.whl → 0.3.8__py3-none-any.whl

Potentially problematic release.

datachain 0.3.6__py3-none-any.whl → 0.3.8__py3-none-any.whl