datachain 0.2.1__py3-none-any.whl → 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic.
- datachain/catalog/catalog.py +17 -2
- datachain/data_storage/db_engine.py +0 -2
- datachain/data_storage/schema.py +10 -27
- datachain/data_storage/warehouse.py +1 -7
- datachain/lib/arrow.py +7 -13
- datachain/lib/clip.py +151 -0
- datachain/lib/dc.py +35 -57
- datachain/lib/feature_utils.py +1 -2
- datachain/lib/file.py +7 -0
- datachain/lib/image.py +37 -79
- datachain/lib/pytorch.py +4 -2
- datachain/lib/signal_schema.py +3 -4
- datachain/lib/text.py +18 -49
- datachain/lib/udf.py +58 -30
- datachain/lib/udf_signature.py +11 -10
- datachain/lib/utils.py +17 -0
- datachain/lib/webdataset.py +2 -2
- datachain/listing.py +0 -3
- datachain/query/dataset.py +63 -37
- datachain/query/dispatch.py +2 -2
- datachain/query/schema.py +1 -8
- datachain/query/udf.py +16 -18
- datachain/utils.py +28 -0
- {datachain-0.2.1.dist-info → datachain-0.2.2.dist-info}/METADATA +2 -1
- {datachain-0.2.1.dist-info → datachain-0.2.2.dist-info}/RECORD +29 -29
- {datachain-0.2.1.dist-info → datachain-0.2.2.dist-info}/WHEEL +1 -1
- datachain/lib/reader.py +0 -49
- {datachain-0.2.1.dist-info → datachain-0.2.2.dist-info}/LICENSE +0 -0
- {datachain-0.2.1.dist-info → datachain-0.2.2.dist-info}/entry_points.txt +0 -0
- {datachain-0.2.1.dist-info → datachain-0.2.2.dist-info}/top_level.txt +0 -0
datachain/lib/pytorch.py
CHANGED
@@ -116,10 +116,12 @@ class PytorchDataset(IterableDataset):
                     self.transform = None
             if self.tokenizer:
                 for i, val in enumerate(row):
-                    if isinstance(val, str):
+                    if isinstance(val, str) or (
+                        isinstance(val, list) and isinstance(val[0], str)
+                    ):
                         row[i] = convert_text(
                             val, self.tokenizer, self.tokenizer_kwargs
-                        )
+                        ).squeeze(0)  # type: ignore[union-attr]
             yield row

     @staticmethod
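For context, tokenizers that return PyTorch tensors typically keep a leading batch dimension even for a single text, which is why the converted value is now squeezed. A minimal sketch (illustrative only, not part of the diff):

import torch

# A single tokenized text usually comes back with shape (1, seq_len);
# squeeze(0) drops the batch dimension so the row holds a flat tensor.
tokens = torch.tensor([[101, 7592, 2088, 102]])
assert tokens.squeeze(0).shape == (4,)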
datachain/lib/signal_schema.py
CHANGED
@@ -5,7 +5,6 @@ from typing import TYPE_CHECKING, Any, Optional, Union, get_args, get_origin

 from pydantic import create_model

-from datachain.lib.arrow import Source
 from datachain.lib.feature import (
     DATACHAIN_TO_TYPE,
     DEFAULT_DELIMITER,
@@ -14,7 +13,7 @@ from datachain.lib.feature import (
     convert_type_to_datachain,
 )
 from datachain.lib.feature_registry import Registry
-from datachain.lib.file import File, TextFile
+from datachain.lib.file import File, IndexedFile, TextFile
 from datachain.lib.image import ImageFile
 from datachain.lib.utils import DataChainParamsError
 from datachain.lib.webdataset import TarStream, WDSAllFile, WDSBasic
@@ -36,7 +35,7 @@ NAMES_TO_TYPES = {
     "datetime": datetime,
     "WDSLaion": WDSLaion,
     "Laion": Laion,
-    "Source": Source,
+    "Source": IndexedFile,
     "File": File,
     "ImageFile": ImageFile,
     "TextFile": TextFile,
@@ -150,7 +149,7 @@ class SignalSchema:
         )

     def slice(self, keys: Sequence[str]) -> "SignalSchema":
-        return SignalSchema({k:
+        return SignalSchema({k: self.values[k] for k in keys if k in self.values})

     def row_to_features(self, row: Sequence, catalog: "Catalog") -> list[FeatureType]:
         res = []
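The rewritten slice() builds the new schema with a dict comprehension that silently skips keys missing from the schema. A standalone sketch of that filtering behavior (plain dicts, not the SignalSchema class itself):

values = {"file": str, "label": int, "score": float}
keys = ["label", "missing", "file"]
# Keys not present in the schema are dropped rather than raising KeyError.
sliced = {k: values[k] for k in keys if k in values}
assert sliced == {"label": int, "file": str}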
datachain/lib/text.py
CHANGED
@@ -1,19 +1,15 @@
-import inspect
 from typing import TYPE_CHECKING, Any, Callable, Optional, Union

-from datachain.lib.file import TextFile
-from datachain.lib.reader import FeatureReader
-
 if TYPE_CHECKING:
-
+    import torch


 def convert_text(
     text: Union[str, list[str]],
     tokenizer: Optional[Callable] = None,
     tokenizer_kwargs: Optional[dict[str, Any]] = None,
-
-):
+    encoder: Optional[Callable] = None,
+) -> Union[str, list[str], "torch.Tensor"]:
     """
     Tokenize and otherwise transform text.

@@ -21,18 +17,8 @@ def convert_text(
         text (str): Text to convert.
         tokenizer (Callable): Tokenizer to use to tokenize objects.
         tokenizer_kwargs (dict): Additional kwargs to pass when calling tokenizer.
-
+        encoder (Callable): Encode text using model.
     """
-    if open_clip_model:
-        method_name = "encode_text"
-        if not (
-            hasattr(open_clip_model, method_name)
-            and inspect.ismethod(getattr(open_clip_model, method_name))
-        ):
-            raise ValueError(
-                f"TextColumn error: 'model' doesn't support '{method_name}()'"
-            )
-
     if not tokenizer:
         return text

@@ -43,38 +29,21 @@ def convert_text(
         res = tokenizer(text, **tokenizer_kwargs)
     else:
         res = tokenizer(text)
-
-
-    tokens = res.input_ids if isinstance(tokenizer, PreTrainedTokenizerBase) else res
-
-    if not open_clip_model:
-        return tokens.squeeze(0)
-
-    return open_clip_model.encode_text(tokens).squeeze(0)
+    try:
+        from transformers.tokenization_utils_base import PreTrainedTokenizerBase

+        tokens = (
+            res.input_ids if isinstance(tokenizer, PreTrainedTokenizerBase) else res
+        )
+    except ImportError:
+        tokens = res

-
-
-        self,
-        fr_class: "FeatureLike" = TextFile,
-        tokenizer: Optional[Callable] = None,
-        tokenizer_kwargs: Optional[dict[str, Any]] = None,
-        open_clip_model: Optional[Any] = None,
-    ):
-        """
-        Read and optionally transform a text column.
+    if not encoder:
+        return tokens

-
-
-
-
-        self.open_clip_model = open_clip_model
-        super().__init__(fr_class)
+    try:
+        import torch
+    except ImportError:
+        "Missing dependency 'torch' needed to encode text."

-
-    return convert_text(
-        value,
-        tokenizer=self.tokenizer,
-        tokenizer_kwargs=self.tokenizer_kwargs,
-        open_clip_model=self.open_clip_model,
-    )
+    return encoder(torch.tensor(tokens))
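The new convert_text() contract is: tokenize when a tokenizer is given, then optionally run an encoder over the token tensor. A minimal sketch with toy stand-ins for the tokenizer and encoder (both hypothetical; only their call shapes match what the function expects):

import torch

def toy_tokenizer(text: str) -> list[int]:
    # Hypothetical tokenizer: one integer id per whitespace-separated word.
    return [len(word) for word in text.split()]

def toy_encoder(tokens: torch.Tensor) -> torch.Tensor:
    # Hypothetical encoder: maps token ids to a float "embedding".
    return tokens.float() * 0.1

tokens = toy_tokenizer("hello text world")
embedding = toy_encoder(torch.tensor(tokens))
print(embedding)  # tensor([0.5000, 0.4000, 0.5000])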
datachain/lib/udf.py
CHANGED
@@ -1,11 +1,12 @@
 import inspect
 import sys
 import traceback
-from typing import TYPE_CHECKING, Callable
+from typing import TYPE_CHECKING, Callable

 from datachain.lib.feature import Feature
 from datachain.lib.signal_schema import SignalSchema
-from datachain.lib.
+from datachain.lib.udf_signature import UdfSignature
+from datachain.lib.utils import AbstractUDF, DataChainError, DataChainParamsError
 from datachain.query import udf

 if TYPE_CHECKING:
@@ -17,26 +18,68 @@ class UdfError(DataChainParamsError):
         super().__init__(f"UDF error: {msg}")


-class UDFBase:
+class UDFBase(AbstractUDF):
     is_input_batched = False
     is_output_batched = False
     is_input_grouped = False

-    def __init__(
-        self
-
-
-
-
+    def __init__(self):
+        self.params = None
+        self.output = None
+        self.params_spec = None
+        self.output_spec = None
+        self._contains_stream = None
+        self._catalog = None
+        self._func = None
+
+    def process(self, *args, **kwargs):
+        """Processing function that needs to be defined by user"""
+        if not self._func:
+            raise NotImplementedError("UDF processing is not implemented")
+        return self._func(*args, **kwargs)
+
+    def setup(self):
+        """Initialization process executed on each worker before processing begins.
+        This is needed for tasks like pre-loading ML models prior to scoring.
+        """
+
+    def teardown(self):
+        """Teardown process executed on each process/worker after processing ends.
+        This is needed for tasks like closing connections to end-points.
+        """
+
+    def _init(self, sign: UdfSignature, params: SignalSchema, func: Callable):
         self.params = params
-        self.output =
-        self._func = func
+        self.output = sign.output_schema

-        params_spec = params.to_udf_spec()
+        params_spec = self.params.to_udf_spec()
         self.params_spec = list(params_spec.keys())
-        self.output_spec = output.to_udf_spec()
+        self.output_spec = self.output.to_udf_spec()

-        self.
+        self._func = func
+
+    @classmethod
+    def _create(
+        cls,
+        target_class: type["UDFBase"],
+        sign: UdfSignature,
+        params: SignalSchema,
+        catalog,
+    ) -> "UDFBase":
+        if isinstance(sign.func, AbstractUDF):
+            if not isinstance(sign.func, target_class):  # type: ignore[unreachable]
+                raise UdfError(
+                    f"cannot create UDF: provided UDF '{sign.func.__name__}'"
+                    f" must be a child of target class '{target_class.__name__}'",
+                )
+            result = sign.func
+            func = None
+        else:
+            result = target_class()
+            func = sign.func
+
+        result._init(sign, params, func)
+        return result

     @property
     def name(self):
@@ -53,25 +96,10 @@ class UDFBase:
         udf_wrapper = udf(self.params_spec, self.output_spec, batch=batch)
         return udf_wrapper(self)

-    def bootstrap(self):
-        """Initialization process executed on each worker before processing begins.
-        This is needed for tasks like pre-loading ML models prior to scoring.
-        """
-
-    def teardown(self):
-        """Teardown process executed on each process/worker after processing ends.
-        This is needed for tasks like closing connections to end-points.
-        """
-
-    def process(self, *args, **kwargs):
-        if not self._func:
-            raise NotImplementedError("UDF processing is not implemented")
-        return self._func(*args, **kwargs)
-
     def validate_results(self, results, *args, **kwargs):
         return results

-    def __call__(self, *rows
+    def __call__(self, *rows):
         if self.is_input_grouped:
             objs = self._parse_grouped_rows(rows)
         else:
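With this restructuring a class-based UDF subclasses UDFBase and overrides process(), with setup()/teardown() as per-worker lifecycle hooks. A minimal sketch of such a subclass (the class and its logic are illustrative, not from the package):

from datachain.lib.udf import UDFBase  # module path as in this release

class WordCount(UDFBase):
    """Hypothetical UDF that counts words in a text signal."""

    def setup(self):
        # Stand-in for expensive per-worker initialization (e.g. loading a model).
        self.splitter = str.split

    def process(self, text: str) -> int:
        return len(self.splitter(text))

    def teardown(self):
        # Stand-in for releasing resources (e.g. closing connections).
        self.splitter = None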
datachain/lib/udf_signature.py
CHANGED
@@ -5,7 +5,7 @@ from typing import Callable, Optional, Union, get_args, get_origin

 from datachain.lib.feature import Feature, FeatureType, FeatureTypeNames
 from datachain.lib.signal_schema import SignalSchema
-from datachain.lib.utils import DataChainParamsError
+from datachain.lib.utils import AbstractUDF, DataChainParamsError


 class UdfSignatureError(DataChainParamsError):
@@ -49,10 +49,13 @@ class UdfSignature:
         else:
             if func is None:
                 raise UdfSignatureError(chain, "user function is not defined")
+
             udf_func = func
             signal_name = None
+
         if not callable(udf_func):
-            raise UdfSignatureError(chain, f"
+            raise UdfSignatureError(chain, f"UDF '{udf_func}' is not callable")
+
         func_params_map_sign, func_outs_sign, is_iterator = (
             UdfSignature._func_signature(chain, udf_func)
         )
@@ -108,13 +111,6 @@ class UdfSignature:
         if isinstance(output, str):
             output = [output]
         if isinstance(output, Sequence):
-            if not func_outs_sign:
-                raise UdfSignatureError(
-                    chain,
-                    "output types are not specified. Specify types in 'output' as"
-                    " a dict or as function return value hint.",
-                )
-
             if len(func_outs_sign) != len(output):
                 raise UdfSignatureError(
                     chain,
@@ -158,8 +154,13 @@ class UdfSignature:

     @staticmethod
     def _func_signature(
-        chain: str,
+        chain: str, udf_func: Callable
     ) -> tuple[dict[str, type], Sequence[type], bool]:
+        if isinstance(udf_func, AbstractUDF):
+            func = udf_func.process  # type: ignore[unreachable]
+        else:
+            func = udf_func
+
         sign = inspect.signature(func)

         input_map = {prm.name: prm.annotation for prm in sign.parameters.values()}
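The signature parser now unwraps AbstractUDF instances and inspects their bound process method; inspect.signature on a bound method already omits self, which is what the parameter map relies on. A small standalone illustration:

import inspect

class Example:
    def process(self, name: str, size: int) -> str:
        return f"{name}:{size}"

params = inspect.signature(Example().process).parameters
assert list(params) == ["name", "size"]  # "self" is not part of the signature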
datachain/lib/utils.py
CHANGED
@@ -1,3 +1,20 @@
+from abc import ABC, abstractmethod
+
+
+class AbstractUDF(ABC):
+    @abstractmethod
+    def process(self, *args, **kwargs):
+        pass
+
+    @abstractmethod
+    def setup(self):
+        pass
+
+    @abstractmethod
+    def teardown(self):
+        pass
+
+
 class DataChainError(Exception):
     def __init__(self, message):
         super().__init__(message)
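Because AbstractUDF declares process(), setup() and teardown() as abstract, a subclass cannot be instantiated until all three are implemented. A quick illustration of that guarantee (the Incomplete class is hypothetical):

from datachain.lib.utils import AbstractUDF  # location per this diff

class Incomplete(AbstractUDF):
    def process(self, *args, **kwargs):
        return args
    # setup() and teardown() are intentionally missing.

try:
    Incomplete()
except TypeError as exc:
    print(exc)  # abstract methods setup/teardown must be overridden first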
datachain/lib/webdataset.py
CHANGED
@@ -2,6 +2,7 @@ import hashlib
 import json
 import tarfile
 from collections.abc import Iterator, Sequence
+from pathlib import Path
 from typing import (
     Any,
     Callable,
@@ -240,10 +241,9 @@ class TarStream(File):
 def get_tar_groups(stream, tar, core_extensions, spec, encoding="utf-8"):
     builder = Builder(stream, core_extensions, spec, tar, encoding)

-    for item in tar.getmembers():
+    for item in sorted(tar.getmembers(), key=lambda m: Path(m.name).stem):
         if not item.isfile():
             continue
-
         try:
             builder.add(item)
         except StopIteration:
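Sorting tar members by file stem keeps every file belonging to one WebDataset sample adjacent, which is what the grouping builder expects. A standalone illustration of the sort key:

from pathlib import Path

names = ["0002.json", "0001.jpg", "0001.json", "0002.jpg"]
print(sorted(names, key=lambda n: Path(n).stem))
# ['0001.jpg', '0001.json', '0002.json', '0002.jpg']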
datachain/listing.py
CHANGED
datachain/query/dataset.py
CHANGED
@@ -1,3 +1,4 @@
+import ast
 import contextlib
 import datetime
 import inspect
@@ -51,9 +52,10 @@ from datachain.data_storage.schema import (
 from datachain.dataset import DatasetStatus, RowDict
 from datachain.error import DatasetNotFoundError, QueryScriptCancelError
 from datachain.progress import CombinedDownloadCallback
+from datachain.query.schema import DEFAULT_DELIMITER
 from datachain.sql.functions import rand
 from datachain.storage import Storage, StorageURI
-from datachain.utils import batched, determine_processes
+from datachain.utils import batched, determine_processes, inside_notebook

 from .batch import RowBatch
 from .metrics import metrics
@@ -62,7 +64,6 @@ from .session import Session
 from .udf import UDFBase, UDFClassWrapper, UDFFactory, UDFType

 if TYPE_CHECKING:
-    import pandas as pd
     from sqlalchemy.sql.elements import ClauseElement
     from sqlalchemy.sql.schema import Table
     from sqlalchemy.sql.selectable import GenerativeSelect
@@ -547,8 +548,9 @@ class UDF(Step, ABC):
         else:
             udf = self.udf

-        if hasattr(udf.func, "
-            udf.func.
+        if hasattr(udf.func, "setup") and callable(udf.func.setup):
+            udf.func.setup()
+
         warehouse = self.catalog.warehouse

         with contextlib.closing(
@@ -599,12 +601,15 @@ class UDF(Step, ABC):
         # Create a dynamic module with the generated name
         dynamic_module = types.ModuleType(feature_module_name)
         # Get the import lines for the necessary objects from the main module
-        import_lines = [
-            source.getimport(obj, alias=name)
-            for name, obj in inspect.getmembers(sys.modules["__main__"], _imports)
-            if not (name.startswith("__") and name.endswith("__"))
-        ]
         main_module = sys.modules["__main__"]
+        if getattr(main_module, "__file__", None):
+            import_lines = list(get_imports(main_module))
+        else:
+            import_lines = [
+                source.getimport(obj, alias=name)
+                for name, obj in main_module.__dict__.items()
+                if _imports(obj) and not (name.startswith("__") and name.endswith("__"))
+            ]

         # Get the feature classes from the main module
         feature_classes = {
@@ -612,6 +617,10 @@ class UDF(Step, ABC):
             for name, obj in main_module.__dict__.items()
             if _feature_predicate(obj)
         }
+        if not feature_classes:
+            yield None
+            return
+
         # Get the source code of the feature classes
         feature_sources = [source.getsource(cls) for _, cls in feature_classes.items()]
         # Set the module name for the feature classes to the generated name
@@ -621,7 +630,7 @@ class UDF(Step, ABC):
         # Add the dynamic module to the sys.modules dictionary
         sys.modules[feature_module_name] = dynamic_module
         # Combine the import lines and feature sources
-        feature_file = "".join(import_lines) + "\n".join(feature_sources)
+        feature_file = "\n".join(import_lines) + "\n" + "\n".join(feature_sources)

         # Write the module content to a .py file
         with open(f"{feature_module_name}.py", "w") as module_file:
@@ -1362,33 +1371,11 @@ class DatasetQuery:
         cols = result.columns
         return [dict(zip(cols, row)) for row in result]

-    @classmethod
-    def create_empty_record(
-        cls, name: Optional[str] = None, session: Optional[Session] = None
-    ) -> "DatasetRecord":
-        session = Session.get(session)
-        if name is None:
-            name = session.generate_temp_dataset_name()
-        columns = session.catalog.warehouse.dataset_row_cls.file_columns()
-        return session.catalog.create_dataset(name, columns=columns)
-
-    @classmethod
-    def insert_record(
-        cls,
-        dsr: "DatasetRecord",
-        record: dict[str, Any],
-        session: Optional[Session] = None,
-    ) -> None:
-        session = Session.get(session)
-        dr = session.catalog.warehouse.dataset_rows(dsr)
-        insert_q = dr.get_table().insert().values(**record)
-        session.catalog.warehouse.db.execute(insert_q)
-
     def to_pandas(self) -> "pd.DataFrame":
-        import pandas as pd
-
         records = self.to_records()
-
+        df = pd.DataFrame.from_records(records)
+        df.columns = [c.replace(DEFAULT_DELIMITER, ".") for c in df.columns]
+        return df

     def shuffle(self) -> "Self":
         # ToDo: implement shaffle based on seed and/or generating random column
@@ -1410,8 +1397,17 @@ class DatasetQuery:

     def show(self, limit=20) -> None:
         df = self.limit(limit).to_pandas()
-
-
+
+        options = ["display.max_colwidth", 50, "display.show_dimensions", False]
+        with pd.option_context(*options):
+            if inside_notebook():
+                from IPython.display import display
+
+                display(df)
+
+            else:
+                print(df.to_string())
+
         if len(df) == limit:
             print(f"[limited by {limit} objects]")

@@ -1692,6 +1688,15 @@ class DatasetQuery:
             storage.timestamp_str,
         )

+    def exec(self) -> "Self":
+        """Execute the query."""
+        try:
+            query = self.clone()
+            query.apply_steps()
+        finally:
+            self.cleanup()
+        return query
+
     def save(
         self,
         name: Optional[str] = None,
@@ -1878,3 +1883,24 @@ def _feature_predicate(obj):

 def _imports(obj):
     return not source.isfrommain(obj)
+
+
+def get_imports(m):
+    root = ast.parse(inspect.getsource(m))
+
+    for node in ast.iter_child_nodes(root):
+        if isinstance(node, ast.Import):
+            module = None
+        elif isinstance(node, ast.ImportFrom):
+            module = node.module
+        else:
+            continue
+
+        for n in node.names:
+            import_script = ""
+            if module:
+                import_script += f"from {module} "
+            import_script += f"import {n.name}"
+            if n.asname:
+                import_script += f" as {n.asname}"
+            yield import_script
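The new get_imports() helper walks the module AST and re-emits each import as a source line. The same traversal can be shown on a plain source string (standalone sketch mirroring the logic above):

import ast

src = "import os\nfrom pathlib import Path as P\n"
for node in ast.iter_child_nodes(ast.parse(src)):
    if isinstance(node, ast.Import):
        module = None
    elif isinstance(node, ast.ImportFrom):
        module = node.module
    else:
        continue
    for name in node.names:
        line = (f"from {module} " if module else "") + f"import {name.name}"
        if name.asname:
            line += f" as {name.asname}"
        print(line)
# import os
# from pathlib import Path as P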
datachain/query/dispatch.py
CHANGED
@@ -370,8 +370,8 @@ class UDFWorker:
         return WorkerCallback(self.done_queue)

     def run(self) -> None:
-        if hasattr(self.udf.func, "
-            self.udf.func.
+        if hasattr(self.udf.func, "setup") and callable(self.udf.func.setup):
+            self.udf.func.setup()
         while (batch := get_from_queue(self.task_queue)) != STOP_SIGNAL:
             n_rows = len(batch.rows) if isinstance(batch, RowBatch) else 1
             udf_output = self.udf(
datachain/query/schema.py
CHANGED
@@ -3,14 +3,12 @@ import json
 from abc import ABC, abstractmethod
 from datetime import datetime, timezone
 from fnmatch import fnmatch
-from random import getrandbits
 from typing import TYPE_CHECKING, Any, Callable, ClassVar, Optional, Union

 import attrs
 import sqlalchemy as sa
 from fsspec.callbacks import DEFAULT_CALLBACK, Callback

-from datachain.data_storage.warehouse import RANDOM_BITS
 from datachain.sql.types import JSON, Boolean, DateTime, Int, Int64, SQLType, String

 if TYPE_CHECKING:
@@ -217,7 +215,7 @@ class DatasetRow:
         "source": String,
         "parent": String,
         "name": String,
-        "size":
+        "size": Int64,
         "location": JSON,
         "vtype": String,
         "dir_type": Int,
@@ -227,8 +225,6 @@ class DatasetRow:
         "last_modified": DateTime,
         "version": String,
         "etag": String,
-        # system column
-        "random": Int64,
     }

     @staticmethod
@@ -267,8 +263,6 @@ class DatasetRow:

         last_modified = last_modified or datetime.now(timezone.utc)

-        random = getrandbits(RANDOM_BITS)
-
         return (  # type: ignore [return-value]
             source,
             parent,
@@ -283,7 +277,6 @@ class DatasetRow:
             last_modified,
             version,
             etag,
-            random,
         )

     @staticmethod
datachain/query/udf.py
CHANGED
@@ -14,6 +14,7 @@ from typing import (
 from fsspec.callbacks import DEFAULT_CALLBACK, Callback

 from datachain.dataset import RowDict
+from datachain.lib.utils import AbstractUDF

 from .batch import Batch, BatchingStrategy, NoBatching, Partition, RowBatch
 from .schema import (
@@ -58,14 +59,6 @@ class UDFProperties:
     def signal_names(self) -> Iterable[str]:
         return self.output.keys()

-    def parameter_parser(self) -> Callable:
-        """Generate a parameter list from a dataset row."""
-
-        def plist(catalog: "Catalog", row: "RowDict", **kwargs) -> list:
-            return [p.get_value(catalog, row, **kwargs) for p in self.params]
-
-        return plist
-

 def udf(
     params: Sequence[UDFParamSpec],
@@ -113,32 +106,37 @@ class UDFBase:
         self.func = func
         self.properties = properties
         self.signal_names = properties.signal_names()
-        self.parameter_parser = properties.parameter_parser()
         self.output = properties.output

     def __call__(
         self,
         catalog: "Catalog",
-
+        arg: "BatchingResult",
         is_generator: bool = False,
         cache: bool = False,
         cb: Callback = DEFAULT_CALLBACK,
     ) -> Iterable[UDFResult]:
-        if isinstance(
+        if isinstance(self.func, AbstractUDF):
+            self.func._catalog = catalog  # type: ignore[unreachable]
+
+        if isinstance(arg, RowBatch):
             udf_inputs = [
-                self.
-                for row in
+                self.bind_parameters(catalog, row, cache=cache, cb=cb)
+                for row in arg.rows
             ]
             udf_outputs = self.func(udf_inputs)
-            return self._process_results(
-        if isinstance(
-            udf_inputs = self.
+            return self._process_results(arg.rows, udf_outputs, is_generator)
+        if isinstance(arg, RowDict):
+            udf_inputs = self.bind_parameters(catalog, arg, cache=cache, cb=cb)
             udf_outputs = self.func(*udf_inputs)
             if not is_generator:
                 # udf_outputs is generator already if is_generator=True
                 udf_outputs = [udf_outputs]
-            return self._process_results([
-        raise ValueError(f"
+            return self._process_results([arg], udf_outputs, is_generator)
+        raise ValueError(f"Unexpected UDF argument: {arg}")
+
+    def bind_parameters(self, catalog: "Catalog", row: "RowDict", **kwargs) -> list:
+        return [p.get_value(catalog, row, **kwargs) for p in self.properties.params]

     def _process_results(
         self,
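bind_parameters() replaces the old parameter_parser() closure: for each parameter spec it pulls the corresponding value out of the row, producing the positional arguments for the wrapped function. The idea in isolation (names are illustrative, not the datachain API):

row = {"name": "cat.jpg", "size": 1024}
param_names = ["name", "size"]  # stands in for the UDFParamSpec objects
udf_inputs = [row[p] for p in param_names]
assert udf_inputs == ["cat.jpg", 1024]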