datachain 0.14.2__py3-none-any.whl → 0.39.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datachain/__init__.py +20 -0
- datachain/asyn.py +11 -12
- datachain/cache.py +7 -7
- datachain/catalog/__init__.py +2 -2
- datachain/catalog/catalog.py +621 -507
- datachain/catalog/dependency.py +164 -0
- datachain/catalog/loader.py +28 -18
- datachain/checkpoint.py +43 -0
- datachain/cli/__init__.py +24 -33
- datachain/cli/commands/__init__.py +1 -8
- datachain/cli/commands/datasets.py +83 -52
- datachain/cli/commands/ls.py +17 -17
- datachain/cli/commands/show.py +4 -4
- datachain/cli/parser/__init__.py +8 -74
- datachain/cli/parser/job.py +95 -3
- datachain/cli/parser/studio.py +11 -4
- datachain/cli/parser/utils.py +1 -2
- datachain/cli/utils.py +2 -15
- datachain/client/azure.py +4 -4
- datachain/client/fsspec.py +45 -28
- datachain/client/gcs.py +6 -6
- datachain/client/hf.py +29 -2
- datachain/client/http.py +157 -0
- datachain/client/local.py +15 -11
- datachain/client/s3.py +17 -9
- datachain/config.py +4 -8
- datachain/data_storage/db_engine.py +12 -6
- datachain/data_storage/job.py +5 -1
- datachain/data_storage/metastore.py +1252 -186
- datachain/data_storage/schema.py +58 -45
- datachain/data_storage/serializer.py +105 -15
- datachain/data_storage/sqlite.py +286 -127
- datachain/data_storage/warehouse.py +250 -113
- datachain/dataset.py +353 -148
- datachain/delta.py +391 -0
- datachain/diff/__init__.py +27 -29
- datachain/error.py +60 -0
- datachain/func/__init__.py +2 -1
- datachain/func/aggregate.py +66 -42
- datachain/func/array.py +242 -38
- datachain/func/base.py +7 -4
- datachain/func/conditional.py +110 -60
- datachain/func/func.py +96 -45
- datachain/func/numeric.py +55 -38
- datachain/func/path.py +32 -20
- datachain/func/random.py +2 -2
- datachain/func/string.py +67 -37
- datachain/func/window.py +7 -8
- datachain/hash_utils.py +123 -0
- datachain/job.py +11 -7
- datachain/json.py +138 -0
- datachain/lib/arrow.py +58 -22
- datachain/lib/audio.py +245 -0
- datachain/lib/clip.py +14 -13
- datachain/lib/convert/flatten.py +5 -3
- datachain/lib/convert/python_to_sql.py +6 -10
- datachain/lib/convert/sql_to_python.py +8 -0
- datachain/lib/convert/values_to_tuples.py +156 -51
- datachain/lib/data_model.py +42 -20
- datachain/lib/dataset_info.py +36 -8
- datachain/lib/dc/__init__.py +8 -2
- datachain/lib/dc/csv.py +25 -28
- datachain/lib/dc/database.py +398 -0
- datachain/lib/dc/datachain.py +1289 -425
- datachain/lib/dc/datasets.py +320 -38
- datachain/lib/dc/hf.py +38 -24
- datachain/lib/dc/json.py +29 -32
- datachain/lib/dc/listings.py +112 -8
- datachain/lib/dc/pandas.py +16 -12
- datachain/lib/dc/parquet.py +35 -23
- datachain/lib/dc/records.py +31 -23
- datachain/lib/dc/storage.py +154 -64
- datachain/lib/dc/storage_pattern.py +251 -0
- datachain/lib/dc/utils.py +24 -16
- datachain/lib/dc/values.py +8 -9
- datachain/lib/file.py +622 -89
- datachain/lib/hf.py +69 -39
- datachain/lib/image.py +14 -14
- datachain/lib/listing.py +14 -11
- datachain/lib/listing_info.py +1 -2
- datachain/lib/meta_formats.py +3 -4
- datachain/lib/model_store.py +39 -7
- datachain/lib/namespaces.py +125 -0
- datachain/lib/projects.py +130 -0
- datachain/lib/pytorch.py +32 -21
- datachain/lib/settings.py +192 -56
- datachain/lib/signal_schema.py +427 -104
- datachain/lib/tar.py +1 -2
- datachain/lib/text.py +8 -7
- datachain/lib/udf.py +164 -76
- datachain/lib/udf_signature.py +60 -35
- datachain/lib/utils.py +118 -4
- datachain/lib/video.py +17 -9
- datachain/lib/webdataset.py +61 -56
- datachain/lib/webdataset_laion.py +15 -16
- datachain/listing.py +22 -10
- datachain/model/bbox.py +3 -1
- datachain/model/ultralytics/bbox.py +16 -12
- datachain/model/ultralytics/pose.py +16 -12
- datachain/model/ultralytics/segment.py +16 -12
- datachain/namespace.py +84 -0
- datachain/node.py +6 -6
- datachain/nodes_thread_pool.py +0 -1
- datachain/plugins.py +24 -0
- datachain/project.py +78 -0
- datachain/query/batch.py +40 -41
- datachain/query/dataset.py +604 -322
- datachain/query/dispatch.py +261 -154
- datachain/query/metrics.py +4 -6
- datachain/query/params.py +2 -3
- datachain/query/queue.py +3 -12
- datachain/query/schema.py +11 -6
- datachain/query/session.py +200 -33
- datachain/query/udf.py +34 -2
- datachain/remote/studio.py +171 -69
- datachain/script_meta.py +12 -12
- datachain/semver.py +68 -0
- datachain/sql/__init__.py +2 -0
- datachain/sql/functions/array.py +33 -1
- datachain/sql/postgresql_dialect.py +9 -0
- datachain/sql/postgresql_types.py +21 -0
- datachain/sql/sqlite/__init__.py +5 -1
- datachain/sql/sqlite/base.py +102 -29
- datachain/sql/sqlite/types.py +8 -13
- datachain/sql/types.py +70 -15
- datachain/studio.py +223 -46
- datachain/toolkit/split.py +31 -10
- datachain/utils.py +101 -59
- {datachain-0.14.2.dist-info → datachain-0.39.0.dist-info}/METADATA +77 -22
- datachain-0.39.0.dist-info/RECORD +173 -0
- {datachain-0.14.2.dist-info → datachain-0.39.0.dist-info}/WHEEL +1 -1
- datachain/cli/commands/query.py +0 -53
- datachain/query/utils.py +0 -42
- datachain-0.14.2.dist-info/RECORD +0 -158
- {datachain-0.14.2.dist-info → datachain-0.39.0.dist-info}/entry_points.txt +0 -0
- {datachain-0.14.2.dist-info → datachain-0.39.0.dist-info}/licenses/LICENSE +0 -0
- {datachain-0.14.2.dist-info → datachain-0.39.0.dist-info}/top_level.txt +0 -0
datachain/lib/convert/sql_to_python.py
CHANGED
@@ -9,6 +9,14 @@ def sql_to_python(sql_exp: ColumnElement) -> Any:
         type_ = sql_exp.type.python_type
         if type_ == Decimal:
             type_ = float
+        elif type_ is list:
+            if hasattr(sql_exp.type, "item_type") and hasattr(
+                sql_exp.type.item_type, "python_type"
+            ):
+                item_type = getattr(sql_exp.type.item_type, "python_type", Any)
+                type_ = list[item_type]  # type: ignore[valid-type]
+            else:
+                type_ = list
     except NotImplementedError:
         type_ = str
     return type_
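For reference, the new `list` branch can be exercised in isolation. The sketch below is a minimal reproduction, not library code: the `Fake*` classes are hypothetical stand-ins for a SQLAlchemy column expression, since the function only touches `python_type` and `item_type.python_type`.

from decimal import Decimal
from typing import Any

# Hypothetical stand-ins for a SQLAlchemy column type; the function above
# only relies on `python_type` and `item_type.python_type`.
class FakeItemType:
    python_type = int

class FakeArrayType:
    python_type = list
    item_type = FakeItemType()

class FakeColumn:
    type = FakeArrayType()

def sql_to_python(sql_exp) -> Any:
    try:
        type_ = sql_exp.type.python_type
        if type_ == Decimal:
            type_ = float
        elif type_ is list:
            if hasattr(sql_exp.type, "item_type") and hasattr(
                sql_exp.type.item_type, "python_type"
            ):
                item_type = getattr(sql_exp.type.item_type, "python_type", Any)
                type_ = list[item_type]
            else:
                type_ = list
    except NotImplementedError:
        type_ = str
    return type_

# Array columns now map to a parameterized list instead of a bare `list`.
assert sql_to_python(FakeColumn()) == list[int]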
datachain/lib/convert/values_to_tuples.py
CHANGED
@@ -1,62 +1,177 @@
+import itertools
 from collections.abc import Sequence
-from typing import Any
-
-from datachain.lib.data_model import (
-    DataType,
-    DataTypeNames,
-    DataValue,
-    is_chain_type,
-)
+from typing import Any
+
+from datachain.lib.data_model import DataType, DataTypeNames, DataValue, is_chain_type
 from datachain.lib.utils import DataChainParamsError


 class ValuesToTupleError(DataChainParamsError):
     def __init__(self, ds_name: str, msg: str):
+        self.ds_name = ds_name
+        self.msg = msg
+
         if ds_name:
             ds_name = f"' {ds_name}'"
+
         super().__init__(f"Cannot convert signals for dataset{ds_name}: {msg}")

+    def __reduce__(self):
+        return ValuesToTupleError, (self.ds_name, self.msg)

-def values_to_tuples(  # noqa: C901, PLR0912
-    ds_name: str = "",
-    output: Union[None, DataType, Sequence[str], dict[str, DataType]] = None,
-    **fr_map: Sequence[DataValue],
-) -> tuple[Any, Any, Any]:
-    if output:
-        if not isinstance(output, (Sequence, str, dict)):
-            if len(fr_map) != 1:
-                raise ValuesToTupleError(
-                    ds_name,
-                    f"only one output type was specified, {len(fr_map)} expected",
-                )
-            if not isinstance(output, type):
-                raise ValuesToTupleError(
-                    ds_name,
-                    f"output must specify a type while '{output}' was given",
-                )

-
-
+def _find_first_non_none(sequence: Sequence[Any]) -> Any | None:
+    """Find the first non-None element in a sequence."""
+    try:
+        return next(itertools.dropwhile(lambda i: i is None, sequence))
+    except StopIteration:
+        return None
+
+
+def _infer_list_item_type(lst: list) -> type:
+    """Infer the item type of a list, handling None values and nested lists."""
+    if len(lst) == 0:
+        # Default to str when list is empty to avoid generic list
+        return str
+
+    first_item = _find_first_non_none(lst)
+    if first_item is None:
+        # Default to str when all items are None
+        return str

-
+    item_type = type(first_item)
+
+    # Handle nested lists one level deep
+    if isinstance(first_item, list) and len(first_item) > 0:
+        nested_item = _find_first_non_none(first_item)
+        if nested_item is not None:
+            return list[type(nested_item)]  # type: ignore[misc, return-value]
+        # Default to str for nested lists with all None
+        return list[str]  # type: ignore[return-value]
+
+    return item_type
+
+
+def _infer_dict_value_type(dct: dict) -> type:
+    """Infer the value type of a dict, handling None values and list values."""
+    if len(dct) == 0:
+        # Default to str when dict is empty to avoid generic dict values
+        return str
+
+    # Find first non-None value
+    first_value = None
+    for val in dct.values():
+        if val is not None:
+            first_value = val
+            break
+
+    if first_value is None:
+        # Default to str when all values are None
+        return str
+
+    # Handle list values
+    if isinstance(first_value, list) and len(first_value) > 0:
+        list_item = _find_first_non_none(first_value)
+        if list_item is not None:
+            return list[type(list_item)]  # type: ignore[misc, return-value]
+        # Default to str for lists with all None
+        return list[str]  # type: ignore[return-value]
+
+    return type(first_value)
+
+
+def _infer_type_from_sequence(
+    sequence: Sequence[DataValue], signal_name: str, ds_name: str
+) -> type:
+    """
+    Infer the type from a sequence of values.
+
+    Returns str if all values are None, otherwise infers from the first non-None value.
+    Handles lists and dicts with proper type inference for nested structures.
+    """
+    first_element = _find_first_non_none(sequence)
+
+    if first_element is None:
+        # Default to str if column is empty or all values are None
+        return str
+
+    typ = type(first_element)
+
+    if not is_chain_type(typ):
+        raise ValuesToTupleError(
+            ds_name,
+            f"signal '{signal_name}' has unsupported type '{typ.__name__}'."
+            f" Please use DataModel types: {DataTypeNames}",
+        )
+
+    if isinstance(first_element, list):
+        item_type = _infer_list_item_type(first_element)
+        return list[item_type]  # type: ignore[valid-type, return-value]
+
+    if isinstance(first_element, dict):
+        # If the first dict is empty, use str as default key/value types
+        if len(first_element) == 0:
+            return dict[str, str]  # type: ignore[return-value]
+        first_key = next(iter(first_element.keys()))
+        value_type = _infer_dict_value_type(first_element)
+        return dict[type(first_key), value_type]  # type: ignore[misc, return-value]
+
+    return typ
+
+
+def _validate_and_normalize_output(
+    output: DataType | Sequence[str] | dict[str, DataType] | None,
+    fr_map: dict[str, Sequence[DataValue]],
+    ds_name: str,
+) -> dict[str, DataType] | None:
+    """Validate and normalize the output parameter to a dict format."""
+    if not output:
+        return None
+
+    if not isinstance(output, (Sequence, str, dict)):
+        if len(fr_map) != 1:
             raise ValuesToTupleError(
                 ds_name,
-                "output type must be dict[str, DataType] while "
-                f"'{type(output).__name__}' is given",
+                f"only one output type was specified, {len(fr_map)} expected",
             )
-
-        if len(output) != len(fr_map):
+        if not isinstance(output, type):
             raise ValuesToTupleError(
                 ds_name,
-                f"number of outputs '{len(output)}' should match"
-                f" number of signals '{len(fr_map)}'",
+                f"output must specify a type while '{output}' was given",
             )

+        key: str = next(iter(fr_map.keys()))
+        return {key: output}  # type: ignore[dict-item]
+
+    if not isinstance(output, dict):
+        raise ValuesToTupleError(
+            ds_name,
+            "output type must be dict[str, DataType] while "
+            f"'{type(output).__name__}' is given",
+        )
+
+    if len(output) != len(fr_map):
+        raise ValuesToTupleError(
+            ds_name,
+            f"number of outputs '{len(output)}' should match"
+            f" number of signals '{len(fr_map)}'",
+        )
+
+    return output  # type: ignore[return-value]
+
+
+def values_to_tuples(
+    ds_name: str = "",
+    output: DataType | Sequence[str] | dict[str, DataType] | None = None,
+    **fr_map: Sequence[DataValue],
+) -> tuple[Any, Any, Any]:
+    output = _validate_and_normalize_output(output, fr_map, ds_name)
+
     types_map: dict[str, type] = {}
     length = -1
     for k, v in fr_map.items():
         if not isinstance(v, Sequence) or isinstance(v, str):  # type: ignore[unreachable]
-            raise ValuesToTupleError(ds_name, f"
+            raise ValuesToTupleError(ds_name, f"signal '{k}' is not a sequence")
         len_ = len(v)

         if output:
@@ -66,21 +181,11 @@ def values_to_tuples(  # noqa: C901, PLR0912
                     f"signal '{k}' is not present in the output",
                 )
         else:
-
-
-
-
-
-            if not is_chain_type(typ):
-                raise ValuesToTupleError(
-                    ds_name,
-                    f"signal '{k}' has unsupported type '{typ.__name__}'."
-                    f" Please use DataModel types: {DataTypeNames}",
-                )
-            if isinstance(first_element, list):
-                types_map[k] = list[type(first_element[0])]  # type: ignore[assignment, misc]
-            else:
-                types_map[k] = typ
+            # FIXME: Stops as soon as it finds the first non-None value.
+            # If a non-None value appears early, it won't check the remaining items for
+            # `None` values.
+            typ = _infer_type_from_sequence(v, k, ds_name)
+            types_map[k] = typ

         if length < 0:
             length = len_
@@ -104,7 +209,7 @@ def values_to_tuples(  # noqa: C901, PLR0912
     if len(output) > 1:  # type: ignore[arg-type]
         tuple_type = tuple(output_types)
         res_type = tuple[tuple_type]  # type: ignore[valid-type]
-        res_values: Sequence[Any] = list(zip(*fr_map.values()))
+        res_values: Sequence[Any] = list(zip(*fr_map.values(), strict=False))
     else:
         res_type = output_types[0]  # type: ignore[misc]
         res_values = next(iter(fr_map.values()))
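The net effect of the new helpers is easiest to see outside the library. Below is a condensed sketch of the inference rules (one level of nesting, and, like the helpers above, it only looks at the first non-None value); `infer` and `find_first_non_none` are illustrative names, not datachain APIs.

import itertools
from collections.abc import Sequence
from typing import Any


def find_first_non_none(sequence: Sequence[Any]) -> Any | None:
    try:
        return next(itertools.dropwhile(lambda i: i is None, sequence))
    except StopIteration:
        return None


def infer(sequence: Sequence[Any]) -> type:
    first = find_first_non_none(sequence)
    if first is None:
        return str  # empty column, or every value is None
    if isinstance(first, list):
        item = find_first_non_none(first)
        return list[type(item)] if item is not None else list[str]
    if isinstance(first, dict):
        if not first:
            return dict[str, str]
        key_type = type(next(iter(first)))
        value = find_first_non_none(list(first.values()))
        return dict[key_type, type(value) if value is not None else str]
    return type(first)


assert infer([None, None]) is str
assert infer([[1, 2], None]) == list[int]
assert infer([{"a": 1.5}]) == dict[str, float]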
datachain/lib/data_model.py
CHANGED
@@ -1,25 +1,29 @@
+import inspect
+import types
+import uuid
 from collections.abc import Sequence
 from datetime import datetime
-from typing import ClassVar, Optional, Union, get_args, get_origin
+from typing import ClassVar, Union, get_args, get_origin

 from pydantic import AliasChoices, BaseModel, Field, create_model
+from pydantic.fields import FieldInfo

 from datachain.lib.model_store import ModelStore
 from datachain.lib.utils import normalize_col_names

-StandardType = Union[
-    type[int],
-    type[str],
-    type[float],
-    type[bool],
-    type[list],
-    type[dict],
-    type[bytes],
-    type[datetime],
-]
-DataType = Union[type[BaseModel], StandardType]
+StandardType = (
+    type[int]
+    | type[str]
+    | type[float]
+    | type[bool]
+    | type[list]
+    | type[dict]
+    | type[bytes]
+    | type[datetime]
+)
+DataType = type[BaseModel] | StandardType
 DataTypeNames = "BaseModel, int, str, float, bool, list, dict, bytes, datetime"
-DataValue = Union[BaseModel, int, str, float, bool, list, dict, bytes, datetime]
+DataValue = BaseModel | int | str | float | bool | list | dict | bytes | datetime


 class DataModel(BaseModel):
@@ -34,7 +38,7 @@ class DataModel(BaseModel):
         ModelStore.register(cls)

     @staticmethod
-    def register(models: Union[DataType, Sequence[DataType]]):
+    def register(models: DataType | Sequence[DataType]):
         """For registering classes manually. It accepts a single class or a sequence of
         classes."""
         if not isinstance(models, Sequence):
@@ -60,8 +64,11 @@ def is_chain_type(t: type) -> bool:
     if orig is list and len(args) == 1:
         return is_chain_type(get_args(t)[0])

-    if orig is Union and len(args) == 2 and (type(None) in args):
-        return is_chain_type(args[0])
+    if orig is dict and len(args) == 2:
+        return is_chain_type(args[0]) and is_chain_type(args[1])
+
+    if orig in (Union, types.UnionType) and len(args) == 2 and (type(None) in args):
+        return is_chain_type(args[0] if args[1] is type(None) else args[1])

     return False

@@ -69,17 +76,19 @@ def is_chain_type(t: type) -> bool:
 def dict_to_data_model(
     name: str,
     data_dict: dict[str, DataType],
-    original_names: Optional[list[str]] = None,
+    original_names: list[str] | None = None,
 ) -> type[BaseModel]:
     if not original_names:
         # Gets a map of a normalized_name -> original_name
         columns = normalize_col_names(list(data_dict))
-        data_dict = dict(zip(columns.keys(), data_dict.values()))
+        data_dict = dict(zip(columns.keys(), data_dict.values(), strict=False))
         original_names = list(columns.values())

     fields = {
         name: (
-            anno,
+            anno
+            if inspect.isclass(anno) and issubclass(anno, BaseModel)
+            else anno | None,
             Field(
                 validation_alias=AliasChoices(name, original_names[idx] or name),
                 default=None,
@@ -89,7 +98,20 @@ def dict_to_data_model(
     }

     class _DataModelStrict(BaseModel, extra="forbid"):
-        pass
+        @classmethod
+        def _model_fields_by_aliases(cls) -> dict[str, tuple[str, FieldInfo]]:
+            """Returns a map of aliases to original field names and info."""
+            field_info = {}
+            for _name, field in cls.model_fields.items():
+                assert isinstance(field.validation_alias, AliasChoices)
+                # Add mapping for all aliases (both normalized and original names)
+                for alias in field.validation_alias.choices:
+                    field_info[str(alias)] = (_name, field)
+            return field_info
+
+    # Generate random unique name if not provided
+    if not name:
+        name = f"DataModel_{uuid.uuid4().hex[:8]}"

     return create_model(
         name,
datachain/lib/dataset_info.py
CHANGED
@@ -1,17 +1,19 @@
-import json
 from datetime import datetime
-from typing import TYPE_CHECKING, Any, Optional, Union
+from typing import TYPE_CHECKING, Any
 from uuid import uuid4

 from pydantic import Field, field_validator

+from datachain import json
 from datachain.dataset import (
+    DEFAULT_DATASET_VERSION,
     DatasetListRecord,
     DatasetListVersion,
     DatasetStatus,
 )
 from datachain.job import Job
 from datachain.lib.data_model import DataModel
+from datachain.query.session import Session
 from datachain.utils import TIME_ZERO

 if TYPE_CHECKING:
@@ -20,21 +22,44 @@ if TYPE_CHECKING:

 class DatasetInfo(DataModel):
     name: str
+    namespace: str
+    project: str
     uuid: str = Field(default=str(uuid4()))
-    version: int = Field(default=1)
+    version: str = Field(default=DEFAULT_DATASET_VERSION)
     status: int = Field(default=DatasetStatus.CREATED)
     created_at: datetime = Field(default=TIME_ZERO)
-    finished_at: Optional[datetime] = Field(default=None)
-    num_objects: Optional[int] = Field(default=None)
-    size: Optional[int] = Field(default=None)
+    finished_at: datetime | None = Field(default=None)
+    num_objects: int | None = Field(default=None)
+    size: int | None = Field(default=None)
     params: dict[str, str] = Field(default={})
     metrics: dict[str, Any] = Field(default={})
     error_message: str = Field(default="")
     error_stack: str = Field(default="")
+    attrs: list[str] = Field(default=[])
+
+    @property
+    def is_temp(self) -> bool:
+        return Session.is_temp_dataset(self.name)
+
+    def has_attr(self, attr: str) -> bool:
+        s = attr.split("=")
+        if len(s) == 1:
+            return attr in self.attrs
+
+        name = s[0]
+        value = s[1]
+        for a in self.attrs:
+            s = a.split("=")
+            if value == "*" and s[0] == name:
+                return True
+            if len(s) == 2 and s[0] == name and s[1] == value:
+                return True
+
+        return False

     @staticmethod
     def _validate_dict(
-        v: Optional[Union[str, dict]],
+        v: str | dict | None,
     ) -> dict:
         if v is None or v == "":
             return {}
@@ -63,11 +88,13 @@ class DatasetInfo(DataModel):
         cls,
         dataset: DatasetListRecord,
         version: DatasetListVersion,
-        job: Optional[Job],
+        job: Job | None,
     ) -> "Self":
         return cls(
             uuid=version.uuid,
             name=dataset.name,
+            namespace=dataset.project.namespace.name,
+            project=dataset.project.name,
             version=version.version,
             status=version.status,
             created_at=version.created_at,
@@ -78,4 +105,5 @@ class DatasetInfo(DataModel):
             metrics=job.metrics if job else {},
             error_message=version.error_message,
             error_stack=version.error_stack,
+            attrs=dataset.attrs,
         )
datachain/lib/dc/__init__.py
CHANGED
@@ -1,6 +1,7 @@
 from .csv import read_csv
+from .database import read_database
 from .datachain import C, Column, DataChain
-from .datasets import datasets, read_dataset
+from .datasets import datasets, delete_dataset, move_dataset, read_dataset
 from .hf import read_hf
 from .json import read_json
 from .listings import listings
@@ -8,7 +9,7 @@ from .pandas import read_pandas
 from .parquet import read_parquet
 from .records import read_records
 from .storage import read_storage
-from .utils import DatasetMergeError, DatasetPrepareError, Sys
+from .utils import DatasetMergeError, DatasetPrepareError, Sys, is_local, is_studio
 from .values import read_values

 __all__ = [
@@ -19,8 +20,13 @@ __all__ = [
     "DatasetPrepareError",
     "Sys",
     "datasets",
+    "delete_dataset",
+    "is_local",
+    "is_studio",
     "listings",
+    "move_dataset",
     "read_csv",
+    "read_database",
     "read_dataset",
     "read_hf",
     "read_json",
datachain/lib/dc/csv.py
CHANGED
@@ -1,10 +1,6 @@
-
-from typing import (
-    TYPE_CHECKING,
-    Callable,
-    Optional,
-    Union,
-)
+import os
+from collections.abc import Callable, Sequence
+from typing import TYPE_CHECKING

 from datachain.lib.dc.utils import DatasetPrepareError, OutputType
 from datachain.lib.model_store import ModelStore
@@ -17,38 +13,38 @@ if TYPE_CHECKING:


 def read_csv(
-    path,
-    delimiter: Optional[str] = None,
+    path: str | os.PathLike[str] | list[str] | list[os.PathLike[str]],
+    delimiter: str | None = None,
     header: bool = True,
     output: OutputType = None,
-    object_name: str = "",
+    column: str = "",
     model_name: str = "",
     source: bool = True,
-    nrows=None,
-    session: Optional[Session] = None,
-    settings: Optional[dict] = None,
-    column_types: Optional[dict[str, "Union[str, ArrowDataType]"]] = None,
-    parse_options: Optional[dict[str, "Union[str, bool, Callable]"]] = None,
+    nrows: int | None = None,
+    session: Session | None = None,
+    settings: dict | None = None,
+    column_types: dict[str, "str | ArrowDataType"] | None = None,
+    parse_options: dict[str, str | bool | Callable] | None = None,
     **kwargs,
 ) -> "DataChain":
     """Generate chain from csv files.

     Parameters:
-        path : Storage URI with directory. URI must start with storage prefix such
+        path: Storage URI with directory. URI must start with storage prefix such
             as `s3://`, `gs://`, `az://` or "file:///".
-        delimiter : Character for delimiting columns. Takes precedence if also
+        delimiter: Character for delimiting columns. Takes precedence if also
             specified in `parse_options`. Defaults to ",".
-        header : Whether the files include a header row.
-        output : Dictionary or feature class defining column names and their
+        header: Whether the files include a header row.
+        output: Dictionary or feature class defining column names and their
             corresponding types. List of column names is also accepted, in which
             case types will be inferred.
-        object_name : Created object name.
-        model_name : Generated model name.
-        source : Whether to include info about the source file.
-        nrows : Optional row limit.
-        session : Session to use for the chain.
-        settings : Settings to use for the chain.
-        column_types : Dictionary of column names and their corresponding types.
+        column: Created column name.
+        model_name: Generated model name.
+        source: Whether to include info about the source file.
+        nrows: Optional row limit.
+        session: Session to use for the chain.
+        settings: Settings to use for the chain.
+        column_types: Dictionary of column names and their corresponding types.
             It is passed to CSV reader and for each column specified type auto
             inference is disabled.
         parse_options: Tells the parser how to process lines.
@@ -67,7 +63,7 @@ def read_csv(
         chain = dc.read_csv("s3://mybucket/dir")
         ```
     """
-    from pandas.io.parsers.readers import STR_NA_VALUES
+    from pandas._libs.parsers import STR_NA_VALUES
     from pyarrow.csv import ConvertOptions, ParseOptions, ReadOptions
     from pyarrow.dataset import CsvFileFormat
     from pyarrow.lib import type_for_alias
@@ -119,9 +115,10 @@ def read_csv(
     )
     return chain.parse_tabular(
         output=output,
-        object_name=object_name,
+        column=column,
         model_name=model_name,
         source=source,
         nrows=nrows,
         format=format,
+        parse_options=parse_options,
     )