datachain 0.30.5__py3-none-any.whl → 0.39.0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the package versions exactly as they appear in their public registry.
- datachain/__init__.py +4 -0
- datachain/asyn.py +11 -12
- datachain/cache.py +5 -5
- datachain/catalog/__init__.py +0 -2
- datachain/catalog/catalog.py +276 -354
- datachain/catalog/dependency.py +164 -0
- datachain/catalog/loader.py +8 -3
- datachain/checkpoint.py +43 -0
- datachain/cli/__init__.py +10 -17
- datachain/cli/commands/__init__.py +1 -8
- datachain/cli/commands/datasets.py +42 -27
- datachain/cli/commands/ls.py +15 -15
- datachain/cli/commands/show.py +2 -2
- datachain/cli/parser/__init__.py +3 -43
- datachain/cli/parser/job.py +1 -1
- datachain/cli/parser/utils.py +1 -2
- datachain/cli/utils.py +2 -15
- datachain/client/azure.py +2 -2
- datachain/client/fsspec.py +34 -23
- datachain/client/gcs.py +3 -3
- datachain/client/http.py +157 -0
- datachain/client/local.py +11 -7
- datachain/client/s3.py +3 -3
- datachain/config.py +4 -8
- datachain/data_storage/db_engine.py +12 -6
- datachain/data_storage/job.py +2 -0
- datachain/data_storage/metastore.py +716 -137
- datachain/data_storage/schema.py +20 -27
- datachain/data_storage/serializer.py +105 -15
- datachain/data_storage/sqlite.py +114 -114
- datachain/data_storage/warehouse.py +140 -48
- datachain/dataset.py +109 -89
- datachain/delta.py +117 -42
- datachain/diff/__init__.py +25 -33
- datachain/error.py +24 -0
- datachain/func/aggregate.py +9 -11
- datachain/func/array.py +12 -12
- datachain/func/base.py +7 -4
- datachain/func/conditional.py +9 -13
- datachain/func/func.py +63 -45
- datachain/func/numeric.py +5 -7
- datachain/func/string.py +2 -2
- datachain/hash_utils.py +123 -0
- datachain/job.py +11 -7
- datachain/json.py +138 -0
- datachain/lib/arrow.py +18 -15
- datachain/lib/audio.py +60 -59
- datachain/lib/clip.py +14 -13
- datachain/lib/convert/python_to_sql.py +6 -10
- datachain/lib/convert/values_to_tuples.py +151 -53
- datachain/lib/data_model.py +23 -19
- datachain/lib/dataset_info.py +7 -7
- datachain/lib/dc/__init__.py +2 -1
- datachain/lib/dc/csv.py +22 -26
- datachain/lib/dc/database.py +37 -34
- datachain/lib/dc/datachain.py +518 -324
- datachain/lib/dc/datasets.py +38 -30
- datachain/lib/dc/hf.py +16 -20
- datachain/lib/dc/json.py +17 -18
- datachain/lib/dc/listings.py +5 -8
- datachain/lib/dc/pandas.py +3 -6
- datachain/lib/dc/parquet.py +33 -21
- datachain/lib/dc/records.py +9 -13
- datachain/lib/dc/storage.py +103 -65
- datachain/lib/dc/storage_pattern.py +251 -0
- datachain/lib/dc/utils.py +17 -14
- datachain/lib/dc/values.py +3 -6
- datachain/lib/file.py +187 -50
- datachain/lib/hf.py +7 -5
- datachain/lib/image.py +13 -13
- datachain/lib/listing.py +5 -5
- datachain/lib/listing_info.py +1 -2
- datachain/lib/meta_formats.py +2 -3
- datachain/lib/model_store.py +20 -8
- datachain/lib/namespaces.py +59 -7
- datachain/lib/projects.py +51 -9
- datachain/lib/pytorch.py +31 -23
- datachain/lib/settings.py +188 -85
- datachain/lib/signal_schema.py +302 -64
- datachain/lib/text.py +8 -7
- datachain/lib/udf.py +103 -63
- datachain/lib/udf_signature.py +59 -34
- datachain/lib/utils.py +20 -0
- datachain/lib/video.py +3 -4
- datachain/lib/webdataset.py +31 -36
- datachain/lib/webdataset_laion.py +15 -16
- datachain/listing.py +12 -5
- datachain/model/bbox.py +3 -1
- datachain/namespace.py +22 -3
- datachain/node.py +6 -6
- datachain/nodes_thread_pool.py +0 -1
- datachain/plugins.py +24 -0
- datachain/project.py +4 -4
- datachain/query/batch.py +10 -12
- datachain/query/dataset.py +376 -194
- datachain/query/dispatch.py +112 -84
- datachain/query/metrics.py +3 -4
- datachain/query/params.py +2 -3
- datachain/query/queue.py +2 -1
- datachain/query/schema.py +7 -6
- datachain/query/session.py +190 -33
- datachain/query/udf.py +9 -6
- datachain/remote/studio.py +90 -53
- datachain/script_meta.py +12 -12
- datachain/sql/sqlite/base.py +37 -25
- datachain/sql/sqlite/types.py +1 -1
- datachain/sql/types.py +36 -5
- datachain/studio.py +49 -40
- datachain/toolkit/split.py +31 -10
- datachain/utils.py +39 -48
- {datachain-0.30.5.dist-info → datachain-0.39.0.dist-info}/METADATA +26 -38
- datachain-0.39.0.dist-info/RECORD +173 -0
- datachain/cli/commands/query.py +0 -54
- datachain/query/utils.py +0 -36
- datachain-0.30.5.dist-info/RECORD +0 -168
- {datachain-0.30.5.dist-info → datachain-0.39.0.dist-info}/WHEEL +0 -0
- {datachain-0.30.5.dist-info → datachain-0.39.0.dist-info}/entry_points.txt +0 -0
- {datachain-0.30.5.dist-info → datachain-0.39.0.dist-info}/licenses/LICENSE +0 -0
- {datachain-0.30.5.dist-info → datachain-0.39.0.dist-info}/top_level.txt +0 -0
datachain/lib/dc/storage.py
CHANGED
@@ -1,22 +1,17 @@
-import os
+import os
 from collections.abc import Sequence
 from functools import reduce
-from typing import (
-    TYPE_CHECKING,
-    Optional,
-    Union,
-)
+from typing import TYPE_CHECKING

-from datachain.lib.listing import (
-
-
-
-
-
-    get_listing,
-    list_bucket,
-    ls,
+from datachain.lib.dc.storage_pattern import (
+    apply_glob_filter,
+    expand_brace_pattern,
+    should_use_recursion,
+    split_uri_pattern,
+    validate_cloud_bucket_name,
 )
+from datachain.lib.file import FileType, get_file_type
+from datachain.lib.listing import get_file_info, get_listing, list_bucket, ls
 from datachain.query import Session

 if TYPE_CHECKING:
@@ -24,40 +19,46 @@ if TYPE_CHECKING:


 def read_storage(
-    uri:
+    uri: str | os.PathLike[str] | list[str] | list[os.PathLike[str]],
     *,
     type: FileType = "binary",
-    session:
-    settings:
+    session: Session | None = None,
+    settings: dict | None = None,
     in_memory: bool = False,
-    recursive:
+    recursive: bool | None = True,
     column: str = "file",
     update: bool = False,
-    anon:
-    delta:
-    delta_on:
+    anon: bool | None = None,
+    delta: bool | None = False,
+    delta_on: str | Sequence[str] | None = (
         "file.path",
         "file.etag",
         "file.version",
     ),
-    delta_result_on:
-    delta_compare:
-    delta_retry:
-
+    delta_result_on: str | Sequence[str] | None = None,
+    delta_compare: str | Sequence[str] | None = None,
+    delta_retry: bool | str | None = None,
+    delta_unsafe: bool = False,
+    client_config: dict | None = None,
 ) -> "DataChain":
     """Get data from storage(s) as a list of file with all file attributes.
     It returns the chain itself as usual.

     Parameters:
-        uri
-
-
-
-
-
-
-
-
+        uri: Storage path(s) or URI(s). Can be a local path or start with a
+            storage prefix like `s3://`, `gs://`, `az://`, `hf://` or "file:///".
+            Supports glob patterns:
+            - `*` : wildcard
+            - `**` : recursive wildcard
+            - `?` : single character
+            - `{a,b}` : brace expansion list
+            - `{1..9}` : brace numeric or alphabetic range
+        type: read file as "binary", "text", or "image" data. Default is "binary".
+        recursive: search recursively for the given path.
+        column: Column name that will contain File objects. Default is "file".
+        update: force storage reindexing. Default is False.
+        anon: If True, we will treat cloud bucket as public one.
+        client_config: Optional client configuration for the storage client.
         delta: If True, only process new or changed files instead of reprocessing
             everything. This saves time by skipping files that were already processed in
             previous versions. The optimization is working when a new version of the
@@ -77,6 +78,9 @@ def read_storage(
            (error mode)
            - True: Reprocess records missing from the result dataset (missing mode)
            - None: No retry processing (default)
+        delta_unsafe: Allow restricted ops in delta: merge, agg, union, group_by,
+            distinct. Caller must ensure datasets are consistent and not partially
+            updated.

     Returns:
         DataChain: A DataChain object containing the file information.
@@ -85,37 +89,36 @@ def read_storage(
     Simple call from s3:
     ```python
     import datachain as dc
-
+    dc.read_storage("s3://my-bucket/my-dir")
+    ```
+
+    Match all .json files recursively using glob pattern
+    ```py
+    dc.read_storage("gs://bucket/meta/**/*.json")
+    ```
+
+    Match image file extensions for directories with pattern
+    ```py
+    dc.read_storage("s3://bucket/202?/**/*.{jpg,jpeg,png}")
+    ```
+
+    By ranges in filenames:
+    ```py
+    dc.read_storage("s3://bucket/202{1..4}/**/*.{jpg,jpeg,png}")
     ```

     Multiple URIs:
     ```python
-
-        "s3://bucket1/dir1",
-        "s3://bucket2/dir2"
-    ])
+    dc.read_storage(["s3://my-bkt/dir1", "s3://bucket2/dir2/dir3"])
     ```

     With AWS S3-compatible storage:
     ```python
-
+    dc.read_storage(
         "s3://my-bucket/my-dir",
         client_config = {"aws_endpoint_url": "<minio-endpoint-url>"}
     )
     ```
-
-    Pass existing session
-    ```py
-    session = Session.get()
-    chain = dc.read_storage([
-        "path/to/dir1",
-        "path/to/dir2"
-    ], session=session, recursive=True)
-    ```
-
-    Note:
-        When using multiple URIs with `update=True`, the function optimizes by
-        avoiding redundant updates for URIs pointing to the same storage location.
     """
     from .datachain import DataChain
     from .datasets import read_dataset
@@ -138,13 +141,36 @@ def read_storage(
     if not uris:
         raise ValueError("No URIs provided")

+    # Then expand all URIs that contain brace patterns
+    expanded_uris = []
+    for single_uri in uris:
+        uri_str = str(single_uri)
+        validate_cloud_bucket_name(uri_str)
+        expanded_uris.extend(expand_brace_pattern(uri_str))
+
+    # Now process each expanded URI
     chains = []
     listed_ds_name = set()
     file_values = []

-
+    updated_uris = set()
+
+    for single_uri in expanded_uris:
+        # Check if URI contains glob patterns and split them
+        base_uri, glob_pattern = split_uri_pattern(single_uri)
+
+        # If a pattern is found, use the base_uri for listing
+        # The pattern will be used for filtering later
+        list_uri_to_use = base_uri if glob_pattern else single_uri
+
+        # Avoid double updates for the same URI
+        update_single_uri = False
+        if update and (list_uri_to_use not in updated_uris):
+            updated_uris.add(list_uri_to_use)
+            update_single_uri = True
+
         list_ds_name, list_uri, list_path, list_ds_exists = get_listing(
-
+            list_uri_to_use, session, update=update_single_uri
         )

         # list_ds_name is None if object is a file, we don't want to use cache
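The loop above composes two of the new `storage_pattern` helpers: braces are expanded first, then each resulting URI is split into a listable base and a residual glob. A minimal trace of both steps against the implementations shown later in this diff (the bucket name is illustrative, datachain 0.39.0 assumed):

```python
from datachain.lib.dc.storage_pattern import (
    expand_brace_pattern,
    split_uri_pattern,
)

# Step 1: brace ranges fan out into plain glob URIs.
assert expand_brace_pattern("s3://bucket/202{1..2}/**/*.json") == [
    "s3://bucket/2021/**/*.json",
    "s3://bucket/2022/**/*.json",
]

# Step 2: each expanded URI is split at the first patterned segment;
# the base goes to get_listing(), the pattern is kept for filtering.
assert split_uri_pattern("s3://bucket/2021/**/*.json") == (
    "s3://bucket/2021",
    "**/*.json",
)
```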
@@ -161,6 +187,12 @@ def read_storage(
             project=listing_project_name,
             session=session,
             settings=settings,
+            delta=delta,
+            delta_on=delta_on,
+            delta_result_on=delta_result_on,
+            delta_compare=delta_compare,
+            delta_retry=delta_retry,
+            delta_unsafe=delta_unsafe,
         )
         dc._query.update = update
         dc.signals_schema = dc.signals_schema.mutate({f"{column}": file_type})
@@ -193,7 +225,21 @@ def read_storage(
             lambda ds_name=list_ds_name, lst_uri=list_uri: lst_fn(ds_name, lst_uri)
         )

-
+        # If a glob pattern was detected, use it for filtering
+        # Otherwise, use the original list_path from get_listing
+        if glob_pattern:
+            # Determine if we should use recursive listing based on the pattern
+            use_recursive = should_use_recursion(glob_pattern, recursive or False)
+
+            # Apply glob filter - no need for brace expansion here as it's done above
+            chain = apply_glob_filter(
+                dc, glob_pattern, list_path, use_recursive, column
+            )
+            chains.append(chain)
+        else:
+            # No glob pattern detected, use normal ls behavior
+            chains.append(ls(dc, list_path, recursive=recursive, column=column))
+
         listed_ds_name.add(list_ds_name)

     storage_chain = None if not chains else reduce(lambda x, y: x.union(y), chains)
@@ -212,12 +258,4 @@ def read_storage(

     assert storage_chain is not None

-    if delta:
-        storage_chain = storage_chain._as_delta(
-            on=delta_on,
-            right_on=delta_result_on,
-            compare=delta_compare,
-            delta_retry=delta_retry,
-        )
-
     return storage_chain
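With the old post-hoc `_as_delta` call removed, the delta arguments now travel through the `read_dataset` call shown above. A hedged usage sketch based on the new signature (bucket and pattern are illustrative):

```python
import datachain as dc

# On re-runs, only new or changed files (matched on file.path/etag/version
# by default) are processed; delta_retry=True also reprocesses records
# that are missing from the result dataset.
chain = dc.read_storage(
    "s3://my-bucket/images/**/*.{jpg,jpeg,png}",
    delta=True,
    delta_retry=True,
)
```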
datachain/lib/dc/storage_pattern.py
ADDED
@@ -0,0 +1,251 @@
+import glob
+from typing import TYPE_CHECKING
+
+from datachain.client.fsspec import is_cloud_uri
+from datachain.lib.listing import ls
+
+if TYPE_CHECKING:
+    from .datachain import DataChain
+
+
+def validate_cloud_bucket_name(uri: str) -> None:
+    """
+    Validate that cloud storage bucket names don't contain glob patterns.
+
+    Raises:
+        ValueError: If a cloud storage bucket name contains glob patterns
+    """
+    if not is_cloud_uri(uri):
+        return
+
+    if "://" in uri:
+        scheme_end = uri.index("://") + 3
+        path_part = uri[scheme_end:]
+
+        if "/" in path_part:
+            bucket_name = path_part.split("/")[0]
+        else:
+            bucket_name = path_part
+
+        glob_chars = ["*", "?", "[", "]", "{", "}"]
+        if any(char in bucket_name for char in glob_chars):
+            raise ValueError(f"Glob patterns in bucket names are not supported: {uri}")
+
+
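A quick sanity check of the new validation, assuming `is_cloud_uri` treats `s3://` URIs as cloud storage (bucket names below are illustrative):

```python
from datachain.lib.dc.storage_pattern import validate_cloud_bucket_name

# Globs anywhere in the path part are fine...
validate_cloud_bucket_name("s3://my-bucket/2024/**/*.json")

# ...but a glob character in the bucket itself is rejected.
try:
    validate_cloud_bucket_name("s3://my-*bucket/data")
except ValueError as exc:
    print(exc)  # Glob patterns in bucket names are not supported: s3://my-*bucket/data
```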
+def split_uri_pattern(uri: str) -> tuple[str, str | None]:
+    """Split a URI into base path and glob pattern."""
+    if not any(char in uri for char in ["*", "?", "[", "{", "}"]):
+        return uri, None
+
+    if "://" in uri:
+        scheme_end = uri.index("://") + 3
+        scheme_part = uri[:scheme_end]
+        path_part = uri[scheme_end:]
+        path_segments = path_part.split("/")
+
+        pattern_start_idx = None
+        for i, segment in enumerate(path_segments):
+            # Check for glob patterns including brace expansion
+            if glob.has_magic(segment) or "{" in segment:
+                pattern_start_idx = i
+                break
+
+        if pattern_start_idx is None:
+            return uri, None
+
+        if pattern_start_idx == 0:
+            base = scheme_part + path_segments[0]
+            pattern = "/".join(path_segments[1:]) if len(path_segments) > 1 else "*"
+        else:
+            base = scheme_part + "/".join(path_segments[:pattern_start_idx])
+            pattern = "/".join(path_segments[pattern_start_idx:])
+
+        return base, pattern
+
+    path_segments = uri.split("/")
+
+    pattern_start_idx = None
+    for i, segment in enumerate(path_segments):
+        if glob.has_magic(segment) or "{" in segment:
+            pattern_start_idx = i
+            break
+
+    if pattern_start_idx is None:
+        return uri, None
+
+    base = "/".join(path_segments[:pattern_start_idx]) if pattern_start_idx > 0 else "/"
+    pattern = "/".join(path_segments[pattern_start_idx:])
+
+    return base, pattern
+
+
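Traced against the implementation above, the split happens at the first path segment that contains a glob or brace character (URIs are illustrative):

```python
from datachain.lib.dc.storage_pattern import split_uri_pattern

# No magic characters: nothing to split.
assert split_uri_pattern("s3://bucket/data") == ("s3://bucket/data", None)

# Everything from the first patterned segment onward becomes the pattern.
assert split_uri_pattern("s3://bucket/meta/**/*.json") == (
    "s3://bucket/meta",
    "**/*.json",
)
assert split_uri_pattern("gs://bucket/202?/img*.png") == (
    "gs://bucket",
    "202?/img*.png",
)
```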
+def should_use_recursion(pattern: str, user_recursive: bool) -> bool:
+    if not user_recursive:
+        return False
+
+    if "**" in pattern:
+        return True
+
+    return "/" in pattern
+
+
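This helper lets a single-level pattern downgrade the listing to non-recursive even when the caller left `recursive=True`; for example:

```python
from datachain.lib.dc.storage_pattern import should_use_recursion

# The caller's recursive=False always wins.
assert should_use_recursion("**/*.json", False) is False

# "**" or a "/" in the pattern requires walking subdirectories...
assert should_use_recursion("**/*.json", True) is True
assert should_use_recursion("sub/*.json", True) is True

# ...while a flat pattern can be served by a non-recursive listing.
assert should_use_recursion("*.json", True) is False
```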
+def expand_brace_pattern(pattern: str) -> list[str]:
+    """
+    Recursively expand brace patterns into multiple glob patterns.
+    Supports:
+    - Comma-separated lists: *.{mp3,wav}
+    - Numeric ranges: file{1..10}
+    - Zero-padded numeric ranges: file{01..10}
+    - Character ranges: file{a..z}
+
+    Examples:
+        "*.{mp3,wav}" -> ["*.mp3", "*.wav"]
+        "file{1..3}" -> ["file1", "file2", "file3"]
+        "file{01..03}" -> ["file01", "file02", "file03"]
+        "file{a..c}" -> ["filea", "fileb", "filec"]
+        "{a,b}/{c,d}" -> ["a/c", "a/d", "b/c", "b/d"]
+    """
+    if "{" not in pattern or "}" not in pattern:
+        return [pattern]
+
+    return _expand_single_braces(pattern)
+
+
+def _expand_single_braces(pattern: str) -> list[str]:
+    if "{" not in pattern or "}" not in pattern:
+        return [pattern]
+
+    start = pattern.index("{")
+    end = start
+    depth = 0
+    for i in range(start, len(pattern)):
+        if pattern[i] == "{":
+            depth += 1
+        elif pattern[i] == "}":
+            depth -= 1
+            if depth == 0:
+                end = i
+                break
+
+    if start >= end:
+        return [pattern]
+
+    prefix = pattern[:start]
+    suffix = pattern[end + 1 :]
+    brace_content = pattern[start + 1 : end]
+
+    if ".." in brace_content:
+        options = _expand_range(brace_content)
+    else:
+        options = [opt.strip() for opt in brace_content.split(",")]
+
+    expanded = []
+    for option in options:
+        combined = prefix + option + suffix
+        expanded.extend(_expand_single_braces(combined))
+
+    return expanded
+
+
+def _expand_range(range_spec: str) -> list[str]:  # noqa: PLR0911
+    if ".." not in range_spec:
+        return [range_spec]
+
+    parts = range_spec.split("..")
+    if len(parts) != 2:
+        return [range_spec]
+
+    start, end = parts[0], parts[1]
+
+    if start.isdigit() and end.isdigit():
+        pad_width = max(len(start), len(end)) if start[0] == "0" or end[0] == "0" else 0
+        start_num = int(start)
+        end_num = int(end)
+
+        if start_num <= end_num:
+            if pad_width > 0:
+                return [str(i).zfill(pad_width) for i in range(start_num, end_num + 1)]
+            return [str(i) for i in range(start_num, end_num + 1)]
+        if pad_width > 0:
+            return [str(i).zfill(pad_width) for i in range(start_num, end_num - 1, -1)]
+        return [str(i) for i in range(start_num, end_num - 1, -1)]
+
+    if len(start) == 1 and len(end) == 1 and start.isalpha() and end.isalpha():
+        start_ord = ord(start)
+        end_ord = ord(end)
+
+        if start_ord <= end_ord:
+            return [chr(i) for i in range(start_ord, end_ord + 1)]
+        return [chr(i) for i in range(start_ord, end_ord - 1, -1)]
+
+    return [range_spec]
+
+
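A few range expansions traced from the code above, including the zero-padding and descending cases (inputs are illustrative; unparseable ranges pass through unchanged):

```python
from datachain.lib.dc.storage_pattern import expand_brace_pattern

# Zero-padding is preserved when either bound has a leading zero.
assert expand_brace_pattern("img{08..10}") == ["img08", "img09", "img10"]

# Descending ranges count down.
assert expand_brace_pattern("v{3..1}") == ["v3", "v2", "v1"]

# Single-character alphabetic ranges.
assert expand_brace_pattern("shard-{a..c}") == ["shard-a", "shard-b", "shard-c"]

# Anything else is returned literally rather than raising.
assert expand_brace_pattern("x{10..z}") == ["x10..z"]
```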
+def convert_globstar_to_glob(filter_pattern: str) -> str:
+    if "**" not in filter_pattern:
+        return filter_pattern
+
+    parts = filter_pattern.split("/")
+    globstar_positions = [i for i, p in enumerate(parts) if p == "**"]
+
+    num_globstars = len(globstar_positions)
+
+    if num_globstars <= 1:
+        if filter_pattern == "**/*":
+            return "*"
+        if filter_pattern.startswith("**/"):
+            remaining = filter_pattern[3:]
+            if "/" not in remaining:
+                # Pattern like **/*.ext or **/temp?.*
+                # The ** means zero or more directories
+                # For zero directories: pattern should be just the filename pattern
+                # For one or more: pattern should be */filename
+                # Since we can't OR in GLOB, we choose the more permissive option
+                # that works with recursive listing
+                # Special handling: if it's a simple extension pattern, match broadly
+                if remaining.startswith("*."):
+                    return remaining
+                return f"*/{remaining}"
+
+        return filter_pattern.replace("**", "*")
+
+    middle_parts = []
+    start_idx = globstar_positions[0] + 1
+    end_idx = globstar_positions[-1]
+    for i in range(start_idx, end_idx):
+        if parts[i] != "**":
+            middle_parts.append(parts[i])
+
+    if not middle_parts:
+        result = filter_pattern.replace("**", "*")
+    else:
+        middle_pattern = "/".join(middle_parts)
+        last_part = parts[-1] if parts[-1] != "**" else "*"
+
+        if last_part != "*":
+            result = f"*{middle_pattern}*{last_part}"
+        else:
+            result = f"*{middle_pattern}*"
+
+    return result
+
+
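Because the warehouse GLOB has no `**` or alternation, this conversion is intentionally lossy and errs on the permissive side. Some conversions traced from the branches above:

```python
from datachain.lib.dc.storage_pattern import convert_globstar_to_glob

# A bare extension pattern is allowed to match at any depth.
assert convert_globstar_to_glob("**/*.json") == "*.json"

# "Anything under the prefix" collapses to a single wildcard.
assert convert_globstar_to_glob("**/*") == "*"

# A globstar in the middle becomes a single-level wildcard.
assert convert_globstar_to_glob("meta/**/*.json") == "meta/*/*.json"

# Non-extension filename patterns keep a directory prefix.
assert convert_globstar_to_glob("**/temp?.csv") == "*/temp?.csv"
```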
+def apply_glob_filter(
+    dc: "DataChain",
+    pattern: str,
+    list_path: str,
+    use_recursive: bool,
+    column: str,
+) -> "DataChain":
+    from datachain.query.schema import Column
+
+    chain = ls(dc, list_path, recursive=use_recursive, column=column)
+
+    if list_path and "/" not in pattern:
+        filter_pattern = f"{list_path.rstrip('/')}/{pattern}"
+    else:
+        filter_pattern = pattern
+
+    glob_pattern = convert_globstar_to_glob(filter_pattern)
+
+    return chain.filter(Column(f"{column}.path").glob(glob_pattern))
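`apply_glob_filter` is essentially a recursive listing plus a path filter; `Column(...).glob(...)` is the exact filter it builds. A hedged sketch of the same effect written against the public API (bucket and pattern are illustrative):

```python
import datachain as dc
from datachain.query.schema import Column

# Roughly what read_storage("s3://bucket/meta/**/*.json") resolves to:
# list the base prefix recursively, then keep only matching paths.
chain = dc.read_storage("s3://bucket/meta", recursive=True)
chain = chain.filter(Column("file.path").glob("*.json"))
```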
datachain/lib/dc/utils.py
CHANGED
@@ -1,12 +1,6 @@
 from collections.abc import Sequence
 from functools import wraps
-from typing import (
-    TYPE_CHECKING,
-    Callable,
-    Optional,
-    TypeVar,
-    Union,
-)
+from typing import TYPE_CHECKING, TypeVar

 import sqlalchemy
 from sqlalchemy.sql.functions import GenericFunction
@@ -18,7 +12,10 @@ from datachain.query.schema import DEFAULT_DELIMITER
 from datachain.utils import getenv_bool

 if TYPE_CHECKING:
-    from
+    from collections.abc import Callable
+    from typing import Concatenate
+
+    from typing_extensions import ParamSpec

     from .datachain import DataChain
@@ -28,9 +25,15 @@ D = TypeVar("D", bound="DataChain")


 def is_studio() -> bool:
+    """Check if the runtime environment is Studio (not local)."""
     return getenv_bool("DATACHAIN_IS_STUDIO", default=False)


+def is_local() -> bool:
+    """Check if the runtime environment is local (not Studio)."""
+    return not is_studio()
+
+
 def resolve_columns(
     method: "Callable[Concatenate[D, P], D]",
 ) -> "Callable[Concatenate[D, P], D]":
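The new pair is driven by a single environment variable; a small sketch, assuming `getenv_bool` accepts the usual truthy strings:

```python
import os

from datachain.lib.dc.utils import is_local, is_studio

os.environ["DATACHAIN_IS_STUDIO"] = "true"
assert is_studio() and not is_local()

os.environ.pop("DATACHAIN_IS_STUDIO")
assert is_local() and not is_studio()
```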
@@ -70,11 +73,11 @@ class DatasetFromValuesError(DataChainParamsError):
         super().__init__(f"Dataset{name} from values error: {msg}")


-MergeColType =
+MergeColType = str | Function | sqlalchemy.ColumnElement


 def _validate_merge_on(
-    on:
+    on: MergeColType | Sequence[MergeColType],
     ds: "DataChain",
 ) -> Sequence[MergeColType]:
     if isinstance(on, (str, sqlalchemy.ColumnElement)):
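The widened `MergeColType` alias means merge keys can be given as plain column names, `Function` objects, or raw SQLAlchemy expressions. A minimal sketch of the common string form (values are illustrative):

```python
import datachain as dc

left = dc.read_values(id=[1, 2], name=["a", "b"])
right = dc.read_values(id=[1, 2], score=[0.5, 0.9])

# A single string is wrapped into a one-element sequence by
# _validate_merge_on before the merge is planned.
merged = left.merge(right, on="id")
```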
@@ -103,12 +106,12 @@ def _get_merge_error_str(col: MergeColType) -> str:
 class DatasetMergeError(DataChainParamsError):
     def __init__(
         self,
-        on:
-        right_on:
+        on: MergeColType | Sequence[MergeColType],
+        right_on: MergeColType | Sequence[MergeColType] | None,
         msg: str,
     ):
         def _get_str(
-            on:
+            on: MergeColType | Sequence[MergeColType],
         ) -> str:
             if not isinstance(on, Sequence):
                 return str(on)  # type: ignore[unreachable]
@@ -123,7 +126,7 @@ class DatasetMergeError(DataChainParamsError):
         super().__init__(f"Merge error on='{on_str}'{right_on_str}: {msg}")


-OutputType =
+OutputType = DataType | Sequence[str] | dict[str, DataType] | None


 class Sys(DataModel):
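`OutputType` is the alias behind the `output=` argument used across the `read_*` and transformation APIs; a `Sequence[str]` or `dict[str, DataType]` names multiple outputs, per the alias above. A hedged sketch of the single-DataType form (signal names are illustrative):

```python
import datachain as dc

chain = dc.read_values(word=["alpha", "beta"])

# output as a single DataType: the new signal "upper" is typed str.
chain = chain.map(upper=lambda word: word.upper(), output=str)
```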
datachain/lib/dc/values.py
CHANGED
@@ -1,8 +1,5 @@
 from collections.abc import Iterator
-from typing import (
-    TYPE_CHECKING,
-    Optional,
-)
+from typing import TYPE_CHECKING

 from datachain.lib.convert.values_to_tuples import values_to_tuples
 from datachain.lib.data_model import dict_to_data_model
@@ -20,8 +17,8 @@ if TYPE_CHECKING:

 def read_values(
     ds_name: str = "",
-    session:
-    settings:
+    session: Session | None = None,
+    settings: dict | None = None,
     in_memory: bool = False,
     output: OutputType = None,
     column: str = "",