datachain 0.14.1__py3-none-any.whl → 0.14.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datachain/__init__.py +18 -18
- datachain/catalog/catalog.py +5 -5
- datachain/catalog/loader.py +4 -9
- datachain/cli/commands/show.py +2 -2
- datachain/data_storage/warehouse.py +9 -0
- datachain/lib/dc/__init__.py +18 -18
- datachain/lib/dc/csv.py +5 -5
- datachain/lib/dc/datachain.py +42 -42
- datachain/lib/dc/datasets.py +7 -7
- datachain/lib/dc/hf.py +5 -5
- datachain/lib/dc/json.py +5 -5
- datachain/lib/dc/listings.py +2 -2
- datachain/lib/dc/pandas.py +4 -4
- datachain/lib/dc/parquet.py +5 -5
- datachain/lib/dc/records.py +4 -4
- datachain/lib/dc/storage.py +13 -12
- datachain/lib/dc/values.py +4 -4
- datachain/lib/listing.py +11 -0
- datachain/lib/meta_formats.py +2 -2
- datachain/lib/pytorch.py +2 -2
- datachain/lib/udf.py +1 -1
- datachain/query/dataset.py +62 -50
- datachain/query/dispatch.py +6 -12
- datachain/query/udf.py +30 -1
- datachain/toolkit/split.py +1 -1
- datachain/utils.py +30 -4
- {datachain-0.14.1.dist-info → datachain-0.14.3.dist-info}/METADATA +5 -5
- {datachain-0.14.1.dist-info → datachain-0.14.3.dist-info}/RECORD +32 -32
- {datachain-0.14.1.dist-info → datachain-0.14.3.dist-info}/WHEEL +0 -0
- {datachain-0.14.1.dist-info → datachain-0.14.3.dist-info}/entry_points.txt +0 -0
- {datachain-0.14.1.dist-info → datachain-0.14.3.dist-info}/licenses/LICENSE +0 -0
- {datachain-0.14.1.dist-info → datachain-0.14.3.dist-info}/top_level.txt +0 -0
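Most of the per-file changes below are one coordinated rename: the `dc.from_*` constructors become `dc.read_*` (`from_storage` → `read_storage`, `from_values` → `read_values`, and so on), with imports and docstrings updated to match. A minimal before/after sketch at the call site, assuming the signatures shown in the docstrings below are otherwise unchanged (whether deprecated `from_*` aliases remain is not visible in this diff):

```py
import datachain as dc

# 0.14.1 spelling (renamed in this diff):
# chain = dc.from_storage("s3://my-bucket/my-dir")
# values = dc.from_values(fib=[1, 2, 3, 5, 8])

# 0.14.3 spelling:
chain = dc.read_storage("s3://my-bucket/my-dir")
values = dc.read_values(fib=[1, 2, 3, 5, 8])
```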
datachain/lib/dc/pandas.py
CHANGED
@@ -5,7 +5,7 @@ from typing import (
 
 from datachain.query import Session
 
-from .values import from_values
+from .values import read_values
 
 if TYPE_CHECKING:
     import pandas as pd
@@ -16,7 +16,7 @@ if TYPE_CHECKING:
 P = ParamSpec("P")
 
 
-def from_pandas(  # type: ignore[override]
+def read_pandas(  # type: ignore[override]
     df: "pd.DataFrame",
     name: str = "",
     session: Optional[Session] = None,
@@ -32,7 +32,7 @@ def from_pandas(  # type: ignore[override]
        import datachain as dc
 
        df = pd.DataFrame({"fib": [1, 2, 3, 5, 8]})
-       dc.from_pandas(df)
+       dc.read_pandas(df)
        ```
    """
    from .utils import DatasetPrepareError
@@ -46,7 +46,7 @@ def from_pandas(  # type: ignore[override]
            f"import from pandas error - '{column}' cannot be a column name",
        )
 
-    return from_values(
+    return read_values(
        name,
        session,
        settings=settings,
datachain/lib/dc/parquet.py
CHANGED
@@ -15,7 +15,7 @@ if TYPE_CHECKING:
 P = ParamSpec("P")
 
 
-def from_parquet(
+def read_parquet(
     path,
     partitioning: Any = "hive",
     output: Optional[dict[str, DataType]] = None,
@@ -43,18 +43,18 @@ def from_parquet(
        Reading a single file:
        ```py
        import datachain as dc
-       dc.from_parquet("s3://mybucket/file.parquet")
+       dc.read_parquet("s3://mybucket/file.parquet")
        ```
 
        Reading a partitioned dataset from a directory:
        ```py
        import datachain as dc
-       dc.from_parquet("s3://mybucket/dir")
+       dc.read_parquet("s3://mybucket/dir")
        ```
    """
-    from .storage import from_storage
+    from .storage import read_storage
 
-    chain = from_storage(path, session=session, settings=settings, **kwargs)
+    chain = read_storage(path, session=session, settings=settings, **kwargs)
    return chain.parse_tabular(
        output=output,
        object_name=object_name,
datachain/lib/dc/records.py
CHANGED
@@ -21,7 +21,7 @@ if TYPE_CHECKING:
 P = ParamSpec("P")
 
 
-def from_records(
+def read_records(
     to_insert: Optional[Union[dict, list[dict]]],
     session: Optional[Session] = None,
     settings: Optional[dict] = None,
@@ -40,10 +40,10 @@ def from_records(
    Example:
        ```py
        import datachain as dc
-       single_record = dc.from_records(dc.DEFAULT_FILE_RECORD)
+       single_record = dc.read_records(dc.DEFAULT_FILE_RECORD)
        ```
    """
-    from .datasets import from_dataset
+    from .datasets import read_dataset
 
    session = Session.get(session, in_memory=in_memory)
    catalog = session.catalog
@@ -87,4 +87,4 @@ def from_records(
    insert_q = dr.get_table().insert()
    for record in to_insert:
        db.execute(insert_q.values(**record))
-    return from_dataset(name=dsr.name, session=session, settings=settings)
+    return read_dataset(name=dsr.name, session=session, settings=settings)
datachain/lib/dc/storage.py
CHANGED
@@ -21,7 +21,7 @@ if TYPE_CHECKING:
     from .datachain import DataChain
 
 
-def from_storage(
+def read_storage(
     uri: Union[str, os.PathLike[str], list[str], list[os.PathLike[str]]],
     *,
     type: FileType = "binary",
@@ -55,12 +55,12 @@ def from_storage(
        Simple call from s3:
        ```python
        import datachain as dc
-       chain = dc.from_storage("s3://my-bucket/my-dir")
+       chain = dc.read_storage("s3://my-bucket/my-dir")
        ```
 
        Multiple URIs:
        ```python
-       chain = dc.from_storage([
+       chain = dc.read_storage([
            "s3://bucket1/dir1",
            "s3://bucket2/dir2"
        ])
@@ -68,7 +68,7 @@ def from_storage(
 
        With AWS S3-compatible storage:
        ```python
-       chain = dc.from_storage(
+       chain = dc.read_storage(
            "s3://my-bucket/my-dir",
            client_config = {"aws_endpoint_url": "<minio-endpoint-url>"}
        )
@@ -77,7 +77,7 @@ def from_storage(
        Pass existing session
        ```py
        session = Session.get()
-       chain = dc.from_storage([
+       chain = dc.read_storage([
            "path/to/dir1",
            "path/to/dir2"
        ], session=session, recursive=True)
@@ -88,9 +88,9 @@ def from_storage(
    avoiding redundant updates for URIs pointing to the same storage location.
    """
    from .datachain import DataChain
-    from .datasets import from_dataset
-    from .records import from_records
-    from .values import from_values
+    from .datasets import read_dataset
+    from .records import read_records
+    from .values import read_values
 
    file_type = get_file_type(type)
 
@@ -122,7 +122,8 @@ def from_storage(
            )
            continue
 
-        dc = from_dataset(list_ds_name, session=session, settings=settings)
+        dc = read_dataset(list_ds_name, session=session, settings=settings)
+        dc._query.update = update
        dc.signals_schema = dc.signals_schema.mutate({f"{object_name}": file_type})
 
        if update or not list_ds_exists:
@@ -130,7 +131,7 @@ def from_storage(
            def lst_fn(ds_name, lst_uri):
                # disable prefetch for listing, as it pre-downloads all files
                (
-                    from_records(
+                    read_records(
                        DataChain.DEFAULT_FILE_RECORD,
                        session=session,
                        settings=settings,
@@ -144,7 +145,7 @@ def from_storage(
                    .save(ds_name, listing=True)
                )
 
-            dc._query.
+            dc._query.set_listing_fn(
                lambda ds_name=list_ds_name, lst_uri=list_uri: lst_fn(ds_name, lst_uri)
            )
@@ -154,7 +155,7 @@ def from_storage(
        listed_ds_name.add(list_ds_name)
 
    if file_values:
-        file_chain = from_values(
+        file_chain = read_values(
            session=session,
            settings=settings,
            in_memory=in_memory,
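Besides the rename, `read_storage` now defers the actual bucket listing: it attaches a listing function to the query via `set_listing_fn`, and the listing runs later only if the cached listing dataset is missing, explicitly refreshed, or expired (see `apply_steps` in `datachain/query/dataset.py` below). A usage sketch, assuming `update` stays a keyword argument of `read_storage` as the surrounding code suggests:

```py
import datachain as dc
from datachain.query import Session

session = Session.get()

# Reuses an existing, still-fresh listing dataset without re-listing the bucket.
chain = dc.read_storage("s3://my-bucket/my-dir", session=session)

# Forces a re-listing even if a listing dataset already exists.
fresh = dc.read_storage("s3://my-bucket/my-dir", session=session, update=True)
```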
datachain/lib/dc/values.py
CHANGED
@@ -6,7 +6,7 @@ from typing import (
 
 from datachain.lib.convert.values_to_tuples import values_to_tuples
 from datachain.lib.data_model import dict_to_data_model
-from datachain.lib.dc.records import from_records
+from datachain.lib.dc.records import read_records
 from datachain.lib.dc.utils import OutputType
 from datachain.query import Session
 
@@ -18,7 +18,7 @@ if TYPE_CHECKING:
 P = ParamSpec("P")
 
 
-def from_values(
+def read_values(
     ds_name: str = "",
     session: Optional[Session] = None,
     settings: Optional[dict] = None,
@@ -32,7 +32,7 @@ def from_values(
    Example:
        ```py
        import datachain as dc
-       dc.from_values(fib=[1, 2, 3, 5, 8])
+       dc.read_values(fib=[1, 2, 3, 5, 8])
        ```
    """
    from .datachain import DataChain
@@ -42,7 +42,7 @@ def from_values(
    def _func_fr() -> Iterator[tuple_type]:  # type: ignore[valid-type]
        yield from tuples
 
-    chain = from_records(
+    chain = read_records(
        DataChain.DEFAULT_FILE_RECORD,
        session=session,
        settings=settings,
datachain/lib/listing.py
CHANGED
@@ -4,6 +4,7 @@ import os
 import posixpath
 from collections.abc import Iterator
 from contextlib import contextmanager
+from datetime import datetime, timedelta, timezone
 from typing import TYPE_CHECKING, Callable, Optional, TypeVar, Union
 
 from fsspec.asyn import get_loop
@@ -32,6 +33,16 @@ logging.getLogger("aiobotocore.credentials").setLevel(logging.CRITICAL)
 logging.getLogger("gcsfs").setLevel(logging.CRITICAL)
 
 
+def listing_dataset_expired(lst_ds) -> bool:
+    """Function that checks if listing dataset is expired or not"""
+    lst_version = lst_ds.versions[-1]
+    if not lst_version.finished_at:
+        return False
+
+    expires = lst_version.finished_at + timedelta(seconds=LISTING_TTL)
+    return datetime.now(timezone.utc) > expires
+
+
 def list_bucket(uri: str, cache, client_config=None) -> Callable:
     """
     Function that returns another generator function that yields File objects
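`listing_dataset_expired` looks only at the latest version of the listing dataset: an unfinished listing never counts as expired, and a finished one expires `LISTING_TTL` seconds after `finished_at`. A self-contained illustration with stand-in objects (the real argument is a dataset record, and the actual `LISTING_TTL` value is defined elsewhere in this module and not shown in this diff):

```py
from datetime import datetime, timedelta, timezone
from types import SimpleNamespace

LISTING_TTL = 4 * 60 * 60  # assumed value, for illustration only

def listing_dataset_expired(lst_ds) -> bool:
    # Same logic as the function added above.
    lst_version = lst_ds.versions[-1]
    if not lst_version.finished_at:
        return False
    expires = lst_version.finished_at + timedelta(seconds=LISTING_TTL)
    return datetime.now(timezone.utc) > expires

now = datetime.now(timezone.utc)
stale = SimpleNamespace(versions=[SimpleNamespace(finished_at=now - timedelta(seconds=LISTING_TTL + 1))])
fresh = SimpleNamespace(versions=[SimpleNamespace(finished_at=now)])
unfinished = SimpleNamespace(versions=[SimpleNamespace(finished_at=None)])

print(listing_dataset_expired(stale))       # True
print(listing_dataset_expired(fresh))       # False
print(listing_dataset_expired(unfinished))  # False
```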
datachain/lib/meta_formats.py
CHANGED
@@ -103,10 +103,10 @@ def read_meta(  # noqa: C901
     model_name=None,
     nrows=None,
 ) -> Callable:
-    from datachain import from_storage
+    from datachain import read_storage
 
     if schema_from:
-        file = next(from_storage(schema_from, type="text").limit(1).collect("file"))
+        file = next(read_storage(schema_from, type="text").limit(1).collect("file"))
         model_code = gen_datamodel_code(
             file, format=format, jmespath=jmespath, model_name=model_name
         )
datachain/lib/pytorch.py
CHANGED
@@ -14,7 +14,7 @@ from torchvision.transforms import v2
 from datachain import Session
 from datachain.cache import get_temp_cache
 from datachain.catalog import Catalog, get_catalog
-from datachain.lib.dc.datasets import from_dataset
+from datachain.lib.dc.datasets import read_dataset
 from datachain.lib.settings import Settings
 from datachain.lib.text import convert_text
 from datachain.progress import CombinedDownloadCallback
@@ -122,7 +122,7 @@ class PytorchDataset(IterableDataset):
     ) -> Generator[tuple[Any, ...], None, None]:
         catalog = self._get_catalog()
         session = Session("PyTorch", catalog=catalog)
-        ds = from_dataset(
+        ds = read_dataset(
             name=self.name, version=self.version, session=session
         ).settings(cache=self.cache, prefetch=self.prefetch)
         ds = ds.remove_file_signals()
datachain/lib/udf.py
CHANGED
datachain/query/dataset.py
CHANGED
@@ -47,15 +47,20 @@ from datachain.error import (
     QueryScriptCancelError,
 )
 from datachain.func.base import Function
-from datachain.lib.listing import is_listing_dataset
+from datachain.lib.listing import (
+    is_listing_dataset,
+    listing_dataset_expired,
+)
 from datachain.lib.udf import UDFAdapter, _get_cache
 from datachain.progress import CombinedDownloadCallback, TqdmCombinedDownloadCallback
 from datachain.query.schema import C, UDFParamSpec, normalize_param
 from datachain.query.session import Session
+from datachain.query.udf import UdfInfo
 from datachain.sql.functions.random import rand
 from datachain.utils import (
     batched,
     determine_processes,
+    determine_workers,
     filtered_cloudpickle_dumps,
     get_datachain_executable,
     safe_closing,
@@ -71,7 +76,6 @@ if TYPE_CHECKING:
     from datachain.data_storage import AbstractWarehouse
     from datachain.dataset import DatasetRecord
     from datachain.lib.udf import UDFAdapter, UDFResult
-    from datachain.query.udf import UdfInfo
 
 P = ParamSpec("P")
 
@@ -411,20 +415,15 @@ class UDFStep(Step, ABC):
     def populate_udf_table(self, udf_table: "Table", query: Select) -> None:
         from datachain.catalog import QUERY_SCRIPT_CANCELED_EXIT_CODE
 
-        use_partitioning = self.partition_by is not None
-        batching = self.udf.get_batching(use_partitioning)
-        workers = self.workers
-        if (
-            not workers
-            and os.environ.get("DATACHAIN_DISTRIBUTED")
-            and os.environ.get("DATACHAIN_SETTINGS_WORKERS")
-        ):
-            # Enable distributed processing by default if the module is available,
-            # and a default number of workers is provided.
-            workers = True
+        rows_total = self.catalog.warehouse.query_count(query)
+        if rows_total == 0:
+            return
 
-        processes = determine_processes(self.parallel)
+        workers = determine_workers(self.workers, rows_total=rows_total)
+        processes = determine_processes(self.parallel, rows_total=rows_total)
 
+        use_partitioning = self.partition_by is not None
+        batching = self.udf.get_batching(use_partitioning)
         udf_fields = [str(c.name) for c in query.selected_columns]
 
         prefetch = self.udf.prefetch
@@ -438,23 +437,24 @@ class UDFStep(Step, ABC):
                     "distributed processing."
                 )
 
-            from datachain.catalog.loader import
-
-
-
-
-
-
-
-
-
-
-                processes,
+            from datachain.catalog.loader import get_udf_distributor_class
+
+            udf_distributor_class = get_udf_distributor_class()
+            udf_distributor = udf_distributor_class(
+                catalog=catalog,
+                table=udf_table,
+                query=query,
+                udf_data=filtered_cloudpickle_dumps(self.udf),
+                batching=batching,
+                workers=workers,
+                processes=processes,
                 udf_fields=udf_fields,
+                rows_total=rows_total,
+                use_cache=self.cache,
                 is_generator=self.is_generator,
-
-                cache=self.cache,
+                min_task_size=self.min_task_size,
             )
+            udf_distributor()
         elif processes:
             # Parallel processing (faster for more CPU-heavy UDFs)
             if catalog.in_memory:
@@ -462,19 +462,21 @@ class UDFStep(Step, ABC):
                     "In-memory databases cannot be used "
                     "with parallel processing."
                 )
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+            udf_info = UdfInfo(
+                udf_data=filtered_cloudpickle_dumps(self.udf),
+                catalog_init=catalog.get_init_params(),
+                metastore_clone_params=catalog.metastore.clone_params(),
+                warehouse_clone_params=catalog.warehouse.clone_params(),
+                table=udf_table,
+                query=query,
+                udf_fields=udf_fields,
+                batching=batching,
+                processes=processes,
+                is_generator=self.is_generator,
+                cache=self.cache,
+                rows_total=rows_total,
+            )
 
             # Run the UDFDispatcher in another process to avoid needing
             # if __name__ == '__main__': in user scripts
@@ -1080,6 +1082,7 @@ class DatasetQuery:
         indexing_column_types: Optional[dict[str, Any]] = None,
         in_memory: bool = False,
         fallback_to_studio: bool = True,
+        update: bool = False,
     ) -> None:
         from datachain.remote.studio import is_token_set
 
@@ -1097,6 +1100,8 @@ class DatasetQuery:
         self.feature_schema: Optional[dict] = None
         self.column_types: Optional[dict[str, Any]] = None
         self.before_steps: list[Callable] = []
+        self.listing_fn: Optional[Callable] = None
+        self.update = update
 
         self.list_ds_name: Optional[str] = None
 
@@ -1190,23 +1195,30 @@ class DatasetQuery:
         col.table = self.table
         return col
 
-    def
-        """
-
-        """
-        self.before_steps.append(fn)
+    def set_listing_fn(self, fn: Callable) -> None:
+        """Setting listing function to be run if needed"""
+        self.listing_fn = fn
 
     def apply_steps(self) -> QueryGenerator:
         """
         Apply the steps in the query and return the resulting
         sqlalchemy.SelectBase.
         """
-
-
+        if self.list_ds_name and not self.starting_step:
+            listing_ds = None
+            try:
+                listing_ds = self.catalog.get_dataset(self.list_ds_name)
+            except DatasetNotFoundError:
+                pass
+
+            if not listing_ds or self.update or listing_dataset_expired(listing_ds):
+                assert self.listing_fn
+                self.listing_fn()
+                listing_ds = self.catalog.get_dataset(self.list_ds_name)
 
-        if self.list_ds_name:
             # at this point we know what is our starting listing dataset name
-            self._set_starting_step(
+            self._set_starting_step(listing_ds)  # type: ignore [arg-type]
+
         query = self.clone()
 
         index = os.getenv("DATACHAIN_QUERY_CHUNK_INDEX", self._chunk_index)
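With the new `update` flag and `listing_fn` in place, `apply_steps` re-runs the stored listing function only when it has to. The added condition boils down to a single predicate; a small sketch using the names from the diff (`needs_relisting` is a hypothetical helper, not part of the package):

```py
from datachain.lib.listing import listing_dataset_expired

def needs_relisting(listing_ds, update: bool) -> bool:
    """Mirror of the condition added to DatasetQuery.apply_steps: re-list when
    no listing dataset exists yet, when the caller asked for a refresh
    (update=True), or when the existing listing has expired."""
    return not listing_ds or update or listing_dataset_expired(listing_ds)
```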
datachain/query/dispatch.py
CHANGED
@@ -11,11 +11,10 @@ import multiprocess
 from cloudpickle import load, loads
 from fsspec.callbacks import DEFAULT_CALLBACK, Callback
 from multiprocess import get_context
-from sqlalchemy.sql import func
 
 from datachain.catalog import Catalog
 from datachain.catalog.catalog import clone_catalog_with_cache
-from datachain.catalog.loader import
+from datachain.catalog.loader import get_udf_distributor_class
 from datachain.lib.udf import _get_cache
 from datachain.query.batch import RowsOutput, RowsOutputBatch
 from datachain.query.dataset import (
@@ -59,6 +58,7 @@ def udf_entrypoint() -> int:
     dispatch = UDFDispatcher(udf_info)
 
     query = udf_info["query"]
+    rows_total = udf_info["rows_total"]
     batching = udf_info["batching"]
     n_workers = udf_info["processes"]
     if n_workers is True:
@@ -67,12 +67,6 @@ def udf_entrypoint() -> int:
     wh_cls, wh_args, wh_kwargs = udf_info["warehouse_clone_params"]
     warehouse: AbstractWarehouse = wh_cls(*wh_args, **wh_kwargs)
 
-    total_rows = next(
-        warehouse.db.execute(
-            query.with_only_columns(func.count(query.c.sys__id)).order_by(None)
-        )
-    )[0]
-
     with contextlib.closing(
         batching(warehouse.dataset_select_paginated, query, ids_only=True)
     ) as udf_inputs:
@@ -81,7 +75,7 @@ def udf_entrypoint() -> int:
         try:
             dispatch.run_udf_parallel(
                 udf_inputs,
-                total_rows=total_rows,
+                rows_total=rows_total,
                 n_workers=n_workers,
                 processed_cb=processed_cb,
                 download_cb=download_cb,
@@ -94,7 +88,7 @@ def udf_entrypoint() -> int:
 
 
 def udf_worker_entrypoint() -> int:
-    return
+    return get_udf_distributor_class().run_worker()
 
 
 class UDFDispatcher:
@@ -164,14 +158,14 @@ class UDFDispatcher:
     def run_udf_parallel(  # noqa: C901, PLR0912
         self,
         input_rows: Iterable[RowsOutput],
-        total_rows: int,
+        rows_total: int,
         n_workers: Optional[int] = None,
         processed_cb: Callback = DEFAULT_CALLBACK,
         download_cb: Callback = DEFAULT_CALLBACK,
     ) -> None:
         n_workers = get_n_workers_from_arg(n_workers)
 
-        input_batch_size = total_rows // n_workers
+        input_batch_size = rows_total // n_workers
         if input_batch_size == 0:
             input_batch_size = 1
         elif input_batch_size > DEFAULT_BATCH_SIZE:
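With row counting moved out of the dispatcher, `rows_total` now arrives through `udf_info` and feeds the per-worker batch sizing: `rows_total // n_workers`, raised to at least 1. The hunk ends before showing what happens when the quotient exceeds `DEFAULT_BATCH_SIZE`; the sketch below assumes it is capped at that constant, and the constant's value here is made up for illustration:

```py
DEFAULT_BATCH_SIZE = 10_000  # assumed value, for illustration only

def input_batch_size(rows_total: int, n_workers: int) -> int:
    # Mirrors the sizing logic in UDFDispatcher.run_udf_parallel.
    size = rows_total // n_workers
    if size == 0:
        size = 1
    elif size > DEFAULT_BATCH_SIZE:
        size = DEFAULT_BATCH_SIZE  # assumed cap; the branch body is outside the hunk
    return size

print(input_batch_size(rows_total=5, n_workers=8))          # 1
print(input_batch_size(rows_total=1_000_000, n_workers=4))  # 10000
```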
datachain/query/udf.py
CHANGED
@@ -1,8 +1,10 @@
-from
+from abc import ABC, abstractmethod
+from typing import TYPE_CHECKING, Any, Callable, Optional, TypedDict, Union
 
 if TYPE_CHECKING:
     from sqlalchemy import Select, Table
 
+    from datachain.catalog import Catalog
     from datachain.query.batch import BatchingStrategy
 
 
@@ -18,3 +20,30 @@ class UdfInfo(TypedDict):
     processes: Optional[int]
     is_generator: bool
     cache: bool
+    rows_total: int
+
+
+class AbstractUDFDistributor(ABC):
+    @abstractmethod
+    def __init__(
+        self,
+        catalog: "Catalog",
+        table: "Table",
+        query: "Select",
+        udf_data: bytes,
+        batching: "BatchingStrategy",
+        workers: Union[bool, int],
+        processes: Union[bool, int],
+        udf_fields: list[str],
+        rows_total: int,
+        use_cache: bool,
+        is_generator: bool = False,
+        min_task_size: Optional[Union[str, int]] = None,
+    ) -> None: ...
+
+    @abstractmethod
+    def __call__(self) -> None: ...
+
+    @staticmethod
+    @abstractmethod
+    def run_worker() -> int: ...
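`AbstractUDFDistributor` is the contract that `populate_udf_table` now programs against via `get_udf_distributor_class()`: construct it with the query, serialized UDF, and sizing information, call the instance to run the distributed step, and expose a worker entry point. A minimal illustrative stub that satisfies the interface (not the real distributed implementation):

```py
from typing import Optional, Union

from datachain.query.udf import AbstractUDFDistributor


class NoopUDFDistributor(AbstractUDFDistributor):
    """Hypothetical stub showing the required surface."""

    def __init__(
        self,
        catalog,
        table,
        query,
        udf_data: bytes,
        batching,
        workers: Union[bool, int],
        processes: Union[bool, int],
        udf_fields: list[str],
        rows_total: int,
        use_cache: bool,
        is_generator: bool = False,
        min_task_size: Optional[Union[str, int]] = None,
    ) -> None:
        self.rows_total = rows_total
        self.workers = workers

    def __call__(self) -> None:
        # A real implementation would partition the query and schedule the UDF.
        print(f"would distribute a UDF over {self.rows_total} rows ({self.workers} workers)")

    @staticmethod
    def run_worker() -> int:
        # A real implementation would start a worker loop; 0 means success.
        return 0
```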
datachain/toolkit/split.py
CHANGED
@@ -41,7 +41,7 @@ def train_test_split(
         from datachain.toolkit import train_test_split
 
         # Load a DataChain from a storage source (e.g., S3 bucket)
-        dc = dc.from_storage("s3://bucket/dir/")
+        dc = dc.read_storage("s3://bucket/dir/")
 
         # Perform a 70/30 train-test split
         train, test = train_test_split(dc, [0.7, 0.3])
datachain/utils.py
CHANGED
@@ -286,15 +286,41 @@ def retry_with_backoff(retries=5, backoff_sec=1, errors=(Exception,)):
     return retry
 
 
-def determine_processes(parallel: Optional[Union[bool, int]] = None) -> Union[bool, int]:
+def determine_workers(
+    workers: Union[bool, int],
+    rows_total: Optional[int] = None,
+) -> Union[bool, int]:
+    """Determine the number of workers to use for distributed processing."""
+    if rows_total is not None and rows_total <= 1:
+        # Disable distributed processing if there is no rows or only one row.
+        return False
+    if (
+        workers is False
+        and os.environ.get("DATACHAIN_DISTRIBUTED")
+        and os.environ.get("DATACHAIN_SETTINGS_WORKERS")
+    ):
+        # Enable distributed processing by default if the module is available,
+        # and a default number of workers is provided.
+        workers = int(os.environ["DATACHAIN_SETTINGS_WORKERS"])
+    if not workers or workers <= 0:
+        return False
+    return workers
+
+
+def determine_processes(
+    parallel: Optional[Union[bool, int]] = None,
+    rows_total: Optional[int] = None,
+) -> Union[bool, int]:
+    """Determine the number of processes to use for parallel processing."""
+    if rows_total is not None and rows_total <= 1:
+        # Disable parallel processing if there is no rows or only one row.
+        return False
     if parallel is None and os.environ.get("DATACHAIN_SETTINGS_PARALLEL") is not None:
         parallel = int(os.environ["DATACHAIN_SETTINGS_PARALLEL"])
-    if parallel is None or parallel is False:
+    if parallel is None or parallel is False or parallel == 0:
         return False
     if parallel is True:
         return True
-    if parallel == 0:
-        return False
     if parallel < 0:
         return True
     return parallel
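As defined above, both helpers short-circuit to `False` when there is at most one row, `determine_workers` only falls back to `DATACHAIN_SETTINGS_WORKERS` when distributed mode is configured, and `determine_processes` now treats `0` the same as `False`. A small demonstration of the return values, assuming none of the `DATACHAIN_*` environment variables are set:

```py
from datachain.utils import determine_processes, determine_workers

# Tiny inputs disable parallel/distributed processing outright.
assert determine_workers(4, rows_total=1) is False
assert determine_processes(4, rows_total=1) is False

# None, False and 0 all mean "no parallel processing".
assert determine_processes(None) is False
assert determine_processes(False) is False
assert determine_processes(0) is False

# True / negative values mean "use a default", positive counts pass through.
assert determine_processes(True) is True
assert determine_processes(-1) is True
assert determine_processes(8) == 8

# Explicit worker counts pass through when positive; False stays disabled.
assert determine_workers(4, rows_total=100) == 4
assert determine_workers(False, rows_total=100) is False
```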