PyPI - datachain - Versions diffs - 0.2.1__py3-none-any.whl → 0.2.3__py3-none-any.whl - Mend

datachain 0.2.1__py3-none-any.whl → 0.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of datachain might be problematic. Click here for more details.

Files changed (30)

datachain/catalog/catalog.py +30 -6
datachain/data_storage/db_engine.py +0 -2
datachain/data_storage/schema.py +10 -27
datachain/data_storage/warehouse.py +1 -7
datachain/lib/arrow.py +7 -13
datachain/lib/clip.py +151 -0
datachain/lib/dc.py +35 -57
datachain/lib/feature_utils.py +1 -2
datachain/lib/file.py +7 -0
datachain/lib/image.py +37 -79
datachain/lib/pytorch.py +4 -2
datachain/lib/signal_schema.py +2 -47
datachain/lib/text.py +18 -49
datachain/lib/udf.py +58 -30
datachain/lib/udf_signature.py +11 -10
datachain/lib/utils.py +17 -0
datachain/lib/webdataset.py +2 -2
datachain/listing.py +0 -3
datachain/query/dataset.py +63 -37
datachain/query/dispatch.py +2 -2
datachain/query/schema.py +1 -8
datachain/query/udf.py +16 -18
datachain/utils.py +28 -0
{datachain-0.2.1.dist-info → datachain-0.2.3.dist-info}/METADATA +2 -1
{datachain-0.2.1.dist-info → datachain-0.2.3.dist-info}/RECORD +29 -29
{datachain-0.2.1.dist-info → datachain-0.2.3.dist-info}/WHEEL +1 -1
datachain/lib/reader.py +0 -49
{datachain-0.2.1.dist-info → datachain-0.2.3.dist-info}/LICENSE +0 -0
{datachain-0.2.1.dist-info → datachain-0.2.3.dist-info}/entry_points.txt +0 -0
{datachain-0.2.1.dist-info → datachain-0.2.3.dist-info}/top_level.txt +0 -0

datachain/query/dataset.py CHANGED Viewed

@@ -1,3 +1,4 @@
+import ast
 import contextlib
 import datetime
 import inspect
@@ -51,9 +52,10 @@ from datachain.data_storage.schema import (
 from datachain.dataset import DatasetStatus, RowDict
 from datachain.error import DatasetNotFoundError, QueryScriptCancelError
 from datachain.progress import CombinedDownloadCallback
+from datachain.query.schema import DEFAULT_DELIMITER
 from datachain.sql.functions import rand
 from datachain.storage import Storage, StorageURI
-from datachain.utils import batched, determine_processes
+from datachain.utils import batched, determine_processes, inside_notebook
 from .batch import RowBatch
 from .metrics import metrics
@@ -62,7 +64,6 @@ from .session import Session
 from .udf import UDFBase, UDFClassWrapper, UDFFactory, UDFType
 if TYPE_CHECKING:
-    import pandas as pd
     from sqlalchemy.sql.elements import ClauseElement
     from sqlalchemy.sql.schema import Table
     from sqlalchemy.sql.selectable import GenerativeSelect
@@ -547,8 +548,9 @@ class UDF(Step, ABC):
                 else:
                     udf = self.udf
-                if hasattr(udf.func, "bootstrap") and callable(udf.func.bootstrap):
-                    udf.func.bootstrap()
+                if hasattr(udf.func, "setup") and callable(udf.func.setup):
+                    udf.func.setup()
                 warehouse = self.catalog.warehouse
                 with contextlib.closing(
@@ -599,12 +601,15 @@ class UDF(Step, ABC):
         # Create a dynamic module with the generated name
         dynamic_module = types.ModuleType(feature_module_name)
         # Get the import lines for the necessary objects from the main module
-        import_lines = [
-            source.getimport(obj, alias=name)
-            for name, obj in inspect.getmembers(sys.modules["__main__"], _imports)
-            if not (name.startswith("__") and name.endswith("__"))
-        ]
         main_module = sys.modules["__main__"]
+        if getattr(main_module, "__file__", None):
+            import_lines = list(get_imports(main_module))
+        else:
+            import_lines = [
+                source.getimport(obj, alias=name)
+                for name, obj in main_module.__dict__.items()
+                if _imports(obj) and not (name.startswith("__") and name.endswith("__"))
+            ]
         # Get the feature classes from the main module
         feature_classes = {
@@ -612,6 +617,10 @@ class UDF(Step, ABC):
             for name, obj in main_module.__dict__.items()
             if _feature_predicate(obj)
         }
+        if not feature_classes:
+            yield None
+            return
         # Get the source code of the feature classes
         feature_sources = [source.getsource(cls) for _, cls in feature_classes.items()]
         # Set the module name for the feature classes to the generated name
@@ -621,7 +630,7 @@ class UDF(Step, ABC):
         # Add the dynamic module to the sys.modules dictionary
         sys.modules[feature_module_name] = dynamic_module
         # Combine the import lines and feature sources
-        feature_file = "".join(import_lines) + "\n".join(feature_sources)
+        feature_file = "\n".join(import_lines) + "\n" + "\n".join(feature_sources)
         # Write the module content to a .py file
         with open(f"{feature_module_name}.py", "w") as module_file:
@@ -1362,33 +1371,11 @@ class DatasetQuery:
             cols = result.columns
             return [dict(zip(cols, row)) for row in result]
-    @classmethod
-    def create_empty_record(
-        cls, name: Optional[str] = None, session: Optional[Session] = None
-    ) -> "DatasetRecord":
-        session = Session.get(session)
-        if name is None:
-            name = session.generate_temp_dataset_name()
-        columns = session.catalog.warehouse.dataset_row_cls.file_columns()
-        return session.catalog.create_dataset(name, columns=columns)
-    @classmethod
-    def insert_record(
-        cls,
-        dsr: "DatasetRecord",
-        record: dict[str, Any],
-        session: Optional[Session] = None,
-    ) -> None:
-        session = Session.get(session)
-        dr = session.catalog.warehouse.dataset_rows(dsr)
-        insert_q = dr.get_table().insert().values(**record)
-        session.catalog.warehouse.db.execute(insert_q)
     def to_pandas(self) -> "pd.DataFrame":
-        import pandas as pd
         records = self.to_records()
-        return pd.DataFrame.from_records(records)
+        df = pd.DataFrame.from_records(records)
+        df.columns = [c.replace(DEFAULT_DELIMITER, ".") for c in df.columns]
+        return df
     def shuffle(self) -> "Self":
         # ToDo: implement shaffle based on seed and/or generating random column
@@ -1410,8 +1397,17 @@ class DatasetQuery:
     def show(self, limit=20) -> None:
         df = self.limit(limit).to_pandas()
-        no_footer = re.sub(r"\n\[\d+ rows x \d+ columns\]$", "", str(df))
-        print(no_footer.rstrip(" \n"))
+        options = ["display.max_colwidth", 50, "display.show_dimensions", False]
+        with pd.option_context(*options):
+            if inside_notebook():
+                from IPython.display import display
+                display(df)
+            else:
+                print(df.to_string())
         if len(df) == limit:
             print(f"[limited by {limit} objects]")
@@ -1692,6 +1688,15 @@ class DatasetQuery:
                     storage.timestamp_str,
                 )
+    def exec(self) -> "Self":
+        """Execute the query."""
+        try:
+            query = self.clone()
+            query.apply_steps()
+        finally:
+            self.cleanup()
+        return query
     def save(
         self,
         name: Optional[str] = None,
@@ -1878,3 +1883,24 @@ def _feature_predicate(obj):
 def _imports(obj):
     return not source.isfrommain(obj)
+def get_imports(m):
+    root = ast.parse(inspect.getsource(m))
+    for node in ast.iter_child_nodes(root):
+        if isinstance(node, ast.Import):
+            module = None
+        elif isinstance(node, ast.ImportFrom):
+            module = node.module
+        else:
+            continue
+        for n in node.names:
+            import_script = ""
+            if module:
+                import_script += f"from {module} "
+            import_script += f"import {n.name}"
+            if n.asname:
+                import_script += f" as {n.asname}"
+            yield import_script

datachain/query/dispatch.py CHANGED Viewed

@@ -370,8 +370,8 @@ class UDFWorker:
         return WorkerCallback(self.done_queue)
     def run(self) -> None:
-        if hasattr(self.udf.func, "bootstrap") and callable(self.udf.func.bootstrap):
-            self.udf.func.bootstrap()
+        if hasattr(self.udf.func, "setup") and callable(self.udf.func.setup):
+            self.udf.func.setup()
         while (batch := get_from_queue(self.task_queue)) != STOP_SIGNAL:
             n_rows = len(batch.rows) if isinstance(batch, RowBatch) else 1
             udf_output = self.udf(

datachain/query/schema.py CHANGED Viewed

@@ -3,14 +3,12 @@ import json
 from abc import ABC, abstractmethod
 from datetime import datetime, timezone
 from fnmatch import fnmatch
-from random import getrandbits
 from typing import TYPE_CHECKING, Any, Callable, ClassVar, Optional, Union
 import attrs
 import sqlalchemy as sa
 from fsspec.callbacks import DEFAULT_CALLBACK, Callback
-from datachain.data_storage.warehouse import RANDOM_BITS
 from datachain.sql.types import JSON, Boolean, DateTime, Int, Int64, SQLType, String
 if TYPE_CHECKING:
@@ -217,7 +215,7 @@ class DatasetRow:
         "source": String,
         "parent": String,
         "name": String,
-        "size": Int,
+        "size": Int64,
         "location": JSON,
         "vtype": String,
         "dir_type": Int,
@@ -227,8 +225,6 @@ class DatasetRow:
         "last_modified": DateTime,
         "version": String,
         "etag": String,
-        # system column
-        "random": Int64,
     }
     @staticmethod
@@ -267,8 +263,6 @@ class DatasetRow:
         last_modified = last_modified or datetime.now(timezone.utc)
-        random = getrandbits(RANDOM_BITS)
         return (  # type: ignore [return-value]
             source,
             parent,
@@ -283,7 +277,6 @@ class DatasetRow:
             last_modified,
             version,
             etag,
-            random,
         )
     @staticmethod

datachain/query/udf.py CHANGED Viewed

@@ -14,6 +14,7 @@ from typing import (
 from fsspec.callbacks import DEFAULT_CALLBACK, Callback
 from datachain.dataset import RowDict
+from datachain.lib.utils import AbstractUDF
 from .batch import Batch, BatchingStrategy, NoBatching, Partition, RowBatch
 from .schema import (
@@ -58,14 +59,6 @@ class UDFProperties:
     def signal_names(self) -> Iterable[str]:
         return self.output.keys()
-    def parameter_parser(self) -> Callable:
-        """Generate a parameter list from a dataset row."""
-        def plist(catalog: "Catalog", row: "RowDict", **kwargs) -> list:
-            return [p.get_value(catalog, row, **kwargs) for p in self.params]
-        return plist
 def udf(
     params: Sequence[UDFParamSpec],
@@ -113,32 +106,37 @@ class UDFBase:
         self.func = func
         self.properties = properties
         self.signal_names = properties.signal_names()
-        self.parameter_parser = properties.parameter_parser()
         self.output = properties.output
     def __call__(
         self,
         catalog: "Catalog",
-        param: "BatchingResult",
+        arg: "BatchingResult",
         is_generator: bool = False,
         cache: bool = False,
         cb: Callback = DEFAULT_CALLBACK,
     ) -> Iterable[UDFResult]:
-        if isinstance(param, RowBatch):
+        if isinstance(self.func, AbstractUDF):
+            self.func._catalog = catalog  # type: ignore[unreachable]
+        if isinstance(arg, RowBatch):
             udf_inputs = [
-                self.parameter_parser(catalog, row, cache=cache, cb=cb)
-                for row in param.rows
+                self.bind_parameters(catalog, row, cache=cache, cb=cb)
+                for row in arg.rows
             ]
             udf_outputs = self.func(udf_inputs)
-            return self._process_results(param.rows, udf_outputs, is_generator)
-        if isinstance(param, RowDict):
-            udf_inputs = self.parameter_parser(catalog, param, cache=cache, cb=cb)
+            return self._process_results(arg.rows, udf_outputs, is_generator)
+        if isinstance(arg, RowDict):
+            udf_inputs = self.bind_parameters(catalog, arg, cache=cache, cb=cb)
             udf_outputs = self.func(*udf_inputs)
             if not is_generator:
                 # udf_outputs is generator already if is_generator=True
                 udf_outputs = [udf_outputs]
-            return self._process_results([param], udf_outputs, is_generator)
-        raise ValueError(f"unexpected UDF parameter {param}")
+            return self._process_results([arg], udf_outputs, is_generator)
+        raise ValueError(f"Unexpected UDF argument: {arg}")
+    def bind_parameters(self, catalog: "Catalog", row: "RowDict", **kwargs) -> list:
+        return [p.get_value(catalog, row, **kwargs) for p in self.properties.params]
     def _process_results(
         self,

datachain/utils.py CHANGED Viewed

@@ -360,3 +360,31 @@ class JSONSerialize(json.JSONEncoder):
             return str(obj)
         return super().default(obj)
+def inside_colab() -> bool:
+    try:
+        from google import colab  # noqa: F401
+    except ImportError:
+        return False
+    return True
+def inside_notebook() -> bool:
+    if inside_colab():
+        return True
+    try:
+        shell = get_ipython().__class__.__name__  # type: ignore[name-defined]
+    except NameError:
+        return False
+    if shell == "ZMQInteractiveShell":
+        try:
+            import IPython
+            return IPython.__version__ >= "6.0.0"
+        except ImportError:
+            return False
+    return False

{datachain-0.2.1.dist-info → datachain-0.2.3.dist-info}/METADATA RENAMED Viewed

@@ -1,9 +1,10 @@
 Metadata-Version: 2.1
 Name: datachain
-Version: 0.2.1
+Version: 0.2.3
 Summary: Wrangle unstructured AI data at scale
 Author-email: Dmitry Petrov <support@dvc.org>
 License: Apache-2.0
+Project-URL: Documentation, https://datachain.dvc.ai
 Project-URL: Issues, https://github.com/iterative/dvcx/issues
 Project-URL: Source, https://github.com/iterative/dvcx
 Classifier: Programming Language :: Python :: 3

{datachain-0.2.1.dist-info → datachain-0.2.3.dist-info}/RECORD RENAMED Viewed

@@ -7,16 +7,16 @@ datachain/cli_utils.py,sha256=jrn9ejGXjybeO1ur3fjdSiAyCHZrX0qsLLbJzN9ErPM,2418
 datachain/config.py,sha256=PfC7W5yO6HFO6-iMB4YB-0RR88LPiGmD6sS_SfVbGso,1979
 datachain/dataset.py,sha256=MZezyuJWNj_3PEtzr0epPMNyWAOTrhTSPI5FmemV6L4,14470
 datachain/error.py,sha256=GY9KYTmb7GHXn2gGHV9X-PBhgwLj3i7VpK7tGHtAoGM,1279
-datachain/listing.py,sha256=1arE_9gpjhHqGQCpQZj_mLoocrZWRNDHJ-bkPc08NQs,8247
+datachain/listing.py,sha256=sX8vZNzAzoTel1li6VJiYeHUJwseUERVEoW9D5P7tII,8192
 datachain/node.py,sha256=fHe7k5ajI2g2qnzsG-_NQR_T-QdBYctVeEa8c8dsu_Y,5703
 datachain/nodes_fetcher.py,sha256=kca19yvu11JxoVY1t4_ydp1FmchiV88GnNicNBQ9NIA,831
 datachain/nodes_thread_pool.py,sha256=ZyzBvUImIPmi4WlKC2SW2msA0UhtembbTdcs2nx29A0,3191
 datachain/progress.py,sha256=7_8FtJs770ITK9sMq-Lt4k4k18QmYl4yIG_kCoWID3o,4559
 datachain/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datachain/storage.py,sha256=RiSJLYdHUjnrEWkLBKPcETHpAxld_B2WxLg711t0aZI,3733
-datachain/utils.py,sha256=DV-_OON2OomEbxuQuK1lE_2qNTf28QByNcNcEhYsilE,10202
+datachain/utils.py,sha256=12yQAV8tfyCHqp_xJcJBeNnr1L_BO8e2bOPyXdM68gs,10759
 datachain/catalog/__init__.py,sha256=g2iAAFx_gEIrqshXlhSEbrc8qDaEH11cjU40n3CHDz4,409
-datachain/catalog/catalog.py,sha256=5WkICtTYCN5xSMGDd5djLnEBw8kkcDf-IpFYf7kfeuQ,78654
+datachain/catalog/catalog.py,sha256=pulKGJgAmxqSmFqBhA-J0wCKdBqGX4vqpV0cAvV6vUw,79578
 datachain/catalog/datasource.py,sha256=D-VWIVDCM10A8sQavLhRXdYSCG7F4o4ifswEF80_NAQ,1412
 datachain/catalog/loader.py,sha256=GJ8zhEYkC7TuaPzCsjJQ4LtTdECu-wwYzC12MikPOMQ,7307
 datachain/catalog/subclass.py,sha256=B5R0qxeTYEyVAAPM1RutBPSoXZc8L5mVVZeSGXki9Sw,2096
@@ -28,52 +28,52 @@ datachain/client/gcs.py,sha256=ucX8e6JrqlFY-f80zkv084vxnKdtxpO32QJ-RG8Nv1s,4454
 datachain/client/local.py,sha256=NQVkLTJQ-a7Udavqbh_4uT-IejfZQYn10j22owz9sis,5150
 datachain/client/s3.py,sha256=TmW4f7VUM5CMZjSmgyFQFKeMUGrXt2SLoLEbLOUleiU,6296
 datachain/data_storage/__init__.py,sha256=cEOJpyu1JDZtfUupYucCDNFI6e5Wmp_Oyzq6rZv32Y8,398
-datachain/data_storage/db_engine.py,sha256=mxOoWP4ntBMgLeTAk4dlEeIJArAz4x_tFrHytcAfLpo,3341
+datachain/data_storage/db_engine.py,sha256=rgBuqJ-M1j5QyqiUQuJRewctuvRRj8LBDL54-aPEFxE,3287
 datachain/data_storage/id_generator.py,sha256=VlDALKijggegAnNMJwuMETJgnLoPYxpkrkld5DNTPQw,3839
 datachain/data_storage/job.py,sha256=w-7spowjkOa1P5fUVtJou3OltT0L48P0RYWZ9rSJ9-s,383
 datachain/data_storage/metastore.py,sha256=y-4fYvuOPnWeYxAvqhDnw6CdlTvQiurg0Gg4TaG9LR0,54074
-datachain/data_storage/schema.py,sha256=t58LexPOCam_vWV0W52otEDNXgtFPHX3QFApEncFy2s,8809
+datachain/data_storage/schema.py,sha256=bY3q2OUaUraos0s5BnwWkhgce8YpeNmIl7M1ifshoes,8074
 datachain/data_storage/serializer.py,sha256=6G2YtOFqqDzJf1KbvZraKGXl2XHZyVml2krunWUum5o,927
 datachain/data_storage/sqlite.py,sha256=F68Q_AIqNAObZ5kJ0GnBqRC6e2D2sRehkQo8UzrHgtI,25079
-datachain/data_storage/warehouse.py,sha256=tL2mYoXVZe-coKLTRXEJ0sMdEr2BD0GwgIWip5PP5CM,33300
+datachain/data_storage/warehouse.py,sha256=h35JiJoCGtwkMctis_x3NHxkwEejX5sIWvJOluZxrOI,33132
 datachain/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datachain/lib/arrow.py,sha256=7lAas8hSh3vL7S7s2KOlkYn4viQpfVbM_FQ_hLCh5oc,2593
+datachain/lib/arrow.py,sha256=FF3WWUOjB6Prw8ygfiLsrVfrdob0S01lPzEazuGqoO8,2556
 datachain/lib/cached_stream.py,sha256=t2ifK0hZVZiVn0MQ8D3FaFK1-qK84TwJW2Dw1SRsw9g,1066
 datachain/lib/claude.py,sha256=iAauA1zNVNONpLzUo1t0QN5PZ5Ot6cZkfib7Ka_c638,1969
-datachain/lib/dc.py,sha256=PBbEZhSPnbvB6jh2eTgZyDSouAGbjgEv8xabW45_vmk,35460
+datachain/lib/clip.py,sha256=rDeZlFGs0DXBlpmh5ZQJhR9Sz13bWAZGQjfYm1hsUI4,5388
+datachain/lib/dc.py,sha256=Sf99R0oOqf7tlS2gieaG56z3bF7YVcMjhJOZrFRfFs8,34778
 datachain/lib/feature.py,sha256=QDloA9HE7URf9J_veKrguYBvSg-0cbXZFTswNxrKsB8,12135
 datachain/lib/feature_registry.py,sha256=K3jGQzBp2HZDjR9hdGe1BZaXOAne8RpkCRRQdTVjkTs,1622
-datachain/lib/feature_utils.py,sha256=LIK233IWGWFhuav5Rm8de0xIOSnuwA1ubk6OYrxrfN0,4712
-datachain/lib/file.py,sha256=GQrqGgCEHICrUTdzTz_yhXqJWiae9EPTte1sd3hKeEU,8246
+datachain/lib/feature_utils.py,sha256=F4ZENO6tTQvd36a-O1AurYjFSUpoyZaT4qgXsKjQDts,4650
+datachain/lib/file.py,sha256=TdhsPYmG0Atkd_QAO997oA8AuM854wNbjjLLT1uiD2M,8346
 datachain/lib/gpt4_vision.py,sha256=idyXVZVWzltstGaVIu5RYE5UNbdqcPEjIWy81O1MwkM,2922
 datachain/lib/hf_image_to_text.py,sha256=HiPSWzJRDT-vnz9DXJbJBNCMNl9wmpxiSS3PbbVz8SE,3310
 datachain/lib/hf_pipeline.py,sha256=f0AH_XCziOF1OKN3d1w1swTBLaeajMJ8xgdsX37i5-o,2287
-datachain/lib/image.py,sha256=l2lgUR3YQzjpBmTJewzUtL5zJsLDQH32lbbaLu9WvWA,3631
+datachain/lib/image.py,sha256=ZYfDqr9p-RRmWBeWFQwXLS1J3vQS616ykfMUvQVpqBY,2717
 datachain/lib/image_transform.py,sha256=NXWtnVOcofWBgl_YMxb4ABpaT7JTBMx7tLKvErH1IC4,3024
 datachain/lib/iptc_exif_xmp.py,sha256=xrbxFeY-wRP6T5JsUgE3EXfTxKvZVymRaRD_VIfxD0A,2236
 datachain/lib/meta_formats.py,sha256=wIVVLRLp45Zk4vjZRd_P1UtD24vpDCb-vILWtcsACwk,6630
-datachain/lib/pytorch.py,sha256=oU16XXAyAmiiabe1IoQoID00-u3uZ5GhCN48uAl6WDs,5421
-datachain/lib/reader.py,sha256=rPXXNoTUdm6PQwkAlaU-nOBreP_q4ett_EjFStrA_W0,1727
+datachain/lib/pytorch.py,sha256=Z7iZCsqJzUT0PynVo23Xu4Fx7qIuuEZyH83R1tR5mfI,5561
 datachain/lib/settings.py,sha256=6Nkoh8riETrftYwDp3aniK53Dsjc07MdztL8N0cW1D8,2849
-datachain/lib/signal_schema.py,sha256=KaH194dAH8Zt8FtlNAgdVqcZlJc42y7RbcB37ldPPAY,11688
-datachain/lib/text.py,sha256=EEZrYohADi5rAGg3aLLRwtvyAV9js_yWAGhr2C3QbwI,2424
-datachain/lib/udf.py,sha256=D9TMxkAvj3zPRnZmkCxadEDtiG3B45t2xAEpuO14MOQ,5600
-datachain/lib/udf_signature.py,sha256=DAWMQ0dvFkKabpY5MV5K2q9YmOSTKfiV8KuUBs_6kMg,7258
+datachain/lib/signal_schema.py,sha256=KTegbx-yMvtaKEoUxLgDx5MxMA8De-nmdtqnV1932N8,10151
+datachain/lib/text.py,sha256=PUT1O0jNJoQGsuhff2LgDpzTWk2eMdwIKqEDBrE448M,1307
+datachain/lib/udf.py,sha256=kMlOsHCVybnnq4AMtYqjylZH7x2tGE62FsDPOu9qhWM,6612
+datachain/lib/udf_signature.py,sha256=CUKgoVpM_N8CgvMncpAw2RYchoiJdAGdDSdluoP0hIk,7161
 datachain/lib/unstructured.py,sha256=9Y6rAelXdYqkNbPaqz6DhXjhS8d6qXcP0ieIsWkzvkk,1143
-datachain/lib/utils.py,sha256=YQKzuW096SGe7QwHwdyS47k_9l2Rh73b-wBqt1-niw4,213
+datachain/lib/utils.py,sha256=5-kJlAZE0D9nXXweAjo7-SP_AWGo28feaDByONYaooQ,463
 datachain/lib/vfile.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datachain/lib/webdataset.py,sha256=JouI5WORgkl-am_DwQwWqO8RI1UwgbUPWsauZZj2Fmc,8221
+datachain/lib/webdataset.py,sha256=GWB_pocfRZGoU4Lhd7Wh3hx2Rnm_fJWXX4S_zXJIEmk,8286
 datachain/lib/webdataset_laion.py,sha256=HAtSCbVvEQqzKkoRamRxDKaQALSB3QmJRU2yWRFNxwY,2147
 datachain/query/__init__.py,sha256=tv-spkjUCYamMN9ys_90scYrZ8kJ7C7d1MTYVmxGtk4,325
 datachain/query/batch.py,sha256=sOMxXbaNii7lVyFIEZ2noqbhy_S8qtZ-WWxrka72shc,3474
 datachain/query/builtins.py,sha256=ZKNs49t8Oa_OaboCBIEqtXZt7c1Qe9OR_C_HpoDriIU,2781
-datachain/query/dataset.py,sha256=QYrtZApS8djybkuDfGO0tt8O6sCBlmkg9TE__R4eM-I,64475
-datachain/query/dispatch.py,sha256=fEk1qalxAb5JJhN-iq0Mg9MyWve4XoN1Q7uvrX4mJY4,13106
+datachain/query/dataset.py,sha256=vpu2wQYC5uWc-LdZrNV-PV7xQapbYCtqyrXiiIa77DI,64982
+datachain/query/dispatch.py,sha256=ZeL5dga5d4cJDBftK7gAQ_mx4C7zq6t3z0Hdt7mcZYY,13094
 datachain/query/metrics.py,sha256=vsECqbZfoSDBnvC3GQlziKXmISVYDLgHP1fMPEOtKyo,640
 datachain/query/params.py,sha256=O_j89mjYRLOwWNhYZl-z7mi-rkdP7WyFmaDufsdTryE,863
-datachain/query/schema.py,sha256=tWlUiu9eiS5y8BTQaPI2raGclt0YzcO3DoUN1OkwnrE,7946
+datachain/query/schema.py,sha256=n1NBOj6JO2I26mZD4vSURmVC2rk3mjIkJQheeLogoy4,7748
 datachain/query/session.py,sha256=e4_vv4RqAjU-g3KK0avgLd9MEsmJBzRVEj1w8v7fP1k,3663
-datachain/query/udf.py,sha256=0WkBPW5ymZbOGMimSXpVWVc8whjTuYfRrnxPWNHabSk,7127
+datachain/query/udf.py,sha256=gnLDM7LKH8_bbdDeVHnlDKaBdbWc_NAbwvYCc4i-OlU,7101
 datachain/remote/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datachain/remote/studio.py,sha256=bZb85WjtqMNFBoRuPbH-TEGpAyz0afROR7E9UgIef_Y,7438
 datachain/sql/__init__.py,sha256=A2djrbQwSMUZZEIKGnm-mnRA-NDSbiDJNpAmmwGNyIo,303
@@ -92,9 +92,9 @@ datachain/sql/sqlite/__init__.py,sha256=TAdJX0Bg28XdqPO-QwUVKy8rg78cgMileHvMNot7
 datachain/sql/sqlite/base.py,sha256=nPMF6_FF04hclDNZev_YfxMgbJAsWEdF-rU2pUhqBtc,12048
 datachain/sql/sqlite/types.py,sha256=oP93nLfTBaYnN0z_4Dsv-HZm8j9rrUf1esMM-z3JLbg,1754
 datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR0,469
-datachain-0.2.1.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
-datachain-0.2.1.dist-info/METADATA,sha256=kgX6auIOqU0DtW6dRyGWs1TrlGYLf1kN_By0XFW3t0Q,14346
-datachain-0.2.1.dist-info/WHEEL,sha256=y4mX-SOX4fYIkonsAGA5N0Oy-8_gI4FXw5HNI1xqvWg,91
-datachain-0.2.1.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
-datachain-0.2.1.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
-datachain-0.2.1.dist-info/RECORD,,
+datachain-0.2.3.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
+datachain-0.2.3.dist-info/METADATA,sha256=NmviJ7UsETesadrJjeyoYjeNqul6GMd9D4zDZLk23Co,14399
+datachain-0.2.3.dist-info/WHEEL,sha256=Z4pYXqR_rTB7OWNDYFOm1qRk0RX6GFP2o8LgvP453Hk,91
+datachain-0.2.3.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
+datachain-0.2.3.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
+datachain-0.2.3.dist-info/RECORD,,

{datachain-0.2.1.dist-info → datachain-0.2.3.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (70.2.0)
+Generator: setuptools (70.3.0)
 Root-Is-Purelib: true
 Tag: py3-none-any

datachain/lib/reader.py DELETED Viewed

@@ -1,49 +0,0 @@
-from abc import ABC, abstractmethod
-from typing import TYPE_CHECKING, Any
-if TYPE_CHECKING:
-    from datachain.lib.feature_utils import FeatureLike
-class FeatureReader(ABC):
-    def __init__(self, fr_class: "FeatureLike"):
-        """
-        Class to call on feature values to perform post-processing. Used when
-        iterating over dataset with `ds.to_pytorch()` and `ds.get_values()`.
-        The class must include:
-        - `self.fr_class` to define the feature class to read.
-        - `self.__call__(self, value)` to call on the feature value returned by
-          `self.fr_class.get_value()`.
-        Examples:
-            >>> class PrefixReader(FeatureReader):
-            >>>     def __call__(self, value):
-            >>>         return "prefix-" + value
-            >>> for row in ds.get_values(PrefixReader(MyFeature)):
-            >>>     print(row)
-            >>> class SuffixReader(FeatureReader):
-            >>>     def __init__(self, fr_class, suffix):
-            >>>         self.suffix = suffix
-            >>>         super().__init__(fr_class)
-            >>>     def __call__(self, value):
-            >>>         return value + self.suffix
-            >>> for row in ds.get_values(SuffixReader(MyFeature, "-suffix")):
-            >>>     print(row)
-        """
-        self.fr_class = fr_class
-    @abstractmethod
-    def __call__(self, value: Any) -> Any:
-        pass
-class LabelReader(FeatureReader):
-    def __init__(self, fr_class: "FeatureLike", classes: list):
-        """Get column values as 0-based integer index of classes."""
-        self.classes = classes
-        super().__init__(fr_class)
-    def __call__(self, value: str) -> int:
-        return self.classes.index(value)

{datachain-0.2.1.dist-info → datachain-0.2.3.dist-info}/LICENSE RENAMED Viewed

File without changes

{datachain-0.2.1.dist-info → datachain-0.2.3.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{datachain-0.2.1.dist-info → datachain-0.2.3.dist-info}/top_level.txt RENAMED Viewed

File without changes

datachain 0.2.1__py3-none-any.whl → 0.2.3__py3-none-any.whl

Potentially problematic release.

datachain 0.2.1__py3-none-any.whl → 0.2.3__py3-none-any.whl