datachain 0.2.10__py3-none-any.whl → 0.2.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of datachain might be problematic.
- datachain/lib/dc.py +33 -1
- datachain/lib/signal_schema.py +21 -4
- datachain/lib/webdataset.py +4 -3
- datachain/query/dataset.py +15 -122
- datachain/query/dispatch.py +15 -13
- datachain/utils.py +39 -0
- {datachain-0.2.10.dist-info → datachain-0.2.11.dist-info}/METADATA +2 -1
- {datachain-0.2.10.dist-info → datachain-0.2.11.dist-info}/RECORD +12 -13
- {datachain-0.2.10.dist-info → datachain-0.2.11.dist-info}/WHEEL +1 -1
- datachain/lib/feature_registry.py +0 -77
- {datachain-0.2.10.dist-info → datachain-0.2.11.dist-info}/LICENSE +0 -0
- {datachain-0.2.10.dist-info → datachain-0.2.11.dist-info}/entry_points.txt +0 -0
- {datachain-0.2.10.dist-info → datachain-0.2.11.dist-info}/top_level.txt +0 -0
datachain/lib/dc.py
CHANGED

@@ -11,6 +11,7 @@ from typing import (
     Union,
 )

+import pandas as pd
 import sqlalchemy
 from pydantic import BaseModel, create_model

@@ -38,9 +39,9 @@ from datachain.query.dataset import (
     detach,
 )
 from datachain.query.schema import Column, DatasetRow
+from datachain.utils import inside_notebook

 if TYPE_CHECKING:
-    import pandas as pd
     from typing_extensions import Self

 C = Column
@@ -731,6 +732,37 @@ class DataChain(DatasetQuery):

         return cls.from_values(name, session, object_name=object_name, **fr_map)

+    def to_pandas(self, flatten=False) -> "pd.DataFrame":
+        headers, max_length = self.signals_schema.get_headers_with_length()
+        if flatten or max_length < 2:
+            df = pd.DataFrame.from_records(self.to_records())
+            if headers:
+                df.columns = [".".join(filter(None, header)) for header in headers]
+            return df
+
+        transposed_result = list(map(list, zip(*self.results())))
+        data = {tuple(n): val for n, val in zip(headers, transposed_result)}
+        return pd.DataFrame(data)
+
+    def show(self, limit: int = 20, flatten=False, transpose=False) -> None:
+        dc = self.limit(limit) if limit > 0 else self
+        df = dc.to_pandas(flatten)
+        if transpose:
+            df = df.T
+
+        with pd.option_context(
+            "display.max_columns", None, "display.multi_sparse", False
+        ):
+            if inside_notebook():
+                from IPython.display import display
+
+                display(df)
+            else:
+                print(df)
+
+        if len(df) == limit:
+            print(f"\n[Limited by {len(df)} rows]")
+
     def parse_tabular(
         self,
         output: OutputType = None,
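The two methods added above replace the pandas helpers removed from DatasetQuery later in this diff (see datachain/query/dataset.py). A minimal usage sketch, assuming `from_values` accepts keyword signal values with default name and session as shown elsewhere in this file; the `fib` and `odd` signals are made up for illustration:

from datachain.lib.dc import DataChain

chain = DataChain.from_values(fib=[1, 1, 2, 3, 5, 8], odd=[True, True, False, True, True, False])

df = chain.to_pandas()                # nested signals become multi-level columns
flat = chain.to_pandas(flatten=True)  # single level, "parent.child" column names

chain.show(limit=3)                   # prints up to 3 rows; uses IPython display inside notebooks
chain.show(limit=3, transpose=True)   # one column per row, handy for wide schemas
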
datachain/lib/signal_schema.py
CHANGED

@@ -143,8 +143,8 @@ class SignalSchema:
                 if not fr:
                     raise SignalSchemaError(
                         f"cannot deserialize '{signal}': "
-                        f"
-                        f" Try to
+                        f"unknown type '{type_name}'."
+                        f" Try to add it with `ModelStore.add({type_name})`."
                     )
             except TypeError as err:
                 raise SignalSchemaError(
@@ -192,10 +192,17 @@
     def slice(
         self, keys: Sequence[str], setup: Optional[dict[str, Callable]] = None
     ) -> "SignalSchema":
+        # Make new schema that combines current schema and setup signals
         setup = setup or {}
         setup_no_types = dict.fromkeys(setup.keys(), str)
-        union = self.values | setup_no_types
-
+        union = SignalSchema(self.values | setup_no_types)
+        # Slice combined schema by keys
+        schema = {}
+        for k in keys:
+            try:
+                schema[k] = union._find_in_tree(k.split("."))
+            except SignalResolvingError:
+                pass
         return SignalSchema(schema, setup)

     def row_to_features(
@@ -331,6 +338,16 @@
             sub_schema = SignalSchema({"* list of": args[0]})
             sub_schema.print_tree(indent=indent, start_at=total_indent + indent)

+    def get_headers_with_length(self):
+        paths = [
+            path for path, _, has_subtree, _ in self.get_flat_tree() if not has_subtree
+        ]
+        max_length = max([len(path) for path in paths], default=0)
+        return [
+            path + [""] * (max_length - len(path)) if len(path) < max_length else path
+            for path in paths
+        ], max_length
+
     def __or__(self, other):
         return self.__class__(self.values | other.values)

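For reference, the padding logic in get_headers_with_length is what lets DataChain.to_pandas build a uniform column index: shorter signal paths are right-padded with empty strings up to the deepest path. A standalone sketch of just that step, with made-up leaf paths standing in for the output of get_flat_tree():

# Illustrative leaf paths; the real ones come from SignalSchema.get_flat_tree().
paths = [["file", "path"], ["file", "size"], ["score"]]

max_length = max([len(path) for path in paths], default=0)
headers = [
    path + [""] * (max_length - len(path)) if len(path) < max_length else path
    for path in paths
]

print(headers)     # [['file', 'path'], ['file', 'size'], ['score', '']]
print(max_length)  # 2
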
datachain/lib/webdataset.py
CHANGED

@@ -13,8 +13,9 @@ from typing import (
     get_origin,
 )

-from pydantic import
+from pydantic import Field

+from datachain.lib.data_model import DataModel
 from datachain.lib.file import File, TarVFile
 from datachain.lib.utils import DataChainError

@@ -45,7 +46,7 @@ class UnknownFileExtensionError(WDSError):
         super().__init__(tar_stream, f"unknown extension '{ext}' for file '{name}'")


-class WDSBasic(
+class WDSBasic(DataModel):
     file: File


@@ -74,7 +75,7 @@ class WDSAllFile(WDSBasic):
     cbor: Optional[bytes] = Field(default=None)


-class WDSReadableSubclass(
+class WDSReadableSubclass(DataModel):
     @staticmethod
     def _reader(builder, item: tarfile.TarInfo) -> "WDSReadableSubclass":
         raise NotImplementedError
datachain/query/dataset.py
CHANGED

@@ -1,4 +1,3 @@
-import ast
 import contextlib
 import datetime
 import inspect
@@ -10,7 +9,6 @@ import re
 import string
 import subprocess
 import sys
-import types
 from abc import ABC, abstractmethod
 from collections.abc import Generator, Iterable, Iterator, Sequence
 from copy import copy
@@ -26,12 +24,9 @@ from typing import (
 )

 import attrs
-import pandas as pd
 import sqlalchemy
 from attrs import frozen
-from dill import dumps, source
 from fsspec.callbacks import DEFAULT_CALLBACK, Callback, TqdmCallback
-from pydantic import BaseModel
 from sqlalchemy import Column
 from sqlalchemy.sql import func as f
 from sqlalchemy.sql.elements import ColumnClause, ColumnElement
@@ -53,10 +48,13 @@ from datachain.data_storage.schema import (
 from datachain.dataset import DatasetStatus, RowDict
 from datachain.error import DatasetNotFoundError, QueryScriptCancelError
 from datachain.progress import CombinedDownloadCallback
-from datachain.query.schema import DEFAULT_DELIMITER
 from datachain.sql.functions import rand
 from datachain.storage import Storage, StorageURI
-from datachain.utils import
+from datachain.utils import (
+    batched,
+    determine_processes,
+    filtered_cloudpickle_dumps,
+)

 from .metrics import metrics
 from .schema import C, UDFParamSpec, normalize_param
@@ -492,7 +490,7 @@ class UDF(Step, ABC):
         elif processes:
             # Parallel processing (faster for more CPU-heavy UDFs)
             udf_info = {
-                "
+                "udf_data": filtered_cloudpickle_dumps(self.udf),
                 "catalog_init": self.catalog.get_init_params(),
                 "id_generator_clone_params": (
                     self.catalog.id_generator.clone_params()
@@ -513,16 +511,15 @@ class UDF(Step, ABC):

             envs = dict(os.environ)
             envs.update({"PYTHONPATH": os.getcwd()})
-
-
-
-
-
-
-
-
-
-                raise RuntimeError("UDF Execution Failed!")
+            process_data = filtered_cloudpickle_dumps(udf_info)
+            result = subprocess.run(  # noqa: S603
+                [datachain_exec_path, "--internal-run-udf"],
+                input=process_data,
+                check=False,
+                env=envs,
+            )
+            if result.returncode != 0:
+                raise RuntimeError("UDF Execution Failed!")

         else:
             # Otherwise process single-threaded (faster for smaller UDFs)
@@ -571,57 +568,6 @@ class UDF(Step, ABC):
             self.catalog.warehouse.close()
             raise

-    @contextlib.contextmanager
-    def process_feature_module(self):
-        # Generate a random name for the feature module
-        feature_module_name = "tmp" + _random_string(10)
-        # Create a dynamic module with the generated name
-        dynamic_module = types.ModuleType(feature_module_name)
-        # Get the import lines for the necessary objects from the main module
-        main_module = sys.modules["__main__"]
-        if getattr(main_module, "__file__", None):
-            import_lines = list(get_imports(main_module))
-        else:
-            import_lines = [
-                source.getimport(obj, alias=name)
-                for name, obj in main_module.__dict__.items()
-                if _imports(obj) and not (name.startswith("__") and name.endswith("__"))
-            ]
-
-        # Get the feature classes from the main module
-        feature_classes = {
-            name: obj
-            for name, obj in main_module.__dict__.items()
-            if _feature_predicate(obj)
-        }
-        if not feature_classes:
-            yield None
-            return
-
-        # Get the source code of the feature classes
-        feature_sources = [source.getsource(cls) for _, cls in feature_classes.items()]
-        # Set the module name for the feature classes to the generated name
-        for name, cls in feature_classes.items():
-            cls.__module__ = feature_module_name
-            setattr(dynamic_module, name, cls)
-        # Add the dynamic module to the sys.modules dictionary
-        sys.modules[feature_module_name] = dynamic_module
-        # Combine the import lines and feature sources
-        feature_file = "\n".join(import_lines) + "\n" + "\n".join(feature_sources)
-
-        # Write the module content to a .py file
-        with open(f"{feature_module_name}.py", "w") as module_file:
-            module_file.write(feature_file)
-
-        try:
-            yield feature_module_name
-        finally:
-            for cls in feature_classes.values():
-                cls.__module__ = main_module.__name__
-            os.unlink(f"{feature_module_name}.py")
-            # Remove the dynamic module from sys.modules
-            del sys.modules[feature_module_name]
-
     def create_partitions_table(self, query: Select) -> "Table":
         """
         Create temporary table with group by partitions.
@@ -1346,12 +1292,6 @@ class DatasetQuery:
     def to_records(self) -> list[dict[str, Any]]:
         return self.results(lambda cols, row: dict(zip(cols, row)))

-    def to_pandas(self) -> "pd.DataFrame":
-        records = self.to_records()
-        df = pd.DataFrame.from_records(records)
-        df.columns = [c.replace(DEFAULT_DELIMITER, ".") for c in df.columns]
-        return df
-
     def shuffle(self) -> "Self":
         # ToDo: implement shaffle based on seed and/or generating random column
         return self.order_by(C.sys__rand)
@@ -1370,22 +1310,6 @@

         return sampled.limit(n)

-    def show(self, limit=20) -> None:
-        df = self.limit(limit).to_pandas()
-
-        options = ["display.max_colwidth", 50, "display.show_dimensions", False]
-        with pd.option_context(*options):
-            if inside_notebook():
-                from IPython.display import display
-
-                display(df)
-
-            else:
-                print(df.to_string())
-
-        if len(df) == limit:
-            print(f"[limited by {limit} objects]")
-
     def clone(self, new_table=True) -> "Self":
         obj = copy(self)
         obj.steps = obj.steps.copy()
@@ -1853,34 +1777,3 @@ def _random_string(length: int) -> str:
         random.choice(string.ascii_letters + string.digits)  # noqa: S311
         for i in range(length)
     )
-
-
-def _feature_predicate(obj):
-    return (
-        inspect.isclass(obj) and source.isfrommain(obj) and issubclass(obj, BaseModel)
-    )
-
-
-def _imports(obj):
-    return not source.isfrommain(obj)
-
-
-def get_imports(m):
-    root = ast.parse(inspect.getsource(m))
-
-    for node in ast.iter_child_nodes(root):
-        if isinstance(node, ast.Import):
-            module = None
-        elif isinstance(node, ast.ImportFrom):
-            module = node.module
-        else:
-            continue
-
-        for n in node.names:
-            import_script = ""
-            if module:
-                import_script += f"from {module} "
-            import_script += f"import {n.name}"
-            if n.asname:
-                import_script += f" as {n.asname}"
-            yield import_script
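Taken together, the dataset.py changes drop the old dill-based feature-module rewriting and hand the UDF to the worker process as cloudpickle bytes on stdin. A rough sketch of that hand-off pattern in isolation; the inline worker below is a stand-in, not the real `datachain --internal-run-udf` entrypoint:

import subprocess
import sys

import cloudpickle

# Mirrors udf_info above: the UDF itself is pickled separately under "udf_data".
payload = cloudpickle.dumps({"udf_data": cloudpickle.dumps(lambda x: x * 2)})

# Stand-in worker: read the pickled dict from stdin, rebuild the UDF, run it once.
worker_src = (
    "import sys, cloudpickle\n"
    "info = cloudpickle.load(sys.stdin.buffer)\n"
    "udf = cloudpickle.loads(info['udf_data'])\n"
    "print(udf(21))\n"
)

result = subprocess.run(
    [sys.executable, "-c", worker_src],
    input=payload,
    check=False,
    capture_output=True,
)
if result.returncode != 0:
    raise RuntimeError("UDF Execution Failed!")
print(result.stdout.decode().strip())  # 42
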
datachain/query/dispatch.py
CHANGED

@@ -10,7 +10,7 @@ from typing import Any, Optional

 import attrs
 import multiprocess
-from
+from cloudpickle import load, loads
 from fsspec.callbacks import DEFAULT_CALLBACK, Callback
 from multiprocess import get_context

@@ -84,7 +84,7 @@ def put_into_queue(queue: Queue, item: Any) -> None:

 def udf_entrypoint() -> int:
     # Load UDF info from stdin
-    udf_info = load(stdin.buffer)
+    udf_info = load(stdin.buffer)

     (
         warehouse_class,
@@ -95,7 +95,7 @@ def udf_entrypoint() -> int:

     # Parallel processing (faster for more CPU-heavy UDFs)
     dispatch = UDFDispatcher(
-        udf_info["
+        udf_info["udf_data"],
         udf_info["catalog_init"],
         udf_info["id_generator_clone_params"],
         udf_info["metastore_clone_params"],
@@ -108,7 +108,7 @@ def udf_entrypoint() -> int:
     batching = udf_info["batching"]
     table = udf_info["table"]
     n_workers = udf_info["processes"]
-    udf = udf_info["
+    udf = loads(udf_info["udf_data"])
     if n_workers is True:
         # Use default number of CPUs (cores)
         n_workers = None
@@ -146,7 +146,7 @@ class UDFDispatcher:

     def __init__(
         self,
-
+        udf_data,
         catalog_init_params,
         id_generator_clone_params,
         metastore_clone_params,
@@ -155,14 +155,7 @@
         is_generator=False,
         buffer_size=DEFAULT_BATCH_SIZE,
     ):
-
-        # and so these two types are not considered exactly equal,
-        # even if they have the same import path.
-        if full_module_type_path(type(udf)) != full_module_type_path(UDFFactory):
-            self.udf = udf
-        else:
-            self.udf = None
-            self.udf_factory = udf
+        self.udf_data = udf_data
         self.catalog_init_params = catalog_init_params
         (
             self.id_generator_class,
@@ -214,6 +207,15 @@ class UDFDispatcher:
         self.catalog = Catalog(
             id_generator, metastore, warehouse, **self.catalog_init_params
         )
+        udf = loads(self.udf_data)
+        # isinstance cannot be used here, as cloudpickle packages the entire class
+        # definition, and so these two types are not considered exactly equal,
+        # even if they have the same import path.
+        if full_module_type_path(type(udf)) != full_module_type_path(UDFFactory):
+            self.udf = udf
+        else:
+            self.udf = None
+            self.udf_factory = udf
         if not self.udf:
             self.udf = self.udf_factory()

datachain/utils.py
CHANGED

@@ -1,5 +1,6 @@
 import glob
 import importlib.util
+import io
 import json
 import os
 import os.path as osp
@@ -13,8 +14,10 @@ from itertools import islice
 from typing import TYPE_CHECKING, Any, Optional, TypeVar, Union
 from uuid import UUID

+import cloudpickle
 from dateutil import tz
 from dateutil.parser import isoparse
+from pydantic import BaseModel

 if TYPE_CHECKING:
     import pandas as pd
@@ -388,3 +391,39 @@ def inside_notebook() -> bool:
             return False

     return False
+
+
+def get_all_subclasses(cls):
+    """Return all subclasses of a given class.
+    Can return duplicates due to multiple inheritance."""
+    for subclass in cls.__subclasses__():
+        yield from get_all_subclasses(subclass)
+        yield subclass
+
+
+def filtered_cloudpickle_dumps(obj: Any) -> bytes:
+    """Equivalent to cloudpickle.dumps, but this supports Pydantic models."""
+    model_namespaces = {}
+
+    with io.BytesIO() as f:
+        pickler = cloudpickle.CloudPickler(f)
+
+        for model_class in get_all_subclasses(BaseModel):
+            # This "is not None" check is needed, because due to multiple inheritance,
+            # it is theoretically possible to get the same class twice from
+            # get_all_subclasses.
+            if model_class.__pydantic_parent_namespace__ is not None:
+                # __pydantic_parent_namespace__ can contain many unnecessary and
+                # unpickleable entities, so should be removed for serialization.
+                model_namespaces[model_class] = (
+                    model_class.__pydantic_parent_namespace__
+                )
+                model_class.__pydantic_parent_namespace__ = None
+
+        try:
+            pickler.dump(obj)
+            return f.getvalue()
+        finally:
+            for model_class, namespace in model_namespaces.items():
+                # Restore original __pydantic_parent_namespace__ locally.
+                model_class.__pydantic_parent_namespace__ = namespace
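A short usage sketch of the new helper: filtered_cloudpickle_dumps exists because `__pydantic_parent_namespace__` on Pydantic model classes can hold unpicklable objects, so it is blanked for the dump and restored afterwards. The `Point` model here is illustrative only, not part of datachain:

import cloudpickle
from pydantic import BaseModel

from datachain.utils import filtered_cloudpickle_dumps


class Point(BaseModel):  # made-up model for illustration
    x: int
    y: int


payload = filtered_cloudpickle_dumps(Point(x=1, y=2))
restored = cloudpickle.loads(payload)

print(restored)                 # x=1 y=2
print(type(restored).__name__)  # Point
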
{datachain-0.2.10.dist-info → datachain-0.2.11.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: datachain
-Version: 0.2.10
+Version: 0.2.11
 Summary: Wrangle unstructured AI data at scale
 Author-email: Dmitry Petrov <support@dvc.org>
 License: Apache-2.0
@@ -34,6 +34,7 @@ Requires-Dist: shtab <2,>=1.3.4
 Requires-Dist: sqlalchemy >=2
 Requires-Dist: multiprocess ==0.70.16
 Requires-Dist: dill ==0.3.8
+Requires-Dist: cloudpickle
 Requires-Dist: ujson >=5.9.0
 Requires-Dist: pydantic <3,>=2
 Requires-Dist: jmespath >=1.0
{datachain-0.2.10.dist-info → datachain-0.2.11.dist-info}/RECORD
CHANGED

@@ -14,7 +14,7 @@ datachain/nodes_thread_pool.py,sha256=ZyzBvUImIPmi4WlKC2SW2msA0UhtembbTdcs2nx29A
 datachain/progress.py,sha256=7_8FtJs770ITK9sMq-Lt4k4k18QmYl4yIG_kCoWID3o,4559
 datachain/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datachain/storage.py,sha256=RiSJLYdHUjnrEWkLBKPcETHpAxld_B2WxLg711t0aZI,3733
-datachain/utils.py,sha256=
+datachain/utils.py,sha256=AWUXRk7yvDpHcqzzPWwzv8HtF1-jDVEBHKxAgT7u02E,12288
 datachain/catalog/__init__.py,sha256=g2iAAFx_gEIrqshXlhSEbrc8qDaEH11cjU40n3CHDz4,409
 datachain/catalog/catalog.py,sha256=A5W9Ffoz1lZkzl6A3igaMC5jrus8VIYVLJLX8JTVKrk,79603
 datachain/catalog/datasource.py,sha256=D-VWIVDCM10A8sQavLhRXdYSCG7F4o4ifswEF80_NAQ,1412
@@ -40,8 +40,7 @@ datachain/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datachain/lib/arrow.py,sha256=ttSiH8Xr08zxypAa3-BNTxMO2NBuZfYICwmG1qQwvWU,3268
 datachain/lib/clip.py,sha256=YRa15Whnn6C8BMA-OAu0mYjc4h9i_n7pffRGdtfrTBA,5222
 datachain/lib/data_model.py,sha256=DpV_-1JqJptCf0w4cnzPlHm5Yl4FQaveRgVCDZFaHXs,2012
-datachain/lib/dc.py,sha256=
-datachain/lib/feature_registry.py,sha256=LUrBvDom-k1shFuCv46-OdgntbIUQ5008oyIS0iPM6Q,2298
+datachain/lib/dc.py,sha256=rd-7gVcMRZ2M-O8aQhNx85H31w-kRQHpXSwtf26dSk4,35849
 datachain/lib/file.py,sha256=Uik1sq2l-uknpikH4Gdm7ZR0EcQYP2TrNg-urECjbW4,8304
 datachain/lib/gpt4_vision.py,sha256=CZ-a64olZNp9TNmLGngmbN6b02UYImzwK3dPClnjxTI,2716
 datachain/lib/hf_image_to_text.py,sha256=uVl4mnUl8gnHrJ3wfSZlxBevH-cxqOswxLArLAHxRrE,3077
@@ -53,14 +52,14 @@ datachain/lib/meta_formats.py,sha256=SF7UPPe-U-1HL6DBO1NfwZLIChjkHrHasgHf5ztCUoU
 datachain/lib/model_store.py,sha256=JFpI1P0WFpsO6eAU49AdWmff5T8azqLrqOMB08pYJjg,2331
 datachain/lib/pytorch.py,sha256=7fd2g0dI9zrMfRl3IVwIvXRH0v6TwSAyZGAbqKdEjcI,5505
 datachain/lib/settings.py,sha256=6Nkoh8riETrftYwDp3aniK53Dsjc07MdztL8N0cW1D8,2849
-datachain/lib/signal_schema.py,sha256=
+datachain/lib/signal_schema.py,sha256=mRdq5qEGnFQgbSawzDPi2MCZ6PULTMigd51B2RuNxpg,14173
 datachain/lib/text.py,sha256=d2V-52cqzVm5OT68BcLYyHrglvFMVR5DPzsbtRRv3D0,1063
 datachain/lib/udf.py,sha256=RqCiGuNKL5P8eS84s_mmVYjK1gvkuRYdnIKm9qe-i2U,9698
 datachain/lib/udf_signature.py,sha256=R81QqZseG_xeBFzJSgt-wrTQeUU-1RrWkHckLm_HEUU,7135
 datachain/lib/unstructured.py,sha256=9Y6rAelXdYqkNbPaqz6DhXjhS8d6qXcP0ieIsWkzvkk,1143
 datachain/lib/utils.py,sha256=5-kJlAZE0D9nXXweAjo7-SP_AWGo28feaDByONYaooQ,463
 datachain/lib/vfile.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datachain/lib/webdataset.py,sha256=
+datachain/lib/webdataset.py,sha256=nIa6ubv94CwnATeeSdE7f_F9Zkz9LuBTfbXvFg3_-Ak,8295
 datachain/lib/webdataset_laion.py,sha256=PQP6tQmUP7Xu9fPuAGK1JDBYA6T5UufYMUTGaxgspJA,2118
 datachain/lib/convert/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datachain/lib/convert/flatten.py,sha256=XdAj0f9W32ABjOo8UyYm0y0H_yHDn3qEHERTyXuhJxk,1592
@@ -70,8 +69,8 @@ datachain/lib/convert/values_to_tuples.py,sha256=MWz9pHT-AaPQN8hNMUYfuOHstyuNv0Q
 datachain/query/__init__.py,sha256=tv-spkjUCYamMN9ys_90scYrZ8kJ7C7d1MTYVmxGtk4,325
 datachain/query/batch.py,sha256=j-_ZcuQra2Ro3Wj4crtqQCg-7xuv-p84hr4QHdvT7as,3479
 datachain/query/builtins.py,sha256=ZKNs49t8Oa_OaboCBIEqtXZt7c1Qe9OR_C_HpoDriIU,2781
-datachain/query/dataset.py,sha256=
-datachain/query/dispatch.py,sha256=
+datachain/query/dataset.py,sha256=P1KBv_R0YnKjNDHzOJwAx9qhwI08l0dLgaXfak3ps7k,60578
+datachain/query/dispatch.py,sha256=oGX9ZuoKWPB_EyqAZD_eULcO3OejY44_keSmFS6SHT0,13315
 datachain/query/metrics.py,sha256=vsECqbZfoSDBnvC3GQlziKXmISVYDLgHP1fMPEOtKyo,640
 datachain/query/params.py,sha256=O_j89mjYRLOwWNhYZl-z7mi-rkdP7WyFmaDufsdTryE,863
 datachain/query/schema.py,sha256=n1NBOj6JO2I26mZD4vSURmVC2rk3mjIkJQheeLogoy4,7748
@@ -97,9 +96,9 @@ datachain/sql/sqlite/types.py,sha256=oP93nLfTBaYnN0z_4Dsv-HZm8j9rrUf1esMM-z3JLbg
 datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR0,469
 datachain/text/__init__.py,sha256=-yxHL2gVl3H0Zxam6iWUO6F1Mc4QAFHX6z-5fjHND74,72
 datachain/torch/__init__.py,sha256=9QJW8h0FevIXEykRsxQ7XzMDXvdIkv3kVf_UY95CTyg,600
-datachain-0.2.
-datachain-0.2.
-datachain-0.2.
-datachain-0.2.
-datachain-0.2.
-datachain-0.2.
+datachain-0.2.11.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
+datachain-0.2.11.dist-info/METADATA,sha256=OVKgVc-Wc75AAQIY6hGL1CEBmnwksfgOXfiUen_xAOM,16759
+datachain-0.2.11.dist-info/WHEEL,sha256=FZ75kcLy9M91ncbIgG8dnpCncbiKXSRGJ_PFILs6SFg,91
+datachain-0.2.11.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
+datachain-0.2.11.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
+datachain-0.2.11.dist-info/RECORD,,
datachain/lib/feature_registry.py
DELETED

@@ -1,77 +0,0 @@
-import logging
-from typing import Any, ClassVar, Optional
-
-from pydantic import BaseModel
-
-logger = logging.getLogger(__name__)
-
-
-class Registry:
-    reg: ClassVar[dict[str, dict[int, Any]]] = {}
-
-    @classmethod
-    def get_version(cls, model: type[BaseModel]) -> int:
-        if not hasattr(model, "_version"):
-            return 0
-        return model._version
-
-    @classmethod
-    def get_name(cls, model) -> str:
-        if (version := cls.get_version(model)) > 0:
-            return f"{model.__name__}@v{version}"
-        return model.__name__
-
-    @classmethod
-    def add(cls, fr: type):
-        if (model := Registry.to_pydantic(fr)) is None:
-            return
-
-        name = model.__name__
-        if name not in cls.reg:
-            cls.reg[name] = {}
-        version = Registry.get_version(model)
-        cls.reg[name][version] = model
-
-        for f_info in model.model_fields.values():
-            if (anno := Registry.to_pydantic(f_info.annotation)) is not None:
-                cls.add(anno)
-
-    @classmethod
-    def get(cls, name: str, version: Optional[int] = None) -> Optional[type]:
-        class_dict = cls.reg.get(name, None)
-        if class_dict is None:
-            return None
-        if version is None:
-            max_ver = max(class_dict.keys(), default=None)
-            if max_ver is None:
-                return None
-            return class_dict[max_ver]
-        return class_dict.get(version, None)
-
-    @classmethod
-    def parse_name_version(cls, fullname: str) -> tuple[str, int]:
-        name = fullname
-        version = 0
-
-        if "@" in fullname:
-            name, version_str = fullname.split("@")
-            if version_str.strip() != "":
-                version = int(version_str[1:])
-
-        return name, version
-
-    @classmethod
-    def remove(cls, fr: type) -> None:
-        version = fr._version  # type: ignore[attr-defined]
-        if fr.__name__ in cls.reg and version in cls.reg[fr.__name__]:
-            del cls.reg[fr.__name__][version]
-
-    @staticmethod
-    def is_pydantic(val):
-        return not hasattr(val, "__origin__") and issubclass(val, BaseModel)
-
-    @staticmethod
-    def to_pydantic(val) -> Optional[type[BaseModel]]:
-        if val is None or not Registry.is_pydantic(val):
-            return None
-        return val
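The deleted Registry above lines up with the ModelStore-based flow referenced in the new signal_schema.py error message, while datachain/lib/model_store.py remains in RECORD. A hedged sketch of the registration call that message suggests; the import path is inferred from the RECORD entry and MyModel is a placeholder:

from pydantic import BaseModel

from datachain.lib.model_store import ModelStore  # path inferred from the RECORD entry above


class MyModel(BaseModel):  # placeholder custom signal type
    name: str


# Register the type so SignalSchema can resolve signals of this type, as the
# new error message ("Try to add it with `ModelStore.add(...)`") suggests.
ModelStore.add(MyModel)
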
{datachain-0.2.10.dist-info → datachain-0.2.11.dist-info}/LICENSE
File without changes

{datachain-0.2.10.dist-info → datachain-0.2.11.dist-info}/entry_points.txt
File without changes

{datachain-0.2.10.dist-info → datachain-0.2.11.dist-info}/top_level.txt
File without changes