datachain 0.3.19__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff shows the published contents of the two package versions as they appear in their public registry, and is provided for informational purposes only.

Potentially problematic release.

datachain/catalog/catalog.py CHANGED
@@ -79,6 +79,7 @@ if TYPE_CHECKING:
     )
     from datachain.dataset import DatasetVersion
     from datachain.job import Job
+    from datachain.lib.file import File
 
 logger = logging.getLogger("datachain")
 
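Note on the added import: `File` is brought in under the `if TYPE_CHECKING:` guard, so it exists for annotations only and is never imported at runtime, the standard way to avoid a circular import between `datachain.catalog` and `datachain.lib`. A minimal sketch of the pattern (the function body here is illustrative, not the actual catalog code):

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Seen only by static type checkers; never executed at runtime,
    # so it cannot introduce an import cycle.
    from datachain.lib.file import File


def get_file_from_row(row: dict) -> "File":
    # The quoted "File" annotation is resolved lazily, so the real
    # import can stay inside the function body, as catalog.py does below.
    from datachain.lib.file import File

    return File(**row)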
@@ -978,7 +979,6 @@ class Catalog:
         script_output="",
         create_rows_table=True,
         job_id: Optional[str] = None,
-        is_job_result: bool = False,
     ) -> DatasetRecord:
         """
         Creates dataset version if it doesn't exist.
@@ -1000,7 +1000,6 @@ class Catalog:
             script_output=script_output,
             schema=schema,
             job_id=job_id,
-            is_job_result=is_job_result,
             ignore_if_exists=True,
         )
 
@@ -1210,7 +1209,6 @@ class Catalog:
             size=dataset_version.size,
             preview=dataset_version.preview,
             job_id=dataset_version.job_id,
-            is_job_result=dataset_version.is_job_result,
         )
         # to avoid re-creating rows table, we are just renaming it for a new version
         # of target dataset
@@ -1399,65 +1397,34 @@ class Catalog:
         dataset = self.get_dataset(name)
         return self.update_dataset(dataset, **update_data)
 
-    def get_file_signals(
-        self, dataset_name: str, dataset_version: int, row: RowDict
-    ) -> Optional[RowDict]:
+    def get_file_from_row(
+        self, dataset_name: str, dataset_version: int, row: RowDict, signal_name: str
+    ) -> "File":
         """
-        Function that returns file signals from dataset row.
-        Note that signal names are without prefix, so if there was 'laion__file__source'
-        in original row, result will have just 'source'
-        Example output:
-        {
-            "source": "s3://ldb-public",
-            "path": "animals/dogs/dog.jpg",
-            ...
-        }
+        Function that returns specific file signal from dataset row by name.
         """
         from datachain.lib.file import File
         from datachain.lib.signal_schema import DEFAULT_DELIMITER, SignalSchema
 
         version = self.get_dataset(dataset_name).get_version(dataset_version)
-
-        file_signals_values = RowDict()
-
         schema = SignalSchema.deserialize(version.feature_schema)
-        for file_signals in schema.get_signals(File):
-            prefix = file_signals.replace(".", DEFAULT_DELIMITER) + DEFAULT_DELIMITER
-            file_signals_values[file_signals] = {
-                c_name.removeprefix(prefix): c_value
-                for c_name, c_value in row.items()
-                if c_name.startswith(prefix)
-                and DEFAULT_DELIMITER not in c_name.removeprefix(prefix)
-            }
 
-        if not file_signals_values:
-            return None
-
-        # there can be multiple file signals in a schema, but taking the first
-        # one for now. In future we might add ability to choose from which one
-        # to open object
-        return next(iter(file_signals_values.values()))
-
-    def open_object(
-        self,
-        dataset_name: str,
-        dataset_version: int,
-        row: RowDict,
-        use_cache: bool = True,
-        **config: Any,
-    ):
-        from datachain.lib.file import File
+        if signal_name not in schema.get_signals(File):
+            raise RuntimeError(
+                f"File signal with path {signal_name} not found in ",
+                f"dataset {dataset_name}@v{dataset_version} signals schema",
+            )
 
-        file_signals = self.get_file_signals(dataset_name, dataset_version, row)
-        if not file_signals:
-            raise RuntimeError("Cannot open object without file signals")
+        prefix = signal_name.replace(".", DEFAULT_DELIMITER) + DEFAULT_DELIMITER
+        file_signals = {
+            c_name.removeprefix(prefix): c_value
+            for c_name, c_value in row.items()
+            if c_name.startswith(prefix)
+            and DEFAULT_DELIMITER not in c_name.removeprefix(prefix)
+            and c_name.removeprefix(prefix) in File.model_fields
+        }
 
-        config = config or self.client_config
-        client = self.get_client(file_signals["source"], **config)
-        return client.open_object(
-            File._from_row(file_signals),
-            use_cache=use_cache,
-        )
+        return File(**file_signals)
 
     def ls(
         self,
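Net effect for callers: the old two-step `get_file_signals()` + `open_object()` flow is gone, and `get_file_from_row()` returns a `File` model for an explicitly named signal instead of silently taking the first file signal in the schema. A hedged usage sketch follows; the dataset name, version, row values, and the `laion.file` signal path are illustrative, and it assumes `get_catalog()` from `datachain.catalog` and `File.open()` from `datachain.lib.file`:

from datachain.catalog import get_catalog

catalog = get_catalog()

# Rows store nested signals flattened with DEFAULT_DELIMITER ("__"),
# e.g. a File model under the "laion.file" signal path:
row = {
    "laion__file__source": "s3://ldb-public",
    "laion__file__path": "animals/dogs/dog.jpg",
    "laion__file__size": 12345,
}

# 0.3.19: catalog.open_object(name, version, row) picked a file signal for you.
# 0.4.0: name the signal explicitly and get a File back to open yourself.
file = catalog.get_file_from_row("my-dataset", 1, row, "laion.file")
with file.open() as f:
    data = f.read()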
datachain/data_storage/metastore.py CHANGED
@@ -243,7 +243,6 @@ class AbstractMetastore(ABC, Serializable):
         size: Optional[int] = None,
         preview: Optional[list[dict]] = None,
         job_id: Optional[str] = None,
-        is_job_result: bool = False,
     ) -> DatasetRecord:
         """Creates new dataset version."""
 
@@ -497,7 +496,6 @@ class AbstractDBMetastore(AbstractMetastore):
             Column("query_script", Text, nullable=False, default=""),
             Column("schema", JSON, nullable=True),
             Column("job_id", Text, nullable=True),
-            Column("is_job_result", Boolean, nullable=False, default=False),
             UniqueConstraint("dataset_id", "version"),
         ]
 
@@ -1009,7 +1007,6 @@ class AbstractDBMetastore(AbstractMetastore):
         size: Optional[int] = None,
         preview: Optional[list[dict]] = None,
         job_id: Optional[str] = None,
-        is_job_result: bool = False,
         conn=None,
     ) -> DatasetRecord:
         """Creates new dataset version."""
@@ -1035,7 +1032,6 @@ class AbstractDBMetastore(AbstractMetastore):
             size=size,
             preview=json.dumps(preview or []),
             job_id=job_id or os.getenv("DATACHAIN_JOB_ID"),
-            is_job_result=is_job_result,
         )
         if ignore_if_exists and hasattr(query, "on_conflict_do_nothing"):
             # SQLite and PostgreSQL both support 'on_conflict_do_nothing',
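The trailing context is the idempotent-insert path this change flows through. For reference, a minimal SQLAlchemy sketch of the `on_conflict_do_nothing` pattern the comment refers to; the table here is a trimmed-down stand-in, not datachain's actual definition:

from sqlalchemy import Column, Integer, MetaData, Table, Text, UniqueConstraint
from sqlalchemy.dialects.sqlite import insert  # the postgresql dialect has the same API

metadata = MetaData()
versions = Table(
    "datasets_versions",  # illustrative name
    metadata,
    Column("id", Integer, primary_key=True),
    Column("dataset_id", Integer, nullable=False),
    Column("version", Integer, nullable=False),
    Column("job_id", Text, nullable=True),
    UniqueConstraint("dataset_id", "version"),
)

# With ignore_if_exists=True, re-inserting an existing (dataset_id, version)
# pair becomes a no-op instead of raising an IntegrityError:
stmt = (
    insert(versions)
    .values(dataset_id=1, version=1, job_id=None)
    .on_conflict_do_nothing()
)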
datachain/dataset.py CHANGED
@@ -179,7 +179,6 @@ class DatasetVersion:
     sources: str = ""
     query_script: str = ""
    job_id: Optional[str] = None
-    is_job_result: bool = False
 
     @classmethod
     def parse(  # noqa: PLR0913
@@ -201,7 +200,6 @@ class DatasetVersion:
         sources: str = "",
         query_script: str = "",
         job_id: Optional[str] = None,
-        is_job_result: bool = False,
     ):
         return cls(
             id,
@@ -221,7 +219,6 @@ class DatasetVersion:
             sources,
             query_script,
             job_id,
-            is_job_result,
         )
 
     def __eq__(self, other):
@@ -327,7 +324,6 @@ class DatasetRecord:
         version_query_script: Optional[str],
         version_schema: str,
         version_job_id: Optional[str] = None,
-        version_is_job_result: bool = False,
     ) -> "DatasetRecord":
         labels_lst: list[str] = json.loads(labels) if labels else []
         schema_dct: dict[str, Any] = json.loads(schema) if schema else {}
@@ -353,7 +349,6 @@ class DatasetRecord:
             version_sources,  # type: ignore[arg-type]
             version_query_script,  # type: ignore[arg-type]
             version_job_id,
-            version_is_job_result,
         )
 
         return cls(
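Because `parse()` forwards its arguments to `cls(...)` positionally, dropping `is_job_result` has to update the field list, the `parse()` signature, and every positional call site in lockstep; a missed caller only fails when executed. A tiny, hypothetical illustration of that failure mode:

from dataclasses import dataclass
from typing import Optional


@dataclass
class Version:
    query_script: str = ""
    job_id: Optional[str] = None
    # is_job_result: bool = False  # trailing field removed, as in this release


# A stale call site still passing the removed trailing flag fails at runtime:
try:
    Version("SELECT 1", None, False)
except TypeError as exc:
    print(exc)  # __init__() takes from 1 to 3 positional arguments but 4 were given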
datachain-0.3.19.dist-info/METADATA → datachain-0.4.0.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: datachain
-Version: 0.3.19
+Version: 0.4.0
 Summary: Wrangle unstructured AI data at scale
 Author-email: Dmitry Petrov <support@dvc.org>
 License: Apache-2.0
datachain-0.3.19.dist-info/RECORD → datachain-0.4.0.dist-info/RECORD RENAMED
@@ -5,7 +5,7 @@ datachain/cache.py,sha256=s0YHN7qurmQv-eC265TjeureK84TebWWAnL07cxchZQ,2997
 datachain/cli.py,sha256=TQ1OKMulAcsJndKLCyxJpfNqbMWQgOa4Aeihnu36cR8,30095
 datachain/cli_utils.py,sha256=jrn9ejGXjybeO1ur3fjdSiAyCHZrX0qsLLbJzN9ErPM,2418
 datachain/config.py,sha256=PfC7W5yO6HFO6-iMB4YB-0RR88LPiGmD6sS_SfVbGso,1979
-datachain/dataset.py,sha256=2NCQU9ZSgNGhA01SP5ON18VhMohXif-btOB4Lz-Uvds,14911
+datachain/dataset.py,sha256=HWcFckJpmTU5AGsg8ILW8JInpNQqaWmJoasls18q5kI,14735
 datachain/error.py,sha256=vbIbamnFMIojh1UpmxWoA6Omup7WFAFNJnf8xAkGWwI,1146
 datachain/job.py,sha256=Jt4sNutMHJReaGsj3r3scueN5aESLGfhimAa8pUP7Is,1271
 datachain/listing.py,sha256=TkMmBzCiru26x4RaZiagWJTmTGbiy6yGrAsSJMr8cFE,8213
@@ -18,7 +18,7 @@ datachain/storage.py,sha256=RiSJLYdHUjnrEWkLBKPcETHpAxld_B2WxLg711t0aZI,3733
 datachain/telemetry.py,sha256=0A4IOPPp9VlP5pyW9eBfaTK3YhHGzHl7dQudQjUAx9A,994
 datachain/utils.py,sha256=KeFSRHsiYthnTu4a6bH-rw04mX1m8krTX0f2NqfQGFI,12114
 datachain/catalog/__init__.py,sha256=g2iAAFx_gEIrqshXlhSEbrc8qDaEH11cjU40n3CHDz4,409
-datachain/catalog/catalog.py,sha256=poTu_B5va35MTCV60ntsn4jvAFXepqa2peCjYCXWeU0,64982
+datachain/catalog/catalog.py,sha256=FuKuIiCwPgN5Ea25hnFe_ZFZH9YEUZ2ma9k_Lczk-JU,63867
 datachain/catalog/datasource.py,sha256=D-VWIVDCM10A8sQavLhRXdYSCG7F4o4ifswEF80_NAQ,1412
 datachain/catalog/loader.py,sha256=-6VelNfXUdgUnwInVyA8g86Boxv2xqhTh9xNS-Zlwig,8242
 datachain/client/__init__.py,sha256=T4wiYL9KIM0ZZ_UqIyzV8_ufzYlewmizlV4iymHNluE,86
@@ -33,7 +33,7 @@ datachain/data_storage/__init__.py,sha256=cEOJpyu1JDZtfUupYucCDNFI6e5Wmp_Oyzq6rZ
 datachain/data_storage/db_engine.py,sha256=81Ol1of9TTTzD97ORajCnP366Xz2mEJt6C-kTUCaru4,3406
 datachain/data_storage/id_generator.py,sha256=lCEoU0BM37Ai2aRpSbwo5oQT0GqZnSpYwwvizathRMQ,4292
 datachain/data_storage/job.py,sha256=w-7spowjkOa1P5fUVtJou3OltT0L48P0RYWZ9rSJ9-s,383
-datachain/data_storage/metastore.py,sha256=BePe3bVxo-Zuuccok8TLRo4cMHVnAIa8hfZMadbxzqM,52649
+datachain/data_storage/metastore.py,sha256=Ztw86JbN4-1gobZea1oqAAT2kotvi46pxNRjqncZ7B8,52457
 datachain/data_storage/schema.py,sha256=AGbjyEir5UmRZXI3m0jChZogUh5wd8csj6-YlUWaAxQ,8383
 datachain/data_storage/serializer.py,sha256=6G2YtOFqqDzJf1KbvZraKGXl2XHZyVml2krunWUum5o,927
 datachain/data_storage/sqlite.py,sha256=EBKJncuzcyQfcKFm2mUjvHjHRTODsteM-k_zndunBrw,28834
@@ -97,9 +97,9 @@ datachain/sql/sqlite/base.py,sha256=WLPHBhZbXbiqPoRV1VgDrXJqku4UuvJpBhYeQ0k5rI8,
 datachain/sql/sqlite/types.py,sha256=yzvp0sXSEoEYXs6zaYC_2YubarQoZH-MiUNXcpuEP4s,1573
 datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR0,469
 datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
-datachain-0.3.19.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
-datachain-0.3.19.dist-info/METADATA,sha256=yMBpXwOmeoWOmpS0m_hp8GFiMs3Zu_ixMzkG6GF_Z2U,17157
-datachain-0.3.19.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
-datachain-0.3.19.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
-datachain-0.3.19.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
-datachain-0.3.19.dist-info/RECORD,,
+datachain-0.4.0.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
+datachain-0.4.0.dist-info/METADATA,sha256=UmW4n6_qqsTZe_bXdjwCe6n6zWSVq35Kn_-h_u_b0RA,17156
+datachain-0.4.0.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
+datachain-0.4.0.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
+datachain-0.4.0.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
+datachain-0.4.0.dist-info/RECORD,,
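For readers unfamiliar with the RECORD format: each line is `path,sha256=<digest>,<size>`, where the digest is URL-safe base64 with padding stripped (PEP 376 / the wheel spec), which is why `dataset.py` shows both a new hash and a new size (14911 → 14735 bytes) after the `is_job_result` removal. A small sketch to recompute an entry locally:

import base64
import hashlib
from pathlib import Path


def record_entry(path: str) -> str:
    """Rebuild a wheel RECORD line for a file on disk."""
    data = Path(path).read_bytes()
    digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest())
    return f"{path},sha256={digest.rstrip(b'=').decode()},{len(data)}"


# e.g. record_entry("datachain/dataset.py") should reproduce
# "datachain/dataset.py,sha256=HWcFckJpmTU5...,14735" for the 0.4.0 wheel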