datachain 0.7.3__py3-none-any.whl → 0.7.4__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

@@ -685,13 +685,6 @@ class AbstractDBMetastore(AbstractMetastore):
685
685
  return None
686
686
  return reduce(lambda ds, version: ds.merge_versions(version), versions)
687
687
 
688
- def _parse_datasets(self, rows) -> Iterator["DatasetRecord"]:
689
- # grouping rows by dataset id
690
- for _, g in groupby(rows, lambda r: r[0]):
691
- dataset = self._parse_dataset(list(g))
692
- if dataset:
693
- yield dataset
694
-
695
688
  def _parse_list_dataset(self, rows) -> Optional[DatasetListRecord]:
696
689
  versions = [self.dataset_list_class.parse(*r) for r in rows]
697
690
  if not versions:
datachain/dataset.py CHANGED
@@ -2,6 +2,7 @@ import builtins
2
2
  import json
3
3
  from dataclasses import dataclass, fields
4
4
  from datetime import datetime
5
+ from functools import cached_property
5
6
  from typing import (
6
7
  Any,
7
8
  NewType,
@@ -11,6 +12,8 @@ from typing import (
11
12
  )
12
13
  from urllib.parse import urlparse
13
14
 
15
+ import orjson
16
+
14
17
  from datachain.error import DatasetVersionNotFoundError
15
18
  from datachain.sql.types import NAME_TYPES_MAPPING, SQLType
16
19
 
@@ -178,7 +181,7 @@ class DatasetVersion:
178
181
  schema: dict[str, Union[SQLType, type[SQLType]]]
179
182
  num_objects: Optional[int]
180
183
  size: Optional[int]
181
- preview: Optional[list[dict]]
184
+ _preview_data: Optional[Union[str, list[dict]]]
182
185
  sources: str = ""
183
186
  query_script: str = ""
184
187
  job_id: Optional[str] = None
@@ -199,7 +202,7 @@ class DatasetVersion:
199
202
  script_output: str,
200
203
  num_objects: Optional[int],
201
204
  size: Optional[int],
202
- preview: Optional[str],
205
+ preview: Optional[Union[str, list[dict]]],
203
206
  schema: dict[str, Union[SQLType, type[SQLType]]],
204
207
  sources: str = "",
205
208
  query_script: str = "",
@@ -220,7 +223,7 @@ class DatasetVersion:
220
223
  schema,
221
224
  num_objects,
222
225
  size,
223
- json.loads(preview) if preview else None,
226
+ preview,
224
227
  sources,
225
228
  query_script,
226
229
  job_id,
@@ -260,9 +263,17 @@ class DatasetVersion:
260
263
  for c_name, c_type in self.schema.items()
261
264
  }
262
265
 
266
+ @cached_property
267
+ def preview(self) -> Optional[list[dict]]:
268
+ if isinstance(self._preview_data, str):
269
+ return orjson.loads(self._preview_data)
270
+ return self._preview_data if self._preview_data else None
271
+
263
272
  @classmethod
264
273
  def from_dict(cls, d: dict[str, Any]) -> "DatasetVersion":
265
274
  kwargs = {f.name: d[f.name] for f in fields(cls) if f.name in d}
275
+ if not hasattr(kwargs, "_preview_data"):
276
+ kwargs["_preview_data"] = d.get("preview")
266
277
  return cls(**kwargs)
267
278
 
268
279
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datachain
3
- Version: 0.7.3
3
+ Version: 0.7.4
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License: Apache-2.0
@@ -5,7 +5,7 @@ datachain/cache.py,sha256=s0YHN7qurmQv-eC265TjeureK84TebWWAnL07cxchZQ,2997
5
5
  datachain/cli.py,sha256=1hiBClE1kbRyx0DK3uX5KMVa0ktbsG6TsFSNvoT2xxs,39399
6
6
  datachain/cli_utils.py,sha256=jrn9ejGXjybeO1ur3fjdSiAyCHZrX0qsLLbJzN9ErPM,2418
7
7
  datachain/config.py,sha256=g8qbNV0vW2VEKpX-dGZ9pAn0DAz6G2ZFcr7SAV3PoSM,4272
8
- datachain/dataset.py,sha256=-9uPdOn1uWkGucouhsFVGRIuFdWkCdUrhV0U9f6Ihgc,18218
8
+ datachain/dataset.py,sha256=PKHaEXeYOL2gE5BaEmc9rzPJdDg5O9X8_7FvSh_Q9Vg,18614
9
9
  datachain/error.py,sha256=bxAAL32lSeMgzsQDEHbGTGORj-mPzzpCRvWDPueJNN4,1092
10
10
  datachain/job.py,sha256=Jt4sNutMHJReaGsj3r3scueN5aESLGfhimAa8pUP7Is,1271
11
11
  datachain/listing.py,sha256=TgKg25ZWAP5enzKgw2_2GUPJVdnQUh6uySHB5SJrUY4,7773
@@ -33,7 +33,7 @@ datachain/data_storage/__init__.py,sha256=cEOJpyu1JDZtfUupYucCDNFI6e5Wmp_Oyzq6rZ
33
33
  datachain/data_storage/db_engine.py,sha256=81Ol1of9TTTzD97ORajCnP366Xz2mEJt6C-kTUCaru4,3406
34
34
  datachain/data_storage/id_generator.py,sha256=lCEoU0BM37Ai2aRpSbwo5oQT0GqZnSpYwwvizathRMQ,4292
35
35
  datachain/data_storage/job.py,sha256=w-7spowjkOa1P5fUVtJou3OltT0L48P0RYWZ9rSJ9-s,383
36
- datachain/data_storage/metastore.py,sha256=S9pkbAi7yJlU_CTuhB-eTZgzZgkPMhJ5Br90AVLDXsQ,37922
36
+ datachain/data_storage/metastore.py,sha256=VPq-Dl8P-RbZQMzn6vB9aXBPKUWPTwP8ypkaVfE-7PU,37661
37
37
  datachain/data_storage/schema.py,sha256=-QVlRvD0dfu-ZFUxylEoSnLJLnleMEjVlcAb2OGu-AY,9895
38
38
  datachain/data_storage/serializer.py,sha256=6G2YtOFqqDzJf1KbvZraKGXl2XHZyVml2krunWUum5o,927
39
39
  datachain/data_storage/sqlite.py,sha256=nF-2B-n8YZh9cJlZv4XnbahAJDW6pvrp1h9L-140M7A,27538
@@ -117,9 +117,9 @@ datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR
117
117
  datachain/toolkit/__init__.py,sha256=eQ58Q5Yf_Fgv1ZG0IO5dpB4jmP90rk8YxUWmPc1M2Bo,68
118
118
  datachain/toolkit/split.py,sha256=ZgDcrNiKiPXZmKD591_1z9qRIXitu5zwAsoVPB7ykiU,2508
119
119
  datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
120
- datachain-0.7.3.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
121
- datachain-0.7.3.dist-info/METADATA,sha256=E1-nP4rZghwCV5kSS09620YEJdwaTAiVpI5DmmRnZy0,18006
122
- datachain-0.7.3.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
123
- datachain-0.7.3.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
124
- datachain-0.7.3.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
125
- datachain-0.7.3.dist-info/RECORD,,
120
+ datachain-0.7.4.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
121
+ datachain-0.7.4.dist-info/METADATA,sha256=tr5ReyIE9nUfhvCwuGujJC1MmfO07A10N1sLfvOBcYQ,18006
122
+ datachain-0.7.4.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
123
+ datachain-0.7.4.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
124
+ datachain-0.7.4.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
125
+ datachain-0.7.4.dist-info/RECORD,,