datachain-0.18.9-py3-none-any.whl → datachain-0.18.10-py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- datachain/data_storage/warehouse.py +9 -4
- datachain/lib/dc/json.py +9 -11
- {datachain-0.18.9.dist-info → datachain-0.18.10.dist-info}/METADATA +1 -1
- {datachain-0.18.9.dist-info → datachain-0.18.10.dist-info}/RECORD +8 -8
- {datachain-0.18.9.dist-info → datachain-0.18.10.dist-info}/WHEEL +0 -0
- {datachain-0.18.9.dist-info → datachain-0.18.10.dist-info}/entry_points.txt +0 -0
- {datachain-0.18.9.dist-info → datachain-0.18.10.dist-info}/licenses/LICENSE +0 -0
- {datachain-0.18.9.dist-info → datachain-0.18.10.dist-info}/top_level.txt +0 -0
|
@@ -17,6 +17,8 @@ from datachain.client import Client
|
|
|
17
17
|
from datachain.data_storage.schema import convert_rows_custom_column_types
|
|
18
18
|
from datachain.data_storage.serializer import Serializable
|
|
19
19
|
from datachain.dataset import DatasetRecord, StorageURI
|
|
20
|
+
from datachain.lib.file import File
|
|
21
|
+
from datachain.lib.signal_schema import SignalSchema
|
|
20
22
|
from datachain.node import DirType, DirTypeGroup, Node, NodeWithPath, get_path
|
|
21
23
|
from datachain.query.batch import RowsOutput
|
|
22
24
|
from datachain.query.utils import get_query_id_column
|
|
@@ -35,7 +37,6 @@ if TYPE_CHECKING:
|
|
|
35
37
|
from datachain.data_storage import schema
|
|
36
38
|
from datachain.data_storage.db_engine import DatabaseEngine
|
|
37
39
|
from datachain.data_storage.schema import DataTable
|
|
38
|
-
from datachain.lib.file import File
|
|
39
40
|
|
|
40
41
|
|
|
41
42
|
logger = logging.getLogger("datachain")
|
|
@@ -370,14 +371,18 @@ class AbstractWarehouse(ABC, Serializable):
|
|
|
370
371
|
if not (self.db.has_table(self.dataset_table_name(dataset.name, version))):
|
|
371
372
|
return None, None
|
|
372
373
|
|
|
374
|
+
file_signals = list(
|
|
375
|
+
SignalSchema.deserialize(dataset.feature_schema).get_signals(File)
|
|
376
|
+
)
|
|
377
|
+
|
|
373
378
|
dr = self.dataset_rows(dataset, version)
|
|
374
379
|
table = dr.get_table()
|
|
375
380
|
expressions: tuple[_ColumnsClauseArgument[Any], ...] = (
|
|
376
381
|
sa.func.count(table.c.sys__id),
|
|
377
382
|
)
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
383
|
+
size_column_names = [s.replace(".", "__") + "__size" for s in file_signals]
|
|
384
|
+
size_columns = [c for c in table.columns if c.name in size_column_names]
|
|
385
|
+
|
|
381
386
|
if size_columns:
|
|
382
387
|
expressions = (*expressions, sa.func.sum(sum(size_columns)))
|
|
383
388
|
query = sa.select(*expressions)
|
datachain/lib/dc/json.py
CHANGED
|
@@ -1,18 +1,13 @@
|
|
|
1
1
|
import os
|
|
2
2
|
import os.path
|
|
3
3
|
import re
|
|
4
|
-
from typing import (
|
|
5
|
-
TYPE_CHECKING,
|
|
6
|
-
Optional,
|
|
7
|
-
Union,
|
|
8
|
-
)
|
|
4
|
+
from typing import TYPE_CHECKING, Optional, Union
|
|
9
5
|
|
|
6
|
+
import cloudpickle
|
|
7
|
+
|
|
8
|
+
from datachain.lib import meta_formats
|
|
10
9
|
from datachain.lib.data_model import DataType
|
|
11
|
-
from datachain.lib.file import (
|
|
12
|
-
File,
|
|
13
|
-
FileType,
|
|
14
|
-
)
|
|
15
|
-
from datachain.lib.meta_formats import read_meta
|
|
10
|
+
from datachain.lib.file import File, FileType
|
|
16
11
|
|
|
17
12
|
if TYPE_CHECKING:
|
|
18
13
|
from typing_extensions import ParamSpec
|
|
@@ -76,7 +71,7 @@ def read_json(
|
|
|
76
71
|
column = format
|
|
77
72
|
chain = read_storage(uri=path, type=type, **kwargs)
|
|
78
73
|
signal_dict = {
|
|
79
|
-
column: read_meta(
|
|
74
|
+
column: meta_formats.read_meta(
|
|
80
75
|
schema_from=schema_from,
|
|
81
76
|
format=format,
|
|
82
77
|
spec=spec,
|
|
@@ -88,4 +83,7 @@ def read_json(
|
|
|
88
83
|
}
|
|
89
84
|
# disable prefetch if nrows is set
|
|
90
85
|
settings = {"prefetch": 0} if nrows else {}
|
|
86
|
+
|
|
87
|
+
cloudpickle.register_pickle_by_value(meta_formats)
|
|
88
|
+
|
|
91
89
|
return chain.settings(**settings).gen(**signal_dict) # type: ignore[misc, arg-type]
|
|
@@ -51,7 +51,7 @@ datachain/data_storage/metastore.py,sha256=1PaRTQbL7kjcU1BVjiLjXJLrrLzQtUvpqLmm0
|
|
|
51
51
|
datachain/data_storage/schema.py,sha256=asZYz1cg_WKfe2Q-k5W51E2z2CzHU5B4QEDZDMFr8yo,9346
|
|
52
52
|
datachain/data_storage/serializer.py,sha256=6G2YtOFqqDzJf1KbvZraKGXl2XHZyVml2krunWUum5o,927
|
|
53
53
|
datachain/data_storage/sqlite.py,sha256=bwZAB_NUMT2WMv5tPQnnLFA0P-PiQtxzSaQ1q6xDxOU,24590
|
|
54
|
-
datachain/data_storage/warehouse.py,sha256=
|
|
54
|
+
datachain/data_storage/warehouse.py,sha256=imPm4R2V7TkqgGNSO2FGnKu03axU9UVLMfdUPfpwgHE,31747
|
|
55
55
|
datachain/diff/__init__.py,sha256=-OFZzgOplqO84iWgGY7kfe60NXaWR9JRIh9T-uJboAM,9668
|
|
56
56
|
datachain/fs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
57
57
|
datachain/fs/reference.py,sha256=A8McpXF0CqbXPqanXuvpKu50YLB3a2ZXA3YAPxtBXSM,914
|
|
@@ -102,7 +102,7 @@ datachain/lib/dc/database.py,sha256=g5M6NjYR1T0vKte-abV-3Ejnm-HqxTIMir5cRi_SziE,
|
|
|
102
102
|
datachain/lib/dc/datachain.py,sha256=5rR_QqG4vesq-x545ZTSFJDSb6Oc5CW4-ziQYD6DpW4,80993
|
|
103
103
|
datachain/lib/dc/datasets.py,sha256=G65leCuo_3bItmvjoV1wK0pzj7a2IQqe3xRsflpF3xM,10794
|
|
104
104
|
datachain/lib/dc/hf.py,sha256=PJl2wiLjdRsMz0SYbLT-6H8b-D5i2WjeH7li8HHOk_0,2145
|
|
105
|
-
datachain/lib/dc/json.py,sha256=
|
|
105
|
+
datachain/lib/dc/json.py,sha256=dNijfJ-H92vU3soyR7X1IiDrWhm6yZIGG3bSnZkPdAE,2733
|
|
106
106
|
datachain/lib/dc/listings.py,sha256=2na9v63xO1vPUNaoBSzA-TSN49V7zQAb-4iS1wOPLFE,1029
|
|
107
107
|
datachain/lib/dc/pandas.py,sha256=ObueUXDUFKJGu380GmazdG02ARpKAHPhSaymfmOH13E,1489
|
|
108
108
|
datachain/lib/dc/parquet.py,sha256=zYcSgrWwyEDW9UxGUSVdIVsCu15IGEf0xL8KfWQqK94,1782
|
|
@@ -153,9 +153,9 @@ datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR
|
|
|
153
153
|
datachain/toolkit/__init__.py,sha256=eQ58Q5Yf_Fgv1ZG0IO5dpB4jmP90rk8YxUWmPc1M2Bo,68
|
|
154
154
|
datachain/toolkit/split.py,sha256=ktGWzY4kyzjWyR86dhvzw-Zhl0lVk_LOX3NciTac6qo,2914
|
|
155
155
|
datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
|
|
156
|
-
datachain-0.18.9.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
|
|
157
|
-
datachain-0.18.
|
|
158
|
-
datachain-0.18.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
159
|
-
datachain-0.18.9.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
|
|
160
|
-
datachain-0.18.9.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
|
|
161
|
-
datachain-0.18.9.dist-info/RECORD,,
|
|
156
|
+
datachain-0.18.10.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
|
|
157
|
+
datachain-0.18.10.dist-info/METADATA,sha256=Vjkb16V4J8lNJphVuqD2DZ_V_7BLIf8YPRlvJNtsLaM,11320
|
|
158
|
+
datachain-0.18.10.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
159
|
+
datachain-0.18.10.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
|
|
160
|
+
datachain-0.18.10.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
|
|
161
|
+
datachain-0.18.10.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|