datachain 0.2.2__py3-none-any.whl → 0.2.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- datachain/catalog/catalog.py +13 -4
- datachain/lib/signal_schema.py +1 -45
- {datachain-0.2.2.dist-info → datachain-0.2.4.dist-info}/METADATA +1 -1
- {datachain-0.2.2.dist-info → datachain-0.2.4.dist-info}/RECORD +8 -8
- {datachain-0.2.2.dist-info → datachain-0.2.4.dist-info}/LICENSE +0 -0
- {datachain-0.2.2.dist-info → datachain-0.2.4.dist-info}/WHEEL +0 -0
- {datachain-0.2.2.dist-info → datachain-0.2.4.dist-info}/entry_points.txt +0 -0
- {datachain-0.2.2.dist-info → datachain-0.2.4.dist-info}/top_level.txt +0 -0
datachain/catalog/catalog.py
CHANGED
|
@@ -1609,13 +1609,22 @@ class Catalog:
|
|
|
1609
1609
|
...
|
|
1610
1610
|
}
|
|
1611
1611
|
"""
|
|
1612
|
-
from datachain.lib.signal_schema import SignalSchema
|
|
1612
|
+
from datachain.lib.signal_schema import DEFAULT_DELIMITER, SignalSchema
|
|
1613
1613
|
|
|
1614
1614
|
version = self.get_dataset(dataset_name).get_version(dataset_version)
|
|
1615
1615
|
|
|
1616
|
-
file_signals_values =
|
|
1617
|
-
|
|
1618
|
-
|
|
1616
|
+
file_signals_values = {}
|
|
1617
|
+
|
|
1618
|
+
schema = SignalSchema.deserialize(version.feature_schema)
|
|
1619
|
+
for file_signals in schema.get_file_signals():
|
|
1620
|
+
prefix = file_signals.replace(".", DEFAULT_DELIMITER) + DEFAULT_DELIMITER
|
|
1621
|
+
file_signals_values[file_signals] = {
|
|
1622
|
+
c_name.removeprefix(prefix): c_value
|
|
1623
|
+
for c_name, c_value in row.items()
|
|
1624
|
+
if c_name.startswith(prefix)
|
|
1625
|
+
and DEFAULT_DELIMITER not in c_name.removeprefix(prefix)
|
|
1626
|
+
}
|
|
1627
|
+
|
|
1619
1628
|
if not file_signals_values:
|
|
1620
1629
|
return None
|
|
1621
1630
|
|
datachain/lib/signal_schema.py
CHANGED
|
@@ -13,17 +13,13 @@ from datachain.lib.feature import (
|
|
|
13
13
|
convert_type_to_datachain,
|
|
14
14
|
)
|
|
15
15
|
from datachain.lib.feature_registry import Registry
|
|
16
|
-
from datachain.lib.file import File
|
|
17
|
-
from datachain.lib.image import ImageFile
|
|
16
|
+
from datachain.lib.file import File
|
|
18
17
|
from datachain.lib.utils import DataChainParamsError
|
|
19
|
-
from datachain.lib.webdataset import TarStream, WDSAllFile, WDSBasic
|
|
20
|
-
from datachain.lib.webdataset_laion import Laion, WDSLaion
|
|
21
18
|
|
|
22
19
|
if TYPE_CHECKING:
|
|
23
20
|
from datachain.catalog import Catalog
|
|
24
21
|
|
|
25
22
|
|
|
26
|
-
# TODO fix hardcoded Feature class names with https://github.com/iterative/dvcx/issues/1625
|
|
27
23
|
NAMES_TO_TYPES = {
|
|
28
24
|
"int": int,
|
|
29
25
|
"str": str,
|
|
@@ -33,15 +29,6 @@ NAMES_TO_TYPES = {
|
|
|
33
29
|
"dict": dict,
|
|
34
30
|
"bytes": bytes,
|
|
35
31
|
"datetime": datetime,
|
|
36
|
-
"WDSLaion": WDSLaion,
|
|
37
|
-
"Laion": Laion,
|
|
38
|
-
"Source": IndexedFile,
|
|
39
|
-
"File": File,
|
|
40
|
-
"ImageFile": ImageFile,
|
|
41
|
-
"TextFile": TextFile,
|
|
42
|
-
"TarStream": TarStream,
|
|
43
|
-
"WDSBasic": WDSBasic,
|
|
44
|
-
"WDSAllFile": WDSAllFile,
|
|
45
32
|
}
|
|
46
33
|
|
|
47
34
|
|
|
@@ -239,37 +226,6 @@ class SignalSchema:
|
|
|
239
226
|
if has_subtree and issubclass(type_, File):
|
|
240
227
|
yield ".".join(path)
|
|
241
228
|
|
|
242
|
-
def get_file_signals_values(self, row: dict[str, Any]) -> dict[str, Any]:
|
|
243
|
-
"""
|
|
244
|
-
Method that returns values with clean field names (without prefix) for
|
|
245
|
-
all file signals found in this schema for some row
|
|
246
|
-
Output example:
|
|
247
|
-
{
|
|
248
|
-
laion.file: {
|
|
249
|
-
"source": "s3://ldb-public",
|
|
250
|
-
"name": "dog.jpg",
|
|
251
|
-
...
|
|
252
|
-
},
|
|
253
|
-
meta.file: {
|
|
254
|
-
"source": "s3://datacomp",
|
|
255
|
-
"name": "cat.jpg",
|
|
256
|
-
...
|
|
257
|
-
}
|
|
258
|
-
}
|
|
259
|
-
"""
|
|
260
|
-
res = {}
|
|
261
|
-
|
|
262
|
-
for file_signals in self.get_file_signals():
|
|
263
|
-
prefix = file_signals.replace(".", DEFAULT_DELIMITER) + DEFAULT_DELIMITER
|
|
264
|
-
res[file_signals] = {
|
|
265
|
-
c_name.removeprefix(prefix): c_value
|
|
266
|
-
for c_name, c_value in row.items()
|
|
267
|
-
if c_name.startswith(prefix)
|
|
268
|
-
and DEFAULT_DELIMITER not in c_name.removeprefix(prefix)
|
|
269
|
-
}
|
|
270
|
-
|
|
271
|
-
return res
|
|
272
|
-
|
|
273
229
|
def create_model(self, name: str) -> type[Feature]:
|
|
274
230
|
fields = {key: (value, None) for key, value in self.values.items()}
|
|
275
231
|
|
|
@@ -16,7 +16,7 @@ datachain/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
|
16
16
|
datachain/storage.py,sha256=RiSJLYdHUjnrEWkLBKPcETHpAxld_B2WxLg711t0aZI,3733
|
|
17
17
|
datachain/utils.py,sha256=12yQAV8tfyCHqp_xJcJBeNnr1L_BO8e2bOPyXdM68gs,10759
|
|
18
18
|
datachain/catalog/__init__.py,sha256=g2iAAFx_gEIrqshXlhSEbrc8qDaEH11cjU40n3CHDz4,409
|
|
19
|
-
datachain/catalog/catalog.py,sha256=
|
|
19
|
+
datachain/catalog/catalog.py,sha256=pulKGJgAmxqSmFqBhA-J0wCKdBqGX4vqpV0cAvV6vUw,79578
|
|
20
20
|
datachain/catalog/datasource.py,sha256=D-VWIVDCM10A8sQavLhRXdYSCG7F4o4ifswEF80_NAQ,1412
|
|
21
21
|
datachain/catalog/loader.py,sha256=GJ8zhEYkC7TuaPzCsjJQ4LtTdECu-wwYzC12MikPOMQ,7307
|
|
22
22
|
datachain/catalog/subclass.py,sha256=B5R0qxeTYEyVAAPM1RutBPSoXZc8L5mVVZeSGXki9Sw,2096
|
|
@@ -55,7 +55,7 @@ datachain/lib/iptc_exif_xmp.py,sha256=xrbxFeY-wRP6T5JsUgE3EXfTxKvZVymRaRD_VIfxD0
|
|
|
55
55
|
datachain/lib/meta_formats.py,sha256=wIVVLRLp45Zk4vjZRd_P1UtD24vpDCb-vILWtcsACwk,6630
|
|
56
56
|
datachain/lib/pytorch.py,sha256=Z7iZCsqJzUT0PynVo23Xu4Fx7qIuuEZyH83R1tR5mfI,5561
|
|
57
57
|
datachain/lib/settings.py,sha256=6Nkoh8riETrftYwDp3aniK53Dsjc07MdztL8N0cW1D8,2849
|
|
58
|
-
datachain/lib/signal_schema.py,sha256=
|
|
58
|
+
datachain/lib/signal_schema.py,sha256=KTegbx-yMvtaKEoUxLgDx5MxMA8De-nmdtqnV1932N8,10151
|
|
59
59
|
datachain/lib/text.py,sha256=PUT1O0jNJoQGsuhff2LgDpzTWk2eMdwIKqEDBrE448M,1307
|
|
60
60
|
datachain/lib/udf.py,sha256=kMlOsHCVybnnq4AMtYqjylZH7x2tGE62FsDPOu9qhWM,6612
|
|
61
61
|
datachain/lib/udf_signature.py,sha256=CUKgoVpM_N8CgvMncpAw2RYchoiJdAGdDSdluoP0hIk,7161
|
|
@@ -92,9 +92,9 @@ datachain/sql/sqlite/__init__.py,sha256=TAdJX0Bg28XdqPO-QwUVKy8rg78cgMileHvMNot7
|
|
|
92
92
|
datachain/sql/sqlite/base.py,sha256=nPMF6_FF04hclDNZev_YfxMgbJAsWEdF-rU2pUhqBtc,12048
|
|
93
93
|
datachain/sql/sqlite/types.py,sha256=oP93nLfTBaYnN0z_4Dsv-HZm8j9rrUf1esMM-z3JLbg,1754
|
|
94
94
|
datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR0,469
|
|
95
|
-
datachain-0.2.
|
|
96
|
-
datachain-0.2.
|
|
97
|
-
datachain-0.2.
|
|
98
|
-
datachain-0.2.
|
|
99
|
-
datachain-0.2.
|
|
100
|
-
datachain-0.2.
|
|
95
|
+
datachain-0.2.4.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
|
|
96
|
+
datachain-0.2.4.dist-info/METADATA,sha256=rWswQ1xeEbhMXJ3xRTiFhWjgEqjhSX4ay-ashGNxf8o,14399
|
|
97
|
+
datachain-0.2.4.dist-info/WHEEL,sha256=Z4pYXqR_rTB7OWNDYFOm1qRk0RX6GFP2o8LgvP453Hk,91
|
|
98
|
+
datachain-0.2.4.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
|
|
99
|
+
datachain-0.2.4.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
|
|
100
|
+
datachain-0.2.4.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|