datachain 0.3.19__py3-none-any.whl → 0.3.20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- datachain/catalog/catalog.py +19 -49
- {datachain-0.3.19.dist-info → datachain-0.3.20.dist-info}/METADATA +1 -1
- {datachain-0.3.19.dist-info → datachain-0.3.20.dist-info}/RECORD +7 -7
- {datachain-0.3.19.dist-info → datachain-0.3.20.dist-info}/LICENSE +0 -0
- {datachain-0.3.19.dist-info → datachain-0.3.20.dist-info}/WHEEL +0 -0
- {datachain-0.3.19.dist-info → datachain-0.3.20.dist-info}/entry_points.txt +0 -0
- {datachain-0.3.19.dist-info → datachain-0.3.20.dist-info}/top_level.txt +0 -0
datachain/catalog/catalog.py
CHANGED
|
@@ -79,6 +79,7 @@ if TYPE_CHECKING:
|
|
|
79
79
|
)
|
|
80
80
|
from datachain.dataset import DatasetVersion
|
|
81
81
|
from datachain.job import Job
|
|
82
|
+
from datachain.lib.file import File
|
|
82
83
|
|
|
83
84
|
logger = logging.getLogger("datachain")
|
|
84
85
|
|
|
@@ -1399,65 +1400,34 @@ class Catalog:
|
|
|
1399
1400
|
dataset = self.get_dataset(name)
|
|
1400
1401
|
return self.update_dataset(dataset, **update_data)
|
|
1401
1402
|
|
|
1402
|
-
def
|
|
1403
|
-
self, dataset_name: str, dataset_version: int, row: RowDict
|
|
1404
|
-
) ->
|
|
1403
|
+
def get_file_from_row(
|
|
1404
|
+
self, dataset_name: str, dataset_version: int, row: RowDict, signal_name: str
|
|
1405
|
+
) -> "File":
|
|
1405
1406
|
"""
|
|
1406
|
-
Function that returns file
|
|
1407
|
-
Note that signal names are without prefix, so if there was 'laion__file__source'
|
|
1408
|
-
in original row, result will have just 'source'
|
|
1409
|
-
Example output:
|
|
1410
|
-
{
|
|
1411
|
-
"source": "s3://ldb-public",
|
|
1412
|
-
"path": "animals/dogs/dog.jpg",
|
|
1413
|
-
...
|
|
1414
|
-
}
|
|
1407
|
+
Function that returns specific file signal from dataset row by name.
|
|
1415
1408
|
"""
|
|
1416
1409
|
from datachain.lib.file import File
|
|
1417
1410
|
from datachain.lib.signal_schema import DEFAULT_DELIMITER, SignalSchema
|
|
1418
1411
|
|
|
1419
1412
|
version = self.get_dataset(dataset_name).get_version(dataset_version)
|
|
1420
|
-
|
|
1421
|
-
file_signals_values = RowDict()
|
|
1422
|
-
|
|
1423
1413
|
schema = SignalSchema.deserialize(version.feature_schema)
|
|
1424
|
-
for file_signals in schema.get_signals(File):
|
|
1425
|
-
prefix = file_signals.replace(".", DEFAULT_DELIMITER) + DEFAULT_DELIMITER
|
|
1426
|
-
file_signals_values[file_signals] = {
|
|
1427
|
-
c_name.removeprefix(prefix): c_value
|
|
1428
|
-
for c_name, c_value in row.items()
|
|
1429
|
-
if c_name.startswith(prefix)
|
|
1430
|
-
and DEFAULT_DELIMITER not in c_name.removeprefix(prefix)
|
|
1431
|
-
}
|
|
1432
1414
|
|
|
1433
|
-
if not
|
|
1434
|
-
|
|
1435
|
-
|
|
1436
|
-
|
|
1437
|
-
|
|
1438
|
-
# to open object
|
|
1439
|
-
return next(iter(file_signals_values.values()))
|
|
1440
|
-
|
|
1441
|
-
def open_object(
|
|
1442
|
-
self,
|
|
1443
|
-
dataset_name: str,
|
|
1444
|
-
dataset_version: int,
|
|
1445
|
-
row: RowDict,
|
|
1446
|
-
use_cache: bool = True,
|
|
1447
|
-
**config: Any,
|
|
1448
|
-
):
|
|
1449
|
-
from datachain.lib.file import File
|
|
1415
|
+
if signal_name not in schema.get_signals(File):
|
|
1416
|
+
raise RuntimeError(
|
|
1417
|
+
f"File signal with path {signal_name} not found in ",
|
|
1418
|
+
f"dataset {dataset_name}@v{dataset_version} signals schema",
|
|
1419
|
+
)
|
|
1450
1420
|
|
|
1451
|
-
|
|
1452
|
-
|
|
1453
|
-
|
|
1421
|
+
prefix = signal_name.replace(".", DEFAULT_DELIMITER) + DEFAULT_DELIMITER
|
|
1422
|
+
file_signals = {
|
|
1423
|
+
c_name.removeprefix(prefix): c_value
|
|
1424
|
+
for c_name, c_value in row.items()
|
|
1425
|
+
if c_name.startswith(prefix)
|
|
1426
|
+
and DEFAULT_DELIMITER not in c_name.removeprefix(prefix)
|
|
1427
|
+
and c_name.removeprefix(prefix) in File.model_fields
|
|
1428
|
+
}
|
|
1454
1429
|
|
|
1455
|
-
|
|
1456
|
-
client = self.get_client(file_signals["source"], **config)
|
|
1457
|
-
return client.open_object(
|
|
1458
|
-
File._from_row(file_signals),
|
|
1459
|
-
use_cache=use_cache,
|
|
1460
|
-
)
|
|
1430
|
+
return File(**file_signals)
|
|
1461
1431
|
|
|
1462
1432
|
def ls(
|
|
1463
1433
|
self,
|
|
@@ -18,7 +18,7 @@ datachain/storage.py,sha256=RiSJLYdHUjnrEWkLBKPcETHpAxld_B2WxLg711t0aZI,3733
|
|
|
18
18
|
datachain/telemetry.py,sha256=0A4IOPPp9VlP5pyW9eBfaTK3YhHGzHl7dQudQjUAx9A,994
|
|
19
19
|
datachain/utils.py,sha256=KeFSRHsiYthnTu4a6bH-rw04mX1m8krTX0f2NqfQGFI,12114
|
|
20
20
|
datachain/catalog/__init__.py,sha256=g2iAAFx_gEIrqshXlhSEbrc8qDaEH11cjU40n3CHDz4,409
|
|
21
|
-
datachain/catalog/catalog.py,sha256=
|
|
21
|
+
datachain/catalog/catalog.py,sha256=MC8qxu5r0eWtVSWBxPmnYsc-0sUnkzGUZZxgwFQDhH0,64002
|
|
22
22
|
datachain/catalog/datasource.py,sha256=D-VWIVDCM10A8sQavLhRXdYSCG7F4o4ifswEF80_NAQ,1412
|
|
23
23
|
datachain/catalog/loader.py,sha256=-6VelNfXUdgUnwInVyA8g86Boxv2xqhTh9xNS-Zlwig,8242
|
|
24
24
|
datachain/client/__init__.py,sha256=T4wiYL9KIM0ZZ_UqIyzV8_ufzYlewmizlV4iymHNluE,86
|
|
@@ -97,9 +97,9 @@ datachain/sql/sqlite/base.py,sha256=WLPHBhZbXbiqPoRV1VgDrXJqku4UuvJpBhYeQ0k5rI8,
|
|
|
97
97
|
datachain/sql/sqlite/types.py,sha256=yzvp0sXSEoEYXs6zaYC_2YubarQoZH-MiUNXcpuEP4s,1573
|
|
98
98
|
datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR0,469
|
|
99
99
|
datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
|
|
100
|
-
datachain-0.3.19.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
|
|
101
|
-
datachain-0.3.
|
|
102
|
-
datachain-0.3.19.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
|
|
103
|
-
datachain-0.3.19.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
|
|
104
|
-
datachain-0.3.19.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
|
|
105
|
-
datachain-0.3.19.dist-info/RECORD,,
|
|
100
|
+
datachain-0.3.20.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
|
|
101
|
+
datachain-0.3.20.dist-info/METADATA,sha256=zFk_QWL3Ag3kxLdQPqYAFEXnTD2WkxrvJmLLGOxXpsE,17157
|
|
102
|
+
datachain-0.3.20.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
|
|
103
|
+
datachain-0.3.20.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
|
|
104
|
+
datachain-0.3.20.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
|
|
105
|
+
datachain-0.3.20.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|