datachain 0.2.17__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff compares the contents of two publicly released package versions as they appear in their public registries. It is provided for informational purposes only.
- datachain/cache.py +5 -10
- datachain/catalog/catalog.py +9 -19
- datachain/client/azure.py +5 -12
- datachain/client/fsspec.py +5 -6
- datachain/client/gcs.py +4 -14
- datachain/client/local.py +2 -4
- datachain/client/s3.py +4 -8
- datachain/data_storage/schema.py +7 -15
- datachain/data_storage/warehouse.py +26 -31
- datachain/lib/convert/sql_to_python.py +13 -18
- datachain/lib/dc.py +31 -5
- datachain/lib/file.py +19 -18
- datachain/lib/webdataset.py +2 -3
- datachain/listing.py +14 -20
- datachain/node.py +32 -21
- datachain/query/builtins.py +5 -12
- datachain/query/dataset.py +2 -2
- datachain/query/schema.py +3 -7
- datachain/sql/functions/__init__.py +3 -2
- datachain/sql/functions/array.py +8 -0
- datachain/sql/sqlite/base.py +5 -0
- {datachain-0.2.17.dist-info → datachain-0.3.0.dist-info}/METADATA +1 -1
- {datachain-0.2.17.dist-info → datachain-0.3.0.dist-info}/RECORD +27 -27
- {datachain-0.2.17.dist-info → datachain-0.3.0.dist-info}/LICENSE +0 -0
- {datachain-0.2.17.dist-info → datachain-0.3.0.dist-info}/WHEEL +0 -0
- {datachain-0.2.17.dist-info → datachain-0.3.0.dist-info}/entry_points.txt +0 -0
- {datachain-0.2.17.dist-info → datachain-0.3.0.dist-info}/top_level.txt +0 -0
datachain/cache.py
CHANGED

@@ -24,8 +24,7 @@ sha256 = partial(hashlib.sha256, usedforsecurity=False)
 @attrs.frozen
 class UniqueId:
     storage: "StorageURI"
-    parent: str
-    name: str
+    path: str
     size: int
     etag: str
     version: str = ""
@@ -34,10 +33,6 @@ class UniqueId:
     location: Optional[str] = None
    last_modified: datetime = TIME_ZERO

-    @property
-    def path(self) -> str:
-        return f"{self.parent}/{self.name}" if self.parent else self.name
-
     def get_parsed_location(self) -> Optional[dict]:
         if not self.location:
             return None
@@ -53,10 +48,10 @@ class UniqueId:
         return loc_stack[0]

     def get_hash(self) -> str:
-        return sha256(
-            f"…
-        ).hexdigest()
+        fingerprint = f"{self.storage}/{self.path}/{self.version}/{self.etag}"
+        if self.location:
+            fingerprint += f"/{self.location}"
+        return sha256(fingerprint.encode()).hexdigest()


 def try_scandir(path):
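The hunk above collapses the parent/name pair into a single path field and rebuilds the cache fingerprint from it. A minimal standalone sketch of the new get_hash logic (the field values here are hypothetical):

    import hashlib
    from functools import partial

    sha256 = partial(hashlib.sha256, usedforsecurity=False)

    # storage/path/version/etag, plus /location when present
    fingerprint = "s3://bucket/animals/dogs/dog.jpg/v1/etag123"
    print(sha256(fingerprint.encode()).hexdigest())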
datachain/catalog/catalog.py
CHANGED

@@ -529,21 +529,16 @@ def find_column_to_str(  # noqa: PLR0911
     if column == "du":
         return str(
             src.listing.du(
-                {
-                    f: row[field_lookup[f]]
-                    for f in ["dir_type", "size", "parent", "name"]
-                }
+                {f: row[field_lookup[f]] for f in ["dir_type", "size", "path"]}
             )[0]
         )
     if column == "name":
-        return row[field_lookup["name"]] or ""
+        return posixpath.basename(row[field_lookup["path"]]) or ""
     if column == "owner":
         return row[field_lookup["owner_name"]] or ""
     if column == "path":
         is_dir = row[field_lookup["dir_type"]] == DirType.DIR
-        parent = row[field_lookup["parent"]]
-        name = row[field_lookup["name"]]
-        path = f"{parent}/{name}" if parent else name
+        path = row[field_lookup["path"]]
         if is_dir and path:
             full_path = path + "/"
         else:
@@ -724,8 +719,7 @@ class Catalog:
         columns = [
             Column("vtype", String),
             Column("dir_type", Int),
-            Column("parent", String),
-            Column("name", String),
+            Column("path", String),
             Column("etag", String),
             Column("version", String),
             Column("is_latest", Boolean),
@@ -1623,8 +1617,7 @@ class Catalog:
         Example output:
             {
                 "source": "s3://ldb-public",
-                "parent": "animals/dogs",
-                "name": "dog.jpg",
+                "path": "animals/dogs/dog.jpg",
                 ...
             }
         """
@@ -1675,8 +1668,7 @@ class Catalog:
     def _get_row_uid(self, row: RowDict) -> UniqueId:
         return UniqueId(
             row["source"],
-            row["parent"],
-            row["name"],
+            row["path"],
             row["size"],
             row["etag"],
             row["version"],
@@ -2308,16 +2300,14 @@ class Catalog:
         if column == "du":
             field_set.add("dir_type")
             field_set.add("size")
-            field_set.add("parent")
-            field_set.add("name")
+            field_set.add("path")
         elif column == "name":
-            field_set.add("name")
+            field_set.add("path")
         elif column == "owner":
             field_set.add("owner_name")
         elif column == "path":
             field_set.add("dir_type")
-            field_set.add("parent")
-            field_set.add("name")
+            field_set.add("path")
         elif column == "size":
             field_set.add("size")
         elif column == "type":
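With the name column gone, catalog.py derives display names from path at read time; the substitution is equivalent to:

    import posixpath

    row = {"path": "animals/dogs/dog.jpg"}
    print(posixpath.basename(row["path"]) or "")  # dog.jpg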
datachain/client/azure.py
CHANGED

@@ -1,4 +1,3 @@
-import posixpath
 from typing import Any

 from adlfs import AzureBlobFileSystem
@@ -14,16 +13,10 @@ class AzureClient(Client):
     PREFIX = "az://"
     protocol = "az"

-    def convert_info(self, v: dict[str, Any], parent: str) -> Entry:
+    def convert_info(self, v: dict[str, Any], path: str) -> Entry:
         version_id = v.get("version_id")
-        name = v.get("name", "").split(DELIMITER)[-1]
-        if version_id:
-            version_suffix = f"?versionid={version_id}"
-            if name.endswith(version_suffix):
-                name = name[: -len(version_suffix)]
         return Entry.from_file(
-            parent=parent,
-            name=name,
+            path=path,
             etag=v.get("etag", "").strip('"'),
             version=version_id or "",
             is_latest=version_id is None or bool(v.get("is_current_version")),
@@ -50,9 +43,9 @@ class AzureClient(Client):
             if not self._is_valid_key(b["name"]):
                 continue
             info = (await self.fs._details([b]))[0]
-            …
-            …
-            …
+            entries.append(
+                self.convert_info(info, self.rel_path(info["name"]))
+            )
             if entries:
                 await result_queue.put(entries)
                 pbar.update(len(entries))
datachain/client/fsspec.py
CHANGED

@@ -277,7 +277,7 @@ class Client(ABC):
             if info["type"] == "directory":
                 subdirs.add(subprefix)
             else:
-                files.append(self.convert_info(info, …))
+                files.append(self.convert_info(info, subprefix))
         if files:
             await result_queue.put(files)
         found_count = len(subdirs) + len(files)
@@ -360,12 +360,11 @@ class Client(ABC):

         parent_uid = UniqueId(
             parent["source"],
-            parent["parent"],
-            parent["name"],
-            parent["etag"],
+            parent["path"],
             parent["size"],
-            parent["vtype"],
-            parent["location"],
+            parent["etag"],
+            vtype=parent["vtype"],
+            location=parent["location"],
         )
         f = self.open_object(parent_uid, use_cache=use_cache)
         return FileSlice(f, offset, size, posixpath.basename(uid.path))
datachain/client/gcs.py
CHANGED

@@ -1,7 +1,6 @@
 import asyncio
 import json
 import os
-import posixpath
 from collections.abc import Iterable
 from datetime import datetime
 from typing import Any, Optional, cast
@@ -110,20 +109,11 @@ class GCSClient(Client):

     def _entry_from_dict(self, d: dict[str, Any]) -> Entry:
         info = self.fs._process_object(self.name, d)
-        …
-        …
-        …
-        return self.convert_info(info, parent)
-
-    def convert_info(self, v: dict[str, Any], parent: str) -> Entry:
-        name = v.get("name", "").split(DELIMITER)[-1]
-        if "generation" in v:
-            gen = f"#{v['generation']}"
-            if name.endswith(gen):
-                name = name[: -len(gen)]
+        return self.convert_info(info, self.rel_path(info["name"]))
+
+    def convert_info(self, v: dict[str, Any], path: str) -> Entry:
         return Entry.from_file(
-            parent=parent,
-            name=name,
+            path=path,
             etag=v.get("etag", ""),
             version=v.get("generation", ""),
             is_latest=not v.get("timeDeleted"),
datachain/client/local.py
CHANGED

@@ -140,11 +140,9 @@ class FileClient(Client):
             full_path += "/"
         return full_path

-    def convert_info(self, v: dict[str, Any], parent: str) -> Entry:
-        name = posixpath.basename(v["name"])
+    def convert_info(self, v: dict[str, Any], path: str) -> Entry:
         return Entry.from_file(
-            parent=parent,
-            name=name,
+            path=path,
             etag=v["mtime"].hex(),
             is_latest=True,
             last_modified=datetime.fromtimestamp(v["mtime"], timezone.utc),
datachain/client/s3.py
CHANGED

@@ -1,5 +1,4 @@
 import asyncio
-import posixpath
 from typing import Any, cast

 from botocore.exceptions import NoCredentialsError
@@ -112,10 +111,8 @@ class ClientS3(Client):
         await self._fetch_flat(start_prefix, result_queue)

     def _entry_from_boto(self, v, bucket, versions=False):
-        parent, name = posixpath.split(v["Key"])
         return Entry.from_file(
-            parent=parent,
-            name=name,
+            path=v["Key"],
             etag=v.get("ETag", "").strip('"'),
             version=ClientS3.clean_s3_version(v.get("VersionId", "")),
             is_latest=v.get("IsLatest", True),
@@ -145,7 +142,7 @@ class ClientS3(Client):
             if info["type"] == "directory":
                 subdirs.add(subprefix)
             else:
-                files.append(self.convert_info(info, …))
+                files.append(self.convert_info(info, subprefix))
             pbar.update()
             found = True
         if not found:
@@ -159,10 +156,9 @@ class ClientS3(Client):
     def clean_s3_version(ver):
         return ver if ver != "null" else ""

-    def convert_info(self, v: dict[str, Any], parent: str) -> Entry:
+    def convert_info(self, v: dict[str, Any], path: str) -> Entry:
         return Entry.from_file(
-            parent=parent,
-            name=v.get("Key", "").split(DELIMITER)[-1],
+            path=path,
             etag=v.get("ETag", "").strip('"'),
             version=ClientS3.clean_s3_version(v.get("VersionId", "")),
             is_latest=v.get("IsLatest", True),
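Across the client modules above (azure, gcs, local, s3), convert_info now receives the full bucket-relative path instead of a parent directory, so the per-provider name trimming (version suffixes, generation markers, posixpath.split) disappears. A hedged sketch of the shared shape, with a plain dict standing in for Entry.from_file:

    # Stand-in for Entry.from_file(path=path, ...); field names follow the diff.
    def convert_info(v: dict, path: str) -> dict:
        return {"path": path, "etag": v.get("ETag", "").strip('"')}

    print(convert_info({"ETag": '"abc"'}, "animals/dogs/dog.jpg"))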
datachain/data_storage/schema.py
CHANGED

@@ -80,8 +80,7 @@ class DirExpansion:
             q.c.vtype,
             (q.c.dir_type == DirType.DIR).label("is_dir"),
             q.c.source,
-            q.c.parent,
-            q.c.name,
+            q.c.path,
             q.c.version,
             q.c.location,
         )
@@ -94,36 +93,29 @@ class DirExpansion:
             q.c.vtype,
             q.c.is_dir,
             q.c.source,
-            q.c.parent,
-            q.c.name,
+            q.c.path,
             q.c.version,
             f.max(q.c.location).label("location"),
         )
         .select_from(q)
-        .group_by(
-            q.c.source, q.c.parent, q.c.name, q.c.vtype, q.c.is_dir, q.c.version
-        )
-        .order_by(
-            q.c.source, q.c.parent, q.c.name, q.c.vtype, q.c.is_dir, q.c.version
-        )
+        .group_by(q.c.source, q.c.path, q.c.vtype, q.c.is_dir, q.c.version)
+        .order_by(q.c.source, q.c.path, q.c.vtype, q.c.is_dir, q.c.version)
     )

     @classmethod
     def query(cls, q):
         q = cls.base_select(q).cte(recursive=True)
-        …
-        parent_name = path.name(q.c.parent)
+        parent = path.parent(q.c.path)
         q = q.union_all(
             sa.select(
                 sa.literal(-1).label("sys__id"),
                 sa.literal("").label("vtype"),
                 true().label("is_dir"),
                 q.c.source,
-                …
-                parent_name.label("name"),
+                parent.label("path"),
                 sa.literal("").label("version"),
                 null().label("location"),
-            ).where(…)
+            ).where(parent != "")
         )
         return cls.apply_group_by(q)
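DirExpansion's recursive CTE now walks a single path column, emitting one synthetic directory row per ancestor until path.parent yields "". In plain Python the expansion is equivalent to:

    import posixpath

    def ancestors(path: str):
        # mirrors the recursive union: take the parent until it is ""
        parent = posixpath.dirname(path)
        while parent:
            yield parent
            parent = posixpath.dirname(parent)

    print(list(ancestors("a/b/c/d.jpg")))  # ['a/b/c', 'a/b', 'a']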
datachain/data_storage/warehouse.py
CHANGED

@@ -19,6 +19,7 @@ from datachain.client import Client
 from datachain.data_storage.serializer import Serializable
 from datachain.dataset import DatasetRecord, RowDict
 from datachain.node import DirType, DirTypeGroup, Entry, Node, NodeWithPath, get_path
+from datachain.sql.functions import path as pathfunc
 from datachain.sql.types import Int, SQLType
 from datachain.storage import StorageURI
 from datachain.utils import sql_escape_like
@@ -373,9 +374,7 @@ class AbstractWarehouse(ABC, Serializable):

         else:
             parent = self.get_node_by_path(dr, path.lstrip("/").rstrip("/*"))
-            select_query = select_query.where(
-                (dr.c.parent == parent.path) | (self.path_expr(dr) == path)
-            )
+            select_query = select_query.where(pathfunc.parent(dr.c.path) == parent.path)
         return select_query

     def rename_dataset_table(
@@ -532,8 +531,8 @@ class AbstractWarehouse(ABC, Serializable):
             dr,
             parent_path,
             type="dir",
-            conds=[sa.Column("parent") == parent_path],
-            order_by=["source", "parent", "name"],
+            conds=[pathfunc.parent(sa.Column("path")) == parent_path],
+            order_by=["source", "path"],
         )
         return self.get_nodes(query)
@@ -556,7 +555,7 @@ class AbstractWarehouse(ABC, Serializable):
             & ~self.instr(relpath, "/")
             & (self.path_expr(de) != dirpath)
         )
-        .order_by(de.c.source, de.c.parent, de.c.name, de.c.version)
+        .order_by(de.c.source, de.c.path, de.c.version)
     )

     def _get_node_by_path_list(
@@ -572,8 +571,8 @@ class AbstractWarehouse(ABC, Serializable):
         ).subquery()
         query = self.expand_query(de, dr)

-        q = query.where(…).order_by(
-            de.c.source, de.c.parent, de.c.name, de.c.version
+        q = query.where(de.c.path == get_path(parent, name)).order_by(
+            de.c.source, de.c.path, de.c.version
         )
         row = next(self.dataset_rows_select(q), None)
         if not row:
@@ -636,8 +635,7 @@ class AbstractWarehouse(ABC, Serializable):
             case((de.c.is_dir == true(), DirType.DIR), else_=dr.c.dir_type).label(
                 "dir_type"
             ),
-            de.c.parent,
-            de.c.name,
+            de.c.path,
             with_default(dr.c.etag),
             de.c.version,
             with_default(dr.c.is_latest),
@@ -670,7 +668,7 @@ class AbstractWarehouse(ABC, Serializable):
         .where(
             dr.c.is_latest == true(),
             dr.c.dir_type != DirType.DIR,
-            …
+            dr.c.path.startswith(path),
         )
         .exists()
     )
@@ -678,8 +676,7 @@ class AbstractWarehouse(ABC, Serializable):
         if not row:
             raise FileNotFoundError(f"Unable to resolve path {path}")
         path = path.removesuffix("/")
-        …
-        return Node.from_dir(parent, name)
+        return Node.from_dir(path)

     def expand_path(self, dataset_rows: "DataTable", path: str) -> list[Node]:
         """Simulates Unix-like shell expansion"""
@@ -703,18 +700,21 @@ class AbstractWarehouse(ABC, Serializable):
         de = dr.dataset_dir_expansion(
             dr.select().where(dr.c.is_latest == true()).subquery()
         ).subquery()
-        where_cond = de.c.parent == parent_path
+        where_cond = pathfunc.parent(de.c.path) == parent_path
         if parent_path == "":
             # Exclude the root dir
-            where_cond = where_cond & (de.c.name != "")
+            where_cond = where_cond & (de.c.path != "")
         inner_query = self.expand_query(de, dr).where(where_cond).subquery()
+
+        def field_to_expr(f):
+            if f == "name":
+                return pathfunc.name(inner_query.c.path)
+            return getattr(inner_query.c, f)
+
         return self.db.execute(
-            select(…)
-            .select_from(inner_query)
-            .order_by(
+            select(*(field_to_expr(f) for f in fields)).order_by(
                 inner_query.c.source,
-                inner_query.c.parent,
-                inner_query.c.name,
+                inner_query.c.path,
                 inner_query.c.version,
             )
         )
@@ -727,21 +727,20 @@ class AbstractWarehouse(ABC, Serializable):
         """
         dr = dataset_rows
         dirpath = f"{parent_path}/"
-        relpath = func.substr(self.path_expr(dr), len(dirpath) + 1)

         def field_to_expr(f):
             if f == "name":
-                return dr.c.name
+                return pathfunc.name(dr.c.path)
             return getattr(dr.c, f)

         q = (
             select(*(field_to_expr(f) for f in fields))
             .where(
                 self.path_expr(dr).like(f"{sql_escape_like(dirpath)}%"),
-                ~self.instr(relpath, "/"),
+                ~self.instr(pathfunc.name(dr.c.path), "/"),
                 dr.c.is_latest == true(),
             )
-            .order_by(dr.c.source, dr.c.parent, dr.c.name, dr.c.version, dr.c.etag)
+            .order_by(dr.c.source, dr.c.path, dr.c.version, dr.c.etag)
         )
         return self.db.execute(q)
@@ -758,7 +757,7 @@ class AbstractWarehouse(ABC, Serializable):
         if isinstance(node, dict):
             is_dir = node.get("is_dir", node["dir_type"] in DirTypeGroup.SUBOBJ_DIR)
             node_size = node["size"]
-            path = get_path(node["parent"], node["name"])
+            path = node["path"]
         else:
             is_dir = node.is_container
             node_size = node.size
@@ -790,7 +789,7 @@ class AbstractWarehouse(ABC, Serializable):
         return results[0] or 0, 0

     def path_expr(self, t):
-        return …
+        return t.c.path

     def _find_query(
         self,
@@ -947,11 +946,7 @@ class AbstractWarehouse(ABC, Serializable):
         tq = target_query.alias("target_query")

         source_target_join = sa.join(
-            sq,
-            tq,
-            (sq.c.source == tq.c.source)
-            & (sq.c.parent == tq.c.parent)
-            & (sq.c.name == tq.c.name),
+            sq, tq, (sq.c.source == tq.c.source) & (sq.c.path == tq.c.path)
         )

         return (
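Directory membership is now expressed as pathfunc.parent(path) == parent_path instead of comparing a stored parent column. A sketch of the resulting predicate, using sa.func.parent as a stand-in for datachain's path.parent GenericFunction:

    import sqlalchemy as sa

    nodes = sa.table("nodes", sa.column("path"))
    # stand-in for datachain.sql.functions.path.parent
    cond = sa.func.parent(nodes.c.path) == "animals/dogs"
    print(sa.select(nodes.c.path).where(cond))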
datachain/lib/convert/sql_to_python.py
CHANGED

@@ -1,23 +1,18 @@
-from …
+from decimal import Decimal
 from typing import Any

-from sqlalchemy import …
+from sqlalchemy import ColumnElement

-from datachain.data_storage.sqlite import Column

-SQL_TO_PYTHON = {
-    …
-}
+def sql_to_python(args_map: dict[str, ColumnElement]) -> dict[str, Any]:
+    res = {}
+    for name, sql_exp in args_map.items():
+        try:
+            type_ = sql_exp.type.python_type
+            if type_ == Decimal:
+                type_ = float
+        except NotImplementedError:
+            type_ = str
+        res[name] = type_

-
-def sql_to_python(args_map: dict[str, Column]) -> dict[str, Any]:
-    return {
-        k: SQL_TO_PYTHON.get(type(v.type), str)  # type: ignore[union-attr]
-        for k, v in args_map.items()
-    }
+    return res
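The static SQL_TO_PYTHON lookup is replaced by SQLAlchemy's own type introspection: python_type raises NotImplementedError for untyped expressions, which the new code maps to str, and Decimal is normalized to float. A quick check of the underlying behavior:

    import sqlalchemy as sa
    from sqlalchemy.sql.sqltypes import NullType

    print(sa.Integer().python_type)  # <class 'int'>
    try:
        NullType().python_type
    except NotImplementedError:
        print("untyped expression -> falls back to str")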
datachain/lib/dc.py
CHANGED

@@ -20,8 +20,10 @@ import pandas as pd
 import sqlalchemy
 from pydantic import BaseModel, create_model
 from sqlalchemy.sql.functions import GenericFunction
+from sqlalchemy.sql.sqltypes import NullType

 from datachain import DataModel
+from datachain.lib.convert.python_to_sql import python_to_sql
 from datachain.lib.convert.values_to_tuples import values_to_tuples
 from datachain.lib.data_model import DataType
 from datachain.lib.dataset_info import DatasetInfo
@@ -47,6 +49,7 @@ from datachain.query.dataset import (
     detach,
 )
 from datachain.query.schema import Column, DatasetRow
+from datachain.sql.functions import path as pathfunc
 from datachain.utils import inside_notebook

 if TYPE_CHECKING:
@@ -110,6 +113,11 @@ class DatasetMergeError(DataChainParamsError):  # noqa: D101
         super().__init__(f"Merge error on='{on_str}'{right_on_str}: {msg}")


+class DataChainColumnError(DataChainParamsError):  # noqa: D101
+    def __init__(self, col_name, msg):  # noqa: D107
+        super().__init__(f"Error for column {col_name}: {msg}")
+
+
 OutputType = Union[None, DataType, Sequence[str], dict[str, DataType]]


@@ -195,7 +203,7 @@ class DataChain(DatasetQuery):

     DEFAULT_FILE_RECORD: ClassVar[dict] = {
         "source": "",
-        "name": "",
+        "path": "",
         "vtype": "",
         "size": 0,
     }
@@ -225,6 +233,17 @@ class DataChain(DatasetQuery):
         """Get schema of the chain."""
         return self._effective_signals_schema.values

+    def column(self, name: str) -> Column:
+        """Returns Column instance with a type if name is found in current schema,
+        otherwise raises an exception.
+        """
+        name_path = name.split(".")
+        for path, type_, _, _ in self.signals_schema.get_flat_tree():
+            if path == name_path:
+                return Column(name, python_to_sql(type_))
+
+        raise ValueError(f"Column with name {name} not found in the schema")
+
     def print_schema(self) -> None:
         """Print schema of the chain."""
         self._effective_signals_schema.print_tree()
@@ -829,6 +848,12 @@ class DataChain(DatasetQuery):
         )
         ```
         """
+        for col_name, expr in kwargs.items():
+            if not isinstance(expr, Column) and isinstance(expr.type, NullType):
+                raise DataChainColumnError(
+                    col_name, f"Cannot infer type with expression {expr}"
+                )
+
         mutated = {}
         schema = self.signals_schema
         for name, value in kwargs.items():
@@ -1562,10 +1587,11 @@ class DataChain(DatasetQuery):
         use_cache: bool = True,
     ) -> None:
         """Method that exports all files from chain to some folder."""
-        if placement == "filename" and (
-            …
-        ):
-            raise ValueError("Files with the same name found")
+        if placement == "filename" and (
+            super().distinct(pathfunc.name(C(f"{signal}__path"))).count()
+            != self.count()
+        ):
+            raise ValueError("Files with the same name found")

         for file in self.collect(signal):
             file.export(output, placement, use_cache)  # type: ignore[union-attr]
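The new mutate() guard rejects expressions whose SQL type SQLAlchemy cannot infer (their .type is NullType). The condition it tests can be reproduced standalone:

    import sqlalchemy as sa
    from sqlalchemy.sql.sqltypes import NullType

    expr = sa.literal_column("a") + sa.literal_column("b")  # no type info
    # True here means mutate() would raise DataChainColumnError for this kwarg
    print(isinstance(expr.type, NullType))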
datachain/lib/file.py
CHANGED

@@ -6,7 +6,7 @@ from abc import ABC, abstractmethod
 from contextlib import contextmanager
 from datetime import datetime
 from io import BytesIO
-from pathlib import Path
+from pathlib import Path, PurePosixPath
 from typing import TYPE_CHECKING, Any, ClassVar, Literal, Optional, Union
 from urllib.parse import unquote, urlparse
 from urllib.request import url2pathname
@@ -111,8 +111,7 @@ class File(DataModel):
     """`DataModel` for reading binary files."""

     source: str = Field(default="")
-    parent: str = Field(default="")
-    name: str
+    path: str
     size: int = Field(default=0)
     version: str = Field(default="")
     etag: str = Field(default="")
@@ -123,8 +122,7 @@ class File(DataModel):

     _datachain_column_types: ClassVar[dict[str, Any]] = {
         "source": String,
-        "parent": String,
-        "name": String,
+        "path": String,
         "size": Int,
         "version": String,
         "etag": String,
@@ -136,8 +134,7 @@ class File(DataModel):

     _unique_id_keys: ClassVar[list[str]] = [
         "source",
-        "parent",
-        "name",
+        "path",
         "size",
         "etag",
         "version",
@@ -168,11 +165,9 @@ class File(DataModel):
     def validate_location(cls, v):
         return File._validate_dict(v)

-    @field_validator("…", mode="before")
+    @field_validator("path", mode="before")
     @classmethod
     def validate_path(cls, path):
-        if path == "":
-            return ""
         return Path(path).as_posix()
@@ -185,6 +180,14 @@ class File(DataModel):
         self._catalog = None
         self._caching_enabled = False

+    @property
+    def name(self):
+        return PurePosixPath(self.path).name
+
+    @property
+    def parent(self):
+        return str(PurePosixPath(self.path).parent)
+
     @contextmanager
     def open(self, mode: Literal["rb", "r"] = "rb"):
         """Open the file and return a file object."""
@@ -261,19 +264,19 @@ class File(DataModel):

     def get_file_suffix(self):
         """Returns last part of file name with `.`."""
-        return …
+        return PurePosixPath(self.path).suffix

     def get_file_ext(self):
         """Returns last part of file name without `.`."""
-        return …
+        return PurePosixPath(self.path).suffix.strip(".")

     def get_file_stem(self):
         """Returns file name without extension."""
-        return …
+        return PurePosixPath(self.path).stem

     def get_full_name(self):
         """Returns name with parent directories."""
-        return f"{self.parent}/{self.name}" if self.parent else self.name
+        return self.path

     def get_uri(self):
         """Returns file URI."""
@@ -355,8 +358,7 @@ def get_file(type_: Literal["binary", "text", "image"] = "binary"):

     def get_file_type(
         source: str,
-        parent: str,
-        name: str,
+        path: str,
         size: int,
         version: str,
         etag: str,
@@ -367,8 +369,7 @@ def get_file(type_: Literal["binary", "text", "image"] = "binary"):
     ) -> file:  # type: ignore[valid-type]
         return file(
             source=source,
-            parent=parent,
-            name=name,
+            path=path,
             size=size,
             version=version,
             etag=etag,
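name, parent, and the get_file_* helpers are now all derived from the single path field via PurePosixPath; the equivalent standalone behavior:

    from pathlib import PurePosixPath

    p = PurePosixPath("animals/dogs/dog.jpg")
    print(p.name)               # dog.jpg      (File.name)
    print(str(p.parent))        # animals/dogs (File.parent)
    print(p.suffix)             # .jpg         (get_file_suffix)
    print(p.suffix.strip("."))  # jpg          (get_file_ext)
    print(p.stem)               # dog          (get_file_stem)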
datachain/lib/webdataset.py
CHANGED

@@ -119,7 +119,7 @@ class Builder:
         return self._tar.extractfile(item).read().decode(self._encoding)

     def add(self, file: tarfile.TarInfo):
-        fstream = File(name=file.name)
+        fstream = File(path=file.name)
         ext = fstream.get_file_ext()
         stem = fstream.get_file_stem()

@@ -176,9 +176,8 @@ class Builder:
         )
         etag = hashlib.md5(etag_string.encode(), usedforsecurity=False).hexdigest()
         return File(
-            name=core_file.name,
             source=self._tar_stream.source,
-            parent=new_parent,
+            path=f"{new_parent}/{core_file.name}",
             version=self._tar_stream.version,
             size=core_file.size,
             etag=etag,
datachain/listing.py
CHANGED

@@ -5,11 +5,12 @@ from itertools import zip_longest
 from typing import TYPE_CHECKING, Optional

 from fsspec.asyn import get_loop, sync
-from sqlalchemy import Column, case
+from sqlalchemy import Column
 from sqlalchemy.sql import func
 from tqdm import tqdm

 from datachain.node import DirType, Entry, Node, NodeWithPath
+from datachain.sql.functions import path as pathfunc
 from datachain.utils import suffix_to_number

 if TYPE_CHECKING:
@@ -129,7 +130,7 @@ class Listing:
         dir_path = []
         if not copy_dir_contents:
             dir_path.append(node.name)
-        subtree_nodes = src.find(sort=["parent", "name"])
+        subtree_nodes = src.find(sort=["path"])
         all_nodes.extend(
             NodeWithPath(n.n, path=dir_path + n.path) for n in subtree_nodes
         )
@@ -148,8 +149,7 @@ class Listing:
         elif from_dataset:
             node_path = [
                 src.listing.client.name,
-                node.parent,
-                node.name,
+                node.path,
             ]
         else:
             node_path = [node.name]
@@ -201,25 +201,19 @@ class Listing:
         dr = self.dataset_rows
         conds = []
         if names:
-            f = Column("name").op("GLOB")
-            conds.extend(f(name) for name in names)
+            for name in names:
+                conds.append(pathfunc.name(Column("path")).op("GLOB")(name))
         if inames:
-            f = func.lower(Column("name")).op("GLOB")
-            conds.extend(f(iname.lower()) for iname in inames)
+            for iname in inames:
+                conds.append(
+                    func.lower(pathfunc.name(Column("path"))).op("GLOB")(iname.lower())
+                )
         if paths:
-            node_path = case(
-                (Column("parent") == "", Column("name")),
-                else_=Column("parent") + "/" + Column("name"),
-            )
-            f = node_path.op("GLOB")
-            conds.extend(f(path) for path in paths)
+            for path in paths:
+                conds.append(Column("path").op("GLOB")(path))
         if ipaths:
-            node_path = case(
-                (Column("parent") == "", Column("name")),
-                else_=Column("parent") + "/" + Column("name"),
-            )
-            f = func.lower(node_path).op("GLOB")
-            conds.extend(f(ipath.lower()) for ipath in ipaths)
+            for ipath in ipaths:
+                conds.append(func.lower(Column("path")).op("GLOB")(ipath.lower()))

         if size is not None:
             size_limit = suffix_to_number(size)
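Name and path filters now GLOB-match against the single path column (or its basename for name filters) rather than a reconstructed parent/name expression. Roughly, in Python terms:

    import posixpath
    from fnmatch import fnmatchcase  # close analogue of SQLite GLOB (case-sensitive)

    rows = ["animals/dogs/dog.jpg", "animals/cats/cat.jpg"]
    print([p for p in rows if fnmatchcase(posixpath.basename(p), "dog.*")])
    print([p for p in rows if fnmatchcase(p, "animals/*/cat.jpg")])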
datachain/node.py
CHANGED

@@ -50,8 +50,7 @@ class Node:
     sys__rand: int = -1
     vtype: str = ""
     dir_type: Optional[int] = None
-    parent: str = ""
-    name: str = ""
+    path: str = ""
     etag: str = ""
     version: Optional[str] = None
     is_latest: bool = True
@@ -62,10 +61,6 @@ class Node:
     location: Optional[str] = None
     source: StorageURI = StorageURI("")

-    @property
-    def path(self) -> str:
-        return f"{self.parent}/{self.name}" if self.parent else self.name
-
     @property
     def is_dir(self) -> bool:
         return self.dir_type == DirType.DIR
@@ -107,13 +102,12 @@ class Node:
             return self.path + "/"
         return self.path

-    def as_uid(self, storage: Optional[StorageURI] = None):
+    def as_uid(self, storage: Optional[StorageURI] = None) -> UniqueId:
         if storage is None:
             storage = self.source
         return UniqueId(
             storage=storage,
-            parent=self.parent,
-            name=self.name,
+            path=self.path,
             size=self.size,
             version=self.version or "",
             etag=self.etag,
@@ -129,20 +123,30 @@ class Node:
         return cls(**kw)

     @classmethod
-    def from_dir(cls, parent, name, **kwargs) -> "Node":
-        return cls(sys__id=-1, dir_type=DirType.DIR, parent=parent, name=name, **kwargs)
+    def from_dir(cls, path, **kwargs) -> "Node":
+        return cls(sys__id=-1, dir_type=DirType.DIR, path=path, **kwargs)

     @classmethod
     def root(cls) -> "Node":
         return cls(sys__id=-1, dir_type=DirType.DIR)

+    @property
+    def name(self):
+        return self.path.rsplit("/", 1)[-1]
+
+    @property
+    def parent(self):
+        split = self.path.rsplit("/", 1)
+        if len(split) <= 1:
+            return ""
+        return split[0]
+

 @attrs.define
 class Entry:
     vtype: str = ""
     dir_type: Optional[int] = None
-    parent: str = ""
-    name: str = ""
+    path: str = ""
     etag: str = ""
     version: str = ""
     is_latest: bool = True
@@ -157,27 +161,34 @@ class Entry:
         return self.dir_type == DirType.DIR

     @classmethod
-    def from_dir(cls, parent: str, name: str, **kwargs) -> "Entry":
-        return cls(dir_type=DirType.DIR, parent=parent, name=name, **kwargs)
+    def from_dir(cls, path: str, **kwargs) -> "Entry":
+        return cls(dir_type=DirType.DIR, path=path, **kwargs)

     @classmethod
-    def from_file(cls, parent: str, name: str, **kwargs) -> "Entry":
-        return cls(dir_type=DirType.FILE, parent=parent, name=name, **kwargs)
+    def from_file(cls, path: str, **kwargs) -> "Entry":
+        return cls(dir_type=DirType.FILE, path=path, **kwargs)

     @classmethod
     def root(cls):
         return cls(dir_type=DirType.DIR)

-    @property
-    def path(self) -> str:
-        return f"{self.parent}/{self.name}" if self.parent else self.name
-
     @property
     def full_path(self) -> str:
         if self.is_dir and self.path:
             return self.path + "/"
         return self.path

+    @property
+    def name(self):
+        return self.path.rsplit("/", 1)[-1]
+
+    @property
+    def parent(self):
+        split = self.path.rsplit("/", 1)
+        if len(split) <= 1:
+            return ""
+        return split[0]
+

 def get_path(parent: str, name: str):
     return f"{parent}/{name}" if parent else name
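The new name/parent properties on Node and Entry are a plain rsplit on the stored path; the len(split) <= 1 guard makes parent return "" for top-level entries:

    def parent(path: str) -> str:
        split = path.rsplit("/", 1)
        return "" if len(split) <= 1 else split[0]

    print(parent("a/b/c.txt"))             # a/b
    print(parent("c.txt"))                 # "" (top-level)
    print("a/b/c.txt".rsplit("/", 1)[-1])  # c.txt (the name property)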
datachain/query/builtins.py
CHANGED

@@ -20,8 +20,7 @@ def load_tar(raw):
 @udf(
     (
         C.source,
-        C.name,
-        C.parent,
+        C.path,
         C.size,
         C.vtype,
         C.dir_type,
@@ -37,8 +36,7 @@ def load_tar(raw):
 )
 def index_tar(
     source,
-    name,
-    parent,
+    parent_path,
     size,
     vtype,
     dir_type,
@@ -52,9 +50,8 @@ def index_tar(
 ):
     # generate original tar files as well, along with subobjects
     yield DatasetRow.create(
-        name,
         source=source,
-        parent=parent,
+        path=parent_path,
         size=size,
         vtype=vtype,
         dir_type=dir_type,
@@ -66,15 +63,12 @@ def index_tar(
         etag=etag,
     )

-    parent_path = name if not parent else f"{parent}/{name}"
     for info in tar_entries:
         if info.isfile():
             full_path = f"{parent_path}/{info.name}"
-            parent_dir, subobject_name = full_path.rsplit("/", 1)
             yield DatasetRow.create(
-                subobject_name,
                 source=source,
-                parent=parent_dir,
+                path=full_path,
                 size=info.size,
                 vtype="tar",
                 location={
@@ -83,8 +77,7 @@ def index_tar(
                     "size": info.size,
                     "parent": {
                         "source": source,
-                        "parent": parent,
-                        "name": name,
+                        "path": parent_path,
                         "version": version,
                         "size": size,
                         "etag": etag,
datachain/query/dataset.py
CHANGED

@@ -307,7 +307,7 @@ class Subtract(DatasetDiffOperation):
 class Changed(DatasetDiffOperation):
     """
     Calculates rows that are changed in a source query compared to target query
-    Changed means it has same source + parent + name but different last_modified
+    Changed means it has same source + path but different last_modified
     Example:
     >>> ds = DatasetQuery(name="dogs_cats")  # some older dataset with embeddings
     >>> ds_updated = (
@@ -1526,7 +1526,7 @@ class DatasetQuery:

     @detach
     def subtract(self, dq: "DatasetQuery") -> "Self":
-        return self._subtract(dq, on=["source", "parent", "name"])
+        return self._subtract(dq, on=["source", "path"])

     @detach
     def _subtract(self, dq: "DatasetQuery", on: Sequence[str]) -> "Self":
datachain/query/schema.py
CHANGED

@@ -215,8 +215,7 @@ def normalize_param(param: UDFParamSpec) -> UDFParameter:
 class DatasetRow:
     schema: ClassVar[dict[str, type[SQLType]]] = {
         "source": String,
-        "parent": String,
-        "name": String,
+        "path": String,
         "size": Int64,
         "location": JSON,
         "vtype": String,
@@ -231,9 +230,8 @@ class DatasetRow:

     @staticmethod
     def create(
-        name: str,
+        path: str,
         source: str = "",
-        parent: str = "",
         size: int = 0,
         location: Optional[dict[str, Any]] = None,
         vtype: str = "",
@@ -245,7 +243,6 @@ class DatasetRow:
         version: str = "",
         etag: str = "",
     ) -> tuple[
-        str,
         str,
         str,
         int,
@@ -267,8 +264,7 @@ class DatasetRow:

         return (  # type: ignore [return-value]
             source,
-            parent,
-            name,
+            path,
             size,
             location,
             vtype,

datachain/sql/functions/__init__.py
CHANGED

@@ -1,16 +1,17 @@
 from sqlalchemy.sql.expression import func

-from . import path, string
+from . import array, path, string
+from .array import avg
 from .conditional import greatest, least
 from .random import rand

 count = func.count
 sum = func.sum
-avg = func.avg
 min = func.min
 max = func.max

 __all__ = [
+    "array",
     "avg",
     "count",
     "func",
datachain/sql/functions/array.py
CHANGED

@@ -44,7 +44,15 @@ class sip_hash_64(GenericFunction):  # noqa: N801
     inherit_cache = True


+class avg(GenericFunction):  # noqa: N801
+    type = Float()
+    package = "array"
+    name = "avg"
+    inherit_cache = True
+
+
 compiler_not_implemented(cosine_distance)
 compiler_not_implemented(euclidean_distance)
 compiler_not_implemented(length)
 compiler_not_implemented(sip_hash_64)
+compiler_not_implemented(avg)
datachain/sql/sqlite/base.py
CHANGED

@@ -78,6 +78,7 @@ def setup():
     compiles(conditional.least, "sqlite")(compile_least)
     compiles(Values, "sqlite")(compile_values)
     compiles(random.rand, "sqlite")(compile_rand)
+    compiles(array.avg, "sqlite")(compile_avg)

     if load_usearch_extension(sqlite3.connect(":memory:")):
         compiles(array.cosine_distance, "sqlite")(compile_cosine_distance_ext)
@@ -349,6 +350,10 @@ def compile_rand(element, compiler, **kwargs):
     return compiler.process(func.random(), **kwargs)


+def compile_avg(element, compiler, **kwargs):
+    return compiler.process(func.avg(*element.clauses.clauses), **kwargs)
+
+
 def load_usearch_extension(conn) -> bool:
     try:
         # usearch is part of the vector optional dependencies
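array.avg is a GenericFunction that each dialect compiles; on SQLite it lowers to the built-in avg(). The same mechanism can be reproduced standalone with SQLAlchemy's @compiles hook (my_avg is a hypothetical stand-in mirroring compile_avg above):

    import sqlalchemy as sa
    from sqlalchemy.dialects import sqlite
    from sqlalchemy.ext.compiler import compiles
    from sqlalchemy.sql.functions import GenericFunction

    class my_avg(GenericFunction):  # noqa: N801
        type = sa.Float()
        inherit_cache = True

    @compiles(my_avg, "sqlite")
    def compile_my_avg(element, compiler, **kwargs):
        # delegate to SQLite's built-in avg()
        return compiler.process(sa.func.avg(*element.clauses.clauses), **kwargs)

    stmt = sa.select(my_avg(sa.column("x")))
    print(stmt.compile(dialect=sqlite.dialect()))  # SELECT avg(x) AS my_avg_1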
{datachain-0.2.17.dist-info → datachain-0.3.0.dist-info}/RECORD
CHANGED

@@ -1,15 +1,15 @@
 datachain/__init__.py,sha256=GeyhE-5LgfJav2OKYGaieP2lBvf2Gm-ihj7thnK9zjI,800
 datachain/__main__.py,sha256=hG3Y4ARGEqe1AWwNMd259rBlqtphx1Wk39YbueQ0yV8,91
 datachain/asyn.py,sha256=CKCFQJ0CbB3r04S7mUTXxriKzPnOvdUaVPXjM8vCtJw,7644
-datachain/cache.py,sha256=…
+datachain/cache.py,sha256=wznC2pge6RhlPTaJfBVGjmBc6bxWCPThu4aTFMltvFU,4076
 datachain/cli.py,sha256=DbmI1sXs7-KCQz6RdLE_JAp3XO3yrTSRJ71LdUzx-XE,33099
 datachain/cli_utils.py,sha256=jrn9ejGXjybeO1ur3fjdSiAyCHZrX0qsLLbJzN9ErPM,2418
 datachain/config.py,sha256=PfC7W5yO6HFO6-iMB4YB-0RR88LPiGmD6sS_SfVbGso,1979
 datachain/dataset.py,sha256=MZezyuJWNj_3PEtzr0epPMNyWAOTrhTSPI5FmemV6L4,14470
 datachain/error.py,sha256=GY9KYTmb7GHXn2gGHV9X-PBhgwLj3i7VpK7tGHtAoGM,1279
 datachain/job.py,sha256=bk25bIqClhgRPzlXAhxpTtDeewibQe5l3S8Cf7db0gM,1229
-datachain/listing.py,sha256=…
-datachain/node.py,sha256=…
+datachain/listing.py,sha256=keLkvPfumDA3gijeIiinH5yGWe71qCxgF5HqqP5AeH4,8299
+datachain/node.py,sha256=frxZWoEvqUvk9pyXmVaeiNCs3W-xjC_sENmUD11V06Q,6006
 datachain/nodes_fetcher.py,sha256=kca19yvu11JxoVY1t4_ydp1FmchiV88GnNicNBQ9NIA,831
 datachain/nodes_thread_pool.py,sha256=ZyzBvUImIPmi4WlKC2SW2msA0UhtembbTdcs2nx29A0,3191
 datachain/progress.py,sha256=7_8FtJs770ITK9sMq-Lt4k4k18QmYl4yIG_kCoWID3o,4559
@@ -17,33 +17,33 @@ datachain/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datachain/storage.py,sha256=RiSJLYdHUjnrEWkLBKPcETHpAxld_B2WxLg711t0aZI,3733
 datachain/utils.py,sha256=kgH5NPj47eC_KrFTd6ZS206lKVhnJVFt5XsqkK6ppTc,12483
 datachain/catalog/__init__.py,sha256=g2iAAFx_gEIrqshXlhSEbrc8qDaEH11cjU40n3CHDz4,409
-datachain/catalog/catalog.py,sha256=…
+datachain/catalog/catalog.py,sha256=BJ8ZP9mleUbN5Y4CoYJ94R_tnnsA9sHdZq2RBGwVN5Y,80291
 datachain/catalog/datasource.py,sha256=D-VWIVDCM10A8sQavLhRXdYSCG7F4o4ifswEF80_NAQ,1412
 datachain/catalog/loader.py,sha256=GJ8zhEYkC7TuaPzCsjJQ4LtTdECu-wwYzC12MikPOMQ,7307
 datachain/catalog/subclass.py,sha256=B5R0qxeTYEyVAAPM1RutBPSoXZc8L5mVVZeSGXki9Sw,2096
 datachain/client/__init__.py,sha256=T4wiYL9KIM0ZZ_UqIyzV8_ufzYlewmizlV4iymHNluE,86
-datachain/client/azure.py,sha256=…
+datachain/client/azure.py,sha256=3RfDTAI_TszDy9WazHQd3bI3sS2wDFrNXfNqCDewZgE,2214
 datachain/client/fileslice.py,sha256=bT7TYco1Qe3bqoc8aUkUZcPdPofJDHlryL5BsTn9xsY,3021
-datachain/client/fsspec.py,sha256=…
-datachain/client/gcs.py,sha256=…
-datachain/client/local.py,sha256=…
-datachain/client/s3.py,sha256=…
+datachain/client/fsspec.py,sha256=VrssoNenXsFxznr-Xx1haZPlXU-dr-WHdxmdbgFI_UA,13378
+datachain/client/gcs.py,sha256=Mt77W_l8_fK61gLm4mmxNmENuOM0ETwxdiFp4S8d-_w,4105
+datachain/client/local.py,sha256=yhC-pMKdprJ-rMGwPpBmPkdkG5riIIKkVSe6kNpyCok,5076
+datachain/client/s3.py,sha256=GfRZZzNPQPRsYjoef8bbsLbanJPUlCbyGTTK8ojzp8A,6136
 datachain/data_storage/__init__.py,sha256=cEOJpyu1JDZtfUupYucCDNFI6e5Wmp_Oyzq6rZv32Y8,398
 datachain/data_storage/db_engine.py,sha256=81Ol1of9TTTzD97ORajCnP366Xz2mEJt6C-kTUCaru4,3406
 datachain/data_storage/id_generator.py,sha256=lCEoU0BM37Ai2aRpSbwo5oQT0GqZnSpYwwvizathRMQ,4292
 datachain/data_storage/job.py,sha256=w-7spowjkOa1P5fUVtJou3OltT0L48P0RYWZ9rSJ9-s,383
 datachain/data_storage/metastore.py,sha256=nxcY6nwyEmQWMAo33sNGO-FgUFQs2amBGGnZz2ftEz0,55362
-datachain/data_storage/schema.py,sha256=…
+datachain/data_storage/schema.py,sha256=Idi-29fckvZozzvkyz3nTR2FOIajPlSuPdIEO7SMvXM,7863
 datachain/data_storage/serializer.py,sha256=6G2YtOFqqDzJf1KbvZraKGXl2XHZyVml2krunWUum5o,927
 datachain/data_storage/sqlite.py,sha256=0r6L_a2hdGRoR_gl06v1qWhEFOS_Q31aldHyk07Yx-M,26857
-datachain/data_storage/warehouse.py,sha256=…
+datachain/data_storage/warehouse.py,sha256=eEZvzYwpqwzzLXqHWjB6l4tRsIHifIr8VWI5STm53LE,33310
 datachain/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datachain/lib/arrow.py,sha256=R8wDUDEa-5hYjI3HW9cqvOYYJpeeah5lbhFIL3gkmcE,4915
 datachain/lib/clip.py,sha256=16u4b_y2Y15nUS2UN_8ximMo6r_-_4IQpmct2ol-e-g,5730
 datachain/lib/data_model.py,sha256=qfTtQNncS5pt9SvXdMEa5kClniaT6XBGBfO7onEz2TI,1632
 datachain/lib/dataset_info.py,sha256=lONGr71ozo1DS4CQEhnpKORaU4qFb6Ketv8Xm8CVm2U,2188
-datachain/lib/dc.py,sha256=…
-datachain/lib/file.py,sha256=…
+datachain/lib/dc.py,sha256=bU45N7vBlxSyS6bpe0ShQ1c0DpXKFVfNcFcvbBrE1Ag,58011
+datachain/lib/file.py,sha256=ZHpdilDPYCob8uqtwUPtBvBNxVvQRq4AC_0IGg5m-G4,12003
 datachain/lib/image.py,sha256=TgYhRhzd4nkytfFMeykQkPyzqb5Le_-tU81unVMPn4Q,2328
 datachain/lib/meta_formats.py,sha256=jlSYWRUeDMjun_YCsQ2JxyaDJpEpokzHDPmKUAoCXnU,7034
 datachain/lib/model_store.py,sha256=c4USXsBBjrGH8VOh4seIgOiav-qHOwdoixtxfLgU63c,2409
@@ -55,22 +55,22 @@ datachain/lib/udf.py,sha256=IjuDt2B8E3xEHhcJnaK_ZhmivdrOYPXz5uf7ylpktws,11815
 datachain/lib/udf_signature.py,sha256=gMStcEeYJka5M6cg50Z9orC6y6HzCAJ3MkFqqn1fjZg,7137
 datachain/lib/utils.py,sha256=5-kJlAZE0D9nXXweAjo7-SP_AWGo28feaDByONYaooQ,463
 datachain/lib/vfile.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datachain/lib/webdataset.py,sha256=…
+datachain/lib/webdataset.py,sha256=SsjCKLSKEkHRRfeTHQhjoGqNPqIWw_SCWQcUwgUWWP0,8282
 datachain/lib/webdataset_laion.py,sha256=PQP6tQmUP7Xu9fPuAGK1JDBYA6T5UufYMUTGaxgspJA,2118
 datachain/lib/convert/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datachain/lib/convert/flatten.py,sha256=YMoC00BqEy3zSpvCp6Q0DfxihuPmgjUJj1g2cesWGPs,1790
 datachain/lib/convert/python_to_sql.py,sha256=4gplGlr_Kg-Z40OpJUzJiarDWj7pwbUOk-dPOYYCJ9Q,2629
-datachain/lib/convert/sql_to_python.py,sha256=…
+datachain/lib/convert/sql_to_python.py,sha256=lGnKzSF_tz9Y_5SSKkrIU95QEjpcDzvOxIRkEKTQag0,443
 datachain/lib/convert/unflatten.py,sha256=Ogvh_5wg2f38_At_1lN0D_e2uZOOpYEvwvB2xdq56Tw,2012
 datachain/lib/convert/values_to_tuples.py,sha256=aVoHWMOUGLAiS6_BBwKJqVIne91VffOW6-dWyNE7oHg,3715
 datachain/query/__init__.py,sha256=tv-spkjUCYamMN9ys_90scYrZ8kJ7C7d1MTYVmxGtk4,325
 datachain/query/batch.py,sha256=j-_ZcuQra2Ro3Wj4crtqQCg-7xuv-p84hr4QHdvT7as,3479
-datachain/query/builtins.py,sha256=…
-datachain/query/dataset.py,sha256=…
+datachain/query/builtins.py,sha256=EmKPYsoQ46zwdyOn54MuCzvYFmfsBn5F8zyF7UBUfrc,2550
+datachain/query/dataset.py,sha256=nfRRz6mkUz0tcD084rx-ps4PUWnZr5JQlIlRUF-PpSc,59919
 datachain/query/dispatch.py,sha256=oGX9ZuoKWPB_EyqAZD_eULcO3OejY44_keSmFS6SHT0,13315
 datachain/query/metrics.py,sha256=vsECqbZfoSDBnvC3GQlziKXmISVYDLgHP1fMPEOtKyo,640
 datachain/query/params.py,sha256=O_j89mjYRLOwWNhYZl-z7mi-rkdP7WyFmaDufsdTryE,863
-datachain/query/schema.py,sha256=…
+datachain/query/schema.py,sha256=O3mTM5DRjvRAJCI7O9mR8wOdFJbgI1jIjvtfl5YvjI4,7755
 datachain/query/session.py,sha256=qTzkXgwMJdJhal3rVt3hdv3x1EXT1IHuXcwkC-Ex0As,4111
 datachain/query/udf.py,sha256=c0IOTkcedpOQEmX-Idlrrl1__1IecNXL0N9oUO9Dtkg,7755
 datachain/remote/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -81,20 +81,20 @@ datachain/sql/types.py,sha256=SShudhdIpdfTKDxWDDqOajYRkTCkIgQbilA94g4i-4E,10389
 datachain/sql/utils.py,sha256=rzlJw08etivdrcuQPqNVvVWhuVSyUPUQEEc6DOhu258,818
 datachain/sql/default/__init__.py,sha256=XQ2cEZpzWiABqjV-6yYHUBGI9vN_UHxbxZENESmVAWw,45
 datachain/sql/default/base.py,sha256=h44005q3qtMc9cjWmRufWwcBr5CfK_dnvG4IrcSQs_8,536
-datachain/sql/functions/__init__.py,sha256=…
-datachain/sql/functions/array.py,sha256=…
+datachain/sql/functions/__init__.py,sha256=Ioyy7nSetrTLVnHGcGcmZU99HxUFcx-5PFbrh2dPNH0,396
+datachain/sql/functions/array.py,sha256=EB7nJSncUc1PuxlHyzU2gVhF8DuXaxpGlxb5e8X2KFY,1297
 datachain/sql/functions/conditional.py,sha256=q7YUKfunXeEldXaxgT-p5pUTcOEVU_tcQ2BJlquTRPs,207
 datachain/sql/functions/path.py,sha256=zixpERotTFP6LZ7I4TiGtyRA8kXOoZmH1yzH9oRW0mg,1294
 datachain/sql/functions/random.py,sha256=vBwEEj98VH4LjWixUCygQ5Bz1mv1nohsCG0-ZTELlVg,271
 datachain/sql/functions/string.py,sha256=hIrF1fTvlPamDtm8UMnWDcnGfbbjCsHxZXS30U2Rzxo,651
 datachain/sql/sqlite/__init__.py,sha256=TAdJX0Bg28XdqPO-QwUVKy8rg78cgMileHvMNot7d04,166
-datachain/sql/sqlite/base.py,sha256=…
+datachain/sql/sqlite/base.py,sha256=LBYmXqXsVF30fbcnR55evCZHbPDCzMdGk_ogPLps63s,12236
 datachain/sql/sqlite/types.py,sha256=yzvp0sXSEoEYXs6zaYC_2YubarQoZH-MiUNXcpuEP4s,1573
 datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR0,469
 datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
-datachain-0.2.17.dist-info/LICENSE,sha256=…
-datachain-0.2.17.dist-info/METADATA,sha256=…
-datachain-0.2.17.dist-info/WHEEL,sha256=…
-datachain-0.2.17.dist-info/entry_points.txt,sha256=…
-datachain-0.2.17.dist-info/top_level.txt,sha256=…
-datachain-0.2.17.dist-info/RECORD,,
+datachain-0.3.0.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
+datachain-0.3.0.dist-info/METADATA,sha256=x0jqtxoQE9ynjAAKFeyrz0rvyuv_E2e0D6UuhU3Yu_I,17268
+datachain-0.3.0.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
+datachain-0.3.0.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
+datachain-0.3.0.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
+datachain-0.3.0.dist-info/RECORD,,

LICENSE, WHEEL, entry_points.txt, and top_level.txt are unchanged between the two versions.