datachain 0.3.12__py3-none-any.whl → 0.3.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- datachain/cache.py +0 -1
- datachain/catalog/catalog.py +1 -10
- datachain/cli.py +4 -6
- datachain/client/fsspec.py +0 -1
- datachain/client/s3.py +0 -4
- datachain/data_storage/schema.py +4 -8
- datachain/data_storage/warehouse.py +6 -17
- datachain/lib/dc.py +0 -1
- datachain/lib/file.py +0 -3
- datachain/listing.py +2 -2
- datachain/node.py +4 -26
- datachain/query/builtins.py +0 -14
- datachain/query/schema.py +1 -16
- datachain/utils.py +0 -3
- {datachain-0.3.12.dist-info → datachain-0.3.13.dist-info}/METADATA +1 -1
- {datachain-0.3.12.dist-info → datachain-0.3.13.dist-info}/RECORD +20 -20
- {datachain-0.3.12.dist-info → datachain-0.3.13.dist-info}/LICENSE +0 -0
- {datachain-0.3.12.dist-info → datachain-0.3.13.dist-info}/WHEEL +0 -0
- {datachain-0.3.12.dist-info → datachain-0.3.13.dist-info}/entry_points.txt +0 -0
- {datachain-0.3.12.dist-info → datachain-0.3.13.dist-info}/top_level.txt +0 -0
datachain/cache.py
CHANGED
datachain/catalog/catalog.py
CHANGED
|
@@ -62,7 +62,7 @@ from datachain.listing import Listing
|
|
|
62
62
|
from datachain.node import DirType, Node, NodeWithPath
|
|
63
63
|
from datachain.nodes_thread_pool import NodesThreadPool
|
|
64
64
|
from datachain.remote.studio import StudioClient
|
|
65
|
-
from datachain.sql.types import JSON, Boolean, DateTime,
|
|
65
|
+
from datachain.sql.types import JSON, Boolean, DateTime, Int64, SQLType, String
|
|
66
66
|
from datachain.storage import Storage, StorageStatus, StorageURI
|
|
67
67
|
from datachain.utils import (
|
|
68
68
|
DataChainDir,
|
|
@@ -513,8 +513,6 @@ def find_column_to_str( # noqa: PLR0911
|
|
|
513
513
|
)
|
|
514
514
|
if column == "name":
|
|
515
515
|
return posixpath.basename(row[field_lookup["path"]]) or ""
|
|
516
|
-
if column == "owner":
|
|
517
|
-
return row[field_lookup["owner_name"]] or ""
|
|
518
516
|
if column == "path":
|
|
519
517
|
is_dir = row[field_lookup["dir_type"]] == DirType.DIR
|
|
520
518
|
path = row[field_lookup["path"]]
|
|
@@ -666,16 +664,12 @@ class Catalog:
|
|
|
666
664
|
source_metastore = self.metastore.clone(client.uri)
|
|
667
665
|
|
|
668
666
|
columns = [
|
|
669
|
-
Column("vtype", String),
|
|
670
|
-
Column("dir_type", Int),
|
|
671
667
|
Column("path", String),
|
|
672
668
|
Column("etag", String),
|
|
673
669
|
Column("version", String),
|
|
674
670
|
Column("is_latest", Boolean),
|
|
675
671
|
Column("last_modified", DateTime(timezone=True)),
|
|
676
672
|
Column("size", Int64),
|
|
677
|
-
Column("owner_name", String),
|
|
678
|
-
Column("owner_id", String),
|
|
679
673
|
Column("location", JSON),
|
|
680
674
|
Column("source", String),
|
|
681
675
|
]
|
|
@@ -1516,7 +1510,6 @@ class Catalog:
|
|
|
1516
1510
|
row["etag"],
|
|
1517
1511
|
row["version"],
|
|
1518
1512
|
row["is_latest"],
|
|
1519
|
-
row["vtype"],
|
|
1520
1513
|
row["location"],
|
|
1521
1514
|
row["last_modified"],
|
|
1522
1515
|
)
|
|
@@ -1987,8 +1980,6 @@ class Catalog:
|
|
|
1987
1980
|
field_set.add("path")
|
|
1988
1981
|
elif column == "name":
|
|
1989
1982
|
field_set.add("path")
|
|
1990
|
-
elif column == "owner":
|
|
1991
|
-
field_set.add("owner_name")
|
|
1992
1983
|
elif column == "path":
|
|
1993
1984
|
field_set.add("dir_type")
|
|
1994
1985
|
field_set.add("path")
|
datachain/cli.py
CHANGED
|
@@ -24,7 +24,7 @@ logger = logging.getLogger("datachain")
|
|
|
24
24
|
|
|
25
25
|
TTL_HUMAN = "4h"
|
|
26
26
|
TTL_INT = 4 * 60 * 60
|
|
27
|
-
FIND_COLUMNS = ["du", "name", "
|
|
27
|
+
FIND_COLUMNS = ["du", "name", "path", "size", "type"]
|
|
28
28
|
|
|
29
29
|
|
|
30
30
|
def human_time_type(value_str: str, can_be_none: bool = False) -> Optional[int]:
|
|
@@ -579,9 +579,8 @@ def _node_data_to_ls_values(row, long_format=False):
|
|
|
579
579
|
value = name + ending
|
|
580
580
|
if long_format:
|
|
581
581
|
last_modified = row[2]
|
|
582
|
-
owner_name = row[3]
|
|
583
582
|
timestamp = last_modified if not is_dir else None
|
|
584
|
-
return long_line_str(value, timestamp
|
|
583
|
+
return long_line_str(value, timestamp)
|
|
585
584
|
return value
|
|
586
585
|
|
|
587
586
|
|
|
@@ -599,7 +598,7 @@ def _ls_urls_flat(
|
|
|
599
598
|
if client_cls.is_root_url(source):
|
|
600
599
|
buckets = client_cls.ls_buckets(**catalog.client_config)
|
|
601
600
|
if long:
|
|
602
|
-
values = (long_line_str(b.name, b.created
|
|
601
|
+
values = (long_line_str(b.name, b.created) for b in buckets)
|
|
603
602
|
else:
|
|
604
603
|
values = (b.name for b in buckets)
|
|
605
604
|
yield source, values
|
|
@@ -607,7 +606,7 @@ def _ls_urls_flat(
|
|
|
607
606
|
found = False
|
|
608
607
|
fields = ["name", "dir_type"]
|
|
609
608
|
if long:
|
|
610
|
-
fields.
|
|
609
|
+
fields.append("last_modified")
|
|
611
610
|
for data_source, results in catalog.ls([source], fields=fields, **kwargs):
|
|
612
611
|
values = (_node_data_to_ls_values(r, long) for r in results)
|
|
613
612
|
found = True
|
|
@@ -683,7 +682,6 @@ def ls_remote(
|
|
|
683
682
|
entry = long_line_str(
|
|
684
683
|
row["name"] + ("/" if row["dir_type"] else ""),
|
|
685
684
|
row["last_modified"],
|
|
686
|
-
row["owner_name"],
|
|
687
685
|
)
|
|
688
686
|
print(format_ls_entry(entry))
|
|
689
687
|
else:
|
datachain/client/fsspec.py
CHANGED
datachain/client/s3.py
CHANGED
|
@@ -119,8 +119,6 @@ class ClientS3(Client):
|
|
|
119
119
|
is_latest=v.get("IsLatest", True),
|
|
120
120
|
last_modified=v.get("LastModified", ""),
|
|
121
121
|
size=v["Size"],
|
|
122
|
-
owner_name=v.get("Owner", {}).get("DisplayName", ""),
|
|
123
|
-
owner_id=v.get("Owner", {}).get("ID", ""),
|
|
124
122
|
)
|
|
125
123
|
|
|
126
124
|
async def _fetch_dir(
|
|
@@ -165,8 +163,6 @@ class ClientS3(Client):
|
|
|
165
163
|
is_latest=v.get("IsLatest", True),
|
|
166
164
|
last_modified=v.get("LastModified", ""),
|
|
167
165
|
size=v["size"],
|
|
168
|
-
owner_name=v.get("Owner", {}).get("DisplayName", ""),
|
|
169
|
-
owner_id=v.get("Owner", {}).get("ID", ""),
|
|
170
166
|
)
|
|
171
167
|
|
|
172
168
|
def info_to_file(self, v: dict[str, Any], path: str) -> File:
|
datachain/data_storage/schema.py
CHANGED
|
@@ -10,9 +10,8 @@ from typing import (
|
|
|
10
10
|
|
|
11
11
|
import sqlalchemy as sa
|
|
12
12
|
from sqlalchemy.sql import func as f
|
|
13
|
-
from sqlalchemy.sql.expression import null, true
|
|
13
|
+
from sqlalchemy.sql.expression import false, null, true
|
|
14
14
|
|
|
15
|
-
from datachain.node import DirType
|
|
16
15
|
from datachain.sql.functions import path
|
|
17
16
|
from datachain.sql.types import Int, SQLType, UInt64
|
|
18
17
|
|
|
@@ -81,8 +80,7 @@ class DirExpansion:
|
|
|
81
80
|
def base_select(q):
|
|
82
81
|
return sa.select(
|
|
83
82
|
q.c.sys__id,
|
|
84
|
-
|
|
85
|
-
(q.c.dir_type == DirType.DIR).label("is_dir"),
|
|
83
|
+
false().label("is_dir"),
|
|
86
84
|
q.c.source,
|
|
87
85
|
q.c.path,
|
|
88
86
|
q.c.version,
|
|
@@ -94,7 +92,6 @@ class DirExpansion:
|
|
|
94
92
|
return (
|
|
95
93
|
sa.select(
|
|
96
94
|
f.min(q.c.sys__id).label("sys__id"),
|
|
97
|
-
q.c.vtype,
|
|
98
95
|
q.c.is_dir,
|
|
99
96
|
q.c.source,
|
|
100
97
|
q.c.path,
|
|
@@ -102,8 +99,8 @@ class DirExpansion:
|
|
|
102
99
|
f.max(q.c.location).label("location"),
|
|
103
100
|
)
|
|
104
101
|
.select_from(q)
|
|
105
|
-
.group_by(q.c.source, q.c.path, q.c.
|
|
106
|
-
.order_by(q.c.source, q.c.path, q.c.
|
|
102
|
+
.group_by(q.c.source, q.c.path, q.c.is_dir, q.c.version)
|
|
103
|
+
.order_by(q.c.source, q.c.path, q.c.is_dir, q.c.version)
|
|
107
104
|
)
|
|
108
105
|
|
|
109
106
|
@classmethod
|
|
@@ -113,7 +110,6 @@ class DirExpansion:
|
|
|
113
110
|
q = q.union_all(
|
|
114
111
|
sa.select(
|
|
115
112
|
sa.literal(-1).label("sys__id"),
|
|
116
|
-
sa.literal("").label("vtype"),
|
|
117
113
|
true().label("is_dir"),
|
|
118
114
|
q.c.source,
|
|
119
115
|
parent.label("path"),
|
datachain/data_storage/warehouse.py
CHANGED
|
@@ -28,7 +28,6 @@ from datachain.utils import sql_escape_like
|
|
|
28
28
|
|
|
29
29
|
if TYPE_CHECKING:
|
|
30
30
|
from sqlalchemy.sql._typing import _ColumnsClauseArgument
|
|
31
|
-
from sqlalchemy.sql.elements import ColumnElement
|
|
32
31
|
from sqlalchemy.sql.selectable import Select
|
|
33
32
|
from sqlalchemy.types import TypeEngine
|
|
34
33
|
|
|
@@ -341,9 +340,7 @@ class AbstractWarehouse(ABC, Serializable):
|
|
|
341
340
|
|
|
342
341
|
column_objects = [dr.c[c] for c in column_names]
|
|
343
342
|
# include all object types - file, tar archive, tar file (subobject)
|
|
344
|
-
select_query = dr.select(*column_objects).where(
|
|
345
|
-
dr.c.dir_type.in_(DirTypeGroup.FILE) & (dr.c.is_latest == true())
|
|
346
|
-
)
|
|
343
|
+
select_query = dr.select(*column_objects).where(dr.c.is_latest == true())
|
|
347
344
|
if path is None:
|
|
348
345
|
return select_query
|
|
349
346
|
if recursive:
|
|
@@ -420,7 +417,6 @@ class AbstractWarehouse(ABC, Serializable):
|
|
|
420
417
|
"""
|
|
421
418
|
|
|
422
419
|
def _prepare_entry(entry: Entry):
|
|
423
|
-
assert entry.dir_type is not None
|
|
424
420
|
return attrs.asdict(entry) | {"source": uri}
|
|
425
421
|
|
|
426
422
|
return [_prepare_entry(e) for e in entries]
|
|
@@ -440,7 +436,7 @@ class AbstractWarehouse(ABC, Serializable):
|
|
|
440
436
|
"""Inserts dataset rows directly into dataset table"""
|
|
441
437
|
|
|
442
438
|
@abstractmethod
|
|
443
|
-
def instr(self, source, target) ->
|
|
439
|
+
def instr(self, source, target) -> sa.ColumnElement:
|
|
444
440
|
"""
|
|
445
441
|
Return SQLAlchemy Boolean determining if a target substring is present in
|
|
446
442
|
source string column
|
|
@@ -500,7 +496,7 @@ class AbstractWarehouse(ABC, Serializable):
|
|
|
500
496
|
c = query.selected_columns
|
|
501
497
|
q = query.where(c.dir_type.in_(file_group))
|
|
502
498
|
if not include_subobjects:
|
|
503
|
-
q = q.where(c.
|
|
499
|
+
q = q.where((c.location == "") | (c.location.is_(None)))
|
|
504
500
|
return q
|
|
505
501
|
|
|
506
502
|
def get_nodes(self, query) -> Iterator[Node]:
|
|
@@ -624,8 +620,7 @@ class AbstractWarehouse(ABC, Serializable):
|
|
|
624
620
|
|
|
625
621
|
return sa.select(
|
|
626
622
|
de.c.sys__id,
|
|
627
|
-
|
|
628
|
-
case((de.c.is_dir == true(), DirType.DIR), else_=dr.c.dir_type).label(
|
|
623
|
+
case((de.c.is_dir == true(), DirType.DIR), else_=DirType.FILE).label(
|
|
629
624
|
"dir_type"
|
|
630
625
|
),
|
|
631
626
|
de.c.path,
|
|
@@ -634,8 +629,6 @@ class AbstractWarehouse(ABC, Serializable):
|
|
|
634
629
|
with_default(dr.c.is_latest),
|
|
635
630
|
dr.c.last_modified,
|
|
636
631
|
with_default(dr.c.size),
|
|
637
|
-
with_default(dr.c.owner_name),
|
|
638
|
-
with_default(dr.c.owner_id),
|
|
639
632
|
with_default(dr.c.sys__rand),
|
|
640
633
|
dr.c.location,
|
|
641
634
|
de.c.source,
|
|
@@ -650,7 +643,6 @@ class AbstractWarehouse(ABC, Serializable):
|
|
|
650
643
|
query = dr.select().where(
|
|
651
644
|
self.path_expr(dr) == path,
|
|
652
645
|
dr.c.is_latest == true(),
|
|
653
|
-
dr.c.dir_type != DirType.DIR,
|
|
654
646
|
)
|
|
655
647
|
row = next(self.db.execute(query), None)
|
|
656
648
|
if row is not None:
|
|
@@ -660,7 +652,6 @@ class AbstractWarehouse(ABC, Serializable):
|
|
|
660
652
|
dr.select()
|
|
661
653
|
.where(
|
|
662
654
|
dr.c.is_latest == true(),
|
|
663
|
-
dr.c.dir_type != DirType.DIR,
|
|
664
655
|
dr.c.path.startswith(path),
|
|
665
656
|
)
|
|
666
657
|
.exists()
|
|
@@ -761,13 +752,11 @@ class AbstractWarehouse(ABC, Serializable):
|
|
|
761
752
|
|
|
762
753
|
sub_glob = posixpath.join(path, "*")
|
|
763
754
|
dr = dataset_rows
|
|
764
|
-
selections = [
|
|
755
|
+
selections: list[sa.ColumnElement] = [
|
|
765
756
|
func.sum(dr.c.size),
|
|
766
757
|
]
|
|
767
758
|
if count_files:
|
|
768
|
-
selections.append(
|
|
769
|
-
func.sum(dr.c.dir_type.in_(DirTypeGroup.FILE)),
|
|
770
|
-
)
|
|
759
|
+
selections.append(func.count())
|
|
771
760
|
results = next(
|
|
772
761
|
self.db.execute(
|
|
773
762
|
dr.select(*selections).where(
|
datachain/lib/dc.py
CHANGED
datachain/lib/file.py
CHANGED
|
@@ -118,7 +118,6 @@ class File(DataModel):
|
|
|
118
118
|
is_latest: bool = Field(default=True)
|
|
119
119
|
last_modified: datetime = Field(default=TIME_ZERO)
|
|
120
120
|
location: Optional[Union[dict, list[dict]]] = Field(default=None)
|
|
121
|
-
vtype: str = Field(default="")
|
|
122
121
|
|
|
123
122
|
_datachain_column_types: ClassVar[dict[str, Any]] = {
|
|
124
123
|
"source": String,
|
|
@@ -129,7 +128,6 @@ class File(DataModel):
|
|
|
129
128
|
"is_latest": Boolean,
|
|
130
129
|
"last_modified": DateTime,
|
|
131
130
|
"location": JSON,
|
|
132
|
-
"vtype": String,
|
|
133
131
|
}
|
|
134
132
|
|
|
135
133
|
_unique_id_keys: ClassVar[list[str]] = [
|
|
@@ -139,7 +137,6 @@ class File(DataModel):
|
|
|
139
137
|
"etag",
|
|
140
138
|
"version",
|
|
141
139
|
"is_latest",
|
|
142
|
-
"vtype",
|
|
143
140
|
"location",
|
|
144
141
|
"last_modified",
|
|
145
142
|
]
|
datachain/listing.py
CHANGED
|
@@ -104,7 +104,7 @@ class Listing:
|
|
|
104
104
|
return self.warehouse.get_node_by_path(self.dataset_rows, path)
|
|
105
105
|
|
|
106
106
|
def ls_path(self, node, fields):
|
|
107
|
-
if node.
|
|
107
|
+
if node.location or node.dir_type == DirType.TAR_ARCHIVE:
|
|
108
108
|
return self.warehouse.select_node_fields_by_parent_path_tar(
|
|
109
109
|
self.dataset_rows, node.path, fields
|
|
110
110
|
)
|
|
@@ -235,7 +235,7 @@ class Listing:
|
|
|
235
235
|
return self.warehouse.size(self.dataset_rows, node, count_files)
|
|
236
236
|
|
|
237
237
|
def subtree_files(self, node: Node, sort=None):
|
|
238
|
-
if node.dir_type == DirType.TAR_ARCHIVE or node.
|
|
238
|
+
if node.dir_type == DirType.TAR_ARCHIVE or node.location:
|
|
239
239
|
include_subobjects = True
|
|
240
240
|
else:
|
|
241
241
|
include_subobjects = False
|
datachain/node.py
CHANGED
|
@@ -49,18 +49,15 @@ class DirTypeGroup:
|
|
|
49
49
|
class Node:
|
|
50
50
|
sys__id: int = 0
|
|
51
51
|
sys__rand: int = 0
|
|
52
|
-
vtype: str = ""
|
|
53
|
-
dir_type: Optional[int] = None
|
|
54
52
|
path: str = ""
|
|
55
53
|
etag: str = ""
|
|
56
54
|
version: Optional[str] = None
|
|
57
55
|
is_latest: bool = True
|
|
58
56
|
last_modified: Optional[datetime] = None
|
|
59
57
|
size: int = 0
|
|
60
|
-
owner_name: str = ""
|
|
61
|
-
owner_id: str = ""
|
|
62
58
|
location: Optional[str] = None
|
|
63
59
|
source: StorageURI = StorageURI("")
|
|
60
|
+
dir_type: int = DirType.FILE
|
|
64
61
|
|
|
65
62
|
@property
|
|
66
63
|
def is_dir(self) -> bool:
|
|
@@ -113,7 +110,6 @@ class Node:
|
|
|
113
110
|
version=self.version or "",
|
|
114
111
|
etag=self.etag,
|
|
115
112
|
is_latest=self.is_latest,
|
|
116
|
-
vtype=self.vtype,
|
|
117
113
|
location=self.location,
|
|
118
114
|
last_modified=self.last_modified or TIME_ZERO,
|
|
119
115
|
)
|
|
@@ -145,38 +141,20 @@ class Node:
|
|
|
145
141
|
|
|
146
142
|
@attrs.define
|
|
147
143
|
class Entry:
|
|
148
|
-
vtype: str = ""
|
|
149
|
-
dir_type: Optional[int] = None
|
|
150
144
|
path: str = ""
|
|
151
145
|
etag: str = ""
|
|
152
146
|
version: str = ""
|
|
153
147
|
is_latest: bool = True
|
|
154
148
|
last_modified: Optional[datetime] = None
|
|
155
149
|
size: int = 0
|
|
156
|
-
owner_name: str = ""
|
|
157
|
-
owner_id: str = ""
|
|
158
150
|
location: Optional[str] = None
|
|
159
151
|
|
|
160
|
-
@property
|
|
161
|
-
def is_dir(self) -> bool:
|
|
162
|
-
return self.dir_type == DirType.DIR
|
|
163
|
-
|
|
164
|
-
@classmethod
|
|
165
|
-
def from_dir(cls, path: str, **kwargs) -> "Entry":
|
|
166
|
-
return cls(dir_type=DirType.DIR, path=path, **kwargs)
|
|
167
|
-
|
|
168
152
|
@classmethod
|
|
169
153
|
def from_file(cls, path: str, **kwargs) -> "Entry":
|
|
170
|
-
return cls(
|
|
171
|
-
|
|
172
|
-
@classmethod
|
|
173
|
-
def root(cls):
|
|
174
|
-
return cls(dir_type=DirType.DIR)
|
|
154
|
+
return cls(path=path, **kwargs)
|
|
175
155
|
|
|
176
156
|
@property
|
|
177
157
|
def full_path(self) -> str:
|
|
178
|
-
if self.is_dir and self.path:
|
|
179
|
-
return self.path + "/"
|
|
180
158
|
return self.path
|
|
181
159
|
|
|
182
160
|
@property
|
|
@@ -229,9 +207,9 @@ class NodeWithPath:
|
|
|
229
207
|
TIME_FMT = "%Y-%m-%d %H:%M"
|
|
230
208
|
|
|
231
209
|
|
|
232
|
-
def long_line_str(name: str, timestamp: Optional[datetime]
|
|
210
|
+
def long_line_str(name: str, timestamp: Optional[datetime]) -> str:
|
|
233
211
|
if timestamp is None:
|
|
234
212
|
time = "-"
|
|
235
213
|
else:
|
|
236
214
|
time = timestamp.strftime(TIME_FMT)
|
|
237
|
-
return f"{
|
|
215
|
+
return f"{time: <19} {name}"
|
datachain/query/builtins.py
CHANGED
|
@@ -22,10 +22,6 @@ def load_tar(raw):
|
|
|
22
22
|
C.source,
|
|
23
23
|
C.path,
|
|
24
24
|
C.size,
|
|
25
|
-
C.vtype,
|
|
26
|
-
C.dir_type,
|
|
27
|
-
C.owner_name,
|
|
28
|
-
C.owner_id,
|
|
29
25
|
C.is_latest,
|
|
30
26
|
C.last_modified,
|
|
31
27
|
C.version,
|
|
@@ -38,10 +34,6 @@ def index_tar(
|
|
|
38
34
|
source,
|
|
39
35
|
parent_path,
|
|
40
36
|
size,
|
|
41
|
-
vtype,
|
|
42
|
-
dir_type,
|
|
43
|
-
owner_name,
|
|
44
|
-
owner_id,
|
|
45
37
|
is_latest,
|
|
46
38
|
last_modified,
|
|
47
39
|
version,
|
|
@@ -53,10 +45,6 @@ def index_tar(
|
|
|
53
45
|
source=source,
|
|
54
46
|
path=parent_path,
|
|
55
47
|
size=size,
|
|
56
|
-
vtype=vtype,
|
|
57
|
-
dir_type=dir_type,
|
|
58
|
-
owner_name=owner_name,
|
|
59
|
-
owner_id=owner_id,
|
|
60
48
|
is_latest=bool(is_latest),
|
|
61
49
|
last_modified=last_modified,
|
|
62
50
|
version=version,
|
|
@@ -70,7 +58,6 @@ def index_tar(
|
|
|
70
58
|
source=source,
|
|
71
59
|
path=full_path,
|
|
72
60
|
size=info.size,
|
|
73
|
-
vtype="tar",
|
|
74
61
|
location={
|
|
75
62
|
"vtype": "tar",
|
|
76
63
|
"offset": info.offset_data,
|
|
@@ -81,7 +68,6 @@ def index_tar(
|
|
|
81
68
|
"version": version,
|
|
82
69
|
"size": size,
|
|
83
70
|
"etag": etag,
|
|
84
|
-
"vtype": "",
|
|
85
71
|
"location": None,
|
|
86
72
|
},
|
|
87
73
|
},
|
datachain/query/schema.py
CHANGED
|
@@ -9,7 +9,7 @@ import attrs
|
|
|
9
9
|
import sqlalchemy as sa
|
|
10
10
|
from fsspec.callbacks import DEFAULT_CALLBACK, Callback
|
|
11
11
|
|
|
12
|
-
from datachain.sql.types import JSON, Boolean, DateTime,
|
|
12
|
+
from datachain.sql.types import JSON, Boolean, DateTime, Int64, SQLType, String
|
|
13
13
|
|
|
14
14
|
if TYPE_CHECKING:
|
|
15
15
|
from datachain.catalog import Catalog
|
|
@@ -222,10 +222,6 @@ class DatasetRow:
|
|
|
222
222
|
"path": String,
|
|
223
223
|
"size": Int64,
|
|
224
224
|
"location": JSON,
|
|
225
|
-
"vtype": String,
|
|
226
|
-
"dir_type": Int,
|
|
227
|
-
"owner_name": String,
|
|
228
|
-
"owner_id": String,
|
|
229
225
|
"is_latest": Boolean,
|
|
230
226
|
"last_modified": DateTime,
|
|
231
227
|
"version": String,
|
|
@@ -238,10 +234,6 @@ class DatasetRow:
|
|
|
238
234
|
source: str = "",
|
|
239
235
|
size: int = 0,
|
|
240
236
|
location: Optional[dict[str, Any]] = None,
|
|
241
|
-
vtype: str = "",
|
|
242
|
-
dir_type: int = 0,
|
|
243
|
-
owner_name: str = "",
|
|
244
|
-
owner_id: str = "",
|
|
245
237
|
is_latest: bool = True,
|
|
246
238
|
last_modified: Optional[datetime] = None,
|
|
247
239
|
version: str = "",
|
|
@@ -251,10 +243,7 @@ class DatasetRow:
|
|
|
251
243
|
str,
|
|
252
244
|
int,
|
|
253
245
|
Optional[str],
|
|
254
|
-
str,
|
|
255
246
|
int,
|
|
256
|
-
str,
|
|
257
|
-
str,
|
|
258
247
|
bool,
|
|
259
248
|
datetime,
|
|
260
249
|
str,
|
|
@@ -271,10 +260,6 @@ class DatasetRow:
|
|
|
271
260
|
path,
|
|
272
261
|
size,
|
|
273
262
|
location,
|
|
274
|
-
vtype,
|
|
275
|
-
dir_type,
|
|
276
|
-
owner_name,
|
|
277
|
-
owner_id,
|
|
278
263
|
is_latest,
|
|
279
264
|
last_modified,
|
|
280
265
|
version,
|
datachain/utils.py
CHANGED
{datachain-0.3.12.dist-info → datachain-0.3.13.dist-info}/RECORD
CHANGED
|
@@ -1,49 +1,49 @@
|
|
|
1
1
|
datachain/__init__.py,sha256=GeyhE-5LgfJav2OKYGaieP2lBvf2Gm-ihj7thnK9zjI,800
|
|
2
2
|
datachain/__main__.py,sha256=hG3Y4ARGEqe1AWwNMd259rBlqtphx1Wk39YbueQ0yV8,91
|
|
3
3
|
datachain/asyn.py,sha256=biF8M8fQujtj5xs0VLi8S16eBtzG6kceWlO_NILbCsg,8197
|
|
4
|
-
datachain/cache.py,sha256=
|
|
5
|
-
datachain/cli.py,sha256=
|
|
4
|
+
datachain/cache.py,sha256=WP-ktH_bRn3w2g1JOOQ7rCPsZyR4OM6K1Kb7yZsSSns,4056
|
|
5
|
+
datachain/cli.py,sha256=alMjnoBUBLvBSMBR51N09rA_aUEdHJwyxSRogF7VbbA,30891
|
|
6
6
|
datachain/cli_utils.py,sha256=jrn9ejGXjybeO1ur3fjdSiAyCHZrX0qsLLbJzN9ErPM,2418
|
|
7
7
|
datachain/config.py,sha256=PfC7W5yO6HFO6-iMB4YB-0RR88LPiGmD6sS_SfVbGso,1979
|
|
8
8
|
datachain/dataset.py,sha256=EcYjhHg1dxxPbDwSuIxc-mDRDo3v_pYf79fMy4re1oA,14740
|
|
9
9
|
datachain/error.py,sha256=OnZ8OaBtDdTZPy8XQiy29SAjqdQArQeorYbP5ju7ldc,1199
|
|
10
10
|
datachain/job.py,sha256=Jt4sNutMHJReaGsj3r3scueN5aESLGfhimAa8pUP7Is,1271
|
|
11
|
-
datachain/listing.py,sha256=
|
|
12
|
-
datachain/node.py,sha256=
|
|
11
|
+
datachain/listing.py,sha256=LgL0lV10AzD1v52ajSaJKFnyiq4hNXwQiqaGySWGQsw,8290
|
|
12
|
+
datachain/node.py,sha256=gacKxUPLgJ1ul6LJWz7nylYjUWPbyUY5cqaBFDOnO9E,5756
|
|
13
13
|
datachain/nodes_fetcher.py,sha256=kca19yvu11JxoVY1t4_ydp1FmchiV88GnNicNBQ9NIA,831
|
|
14
14
|
datachain/nodes_thread_pool.py,sha256=ZyzBvUImIPmi4WlKC2SW2msA0UhtembbTdcs2nx29A0,3191
|
|
15
15
|
datachain/progress.py,sha256=7_8FtJs770ITK9sMq-Lt4k4k18QmYl4yIG_kCoWID3o,4559
|
|
16
16
|
datachain/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
17
17
|
datachain/storage.py,sha256=RiSJLYdHUjnrEWkLBKPcETHpAxld_B2WxLg711t0aZI,3733
|
|
18
|
-
datachain/utils.py,sha256=
|
|
18
|
+
datachain/utils.py,sha256=Z9-lPNvrrAh_VWpzVBJ7L5-Oy_Oo1V0ZW7G0MVDyPK4,13065
|
|
19
19
|
datachain/catalog/__init__.py,sha256=g2iAAFx_gEIrqshXlhSEbrc8qDaEH11cjU40n3CHDz4,409
|
|
20
|
-
datachain/catalog/catalog.py,sha256=
|
|
20
|
+
datachain/catalog/catalog.py,sha256=hhLciKHD0dVwniFzUsYORQ72WpnM40QYT0ydoyx1Kvw,69308
|
|
21
21
|
datachain/catalog/datasource.py,sha256=D-VWIVDCM10A8sQavLhRXdYSCG7F4o4ifswEF80_NAQ,1412
|
|
22
22
|
datachain/catalog/loader.py,sha256=-6VelNfXUdgUnwInVyA8g86Boxv2xqhTh9xNS-Zlwig,8242
|
|
23
23
|
datachain/client/__init__.py,sha256=T4wiYL9KIM0ZZ_UqIyzV8_ufzYlewmizlV4iymHNluE,86
|
|
24
24
|
datachain/client/azure.py,sha256=LXSahE0Z6r4dXqpBkKnq3J5fg7N7ymC1lSn-1SoILGc,2687
|
|
25
25
|
datachain/client/fileslice.py,sha256=bT7TYco1Qe3bqoc8aUkUZcPdPofJDHlryL5BsTn9xsY,3021
|
|
26
|
-
datachain/client/fsspec.py,sha256=
|
|
26
|
+
datachain/client/fsspec.py,sha256=Hy3-4HRV-3MozOybqAnF-qL0EoMYFHynpTG_YZphjZE,13298
|
|
27
27
|
datachain/client/gcs.py,sha256=P_E3mhzhXR9mJ_wc3AYZuczzwOJ0-D3J5qhJXeSU-xk,4518
|
|
28
28
|
datachain/client/hf.py,sha256=R-F6Ks6aVM9wSNkIXOkOnZFwsJlfdRwJjymRa78RLjM,1246
|
|
29
29
|
datachain/client/local.py,sha256=H8TNY8pi2kA8y9_f_1XLUjJF66f229qC_b2y4xGkzdU,5300
|
|
30
|
-
datachain/client/s3.py,sha256=
|
|
30
|
+
datachain/client/s3.py,sha256=zs41EvYW1bS_pUxnkCnJILzUJpL2V1jvvVKSN4BKYcc,6326
|
|
31
31
|
datachain/data_storage/__init__.py,sha256=cEOJpyu1JDZtfUupYucCDNFI6e5Wmp_Oyzq6rZv32Y8,398
|
|
32
32
|
datachain/data_storage/db_engine.py,sha256=81Ol1of9TTTzD97ORajCnP366Xz2mEJt6C-kTUCaru4,3406
|
|
33
33
|
datachain/data_storage/id_generator.py,sha256=lCEoU0BM37Ai2aRpSbwo5oQT0GqZnSpYwwvizathRMQ,4292
|
|
34
34
|
datachain/data_storage/job.py,sha256=w-7spowjkOa1P5fUVtJou3OltT0L48P0RYWZ9rSJ9-s,383
|
|
35
35
|
datachain/data_storage/metastore.py,sha256=cHN0xmbUvChyayHHZm3Vqxr87jFqojPSlGBqhTPStlE,54519
|
|
36
|
-
datachain/data_storage/schema.py,sha256=
|
|
36
|
+
datachain/data_storage/schema.py,sha256=AGbjyEir5UmRZXI3m0jChZogUh5wd8csj6-YlUWaAxQ,8383
|
|
37
37
|
datachain/data_storage/serializer.py,sha256=6G2YtOFqqDzJf1KbvZraKGXl2XHZyVml2krunWUum5o,927
|
|
38
38
|
datachain/data_storage/sqlite.py,sha256=Z4B2KDL4C8Uio2aLMxaKv0t2MoOtCV3bSqWg4X9mTFg,28048
|
|
39
|
-
datachain/data_storage/warehouse.py,sha256=
|
|
39
|
+
datachain/data_storage/warehouse.py,sha256=s5hhVUWrlEopE6eGOqzXHeNtRapK30G8gj0Vkt_HHFQ,32649
|
|
40
40
|
datachain/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
41
41
|
datachain/lib/arrow.py,sha256=dV17oGiknqEW55ogGK_9T0ycNFwd2z-EFOW0AQiR6TU,5840
|
|
42
42
|
datachain/lib/clip.py,sha256=lm5CzVi4Cj1jVLEKvERKArb-egb9j1Ls-fwTItT6vlI,6150
|
|
43
43
|
datachain/lib/data_model.py,sha256=gHIjlow84GMRDa78yLL1Ud-N18or21fnTyPEwsatpXY,2045
|
|
44
44
|
datachain/lib/dataset_info.py,sha256=srPPhI2UHf6hFPBecyFEVw2SS5aPisIIMsvGgKqi7ss,2366
|
|
45
|
-
datachain/lib/dc.py,sha256=
|
|
46
|
-
datachain/lib/file.py,sha256=
|
|
45
|
+
datachain/lib/dc.py,sha256=C-sfWRinV8pDK2P6UHLbScOahTlTiVQpoxUUdVllF2k,68710
|
|
46
|
+
datachain/lib/file.py,sha256=rXmyzUFgnLQ4J3CyOCcg-guhzAz4x9Ug595FbNn4Y2E,11398
|
|
47
47
|
datachain/lib/hf.py,sha256=ZiMvgy3DYiklGKZv-w7gevrHOgn3bGfpTlpDPOHCNqs,5336
|
|
48
48
|
datachain/lib/image.py,sha256=AMXYwQsmarZjRbPCZY3M1jDsM2WAB_b3cTY4uOIuXNU,2675
|
|
49
49
|
datachain/lib/listing.py,sha256=S9Xn_Saxu4xk3K_01VexkfMZW0INQiATlidt2bzgWKY,3938
|
|
@@ -68,13 +68,13 @@ datachain/lib/convert/unflatten.py,sha256=Ogvh_5wg2f38_At_1lN0D_e2uZOOpYEvwvB2xd
|
|
|
68
68
|
datachain/lib/convert/values_to_tuples.py,sha256=YOdbjzHq-uj6-cV2Qq43G72eN2avMNDGl4x5t6yQMl8,3931
|
|
69
69
|
datachain/query/__init__.py,sha256=tv-spkjUCYamMN9ys_90scYrZ8kJ7C7d1MTYVmxGtk4,325
|
|
70
70
|
datachain/query/batch.py,sha256=-vlpINJiertlnaoUVv1C95RatU0F6zuhpIYRufJRo1M,3660
|
|
71
|
-
datachain/query/builtins.py,sha256=
|
|
71
|
+
datachain/query/builtins.py,sha256=U6yHPF9bzxqK5iwyqCqbJxo8ggBVx9FtuXxRrQQ0SNM,2244
|
|
72
72
|
datachain/query/dataset.py,sha256=B2EmGOL8gjrdU_WhU88Dj7FsxvxrNeKwe2STXnU9T9E,58369
|
|
73
73
|
datachain/query/dispatch.py,sha256=GBh3EZHDp5AaXxrjOpfrpfsuy7Umnqxu-MAXcK9X3gc,12945
|
|
74
74
|
datachain/query/metrics.py,sha256=r5b0ygYhokbXp8Mg3kCH8iFSRw0jxzyeBe-C-J_bKFc,938
|
|
75
75
|
datachain/query/params.py,sha256=O_j89mjYRLOwWNhYZl-z7mi-rkdP7WyFmaDufsdTryE,863
|
|
76
76
|
datachain/query/queue.py,sha256=waqM_KzavU8C-G95-4211Nd4GXna_u2747Chgwtgz2w,3839
|
|
77
|
-
datachain/query/schema.py,sha256=
|
|
77
|
+
datachain/query/schema.py,sha256=ytlkA1xFAUOia25u8d6pxvxBSRl3uivLuOe2eHaw-qc,7550
|
|
78
78
|
datachain/query/session.py,sha256=UPH5Z4fzCDsvj81ji0e8GA6Mgra3bOAEpVq4htqOtis,4317
|
|
79
79
|
datachain/query/udf.py,sha256=j3NhmKK5rYG5TclcM2Sr0LhS1tmYLMjzMugx9G9iFLM,8100
|
|
80
80
|
datachain/remote/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -96,9 +96,9 @@ datachain/sql/sqlite/base.py,sha256=WLPHBhZbXbiqPoRV1VgDrXJqku4UuvJpBhYeQ0k5rI8,
|
|
|
96
96
|
datachain/sql/sqlite/types.py,sha256=yzvp0sXSEoEYXs6zaYC_2YubarQoZH-MiUNXcpuEP4s,1573
|
|
97
97
|
datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR0,469
|
|
98
98
|
datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
|
|
99
|
-
datachain-0.3.
|
|
100
|
-
datachain-0.3.
|
|
101
|
-
datachain-0.3.
|
|
102
|
-
datachain-0.3.
|
|
103
|
-
datachain-0.3.
|
|
104
|
-
datachain-0.3.
|
|
99
|
+
datachain-0.3.13.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
|
|
100
|
+
datachain-0.3.13.dist-info/METADATA,sha256=pzMOR9LYuLR26Wifk4GPS9Wi1mmqCC5CIBZyA-X5_oo,17073
|
|
101
|
+
datachain-0.3.13.dist-info/WHEEL,sha256=cVxcB9AmuTcXqmwrtPhNK88dr7IR_b6qagTj0UvIEbY,91
|
|
102
|
+
datachain-0.3.13.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
|
|
103
|
+
datachain-0.3.13.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
|
|
104
|
+
datachain-0.3.13.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|