datachain 0.6.3__py3-none-any.whl → 0.6.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of datachain has been flagged as possibly problematic.
- datachain/catalog/catalog.py +3 -25
- datachain/cli.py +0 -8
- datachain/client/fsspec.py +10 -5
- datachain/client/hf.py +1 -0
- datachain/client/local.py +7 -3
- datachain/data_storage/metastore.py +11 -478
- datachain/data_storage/sqlite.py +9 -41
- datachain/data_storage/warehouse.py +1 -2
- datachain/dataset.py +12 -10
- datachain/error.py +0 -4
- datachain/lib/arrow.py +1 -1
- datachain/node.py +1 -1
- {datachain-0.6.3.dist-info → datachain-0.6.5.dist-info}/METADATA +1 -1
- {datachain-0.6.3.dist-info → datachain-0.6.5.dist-info}/RECORD +18 -19
- datachain/storage.py +0 -136
- {datachain-0.6.3.dist-info → datachain-0.6.5.dist-info}/LICENSE +0 -0
- {datachain-0.6.3.dist-info → datachain-0.6.5.dist-info}/WHEEL +0 -0
- {datachain-0.6.3.dist-info → datachain-0.6.5.dist-info}/entry_points.txt +0 -0
- {datachain-0.6.3.dist-info → datachain-0.6.5.dist-info}/top_level.txt +0 -0
datachain/catalog/catalog.py
CHANGED
@@ -42,6 +42,7 @@ from datachain.dataset import (
     DatasetStats,
     DatasetStatus,
     RowDict,
+    StorageURI,
     create_dataset_uri,
     parse_dataset_uri,
 )
@@ -58,7 +59,6 @@ from datachain.node import DirType, Node, NodeWithPath
 from datachain.nodes_thread_pool import NodesThreadPool
 from datachain.remote.studio import StudioClient
 from datachain.sql.types import DateTime, SQLType, String
-from datachain.storage import StorageURI
 from datachain.utils import (
     DataChainDir,
     batched,
@@ -1702,31 +1702,9 @@ class Catalog:
         *,
         client_config=None,
     ) -> None:
-        root_sources = [
-            src for src in sources if Client.get_implementation(src).is_root_url(src)
-        ]
-        non_root_sources = [
-            src
-            for src in sources
-            if not Client.get_implementation(src).is_root_url(src)
-        ]
-
-        client_config = client_config or self.client_config
-
-        # for root sources (e.g s3://) we are just getting all buckets and
-        # saving them as storages, without further indexing in each bucket
-        for source in root_sources:
-            for bucket in Client.get_implementation(source).ls_buckets(**client_config):
-                client = self.get_client(bucket.uri, **client_config)
-                print(f"Registering storage {client.uri}")
-                self.metastore.create_storage_if_not_registered(client.uri)
-
         self.enlist_sources(
-            non_root_sources,
+            sources,
             update,
-            client_config=client_config,
+            client_config=client_config or self.client_config,
             only_index=True,
         )
-
-    def find_stale_storages(self) -> None:
-        self.metastore.find_stale_storages()
datachain/cli.py
CHANGED
@@ -568,12 +568,6 @@ def get_parser() -> ArgumentParser:  # noqa: PLR0915
     )
     add_sources_arg(parse_index)
 
-    subp.add_parser(
-        "find-stale-storages",
-        parents=[parent_parser],
-        description="Finds and marks stale storages",
-    )
-
     show_parser = subp.add_parser(
         "show",
         parents=[parent_parser],
@@ -1100,8 +1094,6 @@ def main(argv: Optional[list[str]] = None) -> int:  # noqa: C901, PLR0912, PLR09
         )
     elif args.command == "completion":
        print(completion(args.shell))
-    elif args.command == "find-stale-storages":
-        catalog.find_stale_storages()
    elif args.command == "query":
        query(
            catalog,
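On the CLI side, retiring `find-stale-storages` touches the two places every argparse subcommand lives: its `add_parser` registration and its dispatch branch in `main`. A minimal sketch of that structure (illustrative only, not datachain's actual parser wiring):

```python
import argparse

parser = argparse.ArgumentParser(prog="datachain")
subp = parser.add_subparsers(dest="command")
subp.add_parser("index", description="Index storage locations")
# A subcommand is retired by deleting both its registration, e.g.
# subp.add_parser("find-stale-storages", ...), and its dispatch branch,
# e.g. elif args.command == "find-stale-storages": ...

args = parser.parse_args(["index"])
if args.command == "index":
    print("indexing...")
```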
datachain/client/fsspec.py
CHANGED
@@ -31,11 +31,12 @@ from datachain.error import ClientError as DataChainClientError
 from datachain.lib.file import File
 from datachain.nodes_fetcher import NodesFetcher
 from datachain.nodes_thread_pool import NodeChunk
-from datachain.storage import StorageURI
 
 if TYPE_CHECKING:
     from fsspec.spec import AbstractFileSystem
 
+    from datachain.dataset import StorageURI
+
 
 logger = logging.getLogger("datachain")
 
@@ -63,7 +64,7 @@ def _is_win_local_path(uri: str) -> bool:
 
 class Bucket(NamedTuple):
     name: str
-    uri: StorageURI
+    uri: "StorageURI"
     created: Optional[datetime]
 
 
@@ -115,7 +116,7 @@ class Client(ABC):
         return DATA_SOURCE_URI_PATTERN.match(name) is not None
 
     @staticmethod
-    def parse_url(source: str) -> tuple[StorageURI, str]:
+    def parse_url(source: str) -> tuple["StorageURI", str]:
         cls = Client.get_implementation(source)
         storage_name, rel_path = cls.split_url(source)
         return cls.get_uri(storage_name), rel_path
@@ -148,7 +149,7 @@ class Client(ABC):
     @classmethod
     def from_source(
         cls,
-        uri: StorageURI,
+        uri: "StorageURI",
         cache: DataChainCache,
         **kwargs,
     ) -> "Client":
@@ -156,6 +157,8 @@ class Client(ABC):
 
     @classmethod
     def ls_buckets(cls, **kwargs) -> Iterator[Bucket]:
+        from datachain.dataset import StorageURI
+
         for entry in cls.create_fs(**kwargs).ls(cls.PREFIX, detail=True):
             name = entry["name"].rstrip("/")
             yield Bucket(
@@ -169,7 +172,9 @@ class Client(ABC):
         return url == cls.PREFIX
 
     @classmethod
-    def get_uri(cls, name) -> StorageURI:
+    def get_uri(cls, name) -> "StorageURI":
+        from datachain.dataset import StorageURI
+
         return StorageURI(f"{cls.PREFIX}{name}")
 
     @classmethod
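Across these client modules the edit is always the same: the module-level `from datachain.storage import StorageURI` disappears, annotations become the string form `"StorageURI"`, and the real import moves under `TYPE_CHECKING` or into the function body. That is the standard way to keep type annotations while breaking a runtime import cycle once `StorageURI` lives in `datachain.dataset`. A minimal, runnable sketch of the pattern, with a stdlib module standing in for `datachain.dataset`:

```python
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Evaluated only by static type checkers, never at runtime, so it
    # cannot participate in an import cycle.
    from decimal import Decimal


def to_decimal(value: str) -> "Decimal":
    # Deferred to call time: by then every module involved is fully
    # initialised, so the cycle cannot bite.
    from decimal import Decimal

    return Decimal(value)


print(to_decimal("1.5"))
```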
datachain/client/hf.py
CHANGED
datachain/client/local.py
CHANGED
@@ -2,16 +2,18 @@ import os
 import posixpath
 from datetime import datetime, timezone
 from pathlib import Path
-from typing import Any
+from typing import TYPE_CHECKING, Any
 from urllib.parse import urlparse
 
 from fsspec.implementations.local import LocalFileSystem
 
 from datachain.lib.file import File
-from datachain.storage import StorageURI
 
 from .fsspec import Client
 
+if TYPE_CHECKING:
+    from datachain.dataset import StorageURI
+
 
 class FileClient(Client):
     FS_CLASS = LocalFileSystem
@@ -28,7 +30,9 @@ class FileClient(Client):
         raise TypeError("Signed urls are not implemented for local file system")
 
     @classmethod
-    def get_uri(cls, name) -> StorageURI:
+    def get_uri(cls, name) -> "StorageURI":
+        from datachain.dataset import StorageURI
+
         return StorageURI(f'{cls.PREFIX}/{name.removeprefix("/")}')
 
     @classmethod
datachain/data_storage/metastore.py
CHANGED
@@ -1,9 +1,7 @@
 import copy
-import hashlib
 import json
 import logging
 import os
-import posixpath
 from abc import ABC, abstractmethod
 from collections.abc import Iterator
 from datetime import datetime, timezone
@@ -24,7 +22,6 @@ from sqlalchemy import (
     UniqueConstraint,
     select,
 )
-from sqlalchemy.sql import func
 
 from datachain.data_storage import JobQueryType, JobStatus
 from datachain.data_storage.serializer import Serializable
@@ -33,15 +30,14 @@ from datachain.dataset import (
     DatasetRecord,
     DatasetStatus,
     DatasetVersion,
+    StorageURI,
 )
 from datachain.error import (
     DatasetNotFoundError,
-    StorageNotFoundError,
     TableMissingError,
 )
 from datachain.job import Job
-from datachain.storage import Storage, StorageStatus, StorageURI
-from datachain.utils import JSONSerialize, is_expired
+from datachain.utils import JSONSerialize
 
 if TYPE_CHECKING:
     from sqlalchemy import Delete, Insert, Select, Update
@@ -60,21 +56,17 @@ class AbstractMetastore(ABC, Serializable):
     """
 
     uri: StorageURI
-    partial_id: Optional[int]
 
     schema: "schema.Schema"
-    storage_class: type[Storage] = Storage
     dataset_class: type[DatasetRecord] = DatasetRecord
     dependency_class: type[DatasetDependency] = DatasetDependency
     job_class: type[Job] = Job
 
     def __init__(
         self,
-        uri: StorageURI = StorageURI(""),
-        partial_id: Optional[int] = None,
+        uri: Optional[StorageURI] = None,
     ):
-        self.uri = uri
-        self.partial_id: Optional[int] = partial_id
+        self.uri = uri or StorageURI("")
 
     def __enter__(self) -> "AbstractMetastore":
         """Returns self upon entering context manager."""
@@ -86,8 +78,7 @@ class AbstractMetastore(ABC, Serializable):
     @abstractmethod
     def clone(
         self,
-        uri: StorageURI = StorageURI(""),
-        partial_id: Optional[int] = None,
+        uri: Optional[StorageURI] = None,
         use_new_connection: bool = False,
     ) -> "AbstractMetastore":
         """Clones AbstractMetastore implementation for some Storage input.
@@ -95,10 +86,6 @@ class AbstractMetastore(ABC, Serializable):
         New connections should only be used if needed due to errors with
         closed connections."""
 
-    @abstractmethod
-    def init(self, uri: StorageURI) -> None:
-        """Initialize partials table for given storage uri."""
-
     def close(self) -> None:
         """Closes any active database or HTTP connections."""
 
@@ -114,96 +101,6 @@ class AbstractMetastore(ABC, Serializable):
     def cleanup_for_tests(self) -> None:
         """Cleanup for tests."""
 
-    #
-    # Storages
-    #
-
-    @abstractmethod
-    def create_storage_if_not_registered(self, uri: StorageURI) -> None:
-        """Saves new storage if it doesn't exist in database."""
-
-    @abstractmethod
-    def register_storage_for_indexing(
-        self,
-        uri: StorageURI,
-        force_update: bool = True,
-        prefix: str = "",
-    ) -> tuple[Storage, bool, bool, Optional[int], Optional[str]]:
-        """
-        Prepares storage for indexing operation.
-        This method should be called before index operation is started
-        It returns:
-            - storage, prepared for indexing
-            - boolean saying if indexing is needed
-            - boolean saying if indexing is currently pending (running)
-            - partial id
-            - partial path
-        """
-
-    @abstractmethod
-    def find_stale_storages(self) -> None:
-        """
-        Finds all pending storages for which the last inserted node has happened
-        before STALE_MINUTES_LIMIT minutes, and marks it as STALE.
-        """
-
-    @abstractmethod
-    def mark_storage_indexed(
-        self,
-        uri: StorageURI,
-        status: int,
-        ttl: int,
-        end_time: Optional[datetime] = None,
-        prefix: str = "",
-        partial_id: int = 0,
-        error_message: str = "",
-        error_stack: str = "",
-        dataset: Optional[DatasetRecord] = None,
-    ) -> None:
-        """
-        Marks storage as indexed.
-        This method should be called when index operation is finished.
-        """
-
-    @abstractmethod
-    def update_last_inserted_at(self, uri: Optional[StorageURI] = None) -> None:
-        """Updates last inserted datetime in bucket with current time."""
-
-    @abstractmethod
-    def get_storage(self, uri: StorageURI) -> Storage:
-        """
-        Gets storage representation from database.
-        E.g. if s3 is used as storage this would be s3 bucket data.
-        """
-
-    @abstractmethod
-    def mark_storage_pending(self, storage: Storage) -> Storage:
-        """Marks storage as pending."""
-
-    #
-    # Partial Indexes
-    #
-
-    @abstractmethod
-    def init_partial_id(self, uri: StorageURI) -> None:
-        """Initializes partial id for given storage."""
-
-    @abstractmethod
-    def get_next_partial_id(self, uri: StorageURI) -> int:
-        """Returns next partial id for given storage."""
-
-    @abstractmethod
-    def get_valid_partial_id(
-        self, uri: StorageURI, prefix: str, raise_exc: bool = True
-    ) -> tuple[Optional[int], Optional[str]]:
-        """
-        Returns valid partial id and it's path, if they exist, for a given storage.
-        """
-
-    @abstractmethod
-    def get_last_partial_path(self, uri: StorageURI) -> Optional[str]:
-        """Returns last partial path for given storage."""
-
     #
     # Datasets
     #
@@ -397,8 +294,6 @@ class AbstractDBMetastore(AbstractMetastore):
     and has shared logic for all database systems currently in use.
     """
 
-    PARTIALS_TABLE_NAME_PREFIX = "prt_"
-    STORAGE_TABLE = "buckets"
     DATASET_TABLE = "datasets"
     DATASET_VERSION_TABLE = "datasets_versions"
     DATASET_DEPENDENCY_TABLE = "datasets_dependencies"
@@ -410,15 +305,11 @@ class AbstractDBMetastore(AbstractMetastore):
     def __init__(
         self,
         id_generator: "AbstractIDGenerator",
-        uri: StorageURI = StorageURI(""),
-        partial_id: Optional[int] = None,
+        uri: Optional[StorageURI] = None,
     ):
+        uri = uri or StorageURI("")
         self.id_generator = id_generator
-        super().__init__(uri, partial_id)
-
-    @abstractmethod
-    def init(self, uri: StorageURI) -> None:
-        """Initialize partials table for given storage uri."""
+        super().__init__(uri)
 
     def close(self) -> None:
         """Closes any active database connections."""
@@ -428,21 +319,6 @@ class AbstractDBMetastore(AbstractMetastore):
         """Cleanup temp tables."""
         self.id_generator.delete_uris(temp_table_names)
 
-    @classmethod
-    def _buckets_columns(cls) -> list["SchemaItem"]:
-        """Buckets (storages) table columns."""
-        return [
-            Column("id", Integer, primary_key=True, nullable=False),
-            Column("uri", Text, nullable=False),
-            Column("timestamp", DateTime(timezone=True)),
-            Column("expires", DateTime(timezone=True)),
-            Column("started_inserting_at", DateTime(timezone=True)),
-            Column("last_inserted_at", DateTime(timezone=True)),
-            Column("status", Integer, nullable=False),
-            Column("error_message", Text, nullable=False, default=""),
-            Column("error_stack", Text, nullable=False, default=""),
-        ]
-
     @classmethod
     def _datasets_columns(cls) -> list["SchemaItem"]:
         """Datasets table columns."""
@@ -543,58 +419,11 @@ class AbstractDBMetastore(AbstractMetastore):
                 ForeignKey(f"{cls.DATASET_VERSION_TABLE}.id"),
                 nullable=True,
             ),
-            # TODO remove when https://github.com/iterative/dvcx/issues/1121 is done
-            # If we unify datasets and bucket listing then both bucket fields won't
-            # be needed
-            Column(
-                "bucket_id",
-                Integer,
-                ForeignKey(f"{cls.STORAGE_TABLE}.id"),
-                nullable=True,
-            ),
-            Column("bucket_version", Text, nullable=True),
-        ]
-
-    @classmethod
-    def _storage_partial_columns(cls) -> list["SchemaItem"]:
-        """Storage partial table columns."""
-        return [
-            Column("path_str", Text, nullable=False),
-            # This is generated before insert and is not the SQLite rowid,
-            # so it is not the primary key.
-            Column("partial_id", Integer, nullable=False, index=True),
-            Column("timestamp", DateTime(timezone=True)),
-            Column("expires", DateTime(timezone=True)),
         ]
 
-    def _get_storage_partial_table(self, name: str) -> Table:
-        table = self.db.metadata.tables.get(name)
-        if table is None:
-            table = Table(
-                name,
-                self.db.metadata,
-                *self._storage_partial_columns(),
-            )
-        return table
-
     #
     # Query Tables
     #
-
-    def _partials_table(self, uri: StorageURI) -> Table:
-        return self._get_storage_partial_table(self._partials_table_name(uri))
-
-    @cached_property
-    def _storages(self) -> Table:
-        return Table(self.STORAGE_TABLE, self.db.metadata, *self._buckets_columns())
-
-    @cached_property
-    def _partials(self) -> Table:
-        assert (
-            self._current_partials_table_name
-        ), "Partials can only be used if uri/current_partials_table_name is set"
-        return self._get_storage_partial_table(self._current_partials_table_name)
-
     @cached_property
     def _datasets(self) -> Table:
         return Table(self.DATASET_TABLE, self.db.metadata, *self._datasets_columns())
@@ -618,32 +447,6 @@ class AbstractDBMetastore(AbstractMetastore):
     #
     # Query Starters (These can be overridden by subclasses)
     #
-
-    @abstractmethod
-    def _storages_insert(self) -> "Insert": ...
-
-    def _storages_select(self, *columns) -> "Select":
-        if not columns:
-            return self._storages.select()
-        return select(*columns)
-
-    def _storages_update(self) -> "Update":
-        return self._storages.update()
-
-    def _storages_delete(self) -> "Delete":
-        return self._storages.delete()
-
-    @abstractmethod
-    def _partials_insert(self) -> "Insert": ...
-
-    def _partials_select(self, *columns) -> "Select":
-        if not columns:
-            return self._partials.select()
-        return select(*columns)
-
-    def _partials_update(self) -> "Update":
-        return self._partials.update()
-
     @abstractmethod
     def _datasets_insert(self) -> "Insert": ...
 
@@ -686,275 +489,6 @@ class AbstractDBMetastore(AbstractMetastore):
     def _datasets_dependencies_delete(self) -> "Delete":
         return self._datasets_dependencies.delete()
 
-    #
-    # Table Name Internal Functions
-    #
-
-    def _partials_table_name(self, uri: StorageURI) -> str:
-        sha = hashlib.sha256(uri.encode("utf-8")).hexdigest()[:12]
-        return f"{self.PARTIALS_TABLE_NAME_PREFIX}_{sha}"
-
-    @property
-    def _current_partials_table_name(self) -> Optional[str]:
-        if not self.uri:
-            return None
-        return self._partials_table_name(self.uri)
-
-    #
-    # Storages
-    #
-
-    def create_storage_if_not_registered(self, uri: StorageURI, conn=None) -> None:
-        """Saves new storage if it doesn't exist in database."""
-        query = self._storages_insert().values(
-            uri=uri,
-            status=StorageStatus.CREATED,
-            error_message="",
-            error_stack="",
-        )
-        if hasattr(query, "on_conflict_do_nothing"):
-            # SQLite and PostgreSQL both support 'on_conflict_do_nothing',
-            # but generic SQL does not
-            query = query.on_conflict_do_nothing()
-        self.db.execute(query, conn=conn)
-
-    def register_storage_for_indexing(
-        self,
-        uri: StorageURI,
-        force_update: bool = True,
-        prefix: str = "",
-    ) -> tuple[Storage, bool, bool, Optional[int], Optional[str]]:
-        """
-        Prepares storage for indexing operation.
-        This method should be called before index operation is started
-        It returns:
-            - storage, prepared for indexing
-            - boolean saying if indexing is needed
-            - boolean saying if indexing is currently pending (running)
-            - partial id
-            - partial path
-        """
-        # This ensures that all calls to the DB are in a single transaction
-        # and commit is automatically called once this function returns
-        with self.db.transaction() as conn:
-            # Create storage if it doesn't exist
-            self.create_storage_if_not_registered(uri, conn=conn)
-            storage = self.get_storage(uri, conn=conn)
-
-            if storage.status == StorageStatus.PENDING:
-                return storage, False, True, None, None
-
-            if storage.is_expired or storage.status == StorageStatus.STALE:
-                storage = self.mark_storage_pending(storage, conn=conn)
-                return storage, True, False, None, None
-
-            if (
-                storage.status in (StorageStatus.PARTIAL, StorageStatus.COMPLETE)
-                and not force_update
-            ):
-                partial_id, partial_path = self.get_valid_partial_id(
-                    uri, prefix, raise_exc=False
-                )
-                if partial_id is not None:
-                    return storage, False, False, partial_id, partial_path
-                return storage, True, False, None, None
-
-            storage = self.mark_storage_pending(storage, conn=conn)
-            return storage, True, False, None, None
-
-    def find_stale_storages(self) -> None:
-        """
-        Finds all pending storages for which the last inserted node has happened
-        before STALE_MINUTES_LIMIT minutes, and marks it as STALE.
-        """
-        s = self._storages
-        with self.db.transaction() as conn:
-            pending_storages = map(
-                self.storage_class._make,
-                self.db.execute(
-                    self._storages_select().where(s.c.status == StorageStatus.PENDING),
-                    conn=conn,
-                ),
-            )
-            for storage in pending_storages:
-                if storage.is_stale:
-                    print(f"Marking storage {storage.uri} as stale")
-                    self._mark_storage_stale(storage.id, conn=conn)
-
-    def mark_storage_indexed(
-        self,
-        uri: StorageURI,
-        status: int,
-        ttl: int,
-        end_time: Optional[datetime] = None,
-        prefix: str = "",
-        partial_id: int = 0,
-        error_message: str = "",
-        error_stack: str = "",
-        dataset: Optional[DatasetRecord] = None,
-    ) -> None:
-        """
-        Marks storage as indexed.
-        This method should be called when index operation is finished.
-        """
-        if status == StorageStatus.PARTIAL and not prefix:
-            raise AssertionError("Partial indexing requires a prefix")
-
-        if end_time is None:
-            end_time = datetime.now(timezone.utc)
-        expires = Storage.get_expiration_time(end_time, ttl)
-
-        s = self._storages
-        with self.db.transaction() as conn:
-            self.db.execute(
-                self._storages_update()
-                .where(s.c.uri == uri)
-                .values(  # type: ignore [attr-defined]
-                    timestamp=end_time,
-                    expires=expires,
-                    status=status,
-                    last_inserted_at=end_time,
-                    error_message=error_message,
-                    error_stack=error_stack,
-                ),
-                conn=conn,
-            )
-
-            if not self._current_partials_table_name:
-                # This only occurs in tests
-                return
-
-            if status in (StorageStatus.PARTIAL, StorageStatus.COMPLETE):
-                dir_prefix = posixpath.join(prefix, "")
-                self.db.execute(
-                    self._partials_insert().values(
-                        path_str=dir_prefix,
-                        timestamp=end_time,
-                        expires=expires,
-                        partial_id=partial_id,
-                    ),
-                    conn=conn,
-                )
-
-            # update underlying dataset status as well
-            if status == StorageStatus.FAILED and dataset:
-                self.update_dataset_status(
-                    dataset,
-                    DatasetStatus.FAILED,
-                    dataset.latest_version,
-                    error_message=error_message,
-                    error_stack=error_stack,
-                    conn=conn,
-                )
-
-            if status in (StorageStatus.PARTIAL, StorageStatus.COMPLETE) and dataset:
-                self.update_dataset_status(
-                    dataset, DatasetStatus.COMPLETE, dataset.latest_version, conn=conn
-                )
-
-    def update_last_inserted_at(self, uri: Optional[StorageURI] = None) -> None:
-        """Updates last inserted datetime in bucket with current time"""
-        uri = uri or self.uri
-        updates = {"last_inserted_at": datetime.now(timezone.utc)}
-        s = self._storages
-        self.db.execute(
-            self._storages_update().where(s.c.uri == uri).values(**updates)  # type: ignore [attr-defined]
-        )
-
-    def get_storage(self, uri: StorageURI, conn=None) -> Storage:
-        """
-        Gets storage representation from database.
-        E.g. if s3 is used as storage this would be s3 bucket data
-        """
-        s = self._storages
-        result = next(
-            self.db.execute(self._storages_select().where(s.c.uri == uri), conn=conn),
-            None,
-        )
-        if not result:
-            raise StorageNotFoundError(f"Storage {uri} not found.")
-
-        return self.storage_class._make(result)
-
-    def mark_storage_pending(self, storage: Storage, conn=None) -> Storage:
-        # Update status to pending and dates
-        updates = {
-            "status": StorageStatus.PENDING,
-            "timestamp": None,
-            "expires": None,
-            "last_inserted_at": None,
-            "started_inserting_at": datetime.now(timezone.utc),
-        }
-        storage = storage._replace(**updates)  # type: ignore [arg-type]
-        s = self._storages
-        self.db.execute(
-            self._storages_update().where(s.c.uri == storage.uri).values(**updates),  # type: ignore [attr-defined]
-            conn=conn,
-        )
-        return storage
-
-    def _mark_storage_stale(self, storage_id: int, conn=None) -> None:
-        # Update status to pending and dates
-        updates = {"status": StorageStatus.STALE, "timestamp": None, "expires": None}
-        s = self._storages
-        self.db.execute(
-            self._storages.update().where(s.c.id == storage_id).values(**updates),  # type: ignore [attr-defined]
-            conn=conn,
-        )
-
-    #
-    # Partial Indexes
-    #
-
-    def init_partial_id(self, uri: StorageURI) -> None:
-        """Initializes partial id for given storage."""
-        if not uri:
-            raise ValueError("uri for get_next_partial_id() cannot be empty")
-        self.id_generator.init_id(f"partials:{uri}")
-
-    def get_next_partial_id(self, uri: StorageURI) -> int:
-        """Returns next partial id for given storage."""
-        if not uri:
-            raise ValueError("uri for get_next_partial_id() cannot be empty")
-        return self.id_generator.get_next_id(f"partials:{uri}")
-
-    def get_valid_partial_id(
-        self, uri: StorageURI, prefix: str, raise_exc: bool = True
-    ) -> tuple[Optional[int], Optional[str]]:
-        """
-        Returns valid partial id and it's path, if they exist, for a given storage.
-        """
-        # This SQL statement finds all entries that are
-        # prefixes of the given prefix, matching this or parent directories
-        # that are indexed.
-        dir_prefix = posixpath.join(prefix, "")
-        p = self._partials_table(uri)
-        expire_values = self.db.execute(
-            select(p.c.expires, p.c.partial_id, p.c.path_str)
-            .where(
-                p.c.path_str == func.substr(dir_prefix, 1, func.length(p.c.path_str))
-            )
-            .order_by(p.c.expires.desc())
-        )
-        for expires, partial_id, path_str in expire_values:
-            if not is_expired(expires):
-                return partial_id, path_str
-        if raise_exc:
-            raise RuntimeError(f"Unable to get valid partial_id: {uri=}, {prefix=}")
-        return None, None
-
-    def get_last_partial_path(self, uri: StorageURI) -> Optional[str]:
-        """Returns last partial path for given storage."""
-        p = self._partials_table(uri)
-        if not self.db.has_table(p.name):
-            raise StorageNotFoundError(f"Storage {uri} partials are not found.")
-        last_partial = self.db.execute(
-            select(p.c.path_str).order_by(p.c.timestamp.desc()).limit(1)
-        )
-        for (path_str,) in last_partial:
-            return path_str
-        return None
-
     #
     # Datasets
     #
@@ -1298,7 +832,6 @@ class AbstractDBMetastore(AbstractMetastore):
         d = self._datasets
         dd = self._datasets_dependencies
         dv = self._datasets_versions
-        s = self._storages
 
         dataset_version = dataset.get_version(version)
 
@@ -1307,9 +840,9 @@ class AbstractDBMetastore(AbstractMetastore):
         query = (
             self._datasets_dependencies_select(*select_cols)
             .select_from(
-                dd.join(d, dd.c.dataset_id == d.c.id, isouter=True)
-                .join(dv, dd.c.dataset_version_id == dv.c.id, isouter=True)
-                .join(s, dd.c.bucket_id == s.c.id, isouter=True)
+                dd.join(d, dd.c.dataset_id == d.c.id, isouter=True).join(
+                    dv, dd.c.dataset_version_id == dv.c.id, isouter=True
+                )
             )
             .where(
                 (dd.c.source_dataset_id == dataset.id)
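The removed `create_storage_if_not_registered` made registration idempotent via the dialect-specific `on_conflict_do_nothing` clause, feature-tested with `hasattr` because generic SQL inserts don't provide it. A self-contained sketch of that upsert pattern, with a made-up table standing in for the old "buckets" schema:

```python
import sqlalchemy as sa
from sqlalchemy.dialects.sqlite import insert

metadata = sa.MetaData()
# Toy stand-in for the old buckets table; uri must be unique so the
# conflict clause has something to conflict on.
buckets = sa.Table(
    "buckets",
    metadata,
    sa.Column("id", sa.Integer, primary_key=True),
    sa.Column("uri", sa.Text, nullable=False, unique=True),
    sa.Column("status", sa.Integer, nullable=False),
)

engine = sa.create_engine("sqlite://")  # in-memory database
metadata.create_all(engine)

stmt = insert(buckets).values(uri="s3://foo", status=1)
# Duplicate URIs are silently skipped instead of raising IntegrityError.
stmt = stmt.on_conflict_do_nothing()

with engine.begin() as conn:
    conn.execute(stmt)  # inserts the row
    conn.execute(stmt)  # second call is a no-op
```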
datachain/data_storage/sqlite.py
CHANGED
@@ -29,12 +29,11 @@ from datachain.data_storage import AbstractDBMetastore, AbstractWarehouse
 from datachain.data_storage.db_engine import DatabaseEngine
 from datachain.data_storage.id_generator import AbstractDBIDGenerator
 from datachain.data_storage.schema import DefaultSchema
-from datachain.dataset import DatasetRecord
+from datachain.dataset import DatasetRecord, StorageURI
 from datachain.error import DataChainError
 from datachain.sql.sqlite import create_user_defined_sql_functions, sqlite_dialect
 from datachain.sql.sqlite.base import load_usearch_extension
 from datachain.sql.types import SQLType
-from datachain.storage import StorageURI
 from datachain.utils import DataChainDir, batched_it
 
 if TYPE_CHECKING:
@@ -392,14 +391,14 @@ class SQLiteMetastore(AbstractDBMetastore):
     def __init__(
         self,
         id_generator: "SQLiteIDGenerator",
-        uri: StorageURI = StorageURI(""),
-        partial_id: Optional[int] = None,
+        uri: Optional[StorageURI] = None,
         db: Optional["SQLiteDatabaseEngine"] = None,
         db_file: Optional[str] = None,
         in_memory: bool = False,
     ):
+        uri = uri or StorageURI("")
         self.schema: DefaultSchema = DefaultSchema()
-        super().__init__(id_generator, uri, partial_id)
+        super().__init__(id_generator, uri)
 
         # needed for dropping tables in correct order for tests because of
         # foreign keys
@@ -417,21 +416,16 @@ class SQLiteMetastore(AbstractDBMetastore):
 
     def clone(
         self,
-        uri: StorageURI = StorageURI(""),
-        partial_id: Optional[int] = None,
+        uri: Optional[StorageURI] = None,
         use_new_connection: bool = False,
     ) -> "SQLiteMetastore":
-
-
-
-
-            uri = self.uri
-            if self.partial_id:
-                partial_id = self.partial_id
+        uri = uri or StorageURI("")
+        if not uri and self.uri:
+            uri = self.uri
+
         return SQLiteMetastore(
             self.id_generator.clone(),
             uri=uri,
-            partial_id=partial_id,
             db=self.db.clone(),
         )
 
@@ -446,7 +440,6 @@ class SQLiteMetastore(AbstractDBMetastore):
             {
                 "id_generator_clone_params": self.id_generator.clone_params(),
                 "uri": self.uri,
-                "partial_id": self.partial_id,
                 "db_clone_params": self.db.clone_params(),
             },
         )
@@ -457,7 +450,6 @@ class SQLiteMetastore(AbstractDBMetastore):
         *,
         id_generator_clone_params: tuple[Callable, list, dict[str, Any]],
         uri: StorageURI,
-        partial_id: Optional[int],
         db_clone_params: tuple[Callable, list, dict[str, Any]],
     ) -> "SQLiteMetastore":
         (
@@ -469,14 +461,11 @@ class SQLiteMetastore(AbstractDBMetastore):
         return cls(
            id_generator=id_generator_class(*id_generator_args, **id_generator_kwargs),
            uri=uri,
-           partial_id=partial_id,
            db=db_class(*db_args, **db_kwargs),
         )
 
     def _init_tables(self) -> None:
         """Initialize tables."""
-        self.db.create_table(self._storages, if_not_exists=True)
-        self.default_table_names.append(self._storages.name)
         self.db.create_table(self._datasets, if_not_exists=True)
         self.default_table_names.append(self._datasets.name)
         self.db.create_table(self._datasets_versions, if_not_exists=True)
@@ -486,28 +475,11 @@ class SQLiteMetastore(AbstractDBMetastore):
         self.db.create_table(self._jobs, if_not_exists=True)
         self.default_table_names.append(self._jobs.name)
 
-    def init(self, uri: StorageURI) -> None:
-        if not uri:
-            raise ValueError("uri for init() cannot be empty")
-        partials_table = self._partials_table(uri)
-        self.db.create_table(partials_table, if_not_exists=True)
-
-    @classmethod
-    def _buckets_columns(cls) -> list["SchemaItem"]:
-        """Buckets (storages) table columns."""
-        return [*super()._buckets_columns(), UniqueConstraint("uri")]
-
     @classmethod
     def _datasets_columns(cls) -> list["SchemaItem"]:
         """Datasets table columns."""
         return [*super()._datasets_columns(), UniqueConstraint("name")]
 
-    def _storages_insert(self) -> "Insert":
-        return sqlite.insert(self._storages)
-
-    def _partials_insert(self) -> "Insert":
-        return sqlite.insert(self._partials)
-
     def _datasets_insert(self) -> "Insert":
         return sqlite.insert(self._datasets)
 
@@ -526,13 +498,9 @@ class SQLiteMetastore(AbstractDBMetastore):
             self._datasets_dependencies.c.id,
             self._datasets_dependencies.c.dataset_id,
             self._datasets_dependencies.c.dataset_version_id,
-            self._datasets_dependencies.c.bucket_id,
-            self._datasets_dependencies.c.bucket_version,
             self._datasets.c.name,
-            self._datasets.c.created_at,
             self._datasets_versions.c.version,
             self._datasets_versions.c.created_at,
-            self._storages.c.uri,
         ]
 
     #
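`clone_params` and `init_after_clone` (one parameter lighter after this change) implement a serialize-by-recipe pattern: instead of pickling live objects, each component hands back a `(callable, args, kwargs)` triple from which an equivalent instance can be rebuilt in another process. A toy version of the idea, with invented names; datachain's real signatures carry more state:

```python
from typing import Any, Callable


class Engine:
    """Stand-in for a component holding an unpicklable resource."""

    def __init__(self, db_file: str) -> None:
        self.db_file = db_file  # configuration is picklable; connections are not

    def clone_params(self) -> tuple[Callable[..., "Engine"], list, dict[str, Any]]:
        # A recipe, not a copy: the receiver can rebuild an equivalent
        # Engine without ever touching our live connection.
        return (Engine, [], {"db_file": self.db_file})


engine = Engine("/tmp/meta.db")
factory, args, kwargs = engine.clone_params()
clone = factory(*args, **kwargs)  # fresh instance, same configuration
assert clone.db_file == engine.db_file
```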
datachain/data_storage/warehouse.py
CHANGED
@@ -19,11 +19,10 @@ from tqdm import tqdm
 from datachain.client import Client
 from datachain.data_storage.schema import convert_rows_custom_column_types
 from datachain.data_storage.serializer import Serializable
-from datachain.dataset import DatasetRecord
+from datachain.dataset import DatasetRecord, StorageURI
 from datachain.node import DirType, DirTypeGroup, Node, NodeWithPath, get_path
 from datachain.sql.functions import path as pathfunc
 from datachain.sql.types import Int, SQLType
-from datachain.storage import StorageURI
 from datachain.utils import sql_escape_like
 
 if TYPE_CHECKING:
datachain/dataset.py
CHANGED
@@ -3,21 +3,17 @@ import json
 from dataclasses import dataclass, fields
 from datetime import datetime
 from typing import (
-    TYPE_CHECKING,
     Any,
+    NewType,
     Optional,
     TypeVar,
     Union,
 )
 from urllib.parse import urlparse
 
-from datachain.client import Client
 from datachain.error import DatasetVersionNotFoundError
 from datachain.sql.types import NAME_TYPES_MAPPING, SQLType
 
-if TYPE_CHECKING:
-    from datachain.storage import StorageURI
-
 T = TypeVar("T", bound="DatasetRecord")
 V = TypeVar("V", bound="DatasetVersion")
 DD = TypeVar("DD", bound="DatasetDependency")
@@ -27,6 +23,13 @@ QUERY_DATASET_PREFIX = "ds_query_"
 LISTING_PREFIX = "lst__"
 
 
+# StorageURI represents a normalised URI to a valid storage location (full bucket or
+# absolute local path).
+# Valid examples: s3://foo, file:///var/data
+# Invalid examples: s3://foo/, s3://foo/bar, file://~
+StorageURI = NewType("StorageURI", str)
+
+
 def parse_dataset_uri(uri: str) -> tuple[str, Optional[int]]:
     """
     Parse dataser uri to extract name and version out of it (if version is defined)
@@ -94,14 +97,11 @@ class DatasetDependency:
         id: int,
         dataset_id: Optional[int],
         dataset_version_id: Optional[int],
-        bucket_id: Optional[int],
-        bucket_version: Optional[str],
         dataset_name: Optional[str],
-        dataset_created_at: Optional[datetime],
         dataset_version: Optional[int],
         dataset_version_created_at: Optional[datetime],
-        bucket_uri: Optional["StorageURI"],
     ) -> Optional["DatasetDependency"]:
+        from datachain.client import Client
         from datachain.lib.listing import is_listing_dataset, listing_uri_from_name
 
         if not dataset_id:
@@ -124,7 +124,7 @@ class DatasetDependency:
                 if dataset_version
                 else None
             ),
-            dataset_version_created_at
+            dataset_version_created_at,  # type: ignore[arg-type]
             [],
         )
 
@@ -448,6 +448,8 @@ class DatasetRecord:
         For bucket listing we implicitly create underlying dataset to hold data. This
         method is checking if this is one of those datasets.
         """
+        from datachain.client import Client
+
         # TODO refactor and maybe remove method in
         # https://github.com/iterative/datachain/issues/318
         return Client.is_data_source_uri(self.name) or self.name.startswith(
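`StorageURI` now lives in `datachain.dataset` as a `typing.NewType`. A `NewType` costs nothing at runtime (its "constructor" just returns the argument) but gives static checkers a type distinct from `str`, so a raw string can't silently stand in for a normalised storage URI. A quick illustration of those semantics:

```python
from typing import NewType

StorageURI = NewType("StorageURI", str)


def register(uri: StorageURI) -> None:
    print(f"registering {uri}")


uri = StorageURI("s3://foo")  # at runtime this is just the str "s3://foo"
assert isinstance(uri, str)

register(uri)         # fine for both runtime and the type checker
register("s3://foo")  # runs, but mypy flags it: str is not StorageURI
```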
datachain/error.py
CHANGED
datachain/lib/arrow.py
CHANGED
@@ -175,7 +175,7 @@ def arrow_type_mapper(col_type: pa.DataType, column: str = "") -> type:  # noqa:
         return dict
     if isinstance(col_type, pa.lib.DictionaryType):
         return arrow_type_mapper(col_type.value_type)  # type: ignore[return-value]
-    raise TypeError(f"{col_type!r} datatypes not supported")
+    raise TypeError(f"{col_type!r} datatypes not supported, column: {column}")
 
 
 def _nrows_file(file: File, nrows: int) -> str:
datachain/node.py
CHANGED
@@ -3,8 +3,8 @@ from typing import TYPE_CHECKING, Any, Optional
 
 import attrs
 
+from datachain.dataset import StorageURI
 from datachain.lib.file import File
-from datachain.storage import StorageURI
 from datachain.utils import TIME_ZERO, time_to_str
 
 if TYPE_CHECKING:
{datachain-0.6.3.dist-info → datachain-0.6.5.dist-info}/RECORD
CHANGED
@@ -2,45 +2,44 @@ datachain/__init__.py,sha256=OGzc8xZWtwqxiiutjU4AxCRPY0lrX_csgERiTrq4G0o,908
 datachain/__main__.py,sha256=hG3Y4ARGEqe1AWwNMd259rBlqtphx1Wk39YbueQ0yV8,91
 datachain/asyn.py,sha256=Lg3Ck1PQLjQziMx9KU4atzbEnJXTE0924WMYkhgWtGU,8247
 datachain/cache.py,sha256=s0YHN7qurmQv-eC265TjeureK84TebWWAnL07cxchZQ,2997
-datachain/cli.py,sha256=
+datachain/cli.py,sha256=Wl-xMpTRgrkg4drX5I_QxAB1IATyULHCXOdx_wfoLVg,33529
 datachain/cli_utils.py,sha256=jrn9ejGXjybeO1ur3fjdSiAyCHZrX0qsLLbJzN9ErPM,2418
 datachain/config.py,sha256=g8qbNV0vW2VEKpX-dGZ9pAn0DAz6G2ZFcr7SAV3PoSM,4272
-datachain/dataset.py,sha256=
-datachain/error.py,sha256=
+datachain/dataset.py,sha256=lLUbUbJP1TYL9Obkc0f2IDziGcDylZge9ORQjK-WtXs,14717
+datachain/error.py,sha256=bxAAL32lSeMgzsQDEHbGTGORj-mPzzpCRvWDPueJNN4,1092
 datachain/job.py,sha256=Jt4sNutMHJReaGsj3r3scueN5aESLGfhimAa8pUP7Is,1271
 datachain/listing.py,sha256=AV23WZq-k6e2zeeNBhVQP1-2PrwNCYidO0HBDKzpVaA,7152
-datachain/node.py,sha256=
+datachain/node.py,sha256=i7_jC8VcW6W5VYkDszAOu0H-rNBuqXB4UnLEh4wFzjc,5195
 datachain/nodes_fetcher.py,sha256=F-73-h19HHNGtHFBGKk7p3mc0ALm4a9zGnzhtuUjnp4,1107
 datachain/nodes_thread_pool.py,sha256=uPo-xl8zG5m9YgODjPFBpbcqqHjI-dcxH87yAbj_qco,3192
 datachain/progress.py,sha256=5KotcvvzAUL_RF0GEj4JY0IB1lyImnmHxe89YkT1XO4,4330
 datachain/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datachain/storage.py,sha256=RiSJLYdHUjnrEWkLBKPcETHpAxld_B2WxLg711t0aZI,3733
 datachain/studio.py,sha256=d-jUsYpfI1LEv3g8KU-lLchVgb9L0TXvlHakieFud_E,3788
 datachain/telemetry.py,sha256=0A4IOPPp9VlP5pyW9eBfaTK3YhHGzHl7dQudQjUAx9A,994
 datachain/utils.py,sha256=-mSFowjIidJ4_sMXInvNHLn4rK_QnHuIlLuH1_lMGmI,13897
 datachain/catalog/__init__.py,sha256=g2iAAFx_gEIrqshXlhSEbrc8qDaEH11cjU40n3CHDz4,409
-datachain/catalog/catalog.py,sha256=
+datachain/catalog/catalog.py,sha256=qFlRrR01_9h1MjK6DEgVSgIwbtZEGV_SdG_E5qUsHmM,57352
 datachain/catalog/datasource.py,sha256=D-VWIVDCM10A8sQavLhRXdYSCG7F4o4ifswEF80_NAQ,1412
 datachain/catalog/loader.py,sha256=-6VelNfXUdgUnwInVyA8g86Boxv2xqhTh9xNS-Zlwig,8242
 datachain/client/__init__.py,sha256=T4wiYL9KIM0ZZ_UqIyzV8_ufzYlewmizlV4iymHNluE,86
 datachain/client/azure.py,sha256=ffxs26zm6KLAL1aUWJm-vtzuZP3LSNha7UDGXynMBKo,2234
 datachain/client/fileslice.py,sha256=bT7TYco1Qe3bqoc8aUkUZcPdPofJDHlryL5BsTn9xsY,3021
-datachain/client/fsspec.py,sha256=
+datachain/client/fsspec.py,sha256=C6C5AO6ndkgcoUxCRN9_8fUzqX2cRWJWG6FL6oD9X_Q,12708
 datachain/client/gcs.py,sha256=cnTIr5GS6dbYOEYfqehhyQu3dr6XNjPHSg5U3FkivUk,4124
-datachain/client/hf.py,sha256=
-datachain/client/local.py,sha256=
+datachain/client/hf.py,sha256=XeVJVbiNViZCpn3sfb90Fr8SYO3BdLmfE3hOWMoqInE,951
+datachain/client/local.py,sha256=vwbgCwZ7IqY2voj2l7tLJjgov7Dp--fEUvUwUBsMbls,4457
 datachain/client/s3.py,sha256=CVHBUZ1Ic2Q3370nl-Bbe69phuWjFlrVv9dTJKBpRT0,6019
 datachain/data_storage/__init__.py,sha256=cEOJpyu1JDZtfUupYucCDNFI6e5Wmp_Oyzq6rZv32Y8,398
 datachain/data_storage/db_engine.py,sha256=81Ol1of9TTTzD97ORajCnP366Xz2mEJt6C-kTUCaru4,3406
 datachain/data_storage/id_generator.py,sha256=lCEoU0BM37Ai2aRpSbwo5oQT0GqZnSpYwwvizathRMQ,4292
 datachain/data_storage/job.py,sha256=w-7spowjkOa1P5fUVtJou3OltT0L48P0RYWZ9rSJ9-s,383
-datachain/data_storage/metastore.py,sha256
+datachain/data_storage/metastore.py,sha256=-TJCqG70VofSVOh2yEez4dwjHS3eQL8p7d9uO3WTVwM,35878
 datachain/data_storage/schema.py,sha256=CiRXrDYp5ZZopSyUgZ7MT2ml_6YvqSTYXdybatcbX9M,9849
 datachain/data_storage/serializer.py,sha256=6G2YtOFqqDzJf1KbvZraKGXl2XHZyVml2krunWUum5o,927
-datachain/data_storage/sqlite.py,sha256=
-datachain/data_storage/warehouse.py,sha256=
+datachain/data_storage/sqlite.py,sha256=wb8xlMJYYyt59wft0psJj587d-AwpNThzIqspVcKnRI,27388
+datachain/data_storage/warehouse.py,sha256=xwMaR4jBpR13vjG3zrhphH4z2_CFLNj0KPF0LJCXCJ8,30727
 datachain/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datachain/lib/arrow.py,sha256
+datachain/lib/arrow.py,sha256=-hu9tic79a01SY2UBqkA3U6wUr6tnE3T3q5q_BnO93A,9156
 datachain/lib/clip.py,sha256=lm5CzVi4Cj1jVLEKvERKArb-egb9j1Ls-fwTItT6vlI,6150
 datachain/lib/data_model.py,sha256=dau4AlZBhOFvF7pEKMeqCeRkcFFg5KFvTBWW_2CdH5g,2371
 datachain/lib/dataset_info.py,sha256=srPPhI2UHf6hFPBecyFEVw2SS5aPisIIMsvGgKqi7ss,2366
@@ -101,9 +100,9 @@ datachain/sql/sqlite/base.py,sha256=aHSZVvh4XSVkvZ07h3jMoRlHI4sWD8y3SnmGs9xMG9Y,
 datachain/sql/sqlite/types.py,sha256=yzvp0sXSEoEYXs6zaYC_2YubarQoZH-MiUNXcpuEP4s,1573
 datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR0,469
 datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
-datachain-0.6.
-datachain-0.6.
-datachain-0.6.
-datachain-0.6.
-datachain-0.6.
-datachain-0.6.
+datachain-0.6.5.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
+datachain-0.6.5.dist-info/METADATA,sha256=eSh62q8OKalsO_IHYb0M2lT4y0x5z84uX1WVt7_dZlM,17188
+datachain-0.6.5.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
+datachain-0.6.5.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
+datachain-0.6.5.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
+datachain-0.6.5.dist-info/RECORD,,
datachain/storage.py
DELETED
@@ -1,136 +0,0 @@
-import posixpath
-from abc import ABC, abstractmethod
-from datetime import datetime, timedelta, timezone
-from functools import cached_property
-from typing import NamedTuple, NewType, Optional, Union
-from urllib.parse import urlparse
-
-from datachain.utils import is_expired, time_to_local_str, time_to_str
-
-STALE_MINUTES_LIMIT = 15
-
-# StorageURI represents a normalised URI to a valid storage location (full bucket or
-# absolute local path).
-# Valid examples: s3://foo, file:///var/data
-# Invalid examples: s3://foo/, s3://foo/bar, file://~
-StorageURI = NewType("StorageURI", str)
-
-
-class StorageStatus:
-    CREATED = 1
-    PENDING = 2
-    FAILED = 3
-    COMPLETE = 4
-    PARTIAL = 5
-    STALE = 6
-    INDEXING_SCHEDULED = 7
-    DELETE_SCHEDULED = 8
-
-
-class AbstractStorage(ABC):
-    @property
-    @abstractmethod
-    def uri(self) -> StorageURI: ...
-
-    @property
-    @abstractmethod
-    def timestamp(self) -> Optional[Union[datetime, str]]: ...
-
-    @property
-    @abstractmethod
-    def expires(self) -> Optional[Union[datetime, str]]: ...
-
-    @property
-    @abstractmethod
-    def status(self) -> int: ...
-
-    @property
-    def type(self):
-        return self._parsed_uri.scheme
-
-    @property
-    def name(self):
-        return self._parsed_uri.netloc
-
-    @cached_property
-    def _parsed_uri(self):
-        return urlparse(self.uri)
-
-
-class StorageRecord(NamedTuple):
-    id: int
-    uri: StorageURI
-    timestamp: Optional[Union[datetime, str]] = None
-    expires: Optional[Union[datetime, str]] = None
-    started_inserting_at: Optional[Union[datetime, str]] = None
-    last_inserted_at: Optional[Union[datetime, str]] = None
-    status: int = StorageStatus.CREATED
-    error_message: str = ""
-    error_stack: str = ""
-
-
-class Storage(StorageRecord, AbstractStorage):
-    @property
-    def is_indexed(self) -> bool:
-        return self.status == StorageStatus.COMPLETE
-
-    @property
-    def is_expired(self) -> bool:
-        return is_expired(self.expires)
-
-    @property
-    def is_pending(self) -> bool:
-        return self.status == StorageStatus.PENDING
-
-    @property
-    def is_stale(self) -> bool:
-        limit = datetime.now(timezone.utc) - timedelta(minutes=STALE_MINUTES_LIMIT)
-        date_to_check = self.last_inserted_at or self.started_inserting_at
-
-        return self.is_pending and date_to_check < limit  # type: ignore [operator]
-
-    @property
-    def need_indexing(self) -> bool:
-        return self.is_expired or not self.is_indexed
-
-    @property
-    def timestamp_str(self) -> Optional[str]:
-        if not self.timestamp:
-            return None
-        return time_to_str(self.timestamp)
-
-    @property
-    def timestamp_to_local(self) -> Optional[str]:
-        if not self.timestamp:
-            return None
-        return time_to_local_str(self.timestamp)
-
-    @property
-    def expires_to_local(self) -> Optional[str]:
-        if not self.expires:
-            return None
-        return time_to_local_str(self.expires)
-
-    @staticmethod
-    def get_expiration_time(timestamp: datetime, ttl: int):
-        if ttl >= 0:
-            try:
-                return timestamp + timedelta(seconds=ttl)
-            except OverflowError:
-                return datetime.max
-        else:
-            return datetime.max
-
-    @staticmethod
-    def dataset_name(uri: str, partial_path: str) -> str:
-        return f"{uri}/{partial_path}"
-
-    def to_dict(self, file_path=""):
-        uri = self.uri
-        if file_path:
-            uri = posixpath.join(uri, *file_path.rstrip("/").split("/"))
-        return {
-            "uri": uri,
-            "timestamp": time_to_str(self.timestamp) if self.timestamp else None,
-            "expires": time_to_str(self.expires) if self.expires else None,
-        }
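For reference, the TTL semantics of the deleted `Storage.get_expiration_time` helper: a negative TTL means "never expires", and a TTL too large for `datetime` arithmetic degrades to the same sentinel instead of raising. A standalone sketch mirroring the removed logic:

```python
from datetime import datetime, timedelta, timezone


def get_expiration_time(timestamp: datetime, ttl: int) -> datetime:
    # Negative TTL means "never expires"; an absurdly large TTL overflows
    # the timedelta/datetime range, so both fall back to datetime.max,
    # matching the deleted helper's behaviour.
    if ttl >= 0:
        try:
            return timestamp + timedelta(seconds=ttl)
        except OverflowError:
            return datetime.max
    return datetime.max


now = datetime.now(timezone.utc)
print(get_expiration_time(now, 3600))    # one hour from now
print(get_expiration_time(now, -1))      # datetime.max: never expires
print(get_expiration_time(now, 10**15))  # overflow -> datetime.max
```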
|