esgpull 0.6.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- esgpull/__init__.py +12 -0
- esgpull/auth.py +181 -0
- esgpull/cli/__init__.py +73 -0
- esgpull/cli/add.py +103 -0
- esgpull/cli/autoremove.py +38 -0
- esgpull/cli/config.py +116 -0
- esgpull/cli/convert.py +285 -0
- esgpull/cli/decorators.py +342 -0
- esgpull/cli/download.py +74 -0
- esgpull/cli/facet.py +23 -0
- esgpull/cli/get.py +28 -0
- esgpull/cli/install.py +85 -0
- esgpull/cli/link.py +105 -0
- esgpull/cli/login.py +56 -0
- esgpull/cli/remove.py +73 -0
- esgpull/cli/retry.py +43 -0
- esgpull/cli/search.py +201 -0
- esgpull/cli/self.py +238 -0
- esgpull/cli/show.py +66 -0
- esgpull/cli/status.py +67 -0
- esgpull/cli/track.py +87 -0
- esgpull/cli/update.py +184 -0
- esgpull/cli/utils.py +247 -0
- esgpull/config.py +410 -0
- esgpull/constants.py +56 -0
- esgpull/context.py +724 -0
- esgpull/database.py +161 -0
- esgpull/download.py +162 -0
- esgpull/esgpull.py +447 -0
- esgpull/exceptions.py +167 -0
- esgpull/fs.py +253 -0
- esgpull/graph.py +460 -0
- esgpull/install_config.py +185 -0
- esgpull/migrations/README +1 -0
- esgpull/migrations/env.py +82 -0
- esgpull/migrations/script.py.mako +24 -0
- esgpull/migrations/versions/0.3.0_update_tables.py +170 -0
- esgpull/migrations/versions/0.3.1_update_tables.py +25 -0
- esgpull/migrations/versions/0.3.2_update_tables.py +26 -0
- esgpull/migrations/versions/0.3.3_update_tables.py +25 -0
- esgpull/migrations/versions/0.3.4_update_tables.py +25 -0
- esgpull/migrations/versions/0.3.5_update_tables.py +25 -0
- esgpull/migrations/versions/0.3.6_update_tables.py +26 -0
- esgpull/migrations/versions/0.3.7_update_tables.py +26 -0
- esgpull/migrations/versions/0.3.8_update_tables.py +26 -0
- esgpull/migrations/versions/0.4.0_update_tables.py +25 -0
- esgpull/migrations/versions/0.5.0_update_tables.py +26 -0
- esgpull/migrations/versions/0.5.1_update_tables.py +26 -0
- esgpull/migrations/versions/0.5.2_update_tables.py +25 -0
- esgpull/migrations/versions/0.5.3_update_tables.py +26 -0
- esgpull/migrations/versions/0.5.4_update_tables.py +25 -0
- esgpull/migrations/versions/0.5.5_update_tables.py +25 -0
- esgpull/migrations/versions/0.6.0_update_tables.py +25 -0
- esgpull/migrations/versions/0.6.1_update_tables.py +25 -0
- esgpull/migrations/versions/0.6.2_update_tables.py +25 -0
- esgpull/migrations/versions/0.6.3_update_tables.py +25 -0
- esgpull/models/__init__.py +31 -0
- esgpull/models/base.py +50 -0
- esgpull/models/dataset.py +34 -0
- esgpull/models/facet.py +18 -0
- esgpull/models/file.py +65 -0
- esgpull/models/options.py +164 -0
- esgpull/models/query.py +481 -0
- esgpull/models/selection.py +201 -0
- esgpull/models/sql.py +258 -0
- esgpull/models/synda_file.py +85 -0
- esgpull/models/tag.py +19 -0
- esgpull/models/utils.py +54 -0
- esgpull/presets.py +13 -0
- esgpull/processor.py +172 -0
- esgpull/py.typed +0 -0
- esgpull/result.py +53 -0
- esgpull/tui.py +346 -0
- esgpull/utils.py +54 -0
- esgpull/version.py +1 -0
- esgpull-0.6.3.dist-info/METADATA +110 -0
- esgpull-0.6.3.dist-info/RECORD +80 -0
- esgpull-0.6.3.dist-info/WHEEL +4 -0
- esgpull-0.6.3.dist-info/entry_points.txt +3 -0
- esgpull-0.6.3.dist-info/licenses/LICENSE +28 -0
esgpull/models/query.py
ADDED
|
@@ -0,0 +1,481 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections.abc import Iterator, MutableMapping, Sequence
|
|
4
|
+
from typing import Any, Literal
|
|
5
|
+
|
|
6
|
+
import sqlalchemy as sa
|
|
7
|
+
from rich.console import Console, ConsoleOptions
|
|
8
|
+
from rich.table import Table
|
|
9
|
+
from rich.text import Text
|
|
10
|
+
from rich.tree import Tree
|
|
11
|
+
from sqlalchemy.orm import Mapped, mapped_column, object_session, relationship
|
|
12
|
+
from typing_extensions import NotRequired, TypedDict
|
|
13
|
+
|
|
14
|
+
from esgpull.exceptions import UntrackableQuery
|
|
15
|
+
from esgpull.models.base import Base, Sha
|
|
16
|
+
from esgpull.models.file import FileDict, FileStatus
|
|
17
|
+
from esgpull.models.options import Options
|
|
18
|
+
from esgpull.models.selection import FacetValues, Selection
|
|
19
|
+
from esgpull.models.tag import Tag
|
|
20
|
+
from esgpull.models.utils import (
|
|
21
|
+
find_int,
|
|
22
|
+
find_str,
|
|
23
|
+
get_local_path,
|
|
24
|
+
rich_measure_impl,
|
|
25
|
+
short_sha,
|
|
26
|
+
)
|
|
27
|
+
from esgpull.utils import format_size
|
|
28
|
+
|
|
29
|
+
# Association table backing the many-to-many link between queries and files
# (used as ``secondary`` by ``File.queries`` and ``Query.files``).
# The pair (query_sha, file_sha) is the composite primary key.
query_file_proxy = sa.Table(
    "query_file",
    Base.metadata,
    sa.Column("query_sha", Sha, sa.ForeignKey("query.sha"), primary_key=True),
    sa.Column("file_sha", Sha, sa.ForeignKey("file.sha"), primary_key=True),
)
# Association table backing the many-to-many link between queries and tags
# (used as ``secondary`` by ``Query.tags``).
# The pair (query_sha, tag_sha) is the composite primary key.
query_tag_proxy = sa.Table(
    "query_tag",
    Base.metadata,
    sa.Column("query_sha", Sha, sa.ForeignKey("query.sha"), primary_key=True),
    sa.Column("tag_sha", Sha, sa.ForeignKey("tag.sha"), primary_key=True),
)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class File(Base):
    """ORM model mapped to the ``file`` table.

    A file is linked to the queries that reference it through the
    ``query_file`` association table (see ``queries``).
    """

    __tablename__ = "file"

    file_id: Mapped[str] = mapped_column(sa.String(255), unique=True)
    dataset_id: Mapped[str] = mapped_column(sa.String(255))
    master_id: Mapped[str] = mapped_column(sa.String(255))
    url: Mapped[str] = mapped_column(sa.String(255))
    version: Mapped[str] = mapped_column(sa.String(16))
    filename: Mapped[str] = mapped_column(sa.String(255))
    local_path: Mapped[str] = mapped_column(sa.String(255))
    data_node: Mapped[str] = mapped_column(sa.String(40))
    checksum: Mapped[str] = mapped_column(sa.String(64))
    checksum_type: Mapped[str] = mapped_column(sa.String(16))
    size: Mapped[int] = mapped_column(sa.BigInteger)
    status: Mapped[FileStatus] = mapped_column(
        sa.Enum(FileStatus), default=FileStatus.New
    )
    # Reverse side of ``Query.files``; excluded from the generated repr.
    queries: Mapped[list[Query]] = relationship(
        secondary=query_file_proxy,
        default_factory=list,
        back_populates="files",
        repr=False,
    )

    def _as_bytes(self) -> bytes:
        """Return the encoded ``(file_id, checksum)`` pair identifying this file.

        NOTE(review): presumably consumed by ``Base.compute_sha`` -- confirm.
        """
        self_tuple = (self.file_id, self.checksum)
        return str(self_tuple).encode()

    def compute_sha(self) -> None:
        """Compute this file's sha by delegating to ``Base.compute_sha``."""
        Base.compute_sha(self)

    @classmethod
    def fromdict(cls, source: FileDict) -> File:
        """Build a ``File`` from its dictionary form.

        Args:
            source: Mapping holding every column value; ``"status"`` is
                optional and may be either a ``FileStatus`` value or a
                ``FileStatus`` member name (``asdict`` emits the name).

        Returns:
            A new ``File``; its sha is *not* computed here.

        Raises:
            KeyError: If a required key is missing, or if ``"status"`` is
                neither a valid ``FileStatus`` value nor a member name.
        """
        result = cls(
            file_id=source["file_id"],
            dataset_id=source["dataset_id"],
            master_id=source["master_id"],
            url=source["url"],
            version=source["version"],
            filename=source["filename"],
            local_path=source["local_path"],
            data_node=source["data_node"],
            checksum=source["checksum"],
            checksum_type=source["checksum_type"],
            size=source["size"],
        )
        if "status" in source:
            # The previous code read the non-existent "source" key here, so
            # the provided status was never honoured.
            status = source["status"]
            try:
                result.status = FileStatus(status)
            except ValueError:
                # ``asdict`` serialises ``status.name``; accept that form too
                # so that ``fromdict(asdict())`` round-trips.
                result.status = FileStatus[status]
        return result

    @classmethod
    def serialize(cls, source: dict) -> File:
        """Build a ``File`` (with its sha computed) from a raw record.

        The record must provide ``dataset_id``, ``title``, ``url``,
        ``data_node``, ``checksum``, ``checksum_type`` and ``size``.
        NOTE(review): presumably this is one document of a search
        response -- confirm against the caller.
        """
        # Only the part before the first "|" is kept for these two fields.
        dataset_id = find_str(source["dataset_id"]).partition("|")[0]
        filename = find_str(source["title"])
        url = find_str(source["url"]).partition("|")[0]
        url = url.replace("http://", "https://")  # TODO: is this always true ?
        data_node = find_str(source["data_node"])
        checksum = find_str(source["checksum"])
        checksum_type = find_str(source["checksum_type"])
        size = find_int(source["size"])
        file_id = ".".join([dataset_id, filename])
        # The last dot-separated component of the dataset id is the version;
        # the master id is the file id without it.
        dataset_master, version = dataset_id.rsplit(".", 1)  # remove version
        master_id = ".".join([dataset_master, filename])
        local_path = get_local_path(source, version)
        result = cls.fromdict(
            {
                "file_id": file_id,
                "dataset_id": dataset_id,
                "master_id": master_id,
                "url": url,
                "version": version,
                "filename": filename,
                "local_path": local_path,
                "data_node": data_node,
                "checksum": checksum,
                "checksum_type": checksum_type,
                "size": size,
            }
        )
        result.compute_sha()
        return result

    def asdict(self) -> FileDict:
        """Return the dictionary form of this file; status is stored by name."""
        return FileDict(
            file_id=self.file_id,
            dataset_id=self.dataset_id,
            master_id=self.master_id,
            url=self.url,
            version=self.version,
            filename=self.filename,
            local_path=self.local_path,
            data_node=self.data_node,
            checksum=self.checksum,
            checksum_type=self.checksum_type,
            size=self.size,
            status=self.status.name,
        )

    def clone(self, compute_sha: bool = True) -> File:
        """Return a copy rebuilt from ``asdict()``.

        Args:
            compute_sha: When true, compute the sha of the copy.
        """
        result = File.fromdict(self.asdict())
        if compute_sha:
            result.compute_sha()
        return result
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
class QueryDict(TypedDict):
    """Plain-dict form of a ``Query``; every key is optional.

    This is the return type of ``Query.asdict`` and its keys match the
    keyword arguments of ``Query.__init__``.
    """

    # A single tag name, or a list of tag names.
    tags: NotRequired[str | list[str]]
    # Only ever present with the value ``True``.
    tracked: NotRequired[Literal[True]]
    require: NotRequired[str]
    options: NotRequired[MutableMapping[str, bool | None]]
    selection: NotRequired[MutableMapping[str, FacetValues]]
    files: NotRequired[list[FileDict]]
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
class Query(Base):
    """ORM model mapped to the ``query`` table.

    A query groups tags, an ``Options`` row, a ``Selection`` row (facet
    values) and the files attached to it. ``require`` optionally refers
    to another query.
    """

    __tablename__ = "query"

    tags: Mapped[list[Tag]] = relationship(
        secondary=query_tag_proxy,
        default_factory=list,
    )
    tracked: Mapped[bool] = mapped_column(default=False)
    require: Mapped[str | None] = mapped_column(Sha, default=None)
    options_sha: Mapped[str] = mapped_column(
        Sha,
        sa.ForeignKey("options.sha"),
        init=False,
    )
    options: Mapped[Options] = relationship(default_factory=Options)
    # Annotated ``str`` like ``options_sha``: both columns use the ``Sha`` type.
    selection_sha: Mapped[str] = mapped_column(
        Sha,
        sa.ForeignKey("selection.sha"),
        init=False,
    )
    selection: Mapped[Selection] = relationship(default_factory=Selection)
    # Reverse side of ``File.queries``; excluded from the generated repr.
    files: Mapped[list[File]] = relationship(
        secondary=query_file_proxy,
        default_factory=list,
        back_populates="queries",
        repr=False,
    )

    def __init__(
        self,
        *,
        tracked: bool = False,
        require: str | None = None,
        tags: Sequence[Tag | str] | Tag | str | None = None,
        options: Options | MutableMapping[str, bool | None] | None = None,
        selection: Selection | MutableMapping[str, FacetValues] | None = None,
        files: list[FileDict] | None = None,
    ) -> None:
        """Create a query from model instances or their plain-dict forms.

        Args:
            tracked: Initial value of the ``tracked`` flag.
            require: Optional reference to another query.
            tags: One tag or a sequence of tags, each a ``Tag`` or a name.
                Items of any other type are ignored.
            options: ``Options`` instance, or a dict of its keyword arguments.
            selection: ``Selection`` instance, or a dict of facet values.
            files: File dictionaries, converted with ``File.fromdict``.
        """
        self.tracked = tracked
        self.require = require
        self.tags = []
        if tags is not None:
            if isinstance(tags, (str, Tag)):
                tags = [tags]
            for tag in tags:
                if isinstance(tag, str):
                    self.tags.append(Tag(name=tag))
                elif isinstance(tag, Tag):
                    self.tags.append(tag)
        if selection is None:
            self.selection = Selection()
        elif isinstance(selection, dict):
            self.selection = Selection(**selection)
        elif isinstance(selection, Selection):
            self.selection = selection
        if options is None:
            self.options = Options()
        elif isinstance(options, dict):
            self.options = Options(**options)
        elif isinstance(options, Options):
            self.options = options
        self.files = []
        if files is not None:
            for file in files:
                self.files.append(File.fromdict(file))

    @property
    def has_files(self) -> bool:
        """Whether at least one file is linked to this query.

        Counts rows in the database when the instance is attached to a
        session; otherwise falls back to the in-memory ``files`` list.
        """
        stmt: sa.Select[tuple[int]] = (
            sa.select(sa.func.count("*"))
            .join_from(query_file_proxy, File)
            .where(query_file_proxy.c.query_sha == self.sha)
        )
        session = object_session(self)
        if session is None:
            return bool(self.files)
        else:
            nb_files = session.scalar(stmt)
            return nb_files is not None and nb_files > 0

    def files_count_size(self, *status: FileStatus) -> tuple[int, int]:
        """Return ``(count, total size)`` of the files linked to this query.

        Args:
            *status: When given, only files whose status is one of these
                are counted; otherwise all files are counted.

        Returns:
            The number of matching files and the sum of their sizes
            (``0`` when there are none).
        """
        stmt: sa.Select[tuple[int, int | None]] = (
            sa.select(sa.func.count("*"), sa.func.sum(File.size))
            .join_from(query_file_proxy, File)
            .where(query_file_proxy.c.query_sha == self.sha)
        )
        session = object_session(self)
        if session is None:
            # Not attached to a session: compute from the in-memory list.
            if status:
                files = [file for file in self.files if file.status in status]
            else:
                files = list(self.files)
            count: int = len(files)
            size: int | None = sum(file.size for file in files)
        else:
            if status:
                stmt = stmt.where(File.status.in_(status))
            count, size = session.execute(stmt).all()[0]
        # SUM() over zero rows yields NULL/None; normalise to 0.
        return count, size or 0

    def _as_bytes(self) -> bytes:
        """Return the encoded ``(require, options sha, selection sha)`` triple.

        Tags, the tracked flag and files are not part of it.
        """
        self_tuple = (self.require, self.options.sha, self.selection.sha)
        return str(self_tuple).encode()

    def compute_sha(self) -> None:
        """Compute the shas of tags, options and selection, then this query's."""
        for tag in self.tags:
            tag.compute_sha()
        self.options.compute_sha()
        self.selection.compute_sha()
        super().compute_sha()

    @property
    def tag_name(self) -> str | None:
        """Name of the tag when the query has exactly one, else ``None``."""
        if len(self.tags) == 1:
            return self.tags[0].name
        else:
            return None

    @property
    def name(self) -> str:
        """Short display name derived from the sha.

        If the sha contains ``":"`` the part before it is returned;
        otherwise the result of ``short_sha``. The sha is computed first
        when it is still ``None``.
        """
        # TODO: make these 2 lines useless
        if self.sha is None:
            self.compute_sha()
        elif ":" in self.sha:
            return self.sha.split(":")[0]
        return short_sha(self.sha)

    @property
    def rich_name(self) -> str:
        """``name`` wrapped in rich markup (bold green)."""
        return f"[b green]{self.name}[/]"

    def items(self, include_name: bool = False) -> Iterator[tuple[str, Any]]:
        """Yield ``(key, value)`` pairs for the fields that are set.

        Args:
            include_name: When true, yield ``("name", self.name)`` first.
        """
        if include_name:
            yield "name", self.name
        if self.tags:
            yield "tags", [tag.name for tag in self.tags]
        if self.tracked:
            yield "tracked", self.tracked
        if self.require is not None:
            yield "require", short_sha(self.require)
        if self.options:
            yield "options", self.options
        if self.selection:
            yield "selection", self.selection

    def asdict(self) -> QueryDict:
        """Return the plain-dict form, omitting unset fields.

        A single tag is stored as a string, several as a list of names.
        Files are not included.
        """
        result: QueryDict = {}
        if len(self.tags) > 1:
            result["tags"] = [tag.name for tag in self.tags]
        elif len(self.tags) == 1:
            result["tags"] = self.tags[0].name
        if self.tracked:
            result["tracked"] = self.tracked
        if self.require is not None:
            result["require"] = self.require
        if self.options:
            result["options"] = self.options.asdict()
        if self.selection:
            result["selection"] = self.selection.asdict()
        return result

    def clone(self, compute_sha: bool = True) -> Query:
        """Return a copy rebuilt from ``asdict()`` sharing the same files.

        Args:
            compute_sha: When true, recompute the sha of the copy;
                otherwise copy this query's sha. A ``"LEGACY"`` sha is
                always kept as is.
        """
        instance = Query(**self.asdict())
        instance.files = list(self.files)
        if self.sha == "LEGACY":
            instance.sha = "LEGACY"
        elif compute_sha:
            instance.compute_sha()
        else:
            instance.sha = self.sha
        return instance

    def get_tag(self, name: str) -> Tag | None:
        """Return the first tag called ``name``, or ``None`` if absent."""
        return next((tag for tag in self.tags if tag.name == name), None)

    def add_tag(
        self,
        name: str,
        description: str | None = None,
        compute_sha: bool = True,
    ) -> None:
        """Attach a new tag to this query.

        Args:
            name: Tag name; must not already be attached.
            description: Optional tag description.
            compute_sha: When true, compute the new tag's sha.

        Raises:
            ValueError: If a tag with this name is already attached.
        """
        if self.get_tag(name) is not None:
            raise ValueError(f"Tag {name!r} already exists.")
        tag = Tag(name=name, description=description)
        if compute_sha:
            tag.compute_sha()
        self.tags.append(tag)

    def update_tag(self, name: str, description: str | None) -> None:
        """Replace the description of an attached tag.

        Raises:
            ValueError: If no tag with this name is attached.
        """
        tag = self.get_tag(name)
        if tag is None:
            raise ValueError(f"Tag {name!r} does not exist.")
        else:
            tag.description = description

    def remove_tag(self, name: str) -> bool:
        """Detach the tag called ``name``; return whether it was present."""
        tag = self.get_tag(name)
        if tag is not None:
            self.tags.remove(tag)
        return tag is not None

    def no_require(self) -> Query:
        """Return a clone flagged so that ``_rich_tree`` hides ``require``."""
        cl = self.clone(compute_sha=False)
        cl._rich_no_require = True  # type: ignore [attr-defined]
        return cl

    def __lshift__(self, child: Query) -> Query:
        """Return a new query made of ``self`` overlaid with ``child``.

        Tags and files of ``child`` missing from the result are appended,
        its options and selection values are set on the result, and its
        ``tracked`` flag is taken over. ``self`` is left unmodified.
        """
        result = self.clone(compute_sha=False)
        # if self.name != child.require:
        #     raise ValueError(f"{self.name} is not required by {child.name}")
        for tag in child.tags:
            if tag not in result.tags:
                result.tags.append(tag)
        for name, option in child.options.items():
            setattr(result.options, name, option)
        for name, values in child.selection.items():
            result.selection[name] = values
        result.tracked = child.tracked
        result.compute_sha()
        files_shas = {f.sha for f in result.files}
        for file in child.files:
            if file.sha not in files_shas:
                result.files.append(file)
        return result

    @classmethod
    def _from_detailed_dict(cls, source: dict) -> Query:
        """Build a tracked query from a mapping of facet names to values.

        Names the selection rejects with ``KeyError`` are skipped.
        """
        result = cls(tracked=True)
        for name, values in source.items():
            try:
                result.selection[name] = values
            except KeyError:
                # Best effort: unknown facet names are deliberately ignored.
                ...
        result.compute_sha()
        return result

    def __rich_repr__(self) -> Iterator:
        """Yield the fields shown by rich's pretty printer."""
        yield from self.items(include_name=True)

    def __repr__(self) -> str:
        """Return ``Query(key=value, ...)`` built from ``items()``."""
        cls_name = self.__class__.__name__
        items = [f"{k}={v}" for k, v in self.items(include_name=True)]
        return f"{cls_name}(" + ", ".join(items) + ")"

    __rich_measure__ = rich_measure_impl

    def _rich_tree(self) -> Tree:
        """Render the query as a rich ``Tree``: a title plus a grid of fields."""
        title = Text.from_markup(self.rich_name)
        if not self.tracked:
            title.append(" untracked", style="i red")
        contents = Table.grid(padding=(0, 1))
        if not hasattr(self, "_rich_no_require") and self.require is not None:
            # NOTE(review): 40 characters presumably means a full sha
            # rather than a user-supplied name -- confirm.
            if len(self.require) == 40:
                require = Text(short_sha(self.require), style="i green")
            else:
                if hasattr(self, "_unknown_require"):
                    require = Text(f"{self.require} [?]", style="red")
                else:
                    require = Text(self.require, style="magenta")
            contents.add_row("require:", require)
        if self.tags:
            text = Text()
            text.append("tags", style="magenta")
            text.append(":")
            contents.add_row(text, ", ".join([tag.name for tag in self.tags]))
        for name, option in self.options.items():
            text = Text()
            text.append(name, style="yellow")
            text.append(":")
            contents.add_row(text, str(option.value[1]))
        for name, values in self.selection.items():
            text = Text()
            # The "query" facet is shown without a label.
            if name != "query":
                text.append(name, style="blue")
                text.append(":")
            if len(values) == 1:
                values_str = values[0]
            else:
                values_str = ", ".join(values)
            contents.add_row(text, values_str)
        if self.has_files:
            count_ondisk, size_ondisk = self.files_count_size(FileStatus.Done)
            count_total, size_total = self.files_count_size()
            sizes = f"{format_size(size_ondisk)} / {format_size(size_total)}"
            lens = f"{count_ondisk}/{count_total}"
            contents.add_row(
                "files:", Text(f"{sizes} [{lens}]", style="magenta")
            )
        tree = Tree("", hide_root=True, guide_style="dim").add(title)
        if contents.row_count:
            tree.add(contents)
        return tree

    def __rich_console__(
        self,
        console: Console,
        opts: ConsoleOptions,
    ) -> Iterator[Tree]:
        """Rich console protocol: yield the tree built by ``_rich_tree``."""
        yield self._rich_tree()

    def trackable(self) -> bool:
        """Return whether the options allow this query to be tracked."""
        return self.options.trackable()

    def track(
        self, options: Options | None = None, compute_sha: bool = True
    ) -> None:
        """Mark the query as tracked.

        Args:
            options: When given, applied to this query's options through
                ``Options.apply_defaults``.
            compute_sha: When true, recompute the sha afterwards.

        Raises:
            UntrackableQuery: If ``options`` is omitted and the current
                options are not trackable.
        """
        if options is not None:
            self.options.apply_defaults(options)
        elif not self.options.trackable():
            raise UntrackableQuery(self.name)
        self.tracked = True
        if compute_sha:
            self.compute_sha()

    def untrack(self) -> None:
        """Clear the tracked flag."""
        self.tracked = False
|
|
476
|
+
|
|
477
|
+
|
|
478
|
+
# Module-level placeholder query whose sha is pinned to the literal "LEGACY".
LegacyQuery = Query()
LegacyQuery.compute_sha()  # compute shas for empty selection/options/...
LegacyQuery.sha = "LEGACY"
# Shadow compute_sha with a no-op on this instance so that later calls
# cannot overwrite the "LEGACY" sha set above.
LegacyQuery.compute_sha = lambda: None  # type: ignore [assignment]
|
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections.abc import Iterator, MutableMapping
|
|
4
|
+
from typing import ClassVar, TypeAlias
|
|
5
|
+
|
|
6
|
+
import sqlalchemy as sa
|
|
7
|
+
from rich.pretty import pretty_repr
|
|
8
|
+
from sqlalchemy.orm import Mapped, relationship
|
|
9
|
+
|
|
10
|
+
from esgpull.exceptions import AlreadySetFacet, DuplicateFacet
|
|
11
|
+
from esgpull.models.base import Base, Sha
|
|
12
|
+
from esgpull.models.facet import Facet
|
|
13
|
+
|
|
14
|
+
# A facet can be given as a single value or as a list of values.
FacetValues: TypeAlias = str | list[str]

# Association table backing the many-to-many link between selections and
# facets (used as ``secondary`` by ``Selection._facets``).
# The pair (selection_sha, facet_sha) is the composite primary key.
selection_facet_proxy = sa.Table(
    "selection_facet",
    Base.metadata,
    sa.Column(
        "selection_sha", Sha, sa.ForeignKey("selection.sha"), primary_key=True
    ),
    sa.Column("facet_sha", Sha, sa.ForeignKey("facet.sha"), primary_key=True),
)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def opposite(facet_name: str) -> str:
    """Return the negated counterpart of a facet name.

    A single leading ``!`` is removed when present; otherwise one is
    prepended. An empty name maps to ``"!"`` instead of raising.
    """
    # startswith() is safe on the empty string, unlike indexing [0].
    if facet_name.startswith("!"):
        return facet_name[1:]
    return f"!{facet_name}"
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class Selection(Base):
    """ORM model mapped to the ``selection`` table: a set of facet values.

    Facet names are registered on the class with ``configure``; each one
    becomes a property reading from and writing to ``_facets``. Values are
    also reachable with ``selection[name]``.
    """

    __tablename__ = "selection"
    # Names (plain and "!"-prefixed) currently registered as properties.
    _facet_names: ClassVar[set[str]] = set()

    _facets: Mapped[list[Facet]] = relationship(
        secondary=selection_facet_proxy,
        default_factory=list,
    )

    @classmethod
    def _add_property(cls, name: str) -> None:
        """Install a property called ``name`` on the class.

        The getter returns the sorted values stored for that facet. The
        setter stores new values and refuses to overwrite existing ones.
        """

        def getter(self: Selection) -> list[str]:
            self._ensure_facet_map()
            indices = self._facet_map_.get(name, set())
            return sorted([self._facets[idx].value for idx in indices])

        def setter(self: Selection, values: FacetValues) -> None:
            self._ensure_facet_map()
            other = opposite(name)
            # A facet and its negation are mutually exclusive, and neither
            # can be set twice.
            if name in self._facet_map_:
                raise AlreadySetFacet(name, ", ".join(self[name]))
            elif other in self._facet_map_:
                raise AlreadySetFacet(other, ", ".join(self[other]))
            if isinstance(values, str):
                values = [values]
            # Validate every value before mutating anything, so a
            # DuplicateFacet error cannot leave ``_facets`` half-updated.
            pending: list[Facet] = []
            for value in values:
                facet = Facet(name=name, value=value)
                if facet in self._facets or facet in pending:
                    raise DuplicateFacet(
                        facet.name,
                        facet.value,
                        pretty_repr(self),
                    )
                pending.append(facet)
            offset = len(self._facets)
            self._facets.extend(pending)
            self._facet_map_[name] = set(range(offset, offset + len(pending)))

        setattr(cls, name, property(getter, setter))

    @classmethod
    def configure(cls, *names: str, replace: bool = True) -> None:
        """Register facet names (and their ``!`` negations) as properties.

        Args:
            *names: Facet names to register.
            replace: When true, previously registered properties are
                removed first; otherwise only new names are added.
        """
        nameset = set(names) | {f"!{name}" for name in names}
        if replace:
            for name in cls._facet_names:
                delattr(cls, name)
            new_names = nameset
            cls._facet_names = nameset
        else:
            new_names = nameset - cls._facet_names
            cls._facet_names |= new_names
        for name in new_names:
            cls._add_property(name)

    def __init__(
        self,
        facets: list[Facet] | None = None,
        **kwargs: FacetValues,
    ):
        """Create a selection.

        Args:
            facets: Existing ``Facet`` objects to start from.
            **kwargs: Facet values keyed by registered facet name.

        Raises:
            KeyError: If a keyword is not a registered facet name.
        """
        if facets is None:
            self._facets = []
        else:
            self._facets = facets
        self._init_facet_map()
        for name, values in kwargs.items():
            self[name] = values

    def _init_facet_map(self) -> None:
        """Rebuild the name -> indices-into-``_facets`` lookup."""
        self._facet_map_: dict[str, set[int]] = {}
        for i, facet in enumerate(self._facets):
            self._facet_map_.setdefault(facet.name, set())
            self._facet_map_[facet.name].add(i)

    def _ensure_facet_map(self) -> None:
        """Build the lookup if this instance does not have one yet.

        NOTE(review): presumably needed for instances created without
        ``__init__`` (e.g. loaded by the ORM) -- confirm.
        """
        if not hasattr(self, "_facet_map_"):
            self._init_facet_map()

    def __getitem__(self, name: str) -> list[str]:
        """Return the values of a registered facet.

        Raises:
            KeyError: If ``name`` is not registered.
        """
        if name in self._facet_names:
            return getattr(self, name)
        else:
            raise KeyError(name)

    def __setitem__(self, name: str, value: FacetValues):
        """Set the values of a registered facet.

        Raises:
            KeyError: If ``name`` is not registered.
        """
        if name in self._facet_names:
            setattr(self, name, value)
        else:
            raise KeyError(name)

    def items(self) -> Iterator[tuple[str, list[str]]]:
        """Yield ``(name, values)`` for every facet set, sorted by name."""
        self._ensure_facet_map()
        for name in sorted(self._facet_map_.keys()):
            yield name, self[name]

    def __bool__(self) -> bool:
        """A selection is truthy when it holds at least one facet."""
        return bool(self._facets)

    def _as_bytes(self) -> bytes:
        """Return the encoded tuple of ``items()``."""
        return str(tuple(self.items())).encode()

    def compute_sha(self) -> None:
        """Compute missing facet shas, then this selection's sha."""
        for facet in self._facets:
            if facet.sha is None:
                facet.compute_sha()
        super().compute_sha()

    def asdict(self) -> MutableMapping[str, FacetValues]:
        """Return facets as a dict; single values are unwrapped to a string."""
        result: dict[str, FacetValues] = {}
        for name, facet in self.items():
            if len(facet) == 1:
                result[name] = facet[0]
            else:
                result[name] = list(facet)
        return result

    def __rich_repr__(self) -> Iterator[tuple[str, FacetValues]]:
        """Yield facets for rich's pretty printer; single values unwrapped."""
        for name, facet in self.items():
            if len(facet) == 1:
                yield name, facet[0]
            else:
                yield name, facet

    def __repr__(self) -> str:
        """Return ``Selection(name=value, ...)`` built from ``__rich_repr__``."""
        cls_name = self.__class__.__name__
        items = [f"{k}={v}" for k, v in self.__rich_repr__()]
        return f"{cls_name}(" + ", ".join(items) + ")"
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
# First group of facet names registered on ``Selection`` below.
DefaultFacets = [
    "query",
    "start",
    "end",
    "facets",
    "url",
    "data_node",
    "index_node",
    "master_id",
    "instance_id",  # search does not work with instance_id
    "title",
    "variable_long_name",
    "experiment_family",
]
# Second group of facet names registered on ``Selection`` below.
BaseFacets = [
    "project",
    "mip_era",
    "experiment",
    "experiment_id",
    "institute",
    "institution_id",
    "model",
    "table_id",
    "activity_id",
    "ensemble",
    "variant_label",
    "realm",
    "frequency",
    "time_frequency",
    "variable",
    "variable_id",
    "dataset_id",
    "source_id",
    "domain",
    "driving_model",
    "rcm_name",
    "member_id",
    "cmor_table",
    "grid_label",
]


# Runs at import time: installs a property on ``Selection`` for every name
# above and for its "!"-prefixed negation.
Selection.configure(*DefaultFacets, *BaseFacets, replace=True)
|