esgpull 0.6.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- esgpull/__init__.py +12 -0
- esgpull/auth.py +181 -0
- esgpull/cli/__init__.py +73 -0
- esgpull/cli/add.py +103 -0
- esgpull/cli/autoremove.py +38 -0
- esgpull/cli/config.py +116 -0
- esgpull/cli/convert.py +285 -0
- esgpull/cli/decorators.py +342 -0
- esgpull/cli/download.py +74 -0
- esgpull/cli/facet.py +23 -0
- esgpull/cli/get.py +28 -0
- esgpull/cli/install.py +85 -0
- esgpull/cli/link.py +105 -0
- esgpull/cli/login.py +56 -0
- esgpull/cli/remove.py +73 -0
- esgpull/cli/retry.py +43 -0
- esgpull/cli/search.py +201 -0
- esgpull/cli/self.py +238 -0
- esgpull/cli/show.py +66 -0
- esgpull/cli/status.py +67 -0
- esgpull/cli/track.py +87 -0
- esgpull/cli/update.py +184 -0
- esgpull/cli/utils.py +247 -0
- esgpull/config.py +410 -0
- esgpull/constants.py +56 -0
- esgpull/context.py +724 -0
- esgpull/database.py +161 -0
- esgpull/download.py +162 -0
- esgpull/esgpull.py +447 -0
- esgpull/exceptions.py +167 -0
- esgpull/fs.py +253 -0
- esgpull/graph.py +460 -0
- esgpull/install_config.py +185 -0
- esgpull/migrations/README +1 -0
- esgpull/migrations/env.py +82 -0
- esgpull/migrations/script.py.mako +24 -0
- esgpull/migrations/versions/0.3.0_update_tables.py +170 -0
- esgpull/migrations/versions/0.3.1_update_tables.py +25 -0
- esgpull/migrations/versions/0.3.2_update_tables.py +26 -0
- esgpull/migrations/versions/0.3.3_update_tables.py +25 -0
- esgpull/migrations/versions/0.3.4_update_tables.py +25 -0
- esgpull/migrations/versions/0.3.5_update_tables.py +25 -0
- esgpull/migrations/versions/0.3.6_update_tables.py +26 -0
- esgpull/migrations/versions/0.3.7_update_tables.py +26 -0
- esgpull/migrations/versions/0.3.8_update_tables.py +26 -0
- esgpull/migrations/versions/0.4.0_update_tables.py +25 -0
- esgpull/migrations/versions/0.5.0_update_tables.py +26 -0
- esgpull/migrations/versions/0.5.1_update_tables.py +26 -0
- esgpull/migrations/versions/0.5.2_update_tables.py +25 -0
- esgpull/migrations/versions/0.5.3_update_tables.py +26 -0
- esgpull/migrations/versions/0.5.4_update_tables.py +25 -0
- esgpull/migrations/versions/0.5.5_update_tables.py +25 -0
- esgpull/migrations/versions/0.6.0_update_tables.py +25 -0
- esgpull/migrations/versions/0.6.1_update_tables.py +25 -0
- esgpull/migrations/versions/0.6.2_update_tables.py +25 -0
- esgpull/migrations/versions/0.6.3_update_tables.py +25 -0
- esgpull/models/__init__.py +31 -0
- esgpull/models/base.py +50 -0
- esgpull/models/dataset.py +34 -0
- esgpull/models/facet.py +18 -0
- esgpull/models/file.py +65 -0
- esgpull/models/options.py +164 -0
- esgpull/models/query.py +481 -0
- esgpull/models/selection.py +201 -0
- esgpull/models/sql.py +258 -0
- esgpull/models/synda_file.py +85 -0
- esgpull/models/tag.py +19 -0
- esgpull/models/utils.py +54 -0
- esgpull/presets.py +13 -0
- esgpull/processor.py +172 -0
- esgpull/py.typed +0 -0
- esgpull/result.py +53 -0
- esgpull/tui.py +346 -0
- esgpull/utils.py +54 -0
- esgpull/version.py +1 -0
- esgpull-0.6.3.dist-info/METADATA +110 -0
- esgpull-0.6.3.dist-info/RECORD +80 -0
- esgpull-0.6.3.dist-info/WHEEL +4 -0
- esgpull-0.6.3.dist-info/entry_points.txt +3 -0
- esgpull-0.6.3.dist-info/licenses/LICENSE +28 -0
esgpull/esgpull.py
ADDED
|
@@ -0,0 +1,447 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from collections.abc import AsyncIterator
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from functools import cached_property, partial
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from warnings import warn
|
|
9
|
+
|
|
10
|
+
from rich.live import Live
|
|
11
|
+
from rich.progress import (
|
|
12
|
+
BarColumn,
|
|
13
|
+
DownloadColumn,
|
|
14
|
+
MofNCompleteColumn,
|
|
15
|
+
Progress,
|
|
16
|
+
SpinnerColumn,
|
|
17
|
+
TaskID,
|
|
18
|
+
TextColumn,
|
|
19
|
+
TimeRemainingColumn,
|
|
20
|
+
TransferSpeedColumn,
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
from esgpull.auth import Auth, Credentials
|
|
24
|
+
from esgpull.config import Config
|
|
25
|
+
from esgpull.context import Context
|
|
26
|
+
from esgpull.database import Database
|
|
27
|
+
from esgpull.exceptions import (
|
|
28
|
+
DownloadCancelled,
|
|
29
|
+
InvalidInstallPath,
|
|
30
|
+
NoInstallPath,
|
|
31
|
+
)
|
|
32
|
+
from esgpull.fs import Filesystem
|
|
33
|
+
from esgpull.graph import Graph
|
|
34
|
+
from esgpull.install_config import InstallConfig
|
|
35
|
+
from esgpull.models import (
|
|
36
|
+
Facet,
|
|
37
|
+
File,
|
|
38
|
+
FileStatus,
|
|
39
|
+
LegacyQuery,
|
|
40
|
+
Options,
|
|
41
|
+
Query,
|
|
42
|
+
sql,
|
|
43
|
+
)
|
|
44
|
+
from esgpull.models.utils import short_sha
|
|
45
|
+
from esgpull.processor import Processor
|
|
46
|
+
from esgpull.result import Err, Ok, Result
|
|
47
|
+
from esgpull.tui import UI, DummyLive, Verbosity, logger
|
|
48
|
+
from esgpull.utils import format_size
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
@dataclass(repr=False)
|
|
52
|
+
class Esgpull:
|
|
53
|
+
path: Path
|
|
54
|
+
config: Config
|
|
55
|
+
ui: UI
|
|
56
|
+
auth: Auth
|
|
57
|
+
db: Database
|
|
58
|
+
context: Context
|
|
59
|
+
fs: Filesystem
|
|
60
|
+
graph: Graph
|
|
61
|
+
|
|
62
|
+
def __init__(
|
|
63
|
+
self,
|
|
64
|
+
path: Path | str | None = None,
|
|
65
|
+
verbosity: Verbosity = Verbosity.Detail,
|
|
66
|
+
install: bool = False,
|
|
67
|
+
record: bool = False,
|
|
68
|
+
safe: bool = False,
|
|
69
|
+
load_db: bool = True,
|
|
70
|
+
) -> None:
|
|
71
|
+
if path is not None:
|
|
72
|
+
path = Path(path)
|
|
73
|
+
InstallConfig.choose(path=path)
|
|
74
|
+
default = path
|
|
75
|
+
warning = f"Using unknown location: {path}\n"
|
|
76
|
+
else:
|
|
77
|
+
default = InstallConfig.default
|
|
78
|
+
warning = f"Using default location: {default}\n"
|
|
79
|
+
if InstallConfig.current is None:
|
|
80
|
+
if safe:
|
|
81
|
+
raise NoInstallPath
|
|
82
|
+
InstallConfig.choose(path=default)
|
|
83
|
+
if InstallConfig.current_idx is None:
|
|
84
|
+
idx = InstallConfig.add(default)
|
|
85
|
+
InstallConfig.choose(idx=idx)
|
|
86
|
+
needs_install = True
|
|
87
|
+
else:
|
|
88
|
+
idx = InstallConfig.current_idx
|
|
89
|
+
needs_install = False
|
|
90
|
+
self.path = InstallConfig.installs[idx].path
|
|
91
|
+
warning += "To disable this warning, please run:\n"
|
|
92
|
+
if needs_install:
|
|
93
|
+
warning += f"$ esgpull self install {self.path}"
|
|
94
|
+
else:
|
|
95
|
+
warning += f"$ esgpull self choose {self.path}"
|
|
96
|
+
if logger.level == logging.NOTSET:
|
|
97
|
+
warn(warning)
|
|
98
|
+
else:
|
|
99
|
+
logger.warning(warning)
|
|
100
|
+
else:
|
|
101
|
+
self.path = InstallConfig.current.path
|
|
102
|
+
if not install and not self.path.is_dir():
|
|
103
|
+
raise InvalidInstallPath(path=self.path)
|
|
104
|
+
self.config = Config.load(path=self.path)
|
|
105
|
+
Options._set_defaults(**self.config.api.default_options.asdict())
|
|
106
|
+
self.fs = Filesystem.from_config(self.config, install=install)
|
|
107
|
+
self.ui = UI.from_config(
|
|
108
|
+
self.config,
|
|
109
|
+
verbosity=verbosity,
|
|
110
|
+
record=record,
|
|
111
|
+
)
|
|
112
|
+
credentials = Credentials.from_config(self.config)
|
|
113
|
+
self.auth = Auth.from_config(self.config, credentials)
|
|
114
|
+
self.context = Context(self.config, noraise=True)
|
|
115
|
+
if load_db:
|
|
116
|
+
self.db = Database.from_config(self.config)
|
|
117
|
+
self.graph = Graph(self.db)
|
|
118
|
+
|
|
119
|
+
def fetch_index_nodes(self) -> list[str]:
|
|
120
|
+
"""
|
|
121
|
+
Returns a list of ESGF index nodes.
|
|
122
|
+
|
|
123
|
+
Fetch hints from ESGF search API with a distributed query.
|
|
124
|
+
"""
|
|
125
|
+
|
|
126
|
+
default_index = self.config.api.index_node
|
|
127
|
+
logger.info(f"Fetching index nodes from {default_index!r}")
|
|
128
|
+
options = Options(distrib=True)
|
|
129
|
+
query = Query(options=options)
|
|
130
|
+
facets = ["index_node"]
|
|
131
|
+
hints = self.context.hints(
|
|
132
|
+
query,
|
|
133
|
+
file=False,
|
|
134
|
+
facets=facets,
|
|
135
|
+
index_node=default_index,
|
|
136
|
+
)
|
|
137
|
+
return list(hints[0]["index_node"])
|
|
138
|
+
|
|
139
|
+
def fetch_facets(self, update: bool = False) -> bool:
|
|
140
|
+
"""
|
|
141
|
+
Fill db with all existing facets found in ESGF index nodes.
|
|
142
|
+
|
|
143
|
+
1. Fetch index nodes from `Esgpull.fetch_index_nodes()`
|
|
144
|
+
2. Fetch all facets (names + values) from all index nodes.
|
|
145
|
+
|
|
146
|
+
Workaround method, since searching directly for all facets using
|
|
147
|
+
`distrib=True` seems to crash the index node.
|
|
148
|
+
"""
|
|
149
|
+
|
|
150
|
+
# those facets have (almost) unique values
|
|
151
|
+
IGNORE_NAMES = [
|
|
152
|
+
"version",
|
|
153
|
+
# "cf_standard_name",
|
|
154
|
+
# "variable_long_name",
|
|
155
|
+
"creation_date",
|
|
156
|
+
# "datetime_end",
|
|
157
|
+
]
|
|
158
|
+
nb_facets = self.db.scalars(sql.count_table(Facet))[0]
|
|
159
|
+
logger.info(f"Found {nb_facets} facets in database")
|
|
160
|
+
if nb_facets and not update:
|
|
161
|
+
return False
|
|
162
|
+
index_nodes = self.fetch_index_nodes()
|
|
163
|
+
options = Options(distrib=False)
|
|
164
|
+
query = Query(options=options)
|
|
165
|
+
hints_coros = []
|
|
166
|
+
for index_node in index_nodes:
|
|
167
|
+
hints_results = self.context.prepare_hints(
|
|
168
|
+
query,
|
|
169
|
+
file=False,
|
|
170
|
+
facets=["*"],
|
|
171
|
+
index_node=index_node,
|
|
172
|
+
)
|
|
173
|
+
hints_coros.append(self.context._hints(*hints_results))
|
|
174
|
+
hints = self.context.sync_gather(*hints_coros)
|
|
175
|
+
new_facets: set[Facet] = set()
|
|
176
|
+
facets_db = self.db.scalars(sql.facet.all())
|
|
177
|
+
for index_hints in hints:
|
|
178
|
+
for name, values in index_hints[0].items():
|
|
179
|
+
if name in IGNORE_NAMES:
|
|
180
|
+
continue
|
|
181
|
+
for value in values.keys():
|
|
182
|
+
facet = Facet(name=name, value=value)
|
|
183
|
+
if facet not in facets_db:
|
|
184
|
+
facet.compute_sha()
|
|
185
|
+
new_facets.add(facet)
|
|
186
|
+
self.db.add(*new_facets)
|
|
187
|
+
return len(new_facets) > 0
|
|
188
|
+
|
|
189
|
+
@cached_property
|
|
190
|
+
def legacy_query(self) -> Query:
|
|
191
|
+
legacy = LegacyQuery
|
|
192
|
+
if (
|
|
193
|
+
legacy_db := self.db.get(Query, "LEGACY")
|
|
194
|
+
) and legacy_db is not None:
|
|
195
|
+
legacy = legacy_db
|
|
196
|
+
# else:
|
|
197
|
+
# self.db.add(legacy)
|
|
198
|
+
# self.graph.add(legacy, clone=False)
|
|
199
|
+
# self.graph.merge(commit=True)
|
|
200
|
+
return legacy
|
|
201
|
+
|
|
202
|
+
def import_synda(
|
|
203
|
+
self,
|
|
204
|
+
url: Path,
|
|
205
|
+
track: bool = False,
|
|
206
|
+
size: int = 5000,
|
|
207
|
+
ask: bool = False,
|
|
208
|
+
) -> int:
|
|
209
|
+
assert url.is_file()
|
|
210
|
+
synda = Database(f"sqlite:///{url}", run_migrations=False)
|
|
211
|
+
synda_ids = synda.scalars(sql.synda_file.ids())
|
|
212
|
+
shas = set(self.db.scalars(sql.file.linked()))
|
|
213
|
+
msg = f"Found {len(synda_ids)} files to import, proceed?"
|
|
214
|
+
if ask and not self.ui.ask(msg):
|
|
215
|
+
return 0
|
|
216
|
+
synda_shas: set[str] = set()
|
|
217
|
+
idx_range = range(0, len(synda_ids), size)
|
|
218
|
+
if track:
|
|
219
|
+
iter_idx_range = self.ui.track(idx_range)
|
|
220
|
+
else:
|
|
221
|
+
iter_idx_range = iter(idx_range)
|
|
222
|
+
nb_imported = 0
|
|
223
|
+
for start in iter_idx_range:
|
|
224
|
+
stop = min(len(synda_ids), start + size)
|
|
225
|
+
ids = synda_ids[start:stop]
|
|
226
|
+
synda_files = synda.scalars(sql.synda_file.with_ids(*ids))
|
|
227
|
+
files: list[File] = []
|
|
228
|
+
for synda_file in synda_files:
|
|
229
|
+
file = synda_file.to_file()
|
|
230
|
+
if file.sha not in shas:
|
|
231
|
+
file.queries.append(self.legacy_query)
|
|
232
|
+
files.append(file)
|
|
233
|
+
synda_shas.add(file.sha)
|
|
234
|
+
if files:
|
|
235
|
+
nb_imported += len(files)
|
|
236
|
+
self.db.add(*files)
|
|
237
|
+
return nb_imported
|
|
238
|
+
|
|
239
|
+
# def add(
|
|
240
|
+
# self,
|
|
241
|
+
# *queries: Query,
|
|
242
|
+
# with_file: bool = False,
|
|
243
|
+
# ) -> tuple[list[Query], list[Query]]:
|
|
244
|
+
# """
|
|
245
|
+
# Add new queries to query/options/selection tables.
|
|
246
|
+
# Returns two lists: added and discarded queries
|
|
247
|
+
# """
|
|
248
|
+
# for query in
|
|
249
|
+
# self.graph.add()
|
|
250
|
+
# return [], []
|
|
251
|
+
|
|
252
|
+
# def install(
|
|
253
|
+
# self,
|
|
254
|
+
# *files: File,
|
|
255
|
+
# status: FileStatus = FileStatus.Queued,
|
|
256
|
+
# ) -> tuple[list[File], list[File]]:
|
|
257
|
+
# """
|
|
258
|
+
# Insert `files` with specified `status` into db if not already there.
|
|
259
|
+
# """
|
|
260
|
+
# file_ids = [f.file_id for f in files]
|
|
261
|
+
# with self.db.select(File.file_id) as stmt:
|
|
262
|
+
# stmt.where(File.file_id.in_(file_ids))
|
|
263
|
+
# existing_file_ids = set(stmt.scalars)
|
|
264
|
+
# to_install = [f for f in files if f.file_id not in existing_file_ids]
|
|
265
|
+
# to_download: list[File] = []
|
|
266
|
+
# already_on_disk: list[File] = []
|
|
267
|
+
# for file in to_install:
|
|
268
|
+
# if status == FileStatus.Done:
|
|
269
|
+
# # skip check on status=done
|
|
270
|
+
# file.status = status
|
|
271
|
+
# to_download.append(file)
|
|
272
|
+
# continue
|
|
273
|
+
# path = self.fs.path_of(file)
|
|
274
|
+
# if path.is_file():
|
|
275
|
+
# file.status = FileStatus.Done
|
|
276
|
+
# already_on_disk.append(file)
|
|
277
|
+
# else:
|
|
278
|
+
# file.status = status
|
|
279
|
+
# to_download.append(file)
|
|
280
|
+
# self.db.add(*to_install)
|
|
281
|
+
# return to_download, already_on_disk
|
|
282
|
+
|
|
283
|
+
# def remove(self, *files: File) -> list[File]:
|
|
284
|
+
# """
|
|
285
|
+
# Remove `files` from db and delete from filesystem.
|
|
286
|
+
# """
|
|
287
|
+
# file_ids = [f.file_id for f in files]
|
|
288
|
+
# with self.db.select(File) as stmt:
|
|
289
|
+
# stmt.where(File.file_id.in_(file_ids))
|
|
290
|
+
# deleted = stmt.scalars
|
|
291
|
+
# for file in files:
|
|
292
|
+
# if file.status == FileStatus.Done:
|
|
293
|
+
# self.fs.delete(file)
|
|
294
|
+
# self.db.delete(*deleted)
|
|
295
|
+
# return deleted
|
|
296
|
+
|
|
297
|
+
# def autoremove(self) -> list[File]:
|
|
298
|
+
# """
|
|
299
|
+
# Search duplicate files and keep latest version only.
|
|
300
|
+
# """
|
|
301
|
+
# deprecated = self.db.get_deprecated_files()
|
|
302
|
+
# return self.remove(*deprecated)
|
|
303
|
+
|
|
304
|
+
async def iter_results(
|
|
305
|
+
self,
|
|
306
|
+
processor: Processor,
|
|
307
|
+
progress: Progress,
|
|
308
|
+
task_ids: dict[str, TaskID],
|
|
309
|
+
live: Live | DummyLive,
|
|
310
|
+
) -> AsyncIterator[Result]:
|
|
311
|
+
async for result in processor.process():
|
|
312
|
+
task_idx = progress.task_ids.index(task_ids[result.data.file.sha])
|
|
313
|
+
task = progress.tasks[task_idx]
|
|
314
|
+
progress.update(task.id, visible=True)
|
|
315
|
+
match result:
|
|
316
|
+
case Ok():
|
|
317
|
+
progress.update(task.id, completed=result.data.completed)
|
|
318
|
+
if task.finished:
|
|
319
|
+
# TODO: add checksum verif here
|
|
320
|
+
progress.stop_task(task.id)
|
|
321
|
+
progress.update(task.id, visible=False)
|
|
322
|
+
sha = f"[b blue]{task.fields['sha']}[/]"
|
|
323
|
+
file = result.data.file
|
|
324
|
+
digest = result.data.digest
|
|
325
|
+
match self.fs.finalize(file, digest=digest):
|
|
326
|
+
case Ok():
|
|
327
|
+
size = f"[green]{format_size(int(task.completed))}[/]"
|
|
328
|
+
if task.elapsed is not None:
|
|
329
|
+
final_speed = int(
|
|
330
|
+
task.completed / task.elapsed
|
|
331
|
+
)
|
|
332
|
+
speed = (
|
|
333
|
+
f"[red]{format_size(final_speed)}/s[/]"
|
|
334
|
+
)
|
|
335
|
+
else:
|
|
336
|
+
speed = "[b red]?[/]"
|
|
337
|
+
data_node = (
|
|
338
|
+
f"[blue]{task.fields['data_node']}[/]"
|
|
339
|
+
)
|
|
340
|
+
msg = " · ".join([sha, size, speed, data_node])
|
|
341
|
+
logger.info(msg)
|
|
342
|
+
live.console.print(msg)
|
|
343
|
+
yield result
|
|
344
|
+
case Err(_, err):
|
|
345
|
+
progress.remove_task(task.id)
|
|
346
|
+
yield Err(result.data, err)
|
|
347
|
+
case Err():
|
|
348
|
+
progress.remove_task(task.id)
|
|
349
|
+
yield result
|
|
350
|
+
case _:
|
|
351
|
+
raise ValueError("Unexpected result")
|
|
352
|
+
|
|
353
|
+
async def download(
|
|
354
|
+
self,
|
|
355
|
+
queue: list[File],
|
|
356
|
+
use_db: bool = True,
|
|
357
|
+
show_progress: bool = True,
|
|
358
|
+
) -> tuple[list[File], list[Err]]:
|
|
359
|
+
"""
|
|
360
|
+
Download files provided in `queue`.
|
|
361
|
+
"""
|
|
362
|
+
for file in queue:
|
|
363
|
+
file.status = FileStatus.Starting
|
|
364
|
+
main_progress = self.ui.make_progress(
|
|
365
|
+
SpinnerColumn(),
|
|
366
|
+
MofNCompleteColumn(),
|
|
367
|
+
TimeRemainingColumn(compact=True, elapsed_when_finished=True),
|
|
368
|
+
)
|
|
369
|
+
file_progress = self.ui.make_progress(
|
|
370
|
+
TextColumn("[cyan][{task.id}] [b blue]{task.fields[sha]}"),
|
|
371
|
+
"[progress.percentage]{task.percentage:>3.0f}%",
|
|
372
|
+
BarColumn(),
|
|
373
|
+
"·",
|
|
374
|
+
DownloadColumn(binary_units=True),
|
|
375
|
+
"·",
|
|
376
|
+
TransferSpeedColumn(),
|
|
377
|
+
"·",
|
|
378
|
+
TextColumn("[blue]{task.fields[data_node]}"),
|
|
379
|
+
transient=True,
|
|
380
|
+
)
|
|
381
|
+
file_task_shas = {}
|
|
382
|
+
start_callbacks = {}
|
|
383
|
+
for file in queue:
|
|
384
|
+
task_id = file_progress.add_task(
|
|
385
|
+
"",
|
|
386
|
+
total=file.size,
|
|
387
|
+
visible=False,
|
|
388
|
+
start=False,
|
|
389
|
+
sha=short_sha(file.sha),
|
|
390
|
+
data_node=file.data_node,
|
|
391
|
+
)
|
|
392
|
+
callback = partial(file_progress.start_task, task_id)
|
|
393
|
+
file_task_shas[file.sha] = task_id
|
|
394
|
+
start_callbacks[file.sha] = [callback]
|
|
395
|
+
processor = Processor(
|
|
396
|
+
config=self.config,
|
|
397
|
+
auth=self.auth,
|
|
398
|
+
fs=self.fs,
|
|
399
|
+
files=queue,
|
|
400
|
+
start_callbacks=start_callbacks,
|
|
401
|
+
)
|
|
402
|
+
if use_db:
|
|
403
|
+
self.db.add(*processor.files)
|
|
404
|
+
queue_size = len(processor.tasks)
|
|
405
|
+
main_task_id = main_progress.add_task("", total=queue_size)
|
|
406
|
+
# TODO: rename ? installed/downloaded/completed/...
|
|
407
|
+
files: list[File] = []
|
|
408
|
+
errors: list[Err] = []
|
|
409
|
+
remaining_dict = {file.sha: file for file in processor.files}
|
|
410
|
+
try:
|
|
411
|
+
with self.ui.live(
|
|
412
|
+
file_progress,
|
|
413
|
+
main_progress,
|
|
414
|
+
disable=not show_progress,
|
|
415
|
+
) as live:
|
|
416
|
+
async for result in self.iter_results(
|
|
417
|
+
processor,
|
|
418
|
+
file_progress,
|
|
419
|
+
file_task_shas,
|
|
420
|
+
live,
|
|
421
|
+
):
|
|
422
|
+
match result:
|
|
423
|
+
case Ok():
|
|
424
|
+
main_progress.update(main_task_id, advance=1)
|
|
425
|
+
result.data.file.status = FileStatus.Done
|
|
426
|
+
files.append(result.data.file)
|
|
427
|
+
case Err():
|
|
428
|
+
queue_size -= 1
|
|
429
|
+
main_progress.update(
|
|
430
|
+
main_task_id, total=queue_size
|
|
431
|
+
)
|
|
432
|
+
result.data.file.status = FileStatus.Error
|
|
433
|
+
errors.append(result)
|
|
434
|
+
if use_db:
|
|
435
|
+
self.db.add(result.data.file)
|
|
436
|
+
remaining_dict.pop(result.data.file.sha, None)
|
|
437
|
+
finally:
|
|
438
|
+
if remaining_dict:
|
|
439
|
+
logger.warning(f"Cancelling {len(remaining_dict)} downloads.")
|
|
440
|
+
cancelled: list[File] = []
|
|
441
|
+
for file in remaining_dict.values():
|
|
442
|
+
file.status = FileStatus.Cancelled
|
|
443
|
+
cancelled.append(file)
|
|
444
|
+
errors.append(Err(file, DownloadCancelled()))
|
|
445
|
+
if use_db:
|
|
446
|
+
self.db.add(*cancelled)
|
|
447
|
+
return files, errors
|
esgpull/exceptions.py
ADDED
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
# from esgpull.constants import ROOT_ENV
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class EsgpullException(Exception):
    """
    Base class for esgpull errors.

    Subclasses set `msg` to a `str.format` template that is filled with the
    constructor arguments. The formatted text is kept unmodified in
    `message`; its stripped form is what `Exception` receives.

    A class that declares no template (`msg` left to `NotImplemented`)
    builds its message from the arguments themselves instead of failing
    with an unrelated `AttributeError`.
    """

    msg: str = NotImplemented

    def __init__(self, *args, **kwargs):
        if self.msg is NotImplemented:
            # No template to format: fall back to a plain rendering of the
            # arguments, positional ones first.
            parts = [str(arg) for arg in args]
            parts.extend(f"{key}={value!r}" for key, value in kwargs.items())
            self.message = " ".join(parts)
        else:
            self.message = self.msg.format(*args, **kwargs)
        super().__init__(self.message.strip())
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
# class NoRootError(EsgpullException):
|
|
13
|
+
# msg = f"Environment variable `{ROOT_ENV}` must be set."
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
# class UnknownFacet(EsgpullException):
|
|
17
|
+
# msg = "{}"
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class FacetNameError(EsgpullException, AttributeError):
    """
    Error for a name that is not a valid facet.

    Formats one positional argument (the rejected name).

    AttributeError is required for autocomplete engines (e.g. jupyter).
    """

    msg = "'{}' is not a valid facet."
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class AlreadySetFacet(EsgpullException):
    """
    Error for a facet that already has a value.

    Formats two positional arguments: the name, then what it is set to.
    """

    msg = "'{}' is already set to [{}]"
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class DuplicateFacet(EsgpullException):
    """
    Error for a duplicated facet.

    Formats three positional arguments: the first two are rendered as
    `'first:second'`, the third on its own line below.
    """

    msg = "'{}:{}'\n{}"
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class QueryDuplicate(EsgpullException):
    """
    Error for a duplicated query.

    The message is the single positional argument, rendered as is.
    """

    msg = "{}"
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class PageIndexError(EsgpullException, IndexError):
    """
    Error for a page that cannot be shown; also an `IndexError`.

    Formats two positional arguments, rendered as `first/second`.
    """

    msg = "Cannot show page {}/{}."
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
# # errors meant for use when validation is implemented
|
|
45
|
+
# class UnknownFacetValue(EsgpullException):
|
|
46
|
+
# msg = "'{}' is not valid for {}."
|
|
47
|
+
# class ImpossibleFacet(EsgpullException):
|
|
48
|
+
# msg = """Facet '{}' is not available with this query:
|
|
49
|
+
# {}"""
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class SolrUnstableQueryError(EsgpullException):
    """
    Error for a query that Solr cannot handle.

    Formats one positional argument (the query), shown on its own line.
    """

    msg = """
Solr can not handle this query:
{}
"""
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class QuerySourceError(EsgpullException):
    """
    Error for a source that cannot be parsed as a query.

    Formats one positional argument (the source), shown on its own line.
    """

    msg = """
This source cannot be parsed as a query:
{}
"""
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class TooShortKeyError(EsgpullException, KeyError):
    """
    Error for a key that is too short; also a `KeyError`.

    The message is the single positional argument, rendered as is.
    """

    msg = "{}"
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
class GraphWithoutDatabase(EsgpullException):
    """
    Error for a graph that is not connected to a database.

    Takes no argument.
    """

    msg = "Graph is not connected to a database."
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
class DownloadKindError(EsgpullException):
    """
    Error for an invalid download kind.

    Formats one positional argument (the rejected kind), followed by the
    list of accepted kinds.
    """

    msg = """{} is not a valid download kind. Choose from:
* Download
* ChunkedDownload
* MultiSourceChunkedDownload
"""
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
class DownloadSizeError(EsgpullException):
    """
    Error for a downloaded file that is larger than expected.

    Formats two positional arguments, rendered as `first > second`.
    """

    msg = """
Downloaded file is larger than expected: {} > {}
"""
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
class DownloadCancelled(EsgpullException):
    """
    Error for a download that was cancelled.

    Takes no argument.
    """

    msg = """
Download cancelled by user.
"""
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
class NoClauseError(EsgpullException):
    """
    Error for a missing clause (the query might be empty).

    Takes no argument.
    """

    msg = """
No clause provided (query might be empty).
"""
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
class BadConfigError(EsgpullException):
    """
    Error for a config that needs fixing.

    Formats one positional argument (where the config is located).
    """

    msg = """
Please fix your config, located at {}
"""
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
class VirtualConfigError(EsgpullException):
    """
    Error for a config that was not loaded from a file.

    Takes no argument.
    """

    msg = """
This config was not loaded from a file.
"""
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
class InstallException(EsgpullException):
    """
    Common parent of the install-location errors.

    Declares no message template of its own.
    """
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
class UntrackableQuery(EsgpullException):
    """
    Error for a query that cannot be tracked because it has unset options.

    Formats one positional argument (the query).
    """

    msg = """
{} cannot be tracked, it has unset options.
"""
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
class UnsetOptionsError(EsgpullException):
    """
    Error for an object that has some unset options.

    Formats one positional argument (the offending object).
    """

    msg = """
{} has some unset options.
"""
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
class UnregisteredInstallPath(InstallException):
    """
    Install error for an unregistered path.

    The message is the single positional argument, rendered as is.
    """

    msg = "{}"
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
class UnknownInstallName(InstallException):
    """
    Install error for an unknown install name.

    The message is the `repr` of the single positional argument.
    """

    msg = "{!r}"
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
class NoInstallPath(InstallException):
    """
    Install error for the absence of a selected install location.

    Takes no argument; the message lists the commands used to choose or
    install a location.
    """

    msg = """Choose or install one

Show existing install locations with:
$ esgpull self choose

Install a new location with:
$ esgpull self install
"""
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
class InvalidInstallPath(InstallException):
    """
    Install error for an invalid install path.

    Formats the keyword argument `path`, which appears both as the headline
    and in the suggested `choose` command.
    """

    msg = """{path}

Choose this install location with:
$ esgpull self choose {path}

Install a new location with:
$ esgpull self install
"""
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
class AlreadyInstalledPath(InstallException):
    """
    Install error for a path that is already installed.

    Formats the keyword arguments `path` and `msg`, one per line.
    """

    msg = """{path}
{msg}
"""
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
class AlreadyInstalledName(InstallException):
    """
    Install error for a name that is already installed.

    Formats the keyword arguments `name` and `msg`, one per line.
    """

    msg = """{name}
{msg}
"""
|