freva-client 2404.0.1__py3-none-any.whl → 2408.0.0.dev2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- freva_client/__init__.py +3 -2
- freva_client/auth.py +197 -0
- freva_client/cli/auth_cli.py +71 -0
- freva_client/cli/cli_app.py +22 -9
- freva_client/cli/cli_parser.py +152 -0
- freva_client/cli/cli_utils.py +13 -147
- freva_client/cli/databrowser_cli.py +177 -9
- freva_client/query.py +166 -23
- freva_client/utils/__init__.py +1 -1
- freva_client/utils/databrowser_utils.py +26 -9
- freva_client/utils/logger.py +2 -2
- {freva_client-2404.0.1.dist-info → freva_client-2408.0.0.dev2.dist-info}/METADATA +3 -1
- freva_client-2408.0.0.dev2.dist-info/RECORD +19 -0
- freva_client-2404.0.1.dist-info/RECORD +0 -16
- {freva_client-2404.0.1.data → freva_client-2408.0.0.dev2.data}/data/share/freva/freva.toml +0 -0
- {freva_client-2404.0.1.dist-info → freva_client-2408.0.0.dev2.dist-info}/WHEEL +0 -0
- {freva_client-2404.0.1.dist-info → freva_client-2408.0.0.dev2.dist-info}/entry_points.txt +0 -0
|
@@ -5,14 +5,26 @@ Search quickly and intuitively for many different climate datasets.
|
|
|
5
5
|
|
|
6
6
|
import json
|
|
7
7
|
from enum import Enum
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from tempfile import NamedTemporaryFile
|
|
8
10
|
from typing import Dict, List, Literal, Optional, Union, cast
|
|
9
11
|
|
|
10
12
|
import typer
|
|
11
13
|
from freva_client import databrowser
|
|
14
|
+
from freva_client.auth import Auth
|
|
12
15
|
from freva_client.utils import exception_handler, logger
|
|
13
16
|
|
|
14
|
-
from .
|
|
15
|
-
|
|
17
|
+
from .cli_utils import parse_cli_args, version_callback
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _auth(url: str, token: Optional[str]) -> None:
|
|
21
|
+
if token:
|
|
22
|
+
auth = Auth()
|
|
23
|
+
auth.set_token(
|
|
24
|
+
access_token=token, expires=auth.token_expiration_time.timestamp()
|
|
25
|
+
)
|
|
26
|
+
else:
|
|
27
|
+
raise ValueError("`--access-token` is required for authentication.")
|
|
16
28
|
|
|
17
29
|
|
|
18
30
|
class UniqKeys(str, Enum):
|
|
@@ -55,7 +67,12 @@ class TimeSelect(str, Enum):
|
|
|
55
67
|
)
|
|
56
68
|
|
|
57
69
|
|
|
58
|
-
|
|
70
|
+
databrowser_app = typer.Typer(
|
|
71
|
+
help="Data search related commands", callback=logger.set_cli
|
|
72
|
+
)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
@databrowser_app.command(
|
|
59
76
|
name="data-overview",
|
|
60
77
|
help="Get an overview over what is available in the databrowser.",
|
|
61
78
|
)
|
|
@@ -74,7 +91,7 @@ def overview(
|
|
|
74
91
|
print(databrowser.overview(host=host))
|
|
75
92
|
|
|
76
93
|
|
|
77
|
-
@
|
|
94
|
+
@databrowser_app.command(
|
|
78
95
|
name="metadata-search", help="Search databrowser for metadata (facets)."
|
|
79
96
|
)
|
|
80
97
|
@exception_handler
|
|
@@ -190,7 +207,9 @@ def metadata_search(
|
|
|
190
207
|
print(f"{key}: {', '.join(values)}")
|
|
191
208
|
|
|
192
209
|
|
|
193
|
-
@
|
|
210
|
+
@databrowser_app.command(
|
|
211
|
+
name="data-search", help="Search the databrowser for datasets."
|
|
212
|
+
)
|
|
194
213
|
@exception_handler
|
|
195
214
|
def data_search(
|
|
196
215
|
search_keys: Optional[List[str]] = typer.Argument(
|
|
@@ -234,6 +253,17 @@ def data_search(
|
|
|
234
253
|
"--time-select",
|
|
235
254
|
help=TimeSelect.get_help(),
|
|
236
255
|
),
|
|
256
|
+
zarr: bool = typer.Option(
|
|
257
|
+
False, "--zarr", help="Create zarr stream files."
|
|
258
|
+
),
|
|
259
|
+
access_token: Optional[str] = typer.Option(
|
|
260
|
+
None,
|
|
261
|
+
"--access-token",
|
|
262
|
+
help=(
|
|
263
|
+
"Use this access token for authentication"
|
|
264
|
+
" when creating a zarr stream files."
|
|
265
|
+
),
|
|
266
|
+
),
|
|
237
267
|
time: Optional[str] = typer.Option(
|
|
238
268
|
None,
|
|
239
269
|
"-t",
|
|
@@ -264,14 +294,14 @@ def data_search(
|
|
|
264
294
|
),
|
|
265
295
|
multiversion: bool = typer.Option(
|
|
266
296
|
False,
|
|
267
|
-
"--
|
|
297
|
+
"--multi-version",
|
|
268
298
|
help="Select all versions and not just the latest version (default).",
|
|
269
299
|
),
|
|
270
300
|
version: Optional[bool] = typer.Option(
|
|
271
301
|
False,
|
|
272
302
|
"-V",
|
|
273
303
|
"--version",
|
|
274
|
-
help="Show
|
|
304
|
+
help="Show version an exit",
|
|
275
305
|
callback=version_callback,
|
|
276
306
|
),
|
|
277
307
|
) -> None:
|
|
@@ -295,8 +325,11 @@ def data_search(
|
|
|
295
325
|
host=host,
|
|
296
326
|
fail_on_error=False,
|
|
297
327
|
multiversion=multiversion,
|
|
328
|
+
stream_zarr=zarr,
|
|
298
329
|
**(parse_cli_args(search_keys or [])),
|
|
299
330
|
)
|
|
331
|
+
if zarr:
|
|
332
|
+
_auth(result._cfg.auth_url, access_token)
|
|
300
333
|
if parse_json:
|
|
301
334
|
print(json.dumps(sorted(result)))
|
|
302
335
|
else:
|
|
@@ -304,7 +337,141 @@ def data_search(
|
|
|
304
337
|
print(res)
|
|
305
338
|
|
|
306
339
|
|
|
307
|
-
@
|
|
340
|
+
@databrowser_app.command(
|
|
341
|
+
name="intake-catalogue", help="Create an intake catalogue from the search."
|
|
342
|
+
)
|
|
343
|
+
@exception_handler
|
|
344
|
+
def intake_catalogue(
|
|
345
|
+
search_keys: Optional[List[str]] = typer.Argument(
|
|
346
|
+
default=None,
|
|
347
|
+
help="Refine your data search with this `key=value` pair search "
|
|
348
|
+
"parameters. The parameters could be, depending on the DRS standard, "
|
|
349
|
+
"flavour product, project model etc.",
|
|
350
|
+
),
|
|
351
|
+
facets: Optional[List[str]] = typer.Option(
|
|
352
|
+
None,
|
|
353
|
+
"--facet",
|
|
354
|
+
help=(
|
|
355
|
+
"If you are not sure about the correct search key's you can use"
|
|
356
|
+
" the ``--facet`` flag to search of any matching entries. For "
|
|
357
|
+
"example --facet 'era5' would allow you to search for any entries"
|
|
358
|
+
" containing era5, regardless of project, product etc."
|
|
359
|
+
),
|
|
360
|
+
),
|
|
361
|
+
uniq_key: UniqKeys = typer.Option(
|
|
362
|
+
"file",
|
|
363
|
+
"--uniq-key",
|
|
364
|
+
"-u",
|
|
365
|
+
help=(
|
|
366
|
+
"The type of search result, which can be either “file” "
|
|
367
|
+
"or “uri”. This parameter determines whether the search will be "
|
|
368
|
+
"based on file paths or Uniform Resource Identifiers"
|
|
369
|
+
),
|
|
370
|
+
),
|
|
371
|
+
flavour: Flavours = typer.Option(
|
|
372
|
+
"freva",
|
|
373
|
+
"--flavour",
|
|
374
|
+
"-f",
|
|
375
|
+
help=(
|
|
376
|
+
"The Data Reference Syntax (DRS) standard specifying the type "
|
|
377
|
+
"of climate datasets to query."
|
|
378
|
+
),
|
|
379
|
+
),
|
|
380
|
+
time_select: TimeSelect = typer.Option(
|
|
381
|
+
"flexible",
|
|
382
|
+
"-ts",
|
|
383
|
+
"--time-select",
|
|
384
|
+
help=TimeSelect.get_help(),
|
|
385
|
+
),
|
|
386
|
+
time: Optional[str] = typer.Option(
|
|
387
|
+
None,
|
|
388
|
+
"-t",
|
|
389
|
+
"--time",
|
|
390
|
+
help=(
|
|
391
|
+
"Special search facet to refine/subset search results by time. "
|
|
392
|
+
"This can be a string representation of a time range or a single "
|
|
393
|
+
"time step. The time steps have to follow ISO-8601. Valid strings "
|
|
394
|
+
"are ``%Y-%m-%dT%H:%M`` to ``%Y-%m-%dT%H:%M`` for time ranges and "
|
|
395
|
+
"``%Y-%m-%dT%H:%M``. **Note**: You don't have to give the full "
|
|
396
|
+
"string format to subset time steps ``%Y``, ``%Y-%m`` etc are also"
|
|
397
|
+
" valid."
|
|
398
|
+
),
|
|
399
|
+
),
|
|
400
|
+
zarr: bool = typer.Option(
|
|
401
|
+
False, "--zarr", help="Create zarr stream files, as catalogue targets."
|
|
402
|
+
),
|
|
403
|
+
access_token: Optional[str] = typer.Option(
|
|
404
|
+
None,
|
|
405
|
+
"--access-token",
|
|
406
|
+
help=(
|
|
407
|
+
"Use this access token for authentication"
|
|
408
|
+
" when creating a zarr based intake catalogue."
|
|
409
|
+
),
|
|
410
|
+
),
|
|
411
|
+
filename: Optional[Path] = typer.Option(
|
|
412
|
+
None,
|
|
413
|
+
"-f",
|
|
414
|
+
"--filename",
|
|
415
|
+
help=(
|
|
416
|
+
"Path to the file where the catalogue, should be written to. "
|
|
417
|
+
"if None given (default) the catalogue is parsed to stdout."
|
|
418
|
+
),
|
|
419
|
+
),
|
|
420
|
+
host: Optional[str] = typer.Option(
|
|
421
|
+
None,
|
|
422
|
+
"--host",
|
|
423
|
+
help=(
|
|
424
|
+
"Set the hostname of the databrowser, if not set (default) "
|
|
425
|
+
"the hostname is read from a config file"
|
|
426
|
+
),
|
|
427
|
+
),
|
|
428
|
+
verbose: int = typer.Option(
|
|
429
|
+
0, "-v", help="Increase verbosity", count=True
|
|
430
|
+
),
|
|
431
|
+
multiversion: bool = typer.Option(
|
|
432
|
+
False,
|
|
433
|
+
"--multi-version",
|
|
434
|
+
help="Select all versions and not just the latest version (default).",
|
|
435
|
+
),
|
|
436
|
+
version: Optional[bool] = typer.Option(
|
|
437
|
+
False,
|
|
438
|
+
"-V",
|
|
439
|
+
"--version",
|
|
440
|
+
help="Show version an exit",
|
|
441
|
+
callback=version_callback,
|
|
442
|
+
),
|
|
443
|
+
) -> None:
|
|
444
|
+
"""Create an intake catalogue for climate datasets based on the specified
|
|
445
|
+
Data Reference Syntax (DRS) standard (flavour) and the type of search
|
|
446
|
+
result (uniq_key), which can be either “file” or “uri”."""
|
|
447
|
+
logger.set_verbosity(verbose)
|
|
448
|
+
logger.debug("Search the databrowser")
|
|
449
|
+
result = databrowser(
|
|
450
|
+
*(facets or []),
|
|
451
|
+
time=time or "",
|
|
452
|
+
time_select=cast(Literal["file", "flexible", "strict"], time_select),
|
|
453
|
+
flavour=cast(
|
|
454
|
+
Literal["freva", "cmip6", "cmip5", "cordex", "nextgems"],
|
|
455
|
+
flavour.value,
|
|
456
|
+
),
|
|
457
|
+
uniq_key=cast(Literal["uri", "file"], uniq_key.value),
|
|
458
|
+
host=host,
|
|
459
|
+
fail_on_error=False,
|
|
460
|
+
multiversion=multiversion,
|
|
461
|
+
stream_zarr=zarr,
|
|
462
|
+
**(parse_cli_args(search_keys or [])),
|
|
463
|
+
)
|
|
464
|
+
if zarr:
|
|
465
|
+
_auth(result._cfg.auth_url, access_token)
|
|
466
|
+
with NamedTemporaryFile(suffix=".json") as temp_f:
|
|
467
|
+
result._create_intake_catalogue_file(str(filename or temp_f.name))
|
|
468
|
+
if not filename:
|
|
469
|
+
print(Path(temp_f.name).read_text())
|
|
470
|
+
|
|
471
|
+
|
|
472
|
+
@databrowser_app.command(
|
|
473
|
+
name="data-count", help="Count the databrowser search results"
|
|
474
|
+
)
|
|
308
475
|
@exception_handler
|
|
309
476
|
def count_values(
|
|
310
477
|
search_keys: Optional[List[str]] = typer.Argument(
|
|
@@ -387,7 +554,7 @@ def count_values(
|
|
|
387
554
|
False,
|
|
388
555
|
"-V",
|
|
389
556
|
"--version",
|
|
390
|
-
help="Show
|
|
557
|
+
help="Show version an exit",
|
|
391
558
|
callback=version_callback,
|
|
392
559
|
),
|
|
393
560
|
) -> None:
|
|
@@ -438,6 +605,7 @@ def count_values(
|
|
|
438
605
|
multiversion=multiversion,
|
|
439
606
|
fail_on_error=False,
|
|
440
607
|
uniq_key="file",
|
|
608
|
+
stream_zarr=False,
|
|
441
609
|
**search_kws,
|
|
442
610
|
)
|
|
443
611
|
)
|
freva_client/query.py
CHANGED
|
@@ -4,12 +4,27 @@ import sys
|
|
|
4
4
|
from collections import defaultdict
|
|
5
5
|
from fnmatch import fnmatch
|
|
6
6
|
from functools import cached_property
|
|
7
|
-
from
|
|
8
|
-
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from tempfile import NamedTemporaryFile
|
|
9
|
+
from typing import (
|
|
10
|
+
Any,
|
|
11
|
+
Dict,
|
|
12
|
+
Iterator,
|
|
13
|
+
List,
|
|
14
|
+
Literal,
|
|
15
|
+
Optional,
|
|
16
|
+
Tuple,
|
|
17
|
+
Union,
|
|
18
|
+
cast,
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
import intake
|
|
22
|
+
import intake_esm
|
|
9
23
|
import requests
|
|
10
24
|
import yaml
|
|
11
25
|
from rich import print as pprint
|
|
12
26
|
|
|
27
|
+
from .auth import Auth
|
|
13
28
|
from .utils import logger
|
|
14
29
|
from .utils.databrowser_utils import Config
|
|
15
30
|
|
|
@@ -67,6 +82,9 @@ class databrowser:
|
|
|
67
82
|
url where the freva web site can be found. Such as www.freva.dkrz.de.
|
|
68
83
|
By default no host name is given and the host name will be taken from
|
|
69
84
|
the freva config file.
|
|
85
|
+
stream_zarr: bool, default: False
|
|
86
|
+
Create a zarr stream for all search results. When set to true the
|
|
87
|
+
files are served in zarr format and can be opened from anywhere.
|
|
70
88
|
multiversion: bool, default: False
|
|
71
89
|
Select all versions and not just the latest version (default).
|
|
72
90
|
fail_on_error: bool, default: False
|
|
@@ -98,7 +116,7 @@ class databrowser:
|
|
|
98
116
|
db = databrowser(experiment="cmorph", uniq_key="uri")
|
|
99
117
|
print(db)
|
|
100
118
|
|
|
101
|
-
After having created the search object you can
|
|
119
|
+
After having created the search object you can acquire different kinds of
|
|
102
120
|
information like the number of found objects:
|
|
103
121
|
|
|
104
122
|
.. execute_code::
|
|
@@ -149,24 +167,61 @@ class databrowser:
|
|
|
149
167
|
db = databrowser("reana*", realm="ocean", flavour="cmip6")
|
|
150
168
|
for file in db:
|
|
151
169
|
print(file)
|
|
170
|
+
|
|
171
|
+
If you don't have direct access to the data, for example because you are
|
|
172
|
+
not directly logged in to the computer where the data is stored you can
|
|
173
|
+
set ``stream_zarr=True``. The data will then be
|
|
174
|
+
provisioned in zarr format and can be opened from anywhere. But bear in
|
|
175
|
+
mind that zarr streams if not accessed in time will expire. Since the
|
|
176
|
+
data can be accessed from anywhere you will also have to authenticate
|
|
177
|
+
before you are able to access the data. Refer also to the
|
|
178
|
+
:py:meth:`freva_client.authenticate` method.
|
|
179
|
+
|
|
180
|
+
.. execute_code::
|
|
181
|
+
|
|
182
|
+
from freva_client import authenticate, databrowser
|
|
183
|
+
token_info = authenticate(username="janedoe")
|
|
184
|
+
db = databrowser(dataset="cmip6-fs", stream_zarr=True)
|
|
185
|
+
zarr_files = list(db)
|
|
186
|
+
print(zarr_files)
|
|
187
|
+
|
|
188
|
+
After you have created the paths to the zarr files you can open them
|
|
189
|
+
|
|
190
|
+
::
|
|
191
|
+
|
|
192
|
+
import xarray as xr
|
|
193
|
+
dset = xr.open_dataset(
|
|
194
|
+
zarr_files[0],
|
|
195
|
+
chunks="auto",
|
|
196
|
+
engine="zarr",
|
|
197
|
+
storage_options={"headers":
|
|
198
|
+
{"Authorization": f"Bearer {token_info['access_token']}"}
|
|
199
|
+
}
|
|
200
|
+
)
|
|
201
|
+
|
|
202
|
+
|
|
152
203
|
"""
|
|
153
204
|
|
|
154
205
|
def __init__(
|
|
155
206
|
self,
|
|
156
207
|
*facets: str,
|
|
157
208
|
uniq_key: Literal["file", "uri"] = "file",
|
|
158
|
-
flavour: Literal[
|
|
209
|
+
flavour: Literal[
|
|
210
|
+
"freva", "cmip6", "cmip5", "cordex", "nextgems"
|
|
211
|
+
] = "freva",
|
|
159
212
|
time: Optional[str] = None,
|
|
160
213
|
host: Optional[str] = None,
|
|
161
214
|
time_select: Literal["flexible", "strict", "file"] = "flexible",
|
|
215
|
+
stream_zarr: bool = False,
|
|
162
216
|
multiversion: bool = False,
|
|
163
217
|
fail_on_error: bool = False,
|
|
164
218
|
**search_keys: Union[str, List[str]],
|
|
165
219
|
) -> None:
|
|
166
|
-
|
|
220
|
+
self._auth = Auth()
|
|
167
221
|
self._fail_on_error = fail_on_error
|
|
168
222
|
self._cfg = Config(host, uniq_key=uniq_key, flavour=flavour)
|
|
169
223
|
self._flavour = flavour
|
|
224
|
+
self._stream_zarr = stream_zarr
|
|
170
225
|
facet_search: Dict[str, List[str]] = defaultdict(list)
|
|
171
226
|
for key, value in search_keys.items():
|
|
172
227
|
if isinstance(value, str):
|
|
@@ -188,7 +243,8 @@ class databrowser:
|
|
|
188
243
|
self, facets: Tuple[str, ...], search_kw: Dict[str, List[str]]
|
|
189
244
|
) -> None:
|
|
190
245
|
metadata = {
|
|
191
|
-
k: v[::2]
|
|
246
|
+
k: v[::2]
|
|
247
|
+
for (k, v) in self._facet_search(extended_search=True).items()
|
|
192
248
|
}
|
|
193
249
|
primary_key = list(metadata.keys() or ["project"])[0]
|
|
194
250
|
num_facets = 0
|
|
@@ -201,19 +257,29 @@ class databrowser:
|
|
|
201
257
|
|
|
202
258
|
if facets and num_facets == 0:
|
|
203
259
|
# TODO: This isn't pretty, but if a user requested a search
|
|
204
|
-
# string doesn't exist than we have to somehow make the search
|
|
260
|
+
# string that doesn't exist then we have to somehow make the search
|
|
205
261
|
# return nothing.
|
|
206
262
|
search_kw = {primary_key: ["NotAvailable"]}
|
|
207
263
|
self._params.update(search_kw)
|
|
208
264
|
|
|
209
265
|
def __iter__(self) -> Iterator[str]:
|
|
210
|
-
|
|
266
|
+
query_url = self._cfg.search_url
|
|
267
|
+
headers = {}
|
|
268
|
+
if self._stream_zarr:
|
|
269
|
+
query_url = self._cfg.zarr_loader_url
|
|
270
|
+
token = self._auth.check_authentication(
|
|
271
|
+
auth_url=self._cfg.auth_url
|
|
272
|
+
)
|
|
273
|
+
headers = {"Authorization": f"Bearer {token['access_token']}"}
|
|
274
|
+
result = self._get(query_url, headers=headers, stream=True)
|
|
211
275
|
if result is not None:
|
|
212
276
|
try:
|
|
213
277
|
for res in result.iter_lines():
|
|
214
278
|
yield res.decode("utf-8")
|
|
215
279
|
except KeyboardInterrupt:
|
|
216
|
-
pprint(
|
|
280
|
+
pprint(
|
|
281
|
+
"[red][b]User interrupt: Exit[/red][/b]", file=sys.stderr
|
|
282
|
+
)
|
|
217
283
|
|
|
218
284
|
def __repr__(self) -> str:
|
|
219
285
|
params = ", ".join(
|
|
@@ -240,7 +306,9 @@ class databrowser:
|
|
|
240
306
|
|
|
241
307
|
# Create a table-like structure for available flavors and search facets
|
|
242
308
|
style = 'style="text-align: left"'
|
|
243
|
-
facet_heading =
|
|
309
|
+
facet_heading = (
|
|
310
|
+
f"Available search facets for <em>{self._flavour}</em> flavour"
|
|
311
|
+
)
|
|
244
312
|
html_repr = (
|
|
245
313
|
"<table>"
|
|
246
314
|
f"<tr><th colspan='2' {style}>{self.__class__.__name__}"
|
|
@@ -274,11 +342,71 @@ class databrowser:
|
|
|
274
342
|
return cast(int, result.json().get("total_count", 0))
|
|
275
343
|
return 0
|
|
276
344
|
|
|
345
|
+
def _create_intake_catalogue_file(self, filename: str) -> None:
|
|
346
|
+
"""Create an intake catalogue file."""
|
|
347
|
+
kwargs: Dict[str, Any] = {"stream": True}
|
|
348
|
+
url = self._cfg.intake_url
|
|
349
|
+
if self._stream_zarr:
|
|
350
|
+
token = self._auth.check_authentication(
|
|
351
|
+
auth_url=self._cfg.auth_url
|
|
352
|
+
)
|
|
353
|
+
url = self._cfg.zarr_loader_url
|
|
354
|
+
kwargs["headers"] = {
|
|
355
|
+
"Authorization": f"Bearer {token['access_token']}"
|
|
356
|
+
}
|
|
357
|
+
kwargs["params"] = {"catalogue-type": "intake"}
|
|
358
|
+
result = self._get(url, **kwargs)
|
|
359
|
+
if result is None:
|
|
360
|
+
raise ValueError("No results found")
|
|
361
|
+
|
|
362
|
+
try:
|
|
363
|
+
Path(filename).parent.mkdir(exist_ok=True, parents=True)
|
|
364
|
+
with open(filename, "bw") as stream:
|
|
365
|
+
for content in result.iter_content(decode_unicode=False):
|
|
366
|
+
stream.write(content)
|
|
367
|
+
except Exception as error:
|
|
368
|
+
raise ValueError(
|
|
369
|
+
f"Couldn't write catalogue content: {error}"
|
|
370
|
+
) from None
|
|
371
|
+
|
|
372
|
+
def intake_catalogue(self) -> intake_esm.core.esm_datastore:
|
|
373
|
+
"""Create an intake esm catalogue object from the search.
|
|
374
|
+
|
|
375
|
+
This method creates an intake-esm catalogue from the current object
|
|
376
|
+
search. Instead of having the original files as target objects you can
|
|
377
|
+
also choose to stream the files via zarr.
|
|
378
|
+
|
|
379
|
+
Returns
|
|
380
|
+
~~~~~~~
|
|
381
|
+
intake_esm.core.esm_datastore: intake-esm catalogue.
|
|
382
|
+
|
|
383
|
+
Raises
|
|
384
|
+
~~~~~~
|
|
385
|
+
ValueError: If user is not authenticated or catalogue creation failed.
|
|
386
|
+
|
|
387
|
+
Example
|
|
388
|
+
~~~~~~~
|
|
389
|
+
Let's create an intake-esm catalogue that allows for
|
|
390
|
+
streaming the target data as zarr:
|
|
391
|
+
|
|
392
|
+
.. execute_code::
|
|
393
|
+
|
|
394
|
+
from freva_client import databrowser
|
|
395
|
+
db = databrowser(dataset="cmip6-fs", stream_zarr=True)
|
|
396
|
+
cat = db.intake_catalogue()
|
|
397
|
+
print(cat.df)
|
|
398
|
+
"""
|
|
399
|
+
with NamedTemporaryFile(suffix=".json") as temp_f:
|
|
400
|
+
self._create_intake_catalogue_file(temp_f.name)
|
|
401
|
+
return intake.open_esm_datastore(temp_f.name)
|
|
402
|
+
|
|
277
403
|
@classmethod
|
|
278
404
|
def count_values(
|
|
279
405
|
cls,
|
|
280
406
|
*facets: str,
|
|
281
|
-
flavour: Literal[
|
|
407
|
+
flavour: Literal[
|
|
408
|
+
"freva", "cmip6", "cmip5", "cordex", "nextgems"
|
|
409
|
+
] = "freva",
|
|
282
410
|
time: Optional[str] = None,
|
|
283
411
|
host: Optional[str] = None,
|
|
284
412
|
time_select: Literal["flexible", "strict", "file"] = "flexible",
|
|
@@ -328,7 +456,7 @@ class databrowser:
|
|
|
328
456
|
fail_on_error: bool, default: False
|
|
329
457
|
Make the call fail if the connection to the databrowser could not
|
|
330
458
|
**search_keys: str
|
|
331
|
-
The search
|
|
459
|
+
The search constraints to be applied in the data search. If not given
|
|
332
460
|
the whole dataset will be queried.
|
|
333
461
|
|
|
334
462
|
Returns
|
|
@@ -370,12 +498,15 @@ class databrowser:
|
|
|
370
498
|
multiversion=multiversion,
|
|
371
499
|
fail_on_error=fail_on_error,
|
|
372
500
|
uniq_key="file",
|
|
501
|
+
stream_zarr=False,
|
|
373
502
|
**search_keys,
|
|
374
503
|
)
|
|
375
504
|
result = this._facet_search(extended_search=extended_search)
|
|
376
505
|
counts = {}
|
|
377
506
|
for facet, value_counts in result.items():
|
|
378
|
-
counts[facet] = dict(
|
|
507
|
+
counts[facet] = dict(
|
|
508
|
+
zip(value_counts[::2], map(int, value_counts[1::2]))
|
|
509
|
+
)
|
|
379
510
|
return counts
|
|
380
511
|
|
|
381
512
|
@cached_property
|
|
@@ -384,7 +515,7 @@ class databrowser:
|
|
|
384
515
|
|
|
385
516
|
You can retrieve all information that is associated with your current
|
|
386
517
|
databrowser search. This can be useful for reverse searches for example
|
|
387
|
-
for retrieving metadata of object
|
|
518
|
+
for retrieving metadata of object stores or file/directory names.
|
|
388
519
|
|
|
389
520
|
Example
|
|
390
521
|
~~~~~~~
|
|
@@ -400,14 +531,17 @@ class databrowser:
|
|
|
400
531
|
|
|
401
532
|
"""
|
|
402
533
|
return {
|
|
403
|
-
k: v[::2]
|
|
534
|
+
k: v[::2]
|
|
535
|
+
for (k, v) in self._facet_search(extended_search=True).items()
|
|
404
536
|
}
|
|
405
537
|
|
|
406
538
|
@classmethod
|
|
407
539
|
def metadata_search(
|
|
408
540
|
cls,
|
|
409
541
|
*facets: str,
|
|
410
|
-
flavour: Literal[
|
|
542
|
+
flavour: Literal[
|
|
543
|
+
"freva", "cmip6", "cmip5", "cordex", "nextgems"
|
|
544
|
+
] = "freva",
|
|
411
545
|
time: Optional[str] = None,
|
|
412
546
|
host: Optional[str] = None,
|
|
413
547
|
time_select: Literal["flexible", "strict", "file"] = "flexible",
|
|
@@ -432,7 +566,7 @@ class databrowser:
|
|
|
432
566
|
flavour: str, default: freva
|
|
433
567
|
The Data Reference Syntax (DRS) standard specifying the type of climate
|
|
434
568
|
datasets to query.
|
|
435
|
-
time: str,
|
|
569
|
+
time: str, default: ""
|
|
436
570
|
Special search facet to refine/subset search results by time.
|
|
437
571
|
This can be a string representation of a time range or a single
|
|
438
572
|
timestamp. The timestamp has to follow ISO-8601. Valid strings are
|
|
@@ -525,11 +659,14 @@ class databrowser:
|
|
|
525
659
|
multiversion=multiversion,
|
|
526
660
|
fail_on_error=fail_on_error,
|
|
527
661
|
uniq_key="file",
|
|
662
|
+
stream_zarr=False,
|
|
528
663
|
**search_keys,
|
|
529
664
|
)
|
|
530
665
|
return {
|
|
531
666
|
k: v[::2]
|
|
532
|
-
for (k, v) in this._facet_search(
|
|
667
|
+
for (k, v) in this._facet_search(
|
|
668
|
+
extended_search=extended_search
|
|
669
|
+
).items()
|
|
533
670
|
}
|
|
534
671
|
|
|
535
672
|
@classmethod
|
|
@@ -591,16 +728,22 @@ class databrowser:
|
|
|
591
728
|
return {}
|
|
592
729
|
data = result.json()
|
|
593
730
|
if extended_search:
|
|
594
|
-
|
|
731
|
+
constraints = data["facets"].keys()
|
|
595
732
|
else:
|
|
596
|
-
|
|
597
|
-
return {f: v for f, v in data["facets"].items() if f in
|
|
733
|
+
constraints = data["primary_facets"]
|
|
734
|
+
return {f: v for f, v in data["facets"].items() if f in constraints}
|
|
598
735
|
|
|
599
|
-
def _get(
|
|
736
|
+
def _get(
|
|
737
|
+
self, url: str, **kwargs: Any
|
|
738
|
+
) -> Optional[requests.models.Response]:
|
|
600
739
|
"""Apply the get method to the databrowser."""
|
|
601
740
|
logger.debug("Searching %s with parameters: %s", url, self._params)
|
|
741
|
+
params = kwargs.pop("params", {})
|
|
742
|
+
kwargs.setdefault("timeout", 30)
|
|
602
743
|
try:
|
|
603
|
-
res = requests.get(
|
|
744
|
+
res = requests.get(
|
|
745
|
+
url, params={**self._params, **params}, **kwargs
|
|
746
|
+
)
|
|
604
747
|
res.raise_for_status()
|
|
605
748
|
return res
|
|
606
749
|
except KeyboardInterrupt:
|
freva_client/utils/__init__.py
CHANGED
|
@@ -17,7 +17,7 @@ def exception_handler(func: Callable[..., Any]) -> Callable[..., Any]:
|
|
|
17
17
|
|
|
18
18
|
@wraps(func)
|
|
19
19
|
def wrapper(*args: Any, **kwargs: Any) -> Any:
|
|
20
|
-
"""Wrapper function that handles the
|
|
20
|
+
"""Wrapper function that handles the exception."""
|
|
21
21
|
try:
|
|
22
22
|
return func(*args, **kwargs)
|
|
23
23
|
except KeyboardInterrupt:
|