freva-client 2410.0.1.tar.gz → 2411.0.0.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {freva_client-2410.0.1 → freva_client-2411.0.0}/PKG-INFO +2 -2
- {freva_client-2410.0.1 → freva_client-2411.0.0}/src/freva_client/__init__.py +1 -1
- {freva_client-2410.0.1 → freva_client-2411.0.0}/src/freva_client/cli/auth_cli.py +1 -3
- {freva_client-2410.0.1 → freva_client-2411.0.0}/src/freva_client/cli/databrowser_cli.py +121 -32
- {freva_client-2410.0.1 → freva_client-2411.0.0}/src/freva_client/query.py +184 -51
- freva_client-2411.0.0/src/freva_client/utils/databrowser_utils.py +368 -0
- freva_client-2410.0.1/src/freva_client/utils/databrowser_utils.py +0 -179
- {freva_client-2410.0.1 → freva_client-2411.0.0}/MANIFEST.in +0 -0
- {freva_client-2410.0.1 → freva_client-2411.0.0}/README.md +0 -0
- {freva_client-2410.0.1 → freva_client-2411.0.0}/assets/share/freva/freva.toml +0 -0
- {freva_client-2410.0.1 → freva_client-2411.0.0}/pyproject.toml +0 -0
- {freva_client-2410.0.1 → freva_client-2411.0.0}/src/freva_client/__main__.py +0 -0
- {freva_client-2410.0.1 → freva_client-2411.0.0}/src/freva_client/auth.py +0 -0
- {freva_client-2410.0.1 → freva_client-2411.0.0}/src/freva_client/cli/__init__.py +0 -0
- {freva_client-2410.0.1 → freva_client-2411.0.0}/src/freva_client/cli/cli_app.py +0 -0
- {freva_client-2410.0.1 → freva_client-2411.0.0}/src/freva_client/cli/cli_parser.py +0 -0
- {freva_client-2410.0.1 → freva_client-2411.0.0}/src/freva_client/cli/cli_utils.py +0 -0
- {freva_client-2410.0.1 → freva_client-2411.0.0}/src/freva_client/py.typed +0 -0
- {freva_client-2410.0.1 → freva_client-2411.0.0}/src/freva_client/utils/__init__.py +0 -0
- {freva_client-2410.0.1 → freva_client-2411.0.0}/src/freva_client/utils/logger.py +0 -0
src/freva_client/cli/auth_cli.py (+1 -3):

@@ -49,9 +49,7 @@ def authenticate_cli(
         "-f",
         help="Force token recreation, even if current token is still valid.",
     ),
-    verbose: int = typer.Option(
-        0, "-v", help="Increase verbosity", count=True
-    ),
+    verbose: int = typer.Option(0, "-v", help="Increase verbosity", count=True),
     version: Optional[bool] = typer.Option(
         False,
         "-V",
src/freva_client/cli/databrowser_cli.py (+121 -32):

@@ -10,6 +10,7 @@ from tempfile import NamedTemporaryFile
 from typing import Dict, List, Literal, Optional, Union, cast
 
 import typer
+import xarray as xr
 from freva_client import databrowser
 from freva_client.auth import Auth
 from freva_client.utils import exception_handler, logger
@@ -42,6 +43,7 @@ class Flavours(str, Enum):
     cmip5: str = "cmip5"
     cordex: str = "cordex"
     nextgems: str = "nextgems"
+    user: str = "user"
 
 
 class TimeSelect(str, Enum):
@@ -163,9 +165,7 @@ def metadata_search(
     parse_json: bool = typer.Option(
         False, "-j", "--json", help="Parse output in json format."
     ),
-    verbose: int = typer.Option(
-        0, "-v", help="Increase verbosity", count=True
-    ),
+    verbose: int = typer.Option(0, "-v", help="Increase verbosity", count=True),
     version: Optional[bool] = typer.Option(
         False,
         "-V",
@@ -187,11 +187,9 @@ def metadata_search(
     result = databrowser.metadata_search(
         *(facets or []),
         time=time or "",
-        time_select=cast(
-            Literal["file", "flexible", "strict"], time_select.value
-        ),
+        time_select=cast(Literal["file", "flexible", "strict"], time_select.value),
         flavour=cast(
-            Literal["freva", "cmip6", "cmip5", "cordex", "nextgems"],
+            Literal["freva", "cmip6", "cmip5", "cordex", "nextgems", "user"],
             flavour.value,
         ),
         host=host,
@@ -253,9 +251,7 @@ def data_search(
         "--time-select",
         help=TimeSelect.get_help(),
     ),
-    zarr: bool = typer.Option(
-        False, "--zarr", help="Create zarr stream files."
-    ),
+    zarr: bool = typer.Option(False, "--zarr", help="Create zarr stream files."),
     access_token: Optional[str] = typer.Option(
         None,
         "--access-token",
@@ -289,9 +285,7 @@ def data_search(
             "the hostname is read from a config file"
         ),
     ),
-    verbose: int = typer.Option(
-        0, "-v", help="Increase verbosity", count=True
-    ),
+    verbose: int = typer.Option(0, "-v", help="Increase verbosity", count=True),
     multiversion: bool = typer.Option(
         False,
         "--multi-version",
@@ -318,7 +312,7 @@ def data_search(
         time=time or "",
         time_select=cast(Literal["file", "flexible", "strict"], time_select),
         flavour=cast(
-            Literal["freva", "cmip6", "cmip5", "cordex", "nextgems"],
+            Literal["freva", "cmip6", "cmip5", "cordex", "nextgems", "user"],
             flavour.value,
         ),
         uniq_key=cast(Literal["uri", "file"], uniq_key.value),
@@ -425,9 +419,7 @@ def intake_catalogue(
             "the hostname is read from a config file"
         ),
     ),
-    verbose: int = typer.Option(
-        0, "-v", help="Increase verbosity", count=True
-    ),
+    verbose: int = typer.Option(0, "-v", help="Increase verbosity", count=True),
     multiversion: bool = typer.Option(
         False,
         "--multi-version",
@@ -451,7 +443,7 @@ def intake_catalogue(
         time=time or "",
         time_select=cast(Literal["file", "flexible", "strict"], time_select),
         flavour=cast(
-            Literal["freva", "cmip6", "cmip5", "cordex", "nextgems"],
+            Literal["freva", "cmip6", "cmip5", "cordex", "nextgems", "user"],
             flavour.value,
         ),
         uniq_key=cast(Literal["uri", "file"], uniq_key.value),
@@ -469,9 +461,7 @@ def intake_catalogue(
         print(Path(temp_f.name).read_text())
 
 
-@databrowser_app.command(
-    name="data-count", help="Count the databrowser search results"
-)
+@databrowser_app.command(name="data-count", help="Count the databrowser search results")
 @exception_handler
 def count_values(
     search_keys: Optional[List[str]] = typer.Argument(
@@ -547,9 +537,7 @@ def count_values(
     parse_json: bool = typer.Option(
         False, "-j", "--json", help="Parse output in json format."
     ),
-    verbose: int = typer.Option(
-        0, "-v", help="Increase verbosity", count=True
-    ),
+    verbose: int = typer.Option(0, "-v", help="Increase verbosity", count=True),
     version: Optional[bool] = typer.Option(
         False,
         "-V",
@@ -576,11 +564,9 @@ def count_values(
         result = databrowser.count_values(
             *facets,
             time=time or "",
-            time_select=cast(
-                Literal["file", "flexible", "strict"], time_select
-            ),
+            time_select=cast(Literal["file", "flexible", "strict"], time_select),
             flavour=cast(
-                Literal["freva", "cmip6", "cmip5", "cordex", "nextgems"],
+                Literal["freva", "cmip6", "cmip5", "cordex", "nextgems", "user"],
                 flavour.value,
             ),
             host=host,
@@ -594,11 +580,9 @@ def count_values(
             databrowser(
                 *facets,
                 time=time or "",
-                time_select=cast(
-                    Literal["file", "flexible", "strict"], time_select
-                ),
+                time_select=cast(Literal["file", "flexible", "strict"], time_select),
                 flavour=cast(
-                    Literal["freva", "cmip6", "cmip5", "cordex", "nextgems"],
+                    Literal["freva", "cmip6", "cmip5", "cordex", "nextgems", "user"],
                     flavour.value,
                 ),
                 host=host,
@@ -620,3 +604,108 @@ def count_values(
             print(f"{key}: {', '.join(counts)}")
     else:
         print(result)
+
+
+user_data_app = typer.Typer(help="Add or delete user data.")
+databrowser_app.add_typer(user_data_app, name="user-data")
+
+
+@user_data_app.command(name="add", help="Add user data into the databrowser.")
+@exception_handler
+def user_data_add(
+    paths: List[str] = typer.Option(
+        ...,
+        "--path",
+        "-p",
+        help="Paths to the user's data to be added.",
+    ),
+    facets: Optional[List[str]] = typer.Option(
+        None,
+        "--facet",
+        help="Key-value metadata pairs to categorize the user"
+        "input data in the format key=value.",
+    ),
+    host: Optional[str] = typer.Option(
+        None,
+        "--host",
+        help=(
+            "Set the hostname of the databrowser. If not set (default), "
+            "the hostname is read from a config file."
+        ),
+    ),
+    access_token: Optional[str] = typer.Option(
+        None,
+        "--access-token",
+        help="Access token for authentication when adding user data.",
+    ),
+    verbose: int = typer.Option(0, "-v", help="Increase verbosity", count=True),
+) -> None:
+    """Add user data into the databrowser."""
+    logger.set_verbosity(verbose)
+    logger.debug("Checking if the user has the right to add data")
+    result = databrowser(host=host)
+    _auth(result._cfg.auth_url, access_token)
+
+    facet_dict = {}
+    if facets:
+        for facet in facets:
+            if "=" not in facet:
+                logger.error(
+                    f"Invalid facet format: {facet}. Expected format: key=value."
+                )
+                raise typer.Exit(code=1)
+            key, value = facet.split("=", 1)
+            facet_dict[key] = value
+
+    logger.debug(f"Adding user data with paths {paths} and facets {facet_dict}")
+    databrowser.userdata(
+        action="add",
+        userdata_items=cast(List[Union[str, xr.Dataset]], paths),
+        metadata=facet_dict,
+        host=host
+    )
+
+
+@user_data_app.command(name="delete", help="Delete user data from the databrowser.")
+@exception_handler
+def user_data_delete(
+    search_keys: List[str] = typer.Option(
+        None,
+        "--search-key",
+        "-s",
+        help="Key-value metadata pairs to search and identify user data "
+        "for deletion in the format key=value.",
+    ),
+    host: Optional[str] = typer.Option(
+        None,
+        "--host",
+        help=(
+            "Set the hostname of the databrowser. If not set (default), "
+            "the hostname is read from a config file."
+        ),
+    ),
+    access_token: Optional[str] = typer.Option(
+        None,
+        "--access-token",
+        help="Access token for authentication when deleting user data.",
+    ),
+    verbose: int = typer.Option(0, "-v", help="Increase verbosity", count=True),
+) -> None:
+    """Delete user data from the databrowser."""
+    logger.set_verbosity(verbose)
+    logger.debug("Checking if the user has the right to delete data")
+    result = databrowser(host=host)
+    _auth(result._cfg.auth_url, access_token)
+
+    search_key_dict = {}
+    if search_keys:
+        for search_key in search_keys:
+            if "=" not in search_key:
+                logger.error(
+                    f"Invalid search key format: {search_key}. "
+                    "Expected format: key=value."
+                )
+                raise typer.Exit(code=1)
+            key, value = search_key.split("=", 1)
+            search_key_dict[key] = value
+    databrowser.userdata(action="delete", metadata=search_key_dict, host=host)
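Both new `user-data` subcommands accept repeated key=value options and turn them into a dictionary before calling databrowser.userdata(), splitting only on the first "=". The sketch below mirrors that parsing logic outside of typer; the parse_facets helper is illustrative only and is not part of the package:

    from typing import Dict, List

    def parse_facets(pairs: List[str]) -> Dict[str, str]:
        """Turn ["project=cmip5", "experiment=myFavExp"] into a dict,
        splitting each pair only on the first '=' as the CLI does."""
        facets: Dict[str, str] = {}
        for pair in pairs:
            if "=" not in pair:
                raise ValueError(f"Invalid facet format: {pair}. Expected key=value.")
            key, value = pair.split("=", 1)
            facets[key] = value
        return facets

    print(parse_facets(["project=cmip5", "experiment=myFavExp"]))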
src/freva_client/query.py (+184 -51):

@@ -1,5 +1,6 @@
 """Query climate data sets by using-key value pair search queries."""
 
+
 import sys
 from collections import defaultdict
 from fnmatch import fnmatch
@@ -8,6 +9,7 @@ from pathlib import Path
 from tempfile import NamedTemporaryFile
 from typing import (
     Any,
+    Collection,
     Dict,
     Iterator,
     List,
@@ -21,12 +23,13 @@ from typing import (
 import intake
 import intake_esm
 import requests
+import xarray as xr
 import yaml
 from rich import print as pprint
 
 from .auth import Auth
 from .utils import logger
-from .utils.databrowser_utils import Config
+from .utils.databrowser_utils import Config, UserDataHandler
 
 __all__ = ["databrowser"]
 
@@ -207,7 +210,7 @@ class databrowser:
         *facets: str,
         uniq_key: Literal["file", "uri"] = "file",
         flavour: Literal[
-            "freva", "cmip6", "cmip5", "cordex", "nextgems"
+            "freva", "cmip6", "cmip5", "cordex", "nextgems", "user"
         ] = "freva",
         time: Optional[str] = None,
         host: Optional[str] = None,
@@ -243,8 +246,7 @@ class databrowser:
         self, facets: Tuple[str, ...], search_kw: Dict[str, List[str]]
     ) -> None:
         metadata = {
-            k: v[::2]
-            for (k, v) in self._facet_search(extended_search=True).items()
+            k: v[::2] for (k, v) in self._facet_search(extended_search=True).items()
         }
         primary_key = list(metadata.keys() or ["project"])[0]
         num_facets = 0
@@ -267,19 +269,15 @@ class databrowser:
         headers = {}
         if self._stream_zarr:
             query_url = self._cfg.zarr_loader_url
-            token = self._auth.check_authentication(
-                auth_url=self._cfg.auth_url
-            )
+            token = self._auth.check_authentication(auth_url=self._cfg.auth_url)
             headers = {"Authorization": f"Bearer {token['access_token']}"}
-        result = self.
+        result = self._request("GET", query_url, headers=headers, stream=True)
         if result is not None:
             try:
                 for res in result.iter_lines():
                     yield res.decode("utf-8")
             except KeyboardInterrupt:
-                pprint(
-                    "[red][b]User interrupt: Exit[/red][/b]", file=sys.stderr
-                )
+                pprint("[red][b]User interrupt: Exit[/red][/b]", file=sys.stderr)
 
     def __repr__(self) -> str:
         params = ", ".join(
@@ -306,9 +304,7 @@ class databrowser:
 
         # Create a table-like structure for available flavors and search facets
         style = 'style="text-align: left"'
-        facet_heading = (
-            f"Available search facets for <em>{self._flavour}</em> flavour"
-        )
+        facet_heading = f"Available search facets for <em>{self._flavour}</em> flavour"
         html_repr = (
             "<table>"
             f"<tr><th colspan='2' {style}>{self.__class__.__name__}"
@@ -337,7 +333,7 @@ class databrowser:
 
 
         """
-        result = self.
+        result = self._request("GET", self._cfg.metadata_url)
         if result:
             return cast(int, result.json().get("total_count", 0))
         return 0
@@ -347,15 +343,11 @@ class databrowser:
         kwargs: Dict[str, Any] = {"stream": True}
         url = self._cfg.intake_url
         if self._stream_zarr:
-            token = self._auth.check_authentication(
-                auth_url=self._cfg.auth_url
-            )
+            token = self._auth.check_authentication(auth_url=self._cfg.auth_url)
             url = self._cfg.zarr_loader_url
-            kwargs["headers"] = {
-                "Authorization": f"Bearer {token['access_token']}"
-            }
+            kwargs["headers"] = {"Authorization": f"Bearer {token['access_token']}"}
         kwargs["params"] = {"catalogue-type": "intake"}
-        result = self.
+        result = self._request("GET", url, **kwargs)
         if result is None:
             raise ValueError("No results found")
 
@@ -365,9 +357,7 @@ class databrowser:
                 for content in result.iter_content(decode_unicode=False):
                     stream.write(content)
             except Exception as error:
-                raise ValueError(
-                    f"Couldn't write catalogue content: {error}"
-                ) from None
+                raise ValueError(f"Couldn't write catalogue content: {error}") from None
 
     def intake_catalogue(self) -> intake_esm.core.esm_datastore:
         """Create an intake esm catalogue object from the search.
@@ -392,7 +382,7 @@ class databrowser:
         .. execute_code::
 
             from freva_client import databrowser
-            db = databrowser(dataset="cmip6-
+            db = databrowser(dataset="cmip6-hsm", stream_zarr=True)
             cat = db.intake_catalogue()
             print(cat.df)
         """
@@ -405,7 +395,7 @@ class databrowser:
         cls,
         *facets: str,
         flavour: Literal[
-            "freva", "cmip6", "cmip5", "cordex", "nextgems"
+            "freva", "cmip6", "cmip5", "cordex", "nextgems", "user"
         ] = "freva",
         time: Optional[str] = None,
         host: Optional[str] = None,
@@ -455,6 +445,7 @@ class databrowser:
             Select all versions and not just the latest version (default).
         fail_on_error: bool, default: False
             Make the call fail if the connection to the databrowser could not
+            be established.
         **search_keys: str
             The search constraints to be applied in the data search. If not given
             the whole dataset will be queried.
@@ -504,9 +495,7 @@ class databrowser:
         result = this._facet_search(extended_search=extended_search)
         counts = {}
         for facet, value_counts in result.items():
-            counts[facet] = dict(
-                zip(value_counts[::2], map(int, value_counts[1::2]))
-            )
+            counts[facet] = dict(zip(value_counts[::2], map(int, value_counts[1::2])))
         return counts
 
     @cached_property
@@ -531,8 +520,7 @@ class databrowser:
 
         """
        return {
-            k: v[::2]
-            for (k, v) in self._facet_search(extended_search=True).items()
+            k: v[::2] for (k, v) in self._facet_search(extended_search=True).items()
         }
 
     @classmethod
@@ -540,7 +528,7 @@ class databrowser:
         cls,
         *facets: str,
         flavour: Literal[
-            "freva", "cmip6", "cmip5", "cordex", "nextgems"
+            "freva", "cmip6", "cmip5", "cordex", "nextgems", "user"
        ] = "freva",
         time: Optional[str] = None,
         host: Optional[str] = None,
@@ -593,6 +581,7 @@ class databrowser:
             name will be taken from the freva config file.
         fail_on_error: bool, default: False
             Make the call fail if the connection to the databrowser could not
+            be established.
         **search_keys: str, list[str]
             The facets to be applied in the data search. If not given
             the whole dataset will be queried.
@@ -664,9 +653,7 @@ class databrowser:
         )
         return {
             k: v[::2]
-            for (k, v) in this._facet_search(
-                extended_search=extended_search
-            ).items()
+            for (k, v) in this._facet_search(extended_search=extended_search).items()
         }
 
     @classmethod
@@ -723,7 +710,7 @@ class databrowser:
         self,
         extended_search: bool = False,
     ) -> Dict[str, List[str]]:
-        result = self.
+        result = self._request("GET", self._cfg.metadata_url)
         if result is None:
             return {}
         data = result.json()
@@ -733,26 +720,172 @@ class databrowser:
             constraints = data["primary_facets"]
         return {f: v for f, v in data["facets"].items() if f in constraints}
 
-
-
+    @classmethod
+    def userdata(
+        cls,
+        action: Literal["add", "delete"],
+        userdata_items: Optional[List[Union[str, xr.Dataset]]] = None,
+        metadata: Optional[Dict[str, str]] = None,
+        host: Optional[str] = None,
+        fail_on_error: bool = False,
+    ) -> None:
+        """Add or delete user data in the databrowser system.
+
+        Manage user data in the databrowser system by adding new data or
+        deleting existing data.
+
+        For the "``add``" action, the user can provide data items (file paths
+        or xarray datasets) along with metadata (key-value pairs) to
+        categorize and organize the data.
+
+        For the "``delete``" action, the user provides metadata as search
+        criteria to identify and remove the existing data from the
+        system.
+
+        Parameters
+        ~~~~~~~~~~
+        action : Literal["add", "delete"]
+            The action to perform: "add" to add new data, or "delete"
+            to remove existing data.
+        userdata_items : List[Union[str, xr.Dataset]], optional
+            A list of user file paths or xarray datasets to add to the
+            databrowser (required for "add").
+        metadata : Dict[str, str], optional
+            Key-value metadata pairs to categorize the data (for "add")
+            or search and identify data for
+            deletion (for "delete").
+        host : str, optional
+            Override the host name of the databrowser server. This is usually
+            the url where the freva web site can be found. Such as
+            www.freva.dkrz.de. By default no host name is given and the host
+            name will be taken from the freva config file.
+        fail_on_error : bool, optional
+            Make the call fail if the connection to the databrowser could not
+            be established.
+
+        Raises
+        ~~~~~~
+        ValueError
+            If the operation fails or required parameters are missing
+            for the specified action.
+        FileNotFoundError
+            If no user data is provided for the "add" action.
+
+        Example
+        ~~~~~~~
+
+        Adding user data:
+
+        .. execute_code::
+
+            from freva_client import authenticate, databrowser
+            import xarray as xr
+            token_info = authenticate(username="janedoe")
+            filenames = (
+                "../freva-rest/src/databrowser_api/mock/data/model/regional/cordex/output/EUR-11/"
+                "GERICS/NCC-NorESM1-M/rcp85/r1i1p1/GERICS-REMO2015/v1/3hr/pr/v20181212/*.nc"
+            )
+            filename1 = (
+                "../freva-rest/src/databrowser_api/mock/data/model/regional/cordex/output/EUR-11/"
+                "CLMcom/MPI-M-MPI-ESM-LR/historical/r0i0p0/CLMcom-CCLM4-8-17/v1/fx/orog/v20140515/"
+                "orog_EUR-11_MPI-M-MPI-ESM-LR_historical_r1i1p1_CLMcom-CCLM4-8-17_v1_fx.nc"
+            )
+            xarray_data = xr.open_dataset(filename1)
+            databrowser.userdata(
+                action="add",
+                userdata_items=[xarray_data, filenames],
+                metadata={"project": "cmip5", "experiment": "myFavExp"}
+            )
+
+        Deleting user data:
+
+        .. execute_code::
+
+            from freva_client import authenticate, databrowser
+            token_info = authenticate(username="janedoe")
+            databrowser.userdata(
+                action="delete",
+                metadata={"project": "cmip5", "experiment": "myFavExp"}
+            )
+        """
+        this = cls(
+            host=host,
+            fail_on_error=fail_on_error,
+        )
+        userdata_items = userdata_items or []
+        metadata = metadata or {}
+        url = f"{this._cfg.userdata_url}"
+        token = this._auth.check_authentication(auth_url=this._cfg.auth_url)
+        headers = {"Authorization": f"Bearer {token['access_token']}"}
+        payload_metadata: dict[str, Collection[Collection[str]]] = {}
+
+        if action == "add":
+            user_data_handler = UserDataHandler(userdata_items)
+            if user_data_handler.user_metadata:
+                payload_metadata = {
+                    "user_metadata": user_data_handler.user_metadata,
+                    "facets": metadata,
+                }
+                result = this._request(
+                    "POST", url, data=payload_metadata, headers=headers
+                )
+                if result is not None:
+                    response_data = result.json()
+                    status_message = response_data.get("status")
+                else:
+                    raise ValueError("Failed to add user data")
+                pprint(f"[b][green]{status_message}[green][b]")
+            else:
+                raise ValueError("No metadata generated from the input data.")
+
+        if action == "delete":
+            if userdata_items:
+                logger.info(
+                    "'userdata_items' are not needed for the 'delete'"
+                    "action and will be ignored."
+                )
+
+            result = this._request("DELETE", url, data=metadata, headers=headers)
+
+            if result is None:
+                raise ValueError("Failed to delete user data")
+            pprint("[b][green]User data deleted successfully[green][b]")
+
+    def _request(
+        self,
+        method: Literal["GET", "POST", "PUT", "PATCH", "DELETE"],
+        url: str,
+        data: Optional[Dict[str, Any]] = None,
+        **kwargs: Any
     ) -> Optional[requests.models.Response]:
-        """
-
+        """Request method to handle CRUD operations (GET, POST, PUT, PATCH, DELETE)."""
+        method_upper = method.upper()
+        timeout = kwargs.pop("timeout", 30)
         params = kwargs.pop("params", {})
-        kwargs.
+        stream = kwargs.pop("stream", False)
+
+        logger.debug("%s request to %s with data: %s and parameters: %s",
+                     method_upper, url, data, {**self._params, **params})
+
         try:
-
-
+            req = requests.Request(
+                method=method_upper,
+                url=url,
+                params={**self._params, **params},
+                json=None if method_upper in "GET" else data,
+                **kwargs
             )
-
-
+            with requests.Session() as session:
+                prepared = session.prepare_request(req)
+                res = session.send(prepared, timeout=timeout, stream=stream)
+                res.raise_for_status()
+                return res
+
         except KeyboardInterrupt:
             pprint("[red][b]User interrupt: Exit[/red][/b]", file=sys.stderr)
-        except (
-
-
-        ) as error:
-            msg = f"Search request failed with {error}"
+        except (requests.exceptions.ConnectionError,
+                requests.exceptions.HTTPError) as error:
+            msg = f"{method_upper} request failed with {error}"
             if self._fail_on_error:
                 raise ValueError(msg) from None
             logger.warning(msg)
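The new _request helper consolidates the previously separate GET calls into one method that prepares a requests.Request and sends it through a Session, so GET, POST and DELETE share a single code path. Below is a minimal standalone sketch of that pattern; the function name send_request and the example URL are placeholders for illustration, not part of freva-client:

    import requests

    def send_request(method: str, url: str, data=None, timeout: float = 30, **kwargs):
        """Build, prepare and send an HTTP request; raise for HTTP error codes."""
        req = requests.Request(
            method=method.upper(),
            url=url,
            # GET requests carry no JSON body, mirroring the refactored helper
            json=None if method.upper() == "GET" else data,
            **kwargs,
        )
        with requests.Session() as session:
            prepared = session.prepare_request(req)
            response = session.send(prepared, timeout=timeout)
            response.raise_for_status()
            return response

    # Example: send_request("GET", "https://www.example.com", params={"q": "tas"})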
freva_client-2411.0.0/src/freva_client/utils/databrowser_utils.py (new file, +368):

@@ -0,0 +1,368 @@
+"""Various utilities for getting the databrowser working."""
+
+import concurrent.futures
+import os
+import sys
+import sysconfig
+from configparser import ConfigParser, ExtendedInterpolation
+from functools import cached_property
+from pathlib import Path
+from typing import (
+    Any,
+    Dict,
+    Iterator,
+    List,
+    Literal,
+    Optional,
+    Sequence,
+    Tuple,
+    Union,
+    cast,
+)
+
+import appdirs
+import numpy as np
+import requests
+import tomli
+import xarray as xr
+
+from . import logger
+
+
+class Config:
+    """Client config class.
+
+    This class is used for basic configuration of the databrowser
+    client.
+    """
+
+    def __init__(
+        self,
+        host: Optional[str] = None,
+        uniq_key: Literal["file", "uri"] = "file",
+        flavour: str = "freva",
+    ) -> None:
+        self.databrowser_url = f"{self.get_api_url(host)}/databrowser"
+        self.auth_url = f"{self.get_api_url(host)}/auth/v2"
+        self.uniq_key = uniq_key
+        self._flavour = flavour
+
+    def _read_ini(self, path: Path) -> str:
+        """Read an ini file."""
+        ini_parser = ConfigParser(interpolation=ExtendedInterpolation())
+        ini_parser.read_string(path.read_text())
+        config = ini_parser["evaluation_system"]
+        scheme, host = self._split_url(
+            config.get("databrowser.host") or config.get("solr.host")
+        )
+        host, _, port = (host or "").partition(":")
+        port = port or config.get("databrowser.port", "")
+        if port:
+            host = f"{host}:{port}"
+        return f"{scheme}://{host}"
+
+    def _read_toml(self, path: Path) -> str:
+        """Read a new style toml config file."""
+        try:
+            config = tomli.loads(path.read_text()).get("freva", {})
+            scheme, host = self._split_url(cast(str, config["host"]))
+        except (tomli.TOMLDecodeError, KeyError):
+            return ""
+        host, _, port = host.partition(":")
+        if port:
+            host = f"{host}:{port}"
+        return f"{scheme}://{host}"
+
+    def _read_config(self, path: Path, file_type: Literal["toml", "ini"]) -> str:
+        """Read the configuration."""
+        data_types = {"toml": self._read_toml, "ini": self._read_ini}
+        try:
+            return data_types[file_type](path)
+        except KeyError:
+            pass
+        return ""
+
+    @cached_property
+    def overview(self) -> Dict[str, Any]:
+        """Get an overview of the all databrowser flavours and search keys."""
+        try:
+            res = requests.get(f"{self.databrowser_url}/overview", timeout=15)
+        except requests.exceptions.ConnectionError:
+            raise ValueError(f"Could not connect to {self.databrowser_url}") from None
+        return cast(Dict[str, Any], res.json())
+
+    def _get_databrowser_host_from_config(self) -> str:
+        """Get the config file order."""
+
+        eval_conf = self.get_dirs(user=False) / "evaluation_system.conf"
+        freva_config = Path(
+            os.environ.get("FREVA_CONFIG")
+            or Path(self.get_dirs(user=False)) / "freva.toml"
+        )
+        paths: Dict[Path, Literal["toml", "ini"]] = {
+            Path(appdirs.user_config_dir("freva")) / "freva.toml": "toml",
+            Path(self.get_dirs(user=True)) / "freva.toml": "toml",
+            freva_config: "toml",
+            Path(os.environ.get("EVALUATION_SYSTEM_CONFIG_FILE") or eval_conf): "ini",
+        }
+        for config_path, config_type in paths.items():
+            if config_path.is_file():
+                host = self._read_config(config_path, config_type)
+                if host:
+                    return host
+        raise ValueError(
+            "No databrowser host configured, please use a"
+            " configuration defining a databrowser host or"
+            " set a host name using the `host` key"
+        )
+
+    @cached_property
+    def flavour(self) -> str:
+        """Get the flavour."""
+        flavours = self.overview.get("flavours", [])
+        if self._flavour not in flavours:
+            raise ValueError(
+                f"Search {self._flavour} not available, select from"
+                f" {','.join(flavours)}"
+            )
+        return self._flavour
+
+    @property
+    def search_url(self) -> str:
+        """Define the data search endpoint."""
+        return f"{self.databrowser_url}/data_search/{self.flavour}/{self.uniq_key}"
+
+    @property
+    def zarr_loader_url(self) -> str:
+        """Define the url for getting zarr files."""
+        return f"{self.databrowser_url}/load/{self.flavour}/"
+
+    @property
+    def intake_url(self) -> str:
+        """Define the url for creating intake catalogues."""
+        return f"{self.databrowser_url}/intake_catalogue/{self.flavour}/{self.uniq_key}"
+
+    @property
+    def metadata_url(self) -> str:
+        """Define the endpoint for the metadata search."""
+        return (
+            f"{self.databrowser_url}/metadata_search/" f"{self.flavour}/{self.uniq_key}"
+        )
+
+    @staticmethod
+    def _split_url(url: str) -> Tuple[str, str]:
+        scheme, _, hostname = url.partition("://")
+        if not hostname:
+            hostname = scheme
+            scheme = ""
+        scheme = scheme or "http"
+        return scheme, hostname
+
+    def get_api_url(self, url: Optional[str]) -> str:
+        """Construct the databrowser url from a given hostname."""
+        url = url or self._get_databrowser_host_from_config()
+        scheme, hostname = self._split_url(url)
+        hostname, _, port = hostname.partition(":")
+        if port:
+            hostname = f"{hostname}:{port}"
+        hostname = hostname.partition("/")[0]
+        return f"{scheme}://{hostname}/api"
+
+    @staticmethod
+    def get_dirs(user: bool = True) -> Path:
+        """Get the 'scripts' and 'purelib' directories we'll install into.
+
+        This is now a thin wrapper around sysconfig.get_paths(). It's not inlined,
+        because some tests mock it out to install to a different location.
+        """
+        if user:
+            if (sys.platform == "darwin") and sysconfig.get_config_var(
+                "PYTHONFRAMEWORK"
+            ):
+                scheme = "osx_framework_user"
+            else:
+                scheme = f"{os.name}_user"
+            return Path(sysconfig.get_path("data", scheme)) / "share" / "freva"
+        # The default scheme is 'posix_prefix' or 'nt', and should work for e.g.
+        # installing into a virtualenv
+        return Path(sysconfig.get_path("data")) / "share" / "freva"
+
+    @property
+    def userdata_url(self) -> str:
+        """Define the url for adding and deleting user-data."""
+        return f"{self.databrowser_url}/userdata"
+
+
+class UserDataHandler:
+    """Class for processing user data.
+
+    This class is used for processing user data and extracting metadata
+    from the data files.
+    """
+    def __init__(self, userdata_items: List[Union[str, xr.Dataset]]) -> None:
+        self._suffixes = [".nc", ".nc4", ".grb", ".grib", ".zarr", "zar"]
+        self.user_metadata: List[Dict[str, Union[str, List[str], Dict[str, str]]]] = []
+        self._metadata_collection: List[Dict[str, Union[str, List[str]]]] = []
+        try:
+            self._executor = concurrent.futures.ThreadPoolExecutor(
+                max_workers=min(int(os.cpu_count() or 4), 15)
+            )
+            self._process_user_data(userdata_items)
+        finally:
+            self._executor.shutdown(wait=True)
+
+    def _gather_files(self, path: Path, pattern: str = "*") -> Iterator[Path]:
+        """Gather all valid files from directory and wildcard pattern."""
+        for item in path.rglob(pattern):
+            if item.is_file() and item.suffix in self._suffixes:
+                yield item
+
+    def _validate_user_data(
+        self,
+        user_data: Sequence[Union[str, xr.Dataset]],
+    ) -> Dict[str, Union[List[Path], List[xr.Dataset]]]:
+        validated_paths: List[Path] = []
+        validated_xarray_datasets: List[xr.Dataset] = []
+        for data in user_data:
+            if isinstance(data, (str, Path)):
+                path = Path(data).expanduser().absolute()
+                if path.is_dir():
+                    validated_paths.extend(self._gather_files(path))
+                elif path.is_file() and path.suffix in self._suffixes:
+                    validated_paths.append(path)
+                else:
+                    validated_paths.extend(
+                        self._gather_files(path.parent, pattern=path.name)
+                    )
+            elif isinstance(data, xr.Dataset):
+                validated_xarray_datasets.append(data)
+
+        if not validated_paths and not validated_xarray_datasets:
+            raise FileNotFoundError("No valid file paths or xarray datasets found.")
+        return {
+            "validated_user_paths": validated_paths,
+            "validated_user_xrdatasets": validated_xarray_datasets,
+        }
+
+    def _process_user_data(self, userdata_items: List[Union[str, xr.Dataset]],
+                           ) -> None:
+        """Process xarray datasets and file paths using thread pool."""
+        futures = []
+        validated_userdata: Dict[str, Union[List[Path], List[xr.Dataset]]] = \
+            self._validate_user_data(userdata_items)
+        if validated_userdata["validated_user_xrdatasets"]:
+            futures.append(
+                self._executor.submit(self._process_userdata_in_executor,
+                                      validated_userdata["validated_user_xrdatasets"])
+            )
+
+        if validated_userdata["validated_user_paths"]:
+            futures.append(
+                self._executor.submit(self._process_userdata_in_executor,
+                                      validated_userdata["validated_user_paths"])
+            )
+        for future in futures:
+            try:
+                future.result()
+            except Exception as e:  # pragma: no cover
+                logger.error(f"Error processing batch: {e}")
+
+    def _process_userdata_in_executor(
+        self, validated_userdata: Union[List[Path], List[xr.Dataset]]
+    ) -> None:
+        for data in validated_userdata:
+            metadata = self._get_metadata(data)
+            if isinstance(metadata, Exception) or metadata == {}:
+                logger.warning("Error getting metadata: %s", metadata)
+            else:
+                self.user_metadata.append(metadata)
+
+    def _timedelta_to_cmor_frequency(self, dt: float) -> str:
+        for total_seconds, frequency in self._time_table.items():
+            if dt >= total_seconds:
+                return frequency
+        return "fx"  # pragma: no cover
+
+    @property
+    def _time_table(self) -> dict[int, str]:
+        return {
+            315360000: "dec",  # Decade
+            31104000: "yr",  # Year
+            2538000: "mon",  # Month
+            1296000: "sem",  # Seasonal (half-year)
+            84600: "day",  # Day
+            21600: "6h",  # Six-hourly
+            10800: "3h",  # Three-hourly
+            3600: "hr",  # Hourly
+            1: "subhr",  # Sub-hourly
+        }
+
+    def _get_time_frequency(self, time_delta: int, freq_attr: str = "") -> str:
+        if freq_attr in self._time_table.values():
+            return freq_attr
+        return self._timedelta_to_cmor_frequency(time_delta)
+
+    def _get_metadata(
+        self, path: Union[os.PathLike[str], xr.Dataset]
+    ) -> Dict[str, Union[str, List[str], Dict[str, str]]]:
+        """Get metadata from a path or xarray dataset."""
+
+        try:
+            dset = (
+                path if isinstance(path, xr.Dataset)
+                else xr.open_mfdataset(str(path),
+                                       parallel=False,
+                                       use_cftime=True,
+                                       lock=False)
+            )
+            time_freq = dset.attrs.get("frequency", "")
+            data_vars = list(map(str, dset.data_vars))
+            coords = list(map(str, dset.coords))
+            try:
+                times = dset["time"].values[:]
+            except (KeyError, IndexError, TypeError):
+                times = np.array([])
+
+        except Exception as error:
+            logger.error("Failed to open data file %s: %s", str(path), error)
+            return {}
+        if len(times) > 0:
+            try:
+                time_str = f"[{times[0].isoformat()}Z TO {times[-1].isoformat()}Z]"
+                dt = abs((times[1] - times[0]).total_seconds()) if len(times) > 1 else 0
+            except Exception as non_cftime:
+                logger.info("The time var is not based on the cftime: %s", non_cftime)
+                time_str = (
+                    f"[{np.datetime_as_string(times[0], unit='s')}Z TO "
+                    f"{np.datetime_as_string(times[-1], unit='s')}Z]"
+                )
+                dt = (
+                    abs((times[1] - times[0]).astype("timedelta64[s]").astype(int))
+                    if len(times) > 1
+                    else 0
+                )
+        else:
+            time_str = "fx"
+            dt = 0
+
+        variables = [
+            var
+            for var in data_vars
+            if var not in coords
+            and not any(
+                term in var.lower() for term in ["lon", "lat", "bnds", "x", "y"]
+            )
+            and var.lower() not in ["rotated_pole", "rot_pole"]
+        ]
+
+        _data: Dict[str, Union[str, List[str], Dict[str, str]]] = {}
+        _data.setdefault("variable", variables[0])
+        _data.setdefault("time_frequency", self._get_time_frequency(dt, time_freq))
+        _data["time"] = time_str
+        _data.setdefault("cmor_table", _data["time_frequency"])
+        _data.setdefault("version", "")
+        if isinstance(path, Path):
+            _data["file"] = str(path)
+        if isinstance(path, xr.Dataset):
+            _data["file"] = str(dset.encoding["source"])
+        return _data
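UserDataHandler derives the time_frequency facet from the spacing of the time axis: the time table maps a minimum step in seconds to a CMOR-style frequency label, and the first threshold the step reaches wins. A standalone sketch of that lookup, with the threshold values copied from the table in the new file (the module-level TIME_TABLE constant and function name are illustrative only):

    TIME_TABLE = {
        315360000: "dec",   # decade
        31104000: "yr",     # year
        2538000: "mon",     # month
        1296000: "sem",     # "sem" bucket, threshold as in the table above
        84600: "day",       # day
        21600: "6h",        # six-hourly
        10800: "3h",        # three-hourly
        3600: "hr",         # hourly
        1: "subhr",         # sub-hourly
    }

    def timedelta_to_cmor_frequency(step_seconds: float) -> str:
        """Return the first frequency whose threshold the time step reaches."""
        for threshold, frequency in TIME_TABLE.items():
            if step_seconds >= threshold:
                return frequency
        return "fx"  # no time axis or a zero step

    print(timedelta_to_cmor_frequency(10800))  # -> "3h"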
freva_client-2410.0.1/src/freva_client/utils/databrowser_utils.py (removed, -179):

@@ -1,179 +0,0 @@
-"""Various utilities for getting the databrowser working."""
-
-import os
-import sys
-import sysconfig
-from configparser import ConfigParser, ExtendedInterpolation
-from functools import cached_property
-from pathlib import Path
-from typing import Any, Dict, Literal, Optional, Tuple, cast
-
-import appdirs
-import requests
-import tomli
-
-
-class Config:
-    """Client config class.
-
-    This class is used for basic configuration of the databrowser
-    client.
-    """
-
-    def __init__(
-        self,
-        host: Optional[str] = None,
-        uniq_key: Literal["file", "uri"] = "file",
-        flavour: str = "freva",
-    ) -> None:
-        self.databrowser_url = f"{self.get_api_url(host)}/databrowser"
-        self.auth_url = f"{self.get_api_url(host)}/auth/v2"
-        self.uniq_key = uniq_key
-        self._flavour = flavour
-
-    def _read_ini(self, path: Path) -> str:
-        """Read an ini file."""
-        ini_parser = ConfigParser(interpolation=ExtendedInterpolation())
-        ini_parser.read_string(path.read_text())
-        config = ini_parser["evaluation_system"]
-        scheme, host = self._split_url(
-            config.get("databrowser.host") or config.get("solr.host")
-        )
-        host, _, port = (host or "").partition(":")
-        port = port or config.get("databrowser.port", "")
-        if port:
-            host = f"{host}:{port}"
-        return f"{scheme}://{host}"
-
-    def _read_toml(self, path: Path) -> str:
-        """Read a new style toml config file."""
-        try:
-            config = tomli.loads(path.read_text()).get("freva", {})
-            scheme, host = self._split_url(cast(str, config["host"]))
-        except (tomli.TOMLDecodeError, KeyError):
-            return ""
-        host, _, port = host.partition(":")
-        if port:
-            host = f"{host}:{port}"
-        return f"{scheme}://{host}"
-
-    def _read_config(
-        self, path: Path, file_type: Literal["toml", "ini"]
-    ) -> str:
-        """Read the configuration."""
-        data_types = {"toml": self._read_toml, "ini": self._read_ini}
-        try:
-            return data_types[file_type](path)
-        except KeyError:
-            pass
-        return ""
-
-    @cached_property
-    def overview(self) -> Dict[str, Any]:
-        """Get an overview of the all databrowser flavours and search keys."""
-        try:
-            res = requests.get(f"{self.databrowser_url}/overview", timeout=3)
-        except requests.exceptions.ConnectionError:
-            raise ValueError(
-                f"Could not connect to {self.databrowser_url}"
-            ) from None
-        return cast(Dict[str, Any], res.json())
-
-    def _get_databrowser_host_from_config(self) -> str:
-        """Get the config file order."""
-
-        eval_conf = self.get_dirs(user=False) / "evaluation_system.conf"
-        freva_config = Path(
-            os.environ.get("FREVA_CONFIG")
-            or Path(self.get_dirs(user=False)) / "freva.toml"
-        )
-        paths: Dict[Path, Literal["toml", "ini"]] = {
-            Path(appdirs.user_config_dir("freva")) / "freva.toml": "toml",
-            Path(self.get_dirs(user=True)) / "freva.toml": "toml",
-            freva_config: "toml",
-            Path(
-                os.environ.get("EVALUATION_SYSTEM_CONFIG_FILE") or eval_conf
-            ): "ini",
-        }
-        for config_path, config_type in paths.items():
-            if config_path.is_file():
-                host = self._read_config(config_path, config_type)
-                if host:
-                    return host
-        raise ValueError(
-            "No databrowser host configured, please use a"
-            " configuration defining a databrowser host or"
-            " set a host name using the `host` key"
-        )
-
-    @cached_property
-    def flavour(self) -> str:
-        """Get the flavour."""
-        flavours = self.overview.get("flavours", [])
-        if self._flavour not in flavours:
-            raise ValueError(
-                f"Search {self._flavour} not available, select from"
-                f" {','.join(flavours)}"
-            )
-        return self._flavour
-
-    @property
-    def search_url(self) -> str:
-        """Define the data search endpoint."""
-        return f"{self.databrowser_url}/data_search/{self.flavour}/{self.uniq_key}"
-
-    @property
-    def zarr_loader_url(self) -> str:
-        """Define the url for getting zarr files."""
-        return f"{self.databrowser_url}/load/{self.flavour}/"
-
-    @property
-    def intake_url(self) -> str:
-        """Define the url for creating intake catalogues."""
-        return f"{self.databrowser_url}/intake_catalogue/{self.flavour}/{self.uniq_key}"
-
-    @property
-    def metadata_url(self) -> str:
-        """Define the endpoint for the metadata search."""
-        return (
-            f"{self.databrowser_url}/metadata_search/"
-            f"{self.flavour}/{self.uniq_key}"
-        )
-
-    @staticmethod
-    def _split_url(url: str) -> Tuple[str, str]:
-        scheme, _, hostname = url.partition("://")
-        if not hostname:
-            hostname = scheme
-            scheme = ""
-        scheme = scheme or "http"
-        return scheme, hostname
-
-    def get_api_url(self, url: Optional[str]) -> str:
-        """Construct the databrowser url from a given hostname."""
-        url = url or self._get_databrowser_host_from_config()
-        scheme, hostname = self._split_url(url)
-        hostname, _, port = hostname.partition(":")
-        if port:
-            hostname = f"{hostname}:{port}"
-        hostname = hostname.partition("/")[0]
-        return f"{scheme}://{hostname}/api"
-
-    @staticmethod
-    def get_dirs(user: bool = True) -> Path:
-        """Get the 'scripts' and 'purelib' directories we'll install into.
-
-        This is now a thin wrapper around sysconfig.get_paths(). It's not inlined,
-        because some tests mock it out to install to a different location.
-        """
-        if user:
-            if (sys.platform == "darwin") and sysconfig.get_config_var(
-                "PYTHONFRAMEWORK"
-            ):
-                scheme = "osx_framework_user"
-            else:
-                scheme = f"{os.name}_user"
-            return Path(sysconfig.get_path("data", scheme)) / "share" / "freva"
-        # The default scheme is 'posix_prefix' or 'nt', and should work for e.g.
-        # installing into a virtualenv
-        return Path(sysconfig.get_path("data")) / "share" / "freva"