freva-client 2410.0.1__tar.gz → 2502.0.0__tar.gz
This diff shows the content of two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only and reflects the changes between those package versions.
Potentially problematic release: this version of freva-client might be problematic.
- {freva_client-2410.0.1 → freva_client-2502.0.0}/PKG-INFO +4 -3
- {freva_client-2410.0.1 → freva_client-2502.0.0}/pyproject.toml +3 -2
- {freva_client-2410.0.1 → freva_client-2502.0.0}/src/freva_client/__init__.py +1 -1
- {freva_client-2410.0.1 → freva_client-2502.0.0}/src/freva_client/cli/auth_cli.py +2 -3
- {freva_client-2410.0.1 → freva_client-2502.0.0}/src/freva_client/cli/cli_app.py +1 -0
- {freva_client-2410.0.1 → freva_client-2502.0.0}/src/freva_client/cli/cli_utils.py +2 -1
- {freva_client-2410.0.1 → freva_client-2502.0.0}/src/freva_client/cli/databrowser_cli.py +133 -33
- {freva_client-2410.0.1 → freva_client-2502.0.0}/src/freva_client/query.py +183 -29
- freva_client-2502.0.0/src/freva_client/utils/databrowser_utils.py +399 -0
- freva_client-2410.0.1/src/freva_client/utils/databrowser_utils.py +0 -179
- {freva_client-2410.0.1 → freva_client-2502.0.0}/MANIFEST.in +0 -0
- {freva_client-2410.0.1 → freva_client-2502.0.0}/README.md +0 -0
- {freva_client-2410.0.1 → freva_client-2502.0.0}/assets/share/freva/freva.toml +0 -0
- {freva_client-2410.0.1 → freva_client-2502.0.0}/src/freva_client/__main__.py +0 -0
- {freva_client-2410.0.1 → freva_client-2502.0.0}/src/freva_client/auth.py +0 -0
- {freva_client-2410.0.1 → freva_client-2502.0.0}/src/freva_client/cli/__init__.py +0 -0
- {freva_client-2410.0.1 → freva_client-2502.0.0}/src/freva_client/cli/cli_parser.py +0 -0
- {freva_client-2410.0.1 → freva_client-2502.0.0}/src/freva_client/py.typed +0 -0
- {freva_client-2410.0.1 → freva_client-2502.0.0}/src/freva_client/utils/__init__.py +0 -0
- {freva_client-2410.0.1 → freva_client-2502.0.0}/src/freva_client/utils/logger.py +0 -0
PKG-INFO

@@ -1,11 +1,11 @@
-Metadata-Version: 2.
+Metadata-Version: 2.4
 Name: freva-client
-Version: 2410.0.1
+Version: 2502.0.0
 Summary: Search for climate data based on key-value pairs
 Author-email: "DKRZ, Clint" <freva@dkrz.de>
 Requires-Python: >=3.8
 Description-Content-Type: text/markdown
-Classifier: Development Status ::
+Classifier: Development Status :: 4 - Beta
 Classifier: Environment :: Console
 Classifier: Intended Audience :: Developers
 Classifier: Intended Audience :: Science/Research
@@ -16,6 +16,7 @@ Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
 Requires-Dist: appdirs
 Requires-Dist: pyyaml
 Requires-Dist: authlib
pyproject.toml

@@ -8,7 +8,7 @@ description = "Search for climate data based on key-value pairs"
 authors = [{name = "DKRZ, Clint", email = "freva@dkrz.de"}]
 readme = "README.md"
 classifiers = [
-    "Development Status ::
+    "Development Status :: 4 - Beta",
     "Environment :: Console",
     "Intended Audience :: Developers",
     "Intended Audience :: Science/Research",
@@ -19,6 +19,7 @@ classifiers = [
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
     "Programming Language :: Python :: 3.12",
+    "Programming Language :: Python :: 3.13",
 ]
 requires-python = ">=3.8"
 dependencies = [
@@ -48,4 +49,4 @@ include = ["assets/*"]
 [tool.flit.external-data]
 directory = "assets"
 [package-data]
-
+freva_client = ["py.typed"]
src/freva_client/cli/auth_cli.py

@@ -5,6 +5,7 @@ from getpass import getuser
 from typing import Optional
 
 import typer
+
 from freva_client import authenticate
 from freva_client.utils import exception_handler, logger
 
@@ -49,9 +50,7 @@ def authenticate_cli(
         "-f",
         help="Force token recreation, even if current token is still valid.",
     ),
-    verbose: int = typer.Option(
-        0, "-v", help="Increase verbosity", count=True
-    ),
+    verbose: int = typer.Option(0, "-v", help="Increase verbosity", count=True),
     version: Optional[bool] = typer.Option(
         False,
         "-V",
src/freva_client/cli/databrowser_cli.py

@@ -10,6 +10,8 @@ from tempfile import NamedTemporaryFile
 from typing import Dict, List, Literal, Optional, Union, cast
 
 import typer
+import xarray as xr
+
 from freva_client import databrowser
 from freva_client.auth import Auth
 from freva_client.utils import exception_handler, logger
@@ -30,26 +32,27 @@ def _auth(url: str, token: Optional[str]) -> None:
 class UniqKeys(str, Enum):
     """Literal implementation for the cli."""
 
-    file
-    uri
+    file = "file"
+    uri = "uri"
 
 
 class Flavours(str, Enum):
     """Literal implementation for the cli."""
 
-    freva
-    cmip6
-    cmip5
-    cordex
-    nextgems
+    freva = "freva"
+    cmip6 = "cmip6"
+    cmip5 = "cmip5"
+    cordex = "cordex"
+    nextgems = "nextgems"
+    user = "user"
 
 
 class TimeSelect(str, Enum):
     """Literal implementation for the cli."""
 
-    strict
-    flexible
-    file
+    strict = "strict"
+    flexible = "flexible"
+    file = "file"
 
     @staticmethod
     def get_help() -> str:
@@ -163,9 +166,7 @@ def metadata_search(
     parse_json: bool = typer.Option(
         False, "-j", "--json", help="Parse output in json format."
     ),
-    verbose: int = typer.Option(
-        0, "-v", help="Increase verbosity", count=True
-    ),
+    verbose: int = typer.Option(0, "-v", help="Increase verbosity", count=True),
     version: Optional[bool] = typer.Option(
         False,
         "-V",
@@ -191,7 +192,7 @@ def metadata_search(
             Literal["file", "flexible", "strict"], time_select.value
         ),
         flavour=cast(
-            Literal["freva", "cmip6", "cmip5", "cordex", "nextgems"],
+            Literal["freva", "cmip6", "cmip5", "cordex", "nextgems", "user"],
             flavour.value,
         ),
         host=host,
@@ -253,9 +254,7 @@ def data_search(
         "--time-select",
         help=TimeSelect.get_help(),
     ),
-    zarr: bool = typer.Option(
-        False, "--zarr", help="Create zarr stream files."
-    ),
+    zarr: bool = typer.Option(False, "--zarr", help="Create zarr stream files."),
     access_token: Optional[str] = typer.Option(
         None,
         "--access-token",
@@ -289,9 +288,7 @@ def data_search(
             "the hostname is read from a config file"
         ),
     ),
-    verbose: int = typer.Option(
-        0, "-v", help="Increase verbosity", count=True
-    ),
+    verbose: int = typer.Option(0, "-v", help="Increase verbosity", count=True),
     multiversion: bool = typer.Option(
         False,
         "--multi-version",
@@ -318,7 +315,7 @@ def data_search(
         time=time or "",
         time_select=cast(Literal["file", "flexible", "strict"], time_select),
         flavour=cast(
-            Literal["freva", "cmip6", "cmip5", "cordex", "nextgems"],
+            Literal["freva", "cmip6", "cmip5", "cordex", "nextgems", "user"],
             flavour.value,
         ),
         uniq_key=cast(Literal["uri", "file"], uniq_key.value),
@@ -425,9 +422,7 @@ def intake_catalogue(
             "the hostname is read from a config file"
         ),
     ),
-    verbose: int = typer.Option(
-        0, "-v", help="Increase verbosity", count=True
-    ),
+    verbose: int = typer.Option(0, "-v", help="Increase verbosity", count=True),
     multiversion: bool = typer.Option(
         False,
         "--multi-version",
@@ -451,7 +446,7 @@ def intake_catalogue(
         time=time or "",
         time_select=cast(Literal["file", "flexible", "strict"], time_select),
         flavour=cast(
-            Literal["freva", "cmip6", "cmip5", "cordex", "nextgems"],
+            Literal["freva", "cmip6", "cmip5", "cordex", "nextgems", "user"],
             flavour.value,
         ),
         uniq_key=cast(Literal["uri", "file"], uniq_key.value),
@@ -547,9 +542,7 @@ def count_values(
     parse_json: bool = typer.Option(
         False, "-j", "--json", help="Parse output in json format."
     ),
-    verbose: int = typer.Option(
-        0, "-v", help="Increase verbosity", count=True
-    ),
+    verbose: int = typer.Option(0, "-v", help="Increase verbosity", count=True),
     version: Optional[bool] = typer.Option(
         False,
         "-V",
@@ -576,11 +569,9 @@ def count_values(
         result = databrowser.count_values(
             *facets,
             time=time or "",
-            time_select=cast(
-                Literal["file", "flexible", "strict"], time_select
-            ),
+            time_select=cast(Literal["file", "flexible", "strict"], time_select),
             flavour=cast(
-                Literal["freva", "cmip6", "cmip5", "cordex", "nextgems"],
+                Literal["freva", "cmip6", "cmip5", "cordex", "nextgems", "user"],
                 flavour.value,
             ),
             host=host,
@@ -598,7 +589,9 @@ def count_values(
                 Literal["file", "flexible", "strict"], time_select
             ),
             flavour=cast(
-                Literal[
+                Literal[
+                    "freva", "cmip6", "cmip5", "cordex", "nextgems", "user"
+                ],
                 flavour.value,
             ),
             host=host,
@@ -620,3 +613,110 @@ def count_values(
         print(f"{key}: {', '.join(counts)}")
     else:
         print(result)
+
+
+user_data_app = typer.Typer(help="Add or delete user data.")
+databrowser_app.add_typer(user_data_app, name="user-data")
+
+
+@user_data_app.command(name="add", help="Add user data into the databrowser.")
+@exception_handler
+def user_data_add(
+    paths: List[str] = typer.Option(
+        ...,
+        "--path",
+        "-p",
+        help="Paths to the user's data to be added.",
+    ),
+    facets: Optional[List[str]] = typer.Option(
+        None,
+        "--facet",
+        help="Key-value metadata pairs to categorize the user"
+        "input data in the format key=value.",
+    ),
+    host: Optional[str] = typer.Option(
+        None,
+        "--host",
+        help=(
+            "Set the hostname of the databrowser. If not set (default), "
+            "the hostname is read from a config file."
+        ),
+    ),
+    access_token: Optional[str] = typer.Option(
+        None,
+        "--access-token",
+        help="Access token for authentication when adding user data.",
+    ),
+    verbose: int = typer.Option(0, "-v", help="Increase verbosity", count=True),
+) -> None:
+    """Add user data into the databrowser."""
+    logger.set_verbosity(verbose)
+    logger.debug("Checking if the user has the right to add data")
+    result = databrowser(host=host)
+    _auth(result._cfg.auth_url, access_token)
+
+    facet_dict = {}
+    if facets:
+        for facet in facets:
+            if "=" not in facet:
+                logger.error(
+                    f"Invalid facet format: {facet}. Expected format: key=value."
+                )
+                raise typer.Exit(code=1)
+            key, value = facet.split("=", 1)
+            facet_dict[key] = value
+
+    logger.debug(f"Adding user data with paths {paths} and facets {facet_dict}")
+    databrowser.userdata(
+        action="add",
+        userdata_items=cast(List[Union[str, xr.Dataset]], paths),
+        metadata=facet_dict,
+        host=host,
+    )
+
+
+@user_data_app.command(
+    name="delete", help="Delete user data from the databrowser."
+)
+@exception_handler
+def user_data_delete(
+    search_keys: List[str] = typer.Option(
+        None,
+        "--search-key",
+        "-s",
+        help="Key-value metadata pairs to search and identify user data "
+        "for deletion in the format key=value.",
+    ),
+    host: Optional[str] = typer.Option(
+        None,
+        "--host",
+        help=(
+            "Set the hostname of the databrowser. If not set (default), "
+            "the hostname is read from a config file."
+        ),
+    ),
+    access_token: Optional[str] = typer.Option(
+        None,
+        "--access-token",
+        help="Access token for authentication when deleting user data.",
+    ),
+    verbose: int = typer.Option(0, "-v", help="Increase verbosity", count=True),
+) -> None:
+    """Delete user data from the databrowser."""
+    logger.set_verbosity(verbose)
+    logger.debug("Checking if the user has the right to delete data")
+    result = databrowser(host=host)
+    _auth(result._cfg.auth_url, access_token)
+
+    search_key_dict = {}
+    if search_keys:
+        for search_key in search_keys:
+            if "=" not in search_key:
+                logger.error(
+                    f"Invalid search key format: {search_key}. "
+                    "Expected format: key=value."
+                )
+                raise typer.Exit(code=1)
+            key, value = search_key.split("=", 1)
+            search_key_dict[key] = value
+    databrowser.userdata(action="delete", metadata=search_key_dict, host=host)
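The new `user-data add` and `user-data delete` commands both reduce their repeated `--facet`/`--search-key` options to a plain dictionary before calling `databrowser.userdata()`. A minimal standalone sketch of that `key=value` parsing rule (the helper name and example values here are illustrative, not part of the package):

    from typing import Dict, List

    def parse_facets(facets: List[str]) -> Dict[str, str]:
        """Mirror the key=value parsing used by the new user-data commands."""
        facet_dict: Dict[str, str] = {}
        for facet in facets:
            if "=" not in facet:
                raise ValueError(f"Invalid facet format: {facet}. Expected format: key=value.")
            # Split only on the first '=', so values may themselves contain '='.
            key, value = facet.split("=", 1)
            facet_dict[key] = value
        return facet_dict

    print(parse_facets(["project=cmip5", "experiment=myFavExp"]))
    # {'project': 'cmip5', 'experiment': 'myFavExp'}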
src/freva_client/query.py

@@ -8,6 +8,7 @@ from pathlib import Path
 from tempfile import NamedTemporaryFile
 from typing import (
     Any,
+    Collection,
     Dict,
     Iterator,
     List,
@@ -21,12 +22,13 @@ from typing import (
 import intake
 import intake_esm
 import requests
+import xarray as xr
 import yaml
 from rich import print as pprint
 
 from .auth import Auth
 from .utils import logger
-from .utils.databrowser_utils import Config
+from .utils.databrowser_utils import Config, UserDataHandler
 
 __all__ = ["databrowser"]
 
@@ -207,7 +209,7 @@ class databrowser:
         *facets: str,
         uniq_key: Literal["file", "uri"] = "file",
         flavour: Literal[
-            "freva", "cmip6", "cmip5", "cordex", "nextgems"
+            "freva", "cmip6", "cmip5", "cordex", "nextgems", "user"
         ] = "freva",
         time: Optional[str] = None,
         host: Optional[str] = None,
@@ -267,19 +269,15 @@
         headers = {}
         if self._stream_zarr:
             query_url = self._cfg.zarr_loader_url
-            token = self._auth.check_authentication(
-                auth_url=self._cfg.auth_url
-            )
+            token = self._auth.check_authentication(auth_url=self._cfg.auth_url)
             headers = {"Authorization": f"Bearer {token['access_token']}"}
-        result = self.
+        result = self._request("GET", query_url, headers=headers, stream=True)
         if result is not None:
             try:
                 for res in result.iter_lines():
                     yield res.decode("utf-8")
             except KeyboardInterrupt:
-                pprint(
-                    "[red][b]User interrupt: Exit[/red][/b]", file=sys.stderr
-                )
+                pprint("[red][b]User interrupt: Exit[/red][/b]", file=sys.stderr)
 
     def __repr__(self) -> str:
         params = ", ".join(
@@ -337,7 +335,7 @@
 
 
         """
-        result = self.
+        result = self._request("GET", self._cfg.metadata_url)
         if result:
             return cast(int, result.json().get("total_count", 0))
         return 0
@@ -347,15 +345,13 @@
         kwargs: Dict[str, Any] = {"stream": True}
         url = self._cfg.intake_url
         if self._stream_zarr:
-            token = self._auth.check_authentication(
-                auth_url=self._cfg.auth_url
-            )
+            token = self._auth.check_authentication(auth_url=self._cfg.auth_url)
             url = self._cfg.zarr_loader_url
             kwargs["headers"] = {
                 "Authorization": f"Bearer {token['access_token']}"
             }
         kwargs["params"] = {"catalogue-type": "intake"}
-        result = self.
+        result = self._request("GET", url, **kwargs)
         if result is None:
             raise ValueError("No results found")
 
@@ -392,20 +388,23 @@
         .. execute_code::
 
             from freva_client import databrowser
-            db = databrowser(dataset="cmip6-
+            db = databrowser(dataset="cmip6-hsm", stream_zarr=True)
             cat = db.intake_catalogue()
             print(cat.df)
         """
         with NamedTemporaryFile(suffix=".json") as temp_f:
             self._create_intake_catalogue_file(temp_f.name)
-            return
+            return cast(
+                intake_esm.core.esm_datastore,
+                intake.open_esm_datastore(temp_f.name),
+            )
 
     @classmethod
     def count_values(
         cls,
         *facets: str,
         flavour: Literal[
-            "freva", "cmip6", "cmip5", "cordex", "nextgems"
+            "freva", "cmip6", "cmip5", "cordex", "nextgems", "user"
         ] = "freva",
         time: Optional[str] = None,
         host: Optional[str] = None,
@@ -455,6 +454,7 @@
             Select all versions and not just the latest version (default).
         fail_on_error: bool, default: False
             Make the call fail if the connection to the databrowser could not
+            be established.
         **search_keys: str
             The search constraints to be applied in the data search. If not given
             the whole dataset will be queried.
@@ -540,7 +540,7 @@
         cls,
         *facets: str,
         flavour: Literal[
-            "freva", "cmip6", "cmip5", "cordex", "nextgems"
+            "freva", "cmip6", "cmip5", "cordex", "nextgems", "user"
        ] = "freva",
         time: Optional[str] = None,
         host: Optional[str] = None,
@@ -593,6 +593,7 @@
             name will be taken from the freva config file.
         fail_on_error: bool, default: False
             Make the call fail if the connection to the databrowser could not
+            be established.
         **search_keys: str, list[str]
             The facets to be applied in the data search. If not given
             the whole dataset will be queried.
@@ -723,7 +724,7 @@
         self,
         extended_search: bool = False,
     ) -> Dict[str, List[str]]:
-        result = self.
+        result = self._request("GET", self._cfg.metadata_url)
         if result is None:
             return {}
         data = result.json()
@@ -733,26 +734,179 @@
             constraints = data["primary_facets"]
         return {f: v for f, v in data["facets"].items() if f in constraints}
 
-
-
+    @classmethod
+    def userdata(
+        cls,
+        action: Literal["add", "delete"],
+        userdata_items: Optional[List[Union[str, xr.Dataset]]] = None,
+        metadata: Optional[Dict[str, str]] = None,
+        host: Optional[str] = None,
+        fail_on_error: bool = False,
+    ) -> None:
+        """Add or delete user data in the databrowser system.
+
+        Manage user data in the databrowser system by adding new data or
+        deleting existing data.
+
+        For the "``add``" action, the user can provide data items (file paths
+        or xarray datasets) along with metadata (key-value pairs) to
+        categorize and organize the data.
+
+        For the "``delete``" action, the user provides metadata as search
+        criteria to identify and remove the existing data from the
+        system.
+
+        Parameters
+        ~~~~~~~~~~
+        action : Literal["add", "delete"]
+            The action to perform: "add" to add new data, or "delete"
+            to remove existing data.
+        userdata_items : List[Union[str, xr.Dataset]], optional
+            A list of user file paths or xarray datasets to add to the
+            databrowser (required for "add").
+        metadata : Dict[str, str], optional
+            Key-value metadata pairs to categorize the data (for "add")
+            or search and identify data for
+            deletion (for "delete").
+        host : str, optional
+            Override the host name of the databrowser server. This is usually
+            the url where the freva web site can be found. Such as
+            www.freva.dkrz.de. By default no host name is given and the host
+            name will be taken from the freva config file.
+        fail_on_error : bool, optional
+            Make the call fail if the connection to the databrowser could not
+            be established.
+
+        Raises
+        ~~~~~~
+        ValueError
+            If the operation fails or required parameters are missing
+            for the specified action.
+        FileNotFoundError
+            If no user data is provided for the "add" action.
+
+        Example
+        ~~~~~~~
+
+        Adding user data:
+
+        .. execute_code::
+
+            from freva_client import authenticate, databrowser
+            import xarray as xr
+            token_info = authenticate(username="janedoe")
+            filenames = (
+                "../freva-rest/src/freva_rest/databrowser_api/mock/data/model/regional/cordex/output/EUR-11/"
+                "GERICS/NCC-NorESM1-M/rcp85/r1i1p1/GERICS-REMO2015/v1/3hr/pr/v20181212/*.nc"
+            )
+            filename1 = (
+                "../freva-rest/src/freva_rest/databrowser_api/mock/data/model/regional/cordex/output/EUR-11/"
+                "CLMcom/MPI-M-MPI-ESM-LR/historical/r0i0p0/CLMcom-CCLM4-8-17/v1/fx/orog/v20140515/"
+                "orog_EUR-11_MPI-M-MPI-ESM-LR_historical_r1i1p1_CLMcom-CCLM4-8-17_v1_fx.nc"
+            )
+            xarray_data = xr.open_dataset(filename1)
+            databrowser.userdata(
+                action="add",
+                userdata_items=[xarray_data, filenames],
+                metadata={"project": "cmip5", "experiment": "myFavExp"}
+            )
+
+        Deleting user data:
+
+        .. execute_code::
+
+            from freva_client import authenticate, databrowser
+            token_info = authenticate(username="janedoe")
+            databrowser.userdata(
+                action="delete",
+                metadata={"project": "cmip5", "experiment": "myFavExp"}
+            )
+        """
+        this = cls(
+            host=host,
+            fail_on_error=fail_on_error,
+        )
+        userdata_items = userdata_items or []
+        metadata = metadata or {}
+        url = f"{this._cfg.userdata_url}"
+        token = this._auth.check_authentication(auth_url=this._cfg.auth_url)
+        headers = {"Authorization": f"Bearer {token['access_token']}"}
+        payload_metadata: dict[str, Collection[Collection[str]]] = {}
+
+        if action == "add":
+            user_data_handler = UserDataHandler(userdata_items)
+            if user_data_handler.user_metadata:
+                payload_metadata = {
+                    "user_metadata": user_data_handler.user_metadata,
+                    "facets": metadata,
+                }
+                result = this._request(
+                    "POST", url, data=payload_metadata, headers=headers
+                )
+                if result is not None:
+                    response_data = result.json()
+                    status_message = response_data.get("status")
+                else:
+                    raise ValueError("Failed to add user data")
+                pprint(f"[b][green]{status_message}[green][b]")
+            else:
+                raise ValueError("No metadata generated from the input data.")
+
+        if action == "delete":
+            if userdata_items:
+                logger.info(
+                    "'userdata_items' are not needed for the 'delete'"
+                    "action and will be ignored."
+                )
+
+            result = this._request("DELETE", url, data=metadata, headers=headers)
+
+            if result is None:
+                raise ValueError("Failed to delete user data")
+            pprint("[b][green]User data deleted successfully[green][b]")
+
+    def _request(
+        self,
+        method: Literal["GET", "POST", "PUT", "PATCH", "DELETE"],
+        url: str,
+        data: Optional[Dict[str, Any]] = None,
+        **kwargs: Any,
     ) -> Optional[requests.models.Response]:
-        """
-
+        """Request method to handle CRUD operations (GET, POST, PUT, PATCH, DELETE)."""
+        method_upper = method.upper()
+        timeout = kwargs.pop("timeout", 30)
         params = kwargs.pop("params", {})
-        kwargs.
+        stream = kwargs.pop("stream", False)
+
+        logger.debug(
+            "%s request to %s with data: %s and parameters: %s",
+            method_upper,
+            url,
+            data,
+            {**self._params, **params},
+        )
+
         try:
-
-
+            req = requests.Request(
+                method=method_upper,
+                url=url,
+                params={**self._params, **params},
+                json=None if method_upper in "GET" else data,
+                **kwargs,
             )
-
-
+            with requests.Session() as session:
+                prepared = session.prepare_request(req)
+                res = session.send(prepared, timeout=timeout, stream=stream)
+                res.raise_for_status()
+                return res
+
         except KeyboardInterrupt:
             pprint("[red][b]User interrupt: Exit[/red][/b]", file=sys.stderr)
         except (
             requests.exceptions.ConnectionError,
             requests.exceptions.HTTPError,
         ) as error:
-            msg = f"
+            msg = f"{method_upper} request failed with {error}"
             if self._fail_on_error:
                 raise ValueError(msg) from None
             logger.warning(msg)
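Taken together, the new `userdata()` classmethod and the widened flavour literal suggest a round trip along the following lines. This is only a sketch: the paths and facets are made up, and the assumption that the `user` flavour is what exposes previously added user data is mine, not stated in the diff.

    from freva_client import databrowser

    # Register local files under user-defined facets (requires a valid token).
    databrowser.userdata(
        action="add",
        userdata_items=["/work/myuser/data/tas_day_*.nc"],  # illustrative paths
        metadata={"project": "user-project", "experiment": "my-exp"},
    )

    # Search the registered data back, using the new "user" flavour.
    db = databrowser(flavour="user", experiment="my-exp")
    print(list(db))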
freva_client-2502.0.0/src/freva_client/utils/databrowser_utils.py (new file)

@@ -0,0 +1,399 @@
+"""Various utilities for getting the databrowser working."""
+
+import concurrent.futures
+import os
+import sys
+import sysconfig
+from configparser import ConfigParser, ExtendedInterpolation
+from functools import cached_property
+from pathlib import Path
+from typing import (
+    Any,
+    Dict,
+    Iterator,
+    List,
+    Literal,
+    Optional,
+    Sequence,
+    Tuple,
+    Union,
+    cast,
+)
+
+import appdirs
+import numpy as np
+import requests
+import tomli
+import xarray as xr
+
+from . import logger
+
+
+class Config:
+    """Client config class.
+
+    This class is used for basic configuration of the databrowser
+    client.
+    """
+
+    def __init__(
+        self,
+        host: Optional[str] = None,
+        uniq_key: Literal["file", "uri"] = "file",
+        flavour: str = "freva",
+    ) -> None:
+        self.databrowser_url = f"{self.get_api_url(host)}/databrowser"
+        self.auth_url = f"{self.get_api_url(host)}/auth/v2"
+        self.uniq_key = uniq_key
+        self._flavour = flavour
+
+    def _read_ini(self, path: Path) -> str:
+        """Read an ini file."""
+        ini_parser = ConfigParser(interpolation=ExtendedInterpolation())
+        ini_parser.read_string(path.read_text())
+        config = ini_parser["evaluation_system"]
+        scheme, host = self._split_url(
+            config.get("databrowser.host", config.get("solr.host", ""))
+        )
+        host, _, port = (host or "").partition(":")
+        port = port or config.get("databrowser.port", "")
+        if port:
+            host = f"{host}:{port}"
+        return f"{scheme}://{host}"
+
+    def _read_toml(self, path: Path) -> str:
+        """Read a new style toml config file."""
+        try:
+            config = tomli.loads(path.read_text()).get("freva", {})
+            scheme, host = self._split_url(cast(str, config["host"]))
+        except (tomli.TOMLDecodeError, KeyError):
+            return ""
+        host, _, port = host.partition(":")
+        if port:
+            host = f"{host}:{port}"
+        return f"{scheme}://{host}"
+
+    def _read_config(self, path: Path, file_type: Literal["toml", "ini"]) -> str:
+        """Read the configuration."""
+        data_types = {"toml": self._read_toml, "ini": self._read_ini}
+        try:
+            return data_types[file_type](path)
+        except KeyError:
+            pass
+        return ""
+
+    @cached_property
+    def overview(self) -> Dict[str, Any]:
+        """Get an overview of the all databrowser flavours and search keys."""
+        try:
+            res = requests.get(f"{self.databrowser_url}/overview", timeout=15)
+        except requests.exceptions.ConnectionError:
+            raise ValueError(
+                f"Could not connect to {self.databrowser_url}"
+            ) from None
+        return cast(Dict[str, Any], res.json())
+
+    def _get_databrowser_host_from_config(self) -> str:
+        """Get the config file order."""
+
+        eval_conf = self.get_dirs(user=False) / "evaluation_system.conf"
+        freva_config = Path(
+            os.environ.get("FREVA_CONFIG")
+            or Path(self.get_dirs(user=False)) / "freva.toml"
+        )
+        paths: Dict[Path, Literal["toml", "ini"]] = {
+            Path(appdirs.user_config_dir("freva")) / "freva.toml": "toml",
+            Path(self.get_dirs(user=True)) / "freva.toml": "toml",
+            freva_config: "toml",
+            Path(
+                os.environ.get("EVALUATION_SYSTEM_CONFIG_FILE") or eval_conf
+            ): "ini",
+        }
+        for config_path, config_type in paths.items():
+            if config_path.is_file():
+                host = self._read_config(config_path, config_type)
+                if host:
+                    return host
+        raise ValueError(
+            "No databrowser host configured, please use a"
+            " configuration defining a databrowser host or"
+            " set a host name using the `host` key"
+        )
+
+    @cached_property
+    def flavour(self) -> str:
+        """Get the flavour."""
+        flavours = self.overview.get("flavours", [])
+        if self._flavour not in flavours:
+            raise ValueError(
+                f"Search {self._flavour} not available, select from"
+                f" {','.join(flavours)}"
+            )
+        return self._flavour
+
+    @property
+    def search_url(self) -> str:
+        """Define the data search endpoint."""
+        return (
+            f"{self.databrowser_url}/data-search/{self.flavour}/{self.uniq_key}"
+        )
+
+    @property
+    def zarr_loader_url(self) -> str:
+        """Define the url for getting zarr files."""
+        return f"{self.databrowser_url}/load/{self.flavour}/"
+
+    @property
+    def intake_url(self) -> str:
+        """Define the url for creating intake catalogues."""
+        return f"{self.databrowser_url}/intake-catalogue/{self.flavour}/{self.uniq_key}"
+
+    @property
+    def metadata_url(self) -> str:
+        """Define the endpoint for the metadata search."""
+        return (
+            f"{self.databrowser_url}/metadata-search/"
+            f"{self.flavour}/{self.uniq_key}"
+        )
+
+    @staticmethod
+    def _split_url(url: str) -> Tuple[str, str]:
+        scheme, _, hostname = url.partition("://")
+        if not hostname:
+            hostname = scheme
+            scheme = ""
+        scheme = scheme or "http"
+        return scheme, hostname
+
+    def get_api_url(self, url: Optional[str]) -> str:
+        """Construct the databrowser url from a given hostname."""
+        url = url or self._get_databrowser_host_from_config()
+        scheme, hostname = self._split_url(url)
+        hostname, _, port = hostname.partition(":")
+        if port:
+            hostname = f"{hostname}:{port}"
+        hostname = hostname.partition("/")[0]
+        return f"{scheme}://{hostname}/api/freva-nextgen"
+
+    @staticmethod
+    def get_dirs(user: bool = True) -> Path:
+        """Get the 'scripts' and 'purelib' directories we'll install into.
+
+        This is now a thin wrapper around sysconfig.get_paths(). It's not inlined,
+        because some tests mock it out to install to a different location.
+        """
+        if user:
+            if (sys.platform == "darwin") and sysconfig.get_config_var(
+                "PYTHONFRAMEWORK"
+            ):
+                scheme = "osx_framework_user"
+            else:
+                scheme = f"{os.name}_user"
+            return Path(sysconfig.get_path("data", scheme)) / "share" / "freva"
+        # The default scheme is 'posix_prefix' or 'nt', and should work for e.g.
+        # installing into a virtualenv
+        return Path(sysconfig.get_path("data")) / "share" / "freva"
+
+    @property
+    def userdata_url(self) -> str:
+        """Define the url for adding and deleting user-data."""
+        return f"{self.databrowser_url}/userdata"
+
+
+class UserDataHandler:
+    """Class for processing user data.
+
+    This class is used for processing user data and extracting metadata
+    from the data files.
+    """
+
+    def __init__(self, userdata_items: List[Union[str, xr.Dataset]]) -> None:
+        self._suffixes = [".nc", ".nc4", ".grb", ".grib", ".zarr", "zar"]
+        self.user_metadata: List[
+            Dict[str, Union[str, List[str], Dict[str, str]]]
+        ] = []
+        self._metadata_collection: List[Dict[str, Union[str, List[str]]]] = []
+        try:
+            self._executor = concurrent.futures.ThreadPoolExecutor(
+                max_workers=min(int(os.cpu_count() or 4), 15)
+            )
+            self._process_user_data(userdata_items)
+        finally:
+            self._executor.shutdown(wait=True)
+
+    def _gather_files(self, path: Path, pattern: str = "*") -> Iterator[Path]:
+        """Gather all valid files from directory and wildcard pattern."""
+        for item in path.rglob(pattern):
+            if item.is_file() and item.suffix in self._suffixes:
+                yield item
+
+    def _validate_user_data(
+        self,
+        user_data: Sequence[Union[str, xr.Dataset]],
+    ) -> Dict[str, Union[List[Path], List[xr.Dataset]]]:
+        validated_paths: List[Path] = []
+        validated_xarray_datasets: List[xr.Dataset] = []
+        for data in user_data:
+            if isinstance(data, (str, Path)):
+                path = Path(data).expanduser().absolute()
+                if path.is_dir():
+                    validated_paths.extend(self._gather_files(path))
+                elif path.is_file() and path.suffix in self._suffixes:
+                    validated_paths.append(path)
+                else:
+                    validated_paths.extend(
+                        self._gather_files(path.parent, pattern=path.name)
+                    )
+            elif isinstance(data, xr.Dataset):
+                validated_xarray_datasets.append(data)
+
+        if not validated_paths and not validated_xarray_datasets:
+            raise FileNotFoundError(
+                "No valid file paths or xarray datasets found."
+            )
+        return {
+            "validated_user_paths": validated_paths,
+            "validated_user_xrdatasets": validated_xarray_datasets,
+        }
+
+    def _process_user_data(
+        self,
+        userdata_items: List[Union[str, xr.Dataset]],
+    ) -> None:
+        """Process xarray datasets and file paths using thread pool."""
+        futures = []
+        validated_userdata: Dict[str, Union[List[Path], List[xr.Dataset]]] = (
+            self._validate_user_data(userdata_items)
+        )
+        if validated_userdata["validated_user_xrdatasets"]:
+            futures.append(
+                self._executor.submit(
+                    self._process_userdata_in_executor,
+                    validated_userdata["validated_user_xrdatasets"],
+                )
+            )
+
+        if validated_userdata["validated_user_paths"]:
+            futures.append(
+                self._executor.submit(
+                    self._process_userdata_in_executor,
+                    validated_userdata["validated_user_paths"],
+                )
+            )
+        for future in futures:
+            try:
+                future.result()
+            except Exception as e:  # pragma: no cover
+                logger.error(f"Error processing batch: {e}")
+
+    def _process_userdata_in_executor(
+        self, validated_userdata: Union[List[Path], List[xr.Dataset]]
+    ) -> None:
+        for data in validated_userdata:
+            metadata = self._get_metadata(data)
+            if isinstance(metadata, Exception) or metadata == {}:
+                logger.warning("Error getting metadata: %s", metadata)
+            else:
+                self.user_metadata.append(metadata)
+
+    def _timedelta_to_cmor_frequency(self, dt: float) -> str:
+        for total_seconds, frequency in self._time_table.items():
+            if dt >= total_seconds:
+                return frequency
+        return "fx"  # pragma: no cover
+
+    @property
+    def _time_table(self) -> dict[int, str]:
+        return {
+            315360000: "dec",  # Decade
+            31104000: "yr",  # Year
+            2538000: "mon",  # Month
+            1296000: "sem",  # Seasonal (half-year)
+            84600: "day",  # Day
+            21600: "6h",  # Six-hourly
+            10800: "3h",  # Three-hourly
+            3600: "hr",  # Hourly
+            1: "subhr",  # Sub-hourly
+        }
+
+    def _get_time_frequency(self, time_delta: int, freq_attr: str = "") -> str:
+        if freq_attr in self._time_table.values():
+            return freq_attr
+        return self._timedelta_to_cmor_frequency(time_delta)
+
+    def _get_metadata(
+        self, path: Union[os.PathLike[str], xr.Dataset]
+    ) -> Dict[str, Union[str, List[str], Dict[str, str]]]:
+        """Get metadata from a path or xarray dataset."""
+
+        try:
+            dset = (
+                path
+                if isinstance(path, xr.Dataset)
+                else xr.open_mfdataset(
+                    str(path), parallel=False, use_cftime=True, lock=False
+                )
+            )
+            time_freq = dset.attrs.get("frequency", "")
+            data_vars = list(map(str, dset.data_vars))
+            coords = list(map(str, dset.coords))
+            try:
+                times = dset["time"].values[:]
+            except (KeyError, IndexError, TypeError):
+                times = np.array([])
+
+        except Exception as error:
+            logger.error("Failed to open data file %s: %s", str(path), error)
+            return {}
+        if len(times) > 0:
+            try:
+                time_str = (
+                    f"[{times[0].isoformat()}Z TO {times[-1].isoformat()}Z]"
+                )
+                dt = (
+                    abs((times[1] - times[0]).total_seconds())
+                    if len(times) > 1
+                    else 0
+                )
+            except Exception as non_cftime:
+                logger.info(
+                    "The time var is not based on the cftime: %s", non_cftime
+                )
+                time_str = (
+                    f"[{np.datetime_as_string(times[0], unit='s')}Z TO "
+                    f"{np.datetime_as_string(times[-1], unit='s')}Z]"
+                )
+                dt = (
+                    abs(
+                        (times[1] - times[0]).astype("timedelta64[s]").astype(int)
+                    )
+                    if len(times) > 1
+                    else 0
+                )
+        else:
+            time_str = "fx"
+            dt = 0
+
+        variables = [
+            var
+            for var in data_vars
+            if var not in coords
+            and not any(
+                term in var.lower() for term in ["lon", "lat", "bnds", "x", "y"]
+            )
+            and var.lower() not in ["rotated_pole", "rot_pole"]
+        ]
+
+        _data: Dict[str, Union[str, List[str], Dict[str, str]]] = {}
+        _data.setdefault("variable", variables[0])
+        _data.setdefault(
+            "time_frequency", self._get_time_frequency(dt, time_freq)
+        )
+        _data["time"] = time_str
+        _data.setdefault("cmor_table", _data["time_frequency"])
+        _data.setdefault("version", "")
+        if isinstance(path, Path):
+            _data["file"] = str(path)
+        if isinstance(path, xr.Dataset):
+            _data["file"] = str(dset.encoding["source"])
+        return _data
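The `_time_table` property and `_timedelta_to_cmor_frequency()` above classify the spacing between the first two time steps into a CMOR-style frequency label. A standalone sketch of that threshold logic, with the thresholds copied from the table (the function name is illustrative):

    # Thresholds in seconds, largest first, copied from the _time_table property.
    TIME_TABLE = {
        315360000: "dec",  # decade
        31104000: "yr",    # year
        2538000: "mon",    # month
        1296000: "sem",    # season (half-year)
        84600: "day",      # day
        21600: "6h",       # six-hourly
        10800: "3h",       # three-hourly
        3600: "hr",        # hourly
        1: "subhr",        # sub-hourly
    }

    def to_cmor_frequency(dt_seconds: float) -> str:
        """Return the first frequency whose threshold the time step reaches."""
        for total_seconds, frequency in TIME_TABLE.items():
            if dt_seconds >= total_seconds:
                return frequency
        return "fx"  # no usable time step: treat the data as time-invariant

    print(to_cmor_frequency(3 * 3600))  # 3-hourly data -> "3h"
    print(to_cmor_frequency(0))         # no time axis  -> "fx"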
freva_client-2410.0.1/src/freva_client/utils/databrowser_utils.py (removed)

@@ -1,179 +0,0 @@
-"""Various utilities for getting the databrowser working."""
-
-import os
-import sys
-import sysconfig
-from configparser import ConfigParser, ExtendedInterpolation
-from functools import cached_property
-from pathlib import Path
-from typing import Any, Dict, Literal, Optional, Tuple, cast
-
-import appdirs
-import requests
-import tomli
-
-
-class Config:
-    """Client config class.
-
-    This class is used for basic configuration of the databrowser
-    client.
-    """
-
-    def __init__(
-        self,
-        host: Optional[str] = None,
-        uniq_key: Literal["file", "uri"] = "file",
-        flavour: str = "freva",
-    ) -> None:
-        self.databrowser_url = f"{self.get_api_url(host)}/databrowser"
-        self.auth_url = f"{self.get_api_url(host)}/auth/v2"
-        self.uniq_key = uniq_key
-        self._flavour = flavour
-
-    def _read_ini(self, path: Path) -> str:
-        """Read an ini file."""
-        ini_parser = ConfigParser(interpolation=ExtendedInterpolation())
-        ini_parser.read_string(path.read_text())
-        config = ini_parser["evaluation_system"]
-        scheme, host = self._split_url(
-            config.get("databrowser.host") or config.get("solr.host")
-        )
-        host, _, port = (host or "").partition(":")
-        port = port or config.get("databrowser.port", "")
-        if port:
-            host = f"{host}:{port}"
-        return f"{scheme}://{host}"
-
-    def _read_toml(self, path: Path) -> str:
-        """Read a new style toml config file."""
-        try:
-            config = tomli.loads(path.read_text()).get("freva", {})
-            scheme, host = self._split_url(cast(str, config["host"]))
-        except (tomli.TOMLDecodeError, KeyError):
-            return ""
-        host, _, port = host.partition(":")
-        if port:
-            host = f"{host}:{port}"
-        return f"{scheme}://{host}"
-
-    def _read_config(
-        self, path: Path, file_type: Literal["toml", "ini"]
-    ) -> str:
-        """Read the configuration."""
-        data_types = {"toml": self._read_toml, "ini": self._read_ini}
-        try:
-            return data_types[file_type](path)
-        except KeyError:
-            pass
-        return ""
-
-    @cached_property
-    def overview(self) -> Dict[str, Any]:
-        """Get an overview of the all databrowser flavours and search keys."""
-        try:
-            res = requests.get(f"{self.databrowser_url}/overview", timeout=3)
-        except requests.exceptions.ConnectionError:
-            raise ValueError(
-                f"Could not connect to {self.databrowser_url}"
-            ) from None
-        return cast(Dict[str, Any], res.json())
-
-    def _get_databrowser_host_from_config(self) -> str:
-        """Get the config file order."""
-
-        eval_conf = self.get_dirs(user=False) / "evaluation_system.conf"
-        freva_config = Path(
-            os.environ.get("FREVA_CONFIG")
-            or Path(self.get_dirs(user=False)) / "freva.toml"
-        )
-        paths: Dict[Path, Literal["toml", "ini"]] = {
-            Path(appdirs.user_config_dir("freva")) / "freva.toml": "toml",
-            Path(self.get_dirs(user=True)) / "freva.toml": "toml",
-            freva_config: "toml",
-            Path(
-                os.environ.get("EVALUATION_SYSTEM_CONFIG_FILE") or eval_conf
-            ): "ini",
-        }
-        for config_path, config_type in paths.items():
-            if config_path.is_file():
-                host = self._read_config(config_path, config_type)
-                if host:
-                    return host
-        raise ValueError(
-            "No databrowser host configured, please use a"
-            " configuration defining a databrowser host or"
-            " set a host name using the `host` key"
-        )
-
-    @cached_property
-    def flavour(self) -> str:
-        """Get the flavour."""
-        flavours = self.overview.get("flavours", [])
-        if self._flavour not in flavours:
-            raise ValueError(
-                f"Search {self._flavour} not available, select from"
-                f" {','.join(flavours)}"
-            )
-        return self._flavour
-
-    @property
-    def search_url(self) -> str:
-        """Define the data search endpoint."""
-        return f"{self.databrowser_url}/data_search/{self.flavour}/{self.uniq_key}"
-
-    @property
-    def zarr_loader_url(self) -> str:
-        """Define the url for getting zarr files."""
-        return f"{self.databrowser_url}/load/{self.flavour}/"
-
-    @property
-    def intake_url(self) -> str:
-        """Define the url for creating intake catalogues."""
-        return f"{self.databrowser_url}/intake_catalogue/{self.flavour}/{self.uniq_key}"
-
-    @property
-    def metadata_url(self) -> str:
-        """Define the endpoint for the metadata search."""
-        return (
-            f"{self.databrowser_url}/metadata_search/"
-            f"{self.flavour}/{self.uniq_key}"
-        )
-
-    @staticmethod
-    def _split_url(url: str) -> Tuple[str, str]:
-        scheme, _, hostname = url.partition("://")
-        if not hostname:
-            hostname = scheme
-            scheme = ""
-        scheme = scheme or "http"
-        return scheme, hostname
-
-    def get_api_url(self, url: Optional[str]) -> str:
-        """Construct the databrowser url from a given hostname."""
-        url = url or self._get_databrowser_host_from_config()
-        scheme, hostname = self._split_url(url)
-        hostname, _, port = hostname.partition(":")
-        if port:
-            hostname = f"{hostname}:{port}"
-        hostname = hostname.partition("/")[0]
-        return f"{scheme}://{hostname}/api"
-
-    @staticmethod
-    def get_dirs(user: bool = True) -> Path:
-        """Get the 'scripts' and 'purelib' directories we'll install into.
-
-        This is now a thin wrapper around sysconfig.get_paths(). It's not inlined,
-        because some tests mock it out to install to a different location.
-        """
-        if user:
-            if (sys.platform == "darwin") and sysconfig.get_config_var(
-                "PYTHONFRAMEWORK"
-            ):
-                scheme = "osx_framework_user"
-            else:
-                scheme = f"{os.name}_user"
-            return Path(sysconfig.get_path("data", scheme)) / "share" / "freva"
-        # The default scheme is 'posix_prefix' or 'nt', and should work for e.g.
-        # installing into a virtualenv
-        return Path(sysconfig.get_path("data")) / "share" / "freva"
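Comparing the removed module with its replacement above: the REST prefix moves from `/api` to `/api/freva-nextgen`, and the search endpoints switch from underscored names (`data_search`, `metadata_search`, `intake_catalogue`) to hyphenated ones. A small sketch of the resulting URLs for an example host (the host name and scheme are illustrative only):

    host = "www.freva.dkrz.de"  # illustrative host name

    # freva-client 2410.0.1 (removed module)
    old_databrowser_url = f"https://{host}/api/databrowser"
    old_search_url = f"{old_databrowser_url}/data_search/freva/file"

    # freva-client 2502.0.0 (new module)
    new_databrowser_url = f"https://{host}/api/freva-nextgen/databrowser"
    new_search_url = f"{new_databrowser_url}/data-search/freva/file"

    print(old_search_url)  # https://www.freva.dkrz.de/api/databrowser/data_search/freva/file
    print(new_search_url)  # https://www.freva.dkrz.de/api/freva-nextgen/databrowser/data-search/freva/file

This change to the endpoint layout is worth keeping in mind when the client and server are upgraded independently.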