freva-client 2508.0.0__py3-none-any.whl → 2509.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of freva-client might be problematic. Click here for more details.

freva_client/query.py CHANGED
@@ -21,6 +21,7 @@ from typing import (
21
21
 
22
22
  import intake
23
23
  import intake_esm
24
+ import pandas as pd
24
25
  import requests
25
26
  import xarray as xr
26
27
  import yaml
@@ -28,7 +29,12 @@ from rich import print as pprint
28
29
 
29
30
  from .auth import Auth
30
31
  from .utils import logger
31
- from .utils.auth_utils import Token, choose_token_strategy, load_token
32
+ from .utils.auth_utils import (
33
+ Token,
34
+ choose_token_strategy,
35
+ load_token,
36
+ requires_authentication,
37
+ )
32
38
  from .utils.databrowser_utils import Config, UserDataHandler
33
39
 
34
40
  __all__ = ["databrowser"]
@@ -218,6 +224,16 @@ class databrowser:
218
224
  }
219
225
  )
220
226
 
227
+ You can also filter the metadata to only include specific facets.
228
+
229
+ .. code-block:: python
230
+
231
+ from freva_client import databrowser
232
+ db = databrowser(
233
+ "era5*",
234
+ realm="atmos",
235
+ )[['project', 'model', 'experiment']]
236
+ print(db.metadata)
221
237
 
222
238
  """
223
239
 
@@ -225,9 +241,7 @@ class databrowser:
225
241
  self,
226
242
  *facets: str,
227
243
  uniq_key: Literal["file", "uri"] = "file",
228
- flavour: Literal[
229
- "freva", "cmip6", "cmip5", "cordex", "nextgems", "user"
230
- ] = "freva",
244
+ flavour: Optional[str] = None,
231
245
  time: Optional[str] = None,
232
246
  host: Optional[str] = None,
233
247
  time_select: Literal["flexible", "strict", "file"] = "flexible",
@@ -241,8 +255,9 @@ class databrowser:
241
255
  self._auth = Auth()
242
256
  self._fail_on_error = fail_on_error
243
257
  self._cfg = Config(host, uniq_key=uniq_key, flavour=flavour)
244
- self._flavour = flavour
258
+ self._flavour = self._cfg.flavour
245
259
  self._stream_zarr = stream_zarr
260
+ self.builtin_flavours = {"freva", "cmip6", "cmip5", "cordex", "user"}
246
261
  facet_search: Dict[str, List[str]] = defaultdict(list)
247
262
  for key, value in search_keys.items():
248
263
  if isinstance(value, str):
@@ -289,12 +304,10 @@ class databrowser:
289
304
 
290
305
  def __iter__(self) -> Iterator[str]:
291
306
  query_url = self._cfg.search_url
292
- headers = {}
293
307
  if self._stream_zarr:
294
308
  query_url = self._cfg.zarr_loader_url
295
- token = self._auth.authenticate(config=self._cfg)
296
- headers = {"Authorization": f"Bearer {token['access_token']}"}
297
- result = self._request("GET", query_url, headers=headers, stream=True)
309
+
310
+ result = self._request("GET", query_url, stream=True)
298
311
  if result is not None:
299
312
  try:
300
313
  for res in result.iter_lines():
@@ -368,11 +381,7 @@ class databrowser:
368
381
  kwargs: Dict[str, Any] = {"stream": True}
369
382
  url = self._cfg.intake_url
370
383
  if self._stream_zarr:
371
- token = self._auth.authenticate(config=self._cfg)
372
384
  url = self._cfg.zarr_loader_url
373
- kwargs["headers"] = {
374
- "Authorization": f"Bearer {token['access_token']}"
375
- }
376
385
  kwargs["params"] = {"catalogue-type": "intake"}
377
386
  result = self._request("GET", url, **kwargs)
378
387
  if result is None:
@@ -539,9 +548,7 @@ class databrowser:
539
548
  def count_values(
540
549
  cls,
541
550
  *facets: str,
542
- flavour: Literal[
543
- "freva", "cmip6", "cmip5", "cordex", "nextgems", "user"
544
- ] = "freva",
551
+ flavour: Optional[str] = None,
545
552
  time: Optional[str] = None,
546
553
  host: Optional[str] = None,
547
554
  time_select: Literal["flexible", "strict", "file"] = "flexible",
@@ -643,6 +650,15 @@ class databrowser:
643
650
  from freva_client import databrowser
644
651
  print(databrowser.count_values("reana*", realm="ocean", flavour="cmip6"))
645
652
 
653
+ Count only specific facets:
654
+
655
+ .. code-block:: python
656
+
657
+ from freva_client import databrowser
658
+ era5_counts = databrowser.count_values(
659
+ "era5*",
660
+ )[['project', 'model']]
661
+ print(era5_counts)
646
662
  """
647
663
  this = cls(
648
664
  *facets,
@@ -667,7 +683,7 @@ class databrowser:
667
683
  return counts
668
684
 
669
685
  @cached_property
670
- def metadata(self) -> Dict[str, List[str]]:
686
+ def metadata(self) -> pd.DataFrame:
671
687
  """Get the metadata (facets) for the current databrowser query.
672
688
 
673
689
  You can retrieve all information that is associated with your current
@@ -685,20 +701,34 @@ class databrowser:
685
701
  db = databrowser(uri="slk:///arch/*/CPC/*")
686
702
  print(db.metadata)
687
703
 
704
+ To retrieve only a limited set of metadata you can
705
+ specify the facets you are interested in:
706
+
707
+ .. code-block:: python
708
+
709
+ from freva_client import databrowser
710
+ db = databrowser(
711
+ "era5*",
712
+ realm="atmos",
713
+ )
714
+ print(db.metadata[['project', 'model', 'experiment']])
715
+
688
716
 
689
717
  """
690
- return {
691
- k: v[::2]
692
- for (k, v) in self._facet_search(extended_search=True).items()
693
- }
718
+ return (
719
+ pd.DataFrame([
720
+ (k, v[::2])
721
+ for k, v in self._facet_search(extended_search=True).items()
722
+ ], columns=["facet", "values"])
723
+ .explode("values")
724
+ .groupby("facet")["values"].apply(list)
725
+ )
694
726
 
695
727
  @classmethod
696
728
  def metadata_search(
697
729
  cls,
698
730
  *facets: str,
699
- flavour: Literal[
700
- "freva", "cmip6", "cmip5", "cordex", "nextgems", "user"
701
- ] = "freva",
731
+ flavour: Optional[str] = None,
702
732
  time: Optional[str] = None,
703
733
  host: Optional[str] = None,
704
734
  time_select: Literal["flexible", "strict", "file"] = "flexible",
@@ -708,7 +738,7 @@ class databrowser:
708
738
  fail_on_error: bool = False,
709
739
  extended_search: bool = False,
710
740
  **search_keys: Union[str, List[str]],
711
- ) -> Dict[str, List[str]]:
741
+ ) -> pd.DataFrame:
712
742
  """Search for data attributes (facets) in the databrowser.
713
743
 
714
744
  The method queries the databrowser for available search facets (keys)
@@ -816,6 +846,16 @@ class databrowser:
816
846
  res = databrowser.metadata_search(file="/arch/*CPC/*")
817
847
  print(res)
818
848
 
849
+ Return only specific facets: for example project and realm:
850
+
851
+ .. code-block:: python
852
+
853
+ from freva_client import databrowser
854
+ selected = databrowser.metadata_search(
855
+ "era5*",
856
+ )[['project', 'realm']]
857
+ print(selected)
858
+
819
859
  Sometimes you don't exactly know the exact names of the search keys and
820
860
  want retrieve all file objects that match a certain category. For
821
861
  example for getting all ocean reanalysis datasets you can apply the
@@ -857,12 +897,18 @@ class databrowser:
857
897
  stream_zarr=False,
858
898
  **search_keys,
859
899
  )
860
- return {
861
- k: v[::2]
862
- for (k, v) in this._facet_search(
863
- extended_search=extended_search
864
- ).items()
865
- }
900
+ return (
901
+ pd.DataFrame(
902
+ [
903
+ (k, v[::2])
904
+ for k, v in this._facet_search(
905
+ extended_search=extended_search
906
+ ).items()
907
+ ], columns=["facet", "values"]
908
+ )
909
+ .explode("values")
910
+ .groupby("facet")["values"].apply(list)
911
+ )
866
912
 
867
913
  @classmethod
868
914
  def overview(cls, host: Optional[str] = None) -> str:
@@ -894,9 +940,12 @@ class databrowser:
894
940
  print(databrowser.overview())
895
941
  """
896
942
  overview = Config(host).overview.copy()
943
+ note = overview.pop("Note", None)
944
+ if note:
945
+ overview["Note"] = note
897
946
  overview["Available search flavours"] = overview.pop("flavours")
898
947
  overview["Search attributes by flavour"] = overview.pop("attributes")
899
- return yaml.safe_dump(overview)
948
+ return yaml.safe_dump(overview, sort_keys=False)
900
949
 
901
950
  @property
902
951
  def url(self) -> str:
@@ -1038,8 +1087,6 @@ class databrowser:
1038
1087
  if result is not None:
1039
1088
  response_data = result.json()
1040
1089
  status_message = response_data.get("status")
1041
- else:
1042
- raise ValueError("Failed to add user data")
1043
1090
  pprint(f"[b][green]{status_message}[green][b]")
1044
1091
  else:
1045
1092
  raise ValueError("No metadata generated from the input data.")
@@ -1052,11 +1099,153 @@ class databrowser:
1052
1099
  )
1053
1100
 
1054
1101
  result = this._request("DELETE", url, data=metadata, headers=headers)
1055
-
1056
- if result is None:
1057
- raise ValueError("Failed to delete user data")
1058
1102
  pprint("[b][green]User data deleted successfully[green][b]")
1059
1103
 
1104
@classmethod
def flavour(
    cls,
    action: Literal["add", "delete", "list"],
    name: Optional[str] = None,
    mapping: Optional[Dict[str, str]] = None,
    is_global: bool = False,
    host: Optional[str] = None,
    fail_on_error: bool = False,
) -> Union[None, Dict[str, Any]]:
    """Manage custom flavours in the databrowser system.

    This method allows users to add, delete, or list custom flavours that
    define how search facets are mapped to different Data Reference Syntax
    (DRS) standards.

    Parameters
    ~~~~~~~~~~
    action : Literal["add", "delete", "list"]
        The action to perform: "add" to create a new flavour, "delete"
        to remove an existing flavour, or "list" to retrieve the available
        flavours. Any other value is ignored and ``None`` is returned.
    name : str, optional
        The name of the flavour to add or delete (required for "add" and
        "delete" actions).
    mapping : Dict[str, str], optional
        A dictionary mapping facet names to their corresponding names in
        the new flavour (required for "add" action).
    is_global : bool, default: False
        Whether to make the flavour available to all users (requires admin
        privileges) or just the current user.
    host : str, optional
        Override the host name of the databrowser server. This is usually
        the url where the freva web site can be found. Such as
        www.freva.dkrz.de. By default no host name is given and the host
        name will be taken from the freva config file.
    fail_on_error : bool, optional
        Make the call fail if the connection to the databrowser could not
        be established.

    Returns
    ~~~~~~~
    Union[None, Dict[str, Any]]
        For the "list" action a dictionary with the key ``"flavours"``
        (the list of flavour entries sent by the server, empty if the
        request failed) and, when the request was sent without any
        headers, a ``"Note"`` explaining that only global flavours are
        shown. For "add" and "delete" actions, returns None.

    Raises
    ~~~~~~
    ValueError
        If required parameters are missing, or if the request fails and
        ``fail_on_error`` is set.

    NOTE(review): conflicts between ``name`` and the built-in flavours are
    not checked in this method; presumably the server rejects them --
    confirm.

    Example
    ~~~~~~~

    Adding a custom flavour:

    .. code-block:: python

        from freva_client import databrowser
        databrowser.flavour(
            action="add",
            name="klimakataster",
            mapping={"project": "Projekt", "model": "Modell"},
            is_global=False
        )

    Listing all custom flavours:

    .. code-block:: python

        from freva_client import databrowser
        flavours = databrowser.flavour(action="list")
        print(flavours)

    Deleting a custom flavour:

    .. code-block:: python

        from freva_client import databrowser
        databrowser.flavour(action="delete", name="klimakataster")
    """
    # Validate the arguments first: there is no point in sending the user
    # through an authentication flow for a call that is going to be
    # rejected locally anyway.
    if action == "add" and not (name and mapping):
        raise ValueError(
            "Both 'name' and 'mapping' are required for add action"
        )
    if action == "delete" and not name:
        raise ValueError("'name' is required for delete action")

    this = cls(host=host, fail_on_error=fail_on_error)
    # One URL source for all three actions (the instance's own config).
    flavours_url = f"{this._cfg.databrowser_url}/flavours"

    if action == "add":
        token = this._auth.authenticate(config=this._cfg)
        headers = {"Authorization": f"Bearer {token['access_token']}"}
        payload = {
            "flavour_name": name,
            "mapping": mapping,
            "is_global": is_global,
        }
        result = this._request(
            "POST",
            flavours_url,
            data=payload,
            headers=headers,
        )
        # ``_request`` returns None when the request failed and
        # ``fail_on_error`` is off; it has already logged a warning then.
        if result is not None:
            msg = result.json().get("status", "Flavour added successfully")
            pprint(f"[b][green] {msg} [/green][/b]")
        return None

    if action == "delete":
        token = this._auth.authenticate(config=this._cfg)
        headers = {"Authorization": f"Bearer {token['access_token']}"}
        params = {"is_global": "true" if is_global else "false"}
        result = this._request(
            "DELETE",
            f"{flavours_url}/{name}",
            headers=headers,
            params=params,
        )
        if result is not None:
            msg = result.json().get("status", "Flavour deleted successfully")
            pprint(f"[b][green] {msg} [/green][/b]")
        return None

    if action == "list":
        headers = cast(Dict[str, str], this._cfg._get_headers)
        flavours: List[Dict[str, Any]] = []
        result = this._request("GET", flavours_url, headers=headers)
        if result is not None:
            flavours = result.json().get("flavours", [])
        result_data: Dict[str, Any] = {"flavours": flavours}
        if not headers:
            # No headers were available for the request, so tell the user
            # why user-specific flavours are missing from the listing.
            result_data["Note"] = (
                "Displaying only global flavours. "
                "Authenticate to see custom user flavours as well."
            )
        return result_data

    return None
1248
+
1060
1249
  def _request(
1061
1250
  self,
1062
1251
  method: Literal["GET", "POST", "PUT", "PATCH", "DELETE"],
@@ -1065,10 +1254,23 @@ class databrowser:
1065
1254
  **kwargs: Any,
1066
1255
  ) -> Optional[requests.models.Response]:
1067
1256
  """Request method to handle CRUD operations (GET, POST, PUT, PATCH, DELETE)."""
1257
+ self._cfg.validate_server
1068
1258
  method_upper = method.upper()
1069
1259
  timeout = kwargs.pop("timeout", 30)
1070
1260
  params = kwargs.pop("params", {})
1071
1261
  stream = kwargs.pop("stream", False)
1262
+ kwargs.setdefault("headers", {})
1263
+
1264
+ if (
1265
+ requires_authentication(
1266
+ self._flavour,
1267
+ self._stream_zarr,
1268
+ self._cfg.databrowser_url
1269
+ )
1270
+ and "Authorization" not in kwargs["headers"]
1271
+ ):
1272
+ token = self._auth.authenticate(config=self._cfg)
1273
+ kwargs["headers"]["Authorization"] = f"Bearer {token['access_token']}"
1072
1274
 
1073
1275
  logger.debug(
1074
1276
  "%s request to %s with data: %s and parameters: %s",
@@ -1097,8 +1299,23 @@ class databrowser:
1097
1299
  except (
1098
1300
  requests.exceptions.ConnectionError,
1099
1301
  requests.exceptions.HTTPError,
1302
+ requests.exceptions.InvalidURL,
1100
1303
  ) as error:
1101
- msg = f"{method_upper} request failed with {error}"
1304
+ server_msg = ""
1305
+ if hasattr(error, 'response') and error.response is not None:
1306
+ try:
1307
+ error_data = error.response.json()
1308
+ error_var = {error_data.get(
1309
+ 'detail', error_data.get(
1310
+ 'message', error_data.get('error', '')
1311
+ )
1312
+ )}
1313
+ server_msg = (
1314
+ f" - {error_var}"
1315
+ )
1316
+ except Exception:
1317
+ pass
1318
+ msg = f"{method_upper} request failed with: {error}{server_msg}"
1102
1319
  if self._fail_on_error:
1103
1320
  raise ValueError(msg) from None
1104
1321
  logger.warning(msg)
@@ -8,6 +8,7 @@ import time
8
8
  from pathlib import Path
9
9
  from typing import Literal, Optional, TypedDict, Union, cast
10
10
 
11
+ import requests
11
12
  from appdirs import user_cache_dir
12
13
 
13
14
  TOKEN_EXPIRY_BUFFER = 60 # seconds
@@ -238,3 +239,40 @@ def wait_for_port(host: str, port: int, timeout: float = 5.0) -> None:
238
239
  raise TimeoutError(
239
240
  f"Port {port} on {host} did not open within {timeout} seconds."
240
241
  )
242
+
243
+
244
def requires_authentication(
    flavour: Optional[str],
    zarr: bool = False,
    databrowser_url: Optional[str] = None,
) -> bool:
    """Check if authentication is required.

    Parameters
    ----------
    flavour : str or None
        The data flavour to check.
    zarr : bool, default: False
        Whether the request is for zarr data.
    databrowser_url : str or None
        The URL of the databrowser to query for available flavours.
        If None (or empty), the function will skip querying and assume
        authentication is required for non-default flavours.

    Returns
    -------
    bool
        ``True`` for zarr requests, for flavours that are neither built-in
        nor listed by the server's unauthenticated ``/flavours`` endpoint,
        and whenever that listing cannot be obtained or parsed.
        ``False`` otherwise.
    """
    if zarr:
        return True
    if flavour in {"freva", "cmip6", "cmip5", "cordex", "user", None}:
        return False
    if not databrowser_url:
        # Nothing to query: do what the docstring promises instead of
        # relying on a request to an invalid URL raising and being swallowed.
        return True
    try:
        response = requests.get(f"{databrowser_url}/flavours", timeout=30)
        response.raise_for_status()
        global_flavour_names = {
            entry["flavour_name"]
            for entry in response.json().get("flavours", [])
        }
    except (
        requests.RequestException,  # connection, timeout, HTTP status, URL
        ValueError,  # response body is not valid JSON
        KeyError,  # entry without a "flavour_name"
        TypeError,  # payload entries are not mappings
        AttributeError,  # top-level payload is not a mapping
    ):
        # Fail closed: if the flavour list is unavailable, ask for a token.
        return True
    return flavour not in global_flavour_names