freva-client 2410.0.0b1__tar.gz → 2411.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.



Files changed (20)
  1. {freva_client-2410.0.0b1 → freva_client-2411.0.0}/PKG-INFO +3 -2
  2. {freva_client-2410.0.0b1 → freva_client-2411.0.0}/pyproject.toml +1 -0
  3. {freva_client-2410.0.0b1 → freva_client-2411.0.0}/src/freva_client/__init__.py +1 -1
  4. {freva_client-2410.0.0b1 → freva_client-2411.0.0}/src/freva_client/cli/databrowser_cli.py +19 -17
  5. {freva_client-2410.0.0b1 → freva_client-2411.0.0}/src/freva_client/query.py +153 -142
  6. freva_client-2411.0.0/src/freva_client/utils/databrowser_utils.py +368 -0
  7. freva_client-2410.0.0b1/src/freva_client/utils/databrowser_utils.py +0 -177
  8. {freva_client-2410.0.0b1 → freva_client-2411.0.0}/MANIFEST.in +0 -0
  9. {freva_client-2410.0.0b1 → freva_client-2411.0.0}/README.md +0 -0
  10. {freva_client-2410.0.0b1 → freva_client-2411.0.0}/assets/share/freva/freva.toml +0 -0
  11. {freva_client-2410.0.0b1 → freva_client-2411.0.0}/src/freva_client/__main__.py +0 -0
  12. {freva_client-2410.0.0b1 → freva_client-2411.0.0}/src/freva_client/auth.py +0 -0
  13. {freva_client-2410.0.0b1 → freva_client-2411.0.0}/src/freva_client/cli/__init__.py +0 -0
  14. {freva_client-2410.0.0b1 → freva_client-2411.0.0}/src/freva_client/cli/auth_cli.py +0 -0
  15. {freva_client-2410.0.0b1 → freva_client-2411.0.0}/src/freva_client/cli/cli_app.py +0 -0
  16. {freva_client-2410.0.0b1 → freva_client-2411.0.0}/src/freva_client/cli/cli_parser.py +0 -0
  17. {freva_client-2410.0.0b1 → freva_client-2411.0.0}/src/freva_client/cli/cli_utils.py +0 -0
  18. {freva_client-2410.0.0b1 → freva_client-2411.0.0}/src/freva_client/py.typed +0 -0
  19. {freva_client-2410.0.0b1 → freva_client-2411.0.0}/src/freva_client/utils/__init__.py +0 -0
  20. {freva_client-2410.0.0b1 → freva_client-2411.0.0}/src/freva_client/utils/logger.py +0 -0

{freva_client-2410.0.0b1 → freva_client-2411.0.0}/PKG-INFO

@@ -1,6 +1,6 @@
- Metadata-Version: 2.1
+ Metadata-Version: 2.3
  Name: freva-client
- Version: 2410.0.0b1
+ Version: 2411.0.0
  Summary: Search for climate data based on key-value pairs
  Author-email: "DKRZ, Clint" <freva@dkrz.de>
  Requires-Python: >=3.8
@@ -22,6 +22,7 @@ Requires-Dist: authlib
  Requires-Dist: requests
  Requires-Dist: intake_esm
  Requires-Dist: rich
+ Requires-Dist: setuptools
  Requires-Dist: tomli
  Requires-Dist: typer
  Requires-Dist: tox ; extra == "dev"

{freva_client-2410.0.0b1 → freva_client-2411.0.0}/pyproject.toml

@@ -28,6 +28,7 @@ dependencies = [
  "requests",
  "intake_esm",
  "rich",
+ "setuptools",
  "tomli",
  "typer",
  ]

{freva_client-2410.0.0b1 → freva_client-2411.0.0}/src/freva_client/__init__.py

@@ -17,5 +17,5 @@ need to apply data analysis plugins, please visit the
  from .auth import authenticate
  from .query import databrowser

- __version__ = "2410.0.0-beta1"
+ __version__ = "2411.0.0"
  __all__ = ["authenticate", "databrowser", "__version__"]

{freva_client-2410.0.0b1 → freva_client-2411.0.0}/src/freva_client/cli/databrowser_cli.py

@@ -10,6 +10,7 @@ from tempfile import NamedTemporaryFile
  from typing import Dict, List, Literal, Optional, Union, cast

  import typer
+ import xarray as xr
  from freva_client import databrowser
  from freva_client.auth import Auth
  from freva_client.utils import exception_handler, logger
@@ -42,6 +43,7 @@ class Flavours(str, Enum):
  cmip5: str = "cmip5"
  cordex: str = "cordex"
  nextgems: str = "nextgems"
+ user: str = "user"


  class TimeSelect(str, Enum):
@@ -187,7 +189,7 @@ def metadata_search(
  time=time or "",
  time_select=cast(Literal["file", "flexible", "strict"], time_select.value),
  flavour=cast(
- Literal["freva", "cmip6", "cmip5", "cordex", "nextgems"],
+ Literal["freva", "cmip6", "cmip5", "cordex", "nextgems", "user"],
  flavour.value,
  ),
  host=host,
@@ -310,7 +312,7 @@ def data_search(
  time=time or "",
  time_select=cast(Literal["file", "flexible", "strict"], time_select),
  flavour=cast(
- Literal["freva", "cmip6", "cmip5", "cordex", "nextgems"],
+ Literal["freva", "cmip6", "cmip5", "cordex", "nextgems", "user"],
  flavour.value,
  ),
  uniq_key=cast(Literal["uri", "file"], uniq_key.value),
@@ -441,7 +443,7 @@ def intake_catalogue(
  time=time or "",
  time_select=cast(Literal["file", "flexible", "strict"], time_select),
  flavour=cast(
- Literal["freva", "cmip6", "cmip5", "cordex", "nextgems"],
+ Literal["freva", "cmip6", "cmip5", "cordex", "nextgems", "user"],
  flavour.value,
  ),
  uniq_key=cast(Literal["uri", "file"], uniq_key.value),
@@ -564,7 +566,7 @@ def count_values(
  time=time or "",
  time_select=cast(Literal["file", "flexible", "strict"], time_select),
  flavour=cast(
- Literal["freva", "cmip6", "cmip5", "cordex", "nextgems"],
+ Literal["freva", "cmip6", "cmip5", "cordex", "nextgems", "user"],
  flavour.value,
  ),
  host=host,
@@ -580,7 +582,7 @@ def count_values(
  time=time or "",
  time_select=cast(Literal["file", "flexible", "strict"], time_select),
  flavour=cast(
- Literal["freva", "cmip6", "cmip5", "cordex", "nextgems"],
+ Literal["freva", "cmip6", "cmip5", "cordex", "nextgems", "user"],
  flavour.value,
  ),
  host=host,
@@ -611,7 +613,6 @@ databrowser_app.add_typer(user_data_app, name="user-data")
  @user_data_app.command(name="add", help="Add user data into the databrowser.")
  @exception_handler
  def user_data_add(
- username: str = typer.Argument(..., help="Username of the data owner"),
  paths: List[str] = typer.Option(
  ...,
  "--path",
@@ -621,8 +622,8 @@ def user_data_add(
  facets: Optional[List[str]] = typer.Option(
  None,
  "--facet",
- "-f",
- help="Facet key-value pairs for metadata in the format key=value.",
+ help="Key-value metadata pairs to categorize the user"
+ "input data in the format key=value.",
  ),
  host: Optional[str] = typer.Option(
  None,
@@ -656,22 +657,24 @@ def user_data_add(
  key, value = facet.split("=", 1)
  facet_dict[key] = value

- logger.debug(
- f"Adding user data for {username} with paths {paths} and facets {facet_dict}"
+ logger.debug(f"Adding user data with paths {paths} and facets {facet_dict}")
+ databrowser.userdata(
+ action="add",
+ userdata_items=cast(List[Union[str, xr.Dataset]], paths),
+ metadata=facet_dict,
+ host=host
  )
- result.add_user_data(username=username, paths=paths, facets=facet_dict)
- logger.info("User data started crawling. Check the Databrowser to see the updates.")


  @user_data_app.command(name="delete", help="Delete user data from the databrowser.")
  @exception_handler
- def user_data_remove(
- username: str = typer.Argument(..., help="Username of the data owner"),
+ def user_data_delete(
  search_keys: List[str] = typer.Option(
  None,
  "--search-key",
  "-s",
- help="Search keys for the data to be deleted in the format key=value.",
+ help="Key-value metadata pairs to search and identify user data "
+ "for deletion in the format key=value.",
  ),
  host: Optional[str] = typer.Option(
  None,
@@ -705,5 +708,4 @@ def user_data_remove(
  raise typer.Exit(code=1)
  key, value = search_key.split("=", 1)
  search_key_dict[key] = value
- result.delete_user_data(username=username, search_keys=search_key_dict)
- logger.info("User data deleted successfully.")
+ databrowser.userdata(action="delete", metadata=search_key_dict, host=host)
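
The reworked `user-data` commands above drop the positional `username` argument and delegate to the new `databrowser.userdata` classmethod introduced in `query.py` below. A minimal sketch of the equivalent library calls; the paths and facet values are made up, and a configured freva host plus a valid login token are assumed:

```python
from typing import Dict, List

from freva_client import databrowser

# Hypothetical inputs, as a user might pass them via --path and --facet.
paths: List[str] = ["/work/user/my_output/*.nc"]
raw_facets: List[str] = ["project=cmip5", "experiment=myFavExp"]

# The same key=value parsing the CLI performs before calling the library.
facet_dict: Dict[str, str] = {}
for facet in raw_facets:
    key, value = facet.split("=", 1)
    facet_dict[key] = value

# "add" replaces the removed databrowser.add_user_data() instance method ...
databrowser.userdata(action="add", userdata_items=list(paths), metadata=facet_dict)

# ... and "delete" replaces delete_user_data(), using the facets as search keys.
databrowser.userdata(action="delete", metadata=facet_dict)
```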

{freva_client-2410.0.0b1 → freva_client-2411.0.0}/src/freva_client/query.py

@@ -1,5 +1,6 @@
  """Query climate data sets by using-key value pair search queries."""

+
  import sys
  from collections import defaultdict
  from fnmatch import fnmatch
@@ -8,6 +9,7 @@ from pathlib import Path
  from tempfile import NamedTemporaryFile
  from typing import (
  Any,
+ Collection,
  Dict,
  Iterator,
  List,
@@ -21,12 +23,13 @@ from typing import (
  import intake
  import intake_esm
  import requests
+ import xarray as xr
  import yaml
  from rich import print as pprint

  from .auth import Auth
  from .utils import logger
- from .utils.databrowser_utils import Config
+ from .utils.databrowser_utils import Config, UserDataHandler

  __all__ = ["databrowser"]

@@ -206,7 +209,9 @@ class databrowser:
  self,
  *facets: str,
  uniq_key: Literal["file", "uri"] = "file",
- flavour: Literal["freva", "cmip6", "cmip5", "cordex", "nextgems"] = "freva",
+ flavour: Literal[
+ "freva", "cmip6", "cmip5", "cordex", "nextgems", "user"
+ ] = "freva",
  time: Optional[str] = None,
  host: Optional[str] = None,
  time_select: Literal["flexible", "strict", "file"] = "flexible",
@@ -266,7 +271,7 @@ class databrowser:
  query_url = self._cfg.zarr_loader_url
  token = self._auth.check_authentication(auth_url=self._cfg.auth_url)
  headers = {"Authorization": f"Bearer {token['access_token']}"}
- result = self._get(query_url, headers=headers, stream=True)
+ result = self._request("GET", query_url, headers=headers, stream=True)
  if result is not None:
  try:
  for res in result.iter_lines():
@@ -328,7 +333,7 @@ class databrowser:


  """
- result = self._get(self._cfg.metadata_url)
+ result = self._request("GET", self._cfg.metadata_url)
  if result:
  return cast(int, result.json().get("total_count", 0))
  return 0
@@ -342,7 +347,7 @@ class databrowser:
  url = self._cfg.zarr_loader_url
  kwargs["headers"] = {"Authorization": f"Bearer {token['access_token']}"}
  kwargs["params"] = {"catalogue-type": "intake"}
- result = self._get(url, **kwargs)
+ result = self._request("GET", url, **kwargs)
  if result is None:
  raise ValueError("No results found")

@@ -377,7 +382,7 @@ class databrowser:
  .. execute_code::

  from freva_client import databrowser
- db = databrowser(dataset="cmip6-fs", stream_zarr=True)
+ db = databrowser(dataset="cmip6-hsm", stream_zarr=True)
  cat = db.intake_catalogue()
  print(cat.df)
  """
@@ -389,7 +394,9 @@ class databrowser:
  def count_values(
  cls,
  *facets: str,
- flavour: Literal["freva", "cmip6", "cmip5", "cordex", "nextgems"] = "freva",
+ flavour: Literal[
+ "freva", "cmip6", "cmip5", "cordex", "nextgems", "user"
+ ] = "freva",
  time: Optional[str] = None,
  host: Optional[str] = None,
  time_select: Literal["flexible", "strict", "file"] = "flexible",
@@ -438,6 +445,7 @@ class databrowser:
  Select all versions and not just the latest version (default).
  fail_on_error: bool, default: False
  Make the call fail if the connection to the databrowser could not
+ be established.
  **search_keys: str
  The search constraints to be applied in the data search. If not given
  the whole dataset will be queried.
@@ -519,7 +527,9 @@ class databrowser:
  def metadata_search(
  cls,
  *facets: str,
- flavour: Literal["freva", "cmip6", "cmip5", "cordex", "nextgems"] = "freva",
+ flavour: Literal[
+ "freva", "cmip6", "cmip5", "cordex", "nextgems", "user"
+ ] = "freva",
  time: Optional[str] = None,
  host: Optional[str] = None,
  time_select: Literal["flexible", "strict", "file"] = "flexible",
@@ -571,6 +581,7 @@ class databrowser:
  name will be taken from the freva config file.
  fail_on_error: bool, default: False
  Make the call fail if the connection to the databrowser could not
+ be established.
  **search_keys: str, list[str]
  The facets to be applied in the data search. If not given
  the whole dataset will be queried.
@@ -699,7 +710,7 @@ class databrowser:
  self,
  extended_search: bool = False,
  ) -> Dict[str, List[str]]:
- result = self._get(self._cfg.metadata_url)
+ result = self._request("GET", self._cfg.metadata_url)
  if result is None:
  return {}
  data = result.json()
@@ -709,172 +720,172 @@ class databrowser:
  constraints = data["primary_facets"]
  return {f: v for f, v in data["facets"].items() if f in constraints}

- def add_user_data(
- self, username: str, paths: List[str], facets: Dict[str, str]
+ @classmethod
+ def userdata(
+ cls,
+ action: Literal["add", "delete"],
+ userdata_items: Optional[List[Union[str, xr.Dataset]]] = None,
+ metadata: Optional[Dict[str, str]] = None,
+ host: Optional[str] = None,
+ fail_on_error: bool = False,
  ) -> None:
- """Add user data to the databrowser.
+ """Add or delete user data in the databrowser system.

- Via this functionality, user would be able to add data to the databrowser.
- It accepts file paths and metadata facets to categorize and store the user's
- data.
+ Manage user data in the databrowser system by adding new data or
+ deleting existing data.
+
+ For the "``add``" action, the user can provide data items (file paths
+ or xarray datasets) along with metadata (key-value pairs) to
+ categorize and organize the data.
+
+ For the "``delete``" action, the user provides metadata as search
+ criteria to identify and remove the existing data from the
+ system.

  Parameters
  ~~~~~~~~~~
- username: str
- The username of user.
- paths: list[str]
- A list of paths to the data files that should be uploaded or cataloged.
- facets: dict[str, str]
- A dictionary containing metadata facets (key-value pairs) to describe the
- data.
-
- Returns
- ~~~~~~~~
- None
- If the operation is successful, no return value is provided.
+ action : Literal["add", "delete"]
+ The action to perform: "add" to add new data, or "delete"
+ to remove existing data.
+ userdata_items : List[Union[str, xr.Dataset]], optional
+ A list of user file paths or xarray datasets to add to the
+ databrowser (required for "add").
+ metadata : Dict[str, str], optional
+ Key-value metadata pairs to categorize the data (for "add")
+ or search and identify data for
+ deletion (for "delete").
+ host : str, optional
+ Override the host name of the databrowser server. This is usually
+ the url where the freva web site can be found. Such as
+ www.freva.dkrz.de. By default no host name is given and the host
+ name will be taken from the freva config file.
+ fail_on_error : bool, optional
+ Make the call fail if the connection to the databrowser could not
+ be established.

  Raises
- ~~~~~~~
+ ~~~~~~
  ValueError
- If the operation fails to add the user data.
+ If the operation fails or required parameters are missing
+ for the specified action.
+ FileNotFoundError
+ If no user data is provided for the "add" action.

  Example
  ~~~~~~~
+
+ Adding user data:
+
  .. execute_code::

  from freva_client import authenticate, databrowser
+ import xarray as xr
  token_info = authenticate(username="janedoe")
- db = databrowser()
- db.add_user_data(
- "janedoe",
- ["."],
- {"project": "cmip5", "experiment": "something"}
+ filenames = (
+ "../freva-rest/src/databrowser_api/mock/data/model/regional/cordex/output/EUR-11/"
+ "GERICS/NCC-NorESM1-M/rcp85/r1i1p1/GERICS-REMO2015/v1/3hr/pr/v20181212/*.nc"
+ )
+ filename1 = (
+ "../freva-rest/src/databrowser_api/mock/data/model/regional/cordex/output/EUR-11/"
+ "CLMcom/MPI-M-MPI-ESM-LR/historical/r0i0p0/CLMcom-CCLM4-8-17/v1/fx/orog/v20140515/"
+ "orog_EUR-11_MPI-M-MPI-ESM-LR_historical_r1i1p1_CLMcom-CCLM4-8-17_v1_fx.nc"
+ )
+ xarray_data = xr.open_dataset(filename1)
+ databrowser.userdata(
+ action="add",
+ userdata_items=[xarray_data, filenames],
+ metadata={"project": "cmip5", "experiment": "myFavExp"}
  )
- """
- url = f"{self._cfg.userdata_url}/{username}"
- token = self._auth.check_authentication(auth_url=self._cfg.auth_url)
- headers = {"Authorization": f"Bearer {token['access_token']}"}
- params = {"paths": paths}
- if "username" in facets:
- del facets["username"]
- data = facets
- result = self._put(url, data=data, headers=headers, params=params)
-
- if result is None:
- raise ValueError("Failed to add user data")
-
- def delete_user_data(self, username: str, search_keys: Dict[str, str]) -> None:
- """
- Delete user data from the databrowser.
-
- Uing this, user would be able to delete the user's data from the databrowser
- based on the provided search keys.
-
- Parameters
- ~~~~~~~~~~
- username: str
- The username associated with the data to be deleted.
- search_keys: dict[str, str]
- A dictionary containing the search keys to identify the data to be deleted.

- Returns
- ~~~~~~~~
- None
- If the operation is successful, no return value is provided.
+ Deleting user data:

- Raises
- ~~~~~~~
- ValueError
- If the operation fails to delete the user data.
-
- Example
- ~~~~~~~
  .. execute_code::

- from freva_client import databrowser, authenticate
+ from freva_client import authenticate, databrowser
  token_info = authenticate(username="janedoe")
- db = databrowser()
- db.delete_user_data(
- "janedoe",
- {"project": "cmip5", "experiment": "something"}
+ databrowser.userdata(
+ action="delete",
+ metadata={"project": "cmip5", "experiment": "myFavExp"}
  )
  """
- url = f"{self._cfg.userdata_url}/{username}"
- token = self._auth.check_authentication(auth_url=self._cfg.auth_url)
+ this = cls(
+ host=host,
+ fail_on_error=fail_on_error,
+ )
+ userdata_items = userdata_items or []
+ metadata = metadata or {}
+ url = f"{this._cfg.userdata_url}"
+ token = this._auth.check_authentication(auth_url=this._cfg.auth_url)
  headers = {"Authorization": f"Bearer {token['access_token']}"}
- data = search_keys
- result = self._delete(url, headers=headers, json=data)
- if result is None:
- raise ValueError("Failed to delete user data")
+ payload_metadata: dict[str, Collection[Collection[str]]] = {}
+
+ if action == "add":
+ user_data_handler = UserDataHandler(userdata_items)
+ if user_data_handler.user_metadata:
+ payload_metadata = {
+ "user_metadata": user_data_handler.user_metadata,
+ "facets": metadata,
+ }
+ result = this._request(
+ "POST", url, data=payload_metadata, headers=headers
+ )
+ if result is not None:
+ response_data = result.json()
+ status_message = response_data.get("status")
+ else:
+ raise ValueError("Failed to add user data")
+ pprint(f"[b][green]{status_message}[green][b]")
+ else:
+ raise ValueError("No metadata generated from the input data.")

- def _get(self, url: str, **kwargs: Any) -> Optional[requests.models.Response]:
- """Apply the get method to the databrowser."""
- logger.debug("Searching %s with parameters: %s", url, self._params)
- params = kwargs.pop("params", {})
- kwargs.setdefault("timeout", 30)
- try:
- res = requests.get(url, params={**self._params, **params}, **kwargs)
- res.raise_for_status()
- return res
- except KeyboardInterrupt:
- pprint("[red][b]User interrupt: Exit[/red][/b]", file=sys.stderr)
- except (
- requests.exceptions.ConnectionError,
- requests.exceptions.HTTPError,
- ) as error:
- msg = f"Search request failed with {error}"
- if self._fail_on_error:
- raise ValueError(msg) from None
- logger.warning(msg)
- return None
+ if action == "delete":
+ if userdata_items:
+ logger.info(
+ "'userdata_items' are not needed for the 'delete'"
+ "action and will be ignored."
+ )
+
+ result = this._request("DELETE", url, data=metadata, headers=headers)

- def _put(
- self, url: str, data: Dict[str, Any], **kwargs: Any
+ if result is None:
+ raise ValueError("Failed to delete user data")
+ pprint("[b][green]User data deleted successfully[green][b]")
+
+ def _request(
+ self,
+ method: Literal["GET", "POST", "PUT", "PATCH", "DELETE"],
+ url: str,
+ data: Optional[Dict[str, Any]] = None,
+ **kwargs: Any
  ) -> Optional[requests.models.Response]:
- """Apply the PUT method to the databrowser."""
- logger.debug(
- "PUT request to %s with data: %s and parameters: %s",
- url,
- data,
- self._params,
- )
- kwargs.setdefault("timeout", 30)
+ """Request method to handle CRUD operations (GET, POST, PUT, PATCH, DELETE)."""
+ method_upper = method.upper()
+ timeout = kwargs.pop("timeout", 30)
  params = kwargs.pop("params", {})
- try:
- res = requests.put(
- url, json=data, params={**self._params, **params}, **kwargs
- )
- res.raise_for_status()
- return res
- except KeyboardInterrupt:
- pprint("[red][b]User interrupt: Exit[/red][/b]", file=sys.stderr)
+ stream = kwargs.pop("stream", False)

- except (
- requests.exceptions.ConnectionError,
- requests.exceptions.HTTPError,
- ) as error:
- msg = f"adding user data request failed with {error}"
- if self._fail_on_error:
- raise ValueError(msg) from None
- logger.warning(msg)
- return None
+ logger.debug("%s request to %s with data: %s and parameters: %s",
+ method_upper, url, data, {**self._params, **params})

- def _delete(self, url: str, **kwargs: Any) -> Optional[requests.models.Response]:
- """Apply the DELETE method to the databrowser."""
- logger.debug("DELETE request to %s with parameters: %s", url, self._params)
- params = kwargs.pop("params", {})
- kwargs.setdefault("timeout", 30)
  try:
- res = requests.delete(url, params={**self._params, **params}, **kwargs)
- res.raise_for_status()
- return res
+ req = requests.Request(
+ method=method_upper,
+ url=url,
+ params={**self._params, **params},
+ json=None if method_upper in "GET" else data,
+ **kwargs
+ )
+ with requests.Session() as session:
+ prepared = session.prepare_request(req)
+ res = session.send(prepared, timeout=timeout, stream=stream)
+ res.raise_for_status()
+ return res
+
  except KeyboardInterrupt:
  pprint("[red][b]User interrupt: Exit[/red][/b]", file=sys.stderr)
- except (
- requests.exceptions.ConnectionError,
- requests.exceptions.HTTPError,
- ) as error:
- msg = f"DELETE request failed with {error}"
+ except (requests.exceptions.ConnectionError,
+ requests.exceptions.HTTPError) as error:
+ msg = f"{method_upper} request failed with {error}"
  if self._fail_on_error:
  raise ValueError(msg) from None
  logger.warning(msg)
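
The `query.py` changes above also collapse the separate `_get`, `_put`, and `_delete` helpers into a single `_request` method built on a prepared `requests.Request`. The pattern, reduced to a stand-alone sketch; the function name `send_request` is ours, and the freva-specific query parameters, logging, and `fail_on_error` handling are left out:

```python
from typing import Any, Dict, Literal, Optional

import requests


def send_request(
    method: Literal["GET", "POST", "PUT", "PATCH", "DELETE"],
    url: str,
    data: Optional[Dict[str, Any]] = None,
    **kwargs: Any,
) -> Optional[requests.models.Response]:
    """Simplified stand-in for the consolidated databrowser._request helper."""
    timeout = kwargs.pop("timeout", 30)
    stream = kwargs.pop("stream", False)
    # GET requests carry no JSON body; every other verb sends `data` as JSON.
    req = requests.Request(
        method=method,
        url=url,
        json=None if method == "GET" else data,
        **kwargs,
    )
    try:
        with requests.Session() as session:
            prepared = session.prepare_request(req)
            res = session.send(prepared, timeout=timeout, stream=stream)
            res.raise_for_status()
            return res
    except (requests.exceptions.ConnectionError, requests.exceptions.HTTPError):
        # The real method logs a warning, or raises ValueError when fail_on_error is set.
        return None
```

In the actual method the instance's stored search parameters are merged into the query string and `KeyboardInterrupt` is reported separately.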

freva_client-2411.0.0/src/freva_client/utils/databrowser_utils.py (new file)

@@ -0,0 +1,368 @@
+ """Various utilities for getting the databrowser working."""
+
+ import concurrent.futures
+ import os
+ import sys
+ import sysconfig
+ from configparser import ConfigParser, ExtendedInterpolation
+ from functools import cached_property
+ from pathlib import Path
+ from typing import (
+ Any,
+ Dict,
+ Iterator,
+ List,
+ Literal,
+ Optional,
+ Sequence,
+ Tuple,
+ Union,
+ cast,
+ )
+
+ import appdirs
+ import numpy as np
+ import requests
+ import tomli
+ import xarray as xr
+
+ from . import logger
+
+
+ class Config:
+ """Client config class.
+
+ This class is used for basic configuration of the databrowser
+ client.
+ """
+
+ def __init__(
+ self,
+ host: Optional[str] = None,
+ uniq_key: Literal["file", "uri"] = "file",
+ flavour: str = "freva",
+ ) -> None:
+ self.databrowser_url = f"{self.get_api_url(host)}/databrowser"
+ self.auth_url = f"{self.get_api_url(host)}/auth/v2"
+ self.uniq_key = uniq_key
+ self._flavour = flavour
+
+ def _read_ini(self, path: Path) -> str:
+ """Read an ini file."""
+ ini_parser = ConfigParser(interpolation=ExtendedInterpolation())
+ ini_parser.read_string(path.read_text())
+ config = ini_parser["evaluation_system"]
+ scheme, host = self._split_url(
+ config.get("databrowser.host") or config.get("solr.host")
+ )
+ host, _, port = (host or "").partition(":")
+ port = port or config.get("databrowser.port", "")
+ if port:
+ host = f"{host}:{port}"
+ return f"{scheme}://{host}"
+
+ def _read_toml(self, path: Path) -> str:
+ """Read a new style toml config file."""
+ try:
+ config = tomli.loads(path.read_text()).get("freva", {})
+ scheme, host = self._split_url(cast(str, config["host"]))
+ except (tomli.TOMLDecodeError, KeyError):
+ return ""
+ host, _, port = host.partition(":")
+ if port:
+ host = f"{host}:{port}"
+ return f"{scheme}://{host}"
+
+ def _read_config(self, path: Path, file_type: Literal["toml", "ini"]) -> str:
+ """Read the configuration."""
+ data_types = {"toml": self._read_toml, "ini": self._read_ini}
+ try:
+ return data_types[file_type](path)
+ except KeyError:
+ pass
+ return ""
+
+ @cached_property
+ def overview(self) -> Dict[str, Any]:
+ """Get an overview of the all databrowser flavours and search keys."""
+ try:
+ res = requests.get(f"{self.databrowser_url}/overview", timeout=15)
+ except requests.exceptions.ConnectionError:
+ raise ValueError(f"Could not connect to {self.databrowser_url}") from None
+ return cast(Dict[str, Any], res.json())
+
+ def _get_databrowser_host_from_config(self) -> str:
+ """Get the config file order."""
+
+ eval_conf = self.get_dirs(user=False) / "evaluation_system.conf"
+ freva_config = Path(
+ os.environ.get("FREVA_CONFIG")
+ or Path(self.get_dirs(user=False)) / "freva.toml"
+ )
+ paths: Dict[Path, Literal["toml", "ini"]] = {
+ Path(appdirs.user_config_dir("freva")) / "freva.toml": "toml",
+ Path(self.get_dirs(user=True)) / "freva.toml": "toml",
+ freva_config: "toml",
+ Path(os.environ.get("EVALUATION_SYSTEM_CONFIG_FILE") or eval_conf): "ini",
+ }
+ for config_path, config_type in paths.items():
+ if config_path.is_file():
+ host = self._read_config(config_path, config_type)
+ if host:
+ return host
+ raise ValueError(
+ "No databrowser host configured, please use a"
+ " configuration defining a databrowser host or"
+ " set a host name using the `host` key"
+ )
+
+ @cached_property
+ def flavour(self) -> str:
+ """Get the flavour."""
+ flavours = self.overview.get("flavours", [])
+ if self._flavour not in flavours:
+ raise ValueError(
+ f"Search {self._flavour} not available, select from"
+ f" {','.join(flavours)}"
+ )
+ return self._flavour
+
+ @property
+ def search_url(self) -> str:
+ """Define the data search endpoint."""
+ return f"{self.databrowser_url}/data_search/{self.flavour}/{self.uniq_key}"
+
+ @property
+ def zarr_loader_url(self) -> str:
+ """Define the url for getting zarr files."""
+ return f"{self.databrowser_url}/load/{self.flavour}/"
+
+ @property
+ def intake_url(self) -> str:
+ """Define the url for creating intake catalogues."""
+ return f"{self.databrowser_url}/intake_catalogue/{self.flavour}/{self.uniq_key}"
+
+ @property
+ def metadata_url(self) -> str:
+ """Define the endpoint for the metadata search."""
+ return (
+ f"{self.databrowser_url}/metadata_search/" f"{self.flavour}/{self.uniq_key}"
+ )
+
+ @staticmethod
+ def _split_url(url: str) -> Tuple[str, str]:
+ scheme, _, hostname = url.partition("://")
+ if not hostname:
+ hostname = scheme
+ scheme = ""
+ scheme = scheme or "http"
+ return scheme, hostname
+
+ def get_api_url(self, url: Optional[str]) -> str:
+ """Construct the databrowser url from a given hostname."""
+ url = url or self._get_databrowser_host_from_config()
+ scheme, hostname = self._split_url(url)
+ hostname, _, port = hostname.partition(":")
+ if port:
+ hostname = f"{hostname}:{port}"
+ hostname = hostname.partition("/")[0]
+ return f"{scheme}://{hostname}/api"
+
+ @staticmethod
+ def get_dirs(user: bool = True) -> Path:
+ """Get the 'scripts' and 'purelib' directories we'll install into.
+
+ This is now a thin wrapper around sysconfig.get_paths(). It's not inlined,
+ because some tests mock it out to install to a different location.
+ """
+ if user:
+ if (sys.platform == "darwin") and sysconfig.get_config_var(
+ "PYTHONFRAMEWORK"
+ ):
+ scheme = "osx_framework_user"
+ else:
+ scheme = f"{os.name}_user"
+ return Path(sysconfig.get_path("data", scheme)) / "share" / "freva"
+ # The default scheme is 'posix_prefix' or 'nt', and should work for e.g.
+ # installing into a virtualenv
+ return Path(sysconfig.get_path("data")) / "share" / "freva"
+
+ @property
+ def userdata_url(self) -> str:
+ """Define the url for adding and deleting user-data."""
+ return f"{self.databrowser_url}/userdata"
+
+
+ class UserDataHandler:
+ """Class for processing user data.
+
+ This class is used for processing user data and extracting metadata
+ from the data files.
+ """
+ def __init__(self, userdata_items: List[Union[str, xr.Dataset]]) -> None:
+ self._suffixes = [".nc", ".nc4", ".grb", ".grib", ".zarr", "zar"]
+ self.user_metadata: List[Dict[str, Union[str, List[str], Dict[str, str]]]] = []
+ self._metadata_collection: List[Dict[str, Union[str, List[str]]]] = []
+ try:
+ self._executor = concurrent.futures.ThreadPoolExecutor(
+ max_workers=min(int(os.cpu_count() or 4), 15)
+ )
+ self._process_user_data(userdata_items)
+ finally:
+ self._executor.shutdown(wait=True)
+
+ def _gather_files(self, path: Path, pattern: str = "*") -> Iterator[Path]:
+ """Gather all valid files from directory and wildcard pattern."""
+ for item in path.rglob(pattern):
+ if item.is_file() and item.suffix in self._suffixes:
+ yield item
+
+ def _validate_user_data(
+ self,
+ user_data: Sequence[Union[str, xr.Dataset]],
+ ) -> Dict[str, Union[List[Path], List[xr.Dataset]]]:
+ validated_paths: List[Path] = []
+ validated_xarray_datasets: List[xr.Dataset] = []
+ for data in user_data:
+ if isinstance(data, (str, Path)):
+ path = Path(data).expanduser().absolute()
+ if path.is_dir():
+ validated_paths.extend(self._gather_files(path))
+ elif path.is_file() and path.suffix in self._suffixes:
+ validated_paths.append(path)
+ else:
+ validated_paths.extend(
+ self._gather_files(path.parent, pattern=path.name)
+ )
+ elif isinstance(data, xr.Dataset):
+ validated_xarray_datasets.append(data)
+
+ if not validated_paths and not validated_xarray_datasets:
+ raise FileNotFoundError("No valid file paths or xarray datasets found.")
+ return {
+ "validated_user_paths": validated_paths,
+ "validated_user_xrdatasets": validated_xarray_datasets,
+ }
+
+ def _process_user_data(self, userdata_items: List[Union[str, xr.Dataset]],
+ ) -> None:
+ """Process xarray datasets and file paths using thread pool."""
+ futures = []
+ validated_userdata: Dict[str, Union[List[Path], List[xr.Dataset]]] = \
+ self._validate_user_data(userdata_items)
+ if validated_userdata["validated_user_xrdatasets"]:
+ futures.append(
+ self._executor.submit(self._process_userdata_in_executor,
+ validated_userdata["validated_user_xrdatasets"])
+ )
+
+ if validated_userdata["validated_user_paths"]:
+ futures.append(
+ self._executor.submit(self._process_userdata_in_executor,
+ validated_userdata["validated_user_paths"])
+ )
+ for future in futures:
+ try:
+ future.result()
+ except Exception as e: # pragma: no cover
+ logger.error(f"Error processing batch: {e}")
+
+ def _process_userdata_in_executor(
+ self, validated_userdata: Union[List[Path], List[xr.Dataset]]
+ ) -> None:
+ for data in validated_userdata:
+ metadata = self._get_metadata(data)
+ if isinstance(metadata, Exception) or metadata == {}:
+ logger.warning("Error getting metadata: %s", metadata)
+ else:
+ self.user_metadata.append(metadata)
+
+ def _timedelta_to_cmor_frequency(self, dt: float) -> str:
+ for total_seconds, frequency in self._time_table.items():
+ if dt >= total_seconds:
+ return frequency
+ return "fx" # pragma: no cover
+
+ @property
+ def _time_table(self) -> dict[int, str]:
+ return {
+ 315360000: "dec", # Decade
+ 31104000: "yr", # Year
+ 2538000: "mon", # Month
+ 1296000: "sem", # Seasonal (half-year)
+ 84600: "day", # Day
+ 21600: "6h", # Six-hourly
+ 10800: "3h", # Three-hourly
+ 3600: "hr", # Hourly
+ 1: "subhr", # Sub-hourly
+ }
+
+ def _get_time_frequency(self, time_delta: int, freq_attr: str = "") -> str:
+ if freq_attr in self._time_table.values():
+ return freq_attr
+ return self._timedelta_to_cmor_frequency(time_delta)
+
+ def _get_metadata(
+ self, path: Union[os.PathLike[str], xr.Dataset]
+ ) -> Dict[str, Union[str, List[str], Dict[str, str]]]:
+ """Get metadata from a path or xarray dataset."""
+
+ try:
+ dset = (
+ path if isinstance(path, xr.Dataset)
+ else xr.open_mfdataset(str(path),
+ parallel=False,
+ use_cftime=True,
+ lock=False)
+ )
+ time_freq = dset.attrs.get("frequency", "")
+ data_vars = list(map(str, dset.data_vars))
+ coords = list(map(str, dset.coords))
+ try:
+ times = dset["time"].values[:]
+ except (KeyError, IndexError, TypeError):
+ times = np.array([])
+
+ except Exception as error:
+ logger.error("Failed to open data file %s: %s", str(path), error)
+ return {}
+ if len(times) > 0:
+ try:
+ time_str = f"[{times[0].isoformat()}Z TO {times[-1].isoformat()}Z]"
+ dt = abs((times[1] - times[0]).total_seconds()) if len(times) > 1 else 0
+ except Exception as non_cftime:
+ logger.info("The time var is not based on the cftime: %s", non_cftime)
+ time_str = (
+ f"[{np.datetime_as_string(times[0], unit='s')}Z TO "
+ f"{np.datetime_as_string(times[-1], unit='s')}Z]"
+ )
+ dt = (
+ abs((times[1] - times[0]).astype("timedelta64[s]").astype(int))
+ if len(times) > 1
+ else 0
+ )
+ else:
+ time_str = "fx"
+ dt = 0
+
+ variables = [
+ var
+ for var in data_vars
+ if var not in coords
+ and not any(
+ term in var.lower() for term in ["lon", "lat", "bnds", "x", "y"]
+ )
+ and var.lower() not in ["rotated_pole", "rot_pole"]
+ ]
+
+ _data: Dict[str, Union[str, List[str], Dict[str, str]]] = {}
+ _data.setdefault("variable", variables[0])
+ _data.setdefault("time_frequency", self._get_time_frequency(dt, time_freq))
+ _data["time"] = time_str
+ _data.setdefault("cmor_table", _data["time_frequency"])
+ _data.setdefault("version", "")
+ if isinstance(path, Path):
+ _data["file"] = str(path)
+ if isinstance(path, xr.Dataset):
+ _data["file"] = str(dset.encoding["source"])
+ return _data
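
`UserDataHandler` is the internal helper that `databrowser.userdata(action="add", ...)` relies on: it expands directories and glob patterns, opens each netCDF/GRIB/zarr file or in-memory `xarray.Dataset` in a thread pool, and collects one metadata record (`variable`, `time`, `time_frequency`, `cmor_table`, `version`, `file`) per input in `user_metadata`. A rough illustration of that flow; the file paths are placeholders and calling the class directly is only for demonstration:

```python
import xarray as xr

from freva_client.utils.databrowser_utils import UserDataHandler

# Placeholder inputs: any mix of file paths, glob patterns, directories or
# already opened xarray datasets is accepted.
dset = xr.open_dataset("/work/user/tas_day_example.nc")
handler = UserDataHandler(["/work/user/more_output/*.nc", dset])

# One dictionary per successfully opened input, ready to be POSTed to the
# databrowser's userdata endpoint, e.g.
# {"variable": "tas", "time_frequency": "day", "time": "[...Z TO ...Z]",
#  "cmor_table": "day", "version": "", "file": "/work/user/tas_day_example.nc"}
for record in handler.user_metadata:
    print(record)
```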

freva_client-2410.0.0b1/src/freva_client/utils/databrowser_utils.py (deleted)

@@ -1,177 +0,0 @@
- """Various utilities for getting the databrowser working."""
-
- import os
- import sys
- import sysconfig
- from configparser import ConfigParser, ExtendedInterpolation
- from functools import cached_property
- from pathlib import Path
- from typing import Any, Dict, Literal, Optional, Tuple, cast
-
- import appdirs
- import requests
- import tomli
-
-
- class Config:
- """Client config class.
-
- This class is used for basic configuration of the databrowser
- client.
- """
-
- def __init__(
- self,
- host: Optional[str] = None,
- uniq_key: Literal["file", "uri"] = "file",
- flavour: str = "freva",
- ) -> None:
- self.databrowser_url = f"{self.get_api_url(host)}/databrowser"
- self.auth_url = f"{self.get_api_url(host)}/auth/v2"
- self.uniq_key = uniq_key
- self._flavour = flavour
-
- def _read_ini(self, path: Path) -> str:
- """Read an ini file."""
- ini_parser = ConfigParser(interpolation=ExtendedInterpolation())
- ini_parser.read_string(path.read_text())
- config = ini_parser["evaluation_system"]
- scheme, host = self._split_url(
- config.get("databrowser.host") or config.get("solr.host")
- )
- host, _, port = (host or "").partition(":")
- port = port or config.get("databrowser.port", "")
- if port:
- host = f"{host}:{port}"
- return f"{scheme}://{host}"
-
- def _read_toml(self, path: Path) -> str:
- """Read a new style toml config file."""
- try:
- config = tomli.loads(path.read_text()).get("freva", {})
- scheme, host = self._split_url(cast(str, config["host"]))
- except (tomli.TOMLDecodeError, KeyError):
- return ""
- host, _, port = host.partition(":")
- if port:
- host = f"{host}:{port}"
- return f"{scheme}://{host}"
-
- def _read_config(self, path: Path, file_type: Literal["toml", "ini"]) -> str:
- """Read the configuration."""
- data_types = {"toml": self._read_toml, "ini": self._read_ini}
- try:
- return data_types[file_type](path)
- except KeyError:
- pass
- return ""
-
- @cached_property
- def overview(self) -> Dict[str, Any]:
- """Get an overview of the all databrowser flavours and search keys."""
- try:
- res = requests.get(f"{self.databrowser_url}/overview", timeout=15)
- except requests.exceptions.ConnectionError:
- raise ValueError(f"Could not connect to {self.databrowser_url}") from None
- return cast(Dict[str, Any], res.json())
-
- def _get_databrowser_host_from_config(self) -> str:
- """Get the config file order."""
-
- eval_conf = self.get_dirs(user=False) / "evaluation_system.conf"
- freva_config = Path(
- os.environ.get("FREVA_CONFIG")
- or Path(self.get_dirs(user=False)) / "freva.toml"
- )
- paths: Dict[Path, Literal["toml", "ini"]] = {
- Path(appdirs.user_config_dir("freva")) / "freva.toml": "toml",
- Path(self.get_dirs(user=True)) / "freva.toml": "toml",
- freva_config: "toml",
- Path(os.environ.get("EVALUATION_SYSTEM_CONFIG_FILE") or eval_conf): "ini",
- }
- for config_path, config_type in paths.items():
- if config_path.is_file():
- host = self._read_config(config_path, config_type)
- if host:
- return host
- raise ValueError(
- "No databrowser host configured, please use a"
- " configuration defining a databrowser host or"
- " set a host name using the `host` key"
- )
-
- @cached_property
- def flavour(self) -> str:
- """Get the flavour."""
- flavours = self.overview.get("flavours", [])
- if self._flavour not in flavours:
- raise ValueError(
- f"Search {self._flavour} not available, select from"
- f" {','.join(flavours)}"
- )
- return self._flavour
-
- @property
- def search_url(self) -> str:
- """Define the data search endpoint."""
- return f"{self.databrowser_url}/data_search/{self.flavour}/{self.uniq_key}"
-
- @property
- def zarr_loader_url(self) -> str:
- """Define the url for getting zarr files."""
- return f"{self.databrowser_url}/load/{self.flavour}/"
-
- @property
- def intake_url(self) -> str:
- """Define the url for creating intake catalogues."""
- return f"{self.databrowser_url}/intake_catalogue/{self.flavour}/{self.uniq_key}"
-
- @property
- def metadata_url(self) -> str:
- """Define the endpoint for the metadata search."""
- return (
- f"{self.databrowser_url}/metadata_search/" f"{self.flavour}/{self.uniq_key}"
- )
-
- @staticmethod
- def _split_url(url: str) -> Tuple[str, str]:
- scheme, _, hostname = url.partition("://")
- if not hostname:
- hostname = scheme
- scheme = ""
- scheme = scheme or "http"
- return scheme, hostname
-
- def get_api_url(self, url: Optional[str]) -> str:
- """Construct the databrowser url from a given hostname."""
- url = url or self._get_databrowser_host_from_config()
- scheme, hostname = self._split_url(url)
- hostname, _, port = hostname.partition(":")
- if port:
- hostname = f"{hostname}:{port}"
- hostname = hostname.partition("/")[0]
- return f"{scheme}://{hostname}/api"
-
- @staticmethod
- def get_dirs(user: bool = True) -> Path:
- """Get the 'scripts' and 'purelib' directories we'll install into.
-
- This is now a thin wrapper around sysconfig.get_paths(). It's not inlined,
- because some tests mock it out to install to a different location.
- """
- if user:
- if (sys.platform == "darwin") and sysconfig.get_config_var(
- "PYTHONFRAMEWORK"
- ):
- scheme = "osx_framework_user"
- else:
- scheme = f"{os.name}_user"
- return Path(sysconfig.get_path("data", scheme)) / "share" / "freva"
- # The default scheme is 'posix_prefix' or 'nt', and should work for e.g.
- # installing into a virtualenv
- return Path(sysconfig.get_path("data")) / "share" / "freva"
-
- @property
- def userdata_url(self) -> str:
- """Define the url for adding and deleting user-data."""
- return f"{self.databrowser_url}/userdata"
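
For comparison, the `Config` class itself moves over unchanged; only the `UserDataHandler` machinery above is new. Its host handling keeps an explicit scheme and port, strips any path component, and appends `/api`, which the derived endpoint URLs build on. A small illustration; the hostname is made up, and passing `host` explicitly avoids reading any config file:

```python
from freva_client.utils.databrowser_utils import Config

cfg = Config(host="https://freva.example.org:8080/web")
print(cfg.databrowser_url)  # https://freva.example.org:8080/api/databrowser
print(cfg.auth_url)         # https://freva.example.org:8080/api/auth/v2
print(cfg.userdata_url)     # https://freva.example.org:8080/api/databrowser/userdata
```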