esgvoc 1.0.0__py3-none-any.whl → 1.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of esgvoc might be problematic. Click here for more details.

Files changed (43) hide show
  1. esgvoc/__init__.py +1 -1
  2. esgvoc/api/__init__.py +0 -6
  3. esgvoc/api/data_descriptors/__init__.py +8 -0
  4. esgvoc/api/data_descriptors/archive.py +5 -0
  5. esgvoc/api/data_descriptors/citation_url.py +5 -0
  6. esgvoc/api/data_descriptors/experiment.py +2 -2
  7. esgvoc/api/data_descriptors/known_branded_variable.py +58 -5
  8. esgvoc/api/data_descriptors/member_id.py +9 -0
  9. esgvoc/api/data_descriptors/regex.py +5 -0
  10. esgvoc/api/data_descriptors/vertical_label.py +2 -2
  11. esgvoc/api/project_specs.py +48 -130
  12. esgvoc/api/projects.py +185 -66
  13. esgvoc/apps/drs/generator.py +103 -85
  14. esgvoc/apps/drs/validator.py +22 -38
  15. esgvoc/apps/jsg/json_schema_generator.py +255 -130
  16. esgvoc/apps/jsg/templates/template.jinja +249 -0
  17. esgvoc/apps/test_cv/README.md +214 -0
  18. esgvoc/apps/test_cv/cv_tester.py +1368 -0
  19. esgvoc/apps/test_cv/example_usage.py +216 -0
  20. esgvoc/apps/vr/__init__.py +12 -0
  21. esgvoc/apps/vr/build_variable_registry.py +71 -0
  22. esgvoc/apps/vr/example_usage.py +60 -0
  23. esgvoc/apps/vr/vr_app.py +333 -0
  24. esgvoc/cli/config.py +671 -86
  25. esgvoc/cli/drs.py +39 -21
  26. esgvoc/cli/main.py +2 -0
  27. esgvoc/cli/test_cv.py +257 -0
  28. esgvoc/core/constants.py +10 -7
  29. esgvoc/core/data_handler.py +24 -22
  30. esgvoc/core/db/connection.py +7 -0
  31. esgvoc/core/db/project_ingestion.py +34 -9
  32. esgvoc/core/db/universe_ingestion.py +1 -2
  33. esgvoc/core/service/configuration/setting.py +192 -21
  34. esgvoc/core/service/data_merger.py +1 -1
  35. esgvoc/core/service/state.py +18 -2
  36. {esgvoc-1.0.0.dist-info → esgvoc-1.1.1.dist-info}/METADATA +2 -3
  37. {esgvoc-1.0.0.dist-info → esgvoc-1.1.1.dist-info}/RECORD +41 -30
  38. esgvoc/apps/jsg/cmip6_template.json +0 -74
  39. esgvoc/apps/jsg/cmip6plus_template.json +0 -74
  40. /esgvoc/apps/{py.typed → test_cv/__init__.py} +0 -0
  41. {esgvoc-1.0.0.dist-info → esgvoc-1.1.1.dist-info}/WHEEL +0 -0
  42. {esgvoc-1.0.0.dist-info → esgvoc-1.1.1.dist-info}/entry_points.txt +0 -0
  43. {esgvoc-1.0.0.dist-info → esgvoc-1.1.1.dist-info}/licenses/LICENSE.txt +0 -0
esgvoc/__init__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  import esgvoc.core.logging_handler # noqa
2
2
 
3
- __version__ = "1.0.0"
3
+ __version__ = "1.1.1"
esgvoc/api/__init__.py CHANGED
@@ -1,8 +1,5 @@
1
1
  from esgvoc.api.project_specs import (
2
- DrsCollection,
3
- DrsConstant,
4
2
  DrsPart,
5
- DrsPartKind,
6
3
  DrsSpecification,
7
4
  DrsType,
8
5
  ProjectSpecs,
@@ -51,10 +48,7 @@ from esgvoc.api.universe import (
51
48
  )
52
49
 
53
50
  __all__ = [
54
- "DrsCollection",
55
- "DrsConstant",
56
51
  "DrsPart",
57
- "DrsPartKind",
58
52
  "DrsSpecification",
59
53
  "DrsType",
60
54
  "find_collections_in_project",
@@ -1,7 +1,9 @@
1
1
  from esgvoc.api.data_descriptors.activity import Activity
2
+ from esgvoc.api.data_descriptors.archive import Archive
2
3
  from esgvoc.api.data_descriptors.area_label import AreaLabel
3
4
  from esgvoc.api.data_descriptors.branded_suffix import BrandedSuffix
4
5
  from esgvoc.api.data_descriptors.branded_variable import BrandedVariable
6
+ from esgvoc.api.data_descriptors.citation_url import CitationUrl
5
7
  from esgvoc.api.data_descriptors.consortium import Consortium
6
8
  from esgvoc.api.data_descriptors.contact import Contact
7
9
  from esgvoc.api.data_descriptors.conventions import Convention
@@ -20,6 +22,7 @@ from esgvoc.api.data_descriptors.initialisation_index import InitialisationIndex
20
22
  from esgvoc.api.data_descriptors.institution import Institution
21
23
  from esgvoc.api.data_descriptors.known_branded_variable import KnownBrandedVariable
22
24
  from esgvoc.api.data_descriptors.license import License
25
+ from esgvoc.api.data_descriptors.member_id import MemberId
23
26
  from esgvoc.api.data_descriptors.mip_era import MipEra
24
27
  from esgvoc.api.data_descriptors.model_component import ModelComponent
25
28
  from esgvoc.api.data_descriptors.obs_type import ObsType
@@ -29,6 +32,7 @@ from esgvoc.api.data_descriptors.product import Product
29
32
  from esgvoc.api.data_descriptors.publication_status import PublicationStatus
30
33
  from esgvoc.api.data_descriptors.realisation_index import RealisationIndex
31
34
  from esgvoc.api.data_descriptors.realm import Realm
35
+ from esgvoc.api.data_descriptors.regex import Regex
32
36
  from esgvoc.api.data_descriptors.region import Region
33
37
  from esgvoc.api.data_descriptors.resolution import Resolution
34
38
  from esgvoc.api.data_descriptors.source import Source
@@ -87,5 +91,9 @@ DATA_DESCRIPTOR_CLASS_MAPPING: dict[str, type[DataDescriptor]] = {
87
91
  "title": Title,
88
92
  "contact": Contact,
89
93
  "region": Region,
94
+ "member_id": MemberId,
90
95
  "obs_type": ObsType, # obs4Mips
96
+ "regex": Regex,
97
+ "citation_url": CitationUrl,
98
+ "archive": Archive,
91
99
  }
@@ -0,0 +1,5 @@
1
+ from esgvoc.api.data_descriptors.data_descriptor import PlainTermDataDescriptor
2
+
3
+
4
+ class Archive(PlainTermDataDescriptor):
5
+ pass
@@ -0,0 +1,5 @@
1
+ from esgvoc.api.data_descriptors.data_descriptor import PatternTermDataDescriptor
2
+
3
+
4
+ class CitationUrl(PatternTermDataDescriptor):
5
+ pass
@@ -20,8 +20,8 @@ class Experiment(PlainTermDataDescriptor):
20
20
  experiment: str
21
21
  required_model_components: list[str] | None
22
22
  additional_allowed_model_components: list[str] = Field(default_factory=list)
23
- start_year: int | None
24
- end_year: int | None
23
+ start_year: str | int | None
24
+ end_year: str | int | None
25
25
  min_number_yrs_per_sim: int | None
26
26
  parent_activity_id: list[str] | None
27
27
  parent_experiment_id: list[str] | None
@@ -1,7 +1,30 @@
1
+ from typing import Any, Dict, List, Optional
2
+
1
3
  from pydantic import Field
2
4
 
3
5
  from esgvoc.api.data_descriptors.data_descriptor import PlainTermDataDescriptor
4
6
 
7
+ #
8
+ # class KnownBrandedVariable(PlainTermDataDescriptor):
9
+ # """
10
+ # A climate-related quantity or measurement, including information about sampling.
11
+ #
12
+ # The concept of a branded variable was introduced in CMIP7.
13
+ # A branded variable is composed of two parts.
14
+ # The first part is the root variable (see :py:class:`Variable`).
15
+ # The second is the suffix (see :py:class:`BrandedSuffix`).
16
+ #
17
+ # For further details on the development of branded variables,
18
+ # see [this paper draft](https://docs.google.com/document/d/19jzecgymgiiEsTDzaaqeLP6pTvLT-NzCMaq-wu-QoOc/edit?pli=1&tab=t.0).
19
+ # """
20
+ #
21
+ # description: str
22
+ # dimensions: list[str] = Field(default_factory=list)
23
+ # cell_methods: str
24
+ # variable: str
25
+ # label: str
26
+ #
27
+
5
28
 
6
29
  class KnownBrandedVariable(PlainTermDataDescriptor):
7
30
  """
@@ -16,8 +39,38 @@ class KnownBrandedVariable(PlainTermDataDescriptor):
16
39
  see [this paper draft](https://docs.google.com/document/d/19jzecgymgiiEsTDzaaqeLP6pTvLT-NzCMaq-wu-QoOc/edit?pli=1&tab=t.0).
17
40
  """
18
41
 
19
- description: str
20
- dimensions: list[str] = Field(default_factory=list)
21
- cell_methods: str
22
- variable: str
23
- label: str
42
+ # # ESGVoc required fields
43
+ # id: str = Field(description="Unique identifier, e.g., 'ta_tavg-p19-hxy-air'")
44
+ # type: str = Field(default="branded_variable", description="ESGVoc type identifier")
45
+ # drs_name: str = Field(description="DRS name, same as id")
46
+ # => already in PlainTermDataDescriptor
47
+
48
+ # CF Standard Name context (flattened from hierarchy)
49
+ cf_standard_name: str = Field(description="CF standard name, e.g., 'air_temperature'")
50
+ cf_units: str = Field(description="CF standard units, e.g., 'K'")
51
+ cf_sn_status: str = Field(description="CF standard name status, e.g., 'approved'")
52
+
53
+ # Variable Root context (flattened from hierarchy)
54
+ variable_root_name: str = Field(description="Variable root name, e.g., 'ta'")
55
+ var_def_qualifier: str = Field(default="", description="Variable definition qualifier")
56
+ branding_suffix_name: str = Field(description="Branding suffix, e.g., 'tavg-p19-hxy-air'")
57
+
58
+ # Variable metadata
59
+ description: str = Field(description="Human-readable description")
60
+ dimensions: List[str] = Field(description="NetCDF dimensions")
61
+ cell_methods: str = Field(default="", description="CF cell_methods attribute")
62
+ cell_measures: str = Field(default="", description="CF cell_measures attribute")
63
+ history: str = Field(default="", description="Processing history")
64
+ realm: str = Field(description="Earth system realm, e.g., 'atmos'")
65
+
66
+ # Label components (embedded, not references)
67
+ temporal_label: str = Field(description="Temporal label, e.g., 'tavg'")
68
+ vertical_label: str = Field(description="Vertical label, e.g., 'p19'")
69
+ horizontal_label: str = Field(description="Horizontal label, e.g., 'hxy'")
70
+ area_label: str = Field(description="Area label, e.g., 'air'")
71
+
72
+ # Status
73
+ bn_status: str = Field(description="Branded variable status, e.g., 'accepted'")
74
+
75
+ # Additional required fields from specifications
76
+ positive_direction: str = Field(default="", description="Positive direction for the variable")
@@ -0,0 +1,9 @@
1
+ from esgvoc.api.data_descriptors.data_descriptor import CompositeTermDataDescriptor
2
+
3
+
4
+ class MemberId(CompositeTermDataDescriptor):
5
+ """
6
+ The member_id uniquely identifies a specific model simulation within an experiment. It is created by combining the sub_experiment, which describes the setup or timing of the simulation (like a specific start year), and the variant_label, which details the configuration of the model (including initial conditions, physics, and forcings). Together, they form a code like s1960-r1i1p1f1. This allows users to distinguish between different ensemble members and understand how each run differs from others within the same experiment.
7
+ """
8
+
9
+ description: str
@@ -0,0 +1,5 @@
1
+ from esgvoc.api.data_descriptors.data_descriptor import PatternTermDataDescriptor
2
+
3
+
4
+ class Regex(PatternTermDataDescriptor):
5
+ pass
@@ -1,7 +1,7 @@
1
- from esgvoc.api.data_descriptors.data_descriptor import PatternTermDataDescriptor
1
+ from esgvoc.api.data_descriptors.data_descriptor import PlainTermDataDescriptor
2
2
 
3
3
 
4
- class VerticalLabel(PatternTermDataDescriptor):
4
+ class VerticalLabel(PlainTermDataDescriptor):
5
5
  """
6
6
  Vertical label.
7
7
 
@@ -1,7 +1,6 @@
1
1
  from enum import Enum
2
- from typing import Annotated, Any, Literal, Optional, Protocol
3
2
 
4
- from pydantic import BaseModel, ConfigDict, Field
3
+ from pydantic import BaseModel, ConfigDict
5
4
 
6
5
 
7
6
  class DrsType(str, Enum):
@@ -17,49 +16,18 @@ class DrsType(str, Enum):
17
16
  """The DRS dataset id specification type."""
18
17
 
19
18
 
20
- class DrsPartKind(str, Enum):
21
- """
22
- The kinds of DRS part (constant and collection).
23
- """
24
-
25
- CONSTANT = "constant"
26
- """The constant part type."""
27
- COLLECTION = "collection"
28
- """The collection part type."""
29
-
30
-
31
- class DrsConstant(BaseModel):
32
- """
33
- A constant part of a DRS specification (e.g., cmip5).
34
- """
35
-
36
- value: str
37
- """The value of the a constant part."""
38
- kind: Literal[DrsPartKind.CONSTANT] = DrsPartKind.CONSTANT
39
- """The DRS part kind."""
40
-
41
- def __str__(self) -> str:
42
- return self.value
43
-
19
+ class DrsPart(BaseModel):
20
+ """A fragment of a DRS specification"""
44
21
 
45
- class DrsCollection(BaseModel):
46
- """
47
- A collection part of a DRS specification (e.g., institution_id for CMIP6).
48
- """
49
-
50
- collection_id: str
22
+ source_collection: str
51
23
  """The collection id."""
24
+ source_collection_term: str | None = None
25
+ "Specifies a specific term in the collection."
52
26
  is_required: bool
53
27
  """Whether the collection is required for the DRS specification or not."""
54
- kind: Literal[DrsPartKind.COLLECTION] = DrsPartKind.COLLECTION
55
- """The DRS part kind."""
56
28
 
57
29
  def __str__(self) -> str:
58
- return self.collection_id
59
-
60
-
61
- DrsPart = Annotated[DrsConstant | DrsCollection, Field(discriminator="kind")]
62
- """A fragment of a DRS specification"""
30
+ return self.source_collection
63
31
 
64
32
 
65
33
  class DrsSpecification(BaseModel):
@@ -69,6 +37,8 @@ class DrsSpecification(BaseModel):
69
37
 
70
38
  type: DrsType
71
39
  """The type of the specification."""
40
+ regex: str
41
+ """General pattern for simple checks"""
72
42
  separator: str
73
43
  """The textual separator string or character."""
74
44
  properties: dict | None = None
@@ -77,109 +47,56 @@ class DrsSpecification(BaseModel):
77
47
  """The parts of the DRS specification."""
78
48
 
79
49
 
80
- class GlobalAttributeValueType(str, Enum):
50
+ class CatalogProperty(BaseModel):
81
51
  """
82
- The types of global attribute values.
83
- """
84
-
85
- STRING = "string"
86
- """String value type."""
87
- INTEGER = "integer"
88
- """Integer value type."""
89
- FLOAT = "float"
90
- """Float value type."""
91
-
92
-
93
- class GlobalAttributeVisitor(Protocol):
94
- """
95
- Specifications for a global attribute visitor.
96
- """
97
- def visit_base_attribute(self,
98
- attribute_name: str,
99
- attribute: "GlobalAttributeSpecBase") -> Any:
100
- """Visit a base global attribute."""
101
- pass
102
-
103
- def visit_specific_attribute(self,
104
- attribute_name: str,
105
- attribute: "GlobalAttributeSpecSpecific") -> Any:
106
- """Visit a specific global attribute."""
107
- pass
108
-
109
-
110
- class GlobalAttributeSpecBase(BaseModel):
111
- """
112
- Specification for a global attribute.
52
+ A dataset property described in a catalog.
113
53
  """
114
54
 
115
55
  source_collection: str
116
- """the source_collection to get the term from"""
117
- value_type: GlobalAttributeValueType
118
- """The expected value type."""
119
-
120
- def accept(self, attribute_name: str, visitor: GlobalAttributeVisitor) -> Any:
121
- return visitor.visit_base_attribute(attribute_name, self)
122
-
123
-
124
- class GlobalAttributeSpecSpecific(GlobalAttributeSpecBase):
125
- """
126
- Specification for a global attribute.
127
- with a specific key
128
- """
129
-
130
- specific_key: str
131
- """If the validation is for the value of a specific key, for instance description or ui-label """
56
+ "The project collection that originated the property."
57
+ catalog_field_value_type: str
58
+ "The type of the field value."
59
+ is_required: bool
60
+ "Specifies if the property must be present in the dataset properties."
61
+ source_collection_term: str | None = None
62
+ "Specifies a specific term in the collection."
63
+ catalog_field_name: str | None = None
64
+ "The name of the collection referenced in the catalog."
65
+ source_collection_key: str | None = None
66
+ "Specifies a key other than drs_name in the collection."
132
67
 
133
- def accept(self, attribute_name: str, visitor: GlobalAttributeVisitor) -> Any:
134
- """
135
- Accept a global attribute visitor.
136
68
 
137
- :param attribute_name: The attribute name.
138
- :param visitor: The global attribute visitor.
139
- :type visitor: GlobalAttributeVisitor
140
- :return: Depending on the visitor.
141
- :rtype: Any
142
- """
143
- return visitor.visit_specific_attribute(attribute_name, self)
69
+ class CatalogExtension(BaseModel):
70
+ name: str
71
+ """The name of the extension"""
72
+ version: str
73
+ """The version of the extension"""
144
74
 
145
75
 
146
- GlobalAttributeSpec = GlobalAttributeSpecSpecific | GlobalAttributeSpecBase
76
+ class CatalogProperties(BaseModel):
77
+ name: str
78
+ """The name of the catalog system."""
79
+ url_template: str
80
+ """The URI template of the catalog system."""
81
+ extensions: list[CatalogExtension]
82
+ """The extensions of the catalog."""
147
83
 
148
84
 
149
- class GlobalAttributeSpecs(BaseModel):
85
+ class CatalogSpecification(BaseModel):
150
86
  """
151
- Container for global attribute specifications.
87
+ A catalog specification.
152
88
  """
153
89
 
154
- specs: dict[str, GlobalAttributeSpec] = Field(default_factory=dict)
155
- """The global attributes specifications dictionary."""
156
-
157
- def __str__(self) -> str:
158
- """Return all keys when printing."""
159
- return str(list(self.specs.keys()))
160
-
161
- def __repr__(self) -> str:
162
- """Return all keys when using repr."""
163
- return f"GlobalAttributeSpecs(keys={list(self.specs.keys())})"
164
-
165
- # Dictionary-like access methods
166
- def __getitem__(self, key: str) -> GlobalAttributeSpec:
167
- return self.specs[key]
168
-
169
- def __setitem__(self, key: str, value: GlobalAttributeSpec) -> None:
170
- self.specs[key] = value
171
-
172
- def __contains__(self, key: str) -> bool:
173
- return key in self.specs
174
-
175
- def keys(self):
176
- return self.specs.keys()
90
+ version: str
91
+ """The version of the catalog."""
177
92
 
178
- def values(self):
179
- return self.specs.values()
93
+ catalog_properties: CatalogProperties
94
+ """The properties of the catalog."""
180
95
 
181
- def items(self):
182
- return self.specs.items()
96
+ dataset_properties: list[CatalogProperty]
97
+ "The properties of the dataset described in a catalog."
98
+ file_properties: list[CatalogProperty]
99
+ "The properties of the files described in a catalog."
183
100
 
184
101
 
185
102
  class ProjectSpecs(BaseModel):
@@ -191,8 +108,9 @@ class ProjectSpecs(BaseModel):
191
108
  """The project id."""
192
109
  description: str
193
110
  """The description of the project."""
194
- drs_specs: list[DrsSpecification]
111
+ drs_specs: dict[DrsType, DrsSpecification]
195
112
  """The DRS specifications of the project (directory, file name and dataset id)."""
196
- global_attributes_specs: Optional[GlobalAttributeSpecs] = None
197
- """The global attributes specifications of the project."""
113
+ # TODO: remove the `= None` default once all projects have catalog_specs.yaml.
114
+ catalog_specs: CatalogSpecification | None = None
115
+ """The catalog specifications of the project."""
198
116
  model_config = ConfigDict(extra="allow")