esgvoc 1.0.0__py3-none-any.whl → 1.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of esgvoc might be problematic. Click here for more details.
- esgvoc/__init__.py +1 -1
- esgvoc/api/__init__.py +0 -6
- esgvoc/api/data_descriptors/__init__.py +8 -0
- esgvoc/api/data_descriptors/archive.py +5 -0
- esgvoc/api/data_descriptors/citation_url.py +5 -0
- esgvoc/api/data_descriptors/experiment.py +2 -2
- esgvoc/api/data_descriptors/known_branded_variable.py +58 -5
- esgvoc/api/data_descriptors/member_id.py +9 -0
- esgvoc/api/data_descriptors/regex.py +5 -0
- esgvoc/api/data_descriptors/vertical_label.py +2 -2
- esgvoc/api/project_specs.py +48 -130
- esgvoc/api/projects.py +185 -66
- esgvoc/apps/drs/generator.py +103 -85
- esgvoc/apps/drs/validator.py +22 -38
- esgvoc/apps/jsg/json_schema_generator.py +255 -130
- esgvoc/apps/jsg/templates/template.jinja +249 -0
- esgvoc/apps/test_cv/README.md +214 -0
- esgvoc/apps/test_cv/cv_tester.py +1368 -0
- esgvoc/apps/test_cv/example_usage.py +216 -0
- esgvoc/apps/vr/__init__.py +12 -0
- esgvoc/apps/vr/build_variable_registry.py +71 -0
- esgvoc/apps/vr/example_usage.py +60 -0
- esgvoc/apps/vr/vr_app.py +333 -0
- esgvoc/cli/config.py +671 -86
- esgvoc/cli/drs.py +39 -21
- esgvoc/cli/main.py +2 -0
- esgvoc/cli/test_cv.py +257 -0
- esgvoc/core/constants.py +10 -7
- esgvoc/core/data_handler.py +24 -22
- esgvoc/core/db/connection.py +7 -0
- esgvoc/core/db/project_ingestion.py +34 -9
- esgvoc/core/db/universe_ingestion.py +1 -2
- esgvoc/core/service/configuration/setting.py +192 -21
- esgvoc/core/service/data_merger.py +1 -1
- esgvoc/core/service/state.py +18 -2
- {esgvoc-1.0.0.dist-info → esgvoc-1.1.1.dist-info}/METADATA +2 -3
- {esgvoc-1.0.0.dist-info → esgvoc-1.1.1.dist-info}/RECORD +41 -30
- esgvoc/apps/jsg/cmip6_template.json +0 -74
- esgvoc/apps/jsg/cmip6plus_template.json +0 -74
- /esgvoc/apps/{py.typed → test_cv/__init__.py} +0 -0
- {esgvoc-1.0.0.dist-info → esgvoc-1.1.1.dist-info}/WHEEL +0 -0
- {esgvoc-1.0.0.dist-info → esgvoc-1.1.1.dist-info}/entry_points.txt +0 -0
- {esgvoc-1.0.0.dist-info → esgvoc-1.1.1.dist-info}/licenses/LICENSE.txt +0 -0
esgvoc/__init__.py
CHANGED
esgvoc/api/__init__.py
CHANGED
|
@@ -1,8 +1,5 @@
|
|
|
1
1
|
from esgvoc.api.project_specs import (
|
|
2
|
-
DrsCollection,
|
|
3
|
-
DrsConstant,
|
|
4
2
|
DrsPart,
|
|
5
|
-
DrsPartKind,
|
|
6
3
|
DrsSpecification,
|
|
7
4
|
DrsType,
|
|
8
5
|
ProjectSpecs,
|
|
@@ -51,10 +48,7 @@ from esgvoc.api.universe import (
|
|
|
51
48
|
)
|
|
52
49
|
|
|
53
50
|
__all__ = [
|
|
54
|
-
"DrsCollection",
|
|
55
|
-
"DrsConstant",
|
|
56
51
|
"DrsPart",
|
|
57
|
-
"DrsPartKind",
|
|
58
52
|
"DrsSpecification",
|
|
59
53
|
"DrsType",
|
|
60
54
|
"find_collections_in_project",
|
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
from esgvoc.api.data_descriptors.activity import Activity
|
|
2
|
+
from esgvoc.api.data_descriptors.archive import Archive
|
|
2
3
|
from esgvoc.api.data_descriptors.area_label import AreaLabel
|
|
3
4
|
from esgvoc.api.data_descriptors.branded_suffix import BrandedSuffix
|
|
4
5
|
from esgvoc.api.data_descriptors.branded_variable import BrandedVariable
|
|
6
|
+
from esgvoc.api.data_descriptors.citation_url import CitationUrl
|
|
5
7
|
from esgvoc.api.data_descriptors.consortium import Consortium
|
|
6
8
|
from esgvoc.api.data_descriptors.contact import Contact
|
|
7
9
|
from esgvoc.api.data_descriptors.conventions import Convention
|
|
@@ -20,6 +22,7 @@ from esgvoc.api.data_descriptors.initialisation_index import InitialisationIndex
|
|
|
20
22
|
from esgvoc.api.data_descriptors.institution import Institution
|
|
21
23
|
from esgvoc.api.data_descriptors.known_branded_variable import KnownBrandedVariable
|
|
22
24
|
from esgvoc.api.data_descriptors.license import License
|
|
25
|
+
from esgvoc.api.data_descriptors.member_id import MemberId
|
|
23
26
|
from esgvoc.api.data_descriptors.mip_era import MipEra
|
|
24
27
|
from esgvoc.api.data_descriptors.model_component import ModelComponent
|
|
25
28
|
from esgvoc.api.data_descriptors.obs_type import ObsType
|
|
@@ -29,6 +32,7 @@ from esgvoc.api.data_descriptors.product import Product
|
|
|
29
32
|
from esgvoc.api.data_descriptors.publication_status import PublicationStatus
|
|
30
33
|
from esgvoc.api.data_descriptors.realisation_index import RealisationIndex
|
|
31
34
|
from esgvoc.api.data_descriptors.realm import Realm
|
|
35
|
+
from esgvoc.api.data_descriptors.regex import Regex
|
|
32
36
|
from esgvoc.api.data_descriptors.region import Region
|
|
33
37
|
from esgvoc.api.data_descriptors.resolution import Resolution
|
|
34
38
|
from esgvoc.api.data_descriptors.source import Source
|
|
@@ -87,5 +91,9 @@ DATA_DESCRIPTOR_CLASS_MAPPING: dict[str, type[DataDescriptor]] = {
|
|
|
87
91
|
"title": Title,
|
|
88
92
|
"contact": Contact,
|
|
89
93
|
"region": Region,
|
|
94
|
+
"member_id": MemberId,
|
|
90
95
|
"obs_type": ObsType, # obs4Mips
|
|
96
|
+
"regex": Regex,
|
|
97
|
+
"citation_url": CitationUrl,
|
|
98
|
+
"archive": Archive,
|
|
91
99
|
}
|
|
@@ -20,8 +20,8 @@ class Experiment(PlainTermDataDescriptor):
|
|
|
20
20
|
experiment: str
|
|
21
21
|
required_model_components: list[str] | None
|
|
22
22
|
additional_allowed_model_components: list[str] = Field(default_factory=list)
|
|
23
|
-
start_year: int | None
|
|
24
|
-
end_year: int | None
|
|
23
|
+
start_year: str | int | None
|
|
24
|
+
end_year: str | int | None
|
|
25
25
|
min_number_yrs_per_sim: int | None
|
|
26
26
|
parent_activity_id: list[str] | None
|
|
27
27
|
parent_experiment_id: list[str] | None
|
|
@@ -1,7 +1,30 @@
|
|
|
1
|
+
from typing import Any, Dict, List, Optional
|
|
2
|
+
|
|
1
3
|
from pydantic import Field
|
|
2
4
|
|
|
3
5
|
from esgvoc.api.data_descriptors.data_descriptor import PlainTermDataDescriptor
|
|
4
6
|
|
|
7
|
+
#
|
|
8
|
+
# class KnownBrandedVariable(PlainTermDataDescriptor):
|
|
9
|
+
# """
|
|
10
|
+
# A climate-related quantity or measurement, including information about sampling.
|
|
11
|
+
#
|
|
12
|
+
# The concept of a branded variable was introduced in CMIP7.
|
|
13
|
+
# A branded variable is composed of two parts.
|
|
14
|
+
# The first part is the root variable (see :py:class:`Variable`).
|
|
15
|
+
# The second is the suffix (see :py:class:`BrandedSuffix`).
|
|
16
|
+
#
|
|
17
|
+
# For further details on the development of branded variables,
|
|
18
|
+
# see [this paper draft](https://docs.google.com/document/d/19jzecgymgiiEsTDzaaqeLP6pTvLT-NzCMaq-wu-QoOc/edit?pli=1&tab=t.0).
|
|
19
|
+
# """
|
|
20
|
+
#
|
|
21
|
+
# description: str
|
|
22
|
+
# dimensions: list[str] = Field(default_factory=list)
|
|
23
|
+
# cell_methods: str
|
|
24
|
+
# variable: str
|
|
25
|
+
# label: str
|
|
26
|
+
#
|
|
27
|
+
|
|
5
28
|
|
|
6
29
|
class KnownBrandedVariable(PlainTermDataDescriptor):
|
|
7
30
|
"""
|
|
@@ -16,8 +39,38 @@ class KnownBrandedVariable(PlainTermDataDescriptor):
|
|
|
16
39
|
see [this paper draft](https://docs.google.com/document/d/19jzecgymgiiEsTDzaaqeLP6pTvLT-NzCMaq-wu-QoOc/edit?pli=1&tab=t.0).
|
|
17
40
|
"""
|
|
18
41
|
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
42
|
+
# # ESGVoc required fields
|
|
43
|
+
# id: str = Field(description="Unique identifier, e.g., 'ta_tavg-p19-hxy-air'")
|
|
44
|
+
# type: str = Field(default="branded_variable", description="ESGVoc type identifier")
|
|
45
|
+
# drs_name: str = Field(description="DRS name, same as id")
|
|
46
|
+
# => already in PlainTermDataDescriptor
|
|
47
|
+
|
|
48
|
+
# CF Standard Name context (flattened from hierarchy)
|
|
49
|
+
cf_standard_name: str = Field(description="CF standard name, e.g., 'air_temperature'")
|
|
50
|
+
cf_units: str = Field(description="CF standard units, e.g., 'K'")
|
|
51
|
+
cf_sn_status: str = Field(description="CF standard name status, e.g., 'approved'")
|
|
52
|
+
|
|
53
|
+
# Variable Root context (flattened from hierarchy)
|
|
54
|
+
variable_root_name: str = Field(description="Variable root name, e.g., 'ta'")
|
|
55
|
+
var_def_qualifier: str = Field(default="", description="Variable definition qualifier")
|
|
56
|
+
branding_suffix_name: str = Field(description="Branding suffix, e.g., 'tavg-p19-hxy-air'")
|
|
57
|
+
|
|
58
|
+
# Variable metadata
|
|
59
|
+
description: str = Field(description="Human-readable description")
|
|
60
|
+
dimensions: List[str] = Field(description="NetCDF dimensions")
|
|
61
|
+
cell_methods: str = Field(default="", description="CF cell_methods attribute")
|
|
62
|
+
cell_measures: str = Field(default="", description="CF cell_measures attribute")
|
|
63
|
+
history: str = Field(default="", description="Processing history")
|
|
64
|
+
realm: str = Field(description="Earth system realm, e.g., 'atmos'")
|
|
65
|
+
|
|
66
|
+
# Label components (embedded, not references)
|
|
67
|
+
temporal_label: str = Field(description="Temporal label, e.g., 'tavg'")
|
|
68
|
+
vertical_label: str = Field(description="Vertical label, e.g., 'p19'")
|
|
69
|
+
horizontal_label: str = Field(description="Horizontal label, e.g., 'hxy'")
|
|
70
|
+
area_label: str = Field(description="Area label, e.g., 'air'")
|
|
71
|
+
|
|
72
|
+
# Status
|
|
73
|
+
bn_status: str = Field(description="Branded variable status, e.g., 'accepted'")
|
|
74
|
+
|
|
75
|
+
# Additional required fields from specifications
|
|
76
|
+
positive_direction: str = Field(default="", description="Positive direction for the variable")
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
from esgvoc.api.data_descriptors.data_descriptor import CompositeTermDataDescriptor
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class MemberId(CompositeTermDataDescriptor):
|
|
5
|
+
"""
|
|
6
|
+
The member_id uniquely identifies a specific model simulation within an experiment. It is created by combining the sub_experiment, which describes the setup or timing of the simulation (like a specific start year), and the variant_label, which details the configuration of the model (including initial conditions, physics, and forcings). Together, they form a code like s1960-r1i1p1f1. This allows users to distinguish between different ensemble members and understand how each run differs from others within the same experiment.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
description: str
|
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
from esgvoc.api.data_descriptors.data_descriptor import
|
|
1
|
+
from esgvoc.api.data_descriptors.data_descriptor import PlainTermDataDescriptor
|
|
2
2
|
|
|
3
3
|
|
|
4
|
-
class VerticalLabel(
|
|
4
|
+
class VerticalLabel(PlainTermDataDescriptor):
|
|
5
5
|
"""
|
|
6
6
|
Vertical label.
|
|
7
7
|
|
esgvoc/api/project_specs.py
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
from enum import Enum
|
|
2
|
-
from typing import Annotated, Any, Literal, Optional, Protocol
|
|
3
2
|
|
|
4
|
-
from pydantic import BaseModel, ConfigDict
|
|
3
|
+
from pydantic import BaseModel, ConfigDict
|
|
5
4
|
|
|
6
5
|
|
|
7
6
|
class DrsType(str, Enum):
|
|
@@ -17,49 +16,18 @@ class DrsType(str, Enum):
|
|
|
17
16
|
"""The DRS dataset id specification type."""
|
|
18
17
|
|
|
19
18
|
|
|
20
|
-
class
|
|
21
|
-
"""
|
|
22
|
-
The kinds of DRS part (constant and collection).
|
|
23
|
-
"""
|
|
24
|
-
|
|
25
|
-
CONSTANT = "constant"
|
|
26
|
-
"""The constant part type."""
|
|
27
|
-
COLLECTION = "collection"
|
|
28
|
-
"""The collection part type."""
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
class DrsConstant(BaseModel):
|
|
32
|
-
"""
|
|
33
|
-
A constant part of a DRS specification (e.g., cmip5).
|
|
34
|
-
"""
|
|
35
|
-
|
|
36
|
-
value: str
|
|
37
|
-
"""The value of the a constant part."""
|
|
38
|
-
kind: Literal[DrsPartKind.CONSTANT] = DrsPartKind.CONSTANT
|
|
39
|
-
"""The DRS part kind."""
|
|
40
|
-
|
|
41
|
-
def __str__(self) -> str:
|
|
42
|
-
return self.value
|
|
43
|
-
|
|
19
|
+
class DrsPart(BaseModel):
|
|
20
|
+
"""A fragment of a DRS specification"""
|
|
44
21
|
|
|
45
|
-
|
|
46
|
-
"""
|
|
47
|
-
A collection part of a DRS specification (e.g., institution_id for CMIP6).
|
|
48
|
-
"""
|
|
49
|
-
|
|
50
|
-
collection_id: str
|
|
22
|
+
source_collection: str
|
|
51
23
|
"""The collection id."""
|
|
24
|
+
source_collection_term: str | None = None
|
|
25
|
+
"Specifies a specific term in the collection."
|
|
52
26
|
is_required: bool
|
|
53
27
|
"""Whether the collection is required for the DRS specification or not."""
|
|
54
|
-
kind: Literal[DrsPartKind.COLLECTION] = DrsPartKind.COLLECTION
|
|
55
|
-
"""The DRS part kind."""
|
|
56
28
|
|
|
57
29
|
def __str__(self) -> str:
|
|
58
|
-
return self.
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
DrsPart = Annotated[DrsConstant | DrsCollection, Field(discriminator="kind")]
|
|
62
|
-
"""A fragment of a DRS specification"""
|
|
30
|
+
return self.source_collection
|
|
63
31
|
|
|
64
32
|
|
|
65
33
|
class DrsSpecification(BaseModel):
|
|
@@ -69,6 +37,8 @@ class DrsSpecification(BaseModel):
|
|
|
69
37
|
|
|
70
38
|
type: DrsType
|
|
71
39
|
"""The type of the specification."""
|
|
40
|
+
regex: str
|
|
41
|
+
"""General pattern for simple checks"""
|
|
72
42
|
separator: str
|
|
73
43
|
"""The textual separator string or character."""
|
|
74
44
|
properties: dict | None = None
|
|
@@ -77,109 +47,56 @@ class DrsSpecification(BaseModel):
|
|
|
77
47
|
"""The parts of the DRS specification."""
|
|
78
48
|
|
|
79
49
|
|
|
80
|
-
class
|
|
50
|
+
class CatalogProperty(BaseModel):
|
|
81
51
|
"""
|
|
82
|
-
|
|
83
|
-
"""
|
|
84
|
-
|
|
85
|
-
STRING = "string"
|
|
86
|
-
"""String value type."""
|
|
87
|
-
INTEGER = "integer"
|
|
88
|
-
"""Integer value type."""
|
|
89
|
-
FLOAT = "float"
|
|
90
|
-
"""Float value type."""
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
class GlobalAttributeVisitor(Protocol):
|
|
94
|
-
"""
|
|
95
|
-
Specifications for a global attribute visitor.
|
|
96
|
-
"""
|
|
97
|
-
def visit_base_attribute(self,
|
|
98
|
-
attribute_name: str,
|
|
99
|
-
attribute: "GlobalAttributeSpecBase") -> Any:
|
|
100
|
-
"""Visit a base global attribute."""
|
|
101
|
-
pass
|
|
102
|
-
|
|
103
|
-
def visit_specific_attribute(self,
|
|
104
|
-
attribute_name: str,
|
|
105
|
-
attribute: "GlobalAttributeSpecSpecific") -> Any:
|
|
106
|
-
"""Visit a specific global attribute."""
|
|
107
|
-
pass
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
class GlobalAttributeSpecBase(BaseModel):
|
|
111
|
-
"""
|
|
112
|
-
Specification for a global attribute.
|
|
52
|
+
A dataset property described in a catalog.
|
|
113
53
|
"""
|
|
114
54
|
|
|
115
55
|
source_collection: str
|
|
116
|
-
"
|
|
117
|
-
|
|
118
|
-
"
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
with a specific key
|
|
128
|
-
"""
|
|
129
|
-
|
|
130
|
-
specific_key: str
|
|
131
|
-
"""If the validation is for the value of a specific key, for instance description or ui-label """
|
|
56
|
+
"The project collection that originated the property."
|
|
57
|
+
catalog_field_value_type: str
|
|
58
|
+
"The type of the field value."
|
|
59
|
+
is_required: bool
|
|
60
|
+
"Specifies if the property must be present in the dataset properties."
|
|
61
|
+
source_collection_term: str | None = None
|
|
62
|
+
"Specifies a specific term in the collection."
|
|
63
|
+
catalog_field_name: str | None = None
|
|
64
|
+
"The name of the collection referenced in the catalog."
|
|
65
|
+
source_collection_key: str | None = None
|
|
66
|
+
"Specifies a key other than drs_name in the collection."
|
|
132
67
|
|
|
133
|
-
def accept(self, attribute_name: str, visitor: GlobalAttributeVisitor) -> Any:
|
|
134
|
-
"""
|
|
135
|
-
Accept a global attribute visitor.
|
|
136
68
|
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
"""
|
|
143
|
-
return visitor.visit_specific_attribute(attribute_name, self)
|
|
69
|
+
class CatalogExtension(BaseModel):
|
|
70
|
+
name: str
|
|
71
|
+
"""The name of the extension"""
|
|
72
|
+
version: str
|
|
73
|
+
"""The version of the extension"""
|
|
144
74
|
|
|
145
75
|
|
|
146
|
-
|
|
76
|
+
class CatalogProperties(BaseModel):
|
|
77
|
+
name: str
|
|
78
|
+
"""The name of the catalog system."""
|
|
79
|
+
url_template: str
|
|
80
|
+
"""The URI template of the catalog system."""
|
|
81
|
+
extensions: list[CatalogExtension]
|
|
82
|
+
"""The extensions of the catalog."""
|
|
147
83
|
|
|
148
84
|
|
|
149
|
-
class
|
|
85
|
+
class CatalogSpecification(BaseModel):
|
|
150
86
|
"""
|
|
151
|
-
|
|
87
|
+
A catalog specification.
|
|
152
88
|
"""
|
|
153
89
|
|
|
154
|
-
|
|
155
|
-
"""The
|
|
156
|
-
|
|
157
|
-
def __str__(self) -> str:
|
|
158
|
-
"""Return all keys when printing."""
|
|
159
|
-
return str(list(self.specs.keys()))
|
|
160
|
-
|
|
161
|
-
def __repr__(self) -> str:
|
|
162
|
-
"""Return all keys when using repr."""
|
|
163
|
-
return f"GlobalAttributeSpecs(keys={list(self.specs.keys())})"
|
|
164
|
-
|
|
165
|
-
# Dictionary-like access methods
|
|
166
|
-
def __getitem__(self, key: str) -> GlobalAttributeSpec:
|
|
167
|
-
return self.specs[key]
|
|
168
|
-
|
|
169
|
-
def __setitem__(self, key: str, value: GlobalAttributeSpec) -> None:
|
|
170
|
-
self.specs[key] = value
|
|
171
|
-
|
|
172
|
-
def __contains__(self, key: str) -> bool:
|
|
173
|
-
return key in self.specs
|
|
174
|
-
|
|
175
|
-
def keys(self):
|
|
176
|
-
return self.specs.keys()
|
|
90
|
+
version: str
|
|
91
|
+
"""The version of the catalog."""
|
|
177
92
|
|
|
178
|
-
|
|
179
|
-
|
|
93
|
+
catalog_properties: CatalogProperties
|
|
94
|
+
"""The properties of the catalog."""
|
|
180
95
|
|
|
181
|
-
|
|
182
|
-
|
|
96
|
+
dataset_properties: list[CatalogProperty]
|
|
97
|
+
"The properties of the dataset described in a catalog."
|
|
98
|
+
file_properties: list[CatalogProperty]
|
|
99
|
+
"The properties of the files described in a catalog."
|
|
183
100
|
|
|
184
101
|
|
|
185
102
|
class ProjectSpecs(BaseModel):
|
|
@@ -191,8 +108,9 @@ class ProjectSpecs(BaseModel):
|
|
|
191
108
|
"""The project id."""
|
|
192
109
|
description: str
|
|
193
110
|
"""The description of the project."""
|
|
194
|
-
drs_specs:
|
|
111
|
+
drs_specs: dict[DrsType, DrsSpecification]
|
|
195
112
|
"""The DRS specifications of the project (directory, file name and dataset id)."""
|
|
196
|
-
|
|
197
|
-
|
|
113
|
+
# TODO: release = None when all projects have catalog_specs.yaml.
|
|
114
|
+
catalog_specs: CatalogSpecification | None = None
|
|
115
|
+
"""The catalog specifications of the project."""
|
|
198
116
|
model_config = ConfigDict(extra="allow")
|