esgvoc 2.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- esgvoc/__init__.py +3 -0
- esgvoc/api/__init__.py +91 -0
- esgvoc/api/data_descriptors/EMD_models/__init__.py +66 -0
- esgvoc/api/data_descriptors/EMD_models/arrangement.py +21 -0
- esgvoc/api/data_descriptors/EMD_models/calendar.py +5 -0
- esgvoc/api/data_descriptors/EMD_models/cell_variable_type.py +20 -0
- esgvoc/api/data_descriptors/EMD_models/component_type.py +5 -0
- esgvoc/api/data_descriptors/EMD_models/coordinate.py +52 -0
- esgvoc/api/data_descriptors/EMD_models/grid_mapping.py +19 -0
- esgvoc/api/data_descriptors/EMD_models/grid_region.py +19 -0
- esgvoc/api/data_descriptors/EMD_models/grid_type.py +19 -0
- esgvoc/api/data_descriptors/EMD_models/horizontal_computational_grid.py +56 -0
- esgvoc/api/data_descriptors/EMD_models/horizontal_grid_cells.py +230 -0
- esgvoc/api/data_descriptors/EMD_models/horizontal_subgrid.py +41 -0
- esgvoc/api/data_descriptors/EMD_models/horizontal_units.py +5 -0
- esgvoc/api/data_descriptors/EMD_models/model.py +139 -0
- esgvoc/api/data_descriptors/EMD_models/model_component.py +115 -0
- esgvoc/api/data_descriptors/EMD_models/reference.py +61 -0
- esgvoc/api/data_descriptors/EMD_models/resolution.py +48 -0
- esgvoc/api/data_descriptors/EMD_models/temporal_refinement.py +19 -0
- esgvoc/api/data_descriptors/EMD_models/truncation_method.py +17 -0
- esgvoc/api/data_descriptors/EMD_models/vertical_computational_grid.py +91 -0
- esgvoc/api/data_descriptors/EMD_models/vertical_coordinate.py +5 -0
- esgvoc/api/data_descriptors/EMD_models/vertical_units.py +19 -0
- esgvoc/api/data_descriptors/__init__.py +159 -0
- esgvoc/api/data_descriptors/activity.py +72 -0
- esgvoc/api/data_descriptors/archive.py +5 -0
- esgvoc/api/data_descriptors/area_label.py +30 -0
- esgvoc/api/data_descriptors/branded_suffix.py +30 -0
- esgvoc/api/data_descriptors/branded_variable.py +21 -0
- esgvoc/api/data_descriptors/citation_url.py +5 -0
- esgvoc/api/data_descriptors/contact.py +5 -0
- esgvoc/api/data_descriptors/conventions.py +28 -0
- esgvoc/api/data_descriptors/creation_date.py +18 -0
- esgvoc/api/data_descriptors/data_descriptor.py +127 -0
- esgvoc/api/data_descriptors/data_specs_version.py +25 -0
- esgvoc/api/data_descriptors/date.py +5 -0
- esgvoc/api/data_descriptors/directory_date.py +22 -0
- esgvoc/api/data_descriptors/drs_specs.py +38 -0
- esgvoc/api/data_descriptors/experiment.py +215 -0
- esgvoc/api/data_descriptors/forcing_index.py +21 -0
- esgvoc/api/data_descriptors/frequency.py +48 -0
- esgvoc/api/data_descriptors/further_info_url.py +5 -0
- esgvoc/api/data_descriptors/grid.py +43 -0
- esgvoc/api/data_descriptors/horizontal_label.py +20 -0
- esgvoc/api/data_descriptors/initialization_index.py +27 -0
- esgvoc/api/data_descriptors/institution.py +80 -0
- esgvoc/api/data_descriptors/known_branded_variable.py +75 -0
- esgvoc/api/data_descriptors/license.py +31 -0
- esgvoc/api/data_descriptors/member_id.py +9 -0
- esgvoc/api/data_descriptors/mip_era.py +26 -0
- esgvoc/api/data_descriptors/model_component.py +32 -0
- esgvoc/api/data_descriptors/models_test/models.py +17 -0
- esgvoc/api/data_descriptors/nominal_resolution.py +50 -0
- esgvoc/api/data_descriptors/obs_type.py +5 -0
- esgvoc/api/data_descriptors/organisation.py +22 -0
- esgvoc/api/data_descriptors/physics_index.py +21 -0
- esgvoc/api/data_descriptors/product.py +16 -0
- esgvoc/api/data_descriptors/publication_status.py +5 -0
- esgvoc/api/data_descriptors/realization_index.py +24 -0
- esgvoc/api/data_descriptors/realm.py +16 -0
- esgvoc/api/data_descriptors/regex.py +5 -0
- esgvoc/api/data_descriptors/region.py +35 -0
- esgvoc/api/data_descriptors/resolution.py +7 -0
- esgvoc/api/data_descriptors/source.py +120 -0
- esgvoc/api/data_descriptors/source_type.py +5 -0
- esgvoc/api/data_descriptors/sub_experiment.py +5 -0
- esgvoc/api/data_descriptors/table.py +28 -0
- esgvoc/api/data_descriptors/temporal_label.py +20 -0
- esgvoc/api/data_descriptors/time_range.py +17 -0
- esgvoc/api/data_descriptors/title.py +5 -0
- esgvoc/api/data_descriptors/tracking_id.py +67 -0
- esgvoc/api/data_descriptors/variable.py +56 -0
- esgvoc/api/data_descriptors/variant_label.py +25 -0
- esgvoc/api/data_descriptors/vertical_label.py +20 -0
- esgvoc/api/project_specs.py +143 -0
- esgvoc/api/projects.py +1253 -0
- esgvoc/api/py.typed +0 -0
- esgvoc/api/pydantic_handler.py +146 -0
- esgvoc/api/report.py +127 -0
- esgvoc/api/search.py +171 -0
- esgvoc/api/universe.py +434 -0
- esgvoc/apps/__init__.py +6 -0
- esgvoc/apps/cmor_tables/__init__.py +7 -0
- esgvoc/apps/cmor_tables/cvs_table.py +948 -0
- esgvoc/apps/drs/__init__.py +0 -0
- esgvoc/apps/drs/constants.py +2 -0
- esgvoc/apps/drs/generator.py +429 -0
- esgvoc/apps/drs/report.py +540 -0
- esgvoc/apps/drs/validator.py +312 -0
- esgvoc/apps/ga/__init__.py +104 -0
- esgvoc/apps/ga/example_usage.py +315 -0
- esgvoc/apps/ga/models/__init__.py +47 -0
- esgvoc/apps/ga/models/netcdf_header.py +306 -0
- esgvoc/apps/ga/models/validator.py +491 -0
- esgvoc/apps/ga/test_ga.py +161 -0
- esgvoc/apps/ga/validator.py +277 -0
- esgvoc/apps/jsg/json_schema_generator.py +341 -0
- esgvoc/apps/jsg/templates/template.jinja +241 -0
- esgvoc/apps/test_cv/README.md +214 -0
- esgvoc/apps/test_cv/__init__.py +0 -0
- esgvoc/apps/test_cv/cv_tester.py +1611 -0
- esgvoc/apps/test_cv/example_usage.py +216 -0
- esgvoc/apps/vr/__init__.py +12 -0
- esgvoc/apps/vr/build_variable_registry.py +71 -0
- esgvoc/apps/vr/example_usage.py +60 -0
- esgvoc/apps/vr/vr_app.py +333 -0
- esgvoc/cli/clean.py +304 -0
- esgvoc/cli/cmor.py +46 -0
- esgvoc/cli/config.py +1300 -0
- esgvoc/cli/drs.py +267 -0
- esgvoc/cli/find.py +138 -0
- esgvoc/cli/get.py +155 -0
- esgvoc/cli/install.py +41 -0
- esgvoc/cli/main.py +60 -0
- esgvoc/cli/offline.py +269 -0
- esgvoc/cli/status.py +79 -0
- esgvoc/cli/test_cv.py +258 -0
- esgvoc/cli/valid.py +147 -0
- esgvoc/core/constants.py +17 -0
- esgvoc/core/convert.py +0 -0
- esgvoc/core/data_handler.py +206 -0
- esgvoc/core/db/__init__.py +3 -0
- esgvoc/core/db/connection.py +40 -0
- esgvoc/core/db/models/mixins.py +25 -0
- esgvoc/core/db/models/project.py +102 -0
- esgvoc/core/db/models/universe.py +98 -0
- esgvoc/core/db/project_ingestion.py +231 -0
- esgvoc/core/db/universe_ingestion.py +172 -0
- esgvoc/core/exceptions.py +33 -0
- esgvoc/core/logging_handler.py +26 -0
- esgvoc/core/repo_fetcher.py +345 -0
- esgvoc/core/service/__init__.py +41 -0
- esgvoc/core/service/configuration/config_manager.py +196 -0
- esgvoc/core/service/configuration/setting.py +363 -0
- esgvoc/core/service/data_merger.py +634 -0
- esgvoc/core/service/esg_voc.py +77 -0
- esgvoc/core/service/resolver_config.py +56 -0
- esgvoc/core/service/state.py +324 -0
- esgvoc/core/service/string_heuristics.py +98 -0
- esgvoc/core/service/term_cache.py +108 -0
- esgvoc/core/service/uri_resolver.py +133 -0
- esgvoc-2.0.2.dist-info/METADATA +82 -0
- esgvoc-2.0.2.dist-info/RECORD +147 -0
- esgvoc-2.0.2.dist-info/WHEEL +4 -0
- esgvoc-2.0.2.dist-info/entry_points.txt +2 -0
- esgvoc-2.0.2.dist-info/licenses/LICENSE.txt +519 -0
|
@@ -0,0 +1,315 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Example usage of the GA (Global Attributes) validator.
|
|
3
|
+
|
|
4
|
+
This script demonstrates how to use the GA validator to validate NetCDF global attributes
|
|
5
|
+
against CMIP project specifications using the esgvoc API.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import os
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
from esgvoc.apps.ga.validator import GAValidator, validate_netcdf_attributes, create_validation_summary
|
|
12
|
+
from esgvoc.apps.ga.models import NetCDFHeaderParser
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def example_validate_ncdump():
    """
    Example: Validate NetCDF global attributes from ncdump output.

    The sample header is validated twice: once through the
    ``validate_netcdf_attributes`` convenience function and once through a
    ``GAValidator`` instance, which is also used to print the required
    attributes and the information available for ``activity_id``.

    :return: Tuple ``(report, report2)`` holding the report of the convenience
        function and the report of ``GAValidator.validate_from_ncdump``
    """
    # Sample ncdump output from the provided example.
    # Continued string attributes carry the two characters backslash + n
    # (written ``\\n`` here), so no physical line break ends up inside a
    # quoted value.
    ncdump_output = """netcdf tas_Amon_CanESM5_historical_r11i1p1f1_gn_185001-201412 {
dimensions:
time = UNLIMITED ; // (1980 currently)
bnds = 2 ;
lat = 64 ;
lon = 128 ;
variables:
double time(time) ;
time:bounds = "time_bnds" ;
time:units = "days since 1850-01-01 0:0:0.0" ;
time:calendar = "365_day" ;
time:axis = "T" ;
time:long_name = "time" ;
time:standard_name = "time" ;
double time_bnds(time, bnds) ;
double lat(lat) ;
lat:bounds = "lat_bnds" ;
lat:units = "degrees_north" ;
lat:axis = "Y" ;
lat:long_name = "Latitude" ;
lat:standard_name = "latitude" ;
double lat_bnds(lat, bnds) ;
double lon(lon) ;
lon:bounds = "lon_bnds" ;
lon:units = "degrees_east" ;
lon:axis = "X" ;
lon:long_name = "Longitude" ;
lon:standard_name = "longitude" ;
double lon_bnds(lon, bnds) ;
double height ;
height:units = "m" ;
height:axis = "Z" ;
height:positive = "up" ;
height:long_name = "height" ;
height:standard_name = "height" ;
float tas(time, lat, lon) ;
tas:standard_name = "air_temperature" ;
tas:long_name = "Near-Surface Air Temperature" ;
tas:comment = "ST+273.16, CMIP_table_comment: near-surface (usually, 2 meter) air temperature" ;
tas:units = "K" ;
tas:original_name = "ST" ;
tas:history = "degctok 2019-04-30T17:44:13Z altered by CMOR: Treated scalar dimension: 'height'. 2019-04-30T17:44:13Z altered by CMOR: Reordered dimensions, original order: lat lon time. 2019-04-30T17:44:13Z altered by CMOR: replaced missing value flag (1e+38) with standard missing value (1e+20)." ;
tas:cell_methods = "area: time: mean" ;
tas:cell_measures = "area: areacella" ;
tas:coordinates = "height" ;
tas:missing_value = 1.e+20f ;
tas:_FillValue = 1.e+20f ;

// global attributes:
:CCCma_model_hash = "7e8e715f3f2ce47e1bab830db971c362ca329419" ;
:CCCma_parent_runid = "rc3.1-pictrl" ;
:CCCma_pycmor_hash = "33c30511acc319a98240633965a04ca99c26427e" ;
:CCCma_runid = "rc3.1-his11" ;
:Conventions = "CF-1.7 CMIP-6.2" ;
:YMDH_branch_time_in_child = "1850:01:01:00" ;
:YMDH_branch_time_in_parent = "5701:01:01:00" ;
:activity_id = "CMIP" ;
:branch_method = "Spin-up documentation" ;
:branch_time_in_child = 0. ;
:branch_time_in_parent = 1405615. ;
:contact = "ec.cccma.info-info.ccmac.ec@canada.ca" ;
:creation_date = "2019-04-30T17:44:13Z" ;
:data_specs_version = "01.00.29" ;
:experiment = "all-forcing simulation of the recent past" ;
:experiment_id = "historical" ;
:external_variables = "areacella" ;
:forcing_index = 1 ;
:frequency = "mon" ;
:further_info_url = "https://furtherinfo.es-doc.org/CMIP6.CCCma.CanESM5.historical.none.r11i1p1f1" ;
:grid = "T63L49 native atmosphere, T63 Linear Gaussian Grid; 128 x 64 longitude/latitude; 49 levels; top level 1 hPa" ;
:grid_label = "gn" ;
:history = "2019-04-30T17:44:13Z ;rewrote data to be consistent with CMIP for variable tas found in table Amon.;\\n",
"Output from $runid" ;
:initialization_index = 1 ;
:institution = "Canadian Centre for Climate Modelling and Analysis, Environment and Climate Change Canada, Victoria, BC V8P 5C2, Canada" ;
:institution_id = "CCCma" ;
:mip_era = "CMIP6" ;
:nominal_resolution = "500 km" ;
:parent_activity_id = "CMIP" ;
:parent_experiment_id = "piControl" ;
:parent_mip_era = "CMIP6" ;
:parent_source_id = "CanESM5" ;
:parent_time_units = "days since 1850-01-01 0:0:0.0" ;
:parent_variant_label = "r1i1p1f1" ;
:physics_index = 1 ;
:product = "model-output" ;
:realization_index = 11 ;
:realm = "atmos" ;
:references = "Geophysical Model Development Special issue on CanESM5 (https://www.geosci-model-dev.net/special_issues.html)" ;
:source = "CanESM5 (2019): \\n",
"aerosol: interactive\\n",
"atmos: CanAM5 (T63L49 native atmosphere, T63 Linear Gaussian Grid; 128 x 64 longitude/latitude; 49 levels; top level 1 hPa)\\n",
"atmosChem: specified oxidants for aerosols\\n",
"land: CLASS3.6/CTEM1.2\\n",
"landIce: specified ice sheets\\n",
"ocean: NEMO3.4.1 (ORCA1 tripolar grid, 1 deg with refinement to 1/3 deg within 20 degrees of the equator; 361 x 290 longitude/latitude; 45 vertical levels; top grid cell 0-6.19 m)\\n",
"ocnBgchem: Canadian Model of Ocean Carbon (CMOC); NPZD ecosystem with OMIP prescribed carbonate chemistry\\n",
"seaIce: LIM2" ;
:source_id = "CanESM5" ;
:source_type = "AOGCM" ;
:sub_experiment = "none" ;
:sub_experiment_id = "none" ;
:table_id = "Amon" ;
:table_info = "Creation Date:(20 February 2019) MD5:374fbe5a2bcca535c40f7f23da271e49" ;
:title = "CanESM5 output prepared for CMIP6" ;
:tracking_id = "hdl:21.14100/3a32f67e-ae59-40d8-ae4a-2e03e922fe8e" ;
:variable_id = "tas" ;
:variant_label = "r11i1p1f1" ;
:version = "v20190429" ;
:license = "CMIP6 model data produced by The Government of Canada (Canadian Centre for Climate Modelling and Analysis, Environment and Climate Change Canada) is licensed under a Creative Commons Attribution ShareAlike 4.0 International License (https://creativecommons.org/licenses). Consult https://pcmdi.llnl.gov/CMIP6/TermsOfUse for terms of use governing CMIP6 output, including citation requirements and proper acknowledgment. Further information about this data, including some limitations, can be found via the further_info_url (recorded as a global attribute in this file) and at https:///pcmdi.llnl.gov/. The data producers and data providers make no warranty, either express or implied, including, but not limited to, warranties of merchantability and fitness for a particular purpose. All liabilities arising from the supply of the information (including any liability arising in negligence) are excluded to the fullest extent permitted by law." ;
:cmor_version = "3.4.0" ;
}"""

    print("=== Example: Validating NetCDF Global Attributes ===")
    print()

    # Method 1: Using the convenience function
    print("Method 1: Using convenience function")
    report = validate_netcdf_attributes(
        ncdump_output=ncdump_output,
        project_id="cmip6",
        filename="tas_Amon_CanESM5_historical_r11i1p1f1_gn_185001-201412.nc",
    )

    print(create_validation_summary(report))
    print()

    # Method 2: Using the GAValidator class directly
    print("Method 2: Using GAValidator class")
    validator = GAValidator(project_id="cmip6")

    # List required attributes
    print("Required attributes for CMIP6:")
    for attr in validator.get_required_attributes():
        print(f" • {attr}")
    print()

    # Get info about specific attributes
    print("Information about 'activity_id' attribute:")
    activity_info = validator.get_attribute_info("activity_id")
    if activity_info:
        for key, value in activity_info.items():
            print(f" {key}: {value}")
    print()

    # Validate with detailed reporting
    report2 = validator.validate_from_ncdump(ncdump_output, "example_file.nc")
    print(f"Validation result: {report2.summary()}")

    return report, report2
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def example_validate_attributes_dict():
    """
    Example: Validate global attributes supplied as a plain dictionary.

    A ``GAValidator`` for project ``cmip6`` checks a hand-written attribute
    mapping and the resulting summary is printed.

    :return: The report returned by ``GAValidator.validate_from_attributes_dict``
    """
    print("=== Example: Validating from Attributes Dictionary ===")
    print()

    ga_validator = GAValidator(project_id="cmip6")

    # Sample attributes. The "license" entry is intentionally left out so the
    # report shows how an absent attribute is handled.
    sample_attributes = {
        "Conventions": "CF-1.7 CMIP-6.2",
        "activity_id": "CMIP",
        "creation_date": "2019-04-30T17:44:13Z",
        "data_specs_version": "01.00.29",
        "experiment_id": "historical",
        "forcing_index": 1,
        "frequency": "mon",
        "grid_label": "gn",
        "initialization_index": 1,
        "institution_id": "CCCma",
        "mip_era": "CMIP6",
        "nominal_resolution": "500 km",
        "physics_index": 1,
        "realization_index": 11,
        "source_id": "CanESM5",
        "table_id": "Amon",
        "tracking_id": "hdl:21.14100/3a32f67e-ae59-40d8-ae4a-2e03e922fe8e",
        "variable_id": "tas",
        "variant_label": "r11i1p1f1",
    }

    outcome = ga_validator.validate_from_attributes_dict(sample_attributes, "test_attributes.nc")
    summary_text = create_validation_summary(outcome)
    print(summary_text)

    return outcome
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def example_parse_netcdf_header():
    """
    Example: Parse a small ncdump header and print what was found.

    Prints the filename, every dimension, every variable and every global
    attribute of the parsed header.

    :return: The header object returned by ``NetCDFHeaderParser.parse_from_ncdump``
    """
    print("=== Example: Parsing NetCDF Header ===")
    print()

    # Minimal ncdump text used as parser input
    simple_ncdump = """netcdf test_file {
dimensions:
time = UNLIMITED ; // (12 currently)
lat = 180 ;
lon = 360 ;
variables:
double time(time) ;
time:units = "days since 1850-01-01" ;
time:calendar = "gregorian" ;
float temperature(time, lat, lon) ;
temperature:units = "K" ;
temperature:long_name = "Temperature" ;

// global attributes:
:Conventions = "CF-1.7" ;
:title = "Test NetCDF file" ;
:institution = "Test Institution" ;
:source = "Test Model" ;
:history = "Created for testing" ;
:comment = "This is a test file" ;
}"""

    parsed = NetCDFHeaderParser.parse_from_ncdump(simple_ncdump)

    print(f"Filename: {parsed.filename}")

    dims = parsed.dimensions
    print(f"Dimensions: {len(dims)}")
    for dim_name, dim in dims.items():
        # The marker is empty for fixed-size dimensions; the separating space is always printed.
        marker = "(unlimited)" if dim.is_unlimited else ""
        print(f" • {dim_name}: {dim.size} {marker}")

    file_vars = parsed.variables
    print(f"Variables: {len(file_vars)}")
    for var_name, var in file_vars.items():
        print(f" • {var_name} ({var.data_type}): dims={var.dimensions}, attrs={len(var.attributes)}")

    global_attrs = parsed.global_attributes.attributes
    print(f"Global attributes: {len(global_attrs)}")
    for attr_name, attr_value in global_attrs.items():
        print(f" • {attr_name}: {attr_value}")

    return parsed
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
def example_custom_config():
    """
    Example: Build a validator from a YAML configuration file.

    The file ``attributes_specs.yaml`` is looked up next to this module. When it
    exists, a ``GAValidator`` is created from it and every attribute name is
    listed together with a required/optional marker.

    :return: The validator, or None when the configuration file does not exist
    """
    print("=== Example: Custom Configuration ===")
    print()

    # Configuration file expected in the same directory as this script
    config_path = Path(__file__).parent / "attributes_specs.yaml"

    print(f"Using configuration file: {config_path}")

    if not config_path.exists():
        print(f"Configuration file not found: {config_path}")
        return None

    validator = GAValidator(config_path=str(config_path), project_id="cmip6")

    print(f"Loaded {len(validator.list_attributes())} attribute specifications")
    print("Attribute names:")
    for attr in sorted(validator.list_attributes()):
        info = validator.get_attribute_info(attr)
        # Anything without a truthy "required" entry is reported as optional.
        if info and info.get("required"):
            required = "required"
        else:
            required = "optional"
        print(f" • {attr} ({required})")

    return validator
|
|
288
|
+
|
|
289
|
+
|
|
290
|
+
def main():
    """
    Run the ncdump validation example and report any failure.

    Only ``example_validate_ncdump`` is invoked. Any exception raised while it
    runs is caught: its message and traceback are printed and the exception is
    not re-raised.
    """
    import traceback

    separator = "=" * 50

    try:
        print("NetCDF Global Attributes Validator - Examples")
        print(separator)
        print()

        # Example 1: Validate from ncdump output (the returned reports are not used here)
        example_validate_ncdump()

        print()
        print(separator)
        print("Examples completed successfully!")

    except Exception as error:
        print(f"Error running examples: {error}")
        traceback.print_exc()


if __name__ == "__main__":
    main()
|
|
315
|
+
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
"""
|
|
2
|
+
GA (Global Attributes) models package.
|
|
3
|
+
|
|
4
|
+
This package provides Pydantic models for validating NetCDF global attributes
|
|
5
|
+
against project specifications using the esgvoc API.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
# Import from project_specs for attribute models
|
|
9
|
+
from esgvoc.api.project_specs import AttributeProperty, AttributeSpecification
|
|
10
|
+
|
|
11
|
+
from .netcdf_header import (
|
|
12
|
+
NetCDFDimension,
|
|
13
|
+
NetCDFVariable,
|
|
14
|
+
NetCDFGlobalAttributes as NetCDFGlobalAttributesNew,
|
|
15
|
+
NetCDFHeader,
|
|
16
|
+
NetCDFHeaderParser,
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
from .validator import (
|
|
20
|
+
ValidationSeverity,
|
|
21
|
+
ValidationIssue,
|
|
22
|
+
ValidationReport,
|
|
23
|
+
ESGVocAttributeValidator,
|
|
24
|
+
GlobalAttributeValidator as GlobalAttributeValidatorNew,
|
|
25
|
+
ValidatorFactory,
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
# Public names exported by this package (static list)
|
|
30
|
+
__all__ = [
    # Attribute specification models from project_specs
    "AttributeProperty",
    "AttributeSpecification",
    # NetCDF header models
    "NetCDFDimension",
    "NetCDFVariable",
    # Alias: imported above as ``NetCDFGlobalAttributes as NetCDFGlobalAttributesNew``
    "NetCDFGlobalAttributesNew",
    "NetCDFHeader",
    "NetCDFHeaderParser",
    # Validation models
    "ValidationSeverity",
    "ValidationIssue",
    "ValidationReport",
    "ESGVocAttributeValidator",
    # Alias: imported above as ``GlobalAttributeValidator as GlobalAttributeValidatorNew``
    "GlobalAttributeValidatorNew",
    "ValidatorFactory",
]
|
|
@@ -0,0 +1,306 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Models for parsing and validating NetCDF headers.
|
|
3
|
+
|
|
4
|
+
This module provides Pydantic models to parse NetCDF header information
|
|
5
|
+
and validate global attributes against project specifications.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import re
|
|
9
|
+
from typing import Any, Union, Optional, Dict, List
|
|
10
|
+
from pydantic import BaseModel, Field, field_validator, ValidationError
|
|
11
|
+
|
|
12
|
+
from esgvoc.api.data_descriptors.data_descriptor import ConfiguredBaseModel
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class NetCDFDimension(ConfiguredBaseModel):
    """
    A single dimension declared in a NetCDF header.

    ``size`` holds either the integer length or the literal string
    ``"UNLIMITED"``; ``is_unlimited`` flags the latter case.
    """

    name: str = Field(..., description="Dimension name")
    size: Union[int, str] = Field(..., description="Dimension size (int or 'UNLIMITED')")
    is_unlimited: bool = Field(default=False, description="Whether this is an unlimited dimension")

    @field_validator("is_unlimited", mode="before")
    @classmethod
    def check_unlimited(cls, v, info):
        """
        Force ``is_unlimited`` to True when ``size`` is the string ``"UNLIMITED"``.

        :param v: Raw value supplied for ``is_unlimited``
        :param info: Validation info; ``info.data`` holds the already validated fields
        :return: True for an unlimited size, otherwise ``v`` unchanged
        """
        # NOTE(review): whether this runs when ``is_unlimited`` is omitted depends on
        # the ``validate_default`` setting of ConfiguredBaseModel - confirm.
        size_marks_unlimited = info.data.get("size") == "UNLIMITED"
        return True if size_marks_unlimited else v
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class NetCDFVariable(ConfiguredBaseModel):
    """
    Represents a NetCDF variable with its attributes.

    ``name`` and ``data_type`` are required; ``dimensions`` and ``attributes``
    default to a new empty list / dict for each instance.
    """

    # Name of the variable as declared in the header (required).
    name: str = Field(..., description="Variable name")
    # Names of the dimensions the variable is declared over.
    dimensions: List[str] = Field(default_factory=list, description="Variable dimensions")
    # Type keyword of the declaration, kept as plain text (required).
    data_type: str = Field(..., description="Variable data type (e.g., float, double)")
    # Attribute name -> value; values are not restricted to a particular type.
    attributes: Dict[str, Any] = Field(default_factory=dict, description="Variable attributes")
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class NetCDFGlobalAttributes(ConfiguredBaseModel):
    """
    Holder for the global attributes of a NetCDF file.

    All attributes live in the ``attributes`` mapping; the methods are small
    read-only helpers around that mapping.
    """

    attributes: Dict[str, Union[str, int, float, List[Any]]] = Field(
        default_factory=dict, description="Dictionary of global attributes"
    )

    def get_attribute(self, name: str) -> Optional[Any]:
        """
        Look up a global attribute.

        :param name: Attribute name
        :return: The stored value, or None when the name is not present
        """
        return self.attributes.get(name)

    def get_string_attribute(self, name: str) -> Optional[str]:
        """
        Look up a global attribute and render it with ``str``.

        :param name: Attribute name
        :return: ``str(value)``, or None when the lookup yields None
        """
        found = self.get_attribute(name)
        if found is None:
            return None
        return str(found)

    def has_attribute(self, name: str) -> bool:
        """
        Tell whether a global attribute is present.

        :param name: Attribute name
        :return: True if the name is a key of ``attributes``
        """
        return name in self.attributes

    def list_attributes(self) -> List[str]:
        """
        Return the names of all global attributes.

        :return: New list with the keys of ``attributes``
        """
        return list(self.attributes)
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
class NetCDFHeader(ConfiguredBaseModel):
    """
    Complete NetCDF header information including dimensions, variables, and global attributes.

    Instances are normally built with :meth:`from_ncdump_output`. Parsing works
    on whole statements, i.e. the text up to the first ``;`` that is not inside
    a double-quoted string. Attribute values that contain semicolons, or that
    are continued over several physical lines, are therefore kept intact.
    """

    filename: Optional[str] = Field(default=None, description="NetCDF filename")
    dimensions: Dict[str, NetCDFDimension] = Field(default_factory=dict, description="File dimensions")
    variables: Dict[str, NetCDFVariable] = Field(default_factory=dict, description="File variables")
    global_attributes: NetCDFGlobalAttributes = Field(
        default_factory=NetCDFGlobalAttributes, description="Global attributes"
    )

    @classmethod
    def from_ncdump_output(cls, ncdump_output: str) -> "NetCDFHeader":
        """
        Parse NetCDF header from ncdump command output.

        Lines are grouped by the ``dimensions:``, ``variables:`` and
        ``// global attributes:`` section markers; parsing stops at ``data:``.
        Lines that do not form a recognised statement are ignored.

        :param ncdump_output: Output from ncdump -h command
        :return: NetCDFHeader instance
        """
        lines = ncdump_output.strip().split("\n")

        # Extract filename from first line (e.g., "netcdf tas_Amon_CanESM5_historical_r11i1p1f1_gn_185001-201412 {")
        filename_match = re.match(r"netcdf\s+(.+?)\s*\{", lines[0])
        filename = filename_match.group(1) if filename_match else None

        dimensions: Dict[str, NetCDFDimension] = {}
        variables: Dict[str, NetCDFVariable] = {}
        global_attributes: Dict[str, Any] = {}

        current_section = None
        current_variable = None
        current_variable_info: Dict = {}

        # Text of an attribute statement whose terminating ';' has not been seen yet
        pending = ""
        # True while ``pending`` ends inside an open double-quoted string
        pending_in_string = False

        for raw_line in lines[1:]:
            line = raw_line.strip()

            if not line or line == "}":
                continue

            # Section headers (never looked for inside an open quoted string)
            if not pending_in_string:
                if line.startswith("dimensions:"):
                    current_section, pending = "dimensions", ""
                    continue
                elif line.startswith("variables:"):
                    current_section, pending = "variables", ""
                    continue
                elif line.startswith("// global attributes:"):
                    current_section, pending = "global_attributes", ""
                    continue
                elif line.startswith("data:"):
                    break  # We don't need data section

            if pending:
                # A line break inside a quoted string is part of the value;
                # between list items it is only layout.
                statement = pending + ("\n" if pending_in_string else " ") + line
            else:
                statement = line

            end, in_string = cls._scan_statement(statement)
            if end < 0:
                # Only an attribute assignment that is visibly unfinished (open
                # string or trailing comma) is continued on the next line; any
                # other unterminated line is dropped.
                if "=" in statement and (in_string or statement.endswith(",")):
                    pending, pending_in_string = statement, in_string
                else:
                    pending, pending_in_string = "", False
                continue
            pending, pending_in_string = "", False

            # Parse based on current section
            if current_section == "dimensions":
                cls._parse_dimension_line(statement, dimensions)
            elif current_section == "variables":
                result = cls._parse_variable_line(statement, variables, current_variable, current_variable_info)
                if result:
                    current_variable, current_variable_info = result
            elif current_section == "global_attributes":
                cls._parse_global_attribute_line(statement, global_attributes)

        return cls(
            filename=filename,
            dimensions=dimensions,
            variables=variables,
            global_attributes=NetCDFGlobalAttributes(attributes=global_attributes),
        )

    @staticmethod
    def _scan_statement(text: str) -> tuple:
        """
        Locate the ``;`` that terminates a statement.

        Semicolons inside double-quoted strings are skipped, and a quote
        preceded by a backslash inside a string does not close it.

        :param text: Statement text, possibly assembled from several lines
        :return: ``(index, in_string)``; ``index`` is the position of the
            terminator or -1 when there is none, ``in_string`` tells whether
            the text ends inside an open quoted string
        """
        in_string = False
        escaped = False
        for index, char in enumerate(text):
            if escaped:
                escaped = False
            elif char == '"':
                in_string = not in_string
            elif in_string:
                escaped = char == "\\"
            elif char == ";":
                return index, False
        return -1, in_string

    @staticmethod
    def _join_string_parts(value: str) -> str:
        """
        Strip the quotes from an attribute value and join continued string parts.

        ``"a", "b"`` becomes ``ab``. Escape sequences are left as written.
        Values that do not start with a double quote are returned unchanged.

        :param value: Attribute value text without the terminating ``;``
        :return: Unquoted text
        """
        if not value.startswith('"'):
            return value
        parts = re.findall(r'"((?:[^"\\]|\\.)*)"', value, re.DOTALL)
        if parts:
            return "".join(parts)
        return value[1:]  # Unbalanced quote: drop the opening quote only

    @staticmethod
    def _parse_dimension_line(line: str, dimensions: Dict[str, NetCDFDimension]):
        """
        Parse a dimension line from ncdump output.

        :param line: Dimension statement
        :param dimensions: Mapping that receives the parsed dimension
        """
        # e.g., "time = UNLIMITED ; // (1980 currently)"
        # e.g., "lat = 64 ;"
        match = re.match(r"\s*(\w+)\s*=\s*(.+?)\s*;", line)
        if not match:
            return

        dim_name = match.group(1)
        dim_size_str = match.group(2).split("//")[0].strip()

        if dim_size_str == "UNLIMITED":
            dimensions[dim_name] = NetCDFDimension(name=dim_name, size="UNLIMITED", is_unlimited=True)
            return

        try:
            size = int(dim_size_str)
        except ValueError:
            return  # Skip malformed dimension lines
        dimensions[dim_name] = NetCDFDimension(name=dim_name, size=size)

    @staticmethod
    def _parse_variable_line(
        line: str, variables: Dict[str, NetCDFVariable], current_variable: Optional[str], current_variable_info: Dict
    ) -> Optional[tuple]:
        """
        Parse a variable line from ncdump output.

        A declaration registers a new variable in ``variables``; an attribute
        statement is stored on the variable it names when that variable is known.

        :param line: Variable declaration or variable attribute statement
        :param variables: Mapping that receives variables and their attributes
        :param current_variable: Name of the most recently declared variable
        :param current_variable_info: Extra state paired with ``current_variable``
        :return: ``(variable name, info dict)`` to carry to the next line
        """
        end, _ = NetCDFHeader._scan_statement(line)
        if end < 0:
            return current_variable, current_variable_info
        body = line[:end]

        # Variable declaration: "double time(time) ;" or, for a scalar, "double height ;"
        var_decl_match = re.match(r"\s*(\w+)\s+(\w+)\s*(?:\(([^)]*)\))?\s*$", body)
        if var_decl_match:
            data_type = var_decl_match.group(1)
            var_name = var_decl_match.group(2)
            dimensions_str = var_decl_match.group(3) or ""  # None for scalar variables

            dimensions_list = [d.strip() for d in dimensions_str.split(",") if d.strip()]

            variables[var_name] = NetCDFVariable(
                name=var_name, data_type=data_type, dimensions=dimensions_list, attributes={}
            )

            return var_name, {}

        # Variable attribute: "time:units = "days since 1850-01-01 0:0:0.0" ;"
        attr_match = re.match(r"\s*(\w+):(\w+)\s*=\s*(.+?)\s*$", body, re.DOTALL)
        if attr_match and current_variable:
            var_name = attr_match.group(1)
            attr_name = attr_match.group(2)
            attr_value: Any = NetCDFHeader._join_string_parts(attr_match.group(3).strip())

            # Try to convert to appropriate type
            try:
                if "." in attr_value or "e" in attr_value.lower():
                    attr_value = float(attr_value)
                elif attr_value.isdigit() or (attr_value.startswith("-") and attr_value[1:].isdigit()):
                    attr_value = int(attr_value)
            except ValueError:
                pass  # Keep as string

            if var_name in variables:
                variables[var_name].attributes[attr_name] = attr_value

        return current_variable, current_variable_info

    @staticmethod
    def _parse_global_attribute_line(line: str, global_attributes: Dict[str, Any]):
        """
        Parse a global attribute line from ncdump output.

        :param line: Global attribute statement
        :param global_attributes: Mapping that receives the parsed attribute
        """
        # e.g., ':Conventions = "CF-1.7 CMIP-6.2" ;'
        # e.g., ':forcing_index = 1 ;'
        end, _ = NetCDFHeader._scan_statement(line)
        if end < 0:
            return

        match = re.match(r"\s*:(\w+)\s*=\s*(.+?)\s*$", line[:end], re.DOTALL)
        if not match:
            return

        attr_name = match.group(1)
        attr_value: Any = NetCDFHeader._join_string_parts(match.group(2).strip())

        # Try to convert to appropriate type: only text made of digits plus
        # the characters . - + e E is considered numeric
        try:
            if (
                attr_value.replace(".", "")
                .replace("-", "")
                .replace("e", "")
                .replace("E", "")
                .replace("+", "")
                .isdigit()
            ):
                if "." in attr_value or "e" in attr_value.lower():
                    attr_value = float(attr_value)
                else:
                    attr_value = int(attr_value)
        except ValueError:
            pass  # Keep as string

        global_attributes[attr_name] = attr_value
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
class NetCDFHeaderParser:
    """
    Entry points for obtaining a NetCDFHeader from different inputs.
    """

    @staticmethod
    def parse_from_ncdump(ncdump_output: str) -> NetCDFHeader:
        """
        Build a header object from ncdump text.

        Delegates to ``NetCDFHeader.from_ncdump_output``.

        :param ncdump_output: Output from ncdump -h command
        :return: NetCDFHeader instance
        """
        header = NetCDFHeader.from_ncdump_output(ncdump_output)
        return header

    @staticmethod
    def parse_from_file(filepath: str) -> NetCDFHeader:
        """
        Placeholder for reading a header directly from a NetCDF file.

        :param filepath: Path of the NetCDF file
        :raises NotImplementedError: Always
        """
        raise NotImplementedError("Direct NetCDF file parsing not yet implemented")

    @staticmethod
    def validate_ncdump_format(ncdump_output: str) -> bool:
        """
        Cheap plausibility check for ncdump text.

        The text (surrounding whitespace ignored) must begin with ``netcdf ``
        and contain a ``dimensions:`` or a ``// global attributes:`` marker.

        :param ncdump_output: String to validate
        :return: True if format looks valid
        """
        text = ncdump_output.strip()

        # Check for netcdf header on the first line
        first_line = text.split("\n", 1)[0]
        if not first_line.strip().startswith("netcdf "):
            return False

        # Neither marker contains a line break, so searching the whole text
        # is the same as searching line by line.
        return "dimensions:" in text or "// global attributes:" in text
|
|
306
|
+
|