esgvoc 2.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (147) hide show
  1. esgvoc/__init__.py +3 -0
  2. esgvoc/api/__init__.py +91 -0
  3. esgvoc/api/data_descriptors/EMD_models/__init__.py +66 -0
  4. esgvoc/api/data_descriptors/EMD_models/arrangement.py +21 -0
  5. esgvoc/api/data_descriptors/EMD_models/calendar.py +5 -0
  6. esgvoc/api/data_descriptors/EMD_models/cell_variable_type.py +20 -0
  7. esgvoc/api/data_descriptors/EMD_models/component_type.py +5 -0
  8. esgvoc/api/data_descriptors/EMD_models/coordinate.py +52 -0
  9. esgvoc/api/data_descriptors/EMD_models/grid_mapping.py +19 -0
  10. esgvoc/api/data_descriptors/EMD_models/grid_region.py +19 -0
  11. esgvoc/api/data_descriptors/EMD_models/grid_type.py +19 -0
  12. esgvoc/api/data_descriptors/EMD_models/horizontal_computational_grid.py +56 -0
  13. esgvoc/api/data_descriptors/EMD_models/horizontal_grid_cells.py +230 -0
  14. esgvoc/api/data_descriptors/EMD_models/horizontal_subgrid.py +41 -0
  15. esgvoc/api/data_descriptors/EMD_models/horizontal_units.py +5 -0
  16. esgvoc/api/data_descriptors/EMD_models/model.py +139 -0
  17. esgvoc/api/data_descriptors/EMD_models/model_component.py +115 -0
  18. esgvoc/api/data_descriptors/EMD_models/reference.py +61 -0
  19. esgvoc/api/data_descriptors/EMD_models/resolution.py +48 -0
  20. esgvoc/api/data_descriptors/EMD_models/temporal_refinement.py +19 -0
  21. esgvoc/api/data_descriptors/EMD_models/truncation_method.py +17 -0
  22. esgvoc/api/data_descriptors/EMD_models/vertical_computational_grid.py +91 -0
  23. esgvoc/api/data_descriptors/EMD_models/vertical_coordinate.py +5 -0
  24. esgvoc/api/data_descriptors/EMD_models/vertical_units.py +19 -0
  25. esgvoc/api/data_descriptors/__init__.py +159 -0
  26. esgvoc/api/data_descriptors/activity.py +72 -0
  27. esgvoc/api/data_descriptors/archive.py +5 -0
  28. esgvoc/api/data_descriptors/area_label.py +30 -0
  29. esgvoc/api/data_descriptors/branded_suffix.py +30 -0
  30. esgvoc/api/data_descriptors/branded_variable.py +21 -0
  31. esgvoc/api/data_descriptors/citation_url.py +5 -0
  32. esgvoc/api/data_descriptors/contact.py +5 -0
  33. esgvoc/api/data_descriptors/conventions.py +28 -0
  34. esgvoc/api/data_descriptors/creation_date.py +18 -0
  35. esgvoc/api/data_descriptors/data_descriptor.py +127 -0
  36. esgvoc/api/data_descriptors/data_specs_version.py +25 -0
  37. esgvoc/api/data_descriptors/date.py +5 -0
  38. esgvoc/api/data_descriptors/directory_date.py +22 -0
  39. esgvoc/api/data_descriptors/drs_specs.py +38 -0
  40. esgvoc/api/data_descriptors/experiment.py +215 -0
  41. esgvoc/api/data_descriptors/forcing_index.py +21 -0
  42. esgvoc/api/data_descriptors/frequency.py +48 -0
  43. esgvoc/api/data_descriptors/further_info_url.py +5 -0
  44. esgvoc/api/data_descriptors/grid.py +43 -0
  45. esgvoc/api/data_descriptors/horizontal_label.py +20 -0
  46. esgvoc/api/data_descriptors/initialization_index.py +27 -0
  47. esgvoc/api/data_descriptors/institution.py +80 -0
  48. esgvoc/api/data_descriptors/known_branded_variable.py +75 -0
  49. esgvoc/api/data_descriptors/license.py +31 -0
  50. esgvoc/api/data_descriptors/member_id.py +9 -0
  51. esgvoc/api/data_descriptors/mip_era.py +26 -0
  52. esgvoc/api/data_descriptors/model_component.py +32 -0
  53. esgvoc/api/data_descriptors/models_test/models.py +17 -0
  54. esgvoc/api/data_descriptors/nominal_resolution.py +50 -0
  55. esgvoc/api/data_descriptors/obs_type.py +5 -0
  56. esgvoc/api/data_descriptors/organisation.py +22 -0
  57. esgvoc/api/data_descriptors/physics_index.py +21 -0
  58. esgvoc/api/data_descriptors/product.py +16 -0
  59. esgvoc/api/data_descriptors/publication_status.py +5 -0
  60. esgvoc/api/data_descriptors/realization_index.py +24 -0
  61. esgvoc/api/data_descriptors/realm.py +16 -0
  62. esgvoc/api/data_descriptors/regex.py +5 -0
  63. esgvoc/api/data_descriptors/region.py +35 -0
  64. esgvoc/api/data_descriptors/resolution.py +7 -0
  65. esgvoc/api/data_descriptors/source.py +120 -0
  66. esgvoc/api/data_descriptors/source_type.py +5 -0
  67. esgvoc/api/data_descriptors/sub_experiment.py +5 -0
  68. esgvoc/api/data_descriptors/table.py +28 -0
  69. esgvoc/api/data_descriptors/temporal_label.py +20 -0
  70. esgvoc/api/data_descriptors/time_range.py +17 -0
  71. esgvoc/api/data_descriptors/title.py +5 -0
  72. esgvoc/api/data_descriptors/tracking_id.py +67 -0
  73. esgvoc/api/data_descriptors/variable.py +56 -0
  74. esgvoc/api/data_descriptors/variant_label.py +25 -0
  75. esgvoc/api/data_descriptors/vertical_label.py +20 -0
  76. esgvoc/api/project_specs.py +143 -0
  77. esgvoc/api/projects.py +1253 -0
  78. esgvoc/api/py.typed +0 -0
  79. esgvoc/api/pydantic_handler.py +146 -0
  80. esgvoc/api/report.py +127 -0
  81. esgvoc/api/search.py +171 -0
  82. esgvoc/api/universe.py +434 -0
  83. esgvoc/apps/__init__.py +6 -0
  84. esgvoc/apps/cmor_tables/__init__.py +7 -0
  85. esgvoc/apps/cmor_tables/cvs_table.py +948 -0
  86. esgvoc/apps/drs/__init__.py +0 -0
  87. esgvoc/apps/drs/constants.py +2 -0
  88. esgvoc/apps/drs/generator.py +429 -0
  89. esgvoc/apps/drs/report.py +540 -0
  90. esgvoc/apps/drs/validator.py +312 -0
  91. esgvoc/apps/ga/__init__.py +104 -0
  92. esgvoc/apps/ga/example_usage.py +315 -0
  93. esgvoc/apps/ga/models/__init__.py +47 -0
  94. esgvoc/apps/ga/models/netcdf_header.py +306 -0
  95. esgvoc/apps/ga/models/validator.py +491 -0
  96. esgvoc/apps/ga/test_ga.py +161 -0
  97. esgvoc/apps/ga/validator.py +277 -0
  98. esgvoc/apps/jsg/json_schema_generator.py +341 -0
  99. esgvoc/apps/jsg/templates/template.jinja +241 -0
  100. esgvoc/apps/test_cv/README.md +214 -0
  101. esgvoc/apps/test_cv/__init__.py +0 -0
  102. esgvoc/apps/test_cv/cv_tester.py +1611 -0
  103. esgvoc/apps/test_cv/example_usage.py +216 -0
  104. esgvoc/apps/vr/__init__.py +12 -0
  105. esgvoc/apps/vr/build_variable_registry.py +71 -0
  106. esgvoc/apps/vr/example_usage.py +60 -0
  107. esgvoc/apps/vr/vr_app.py +333 -0
  108. esgvoc/cli/clean.py +304 -0
  109. esgvoc/cli/cmor.py +46 -0
  110. esgvoc/cli/config.py +1300 -0
  111. esgvoc/cli/drs.py +267 -0
  112. esgvoc/cli/find.py +138 -0
  113. esgvoc/cli/get.py +155 -0
  114. esgvoc/cli/install.py +41 -0
  115. esgvoc/cli/main.py +60 -0
  116. esgvoc/cli/offline.py +269 -0
  117. esgvoc/cli/status.py +79 -0
  118. esgvoc/cli/test_cv.py +258 -0
  119. esgvoc/cli/valid.py +147 -0
  120. esgvoc/core/constants.py +17 -0
  121. esgvoc/core/convert.py +0 -0
  122. esgvoc/core/data_handler.py +206 -0
  123. esgvoc/core/db/__init__.py +3 -0
  124. esgvoc/core/db/connection.py +40 -0
  125. esgvoc/core/db/models/mixins.py +25 -0
  126. esgvoc/core/db/models/project.py +102 -0
  127. esgvoc/core/db/models/universe.py +98 -0
  128. esgvoc/core/db/project_ingestion.py +231 -0
  129. esgvoc/core/db/universe_ingestion.py +172 -0
  130. esgvoc/core/exceptions.py +33 -0
  131. esgvoc/core/logging_handler.py +26 -0
  132. esgvoc/core/repo_fetcher.py +345 -0
  133. esgvoc/core/service/__init__.py +41 -0
  134. esgvoc/core/service/configuration/config_manager.py +196 -0
  135. esgvoc/core/service/configuration/setting.py +363 -0
  136. esgvoc/core/service/data_merger.py +634 -0
  137. esgvoc/core/service/esg_voc.py +77 -0
  138. esgvoc/core/service/resolver_config.py +56 -0
  139. esgvoc/core/service/state.py +324 -0
  140. esgvoc/core/service/string_heuristics.py +98 -0
  141. esgvoc/core/service/term_cache.py +108 -0
  142. esgvoc/core/service/uri_resolver.py +133 -0
  143. esgvoc-2.0.2.dist-info/METADATA +82 -0
  144. esgvoc-2.0.2.dist-info/RECORD +147 -0
  145. esgvoc-2.0.2.dist-info/WHEEL +4 -0
  146. esgvoc-2.0.2.dist-info/entry_points.txt +2 -0
  147. esgvoc-2.0.2.dist-info/licenses/LICENSE.txt +519 -0
@@ -0,0 +1,315 @@
1
+ """
2
+ Example usage of the GA (Global Attributes) validator.
3
+
4
+ This script demonstrates how to use the GA validator to validate NetCDF global attributes
5
+ against CMIP project specifications using the esgvoc API.
6
+ """
7
+
8
+ import os
9
+ from pathlib import Path
10
+
11
+ from esgvoc.apps.ga.validator import GAValidator, validate_netcdf_attributes, create_validation_summary
12
+ from esgvoc.apps.ga.models import NetCDFHeaderParser
13
+
14
+
15
def example_validate_ncdump():
    """
    Demonstrate validation of global attributes taken from ncdump text.

    The header text of a CMIP6 CanESM5 file is embedded below so that the
    example is self-contained. It is validated twice: once through the
    ``validate_netcdf_attributes`` convenience function and once through an
    explicit ``GAValidator`` instance.

    :return: Tuple ``(report, report2)`` holding the report from the
        convenience function and the report from the validator instance
    """
    # Embedded ncdump header used as input for both validation methods
    ncdump_output = """netcdf tas_Amon_CanESM5_historical_r11i1p1f1_gn_185001-201412 {
dimensions:
time = UNLIMITED ; // (1980 currently)
bnds = 2 ;
lat = 64 ;
lon = 128 ;
variables:
double time(time) ;
time:bounds = "time_bnds" ;
time:units = "days since 1850-01-01 0:0:0.0" ;
time:calendar = "365_day" ;
time:axis = "T" ;
time:long_name = "time" ;
time:standard_name = "time" ;
double time_bnds(time, bnds) ;
double lat(lat) ;
lat:bounds = "lat_bnds" ;
lat:units = "degrees_north" ;
lat:axis = "Y" ;
lat:long_name = "Latitude" ;
lat:standard_name = "latitude" ;
double lat_bnds(lat, bnds) ;
double lon(lon) ;
lon:bounds = "lon_bnds" ;
lon:units = "degrees_east" ;
lon:axis = "X" ;
lon:long_name = "Longitude" ;
lon:standard_name = "longitude" ;
double lon_bnds(lon, bnds) ;
double height ;
height:units = "m" ;
height:axis = "Z" ;
height:positive = "up" ;
height:long_name = "height" ;
height:standard_name = "height" ;
float tas(time, lat, lon) ;
tas:standard_name = "air_temperature" ;
tas:long_name = "Near-Surface Air Temperature" ;
tas:comment = "ST+273.16, CMIP_table_comment: near-surface (usually, 2 meter) air temperature" ;
tas:units = "K" ;
tas:original_name = "ST" ;
tas:history = "degctok 2019-04-30T17:44:13Z altered by CMOR: Treated scalar dimension: 'height'. 2019-04-30T17:44:13Z altered by CMOR: Reordered dimensions, original order: lat lon time. 2019-04-30T17:44:13Z altered by CMOR: replaced missing value flag (1e+38) with standard missing value (1e+20)." ;
tas:cell_methods = "area: time: mean" ;
tas:cell_measures = "area: areacella" ;
tas:coordinates = "height" ;
tas:missing_value = 1.e+20f ;
tas:_FillValue = 1.e+20f ;

// global attributes:
:CCCma_model_hash = "7e8e715f3f2ce47e1bab830db971c362ca329419" ;
:CCCma_parent_runid = "rc3.1-pictrl" ;
:CCCma_pycmor_hash = "33c30511acc319a98240633965a04ca99c26427e" ;
:CCCma_runid = "rc3.1-his11" ;
:Conventions = "CF-1.7 CMIP-6.2" ;
:YMDH_branch_time_in_child = "1850:01:01:00" ;
:YMDH_branch_time_in_parent = "5701:01:01:00" ;
:activity_id = "CMIP" ;
:branch_method = "Spin-up documentation" ;
:branch_time_in_child = 0. ;
:branch_time_in_parent = 1405615. ;
:contact = "ec.cccma.info-info.ccmac.ec@canada.ca" ;
:creation_date = "2019-04-30T17:44:13Z" ;
:data_specs_version = "01.00.29" ;
:experiment = "all-forcing simulation of the recent past" ;
:experiment_id = "historical" ;
:external_variables = "areacella" ;
:forcing_index = 1 ;
:frequency = "mon" ;
:further_info_url = "https://furtherinfo.es-doc.org/CMIP6.CCCma.CanESM5.historical.none.r11i1p1f1" ;
:grid = "T63L49 native atmosphere, T63 Linear Gaussian Grid; 128 x 64 longitude/latitude; 49 levels; top level 1 hPa" ;
:grid_label = "gn" ;
:history = "2019-04-30T17:44:13Z ;rewrote data to be consistent with CMIP for variable tas found in table Amon.;\n",
"Output from $runid" ;
:initialization_index = 1 ;
:institution = "Canadian Centre for Climate Modelling and Analysis, Environment and Climate Change Canada, Victoria, BC V8P 5C2, Canada" ;
:institution_id = "CCCma" ;
:mip_era = "CMIP6" ;
:nominal_resolution = "500 km" ;
:parent_activity_id = "CMIP" ;
:parent_experiment_id = "piControl" ;
:parent_mip_era = "CMIP6" ;
:parent_source_id = "CanESM5" ;
:parent_time_units = "days since 1850-01-01 0:0:0.0" ;
:parent_variant_label = "r1i1p1f1" ;
:physics_index = 1 ;
:product = "model-output" ;
:realization_index = 11 ;
:realm = "atmos" ;
:references = "Geophysical Model Development Special issue on CanESM5 (https://www.geosci-model-dev.net/special_issues.html)" ;
:source = "CanESM5 (2019): \\n",
"aerosol: interactive\\n",
"atmos: CanAM5 (T63L49 native atmosphere, T63 Linear Gaussian Grid; 128 x 64 longitude/latitude; 49 levels; top level 1 hPa)\\n",
"atmosChem: specified oxidants for aerosols\\n",
"land: CLASS3.6/CTEM1.2\\n",
"landIce: specified ice sheets\\n",
"ocean: NEMO3.4.1 (ORCA1 tripolar grid, 1 deg with refinement to 1/3 deg within 20 degrees of the equator; 361 x 290 longitude/latitude; 45 vertical levels; top grid cell 0-6.19 m)\\n",
"ocnBgchem: Canadian Model of Ocean Carbon (CMOC); NPZD ecosystem with OMIP prescribed carbonate chemistry\\n",
"seaIce: LIM2" ;
:source_id = "CanESM5" ;
:source_type = "AOGCM" ;
:sub_experiment = "none" ;
:sub_experiment_id = "none" ;
:table_id = "Amon" ;
:table_info = "Creation Date:(20 February 2019) MD5:374fbe5a2bcca535c40f7f23da271e49" ;
:title = "CanESM5 output prepared for CMIP6" ;
:tracking_id = "hdl:21.14100/3a32f67e-ae59-40d8-ae4a-2e03e922fe8e" ;
:variable_id = "tas" ;
:variant_label = "r11i1p1f1" ;
:version = "v20190429" ;
:license = "CMIP6 model data produced by The Government of Canada (Canadian Centre for Climate Modelling and Analysis, Environment and Climate Change Canada) is licensed under a Creative Commons Attribution ShareAlike 4.0 International License (https://creativecommons.org/licenses). Consult https://pcmdi.llnl.gov/CMIP6/TermsOfUse for terms of use governing CMIP6 output, including citation requirements and proper acknowledgment. Further information about this data, including some limitations, can be found via the further_info_url (recorded as a global attribute in this file) and at https:///pcmdi.llnl.gov/. The data producers and data providers make no warranty, either express or implied, including, but not limited to, warranties of merchantability and fitness for a particular purpose. All liabilities arising from the supply of the information (including any liability arising in negligence) are excluded to the fullest extent permitted by law." ;
:cmor_version = "3.4.0" ;
}"""

    print("=== Example: Validating NetCDF Global Attributes ===")
    print()

    # First approach: the one-call helper that builds its own validator
    print("Method 1: Using convenience function")
    convenience_report = validate_netcdf_attributes(
        ncdump_output=ncdump_output,
        project_id="cmip6",
        filename="tas_Amon_CanESM5_historical_r11i1p1f1_gn_185001-201412.nc",
    )
    print(create_validation_summary(convenience_report))
    print()

    # Second approach: an explicit validator that can also be queried
    print("Method 2: Using GAValidator class")
    ga_validator = GAValidator(project_id="cmip6")

    print("Required attributes for CMIP6:")
    for required_name in ga_validator.get_required_attributes():
        print(f" • {required_name}")
    print()

    print("Information about 'activity_id' attribute:")
    # An empty mapping stands in for "no information", so the loop is skipped
    activity_details = ga_validator.get_attribute_info("activity_id") or {}
    for key, value in activity_details.items():
        print(f" {key}: {value}")
    print()

    # Same input, validated through the instance
    detailed_report = ga_validator.validate_from_ncdump(ncdump_output, "example_file.nc")
    print(f"Validation result: {detailed_report.summary()}")

    return convenience_report, detailed_report
170
+
171
+
172
def example_validate_attributes_dict():
    """
    Demonstrate validation of attributes supplied as a plain dictionary.

    The sample below carries only a subset of the attributes found in a
    CMIP6 file (``license``, for instance, is left out), so the printed
    summary shows how absent attributes are reported.

    :return: The report returned by ``GAValidator.validate_from_attributes_dict``
    """
    print("=== Example: Validating from Attributes Dictionary ===")
    print()

    # Key order is kept as-is; it may influence the order of reported issues
    sample_attributes = {
        "Conventions": "CF-1.7 CMIP-6.2",
        "activity_id": "CMIP",
        "creation_date": "2019-04-30T17:44:13Z",
        "data_specs_version": "01.00.29",
        "experiment_id": "historical",
        "forcing_index": 1,
        "frequency": "mon",
        "grid_label": "gn",
        "initialization_index": 1,
        "institution_id": "CCCma",
        "mip_era": "CMIP6",
        "nominal_resolution": "500 km",
        "physics_index": 1,
        "realization_index": 11,
        "source_id": "CanESM5",
        "table_id": "Amon",
        "tracking_id": "hdl:21.14100/3a32f67e-ae59-40d8-ae4a-2e03e922fe8e",
        "variable_id": "tas",
        "variant_label": "r11i1p1f1",
    }

    dict_report = GAValidator(project_id="cmip6").validate_from_attributes_dict(
        sample_attributes, "test_attributes.nc"
    )
    print(create_validation_summary(dict_report))

    return dict_report
210
+
211
+
212
def example_parse_netcdf_header():
    """
    Demonstrate parsing of an ncdump header without validating it.

    A small hand-written header is parsed with ``NetCDFHeaderParser`` and
    its filename, dimensions, variables and global attributes are printed.

    :return: The parsed header object
    """
    print("=== Example: Parsing NetCDF Header ===")
    print()

    # Minimal header: three dimensions, two variables, six global attributes
    simple_ncdump = """netcdf test_file {
dimensions:
time = UNLIMITED ; // (12 currently)
lat = 180 ;
lon = 360 ;
variables:
double time(time) ;
time:units = "days since 1850-01-01" ;
time:calendar = "gregorian" ;
float temperature(time, lat, lon) ;
temperature:units = "K" ;
temperature:long_name = "Temperature" ;

// global attributes:
:Conventions = "CF-1.7" ;
:title = "Test NetCDF file" ;
:institution = "Test Institution" ;
:source = "Test Model" ;
:history = "Created for testing" ;
:comment = "This is a test file" ;
}"""

    parsed = NetCDFHeaderParser.parse_from_ncdump(simple_ncdump)

    print(f"Filename: {parsed.filename}")

    print(f"Dimensions: {len(parsed.dimensions)}")
    for dim_name, dim in parsed.dimensions.items():
        unlimited_marker = "(unlimited)" if dim.is_unlimited else ""
        print(f" • {dim_name}: {dim.size} {unlimited_marker}")

    print(f"Variables: {len(parsed.variables)}")
    for var_name, var in parsed.variables.items():
        attr_count = len(var.attributes)
        print(f" • {var_name} ({var.data_type}): dims={var.dimensions}, attrs={attr_count}")

    global_attrs = parsed.global_attributes.attributes
    print(f"Global attributes: {len(global_attrs)}")
    for attr_name, attr_value in global_attrs.items():
        print(f" • {attr_name}: {attr_value}")

    return parsed
259
+
260
+
261
def example_custom_config():
    """
    Demonstrate building a validator from a YAML configuration file.

    The file ``attributes_specs.yaml`` is looked up next to this module.
    When it exists, a validator is created from it and every attribute
    name is listed together with its required/optional status.

    :return: The configured ``GAValidator``, or None when the file is absent
    """
    print("=== Example: Custom Configuration ===")
    print()

    # The configuration is expected to sit beside this script
    config_path = Path(__file__).parent / "attributes_specs.yaml"
    print(f"Using configuration file: {config_path}")

    if not config_path.exists():
        print(f"Configuration file not found: {config_path}")
        return None

    custom_validator = GAValidator(config_path=str(config_path), project_id="cmip6")

    print(f"Loaded {len(custom_validator.list_attributes())} attribute specifications")
    print("Attribute names:")
    for attr_name in sorted(custom_validator.list_attributes()):
        spec = custom_validator.get_attribute_info(attr_name)
        if spec and spec.get("required"):
            status = "required"
        else:
            status = "optional"
        print(f" • {attr_name} ({status})")

    return custom_validator
288
+
289
+
290
def main():
    """
    Run all examples defined in this module, in declaration order.

    Each example prints its own output. Any exception raised by an example
    stops the run; the error message and its traceback are printed instead
    of being propagated, so the script always terminates normally.
    """
    # Every example is listed so that none of them is left as dead code
    examples = (
        example_validate_ncdump,
        example_validate_attributes_dict,
        example_parse_netcdf_header,
        example_custom_config,
    )

    try:
        print("NetCDF Global Attributes Validator - Examples")
        print("=" * 50)
        print()

        for run_example in examples:
            # Return values are only of interest to interactive callers
            run_example()
            print()

        print("=" * 50)
        print("Examples completed successfully!")

    except Exception as e:
        # Top-level boundary of a demo script: report the failure in full
        print(f"Error running examples: {e}")
        import traceback

        traceback.print_exc()


if __name__ == "__main__":
    main()
315
+
@@ -0,0 +1,47 @@
1
+ """
2
+ GA (Global Attributes) models package.
3
+
4
+ This package provides Pydantic models for validating NetCDF global attributes
5
+ against project specifications using the esgvoc API.
6
+ """
7
+
8
+ # Import from project_specs for attribute models
9
+ from esgvoc.api.project_specs import AttributeProperty, AttributeSpecification
10
+
11
+ from .netcdf_header import (
12
+ NetCDFDimension,
13
+ NetCDFVariable,
14
+ NetCDFGlobalAttributes as NetCDFGlobalAttributesNew,
15
+ NetCDFHeader,
16
+ NetCDFHeaderParser,
17
+ )
18
+
19
+ from .validator import (
20
+ ValidationSeverity,
21
+ ValidationIssue,
22
+ ValidationReport,
23
+ ESGVocAttributeValidator,
24
+ GlobalAttributeValidator as GlobalAttributeValidatorNew,
25
+ ValidatorFactory,
26
+ )
27
+
28
+
29
+ # Build __all__ dynamically based on available modules
30
+ __all__ = [
31
+ # Attribute specification models from project_specs
32
+ "AttributeProperty",
33
+ "AttributeSpecification",
34
+ # NetCDF header models
35
+ "NetCDFDimension",
36
+ "NetCDFVariable",
37
+ "NetCDFGlobalAttributesNew",
38
+ "NetCDFHeader",
39
+ "NetCDFHeaderParser",
40
+ # Validation models
41
+ "ValidationSeverity",
42
+ "ValidationIssue",
43
+ "ValidationReport",
44
+ "ESGVocAttributeValidator",
45
+ "GlobalAttributeValidatorNew",
46
+ "ValidatorFactory",
47
+ ]
@@ -0,0 +1,306 @@
1
+ """
2
+ Models for parsing and validating NetCDF headers.
3
+
4
+ This module provides Pydantic models to parse NetCDF header information
5
+ and validate global attributes against project specifications.
6
+ """
7
+
8
+ import re
9
+ from typing import Any, Union, Optional, Dict, List
10
+ from pydantic import BaseModel, Field, field_validator, ValidationError
11
+
12
+ from esgvoc.api.data_descriptors.data_descriptor import ConfiguredBaseModel
13
+
14
+
15
class NetCDFDimension(ConfiguredBaseModel):
    """
    A single dimension declared in a NetCDF header.

    ``size`` holds either an integer length or the literal string
    ``"UNLIMITED"``; ``is_unlimited`` flags the latter case.
    """

    name: str = Field(..., description="Dimension name")
    size: Union[int, str] = Field(..., description="Dimension size (int or 'UNLIMITED')")
    is_unlimited: bool = Field(default=False, description="Whether this is an unlimited dimension")

    @field_validator("is_unlimited", mode="before")
    @classmethod
    def check_unlimited(cls, v, info):
        """
        Force the flag to True when ``size`` is the string ``"UNLIMITED"``.

        :param v: Incoming value for ``is_unlimited``
        :param info: Validation context; ``info.data`` is read for ``size``
        :return: True for an unlimited size, otherwise ``v`` unchanged
        """
        # NOTE(review): pydantic only runs field validators on defaulted
        # fields when validate_default is enabled - confirm that
        # ConfiguredBaseModel's configuration does so.
        size_is_unlimited = info.data.get("size") == "UNLIMITED"
        return True if size_is_unlimited else v
31
+
32
+
33
class NetCDFVariable(ConfiguredBaseModel):
    """
    A variable declared in a NetCDF header, together with its attributes.

    Only ``name`` and ``data_type`` are mandatory; ``dimensions`` and
    ``attributes`` default to fresh empty containers for each instance.
    """

    # Identifier of the variable as written in the header
    name: str = Field(..., description="Variable name")
    # Names of the dimensions the variable is defined on, in declared order
    dimensions: List[str] = Field(default_factory=list, description="Variable dimensions")
    # Type keyword that precedes the variable name in its declaration
    data_type: str = Field(..., description="Variable data type (e.g., float, double)")
    # Attribute name -> value for this variable
    attributes: Dict[str, Any] = Field(default_factory=dict, description="Variable attributes")
42
+
43
+
44
class NetCDFGlobalAttributes(ConfiguredBaseModel):
    """
    Wrapper around the mapping of NetCDF global attributes.

    The accessors below are thin conveniences over the ``attributes``
    dictionary; none of them modifies it.
    """

    attributes: Dict[str, Union[str, int, float, List[Any]]] = Field(
        default_factory=dict, description="Dictionary of global attributes"
    )

    def get_attribute(self, name: str) -> Optional[Any]:
        """
        Look up a global attribute.

        :param name: Attribute name
        :return: The stored value, or None when the name is unknown
        """
        return self.attributes.get(name, None)

    def get_string_attribute(self, name: str) -> Optional[str]:
        """
        Look up a global attribute and render it with ``str()``.

        :param name: Attribute name
        :return: String form of the value, or None when the lookup yields None
        """
        raw_value = self.get_attribute(name)
        if raw_value is None:
            return None
        return str(raw_value)

    def has_attribute(self, name: str) -> bool:
        """
        Tell whether a global attribute is present.

        :param name: Attribute name
        :return: True when ``name`` is a key of ``attributes``
        """
        present = name in self.attributes
        return present

    def list_attributes(self) -> List[str]:
        """
        Enumerate the names of all global attributes.

        :return: New list of attribute names in dictionary order
        """
        return list(self.attributes)
88
+
89
+
90
# Section markers printed by ncdump, paired with the parser's section names.
_SECTION_MARKERS = (
    ("dimensions:", "dimensions"),
    ("variables:", "variables"),
    ("// global attributes:", "global_attributes"),
    ("data:", "data"),
)

# One double-quoted segment; backslash escapes are kept verbatim in the capture.
_QUOTED_SEGMENT = re.compile(r'"((?:[^"\\]|\\.)*)"', re.DOTALL)

# Unquoted numeric literals, optionally followed by a CDL type suffix
# (e.g. ``1b``, ``5UL``, ``1.e+20f``). The number itself is group 1.
_INT_LITERAL = re.compile(r"([+-]?\d+)(?:[uU]?(?:[bBsS]|[lL]{1,2})|[uU])?")
_FLOAT_LITERAL = re.compile(r"([+-]?(?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?)[fFdD]?")


def _section_for(line: str) -> Optional[str]:
    """
    Return the section name if ``line`` is a section marker, else None.

    A marker must stand alone on its line (a trailing ``//`` comment is
    tolerated). This keeps an attribute of a variable that happens to be
    called ``data``, ``variables`` or ``dimensions`` (``data:units = ...``)
    from being mistaken for a section header.
    """
    for marker, section in _SECTION_MARKERS:
        if line.startswith(marker):
            remainder = line[len(marker):].strip()
            if not remainder or remainder.startswith("//"):
                return section
    return None


def _find_terminator(text: str, start: int = 0) -> tuple:
    """
    Locate the ``;`` that ends a statement, ignoring quoted text.

    :param text: Statement text to scan
    :param start: Index at which scanning begins
    :return: ``(index, quote_open)``. ``index`` is -1 when no ``;`` exists
        outside double quotes; ``quote_open`` then tells whether the scan
        ended inside an unterminated quoted string.
    """
    in_quote = False
    escaped = False
    for index in range(start, len(text)):
        char = text[index]
        if escaped:
            # Character following a backslash never opens or closes a quote
            escaped = False
        elif in_quote and char == "\\":
            escaped = True
        elif char == '"':
            in_quote = not in_quote
        elif char == ";" and not in_quote:
            return index, False
    return -1, in_quote


def _iter_statements(lines: List[str]):
    """
    Group physical lines into section markers and complete statements.

    Yields ``("section", name)`` for section markers and
    ``("statement", text)`` for statements, where ``text`` ends with the
    terminating ``;``. A line is merged with the following one only when it
    ends inside an open quote or with a comma, which is how continued
    attribute values are written. Any other unterminated line is dropped,
    so unrelated lines never swallow the statement that follows them.
    """
    pending = ""
    quote_open = False
    for raw_line in lines:
        line = raw_line.strip()
        continuing = quote_open or pending.endswith(",")
        if continuing:
            # A newline is part of the value only while inside a quote
            candidate = pending + ("\n" if quote_open else " ") + line
        else:
            if not line or line == "}":
                pending = ""
                continue
            section = _section_for(line)
            if section is not None:
                pending = ""
                yield "section", section
                continue
            candidate = line

        end, quote_open = _find_terminator(candidate)
        if end == -1:
            pending = candidate
        else:
            # Anything after the terminator (e.g. a comment) is discarded
            pending = ""
            yield "statement", candidate[: end + 1]


def _coerce_number(token: str) -> Any:
    """
    Convert an unquoted literal to int or float when it is numeric.

    :param token: Unquoted attribute value
    :return: int, float, or ``token`` unchanged when it is not a single number
    """
    int_match = _INT_LITERAL.fullmatch(token)
    if int_match:
        return int(int_match.group(1))
    float_match = _FLOAT_LITERAL.fullmatch(token)
    if float_match:
        return float(float_match.group(1))
    return token


def _parse_attribute_value(raw: str) -> Any:
    """
    Turn the text between ``=`` and ``;`` into a Python value.

    Quoted values always stay strings, even when their content looks
    numeric. Several quoted segments separated by commas (a value continued
    over multiple lines) are concatenated. Escape sequences are left exactly
    as written. Unquoted values are converted to numbers when possible.

    :param raw: Stripped value text, without the terminating ``;``
    :return: str, int or float
    """
    if raw.startswith('"'):
        segments = _QUOTED_SEGMENT.findall(raw)
        if segments:
            return "".join(segments)
        # Unbalanced quote: keep the text after the opening quote
        return raw[1:]
    return _coerce_number(raw)


class NetCDFHeader(ConfiguredBaseModel):
    """
    Complete NetCDF header information including dimensions, variables, and global attributes.
    """

    filename: Optional[str] = Field(default=None, description="NetCDF filename")
    dimensions: Dict[str, NetCDFDimension] = Field(default_factory=dict, description="File dimensions")
    variables: Dict[str, NetCDFVariable] = Field(default_factory=dict, description="File variables")
    global_attributes: NetCDFGlobalAttributes = Field(
        default_factory=NetCDFGlobalAttributes, description="Global attributes"
    )

    @classmethod
    def from_ncdump_output(cls, ncdump_output: str) -> "NetCDFHeader":
        """
        Parse NetCDF header from ncdump command output.

        Statements that span several lines are reassembled before they are
        handed to the per-section parsers, and semicolons inside quoted
        strings are not treated as statement terminators. Parsing stops at
        the ``data:`` section.

        :param ncdump_output: Output from ncdump -h command
        :return: NetCDFHeader instance
        """
        lines = ncdump_output.strip().split("\n")

        # First line looks like "netcdf <name> {"
        filename_match = re.match(r"netcdf\s+(.+?)\s*\{", lines[0])
        filename = filename_match.group(1) if filename_match else None

        dimensions: Dict[str, NetCDFDimension] = {}
        variables: Dict[str, NetCDFVariable] = {}
        global_attributes: Dict[str, Any] = {}

        current_section = None
        current_variable = None
        current_variable_info: Dict = {}

        for kind, text in _iter_statements(lines[1:]):
            if kind == "section":
                if text == "data":
                    break  # The data section is not part of the header
                current_section = text
                continue

            if current_section == "dimensions":
                cls._parse_dimension_line(text, dimensions)
            elif current_section == "variables":
                result = cls._parse_variable_line(text, variables, current_variable, current_variable_info)
                if result:
                    current_variable, current_variable_info = result
            elif current_section == "global_attributes":
                cls._parse_global_attribute_line(text, global_attributes)

        return cls(
            filename=filename,
            dimensions=dimensions,
            variables=variables,
            global_attributes=NetCDFGlobalAttributes(attributes=global_attributes),
        )

    @staticmethod
    def _parse_dimension_line(line: str, dimensions: Dict[str, NetCDFDimension]):
        """
        Parse a dimension statement and record it in ``dimensions``.

        Handles ``time = UNLIMITED ; // (1980 currently)`` and ``lat = 64 ;``.
        Statements whose size is neither ``UNLIMITED`` nor an integer are
        skipped.

        :param line: Dimension statement
        :param dimensions: Mapping updated in place, keyed by dimension name
        """
        match = re.match(r"\s*(\w+)\s*=\s*(.+?)\s*;", line)
        if not match:
            return

        dim_name = match.group(1)
        dim_size_str = match.group(2).split("//")[0].strip()

        if dim_size_str == "UNLIMITED":
            dimensions[dim_name] = NetCDFDimension(name=dim_name, size="UNLIMITED", is_unlimited=True)
            return

        try:
            size = int(dim_size_str)
        except ValueError:
            return  # Skip malformed dimension lines
        dimensions[dim_name] = NetCDFDimension(name=dim_name, size=size)

    @staticmethod
    def _parse_variable_line(
        line: str, variables: Dict[str, NetCDFVariable], current_variable: Optional[str], current_variable_info: Dict
    ) -> Optional[tuple]:
        """
        Parse a statement from the variables section.

        Two shapes are recognised: a declaration (``double time(time) ;`` or
        the scalar form ``double height ;``) and a variable attribute
        (``time:units = "days since 1850-01-01 0:0:0.0" ;``).

        :param line: Statement to parse
        :param variables: Mapping updated in place, keyed by variable name
        :param current_variable: Name of the most recently declared variable
        :param current_variable_info: Carried through unchanged for callers
        :return: ``(var_name, {})`` after a declaration, otherwise
            ``(current_variable, current_variable_info)``
        """
        # The parenthesised dimension list is optional so scalars are kept
        var_decl_match = re.match(r"\s*(\w+)\s+(\w+)\s*(?:\(([^)]*)\))?\s*;", line)
        if var_decl_match:
            data_type = var_decl_match.group(1)
            var_name = var_decl_match.group(2)
            dimensions_str = var_decl_match.group(3) or ""

            dimensions_list = [d.strip() for d in dimensions_str.split(",") if d.strip()]

            variables[var_name] = NetCDFVariable(
                name=var_name, data_type=data_type, dimensions=dimensions_list, attributes={}
            )

            return var_name, {}

        attr_match = re.match(r"\s*(\w+):(\w+)\s*=\s*", line)
        if attr_match and current_variable:
            # Search for the terminator after "=", skipping quoted semicolons
            end, _ = _find_terminator(line, attr_match.end())
            if end != -1:
                var_name = attr_match.group(1)
                attr_name = attr_match.group(2)
                raw_value = line[attr_match.end():end].strip()

                # Attributes of undeclared variables are ignored
                if raw_value and var_name in variables:
                    variables[var_name].attributes[attr_name] = _parse_attribute_value(raw_value)

        return current_variable, current_variable_info

    @staticmethod
    def _parse_global_attribute_line(line: str, global_attributes: Dict[str, Any]):
        """
        Parse a global attribute statement and record it.

        Handles ``:Conventions = "CF-1.7 CMIP-6.2" ;`` and
        ``:forcing_index = 1 ;``. Quoted values remain strings, with
        semicolons inside the quotes preserved; unquoted numeric values
        become int or float.

        :param line: Global attribute statement
        :param global_attributes: Mapping updated in place, keyed by name
        """
        match = re.match(r"\s*:(\w+)\s*=\s*", line)
        if not match:
            return

        end, _ = _find_terminator(line, match.end())
        if end == -1:
            return  # No terminator outside quotes: not a complete statement

        raw_value = line[match.end():end].strip()
        if not raw_value:
            return

        global_attributes[match.group(1)] = _parse_attribute_value(raw_value)
259
+
260
+
261
class NetCDFHeaderParser:
    """
    Stateless entry points for obtaining a ``NetCDFHeader``.
    """

    @staticmethod
    def parse_from_ncdump(ncdump_output: str) -> NetCDFHeader:
        """
        Build a header from ncdump text.

        Delegates to ``NetCDFHeader.from_ncdump_output``.

        :param ncdump_output: Output from ncdump -h command
        :return: NetCDFHeader instance
        """
        header = NetCDFHeader.from_ncdump_output(ncdump_output)
        return header

    @staticmethod
    def parse_from_file(filepath: str) -> NetCDFHeader:
        """
        Build a header by reading a NetCDF file directly.

        Not available yet; calling it always raises.

        :param filepath: Path of the NetCDF file
        :raises NotImplementedError: Always
        """
        raise NotImplementedError("Direct NetCDF file parsing not yet implemented")

    @staticmethod
    def validate_ncdump_format(ncdump_output: str) -> bool:
        """
        Cheap plausibility check for ncdump text.

        The first line must start with ``netcdf `` and some line must
        contain either ``dimensions:`` or ``// global attributes:``.

        :param ncdump_output: String to validate
        :return: True if format looks valid
        """
        lines = ncdump_output.strip().split("\n")

        # The header line comes first in ncdump output
        if not lines or not lines[0].strip().startswith("netcdf "):
            return False

        # At least one of the two recognised sections has to be present
        for marker in ("dimensions:", "// global attributes:"):
            if any(marker in line for line in lines):
                return True
        return False
306
+