climate-ref-pmp 0.7.0__tar.gz → 0.8.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. {climate_ref_pmp-0.7.0 → climate_ref_pmp-0.8.1}/PKG-INFO +1 -1
  2. {climate_ref_pmp-0.7.0 → climate_ref_pmp-0.8.1}/pyproject.toml +1 -1
  3. {climate_ref_pmp-0.7.0 → climate_ref_pmp-0.8.1}/src/climate_ref_pmp/diagnostics/annual_cycle.py +213 -69
  4. {climate_ref_pmp-0.7.0 → climate_ref_pmp-0.8.1}/src/climate_ref_pmp/diagnostics/variability_modes.py +6 -1
  5. {climate_ref_pmp-0.7.0 → climate_ref_pmp-0.8.1}/src/climate_ref_pmp/pmp_driver.py +3 -1
  6. {climate_ref_pmp-0.7.0 → climate_ref_pmp-0.8.1}/tests/unit/test_annual_cycle.py +1 -1
  7. {climate_ref_pmp-0.7.0 → climate_ref_pmp-0.8.1}/.gitignore +0 -0
  8. {climate_ref_pmp-0.7.0 → climate_ref_pmp-0.8.1}/LICENCE +0 -0
  9. {climate_ref_pmp-0.7.0 → climate_ref_pmp-0.8.1}/NOTICE +0 -0
  10. {climate_ref_pmp-0.7.0 → climate_ref_pmp-0.8.1}/README.md +0 -0
  11. {climate_ref_pmp-0.7.0 → climate_ref_pmp-0.8.1}/conftest.py +0 -0
  12. {climate_ref_pmp-0.7.0 → climate_ref_pmp-0.8.1}/src/climate_ref_pmp/__init__.py +0 -0
  13. {climate_ref_pmp-0.7.0 → climate_ref_pmp-0.8.1}/src/climate_ref_pmp/dataset_registry/pmp_climatology.txt +0 -0
  14. {climate_ref_pmp-0.7.0 → climate_ref_pmp-0.8.1}/src/climate_ref_pmp/diagnostics/__init__.py +0 -0
  15. {climate_ref_pmp-0.7.0 → climate_ref_pmp-0.8.1}/src/climate_ref_pmp/diagnostics/enso.py +0 -0
  16. {climate_ref_pmp-0.7.0 → climate_ref_pmp-0.8.1}/src/climate_ref_pmp/drivers/enso_driver.py +0 -0
  17. {climate_ref_pmp-0.7.0 → climate_ref_pmp-0.8.1}/src/climate_ref_pmp/params/pmp_param_MoV-psl.py +0 -0
  18. {climate_ref_pmp-0.7.0 → climate_ref_pmp-0.8.1}/src/climate_ref_pmp/params/pmp_param_MoV-ts.py +0 -0
  19. {climate_ref_pmp-0.7.0 → climate_ref_pmp-0.8.1}/src/climate_ref_pmp/params/pmp_param_annualcycle_1-clims.py +0 -0
  20. {climate_ref_pmp-0.7.0 → climate_ref_pmp-0.8.1}/src/climate_ref_pmp/params/pmp_param_annualcycle_2-metrics.py +0 -0
  21. {climate_ref_pmp-0.7.0 → climate_ref_pmp-0.8.1}/src/climate_ref_pmp/py.typed +0 -0
  22. {climate_ref_pmp-0.7.0 → climate_ref_pmp-0.8.1}/src/climate_ref_pmp/requirements/conda-lock.yml +0 -0
  23. {climate_ref_pmp-0.7.0 → climate_ref_pmp-0.8.1}/src/climate_ref_pmp/requirements/environment.yml +0 -0
  24. {climate_ref_pmp-0.7.0 → climate_ref_pmp-0.8.1}/tests/integration/test_diagnostics.py +0 -0
  25. {climate_ref_pmp-0.7.0 → climate_ref_pmp-0.8.1}/tests/unit/conftest.py +0 -0
  26. {climate_ref_pmp-0.7.0 → climate_ref_pmp-0.8.1}/tests/unit/test_enso.py +0 -0
  27. {climate_ref_pmp-0.7.0 → climate_ref_pmp-0.8.1}/tests/unit/test_pmp_driver.py +0 -0
  28. {climate_ref_pmp-0.7.0 → climate_ref_pmp-0.8.1}/tests/unit/test_provider.py +0 -0
  29. {climate_ref_pmp-0.7.0 → climate_ref_pmp-0.8.1}/tests/unit/test_variability_modes.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: climate-ref-pmp
3
- Version: 0.7.0
3
+ Version: 0.8.1
4
4
  Summary: PMP diagnostic provider for the Rapid Evaluation Framework
5
5
  Author-email: Jiwoo Lee <jwlee@llnl.gov>, Jared Lewis <jared.lewis@climate-resource.com>
6
6
  License-Expression: Apache-2.0
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "climate-ref-pmp"
3
- version = "0.7.0"
3
+ version = "0.8.1"
4
4
  description = "PMP diagnostic provider for the Rapid Evaluation Framework"
5
5
  readme = "README.md"
6
6
  authors = [
@@ -1,5 +1,6 @@
1
1
  import datetime
2
2
  import json
3
+ from pathlib import Path
3
4
  from typing import Any
4
5
 
5
6
  from loguru import logger
@@ -14,6 +15,10 @@ from climate_ref_core.diagnostics import (
14
15
  from climate_ref_core.pycmec.metric import remove_dimensions
15
16
  from climate_ref_pmp.pmp_driver import build_glob_pattern, build_pmp_command, process_json_result
16
17
 
18
+ # =================================================================
19
+ # PMP diagnostics support functions for the annual cycle diagnostic
20
+ # =================================================================
21
+
17
22
 
18
23
  def make_data_requirement(variable_id: str, obs_source: str) -> tuple[DataRequirement, DataRequirement]:
19
24
  """
@@ -43,7 +48,7 @@ def make_data_requirement(variable_id: str, obs_source: str) -> tuple[DataRequir
43
48
  FacetFilter(
44
49
  facets={
45
50
  "frequency": "mon",
46
- "experiment_id": ("amip", "historical", "hist-GHG", "piControl"),
51
+ "experiment_id": ("amip", "historical", "hist-GHG"),
47
52
  "variable_id": (variable_id,),
48
53
  }
49
54
  ),
@@ -53,6 +58,159 @@ def make_data_requirement(variable_id: str, obs_source: str) -> tuple[DataRequir
53
58
  )
54
59
 
55
60
 
61
def _transform_results(data: dict[str, Any]) -> dict[str, Any]:
    """
    Transform the executions dictionary to match the expected structure.

    The ``model``, ``reference`` and ``rip`` dimensions are stripped with
    ``remove_dimensions`` and every ``"CalendarMonths"`` entry is dropped, both
    from the two-level nesting under ``"RESULTS"`` and from the ``"season"``
    entry under ``"DIMENSIONS"``.

    Parameters
    ----------
    data : dict
        The original execution dictionary. It must contain the ``"RESULTS"``
        and ``"DIMENSIONS"`` keys.

    Returns
    -------
    dict
        The transformed executions dictionary.
    """
    # Remove the model, reference, rip dimensions
    # These are later replaced with a REF-specific naming convention
    data = remove_dimensions(data, ["model", "reference", "rip"])

    # TODO: replace this with the ability to capture series
    # Remove the "CalendarMonths" key from the nested structure
    for region_values in data["RESULTS"].values():
        for stat_values in region_values.values():
            if "CalendarMonths" in stat_values:
                stat_values.pop("CalendarMonths")

    # Remove the "CalendarMonths" key from the nested structure in "DIMENSIONS".
    # This is tolerant of a missing entry, mirroring the guarded removal above,
    # so a file without monthly series does not abort the whole transformation.
    season = data["DIMENSIONS"].get("season")
    if season is not None:
        season.pop("CalendarMonths", None)

    return data
90
+
91
+
92
def transform_results_files(results_files: list[Any]) -> list[Any]:
    """
    Transform the results files to match the expected structure.

    Each input file is loaded as JSON, passed through ``_transform_results`` and
    written next to the original as ``<stem>_transformed.json``. The original
    files are left untouched.

    Parameters
    ----------
    results_files : list
        List of result files to transform. Entries may be ``Path`` objects or
        plain path strings.

    Returns
    -------
    list
        List of ``Path`` objects pointing at the transformed result files, in the
        same order as the input. Empty if no input files were given.
    """
    if not results_files:
        logger.warning("No results files provided for transformation.")
        return []

    transformed_results_files = []

    for results_file in results_files:
        # Accept str as well as Path: the stem/with_name calls below need a Path
        source_path = Path(results_file)

        # Rewrite the CMEC JSON file for compatibility
        with open(source_path, encoding="utf-8") as f:
            results_transformed = _transform_results(json.load(f))

        # New filename: the original stem with a "_transformed" suffix
        results_file_transformed = source_path.with_name(f"{source_path.stem}_transformed.json")

        with open(results_file_transformed, "w", encoding="utf-8") as f:
            # Write the transformed executions back to the file
            json.dump(results_transformed, f, indent=4)
        logger.debug(f"Transformed executions written to {results_file_transformed}")

        transformed_results_files.append(results_file_transformed)

    return transformed_results_files
133
+
134
+
135
+ def _update_top_level_keys(combined_results: dict[str, Any], data: dict[str, Any], levels: list[str]) -> None:
136
+ if "DIMENSIONS" not in data:
137
+ data["DIMENSIONS"] = {}
138
+
139
+ top_level_keys = list(data.keys())
140
+ top_level_keys.remove("RESULTS")
141
+
142
+ json_structure = data.get("DIMENSIONS", {}).get("json_structure", {})
143
+ json_structure = ["level", *json_structure]
144
+
145
+ for key in top_level_keys:
146
+ combined_results[key] = data[key]
147
+ if key == "Variable":
148
+ combined_results[key]["level"] = levels
149
+ elif key == "DIMENSIONS":
150
+ combined_results[key]["json_structure"] = json_structure
151
+ if "level" not in combined_results[key]:
152
+ combined_results[key]["level"] = {}
153
+ for level in levels:
154
+ combined_results[key]["level"][level] = {}
155
+
156
+
157
+ def combine_results_files(results_files: list[Any], output_directory: str | Path) -> Path:
158
+ """
159
+ Combine multiple results files into a single file.
160
+
161
+ Parameters
162
+ ----------
163
+ results_files : list
164
+ List of result files to combine.
165
+ output_directory : str or Path
166
+ Directory where the combined file will be saved.
167
+
168
+ Returns
169
+ -------
170
+ Path, list[str]
171
+ The path to the combined results file and a list of levels found in the results files.
172
+ """
173
+ combined_results: dict[str, dict[str, dict[str, dict[str, dict[str, Any]]]]] = {}
174
+ combined_results["RESULTS"] = {}
175
+ levels = []
176
+
177
+ # Ensure output_directory is a Path object
178
+ if isinstance(output_directory, str):
179
+ output_directory = Path(output_directory)
180
+
181
+ last_data = None
182
+ for file in results_files:
183
+ with open(file) as f:
184
+ data = json.load(f)
185
+ last_data = data
186
+ level_key = str(int(data["Variable"]["level"]))
187
+ levels.append(level_key)
188
+ logger.debug(f"Processing file: {file}, level_key: {level_key}")
189
+ # Insert the results into the combined_results dictionary
190
+ if level_key not in combined_results["RESULTS"]:
191
+ combined_results["RESULTS"][level_key] = data.get("RESULTS", {})
192
+
193
+ if last_data is not None:
194
+ _update_top_level_keys(combined_results, last_data, levels)
195
+
196
+ # Ensure the output directory exists
197
+ output_directory.mkdir(parents=True, exist_ok=True)
198
+
199
+ # Create the combined file path
200
+ combined_file_path = output_directory / "combined_results.json"
201
+
202
+ with open(combined_file_path, "w") as f:
203
+ json.dump(combined_results, f, indent=4)
204
+
205
+ # return combined_file_path, levels
206
+ return combined_file_path
207
+
208
+
209
+ # ===================================================
210
+ # PMP diagnostics main class: annual cycle diagnostic
211
+ # ===================================================
212
+
213
+
56
214
  class AnnualCycle(CommandLineDiagnostic):
57
215
  """
58
216
  Calculate the annual cycle for a dataset
@@ -72,10 +230,17 @@ class AnnualCycle(CommandLineDiagnostic):
72
230
  )
73
231
 
74
232
  data_requirements = (
233
+ # ERA-5 as reference dataset, spatial 2-D variables
75
234
  make_data_requirement("ts", "ERA-5"),
76
235
  make_data_requirement("uas", "ERA-5"),
77
236
  make_data_requirement("vas", "ERA-5"),
78
237
  make_data_requirement("psl", "ERA-5"),
238
+ # ERA-5 as reference dataset, spatial 3-D variables
239
+ make_data_requirement("ta", "ERA-5"),
240
+ make_data_requirement("ua", "ERA-5"),
241
+ make_data_requirement("va", "ERA-5"),
242
+ make_data_requirement("zg", "ERA-5"),
243
+ # Other reference datasets, spatial 2-D variables
79
244
  make_data_requirement("pr", "GPCP-Monthly-3-2"),
80
245
  make_data_requirement("rlds", "CERES-EBAF-4-2"),
81
246
  make_data_requirement("rlus", "CERES-EBAF-4-2"),
@@ -105,10 +270,6 @@ class AnnualCycle(CommandLineDiagnostic):
105
270
  """
106
271
  input_datasets = definition.datasets[SourceDatasetType.CMIP6]
107
272
  reference_datasets = definition.datasets[SourceDatasetType.PMPClimatology]
108
- selector = input_datasets.selector_dict()
109
- reference_selector = reference_datasets.selector_dict()
110
- logger.debug(f"selector: {selector}")
111
- logger.debug(f"reference selector: {reference_selector}")
112
273
 
113
274
  source_id = input_datasets["source_id"].unique()[0]
114
275
  experiment_id = input_datasets["experiment_id"].unique()[0]
@@ -159,10 +320,9 @@ class AnnualCycle(CommandLineDiagnostic):
159
320
  )
160
321
  )
161
322
 
162
- # ----------------------------------------------
323
+ # --------------------------------------------------
163
324
  # PART 2: Build the command to calculate diagnostics
164
- # ----------------------------------------------
165
-
325
+ # --------------------------------------------------
166
326
  # Reference
167
327
  obs_dict = {
168
328
  variable_id: {
@@ -179,13 +339,32 @@ class AnnualCycle(CommandLineDiagnostic):
179
339
 
180
340
  date = datetime.datetime.now().strftime("%Y%m%d")
181
341
 
342
+ if variable_id in ["ua", "va", "ta"]:
343
+ levels = ["200", "850"]
344
+ elif variable_id in ["zg"]:
345
+ levels = ["500"]
346
+ else:
347
+ levels = None
348
+
349
+ variables = []
350
+ if levels is not None:
351
+ for level in levels:
352
+ variable_id_with_level = f"{variable_id}-{level}"
353
+ variables.append(variable_id_with_level)
354
+ else:
355
+ variables = [variable_id]
356
+
357
+ logger.debug(f"variables: {variables}")
358
+ logger.debug(f"levels: {levels}")
359
+
360
+ # Build the command for each level
182
361
  params = {
183
- "vars": variable_id,
362
+ "vars": variables,
184
363
  "custom_observations": f"{output_directory_path}/obs_dict.json",
185
364
  "test_data_path": output_directory_path,
186
365
  "test_data_set": source_id,
187
366
  "realization": member_id,
188
- "filename_template": f"{variable_id}_{data_name}_clims.198101-200512.AC.v{date}.nc",
367
+ "filename_template": f"%(variable)_{data_name}_clims.198101-200512.AC.v{date}.nc",
189
368
  "metrics_output_path": output_directory_path,
190
369
  "cmec": "",
191
370
  }
@@ -198,6 +377,9 @@ class AnnualCycle(CommandLineDiagnostic):
198
377
  )
199
378
  )
200
379
 
380
+ logger.debug("build_cmd end")
381
+ logger.debug(f"cmds: {cmds}")
382
+
201
383
  return cmds
202
384
 
203
385
  def build_execution_result(self, definition: ExecutionDefinition) -> ExecutionResult:
@@ -216,46 +398,39 @@ class AnnualCycle(CommandLineDiagnostic):
216
398
  input_datasets = definition.datasets[SourceDatasetType.CMIP6]
217
399
  variable_id = input_datasets["variable_id"].unique()[0]
218
400
 
401
+ if variable_id in ["ua", "va", "ta"]:
402
+ variable_dir_pattern = f"{variable_id}-???"
403
+ else:
404
+ variable_dir_pattern = variable_id
405
+
219
406
  results_directory = definition.output_directory
220
- png_directory = results_directory / variable_id
221
- data_directory = results_directory / variable_id
407
+ png_directory = results_directory / variable_dir_pattern
408
+ data_directory = results_directory / variable_dir_pattern
222
409
 
223
410
  logger.debug(f"results_directory: {results_directory}")
224
411
  logger.debug(f"png_directory: {png_directory}")
225
412
  logger.debug(f"data_directory: {data_directory}")
226
413
 
227
- # Find the executions file
228
- results_files = list(results_directory.glob("*_cmec.json"))
229
- if len(results_files) != 1: # pragma: no cover
230
- logger.error(f"More than one or no cmec file found: {results_files}")
231
- return ExecutionResult.build_from_failure(definition)
232
- else:
414
+ # Find the CMEC JSON file(s)
415
+ results_files = transform_results_files(list(results_directory.glob("*_cmec.json")))
416
+
417
+ if len(results_files) == 1:
418
+ # If only one file, use it directly
233
419
  results_file = results_files[0]
234
420
  logger.debug(f"results_file: {results_file}")
421
+ elif len(results_files) > 1:
422
+ logger.info(f"More than one cmec file found: {results_files}")
423
+ results_file = combine_results_files(results_files, definition.output_directory)
424
+ else:
425
+ logger.error("Unexpected case: no cmec file found")
426
+ return ExecutionResult.build_from_failure(definition)
235
427
 
236
- # Rewrite executions file for compatibility
237
- with open(results_file) as f:
238
- results = json.load(f)
239
- results_transformed = _transform_results(results)
240
-
241
- # Get the stem (filename without extension)
242
- stem = results_file.stem
243
-
244
- # Create the new filename
245
- results_file_transformed = results_file.with_name(f"{stem}_transformed.json")
246
-
247
- with open(results_file_transformed, "w") as f:
248
- # Write the transformed executions back to the file
249
- json.dump(results_transformed, f, indent=4)
250
- logger.debug(f"Transformed executions written to {results_file_transformed}")
251
-
252
- # Find the other outputs
428
+ # Find the other outputs: PNG and NetCDF files
253
429
  png_files = list(png_directory.glob("*.png"))
254
430
  data_files = list(data_directory.glob("*.nc"))
255
431
 
256
- cmec_output_bundle, cmec_metric_bundle = process_json_result(
257
- results_file_transformed, png_files, data_files
258
- )
432
+ # Prepare the output bundles
433
+ cmec_output_bundle, cmec_metric_bundle = process_json_result(results_file, png_files, data_files)
259
434
 
260
435
  # Add missing dimensions to the output
261
436
  input_selectors = input_datasets.selector_dict()
@@ -294,34 +469,3 @@ class AnnualCycle(CommandLineDiagnostic):
294
469
 
295
470
  runs = [self.provider.run(cmd) for cmd in cmds]
296
471
  logger.debug(f"runs: {runs}")
297
-
298
-
299
- def _transform_results(data: dict[str, Any]) -> dict[str, Any]:
300
- """
301
- Transform the executions dictionary to match the expected structure.
302
-
303
- Parameters
304
- ----------
305
- data : dict
306
- The original execution dictionary.
307
-
308
- Returns
309
- -------
310
- dict
311
- The transformed executions dictionary.
312
- """
313
- # Remove the model, reference, rip dimensions
314
- # These are later replaced with a REF-specific naming convention
315
- data = remove_dimensions(data, ["model", "reference", "rip"])
316
-
317
- # TODO: replace this with the ability to capture series
318
- # Remove the "CalendarMonths" key from the nested structure
319
- for region, region_values in data["RESULTS"].items():
320
- for stat, stat_values in region_values.items():
321
- if "CalendarMonths" in stat_values:
322
- stat_values.pop("CalendarMonths")
323
-
324
- # Remove the "CalendarMonths" key from the nested structure in "DIMENSIONS"
325
- data["DIMENSIONS"]["season"].pop("CalendarMonths")
326
-
327
- return data
@@ -50,7 +50,7 @@ class ExtratropicalModesOfVariability(CommandLineDiagnostic):
50
50
  FacetFilter(
51
51
  facets={
52
52
  "frequency": "mon",
53
- "experiment_id": ("historical", "hist-GHG", "piControl", *extra_experiments),
53
+ "experiment_id": ("historical", "hist-GHG", *extra_experiments),
54
54
  "variable_id": model_variable,
55
55
  }
56
56
  )
@@ -149,6 +149,11 @@ class ExtratropicalModesOfVariability(CommandLineDiagnostic):
149
149
  params["osyear"] = 1950
150
150
  params["oeyear"] = 2005
151
151
 
152
+ if self.mode_id in ["NPO", "NPGO"]:
153
+ params["eofn_obs"] = 2
154
+ params["eofn_mod"] = 2
155
+ params["eofn_mod_max"] = 2
156
+
152
157
  # Pass the parameters using **kwargs
153
158
  return build_pmp_command(
154
159
  driver_file="variability_modes_driver.py",
@@ -169,7 +169,9 @@ def build_pmp_command(
169
169
  # Loop through additional arguments if they exist
170
170
  if kwargs: # pragma: no cover
171
171
  for key, value in kwargs.items():
172
- if value:
172
+ if isinstance(value, list):
173
+ cmd.extend([f"--{key}"] + [str(v) for v in value])
174
+ elif value:
173
175
  cmd.extend([f"--{key}", str(value)])
174
176
  else:
175
177
  cmd.extend([f"--{key}"])
@@ -173,7 +173,7 @@ def test_annual_cycle_diagnostic(
173
173
  "--realization",
174
174
  member_id,
175
175
  "--filename_template",
176
- f"{variable_id}_{source_id}_historical_{member_id}_clims.198101-200512.AC.v{datecode}.nc",
176
+ f"%(variable)_{source_id}_historical_{member_id}_clims.198101-200512.AC.v{datecode}.nc",
177
177
  "--metrics_output_path",
178
178
  str(output_dir),
179
179
  "--cmec",
File without changes
File without changes