climate-ref-pmp 0.5.5__py3-none-any.whl → 0.6.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,458 @@
+ """
+ ENSO diagnostic driver
+
+ This script runs inside the PMP conda environment due to the use of xcdat.
+ """
+
+ import argparse
+ import copy
+ import json
+ import os
+ from collections import defaultdict
+
+ import xcdat as xc
+
+ from pcmdi_metrics import resources  # isort:skip
+ from pcmdi_metrics.enso.lib import metrics_to_json  # isort:skip
+ from pcmdi_metrics.io import StringConstructor  # isort:skip
+ from pcmdi_metrics.utils import create_land_sea_mask
+
+ from EnsoMetrics.EnsoCollectionsLib import defCollection  # isort:skip
+ from EnsoMetrics.EnsoComputeMetricsLib import ComputeCollection  # isort:skip
+ from EnsoPlots.EnsoMetricPlot import main_plotter  # isort:skip
+
+
+ def main():
+     """
+     Run the ENSO metrics collection and plotting.
+
+     This script is designed to be run from the command line.
+     It takes four command-line arguments:
+     1. metrics_collection: Name of the metrics collection to compute.
+     2. experiment_id: Experiment identifier.
+     3. input_json_path: Path to the JSON file containing the datasets.
+     4. output_directory: Directory where the output files will be saved.
+     """
+     print("### PMP ENSO: Compute the metric collection ###\n")
+
+     args = parse_arguments()
+     dict_datasets, mod, run, pattern = prepare_datasets(args)
+     dict_metric, dict_dive = compute_metrics(args, dict_datasets, mod, run, pattern)
+     save_metrics_to_json(args, dict_datasets, dict_metric, dict_dive, pattern)
+     plot_results(args, pattern, mod, run)
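+
+ # Illustrative invocation (a sketch, not part of the module; the script name
+ # and argument values are assumptions):
+ #
+ #   python pmp_enso_driver.py \
+ #       --metrics_collection ENSO_perf \
+ #       --experiment_id historical \
+ #       --input_json_path input_datasets.json \
+ #       --output_directory ./enso_output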
+
+
+ def parse_arguments():
+     """Parse command-line arguments."""
+     parser = argparse.ArgumentParser(description="Compute and plot the PMP ENSO metrics collection.")
+     parser.add_argument("--metrics_collection", type=str, help="metrics collection")
+     parser.add_argument("--experiment_id", type=str, help="experiment id")
+     parser.add_argument("--input_json_path", type=str, help="JSON file path")
+     parser.add_argument("--output_directory", type=str, help="output directory")
+     return parser.parse_args()
+
+
+ def prepare_datasets(args):
+     """Prepare datasets and update them with land-sea masks."""
+     os.makedirs(args.output_directory, exist_ok=True)
+     with open(args.input_json_path) as f:
+         dict_datasets = json.load(f)
+     mod_run = next(iter(dict_datasets["model"].keys()))
+     mod, run = mod_run.split("_")
+     pattern = f"{args.metrics_collection}_{mod}_{args.experiment_id}_{run}"
+     dict_datasets = update_dict_datasets(dict_datasets, os.path.join(args.output_directory, "ref_landmask"))
+     # Write a JSON file for dict_datasets
+     json_file = os.path.join(args.output_directory, f"input_{pattern}_processed.json")
+     with open(json_file, "w") as f:
+         json.dump(dict_datasets, f, indent=4)
+     print(f"JSON file created: {json_file}")
+     return dict_datasets, mod, run, pattern
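+
+ # Shape of the input JSON read above (a minimal sketch inferred from how the
+ # keys are accessed in this module; the dataset, variable, and path values are
+ # hypothetical):
+ #
+ #   {
+ #       "model": {
+ #           "ACCESS-CM2_r1i1p1f1": {
+ #               "ts": {"path + filename": "/path/to/ts.nc"}
+ #           }
+ #       },
+ #       "observations": {
+ #           "HadISST-1-1": {
+ #               "ts": {"path + filename": "/path/to/obs_ts.nc"}
+ #           }
+ #       }
+ #   }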
+
+
+ def compute_metrics(args, dict_datasets, mod, run, pattern):
+     """Compute the metric collection."""
+     dict_metric = defaultdict(dict)
+     dict_dive = defaultdict(dict)
+     metrics, dive_results = ComputeCollection(
+         args.metrics_collection,
+         dict_datasets,
+         f"{mod}_{run}",
+         netcdf=True,
+         netcdf_name=os.path.join(args.output_directory, pattern),
+         obs_interpreter=True,
+         debug=True,
+     )
+     dict_metric[mod][run] = metrics
+     dict_dive[mod][run] = dive_results
+     return dict_metric, dict_dive
+
+
+ def save_metrics_to_json(args, dict_datasets, dict_metric, dict_dive, pattern):
+     """Save metrics to a JSON file."""
+     egg_pth = resources.resource_path()
+     dict_obs = dict_datasets["observations"]
+     # pattern = f"{args.metrics_collection}_{mod}_{args.experiment_id}_{run}"
+     mod = pattern.split("_")[-3]
+     run = pattern.split("_")[-1]
+     metrics_to_json(
+         args.metrics_collection,
+         dict_obs,
+         dict_metric,
+         dict_dive,
+         egg_pth,
+         StringConstructor(args.output_directory),
+         pattern,
+         mod=mod,
+         run=run,
+     )
+     # Write an additional JSON file with the results following the CMEC standard
+     json_file = os.path.join(args.output_directory, f"{pattern}.json")
+     write_CMEC_json(json_file)
+
+
+ def write_CMEC_json(json_file):
+     """
+     Write the CMEC JSON file.
+
+     Parameters
+     ----------
+     json_file : str
+         Path to the input JSON file.
+     """
+     # Load the existing JSON file
+     with open(json_file) as f:
+         dict_data = json.load(f)
+
+     # -----------------------------------------
+     # Prepare components for the CMEC structure
+     # -----------------------------------------
+     metrics_dict = {}
+     ref_datasets = []
+
+     mod = next(iter(dict_data["RESULTS"]["model"].keys()))
+     run = next(iter(dict_data["RESULTS"]["model"][mod].keys()))
+
+     metrics = list(dict_data["RESULTS"]["model"][mod][run]["value"].keys())
+     for metric in metrics:
+         metrics_dict[metric] = {}
+         ref_datasets.extend(list(dict_data["RESULTS"]["model"][mod][run]["value"][metric]["metric"].keys()))
+
+     ref_datasets = list(set(ref_datasets))  # Remove duplicates
+
+     ref_datasets_dict = {ref: {} for ref in ref_datasets}
+
+     dimensions_dict = {
+         "json_structure": ["model", "realization", "metric", "reference_datasets"],
+         "model": {mod: {}},
+         "realization": {run: {}},
+         "metric": metrics_dict,
+         "reference_datasets": ref_datasets_dict,
+     }
+
+     results_dict = {}
+     results_dict[mod] = {}
+     results_dict[mod][run] = {}
+
+     for metric in metrics:
+         results_dict[mod][run][metric] = {}
+         ref_datasets = list(dict_data["RESULTS"]["model"][mod][run]["value"][metric]["metric"].keys())
+         for ref_dataset in ref_datasets:
+             value = dict_data["RESULTS"]["model"][mod][run]["value"][metric]["metric"][ref_dataset]["value"]
+             results_dict[mod][run][metric][ref_dataset] = value
+
+     # Check if provenance information is in the JSON file
+     if "provenance" in dict_data:
+         provenance_dict = dict_data["provenance"]
+     else:
+         provenance_dict = {}
+
+     # Check if the reference datasets are in the JSON file
+     if "obs" in dict_data["RESULTS"]:
+         ref_dict = dict_data["RESULTS"]["obs"]
+     else:
+         ref_dict = {}
+
+     # -----------------------------------------------
+     # Create a new dictionary with the CMEC structure
+     # -----------------------------------------------
+     cmec_dict = {
+         "RESULTS": results_dict,
+         "DIMENSIONS": dimensions_dict,
+         "REF": ref_dict,
+         "provenance": provenance_dict,
+     }
+
+     # ---------------------------------------
+     # Write the new dictionary to a JSON file
+     # ---------------------------------------
+     json_cmec_file = json_file.replace(".json", "_cmec.json")
+
+     with open(json_cmec_file, "w") as f:
+         json.dump(cmec_dict, f, indent=4)
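+
+ # Resulting CMEC layout (a sketch; the metric name, reference dataset, and
+ # value shown are hypothetical):
+ #
+ #   {
+ #       "RESULTS": {"ACCESS-CM2": {"r1i1p1f1": {"some_metric": {"some_obs": 0.8}}}},
+ #       "DIMENSIONS": {"json_structure": ["model", "realization", "metric", "reference_datasets"], ...},
+ #       "REF": {...},
+ #       "provenance": {...}
+ #   }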
+
+
+ def plot_results(args, pattern, mod, run):
+     """Plot the results."""
+     mod_run = f"{mod}_{run}"
+     with open(os.path.join(args.output_directory, f"{pattern}.json")) as ff:
+         data_json = json.load(ff)["RESULTS"]["model"][mod][run]
+     plot_enso(
+         args.metrics_collection,
+         mod_run,
+         args.experiment_id,
+         args.output_directory,
+         data_json,
+     )
+
+
+ def plot_enso(mc_name, mod_run, exp, path_work_dir, data_json):
+     """
+     Plot the ENSO metrics collection.
+
+     Parameters
+     ----------
+     mc_name : str
+         Name of the metrics collection.
+     mod_run : str
+         Model and run name, separated by an underscore,
+         e.g., "ACCESS-CM2_r1i1p1f1".
+     exp : str
+         Experiment name.
+     path_work_dir : str
+         Path to the directory that contains the input NetCDF files;
+         also used to save the output PNG files.
+     data_json : dict
+         Data loaded from the JSON file.
+     """
+     metrics = sorted(defCollection(mc_name)["metrics_list"].keys(), key=lambda v: v.upper())
+     print(f"metrics: {metrics}")
+
+     mod = mod_run.split("_")[0]
+     run = mod_run.split("_")[1]
+
+     pattern = "_".join([mc_name, mod, exp, run])
+
+     for met in metrics:
+         print(f"met: {met}")
+         # get NetCDF file name
+         filename_nc = os.path.join(path_work_dir, pattern + "_" + met + ".nc")
+         print(f"filename_nc: {filename_nc}")
+         if os.path.exists(filename_nc):
+             # get diagnostic values for the given model and observations
+             if mc_name == "ENSO_tel" and "Map" in met:
+                 dict_dia = data_json["value"][met + "Corr"]["diagnostic"]
+                 diagnostic_values = dict((key1, None) for key1 in dict_dia.keys())
+                 diagnostic_units = ""
+             else:
+                 dict_dia = data_json["value"][met]["diagnostic"]
+                 diagnostic_values = dict((key1, dict_dia[key1]["value"]) for key1 in dict_dia.keys())
+                 diagnostic_units = data_json["metadata"]["metrics"][met]["diagnostic"]["units"]
+             # get metric values computed with the given model and observations
+             if mc_name == "ENSO_tel" and "Map" in met:
+                 list1, list2 = [met + "Corr", met + "Rmse"], ["diagnostic", "metric"]
+                 dict_met = data_json["value"]
+                 metric_values = dict(
+                     (
+                         key1,
+                         {mod: [dict_met[su][ty][key1]["value"] for su, ty in zip(list1, list2)]},
+                     )
+                     for key1 in dict_met[list1[0]]["metric"].keys()
+                 )
+                 metric_units = [data_json["metadata"]["metrics"][su]["metric"]["units"] for su in list1]
+             else:
+                 dict_met = data_json["value"][met]["metric"]
+                 metric_values = dict((key1, {mod: dict_met[key1]["value"]}) for key1 in dict_met.keys())
+                 metric_units = data_json["metadata"]["metrics"][met]["metric"]["units"]
+             # figure name
+             figure_name = "_".join([mc_name, mod, exp, run, met])
+             print(f"figure_name: {figure_name}")
+
+             main_plotter(
+                 mc_name,
+                 met,
+                 mod,
+                 exp,
+                 filename_nc,
+                 diagnostic_values,
+                 diagnostic_units,
+                 metric_values,
+                 metric_units,
+                 member=run,
+                 path_png=path_work_dir,
+                 name_png=figure_name,
+             )
+
+             print("figure plotting done")
+
+         else:
+             print(f"file not found: {filename_nc}")
+
+
+ def update_dict_datasets(dict_datasets: dict, output_dir: str = ".") -> dict:
+     """
+     Update dict_datasets to include the land-sea mask and remap observation names.
+
+     Parameters
+     ----------
+     dict_datasets : dict
+         Dictionary containing datasets information.
+     output_dir : str
+         Directory where the land-sea mask will be saved.
+         Default is the current directory.
+
+     Returns
+     -------
+     dict
+         Updated dictionary with land-sea mask and remapped observation names.
+
+     Raises
+     ------
+     FileNotFoundError
+         If the input file path is not valid.
+     NotImplementedError
+         If multiple paths are found for a dataset or if the path is not a string.
+     """
+     dict_datasets2 = copy.deepcopy(dict_datasets)
+     data_types = dict_datasets.keys()  # ["model", "observations"]
+
+     # Select only model and observations datasets
+     data_types = [data_type for data_type in data_types if data_type in ["model", "observations"]]
+
+     for data_type in data_types:
+         datasets = dict_datasets[data_type].keys()
+         for dataset in datasets:
+             variables = dict_datasets[data_type][dataset].keys()
+             for variable in variables:
+                 path = dict_datasets[data_type][dataset][variable]["path + filename"]
+
+                 # If path is a list with one element, take that element as a string;
+                 # otherwise raise NotImplementedError
+                 if isinstance(path, list) and len(path) == 1:
+                     path = copy.deepcopy(path[0])
+                     dict_datasets2[data_type][dataset][variable]["path + filename"] = path
+                 elif isinstance(path, list) and len(path) > 1:
+                     raise NotImplementedError(
+                         f"Multiple paths found for {data_type} {dataset} {variable}: {path}"
+                     )
+                 elif not isinstance(path, str):
+                     raise NotImplementedError(
+                         f"Path is not a string for {data_type} {dataset} {variable}: {path}"
+                     )
+                 else:
+                     dict_datasets2[data_type][dataset][variable]["path + filename"] = path
+
+                 # Check if the file exists
+                 if not os.path.exists(path):
+                     raise FileNotFoundError(f"File not found: {path}")
+
+                 # Generate the landmask path regardless of whether data_type is observation or model.
+                 if (
+                     "path + filename_area" not in dict_datasets[data_type][dataset]
+                     or "path + filename_landmask" not in dict_datasets[data_type][dataset]
+                 ):
+                     # Generate it per variable as different variables may be on different grids.
+                     path_landmask = generate_landmask_path(path, variable, output_dir=output_dir)
+
+                     dict_datasets2[data_type][dataset][variable]["areaname"] = "areacella"
+                     dict_datasets2[data_type][dataset][variable]["landmaskname"] = "sftlf"
+                     dict_datasets2[data_type][dataset][variable]["path + filename_area"] = path_landmask
+                     dict_datasets2[data_type][dataset][variable]["path + filename_landmask"] = path_landmask
+
+                 # Map variable names to ENSO package recognized names
+                 var_name_mapping = {"ts": "sst", "tauu": "taux"}
+                 var_name_key = var_name_mapping.get(variable, variable)
+
+                 # Update the variable name
+                 dict_datasets2[data_type][dataset][var_name_key] = dict_datasets2[data_type][dataset].pop(
+                     variable
+                 )
+
+             if data_type == "observations":
+                 # Mapping of old observation names to new ones recognized by the ENSO package
+                 observation_name_mapping = {
+                     "GPCP-2-3": "GPCPv2.3",
+                     "ERA-INT": "ERA-Interim",
+                     "ERA-5": "ERA5",
+                     "AVISO-1-0": "AVISO",
+                     "TropFlux-1-0": "Tropflux",
+                     "HadISST-1-1": "HadISST",
+                 }
+                 # Get the new name if it exists in the mapping, otherwise keep the original name
+                 dataset_name_key = observation_name_mapping.get(dataset, dataset)
+                 # Update the datasets dictionary with the new name
+                 dict_datasets2[data_type][dataset_name_key] = dict_datasets2[data_type].pop(dataset)
+
+     return dict_datasets2
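+
+ # Illustrative effect of the updates above (a sketch; the path is hypothetical):
+ #   before: {"observations": {"HadISST-1-1": {"ts": {"path + filename": "/path/to/obs_ts.nc"}}}}
+ #   after:  {"observations": {"HadISST": {"sst": {"path + filename": "/path/to/obs_ts.nc",
+ #                                                 "landmaskname": "sftlf", ...}}}}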
+
+
+ def generate_landmask_path(file_path, var_name, output_dir=".", output_filename=None):
+     """
+     Generate the landmask path based on the given file path.
+
+     Parameters
+     ----------
+     file_path : str
+         Path to the input NetCDF file.
+     var_name : str
+         Variable name to be used for creating the land-sea mask.
+     output_dir : str
+         Directory where the land-sea mask will be saved.
+         Default is the current directory.
+     output_filename : str
+         Name of the output land-sea mask file.
+         If not provided, it will be generated based on the input file name.
+         Default is None.
+
+     Returns
+     -------
+     str
+         Path to the generated land-sea mask file.
+
+     Raises
+     ------
+     FileNotFoundError
+         If the input file path is not valid.
+     ValueError
+         If the variable name is not valid.
+     """
+     # If file_path is a list, take the first element
+     if isinstance(file_path, list):
+         file_path = file_path[0]
+
+     # Check if the file path is valid
+     if not os.path.isfile(file_path):
+         raise FileNotFoundError(f"File not found: {file_path}")
+
+     # Check if the variable name is valid
+     if not isinstance(var_name, str):
+         raise ValueError(f"Invalid variable name: {var_name}")
+
+     # Open the dataset using xcdat and create the land-sea mask
+     ds = xc.open_dataset(file_path)
+     mask = create_land_sea_mask(ds[var_name])
+
+     # Name the mask variable 'sftlf'
+     mask.name = "sftlf"
+
+     # Check if the output directory exists, create it if not
+     if not os.path.exists(output_dir):
+         os.makedirs(output_dir)
+
+     # Save the land-sea mask to a new NetCDF file
+     if output_filename:
+         landmask_filename = output_filename
+     else:
+         # Generate the filename based on the input file name
+         landmask_filename = os.path.basename(file_path).replace(".nc", "_landmask.nc")
+
+     landmask_path = os.path.join(output_dir, landmask_filename)
+     mask.to_netcdf(landmask_path)
+
+     return os.path.abspath(landmask_path)
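+
+ # Illustrative use (a sketch; the input file and variable name are hypothetical):
+ #
+ #   path = generate_landmask_path("/path/to/ts.nc", "ts", output_dir="./ref_landmask")
+ #   # -> absolute path to "./ref_landmask/ts_landmask.nc"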
+
+
+ if __name__ == "__main__":
+     main()