climate-ref-pmp 0.5.0__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -12,6 +12,7 @@ from climate_ref_core.diagnostics import (
12
12
  ExecutionDefinition,
13
13
  ExecutionResult,
14
14
  )
15
+ from climate_ref_core.pycmec.metric import remove_dimensions
15
16
  from climate_ref_pmp.pmp_driver import build_glob_pattern, build_pmp_command, process_json_result
16
17
 
17
18
 
@@ -22,7 +23,16 @@ class AnnualCycle(CommandLineDiagnostic):
22
23
 
23
24
  name = "Annual Cycle"
24
25
  slug = "annual-cycle"
25
- facets = ("model", "realization", "reference", "mode", "season", "method", "statistic")
26
+ facets = (
27
+ "source_id",
28
+ "member_id",
29
+ "experiment_id",
30
+ "variable_id",
31
+ "reference_source_id",
32
+ "region",
33
+ "statistic",
34
+ "season",
35
+ )
26
36
  data_requirements = (
27
37
  # Surface temperature
28
38
  (
@@ -101,16 +111,17 @@ class AnnualCycle(CommandLineDiagnostic):
101
111
  Command arguments to execute in the PMP environment
102
112
  """
103
113
  input_datasets = definition.datasets[SourceDatasetType.CMIP6]
114
+ reference_datasets = definition.datasets[SourceDatasetType.PMPClimatology]
115
+ selector = input_datasets.selector_dict()
116
+ reference_selector = reference_datasets.selector_dict()
117
+ logger.debug(f"selector: {selector}")
118
+ logger.debug(f"reference selector: {reference_selector}")
119
+
104
120
  source_id = input_datasets["source_id"].unique()[0]
105
121
  experiment_id = input_datasets["experiment_id"].unique()[0]
106
122
  member_id = input_datasets["member_id"].unique()[0]
107
123
  variable_id = input_datasets["variable_id"].unique()[0]
108
124
 
109
- logger.debug(f"input_datasets['source_id'].unique(): {input_datasets['source_id'].unique()}")
110
- logger.debug(f"input_datasets['experiment_id'].unique(): {input_datasets['experiment_id'].unique()}")
111
- logger.debug(f"input_datasets['member_id'].unique(): {input_datasets['member_id'].unique()}")
112
- logger.debug(f"input_datasets['variable_id'].unique(): {input_datasets['variable_id'].unique()}")
113
-
114
125
  model_files_raw = input_datasets.path.to_list()
115
126
  if len(model_files_raw) == 1:
116
127
  model_files = model_files_raw[0] # If only one file, use it directly
@@ -123,23 +134,11 @@ class AnnualCycle(CommandLineDiagnostic):
123
134
 
124
135
  logger.debug(f"input_datasets: {input_datasets}")
125
136
  logger.debug(f"input_datasets.keys(): {input_datasets.keys()}")
126
- logger.debug(f"input_datasets['variable_id']: {input_datasets['variable_id']}")
127
-
128
- logger.debug(f"source_id: {source_id}")
129
- logger.debug(f"experiment_id: {experiment_id}")
130
- logger.debug(f"member_id: {member_id}")
131
- logger.debug(f"variable_id: {variable_id}")
132
137
 
133
- reference_dataset = definition.datasets[SourceDatasetType.PMPClimatology]
134
- reference_dataset_name = reference_dataset["source_id"].unique()[0]
135
- reference_dataset_path = reference_dataset.datasets.iloc[0]["path"]
136
-
137
- logger.debug(f"reference_dataset.datasets: {reference_dataset.datasets}")
138
- logger.debug(f"reference_dataset['source_id']: {reference_dataset['source_id']}")
139
- logger.debug(
140
- f"reference_dataset.datasets.iloc[0]['path']: {reference_dataset.datasets.iloc[0]['path']}"
141
- )
138
+ reference_dataset_name = reference_datasets["source_id"].unique()[0]
139
+ reference_dataset_path = reference_datasets.datasets.iloc[0]["path"]
142
140
 
141
+ logger.debug(f"reference_dataset.datasets: {reference_datasets.datasets}")
143
142
  logger.debug(f"reference_dataset_name: {reference_dataset_name}")
144
143
  logger.debug(f"reference_dataset_path: {reference_dataset_path}")
145
144
 
@@ -154,14 +153,18 @@ class AnnualCycle(CommandLineDiagnostic):
154
153
  data_name = f"{source_id}_{experiment_id}_{member_id}"
155
154
  data_path = model_files
156
155
  params = {
157
- "driver_file": "mean_climate/pcmdi_compute_climatologies.py",
158
- "parameter_file": self.parameter_file_1,
159
156
  "vars": variable_id,
160
157
  "infile": data_path,
161
158
  "outfile": f"{output_directory_path}/{variable_id}_{data_name}_clims.nc",
162
159
  }
163
160
 
164
- cmds.append(build_pmp_command(**params))
161
+ cmds.append(
162
+ build_pmp_command(
163
+ driver_file="pcmdi_compute_climatologies.py",
164
+ parameter_file=self.parameter_file_1,
165
+ **params,
166
+ )
167
+ )
165
168
 
166
169
  # ----------------------------------------------
167
170
  # PART 2: Build the command to calculate diagnostics
@@ -184,8 +187,6 @@ class AnnualCycle(CommandLineDiagnostic):
184
187
  date = datetime.datetime.now().strftime("%Y%m%d")
185
188
 
186
189
  params = {
187
- "driver_file": "mean_climate/mean_climate_driver.py",
188
- "parameter_file": self.parameter_file_2,
189
190
  "vars": variable_id,
190
191
  "custom_observations": f"{output_directory_path}/obs_dict.json",
191
192
  "test_data_path": output_directory_path,
@@ -196,7 +197,13 @@ class AnnualCycle(CommandLineDiagnostic):
196
197
  "cmec": "",
197
198
  }
198
199
 
199
- cmds.append(build_pmp_command(**params))
200
+ cmds.append(
201
+ build_pmp_command(
202
+ driver_file="mean_climate_driver.py",
203
+ parameter_file=self.parameter_file_2,
204
+ **params,
205
+ )
206
+ )
200
207
 
201
208
  return cmds
202
209
 
@@ -227,6 +234,7 @@ class AnnualCycle(CommandLineDiagnostic):
227
234
  # Find the executions file
228
235
  results_files = list(results_directory.glob("*_cmec.json"))
229
236
  if len(results_files) != 1: # pragma: no cover
237
+ logger.error(f"More than one or no cmec file found: {results_files}")
230
238
  return ExecutionResult.build_from_failure(definition)
231
239
  else:
232
240
  results_file = results_files[0]
@@ -252,12 +260,27 @@ class AnnualCycle(CommandLineDiagnostic):
252
260
  png_files = list(png_directory.glob("*.png"))
253
261
  data_files = list(data_directory.glob("*.nc"))
254
262
 
255
- cmec_output, cmec_metric = process_json_result(results_file_transformed, png_files, data_files)
263
+ cmec_output_bundle, cmec_metric_bundle = process_json_result(
264
+ results_file_transformed, png_files, data_files
265
+ )
266
+
267
+ # Add missing dimensions to the output
268
+ input_selectors = input_datasets.selector_dict()
269
+ reference_selectors = definition.datasets[SourceDatasetType.PMPClimatology].selector_dict()
270
+ cmec_metric_bundle = cmec_metric_bundle.prepend_dimensions(
271
+ {
272
+ "source_id": input_selectors["source_id"],
273
+ "member_id": input_selectors["member_id"],
274
+ "experiment_id": input_selectors["experiment_id"],
275
+ "variable_id": input_selectors["variable_id"],
276
+ "reference_source_id": reference_selectors["source_id"],
277
+ }
278
+ )
256
279
 
257
280
  return ExecutionResult.build_from_output_bundle(
258
281
  definition,
259
- cmec_output_bundle=cmec_output,
260
- cmec_metric_bundle=cmec_metric,
282
+ cmec_output_bundle=cmec_output_bundle,
283
+ cmec_metric_bundle=cmec_metric_bundle,
261
284
  )
262
285
 
263
286
  def run(self, definition: ExecutionDefinition) -> ExecutionResult:
@@ -274,7 +297,6 @@ class AnnualCycle(CommandLineDiagnostic):
274
297
  :
275
298
  The result of running the diagnostic.
276
299
  """
277
- logger.debug("PMP annual cycle run start")
278
300
  cmds = self.build_cmds(definition)
279
301
 
280
302
  runs = [self.provider.run(cmd) for cmd in cmds]
@@ -297,41 +319,18 @@ def _transform_results(data: dict[str, Any]) -> dict[str, Any]:
297
319
  dict
298
320
  The transformed executions dictionary.
299
321
  """
322
+ # Remove the model, reference, rip dimensions
323
+ # These are later replaced with a REF-specific naming convention
324
+ data = remove_dimensions(data, ["model", "reference", "rip"])
325
+
326
+ # TODO: replace this with the ability to capture series
300
327
  # Remove the "CalendarMonths" key from the nested structure
301
- if "RESULTS" in data:
302
- models = list(data["RESULTS"].keys())
303
- for model in models:
304
- if "default" in data["RESULTS"][model]:
305
- realizations = list(data["RESULTS"][model]["default"].keys())
306
- if "attributes" in realizations:
307
- realizations.remove("attributes")
308
- for realization in realizations:
309
- regions = list(data["RESULTS"][model]["default"][realization].keys())
310
- for region in regions:
311
- stats = list(data["RESULTS"][model]["default"][realization][region].keys())
312
- for stat in stats:
313
- if (
314
- "CalendarMonths"
315
- in data["RESULTS"][model]["default"][realization][region][stat]
316
- ):
317
- calendar_months = data["RESULTS"][model]["default"][realization][region][
318
- stat
319
- ].pop("CalendarMonths")
320
- for i, value in enumerate(calendar_months):
321
- key_name = f"CalendarMonth-{i + 1:02d}"
322
- data["RESULTS"][model]["default"][realization][region][stat][key_name] = (
323
- value
324
- )
328
+ for region, region_values in data["RESULTS"].items():
329
+ for stat, stat_values in region_values.items():
330
+ if "CalendarMonths" in stat_values:
331
+ stat_values.pop("CalendarMonths")
325
332
 
326
333
  # Remove the "CalendarMonths" key from the nested structure in "DIMENSIONS"
327
- if (
328
- "DIMENSIONS" in data
329
- and "season" in data["DIMENSIONS"]
330
- and "CalendarMonths" in data["DIMENSIONS"]["season"]
331
- ):
332
- calendar_months = data["DIMENSIONS"]["season"].pop("CalendarMonths")
333
- for i in range(1, 13):
334
- key_name = f"CalendarMonth-{i:02d}"
335
- data["DIMENSIONS"]["season"][key_name] = {}
334
+ data["DIMENSIONS"]["season"].pop("CalendarMonths")
336
335
 
337
336
  return data
@@ -20,7 +20,16 @@ class ExtratropicalModesOfVariability(CommandLineDiagnostic):
20
20
  ts_modes = ("PDO", "NPGO", "AMO")
21
21
  psl_modes = ("NAO", "NAM", "PNA", "NPO", "SAM")
22
22
 
23
- facets = ("model", "realization", "reference", "mode", "season", "method", "statistic")
23
+ facets = (
24
+ "source_id",
25
+ "member_id",
26
+ "experiment_id",
27
+ "reference_source_id",
28
+ "mode",
29
+ "season",
30
+ "method",
31
+ "statistic",
32
+ )
24
33
 
25
34
  def __init__(self, mode_id: str):
26
35
  self.mode_id = mode_id.upper()
@@ -32,7 +41,6 @@ class ExtratropicalModesOfVariability(CommandLineDiagnostic):
32
41
  obs_variable: str,
33
42
  cmip_variable: str,
34
43
  extra_experiments: str | tuple[str, ...] | list[str] = (),
35
- remove_experiments: str | tuple[str, ...] | list[str] = (),
36
44
  ) -> tuple[DataRequirement, DataRequirement]:
37
45
  filters = [
38
46
  FacetFilter(
@@ -55,6 +63,7 @@ class ExtratropicalModesOfVariability(CommandLineDiagnostic):
55
63
  DataRequirement(
56
64
  source_type=SourceDatasetType.CMIP6,
57
65
  filters=tuple(filters),
66
+ # TODO: remove unneeded variant_label
58
67
  group_by=("source_id", "experiment_id", "variant_label", "member_id"),
59
68
  ),
60
69
  )
@@ -95,7 +104,6 @@ class ExtratropicalModesOfVariability(CommandLineDiagnostic):
95
104
 
96
105
  reference_dataset = definition.datasets[SourceDatasetType.obs4MIPs]
97
106
  reference_dataset_name = reference_dataset["source_id"].unique()[0]
98
- # reference_dataset_path = reference_dataset.datasets[0]["path"]
99
107
  reference_dataset_path = reference_dataset.datasets.iloc[0]["path"]
100
108
 
101
109
  logger.debug(f"reference_dataset: {reference_dataset}")
@@ -119,9 +127,7 @@ class ExtratropicalModesOfVariability(CommandLineDiagnostic):
119
127
  reference_data_path = reference_dataset_path
120
128
 
121
129
  # Build the command to run the PMP driver script
122
- params = {
123
- "driver_file": "variability_mode/variability_modes_driver.py",
124
- "parameter_file": self.parameter_file,
130
+ params: dict[str, str | int | None] = {
125
131
  "variability_mode": self.mode_id,
126
132
  "modpath": modpath,
127
133
  "modpath_lf": "none",
@@ -141,7 +147,11 @@ class ExtratropicalModesOfVariability(CommandLineDiagnostic):
141
147
  params["oeyear"] = 2005
142
148
 
143
149
  # Pass the parameters using **kwargs
144
- return build_pmp_command(**params)
150
+ return build_pmp_command(
151
+ driver_file="variability_modes_driver.py",
152
+ parameter_file=self.parameter_file,
153
+ **params,
154
+ )
145
155
 
146
156
  def build_execution_result(self, definition: ExecutionDefinition) -> ExecutionResult:
147
157
  """
@@ -165,10 +175,28 @@ class ExtratropicalModesOfVariability(CommandLineDiagnostic):
165
175
  png_files = [definition.as_relative_path(f) for f in definition.output_directory.glob("*.png")]
166
176
  data_files = [definition.as_relative_path(f) for f in definition.output_directory.glob("*.nc")]
167
177
 
168
- cmec_output, cmec_metric = process_json_result(results_files[0], png_files, data_files)
178
+ cmec_output_bundle, cmec_metric_bundle = process_json_result(results_files[0], png_files, data_files)
179
+
180
+ # Add additional metadata to the metrics
181
+ input_selectors = definition.datasets[SourceDatasetType.CMIP6].selector_dict()
182
+ reference_selectors = definition.datasets[SourceDatasetType.obs4MIPs].selector_dict()
183
+ cmec_metric_bundle = cmec_metric_bundle.remove_dimensions(
184
+ [
185
+ "model",
186
+ "realization",
187
+ "reference",
188
+ ],
189
+ ).prepend_dimensions(
190
+ {
191
+ "source_id": input_selectors["source_id"],
192
+ "member_id": input_selectors["member_id"],
193
+ "experiment_id": input_selectors["experiment_id"],
194
+ "reference_source_id": reference_selectors["source_id"],
195
+ }
196
+ )
169
197
 
170
198
  return ExecutionResult.build_from_output_bundle(
171
199
  definition,
172
- cmec_output_bundle=cmec_output,
173
- cmec_metric_bundle=cmec_metric,
200
+ cmec_output_bundle=cmec_output_bundle,
201
+ cmec_metric_bundle=cmec_metric_bundle,
174
202
  )
@@ -83,19 +83,13 @@ def process_json_result(
83
83
  dimensions.update(dimensions["dimensions"])
84
84
  del dimensions["dimensions"]
85
85
 
86
- if "statistic" in dimensions["json_structure"]: # pragma: no branch
87
- dimensions["json_structure"].remove("statistic")
88
- dimensions.pop("statistic")
89
-
90
- # Remove the "attributes" key from the RESULTS
91
- # This isn't standard CMEC output, but it is what PMP produces
92
86
  results = json_result["RESULTS"]
93
87
 
94
88
  cmec_metric["RESULTS"] = results
95
89
  cmec_metric["DIMENSIONS"] = dimensions
96
90
 
97
91
  if "provenance" in json_result: # pragma: no branch
98
- cmec_metric["provenance"] = json_result["provenance"]
92
+ cmec_metric["PROVENANCE"] = json_result["provenance"]
99
93
 
100
94
  logger.info(f"cmec_output: {pretty_repr(cmec_output)}")
101
95
  logger.info(f"cmec_metric: {pretty_repr(cmec_metric)}")
@@ -140,7 +134,7 @@ def _get_resource(package: str, resource_name: str | pathlib.Path, use_resources
140
134
  def build_pmp_command(
141
135
  driver_file: str,
142
136
  parameter_file: str,
143
- **kwargs: dict[str, str | int | float | list[str]],
137
+ **kwargs: str | int | float | list[str] | None,
144
138
  ) -> list[str]:
145
139
  """
146
140
  Run a PMP driver script via a conda environment
@@ -150,6 +144,9 @@ def build_pmp_command(
150
144
  The output consists of a JSON file that contains the executions of the PMP diagnostics,
151
145
  and a set of PNG and data files that are produced by the diagnostics.
152
146
 
147
+ The PMP driver scripts are installed in the PMP conda environment,
148
+ but absolute paths should be used for non-PMP scripts.
149
+
153
150
  Parameters
154
151
  ----------
155
152
  driver_file
@@ -160,13 +157,18 @@ def build_pmp_command(
160
157
  Additional arguments to pass to the driver script
161
158
  """
162
159
  # Note this uses the driver script from the REF env *not* the PMP conda env
163
- _driver_script = _get_resource("pcmdi_metrics", driver_file, use_resources=False)
164
160
  _parameter_file = _get_resource("climate_ref_pmp.params", parameter_file, use_resources=True)
165
161
 
162
+ # This is a workaround for a fatal error in internal_Finalize of MPICH
163
+ # when running in a conda environment on MacOS.
164
+ # It is not clear if this is a bug in MPICH or a problem with the conda environment.
165
+ if "FI_PROVIDER" not in os.environ: # pragma: no branch
166
+ logger.debug("Setting env variable 'FI_PROVIDER=tcp'")
167
+ os.environ["FI_PROVIDER"] = "tcp"
168
+
166
169
  # Run the driver script inside the PMP conda environment
167
170
  cmd = [
168
- "python",
169
- _driver_script,
171
+ driver_file,
170
172
  "-p",
171
173
  _parameter_file,
172
174
  ]
@@ -179,9 +181,7 @@ def build_pmp_command(
179
181
  else:
180
182
  cmd.extend([f"--{key}"])
181
183
 
182
- logger.info("-- PMP command to run --")
183
- logger.info("[PMP] Command to run:", " ".join(map(str, cmd)))
184
- logger.info("[PMP] Command generation for the driver completed.")
184
+ logger.info(f"PMP Command: {cmd}")
185
185
 
186
186
  return cmd
187
187
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: climate-ref-pmp
3
- Version: 0.5.0
3
+ Version: 0.5.1
4
4
  Summary: PMP diagnostic provider for the Rapid Evaluation Framework
5
5
  Author-email: Jiwoo Lee <jwlee@llnl.gov>
6
6
  License: Apache-2.0
@@ -18,7 +18,6 @@ Classifier: Programming Language :: Python :: 3.13
18
18
  Classifier: Topic :: Scientific/Engineering
19
19
  Requires-Python: >=3.11
20
20
  Requires-Dist: climate-ref-core
21
- Requires-Dist: pcmdi-metrics
22
21
  Description-Content-Type: text/markdown
23
22
 
24
23
  # climate-ref-pmp
@@ -1,18 +1,18 @@
1
1
  climate_ref_pmp/__init__.py,sha256=UtSOMQe0lOZIen3wflN3jbWBVWbQQxnzUPBYjOlEo9E,1138
2
- climate_ref_pmp/pmp_driver.py,sha256=zEX6ZgEtiajuS9FPCRvqP8SyCgLRSrDhRiQVG7e9ttk,8433
2
+ climate_ref_pmp/pmp_driver.py,sha256=ieBuPQzoNxlepf-acQu5q_SCru2lbppxWhY_FlDVr5s,8410
3
3
  climate_ref_pmp/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  climate_ref_pmp/dataset_registry/pmp_climatology.txt,sha256=lruldzyr8fS79Tdg5RsIo_nzPGaQDjwpUE00t9S2QTM,3488
5
5
  climate_ref_pmp/diagnostics/__init__.py,sha256=ltItlaUkQe1ic8CpOAg2tCVHfYaLq5HFl-Xq_IZv02c,252
6
- climate_ref_pmp/diagnostics/annual_cycle.py,sha256=UsIMn2ypA3knWXcDjfuvtSM-rSwj_umRHAasy267URQ,12830
7
- climate_ref_pmp/diagnostics/variability_modes.py,sha256=NZROTwXDv_LjHx3TVMR_xXrhVq1Rc4GbXIf5Z3c835Y,6794
6
+ climate_ref_pmp/diagnostics/annual_cycle.py,sha256=VqToeG0UKT0kIkArfUEh4AM8CbR7D3z5At-jO492Oqo,11819
7
+ climate_ref_pmp/diagnostics/variability_modes.py,sha256=a_s3JW8vuzYV-7S1dRbs0oELY_maFLEY0ct1OMG-AJQ,7602
8
8
  climate_ref_pmp/params/pmp_param_MoV-psl.py,sha256=cpQyro0UdVWxe7jREidEQdirjksIXc7lm3CIfefcz5I,2268
9
9
  climate_ref_pmp/params/pmp_param_MoV-ts.py,sha256=yfKs8qgsoaCmdRwDJWSZ3sy0GM_WsQrOde9d__iOo7I,2547
10
10
  climate_ref_pmp/params/pmp_param_annualcycle_1-clims.py,sha256=P7DkL5wdtK9huQ9umzdG8JDG4saCFQPY7nncYGEHQmk,466
11
11
  climate_ref_pmp/params/pmp_param_annualcycle_2-metrics.py,sha256=JH9flUE9Ti-2bhCpDkDtmup1aD_7brHPqNSbc2-loF8,1655
12
12
  climate_ref_pmp/requirements/conda-lock.yml,sha256=5BBYqyQ_51ANHOw2kCH1i_qi-HimLt5hSHtiYripW_Q,346794
13
13
  climate_ref_pmp/requirements/environment.yml,sha256=IfzFtZV6PxY6zl6rNni1ytphxnFiRNurpE2gSrc2HhU,107
14
- climate_ref_pmp-0.5.0.dist-info/METADATA,sha256=d9LB4LrDrspA_gBKQVT7rFpPLuw5tb1GF3ht4ybGeiQ,2639
15
- climate_ref_pmp-0.5.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
16
- climate_ref_pmp-0.5.0.dist-info/licenses/LICENCE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
17
- climate_ref_pmp-0.5.0.dist-info/licenses/NOTICE,sha256=4qTlax9aX2-mswYJuVrLqJ9jK1IkN5kSBqfVvYLF3Ws,128
18
- climate_ref_pmp-0.5.0.dist-info/RECORD,,
14
+ climate_ref_pmp-0.5.1.dist-info/METADATA,sha256=PDD76wvJv_wrn6R2JeWX5Y6jeuvrxSDqbKXAP4fmbGE,2610
15
+ climate_ref_pmp-0.5.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
16
+ climate_ref_pmp-0.5.1.dist-info/licenses/LICENCE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
17
+ climate_ref_pmp-0.5.1.dist-info/licenses/NOTICE,sha256=4qTlax9aX2-mswYJuVrLqJ9jK1IkN5kSBqfVvYLF3Ws,128
18
+ climate_ref_pmp-0.5.1.dist-info/RECORD,,