pydartdiags 0.6.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,522 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ from pydartdiags.stats import stats
3
+ import matplotlib.pyplot as plt
4
+ import pandas as pd
5
+
6
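+ # TODO(HK): replace this list with a color scheme class. Order: bias, RMSE, totalspread; the last entry colors the observation counts.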
7
+ dacolors = ["green", "magenta", "orange", "red"]
8
+
9
+
10
def plot_profile(
    obs_seq, levels, type, bias=True, rmse=True, totalspread=True, depth=False
):
    """
    Plot vertical profiles of bias, RMSE and total spread for one observation type.

    Prior statistics are always drawn for each requested quantity; the
    posterior counterpart is drawn when the matching ``posterior_*`` column
    is present in the per-layer statistics.  The number of possible and
    assimilated observations per layer is drawn on a second x-axis, and the
    grand (all-layer) statistics are listed in a text box.

    ``levels`` are layer edges in the same unit as the observations'
    vertical coordinate.  For observations in pressure (Pa) the levels are
    therefore given in Pa; the y-axis is shown in hPa and inverted.
    For ocean observations, which are height (m), set depth=True to invert
    the y-axis.

    Args:
        obs_seq: Observation sequence object; its ``df`` DataFrame and
            ``file`` attribute are used.
        levels (list): Edges of the vertical layers to bin by.
        type (str): Observation type to plot.
        bias (bool): Plot the bias profile.
        rmse (bool): Plot the RMSE profile.
        totalspread (bool): Plot the total spread profile.
        depth (bool): Invert the y-axis even when the vertical unit is not
            pressure.

    Returns:
        The matplotlib figure, or None when there are no rows of the
        requested type, the type has more than one vertical unit, or all
        layers are empty.

    Example:

        type = 'RADIOSONDE_U_WIND_COMPONENT'
        hPalevels = [0.0, 100.0, 150.0, 200.0, 250.0, 300.0, 400.0, 500.0, 700, 850, 925, 1000]
        levels = [i * 100 for i in hPalevels]

        plot_profile(obs_seq, levels, type, bias=True, rmse=True, totalspread=True)

    """

    # calculate stats and add to dataframe
    stats.diag_stats(obs_seq.df)
    qc0 = stats.select_used_qcs(obs_seq.df)  # filter only qc=0, qc=2

    # Both selections are copied explicitly because bin_by_layer adds its
    # columns to the frame it is given.
    qc0 = qc0[qc0["type"] == type].copy()
    if qc0.empty:
        print(f"No rows found for type: {type}")
        return None

    all_df = obs_seq.df[obs_seq.df["type"] == type].copy()  # for possible vs used

    if all_df["vert_unit"].nunique() > 1:
        print(
            f"Multiple vertical units found in the data: {all_df['vert_unit'].unique()} for type: {type}"
        )
        return None

    vert_unit = all_df.iloc[0]["vert_unit"]
    conversion, unit = _get_plot_unit(vert_unit)  # multiplier and unit for y-axis

    # grand statistics
    grand = stats.grand_statistics(qc0)

    # add level bins to the dataframes
    stats.bin_by_layer(all_df, levels, verticalUnit=vert_unit)
    stats.bin_by_layer(qc0, levels, verticalUnit=vert_unit)

    # aggregate by layer
    df_pvu = stats.possible_vs_used_by_layer(all_df)  # possible vs used
    df = stats.layer_statistics(qc0)  # bias, rmse, totalspread for plotting

    # using rmse because mean_sqrt vs mean for bias (get a column with 0 obs)
    if "prior_rmse" not in df.columns:
        print(f"All layers empty for type: {type}")
        return None

    fig, ax1 = plt.subplots(figsize=(8, 8))

    # convert the layer midpoints to the plotting unit (hPa for pressure)
    df["midpoint"] = df["midpoint"].astype(float) * conversion
    df_pvu["midpoint"] = df_pvu["midpoint"].astype(float) * conversion

    # Horizontal stripes alternating between gray and white mark the layers
    lower_edges = df["vlevels"].apply(lambda x: x.left * conversion)
    upper_edges = df["vlevels"].apply(lambda x: x.right * conversion)
    for i, (lower, upper) in enumerate(zip(lower_edges, upper_edges)):
        stripe = "gray" if i % 2 == 0 else "white"
        ax1.axhspan(lower, upper, color=stripe, alpha=0.3)

    # (enabled, column suffix, line color, legend name, text-box key template)
    requested = [
        (bias, "bias", dacolors[0], "bias", "{0}_bias"),
        (rmse, "rmse", dacolors[1], "RMSE", "rmse_{0}"),
        (totalspread, "totalspread", dacolors[2], "totalspread", "totalspread_{0}"),
    ]

    # Text-box lines are collected while plotting so that a grand statistic
    # is only reported for a curve that was actually drawn.
    prior_text = []
    posterior_text = []
    for enabled, suffix, color, legend_name, text_key in requested:
        if not enabled:
            continue
        for phase, linestyle, text_lines in (
            ("prior", "-", prior_text),
            ("posterior", "--", posterior_text),
        ):
            column = f"{phase}_{suffix}"
            if phase == "posterior" and column not in df.columns:
                continue
            ax1.plot(
                df[column],
                df["midpoint"],
                color=color,
                marker=".",
                linestyle=linestyle,
                label=f"{phase} {legend_name}",
            )
            text_lines.append(
                f"{text_key.format(phase)}: {grand.loc[0, column]:.7f}\n"
            )

    ax1.set_ylabel(unit)
    ax1.tick_params(axis="y")
    ax1.set_yticks(df["midpoint"])

    # Second x-axis: number of observations per layer
    ax3 = ax1.twiny()
    ax3.set_xlabel("# obs (o=possible; +=assimilated)", color=dacolors[-1])
    ax3.tick_params(axis="x", colors=dacolors[-1])
    ax3.plot(
        df_pvu["possible"],
        df_pvu["midpoint"],
        color=dacolors[-1],
        marker="o",
        linestyle="",
        markerfacecolor="none",
        label="possible",
    )
    ax3.plot(
        df_pvu["used"],
        df_pvu["midpoint"],
        color=dacolors[-1],
        marker="+",
        linestyle="",
        label="used",
    )
    ax3.set_xlim(left=0)

    if vert_unit == "pressure (Pa)" or depth:
        ax1.invert_yaxis()
    ax1.set_title(type)

    # x-axis label lists the requested quantities
    datalabel = [suffix for enabled, suffix, _, _, _ in requested if enabled]
    ax1.set_xlabel(", ".join(datalabel))

    lines1, labels1 = ax1.get_legend_handles_labels()
    ax1.legend(lines1, labels1, loc="upper left", bbox_to_anchor=(1.05, 1))

    ax1.text(
        0.6, -0.08, obs_seq.file, ha="center", va="center", transform=ax1.transAxes
    )

    # Text box with the grand statistics below the legend: priors first,
    # then posteriors.
    textstr = "Grand statistics:\n" + "".join(prior_text + posterior_text)

    props = dict(boxstyle="round", facecolor="wheat", alpha=0.5)
    ax1.text(
        1.05,
        0.5,
        textstr,
        transform=ax1.transAxes,
        fontsize=10,
        verticalalignment="top",
        bbox=props,
    )

    plt.tight_layout()
    plt.show()

    return fig
229
+
230
+
231
def _draw_rank_histograms(df, ens_size, title):
    """Draw and show the prior (and, if present, posterior) rank histogram of df."""
    has_posterior = "posterior_rank" in df.columns
    if has_posterior:
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 6))
    else:
        fig, ax1 = plt.subplots()

    # Bin edges 1 .. ens_size + 1
    bins = list(range(1, ens_size + 2))

    ax1.hist(
        df["prior_rank"], bins=bins, color="blue", alpha=0.5, label="prior rank"
    )
    ax1.set_title("Prior Rank Histogram")
    ax1.set_xlabel("Observation Rank (among ensemble members)")
    ax1.set_ylabel("Count")

    if has_posterior:
        ax2.hist(
            df["posterior_rank"],
            bins=bins,
            color="green",
            alpha=0.5,
            label="posterior rank",
        )
        ax2.set_title("Posterior Rank Histogram")
        ax2.set_xlabel("Observation Rank (among ensemble members)")
        ax2.set_ylabel("Count")

    fig.suptitle(title, fontsize=14)

    plt.tight_layout(rect=[0, 0.03, 1, 0.95])
    plt.show()
    return fig


def plot_rank_histogram(obs_seq, type, ens_size, levels=None):
    """
    Plot the rank histograms of the requested observation type, ensemble
    size, and levels (if applicable). Rank histograms are plotted for prior
    and posterior if present.

    Without ``levels`` a single figure covering all selected observations is
    drawn.  With ``levels`` the observations are binned by layer and one
    figure is drawn per populated layer, each built only from the
    observations in that layer; the layer midpoint in the title is shown in
    the plotting unit (hPa for pressure in Pa).

    Args:
        obs_seq: The observation sequence object.
        type (str): The type of observation to filter by. For identity
            observations, use "IDENTITY_OBS" or a negative integer
        ens_size (int): The ensemble size.
        levels (list, optional): The levels to bin by. If None, no binning by level.

    Returns:
        fig: The matplotlib figure object (the last one drawn when binning
        by level), or None when no rows match, the type has more than one
        vertical unit, or no layer contains observations.
    """

    qc0 = stats.select_used_qcs(obs_seq.df)  # filter only qc=0, qc=2

    if (isinstance(type, int) and type < 0) or (type == "IDENTITY_OBS"):
        type = "IDENTITY_OBS"
        print(
            "Observation type is for identity observations."
        )  # Filter on types < 0 to get identity observations

        # Only keep rows where 'type' is numeric before comparing
        qc0 = qc0[pd.to_numeric(qc0["type"], errors="coerce").notnull()]
        qc0 = qc0[qc0["type"].astype(int) < 0]
        if qc0.empty:
            print("No rows found for IDENTITY_OBS")
            return None

    else:
        qc0 = qc0[qc0["type"] == type]  # filter by type

        if qc0.empty:
            print(f"No rows found for type: {type}")
            return None

    if levels is None:
        print("No levels given. Proceeding without level binning.")
        return _draw_rank_histograms(stats.calculate_rank(qc0), ens_size, f"{type}")

    if qc0["vert_unit"].nunique() > 1:
        print(
            f"Multiple vertical units found in the data: {qc0['vert_unit'].unique()} for type: {type}"
        )
        return None

    vert_unit = qc0.iloc[0]["vert_unit"]
    conversion, unit = _get_plot_unit(vert_unit)  # multiplier and unit for titles

    # Copy before binning: bin_by_layer adds its columns to the frame it is
    # given, and qc0 is a filtered selection at this point.
    qc0 = qc0.copy()
    stats.bin_by_layer(qc0, levels, verticalUnit=vert_unit)  # bin by level

    # Rows without a midpoint cannot be matched to a layer, so they are skipped.
    midpoints = qc0["midpoint"].dropna().unique()

    fig = None
    for level in sorted(midpoints):
        # Rank only the observations that belong to this layer
        layer_df = qc0[qc0["midpoint"] == level]
        ranked = stats.calculate_rank(layer_df)

        # The title reports the midpoint in the same unit as `unit`
        shown_level = round(float(level) * conversion, 1)
        fig = _draw_rank_histograms(
            ranked, ens_size, f"{type} at Level {shown_level} {unit}"
        )

    if fig is None:
        print(f"All layers empty for type: {type}")

    return fig
364
+
365
+
366
def plot_evolution(
    obs_seq,
    type,
    time_bin_width,
    stat,
    levels=None,
    tick_interval=2,
    time_format="%m-%d",
    plot_pvu=True,
):
    """
    Plot the time evolution of the requested statistics and optionally used vs possible observations.

    Without ``levels`` one figure is drawn for all observations of the type.
    With ``levels`` the observations are binned by layer and one figure is
    drawn per populated layer; the possible-vs-used counts on each figure
    are restricted to the observations of that layer.

    Args:
        obs_seq: The observation sequence object.
        type (str): The type of observation to filter by.
        time_bin_width (str): The width of each time bin (e.g., '3600s' for 1 hour).
        stat (str): The statistic to plot, without the prior/posterior
            prefix (e.g. "rmse" selects the prior_rmse and posterior_rmse columns).
        levels (list, optional): The levels to bin by. If None, no binning by level.
        tick_interval (int): Interval for x-axis ticks (default is 2).
        time_format (str): Format string for time labels on the x-axis (default is '%m-%d').
        plot_pvu (bool): Whether to plot possible vs used observations (default is True).

    Returns:
        fig: The matplotlib figure object (the last one drawn when binning
        by level), or None when no data is found for the type or no layer
        contains observations.
    """
    # Calculate stats and add to dataframe
    stats.diag_stats(obs_seq.df)
    qc0 = stats.select_used_qcs(obs_seq.df)  # filter only qc=0, qc=2

    # Copies are taken because the binning helpers add columns to the
    # frames they are given.
    qc0 = qc0[qc0["type"] == type].copy()  # filter by type

    if qc0.empty:
        print(f"No data found for type: {type}")
        return None

    all_df = obs_seq.df[obs_seq.df["type"] == type].copy()  # for possible vs used

    if not levels:
        # Bin by time and aggregate by time bin
        stats.bin_by_time(qc0, time_bin_width)
        df = stats.time_statistics(qc0)

        # Calculate possible vs used if enabled
        df_pvu = None
        if plot_pvu:
            stats.bin_by_time(all_df, time_bin_width)
            df_pvu = stats.possible_vs_used_by_time(all_df)

        return plot_time_evolution(
            df, df_pvu, stat, type, None, tick_interval, time_format, plot_pvu
        )

    # Bin by the data's own vertical unit when it is unambiguous; otherwise
    # keep the original call and let bin_by_layer use its default unit.
    layer_kwargs = {}
    units = all_df["vert_unit"].unique()
    if len(units) == 1:
        layer_kwargs["verticalUnit"] = units[0]

    stats.bin_by_layer(qc0, levels, **layer_kwargs)  # bin by level
    if plot_pvu:
        # Layer and time bins of all observations do not depend on the
        # level being plotted, so they are computed once up front.
        stats.bin_by_layer(all_df, levels, **layer_kwargs)
        stats.bin_by_time(all_df, time_bin_width)

    # Rows without a midpoint cannot be matched to a layer, so they are skipped.
    midpoints = qc0["midpoint"].dropna().unique()

    fig = None
    for level in sorted(midpoints):
        layer_df = qc0[qc0["midpoint"] == level].copy()

        # Bin by time and aggregate by time bin
        stats.bin_by_time(layer_df, time_bin_width)
        layer_stats = stats.time_statistics(layer_df)

        # Possible vs used counts for this layer only
        df_pvu = None
        if plot_pvu:
            df_pvu = stats.possible_vs_used_by_time(
                all_df[all_df["midpoint"] == level]
            )

        # Plot the time evolution of requested stats
        fig = plot_time_evolution(
            layer_stats,
            df_pvu,
            stat,
            type,
            level,
            tick_interval,
            time_format,
            plot_pvu,
        )

    return fig
443
+
444
+
445
def plot_time_evolution(
    df, df_pvu, stat, type, level, tick_interval, time_format, plot_pvu
):
    """
    Draw one figure with the prior/posterior time series of a statistic.

    Optionally overlays the possible and assimilated observation counts on a
    secondary y-axis.  The figure is created but not shown.

    Args:
        df (pd.DataFrame): Statistics aggregated by time bin; must contain
            "time_bin_midpoint" and may contain "prior_<stat>" and
            "posterior_<stat>".
        df_pvu (pd.DataFrame): Possible vs used counts per time bin, or None.
        stat (str): The statistic to plot.
        type (str): The type of observation (used in the title).
        level (float or None): The vertical level, added to the title when given.
        tick_interval (int): Every tick_interval-th time bin gets an x tick.
        time_format (str): Format string for time labels on the x-axis.
        plot_pvu (bool): Whether to plot possible vs used observations.

    Returns:
        fig: The matplotlib figure object.
    """
    fig, stat_ax = plt.subplots()
    times = df["time_bin_midpoint"]

    # One curve per phase whose column is available
    for phase in ("prior", "posterior"):
        column = f"{phase}_{stat}"
        if column in df.columns:
            stat_ax.plot(times, df[column], label=f"{phase} {stat}")

    # x ticks on every tick_interval-th time bin, labelled with time_format
    ticks = times[::tick_interval]
    stat_ax.set_xticks(ticks)
    tick_labels = ticks.dt.strftime(time_format)
    stat_ax.set_xticklabels(tick_labels, rotation=45, ha="right")

    # Observation counts go on a secondary y-axis
    if plot_pvu and df_pvu is not None:
        count_ax = stat_ax.twinx()
        count_ax.set_ylabel("# obs (o=possible; +=assimilated)", color="red")
        count_ax.tick_params(axis="y", colors="red")

        marker_styles = (
            ("possible", {"marker": "o", "linestyle": "", "markerfacecolor": "none"}),
            ("used", {"marker": "+", "linestyle": ""}),
        )
        for column, style in marker_styles:
            count_ax.plot(
                df_pvu["time_bin_midpoint"], df_pvu[column], color="red", **style
            )
        count_ax.set_ylim(bottom=0)

    stat_ax.legend(loc="upper right")

    if level is None:
        title = f"{type}"
    else:
        title = f"{type} at level {level}"
    stat_ax.set_title(title)
    stat_ax.set_xlabel("Time")
    stat_ax.set_ylabel(stat)

    plt.tight_layout()

    return fig
514
+
515
+
516
+ def _get_plot_unit(vert_unit):
517
+ if vert_unit == "pressure (Pa)":
518
+ return 0.01, "hPa"
519
+ elif vert_unit in ("height (m)", "surface (m)"):
520
+ return 1.0, "m"
521
+ else:
522
+ return 1.0, vert_unit
File without changes
@@ -0,0 +1,35 @@
1
+ acars_horizontal_wind:
2
+ description: ACARS-derived Horizontal wind speed
3
+ components:
4
+ - acars_u_wind_component
5
+ - acars_v_wind_component
6
+
7
+ sat_horizontal_wind:
8
+ description: Satellite-derived horizontal wind speed
9
+ components:
10
+ - sat_u_wind_component
11
+ - sat_v_wind_component
12
+
13
+ radiosonde_horizontal_wind:
14
+ description: Radiosonde-derived horizontal wind speed
15
+ components:
16
+ - radiosonde_u_wind_component
17
+ - radiosonde_v_wind_component
18
+
19
+ aircraft_horizontal_wind:
20
+ description: Aircraft-derived horizontal wind speed
21
+ components:
22
+ - aircraft_u_wind_component
23
+ - aircraft_v_wind_component
24
+
25
+ 10_m_horizontal_wind:
26
+ description: 10 meter horizontal wind speed
27
+ components:
28
+ - 10m_u_wind_component
29
+ - 10m_v_wind_component
30
+
31
+ marine_sfc_horizontal_wind:
32
+ description: Marine surface horizontal wind speed
33
+ components:
34
+ - marine_sfc_u_wind_component
35
+ - marine_sfc_v_wind_component