pydartdiags 0.5.0__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pydartdiags might be problematic. Click here for more details.

@@ -28,35 +28,56 @@ def plot_profile(obs_seq, levels, type, bias=True, rmse=True, totalspread=True):
28
28
 
29
29
  # calculate stats and add to dataframe
30
30
  stats.diag_stats(obs_seq.df)
31
- qc0 = obs_seq.select_by_dart_qc(0) # filter only qc=0
31
+ qc0 = stats.select_used_qcs(obs_seq.df) # filter only qc=0, qc=2
32
32
 
33
33
  # filter by type
34
34
  qc0 = qc0[qc0["type"] == type]
35
- all_df = obs_seq.df[obs_seq.df["type"] == type]
35
+ if qc0.empty:
36
+ print(f"No rows found for type: {type}")
37
+ return None
38
+
39
+ all_df = obs_seq.df[obs_seq.df["type"] == type] # for possible vs used
40
+
41
+ if all_df["vert_unit"].nunique() > 1:
42
+ print(
43
+ f"Multiple vertical units found in the data: {all_df['vert_unit'].unique()} for type: {type}"
44
+ )
45
+ return None
46
+
47
+ vert_unit = all_df.iloc[0]["vert_unit"]
48
+ if vert_unit == "pressure (Pa)":
49
+ conversion = 0.01 # from Pa to hPa
50
+ else:
51
+ conversion = 1.0 # no conversion needed
36
52
 
37
53
  # grand statistics
38
54
  grand = stats.grand_statistics(qc0)
39
55
 
40
56
  # add level bins to the dataframe
41
- stats.bin_by_layer(all_df, levels) # have to count used vs possible
42
- stats.bin_by_layer(qc0, levels)
57
+ stats.bin_by_layer(all_df, levels, verticalUnit=vert_unit)
58
+ stats.bin_by_layer(qc0, levels, verticalUnit=vert_unit)
43
59
 
44
60
  # aggregate by layer
45
61
  df_pvu = stats.possible_vs_used_by_layer(all_df) # possible vs used
46
62
  df = stats.layer_statistics(qc0) # bias, rmse, totalspread for plotting
47
63
 
48
- fig, ax1 = plt.subplots()
64
+ # using rmse because mean_sqrt vs mean for bias (get a column with 0 obs)
65
+ if "prior_rmse" not in df.columns:
66
+ print(f"All layers empty for type: {type}")
67
+ return None
68
+
69
+ fig, ax1 = plt.subplots(figsize=(8, 8))
49
70
 
50
71
  # convert to hPa HK @todo only for Pressure (Pa)
51
72
  df["midpoint"] = df["midpoint"].astype(float)
52
- df["midpoint"] = df["midpoint"] / 100.0
73
+ df["midpoint"] = df["midpoint"] * conversion
53
74
 
54
75
  df_pvu["midpoint"] = df_pvu["midpoint"].astype(float)
55
- df_pvu["midpoint"] = df_pvu["midpoint"] / 100.0
76
+ df_pvu["midpoint"] = df_pvu["midpoint"] * conversion
56
77
 
57
78
  # Add horizontal stripes alternating between gray and white to represent the vertical levels
58
- left = df["vlevels"].apply(lambda x: x.left / 100.0) # todo convert to HPa
59
- right = df["vlevels"].apply(lambda x: x.right / 100.0)
79
+ left = df["vlevels"].apply(lambda x: x.left * conversion) # todo convert to HPa
80
+ right = df["vlevels"].apply(lambda x: x.right * conversion)
60
81
  for i in range(len(left)):
61
82
  color = "gray" if i % 2 == 0 else "white"
62
83
  ax1.axhspan(left.iloc[i], right.iloc[i], color=color, alpha=0.3)
@@ -150,33 +171,41 @@ def plot_profile(obs_seq, levels, type, bias=True, rmse=True, totalspread=True):
150
171
  )
151
172
  ax3.set_xlim(left=0)
152
173
 
153
- ax1.invert_yaxis()
174
+ if vert_unit == "pressure (Pa)":
175
+ ax1.invert_yaxis()
154
176
  ax1.set_title(type)
155
- datalabel = "bias," + " " + "rmse," + " " + "totalspread"
156
- ax1.set_xlabel(datalabel)
177
+ # Build the datalabel string
178
+ datalabel = []
179
+ if bias:
180
+ datalabel.append("bias")
181
+ if rmse:
182
+ datalabel.append("rmse")
183
+ if totalspread:
184
+ datalabel.append("totalspread")
185
+ ax1.set_xlabel(", ".join(datalabel))
157
186
 
158
187
  lines1, labels1 = ax1.get_legend_handles_labels()
159
188
  ax1.legend(lines1, labels1, loc="upper left", bbox_to_anchor=(1.05, 1))
160
189
 
161
190
  ax1.text(
162
- 0.5, -0.15, obs_seq.file, ha="center", va="center", transform=ax1.transAxes
191
+ 0.6, -0.08, obs_seq.file, ha="center", va="center", transform=ax1.transAxes
163
192
  )
164
193
 
165
194
  # Add a text box with information below the legend
166
195
  textstr = "Grand statistics:\n"
167
196
  if bias:
168
- textstr += f"- prior_bias: {bias_prior:.7f}\n"
197
+ textstr += f"prior_bias: {bias_prior:.7f}\n"
169
198
  if rmse:
170
- textstr += f"- rmse_prior: {rmse_prior:.7f}\n"
199
+ textstr += f"rmse_prior: {rmse_prior:.7f}\n"
171
200
  if totalspread:
172
- textstr += f"- totalspread_prior: {totalspread_prior:.7f}\n"
201
+ textstr += f"totalspread_prior: {totalspread_prior:.7f}\n"
173
202
  if "posterior_bias" in df.columns:
174
203
  if bias:
175
- textstr += f"- posterior_bias: {bias_posterior:.7f}\n"
204
+ textstr += f"posterior_bias: {bias_posterior:.7f}\n"
176
205
  if rmse:
177
- textstr += f"- rmse_posterior: {rmse_posterior:.7f}\n"
206
+ textstr += f"rmse_posterior: {rmse_posterior:.7f}\n"
178
207
  if totalspread:
179
- textstr += f"- totalspread_posterior: {totalspread_posterior:.7f}\n"
208
+ textstr += f"totalspread_posterior: {totalspread_posterior:.7f}\n"
180
209
 
181
210
  props = dict(boxstyle="round", facecolor="wheat", alpha=0.5)
182
211
  ax1.text(
@@ -189,6 +218,7 @@ def plot_profile(obs_seq, levels, type, bias=True, rmse=True, totalspread=True):
189
218
  bbox=props,
190
219
  )
191
220
 
221
+ plt.tight_layout()
192
222
  plt.show()
193
223
 
194
224
  return fig
@@ -196,7 +226,7 @@ def plot_profile(obs_seq, levels, type, bias=True, rmse=True, totalspread=True):
196
226
 
197
227
  def plot_rank_histogram(obs_seq, levels, type, ens_size):
198
228
 
199
- qc0 = obs_seq.select_by_dart_qc(0) # filter only qc=0
229
+ qc0 = stats.select_used_qcs(obs_seq.df) # filter only qc=0, qc=2
200
230
  qc0 = qc0[qc0["type"] == type] # filter by type
201
231
  stats.bin_by_layer(qc0, levels) # bin by level
202
232
 
@@ -241,3 +271,153 @@ def plot_rank_histogram(obs_seq, levels, type, ens_size):
241
271
  plt.show()
242
272
 
243
273
  return fig
274
+
275
+
276
def plot_evolution(
    obs_seq,
    type,
    time_bin_width,
    stat,
    levels=None,
    tick_interval=2,
    time_format="%m-%d",
    plot_pvu=True,
):
    """
    Plot the time evolution of a statistic and, optionally, possible vs used observations.

    Args:
        obs_seq: The observation sequence object. Its ``df`` attribute is passed
            to ``stats.diag_stats`` and then filtered by QC and type.
        type (str): The type of observation to filter by.
        time_bin_width (str): The width of each time bin (e.g., '3600s' for 1 hour).
        stat (str): The statistic to plot, given without the prior/posterior
            prefix (e.g., "rmse"); the columns ``prior_<stat>`` and
            ``posterior_<stat>`` are plotted when they exist. Required.
        levels (list, optional): The levels to bin by. If None or empty, no
            binning by level is done and a single figure is produced.
        tick_interval (int): Interval for x-axis ticks (default is 2).
        time_format (str): Format string for time labels on the x-axis (default is '%m-%d').
        plot_pvu (bool): Whether to plot possible vs used observations (default is True).

    Returns:
        None if no used observations of ``type`` are found; the matplotlib
        figure when ``levels`` is None or empty; otherwise a list with one
        figure per level midpoint, in ascending midpoint order.
    """
    # Calculate stats and add to dataframe
    stats.diag_stats(obs_seq.df)
    qc0 = stats.select_used_qcs(obs_seq.df)  # filter only qc=0, qc=2
    # .copy(): the binning helpers below add columns in place, so work on an
    # independent frame instead of a boolean-mask slice
    qc0 = qc0[qc0["type"] == type].copy()  # filter by type

    if qc0.empty:
        print(f"No data found for type: {type}")
        return None

    # The possible-vs-used table depends only on the type and the time bins,
    # so build it once and share it between all figures.
    # NOTE(review): the counts cover every observation of this type; they are
    # not restricted to the level being plotted -- confirm this is intended.
    df_pvu = None
    if plot_pvu:
        all_df = obs_seq.df[obs_seq.df["type"] == type].copy()
        stats.bin_by_time(all_df, time_bin_width)
        df_pvu = stats.possible_vs_used_by_time(all_df)

    # Explicit None/length test: `if levels:` is ambiguous for NumPy arrays
    if levels is None or len(levels) == 0:
        # Bin by time, then aggregate by time bin
        stats.bin_by_time(qc0, time_bin_width)
        df = stats.time_statistics(qc0)

        return plot_time_evolution(
            df, df_pvu, stat, type, None, tick_interval, time_format, plot_pvu
        )

    stats.bin_by_layer(qc0, levels)  # bin by level

    figures = []
    # dropna(): a NaN midpoint can never equal a row's midpoint, so it would
    # only yield an empty selection
    for level in sorted(qc0["midpoint"].dropna().unique()):
        df = qc0[qc0["midpoint"] == level].copy()

        # Bin by time, then aggregate by time bin
        stats.bin_by_time(df, time_bin_width)
        df = stats.time_statistics(df)

        figures.append(
            plot_time_evolution(
                df, df_pvu, stat, type, level, tick_interval, time_format, plot_pvu
            )
        )

    return figures
353
+
354
+
355
def plot_time_evolution(
    df, df_pvu, stat, type, level, tick_interval, time_format, plot_pvu
):
    """
    Draw one figure showing a statistic over time, optionally with observation counts.

    Args:
        df (pd.DataFrame): Aggregated statistics; must contain a
            "time_bin_midpoint" column and may contain "prior_<stat>" and
            "posterior_<stat>" columns.
        df_pvu (pd.DataFrame): Possible vs used observations with
            "time_bin_midpoint", "possible" and "used" columns, or None.
        stat (str): The statistic to plot (without the prior/posterior prefix).
        type (str): The type of observation, used in the title.
        level (float or None): The vertical level shown in the title, if any.
        tick_interval (int): Step between the time bins that get an x-axis tick.
        time_format (str): Format string for time labels on the x-axis.
        plot_pvu (bool): Whether to plot possible vs used observations; they
            are only drawn when df_pvu is also not None.

    Returns:
        fig: The matplotlib figure object.
    """
    fig, ax1 = plt.subplots()
    bin_times = df["time_bin_midpoint"]

    # Prior curve first, then posterior; a missing column is simply skipped
    for phase in ("prior", "posterior"):
        column = f"{phase}_{stat}"
        if column in df.columns:
            ax1.plot(bin_times, df[column], label=f"{phase} {stat}")

    # Only every 'tick_interval'-th time bin gets a tick and a label
    shown_ticks = bin_times[::tick_interval]
    ax1.set_xticks(shown_ticks)
    tick_labels = shown_ticks.dt.strftime(time_format)
    ax1.set_xticklabels(tick_labels, rotation=45, ha="right")

    # Observation counts go on a secondary y-axis, drawn as red markers only
    if plot_pvu and df_pvu is not None:
        ax2 = ax1.twinx()
        ax2.set_ylabel("# obs (o=possible; +=assimilated)", color="red")
        ax2.tick_params(axis="y", colors="red")

        count_times = df_pvu["time_bin_midpoint"]
        count_markers = (
            ("possible", {"marker": "o", "markerfacecolor": "none"}),
            ("used", {"marker": "+"}),
        )
        for column, marker_kwargs in count_markers:
            ax2.plot(
                count_times,
                df_pvu[column],
                color="red",
                linestyle="",
                **marker_kwargs,
            )
        ax2.set_ylim(bottom=0)

    ax1.legend(loc="upper right")

    if level is None:
        title = f"{type}"
    else:
        title = f"{type} at level {level}"
    ax1.set_title(title)
    ax1.set_xlabel("Time")
    ax1.set_ylabel(stat)

    plt.tight_layout()

    return fig
@@ -0,0 +1,35 @@
1
+ acars_horizontal_wind:
2
+ description: ACARS-derived Horizontal wind speed
3
+ components:
4
+ - acars_u_wind_component
5
+ - acars_v_wind_component
6
+
7
+ sat_horizontal_wind:
8
+ description: Satellite-derived horizontal wind speed
9
+ components:
10
+ - sat_u_wind_component
11
+ - sat_v_wind_component
12
+
13
+ radiosonde_horizontal_wind:
14
+ description: Radiosonde-derived horizontal wind speed
15
+ components:
16
+ - radiosonde_u_wind_component
17
+ - radiosonde_v_wind_component
18
+
19
+ aircraft_horizontal_wind:
20
+ description: Aircraft-derived horizontal wind speed
21
+ components:
22
+ - aircraft_u_wind_component
23
+ - aircraft_v_wind_component
24
+
25
+ 10_m_horizontal_wind:
26
+ description: 10 meter horizontal wind speed
27
+ components:
28
+ - 10m_u_wind_component
29
+ - 10m_v_wind_component
30
+
31
+ marine_sfc_horizontal_wind:
32
+ description: Marine surface horizontal wind speed
33
+ components:
34
+ - marine_sfc_u_wind_component
35
+ - marine_sfc_v_wind_component