pydartdiags-0.0.41-py3-none-any.whl → pydartdiags-0.0.43-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of pydartdiags might be problematic.

pydartdiags/obs_sequence/obs_sequence.py
@@ -3,6 +3,24 @@ import datetime as dt
  import numpy as np
  import os
  import yaml
+ import struct
+
+ def requires_assimilation_info(func):
+     def wrapper(self, *args, **kwargs):
+         if self.has_assimilation_info:
+             return func(self, *args, **kwargs)
+         else:
+             raise ValueError("Assimilation information is required to call this function.")
+     return wrapper
+
+ def requires_posterior_info(func):
+     def wrapper(self, *args, **kwargs):
+         if self.has_posterior_info:
+             return func(self, *args, **kwargs)
+         else:
+             raise ValueError("Posterior information is required to call this function.")
+     return wrapper
+

  class obs_sequence:
      """Create an obs_sequence object from an ascii observation sequence file.
@@ -55,22 +73,53 @@ class obs_sequence:

      reversed_vert = {value: key for key, value in vert.items()}

-     # synonyms for observation
-     synonyms_for_obs = ['NCEP BUFR observation',
-                         'AIRS observation',
-                         'GTSPP observation',
-                         'SST observation',
-                         'observations',
-                         'WOD observation']

-     def __init__(self, file):
+     def __init__(self, file, synonyms=None):
          self.loc_mod = 'None'
+         self.has_assimilation_info = False
+         self.has_posterior = False
          self.file = file
-         self.header = self.read_header(file)
+         self.synonyms_for_obs = ['NCEP BUFR observation',
+                                  'AIRS observation',
+                                  'GTSPP observation',
+                                  'SST observation',
+                                  'observations',
+                                  'WOD observation']
+         if synonyms:
+             if isinstance(synonyms, list):
+                 self.synonyms_for_obs.extend(synonyms)
+             else:
+                 self.synonyms_for_obs.append(synonyms)
+
+         if file is None:
+             # Early exit for testing purposes
+             self.df = pd.DataFrame()
+             self.types = {}
+             self.reverse_types = {}
+             self.copie_names = []
+             self.n_copies = 0
+             self.seq = []
+             self.all_obs = []
+             return
+
+         module_dir = os.path.dirname(__file__)
+         self.default_composite_types = os.path.join(module_dir,"composite_types.yaml")
+
+         if self.is_binary(file):
+             self.header = self.read_binary_header(file)
+         else:
+             self.header = self.read_header(file)
+
          self.types = self.collect_obs_types(self.header)
          self.reverse_types = {v: k for k, v in self.types.items()}
          self.copie_names, self.n_copies = self.collect_copie_names(self.header)
-         self.seq = self.obs_reader(file, self.n_copies)
+
+         if self.is_binary(file):
+             self.seq = self.obs_binary_reader(file, self.n_copies)
+             self.loc_mod = 'loc3d' # only loc3d supported for binary, & no way to check
+         else:
+             self.seq = self.obs_reader(file, self.n_copies)
+
          self.all_obs = self.create_all_obs() # uses up the generator
          # at this point you know if the seq is loc3d or loc1d
          if self.loc_mod == 'None':
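With the new signature, callers can extend the synonym list per file, and passing `file=None` builds an empty object for tests. A usage sketch against the new constructor (the obs_seq file name and the extra synonym string are placeholders):

```python
from pydartdiags.obs_sequence import obs_sequence as obsq

# file=None takes the new early-exit path and yields an empty object,
# handy for unit tests.
empty = obsq.obs_sequence(None)
assert empty.df.empty

# With a real obs_seq file (path is a placeholder), extra synonym columns
# are renamed to 'observation' during parsing:
# obs_seq = obsq.obs_sequence('obs_seq.final', synonyms='MY observation')
```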
@@ -84,12 +133,16 @@ class obs_sequence:
          self.synonyms_for_obs = [synonym.replace(' ', '_') for synonym in self.synonyms_for_obs]
          rename_dict = {old: 'observation' for old in self.synonyms_for_obs if old in self.df.columns}
          self.df = self.df.rename(columns=rename_dict)
+
          # calculate bias and sq_err is the obs_seq is an obs_seq.final
          if 'prior_ensemble_mean'.casefold() in map(str.casefold, self.columns):
-             self.df['bias'] = (self.df['prior_ensemble_mean'] - self.df['observation'])
-             self.df['sq_err'] = self.df['bias']**2 # squared error
-         module_dir = os.path.dirname(__file__)
-         self.default_composite_types = os.path.join(module_dir,"composite_types.yaml")
+             self.has_assimilation_info = True
+             self.df['prior_bias'] = (self.df['prior_ensemble_mean'] - self.df['observation'])
+             self.df['prior_sq_err'] = self.df['prior_bias']**2 # squared error
+         if 'posterior_ensemble_mean'.casefold() in map(str.casefold, self.columns):
+             self.has_posterior_info = True
+             self.df['posterior_bias'] = (self.df['posterior_ensemble_mean'] - self.df['observation'])
+             self.df['posterior_sq_err'] = self.df['posterior_bias']**2

      def create_all_obs(self):
          """ steps through the generator to create a
@@ -128,16 +181,44 @@ class obs_sequence:
              raise ValueError("Neither 'loc3d' nor 'loc1d' could be found in the observation sequence.")
          typeI = obs.index('kind') # type of observation
          type_value = obs[typeI + 1]
-         data.append(self.types[type_value]) # observation type
+         if not self.types:
+             data.append('Identity')
+         else:
+             data.append(self.types[type_value]) # observation type
+
          # any observation specific obs def info is between here and the end of the list
+         # can be obs_def & external forward operator
+         metadata = obs[typeI+2:-2]
+         obs_def_metadata, external_metadata = self.split_metadata(metadata)
+         data.append(obs_def_metadata)
+         data.append(external_metadata)
+
          time = obs[-2].split()
          data.append(int(time[0])) # seconds
          data.append(int(time[1])) # days
          data.append(convert_dart_time(int(time[0]), int(time[1]))) # datetime # HK todo what is approprate for 1d models?
          data.append(float(obs[-1])) # obs error variance ?convert to sd?
-
+
          return data

+     @staticmethod
+     def split_metadata(metadata):
+         """
+         Split the metadata list at the first occurrence of an element starting with 'externalF0'.
+
+         Args:
+             metadata (list of str): The metadata list to be split.
+
+         Returns:
+             tuple: Two sublists, the first containing elements before 'externalF0', and the second
+                    containing 'externalF0' and all elements after it. If 'externalF0' is not found,
+                    the first sublist contains the entire metadata list, and the second is empty.
+         """
+         for i, item in enumerate(metadata):
+             if item.startswith('external_FO'):
+                 return metadata[:i], metadata[i:]
+         return metadata, []
+
      def list_to_obs(self, data):
          obs = []
          obs.append('OBS ' + str(data[0])) # obs_num lots of space
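Note the docstring says 'externalF0' while the code actually matches the 'external_FO' prefix; the code is what runs. A standalone copy of the helper showing the split behavior:

```python
def split_metadata(metadata):
    # Standalone copy of the staticmethod above, for illustration.
    for i, item in enumerate(metadata):
        if item.startswith('external_FO'):
            return metadata[:i], metadata[i:]
    return metadata, []

before, after = split_metadata(['obs_def A', 'external_FO1', 'FO data'])
assert before == ['obs_def A']
assert after == ['external_FO1', 'FO data']
```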
@@ -149,10 +230,16 @@ class obs_sequence:
              obs.append(' '.join(map(str, data[self.n_copies+2:self.n_copies+5])) + ' ' + str(self.reversed_vert[data[self.n_copies+5]]) ) # location x, y, z, vert
              obs.append('kind') # this is type of observation
              obs.append(self.reverse_types[data[self.n_copies + 6]]) # observation type
+             # Convert metadata to a string and append
+             obs.extend(data[self.n_copies + 7]) # metadata
          elif self.loc_mod == 'loc1d':
              obs.append(data[self.n_copies+2]) # 1d location
              obs.append('kind') # this is type of observation
              obs.append(self.reverse_types[data[self.n_copies + 3]]) # observation type
+             # Convert metadata to a string and append
+             metadata = ' '.join(map(str, data[self.n_copies + 4:-4]))
+             if metadata:
+                 obs.append(metadata) # metadata
          obs.append(' '.join(map(str, data[-4:-2]))) # seconds, days
          obs.append(data[-1]) # obs error variance

@@ -251,15 +338,83 @@ class obs_sequence:
          elif self.loc_mod == 'loc1d':
              heading.append('location')
          heading.append('type')
+         heading.append('metadata')
+         heading.append('external_FO')
          heading.append('seconds')
          heading.append('days')
          heading.append('time')
          heading.append('obs_err_var')
          return heading

+     @requires_assimilation_info
+     def select_by_dart_qc(self, dart_qc):
+         """
+         Selects rows from a DataFrame based on the DART quality control flag.
+
+         Parameters:
+             df (DataFrame): A pandas DataFrame.
+             dart_qc (int): The DART quality control flag to select.
+
+         Returns:
+             DataFrame: A DataFrame containing only the rows with the specified DART quality control flag.
+
+         Raises:
+             ValueError: If the DART quality control flag is not present in the DataFrame.
+         """
+         if dart_qc not in self.df['DART_quality_control'].unique():
+             raise ValueError(f"DART quality control flag '{dart_qc}' not found in DataFrame.")
+         else:
+             return self.df[self.df['DART_quality_control'] == dart_qc]
+
+     @requires_assimilation_info
+     def select_failed_qcs(self):
+         """
+         Select rows from the DataFrame where the DART quality control flag is greater than 0.
+
+         Returns:
+             pandas.DataFrame: A DataFrame containing only the rows with a DART quality control flag greater than 0.
+         """
+         return self.df[self.df['DART_quality_control'] > 0]
+
+     @requires_assimilation_info
+     def possible_vs_used(self):
+         """
+         Calculates the count of possible vs. used observations by type.
+
+         This function takes a DataFrame containing observation data, including a 'type' column for the observation
+         type and an 'observation' column. The number of used observations ('used'), is the total number
+         minus the observations that failed quality control checks (as determined by the `select_failed_qcs` function).
+         The result is a DataFrame with each observation type, the count of possible observations, and the count of
+         used observations.
+
+         Returns:
+             pd.DataFrame: A DataFrame with three columns: 'type', 'possible', and 'used'. 'type' is the observation type,
+             'possible' is the count of all observations of that type, and 'used' is the count of observations of that type
+             that passed quality control checks.
+         """
+         possible = self.df.groupby('type')['observation'].count()
+         possible.rename('possible', inplace=True)
+
+         failed_qcs = self.select_failed_qcs().groupby('type')['observation'].count()
+         used = possible - failed_qcs.reindex(possible.index, fill_value=0)
+         used.rename('used', inplace=True)
+
+         return pd.concat([possible, used], axis=1).reset_index()
+
+
+     @staticmethod
+     def is_binary(file):
+         """Check if a file is binary file."""
+         with open(file, 'rb') as f:
+             chunk = f.read(1024)
+             if b'\0' in chunk:
+                 return True
+         return False
+
+
      @staticmethod
      def read_header(file):
-         """Read the header and number of lines in the header of an obs_seq file"""
+         """Read the header and number of lines in the header of an ascii obs_seq file"""
          header = []
          with open(file, 'r') as f:
              for line in f:
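The QC helpers have moved from module-level functions onto the class, and `possible_vs_used` now reindexes the failed-QC counts before subtracting; without `fill_value=0`, types with no failed observations would come out as NaN. A small pandas illustration with invented counts:

```python
import pandas as pd

possible = pd.Series({'T': 10, 'U': 8}, name='possible')
failed = pd.Series({'T': 2}, name='failed')  # no 'U' observation failed QC

used = possible - failed.reindex(possible.index, fill_value=0)
# used: T -> 8, U -> 8; without fill_value, U would be NaN
```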
@@ -270,6 +425,118 @@ class obs_sequence:
                  header.append(line.strip())
          return header

+     @staticmethod
+     def read_binary_header(file):
+         """Read the header and number of lines in the header of a binary obs_seq file from Fortran output"""
+         header = []
+         linecount = 0
+         obs_types_definitions = -1000
+         num_obs = 0
+         max_num_obs = 0
+         # need to get:
+         # number of obs_type_definitions
+         # number of copies
+         # number of qcs
+         with open(file, 'rb') as f:
+             while True:
+                 # Read the record length
+                 record_length = obs_sequence.read_record_length(f)
+                 if record_length is None:
+                     break
+                 record = f.read(record_length)
+                 if not record:  # end of file
+                     break
+
+                 # Read the trailing record length (should match the leading one)
+                 obs_sequence.check_trailing_record_length(f, record_length)
+
+                 linecount += 1
+
+                 if linecount == 3:
+                     obs_types_definitions = struct.unpack('i', record)[0]
+                     continue
+
+                 if linecount == 4+obs_types_definitions:
+                     num_copies, num_qcs, num_obs, max_num_obs = struct.unpack('iiii', record)[:16]
+                     break
+
+             # Go back to the beginning of the file
+             f.seek(0)
+
+             for _ in range(2):
+                 record_length = obs_sequence.read_record_length(f)
+                 if record_length is None:
+                     break
+
+                 record = f.read(record_length)
+                 if not record:  # end of file
+                     break
+
+                 obs_sequence.check_trailing_record_length(f, record_length)
+                 header.append(record.decode('utf-8').strip())
+
+             header.append(str(obs_types_definitions))
+
+             # obs_types_definitions
+             for _ in range(3,4+obs_types_definitions):
+                 # Read the record length
+                 record_length = obs_sequence.read_record_length(f)
+                 if record_length is None:
+                     break
+
+                 # Read the actual record
+                 record = f.read(record_length)
+                 if not record:  # end of file
+                     break
+
+                 obs_sequence.check_trailing_record_length(f, record_length)
+
+                 if _ == 3:
+                     continue  # num obs_types_definitions
+                 # Read an integer and a string from the record
+                 integer_value = struct.unpack('i', record[:4])[0]
+                 string_value = record[4:].decode('utf-8').strip()
+                 header.append(f"{integer_value} {string_value}")
+
+             header.append(f"num_copies: {num_copies} num_qc: {num_qcs}")
+             header.append(f"num_obs: {num_obs} max_num_obs: {max_num_obs}")
+
+             #copie names
+             for _ in range(5+obs_types_definitions, 5+obs_types_definitions+num_copies+num_qcs+1):
+                 # Read the record length
+                 record_length = obs_sequence.read_record_length(f)
+                 if record_length is None:
+                     break
+
+                 # Read the actual record
+                 record = f.read(record_length)
+                 if not record:
+                     break
+
+                 obs_sequence.check_trailing_record_length(f, record_length)
+
+                 if _ == 5+obs_types_definitions:
+                     continue
+
+                 # Read the whole record as a string
+                 string_value = record.decode('utf-8').strip()
+                 header.append(string_value)
+
+             # first and last obs
+             # Read the record length
+             record_length = obs_sequence.read_record_length(f)
+
+             # Read the actual record
+             record = f.read(record_length)
+
+             obs_sequence.check_trailing_record_length(f, record_length)
+
+             # Read the whole record as a two integers
+             first, last = struct.unpack('ii', record)[:8]
+             header.append(f"first: {first} last: {last}")
+
+         return header
+
      @staticmethod
      def collect_obs_types(header):
          """Create a dictionary for the observation types in the obs_seq header"""
@@ -299,7 +566,7 @@ class obs_sequence:

      @staticmethod
      def obs_reader(file, n):
-         """Reads the obs sequence file and returns a generator of the obs"""
+         """Reads the ascii obs sequence file and returns a generator of the obs"""
          previous_line = ''
          with open(file, 'r') as f:
              for line in f:
@@ -339,6 +606,115 @@ class obs_sequence:
                  previous_line = next_line
              yield obs

+     @staticmethod
+     def check_trailing_record_length(file, expected_length):
+         """Reads and checks the trailing record length from the binary file written by Fortran.
+
+         Parameters:
+             file (file): The file object.
+             expected_length (int): The expected length of the trailing record.
+
+         Assuming 4 bytes:
+         | Record Length (4 bytes) | Data (N bytes) | Trailing Record Length (4 bytes) |
+         """
+         trailing_record_length_bytes = file.read(4)
+         trailing_record_length = struct.unpack('i', trailing_record_length_bytes)[0]
+         if expected_length != trailing_record_length:
+             raise ValueError("Record length mismatch in Fortran binary file")
+
+     @staticmethod
+     def read_record_length(file):
+         """Reads and unpacks the record length from the file."""
+         record_length_bytes = file.read(4)
+         if not record_length_bytes:
+             return None  # End of file
+         return struct.unpack('i', record_length_bytes)[0]
+
+
+     def obs_binary_reader(self, file, n):
+         """Reads the obs sequence binary file and returns a generator of the obs"""
+         header_length = len(self.header)
+         with open(file, 'rb') as f:
+             # Skip the first len(obs_seq.header) lines
+             for _ in range(header_length-1):
+                 # Read the record length
+                 record_length = obs_sequence.read_record_length(f)
+                 if record_length is None:  # End of file
+                     break
+
+                 # Skip the actual record
+                 f.seek(record_length, 1)
+
+                 # Skip the trailing record length
+                 f.seek(4, 1)
+
+             obs_num = 0
+             while True:
+                 obs = []
+                 obs_num += 1
+                 obs.append(f"OBS {obs_num}")
+                 for _ in range(n):  # number of copies
+                     # Read the record length
+                     record_length = obs_sequence.read_record_length(f)
+                     if record_length is None:
+                         break
+                     # Read the actual record (copie)
+                     record = f.read(record_length)
+                     obs.append(struct.unpack('d', record)[0])
+
+                     # Read the trailing record length (should match the leading one)
+                     obs_sequence.check_trailing_record_length(f, record_length)
+
+                 # linked list info
+                 record_length = obs_sequence.read_record_length(f)
+                 if record_length is None:
+                     break
+
+                 record = f.read(record_length)
+                 int1, int2, int3 = struct.unpack('iii', record[:12])
+                 linked_list_string = f"{int1:<12} {int2:<10} {int3:<12}"
+                 obs.append(linked_list_string)
+
+                 obs_sequence.check_trailing_record_length(f, record_length)
+
+                 # location (note no location header "loc3d" or "loc1d" for binary files)
+                 obs.append('loc3d')
+                 record_length = obs_sequence.read_record_length(f)
+                 record = f.read(record_length)
+                 x,y,z,vert = struct.unpack('dddi', record[:28])
+                 location_string = f"{x} {y} {z} {vert}"
+                 obs.append(location_string)
+
+                 obs_sequence.check_trailing_record_length(f, record_length)
+
+                 # kind (type of observation) value
+                 obs.append('kind')
+                 record_length_bytes = f.read(4)
+                 record_length = struct.unpack('i', record_length_bytes)[0]
+                 record = f.read(record_length)
+                 kind = f"{struct.unpack('i', record)[0]}"
+                 obs.append(kind)
+
+                 obs_sequence.check_trailing_record_length(f, record_length)
+
+                 # time (seconds, days)
+                 record_length = obs_sequence.read_record_length(f)
+                 record = f.read(record_length)
+                 seconds, days = struct.unpack('ii', record)[:8]
+                 time_string = f"{seconds} {days}"
+                 obs.append(time_string)
+
+                 obs_sequence.check_trailing_record_length(f, record_length)
+
+                 # obs error variance
+                 record_length = obs_sequence.read_record_length(f)
+                 record = f.read(record_length)
+                 obs.append(struct.unpack('d', record)[0])
+
+                 obs_sequence.check_trailing_record_length(f, record_length)
+
+                 yield obs
+
      def composite_types(self, composite_types='use_default'):
          """
          Set up and construct composite types for the DataFrame.
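The same framing applies per observation record; here is a minimal round trip through the record-length/payload/trailing-length pattern, using an in-memory stream in place of a file (a sketch, with an invented value):

```python
import io
import struct

value = 231.16
payload = struct.pack('d', value)
stream = io.BytesIO(struct.pack('i', 8) + payload + struct.pack('i', 8))

record_length = struct.unpack('i', stream.read(4))[0]   # leading length
record = stream.read(record_length)                     # payload
assert struct.unpack('i', stream.read(4))[0] == record_length  # trailing check
assert struct.unpack('d', record)[0] == value
```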
@@ -406,65 +782,6 @@ def convert_dart_time(seconds, days):
      """
      time = dt.datetime(1601,1,1) + dt.timedelta(days=days, seconds=seconds)
      return time
-
- def select_by_dart_qc(df, dart_qc):
-     """
-     Selects rows from a DataFrame based on the DART quality control flag.
-
-     Parameters:
-         df (DataFrame): A pandas DataFrame.
-         dart_qc (int): The DART quality control flag to select.
-
-     Returns:
-         DataFrame: A DataFrame containing only the rows with the specified DART quality control flag.
-
-     Raises:
-         ValueError: If the DART quality control flag is not present in the DataFrame.
-     """
-     if dart_qc not in df['DART_quality_control'].unique():
-         raise ValueError(f"DART quality control flag '{dart_qc}' not found in DataFrame.")
-     else:
-         return df[df['DART_quality_control'] == dart_qc]
-
- def select_failed_qcs(df):
-     """
-     Selects rows from a DataFrame where the DART quality control flag is greater than 0.
-
-     Parameters:
-         df (DataFrame): A pandas DataFrame.
-
-     Returns:
-         DataFrame: A DataFrame containing only the rows with a DART quality control flag greater than 0.
-     """
-     return df[df['DART_quality_control'] > 0]
-
- def possible_vs_used(df):
-     """
-     Calculates the count of possible vs. used observations by type.
-
-     This function takes a DataFrame containing observation data, including a 'type' column for the observation
-     type and an 'observation' column. The number of used observations ('used'), is the total number
-     minus the observations that failed quality control checks (as determined by the `select_failed_qcs` function).
-     The result is a DataFrame with each observation type, the count of possible observations, and the count of
-     used observations.
-
-     Parameters:
-         df (pd.DataFrame): A DataFrame with at least two columns: 'type' for the observation type and 'observation'
-         for the observation data. It may also contain other columns required by the `select_failed_qcs` function
-         to determine failed quality control checks.
-
-     Returns:
-         pd.DataFrame: A DataFrame with three columns: 'type', 'possible', and 'used'. 'type' is the observation type,
-         'possible' is the count of all observations of that type, and 'used' is the count of observations of that type
-         that passed quality control checks.
-
-     """
-     possible = df.groupby('type')['observation'].count()
-     possible.rename('possible', inplace=True)
-     used = df.groupby('type')['observation'].count() - select_failed_qcs(df).groupby('type')['observation'].count()
-     used.rename('used', inplace=True)
-     return pd.concat([possible, used], axis=1).reset_index()
-

  def construct_composit(df_comp, composite, components):
      """
@@ -504,3 +821,5 @@ def construct_composit(df_comp, composite, components):
      merged_df = merged_df.drop(columns=[col for col in merged_df.columns if col.endswith('_v')])

      return merged_df
+
+
pydartdiags/plots/plots.py
@@ -1,6 +1,7 @@

  import numpy as np
  import plotly.express as px
+ import plotly.graph_objects as go
  import pandas as pd

  def plot_rank_histogram(df):
@@ -62,55 +63,208 @@ def calculate_rank(df):

      return (rank, ens_size, result_df)

- def plot_profile(df, levels):
+ def plot_profile(df, levels, verticalUnit = "pressure (Pa)"):
      """
-     Plots RMSE and Bias profiles for different observation types across specified pressure levels.
+     Plots RMSE, bias, and total spread profiles for different observation types across specified vertical levels.

      This function takes a DataFrame containing observational data and model predictions, categorizes
-     the data into specified pressure levels, and calculates the RMSE and Bias for each level and
-     observation type. It then plots two line charts: one for RMSE and another for Bias, both as functions
-     of pressure level. The pressure levels are plotted on the y-axis in reversed order to represent
-     the vertical profile in the atmosphere correctly.
+     the data into specified vertical levels, and calculates the RMSE, bias and total spread for each level and
+     observation type. It then plots three line charts: one for RMSE, one for bias, one for total spread, as functions
+     of vertical level. The vertical levels are plotted on the y-axis in reversed order to represent
+     the vertical profile in the atmosphere correctly if the vertical units are pressure.

      Parameters:
-         df (pd.DataFrame): The input DataFrame containing at least the 'vertical' column for pressure levels,
-         and other columns required by the `rmse_bias` function for calculating RMSE and Bias.
-         levels (array-like): The bin edges for categorizing the 'vertical' column values into pressure levels.
+         df (pd.DataFrame): The input DataFrame containing at least the 'vertical' column for vertical levels,
+         the vert_unit column, and other columns required by the `rmse_bias` function for calculating RMSE and
+         Bias.
+         levels (array-like): The bin edges for categorizing the 'vertical' column values into the desired
+         vertical levels.
+         verticalUnit (string) (optional): The vertical unit to be used. Only observations in df which have this
+         string in the vert_unit column will be plotted. Defaults to 'pressure (Pa)'.

      Returns:
-         tuple: A tuple containing the DataFrame with RMSE and Bias calculations, the RMSE plot figure, and the
-         Bias plot figure. The DataFrame includes a 'plevels' column representing the categorized pressure levels
-         and 'hPa' column representing the midpoint of each pressure level bin.
+         tuple: A tuple containing the DataFrame with RMSE, bias and total spread calculations,
+         The DataFrame includes a 'vlevels' column representing the categorized vertical levels
+         and 'midpoint' column representing the midpoint of each vertical level bin. And the three figures.

      Raises:
          ValueError: If there are missing values in the 'vertical' column of the input DataFrame.
+         ValueError: If none of the input obs have 'verticalUnit' in the 'vert_unit' column of the input DataFrame.

      Note:
-     - The function modifies the input DataFrame by adding 'plevels' and 'hPa' columns.
-     - The 'hPa' values are calculated as half the midpoint of each pressure level bin, which may need
-       adjustment based on the specific requirements for pressure level representation.
+     - The function modifies the input DataFrame by adding 'vlevels' and 'midpoint' columns.
+     - The 'midpoint' values are calculated as half the midpoint of each vertical level bin, which may need
+       adjustment based on the specific requirements for vertical level representation.
      - The plots are generated using Plotly Express and are displayed inline. The y-axis of the plots is
-       reversed to align with standard atmospheric pressure level representation.
+       reversed to align with standard atmospheric pressure level representation if the vertical units
+       are atmospheric pressure.
      """

      pd.options.mode.copy_on_write = True
      if df['vertical'].isnull().values.any(): # what about horizontal observations?
          raise ValueError("Missing values in 'vertical' column.")
+     elif verticalUnit not in df['vert_unit'].values:
+         raise ValueError("No obs with expected vertical unit '"+verticalUnit+"'.")
      else:
-         df.loc[:,'plevels'] = pd.cut(df['vertical'], levels)
-         df.loc[:,'hPa'] = df['plevels'].apply(lambda x: x.mid / 1000.) # HK todo units
+         df = df[df["vert_unit"].isin({verticalUnit})] # Subset to only rows with the correct vertical unit
+         df.loc[:,'vlevels'] = pd.cut(df['vertical'], levels)
+         if verticalUnit == "pressure (Pa)":
+             df.loc[:,'midpoint'] = df['vlevels'].apply(lambda x: x.mid / 100.) # HK todo units
+         else:
+             df.loc[:,'midpoint'] = df['vlevels'].apply(lambda x: x.mid)

-     df_profile = rmse_bias(df)
-     fig_rmse = px.line(df_profile, y='hPa', x='rmse', title='RMSE by Level', markers=True, color='type', width=800, height=800)
-     fig_rmse.update_yaxes(autorange="reversed")
-     fig_rmse.show()
+     # Calculations
+     df_profile_prior = rmse_bias_totalspread(df, phase='prior')
+     df_profile_posterior = None
+     if 'posterior_ensemble_mean' in df.columns:
+         df_profile_posterior = rmse_bias_totalspread(df, phase='posterior')

-     fig_bias = px.line(df_profile, y='hPa', x='bias', title='Bias by Level', markers=True, color='type', width=800, height=800)
-     fig_bias.update_yaxes(autorange="reversed")
-     fig_bias.show()
+     # Merge prior and posterior dataframes
+     if df_profile_posterior is not None:
+         df_profile = pd.merge(df_profile_prior, df_profile_posterior, on=['midpoint', 'type'], suffixes=('_prior', '_posterior'))
+         fig_rmse = plot_profile_prior_post(df_profile, 'rmse', verticalUnit)
+         fig_rmse.show()
+         fig_bias = plot_profile_prior_post(df_profile, 'bias', verticalUnit)
+         fig_bias.show()
+         fig_ts = plot_profile_prior_post(df_profile, 'totalspread', verticalUnit)
+         fig_ts.show()
+     else:
+         df_profile = df_profile_prior
+         fig_rmse = plot_profile_prior(df_profile, 'rmse', verticalUnit)
+         fig_rmse.show()
+         fig_bias = plot_profile_prior(df_profile, 'bias', verticalUnit)
+         fig_bias.show()
+         fig_ts = plot_profile_prior(df_profile, 'totalspread', verticalUnit)
+         fig_ts.show()

-     return df_profile, fig_rmse, fig_bias
+     return df_profile, fig_rmse, fig_ts, fig_bias

+ def plot_profile_prior_post(df_profile, stat, verticalUnit):
+     """
+     Plots prior and posterior statistics by vertical level for different observation types.
+
+     Parameters:
+         df_profile (pd.DataFrame): DataFrame containing the prior and posterior statistics.
+         stat (str): The statistic to plot (e.g., 'rmse', 'bias', 'totalspread').
+         verticalUnit (str): The unit of the vertical axis (e.g., 'pressure (Pa)').
+
+     Returns:
+         plotly.graph_objects.Figure: The generated Plotly figure.
+     """
+     # Reshape DataFrame to long format for easier plotting
+     df_long = pd.melt(
+         df_profile,
+         id_vars=["midpoint", "type"],
+         value_vars=["prior_"+stat, "posterior_"+stat],
+         var_name=stat+"_type",
+         value_name=stat+"_value"
+     )
+
+     # Define a color mapping for observation each type
+     unique_types = df_long["type"].unique()
+     colors = px.colors.qualitative.Plotly
+     color_mapping = {type_: colors[i % len(colors)] for i, type_ in enumerate(unique_types)}
+
+     # Create a mapping for line styles based on stat
+     line_styles = {"prior_"+stat: "solid", "posterior_"+stat: "dash"}
+
+     # Create the figure
+     fig_stat = go.Figure()
+
+     # Loop through each type and type to add traces
+     for t in df_long["type"].unique():
+         for stat_type, dash_style in line_styles.items():
+             # Filter the DataFrame for this type and stat
+             df_filtered = df_long[(df_long[stat+"_type"] == stat_type) & (df_long["type"] == t)]
+
+             # Add a trace
+             fig_stat.add_trace(go.Scatter(
+                 x=df_filtered[stat+"_value"],
+                 y=df_filtered["midpoint"],
+                 mode='lines+markers',
+                 name='prior '+t if stat_type == "prior_"+stat else 'post ', # Show legend for "prior_stat OBS TYPE" only
+                 line=dict(dash=dash_style, color=color_mapping[t]), # Same color for all traces in group
+                 marker=dict(size=8, color=color_mapping[t]),
+                 legendgroup=t # Group traces by type
+             ))
+
+     # Update layout
+     fig_stat.update_layout(
+         title= stat+' by Level',
+         xaxis_title=stat,
+         yaxis_title=verticalUnit,
+         width=800,
+         height=800,
+         template="plotly_white"
+     )
+
+     if verticalUnit == "pressure (Pa)":
+         fig_stat.update_yaxes(autorange="reversed")
+
+     return fig_stat
+
+
+ def plot_profile_prior(df_profile, stat, verticalUnit):
+     """
+     Plots prior statistics by vertical level for different observation types.
+
+     Parameters:
+         df_profile (pd.DataFrame): DataFrame containing the prior statistics.
+         stat (str): The statistic to plot (e.g., 'rmse', 'bias', 'totalspread').
+         verticalUnit (str): The unit of the vertical axis (e.g., 'pressure (Pa)').
+
+     Returns:
+         plotly.graph_objects.Figure: The generated Plotly figure.
+     """
+     # Reshape DataFrame to long format for easier plotting - not needed for prior only, but
+     # leaving it in for consistency with the plot_profile_prior_post function for now
+     df_long = pd.melt(
+         df_profile,
+         id_vars=["midpoint", "type"],
+         value_vars=["prior_"+stat],
+         var_name=stat+"_type",
+         value_name=stat+"_value"
+     )
+
+     # Define a color mapping for observation each type
+     unique_types = df_long["type"].unique()
+     colors = px.colors.qualitative.Plotly
+     color_mapping = {type_: colors[i % len(colors)] for i, type_ in enumerate(unique_types)}
+
+     # Create the figure
+     fig_stat = go.Figure()
+
+     # Loop through each type to add traces
+     for t in df_long["type"].unique():
+         # Filter the DataFrame for this type and stat
+         df_filtered = df_long[(df_long["type"] == t)]
+
+         # Add a trace
+         fig_stat.add_trace(go.Scatter(
+             x=df_filtered[stat+"_value"],
+             y=df_filtered["midpoint"],
+             mode='lines+markers',
+             name='prior ' + t,
+             line=dict(color=color_mapping[t]), # Same color for all traces in group
+             marker=dict(size=8, color=color_mapping[t]),
+             legendgroup=t # Group traces by type
+         ))
+
+     # Update layout
+     fig_stat.update_layout(
+         title=stat + ' by Level',
+         xaxis_title=stat,
+         yaxis_title=verticalUnit,
+         width=800,
+         height=800,
+         template="plotly_white"
+     )
+
+     if verticalUnit == "pressure (Pa)":
+         fig_stat.update_yaxes(autorange="reversed")
+
+     return fig_stat
+

  def mean_then_sqrt(x):
      """
@@ -130,12 +284,36 @@ def mean_then_sqrt(x):

      return np.sqrt(np.mean(x))

- def rmse_bias(df):
-     rmse_bias_df = df.groupby(['hPa', 'type']).agg({'sq_err':mean_then_sqrt, 'bias':'mean'}).reset_index()
-     rmse_bias_df.rename(columns={'sq_err':'rmse'}, inplace=True)
-
-     return rmse_bias_df
+ def rmse_bias_totalspread(df, phase='prior'):
+     if phase == 'prior':
+         sq_err_column = 'prior_sq_err'
+         bias_column = 'prior_bias'
+         rmse_column = 'prior_rmse'
+         spread_column = 'prior_ensemble_spread'
+         totalspread_column = 'prior_totalspread'
+     elif phase == 'posterior':
+         sq_err_column = 'posterior_sq_err'
+         bias_column = 'posterior_bias'
+         rmse_column = 'posterior_rmse'
+         spread_column = 'posterior_ensemble_spread'
+         totalspread_column = 'posterior_totalspread'
+     else:
+         raise ValueError("Invalid phase. Must be 'prior' or 'posterior'.")
+
+     rmse_bias_ts_df = df.groupby(['midpoint', 'type'], observed=False).agg({
+         sq_err_column: mean_then_sqrt,
+         bias_column: 'mean',
+         spread_column: mean_then_sqrt,
+         'obs_err_var': mean_then_sqrt
+     }).reset_index()

+     # Add column for totalspread
+     rmse_bias_ts_df[totalspread_column] = np.sqrt(rmse_bias_ts_df[spread_column] + rmse_bias_ts_df['obs_err_var'])
+
+     # Rename square error to root mean square error
+     rmse_bias_ts_df.rename(columns={sq_err_column: rmse_column}, inplace=True)
+
+     return rmse_bias_ts_df

  def rmse_bias_by_obs_type(df, obs_type):
      """
@@ -155,7 +333,7 @@ def rmse_bias_by_obs_type(df, obs_type):
          raise ValueError(f"Observation type '{obs_type}' not found in DataFrame.")
      else:
          obs_type_df = df[df['type'] == obs_type]
-         obs_type_agg = obs_type_df.groupby('plevels').agg({'sq_err':mean_then_sqrt, 'bias':'mean'}).reset_index()
+         obs_type_agg = obs_type_df.groupby('vlevels', observed=False).agg({'sq_err':mean_then_sqrt, 'bias':'mean'}).reset_index()
          obs_type_agg.rename(columns={'sq_err':'rmse'}, inplace=True)
          return obs_type_agg
 
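The `vlevels` column produced by `pd.cut` is categorical, which is why the groupby calls gain `observed=False`: it keeps all bins in the result and pins down the behavior explicitly. For instance:

```python
import pandas as pd

df = pd.DataFrame({'vertical': [100.0, 250.0], 'sq_err': [1.0, 4.0]})
df['vlevels'] = pd.cut(df['vertical'], [0, 200, 400, 600])
agg = df.groupby('vlevels', observed=False)['sq_err'].mean()
# Bins with no observations, like (400, 600], appear with NaN rather
# than being dropped from the profile.
```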
pydartdiags-0.0.43.dist-info/METADATA (added)
@@ -0,0 +1,45 @@
+ Metadata-Version: 2.1
+ Name: pydartdiags
+ Version: 0.0.43
+ Summary: Observation Sequence Diagnostics for DART
+ Home-page: https://github.com/NCAR/pyDARTdiags.git
+ Author: Helen Kershaw
+ Author-email: Helen Kershaw <hkershaw@ucar.edu>
+ Project-URL: Homepage, https://github.com/NCAR/pyDARTdiags.git
+ Project-URL: Issues, https://github.com/NCAR/pyDARTdiags/issues
+ Project-URL: Documentation, https://ncar.github.io/pyDARTdiags
+ Classifier: Programming Language :: Python :: 3
+ Classifier: License :: OSI Approved :: Apache Software License
+ Classifier: Operating System :: OS Independent
+ Requires-Python: >=3.8
+ Description-Content-Type: text/markdown
+ License-File: LICENSE
+ Requires-Dist: pandas>=2.2.0
+ Requires-Dist: numpy>=1.26
+ Requires-Dist: plotly>=5.22.0
+ Requires-Dist: pyyaml>=6.0.2
+
+ [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
+ [![codecov](https://codecov.io/gh/NCAR/pyDARTdiags/graph/badge.svg?token=VK55SQZSVD)](https://codecov.io/gh/NCAR/pyDARTdiags)
+ [![PyPI version](https://badge.fury.io/py/pydartdiags.svg)](https://pypi.org/project/pydartdiags/)
+
+
+ # pyDARTdiags
+
+ pyDARTdiags is a Python library for obsevation space diagnostics for the Data Assimilation Research Testbed ([DART](https://github.com/NCAR/DART)).
+
+ pyDARTdiags is under initial development, so please use caution.
+ The MATLAB [observation space diagnostics](https://docs.dart.ucar.edu/en/latest/guide/matlab-observation-space.html) are available through [DART](https://github.com/NCAR/DART).
+
+
+ pyDARTdiags can be installed through pip: https://pypi.org/project/pydartdiags/
+ Documenation : https://ncar.github.io/pyDARTdiags/
+
+ ## Contributing
+ Contributions are welcome! If you have a feature request, bug report, or a suggestion, please open an issue on our GitHub repository.
+ Please read our [Contributors Guide](https://github.com/NCAR/pyDARTdiags/blob/main/CONTRIBUTING.md) if you would like to contribute to
+ pyDARTdiags.
+
+ ## License
+
+ pyDARTdiags is released under the Apache License 2.0. For more details, see the LICENSE file in the root directory of this source tree or visit [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0).
pydartdiags-0.0.43.dist-info/RECORD (added)
@@ -0,0 +1,10 @@
+ pydartdiags/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ pydartdiags/obs_sequence/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ pydartdiags/obs_sequence/obs_sequence.py,sha256=2pddiJ6VRFkaDizYq8HvGUpC4rw7TTV14XjmemjqCNg,34187
+ pydartdiags/plots/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ pydartdiags/plots/plots.py,sha256=UecLgWauO9L_EaGhEVxW3IuKcSU95uRA2mptsxh4-0E,13901
+ pydartdiags-0.0.43.dist-info/LICENSE,sha256=ROglds_Eg_ylXp-1MHmEawDqMw_UsCB4r9sk7z9PU9M,11377
+ pydartdiags-0.0.43.dist-info/METADATA,sha256=udwmddMTrqFpyj0tjOffWVf2xbTI_3IwQCS4ZVvnnuU,2185
+ pydartdiags-0.0.43.dist-info/WHEEL,sha256=A3WOREP4zgxI0fKrHUG8DC8013e3dK3n7a6HDbcEIwE,91
+ pydartdiags-0.0.43.dist-info/top_level.txt,sha256=LfMoPLnSd0VhhlWev1eeX9t6AzvyASOloag0LO_ppWg,12
+ pydartdiags-0.0.43.dist-info/RECORD,,
dist-info/WHEEL
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: setuptools (74.0.0)
+ Generator: setuptools (75.7.0)
  Root-Is-Purelib: true
  Tag: py3-none-any

pydartdiags-0.0.41.dist-info/METADATA (removed)
@@ -1,399 +0,0 @@
- Metadata-Version: 2.1
- Name: pydartdiags
- Version: 0.0.41
- Summary: Observation Sequence Diagnostics for DART
- Home-page: https://github.com/NCAR/pyDARTdiags.git
- Author: Helen Kershaw
- Author-email: Helen Kershaw <hkershaw@ucar.edu>
- Project-URL: Homepage, https://github.com/NCAR/pyDARTdiags.git
- Project-URL: Issues, https://github.com/NCAR/pyDARTdiags/issues
- Project-URL: Documentation, https://ncar.github.io/pyDARTdiags
- Classifier: Programming Language :: Python :: 3
- Classifier: License :: OSI Approved :: Apache Software License
- Classifier: Operating System :: OS Independent
- Requires-Python: >=3.8
- Description-Content-Type: text/markdown
- License-File: LICENSE
- Requires-Dist: pandas>=2.2.0
- Requires-Dist: numpy>=1.26
- Requires-Dist: plotly>=5.22.0
- Requires-Dist: pyyaml>=6.0.2
-
- # pyDARTdiags
-
- pyDARTdiags is a Python library for obsevation space diagnostics for the Data Assimilation Research Testbed ([DART](https://github.com/NCAR/DART)).
-
- pyDARTdiags is under initial development, so please use caution.
- The MATLAB [observation space diagnostics](https://docs.dart.ucar.edu/en/latest/guide/matlab-observation-space.html) are available through [DART](https://github.com/NCAR/DART).
-
-
- pyDARTdiags can be installed through pip: https://pypi.org/project/pydartdiags/
- Documenation : https://ncar.github.io/pyDARTdiags/
-
- We recommend installing pydartdiags in a virtual enviroment:
-
-
- ```
- python3 -m venv dartdiags
- source dartdiags/bin/activate
- pip install pydartdiags
- ```
-
- ## Example importing the obs\_sequence and plots modules
-
- ```python
- from pydartdiags.obs_sequence import obs_sequence as obsq
- from pydartdiags.plots import plots
- ```
-
- ## Examining the dataframe
-
- ```python
- obs_seq = obsq.obs_sequence('obs_seq.final.ascii')
- obs_seq.df.head()
- ```
-
- |   | obs_num | observation | prior_ensemble_mean | prior_ensemble_spread | prior_ensemble_member_1 | prior_ensemble_member_2 | prior_ensemble_member_3 | prior_ensemble_member_4 | prior_ensemble_member_5 | prior_ensemble_member_6 | ... | latitude | vertical | vert_unit | type | seconds | days | time | obs_err_var | bias | sq_err |
- |---|---------|-------------|---------------------|-----------------------|-------------------------|-------------------------|-------------------------|-------------------------|-------------------------|-------------------------|-----|----------|----------|-----------|------|---------|------|------|-------------|------|--------|
- | 0 | 1 | 230.16 | 231.310652 | 0.405191 | 231.304725 | 231.562874 | 231.333915 | 231.297690 | 232.081416 | 231.051063 | ... | 0.012188 | 23950.0 | pressure (Pa) | ACARS_TEMPERATURE | 75603 | 153005 | 2019-12-01 21:00:03 | 1.00 | 1.150652 | 1.324001 |
- | 1 | 2 | 18.40 | 15.720527 | 0.630827 | 14.217207 | 15.558196 | 15.805599 | 16.594644 | 14.877743 | 16.334438 | ... | 0.012188 | 23950.0 | pressure (Pa) | ACARS_U_WIND_COMPONENT | 75603 | 153005 | 2019-12-01 21:00:03 | 6.25 | -2.679473 | 7.179578 |
- | 2 | 3 | 1.60 | -4.932073 | 0.825899 | -5.270562 | -5.955998 | -4.209766 | -5.105016 | -4.669405 | -4.365305 | ... | 0.012188 | 23950.0 | pressure (Pa) | ACARS_V_WIND_COMPONENT | 75603 | 153005 | 2019-12-01 21:00:03 | 6.25 | -6.532073 | 42.667980 |
- | 3 | 4 | 264.16 | 264.060532 | 0.035584 | 264.107192 | 264.097270 | 264.073212 | 264.047718 | 264.074140 | 264.019895 | ... | 0.010389 | 56260.0 | pressure (Pa) | ACARS_TEMPERATURE | 75603 | 153005 | 2019-12-01 21:00:03 | 1.00 | -0.099468 | 0.009894 |
- | 4 | 5 | 11.60 | 10.134115 | 0.063183 | 10.067956 | 10.078798 | 10.120263 | 10.084885 | 10.135112 | 10.140610 | ... | 0.010389 | 56260.0 | pressure (Pa) | ACARS_U_WIND_COMPONENT | 75603 | 153005 | 2019-12-01 21:00:03 | 6.25 | -1.465885 | 2.148818 |
-
- 5 rows × 97 columns
-
- Find the numeber of assimilated (used) observations vs. possible observations by type
-
- ```python
- obsq.possible_vs_used(obs_seq.df)
- ```
-
- |    | type | possible | used |
- |----|------|----------|------|
- | 0  | ACARS_TEMPERATURE | 175429 | 128040 |
- | 1  | ACARS_U_WIND_COMPONENT | 176120 | 126946 |
- | 2  | ACARS_V_WIND_COMPONENT | 176120 | 127834 |
- | 3  | AIRCRAFT_TEMPERATURE | 21335 | 13663 |
- | 4  | AIRCRAFT_U_WIND_COMPONENT | 21044 | 13694 |
- | 5  | AIRCRAFT_V_WIND_COMPONENT | 21044 | 13642 |
- | 6  | AIRS_SPECIFIC_HUMIDITY | 6781 | 0 |
- | 7  | AIRS_TEMPERATURE | 19583 | 7901 |
- | 8  | GPSRO_REFRACTIVITY | 81404 | 54626 |
- | 9  | LAND_SFC_ALTIMETER | 21922 | 0 |
- | 10 | MARINE_SFC_ALTIMETER | 9987 | 0 |
- | 11 | MARINE_SFC_SPECIFIC_HUMIDITY | 4196 | 0 |
- | 12 | MARINE_SFC_TEMPERATURE | 8646 | 0 |
- | 13 | MARINE_SFC_U_WIND_COMPONENT | 8207 | 0 |
- | 14 | MARINE_SFC_V_WIND_COMPONENT | 8207 | 0 |
- | 15 | RADIOSONDE_SPECIFIC_HUMIDITY | 14272 | 0 |
- | 16 | RADIOSONDE_SURFACE_ALTIMETER | 601 | 0 |
- | 17 | RADIOSONDE_TEMPERATURE | 29275 | 22228 |
- | 18 | RADIOSONDE_U_WIND_COMPONENT | 36214 | 27832 |
- | 19 | RADIOSONDE_V_WIND_COMPONENT | 36214 | 27975 |
- | 20 | SAT_U_WIND_COMPONENT | 107212 | 82507 |
- | 21 | SAT_V_WIND_COMPONENT | 107212 | 82647 |
-
-
- ## Example plotting
-
- ### rank histogram
-
- * Select only observations that were assimliated (QC === 0).
- * plot the rank histogram
-
- ```python
- df_qc0 = obsq.select_by_dart_qc(obs_seq.df, 0)
- plots.plot_rank_histogram(df_qc0)
- ```
- ![Rank Histogram](docs/images/rankhist.png)
-
-
- ### plot profile of RMSE and Bias
-
- * Chose levels
- * Select only observations that were assimliated (QC === 0).
- * plot the profiles
-
- ```python
- hPalevels = [0.0, 100.0, 150.0, 200.0, 250.0, 300.0, 400.0, 500.0, 700, 850, 925, 1000]# float("inf")] # Pa?
- plevels = [i * 100 for i in hPalevels]
-
- df_qc0 = obsq.select_by_dart_qc(obs_seq.df, 0) # only qc 0
- df_profile, figrmse, figbias = plots.plot_profile(df_qc0, plevels)
- ```
-
- ![RMSE Plot](docs/images/rmse.png)
-
- ![Bias Plot](docs/images/bias.png)
-
- ## Contributing
- Contributions are welcome! If you have a feature request, bug report, or a suggestion, please open an issue on our GitHub repository.
-
- ## License
-
- pyDARTdiags is released under the Apache License 2.0. For more details, see the LICENSE file in the root directory of this source tree or visit [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0).
pydartdiags-0.0.41.dist-info/RECORD (removed)
@@ -1,10 +0,0 @@
- pydartdiags/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- pydartdiags/obs_sequence/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- pydartdiags/obs_sequence/obs_sequence.py,sha256=WrQ4lFymM1y9KVBl-_SzMR7E_VfPQJ8b4kHcVnIyEOc,21817
- pydartdiags/plots/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- pydartdiags/plots/plots.py,sha256=_vZFgQ9qrmtwE_HAP6_nx3pV4JHRdnYckZ5xUxUH4io,6753
- pydartdiags-0.0.41.dist-info/LICENSE,sha256=ROglds_Eg_ylXp-1MHmEawDqMw_UsCB4r9sk7z9PU9M,11377
- pydartdiags-0.0.41.dist-info/METADATA,sha256=LP13-RMWfmd54Fifdp_r3GDhzfzvd3X-kMFAmR6cA5s,9345
- pydartdiags-0.0.41.dist-info/WHEEL,sha256=UvcQYKBHoFqaQd6LKyqHw9fxEolWLQnlzP0h_LgJAfI,91
- pydartdiags-0.0.41.dist-info/top_level.txt,sha256=LfMoPLnSd0VhhlWev1eeX9t6AzvyASOloag0LO_ppWg,12
- pydartdiags-0.0.41.dist-info/RECORD,,