pydartdiags 0.5.1__py3-none-any.whl → 0.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pydartdiags might be problematic. Click here for more details.
- pydartdiags/obs_sequence/obs_sequence.py +74 -82
- pydartdiags/stats/stats.py +93 -15
- {pydartdiags-0.5.1.dist-info → pydartdiags-0.6.0.dist-info}/METADATA +2 -2
- {pydartdiags-0.5.1.dist-info → pydartdiags-0.6.0.dist-info}/RECORD +7 -7
- {pydartdiags-0.5.1.dist-info → pydartdiags-0.6.0.dist-info}/WHEEL +1 -1
- {pydartdiags-0.5.1.dist-info → pydartdiags-0.6.0.dist-info}/licenses/LICENSE +0 -0
- {pydartdiags-0.5.1.dist-info → pydartdiags-0.6.0.dist-info}/top_level.txt +0 -0
|
@@ -19,17 +19,46 @@ def requires_assimilation_info(func):
|
|
|
19
19
|
return wrapper
|
|
20
20
|
|
|
21
21
|
|
|
22
|
-
class
|
|
22
|
+
class ObsSequence:
|
|
23
23
|
"""
|
|
24
|
-
Initialize an
|
|
25
|
-
or create an empty
|
|
24
|
+
Initialize an ObsSequence object from an ASCII or binary observation sequence file,
|
|
25
|
+
or create an empty ObsSequence object from scratch.
|
|
26
|
+
|
|
27
|
+
1D observations are given a datetime of days, seconds since 2000-01-01 00:00:00
|
|
28
|
+
|
|
29
|
+
3D observations are given a datetime of days, seconds since 1601-01-01 00:00:00 (DART Gregorian calendar)
|
|
26
30
|
|
|
27
31
|
Args:
|
|
28
32
|
file (str): The input observation sequence ASCII or binary file.
|
|
29
|
-
|
|
33
|
+
If None, an empty ObsSequence object is created from scratch.
|
|
34
|
+
synonyms (list, optional): List of additional synonyms for the observation column in the DataFrame.
|
|
35
|
+
The default list is
|
|
36
|
+
|
|
37
|
+
.. code-block:: python
|
|
38
|
+
|
|
39
|
+
['NCEP BUFR observation',
|
|
40
|
+
'AIRS observation',
|
|
41
|
+
'GTSPP observation',
|
|
42
|
+
'SST observation',
|
|
43
|
+
'observations',
|
|
44
|
+
'WOD observation']
|
|
45
|
+
|
|
46
|
+
You can add more synonyms by providing a list of strings when
|
|
47
|
+
creating the ObsSequence object.
|
|
48
|
+
|
|
49
|
+
.. code-block:: python
|
|
50
|
+
|
|
51
|
+
ObsSequence(file, synonyms=['synonym1', 'synonym2'])
|
|
52
|
+
|
|
53
|
+
Raises:
|
|
54
|
+
ValueError: If neither 'loc3d' nor 'loc1d' could be found in the observation sequence.
|
|
55
|
+
|
|
56
|
+
Examples:
|
|
57
|
+
|
|
58
|
+
.. code-block:: python
|
|
59
|
+
|
|
60
|
+
obs_seq = ObsSequence(file='obs_seq.final')
|
|
30
61
|
|
|
31
|
-
Returns:
|
|
32
|
-
An obs_sequence object
|
|
33
62
|
|
|
34
63
|
Attributes:
|
|
35
64
|
df (pandas.DataFrame): The DataFrame containing the observation sequence data.
|
|
@@ -54,34 +83,18 @@ class obs_sequence:
|
|
|
54
83
|
- scale height: 'VERTISSCALEHEIGHT' (unitless)
|
|
55
84
|
loc_mod (str): The location model, either 'loc3d' or 'loc1d'.
|
|
56
85
|
For 3D sphere models: latitude and longitude are in degrees in the DataFrame.
|
|
57
|
-
types (dict): Dictionary of types of observations the observation sequence,
|
|
86
|
+
types (dict): Dictionary of types of observations in the observation sequence,
|
|
58
87
|
e.g. {23: 'ACARS_TEMPERATURE'},
|
|
59
88
|
reverse_types (dict): Dictionary of types with keys and values reversed, e.g.
|
|
60
89
|
{'ACARS_TEMPERATURE': 23}
|
|
61
90
|
synonyms_for_obs (list): List of synonyms for the observation column in the DataFrame.
|
|
62
|
-
The default list is
|
|
63
91
|
|
|
64
|
-
.. code-block:: python
|
|
65
|
-
|
|
66
|
-
[ 'NCEP BUFR observation',
|
|
67
|
-
'AIRS observation',
|
|
68
|
-
'GTSPP observation',
|
|
69
|
-
'SST observation',
|
|
70
|
-
'observations',
|
|
71
|
-
'WOD observation']
|
|
72
|
-
|
|
73
|
-
You can add more synonyms by providing a list of strings when
|
|
74
|
-
creating the obs_sequence object.
|
|
75
|
-
|
|
76
|
-
.. code-block:: python
|
|
77
|
-
|
|
78
|
-
obs_sequence(file, synonyms=['synonym1', 'synonym2']).df
|
|
79
92
|
|
|
80
93
|
seq (generator): Generator of observations from the observation sequence file.
|
|
81
94
|
all_obs (list): List of all observations, each observation is a list.
|
|
82
|
-
Valid when the
|
|
83
|
-
Set to None when the
|
|
84
|
-
|
|
95
|
+
Valid when the ObsSequence is created from a file.
|
|
96
|
+
Set to None when the ObsSequence is created from scratch or multiple
|
|
97
|
+
ObsSequences are joined.
|
|
85
98
|
"""
|
|
86
99
|
|
|
87
100
|
vert = {
|
|
@@ -96,27 +109,6 @@ class obs_sequence:
|
|
|
96
109
|
reversed_vert = {value: key for key, value in vert.items()}
|
|
97
110
|
|
|
98
111
|
def __init__(self, file, synonyms=None):
|
|
99
|
-
"""
|
|
100
|
-
Create an obs_sequence object from an ASCII or binary observation sequence file,
|
|
101
|
-
or create an empty obs_sequence object from scratch.
|
|
102
|
-
|
|
103
|
-
Args:
|
|
104
|
-
file (str): The input observation sequence ASCII or binary file.
|
|
105
|
-
If None, an empty obs_sequence object is created from scratch.
|
|
106
|
-
synonyms (list, optional): List of synonyms for the observation column in the DataFrame.
|
|
107
|
-
|
|
108
|
-
Returns:
|
|
109
|
-
an obs_sequence object
|
|
110
|
-
1D observations are given a datetime of days, seconds since 2000-01-01 00:00:00
|
|
111
|
-
3D observations are given a datetime of days, seconds since 1601-01-01 00:00:00 (DART Gregorian calendar)
|
|
112
|
-
|
|
113
|
-
Examples:
|
|
114
|
-
|
|
115
|
-
.. code-block:: python
|
|
116
|
-
|
|
117
|
-
obs_seq = obs_sequence(file='obs_seq.final')
|
|
118
|
-
|
|
119
|
-
"""
|
|
120
112
|
|
|
121
113
|
self.loc_mod = "None"
|
|
122
114
|
self.file = file
|
|
@@ -214,7 +206,7 @@ class obs_sequence:
|
|
|
214
206
|
data.append(float(location[0])) # location x
|
|
215
207
|
data.append(float(location[1])) # location y
|
|
216
208
|
data.append(float(location[2])) # location z
|
|
217
|
-
data.append(
|
|
209
|
+
data.append(ObsSequence.vert[int(location[3])])
|
|
218
210
|
self.loc_mod = "loc3d"
|
|
219
211
|
except ValueError:
|
|
220
212
|
try:
|
|
@@ -372,7 +364,7 @@ class obs_sequence:
|
|
|
372
364
|
) # sort the DataFrame by time
|
|
373
365
|
df_copy.reset_index(drop=True, inplace=True)
|
|
374
366
|
df_copy["obs_num"] = df_copy.index + 1 # obs_num in time order
|
|
375
|
-
df_copy["linked_list"] =
|
|
367
|
+
df_copy["linked_list"] = ObsSequence.generate_linked_list_pattern(
|
|
376
368
|
len(df_copy)
|
|
377
369
|
) # linked list pattern
|
|
378
370
|
|
|
@@ -594,7 +586,7 @@ class obs_sequence:
|
|
|
594
586
|
with open(file, "rb") as f:
|
|
595
587
|
while True:
|
|
596
588
|
# Read the record length
|
|
597
|
-
record_length =
|
|
589
|
+
record_length = ObsSequence.read_record_length(f)
|
|
598
590
|
if record_length is None:
|
|
599
591
|
break
|
|
600
592
|
record = f.read(record_length)
|
|
@@ -602,7 +594,7 @@ class obs_sequence:
|
|
|
602
594
|
break
|
|
603
595
|
|
|
604
596
|
# Read the trailing record length (should match the leading one)
|
|
605
|
-
|
|
597
|
+
ObsSequence.check_trailing_record_length(f, record_length)
|
|
606
598
|
|
|
607
599
|
linecount += 1
|
|
608
600
|
|
|
@@ -620,7 +612,7 @@ class obs_sequence:
|
|
|
620
612
|
f.seek(0)
|
|
621
613
|
|
|
622
614
|
for _ in range(2):
|
|
623
|
-
record_length =
|
|
615
|
+
record_length = ObsSequence.read_record_length(f)
|
|
624
616
|
if record_length is None:
|
|
625
617
|
break
|
|
626
618
|
|
|
@@ -628,7 +620,7 @@ class obs_sequence:
|
|
|
628
620
|
if not record: # end of file
|
|
629
621
|
break
|
|
630
622
|
|
|
631
|
-
|
|
623
|
+
ObsSequence.check_trailing_record_length(f, record_length)
|
|
632
624
|
header.append(record.decode("utf-8").strip())
|
|
633
625
|
|
|
634
626
|
header.append(str(obs_types_definitions))
|
|
@@ -636,7 +628,7 @@ class obs_sequence:
|
|
|
636
628
|
# obs_types_definitions
|
|
637
629
|
for _ in range(3, 4 + obs_types_definitions):
|
|
638
630
|
# Read the record length
|
|
639
|
-
record_length =
|
|
631
|
+
record_length = ObsSequence.read_record_length(f)
|
|
640
632
|
if record_length is None:
|
|
641
633
|
break
|
|
642
634
|
|
|
@@ -645,7 +637,7 @@ class obs_sequence:
|
|
|
645
637
|
if not record: # end of file
|
|
646
638
|
break
|
|
647
639
|
|
|
648
|
-
|
|
640
|
+
ObsSequence.check_trailing_record_length(f, record_length)
|
|
649
641
|
|
|
650
642
|
if _ == 3:
|
|
651
643
|
continue # num obs_types_definitions
|
|
@@ -663,7 +655,7 @@ class obs_sequence:
|
|
|
663
655
|
5 + obs_types_definitions + num_copies + num_qcs + 1,
|
|
664
656
|
):
|
|
665
657
|
# Read the record length
|
|
666
|
-
record_length =
|
|
658
|
+
record_length = ObsSequence.read_record_length(f)
|
|
667
659
|
if record_length is None:
|
|
668
660
|
break
|
|
669
661
|
|
|
@@ -672,7 +664,7 @@ class obs_sequence:
|
|
|
672
664
|
if not record:
|
|
673
665
|
break
|
|
674
666
|
|
|
675
|
-
|
|
667
|
+
ObsSequence.check_trailing_record_length(f, record_length)
|
|
676
668
|
|
|
677
669
|
if _ == 5 + obs_types_definitions:
|
|
678
670
|
continue
|
|
@@ -683,12 +675,12 @@ class obs_sequence:
|
|
|
683
675
|
|
|
684
676
|
# first and last obs
|
|
685
677
|
# Read the record length
|
|
686
|
-
record_length =
|
|
678
|
+
record_length = ObsSequence.read_record_length(f)
|
|
687
679
|
|
|
688
680
|
# Read the actual record
|
|
689
681
|
record = f.read(record_length)
|
|
690
682
|
|
|
691
|
-
|
|
683
|
+
ObsSequence.check_trailing_record_length(f, record_length)
|
|
692
684
|
|
|
693
685
|
# Read the whole record as two integers
|
|
694
686
|
first, last = struct.unpack("ii", record)[:8]
|
|
@@ -813,7 +805,7 @@ class obs_sequence:
|
|
|
813
805
|
# Skip the first len(obs_seq.header) lines
|
|
814
806
|
for _ in range(header_length - 1):
|
|
815
807
|
# Read the record length
|
|
816
|
-
record_length =
|
|
808
|
+
record_length = ObsSequence.read_record_length(f)
|
|
817
809
|
if record_length is None: # End of file
|
|
818
810
|
break
|
|
819
811
|
|
|
@@ -830,7 +822,7 @@ class obs_sequence:
|
|
|
830
822
|
obs.append(f"OBS {obs_num}")
|
|
831
823
|
for _ in range(n): # number of copies
|
|
832
824
|
# Read the record length
|
|
833
|
-
record_length =
|
|
825
|
+
record_length = ObsSequence.read_record_length(f)
|
|
834
826
|
if record_length is None:
|
|
835
827
|
break
|
|
836
828
|
# Read the actual record (copy)
|
|
@@ -838,10 +830,10 @@ class obs_sequence:
|
|
|
838
830
|
obs.append(struct.unpack("d", record)[0])
|
|
839
831
|
|
|
840
832
|
# Read the trailing record length (should match the leading one)
|
|
841
|
-
|
|
833
|
+
ObsSequence.check_trailing_record_length(f, record_length)
|
|
842
834
|
|
|
843
835
|
# linked list info
|
|
844
|
-
record_length =
|
|
836
|
+
record_length = ObsSequence.read_record_length(f)
|
|
845
837
|
if record_length is None:
|
|
846
838
|
break
|
|
847
839
|
|
|
@@ -850,17 +842,17 @@ class obs_sequence:
|
|
|
850
842
|
linked_list_string = f"{int1:<12} {int2:<10} {int3:<12}"
|
|
851
843
|
obs.append(linked_list_string)
|
|
852
844
|
|
|
853
|
-
|
|
845
|
+
ObsSequence.check_trailing_record_length(f, record_length)
|
|
854
846
|
|
|
855
847
|
# location (note no location header "loc3d" or "loc1d" for binary files)
|
|
856
848
|
obs.append("loc3d")
|
|
857
|
-
record_length =
|
|
849
|
+
record_length = ObsSequence.read_record_length(f)
|
|
858
850
|
record = f.read(record_length)
|
|
859
851
|
x, y, z, vert = struct.unpack("dddi", record[:28])
|
|
860
852
|
location_string = f"{x} {y} {z} {vert}"
|
|
861
853
|
obs.append(location_string)
|
|
862
854
|
|
|
863
|
-
|
|
855
|
+
ObsSequence.check_trailing_record_length(f, record_length)
|
|
864
856
|
|
|
865
857
|
# kind (type of observation) value
|
|
866
858
|
obs.append("kind")
|
|
@@ -870,23 +862,23 @@ class obs_sequence:
|
|
|
870
862
|
kind = f"{struct.unpack('i', record)[0]}"
|
|
871
863
|
obs.append(kind)
|
|
872
864
|
|
|
873
|
-
|
|
865
|
+
ObsSequence.check_trailing_record_length(f, record_length)
|
|
874
866
|
|
|
875
867
|
# time (seconds, days)
|
|
876
|
-
record_length =
|
|
868
|
+
record_length = ObsSequence.read_record_length(f)
|
|
877
869
|
record = f.read(record_length)
|
|
878
870
|
seconds, days = struct.unpack("ii", record)[:8]
|
|
879
871
|
time_string = f"{seconds} {days}"
|
|
880
872
|
obs.append(time_string)
|
|
881
873
|
|
|
882
|
-
|
|
874
|
+
ObsSequence.check_trailing_record_length(f, record_length)
|
|
883
875
|
|
|
884
876
|
# obs error variance
|
|
885
|
-
record_length =
|
|
877
|
+
record_length = ObsSequence.read_record_length(f)
|
|
886
878
|
record = f.read(record_length)
|
|
887
879
|
obs.append(struct.unpack("d", record)[0])
|
|
888
880
|
|
|
889
|
-
|
|
881
|
+
ObsSequence.check_trailing_record_length(f, record_length)
|
|
890
882
|
|
|
891
883
|
yield obs
|
|
892
884
|
|
|
@@ -945,29 +937,29 @@ class obs_sequence:
|
|
|
945
937
|
"""
|
|
946
938
|
Join a list of observation sequences together.
|
|
947
939
|
|
|
948
|
-
This method combines the headers and observations from a list of
|
|
949
|
-
into a single
|
|
940
|
+
This method combines the headers and observations from a list of ObsSequence objects
|
|
941
|
+
into a single ObsSequence object.
|
|
950
942
|
|
|
951
943
|
Args:
|
|
952
|
-
obs_sequences (list of
|
|
944
|
+
obs_sequences (list of ObsSequences): The list of observation sequence objects to join.
|
|
953
945
|
copies (list of str, optional): A list of copy names to include in the combined data.
|
|
954
946
|
If not provided, all copies are included.
|
|
955
947
|
|
|
956
948
|
Returns:
|
|
957
|
-
A new
|
|
949
|
+
A new ObsSequence object containing the combined data.
|
|
958
950
|
|
|
959
951
|
Example:
|
|
960
952
|
.. code-block:: python
|
|
961
953
|
|
|
962
|
-
obs_seq1 =
|
|
963
|
-
obs_seq2 =
|
|
964
|
-
obs_seq3 =
|
|
965
|
-
combined =
|
|
954
|
+
obs_seq1 = ObsSequence(file='obs_seq1.final')
|
|
955
|
+
obs_seq2 = ObsSequence(file='obs_seq2.final')
|
|
956
|
+
obs_seq3 = ObsSequence(file='obs_seq3.final')
|
|
957
|
+
combined = ObsSequence.join([obs_seq1, obs_seq2, obs_seq3])
|
|
966
958
|
"""
|
|
967
959
|
if not obs_sequences:
|
|
968
960
|
raise ValueError("The list of observation sequences is empty.")
|
|
969
961
|
|
|
970
|
-
# Create a new
|
|
962
|
+
# Create a new ObsSequence object with the combined data
|
|
971
963
|
combo = cls(file=None)
|
|
972
964
|
|
|
973
965
|
# Check if all obs_sequences have compatible attributes
|
|
@@ -1092,7 +1084,7 @@ class obs_sequence:
|
|
|
1092
1084
|
|
|
1093
1085
|
# create linked list for obs
|
|
1094
1086
|
combo.df = combined_df.sort_values(by="time").reset_index(drop=True)
|
|
1095
|
-
combo.df["linked_list"] =
|
|
1087
|
+
combo.df["linked_list"] = ObsSequence.generate_linked_list_pattern(
|
|
1096
1088
|
len(combo.df)
|
|
1097
1089
|
)
|
|
1098
1090
|
combo.df["obs_num"] = combined_df.index + 1
|
|
@@ -1125,7 +1117,7 @@ class obs_sequence:
|
|
|
1125
1117
|
)
|
|
1126
1118
|
|
|
1127
1119
|
def create_header(self, n):
|
|
1128
|
-
"""Create a header for the obs_seq file from the
|
|
1120
|
+
"""Create a header for the obs_seq file from the ObsSequence object."""
|
|
1129
1121
|
assert (
|
|
1130
1122
|
self.n_copies == self.n_non_qc + self.n_qc
|
|
1131
1123
|
), "n_copies must be equal to n_non_qc + n_qc"
|
pydartdiags/stats/stats.py
CHANGED
|
@@ -4,8 +4,6 @@ import numpy as np
|
|
|
4
4
|
from functools import wraps
|
|
5
5
|
from datetime import datetime, timedelta
|
|
6
6
|
|
|
7
|
-
# from pydartdiags.obs_sequence import obs_sequence as obsq
|
|
8
|
-
|
|
9
7
|
|
|
10
8
|
def apply_to_phases_in_place(func):
|
|
11
9
|
"""
|
|
@@ -93,6 +91,12 @@ def calculate_rank(df, phase):
|
|
|
93
91
|
"""
|
|
94
92
|
Calculate the rank of observations within an ensemble.
|
|
95
93
|
|
|
94
|
+
Note:
|
|
95
|
+
|
|
96
|
+
This function is decorated with @apply_to_phases_by_obs, which modifies its usage.
|
|
97
|
+
You should call it as calculate_rank(df), and the decorator will automatically apply the
|
|
98
|
+
function to all relevant phases ('prior' and 'posterior').
|
|
99
|
+
|
|
96
100
|
This function takes a DataFrame containing ensemble predictions and observed values,
|
|
97
101
|
adds sampling noise to the ensemble predictions, and calculates the rank of the observed
|
|
98
102
|
value within the perturbed ensemble for each observation. The rank indicates the position
|
|
@@ -103,8 +107,6 @@ def calculate_rank(df, phase):
|
|
|
103
107
|
Parameters:
|
|
104
108
|
df (pd.DataFrame): A DataFrame with columns for rank, and observation type.
|
|
105
109
|
|
|
106
|
-
phase (str): The phase for which to calculate the statistics ('prior' or 'posterior')
|
|
107
|
-
|
|
108
110
|
Returns:
|
|
109
111
|
DataFrame containing columns for 'rank' and observation 'type'.
|
|
110
112
|
"""
|
|
@@ -158,15 +160,20 @@ def diag_stats(df, phase):
|
|
|
158
160
|
"""
|
|
159
161
|
Calculate diagnostic statistics for a given phase and add them to the DataFrame.
|
|
160
162
|
|
|
163
|
+
Note:
|
|
164
|
+
This function is decorated with @apply_to_phases_in_place, which modifies its usage.
|
|
165
|
+
You should call it as diag_stats(df), and the decorator will automatically apply the
|
|
166
|
+
function to all relevant phases ('prior' and 'posterior'), modifying the DataFrame
|
|
167
|
+
in place.
|
|
168
|
+
|
|
161
169
|
Args:
|
|
162
170
|
df (pandas.DataFrame): The input DataFrame containing observation data and ensemble statistics.
|
|
163
|
-
|
|
164
|
-
- 'observation': The actual observation values.
|
|
165
|
-
- 'obs_err_var': The variance of the observation error.
|
|
166
|
-
- 'prior_ensemble_mean' and/or 'posterior_ensemble_mean': The mean of the ensemble.
|
|
167
|
-
- 'prior_ensemble_spread' and/or 'posterior_ensemble_spread': The spread of the ensemble.
|
|
171
|
+
The DataFrame must include the following columns:
|
|
168
172
|
|
|
169
|
-
|
|
173
|
+
- 'observation': The actual observation values.
|
|
174
|
+
- 'obs_err_var': The variance of the observation error.
|
|
175
|
+
- 'prior_ensemble_mean' and/or 'posterior_ensemble_mean': The mean of the ensemble.
|
|
176
|
+
- 'prior_ensemble_spread' and/or 'posterior_ensemble_spread': The spread of the ensemble.
|
|
170
177
|
|
|
171
178
|
Returns:
|
|
172
179
|
None: The function modifies the DataFrame in place by adding the following columns:
|
|
@@ -203,9 +210,12 @@ def bin_by_layer(df, levels, verticalUnit="pressure (Pa)"):
|
|
|
203
210
|
vertical level bin. Only observations (row) with the specified vertical unit are binned.
|
|
204
211
|
|
|
205
212
|
Args:
|
|
206
|
-
df (pandas.DataFrame): The input DataFrame containing observation data.
|
|
213
|
+
df (pandas.DataFrame): The input DataFrame containing observation data.
|
|
214
|
+
The DataFrame must include the following columns:
|
|
215
|
+
|
|
207
216
|
- 'vertical': The vertical coordinate values of the observations.
|
|
208
217
|
- 'vert_unit': The unit of the vertical coordinate values.
|
|
218
|
+
|
|
209
219
|
levels (list): A list of bin edges for the vertical levels.
|
|
210
220
|
verticalUnit (str, optional): The unit of the vertical axis (e.g., 'pressure (Pa)'). Default is 'pressure (Pa)'.
|
|
211
221
|
|
|
@@ -261,6 +271,28 @@ def bin_by_time(df, time_value):
|
|
|
261
271
|
|
|
262
272
|
@apply_to_phases_by_type_return_df
|
|
263
273
|
def grand_statistics(df, phase):
|
|
274
|
+
"""
|
|
275
|
+
Calculate grand statistics (RMSE, bias, total spread) for each observation type and phase.
|
|
276
|
+
|
|
277
|
+
This function assumes that diagnostic statistics (such as squared error, bias, and total variance)
|
|
278
|
+
have already been computed by :func:`diag_stats` and are present in the DataFrame. It groups the data by observation
|
|
279
|
+
type and computes the root mean square error (RMSE), mean bias, and total spread for the specified phase.
|
|
280
|
+
|
|
281
|
+
Note:
|
|
282
|
+
This function is decorated with @apply_to_phases_by_type_return_df, which modifies its usage.
|
|
283
|
+
You should call it as grand_statistics(df), and the decorator will automatically apply the function
|
|
284
|
+
to all relevant phases ('prior' and 'posterior') and return a merged DataFrame.
|
|
285
|
+
|
|
286
|
+
Args:
|
|
287
|
+
df (pandas.DataFrame): The input DataFrame containing diagnostic statistics for observations.
|
|
288
|
+
|
|
289
|
+
Returns:
|
|
290
|
+
pandas.DataFrame: A DataFrame with columns:
|
|
291
|
+
- 'type': The observation type.
|
|
292
|
+
- '{phase}_rmse': The root mean square error for the phase.
|
|
293
|
+
- '{phase}_bias': The mean bias for the phase.
|
|
294
|
+
- '{phase}_totalspread': The total spread for the phase.
|
|
295
|
+
"""
|
|
264
296
|
|
|
265
297
|
# assuming diag_stats has been called
|
|
266
298
|
grand = (
|
|
@@ -283,6 +315,33 @@ def grand_statistics(df, phase):
|
|
|
283
315
|
|
|
284
316
|
@apply_to_phases_by_type_return_df
|
|
285
317
|
def layer_statistics(df, phase):
|
|
318
|
+
"""
|
|
319
|
+
Calculate statistics (RMSE, bias, total spread) for each observation type and vertical layer.
|
|
320
|
+
|
|
321
|
+
This function assumes that diagnostic statistics (such as squared error, bias, and total variance)
|
|
322
|
+
have already been computed with :func:`diag_stats` and are present in the DataFrame. It groups the data by
|
|
323
|
+
vertical layer midpoint and observation type, and computes the root mean square error (RMSE),
|
|
324
|
+
mean bias, and total spread for the specified phase for each vertical layer.
|
|
325
|
+
|
|
326
|
+
Note:
|
|
327
|
+
This function is decorated with @apply_to_phases_by_type_return_df, which modifies its usage.
|
|
328
|
+
You should call it as layer_statistics(df), and the decorator will automatically apply the function
|
|
329
|
+
to all relevant phases ('prior' and 'posterior') and return a merged DataFrame.
|
|
330
|
+
|
|
331
|
+
Args:
|
|
332
|
+
df (pandas.DataFrame): The input DataFrame containing diagnostic statistics for observations.
|
|
333
|
+
phase (str): The phase for which to calculate the statistics ('prior' or 'posterior').
|
|
334
|
+
|
|
335
|
+
Returns:
|
|
336
|
+
pandas.DataFrame: A DataFrame with columns:
|
|
337
|
+
- 'midpoint': The midpoint of the vertical layer.
|
|
338
|
+
- 'type': The observation type.
|
|
339
|
+
- '{phase}_rmse': The root mean square error for the phase.
|
|
340
|
+
- '{phase}_bias': The mean bias for the phase.
|
|
341
|
+
- '{phase}_totalspread': The total spread for the phase.
|
|
342
|
+
- 'vert_unit': The vertical unit.
|
|
343
|
+
- 'vlevels': The categorized vertical level.
|
|
344
|
+
"""
|
|
286
345
|
|
|
287
346
|
# assuming diag_stats has been called
|
|
288
347
|
layer_stats = (
|
|
@@ -310,14 +369,31 @@ def layer_statistics(df, phase):
|
|
|
310
369
|
@apply_to_phases_by_type_return_df
|
|
311
370
|
def time_statistics(df, phase):
|
|
312
371
|
"""
|
|
313
|
-
Calculate time-based statistics for
|
|
372
|
+
Calculate time-based statistics (RMSE, bias, total spread) for each observation type and time bin.
|
|
373
|
+
|
|
374
|
+
This function assumes that diagnostic statistics (such as squared error, bias, and total variance)
|
|
375
|
+
have already been computed by :func:`diag_stats` and are present in the DataFrame. It groups the data
|
|
376
|
+
by time bin midpoint and observation type, and computes the root mean square error (RMSE), mean bias,
|
|
377
|
+
and total spread for the specified phase for each time bin.
|
|
378
|
+
|
|
379
|
+
Note:
|
|
380
|
+
This function is decorated with @apply_to_phases_by_type_return_df.
|
|
381
|
+
You should call it as time_statistics(df), and the decorator will automatically apply the function
|
|
382
|
+
to all relevant phases ('prior' and 'posterior') and return a merged DataFrame.
|
|
314
383
|
|
|
315
384
|
Args:
|
|
316
|
-
df (pandas.DataFrame): The input DataFrame containing
|
|
385
|
+
df (pandas.DataFrame): The input DataFrame containing diagnostic statistics for observations.
|
|
317
386
|
phase (str): The phase for which to calculate the statistics ('prior' or 'posterior').
|
|
318
387
|
|
|
319
388
|
Returns:
|
|
320
|
-
pandas.DataFrame: A DataFrame
|
|
389
|
+
pandas.DataFrame: A DataFrame with columns:
|
|
390
|
+
- 'time_bin_midpoint': The midpoint of the time bin.
|
|
391
|
+
- 'type': The observation type.
|
|
392
|
+
- '{phase}_rmse': The root mean square error for the phase.
|
|
393
|
+
- '{phase}_bias': The mean bias for the phase.
|
|
394
|
+
- '{phase}_totalspread': The total spread for the phase.
|
|
395
|
+
- 'time_bin': The time bin interval.
|
|
396
|
+
- 'time': The first time value in the bin.
|
|
321
397
|
"""
|
|
322
398
|
# Assuming diag_stats has been called
|
|
323
399
|
time_stats = (
|
|
@@ -402,7 +478,9 @@ def possible_vs_used_by_time(df):
|
|
|
402
478
|
Calculates the count of possible vs. used observations by type and time bin.
|
|
403
479
|
|
|
404
480
|
Args:
|
|
405
|
-
df (pd.DataFrame): The input DataFrame containing observation data.
|
|
481
|
+
df (pd.DataFrame): The input DataFrame containing observation data.
|
|
482
|
+
The DataFrame must include:
|
|
483
|
+
|
|
406
484
|
- 'type': The observation type.
|
|
407
485
|
- 'time_bin_midpoint': The midpoint of the time bin.
|
|
408
486
|
- 'observation': The observation values.
|
|
@@ -1,15 +1,15 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pydartdiags
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.6.0
|
|
4
4
|
Summary: Observation Sequence Diagnostics for DART
|
|
5
5
|
Home-page: https://github.com/NCAR/pyDARTdiags.git
|
|
6
6
|
Author: Helen Kershaw
|
|
7
7
|
Author-email: Helen Kershaw <hkershaw@ucar.edu>
|
|
8
|
+
License-Expression: Apache-2.0
|
|
8
9
|
Project-URL: Homepage, https://github.com/NCAR/pyDARTdiags.git
|
|
9
10
|
Project-URL: Issues, https://github.com/NCAR/pyDARTdiags/issues
|
|
10
11
|
Project-URL: Documentation, https://ncar.github.io/pyDARTdiags
|
|
11
12
|
Classifier: Programming Language :: Python :: 3
|
|
12
|
-
Classifier: License :: OSI Approved :: Apache Software License
|
|
13
13
|
Classifier: Operating System :: OS Independent
|
|
14
14
|
Requires-Python: >=3.8
|
|
15
15
|
Description-Content-Type: text/markdown
|
|
@@ -3,13 +3,13 @@ pydartdiags/matplots/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hS
|
|
|
3
3
|
pydartdiags/matplots/matplots.py,sha256=Bo0TTz1gvsHEvTfTfLfdTi_3hNRN1okmyY5a5yYgtzk,13455
|
|
4
4
|
pydartdiags/obs_sequence/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
5
5
|
pydartdiags/obs_sequence/composite_types.yaml,sha256=PVLMU6x6KcVMCwPB-U65C_e0YQUemfqUhYMpf1DhFOY,917
|
|
6
|
-
pydartdiags/obs_sequence/obs_sequence.py,sha256=
|
|
6
|
+
pydartdiags/obs_sequence/obs_sequence.py,sha256=5HfqOPoF2DyZQrUiGrYEwLJ9Iewe5DIzq0pdxR3bsnk,48037
|
|
7
7
|
pydartdiags/plots/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
8
8
|
pydartdiags/plots/plots.py,sha256=U7WQjE_qN-5a8-85D-PkkgILSFBzTJQ1mcGBa7l5DHI,6464
|
|
9
9
|
pydartdiags/stats/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
10
|
-
pydartdiags/stats/stats.py,sha256=
|
|
11
|
-
pydartdiags-0.
|
|
12
|
-
pydartdiags-0.
|
|
13
|
-
pydartdiags-0.
|
|
14
|
-
pydartdiags-0.
|
|
15
|
-
pydartdiags-0.
|
|
10
|
+
pydartdiags/stats/stats.py,sha256=a88VuLoHOlhbjYjnrVPHVNnhiDx-4B3YA1jbc6FUSyU,20193
|
|
11
|
+
pydartdiags-0.6.0.dist-info/licenses/LICENSE,sha256=ROglds_Eg_ylXp-1MHmEawDqMw_UsCB4r9sk7z9PU9M,11377
|
|
12
|
+
pydartdiags-0.6.0.dist-info/METADATA,sha256=ZeVGK6hTX2tgIiedCVcavDPn195yCh8LO9-ziliePog,2381
|
|
13
|
+
pydartdiags-0.6.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
14
|
+
pydartdiags-0.6.0.dist-info/top_level.txt,sha256=LfMoPLnSd0VhhlWev1eeX9t6AzvyASOloag0LO_ppWg,12
|
|
15
|
+
pydartdiags-0.6.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|