pydartdiags 0.5.1__tar.gz → 0.6.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (24)
  1. {pydartdiags-0.5.1/src/pydartdiags.egg-info → pydartdiags-0.6.0}/PKG-INFO +2 -2
  2. {pydartdiags-0.5.1 → pydartdiags-0.6.0}/pyproject.toml +2 -2
  3. {pydartdiags-0.5.1 → pydartdiags-0.6.0}/setup.py +1 -1
  4. {pydartdiags-0.5.1 → pydartdiags-0.6.0}/src/pydartdiags/obs_sequence/obs_sequence.py +74 -82
  5. {pydartdiags-0.5.1 → pydartdiags-0.6.0}/src/pydartdiags/stats/stats.py +93 -15
  6. {pydartdiags-0.5.1 → pydartdiags-0.6.0/src/pydartdiags.egg-info}/PKG-INFO +2 -2
  7. {pydartdiags-0.5.1 → pydartdiags-0.6.0}/tests/test_obs_sequence.py +48 -48
  8. {pydartdiags-0.5.1 → pydartdiags-0.6.0}/LICENSE +0 -0
  9. {pydartdiags-0.5.1 → pydartdiags-0.6.0}/MANIFEST.in +0 -0
  10. {pydartdiags-0.5.1 → pydartdiags-0.6.0}/README.md +0 -0
  11. {pydartdiags-0.5.1 → pydartdiags-0.6.0}/setup.cfg +0 -0
  12. {pydartdiags-0.5.1 → pydartdiags-0.6.0}/src/pydartdiags/__init__.py +0 -0
  13. {pydartdiags-0.5.1 → pydartdiags-0.6.0}/src/pydartdiags/matplots/__init__.py +0 -0
  14. {pydartdiags-0.5.1 → pydartdiags-0.6.0}/src/pydartdiags/matplots/matplots.py +0 -0
  15. {pydartdiags-0.5.1 → pydartdiags-0.6.0}/src/pydartdiags/obs_sequence/__init__.py +0 -0
  16. {pydartdiags-0.5.1 → pydartdiags-0.6.0}/src/pydartdiags/obs_sequence/composite_types.yaml +0 -0
  17. {pydartdiags-0.5.1 → pydartdiags-0.6.0}/src/pydartdiags/plots/__init__.py +0 -0
  18. {pydartdiags-0.5.1 → pydartdiags-0.6.0}/src/pydartdiags/plots/plots.py +0 -0
  19. {pydartdiags-0.5.1 → pydartdiags-0.6.0}/src/pydartdiags/stats/__init__.py +0 -0
  20. {pydartdiags-0.5.1 → pydartdiags-0.6.0}/src/pydartdiags.egg-info/SOURCES.txt +0 -0
  21. {pydartdiags-0.5.1 → pydartdiags-0.6.0}/src/pydartdiags.egg-info/dependency_links.txt +0 -0
  22. {pydartdiags-0.5.1 → pydartdiags-0.6.0}/src/pydartdiags.egg-info/requires.txt +0 -0
  23. {pydartdiags-0.5.1 → pydartdiags-0.6.0}/src/pydartdiags.egg-info/top_level.txt +0 -0
  24. {pydartdiags-0.5.1 → pydartdiags-0.6.0}/tests/test_stats.py +0 -0
PKG-INFO
@@ -1,15 +1,15 @@
  Metadata-Version: 2.4
  Name: pydartdiags
- Version: 0.5.1
+ Version: 0.6.0
  Summary: Observation Sequence Diagnostics for DART
  Home-page: https://github.com/NCAR/pyDARTdiags.git
  Author: Helen Kershaw
  Author-email: Helen Kershaw <hkershaw@ucar.edu>
+ License-Expression: Apache-2.0
  Project-URL: Homepage, https://github.com/NCAR/pyDARTdiags.git
  Project-URL: Issues, https://github.com/NCAR/pyDARTdiags/issues
  Project-URL: Documentation, https://ncar.github.io/pyDARTdiags
  Classifier: Programming Language :: Python :: 3
- Classifier: License :: OSI Approved :: Apache Software License
  Classifier: Operating System :: OS Independent
  Requires-Python: >=3.8
  Description-Content-Type: text/markdown
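
This metadata hunk (and its egg-info twin further down) reflects one packaging change: the deprecated "License :: OSI Approved" Trove classifier is dropped in favor of a PEP 639 License-Expression field. A quick way to see which form an installed copy carries, assuming pydartdiags is installed (importlib.metadata is in the standard library from Python 3.8):

    from importlib.metadata import metadata

    md = metadata("pydartdiags")             # email.message.Message-like object
    print(md.get("License-Expression"))      # "Apache-2.0" on 0.6.0, None on 0.5.1
    print(md.get_all("Classifier"))          # license classifier present only on 0.5.1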
pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
  [project]
  name = "pydartdiags"
- version = "0.5.1"
+ version = "0.6.0"
  authors = [
  { name="Helen Kershaw", email="hkershaw@ucar.edu" },
  ]
@@ -13,9 +13,9 @@ readme = "README.md"
  requires-python = ">=3.8"
  classifiers = [
  "Programming Language :: Python :: 3",
- "License :: OSI Approved :: Apache Software License",
  "Operating System :: OS Independent",
  ]
+ license = "Apache-2.0"
  dependencies = [
  "pandas>=2.2.0",
  "numpy>=1.26",
setup.py
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
 
  setup(
  name="pydartdiags",
- version="0.5.1",
+ version="0.6.0",
  packages=find_packages(where="src"),
  package_dir={"": "src"},
  include_package_data=True,
src/pydartdiags/obs_sequence/obs_sequence.py
@@ -19,17 +19,46 @@ def requires_assimilation_info(func):
  return wrapper
 
 
- class obs_sequence:
+ class ObsSequence:
  """
- Initialize an obs_sequence object from an ASCII or binary observation sequence file,
- or create an empty obs_sequence object from scratch.
+ Initialize an ObsSequence object from an ASCII or binary observation sequence file,
+ or create an empty ObsSequence object from scratch.
+
+ 1D observations are given a datetime of days, seconds since 2000-01-01 00:00:00
+
+ 3D observations are given a datetime of days, seconds since 1601-01-01 00:00:00 (DART Gregorian calendar)
 
  Args:
  file (str): The input observation sequence ASCII or binary file.
- If None, an empty obs_sequence object is created from scratch.
+ If None, an empty ObsSequence object is created from scratch.
+ synonyms (list, optional): List of additional synonyms for the observation column in the DataFrame.
+ The default list is
+
+ .. code-block:: python
+
+ ['NCEP BUFR observation',
+ 'AIRS observation',
+ 'GTSPP observation',
+ 'SST observation',
+ 'observations',
+ 'WOD observation']
+
+ You can add more synonyms by providing a list of strings when
+ creating the ObsSequence object.
+
+ .. code-block:: python
+
+ ObsSequence(file, synonyms=['synonym1', 'synonym2'])
+
+ Raises:
+ ValueError: If neither 'loc3d' nor 'loc1d' could be found in the observation sequence.
+
+ Examples:
+
+ .. code-block:: python
+
+ obs_seq = ObsSequence(file='obs_seq.final')
 
- Returns:
- An obs_sequence object
 
  Attributes:
  df (pandas.DataFrame): The DataFrame containing the observation sequence data.
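
The headline API change in 0.6.0 is this rename of the obs_sequence class to ObsSequence (PEP 8 class naming). A minimal migration sketch; the obsq module alias follows the convention used in the package's own tests and the commented-out import in stats.py:

    from pydartdiags.obs_sequence import obs_sequence as obsq

    # 0.5.1 (old class name):
    # obs_seq = obsq.obs_sequence("obs_seq.final")

    # 0.6.0 (renamed class; constructor arguments are unchanged):
    obs_seq = obsq.ObsSequence("obs_seq.final")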
@@ -54,34 +83,18 @@ class obs_sequence:
  - scale height: 'VERTISSCALEHEIGHT' (unitless)
  loc_mod (str): The location model, either 'loc3d' or 'loc1d'.
  For 3D sphere models: latitude and longitude are in degrees in the DataFrame.
- types (dict): Dictionary of types of observations the observation sequence,
+ types (dict): Dictionary of types of observations in the observation sequence,
  e.g. {23: 'ACARS_TEMPERATURE'},
  reverse_types (dict): Dictionary of types with keys and values reversed, e.g
  {'ACARS_TEMPERATURE': 23}
  synonyms_for_obs (list): List of synonyms for the observation column in the DataFrame.
- The default list is
 
- .. code-block:: python
-
- [ 'NCEP BUFR observation',
- 'AIRS observation',
- 'GTSPP observation',
- 'SST observation',
- 'observations',
- 'WOD observation']
-
- You can add more synonyms by providing a list of strings when
- creating the obs_sequence object.
-
- .. code-block:: python
-
- obs_sequence(file, synonyms=['synonym1', 'synonym2']).df
 
  seq (generator): Generator of observations from the observation sequence file.
  all_obs (list): List of all observations, each observation is a list.
- Valid when the obs_sequence is created from a file.
- Set to None when the obs_sequence is created from scratch or multiple
- obs_sequences are joined.
+ Valid when the ObsSequence is created from a file.
+ Set to None when the ObsSequence is created from scratch or multiple
+ ObsSequences are joined.
  """
 
  vert = {
@@ -96,27 +109,6 @@ class obs_sequence:
  reversed_vert = {value: key for key, value in vert.items()}
 
  def __init__(self, file, synonyms=None):
- """
- Create an obs_sequence object from an ASCII or binary observation sequence file,
- or create an empty obs_sequence object from scratch.
-
- Args:
- file (str): The input observation sequence ASCII or binary file.
- If None, an empty obs_sequence object is created from scratch.
- synonyms (list, optional): List of synonyms for the observation column in the DataFrame.
-
- Returns:
- an obs_sequence object
- 1D observations are given a datetime of days, seconds since 2000-01-01 00:00:00
- 3D observations are given a datetime of days, seconds since 1601-01-01 00:00:00 (DART Gregorian calendar)
-
- Examples:
-
- .. code-block:: python
-
- obs_seq = obs_sequence(file='obs_seq.final')
-
- """
 
  self.loc_mod = "None"
  self.file = file
@@ -214,7 +206,7 @@
  data.append(float(location[0])) # location x
  data.append(float(location[1])) # location y
  data.append(float(location[2])) # location z
- data.append(obs_sequence.vert[int(location[3])])
+ data.append(ObsSequence.vert[int(location[3])])
  self.loc_mod = "loc3d"
  except ValueError:
  try:
@@ -372,7 +364,7 @@
  ) # sort the DataFrame by time
  df_copy.reset_index(drop=True, inplace=True)
  df_copy["obs_num"] = df_copy.index + 1 # obs_num in time order
- df_copy["linked_list"] = obs_sequence.generate_linked_list_pattern(
+ df_copy["linked_list"] = ObsSequence.generate_linked_list_pattern(
  len(df_copy)
  ) # linked list pattern
 
@@ -594,7 +586,7 @@
  with open(file, "rb") as f:
  while True:
  # Read the record length
- record_length = obs_sequence.read_record_length(f)
+ record_length = ObsSequence.read_record_length(f)
  if record_length is None:
  break
  record = f.read(record_length)
@@ -602,7 +594,7 @@
  break
 
  # Read the trailing record length (should match the leading one)
- obs_sequence.check_trailing_record_length(f, record_length)
+ ObsSequence.check_trailing_record_length(f, record_length)
 
  linecount += 1
 
@@ -620,7 +612,7 @@
  f.seek(0)
 
  for _ in range(2):
- record_length = obs_sequence.read_record_length(f)
+ record_length = ObsSequence.read_record_length(f)
  if record_length is None:
  break
 
@@ -628,7 +620,7 @@
  if not record: # end of file
  break
 
- obs_sequence.check_trailing_record_length(f, record_length)
+ ObsSequence.check_trailing_record_length(f, record_length)
  header.append(record.decode("utf-8").strip())
 
  header.append(str(obs_types_definitions))
@@ -636,7 +628,7 @@
  # obs_types_definitions
  for _ in range(3, 4 + obs_types_definitions):
  # Read the record length
- record_length = obs_sequence.read_record_length(f)
+ record_length = ObsSequence.read_record_length(f)
  if record_length is None:
  break
 
@@ -645,7 +637,7 @@
  if not record: # end of file
  break
 
- obs_sequence.check_trailing_record_length(f, record_length)
+ ObsSequence.check_trailing_record_length(f, record_length)
 
  if _ == 3:
  continue # num obs_types_definitions
@@ -663,7 +655,7 @@
  5 + obs_types_definitions + num_copies + num_qcs + 1,
  ):
  # Read the record length
- record_length = obs_sequence.read_record_length(f)
+ record_length = ObsSequence.read_record_length(f)
  if record_length is None:
  break
 
@@ -672,7 +664,7 @@
  if not record:
  break
 
- obs_sequence.check_trailing_record_length(f, record_length)
+ ObsSequence.check_trailing_record_length(f, record_length)
 
  if _ == 5 + obs_types_definitions:
  continue
@@ -683,12 +675,12 @@
 
  # first and last obs
  # Read the record length
- record_length = obs_sequence.read_record_length(f)
+ record_length = ObsSequence.read_record_length(f)
 
  # Read the actual record
  record = f.read(record_length)
 
- obs_sequence.check_trailing_record_length(f, record_length)
+ ObsSequence.check_trailing_record_length(f, record_length)
 
  # Read the whole record as a two integers
  first, last = struct.unpack("ii", record)[:8]
@@ -813,7 +805,7 @@
  # Skip the first len(obs_seq.header) lines
  for _ in range(header_length - 1):
  # Read the record length
- record_length = obs_sequence.read_record_length(f)
+ record_length = ObsSequence.read_record_length(f)
  if record_length is None: # End of file
  break
 
@@ -830,7 +822,7 @@
  obs.append(f"OBS {obs_num}")
  for _ in range(n): # number of copies
  # Read the record length
- record_length = obs_sequence.read_record_length(f)
+ record_length = ObsSequence.read_record_length(f)
  if record_length is None:
  break
  # Read the actual record (copie)
@@ -838,10 +830,10 @@
  obs.append(struct.unpack("d", record)[0])
 
  # Read the trailing record length (should match the leading one)
- obs_sequence.check_trailing_record_length(f, record_length)
+ ObsSequence.check_trailing_record_length(f, record_length)
 
  # linked list info
- record_length = obs_sequence.read_record_length(f)
+ record_length = ObsSequence.read_record_length(f)
  if record_length is None:
  break
 
@@ -850,17 +842,17 @@
  linked_list_string = f"{int1:<12} {int2:<10} {int3:<12}"
  obs.append(linked_list_string)
 
- obs_sequence.check_trailing_record_length(f, record_length)
+ ObsSequence.check_trailing_record_length(f, record_length)
 
  # location (note no location header "loc3d" or "loc1d" for binary files)
  obs.append("loc3d")
- record_length = obs_sequence.read_record_length(f)
+ record_length = ObsSequence.read_record_length(f)
  record = f.read(record_length)
  x, y, z, vert = struct.unpack("dddi", record[:28])
  location_string = f"{x} {y} {z} {vert}"
  obs.append(location_string)
 
- obs_sequence.check_trailing_record_length(f, record_length)
+ ObsSequence.check_trailing_record_length(f, record_length)
 
  # kind (type of observation) value
  obs.append("kind")
@@ -870,23 +862,23 @@
  kind = f"{struct.unpack('i', record)[0]}"
  obs.append(kind)
 
- obs_sequence.check_trailing_record_length(f, record_length)
+ ObsSequence.check_trailing_record_length(f, record_length)
 
  # time (seconds, days)
- record_length = obs_sequence.read_record_length(f)
+ record_length = ObsSequence.read_record_length(f)
  record = f.read(record_length)
  seconds, days = struct.unpack("ii", record)[:8]
  time_string = f"{seconds} {days}"
  obs.append(time_string)
 
- obs_sequence.check_trailing_record_length(f, record_length)
+ ObsSequence.check_trailing_record_length(f, record_length)
 
  # obs error variance
- record_length = obs_sequence.read_record_length(f)
+ record_length = ObsSequence.read_record_length(f)
  record = f.read(record_length)
  obs.append(struct.unpack("d", record)[0])
 
- obs_sequence.check_trailing_record_length(f, record_length)
+ ObsSequence.check_trailing_record_length(f, record_length)
 
  yield obs
 
@@ -945,29 +937,29 @@
  """
  Join a list of observation sequences together.
 
- This method combines the headers and observations from a list of obs_sequence objects
- into a single obs_sequence object.
+ This method combines the headers and observations from a list of ObsSequence objects
+ into a single ObsSequence object.
 
  Args:
- obs_sequences (list of obs_sequences): The list of observation sequences objects to join.
+ obs_sequences (list of ObsSequences): The list of observation sequences objects to join.
  copies (list of str, optional): A list of copy names to include in the combined data.
  If not provided, all copies are included.
 
  Returns:
- A new obs_sequence object containing the combined data.
+ A new ObsSequence object containing the combined data.
 
  Example:
  .. code-block:: python
 
- obs_seq1 = obs_sequence(file='obs_seq1.final')
- obs_seq2 = obs_sequence(file='obs_seq2.final')
- obs_seq3 = obs_sequence(file='obs_seq3.final')
- combined = obs_sequence.join([obs_seq1, obs_seq2, obs_seq3])
+ obs_seq1 = ObsSequence(file='obs_seq1.final')
+ obs_seq2 = ObsSequence(file='obs_seq2.final')
+ obs_seq3 = ObsSequence(file='obs_seq3.final')
+ combined = ObsSequence.join([obs_seq1, obs_seq2, obs_seq3])
  """
  if not obs_sequences:
  raise ValueError("The list of observation sequences is empty.")
 
- # Create a new obs_sequnece object with the combined data
+ # Create a new ObsSequence object with the combined data
  combo = cls(file=None)
 
  # Check if all obs_sequences have compatible attributes
@@ -1092,7 +1084,7 @@
 
  # create linked list for obs
  combo.df = combined_df.sort_values(by="time").reset_index(drop=True)
- combo.df["linked_list"] = obs_sequence.generate_linked_list_pattern(
+ combo.df["linked_list"] = ObsSequence.generate_linked_list_pattern(
  len(combo.df)
  )
  combo.df["obs_num"] = combined_df.index + 1
@@ -1125,7 +1117,7 @@
  )
 
  def create_header(self, n):
- """Create a header for the obs_seq file from the obs_sequence object."""
+ """Create a header for the obs_seq file from the ObsSequence object."""
  assert (
  self.n_copies == self.n_non_qc + self.n_qc
  ), "n_copies must be equal to n_non_qc + n_qc"
src/pydartdiags/stats/stats.py
@@ -4,8 +4,6 @@ import numpy as np
  from functools import wraps
  from datetime import datetime, timedelta
 
- # from pydartdiags.obs_sequence import obs_sequence as obsq
-
 
  def apply_to_phases_in_place(func):
  """
@@ -93,6 +91,12 @@ def calculate_rank(df, phase):
  """
  Calculate the rank of observations within an ensemble.
 
+ Note:
+
+ This function is decorated with @apply_to_phases_by_obs, which modifies its usage.
+ You should call it as calculate_rank(df), and the decorator will automatically apply the
+ function to all relevant phases ('prior' and 'posterior').
+
  This function takes a DataFrame containing ensemble predictions and observed values,
  adds sampling noise to the ensemble predictions, and calculates the rank of the observed
  value within the perturbed ensemble for each observation. The rank indicates the position
@@ -103,8 +107,6 @@ def calculate_rank(df, phase):
  Parameters:
  df (pd.DataFrame): A DataFrame with columns for rank, and observation type.
 
- phase (str): The phase for which to calculate the statistics ('prior' or 'posterior')
-
  Returns:
  DataFrame containing columns for 'rank' and observation 'type'.
  """
@@ -158,15 +160,20 @@ def diag_stats(df, phase):
  """
  Calculate diagnostic statistics for a given phase and add them to the DataFrame.
 
+ Note:
+ This function is decorated with @apply_to_phases_in_place, which modifies its usage.
+ You should call it as diag_stats(df), and the decorator will automatically apply the
+ function to all relevant phases ('prior' and 'posterior') modifying the DataFrame
+ in place.
+
  Args:
  df (pandas.DataFrame): The input DataFrame containing observation data and ensemble statistics.
- The DataFrame must include the following columns:
- - 'observation': The actual observation values.
- - 'obs_err_var': The variance of the observation error.
- - 'prior_ensemble_mean' and/or 'posterior_ensemble_mean': The mean of the ensemble.
- - 'prior_ensemble_spread' and/or 'posterior_ensemble_spread': The spread of the ensemble.
+ The DataFrame must include the following columns:
 
- phase (str): The phase for which to calculate the statistics ('prior' or 'posterior')
+ - 'observation': The actual observation values.
+ - 'obs_err_var': The variance of the observation error.
+ - 'prior_ensemble_mean' and/or 'posterior_ensemble_mean': The mean of the ensemble.
+ - 'prior_ensemble_spread' and/or 'posterior_ensemble_spread': The spread of the ensemble.
 
  Returns:
  None: The function modifies the DataFrame in place by adding the following columns:
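
The new Note blocks in calculate_rank and diag_stats describe the same pattern: the phase argument is injected by a decorator, so callers pass only the DataFrame. The decorator bodies are not part of this diff; the following is an assumed sketch of how such an apply-to-phases wrapper could behave:

    from functools import wraps

    def apply_to_phases_in_place(func):
        # Sketch: call func(df, phase) once per phase present in the frame.
        @wraps(func)
        def wrapper(df, *args, **kwargs):
            for phase in ("prior", "posterior"):
                # Assumed detection: a phase counts as present if any column
                # carries its prefix, e.g. 'prior_ensemble_mean'.
                if any(col.startswith(phase) for col in df.columns):
                    func(df, phase, *args, **kwargs)
        return wrapper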
@@ -203,9 +210,12 @@ def bin_by_layer(df, levels, verticalUnit="pressure (Pa)"):
  vertical level bin. Only observations (row) with the specified vertical unit are binned.
 
  Args:
- df (pandas.DataFrame): The input DataFrame containing observation data. The DataFrame must include the following columns:
+ df (pandas.DataFrame): The input DataFrame containing observation data.
+ The DataFrame must include the following columns:
+
  - 'vertical': The vertical coordinate values of the observations.
  - 'vert_unit': The unit of the vertical coordinate values.
+
  levels (list): A list of bin edges for the vertical levels.
  verticalUnit (str, optional): The unit of the vertical axis (e.g., 'pressure (Pa)'). Default is 'pressure (Pa)'.
 
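A short usage sketch for the reflowed bin_by_layer docstring; the bin edges below are illustrative only (the tests use an hPa-based list), and the import path follows the package layout in the file list:

    from pydartdiags.stats import stats

    # Illustrative pressure bin edges in Pa; 'pressure (Pa)' is the documented default unit.
    levels = [0.0, 10000.0, 30000.0, 50000.0, 70000.0, 85000.0, 100000.0]
    stats.bin_by_layer(obs_seq.df, levels)  # obs_seq: an ObsSequence, as in the earlier sketch
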
@@ -261,6 +271,28 @@ def bin_by_time(df, time_value):
 
  @apply_to_phases_by_type_return_df
  def grand_statistics(df, phase):
+ """
+ Calculate grand statistics (RMSE, bias, total spread) for each observation type and phase.
+
+ This function assumes that diagnostic statistics (such as squared error, bias, and total variance)
+ have already been computed by :func:`diag_stats` and are present in the DataFrame. It groups the data by observation
+ type and computes the root mean square error (RMSE), mean bias, and total spread for the specified phase.
+
+ Note:
+ This function is decorated with @apply_to_phases_by_type_return_df, which modifies its usage.
+ You should call it as grand_statistics(df), and the decorator will automatically apply the function
+ to all relevant phases ('prior' and 'posterior') and return a merged DataFrame.
+
+ Args:
+ df (pandas.DataFrame): The input DataFrame containing diagnostic statistics for observations.
+
+ Returns:
+ pandas.DataFrame: A DataFrame with columns:
+ - 'type': The observation type.
+ - '{phase}_rmse': The root mean square error for the phase.
+ - '{phase}_bias': The mean bias for the phase.
+ - '{phase}_totalspread': The total spread for the phase.
+ """
 
  # assuming diag_stats has been called
  grand = (
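
Combined with the diag_stats note above, the intended call sequence for the new grand_statistics docstring looks like this sketch (file name illustrative; diag_stats must run first so the error and spread columns exist):

    from pydartdiags.obs_sequence import obs_sequence as obsq
    from pydartdiags.stats import stats

    obs_seq = obsq.ObsSequence("obs_seq.final")   # illustrative file name
    stats.diag_stats(obs_seq.df)                  # adds the stats columns in place
    grand = stats.grand_statistics(obs_seq.df)    # per-type RMSE, bias, total spread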
@@ -283,6 +315,33 @@
 
  @apply_to_phases_by_type_return_df
  def layer_statistics(df, phase):
+ """
+ Calculate statistics (RMSE, bias, total spread) for each observation type and vertical layer.
+
+ This function assumes that diagnostic statistics (such as squared error, bias, and total variance)
+ have already been computed with :func:`diag_stats` and are present in the DataFrame. It groups the data by
+ vertical layer midpoint and observation type, and computes the root mean square error (RMSE),
+ mean bias, and total spread for the specified phase for each vertical layer.
+
+ Note:
+ This function is decorated with @apply_to_phases_by_type_return_df, which modifies its usage
+ You should call it as layer_statistics(df), and the decorator will automatically apply the function
+ to all relevant phases ('prior' and 'posterior') and return a merged DataFrame.
+
+ Args:
+ df (pandas.DataFrame): The input DataFrame containing diagnostic statistics for observations.
+ phase (str): The phase for which to calculate the statistics ('prior' or 'posterior').
+
+ Returns:
+ pandas.DataFrame: A DataFrame with columns:
+ - 'midpoint': The midpoint of the vertical layer.
+ - 'type': The observation type.
+ - '{phase}_rmse': The root mean square error for the phase.
+ - '{phase}_bias': The mean bias for the phase.
+ - '{phase}_totalspread': The total spread for the phase.
+ - 'vert_unit': The vertical unit.
+ - 'vlevels': The categorized vertical level.
+ """
 
  # assuming diag_stats has been called
  layer_stats = (
@@ -310,14 +369,31 @@
  @apply_to_phases_by_type_return_df
  def time_statistics(df, phase):
  """
- Calculate time-based statistics for a given phase and return a new DataFrame.
+ Calculate time-based statistics (RMSE, bias, total spread) for each observation type and time bin.
+
+ This function assumes that diagnostic statistics (such as squared error, bias, and total variance)
+ have already been computed by :func:`diag_stats` and are present in the DataFrame. It groups the data
+ by time bin midpoint and observation type, and computes the root mean square error (RMSE), mean bias,
+ and total spread for the specified phase for each time bin.
+
+ Note:
+ This function is decorated with @apply_to_phases_by_type_return_df.
+ You should call it as time_statistics(df), and the decorator will automatically apply the function
+ to all relevant phases ('prior' and 'posterior') and return a merged DataFrame.
 
  Args:
- df (pandas.DataFrame): The input DataFrame containing observation data and ensemble statistics.
+ df (pandas.DataFrame): The input DataFrame containing diagnostic statistics for observations.
  phase (str): The phase for which to calculate the statistics ('prior' or 'posterior').
 
  Returns:
- pandas.DataFrame: A DataFrame containing time-based statistics for the specified phase.
+ pandas.DataFrame: A DataFrame with columns:
+ - 'time_bin_midpoint': The midpoint of the time bin.
+ - 'type': The observation type.
+ - '{phase}_rmse': The root mean square error for the phase.
+ - '{phase}_bias': The mean bias for the phase.
+ - '{phase}_totalspread': The total spread for the phase.
+ - 'time_bin': The time bin interval.
+ - 'time': The first time value in the bin.
  """
  # Assuming diag_stats has been called
  time_stats = (
@@ -402,7 +478,9 @@ def possible_vs_used_by_time(df):
  Calculates the count of possible vs. used observations by type and time bin.
 
  Args:
- df (pd.DataFrame): The input DataFrame containing observation data. The DataFrame must include:
+ df (pd.DataFrame): The input DataFrame containing observation data.
+ The DataFrame must include:
+
  - 'type': The observation type.
  - 'time_bin_midpoint': The midpoint of the time bin.
  - 'observation': The observation values.
src/pydartdiags.egg-info/PKG-INFO
@@ -1,15 +1,15 @@
  Metadata-Version: 2.4
  Name: pydartdiags
- Version: 0.5.1
+ Version: 0.6.0
  Summary: Observation Sequence Diagnostics for DART
  Home-page: https://github.com/NCAR/pyDARTdiags.git
  Author: Helen Kershaw
  Author-email: Helen Kershaw <hkershaw@ucar.edu>
+ License-Expression: Apache-2.0
  Project-URL: Homepage, https://github.com/NCAR/pyDARTdiags.git
  Project-URL: Issues, https://github.com/NCAR/pyDARTdiags/issues
  Project-URL: Documentation, https://ncar.github.io/pyDARTdiags
  Classifier: Programming Language :: Python :: 3
- Classifier: License :: OSI Approved :: Apache Software License
  Classifier: Operating System :: OS Independent
  Requires-Python: >=3.8
  Description-Content-Type: text/markdown
tests/test_obs_sequence.py
@@ -43,7 +43,7 @@ class TestSanitizeInput:
  ValueError,
  match="Neither 'loc3d' nor 'loc1d' could be found in the observation sequence.",
  ):
- obj = obsq.obs_sequence(bad_loc_file_path)
+ obj = obsq.ObsSequence(bad_loc_file_path)
 
 
  class TestOneDimensional:
@@ -53,7 +53,7 @@ class TestOneDimensional:
  return os.path.join(test_dir, "data", "obs_seq.1d.final")
 
  def test_read1d(self, obs_seq_file_path):
- obj = obsq.obs_sequence(obs_seq_file_path)
+ obj = obsq.ObsSequence(obs_seq_file_path)
  assert obj.loc_mod == "loc1d"
  assert len(obj.df) == 40 # 40 obs in the file
  assert (
@@ -69,11 +69,11 @@ class TestSynonyms:
  return os.path.join(test_dir, "data", "obs_seq.final.ascii.syn")
 
  def test_single(self, synonym_file_path):
- obj1 = obsq.obs_sequence(synonym_file_path, synonyms="observationx")
+ obj1 = obsq.ObsSequence(synonym_file_path, synonyms="observationx")
  assert "observationx" in obj1.synonyms_for_obs
 
  def test_list(self, synonym_file_path):
- obj2 = obsq.obs_sequence(
+ obj2 = obsq.ObsSequence(
  synonym_file_path, synonyms=["synonym1", "synonym2", "observationx"]
  )
  assert "synonym1" in obj2.synonyms_for_obs
@@ -87,7 +87,7 @@ class TestBinaryObsSequence:
  return os.path.join(test_dir, "data", "obs_seq.final.binary.small")
 
  def test_read_binary(self, binary_obs_seq_file_path):
- obj = obsq.obs_sequence(binary_obs_seq_file_path)
+ obj = obsq.ObsSequence(binary_obs_seq_file_path)
  assert len(obj.df) > 0 # Ensure the DataFrame is not empty
 
 
@@ -172,7 +172,7 @@ class TestWriteAscii:
  temp_output_file_path = os.path.join(temp_dir, "obs_seq.final.ascii.write")
 
  # Create an instance of the obs_sequence class and write the output file
- obj = obsq.obs_sequence(ascii_obs_seq_file_path)
+ obj = obsq.ObsSequence(ascii_obs_seq_file_path)
  obj.write_obs_seq(temp_output_file_path)
 
  # Ensure the output file exists
@@ -199,7 +199,7 @@ class TestWriteAscii:
  )
 
  # Create an instance of the obs_sequence class and write the output file
- obj = obsq.obs_sequence(obs_seq_file_path)
+ obj = obsq.ObsSequence(obs_seq_file_path)
  stats.diag_stats(obj.df) # add the stats columns
  obj.write_obs_seq(temp_output_file_path)
 
@@ -227,7 +227,7 @@ class TestWriteAscii:
  )
 
  # Create an instance of the obs_sequence class and write the output file
- obj = obsq.obs_sequence(obs_seq_file_path)
+ obj = obsq.ObsSequence(obs_seq_file_path)
  hPalevels = [
  0.0,
  100.0,
@@ -264,7 +264,7 @@ class TestWriteAscii:
  obs_seq_file_path = os.path.join(
  os.path.dirname(__file__), "data", "obs_seq.final.ascii.small"
  )
- obj = obsq.obs_sequence(obs_seq_file_path)
+ obj = obsq.ObsSequence(obs_seq_file_path)
 
  # Remove obs except ACARS_TEMPERATURE
  obj.df = obj.df[(obj.df["type"] == "ACARS_TEMPERATURE")]
@@ -297,7 +297,7 @@ class TestObsDataframe:
  df = pd.DataFrame(data)
 
  # Create an instance of ObsSequence with the sample DataFrame
- obs_seq = obsq.obs_sequence(file=None)
+ obs_seq = obsq.ObsSequence(file=None)
  obs_seq.df = df
  return obs_seq
 
@@ -394,15 +394,15 @@ class TestJoin:
  with pytest.raises(
  ValueError, match="The list of observation sequences is empty."
  ):
- obsq.obs_sequence.join([])
+ obsq.ObsSequence.join([])
 
  def test_join_diff_locs(self, obs_seq1d_file_path, binary_obs_seq_file_path):
- obj1 = obsq.obs_sequence(obs_seq1d_file_path)
- obj2 = obsq.obs_sequence(binary_obs_seq_file_path)
+ obj1 = obsq.ObsSequence(obs_seq1d_file_path)
+ obj2 = obsq.ObsSequence(binary_obs_seq_file_path)
  with pytest.raises(
  ValueError, match="All observation sequences must have the same loc_mod."
  ):
- obsq.obs_sequence.join([obj1, obj2])
+ obsq.ObsSequence.join([obj1, obj2])
 
  def test_join_three_obs_seqs(
  self,
@@ -410,10 +410,10 @@ class TestJoin:
  ascii_obs_seq_file_path2,
  ascii_obs_seq_file_path3,
  ):
- obj1 = obsq.obs_sequence(ascii_obs_seq_file_path1)
- obj2 = obsq.obs_sequence(ascii_obs_seq_file_path2)
- obj3 = obsq.obs_sequence(ascii_obs_seq_file_path3)
- obs_seq_mega = obsq.obs_sequence.join([obj1, obj2, obj3])
+ obj1 = obsq.ObsSequence(ascii_obs_seq_file_path1)
+ obj2 = obsq.ObsSequence(ascii_obs_seq_file_path2)
+ obj3 = obsq.ObsSequence(ascii_obs_seq_file_path3)
+ obs_seq_mega = obsq.ObsSequence.join([obj1, obj2, obj3])
 
  assert obs_seq_mega.all_obs == None
  assert len(obs_seq_mega.df) == 16 # obs in the dataframe
@@ -457,9 +457,9 @@ class TestJoin:
  def test_join_list_sub_copies(
  self, ascii_obs_seq_file_path1, ascii_obs_seq_file_path3
  ):
- obj1 = obsq.obs_sequence(ascii_obs_seq_file_path1)
- obj3 = obsq.obs_sequence(ascii_obs_seq_file_path3)
- obs_seq_mega = obsq.obs_sequence.join(
+ obj1 = obsq.ObsSequence(ascii_obs_seq_file_path1)
+ obj3 = obsq.ObsSequence(ascii_obs_seq_file_path3)
+ obs_seq_mega = obsq.ObsSequence.join(
  [obj1, obj3], ["prior_ensemble_mean", "observation", "Data_QC"]
  )
 
@@ -475,9 +475,9 @@ class TestJoin:
  def test_join_list_sub_copies_no_qc(
  self, ascii_obs_seq_file_path1, ascii_obs_seq_file_path3
  ):
- obj1 = obsq.obs_sequence(ascii_obs_seq_file_path1)
- obj3 = obsq.obs_sequence(ascii_obs_seq_file_path3)
- obs_seq_mega = obsq.obs_sequence.join(
+ obj1 = obsq.ObsSequence(ascii_obs_seq_file_path1)
+ obj3 = obsq.ObsSequence(ascii_obs_seq_file_path3)
+ obs_seq_mega = obsq.ObsSequence.join(
  [obj1, obj3], ["observation", "prior_ensemble_spread"]
  )
 
@@ -489,29 +489,29 @@ class TestJoin:
  def test_join_copies_not_in_all(
  self, ascii_obs_seq_file_path1, ascii_obs_seq_file_path4
  ):
- obj1 = obsq.obs_sequence(ascii_obs_seq_file_path1)
- obj4 = obsq.obs_sequence(ascii_obs_seq_file_path4)
+ obj1 = obsq.ObsSequence(ascii_obs_seq_file_path1)
+ obj4 = obsq.ObsSequence(ascii_obs_seq_file_path4)
  with pytest.raises(
  ValueError, match="All observation sequences must have the same copies."
  ):
- obsq.obs_sequence.join([obj1, obj4])
+ obsq.ObsSequence.join([obj1, obj4])
 
  def test_join_copies_not_all_have_subset(
  self, ascii_obs_seq_file_path1, ascii_obs_seq_file_path4
  ):
- obj1 = obsq.obs_sequence(ascii_obs_seq_file_path1)
- obj4 = obsq.obs_sequence(ascii_obs_seq_file_path4)
+ obj1 = obsq.ObsSequence(ascii_obs_seq_file_path1)
+ obj4 = obsq.ObsSequence(ascii_obs_seq_file_path4)
  with pytest.raises(
  ValueError, match="All observation sequences must have the selected copies."
  ):
- obsq.obs_sequence.join([obj1, obj4], ["prior_ensemble_member_41"])
+ obsq.ObsSequence.join([obj1, obj4], ["prior_ensemble_member_41"])
 
  def test_join_list_sub_copies(
  self, ascii_obs_seq_file_path1, ascii_obs_seq_file_path3
  ):
- obj1 = obsq.obs_sequence(ascii_obs_seq_file_path1)
- obj3 = obsq.obs_sequence(ascii_obs_seq_file_path3)
- obs_seq_mega = obsq.obs_sequence.join(
+ obj1 = obsq.ObsSequence(ascii_obs_seq_file_path1)
+ obj3 = obsq.ObsSequence(ascii_obs_seq_file_path3)
+ obs_seq_mega = obsq.ObsSequence.join(
  [obj1, obj3], ["prior_ensemble_mean", "observation", "Data_QC"]
  )
  assert obs_seq_mega.has_assimilation_info() == False
@@ -520,7 +520,7 @@ class TestJoin:
 
  class TestCreateHeader:
  def test_create_header(self):
- obj = obsq.obs_sequence(file=None)
+ obj = obsq.ObsSequence(file=None)
 
  obj.types = {1: "ACARS_BELLYBUTTON", 2: "NCEP_TOES"}
  obj.n_non_qc = 2
@@ -551,7 +551,7 @@
  class TestSplitMetadata:
  def test_split_metadata_with_external_FO(self):
  metadata = ["meta1", "meta2", "external_FO1", "meta3", "meta4"]
- before_external_FO, after_external_FO = obsq.obs_sequence.split_metadata(
+ before_external_FO, after_external_FO = obsq.ObsSequence.split_metadata(
  metadata
  )
  assert before_external_FO == ["meta1", "meta2"]
@@ -559,7 +559,7 @@ class TestSplitMetadata:
 
  def test_split_metadata_without_external_FO(self):
  metadata = ["meta1", "meta2", "meta3", "meta4"]
- before_external_FO, after_external_FO = obsq.obs_sequence.split_metadata(
+ before_external_FO, after_external_FO = obsq.ObsSequence.split_metadata(
  metadata
  )
  assert before_external_FO == ["meta1", "meta2", "meta3", "meta4"]
@@ -567,7 +567,7 @@ class TestSplitMetadata:
 
  def test_split_metadata_multiple_external_FO(self):
  metadata = ["meta1", "external_FO1", "meta2", "external_FO2", "meta3"]
- before_external_FO, after_external_FO = obsq.obs_sequence.split_metadata(
+ before_external_FO, after_external_FO = obsq.ObsSequence.split_metadata(
  metadata
  )
  assert before_external_FO == ["meta1"]
@@ -575,7 +575,7 @@ class TestSplitMetadata:
 
  def test_split_metadata_empty_list(self):
  metadata = []
- before_external_FO, after_external_FO = obsq.obs_sequence.split_metadata(
+ before_external_FO, after_external_FO = obsq.ObsSequence.split_metadata(
  metadata
  )
  assert before_external_FO == []
@@ -583,7 +583,7 @@ class TestSplitMetadata:
 
  def test_split_metadata_no_external_FO(self):
  metadata = ["meta1", "meta2", "meta3"]
- before_external_FO, after_external_FO = obsq.obs_sequence.split_metadata(
+ before_external_FO, after_external_FO = obsq.ObsSequence.split_metadata(
  metadata
  )
  assert before_external_FO == ["meta1", "meta2", "meta3"]
@@ -591,7 +591,7 @@ class TestSplitMetadata:
 
  def test_split_metadata_external_FO_at_start(self):
  metadata = ["external_FO1", "meta1", "meta2"]
- before_external_FO, after_external_FO = obsq.obs_sequence.split_metadata(
+ before_external_FO, after_external_FO = obsq.ObsSequence.split_metadata(
  metadata
  )
  assert before_external_FO == []
@@ -599,7 +599,7 @@ class TestSplitMetadata:
 
  def test_split_metadata_external_FO_at_end(self):
  metadata = ["meta1", "meta2", "external_FO1"]
- before_external_FO, after_external_FO = obsq.obs_sequence.split_metadata(
+ before_external_FO, after_external_FO = obsq.ObsSequence.split_metadata(
  metadata
  )
  assert before_external_FO == ["meta1", "meta2"]
@@ -610,7 +610,7 @@ class TestGenerateLinkedListPattern:
  def test_generate_linked_list_pattern(self):
  n = 1
  expected_pattern = ["0 -1 -1"]
- result = obsq.obs_sequence.generate_linked_list_pattern(n)
+ result = obsq.ObsSequence.generate_linked_list_pattern(n)
  assert result == expected_pattern
 
  n = 3
@@ -619,7 +619,7 @@ class TestGenerateLinkedListPattern:
  "1 3 -1",
  "2 -1 -1",
  ]
- result = obsq.obs_sequence.generate_linked_list_pattern(n)
+ result = obsq.ObsSequence.generate_linked_list_pattern(n)
  assert result == expected_pattern
 
  n = 6
@@ -631,7 +631,7 @@ class TestGenerateLinkedListPattern:
  "4 6 -1",
  "5 -1 -1",
  ]
- result = obsq.obs_sequence.generate_linked_list_pattern(n)
+ result = obsq.ObsSequence.generate_linked_list_pattern(n)
  assert result == expected_pattern
 
 
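The expected patterns in this test pin down the linked-list encoding: each row stores "prev next cov_group", with -1 marking no link. A sketch consistent with the visible expectations (generate_linked_list_pattern itself is not shown in this diff):

    def generate_linked_list_pattern(n):
        # Entry k (1-based) points back to k-1 (0 for the first obs) and
        # forward to k+1; -1 marks the end of the chain (sketch).
        pattern = []
        for k in range(1, n + 1):
            next_obs = k + 1 if k < n else -1
            pattern.append(f"{k - 1} {next_obs} -1")
        return pattern

    assert generate_linked_list_pattern(1) == ["0 -1 -1"]  # matches the n = 1 case
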
@@ -665,7 +665,7 @@ class TestCreateHeaderFromDataFrame:
  df = pd.DataFrame(data)
 
  # Create an instance of obs_sequence with the sample DataFrame
- obs_seq = obsq.obs_sequence(file=None)
+ obs_seq = obsq.ObsSequence(file=None)
  obs_seq.df = df
  obs_seq.reverse_types = {
  "ACARS_TEMPERATURE": 1,
@@ -732,7 +732,7 @@ class TestUpdateTypesDicts:
  "52": "PINEAPPLE_COUNT",
  }
 
- updated_reverse_types, types = obsq.obs_sequence.update_types_dicts(
+ updated_reverse_types, types = obsq.ObsSequence.update_types_dicts(
  sample_df, reverse_types
  )
 
@@ -747,7 +747,7 @@ class TestCompositeTypes:
  file_path = os.path.join(test_dir, "data", "three-obs.final")
 
  # Create an instance of obs_sequence with the 'three-obs.final' file
- obs_seq = obsq.obs_sequence(file_path)
+ obs_seq = obsq.ObsSequence(file_path)
  return obs_seq
 
  @pytest.mark.parametrize(
@@ -850,7 +850,7 @@ class TestCompositeTypes:
  test_dir = os.path.dirname(__file__)
  file_path = os.path.join(test_dir, "data", "dups-obs.final")
 
- dup = obsq.obs_sequence(file_path)
+ dup = obsq.ObsSequence(file_path)
  # Test that composite_types raises an error
  with pytest.raises(Exception, match="There are duplicates in the components."):
  dup.composite_types()