pydartdiags 0.5.1__tar.gz → 0.6.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pydartdiags might be problematic. Click here for more details.
- {pydartdiags-0.5.1/src/pydartdiags.egg-info → pydartdiags-0.6.0}/PKG-INFO +2 -2
- {pydartdiags-0.5.1 → pydartdiags-0.6.0}/pyproject.toml +2 -2
- {pydartdiags-0.5.1 → pydartdiags-0.6.0}/setup.py +1 -1
- {pydartdiags-0.5.1 → pydartdiags-0.6.0}/src/pydartdiags/obs_sequence/obs_sequence.py +74 -82
- {pydartdiags-0.5.1 → pydartdiags-0.6.0}/src/pydartdiags/stats/stats.py +93 -15
- {pydartdiags-0.5.1 → pydartdiags-0.6.0/src/pydartdiags.egg-info}/PKG-INFO +2 -2
- {pydartdiags-0.5.1 → pydartdiags-0.6.0}/tests/test_obs_sequence.py +48 -48
- {pydartdiags-0.5.1 → pydartdiags-0.6.0}/LICENSE +0 -0
- {pydartdiags-0.5.1 → pydartdiags-0.6.0}/MANIFEST.in +0 -0
- {pydartdiags-0.5.1 → pydartdiags-0.6.0}/README.md +0 -0
- {pydartdiags-0.5.1 → pydartdiags-0.6.0}/setup.cfg +0 -0
- {pydartdiags-0.5.1 → pydartdiags-0.6.0}/src/pydartdiags/__init__.py +0 -0
- {pydartdiags-0.5.1 → pydartdiags-0.6.0}/src/pydartdiags/matplots/__init__.py +0 -0
- {pydartdiags-0.5.1 → pydartdiags-0.6.0}/src/pydartdiags/matplots/matplots.py +0 -0
- {pydartdiags-0.5.1 → pydartdiags-0.6.0}/src/pydartdiags/obs_sequence/__init__.py +0 -0
- {pydartdiags-0.5.1 → pydartdiags-0.6.0}/src/pydartdiags/obs_sequence/composite_types.yaml +0 -0
- {pydartdiags-0.5.1 → pydartdiags-0.6.0}/src/pydartdiags/plots/__init__.py +0 -0
- {pydartdiags-0.5.1 → pydartdiags-0.6.0}/src/pydartdiags/plots/plots.py +0 -0
- {pydartdiags-0.5.1 → pydartdiags-0.6.0}/src/pydartdiags/stats/__init__.py +0 -0
- {pydartdiags-0.5.1 → pydartdiags-0.6.0}/src/pydartdiags.egg-info/SOURCES.txt +0 -0
- {pydartdiags-0.5.1 → pydartdiags-0.6.0}/src/pydartdiags.egg-info/dependency_links.txt +0 -0
- {pydartdiags-0.5.1 → pydartdiags-0.6.0}/src/pydartdiags.egg-info/requires.txt +0 -0
- {pydartdiags-0.5.1 → pydartdiags-0.6.0}/src/pydartdiags.egg-info/top_level.txt +0 -0
- {pydartdiags-0.5.1 → pydartdiags-0.6.0}/tests/test_stats.py +0 -0
|
@@ -1,15 +1,15 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pydartdiags
|
|
3
|
-
Version: 0.5.1
|
|
3
|
+
Version: 0.6.0
|
|
4
4
|
Summary: Observation Sequence Diagnostics for DART
|
|
5
5
|
Home-page: https://github.com/NCAR/pyDARTdiags.git
|
|
6
6
|
Author: Helen Kershaw
|
|
7
7
|
Author-email: Helen Kershaw <hkershaw@ucar.edu>
|
|
8
|
+
License-Expression: Apache-2.0
|
|
8
9
|
Project-URL: Homepage, https://github.com/NCAR/pyDARTdiags.git
|
|
9
10
|
Project-URL: Issues, https://github.com/NCAR/pyDARTdiags/issues
|
|
10
11
|
Project-URL: Documentation, https://ncar.github.io/pyDARTdiags
|
|
11
12
|
Classifier: Programming Language :: Python :: 3
|
|
12
|
-
Classifier: License :: OSI Approved :: Apache Software License
|
|
13
13
|
Classifier: Operating System :: OS Independent
|
|
14
14
|
Requires-Python: >=3.8
|
|
15
15
|
Description-Content-Type: text/markdown
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "pydartdiags"
|
|
7
|
-
version = "0.5.1"
|
|
7
|
+
version = "0.6.0"
|
|
8
8
|
authors = [
|
|
9
9
|
{ name="Helen Kershaw", email="hkershaw@ucar.edu" },
|
|
10
10
|
]
|
|
@@ -13,9 +13,9 @@ readme = "README.md"
|
|
|
13
13
|
requires-python = ">=3.8"
|
|
14
14
|
classifiers = [
|
|
15
15
|
"Programming Language :: Python :: 3",
|
|
16
|
-
"License :: OSI Approved :: Apache Software License",
|
|
17
16
|
"Operating System :: OS Independent",
|
|
18
17
|
]
|
|
18
|
+
license = "Apache-2.0"
|
|
19
19
|
dependencies = [
|
|
20
20
|
"pandas>=2.2.0",
|
|
21
21
|
"numpy>=1.26",
|
|
@@ -19,17 +19,46 @@ def requires_assimilation_info(func):
|
|
|
19
19
|
return wrapper
|
|
20
20
|
|
|
21
21
|
|
|
22
|
-
class obs_sequence:
|
|
22
|
+
class ObsSequence:
|
|
23
23
|
"""
|
|
24
|
-
Initialize an obs_sequence object from an ASCII or binary observation sequence file,
|
|
25
|
-
or create an empty obs_sequence object from scratch.
|
|
24
|
+
Initialize an ObsSequence object from an ASCII or binary observation sequence file,
|
|
25
|
+
or create an empty ObsSequence object from scratch.
|
|
26
|
+
|
|
27
|
+
1D observations are given a datetime of days, seconds since 2000-01-01 00:00:00
|
|
28
|
+
|
|
29
|
+
3D observations are given a datetime of days, seconds since 1601-01-01 00:00:00 (DART Gregorian calendar)
|
|
26
30
|
|
|
27
31
|
Args:
|
|
28
32
|
file (str): The input observation sequence ASCII or binary file.
|
|
29
|
-
|
|
33
|
+
If None, an empty ObsSequence object is created from scratch.
|
|
34
|
+
synonyms (list, optional): List of additional synonyms for the observation column in the DataFrame.
|
|
35
|
+
The default list is
|
|
36
|
+
|
|
37
|
+
.. code-block:: python
|
|
38
|
+
|
|
39
|
+
['NCEP BUFR observation',
|
|
40
|
+
'AIRS observation',
|
|
41
|
+
'GTSPP observation',
|
|
42
|
+
'SST observation',
|
|
43
|
+
'observations',
|
|
44
|
+
'WOD observation']
|
|
45
|
+
|
|
46
|
+
You can add more synonyms by providing a list of strings when
|
|
47
|
+
creating the ObsSequence object.
|
|
48
|
+
|
|
49
|
+
.. code-block:: python
|
|
50
|
+
|
|
51
|
+
ObsSequence(file, synonyms=['synonym1', 'synonym2'])
|
|
52
|
+
|
|
53
|
+
Raises:
|
|
54
|
+
ValueError: If neither 'loc3d' nor 'loc1d' could be found in the observation sequence.
|
|
55
|
+
|
|
56
|
+
Examples:
|
|
57
|
+
|
|
58
|
+
.. code-block:: python
|
|
59
|
+
|
|
60
|
+
obs_seq = ObsSequence(file='obs_seq.final')
|
|
30
61
|
|
|
31
|
-
Returns:
|
|
32
|
-
An obs_sequence object
|
|
33
62
|
|
|
34
63
|
Attributes:
|
|
35
64
|
df (pandas.DataFrame): The DataFrame containing the observation sequence data.
|
|
@@ -54,34 +83,18 @@ class obs_sequence:
|
|
|
54
83
|
- scale height: 'VERTISSCALEHEIGHT' (unitless)
|
|
55
84
|
loc_mod (str): The location model, either 'loc3d' or 'loc1d'.
|
|
56
85
|
For 3D sphere models: latitude and longitude are in degrees in the DataFrame.
|
|
57
|
-
types (dict): Dictionary of types of observations the observation sequence,
|
|
86
|
+
types (dict): Dictionary of types of observations in the observation sequence,
|
|
58
87
|
e.g. {23: 'ACARS_TEMPERATURE'},
|
|
59
88
|
reverse_types (dict): Dictionary of types with keys and values reversed, e.g
|
|
60
89
|
{'ACARS_TEMPERATURE': 23}
|
|
61
90
|
synonyms_for_obs (list): List of synonyms for the observation column in the DataFrame.
|
|
62
|
-
The default list is
|
|
63
91
|
|
|
64
|
-
.. code-block:: python
|
|
65
|
-
|
|
66
|
-
[ 'NCEP BUFR observation',
|
|
67
|
-
'AIRS observation',
|
|
68
|
-
'GTSPP observation',
|
|
69
|
-
'SST observation',
|
|
70
|
-
'observations',
|
|
71
|
-
'WOD observation']
|
|
72
|
-
|
|
73
|
-
You can add more synonyms by providing a list of strings when
|
|
74
|
-
creating the obs_sequence object.
|
|
75
|
-
|
|
76
|
-
.. code-block:: python
|
|
77
|
-
|
|
78
|
-
obs_sequence(file, synonyms=['synonym1', 'synonym2']).df
|
|
79
92
|
|
|
80
93
|
seq (generator): Generator of observations from the observation sequence file.
|
|
81
94
|
all_obs (list): List of all observations, each observation is a list.
|
|
82
|
-
Valid when the obs_sequence is created from a file.
|
|
83
|
-
Set to None when the obs_sequence is created from scratch or multiple
|
|
84
|
-
|
|
95
|
+
Valid when the ObsSequence is created from a file.
|
|
96
|
+
Set to None when the ObsSequence is created from scratch or multiple
|
|
97
|
+
ObsSequences are joined.
|
|
85
98
|
"""
|
|
86
99
|
|
|
87
100
|
vert = {
|
|
@@ -96,27 +109,6 @@ class obs_sequence:
|
|
|
96
109
|
reversed_vert = {value: key for key, value in vert.items()}
|
|
97
110
|
|
|
98
111
|
def __init__(self, file, synonyms=None):
|
|
99
|
-
"""
|
|
100
|
-
Create an obs_sequence object from an ASCII or binary observation sequence file,
|
|
101
|
-
or create an empty obs_sequence object from scratch.
|
|
102
|
-
|
|
103
|
-
Args:
|
|
104
|
-
file (str): The input observation sequence ASCII or binary file.
|
|
105
|
-
If None, an empty obs_sequence object is created from scratch.
|
|
106
|
-
synonyms (list, optional): List of synonyms for the observation column in the DataFrame.
|
|
107
|
-
|
|
108
|
-
Returns:
|
|
109
|
-
an obs_sequence object
|
|
110
|
-
1D observations are given a datetime of days, seconds since 2000-01-01 00:00:00
|
|
111
|
-
3D observations are given a datetime of days, seconds since 1601-01-01 00:00:00 (DART Gregorian calendar)
|
|
112
|
-
|
|
113
|
-
Examples:
|
|
114
|
-
|
|
115
|
-
.. code-block:: python
|
|
116
|
-
|
|
117
|
-
obs_seq = obs_sequence(file='obs_seq.final')
|
|
118
|
-
|
|
119
|
-
"""
|
|
120
112
|
|
|
121
113
|
self.loc_mod = "None"
|
|
122
114
|
self.file = file
|
|
@@ -214,7 +206,7 @@ class obs_sequence:
|
|
|
214
206
|
data.append(float(location[0])) # location x
|
|
215
207
|
data.append(float(location[1])) # location y
|
|
216
208
|
data.append(float(location[2])) # location z
|
|
217
|
-
data.append(obs_sequence.vert[int(location[3])])
|
|
209
|
+
data.append(ObsSequence.vert[int(location[3])])
|
|
218
210
|
self.loc_mod = "loc3d"
|
|
219
211
|
except ValueError:
|
|
220
212
|
try:
|
|
@@ -372,7 +364,7 @@ class obs_sequence:
|
|
|
372
364
|
) # sort the DataFrame by time
|
|
373
365
|
df_copy.reset_index(drop=True, inplace=True)
|
|
374
366
|
df_copy["obs_num"] = df_copy.index + 1 # obs_num in time order
|
|
375
|
-
df_copy["linked_list"] = obs_sequence.generate_linked_list_pattern(
|
|
367
|
+
df_copy["linked_list"] = ObsSequence.generate_linked_list_pattern(
|
|
376
368
|
len(df_copy)
|
|
377
369
|
) # linked list pattern
|
|
378
370
|
|
|
@@ -594,7 +586,7 @@ class obs_sequence:
|
|
|
594
586
|
with open(file, "rb") as f:
|
|
595
587
|
while True:
|
|
596
588
|
# Read the record length
|
|
597
|
-
record_length = obs_sequence.read_record_length(f)
|
|
589
|
+
record_length = ObsSequence.read_record_length(f)
|
|
598
590
|
if record_length is None:
|
|
599
591
|
break
|
|
600
592
|
record = f.read(record_length)
|
|
@@ -602,7 +594,7 @@ class obs_sequence:
|
|
|
602
594
|
break
|
|
603
595
|
|
|
604
596
|
# Read the trailing record length (should match the leading one)
|
|
605
|
-
|
|
597
|
+
ObsSequence.check_trailing_record_length(f, record_length)
|
|
606
598
|
|
|
607
599
|
linecount += 1
|
|
608
600
|
|
|
@@ -620,7 +612,7 @@ class obs_sequence:
|
|
|
620
612
|
f.seek(0)
|
|
621
613
|
|
|
622
614
|
for _ in range(2):
|
|
623
|
-
record_length = obs_sequence.read_record_length(f)
|
|
615
|
+
record_length = ObsSequence.read_record_length(f)
|
|
624
616
|
if record_length is None:
|
|
625
617
|
break
|
|
626
618
|
|
|
@@ -628,7 +620,7 @@ class obs_sequence:
|
|
|
628
620
|
if not record: # end of file
|
|
629
621
|
break
|
|
630
622
|
|
|
631
|
-
|
|
623
|
+
ObsSequence.check_trailing_record_length(f, record_length)
|
|
632
624
|
header.append(record.decode("utf-8").strip())
|
|
633
625
|
|
|
634
626
|
header.append(str(obs_types_definitions))
|
|
@@ -636,7 +628,7 @@ class obs_sequence:
|
|
|
636
628
|
# obs_types_definitions
|
|
637
629
|
for _ in range(3, 4 + obs_types_definitions):
|
|
638
630
|
# Read the record length
|
|
639
|
-
record_length = obs_sequence.read_record_length(f)
|
|
631
|
+
record_length = ObsSequence.read_record_length(f)
|
|
640
632
|
if record_length is None:
|
|
641
633
|
break
|
|
642
634
|
|
|
@@ -645,7 +637,7 @@ class obs_sequence:
|
|
|
645
637
|
if not record: # end of file
|
|
646
638
|
break
|
|
647
639
|
|
|
648
|
-
|
|
640
|
+
ObsSequence.check_trailing_record_length(f, record_length)
|
|
649
641
|
|
|
650
642
|
if _ == 3:
|
|
651
643
|
continue # num obs_types_definitions
|
|
@@ -663,7 +655,7 @@ class obs_sequence:
|
|
|
663
655
|
5 + obs_types_definitions + num_copies + num_qcs + 1,
|
|
664
656
|
):
|
|
665
657
|
# Read the record length
|
|
666
|
-
record_length = obs_sequence.read_record_length(f)
|
|
658
|
+
record_length = ObsSequence.read_record_length(f)
|
|
667
659
|
if record_length is None:
|
|
668
660
|
break
|
|
669
661
|
|
|
@@ -672,7 +664,7 @@ class obs_sequence:
|
|
|
672
664
|
if not record:
|
|
673
665
|
break
|
|
674
666
|
|
|
675
|
-
|
|
667
|
+
ObsSequence.check_trailing_record_length(f, record_length)
|
|
676
668
|
|
|
677
669
|
if _ == 5 + obs_types_definitions:
|
|
678
670
|
continue
|
|
@@ -683,12 +675,12 @@ class obs_sequence:
|
|
|
683
675
|
|
|
684
676
|
# first and last obs
|
|
685
677
|
# Read the record length
|
|
686
|
-
record_length = obs_sequence.read_record_length(f)
|
|
678
|
+
record_length = ObsSequence.read_record_length(f)
|
|
687
679
|
|
|
688
680
|
# Read the actual record
|
|
689
681
|
record = f.read(record_length)
|
|
690
682
|
|
|
691
|
-
|
|
683
|
+
ObsSequence.check_trailing_record_length(f, record_length)
|
|
692
684
|
|
|
693
685
|
# Read the whole record as two integers
|
|
694
686
|
first, last = struct.unpack("ii", record)[:8]
|
|
@@ -813,7 +805,7 @@ class obs_sequence:
|
|
|
813
805
|
# Skip the first len(obs_seq.header) lines
|
|
814
806
|
for _ in range(header_length - 1):
|
|
815
807
|
# Read the record length
|
|
816
|
-
record_length = obs_sequence.read_record_length(f)
|
|
808
|
+
record_length = ObsSequence.read_record_length(f)
|
|
817
809
|
if record_length is None: # End of file
|
|
818
810
|
break
|
|
819
811
|
|
|
@@ -830,7 +822,7 @@ class obs_sequence:
|
|
|
830
822
|
obs.append(f"OBS {obs_num}")
|
|
831
823
|
for _ in range(n): # number of copies
|
|
832
824
|
# Read the record length
|
|
833
|
-
record_length = obs_sequence.read_record_length(f)
|
|
825
|
+
record_length = ObsSequence.read_record_length(f)
|
|
834
826
|
if record_length is None:
|
|
835
827
|
break
|
|
836
828
|
# Read the actual record (copy)
|
|
@@ -838,10 +830,10 @@ class obs_sequence:
|
|
|
838
830
|
obs.append(struct.unpack("d", record)[0])
|
|
839
831
|
|
|
840
832
|
# Read the trailing record length (should match the leading one)
|
|
841
|
-
|
|
833
|
+
ObsSequence.check_trailing_record_length(f, record_length)
|
|
842
834
|
|
|
843
835
|
# linked list info
|
|
844
|
-
record_length = obs_sequence.read_record_length(f)
|
|
836
|
+
record_length = ObsSequence.read_record_length(f)
|
|
845
837
|
if record_length is None:
|
|
846
838
|
break
|
|
847
839
|
|
|
@@ -850,17 +842,17 @@ class obs_sequence:
|
|
|
850
842
|
linked_list_string = f"{int1:<12} {int2:<10} {int3:<12}"
|
|
851
843
|
obs.append(linked_list_string)
|
|
852
844
|
|
|
853
|
-
|
|
845
|
+
ObsSequence.check_trailing_record_length(f, record_length)
|
|
854
846
|
|
|
855
847
|
# location (note no location header "loc3d" or "loc1d" for binary files)
|
|
856
848
|
obs.append("loc3d")
|
|
857
|
-
record_length = obs_sequence.read_record_length(f)
|
|
849
|
+
record_length = ObsSequence.read_record_length(f)
|
|
858
850
|
record = f.read(record_length)
|
|
859
851
|
x, y, z, vert = struct.unpack("dddi", record[:28])
|
|
860
852
|
location_string = f"{x} {y} {z} {vert}"
|
|
861
853
|
obs.append(location_string)
|
|
862
854
|
|
|
863
|
-
|
|
855
|
+
ObsSequence.check_trailing_record_length(f, record_length)
|
|
864
856
|
|
|
865
857
|
# kind (type of observation) value
|
|
866
858
|
obs.append("kind")
|
|
@@ -870,23 +862,23 @@ class obs_sequence:
|
|
|
870
862
|
kind = f"{struct.unpack('i', record)[0]}"
|
|
871
863
|
obs.append(kind)
|
|
872
864
|
|
|
873
|
-
|
|
865
|
+
ObsSequence.check_trailing_record_length(f, record_length)
|
|
874
866
|
|
|
875
867
|
# time (seconds, days)
|
|
876
|
-
record_length = obs_sequence.read_record_length(f)
|
|
868
|
+
record_length = ObsSequence.read_record_length(f)
|
|
877
869
|
record = f.read(record_length)
|
|
878
870
|
seconds, days = struct.unpack("ii", record)[:8]
|
|
879
871
|
time_string = f"{seconds} {days}"
|
|
880
872
|
obs.append(time_string)
|
|
881
873
|
|
|
882
|
-
|
|
874
|
+
ObsSequence.check_trailing_record_length(f, record_length)
|
|
883
875
|
|
|
884
876
|
# obs error variance
|
|
885
|
-
record_length = obs_sequence.read_record_length(f)
|
|
877
|
+
record_length = ObsSequence.read_record_length(f)
|
|
886
878
|
record = f.read(record_length)
|
|
887
879
|
obs.append(struct.unpack("d", record)[0])
|
|
888
880
|
|
|
889
|
-
|
|
881
|
+
ObsSequence.check_trailing_record_length(f, record_length)
|
|
890
882
|
|
|
891
883
|
yield obs
|
|
892
884
|
|
|
@@ -945,29 +937,29 @@ class obs_sequence:
|
|
|
945
937
|
"""
|
|
946
938
|
Join a list of observation sequences together.
|
|
947
939
|
|
|
948
|
-
This method combines the headers and observations from a list of obs_sequence objects
|
|
949
|
-
into a single obs_sequence object.
|
|
940
|
+
This method combines the headers and observations from a list of ObsSequence objects
|
|
941
|
+
into a single ObsSequence object.
|
|
950
942
|
|
|
951
943
|
Args:
|
|
952
|
-
obs_sequences (list of obs_sequences): The list of observation sequence objects to join.
|
|
944
|
+
obs_sequences (list of ObsSequences): The list of observation sequence objects to join.
|
|
953
945
|
copies (list of str, optional): A list of copy names to include in the combined data.
|
|
954
946
|
If not provided, all copies are included.
|
|
955
947
|
|
|
956
948
|
Returns:
|
|
957
|
-
A new obs_sequence object containing the combined data.
|
|
949
|
+
A new ObsSequence object containing the combined data.
|
|
958
950
|
|
|
959
951
|
Example:
|
|
960
952
|
.. code-block:: python
|
|
961
953
|
|
|
962
|
-
obs_seq1 = obs_sequence(file='obs_seq1.final')
|
|
963
|
-
obs_seq2 = obs_sequence(file='obs_seq2.final')
|
|
964
|
-
obs_seq3 = obs_sequence(file='obs_seq3.final')
|
|
965
|
-
combined = obs_sequence.join([obs_seq1, obs_seq2, obs_seq3])
|
|
954
|
+
obs_seq1 = ObsSequence(file='obs_seq1.final')
|
|
955
|
+
obs_seq2 = ObsSequence(file='obs_seq2.final')
|
|
956
|
+
obs_seq3 = ObsSequence(file='obs_seq3.final')
|
|
957
|
+
combined = ObsSequence.join([obs_seq1, obs_seq2, obs_seq3])
|
|
966
958
|
"""
|
|
967
959
|
if not obs_sequences:
|
|
968
960
|
raise ValueError("The list of observation sequences is empty.")
|
|
969
961
|
|
|
970
|
-
# Create a new obs_sequence object with the combined data
|
|
962
|
+
# Create a new ObsSequence object with the combined data
|
|
971
963
|
combo = cls(file=None)
|
|
972
964
|
|
|
973
965
|
# Check if all obs_sequences have compatible attributes
|
|
@@ -1092,7 +1084,7 @@ class obs_sequence:
|
|
|
1092
1084
|
|
|
1093
1085
|
# create linked list for obs
|
|
1094
1086
|
combo.df = combined_df.sort_values(by="time").reset_index(drop=True)
|
|
1095
|
-
combo.df["linked_list"] = obs_sequence.generate_linked_list_pattern(
|
|
1087
|
+
combo.df["linked_list"] = ObsSequence.generate_linked_list_pattern(
|
|
1096
1088
|
len(combo.df)
|
|
1097
1089
|
)
|
|
1098
1090
|
combo.df["obs_num"] = combined_df.index + 1
|
|
@@ -1125,7 +1117,7 @@ class obs_sequence:
|
|
|
1125
1117
|
)
|
|
1126
1118
|
|
|
1127
1119
|
def create_header(self, n):
|
|
1128
|
-
"""Create a header for the obs_seq file from the
|
|
1120
|
+
"""Create a header for the obs_seq file from the ObsSequence object."""
|
|
1129
1121
|
assert (
|
|
1130
1122
|
self.n_copies == self.n_non_qc + self.n_qc
|
|
1131
1123
|
), "n_copies must be equal to n_non_qc + n_qc"
|
|
@@ -4,8 +4,6 @@ import numpy as np
|
|
|
4
4
|
from functools import wraps
|
|
5
5
|
from datetime import datetime, timedelta
|
|
6
6
|
|
|
7
|
-
# from pydartdiags.obs_sequence import obs_sequence as obsq
|
|
8
|
-
|
|
9
7
|
|
|
10
8
|
def apply_to_phases_in_place(func):
|
|
11
9
|
"""
|
|
@@ -93,6 +91,12 @@ def calculate_rank(df, phase):
|
|
|
93
91
|
"""
|
|
94
92
|
Calculate the rank of observations within an ensemble.
|
|
95
93
|
|
|
94
|
+
Note:
|
|
95
|
+
|
|
96
|
+
This function is decorated with @apply_to_phases_by_obs, which modifies its usage.
|
|
97
|
+
You should call it as calculate_rank(df), and the decorator will automatically apply the
|
|
98
|
+
function to all relevant phases (‘prior’ and ‘posterior’).
|
|
99
|
+
|
|
96
100
|
This function takes a DataFrame containing ensemble predictions and observed values,
|
|
97
101
|
adds sampling noise to the ensemble predictions, and calculates the rank of the observed
|
|
98
102
|
value within the perturbed ensemble for each observation. The rank indicates the position
|
|
@@ -103,8 +107,6 @@ def calculate_rank(df, phase):
|
|
|
103
107
|
Parameters:
|
|
104
108
|
df (pd.DataFrame): A DataFrame with columns for rank, and observation type.
|
|
105
109
|
|
|
106
|
-
phase (str): The phase for which to calculate the statistics ('prior' or 'posterior')
|
|
107
|
-
|
|
108
110
|
Returns:
|
|
109
111
|
DataFrame containing columns for 'rank' and observation 'type'.
|
|
110
112
|
"""
|
|
@@ -158,15 +160,20 @@ def diag_stats(df, phase):
|
|
|
158
160
|
"""
|
|
159
161
|
Calculate diagnostic statistics for a given phase and add them to the DataFrame.
|
|
160
162
|
|
|
163
|
+
Note:
|
|
164
|
+
This function is decorated with @apply_to_phases_in_place, which modifies its usage.
|
|
165
|
+
You should call it as diag_stats(df), and the decorator will automatically apply the
|
|
166
|
+
function to all relevant phases (‘prior’ and ‘posterior’) modifying the DataFrame
|
|
167
|
+
in place.
|
|
168
|
+
|
|
161
169
|
Args:
|
|
162
170
|
df (pandas.DataFrame): The input DataFrame containing observation data and ensemble statistics.
|
|
163
|
-
|
|
164
|
-
- 'observation': The actual observation values.
|
|
165
|
-
- 'obs_err_var': The variance of the observation error.
|
|
166
|
-
- 'prior_ensemble_mean' and/or 'posterior_ensemble_mean': The mean of the ensemble.
|
|
167
|
-
- 'prior_ensemble_spread' and/or 'posterior_ensemble_spread': The spread of the ensemble.
|
|
171
|
+
The DataFrame must include the following columns:
|
|
168
172
|
|
|
169
|
-
|
|
173
|
+
- 'observation': The actual observation values.
|
|
174
|
+
- 'obs_err_var': The variance of the observation error.
|
|
175
|
+
- 'prior_ensemble_mean' and/or 'posterior_ensemble_mean': The mean of the ensemble.
|
|
176
|
+
- 'prior_ensemble_spread' and/or 'posterior_ensemble_spread': The spread of the ensemble.
|
|
170
177
|
|
|
171
178
|
Returns:
|
|
172
179
|
None: The function modifies the DataFrame in place by adding the following columns:
|
|
@@ -203,9 +210,12 @@ def bin_by_layer(df, levels, verticalUnit="pressure (Pa)"):
|
|
|
203
210
|
vertical level bin. Only observations (row) with the specified vertical unit are binned.
|
|
204
211
|
|
|
205
212
|
Args:
|
|
206
|
-
df (pandas.DataFrame): The input DataFrame containing observation data.
|
|
213
|
+
df (pandas.DataFrame): The input DataFrame containing observation data.
|
|
214
|
+
The DataFrame must include the following columns:
|
|
215
|
+
|
|
207
216
|
- 'vertical': The vertical coordinate values of the observations.
|
|
208
217
|
- 'vert_unit': The unit of the vertical coordinate values.
|
|
218
|
+
|
|
209
219
|
levels (list): A list of bin edges for the vertical levels.
|
|
210
220
|
verticalUnit (str, optional): The unit of the vertical axis (e.g., 'pressure (Pa)'). Default is 'pressure (Pa)'.
|
|
211
221
|
|
|
@@ -261,6 +271,28 @@ def bin_by_time(df, time_value):
|
|
|
261
271
|
|
|
262
272
|
@apply_to_phases_by_type_return_df
|
|
263
273
|
def grand_statistics(df, phase):
|
|
274
|
+
"""
|
|
275
|
+
Calculate grand statistics (RMSE, bias, total spread) for each observation type and phase.
|
|
276
|
+
|
|
277
|
+
This function assumes that diagnostic statistics (such as squared error, bias, and total variance)
|
|
278
|
+
have already been computed by :func:`diag_stats` and are present in the DataFrame. It groups the data by observation
|
|
279
|
+
type and computes the root mean square error (RMSE), mean bias, and total spread for the specified phase.
|
|
280
|
+
|
|
281
|
+
Note:
|
|
282
|
+
This function is decorated with @apply_to_phases_by_type_return_df, which modifies its usage
|
|
283
|
+
You should call it as grand_statistics(df), and the decorator will automatically apply the function
|
|
284
|
+
to all relevant phases ('prior' and 'posterior') and return a merged DataFrame.
|
|
285
|
+
|
|
286
|
+
Args:
|
|
287
|
+
df (pandas.DataFrame): The input DataFrame containing diagnostic statistics for observations.
|
|
288
|
+
|
|
289
|
+
Returns:
|
|
290
|
+
pandas.DataFrame: A DataFrame with columns:
|
|
291
|
+
- 'type': The observation type.
|
|
292
|
+
- '{phase}_rmse': The root mean square error for the phase.
|
|
293
|
+
- '{phase}_bias': The mean bias for the phase.
|
|
294
|
+
- '{phase}_totalspread': The total spread for the phase.
|
|
295
|
+
"""
|
|
264
296
|
|
|
265
297
|
# assuming diag_stats has been called
|
|
266
298
|
grand = (
|
|
@@ -283,6 +315,33 @@ def grand_statistics(df, phase):
|
|
|
283
315
|
|
|
284
316
|
@apply_to_phases_by_type_return_df
|
|
285
317
|
def layer_statistics(df, phase):
|
|
318
|
+
"""
|
|
319
|
+
Calculate statistics (RMSE, bias, total spread) for each observation type and vertical layer.
|
|
320
|
+
|
|
321
|
+
This function assumes that diagnostic statistics (such as squared error, bias, and total variance)
|
|
322
|
+
have already been computed with :func:`diag_stats` and are present in the DataFrame. It groups the data by
|
|
323
|
+
vertical layer midpoint and observation type, and computes the root mean square error (RMSE),
|
|
324
|
+
mean bias, and total spread for the specified phase for each vertical layer.
|
|
325
|
+
|
|
326
|
+
Note:
|
|
327
|
+
This function is decorated with @apply_to_phases_by_type_return_df, which modifies its usage
|
|
328
|
+
You should call it as layer_statistics(df), and the decorator will automatically apply the function
|
|
329
|
+
to all relevant phases ('prior' and 'posterior') and return a merged DataFrame.
|
|
330
|
+
|
|
331
|
+
Args:
|
|
332
|
+
df (pandas.DataFrame): The input DataFrame containing diagnostic statistics for observations.
|
|
333
|
+
phase (str): The phase for which to calculate the statistics ('prior' or 'posterior').
|
|
334
|
+
|
|
335
|
+
Returns:
|
|
336
|
+
pandas.DataFrame: A DataFrame with columns:
|
|
337
|
+
- 'midpoint': The midpoint of the vertical layer.
|
|
338
|
+
- 'type': The observation type.
|
|
339
|
+
- '{phase}_rmse': The root mean square error for the phase.
|
|
340
|
+
- '{phase}_bias': The mean bias for the phase.
|
|
341
|
+
- '{phase}_totalspread': The total spread for the phase.
|
|
342
|
+
- 'vert_unit': The vertical unit.
|
|
343
|
+
- 'vlevels': The categorized vertical level.
|
|
344
|
+
"""
|
|
286
345
|
|
|
287
346
|
# assuming diag_stats has been called
|
|
288
347
|
layer_stats = (
|
|
@@ -310,14 +369,31 @@ def layer_statistics(df, phase):
|
|
|
310
369
|
@apply_to_phases_by_type_return_df
|
|
311
370
|
def time_statistics(df, phase):
|
|
312
371
|
"""
|
|
313
|
-
Calculate time-based statistics for
|
|
372
|
+
Calculate time-based statistics (RMSE, bias, total spread) for each observation type and time bin.
|
|
373
|
+
|
|
374
|
+
This function assumes that diagnostic statistics (such as squared error, bias, and total variance)
|
|
375
|
+
have already been computed by :func:`diag_stats` and are present in the DataFrame. It groups the data
|
|
376
|
+
by time bin midpoint and observation type, and computes the root mean square error (RMSE), mean bias,
|
|
377
|
+
and total spread for the specified phase for each time bin.
|
|
378
|
+
|
|
379
|
+
Note:
|
|
380
|
+
This function is decorated with @apply_to_phases_by_type_return_df.
|
|
381
|
+
You should call it as time_statistics(df), and the decorator will automatically apply the function
|
|
382
|
+
to all relevant phases ('prior' and 'posterior') and return a merged DataFrame.
|
|
314
383
|
|
|
315
384
|
Args:
|
|
316
|
-
df (pandas.DataFrame): The input DataFrame containing
|
|
385
|
+
df (pandas.DataFrame): The input DataFrame containing diagnostic statistics for observations.
|
|
317
386
|
phase (str): The phase for which to calculate the statistics ('prior' or 'posterior').
|
|
318
387
|
|
|
319
388
|
Returns:
|
|
320
|
-
pandas.DataFrame: A DataFrame
|
|
389
|
+
pandas.DataFrame: A DataFrame with columns:
|
|
390
|
+
- 'time_bin_midpoint': The midpoint of the time bin.
|
|
391
|
+
- 'type': The observation type.
|
|
392
|
+
- '{phase}_rmse': The root mean square error for the phase.
|
|
393
|
+
- '{phase}_bias': The mean bias for the phase.
|
|
394
|
+
- '{phase}_totalspread': The total spread for the phase.
|
|
395
|
+
- 'time_bin': The time bin interval.
|
|
396
|
+
- 'time': The first time value in the bin.
|
|
321
397
|
"""
|
|
322
398
|
# Assuming diag_stats has been called
|
|
323
399
|
time_stats = (
|
|
@@ -402,7 +478,9 @@ def possible_vs_used_by_time(df):
|
|
|
402
478
|
Calculates the count of possible vs. used observations by type and time bin.
|
|
403
479
|
|
|
404
480
|
Args:
|
|
405
|
-
df (pd.DataFrame): The input DataFrame containing observation data.
|
|
481
|
+
df (pd.DataFrame): The input DataFrame containing observation data.
|
|
482
|
+
The DataFrame must include:
|
|
483
|
+
|
|
406
484
|
- 'type': The observation type.
|
|
407
485
|
- 'time_bin_midpoint': The midpoint of the time bin.
|
|
408
486
|
- 'observation': The observation values.
|
|
@@ -1,15 +1,15 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pydartdiags
|
|
3
|
-
Version: 0.5.1
|
|
3
|
+
Version: 0.6.0
|
|
4
4
|
Summary: Observation Sequence Diagnostics for DART
|
|
5
5
|
Home-page: https://github.com/NCAR/pyDARTdiags.git
|
|
6
6
|
Author: Helen Kershaw
|
|
7
7
|
Author-email: Helen Kershaw <hkershaw@ucar.edu>
|
|
8
|
+
License-Expression: Apache-2.0
|
|
8
9
|
Project-URL: Homepage, https://github.com/NCAR/pyDARTdiags.git
|
|
9
10
|
Project-URL: Issues, https://github.com/NCAR/pyDARTdiags/issues
|
|
10
11
|
Project-URL: Documentation, https://ncar.github.io/pyDARTdiags
|
|
11
12
|
Classifier: Programming Language :: Python :: 3
|
|
12
|
-
Classifier: License :: OSI Approved :: Apache Software License
|
|
13
13
|
Classifier: Operating System :: OS Independent
|
|
14
14
|
Requires-Python: >=3.8
|
|
15
15
|
Description-Content-Type: text/markdown
|
|
@@ -43,7 +43,7 @@ class TestSanitizeInput:
|
|
|
43
43
|
ValueError,
|
|
44
44
|
match="Neither 'loc3d' nor 'loc1d' could be found in the observation sequence.",
|
|
45
45
|
):
|
|
46
|
-
obj = obsq.obs_sequence(bad_loc_file_path)
|
|
46
|
+
obj = obsq.ObsSequence(bad_loc_file_path)
|
|
47
47
|
|
|
48
48
|
|
|
49
49
|
class TestOneDimensional:
|
|
@@ -53,7 +53,7 @@ class TestOneDimensional:
|
|
|
53
53
|
return os.path.join(test_dir, "data", "obs_seq.1d.final")
|
|
54
54
|
|
|
55
55
|
def test_read1d(self, obs_seq_file_path):
|
|
56
|
-
obj = obsq.obs_sequence(obs_seq_file_path)
|
|
56
|
+
obj = obsq.ObsSequence(obs_seq_file_path)
|
|
57
57
|
assert obj.loc_mod == "loc1d"
|
|
58
58
|
assert len(obj.df) == 40 # 40 obs in the file
|
|
59
59
|
assert (
|
|
@@ -69,11 +69,11 @@ class TestSynonyms:
|
|
|
69
69
|
return os.path.join(test_dir, "data", "obs_seq.final.ascii.syn")
|
|
70
70
|
|
|
71
71
|
def test_single(self, synonym_file_path):
|
|
72
|
-
obj1 = obsq.obs_sequence(synonym_file_path, synonyms="observationx")
|
|
72
|
+
obj1 = obsq.ObsSequence(synonym_file_path, synonyms="observationx")
|
|
73
73
|
assert "observationx" in obj1.synonyms_for_obs
|
|
74
74
|
|
|
75
75
|
def test_list(self, synonym_file_path):
|
|
76
|
-
obj2 = obsq.obs_sequence(
|
|
76
|
+
obj2 = obsq.ObsSequence(
|
|
77
77
|
synonym_file_path, synonyms=["synonym1", "synonym2", "observationx"]
|
|
78
78
|
)
|
|
79
79
|
assert "synonym1" in obj2.synonyms_for_obs
|
|
@@ -87,7 +87,7 @@ class TestBinaryObsSequence:
|
|
|
87
87
|
return os.path.join(test_dir, "data", "obs_seq.final.binary.small")
|
|
88
88
|
|
|
89
89
|
def test_read_binary(self, binary_obs_seq_file_path):
|
|
90
|
-
obj = obsq.obs_sequence(binary_obs_seq_file_path)
|
|
90
|
+
obj = obsq.ObsSequence(binary_obs_seq_file_path)
|
|
91
91
|
assert len(obj.df) > 0 # Ensure the DataFrame is not empty
|
|
92
92
|
|
|
93
93
|
|
|
@@ -172,7 +172,7 @@ class TestWriteAscii:
|
|
|
172
172
|
temp_output_file_path = os.path.join(temp_dir, "obs_seq.final.ascii.write")
|
|
173
173
|
|
|
174
174
|
# Create an instance of the obs_sequence class and write the output file
|
|
175
|
-
obj = obsq.obs_sequence(ascii_obs_seq_file_path)
|
|
175
|
+
obj = obsq.ObsSequence(ascii_obs_seq_file_path)
|
|
176
176
|
obj.write_obs_seq(temp_output_file_path)
|
|
177
177
|
|
|
178
178
|
# Ensure the output file exists
|
|
@@ -199,7 +199,7 @@ class TestWriteAscii:
|
|
|
199
199
|
)
|
|
200
200
|
|
|
201
201
|
# Create an instance of the obs_sequence class and write the output file
|
|
202
|
-
obj = obsq.obs_sequence(obs_seq_file_path)
|
|
202
|
+
obj = obsq.ObsSequence(obs_seq_file_path)
|
|
203
203
|
stats.diag_stats(obj.df) # add the stats columns
|
|
204
204
|
obj.write_obs_seq(temp_output_file_path)
|
|
205
205
|
|
|
@@ -227,7 +227,7 @@ class TestWriteAscii:
|
|
|
227
227
|
)
|
|
228
228
|
|
|
229
229
|
# Create an instance of the obs_sequence class and write the output file
|
|
230
|
-
obj = obsq.obs_sequence(obs_seq_file_path)
|
|
230
|
+
obj = obsq.ObsSequence(obs_seq_file_path)
|
|
231
231
|
hPalevels = [
|
|
232
232
|
0.0,
|
|
233
233
|
100.0,
|
|
@@ -264,7 +264,7 @@ class TestWriteAscii:
|
|
|
264
264
|
obs_seq_file_path = os.path.join(
|
|
265
265
|
os.path.dirname(__file__), "data", "obs_seq.final.ascii.small"
|
|
266
266
|
)
|
|
267
|
-
obj = obsq.
|
|
267
|
+
obj = obsq.ObsSequence(obs_seq_file_path)
|
|
268
268
|
|
|
269
269
|
# Remove obs except ACARS_TEMPERATURE
|
|
270
270
|
obj.df = obj.df[(obj.df["type"] == "ACARS_TEMPERATURE")]
|
|
@@ -297,7 +297,7 @@ class TestObsDataframe:
|
|
|
297
297
|
df = pd.DataFrame(data)
|
|
298
298
|
|
|
299
299
|
# Create an instance of ObsSequence with the sample DataFrame
|
|
300
|
-
obs_seq = obsq.
|
|
300
|
+
obs_seq = obsq.ObsSequence(file=None)
|
|
301
301
|
obs_seq.df = df
|
|
302
302
|
return obs_seq
|
|
303
303
|
|
|
@@ -394,15 +394,15 @@ class TestJoin:
|
|
|
394
394
|
with pytest.raises(
|
|
395
395
|
ValueError, match="The list of observation sequences is empty."
|
|
396
396
|
):
|
|
397
|
-
obsq.
|
|
397
|
+
obsq.ObsSequence.join([])
|
|
398
398
|
|
|
399
399
|
def test_join_diff_locs(self, obs_seq1d_file_path, binary_obs_seq_file_path):
|
|
400
|
-
obj1 = obsq.
|
|
401
|
-
obj2 = obsq.
|
|
400
|
+
obj1 = obsq.ObsSequence(obs_seq1d_file_path)
|
|
401
|
+
obj2 = obsq.ObsSequence(binary_obs_seq_file_path)
|
|
402
402
|
with pytest.raises(
|
|
403
403
|
ValueError, match="All observation sequences must have the same loc_mod."
|
|
404
404
|
):
|
|
405
|
-
obsq.
|
|
405
|
+
obsq.ObsSequence.join([obj1, obj2])
|
|
406
406
|
|
|
407
407
|
def test_join_three_obs_seqs(
|
|
408
408
|
self,
|
|
@@ -410,10 +410,10 @@ class TestJoin:
|
|
|
410
410
|
ascii_obs_seq_file_path2,
|
|
411
411
|
ascii_obs_seq_file_path3,
|
|
412
412
|
):
|
|
413
|
-
obj1 = obsq.
|
|
414
|
-
obj2 = obsq.
|
|
415
|
-
obj3 = obsq.
|
|
416
|
-
obs_seq_mega = obsq.
|
|
413
|
+
obj1 = obsq.ObsSequence(ascii_obs_seq_file_path1)
|
|
414
|
+
obj2 = obsq.ObsSequence(ascii_obs_seq_file_path2)
|
|
415
|
+
obj3 = obsq.ObsSequence(ascii_obs_seq_file_path3)
|
|
416
|
+
obs_seq_mega = obsq.ObsSequence.join([obj1, obj2, obj3])
|
|
417
417
|
|
|
418
418
|
assert obs_seq_mega.all_obs == None
|
|
419
419
|
assert len(obs_seq_mega.df) == 16 # obs in the dataframe
|
|
@@ -457,9 +457,9 @@ class TestJoin:
|
|
|
457
457
|
def test_join_list_sub_copies(
|
|
458
458
|
self, ascii_obs_seq_file_path1, ascii_obs_seq_file_path3
|
|
459
459
|
):
|
|
460
|
-
obj1 = obsq.
|
|
461
|
-
obj3 = obsq.
|
|
462
|
-
obs_seq_mega = obsq.
|
|
460
|
+
obj1 = obsq.ObsSequence(ascii_obs_seq_file_path1)
|
|
461
|
+
obj3 = obsq.ObsSequence(ascii_obs_seq_file_path3)
|
|
462
|
+
obs_seq_mega = obsq.ObsSequence.join(
|
|
463
463
|
[obj1, obj3], ["prior_ensemble_mean", "observation", "Data_QC"]
|
|
464
464
|
)
|
|
465
465
|
|
|
@@ -475,9 +475,9 @@ class TestJoin:
|
|
|
475
475
|
def test_join_list_sub_copies_no_qc(
|
|
476
476
|
self, ascii_obs_seq_file_path1, ascii_obs_seq_file_path3
|
|
477
477
|
):
|
|
478
|
-
obj1 = obsq.
|
|
479
|
-
obj3 = obsq.
|
|
480
|
-
obs_seq_mega = obsq.
|
|
478
|
+
obj1 = obsq.ObsSequence(ascii_obs_seq_file_path1)
|
|
479
|
+
obj3 = obsq.ObsSequence(ascii_obs_seq_file_path3)
|
|
480
|
+
obs_seq_mega = obsq.ObsSequence.join(
|
|
481
481
|
[obj1, obj3], ["observation", "prior_ensemble_spread"]
|
|
482
482
|
)
|
|
483
483
|
|
|
@@ -489,29 +489,29 @@ class TestJoin:
|
|
|
489
489
|
def test_join_copies_not_in_all(
|
|
490
490
|
self, ascii_obs_seq_file_path1, ascii_obs_seq_file_path4
|
|
491
491
|
):
|
|
492
|
-
obj1 = obsq.
|
|
493
|
-
obj4 = obsq.
|
|
492
|
+
obj1 = obsq.ObsSequence(ascii_obs_seq_file_path1)
|
|
493
|
+
obj4 = obsq.ObsSequence(ascii_obs_seq_file_path4)
|
|
494
494
|
with pytest.raises(
|
|
495
495
|
ValueError, match="All observation sequences must have the same copies."
|
|
496
496
|
):
|
|
497
|
-
obsq.
|
|
497
|
+
obsq.ObsSequence.join([obj1, obj4])
|
|
498
498
|
|
|
499
499
|
def test_join_copies_not_all_have_subset(
|
|
500
500
|
self, ascii_obs_seq_file_path1, ascii_obs_seq_file_path4
|
|
501
501
|
):
|
|
502
|
-
obj1 = obsq.
|
|
503
|
-
obj4 = obsq.
|
|
502
|
+
obj1 = obsq.ObsSequence(ascii_obs_seq_file_path1)
|
|
503
|
+
obj4 = obsq.ObsSequence(ascii_obs_seq_file_path4)
|
|
504
504
|
with pytest.raises(
|
|
505
505
|
ValueError, match="All observation sequences must have the selected copies."
|
|
506
506
|
):
|
|
507
|
-
obsq.
|
|
507
|
+
obsq.ObsSequence.join([obj1, obj4], ["prior_ensemble_member_41"])
|
|
508
508
|
|
|
509
509
|
def test_join_list_sub_copies(
|
|
510
510
|
self, ascii_obs_seq_file_path1, ascii_obs_seq_file_path3
|
|
511
511
|
):
|
|
512
|
-
obj1 = obsq.
|
|
513
|
-
obj3 = obsq.
|
|
514
|
-
obs_seq_mega = obsq.
|
|
512
|
+
obj1 = obsq.ObsSequence(ascii_obs_seq_file_path1)
|
|
513
|
+
obj3 = obsq.ObsSequence(ascii_obs_seq_file_path3)
|
|
514
|
+
obs_seq_mega = obsq.ObsSequence.join(
|
|
515
515
|
[obj1, obj3], ["prior_ensemble_mean", "observation", "Data_QC"]
|
|
516
516
|
)
|
|
517
517
|
assert obs_seq_mega.has_assimilation_info() == False
|
|
@@ -520,7 +520,7 @@ class TestJoin:
|
|
|
520
520
|
|
|
521
521
|
class TestCreateHeader:
|
|
522
522
|
def test_create_header(self):
|
|
523
|
-
obj = obsq.
|
|
523
|
+
obj = obsq.ObsSequence(file=None)
|
|
524
524
|
|
|
525
525
|
obj.types = {1: "ACARS_BELLYBUTTON", 2: "NCEP_TOES"}
|
|
526
526
|
obj.n_non_qc = 2
|
|
@@ -551,7 +551,7 @@ class TestCreateHeader:
|
|
|
551
551
|
class TestSplitMetadata:
|
|
552
552
|
def test_split_metadata_with_external_FO(self):
|
|
553
553
|
metadata = ["meta1", "meta2", "external_FO1", "meta3", "meta4"]
|
|
554
|
-
before_external_FO, after_external_FO = obsq.
|
|
554
|
+
before_external_FO, after_external_FO = obsq.ObsSequence.split_metadata(
|
|
555
555
|
metadata
|
|
556
556
|
)
|
|
557
557
|
assert before_external_FO == ["meta1", "meta2"]
|
|
@@ -559,7 +559,7 @@ class TestSplitMetadata:
|
|
|
559
559
|
|
|
560
560
|
def test_split_metadata_without_external_FO(self):
|
|
561
561
|
metadata = ["meta1", "meta2", "meta3", "meta4"]
|
|
562
|
-
before_external_FO, after_external_FO = obsq.
|
|
562
|
+
before_external_FO, after_external_FO = obsq.ObsSequence.split_metadata(
|
|
563
563
|
metadata
|
|
564
564
|
)
|
|
565
565
|
assert before_external_FO == ["meta1", "meta2", "meta3", "meta4"]
|
|
@@ -567,7 +567,7 @@ class TestSplitMetadata:
|
|
|
567
567
|
|
|
568
568
|
def test_split_metadata_multiple_external_FO(self):
|
|
569
569
|
metadata = ["meta1", "external_FO1", "meta2", "external_FO2", "meta3"]
|
|
570
|
-
before_external_FO, after_external_FO = obsq.
|
|
570
|
+
before_external_FO, after_external_FO = obsq.ObsSequence.split_metadata(
|
|
571
571
|
metadata
|
|
572
572
|
)
|
|
573
573
|
assert before_external_FO == ["meta1"]
|
|
@@ -575,7 +575,7 @@ class TestSplitMetadata:
|
|
|
575
575
|
|
|
576
576
|
def test_split_metadata_empty_list(self):
|
|
577
577
|
metadata = []
|
|
578
|
-
before_external_FO, after_external_FO = obsq.
|
|
578
|
+
before_external_FO, after_external_FO = obsq.ObsSequence.split_metadata(
|
|
579
579
|
metadata
|
|
580
580
|
)
|
|
581
581
|
assert before_external_FO == []
|
|
@@ -583,7 +583,7 @@ class TestSplitMetadata:
|
|
|
583
583
|
|
|
584
584
|
def test_split_metadata_no_external_FO(self):
|
|
585
585
|
metadata = ["meta1", "meta2", "meta3"]
|
|
586
|
-
before_external_FO, after_external_FO = obsq.
|
|
586
|
+
before_external_FO, after_external_FO = obsq.ObsSequence.split_metadata(
|
|
587
587
|
metadata
|
|
588
588
|
)
|
|
589
589
|
assert before_external_FO == ["meta1", "meta2", "meta3"]
|
|
@@ -591,7 +591,7 @@ class TestSplitMetadata:
|
|
|
591
591
|
|
|
592
592
|
def test_split_metadata_external_FO_at_start(self):
|
|
593
593
|
metadata = ["external_FO1", "meta1", "meta2"]
|
|
594
|
-
before_external_FO, after_external_FO = obsq.
|
|
594
|
+
before_external_FO, after_external_FO = obsq.ObsSequence.split_metadata(
|
|
595
595
|
metadata
|
|
596
596
|
)
|
|
597
597
|
assert before_external_FO == []
|
|
@@ -599,7 +599,7 @@ class TestSplitMetadata:
|
|
|
599
599
|
|
|
600
600
|
def test_split_metadata_external_FO_at_end(self):
|
|
601
601
|
metadata = ["meta1", "meta2", "external_FO1"]
|
|
602
|
-
before_external_FO, after_external_FO = obsq.
|
|
602
|
+
before_external_FO, after_external_FO = obsq.ObsSequence.split_metadata(
|
|
603
603
|
metadata
|
|
604
604
|
)
|
|
605
605
|
assert before_external_FO == ["meta1", "meta2"]
|
|
@@ -610,7 +610,7 @@ class TestGenerateLinkedListPattern:
|
|
|
610
610
|
def test_generate_linked_list_pattern(self):
|
|
611
611
|
n = 1
|
|
612
612
|
expected_pattern = ["0 -1 -1"]
|
|
613
|
-
result = obsq.
|
|
613
|
+
result = obsq.ObsSequence.generate_linked_list_pattern(n)
|
|
614
614
|
assert result == expected_pattern
|
|
615
615
|
|
|
616
616
|
n = 3
|
|
@@ -619,7 +619,7 @@ class TestGenerateLinkedListPattern:
|
|
|
619
619
|
"1 3 -1",
|
|
620
620
|
"2 -1 -1",
|
|
621
621
|
]
|
|
622
|
-
result = obsq.
|
|
622
|
+
result = obsq.ObsSequence.generate_linked_list_pattern(n)
|
|
623
623
|
assert result == expected_pattern
|
|
624
624
|
|
|
625
625
|
n = 6
|
|
@@ -631,7 +631,7 @@ class TestGenerateLinkedListPattern:
|
|
|
631
631
|
"4 6 -1",
|
|
632
632
|
"5 -1 -1",
|
|
633
633
|
]
|
|
634
|
-
result = obsq.
|
|
634
|
+
result = obsq.ObsSequence.generate_linked_list_pattern(n)
|
|
635
635
|
assert result == expected_pattern
|
|
636
636
|
|
|
637
637
|
|
|
@@ -665,7 +665,7 @@ class TestCreateHeaderFromDataFrame:
|
|
|
665
665
|
df = pd.DataFrame(data)
|
|
666
666
|
|
|
667
667
|
# Create an instance of obs_sequence with the sample DataFrame
|
|
668
|
-
obs_seq = obsq.
|
|
668
|
+
obs_seq = obsq.ObsSequence(file=None)
|
|
669
669
|
obs_seq.df = df
|
|
670
670
|
obs_seq.reverse_types = {
|
|
671
671
|
"ACARS_TEMPERATURE": 1,
|
|
@@ -732,7 +732,7 @@ class TestUpdateTypesDicts:
|
|
|
732
732
|
"52": "PINEAPPLE_COUNT",
|
|
733
733
|
}
|
|
734
734
|
|
|
735
|
-
updated_reverse_types, types = obsq.
|
|
735
|
+
updated_reverse_types, types = obsq.ObsSequence.update_types_dicts(
|
|
736
736
|
sample_df, reverse_types
|
|
737
737
|
)
|
|
738
738
|
|
|
@@ -747,7 +747,7 @@ class TestCompositeTypes:
|
|
|
747
747
|
file_path = os.path.join(test_dir, "data", "three-obs.final")
|
|
748
748
|
|
|
749
749
|
# Create an instance of obs_sequence with the 'three-obs.final' file
|
|
750
|
-
obs_seq = obsq.
|
|
750
|
+
obs_seq = obsq.ObsSequence(file_path)
|
|
751
751
|
return obs_seq
|
|
752
752
|
|
|
753
753
|
@pytest.mark.parametrize(
|
|
@@ -850,7 +850,7 @@ class TestCompositeTypes:
|
|
|
850
850
|
test_dir = os.path.dirname(__file__)
|
|
851
851
|
file_path = os.path.join(test_dir, "data", "dups-obs.final")
|
|
852
852
|
|
|
853
|
-
dup = obsq.
|
|
853
|
+
dup = obsq.ObsSequence(file_path)
|
|
854
854
|
# Test that composite_types raises an error
|
|
855
855
|
with pytest.raises(Exception, match="There are duplicates in the components."):
|
|
856
856
|
dup.composite_types()
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|