pypromice-1.3.6-py3-none-any.whl → pypromice-1.4.0-py3-none-any.whl
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pypromice might be problematic. Click here for more details.
- pypromice/postprocess/bufr_to_csv.py +6 -1
- pypromice/postprocess/bufr_utilities.py +91 -18
- pypromice/postprocess/create_bufr_files.py +178 -0
- pypromice/postprocess/get_bufr.py +248 -397
- pypromice/postprocess/make_metadata_csv.py +214 -0
- pypromice/postprocess/real_time_utilities.py +41 -11
- pypromice/process/L0toL1.py +12 -5
- pypromice/process/L1toL2.py +69 -14
- pypromice/process/L2toL3.py +1033 -186
- pypromice/process/aws.py +130 -808
- pypromice/process/get_l2.py +90 -0
- pypromice/process/get_l2tol3.py +111 -0
- pypromice/process/join_l2.py +112 -0
- pypromice/process/join_l3.py +551 -120
- pypromice/process/load.py +161 -0
- pypromice/process/resample.py +128 -0
- pypromice/process/utilities.py +68 -0
- pypromice/process/write.py +503 -0
- pypromice/qc/github_data_issues.py +10 -16
- pypromice/qc/persistence.py +52 -30
- pypromice/resources/__init__.py +28 -0
- pypromice/{process/metadata.csv → resources/file_attributes.csv} +0 -2
- pypromice/resources/variable_aliases_GC-Net.csv +78 -0
- pypromice/resources/variables.csv +106 -0
- pypromice/station_configuration.py +118 -0
- pypromice/tx/get_l0tx.py +7 -4
- pypromice/tx/payload_formats.csv +1 -0
- pypromice/tx/tx.py +27 -6
- pypromice/utilities/__init__.py +0 -0
- pypromice/utilities/git.py +61 -0
- {pypromice-1.3.6.dist-info → pypromice-1.4.0.dist-info}/METADATA +3 -3
- pypromice-1.4.0.dist-info/RECORD +53 -0
- {pypromice-1.3.6.dist-info → pypromice-1.4.0.dist-info}/WHEEL +1 -1
- pypromice-1.4.0.dist-info/entry_points.txt +13 -0
- pypromice/postprocess/station_configurations.toml +0 -762
- pypromice/process/get_l3.py +0 -46
- pypromice/process/variables.csv +0 -92
- pypromice/qc/persistence_test.py +0 -150
- pypromice/test/test_config1.toml +0 -69
- pypromice/test/test_config2.toml +0 -54
- pypromice/test/test_email +0 -75
- pypromice/test/test_payload_formats.csv +0 -4
- pypromice/test/test_payload_types.csv +0 -7
- pypromice/test/test_percentile.py +0 -229
- pypromice/test/test_raw1.txt +0 -4468
- pypromice/test/test_raw_DataTable2.txt +0 -11167
- pypromice/test/test_raw_SlimTableMem1.txt +0 -1155
- pypromice/test/test_raw_transmitted1.txt +0 -15411
- pypromice/test/test_raw_transmitted2.txt +0 -28
- pypromice-1.3.6.dist-info/RECORD +0 -53
- pypromice-1.3.6.dist-info/entry_points.txt +0 -8
- {pypromice-1.3.6.dist-info → pypromice-1.4.0.dist-info}/LICENSE.txt +0 -0
- {pypromice-1.3.6.dist-info → pypromice-1.4.0.dist-info}/top_level.txt +0 -0
|
@@ -1,229 +0,0 @@
|
|
|
1
|
-
import unittest
|
|
2
|
-
from datetime import datetime
|
|
3
|
-
from typing import List
|
|
4
|
-
|
|
5
|
-
import numpy as np
|
|
6
|
-
import pandas as pd
|
|
7
|
-
import xarray as xr
|
|
8
|
-
|
|
9
|
-
from pypromice.qc.percentiles.outlier_detector import (
|
|
10
|
-
detect_outliers,
|
|
11
|
-
filter_data,
|
|
12
|
-
ThresholdBasedOutlierDetector,
|
|
13
|
-
)
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
class PercentileQCTestCase(unittest.TestCase):
|
|
17
|
-
def test_column_pattern_matches(self):
|
|
18
|
-
self._test_column_pattern("p_i", True)
|
|
19
|
-
|
|
20
|
-
def test_column_pattern_no_match(self):
|
|
21
|
-
self._test_column_pattern("p_l", False)
|
|
22
|
-
|
|
23
|
-
def test_column_pattern_with_prefix(self):
|
|
24
|
-
self._test_column_pattern("prefix_p_i", False)
|
|
25
|
-
|
|
26
|
-
def test_column_pattern_with_suffix(self):
|
|
27
|
-
self._test_column_pattern("p_i_suffix", False)
|
|
28
|
-
|
|
29
|
-
def _test_column_pattern(self, column_name: str, expected_output: bool):
|
|
30
|
-
season_indices = pd.DatetimeIndex(
|
|
31
|
-
[
|
|
32
|
-
datetime(2022, 3, 1),
|
|
33
|
-
]
|
|
34
|
-
)
|
|
35
|
-
thresholds = pd.DataFrame(
|
|
36
|
-
[
|
|
37
|
-
dict(
|
|
38
|
-
stid="stid", variable_pattern="p_[iu]", lo=-100, hi=100, season=None
|
|
39
|
-
),
|
|
40
|
-
]
|
|
41
|
-
)
|
|
42
|
-
value_outside_range = -325
|
|
43
|
-
input_data = pd.DataFrame(
|
|
44
|
-
index=season_indices, columns=[column_name], data=[value_outside_range]
|
|
45
|
-
)
|
|
46
|
-
if expected_output:
|
|
47
|
-
expected_mask = pd.DataFrame(
|
|
48
|
-
index=season_indices, columns=[column_name], data=[expected_output]
|
|
49
|
-
)
|
|
50
|
-
else:
|
|
51
|
-
expected_mask = pd.DataFrame(index=season_indices, columns=[], data=[])
|
|
52
|
-
|
|
53
|
-
mask = detect_outliers(input_data, thresholds)
|
|
54
|
-
|
|
55
|
-
pd.testing.assert_frame_equal(expected_mask, mask)
|
|
56
|
-
|
|
57
|
-
def test_column_pattern_multicolumns(self):
|
|
58
|
-
thresholds = pd.DataFrame(
|
|
59
|
-
[
|
|
60
|
-
dict(
|
|
61
|
-
stid="stid", variable_pattern="p_[iu]", lo=-100, hi=100, season=None
|
|
62
|
-
),
|
|
63
|
-
]
|
|
64
|
-
)
|
|
65
|
-
date_index = pd.DatetimeIndex([datetime(2022, 3, 1)])
|
|
66
|
-
input_data = pd.DataFrame(
|
|
67
|
-
index=date_index,
|
|
68
|
-
data=[
|
|
69
|
-
dict(
|
|
70
|
-
p_i=-10,
|
|
71
|
-
p_u=1000,
|
|
72
|
-
p_j=1000,
|
|
73
|
-
)
|
|
74
|
-
],
|
|
75
|
-
)
|
|
76
|
-
# p_j is not in the mask because it doesn't match the pattern
|
|
77
|
-
expected_mask = pd.DataFrame(
|
|
78
|
-
index=date_index,
|
|
79
|
-
data=[
|
|
80
|
-
dict(
|
|
81
|
-
p_i=False,
|
|
82
|
-
p_u=True,
|
|
83
|
-
)
|
|
84
|
-
],
|
|
85
|
-
)
|
|
86
|
-
|
|
87
|
-
mask = detect_outliers(input_data, thresholds)
|
|
88
|
-
|
|
89
|
-
pd.testing.assert_frame_equal(expected_mask, mask)
|
|
90
|
-
|
|
91
|
-
def test_no_season(self):
|
|
92
|
-
season_indices = pd.DatetimeIndex(
|
|
93
|
-
[
|
|
94
|
-
datetime(2022, 3, 1),
|
|
95
|
-
datetime(2022, 8, 1),
|
|
96
|
-
]
|
|
97
|
-
)
|
|
98
|
-
thresholds = pd.DataFrame(
|
|
99
|
-
[
|
|
100
|
-
dict(stid="stid", variable_pattern="p_i", lo=-100, hi=100, season=None),
|
|
101
|
-
]
|
|
102
|
-
)
|
|
103
|
-
input_data = pd.DataFrame(index=season_indices, columns=["p_i"], data=[0, -243])
|
|
104
|
-
expected_mask = pd.DataFrame(
|
|
105
|
-
index=season_indices, columns=["p_i"], data=[False, True]
|
|
106
|
-
)
|
|
107
|
-
|
|
108
|
-
mask = detect_outliers(input_data, thresholds)
|
|
109
|
-
|
|
110
|
-
pd.testing.assert_frame_equal(expected_mask, mask)
|
|
111
|
-
|
|
112
|
-
def test_season_filter_invalid_winter_and_spring(self):
|
|
113
|
-
self._test_season_filter(
|
|
114
|
-
input_values=[0, 0, 0, 0], expected_mask=[True, True, False, False]
|
|
115
|
-
)
|
|
116
|
-
|
|
117
|
-
def test_season_filter_invalid_summer(self):
|
|
118
|
-
self._test_season_filter(
|
|
119
|
-
input_values=[-10, -10, -10, -10], expected_mask=[False, False, True, False]
|
|
120
|
-
)
|
|
121
|
-
|
|
122
|
-
def test_season_filter_valid_season_values(self):
|
|
123
|
-
self._test_season_filter(
|
|
124
|
-
input_values=[-12, -8, -1, -3], expected_mask=[False, False, False, False]
|
|
125
|
-
)
|
|
126
|
-
|
|
127
|
-
def _test_season_filter(self, input_values: List[float], expected_mask: List[bool]):
|
|
128
|
-
stid = "A_STID"
|
|
129
|
-
thresholds = pd.DataFrame(
|
|
130
|
-
[
|
|
131
|
-
dict(
|
|
132
|
-
stid=stid, variable_pattern="t_i", lo=-20, hi=-10, season="winter"
|
|
133
|
-
),
|
|
134
|
-
dict(stid=stid, variable_pattern="t_i", lo=-10, hi=-1, season="spring"),
|
|
135
|
-
dict(stid=stid, variable_pattern="t_i", lo=-5, hi=5, season="summer"),
|
|
136
|
-
dict(stid=stid, variable_pattern="t_i", lo=-10, hi=0, season="fall"),
|
|
137
|
-
]
|
|
138
|
-
)
|
|
139
|
-
season_indices = pd.DatetimeIndex(
|
|
140
|
-
[
|
|
141
|
-
datetime(2021, 12, 1), # winter
|
|
142
|
-
datetime(2022, 3, 1), # spring
|
|
143
|
-
datetime(2022, 6, 1), # summer
|
|
144
|
-
datetime(2022, 9, 1), # fall
|
|
145
|
-
]
|
|
146
|
-
)
|
|
147
|
-
input_data = pd.DataFrame(
|
|
148
|
-
index=season_indices, columns=["t_i"], data=input_values
|
|
149
|
-
)
|
|
150
|
-
expected_mask = pd.DataFrame(
|
|
151
|
-
index=season_indices, columns=["t_i"], data=expected_mask
|
|
152
|
-
)
|
|
153
|
-
|
|
154
|
-
mask = detect_outliers(input_data, thresholds)
|
|
155
|
-
|
|
156
|
-
pd.testing.assert_frame_equal(expected_mask, mask)
|
|
157
|
-
|
|
158
|
-
def test_remove_outliers(self):
|
|
159
|
-
thresholds = pd.DataFrame(
|
|
160
|
-
columns=[
|
|
161
|
-
"stid",
|
|
162
|
-
"variable_pattern",
|
|
163
|
-
"lo",
|
|
164
|
-
"hi",
|
|
165
|
-
"season",
|
|
166
|
-
],
|
|
167
|
-
data=[
|
|
168
|
-
["stid", "t_[iu]", -40, 0, "winter"],
|
|
169
|
-
["stid", "t_[iu]", -4, 10, "summer"],
|
|
170
|
-
],
|
|
171
|
-
)
|
|
172
|
-
date_index = pd.DatetimeIndex(
|
|
173
|
-
[
|
|
174
|
-
datetime(2022, 1, 1),
|
|
175
|
-
datetime(2022, 8, 1),
|
|
176
|
-
]
|
|
177
|
-
)
|
|
178
|
-
input_data = pd.DataFrame(
|
|
179
|
-
index=date_index,
|
|
180
|
-
data=[
|
|
181
|
-
dict(t_i=-10, p_u=994),
|
|
182
|
-
dict(t_i=37, p_u=1024),
|
|
183
|
-
],
|
|
184
|
-
)
|
|
185
|
-
mask = detect_outliers(input_data, thresholds)
|
|
186
|
-
expected_output_data = input_data.copy()
|
|
187
|
-
expected_output_data[mask] = np.nan
|
|
188
|
-
|
|
189
|
-
output_data = filter_data(input_data, thresholds)
|
|
190
|
-
self.assertIsNot(output_data, input_data)
|
|
191
|
-
pd.testing.assert_frame_equal(output_data, expected_output_data)
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
class ThresholdBasedOutlierDetectorTestCase(unittest.TestCase):
|
|
195
|
-
def test_default_init(self):
|
|
196
|
-
outlier_detector = ThresholdBasedOutlierDetector.default()
|
|
197
|
-
self.assertIsInstance(outlier_detector, ThresholdBasedOutlierDetector)
|
|
198
|
-
|
|
199
|
-
def test_filter_data_aws_with_threshold(self):
|
|
200
|
-
stid = "NUK_K"
|
|
201
|
-
index = pd.period_range("2023-10-01", "2023-11-01", freq="1h")
|
|
202
|
-
columns = ["p_i", "t_i", "p_l", "wpsd_u", "foo"]
|
|
203
|
-
dataset: xr.Dataset = pd.DataFrame(
|
|
204
|
-
index=index,
|
|
205
|
-
columns=columns,
|
|
206
|
-
data=np.random.random((len(index), len(columns))),
|
|
207
|
-
).to_xarray()
|
|
208
|
-
dataset = dataset.assign_attrs(dict(station_id=stid))
|
|
209
|
-
outlier_detector = ThresholdBasedOutlierDetector.default()
|
|
210
|
-
|
|
211
|
-
dataset_output = outlier_detector.filter_data(dataset)
|
|
212
|
-
|
|
213
|
-
self.assertIsInstance(dataset_output, xr.Dataset)
|
|
214
|
-
self.assertSetEqual(
|
|
215
|
-
set(dict(dataset.items())),
|
|
216
|
-
set(dict(dataset_output.items())),
|
|
217
|
-
)
|
|
218
|
-
|
|
219
|
-
pass
|
|
220
|
-
|
|
221
|
-
def test_filter_data_aws_without_threshold(self):
|
|
222
|
-
stid = "non_exsiting"
|
|
223
|
-
dataset = xr.Dataset(attrs=dict(station_id=stid))
|
|
224
|
-
outlier_detector = ThresholdBasedOutlierDetector.default()
|
|
225
|
-
self.assertNotIn(stid, outlier_detector.thresholds.stid)
|
|
226
|
-
|
|
227
|
-
output_dataset = outlier_detector.filter_data(dataset)
|
|
228
|
-
|
|
229
|
-
xr.testing.assert_equal(output_dataset, dataset)
|