pypromice 1.3.6__py3-none-any.whl → 1.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pypromice might be problematic. Click here for more details.

Files changed (53)
  1. pypromice/postprocess/bufr_to_csv.py +15 -3
  2. pypromice/postprocess/bufr_utilities.py +91 -18
  3. pypromice/postprocess/create_bufr_files.py +178 -0
  4. pypromice/postprocess/get_bufr.py +248 -397
  5. pypromice/postprocess/make_metadata_csv.py +214 -0
  6. pypromice/postprocess/real_time_utilities.py +41 -11
  7. pypromice/process/L0toL1.py +12 -5
  8. pypromice/process/L1toL2.py +69 -14
  9. pypromice/process/L2toL3.py +1034 -186
  10. pypromice/process/aws.py +139 -808
  11. pypromice/process/get_l2.py +90 -0
  12. pypromice/process/get_l2tol3.py +111 -0
  13. pypromice/process/join_l2.py +112 -0
  14. pypromice/process/join_l3.py +551 -120
  15. pypromice/process/load.py +161 -0
  16. pypromice/process/resample.py +147 -0
  17. pypromice/process/utilities.py +68 -0
  18. pypromice/process/write.py +503 -0
  19. pypromice/qc/github_data_issues.py +10 -16
  20. pypromice/qc/persistence.py +52 -30
  21. pypromice/resources/__init__.py +28 -0
  22. pypromice/{process/metadata.csv → resources/file_attributes.csv} +0 -2
  23. pypromice/resources/variable_aliases_GC-Net.csv +78 -0
  24. pypromice/resources/variables.csv +106 -0
  25. pypromice/station_configuration.py +118 -0
  26. pypromice/tx/get_l0tx.py +7 -4
  27. pypromice/tx/payload_formats.csv +1 -0
  28. pypromice/tx/tx.py +27 -6
  29. pypromice/utilities/__init__.py +0 -0
  30. pypromice/utilities/git.py +62 -0
  31. {pypromice-1.3.6.dist-info → pypromice-1.4.1.dist-info}/METADATA +4 -4
  32. pypromice-1.4.1.dist-info/RECORD +53 -0
  33. {pypromice-1.3.6.dist-info → pypromice-1.4.1.dist-info}/WHEEL +1 -1
  34. pypromice-1.4.1.dist-info/entry_points.txt +13 -0
  35. pypromice/postprocess/station_configurations.toml +0 -762
  36. pypromice/process/get_l3.py +0 -46
  37. pypromice/process/variables.csv +0 -92
  38. pypromice/qc/persistence_test.py +0 -150
  39. pypromice/test/test_config1.toml +0 -69
  40. pypromice/test/test_config2.toml +0 -54
  41. pypromice/test/test_email +0 -75
  42. pypromice/test/test_payload_formats.csv +0 -4
  43. pypromice/test/test_payload_types.csv +0 -7
  44. pypromice/test/test_percentile.py +0 -229
  45. pypromice/test/test_raw1.txt +0 -4468
  46. pypromice/test/test_raw_DataTable2.txt +0 -11167
  47. pypromice/test/test_raw_SlimTableMem1.txt +0 -1155
  48. pypromice/test/test_raw_transmitted1.txt +0 -15411
  49. pypromice/test/test_raw_transmitted2.txt +0 -28
  50. pypromice-1.3.6.dist-info/RECORD +0 -53
  51. pypromice-1.3.6.dist-info/entry_points.txt +0 -8
  52. {pypromice-1.3.6.dist-info → pypromice-1.4.1.dist-info}/LICENSE.txt +0 -0
  53. {pypromice-1.3.6.dist-info → pypromice-1.4.1.dist-info}/top_level.txt +0 -0
@@ -1,229 +0,0 @@
1
- import unittest
2
- from datetime import datetime
3
- from typing import List
4
-
5
- import numpy as np
6
- import pandas as pd
7
- import xarray as xr
8
-
9
- from pypromice.qc.percentiles.outlier_detector import (
10
- detect_outliers,
11
- filter_data,
12
- ThresholdBasedOutlierDetector,
13
- )
14
-
15
-
16
- class PercentileQCTestCase(unittest.TestCase):
17
- def test_column_pattern_matches(self):
18
- self._test_column_pattern("p_i", True)
19
-
20
- def test_column_pattern_no_match(self):
21
- self._test_column_pattern("p_l", False)
22
-
23
- def test_column_pattern_with_prefix(self):
24
- self._test_column_pattern("prefix_p_i", False)
25
-
26
- def test_column_pattern_with_suffix(self):
27
- self._test_column_pattern("p_i_suffix", False)
28
-
29
- def _test_column_pattern(self, column_name: str, expected_output: bool):
30
- season_indices = pd.DatetimeIndex(
31
- [
32
- datetime(2022, 3, 1),
33
- ]
34
- )
35
- thresholds = pd.DataFrame(
36
- [
37
- dict(
38
- stid="stid", variable_pattern="p_[iu]", lo=-100, hi=100, season=None
39
- ),
40
- ]
41
- )
42
- value_outside_range = -325
43
- input_data = pd.DataFrame(
44
- index=season_indices, columns=[column_name], data=[value_outside_range]
45
- )
46
- if expected_output:
47
- expected_mask = pd.DataFrame(
48
- index=season_indices, columns=[column_name], data=[expected_output]
49
- )
50
- else:
51
- expected_mask = pd.DataFrame(index=season_indices, columns=[], data=[])
52
-
53
- mask = detect_outliers(input_data, thresholds)
54
-
55
- pd.testing.assert_frame_equal(expected_mask, mask)
56
-
57
- def test_column_pattern_multicolumns(self):
58
- thresholds = pd.DataFrame(
59
- [
60
- dict(
61
- stid="stid", variable_pattern="p_[iu]", lo=-100, hi=100, season=None
62
- ),
63
- ]
64
- )
65
- date_index = pd.DatetimeIndex([datetime(2022, 3, 1)])
66
- input_data = pd.DataFrame(
67
- index=date_index,
68
- data=[
69
- dict(
70
- p_i=-10,
71
- p_u=1000,
72
- p_j=1000,
73
- )
74
- ],
75
- )
76
- # p_j is not in the mask because it doesn't match the pattern
77
- expected_mask = pd.DataFrame(
78
- index=date_index,
79
- data=[
80
- dict(
81
- p_i=False,
82
- p_u=True,
83
- )
84
- ],
85
- )
86
-
87
- mask = detect_outliers(input_data, thresholds)
88
-
89
- pd.testing.assert_frame_equal(expected_mask, mask)
90
-
91
- def test_no_season(self):
92
- season_indices = pd.DatetimeIndex(
93
- [
94
- datetime(2022, 3, 1),
95
- datetime(2022, 8, 1),
96
- ]
97
- )
98
- thresholds = pd.DataFrame(
99
- [
100
- dict(stid="stid", variable_pattern="p_i", lo=-100, hi=100, season=None),
101
- ]
102
- )
103
- input_data = pd.DataFrame(index=season_indices, columns=["p_i"], data=[0, -243])
104
- expected_mask = pd.DataFrame(
105
- index=season_indices, columns=["p_i"], data=[False, True]
106
- )
107
-
108
- mask = detect_outliers(input_data, thresholds)
109
-
110
- pd.testing.assert_frame_equal(expected_mask, mask)
111
-
112
- def test_season_filter_invalid_winter_and_spring(self):
113
- self._test_season_filter(
114
- input_values=[0, 0, 0, 0], expected_mask=[True, True, False, False]
115
- )
116
-
117
- def test_season_filter_invalid_summer(self):
118
- self._test_season_filter(
119
- input_values=[-10, -10, -10, -10], expected_mask=[False, False, True, False]
120
- )
121
-
122
- def test_season_filter_valid_season_values(self):
123
- self._test_season_filter(
124
- input_values=[-12, -8, -1, -3], expected_mask=[False, False, False, False]
125
- )
126
-
127
- def _test_season_filter(self, input_values: List[float], expected_mask: List[bool]):
128
- stid = "A_STID"
129
- thresholds = pd.DataFrame(
130
- [
131
- dict(
132
- stid=stid, variable_pattern="t_i", lo=-20, hi=-10, season="winter"
133
- ),
134
- dict(stid=stid, variable_pattern="t_i", lo=-10, hi=-1, season="spring"),
135
- dict(stid=stid, variable_pattern="t_i", lo=-5, hi=5, season="summer"),
136
- dict(stid=stid, variable_pattern="t_i", lo=-10, hi=0, season="fall"),
137
- ]
138
- )
139
- season_indices = pd.DatetimeIndex(
140
- [
141
- datetime(2021, 12, 1), # winter
142
- datetime(2022, 3, 1), # spring
143
- datetime(2022, 6, 1), # summer
144
- datetime(2022, 9, 1), # fall
145
- ]
146
- )
147
- input_data = pd.DataFrame(
148
- index=season_indices, columns=["t_i"], data=input_values
149
- )
150
- expected_mask = pd.DataFrame(
151
- index=season_indices, columns=["t_i"], data=expected_mask
152
- )
153
-
154
- mask = detect_outliers(input_data, thresholds)
155
-
156
- pd.testing.assert_frame_equal(expected_mask, mask)
157
-
158
- def test_remove_outliers(self):
159
- thresholds = pd.DataFrame(
160
- columns=[
161
- "stid",
162
- "variable_pattern",
163
- "lo",
164
- "hi",
165
- "season",
166
- ],
167
- data=[
168
- ["stid", "t_[iu]", -40, 0, "winter"],
169
- ["stid", "t_[iu]", -4, 10, "summer"],
170
- ],
171
- )
172
- date_index = pd.DatetimeIndex(
173
- [
174
- datetime(2022, 1, 1),
175
- datetime(2022, 8, 1),
176
- ]
177
- )
178
- input_data = pd.DataFrame(
179
- index=date_index,
180
- data=[
181
- dict(t_i=-10, p_u=994),
182
- dict(t_i=37, p_u=1024),
183
- ],
184
- )
185
- mask = detect_outliers(input_data, thresholds)
186
- expected_output_data = input_data.copy()
187
- expected_output_data[mask] = np.nan
188
-
189
- output_data = filter_data(input_data, thresholds)
190
- self.assertIsNot(output_data, input_data)
191
- pd.testing.assert_frame_equal(output_data, expected_output_data)
192
-
193
-
194
- class ThresholdBasedOutlierDetectorTestCase(unittest.TestCase):
195
- def test_default_init(self):
196
- outlier_detector = ThresholdBasedOutlierDetector.default()
197
- self.assertIsInstance(outlier_detector, ThresholdBasedOutlierDetector)
198
-
199
- def test_filter_data_aws_with_threshold(self):
200
- stid = "NUK_K"
201
- index = pd.period_range("2023-10-01", "2023-11-01", freq="1h")
202
- columns = ["p_i", "t_i", "p_l", "wpsd_u", "foo"]
203
- dataset: xr.Dataset = pd.DataFrame(
204
- index=index,
205
- columns=columns,
206
- data=np.random.random((len(index), len(columns))),
207
- ).to_xarray()
208
- dataset = dataset.assign_attrs(dict(station_id=stid))
209
- outlier_detector = ThresholdBasedOutlierDetector.default()
210
-
211
- dataset_output = outlier_detector.filter_data(dataset)
212
-
213
- self.assertIsInstance(dataset_output, xr.Dataset)
214
- self.assertSetEqual(
215
- set(dict(dataset.items())),
216
- set(dict(dataset_output.items())),
217
- )
218
-
219
- pass
220
-
221
- def test_filter_data_aws_without_threshold(self):
222
- stid = "non_exsiting"
223
- dataset = xr.Dataset(attrs=dict(station_id=stid))
224
- outlier_detector = ThresholdBasedOutlierDetector.default()
225
- self.assertNotIn(stid, outlier_detector.thresholds.stid)
226
-
227
- output_dataset = outlier_detector.filter_data(dataset)
228
-
229
- xr.testing.assert_equal(output_dataset, dataset)