pypromice 1.3.5__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pypromice might be problematic. Click here for more details.

Files changed (55)
  1. pypromice/get/get.py +19 -19
  2. pypromice/postprocess/bufr_to_csv.py +6 -1
  3. pypromice/postprocess/bufr_utilities.py +91 -18
  4. pypromice/postprocess/create_bufr_files.py +178 -0
  5. pypromice/postprocess/get_bufr.py +248 -397
  6. pypromice/postprocess/make_metadata_csv.py +214 -0
  7. pypromice/postprocess/real_time_utilities.py +41 -11
  8. pypromice/process/L0toL1.py +12 -5
  9. pypromice/process/L1toL2.py +159 -30
  10. pypromice/process/L2toL3.py +1034 -187
  11. pypromice/process/aws.py +131 -752
  12. pypromice/process/get_l2.py +90 -0
  13. pypromice/process/get_l2tol3.py +111 -0
  14. pypromice/process/join_l2.py +112 -0
  15. pypromice/process/join_l3.py +551 -120
  16. pypromice/process/load.py +161 -0
  17. pypromice/process/resample.py +128 -0
  18. pypromice/process/utilities.py +68 -0
  19. pypromice/process/write.py +503 -0
  20. pypromice/qc/github_data_issues.py +10 -16
  21. pypromice/qc/percentiles/thresholds.csv +2 -2
  22. pypromice/qc/persistence.py +71 -25
  23. pypromice/resources/__init__.py +28 -0
  24. pypromice/{process/metadata.csv → resources/file_attributes.csv} +0 -2
  25. pypromice/resources/variable_aliases_GC-Net.csv +78 -0
  26. pypromice/resources/variables.csv +106 -0
  27. pypromice/station_configuration.py +118 -0
  28. pypromice/tx/get_l0tx.py +7 -4
  29. pypromice/tx/payload_formats.csv +1 -0
  30. pypromice/tx/tx.py +27 -6
  31. pypromice/utilities/__init__.py +0 -0
  32. pypromice/utilities/git.py +61 -0
  33. {pypromice-1.3.5.dist-info → pypromice-1.4.0.dist-info}/METADATA +12 -21
  34. pypromice-1.4.0.dist-info/RECORD +53 -0
  35. {pypromice-1.3.5.dist-info → pypromice-1.4.0.dist-info}/WHEEL +1 -1
  36. pypromice-1.4.0.dist-info/entry_points.txt +13 -0
  37. pypromice/postprocess/station_configurations.toml +0 -762
  38. pypromice/process/get_l3.py +0 -46
  39. pypromice/process/variables.csv +0 -92
  40. pypromice/qc/persistence_test.py +0 -150
  41. pypromice/test/test_config1.toml +0 -69
  42. pypromice/test/test_config2.toml +0 -54
  43. pypromice/test/test_email +0 -75
  44. pypromice/test/test_payload_formats.csv +0 -4
  45. pypromice/test/test_payload_types.csv +0 -7
  46. pypromice/test/test_percentile.py +0 -229
  47. pypromice/test/test_raw1.txt +0 -4468
  48. pypromice/test/test_raw_DataTable2.txt +0 -11167
  49. pypromice/test/test_raw_SlimTableMem1.txt +0 -1155
  50. pypromice/test/test_raw_transmitted1.txt +0 -15411
  51. pypromice/test/test_raw_transmitted2.txt +0 -28
  52. pypromice-1.3.5.dist-info/RECORD +0 -53
  53. pypromice-1.3.5.dist-info/entry_points.txt +0 -8
  54. {pypromice-1.3.5.dist-info → pypromice-1.4.0.dist-info}/LICENSE.txt +0 -0
  55. {pypromice-1.3.5.dist-info → pypromice-1.4.0.dist-info}/top_level.txt +0 -0
@@ -1,229 +0,0 @@
1
- import unittest
2
- from datetime import datetime
3
- from typing import List
4
-
5
- import numpy as np
6
- import pandas as pd
7
- import xarray as xr
8
-
9
- from pypromice.qc.percentiles.outlier_detector import (
10
- detect_outliers,
11
- filter_data,
12
- ThresholdBasedOutlierDetector,
13
- )
14
-
15
-
16
- class PercentileQCTestCase(unittest.TestCase):
17
- def test_column_pattern_matches(self):
18
- self._test_column_pattern("p_i", True)
19
-
20
- def test_column_pattern_no_match(self):
21
- self._test_column_pattern("p_l", False)
22
-
23
- def test_column_pattern_with_prefix(self):
24
- self._test_column_pattern("prefix_p_i", False)
25
-
26
- def test_column_pattern_with_suffix(self):
27
- self._test_column_pattern("p_i_suffix", False)
28
-
29
- def _test_column_pattern(self, column_name: str, expected_output: bool):
30
- season_indices = pd.DatetimeIndex(
31
- [
32
- datetime(2022, 3, 1),
33
- ]
34
- )
35
- thresholds = pd.DataFrame(
36
- [
37
- dict(
38
- stid="stid", variable_pattern="p_[iu]", lo=-100, hi=100, season=None
39
- ),
40
- ]
41
- )
42
- value_outside_range = -325
43
- input_data = pd.DataFrame(
44
- index=season_indices, columns=[column_name], data=[value_outside_range]
45
- )
46
- if expected_output:
47
- expected_mask = pd.DataFrame(
48
- index=season_indices, columns=[column_name], data=[expected_output]
49
- )
50
- else:
51
- expected_mask = pd.DataFrame(index=season_indices, columns=[], data=[])
52
-
53
- mask = detect_outliers(input_data, thresholds)
54
-
55
- pd.testing.assert_frame_equal(expected_mask, mask)
56
-
57
- def test_column_pattern_multicolumns(self):
58
- thresholds = pd.DataFrame(
59
- [
60
- dict(
61
- stid="stid", variable_pattern="p_[iu]", lo=-100, hi=100, season=None
62
- ),
63
- ]
64
- )
65
- date_index = pd.DatetimeIndex([datetime(2022, 3, 1)])
66
- input_data = pd.DataFrame(
67
- index=date_index,
68
- data=[
69
- dict(
70
- p_i=-10,
71
- p_u=1000,
72
- p_j=1000,
73
- )
74
- ],
75
- )
76
- # p_j is not in the mask because it doesn't match the pattern
77
- expected_mask = pd.DataFrame(
78
- index=date_index,
79
- data=[
80
- dict(
81
- p_i=False,
82
- p_u=True,
83
- )
84
- ],
85
- )
86
-
87
- mask = detect_outliers(input_data, thresholds)
88
-
89
- pd.testing.assert_frame_equal(expected_mask, mask)
90
-
91
- def test_no_season(self):
92
- season_indices = pd.DatetimeIndex(
93
- [
94
- datetime(2022, 3, 1),
95
- datetime(2022, 8, 1),
96
- ]
97
- )
98
- thresholds = pd.DataFrame(
99
- [
100
- dict(stid="stid", variable_pattern="p_i", lo=-100, hi=100, season=None),
101
- ]
102
- )
103
- input_data = pd.DataFrame(index=season_indices, columns=["p_i"], data=[0, -243])
104
- expected_mask = pd.DataFrame(
105
- index=season_indices, columns=["p_i"], data=[False, True]
106
- )
107
-
108
- mask = detect_outliers(input_data, thresholds)
109
-
110
- pd.testing.assert_frame_equal(expected_mask, mask)
111
-
112
- def test_season_filter_invalid_winter_and_spring(self):
113
- self._test_season_filter(
114
- input_values=[0, 0, 0, 0], expected_mask=[True, True, False, False]
115
- )
116
-
117
- def test_season_filter_invalid_summer(self):
118
- self._test_season_filter(
119
- input_values=[-10, -10, -10, -10], expected_mask=[False, False, True, False]
120
- )
121
-
122
- def test_season_filter_valid_season_values(self):
123
- self._test_season_filter(
124
- input_values=[-12, -8, -1, -3], expected_mask=[False, False, False, False]
125
- )
126
-
127
- def _test_season_filter(self, input_values: List[float], expected_mask: List[bool]):
128
- stid = "A_STID"
129
- thresholds = pd.DataFrame(
130
- [
131
- dict(
132
- stid=stid, variable_pattern="t_i", lo=-20, hi=-10, season="winter"
133
- ),
134
- dict(stid=stid, variable_pattern="t_i", lo=-10, hi=-1, season="spring"),
135
- dict(stid=stid, variable_pattern="t_i", lo=-5, hi=5, season="summer"),
136
- dict(stid=stid, variable_pattern="t_i", lo=-10, hi=0, season="fall"),
137
- ]
138
- )
139
- season_indices = pd.DatetimeIndex(
140
- [
141
- datetime(2021, 12, 1), # winter
142
- datetime(2022, 3, 1), # spring
143
- datetime(2022, 6, 1), # summer
144
- datetime(2022, 9, 1), # fall
145
- ]
146
- )
147
- input_data = pd.DataFrame(
148
- index=season_indices, columns=["t_i"], data=input_values
149
- )
150
- expected_mask = pd.DataFrame(
151
- index=season_indices, columns=["t_i"], data=expected_mask
152
- )
153
-
154
- mask = detect_outliers(input_data, thresholds)
155
-
156
- pd.testing.assert_frame_equal(expected_mask, mask)
157
-
158
- def test_remove_outliers(self):
159
- thresholds = pd.DataFrame(
160
- columns=[
161
- "stid",
162
- "variable_pattern",
163
- "lo",
164
- "hi",
165
- "season",
166
- ],
167
- data=[
168
- ["stid", "t_[iu]", -40, 0, "winter"],
169
- ["stid", "t_[iu]", -4, 10, "summer"],
170
- ],
171
- )
172
- date_index = pd.DatetimeIndex(
173
- [
174
- datetime(2022, 1, 1),
175
- datetime(2022, 8, 1),
176
- ]
177
- )
178
- input_data = pd.DataFrame(
179
- index=date_index,
180
- data=[
181
- dict(t_i=-10, p_u=994),
182
- dict(t_i=37, p_u=1024),
183
- ],
184
- )
185
- mask = detect_outliers(input_data, thresholds)
186
- expected_output_data = input_data.copy()
187
- expected_output_data[mask] = np.nan
188
-
189
- output_data = filter_data(input_data, thresholds)
190
- self.assertIsNot(output_data, input_data)
191
- pd.testing.assert_frame_equal(output_data, expected_output_data)
192
-
193
-
194
- class ThresholdBasedOutlierDetectorTestCase(unittest.TestCase):
195
- def test_default_init(self):
196
- outlier_detector = ThresholdBasedOutlierDetector.default()
197
- self.assertIsInstance(outlier_detector, ThresholdBasedOutlierDetector)
198
-
199
- def test_filter_data_aws_with_threshold(self):
200
- stid = "NUK_K"
201
- index = pd.period_range("2023-10-01", "2023-11-01", freq="1h")
202
- columns = ["p_i", "t_i", "p_l", "wpsd_u", "foo"]
203
- dataset: xr.Dataset = pd.DataFrame(
204
- index=index,
205
- columns=columns,
206
- data=np.random.random((len(index), len(columns))),
207
- ).to_xarray()
208
- dataset = dataset.assign_attrs(dict(station_id=stid))
209
- outlier_detector = ThresholdBasedOutlierDetector.default()
210
-
211
- dataset_output = outlier_detector.filter_data(dataset)
212
-
213
- self.assertIsInstance(dataset_output, xr.Dataset)
214
- self.assertSetEqual(
215
- set(dict(dataset.items())),
216
- set(dict(dataset_output.items())),
217
- )
218
-
219
- pass
220
-
221
- def test_filter_data_aws_without_threshold(self):
222
- stid = "non_exsiting"
223
- dataset = xr.Dataset(attrs=dict(station_id=stid))
224
- outlier_detector = ThresholdBasedOutlierDetector.default()
225
- self.assertNotIn(stid, outlier_detector.thresholds.stid)
226
-
227
- output_dataset = outlier_detector.filter_data(dataset)
228
-
229
- xr.testing.assert_equal(output_dataset, dataset)