pypromice 1.5.3__py3-none-any.whl → 1.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pypromice/__init__.py +2 -0
- pypromice/{qc → core/qc}/github_data_issues.py +22 -13
- pypromice/{qc → core/qc}/percentiles/compute_thresholds.py +2 -2
- pypromice/{qc → core/qc}/persistence.py +22 -29
- pypromice/{process → core/qc}/value_clipping.py +3 -3
- pypromice/core/resampling.py +142 -0
- pypromice/core/variables/__init__.py +1 -0
- pypromice/core/variables/air_temperature.py +64 -0
- pypromice/core/variables/gps.py +221 -0
- pypromice/core/variables/humidity.py +111 -0
- pypromice/core/variables/precipitation.py +108 -0
- pypromice/core/variables/pressure_transducer_depth.py +79 -0
- pypromice/core/variables/radiation.py +422 -0
- pypromice/core/variables/station_boom_height.py +75 -0
- pypromice/core/variables/station_pose.py +375 -0
- pypromice/io/bufr/__init__.py +0 -0
- pypromice/{postprocess → io/bufr}/bufr_to_csv.py +1 -1
- pypromice/{postprocess → io/bufr}/create_bufr_files.py +2 -2
- pypromice/{postprocess → io/bufr}/get_bufr.py +6 -6
- pypromice/{postprocess → io/bufr}/real_time_utilities.py +3 -3
- pypromice/io/ingest/__init__.py +0 -0
- pypromice/{utilities → io/ingest}/git.py +1 -3
- pypromice/io/ingest/l0.py +294 -0
- pypromice/io/ingest/l0_repository.py +103 -0
- pypromice/io/ingest/toa5.py +87 -0
- pypromice/{process → io}/write.py +1 -1
- pypromice/pipeline/L0toL1.py +291 -0
- pypromice/pipeline/L1toL2.py +233 -0
- pypromice/{process → pipeline}/L2toL3.py +113 -118
- pypromice/pipeline/__init__.py +4 -0
- pypromice/{process → pipeline}/aws.py +10 -82
- pypromice/{process → pipeline}/get_l2.py +2 -2
- pypromice/{process → pipeline}/get_l2tol3.py +19 -22
- pypromice/{process → pipeline}/join_l2.py +31 -32
- pypromice/{process → pipeline}/join_l3.py +16 -14
- pypromice/{process → pipeline}/resample.py +75 -51
- pypromice/{process → pipeline}/utilities.py +0 -22
- pypromice/resources/file_attributes.csv +4 -4
- pypromice/resources/variable_aliases_GC-Net.csv +2 -2
- pypromice/resources/variables.csv +27 -24
- {pypromice-1.5.3.dist-info → pypromice-1.7.0.dist-info}/METADATA +1 -2
- pypromice-1.7.0.dist-info/RECORD +65 -0
- pypromice-1.7.0.dist-info/entry_points.txt +12 -0
- pypromice/get/__init__.py +0 -1
- pypromice/get/get.py +0 -211
- pypromice/get/get_promice_data.py +0 -56
- pypromice/process/L0toL1.py +0 -564
- pypromice/process/L1toL2.py +0 -824
- pypromice/process/__init__.py +0 -4
- pypromice/process/load.py +0 -161
- pypromice-1.5.3.dist-info/RECORD +0 -54
- pypromice-1.5.3.dist-info/entry_points.txt +0 -13
- /pypromice/{postprocess → core}/__init__.py +0 -0
- /pypromice/{utilities → core}/dependency_graph.py +0 -0
- /pypromice/{qc → core/qc}/__init__.py +0 -0
- /pypromice/{qc → core/qc}/percentiles/__init__.py +0 -0
- /pypromice/{qc → core/qc}/percentiles/outlier_detector.py +0 -0
- /pypromice/{qc → core/qc}/percentiles/thresholds.csv +0 -0
- /pypromice/{process → core/variables}/wind.py +0 -0
- /pypromice/{utilities → io}/__init__.py +0 -0
- /pypromice/{postprocess → io/bufr}/bufr_utilities.py +0 -0
- /pypromice/{postprocess → io/bufr}/positions_seed.csv +0 -0
- /pypromice/{station_configuration.py → io/bufr/station_configuration.py} +0 -0
- /pypromice/{postprocess → io}/make_metadata_csv.py +0 -0
- {pypromice-1.5.3.dist-info → pypromice-1.7.0.dist-info}/WHEEL +0 -0
- {pypromice-1.5.3.dist-info → pypromice-1.7.0.dist-info}/licenses/LICENSE.txt +0 -0
- {pypromice-1.5.3.dist-info → pypromice-1.7.0.dist-info}/top_level.txt +0 -0
pypromice/__init__.py CHANGED
pypromice/{qc → core/qc}/github_data_issues.py CHANGED
@@ -159,7 +159,7 @@ def adjustData(ds, adj_dir, var_list=[], skip_var=[]):
     adj_info.loc[adj_info.t0.isnull()|(adj_info.t0==''), "t0"] = None

     # if "*" is in the variable name then we interpret it as regex
-    selec =
+    selec = adj_info['variable'].str.contains(r'\*') & (adj_info['variable'] != "*")
     for ind in adj_info.loc[selec, :].index:
         line_template = adj_info.loc[ind, :].copy()
         regex = adj_info.loc[ind, 'variable']
@@ -209,23 +209,11 @@ def adjustData(ds, adj_dir, var_list=[], skip_var=[]):

         if func == "add":
             ds_out[var].loc[index_slice] = ds_out[var].loc[index_slice].values + val
-            # flagging adjusted values
-            # if var + "_adj_flag" not in ds_out.columns:
-            # ds_out[var + "_adj_flag"] = 0
-            # msk = ds_out[var].loc[index_slice])].notnull()
-            # ind = ds_out[var].loc[index_slice])].loc[msk].time
-            # ds_out.loc[ind, var + "_adj_flag"] = 1

         if func == "multiply":
             ds_out[var].loc[index_slice] = ds_out[var].loc[index_slice].values * val
             if "DW" in var:
                 ds_out[var].loc[index_slice] = ds_out[var].loc[index_slice] % 360
-            # flagging adjusted values
-            # if var + "_adj_flag" not in ds_out.columns:
-            # ds_out[var + "_adj_flag"] = 0
-            # msk = ds_out[var].loc[index_slice].notnull()
-            # ind = ds_out[var].loc[index_slice].loc[msk].time
-            # ds_out.loc[ind, var + "_adj_flag"] = 1

         if func == "min_filter":
             tmp = ds_out[var].loc[index_slice].values
@@ -277,6 +265,27 @@ def adjustData(ds, adj_dir, var_list=[], skip_var=[]):
             ds_out[var2].loc[index_slice] = val_var
             ds_out[var].loc[index_slice] = val_var2

+        if "delete_when_same_as_" in func:
+            var2 = func.replace('delete_when_same_as_','')
+            tmp = ds_out[var].loc[index_slice]
+            msk = np.abs(tmp - ds_out[var2].loc[index_slice]) < val
+            tmp = tmp.where(~msk)
+            # remove isolated singletons and pairs surrounded by NaNs
+            m1 = tmp.notnull() & tmp.shift(time=1).isnull() & tmp.shift(time=-1).isnull()
+
+            m2_first = (tmp.notnull()
+                        & tmp.shift(time=1).isnull()    # left is NaN
+                        & tmp.shift(time=-1).notnull()  # right is value
+                        & tmp.shift(time=-2).isnull())  # right+1 is NaN
+
+            m2_second = (tmp.notnull()
+                         & tmp.shift(time=-1).isnull()  # right is NaN
+                         & tmp.shift(time=1).notnull()  # left is value
+                         & tmp.shift(time=2).isnull())  # left-1 is NaN
+
+            tmp = tmp.where(~(m1 | m2_first | m2_second))
+            ds_out[var].loc[index_slice] = tmp.values
+
         if func == "rotate":
             ds_out[var].loc[index_slice] = (ds_out[var].loc[index_slice].values + val) % 360

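The new delete_when_same_as_ branch masks a variable wherever it tracks a second variable within the tolerance val, then strips surviving isolated samples. Below is a minimal sketch of that masking pattern on invented data (the series values and the 0.5 tolerance are made up for illustration):

import numpy as np
import pandas as pd
import xarray as xr

# Invented example: delete var where it agrees with var2 to within 0.5
time = pd.date_range("2024-01-01", periods=8, freq="h")
var = xr.DataArray([1.0, 5.0, 1.2, 1.3, 6.0, 1.1, 7.0, 1.0],
                   dims="time", coords={"time": time})
var2 = xr.DataArray([1.0, 1.0, 1.2, 1.3, 1.0, 1.1, 1.0, 1.0],
                    dims="time", coords={"time": time})

tmp = var.where(~(np.abs(var - var2) < 0.5))  # mask agreeing samples

# The survivors at 01:00, 04:00 and 06:00 sit between NaNs, so the m1
# singleton test from the hunk above removes them too:
m1 = tmp.notnull() & tmp.shift(time=1).isnull() & tmp.shift(time=-1).isnull()
print(tmp.where(~m1).values)  # all NaN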
pypromice/{qc → core/qc}/percentiles/compute_thresholds.py CHANGED
@@ -3,10 +3,10 @@ from datetime import datetime

 import pandas as pd

-from pypromice.process.aws import AWS
+from pypromice.pipeline.aws import AWS
 from pathlib import Path
 import logging
-from pypromice.qc.github_data_issues import adjustTime, flagNAN, adjustData
+from pypromice.core.qc.github_data_issues import adjustTime, flagNAN, adjustData


 # %%
pypromice/{qc → core/qc}/persistence.py CHANGED
@@ -19,27 +19,22 @@ DEFAULT_VARIABLE_THRESHOLDS = {
     "t_i": {"max_diff": 0.0001, "period": 2},
     "t_u": {"max_diff": 0.0001, "period": 2},
     "t_l": {"max_diff": 0.0001, "period": 2},
-    "p_i": {"max_diff": 0.0001, "period": 2},
-    "p_u": {"max_diff": 0.0001, "period": 2},
-    "p_l": {"max_diff": 0.0001, "period": 2},
-    "gps_lat_lon": {
-        "max_diff": 0.000001,
-        "period": 6,
-    },  # gets special handling to remove simultaneously constant gps_lat and gps_lon
+
+    "p_i": {"max_diff": 0.0001, "period": 3},
+    "p_u": {"max_diff": 0.0001, "period": 150},
+    "p_l": {"max_diff": 0.0001, "period": 150},
+
+    # gets special handling to remove simultaneously constant gps_lat and gps_lon
+    "gps_lat_lon": {"max_diff": 0.000001, "period": 6},
+
     "gps_alt": {"max_diff": 0.0001, "period": 6},
     "t_rad": {"max_diff": 0.0001, "period": 2},
-    "rh_i": {
-        "max_diff": 0.0001,
-        "period": 2,
-    },  # gets special handling to allow constant 100%
-    "rh_u": {
-        "max_diff": 0.0001,
-        "period": 2,
-    },  # gets special handling to allow constant 100%
-    "rh_l": {
-        "max_diff": 0.0001,
-        "period": 2,
-    },  # gets special handling to allow constant 100%
+
+    # gets special handling to allow constant 100%
+    "rh_i": {"max_diff": 0.0001, "period": 2},
+    "rh_u": {"max_diff": 0.0001, "period": 2},
+    "rh_l": {"max_diff": 0.0001, "period": 2},
+
     "wspd_i": {"max_diff": 0.0001, "period": 6},
     "wspd_u": {"max_diff": 0.0001, "period": 6},
     "wspd_l": {"max_diff": 0.0001, "period": 6},
@@ -83,15 +78,11 @@ def persistence_qc(
         variable_thresholds = DEFAULT_VARIABLE_THRESHOLDS
         logger.debug(f"Running persistence_qc using {variable_thresholds}")
     else:
-        logger.info(f"Running persistence_qc using custom thresholds:\n {variable_thresholds}")
+        logger.info(f"Running persistence_qc using custom thresholds:\n {variable_thresholds}")

     for k in variable_thresholds.keys():
         if k in ["t", "p", "rh", "wspd", "wdir", "z_boom"]:
-            var_all = [
-                k + "_u",
-                k + "_l",
-                k + "_i",
-            ]  # apply to upper, lower boom, and instant
+            var_all = [k + l for l in ["_u", "_l", "_i"]]  # apply to upper, lower boom, and instant
         else:
             var_all = [k]
         max_diff = variable_thresholds[k]["max_diff"]  # loading persistent limit
@@ -140,10 +131,12 @@ def find_persistent_regions(
     """
    Algorithm that ensures values can stay the same within the outliers_mask
     """
-    consecutive_true_df = count_consecutive_persistent_values(data, max_diff)
-    persistent_regions = consecutive_true_df >= min_repeats
-    # Ignore entries which already nan in the input data
-    persistent_regions[data.isna()] = False
+    consecutive_true_df = count_consecutive_persistent_values(data, max_diff)
+    persistent_regions = consecutive_true_df >= min_repeats
+    for i in range(1, min_repeats):
+        persistent_regions |= persistent_regions.shift(-1, fill_value=False)
+    # Ignore entries which already nan in the input data
+    persistent_regions[data.isna()] = False
     return persistent_regions


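In the rewritten find_persistent_regions, the consecutive counter only reaches min_repeats on the tail of a constant run, so the new shift loop propagates the flag backwards over the run. A toy illustration with an invented counter series (in the package the counter comes from count_consecutive_persistent_values):

import pandas as pd

min_repeats = 3
# Invented counter: a run of five near-constant samples yields counts 0..4
consecutive_true = pd.Series([0, 1, 2, 3, 4, 0, 1, 0])

persistent = consecutive_true >= min_repeats  # flags only the run's tail
for _ in range(1, min_repeats):
    persistent |= persistent.shift(-1, fill_value=False)  # extend backwards
print(persistent.tolist())
# [False, True, True, True, True, False, False, False]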
pypromice/{process → core/qc}/value_clipping.py CHANGED
@@ -2,7 +2,7 @@ import numpy as np
 import pandas
 import xarray

-from pypromice.utilities.dependency_graph import DependencyGraph
+from pypromice.core.dependency_graph import DependencyGraph


 def clip_values(
@@ -24,11 +24,11 @@ def clip_values(
     ds : `xarray.Dataset`
         Dataset with clipped data
     """
-    cols = ["lo", "hi", "OOL"]
+    cols = ["lo", "hi", "dependent_variables"]
     assert set(cols) <= set(var_configurations.columns)

     variable_limits = var_configurations[cols].assign(
-        dependents=lambda df: df.OOL.fillna("").str.split(),
+        dependents=lambda df: df.dependent_variables.fillna("").str.split(),
         # Find the closure of dependents using the DependencyGraph class
         dependents_closure=lambda df: DependencyGraph.from_child_mapping(
             df.dependents
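The renamed dependent_variables column holds a whitespace-separated list of variables to invalidate alongside each clipped variable; the fillna("").str.split() step above turns it into a list column for DependencyGraph. A small sketch of just that step, using an invented two-row configuration table:

import pandas as pd

# Invented excerpt of var_configurations
var_configurations = pd.DataFrame(
    {
        "lo": [0.0, -80.0],
        "hi": [100.0, 40.0],
        "dependent_variables": ["rh_u_wrt_ice_or_water", None],
    },
    index=["rh_u", "t_u"],
)

dependents = var_configurations.dependent_variables.fillna("").str.split()
print(dependents.to_dict())
# {'rh_u': ['rh_u_wrt_ice_or_water'], 't_u': []}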
pypromice/core/resampling.py ADDED
@@ -0,0 +1,142 @@
+import datetime
+import numpy as np
+import pandas as pd
+
+DEFAULT_COMPLETENESS_THRESHOLDS = {
+    "default": 0.8,
+    "albedo": 0.2,
+    "p_u": 0.5,
+    "p_l": 0.5,
+    "z_boom_u": 0.1,
+    "z_boom_l": 0.1,
+    "z_boom_cor_u": 0.1,
+    "z_boom_cor_l": 0.1,
+    "z_stake": 0.1,
+    "z_stake_cor": 0.1,
+    "z_surf_combined": 0.1,
+    "t_i_1": 0.1,
+    "t_i_2": 0.1,
+    "t_i_3": 0.1,
+    "t_i_4": 0.1,
+    "t_i_5": 0.1,
+    "t_i_6": 0.1,
+    "t_i_7": 0.1,
+    "t_i_8": 0.1,
+    "t_i_9": 0.1,
+    "t_i_10": 0.1,
+    "t_i_11": 0.1,
+    "gps_lat": 0.1,
+    "gps_lon": 0.1,
+    "gps_alt": 0.1,
+    "batt_v": 0.1,
+}
+
+ALLOWED_TIME_STAMP_DURATIONS = (
+    datetime.timedelta(minutes=10),
+    datetime.timedelta(minutes=30),
+    datetime.timedelta(hours=1),
+    datetime.timedelta(hours=6),
+    datetime.timedelta(days=1),
+)
+
+
+def classify_timestamp_durations(
+    index: pd.DatetimeIndex,
+) -> pd.TimedeltaIndex:
+    """
+    Classifies the durations between consecutive timestamps in a given DatetimeIndex.
+
+    The function computes the time differences between consecutive timestamps and
+    checks if these differences belong to a predefined set of allowed durations.
+    It performs backward filling to handle missing values.
+
+    Parameters
+    ----------
+    index : pd.DatetimeIndex
+        A pandas DatetimeIndex containing the timestamps to classify.
+
+    Returns
+    -------
+    pd.TimedeltaIndex
+        A TimedeltaIndex containing the classified durations for the corresponding
+        timestamps in the input index.
+    """
+    return pd.TimedeltaIndex(
+        index.to_series()
+        .diff()
+        .where(lambda d: d.isin(ALLOWED_TIME_STAMP_DURATIONS))
+        .bfill()
+    )
+
+
+def get_completeness_mask(
+    data_frame: pd.DataFrame,
+    resample_offset: str,
+    completeness_thresholds: dict[str, float] = DEFAULT_COMPLETENESS_THRESHOLDS,
+    *,
+    atol: float = 1e-9,
+) -> pd.DataFrame:
+    """
+    Returns a completeness mask for the given DataFrame based on the specified
+    resampling offset, completeness threshold, and tolerance for over-completeness.
+
+    This function evaluates the completeness of timestamped data, ensuring that
+    records match the expected durations defined by the `resample_offset`. It
+    computes whether each resampled group of data satisfies the completeness
+    constraints defined by the `completeness_thresholds` and `atol`.
+
+    Parameters
+    ----------
+    data_frame : pd.DataFrame
+        Input data containing a DatetimeIndex and associated values. The index must
+        be a DatetimeIndex as the function relies on timestamp durations for
+        computations.
+    resample_offset : str
+        Offset string defining resampling frequency. Examples include 'MS' (month
+        start) or other Pandas-compatible offset strings.
+    completeness_thresholds : dict[str, float], optional
+        Dictionary containing the variable-specific minimum completeness ratio
+        required to consider a time period as valid. Must contain a key 'default'
+        used for variables not explicitly listed.
+        Defaults to the dictionary `DEFAULT_COMPLETENESS_THRESHOLDS`.
+    atol : float, optional
+        Absolute tolerance for over-completeness. Specifies an allowable margin by
+        which completeness can exceed 1. Defaults to 1e-9.
+
+    Returns
+    -------
+    pd.DataFrame
+        A DataFrame containing Boolean values, where True indicates that the data
+        for the corresponding time period satisfies the completeness constraints,
+        while False indicates the data is either under-complete or over-complete.
+    """
+    if resample_offset in ['MS', 'ME']:
+        offset_timedelta = datetime.timedelta(days=30)
+        # Increase tolerance for overcomplete values in monthly resampling
+        # to handle months with 31 days.
+        atol = 1/30 + atol
+    else:
+        offset_timedelta = pd.to_timedelta(resample_offset)
+
+    index = data_frame.index
+    assert isinstance(index, pd.DatetimeIndex)
+
+    timestamp_durations = classify_timestamp_durations(index)
+    timestamp_coverage = timestamp_durations / np.array(offset_timedelta)
+    data_frame_is_valid = data_frame.notna()
+
+    completeness = (
+        data_frame_is_valid
+        .mul(timestamp_coverage, axis=0)
+        .resample(resample_offset).sum()
+    )
+
+    thresholds = pd.Series(
+        {col: completeness_thresholds.get(col, completeness_thresholds["default"])
+         for col in data_frame.columns}
+    )
+
+    is_under_complete = completeness.lt(thresholds, axis=1)
+    is_over_complete = completeness.gt(1 + atol)
+    completeness_mask = ~(is_under_complete | is_over_complete)
+    return completeness_mask
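A minimal usage sketch of the new module, assuming it is importable as pypromice.core.resampling (the path in the file listing): two days of hourly data where the second day is mostly missing, checked against the default 0.8 threshold at daily resolution.

import numpy as np
import pandas as pd
from pypromice.core.resampling import get_completeness_mask

# Two days of hourly samples; 75 % of day two is missing
index = pd.date_range("2024-01-01", periods=48, freq="h")
t_u = pd.Series(np.ones(48), index=index)
t_u.iloc[30:] = np.nan

mask = get_completeness_mask(pd.DataFrame({"t_u": t_u}), resample_offset="1D")
print(mask["t_u"].tolist())
# [True, False]: day one is fully complete, day two reaches only 6/24 = 0.25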
pypromice/core/variables/__init__.py ADDED
@@ -0,0 +1 @@
+
pypromice/core/variables/air_temperature.py ADDED
@@ -0,0 +1,64 @@
+__all__=["clip_and_interpolate", "get_cloud_coefficients"]
+
+import pandas as pd
+import xarray as xr
+
+T_0=273.15 # degrees Celsius to Kelvin conversion
+eps_overcast = 1.0 # Clouds overcast default coefficient
+eps_clear = 9.36508e-6 # Clouds clear default coefficient
+
+def clip_and_interpolate(temp : xr.DataArray,
+                         lo : float,
+                         hi : float,
+                         max_interp : pd.Timedelta = pd.Timedelta(12,'h')
+                         ) -> xr.DataArray:
+    """Clip and interpolate temperature dataset for use in
+    corrections
+
+    Parameters
+    ----------
+    temp : `xr.DataArray`
+        Array of temperature data
+    lo : float
+        Minimum threshold value for clipping
+    hi : float
+        Maximum threshold value for clipping
+    max_interp : `pd.Timedelta`
+        Maximum time steps to interpolate across.
+        The default is 12 hours.
+
+    Returns
+    -------
+    temp_interp : `xr.DataArray`
+        Array of interpolated temperature data
+    """
+    # Clip values to high and low threshold values
+    temp = temp.where((temp >= lo) & (temp <= hi))
+
+    # Drop duplicates and interpolate across NaN values
+    temp_interp = temp.interpolate_na(dim='time',
+                                      max_gap=max_interp)
+
+    return temp_interp
+
+
+def get_cloud_coefficients(temp: xr.DataArray
+                           ) -> tuple[xr.DataArray, xr.DataArray]:
+    """Get overcast and clear cloud longwave coefficients using
+    air temperature, based on assumptions from Swinbank (1963)
+
+    Parameters
+    ----------
+    temp : xr.DataArray
+        Air temperature
+
+    Returns
+    -------
+    LR_overcast : xr.DataArray
+        Overcast cloud coefficients, using overcast cloud assumption from Swinbank (1963)
+    LR_clear : xr.DataArray
+        Clear cloud coefficients, using clear cloud assumption, from Swinbank (1963)
+    """
+    LR_overcast = eps_overcast * 5.67e-8 * (temp + T_0) ** 4
+    LR_clear = eps_clear * 5.67e-8 * (temp + T_0) ** 6
+    return LR_overcast, LR_clear
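The two Swinbank (1963) expressions differ only in the emissivity coefficient and the exponent on absolute temperature. A quick numerical check at -10 °C (module path taken from the file listing above):

import xarray as xr
from pypromice.core.variables.air_temperature import get_cloud_coefficients

temp = xr.DataArray([-10.0])  # air temperature in degrees Celsius
LR_overcast, LR_clear = get_cloud_coefficients(temp)
print(float(LR_overcast[0]))  # 1.0 * 5.67e-8 * 263.15**4 ≈ 271.9 W m-2
print(float(LR_clear[0]))     # 9.36508e-6 * 5.67e-8 * 263.15**6 ≈ 176.3 W m-2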
pypromice/core/variables/gps.py ADDED
@@ -0,0 +1,221 @@
+__all__ = ["decode_and_convert", "filter",
+           "decode", "convert_from_degrees_and_decimal_minutes",
+           "convert_from_decimal_minutes"]
+import re
+import xarray as xr
+import numpy as np
+import pandas as pd
+from sklearn.linear_model import LinearRegression
+
+import logging
+logger = logging.getLogger(__name__)
+
+def decode_and_convert(gps_lat: xr.DataArray,
+                       gps_lon: xr.DataArray,
+                       gps_time: xr.DataArray,
+                       latitude: float,
+                       longitude: float
+                       ) -> tuple[xr.DataArray,xr.DataArray,xr.DataArray]:
+    """Decode and convert GPS latitude, longitude and time values.
+    Decoding is performed if values are detected as string types.
+    Conversion consists of transforming to decimal degrees (DD),
+    from either decimal minutes (mm.mmmmm) or degrees and
+    decimal minutes (ddmm.mmmm)
+
+    Parameters
+    ----------
+    gps_lat : `xr.DataArray`
+        GPS latitude
+    gps_lon : `xr.DataArray`
+        GPS longitude
+    gps_time : `xr.DataArray`
+        GPS time
+
+    Returns
+    -------
+    gps_lat : `xr.DataArray`
+        Decoded and converted GPS latitude
+    gps_lon : `xr.DataArray`
+        Decoded and converted GPS longitude
+    gps_time : `xr.DataArray`
+        Decoded and converted GPS time
+    """
+    # Retain GPS array attributes
+    lat_attrs = gps_lat.attrs
+    lon_attrs = gps_lon.attrs
+    time_attrs = gps_time.attrs
+
+    # Decode GPS information if array is an object array
+    if gps_lat.dtype.kind == "O":
+        lat, lon, time = decode(gps_lat, gps_lon, gps_time)
+        if lat is None:
+            logger.warning("GPS decoding failed, skipping this routine.")
+        else:
+            gps_lat, gps_lon, gps_time = lat, lon, time
+
+    # Reformat values to numeric
+    gps_lat.values = pd.to_numeric(gps_lat, errors='coerce')
+    gps_lon.values = pd.to_numeric(gps_lon, errors='coerce')
+    gps_time.values = pd.to_numeric(gps_time, errors='coerce')
+
+    # Convert GPS positions to decimal degrees
+    if np.any((gps_lat <= 90) & (gps_lat > 0)):
+        gps_lat = convert_from_decimal_minutes(gps_lat, latitude)
+        gps_lon = convert_from_decimal_minutes(gps_lon, longitude)
+    else:
+        gps_lat = convert_from_degrees_and_decimal_minutes(gps_lat)
+        gps_lon = convert_from_degrees_and_decimal_minutes(gps_lon)
+
+    # Reassign GPS array attributes
+    gps_lat.attrs = lat_attrs
+    gps_lon.attrs = lon_attrs
+    gps_time.attrs = time_attrs
+
+    return gps_lat, gps_lon, gps_time
+
+
+def filter(gps_lat: xr.DataArray,
+           gps_lon: xr.DataArray,
+           gps_alt: xr.DataArray
+           ) -> tuple[xr.DataArray, xr.DataArray, xr.DataArray]:
+    """ Filter GPS latitude, longitude and altitude based on the difference
+    to a baseline elevation. The baseline elevation is a gap-filled monthly
+    median elevation based on the inputted GPS altitude.
+
+    Parameters
+    ----------
+    gps_lat : xr.DataArray
+        GPS latitude
+    gps_lon : xr.DataArray
+        GPS longitude
+    gps_alt : xr.DataArray
+        GPS altitude values with a time dimension
+
+    Returns
+    ----------
+    gps_lat_filtered : xr.DataArray
+        Filtered latitude values
+    gps_lon_filtered : xr.DataArray
+        Filtered longitude values
+    gps_alt_filtered : xr.DataArray
+        Filtered altitude values
+    """
+    # Get altitude monthly median (at month start)
+    # This will serve as baseline elevations for filtering
+    ser = gps_alt.to_series()
+    monthly_median = ser.resample("MS").median()
+    baseline_elevation = (
+        monthly_median
+        .reindex(ser.index, method="nearest")
+        .ffill()
+        .bfill()
+    )
+
+    # Produce conditional mask
+    mask = (np.abs(gps_alt - baseline_elevation) < 100) | gps_alt.isnull()
+
+    # Apply mask
+    gps_lat_filtered = gps_lat.where(mask)
+    gps_lon_filtered = gps_lon.where(mask)
+    gps_alt_filtered = gps_alt.where(mask)
+
+    return gps_lat_filtered, gps_lon_filtered, gps_alt_filtered
+
+
+def convert_from_degrees_and_decimal_minutes(gps):
+    """Convert positions (i.e. latitude, longitude) from degrees
+    and decimal minutes (ddmm.mmmm) to decimal degree values (DD)"""
+    return np.floor(gps / 100) + (gps / 100 - np.floor(gps / 100)) * 100 / 60
+
+
+def convert_from_decimal_minutes(gps: xr.DataArray, pos: float
+                                 ) -> xr.DataArray:
+    """Convert decimal minutes (mm.mmmmm) to decimal degree
+    values (DD), using a predefined position to append values to.
+    Needed in the case of PROMICE v1 stations, where logger
+    programs saved positions only in decimal minutes."""
+    new_gps = np.sign(pos) * (gps + 100 * np.floor(np.abs(pos)))
+    return convert_from_degrees_and_decimal_minutes(new_gps)
+
+
+def decode(gps_lat: xr.DataArray,
+           gps_lon: xr.DataArray,
+           gps_time: xr.DataArray
+           ) -> tuple[xr.DataArray,xr.DataArray,xr.DataArray]:
+    """Decode GPS information. This should be applied if gps information
+    consists of strings and not float values. GPS information is returned in
+    degrees and decimal minutes (ddmm.mmmm) format.
+
+    Parameters
+    ----------
+    gps_lat : `xr.DataArray`
+        GPS latitude
+    gps_lon : `xr.DataArray`
+        GPS longitude
+    gps_time : `xr.DataArray`
+        GPS time
+
+    Returns
+    -------
+    new_lat : `xr.DataArray`
+        Decoded GPS latitude
+    new_lon : `xr.DataArray`
+        Decoded GPS longitude
+    new_time : `xr.DataArray`
+        Decoded GPS time
+    """
+    # Pick the first non-null sample safely and detect decoding format
+    non_null = gps_lat.dropna(dim='time').values
+    sample_value = str(non_null[0])
+
+    try:
+        # Object decoding
+        if "NH" in sample_value:
+            new_lat = gps_object_decoder(gps_lat)
+            new_lon = gps_object_decoder(gps_lon)
+            new_time = gps_object_decoder(gps_time)
+            return new_lat, new_lon, new_time
+
+        # L-string decoding
+        elif "L" in sample_value:
+            logger.info("Found 'L' in GPS string; applying decode + scaling.")
+            new_lat = gps_l_string_decoder(gps_lat)
+            new_lon = gps_l_string_decoder(gps_lon)
+            new_time = gps_object_decoder(gps_time)
+            return new_lat, new_lon, new_time
+
+        # Unknown format, attempt to decode
+        else:
+            logger.info("Unknown GPS string format; attempting generic decode.")
+            new_lat = gps_object_decoder(gps_lat)
+            new_lon = gps_object_decoder(gps_lon)
+            new_time = gps_object_decoder(gps_time)
+            return new_lat, new_lon, new_time
+
+    except Exception as e:
+        logger.error(f"Failed to decode GPS data: {e!r} "
+                     f"(dtype={gps_lat.dtype})")
+        return None, None, None
+
+
+def gps_object_decoder(gps : xr.DataArray) -> xr.DataArray:
+    """GPS decoder for object array formatting. For example, PROMICE v2
+    stations should send information as 'NH6429.01544,WH04932.86061'
+    original formatting (NUK_L 2022); PROMICE v3 stations should send
+    coordinates as '6430,4916' (NUK_Uv3); and GC-Net stations should
+    send coordinates as '6628.93936','04617.59187' (DY2)"""
+    str2nums = [re.findall(r"[-+]?\d*\.\d+|\d+", _) if isinstance(_, str) else [np.nan] for _ in gps.values]
+    gps[:] = pd.DataFrame(str2nums).astype(float).T.values[0]
+    gps = gps.astype(float)
+    return gps
+
+
+def gps_l_string_decoder(gps : xr.DataArray) -> xr.DataArray:
+    """GPS L-string decoder"""
+    # Convert from object array
+    gps = gps_object_decoder(gps)
+
+    # Convert from integer-like values to degrees
+    gps = gps/100000
+    return gps
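A worked check of the two coordinate conversions with plain floats (both also accept xr.DataArray). In ddmm.mmmm format, 6429.01544 encodes 64 degrees plus 29.01544 minutes; for v1 stations that logged decimal minutes only, the integer degrees are recovered from the station's configured position:

from pypromice.core.variables.gps import (
    convert_from_degrees_and_decimal_minutes,
    convert_from_decimal_minutes,
)

# ddmm.mmmm -> DD: 64 + 29.01544 / 60
print(convert_from_degrees_and_decimal_minutes(6429.01544))  # 64.48359...

# mm.mmmmm -> DD, reattaching degrees from a configured latitude near 64 N
print(convert_from_decimal_minutes(29.01544, 64.0))          # 64.48359...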