pypromice 1.5.3__py3-none-any.whl → 1.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of pypromice might be problematic.

Files changed (67)
  1. pypromice/__init__.py +2 -0
  2. pypromice/{qc → core/qc}/github_data_issues.py +22 -13
  3. pypromice/{qc → core/qc}/percentiles/compute_thresholds.py +2 -2
  4. pypromice/{qc → core/qc}/persistence.py +22 -29
  5. pypromice/{process → core/qc}/value_clipping.py +3 -3
  6. pypromice/core/resampling.py +142 -0
  7. pypromice/core/variables/__init__.py +1 -0
  8. pypromice/core/variables/air_temperature.py +64 -0
  9. pypromice/core/variables/gps.py +221 -0
  10. pypromice/core/variables/humidity.py +111 -0
  11. pypromice/core/variables/precipitation.py +108 -0
  12. pypromice/core/variables/pressure_transducer_depth.py +79 -0
  13. pypromice/core/variables/radiation.py +422 -0
  14. pypromice/core/variables/station_boom_height.py +75 -0
  15. pypromice/core/variables/station_pose.py +375 -0
  16. pypromice/io/bufr/__init__.py +0 -0
  17. pypromice/{postprocess → io/bufr}/bufr_to_csv.py +1 -1
  18. pypromice/{postprocess → io/bufr}/create_bufr_files.py +2 -2
  19. pypromice/{postprocess → io/bufr}/get_bufr.py +6 -6
  20. pypromice/{postprocess → io/bufr}/real_time_utilities.py +3 -3
  21. pypromice/io/ingest/__init__.py +0 -0
  22. pypromice/{utilities → io/ingest}/git.py +1 -3
  23. pypromice/io/ingest/l0.py +294 -0
  24. pypromice/io/ingest/l0_repository.py +103 -0
  25. pypromice/io/ingest/toa5.py +87 -0
  26. pypromice/{process → io}/write.py +1 -1
  27. pypromice/pipeline/L0toL1.py +291 -0
  28. pypromice/pipeline/L1toL2.py +233 -0
  29. pypromice/{process → pipeline}/L2toL3.py +113 -118
  30. pypromice/pipeline/__init__.py +4 -0
  31. pypromice/{process → pipeline}/aws.py +10 -82
  32. pypromice/{process → pipeline}/get_l2.py +2 -2
  33. pypromice/{process → pipeline}/get_l2tol3.py +19 -22
  34. pypromice/{process → pipeline}/join_l2.py +31 -32
  35. pypromice/{process → pipeline}/join_l3.py +16 -14
  36. pypromice/{process → pipeline}/resample.py +75 -51
  37. pypromice/{process → pipeline}/utilities.py +0 -22
  38. pypromice/resources/file_attributes.csv +4 -4
  39. pypromice/resources/variable_aliases_GC-Net.csv +2 -2
  40. pypromice/resources/variables.csv +27 -24
  41. {pypromice-1.5.3.dist-info → pypromice-1.7.0.dist-info}/METADATA +1 -2
  42. pypromice-1.7.0.dist-info/RECORD +65 -0
  43. pypromice-1.7.0.dist-info/entry_points.txt +12 -0
  44. pypromice/get/__init__.py +0 -1
  45. pypromice/get/get.py +0 -211
  46. pypromice/get/get_promice_data.py +0 -56
  47. pypromice/process/L0toL1.py +0 -564
  48. pypromice/process/L1toL2.py +0 -824
  49. pypromice/process/__init__.py +0 -4
  50. pypromice/process/load.py +0 -161
  51. pypromice-1.5.3.dist-info/RECORD +0 -54
  52. pypromice-1.5.3.dist-info/entry_points.txt +0 -13
  53. /pypromice/{postprocess → core}/__init__.py +0 -0
  54. /pypromice/{utilities → core}/dependency_graph.py +0 -0
  55. /pypromice/{qc → core/qc}/__init__.py +0 -0
  56. /pypromice/{qc → core/qc}/percentiles/__init__.py +0 -0
  57. /pypromice/{qc → core/qc}/percentiles/outlier_detector.py +0 -0
  58. /pypromice/{qc → core/qc}/percentiles/thresholds.csv +0 -0
  59. /pypromice/{process → core/variables}/wind.py +0 -0
  60. /pypromice/{utilities → io}/__init__.py +0 -0
  61. /pypromice/{postprocess → io/bufr}/bufr_utilities.py +0 -0
  62. /pypromice/{postprocess → io/bufr}/positions_seed.csv +0 -0
  63. /pypromice/{station_configuration.py → io/bufr/station_configuration.py} +0 -0
  64. /pypromice/{postprocess → io}/make_metadata_csv.py +0 -0
  65. {pypromice-1.5.3.dist-info → pypromice-1.7.0.dist-info}/WHEEL +0 -0
  66. {pypromice-1.5.3.dist-info → pypromice-1.7.0.dist-info}/licenses/LICENSE.txt +0 -0
  67. {pypromice-1.5.3.dist-info → pypromice-1.7.0.dist-info}/top_level.txt +0 -0
pypromice/__init__.py CHANGED
@@ -0,0 +1,2 @@
+ from importlib.metadata import version
+ __version__ = version("pypromice")
pypromice/core/qc/github_data_issues.py CHANGED
@@ -159,7 +159,7 @@ def adjustData(ds, adj_dir, var_list=[], skip_var=[]):
  adj_info.loc[adj_info.t0.isnull()|(adj_info.t0==''), "t0"] = None

  # if "*" is in the variable name then we interpret it as regex
- selec = adj_info['variable'].str.contains('\*') & (adj_info['variable'] != "*")
+ selec = adj_info['variable'].str.contains(r'\*') & (adj_info['variable'] != "*")
  for ind in adj_info.loc[selec, :].index:
  line_template = adj_info.loc[ind, :].copy()
  regex = adj_info.loc[ind, 'variable']
@@ -209,23 +209,11 @@ def adjustData(ds, adj_dir, var_list=[], skip_var=[]):

  if func == "add":
  ds_out[var].loc[index_slice] = ds_out[var].loc[index_slice].values + val
- # flagging adjusted values
- # if var + "_adj_flag" not in ds_out.columns:
- # ds_out[var + "_adj_flag"] = 0
- # msk = ds_out[var].loc[index_slice])].notnull()
- # ind = ds_out[var].loc[index_slice])].loc[msk].time
- # ds_out.loc[ind, var + "_adj_flag"] = 1

  if func == "multiply":
  ds_out[var].loc[index_slice] = ds_out[var].loc[index_slice].values * val
  if "DW" in var:
  ds_out[var].loc[index_slice] = ds_out[var].loc[index_slice] % 360
- # flagging adjusted values
- # if var + "_adj_flag" not in ds_out.columns:
- # ds_out[var + "_adj_flag"] = 0
- # msk = ds_out[var].loc[index_slice].notnull()
- # ind = ds_out[var].loc[index_slice].loc[msk].time
- # ds_out.loc[ind, var + "_adj_flag"] = 1

  if func == "min_filter":
  tmp = ds_out[var].loc[index_slice].values
@@ -277,6 +265,27 @@ def adjustData(ds, adj_dir, var_list=[], skip_var=[]):
  ds_out[var2].loc[index_slice] = val_var
  ds_out[var].loc[index_slice] = val_var2

+ if "delete_when_same_as_" in func:
+ var2 = func.replace('delete_when_same_as_','')
+ tmp = ds_out[var].loc[index_slice]
+ msk = np.abs(tmp - ds_out[var2].loc[index_slice]) < val
+ tmp = tmp.where(~msk)
+ # remove isolated singletons and pairs surrounded by NaNs
+ m1 = tmp.notnull() & tmp.shift(time=1).isnull() & tmp.shift(time=-1).isnull()
+
+ m2_first = (tmp.notnull()
+ & tmp.shift(time=1).isnull() # left is NaN
+ & tmp.shift(time=-1).notnull() # right is value
+ & tmp.shift(time=-2).isnull()) # right+1 is NaN
+
+ m2_second = (tmp.notnull()
+ & tmp.shift(time=-1).isnull() # right is NaN
+ & tmp.shift(time=1).notnull() # left is value
+ & tmp.shift(time=2).isnull()) # left-1 is NaN
+
+ tmp = tmp.where(~(m1 | m2_first | m2_second))
+ ds_out[var].loc[index_slice] = tmp.values
+
  if func == "rotate":
  ds_out[var].loc[index_slice] = (ds_out[var].loc[index_slice].values + val) % 360

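The new "delete_when_same_as_<var2>" adjustment removes values of a variable that track a second variable to within the tolerance `val`, then also drops any surviving values that end up isolated: a lone value, or a two-value pair, surrounded by NaNs. A minimal sketch of that singleton/pair masking on toy data (hypothetical values, not from the release):

    import numpy as np
    import xarray as xr

    tmp = xr.DataArray([np.nan, 1.0, np.nan, 2.0, 3.0, 4.0, np.nan, 5.0],
                       dims="time")

    # lone value with NaN on both sides
    m1 = tmp.notnull() & tmp.shift(time=1).isnull() & tmp.shift(time=-1).isnull()
    # first and second member of an isolated two-value pair
    m2_first = (tmp.notnull() & tmp.shift(time=1).isnull()
                & tmp.shift(time=-1).notnull() & tmp.shift(time=-2).isnull())
    m2_second = (tmp.notnull() & tmp.shift(time=-1).isnull()
                 & tmp.shift(time=1).notnull() & tmp.shift(time=2).isnull())

    print(tmp.where(~(m1 | m2_first | m2_second)).values)
    # [nan nan nan  2.  3.  4. nan nan]  -- only the run of three survives

Shifts past the array edge fill with NaN, so values at the very start or end of the record count as having a NaN neighbour.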
pypromice/core/qc/percentiles/compute_thresholds.py CHANGED
@@ -3,10 +3,10 @@ from datetime import datetime

  import pandas as pd

- from pypromice.process import AWS
+ from pypromice.pipeline.aws import AWS
  from pathlib import Path
  import logging
- from pypromice.qc.github_data_issues import adjustTime, flagNAN, adjustData
+ from pypromice.core.qc.github_data_issues import adjustTime, flagNAN, adjustData


  # %%
pypromice/core/qc/persistence.py CHANGED
@@ -19,27 +19,22 @@ DEFAULT_VARIABLE_THRESHOLDS = {
  "t_i": {"max_diff": 0.0001, "period": 2},
  "t_u": {"max_diff": 0.0001, "period": 2},
  "t_l": {"max_diff": 0.0001, "period": 2},
- "p_i": {"max_diff": 0.0001, "period": 2},
- # "p_u": {"max_diff": 0.0001, "period": 2},
- # "p_l": {"max_diff": 0.0001, "period": 2},
- "gps_lat_lon": {
- "max_diff": 0.000001,
- "period": 6,
- }, # gets special handling to remove simultaneously constant gps_lat and gps_lon
+
+ "p_i": {"max_diff": 0.0001, "period": 3},
+ "p_u": {"max_diff": 0.0001, "period": 150},
+ "p_l": {"max_diff": 0.0001, "period": 150},
+
+ # gets special handling to remove simultaneously constant gps_lat and gps_lon
+ "gps_lat_lon": {"max_diff": 0.000001, "period": 6},
+
  "gps_alt": {"max_diff": 0.0001, "period": 6},
  "t_rad": {"max_diff": 0.0001, "period": 2},
- "rh_i": {
- "max_diff": 0.0001,
- "period": 2,
- }, # gets special handling to allow constant 100%
- "rh_u": {
- "max_diff": 0.0001,
- "period": 2,
- }, # gets special handling to allow constant 100%
- "rh_l": {
- "max_diff": 0.0001,
- "period": 2,
- }, # gets special handling to allow constant 100%
+
+ # gets special handling to allow constant 100%
+ "rh_i": {"max_diff": 0.0001, "period": 2},
+ "rh_u": {"max_diff": 0.0001, "period": 2},
+ "rh_l": {"max_diff": 0.0001, "period": 2},
+
  "wspd_i": {"max_diff": 0.0001, "period": 6},
  "wspd_u": {"max_diff": 0.0001, "period": 6},
  "wspd_l": {"max_diff": 0.0001, "period": 6},
@@ -83,15 +78,11 @@ def persistence_qc(
  variable_thresholds = DEFAULT_VARIABLE_THRESHOLDS
  logger.debug(f"Running persistence_qc using {variable_thresholds}")
  else:
- logger.info(f"Running persistence_qc using custom thresholds:\n {variable_thresholds}")
+ logger.info(f"Running persistence_qc using custom thresholds:\n {variable_thresholds}")

  for k in variable_thresholds.keys():
  if k in ["t", "p", "rh", "wspd", "wdir", "z_boom"]:
- var_all = [
- k + "_u",
- k + "_l",
- k + "_i",
- ] # apply to upper, lower boom, and instant
+ var_all = [k + l for l in ["_u", "_l", "_i"]] # apply to upper, lower boom, and instant
  else:
  var_all = [k]
  max_diff = variable_thresholds[k]["max_diff"] # loading persistent limit
@@ -140,10 +131,12 @@ def find_persistent_regions(
  """
  Algorithm that ensures values can stay the same within the outliers_mask
  """
- consecutive_true_df = count_consecutive_persistent_values(data, max_diff)
- persistent_regions = consecutive_true_df >= min_repeats
- # Ignore entries which already nan in the input data
- persistent_regions[data.isna()] = False
+ consecutive_true_df = count_consecutive_persistent_values(data, max_diff)
+ persistent_regions = consecutive_true_df >= min_repeats
+ for i in range(1, min_repeats):
+ persistent_regions |= persistent_regions.shift(-1, fill_value=False)
+ # Ignore entries which already nan in the input data
+ persistent_regions[data.isna()] = False
  return persistent_regions

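The change to find_persistent_regions extends the flag backwards over the whole persistent run: the `>= min_repeats` comparison only marks timestamps once enough repeats have accumulated, and the shift-and-OR loop then propagates that mark onto the earlier members of the run. A minimal sketch with a toy counter series (hypothetical values):

    import pandas as pd

    min_repeats = 3
    # e.g. output of count_consecutive_persistent_values
    consecutive = pd.Series([0, 1, 2, 3, 4, 0, 1])
    mask = consecutive >= min_repeats               # flags only the tail of the run
    for _ in range(1, min_repeats):
        mask |= mask.shift(-1, fill_value=False)    # pull the flag back over the run
    print(mask.tolist())
    # [False, True, True, True, True, False, False]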
pypromice/core/qc/value_clipping.py CHANGED
@@ -2,7 +2,7 @@ import numpy as np
  import pandas
  import xarray

- from pypromice.utilities.dependency_graph import DependencyGraph
+ from pypromice.core.dependency_graph import DependencyGraph


  def clip_values(
@@ -24,11 +24,11 @@ def clip_values(
  ds : `xarray.Dataset`
  Dataset with clipped data
  """
- cols = ["lo", "hi", "OOL"]
+ cols = ["lo", "hi", "dependent_variables"]
  assert set(cols) <= set(var_configurations.columns)

  variable_limits = var_configurations[cols].assign(
- dependents=lambda df: df.OOL.fillna("").str.split(),
+ dependents=lambda df: df.dependent_variables.fillna("").str.split(),
  # Find the closure of dependents using the DependencyGraph class
  dependents_closure=lambda df: DependencyGraph.from_child_mapping(
  df.dependents
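The variables.csv column rename (OOL → dependent_variables) feeds this closure step: each variable lists the variables that should also be invalidated when it is clipped, and DependencyGraph expands that mapping transitively. A plain-dict sketch of the closure idea (illustrative variable names, not the library's DependencyGraph API):

    def child_closure(children: dict[str, set[str]], root: str) -> set[str]:
        """Transitive set of variables affected when `root` is clipped."""
        seen, stack = set(), [root]
        while stack:
            for child in children.get(stack.pop(), ()):
                if child not in seen:
                    seen.add(child)
                    stack.append(child)
        return seen

    deps = {"t_u": {"dlhf_u"}, "dlhf_u": {"dshf_u"}}
    print(child_closure(deps, "t_u"))  # {'dlhf_u', 'dshf_u'}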
pypromice/core/resampling.py ADDED
@@ -0,0 +1,142 @@
+ import datetime
+ import numpy as np
+ import pandas as pd
+
+ DEFAULT_COMPLETENESS_THRESHOLDS = {
+ "default": 0.8,
+ "albedo": 0.2,
+ "p_u": 0.5,
+ "p_l": 0.5,
+ "z_boom_u": 0.1,
+ "z_boom_l": 0.1,
+ "z_boom_cor_u": 0.1,
+ "z_boom_cor_l": 0.1,
+ "z_stake": 0.1,
+ "z_stake_cor": 0.1,
+ "z_surf_combined": 0.1,
+ "t_i_1": 0.1,
+ "t_i_2": 0.1,
+ "t_i_3": 0.1,
+ "t_i_4": 0.1,
+ "t_i_5": 0.1,
+ "t_i_6": 0.1,
+ "t_i_7": 0.1,
+ "t_i_8": 0.1,
+ "t_i_9": 0.1,
+ "t_i_10": 0.1,
+ "t_i_11": 0.1,
+ "gps_lat": 0.1,
+ "gps_lon": 0.1,
+ "gps_alt": 0.1,
+ "batt_v": 0.1,
+ }
+
+ ALLOWED_TIME_STAMP_DURATIONS = (
+ datetime.timedelta(minutes=10),
+ datetime.timedelta(minutes=30),
+ datetime.timedelta(hours=1),
+ datetime.timedelta(hours=6),
+ datetime.timedelta(days=1),
+ )
+
+
+ def classify_timestamp_durations(
+ index: pd.DatetimeIndex,
+ ) -> pd.TimedeltaIndex:
+ """
+ Classifies the durations between consecutive timestamps in a given DatetimeIndex.
+
+ The function computes the time differences between consecutive timestamps and
+ checks if these differences belong to a predefined set of allowed durations.
+ It performs backward filling to handle missing values.
+
+ Parameters
+ ----------
+ index : pd.DatetimeIndex
+ A pandas DatetimeIndex containing the timestamps to classify.
+
+ Returns
+ -------
+ pd.TimedeltaIndex
+ A TimedeltaIndex containing the classified durations for the corresponding
+ timestamps in the input index.
+ """
+ return pd.TimedeltaIndex(
+ index.to_series()
+ .diff()
+ .where(lambda d: d.isin(ALLOWED_TIME_STAMP_DURATIONS))
+ .bfill()
+ )
+
+
+ def get_completeness_mask(
+ data_frame: pd.DataFrame,
+ resample_offset: str,
+ completeness_thresholds: dict[str, float] = DEFAULT_COMPLETENESS_THRESHOLDS,
+ *,
+ atol: float = 1e-9,
+ ) -> pd.DataFrame:
+ """
+ Returns a completeness mask for the given DataFrame based on the specified
+ resampling offset, completeness thresholds, and tolerance for over-completeness.
+
+ This function evaluates the completeness of timestamped data, ensuring that
+ records match the expected durations defined by the `resample_offset`. It
+ computes whether each resampled group of data satisfies the completeness
+ constraints defined by the `completeness_thresholds` and `atol`.
+
+ Parameters
+ ----------
+ data_frame : pd.DataFrame
+ Input data containing a DatetimeIndex and associated values. The index must
+ be a DatetimeIndex as the function relies on timestamp durations for
+ computations.
+ resample_offset : str
+ Offset string defining resampling frequency. Examples include 'MS' (month
+ start) or other Pandas-compatible offset strings.
+ completeness_thresholds : dict[str, float], optional
+ Dictionary containing the variable-specific minimum completeness ratio
+ required to consider a time period as valid. Must contain a key 'default'
+ used for variables not explicitly listed.
+ Defaults to the dictionary `DEFAULT_COMPLETENESS_THRESHOLDS`.
+ atol : float, optional
+ Absolute tolerance for over-completeness. Specifies an allowable margin by
+ which completeness can exceed 1. Defaults to 1e-9.
+
+ Returns
+ -------
+ pd.DataFrame
+ A DataFrame containing Boolean values, where True indicates that the data
+ for the corresponding time period satisfies the completeness constraints,
+ while False indicates the data is either under-complete or over-complete.
+ """
+ if resample_offset in ['MS', 'ME']:
+ offset_timedelta = datetime.timedelta(days=30)
+ # Increase tolerance for overcomplete values in monthly resampling
+ # to handle months with 31 days.
+ atol = 1/30 + atol
+ else:
+ offset_timedelta = pd.to_timedelta(resample_offset)
+
+ index = data_frame.index
+ assert isinstance(index, pd.DatetimeIndex)
+
+ timestamp_durations = classify_timestamp_durations(index)
+ timestamp_coverage = timestamp_durations / np.array(offset_timedelta)
+ data_frame_is_valid = data_frame.notna()
+
+ completeness = (
+ data_frame_is_valid
+ .mul(timestamp_coverage, axis=0)
+ .resample(resample_offset).sum()
+ )
+
+ thresholds = pd.Series(
+ {col: completeness_thresholds.get(col, completeness_thresholds["default"])
+ for col in data_frame.columns}
+ )
+
+ is_under_complete = completeness.lt(thresholds, axis=1)
+ is_over_complete = completeness.gt(1 + atol)
+ completeness_mask = ~(is_under_complete | is_over_complete)
+ return completeness_mask
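A minimal usage sketch of the completeness mask (hypothetical hourly data; only the p_u column and its 0.5 threshold from DEFAULT_COMPLETENESS_THRESHOLDS are assumed):

    import numpy as np
    import pandas as pd
    from pypromice.core.resampling import get_completeness_mask

    index = pd.date_range("2024-01-01", periods=48, freq="h")
    df = pd.DataFrame({"p_u": np.ones(48)}, index=index)
    df.loc[index[30:], "p_u"] = np.nan      # day 2 is only 25% complete

    daily_mean = df.resample("1D").mean()
    mask = get_completeness_mask(df, "1D")
    print(daily_mean.where(mask))
    # day 1 kept (completeness 1.0); day 2 masked (0.25 < the 0.5 p_u threshold)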
pypromice/core/variables/__init__.py ADDED
@@ -0,0 +1 @@
+
pypromice/core/variables/air_temperature.py ADDED
@@ -0,0 +1,64 @@
+ __all__=["clip_and_interpolate", "get_cloud_coefficients"]
+
+ import pandas as pd
+ import xarray as xr
+
+ T_0=273.15 # degrees Celsius to Kelvin conversion
+ eps_overcast = 1.0 # Clouds overcast default coefficient
+ eps_clear = 9.36508e-6 # Clouds clear default coefficient
+
+ def clip_and_interpolate(temp : xr.DataArray,
+ lo : float,
+ hi : float,
+ max_interp : pd.Timedelta = pd.Timedelta(12,'h')
+ ) -> xr.DataArray:
+ """Clip and interpolate temperature dataset for use in
+ corrections
+
+ Parameters
+ ----------
+ temp : `xr.DataArray`
+ Array of temperature data
+ lo : float
+ Minimum threshold value for clipping
+ hi : float
+ Maximum threshold value for clipping
+ max_interp : `pd.Timedelta`
+ Maximum gap length to interpolate across.
+ The default is 12 hours.
+
+ Returns
+ -------
+ temp_interp : `xr.DataArray`
+ Array of interpolated temperature data
+ """
+ # Clip values to high and low threshold values
+ temp = temp.where((temp >= lo) & (temp <= hi))
+
+ # Interpolate across NaN values, up to the maximum gap length
+ temp_interp = temp.interpolate_na(dim='time',
+ max_gap=max_interp)
+
+ return temp_interp
+
+
+ def get_cloud_coefficients(temp: xr.DataArray
+ ) -> tuple[xr.DataArray, xr.DataArray]:
+ """Get overcast and clear cloud longwave coefficients using
+ air temperature, based on assumptions from Swinbank (1963)
+
+ Parameters
+ ----------
+ temp : xr.DataArray
+ Air temperature
+
+ Returns
+ -------
+ LR_overcast : xr.DataArray
+ Overcast cloud coefficients, using the overcast cloud assumption from Swinbank (1963)
+ LR_clear : xr.DataArray
+ Clear cloud coefficients, using the clear cloud assumption from Swinbank (1963)
+ """
+ LR_overcast = eps_overcast * 5.67e-8 * (temp + T_0) ** 4
+ LR_clear = eps_clear * 5.67e-8 * (temp + T_0) ** 6
+ return LR_overcast, LR_clear
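The Swinbank (1963) coefficients scale with different powers of absolute temperature: the overcast term is plain Stefan-Boltzmann emission (T^4), while the clear-sky term uses T^6 with a small prefactor. A quick check of the magnitudes (hypothetical input temperature in degrees Celsius):

    import xarray as xr
    from pypromice.core.variables.air_temperature import get_cloud_coefficients

    t_air = xr.DataArray([0.0], dims="time")    # 0 degC = 273.15 K
    LR_overcast, LR_clear = get_cloud_coefficients(t_air)
    print(LR_overcast.item(), LR_clear.item())
    # ~315.6 W m-2 (5.67e-8 * 273.15**4) and ~220.5 W m-2 (9.36508e-6 * 5.67e-8 * 273.15**6)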
pypromice/core/variables/gps.py ADDED
@@ -0,0 +1,221 @@
+ __all__ = ["decode_and_convert", "filter",
+ "decode", "convert_from_degrees_and_decimal_minutes",
+ "convert_from_decimal_minutes"]
+ import re
+ import xarray as xr
+ import numpy as np
+ import pandas as pd
+ from sklearn.linear_model import LinearRegression
+
+ import logging
+ logger = logging.getLogger(__name__)
+
+ def decode_and_convert(gps_lat: xr.DataArray,
+ gps_lon: xr.DataArray,
+ gps_time: xr.DataArray,
+ latitude: float,
+ longitude: float
+ ) -> tuple[xr.DataArray,xr.DataArray,xr.DataArray]:
+ """Decode and convert GPS latitude, longitude and time values.
+ Decoding is performed if values are detected as string types.
+ Conversion consists of transforming to decimal degrees (DD),
+ from either decimal minutes (mm.mmmmm) or degrees and
+ decimal minutes (ddmm.mmmm).
+
+ Parameters
+ ----------
+ gps_lat : `xr.DataArray`
+ GPS latitude
+ gps_lon : `xr.DataArray`
+ GPS longitude
+ gps_time : `xr.DataArray`
+ GPS time
+ latitude : float
+ Known station latitude, used to supply the degrees when
+ positions are recorded in decimal minutes only
+ longitude : float
+ Known station longitude, used to supply the degrees when
+ positions are recorded in decimal minutes only
+
+ Returns
+ -------
+ gps_lat : `xr.DataArray`
+ Decoded and converted GPS latitude
+ gps_lon : `xr.DataArray`
+ Decoded and converted GPS longitude
+ gps_time : `xr.DataArray`
+ Decoded and converted GPS time
+ """
+ # Retain GPS array attributes
+ lat_attrs = gps_lat.attrs
+ lon_attrs = gps_lon.attrs
+ time_attrs = gps_time.attrs
+
+ # Decode GPS information if array is an object array
+ if gps_lat.dtype.kind == "O":
+ lat, lon, time = decode(gps_lat, gps_lon, gps_time)
+ if lat is None:
+ logger.warning("GPS decoding failed, skipping this routine.")
+ else:
+ gps_lat, gps_lon, gps_time = lat, lon, time
+
+ # Reformat values to numeric
+ gps_lat.values = pd.to_numeric(gps_lat, errors='coerce')
+ gps_lon.values = pd.to_numeric(gps_lon, errors='coerce')
+ gps_time.values = pd.to_numeric(gps_time, errors='coerce')
+
+ # Convert GPS positions to decimal degrees
+ if np.any((gps_lat <= 90) & (gps_lat > 0)):
+ gps_lat = convert_from_decimal_minutes(gps_lat, latitude)
+ gps_lon = convert_from_decimal_minutes(gps_lon, longitude)
+ else:
+ gps_lat = convert_from_degrees_and_decimal_minutes(gps_lat)
+ gps_lon = convert_from_degrees_and_decimal_minutes(gps_lon)
+
+ # Reassign GPS array attributes
+ gps_lat.attrs = lat_attrs
+ gps_lon.attrs = lon_attrs
+ gps_time.attrs = time_attrs
+
+ return gps_lat, gps_lon, gps_time
+
+
+ def filter(gps_lat: xr.DataArray,
+ gps_lon: xr.DataArray,
+ gps_alt: xr.DataArray
+ ) -> tuple[xr.DataArray, xr.DataArray, xr.DataArray]:
+ """ Filter GPS latitude, longitude and altitude based on the difference
+ to a baseline elevation. The baseline elevation is a gap-filled monthly
+ median elevation based on the input GPS altitude.
+
+ Parameters
+ ----------
+ gps_lat : xr.DataArray
+ GPS latitude
+ gps_lon : xr.DataArray
+ GPS longitude
+ gps_alt : xr.DataArray
+ GPS altitude values with a time dimension
+
+ Returns
+ ----------
+ gps_lat_filtered : xr.DataArray
+ Filtered latitude values
+ gps_lon_filtered : xr.DataArray
+ Filtered longitude values
+ gps_alt_filtered : xr.DataArray
+ Filtered altitude values
+ """
+ # Get altitude monthly median (at month start)
+ # This will serve as baseline elevations for filtering
+ ser = gps_alt.to_series()
+ monthly_median = ser.resample("MS").median()
+ baseline_elevation = (
+ monthly_median
+ .reindex(ser.index, method="nearest")
+ .ffill()
+ .bfill()
+ )
+
+ # Produce conditional mask
+ mask = (np.abs(gps_alt - baseline_elevation) < 100) | gps_alt.isnull()
+
+ # Apply mask
+ gps_lat_filtered = gps_lat.where(mask)
+ gps_lon_filtered = gps_lon.where(mask)
+ gps_alt_filtered = gps_alt.where(mask)
+
+ return gps_lat_filtered, gps_lon_filtered, gps_alt_filtered
+
+
+ def convert_from_degrees_and_decimal_minutes(gps):
+ """Convert positions (i.e. latitude, longitude) from degrees
+ and decimal minutes (ddmm.mmmm) to decimal degree values (DD)"""
+ return np.floor(gps / 100) + (gps / 100 - np.floor(gps / 100)) * 100 / 60
+
+
+ def convert_from_decimal_minutes(gps: xr.DataArray, pos: float
+ ) -> xr.DataArray:
+ """Convert decimal minutes (mm.mmmmm) to decimal degree
+ values (DD), using a predefined position to append values to.
+ Needed in the case of PROMICE v1 stations, where logger
+ programs saved positions only in decimal minutes."""
+ new_gps = np.sign(pos) * (gps + 100 * np.floor(np.abs(pos)))
+ return convert_from_degrees_and_decimal_minutes(new_gps)
+
+
+ def decode(gps_lat: xr.DataArray,
+ gps_lon: xr.DataArray,
+ gps_time: xr.DataArray
+ ) -> tuple[xr.DataArray,xr.DataArray,xr.DataArray]:
+ """Decode GPS information. This should be applied if GPS information
+ consists of strings and not float values. GPS information is returned in
+ degrees and decimal minutes (ddmm.mmmm) format.
+
+ Parameters
+ ----------
+ gps_lat : `xr.DataArray`
+ GPS latitude
+ gps_lon : `xr.DataArray`
+ GPS longitude
+ gps_time : `xr.DataArray`
+ GPS time
+
+ Returns
+ -------
+ new_lat : `xr.DataArray`
+ Decoded GPS latitude
+ new_lon : `xr.DataArray`
+ Decoded GPS longitude
+ new_time : `xr.DataArray`
+ Decoded GPS time
+ """
+ # Pick the first non-null sample safely and detect decoding format
+ non_null = gps_lat.dropna(dim='time').values
+ sample_value = str(non_null[0])
+
+ try:
+ # Object decoding
+ if "NH" in sample_value:
+ new_lat = gps_object_decoder(gps_lat)
+ new_lon = gps_object_decoder(gps_lon)
+ new_time = gps_object_decoder(gps_time)
+ return new_lat, new_lon, new_time
+
+ # L-string decoding
+ elif "L" in sample_value:
+ logger.info("Found 'L' in GPS string; applying decode + scaling.")
+ new_lat = gps_l_string_decoder(gps_lat)
+ new_lon = gps_l_string_decoder(gps_lon)
+ new_time = gps_object_decoder(gps_time)
+ return new_lat, new_lon, new_time
+
+ # Unknown format, attempt to decode
+ else:
+ logger.info("Unknown GPS string format; attempting generic decode.")
+ new_lat = gps_object_decoder(gps_lat)
+ new_lon = gps_object_decoder(gps_lon)
+ new_time = gps_object_decoder(gps_time)
+ return new_lat, new_lon, new_time
+
+ except Exception as e:
+ logger.error(f"Failed to decode GPS data: {e!r} "
+ f"(dtype={gps_lat.dtype})")
+ return None, None, None
+
+
+ def gps_object_decoder(gps : xr.DataArray) -> xr.DataArray:
+ """GPS decoder for object array formatting. For example, PROMICE v2
+ stations should send information as 'NH6429.01544,WH04932.86061'
+ in the original formatting (NUK_L 2022); PROMICE v3 stations should send
+ coordinates as '6430,4916' (NUK_Uv3); and GC-Net stations should
+ send coordinates as '6628.93936,04617.59187' (DY2)"""
+ str2nums = [re.findall(r"[-+]?\d*\.\d+|\d+", _) if isinstance(_, str) else [np.nan] for _ in gps.values]
+ gps[:] = pd.DataFrame(str2nums).astype(float).T.values[0]
+ gps = gps.astype(float)
+ return gps
+
+
+ def gps_l_string_decoder(gps : xr.DataArray) -> xr.DataArray:
+ """GPS L-string decoder"""
+ # Convert from object array
+ gps = gps_object_decoder(gps)
+
+ # Convert from integer-like values to degrees
+ gps = gps/100000
+ return gps
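The two conversion helpers implement the standard NMEA-style transformation: for degrees and decimal minutes, ddmm.mmmm becomes dd + mm.mmmm/60; for decimal-minutes-only records (PROMICE v1), the known station position supplies the missing degrees first. A worked check (hypothetical coordinates; scalar inputs work here because the helpers use plain numpy operations):

    from pypromice.core.variables.gps import (
        convert_from_decimal_minutes, convert_from_degrees_and_decimal_minutes)

    # degrees and decimal minutes: 64 deg 29.01544' -> 64 + 29.01544/60
    print(convert_from_degrees_and_decimal_minutes(6429.01544))  # ~64.48359

    # decimal minutes only: prepend the degrees from the known position 64.0
    print(convert_from_decimal_minutes(29.01544, 64.0))          # ~64.48359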