pypromice 1.5.3__py3-none-any.whl → 1.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of pypromice might be problematic.

Files changed (67)
  1. pypromice/__init__.py +2 -0
  2. pypromice/{qc → core/qc}/github_data_issues.py +22 -13
  3. pypromice/{qc → core/qc}/percentiles/compute_thresholds.py +2 -2
  4. pypromice/{qc → core/qc}/persistence.py +22 -29
  5. pypromice/{process → core/qc}/value_clipping.py +3 -3
  6. pypromice/core/resampling.py +142 -0
  7. pypromice/core/variables/__init__.py +1 -0
  8. pypromice/core/variables/air_temperature.py +64 -0
  9. pypromice/core/variables/gps.py +221 -0
  10. pypromice/core/variables/humidity.py +111 -0
  11. pypromice/core/variables/precipitation.py +108 -0
  12. pypromice/core/variables/pressure_transducer_depth.py +79 -0
  13. pypromice/core/variables/radiation.py +422 -0
  14. pypromice/core/variables/station_boom_height.py +75 -0
  15. pypromice/core/variables/station_pose.py +375 -0
  16. pypromice/io/bufr/__init__.py +0 -0
  17. pypromice/{postprocess → io/bufr}/bufr_to_csv.py +1 -1
  18. pypromice/{postprocess → io/bufr}/create_bufr_files.py +2 -2
  19. pypromice/{postprocess → io/bufr}/get_bufr.py +6 -6
  20. pypromice/{postprocess → io/bufr}/real_time_utilities.py +3 -3
  21. pypromice/io/ingest/__init__.py +0 -0
  22. pypromice/{utilities → io/ingest}/git.py +1 -3
  23. pypromice/io/ingest/l0.py +294 -0
  24. pypromice/io/ingest/l0_repository.py +103 -0
  25. pypromice/io/ingest/toa5.py +87 -0
  26. pypromice/{process → io}/write.py +1 -1
  27. pypromice/pipeline/L0toL1.py +291 -0
  28. pypromice/pipeline/L1toL2.py +233 -0
  29. pypromice/{process → pipeline}/L2toL3.py +113 -118
  30. pypromice/pipeline/__init__.py +4 -0
  31. pypromice/{process → pipeline}/aws.py +10 -82
  32. pypromice/{process → pipeline}/get_l2.py +2 -2
  33. pypromice/{process → pipeline}/get_l2tol3.py +19 -22
  34. pypromice/{process → pipeline}/join_l2.py +31 -32
  35. pypromice/{process → pipeline}/join_l3.py +16 -14
  36. pypromice/{process → pipeline}/resample.py +75 -51
  37. pypromice/{process → pipeline}/utilities.py +0 -22
  38. pypromice/resources/file_attributes.csv +4 -4
  39. pypromice/resources/variable_aliases_GC-Net.csv +2 -2
  40. pypromice/resources/variables.csv +27 -24
  41. {pypromice-1.5.3.dist-info → pypromice-1.7.0.dist-info}/METADATA +1 -2
  42. pypromice-1.7.0.dist-info/RECORD +65 -0
  43. pypromice-1.7.0.dist-info/entry_points.txt +12 -0
  44. pypromice/get/__init__.py +0 -1
  45. pypromice/get/get.py +0 -211
  46. pypromice/get/get_promice_data.py +0 -56
  47. pypromice/process/L0toL1.py +0 -564
  48. pypromice/process/L1toL2.py +0 -824
  49. pypromice/process/__init__.py +0 -4
  50. pypromice/process/load.py +0 -161
  51. pypromice-1.5.3.dist-info/RECORD +0 -54
  52. pypromice-1.5.3.dist-info/entry_points.txt +0 -13
  53. /pypromice/{postprocess → core}/__init__.py +0 -0
  54. /pypromice/{utilities → core}/dependency_graph.py +0 -0
  55. /pypromice/{qc → core/qc}/__init__.py +0 -0
  56. /pypromice/{qc → core/qc}/percentiles/__init__.py +0 -0
  57. /pypromice/{qc → core/qc}/percentiles/outlier_detector.py +0 -0
  58. /pypromice/{qc → core/qc}/percentiles/thresholds.csv +0 -0
  59. /pypromice/{process → core/variables}/wind.py +0 -0
  60. /pypromice/{utilities → io}/__init__.py +0 -0
  61. /pypromice/{postprocess → io/bufr}/bufr_utilities.py +0 -0
  62. /pypromice/{postprocess → io/bufr}/positions_seed.csv +0 -0
  63. /pypromice/{station_configuration.py → io/bufr/station_configuration.py} +0 -0
  64. /pypromice/{postprocess → io}/make_metadata_csv.py +0 -0
  65. {pypromice-1.5.3.dist-info → pypromice-1.7.0.dist-info}/WHEEL +0 -0
  66. {pypromice-1.5.3.dist-info → pypromice-1.7.0.dist-info}/licenses/LICENSE.txt +0 -0
  67. {pypromice-1.5.3.dist-info → pypromice-1.7.0.dist-info}/top_level.txt +0 -0
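Most of the moves above are a package-layout reorganization: quality control moves from pypromice.qc to pypromice.core.qc, BUFR post-processing from pypromice.postprocess to pypromice.io.bufr, and the processing pipeline from pypromice.process to pypromice.pipeline. A minimal sketch of what this implies for import paths is below; the module paths are taken from the rename list, but whether 1.7.0 keeps the same public functions inside them is not shown in this diff.

    # Module paths in 1.5.3 (old layout; these would fail against the 1.7.0 wheel):
    # import pypromice.qc.persistence
    # import pypromice.process.value_clipping
    # import pypromice.postprocess.get_bufr

    # Module paths implied by the renames in 1.7.0:
    import pypromice.core.qc.persistence
    import pypromice.core.qc.value_clipping
    import pypromice.io.bufr.get_bufr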
--- a/pypromice/process/L0toL1.py
+++ /dev/null
@@ -1,564 +0,0 @@
-#!/usr/bin/env python
-"""
-AWS Level 0 (L0) to Level 1 (L1) data processing
-"""
-import numpy as np
-import pandas as pd
-import xarray as xr
-import re, logging
-from pypromice.process.value_clipping import clip_values
-from pypromice.process import wind
-logger = logging.getLogger(__name__)
-
-
-def toL1(L0, vars_df, T_0=273.15, tilt_threshold=-100):
-    '''Process one Level 0 (L0) product to Level 1
-
-    Parameters
-    ----------
-    L0 : xarray.Dataset
-        Level 0 dataset
-    vars_df : pd.DataFrame
-        Metadata dataframe
-    T_0 : int
-        Air temperature for sonic ranger adjustment
-    tilt_threshold : int
-        Tilt-o-meter threshold for valid measurements
-
-    Returns
-    -------
-    ds : xarray.Dataset
-        Level 1 dataset
-    '''
-    assert(type(L0) == xr.Dataset)
-    ds = L0
-    ds.attrs['level'] = 'L1'
-
-    for l in list(ds.keys()):
-        if l not in ['time', 'msg_i', 'gps_lat', 'gps_lon', 'gps_alt', 'gps_time']:
-            ds[l] = _reformatArray(ds[l])
-
-    # ds['time_orig'] = ds['time'] # Not used
-
-    # The following drops duplicate datetime indices. Needs to run before _addTimeShift!
-    # We can optionally also drop duplicates within _addTimeShift using pandas duplicated,
-    # but retaining the following code instead to preserve previous methods. PJW
-    _, index = np.unique(ds['time'], return_index=True)
-    ds = ds.isel(time=index)
-
-    # If we do not want to shift hourly average values back -1 hr, then comment the following line.
-    ds = addTimeShift(ds, vars_df)
-
-    if hasattr(ds, 'dsr_eng_coef'):
-        ds['dsr'] = (ds['dsr'] * 10) / ds.attrs['dsr_eng_coef'] # Convert radiation from engineering to physical units
-    if hasattr(ds, 'usr_eng_coef'): # TODO add metadata to indicate whether radiometer values are corrected with calibration values or not
-        ds['usr'] = (ds['usr'] * 10) / ds.attrs['usr_eng_coef']
-    if hasattr(ds, 'dlr_eng_coef'):
-        ds['dlr'] = ((ds['dlr'] * 10) / ds.attrs['dlr_eng_coef']) + 5.67E-8*(ds['t_rad'] + T_0)**4
-    if hasattr(ds, 'ulr_eng_coef'):
-        ds['ulr'] = ((ds['ulr'] * 10) / ds.attrs['ulr_eng_coef']) + 5.67E-8*(ds['t_rad'] + T_0)**4
-
-    ds['z_boom_u'] = _reformatArray(ds['z_boom_u']) # Reformat boom height
-
-    ds['t_u_interp'] = interpTemp(ds['t_u'], vars_df)
-    ds['z_boom_u'] = ds['z_boom_u'] * ((ds['t_u_interp'] + T_0)/T_0)**0.5 # Adjust sonic ranger readings for sensitivity to air temperature
-
-    if ds['gps_lat'].dtype.kind == 'O': # Decode and reformat GPS information
-        if 'NH' in ds['gps_lat'].dropna(dim='time').values[1]:
-            ds = decodeGPS(ds, ['gps_lat','gps_lon','gps_time'])
-        elif 'L' in ds['gps_lat'].dropna(dim='time').values[1]:
-            logger.info('Found L in GPS string')
-            ds = decodeGPS(ds, ['gps_lat','gps_lon','gps_time'])
-            for l in ['gps_lat', 'gps_lon']:
-                ds[l] = ds[l]/100000
-        else:
-            try:
-                ds = decodeGPS(ds, ['gps_lat','gps_lon','gps_time']) # TODO this is a work around specifically for L0 RAW processing for THU_U. Find a way to make this slicker
-
-            except:
-                print('Invalid GPS type {ds["gps_lat"].dtype} for decoding')
-
-    for l in ['gps_lat', 'gps_lon', 'gps_alt','gps_time']:
-        ds[l] = _reformatArray(ds[l])
-
-    if hasattr(ds, 'latitude') and hasattr(ds, 'longitude'):
-        ds['gps_lat'] = reformatGPS(ds['gps_lat'], ds.attrs['latitude'])
-        ds['gps_lon'] = reformatGPS(ds['gps_lon'], ds.attrs['longitude'])
-
-    if hasattr(ds, 'logger_type'): # Convert tilt voltage to degrees
-        if ds.attrs['logger_type'].upper() == 'CR1000':
-            ds['tilt_x'] = getTiltDegrees(ds['tilt_x'], tilt_threshold)
-            ds['tilt_y'] = getTiltDegrees(ds['tilt_y'], tilt_threshold)
-
-    if hasattr(ds, 'tilt_y_factor'): # Apply tilt factor (e.g. -1 will invert tilt angle)
-        ds['tilt_y'] = ds['tilt_y']*ds.attrs['tilt_y_factor']
-
-    # Smooth everything
-    # Note that this should be OK for CR1000 tx (data only every 6 hrs),
-    # since we interpolate above in _getTiltDegrees. PJW
-    ds['tilt_x'] = smoothTilt(ds['tilt_x'], 7) # Smooth tilt
-    ds['tilt_y'] = smoothTilt(ds['tilt_y'], 7)
-
-    # Apply wind factor if provided
-    # This is in the case of an anemometer rotations improperly translated to wind speed by the logger program
-    if hasattr(ds, 'wind_u_coef'):
-        logger.info(f'Wind speed correction applied to wspd_u based on factor of {ds.attrs["wind_u_coef"]}')
-        ds['wspd_u'] = wind.correct_wind_speed(ds['wspd_u'],
-                                               ds.attrs['wind_u_coef'])
-    if hasattr(ds, 'wind_l_coef'):
-        logger.info(f'Wind speed correction applied to wspd_u based on factor of {ds.attrs["wind_l_coef"]}')
-        ds['wspd_l'] = wind.correct_wind_speed(ds['wspd_l'],
-                                               ds.attrs['wind_l_coef'])
-    if hasattr(ds, 'wind_i_coef'):
-        logger.info(f'Wind speed correction applied to wspd_u based on factor of {ds.attrs["wind_i_coef"]}')
-        ds['wspd_i'] = wind.correct_wind_speed(ds['wspd_i'],
-                                               ds.attrs['wind_i_coef'])
-
-    # Handle cases where the bedrock attribute is incorrectly set
-    if not 'bedrock' in ds.attrs:
-        logger.warning('bedrock attribute is not set')
-        ds.attrs['bedrock'] = False
-    elif not isinstance(ds.attrs['bedrock'], bool):
-        logger.warning(f'bedrock attribute is not boolean: {ds.attrs["bedrock"]}')
-        ds.attrs['bedrock'] = str(ds.attrs['bedrock']).lower() == 'true'
-
-    is_bedrock = ds.attrs['bedrock']
-
-    if is_bedrock:
-        # some bedrock stations (e.g. KAN_B) do not have tilt in L0 files
-        # we need to create them manually
-        for var in ['tilt_x','tilt_y']:
-            if var not in ds.data_vars:
-                ds[var] = (('time'), np.full(ds['time'].size, np.nan))
-
-        # WEG_B has a non-null z_pt even though it is a berock station
-        if ~ds['z_pt'].isnull().all(): # Calculate pressure transducer fluid density
-            ds['z_pt'] = (('time'), np.full(ds['time'].size, np.nan))
-            logger.info('Warning: Non-null data for z_pt at a bedrock site')
-
-    if ds.attrs['number_of_booms']==1: # 1-boom processing
-        if ~ds['z_pt'].isnull().all(): # Calculate pressure transducer fluid density
-            if hasattr(ds, 'pt_z_offset'): # Apply SR50 stake offset
-                ds['z_pt'] = ds['z_pt'] + int(ds.attrs['pt_z_offset'])
-            ds['z_pt_cor'],ds['z_pt']=getPressDepth(ds['z_pt'], ds['p_u'],
-                                                    ds.attrs['pt_antifreeze'],
-                                                    ds.attrs['pt_z_factor'],
-                                                    ds.attrs['pt_z_coef'],
-                                                    ds.attrs['pt_z_p_coef'])
-        ds['z_stake'] = _reformatArray(ds['z_stake']) # Reformat boom height
-        ds['z_stake'] = ds['z_stake'] * ((ds['t_u'] + T_0)/T_0)**0.5 # Adjust sonic ranger readings for sensitivity to air temperature
-
-    elif ds.attrs['number_of_booms']==2: # 2-boom processing
-        ds['z_boom_l'] = _reformatArray(ds['z_boom_l']) # Reformat boom height
-        ds['t_l_interp'] = interpTemp(ds['t_l'], vars_df)
-        ds['z_boom_l'] = ds['z_boom_l'] * ((ds['t_l_interp']+ T_0)/T_0)**0.5 # Adjust sonic ranger readings for sensitivity to air temperature
-
-    ds = clip_values(ds, vars_df)
-    for key in ['hygroclip_t_offset', 'dsr_eng_coef', 'usr_eng_coef',
-                'dlr_eng_coef', 'ulr_eng_coef', 'wind_u_coef','wind_l_coef',
-                'wind_i_coef', 'pt_z_coef', 'pt_z_p_coef', 'pt_z_factor',
-                'pt_antifreeze', 'boom_azimuth', 'nodata', 'conf', 'file']:
-        ds.attrs.pop(key, None)
-
-    return ds
-
-def addTimeShift(ds, vars_df):
-    '''Shift times based on file format and logger type (shifting only hourly averaged values,
-    and not instantaneous variables). For raw (10 min), all values are sampled instantaneously
-    so do not shift. For STM (1 hour), values are averaged and assigned to end-of-hour by the
-    logger, so shift by -1 hr. For TX (time frequency depends on v2 or v3) then time is shifted
-    depending on logger type. We use the 'instantaneous_hourly' boolean from variables.csv to
-    determine if a variable is considered instantaneous at hourly samples.
-
-    This approach creates two separate sub-dataframes, one for hourly-averaged variables
-    and another for instantaneous variables. The instantaneous dataframe should never be
-    shifted. We apply shifting only to the hourly average dataframe, then concat the two
-    dataframes back together.
-
-    It is possible to use pandas merge or join instead of concat, there are equivalent methods
-    in each. In this case, we use concat throughout.
-
-    Fausto et al. 2021 specifies the convention of assigning hourly averages to start-of-hour,
-    so we need to retain this unless clearly communicated to users.
-
-    Parameters
-    ----------
-    ds : xarray.Dataset
-        Dataset to apply time shift to
-    vars_df : pd.DataFrame
-        Metadata dataframe
-
-    Returns
-    -------
-    ds_out : xarray.Dataset
-        Dataset with shifted times
-    '''
-    df = ds.to_dataframe()
-    # No need to drop duplicates here if performed prior to calling this function.
-    # df = df[~df.index.duplicated(keep='first')] # drop duplicates, keep=first is arbitrary
-    df['doy'] = df.index.dayofyear
-    i_cols = [x for x in df.columns if x in vars_df.index and vars_df['instantaneous_hourly'][x] is True] # instantaneous only, list of columns
-    df_i = df.filter(items=i_cols, axis=1) # instantaneous only dataframe
-    df_a = df.drop(df_i.columns, axis=1) # hourly ave dataframe
-
-    if ds.attrs['format'] == 'raw':
-        # 10-minute data, no shifting
-        df_out = df
-    elif ds.attrs['format'] == 'STM':
-        # hourly-averaged, non-transmitted
-        # shift everything except instantaneous, any logger type
-        df_a = df_a.shift(periods=-1, freq="h")
-        df_out = pd.concat([df_a, df_i], axis=1) # different columns, same datetime indices
-        df_out = df_out.sort_index()
-    elif ds.attrs['format'] == 'TX':
-        if ds.attrs['logger_type'] == 'CR1000X':
-            # v3, data is hourly all year long
-            # shift everything except instantaneous
-            df_a = df_a.shift(periods=-1, freq="h")
-            df_out = pd.concat([df_a, df_i], axis=1) # different columns, same datetime indices
-            df_out = df_out.sort_index()
-        elif ds.attrs['logger_type'] == 'CR1000':
-            # v2, data is hourly (6-hr for instantaneous) for DOY 100-300, otherwise daily at 00 UTC
-            # shift non-instantaneous hourly for DOY 100-300, else do not shift daily
-            df_a_hourly = df_a.loc[(df_a['doy'] >= 100) & (df_a['doy'] <= 300)]
-            # df_a_hourly = df_a.loc[df_a['doy'].between(100, 300, inclusive='both')] # equivalent to above
-            df_a_daily_1 = df_a.loc[(df_a['doy'] < 100)]
-            df_a_daily_2 = df_a.loc[(df_a['doy'] > 300)]
-
-            # shift the hourly ave data
-            df_a_hourly = df_a_hourly.shift(periods=-1, freq="h")
-
-            # stitch everything back together
-            df_concat_u = pd.concat([df_a_daily_1, df_a_daily_2, df_a_hourly], axis=0) # same columns, different datetime indices
-            # It's now possible for df_concat_u to have duplicate datetime indices
-            df_concat_u = df_concat_u[~df_concat_u.index.duplicated(keep='first')] # drop duplicates, keep=first is arbitrary
-
-            df_out = pd.concat([df_concat_u, df_i], axis=1) # different columns, same datetime indices
-            df_out = df_out.sort_index()
-
-    # Back to xarray, and re-assign the original attrs
-    df_out = df_out.drop('doy', axis=1)
-    ds_out = df_out.to_xarray()
-    ds_out = ds_out.assign_attrs(ds.attrs) # Dataset attrs
-    for x in ds_out.data_vars: # variable-specific attrs
-        ds_out[x].attrs = ds[x].attrs
-
-    # equivalent to above:
-    # vals = [xr.DataArray(data=df_out[c], dims=['time'], coords={'time':df_out.index}, attrs=ds[c].attrs) for c in df_out.columns]
-    # ds_out = xr.Dataset(dict(zip(df_out.columns, vals)), attrs=ds.attrs)
-    return ds_out
-
-def getPressDepth(z_pt, p, pt_antifreeze, pt_z_factor, pt_z_coef, pt_z_p_coef):
-    '''Adjust pressure depth and calculate pressure transducer depth based on
-    pressure transducer fluid density
-
-    Parameters
-    ----------
-    z_pt : xr.Dataarray
-        Pressure transducer height (corrected for offset)
-    p : xr.Dataarray
-        Air pressure
-    pt_antifreeze : float
-        Pressure transducer anti-freeze percentage for fluid density
-        correction
-    pt_z_factor : float
-        Pressure transducer factor
-    pt_z_coef : float
-        Pressure transducer coefficient
-    pt_z_p_coef : float
-        Pressure transducer coefficient
-
-    Returns
-    -------
-    z_pt_cor : xr.Dataarray
-        Pressure transducer height corrected
-    z_pt : xr.Dataarray
-        Pressure transducer depth
-    '''
-    # Calculate pressure transducer fluid density
-    if pt_antifreeze == 50: #TODO: Implement function w/ reference (analytical or from LUT)
-        rho_af = 1092 #TODO: Track uncertainty
-    elif pt_antifreeze == 100:
-        rho_af = 1145
-    else:
-        rho_af = np.nan
-        logger.info('ERROR: Incorrect metadata: "pt_antifreeze" = ' +
-                    f'{pt_antifreeze}. Antifreeze mix only supported at 50% or 100%')
-        # assert(False)
-
-    # Correct pressure depth
-    z_pt_cor = z_pt * pt_z_coef * pt_z_factor * 998.0 / rho_af + 100 * (pt_z_p_coef - p) / (rho_af * 9.81)
-
-    # Calculate pressure transducer depth
-    z_pt = z_pt * pt_z_coef * pt_z_factor * 998.0 / rho_af
-
-    return z_pt_cor, z_pt
-
-
-def interpTemp(temp, var_configurations, max_interp=pd.Timedelta(12,'h')):
-    '''Clip and interpolate temperature dataset for use in corrections
-
-    Parameters
-    ----------
-    temp : `xarray.DataArray`
-        Array of temperature data
-    vars_df : `pandas.DataFrame`
-        Dataframe to retrieve attribute hi-lo values from for temperature clipping
-    max_interp : `pandas.Timedelta`
-        Maximum time steps to interpolate across. The default is 12 hours.
-
-    Returns
-    -------
-    temp_interp : `xarray.DataArray`
-        Array of interpolatedtemperature data
-    '''
-    # Determine if upper or lower temperature array
-    var = temp.name.lower()
-
-    # Find range threshold and use it to clip measurements
-    cols = ["lo", "hi", "OOL"]
-    assert set(cols) <= set(var_configurations.columns)
-    variable_limits = var_configurations[cols].dropna(how="all")
-    temp = temp.where(temp >= variable_limits.loc[var,'lo'])
-    temp = temp.where(temp <= variable_limits.loc[var, 'hi'])
-
-    # Drop duplicates and interpolate across NaN values
-    # temp_interp = temp.drop_duplicates(dim='time', keep='first')
-    temp_interp = temp.interpolate_na(dim='time', max_gap=max_interp)
-
-    return temp_interp
-
-
-def smoothTilt(tilt, win_size):
-    '''Smooth tilt values using a rolling window. This is translated from the
-    previous IDL/GDL smoothing algorithm:
-    tiltX = smooth(tiltX,7,/EDGE_MIRROR,MISSING=-999) & tiltY = smooth(tiltY,7,/EDGE_MIRROR, MISSING=-999)
-    endif
-    In Python, this should be
-    dstxy = dstxy.rolling(time=7, win_type='boxcar', center=True).mean()
-    But the EDGE_MIRROR makes it a bit more complicated
-
-    Parameters
-    ----------
-    tilt : xarray.DataArray
-        Array (either 'tilt_x' or 'tilt_y'), tilt values (can be in degrees or voltage)
-    win_size : int
-        Window size to use in pandas 'rolling' method.
-        e.g. a value of 7 spans 70 minutes using 10 minute data.
-
-    Returns
-    -------
-    tdf_rolling : tuple, as: (str, numpy.ndarray)
-        The numpy array is the tilt values, smoothed with a rolling mean
-    '''
-    s = int(win_size/2)
-    tdf = tilt.to_dataframe()
-    mirror_start = tdf.iloc[:s][::-1]
-    mirror_end = tdf.iloc[-s:][::-1]
-    mirrored_tdf = pd.concat([mirror_start, tdf, mirror_end])
-
-    tdf_rolling = (
-        ('time'),
-        mirrored_tdf.rolling(
-            win_size, win_type='boxcar', min_periods=1, center=True
-        ).mean()[s:-s].values.flatten()
-    )
-    return tdf_rolling
-
-def getTiltDegrees(tilt, threshold):
-    '''Filter tilt with given threshold, and convert from voltage to degrees.
-    Voltage-to-degrees converseion is based on the equation in 3.2.9 at
-    https://essd.copernicus.org/articles/13/3819/2021/#section3
-
-    Parameters
-    ----------
-    tilt : xarray.DataArray
-        Array (either 'tilt_x' or 'tilt_y'), tilt values (voltage)
-    threshold : int
-        Values below this threshold (-100) will not be retained.
-
-    Returns
-    -------
-    dst.interpolate_na() : xarray.DataArray
-        Array (either 'tilt_x' or 'tilt_y'), tilt values (degrees)
-    '''
-    # notOKtiltX = where(tiltX lt -100, complement=OKtiltX) & notOKtiltY = where(tiltY lt -100, complement=OKtiltY)
-    notOKtilt = (tilt < threshold)
-    OKtilt = (tilt >= threshold)
-    tilt = tilt / 10
-
-    # IDL version:
-    # tiltX = tiltX/10.
-    # tiltnonzero = where(tiltX ne 0 and tiltX gt -40 and tiltX lt 40)
-    # if n_elements(tiltnonzero) ne 1 then tiltX[tiltnonzero] = tiltX[tiltnonzero]/abs(tiltX[tiltnonzero])*(-0.49*(abs(tiltX[tiltnonzero]))^4 + 3.6*(abs(tiltX[tiltnonzero]))^3 - 10.4*(abs(tiltX[tiltnonzero]))^2 +21.1*(abs(tiltX[tiltnonzero])))
-    # tiltY = tiltY/10.
-    # tiltnonzero = where(tiltY ne 0 and tiltY gt -40 and tiltY lt 40)
-    # if n_elements(tiltnonzero) ne 1 then tiltY[tiltnonzero] = tiltY[tiltnonzero]/abs(tiltY[tiltnonzero])*(-0.49*(abs(tiltY[tiltnonzero]))^4 + 3.6*(abs(tiltY[tiltnonzero]))^3 - 10.4*(abs(tiltY[tiltnonzero]))^2 +21.1*(abs(tiltY[tiltnonzero])))
-
-    dst = tilt
-    nz = (dst != 0) & (np.abs(dst) < 40)
-
-    dst = dst.where(~nz, other = dst / np.abs(dst)
-                    * (-0.49
-                    * (np.abs(dst))**4 + 3.6
-                    * (np.abs(dst))**3 - 10.4
-                    * (np.abs(dst))**2 + 21.1
-                    * (np.abs(dst))))
-
-    # if n_elements(OKtiltX) gt 1 then tiltX[notOKtiltX] = interpol(tiltX[OKtiltX],OKtiltX,notOKtiltX) ; Interpolate over gaps for radiation correction; set to -999 again below.
-    dst = dst.where(~notOKtilt)
-    return dst.interpolate_na(dim='time', use_coordinate=False) #TODO: Filling w/o considering time gaps to re-create IDL/GDL outputs. Should fill with coordinate not False. Also consider 'max_gap' option?
-
-
-def decodeGPS(ds, gps_names):
-    '''Decode GPS information based on names of GPS attributes. This should be
-    applied if gps information does not consist of float values
-
-    Parameters
-    ----------
-    ds : xr.Dataset
-        Data set
-    gps_names : list
-        Variable names for GPS information, such as "gps_lat", "gps_lon" and
-        "gps_alt"
-
-    Returns
-    -------
-    ds : xr.Dataset
-        Data set with decoded GPS information
-    '''
-    for v in gps_names:
-        a = ds[v].attrs
-        str2nums = [re.findall(r"[-+]?\d*\.\d+|\d+", _) if isinstance(_, str) else [np.nan] for _ in ds[v].values]
-        ds[v][:] = pd.DataFrame(str2nums).astype(float).T.values[0]
-        ds[v] = ds[v].astype(float)
-        ds[v].attrs = a
-    return ds
-
-def reformatGPS(pos_arr, attrs):
-    '''Correct latitude and longitude from native format to decimal degrees.
-
-    v2 stations should send "NH6429.01544","WH04932.86061" (NUK_L 2022)
-    v3 stations should send coordinates as "6628.93936","04617.59187" (DY2) or 6430,4916 (NUK_Uv3)
-    decodeGPS should have decoded these strings to floats in ddmm.mmmm format
-    v1 stations however only saved decimal minutes (mm.mmmmm) as float<=60. '
-    In this case, we use the integer part of the latitude given in the config
-    file and append the gps value after it.
-
-    Parameters
-    ----------
-    pos_arr : xr.Dataarray
-        Array of latitude or longitude measured by the GPS
-    attrs : dict
-        The global attribute 'latitude' or 'longitude' associated with the
-        file being processed. It is the standard latitude/longitude given in the
-        config file for that station.
-
-    Returns
-    -------
-    pos_arr : xr.Dataarray
-        Formatted GPS position array in decimal degree
-    '''
-    if np.any((pos_arr <= 90) & (pos_arr > 0)):
-        # then pos_arr is in decimal minutes, so we add to it the integer
-        # part of the latitude given in the config file x100
-        # so that it reads ddmm.mmmmmm like for v2 and v3 files
-        # Note that np.sign and np.attrs handles negative longitudes.
-        pos_arr = np.sign(attrs) * (pos_arr + 100*np.floor(np.abs(attrs)))
-    a = pos_arr.attrs
-    pos_arr = np.floor(pos_arr / 100) + (pos_arr / 100 - np.floor(pos_arr / 100)) * 100 / 60
-    pos_arr.attrs = a
-    return pos_arr
-
-def _reformatArray(ds_arr):
-    '''Reformat DataArray values and attributes
-
-    Parameters
-    ----------
-    ds_arr : xr.Dataarray
-        Data array
-
-    Returns
-    -------
-    ds_arr : xr.Dataarray
-        Formatted data array
-    '''
-    a = ds_arr.attrs # Store
-    ds_arr.values = pd.to_numeric(ds_arr, errors='coerce')
-    ds_arr.attrs = a # Reformat
-    return ds_arr
-
-def _removeVars(ds, v_names):
-    '''Remove redundant variables if present in dataset
-
-    Parameters
-    ----------
-    ds : xr.Dataset
-        Data set
-    v_names : list
-        List of column names to drop
-
-    Returns
-    -------
-    ds : xr.Dataset
-        Data set with removed variables
-    '''
-    for v in v_names:
-        if v in list(ds.variables): ds = ds.drop_vars(v)
-    return ds
-
-def _popCols(ds, booms, data_type, vars_df, cols):
-    '''Populate data array columns with given variable names from look-up table
-
-    Parameters
-    ----------
-    ds : xr.Dataset
-        Data set
-    booms : int
-        Number of booms (1 or 2)
-    data_type : str
-        Type of data ("tx", "raw")
-    vars_df : pd.DataFrame
-        Variables lookup table
-    cols : list
-        Names of columns to populate
-
-    Returns
-    -------
-    ds : xr.Dataset
-        Data with populated columns
-    '''
-    if booms==1:
-        names = vars_df.loc[(vars_df[cols[0]]!='two-boom')]
-
-    elif booms==2:
-        names = vars_df.loc[(vars_df[cols[0]]!='one-boom')]
-
-    for v in list(names.index):
-        if v not in list(ds.variables):
-            ds[v] = (('time'), np.arange(ds['time'].size)*np.nan)
-    return ds
-
-# def _popCols(ds, booms, data_type, vars_df, cols):
-#     if booms==1:
-#         if data_type !='TX':
-#             names = vars_df.loc[(vars_df[cols[0]]!='two-boom')]
-#         else:
-#             names = vars_df.loc[(vars_df[cols[0]] != 'two-boom') & vars_df[cols[1]] != 'tx']
-
-#     elif booms==2:
-#         if data_type !='TX':
-#             names = vars_df.loc[(vars_df[cols[0]]!='two-boom')]
-#         else:
-#             names = vars_df.loc[(vars_df[cols[0]] != 'two-boom') & vars_df[cols[1]] != 'tx']
-
-#     for v in list(names.index):
-#         if v not in list(ds.variables):
-#             ds[v] = (('time'), np.arange(ds['time'].size)*np.nan)
-#     return ds
-
-#------------------------------------------------------------------------------
-
-if __name__ == "__main__":
-    # unittest.main()
-    pass
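
For orientation, the fluid-density correction in the getPressDepth function removed above reduces to the following minimal sketch. It restates the two formulas from the diff with illustrative names only; the equivalent logic in 1.7.0 presumably lives in pypromice/core/variables/pressure_transducer_depth.py, whose contents are not shown in this diff.

    import numpy as np

    def press_depth_sketch(z_pt, p, pt_antifreeze, pt_z_factor, pt_z_coef, pt_z_p_coef):
        # Antifreeze mix sets the fluid density; only 50% and 100% mixes are supported.
        if pt_antifreeze == 50:
            rho_af = 1092.0
        elif pt_antifreeze == 100:
            rho_af = 1145.0
        else:
            rho_af = np.nan  # unsupported mix: outputs become NaN
        # Corrected transducer height: density scaling plus an air-pressure term.
        z_pt_cor = z_pt * pt_z_coef * pt_z_factor * 998.0 / rho_af \
                   + 100 * (pt_z_p_coef - p) / (rho_af * 9.81)
        # Transducer depth: density scaling only.
        z_pt_depth = z_pt * pt_z_coef * pt_z_factor * 998.0 / rho_af
        return z_pt_cor, z_pt_depth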