pypromice 1.5.3__py3-none-any.whl → 1.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pypromice/__init__.py +2 -0
- pypromice/{qc → core/qc}/github_data_issues.py +22 -13
- pypromice/{qc → core/qc}/percentiles/compute_thresholds.py +2 -2
- pypromice/{qc → core/qc}/persistence.py +22 -29
- pypromice/{process → core/qc}/value_clipping.py +3 -3
- pypromice/core/resampling.py +142 -0
- pypromice/core/variables/__init__.py +1 -0
- pypromice/core/variables/air_temperature.py +64 -0
- pypromice/core/variables/gps.py +221 -0
- pypromice/core/variables/humidity.py +111 -0
- pypromice/core/variables/precipitation.py +108 -0
- pypromice/core/variables/pressure_transducer_depth.py +79 -0
- pypromice/core/variables/radiation.py +422 -0
- pypromice/core/variables/station_boom_height.py +75 -0
- pypromice/core/variables/station_pose.py +375 -0
- pypromice/io/bufr/__init__.py +0 -0
- pypromice/{postprocess → io/bufr}/bufr_to_csv.py +1 -1
- pypromice/{postprocess → io/bufr}/create_bufr_files.py +2 -2
- pypromice/{postprocess → io/bufr}/get_bufr.py +6 -6
- pypromice/{postprocess → io/bufr}/real_time_utilities.py +3 -3
- pypromice/io/ingest/__init__.py +0 -0
- pypromice/{utilities → io/ingest}/git.py +1 -3
- pypromice/io/ingest/l0.py +294 -0
- pypromice/io/ingest/l0_repository.py +103 -0
- pypromice/io/ingest/toa5.py +87 -0
- pypromice/{process → io}/write.py +1 -1
- pypromice/pipeline/L0toL1.py +291 -0
- pypromice/pipeline/L1toL2.py +233 -0
- pypromice/{process → pipeline}/L2toL3.py +113 -118
- pypromice/pipeline/__init__.py +4 -0
- pypromice/{process → pipeline}/aws.py +10 -82
- pypromice/{process → pipeline}/get_l2.py +2 -2
- pypromice/{process → pipeline}/get_l2tol3.py +19 -22
- pypromice/{process → pipeline}/join_l2.py +31 -32
- pypromice/{process → pipeline}/join_l3.py +16 -14
- pypromice/{process → pipeline}/resample.py +75 -51
- pypromice/{process → pipeline}/utilities.py +0 -22
- pypromice/resources/file_attributes.csv +4 -4
- pypromice/resources/variable_aliases_GC-Net.csv +2 -2
- pypromice/resources/variables.csv +27 -24
- {pypromice-1.5.3.dist-info → pypromice-1.7.0.dist-info}/METADATA +1 -2
- pypromice-1.7.0.dist-info/RECORD +65 -0
- pypromice-1.7.0.dist-info/entry_points.txt +12 -0
- pypromice/get/__init__.py +0 -1
- pypromice/get/get.py +0 -211
- pypromice/get/get_promice_data.py +0 -56
- pypromice/process/L0toL1.py +0 -564
- pypromice/process/L1toL2.py +0 -824
- pypromice/process/__init__.py +0 -4
- pypromice/process/load.py +0 -161
- pypromice-1.5.3.dist-info/RECORD +0 -54
- pypromice-1.5.3.dist-info/entry_points.txt +0 -13
- /pypromice/{postprocess → core}/__init__.py +0 -0
- /pypromice/{utilities → core}/dependency_graph.py +0 -0
- /pypromice/{qc → core/qc}/__init__.py +0 -0
- /pypromice/{qc → core/qc}/percentiles/__init__.py +0 -0
- /pypromice/{qc → core/qc}/percentiles/outlier_detector.py +0 -0
- /pypromice/{qc → core/qc}/percentiles/thresholds.csv +0 -0
- /pypromice/{process → core/variables}/wind.py +0 -0
- /pypromice/{utilities → io}/__init__.py +0 -0
- /pypromice/{postprocess → io/bufr}/bufr_utilities.py +0 -0
- /pypromice/{postprocess → io/bufr}/positions_seed.csv +0 -0
- /pypromice/{station_configuration.py → io/bufr/station_configuration.py} +0 -0
- /pypromice/{postprocess → io}/make_metadata_csv.py +0 -0
- {pypromice-1.5.3.dist-info → pypromice-1.7.0.dist-info}/WHEEL +0 -0
- {pypromice-1.5.3.dist-info → pypromice-1.7.0.dist-info}/licenses/LICENSE.txt +0 -0
- {pypromice-1.5.3.dist-info → pypromice-1.7.0.dist-info}/top_level.txt +0 -0
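Most of the change set above is a package reorganisation rather than new behaviour: pypromice/qc moved under pypromice/core/qc, pypromice/postprocess under pypromice/io/bufr, and most of pypromice/process was split between pypromice/core and pypromice/pipeline. A minimal import-migration sketch based only on the renames listed above (it assumes the moved modules keep their module-level names; this is not a verified 1.7.0 API reference):

# pypromice 1.5.3
from pypromice.process.value_clipping import clip_values
from pypromice.qc import persistence
from pypromice.postprocess import get_bufr

# pypromice 1.7.0 -- same modules at their new locations
from pypromice.core.qc.value_clipping import clip_values
from pypromice.core.qc import persistence
from pypromice.io.bufr import get_bufr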
pypromice/process/L0toL1.py
DELETED
@@ -1,564 +0,0 @@
-#!/usr/bin/env python
-"""
-AWS Level 0 (L0) to Level 1 (L1) data processing
-"""
-import numpy as np
-import pandas as pd
-import xarray as xr
-import re, logging
-from pypromice.process.value_clipping import clip_values
-from pypromice.process import wind
-logger = logging.getLogger(__name__)
-
-
-def toL1(L0, vars_df, T_0=273.15, tilt_threshold=-100):
-    '''Process one Level 0 (L0) product to Level 1
-
-    Parameters
-    ----------
-    L0 : xarray.Dataset
-        Level 0 dataset
-    vars_df : pd.DataFrame
-        Metadata dataframe
-    T_0 : int
-        Air temperature for sonic ranger adjustment
-    tilt_threshold : int
-        Tilt-o-meter threshold for valid measurements
-
-    Returns
-    -------
-    ds : xarray.Dataset
-        Level 1 dataset
-    '''
-    assert(type(L0) == xr.Dataset)
-    ds = L0
-    ds.attrs['level'] = 'L1'
-
-    for l in list(ds.keys()):
-        if l not in ['time', 'msg_i', 'gps_lat', 'gps_lon', 'gps_alt', 'gps_time']:
-            ds[l] = _reformatArray(ds[l])
-
-    # ds['time_orig'] = ds['time'] # Not used
-
-    # The following drops duplicate datetime indices. Needs to run before _addTimeShift!
-    # We can optionally also drop duplicates within _addTimeShift using pandas duplicated,
-    # but retaining the following code instead to preserve previous methods. PJW
-    _, index = np.unique(ds['time'], return_index=True)
-    ds = ds.isel(time=index)
-
-    # If we do not want to shift hourly average values back -1 hr, then comment the following line.
-    ds = addTimeShift(ds, vars_df)
-
-    if hasattr(ds, 'dsr_eng_coef'):
-        ds['dsr'] = (ds['dsr'] * 10) / ds.attrs['dsr_eng_coef'] # Convert radiation from engineering to physical units
-    if hasattr(ds, 'usr_eng_coef'): # TODO add metadata to indicate whether radiometer values are corrected with calibration values or not
-        ds['usr'] = (ds['usr'] * 10) / ds.attrs['usr_eng_coef']
-    if hasattr(ds, 'dlr_eng_coef'):
-        ds['dlr'] = ((ds['dlr'] * 10) / ds.attrs['dlr_eng_coef']) + 5.67E-8*(ds['t_rad'] + T_0)**4
-    if hasattr(ds, 'ulr_eng_coef'):
-        ds['ulr'] = ((ds['ulr'] * 10) / ds.attrs['ulr_eng_coef']) + 5.67E-8*(ds['t_rad'] + T_0)**4
-
-    ds['z_boom_u'] = _reformatArray(ds['z_boom_u']) # Reformat boom height
-
-    ds['t_u_interp'] = interpTemp(ds['t_u'], vars_df)
-    ds['z_boom_u'] = ds['z_boom_u'] * ((ds['t_u_interp'] + T_0)/T_0)**0.5 # Adjust sonic ranger readings for sensitivity to air temperature
-
-    if ds['gps_lat'].dtype.kind == 'O': # Decode and reformat GPS information
-        if 'NH' in ds['gps_lat'].dropna(dim='time').values[1]:
-            ds = decodeGPS(ds, ['gps_lat','gps_lon','gps_time'])
-        elif 'L' in ds['gps_lat'].dropna(dim='time').values[1]:
-            logger.info('Found L in GPS string')
-            ds = decodeGPS(ds, ['gps_lat','gps_lon','gps_time'])
-            for l in ['gps_lat', 'gps_lon']:
-                ds[l] = ds[l]/100000
-        else:
-            try:
-                ds = decodeGPS(ds, ['gps_lat','gps_lon','gps_time']) # TODO this is a work around specifically for L0 RAW processing for THU_U. Find a way to make this slicker
-
-            except:
-                print('Invalid GPS type {ds["gps_lat"].dtype} for decoding')
-
-    for l in ['gps_lat', 'gps_lon', 'gps_alt','gps_time']:
-        ds[l] = _reformatArray(ds[l])
-
-    if hasattr(ds, 'latitude') and hasattr(ds, 'longitude'):
-        ds['gps_lat'] = reformatGPS(ds['gps_lat'], ds.attrs['latitude'])
-        ds['gps_lon'] = reformatGPS(ds['gps_lon'], ds.attrs['longitude'])
-
-    if hasattr(ds, 'logger_type'): # Convert tilt voltage to degrees
-        if ds.attrs['logger_type'].upper() == 'CR1000':
-            ds['tilt_x'] = getTiltDegrees(ds['tilt_x'], tilt_threshold)
-            ds['tilt_y'] = getTiltDegrees(ds['tilt_y'], tilt_threshold)
-
-    if hasattr(ds, 'tilt_y_factor'): # Apply tilt factor (e.g. -1 will invert tilt angle)
-        ds['tilt_y'] = ds['tilt_y']*ds.attrs['tilt_y_factor']
-
-    # Smooth everything
-    # Note that this should be OK for CR1000 tx (data only every 6 hrs),
-    # since we interpolate above in _getTiltDegrees. PJW
-    ds['tilt_x'] = smoothTilt(ds['tilt_x'], 7) # Smooth tilt
-    ds['tilt_y'] = smoothTilt(ds['tilt_y'], 7)
-
-    # Apply wind factor if provided
-    # This is in the case of an anemometer rotations improperly translated to wind speed by the logger program
-    if hasattr(ds, 'wind_u_coef'):
-        logger.info(f'Wind speed correction applied to wspd_u based on factor of {ds.attrs["wind_u_coef"]}')
-        ds['wspd_u'] = wind.correct_wind_speed(ds['wspd_u'],
-                                               ds.attrs['wind_u_coef'])
-    if hasattr(ds, 'wind_l_coef'):
-        logger.info(f'Wind speed correction applied to wspd_u based on factor of {ds.attrs["wind_l_coef"]}')
-        ds['wspd_l'] = wind.correct_wind_speed(ds['wspd_l'],
-                                               ds.attrs['wind_l_coef'])
-    if hasattr(ds, 'wind_i_coef'):
-        logger.info(f'Wind speed correction applied to wspd_u based on factor of {ds.attrs["wind_i_coef"]}')
-        ds['wspd_i'] = wind.correct_wind_speed(ds['wspd_i'],
-                                               ds.attrs['wind_i_coef'])
-
-    # Handle cases where the bedrock attribute is incorrectly set
-    if not 'bedrock' in ds.attrs:
-        logger.warning('bedrock attribute is not set')
-        ds.attrs['bedrock'] = False
-    elif not isinstance(ds.attrs['bedrock'], bool):
-        logger.warning(f'bedrock attribute is not boolean: {ds.attrs["bedrock"]}')
-        ds.attrs['bedrock'] = str(ds.attrs['bedrock']).lower() == 'true'
-
-    is_bedrock = ds.attrs['bedrock']
-
-    if is_bedrock:
-        # some bedrock stations (e.g. KAN_B) do not have tilt in L0 files
-        # we need to create them manually
-        for var in ['tilt_x','tilt_y']:
-            if var not in ds.data_vars:
-                ds[var] = (('time'), np.full(ds['time'].size, np.nan))
-
-        # WEG_B has a non-null z_pt even though it is a berock station
-        if ~ds['z_pt'].isnull().all(): # Calculate pressure transducer fluid density
-            ds['z_pt'] = (('time'), np.full(ds['time'].size, np.nan))
-            logger.info('Warning: Non-null data for z_pt at a bedrock site')
-
-    if ds.attrs['number_of_booms']==1: # 1-boom processing
-        if ~ds['z_pt'].isnull().all(): # Calculate pressure transducer fluid density
-            if hasattr(ds, 'pt_z_offset'): # Apply SR50 stake offset
-                ds['z_pt'] = ds['z_pt'] + int(ds.attrs['pt_z_offset'])
-            ds['z_pt_cor'],ds['z_pt']=getPressDepth(ds['z_pt'], ds['p_u'],
-                                                    ds.attrs['pt_antifreeze'],
-                                                    ds.attrs['pt_z_factor'],
-                                                    ds.attrs['pt_z_coef'],
-                                                    ds.attrs['pt_z_p_coef'])
-        ds['z_stake'] = _reformatArray(ds['z_stake']) # Reformat boom height
-        ds['z_stake'] = ds['z_stake'] * ((ds['t_u'] + T_0)/T_0)**0.5 # Adjust sonic ranger readings for sensitivity to air temperature
-
-    elif ds.attrs['number_of_booms']==2: # 2-boom processing
-        ds['z_boom_l'] = _reformatArray(ds['z_boom_l']) # Reformat boom height
-        ds['t_l_interp'] = interpTemp(ds['t_l'], vars_df)
-        ds['z_boom_l'] = ds['z_boom_l'] * ((ds['t_l_interp']+ T_0)/T_0)**0.5 # Adjust sonic ranger readings for sensitivity to air temperature
-
-    ds = clip_values(ds, vars_df)
-    for key in ['hygroclip_t_offset', 'dsr_eng_coef', 'usr_eng_coef',
-                'dlr_eng_coef', 'ulr_eng_coef', 'wind_u_coef','wind_l_coef',
-                'wind_i_coef', 'pt_z_coef', 'pt_z_p_coef', 'pt_z_factor',
-                'pt_antifreeze', 'boom_azimuth', 'nodata', 'conf', 'file']:
-        ds.attrs.pop(key, None)
-
-    return ds
-
-def addTimeShift(ds, vars_df):
-    '''Shift times based on file format and logger type (shifting only hourly averaged values,
-    and not instantaneous variables). For raw (10 min), all values are sampled instantaneously
-    so do not shift. For STM (1 hour), values are averaged and assigned to end-of-hour by the
-    logger, so shift by -1 hr. For TX (time frequency depends on v2 or v3) then time is shifted
-    depending on logger type. We use the 'instantaneous_hourly' boolean from variables.csv to
-    determine if a variable is considered instantaneous at hourly samples.
-
-    This approach creates two separate sub-dataframes, one for hourly-averaged variables
-    and another for instantaneous variables. The instantaneous dataframe should never be
-    shifted. We apply shifting only to the hourly average dataframe, then concat the two
-    dataframes back together.
-
-    It is possible to use pandas merge or join instead of concat, there are equivalent methods
-    in each. In this case, we use concat throughout.
-
-    Fausto et al. 2021 specifies the convention of assigning hourly averages to start-of-hour,
-    so we need to retain this unless clearly communicated to users.
-
-    Parameters
-    ----------
-    ds : xarray.Dataset
-        Dataset to apply time shift to
-    vars_df : pd.DataFrame
-        Metadata dataframe
-
-    Returns
-    -------
-    ds_out : xarray.Dataset
-        Dataset with shifted times
-    '''
-    df = ds.to_dataframe()
-    # No need to drop duplicates here if performed prior to calling this function.
-    # df = df[~df.index.duplicated(keep='first')] # drop duplicates, keep=first is arbitrary
-    df['doy'] = df.index.dayofyear
-    i_cols = [x for x in df.columns if x in vars_df.index and vars_df['instantaneous_hourly'][x] is True] # instantaneous only, list of columns
-    df_i = df.filter(items=i_cols, axis=1) # instantaneous only dataframe
-    df_a = df.drop(df_i.columns, axis=1) # hourly ave dataframe
-
-    if ds.attrs['format'] == 'raw':
-        # 10-minute data, no shifting
-        df_out = df
-    elif ds.attrs['format'] == 'STM':
-        # hourly-averaged, non-transmitted
-        # shift everything except instantaneous, any logger type
-        df_a = df_a.shift(periods=-1, freq="h")
-        df_out = pd.concat([df_a, df_i], axis=1) # different columns, same datetime indices
-        df_out = df_out.sort_index()
-    elif ds.attrs['format'] == 'TX':
-        if ds.attrs['logger_type'] == 'CR1000X':
-            # v3, data is hourly all year long
-            # shift everything except instantaneous
-            df_a = df_a.shift(periods=-1, freq="h")
-            df_out = pd.concat([df_a, df_i], axis=1) # different columns, same datetime indices
-            df_out = df_out.sort_index()
-        elif ds.attrs['logger_type'] == 'CR1000':
-            # v2, data is hourly (6-hr for instantaneous) for DOY 100-300, otherwise daily at 00 UTC
-            # shift non-instantaneous hourly for DOY 100-300, else do not shift daily
-            df_a_hourly = df_a.loc[(df_a['doy'] >= 100) & (df_a['doy'] <= 300)]
-            # df_a_hourly = df_a.loc[df_a['doy'].between(100, 300, inclusive='both')] # equivalent to above
-            df_a_daily_1 = df_a.loc[(df_a['doy'] < 100)]
-            df_a_daily_2 = df_a.loc[(df_a['doy'] > 300)]
-
-            # shift the hourly ave data
-            df_a_hourly = df_a_hourly.shift(periods=-1, freq="h")
-
-            # stitch everything back together
-            df_concat_u = pd.concat([df_a_daily_1, df_a_daily_2, df_a_hourly], axis=0) # same columns, different datetime indices
-            # It's now possible for df_concat_u to have duplicate datetime indices
-            df_concat_u = df_concat_u[~df_concat_u.index.duplicated(keep='first')] # drop duplicates, keep=first is arbitrary
-
-            df_out = pd.concat([df_concat_u, df_i], axis=1) # different columns, same datetime indices
-            df_out = df_out.sort_index()
-
-    # Back to xarray, and re-assign the original attrs
-    df_out = df_out.drop('doy', axis=1)
-    ds_out = df_out.to_xarray()
-    ds_out = ds_out.assign_attrs(ds.attrs) # Dataset attrs
-    for x in ds_out.data_vars: # variable-specific attrs
-        ds_out[x].attrs = ds[x].attrs
-
-    # equivalent to above:
-    # vals = [xr.DataArray(data=df_out[c], dims=['time'], coords={'time':df_out.index}, attrs=ds[c].attrs) for c in df_out.columns]
-    # ds_out = xr.Dataset(dict(zip(df_out.columns, vals)), attrs=ds.attrs)
-    return ds_out
-
-def getPressDepth(z_pt, p, pt_antifreeze, pt_z_factor, pt_z_coef, pt_z_p_coef):
-    '''Adjust pressure depth and calculate pressure transducer depth based on
-    pressure transducer fluid density
-
-    Parameters
-    ----------
-    z_pt : xr.Dataarray
-        Pressure transducer height (corrected for offset)
-    p : xr.Dataarray
-        Air pressure
-    pt_antifreeze : float
-        Pressure transducer anti-freeze percentage for fluid density
-        correction
-    pt_z_factor : float
-        Pressure transducer factor
-    pt_z_coef : float
-        Pressure transducer coefficient
-    pt_z_p_coef : float
-        Pressure transducer coefficient
-
-    Returns
-    -------
-    z_pt_cor : xr.Dataarray
-        Pressure transducer height corrected
-    z_pt : xr.Dataarray
-        Pressure transducer depth
-    '''
-    # Calculate pressure transducer fluid density
-    if pt_antifreeze == 50: #TODO: Implement function w/ reference (analytical or from LUT)
-        rho_af = 1092 #TODO: Track uncertainty
-    elif pt_antifreeze == 100:
-        rho_af = 1145
-    else:
-        rho_af = np.nan
-        logger.info('ERROR: Incorrect metadata: "pt_antifreeze" = ' +
-                    f'{pt_antifreeze}. Antifreeze mix only supported at 50% or 100%')
-        # assert(False)
-
-    # Correct pressure depth
-    z_pt_cor = z_pt * pt_z_coef * pt_z_factor * 998.0 / rho_af + 100 * (pt_z_p_coef - p) / (rho_af * 9.81)
-
-    # Calculate pressure transducer depth
-    z_pt = z_pt * pt_z_coef * pt_z_factor * 998.0 / rho_af
-
-    return z_pt_cor, z_pt
-
-
-def interpTemp(temp, var_configurations, max_interp=pd.Timedelta(12,'h')):
-    '''Clip and interpolate temperature dataset for use in corrections
-
-    Parameters
-    ----------
-    temp : `xarray.DataArray`
-        Array of temperature data
-    vars_df : `pandas.DataFrame`
-        Dataframe to retrieve attribute hi-lo values from for temperature clipping
-    max_interp : `pandas.Timedelta`
-        Maximum time steps to interpolate across. The default is 12 hours.
-
-    Returns
-    -------
-    temp_interp : `xarray.DataArray`
-        Array of interpolated temperature data
-    '''
-    # Determine if upper or lower temperature array
-    var = temp.name.lower()
-
-    # Find range threshold and use it to clip measurements
-    cols = ["lo", "hi", "OOL"]
-    assert set(cols) <= set(var_configurations.columns)
-    variable_limits = var_configurations[cols].dropna(how="all")
-    temp = temp.where(temp >= variable_limits.loc[var,'lo'])
-    temp = temp.where(temp <= variable_limits.loc[var, 'hi'])
-
-    # Drop duplicates and interpolate across NaN values
-    # temp_interp = temp.drop_duplicates(dim='time', keep='first')
-    temp_interp = temp.interpolate_na(dim='time', max_gap=max_interp)
-
-    return temp_interp
-
-
-def smoothTilt(tilt, win_size):
-    '''Smooth tilt values using a rolling window. This is translated from the
-    previous IDL/GDL smoothing algorithm:
-    tiltX = smooth(tiltX,7,/EDGE_MIRROR,MISSING=-999) & tiltY = smooth(tiltY,7,/EDGE_MIRROR, MISSING=-999)
-    endif
-    In Python, this should be
-    dstxy = dstxy.rolling(time=7, win_type='boxcar', center=True).mean()
-    But the EDGE_MIRROR makes it a bit more complicated
-
-    Parameters
-    ----------
-    tilt : xarray.DataArray
-        Array (either 'tilt_x' or 'tilt_y'), tilt values (can be in degrees or voltage)
-    win_size : int
-        Window size to use in pandas 'rolling' method.
-        e.g. a value of 7 spans 70 minutes using 10 minute data.
-
-    Returns
-    -------
-    tdf_rolling : tuple, as: (str, numpy.ndarray)
-        The numpy array is the tilt values, smoothed with a rolling mean
-    '''
-    s = int(win_size/2)
-    tdf = tilt.to_dataframe()
-    mirror_start = tdf.iloc[:s][::-1]
-    mirror_end = tdf.iloc[-s:][::-1]
-    mirrored_tdf = pd.concat([mirror_start, tdf, mirror_end])
-
-    tdf_rolling = (
-        ('time'),
-        mirrored_tdf.rolling(
-            win_size, win_type='boxcar', min_periods=1, center=True
-        ).mean()[s:-s].values.flatten()
-    )
-    return tdf_rolling
-
-def getTiltDegrees(tilt, threshold):
-    '''Filter tilt with given threshold, and convert from voltage to degrees.
-    Voltage-to-degrees converseion is based on the equation in 3.2.9 at
-    https://essd.copernicus.org/articles/13/3819/2021/#section3
-
-    Parameters
-    ----------
-    tilt : xarray.DataArray
-        Array (either 'tilt_x' or 'tilt_y'), tilt values (voltage)
-    threshold : int
-        Values below this threshold (-100) will not be retained.
-
-    Returns
-    -------
-    dst.interpolate_na() : xarray.DataArray
-        Array (either 'tilt_x' or 'tilt_y'), tilt values (degrees)
-    '''
-    # notOKtiltX = where(tiltX lt -100, complement=OKtiltX) & notOKtiltY = where(tiltY lt -100, complement=OKtiltY)
-    notOKtilt = (tilt < threshold)
-    OKtilt = (tilt >= threshold)
-    tilt = tilt / 10
-
-    # IDL version:
-    # tiltX = tiltX/10.
-    # tiltnonzero = where(tiltX ne 0 and tiltX gt -40 and tiltX lt 40)
-    # if n_elements(tiltnonzero) ne 1 then tiltX[tiltnonzero] = tiltX[tiltnonzero]/abs(tiltX[tiltnonzero])*(-0.49*(abs(tiltX[tiltnonzero]))^4 + 3.6*(abs(tiltX[tiltnonzero]))^3 - 10.4*(abs(tiltX[tiltnonzero]))^2 +21.1*(abs(tiltX[tiltnonzero])))
-    # tiltY = tiltY/10.
-    # tiltnonzero = where(tiltY ne 0 and tiltY gt -40 and tiltY lt 40)
-    # if n_elements(tiltnonzero) ne 1 then tiltY[tiltnonzero] = tiltY[tiltnonzero]/abs(tiltY[tiltnonzero])*(-0.49*(abs(tiltY[tiltnonzero]))^4 + 3.6*(abs(tiltY[tiltnonzero]))^3 - 10.4*(abs(tiltY[tiltnonzero]))^2 +21.1*(abs(tiltY[tiltnonzero])))
-
-    dst = tilt
-    nz = (dst != 0) & (np.abs(dst) < 40)
-
-    dst = dst.where(~nz, other = dst / np.abs(dst)
-                    * (-0.49
-                       * (np.abs(dst))**4 + 3.6
-                       * (np.abs(dst))**3 - 10.4
-                       * (np.abs(dst))**2 + 21.1
-                       * (np.abs(dst))))
-
-    # if n_elements(OKtiltX) gt 1 then tiltX[notOKtiltX] = interpol(tiltX[OKtiltX],OKtiltX,notOKtiltX) ; Interpolate over gaps for radiation correction; set to -999 again below.
-    dst = dst.where(~notOKtilt)
-    return dst.interpolate_na(dim='time', use_coordinate=False) #TODO: Filling w/o considering time gaps to re-create IDL/GDL outputs. Should fill with coordinate not False. Also consider 'max_gap' option?
-
-
-def decodeGPS(ds, gps_names):
-    '''Decode GPS information based on names of GPS attributes. This should be
-    applied if gps information does not consist of float values
-
-    Parameters
-    ----------
-    ds : xr.Dataset
-        Data set
-    gps_names : list
-        Variable names for GPS information, such as "gps_lat", "gps_lon" and
-        "gps_alt"
-
-    Returns
-    -------
-    ds : xr.Dataset
-        Data set with decoded GPS information
-    '''
-    for v in gps_names:
-        a = ds[v].attrs
-        str2nums = [re.findall(r"[-+]?\d*\.\d+|\d+", _) if isinstance(_, str) else [np.nan] for _ in ds[v].values]
-        ds[v][:] = pd.DataFrame(str2nums).astype(float).T.values[0]
-        ds[v] = ds[v].astype(float)
-        ds[v].attrs = a
-    return ds
-
-def reformatGPS(pos_arr, attrs):
-    '''Correct latitude and longitude from native format to decimal degrees.
-
-    v2 stations should send "NH6429.01544","WH04932.86061" (NUK_L 2022)
-    v3 stations should send coordinates as "6628.93936","04617.59187" (DY2) or 6430,4916 (NUK_Uv3)
-    decodeGPS should have decoded these strings to floats in ddmm.mmmm format
-    v1 stations however only saved decimal minutes (mm.mmmmm) as float<=60. '
-    In this case, we use the integer part of the latitude given in the config
-    file and append the gps value after it.
-
-    Parameters
-    ----------
-    pos_arr : xr.Dataarray
-        Array of latitude or longitude measured by the GPS
-    attrs : dict
-        The global attribute 'latitude' or 'longitude' associated with the
-        file being processed. It is the standard latitude/longitude given in the
-        config file for that station.
-
-    Returns
-    -------
-    pos_arr : xr.Dataarray
-        Formatted GPS position array in decimal degree
-    '''
-    if np.any((pos_arr <= 90) & (pos_arr > 0)):
-        # then pos_arr is in decimal minutes, so we add to it the integer
-        # part of the latitude given in the config file x100
-        # so that it reads ddmm.mmmmmm like for v2 and v3 files
-        # Note that np.sign and np.attrs handles negative longitudes.
-        pos_arr = np.sign(attrs) * (pos_arr + 100*np.floor(np.abs(attrs)))
-    a = pos_arr.attrs
-    pos_arr = np.floor(pos_arr / 100) + (pos_arr / 100 - np.floor(pos_arr / 100)) * 100 / 60
-    pos_arr.attrs = a
-    return pos_arr
-
-def _reformatArray(ds_arr):
-    '''Reformat DataArray values and attributes
-
-    Parameters
-    ----------
-    ds_arr : xr.Dataarray
-        Data array
-
-    Returns
-    -------
-    ds_arr : xr.Dataarray
-        Formatted data array
-    '''
-    a = ds_arr.attrs # Store
-    ds_arr.values = pd.to_numeric(ds_arr, errors='coerce')
-    ds_arr.attrs = a # Reformat
-    return ds_arr
-
-def _removeVars(ds, v_names):
-    '''Remove redundant variables if present in dataset
-
-    Parameters
-    ----------
-    ds : xr.Dataset
-        Data set
-    v_names : list
-        List of column names to drop
-
-    Returns
-    -------
-    ds : xr.Dataset
-        Data set with removed variables
-    '''
-    for v in v_names:
-        if v in list(ds.variables): ds = ds.drop_vars(v)
-    return ds
-
-def _popCols(ds, booms, data_type, vars_df, cols):
-    '''Populate data array columns with given variable names from look-up table
-
-    Parameters
-    ----------
-    ds : xr.Dataset
-        Data set
-    booms : int
-        Number of booms (1 or 2)
-    data_type : str
-        Type of data ("tx", "raw")
-    vars_df : pd.DataFrame
-        Variables lookup table
-    cols : list
-        Names of columns to populate
-
-    Returns
-    -------
-    ds : xr.Dataset
-        Data with populated columns
-    '''
-    if booms==1:
-        names = vars_df.loc[(vars_df[cols[0]]!='two-boom')]
-
-    elif booms==2:
-        names = vars_df.loc[(vars_df[cols[0]]!='one-boom')]
-
-    for v in list(names.index):
-        if v not in list(ds.variables):
-            ds[v] = (('time'), np.arange(ds['time'].size)*np.nan)
-    return ds
-
-# def _popCols(ds, booms, data_type, vars_df, cols):
-#     if booms==1:
-#         if data_type !='TX':
-#             names = vars_df.loc[(vars_df[cols[0]]!='two-boom')]
-#         else:
-#             names = vars_df.loc[(vars_df[cols[0]] != 'two-boom') & vars_df[cols[1]] != 'tx']
-
-#     elif booms==2:
-#         if data_type !='TX':
-#             names = vars_df.loc[(vars_df[cols[0]]!='two-boom')]
-#         else:
-#             names = vars_df.loc[(vars_df[cols[0]] != 'two-boom') & vars_df[cols[1]] != 'tx']
-
-#     for v in list(names.index):
-#         if v not in list(ds.variables):
-#             ds[v] = (('time'), np.arange(ds['time'].size)*np.nan)
-#     return ds
-
-#------------------------------------------------------------------------------
-
-if __name__ == "__main__":
-    # unittest.main()
-    pass
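A note on the deleted reformatGPS above: once decodeGPS has turned the raw strings into ddmm.mmmm floats, the conversion to decimal degrees is plain arithmetic. A small standalone sketch of that step (illustrative values only, not station data; the helper name is made up):

import numpy as np

def ddmm_to_decimal_degrees(pos):
    # Mirrors the formula in the deleted reformatGPS(): the integer part of
    # pos/100 is whole degrees, the remainder carries decimal minutes.
    pos = np.asarray(pos, dtype=float)
    degrees = np.floor(pos / 100)
    minutes = (pos / 100 - degrees) * 100
    return degrees + minutes / 60

print(ddmm_to_decimal_degrees(6628.93936))  # DY2-style value from the docstring -> roughly 66.4823 degrees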