pypromice 1.5.2__py3-none-any.whl → 1.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pypromice/__init__.py +2 -0
- pypromice/{qc → core/qc}/percentiles/compute_thresholds.py +2 -2
- pypromice/{qc → core/qc}/persistence.py +22 -29
- pypromice/{process → core/qc}/value_clipping.py +3 -3
- pypromice/core/variables/__init__.py +1 -0
- pypromice/core/variables/air_temperature.py +64 -0
- pypromice/core/variables/gps.py +221 -0
- pypromice/core/variables/humidity.py +111 -0
- pypromice/core/variables/precipitation.py +108 -0
- pypromice/core/variables/pressure_transducer_depth.py +79 -0
- pypromice/core/variables/radiation.py +422 -0
- pypromice/core/variables/station_boom_height.py +49 -0
- pypromice/core/variables/station_pose.py +375 -0
- pypromice/core/variables/wind.py +66 -0
- pypromice/io/bufr/__init__.py +0 -0
- pypromice/{postprocess → io/bufr}/bufr_to_csv.py +1 -1
- pypromice/{postprocess → io/bufr}/create_bufr_files.py +2 -2
- pypromice/{postprocess → io/bufr}/get_bufr.py +6 -6
- pypromice/{postprocess → io/bufr}/real_time_utilities.py +3 -3
- pypromice/io/ingest/__init__.py +0 -0
- pypromice/{utilities → io/ingest}/git.py +1 -3
- pypromice/io/ingest/l0.py +294 -0
- pypromice/io/ingest/l0_repository.py +103 -0
- pypromice/io/ingest/toa5.py +87 -0
- pypromice/{process → io}/write.py +1 -1
- pypromice/pipeline/L0toL1.py +291 -0
- pypromice/pipeline/L1toL2.py +233 -0
- pypromice/{process → pipeline}/L2toL3.py +102 -120
- pypromice/pipeline/__init__.py +4 -0
- pypromice/{process → pipeline}/aws.py +10 -82
- pypromice/{process → pipeline}/get_l2.py +2 -2
- pypromice/{process → pipeline}/get_l2tol3.py +19 -22
- pypromice/{process → pipeline}/join_l2.py +31 -32
- pypromice/{process → pipeline}/join_l3.py +16 -14
- pypromice/{process → pipeline}/resample.py +59 -46
- pypromice/{process → pipeline}/utilities.py +0 -22
- pypromice/resources/file_attributes.csv +4 -4
- pypromice/resources/variables.csv +27 -24
- {pypromice-1.5.2.dist-info → pypromice-1.6.0.dist-info}/METADATA +1 -2
- pypromice-1.6.0.dist-info/RECORD +64 -0
- {pypromice-1.5.2.dist-info → pypromice-1.6.0.dist-info}/WHEEL +1 -1
- pypromice-1.6.0.dist-info/entry_points.txt +12 -0
- pypromice/get/__init__.py +0 -1
- pypromice/get/get.py +0 -211
- pypromice/get/get_promice_data.py +0 -56
- pypromice/process/L0toL1.py +0 -536
- pypromice/process/L1toL2.py +0 -839
- pypromice/process/__init__.py +0 -4
- pypromice/process/load.py +0 -161
- pypromice-1.5.2.dist-info/RECORD +0 -53
- pypromice-1.5.2.dist-info/entry_points.txt +0 -13
- /pypromice/{postprocess → core}/__init__.py +0 -0
- /pypromice/{utilities → core}/dependency_graph.py +0 -0
- /pypromice/{qc → core/qc}/__init__.py +0 -0
- /pypromice/{qc → core/qc}/github_data_issues.py +0 -0
- /pypromice/{qc → core/qc}/percentiles/__init__.py +0 -0
- /pypromice/{qc → core/qc}/percentiles/outlier_detector.py +0 -0
- /pypromice/{qc → core/qc}/percentiles/thresholds.csv +0 -0
- /pypromice/{utilities → io}/__init__.py +0 -0
- /pypromice/{postprocess → io/bufr}/bufr_utilities.py +0 -0
- /pypromice/{postprocess → io/bufr}/positions_seed.csv +0 -0
- /pypromice/{station_configuration.py → io/bufr/station_configuration.py} +0 -0
- /pypromice/{postprocess → io}/make_metadata_csv.py +0 -0
- {pypromice-1.5.2.dist-info → pypromice-1.6.0.dist-info}/licenses/LICENSE.txt +0 -0
- {pypromice-1.5.2.dist-info → pypromice-1.6.0.dist-info}/top_level.txt +0 -0
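Taken together, the listing above is a repackaging of the 1.5.x layout: the old qc, process, postprocess and utilities modules move under new core, pipeline and io subpackages, and the get subpackage is removed. As a rough illustration of what the renames imply for downstream code, the sketch below maps a couple of 1.5.2-style imports onto the 1.6.0 paths; the module paths come from the rename listing, while the imported symbols (persistence_qc, clip_values) are taken from the new pipeline modules shown later in this diff and are only assumed to have existed under the same names in 1.5.2.

# Hypothetical import migration implied by the rename listing (not an official guide).

# pypromice 1.5.2
# from pypromice.qc.persistence import persistence_qc
# from pypromice.process.value_clipping import clip_values
# from pypromice.process import L2toL3

# pypromice 1.6.0
from pypromice.core.qc.persistence import persistence_qc    # pypromice/{qc -> core/qc}/persistence.py
from pypromice.core.qc.value_clipping import clip_values    # pypromice/{process -> core/qc}/value_clipping.py
from pypromice.pipeline import L2toL3                       # pypromice/{process -> pipeline}/L2toL3.py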
pypromice/pipeline/L0toL1.py (new file)
@@ -0,0 +1,291 @@
+#!/usr/bin/env python
+"""
+AWS Level 0 (L0) to Level 1 (L1) data processing
+"""
+__all__ = ["toL1"]
+
+import numpy as np
+import pandas as pd
+import xarray as xr
+import re, logging
+logger = logging.getLogger(__name__)
+
+from pypromice.core.variables.pressure_transducer_depth import correct_and_calculate_depth
+from pypromice.core.qc.value_clipping import clip_values
+from pypromice.core.variables import (wind,
+                                      air_temperature,
+                                      gps,
+                                      radiation,
+                                      station_boom_height,
+                                      station_pose,
+                                      pressure_transducer_depth)
+
+
+def toL1(L0: xr.DataArray,
+         vars_df: pd.DataFrame
+         ) -> xr.DataArray:
+    """Process one Level 0 (L0) dataset to a
+    Level 1 (L1) dataset
+
+    Parameters
+    ----------
+    L0 : xarray.Dataset
+        Level 0 dataset
+    vars_df : pd.DataFrame
+        Metadata dataframe
+
+    Returns
+    -------
+    ds : xarray.Dataset
+        Level 1 dataset
+    """
+    assert(type(L0) == xr.Dataset)
+    ds = L0
+
+    for l in list(ds.keys()):
+        if l not in ['time', 'msg_i', 'gps_lat', 'gps_lon', 'gps_alt', 'gps_time']:
+            ds[l] = _reformat_array(ds[l])
+
+    # The following drops duplicate datetime indices. Needs to run before _addTimeShift!
+    # We can optionally also drop duplicates within _addTimeShift using pandas duplicated,
+    # but retaining the following code instead to preserve previous methods. PJW
+    _, index = np.unique(ds['time'], return_index=True)
+    ds = ds.isel(time=index)
+
+    # If we do not want to shift hourly average values back -1 hr, then comment the following line.
+    ds = add_time_shift(ds, vars_df)
+
+    # Convert radiation from engineering to physical units
+    # TODO add metadata to indicate whether radiometer values are corrected with calibration values or not
+    if hasattr(ds, 'dsr_eng_coef'):
+        ds['dsr'] = radiation.convert_sr(ds['dsr'],
+                                         ds.attrs['dsr_eng_coef'])
+    if hasattr(ds, 'usr_eng_coef'):
+        ds['usr'] = radiation.convert_sr(ds['usr'],
+                                         ds.attrs['usr_eng_coef'])
+    if hasattr(ds, 'dlr_eng_coef'):
+        ds['dlr'] = radiation.convert_lr(ds['dlr'],
+                                         ds['t_rad'],
+                                         ds.attrs['dlr_eng_coef'])
+    if hasattr(ds, 'ulr_eng_coef'):
+        ds['ulr'] = radiation.convert_lr(ds['ulr'],
+                                         ds['t_rad'],
+                                         ds.attrs['ulr_eng_coef'])
+
+    # Reformat boom height
+    ds['z_boom_u'] = _reformat_array(ds['z_boom_u'])
+
+    # Adjust sonic ranger readings for sensitivity to air temperature (interpolated)
+    tu_lo = vars_df.loc["t_u","lo"]
+    tu_hi = vars_df.loc["t_u","hi"]
+    ds["t_u_interp"] = air_temperature.clip_and_interpolate(ds["t_u"], tu_lo, tu_hi)
+    ds["z_boom_cor_u"] = station_boom_height.adjust(ds["z_boom_u"], ds["t_u_interp"])
+
+    # Decode and convert GPS positions
+    ds["gps_lat"], ds["gps_lon"], ds["gps_time"] = gps.decode_and_convert(ds["gps_lat"],
+                                                                          ds["gps_lon"],
+                                                                          ds["gps_time"],
+                                                                          ds.attrs["latitude"],
+                                                                          ds.attrs["longitude"])
+
+    # Reformat GPS information
+    for l in ['gps_lat', 'gps_lon', 'gps_alt','gps_time']:
+        ds[l] = _reformat_array(ds[l])
+
+    # Convert tilt voltage to degrees
+    if hasattr(ds, "logger_type"):
+        if ds.attrs["logger_type"].upper() == "CR1000":
+            ds["tilt_x"] = station_pose.convert_and_filter_tilt(ds["tilt_x"])
+            ds["tilt_y"] = station_pose.convert_and_filter_tilt(ds["tilt_y"])
+
+    # Apply tilt factor (e.g. -1 will invert tilt angle)
+    if hasattr(ds, "tilt_y_factor"):
+        ds["tilt_y"] = station_pose.apply_tilt_factor(ds["tilt_y"],
+                                                      ds.attrs["tilt_y_factor"])
+
+    # Smooth station tilt
+    # Note that this should be OK for CR1000 tx (data only every 6 hrs),
+    # since we interpolate above in station_pose.convert_and_filter_tilt(). PJW
+    # TODO smoothing should be changed to a fixed time window rather than based on sample steps. PHO
+    # TODO a smoothing is performed here and at L1toL2 also. Is this needed? PHO
+    ds["tilt_x"] = station_pose.smooth_tilt_with_moving_window(ds["tilt_x"])
+    ds["tilt_y"] = station_pose.smooth_tilt_with_moving_window(ds["tilt_y"])
+
+    # Apply wind factor if provided
+    # This is in the case of an anemometer rotations improperly translated to wind speed by the logger program
+    if hasattr(ds, 'wind_u_coef'):
+        logger.info(f'Wind speed correction applied to wspd_u based on factor of {ds.attrs["wind_u_coef"]}')
+        ds['wspd_u'] = wind.correct_wind_speed(ds['wspd_u'],
+                                               ds.attrs['wind_u_coef'])
+    if hasattr(ds, 'wind_l_coef'):
+        logger.info(f'Wind speed correction applied to wspd_l based on factor of {ds.attrs["wind_l_coef"]}')
+        ds['wspd_l'] = wind.correct_wind_speed(ds['wspd_l'],
+                                               ds.attrs['wind_l_coef'])
+    if hasattr(ds, 'wind_i_coef'):
+        logger.info(f'Wind speed correction applied to wspd_i based on factor of {ds.attrs["wind_i_coef"]}')
+        ds['wspd_i'] = wind.correct_wind_speed(ds['wspd_i'],
+                                               ds.attrs['wind_i_coef'])
+
+    # Handle cases where the bedrock attribute is incorrectly set
+    if not 'bedrock' in ds.attrs:
+        logger.warning('bedrock attribute is not set')
+        ds.attrs['bedrock'] = False
+    elif not isinstance(ds.attrs['bedrock'], bool):
+        logger.warning(f'bedrock attribute is not boolean: {ds.attrs["bedrock"]}')
+        ds.attrs['bedrock'] = str(ds.attrs['bedrock']).lower() == 'true'
+    is_bedrock = ds.attrs['bedrock']
+
+    # Some bedrock stations (e.g. KAN_B) do not have tilt in L0 files
+    # so we need to create them manually
+    if is_bedrock:
+        for var in ['tilt_x','tilt_y']:
+            if var not in ds.data_vars:
+                ds[var] = (('time'), np.full(ds['time'].size, np.nan))
+
+        # WEG_B has a non-null z_pt even though it is a bedrock station
+        if ~ds['z_pt'].isnull().all():
+            ds['z_pt'] = (('time'), np.full(ds['time'].size, np.nan))
+            logger.info('Warning: Non-null data for z_pt at a bedrock site')
+
+    # Perform one-boom variable processing
+    if ds.attrs["number_of_booms"]==1:
+        if ~ds["z_pt"].isnull().all():
+
+            # Adjust PTA fluid density and calculate depth
+            if hasattr(ds, "pt_z_offset"):
+                ds["z_pt"] = pressure_transducer_depth.apply_offset(ds["z_pt"],
+                                                                    int(ds.attrs["pt_z_offset"]))
+
+            ds["z_pt_cor"],ds["z_pt"]=pressure_transducer_depth.correct_and_calculate_depth(ds["z_pt"],
+                                                                                            ds["p_u"],
+                                                                                            ds.attrs["pt_antifreeze"],
+                                                                                            ds.attrs["pt_z_factor"],
+                                                                                            ds.attrs["pt_z_coef"],
+                                                                                            ds.attrs["pt_z_p_coef"])
+
+        # Adjust sonic ranger readings on stake for sensitivity to air temperature
+        ds['z_stake'] = _reformat_array(ds['z_stake'])
+        ds["z_stake_cor"] = station_boom_height.adjust(ds["z_stake"], ds["t_u_interp"])
+
+    # Perform two-boom variable processing
+    elif ds.attrs['number_of_booms']==2:
+
+        # Reformat boom height
+        ds['z_boom_l'] = _reformat_array(ds['z_boom_l'])
+
+        # Adjust sonic ranger readings for sensitivity to air temperature (interpolated)
+        tl_lo = vars_df.loc["t_l","lo"]
+        tl_hi = vars_df.loc["t_l","hi"]
+        ds["t_l_interp"] = air_temperature.clip_and_interpolate(ds["t_l"], tl_lo, tl_hi)
+        ds["z_boom_cor_l"] = station_boom_height.adjust(ds["z_boom_l"], ds["t_l_interp"])
+
+    # Clip values and remove redundant attribute information
+    ds = clip_values(ds, vars_df)
+    for key in ['hygroclip_t_offset', 'dsr_eng_coef', 'usr_eng_coef',
+                'dlr_eng_coef', 'ulr_eng_coef', 'wind_u_coef','wind_l_coef',
+                'wind_i_coef', 'pt_z_coef', 'pt_z_p_coef', 'pt_z_factor',
+                'pt_antifreeze', 'boom_azimuth', 'nodata', 'conf', 'file']:
+        ds.attrs.pop(key, None)
+
+    # Return Level 1 dataset
+    ds.attrs['level'] = 'L1'
+    return ds
+
+def add_time_shift(ds, vars_df):
+    '''Shift times based on file format and logger type (shifting only hourly averaged values,
+    and not instantaneous variables). For raw (10 min), all values are sampled instantaneously
+    so do not shift. For STM (1 hour), values are averaged and assigned to end-of-hour by the
+    logger, so shift by -1 hr. For TX (time frequency depends on v2 or v3) then time is shifted
+    depending on logger type. We use the 'instantaneous_hourly' boolean from variables.csv to
+    determine if a variable is considered instantaneous at hourly samples.
+
+    This approach creates two separate sub-dataframes, one for hourly-averaged variables
+    and another for instantaneous variables. The instantaneous dataframe should never be
+    shifted. We apply shifting only to the hourly average dataframe, then concat the two
+    dataframes back together.
+
+    It is possible to use pandas merge or join instead of concat, there are equivalent methods
+    in each. In this case, we use concat throughout.
+
+    Fausto et al. 2021 specifies the convention of assigning hourly averages to start-of-hour,
+    so we need to retain this unless clearly communicated to users.
+
+    Parameters
+    ----------
+    ds : xarray.Dataset
+        Dataset to apply time shift to
+    vars_df : pd.DataFrame
+        Metadata dataframe
+
+    Returns
+    -------
+    ds_out : xarray.Dataset
+        Dataset with shifted times
+    '''
+    df = ds.to_dataframe()
+    # No need to drop duplicates here if performed prior to calling this function.
+    # df = df[~df.index.duplicated(keep='first')] # drop duplicates, keep=first is arbitrary
+    df['doy'] = df.index.dayofyear
+    i_cols = [x for x in df.columns if x in vars_df.index and vars_df['instantaneous_hourly'][x] is True] # instantaneous only, list of columns
+    df_i = df.filter(items=i_cols, axis=1) # instantaneous only dataframe
+    df_a = df.drop(df_i.columns, axis=1) # hourly ave dataframe
+
+    if ds.attrs['format'] == 'raw':
+        # 10-minute data, no shifting
+        df_out = df
+    elif ds.attrs['format'] == 'STM':
+        # hourly-averaged, non-transmitted
+        # shift everything except instantaneous, any logger type
+        df_a = df_a.shift(periods=-1, freq="h")
+        df_out = pd.concat([df_a, df_i], axis=1) # different columns, same datetime indices
+        df_out = df_out.sort_index()
+    elif ds.attrs['format'] == 'TX':
+        if ds.attrs['logger_type'] == 'CR1000X':
+            # v3, data is hourly all year long
+            # shift everything except instantaneous
+            df_a = df_a.shift(periods=-1, freq="h")
+            df_out = pd.concat([df_a, df_i], axis=1) # different columns, same datetime indices
+            df_out = df_out.sort_index()
+        elif ds.attrs['logger_type'] == 'CR1000':
+            # v2, data is hourly (6-hr for instantaneous) for DOY 100-300, otherwise daily at 00 UTC
+            # shift non-instantaneous hourly for DOY 100-300, else do not shift daily
+            df_a_hourly = df_a.loc[(df_a['doy'] >= 100) & (df_a['doy'] <= 300)]
+            # df_a_hourly = df_a.loc[df_a['doy'].between(100, 300, inclusive='both')] # equivalent to above
+            df_a_daily_1 = df_a.loc[(df_a['doy'] < 100)]
+            df_a_daily_2 = df_a.loc[(df_a['doy'] > 300)]
+
+            # shift the hourly ave data
+            df_a_hourly = df_a_hourly.shift(periods=-1, freq="h")
+
+            # stitch everything back together
+            df_concat_u = pd.concat([df_a_daily_1, df_a_daily_2, df_a_hourly], axis=0) # same columns, different datetime indices
+            # It's now possible for df_concat_u to have duplicate datetime indices
+            df_concat_u = df_concat_u[~df_concat_u.index.duplicated(keep='first')] # drop duplicates, keep=first is arbitrary
+
+            df_out = pd.concat([df_concat_u, df_i], axis=1) # different columns, same datetime indices
+            df_out = df_out.sort_index()
+
+    # Back to xarray, and re-assign the original attrs
+    df_out = df_out.drop('doy', axis=1)
+    ds_out = df_out.to_xarray()
+    ds_out = ds_out.assign_attrs(ds.attrs) # Dataset attrs
+    for x in ds_out.data_vars: # variable-specific attrs
+        ds_out[x].attrs = ds[x].attrs
+
+    # equivalent to above:
+    # vals = [xr.DataArray(data=df_out[c], dims=['time'], coords={'time':df_out.index}, attrs=ds[c].attrs) for c in df_out.columns]
+    # ds_out = xr.Dataset(dict(zip(df_out.columns, vals)), attrs=ds.attrs)
+    return ds_out
+
+
+def _reformat_array(ds_arr):
+    """Reformat DataArray values and attributes"""
+    a = ds_arr.attrs
+    ds_arr.values = pd.to_numeric(ds_arr, errors='coerce')
+    ds_arr.attrs = a
+    return ds_arr
+
+#------------------------------------------------------------------------------
+
+if __name__ == "__main__":
+    pass
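The add_time_shift docstring above takes some unpacking: hourly averages arrive stamped at the end of the averaging hour and are moved back one hour to the start-of-hour convention of Fausto et al. 2021, while variables marked instantaneous_hourly keep their original stamps. A minimal, self-contained pandas sketch of that split-shift-recombine pattern (synthetic data, not pypromice code) looks like this:

import pandas as pd

# Synthetic hourly series: one averaged variable and one treated as instantaneous.
idx = pd.date_range("2024-01-01 01:00", periods=3, freq="h")
df = pd.DataFrame({"t_u": [1.0, 2.0, 3.0],          # hourly average, stamped end-of-hour by the logger
                   "gps_lat": [67.0, 67.0, 67.0]},   # instantaneous, must keep its stamps
                  index=idx)

df_a = df[["t_u"]].shift(periods=-1, freq="h")   # averages move back one hour (start-of-hour convention)
df_i = df[["gps_lat"]]                           # instantaneous dataframe is never shifted
df_out = pd.concat([df_a, df_i], axis=1).sort_index()

print(df_out)
# t_u recorded at 01:00 is now stamped 00:00; gps_lat keeps 01:00-03:00, with NaNs
# wherever the shifted and unshifted indices do not overlap.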
pypromice/pipeline/L1toL2.py (new file)
@@ -0,0 +1,233 @@
+#!/usr/bin/env python
+"""
+AWS Level 1 (L1) to Level 2 (L2) data processing
+"""
+__all__ = ["toL2"]
+
+import logging
+logger = logging.getLogger(__name__)
+
+from pathlib import Path
+import numpy as np
+import pandas as pd
+import xarray as xr
+
+from pypromice.core.qc.github_data_issues import flagNAN, adjustTime, adjustData
+from pypromice.core.qc.percentiles.outlier_detector import ThresholdBasedOutlierDetector
+from pypromice.core.qc.persistence import persistence_qc
+from pypromice.core.qc.value_clipping import clip_values
+from pypromice.core.variables import (wind,
+                                      gps,
+                                      precipitation,
+                                      humidity,
+                                      radiation,
+                                      station_pose,
+                                      air_temperature)
+
+
+def toL2(L1: xr.Dataset,
+         vars_df: pd.DataFrame,
+         data_flags_dir: Path,
+         data_adjustments_dir: Path
+         ) -> xr.Dataset:
+    """Process one Level 1 (L1) product to Level 2.
+    In this step we do:
+    - manual flagging and adjustments
+    - automated QC: persistence, percentile
+    - custom filter: gps_alt filter, NaN t_rad removed from dlr & ulr
+    - smoothing of tilt and rot
+    - calculation of rh with regard to ice in subfreezing conditions
+    - calculation of cloud coverage
+    - correction of dsr and usr for tilt
+    - filtering of dsr based on a theoretical TOA irradiance and grazing light
+    - calculation of albedo
+    - calculation of directional wind speed
+
+    Parameters
+    ----------
+    L1 : xr.Dataset
+        Level 1 dataset
+    vars_df : pd.DataFrame
+        Metadata dataframe
+    data_flags_dir : pathlib.Path
+        Directory path to data flags file
+    data_adjustments_dir : pathlib.Path
+        Directory path to data adjustments file
+
+    Returns
+    -------
+    ds : xr.Dataset
+        Level 2 dataset
+    """
+    ds = L1.copy()
+
+    try:
+        # Adjust time after a user-defined csv files
+        ds = adjustTime(ds, adj_dir=data_adjustments_dir.as_posix())
+
+        # Flag NaNs after a user-defined csv files
+        ds = flagNAN(ds, flag_dir=data_flags_dir.as_posix())
+
+        # Adjust data after a user-defined csv files
+        ds = adjustData(ds, adj_dir=data_adjustments_dir.as_posix())
+
+    except Exception:
+        logger.exception("Flagging and fixing failed:")
+
+    # Flag and remove persistence outliers
+    ds = persistence_qc(ds)
+
+    # if ds.attrs['format'] == 'TX':
+    #     # TODO: The configuration should be provided explicitly
+    #     outlier_detector = ThresholdBasedOutlierDetector.default()
+    #     ds = outlier_detector.filter_data(ds)
+
+    # Filter GPS values based on baseline elevation
+    ds["gps_lat"], ds["gps_lon"], ds["gps_alt"] = gps.filter(ds["gps_lat"],
+                                                             ds["gps_lon"],
+                                                             ds["gps_alt"])
+
+    # Removing dlr and ulr that are missing t_rad
+    # This is done now because t_rad can be filtered either manually or with persistence
+    ds["dlr"] = radiation.filter_lr(ds["dlr"], ds["t_rad"])
+    ds["ulr"] = radiation.filter_lr(ds["ulr"], ds["t_rad"])
+
+    # Calculate relative humidity with regard to ice
+    ds["rh_u_wrt_ice_or_water"] = humidity.adjust(ds["rh_u"], ds["t_u"])
+
+    if ds.attrs["number_of_booms"]==2:
+        ds["rh_l_wrt_ice_or_water"] = humidity.adjust(ds["rh_l"], ds["t_l"])
+
+    if hasattr(ds,"t_i"):
+        if ~ds["t_i"].isnull().all():
+            ds["rh_i_wrt_ice_or_water"] = humidity.adjust(ds["rh_i"], ds["t_i"])
+
+    # Determine surface temperature
+    ds["t_surf"] = radiation.calculate_surface_temperature(ds["dlr"],
+                                                           ds["ulr"])
+    is_bedrock = ds.attrs["bedrock"]
+    if not is_bedrock:
+        ds["t_surf"] = ds["t_surf"].clip(max=0)
+
+    # Interpolate and smooth station tilt and rotation
+    # TODO tilt smoothing is performed here and at L0toL1 also (and they are different functions). Is this needed? PHO
+    ds['tilt_x'] = station_pose.interpolate_tilt(ds['tilt_x'])
+    ds['tilt_y'] = station_pose.interpolate_tilt(ds['tilt_y'])
+    ds['rot'] = station_pose.interpolate_rotation(ds['rot'])
+
+    # Determine cloud cover for on-ice stations
+    if not is_bedrock:
+
+        # Selected stations have pre-defined cloud assumption coefficients
+        # TODO Ideally these will be pre-defined for all stations eventually
+        if ds.attrs["station_id"] == "KAN_M":
+            LR_overcast = 315 + 4 * ds["t_u"]
+            LR_clear = 30 + 4.6e-13 * (ds["t_u"] + air_temperature.T_0) ** 6
+        elif ds.attrs["station_id"] == "KAN_U":
+            LR_overcast = 305 + 4 * ds["t_u"]
+            LR_clear = 220 + 3.5 * ds["t_u"]
+
+        # Else, calculate cloud assumption coefficients based on default values
+        else:
+            LR_overcast, LR_clear = air_temperature.get_cloud_coefficients(ds["t_u"])
+
+        ds["cc"] = radiation.calculate_cloud_coverage(ds["dlr"], LR_overcast, LR_clear)
+
+    # Set cloud cover to nans if station is not on ice
+    else:
+        ds["cc"] = ds["dlr"].copy() * np.nan
+
+    # Determine station pose relative to sun position
+    # TODO Why is mean GPS lat lon not preferred for calcs?
+    if hasattr(ds, 'latitude') and hasattr(ds, 'longitude'):
+        lat = ds.attrs['latitude']
+        lon = ds.attrs['longitude']
+    else:
+        lat = ds['gps_lat'].mean()
+        lon = ds['gps_lon'].mean()
+
+    # Determine station position relative to sun
+    doy = ds['time'].dt.dayofyear
+    hour = ds['time'].dt.hour
+    minute = ds['time'].dt.minute
+    phi_sensor_rad, theta_sensor_rad = station_pose.calculate_spherical_tilt(ds['tilt_x'], ds['tilt_y'])
+    Declination_rad = station_pose.calculate_declination(doy, hour, minute)
+    HourAngle_rad = station_pose.calculate_hour_angle(hour, minute, lon)
+    ZenithAngle_rad, ZenithAngle_deg = station_pose.calculate_zenith(lat,
+                                                                     Declination_rad,
+                                                                     HourAngle_rad)
+    AngleDif_deg = station_pose.calculate_angle_difference(ZenithAngle_rad,
+                                                           HourAngle_rad,
+                                                           phi_sensor_rad,
+                                                           theta_sensor_rad)
+
+    # Filter shortwave radiation
+    ds["dsr"], ds["usr"], _ = radiation.filter_sr(ds["dsr"],
+                                                  ds["usr"],
+                                                  ds["cc"],
+                                                  ZenithAngle_rad,
+                                                  ZenithAngle_deg,
+                                                  AngleDif_deg)
+
+    # Correct shortwave radiation
+    ds["dsr_cor"], ds["usr_cor"], _ = radiation.correct_sr(ds["dsr"],
+                                                           ds["usr"],
+                                                           ds["cc"],
+                                                           phi_sensor_rad,
+                                                           theta_sensor_rad,
+                                                           lat,
+                                                           Declination_rad,
+                                                           HourAngle_rad,
+                                                           ZenithAngle_rad,
+                                                           ZenithAngle_deg,
+                                                           AngleDif_deg)
+
+    ds['albedo'], _ = radiation.calculate_albedo(ds["dsr"],
+                                                 ds["usr"],
+                                                 ds["dsr_cor"],
+                                                 ds["cc"],
+                                                 ZenithAngle_deg,
+                                                 AngleDif_deg)
+
+    # Determine if precipitation filtering and rate needed
+    if hasattr(ds, "correct_precip"):
+        precip_flag = ds.attrs["correct_precip"]
+    else:
+        precip_flag=True
+
+    if ~ds["precip_u"].isnull().all() and precip_flag:
+        ds["precip_u"] = precipitation.filter_lufft_errors(ds["precip_u"], ds["t_u"], ds["p_u"], ds["rh_u"])
+        ds["rainfall_u"] = precipitation.get_rainfall_per_timestep(ds["precip_u"], ds["t_u"])
+        ds["rainfall_cor_u"] = precipitation.correct_rainfall_undercatch(ds["rainfall_u"], ds["wspd_u"])
+
+    if ds.attrs["number_of_booms"]==2:
+        if ~ds["precip_l"].isnull().all() and precip_flag:
+            ds["precip_l"] = precipitation.filter_lufft_errors(ds["precip_l"], ds["t_l"], ds["p_l"], ds["rh_l"])
+            ds["rainfall_l"] = precipitation.get_rainfall_per_timestep(ds["precip_l"], ds["t_l"])
+            ds["rainfall_cor_l"] = precipitation.correct_rainfall_undercatch(ds["rainfall_l"], ds["wspd_l"])
+
+    # Calculate directional wind speed for upper boom
+    ds['wdir_u'] = wind.filter_wind_direction(ds['wdir_u'], ds['wspd_u'])
+    ds['wspd_x_u'], ds['wspd_y_u'] = wind.calculate_directional_wind_speed(ds['wspd_u'], ds['wdir_u'])
+
+    # Calculate directional wind speed for lower boom
+    if ds.attrs['number_of_booms'] == 2:
+        ds['wdir_l'] = wind.filter_wind_direction(ds['wdir_l'], ds['wspd_l'])
+        ds['wspd_x_l'], ds['wspd_y_l'] = wind.calculate_directional_wind_speed(ds['wspd_l'], ds['wdir_l'])
+
+    # Calculate directional wind speed for instantaneous measurements
+    if hasattr(ds, 'wdir_i'):
+        if ~ds['wdir_i'].isnull().all() and ~ds['wspd_i'].isnull().all():
+            ds['wdir_i'] = wind.filter_wind_direction(ds['wdir_i'], ds['wspd_i'])
+            ds['wspd_x_i'], ds['wspd_y_i'] = wind.calculate_directional_wind_speed(ds['wspd_i'], ds['wdir_i'])
+
+    # Clip values (i.e. threshold filtering)
+    ds = clip_values(ds, vars_df)
+
+    # Return L2 dataset
+    ds.attrs['level'] = 'L2'
+    return ds
+
+
+if __name__ == "__main__":
+    pass
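For orientation, the two new modules above replace the old pypromice.process.L0toL1 and pypromice.process.L1toL2 and chain in the obvious way. The sketch below is illustrative only: the toL1/toL2 calls match the signatures defined in this diff, but the way the L0 dataset and the variables.csv metadata are loaded here (plain xarray/pandas reads with placeholder paths) is an assumption; in the package itself that work is handled by pypromice.io.ingest and pypromice.pipeline.aws, whose APIs are not shown in this diff.

from pathlib import Path

import pandas as pd
import xarray as xr

from pypromice.pipeline.L0toL1 import toL1
from pypromice.pipeline.L1toL2 import toL2

# Placeholder inputs: a decoded L0 dataset and the variable metadata table.
ds_l0 = xr.open_dataset("station_L0.nc")              # hypothetical path to an L0 dataset
vars_df = pd.read_csv("variables.csv", index_col=0)   # e.g. pypromice/resources/variables.csv

ds_l1 = toL1(ds_l0, vars_df)                           # L0 -> L1: reformat, shift times, convert units
ds_l2 = toL2(ds_l1,
             vars_df,
             data_flags_dir=Path("flags"),             # directory of manual flag csv files
             data_adjustments_dir=Path("adjustments")) # directory of manual adjustment csv files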