pypromice 1.3.6__py3-none-any.whl → 1.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- pypromice/postprocess/bufr_to_csv.py +6 -1
- pypromice/postprocess/bufr_utilities.py +91 -18
- pypromice/postprocess/create_bufr_files.py +178 -0
- pypromice/postprocess/get_bufr.py +248 -397
- pypromice/postprocess/make_metadata_csv.py +214 -0
- pypromice/postprocess/real_time_utilities.py +41 -11
- pypromice/process/L0toL1.py +12 -5
- pypromice/process/L1toL2.py +69 -14
- pypromice/process/L2toL3.py +1033 -186
- pypromice/process/aws.py +130 -808
- pypromice/process/get_l2.py +90 -0
- pypromice/process/get_l2tol3.py +111 -0
- pypromice/process/join_l2.py +112 -0
- pypromice/process/join_l3.py +551 -120
- pypromice/process/load.py +161 -0
- pypromice/process/resample.py +128 -0
- pypromice/process/utilities.py +68 -0
- pypromice/process/write.py +503 -0
- pypromice/qc/github_data_issues.py +10 -16
- pypromice/qc/persistence.py +52 -30
- pypromice/resources/__init__.py +28 -0
- pypromice/{process/metadata.csv → resources/file_attributes.csv} +0 -2
- pypromice/resources/variable_aliases_GC-Net.csv +78 -0
- pypromice/resources/variables.csv +106 -0
- pypromice/station_configuration.py +118 -0
- pypromice/tx/get_l0tx.py +7 -4
- pypromice/tx/payload_formats.csv +1 -0
- pypromice/tx/tx.py +27 -6
- pypromice/utilities/__init__.py +0 -0
- pypromice/utilities/git.py +61 -0
- {pypromice-1.3.6.dist-info → pypromice-1.4.0.dist-info}/METADATA +3 -3
- pypromice-1.4.0.dist-info/RECORD +53 -0
- {pypromice-1.3.6.dist-info → pypromice-1.4.0.dist-info}/WHEEL +1 -1
- pypromice-1.4.0.dist-info/entry_points.txt +13 -0
- pypromice/postprocess/station_configurations.toml +0 -762
- pypromice/process/get_l3.py +0 -46
- pypromice/process/variables.csv +0 -92
- pypromice/qc/persistence_test.py +0 -150
- pypromice/test/test_config1.toml +0 -69
- pypromice/test/test_config2.toml +0 -54
- pypromice/test/test_email +0 -75
- pypromice/test/test_payload_formats.csv +0 -4
- pypromice/test/test_payload_types.csv +0 -7
- pypromice/test/test_percentile.py +0 -229
- pypromice/test/test_raw1.txt +0 -4468
- pypromice/test/test_raw_DataTable2.txt +0 -11167
- pypromice/test/test_raw_SlimTableMem1.txt +0 -1155
- pypromice/test/test_raw_transmitted1.txt +0 -15411
- pypromice/test/test_raw_transmitted2.txt +0 -28
- pypromice-1.3.6.dist-info/RECORD +0 -53
- pypromice-1.3.6.dist-info/entry_points.txt +0 -8
- {pypromice-1.3.6.dist-info → pypromice-1.4.0.dist-info}/LICENSE.txt +0 -0
- {pypromice-1.3.6.dist-info → pypromice-1.4.0.dist-info}/top_level.txt +0 -0
pypromice/process/L2toL3.py
CHANGED
|
@@ -2,135 +2,1051 @@
|
|
|
2
2
|
"""
|
|
3
3
|
AWS Level 2 (L2) to Level 3 (L3) data processing
|
|
4
4
|
"""
|
|
5
|
+
import pandas as pd
|
|
5
6
|
import numpy as np
|
|
6
7
|
import xarray as xr
|
|
8
|
+
from sklearn.linear_model import LinearRegression
|
|
9
|
+
from pypromice.qc.github_data_issues import adjustData
|
|
10
|
+
from scipy.interpolate import interp1d
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
import logging
|
|
7
13
|
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
14
|
+
logger = logging.getLogger(__name__)
|
|
15
|
+
|
|
16
|
+
def toL3(L2,
|
|
17
|
+
data_adjustments_dir: Path,
|
|
18
|
+
station_config={},
|
|
19
|
+
T_0=273.15):
|
|
20
|
+
'''Process one Level 2 (L2) product to Level 3 (L3) meaning calculating all
|
|
21
|
+
derived variables:
|
|
22
|
+
- Turbulent fluxes
|
|
23
|
+
- smoothed and inter/extrapolated GPS coordinates
|
|
24
|
+
- continuous surface height, ice surface height, snow height
|
|
25
|
+
- thermistor depths
|
|
26
|
+
|
|
11
27
|
|
|
12
28
|
Parameters
|
|
13
29
|
----------
|
|
14
30
|
L2 : xarray:Dataset
|
|
15
31
|
L2 AWS data
|
|
32
|
+
station_config : Dict
|
|
33
|
+
Dictionary containing the information necessary for the processing of
|
|
34
|
+
L3 variables (relocation dates for coordinates processing, or thermistor
|
|
35
|
+
string maintenance date for the thermistors depth)
|
|
16
36
|
T_0 : int
|
|
17
|
-
|
|
18
|
-
z_0 : int
|
|
19
|
-
Aerodynamic surface roughness length for momention, assumed constant
|
|
20
|
-
for all ice/snow surfaces. Default is 0.001.
|
|
21
|
-
R_d : int
|
|
22
|
-
Gas constant of dry air. Default is 287.05.
|
|
23
|
-
eps : int
|
|
24
|
-
Default is 0.622.
|
|
25
|
-
es_0 : int
|
|
26
|
-
Saturation vapour pressure at the melting point (hPa). Default is 6.1071.
|
|
27
|
-
es_100 : int
|
|
28
|
-
Saturation vapour pressure at steam point temperature (hPa). Default is
|
|
29
|
-
1013.246.
|
|
37
|
+
Freezing point temperature. Default is 273.15.
|
|
30
38
|
'''
|
|
31
39
|
ds = L2
|
|
40
|
+
ds.attrs['level'] = 'L3'
|
|
32
41
|
|
|
33
|
-
T_100 =
|
|
42
|
+
T_100 = T_0+100 # Get steam point temperature as K
|
|
34
43
|
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
44
|
+
# Turbulent heat flux calculation
|
|
45
|
+
if ('t_u' in ds.keys()) and \
|
|
46
|
+
('p_u' in ds.keys()) and \
|
|
47
|
+
('rh_u_cor' in ds.keys()):
|
|
48
|
+
# Upper boom bulk calculation
|
|
49
|
+
T_h_u = ds['t_u'].copy() # Copy for processing
|
|
50
|
+
p_h_u = ds['p_u'].copy()
|
|
51
|
+
RH_cor_h_u = ds['rh_u_cor'].copy()
|
|
52
|
+
|
|
53
|
+
q_h_u = calculate_specific_humidity(T_0, T_100, T_h_u, p_h_u, RH_cor_h_u) # Calculate specific humidity
|
|
54
|
+
if ('wspd_u' in ds.keys()) and \
|
|
55
|
+
('t_surf' in ds.keys()) and \
|
|
56
|
+
('z_boom_u' in ds.keys()):
|
|
57
|
+
WS_h_u = ds['wspd_u'].copy()
|
|
58
|
+
Tsurf_h = ds['t_surf'].copy() # T surf from derived upper boom product. TODO is this okay to use with lower boom parameters?
|
|
59
|
+
z_WS_u = ds['z_boom_u'].copy() + 0.4 # Get height of Anemometer
|
|
60
|
+
z_T_u = ds['z_boom_u'].copy() - 0.1 # Get height of thermometer
|
|
61
|
+
|
|
62
|
+
if not ds.attrs['bedrock']:
|
|
63
|
+
SHF_h_u, LHF_h_u= calculate_tubulent_heat_fluxes(T_0, T_h_u, Tsurf_h, WS_h_u, # Calculate latent and sensible heat fluxes
|
|
64
|
+
z_WS_u, z_T_u, q_h_u, p_h_u)
|
|
46
65
|
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
ds['dshf_u'] = (('time'), SHF_h_u.data)
|
|
57
|
-
ds['dlhf_u'] = (('time'), LHF_h_u.data)
|
|
58
|
-
q_h_u = 1000 * q_h_u # Convert sp.humid from kg/kg to g/kg
|
|
59
|
-
q_h_u = cleanSpHumid(q_h_u, T_h_u, Tsurf_h, p_h_u, RH_cor_h_u) # Clean sp.humid values
|
|
60
|
-
ds['qh_u'] = (('time'), q_h_u.data)
|
|
66
|
+
ds['dshf_u'] = (('time'), SHF_h_u.data)
|
|
67
|
+
ds['dlhf_u'] = (('time'), LHF_h_u.data)
|
|
68
|
+
else:
|
|
69
|
+
logger.info('wspd_u, t_surf or z_boom_u missing, cannot calulate tubrulent heat fluxes')
|
|
70
|
+
|
|
71
|
+
q_h_u = 1000 * q_h_u # Convert sp.humid from kg/kg to g/kg
|
|
72
|
+
ds['qh_u'] = (('time'), q_h_u.data)
|
|
73
|
+
else:
|
|
74
|
+
logger.info('t_u, p_u or rh_u_cor missing, cannot calulate tubrulent heat fluxes')
|
|
61
75
|
|
|
62
76
|
# Lower boom bulk calculation
|
|
63
|
-
if ds.attrs['number_of_booms']==2:
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
77
|
+
if ds.attrs['number_of_booms']==2:
|
|
78
|
+
if ('t_l' in ds.keys()) and \
|
|
79
|
+
('p_l' in ds.keys()) and \
|
|
80
|
+
('rh_l_cor' in ds.keys()):
|
|
81
|
+
T_h_l = ds['t_l'].copy() # Copy for processing
|
|
82
|
+
p_h_l = ds['p_l'].copy()
|
|
83
|
+
RH_cor_h_l = ds['rh_l_cor'].copy()
|
|
84
|
+
|
|
85
|
+
q_h_l = calculate_specific_humidity(T_0, T_100, T_h_l, p_h_l, RH_cor_h_l) # Calculate sp.humidity
|
|
86
|
+
|
|
87
|
+
if ('wspd_l' in ds.keys()) and \
|
|
88
|
+
('t_surf' in ds.keys()) and \
|
|
89
|
+
('z_boom_l' in ds.keys()):
|
|
90
|
+
z_WS_l = ds['z_boom_l'].copy() + 0.4 # Get height of W
|
|
91
|
+
z_T_l = ds['z_boom_l'].copy() - 0.1 # Get height of thermometer
|
|
92
|
+
WS_h_l = ds['wspd_l'].copy()
|
|
93
|
+
if not ds.attrs['bedrock']:
|
|
94
|
+
SHF_h_l, LHF_h_l= calculate_tubulent_heat_fluxes(T_0, T_h_l, Tsurf_h, WS_h_l, # Calculate latent and sensible heat fluxes
|
|
95
|
+
z_WS_l, z_T_l, q_h_l, p_h_l)
|
|
96
|
+
|
|
97
|
+
ds['dshf_l'] = (('time'), SHF_h_l.data)
|
|
98
|
+
ds['dlhf_l'] = (('time'), LHF_h_l.data)
|
|
99
|
+
else:
|
|
100
|
+
logger.info('wspd_l, t_surf or z_boom_l missing, cannot calulate tubrulent heat fluxes')
|
|
101
|
+
|
|
102
|
+
q_h_l = 1000 * q_h_l # Convert sp.humid from kg/kg to g/kg
|
|
103
|
+
ds['qh_l'] = (('time'), q_h_l.data)
|
|
104
|
+
else:
|
|
105
|
+
logger.info('t_l, p_l or rh_l_cor missing, cannot calulate tubrulent heat fluxes')
|
|
106
|
+
|
|
107
|
+
if len(station_config)==0:
|
|
108
|
+
logger.warning('\n***\nThe station configuration file is missing or improperly passed to pypromice. Some processing steps might fail.\n***\n')
|
|
109
|
+
|
|
110
|
+
# Smoothing and inter/extrapolation of GPS coordinates
|
|
111
|
+
for var in ['gps_lat', 'gps_lon', 'gps_alt']:
|
|
112
|
+
ds[var.replace('gps_','')] = ('time', gps_coordinate_postprocessing(ds, var, station_config))
|
|
113
|
+
|
|
114
|
+
# processing continuous surface height, ice surface height, snow height
|
|
115
|
+
try:
|
|
116
|
+
ds = process_surface_height(ds, data_adjustments_dir, station_config)
|
|
117
|
+
except Exception as e:
|
|
118
|
+
logger.error("Error processing surface height at %s"%L2.attrs['station_id'])
|
|
119
|
+
logging.error(e, exc_info=True)
|
|
120
|
+
|
|
121
|
+
# making sure dataset has the attributes contained in the config files
|
|
122
|
+
if 'project' in station_config.keys():
|
|
123
|
+
ds.attrs['project'] = station_config['project']
|
|
124
|
+
else:
|
|
125
|
+
logger.error('No project info in station_config. Using \"PROMICE\".')
|
|
126
|
+
ds.attrs['project'] = "PROMICE"
|
|
127
|
+
|
|
128
|
+
if 'location_type' in station_config.keys():
|
|
129
|
+
ds.attrs['location_type'] = station_config['location_type']
|
|
130
|
+
else:
|
|
131
|
+
logger.error('No project info in station_config. Using \"ice sheet\".')
|
|
132
|
+
ds.attrs['location_type'] = "ice sheet"
|
|
95
133
|
|
|
96
134
|
return ds
|
|
97
135
|
|
|
98
136
|
|
|
99
|
-
def
|
|
100
|
-
|
|
101
|
-
|
|
137
|
+
def process_surface_height(ds, data_adjustments_dir, station_config={}):
|
|
138
|
+
"""
|
|
139
|
+
Process surface height data for different site types and create
|
|
140
|
+
surface height variables.
|
|
141
|
+
|
|
102
142
|
Parameters
|
|
103
143
|
----------
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
144
|
+
ds : xarray.Dataset
|
|
145
|
+
The dataset containing various measurements and attributes including
|
|
146
|
+
'site_type' which determines the type of site (e.g., 'ablation',
|
|
147
|
+
'accumulation', 'bedrock') and other relevant data variables such as
|
|
148
|
+
'z_boom_u', 'z_stake', 'z_pt_cor', etc.
|
|
149
|
+
|
|
111
150
|
Returns
|
|
112
151
|
-------
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
152
|
+
xarray.Dataset
|
|
153
|
+
The dataset with additional processed surface height variables:
|
|
154
|
+
'z_surf_1', 'z_surf_2', 'z_ice_surf', 'z_surf_combined', 'snow_height',
|
|
155
|
+
and possibly depth variables derived from temperature measurements.
|
|
156
|
+
"""
|
|
157
|
+
# Initialize surface height variables with NaNs
|
|
158
|
+
ds['z_surf_1'] = ('time', ds['z_boom_u'].data * np.nan)
|
|
159
|
+
ds['z_surf_2'] = ('time', ds['z_boom_u'].data * np.nan)
|
|
121
160
|
|
|
161
|
+
if ds.attrs['site_type'] == 'ablation':
|
|
162
|
+
# Calculate surface heights for ablation sites
|
|
163
|
+
ds['z_surf_1'] = 2.6 - ds['z_boom_u']
|
|
164
|
+
if ds.z_stake.notnull().any():
|
|
165
|
+
first_valid_index = ds.time.where((ds.z_stake + ds.z_boom_u).notnull(), drop=True).data[0]
|
|
166
|
+
ds['z_surf_2'] = ds.z_surf_1.sel(time=first_valid_index) + ds.z_stake.sel(time=first_valid_index) - ds['z_stake']
|
|
167
|
+
|
|
168
|
+
# Use corrected point data if available
|
|
169
|
+
if 'z_pt_cor' in ds.data_vars:
|
|
170
|
+
ds['z_ice_surf'] = ('time', ds['z_pt_cor'].data)
|
|
171
|
+
|
|
172
|
+
else:
|
|
173
|
+
# Calculate surface heights for other site types
|
|
174
|
+
first_valid_index = ds.time.where(ds.z_boom_u.notnull(), drop=True).data[0]
|
|
175
|
+
ds['z_surf_1'] = ds.z_boom_u.sel(time=first_valid_index) - ds['z_boom_u']
|
|
176
|
+
if 'z_stake' in ds.data_vars and ds.z_stake.notnull().any():
|
|
177
|
+
first_valid_index = ds.time.where(ds.z_stake.notnull(), drop=True).data[0]
|
|
178
|
+
ds['z_surf_2'] = ds.z_stake.sel(time=first_valid_index) - ds['z_stake']
|
|
179
|
+
if 'z_boom_l' in ds.data_vars:
|
|
180
|
+
# need a combine first because KAN_U switches from having a z_stake
|
|
181
|
+
# to having a z_boom_l
|
|
182
|
+
first_valid_index = ds.time.where(ds.z_boom_l.notnull(), drop=True).data[0]
|
|
183
|
+
ds['z_surf_2'] = ds['z_surf_2'].combine_first(
|
|
184
|
+
ds.z_boom_l.sel(time=first_valid_index) - ds['z_boom_l'])
|
|
185
|
+
|
|
186
|
+
# Adjust data for the created surface height variables
|
|
187
|
+
ds = adjustData(ds, data_adjustments_dir, var_list=['z_surf_1', 'z_surf_2', 'z_ice_surf'])
|
|
188
|
+
|
|
189
|
+
# Convert to dataframe and combine surface height variables
|
|
190
|
+
df_in = ds[[v for v in ['z_surf_1', 'z_surf_2', 'z_ice_surf'] if v in ds.data_vars]].to_dataframe()
|
|
191
|
+
|
|
192
|
+
(ds['z_surf_combined'], ds['z_ice_surf'],
|
|
193
|
+
ds['z_surf_1_adj'], ds['z_surf_2_adj']) = combine_surface_height(df_in, ds.attrs['site_type'])
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
if ds.attrs['site_type'] == 'ablation':
|
|
197
|
+
# Calculate rolling minimum for ice surface height and snow height
|
|
198
|
+
ts_interpolated = np.minimum(
|
|
199
|
+
xr.where(ds.z_ice_surf.notnull(),
|
|
200
|
+
ds.z_ice_surf,ds.z_surf_combined),
|
|
201
|
+
ds.z_surf_combined).to_series().resample('h').interpolate(limit=72)
|
|
202
|
+
|
|
203
|
+
if len(ts_interpolated)>24*7:
|
|
204
|
+
# Apply the rolling window with median calculation
|
|
205
|
+
z_ice_surf = (ts_interpolated
|
|
206
|
+
.rolling('14D', center=True, min_periods=1)
|
|
207
|
+
.median())
|
|
208
|
+
# Overprint the first and last 7 days with interpolated values
|
|
209
|
+
# because of edge effect of rolling windows
|
|
210
|
+
z_ice_surf.iloc[:24*7] = (ts_interpolated.iloc[:24*7]
|
|
211
|
+
.rolling('1D', center=True, min_periods=1)
|
|
212
|
+
.median().values)
|
|
213
|
+
z_ice_surf.iloc[-24*7:] = (ts_interpolated.iloc[-24*7:]
|
|
214
|
+
.rolling('1D', center=True, min_periods=1)
|
|
215
|
+
.median().values)
|
|
216
|
+
else:
|
|
217
|
+
z_ice_surf = (ts_interpolated
|
|
218
|
+
.rolling('1D', center=True, min_periods=1)
|
|
219
|
+
.median())
|
|
220
|
+
|
|
221
|
+
z_ice_surf = z_ice_surf.loc[ds.time]
|
|
222
|
+
# here we make sure that the periods where both z_stake and z_pt are
|
|
223
|
+
# missing are also missing in z_ice_surf
|
|
224
|
+
msk = ds['z_ice_surf'].notnull() | ds['z_surf_2_adj'].notnull()
|
|
225
|
+
z_ice_surf = z_ice_surf.where(msk)
|
|
226
|
+
|
|
227
|
+
# taking running minimum to get ice
|
|
228
|
+
z_ice_surf = z_ice_surf.cummin()
|
|
229
|
+
|
|
230
|
+
# filling gaps only if they are less than a year long and if values on both
|
|
231
|
+
# sides are less than 0.01 m appart
|
|
232
|
+
|
|
233
|
+
# Forward and backward fill to identify bounds of gaps
|
|
234
|
+
df_filled = z_ice_surf.fillna(method='ffill').fillna(method='bfill')
|
|
235
|
+
|
|
236
|
+
# Identify gaps and their start and end dates
|
|
237
|
+
gaps = pd.DataFrame(index=z_ice_surf[z_ice_surf.isna()].index)
|
|
238
|
+
gaps['prev_value'] = df_filled.shift(1)
|
|
239
|
+
gaps['next_value'] = df_filled.shift(-1)
|
|
240
|
+
gaps['gap_start'] = gaps.index.to_series().shift(1)
|
|
241
|
+
gaps['gap_end'] = gaps.index.to_series().shift(-1)
|
|
242
|
+
gaps['gap_duration'] = (gaps['gap_end'] - gaps['gap_start']).dt.days
|
|
243
|
+
gaps['value_diff'] = (gaps['next_value'] - gaps['prev_value']).abs()
|
|
244
|
+
|
|
245
|
+
# Determine which gaps to fill
|
|
246
|
+
mask = (gaps['gap_duration'] < 365) & (gaps['value_diff'] < 0.01)
|
|
247
|
+
gaps_to_fill = gaps[mask].index
|
|
248
|
+
|
|
249
|
+
# Fill gaps in the original Series
|
|
250
|
+
z_ice_surf.loc[gaps_to_fill] = df_filled.loc[gaps_to_fill]
|
|
251
|
+
|
|
252
|
+
# bringing the variable into the dataset
|
|
253
|
+
ds['z_ice_surf'] = z_ice_surf
|
|
254
|
+
|
|
255
|
+
ds['z_surf_combined'] = np.maximum(ds['z_surf_combined'], ds['z_ice_surf'])
|
|
256
|
+
ds['snow_height'] = np.maximum(0, ds['z_surf_combined'] - ds['z_ice_surf'])
|
|
257
|
+
elif ds.attrs['site_type'] in ['accumulation', 'bedrock']:
|
|
258
|
+
# Handle accumulation and bedrock site types
|
|
259
|
+
ds['z_ice_surf'] = ('time', ds['z_surf_1'].data * np.nan)
|
|
260
|
+
ds['snow_height'] = ds['z_surf_combined']
|
|
261
|
+
else:
|
|
262
|
+
# Log info for other site types
|
|
263
|
+
logger.info('other site type')
|
|
264
|
+
|
|
265
|
+
if ds.attrs['site_type'] != 'bedrock':
|
|
266
|
+
# Process ice temperature data and create depth variables
|
|
267
|
+
ice_temp_vars = [v for v in ds.data_vars if 't_i_' in v]
|
|
268
|
+
vars_out = [v.replace('t', 'd_t') for v in ice_temp_vars]
|
|
269
|
+
vars_out.append('t_i_10m')
|
|
270
|
+
df_out = get_thermistor_depth(
|
|
271
|
+
ds[ice_temp_vars + ['z_surf_combined']].to_dataframe(),
|
|
272
|
+
ds.attrs['station_id'],
|
|
273
|
+
station_config)
|
|
274
|
+
for var in df_out.columns:
|
|
275
|
+
ds[var] = ('time', df_out[var].values)
|
|
276
|
+
|
|
277
|
+
return ds
|
|
278
|
+
|
|
279
|
+
def combine_surface_height(df, site_type, threshold_ablation = -0.0002):
|
|
280
|
+
'''Combines the data from three sensor: the two sonic rangers and the
|
|
281
|
+
pressure transducer, to recreate the surface height, the ice surface height
|
|
282
|
+
and the snow depth through the years. For the accumulation sites, it is
|
|
283
|
+
only the average of the two sonic rangers (after manual adjustments to
|
|
284
|
+
correct maintenance shifts). For the ablation sites, first an ablation
|
|
285
|
+
period is estimated each year (either the period when z_pt_cor decreases
|
|
286
|
+
or JJA if no better estimate) then different adjustmnents are conducted
|
|
287
|
+
to stitch the three time series together: z_ice_surface (adjusted from
|
|
288
|
+
z_pt_cor) or if unvailable, z_surf_2 (adjusted from z_stake)
|
|
289
|
+
are used in the ablation period while an average of z_surf_1 and z_surf_2
|
|
290
|
+
are used otherwise, after they are being adjusted to z_ice_surf at the end
|
|
291
|
+
of the ablation season.
|
|
292
|
+
|
|
293
|
+
Parameters
|
|
294
|
+
----------
|
|
295
|
+
df : pandas.dataframe
|
|
296
|
+
Dataframe with datetime index and variables z_surf_1, z_surf_2 and z_ice_surf
|
|
297
|
+
site_type : str
|
|
298
|
+
Either 'accumulation' or 'ablation'
|
|
299
|
+
threshold_ablation : float
|
|
300
|
+
Threshold to which a z_pt_cor hourly decrease is compared. If the decrease
|
|
301
|
+
is higher, then there is ablation.
|
|
302
|
+
'''
|
|
303
|
+
logger.info('Combining surface height')
|
|
304
|
+
|
|
305
|
+
if 'z_surf_2' not in df.columns:
|
|
306
|
+
logger.info('-> did not find z_surf_2')
|
|
307
|
+
df["z_surf_2"] = df["z_surf_1"].values*np.nan
|
|
308
|
+
|
|
309
|
+
if 'z_ice_surf' not in df.columns:
|
|
310
|
+
logger.info('-> did not find z_ice_surf')
|
|
311
|
+
df["z_ice_surf"] = df["z_surf_1"].values*np.nan
|
|
312
|
+
|
|
313
|
+
if site_type in ['accumulation', 'bedrock']:
|
|
314
|
+
logger.info('-> no z_pt or accumulation site: averaging z_surf_1 and z_surf_2')
|
|
315
|
+
df["z_surf_1_adj"] = hampel(df["z_surf_1"].interpolate(limit=72)).values
|
|
316
|
+
df["z_surf_2_adj"] = hampel(df["z_surf_2"].interpolate(limit=72)).values
|
|
317
|
+
# adjusting z_surf_2 to z_surf_1
|
|
318
|
+
df["z_surf_2_adj"] = df["z_surf_2_adj"] + (df["z_surf_1_adj"]- df["z_surf_2_adj"]).mean()
|
|
319
|
+
# z_surf_combined is the average of the two z_surf
|
|
320
|
+
if df.z_surf_1_adj.notnull().any() & df.z_surf_2_adj.notnull().any():
|
|
321
|
+
df['z_surf_combined'] = df[['z_surf_1_adj', 'z_surf_2_adj']].mean(axis = 1).values
|
|
322
|
+
elif df.z_surf_1_adj.notnull().any():
|
|
323
|
+
df['z_surf_combined'] = df.z_surf_1_adj.values
|
|
324
|
+
elif df.z_surf_2_adj.notnull().any():
|
|
325
|
+
df['z_surf_combined'] = df.z_surf_2_adj.values
|
|
122
326
|
|
|
123
|
-
|
|
327
|
+
# df["z_surf_combined"] = hampel(df["z_surf_combined"].interpolate(limit=72)).values
|
|
328
|
+
return (df['z_surf_combined'], df["z_surf_combined"]*np.nan,
|
|
329
|
+
df["z_surf_1_adj"], df["z_surf_2_adj"])
|
|
330
|
+
|
|
331
|
+
else:
|
|
332
|
+
logger.info('-> ablation site')
|
|
333
|
+
# smoothing and filtering pressure transducer data
|
|
334
|
+
df["z_ice_surf_adj"] = hampel(df["z_ice_surf"].interpolate(limit=72)).values
|
|
335
|
+
df["z_surf_1_adj"] = hampel(df["z_surf_1"].interpolate(limit=72)).values
|
|
336
|
+
df["z_surf_2_adj"] = hampel(df["z_surf_2"].interpolate(limit=72)).values
|
|
337
|
+
|
|
338
|
+
df["z_surf_1_adj"] = hampel(df["z_surf_1"].interpolate(limit=72), k=24, t0=5).values
|
|
339
|
+
df["z_surf_2_adj"] = hampel(df["z_surf_2"].interpolate(limit=72), k=24, t0=5).values
|
|
340
|
+
|
|
341
|
+
# defining ice ablation period from the decrease of a smoothed version of z_pt
|
|
342
|
+
# meaning when smoothed_z_pt.diff() < threshold_ablation
|
|
343
|
+
# first smoothing
|
|
344
|
+
smoothed_PT = (df['z_ice_surf']
|
|
345
|
+
.resample('h')
|
|
346
|
+
.interpolate(limit=72)
|
|
347
|
+
.rolling('14D',center=True, min_periods=1)
|
|
348
|
+
.mean())
|
|
349
|
+
# second smoothing
|
|
350
|
+
smoothed_PT = smoothed_PT.rolling('14D', center=True, min_periods=1).mean()
|
|
351
|
+
|
|
352
|
+
smoothed_PT = smoothed_PT.reindex(df.index,method='ffill')
|
|
353
|
+
# smoothed_PT.loc[df.z_ice_surf.isnull()] = np.nan
|
|
354
|
+
|
|
355
|
+
# logical index where ablation is detected
|
|
356
|
+
ind_ablation = np.logical_and(smoothed_PT.diff().values < threshold_ablation,
|
|
357
|
+
np.isin(smoothed_PT.diff().index.month, [6, 7, 8, 9]))
|
|
358
|
+
|
|
359
|
+
|
|
360
|
+
# finding the beginning and end of each period with True
|
|
361
|
+
idx = np.argwhere(np.diff(np.r_[False,ind_ablation, False])).reshape(-1, 2)
|
|
362
|
+
idx[:, 1] -= 1
|
|
363
|
+
|
|
364
|
+
# fill small gaps in the ice ablation periods.
|
|
365
|
+
for i in range(len(idx)-1):
|
|
366
|
+
ind = idx[i]
|
|
367
|
+
ind_next = idx[i+1]
|
|
368
|
+
# if the end of an ablation period is less than 60 days away from
|
|
369
|
+
# the next ablation, then it is still considered like the same ablation
|
|
370
|
+
# season
|
|
371
|
+
if df.index[ind_next[0]]-df.index[ind[1]]<pd.to_timedelta('60 days'):
|
|
372
|
+
ind_ablation[ind[1]:ind_next[0]]=True
|
|
373
|
+
|
|
374
|
+
# finding the beginning and end of each period with True
|
|
375
|
+
idx = np.argwhere(np.diff(np.r_[False,ind_ablation, False])).reshape(-1, 2)
|
|
376
|
+
idx[:, 1] -= 1
|
|
377
|
+
|
|
378
|
+
# because the smooth_PT sees 7 days ahead, it starts showing a decline
|
|
379
|
+
# 7 days in advance, we therefore need to exclude the first 7 days of
|
|
380
|
+
# each ablation period
|
|
381
|
+
for start, end in idx:
|
|
382
|
+
period_start = df.index[start]
|
|
383
|
+
period_end = period_start + pd.Timedelta(days=7)
|
|
384
|
+
exclusion_period = (df.index >= period_start) & (df.index < period_end)
|
|
385
|
+
ind_ablation[exclusion_period] = False
|
|
386
|
+
|
|
387
|
+
hs1=df["z_surf_1_adj"].interpolate(limit=24*2).copy()
|
|
388
|
+
hs2=df["z_surf_2_adj"].interpolate(limit=24*2).copy()
|
|
389
|
+
z=df["z_ice_surf_adj"].interpolate(limit=24*2).copy()
|
|
390
|
+
|
|
391
|
+
# the surface heights are adjusted so that they start at 0
|
|
392
|
+
|
|
393
|
+
|
|
394
|
+
if any(~np.isnan(hs2.iloc[:24*7])):
|
|
395
|
+
hs2 = hs2 - hs2.iloc[:24*7].mean()
|
|
396
|
+
|
|
397
|
+
if any(~np.isnan(hs2.iloc[:24*7])) & any(~np.isnan(hs1.iloc[:24*7])):
|
|
398
|
+
hs2 = hs2 + hs1.iloc[:24*7].mean() - hs2.iloc[:24*7].mean()
|
|
399
|
+
|
|
400
|
+
if any(~np.isnan(z.iloc[:24*7])):
|
|
401
|
+
# expressing ice surface height relative to its mean value in the
|
|
402
|
+
# first week of the record
|
|
403
|
+
z = z - z.iloc[:24*7].mean()
|
|
404
|
+
elif z.notnull().any():
|
|
405
|
+
# if there is no data in the first week but that there are some
|
|
406
|
+
# PT data afterwards
|
|
407
|
+
if ((z.first_valid_index() - hs1.first_valid_index()) < pd.to_timedelta('251D')) &\
|
|
408
|
+
((z.first_valid_index() - hs1.first_valid_index()) > pd.to_timedelta('0H')):
|
|
409
|
+
# if the pressure transducer is installed the year after then
|
|
410
|
+
# we use the mean surface height 1 on its first week as a 0
|
|
411
|
+
# for the ice height
|
|
412
|
+
z = z - z.loc[
|
|
413
|
+
z.first_valid_index():(z.first_valid_index()+pd.to_timedelta('14D'))
|
|
414
|
+
].mean() + hs1.iloc[:24*7].mean()
|
|
415
|
+
else:
|
|
416
|
+
# if there is more than a year (actually 251 days) between the
|
|
417
|
+
# initiation of the AWS and the installation of the pressure transducer
|
|
418
|
+
# we remove the intercept in the pressure transducer data.
|
|
419
|
+
# Removing the intercept
|
|
420
|
+
# means that we consider the ice surface height at 0 when the AWS
|
|
421
|
+
# is installed, and not when the pressure transducer is installed.
|
|
422
|
+
Y = z.iloc[:].values.reshape(-1, 1)
|
|
423
|
+
X = z.iloc[~np.isnan(Y)].index.astype(np.int64).values.reshape(-1, 1)
|
|
424
|
+
Y = Y[~np.isnan(Y)]
|
|
425
|
+
linear_regressor = LinearRegression()
|
|
426
|
+
linear_regressor.fit(X, Y)
|
|
427
|
+
Y_pred = linear_regressor.predict(z.index.astype(np.int64).values.reshape(-1, 1) )
|
|
428
|
+
z = z-Y_pred[0]
|
|
429
|
+
|
|
430
|
+
years = df.index.year.unique().values
|
|
431
|
+
ind_start = years.copy()
|
|
432
|
+
ind_end = years.copy()
|
|
433
|
+
logger.debug('-> estimating ablation period for each year')
|
|
434
|
+
for i, y in enumerate(years):
|
|
435
|
+
# for each year
|
|
436
|
+
ind_yr = df.index.year.values==y
|
|
437
|
+
ind_abl_yr = np.logical_and(ind_yr, ind_ablation)
|
|
438
|
+
|
|
439
|
+
if df.loc[
|
|
440
|
+
np.logical_and(ind_yr, df.index.month.isin([6,7,8])),
|
|
441
|
+
"z_ice_surf_adj"].isnull().all():
|
|
442
|
+
|
|
443
|
+
ind_abl_yr = np.logical_and(ind_yr, df.index.month.isin([6,7,8]))
|
|
444
|
+
ind_ablation[ind_yr] = ind_abl_yr[ind_yr]
|
|
445
|
+
logger.debug(str(y)+' no z_ice_surf, just using JJA')
|
|
446
|
+
|
|
447
|
+
else:
|
|
448
|
+
logger.debug(str(y)+ ' derived from z_ice_surf')
|
|
449
|
+
|
|
450
|
+
if np.any(ind_abl_yr):
|
|
451
|
+
# if there are some ablation flagged for that year
|
|
452
|
+
# then find begining and end
|
|
453
|
+
ind_start[i] = np.argwhere(ind_abl_yr)[0][0]
|
|
454
|
+
ind_end[i] = np.argwhere(ind_abl_yr)[-1][0]
|
|
455
|
+
|
|
456
|
+
else:
|
|
457
|
+
logger.debug(str(y) + ' could not estimate ablation season')
|
|
458
|
+
# otherwise left as nan
|
|
459
|
+
ind_start[i] = -999
|
|
460
|
+
ind_end[i] = -999
|
|
461
|
+
|
|
462
|
+
# adjustement loop
|
|
463
|
+
missing_hs2 = 0 # if hs2 is missing then when it comes back it is adjusted to hs1
|
|
464
|
+
hs2_ref = 0 # by default, the PT is the reference: hs1 and 2 will be adjusted to PT
|
|
465
|
+
# but if it is missing one year or one winter, then it needs to be rajusted
|
|
466
|
+
# to hs1 and hs2 the year after.
|
|
467
|
+
|
|
468
|
+
for i, y in enumerate(years):
|
|
469
|
+
# if y == 2014:
|
|
470
|
+
# import pdb; pdb.set_trace()
|
|
471
|
+
logger.debug(str(y))
|
|
472
|
+
# defining subsets of hs1, hs2, z
|
|
473
|
+
hs1_jja = hs1[str(y)+'-06-01':str(y)+'-09-01']
|
|
474
|
+
hs2_jja = hs2[str(y)+'-06-01':str(y)+'-09-01']
|
|
475
|
+
z_jja = z[str(y)+'-06-01':str(y)+'-09-01']
|
|
476
|
+
|
|
477
|
+
z_ablation = z.iloc[ind_start[i]:ind_end[i]]
|
|
478
|
+
hs2_ablation = hs2.iloc[ind_start[i]:ind_end[i]]
|
|
479
|
+
|
|
480
|
+
hs1_year = hs1[str(y)]
|
|
481
|
+
hs2_year = hs2[str(y)]
|
|
482
|
+
|
|
483
|
+
hs2_winter = hs2[str(y)+'-01-01':str(y)+'-03-01'].copy()
|
|
484
|
+
z_winter = z[str(y)+'-01-01':str(y)+'-03-01'].copy()
|
|
485
|
+
|
|
486
|
+
z_year = z[str(y)]
|
|
487
|
+
if hs1_jja.isnull().all() and hs2_jja.isnull().all() and z_jja.isnull().all():
|
|
488
|
+
# if there is no height for a year between June and September
|
|
489
|
+
# then the adjustment cannot be made automatically
|
|
490
|
+
# it needs to be specified manually on the adjustment files
|
|
491
|
+
# on https://github.com/GEUS-Glaciology-and-Climate/PROMICE-AWS-data-issues
|
|
492
|
+
continue
|
|
493
|
+
|
|
494
|
+
if all(np.isnan(z_jja)) and any(~np.isnan(hs2_jja)):
|
|
495
|
+
# if there is no PT for a given year, but there is some hs2
|
|
496
|
+
# then z will be adjusted to hs2 next time it is available
|
|
497
|
+
hs2_ref = 1
|
|
498
|
+
|
|
499
|
+
if all(np.isnan(z_winter)) and all(np.isnan(hs2_winter)):
|
|
500
|
+
# if there is no PT nor hs2 during the winter, then again
|
|
501
|
+
# we need to adjust z to match hs2 when ablation starts
|
|
502
|
+
hs2_ref = 1
|
|
503
|
+
|
|
504
|
+
# adjustment at the start of the ablation season
|
|
505
|
+
if hs2_ref:
|
|
506
|
+
# if hs2 has been taken as reference in the previous years
|
|
507
|
+
# then we check if pressure transducer is reinstalled and needs
|
|
508
|
+
# to be adjusted
|
|
509
|
+
if ind_start[i] != -999:
|
|
510
|
+
# the first year there is both ablation and PT data available
|
|
511
|
+
# then PT is adjusted to hs2
|
|
512
|
+
if any(~np.isnan(z_ablation)) and any(~np.isnan(hs2_ablation)):
|
|
513
|
+
tmp1 = z_ablation.copy()
|
|
514
|
+
tmp2 = hs2_ablation.copy()
|
|
515
|
+
# tmp1[np.isnan(tmp2)] = np.nan
|
|
516
|
+
# tmp2[np.isnan(tmp1)] = np.nan
|
|
517
|
+
|
|
518
|
+
# in some instances, the PT data is available but no ablation
|
|
519
|
+
# is recorded, then hs2 remains the reference during that time.
|
|
520
|
+
# When eventually there is ablation, then we need to find the
|
|
521
|
+
# first index in these preceding ablation-free years
|
|
522
|
+
# the shift will be applied back from this point
|
|
523
|
+
# first_index = z[:z[str(y)].first_valid_index()].isnull().iloc[::-1].idxmax()
|
|
524
|
+
# z[first_index:] = z[first_index:] - np.nanmean(tmp1) + np.nanmean(tmp2)
|
|
525
|
+
# hs2_ref = 0 # from now on PT is the reference
|
|
526
|
+
|
|
527
|
+
# in some other instance, z just need to be adjusted to hs2
|
|
528
|
+
# first_index = z[str(y)].first_valid_index()
|
|
529
|
+
first_index = z.iloc[ind_start[i]:].first_valid_index() # of ablation
|
|
530
|
+
if np.isnan(hs2[first_index]):
|
|
531
|
+
first_index_2 = hs2.iloc[ind_start[i]:].first_valid_index()
|
|
532
|
+
if (first_index_2 - first_index)>pd.Timedelta('30d'):
|
|
533
|
+
logger.debug('adjusting z to hs1')
|
|
534
|
+
if np.isnan(hs1[first_index]):
|
|
535
|
+
first_index = hs1.iloc[ind_start[i]:].first_valid_index()
|
|
536
|
+
z[first_index:] = z[first_index:] - z[first_index] + hs1[first_index]
|
|
537
|
+
else:
|
|
538
|
+
logger.debug('adjusting z to hs1')
|
|
539
|
+
first_index = hs2.iloc[ind_start[i]:].first_valid_index()
|
|
540
|
+
z[first_index:] = z[first_index:] - z[first_index] + hs2[first_index]
|
|
541
|
+
else:
|
|
542
|
+
logger.debug('adjusting z to hs1')
|
|
543
|
+
z[first_index:] = z[first_index:] - z[first_index] + hs2[first_index]
|
|
544
|
+
hs2_ref = 0 # from now on PT is the reference
|
|
545
|
+
|
|
546
|
+
|
|
547
|
+
else:
|
|
548
|
+
# if z_pt is the reference and there is some ablation
|
|
549
|
+
# then hs1 and hs2 are adjusted to z_pt
|
|
550
|
+
if (ind_start[i] != -999) & z_year.notnull().any():
|
|
551
|
+
# calculating first index with PT, hs1 and hs2
|
|
552
|
+
first_index = z_year.first_valid_index()
|
|
553
|
+
if hs1_year.notnull().any():
|
|
554
|
+
first_index = np.max(np.array(
|
|
555
|
+
[first_index,
|
|
556
|
+
hs1_year.first_valid_index()]))
|
|
557
|
+
if hs2_year.notnull().any():
|
|
558
|
+
first_index = np.max(np.array(
|
|
559
|
+
[first_index,
|
|
560
|
+
hs2_year.first_valid_index()]))
|
|
561
|
+
|
|
562
|
+
# if PT, hs1 and hs2 are all nan until station is reactivated, then
|
|
563
|
+
first_day_of_year = pd.to_datetime(str(y)+'-01-01')
|
|
564
|
+
|
|
565
|
+
if len(z[first_day_of_year:first_index-pd.to_timedelta('1D')])>0:
|
|
566
|
+
if z[first_day_of_year:first_index-pd.to_timedelta('1D')].isnull().all() & \
|
|
567
|
+
hs1[first_day_of_year:first_index-pd.to_timedelta('1D')].isnull().all() & \
|
|
568
|
+
hs2[first_day_of_year:first_index-pd.to_timedelta('1D')].isnull().all():
|
|
569
|
+
if (~np.isnan(np.nanmean(z[first_index:first_index+pd.to_timedelta('1D')])) \
|
|
570
|
+
and ~np.isnan(np.nanmean(hs2[first_index:first_index+pd.to_timedelta('1D')]))):
|
|
571
|
+
logger.debug(' ======= adjusting hs1 and hs2 to z_pt')
|
|
572
|
+
if ~np.isnan(np.nanmean(hs1[first_index:first_index+pd.to_timedelta('1D')]) ):
|
|
573
|
+
hs1[first_index:] = hs1[first_index:] \
|
|
574
|
+
- np.nanmean(hs1[first_index:first_index+pd.to_timedelta('1D')]) \
|
|
575
|
+
+ np.nanmean(z[first_index:first_index+pd.to_timedelta('1D')])
|
|
576
|
+
if ~np.isnan(np.nanmean(hs2[first_index:first_index+pd.to_timedelta('1D')]) ):
|
|
577
|
+
hs2[first_index:] = hs2[first_index:] \
|
|
578
|
+
- np.nanmean(hs2[first_index:first_index+pd.to_timedelta('1D')]) \
|
|
579
|
+
+ np.nanmean(z[first_index:first_index+pd.to_timedelta('1D')])
|
|
580
|
+
|
|
581
|
+
# adjustment taking place at the end of the ablation period
|
|
582
|
+
if (ind_end[i] != -999):
|
|
583
|
+
# if y == 2023:
|
|
584
|
+
# import pdb; pdb.set_trace()
|
|
585
|
+
# if there's ablation and
|
|
586
|
+
# if there are PT data available at the end of the melt season
|
|
587
|
+
if z.iloc[(ind_end[i]-24*7):(ind_end[i]+24*7)].notnull().any():
|
|
588
|
+
logger.debug('adjusting hs2 to z')
|
|
589
|
+
# then we adjust hs2 to the end-of-ablation z
|
|
590
|
+
# first trying at the end of melt season
|
|
591
|
+
if ~np.isnan(np.nanmean(hs2.iloc[(ind_end[i]-24*7):(ind_end[i]+24*30)])):
|
|
592
|
+
logger.debug('using end of melt season')
|
|
593
|
+
hs2.iloc[ind_end[i]:] = hs2.iloc[ind_end[i]:] - \
|
|
594
|
+
np.nanmean(hs2.iloc[(ind_end[i]-24*7):(ind_end[i]+24*30)]) + \
|
|
595
|
+
np.nanmean(z.iloc[(ind_end[i]-24*7):(ind_end[i]+24*30)])
|
|
596
|
+
# if not possible, then trying the end of the following accumulation season
|
|
597
|
+
elif (i+1 < len(ind_start)):
|
|
598
|
+
if ind_start[i+1]!=-999 and any(~np.isnan(hs2.iloc[(ind_start[i+1]-24*7):(ind_start[i+1]+24*7)]+ z.iloc[(ind_start[i+1]-24*7):(ind_start[i+1]+24*7)])):
|
|
599
|
+
logger.debug('using end of accumulation season')
|
|
600
|
+
hs2.iloc[ind_end[i]:] = hs2.iloc[ind_end[i]:] - \
|
|
601
|
+
np.nanmean(hs2.iloc[(ind_start[i+1]-24*7):(ind_start[i+1]+24*7)]) + \
|
|
602
|
+
np.nanmean(z.iloc[(ind_start[i+1]-24*7):(ind_start[i+1]+24*7)])
|
|
603
|
+
else:
|
|
604
|
+
logger.debug('no ablation')
|
|
605
|
+
hs1_following_winter = hs1[str(y)+'-09-01':str(y+1)+'-03-01'].copy()
|
|
606
|
+
hs2_following_winter = hs2[str(y)+'-09-01':str(y+1)+'-03-01'].copy()
|
|
607
|
+
if all(np.isnan(hs2_following_winter)):
|
|
608
|
+
logger.debug('no hs2')
|
|
609
|
+
missing_hs2 = 1
|
|
610
|
+
elif missing_hs2 == 1:
|
|
611
|
+
logger.debug('adjusting hs2')
|
|
612
|
+
# and if there are some hs2 during the accumulation period
|
|
613
|
+
if any(~np.isnan(hs1_following_winter)):
|
|
614
|
+
logger.debug('to hs1')
|
|
615
|
+
# then we adjust hs1 to hs2 during the accumulation area
|
|
616
|
+
# adjustment is done so that the mean hs1 and mean hs2 match
|
|
617
|
+
# for the period when both are available
|
|
618
|
+
hs2_following_winter[np.isnan(hs1_following_winter)] = np.nan
|
|
619
|
+
hs1_following_winter[np.isnan(hs2_following_winter)] = np.nan
|
|
620
|
+
|
|
621
|
+
hs2[str(y)+'-01-01':] = hs2[str(y)+'-01-01':] \
|
|
622
|
+
- np.nanmean(hs2_following_winter) + np.nanmean(hs1_following_winter)
|
|
623
|
+
missing_hs2 = 0
|
|
624
|
+
|
|
625
|
+
|
|
626
|
+
hs1_following_winter = hs1[str(y)+'-09-01':str(y+1)+'-03-01'].copy()
|
|
627
|
+
hs2_following_winter = hs2[str(y)+'-09-01':str(y+1)+'-03-01'].copy()
|
|
628
|
+
# adjusting hs1 to hs2 (no ablation case)
|
|
629
|
+
if any(~np.isnan(hs1_following_winter)):
|
|
630
|
+
logger.debug('adjusting hs1')
|
|
631
|
+
# and if there are some hs2 during the accumulation period
|
|
632
|
+
if any(~np.isnan(hs2_following_winter)):
|
|
633
|
+
logger.debug('to hs2')
|
|
634
|
+
# then we adjust hs1 to hs2 during the accumulation area
|
|
635
|
+
# adjustment is done so that the mean hs1 and mean hs2 match
|
|
636
|
+
# for the period when both are available
|
|
637
|
+
hs1_following_winter[np.isnan(hs2_following_winter)] = np.nan
|
|
638
|
+
hs2_following_winter[np.isnan(hs1_following_winter)] = np.nan
|
|
639
|
+
|
|
640
|
+
hs1[str(y)+'-09-01':] = hs1[str(y)+'-09-01':] \
|
|
641
|
+
- np.nanmean(hs1_following_winter) + np.nanmean(hs2_following_winter)
|
|
642
|
+
hs1_following_winter = hs1[str(y)+'-09-01':str(y+1)+'-03-01'].copy()
|
|
643
|
+
|
|
644
|
+
if ind_end[i] != -999:
|
|
645
|
+
# if there is some hs1
|
|
646
|
+
hs1_following_winter = hs1[str(y)+'-09-01':str(y+1)+'-03-01'].copy()
|
|
647
|
+
hs2_following_winter = hs2[str(y)+'-09-01':str(y+1)+'-03-01'].copy()
|
|
648
|
+
if any(~np.isnan(hs1_following_winter)):
|
|
649
|
+
logger.debug('adjusting hs1')
|
|
650
|
+
# and if there are some hs2 during the accumulation period
|
|
651
|
+
if any(~np.isnan(hs2_following_winter)):
|
|
652
|
+
logger.debug('to hs2, minimizing winter difference')
|
|
653
|
+
# then we adjust hs1 to hs2 during the accumulation area
|
|
654
|
+
# adjustment is done so that the mean hs1 and mean hs2 match
|
|
655
|
+
# for the period when both are available
|
|
656
|
+
tmp1 = hs1.iloc[ind_end[i]:min(len(hs1),ind_end[i]+24*30*9)].copy()
|
|
657
|
+
tmp2 = hs2.iloc[ind_end[i]:min(len(hs2),ind_end[i]+24*30*9)].copy()
|
|
658
|
+
|
|
659
|
+
tmp1[np.isnan(tmp2)] = np.nan
|
|
660
|
+
tmp2[np.isnan(tmp1)] = np.nan
|
|
661
|
+
if tmp1.isnull().all():
|
|
662
|
+
tmp1 = hs1_following_winter.copy()
|
|
663
|
+
tmp2 = hs2_following_winter.copy()
|
|
664
|
+
|
|
665
|
+
tmp1[np.isnan(tmp2)] = np.nan
|
|
666
|
+
tmp2[np.isnan(tmp1)] = np.nan
|
|
667
|
+
hs1.iloc[ind_end[i]:] = hs1.iloc[ind_end[i]:] - np.nanmean(tmp1) + np.nanmean(tmp2)
|
|
668
|
+
|
|
669
|
+
# if no hs2, then use PT data available at the end of the melt season
|
|
670
|
+
elif np.any(~np.isnan(z.iloc[(ind_end[i]-24*14):(ind_end[i]+24*7)])):
|
|
671
|
+
logger.debug('to z')
|
|
672
|
+
# then we adjust hs2 to the end-of-ablation z
|
|
673
|
+
# first trying at the end of melt season
|
|
674
|
+
if ~np.isnan(np.nanmean(hs1.iloc[(ind_end[i]-24*14):(ind_end[i]+24*30)])):
|
|
675
|
+
logger.debug('using end of melt season')
|
|
676
|
+
hs1.iloc[ind_end[i]:] = hs1.iloc[ind_end[i]:] - \
|
|
677
|
+
np.nanmean(hs1.iloc[(ind_end[i]-24*14):(ind_end[i]+24*30)]) + \
|
|
678
|
+
np.nanmean(z.iloc[(ind_end[i]-24*14):(ind_end[i]+24*30)])
|
|
679
|
+
# if not possible, then trying the end of the following accumulation season
|
|
680
|
+
elif ind_start[i+1]!=-999 and any(~np.isnan(hs1.iloc[(ind_start[i+1]-24*14):(ind_start[i+1]+24*7)]+ z.iloc[(ind_start[i+1]-24*14):(ind_start[i+1]+24*7)])):
|
|
681
|
+
logger.debug('using end of accumulation season')
|
|
682
|
+
hs1.iloc[ind_end[i]:] = hs1.iloc[ind_end[i]:] - \
|
|
683
|
+
np.nanmean(hs1.iloc[(ind_start[i+1]-24*14):(ind_start[i+1]+24*7)]) + \
|
|
684
|
+
np.nanmean(z.iloc[(ind_start[i+1]-24*14):(ind_start[i+1]+24*7)])
|
|
685
|
+
elif any(~np.isnan(hs2_year)):
|
|
686
|
+
logger.debug('to the last value of hs2')
|
|
687
|
+
# then we adjust hs1 to hs2 during the accumulation area
|
|
688
|
+
# adjustment is done so that the mean hs1 and mean hs2 match
|
|
689
|
+
# for the period when both are available
|
|
690
|
+
half_span = pd.to_timedelta('7D')
|
|
691
|
+
tmp1 = hs1_year.loc[(hs2_year.last_valid_index()-half_span):(hs2_year.last_valid_index()+half_span)].copy()
|
|
692
|
+
tmp2 = hs2_year.loc[(hs2_year.last_valid_index()-half_span):(hs2_year.last_valid_index()+half_span)].copy()
|
|
693
|
+
|
|
694
|
+
hs1.iloc[ind_end[i]:] = hs1.iloc[ind_end[i]:] - np.nanmean(tmp1) + np.nanmean(tmp2)
|
|
695
|
+
|
|
696
|
+
df["z_surf_1_adj"] = hs1.interpolate(limit=2*24).values
|
|
697
|
+
df["z_surf_2_adj"] = hs2.interpolate(limit=2*24).values
|
|
698
|
+
df["z_ice_surf_adj"] = z.interpolate(limit=2*24).values
|
|
699
|
+
|
|
700
|
+
# making a summary of the surface height
|
|
701
|
+
df["z_surf_combined"] = np.nan
|
|
702
|
+
|
|
703
|
+
# in winter, both SR1 and SR2 are used
|
|
704
|
+
df["z_surf_combined"] = df["z_surf_2_adj"].interpolate(limit=72).values
|
|
705
|
+
|
|
706
|
+
|
|
707
|
+
# in ablation season we use SR2 instead of the SR1&2 average
|
|
708
|
+
# here two options:
|
|
709
|
+
# 1) we ignore the SR1 and only use SR2
|
|
710
|
+
# 2) we use SR1 when SR2 is not available (commented)
|
|
711
|
+
# the later one can cause jumps when SR2 starts to be available few days after SR1
|
|
712
|
+
data_update = df[["z_surf_1_adj", "z_surf_2_adj"]].mean(axis=1).values
|
|
713
|
+
|
|
714
|
+
ind_update = ~ind_ablation
|
|
715
|
+
#ind_update = np.logical_and(ind_ablation, ~np.isnan(data_update))
|
|
716
|
+
df.loc[ind_update,"z_surf_combined"] = data_update[ind_update]
|
|
717
|
+
|
|
718
|
+
# in ablation season we use pressure transducer over all other options
|
|
719
|
+
data_update = df[ "z_ice_surf_adj"].interpolate(limit=72).values
|
|
720
|
+
ind_update = np.logical_and(ind_ablation, ~np.isnan(data_update))
|
|
721
|
+
df.loc[ind_update,"z_surf_combined"] = data_update[ind_update]
|
|
722
|
+
|
|
723
|
+
logger.info('surface height combination finished')
|
|
724
|
+
return df['z_surf_combined'], df["z_ice_surf_adj"], df["z_surf_1_adj"], df["z_surf_2_adj"]
|
|
725
|
+
|
|
726
|
+
def hampel(vals_orig, k=7*24, t0=15):
|
|
727
|
+
'''
|
|
728
|
+
vals: pandas series of values from which to remove outliers
|
|
729
|
+
k: size of window (including the sample; 7 is equal to 3 on either side of value)
|
|
730
|
+
'''
|
|
731
|
+
#Make copy so original not edited
|
|
732
|
+
vals=vals_orig.copy()
|
|
733
|
+
#Hampel Filter
|
|
734
|
+
L= 1.4826
|
|
735
|
+
rolling_median=vals.rolling(k).median()
|
|
736
|
+
difference=np.abs(rolling_median-vals)
|
|
737
|
+
median_abs_deviation=difference.rolling(k).median()
|
|
738
|
+
threshold= t0 *L * median_abs_deviation
|
|
739
|
+
outlier_idx=difference>threshold
|
|
740
|
+
outlier_idx[0:round(k/2)]=False
|
|
741
|
+
vals.loc[outlier_idx]=np.nan
|
|
742
|
+
return(vals)
|
|
743
|
+
|
|
744
|
+
|
|
745
|
+
def get_thermistor_depth(df_in, site, station_config):
|
|
746
|
+
'''Calculates the depth of the thermistors through time based on their
|
|
747
|
+
installation depth (collected in a google sheet) and on the change of surface
|
|
748
|
+
height: instruments getting buried under new snow or surfacing due to ablation.
|
|
749
|
+
There is a potential for additional filtering of thermistor data for surfaced
|
|
750
|
+
(or just noisy) thermistors, but that is currently deactivated because slow.
|
|
751
|
+
|
|
752
|
+
Parameters
|
|
753
|
+
----------
|
|
754
|
+
df_in : pandas:dataframe
|
|
755
|
+
dataframe containing the ice/firn temperature t_i_* as well as the
|
|
756
|
+
combined surface height z_surf_combined
|
|
757
|
+
site : str
|
|
758
|
+
stid, so that maintenance date and sensor installation depths can be found
|
|
759
|
+
in database
|
|
760
|
+
station_config : dict
|
|
761
|
+
potentially containing the key string_maintenance
|
|
762
|
+
with station_config["string_maintenance"] being a list of dictionaries
|
|
763
|
+
containing maintenance information in the format:
|
|
764
|
+
[
|
|
765
|
+
{"date": "2007-08-20", "installation_depth": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 6.0, 6.0]},
|
|
766
|
+
{"date": "2008-07-17", "installation_depth": [1.2, 2.2, 3.2, 4.2, 5.2, 6.2, 7.2, 10.2]}
|
|
767
|
+
# Add more entries as needed
|
|
768
|
+
]
|
|
769
|
+
'''
|
|
770
|
+
|
|
771
|
+
temp_cols_name = ['t_i_'+str(i) for i in range(12) if 't_i_'+str(i) in df_in.columns]
|
|
772
|
+
num_therm = len(temp_cols_name)
|
|
773
|
+
depth_cols_name = ['d_t_i_'+str(i) for i in range(1,num_therm+1)]
|
|
774
|
+
|
|
775
|
+
if df_in['z_surf_combined'].isnull().all():
|
|
776
|
+
logger.info('No valid surface height at '+site+', cannot calculate thermistor depth')
|
|
777
|
+
df_in[depth_cols_name + ['t_i_10m']] = np.nan
|
|
778
|
+
else:
|
|
779
|
+
logger.info('Calculating thermistor depth')
|
|
780
|
+
|
|
781
|
+
# Convert maintenance_info to DataFrame for easier manipulation
|
|
782
|
+
maintenance_string = pd.DataFrame(
|
|
783
|
+
station_config.get("string_maintenance",[]),
|
|
784
|
+
columns = ['date', 'installation_depths']
|
|
785
|
+
)
|
|
786
|
+
maintenance_string["date"] = pd.to_datetime(maintenance_string["date"])
|
|
787
|
+
maintenance_string = maintenance_string.sort_values(by='date', ascending=True)
|
|
788
|
+
|
|
789
|
+
|
|
790
|
+
if num_therm == 8:
|
|
791
|
+
ini_depth = [1, 2, 3, 4, 5, 6, 7, 10]
|
|
792
|
+
else:
|
|
793
|
+
ini_depth = [0, 0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5]
|
|
794
|
+
df_in[depth_cols_name] = np.nan
|
|
795
|
+
|
|
796
|
+
# filtering the surface height
|
|
797
|
+
surface_height = df_in["z_surf_combined"].copy()
|
|
798
|
+
ind_filter = surface_height.rolling(window=14, center=True).var() > 0.1
|
|
799
|
+
if any(ind_filter):
|
|
800
|
+
surface_height[ind_filter] = np.nan
|
|
801
|
+
df_in["z_surf_combined"] = surface_height.values
|
|
802
|
+
z_surf_interp = df_in["z_surf_combined"].interpolate()
|
|
803
|
+
|
|
804
|
+
# first initialization of the depths
|
|
805
|
+
for i, col in enumerate(depth_cols_name):
|
|
806
|
+
df_in[col] = (
|
|
807
|
+
ini_depth[i]
|
|
808
|
+
+ z_surf_interp.values
|
|
809
|
+
- z_surf_interp[z_surf_interp.first_valid_index()]
|
|
810
|
+
)
|
|
811
|
+
|
|
812
|
+
# reseting depth at maintenance
|
|
813
|
+
if len(maintenance_string.date) == 0:
|
|
814
|
+
logger.info("No maintenance at "+site)
|
|
815
|
+
|
|
816
|
+
for date in maintenance_string.date:
|
|
817
|
+
if date > z_surf_interp.last_valid_index():
|
|
818
|
+
continue
|
|
819
|
+
new_depth = maintenance_string.loc[
|
|
820
|
+
maintenance_string.date == date
|
|
821
|
+
].installation_depths.values[0]
|
|
822
|
+
|
|
823
|
+
for i, col in enumerate(depth_cols_name[:len(new_depth)]):
|
|
824
|
+
tmp = df_in[col].copy()
|
|
825
|
+
tmp.loc[date:] = (
|
|
826
|
+
new_depth[i]
|
|
827
|
+
+ z_surf_interp[date:].values
|
|
828
|
+
- z_surf_interp[date:][
|
|
829
|
+
z_surf_interp[date:].first_valid_index()
|
|
830
|
+
]
|
|
831
|
+
)
|
|
832
|
+
df_in[col] = tmp.values
|
|
833
|
+
|
|
834
|
+
# % Filtering thermistor data
|
|
835
|
+
for i in range(len(temp_cols_name)):
|
|
836
|
+
tmp = df_in[temp_cols_name[i]].copy()
|
|
837
|
+
|
|
838
|
+
# variance filter
|
|
839
|
+
# ind_filter = (
|
|
840
|
+
# df_in[temp_cols_name[i]]
|
|
841
|
+
# .interpolate(limit=14)
|
|
842
|
+
# .rolling(window=7)
|
|
843
|
+
# .var()
|
|
844
|
+
# > 0.5
|
|
845
|
+
# )
|
|
846
|
+
# month = (
|
|
847
|
+
# df_in[temp_cols_name[i]].interpolate(limit=14).index.month.values
|
|
848
|
+
# )
|
|
849
|
+
# ind_filter.loc[np.isin(month, [5, 6, 7])] = False
|
|
850
|
+
# if any(ind_filter):
|
|
851
|
+
# tmp.loc[ind_filter] = np.nan
|
|
852
|
+
|
|
853
|
+
# before and after maintenance adaptation filter
|
|
854
|
+
if len(maintenance_string.date) > 0:
|
|
855
|
+
for date in maintenance_string.date:
|
|
856
|
+
if isinstance(
|
|
857
|
+
maintenance_string.loc[
|
|
858
|
+
maintenance_string.date == date
|
|
859
|
+
].installation_depths.values[0],
|
|
860
|
+
str,
|
|
861
|
+
):
|
|
862
|
+
ind_adapt = np.abs(
|
|
863
|
+
tmp.interpolate(limit=14).index.values
|
|
864
|
+
- pd.to_datetime(date).to_datetime64()
|
|
865
|
+
) < np.timedelta64(7, "D")
|
|
866
|
+
if any(ind_adapt):
|
|
867
|
+
tmp.loc[ind_adapt] = np.nan
|
|
868
|
+
|
|
869
|
+
# surfaced thermistor
|
|
870
|
+
ind_pos = df_in[depth_cols_name[i]] < 0.1
|
|
871
|
+
if any(ind_pos):
|
|
872
|
+
tmp.loc[ind_pos] = np.nan
|
|
873
|
+
|
|
874
|
+
# copying the filtered values to the original table
|
|
875
|
+
df_in[temp_cols_name[i]] = tmp.values
|
|
876
|
+
|
|
877
|
+
# removing negative depth
|
|
878
|
+
df_in.loc[df_in[depth_cols_name[i]]<0, depth_cols_name[i]] = np.nan
|
|
879
|
+
logger.info("interpolating 10 m firn/ice temperature")
|
|
880
|
+
df_in['t_i_10m'] = interpolate_temperature(
|
|
881
|
+
df_in.index.values,
|
|
882
|
+
df_in[depth_cols_name].values.astype(float),
|
|
883
|
+
df_in[temp_cols_name].values.astype(float),
|
|
884
|
+
kind="linear",
|
|
885
|
+
min_diff_to_depth=1.5,
|
|
886
|
+
).set_index('date').values
|
|
887
|
+
|
|
888
|
+
# filtering
|
|
889
|
+
ind_pos = df_in["t_i_10m"] > 0.1
|
|
890
|
+
ind_low = df_in["t_i_10m"] < -70
|
|
891
|
+
df_in.loc[ind_pos, "t_i_10m"] = np.nan
|
|
892
|
+
df_in.loc[ind_low, "t_i_10m"] = np.nan
|
|
893
|
+
|
|
894
|
+
return df_in[depth_cols_name + ['t_i_10m']]
|
|
895
|
+
|
|
896
|
+
|
|
897
|
+
def interpolate_temperature(dates, depth_cor, temp, depth=10, min_diff_to_depth=2,
|
|
898
|
+
kind="quadratic"):
|
|
899
|
+
'''Calculates the depth of the thermistors through time based on their
|
|
900
|
+
installation depth (collected in a google sheet) and on the change of surface
|
|
901
|
+
height: instruments getting buried under new snow or surfacing due to ablation.
|
|
902
|
+
There is a potential for additional filtering of thermistor data for surfaced
|
|
903
|
+
(or just noisy) thermistors, but that is currently deactivated because slow.
|
|
904
|
+
|
|
905
|
+
Parameters
|
|
906
|
+
----------
|
|
907
|
+
dates : numpy.array
|
|
908
|
+
array of datetime64
|
|
909
|
+
depth_cor : numpy.ndarray
|
|
910
|
+
matrix of depths
|
|
911
|
+
temp : numpy.ndarray
|
|
912
|
+
matrix of temperatures
|
|
913
|
+
depth : float
|
|
914
|
+
constant depth at which (depth_cor, temp) should be interpolated.
|
|
915
|
+
min_diff_to_depth: float
|
|
916
|
+
maximum difference allowed between the available depht and the target depth
|
|
917
|
+
for the interpolation to be done.
|
|
918
|
+
kind : str
|
|
919
|
+
type of interpolation from scipy.interpolate.interp1d
|
|
920
|
+
'''
|
|
921
|
+
|
|
922
|
+
depth_cor = depth_cor.astype(float)
|
|
923
|
+
df_interp = pd.DataFrame()
|
|
924
|
+
df_interp["date"] = dates
|
|
925
|
+
df_interp["temperatureObserved"] = np.nan
|
|
926
|
+
|
|
927
|
+
# preprocessing temperatures for small gaps
|
|
928
|
+
tmp = pd.DataFrame(temp)
|
|
929
|
+
tmp["time"] = dates
|
|
930
|
+
tmp = tmp.set_index("time")
|
|
931
|
+
# tmp = tmp.resample("H").mean()
|
|
932
|
+
# tmp = tmp.interpolate(limit=24*7)
|
|
933
|
+
temp = tmp.loc[dates].values
|
|
934
|
+
for i in (range(len(dates))):
|
|
935
|
+
x = depth_cor[i, :].astype(float)
|
|
936
|
+
y = temp[i, :].astype(float)
|
|
937
|
+
ind_no_nan = ~np.isnan(x + y)
|
|
938
|
+
x = x[ind_no_nan]
|
|
939
|
+
y = y[ind_no_nan]
|
|
940
|
+
x, indices = np.unique(x, return_index=True)
|
|
941
|
+
y = y[indices]
|
|
942
|
+
if len(x) < 2 or np.min(np.abs(x - depth)) > min_diff_to_depth:
|
|
943
|
+
continue
|
|
944
|
+
f = interp1d(x, y, kind, fill_value="extrapolate")
|
|
945
|
+
df_interp.iloc[i, 1] = np.min(f(depth), 0)
|
|
946
|
+
|
|
947
|
+
if df_interp.iloc[:5, 1].std() > 0.1:
|
|
948
|
+
df_interp.iloc[:5, 1] = np.nan
|
|
949
|
+
|
|
950
|
+
return df_interp
|
|
951
|
+
|
|
952
|
+
def gps_coordinate_postprocessing(ds, var, station_config={}):
|
|
953
|
+
# saving the static value of 'lat','lon' or 'alt' stored in attribute
|
|
954
|
+
# as it might be the only coordinate available for certain stations (e.g. bedrock)
|
|
955
|
+
var_out = var.replace('gps_','')
|
|
956
|
+
coord_names = {'lat':'latitude','lon':'longitude', 'alt':'altitude'}
|
|
957
|
+
if coord_names[var_out] in list(ds.attrs.keys()):
|
|
958
|
+
static_value = float(ds.attrs[coord_names[var_out]])
|
|
959
|
+
else:
|
|
960
|
+
static_value = np.nan
|
|
961
|
+
|
|
962
|
+
# if there is no gps observations, then we use the static value repeated
|
|
963
|
+
# for each time stamp
|
|
964
|
+
if var not in ds.data_vars:
|
|
965
|
+
print('no',var,'at', ds.attrs['station_id'])
|
|
966
|
+
return np.ones_like(ds['t_u'].data)*static_value
|
|
967
|
+
|
|
968
|
+
if ds[var].isnull().all():
|
|
969
|
+
print('no',var,'at',ds.attrs['station_id'])
|
|
970
|
+
return np.ones_like(ds['t_u'].data)*static_value
|
|
971
|
+
|
|
972
|
+
# Extract station relocations from the config dict
|
|
973
|
+
station_relocations = station_config.get("station_relocation", [])
|
|
974
|
+
|
|
975
|
+
# Convert the ISO8601 strings to pandas datetime objects
|
|
976
|
+
breaks = [pd.to_datetime(date_str) for date_str in station_relocations]
|
|
977
|
+
if len(breaks)==0:
|
|
978
|
+
logger.info('processing '+var+' without relocation')
|
|
979
|
+
else:
|
|
980
|
+
logger.info('processing '+var+' with relocation on ' + ', '.join([br.strftime('%Y-%m-%dT%H:%M:%S') for br in breaks]))
|
|
981
|
+
|
|
982
|
+
return piecewise_smoothing_and_interpolation(ds[var].to_series(), breaks)
|
|
983
|
+
|
|
984
|
+
def piecewise_smoothing_and_interpolation(data_series, breaks):
|
|
985
|
+
'''Smoothes, inter- or extrapolate the GPS observations. The processing is
|
|
986
|
+
done piecewise so that each period between station relocations are done
|
|
987
|
+
separately (no smoothing of the jump due to relocation). Piecewise linear
|
|
988
|
+
regression is then used to smooth the available observations. Then this
|
|
989
|
+
smoothed curve is interpolated linearly over internal gaps. Eventually, this
|
|
990
|
+
interpolated curve is extrapolated linearly for timestamps before the first
|
|
991
|
+
valid measurement and after the last valid measurement.
|
|
992
|
+
|
|
993
|
+
Parameters
|
|
994
|
+
----------
|
|
995
|
+
data_series : pandas.Series
|
|
996
|
+
Series of observed latitude, longitude or elevation with datetime index.
|
|
997
|
+
breaks: list
|
|
998
|
+
List of timestamps of station relocation. First and last item should be
|
|
999
|
+
None so that they can be used in slice(breaks[i], breaks[i+1])
|
|
1000
|
+
|
|
1001
|
+
Returns
|
|
1002
|
+
-------
|
|
1003
|
+
np.ndarray
|
|
1004
|
+
Smoothed and interpolated values corresponding to the input series.
|
|
1005
|
+
'''
|
|
1006
|
+
df_all = pd.Series(dtype=float) # Initialize an empty Series to gather all smoothed pieces
|
|
1007
|
+
breaks = [None] + breaks + [None]
|
|
1008
|
+
_inferred_series = []
|
|
1009
|
+
for i in range(len(breaks) - 1):
|
|
1010
|
+
df = data_series.loc[slice(breaks[i], breaks[i+1])]
|
|
1011
|
+
|
|
1012
|
+
# Drop NaN values and calculate the number of segments based on valid data
|
|
1013
|
+
df_valid = df.dropna()
|
|
1014
|
+
if df_valid.shape[0] > 2:
|
|
1015
|
+
# Fit linear regression model to the valid data range
|
|
1016
|
+
x = pd.to_numeric(df_valid.index).values.reshape(-1, 1)
|
|
1017
|
+
y = df_valid.values.reshape(-1, 1)
|
|
1018
|
+
|
|
1019
|
+
model = LinearRegression()
|
|
1020
|
+
model.fit(x, y)
|
|
1021
|
+
|
|
1022
|
+
# Predict using the model for the entire segment range
|
|
1023
|
+
x_pred = pd.to_numeric(df.index).values.reshape(-1, 1)
|
|
1024
|
+
|
|
1025
|
+
y_pred = model.predict(x_pred)
|
|
1026
|
+
df = pd.Series(y_pred.flatten(), index=df.index)
|
|
1027
|
+
# adds to list the predicted values for the current segment
|
|
1028
|
+
_inferred_series.append(df)
|
|
1029
|
+
|
|
1030
|
+
df_all = pd.concat(_inferred_series)
|
|
1031
|
+
|
|
1032
|
+
# Fill internal gaps with linear interpolation
|
|
1033
|
+
df_all = df_all.interpolate(method='linear', limit_area='inside')
|
|
1034
|
+
|
|
1035
|
+
# Remove duplicate indices and return values as numpy array
|
|
1036
|
+
df_all = df_all[~df_all.index.duplicated(keep='last')]
|
|
1037
|
+
return df_all.values
|
|
1038
|
+
|
|
+def calculate_tubulent_heat_fluxes(T_0, T_h, Tsurf_h, WS_h, z_WS, z_T, q_h, p_h,
                     kappa=0.4, WS_lim=1., z_0=0.001, g=9.82, es_0=6.1071, eps=0.622,
                     gamma=16., L_sub=2.83e6, L_dif_max=0.01, c_pd=1005., aa=0.7,
-                    bb=0.75, cc=5., dd=0.35):
+                    bb=0.75, cc=5., dd=0.35, R_d=287.05):
     '''Calculate latent and sensible heat flux using the bulk calculation
     method
 
     Parameters
     ----------
     T_0 : int
-
+        Freezing point temperature
     T_h : xarray.DataArray
         Air temperature
     Tsurf_h : xarray.DataArray
@@ -143,8 +1059,6 @@ def calcHeatFlux(T_0, T_h, Tsurf_h, rho_atm, WS_h, z_WS, z_T, nu, q_h, p_h,
         Height of anemometer
     z_T : float
         Height of thermometer
-    nu : float
-        Kinematic viscosity of air
     q_h : xarray.DataArray
         Specific humidity
     p_h : xarray.DataArray
@@ -159,7 +1073,7 @@ def calcHeatFlux(T_0, T_h, Tsurf_h, rho_atm, WS_h, z_WS, z_T, nu, q_h, p_h,
     g : int
         Gravitational acceleration (m/s2). Default is 9.82.
     es_0 : int
-        Saturation vapour pressure at the melting point (hPa). Default is 6.1071.
+        Saturation vapour pressure at the melting point (hPa). Default is 6.1071.
     eps : int
         Ratio of molar masses of vapor and dry air (0.622).
     gamma : int
@@ -182,6 +1096,8 @@ def calcHeatFlux(T_0, T_h, Tsurf_h, rho_atm, WS_h, z_WS, z_T, nu, q_h, p_h,
     dd : int
         Flux profile correction constants (Holtslag & De Bruin '88). Default is
         0.35.
+    R_d : int
+        Gas constant of dry air. Default is 287.05.
 
     Returns
     -------
@@ -190,6 +1106,9 @@ def calcHeatFlux(T_0, T_h, Tsurf_h, rho_atm, WS_h, z_WS, z_T, nu, q_h, p_h,
     LHF_h : xarray.DataArray
         Latent heat flux
     '''
+    rho_atm = 100 * p_h / R_d / (T_h + T_0) # Calculate atmospheric density
+    nu = calculate_viscosity(T_h, T_0, rho_atm) # Calculate kinematic viscosity
+
     SHF_h = xr.zeros_like(T_h) # Create empty xarrays
     LHF_h = xr.zeros_like(T_h)
     L = xr.full_like(T_h, 1E5)
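With R_d added as a keyword argument, atmospheric density and kinematic viscosity are now derived inside the function rather than passed in as rho_atm and nu. A rough numeric check of the added density line (the pressure and temperature values are illustrative; T_0 is taken as 273.15 K, i.e. the freezing point):

    p_h, T_h, T_0, R_d = 800.0, -10.0, 273.15, 287.05   # hPa, degC, K, J/(kg K)
    rho_atm = 100 * p_h / R_d / (T_h + T_0)              # = 80000 / (287.05 * 263.15)
    print(round(rho_atm, 3))                             # ~1.059 kg/m^3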
@@ -275,10 +1194,14 @@ def calcHeatFlux(T_0, T_h, Tsurf_h, rho_atm, WS_h, z_WS, z_T, nu, q_h, p_h,
         # If n_elements(where(L_dif > L_dif_max)) eq 1 then break
         if np.all(L_dif <= L_dif_max):
             break
-
+
+    HF_nan = np.isnan(p_h) | np.isnan(T_h) | np.isnan(Tsurf_h) \
+        | np.isnan(q_h) | np.isnan(WS_h) | np.isnan(z_T)
+    SHF_h[HF_nan] = np.nan
+    LHF_h[HF_nan] = np.nan
     return SHF_h, LHF_h
 
-def calcVisc(T_h, T_0, rho_atm):
+def calculate_viscosity(T_h, T_0, rho_atm):
     '''Calculate kinematic viscosity of air
 
     Parameters
@@ -301,9 +1224,8 @@ def calcVisc(T_h, T_0, rho_atm):
     # Kinematic viscosity of air in m^2/s
     return mu / rho_atm
 
-def calcHumid(T_0, T_100, T_h, es_0, es_100, eps, p_h, RH_cor_h):
+def calculate_specific_humidity(T_0, T_100, T_h, p_h, RH_cor_h, es_0=6.1071, es_100=1013.246, eps=0.622):
     '''Calculate specific humidity
-
     Parameters
     ----------
     T_0 : float
@@ -312,16 +1234,16 @@ def calcHumid(T_0, T_100, T_h, es_0, es_100, eps, p_h, RH_cor_h):
         Steam point temperature in Kelvin
     T_h : xarray.DataArray
         Air temperature
-    eps : int
-        ratio of molar masses of vapor and dry air (0.622)
-    es_0 : float
-        Saturation vapour pressure at the melting point (hPa)
-    es_100 : float
-        Saturation vapour pressure at steam point temperature (hPa)
     p_h : xarray.DataArray
         Air pressure
     RH_cor_h : xarray.DataArray
         Relative humidity corrected
+    es_0 : float
+        Saturation vapour pressure at the melting point (hPa)
+    es_100 : float
+        Saturation vapour pressure at steam point temperature (hPa)
+    eps : int
+        ratio of molar masses of vapor and dry air (0.622)
 
     Returns
     -------
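Because es_0, es_100 and eps now carry defaults, callers of the renamed helper only need to pass the temperatures, pressure and corrected relative humidity. A hedged call sketch (the dataset variable names t_u, p_u and rh_u_cor are assumptions for illustration, not taken from this diff):

    q_h = calculate_specific_humidity(T_0=273.15, T_100=373.15,
                                      T_h=ds['t_u'], p_h=ds['p_u'],
                                      RH_cor_h=ds['rh_u_cor'])
    # equivalent to the old calcHumid call, with es_0=6.1071 hPa,
    # es_100=1013.246 hPa and eps=0.622 supplied implicitly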
@@ -346,86 +1268,11 @@ def calcHumid(T_0, T_100, T_h, es_0, es_100, eps, p_h, RH_cor_h):
     freezing = T_h < 0
     q_sat[freezing] = eps * es_ice[freezing] / (p_h[freezing] - (1 - eps) * es_ice[freezing])
 
+    q_nan = np.isnan(T_h) | np.isnan(p_h)
+    q_sat[q_nan] = np.nan
+
     # Convert to kg/kg
     return RH_cor_h * q_sat / 100
-
-def cleanHeatFlux(SHF, LHF, T, Tsurf, p, WS, RH_cor, z_boom):
-    '''Find invalid heat flux data values and replace with NaNs, based on
-    air temperature, surface temperature, air pressure, wind speed,
-    corrected relative humidity, and boom height
-
-    Parameters
-    ----------
-    SHF : xarray.DataArray
-        Sensible heat flux
-    LHF : xarray.DataArray
-        Latent heat flux
-    T : xarray.DataArray
-        Air temperature
-    Tsurf : xarray.DataArray
-        Surface temperature
-    p : xarray.DataArray
-        Air pressure
-    WS : xarray.DataArray
-        Wind speed
-    RH_cor : xarray.DataArray
-        Relative humidity corrected
-    z_boom : xarray.DataArray
-        Boom height
-
-    Returns
-    -------
-    SHF : xarray.DataArray
-        Sensible heat flux corrected
-    LHF : xarray.DataArray
-        Latent heat flux corrected
-    '''
-    HF_nan = np.isnan(p) | np.isnan(T) | np.isnan(Tsurf) \
-        | np.isnan(RH_cor) | np.isnan(WS) | np.isnan(z_boom)
-    SHF[HF_nan] = np.nan
-    LHF[HF_nan] = np.nan
-    return SHF, LHF
-
-def cleanSpHumid(q_h, T, Tsurf, p, RH_cor):
-    '''Find invalid specific humidity data values and replace with NaNs,
-    based on air temperature, surface temperature, air pressure,
-    and corrected relative humidity
-
-    Parameters
-    ----------
-    q_h : xarray.DataArray
-        Specific humidity
-    T : xarray.DataArray
-        Air temperature
-    Tsurf : xarray.DataArray
-        Surface temperature
-    p : xarray.DataArray
-        Air pressure
-    RH_cor : xarray.DataArray
-        Relative humidity corrected
-
-    Returns
-    -------
-    q_h : xarray.DataArray
-        Specific humidity corrected'''
-    q_nan = np.isnan(T) | np.isnan(RH_cor) | np.isnan(p) | np.isnan(Tsurf)
-    q_h[q_nan] = np.nan
-    return q_h
-
-
-def _calcAtmosDens(p_h, R_d, T_h, T_0): # TODO: check this shouldn't be in this step somewhere
-    '''Calculate atmospheric density'''
-    return 100 * p_h / R_d / (T_h + T_0)
-
-def _getTempK(T_0):
-    '''Return steam point temperature in K'''
-    return T_0+100
-
-def _getRotation():
-    '''Return degrees-to-radians and radians-to-degrees'''
-    deg2rad = np.pi / 180
-    rad2deg = 1 / deg2rad
-    return deg2rad, rad2deg
 
 if __name__ == "__main__":
     # unittest.main()