pypromice 1.3.3__py3-none-any.whl → 1.3.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pypromice might be problematic.
- pypromice/postprocess/bufr_to_csv.py +11 -0
- pypromice/postprocess/bufr_utilities.py +489 -0
- pypromice/postprocess/get_bufr.py +622 -284
- pypromice/postprocess/positions_seed.csv +5 -0
- pypromice/postprocess/real_time_utilities.py +241 -0
- pypromice/postprocess/station_configurations.toml +762 -0
- pypromice/process/L0toL1.py +4 -2
- pypromice/process/L1toL2.py +1 -0
- pypromice/process/value_clipping.py +4 -13
- pypromice/process/variables.csv +13 -15
- pypromice/qc/github_data_issues.py +10 -40
- {pypromice-1.3.3.dist-info → pypromice-1.3.5.dist-info}/METADATA +2 -1
- {pypromice-1.3.3.dist-info → pypromice-1.3.5.dist-info}/RECORD +17 -14
- {pypromice-1.3.3.dist-info → pypromice-1.3.5.dist-info}/WHEEL +1 -1
- {pypromice-1.3.3.dist-info → pypromice-1.3.5.dist-info}/entry_points.txt +1 -1
- pypromice/postprocess/csv2bufr.py +0 -508
- pypromice/postprocess/wmo_config.py +0 -179
- {pypromice-1.3.3.dist-info → pypromice-1.3.5.dist-info}/LICENSE.txt +0 -0
- {pypromice-1.3.3.dist-info → pypromice-1.3.5.dist-info}/top_level.txt +0 -0
--- pypromice/postprocess/csv2bufr.py
+++ /dev/null
@@ -1,508 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Post-processing functions for AWS station data, such as converting PROMICE and GC-Net data files to WMO-compliant BUFR files
-"""
-import pandas as pd
-import sys, traceback
-import os
-from datetime import datetime, timedelta
-from eccodes import codes_set, codes_write, codes_release, \
-    codes_bufr_new_from_samples, CodesInternalError, \
-    codes_is_defined
-import math
-import numpy as np
-from sklearn.linear_model import LinearRegression
-
-from pypromice.postprocess.wmo_config import ibufr_settings, stid_to_skip, vars_to_skip, positions_update_timestamp_only
-
-# from IPython import embed
-
-# To suppress pandas SettingWithCopyWarning
-pd.options.mode.chained_assignment = None # default='warn'
-
-#------------------------------------------------------------------------------
-
-def getBUFR(s1, outBUFR, stid, land_stids):
-    '''Construct and export .bufr messages to file from Series or DataFrame.
-    PRIMARY DRIVER FUNCTION
-
-    Parameters
-    ----------
-    s1 : pandas.Series
-        Pandas series of single most recent obset for a station
-    outBUFR : str
-        File path that .bufr file will be exported to
-    stid : str
-        The station ID to be processed. e.g. 'KPC_U'
-    land_stids : list
-        List of station IDs for land-based stations
-
-    Returns
-    -------
-    remove_file : boolean
-        Status object to return to getBUFR indicating successful completion
-    '''
-    remove_file = False
-
-    # Open bufr file
-    fout = open(outBUFR, 'wb')
-
-    # Create new bufr message to write to
-    ibufr = codes_bufr_new_from_samples('BUFR4')
-    timestamp = datetime.strptime(s1['time'], '%Y-%m-%d %H:%M:%S')
-    config_key = 'mobile'
-    if stid in land_stids:
-        config_key = 'land'
-    try:
-        # we must pass all the following functions without error.
-        # If handled (or unhandled) errors occur, we re-raise and
-        # the exceptions below will set remove_file to True.
-        setTemplate(ibufr, timestamp, stid, config_key)
-        setStation(ibufr, stid, config_key)
-        setAWSvariables(ibufr, s1, timestamp, stid)
-
-        #Encode keys in data section
-        codes_set(ibufr, 'pack', 1)
-
-        #Write bufr message to bufr file
-        codes_write(ibufr, fout)
-
-    except CodesInternalError as ec:
-        print(traceback.format_exc())
-        # print(ec)
-        print(f'-----> CodesInternalError in getBUFR for {stid}!')
-        remove_file = True
-    except Exception as e:
-        # Catch anything else here...
-        print(traceback.format_exc())
-        # print(e)
-        print(f'-----> ERROR in getBUFR for {stid}')
-        remove_file = True
-
-    codes_release(ibufr)
-
-    fout.close()
-
-    if remove_file is True:
-        print(f'-----> Removing file for {stid}')
-        os.remove(fout.name)
-    return remove_file
-
-
-def setTemplate(ibufr, timestamp, stid, config_key):
-    '''Set bufr message template.
-
-    Parameters
-    ----------
-    ibufr : bufr.msg
-        Bufr message object
-    timestamp : datetime.Datetime
-        Timestamp of observation
-    stid : str
-        The station ID to be processed. e.g. 'KPC_U'
-    config_key : str
-        Defines which config dict to use in wmo_config.ibufr_settings, 'mobile' or 'land'
-    '''
-    for k, v in ibufr_settings[config_key]['template'].items():
-        if codes_is_defined(ibufr, k) == 1:
-            codes_set(ibufr, k, v)
-        else:
-            print('-----> setTemplate Key not defined: {}'.format(k))
-            continue
-
-    codes_set(ibufr, 'typicalYear', timestamp.year)
-    codes_set(ibufr, 'typicalMonth', timestamp.month)
-    codes_set(ibufr, 'typicalDay', timestamp.day)
-    codes_set(ibufr, 'typicalHour', timestamp.hour)
-    codes_set(ibufr, 'typicalMinute', timestamp.minute)
-    # codes_set(ibufr, 'typicalSecond', timestamp.second)
-
-
-def setStation(ibufr, stid, config_key):
-    '''Set station-specific info to bufr message.
-
-    Parameters
-    ----------
-    ibufr : bufr.msg
-        Bufr message object
-    stid : str
-        The station ID to be processed. e.g. 'KPC_U'
-    config_key : str
-        Defines which config dict to use in wmo_config.ibufr_settings, 'mobile' or 'land'
-    '''
-    station_indentifier_keys = ('shipOrMobileLandStationIdentifier','stationNumber')
-    for k, v in ibufr_settings[config_key]['station'].items():
-        if k in station_indentifier_keys:
-            # Deal with any string replacement of stid names before indexing
-            if ('v3' in stid) and (stid.replace('v3','') in stid_to_skip['use_v3']):
-                # We are reading the v3 station ID file, and the config says to use it!
-                # But we need to write to BUFR without v3 name
-                stid = stid.replace('v3','')
-            if stid == 'THU_U2':
-                stid = 'THU_U'
-            if stid in ('JAR_O','SWC_O'):
-                stid = stid.replace('_O','')
-            if stid == 'CEN2':
-                stid = 'CEN'
-            try:
-                codes_set(ibufr, k, v[stid])
-            except KeyError as e:
-                print(f'-----> ID not found for {stid}')
-                raise # throw error back to getBUFR where it is handled
-        else:
-            if codes_is_defined(ibufr, k) == 1:
-                codes_set(ibufr, k, v)
-            else:
-                print(f'-----> setStation Key for {stid} not defined: {k}')
-                continue
-
-
-def setAWSvariables(ibufr, row, timestamp, stid):
-    '''Set AWS measurements to bufr message.
-
-    Parameters
-    ----------
-    ibufr : bufr.msg
-        Bufr message object
-    row : pandas.DataFrame row, or pandas.Series
-        DataFrame row (or Series) with AWS variable data
-    timestamp : datetime.datetime
-        timestamp for this row
-    stid : str
-        The station ID to be processed. e.g. 'KPC_U'
-    '''
-    # Set timestamp fields
-    setBUFRvalue(ibufr, 'year', timestamp.year)
-    setBUFRvalue(ibufr, 'month', timestamp.month)
-    setBUFRvalue(ibufr, 'day', timestamp.day)
-    setBUFRvalue(ibufr, 'hour', timestamp.hour)
-    setBUFRvalue(ibufr, 'minute', timestamp.minute)
-
-    vars_dict = {
-        'relativeHumidity': 'rh_i', # DMI wants non-corrected rh
-        'airTemperature': 't_i',
-        'pressure': 'p_i',
-        'windDirection': 'wdir_i',
-        'windSpeed': 'wspd_i'
-    }
-    for bufr_key, source_var in vars_dict.items():
-        if (stid in vars_to_skip) and (source_var in vars_to_skip[stid]):
-            print('----> Skipping var: {} {}'.format(stid,source_var))
-        else:
-            setBUFRvalue(ibufr, bufr_key, row[source_var])
-
-    # Set position metadata
-    setBUFRvalue(ibufr, 'latitude', row['gps_lat_fit'])
-    setBUFRvalue(ibufr, 'longitude', row['gps_lon_fit'])
-    setBUFRvalue(ibufr, 'heightOfStationGroundAboveMeanSeaLevel', row['gps_alt_fit']) # also height and heightOfStation?
-
-    # The ## in the codes_set() indicate the position in the BUFR for the parameter.
-    # e.g. #10#timePeriod will assign to the 10th occurence of "timePeriod", which corresponds
-    # to the wind speed section. Note that both the "synopMobil" and "synopLand" templates
-    # appear to have the same positions for all parameters that are set here.
-    # View the output BUFR to see section keys with 'bufr_dump filename.bufr'.
-    if math.isnan(row['wspd_i']) is False:
-        #Set time significance (2=temporally averaged)
-        codes_set(ibufr, '#1#timeSignificance', 2)
-        #Set monitoring time period (-10=10 minutes)
-        codes_set(ibufr, '#10#timePeriod', -10)
-
-    #Set measurement heights
-    if math.isnan(row['z_boom_u_smooth']) is False:
-        codes_set(ibufr,
-                  '#1#heightOfSensorAboveLocalGroundOrDeckOfMarinePlatform',
-                  row['z_boom_u_smooth']-0.1) # For air temp and RH
-        codes_set(ibufr,
-                  '#7#heightOfSensorAboveLocalGroundOrDeckOfMarinePlatform',
-                  row['z_boom_u_smooth']+0.4) # For wind speed
-        if math.isnan(row['gps_alt_fit']) is False:
-            codes_set(ibufr, 'heightOfBarometerAboveMeanSeaLevel',
-                      row['gps_alt_fit']+row['z_boom_u_smooth']) # For pressure
-
-
-def setBUFRvalue(ibufr, b_name, value):
-    '''Set variable in BUFR message
-    Called in setAWSvariables() to make sure we aren't passing NaNs
-
-    Parameters
-    ----------
-    ibufr : bufr.msg
-        Active BUFR message
-    b_name : str
-        BUFR message variable name
-    value : int/float
-        Value to be assigned to variable
-    '''
-    if math.isnan(value) is False:
-        try:
-            codes_set(ibufr, b_name, value)
-        except CodesInternalError as ec:
-            print(f'{ec}: {b_name}')
-            print('-----> CodesInternalError in setBUFRvalue!')
-            raise # throw error back to getBUFR where it is handled
-    else:
-        print('----> {} {}'.format(b_name, value))
-
-
-def linear_fit(df, column, decimals, stid):
-    '''Apply a linear regression to the input column
-
-    Linear regression is following:
-    https://realpython.com/linear-regression-in-python/#simple-linear-regression-with-scikit-learn
-
-    Parameters
-    ----------
-    df : pandas.Dataframe
-        datetime-indexed df, limited to desired time length for linear fit
-    column : str
-        The target column for applying linear fit
-    decimals : int
-        How many decimals to round the output fit values
-    stid : str
-        The station ID to be processed. e.g. 'KPC_U'
-    extrapolate : boolean
-        If False (default), only apply linear fit to timestamps with valid data
-        If True, then extrapolate positions based on linear fit model
-
-    Returns
-    -------
-    df : pandas.Dataframe
-        The original input df, with added column for the linear regression values
-    pos_valid : boolean
-        If True (default), sufficient valid data found in recent (limited) data.
-        If False, we need to return this status to find_positions and use full station history instead.
-    '''
-    # print('=========== linear_fit ===========')
-    pos_valid = True
-    if column in df:
-        df_dropna = df[df[column].notna()] # limit to only non-nan for the target column
-        # if len(df_dropna[column].index.normalize().unique()) >= 10: # must have at least 10 unique days
-        if len(df_dropna[column]) >= 15: # must have at least 15 data points (could be hourly or daily)
-            # Get datetime x values into epoch sec integers
-            x_epoch = df_dropna.index.values.astype(np.int64) // 10 ** 9
-            x = x_epoch.reshape(-1,1)
-            y = df_dropna[column].values # can also reshape this, but not necessary
-            model = LinearRegression().fit(x, y)
-
-            # Adding prediction back to original df
-            x_all = df.index.values.astype(np.int64) // 10 ** 9
-            df['{}_fit'.format(column)] = model.predict(x_all.reshape(-1,1)).round(decimals=decimals)
-
-            # Plot data if desired
-            # if stid == 'LYN_T':
-            #     if (column == 'gps_lat') or (column == 'gps_lon') or (column == 'gps_alt'):
-            #         import matplotlib.pyplot as plt
-            #         plt.figure()
-            #         df_dropna[column].plot(marker='o',ls='None')
-            #         df['{}_fit'.format(column)].plot(marker='o', ls='None', color='red')
-            #         plt.title('{} {}'.format(stid, column))
-            #         plt.xlim(df.index.min(),df.index.max())
-            #         plt.show()
-        else:
-            # Do not have 10 days of valid data, or all data is NaN.
-            print('----> Insufficient {} data for {}!'.format(column, stid))
-            pos_valid = False
-    else:
-        print('----> {} not found in dataframe!'.format(column))
-        pass
-    return df, pos_valid
-
-
-def rolling_window(df, column, window, min_periods, decimals):
-    '''Apply a rolling window (smoothing) to the input column
-
-    Parameters
-    ----------
-    df : pandas.Dataframe
-        datetime-indexed df
-    column : str
-        The target column for applying rolling window
-    window : str
-        Window size (e.g. '24H' or 30D')
-    min_periods : int
-        Minimum number of observations in window required to have a value;
-        otherwise, result is np.nan.
-    decimals : int
-        How many decimal places to round the output smoothed values
-
-    Returns
-    -------
-    df : pandas.Dataframe
-        The original input df, with added column for the smoothed values
-    '''
-    df['{}_smooth'.format(column)] = df[column].rolling(
-        window,
-        min_periods=min_periods,
-        center=True, # set the window labels as the center of the window
-        closed='both' # no points in the window are excluded (first or last)
-        ).median().round(decimals=decimals) # could also round to whole meters (decimals=0)
-    return df
-
-def round_values(s):
-    '''Enforce precision
-    Note the sensor accuracies listed here:
-    https://essd.copernicus.org/articles/13/3819/2021/#section8
-    In addition to sensor accuracy, WMO requires pressure and heights
-    to be reported at 0.1 precision.
-
-    Parameters
-    ----------
-    s : pandas series (could also be a dataframe)
-
-    Returns
-    -------
-    s : modified pandas series (could also be a dataframe)
-    '''
-    s['rh_i'] = s['rh_i'].round(decimals=0)
-    s['wspd_i'] = s['wspd_i'].round(decimals=1)
-    s['wdir_i'] = s['wdir_i'].round(decimals=0)
-    s['t_i'] = s['t_i'].round(decimals=1)
-    s['p_i'] = s['p_i'].round(decimals=1)
-
-    # gps_lat,gps_lon,gps_alt,z_boom_u are all rounded in linear_fit() or rolling_window()
-    return s
-
-
-def find_positions(df, stid, time_limit, current_timestamp=None, positions=None):
-    ''' Driver function to run linear_fit() and set valid lat, lon, and alt
-    to df_limited, which is then used to set position data in BUFR.
-    If 'positions' is not None (must pass --positions arg), we also write to
-    the positions dict which will be written to AWS_latest_locations.csv for
-    all stations (whether processed or skipped)
-
-    Parameters
-    ----------
-    df : pandas dataframe
-        The full tx dataframe
-    stid : str
-        The station ID, such as NUK_L
-    time_limit : str
-        Previous time to limit dataframe before applying linear regression.
-        (e.g. '3M')
-    current_timestamp : datetime64 time
-        The timestamp for the most recent valid instantaneous data
-    positions : dict, or None
-        Dict storing current station positions. If present, we are writing
-        positions to file.
-
-    Returns
-    -------
-    df_limited : pandas dataframe
-        Dataframe limited to time_limit, and including position data
-    positions : dict
-        Modified dict storing most-recent station positions.
-    '''
-    if stid in positions_update_timestamp_only:
-        # we don't have a position-associated timestamp, just use the most recent transmission.
-        # e.g. KAN_B (does not transmit position, and currently skipped because does not transmit
-        # instantaneous obs). If KAN_B ever submits inst data (but not position) we will need to use
-        # the config-seeded position coordinates to set positions here in df_limited.
-        positions[stid]['timestamp'] = df.index.max()
-        df_limited = df # just to return something
-    else:
-        print(f'finding positions for {stid}')
-        df_limited = df.last(time_limit).copy()
-        print(f'last transmission: {df_limited.index.max()}')
-
-        # Extrapolate recommended for altitude, optional for lat and lon.
-        df_limited, lat_valid = linear_fit(df_limited, 'gps_lat', 6, stid)
-        df_limited, lon_valid = linear_fit(df_limited, 'gps_lon', 6, stid)
-        df_limited, alt_valid = linear_fit(df_limited, 'gps_alt', 1, stid)
-
-        # If we have no valid lat, lon or alt data in the df_limited window, then interpolate
-        # using full tx dataset.
-        check_valid = {'gps_lat': lat_valid, 'gps_lon': lon_valid, 'gps_alt': alt_valid}
-        check_valid_again = {}
-        for k,v in check_valid.items():
-            if v is False:
-                print(f'----> Using full history for linear extrapolation: {k}')
-                print(f'first transmission: {df.index.min()}')
-                if k == 'gps_alt':
-                    df, valid = linear_fit(df, k, 1, stid)
-                else:
-                    df, valid = linear_fit(df, k, 6, stid)
-                check_valid_again[k] = valid
-                if check_valid_again[k] is True:
-                    df_limited[f'{k}_fit'] = df.last(time_limit)[f'{k}_fit']
-                else:
-                    print(f'----> No data exists for {k}. Stubbing out with NaN.')
-                    df_limited[f'{k}_fit'] = pd.Series(np.nan, index= df.last(time_limit).index)
-
-        # SET POSITIONS FOR CSV FILE
-        if positions is not None:
-            if current_timestamp is None:
-                # This is old data (> 2 days), not submitting to DMI, but writing to positions csv
-                # Find the most recent row that has valid lat, lon and alt
-                last_valid_timestamp = df_limited[['gps_lon_fit','gps_lat_fit','gps_alt_fit']].dropna().last_valid_index()
-                if last_valid_timestamp is None:
-                    # we are likely missing gps_alt_fit
-                    last_valid_timestamp = df_limited[['gps_lon_fit','gps_lat_fit']].dropna().last_valid_index()
-                    if last_valid_timestamp is None:
-                        # last ditch effort
-                        last_valid_timestamp = df_limited.index.max()
-                s = df_limited.loc[last_valid_timestamp]
-            else:
-                s = df_limited.loc[current_timestamp]
-            print(f'writing positions for {stid}')
-            pos_strings = ['lat','lon','alt']
-            for p in pos_strings:
-                if (f'gps_{p}_fit' in s) and (pd.isna(s[f'gps_{p}_fit']) is False):
-                    positions[stid][p] = s[f'gps_{p}_fit']
-            # Add timestamp
-            positions[stid]['timestamp'] = s['time']
-
-    return df_limited, positions if positions else df_limited
-
-
-def min_data_check(s, stid):
-    '''Check that we have minimum required fields to proceed with writing to BUFR
-    For wx vars, we currently require both air temp and pressure to be non-NaN.
-    If you know a specific var is reporting bad data, you can ignore just that var
-    using the vars_to_skip dict in wmo_config.
-
-    Parameters
-    ----------
-    s : pandas series
-        The current obset we are working with (for BUFR submission)
-    stid : str
-        The station ID, such as NUK_L
-
-    Returns
-    -------
-    min_data_wx_result : bool
-        True (default), the test for min wx data passed. False, the test failed.
-    min_data_pos_result : bool
-        True (default), the test for min position data passed. False, the test failed.
-    '''
-    min_data_wx_result = True
-    min_data_pos_result = True
-
-    # Can use pd.isna() or math.isnan() below...
-
-    # Always require valid air temp and valid pressure (both must be non-nan)
-    # if (pd.isna(s['t_i']) is False) and (pd.isna(s['p_i']) is False):
-    #     pass
-    # else:
-    #     print('----> Failed min_data_check for air temp and pressure!')
-    #     min_data_wx_result = False
-
-    # If both air temp and pressure are nan, do not submit.
-    # This will allow the case of having only one or the other.
-    if (pd.isna(s['t_i']) is True) and (pd.isna(s['p_i']) is True):
-        print('----> Failed min_data_check for air temp and pressure!')
-        min_data_wx_result = False
-
-    # Missing just elevation OK
-    # if (pd.isna(s['gps_lat_fit']) is False) and (pd.isna(s['gps_lon_fit']) is False):
-    #     pass
-    # Require all three: lat, lon, elev
-    if ((pd.isna(s['gps_lat_fit']) is False) and
-        (pd.isna(s['gps_lon_fit']) is False) and
-        (pd.isna(s['gps_alt_fit']) is False)):
-        pass
-    else:
-        print('----> Failed min_data_check for position!')
-        min_data_pos_result = False
-
-    return min_data_wx_result, min_data_pos_result
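Taken together, the functions removed above formed the 1.3.3 BUFR pipeline: smooth the boom height, fit recent GPS positions, round values to reporting precision, check that minimum data are present, then encode one BUFR message per station. The following is a minimal sketch of that call chain against the 1.3.3 module; the synthetic DataFrame, its values, the station choice, and the output filename are made up for illustration (the actual orchestration presumably lived in get_bufr.py), and the final step requires eccodes to be installed.

import numpy as np
import pandas as pd
from pypromice.postprocess.csv2bufr import (
    find_positions, getBUFR, min_data_check, rolling_window, round_values)

# Hypothetical hourly transmissions for one station; column names follow the
# variables referenced in the removed module, values are placeholders.
idx = pd.date_range('2023-03-01', periods=48, freq='H')
df = pd.DataFrame({
    'time': idx.strftime('%Y-%m-%d %H:%M:%S'),
    't_i': -15.0, 'p_i': 800.0, 'rh_i': 70.0, 'wspd_i': 5.0, 'wdir_i': 180.0,
    'gps_lat': 64.17, 'gps_lon': -51.37, 'gps_alt': 550.0, 'z_boom_u': 2.1,
}, index=idx)

df = rolling_window(df, 'z_boom_u', '72H', min_periods=2, decimals=1)
df_limited, positions = find_positions(df, 'NUK_L', '3M',
                                       current_timestamp=idx[-1],
                                       positions={'NUK_L': {}})
df_limited = round_values(df_limited)   # enforce reporting precision
s1 = df_limited.loc[idx[-1]]            # most recent obset as a Series
wx_ok, pos_ok = min_data_check(s1, 'NUK_L')
if wx_ok and pos_ok:
    # land_stids taken from wmo_config's 'land' station dict (KAN_B, WEG_B)
    getBUFR(s1, 'NUK_L.bufr', 'NUK_L', land_stids=['KAN_B', 'WEG_B'])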
--- pypromice/postprocess/wmo_config.py
+++ /dev/null
@@ -1,179 +0,0 @@
-#!/usr/bin/env python3
-'''
-Config file to store wmo-related reference objects
-Imported by csv2bufr.py
-Patrick Wright, GEUS
-Nov, 2022
-
-see documentation here:
-https://confluence.ecmwf.int/display/ECC/Documentation
-
-BUFR element table for WMO master table version 32
-https://confluence.ecmwf.int/display/ECC/WMO%3D32+element+table
-'''
-stid_to_skip = { # All the following IDS will not be processed or submitted
-    'test': ['XXX'],
-    'not_registered': ['UWN','ZAK_A','WEG_L'], # Need to register UWN with Norwegion met
-    'discontinued': ['CEN1','TAS_U','QAS_A','NUK_N','THU_U','JAR','SWC'],
-    'no_instantaneous': ['ZAK_L','ZAK_U','KAN_B'], # currently not transmitting instantaneous values
-    'suspect_data': [], # instantaneous data is suspect
-    'use_v3': ['NUK_U', 'ZAK_L', 'ZAK_U', 'QAS_U', 'QAS_L', 'QAS_M', 'KAN_L'], # use v3 versions instead (but registered IDs are non-v3 names)
-    'v3_bad': ['KPC_L', 'KPC_U', ]
-    }
-# NOTE: Use both THU_L and THU_L2; use ONLY THU_U2, but register it as THU_U (this is dealt with in csv2bufr.py)
-# NOTE: JAR_O and SWC_O are used, but registered as JAR and SWC
-# NOTE: CEN2 data is registered as CEN
-
-vars_to_skip = { # skip specific variables for stations
-    # If a variable has known bad data, use this dict to skip the var
-    # Note that if a station is not reporting both air temp and pressure it will be skipped,
-    # as currently implemented in csv2bufr.min_data_check().
-    # 'CP1': ['p_i'], # EXAMPLE
-    }
-
-positions_seed = { # discontinued stations that are not in aws-l3/tx but still present in aws-l3/level_3
-    # enter last known positions and timestamp of last transmission
-    'TAS_U': {'lat':65.6978, 'lon':-38.8668, 'alt':570.0, 'timestamp':'2015-08-13 14:00:00'},
-    'QAS_A': {'lat':61.243, 'lon':-46.7328, 'alt':1000.0, 'timestamp':'2015-08-24 17:00:00'},
-    'NUK_N': {'lat':64.9452, 'lon':-49.885, 'alt':920.0, 'timestamp':'2014-07-25 11:00:00'},
-    'KAN_B': {'lat':67.1252, 'lon':-50.1832, 'alt':350.0, 'timestamp':'2023-01-01 00:00:00'}, # bedrock station, not transmitting coordinates (placeholder timestamp)
-    }
-
-positions_update_timestamp_only = ('KAN_B',)
-
-ibufr_settings = {
-    'mobile': { # mobile stations (on moving ice)
-        'template': {
-            'unexpandedDescriptors': (307090), #message template, "synopMobil"
-            'edition': 4, #latest edition
-            'masterTableNumber': 0,
-            'masterTablesVersionNumber': 32, #DMI recommends any table version between 28-32
-            'localTablesVersionNumber': 0,
-            'bufrHeaderCentre': 94, #originating centre 98=ECMWF, 94=DMI
-            # 'bufrHeaderSubCentre': 0,
-            'updateSequenceNumber': 0, #0 is original message, incremented by 1 for updates
-            'dataCategory': 0, #surface data - land
-            'internationalDataSubCategory': 3, #hourly synoptic observations from mobile-land stations (SYNOP MOBIL)
-            # 'dataSubCategory': 0,
-            'observedData': 1,
-            'compressedData': 0,
-        },
-        'station': {
-            'shipOrMobileLandStationIdentifier': {
-                # use str; fails with "gribapi.errors.InvalidArgumentError: Invalid argument" if passed as int
-                'QAS_L': '04401',
-                'QAS_U': '04402',
-                'NUK_L': '04403',
-                'TAS_L': '04404',
-                'CEN': '04407',
-                'TAS_A': '04408',
-                'KAN_U': '04409',
-                'KAN_M': '04411',
-                'KAN_L': '04412',
-                'SCO_L': '04413',
-                'NAE': '04420',
-                'SCO_U': '04421',
-                'UPE_U': '04422',
-                'UPE_L': '04423',
-                'THU_L': '04424',
-                'TUN': '04425',
-                'KPC_U': '04427',
-                'KPC_L': '04428',
-                'LYN_T': '04429',
-                'MIT': '04430',
-                'HUM': '04432',
-                'NEM': '04436',
-                'NUK_K': '04437',
-                'NUK_U': '04439',
-                'QAS_M': '04441',
-                'CP1': '04442',
-                'NAU': '04443',
-                'LYN_L': '04450',
-                'EGP': '04451',
-                'JAR': '04452',
-                'THU_L2': '04453',
-                'THU_U': '04454',
-                'SWC': '04458',
-                'ZAK_L': '04461',
-                'ZAK_U': '04462',
-                'DY2': '04464',
-                'SDL': '04485',
-                'NSE': '04488',
-                'SDM': '04492'
-            },
-            # 'blockNumber': 4, #4 is Greenland, 6 is Denmark; not valid with synopMobil template
-            'regionNumber': 6, #6 is Europe, 7 is MISSING VALUE; not valid with synopLand template
-            'centre': 94, #94 is Copenhagen
-            # 'agencyInChargeOfOperatingObservingPlatform': , #nothing for DMI or GEUS in code table
-            # 'wmoRegionSubArea': 1,
-            # 'stationOrSiteName': , #not valid with synopMobil template
-            # 'shortStationName': , #not valid with synopMobil template
-            # 'longStationName': , #not valid with synopMobil template
-            # 'directionOfMotionOfMovingObservingPlatform': ,
-            # 'movingObservingPlatformSpeed': ,
-            'stationType': 0, #automatic station
-            'instrumentationForWindMeasurement': 8, #certified instruments
-            'stationElevationQualityMarkForMobileStations': 1, #Excellent - within 3m; not valid with synopLand template
-        }
-    },
-    'land': { # land-based (non-mobile) stations
-        'template': {
-            'unexpandedDescriptors': (307080), #message template, "synopLand"
-            'edition': 4, #latest edition
-            'masterTableNumber': 0,
-            'masterTablesVersionNumber': 32, #DMI recommends any table version between 28-32
-            'localTablesVersionNumber': 0,
-            'bufrHeaderCentre': 94, #originating centre 98=ECMWF, 94=DMI
-            # 'bufrHeaderSubCentre': 0,
-            'updateSequenceNumber': 0, #0 is original message, incremented by 1 for updates
-            'dataCategory': 0, #surface data - land
-            'internationalDataSubCategory': 0, #Hourly synoptic observations from fixed-land stations (SYNOP)
-            # 'dataSubCategory': 0,
-            'observedData': 1,
-            'compressedData': 0,
-        },
-        'station': {
-            'stationNumber': {
-                # use int; fails with "Segmentation fault (core dumped)" if passed as string
-                # This is the last three digits of the DMI Station ID, e.g. for "04401" use 401.
-                # 'blockNumber' is used to register the first part of the ID ("04")
-                'WEG_B': 460,
-                'KAN_B': 445
-            },
-            'blockNumber': 4, #4 is Greenland, 6 is Denmark; not valid with synopMobil template
-            # 'regionNumber': 6, #6 is Europe, 7 is MISSING VALUE; not valid with synopLand template
-            'centre': 94, #94 is Copenhagen
-            # 'agencyInChargeOfOperatingObservingPlatform': , #nothing for DMI or GEUS in code table
-            # 'wmoRegionSubArea': 1,
-            # 'stationOrSiteName': , #not valid with synopMobil template
-            # 'shortStationName': , #not valid with synopMobil template
-            # 'longStationName': , #not valid with synopMobil template
-            'stationType': 0, #automatic station
-            'instrumentationForWindMeasurement': 8, #certified instruments
-            # 'stationElevationQualityMarkForMobileStations': 1, #Excellent - within 3m; not valid with synopLand template
-        }
-    },
-}
-
-'''
-The following are not valid with either synopMobil or synopLand templates:
-'measuringEquipmentType': 0, #Pressure instrument associated with wind-measuring equipment;
-'temperatureObservationPrecision': 0.1, #Kelvin;
-'pressureSensorType': 0, #capacitance aneroid;
-'temperatureSensorType': 2, #capacitance bead;
-'humiditySensorType': 4, #capacitance sensor;
-'anemometerType': 1, #propeller rotor;
-'methodOfPrecipitationMeasurement': 1, #tipping bucket method;
-'''
-
-# Optionally export to file on disk
-
-# Export .json
-# import json
-# with open('ibufr_settings.json', 'w') as f:
-# json.dump(ibufr_settings, f)
-
-# Export .pickle
-# import pickle
-# with open('ibufr_settings.pickle', 'wb') as handle:
-# pickle.dump(ibufr_settings, handle, protocol=pickle.HIGHEST_PROTOCOL)
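For reference, the removed csv2bufr.py consumed these dictionaries by choosing the 'mobile' or 'land' branch and then looking up each station's identifier. The short sketch below illustrates that lookup pattern against the 1.3.3 module shown above; the flattened skip set and the printed summary are illustrative only (station filtering actually happened elsewhere in the package, e.g. get_bufr.py).

from pypromice.postprocess.wmo_config import ibufr_settings, stid_to_skip

# Flatten every skip category into one set, roughly how a caller would
# exclude stations before BUFR processing (illustrative, not the package API).
skip = {stid for stids in stid_to_skip.values() for stid in stids}

# Mobile stations carry a 5-character DMI identifier string; land stations
# (KAN_B, WEG_B) carry an integer stationNumber combined with blockNumber 04.
mobile_ids = ibufr_settings['mobile']['station']['shipOrMobileLandStationIdentifier']
land_ids = ibufr_settings['land']['station']['stationNumber']

to_process = sorted(stid for stid in mobile_ids if stid not in skip)
print(f'{len(to_process)} mobile stations to process:', to_process)
print('land stations:', {k: f'04{v:03d}' for k, v in land_ids.items()})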