pypromice 1.5.3__py3-none-any.whl → 1.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of pypromice might be problematic.

Files changed (65)
  1. pypromice/__init__.py +2 -0
  2. pypromice/{qc → core/qc}/percentiles/compute_thresholds.py +2 -2
  3. pypromice/{qc → core/qc}/persistence.py +22 -29
  4. pypromice/{process → core/qc}/value_clipping.py +3 -3
  5. pypromice/core/variables/__init__.py +1 -0
  6. pypromice/core/variables/air_temperature.py +64 -0
  7. pypromice/core/variables/gps.py +221 -0
  8. pypromice/core/variables/humidity.py +111 -0
  9. pypromice/core/variables/precipitation.py +108 -0
  10. pypromice/core/variables/pressure_transducer_depth.py +79 -0
  11. pypromice/core/variables/radiation.py +422 -0
  12. pypromice/core/variables/station_boom_height.py +49 -0
  13. pypromice/core/variables/station_pose.py +375 -0
  14. pypromice/io/bufr/__init__.py +0 -0
  15. pypromice/{postprocess → io/bufr}/bufr_to_csv.py +1 -1
  16. pypromice/{postprocess → io/bufr}/create_bufr_files.py +2 -2
  17. pypromice/{postprocess → io/bufr}/get_bufr.py +6 -6
  18. pypromice/{postprocess → io/bufr}/real_time_utilities.py +3 -3
  19. pypromice/io/ingest/__init__.py +0 -0
  20. pypromice/{utilities → io/ingest}/git.py +1 -3
  21. pypromice/io/ingest/l0.py +294 -0
  22. pypromice/io/ingest/l0_repository.py +103 -0
  23. pypromice/io/ingest/toa5.py +87 -0
  24. pypromice/{process → io}/write.py +1 -1
  25. pypromice/pipeline/L0toL1.py +291 -0
  26. pypromice/pipeline/L1toL2.py +233 -0
  27. pypromice/{process → pipeline}/L2toL3.py +97 -118
  28. pypromice/pipeline/__init__.py +4 -0
  29. pypromice/{process → pipeline}/aws.py +10 -82
  30. pypromice/{process → pipeline}/get_l2.py +2 -2
  31. pypromice/{process → pipeline}/get_l2tol3.py +19 -22
  32. pypromice/{process → pipeline}/join_l2.py +31 -32
  33. pypromice/{process → pipeline}/join_l3.py +16 -14
  34. pypromice/{process → pipeline}/resample.py +58 -45
  35. pypromice/{process → pipeline}/utilities.py +0 -22
  36. pypromice/resources/file_attributes.csv +4 -4
  37. pypromice/resources/variables.csv +27 -24
  38. {pypromice-1.5.3.dist-info → pypromice-1.6.0.dist-info}/METADATA +1 -2
  39. pypromice-1.6.0.dist-info/RECORD +64 -0
  40. pypromice-1.6.0.dist-info/entry_points.txt +12 -0
  41. pypromice/get/__init__.py +0 -1
  42. pypromice/get/get.py +0 -211
  43. pypromice/get/get_promice_data.py +0 -56
  44. pypromice/process/L0toL1.py +0 -564
  45. pypromice/process/L1toL2.py +0 -824
  46. pypromice/process/__init__.py +0 -4
  47. pypromice/process/load.py +0 -161
  48. pypromice-1.5.3.dist-info/RECORD +0 -54
  49. pypromice-1.5.3.dist-info/entry_points.txt +0 -13
  50. /pypromice/{postprocess → core}/__init__.py +0 -0
  51. /pypromice/{utilities → core}/dependency_graph.py +0 -0
  52. /pypromice/{qc → core/qc}/__init__.py +0 -0
  53. /pypromice/{qc → core/qc}/github_data_issues.py +0 -0
  54. /pypromice/{qc → core/qc}/percentiles/__init__.py +0 -0
  55. /pypromice/{qc → core/qc}/percentiles/outlier_detector.py +0 -0
  56. /pypromice/{qc → core/qc}/percentiles/thresholds.csv +0 -0
  57. /pypromice/{process → core/variables}/wind.py +0 -0
  58. /pypromice/{utilities → io}/__init__.py +0 -0
  59. /pypromice/{postprocess → io/bufr}/bufr_utilities.py +0 -0
  60. /pypromice/{postprocess → io/bufr}/positions_seed.csv +0 -0
  61. /pypromice/{station_configuration.py → io/bufr/station_configuration.py} +0 -0
  62. /pypromice/{postprocess → io}/make_metadata_csv.py +0 -0
  63. {pypromice-1.5.3.dist-info → pypromice-1.6.0.dist-info}/WHEEL +0 -0
  64. {pypromice-1.5.3.dist-info → pypromice-1.6.0.dist-info}/licenses/LICENSE.txt +0 -0
  65. {pypromice-1.5.3.dist-info → pypromice-1.6.0.dist-info}/top_level.txt +0 -0
pypromice/{process → pipeline}/L2toL3.py

@@ -6,11 +6,14 @@ import pandas as pd
  import numpy as np
  import xarray as xr
  from sklearn.linear_model import LinearRegression
- from pypromice.qc.github_data_issues import adjustData
  from scipy.interpolate import interp1d
  from pathlib import Path
+ from pypromice.core.qc.github_data_issues import adjustData
  import logging

+ from pypromice.core.qc.github_data_issues import adjustData
+ from pypromice.core.variables import humidity, station_boom_height
+
  logger = logging.getLogger(__name__)

  def toL3(L2,
@@ -24,7 +27,6 @@ def toL3(L2,
  - continuous surface height, ice surface height, snow height
  - thermistor depths

-
  Parameters
  ----------
  L2 : xarray:Dataset
@@ -50,16 +52,20 @@ def toL3(L2,
  # Upper boom bulk calculation
  T_h_u = ds['t_u'].copy() # Copy for processing
  p_h_u = ds['p_u'].copy()
- rh_h_u_wrt_ice_or_water = ds['rh_u_wrt_ice_or_water'].copy()

- q_h_u = calculate_specific_humidity(T_0, T_100, T_h_u, p_h_u, rh_h_u_wrt_ice_or_water) # Calculate specific humidity
+ # Calculate specific humidity
+ q_h_u = humidity.calculate_specific_humidity(ds["t_u"],
+ ds["p_u"],
+ ds["rh_u_wrt_ice_or_water"])
+
  if ('wspd_u' in ds.keys()) and \
  ('t_surf' in ds.keys()) and \
- ('z_boom_u' in ds.keys()):
+ ('z_boom_cor_u' in ds.keys()):
  WS_h_u = ds['wspd_u'].copy()
  Tsurf_h = ds['t_surf'].copy() # T surf from derived upper boom product. TODO is this okay to use with lower boom parameters?
- z_WS_u = ds['z_boom_u'].copy() + 0.4 # Get height of Anemometer
- z_T_u = ds['z_boom_u'].copy() - 0.1 # Get height of thermometer
+
+ z_WS_u = ds['z_boom_cor_u'].copy() + 0.4 # Get height of Anemometer
+ z_T_u = ds['z_boom_cor_u'].copy() - 0.1 # Get height of thermometer

  if not is_bedrock:
  SHF_h_u, LHF_h_u= calculate_tubulent_heat_fluxes(T_0, T_h_u, Tsurf_h, WS_h_u, # Calculate latent and sensible heat fluxes
@@ -68,12 +74,12 @@ def toL3(L2,
  ds['dshf_u'] = (('time'), SHF_h_u.data)
  ds['dlhf_u'] = (('time'), LHF_h_u.data)
  else:
- logger.info('wspd_u, t_surf or z_boom_u missing, cannot calulate tubrulent heat fluxes')
+ logger.info('wspd_u, t_surf or z_boom_cor_u missing, cannot calculate turbulent heat fluxes')

- q_h_u = 1000 * q_h_u # Convert sp.humid from kg/kg to g/kg
- ds['qh_u'] = (('time'), q_h_u.data)
+ # Convert specific humidity from kg/kg to g/kg
+ ds['qh_u'] = humidity.convert(q_h_u)
  else:
- logger.info('t_u, p_u or rh_u_wrt_ice_or_water missing, cannot calulate tubrulent heat fluxes')
+ logger.info('t_u, p_u or rh_u_wrt_ice_or_water missing, cannot calculate turbulent heat fluxes')

  # Lower boom bulk calculation
  if ds.attrs['number_of_booms']==2:
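Illustration (not part of the package diff): a minimal sketch of how the new helpers from pypromice.core.variables.humidity are invoked, matching the call pattern in the hunk above. It assumes pypromice 1.6.0 is installed and that the inputs keep the units of the old inline code (air temperature in degC, pressure in hPa, relative humidity in percent).

import pandas as pd
import xarray as xr
from pypromice.core.variables import humidity

time = pd.date_range("2023-07-01", periods=3, freq="h")
ds = xr.Dataset({"t_u": ("time", [-2.0, 0.5, 1.0]),                      # air temperature (degC)
                 "p_u": ("time", [850.0, 851.0, 849.5]),                 # air pressure (hPa)
                 "rh_u_wrt_ice_or_water": ("time", [80.0, 85.0, 90.0])}, # relative humidity (%)
                coords={"time": time})

q_h_u = humidity.calculate_specific_humidity(ds["t_u"], ds["p_u"], ds["rh_u_wrt_ice_or_water"])  # kg/kg
ds["qh_u"] = humidity.convert(q_h_u)  # kg/kg -> g/kg, as assigned to qh_u above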
@@ -82,15 +88,19 @@ def toL3(L2,
  ('rh_l_wrt_ice_or_water' in ds.keys()):
  T_h_l = ds['t_l'].copy() # Copy for processing
  p_h_l = ds['p_l'].copy()
- rh_h_l_wrt_ice_or_water = ds['rh_l_wrt_ice_or_water'].copy()

- q_h_l = calculate_specific_humidity(T_0, T_100, T_h_l, p_h_l, rh_h_l_wrt_ice_or_water) # Calculate sp.humidity
+ # Calculate specific humidity
+ q_h_l = humidity.calculate_specific_humidity(ds["t_l"],
+ ds["p_l"],
+ ds["rh_l_wrt_ice_or_water"])

  if ('wspd_l' in ds.keys()) and \
  ('t_surf' in ds.keys()) and \
- ('z_boom_l' in ds.keys()):
- z_WS_l = ds['z_boom_l'].copy() + 0.4 # Get height of W
- z_T_l = ds['z_boom_l'].copy() - 0.1 # Get height of thermometer
+ ('z_boom_cor_l' in ds.keys()):
+ z_WS_l = ds['z_boom_cor_l'].copy() + 0.4 # Get height of radiometer
+ z_T_l = ds['z_boom_cor_l'].copy() - 0.1 # Get height of thermometer
+
+ # Get wind speed lower boom measurements
  WS_h_l = ds['wspd_l'].copy()

  if not is_bedrock:
@@ -100,12 +110,13 @@ def toL3(L2,
  ds['dshf_l'] = (('time'), SHF_h_l.data)
  ds['dlhf_l'] = (('time'), LHF_h_l.data)
  else:
- logger.info('wspd_l, t_surf or z_boom_l missing, cannot calulate tubrulent heat fluxes')
+ logger.info('wspd_l, t_surf or z_boom_cor_l missing, cannot calculate turbulent heat fluxes')
+
+ # Convert specific humidity from kg/kg to g/kg
+ ds['qh_l'] = humidity.convert(q_h_l)

- q_h_l = 1000 * q_h_l # Convert sp.humid from kg/kg to g/kg
- ds['qh_l'] = (('time'), q_h_l.data)
  else:
- logger.info('t_l, p_l or rh_l_wrt_ice_or_water missing, cannot calulate tubrulent heat fluxes')
+ logger.info('t_l, p_l or rh_l_wrt_ice_or_water missing, cannot calculate turbulent heat fluxes')

  if len(station_config)==0:
  logger.warning('\n***\nThe station configuration file is missing or improperly passed to pypromice. Some processing steps might fail.\n***\n')
@@ -161,12 +172,18 @@ def process_surface_height(ds, data_adjustments_dir, station_config={}):
  ds['z_surf_1'] = ('time', ds['z_boom_u'].data * np.nan)
  ds['z_surf_2'] = ('time', ds['z_boom_u'].data * np.nan)

+ z_boom_best_u = station_boom_height.adjust_and_include_uncorrected_values(ds["z_boom_u"], ds["t_u"])
+
  if ds.attrs['site_type'] == 'ablation':
  # Calculate surface heights for ablation sites
- ds['z_surf_1'] = 2.6 - ds['z_boom_u']
+ ds['z_surf_1'] = 2.6 - z_boom_best_u
  if ds.z_stake.notnull().any():
- first_valid_index = ds.time.where((ds.z_stake + ds.z_boom_u).notnull(), drop=True).data[0]
- ds['z_surf_2'] = ds.z_surf_1.sel(time=first_valid_index) + ds.z_stake.sel(time=first_valid_index) - ds['z_stake']
+
+ # Calculate stake boom height correction with uncorrected values where needed
+ z_stake_best = station_boom_height.adjust_and_include_uncorrected_values(ds["z_stake"], ds["t_u"])
+
+ first_valid_index = ds.time.where((z_stake_best + z_boom_best_u).notnull(), drop=True).data[0]
+ ds['z_surf_2'] = ds.z_surf_1.sel(time=first_valid_index) + z_stake_best.sel(time=first_valid_index) - z_stake_best

  # Use corrected point data if available
  if 'z_pt_cor' in ds.data_vars:
@@ -174,17 +191,24 @@ def process_surface_height(ds, data_adjustments_dir, station_config={}):

  else:
  # Calculate surface heights for other site types
- first_valid_index = ds.time.where(ds.z_boom_u.notnull(), drop=True).data[0]
- ds['z_surf_1'] = ds.z_boom_u.sel(time=first_valid_index) - ds['z_boom_u']
+ first_valid_index = ds.time.where(z_boom_best_u.notnull(), drop=True).data[0]
+ ds['z_surf_1'] = z_boom_best_u.sel(time=first_valid_index) - z_boom_best_u
+
  if 'z_stake' in ds.data_vars and ds.z_stake.notnull().any():
- first_valid_index = ds.time.where(ds.z_stake.notnull(), drop=True).data[0]
- ds['z_surf_2'] = ds.z_stake.sel(time=first_valid_index) - ds['z_stake']
+ z_stake_best = station_boom_height.adjust_and_include_uncorrected_values(ds["z_stake"], ds["t_u"])
+ first_valid_index = ds.time.where(z_stake_best.notnull(), drop=True).data[0]
+ ds['z_surf_2'] = z_stake_best.sel(time=first_valid_index) - z_stake_best
+
  if 'z_boom_l' in ds.data_vars:
- # need a combine first because KAN_U switches from having a z_stake
- # to having a z_boom_l
- first_valid_index = ds.time.where(ds.z_boom_l.notnull(), drop=True).data[0]
+
+ # Calculate lower boom height correction with uncorrected values where needed
+ z_boom_best_l = station_boom_height.adjust_and_include_uncorrected_values(ds["z_boom_l"], ds["t_l"])
+
+ # need a combine first because KAN_U switches from having a z_stake_best
+ # to having a z_boom_best_l
+ first_valid_index = ds.time.where(z_boom_best_l.notnull(), drop=True).data[0]
  ds['z_surf_2'] = ds['z_surf_2'].combine_first(
- ds.z_boom_l.sel(time=first_valid_index) - ds['z_boom_l'])
+ z_boom_best_l.sel(time=first_valid_index) - z_boom_best_l)

  # Adjust data for the created surface height variables
  ds = adjustData(ds, data_adjustments_dir, var_list=['z_surf_1', 'z_surf_2', 'z_ice_surf'])
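Illustration (not part of the package diff): the "first valid index" anchoring pattern used throughout this hunk, shown standalone on synthetic data; z_surf is the height change relative to the first valid boom reading.

import numpy as np
import pandas as pd
import xarray as xr

time = pd.date_range("2023-05-01", periods=6, freq="h")
z = xr.DataArray([np.nan, 2.1, 2.0, np.nan, 1.8, 1.7], coords={"time": time}, dims="time")

first_valid_index = z.time.where(z.notnull(), drop=True).data[0]  # timestamp of first valid sample
z_surf = z.sel(time=first_valid_index) - z                        # surface height relative to that sample
print(z_surf.values)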
@@ -221,8 +245,10 @@ def process_surface_height(ds, data_adjustments_dir, station_config={}):
  .rolling('1D', center=True, min_periods=1)
  .median())

- z_ice_surf = z_ice_surf.loc[ds.time]
- # here we make sure that the periods where both z_stake and z_pt are
+ z_ice_surf = z_ice_surf.reindex(ds.time,
+ method=None).interpolate(method='time')
+
+ # here we make sure that the periods where both z_stake_best and z_pt are
  # missing are also missing in z_ice_surf
  msk = ds['z_ice_surf'].notnull() | ds['z_surf_2_adj'].notnull()
  z_ice_surf = z_ice_surf.where(msk)
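Illustration (not part of the package diff): the reindex-then-interpolate idiom introduced above, standalone; a daily-median series is put back onto the dataset's own timestamps and the newly created gaps are filled by time-weighted interpolation.

import pandas as pd

daily = pd.Series([10.0, 9.5, 9.0], index=pd.date_range("2023-07-01", periods=3, freq="D"))
target = pd.date_range("2023-07-01", periods=5, freq="12h")

on_target = daily.reindex(target, method=None).interpolate(method="time")
print(on_target)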
@@ -234,7 +260,7 @@ def process_surface_height(ds, data_adjustments_dir, station_config={}):
  # sides are less than 0.01 m appart

  # Forward and backward fill to identify bounds of gaps
- df_filled = z_ice_surf.fillna(method='ffill').fillna(method='bfill')
+ df_filled = z_ice_surf.ffill().bfill()

  # Identify gaps and their start and end dates
  gaps = pd.DataFrame(index=z_ice_surf[z_ice_surf.isna()].index)
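Illustration (not part of the package diff): the .ffill()/.bfill() spelling replaces Series.fillna(method=...), which is deprecated in recent pandas releases; both fill gaps from the neighbouring valid values.

import pandas as pd

s = pd.Series([None, 1.0, None, 2.0, None])
filled = s.ffill().bfill()  # equivalent to the deprecated s.fillna(method='ffill').fillna(method='bfill')
print(filled.tolist())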
@@ -253,7 +279,7 @@ def process_surface_height(ds, data_adjustments_dir, station_config={}):
  z_ice_surf.loc[gaps_to_fill] = df_filled.loc[gaps_to_fill]

  # bringing the variable into the dataset
- ds['z_ice_surf'] = z_ice_surf
+ ds['z_ice_surf'] = ('time', z_ice_surf.values)

  ds['z_surf_combined'] = np.maximum(ds['z_surf_combined'], ds['z_ice_surf'])
  ds['snow_height'] = np.maximum(0, ds['z_surf_combined'] - ds['z_ice_surf'])
@@ -271,6 +297,7 @@ def process_surface_height(ds, data_adjustments_dir, station_config={}):
  ice_temp_vars = [v for v in ds.data_vars if 't_i_' in v]
  vars_out = [v.replace('t', 'd_t') for v in ice_temp_vars]
  vars_out.append('t_i_10m')
+
  df_out = get_thermistor_depth(
  ds[ice_temp_vars + ['z_surf_combined']].to_dataframe(),
  ds.attrs['station_id'],
@@ -289,7 +316,7 @@ def combine_surface_height(df, site_type, threshold_ablation = -0.0002):
  period is estimated each year (either the period when z_pt_cor decreases
  or JJA if no better estimate) then different adjustmnents are conducted
  to stitch the three time series together: z_ice_surface (adjusted from
- z_pt_cor) or if unvailable, z_surf_2 (adjusted from z_stake)
+ z_pt_cor) or if unavailable, z_surf_2 (adjusted from z_stake)
  are used in the ablation period while an average of z_surf_1 and z_surf_2
  are used otherwise, after they are being adjusted to z_ice_surf at the end
  of the ablation season.
@@ -344,22 +371,24 @@ def combine_surface_height(df, site_type, threshold_ablation = -0.0002):

  # defining ice ablation period from the decrease of a smoothed version of z_pt
  # meaning when smoothed_z_pt.diff() < threshold_ablation
- # first smoothing
- smoothed_PT = (df['z_ice_surf']
- .resample('h')
- .interpolate(limit=72)
- .rolling('14D',center=True, min_periods=1)
- .mean())
- # second smoothing
- smoothed_PT = smoothed_PT.rolling('14D', center=True, min_periods=1).mean()
-
- smoothed_PT = smoothed_PT.reindex(df.index,method='ffill')
- # smoothed_PT.loc[df.z_ice_surf.isnull()] = np.nan
-
- # logical index where ablation is detected
- ind_ablation = np.logical_and(smoothed_PT.diff().values < threshold_ablation,
- np.isin(smoothed_PT.diff().index.month, [6, 7, 8, 9]))
-
+ hourly_interp = (df["z_ice_surf"]
+ .resample("h")
+ .interpolate(limit=72))
+ once_smoothed = hourly_interp.rolling("14D", center=True, min_periods=1).mean()
+ smoothed_PT = once_smoothed.rolling("14D", center=True, min_periods=1).mean()
+
+ # ablation detection
+ diff_series = smoothed_PT.diff()
+ ind_ablation = np.full_like(diff_series, False, dtype=bool)
+ ind_ablation = np.logical_and(diff_series.values < threshold_ablation,
+ np.isin(diff_series.index.month, [6, 7, 8, 9]))
+ # making sure that we only qualify as ablation timestamps where we actually have ablation data
+ msk = np.isnan(smoothed_PT.values)
+ ind_ablation[msk] = False
+
+ # reindex back to df
+ smoothed_PT = smoothed_PT.reindex(df.index, method="ffill")
+ ind_ablation = pd.Series(ind_ablation, index=diff_series.index).reindex(df.index, fill_value=False).values

  # finding the beginning and end of each period with True
  idx = np.argwhere(np.diff(np.r_[False,ind_ablation, False])).reshape(-1, 2)
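Illustration (not part of the package diff): the detection pattern used above, demonstrated on synthetic data; the ice-surface series is smoothed twice with a centred 14-day window and summer timestamps with a persistent decrease are flagged as ablation.

import numpy as np
import pandas as pd

rng = pd.date_range("2023-01-01", "2023-12-31", freq="D")
z = pd.Series(np.where(rng.month.isin([6, 7, 8]), -0.01, 0.0).cumsum(), index=rng)  # synthetic z_ice_surf

smoothed = (z.rolling("14D", center=True, min_periods=1).mean()
             .rolling("14D", center=True, min_periods=1).mean())
diff = smoothed.diff()
ind_ablation = (diff.values < -0.0002) & np.isin(diff.index.month, [6, 7, 8, 9])
print(int(ind_ablation.sum()), "timestamps flagged as ablation")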
@@ -378,13 +407,12 @@ def combine_surface_height(df, site_type, threshold_ablation = -0.0002):
  # finding the beginning and end of each period with True
  idx = np.argwhere(np.diff(np.r_[False,ind_ablation, False])).reshape(-1, 2)
  idx[:, 1] -= 1
-
  # because the smooth_PT sees 7 days ahead, it starts showing a decline
- # 7 days in advance, we therefore need to exclude the first 7 days of
+ # 7 days in advance, we therefore need to exclude the first few days of
  # each ablation period
  for start, end in idx:
  period_start = df.index[start]
- period_end = period_start + pd.Timedelta(days=7)
+ period_end = period_start + pd.Timedelta(days=3)
  exclusion_period = (df.index >= period_start) & (df.index < period_end)
  ind_ablation[exclusion_period] = False

@@ -393,8 +421,6 @@ def combine_surface_height(df, site_type, threshold_ablation = -0.0002):
  z=df["z_ice_surf_adj"].interpolate(limit=24*2).copy()

  # the surface heights are adjusted so that they start at 0
-
-
  if any(~np.isnan(hs2.iloc[:24*7])):
  hs2 = hs2 - hs2.iloc[:24*7].mean()

@@ -470,9 +496,8 @@ def combine_surface_height(df, site_type, threshold_ablation = -0.0002):
  # to hs1 and hs2 the year after.

  for i, y in enumerate(years):
- # if y == 2014:
- # import pdb; pdb.set_trace()
- logger.debug(str(y))
+ logger.debug(f'{y}: Ablation from {z.index[ind_start[i]]} to {z.index[ind_end[i]]}')
+
  # defining subsets of hs1, hs2, z
  hs1_jja = hs1[str(y)+'-06-01':str(y)+'-09-01']
  hs2_jja = hs2[str(y)+'-06-01':str(y)+'-09-01']
@@ -588,7 +613,7 @@ def combine_surface_height(df, site_type, threshold_ablation = -0.0002):
  # import pdb; pdb.set_trace()
  # if there's ablation and
  # if there are PT data available at the end of the melt season
- if z.iloc[(ind_end[i]-24*7):(ind_end[i]+24*7)].notnull().any():
+ if z.iloc[(ind_end[i]-24*7):ind_end[i]].notnull().any():
  logger.debug('adjusting hs2 to z')
  # then we adjust hs2 to the end-of-ablation z
  # first trying at the end of melt season
@@ -605,7 +630,7 @@ def combine_surface_height(df, site_type, threshold_ablation = -0.0002):
  np.nanmean(hs2.iloc[(ind_start[i+1]-24*7):(ind_start[i+1]+24*7)]) + \
  np.nanmean(z.iloc[(ind_start[i+1]-24*7):(ind_start[i+1]+24*7)])
  else:
- logger.debug('no ablation')
+ logger.debug('no ablation data')
  hs1_following_winter = hs1[str(y)+'-09-01':str(y+1)+'-03-01'].copy()
  hs2_following_winter = hs2[str(y)+'-09-01':str(y+1)+'-03-01'].copy()
  if all(np.isnan(hs2_following_winter)):
@@ -880,14 +905,18 @@ def get_thermistor_depth(df_in, site, station_config):

  # removing negative depth
  df_in.loc[df_in[depth_cols_name[i]]<0, depth_cols_name[i]] = np.nan
- logger.info("interpolating 10 m firn/ice temperature")
- df_in['t_i_10m'] = interpolate_temperature(
- df_in.index.values,
- df_in[depth_cols_name].values.astype(float),
- df_in[temp_cols_name].values.astype(float),
+
+ logger.info("interpolating 10 m firn/ice temperature (on hourly values)")
+ df_in_h = df_in[depth_cols_name+temp_cols_name].resample('h').mean()
+ df_in_h['t_i_10m'] = interpolate_temperature(
+ df_in_h.index.values,
+ df_in_h[depth_cols_name].values.astype(float),
+ df_in_h[temp_cols_name].values.astype(float),
  kind="linear",
  min_diff_to_depth=1.5,
  ).set_index('date').values
+ df_in['t_i_10m'] = df_in_h['t_i_10m'].reindex(df_in.index,
+ method=None)

  # filtering
  ind_pos = df_in["t_i_10m"] > 0.1
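Illustration (not part of the package diff): the compute-on-hourly-then-map-back pattern used for t_i_10m, standalone; interpolate_temperature is replaced by a stand-in so the snippet runs on its own.

import numpy as np
import pandas as pd

idx = pd.date_range("2023-07-01", periods=24, freq="10min")
df = pd.DataFrame({"t_i_1": np.linspace(-10.0, -8.0, 24)}, index=idx)

df_h = df.resample("h").mean()           # hourly averages
df_h["t_i_10m"] = df_h["t_i_1"] - 0.5    # stand-in for interpolate_temperature()
df["t_i_10m"] = df_h["t_i_10m"].reindex(df.index, method=None)  # only exact hourly timestamps get a value
print(df["t_i_10m"].notna().sum(), "of", len(df), "rows fall exactly on the hour")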
@@ -996,7 +1025,7 @@ def piecewise_smoothing_and_interpolation(data_series, breaks):

  Parameters
  ----------
- data_series : pandas.Series
+ data_series : pd.Series
  Series of observed latitude, longitude or elevation with datetime index.
  breaks: list
  List of timestamps of station relocation. First and last item should be
@@ -1228,56 +1257,6 @@ def calculate_viscosity(T_h, T_0, rho_atm):
  # Kinematic viscosity of air in m^2/s
  return mu / rho_atm

- def calculate_specific_humidity(T_0, T_100, T_h, p_h, rh_h_wrt_ice_or_water, es_0=6.1071, es_100=1013.246, eps=0.622):
- '''Calculate specific humidity
- Parameters
- ----------
- T_0 : float
- Steam point temperature. Default is 273.15.
- T_100 : float
- Steam point temperature in Kelvin
- T_h : xarray.DataArray
- Air temperature
- p_h : xarray.DataArray
- Air pressure
- rh_h_wrt_ice_or_water : xarray.DataArray
- Relative humidity corrected
- es_0 : float
- Saturation vapour pressure at the melting point (hPa)
- es_100 : float
- Saturation vapour pressure at steam point temperature (hPa)
- eps : int
- ratio of molar masses of vapor and dry air (0.622)
-
- Returns
- -------
- xarray.DataArray
- Specific humidity data array
- '''
- # Saturation vapour pressure above 0 C (hPa)
- es_wtr = 10**(-7.90298 * (T_100 / (T_h + T_0) - 1) + 5.02808 * np.log10(T_100 / (T_h + T_0))
- - 1.3816E-7 * (10**(11.344 * (1 - (T_h + T_0) / T_100)) - 1)
- + 8.1328E-3 * (10**(-3.49149 * (T_100 / (T_h + T_0) -1)) - 1) + np.log10(es_100))
-
- # Saturation vapour pressure below 0 C (hPa)
- es_ice = 10**(-9.09718 * (T_0 / (T_h + T_0) - 1) - 3.56654
- * np.log10(T_0 / (T_h + T_0)) + 0.876793
- * (1 - (T_h + T_0) / T_0)
- + np.log10(es_0))
-
- # Specific humidity at saturation (incorrect below melting point)
- q_sat = eps * es_wtr / (p_h - (1 - eps) * es_wtr)
-
- # Replace saturation specific humidity values below melting point
- freezing = T_h < 0
- q_sat[freezing] = eps * es_ice[freezing] / (p_h[freezing] - (1 - eps) * es_ice[freezing])
-
- q_nan = np.isnan(T_h) | np.isnan(p_h)
- q_sat[q_nan] = np.nan
-
- # Convert to kg/kg
- return rh_h_wrt_ice_or_water * q_sat / 100
-
  if __name__ == "__main__":
  # unittest.main()
  pass
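Illustration (not part of the package diff): the removed helper implemented the Goff-Gratch saturation vapour pressure curves over water and ice and returned q = rh * q_sat / 100 with q_sat = eps * e_s / (p - (1 - eps) * e_s). A condensed NumPy restatement of that same calculation is given below; the replacement presumably lives in pypromice/core/variables/humidity.py, whose implementation is not shown in this diff.

import numpy as np

def specific_humidity(t_c, p_hpa, rh_pct, T_0=273.15, T_100=373.15,
                      es_0=6.1071, es_100=1013.246, eps=0.622):
    """Condensed restatement of the removed helper (arrays of degC, hPa, %)."""
    T_k = t_c + T_0
    # Goff-Gratch saturation vapour pressure over water (hPa)
    es_wtr = 10 ** (-7.90298 * (T_100 / T_k - 1) + 5.02808 * np.log10(T_100 / T_k)
                    - 1.3816e-7 * (10 ** (11.344 * (1 - T_k / T_100)) - 1)
                    + 8.1328e-3 * (10 ** (-3.49149 * (T_100 / T_k - 1)) - 1)
                    + np.log10(es_100))
    # Goff-Gratch saturation vapour pressure over ice (hPa)
    es_ice = 10 ** (-9.09718 * (T_0 / T_k - 1) - 3.56654 * np.log10(T_0 / T_k)
                    + 0.876793 * (1 - T_k / T_0) + np.log10(es_0))
    es = np.where(t_c < 0, es_ice, es_wtr)          # ice below the melting point, water above
    q_sat = eps * es / (p_hpa - (1 - eps) * es)     # saturation specific humidity (kg/kg)
    return rh_pct * q_sat / 100                     # specific humidity (kg/kg)

print(specific_humidity(np.array([-10.0, 5.0]), np.array([800.0, 900.0]), np.array([70.0, 70.0])))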
pypromice/pipeline/__init__.py

@@ -0,0 +1,4 @@
+ from pypromice.pipeline.aws import *
+ from pypromice.pipeline import L0toL1
+ from pypromice.pipeline import L1toL2
+ from pypromice.pipeline import L2toL3
pypromice/{process → pipeline}/aws.py

@@ -16,11 +16,13 @@ from importlib import metadata


  import pypromice.resources
- from pypromice.process.L0toL1 import toL1
- from pypromice.process.L1toL2 import toL2
- from pypromice.process.L2toL3 import toL3
- from pypromice.process import write, load, utilities
- from pypromice.utilities.git import get_commit_hash_and_check_dirty
+ from pypromice.pipeline.L0toL1 import toL1
+ from pypromice.pipeline.L1toL2 import toL2
+ from pypromice.pipeline.L2toL3 import toL3
+ from pypromice.pipeline import utilities
+ from pypromice.io import write
+ from pypromice.io.ingest.l0 import (load_data_files, load_config)
+ from pypromice.io.ingest.git import get_commit_hash_and_check_dirty

  pd.set_option("display.precision", 2)
  xr.set_options(keep_attrs=True)
@@ -66,7 +68,6 @@ class AWS(object):
  )

  # Load config, variables CSF standards, and L0 files
- self.config = self.loadConfig(config_file, inpath)
  self.vars = pypromice.resources.load_variables(var_file)
  self.meta = pypromice.resources.load_metadata(meta_file)
  self.data_issues_repository = Path(data_issues_repository)
@@ -85,7 +86,9 @@ class AWS(object):
  self.meta["source"] = json.dumps(source_dict)

  # Load config file
- L0 = self.loadL0()
+ config = load_config(config_file, inpath)
+ L0 = load_data_files(config)
+
  self.L0 = []
  for l in L0:
  n = write.getColNames(self.vars, l)
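Illustration (not part of the package diff): a minimal sketch of the new ingest entry points that replace AWS.loadConfig/AWS.loadL0, assuming load_config and load_data_files in pypromice.io.ingest.l0 keep the old semantics of a TOML station config plus a list of L0 xarray Datasets; the paths are hypothetical.

from pypromice.io.ingest.l0 import load_config, load_data_files

config = load_config("aws-l0/raw/config/XXX_station.toml", "aws-l0/raw")  # hypothetical paths
l0_datasets = load_data_files(config)  # expected: list of xarray.Dataset objects, one per L0 file

for ds in l0_datasets:
    print(type(ds).__name__, dict(ds.sizes))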
@@ -148,78 +151,3 @@ class AWS(object):
  logger.info("Level 3 processing...")
  self.L3 = toL3(self.L2, data_adjustments_dir=self.data_issues_repository / "adjustments")

- def loadConfig(self, config_file, inpath):
- """Load configuration from .toml file
-
- Parameters
- ----------
- config_file : str
- TOML file path
- inpath : str
- Input folder directory where L0 files can be found
-
- Returns
- -------
- conf : dict
- Configuration parameters
- """
- conf = load.getConfig(config_file, inpath)
- return conf
-
- def loadL0(self):
- """Load level 0 (L0) data from associated TOML-formatted
- config file and L0 data file
-
- Try readL0file() using the config with msg_lat & msg_lon appended. The
- specific ParserError except will occur when the number of columns in
- the tx file does not match the expected columns. In this case, remove
- msg_lat & msg_lon from the config and call readL0file() again. These
- station files either have no data after Nov 2022 (when msg_lat &
- msg_lon were added to processing), or for whatever reason these fields
- did not exist in the modem message and were not added.
-
- Returns
- -------
- ds_list : list
- List of L0 xr.Dataset objects
- """
- ds_list = []
- for k in self.config.keys():
- target = self.config[k]
- try:
- ds_list.append(self.readL0file(target))
-
- except pd.errors.ParserError as e:
- # ParserError: Too many columns specified: expected 40 and found 38
- # logger.info(f'-----> No msg_lat or msg_lon for {k}')
- for item in ["msg_lat", "msg_lon"]:
- target["columns"].remove(item) # Also removes from self.config
- ds_list.append(self.readL0file(target))
- logger.info(f"L0 data successfully loaded from {k}")
- return ds_list
-
- def readL0file(self, conf):
- """Read L0 .txt file to Dataset object using config dictionary and
- populate with initial metadata
-
- Parameters
- ----------
- conf : dict
- Configuration parameters
-
- Returns
- -------
- ds : xr.Dataset
- L0 data
- """
- file_version = conf.get("file_version", -1)
- ds = load.getL0(
- conf["file"],
- conf["nodata"],
- conf["columns"],
- conf["skiprows"],
- file_version,
- time_offset=conf.get("time_offset"),
- )
- ds = utilities.populateMeta(ds, conf, ["columns", "skiprows", "modem"])
- return ds
pypromice/{process → pipeline}/get_l2.py

@@ -5,8 +5,8 @@ import sys
  from argparse import ArgumentParser
  from pathlib import Path

- from pypromice.process.aws import AWS
- from pypromice.process.write import prepare_and_write
+ from pypromice.pipeline.aws import AWS
+ from pypromice.io.write import prepare_and_write


  def parse_arguments_l2():
pypromice/{process → pipeline}/get_l2tol3.py

@@ -5,9 +5,9 @@ from pathlib import Path
  import xarray as xr
  from argparse import ArgumentParser
  import pypromice
- from pypromice.process.L2toL3 import toL3
+ from pypromice.pipeline.L2toL3 import toL3
  import pypromice.resources
- from pypromice.process.write import prepare_and_write
+ from pypromice.io.write import prepare_and_write
  logger = logging.getLogger(__name__)

  def parse_arguments_l2tol3(debug_args=None):
@@ -17,13 +17,13 @@ def parse_arguments_l2tol3(debug_args=None):
  parser.add_argument('-c', '--config_folder', type=str, required=True,
  default='../aws-l0/metadata/station_configurations/',
  help='Path to folder with sites configuration (TOML) files')
- parser.add_argument('-i', '--inpath', type=str, required=True,
+ parser.add_argument('-i', '--inpath', type=str, required=True,
  help='Path to Level 2 .nc data file')
- parser.add_argument('-o', '--outpath', default=None, type=str, required=False,
+ parser.add_argument('-o', '--outpath', default=None, type=str, required=False,
  help='Path where to write output')
- parser.add_argument('-v', '--variables', default=None, type=str,
+ parser.add_argument('-v', '--variables', default=None, type=str,
  required=False, help='File path to variables look-up table')
- parser.add_argument('-m', '--metadata', default=None, type=str,
+ parser.add_argument('-m', '--metadata', default=None, type=str,
  required=False, help='File path to metadata')
  parser.add_argument('--data_issues_path', '--issues', default=None, help="Path to data issues repository")

@@ -40,11 +40,11 @@ def get_l2tol3(config_folder: Path|str, inpath, outpath, variables, metadata, da
  level=logging.INFO,
  stream=sys.stdout,
  )
-
+
  # Define Level 2 dataset from file
  with xr.open_dataset(inpath) as l2:
  l2.load()
-
+
  # Remove encoding attributes from NetCDF
  for varname in l2.variables:
  if l2[varname].encoding!={}:
@@ -54,7 +54,7 @@ def get_l2tol3(config_folder: Path|str, inpath, outpath, variables, metadata, da
  l2.attrs['bedrock'] = l2.attrs['bedrock'] == 'True'
  if 'number_of_booms' in l2.attrs.keys():
  l2.attrs['number_of_booms'] = int(l2.attrs['number_of_booms'])
-
+
  # importing station_config (dict) from config_folder (str path)
  config_file = config_folder / (l2.attrs['station_id']+'.toml')

@@ -62,7 +62,7 @@ def get_l2tol3(config_folder: Path|str, inpath, outpath, variables, metadata, da
  # File exists, load the configuration
  station_config = toml.load(config_file)
  else:
- # File does not exist, initialize with standard info
+ # File does not exist, initialize with standard info
  # this was prefered by RSF over exiting with error
  logger.error("\n***\nNo station_configuration file for %s.\nPlease create one on AWS-L0/metadata/station_configurations.\n***"%l2.attrs['station_id'])
  station_config = {"stid":l2.attrs['station_id'],
@@ -70,7 +70,7 @@ def get_l2tol3(config_folder: Path|str, inpath, outpath, variables, metadata, da
  "project": "PROMICE",
  "location_type": "ice sheet",
  }
-
+
  # checking that the adjustement directory is properly given
  if data_issues_path is None:
  data_issues_path = Path("../PROMICE-AWS-data-issues")
@@ -82,7 +82,7 @@ def get_l2tol3(config_folder: Path|str, inpath, outpath, variables, metadata, da
  data_issues_path = Path(data_issues_path)

  data_adjustments_dir = data_issues_path / "adjustments"
-
+
  # Perform Level 3 processing
  l3 = toL3(l2, data_adjustments_dir, station_config)

@@ -92,20 +92,17 @@ def get_l2tol3(config_folder: Path|str, inpath, outpath, variables, metadata, da
  if outpath is not None:
  prepare_and_write(l3, outpath, v, m, '60min')
  prepare_and_write(l3, outpath, v, m, '1D')
- prepare_and_write(l3, outpath, v, m, 'M')
+ prepare_and_write(l3, outpath, v, m, 'ME')
  return l3

  def main():
  args = parse_arguments_l2tol3()
-
-
-
- _ = get_l2tol3(args.config_folder,
- args.inpath,
+ _ = get_l2tol3(args.config_folder,
+ args.inpath,
  args.outpath,
- args.variables,
- args.metadata,
+ args.variables,
+ args.metadata,
  args.data_issues_path)
-
- if __name__ == "__main__":
+
+ if __name__ == "__main__":
  main()
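Illustration (not part of the package diff): the 'M' -> 'ME' argument change tracks pandas' renaming of the month-end resampling alias; since pandas 2.2 the plain 'M' alias is deprecated in favour of 'ME'.

import pandas as pd

s = pd.Series(range(90), index=pd.date_range("2024-01-01", periods=90, freq="D"))
monthly = s.resample("ME").mean()  # month-end alias; plain "M" emits a FutureWarning on recent pandas
print(monthly)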