pypromice 1.5.3__py3-none-any.whl → 1.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of pypromice has been flagged as potentially problematic.
Files changed (67)
  1. pypromice/__init__.py +2 -0
  2. pypromice/{qc → core/qc}/github_data_issues.py +22 -13
  3. pypromice/{qc → core/qc}/percentiles/compute_thresholds.py +2 -2
  4. pypromice/{qc → core/qc}/persistence.py +22 -29
  5. pypromice/{process → core/qc}/value_clipping.py +3 -3
  6. pypromice/core/resampling.py +142 -0
  7. pypromice/core/variables/__init__.py +1 -0
  8. pypromice/core/variables/air_temperature.py +64 -0
  9. pypromice/core/variables/gps.py +221 -0
  10. pypromice/core/variables/humidity.py +111 -0
  11. pypromice/core/variables/precipitation.py +108 -0
  12. pypromice/core/variables/pressure_transducer_depth.py +79 -0
  13. pypromice/core/variables/radiation.py +422 -0
  14. pypromice/core/variables/station_boom_height.py +75 -0
  15. pypromice/core/variables/station_pose.py +375 -0
  16. pypromice/io/bufr/__init__.py +0 -0
  17. pypromice/{postprocess → io/bufr}/bufr_to_csv.py +1 -1
  18. pypromice/{postprocess → io/bufr}/create_bufr_files.py +2 -2
  19. pypromice/{postprocess → io/bufr}/get_bufr.py +6 -6
  20. pypromice/{postprocess → io/bufr}/real_time_utilities.py +3 -3
  21. pypromice/io/ingest/__init__.py +0 -0
  22. pypromice/{utilities → io/ingest}/git.py +1 -3
  23. pypromice/io/ingest/l0.py +294 -0
  24. pypromice/io/ingest/l0_repository.py +103 -0
  25. pypromice/io/ingest/toa5.py +87 -0
  26. pypromice/{process → io}/write.py +1 -1
  27. pypromice/pipeline/L0toL1.py +291 -0
  28. pypromice/pipeline/L1toL2.py +233 -0
  29. pypromice/{process → pipeline}/L2toL3.py +113 -118
  30. pypromice/pipeline/__init__.py +4 -0
  31. pypromice/{process → pipeline}/aws.py +10 -82
  32. pypromice/{process → pipeline}/get_l2.py +2 -2
  33. pypromice/{process → pipeline}/get_l2tol3.py +19 -22
  34. pypromice/{process → pipeline}/join_l2.py +31 -32
  35. pypromice/{process → pipeline}/join_l3.py +16 -14
  36. pypromice/{process → pipeline}/resample.py +75 -51
  37. pypromice/{process → pipeline}/utilities.py +0 -22
  38. pypromice/resources/file_attributes.csv +4 -4
  39. pypromice/resources/variable_aliases_GC-Net.csv +2 -2
  40. pypromice/resources/variables.csv +27 -24
  41. {pypromice-1.5.3.dist-info → pypromice-1.7.0.dist-info}/METADATA +1 -2
  42. pypromice-1.7.0.dist-info/RECORD +65 -0
  43. pypromice-1.7.0.dist-info/entry_points.txt +12 -0
  44. pypromice/get/__init__.py +0 -1
  45. pypromice/get/get.py +0 -211
  46. pypromice/get/get_promice_data.py +0 -56
  47. pypromice/process/L0toL1.py +0 -564
  48. pypromice/process/L1toL2.py +0 -824
  49. pypromice/process/__init__.py +0 -4
  50. pypromice/process/load.py +0 -161
  51. pypromice-1.5.3.dist-info/RECORD +0 -54
  52. pypromice-1.5.3.dist-info/entry_points.txt +0 -13
  53. /pypromice/{postprocess → core}/__init__.py +0 -0
  54. /pypromice/{utilities → core}/dependency_graph.py +0 -0
  55. /pypromice/{qc → core/qc}/__init__.py +0 -0
  56. /pypromice/{qc → core/qc}/percentiles/__init__.py +0 -0
  57. /pypromice/{qc → core/qc}/percentiles/outlier_detector.py +0 -0
  58. /pypromice/{qc → core/qc}/percentiles/thresholds.csv +0 -0
  59. /pypromice/{process → core/variables}/wind.py +0 -0
  60. /pypromice/{utilities → io}/__init__.py +0 -0
  61. /pypromice/{postprocess → io/bufr}/bufr_utilities.py +0 -0
  62. /pypromice/{postprocess → io/bufr}/positions_seed.csv +0 -0
  63. /pypromice/{station_configuration.py → io/bufr/station_configuration.py} +0 -0
  64. /pypromice/{postprocess → io}/make_metadata_csv.py +0 -0
  65. {pypromice-1.5.3.dist-info → pypromice-1.7.0.dist-info}/WHEEL +0 -0
  66. {pypromice-1.5.3.dist-info → pypromice-1.7.0.dist-info}/licenses/LICENSE.txt +0 -0
  67. {pypromice-1.5.3.dist-info → pypromice-1.7.0.dist-info}/top_level.txt +0 -0
pypromice/{process → pipeline}/L2toL3.py

@@ -6,11 +6,14 @@ import pandas as pd
  import numpy as np
  import xarray as xr
  from sklearn.linear_model import LinearRegression
- from pypromice.qc.github_data_issues import adjustData
  from scipy.interpolate import interp1d
  from pathlib import Path
+ from pypromice.core.qc.github_data_issues import adjustData
  import logging

+ from pypromice.core.qc.github_data_issues import adjustData
+ from pypromice.core.variables import humidity, station_boom_height
+
  logger = logging.getLogger(__name__)

  def toL3(L2,
@@ -24,7 +27,6 @@ def toL3(L2,
  - continuous surface height, ice surface height, snow height
  - thermistor depths

-
  Parameters
  ----------
  L2 : xarray:Dataset
@@ -50,16 +52,20 @@ def toL3(L2,
  # Upper boom bulk calculation
  T_h_u = ds['t_u'].copy() # Copy for processing
  p_h_u = ds['p_u'].copy()
- rh_h_u_wrt_ice_or_water = ds['rh_u_wrt_ice_or_water'].copy()

- q_h_u = calculate_specific_humidity(T_0, T_100, T_h_u, p_h_u, rh_h_u_wrt_ice_or_water) # Calculate specific humidity
+ # Calculate specific humidity
+ q_h_u = humidity.calculate_specific_humidity(ds["t_u"],
+ ds["p_u"],
+ ds["rh_u_wrt_ice_or_water"])
+
  if ('wspd_u' in ds.keys()) and \
  ('t_surf' in ds.keys()) and \
- ('z_boom_u' in ds.keys()):
+ ('z_boom_cor_u' in ds.keys()):
  WS_h_u = ds['wspd_u'].copy()
  Tsurf_h = ds['t_surf'].copy() # T surf from derived upper boom product. TODO is this okay to use with lower boom parameters?
- z_WS_u = ds['z_boom_u'].copy() + 0.4 # Get height of Anemometer
- z_T_u = ds['z_boom_u'].copy() - 0.1 # Get height of thermometer
+
+ z_WS_u = ds['z_boom_cor_u'].copy() + 0.4 # Get height of Anemometer
+ z_T_u = ds['z_boom_cor_u'].copy() - 0.1 # Get height of thermometer

  if not is_bedrock:
  SHF_h_u, LHF_h_u= calculate_tubulent_heat_fluxes(T_0, T_h_u, Tsurf_h, WS_h_u, # Calculate latent and sensible heat fluxes
@@ -68,12 +74,12 @@ def toL3(L2,
  ds['dshf_u'] = (('time'), SHF_h_u.data)
  ds['dlhf_u'] = (('time'), LHF_h_u.data)
  else:
- logger.info('wspd_u, t_surf or z_boom_u missing, cannot calulate tubrulent heat fluxes')
+ logger.info('wspd_u, t_surf or z_boom_cor_u missing, cannot calculate turbulent heat fluxes')

- q_h_u = 1000 * q_h_u # Convert sp.humid from kg/kg to g/kg
- ds['qh_u'] = (('time'), q_h_u.data)
+ # Convert specific humidity from kg/kg to g/kg
+ ds['qh_u'] = humidity.convert(q_h_u)
  else:
- logger.info('t_u, p_u or rh_u_wrt_ice_or_water missing, cannot calulate tubrulent heat fluxes')
+ logger.info('t_u, p_u or rh_u_wrt_ice_or_water missing, cannot calculate turbulent heat fluxes')

  # Lower boom bulk calculation
  if ds.attrs['number_of_booms']==2:
@@ -82,15 +88,19 @@ def toL3(L2,
  ('rh_l_wrt_ice_or_water' in ds.keys()):
  T_h_l = ds['t_l'].copy() # Copy for processing
  p_h_l = ds['p_l'].copy()
- rh_h_l_wrt_ice_or_water = ds['rh_l_wrt_ice_or_water'].copy()

- q_h_l = calculate_specific_humidity(T_0, T_100, T_h_l, p_h_l, rh_h_l_wrt_ice_or_water) # Calculate sp.humidity
+ # Calculate specific humidity
+ q_h_l = humidity.calculate_specific_humidity(ds["t_l"],
+ ds["p_l"],
+ ds["rh_l_wrt_ice_or_water"])

  if ('wspd_l' in ds.keys()) and \
  ('t_surf' in ds.keys()) and \
- ('z_boom_l' in ds.keys()):
- z_WS_l = ds['z_boom_l'].copy() + 0.4 # Get height of W
- z_T_l = ds['z_boom_l'].copy() - 0.1 # Get height of thermometer
+ ('z_boom_cor_l' in ds.keys()):
+ z_WS_l = ds['z_boom_cor_l'].copy() + 0.4 # Get height of radiometer
+ z_T_l = ds['z_boom_cor_l'].copy() - 0.1 # Get height of thermometer
+
+ # Get wind speed lower boom measurements
  WS_h_l = ds['wspd_l'].copy()

  if not is_bedrock:
@@ -100,12 +110,13 @@ def toL3(L2,
  ds['dshf_l'] = (('time'), SHF_h_l.data)
  ds['dlhf_l'] = (('time'), LHF_h_l.data)
  else:
- logger.info('wspd_l, t_surf or z_boom_l missing, cannot calulate tubrulent heat fluxes')
+ logger.info('wspd_l, t_surf or z_boom_cor_l missing, cannot calculate turbulent heat fluxes')
+
+ # Convert specific humidity from kg/kg to g/kg
+ ds['qh_l'] = humidity.convert(q_h_l)

- q_h_l = 1000 * q_h_l # Convert sp.humid from kg/kg to g/kg
- ds['qh_l'] = (('time'), q_h_l.data)
  else:
- logger.info('t_l, p_l or rh_l_wrt_ice_or_water missing, cannot calulate tubrulent heat fluxes')
+ logger.info('t_l, p_l or rh_l_wrt_ice_or_water missing, cannot calculate turbulent heat fluxes')

  if len(station_config)==0:
  logger.warning('\n***\nThe station configuration file is missing or improperly passed to pypromice. Some processing steps might fail.\n***\n')
@@ -161,12 +172,30 @@ def process_surface_height(ds, data_adjustments_dir, station_config={}):
  ds['z_surf_1'] = ('time', ds['z_boom_u'].data * np.nan)
  ds['z_surf_2'] = ('time', ds['z_boom_u'].data * np.nan)

+ z_boom_best_u = station_boom_height.include_uncorrected_values(
+ ds["z_boom_u"],
+ ds["z_boom_cor_u"],
+ ds["t_u"],
+ ds["t_l"] if "t_l" in ds.data_vars else None,
+ ds["t_rad"] if "t_rad" in ds.data_vars else None)
+
+
+
+ if 'z_stake' in ds.data_vars and ds.z_stake.notnull().any():
+ # Calculate stake boom height correction with uncorrected values where needed
+ z_stake_best = station_boom_height.include_uncorrected_values(
+ ds["z_stake"],
+ ds["z_stake_cor"],
+ ds["t_u"],
+ ds["t_l"] if "t_l" in ds.data_vars else None,
+ ds["t_rad"] if "t_rad" in ds.data_vars else None)
+
  if ds.attrs['site_type'] == 'ablation':
  # Calculate surface heights for ablation sites
- ds['z_surf_1'] = 2.6 - ds['z_boom_u']
+ ds['z_surf_1'] = 2.6 - z_boom_best_u
  if ds.z_stake.notnull().any():
- first_valid_index = ds.time.where((ds.z_stake + ds.z_boom_u).notnull(), drop=True).data[0]
- ds['z_surf_2'] = ds.z_surf_1.sel(time=first_valid_index) + ds.z_stake.sel(time=first_valid_index) - ds['z_stake']
+ first_valid_index = ds.time.where((z_stake_best + z_boom_best_u).notnull(), drop=True).data[0]
+ ds['z_surf_2'] = ds.z_surf_1.sel(time=first_valid_index) + z_stake_best.sel(time=first_valid_index) - z_stake_best

  # Use corrected point data if available
  if 'z_pt_cor' in ds.data_vars:
@@ -174,17 +203,28 @@ def process_surface_height(ds, data_adjustments_dir, station_config={}):

  else:
  # Calculate surface heights for other site types
- first_valid_index = ds.time.where(ds.z_boom_u.notnull(), drop=True).data[0]
- ds['z_surf_1'] = ds.z_boom_u.sel(time=first_valid_index) - ds['z_boom_u']
+ first_valid_index = ds.time.where(z_boom_best_u.notnull(), drop=True).data[0]
+ ds['z_surf_1'] = z_boom_best_u.sel(time=first_valid_index) - z_boom_best_u
+
  if 'z_stake' in ds.data_vars and ds.z_stake.notnull().any():
- first_valid_index = ds.time.where(ds.z_stake.notnull(), drop=True).data[0]
- ds['z_surf_2'] = ds.z_stake.sel(time=first_valid_index) - ds['z_stake']
+ first_valid_index = ds.time.where(z_stake_best.notnull(), drop=True).data[0]
+ ds['z_surf_2'] = z_stake_best.sel(time=first_valid_index) - z_stake_best
+
  if 'z_boom_l' in ds.data_vars:
- # need a combine first because KAN_U switches from having a z_stake
- # to having a z_boom_l
- first_valid_index = ds.time.where(ds.z_boom_l.notnull(), drop=True).data[0]
+
+ # Calculate lower boom height correction with uncorrected values where needed
+ z_boom_best_l = station_boom_height.include_uncorrected_values(
+ ds["z_boom_l"],
+ ds["z_boom_cor_l"],
+ ds["t_l"],
+ ds["t_u"] if "t_u" in ds.data_vars else None,
+ ds["t_rad"] if "t_rad" in ds.data_vars else None)
+
+ # need a combine first because KAN_U switches from having a z_stake_best
+ # to having a z_boom_best_l
+ first_valid_index = ds.time.where(z_boom_best_l.notnull(), drop=True).data[0]
  ds['z_surf_2'] = ds['z_surf_2'].combine_first(
- ds.z_boom_l.sel(time=first_valid_index) - ds['z_boom_l'])
+ z_boom_best_l.sel(time=first_valid_index) - z_boom_best_l)

  # Adjust data for the created surface height variables
  ds = adjustData(ds, data_adjustments_dir, var_list=['z_surf_1', 'z_surf_2', 'z_ice_surf'])
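The three call sites above all funnel through the new station_boom_height.include_uncorrected_values helper, which prefers the temperature-corrected sonic-ranger height and falls back to the raw reading where the correction is missing. The helper's body is not part of this diff; below is a minimal sketch of the fallback pattern its arguments suggest, assuming the standard sonic-ranger speed-of-sound correction sqrt((T + 273.15) / 273.15) and the same temperature-fallback order as the call sites. The function name and internals are illustrative, not the module's actual code:

    import numpy as np

    def include_uncorrected_values_sketch(z, z_cor, t_primary, t_secondary=None, t_rad=None):
        # Prefer the corrected boom height; where it is missing, apply the
        # speed-of-sound correction to the raw reading, taking air temperature
        # from the first available sensor (assumed fallback order).
        t = t_primary
        for t_fallback in (t_secondary, t_rad):
            if t_fallback is not None:
                t = t.fillna(t_fallback)
        z_from_raw = z * np.sqrt((t + 273.15) / 273.15)  # assumed correction
        return z_cor.fillna(z_from_raw)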
@@ -221,8 +261,10 @@ def process_surface_height(ds, data_adjustments_dir, station_config={}):
  .rolling('1D', center=True, min_periods=1)
  .median())

- z_ice_surf = z_ice_surf.loc[ds.time]
- # here we make sure that the periods where both z_stake and z_pt are
+ z_ice_surf = z_ice_surf.reindex(ds.time,
+ method=None).interpolate(method='time')
+
+ # here we make sure that the periods where both z_stake_best and z_pt are
  # missing are also missing in z_ice_surf
  msk = ds['z_ice_surf'].notnull() | ds['z_surf_2_adj'].notnull()
  z_ice_surf = z_ice_surf.where(msk)
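The switch from .loc[ds.time] to reindex plus time interpolation is more than a style change: .loc raises a KeyError when ds.time contains timestamps absent from the smoothed series, whereas reindex(..., method=None) inserts NaN at unmatched timestamps and interpolate(method='time') then fills them in proportion to the elapsed time. A self-contained illustration of that behaviour:

    import pandas as pd

    s = pd.Series([0.0, 2.0],
                  index=pd.to_datetime(["2024-01-01 00:00", "2024-01-01 02:00"]))
    target = pd.to_datetime(["2024-01-01 00:00", "2024-01-01 00:30", "2024-01-01 02:00"])

    # method=None leaves unmatched timestamps as NaN ...
    out = s.reindex(target, method=None)
    # ... and time-based interpolation fills them proportionally to the gap
    print(out.interpolate(method="time"))  # value at 00:30 becomes 0.5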
@@ -234,7 +276,7 @@ def process_surface_height(ds, data_adjustments_dir, station_config={}):
  # sides are less than 0.01 m appart

  # Forward and backward fill to identify bounds of gaps
- df_filled = z_ice_surf.fillna(method='ffill').fillna(method='bfill')
+ df_filled = z_ice_surf.ffill().bfill()

  # Identify gaps and their start and end dates
  gaps = pd.DataFrame(index=z_ice_surf[z_ice_surf.isna()].index)
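This one-line change tracks pandas itself: fillna(method='ffill') has been deprecated (since pandas 2.1) in favour of the dedicated ffill()/bfill() methods, which behave identically:

    import pandas as pd

    s = pd.Series([1.0, None, None, 4.0])
    # Equivalent to the old s.fillna(method='ffill').fillna(method='bfill')
    print(s.ffill().bfill())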
@@ -253,7 +295,7 @@ def process_surface_height(ds, data_adjustments_dir, station_config={}):
  z_ice_surf.loc[gaps_to_fill] = df_filled.loc[gaps_to_fill]

  # bringing the variable into the dataset
- ds['z_ice_surf'] = z_ice_surf
+ ds['z_ice_surf'] = ('time', z_ice_surf.values)

  ds['z_surf_combined'] = np.maximum(ds['z_surf_combined'], ds['z_ice_surf'])
  ds['snow_height'] = np.maximum(0, ds['z_surf_combined'] - ds['z_ice_surf'])
@@ -271,6 +313,7 @@ def process_surface_height(ds, data_adjustments_dir, station_config={}):
  ice_temp_vars = [v for v in ds.data_vars if 't_i_' in v]
  vars_out = [v.replace('t', 'd_t') for v in ice_temp_vars]
  vars_out.append('t_i_10m')
+
  df_out = get_thermistor_depth(
  ds[ice_temp_vars + ['z_surf_combined']].to_dataframe(),
  ds.attrs['station_id'],
@@ -289,7 +332,7 @@ def combine_surface_height(df, site_type, threshold_ablation = -0.0002):
  period is estimated each year (either the period when z_pt_cor decreases
  or JJA if no better estimate) then different adjustmnents are conducted
  to stitch the three time series together: z_ice_surface (adjusted from
- z_pt_cor) or if unvailable, z_surf_2 (adjusted from z_stake)
+ z_pt_cor) or if unavailable, z_surf_2 (adjusted from z_stake)
  are used in the ablation period while an average of z_surf_1 and z_surf_2
  are used otherwise, after they are being adjusted to z_ice_surf at the end
  of the ablation season.
@@ -344,22 +387,24 @@ def combine_surface_height(df, site_type, threshold_ablation = -0.0002):

  # defining ice ablation period from the decrease of a smoothed version of z_pt
  # meaning when smoothed_z_pt.diff() < threshold_ablation
- # first smoothing
- smoothed_PT = (df['z_ice_surf']
- .resample('h')
- .interpolate(limit=72)
- .rolling('14D',center=True, min_periods=1)
- .mean())
- # second smoothing
- smoothed_PT = smoothed_PT.rolling('14D', center=True, min_periods=1).mean()
-
- smoothed_PT = smoothed_PT.reindex(df.index,method='ffill')
- # smoothed_PT.loc[df.z_ice_surf.isnull()] = np.nan
-
- # logical index where ablation is detected
- ind_ablation = np.logical_and(smoothed_PT.diff().values < threshold_ablation,
- np.isin(smoothed_PT.diff().index.month, [6, 7, 8, 9]))
-
+ hourly_interp = (df["z_ice_surf"]
+ .resample("h")
+ .interpolate(limit=72))
+ once_smoothed = hourly_interp.rolling("14D", center=True, min_periods=1).mean()
+ smoothed_PT = once_smoothed.rolling("14D", center=True, min_periods=1).mean()
+
+ # ablation detection
+ diff_series = smoothed_PT.diff()
+ ind_ablation = np.full_like(diff_series, False, dtype=bool)
+ ind_ablation = np.logical_and(diff_series.values < threshold_ablation,
+ np.isin(diff_series.index.month, [6, 7, 8, 9]))
+ # making sure that we only qualify as ablation timestamps where we actually have ablation data
+ msk = np.isnan(smoothed_PT.values)
+ ind_ablation[msk] = False
+
+ # reindex back to df
+ smoothed_PT = smoothed_PT.reindex(df.index, method="ffill")
+ ind_ablation = pd.Series(ind_ablation, index=diff_series.index).reindex(df.index, fill_value=False).values

  # finding the beginning and end of each period with True
  idx = np.argwhere(np.diff(np.r_[False,ind_ablation, False])).reshape(-1, 2)
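The rework keeps the same two-pass 14-day smoothing but names each step and, crucially, adds a mask so that timestamps with no pressure-transducer data can no longer be flagged as ablation. A runnable sketch of the same pattern on synthetic data (names and values are illustrative):

    import numpy as np
    import pandas as pd

    idx = pd.date_range("2023-06-01", "2023-09-30", freq="h")
    z_ice_surf = pd.Series(np.linspace(0.0, -1.5, len(idx)), index=idx)  # steady lowering

    hourly_interp = z_ice_surf.resample("h").interpolate(limit=72)
    once_smoothed = hourly_interp.rolling("14D", center=True, min_periods=1).mean()
    smoothed = once_smoothed.rolling("14D", center=True, min_periods=1).mean()

    diff = smoothed.diff()
    ind_ablation = (diff.values < -0.0002) & np.isin(diff.index.month, [6, 7, 8, 9])
    ind_ablation[np.isnan(smoothed.values)] = False  # no data, no ablation flag
    print(ind_ablation.sum(), "hourly timestamps flagged as ablation")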
@@ -378,13 +423,12 @@ def combine_surface_height(df, site_type, threshold_ablation = -0.0002):
  # finding the beginning and end of each period with True
  idx = np.argwhere(np.diff(np.r_[False,ind_ablation, False])).reshape(-1, 2)
  idx[:, 1] -= 1
-
  # because the smooth_PT sees 7 days ahead, it starts showing a decline
- # 7 days in advance, we therefore need to exclude the first 7 days of
+ # 7 days in advance, we therefore need to exclude the first few days of
  # each ablation period
  for start, end in idx:
  period_start = df.index[start]
- period_end = period_start + pd.Timedelta(days=7)
+ period_end = period_start + pd.Timedelta(days=3)
  exclusion_period = (df.index >= period_start) & (df.index < period_end)
  ind_ablation[exclusion_period] = False

@@ -393,8 +437,6 @@ def combine_surface_height(df, site_type, threshold_ablation = -0.0002):
  z=df["z_ice_surf_adj"].interpolate(limit=24*2).copy()

  # the surface heights are adjusted so that they start at 0
-
-
  if any(~np.isnan(hs2.iloc[:24*7])):
  hs2 = hs2 - hs2.iloc[:24*7].mean()

@@ -470,9 +512,8 @@ def combine_surface_height(df, site_type, threshold_ablation = -0.0002):
  # to hs1 and hs2 the year after.

  for i, y in enumerate(years):
- # if y == 2014:
- # import pdb; pdb.set_trace()
- logger.debug(str(y))
+ logger.debug(f'{y}: Ablation from {z.index[ind_start[i]]} to {z.index[ind_end[i]]}')
+
  # defining subsets of hs1, hs2, z
  hs1_jja = hs1[str(y)+'-06-01':str(y)+'-09-01']
  hs2_jja = hs2[str(y)+'-06-01':str(y)+'-09-01']
@@ -588,7 +629,7 @@ def combine_surface_height(df, site_type, threshold_ablation = -0.0002):
  # import pdb; pdb.set_trace()
  # if there's ablation and
  # if there are PT data available at the end of the melt season
- if z.iloc[(ind_end[i]-24*7):(ind_end[i]+24*7)].notnull().any():
+ if z.iloc[(ind_end[i]-24*7):ind_end[i]].notnull().any():
  logger.debug('adjusting hs2 to z')
  # then we adjust hs2 to the end-of-ablation z
  # first trying at the end of melt season
@@ -605,7 +646,7 @@ def combine_surface_height(df, site_type, threshold_ablation = -0.0002):
  np.nanmean(hs2.iloc[(ind_start[i+1]-24*7):(ind_start[i+1]+24*7)]) + \
  np.nanmean(z.iloc[(ind_start[i+1]-24*7):(ind_start[i+1]+24*7)])
  else:
- logger.debug('no ablation')
+ logger.debug('no ablation data')
  hs1_following_winter = hs1[str(y)+'-09-01':str(y+1)+'-03-01'].copy()
  hs2_following_winter = hs2[str(y)+'-09-01':str(y+1)+'-03-01'].copy()
  if all(np.isnan(hs2_following_winter)):
@@ -880,14 +921,18 @@ def get_thermistor_depth(df_in, site, station_config):

  # removing negative depth
  df_in.loc[df_in[depth_cols_name[i]]<0, depth_cols_name[i]] = np.nan
- logger.info("interpolating 10 m firn/ice temperature")
- df_in['t_i_10m'] = interpolate_temperature(
- df_in.index.values,
- df_in[depth_cols_name].values.astype(float),
- df_in[temp_cols_name].values.astype(float),
+
+ logger.info("interpolating 10 m firn/ice temperature (on hourly values)")
+ df_in_h = df_in[depth_cols_name+temp_cols_name].resample('h').mean()
+ df_in_h['t_i_10m'] = interpolate_temperature(
+ df_in_h.index.values,
+ df_in_h[depth_cols_name].values.astype(float),
+ df_in_h[temp_cols_name].values.astype(float),
  kind="linear",
  min_diff_to_depth=1.5,
  ).set_index('date').values
+ df_in['t_i_10m'] = df_in_h['t_i_10m'].reindex(df_in.index,
+ method=None)

  # filtering
  ind_pos = df_in["t_i_10m"] > 0.1
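Interpolating the 10 m temperature on hourly means and then mapping back with reindex(..., method=None) means only timestamps that fall exactly on the hourly grid receive a value; anything off-grid stays NaN rather than being filled from a neighbour:

    import pandas as pd

    hourly = pd.Series([1.0, 2.0],
                       index=pd.to_datetime(["2024-01-01 00:00", "2024-01-01 01:00"]))
    raw_index = pd.to_datetime(["2024-01-01 00:00", "2024-01-01 00:10"])

    # 00:00 matches the hourly grid and is copied; 00:10 stays NaN
    print(hourly.reindex(raw_index, method=None))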
@@ -996,7 +1041,7 @@ def piecewise_smoothing_and_interpolation(data_series, breaks):

  Parameters
  ----------
- data_series : pandas.Series
+ data_series : pd.Series
  Series of observed latitude, longitude or elevation with datetime index.
  breaks: list
  List of timestamps of station relocation. First and last item should be
@@ -1228,56 +1273,6 @@ def calculate_viscosity(T_h, T_0, rho_atm):
  # Kinematic viscosity of air in m^2/s
  return mu / rho_atm

- def calculate_specific_humidity(T_0, T_100, T_h, p_h, rh_h_wrt_ice_or_water, es_0=6.1071, es_100=1013.246, eps=0.622):
- '''Calculate specific humidity
- Parameters
- ----------
- T_0 : float
- Steam point temperature. Default is 273.15.
- T_100 : float
- Steam point temperature in Kelvin
- T_h : xarray.DataArray
- Air temperature
- p_h : xarray.DataArray
- Air pressure
- rh_h_wrt_ice_or_water : xarray.DataArray
- Relative humidity corrected
- es_0 : float
- Saturation vapour pressure at the melting point (hPa)
- es_100 : float
- Saturation vapour pressure at steam point temperature (hPa)
- eps : int
- ratio of molar masses of vapor and dry air (0.622)
-
- Returns
- -------
- xarray.DataArray
- Specific humidity data array
- '''
- # Saturation vapour pressure above 0 C (hPa)
- es_wtr = 10**(-7.90298 * (T_100 / (T_h + T_0) - 1) + 5.02808 * np.log10(T_100 / (T_h + T_0))
- - 1.3816E-7 * (10**(11.344 * (1 - (T_h + T_0) / T_100)) - 1)
- + 8.1328E-3 * (10**(-3.49149 * (T_100 / (T_h + T_0) -1)) - 1) + np.log10(es_100))
-
- # Saturation vapour pressure below 0 C (hPa)
- es_ice = 10**(-9.09718 * (T_0 / (T_h + T_0) - 1) - 3.56654
- * np.log10(T_0 / (T_h + T_0)) + 0.876793
- * (1 - (T_h + T_0) / T_0)
- + np.log10(es_0))
-
- # Specific humidity at saturation (incorrect below melting point)
- q_sat = eps * es_wtr / (p_h - (1 - eps) * es_wtr)
-
- # Replace saturation specific humidity values below melting point
- freezing = T_h < 0
- q_sat[freezing] = eps * es_ice[freezing] / (p_h[freezing] - (1 - eps) * es_ice[freezing])
-
- q_nan = np.isnan(T_h) | np.isnan(p_h)
- q_sat[q_nan] = np.nan
-
- # Convert to kg/kg
- return rh_h_wrt_ice_or_water * q_sat / 100
-
  if __name__ == "__main__":
  # unittest.main()
  pass
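The deleted calculate_specific_humidity is superseded by pypromice.core.variables.humidity.calculate_specific_humidity, whose three-argument call sites appear earlier in this file. For reference, here is a condensed, runnable restatement of the removed physics (same Goff-Gratch constants as above); this paraphrases the old code and is not the new module's implementation:

    import numpy as np

    def specific_humidity(t_c, p_hpa, rh_pct, t_0=273.15, t_100=373.15,
                          es_0=6.1071, es_100=1013.246, eps=0.622):
        # Goff-Gratch saturation vapour pressure over water (hPa)
        es_wtr = 10**(-7.90298 * (t_100 / (t_c + t_0) - 1)
                      + 5.02808 * np.log10(t_100 / (t_c + t_0))
                      - 1.3816e-7 * (10**(11.344 * (1 - (t_c + t_0) / t_100)) - 1)
                      + 8.1328e-3 * (10**(-3.49149 * (t_100 / (t_c + t_0) - 1)) - 1)
                      + np.log10(es_100))
        # Goff-Gratch saturation vapour pressure over ice (hPa)
        es_ice = 10**(-9.09718 * (t_0 / (t_c + t_0) - 1)
                      - 3.56654 * np.log10(t_0 / (t_c + t_0))
                      + 0.876793 * (1 - (t_c + t_0) / t_0)
                      + np.log10(es_0))
        es = np.where(t_c < 0, es_ice, es_wtr)       # ice branch below 0 degC
        q_sat = eps * es / (p_hpa - (1 - eps) * es)  # kg/kg at saturation
        return rh_pct * q_sat / 100.0                # scale by relative humidity

    print(specific_humidity(np.array([-10.0]), np.array([850.0]), np.array([80.0])))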
pypromice/pipeline/__init__.py

@@ -0,0 +1,4 @@
+ from pypromice.pipeline.aws import *
+ from pypromice.pipeline import L0toL1
+ from pypromice.pipeline import L1toL2
+ from pypromice.pipeline import L2toL3
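Because the pypromice.process package is gone, downstream code importing from it must move to the new layout; the renames in the file list above map old to new locations one-to-one, for example:

    # pypromice 1.5.3
    from pypromice.process.aws import AWS
    from pypromice.process.L2toL3 import toL3

    # pypromice 1.7.0
    from pypromice.pipeline.aws import AWS
    from pypromice.pipeline.L2toL3 import toL3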
pypromice/{process → pipeline}/aws.py

@@ -16,11 +16,13 @@ from importlib import metadata


  import pypromice.resources
- from pypromice.process.L0toL1 import toL1
- from pypromice.process.L1toL2 import toL2
- from pypromice.process.L2toL3 import toL3
- from pypromice.process import write, load, utilities
- from pypromice.utilities.git import get_commit_hash_and_check_dirty
+ from pypromice.pipeline.L0toL1 import toL1
+ from pypromice.pipeline.L1toL2 import toL2
+ from pypromice.pipeline.L2toL3 import toL3
+ from pypromice.pipeline import utilities
+ from pypromice.io import write
+ from pypromice.io.ingest.l0 import (load_data_files, load_config)
+ from pypromice.io.ingest.git import get_commit_hash_and_check_dirty

  pd.set_option("display.precision", 2)
  xr.set_options(keep_attrs=True)
@@ -66,7 +68,6 @@ class AWS(object):
  )

  # Load config, variables CSF standards, and L0 files
- self.config = self.loadConfig(config_file, inpath)
  self.vars = pypromice.resources.load_variables(var_file)
  self.meta = pypromice.resources.load_metadata(meta_file)
  self.data_issues_repository = Path(data_issues_repository)
@@ -85,7 +86,9 @@ class AWS(object):
  self.meta["source"] = json.dumps(source_dict)

  # Load config file
- L0 = self.loadL0()
+ config = load_config(config_file, inpath)
+ L0 = load_data_files(config)
+
  self.L0 = []
  for l in L0:
  n = write.getColNames(self.vars, l)
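Config parsing and L0 file reading now live outside the AWS class, in pypromice.io.ingest.l0. Judging from the call site above, the pair can also be used standalone; a hedged sketch, where the paths are hypothetical and the return types are inferred from the old loadConfig/loadL0 docstrings removed below:

    from pypromice.io.ingest.l0 import load_config, load_data_files

    # Same arguments as the old AWS.loadConfig: a station TOML and the L0 folder
    config = load_config("aws-l0/raw/config/QAS_L.toml", "aws-l0/raw/QAS_L/")

    # Presumably a list of L0 xr.Dataset objects, as loadL0 used to return
    for ds in load_data_files(config):
        print(ds)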
@@ -148,78 +151,3 @@ class AWS(object):
  logger.info("Level 3 processing...")
  self.L3 = toL3(self.L2, data_adjustments_dir=self.data_issues_repository / "adjustments")

- def loadConfig(self, config_file, inpath):
- """Load configuration from .toml file
-
- Parameters
- ----------
- config_file : str
- TOML file path
- inpath : str
- Input folder directory where L0 files can be found
-
- Returns
- -------
- conf : dict
- Configuration parameters
- """
- conf = load.getConfig(config_file, inpath)
- return conf
-
- def loadL0(self):
- """Load level 0 (L0) data from associated TOML-formatted
- config file and L0 data file
-
- Try readL0file() using the config with msg_lat & msg_lon appended. The
- specific ParserError except will occur when the number of columns in
- the tx file does not match the expected columns. In this case, remove
- msg_lat & msg_lon from the config and call readL0file() again. These
- station files either have no data after Nov 2022 (when msg_lat &
- msg_lon were added to processing), or for whatever reason these fields
- did not exist in the modem message and were not added.
-
- Returns
- -------
- ds_list : list
- List of L0 xr.Dataset objects
- """
- ds_list = []
- for k in self.config.keys():
- target = self.config[k]
- try:
- ds_list.append(self.readL0file(target))
-
- except pd.errors.ParserError as e:
- # ParserError: Too many columns specified: expected 40 and found 38
- # logger.info(f'-----> No msg_lat or msg_lon for {k}')
- for item in ["msg_lat", "msg_lon"]:
- target["columns"].remove(item) # Also removes from self.config
- ds_list.append(self.readL0file(target))
- logger.info(f"L0 data successfully loaded from {k}")
- return ds_list
-
- def readL0file(self, conf):
- """Read L0 .txt file to Dataset object using config dictionary and
- populate with initial metadata
-
- Parameters
- ----------
- conf : dict
- Configuration parameters
-
- Returns
- -------
- ds : xr.Dataset
- L0 data
- """
- file_version = conf.get("file_version", -1)
- ds = load.getL0(
- conf["file"],
- conf["nodata"],
- conf["columns"],
- conf["skiprows"],
- file_version,
- time_offset=conf.get("time_offset"),
- )
- ds = utilities.populateMeta(ds, conf, ["columns", "skiprows", "modem"])
- return ds
pypromice/{process → pipeline}/get_l2.py

@@ -5,8 +5,8 @@ import sys
  from argparse import ArgumentParser
  from pathlib import Path

- from pypromice.process.aws import AWS
- from pypromice.process.write import prepare_and_write
+ from pypromice.pipeline.aws import AWS
+ from pypromice.io.write import prepare_and_write


  def parse_arguments_l2():
pypromice/{process → pipeline}/get_l2tol3.py

@@ -5,9 +5,9 @@ from pathlib import Path
  import xarray as xr
  from argparse import ArgumentParser
  import pypromice
- from pypromice.process.L2toL3 import toL3
+ from pypromice.pipeline.L2toL3 import toL3
  import pypromice.resources
- from pypromice.process.write import prepare_and_write
+ from pypromice.io.write import prepare_and_write
  logger = logging.getLogger(__name__)

  def parse_arguments_l2tol3(debug_args=None):
@@ -17,13 +17,13 @@ def parse_arguments_l2tol3(debug_args=None):
  parser.add_argument('-c', '--config_folder', type=str, required=True,
  default='../aws-l0/metadata/station_configurations/',
  help='Path to folder with sites configuration (TOML) files')
- parser.add_argument('-i', '--inpath', type=str, required=True,
+ parser.add_argument('-i', '--inpath', type=str, required=True,
  help='Path to Level 2 .nc data file')
- parser.add_argument('-o', '--outpath', default=None, type=str, required=False,
+ parser.add_argument('-o', '--outpath', default=None, type=str, required=False,
  help='Path where to write output')
- parser.add_argument('-v', '--variables', default=None, type=str,
+ parser.add_argument('-v', '--variables', default=None, type=str,
  required=False, help='File path to variables look-up table')
- parser.add_argument('-m', '--metadata', default=None, type=str,
+ parser.add_argument('-m', '--metadata', default=None, type=str,
  required=False, help='File path to metadata')
  parser.add_argument('--data_issues_path', '--issues', default=None, help="Path to data issues repository")

@@ -40,11 +40,11 @@ def get_l2tol3(config_folder: Path|str, inpath, outpath, variables, metadata, da
  level=logging.INFO,
  stream=sys.stdout,
  )
-
+
  # Define Level 2 dataset from file
  with xr.open_dataset(inpath) as l2:
  l2.load()
-
+
  # Remove encoding attributes from NetCDF
  for varname in l2.variables:
  if l2[varname].encoding!={}:
@@ -54,7 +54,7 @@ def get_l2tol3(config_folder: Path|str, inpath, outpath, variables, metadata, da
  l2.attrs['bedrock'] = l2.attrs['bedrock'] == 'True'
  if 'number_of_booms' in l2.attrs.keys():
  l2.attrs['number_of_booms'] = int(l2.attrs['number_of_booms'])
-
+
  # importing station_config (dict) from config_folder (str path)
  config_file = config_folder / (l2.attrs['station_id']+'.toml')

@@ -62,7 +62,7 @@ def get_l2tol3(config_folder: Path|str, inpath, outpath, variables, metadata, da
  # File exists, load the configuration
  station_config = toml.load(config_file)
  else:
- # File does not exist, initialize with standard info
+ # File does not exist, initialize with standard info
  # this was prefered by RSF over exiting with error
  logger.error("\n***\nNo station_configuration file for %s.\nPlease create one on AWS-L0/metadata/station_configurations.\n***"%l2.attrs['station_id'])
  station_config = {"stid":l2.attrs['station_id'],
@@ -70,7 +70,7 @@ def get_l2tol3(config_folder: Path|str, inpath, outpath, variables, metadata, da
  "project": "PROMICE",
  "location_type": "ice sheet",
  }
-
+
  # checking that the adjustement directory is properly given
  if data_issues_path is None:
  data_issues_path = Path("../PROMICE-AWS-data-issues")
@@ -82,7 +82,7 @@ def get_l2tol3(config_folder: Path|str, inpath, outpath, variables, metadata, da
  data_issues_path = Path(data_issues_path)

  data_adjustments_dir = data_issues_path / "adjustments"
-
+
  # Perform Level 3 processing
  l3 = toL3(l2, data_adjustments_dir, station_config)

@@ -92,20 +92,17 @@ def get_l2tol3(config_folder: Path|str, inpath, outpath, variables, metadata, da
  if outpath is not None:
  prepare_and_write(l3, outpath, v, m, '60min')
  prepare_and_write(l3, outpath, v, m, '1D')
- prepare_and_write(l3, outpath, v, m, 'M')
+ prepare_and_write(l3, outpath, v, m, 'MS')
  return l3

  def main():
  args = parse_arguments_l2tol3()
-
-
-
- _ = get_l2tol3(args.config_folder,
- args.inpath,
+ _ = get_l2tol3(args.config_folder,
+ args.inpath,
  args.outpath,
- args.variables,
- args.metadata,
+ args.variables,
+ args.metadata,
  args.data_issues_path)
-
- if __name__ == "__main__":
+
+ if __name__ == "__main__":
  main()
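The 'M' to 'MS' change in the monthly output call is worth noting: in pandas, 'M' meant month-end and was deprecated in favour of 'ME' in pandas 2.2, so 'MS' (month start) both avoids the deprecated alias and labels each monthly aggregate by the first day of the month (the motivation is inferred, not stated in this diff):

    import pandas as pd

    print(pd.date_range("2023-01-01", periods=3, freq="MS"))
    # DatetimeIndex(['2023-01-01', '2023-02-01', '2023-03-01'], dtype='datetime64[ns]', freq='MS')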