pypromice 1.3.2.tar.gz → 1.3.3.tar.gz


Potentially problematic release.



Files changed (56)
  1. {pypromice-1.3.2/src/pypromice.egg-info → pypromice-1.3.3}/PKG-INFO +1 -1
  2. {pypromice-1.3.2 → pypromice-1.3.3}/setup.py +1 -1
  3. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/process/L0toL1.py +40 -2
  4. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/process/value_clipping.py +2 -0
  5. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/qc/github_data_issues.py +55 -53
  6. {pypromice-1.3.2 → pypromice-1.3.3/src/pypromice.egg-info}/PKG-INFO +1 -1
  7. {pypromice-1.3.2 → pypromice-1.3.3}/LICENSE.txt +0 -0
  8. {pypromice-1.3.2 → pypromice-1.3.3}/MANIFEST.in +0 -0
  9. {pypromice-1.3.2 → pypromice-1.3.3}/README.md +0 -0
  10. {pypromice-1.3.2 → pypromice-1.3.3}/setup.cfg +0 -0
  11. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/__init__.py +0 -0
  12. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/get/__init__.py +0 -0
  13. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/get/get.py +0 -0
  14. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/get/get_promice_data.py +0 -0
  15. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/postprocess/__init__.py +0 -0
  16. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/postprocess/csv2bufr.py +0 -0
  17. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/postprocess/get_bufr.py +0 -0
  18. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/postprocess/wmo_config.py +0 -0
  19. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/process/L1toL2.py +0 -0
  20. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/process/L2toL3.py +0 -0
  21. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/process/__init__.py +0 -0
  22. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/process/aws.py +0 -0
  23. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/process/get_l3.py +0 -0
  24. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/process/join_l3.py +0 -0
  25. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/process/metadata.csv +0 -0
  26. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/process/variables.csv +0 -0
  27. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/qc/__init__.py +0 -0
  28. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/qc/percentiles/__init__.py +0 -0
  29. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/qc/percentiles/compute_thresholds.py +0 -0
  30. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/qc/percentiles/outlier_detector.py +0 -0
  31. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/qc/percentiles/thresholds.csv +0 -0
  32. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/qc/persistence.py +0 -0
  33. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/qc/persistence_test.py +0 -0
  34. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/test/test_config1.toml +0 -0
  35. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/test/test_config2.toml +0 -0
  36. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/test/test_email +0 -0
  37. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/test/test_payload_formats.csv +0 -0
  38. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/test/test_payload_types.csv +0 -0
  39. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/test/test_percentile.py +0 -0
  40. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/test/test_raw1.txt +0 -0
  41. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/test/test_raw_DataTable2.txt +0 -0
  42. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/test/test_raw_SlimTableMem1.txt +0 -0
  43. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/test/test_raw_transmitted1.txt +0 -0
  44. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/test/test_raw_transmitted2.txt +0 -0
  45. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/tx/__init__.py +0 -0
  46. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/tx/get_l0tx.py +0 -0
  47. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/tx/get_msg.py +0 -0
  48. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/tx/get_watsontx.py +0 -0
  49. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/tx/payload_formats.csv +0 -0
  50. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/tx/payload_types.csv +0 -0
  51. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/tx/tx.py +0 -0
  52. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice.egg-info/SOURCES.txt +0 -0
  53. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice.egg-info/dependency_links.txt +0 -0
  54. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice.egg-info/entry_points.txt +0 -0
  55. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice.egg-info/requires.txt +0 -0
  56. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice.egg-info/top_level.txt +0 -0
{pypromice-1.3.2/src/pypromice.egg-info → pypromice-1.3.3}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: pypromice
-Version: 1.3.2
+Version: 1.3.3
 Summary: PROMICE/GC-Net data processing toolbox
 Home-page: https://github.com/GEUS-Glaciology-and-Climate/pypromice
 Author: GEUS Glaciology and Climate
{pypromice-1.3.2 → pypromice-1.3.3}/setup.py

@@ -5,7 +5,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
 
 setuptools.setup(
     name="pypromice",
-    version="1.3.2",
+    version="1.3.3",
     author="GEUS Glaciology and Climate",
     description="PROMICE/GC-Net data processing toolbox",
     long_description=long_description,
{pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/process/L0toL1.py

@@ -57,7 +57,9 @@ def toL1(L0, vars_df, T_0=273.15, tilt_threshold=-100):
     ds['ulr'] = ((ds['ulr'] * 10) / ds.attrs['ulr_eng_coef']) + 5.67E-8*(ds['t_rad'] + T_0)**4
 
     ds['z_boom_u'] = _reformatArray(ds['z_boom_u'])                       # Reformat boom height
-    ds['z_boom_u'] = ds['z_boom_u'] * ((ds['t_u'] + T_0)/T_0)**0.5        # Adjust sonic ranger readings for sensitivity to air temperature
+
+    ds['t_u_interp'] = interpTemp(ds['t_u'], vars_df)
+    ds['z_boom_u'] = ds['z_boom_u'] * ((ds['t_u_interp'] + T_0)/T_0)**0.5 # Adjust sonic ranger readings for sensitivity to air temperature
 
     if ds['gps_lat'].dtype.kind == 'O':                                   # Decode and reformat GPS information
         if 'NH' in ds['gps_lat'].dropna(dim='time').values[1]:
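Note: the sonic ranger reports a distance that assumes the speed of sound at T_0, so readings are rescaled by sqrt((T + T_0)/T_0); with the raw t_u series, every temperature gap turned the corrected boom height into NaN, which is what the new interpTemp call avoids. A minimal sketch of the correction on made-up values (t_air and z_raw are illustrative names, not pypromice API):

import numpy as np

T_0 = 273.15                              # reference temperature, K
t_air = np.array([-20.0, np.nan, -18.5])  # air temperature, deg C
z_raw = np.array([2.61, 2.62, 2.60])      # raw sonic ranger distance, m

# Speed of sound scales with sqrt(T), so rescale the assumed-T_0 reading
z_adj = z_raw * ((t_air + T_0) / T_0) ** 0.5
print(z_adj)                              # the NaN temperature yields a NaN height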
@@ -113,7 +115,8 @@ def toL1(L0, vars_df, T_0=273.15, tilt_threshold=-100):
 
     elif ds.attrs['number_of_booms']==2:                                  # 2-boom processing
         ds['z_boom_l'] = _reformatArray(ds['z_boom_l'])                   # Reformat boom height
-        ds['z_boom_l'] = ds['z_boom_l'] * ((ds['t_l'] + T_0)/T_0)**0.5    # Adjust sonic ranger readings for sensitivity to air temperature
+        ds['t_l_interp'] = interpTemp(ds['t_l'], vars_df)
+        ds['z_boom_l'] = ds['z_boom_l'] * ((ds['t_l_interp'] + T_0)/T_0)**0.5 # Adjust sonic ranger readings for sensitivity to air temperature
 
     ds = clip_values(ds, vars_df)
     for key in ['hygroclip_t_offset', 'dsr_eng_coef', 'usr_eng_coef',
@@ -254,6 +257,41 @@ def getPressDepth(z_pt, p, pt_antifreeze, pt_z_factor, pt_z_coef, pt_z_p_coef):
 
     return z_pt_cor, z_pt
 
+
+def interpTemp(temp, var_configurations, max_interp=pd.Timedelta(12,'h')):
+    '''Clip and interpolate temperature dataset for use in corrections
+
+    Parameters
+    ----------
+    temp : `xarray.DataArray`
+        Array of temperature data
+    var_configurations : `pandas.DataFrame`
+        Dataframe to retrieve attribute hi-lo values from for temperature clipping
+    max_interp : `pandas.Timedelta`
+        Maximum time steps to interpolate across. The default is 12 hours.
+
+    Returns
+    -------
+    temp_interp : `xarray.DataArray`
+        Array of interpolated temperature data
+    '''
+    # Determine if upper or lower temperature array
+    var = temp.name.lower()
+
+    # Find range threshold and use it to clip measurements
+    cols = ["lo", "hi", "OOL"]
+    assert set(cols) <= set(var_configurations.columns)
+    variable_limits = var_configurations[cols].dropna(how="all")
+    temp = temp.where(temp >= variable_limits.loc[var, 'lo'])
+    temp = temp.where(temp <= variable_limits.loc[var, 'hi'])
+
+    # Drop duplicates and interpolate across NaN values
+    # temp_interp = temp.drop_duplicates(dim='time', keep='first')
+    temp_interp = temp.interpolate_na(dim='time', max_gap=max_interp)
+
+    return temp_interp
+
+
 def smoothTilt(tilt, win_size):
     '''Smooth tilt values using a rolling window. This is translated from the
     previous IDL/GDL smoothing algorithm:
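Note: interpTemp leans on xarray's interpolate_na, whose max_gap argument caps how long a run of NaNs may be bridged; gap length is measured between the valid samples that enclose it. A small sketch on synthetic data:

import numpy as np
import pandas as pd
import xarray as xr

time = pd.date_range("2023-06-01", periods=6, freq="6h")
temp = xr.DataArray([-5.0, np.nan, np.nan, -6.0, np.nan, -7.0],
                    dims="time", coords={"time": time}, name="t_u")

# The 18 h gap (two missing samples) stays NaN; the 12 h gap is filled.
filled = temp.interpolate_na(dim="time", max_gap=pd.Timedelta(12, "h"))
print(filled.values)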
{pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/process/value_clipping.py

@@ -29,6 +29,7 @@ def clip_values(
 
     variable_limits = var_configurations[cols].dropna(how="all")
     for var, row in variable_limits.iterrows():
+
         if var not in list(ds.variables):
             continue
 
@@ -56,4 +57,5 @@ def clip_values(
             ds[var] = ds[var].where(ds[var] >= row.lo)
         if ~np.isnan(row.hi):
             ds[var] = ds[var].where(ds[var] <= row.hi)
+
     return ds
{pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/qc/github_data_issues.py

@@ -36,7 +36,7 @@ def flagNAN(ds_in,
     ds : xr.Dataset
         Level 0 data with flagged data
     '''
-    ds = ds_in.copy()
+    ds = ds_in.copy(deep=True)
     df = None
 
     df = _getDF(flag_url + ds.attrs["station_id"] + ".csv",
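Note: xarray's Dataset.copy() defaults to deep=False, a shallow copy whose variables share the caller's data buffers, so in-place edits inside flagNAN could leak back into the input dataset; deep=True makes the copy independent. An illustrative sketch (not pypromice code):

import numpy as np
import xarray as xr

ds_in = xr.Dataset({"t_u": ("time", np.array([1.0, 2.0, 3.0]))})

shallow = ds_in.copy()        # deep=False by default: shared buffers
shallow["t_u"][0] = np.nan    # this write is visible through ds_in too
print(ds_in["t_u"].values)    # [nan  2.  3.]

ds_in["t_u"][0] = 1.0         # restore the value
deep = ds_in.copy(deep=True)  # independent copy of the data
deep["t_u"][0] = np.nan
print(ds_in["t_u"].values)    # [1. 2. 3.], caller's dataset unchanged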
@@ -71,7 +71,7 @@ def flagNAN(ds_in,
 
     for v in varlist:
         if v in list(ds.keys()):
-            logger.info(f'---> flagging {t0} {t1} {v}')
+            logger.info(f'---> flagging {v} between {t0} and {t1}')
             ds[v] = ds[v].where((ds['time'] < t0) | (ds['time'] > t1))
         else:
            logger.info(f'---> could not flag {v} not in dataset')
@@ -99,7 +99,7 @@ def adjustTime(ds,
     ds : xr.Dataset
         Level 0 data with flagged data
     '''
-    ds_out = ds.copy()
+    ds_out = ds.copy(deep=True)
     adj_info=None
 
     adj_info = _getDF(adj_url + ds.attrs["station_id"] + ".csv",
@@ -165,7 +165,7 @@ def adjustData(ds,
     ds : xr.Dataset
         Level 0 data with flagged data
     '''
-    ds_out = ds.copy()
+    ds_out = ds.copy(deep=True)
     adj_info=None
     adj_info = _getDF(adj_url + ds.attrs["station_id"] + ".csv",
                       os.path.join(adj_dir, ds.attrs["station_id"] + ".csv"),
@@ -176,13 +176,11 @@ def adjustData(ds,
     # removing potential time shifts from the adjustment list
     adj_info = adj_info.loc[adj_info.adjust_function != "time_shift", :]
 
-    # if t1 is left empty, then adjustment is applied until the end of the file
-    adj_info.loc[adj_info.t0.isnull(), "t0"] = ds_out.time.values[0]
-    adj_info.loc[adj_info.t1.isnull(), "t1"] = ds_out.time.values[-1]
-    # making all timestamps timezone naive (compatibility with xarray)
-    adj_info.t0 = pd.to_datetime(adj_info.t0).dt.tz_localize(None)
-    adj_info.t1 = pd.to_datetime(adj_info.t1).dt.tz_localize(None)
-
+    # making sure that t0 and t1 columns are object dtype, then replacing NaN with None
+    adj_info[['t0','t1']] = adj_info[['t0','t1']].astype(object)
+    adj_info.loc[adj_info.t1.isnull()|(adj_info.t1==''), "t1"] = None
+    adj_info.loc[adj_info.t0.isnull()|(adj_info.t0==''), "t0"] = None
+
     # if "*" is in the variable name then we interpret it as regex
     selec = adj_info['variable'].str.contains('\*') & (adj_info['variable'] != "*")
     for ind in adj_info.loc[selec, :].index:
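Note: keeping an empty t0/t1 as None works because slice(None, t1) and slice(t0, None) are naturally open-ended, so the dataset's first and last timestamps no longer need to be substituted in beforehand. Sketch on made-up data:

import numpy as np
import pandas as pd
import xarray as xr

time = pd.date_range("2023-01-01", periods=4, freq="D")
da = xr.DataArray(np.arange(4.0), dims="time", coords={"time": time})

print(da.loc[dict(time=slice(None, "2023-01-02"))].values)  # [0. 1.]
print(da.loc[dict(time=slice("2023-01-02", None))].values)  # [1. 2. 3.]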
@@ -217,88 +215,92 @@ def adjustData(ds,
             adj_info.loc[var].adjust_function,
             adj_info.loc[var].adjust_value,
         ):
-            if (t0 > pd.to_datetime(ds_out.time.values[-1])) | (t1 < pd.to_datetime(ds_out.time.values[0])):
+            # making all timestamps timezone naive (compatibility with xarray)
+            if isinstance(t0, str):
+                t0 = pd.to_datetime(t0, utc=True).tz_localize(None)
+            if isinstance(t1, str):
+                t1 = pd.to_datetime(t1, utc=True).tz_localize(None)
+
+            index_slice = dict(time=slice(t0, t1))
+
+            if len(ds_out[var].loc[index_slice].time.time) == 0:
+                logger.info("Time range does not intersect with dataset")
                 continue
-            logger.info(f'---> {t0} {t1} {var} {func} {val}')
+
+            logger.info(f'---> adjusting {var} between {t0} and {t1} ({func} {val})')
+
             if func == "add":
-                ds_out[var].loc[dict(time=slice(t0, t1))] = ds_out[var].loc[dict(time=slice(t0, t1))].values + val
+                ds_out[var].loc[index_slice] = ds_out[var].loc[index_slice].values + val
                 # flagging adjusted values
                 # if var + "_adj_flag" not in ds_out.columns:
                 #     ds_out[var + "_adj_flag"] = 0
-                # msk = ds_out[var].loc[dict(time=slice(t0, t1))])].notnull()
-                # ind = ds_out[var].loc[dict(time=slice(t0, t1))])].loc[msk].time
+                # msk = ds_out[var].loc[index_slice])].notnull()
+                # ind = ds_out[var].loc[index_slice])].loc[msk].time
                 # ds_out.loc[ind, var + "_adj_flag"] = 1
 
             if func == "multiply":
-                ds_out[var].loc[dict(time=slice(t0, t1))] = ds_out[var].loc[dict(time=slice(t0, t1))].values * val
+                ds_out[var].loc[index_slice] = ds_out[var].loc[index_slice].values * val
                 if "DW" in var:
-                    ds_out[var].loc[dict(time=slice(t0, t1))] = ds_out[var].loc[dict(time=slice(t0, t1))] % 360
+                    ds_out[var].loc[index_slice] = ds_out[var].loc[index_slice] % 360
                 # flagging adjusted values
                 # if var + "_adj_flag" not in ds_out.columns:
                 #     ds_out[var + "_adj_flag"] = 0
-                # msk = ds_out[var].loc[dict(time=slice(t0, t1))].notnull()
-                # ind = ds_out[var].loc[dict(time=slice(t0, t1))].loc[msk].time
+                # msk = ds_out[var].loc[index_slice].notnull()
+                # ind = ds_out[var].loc[index_slice].loc[msk].time
                 # ds_out.loc[ind, var + "_adj_flag"] = 1
 
             if func == "min_filter":
-                tmp = ds_out[var].loc[dict(time=slice(t0, t1))].values
+                tmp = ds_out[var].loc[index_slice].values
                 tmp[tmp < val] = np.nan
 
             if func == "max_filter":
-                tmp = ds_out[var].loc[dict(time=slice(t0, t1))].values
+                tmp = ds_out[var].loc[index_slice].values
                 tmp[tmp > val] = np.nan
-                ds_out[var].loc[dict(time=slice(t0, t1))] = tmp
+                ds_out[var].loc[index_slice] = tmp
 
             if func == "upper_perc_filter":
-                tmp = ds_out[var].loc[dict(time=slice(t0, t1))].copy()
-                df_w = ds_out[var].loc[dict(time=slice(t0, t1))].resample("14D").quantile(1 - val / 100)
-                df_w = ds_out[var].loc[dict(time=slice(t0, t1))].resample("14D").var()
+                tmp = ds_out[var].loc[index_slice].copy()
+                df_w = ds_out[var].loc[index_slice].resample(time="14D").quantile(1 - val / 100)
+                df_w = ds_out[var].loc[index_slice].resample(time="14D").var()
                 for m_start, m_end in zip(df_w.time[:-2], df_w.time[1:]):
                     msk = (tmp.time >= m_start) & (tmp.time < m_end)
                     values_month = tmp.loc[msk].values
                     values_month[values_month < df_w.loc[m_start]] = np.nan
                     tmp.loc[msk] = values_month
 
-                ds_out[var].loc[dict(time=slice(t0, t1))] = tmp.values
+                ds_out[var].loc[index_slice] = tmp.values
 
             if func == "biweekly_upper_range_filter":
-                tmp = ds_out[var].loc[dict(time=slice(t0, t1))].copy()
-                df_max = ds_out[var].loc[dict(time=slice(t0, t1))].resample("14D").max()
-                for m_start, m_end in zip(df_max.time[:-2], df_max.time[1:]):
-                    msk = (tmp.time >= m_start) & (tmp.time < m_end)
-                    lim = df_max.loc[m_start] - val
-                    values_month = tmp.loc[msk].values
-                    values_month[values_month < lim] = np.nan
-                    tmp.loc[msk] = values_month
-                # remaining samples following outside of the last 2 weeks window
-                msk = tmp.time >= m_end
-                lim = df_max.loc[m_start] - val
-                values_month = tmp.loc[msk].values
-                values_month[values_month < lim] = np.nan
-                tmp.loc[msk] = values_month
-                # updating original pandas
-                ds_out[var].loc[dict(time=slice(t0, t1))] = tmp.values
+                df_max = (
+                    ds_out[var].loc[index_slice].copy(deep=True)
+                    .resample(time="14D", offset='7D').max()
+                    .sel(time=ds_out[var].loc[index_slice].time.values, method='ffill')
+                )
+                df_max['time'] = ds_out[var].loc[index_slice].time
+                # updating original pandas
+                ds_out[var].loc[index_slice] = ds_out[var].loc[index_slice].where(ds_out[var].loc[index_slice] > df_max-val)
+
 
             if func == "hampel_filter":
-                tmp = ds_out[var].loc[dict(time=slice(t0, t1))]
+                tmp = ds_out[var].loc[index_slice]
                 tmp = _hampel(tmp, k=7 * 24, t0=val)
-                ds_out[var].loc[dict(time=slice(t0, t1))] = tmp.values
+                ds_out[var].loc[index_slice] = tmp.values
 
             if func == "grad_filter":
-                tmp = ds_out[var].loc[dict(time=slice(t0, t1))].copy()
-                msk = ds_out[var].loc[dict(time=slice(t0, t1))].copy().diff()
+                tmp = ds_out[var].loc[index_slice].copy()
+                msk = ds_out[var].loc[index_slice].copy().diff()
                 tmp[np.roll(msk.abs() > val, -1)] = np.nan
-                ds_out[var].loc[dict(time=slice(t0, t1))] = tmp
+                ds_out[var].loc[index_slice] = tmp
 
             if "swap_with_" in func:
                 var2 = func[10:]
-                val_var = ds_out[var].loc[dict(time=slice(t0, t1))].values.copy()
-                val_var2 = ds_out[var2].loc[dict(time=slice(t0, t1))].values.copy()
-                ds_out[var2].loc[dict(time=slice(t0, t1))] = val_var
-                ds_out[var].loc[dict(time=slice(t0, t1))] = val_var2
+                val_var = ds_out[var].loc[index_slice].values.copy()
+                val_var2 = ds_out[var2].loc[index_slice].values.copy()
+                ds_out[var2].loc[index_slice] = val_var
+                ds_out[var].loc[index_slice] = val_var2
 
             if func == "rotate":
-                ds_out[var].loc[dict(time=slice(t0, t1))] = (ds_out[var].loc[dict(time=slice(t0, t1))].values + val) % 360
+                ds_out[var].loc[index_slice] = (ds_out[var].loc[index_slice].values + val) % 360
 
     return ds_out
{pypromice-1.3.2 → pypromice-1.3.3/src/pypromice.egg-info}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: pypromice
-Version: 1.3.2
+Version: 1.3.3
 Summary: PROMICE/GC-Net data processing toolbox
 Home-page: https://github.com/GEUS-Glaciology-and-Climate/pypromice
 Author: GEUS Glaciology and Climate