pypromice 1.3.1.tar.gz → 1.3.3.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (56)
  1. {pypromice-1.3.1/src/pypromice.egg-info → pypromice-1.3.3}/PKG-INFO +8 -5
  2. {pypromice-1.3.1 → pypromice-1.3.3}/README.md +7 -4
  3. {pypromice-1.3.1 → pypromice-1.3.3}/setup.py +2 -1
  4. {pypromice-1.3.1 → pypromice-1.3.3}/src/pypromice/process/L0toL1.py +58 -8
  5. {pypromice-1.3.1 → pypromice-1.3.3}/src/pypromice/process/L1toL2.py +23 -359
  6. {pypromice-1.3.1 → pypromice-1.3.3}/src/pypromice/process/metadata.csv +3 -3
  7. {pypromice-1.3.1 → pypromice-1.3.3}/src/pypromice/process/value_clipping.py +2 -0
  8. {pypromice-1.3.1 → pypromice-1.3.3}/src/pypromice/process/variables.csv +11 -11
  9. pypromice-1.3.3/src/pypromice/qc/github_data_issues.py +376 -0
  10. pypromice-1.3.3/src/pypromice/qc/percentiles/__init__.py +0 -0
  11. pypromice-1.3.3/src/pypromice/qc/percentiles/compute_thresholds.py +221 -0
  12. pypromice-1.3.3/src/pypromice/qc/percentiles/outlier_detector.py +112 -0
  13. pypromice-1.3.3/src/pypromice/qc/percentiles/thresholds.csv +312 -0
  14. pypromice-1.3.3/src/pypromice/test/test_percentile.py +229 -0
  15. {pypromice-1.3.1 → pypromice-1.3.3}/src/pypromice/tx/get_watsontx.py +9 -6
  16. {pypromice-1.3.1 → pypromice-1.3.3}/src/pypromice/tx/tx.py +8 -5
  17. {pypromice-1.3.1 → pypromice-1.3.3/src/pypromice.egg-info}/PKG-INFO +8 -5
  18. {pypromice-1.3.1 → pypromice-1.3.3}/src/pypromice.egg-info/SOURCES.txt +6 -0
  19. {pypromice-1.3.1 → pypromice-1.3.3}/LICENSE.txt +0 -0
  20. {pypromice-1.3.1 → pypromice-1.3.3}/MANIFEST.in +0 -0
  21. {pypromice-1.3.1 → pypromice-1.3.3}/setup.cfg +0 -0
  22. {pypromice-1.3.1 → pypromice-1.3.3}/src/pypromice/__init__.py +0 -0
  23. {pypromice-1.3.1 → pypromice-1.3.3}/src/pypromice/get/__init__.py +0 -0
  24. {pypromice-1.3.1 → pypromice-1.3.3}/src/pypromice/get/get.py +0 -0
  25. {pypromice-1.3.1 → pypromice-1.3.3}/src/pypromice/get/get_promice_data.py +0 -0
  26. {pypromice-1.3.1 → pypromice-1.3.3}/src/pypromice/postprocess/__init__.py +0 -0
  27. {pypromice-1.3.1 → pypromice-1.3.3}/src/pypromice/postprocess/csv2bufr.py +0 -0
  28. {pypromice-1.3.1 → pypromice-1.3.3}/src/pypromice/postprocess/get_bufr.py +0 -0
  29. {pypromice-1.3.1 → pypromice-1.3.3}/src/pypromice/postprocess/wmo_config.py +0 -0
  30. {pypromice-1.3.1 → pypromice-1.3.3}/src/pypromice/process/L2toL3.py +0 -0
  31. {pypromice-1.3.1 → pypromice-1.3.3}/src/pypromice/process/__init__.py +0 -0
  32. {pypromice-1.3.1 → pypromice-1.3.3}/src/pypromice/process/aws.py +0 -0
  33. {pypromice-1.3.1 → pypromice-1.3.3}/src/pypromice/process/get_l3.py +0 -0
  34. {pypromice-1.3.1 → pypromice-1.3.3}/src/pypromice/process/join_l3.py +0 -0
  35. {pypromice-1.3.1 → pypromice-1.3.3}/src/pypromice/qc/__init__.py +0 -0
  36. {pypromice-1.3.1 → pypromice-1.3.3}/src/pypromice/qc/persistence.py +0 -0
  37. {pypromice-1.3.1 → pypromice-1.3.3}/src/pypromice/qc/persistence_test.py +0 -0
  38. {pypromice-1.3.1 → pypromice-1.3.3}/src/pypromice/test/test_config1.toml +0 -0
  39. {pypromice-1.3.1 → pypromice-1.3.3}/src/pypromice/test/test_config2.toml +0 -0
  40. {pypromice-1.3.1 → pypromice-1.3.3}/src/pypromice/test/test_email +0 -0
  41. {pypromice-1.3.1 → pypromice-1.3.3}/src/pypromice/test/test_payload_formats.csv +0 -0
  42. {pypromice-1.3.1 → pypromice-1.3.3}/src/pypromice/test/test_payload_types.csv +0 -0
  43. {pypromice-1.3.1 → pypromice-1.3.3}/src/pypromice/test/test_raw1.txt +0 -0
  44. {pypromice-1.3.1 → pypromice-1.3.3}/src/pypromice/test/test_raw_DataTable2.txt +0 -0
  45. {pypromice-1.3.1 → pypromice-1.3.3}/src/pypromice/test/test_raw_SlimTableMem1.txt +0 -0
  46. {pypromice-1.3.1 → pypromice-1.3.3}/src/pypromice/test/test_raw_transmitted1.txt +0 -0
  47. {pypromice-1.3.1 → pypromice-1.3.3}/src/pypromice/test/test_raw_transmitted2.txt +0 -0
  48. {pypromice-1.3.1 → pypromice-1.3.3}/src/pypromice/tx/__init__.py +0 -0
  49. {pypromice-1.3.1 → pypromice-1.3.3}/src/pypromice/tx/get_l0tx.py +0 -0
  50. {pypromice-1.3.1 → pypromice-1.3.3}/src/pypromice/tx/get_msg.py +0 -0
  51. {pypromice-1.3.1 → pypromice-1.3.3}/src/pypromice/tx/payload_formats.csv +0 -0
  52. {pypromice-1.3.1 → pypromice-1.3.3}/src/pypromice/tx/payload_types.csv +0 -0
  53. {pypromice-1.3.1 → pypromice-1.3.3}/src/pypromice.egg-info/dependency_links.txt +0 -0
  54. {pypromice-1.3.1 → pypromice-1.3.3}/src/pypromice.egg-info/entry_points.txt +0 -0
  55. {pypromice-1.3.1 → pypromice-1.3.3}/src/pypromice.egg-info/requires.txt +0 -0
  56. {pypromice-1.3.1 → pypromice-1.3.3}/src/pypromice.egg-info/top_level.txt +0 -0
PKG-INFO

@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: pypromice
- Version: 1.3.1
+ Version: 1.3.3
  Summary: PROMICE/GC-Net data processing toolbox
  Home-page: https://github.com/GEUS-Glaciology-and-Climate/pypromice
  Author: GEUS Glaciology and Climate
@@ -29,10 +29,9 @@ Requires-Dist: netcdf4
  Requires-Dist: pyDataverse

  # pypromice
- [![PyPI version](https://badge.fury.io/py/pypromice.svg)](https://badge.fury.io/py/pypromice)
- [![](<https://img.shields.io/badge/Dataverse DOI-10.22008/FK2/3TSBF0-orange>)](https://www.doi.org/10.22008/FK2/3TSBF0) [![DOI](https://joss.theoj.org/papers/10.21105/joss.05298/status.svg)](https://doi.org/10.21105/joss.05298) [![Documentation Status](https://readthedocs.org/projects/pypromice/badge/?version=latest)](https://pypromice.readthedocs.io/en/latest/?badge=latest)
+ [![PyPI version](https://badge.fury.io/py/pypromice.svg)](https://badge.fury.io/py/pypromice) [![Anaconda-Server Badge](https://anaconda.org/conda-forge/pypromice/badges/version.svg)](https://anaconda.org/conda-forge/pypromice) [![Anaconda-Server Badge](https://anaconda.org/conda-forge/pypromice/badges/platforms.svg)](https://anaconda.org/conda-forge/pypromice) [![](<https://img.shields.io/badge/Dataverse DOI-10.22008/FK2/3TSBF0-orange>)](https://www.doi.org/10.22008/FK2/3TSBF0) [![DOI](https://joss.theoj.org/papers/10.21105/joss.05298/status.svg)](https://doi.org/10.21105/joss.05298) [![Documentation Status](https://readthedocs.org/projects/pypromice/badge/?version=latest)](https://pypromice.readthedocs.io/en/latest/?badge=latest)

- pypromice is designed for processing and handling [PROMICE](https://promice.dk) automated weather station (AWS) data.
+ pypromice is designed for processing and handling [PROMICE](https://promice.org) automated weather station (AWS) data.

  It is envisioned for pypromice to be the go-to toolbox for handling and processing [PROMICE](https://promice.dk) and [GC-Net](http://cires1.colorado.edu/steffen/gcnet/) datasets. New releases of pypromice are uploaded alongside PROMICE AWS data releases to our [Dataverse](https://dataverse.geus.dk/dataverse/PROMICE) for transparency purposes and to encourage collaboration on improving our data. Please visit the pypromice [readthedocs](https://pypromice.readthedocs.io/en/latest/?badge=latest) for more information.

@@ -48,7 +47,11 @@ If you intend to use PROMICE AWS data and/or pypromice in your work, please cite

  ### Quick install

- The latest release of pypromice can installed using pip:
+ The latest release of pypromice can installed using conda or pip:
+
+ ```
+ $ conda install pypromice -c conda-forge
+ ```

  ```
  $ pip install pypromice
{pypromice-1.3.1 → pypromice-1.3.3}/README.md

@@ -1,8 +1,7 @@
  # pypromice
- [![PyPI version](https://badge.fury.io/py/pypromice.svg)](https://badge.fury.io/py/pypromice)
- [![](<https://img.shields.io/badge/Dataverse DOI-10.22008/FK2/3TSBF0-orange>)](https://www.doi.org/10.22008/FK2/3TSBF0) [![DOI](https://joss.theoj.org/papers/10.21105/joss.05298/status.svg)](https://doi.org/10.21105/joss.05298) [![Documentation Status](https://readthedocs.org/projects/pypromice/badge/?version=latest)](https://pypromice.readthedocs.io/en/latest/?badge=latest)
+ [![PyPI version](https://badge.fury.io/py/pypromice.svg)](https://badge.fury.io/py/pypromice) [![Anaconda-Server Badge](https://anaconda.org/conda-forge/pypromice/badges/version.svg)](https://anaconda.org/conda-forge/pypromice) [![Anaconda-Server Badge](https://anaconda.org/conda-forge/pypromice/badges/platforms.svg)](https://anaconda.org/conda-forge/pypromice) [![](<https://img.shields.io/badge/Dataverse DOI-10.22008/FK2/3TSBF0-orange>)](https://www.doi.org/10.22008/FK2/3TSBF0) [![DOI](https://joss.theoj.org/papers/10.21105/joss.05298/status.svg)](https://doi.org/10.21105/joss.05298) [![Documentation Status](https://readthedocs.org/projects/pypromice/badge/?version=latest)](https://pypromice.readthedocs.io/en/latest/?badge=latest)

- pypromice is designed for processing and handling [PROMICE](https://promice.dk) automated weather station (AWS) data.
+ pypromice is designed for processing and handling [PROMICE](https://promice.org) automated weather station (AWS) data.

  It is envisioned for pypromice to be the go-to toolbox for handling and processing [PROMICE](https://promice.dk) and [GC-Net](http://cires1.colorado.edu/steffen/gcnet/) datasets. New releases of pypromice are uploaded alongside PROMICE AWS data releases to our [Dataverse](https://dataverse.geus.dk/dataverse/PROMICE) for transparency purposes and to encourage collaboration on improving our data. Please visit the pypromice [readthedocs](https://pypromice.readthedocs.io/en/latest/?badge=latest) for more information.

@@ -18,7 +17,11 @@ If you intend to use PROMICE AWS data and/or pypromice in your work, please cite

  ### Quick install

- The latest release of pypromice can installed using pip:
+ The latest release of pypromice can installed using conda or pip:
+
+ ```
+ $ conda install pypromice -c conda-forge
+ ```

  ```
  $ pip install pypromice
{pypromice-1.3.1 → pypromice-1.3.3}/setup.py

@@ -5,7 +5,7 @@ with open("README.md", "r", encoding="utf-8") as fh:

  setuptools.setup(
  name="pypromice",
- version="1.3.1",
+ version="1.3.3",
  author="GEUS Glaciology and Climate",
  description="PROMICE/GC-Net data processing toolbox",
  long_description=long_description,
@@ -30,6 +30,7 @@ setuptools.setup(
  include_package_data = True,
  packages=setuptools.find_packages(where="src"),
  python_requires=">=3.8",
+ package_data={"pypromice.qc.percentiles": ["thresholds.csv"]},
  install_requires=['numpy>=1.23.0', 'pandas>=1.5.0', 'xarray>=2022.6.0', 'toml', 'scipy>=1.9.0', 'scikit-learn>=1.1.0', 'Bottleneck', 'netcdf4', 'pyDataverse'],
  entry_points={
  'console_scripts': [
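The new `package_data` entry ensures `thresholds.csv` is bundled into built distributions alongside `include_package_data`. Below is a minimal sketch, assuming Python >= 3.9, of one way the installed file could be located with `importlib.resources`; this is illustrative only and not necessarily how pypromice itself loads it.

```python
# Hedged sketch (not pypromice's own loading code): locate the bundled
# thresholds.csv shipped via the package_data entry added above.
# importlib.resources.files/as_file require Python >= 3.9.
from importlib import resources

import pandas as pd

def load_thresholds():
    source = resources.files("pypromice.qc.percentiles") / "thresholds.csv"
    with resources.as_file(source) as path:   # yields a real filesystem path
        return pd.read_csv(path)

if __name__ == "__main__":
    print(load_thresholds().head())
```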
{pypromice-1.3.1 → pypromice-1.3.3}/src/pypromice/process/L0toL1.py

@@ -57,7 +57,9 @@ def toL1(L0, vars_df, T_0=273.15, tilt_threshold=-100):
  ds['ulr'] = ((ds['ulr'] * 10) / ds.attrs['ulr_eng_coef']) + 5.67E-8*(ds['t_rad'] + T_0)**4

  ds['z_boom_u'] = _reformatArray(ds['z_boom_u']) # Reformat boom height
- ds['z_boom_u'] = ds['z_boom_u'] * ((ds['t_u'] + T_0)/T_0)**0.5 # Adjust sonic ranger readings for sensitivity to air temperature
+
+ ds['t_u_interp'] = interpTemp(ds['t_u'], vars_df)
+ ds['z_boom_u'] = ds['z_boom_u'] * ((ds['t_u_interp'] + T_0)/T_0)**0.5 # Adjust sonic ranger readings for sensitivity to air temperature

  if ds['gps_lat'].dtype.kind == 'O': # Decode and reformat GPS information
  if 'NH' in ds['gps_lat'].dropna(dim='time').values[1]:
@@ -113,7 +115,8 @@ def toL1(L0, vars_df, T_0=273.15, tilt_threshold=-100):

  elif ds.attrs['number_of_booms']==2: # 2-boom processing
  ds['z_boom_l'] = _reformatArray(ds['z_boom_l']) # Reformat boom height
- ds['z_boom_l'] = ds['z_boom_l'] * ((ds['t_l'] + T_0)/T_0)**0.5 # Adjust sonic ranger readings for sensitivity to air temperature
+ ds['t_l_interp'] = interpTemp(ds['t_l'], vars_df)
+ ds['z_boom_l'] = ds['z_boom_l'] * ((ds['t_l_interp']+ T_0)/T_0)**0.5 # Adjust sonic ranger readings for sensitivity to air temperature

  ds = clip_values(ds, vars_df)
  for key in ['hygroclip_t_offset', 'dsr_eng_coef', 'usr_eng_coef',
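Both boom-height hunks apply the same physical correction: a sonic ranger measures distance via the speed of sound, which scales with the square root of absolute air temperature, so the raw reading is multiplied by ((t + T_0)/T_0)**0.5. What changes in 1.3.3 is that the temperature fed into this factor is the clipped and interpolated t_u_interp/t_l_interp rather than the raw series. A minimal standalone sketch with toy values (not PROMICE data):

```python
# Minimal sketch of the sonic-ranger air-temperature correction used above:
# the reported distance is scaled by sqrt(T_abs / T_0), with T_0 = 273.15 K.
# Toy values only; variable names mirror the diff but this is not pypromice code.
import numpy as np

T_0 = 273.15                                    # calibration temperature, kelvin
z_boom_u = np.array([2.61, 2.60, 2.62])         # raw boom height readings (m)
t_u_interp = np.array([-15.0, -14.5, np.nan])   # air temperature (deg C), gaps allowed

z_boom_u_cor = z_boom_u * ((t_u_interp + T_0) / T_0) ** 0.5
print(z_boom_u_cor)                             # NaN temperature propagates to NaN height
```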
@@ -254,6 +257,41 @@ def getPressDepth(z_pt, p, pt_antifreeze, pt_z_factor, pt_z_coef, pt_z_p_coef):

  return z_pt_cor, z_pt

+
+ def interpTemp(temp, var_configurations, max_interp=pd.Timedelta(12,'h')):
+ '''Clip and interpolate temperature dataset for use in corrections
+
+ Parameters
+ ----------
+ temp : `xarray.DataArray`
+ Array of temperature data
+ vars_df : `pandas.DataFrame`
+ Dataframe to retrieve attribute hi-lo values from for temperature clipping
+ max_interp : `pandas.Timedelta`
+ Maximum time steps to interpolate across. The default is 12 hours.
+
+ Returns
+ -------
+ temp_interp : `xarray.DataArray`
+ Array of interpolated temperature data
+ '''
+ # Determine if upper or lower temperature array
+ var = temp.name.lower()
+
+ # Find range threshold and use it to clip measurements
+ cols = ["lo", "hi", "OOL"]
+ assert set(cols) <= set(var_configurations.columns)
+ variable_limits = var_configurations[cols].dropna(how="all")
+ temp = temp.where(temp >= variable_limits.loc[var,'lo'])
+ temp = temp.where(temp <= variable_limits.loc[var, 'hi'])
+
+ # Drop duplicates and interpolate across NaN values
+ # temp_interp = temp.drop_duplicates(dim='time', keep='first')
+ temp_interp = temp.interpolate_na(dim='time', max_gap=max_interp)
+
+ return temp_interp
+
+
  def smoothTilt(tilt, win_size):
  '''Smooth tilt values using a rolling window. This is translated from the
  previous IDL/GDL smoothing algorithm:
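The new interpTemp helper first clips the temperature series to the per-variable lo/hi limits taken from the variables configuration dataframe, then fills gaps of up to 12 hours with xarray's interpolate_na, so short temperature dropouts no longer knock out the boom-height correction. A standalone sketch of the same clip-then-interpolate pattern on toy data; the variable name 't_u' and the limits below are illustrative, not values from pypromice's variables.csv:

```python
# Standalone sketch of the clip-then-interpolate pattern used by interpTemp.
# Illustrative limits and data; not a call into pypromice itself.
import numpy as np
import pandas as pd
import xarray as xr

time = pd.date_range("2023-06-01", periods=6, freq="h")
t_u = xr.DataArray([-10.0, -80.0, np.nan, np.nan, -9.0, -8.5],
                   dims="time", coords={"time": time}, name="t_u")

limits = pd.DataFrame({"lo": [-70.0], "hi": [30.0]}, index=["t_u"])  # assumed values

t_u = t_u.where(t_u >= limits.loc["t_u", "lo"])     # clip implausible low values
t_u = t_u.where(t_u <= limits.loc["t_u", "hi"])     # clip implausible high values
t_u_interp = t_u.interpolate_na(dim="time", max_gap=pd.Timedelta(12, "h"))
print(t_u_interp.values)   # the -80 spike and the short gap are filled by interpolation
```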
@@ -361,23 +399,35 @@ def decodeGPS(ds, gps_names):
  return ds

  def reformatGPS(pos_arr, attrs):
- '''Correct position if only recorded minutes (and not degrees), and
- reformat values and attributes
+ '''Correct latitude and longitude from native format to decimal degrees.

+ v2 stations should send "NH6429.01544","WH04932.86061" (NUK_L 2022)
+ v3 stations should send coordinates as "6628.93936","04617.59187" (DY2) or 6430,4916 (NUK_Uv3)
+ decodeGPS should have decoded these strings to floats in ddmm.mmmm format
+ v1 stations however only saved decimal minutes (mm.mmmmm) as float<=60. '
+ In this case, we use the integer part of the latitude given in the config
+ file and append the gps value after it.
+
  Parameters
  ----------
  pos_arr : xr.Dataarray
- GPS position array
+ Array of latitude or longitude measured by the GPS
  attrs : dict
- Array attributes
+ The global attribute 'latitude' or 'longitude' associated with the
+ file being processed. It is the standard latitude/longitude given in the
+ config file for that station.

  Returns
  -------
  pos_arr : xr.Dataarray
- Formatted GPS position array
+ Formatted GPS position array in decimal degree
  '''
  if np.any((pos_arr <= 90) & (pos_arr > 0)):
- pos_arr = pos_arr + 100*attrs
+ # then pos_arr is in decimal minutes, so we add to it the integer
+ # part of the latitude given in the config file x100
+ # so that it reads ddmm.mmmmmm like for v2 and v3 files
+ # Note that np.sign and np.attrs handles negative longitudes.
+ pos_arr = np.sign(attrs) * (pos_arr + 100*np.floor(np.abs(attrs)))
  a = pos_arr.attrs
  pos_arr = np.floor(pos_arr / 100) + (pos_arr / 100 - np.floor(pos_arr / 100)) * 100 / 60
  pos_arr.attrs = a
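The conversion itself is floor(x/100) + (x/100 - floor(x/100)) * 100/60, i.e. integer degrees plus minutes divided by 60. A hedged numeric sketch using the DY2 latitude string quoted in the new docstring; the config-file latitude used in the v1 case below is an assumed value:

```python
# Worked example of the ddmm.mmmm -> decimal degree conversion performed by
# reformatGPS, standalone and not a call into pypromice.
import numpy as np

def ddmm_to_decimal(x):
    deg = np.floor(x / 100)
    return deg + (x / 100 - deg) * 100 / 60

print(ddmm_to_decimal(6628.93936))   # ~66.482 deg N (66 deg 28.93936')

# v1 stations only report decimal minutes (e.g. 28.93936). The config-file
# latitude (assumed here to be ~66.48) supplies the integer degrees:
gps_minutes = 28.93936
config_lat = 66.48
rebuilt = np.sign(config_lat) * (gps_minutes + 100 * np.floor(np.abs(config_lat)))
print(ddmm_to_decimal(rebuilt))      # same ~66.482 deg N
```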
{pypromice-1.3.1 → pypromice-1.3.3}/src/pypromice/process/L1toL2.py

@@ -5,17 +5,21 @@ AWS Level 1 (L1) to Level 2 (L2) data processing
  import logging

  import numpy as np
- import urllib.request
- from urllib.error import HTTPError, URLError
  import pandas as pd
- import os
  import xarray as xr

+ from pypromice.qc.github_data_issues import flagNAN, adjustTime, adjustData
+ from pypromice.qc.percentiles.outlier_detector import ThresholdBasedOutlierDetector
  from pypromice.qc.persistence import persistence_qc
  from pypromice.process.value_clipping import clip_values

+ __all__ = [
+ "toL2",
+ ]
+
  logger = logging.getLogger(__name__)

+
  def toL2(
  L1: xr.Dataset,
  vars_df: pd.DataFrame,
@@ -61,8 +65,12 @@ def toL2(
  ds = adjustData(ds) # Adjust data after a user-defined csv files
  except Exception:
  logger.exception('Flagging and fixing failed:')
+
  if ds.attrs['format'] == 'TX':
- ds = persistence_qc(ds) # Detect and filter data points that seems to be static
+ ds = persistence_qc(ds) # Flag and remove persistence outliers
+ # TODO: The configuration should be provided explicitly
+ outlier_detector = ThresholdBasedOutlierDetector.default()
+ ds = outlier_detector.filter_data(ds) # Flag and remove percentile outliers

  T_100 = _getTempK(T_0)
  ds['rh_u_cor'] = correctHumidity(ds['rh_u'], ds['t_u'],
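With 1.3.3, transmitted ('TX') data get two quality-control passes in toL2: persistence_qc plus the new percentile-based ThresholdBasedOutlierDetector, whose per-station thresholds ship in src/pypromice/qc/percentiles/thresholds.csv (see the file list above). A hedged sketch of invoking the same pair of filters outside toL2; it assumes pypromice >= 1.3.3 is installed and that ds is a Level-1 xarray.Dataset carrying the station metadata the detector expects:

```python
# Hedged usage sketch (requires pypromice >= 1.3.3). The calls mirror the
# ones shown in the hunk above; `ds` is assumed to be a Level-1 xarray.Dataset.
from pypromice.qc.percentiles.outlier_detector import ThresholdBasedOutlierDetector
from pypromice.qc.persistence import persistence_qc

def quality_control_tx(ds):
    ds = persistence_qc(ds)                              # remove static/stuck readings
    detector = ThresholdBasedOutlierDetector.default()   # thresholds from bundled thresholds.csv
    return detector.filter_data(ds)                      # remove percentile outliers
```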
@@ -81,7 +89,7 @@ def toL2(
  ds['t_surf'] = calcSurfaceTemperature(T_0, ds['ulr'], ds['dlr'], # Calculate surface temperature
  emissivity)
  if not ds.attrs['bedrock']:
- ds['t_surf'] = ds['t_surf'].where(ds['t_surf'] <= 0, other = 0)
+ ds['t_surf'] = xr.where(ds['t_surf'] > 0, 0, ds['t_surf'])

  # Determine station position relative to sun
  doy = ds['time'].to_dataframe().index.dayofyear.values # Gather variables to calculate sun pos
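The t_surf clamp is rewritten from DataArray.where(cond, other=0) to the top-level xr.where. For finite values the two are equivalent (positive surface temperatures are capped at 0 °C on non-bedrock stations), but they differ on missing data: the old form also turned NaN into 0, while the new form leaves NaN untouched, which is presumably the motivation for the change. A tiny demo on toy values:

```python
# Minimal demo of the behavioural difference between the old and new forms
# when NaNs are present (toy data, not PROMICE output).
import numpy as np
import xarray as xr

t_surf = xr.DataArray([-5.0, 2.0, np.nan])

old = t_surf.where(t_surf <= 0, other=0)   # -> [-5., 0., 0.]   (NaN becomes 0)
new = xr.where(t_surf > 0, 0, t_surf)      # -> [-5., 0., nan]  (NaN preserved)
print(old.values, new.values)
```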
@@ -167,291 +175,6 @@ def toL2(
  ds = clip_values(ds, vars_df)
  return ds

- def flagNAN(ds_in,
- flag_url='https://raw.githubusercontent.com/GEUS-Glaciology-and-Climate/PROMICE-AWS-data-issues/master/flags/',
- flag_dir='local/flags/'):
- '''Read flagged data from .csv file. For each variable, and downstream
- dependents, flag as invalid (or other) if set in the flag .csv
-
- Parameters
- ----------
- ds_in : xr.Dataset
- Level 0 dataset
- flag_url : str
- URL to directory where .csv flag files can be found
- flag_dir : str
- File directory where .csv flag files can be found
-
- Returns
- -------
- ds : xr.Dataset
- Level 0 data with flagged data
- '''
- ds = ds_in.copy()
- df = None
-
- df = _getDF(flag_url + ds.attrs["station_id"] + ".csv",
- os.path.join(flag_dir, ds.attrs["station_id"] + ".csv"),
- # download = False, # only for working on draft local flag'n'fix files
- )
-
- if isinstance(df, pd.DataFrame):
- df.t0 = pd.to_datetime(df.t0).dt.tz_localize(None)
- df.t1 = pd.to_datetime(df.t1).dt.tz_localize(None)
-
- if df.shape[0] > 0:
- for i in df.index:
- t0, t1, avar = df.loc[i,['t0','t1','variable']]
-
- if avar == '*':
- # Set to all vars if var is "*"
- varlist = list(ds.keys())
- elif '*' in avar:
- # Reads as regex if contains "*" and other characters (e.g. 't_i_.*($)')
- varlist = pd.DataFrame(columns = list(ds.keys())).filter(regex=(avar)).columns
- else:
- varlist = avar.split()
-
- if 'time' in varlist: varlist.remove("time")
-
- # Set to all times if times are "n/a"
- if pd.isnull(t0):
- t0 = ds['time'].values[0]
- if pd.isnull(t1):
- t1 = ds['time'].values[-1]
-
- for v in varlist:
- if v in list(ds.keys()):
- logger.info(f'---> flagging {t0} {t1} {v}')
- ds[v] = ds[v].where((ds['time'] < t0) | (ds['time'] > t1))
- else:
- logger.info(f'---> could not flag {v} not in dataset')
-
- return ds
-
-
- def adjustTime(ds,
- adj_url="https://raw.githubusercontent.com/GEUS-Glaciology-and-Climate/PROMICE-AWS-data-issues/master/adjustments/",
- adj_dir='local/adjustments/',
- var_list=[], skip_var=[]):
- '''Read adjustment data from .csv file. Only applies the "time_shift" adjustment
-
- Parameters
- ----------
- ds : xr.Dataset
- Level 0 dataset
- adj_url : str
- URL to directory where .csv adjustment files can be found
- adj_dir : str
- File directory where .csv adjustment files can be found
-
- Returns
- -------
- ds : xr.Dataset
- Level 0 data with flagged data
- '''
- ds_out = ds.copy()
- adj_info=None
-
- adj_info = _getDF(adj_url + ds.attrs["station_id"] + ".csv",
- os.path.join(adj_dir, ds.attrs["station_id"] + ".csv"),)
-
- if isinstance(adj_info, pd.DataFrame):
-
-
- if "time_shift" in adj_info.adjust_function.values:
- time_shifts = adj_info.loc[adj_info.adjust_function == "time_shift", :]
- # if t1 is left empty, then adjustment is applied until the end of the file
- time_shifts.loc[time_shifts.t1.isnull(), "t1"] = pd.to_datetime(ds_out.time.values[-1]).isoformat()
- time_shifts.t0 = pd.to_datetime(time_shifts.t0).dt.tz_localize(None)
- time_shifts.t1 = pd.to_datetime(time_shifts.t1).dt.tz_localize(None)
-
- for t0, t1, val in zip(
- time_shifts.t0,
- time_shifts.t1,
- time_shifts.adjust_value,
- ):
- ds_shifted = ds_out.sel(time=slice(t0,t1))
- ds_shifted['time'] = ds_shifted.time.values + pd.Timedelta(days = val)
-
- # here we concatenate what was before the shifted part, the shifted
- # part and what was after the shifted part
- # note that if any data was already present in the target period
- # (where the data lands after the shift), it is overwritten
-
- ds_out = xr.concat(
- (
- ds_out.sel(time=slice(pd.to_datetime(ds_out.time.values[0]),
- t0 + pd.Timedelta(days = val))),
- ds_shifted,
- ds_out.sel(time=slice(t1 + pd.Timedelta(days = val),
- pd.to_datetime(ds_out.time.values[-1])))
- ),
- dim = 'time',
- )
- if t0 > pd.Timestamp.now():
- ds_out = ds_out.sel(time=slice(pd.to_datetime(ds_out.time.values[0]),
- t0))
- return ds_out
-
-
- def adjustData(ds,
- adj_url="https://raw.githubusercontent.com/GEUS-Glaciology-and-Climate/PROMICE-AWS-data-issues/master/adjustments/",
- adj_dir='local/adjustments/',
- var_list=[], skip_var=[]):
- '''Read adjustment data from .csv file. For each variable, and downstream
- dependents, adjust data accordingly if set in the adjustment .csv
-
- Parameters
- ----------
- ds : xr.Dataset
- Level 0 dataset
- adj_url : str
- URL to directory where .csv adjustment files can be found
- adj_dir : str
- File directory where .csv adjustment files can be found
-
- Returns
- -------
- ds : xr.Dataset
- Level 0 data with flagged data
- '''
- ds_out = ds.copy()
- adj_info=None
- adj_info = _getDF(adj_url + ds.attrs["station_id"] + ".csv",
- os.path.join(adj_dir, ds.attrs["station_id"] + ".csv"),
- # download = False, # only for working on draft local flag'n'fix files
- )
-
- if isinstance(adj_info, pd.DataFrame):
- # removing potential time shifts from the adjustment list
- adj_info = adj_info.loc[adj_info.adjust_function != "time_shift", :]
-
- # if t1 is left empty, then adjustment is applied until the end of the file
- adj_info.loc[adj_info.t0.isnull(), "t0"] = ds_out.time.values[0]
- adj_info.loc[adj_info.t1.isnull(), "t1"] = ds_out.time.values[-1]
- # making all timestamps timezone naive (compatibility with xarray)
- adj_info.t0 = pd.to_datetime(adj_info.t0).dt.tz_localize(None)
- adj_info.t1 = pd.to_datetime(adj_info.t1).dt.tz_localize(None)
-
- # if "*" is in the variable name then we interpret it as regex
- selec = adj_info['variable'].str.contains('\*') & (adj_info['variable'] != "*")
- for ind in adj_info.loc[selec, :].index:
- line_template = adj_info.loc[ind, :].copy()
- regex = adj_info.loc[ind, 'variable']
- for var in pd.DataFrame(columns = list(ds.keys())).filter(regex=regex).columns:
- line_template.variable = var
- line_template.name = adj_info.index.max() + 1
- adj_info = pd.concat((adj_info, line_template.to_frame().transpose()),axis=0)
- adj_info = adj_info.drop(labels=ind, axis=0)
-
- adj_info = adj_info.sort_values(by=["variable", "t0"])
- adj_info.set_index(["variable", "t0"], drop=False, inplace=True)
-
- if len(var_list) == 0:
- var_list = np.unique(adj_info.variable)
- else:
- adj_info = adj_info.loc[np.isin(adj_info.variable, var_list), :]
- var_list = np.unique(adj_info.variable)
-
- if len(skip_var) > 0:
- adj_info = adj_info.loc[~np.isin(adj_info.variable, skip_var), :]
- var_list = np.unique(adj_info.variable)
-
- for var in var_list:
- if var not in list(ds_out.keys()):
- logger.info(f'could not adjust {var } not in dataset')
- continue
- for t0, t1, func, val in zip(
- adj_info.loc[var].t0,
- adj_info.loc[var].t1,
- adj_info.loc[var].adjust_function,
- adj_info.loc[var].adjust_value,
- ):
- if (t0 > pd.to_datetime(ds_out.time.values[-1])) | (t1 < pd.to_datetime(ds_out.time.values[0])):
- continue
- logger.info(f'---> {t0} {t1} {var} {func} {val}')
- if func == "add":
- ds_out[var].loc[dict(time=slice(t0, t1))] = ds_out[var].loc[dict(time=slice(t0, t1))].values + val
- # flagging adjusted values
- # if var + "_adj_flag" not in ds_out.columns:
- # ds_out[var + "_adj_flag"] = 0
- # msk = ds_out[var].loc[dict(time=slice(t0, t1))])].notnull()
- # ind = ds_out[var].loc[dict(time=slice(t0, t1))])].loc[msk].time
- # ds_out.loc[ind, var + "_adj_flag"] = 1
-
- if func == "multiply":
- ds_out[var].loc[dict(time=slice(t0, t1))] = ds_out[var].loc[dict(time=slice(t0, t1))].values * val
- if "DW" in var:
- ds_out[var].loc[dict(time=slice(t0, t1))] = ds_out[var].loc[dict(time=slice(t0, t1))] % 360
- # flagging adjusted values
- # if var + "_adj_flag" not in ds_out.columns:
- # ds_out[var + "_adj_flag"] = 0
- # msk = ds_out[var].loc[dict(time=slice(t0, t1))].notnull()
- # ind = ds_out[var].loc[dict(time=slice(t0, t1))].loc[msk].time
- # ds_out.loc[ind, var + "_adj_flag"] = 1
-
- if func == "min_filter":
- tmp = ds_out[var].loc[dict(time=slice(t0, t1))].values
- tmp[tmp < val] = np.nan
-
- if func == "max_filter":
- tmp = ds_out[var].loc[dict(time=slice(t0, t1))].values
- tmp[tmp > val] = np.nan
- ds_out[var].loc[dict(time=slice(t0, t1))] = tmp
-
- if func == "upper_perc_filter":
- tmp = ds_out[var].loc[dict(time=slice(t0, t1))].copy()
- df_w = ds_out[var].loc[dict(time=slice(t0, t1))].resample("14D").quantile(1 - val / 100)
- df_w = ds_out[var].loc[dict(time=slice(t0, t1))].resample("14D").var()
- for m_start, m_end in zip(df_w.time[:-2], df_w.time[1:]):
- msk = (tmp.time >= m_start) & (tmp.time < m_end)
- values_month = tmp.loc[msk].values
- values_month[values_month < df_w.loc[m_start]] = np.nan
- tmp.loc[msk] = values_month
-
- ds_out[var].loc[dict(time=slice(t0, t1))] = tmp.values
-
- if func == "biweekly_upper_range_filter":
- tmp = ds_out[var].loc[dict(time=slice(t0, t1))].copy()
- df_max = ds_out[var].loc[dict(time=slice(t0, t1))].resample("14D").max()
- for m_start, m_end in zip(df_max.time[:-2], df_max.time[1:]):
- msk = (tmp.time >= m_start) & (tmp.time < m_end)
- lim = df_max.loc[m_start] - val
- values_month = tmp.loc[msk].values
- values_month[values_month < lim] = np.nan
- tmp.loc[msk] = values_month
- # remaining samples following outside of the last 2 weeks window
- msk = tmp.time >= m_end
- lim = df_max.loc[m_start] - val
- values_month = tmp.loc[msk].values
- values_month[values_month < lim] = np.nan
- tmp.loc[msk] = values_month
- # updating original pandas
- ds_out[var].loc[dict(time=slice(t0, t1))] = tmp.values
-
- if func == "hampel_filter":
- tmp = ds_out[var].loc[dict(time=slice(t0, t1))]
- tmp = _hampel(tmp, k=7 * 24, t0=val)
- ds_out[var].loc[dict(time=slice(t0, t1))] = tmp.values
-
- if func == "grad_filter":
- tmp = ds_out[var].loc[dict(time=slice(t0, t1))].copy()
- msk = ds_out[var].loc[dict(time=slice(t0, t1))].copy().diff()
- tmp[np.roll(msk.abs() > val, -1)] = np.nan
- ds_out[var].loc[dict(time=slice(t0, t1))] = tmp
-
- if "swap_with_" in func:
- var2 = func[10:]
- val_var = ds_out[var].loc[dict(time=slice(t0, t1))].values.copy()
- val_var2 = ds_out[var2].loc[dict(time=slice(t0, t1))].values.copy()
- ds_out[var2].loc[dict(time=slice(t0, t1))] = val_var
- ds_out[var].loc[dict(time=slice(t0, t1))] = val_var2
-
- if func == "rotate":
- ds_out[var].loc[dict(time=slice(t0, t1))] = (ds_out[var].loc[dict(time=slice(t0, t1))].values + val) % 360
-
- return ds_out

  def calcCloudCoverage(T, T_0, eps_overcast, eps_clear, dlr, station_id):
  '''Calculate cloud cover from T and T_0
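The flag-and-fix machinery deleted here is not dropped from the package: per the import hunk near the top of this file's diff and the new file pypromice-1.3.3/src/pypromice/qc/github_data_issues.py (+376 lines) in the file list, it has been moved out of L1toL2.py. A hedged sketch of the new import location, assuming pypromice >= 1.3.3:

```python
# Hedged sketch: the helpers removed from L1toL2.py are now imported from
# pypromice.qc.github_data_issues, as shown in this diff's import hunk.
# Requires pypromice >= 1.3.3.
from pypromice.qc.github_data_issues import flagNAN, adjustTime, adjustData

# toL2() still calls these on the Level-1 dataset (the adjustData call is
# visible in the hunk above), e.g.:
# ds = flagNAN(ds)
# ds = adjustTime(ds)
# ds = adjustData(ds)
```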
@@ -493,6 +216,7 @@ def calcCloudCoverage(T, T_0, eps_overcast, eps_clear, dlr, station_id):
  cc[cc < 0] = 0
  return cc

+
  def calcSurfaceTemperature(T_0, ulr, dlr, emissivity):
  '''Calculate surface temperature from air temperature, upwelling and
  downwelling radiation and emissivity
@@ -516,6 +240,7 @@ def calcSurfaceTemperature(T_0, ulr, dlr, emissivity):
  t_surf = ((ulr - (1 - emissivity) * dlr) / emissivity / 5.67e-8)**0.25 - T_0
  return t_surf

+
  def calcTilt(tilt_x, tilt_y, deg2rad):
  '''Calculate station tilt

@@ -557,6 +282,7 @@ def calcTilt(tilt_x, tilt_y, deg2rad):
  # theta_sensor_deg = theta_sensor_rad * rad2deg
  return phi_sensor_rad, theta_sensor_rad

+
  def correctHumidity(rh, T, T_0, T_100, ews, ei0): #TODO figure out if T replicate is needed
  '''Correct relative humidity using Groff & Gratch method, where values are
  set when freezing and remain the original values when not freezing
@@ -599,6 +325,7 @@ def correctHumidity(rh, T, T_0, T_100, ews, ei0): #TODO f
  rh_cor = rh.where(~freezing, other = rh*(e_s_wtr / e_s_ice))
  return rh_cor

+
  def correctPrecip(precip, wspd):
  '''Correct precipitation with the undercatch correction method used in
  Yang et al. (1999) and Box et al. (2022), based on Goodison et al. (1998)
@@ -654,6 +381,7 @@ def correctPrecip(precip, wspd):

  return precip_cor, precip_rate

+
  def calcDeclination(doy, hour, minute):
  '''Calculate sun declination based on time

@@ -702,6 +430,7 @@ def calcHourAngle(hour, minute, lon):
  return 2 * np.pi * (((hour + minute / 60) / 24 - 0.5) - lon/360)
  # ; - 15.*timezone/360.)

+
  def calcDirectionDeg(HourAngle_rad): #TODO remove if not plan to use this
  '''Calculate sun direction as degrees. This is an alternative to
  _calcHourAngle that is currently not implemented into the offical L0>>L3
@@ -754,6 +483,7 @@ def calcZenith(lat, Declination_rad, HourAngle_rad, deg2rad, rad2deg):
  ZenithAngle_deg = ZenithAngle_rad * rad2deg
  return ZenithAngle_rad, ZenithAngle_deg

+
  def calcAngleDiff(ZenithAngle_rad, HourAngle_rad, phi_sensor_rad,
  theta_sensor_rad):
  '''Calculate angle between sun and upper sensor (to determine when sun is
@@ -822,6 +552,7 @@ def calcAlbedo(usr, dsr_cor, AngleDif_deg, ZenithAngle_deg):
  albedo = albedo.ffill(dim='time').bfill(dim='time') #TODO remove this line and one above?
  return albedo, OKalbedos

+
  def calcTOA(ZenithAngle_deg, ZenithAngle_rad):
  '''Calculate incoming shortwave radiation at the top of the atmosphere,
  accounting for sunset periods
@@ -912,75 +643,6 @@ def calcCorrectionFactor(Declination_rad, phi_sensor_rad, theta_sensor_rad,

  return CorFac_all

- def _getDF(flag_url, flag_file, download=True):
- '''Get dataframe from flag or adjust file. First attempt to retrieve from
- URL. If this fails then attempt to retrieve from local file
-
- Parameters
- ----------
- flag_url : str
- URL address to file
- flag_file : str
- Local path to file
- download : bool
- Flag to download file from URL
-
- Returns
- -------
- df : pd.DataFrame
- Flag or adjustment dataframe
- '''
-
- # Download local copy as csv
- if download==True:
- os.makedirs(os.path.dirname(flag_file), exist_ok = True)
-
- try:
- urllib.request.urlretrieve(flag_url, flag_file)
- logger.info(f"Downloaded a {flag_file.split('/')[-2][:-1],} file to {flag_file}")
- except (HTTPError, URLError) as e:
- logger.info(f"Unable to download {flag_file.split('/')[-2][:-1],} file, using local file: {flag_file}")
- else:
- logger.info(f"Using local {flag_file.split('/')[-2][:-1],} file: {flag_file}")
-
- if os.path.isfile(flag_file):
- df = pd.read_csv(
- flag_file,
- comment="#",
- skipinitialspace=True,
- ).dropna(how='all', axis='rows')
- else:
- df=None
- logger.info(f"No {flag_file.split('/')[-2][:-1]} file to read.")
- return df
-
-
- def _hampel(vals_orig, k=7*24, t0=3):
- '''Hampel filter
-
- Parameters
- ----------
- vals : pd.DataSeries
- Series of values from which to remove outliers
- k : int
- Size of window, including the sample. For example, 7 is equal to 3 on
- either side of value. The default is 7*24.
- t0 : int
- Threshold value. The default is 3.
- '''
- #Make copy so original not edited
- vals=vals_orig.copy()
-
- #Hampel Filter
- L= 1.4826
- rolling_median=vals.rolling(k).median()
- difference=np.abs(rolling_median-vals)
- median_abs_deviation=difference.rolling(k).median()
- threshold= t0 *L * median_abs_deviation
- outlier_idx=difference>threshold
- outlier_idx[0:round(k/2)]=False
- vals.loc[outlier_idx]=np.nan
- return(vals)

  def _checkSunPos(ds, OKalbedos, sundown, sunonlowerdome, TOA_crit_nopass):
  '''Check sun position
@@ -1025,12 +687,14 @@ def _getTempK(T_0): #
  Steam point temperature in K'''
  return T_0+100

+
  def _getRotation(): #TODO same as L2toL3._getRotation()
  '''Return degrees-to-radians and radians-to-degrees'''
  deg2rad = np.pi / 180
  rad2deg = 1 / deg2rad
  return deg2rad, rad2deg

+
  if __name__ == "__main__":
  # unittest.main()
  pass