pypromice 1.3.6__py3-none-any.whl → 1.4.1__py3-none-any.whl


Files changed (53)
  1. pypromice/postprocess/bufr_to_csv.py +15 -3
  2. pypromice/postprocess/bufr_utilities.py +91 -18
  3. pypromice/postprocess/create_bufr_files.py +178 -0
  4. pypromice/postprocess/get_bufr.py +248 -397
  5. pypromice/postprocess/make_metadata_csv.py +214 -0
  6. pypromice/postprocess/real_time_utilities.py +41 -11
  7. pypromice/process/L0toL1.py +12 -5
  8. pypromice/process/L1toL2.py +69 -14
  9. pypromice/process/L2toL3.py +1034 -186
  10. pypromice/process/aws.py +139 -808
  11. pypromice/process/get_l2.py +90 -0
  12. pypromice/process/get_l2tol3.py +111 -0
  13. pypromice/process/join_l2.py +112 -0
  14. pypromice/process/join_l3.py +551 -120
  15. pypromice/process/load.py +161 -0
  16. pypromice/process/resample.py +147 -0
  17. pypromice/process/utilities.py +68 -0
  18. pypromice/process/write.py +503 -0
  19. pypromice/qc/github_data_issues.py +10 -16
  20. pypromice/qc/persistence.py +52 -30
  21. pypromice/resources/__init__.py +28 -0
  22. pypromice/{process/metadata.csv → resources/file_attributes.csv} +0 -2
  23. pypromice/resources/variable_aliases_GC-Net.csv +78 -0
  24. pypromice/resources/variables.csv +106 -0
  25. pypromice/station_configuration.py +118 -0
  26. pypromice/tx/get_l0tx.py +7 -4
  27. pypromice/tx/payload_formats.csv +1 -0
  28. pypromice/tx/tx.py +27 -6
  29. pypromice/utilities/__init__.py +0 -0
  30. pypromice/utilities/git.py +62 -0
  31. {pypromice-1.3.6.dist-info → pypromice-1.4.1.dist-info}/METADATA +4 -4
  32. pypromice-1.4.1.dist-info/RECORD +53 -0
  33. {pypromice-1.3.6.dist-info → pypromice-1.4.1.dist-info}/WHEEL +1 -1
  34. pypromice-1.4.1.dist-info/entry_points.txt +13 -0
  35. pypromice/postprocess/station_configurations.toml +0 -762
  36. pypromice/process/get_l3.py +0 -46
  37. pypromice/process/variables.csv +0 -92
  38. pypromice/qc/persistence_test.py +0 -150
  39. pypromice/test/test_config1.toml +0 -69
  40. pypromice/test/test_config2.toml +0 -54
  41. pypromice/test/test_email +0 -75
  42. pypromice/test/test_payload_formats.csv +0 -4
  43. pypromice/test/test_payload_types.csv +0 -7
  44. pypromice/test/test_percentile.py +0 -229
  45. pypromice/test/test_raw1.txt +0 -4468
  46. pypromice/test/test_raw_DataTable2.txt +0 -11167
  47. pypromice/test/test_raw_SlimTableMem1.txt +0 -1155
  48. pypromice/test/test_raw_transmitted1.txt +0 -15411
  49. pypromice/test/test_raw_transmitted2.txt +0 -28
  50. pypromice-1.3.6.dist-info/RECORD +0 -53
  51. pypromice-1.3.6.dist-info/entry_points.txt +0 -8
  52. {pypromice-1.3.6.dist-info → pypromice-1.4.1.dist-info}/LICENSE.txt +0 -0
  53. {pypromice-1.3.6.dist-info → pypromice-1.4.1.dist-info}/top_level.txt +0 -0
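The file list shows that 1.4.1 splits the old monolithic pypromice/process/aws.py into dedicated load, resample, write and utilities modules, moves the lookup tables into a new pypromice/resources package, and drops the bundled test data. The sketch below is a minimal, hypothetical illustration of the relocated resource loaders, based only on the pypromice.resources.load_variables / load_metadata calls visible in the aws.py diff that follows; the exact argument handling is an assumption.

    import pypromice.resources

    # Assumed behaviour: passing None falls back to the packaged
    # variables.csv / file_attributes.csv, mirroring AWS.__init__ below.
    vars_df = pypromice.resources.load_variables(None)  # variable lookup table (DataFrame)
    meta = pypromice.resources.load_metadata(None)      # global attributes (dict)

    print(vars_df.index[:5].tolist())
    print(list(meta)[:5])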
pypromice/process/aws.py CHANGED
@@ -2,37 +2,43 @@
  """
  AWS data processing module
  """
- import logging
- from functools import reduce
- from importlib import metadata
- import os, unittest, toml, datetime, uuid, pkg_resources
- from typing import Sequence, Optional
-
- import numpy as np
+ import json
  import warnings

- warnings.simplefilter(action='ignore', category=FutureWarning)
+ warnings.simplefilter(action="ignore", category=FutureWarning)
+
+ import logging, os
+ from pathlib import Path
  import pandas as pd
  import xarray as xr
- from datetime import timedelta
+ from functools import reduce
+ from importlib import metadata

+
+ import pypromice.resources
  from pypromice.process.L0toL1 import toL1
  from pypromice.process.L1toL2 import toL2
  from pypromice.process.L2toL3 import toL3
+ from pypromice.process import write, load, utilities
+ from pypromice.utilities.git import get_commit_hash_and_check_dirty

- pd.set_option('display.precision', 2)
+ pd.set_option("display.precision", 2)
  xr.set_options(keep_attrs=True)
-
  logger = logging.getLogger(__name__)

- #------------------------------------------------------------------------------
-

  class AWS(object):
- '''AWS object to load and process PROMICE AWS data'''
-
- def __init__(self, config_file, inpath, var_file=None, meta_file=None):
- '''Object initialisation
+ """AWS object to load and process PROMICE AWS data"""
+
+ def __init__(
+ self,
+ config_file,
+ inpath,
+ data_issues_repository: Path | str,
+ var_file=None,
+ meta_file=None,
+ ):
+ """Object initialisation

  Parameters
  ----------
@@ -46,22 +52,54 @@ class AWS(object):
  meta_file: str, optional
  Metadata info file path. If not given then pypromice's
  metadata file is used. The default is None.
- '''
- assert(os.path.isfile(config_file)), "cannot find "+config_file
- assert(os.path.isdir(inpath)), "cannot find "+inpath
- logger.info('AWS object initialising...')
+ """
+ assert os.path.isfile(config_file), "cannot find " + config_file
+ assert os.path.isdir(inpath), "cannot find " + inpath
+ logger.info(
+ "AWS("
+ f"config_file={config_file},"
+ f" inpath={inpath},"
+ f" data_issues_repository={data_issues_repository},"
+ f" var_file={var_file},"
+ f" meta_file={meta_file}"
+ ")"
+ )

  # Load config, variables CSF standards, and L0 files
  self.config = self.loadConfig(config_file, inpath)
- self.vars = getVars(var_file)
- self.meta = getMeta(meta_file)
+ self.vars = pypromice.resources.load_variables(var_file)
+ self.meta = pypromice.resources.load_metadata(meta_file)
+ self.data_issues_repository = Path(data_issues_repository)
+
+ config_hash = get_commit_hash_and_check_dirty(Path(config_file))
+ config_source_string = f"{Path(config_file).name}:{config_hash}"
+ inpath_hash = get_commit_hash_and_check_dirty(Path(inpath))
+ data_issues_hash = get_commit_hash_and_check_dirty(self.data_issues_repository)
+ source_dict = dict(
+ pypromice=metadata.version("pypromice"),
+ l0_config_file=config_source_string,
+ l0_data_root=inpath_hash,
+ data_issues=data_issues_hash,
+ )
+ logger.debug('Source information: %s', source_dict)
+ self.meta["source"] = json.dumps(source_dict)

  # Load config file
  L0 = self.loadL0()
- self.L0=[]
+ self.L0 = []
  for l in L0:
- n = getColNames(self.vars, l.attrs['number_of_booms'], l.attrs['format'])
- self.L0.append(popCols(l, n))
+ n = write.getColNames(self.vars, l)
+ self.L0.append(utilities.popCols(l, n))
+
+ formats = {dataset.attrs["format"].lower() for dataset in self.L0}
+ if "raw" in formats:
+ self.format = "raw"
+ elif "STM" in formats:
+ self.format = "STM"
+ elif "tx" in formats:
+ self.format = "tx"
+ else:
+ raise ValueError(f"Unknown formats from l0 datasets: {','.join(formats)}")

  self.L1 = None
  self.L1A = None
@@ -69,120 +107,87 @@ class AWS(object):
  self.L3 = None

  def process(self):
- '''Perform L0 to L3 data processing'''
+ """Perform L0 to L3 data processing"""
  try:
- logger.info(f'Commencing {self.L0.attrs["number_of_booms"]}-boom processing...')
+ logger.info(
+ f'Commencing {self.L0.attrs["number_of_booms"]}-boom processing...'
+ )
+ logger.info(
+ f'Commencing {self.L0.attrs["number_of_booms"]}-boom processing...'
+ )
  except:
- logger.info(f'Commencing {self.L0[0].attrs["number_of_booms"]}-boom processing...')
+ logger.info(
+ f'Commencing {self.L0[0].attrs["number_of_booms"]}-boom processing...'
+ )
  self.getL1()
  self.getL2()
  self.getL3()

- def write(self, outpath):
- '''Write L3 data to .csv and .nc file'''
+ def writeL2(self, outpath):
+ """Write L2 data to .csv and .nc file"""
+ if os.path.isdir(outpath):
+ self.writeArr(self.L2, outpath)
+ else:
+ logger.info(f"Outpath f{outpath} does not exist. Unable to save to file")
+ pass
+
+ def writeL3(self, outpath):
+ """Write L3 data to .csv and .nc file"""
  if os.path.isdir(outpath):
- self.writeArr(outpath)
+ self.writeArr(self.L3, outpath)
  else:
- logger.info(f'Outpath f{outpath} does not exist. Unable to save to file')
+ logger.info(f"Outpath f{outpath} does not exist. Unable to save to file")
  pass

  def getL1(self):
- '''Perform L0 to L1 data processing'''
- logger.info('Level 1 processing...')
- self.L0 = [addBasicMeta(item, self.vars) for item in self.L0]
+ """Perform L0 to L1 data processing"""
+ logger.info("Level 1 processing...")
+ self.L0 = [utilities.addBasicMeta(item, self.vars) for item in self.L0]
  self.L1 = [toL1(item, self.vars) for item in self.L0]
- self.L1A = reduce(xr.Dataset.combine_first, self.L1)
+ self.L1A = reduce(xr.Dataset.combine_first, reversed(self.L1))
+ self.L1A.attrs["format"] = self.format

  def getL2(self):
- '''Perform L1 to L2 data processing'''
- logger.info('Level 2 processing...')
- self.L2 = toL2(self.L1A, vars_df=self.vars)
+ """Perform L1 to L2 data processing"""
+ logger.info("Level 2 processing...")
+
+ self.L2 = toL2(
+ self.L1A,
+ vars_df=self.vars,
+ data_flags_dir=self.data_issues_repository / "flags",
+ data_adjustments_dir=self.data_issues_repository / "adjustments",
+ )

  def getL3(self):
- '''Perform L2 to L3 data processing, including resampling and metadata
- and attribute population'''
- logger.info('Level 3 processing...')
- self.L3 = toL3(self.L2)
-
- # Resample L3 product
- f = [l.attrs['format'] for l in self.L0]
- if 'raw' in f or 'STM' in f:
- logger.info('Resampling to 10 minute')
- self.L3 = resampleL3(self.L3, '10min')
- else:
- self.L3 = resampleL3(self.L3, '60min')
- logger.info('Resampling to hour')
-
- # Re-format time
- t = self.L3['time'].values
- self.L3['time'] = list(t)
-
- # Switch gps_lon to negative (degrees_east)
- # Do this here, and NOT in addMeta, otherwise we switch back to positive
- # when calling getMeta in joinL3! PJW
- if self.L3.attrs['station_id'] not in ['UWN', 'Roof_GEUS', 'Roof_PROMICE']:
- self.L3['gps_lon'] = self.L3['gps_lon'] * -1
-
- # Add variable attributes and metadata
- self.L3 = self.addAttributes(self.L3)
-
- # Round all values to specified decimals places
- self.L3 = roundValues(self.L3, self.vars)
+ """Perform L2 to L3 data processing, including resampling and metadata
+ and attribute population"""
+ logger.info("Level 3 processing...")
+ self.L3 = toL3(self.L2, data_adjustments_dir=self.data_issues_repository / "adjustments")

- def addAttributes(self, L3):
- '''Add variable and attribute metadata
-
- Parameters
- ----------
- L3 : xr.Dataset
- Level-3 data object
-
- Returns
- -------
- L3 : xr.Dataset
- Level-3 data object with attributes
- '''
- L3 = addVars(L3, self.vars)
- L3 = addMeta(L3, self.meta)
- return L3
-
- def writeArr(self, outpath):
- '''Write L3 data to .nc and .csv hourly and daily files
+ def writeArr(self, dataset, outpath, t=None):
+ """Write L3 data to .nc and .csv hourly and daily files

  Parameters
  ----------
+ dataset : xarray.Dataset
+ Dataset to write to file
  outpath : str
  Output directory
- L3 : AWS.L3
- Level-3 data object
- '''
- outdir = os.path.join(outpath, self.L3.attrs['station_id'])
- if not os.path.isdir(outdir):
- os.mkdir(outdir)
-
- col_names = getColNames(
- self.vars,
- self.L3.attrs['number_of_booms'],
- self.L3.attrs['format'],
- self.L3.attrs['bedrock'],
- )
-
- t = int(pd.Timedelta((self.L3['time'][1] - self.L3['time'][0]).values).total_seconds())
- logger.info('Writing to files...')
- if t == 600:
- out_csv = os.path.join(outdir, self.L3.attrs['station_id']+'_10min.csv')
- out_nc = os.path.join(outdir, self.L3.attrs['station_id']+'_10min.nc')
+ t : str
+ Resampling string. This is automatically defined based
+ on the data type if not given. The default is None.
+ """
+ if t is not None:
+ write.prepare_and_write(dataset, outpath, self.vars, self.meta, t)
  else:
- out_csv = os.path.join(outdir, self.L3.attrs['station_id']+'_hour.csv')
- out_nc = os.path.join(outdir, self.L3.attrs['station_id']+'_hour.nc')
- writeCSV(out_csv, self.L3, col_names)
- col_names = col_names + ['lat', 'lon', 'alt']
- writeNC(out_nc, self.L3, col_names)
- logger.info(f'Written to {out_csv}')
- logger.info(f'Written to {out_nc}')
+ f = [l.attrs["format"] for l in self.L0]
+ if "raw" in f or "STM" in f:
+ write.prepare_and_write(dataset, outpath, self.vars, self.meta, "10min")
+ else:
+ write.prepare_and_write(dataset, outpath, self.vars, self.meta, "60min")

  def loadConfig(self, config_file, inpath):
- '''Load configuration from .toml file
+ """Load configuration from .toml file

  Parameters
  ----------
@@ -195,12 +200,12 @@ class AWS(object):
  -------
  conf : dict
  Configuration parameters
- '''
- conf = getConfig(config_file, inpath)
+ """
+ conf = load.getConfig(config_file, inpath)
  return conf

  def loadL0(self):
- '''Load level 0 (L0) data from associated TOML-formatted
+ """Load level 0 (L0) data from associated TOML-formatted
  config file and L0 data file

  Try readL0file() using the config with msg_lat & msg_lon appended. The
@@ -215,7 +220,7 @@ class AWS(object):
  -------
  ds_list : list
  List of L0 xr.Dataset objects
- '''
+ """
  ds_list = []
  for k in self.config.keys():
  target = self.config[k]
@@ -225,14 +230,14 @@ class AWS(object):
  except pd.errors.ParserError as e:
  # ParserError: Too many columns specified: expected 40 and found 38
  # logger.info(f'-----> No msg_lat or msg_lon for {k}')
- for item in ['msg_lat', 'msg_lon']:
- target['columns'].remove(item) # Also removes from self.config
+ for item in ["msg_lat", "msg_lon"]:
+ target["columns"].remove(item) # Also removes from self.config
  ds_list.append(self.readL0file(target))
- logger.info(f'L0 data successfully loaded from {k}')
+ logger.info(f"L0 data successfully loaded from {k}")
  return ds_list

  def readL0file(self, conf):
- '''Read L0 .txt file to Dataset object using config dictionary and
+ """Read L0 .txt file to Dataset object using config dictionary and
  populate with initial metadata

  Parameters
@@ -244,689 +249,15 @@ class AWS(object):
  -------
  ds : xr.Dataset
  L0 data
- '''
- file_version = conf.get('file_version', -1)
- ds = getL0(conf['file'], conf['nodata'], conf['columns'],
- conf["skiprows"], file_version, time_offset=conf.get('time_offset'))
- ds = populateMeta(ds, conf, ["columns", "skiprows", "modem"])
+ """
+ file_version = conf.get("file_version", -1)
+ ds = load.getL0(
+ conf["file"],
+ conf["nodata"],
+ conf["columns"],
+ conf["skiprows"],
+ file_version,
+ time_offset=conf.get("time_offset"),
+ )
+ ds = utilities.populateMeta(ds, conf, ["columns", "skiprows", "modem"])
  return ds
-
- #------------------------------------------------------------------------------
-
- def getConfig(config_file, inpath, default_columns: Sequence[str] = ('msg_lat', 'msg_lon')):
- '''Load configuration from .toml file. PROMICE .toml files support defining
- features at the top level which apply to all nested properties, but do not
- overwrite nested properties if they are defined
-
- Parameters
- ----------
- config_file : str
- TOML file path
- inpath : str
- Input folder directory where L0 files can be found
-
- Returns
- -------
- conf : dict
- Configuration dictionary
- '''
- conf = toml.load(config_file) # Move all top level keys to nested properties,
- top = [_ for _ in conf.keys() if not type(conf[_]) is dict] # if they are not already defined in the nested properties
- subs = [_ for _ in conf.keys() if type(conf[_]) is dict] # Insert the section name (config_file) as a file property and config file
- for s in subs:
- for t in top:
- if t not in conf[s].keys():
- conf[s][t] = conf[t]
-
- conf[s]['conf'] = config_file
- conf[s]['file'] = os.path.join(inpath, s)
- conf[s]["columns"].extend(default_columns)
-
- for t in top: conf.pop(t) # Delete all top level keys beause each file
- # should carry all properties with it
- for k in conf.keys(): # Check required fields are present
- for field in ["columns", "station_id", "format", "skiprows"]:
- assert(field in conf[k].keys()), field+" not in config keys"
- return conf
-
- def getL0(infile, nodata, cols, skiprows, file_version,
- delimiter=',', comment='#', time_offset: Optional[float] = None) -> xr.Dataset:
- ''' Read L0 data file into pandas DataFrame object
-
- Parameters
- ----------
- infile : str
- L0 file path
- nodata : list
- List containing value for nan values and reassigned value
- cols : list
- List of columns in file
- skiprows : int
- Skip rows value
- file_version : int
- Version of L0 file
- delimiter : str
- String delimiter for L0 file
- comment : str
- Notifier of commented sections in L0 file
- time_offset : Optional[float]
- Time offset in hours for correcting for non utc time data.
- Returns
- -------
- ds : xarray.Dataset
- L0 Dataset
- '''
- if file_version == 1:
- df = pd.read_csv(infile, comment=comment, index_col=0,
- na_values=nodata, names=cols,
- sep=delimiter,
- skiprows=skiprows, skip_blank_lines=True,
- usecols=range(len(cols)),
- low_memory=False)
- df['time'] = pd.to_datetime(
- df.year.astype(str) \
- + df.doy.astype(str).str.zfill(3) \
- + df.hhmm.astype(str).str.zfill(4),
- format='%Y%j%H%M'
- )
- df = df.set_index('time')
-
- else:
- df = pd.read_csv(infile, comment=comment, index_col=0,
- na_values=nodata, names=cols, parse_dates=True,
- sep=delimiter, skiprows=skiprows,
- skip_blank_lines=True,
- usecols=range(len(cols)),
- low_memory=False)
- try:
- df.index = pd.to_datetime(df.index)
- except ValueError as e:
- logger.info("\n", infile)
- logger.info("\nValueError:")
- logger.info(e)
- logger.info('\t\t> Trying pd.to_datetime with format=mixed')
- try:
- df.index = pd.to_datetime(df.index, format='mixed')
- except Exception as e:
- logger.info("\nDateParseError:")
- logger.info(e)
- logger.info('\t\t> Trying again removing apostrophes in timestamp (old files format)')
- df.index = pd.to_datetime(df.index.str.replace("\"",""))
-
- if time_offset is not None:
- df.index = df.index + timedelta(hours=time_offset)
-
- # Drop SKIP columns
- for c in df.columns:
- if c[0:4] == 'SKIP':
- df.drop(columns=c, inplace=True)
-
- # Carry relevant metadata with ds
- ds = xr.Dataset.from_dataframe(df)
- return ds
-
- def addBasicMeta(ds, vars_df):
- ''' Use a variable lookup table DataFrame to add the basic metadata
- to the xarray dataset. This is later amended to finalise L3
-
- Parameters
- ----------
- ds : xr.Dataset
- Dataset to add metadata to
- vars_df : pd.DataFrame
- Metadata dataframe
-
- Returns
- -------
- ds : xr.Dataset
- Dataset with added metadata
- '''
- for v in vars_df.index:
- if v == 'time': continue # coordinate variable, not normal var
- if v not in list(ds.variables): continue
- for c in ['standard_name', 'long_name', 'units']:
- if isinstance(vars_df[c][v], float) and np.isnan(vars_df[c][v]): continue
- ds[v].attrs[c] = vars_df[c][v]
- return ds
-
- def populateMeta(ds, conf, skip):
- '''Populate L0 Dataset with metadata dictionary
-
- Parameters
- ----------
- ds : xarray.Dataset
- L0 dataset
- conf : dict
- Metadata dictionary
- skip : list
- List of column names to skip parsing to metadata
-
- Returns
- -------
- ds : xarray.Dataset
- L0 dataset with metadata populated as Dataset attributes
- '''
- meta = {}
- # skip = ["columns", "skiprows"]
- for k in conf.keys():
- if k not in skip: meta[k] = conf[k]
- ds.attrs = meta
- return ds
-
- def writeCSV(outfile, Lx, csv_order):
- '''Write data product to CSV file
-
- Parameters
- ----------
- outfile : str
- Output file path
- Lx : xr.Dataset
- Dataset to write to file
- csv_order : list
- List order of variables
- '''
- Lcsv = Lx.to_dataframe().dropna(how='all')
- if csv_order is not None:
- names = [c for c in csv_order if c in list(Lcsv.columns)]
- Lcsv = Lcsv[names]
- Lcsv.to_csv(outfile)
-
- def writeNC(outfile, Lx, col_names=None):
- '''Write data product to NetCDF file
-
- Parameters
- ----------
- outfile : str
- Output file path
- Lx : xr.Dataset
- Dataset to write to file
- '''
- if os.path.isfile(outfile):
- os.remove(outfile)
- if col_names is not None:
- names = [c for c in col_names if c in list(Lx.keys())]
- else:
- names = list(Lx.keys())
- Lx[names].to_netcdf(outfile, mode='w', format='NETCDF4', compute=True)
-
- def writeAll(outpath, station_id, l3_h, l3_d, l3_m, csv_order=None):
- '''Write L3 hourly, daily and monthly datasets to .nc and .csv
- files
-
- outpath : str
- Output file path
- station_id : str
- Station name
- l3_h : xr.Dataset
- L3 hourly data
- l3_d : xr.Dataset
- L3 daily data
- l3_m : xr.Dataset
- L3 monthly data
- csv_order : list, optional
- List order of variables
- '''
- if not os.path.isdir(outpath):
- os.mkdir(outpath)
- outfile_h = os.path.join(outpath, station_id + '_hour')
- outfile_d = os.path.join(outpath, station_id + '_day')
- outfile_m = os.path.join(outpath, station_id + '_month')
- for o,l in zip([outfile_h, outfile_d, outfile_m], [l3_h ,l3_d, l3_m]):
- writeCSV(o+'.csv',l, csv_order)
- writeNC(o+'.nc',l)
-
-
- def popCols(ds, names):
- '''Populate dataset with all given variable names
-
- Parammeters
- -----------
- ds : xr.Dataset
- Dataset
- names : list
- List of variable names to populate
- '''
- for v in names:
- if v not in list(ds.variables):
- ds[v] = (('time'), np.arange(ds['time'].size)*np.nan)
- return ds
-
- def getColNames(vars_df, booms=None, data_type=None, bedrock=False):
- '''Get all variable names for a given data type, based on a variables
- look-up table
-
- Parameters
- ----------
- vars_df : pd.DataFrame
- Variables look-up table
- booms : int, optional
- Number of booms. If this parameter is empty then all variables
- regardless of boom type will be passed. The default is None.
- data_type : str, optional
- Data type, "tx", "STM" or "raw". If this parameter is empty then all
- variables regardless of data type will be passed. The default is None.
-
- Returns
- -------
- list
- Variable names
- '''
- if booms==1:
- vars_df = vars_df.loc[vars_df['station_type'].isin(['one-boom','all'])]
- elif booms==2:
- vars_df = vars_df.loc[vars_df['station_type'].isin(['two-boom','all'])]
-
- if data_type=='TX':
- vars_df = vars_df.loc[vars_df['data_type'].isin(['TX','all'])]
- elif data_type=='STM' or data_type=='raw':
- vars_df = vars_df.loc[vars_df['data_type'].isin(['raw','all'])]
-
- col_names = list(vars_df.index)
- if isinstance(bedrock, str):
- bedrock = (bedrock.lower() == 'true')
- if bedrock == True:
- col_names.remove('cc')
- for v in ['dlhf_u', 'dlhf_l', 'dshf_u', 'dshf_l']:
- try:
- col_names.remove(v)
- except:
- pass
- return col_names
-
- def roundValues(ds, df, col='max_decimals'):
- '''Round all variable values in data array based on pre-defined rounding
- value in variables look-up table DataFrame
-
- Parameters
- ----------
- ds : xr.Dataset
- Dataset to round values in
- df : pd.Dataframe
- Variable look-up table with rounding values
- col : str
- Column in variable look-up table that contains rounding values. The
- default is "max_decimals"
- '''
- df = df[col]
- df = df.dropna(how='all')
- for var in df.index:
- if var not in list(ds.variables):
- continue
- if df[var] is not np.nan:
- ds[var] = ds[var].round(decimals=int(df[var]))
- return ds
-
- def addVars(ds, variables):
- '''Add variable attributes from file to dataset
-
- Parameters
- ----------
- ds : xarray.Dataset
- Dataset to add variable attributes to
- variables : pandas.DataFrame
- Variables lookup table file
-
- Returns
- -------
- ds : xarray.Dataset
- Dataset with metadata
- '''
- for k in ds.keys():
- if k not in variables.index: continue
- ds[k].attrs['standard_name'] = variables.loc[k]['standard_name']
- ds[k].attrs['long_name'] = variables.loc[k]['long_name']
- ds[k].attrs['units'] = variables.loc[k]['units']
- ds[k].attrs['coverage_content_type'] = variables.loc[k]['coverage_content_type']
- ds[k].attrs['coordinates'] = variables.loc[k]['coordinates']
- return ds
-
- def addMeta(ds, meta):
- '''Add metadata attributes from file to dataset
-
- Parameters
- ----------
- ds : xarray.Dataset
- Dataset to add metadata attributes to
- meta : dict
- Metadata file
-
- Returns
- -------
- ds : xarray.Dataset
- Dataset with metadata
- '''
- ds['lon'] = ds['gps_lon'].mean()
- ds['lon'].attrs = ds['gps_lon'].attrs
-
- ds['lat'] = ds['gps_lat'].mean()
- ds['lat'].attrs = ds['gps_lat'].attrs
-
- ds['alt'] = ds['gps_alt'].mean()
- ds['alt'].attrs = ds['gps_alt'].attrs
-
- # for k in ds.keys(): # for each var
- # if 'units' in ds[k].attrs:
- # if ds[k].attrs['units'] == 'C':
- # ds[k].attrs['units'] = 'degrees_C'
-
- # https://wiki.esipfed.org/Attribute_Convention_for_Data_Discovery_1-3#geospatial_bounds
- ds.attrs['id'] = 'dk.geus.promice:' + str(uuid.uuid3(uuid.NAMESPACE_DNS, ds.attrs['station_id']))
- ds.attrs['history'] = 'Generated on ' + datetime.datetime.utcnow().isoformat()
- ds.attrs['date_created'] = str(datetime.datetime.now().isoformat())
- ds.attrs['date_modified'] = ds.attrs['date_created']
- ds.attrs['date_issued'] = ds.attrs['date_created']
- ds.attrs['date_metadata_modified'] = ds.attrs['date_created']
-
- ds.attrs['geospatial_bounds'] = "POLYGON((" + \
- f"{ds['lat'].min().values} {ds['lon'].min().values}, " + \
- f"{ds['lat'].min().values} {ds['lon'].max().values}, " + \
- f"{ds['lat'].max().values} {ds['lon'].max().values}, " + \
- f"{ds['lat'].max().values} {ds['lon'].min().values}, " + \
- f"{ds['lat'].min().values} {ds['lon'].min().values}))"
-
- ds.attrs['geospatial_lat_min'] = str(ds['lat'].min().values)
- ds.attrs['geospatial_lat_max'] = str(ds['lat'].max().values)
- ds.attrs['geospatial_lon_min'] = str(ds['lon'].min().values)
- ds.attrs['geospatial_lon_max'] = str(ds['lon'].max().values)
- ds.attrs['geospatial_vertical_min'] = str(ds['alt'].min().values)
- ds.attrs['geospatial_vertical_max'] = str(ds['alt'].max().values)
- ds.attrs['geospatial_vertical_positive'] = 'up'
- ds.attrs['time_coverage_start'] = str(ds['time'][0].values)
- ds.attrs['time_coverage_end'] = str(ds['time'][-1].values)
-
- try:
- ds.attrs['source']= 'pypromice v' + str(metadata.version('pypromice'))
- except:
- ds.attrs['source'] = 'pypromice'
-
- # https://www.digi.com/resources/documentation/digidocs/90001437-13/reference/r_iso_8601_duration_format.htm
- try:
- ds.attrs['time_coverage_duration'] = str(pd.Timedelta((ds['time'][-1] - ds['time'][0]).values).isoformat())
- ds.attrs['time_coverage_resolution'] = str(pd.Timedelta((ds['time'][1] - ds['time'][0]).values).isoformat())
- except:
- ds.attrs['time_coverage_duration'] = str(pd.Timedelta(0).isoformat())
- ds.attrs['time_coverage_resolution'] = str(pd.Timedelta(0).isoformat())
-
- # Note: int64 dtype (long int) is incompatible with OPeNDAP access via THREDDS for NetCDF files
- # See https://stackoverflow.com/questions/48895227/output-int32-time-dimension-in-netcdf-using-xarray
- ds.time.encoding["dtype"] = "i4" # 32-bit signed integer
- #ds.time.encoding["calendar"] = 'proleptic_gregorian' # this is default
-
- # Load metadata attributes and add to Dataset
- [_addAttr(ds, key, value) for key,value in meta.items()]
-
- # Check attribute formating
- for k,v in ds.attrs.items():
- if not isinstance(v, str) or not isinstance(v, int):
- ds.attrs[k]=str(v)
- return ds
-
-
- def getVars(v_file=None):
- '''Load variables.csv file
-
- Parameters
- ----------
- v_file : str
- Variable lookup table file path
-
- Returns
- -------
- pandas.DataFrame
- Variables dataframe
- '''
- if v_file is None:
- with pkg_resources.resource_stream('pypromice', 'process/variables.csv') as stream:
- return pd.read_csv(stream, index_col=0, comment="#", encoding='utf-8')
- else:
- return pd.read_csv(v_file, index_col=0, comment="#")
-
-
- def getMeta(m_file=None, delimiter=','): #TODO change to DataFrame output to match variables.csv
- '''Load metadata table
-
- Parameters
- ----------
- m_file : str
- Metadata file path
- delimiter : str
- Metadata character delimiter. The default is ","
-
- Returns
- -------
- meta : dict
- Metadata dictionary
- '''
- meta={}
- if m_file is None:
- with pkg_resources.resource_stream('pypromice', 'process/metadata.csv') as stream:
- lines = stream.read().decode("utf-8")
- lines = lines.split("\n")
- else:
- with open(m_file, 'r') as f:
- lines = f.readlines()
- for l in lines[1:]:
- try:
- meta[l.split(',')[0]] = l.split(delimiter)[1].split('\n')[0].replace(';',',')
- except IndexError:
- pass
- return meta
-
- def resampleL3(ds_h, t):
- '''Resample L3 AWS data, e.g. hourly to daily average. This uses pandas
- DataFrame resampling at the moment as a work-around to the xarray Dataset
- resampling. As stated, xarray resampling is a lengthy process that takes
- ~2-3 minutes per operation: ds_d = ds_h.resample({'time':"1D"}).mean()
- This has now been fixed, so needs implementing:
- https://github.com/pydata/xarray/issues/4498#event-6610799698
-
- Parameters
- ----------
- ds_h : xarray.Dataset
- L3 AWS dataset either at 10 min (for raw data) or hourly (for tx data)
- t : str
- Resample factor, same variable definition as in
- pandas.DataFrame.resample()
-
- Returns
- -------
- ds_d : xarray.Dataset
- L3 AWS dataset resampled to the frequency defined by t
- '''
- df_d = ds_h.to_dataframe().resample(t).mean()
-
- # recalculating wind direction from averaged directional wind speeds
- for var in ['wdir_u','wdir_l','wdir_i']:
- if var in df_d.columns:
- if ('wspd_x_'+var.split('_')[1] in df_d.columns) & ('wspd_x_'+var.split('_')[1] in df_d.columns):
- df_d[var] = _calcWindDir(df_d['wspd_x_'+var.split('_')[1]],
- df_d['wspd_y_'+var.split('_')[1]])
- else:
- logger.info(var,'in dataframe but not','wspd_x_'+var.split('_')[1],'wspd_x_'+var.split('_')[1])
-
- # recalculating relative humidity from average vapour pressure and average
- # saturation vapor pressure
- for var in ['rh_u','rh_l']:
- lvl = var.split('_')[1]
- if var in df_d.columns:
- if ('t_'+lvl in ds_h.keys()):
- es_wtr, es_cor = calculateSaturationVaporPressure(ds_h['t_'+lvl])
- p_vap = ds_h[var] / 100 * es_wtr
-
- df_d[var] = (p_vap.to_series().resample(t).mean() \
- / es_wtr.to_series().resample(t).mean())*100
- df_d[var+'_cor'] = (p_vap.to_series().resample(t).mean() \
- / es_cor.to_series().resample(t).mean())*100
-
- vals = [xr.DataArray(data=df_d[c], dims=['time'],
- coords={'time':df_d.index}, attrs=ds_h[c].attrs) for c in df_d.columns]
- ds_d = xr.Dataset(dict(zip(df_d.columns,vals)), attrs=ds_h.attrs)
- return ds_d
-
-
- def calculateSaturationVaporPressure(t, T_0=273.15, T_100=373.15, es_0=6.1071,
- es_100=1013.246, eps=0.622):
- '''Calculate specific humidity
-
- Parameters
- ----------
- T_0 : float
- Steam point temperature. Default is 273.15.
- T_100 : float
- Steam point temperature in Kelvin
- t : xarray.DataArray
- Air temperature
- es_0 : float
- Saturation vapour pressure at the melting point (hPa)
- es_100 : float
- Saturation vapour pressure at steam point temperature (hPa)
-
- Returns
- -------
- xarray.DataArray
- Saturation vapour pressure with regard to water above 0 C (hPa)
- xarray.DataArray
- Saturation vapour pressure where subfreezing timestamps are with regards to ice (hPa)
- '''
- # Saturation vapour pressure above 0 C (hPa)
- es_wtr = 10**(-7.90298 * (T_100 / (t + T_0) - 1) + 5.02808 * np.log10(T_100 / (t + T_0))
- - 1.3816E-7 * (10**(11.344 * (1 - (t + T_0) / T_100)) - 1)
- + 8.1328E-3 * (10**(-3.49149 * (T_100 / (t + T_0) -1)) - 1) + np.log10(es_100))
-
- # Saturation vapour pressure below 0 C (hPa)
- es_ice = 10**(-9.09718 * (T_0 / (t + T_0) - 1) - 3.56654
- * np.log10(T_0 / (t + T_0)) + 0.876793
- * (1 - (t + T_0) / T_0)
- + np.log10(es_0))
-
- # Saturation vapour pressure (hPa)
- es_cor = xr.where(t < 0, es_ice, es_wtr)
-
- return es_wtr, es_cor
-
-
- def _calcWindDir(wspd_x, wspd_y):
- '''Calculate wind direction in degrees
-
- Parameters
- ----------
- wspd_x : xarray.DataArray
- Wind speed in X direction
- wspd_y : xarray.DataArray
- Wind speed in Y direction
-
- Returns
- -------
- wdir : xarray.DataArray
- Wind direction'''
- deg2rad = np.pi / 180
- rad2deg = 1 / deg2rad
- wdir = np.arctan2(wspd_x, wspd_y) * rad2deg
- wdir = (wdir + 360) % 360
- return wdir
-
-
- def _addAttr(ds, key, value):
- '''Add attribute to xarray dataset
-
- ds : xr.Dataset
- Dataset to add attribute to
- key : str
- Attribute name, with "." denoting variable attributes
- value : str/int
- Value for attribute'''
- if len(key.split('.')) == 2:
- try:
- ds[key.split('.')[0]].attrs[key.split('.')[1]] = str(value)
- except:
- pass
- # logger.info(f'Unable to add metadata to {key.split(".")[0]}')
- else:
- ds.attrs[key] = value
-
-
- #------------------------------------------------------------------------------
-
- class TestProcess(unittest.TestCase):
-
- def testgetVars(self):
- '''Test variable table lookup retrieval'''
- v = getVars()
- self.assertIsInstance(v, pd.DataFrame)
- self.assertTrue(v.columns[0] in 'standard_name')
- self.assertTrue(v.columns[2] in 'units')
-
- def testgetMeta(self):
- '''Test AWS names retrieval'''
- m = getMeta()
- self.assertIsInstance(m, dict)
- self.assertTrue('references' in m)
-
- def testAddAll(self):
- '''Test variable and metadata attributes added to Dataset'''
- d = xr.Dataset()
- v = getVars()
- att = list(v.index)
- att1 = ['gps_lon', 'gps_lat', 'gps_alt', 'albedo', 'p']
- for a in att:
- d[a]=[0,1]
- for a in att1:
- d[a]=[0,1]
- d['time'] = [datetime.datetime.now(),
- datetime.datetime.now()-timedelta(days=365)]
- d.attrs['station_id']='TEST'
- meta = getMeta()
- d = addVars(d, v)
- d = addMeta(d, meta)
- self.assertTrue(d.attrs['station_id']=='TEST')
- self.assertIsInstance(d.attrs['references'], str)
-
- def testL0toL3(self):
- '''Test L0 to L3 processing'''
- try:
- import pypromice
- pAWS = AWS(os.path.join(os.path.dirname(pypromice.__file__),'test/test_config1.toml'),
- os.path.join(os.path.dirname(pypromice.__file__),'test'))
- except:
- pAWS = AWS('../test/test_config1.toml', '../test/')
- pAWS.process()
- self.assertIsInstance(pAWS.L3, xr.Dataset)
- self.assertTrue(pAWS.L3.attrs['station_id']=='TEST1')
-
- def testCLIgetl3(self):
- '''Test get_l3 CLI'''
- exit_status = os.system('get_l3 -h')
- self.assertEqual(exit_status, 0)
-
- def testCLIjoinl3(self):
- '''Test join_l3 CLI'''
- exit_status = os.system('join_l3 -h')
- self.assertEqual(exit_status, 0)
-
- #------------------------------------------------------------------------------
-
- if __name__ == "__main__":
-
- # # Test an individual station
- # test_station = 'xxx'
- # # config_file = '../../../../aws-l0/raw/config/{}.toml'.format(test_station)
- # config_file = '../../../../aws-l0/tx/config/{}.toml'.format(test_station)
- # # inpath= '../../../../aws-l0/raw/{}/'.format(test_station)
- # inpath= '../../../../aws-l0/tx/'
- # vari = 'variables.csv'
- # pAWS_gc = AWS(config_file, inpath, var_file=vari)
- # pAWS_gc.process()
- # pAWS_gc.getL1()
- # pAWS_gc.getL2()
- # pAWS_gc.getL3()
-
- # # Use test configs
- # config_files = ['test/test_config1.toml', 'test/test_config2.toml']
- # inpath= 'test/'
- # outpath = 'test/'
- # vari = 'variables.csv'
- # for cf in config_files:
- # pAWS_gc = AWS(cf, inpath, var_file=vari)
- # pAWS_gc.process()
-
- unittest.main()
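The aws.py diff changes the public workflow: the constructor now takes a required data_issues_repository argument and records git commit hashes of the config, L0 data and data-issues checkouts in the "source" attribute, and the old write() method is split into writeL2() and writeL3(). The following is a hypothetical driver sketch based only on the signatures visible in the diff above; the paths are placeholders, and it assumes the config, L0 and data-issues directories are git working copies.

    from pypromice.process.aws import AWS

    # Placeholder paths; real station configs and L0 layouts will differ.
    aws = AWS(
        config_file="aws-l0/tx/config/XXX.toml",    # hypothetical station config
        inpath="aws-l0/tx/",                        # hypothetical L0 input root
        data_issues_repository="aws-data-issues/",  # new required argument in 1.4.1
    )
    aws.process()          # runs getL1(), getL2() and getL3() in sequence
    aws.writeL2("out/l2")  # replaces the single write() method from 1.3.6
    aws.writeL3("out/l3")

Note that the resampling, attribute population and rounding formerly done inside getL3()/writeArr() now live in the new pypromice.process.write.prepare_and_write() helper, which writeArr() delegates to.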