pypromice 1.3.5__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of pypromice might be problematic.

Files changed (55)
  1. pypromice/get/get.py +19 -19
  2. pypromice/postprocess/bufr_to_csv.py +6 -1
  3. pypromice/postprocess/bufr_utilities.py +91 -18
  4. pypromice/postprocess/create_bufr_files.py +178 -0
  5. pypromice/postprocess/get_bufr.py +248 -397
  6. pypromice/postprocess/make_metadata_csv.py +214 -0
  7. pypromice/postprocess/real_time_utilities.py +41 -11
  8. pypromice/process/L0toL1.py +12 -5
  9. pypromice/process/L1toL2.py +159 -30
  10. pypromice/process/L2toL3.py +1034 -187
  11. pypromice/process/aws.py +131 -752
  12. pypromice/process/get_l2.py +90 -0
  13. pypromice/process/get_l2tol3.py +111 -0
  14. pypromice/process/join_l2.py +112 -0
  15. pypromice/process/join_l3.py +551 -120
  16. pypromice/process/load.py +161 -0
  17. pypromice/process/resample.py +128 -0
  18. pypromice/process/utilities.py +68 -0
  19. pypromice/process/write.py +503 -0
  20. pypromice/qc/github_data_issues.py +10 -16
  21. pypromice/qc/percentiles/thresholds.csv +2 -2
  22. pypromice/qc/persistence.py +71 -25
  23. pypromice/resources/__init__.py +28 -0
  24. pypromice/{process/metadata.csv → resources/file_attributes.csv} +0 -2
  25. pypromice/resources/variable_aliases_GC-Net.csv +78 -0
  26. pypromice/resources/variables.csv +106 -0
  27. pypromice/station_configuration.py +118 -0
  28. pypromice/tx/get_l0tx.py +7 -4
  29. pypromice/tx/payload_formats.csv +1 -0
  30. pypromice/tx/tx.py +27 -6
  31. pypromice/utilities/__init__.py +0 -0
  32. pypromice/utilities/git.py +61 -0
  33. {pypromice-1.3.5.dist-info → pypromice-1.4.0.dist-info}/METADATA +12 -21
  34. pypromice-1.4.0.dist-info/RECORD +53 -0
  35. {pypromice-1.3.5.dist-info → pypromice-1.4.0.dist-info}/WHEEL +1 -1
  36. pypromice-1.4.0.dist-info/entry_points.txt +13 -0
  37. pypromice/postprocess/station_configurations.toml +0 -762
  38. pypromice/process/get_l3.py +0 -46
  39. pypromice/process/variables.csv +0 -92
  40. pypromice/qc/persistence_test.py +0 -150
  41. pypromice/test/test_config1.toml +0 -69
  42. pypromice/test/test_config2.toml +0 -54
  43. pypromice/test/test_email +0 -75
  44. pypromice/test/test_payload_formats.csv +0 -4
  45. pypromice/test/test_payload_types.csv +0 -7
  46. pypromice/test/test_percentile.py +0 -229
  47. pypromice/test/test_raw1.txt +0 -4468
  48. pypromice/test/test_raw_DataTable2.txt +0 -11167
  49. pypromice/test/test_raw_SlimTableMem1.txt +0 -1155
  50. pypromice/test/test_raw_transmitted1.txt +0 -15411
  51. pypromice/test/test_raw_transmitted2.txt +0 -28
  52. pypromice-1.3.5.dist-info/RECORD +0 -53
  53. pypromice-1.3.5.dist-info/entry_points.txt +0 -8
  54. {pypromice-1.3.5.dist-info → pypromice-1.4.0.dist-info}/LICENSE.txt +0 -0
  55. {pypromice-1.3.5.dist-info → pypromice-1.4.0.dist-info}/top_level.txt +0 -0
pypromice/process/aws.py CHANGED
@@ -2,37 +2,43 @@
 """
 AWS data processing module
 """
-import logging
-from functools import reduce
-from importlib import metadata
-import os, unittest, toml, datetime, uuid, pkg_resources
-from typing import Sequence, Optional
-
-import numpy as np
+import json
 import warnings
 
-warnings.simplefilter(action='ignore', category=FutureWarning)
+warnings.simplefilter(action="ignore", category=FutureWarning)
+
+import logging, os
+from pathlib import Path
 import pandas as pd
 import xarray as xr
-from datetime import timedelta
+from functools import reduce
+from importlib import metadata
 
+
+import pypromice.resources
 from pypromice.process.L0toL1 import toL1
 from pypromice.process.L1toL2 import toL2
 from pypromice.process.L2toL3 import toL3
+from pypromice.process import write, load, utilities
+from pypromice.utilities.git import get_commit_hash_and_check_dirty
 
-pd.set_option('display.precision', 2)
+pd.set_option("display.precision", 2)
 xr.set_options(keep_attrs=True)
-
 logger = logging.getLogger(__name__)
 
-#------------------------------------------------------------------------------
-
 
 class AWS(object):
-    '''AWS object to load and process PROMICE AWS data'''
-
-    def __init__(self, config_file, inpath, var_file=None, meta_file=None):
-        '''Object initialisation
+    """AWS object to load and process PROMICE AWS data"""
+
+    def __init__(
+        self,
+        config_file,
+        inpath,
+        data_issues_repository: Path | str,
+        var_file=None,
+        meta_file=None,
+    ):
+        """Object initialisation
 
         Parameters
         ----------
@@ -46,22 +52,45 @@ class AWS(object):
         meta_file: str, optional
             Metadata info file path. If not given then pypromice's
             metadata file is used. The default is None.
-        '''
-        assert(os.path.isfile(config_file)), "cannot find "+config_file
-        assert(os.path.isdir(inpath)), "cannot find "+inpath
-        logger.info('AWS object initialising...')
+        """
+        assert os.path.isfile(config_file), "cannot find " + config_file
+        assert os.path.isdir(inpath), "cannot find " + inpath
+        logger.info("AWS object initialising...")
 
         # Load config, variables CSF standards, and L0 files
         self.config = self.loadConfig(config_file, inpath)
-        self.vars = getVars(var_file)
-        self.meta = getMeta(meta_file)
+        self.vars = pypromice.resources.load_variables(var_file)
+        self.meta = pypromice.resources.load_metadata(meta_file)
+        self.data_issues_repository = Path(data_issues_repository)
+
+        config_hash = get_commit_hash_and_check_dirty(Path(config_file))
+        config_source_string = f"{Path(config_file).name}:{config_hash}"
+        inpath_hash = get_commit_hash_and_check_dirty(Path(inpath))
+        data_issues_hash = get_commit_hash_and_check_dirty(self.data_issues_repository)
+        source_dict = dict(
+            pypromice=metadata.version("pypromice"),
+            l0_config_file=config_source_string,
+            l0_data_root=inpath_hash,
+            data_issues=data_issues_hash,
+        )
+        self.meta["source"] = json.dumps(source_dict)
 
         # Load config file
         L0 = self.loadL0()
-        self.L0=[]
+        self.L0 = []
         for l in L0:
-            n = getColNames(self.vars, l.attrs['number_of_booms'], l.attrs['format'])
-            self.L0.append(popCols(l, n))
+            n = write.getColNames(self.vars, l)
+            self.L0.append(utilities.popCols(l, n))
+
+        formats = {dataset.attrs["format"].lower() for dataset in self.L0}
+        if "raw" in formats:
+            self.format = "raw"
+        elif "STM" in formats:
+            self.format = "STM"
+        elif "tx" in formats:
+            self.format = "tx"
+        else:
+            raise ValueError(f"Unknown formats from l0 datasets: {','.join(formats)}")
 
         self.L1 = None
         self.L1A = None
@@ -69,120 +98,87 @@ class AWS(object):
         self.L3 = None
 
     def process(self):
-        '''Perform L0 to L3 data processing'''
+        """Perform L0 to L3 data processing"""
         try:
-            logger.info(f'Commencing {self.L0.attrs["number_of_booms"]}-boom processing...')
+            logger.info(
+                f'Commencing {self.L0.attrs["number_of_booms"]}-boom processing...'
+            )
+            logger.info(
+                f'Commencing {self.L0.attrs["number_of_booms"]}-boom processing...'
+            )
         except:
-            logger.info(f'Commencing {self.L0[0].attrs["number_of_booms"]}-boom processing...')
+            logger.info(
+                f'Commencing {self.L0[0].attrs["number_of_booms"]}-boom processing...'
+            )
         self.getL1()
         self.getL2()
         self.getL3()
 
-    def write(self, outpath):
-        '''Write L3 data to .csv and .nc file'''
+    def writeL2(self, outpath):
+        """Write L2 data to .csv and .nc file"""
         if os.path.isdir(outpath):
-            self.writeArr(outpath)
+            self.writeArr(self.L2, outpath)
         else:
-            logger.info(f'Outpath f{outpath} does not exist. Unable to save to file')
+            logger.info(f"Outpath f{outpath} does not exist. Unable to save to file")
+            pass
+
+    def writeL3(self, outpath):
+        """Write L3 data to .csv and .nc file"""
+        if os.path.isdir(outpath):
+            self.writeArr(self.L3, outpath)
+        else:
+            logger.info(f"Outpath f{outpath} does not exist. Unable to save to file")
             pass
 
     def getL1(self):
-        '''Perform L0 to L1 data processing'''
-        logger.info('Level 1 processing...')
-        self.L0 = [addBasicMeta(item, self.vars) for item in self.L0]
+        """Perform L0 to L1 data processing"""
+        logger.info("Level 1 processing...")
+        self.L0 = [utilities.addBasicMeta(item, self.vars) for item in self.L0]
         self.L1 = [toL1(item, self.vars) for item in self.L0]
-        self.L1A = reduce(xr.Dataset.combine_first, self.L1)
+        self.L1A = reduce(xr.Dataset.combine_first, reversed(self.L1))
+        self.L1A.attrs["format"] = self.format
 
     def getL2(self):
-        '''Perform L1 to L2 data processing'''
-        logger.info('Level 2 processing...')
-        self.L2 = toL2(self.L1A, vars_df=self.vars)
+        """Perform L1 to L2 data processing"""
+        logger.info("Level 2 processing...")
+
+        self.L2 = toL2(
+            self.L1A,
+            vars_df=self.vars,
+            data_flags_dir=self.data_issues_repository / "flags",
+            data_adjustments_dir=self.data_issues_repository / "adjustments",
+        )
 
     def getL3(self):
-        '''Perform L2 to L3 data processing, including resampling and metadata
-        and attribute population'''
-        logger.info('Level 3 processing...')
-        self.L3 = toL3(self.L2)
-
-        # Resample L3 product
-        f = [l.attrs['format'] for l in self.L0]
-        if 'raw' in f or 'STM' in f:
-            logger.info('Resampling to 10 minute')
-            self.L3 = resampleL3(self.L3, '10min')
-        else:
-            self.L3 = resampleL3(self.L3, '60min')
-            logger.info('Resampling to hour')
+        """Perform L2 to L3 data processing, including resampling and metadata
+        and attribute population"""
+        logger.info("Level 3 processing...")
+        self.L3 = toL3(self.L2, data_adjustments_dir=self.data_issues_repository / "adjustments")
 
-        # Re-format time
-        t = self.L3['time'].values
-        self.L3['time'] = list(t)
-
-        # Switch gps_lon to negative (degrees_east)
-        # Do this here, and NOT in addMeta, otherwise we switch back to positive
-        # when calling getMeta in joinL3! PJW
-        if self.L3.attrs['station_id'] not in ['UWN', 'Roof_GEUS', 'Roof_PROMICE']:
-            self.L3['gps_lon'] = self.L3['gps_lon'] * -1
-
-        # Add variable attributes and metadata
-        self.L3 = self.addAttributes(self.L3)
-
-        # Round all values to specified decimals places
-        self.L3 = roundValues(self.L3, self.vars)
-
-    def addAttributes(self, L3):
-        '''Add variable and attribute metadata
-
-        Parameters
-        ----------
-        L3 : xr.Dataset
-            Level-3 data object
-
-        Returns
-        -------
-        L3 : xr.Dataset
-            Level-3 data object with attributes
-        '''
-        L3 = addVars(L3, self.vars)
-        L3 = addMeta(L3, self.meta)
-        return L3
-
-    def writeArr(self, outpath):
-        '''Write L3 data to .nc and .csv hourly and daily files
+    def writeArr(self, dataset, outpath, t=None):
+        """Write L3 data to .nc and .csv hourly and daily files
 
         Parameters
         ----------
+        dataset : xarray.Dataset
+            Dataset to write to file
         outpath : str
             Output directory
-        L3 : AWS.L3
-            Level-3 data object
-        '''
-        outdir = os.path.join(outpath, self.L3.attrs['station_id'])
-        if not os.path.isdir(outdir):
-            os.mkdir(outdir)
-
-        col_names = getColNames(
-            self.vars,
-            self.L3.attrs['number_of_booms'],
-            self.L3.attrs['format'],
-            self.L3.attrs['bedrock'],
-        )
-
-        t = int(pd.Timedelta((self.L3['time'][1] - self.L3['time'][0]).values).total_seconds())
-        logger.info('Writing to files...')
-        if t == 600:
-            out_csv = os.path.join(outdir, self.L3.attrs['station_id']+'_10min.csv')
-            out_nc = os.path.join(outdir, self.L3.attrs['station_id']+'_10min.nc')
+        t : str
+            Resampling string. This is automatically defined based
+            on the data type if not given. The default is None.
+        """
+        if t is not None:
+            write.prepare_and_write(dataset, outpath, self.vars, self.meta, t)
         else:
-            out_csv = os.path.join(outdir, self.L3.attrs['station_id']+'_hour.csv')
-            out_nc = os.path.join(outdir, self.L3.attrs['station_id']+'_hour.nc')
-        writeCSV(out_csv, self.L3, col_names)
-        col_names = col_names + ['lat', 'lon', 'alt']
-        writeNC(out_nc, self.L3, col_names)
-        logger.info(f'Written to {out_csv}')
-        logger.info(f'Written to {out_nc}')
+            f = [l.attrs["format"] for l in self.L0]
+            if "raw" in f or "STM" in f:
+                write.prepare_and_write(dataset, outpath, self.vars, self.meta, "10min")
+            else:
+                write.prepare_and_write(dataset, outpath, self.vars, self.meta, "60min")
 
     def loadConfig(self, config_file, inpath):
-        '''Load configuration from .toml file
+        """Load configuration from .toml file
 
         Parameters
         ----------
@@ -195,12 +191,12 @@ class AWS(object):
         -------
         conf : dict
             Configuration parameters
-        '''
-        conf = getConfig(config_file, inpath)
+        """
+        conf = load.getConfig(config_file, inpath)
         return conf
 
     def loadL0(self):
-        '''Load level 0 (L0) data from associated TOML-formatted
+        """Load level 0 (L0) data from associated TOML-formatted
         config file and L0 data file
 
         Try readL0file() using the config with msg_lat & msg_lon appended. The
@@ -215,7 +211,7 @@ class AWS(object):
         -------
         ds_list : list
             List of L0 xr.Dataset objects
-        '''
+        """
         ds_list = []
         for k in self.config.keys():
             target = self.config[k]
@@ -224,15 +220,15 @@ class AWS(object):
 
             except pd.errors.ParserError as e:
                 # ParserError: Too many columns specified: expected 40 and found 38
-                logger.info(f'-----> No msg_lat or msg_lon for {k}')
-                for item in ['msg_lat', 'msg_lon']:
-                    target['columns'].remove(item) # Also removes from self.config
+                # logger.info(f'-----> No msg_lat or msg_lon for {k}')
+                for item in ["msg_lat", "msg_lon"]:
+                    target["columns"].remove(item) # Also removes from self.config
                 ds_list.append(self.readL0file(target))
-            logger.info(f'L0 data successfully loaded from {k}')
+            logger.info(f"L0 data successfully loaded from {k}")
         return ds_list
 
     def readL0file(self, conf):
-        '''Read L0 .txt file to Dataset object using config dictionary and
+        """Read L0 .txt file to Dataset object using config dictionary and
         populate with initial metadata
 
         Parameters
@@ -244,632 +240,15 @@ class AWS(object):
         -------
         ds : xr.Dataset
             L0 data
-        '''
-        file_version = conf.get('file_version', -1)
-        ds = getL0(conf['file'], conf['nodata'], conf['columns'],
-                   conf["skiprows"], file_version, time_offset=conf.get('time_offset'))
-        ds = populateMeta(ds, conf, ["columns", "skiprows", "modem"])
+        """
+        file_version = conf.get("file_version", -1)
+        ds = load.getL0(
+            conf["file"],
+            conf["nodata"],
+            conf["columns"],
+            conf["skiprows"],
+            file_version,
+            time_offset=conf.get("time_offset"),
+        )
+        ds = utilities.populateMeta(ds, conf, ["columns", "skiprows", "modem"])
         return ds
-
-#------------------------------------------------------------------------------
-
-def getConfig(config_file, inpath, default_columns: Sequence[str] = ('msg_lat', 'msg_lon')):
-    '''Load configuration from .toml file. PROMICE .toml files support defining
-    features at the top level which apply to all nested properties, but do not
-    overwrite nested properties if they are defined
-
-    Parameters
-    ----------
-    config_file : str
-        TOML file path
-    inpath : str
-        Input folder directory where L0 files can be found
-
-    Returns
-    -------
-    conf : dict
-        Configuration dictionary
-    '''
-    conf = toml.load(config_file) # Move all top level keys to nested properties,
-    top = [_ for _ in conf.keys() if not type(conf[_]) is dict] # if they are not already defined in the nested properties
-    subs = [_ for _ in conf.keys() if type(conf[_]) is dict] # Insert the section name (config_file) as a file property and config file
-    for s in subs:
-        for t in top:
-            if t not in conf[s].keys():
-                conf[s][t] = conf[t]
-
-        conf[s]['conf'] = config_file
-        conf[s]['file'] = os.path.join(inpath, s)
-        conf[s]["columns"].extend(default_columns)
-
-    for t in top: conf.pop(t) # Delete all top level keys beause each file
-                              # should carry all properties with it
-    for k in conf.keys(): # Check required fields are present
-        for field in ["columns", "station_id", "format", "skiprows"]:
-            assert(field in conf[k].keys()), field+" not in config keys"
-    return conf
-
-def getL0(infile, nodata, cols, skiprows, file_version,
-          delimiter=',', comment='#', time_offset: Optional[float] = None) -> xr.Dataset:
-    ''' Read L0 data file into pandas DataFrame object
-
-    Parameters
-    ----------
-    infile : str
-        L0 file path
-    nodata : list
-        List containing value for nan values and reassigned value
-    cols : list
-        List of columns in file
-    skiprows : int
-        Skip rows value
-    file_version : int
-        Version of L0 file
-    delimiter : str
-        String delimiter for L0 file
-    comment : str
-        Notifier of commented sections in L0 file
-    time_offset : Optional[float]
-        Time offset in hours for correcting for non utc time data.
-    Returns
-    -------
-    ds : xarray.Dataset
-        L0 Dataset
-    '''
-    if file_version == 1:
-        df = pd.read_csv(infile, comment=comment, index_col=0,
-                         na_values=nodata, names=cols,
-                         sep=delimiter,
-                         skiprows=skiprows, skip_blank_lines=True,
-                         usecols=range(len(cols)),
-                         low_memory=False)
-        df['time'] = pd.to_datetime(
-            df.year.astype(str) \
-                + df.doy.astype(str).str.zfill(3) \
-                + df.hhmm.astype(str).str.zfill(4),
-            format='%Y%j%H%M'
-        )
-        df = df.set_index('time')
-
-    else:
-        df = pd.read_csv(infile, comment=comment, index_col=0,
-                         na_values=nodata, names=cols, parse_dates=True,
-                         sep=delimiter, skiprows=skiprows,
-                         skip_blank_lines=True,
-                         usecols=range(len(cols)),
-                         low_memory=False)
-        try:
-            df.index = pd.to_datetime(df.index)
-        except ValueError as e:
-            logger.info("\n", infile)
-            logger.info("\nValueError:")
-            logger.info(e)
-            logger.info('\t\t> Trying pd.to_datetime with format=mixed')
-            try:
-                df.index = pd.to_datetime(df.index, format='mixed')
-            except Exception as e:
-                logger.info("\nDateParseError:")
-                logger.info(e)
-                logger.info('\t\t> Trying again removing apostrophes in timestamp (old files format)')
-                df.index = pd.to_datetime(df.index.str.replace("\"",""))
-
-    if time_offset is not None:
-        df.index = df.index + timedelta(hours=time_offset)
-
-    # Drop SKIP columns
-    for c in df.columns:
-        if c[0:4] == 'SKIP':
-            df.drop(columns=c, inplace=True)
-
-    # Carry relevant metadata with ds
-    ds = xr.Dataset.from_dataframe(df)
-    return ds
-
-def addBasicMeta(ds, vars_df):
-    ''' Use a variable lookup table DataFrame to add the basic metadata
-    to the xarray dataset. This is later amended to finalise L3
-
-    Parameters
-    ----------
-    ds : xr.Dataset
-        Dataset to add metadata to
-    vars_df : pd.DataFrame
-        Metadata dataframe
-
-    Returns
-    -------
-    ds : xr.Dataset
-        Dataset with added metadata
-    '''
-    for v in vars_df.index:
-        if v == 'time': continue # coordinate variable, not normal var
-        if v not in list(ds.variables): continue
-        for c in ['standard_name', 'long_name', 'units']:
-            if isinstance(vars_df[c][v], float) and np.isnan(vars_df[c][v]): continue
-            ds[v].attrs[c] = vars_df[c][v]
-    return ds
-
-def populateMeta(ds, conf, skip):
-    '''Populate L0 Dataset with metadata dictionary
-
-    Parameters
-    ----------
-    ds : xarray.Dataset
-        L0 dataset
-    conf : dict
-        Metadata dictionary
-    skip : list
-        List of column names to skip parsing to metadata
-
-    Returns
-    -------
-    ds : xarray.Dataset
-        L0 dataset with metadata populated as Dataset attributes
-    '''
-    meta = {}
-    # skip = ["columns", "skiprows"]
-    for k in conf.keys():
-        if k not in skip: meta[k] = conf[k]
-    ds.attrs = meta
-    return ds
-
-def writeCSV(outfile, Lx, csv_order):
-    '''Write data product to CSV file
-
-    Parameters
-    ----------
-    outfile : str
-        Output file path
-    Lx : xr.Dataset
-        Dataset to write to file
-    csv_order : list
-        List order of variables
-    '''
-    Lcsv = Lx.to_dataframe().dropna(how='all')
-    if csv_order is not None:
-        names = [c for c in csv_order if c in list(Lcsv.columns)]
-        Lcsv = Lcsv[names]
-    Lcsv.to_csv(outfile)
-
-def writeNC(outfile, Lx, col_names=None):
-    '''Write data product to NetCDF file
-
-    Parameters
-    ----------
-    outfile : str
-        Output file path
-    Lx : xr.Dataset
-        Dataset to write to file
-    '''
-    if os.path.isfile(outfile):
-        os.remove(outfile)
-    if col_names is not None:
-        names = [c for c in col_names if c in list(Lx.keys())]
-    else:
-        names = list(Lx.keys())
-    Lx[names].to_netcdf(outfile, mode='w', format='NETCDF4', compute=True)
-
-def writeAll(outpath, station_id, l3_h, l3_d, l3_m, csv_order=None):
-    '''Write L3 hourly, daily and monthly datasets to .nc and .csv
-    files
-
-    outpath : str
-        Output file path
-    station_id : str
-        Station name
-    l3_h : xr.Dataset
-        L3 hourly data
-    l3_d : xr.Dataset
-        L3 daily data
-    l3_m : xr.Dataset
-        L3 monthly data
-    csv_order : list, optional
-        List order of variables
-    '''
-    if not os.path.isdir(outpath):
-        os.mkdir(outpath)
-    outfile_h = os.path.join(outpath, station_id + '_hour')
-    outfile_d = os.path.join(outpath, station_id + '_day')
-    outfile_m = os.path.join(outpath, station_id + '_month')
-    for o,l in zip([outfile_h, outfile_d, outfile_m], [l3_h ,l3_d, l3_m]):
-        writeCSV(o+'.csv',l, csv_order)
-        writeNC(o+'.nc',l)
-
-
-def popCols(ds, names):
-    '''Populate dataset with all given variable names
-
-    Parammeters
-    -----------
-    ds : xr.Dataset
-        Dataset
-    names : list
-        List of variable names to populate
-    '''
-    for v in names:
-        if v not in list(ds.variables):
-            ds[v] = (('time'), np.arange(ds['time'].size)*np.nan)
-    return ds
-
-def getColNames(vars_df, booms=None, data_type=None, bedrock=False):
-    '''Get all variable names for a given data type, based on a variables
-    look-up table
-
-    Parameters
-    ----------
-    vars_df : pd.DataFrame
-        Variables look-up table
-    booms : int, optional
-        Number of booms. If this parameter is empty then all variables
-        regardless of boom type will be passed. The default is None.
-    data_type : str, optional
-        Data type, "tx", "STM" or "raw". If this parameter is empty then all
-        variables regardless of data type will be passed. The default is None.
-
-    Returns
-    -------
-    list
-        Variable names
-    '''
-    if booms==1:
-        vars_df = vars_df.loc[vars_df['station_type'].isin(['one-boom','all'])]
-    elif booms==2:
-        vars_df = vars_df.loc[vars_df['station_type'].isin(['two-boom','all'])]
-
-    if data_type=='TX':
-        vars_df = vars_df.loc[vars_df['data_type'].isin(['TX','all'])]
-    elif data_type=='STM' or data_type=='raw':
-        vars_df = vars_df.loc[vars_df['data_type'].isin(['raw','all'])]
-
-    col_names = list(vars_df.index)
-    if isinstance(bedrock, str):
-        bedrock = (bedrock.lower() == 'true')
-    if bedrock == True:
-        col_names.remove('cc')
-        for v in ['dlhf_u', 'dlhf_l', 'dshf_u', 'dshf_l']:
-            try:
-                col_names.remove(v)
-            except:
-                pass
-    return col_names
-
-def roundValues(ds, df, col='max_decimals'):
-    '''Round all variable values in data array based on pre-defined rounding
-    value in variables look-up table DataFrame
-
-    Parameters
-    ----------
-    ds : xr.Dataset
-        Dataset to round values in
-    df : pd.Dataframe
-        Variable look-up table with rounding values
-    col : str
-        Column in variable look-up table that contains rounding values. The
-        default is "max_decimals"
-    '''
-    df = df[col]
-    df = df.dropna(how='all')
-    for var in df.index:
-        if var not in list(ds.variables):
-            continue
-        if df[var] is not np.nan:
-            ds[var] = ds[var].round(decimals=int(df[var]))
-    return ds
-
-def addVars(ds, variables):
-    '''Add variable attributes from file to dataset
-
-    Parameters
-    ----------
-    ds : xarray.Dataset
-        Dataset to add variable attributes to
-    variables : pandas.DataFrame
-        Variables lookup table file
-
-    Returns
-    -------
-    ds : xarray.Dataset
-        Dataset with metadata
-    '''
-    for k in ds.keys():
-        if k not in variables.index: continue
-        ds[k].attrs['standard_name'] = variables.loc[k]['standard_name']
-        ds[k].attrs['long_name'] = variables.loc[k]['long_name']
-        ds[k].attrs['units'] = variables.loc[k]['units']
-        ds[k].attrs['coverage_content_type'] = variables.loc[k]['coverage_content_type']
-        ds[k].attrs['coordinates'] = variables.loc[k]['coordinates']
-    return ds
-
-def addMeta(ds, meta):
-    '''Add metadata attributes from file to dataset
-
-    Parameters
-    ----------
-    ds : xarray.Dataset
-        Dataset to add metadata attributes to
-    meta : dict
-        Metadata file
-
-    Returns
-    -------
-    ds : xarray.Dataset
-        Dataset with metadata
-    '''
-    ds['lon'] = ds['gps_lon'].mean()
-    ds['lon'].attrs = ds['gps_lon'].attrs
-
-    ds['lat'] = ds['gps_lat'].mean()
-    ds['lat'].attrs = ds['gps_lat'].attrs
-
-    ds['alt'] = ds['gps_alt'].mean()
-    ds['alt'].attrs = ds['gps_alt'].attrs
-
-    # for k in ds.keys(): # for each var
-    #     if 'units' in ds[k].attrs:
-    #         if ds[k].attrs['units'] == 'C':
-    #             ds[k].attrs['units'] = 'degrees_C'
-
-    # https://wiki.esipfed.org/Attribute_Convention_for_Data_Discovery_1-3#geospatial_bounds
-    ds.attrs['id'] = 'dk.geus.promice:' + str(uuid.uuid3(uuid.NAMESPACE_DNS, ds.attrs['station_id']))
-    ds.attrs['history'] = 'Generated on ' + datetime.datetime.utcnow().isoformat()
-    ds.attrs['date_created'] = str(datetime.datetime.now().isoformat())
-    ds.attrs['date_modified'] = ds.attrs['date_created']
-    ds.attrs['date_issued'] = ds.attrs['date_created']
-    ds.attrs['date_metadata_modified'] = ds.attrs['date_created']
-
-    ds.attrs['geospatial_bounds'] = "POLYGON((" + \
-        f"{ds['lat'].min().values} {ds['lon'].min().values}, " + \
-        f"{ds['lat'].min().values} {ds['lon'].max().values}, " + \
-        f"{ds['lat'].max().values} {ds['lon'].max().values}, " + \
-        f"{ds['lat'].max().values} {ds['lon'].min().values}, " + \
-        f"{ds['lat'].min().values} {ds['lon'].min().values}))"
-
-    ds.attrs['geospatial_lat_min'] = str(ds['lat'].min().values)
-    ds.attrs['geospatial_lat_max'] = str(ds['lat'].max().values)
-    ds.attrs['geospatial_lon_min'] = str(ds['lon'].min().values)
-    ds.attrs['geospatial_lon_max'] = str(ds['lon'].max().values)
-    ds.attrs['geospatial_vertical_min'] = str(ds['alt'].min().values)
-    ds.attrs['geospatial_vertical_max'] = str(ds['alt'].max().values)
-    ds.attrs['geospatial_vertical_positive'] = 'up'
-    ds.attrs['time_coverage_start'] = str(ds['time'][0].values)
-    ds.attrs['time_coverage_end'] = str(ds['time'][-1].values)
-
-    try:
-        ds.attrs['source']= 'pypromice v' + str(metadata.version('pypromice'))
-    except:
-        ds.attrs['source'] = 'pypromice'
-
-    # https://www.digi.com/resources/documentation/digidocs/90001437-13/reference/r_iso_8601_duration_format.htm
-    try:
-        ds.attrs['time_coverage_duration'] = str(pd.Timedelta((ds['time'][-1] - ds['time'][0]).values).isoformat())
-        ds.attrs['time_coverage_resolution'] = str(pd.Timedelta((ds['time'][1] - ds['time'][0]).values).isoformat())
-    except:
-        ds.attrs['time_coverage_duration'] = str(pd.Timedelta(0).isoformat())
-        ds.attrs['time_coverage_resolution'] = str(pd.Timedelta(0).isoformat())
-
-    # Note: int64 dtype (long int) is incompatible with OPeNDAP access via THREDDS for NetCDF files
-    # See https://stackoverflow.com/questions/48895227/output-int32-time-dimension-in-netcdf-using-xarray
-    ds.time.encoding["dtype"] = "i4" # 32-bit signed integer
-    #ds.time.encoding["calendar"] = 'proleptic_gregorian' # this is default
-
-    # Load metadata attributes and add to Dataset
-    [_addAttr(ds, key, value) for key,value in meta.items()]
-
-    # Check attribute formating
-    for k,v in ds.attrs.items():
-        if not isinstance(v, str) or not isinstance(v, int):
-            ds.attrs[k]=str(v)
-    return ds
-
-
-def getVars(v_file=None):
-    '''Load variables.csv file
-
-    Parameters
-    ----------
-    v_file : str
-        Variable lookup table file path
-
-    Returns
-    -------
-    pandas.DataFrame
-        Variables dataframe
-    '''
-    if v_file is None:
-        with pkg_resources.resource_stream('pypromice', 'process/variables.csv') as stream:
-            return pd.read_csv(stream, index_col=0, comment="#", encoding='utf-8')
-    else:
-        return pd.read_csv(v_file, index_col=0, comment="#")
-
-
-def getMeta(m_file=None, delimiter=','): #TODO change to DataFrame output to match variables.csv
-    '''Load metadata table
-
-    Parameters
-    ----------
-    m_file : str
-        Metadata file path
-    delimiter : str
-        Metadata character delimiter. The default is ","
-
-    Returns
-    -------
-    meta : dict
-        Metadata dictionary
-    '''
-    meta={}
-    if m_file is None:
-        with pkg_resources.resource_stream('pypromice', 'process/metadata.csv') as stream:
-            lines = stream.read().decode("utf-8")
-            lines = lines.split("\n")
-    else:
-        with open(m_file, 'r') as f:
-            lines = f.readlines()
-    for l in lines[1:]:
-        try:
-            meta[l.split(',')[0]] = l.split(delimiter)[1].split('\n')[0].replace(';',',')
-        except IndexError:
-            pass
-    return meta
-
-def resampleL3(ds_h, t):
-    '''Resample L3 AWS data, e.g. hourly to daily average. This uses pandas
-    DataFrame resampling at the moment as a work-around to the xarray Dataset
-    resampling. As stated, xarray resampling is a lengthy process that takes
-    ~2-3 minutes per operation: ds_d = ds_h.resample({'time':"1D"}).mean()
-    This has now been fixed, so needs implementing:
-    https://github.com/pydata/xarray/issues/4498#event-6610799698
-
-    Parameters
-    ----------
-    ds_h : xarray.Dataset
-        L3 AWS daily dataset
-    t : str
-        Resample factor, same variable definition as in
-        pandas.DataFrame.resample()
-
-    Returns
-    -------
-    ds_d : xarray.Dataset
-        L3 AWS hourly dataset
-    '''
-    df_d = ds_h.to_dataframe().resample(t).mean()
-    # recalculating wind direction from averaged directional wind speeds
-    for var in ['wdir_u','wdir_l','wdir_i']:
-        if var in df_d.columns:
-            if ('wspd_x_'+var.split('_')[1] in df_d.columns) & ('wspd_x_'+var.split('_')[1] in df_d.columns):
-                df_d[var] = _calcWindDir(df_d['wspd_x_'+var.split('_')[1]],
-                                         df_d['wspd_y_'+var.split('_')[1]])
-            else:
-                logger.info(var,'in dataframe but not','wspd_x_'+var.split('_')[1],'wspd_x_'+var.split('_')[1])
-    vals = [xr.DataArray(data=df_d[c], dims=['time'],
-            coords={'time':df_d.index}, attrs=ds_h[c].attrs) for c in df_d.columns]
-    ds_d = xr.Dataset(dict(zip(df_d.columns,vals)), attrs=ds_h.attrs)
-    return ds_d
-
-
-def _calcWindDir(wspd_x, wspd_y):
-    '''Calculate wind direction in degrees
-
-    Parameters
-    ----------
-    wspd_x : xarray.DataArray
-        Wind speed in X direction
-    wspd_y : xarray.DataArray
-        Wind speed in Y direction
-
-    Returns
-    -------
-    wdir : xarray.DataArray
-        Wind direction'''
-    deg2rad = np.pi / 180
-    rad2deg = 1 / deg2rad
-    wdir = np.arctan2(wspd_x, wspd_y) * rad2deg
-    wdir = (wdir + 360) % 360
-    return wdir
-
-
-def _addAttr(ds, key, value):
-    '''Add attribute to xarray dataset
-
-    ds : xr.Dataset
-        Dataset to add attribute to
-    key : str
-        Attribute name, with "." denoting variable attributes
-    value : str/int
-        Value for attribute'''
-    if len(key.split('.')) == 2:
-        try:
-            ds[key.split('.')[0]].attrs[key.split('.')[1]] = str(value)
-        except:
-            pass
-            # logger.info(f'Unable to add metadata to {key.split(".")[0]}')
-    else:
-        ds.attrs[key] = value
-
-
-#------------------------------------------------------------------------------
-
-class TestProcess(unittest.TestCase):
-
-    def testgetVars(self):
-        '''Test variable table lookup retrieval'''
-        v = getVars()
-        self.assertIsInstance(v, pd.DataFrame)
-        self.assertTrue(v.columns[0] in 'standard_name')
-        self.assertTrue(v.columns[2] in 'units')
-
-    def testgetMeta(self):
-        '''Test AWS names retrieval'''
-        m = getMeta()
-        self.assertIsInstance(m, dict)
-        self.assertTrue('references' in m)
-
-    def testAddAll(self):
-        '''Test variable and metadata attributes added to Dataset'''
-        d = xr.Dataset()
-        v = getVars()
-        att = list(v.index)
-        att1 = ['gps_lon', 'gps_lat', 'gps_alt', 'albedo', 'p']
-        for a in att:
-            d[a]=[0,1]
-        for a in att1:
-            d[a]=[0,1]
-        d['time'] = [datetime.datetime.now(),
-                     datetime.datetime.now()-timedelta(days=365)]
-        d.attrs['station_id']='TEST'
-        meta = getMeta()
-        d = addVars(d, v)
-        d = addMeta(d, meta)
-        self.assertTrue(d.attrs['station_id']=='TEST')
-        self.assertIsInstance(d.attrs['references'], str)
-
-    def testL0toL3(self):
-        '''Test L0 to L3 processing'''
-        try:
-            import pypromice
-            pAWS = AWS(os.path.join(os.path.dirname(pypromice.__file__),'test/test_config1.toml'),
-                       os.path.join(os.path.dirname(pypromice.__file__),'test'))
-        except:
-            pAWS = AWS('../test/test_config1.toml', '../test/')
-        pAWS.process()
-        self.assertIsInstance(pAWS.L3, xr.Dataset)
-        self.assertTrue(pAWS.L3.attrs['station_id']=='TEST1')
-
-    def testCLIgetl3(self):
-        '''Test get_l3 CLI'''
-        exit_status = os.system('get_l3 -h')
-        self.assertEqual(exit_status, 0)
-
-    def testCLIjoinl3(self):
-        '''Test join_l3 CLI'''
-        exit_status = os.system('join_l3 -h')
-        self.assertEqual(exit_status, 0)
-
-#------------------------------------------------------------------------------
-
-if __name__ == "__main__":
-
-    # # Test an individual station
-    # test_station = 'xxx'
-    # # config_file = '../../../../aws-l0/raw/config/{}.toml'.format(test_station)
-    # config_file = '../../../../aws-l0/tx/config/{}.toml'.format(test_station)
-    # # inpath= '../../../../aws-l0/raw/{}/'.format(test_station)
-    # inpath= '../../../../aws-l0/tx/'
-    # vari = 'variables.csv'
-    # pAWS_gc = AWS(config_file, inpath, var_file=vari)
-    # pAWS_gc.process()
-    # pAWS_gc.getL1()
-    # pAWS_gc.getL2()
-    # pAWS_gc.getL3()
-
-    # # Use test configs
-    # config_files = ['test/test_config1.toml', 'test/test_config2.toml']
-    # inpath= 'test/'
-    # outpath = 'test/'
-    # vari = 'variables.csv'
-    # for cf in config_files:
-    #     pAWS_gc = AWS(cf, inpath, var_file=vari)
-    #     pAWS_gc.process()
-
-    unittest.main()
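
For orientation, below is a minimal usage sketch of the refactored AWS class as it appears in this diff. The paths and station name are hypothetical placeholders, and the write behaviour is inferred only from the writeArr() wrapper above (not from the new write.prepare_and_write() module itself).

import logging
from pypromice.process.aws import AWS

logging.basicConfig(level=logging.INFO)

# Hypothetical paths. data_issues_repository is a new required argument in
# 1.4.0; its git commit hash (via get_commit_hash_and_check_dirty), together
# with the pypromice version and the L0 config/data hashes, is stored as a
# JSON string in the dataset's "source" metadata attribute.
aws = AWS(
    config_file="aws-l0/tx/config/STATION.toml",
    inpath="aws-l0/tx/",
    data_issues_repository="aws-data-issues/",
)

aws.process()            # runs getL1(), getL2() and getL3() in sequence

# Output directories must already exist; writeL2/writeL3 only log a message otherwise.
aws.writeL2("out/l2/")   # L2 written via write.prepare_and_write()
aws.writeL3("out/l3/")   # L3 written, resampled to 10 min for raw/STM L0 or hourly for tx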