pypromice 1.3.5__py3-none-any.whl → 1.4.0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to their public registry, and is provided for informational purposes only.
Potentially problematic release.
- pypromice/get/get.py +19 -19
- pypromice/postprocess/bufr_to_csv.py +6 -1
- pypromice/postprocess/bufr_utilities.py +91 -18
- pypromice/postprocess/create_bufr_files.py +178 -0
- pypromice/postprocess/get_bufr.py +248 -397
- pypromice/postprocess/make_metadata_csv.py +214 -0
- pypromice/postprocess/real_time_utilities.py +41 -11
- pypromice/process/L0toL1.py +12 -5
- pypromice/process/L1toL2.py +159 -30
- pypromice/process/L2toL3.py +1034 -187
- pypromice/process/aws.py +131 -752
- pypromice/process/get_l2.py +90 -0
- pypromice/process/get_l2tol3.py +111 -0
- pypromice/process/join_l2.py +112 -0
- pypromice/process/join_l3.py +551 -120
- pypromice/process/load.py +161 -0
- pypromice/process/resample.py +128 -0
- pypromice/process/utilities.py +68 -0
- pypromice/process/write.py +503 -0
- pypromice/qc/github_data_issues.py +10 -16
- pypromice/qc/percentiles/thresholds.csv +2 -2
- pypromice/qc/persistence.py +71 -25
- pypromice/resources/__init__.py +28 -0
- pypromice/{process/metadata.csv → resources/file_attributes.csv} +0 -2
- pypromice/resources/variable_aliases_GC-Net.csv +78 -0
- pypromice/resources/variables.csv +106 -0
- pypromice/station_configuration.py +118 -0
- pypromice/tx/get_l0tx.py +7 -4
- pypromice/tx/payload_formats.csv +1 -0
- pypromice/tx/tx.py +27 -6
- pypromice/utilities/__init__.py +0 -0
- pypromice/utilities/git.py +61 -0
- {pypromice-1.3.5.dist-info → pypromice-1.4.0.dist-info}/METADATA +12 -21
- pypromice-1.4.0.dist-info/RECORD +53 -0
- {pypromice-1.3.5.dist-info → pypromice-1.4.0.dist-info}/WHEEL +1 -1
- pypromice-1.4.0.dist-info/entry_points.txt +13 -0
- pypromice/postprocess/station_configurations.toml +0 -762
- pypromice/process/get_l3.py +0 -46
- pypromice/process/variables.csv +0 -92
- pypromice/qc/persistence_test.py +0 -150
- pypromice/test/test_config1.toml +0 -69
- pypromice/test/test_config2.toml +0 -54
- pypromice/test/test_email +0 -75
- pypromice/test/test_payload_formats.csv +0 -4
- pypromice/test/test_payload_types.csv +0 -7
- pypromice/test/test_percentile.py +0 -229
- pypromice/test/test_raw1.txt +0 -4468
- pypromice/test/test_raw_DataTable2.txt +0 -11167
- pypromice/test/test_raw_SlimTableMem1.txt +0 -1155
- pypromice/test/test_raw_transmitted1.txt +0 -15411
- pypromice/test/test_raw_transmitted2.txt +0 -28
- pypromice-1.3.5.dist-info/RECORD +0 -53
- pypromice-1.3.5.dist-info/entry_points.txt +0 -8
- {pypromice-1.3.5.dist-info → pypromice-1.4.0.dist-info}/LICENSE.txt +0 -0
- {pypromice-1.3.5.dist-info → pypromice-1.4.0.dist-info}/top_level.txt +0 -0
pypromice/process/aws.py
CHANGED
@@ -2,37 +2,43 @@
 """
 AWS data processing module
 """
-import
-from functools import reduce
-from importlib import metadata
-import os, unittest, toml, datetime, uuid, pkg_resources
-from typing import Sequence, Optional
-
-import numpy as np
+import json
 import warnings
 
-warnings.simplefilter(action=
+warnings.simplefilter(action="ignore", category=FutureWarning)
+
+import logging, os
+from pathlib import Path
 import pandas as pd
 import xarray as xr
-from
+from functools import reduce
+from importlib import metadata
 
+
+import pypromice.resources
 from pypromice.process.L0toL1 import toL1
 from pypromice.process.L1toL2 import toL2
 from pypromice.process.L2toL3 import toL3
+from pypromice.process import write, load, utilities
+from pypromice.utilities.git import get_commit_hash_and_check_dirty
 
-pd.set_option(
+pd.set_option("display.precision", 2)
 xr.set_options(keep_attrs=True)
-
 logger = logging.getLogger(__name__)
 
-#------------------------------------------------------------------------------
-
 
 class AWS(object):
-
-
-    def __init__(
-
+    """AWS object to load and process PROMICE AWS data"""
+
+    def __init__(
+        self,
+        config_file,
+        inpath,
+        data_issues_repository: Path | str,
+        var_file=None,
+        meta_file=None,
+    ):
+        """Object initialisation
 
         Parameters
         ----------
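A minimal sketch of the relocated helpers that this hunk starts importing. Only the names come from the diff (pypromice.resources.load_variables / load_metadata and get_commit_hash_and_check_dirty); the return types assumed in the comments are inferred from how AWS.__init__ uses them further down, not confirmed here.

    # Sketch only: exercises the new import surface introduced in this hunk.
    from pathlib import Path

    import pypromice.resources
    from pypromice.utilities.git import get_commit_hash_and_check_dirty

    vars_df = pypromice.resources.load_variables(None)  # None -> bundled variables.csv (assumed)
    meta = pypromice.resources.load_metadata(None)      # None -> bundled metadata table (assumed)
    commit = get_commit_hash_and_check_dirty(Path("."))  # hypothetical repository path
    print(type(vars_df), type(meta), commit)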
@@ -46,22 +52,45 @@ class AWS(object):
         meta_file: str, optional
             Metadata info file path. If not given then pypromice's
             metadata file is used. The default is None.
-
-        assert
-        assert
-        logger.info(
+        """
+        assert os.path.isfile(config_file), "cannot find " + config_file
+        assert os.path.isdir(inpath), "cannot find " + inpath
+        logger.info("AWS object initialising...")
 
         # Load config, variables CSF standards, and L0 files
         self.config = self.loadConfig(config_file, inpath)
-        self.vars =
-        self.meta =
+        self.vars = pypromice.resources.load_variables(var_file)
+        self.meta = pypromice.resources.load_metadata(meta_file)
+        self.data_issues_repository = Path(data_issues_repository)
+
+        config_hash = get_commit_hash_and_check_dirty(Path(config_file))
+        config_source_string = f"{Path(config_file).name}:{config_hash}"
+        inpath_hash = get_commit_hash_and_check_dirty(Path(inpath))
+        data_issues_hash = get_commit_hash_and_check_dirty(self.data_issues_repository)
+        source_dict = dict(
+            pypromice=metadata.version("pypromice"),
+            l0_config_file=config_source_string,
+            l0_data_root=inpath_hash,
+            data_issues=data_issues_hash,
+        )
+        self.meta["source"] = json.dumps(source_dict)
 
         # Load config file
         L0 = self.loadL0()
-        self.L0=[]
+        self.L0 = []
         for l in L0:
-            n = getColNames(self.vars, l
-            self.L0.append(popCols(l, n))
+            n = write.getColNames(self.vars, l)
+            self.L0.append(utilities.popCols(l, n))
+
+        formats = {dataset.attrs["format"].lower() for dataset in self.L0}
+        if "raw" in formats:
+            self.format = "raw"
+        elif "STM" in formats:
+            self.format = "STM"
+        elif "tx" in formats:
+            self.format = "tx"
+        else:
+            raise ValueError(f"Unknown formats from l0 datasets: {','.join(formats)}")
 
         self.L1 = None
         self.L1A = None
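The constructor now requires a data_issues_repository argument and records a JSON provenance string in self.meta["source"]. A usage sketch with hypothetical paths; only the argument names and the dictionary keys come from the diff above, and constructing the object requires a real station config and L0 directory.

    import json
    from pypromice.process.aws import AWS

    # Hypothetical paths: substitute a real station config, an L0 data root and
    # a checkout of the data-issues repository.
    aws = AWS(
        config_file="aws-l0/tx/config/KAN_B.toml",
        inpath="aws-l0/tx/",
        data_issues_repository="PROMICE-AWS-data-issues/",
    )
    # Provenance record written by __init__ (keys taken from the diff; hash
    # format assumed to be whatever get_commit_hash_and_check_dirty returns):
    print(json.loads(aws.meta["source"]))
    # e.g. {'pypromice': '1.4.0', 'l0_config_file': 'KAN_B.toml:<hash>',
    #       'l0_data_root': '<hash>', 'data_issues': '<hash>'}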
@@ -69,120 +98,87 @@ class AWS(object):
         self.L3 = None
 
     def process(self):
-
+        """Perform L0 to L3 data processing"""
         try:
-            logger.info(
+            logger.info(
+                f'Commencing {self.L0.attrs["number_of_booms"]}-boom processing...'
+            )
+            logger.info(
+                f'Commencing {self.L0.attrs["number_of_booms"]}-boom processing...'
+            )
         except:
-            logger.info(
+            logger.info(
+                f'Commencing {self.L0[0].attrs["number_of_booms"]}-boom processing...'
+            )
         self.getL1()
         self.getL2()
         self.getL3()
 
-    def
-
+    def writeL2(self, outpath):
+        """Write L2 data to .csv and .nc file"""
         if os.path.isdir(outpath):
-            self.writeArr(outpath)
+            self.writeArr(self.L2, outpath)
         else:
-            logger.info(f
+            logger.info(f"Outpath f{outpath} does not exist. Unable to save to file")
+            pass
+
+    def writeL3(self, outpath):
+        """Write L3 data to .csv and .nc file"""
+        if os.path.isdir(outpath):
+            self.writeArr(self.L3, outpath)
+        else:
+            logger.info(f"Outpath f{outpath} does not exist. Unable to save to file")
             pass
 
     def getL1(self):
-
-        logger.info(
-        self.L0 = [addBasicMeta(item, self.vars) for item in self.L0]
+        """Perform L0 to L1 data processing"""
+        logger.info("Level 1 processing...")
+        self.L0 = [utilities.addBasicMeta(item, self.vars) for item in self.L0]
         self.L1 = [toL1(item, self.vars) for item in self.L0]
-        self.L1A = reduce(xr.Dataset.combine_first, self.L1)
+        self.L1A = reduce(xr.Dataset.combine_first, reversed(self.L1))
+        self.L1A.attrs["format"] = self.format
 
     def getL2(self):
-
-        logger.info(
-
+        """Perform L1 to L2 data processing"""
+        logger.info("Level 2 processing...")
+
+        self.L2 = toL2(
+            self.L1A,
+            vars_df=self.vars,
+            data_flags_dir=self.data_issues_repository / "flags",
+            data_adjustments_dir=self.data_issues_repository / "adjustments",
+        )
 
     def getL3(self):
-
-        and attribute population
-        logger.info(
-        self.L3 = toL3(self.L2)
-
-        # Resample L3 product
-        f = [l.attrs['format'] for l in self.L0]
-        if 'raw' in f or 'STM' in f:
-            logger.info('Resampling to 10 minute')
-            self.L3 = resampleL3(self.L3, '10min')
-        else:
-            self.L3 = resampleL3(self.L3, '60min')
-            logger.info('Resampling to hour')
+        """Perform L2 to L3 data processing, including resampling and metadata
+        and attribute population"""
+        logger.info("Level 3 processing...")
+        self.L3 = toL3(self.L2, data_adjustments_dir=self.data_issues_repository / "adjustments")
 
-
-
-        self.L3['time'] = list(t)
-
-        # Switch gps_lon to negative (degrees_east)
-        # Do this here, and NOT in addMeta, otherwise we switch back to positive
-        # when calling getMeta in joinL3! PJW
-        if self.L3.attrs['station_id'] not in ['UWN', 'Roof_GEUS', 'Roof_PROMICE']:
-            self.L3['gps_lon'] = self.L3['gps_lon'] * -1
-
-        # Add variable attributes and metadata
-        self.L3 = self.addAttributes(self.L3)
-
-        # Round all values to specified decimals places
-        self.L3 = roundValues(self.L3, self.vars)
-
-    def addAttributes(self, L3):
-        '''Add variable and attribute metadata
-
-        Parameters
-        ----------
-        L3 : xr.Dataset
-            Level-3 data object
-
-        Returns
-        -------
-        L3 : xr.Dataset
-            Level-3 data object with attributes
-        '''
-        L3 = addVars(L3, self.vars)
-        L3 = addMeta(L3, self.meta)
-        return L3
-
-    def writeArr(self, outpath):
-        '''Write L3 data to .nc and .csv hourly and daily files
+    def writeArr(self, dataset, outpath, t=None):
+        """Write L3 data to .nc and .csv hourly and daily files
 
         Parameters
         ----------
+        dataset : xarray.Dataset
+            Dataset to write to file
         outpath : str
             Output directory
-
-
-
-
-        if not
-
-
-        col_names = getColNames(
-            self.vars,
-            self.L3.attrs['number_of_booms'],
-            self.L3.attrs['format'],
-            self.L3.attrs['bedrock'],
-        )
-
-        t = int(pd.Timedelta((self.L3['time'][1] - self.L3['time'][0]).values).total_seconds())
-        logger.info('Writing to files...')
-        if t == 600:
-            out_csv = os.path.join(outdir, self.L3.attrs['station_id']+'_10min.csv')
-            out_nc = os.path.join(outdir, self.L3.attrs['station_id']+'_10min.nc')
+        t : str
+            Resampling string. This is automatically defined based
+            on the data type if not given. The default is None.
+        """
+        if t is not None:
+            write.prepare_and_write(dataset, outpath, self.vars, self.meta, t)
         else:
-
-
-
-
-
-            logger.info(f'Written to {out_csv}')
-            logger.info(f'Written to {out_nc}')
+            f = [l.attrs["format"] for l in self.L0]
+            if "raw" in f or "STM" in f:
+                write.prepare_and_write(dataset, outpath, self.vars, self.meta, "10min")
+            else:
+                write.prepare_and_write(dataset, outpath, self.vars, self.meta, "60min")
 
     def loadConfig(self, config_file, inpath):
-
+        """Load configuration from .toml file
 
         Parameters
         ----------
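With this hunk, per-level writing is exposed through writeL2/writeL3, and writeArr delegates to write.prepare_and_write with a resample string picked from the L0 format when not given explicitly. A minimal driver sketch, assuming the same hypothetical paths as above and that the output directories already exist:

    import logging
    from pypromice.process.aws import AWS

    logging.basicConfig(level=logging.INFO)

    aws = AWS("station.toml", "l0/", data_issues_repository="data-issues/")  # hypothetical paths
    aws.process()          # L0 -> L1 -> L2 -> L3, as defined in this hunk
    aws.writeL2("out/l2")  # delegates to write.prepare_and_write(..., "10min" or "60min")
    aws.writeL3("out/l3")
    # An explicit resampling interval can also be forced via writeArr:
    aws.writeArr(aws.L3, "out/l3_hourly", t="60min")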
@@ -195,12 +191,12 @@ class AWS(object):
         -------
         conf : dict
             Configuration parameters
-
-        conf = getConfig(config_file, inpath)
+        """
+        conf = load.getConfig(config_file, inpath)
        return conf
 
     def loadL0(self):
-
+        """Load level 0 (L0) data from associated TOML-formatted
         config file and L0 data file
 
         Try readL0file() using the config with msg_lat & msg_lon appended. The
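loadConfig is now a thin wrapper around load.getConfig. The removed module-level getConfig further down in this diff shows the shape of the return value: one dictionary per L0 file, each carrying the top-level TOML keys plus "conf" and "file" paths. A sketch assuming load.getConfig keeps that signature and structure (not confirmed by this diff):

    from pypromice.process import load

    conf = load.getConfig("station.toml", "l0/")  # hypothetical paths
    for l0_name, entry in conf.items():
        # "columns", "station_id", "format" and "skiprows" are asserted to be
        # present by the old implementation shown at the bottom of this file.
        print(l0_name, entry["station_id"], entry["format"])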
@@ -215,7 +211,7 @@ class AWS(object):
         -------
         ds_list : list
             List of L0 xr.Dataset objects
-
+        """
         ds_list = []
         for k in self.config.keys():
             target = self.config[k]
@@ -224,15 +220,15 @@ class AWS(object):
 
             except pd.errors.ParserError as e:
                 # ParserError: Too many columns specified: expected 40 and found 38
-                logger.info(f'-----> No msg_lat or msg_lon for {k}')
-                for item in [
-                    target[
+                # logger.info(f'-----> No msg_lat or msg_lon for {k}')
+                for item in ["msg_lat", "msg_lon"]:
+                    target["columns"].remove(item) # Also removes from self.config
                 ds_list.append(self.readL0file(target))
-            logger.info(f
+            logger.info(f"L0 data successfully loaded from {k}")
         return ds_list
 
     def readL0file(self, conf):
-
+        """Read L0 .txt file to Dataset object using config dictionary and
         populate with initial metadata
 
         Parameters
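The surrounding loadL0 logic (unchanged context here) first tries each L0 file with msg_lat and msg_lon appended to the column list and, on pd.errors.ParserError, retries without them; this hunk only silences the log line and fills in the literal column names. A compact, standalone restatement of that retry pattern (an illustration, not pypromice's actual API):

    import pandas as pd

    def read_with_optional_columns(path, columns, optional=("msg_lat", "msg_lon")):
        """Try reading with the optional columns appended; drop them on ParserError."""
        cols = list(columns) + list(optional)
        try:
            return pd.read_csv(path, names=cols, usecols=range(len(cols)))
        except pd.errors.ParserError:
            # File predates the modem position columns: re-read without them.
            return pd.read_csv(path, names=list(columns), usecols=range(len(columns)))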
@@ -244,632 +240,15 @@ class AWS(object):
         -------
         ds : xr.Dataset
             L0 data
-
-        file_version = conf.get(
-        ds = getL0(
-
-
+        """
+        file_version = conf.get("file_version", -1)
+        ds = load.getL0(
+            conf["file"],
+            conf["nodata"],
+            conf["columns"],
+            conf["skiprows"],
+            file_version,
+            time_offset=conf.get("time_offset"),
+        )
+        ds = utilities.populateMeta(ds, conf, ["columns", "skiprows", "modem"])
         return ds
-
-#------------------------------------------------------------------------------
-
-def getConfig(config_file, inpath, default_columns: Sequence[str] = ('msg_lat', 'msg_lon')):
-    '''Load configuration from .toml file. PROMICE .toml files support defining
-    features at the top level which apply to all nested properties, but do not
-    overwrite nested properties if they are defined
-
-    Parameters
-    ----------
-    config_file : str
-        TOML file path
-    inpath : str
-        Input folder directory where L0 files can be found
-
-    Returns
-    -------
-    conf : dict
-        Configuration dictionary
-    '''
-    conf = toml.load(config_file) # Move all top level keys to nested properties,
-    top = [_ for _ in conf.keys() if not type(conf[_]) is dict] # if they are not already defined in the nested properties
-    subs = [_ for _ in conf.keys() if type(conf[_]) is dict] # Insert the section name (config_file) as a file property and config file
-    for s in subs:
-        for t in top:
-            if t not in conf[s].keys():
-                conf[s][t] = conf[t]
-
-        conf[s]['conf'] = config_file
-        conf[s]['file'] = os.path.join(inpath, s)
-        conf[s]["columns"].extend(default_columns)
-
-    for t in top: conf.pop(t) # Delete all top level keys beause each file
-                              # should carry all properties with it
-    for k in conf.keys(): # Check required fields are present
-        for field in ["columns", "station_id", "format", "skiprows"]:
-            assert(field in conf[k].keys()), field+" not in config keys"
-    return conf
-
-def getL0(infile, nodata, cols, skiprows, file_version,
-          delimiter=',', comment='#', time_offset: Optional[float] = None) -> xr.Dataset:
-    ''' Read L0 data file into pandas DataFrame object
-
-    Parameters
-    ----------
-    infile : str
-        L0 file path
-    nodata : list
-        List containing value for nan values and reassigned value
-    cols : list
-        List of columns in file
-    skiprows : int
-        Skip rows value
-    file_version : int
-        Version of L0 file
-    delimiter : str
-        String delimiter for L0 file
-    comment : str
-        Notifier of commented sections in L0 file
-    time_offset : Optional[float]
-        Time offset in hours for correcting for non utc time data.
-    Returns
-    -------
-    ds : xarray.Dataset
-        L0 Dataset
-    '''
-    if file_version == 1:
-        df = pd.read_csv(infile, comment=comment, index_col=0,
-                         na_values=nodata, names=cols,
-                         sep=delimiter,
-                         skiprows=skiprows, skip_blank_lines=True,
-                         usecols=range(len(cols)),
-                         low_memory=False)
-        df['time'] = pd.to_datetime(
-            df.year.astype(str) \
-                + df.doy.astype(str).str.zfill(3) \
-                + df.hhmm.astype(str).str.zfill(4),
-            format='%Y%j%H%M'
-        )
-        df = df.set_index('time')
-
-    else:
-        df = pd.read_csv(infile, comment=comment, index_col=0,
-                         na_values=nodata, names=cols, parse_dates=True,
-                         sep=delimiter, skiprows=skiprows,
-                         skip_blank_lines=True,
-                         usecols=range(len(cols)),
-                         low_memory=False)
-        try:
-            df.index = pd.to_datetime(df.index)
-        except ValueError as e:
-            logger.info("\n", infile)
-            logger.info("\nValueError:")
-            logger.info(e)
-            logger.info('\t\t> Trying pd.to_datetime with format=mixed')
-            try:
-                df.index = pd.to_datetime(df.index, format='mixed')
-            except Exception as e:
-                logger.info("\nDateParseError:")
-                logger.info(e)
-                logger.info('\t\t> Trying again removing apostrophes in timestamp (old files format)')
-                df.index = pd.to_datetime(df.index.str.replace("\"",""))
-
-    if time_offset is not None:
-        df.index = df.index + timedelta(hours=time_offset)
-
-    # Drop SKIP columns
-    for c in df.columns:
-        if c[0:4] == 'SKIP':
-            df.drop(columns=c, inplace=True)
-
-    # Carry relevant metadata with ds
-    ds = xr.Dataset.from_dataframe(df)
-    return ds
-
-def addBasicMeta(ds, vars_df):
-    ''' Use a variable lookup table DataFrame to add the basic metadata
-    to the xarray dataset. This is later amended to finalise L3
-
-    Parameters
-    ----------
-    ds : xr.Dataset
-        Dataset to add metadata to
-    vars_df : pd.DataFrame
-        Metadata dataframe
-
-    Returns
-    -------
-    ds : xr.Dataset
-        Dataset with added metadata
-    '''
-    for v in vars_df.index:
-        if v == 'time': continue # coordinate variable, not normal var
-        if v not in list(ds.variables): continue
-        for c in ['standard_name', 'long_name', 'units']:
-            if isinstance(vars_df[c][v], float) and np.isnan(vars_df[c][v]): continue
-            ds[v].attrs[c] = vars_df[c][v]
-    return ds
-
-def populateMeta(ds, conf, skip):
-    '''Populate L0 Dataset with metadata dictionary
-
-    Parameters
-    ----------
-    ds : xarray.Dataset
-        L0 dataset
-    conf : dict
-        Metadata dictionary
-    skip : list
-        List of column names to skip parsing to metadata
-
-    Returns
-    -------
-    ds : xarray.Dataset
-        L0 dataset with metadata populated as Dataset attributes
-    '''
-    meta = {}
-    # skip = ["columns", "skiprows"]
-    for k in conf.keys():
-        if k not in skip: meta[k] = conf[k]
-    ds.attrs = meta
-    return ds
-
-def writeCSV(outfile, Lx, csv_order):
-    '''Write data product to CSV file
-
-    Parameters
-    ----------
-    outfile : str
-        Output file path
-    Lx : xr.Dataset
-        Dataset to write to file
-    csv_order : list
-        List order of variables
-    '''
-    Lcsv = Lx.to_dataframe().dropna(how='all')
-    if csv_order is not None:
-        names = [c for c in csv_order if c in list(Lcsv.columns)]
-        Lcsv = Lcsv[names]
-    Lcsv.to_csv(outfile)
-
-def writeNC(outfile, Lx, col_names=None):
-    '''Write data product to NetCDF file
-
-    Parameters
-    ----------
-    outfile : str
-        Output file path
-    Lx : xr.Dataset
-        Dataset to write to file
-    '''
-    if os.path.isfile(outfile):
-        os.remove(outfile)
-    if col_names is not None:
-        names = [c for c in col_names if c in list(Lx.keys())]
-    else:
-        names = list(Lx.keys())
-    Lx[names].to_netcdf(outfile, mode='w', format='NETCDF4', compute=True)
-
-def writeAll(outpath, station_id, l3_h, l3_d, l3_m, csv_order=None):
-    '''Write L3 hourly, daily and monthly datasets to .nc and .csv
-    files
-
-    outpath : str
-        Output file path
-    station_id : str
-        Station name
-    l3_h : xr.Dataset
-        L3 hourly data
-    l3_d : xr.Dataset
-        L3 daily data
-    l3_m : xr.Dataset
-        L3 monthly data
-    csv_order : list, optional
-        List order of variables
-    '''
-    if not os.path.isdir(outpath):
-        os.mkdir(outpath)
-    outfile_h = os.path.join(outpath, station_id + '_hour')
-    outfile_d = os.path.join(outpath, station_id + '_day')
-    outfile_m = os.path.join(outpath, station_id + '_month')
-    for o,l in zip([outfile_h, outfile_d, outfile_m], [l3_h ,l3_d, l3_m]):
-        writeCSV(o+'.csv',l, csv_order)
-        writeNC(o+'.nc',l)
-
-
-def popCols(ds, names):
-    '''Populate dataset with all given variable names
-
-    Parammeters
-    -----------
-    ds : xr.Dataset
-        Dataset
-    names : list
-        List of variable names to populate
-    '''
-    for v in names:
-        if v not in list(ds.variables):
-            ds[v] = (('time'), np.arange(ds['time'].size)*np.nan)
-    return ds
-
-def getColNames(vars_df, booms=None, data_type=None, bedrock=False):
-    '''Get all variable names for a given data type, based on a variables
-    look-up table
-
-    Parameters
-    ----------
-    vars_df : pd.DataFrame
-        Variables look-up table
-    booms : int, optional
-        Number of booms. If this parameter is empty then all variables
-        regardless of boom type will be passed. The default is None.
-    data_type : str, optional
-        Data type, "tx", "STM" or "raw". If this parameter is empty then all
-        variables regardless of data type will be passed. The default is None.
-
-    Returns
-    -------
-    list
-        Variable names
-    '''
-    if booms==1:
-        vars_df = vars_df.loc[vars_df['station_type'].isin(['one-boom','all'])]
-    elif booms==2:
-        vars_df = vars_df.loc[vars_df['station_type'].isin(['two-boom','all'])]
-
-    if data_type=='TX':
-        vars_df = vars_df.loc[vars_df['data_type'].isin(['TX','all'])]
-    elif data_type=='STM' or data_type=='raw':
-        vars_df = vars_df.loc[vars_df['data_type'].isin(['raw','all'])]
-
-    col_names = list(vars_df.index)
-    if isinstance(bedrock, str):
-        bedrock = (bedrock.lower() == 'true')
-    if bedrock == True:
-        col_names.remove('cc')
-        for v in ['dlhf_u', 'dlhf_l', 'dshf_u', 'dshf_l']:
-            try:
-                col_names.remove(v)
-            except:
-                pass
-    return col_names
-
-def roundValues(ds, df, col='max_decimals'):
-    '''Round all variable values in data array based on pre-defined rounding
-    value in variables look-up table DataFrame
-
-    Parameters
-    ----------
-    ds : xr.Dataset
-        Dataset to round values in
-    df : pd.Dataframe
-        Variable look-up table with rounding values
-    col : str
-        Column in variable look-up table that contains rounding values. The
-        default is "max_decimals"
-    '''
-    df = df[col]
-    df = df.dropna(how='all')
-    for var in df.index:
-        if var not in list(ds.variables):
-            continue
-        if df[var] is not np.nan:
-            ds[var] = ds[var].round(decimals=int(df[var]))
-    return ds
-
-def addVars(ds, variables):
-    '''Add variable attributes from file to dataset
-
-    Parameters
-    ----------
-    ds : xarray.Dataset
-        Dataset to add variable attributes to
-    variables : pandas.DataFrame
-        Variables lookup table file
-
-    Returns
-    -------
-    ds : xarray.Dataset
-        Dataset with metadata
-    '''
-    for k in ds.keys():
-        if k not in variables.index: continue
-        ds[k].attrs['standard_name'] = variables.loc[k]['standard_name']
-        ds[k].attrs['long_name'] = variables.loc[k]['long_name']
-        ds[k].attrs['units'] = variables.loc[k]['units']
-        ds[k].attrs['coverage_content_type'] = variables.loc[k]['coverage_content_type']
-        ds[k].attrs['coordinates'] = variables.loc[k]['coordinates']
-    return ds
-
-def addMeta(ds, meta):
-    '''Add metadata attributes from file to dataset
-
-    Parameters
-    ----------
-    ds : xarray.Dataset
-        Dataset to add metadata attributes to
-    meta : dict
-        Metadata file
-
-    Returns
-    -------
-    ds : xarray.Dataset
-        Dataset with metadata
-    '''
-    ds['lon'] = ds['gps_lon'].mean()
-    ds['lon'].attrs = ds['gps_lon'].attrs
-
-    ds['lat'] = ds['gps_lat'].mean()
-    ds['lat'].attrs = ds['gps_lat'].attrs
-
-    ds['alt'] = ds['gps_alt'].mean()
-    ds['alt'].attrs = ds['gps_alt'].attrs
-
-    # for k in ds.keys(): # for each var
-    #     if 'units' in ds[k].attrs:
-    #         if ds[k].attrs['units'] == 'C':
-    #             ds[k].attrs['units'] = 'degrees_C'
-
-    # https://wiki.esipfed.org/Attribute_Convention_for_Data_Discovery_1-3#geospatial_bounds
-    ds.attrs['id'] = 'dk.geus.promice:' + str(uuid.uuid3(uuid.NAMESPACE_DNS, ds.attrs['station_id']))
-    ds.attrs['history'] = 'Generated on ' + datetime.datetime.utcnow().isoformat()
-    ds.attrs['date_created'] = str(datetime.datetime.now().isoformat())
-    ds.attrs['date_modified'] = ds.attrs['date_created']
-    ds.attrs['date_issued'] = ds.attrs['date_created']
-    ds.attrs['date_metadata_modified'] = ds.attrs['date_created']
-
-    ds.attrs['geospatial_bounds'] = "POLYGON((" + \
-        f"{ds['lat'].min().values} {ds['lon'].min().values}, " + \
-        f"{ds['lat'].min().values} {ds['lon'].max().values}, " + \
-        f"{ds['lat'].max().values} {ds['lon'].max().values}, " + \
-        f"{ds['lat'].max().values} {ds['lon'].min().values}, " + \
-        f"{ds['lat'].min().values} {ds['lon'].min().values}))"
-
-    ds.attrs['geospatial_lat_min'] = str(ds['lat'].min().values)
-    ds.attrs['geospatial_lat_max'] = str(ds['lat'].max().values)
-    ds.attrs['geospatial_lon_min'] = str(ds['lon'].min().values)
-    ds.attrs['geospatial_lon_max'] = str(ds['lon'].max().values)
-    ds.attrs['geospatial_vertical_min'] = str(ds['alt'].min().values)
-    ds.attrs['geospatial_vertical_max'] = str(ds['alt'].max().values)
-    ds.attrs['geospatial_vertical_positive'] = 'up'
-    ds.attrs['time_coverage_start'] = str(ds['time'][0].values)
-    ds.attrs['time_coverage_end'] = str(ds['time'][-1].values)
-
-    try:
-        ds.attrs['source']= 'pypromice v' + str(metadata.version('pypromice'))
-    except:
-        ds.attrs['source'] = 'pypromice'
-
-    # https://www.digi.com/resources/documentation/digidocs/90001437-13/reference/r_iso_8601_duration_format.htm
-    try:
-        ds.attrs['time_coverage_duration'] = str(pd.Timedelta((ds['time'][-1] - ds['time'][0]).values).isoformat())
-        ds.attrs['time_coverage_resolution'] = str(pd.Timedelta((ds['time'][1] - ds['time'][0]).values).isoformat())
-    except:
-        ds.attrs['time_coverage_duration'] = str(pd.Timedelta(0).isoformat())
-        ds.attrs['time_coverage_resolution'] = str(pd.Timedelta(0).isoformat())
-
-    # Note: int64 dtype (long int) is incompatible with OPeNDAP access via THREDDS for NetCDF files
-    # See https://stackoverflow.com/questions/48895227/output-int32-time-dimension-in-netcdf-using-xarray
-    ds.time.encoding["dtype"] = "i4" # 32-bit signed integer
-    #ds.time.encoding["calendar"] = 'proleptic_gregorian' # this is default
-
-    # Load metadata attributes and add to Dataset
-    [_addAttr(ds, key, value) for key,value in meta.items()]
-
-    # Check attribute formating
-    for k,v in ds.attrs.items():
-        if not isinstance(v, str) or not isinstance(v, int):
-            ds.attrs[k]=str(v)
-    return ds
-
-
-def getVars(v_file=None):
-    '''Load variables.csv file
-
-    Parameters
-    ----------
-    v_file : str
-        Variable lookup table file path
-
-    Returns
-    -------
-    pandas.DataFrame
-        Variables dataframe
-    '''
-    if v_file is None:
-        with pkg_resources.resource_stream('pypromice', 'process/variables.csv') as stream:
-            return pd.read_csv(stream, index_col=0, comment="#", encoding='utf-8')
-    else:
-        return pd.read_csv(v_file, index_col=0, comment="#")
-
-
-def getMeta(m_file=None, delimiter=','): #TODO change to DataFrame output to match variables.csv
-    '''Load metadata table
-
-    Parameters
-    ----------
-    m_file : str
-        Metadata file path
-    delimiter : str
-        Metadata character delimiter. The default is ","
-
-    Returns
-    -------
-    meta : dict
-        Metadata dictionary
-    '''
-    meta={}
-    if m_file is None:
-        with pkg_resources.resource_stream('pypromice', 'process/metadata.csv') as stream:
-            lines = stream.read().decode("utf-8")
-            lines = lines.split("\n")
-    else:
-        with open(m_file, 'r') as f:
-            lines = f.readlines()
-    for l in lines[1:]:
-        try:
-            meta[l.split(',')[0]] = l.split(delimiter)[1].split('\n')[0].replace(';',',')
-        except IndexError:
-            pass
-    return meta
-
-def resampleL3(ds_h, t):
-    '''Resample L3 AWS data, e.g. hourly to daily average. This uses pandas
-    DataFrame resampling at the moment as a work-around to the xarray Dataset
-    resampling. As stated, xarray resampling is a lengthy process that takes
-    ~2-3 minutes per operation: ds_d = ds_h.resample({'time':"1D"}).mean()
-    This has now been fixed, so needs implementing:
-    https://github.com/pydata/xarray/issues/4498#event-6610799698
-
-    Parameters
-    ----------
-    ds_h : xarray.Dataset
-        L3 AWS daily dataset
-    t : str
-        Resample factor, same variable definition as in
-        pandas.DataFrame.resample()
-
-    Returns
-    -------
-    ds_d : xarray.Dataset
-        L3 AWS hourly dataset
-    '''
-    df_d = ds_h.to_dataframe().resample(t).mean()
-    # recalculating wind direction from averaged directional wind speeds
-    for var in ['wdir_u','wdir_l','wdir_i']:
-        if var in df_d.columns:
-            if ('wspd_x_'+var.split('_')[1] in df_d.columns) & ('wspd_x_'+var.split('_')[1] in df_d.columns):
-                df_d[var] = _calcWindDir(df_d['wspd_x_'+var.split('_')[1]],
-                                         df_d['wspd_y_'+var.split('_')[1]])
-            else:
-                logger.info(var,'in dataframe but not','wspd_x_'+var.split('_')[1],'wspd_x_'+var.split('_')[1])
-    vals = [xr.DataArray(data=df_d[c], dims=['time'],
-            coords={'time':df_d.index}, attrs=ds_h[c].attrs) for c in df_d.columns]
-    ds_d = xr.Dataset(dict(zip(df_d.columns,vals)), attrs=ds_h.attrs)
-    return ds_d
-
-
-def _calcWindDir(wspd_x, wspd_y):
-    '''Calculate wind direction in degrees
-
-    Parameters
-    ----------
-    wspd_x : xarray.DataArray
-        Wind speed in X direction
-    wspd_y : xarray.DataArray
-        Wind speed in Y direction
-
-    Returns
-    -------
-    wdir : xarray.DataArray
-        Wind direction'''
-    deg2rad = np.pi / 180
-    rad2deg = 1 / deg2rad
-    wdir = np.arctan2(wspd_x, wspd_y) * rad2deg
-    wdir = (wdir + 360) % 360
-    return wdir
-
-
-def _addAttr(ds, key, value):
-    '''Add attribute to xarray dataset
-
-    ds : xr.Dataset
-        Dataset to add attribute to
-    key : str
-        Attribute name, with "." denoting variable attributes
-    value : str/int
-        Value for attribute'''
-    if len(key.split('.')) == 2:
-        try:
-            ds[key.split('.')[0]].attrs[key.split('.')[1]] = str(value)
-        except:
-            pass
-            # logger.info(f'Unable to add metadata to {key.split(".")[0]}')
-    else:
-        ds.attrs[key] = value
-
-
-#------------------------------------------------------------------------------
-
-class TestProcess(unittest.TestCase):
-
-    def testgetVars(self):
-        '''Test variable table lookup retrieval'''
-        v = getVars()
-        self.assertIsInstance(v, pd.DataFrame)
-        self.assertTrue(v.columns[0] in 'standard_name')
-        self.assertTrue(v.columns[2] in 'units')
-
-    def testgetMeta(self):
-        '''Test AWS names retrieval'''
-        m = getMeta()
-        self.assertIsInstance(m, dict)
-        self.assertTrue('references' in m)
-
-    def testAddAll(self):
-        '''Test variable and metadata attributes added to Dataset'''
-        d = xr.Dataset()
-        v = getVars()
-        att = list(v.index)
-        att1 = ['gps_lon', 'gps_lat', 'gps_alt', 'albedo', 'p']
-        for a in att:
-            d[a]=[0,1]
-        for a in att1:
-            d[a]=[0,1]
-        d['time'] = [datetime.datetime.now(),
-                     datetime.datetime.now()-timedelta(days=365)]
-        d.attrs['station_id']='TEST'
-        meta = getMeta()
-        d = addVars(d, v)
-        d = addMeta(d, meta)
-        self.assertTrue(d.attrs['station_id']=='TEST')
-        self.assertIsInstance(d.attrs['references'], str)
-
-    def testL0toL3(self):
-        '''Test L0 to L3 processing'''
-        try:
-            import pypromice
-            pAWS = AWS(os.path.join(os.path.dirname(pypromice.__file__),'test/test_config1.toml'),
-                       os.path.join(os.path.dirname(pypromice.__file__),'test'))
-        except:
-            pAWS = AWS('../test/test_config1.toml', '../test/')
-        pAWS.process()
-        self.assertIsInstance(pAWS.L3, xr.Dataset)
-        self.assertTrue(pAWS.L3.attrs['station_id']=='TEST1')
-
-    def testCLIgetl3(self):
-        '''Test get_l3 CLI'''
-        exit_status = os.system('get_l3 -h')
-        self.assertEqual(exit_status, 0)
-
-    def testCLIjoinl3(self):
-        '''Test join_l3 CLI'''
-        exit_status = os.system('join_l3 -h')
-        self.assertEqual(exit_status, 0)
-
-#------------------------------------------------------------------------------
-
-if __name__ == "__main__":
-
-    # # Test an individual station
-    # test_station = 'xxx'
-    # # config_file = '../../../../aws-l0/raw/config/{}.toml'.format(test_station)
-    # config_file = '../../../../aws-l0/tx/config/{}.toml'.format(test_station)
-    # # inpath= '../../../../aws-l0/raw/{}/'.format(test_station)
-    # inpath= '../../../../aws-l0/tx/'
-    # vari = 'variables.csv'
-    # pAWS_gc = AWS(config_file, inpath, var_file=vari)
-    # pAWS_gc.process()
-    # pAWS_gc.getL1()
-    # pAWS_gc.getL2()
-    # pAWS_gc.getL3()
-
-    # # Use test configs
-    # config_files = ['test/test_config1.toml', 'test/test_config2.toml']
-    # inpath= 'test/'
-    # outpath = 'test/'
-    # vari = 'variables.csv'
-    # for cf in config_files:
-    #     pAWS_gc = AWS(cf, inpath, var_file=vari)
-    #     pAWS_gc.process()
-
-    unittest.main()
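The module-level helpers deleted here are not dropped from the package; the file list at the top of this diff shows new pypromice/process/load.py, write.py, utilities.py and resample.py modules taking them over (their new signatures are not shown in this file). One detail worth keeping from the removed resampleL3(): wind direction cannot be averaged directly, so it is recomputed from the resampled wind-speed components. A standalone restatement of that calculation, independent of pypromice's new API:

    import numpy as np
    import pandas as pd

    def calc_wind_dir(wspd_x, wspd_y):
        """Wind direction in degrees from vector components, as in the removed _calcWindDir()."""
        wdir = np.arctan2(wspd_x, wspd_y) * (180 / np.pi)
        return (wdir + 360) % 360

    # After resampling, recompute direction from the averaged components rather
    # than averaging the direction itself (the approach used by resampleL3() above).
    df = pd.DataFrame(
        {"wspd_x_u": [1.0, -1.0], "wspd_y_u": [1.0, 1.0]},
        index=pd.date_range("2024-01-01", periods=2, freq="h"),
    )
    hourly = df.resample("h").mean()
    hourly["wdir_u"] = calc_wind_dir(hourly["wspd_x_u"], hourly["wspd_y_u"])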