pypromice 1.3.5__py3-none-any.whl → 1.4.0__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
Potentially problematic release.
- pypromice/get/get.py +19 -19
- pypromice/postprocess/bufr_to_csv.py +6 -1
- pypromice/postprocess/bufr_utilities.py +91 -18
- pypromice/postprocess/create_bufr_files.py +178 -0
- pypromice/postprocess/get_bufr.py +248 -397
- pypromice/postprocess/make_metadata_csv.py +214 -0
- pypromice/postprocess/real_time_utilities.py +41 -11
- pypromice/process/L0toL1.py +12 -5
- pypromice/process/L1toL2.py +159 -30
- pypromice/process/L2toL3.py +1034 -187
- pypromice/process/aws.py +131 -752
- pypromice/process/get_l2.py +90 -0
- pypromice/process/get_l2tol3.py +111 -0
- pypromice/process/join_l2.py +112 -0
- pypromice/process/join_l3.py +551 -120
- pypromice/process/load.py +161 -0
- pypromice/process/resample.py +128 -0
- pypromice/process/utilities.py +68 -0
- pypromice/process/write.py +503 -0
- pypromice/qc/github_data_issues.py +10 -16
- pypromice/qc/percentiles/thresholds.csv +2 -2
- pypromice/qc/persistence.py +71 -25
- pypromice/resources/__init__.py +28 -0
- pypromice/{process/metadata.csv → resources/file_attributes.csv} +0 -2
- pypromice/resources/variable_aliases_GC-Net.csv +78 -0
- pypromice/resources/variables.csv +106 -0
- pypromice/station_configuration.py +118 -0
- pypromice/tx/get_l0tx.py +7 -4
- pypromice/tx/payload_formats.csv +1 -0
- pypromice/tx/tx.py +27 -6
- pypromice/utilities/__init__.py +0 -0
- pypromice/utilities/git.py +61 -0
- {pypromice-1.3.5.dist-info → pypromice-1.4.0.dist-info}/METADATA +12 -21
- pypromice-1.4.0.dist-info/RECORD +53 -0
- {pypromice-1.3.5.dist-info → pypromice-1.4.0.dist-info}/WHEEL +1 -1
- pypromice-1.4.0.dist-info/entry_points.txt +13 -0
- pypromice/postprocess/station_configurations.toml +0 -762
- pypromice/process/get_l3.py +0 -46
- pypromice/process/variables.csv +0 -92
- pypromice/qc/persistence_test.py +0 -150
- pypromice/test/test_config1.toml +0 -69
- pypromice/test/test_config2.toml +0 -54
- pypromice/test/test_email +0 -75
- pypromice/test/test_payload_formats.csv +0 -4
- pypromice/test/test_payload_types.csv +0 -7
- pypromice/test/test_percentile.py +0 -229
- pypromice/test/test_raw1.txt +0 -4468
- pypromice/test/test_raw_DataTable2.txt +0 -11167
- pypromice/test/test_raw_SlimTableMem1.txt +0 -1155
- pypromice/test/test_raw_transmitted1.txt +0 -15411
- pypromice/test/test_raw_transmitted2.txt +0 -28
- pypromice-1.3.5.dist-info/RECORD +0 -53
- pypromice-1.3.5.dist-info/entry_points.txt +0 -8
- {pypromice-1.3.5.dist-info → pypromice-1.4.0.dist-info}/LICENSE.txt +0 -0
- {pypromice-1.3.5.dist-info → pypromice-1.4.0.dist-info}/top_level.txt +0 -0
pypromice/process/join_l3.py
CHANGED
@@ -1,130 +1,561 @@
 #!/usr/bin/env python
-import
+import json
+import logging, os, sys, toml
+from argparse import ArgumentParser
+
+from pypromice.utilities.git import get_commit_hash_and_check_dirty
+
+import pypromice.resources
+from pypromice.process.write import prepare_and_write
+import numpy as np
 import pandas as pd
 import xarray as xr
-
-
-
-
-
-
-
-
-
-
-
-
-
-    parser.add_argument(
-
-
-
-
-
-
+
+logging.basicConfig(
+    format="%(asctime)s; %(levelname)s; %(name)s; %(message)s",
+    level=logging.INFO,
+    stream=sys.stdout,
+)
+logger = logging.getLogger(__name__)
+
+
+def parse_arguments_joinl3(debug_args=None):
+    parser = ArgumentParser(
+        description="AWS L3 script for the processing L3 data from L2 and merging the L3 data with its historical site. An hourly, daily and monthly L3 data product is outputted to the defined output path"
+    )
+    parser.add_argument(
+        "-c",
+        "--config_folder",
+        type=str,
+        required=True,
+        help="Path to folder with sites configuration (TOML) files",
+    )
+    parser.add_argument(
+        "-s",
+        "--site",
+        default=None,
+        type=str,
+        required=False,
+        help="Name of site to process (default: all sites are processed)",
+    )
+
+    parser.add_argument(
+        "-l3", "--folder_l3", type=str, required=True, help="Path to level 3 folder"
+    )
+    parser.add_argument(
+        "-gc",
+        "--folder_gcnet",
+        type=str,
+        required=False,
+        help="Path to GC-Net historical L1 folder",
+    )
+
+    parser.add_argument(
+        "-o",
+        "--outpath",
+        default=os.getcwd(),
+        type=str,
+        required=True,
+        help="Path where to write output",
+    )
+
+    parser.add_argument(
+        "-v",
+        "--variables",
+        default=None,
+        type=str,
+        required=False,
+        help="Path to variables look-up table .csv file for variable name retained" "",
+    ),
+    parser.add_argument(
+        "-m",
+        "--metadata",
+        default=None,
+        type=str,
+        required=False,
+        help="Path to metadata table .csv file for metadata information" "",
+    ),
+
+    args = parser.parse_args(args=debug_args)
     return args
 
-
-
-
-
-
-
-
-
+
+def readNead(infile):
+    with open(infile) as f:
+        fmt = f.readline()
+        assert fmt[0] == "#"
+        assert fmt.split("#")[1].split()[0] == "NEAD"
+        assert fmt.split("#")[1].split()[1] == "1.0"
+        assert fmt.split("#")[1].split()[2] == "UTF-8"
+
+        line = f.readline()
+        assert line[0] == "#"
+        assert line.split("#")[1].strip() == "[METADATA]"
+
+        meta = {}
+        fields = {}
+        section = "meta"
+        while True:
+            line = f.readline()
+            if line.strip(" ") == "#":
+                continue
+            if line == "# [DATA]\n":
+                break  # done reading header
+            if line == "# [FIELDS]\n":
+                section = "fields"
+                continue  # done reading header
+
+            if line[0] == "\n":
+                continue  # blank line
+            assert line[0] == "#"  # if not blank, must start with "#"
+
+            key_eq_val = line.split("#")[1].strip()
+            if key_eq_val == "" or key_eq_val == None:
+                continue  # Line is just "#" or "# " or "# #"...
+            assert "=" in key_eq_val, print(line, key_eq_val)
+            key = key_eq_val.split("=")[0].strip()
+            val = key_eq_val.split("=")[1].strip()
+
+            # Convert from string to number if it is a number
+            if val.strip("-").strip("+").replace(".", "").isdigit():
+                val = float(val)
+                if val == int(val):
+                    val = int(val)
+
+            if section == "meta":
+                meta[key] = val
+            if section == "fields":
+                fields[key] = val
+        # done reading header
+
+        # Find delimiter and fields for reading NEAD as simple CSV
+        assert "field_delimiter" in meta.keys()
+        assert "fields" in fields.keys()
+        FD = meta["field_delimiter"]
+        names = [_.strip() for _ in fields.pop("fields").split(FD)]
+
+        df = pd.read_csv(
+            infile,
+            comment="#",
+            names=names,
+            sep=FD,
+            usecols=np.arange(len(names)),
+            skip_blank_lines=True,
+        )
+        df["timestamp"] = pd.to_datetime(df.timestamp).dt.tz_localize(None)
+        df = df.set_index("timestamp")
+        ds = df.to_xarray()
+        ds.attrs = meta
+
+    # renaming variables
+    file_path = pypromice.resources.DEFAULT_VARIABLES_ALIASES_GCNET_PATH
+    var_name = pd.read_csv(file_path)
+    var_name = var_name.set_index("old_name").GEUS_name
+    msk = [v for v in var_name.index if v in ds.data_vars]
+    var_name = var_name.loc[msk].to_dict()
+
+    # combining thermocouple and CS100 temperatures
+    ds["TA1"] = ds["TA1"].combine_first(ds["TA3"])
+    ds["TA2"] = ds["TA2"].combine_first(ds["TA4"])
+
+    # renaming variables to the GEUS names
+    ds = ds.rename(var_name)
+
+    # variables always dropped from the historical GC-Net files
+    # could be move to the config files at some point
+    standard_vars_to_drop = [
+        "NR",
+        "TA3",
+        "TA4",
+        "TA5",
+        "NR_cor",
+        "TA2m",
+        "RH2m",
+        "VW10m",
+        "SZA",
+        "SAA",
+    ]
+    standard_vars_to_drop = standard_vars_to_drop + [
+        v for v in list(ds.keys()) if v.endswith("_adj_flag")
+    ]
+
+    # Drop the variables if they are present in the dataset
+    ds = ds.drop_vars([var for var in standard_vars_to_drop if var in ds])
+
+    ds = ds.rename({"timestamp": "time"})
+
+    # in the historical GC-Net processing, periods with missing z_surf_combined
+    # are filled with a constant value, these values should be removed to
+    # allow a better alignement with the z_surf_combined estimated for the GEUS stations
+    ds["z_surf_combined"] = ds["z_surf_combined"].where(
+        ds["z_surf_combined"].diff(dim="time") != 0
+    )
+    return ds
+
+
+def loadArr(infile, isNead):
+    if infile.split(".")[-1].lower() in "csv":
+        if isNead:
+            ds = readNead(infile)
+        else:
+            df = pd.read_csv(infile)
+            df["time"] = pd.to_datetime(df["time"]).dt.tz_localize(None)
+            df = df.set_index("time")
+            ds = xr.Dataset.from_dataframe(df)
+
+    elif infile.split(".")[-1].lower() in "nc":
+        with xr.open_dataset(infile) as ds:
+            ds.load()
+        # Remove encoding attributes from NetCDF
+        for varname in ds.variables:
+            if ds[varname].encoding != {}:
+                ds[varname].encoding = {}
+
     try:
-        name = ds.attrs[
+        name = ds.attrs["station_name"]
     except:
-        name = infile.split(
-
-    print(f
+        name = infile.split("/")[-1].split(".")[0].split("_hour")[0].split("_10min")[0]
+
+    print(f"{name} array loaded from {infile}")
     return ds, name
-
-
-def
-
-
-
-
-
-
-
-
-
-
-
+
+
+def align_surface_heights(data_series_new, data_series_old):
+    """
+    Align two surface height time series based on the gap between their end and
+    start.
+
+    If the gap between the end of `data_series_old` and the start of `data_series_new`
+    is less than a week, the function aligns them based on the median value of
+    the last week of `data_series_old` and the first week of `data_series_new`.
+    If the gap is larger than a week, it aligns them using a linear fit. If
+    there is overlap, the function uses the overlapping period to adjust the
+    newer time series.
+
+    Parameters
+    ----------
+    data_series_old : pandas.Series
+        The older time series data.
+    data_series_new : pandas.Series
+        The newer time series data.
+
+    Returns
+    -------
+    numpy.ndarray
+        Array containing the aligned newer time series data.
+    """
+    # Get the first and last valid indices of both series
+    last_old_idx = data_series_old.last_valid_index()
+    first_new_idx = data_series_new.first_valid_index()
+
+    # Check for overlap
+    if first_new_idx <= last_old_idx:
+        # Find the overlapping period
+        overlap_start = first_new_idx
+        overlap_end = min(last_old_idx, overlap_start + pd.to_timedelta("7D"))
+
+        # Compute the median values for the overlapping period
+        overlap_old = data_series_old[overlap_start:overlap_end].median()
+        overlap_new = data_series_new[overlap_start:overlap_end].median()
+
+        if np.isnan(overlap_old) or np.isnan(overlap_new):
+            overlap_end = min(last_old_idx, data_series_new.last_valid_index())
+
+            # Compute the median values for the overlapping period
+            overlap_old = data_series_old[overlap_start:overlap_end].median()
+            overlap_new = data_series_new[overlap_start:overlap_end].median()
+
+        # Align based on the overlapping median values
+        data_series_new = data_series_new - overlap_new + overlap_old
+
+    elif (first_new_idx - last_old_idx).days <= 7:
+        # Compute the median of the last week of data in the old series
+        last_week_old = data_series_old[
+            last_old_idx - pd.Timedelta(weeks=1) : last_old_idx
+        ].median()
+
+        # Compute the median of the first week of data in the new series
+        first_week_new = data_series_new[
+            first_new_idx : first_new_idx + pd.Timedelta(weeks=1)
+        ].median()
+
+        # Align based on the median values
+        data_series_new = data_series_new - first_week_new + last_week_old
+    else:
+        # Perform a linear fit on the last 5x365x24 non-nan values
+        hours_in_5_years = 5 * 365 * 24
+
+        # Drop NaN values and extract the last `hours_in_5_years` non-NaN data points
+        data_series_old_nonan = data_series_old.dropna()
+        data_series_old_last_5_years = data_series_old_nonan.iloc[
+            -min(len(data_series_old), hours_in_5_years):
+        ]
+
+        # Perform a linear fit on the last 5 years of data
+        fit = np.polyfit(
+            data_series_old_last_5_years.index.astype("int64"),
+            data_series_old_last_5_years.values,
+            1,
+        )
+        fit_fn = np.poly1d(fit)
+
+        data_series_new = (
+            data_series_new.values
+            + fit_fn(data_series_new.index.astype("int64")[0])
+            - data_series_new[first_new_idx]
+        )
+
+    return data_series_new
+
+
+def build_station_list(config_folder: str, target_station_site: str) -> list:
+    """
+    Get a list of unique station information dictionaries for a given station site.
+
+    Parameters
+    ----------
+    config_folder : str
+        Path to the folder containing the station configuration TOML files.
+    target_station_site : str
+        The station site to filter the station information by.
+
+    Returns
+    -------
+    list
+        A list of dictionaries containing station information that have the specified station site.
+    """
+    station_info_list = []  # Initialize an empty list to store station information
+
+    found_as_station = False
+    for filename in os.listdir(config_folder):
+        if filename.endswith(".toml"):
+            file_path = os.path.join(config_folder, filename)
+
+            with open(file_path, "r") as file:
+                data = toml.load(file)  # Load the TOML file
+                station_site = data.get("station_site")  # Get the station site
+                stid = data.get("stid")  # Get the station ID
+
+                # Check if the station site matches the target and stid is unique
+                if stid == target_station_site:
+                    found_as_station = True
+                if station_site == target_station_site and stid:
+                    station_info = data.copy()  # Copy all attributes from the TOML file
+                    station_info_list.append(
+                        station_info
+                    )  # Add the station info to the list
+
+    if len(station_info_list) == 0 and not found_as_station:
+        logger.error(
+            "\n***\nNo station_configuration file found for %s.\nProcessing it as a single-station PROMICE site.\n***"
+            % target_station_site
+        )
+        station_info = {
+            "stid": target_station_site,
+            "station_site": target_station_site,
+            "project": "PROMICE",
+            "location_type": "ice sheet",
+        }
+        station_info_list.append(station_info)
+    elif len(station_info_list) == 0:
+        logger.error(
+            '\n***\nThe name "%s" passed to join_l3 is a station name and not a site name (e.g. SCO_Lv3 instead of SCO_L). Please provide a site name that is named at least once in the "station_site" attribute of the station configuration files.\n***'
+            % target_station_site
+        )
+
+    return station_info_list
+
+
+def join_l3(config_folder, site, folder_l3, folder_gcnet, outpath, variables, metadata):
+    # Get the list of station information dictionaries associated with the given site
+    list_station_info = build_station_list(config_folder, site)
+
+    # Read the datasets and store them into a list along with their latest timestamp and station info
+    list_station_data = []
+    for station_info in list_station_info:
+        stid = station_info["stid"]
+
+        filepath = os.path.join(folder_l3, stid, stid + "_hour.nc")
+        isNead = False
+        if station_info["project"].lower() in ["historical gc-net"]:
+            filepath = os.path.join(folder_gcnet, stid + ".csv")
+            isNead = True
+        if not os.path.isfile(filepath):
+            logger.error(
+                "\n***\n"
+                + stid
+                + " was listed as station but could not be found in "
+                + folder_l3
+                + " nor "
+                + folder_gcnet
+                + "\n***"
+            )
+            continue
+
+        l3, _ = loadArr(filepath, isNead)
+
+        # removing specific variable from a given file
+        specific_vars_to_drop = station_info.get("skipped_variables", [])
+        if len(specific_vars_to_drop) > 0:
+            logger.info("Skipping %s from %s" % (specific_vars_to_drop, stid))
+            l3 = l3.drop_vars([var for var in specific_vars_to_drop if var in l3])
+
+        list_station_data.append((l3, station_info))
+
+    # Sort the list in reverse chronological order so that we start with the latest data
+    sorted_list_station_data = sorted(
+        list_station_data, key=lambda x: x[0].time.min(), reverse=True
+    )
+    sorted_stids = [info["stid"] for _, info in sorted_list_station_data]
+    logger.info("joining %s" % " ".join(sorted_stids))
+
+    l3_merged = None
+
+    for l3, station_info in sorted_list_station_data:
+        stid = station_info["stid"]
+
+        if l3_merged is None:
+            # saving attributes of stid
+            st_attrs = {}
+            st_attrs[stid] = l3.attrs.copy()
+            # adding timestamps info
+            st_attrs[stid]["first_timestamp"] = (
+                l3.time.isel(time=0).dt.strftime(date_format="%Y-%m-%d %H:%M:%S").item()
+            )
+            st_attrs[stid]["last_timestamp"] = (
+                l3.time.isel(time=-1)
+                .dt.strftime(date_format="%Y-%m-%d %H:%M:%S")
+                .item()
+            )
+
+            # then stripping attributes
+            attrs_list = list(l3.attrs.keys())
+            for k in attrs_list:
+                del l3.attrs[k]
+
+            # initializing l3_merged with l3
+            l3_merged = l3.copy()
+
+            # creating the station_attributes attribute in l3_merged
+            l3_merged.attrs["stations_attributes"] = st_attrs
+
+        else:
+            # if l3 (older data) is missing variables compared to l3_merged (newer data)
+            # , then we fill them with nan
+            for v in l3_merged.data_vars:
+                if v not in l3.data_vars:
+                    l3[v] = l3.t_u * np.nan
+            for v in l3.data_vars:
+                if v not in l3_merged.data_vars:
+                    l3_merged[v] = l3_merged.t_u * np.nan
+
+            # saving attributes of station under an attribute called $stid
+            st_attrs = l3_merged.attrs.get("stations_attributes", {})
+            st_attrs[stid] = l3.attrs.copy()
+            l3_merged.attrs["stations_attributes"] = st_attrs
+
+            # then stripping attributes
+            attrs_list = list(l3.attrs.keys())
+            for k in attrs_list:
+                del l3.attrs[k]
+
+            l3_merged.attrs["stations_attributes"][stid]["first_timestamp"] = (
+                l3.time.isel(time=0).dt.strftime(date_format="%Y-%m-%d %H:%M:%S").item()
+            )
+            l3_merged.attrs["stations_attributes"][stid]["last_timestamp"] = (
+                l3_merged.time.isel(time=0)
+                .dt.strftime(date_format="%Y-%m-%d %H:%M:%S")
+                .item()
+            )
+
+            # adjusting surface height in the most recent data (l3_merged)
+            # so that it shows continuity with the older data (l3)
+            if "z_surf_combined" in l3_merged.keys() and "z_surf_combined" in l3.keys():
+                if (
+                    l3_merged.z_surf_combined.notnull().any()
+                    and l3.z_surf_combined.notnull().any()
+                ):
+                    l3_merged["z_surf_combined"] = (
+                        "time",
+                        align_surface_heights(
+                            l3_merged.z_surf_combined.to_series(),
+                            l3.z_surf_combined.to_series(),
+                        ),
+                    )
+            if "z_ice_surf" in l3_merged.keys() and "z_ice_surf" in l3.keys():
+                if (
+                    l3_merged.z_ice_surf.notnull().any()
+                    and l3.z_ice_surf.notnull().any()
+                ):
+                    l3_merged["z_ice_surf"] = (
+                        "time",
+                        align_surface_heights(
+                            l3_merged.z_ice_surf.to_series(), l3.z_ice_surf.to_series()
+                        ),
+                    )
 
-
-
-
-
+            # saves attributes
+            attrs = l3_merged.attrs
+            # merging by time block
+            l3_merged = xr.concat(
+                (
+                    l3.sel(
+                        time=slice(l3.time.isel(time=0), l3_merged.time.isel(time=0))
+                    ),
+                    l3_merged,
+                ),
+                dim="time",
+            )
 
-    #
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    # Assign metadata
-    print(f'Adding metadata from {args.metadata}...')
-    m = getMeta(args.metadata)
-    l3_h = addMeta(l3_h, m)
-    l3_d = addMeta(l3_d, m)
-    l3_m = addMeta(l3_m, m)
-
-    # Set up output path
-    out = os.path.join(args.outpath, name)
-
-    # Write to files
-    writeAll(out, name, l3_h, l3_d, l3_m, col_names)
-    print(f'Files saved to {os.path.join(out, name)}...')
-
-if __name__ == "__main__":
-    join_l3()
+            # restauring attributes
+            l3_merged.attrs = attrs
+
+    # Assign site id
+    if not l3_merged:
+        logger.error("No level 3 station data file found for " + site)
+        return None, sorted_list_station_data
+    l3_merged.attrs["site_id"] = site
+    l3_merged.attrs["stations"] = " ".join(sorted_stids)
+    l3_merged.attrs["level"] = "L3"
+    l3_merged.attrs["project"] = sorted_list_station_data[0][1]["project"]
+    l3_merged.attrs["location_type"] = sorted_list_station_data[0][1]["location_type"]
+
+    site_source = dict(
+        site_config_source_hash=get_commit_hash_and_check_dirty(config_folder),
+        gcnet_source_hash=get_commit_hash_and_check_dirty(folder_gcnet),
+    )
+
+    for stid, station_attributes in l3_merged.attrs["stations_attributes"].items():
+        if "source" in station_attributes.keys():
+            station_source = json.loads(station_attributes["source"])
+            for k, v in station_source.items():
+                if k in site_source and site_source[k] != v:
+                    site_source[k] = "multiple"
+                else:
+                    site_source[k] = v
+    l3_merged.attrs["source"] = json.dumps(site_source)
+
+    v = pypromice.resources.load_variables(variables)
+    m = pypromice.resources.load_metadata(metadata)
+    if outpath is not None:
+        prepare_and_write(l3_merged, outpath, v, m, "60min")
+        prepare_and_write(l3_merged, outpath, v, m, "1D")
+        prepare_and_write(l3_merged, outpath, v, m, "M")
+    return l3_merged, sorted_list_station_data
+
+
+def main():
+    args = parse_arguments_joinl3()
+    _, _ = join_l3(
+        args.config_folder,
+        args.site,
+        args.folder_l3,
+        args.folder_gcnet,
+        args.outpath,
+        args.variables,
+        args.metadata,
+    )
+
+
+if __name__ == "__main__":
+    main()