pypromice 1.3.6__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of pypromice might be problematic.

Files changed (53):
  1. pypromice/postprocess/bufr_to_csv.py +6 -1
  2. pypromice/postprocess/bufr_utilities.py +91 -18
  3. pypromice/postprocess/create_bufr_files.py +178 -0
  4. pypromice/postprocess/get_bufr.py +248 -397
  5. pypromice/postprocess/make_metadata_csv.py +214 -0
  6. pypromice/postprocess/real_time_utilities.py +41 -11
  7. pypromice/process/L0toL1.py +12 -5
  8. pypromice/process/L1toL2.py +69 -14
  9. pypromice/process/L2toL3.py +1033 -186
  10. pypromice/process/aws.py +130 -808
  11. pypromice/process/get_l2.py +90 -0
  12. pypromice/process/get_l2tol3.py +111 -0
  13. pypromice/process/join_l2.py +112 -0
  14. pypromice/process/join_l3.py +551 -120
  15. pypromice/process/load.py +161 -0
  16. pypromice/process/resample.py +128 -0
  17. pypromice/process/utilities.py +68 -0
  18. pypromice/process/write.py +503 -0
  19. pypromice/qc/github_data_issues.py +10 -16
  20. pypromice/qc/persistence.py +52 -30
  21. pypromice/resources/__init__.py +28 -0
  22. pypromice/{process/metadata.csv → resources/file_attributes.csv} +0 -2
  23. pypromice/resources/variable_aliases_GC-Net.csv +78 -0
  24. pypromice/resources/variables.csv +106 -0
  25. pypromice/station_configuration.py +118 -0
  26. pypromice/tx/get_l0tx.py +7 -4
  27. pypromice/tx/payload_formats.csv +1 -0
  28. pypromice/tx/tx.py +27 -6
  29. pypromice/utilities/__init__.py +0 -0
  30. pypromice/utilities/git.py +61 -0
  31. {pypromice-1.3.6.dist-info → pypromice-1.4.0.dist-info}/METADATA +3 -3
  32. pypromice-1.4.0.dist-info/RECORD +53 -0
  33. {pypromice-1.3.6.dist-info → pypromice-1.4.0.dist-info}/WHEEL +1 -1
  34. pypromice-1.4.0.dist-info/entry_points.txt +13 -0
  35. pypromice/postprocess/station_configurations.toml +0 -762
  36. pypromice/process/get_l3.py +0 -46
  37. pypromice/process/variables.csv +0 -92
  38. pypromice/qc/persistence_test.py +0 -150
  39. pypromice/test/test_config1.toml +0 -69
  40. pypromice/test/test_config2.toml +0 -54
  41. pypromice/test/test_email +0 -75
  42. pypromice/test/test_payload_formats.csv +0 -4
  43. pypromice/test/test_payload_types.csv +0 -7
  44. pypromice/test/test_percentile.py +0 -229
  45. pypromice/test/test_raw1.txt +0 -4468
  46. pypromice/test/test_raw_DataTable2.txt +0 -11167
  47. pypromice/test/test_raw_SlimTableMem1.txt +0 -1155
  48. pypromice/test/test_raw_transmitted1.txt +0 -15411
  49. pypromice/test/test_raw_transmitted2.txt +0 -28
  50. pypromice-1.3.6.dist-info/RECORD +0 -53
  51. pypromice-1.3.6.dist-info/entry_points.txt +0 -8
  52. {pypromice-1.3.6.dist-info → pypromice-1.4.0.dist-info}/LICENSE.txt +0 -0
  53. {pypromice-1.3.6.dist-info → pypromice-1.4.0.dist-info}/top_level.txt +0 -0
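
The only diff expanded below is pypromice/process/join_l3.py. For orientation, here is a minimal sketch (not part of the release) of how the relocated resource and write helpers are consumed; it mirrors only the calls that appear in that diff, and the input path and the "None loads the packaged defaults" behaviour are assumptions rather than documented API.

    # Sketch: mirrors the resource/write calls used in the join_l3.py diff below.
    # Assumptions: passing None loads the packaged look-up tables, and the
    # station file path is hypothetical.
    import xarray as xr

    import pypromice.resources
    from pypromice.process.write import prepare_and_write

    variables = pypromice.resources.load_variables(None)  # packaged variables.csv (assumed default)
    metadata = pypromice.resources.load_metadata(None)    # packaged file_attributes.csv (assumed default)
    ds = xr.open_dataset("KAN_M/KAN_M_hour.nc")           # hypothetical L3 station file

    prepare_and_write(ds, "./out", variables, metadata, "60min")  # hourly product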
pypromice/process/join_l3.py
@@ -1,130 +1,561 @@
  #!/usr/bin/env python
- import os, unittest
+ import json
+ import logging, os, sys, toml
+ from argparse import ArgumentParser
+
+ from pypromice.utilities.git import get_commit_hash_and_check_dirty
+
+ import pypromice.resources
+ from pypromice.process.write import prepare_and_write
+ import numpy as np
  import pandas as pd
  import xarray as xr
- from argparse import ArgumentParser
- from pypromice.process import getVars, getMeta, addMeta, getColNames, \
-     roundValues, resampleL3, writeAll
- from pypromice.process.L1toL2 import correctPrecip
-
- def parse_arguments_join():
-     parser = ArgumentParser(description="AWS L3 joiner for merging together two L3 products, for example an L3 RAW and L3 TX data product. An hourly, daily and monthly L3 data product is outputted to the defined output path")
-     parser.add_argument('-s', '--file1', type=str, required=True,
-                         help='Path to source L3 file, which will be preferenced in merge process')
-     parser.add_argument('-t', '--file2', type=str, required=True,
-                         help='Path to target L3 file, which will be used to fill gaps in merge process')
-     parser.add_argument('-o', '--outpath', default=os.getcwd(), type=str, required=True,
-                         help='Path where to write output')
-     parser.add_argument('-v', '--variables', default=None, type=str, required=False,
-                         help='Path to variables look-up table .csv file for variable name retained'''),
-     parser.add_argument('-m', '--metadata', default=None, type=str, required=False,
-                         help='Path to metadata table .csv file for metadata information'''),
-     parser.add_argument('-d', '--datatype', default='raw', type=str, required=False,
-                         help='Data type to output, raw or tx')
-     args = parser.parse_args()
+
+ logging.basicConfig(
+     format="%(asctime)s; %(levelname)s; %(name)s; %(message)s",
+     level=logging.INFO,
+     stream=sys.stdout,
+ )
+ logger = logging.getLogger(__name__)
+
+
+ def parse_arguments_joinl3(debug_args=None):
+     parser = ArgumentParser(
+         description="AWS L3 script for the processing L3 data from L2 and merging the L3 data with its historical site. An hourly, daily and monthly L3 data product is outputted to the defined output path"
+     )
+     parser.add_argument(
+         "-c",
+         "--config_folder",
+         type=str,
+         required=True,
+         help="Path to folder with sites configuration (TOML) files",
+     )
+     parser.add_argument(
+         "-s",
+         "--site",
+         default=None,
+         type=str,
+         required=False,
+         help="Name of site to process (default: all sites are processed)",
+     )
+
+     parser.add_argument(
+         "-l3", "--folder_l3", type=str, required=True, help="Path to level 3 folder"
+     )
+     parser.add_argument(
+         "-gc",
+         "--folder_gcnet",
+         type=str,
+         required=False,
+         help="Path to GC-Net historical L1 folder",
+     )
+
+     parser.add_argument(
+         "-o",
+         "--outpath",
+         default=os.getcwd(),
+         type=str,
+         required=True,
+         help="Path where to write output",
+     )
+
+     parser.add_argument(
+         "-v",
+         "--variables",
+         default=None,
+         type=str,
+         required=False,
+         help="Path to variables look-up table .csv file for variable name retained" "",
+     ),
+     parser.add_argument(
+         "-m",
+         "--metadata",
+         default=None,
+         type=str,
+         required=False,
+         help="Path to metadata table .csv file for metadata information" "",
+     ),
+
+     args = parser.parse_args(args=debug_args)
      return args

- def loadArr(infile):
-     if infile.split('.')[-1].lower() in 'csv':
-         df = pd.read_csv(infile, index_col=0, parse_dates=True)
-         ds = xr.Dataset.from_dataframe(df)
-
-     elif infile.split('.')[-1].lower() in 'nc':
-         ds = xr.open_dataset(infile)
-
+
+ def readNead(infile):
+     with open(infile) as f:
+         fmt = f.readline()
+         assert fmt[0] == "#"
+         assert fmt.split("#")[1].split()[0] == "NEAD"
+         assert fmt.split("#")[1].split()[1] == "1.0"
+         assert fmt.split("#")[1].split()[2] == "UTF-8"
+
+         line = f.readline()
+         assert line[0] == "#"
+         assert line.split("#")[1].strip() == "[METADATA]"
+
+         meta = {}
+         fields = {}
+         section = "meta"
+         while True:
+             line = f.readline()
+             if line.strip(" ") == "#":
+                 continue
+             if line == "# [DATA]\n":
+                 break # done reading header
+             if line == "# [FIELDS]\n":
+                 section = "fields"
+                 continue # done reading header
+
+             if line[0] == "\n":
+                 continue # blank line
+             assert line[0] == "#" # if not blank, must start with "#"
+
+             key_eq_val = line.split("#")[1].strip()
+             if key_eq_val == "" or key_eq_val == None:
+                 continue # Line is just "#" or "# " or "# #"...
+             assert "=" in key_eq_val, print(line, key_eq_val)
+             key = key_eq_val.split("=")[0].strip()
+             val = key_eq_val.split("=")[1].strip()
+
+             # Convert from string to number if it is a number
+             if val.strip("-").strip("+").replace(".", "").isdigit():
+                 val = float(val)
+                 if val == int(val):
+                     val = int(val)
+
+             if section == "meta":
+                 meta[key] = val
+             if section == "fields":
+                 fields[key] = val
+         # done reading header
+
+     # Find delimiter and fields for reading NEAD as simple CSV
+     assert "field_delimiter" in meta.keys()
+     assert "fields" in fields.keys()
+     FD = meta["field_delimiter"]
+     names = [_.strip() for _ in fields.pop("fields").split(FD)]
+
+     df = pd.read_csv(
+         infile,
+         comment="#",
+         names=names,
+         sep=FD,
+         usecols=np.arange(len(names)),
+         skip_blank_lines=True,
+     )
+     df["timestamp"] = pd.to_datetime(df.timestamp).dt.tz_localize(None)
+     df = df.set_index("timestamp")
+     ds = df.to_xarray()
+     ds.attrs = meta
+
+     # renaming variables
+     file_path = pypromice.resources.DEFAULT_VARIABLES_ALIASES_GCNET_PATH
+     var_name = pd.read_csv(file_path)
+     var_name = var_name.set_index("old_name").GEUS_name
+     msk = [v for v in var_name.index if v in ds.data_vars]
+     var_name = var_name.loc[msk].to_dict()
+
+     # combining thermocouple and CS100 temperatures
+     ds["TA1"] = ds["TA1"].combine_first(ds["TA3"])
+     ds["TA2"] = ds["TA2"].combine_first(ds["TA4"])
+
+     # renaming variables to the GEUS names
+     ds = ds.rename(var_name)
+
+     # variables always dropped from the historical GC-Net files
+     # could be move to the config files at some point
+     standard_vars_to_drop = [
+         "NR",
+         "TA3",
+         "TA4",
+         "TA5",
+         "NR_cor",
+         "TA2m",
+         "RH2m",
+         "VW10m",
+         "SZA",
+         "SAA",
+     ]
+     standard_vars_to_drop = standard_vars_to_drop + [
+         v for v in list(ds.keys()) if v.endswith("_adj_flag")
+     ]
+
+     # Drop the variables if they are present in the dataset
+     ds = ds.drop_vars([var for var in standard_vars_to_drop if var in ds])
+
+     ds = ds.rename({"timestamp": "time"})
+
+     # in the historical GC-Net processing, periods with missing z_surf_combined
+     # are filled with a constant value, these values should be removed to
+     # allow a better alignement with the z_surf_combined estimated for the GEUS stations
+     ds["z_surf_combined"] = ds["z_surf_combined"].where(
+         ds["z_surf_combined"].diff(dim="time") != 0
+     )
+     return ds
+
+
+ def loadArr(infile, isNead):
+     if infile.split(".")[-1].lower() in "csv":
+         if isNead:
+             ds = readNead(infile)
+         else:
+             df = pd.read_csv(infile)
+             df["time"] = pd.to_datetime(df["time"]).dt.tz_localize(None)
+             df = df.set_index("time")
+             ds = xr.Dataset.from_dataframe(df)
+
+     elif infile.split(".")[-1].lower() in "nc":
+         with xr.open_dataset(infile) as ds:
+             ds.load()
+         # Remove encoding attributes from NetCDF
+         for varname in ds.variables:
+             if ds[varname].encoding != {}:
+                 ds[varname].encoding = {}
+
      try:
-         name = ds.attrs['station_name']
+         name = ds.attrs["station_name"]
      except:
-         name = infile.split('/')[-1].split('.')[0].split('_hour')[0].split('_10min')[0]
-
-     print(f'{name} array loaded from {infile}')
+         name = infile.split("/")[-1].split(".")[0].split("_hour")[0].split("_10min")[0]
+
+     print(f"{name} array loaded from {infile}")
      return ds, name
-
-
- def join_l3():
-     args = parse_arguments_join()
-
-     # Check files
-     if os.path.isfile(args.file1) and os.path.isfile(args.file2):
-
-         # Load data arrays
-         ds1, n1 = loadArr(args.file1)
-         ds2, n2 = loadArr(args.file2)
-
-         # Check stations match
-         if n1.lower() == n2.lower():
+
+
+ def align_surface_heights(data_series_new, data_series_old):
+     """
+     Align two surface height time series based on the gap between their end and
+     start.
+
+     If the gap between the end of `data_series_old` and the start of `data_series_new`
+     is less than a week, the function aligns them based on the median value of
+     the last week of `data_series_old` and the first week of `data_series_new`.
+     If the gap is larger than a week, it aligns them using a linear fit. If
+     there is overlap, the function uses the overlapping period to adjust the
+     newer time series.
+
+     Parameters
+     ----------
+     data_series_old : pandas.Series
+         The older time series data.
+     data_series_new : pandas.Series
+         The newer time series data.
+
+     Returns
+     -------
+     numpy.ndarray
+         Array containing the aligned newer time series data.
+     """
+     # Get the first and last valid indices of both series
+     last_old_idx = data_series_old.last_valid_index()
+     first_new_idx = data_series_new.first_valid_index()
+
+     # Check for overlap
+     if first_new_idx <= last_old_idx:
+         # Find the overlapping period
+         overlap_start = first_new_idx
+         overlap_end = min(last_old_idx, overlap_start + pd.to_timedelta("7D"))
+
+         # Compute the median values for the overlapping period
+         overlap_old = data_series_old[overlap_start:overlap_end].median()
+         overlap_new = data_series_new[overlap_start:overlap_end].median()
+
+         if np.isnan(overlap_old) or np.isnan(overlap_new):
+             overlap_end = min(last_old_idx, data_series_new.last_valid_index())
+
+             # Compute the median values for the overlapping period
+             overlap_old = data_series_old[overlap_start:overlap_end].median()
+             overlap_new = data_series_new[overlap_start:overlap_end].median()
+
+         # Align based on the overlapping median values
+         data_series_new = data_series_new - overlap_new + overlap_old
+
+     elif (first_new_idx - last_old_idx).days <= 7:
+         # Compute the median of the last week of data in the old series
+         last_week_old = data_series_old[
+             last_old_idx - pd.Timedelta(weeks=1) : last_old_idx
+         ].median()
+
+         # Compute the median of the first week of data in the new series
+         first_week_new = data_series_new[
+             first_new_idx : first_new_idx + pd.Timedelta(weeks=1)
+         ].median()
+
+         # Align based on the median values
+         data_series_new = data_series_new - first_week_new + last_week_old
+     else:
+         # Perform a linear fit on the last 5x365x24 non-nan values
+         hours_in_5_years = 5 * 365 * 24
+
+         # Drop NaN values and extract the last `hours_in_5_years` non-NaN data points
+         data_series_old_nonan = data_series_old.dropna()
+         data_series_old_last_5_years = data_series_old_nonan.iloc[
+             -min(len(data_series_old), hours_in_5_years):
+         ]
+
+         # Perform a linear fit on the last 5 years of data
+         fit = np.polyfit(
+             data_series_old_last_5_years.index.astype("int64"),
+             data_series_old_last_5_years.values,
+             1,
+         )
+         fit_fn = np.poly1d(fit)
+
+         data_series_new = (
+             data_series_new.values
+             + fit_fn(data_series_new.index.astype("int64")[0])
+             - data_series_new[first_new_idx]
+         )
+
+     return data_series_new
+
+
+ def build_station_list(config_folder: str, target_station_site: str) -> list:
+     """
+     Get a list of unique station information dictionaries for a given station site.
+
+     Parameters
+     ----------
+     config_folder : str
+         Path to the folder containing the station configuration TOML files.
+     target_station_site : str
+         The station site to filter the station information by.
+
+     Returns
+     -------
+     list
+         A list of dictionaries containing station information that have the specified station site.
+     """
+     station_info_list = [] # Initialize an empty list to store station information
+
+     found_as_station = False
+     for filename in os.listdir(config_folder):
+         if filename.endswith(".toml"):
+             file_path = os.path.join(config_folder, filename)
+
+             with open(file_path, "r") as file:
+                 data = toml.load(file) # Load the TOML file
+                 station_site = data.get("station_site") # Get the station site
+                 stid = data.get("stid") # Get the station ID
+
+                 # Check if the station site matches the target and stid is unique
+                 if stid == target_station_site:
+                     found_as_station = True
+                 if station_site == target_station_site and stid:
+                     station_info = data.copy() # Copy all attributes from the TOML file
+                     station_info_list.append(
+                         station_info
+                     ) # Add the station info to the list
+
+     if len(station_info_list) == 0 and not found_as_station:
+         logger.error(
+             "\n***\nNo station_configuration file found for %s.\nProcessing it as a single-station PROMICE site.\n***"
+             % target_station_site
+         )
+         station_info = {
+             "stid": target_station_site,
+             "station_site": target_station_site,
+             "project": "PROMICE",
+             "location_type": "ice sheet",
+         }
+         station_info_list.append(station_info)
+     elif len(station_info_list) == 0:
+         logger.error(
+             '\n***\nThe name "%s" passed to join_l3 is a station name and not a site name (e.g. SCO_Lv3 instead of SCO_L). Please provide a site name that is named at least once in the "station_site" attribute of the station configuration files.\n***'
+             % target_station_site
+         )
+
+     return station_info_list
+
+
+ def join_l3(config_folder, site, folder_l3, folder_gcnet, outpath, variables, metadata):
+     # Get the list of station information dictionaries associated with the given site
+     list_station_info = build_station_list(config_folder, site)
+
+     # Read the datasets and store them into a list along with their latest timestamp and station info
+     list_station_data = []
+     for station_info in list_station_info:
+         stid = station_info["stid"]
+
+         filepath = os.path.join(folder_l3, stid, stid + "_hour.nc")
+         isNead = False
+         if station_info["project"].lower() in ["historical gc-net"]:
+             filepath = os.path.join(folder_gcnet, stid + ".csv")
+             isNead = True
+         if not os.path.isfile(filepath):
+             logger.error(
+                 "\n***\n"
+                 + stid
+                 + " was listed as station but could not be found in "
+                 + folder_l3
+                 + " nor "
+                 + folder_gcnet
+                 + "\n***"
+             )
+             continue
+
+         l3, _ = loadArr(filepath, isNead)
+
+         # removing specific variable from a given file
+         specific_vars_to_drop = station_info.get("skipped_variables", [])
+         if len(specific_vars_to_drop) > 0:
+             logger.info("Skipping %s from %s" % (specific_vars_to_drop, stid))
+             l3 = l3.drop_vars([var for var in specific_vars_to_drop if var in l3])
+
+         list_station_data.append((l3, station_info))
+
+     # Sort the list in reverse chronological order so that we start with the latest data
+     sorted_list_station_data = sorted(
+         list_station_data, key=lambda x: x[0].time.min(), reverse=True
+     )
+     sorted_stids = [info["stid"] for _, info in sorted_list_station_data]
+     logger.info("joining %s" % " ".join(sorted_stids))
+
+     l3_merged = None
+
+     for l3, station_info in sorted_list_station_data:
+         stid = station_info["stid"]
+
+         if l3_merged is None:
+             # saving attributes of stid
+             st_attrs = {}
+             st_attrs[stid] = l3.attrs.copy()
+             # adding timestamps info
+             st_attrs[stid]["first_timestamp"] = (
+                 l3.time.isel(time=0).dt.strftime(date_format="%Y-%m-%d %H:%M:%S").item()
+             )
+             st_attrs[stid]["last_timestamp"] = (
+                 l3.time.isel(time=-1)
+                 .dt.strftime(date_format="%Y-%m-%d %H:%M:%S")
+                 .item()
+             )
+
+             # then stripping attributes
+             attrs_list = list(l3.attrs.keys())
+             for k in attrs_list:
+                 del l3.attrs[k]
+
+             # initializing l3_merged with l3
+             l3_merged = l3.copy()
+
+             # creating the station_attributes attribute in l3_merged
+             l3_merged.attrs["stations_attributes"] = st_attrs
+
+         else:
+             # if l3 (older data) is missing variables compared to l3_merged (newer data)
+             # , then we fill them with nan
+             for v in l3_merged.data_vars:
+                 if v not in l3.data_vars:
+                     l3[v] = l3.t_u * np.nan
+             for v in l3.data_vars:
+                 if v not in l3_merged.data_vars:
+                     l3_merged[v] = l3_merged.t_u * np.nan
+
+             # saving attributes of station under an attribute called $stid
+             st_attrs = l3_merged.attrs.get("stations_attributes", {})
+             st_attrs[stid] = l3.attrs.copy()
+             l3_merged.attrs["stations_attributes"] = st_attrs
+
+             # then stripping attributes
+             attrs_list = list(l3.attrs.keys())
+             for k in attrs_list:
+                 del l3.attrs[k]
+
+             l3_merged.attrs["stations_attributes"][stid]["first_timestamp"] = (
+                 l3.time.isel(time=0).dt.strftime(date_format="%Y-%m-%d %H:%M:%S").item()
+             )
+             l3_merged.attrs["stations_attributes"][stid]["last_timestamp"] = (
+                 l3_merged.time.isel(time=0)
+                 .dt.strftime(date_format="%Y-%m-%d %H:%M:%S")
+                 .item()
+             )
+
+             # adjusting surface height in the most recent data (l3_merged)
+             # so that it shows continuity with the older data (l3)
+             if "z_surf_combined" in l3_merged.keys() and "z_surf_combined" in l3.keys():
+                 if (
+                     l3_merged.z_surf_combined.notnull().any()
+                     and l3.z_surf_combined.notnull().any()
+                 ):
+                     l3_merged["z_surf_combined"] = (
+                         "time",
+                         align_surface_heights(
+                             l3_merged.z_surf_combined.to_series(),
+                             l3.z_surf_combined.to_series(),
+                         ),
+                     )
+             if "z_ice_surf" in l3_merged.keys() and "z_ice_surf" in l3.keys():
+                 if (
+                     l3_merged.z_ice_surf.notnull().any()
+                     and l3.z_ice_surf.notnull().any()
+                 ):
+                     l3_merged["z_ice_surf"] = (
+                         "time",
+                         align_surface_heights(
+                             l3_merged.z_ice_surf.to_series(), l3.z_ice_surf.to_series()
+                         ),
+                     )

-             # Merge arrays
-             print(f'Combining {args.file1} with {args.file2}...')
-             name = n1
-             all_ds = ds1.combine_first(ds2)
+             # saves attributes
+             attrs = l3_merged.attrs
+             # merging by time block
+             l3_merged = xr.concat(
+                 (
+                     l3.sel(
+                         time=slice(l3.time.isel(time=0), l3_merged.time.isel(time=0))
+                     ),
+                     l3_merged,
+                 ),
+                 dim="time",
+             )

-             # Re-calculate corrected precipitation
-             if hasattr(all_ds, 'precip_u_cor'):
-                 if ~all_ds['precip_u_cor'].isnull().all():
-                     all_ds['precip_u_cor'], _ = correctPrecip(all_ds['precip_u'],
-                                                               all_ds['wspd_u'])
-             if hasattr(all_ds, 'precip_l_cor'):
-                 if ~all_ds['precip_l_cor'].isnull().all():
-                     all_ds['precip_l_cor'], _ = correctPrecip(all_ds['precip_l'],
-                                                               all_ds['wspd_l'])
-         else:
-             print(f'Mismatched station names {n1}, {n2}')
-             exit()
-
-     elif os.path.isfile(args.file1):
-         ds1, name = loadArr(args.file1)
-         print(f'Only one file found {args.file1}...')
-         all_ds = ds1
-
-     elif os.path.isfile(args.file2):
-         ds2, name = loadArr(args.file2)
-         print(f'Only one file found {args.file2}...')
-         all_ds = ds2
-
-     else:
-         print(f'Invalid files {args.file1}, {args.file2}')
-         exit()
-
-     # Get hourly, daily and monthly datasets
-     print('Resampling L3 data to hourly, daily and monthly resolutions...')
-     l3_h = resampleL3(all_ds, '60min')
-     l3_d = resampleL3(all_ds, '1D')
-     l3_m = resampleL3(all_ds, 'M')
-
-     print(f'Adding variable information from {args.variables}...')
-
-     # Load variables look-up table
-     var = getVars(args.variables)
-
-     # Round all values to specified decimals places
-     l3_h = roundValues(l3_h, var)
-     l3_d = roundValues(l3_d, var)
-     l3_m = roundValues(l3_m, var)
-
-     # Get columns to keep
-     if hasattr(all_ds, 'p_l'):
-         col_names = getColNames(var, 2, args.datatype.lower())
-     else:
-         col_names = getColNames(var, 1, args.datatype.lower())
-
-     # Assign station id
-     for l in [l3_h, l3_d, l3_m]:
-         l.attrs['station_id'] = name
-
-     # Assign metadata
-     print(f'Adding metadata from {args.metadata}...')
-     m = getMeta(args.metadata)
-     l3_h = addMeta(l3_h, m)
-     l3_d = addMeta(l3_d, m)
-     l3_m = addMeta(l3_m, m)
-
-     # Set up output path
-     out = os.path.join(args.outpath, name)
-
-     # Write to files
-     writeAll(out, name, l3_h, l3_d, l3_m, col_names)
-     print(f'Files saved to {os.path.join(out, name)}...')
-
- if __name__ == "__main__":
-     join_l3()
+             # restauring attributes
+             l3_merged.attrs = attrs
+
+     # Assign site id
+     if not l3_merged:
+         logger.error("No level 3 station data file found for " + site)
+         return None, sorted_list_station_data
+     l3_merged.attrs["site_id"] = site
+     l3_merged.attrs["stations"] = " ".join(sorted_stids)
+     l3_merged.attrs["level"] = "L3"
+     l3_merged.attrs["project"] = sorted_list_station_data[0][1]["project"]
+     l3_merged.attrs["location_type"] = sorted_list_station_data[0][1]["location_type"]
+
+     site_source = dict(
+         site_config_source_hash=get_commit_hash_and_check_dirty(config_folder),
+         gcnet_source_hash=get_commit_hash_and_check_dirty(folder_gcnet),
+     )
+
+     for stid, station_attributes in l3_merged.attrs["stations_attributes"].items():
+         if "source" in station_attributes.keys():
+             station_source = json.loads(station_attributes["source"])
+             for k, v in station_source.items():
+                 if k in site_source and site_source[k] != v:
+                     site_source[k] = "multiple"
+                 else:
+                     site_source[k] = v
+     l3_merged.attrs["source"] = json.dumps(site_source)
+
+     v = pypromice.resources.load_variables(variables)
+     m = pypromice.resources.load_metadata(metadata)
+     if outpath is not None:
+         prepare_and_write(l3_merged, outpath, v, m, "60min")
+         prepare_and_write(l3_merged, outpath, v, m, "1D")
+         prepare_and_write(l3_merged, outpath, v, m, "M")
+     return l3_merged, sorted_list_station_data
+
+
+ def main():
+     args = parse_arguments_joinl3()
+     _, _ = join_l3(
+         args.config_folder,
+         args.site,
+         args.folder_l3,
+         args.folder_gcnet,
+         args.outpath,
+         args.variables,
+         args.metadata,
+     )
+
+
+ if __name__ == "__main__":
+     main()
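
The align_surface_heights docstring in the diff above describes three regimes: overlapping records, a gap of up to a week (median matching), and a longer gap (linear fit). Below is a minimal synthetic illustration of the short-gap case, assuming the function is importable from pypromice.process.join_l3 as listed in the files above; note that the signature takes the newer series first, and the data here are invented for illustration only.

    import numpy as np
    import pandas as pd

    from pypromice.process.join_l3 import align_surface_heights

    # Invented surface-height records: the old one ends about a day before the
    # new one starts, and the new one sits roughly 3 m too high.
    old = pd.Series(
        np.linspace(100.0, 101.0, 24 * 30),
        index=pd.date_range("2020-01-01", periods=24 * 30, freq="60min"),
    )
    new = pd.Series(
        np.linspace(104.0, 105.0, 24 * 30),
        index=pd.date_range("2020-02-01", periods=24 * 30, freq="60min"),
    )

    # Gap < 7 days, so the newer series is shifted so that the median of its
    # first week matches the median of the old series' last week (about -3.2 m).
    aligned = align_surface_heights(new, old)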