pypromice 1.3.6__py3-none-any.whl → 1.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pypromice might be problematic.

Files changed (53)
  1. pypromice/postprocess/bufr_to_csv.py +15 -3
  2. pypromice/postprocess/bufr_utilities.py +91 -18
  3. pypromice/postprocess/create_bufr_files.py +178 -0
  4. pypromice/postprocess/get_bufr.py +248 -397
  5. pypromice/postprocess/make_metadata_csv.py +214 -0
  6. pypromice/postprocess/real_time_utilities.py +41 -11
  7. pypromice/process/L0toL1.py +12 -5
  8. pypromice/process/L1toL2.py +69 -14
  9. pypromice/process/L2toL3.py +1034 -186
  10. pypromice/process/aws.py +139 -808
  11. pypromice/process/get_l2.py +90 -0
  12. pypromice/process/get_l2tol3.py +111 -0
  13. pypromice/process/join_l2.py +112 -0
  14. pypromice/process/join_l3.py +551 -120
  15. pypromice/process/load.py +161 -0
  16. pypromice/process/resample.py +147 -0
  17. pypromice/process/utilities.py +68 -0
  18. pypromice/process/write.py +503 -0
  19. pypromice/qc/github_data_issues.py +10 -16
  20. pypromice/qc/persistence.py +52 -30
  21. pypromice/resources/__init__.py +28 -0
  22. pypromice/{process/metadata.csv → resources/file_attributes.csv} +0 -2
  23. pypromice/resources/variable_aliases_GC-Net.csv +78 -0
  24. pypromice/resources/variables.csv +106 -0
  25. pypromice/station_configuration.py +118 -0
  26. pypromice/tx/get_l0tx.py +7 -4
  27. pypromice/tx/payload_formats.csv +1 -0
  28. pypromice/tx/tx.py +27 -6
  29. pypromice/utilities/__init__.py +0 -0
  30. pypromice/utilities/git.py +62 -0
  31. {pypromice-1.3.6.dist-info → pypromice-1.4.1.dist-info}/METADATA +4 -4
  32. pypromice-1.4.1.dist-info/RECORD +53 -0
  33. {pypromice-1.3.6.dist-info → pypromice-1.4.1.dist-info}/WHEEL +1 -1
  34. pypromice-1.4.1.dist-info/entry_points.txt +13 -0
  35. pypromice/postprocess/station_configurations.toml +0 -762
  36. pypromice/process/get_l3.py +0 -46
  37. pypromice/process/variables.csv +0 -92
  38. pypromice/qc/persistence_test.py +0 -150
  39. pypromice/test/test_config1.toml +0 -69
  40. pypromice/test/test_config2.toml +0 -54
  41. pypromice/test/test_email +0 -75
  42. pypromice/test/test_payload_formats.csv +0 -4
  43. pypromice/test/test_payload_types.csv +0 -7
  44. pypromice/test/test_percentile.py +0 -229
  45. pypromice/test/test_raw1.txt +0 -4468
  46. pypromice/test/test_raw_DataTable2.txt +0 -11167
  47. pypromice/test/test_raw_SlimTableMem1.txt +0 -1155
  48. pypromice/test/test_raw_transmitted1.txt +0 -15411
  49. pypromice/test/test_raw_transmitted2.txt +0 -28
  50. pypromice-1.3.6.dist-info/RECORD +0 -53
  51. pypromice-1.3.6.dist-info/entry_points.txt +0 -8
  52. {pypromice-1.3.6.dist-info → pypromice-1.4.1.dist-info}/LICENSE.txt +0 -0
  53. {pypromice-1.3.6.dist-info → pypromice-1.4.1.dist-info}/top_level.txt +0 -0
pypromice/process/load.py
@@ -0,0 +1,161 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Load module
+"""
+from datetime import timedelta
+from typing import Sequence, Optional
+
+import logging
+import os
+import pandas as pd
+import toml
+import xarray as xr
+
+logger = logging.getLogger(__name__)
+
+
+def getConfig(
+    config_file, inpath, default_columns: Sequence[str] = ("msg_lat", "msg_lon")
+):
+    """Load configuration from .toml file. PROMICE .toml files support defining
+    features at the top level which apply to all nested properties, but do not
+    overwrite nested properties if they are already defined
+
+    Parameters
+    ----------
+    config_file : str
+        TOML file path
+    inpath : str
+        Input folder directory where L0 files can be found
+
+    Returns
+    -------
+    conf : dict
+        Configuration dictionary
+    """
+    conf = toml.load(config_file)
+    # Move all top-level keys to nested properties, if they are not already
+    # defined in the nested properties
+    top = [_ for _ in conf.keys() if not type(conf[_]) is dict]
+    subs = [_ for _ in conf.keys() if type(conf[_]) is dict]
+    for s in subs:
+        for t in top:
+            if t not in conf[s].keys():
+                conf[s][t] = conf[t]
+
+        # Record the config file and the L0 file path (from the section name)
+        conf[s]["conf"] = config_file
+        conf[s]["file"] = os.path.join(inpath, s)
+        conf[s]["columns"].extend(default_columns)
+
+    for t in top:
+        conf.pop(t)  # Delete all top-level keys because each file
+        # should carry all properties with it
+    for k in conf.keys():  # Check required fields are present
+        for field in ["columns", "station_id", "format", "skiprows"]:
+            assert field in conf[k].keys(), field + " not in config keys"
+    return conf
+
+
+def getL0(
+    infile: str,
+    nodata,
+    cols,
+    skiprows,
+    file_version,
+    delimiter=",",
+    comment="#",
+    time_offset: Optional[float] = None,
+) -> xr.Dataset:
+    """Read L0 data file into an xarray Dataset
+
+    Parameters
+    ----------
+    infile : str
+        L0 file path
+    nodata : list
+        List of values to interpret as NaN
+    cols : list
+        List of columns in file
+    skiprows : int
+        Number of header rows to skip
+    file_version : int
+        Version of L0 file
+    delimiter : str
+        String delimiter for L0 file
+    comment : str
+        Character that marks commented sections in the L0 file
+    time_offset : Optional[float]
+        Time offset in hours for correcting non-UTC time data
+
+    Returns
+    -------
+    ds : xarray.Dataset
+        L0 Dataset
+    """
+    if file_version == 1:
+        df = pd.read_csv(
+            infile,
+            comment=comment,
+            index_col=0,
+            na_values=nodata,
+            names=cols,
+            sep=delimiter,
+            skiprows=skiprows,
+            skip_blank_lines=True,
+            usecols=range(len(cols)),
+            low_memory=False,
+        )
+        df["time"] = pd.to_datetime(
+            df.year.astype(str)
+            + df.doy.astype(str).str.zfill(3)
+            + df.hhmm.astype(str).str.zfill(4),
+            format="%Y%j%H%M",
+        )
+        df = df.set_index("time")
+
+    else:
+        df = pd.read_csv(
+            infile,
+            comment=comment,
+            index_col=0,
+            na_values=nodata,
+            names=cols,
+            parse_dates=True,
+            sep=delimiter,
+            skiprows=skiprows,
+            skip_blank_lines=True,
+            usecols=range(len(cols)),
+            low_memory=False,
+        )
+        try:
+            df.index = pd.to_datetime(df.index)
+        except ValueError as e:
+            logger.info("\n" + infile)
+            logger.info("\nValueError:")
+            logger.info(e)
+            logger.info("\t\t> Trying pd.to_datetime with format=mixed")
+            try:
+                df.index = pd.to_datetime(df.index, format="mixed")
+            except Exception as e:
+                logger.info("\nDateParseError:")
+                logger.info(e)
+                logger.info(
+                    "\t\t> Trying again removing apostrophes in timestamp (old files format)"
+                )
+                df.index = pd.to_datetime(df.index.str.replace('"', ""))
+
+    if time_offset is not None:
+        df.index = df.index + timedelta(hours=time_offset)
+
+    # Drop SKIP columns
+    for c in df.columns:
+        if c[0:4] == "SKIP":
+            df.drop(columns=c, inplace=True)
+
+    # Carry relevant metadata with ds
+    ds = xr.Dataset.from_dataframe(df)
+    ds.attrs["level"] = "L0"
+
+    return ds
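
For orientation, here is a minimal sketch of how the new load module might be driven (not taken from the package; the config path, data folder, and fallback values are assumptions):

```python
# Hypothetical usage of pypromice.process.load (paths and defaults are
# illustrative assumptions, not values shipped with the package)
from pypromice.process.load import getConfig, getL0

# getConfig returns one dict per TOML section (one section per L0 file),
# with top-level keys copied into each section
conf = getConfig("config/QAS_L.toml", "data/QAS_L")
for filename, cfg in conf.items():
    ds = getL0(
        infile=cfg["file"],                         # set by getConfig
        nodata=cfg.get("nodata", ["-999", "NAN"]),  # assumed fallback
        cols=cfg["columns"],
        skiprows=cfg["skiprows"],
        file_version=cfg.get("file_version", -1),   # assumed fallback
        time_offset=cfg.get("time_offset"),
    )
    print(filename, ds.attrs["level"], ds.sizes["time"])
```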
pypromice/process/resample.py
@@ -0,0 +1,147 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Mon Jun 10 10:58:39 2024
+
+@author: pho
+"""
+import logging
+import numpy as np
+import xarray as xr
+from pypromice.process.L1toL2 import calcDirWindSpeeds
+
+logger = logging.getLogger(__name__)
+
+
+def resample_dataset(ds_h, t):
+    '''Resample L2 AWS data, e.g. hourly to daily average. This uses pandas
+    DataFrame resampling as a work-around for xarray Dataset resampling,
+    which is a lengthy process taking ~2-3 minutes per operation:
+    ds_d = ds_h.resample({'time':"1D"}).mean()
+    This has since been fixed upstream, so the xarray approach could now be
+    implemented here:
+    https://github.com/pydata/xarray/issues/4498#event-6610799698
+
+    Parameters
+    ----------
+    ds_h : xarray.Dataset
+        L3 AWS dataset either at 10 min (for raw data) or hourly (for tx data)
+    t : str
+        Resample factor, same variable definition as in
+        pandas.DataFrame.resample()
+
+    Returns
+    -------
+    ds_d : xarray.Dataset
+        L3 AWS dataset resampled to the frequency defined by t
+    '''
+    df_d = ds_h.to_dataframe().resample(t).mean()
+
+    # Taking the 10 min data and using it as instantaneous values:
+    if (t == '60min') and (ds_h.time.diff(dim='time').isel(time=0).dt.total_seconds() == 600):
+        cols_to_update = ['p_i', 't_i', 'rh_i', 'rh_i_cor', 'wspd_i', 'wdir_i', 'wspd_x_i', 'wspd_y_i']
+        for col in cols_to_update:
+            df_d[col] = ds_h.reindex(time=df_d.index)[col.replace('_i', '_u')].values
+            if col == 'p_i':
+                df_d[col] = df_d[col].values - 1000
+
+    # Recalculating wind direction from averaged directional wind speeds
+    for var in ['wdir_u', 'wdir_l']:
+        boom = var.split('_')[1]
+        if var in df_d.columns:
+            if ('wspd_x_' + boom in df_d.columns) and ('wspd_y_' + boom in df_d.columns):
+                df_d[var] = _calcWindDir(df_d['wspd_x_' + boom], df_d['wspd_y_' + boom])
+            else:
+                logger.info(var + ' in dataframe but not wspd_x_' + boom + ' nor wspd_y_' + boom + ', recalculating them')
+                ds_h['wspd_x_' + boom], ds_h['wspd_y_' + boom] = calcDirWindSpeeds(ds_h['wspd_' + boom], ds_h['wdir_' + boom])
+                df_d[['wspd_x_' + boom, 'wspd_y_' + boom]] = ds_h[['wspd_x_' + boom, 'wspd_y_' + boom]].to_dataframe().resample(t).mean()
+                df_d[var] = _calcWindDir(df_d['wspd_x_' + boom], df_d['wspd_y_' + boom])
+
+    # Recalculating relative humidity from average vapour pressure and average
+    # saturation vapour pressure
+    for var in ['rh_u', 'rh_l']:
+        lvl = var.split('_')[1]
+        if var in df_d.columns:
+            if 't_' + lvl in ds_h.keys():
+                es_wtr, es_cor = calculateSaturationVaporPressure(ds_h['t_' + lvl])
+                p_vap = ds_h[var] / 100 * es_wtr
+
+                df_d[var] = (p_vap.to_series().resample(t).mean()
+                             / es_wtr.to_series().resample(t).mean()) * 100
+                if var + '_cor' in df_d.keys():
+                    df_d[var + '_cor'] = (p_vap.to_series().resample(t).mean()
+                                          / es_cor.to_series().resample(t).mean()) * 100
+
+    # Passing each variable's attributes to the resampled dataset
+    vals = []
+    for c in df_d.columns:
+        if c in ds_h.data_vars:
+            vals.append(xr.DataArray(
+                data=df_d[c], dims=['time'],
+                coords={'time': df_d.index}, attrs=ds_h[c].attrs))
+        else:
+            vals.append(xr.DataArray(
+                data=df_d[c], dims=['time'],
+                coords={'time': df_d.index}, attrs=None))
+
+    ds_d = xr.Dataset(dict(zip(df_d.columns, vals)), attrs=ds_h.attrs)
+    return ds_d
+
+
+def calculateSaturationVaporPressure(t, T_0=273.15, T_100=373.15, es_0=6.1071,
+                                     es_100=1013.246, eps=0.622):
+    '''Calculate saturation vapour pressure (Goff-Gratch formulation)
+
+    Parameters
+    ----------
+    t : xarray.DataArray
+        Air temperature (C)
+    T_0 : float
+        Melting point temperature (K). Default is 273.15.
+    T_100 : float
+        Steam point temperature (K). Default is 373.15.
+    es_0 : float
+        Saturation vapour pressure at the melting point (hPa)
+    es_100 : float
+        Saturation vapour pressure at the steam point temperature (hPa)
+    eps : float
+        Ratio of molar masses of water vapour and dry air (unused here)
+
+    Returns
+    -------
+    xarray.DataArray
+        Saturation vapour pressure with regard to water above 0 C (hPa)
+    xarray.DataArray
+        Saturation vapour pressure where subfreezing timestamps are with
+        regard to ice (hPa)
+    '''
+    # Saturation vapour pressure above 0 C (hPa)
+    es_wtr = 10**(-7.90298 * (T_100 / (t + T_0) - 1) + 5.02808 * np.log10(T_100 / (t + T_0))
+                  - 1.3816E-7 * (10**(11.344 * (1 - (t + T_0) / T_100)) - 1)
+                  + 8.1328E-3 * (10**(-3.49149 * (T_100 / (t + T_0) - 1)) - 1) + np.log10(es_100))
+
+    # Saturation vapour pressure below 0 C (hPa)
+    es_ice = 10**(-9.09718 * (T_0 / (t + T_0) - 1) - 3.56654
+                  * np.log10(T_0 / (t + T_0)) + 0.876793
+                  * (1 - (t + T_0) / T_0)
+                  + np.log10(es_0))
+
+    # Saturation vapour pressure (hPa)
+    es_cor = xr.where(t < 0, es_ice, es_wtr)
+
+    return es_wtr, es_cor
+
+
+def _calcWindDir(wspd_x, wspd_y):
+    '''Calculate wind direction in degrees
+
+    Parameters
+    ----------
+    wspd_x : xarray.DataArray
+        Wind speed in X direction
+    wspd_y : xarray.DataArray
+        Wind speed in Y direction
+
+    Returns
+    -------
+    wdir : xarray.DataArray
+        Wind direction
+    '''
+    deg2rad = np.pi / 180
+    rad2deg = 1 / deg2rad
+    wdir = np.arctan2(wspd_x, wspd_y) * rad2deg
+    wdir = (wdir + 360) % 360
+    return wdir
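
The wind-direction block in resample_dataset averages wspd_x/wspd_y and then reconstructs wdir with _calcWindDir, rather than averaging wdir itself, because direction is circular and a plain mean is wrong across north. A self-contained illustration (not from the package, using the same sin/cos component convention as _calcWindDir):

```python
# Averaging wind direction directly fails across north; averaging the
# directional components, as resample_dataset does, gives the right answer
import numpy as np

wdir = np.array([350.0, 10.0])   # degrees, both roughly northerly
wspd = np.array([5.0, 5.0])      # m/s
wspd_x = wspd * np.sin(np.deg2rad(wdir))
wspd_y = wspd * np.cos(np.deg2rad(wdir))

naive = wdir.mean()  # 180.0 -- due south, clearly wrong
vector = (np.degrees(np.arctan2(wspd_x.mean(), wspd_y.mean())) + 360) % 360
print(naive, vector)  # 180.0 vs ~0.0 (north)
```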
pypromice/process/utilities.py
@@ -0,0 +1,68 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Utilities module for data formatting, populating and metadata handling
+"""
+import numpy as np
+
+
+def popCols(ds, names):
+    '''Populate dataset with all given variable names
+
+    Parameters
+    ----------
+    ds : xr.Dataset
+        Dataset
+    names : list
+        List of variable names to populate
+
+    Returns
+    -------
+    ds : xr.Dataset
+        Dataset with all given variable names present (missing ones as NaN)
+    '''
+    for v in names:
+        if v not in list(ds.variables):
+            ds[v] = (('time'), np.arange(ds['time'].size) * np.nan)
+    return ds
+
+
+def addBasicMeta(ds, vars_df):
+    '''Use a variable lookup table DataFrame to add the basic metadata
+    to the xarray dataset. This is later amended to finalise L3
+
+    Parameters
+    ----------
+    ds : xr.Dataset
+        Dataset to add metadata to
+    vars_df : pd.DataFrame
+        Metadata dataframe
+
+    Returns
+    -------
+    ds : xr.Dataset
+        Dataset with added metadata
+    '''
+    for v in vars_df.index:
+        if v == 'time':
+            continue  # coordinate variable, not normal var
+        if v not in list(ds.variables):
+            continue
+        for c in ['standard_name', 'long_name', 'units']:
+            if isinstance(vars_df[c][v], float) and np.isnan(vars_df[c][v]):
+                continue
+            ds[v].attrs[c] = vars_df[c][v]
+    return ds
+
+
+def populateMeta(ds, conf, skip):
+    '''Populate L0 Dataset with metadata dictionary
+
+    Parameters
+    ----------
+    ds : xarray.Dataset
+        L0 dataset
+    conf : dict
+        Metadata dictionary
+    skip : list
+        List of column names to skip parsing to metadata
+
+    Returns
+    -------
+    ds : xarray.Dataset
+        L0 dataset with metadata populated as Dataset attributes
+    '''
+    # e.g. skip = ["columns", "skiprows"]
+    for k in conf.keys():
+        if k not in skip:
+            ds.attrs[k] = conf[k]
+    return ds
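
Finally, a sketch of how these helpers might chain together during processing (variable names, lookup-table rows, and attribute keys are invented for illustration):

```python
# Hypothetical end-to-end use of the utilities helpers
import pandas as pd
import xarray as xr
from pypromice.process.utilities import popCols, addBasicMeta, populateMeta

ds = xr.Dataset(coords={"time": pd.date_range("2024-06-01", periods=3, freq="h")})
ds = popCols(ds, ["t_u", "p_u"])  # ensure expected variables exist, filled with NaN

# One lookup-table row per variable, in the spirit of resources/variables.csv
vars_df = pd.DataFrame(
    {"standard_name": ["air_temperature"],
     "long_name": ["Air temperature"],
     "units": ["degrees_C"]},
    index=["t_u"],
)
ds = addBasicMeta(ds, vars_df)  # attach CF-style attributes per variable

# Copy config entries onto the Dataset as global attributes
ds = populateMeta(ds, {"station_id": "QAS_L", "columns": ["t_u"]}, skip=["columns"])
print(ds["t_u"].attrs, ds.attrs)
```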