pypromice 1.3.5__py3-none-any.whl → 1.4.0__py3-none-any.whl
This diff shows the contents of the two publicly released package versions as they appear in their public registry, and is provided for informational purposes only.
Potentially problematic release: this version of pypromice might be problematic.
- pypromice/get/get.py +19 -19
- pypromice/postprocess/bufr_to_csv.py +6 -1
- pypromice/postprocess/bufr_utilities.py +91 -18
- pypromice/postprocess/create_bufr_files.py +178 -0
- pypromice/postprocess/get_bufr.py +248 -397
- pypromice/postprocess/make_metadata_csv.py +214 -0
- pypromice/postprocess/real_time_utilities.py +41 -11
- pypromice/process/L0toL1.py +12 -5
- pypromice/process/L1toL2.py +159 -30
- pypromice/process/L2toL3.py +1034 -187
- pypromice/process/aws.py +131 -752
- pypromice/process/get_l2.py +90 -0
- pypromice/process/get_l2tol3.py +111 -0
- pypromice/process/join_l2.py +112 -0
- pypromice/process/join_l3.py +551 -120
- pypromice/process/load.py +161 -0
- pypromice/process/resample.py +128 -0
- pypromice/process/utilities.py +68 -0
- pypromice/process/write.py +503 -0
- pypromice/qc/github_data_issues.py +10 -16
- pypromice/qc/percentiles/thresholds.csv +2 -2
- pypromice/qc/persistence.py +71 -25
- pypromice/resources/__init__.py +28 -0
- pypromice/{process/metadata.csv → resources/file_attributes.csv} +0 -2
- pypromice/resources/variable_aliases_GC-Net.csv +78 -0
- pypromice/resources/variables.csv +106 -0
- pypromice/station_configuration.py +118 -0
- pypromice/tx/get_l0tx.py +7 -4
- pypromice/tx/payload_formats.csv +1 -0
- pypromice/tx/tx.py +27 -6
- pypromice/utilities/__init__.py +0 -0
- pypromice/utilities/git.py +61 -0
- {pypromice-1.3.5.dist-info → pypromice-1.4.0.dist-info}/METADATA +12 -21
- pypromice-1.4.0.dist-info/RECORD +53 -0
- {pypromice-1.3.5.dist-info → pypromice-1.4.0.dist-info}/WHEEL +1 -1
- pypromice-1.4.0.dist-info/entry_points.txt +13 -0
- pypromice/postprocess/station_configurations.toml +0 -762
- pypromice/process/get_l3.py +0 -46
- pypromice/process/variables.csv +0 -92
- pypromice/qc/persistence_test.py +0 -150
- pypromice/test/test_config1.toml +0 -69
- pypromice/test/test_config2.toml +0 -54
- pypromice/test/test_email +0 -75
- pypromice/test/test_payload_formats.csv +0 -4
- pypromice/test/test_payload_types.csv +0 -7
- pypromice/test/test_percentile.py +0 -229
- pypromice/test/test_raw1.txt +0 -4468
- pypromice/test/test_raw_DataTable2.txt +0 -11167
- pypromice/test/test_raw_SlimTableMem1.txt +0 -1155
- pypromice/test/test_raw_transmitted1.txt +0 -15411
- pypromice/test/test_raw_transmitted2.txt +0 -28
- pypromice-1.3.5.dist-info/RECORD +0 -53
- pypromice-1.3.5.dist-info/entry_points.txt +0 -8
- {pypromice-1.3.5.dist-info → pypromice-1.4.0.dist-info}/LICENSE.txt +0 -0
- {pypromice-1.3.5.dist-info → pypromice-1.4.0.dist-info}/top_level.txt +0 -0
pypromice/postprocess/make_metadata_csv.py
ADDED
@@ -0,0 +1,214 @@
+#!/usr/bin/env python
+import os, sys, argparse
+import pandas as pd
+import xarray as xr
+import logging
+
+logging.basicConfig(
+    format="%(asctime)s; %(levelname)s; %(name)s; %(message)s",
+    level=logging.INFO,
+    stream=sys.stdout,
+)
+logger = logging.getLogger(__name__)
+
+def extract_metadata_from_nc(file_path: str, data_type: str, label_s_id: str) -> pd.Series:
+    """
+    Extract metadata from a NetCDF file and return it as a pandas Series.
+
+    Parameters:
+    - file_path (str): The path to the NetCDF file.
+    - data_type (str): The type of data ('station' or 'site').
+    - label_s_id (str): The label for the station or site ID.
+
+    Returns:
+    - pd.Series: A pandas Series containing the extracted metadata.
+    """
+    try:
+        with xr.open_dataset(file_path) as nc_file:
+            # Extract attributes
+            s_id = nc_file.attrs.get(label_s_id, 'N/A')
+            location_type = nc_file.attrs.get('location_type', 'N/A')
+            project = nc_file.attrs.get('project', 'N/A')
+            if data_type == 'site':
+                stations = nc_file.attrs.get('stations', s_id)
+            if data_type == 'station':
+                number_of_booms = nc_file.attrs.get('number_of_booms', 'N/A')
+
+            # Extract the time variable as datetime64
+            time_var = nc_file['time'].values.astype('datetime64[s]')
+
+            # Extract the first and last timestamps
+            date_installation_str = pd.Timestamp(time_var[0]).strftime('%Y-%m-%d')
+            last_valid_date_str = pd.Timestamp(time_var[-1]).strftime('%Y-%m-%d')
+
+            # Extract the first and last values of lat, lon, and alt
+            lat_installation = nc_file['lat'].isel(time=0).values.item()
+            lon_installation = nc_file['lon'].isel(time=0).values.item()
+            alt_installation = nc_file['alt'].isel(time=0).values.item()
+
+            lat_last_known = nc_file['lat'].isel(time=-1).values.item()
+            lon_last_known = nc_file['lon'].isel(time=-1).values.item()
+            alt_last_known = nc_file['alt'].isel(time=-1).values.item()
+
+            # Create a pandas Series for the metadata
+            if data_type == 'site':
+                row = pd.Series({
+                    'project': project.replace('\r',''),
+                    'location_type': location_type,
+                    'stations': stations,
+                    'date_installation': date_installation_str,
+                    'latitude_installation': lat_installation,
+                    'longitude_installation': lon_installation,
+                    'altitude_installation': alt_installation,
+                    'date_last_valid': last_valid_date_str,
+                    'latitude_last_valid': lat_last_known,
+                    'longitude_last_valid': lon_last_known,
+                    'altitude_last_valid': alt_last_known
+                }, name=s_id)
+            else:
+                row = pd.Series({
+                    'project': project.replace('\r',''),
+                    'number_of_booms': number_of_booms,
+                    'location_type': location_type,
+                    'date_installation': date_installation_str,
+                    'latitude_installation': lat_installation,
+                    'longitude_installation': lon_installation,
+                    'altitude_installation': alt_installation,
+                    'date_last_valid': last_valid_date_str,
+                    'latitude_last_valid': lat_last_known,
+                    'longitude_last_valid': lon_last_known,
+                    'altitude_last_valid': alt_last_known
+                }, name=s_id)
+            return row
+    except Exception as e:
+        logger.info(f"Warning: Error processing {file_path}: {str(e)}")
+        return pd.Series()  # Return an empty Series in case of an error
+
+def process_files(base_dir: str, csv_file_path: str, data_type: str) -> pd.DataFrame:
+    """
+    Process all files in the base directory to generate new metadata.
+
+    Parameters:
+    - base_dir (str): The base directory containing the NetCDF files.
+    - csv_file_path (str): The path to the existing metadata CSV file.
+    - data_type (str): The type of data ('station' or 'site').
+
+    Returns:
+    - pd.DataFrame: The combined metadata DataFrame.
+    """
+    label_s_id = 'station_id' if data_type == 'station' else 'site_id'
+
+    # Initialize a list to hold the rows (Series) of DataFrame
+    rows = []
+
+    # Read existing metadata if the CSV file exists
+    if os.path.exists(csv_file_path) and os.path.getsize(csv_file_path) > 0:
+        logger.info("Updating " + str(csv_file_path))
+        existing_metadata_df = pd.read_csv(csv_file_path, index_col=label_s_id)
+    else:
+        logger.info("Creating " + str(csv_file_path))
+        existing_metadata_df = pd.DataFrame()
+
+    # Track updated sites or stations to avoid duplicate updates
+    updated_s = []
+    new_s = []
+
+    # Traverse through all the subfolders and files in the base directory
+    for subdir, _, files in os.walk(base_dir):
+        for file in files:
+            if file.endswith('_hour.nc'):
+                file_path = os.path.join(subdir, file)
+                row = extract_metadata_from_nc(file_path, data_type, label_s_id)
+                if not row.empty:
+                    s_id = row.name
+                    if s_id in existing_metadata_df.index:
+                        # Compare with existing metadata
+                        existing_row = existing_metadata_df.loc[s_id]
+                        old_date_installation = existing_row['date_installation']
+                        old_last_valid_date = existing_row['date_last_valid']
+
+                        # Update the existing metadata
+                        existing_metadata_df.loc[s_id] = row
+
+                        # Print message if dates are updated
+                        if old_last_valid_date != row['date_last_valid']:
+                            logger.info(f"Updated {label_s_id}: {s_id} date_last_valid: {old_last_valid_date} --> {row['date_last_valid']}")
+
+                        updated_s.append(s_id)
+                    else:
+                        new_s.append(s_id)
+                        # Append new metadata row to the list
+                        rows.append(row)
+
+    # Convert the list of rows to a DataFrame
+    new_metadata_df = pd.DataFrame(rows)
+
+    # Concatenate the existing metadata with the new metadata
+    combined_metadata_df = pd.concat([existing_metadata_df, new_metadata_df], ignore_index=False)
+
+    # Exclude some sites
+    sites_to_exclude = [s for s in ['XXX', 'Roof_GEUS', 'Roof_PROMICE'] if s in combined_metadata_df.index]
+    excluded_metadata_df = combined_metadata_df.loc[sites_to_exclude].copy()
+    combined_metadata_df.drop(sites_to_exclude, inplace=True)
+
+    # Sort the DataFrame by index (s_id)
+    combined_metadata_df.sort_index(inplace=True)
+
+    # Print excluded lines
+    if not excluded_metadata_df.empty:
+        pd.set_option('display.max_columns', None)  # Show all columns
+        pd.set_option('display.max_colwidth', None)  # Show full width of columns
+        pd.set_option('display.width', None)  # Disable line wrapping
+        logger.info("\nExcluded lines from combined metadata.csv:")
+        print(excluded_metadata_df)
+
+    # Drop excluded lines from combined_metadata_df
+    combined_metadata_df.drop(sites_to_exclude, errors='ignore', inplace=True)
+
+    # Save to csv
+    combined_metadata_df.to_csv(csv_file_path, index_label=label_s_id)
+
+    return combined_metadata_df, existing_metadata_df, new_s, updated_s
+
+def compare_and_log_updates(combined_metadata_df: pd.DataFrame, existing_metadata_df: pd.DataFrame, new_s: list, updated_s: list):
+    """
+    Compare the combined metadata with the existing metadata and log the updates.
+
+    Parameters:
+    - combined_metadata_df (pd.DataFrame): The combined metadata DataFrame.
+    - existing_metadata_df (pd.DataFrame): The existing metadata DataFrame.
+    - new_s (list): List of new station/site IDs.
+    - updated_s (list): List of updated station/site IDs.
+    """
+    # Determine which lines were not updated (reused) and which were added
+    if not existing_metadata_df.empty:
+        reused_s = [s_id for s_id in existing_metadata_df.index if ((s_id not in new_s) & (s_id not in updated_s))]
+        reused_lines = existing_metadata_df.loc[reused_s]
+        added_lines = combined_metadata_df.loc[combined_metadata_df.index.difference(existing_metadata_df.index)]
+
+        logger.info("\nLines from the old metadata.csv that are reused (not updated):")
+        print(reused_lines)
+
+        if not added_lines.empty:
+            logger.info("\nLines that were not present in the old metadata.csv and are added:")
+            print(added_lines)
+    else:
+        logger.info("\nAll lines are added (no old metadata.csv found)")
+
+def main():
+    parser = argparse.ArgumentParser(description='Process station or site data.')
+    parser.add_argument('-t', '--type', choices=['station', 'site'],
+                        required=True,
+                        help='Type of data to process: "station" or "site"')
+    parser.add_argument('-r', '--root_dir', required=True, help='Root directory ' +
+                        'containing the aws-l3 station or site folder')
+    parser.add_argument('-m','--metadata_file', required=True,
+                        help='File path to metadata csv file (existing or '+
+                        'intended output path')
+
+    args = parser.parse_args()
+    combined_metadata_df, existing_metadata_df, new_s, updated_s = process_files(args.root_dir, args.metadata_file, args.type)
+    compare_and_log_updates(combined_metadata_df, existing_metadata_df, new_s, updated_s)
+
+if __name__ == '__main__':
+    main()
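For reference, a minimal sketch of driving the new metadata builder directly from Python instead of through its main() CLI; the paths and data type used here are purely illustrative:

# Hypothetical example: walk a local aws-l3 tree and write/refresh a metadata CSV.
from pypromice.postprocess.make_metadata_csv import process_files, compare_and_log_updates

combined_df, existing_df, new_ids, updated_ids = process_files(
    base_dir="aws-l3/station",                 # hypothetical root holding *_hour.nc files
    csv_file_path="AWS_station_metadata.csv",  # hypothetical CSV, created or updated in place
    data_type="station",                       # or "site"
)
compare_and_log_updates(combined_df, existing_df, new_ids, updated_ids)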
pypromice/postprocess/real_time_utilities.py
CHANGED
@@ -7,7 +7,7 @@ This includes:
 
 """
 import logging
-from typing import Optional
+from typing import Optional, Collection
 
 import numpy as np
 import pandas as pd
@@ -22,6 +22,7 @@ logger = logging.getLogger(__name__)
 def get_latest_data(
     df: pd.DataFrame,
     lin_reg_time_limit: str,
+    vars_to_skip: Optional[Collection[str]] = None,
 ) -> Optional[pd.Series]:
     """
     Determine instantaneous values for the latest valid timestamp in the input dataframe
@@ -66,16 +67,47 @@ def get_latest_data(
         lin_reg_time_limit,
     )
 
+    if last_valid_index not in df_limited.index:
+        logger.info("No valid data limited period")
+        return None
+
     # Apply smoothing to z_boom_u
     # require at least 2 hourly obs? Sometimes seeing once/day data for z_boom_u
-    df_limited = rolling_window(df_limited, "z_boom_u", "
-
+    df_limited = rolling_window(df_limited, "z_boom_u", "72h", 2, 3)
+
     # limit to single most recent valid row (convert to series)
     s_current = df_limited.loc[last_valid_index]
 
+    if vars_to_skip is not None:
+        s_current = filter_skipped_variables(s_current, vars_to_skip)
+
     return s_current
 
 
+def filter_skipped_variables(
+    row: pd.Series, vars_to_skip: Collection[str]
+) -> pd.Series:
+    """
+    Mutate input series by setting var_to_skip to np.nan
+
+    Parameters
+    ----------
+    row
+    vars_to_skip
+        List of variable names to be skipped
+
+    Returns
+    -------
+    Input series
+
+    """
+    vars_to_skip = set(row.keys()) & set(vars_to_skip)
+    for var_key in vars_to_skip:
+        row[var_key] = np.nan
+        logger.info("----> Skipping var: {}".format(var_key))
+    return row
+
+
 def rolling_window(df, column, window, min_periods, decimals) -> pd.DataFrame:
     """Apply a rolling window (smoothing) to the input column
 
@@ -145,9 +177,9 @@ def find_positions(df, time_limit):
     logger.info(f"last transmission: {df_limited.index.max()}")
 
     # Extrapolate recommended for altitude, optional for lat and lon.
-    df_limited, lat_valid = linear_fit(df_limited, "gps_lat",
-    df_limited, lon_valid = linear_fit(df_limited, "gps_lon",
-    df_limited, alt_valid = linear_fit(df_limited, "gps_alt",
+    df_limited, lat_valid = linear_fit(df_limited, "gps_lat", 7)
+    df_limited, lon_valid = linear_fit(df_limited, "gps_lon", 7)
+    df_limited, alt_valid = linear_fit(df_limited, "gps_alt", 4)
 
     # If we have no valid lat, lon or alt data in the df_limited window, then interpolate
     # using full tx dataset.
@@ -158,17 +190,15 @@ def find_positions(df, time_limit):
             logger.info(f"----> Using full history for linear extrapolation: {k}")
             logger.info(f"first transmission: {df.index.min()}")
             if k == "gps_alt":
-                df, valid = linear_fit(df, k,
+                df, valid = linear_fit(df, k, 2)
             else:
-                df, valid = linear_fit(df, k,
+                df, valid = linear_fit(df, k, 7)
             check_valid_again[k] = valid
             if check_valid_again[k] is True:
                 df_limited[f"{k}_fit"] = df.loc[df_limited.index, f"{k}_fit"]
             else:
                 logger.info(f"----> No data exists for {k}. Stubbing out with NaN.")
-                df_limited[f"{k}_fit"] = pd.Series(
-                    np.nan, index=df_limited.index
-                )
+                df_limited[f"{k}_fit"] = pd.Series(np.nan, index=df_limited.index)
 
     return df_limited
 
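A minimal sketch of the new vars_to_skip behaviour; the series contents below are purely illustrative:

import pandas as pd
from pypromice.postprocess.real_time_utilities import filter_skipped_variables

# Hypothetical latest-observation series for a station
row = pd.Series({"t_u": -12.3, "p_u": 812.5, "z_boom_u": 4.21})
row = filter_skipped_variables(row, ["z_boom_u"])
print(row["z_boom_u"])  # nan: skipped variables are blanked, the remaining entries are untouched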
pypromice/process/L0toL1.py
CHANGED
@@ -5,9 +5,9 @@ AWS Level 0 (L0) to Level 1 (L1) data processing
 import numpy as np
 import pandas as pd
 import xarray as xr
-import re
-
+import re, logging
 from pypromice.process.value_clipping import clip_values
+logger = logging.getLogger(__name__)
 
 
 def toL1(L0, vars_df, T_0=273.15, tilt_threshold=-100):
@@ -28,9 +28,10 @@ def toL1(L0, vars_df, T_0=273.15, tilt_threshold=-100):
     -------
     ds : xarray.Dataset
         Level 1 dataset
-    '''
+    '''
     assert(type(L0) == xr.Dataset)
     ds = L0
+    ds.attrs['level'] = 'L1'
 
     for l in list(ds.keys()):
         if l not in ['time', 'msg_i', 'gps_lat', 'gps_lon', 'gps_alt', 'gps_time']:
@@ -64,9 +65,15 @@ def toL1(L0, vars_df, T_0=273.15, tilt_threshold=-100):
     if ds['gps_lat'].dtype.kind == 'O': # Decode and reformat GPS information
         if 'NH' in ds['gps_lat'].dropna(dim='time').values[1]:
             ds = decodeGPS(ds, ['gps_lat','gps_lon','gps_time'])
+        elif 'L' in ds['gps_lat'].dropna(dim='time').values[1]:
+            logger.info('Found L in GPS string')
+            ds = decodeGPS(ds, ['gps_lat','gps_lon','gps_time'])
+            for l in ['gps_lat', 'gps_lon']:
+                ds[l] = ds[l]/100000
         else:
             try:
                 ds = decodeGPS(ds, ['gps_lat','gps_lon','gps_time']) # TODO this is a work around specifically for L0 RAW processing for THU_U. Find a way to make this slicker
+
             except:
                 print('Invalid GPS type {ds["gps_lat"].dtype} for decoding')
 
@@ -179,7 +186,7 @@ def addTimeShift(ds, vars_df):
     if ds.attrs['logger_type'] == 'CR1000X':
         # v3, data is hourly all year long
         # shift everything except instantaneous
-        df_a = df_a.shift(periods=-1, freq="
+        df_a = df_a.shift(periods=-1, freq="h")
         df_out = pd.concat([df_a, df_i], axis=1) # different columns, same datetime indices
         df_out = df_out.sort_index()
     elif ds.attrs['logger_type'] == 'CR1000':
@@ -247,7 +254,7 @@ def getPressDepth(z_pt, p, pt_antifreeze, pt_z_factor, pt_z_coef, pt_z_p_coef):
         rho_af = 1145
     else:
         rho_af = np.nan
-
+        logger.info('ERROR: Incorrect metadata: "pt_antifreeze" = ' +
                     f'{pt_antifreeze}. Antifreeze mix only supported at 50% or 100%')
         # assert(False)
 
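The new 'L'-type GPS branch above decodes the strings as before and then rescales latitude and longitude by a factor of 100000; a worked example of that scaling with a hypothetical value:

# Hypothetical decoded value from an 'L'-type GPS string
raw_gps_lat = 6625123
gps_lat_deg = raw_gps_lat / 100000   # 66.25123 degrees, matching the ds[l]/100000 step in toL1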
pypromice/process/L1toL2.py
CHANGED
@@ -3,6 +3,7 @@
 AWS Level 1 (L1) to Level 2 (L2) data processing
 """
 import logging
+from pathlib import Path
 
 import numpy as np
 import pandas as pd
@@ -23,6 +24,8 @@ logger = logging.getLogger(__name__)
 def toL2(
     L1: xr.Dataset,
     vars_df: pd.DataFrame,
+    data_flags_dir: Path,
+    data_adjustments_dir: Path,
     T_0=273.15,
     ews=1013.246,
     ei0=6.1071,
@@ -30,7 +33,18 @@ def toL2(
     eps_clear=9.36508e-6,
     emissivity=0.97,
 ) -> xr.Dataset:
-    '''Process one Level 1 (L1) product to Level 2
+    '''Process one Level 1 (L1) product to Level 2.
+    In this step we do:
+        - manual flagging and adjustments
+        - automated QC: persistence, percentile
+        - custom filter: gps_alt filter, NaN t_rad removed from dlr & ulr
+        - smoothing of tilt and rot
+        - calculation of rh with regards to ice in subfreezin conditions
+        - calculation of cloud coverage
+        - correction of dsr and usr for tilt
+        - filtering of dsr based on a theoritical TOA irradiance and grazing light
+        - calculation of albedo
+        - calculation of directional wind speed
 
     Parameters
     ----------
@@ -59,32 +73,52 @@ def toL2(
         Level 2 dataset
     '''
     ds = L1.copy(deep=True)                                      # Reassign dataset
+    ds.attrs['level'] = 'L2'
     try:
-        ds = adjustTime(ds)
-        ds = flagNAN(ds)
-        ds = adjustData(ds)
+        ds = adjustTime(ds, adj_dir=data_adjustments_dir.as_posix())   # Adjust time after a user-defined csv files
+        ds = flagNAN(ds, flag_dir=data_flags_dir.as_posix())           # Flag NaNs after a user-defined csv files
+        ds = adjustData(ds, adj_dir=data_adjustments_dir.as_posix())   # Adjust data after a user-defined csv files
     except Exception:
         logger.exception('Flagging and fixing failed:')
 
-
-
-
-
-
-
+    ds = persistence_qc(ds)                                            # Flag and remove persistence outliers
+    # if ds.attrs['format'] == 'TX':
+    #     # TODO: The configuration should be provided explicitly
+    #     outlier_detector = ThresholdBasedOutlierDetector.default()
+    #     ds = outlier_detector.filter_data(ds)  # Flag and remove percentile outliers
+
+    # filtering gps_lat, gps_lon and gps_alt based on the difference to a baseline elevation
+    # right now baseline elevation is gapfilled monthly median elevation
+    baseline_elevation = (ds.gps_alt.to_series().resample('MS').median()
+                          .reindex(ds.time.to_series().index, method='nearest')
+                          .ffill().bfill())
+    mask = (np.abs(ds.gps_alt - baseline_elevation) < 100) & ds.gps_alt.notnull()
+    ds[['gps_alt','gps_lon', 'gps_lat']] = ds[['gps_alt','gps_lon', 'gps_lat']].where(mask)
+
+    # removing dlr and ulr that are missing t_rad
+    # this is done now becasue t_rad can be filtered either manually or with persistence
+    ds['dlr'] = ds.dlr.where(ds.t_rad.notnull())
+    ds['ulr'] = ds.ulr.where(ds.t_rad.notnull())
+
+    # calculating realtive humidity with regard to ice
     T_100 = _getTempK(T_0)
     ds['rh_u_cor'] = correctHumidity(ds['rh_u'], ds['t_u'],
                                      T_0, T_100, ews, ei0)
 
-
-
-
-                                ds['dlr'], ds.attrs['station_id'])
-    ds['cc'] = (('time'), cc.data)
-    else:
-        # Default cloud cover for bedrock station for which tilt should be 0 anyway.
-        cc = 0.8
+    if ds.attrs['number_of_booms']==2:
+        ds['rh_l_cor'] = correctHumidity(ds['rh_l'], ds['t_l'],
+                                         T_0, T_100, ews, ei0)
 
+    if hasattr(ds,'t_i'):
+        if ~ds['t_i'].isnull().all():
+            ds['rh_i_cor'] = correctHumidity(ds['rh_i'], ds['t_i'],
+                                             T_0, T_100, ews, ei0)
+
+    # Determiune cloud cover for on-ice stations
+    cc = calcCloudCoverage(ds['t_u'], T_0, eps_overcast, eps_clear,    # Calculate cloud coverage
+                           ds['dlr'], ds.attrs['station_id'])
+    ds['cc'] = (('time'), cc.data)
+
     # Determine surface temperature
     ds['t_surf'] = calcSurfaceTemperature(T_0, ds['ulr'], ds['dlr'],   # Calculate surface temperature
                                           emissivity)
@@ -102,6 +136,11 @@ def toL2(
     else:
         lat = ds['gps_lat'].mean()
         lon = ds['gps_lon'].mean()
+
+    # smoothing tilt and rot
+    ds['tilt_x'] = smoothTilt(ds['tilt_x'])
+    ds['tilt_y'] = smoothTilt(ds['tilt_y'])
+    ds['rot'] = smoothRot(ds['rot'])
 
     deg2rad, rad2deg = _getRotation()                                  # Get degree-radian conversions
     phi_sensor_rad, theta_sensor_rad = calcTilt(ds['tilt_x'], ds['tilt_y'],   # Calculate station tilt
@@ -112,13 +151,15 @@ def toL2(
     ZenithAngle_rad, ZenithAngle_deg = calcZenith(lat, Declination_rad,   # Calculate zenith
                                                   HourAngle_rad, deg2rad,
                                                   rad2deg)
-
+
+
     # Correct Downwelling shortwave radiation
     DifFrac = 0.2 + 0.8 * cc
     CorFac_all = calcCorrectionFactor(Declination_rad, phi_sensor_rad,    # Calculate correction
                                       theta_sensor_rad, HourAngle_rad,
                                       ZenithAngle_rad, ZenithAngle_deg,
                                       lat, DifFrac, deg2rad)
+    CorFac_all = xr.where(ds['cc'].notnull(), CorFac_all, 1)
     ds['dsr_cor'] = ds['dsr'].copy(deep=True) * CorFac_all              # Apply correction
 
     AngleDif_deg = calcAngleDiff(ZenithAngle_rad, HourAngle_rad,        # Calculate angle between sun and sensor
@@ -145,9 +186,9 @@ def toL2(
     TOA_crit_nopass = (ds['dsr_cor'] > (0.9 * isr_toa + 10))            # Determine filter
     ds['dsr_cor'][TOA_crit_nopass] = np.nan                             # Apply filter and interpolate
     ds['usr_cor'][TOA_crit_nopass] = np.nan
-
-    ds['
-
+
+    ds['dsr_cor'] = ds.dsr_cor.where(ds.dsr.notnull())
+    ds['usr_cor'] = ds.usr_cor.where(ds.usr.notnull())
     # # Check sun position
     # sundown = ZenithAngle_deg >= 90
     # _checkSunPos(ds, OKalbedos, sundown, sunonlowerdome, TOA_crit_nopass)
@@ -160,22 +201,52 @@ def toL2(
     ds['precip_u_cor'], ds['precip_u_rate'] = correctPrecip(ds['precip_u'],
                                                             ds['wspd_u'])
     if ds.attrs['number_of_booms']==2:
-        ds['rh_l_cor'] = correctHumidity(ds['rh_l'], ds['t_l'],         # Correct relative humidity
-                                         T_0, T_100, ews, ei0)
-
         if ~ds['precip_l'].isnull().all() and precip_flag:              # Correct precipitation
             ds['precip_l_cor'], ds['precip_l_rate']= correctPrecip(ds['precip_l'],
                                                                     ds['wspd_l'])
 
-
-
-
-
+    # Get directional wind speed
+    ds['wdir_u'] = ds['wdir_u'].where(ds['wspd_u'] != 0)
+    ds['wspd_x_u'], ds['wspd_y_u'] = calcDirWindSpeeds(ds['wspd_u'], ds['wdir_u'])
+
+    if ds.attrs['number_of_booms']==2:
+        ds['wdir_l'] = ds['wdir_l'].where(ds['wspd_l'] != 0)
+        ds['wspd_x_l'], ds['wspd_y_l'] = calcDirWindSpeeds(ds['wspd_l'], ds['wdir_l'])
+
+    if hasattr(ds, 'wdir_i'):
+        if ~ds['wdir_i'].isnull().all() and ~ds['wspd_i'].isnull().all():
+            ds['wdir_i'] = ds['wdir_i'].where(ds['wspd_i'] != 0)
+            ds['wspd_x_i'], ds['wspd_y_i'] = calcDirWindSpeeds(ds['wspd_i'], ds['wdir_i'])
+
 
     ds = clip_values(ds, vars_df)
     return ds
 
 
+def calcDirWindSpeeds(wspd, wdir, deg2rad=np.pi/180):
+    '''Calculate directional wind speed from wind speed and direction
+
+    Parameters
+    ----------
+    wspd : xr.Dataarray
+        Wind speed data array
+    wdir : xr.Dataarray
+        Wind direction data array
+    deg2rad : float
+        Degree to radians coefficient. The default is np.pi/180
+
+    Returns
+    -------
+    wspd_x : xr.Dataarray
+        Wind speed in X direction
+    wspd_y : xr.Datarray
+        Wind speed in Y direction
+    '''
+    wspd_x = wspd * np.sin(wdir * deg2rad)
+    wspd_y = wspd * np.cos(wdir * deg2rad)
+    return wspd_x, wspd_y
+
+
 def calcCloudCoverage(T, T_0, eps_overcast, eps_clear, dlr, station_id):
     '''Calculate cloud cover from T and T_0
 
@@ -241,6 +312,65 @@ def calcSurfaceTemperature(T_0, ulr, dlr, emissivity):
     return t_surf
 
 
+def smoothTilt(da: xr.DataArray, threshold=0.2):
+    '''Smooth the station tilt
+
+    Parameters
+    ----------
+    da : xarray.DataArray
+        either X or Y tilt inclinometer measurements
+    threshold : float
+        threshold used in a standrad.-deviation based filter
+
+    Returns
+    -------
+    xarray.DataArray
+        either X or Y smoothed tilt inclinometer measurements
+    '''
+    # we calculate the moving standard deviation over a 3-day sliding window
+    # hourly resampling is necessary to make sure the same threshold can be used
+    # for 10 min and hourly data
+    moving_std_gap_filled = da.to_series().resample('h').median().rolling(
+        3*24, center=True, min_periods=2
+    ).std().reindex(da.time, method='bfill').values
+    # we select the good timestamps and gapfill assuming that
+    # - when tilt goes missing the last available value is used
+    # - when tilt is not available for the very first time steps, the first
+    # good value is used for backfill
+    return da.where(
+        moving_std_gap_filled < threshold
+    ).ffill(dim='time').bfill(dim='time')
+
+
+def smoothRot(da: xr.DataArray, threshold=4):
+    '''Smooth the station rotation
+
+    Parameters
+    ----------
+    da : xarray.DataArray
+        rotation measurements from inclinometer
+    threshold : float
+        threshold used in a standrad-deviation based filter
+
+    Returns
+    -------
+    xarray.DataArray
+        smoothed rotation measurements from inclinometer
+    '''
+    moving_std_gap_filled = da.to_series().resample('h').median().rolling(
+        3*24, center=True, min_periods=2
+    ).std().reindex(da.time, method='bfill').values
+    # same as for tilt with, in addition:
+    # - a resampling to daily values
+    # - a two week median smoothing
+    # - a resampling from these daily values to the original temporal resolution
+    return ('time', (da.where(moving_std_gap_filled <4).ffill(dim='time')
+                     .to_series().resample('D').median()
+                     .rolling(7*2,center=True,min_periods=2).median()
+                     .reindex(da.time, method='bfill').values
+                     ))
+
+
 def calcTilt(tilt_x, tilt_y, deg2rad):
     '''Calculate station tilt
 
@@ -323,7 +453,6 @@ def correctHumidity(rh, T, T_0, T_100, ews, ei0): #TODO f
 
     # Set to Groff & Gratch values when freezing, otherwise just rh
     rh_cor = rh.where(~freezing, other = rh*(e_s_wtr / e_s_ice))
-    rh_cor = rh_cor.where(T.notnull())
     return rh_cor
 
 
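The new calcDirWindSpeeds helper is the usual x/y projection of the wind vector; a small sketch with made-up numbers, using plain numpy arrays instead of the xarray DataArrays passed in the pipeline:

import numpy as np
from pypromice.process.L1toL2 import calcDirWindSpeeds

# Hypothetical sample: 5 m/s wind at a direction of 90 degrees
wspd_x, wspd_y = calcDirWindSpeeds(np.array([5.0]), np.array([90.0]))
print(wspd_x, wspd_y)  # approx. [5.0] and [0.0], since sin(90 deg) = 1 and cos(90 deg) = 0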