pypromice 1.3.6-py3-none-any.whl → 1.4.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (53)
  1. pypromice/postprocess/bufr_to_csv.py +6 -1
  2. pypromice/postprocess/bufr_utilities.py +91 -18
  3. pypromice/postprocess/create_bufr_files.py +178 -0
  4. pypromice/postprocess/get_bufr.py +248 -397
  5. pypromice/postprocess/make_metadata_csv.py +214 -0
  6. pypromice/postprocess/real_time_utilities.py +41 -11
  7. pypromice/process/L0toL1.py +12 -5
  8. pypromice/process/L1toL2.py +69 -14
  9. pypromice/process/L2toL3.py +1033 -186
  10. pypromice/process/aws.py +130 -808
  11. pypromice/process/get_l2.py +90 -0
  12. pypromice/process/get_l2tol3.py +111 -0
  13. pypromice/process/join_l2.py +112 -0
  14. pypromice/process/join_l3.py +551 -120
  15. pypromice/process/load.py +161 -0
  16. pypromice/process/resample.py +128 -0
  17. pypromice/process/utilities.py +68 -0
  18. pypromice/process/write.py +503 -0
  19. pypromice/qc/github_data_issues.py +10 -16
  20. pypromice/qc/persistence.py +52 -30
  21. pypromice/resources/__init__.py +28 -0
  22. pypromice/{process/metadata.csv → resources/file_attributes.csv} +0 -2
  23. pypromice/resources/variable_aliases_GC-Net.csv +78 -0
  24. pypromice/resources/variables.csv +106 -0
  25. pypromice/station_configuration.py +118 -0
  26. pypromice/tx/get_l0tx.py +7 -4
  27. pypromice/tx/payload_formats.csv +1 -0
  28. pypromice/tx/tx.py +27 -6
  29. pypromice/utilities/__init__.py +0 -0
  30. pypromice/utilities/git.py +61 -0
  31. {pypromice-1.3.6.dist-info → pypromice-1.4.0.dist-info}/METADATA +3 -3
  32. pypromice-1.4.0.dist-info/RECORD +53 -0
  33. {pypromice-1.3.6.dist-info → pypromice-1.4.0.dist-info}/WHEEL +1 -1
  34. pypromice-1.4.0.dist-info/entry_points.txt +13 -0
  35. pypromice/postprocess/station_configurations.toml +0 -762
  36. pypromice/process/get_l3.py +0 -46
  37. pypromice/process/variables.csv +0 -92
  38. pypromice/qc/persistence_test.py +0 -150
  39. pypromice/test/test_config1.toml +0 -69
  40. pypromice/test/test_config2.toml +0 -54
  41. pypromice/test/test_email +0 -75
  42. pypromice/test/test_payload_formats.csv +0 -4
  43. pypromice/test/test_payload_types.csv +0 -7
  44. pypromice/test/test_percentile.py +0 -229
  45. pypromice/test/test_raw1.txt +0 -4468
  46. pypromice/test/test_raw_DataTable2.txt +0 -11167
  47. pypromice/test/test_raw_SlimTableMem1.txt +0 -1155
  48. pypromice/test/test_raw_transmitted1.txt +0 -15411
  49. pypromice/test/test_raw_transmitted2.txt +0 -28
  50. pypromice-1.3.6.dist-info/RECORD +0 -53
  51. pypromice-1.3.6.dist-info/entry_points.txt +0 -8
  52. {pypromice-1.3.6.dist-info → pypromice-1.4.0.dist-info}/LICENSE.txt +0 -0
  53. {pypromice-1.3.6.dist-info → pypromice-1.4.0.dist-info}/top_level.txt +0 -0
pypromice/postprocess/make_metadata_csv.py (new file)
@@ -0,0 +1,214 @@
+#!/usr/bin/env python
+import os, sys, argparse
+import pandas as pd
+import xarray as xr
+import logging
+
+logging.basicConfig(
+    format="%(asctime)s; %(levelname)s; %(name)s; %(message)s",
+    level=logging.INFO,
+    stream=sys.stdout,
+)
+logger = logging.getLogger(__name__)
+
+def extract_metadata_from_nc(file_path: str, data_type: str, label_s_id: str) -> pd.Series:
+    """
+    Extract metadata from a NetCDF file and return it as a pandas Series.
+
+    Parameters:
+    - file_path (str): The path to the NetCDF file.
+    - data_type (str): The type of data ('station' or 'site').
+    - label_s_id (str): The label for the station or site ID.
+
+    Returns:
+    - pd.Series: A pandas Series containing the extracted metadata.
+    """
+    try:
+        with xr.open_dataset(file_path) as nc_file:
+            # Extract attributes
+            s_id = nc_file.attrs.get(label_s_id, 'N/A')
+            location_type = nc_file.attrs.get('location_type', 'N/A')
+            project = nc_file.attrs.get('project', 'N/A')
+            if data_type == 'site':
+                stations = nc_file.attrs.get('stations', s_id)
+            if data_type == 'station':
+                number_of_booms = nc_file.attrs.get('number_of_booms', 'N/A')
+
+            # Extract the time variable as datetime64
+            time_var = nc_file['time'].values.astype('datetime64[s]')
+
+            # Extract the first and last timestamps
+            date_installation_str = pd.Timestamp(time_var[0]).strftime('%Y-%m-%d')
+            last_valid_date_str = pd.Timestamp(time_var[-1]).strftime('%Y-%m-%d')
+
+            # Extract the first and last values of lat, lon, and alt
+            lat_installation = nc_file['lat'].isel(time=0).values.item()
+            lon_installation = nc_file['lon'].isel(time=0).values.item()
+            alt_installation = nc_file['alt'].isel(time=0).values.item()
+
+            lat_last_known = nc_file['lat'].isel(time=-1).values.item()
+            lon_last_known = nc_file['lon'].isel(time=-1).values.item()
+            alt_last_known = nc_file['alt'].isel(time=-1).values.item()
+
+            # Create a pandas Series for the metadata
+            if data_type == 'site':
+                row = pd.Series({
+                    'project': project.replace('\r',''),
+                    'location_type': location_type,
+                    'stations': stations,
+                    'date_installation': date_installation_str,
+                    'latitude_installation': lat_installation,
+                    'longitude_installation': lon_installation,
+                    'altitude_installation': alt_installation,
+                    'date_last_valid': last_valid_date_str,
+                    'latitude_last_valid': lat_last_known,
+                    'longitude_last_valid': lon_last_known,
+                    'altitude_last_valid': alt_last_known
+                }, name=s_id)
+            else:
+                row = pd.Series({
+                    'project': project.replace('\r',''),
+                    'number_of_booms': number_of_booms,
+                    'location_type': location_type,
+                    'date_installation': date_installation_str,
+                    'latitude_installation': lat_installation,
+                    'longitude_installation': lon_installation,
+                    'altitude_installation': alt_installation,
+                    'date_last_valid': last_valid_date_str,
+                    'latitude_last_valid': lat_last_known,
+                    'longitude_last_valid': lon_last_known,
+                    'altitude_last_valid': alt_last_known
+                }, name=s_id)
+        return row
+    except Exception as e:
+        logger.info(f"Warning: Error processing {file_path}: {str(e)}")
+        return pd.Series()  # Return an empty Series in case of an error
+
+def process_files(base_dir: str, csv_file_path: str, data_type: str) -> pd.DataFrame:
+    """
+    Process all files in the base directory to generate new metadata.
+
+    Parameters:
+    - base_dir (str): The base directory containing the NetCDF files.
+    - csv_file_path (str): The path to the existing metadata CSV file.
+    - data_type (str): The type of data ('station' or 'site').
+
+    Returns:
+    - pd.DataFrame: The combined metadata DataFrame.
+    """
+    label_s_id = 'station_id' if data_type == 'station' else 'site_id'
+
+    # Initialize a list to hold the rows (Series) of DataFrame
+    rows = []
+
+    # Read existing metadata if the CSV file exists
+    if os.path.exists(csv_file_path) and os.path.getsize(csv_file_path) > 0:
+        logger.info("Updating " + str(csv_file_path))
+        existing_metadata_df = pd.read_csv(csv_file_path, index_col=label_s_id)
+    else:
+        logger.info("Creating " + str(csv_file_path))
+        existing_metadata_df = pd.DataFrame()
+
+    # Track updated sites or stations to avoid duplicate updates
+    updated_s = []
+    new_s = []
+
+    # Traverse through all the subfolders and files in the base directory
+    for subdir, _, files in os.walk(base_dir):
+        for file in files:
+            if file.endswith('_hour.nc'):
+                file_path = os.path.join(subdir, file)
+                row = extract_metadata_from_nc(file_path, data_type, label_s_id)
+                if not row.empty:
+                    s_id = row.name
+                    if s_id in existing_metadata_df.index:
+                        # Compare with existing metadata
+                        existing_row = existing_metadata_df.loc[s_id]
+                        old_date_installation = existing_row['date_installation']
+                        old_last_valid_date = existing_row['date_last_valid']
+
+                        # Update the existing metadata
+                        existing_metadata_df.loc[s_id] = row
+
+                        # Print message if dates are updated
+                        if old_last_valid_date != row['date_last_valid']:
+                            logger.info(f"Updated {label_s_id}: {s_id} date_last_valid: {old_last_valid_date} --> {row['date_last_valid']}")
+
+                        updated_s.append(s_id)
+                    else:
+                        new_s.append(s_id)
+                        # Append new metadata row to the list
+                        rows.append(row)
+
+    # Convert the list of rows to a DataFrame
+    new_metadata_df = pd.DataFrame(rows)
+
+    # Concatenate the existing metadata with the new metadata
+    combined_metadata_df = pd.concat([existing_metadata_df, new_metadata_df], ignore_index=False)
+
+    # Exclude some sites
+    sites_to_exclude = [s for s in ['XXX', 'Roof_GEUS', 'Roof_PROMICE'] if s in combined_metadata_df.index]
+    excluded_metadata_df = combined_metadata_df.loc[sites_to_exclude].copy()
+    combined_metadata_df.drop(sites_to_exclude, inplace=True)
+
+    # Sort the DataFrame by index (s_id)
+    combined_metadata_df.sort_index(inplace=True)
+
+    # Print excluded lines
+    if not excluded_metadata_df.empty:
+        pd.set_option('display.max_columns', None)  # Show all columns
+        pd.set_option('display.max_colwidth', None)  # Show full width of columns
+        pd.set_option('display.width', None)  # Disable line wrapping
+        logger.info("\nExcluded lines from combined metadata.csv:")
+        print(excluded_metadata_df)
+
+    # Drop excluded lines from combined_metadata_df
+    combined_metadata_df.drop(sites_to_exclude, errors='ignore', inplace=True)
+
+    # Save to csv
+    combined_metadata_df.to_csv(csv_file_path, index_label=label_s_id)
+
+    return combined_metadata_df, existing_metadata_df, new_s, updated_s
+
+def compare_and_log_updates(combined_metadata_df: pd.DataFrame, existing_metadata_df: pd.DataFrame, new_s: list, updated_s: list):
+    """
+    Compare the combined metadata with the existing metadata and log the updates.
+
+    Parameters:
+    - combined_metadata_df (pd.DataFrame): The combined metadata DataFrame.
+    - existing_metadata_df (pd.DataFrame): The existing metadata DataFrame.
+    - new_s (list): List of new station/site IDs.
+    - updated_s (list): List of updated station/site IDs.
+    """
+    # Determine which lines were not updated (reused) and which were added
+    if not existing_metadata_df.empty:
+        reused_s = [s_id for s_id in existing_metadata_df.index if ((s_id not in new_s) & (s_id not in updated_s))]
+        reused_lines = existing_metadata_df.loc[reused_s]
+        added_lines = combined_metadata_df.loc[combined_metadata_df.index.difference(existing_metadata_df.index)]
+
+        logger.info("\nLines from the old metadata.csv that are reused (not updated):")
+        print(reused_lines)
+
+        if not added_lines.empty:
+            logger.info("\nLines that were not present in the old metadata.csv and are added:")
+            print(added_lines)
+    else:
+        logger.info("\nAll lines are added (no old metadata.csv found)")
+
+def main():
+    parser = argparse.ArgumentParser(description='Process station or site data.')
+    parser.add_argument('-t', '--type', choices=['station', 'site'],
+                        required=True,
+                        help='Type of data to process: "station" or "site"')
+    parser.add_argument('-r', '--root_dir', required=True, help='Root directory ' +
+                        'containing the aws-l3 station or site folder')
+    parser.add_argument('-m','--metadata_file', required=True,
+                        help='File path to metadata csv file (existing or '+
+                        'intended output path')
+
+    args = parser.parse_args()
+    combined_metadata_df, existing_metadata_df, new_s, updated_s = process_files(args.root_dir, args.metadata_file, args.type)
+    compare_and_log_updates(combined_metadata_df, existing_metadata_df, new_s, updated_s)
+
+if __name__ == '__main__':
+    main()
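
For orientation, the new metadata builder exposes an argparse interface (-t/--type, -r/--root_dir, -m/--metadata_file), and process_files returns four objects (combined and previous metadata tables plus the lists of new and updated IDs), not the single DataFrame its docstring advertises. A minimal usage sketch, assuming the module is importable from the installed wheel and using placeholder paths:

```python
# Minimal sketch; the paths below are placeholders, not part of the release.
from pypromice.postprocess.make_metadata_csv import process_files, compare_and_log_updates

combined, existing, new_ids, updated_ids = process_files(
    base_dir="aws-l3/station",                  # tree of <station>_hour.nc files
    csv_file_path="AWS_stations_metadata.csv",  # read if present, rewritten on exit
    data_type="station",                        # 'station' indexes by station_id, 'site' by site_id
)
compare_and_log_updates(combined, existing, new_ids, updated_ids)
```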
pypromice/postprocess/real_time_utilities.py
@@ -7,7 +7,7 @@ This includes:
 
 """
 import logging
-from typing import Optional
+from typing import Optional, Collection
 
 import numpy as np
 import pandas as pd
@@ -22,6 +22,7 @@ logger = logging.getLogger(__name__)
 def get_latest_data(
     df: pd.DataFrame,
     lin_reg_time_limit: str,
+    vars_to_skip: Optional[Collection[str]] = None,
 ) -> Optional[pd.Series]:
     """
     Determine instantaneous values for the latest valid timestamp in the input dataframe
@@ -66,16 +67,47 @@ def get_latest_data(
         lin_reg_time_limit,
     )
 
+    if last_valid_index not in df_limited.index:
+        logger.info("No valid data limited period")
+        return None
+
     # Apply smoothing to z_boom_u
     # require at least 2 hourly obs? Sometimes seeing once/day data for z_boom_u
-    df_limited = rolling_window(df_limited, "z_boom_u", "72H", 2, 1)
-
+    df_limited = rolling_window(df_limited, "z_boom_u", "72h", 2, 3)
+
     # limit to single most recent valid row (convert to series)
     s_current = df_limited.loc[last_valid_index]
 
+    if vars_to_skip is not None:
+        s_current = filter_skipped_variables(s_current, vars_to_skip)
+
     return s_current
 
 
+def filter_skipped_variables(
+    row: pd.Series, vars_to_skip: Collection[str]
+) -> pd.Series:
+    """
+    Mutate input series by setting var_to_skip to np.nan
+
+    Parameters
+    ----------
+    row
+    vars_to_skip
+        List of variable names to be skipped
+
+    Returns
+    -------
+    Input series
+
+    """
+    vars_to_skip = set(row.keys()) & set(vars_to_skip)
+    for var_key in vars_to_skip:
+        row[var_key] = np.nan
+        logger.info("----> Skipping var: {}".format(var_key))
+    return row
+
+
 def rolling_window(df, column, window, min_periods, decimals) -> pd.DataFrame:
     """Apply a rolling window (smoothing) to the input column
 
@@ -145,9 +177,9 @@ def find_positions(df, time_limit):
     logger.info(f"last transmission: {df_limited.index.max()}")
 
     # Extrapolate recommended for altitude, optional for lat and lon.
-    df_limited, lat_valid = linear_fit(df_limited, "gps_lat", 6)
-    df_limited, lon_valid = linear_fit(df_limited, "gps_lon", 6)
-    df_limited, alt_valid = linear_fit(df_limited, "gps_alt", 1)
+    df_limited, lat_valid = linear_fit(df_limited, "gps_lat", 7)
+    df_limited, lon_valid = linear_fit(df_limited, "gps_lon", 7)
+    df_limited, alt_valid = linear_fit(df_limited, "gps_alt", 4)
 
     # If we have no valid lat, lon or alt data in the df_limited window, then interpolate
     # using full tx dataset.
@@ -158,17 +190,15 @@ def find_positions(df, time_limit):
             logger.info(f"----> Using full history for linear extrapolation: {k}")
             logger.info(f"first transmission: {df.index.min()}")
             if k == "gps_alt":
-                df, valid = linear_fit(df, k, 1)
+                df, valid = linear_fit(df, k, 2)
             else:
-                df, valid = linear_fit(df, k, 6)
+                df, valid = linear_fit(df, k, 7)
             check_valid_again[k] = valid
             if check_valid_again[k] is True:
                 df_limited[f"{k}_fit"] = df.loc[df_limited.index, f"{k}_fit"]
             else:
                 logger.info(f"----> No data exists for {k}. Stubbing out with NaN.")
-                df_limited[f"{k}_fit"] = pd.Series(
-                    np.nan, index=df_limited.index
-                )
+                df_limited[f"{k}_fit"] = pd.Series(np.nan, index=df_limited.index)
 
     return df_limited
 
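
To make the new vars_to_skip hook in get_latest_data concrete, here is a small sketch of filter_skipped_variables on a synthetic row; the variable names are illustrative only, and only names that actually exist in the row are set to NaN:

```python
import pandas as pd
from pypromice.postprocess.real_time_utilities import filter_skipped_variables

# Hypothetical latest-observation row; variable names are illustrative only.
row = pd.Series({"t_u": -12.3, "p_u": 803.2, "z_boom_u": 2.41})

# Unknown names are silently ignored; z_boom_u is blanked and logged.
filtered = filter_skipped_variables(row, vars_to_skip=["z_boom_u", "not_present"])
print(filtered)  # t_u and p_u unchanged, z_boom_u -> NaN
```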
pypromice/process/L0toL1.py
@@ -5,9 +5,9 @@ AWS Level 0 (L0) to Level 1 (L1) data processing
 import numpy as np
 import pandas as pd
 import xarray as xr
-import re
-
+import re, logging
 from pypromice.process.value_clipping import clip_values
+logger = logging.getLogger(__name__)
 
 
 def toL1(L0, vars_df, T_0=273.15, tilt_threshold=-100):
@@ -28,9 +28,10 @@ def toL1(L0, vars_df, T_0=273.15, tilt_threshold=-100):
     -------
     ds : xarray.Dataset
         Level 1 dataset
-    '''
+    '''
     assert(type(L0) == xr.Dataset)
     ds = L0
+    ds.attrs['level'] = 'L1'
 
     for l in list(ds.keys()):
         if l not in ['time', 'msg_i', 'gps_lat', 'gps_lon', 'gps_alt', 'gps_time']:
@@ -64,9 +65,15 @@ def toL1(L0, vars_df, T_0=273.15, tilt_threshold=-100):
     if ds['gps_lat'].dtype.kind == 'O': # Decode and reformat GPS information
         if 'NH' in ds['gps_lat'].dropna(dim='time').values[1]:
             ds = decodeGPS(ds, ['gps_lat','gps_lon','gps_time'])
+        elif 'L' in ds['gps_lat'].dropna(dim='time').values[1]:
+            logger.info('Found L in GPS string')
+            ds = decodeGPS(ds, ['gps_lat','gps_lon','gps_time'])
+            for l in ['gps_lat', 'gps_lon']:
+                ds[l] = ds[l]/100000
         else:
             try:
                 ds = decodeGPS(ds, ['gps_lat','gps_lon','gps_time']) # TODO this is a work around specifically for L0 RAW processing for THU_U. Find a way to make this slicker
+
             except:
                 print('Invalid GPS type {ds["gps_lat"].dtype} for decoding')
 
@@ -179,7 +186,7 @@ def addTimeShift(ds, vars_df):
     if ds.attrs['logger_type'] == 'CR1000X':
         # v3, data is hourly all year long
         # shift everything except instantaneous
-        df_a = df_a.shift(periods=-1, freq="H")
+        df_a = df_a.shift(periods=-1, freq="h")
         df_out = pd.concat([df_a, df_i], axis=1) # different columns, same datetime indices
         df_out = df_out.sort_index()
     elif ds.attrs['logger_type'] == 'CR1000':
@@ -247,7 +254,7 @@ def getPressDepth(z_pt, p, pt_antifreeze, pt_z_factor, pt_z_coef, pt_z_p_coef):
         rho_af = 1145
     else:
         rho_af = np.nan
-        print('ERROR: Incorrect metadata: "pt_antifreeze" = ' +
+        logger.info('ERROR: Incorrect metadata: "pt_antifreeze" = ' +
              f'{pt_antifreeze}. Antifreeze mix only supported at 50% or 100%')
         # assert(False)
 
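
The new 'L'-prefixed GPS branch in toL1 reuses decodeGPS and then rescales latitude and longitude by a factor of 1e5. The raw payload format is not shown in this diff, so the values below are purely illustrative; the sketch only demonstrates the rescaling step:

```python
import xarray as xr

# Illustrative decoded values only; the real 'L' payload format is not shown in this diff.
ds = xr.Dataset({"gps_lat": ("time", [6628595.0]), "gps_lon": ("time", [4629535.0])},
                coords={"time": [0]})
for l in ["gps_lat", "gps_lon"]:
    ds[l] = ds[l] / 100000  # same scaling as the new elif branch in toL1
print(float(ds["gps_lat"][0]), float(ds["gps_lon"][0]))  # 66.28595 46.29535
```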
pypromice/process/L1toL2.py
@@ -3,6 +3,7 @@
 AWS Level 1 (L1) to Level 2 (L2) data processing
 """
 import logging
+from pathlib import Path
 
 import numpy as np
 import pandas as pd
@@ -23,6 +24,8 @@ logger = logging.getLogger(__name__)
 def toL2(
     L1: xr.Dataset,
     vars_df: pd.DataFrame,
+    data_flags_dir: Path,
+    data_adjustments_dir: Path,
     T_0=273.15,
     ews=1013.246,
     ei0=6.1071,
@@ -30,7 +33,18 @@ def toL2(
     eps_clear=9.36508e-6,
     emissivity=0.97,
 ) -> xr.Dataset:
-    '''Process one Level 1 (L1) product to Level 2
+    '''Process one Level 1 (L1) product to Level 2.
+    In this step we do:
+        - manual flagging and adjustments
+        - automated QC: persistence, percentile
+        - custom filter: gps_alt filter, NaN t_rad removed from dlr & ulr
+        - smoothing of tilt and rot
+        - calculation of rh with regards to ice in subfreezin conditions
+        - calculation of cloud coverage
+        - correction of dsr and usr for tilt
+        - filtering of dsr based on a theoritical TOA irradiance and grazing light
+        - calculation of albedo
+        - calculation of directional wind speed
 
     Parameters
     ----------
@@ -59,10 +73,11 @@ def toL2(
         Level 2 dataset
     '''
     ds = L1.copy(deep=True) # Reassign dataset
+    ds.attrs['level'] = 'L2'
     try:
-        ds = adjustTime(ds) # Adjust time after a user-defined csv files
-        ds = flagNAN(ds) # Flag NaNs after a user-defined csv files
-        ds = adjustData(ds) # Adjust data after a user-defined csv files
+        ds = adjustTime(ds, adj_dir=data_adjustments_dir.as_posix()) # Adjust time after a user-defined csv files
+        ds = flagNAN(ds, flag_dir=data_flags_dir.as_posix()) # Flag NaNs after a user-defined csv files
+        ds = adjustData(ds, adj_dir=data_adjustments_dir.as_posix()) # Adjust data after a user-defined csv files
     except Exception:
         logger.exception('Flagging and fixing failed:')
 
@@ -74,7 +89,7 @@ def toL2(
 
     # filtering gps_lat, gps_lon and gps_alt based on the difference to a baseline elevation
     # right now baseline elevation is gapfilled monthly median elevation
-    baseline_elevation = (ds.gps_alt.to_series().resample('M').median()
+    baseline_elevation = (ds.gps_alt.to_series().resample('MS').median()
                           .reindex(ds.time.to_series().index, method='nearest')
                           .ffill().bfill())
     mask = (np.abs(ds.gps_alt - baseline_elevation) < 100) & ds.gps_alt.notnull()
@@ -85,10 +100,20 @@ def toL2(
     ds['dlr'] = ds.dlr.where(ds.t_rad.notnull())
     ds['ulr'] = ds.ulr.where(ds.t_rad.notnull())
 
+    # calculating realtive humidity with regard to ice
     T_100 = _getTempK(T_0)
     ds['rh_u_cor'] = correctHumidity(ds['rh_u'], ds['t_u'],
                                      T_0, T_100, ews, ei0)
 
+    if ds.attrs['number_of_booms']==2:
+        ds['rh_l_cor'] = correctHumidity(ds['rh_l'], ds['t_l'],
+                                         T_0, T_100, ews, ei0)
+
+    if hasattr(ds,'t_i'):
+        if ~ds['t_i'].isnull().all():
+            ds['rh_i_cor'] = correctHumidity(ds['rh_i'], ds['t_i'],
+                                             T_0, T_100, ews, ei0)
+
     # Determiune cloud cover for on-ice stations
     cc = calcCloudCoverage(ds['t_u'], T_0, eps_overcast, eps_clear, # Calculate cloud coverage
                            ds['dlr'], ds.attrs['station_id'])
@@ -176,22 +201,52 @@ def toL2(
         ds['precip_u_cor'], ds['precip_u_rate'] = correctPrecip(ds['precip_u'],
                                                                 ds['wspd_u'])
     if ds.attrs['number_of_booms']==2:
-        ds['rh_l_cor'] = correctHumidity(ds['rh_l'], ds['t_l'], # Correct relative humidity
-                                         T_0, T_100, ews, ei0)
-
         if ~ds['precip_l'].isnull().all() and precip_flag: # Correct precipitation
             ds['precip_l_cor'], ds['precip_l_rate']= correctPrecip(ds['precip_l'],
                                                                    ds['wspd_l'])
 
-    if hasattr(ds,'t_i'):
-        if ~ds['t_i'].isnull().all(): # Instantaneous msg processing
-            ds['rh_i_cor'] = correctHumidity(ds['rh_i'], ds['t_i'], # Correct relative humidity
-                                             T_0, T_100, ews, ei0)
+    # Get directional wind speed
+    ds['wdir_u'] = ds['wdir_u'].where(ds['wspd_u'] != 0)
+    ds['wspd_x_u'], ds['wspd_y_u'] = calcDirWindSpeeds(ds['wspd_u'], ds['wdir_u'])
+
+    if ds.attrs['number_of_booms']==2:
+        ds['wdir_l'] = ds['wdir_l'].where(ds['wspd_l'] != 0)
+        ds['wspd_x_l'], ds['wspd_y_l'] = calcDirWindSpeeds(ds['wspd_l'], ds['wdir_l'])
+
+    if hasattr(ds, 'wdir_i'):
+        if ~ds['wdir_i'].isnull().all() and ~ds['wspd_i'].isnull().all():
+            ds['wdir_i'] = ds['wdir_i'].where(ds['wspd_i'] != 0)
+            ds['wspd_x_i'], ds['wspd_y_i'] = calcDirWindSpeeds(ds['wspd_i'], ds['wdir_i'])
+
 
     ds = clip_values(ds, vars_df)
     return ds
 
 
+def calcDirWindSpeeds(wspd, wdir, deg2rad=np.pi/180):
+    '''Calculate directional wind speed from wind speed and direction
+
+    Parameters
+    ----------
+    wspd : xr.Dataarray
+        Wind speed data array
+    wdir : xr.Dataarray
+        Wind direction data array
+    deg2rad : float
+        Degree to radians coefficient. The default is np.pi/180
+
+    Returns
+    -------
+    wspd_x : xr.Dataarray
+        Wind speed in X direction
+    wspd_y : xr.Datarray
+        Wind speed in Y direction
+    '''
+    wspd_x = wspd * np.sin(wdir * deg2rad)
+    wspd_y = wspd * np.cos(wdir * deg2rad)
+    return wspd_x, wspd_y
+
+
 def calcCloudCoverage(T, T_0, eps_overcast, eps_clear, dlr, station_id):
     '''Calculate cloud cover from T and T_0
 
@@ -275,7 +330,7 @@ def smoothTilt(da: xr.DataArray, threshold=0.2):
     # we calculate the moving standard deviation over a 3-day sliding window
     # hourly resampling is necessary to make sure the same threshold can be used
     # for 10 min and hourly data
-    moving_std_gap_filled = da.to_series().resample('H').median().rolling(
+    moving_std_gap_filled = da.to_series().resample('h').median().rolling(
         3*24, center=True, min_periods=2
     ).std().reindex(da.time, method='bfill').values
     # we select the good timestamps and gapfill assuming that
@@ -302,7 +357,7 @@ def smoothRot(da: xr.DataArray, threshold=4):
     xarray.DataArray
         smoothed rotation measurements from inclinometer
     '''
-    moving_std_gap_filled = da.to_series().resample('H').median().rolling(
+    moving_std_gap_filled = da.to_series().resample('h').median().rolling(
         3*24, center=True, min_periods=2
     ).std().reindex(da.time, method='bfill').values
     # same as for tilt with, in addition:
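
The new calcDirWindSpeeds helper is a plain wind-vector decomposition. A quick standalone check of the formulas above, using scalar inputs instead of xarray objects (sufficient here, since only multiplication, sin and cos are involved):

```python
import numpy as np

def calc_dir_wind_speeds(wspd, wdir, deg2rad=np.pi / 180):
    # Same arithmetic as calcDirWindSpeeds in L1toL2.py.
    return wspd * np.sin(wdir * deg2rad), wspd * np.cos(wdir * deg2rad)

wspd_x, wspd_y = calc_dir_wind_speeds(5.0, 90.0)
print(round(wspd_x, 6), round(wspd_y, 6))  # 5.0 0.0: a 90-degree wind is all x-component
```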