pypromice 1.3.5__py3-none-any.whl → 1.4.0__py3-none-any.whl
This diff shows the contents of the two publicly released package versions as they appear in their public registry, and is provided for informational purposes only.
Potentially problematic release: this version of pypromice might be problematic.
- pypromice/get/get.py +19 -19
- pypromice/postprocess/bufr_to_csv.py +6 -1
- pypromice/postprocess/bufr_utilities.py +91 -18
- pypromice/postprocess/create_bufr_files.py +178 -0
- pypromice/postprocess/get_bufr.py +248 -397
- pypromice/postprocess/make_metadata_csv.py +214 -0
- pypromice/postprocess/real_time_utilities.py +41 -11
- pypromice/process/L0toL1.py +12 -5
- pypromice/process/L1toL2.py +159 -30
- pypromice/process/L2toL3.py +1034 -187
- pypromice/process/aws.py +131 -752
- pypromice/process/get_l2.py +90 -0
- pypromice/process/get_l2tol3.py +111 -0
- pypromice/process/join_l2.py +112 -0
- pypromice/process/join_l3.py +551 -120
- pypromice/process/load.py +161 -0
- pypromice/process/resample.py +128 -0
- pypromice/process/utilities.py +68 -0
- pypromice/process/write.py +503 -0
- pypromice/qc/github_data_issues.py +10 -16
- pypromice/qc/percentiles/thresholds.csv +2 -2
- pypromice/qc/persistence.py +71 -25
- pypromice/resources/__init__.py +28 -0
- pypromice/{process/metadata.csv → resources/file_attributes.csv} +0 -2
- pypromice/resources/variable_aliases_GC-Net.csv +78 -0
- pypromice/resources/variables.csv +106 -0
- pypromice/station_configuration.py +118 -0
- pypromice/tx/get_l0tx.py +7 -4
- pypromice/tx/payload_formats.csv +1 -0
- pypromice/tx/tx.py +27 -6
- pypromice/utilities/__init__.py +0 -0
- pypromice/utilities/git.py +61 -0
- {pypromice-1.3.5.dist-info → pypromice-1.4.0.dist-info}/METADATA +12 -21
- pypromice-1.4.0.dist-info/RECORD +53 -0
- {pypromice-1.3.5.dist-info → pypromice-1.4.0.dist-info}/WHEEL +1 -1
- pypromice-1.4.0.dist-info/entry_points.txt +13 -0
- pypromice/postprocess/station_configurations.toml +0 -762
- pypromice/process/get_l3.py +0 -46
- pypromice/process/variables.csv +0 -92
- pypromice/qc/persistence_test.py +0 -150
- pypromice/test/test_config1.toml +0 -69
- pypromice/test/test_config2.toml +0 -54
- pypromice/test/test_email +0 -75
- pypromice/test/test_payload_formats.csv +0 -4
- pypromice/test/test_payload_types.csv +0 -7
- pypromice/test/test_percentile.py +0 -229
- pypromice/test/test_raw1.txt +0 -4468
- pypromice/test/test_raw_DataTable2.txt +0 -11167
- pypromice/test/test_raw_SlimTableMem1.txt +0 -1155
- pypromice/test/test_raw_transmitted1.txt +0 -15411
- pypromice/test/test_raw_transmitted2.txt +0 -28
- pypromice-1.3.5.dist-info/RECORD +0 -53
- pypromice-1.3.5.dist-info/entry_points.txt +0 -8
- {pypromice-1.3.5.dist-info → pypromice-1.4.0.dist-info}/LICENSE.txt +0 -0
- {pypromice-1.3.5.dist-info → pypromice-1.4.0.dist-info}/top_level.txt +0 -0
pypromice/postprocess/make_metadata_csv.py
ADDED
@@ -0,0 +1,214 @@
+#!/usr/bin/env python
+import os, sys, argparse
+import pandas as pd
+import xarray as xr
+import logging
+
+logging.basicConfig(
+    format="%(asctime)s; %(levelname)s; %(name)s; %(message)s",
+    level=logging.INFO,
+    stream=sys.stdout,
+)
+logger = logging.getLogger(__name__)
+
+def extract_metadata_from_nc(file_path: str, data_type: str, label_s_id: str) -> pd.Series:
+    """
+    Extract metadata from a NetCDF file and return it as a pandas Series.
+
+    Parameters:
+    - file_path (str): The path to the NetCDF file.
+    - data_type (str): The type of data ('station' or 'site').
+    - label_s_id (str): The label for the station or site ID.
+
+    Returns:
+    - pd.Series: A pandas Series containing the extracted metadata.
+    """
+    try:
+        with xr.open_dataset(file_path) as nc_file:
+            # Extract attributes
+            s_id = nc_file.attrs.get(label_s_id, 'N/A')
+            location_type = nc_file.attrs.get('location_type', 'N/A')
+            project = nc_file.attrs.get('project', 'N/A')
+            if data_type == 'site':
+                stations = nc_file.attrs.get('stations', s_id)
+            if data_type == 'station':
+                number_of_booms = nc_file.attrs.get('number_of_booms', 'N/A')
+
+            # Extract the time variable as datetime64
+            time_var = nc_file['time'].values.astype('datetime64[s]')
+
+            # Extract the first and last timestamps
+            date_installation_str = pd.Timestamp(time_var[0]).strftime('%Y-%m-%d')
+            last_valid_date_str = pd.Timestamp(time_var[-1]).strftime('%Y-%m-%d')
+
+            # Extract the first and last values of lat, lon, and alt
+            lat_installation = nc_file['lat'].isel(time=0).values.item()
+            lon_installation = nc_file['lon'].isel(time=0).values.item()
+            alt_installation = nc_file['alt'].isel(time=0).values.item()
+
+            lat_last_known = nc_file['lat'].isel(time=-1).values.item()
+            lon_last_known = nc_file['lon'].isel(time=-1).values.item()
+            alt_last_known = nc_file['alt'].isel(time=-1).values.item()
+
+            # Create a pandas Series for the metadata
+            if data_type == 'site':
+                row = pd.Series({
+                    'project': project.replace('\r',''),
+                    'location_type': location_type,
+                    'stations': stations,
+                    'date_installation': date_installation_str,
+                    'latitude_installation': lat_installation,
+                    'longitude_installation': lon_installation,
+                    'altitude_installation': alt_installation,
+                    'date_last_valid': last_valid_date_str,
+                    'latitude_last_valid': lat_last_known,
+                    'longitude_last_valid': lon_last_known,
+                    'altitude_last_valid': alt_last_known
+                }, name=s_id)
+            else:
+                row = pd.Series({
+                    'project': project.replace('\r',''),
+                    'number_of_booms': number_of_booms,
+                    'location_type': location_type,
+                    'date_installation': date_installation_str,
+                    'latitude_installation': lat_installation,
+                    'longitude_installation': lon_installation,
+                    'altitude_installation': alt_installation,
+                    'date_last_valid': last_valid_date_str,
+                    'latitude_last_valid': lat_last_known,
+                    'longitude_last_valid': lon_last_known,
+                    'altitude_last_valid': alt_last_known
+                }, name=s_id)
+            return row
+    except Exception as e:
+        logger.info(f"Warning: Error processing {file_path}: {str(e)}")
+        return pd.Series()  # Return an empty Series in case of an error
+
+def process_files(base_dir: str, csv_file_path: str, data_type: str) -> pd.DataFrame:
+    """
+    Process all files in the base directory to generate new metadata.
+
+    Parameters:
+    - base_dir (str): The base directory containing the NetCDF files.
+    - csv_file_path (str): The path to the existing metadata CSV file.
+    - data_type (str): The type of data ('station' or 'site').
+
+    Returns:
+    - pd.DataFrame: The combined metadata DataFrame.
+    """
+    label_s_id = 'station_id' if data_type == 'station' else 'site_id'
+
+    # Initialize a list to hold the rows (Series) of DataFrame
+    rows = []
+
+    # Read existing metadata if the CSV file exists
+    if os.path.exists(csv_file_path) and os.path.getsize(csv_file_path) > 0:
+        logger.info("Updating " + str(csv_file_path))
+        existing_metadata_df = pd.read_csv(csv_file_path, index_col=label_s_id)
+    else:
+        logger.info("Creating " + str(csv_file_path))
+        existing_metadata_df = pd.DataFrame()
+
+    # Track updated sites or stations to avoid duplicate updates
+    updated_s = []
+    new_s = []
+
+    # Traverse through all the subfolders and files in the base directory
+    for subdir, _, files in os.walk(base_dir):
+        for file in files:
+            if file.endswith('_hour.nc'):
+                file_path = os.path.join(subdir, file)
+                row = extract_metadata_from_nc(file_path, data_type, label_s_id)
+                if not row.empty:
+                    s_id = row.name
+                    if s_id in existing_metadata_df.index:
+                        # Compare with existing metadata
+                        existing_row = existing_metadata_df.loc[s_id]
+                        old_date_installation = existing_row['date_installation']
+                        old_last_valid_date = existing_row['date_last_valid']
+
+                        # Update the existing metadata
+                        existing_metadata_df.loc[s_id] = row
+
+                        # Print message if dates are updated
+                        if old_last_valid_date != row['date_last_valid']:
+                            logger.info(f"Updated {label_s_id}: {s_id} date_last_valid: {old_last_valid_date} --> {row['date_last_valid']}")
+
+                        updated_s.append(s_id)
+                    else:
+                        new_s.append(s_id)
+                        # Append new metadata row to the list
+                        rows.append(row)
+
+    # Convert the list of rows to a DataFrame
+    new_metadata_df = pd.DataFrame(rows)
+
+    # Concatenate the existing metadata with the new metadata
+    combined_metadata_df = pd.concat([existing_metadata_df, new_metadata_df], ignore_index=False)
+
+    # Exclude some sites
+    sites_to_exclude = [s for s in ['XXX', 'Roof_GEUS', 'Roof_PROMICE'] if s in combined_metadata_df.index]
+    excluded_metadata_df = combined_metadata_df.loc[sites_to_exclude].copy()
+    combined_metadata_df.drop(sites_to_exclude, inplace=True)
+
+    # Sort the DataFrame by index (s_id)
+    combined_metadata_df.sort_index(inplace=True)
+
+    # Print excluded lines
+    if not excluded_metadata_df.empty:
+        pd.set_option('display.max_columns', None)  # Show all columns
+        pd.set_option('display.max_colwidth', None)  # Show full width of columns
+        pd.set_option('display.width', None)  # Disable line wrapping
+        logger.info("\nExcluded lines from combined metadata.csv:")
+        print(excluded_metadata_df)
+
+    # Drop excluded lines from combined_metadata_df
+    combined_metadata_df.drop(sites_to_exclude, errors='ignore', inplace=True)
+
+    # Save to csv
+    combined_metadata_df.to_csv(csv_file_path, index_label=label_s_id)
+
+    return combined_metadata_df, existing_metadata_df, new_s, updated_s
+
+def compare_and_log_updates(combined_metadata_df: pd.DataFrame, existing_metadata_df: pd.DataFrame, new_s: list, updated_s: list):
+    """
+    Compare the combined metadata with the existing metadata and log the updates.
+
+    Parameters:
+    - combined_metadata_df (pd.DataFrame): The combined metadata DataFrame.
+    - existing_metadata_df (pd.DataFrame): The existing metadata DataFrame.
+    - new_s (list): List of new station/site IDs.
+    - updated_s (list): List of updated station/site IDs.
+    """
+    # Determine which lines were not updated (reused) and which were added
+    if not existing_metadata_df.empty:
+        reused_s = [s_id for s_id in existing_metadata_df.index if ((s_id not in new_s) & (s_id not in updated_s))]
+        reused_lines = existing_metadata_df.loc[reused_s]
+        added_lines = combined_metadata_df.loc[combined_metadata_df.index.difference(existing_metadata_df.index)]
+
+        logger.info("\nLines from the old metadata.csv that are reused (not updated):")
+        print(reused_lines)
+
+        if not added_lines.empty:
+            logger.info("\nLines that were not present in the old metadata.csv and are added:")
+            print(added_lines)
+    else:
+        logger.info("\nAll lines are added (no old metadata.csv found)")
+
+def main():
+    parser = argparse.ArgumentParser(description='Process station or site data.')
+    parser.add_argument('-t', '--type', choices=['station', 'site'],
+                        required=True,
+                        help='Type of data to process: "station" or "site"')
+    parser.add_argument('-r', '--root_dir', required=True, help='Root directory ' +
+                        'containing the aws-l3 station or site folder')
+    parser.add_argument('-m','--metadata_file', required=True,
+                        help='File path to metadata csv file (existing or '+
+                        'intended output path')
+
+    args = parser.parse_args()
+    combined_metadata_df, existing_metadata_df, new_s, updated_s = process_files(args.root_dir, args.metadata_file, args.type)
+    compare_and_log_updates(combined_metadata_df, existing_metadata_df, new_s, updated_s)
+
+if __name__ == '__main__':
+    main()
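For reference, a minimal sketch of driving the new metadata builder directly from Python instead of through its main() CLI; the paths and data type used here are purely illustrative:

# Hypothetical example: walk a local aws-l3 tree and write/refresh a metadata CSV.
from pypromice.postprocess.make_metadata_csv import process_files, compare_and_log_updates

combined_df, existing_df, new_ids, updated_ids = process_files(
    base_dir="aws-l3/station",                 # hypothetical root holding *_hour.nc files
    csv_file_path="AWS_station_metadata.csv",  # hypothetical CSV, created or updated in place
    data_type="station",                       # or "site"
)
compare_and_log_updates(combined_df, existing_df, new_ids, updated_ids)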
pypromice/postprocess/real_time_utilities.py
CHANGED
@@ -7,7 +7,7 @@ This includes:
 
 """
 import logging
-from typing import Optional
+from typing import Optional, Collection
 
 import numpy as np
 import pandas as pd
@@ -22,6 +22,7 @@ logger = logging.getLogger(__name__)
 def get_latest_data(
     df: pd.DataFrame,
     lin_reg_time_limit: str,
+    vars_to_skip: Optional[Collection[str]] = None,
 ) -> Optional[pd.Series]:
     """
     Determine instantaneous values for the latest valid timestamp in the input dataframe
@@ -66,16 +67,47 @@ def get_latest_data(
         lin_reg_time_limit,
     )
 
+    if last_valid_index not in df_limited.index:
+        logger.info("No valid data limited period")
+        return None
+
     # Apply smoothing to z_boom_u
     # require at least 2 hourly obs? Sometimes seeing once/day data for z_boom_u
-    df_limited = rolling_window(df_limited, "z_boom_u", "
-
+    df_limited = rolling_window(df_limited, "z_boom_u", "72h", 2, 3)
+
     # limit to single most recent valid row (convert to series)
     s_current = df_limited.loc[last_valid_index]
 
+    if vars_to_skip is not None:
+        s_current = filter_skipped_variables(s_current, vars_to_skip)
+
     return s_current
 
 
+def filter_skipped_variables(
+    row: pd.Series, vars_to_skip: Collection[str]
+) -> pd.Series:
+    """
+    Mutate input series by setting var_to_skip to np.nan
+
+    Parameters
+    ----------
+    row
+    vars_to_skip
+        List of variable names to be skipped
+
+    Returns
+    -------
+    Input series
+
+    """
+    vars_to_skip = set(row.keys()) & set(vars_to_skip)
+    for var_key in vars_to_skip:
+        row[var_key] = np.nan
+        logger.info("----> Skipping var: {}".format(var_key))
+    return row
+
+
 def rolling_window(df, column, window, min_periods, decimals) -> pd.DataFrame:
     """Apply a rolling window (smoothing) to the input column
 
@@ -145,9 +177,9 @@ def find_positions(df, time_limit):
     logger.info(f"last transmission: {df_limited.index.max()}")
 
     # Extrapolate recommended for altitude, optional for lat and lon.
-    df_limited, lat_valid = linear_fit(df_limited, "gps_lat",
-    df_limited, lon_valid = linear_fit(df_limited, "gps_lon",
-    df_limited, alt_valid = linear_fit(df_limited, "gps_alt",
+    df_limited, lat_valid = linear_fit(df_limited, "gps_lat", 7)
+    df_limited, lon_valid = linear_fit(df_limited, "gps_lon", 7)
+    df_limited, alt_valid = linear_fit(df_limited, "gps_alt", 4)
 
     # If we have no valid lat, lon or alt data in the df_limited window, then interpolate
     # using full tx dataset.
@@ -158,17 +190,15 @@ def find_positions(df, time_limit):
             logger.info(f"----> Using full history for linear extrapolation: {k}")
             logger.info(f"first transmission: {df.index.min()}")
             if k == "gps_alt":
-                df, valid = linear_fit(df, k,
+                df, valid = linear_fit(df, k, 2)
             else:
-                df, valid = linear_fit(df, k,
+                df, valid = linear_fit(df, k, 7)
             check_valid_again[k] = valid
             if check_valid_again[k] is True:
                 df_limited[f"{k}_fit"] = df.loc[df_limited.index, f"{k}_fit"]
             else:
                 logger.info(f"----> No data exists for {k}. Stubbing out with NaN.")
-                df_limited[f"{k}_fit"] = pd.Series(
-                    np.nan, index=df_limited.index
-                )
+                df_limited[f"{k}_fit"] = pd.Series(np.nan, index=df_limited.index)
 
     return df_limited
 
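A minimal sketch of the new vars_to_skip behaviour; the series contents below are purely illustrative:

import pandas as pd
from pypromice.postprocess.real_time_utilities import filter_skipped_variables

# Hypothetical latest-observation series for a station
row = pd.Series({"t_u": -12.3, "p_u": 812.5, "z_boom_u": 4.21})
row = filter_skipped_variables(row, ["z_boom_u"])
print(row["z_boom_u"])  # nan: skipped variables are blanked, the remaining entries are untouched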
pypromice/process/L0toL1.py
CHANGED
@@ -5,9 +5,9 @@ AWS Level 0 (L0) to Level 1 (L1) data processing
 import numpy as np
 import pandas as pd
 import xarray as xr
-import re
-
+import re, logging
 from pypromice.process.value_clipping import clip_values
+logger = logging.getLogger(__name__)
 
 
 def toL1(L0, vars_df, T_0=273.15, tilt_threshold=-100):
@@ -28,9 +28,10 @@ def toL1(L0, vars_df, T_0=273.15, tilt_threshold=-100):
     -------
     ds : xarray.Dataset
         Level 1 dataset
-    '''
+    '''
     assert(type(L0) == xr.Dataset)
     ds = L0
+    ds.attrs['level'] = 'L1'
 
     for l in list(ds.keys()):
         if l not in ['time', 'msg_i', 'gps_lat', 'gps_lon', 'gps_alt', 'gps_time']:
@@ -64,9 +65,15 @@ def toL1(L0, vars_df, T_0=273.15, tilt_threshold=-100):
     if ds['gps_lat'].dtype.kind == 'O': # Decode and reformat GPS information
         if 'NH' in ds['gps_lat'].dropna(dim='time').values[1]:
             ds = decodeGPS(ds, ['gps_lat','gps_lon','gps_time'])
+        elif 'L' in ds['gps_lat'].dropna(dim='time').values[1]:
+            logger.info('Found L in GPS string')
+            ds = decodeGPS(ds, ['gps_lat','gps_lon','gps_time'])
+            for l in ['gps_lat', 'gps_lon']:
+                ds[l] = ds[l]/100000
         else:
             try:
                 ds = decodeGPS(ds, ['gps_lat','gps_lon','gps_time']) # TODO this is a work around specifically for L0 RAW processing for THU_U. Find a way to make this slicker
+
             except:
                 print('Invalid GPS type {ds["gps_lat"].dtype} for decoding')
 
@@ -179,7 +186,7 @@ def addTimeShift(ds, vars_df):
     if ds.attrs['logger_type'] == 'CR1000X':
         # v3, data is hourly all year long
         # shift everything except instantaneous
-        df_a = df_a.shift(periods=-1, freq="
+        df_a = df_a.shift(periods=-1, freq="h")
         df_out = pd.concat([df_a, df_i], axis=1) # different columns, same datetime indices
         df_out = df_out.sort_index()
     elif ds.attrs['logger_type'] == 'CR1000':
@@ -247,7 +254,7 @@ def getPressDepth(z_pt, p, pt_antifreeze, pt_z_factor, pt_z_coef, pt_z_p_coef):
         rho_af = 1145
     else:
         rho_af = np.nan
-
+        logger.info('ERROR: Incorrect metadata: "pt_antifreeze" = ' +
                     f'{pt_antifreeze}. Antifreeze mix only supported at 50% or 100%')
         # assert(False)
 
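The new 'L'-type GPS branch above decodes the strings as before and then rescales latitude and longitude by a factor of 100000; a worked example of that scaling with a hypothetical value:

# Hypothetical decoded value from an 'L'-type GPS string
raw_gps_lat = 6625123
gps_lat_deg = raw_gps_lat / 100000   # 66.25123 degrees, matching the ds[l]/100000 step in toL1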
pypromice/process/L1toL2.py
CHANGED
@@ -3,6 +3,7 @@
 AWS Level 1 (L1) to Level 2 (L2) data processing
 """
 import logging
+from pathlib import Path
 
 import numpy as np
 import pandas as pd
@@ -23,6 +24,8 @@ logger = logging.getLogger(__name__)
 def toL2(
     L1: xr.Dataset,
     vars_df: pd.DataFrame,
+    data_flags_dir: Path,
+    data_adjustments_dir: Path,
     T_0=273.15,
     ews=1013.246,
     ei0=6.1071,
@@ -30,7 +33,18 @@ def toL2(
     eps_clear=9.36508e-6,
     emissivity=0.97,
 ) -> xr.Dataset:
-    '''Process one Level 1 (L1) product to Level 2
+    '''Process one Level 1 (L1) product to Level 2.
+    In this step we do:
+        - manual flagging and adjustments
+        - automated QC: persistence, percentile
+        - custom filter: gps_alt filter, NaN t_rad removed from dlr & ulr
+        - smoothing of tilt and rot
+        - calculation of rh with regards to ice in subfreezin conditions
+        - calculation of cloud coverage
+        - correction of dsr and usr for tilt
+        - filtering of dsr based on a theoritical TOA irradiance and grazing light
+        - calculation of albedo
+        - calculation of directional wind speed
 
     Parameters
     ----------
@@ -59,32 +73,52 @@ def toL2(
         Level 2 dataset
     '''
     ds = L1.copy(deep=True)                                      # Reassign dataset
+    ds.attrs['level'] = 'L2'
     try:
-        ds = adjustTime(ds)
-        ds = flagNAN(ds)
-        ds = adjustData(ds)
+        ds = adjustTime(ds, adj_dir=data_adjustments_dir.as_posix())   # Adjust time after a user-defined csv files
+        ds = flagNAN(ds, flag_dir=data_flags_dir.as_posix())           # Flag NaNs after a user-defined csv files
+        ds = adjustData(ds, adj_dir=data_adjustments_dir.as_posix())   # Adjust data after a user-defined csv files
     except Exception:
         logger.exception('Flagging and fixing failed:')
 
-
-
-
-
-
-
+    ds = persistence_qc(ds)                                            # Flag and remove persistence outliers
+    # if ds.attrs['format'] == 'TX':
+    #     # TODO: The configuration should be provided explicitly
+    #     outlier_detector = ThresholdBasedOutlierDetector.default()
+    #     ds = outlier_detector.filter_data(ds)  # Flag and remove percentile outliers
+
+    # filtering gps_lat, gps_lon and gps_alt based on the difference to a baseline elevation
+    # right now baseline elevation is gapfilled monthly median elevation
+    baseline_elevation = (ds.gps_alt.to_series().resample('MS').median()
+                          .reindex(ds.time.to_series().index, method='nearest')
+                          .ffill().bfill())
+    mask = (np.abs(ds.gps_alt - baseline_elevation) < 100) & ds.gps_alt.notnull()
+    ds[['gps_alt','gps_lon', 'gps_lat']] = ds[['gps_alt','gps_lon', 'gps_lat']].where(mask)
+
+    # removing dlr and ulr that are missing t_rad
+    # this is done now becasue t_rad can be filtered either manually or with persistence
+    ds['dlr'] = ds.dlr.where(ds.t_rad.notnull())
+    ds['ulr'] = ds.ulr.where(ds.t_rad.notnull())
+
+    # calculating realtive humidity with regard to ice
     T_100 = _getTempK(T_0)
     ds['rh_u_cor'] = correctHumidity(ds['rh_u'], ds['t_u'],
                                      T_0, T_100, ews, ei0)
 
-
-
-
-                                ds['dlr'], ds.attrs['station_id'])
-    ds['cc'] = (('time'), cc.data)
-    else:
-        # Default cloud cover for bedrock station for which tilt should be 0 anyway.
-        cc = 0.8
+    if ds.attrs['number_of_booms']==2:
+        ds['rh_l_cor'] = correctHumidity(ds['rh_l'], ds['t_l'],
+                                         T_0, T_100, ews, ei0)
 
+    if hasattr(ds,'t_i'):
+        if ~ds['t_i'].isnull().all():
+            ds['rh_i_cor'] = correctHumidity(ds['rh_i'], ds['t_i'],
+                                             T_0, T_100, ews, ei0)
+
+    # Determiune cloud cover for on-ice stations
+    cc = calcCloudCoverage(ds['t_u'], T_0, eps_overcast, eps_clear,    # Calculate cloud coverage
+                           ds['dlr'], ds.attrs['station_id'])
+    ds['cc'] = (('time'), cc.data)
+
     # Determine surface temperature
     ds['t_surf'] = calcSurfaceTemperature(T_0, ds['ulr'], ds['dlr'],   # Calculate surface temperature
                                           emissivity)
@@ -102,6 +136,11 @@ def toL2(
     else:
         lat = ds['gps_lat'].mean()
         lon = ds['gps_lon'].mean()
+
+    # smoothing tilt and rot
+    ds['tilt_x'] = smoothTilt(ds['tilt_x'])
+    ds['tilt_y'] = smoothTilt(ds['tilt_y'])
+    ds['rot'] = smoothRot(ds['rot'])
 
     deg2rad, rad2deg = _getRotation()                                  # Get degree-radian conversions
     phi_sensor_rad, theta_sensor_rad = calcTilt(ds['tilt_x'], ds['tilt_y'],   # Calculate station tilt
@@ -112,13 +151,15 @@ def toL2(
     ZenithAngle_rad, ZenithAngle_deg = calcZenith(lat, Declination_rad,   # Calculate zenith
                                                   HourAngle_rad, deg2rad,
                                                   rad2deg)
-
+
+
     # Correct Downwelling shortwave radiation
     DifFrac = 0.2 + 0.8 * cc
     CorFac_all = calcCorrectionFactor(Declination_rad, phi_sensor_rad,    # Calculate correction
                                       theta_sensor_rad, HourAngle_rad,
                                       ZenithAngle_rad, ZenithAngle_deg,
                                       lat, DifFrac, deg2rad)
+    CorFac_all = xr.where(ds['cc'].notnull(), CorFac_all, 1)
     ds['dsr_cor'] = ds['dsr'].copy(deep=True) * CorFac_all              # Apply correction
 
     AngleDif_deg = calcAngleDiff(ZenithAngle_rad, HourAngle_rad,        # Calculate angle between sun and sensor
@@ -145,9 +186,9 @@ def toL2(
     TOA_crit_nopass = (ds['dsr_cor'] > (0.9 * isr_toa + 10))            # Determine filter
     ds['dsr_cor'][TOA_crit_nopass] = np.nan                             # Apply filter and interpolate
     ds['usr_cor'][TOA_crit_nopass] = np.nan
-
-    ds['
-
+
+    ds['dsr_cor'] = ds.dsr_cor.where(ds.dsr.notnull())
+    ds['usr_cor'] = ds.usr_cor.where(ds.usr.notnull())
     # # Check sun position
     # sundown = ZenithAngle_deg >= 90
     # _checkSunPos(ds, OKalbedos, sundown, sunonlowerdome, TOA_crit_nopass)
@@ -160,22 +201,52 @@ def toL2(
     ds['precip_u_cor'], ds['precip_u_rate'] = correctPrecip(ds['precip_u'],
                                                             ds['wspd_u'])
     if ds.attrs['number_of_booms']==2:
-        ds['rh_l_cor'] = correctHumidity(ds['rh_l'], ds['t_l'],         # Correct relative humidity
-                                         T_0, T_100, ews, ei0)
-
         if ~ds['precip_l'].isnull().all() and precip_flag:              # Correct precipitation
             ds['precip_l_cor'], ds['precip_l_rate']= correctPrecip(ds['precip_l'],
                                                                     ds['wspd_l'])
 
-
-
-
-
+    # Get directional wind speed
+    ds['wdir_u'] = ds['wdir_u'].where(ds['wspd_u'] != 0)
+    ds['wspd_x_u'], ds['wspd_y_u'] = calcDirWindSpeeds(ds['wspd_u'], ds['wdir_u'])
+
+    if ds.attrs['number_of_booms']==2:
+        ds['wdir_l'] = ds['wdir_l'].where(ds['wspd_l'] != 0)
+        ds['wspd_x_l'], ds['wspd_y_l'] = calcDirWindSpeeds(ds['wspd_l'], ds['wdir_l'])
+
+    if hasattr(ds, 'wdir_i'):
+        if ~ds['wdir_i'].isnull().all() and ~ds['wspd_i'].isnull().all():
+            ds['wdir_i'] = ds['wdir_i'].where(ds['wspd_i'] != 0)
+            ds['wspd_x_i'], ds['wspd_y_i'] = calcDirWindSpeeds(ds['wspd_i'], ds['wdir_i'])
+
 
     ds = clip_values(ds, vars_df)
     return ds
 
 
+def calcDirWindSpeeds(wspd, wdir, deg2rad=np.pi/180):
+    '''Calculate directional wind speed from wind speed and direction
+
+    Parameters
+    ----------
+    wspd : xr.Dataarray
+        Wind speed data array
+    wdir : xr.Dataarray
+        Wind direction data array
+    deg2rad : float
+        Degree to radians coefficient. The default is np.pi/180
+
+    Returns
+    -------
+    wspd_x : xr.Dataarray
+        Wind speed in X direction
+    wspd_y : xr.Datarray
+        Wind speed in Y direction
+    '''
+    wspd_x = wspd * np.sin(wdir * deg2rad)
+    wspd_y = wspd * np.cos(wdir * deg2rad)
+    return wspd_x, wspd_y
+
+
 def calcCloudCoverage(T, T_0, eps_overcast, eps_clear, dlr, station_id):
     '''Calculate cloud cover from T and T_0
 
@@ -241,6 +312,65 @@ def calcSurfaceTemperature(T_0, ulr, dlr, emissivity):
     return t_surf
 
 
+def smoothTilt(da: xr.DataArray, threshold=0.2):
+    '''Smooth the station tilt
+
+    Parameters
+    ----------
+    da : xarray.DataArray
+        either X or Y tilt inclinometer measurements
+    threshold : float
+        threshold used in a standrad.-deviation based filter
+
+    Returns
+    -------
+    xarray.DataArray
+        either X or Y smoothed tilt inclinometer measurements
+    '''
+    # we calculate the moving standard deviation over a 3-day sliding window
+    # hourly resampling is necessary to make sure the same threshold can be used
+    # for 10 min and hourly data
+    moving_std_gap_filled = da.to_series().resample('h').median().rolling(
+        3*24, center=True, min_periods=2
+    ).std().reindex(da.time, method='bfill').values
+    # we select the good timestamps and gapfill assuming that
+    # - when tilt goes missing the last available value is used
+    # - when tilt is not available for the very first time steps, the first
+    # good value is used for backfill
+    return da.where(
+        moving_std_gap_filled < threshold
+    ).ffill(dim='time').bfill(dim='time')
+
+
+def smoothRot(da: xr.DataArray, threshold=4):
+    '''Smooth the station rotation
+
+    Parameters
+    ----------
+    da : xarray.DataArray
+        rotation measurements from inclinometer
+    threshold : float
+        threshold used in a standrad-deviation based filter
+
+    Returns
+    -------
+    xarray.DataArray
+        smoothed rotation measurements from inclinometer
+    '''
+    moving_std_gap_filled = da.to_series().resample('h').median().rolling(
+        3*24, center=True, min_periods=2
+    ).std().reindex(da.time, method='bfill').values
+    # same as for tilt with, in addition:
+    # - a resampling to daily values
+    # - a two week median smoothing
+    # - a resampling from these daily values to the original temporal resolution
+    return ('time', (da.where(moving_std_gap_filled <4).ffill(dim='time')
+                     .to_series().resample('D').median()
+                     .rolling(7*2,center=True,min_periods=2).median()
+                     .reindex(da.time, method='bfill').values
+                     ))
+
+
 def calcTilt(tilt_x, tilt_y, deg2rad):
     '''Calculate station tilt
 
@@ -323,7 +453,6 @@ def correctHumidity(rh, T, T_0, T_100, ews, ei0): #TODO f
 
     # Set to Groff & Gratch values when freezing, otherwise just rh
     rh_cor = rh.where(~freezing, other = rh*(e_s_wtr / e_s_ice))
-    rh_cor = rh_cor.where(T.notnull())
     return rh_cor
 
 
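The new calcDirWindSpeeds helper is the usual x/y projection of the wind vector; a small sketch with made-up numbers, using plain numpy arrays instead of the xarray DataArrays passed in the pipeline:

import numpy as np
from pypromice.process.L1toL2 import calcDirWindSpeeds

# Hypothetical sample: 5 m/s wind at a direction of 90 degrees
wspd_x, wspd_y = calcDirWindSpeeds(np.array([5.0]), np.array([90.0]))
print(wspd_x, wspd_y)  # approx. [5.0] and [0.0], since sin(90 deg) = 1 and cos(90 deg) = 0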