pypromice 1.3.6__py3-none-any.whl → 1.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of pypromice might be problematic.
- pypromice/postprocess/bufr_to_csv.py +15 -3
- pypromice/postprocess/bufr_utilities.py +91 -18
- pypromice/postprocess/create_bufr_files.py +178 -0
- pypromice/postprocess/get_bufr.py +248 -397
- pypromice/postprocess/make_metadata_csv.py +214 -0
- pypromice/postprocess/real_time_utilities.py +41 -11
- pypromice/process/L0toL1.py +12 -5
- pypromice/process/L1toL2.py +69 -14
- pypromice/process/L2toL3.py +1034 -186
- pypromice/process/aws.py +139 -808
- pypromice/process/get_l2.py +90 -0
- pypromice/process/get_l2tol3.py +111 -0
- pypromice/process/join_l2.py +112 -0
- pypromice/process/join_l3.py +551 -120
- pypromice/process/load.py +161 -0
- pypromice/process/resample.py +147 -0
- pypromice/process/utilities.py +68 -0
- pypromice/process/write.py +503 -0
- pypromice/qc/github_data_issues.py +10 -16
- pypromice/qc/persistence.py +52 -30
- pypromice/resources/__init__.py +28 -0
- pypromice/{process/metadata.csv → resources/file_attributes.csv} +0 -2
- pypromice/resources/variable_aliases_GC-Net.csv +78 -0
- pypromice/resources/variables.csv +106 -0
- pypromice/station_configuration.py +118 -0
- pypromice/tx/get_l0tx.py +7 -4
- pypromice/tx/payload_formats.csv +1 -0
- pypromice/tx/tx.py +27 -6
- pypromice/utilities/__init__.py +0 -0
- pypromice/utilities/git.py +62 -0
- {pypromice-1.3.6.dist-info → pypromice-1.4.1.dist-info}/METADATA +4 -4
- pypromice-1.4.1.dist-info/RECORD +53 -0
- {pypromice-1.3.6.dist-info → pypromice-1.4.1.dist-info}/WHEEL +1 -1
- pypromice-1.4.1.dist-info/entry_points.txt +13 -0
- pypromice/postprocess/station_configurations.toml +0 -762
- pypromice/process/get_l3.py +0 -46
- pypromice/process/variables.csv +0 -92
- pypromice/qc/persistence_test.py +0 -150
- pypromice/test/test_config1.toml +0 -69
- pypromice/test/test_config2.toml +0 -54
- pypromice/test/test_email +0 -75
- pypromice/test/test_payload_formats.csv +0 -4
- pypromice/test/test_payload_types.csv +0 -7
- pypromice/test/test_percentile.py +0 -229
- pypromice/test/test_raw1.txt +0 -4468
- pypromice/test/test_raw_DataTable2.txt +0 -11167
- pypromice/test/test_raw_SlimTableMem1.txt +0 -1155
- pypromice/test/test_raw_transmitted1.txt +0 -15411
- pypromice/test/test_raw_transmitted2.txt +0 -28
- pypromice-1.3.6.dist-info/RECORD +0 -53
- pypromice-1.3.6.dist-info/entry_points.txt +0 -8
- {pypromice-1.3.6.dist-info → pypromice-1.4.1.dist-info}/LICENSE.txt +0 -0
- {pypromice-1.3.6.dist-info → pypromice-1.4.1.dist-info}/top_level.txt +0 -0
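
The bulk of the diff below is the rewrite of pypromice/postprocess/get_bufr.py. Station metadata moves out of the bundled pypromice/postprocess/station_configurations.toml (deleted above) and into the new pypromice/station_configuration.py module, which loads one TOML file per station from a root directory supplied at run time (see the new --station_configurations_root flag in main() below). A minimal loading sketch, assuming only the call signature visible in this diff; the directory path and station id are illustrative:

    from pathlib import Path

    from pypromice.station_configuration import load_station_configuration_mapping

    # Hypothetical root directory holding one <stid>.toml file per station
    config_root = Path("metadata/station_configurations")
    station_configs = load_station_configuration_mapping(
        config_root,
        skip_unexpected_fields=True,  # tolerate extra keys, as main() below does
    )
    # Mapping from stid to StationConfiguration, e.g. check the BUFR export flag
    print(station_configs["EXAMPLE_STID"].export_bufr)  # hypothetical station id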
--- a/pypromice/postprocess/get_bufr.py
+++ b/pypromice/postprocess/get_bufr.py
@@ -1,11 +1,16 @@
-#!/usr/bin/env python
-
 """
 Command-line script for running BUFR file generation

 Post-processing functions for AWS station data, such as converting PROMICE and GC-Net data files to WMO-compliant BUFR files

 """
+__all__ = [
+    "get_bufr",
+    "main",
+    "DEFAULT_POSITION_SEED_PATH",
+    "DEFAULT_LIN_REG_TIME_LIMIT",
+]
+
 import argparse
 import glob
 import logging
@@ -13,258 +18,45 @@ import pickle
 import sys
 from datetime import datetime, timedelta
 from pathlib import Path
-from typing import List, Dict,
+from typing import List, Dict, Optional, Sequence, Mapping

-import attrs
 import numpy as np
 import pandas as pd
-import toml

 from pypromice.postprocess.bufr_utilities import write_bufr_message, BUFRVariables
 from pypromice.postprocess.real_time_utilities import get_latest_data

-logger = logging.getLogger(__name__)

-
-
+from pypromice.station_configuration import (
+    StationConfiguration,
+    load_station_configuration_mapping,
 )
-DEFAULT_POSITION_SEED_PATH = Path(__file__).parent.joinpath("positions_seed.csv")
-DEFAULT_LIN_REG_TIME_LIMIT = "91d"
-
-def parse_arguments_bufr() -> argparse.ArgumentParser:
-    parser = argparse.ArgumentParser()
-
-    parser.add_argument(
-        "--store_positions",
-        "--positions",
-        action="store_true",
-        required=False,
-        default=False,
-        help="If included (True), make a positions dict and output AWS_latest_locations.csv file.",
-    )
-
-    parser.add_argument(
-        "--positions-filepath",
-        "-p",
-        type=Path,
-        required=False,
-        help="Path to write AWS_latest_locations.csv file.",
-    )
-
-    parser.add_argument(
-        "--time-limit",
-        default=DEFAULT_LIN_REG_TIME_LIMIT,
-        type=str,
-        required=False,
-        help="Previous time to limit dataframe before applying linear regression.",
-    )
-
-    parser.add_argument(
-        "--input_files",
-        "--l3-filepath",
-        "-i",
-        type=Path,
-        nargs="+",
-        required=True,
-        help="Path to L3 tx .csv files. Can be direct paths or glob patterns",
-    )
-
-    parser.add_argument(
-        "--bufr-out",
-        "-o",
-        type=Path,
-        required=True,
-        help="Path to the BUFR out directory.",
-    )
-
-    parser.add_argument(
-        "--timestamps-pickle-filepath",
-        type=Path,
-        required=False,
-        help="Path to the latest_timestamps.pickle file.",
-    )
-
-    parser.add_argument(
-        "--station_configuration_mapping",
-        default=DEFAULT_STATION_CONFIGURATION_PATH,
-        type=Path,
-        required=False,
-        help="Path to csv file with station meta data and BUFR export configuration",
-    )
-
-    parser.add_argument(
-        "--position_seed",
-        default=DEFAULT_POSITION_SEED_PATH,
-        type=Path,
-        required=False,
-        help="Path to csv file with seed values for output positions.",
-    )
-
-    parser.add_argument(
-        '--latest_timestamp',
-        default=datetime.utcnow(),
-        type=pd.Timestamp,
-        help="Timestamp used to determine latest data. Default utcnow."
-    )
-
-    parser.add_argument("--verbose", "-v", default=False, action="store_true")
-
-    return parser
-
-
-@attrs.define
-class StationConfiguration:
-    """
-    Helper class for storing station specific configurations with respect to
-
-    * Installation specific distance measurements such as height differences between instruments
-    * Reference strings such as stid, station_site and wmo_id
-    * BUFR export specific parameters
-
-    # TODO: The station related meta data should be fetched from a station specific configuration files in the future or
-    # from header data in data source.
-    """

-
-    station_site: str = None
-    project: Optional[str] = None
-    station_type: Optional[str] = None
-    wmo_id: Optional[str] = None
-    barometer_from_gps: Optional[float] = None
-    anemometer_from_sonic_ranger: Optional[float] = None
-    temperature_from_sonic_ranger: Optional[float] = None
-    height_of_gps_from_station_ground: Optional[float] = None
-    sonic_ranger_from_gps: Optional[float] = None
-
-    # The station data will be exported to BUFR if True. Otherwise, it will only export latest position
-    export_bufr: bool = False
-    comment: Optional[str] = None
-
-    # skip specific variables for stations
-    # If a variable has known bad data, use this collection to skip the variable
-    # Note that if a station is not reporting both air temp and pressure it will be skipped,
-    # as currently implemented in csv2bufr.min_data_check().
-    # ['p_i'], # EXAMPLE
-    skipped_variables: List[str] = attrs.field(factory=list)
-
-    positions_update_timestamp_only: bool = False
-
-    def as_dict(self) -> Dict:
-        return attrs.asdict(self)
-
-
-def load_station_configuration_mapping(
-    fp: Union[str, Path, TextIO]
-) -> Mapping[str, StationConfiguration]:
-    """
-    Read station configurations from toml file
-
-    Parameters
-    ----------
-    fp :
-        Path to or open toml file
-
-    Returns
-    -------
-    Mapping from stid to StationConfiguration
-
-    """
-    return {
-        stid: StationConfiguration(**config_dict)
-        for stid, config_dict in toml.load(fp).items()
-    }
-
-
-def write_station_configuration_mapping(
-    config_mapping: Mapping[str, StationConfiguration], fp: TextIO
-):
-    """
-    Write station configuration to toml file
-
-    Parameters
-    ----------
-    config_mapping
-        Mapping from stid to StationConfiguration
-    fp
-        open writable TextIO
-    """
-    config_mapping = {
-        config.stid: config.as_dict() for config in config_mapping.values()
-    }
-    toml.dump(config_mapping, fp)
-
-
-def process_station(
-    file_path: Path,
-    output_path: Path,
-    now_timestamp: datetime,
-    latest_timestamp: Optional[datetime],
-    time_limit: str,
-    stid: str,
-    station_configuration: StationConfiguration,
-) -> Optional[Dict]:
-    df = load_data(file_path, now_timestamp)
-
-    # Select current data
-    latest_data = get_latest_data(
-        df,
-        lin_reg_time_limit=time_limit,
-    )
-
-    if latest_data is None:
-        logger.info("No valid instantaneous timestamps!")
-        return None
-
-    latest_data = filter_skipped_variables(
-        latest_data, vars_to_skip=station_configuration.skipped_variables
-    )
-
-    # Check that we have minimum required valid data
-    sufficient_wx_data, sufficient_position_data = min_data_check(latest_data)
-
-    station_position = dict()
-    station_position["timestamp"] = latest_data.name
-    if sufficient_position_data:
-        station_position["lon"] = latest_data.get("gps_lon_fit")
-        station_position["lat"] = latest_data.get("gps_lat_fit")
-        station_position["alt"] = latest_data.get("gps_alt_fit")
-    else:
-        logger.warning("Insufficient position data")
-        # Don't use any position attributes from latest_data
-        station_position["lon"] = None
-        station_position["lat"] = None
-        station_position["alt"] = None
-        return station_position
-
-    if station_configuration.export_bufr:
-        if not sufficient_wx_data:
-            logger.warning(f"Failed min data wx {stid}")
-            return station_position
-
-        # Store current timest
-        if latest_data.name <= latest_timestamp:
-            logger.info(f"No new data {latest_data.name} <= {latest_timestamp}")
-            return station_position
-
-        # Construct and export BUFR file
-        bufr_variables = get_bufr_variables(
-            data=latest_data,
-            station_configuration=station_configuration,
-        )
-        with output_path.open("bw") as fp:
-            write_bufr_message(variables=bufr_variables, file=fp)
+logger = logging.getLogger(__name__)

-
+DEFAULT_POSITION_SEED_PATH = Path(__file__).parent.joinpath("positions_seed.csv")
+DEFAULT_LIN_REG_TIME_LIMIT = "91d"
+REQUIRED_KEYS = (
+    "t_i",
+    "p_i",
+    "rh_i",
+    "wdir_i",
+    "wspd_i",
+    "gps_lat_fit",
+    "gps_lon_fit",
+    "gps_alt_fit",
+    "z_boom_u_smooth",
+)


-def load_data(file_path: Path,
+def load_data(file_path: Path, latest_timestamp: datetime) -> pd.DataFrame:
     """
-    Read AWS data from csv file using time as index and filter all rows after
+    Read AWS data from csv file using time as index and filter all rows after latest_timestamp

     Parameters
     ----------
     file_path
-
+    latest_timestamp

     Returns
     -------
@@ -276,7 +68,7 @@ def load_data(file_path: Path, now_timestamp: datetime) -> pd.DataFrame:
         .set_index("time")
         .sort_index()
     )
-    df = df[:
+    df = df[:latest_timestamp]
     return df


@@ -285,12 +77,13 @@ def get_bufr(
     input_files: Sequence[Path],
    positions_filepath: Optional[Path],
     timestamps_pickle_filepath: Optional[Path],
-
-
+    station_configuration_mapping: Mapping[str, StationConfiguration],
+    target_timestamp: Optional[datetime] = None,
     positions_seed_path: Optional[Path] = None,
-
+    time_window_length: timedelta = timedelta(days=2),
     store_positions: bool = False,
-
+    linear_regression_time_limit: str = "91d",
+    break_on_error: bool = False,
 ):
     """
     Main function for generating BUFR files and determine latest positions from a sequence of csv files
@@ -304,48 +97,42 @@ def get_bufr(
     bufr_out
         Path to the BUFR out directory.
     input_files
-        List of
+        List of csv file paths.
     positions_filepath
         Path to write latest positions. Used to retrieve a static set of positions to register stations with DMI/WMO
     timestamps_pickle_filepath
         Path to pickle file used for storing latest timestamp
-
-
-
-        get_bufr will export the latest data before
+    station_configuration_mapping
+        Mapping of station id to StationConfiguration object
+    target_timestamp
+        get_bufr will export the latest data before target_timestamp. Default datetime.utcnow()
     positions_seed_path
         Path to csv file with position data used as default values for the output position.
-
-        The
+    time_window_length
+        The length of the time window to consider for the latest data. Default 2 days
     store_positions
         Flag determine if latest positions are exported.
-
+    linear_regression_time_limit
         Previous time to limit dataframe before applying linear regression.
+    break_on_error
+        If True, the function will raise an exception if an error occurs during processing.

     """
-    if
-
-
-    if earliest_timestamp is None:
-        earliest_timestamp = now_timestamp - timedelta(days=2)
+    if target_timestamp is None:
+        target_timestamp = datetime.utcnow()

     # Prepare (latest) positions
     positions = dict()
     if positions_seed_path:
         positions_seed = pd.read_csv(
-            positions_seed_path,
+            positions_seed_path,
+            index_col="stid",
+            delimiter=",",
+            parse_dates=["timestamp"],
         ).to_dict(orient="index")
         logger.info(f"Seed positions for {positions_seed.keys()}")
         positions.update(positions_seed)

-    # Prepare station configurations
-    if station_configuration_path is None:
-        station_configuration_mapping = dict()
-    else:
-        station_configuration_mapping = load_station_configuration_mapping(
-            station_configuration_path
-        )
-
     # Prepare bufr output dir
     bufr_out.mkdir(parents=True, exist_ok=True)

@@ -357,18 +144,13 @@ def get_bufr(
         logger.info("latest_timestamps.pickle not found!")
         latest_timestamps = {}

-    # Initiate a new dict for current timestamps
-    current_timestamps = {}
-
     # Setup diagnostic lists (logger.info at end)
     skipped = []
     no_recent_data = []
-    no_entry_latest_timestamps = []
-    failed_min_data_wx = []
-    failed_min_data_pos = []

     # Iterate through csv files
     for file_path in input_files:
+        # TODO: This split is explicitly requiring the filename to have sampleate at suffix. This shuld be more robust
         stid = file_path.stem.rsplit("_", 1)[0]
         logger.info("####### Processing {} #######".format(stid))

@@ -381,40 +163,63 @@

         output_path = bufr_out / f"{stid}.bufr"
         logger.info(f"Generating {output_path} from {file_path}")
-
-
+
+        time_window_start = target_timestamp - time_window_length
+        # Use only newer data than the latest timestamp
+        if stid in latest_timestamps:
+            time_window_start = max(latest_timestamps[stid], time_window_start)

         try:
-
-                file_path=file_path,
-                output_path=output_path,
-                now_timestamp=now_timestamp,
-                latest_timestamp=latest_timestamp,
-                time_limit=time_limit,
-                stid=stid,
-                station_configuration=station_configuration,
-            )
-        except Exception:
-            logger.exception(f"Failed processing {stid}")
-            continue
+            input_data = load_data(file_path, target_timestamp)

-
-
+            # Select current data
+            latest_data = get_latest_data(
+                input_data,
+                lin_reg_time_limit=linear_regression_time_limit,
+                vars_to_skip=station_configuration.skipped_variables,
+            )
+            if latest_data is None:
+                logger.info("No valid instantaneous timestamps!")
+                skipped.append(stid)
+                continue

-
+            # Create station positions
+            station_position = get_station_positions(latest_data)
             if stid not in positions:
                 positions[stid] = dict()
-
             if station_configuration.positions_update_timestamp_only:
                 positions[stid]["timestamp"] = station_position["timestamp"]
             else:
                 positions[stid].update(station_position)

+            # Create BUFR File
+            if (
+                station_configuration.export_bufr
+                and latest_data.name > time_window_start
+            ):
+                latest_timestamps[stid] = latest_data.name
+                bufr_variables = get_bufr_variables(latest_data, station_configuration)
+                if bufr_variables:
+                    with output_path.open("bw") as output_file:
+                        write_bufr_message(bufr_variables, output_file)
+            else:
+                logger.info(f"No new data {latest_data.name} <= {time_window_start}")
+                no_recent_data.append(stid)
+
+        except Exception:
+            logger.exception(f"Failed processing {stid}")
+            if output_path.exists():
+                output_path.unlink()
+            if break_on_error:
+                raise
+            skipped.append(stid)
+            continue
+
     # Write the most recent timestamps back to the pickle on disk
     logger.info(f"writing latest_timestamps to {timestamps_pickle_filepath}")
     if timestamps_pickle_filepath:
         with timestamps_pickle_filepath.open("wb") as handle:
-            pickle.dump(
+            pickle.dump(latest_timestamps, handle, protocol=pickle.HIGHEST_PROTOCOL)

     if store_positions:
         positions_df = pd.DataFrame.from_dict(
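
The hunk above replaces the old process_station() flow with an inline loop: a BUFR message is written only when the newest observation is newer than both the previously exported timestamp and the start of a sliding window behind target_timestamp. A minimal sketch of that freshness check, with made-up timestamps and a hypothetical station id:

    from datetime import datetime, timedelta

    target_timestamp = datetime(2024, 6, 1, 12, 0)
    time_window_length = timedelta(days=2)  # default in the new get_bufr() signature
    latest_timestamps = {"EXAMPLE_STID": datetime(2024, 5, 31, 9, 0)}

    time_window_start = target_timestamp - time_window_length
    if "EXAMPLE_STID" in latest_timestamps:
        # Never re-export observations that were already sent
        time_window_start = max(latest_timestamps["EXAMPLE_STID"], time_window_start)

    latest_observation = datetime(2024, 6, 1, 6, 0)  # newest row in the input file
    print(latest_observation > time_window_start)  # True -> a BUFR file is written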
@@ -427,13 +232,7 @@ def get_bufr(
     positions_df.to_csv(positions_filepath, index_label="stid")

     logger.info("--------------------------------")
-
-    not_processed_count = (
-        len(skipped)
-        + len(no_recent_data)
-        + len(no_entry_latest_timestamps)
-        + len(not_processed_wx_pos)
-    )
+    not_processed_count = len(skipped) + len(no_recent_data)
     logger.info(
         "BUFR exported for {} of {} fpaths.".format(
             (len(input_files) - not_processed_count), len(input_files)
@@ -442,47 +241,46 @@ def get_bufr(
     logger.info("")
     logger.info("skipped: {}".format(skipped))
     logger.info("no_recent_data: {}".format(no_recent_data))
-    logger.info("no_entry_latest_timestamps: {}".format(no_entry_latest_timestamps))
-    logger.info("failed_min_data_wx: {}".format(failed_min_data_wx))
-    logger.info("failed_min_data_pos: {}".format(failed_min_data_pos))
     logger.info("--------------------------------")


-def
-
-
-    """
-
-
-
-
-
-
-
-
-
-
-
-
-
-    for var_key in vars_to_skip:
-        row[var_key] = np.nan
-        logger.info("----> Skipping var: {}".format(var_key))
-    return row
+def get_station_positions(latest_data: pd.Series) -> Dict:
+    station_position = dict()
+    station_position["timestamp"] = latest_data.name
+    station_position["lat"] = latest_data["gps_lat_fit"]
+    station_position["lon"] = latest_data["gps_lon_fit"]
+    station_position["alt"] = latest_data["gps_alt_fit"]
+    if any(
+        [
+            pd.isna(station_position["lat"]),
+            pd.isna(station_position["lon"]),
+            pd.isna(station_position["alt"]),
+        ]
+    ):
+        logger.warning("Insufficient position data")
+        station_position["lat"] = None
+        station_position["lon"] = None
+        station_position["alt"] = None
+    return station_position


 def get_bufr_variables(
     data: pd.Series,
     station_configuration: StationConfiguration,
-) -> BUFRVariables:
+) -> Optional[BUFRVariables]:
     """
-    Helper function for converting our
+    Helper function for converting our variables to the variables needed for bufr export.
+
+    Raises AttributeError if station_configuration don't have the minimum dimension fields since they are required to determine barometer heights.
+    * height_of_gps_from_station_ground
+    * barometer_from_gps
+
+

     Parameters
     ----------
     data
-        Series with processed
+        Series with processed variables from get_latest_datas

     station_configuration

@@ -491,30 +289,62 @@ def get_bufr_variables(
         BUFRVariables used by bufr_utilities

     """
-
-    if
-
-
+
+    if not all(key in data.index for key in REQUIRED_KEYS):
+        raise ValueError(
+            f"Failed to process BUFRVariables. Missing required keys: {REQUIRED_KEYS}"
         )

-
-
-
-
+    # Check that we have minimum required fields to proceed with writing to BUFR
+    # Always require minimum a valid air temp or a valid pressure.
+    # If both air temp and pressure are nan, do not submit.
+    # This will allow the case of having only one or the other.
+    if data[["t_i", "p_i"]].isna().all():
+        logger.warning("Failed to process BUFRVariables - insufficient data")
+        return None
+
+    # Always require a valid position data
+    if data[["gps_lat_fit", "gps_lon_fit", "gps_alt_fit"]].isna().any():
+        logger.warning("Failed to process BUFRVariables - insufficient position data")
+        return None
+
+    if station_configuration.height_of_gps_from_station_ground is None:
+        raise AttributeError(
+            "height_of_gps_from_station_ground is required for BUFR export"
         )
+    if station_configuration.barometer_from_gps is None:
+        raise AttributeError("barometer_from_gps is required for BUFR export")

-
-
-
-
+    if station_configuration.static_height_of_gps_from_mean_sea_level is None:
+        height_of_gps_above_mean_sea_level = data["gps_alt_fit"]
+    else:
+        height_of_gps_above_mean_sea_level = (
+            station_configuration.static_height_of_gps_from_mean_sea_level
         )

-
-
-
-
+    heightOfStationGroundAboveMeanSeaLevel = (
+        height_of_gps_above_mean_sea_level
+        - station_configuration.height_of_gps_from_station_ground
+    )
+
+    heightOfBarometerAboveMeanSeaLevel = (
+        height_of_gps_above_mean_sea_level + station_configuration.barometer_from_gps
+    )
+
+    if station_configuration.temperature_from_sonic_ranger is None:
+        heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformTempRH = np.nan
+    else:
+        heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformTempRH = (
+            data["z_boom_u_smooth"]
+            + station_configuration.temperature_from_sonic_ranger
         )

+    if station_configuration.anemometer_from_sonic_ranger is None:
+        heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformWSPD = np.nan
+    else:
+        heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformWSPD = (
+            data["z_boom_u_smooth"] + station_configuration.anemometer_from_sonic_ranger
+        )

     output_row = BUFRVariables(
         wmo_id=station_configuration.wmo_id,
@@ -526,7 +356,7 @@
         airTemperature=data.t_i + 273.15,
         # Convert pressure, correct the -1000 offset, then hPa to Pa
         # note that instantaneous pressure has 0.1 hPa precision
-
+        nonCoordinatePressure=(data.p_i + 1000.0) * 100.0,
         windDirection=data.wdir_i,
         windSpeed=data.wspd_i,
         latitude=data.gps_lat_fit,
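
The nonCoordinatePressure change above makes the unit handling explicit: p_i is stored in hPa with a -1000 offset, so the offset is restored before converting hPa to Pa. A quick worked check:

    p_i = 12.5  # illustrative stored value, i.e. 1012.5 hPa
    nonCoordinatePressure = (p_i + 1000.0) * 100.0
    assert nonCoordinatePressure == 101250.0  # Pa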
@@ -540,60 +370,75 @@
     return output_row


-def min_data_check(s):
-    """Check that we have minimum required fields to proceed with writing to BUFR
-    For wx vars, we currently require both air temp and pressure to be non-NaN.
-    If you know a specific var is reporting bad data, you can ignore just that var
-    using the vars_to_skip dict in wmo_config.
-
-    Parameters
-    ----------
-    s : pandas series
-        The current obset we are working with (for BUFR submission)
-
-    Returns
-    -------
-    min_data_wx_result : bool
-        True (default), the test for min wx data passed. False, the test failed.
-    min_data_pos_result : bool
-        True (default), the test for min position data passed. False, the test failed.
-    """
-    min_data_wx_result = True
-    min_data_pos_result = True
-
-    # Can use pd.isna() or math.isnan() below...
-
-    # Always require valid air temp and valid pressure (both must be non-nan)
-    # if (pd.isna(s['t_i']) is False) and (pd.isna(s['p_i']) is False):
-    #     pass
-    # else:
-    #     print('----> Failed min_data_check for air temp and pressure!')
-    #     min_data_wx_result = False
-
-    # If both air temp and pressure are nan, do not submit.
-    # This will allow the case of having only one or the other.
-    if (pd.isna(s["t_i"]) is True) and (pd.isna(s["p_i"]) is True):
-        logger.warning("----> Failed min_data_check for air temp and pressure!")
-        min_data_wx_result = False
-
-    # Missing just elevation OK
-    # if (pd.isna(s['gps_lat_fit']) is False) and (pd.isna(s['gps_lon_fit']) is False):
-    #     pass
-    # Require all three: lat, lon, elev
-    if (
-        (pd.isna(s["gps_lat_fit"]) is False)
-        and (pd.isna(s["gps_lon_fit"]) is False)
-        and (pd.isna(s["gps_alt_fit"]) is False)
-    ):
-        pass
-    else:
-        logger.warning("----> Failed min_data_check for position!")
-        min_data_pos_result = False
-
-    return min_data_wx_result, min_data_pos_result
-
 def main():
-
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--store_positions",
+        "--positions",
+        action="store_true",
+        required=False,
+        default=False,
+        help="If included (True), make a positions dict and output AWS_latest_locations.csv file.",
+    )
+    parser.add_argument(
+        "--positions-filepath",
+        "-p",
+        type=Path,
+        required=False,
+        help="Path to write AWS_latest_locations.csv file.",
+    )
+    parser.add_argument(
+        "--linear_regression_time_limit",
+        "--time-limit",
+        default=DEFAULT_LIN_REG_TIME_LIMIT,
+        type=str,
+        required=False,
+        help="Previous time to limit dataframe before applying linear regression.",
+    )
+    parser.add_argument(
+        "--input_files",
+        "-i",
+        type=Path,
+        nargs="+",
+        required=True,
+        help="Path to input files .csv files. Can be direct paths or glob patterns",
+    )
+    parser.add_argument(
+        "--bufr-out",
+        "-o",
+        type=Path,
+        required=True,
+        help="Path to the BUFR out directory.",
+    )
+    parser.add_argument(
+        "--timestamps-pickle-filepath",
+        type=Path,
+        required=False,
+        help="Path to the latest_timestamps.pickle file.",
+    )
+    parser.add_argument(
+        "--station_configurations_root",
+        type=Path,
+        required=True,
+        help="Path to root directory containing station configuration toml files",
+    )
+    parser.add_argument(
+        "--position_seed",
+        default=DEFAULT_POSITION_SEED_PATH,
+        type=Path,
+        required=False,
+        help="Path to csv file with seed values for output positions.",
+    )
+    parser.add_argument(
+        "--target_timestamp",
+        "--now-timestamp",
+        default=datetime.utcnow(),
+        type=pd.Timestamp,
+        help="Timestamp used to determine latest data. Default utcnow.",
+    )
+    parser.add_argument("--verbose", "-v", default=False, action="store_true")
+
+    args = parser.parse_args()

     log_level = logging.INFO
     if args.verbose:
@@ -613,17 +458,23 @@ def main():
         # The input path might be a glob pattern
         input_files += map(Path, glob.glob(path.as_posix()))

+    station_configuration_mapping = load_station_configuration_mapping(
+        args.station_configurations_root,
+        skip_unexpected_fields=True,
+    )
+
     get_bufr(
         bufr_out=args.bufr_out,
         input_files=input_files,
         store_positions=args.store_positions,
         positions_filepath=args.positions_filepath,
-
+        linear_regression_time_limit=args.linear_regression_time_limit,
         timestamps_pickle_filepath=args.timestamps_pickle_filepath,
-
-
+        target_timestamp=args.target_timestamp,
+        station_configuration_mapping=station_configuration_mapping,
         positions_seed_path=args.position_seed,
     )

+
 if __name__ == "__main__":
-    main()
+    main()
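
With these changes get_bufr() is driven entirely by explicit arguments: a station configuration mapping, a target timestamp, and tunable window and regression limits, instead of module-level state. A minimal calling sketch, assuming only the keyword names from the new signature above; all paths are illustrative:

    from datetime import datetime, timedelta
    from pathlib import Path

    from pypromice.postprocess.get_bufr import get_bufr
    from pypromice.station_configuration import load_station_configuration_mapping

    station_configs = load_station_configuration_mapping(
        Path("metadata/station_configurations"),  # hypothetical TOML root
        skip_unexpected_fields=True,
    )
    get_bufr(
        bufr_out=Path("bufr_out"),
        input_files=sorted(Path("l3").glob("*_hour.csv")),  # stid is taken from the filename stem
        positions_filepath=Path("AWS_latest_locations.csv"),
        timestamps_pickle_filepath=Path("latest_timestamps.pickle"),
        station_configuration_mapping=station_configs,
        target_timestamp=datetime.utcnow(),
        time_window_length=timedelta(days=2),
        store_positions=True,
        linear_regression_time_limit="91d",
    )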