pypromice 1.3.6__py3-none-any.whl → 1.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pypromice might be problematic.

Files changed (53)
  1. pypromice/postprocess/bufr_to_csv.py +15 -3
  2. pypromice/postprocess/bufr_utilities.py +91 -18
  3. pypromice/postprocess/create_bufr_files.py +178 -0
  4. pypromice/postprocess/get_bufr.py +248 -397
  5. pypromice/postprocess/make_metadata_csv.py +214 -0
  6. pypromice/postprocess/real_time_utilities.py +41 -11
  7. pypromice/process/L0toL1.py +12 -5
  8. pypromice/process/L1toL2.py +69 -14
  9. pypromice/process/L2toL3.py +1034 -186
  10. pypromice/process/aws.py +139 -808
  11. pypromice/process/get_l2.py +90 -0
  12. pypromice/process/get_l2tol3.py +111 -0
  13. pypromice/process/join_l2.py +112 -0
  14. pypromice/process/join_l3.py +551 -120
  15. pypromice/process/load.py +161 -0
  16. pypromice/process/resample.py +147 -0
  17. pypromice/process/utilities.py +68 -0
  18. pypromice/process/write.py +503 -0
  19. pypromice/qc/github_data_issues.py +10 -16
  20. pypromice/qc/persistence.py +52 -30
  21. pypromice/resources/__init__.py +28 -0
  22. pypromice/{process/metadata.csv → resources/file_attributes.csv} +0 -2
  23. pypromice/resources/variable_aliases_GC-Net.csv +78 -0
  24. pypromice/resources/variables.csv +106 -0
  25. pypromice/station_configuration.py +118 -0
  26. pypromice/tx/get_l0tx.py +7 -4
  27. pypromice/tx/payload_formats.csv +1 -0
  28. pypromice/tx/tx.py +27 -6
  29. pypromice/utilities/__init__.py +0 -0
  30. pypromice/utilities/git.py +62 -0
  31. {pypromice-1.3.6.dist-info → pypromice-1.4.1.dist-info}/METADATA +4 -4
  32. pypromice-1.4.1.dist-info/RECORD +53 -0
  33. {pypromice-1.3.6.dist-info → pypromice-1.4.1.dist-info}/WHEEL +1 -1
  34. pypromice-1.4.1.dist-info/entry_points.txt +13 -0
  35. pypromice/postprocess/station_configurations.toml +0 -762
  36. pypromice/process/get_l3.py +0 -46
  37. pypromice/process/variables.csv +0 -92
  38. pypromice/qc/persistence_test.py +0 -150
  39. pypromice/test/test_config1.toml +0 -69
  40. pypromice/test/test_config2.toml +0 -54
  41. pypromice/test/test_email +0 -75
  42. pypromice/test/test_payload_formats.csv +0 -4
  43. pypromice/test/test_payload_types.csv +0 -7
  44. pypromice/test/test_percentile.py +0 -229
  45. pypromice/test/test_raw1.txt +0 -4468
  46. pypromice/test/test_raw_DataTable2.txt +0 -11167
  47. pypromice/test/test_raw_SlimTableMem1.txt +0 -1155
  48. pypromice/test/test_raw_transmitted1.txt +0 -15411
  49. pypromice/test/test_raw_transmitted2.txt +0 -28
  50. pypromice-1.3.6.dist-info/RECORD +0 -53
  51. pypromice-1.3.6.dist-info/entry_points.txt +0 -8
  52. {pypromice-1.3.6.dist-info → pypromice-1.4.1.dist-info}/LICENSE.txt +0 -0
  53. {pypromice-1.3.6.dist-info → pypromice-1.4.1.dist-info}/top_level.txt +0 -0
pypromice/process/load.py
@@ -0,0 +1,161 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Load module
+"""
+from datetime import timedelta
+from typing import Sequence, Optional
+
+import logging
+import os
+import pandas as pd
+import toml
+import xarray as xr
+
+logger = logging.getLogger(__name__)
+
+
+def getConfig(
+    config_file, inpath, default_columns: Sequence[str] = ("msg_lat", "msg_lon")
+):
+    """Load configuration from .toml file. PROMICE .toml files support defining
+    features at the top level which apply to all nested properties, but do not
+    overwrite nested properties if they are already defined
+
+    Parameters
+    ----------
+    config_file : str
+        TOML file path
+    inpath : str
+        Input folder directory where L0 files can be found
+
+    Returns
+    -------
+    conf : dict
+        Configuration dictionary
+    """
+    conf = toml.load(config_file)
+    # Move all top-level keys to nested properties, if they are not already
+    # defined in the nested properties
+    top = [_ for _ in conf.keys() if not type(conf[_]) is dict]
+    subs = [_ for _ in conf.keys() if type(conf[_]) is dict]
+    for s in subs:
+        for t in top:
+            if t not in conf[s].keys():
+                conf[s][t] = conf[t]
+
+        # Record the config file and the L0 file path (from the section name)
+        conf[s]["conf"] = config_file
+        conf[s]["file"] = os.path.join(inpath, s)
+        conf[s]["columns"].extend(default_columns)
+
+    for t in top:
+        conf.pop(t)  # Delete all top-level keys because each file
+        # should carry all properties with it
+    for k in conf.keys():  # Check required fields are present
+        for field in ["columns", "station_id", "format", "skiprows"]:
+            assert field in conf[k].keys(), field + " not in config keys"
+    return conf
+
+
+def getL0(
+    infile: str,
+    nodata,
+    cols,
+    skiprows,
+    file_version,
+    delimiter=",",
+    comment="#",
+    time_offset: Optional[float] = None,
+) -> xr.Dataset:
+    """Read L0 data file into an xarray Dataset
+
+    Parameters
+    ----------
+    infile : str
+        L0 file path
+    nodata : list
+        List of values to interpret as NaN
+    cols : list
+        List of columns in file
+    skiprows : int
+        Number of header rows to skip
+    file_version : int
+        Version of L0 file
+    delimiter : str
+        String delimiter for L0 file
+    comment : str
+        Character that marks commented sections in the L0 file
+    time_offset : Optional[float]
+        Time offset in hours for correcting non-UTC time data
+
+    Returns
+    -------
+    ds : xarray.Dataset
+        L0 Dataset
+    """
+    if file_version == 1:
+        df = pd.read_csv(
+            infile,
+            comment=comment,
+            index_col=0,
+            na_values=nodata,
+            names=cols,
+            sep=delimiter,
+            skiprows=skiprows,
+            skip_blank_lines=True,
+            usecols=range(len(cols)),
+            low_memory=False,
+        )
+        df["time"] = pd.to_datetime(
+            df.year.astype(str)
+            + df.doy.astype(str).str.zfill(3)
+            + df.hhmm.astype(str).str.zfill(4),
+            format="%Y%j%H%M",
+        )
+        df = df.set_index("time")
+
+    else:
+        df = pd.read_csv(
+            infile,
+            comment=comment,
+            index_col=0,
+            na_values=nodata,
+            names=cols,
+            parse_dates=True,
+            sep=delimiter,
+            skiprows=skiprows,
+            skip_blank_lines=True,
+            usecols=range(len(cols)),
+            low_memory=False,
+        )
+        try:
+            df.index = pd.to_datetime(df.index)
+        except ValueError as e:
+            logger.info("\n" + infile)
+            logger.info("\nValueError:")
+            logger.info(e)
+            logger.info("\t\t> Trying pd.to_datetime with format=mixed")
+            try:
+                df.index = pd.to_datetime(df.index, format="mixed")
+            except Exception as e:
+                logger.info("\nDateParseError:")
+                logger.info(e)
+                logger.info(
+                    "\t\t> Trying again removing apostrophes in timestamp (old files format)"
+                )
+                df.index = pd.to_datetime(df.index.str.replace('"', ""))
+
+    if time_offset is not None:
+        df.index = df.index + timedelta(hours=time_offset)
+
+    # Drop SKIP columns
+    for c in df.columns:
+        if c[0:4] == "SKIP":
+            df.drop(columns=c, inplace=True)
+
+    # Carry relevant metadata with ds
+    ds = xr.Dataset.from_dataframe(df)
+    ds.attrs["level"] = "L0"
+
+    return ds
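
For orientation, here is a minimal sketch of how the new load module might be driven (not taken from the package; the config path, data folder, and fallback values are assumptions):

```python
# Hypothetical usage of pypromice.process.load (paths and defaults are
# illustrative assumptions, not values shipped with the package)
from pypromice.process.load import getConfig, getL0

# getConfig returns one dict per TOML section (one section per L0 file),
# with top-level keys copied into each section
conf = getConfig("config/QAS_L.toml", "data/QAS_L")
for filename, cfg in conf.items():
    ds = getL0(
        infile=cfg["file"],                         # set by getConfig
        nodata=cfg.get("nodata", ["-999", "NAN"]),  # assumed fallback
        cols=cfg["columns"],
        skiprows=cfg["skiprows"],
        file_version=cfg.get("file_version", -1),   # assumed fallback
        time_offset=cfg.get("time_offset"),
    )
    print(filename, ds.attrs["level"], ds.sizes["time"])
```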
pypromice/process/resample.py
@@ -0,0 +1,147 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Mon Jun 10 10:58:39 2024
+
+@author: pho
+"""
+import logging
+import numpy as np
+import xarray as xr
+from pypromice.process.L1toL2 import calcDirWindSpeeds
+
+logger = logging.getLogger(__name__)
+
+
+def resample_dataset(ds_h, t):
+    '''Resample L2 AWS data, e.g. hourly to daily average. This uses pandas
+    DataFrame resampling as a work-around for xarray Dataset resampling,
+    which is a lengthy process taking ~2-3 minutes per operation:
+    ds_d = ds_h.resample({'time':"1D"}).mean()
+    This has since been fixed upstream, so the xarray approach could now be
+    implemented here:
+    https://github.com/pydata/xarray/issues/4498#event-6610799698
+
+    Parameters
+    ----------
+    ds_h : xarray.Dataset
+        L3 AWS dataset either at 10 min (for raw data) or hourly (for tx data)
+    t : str
+        Resample factor, same variable definition as in
+        pandas.DataFrame.resample()
+
+    Returns
+    -------
+    ds_d : xarray.Dataset
+        L3 AWS dataset resampled to the frequency defined by t
+    '''
+    df_d = ds_h.to_dataframe().resample(t).mean()
+
+    # Taking the 10 min data and using it as instantaneous values:
+    if (t == '60min') and (ds_h.time.diff(dim='time').isel(time=0).dt.total_seconds() == 600):
+        cols_to_update = ['p_i', 't_i', 'rh_i', 'rh_i_cor', 'wspd_i', 'wdir_i', 'wspd_x_i', 'wspd_y_i']
+        for col in cols_to_update:
+            df_d[col] = ds_h.reindex(time=df_d.index)[col.replace('_i', '_u')].values
+            if col == 'p_i':
+                df_d[col] = df_d[col].values - 1000
+
+    # Recalculating wind direction from averaged directional wind speeds
+    for var in ['wdir_u', 'wdir_l']:
+        boom = var.split('_')[1]
+        if var in df_d.columns:
+            if ('wspd_x_' + boom in df_d.columns) and ('wspd_y_' + boom in df_d.columns):
+                df_d[var] = _calcWindDir(df_d['wspd_x_' + boom], df_d['wspd_y_' + boom])
+            else:
+                logger.info(var + ' in dataframe but not wspd_x_' + boom + ' nor wspd_y_' + boom + ', recalculating them')
+                ds_h['wspd_x_' + boom], ds_h['wspd_y_' + boom] = calcDirWindSpeeds(ds_h['wspd_' + boom], ds_h['wdir_' + boom])
+                df_d[['wspd_x_' + boom, 'wspd_y_' + boom]] = ds_h[['wspd_x_' + boom, 'wspd_y_' + boom]].to_dataframe().resample(t).mean()
+                df_d[var] = _calcWindDir(df_d['wspd_x_' + boom], df_d['wspd_y_' + boom])
+
+    # Recalculating relative humidity from average vapour pressure and average
+    # saturation vapour pressure
+    for var in ['rh_u', 'rh_l']:
+        lvl = var.split('_')[1]
+        if var in df_d.columns:
+            if 't_' + lvl in ds_h.keys():
+                es_wtr, es_cor = calculateSaturationVaporPressure(ds_h['t_' + lvl])
+                p_vap = ds_h[var] / 100 * es_wtr
+
+                df_d[var] = (p_vap.to_series().resample(t).mean()
+                             / es_wtr.to_series().resample(t).mean()) * 100
+                if var + '_cor' in df_d.keys():
+                    df_d[var + '_cor'] = (p_vap.to_series().resample(t).mean()
+                                          / es_cor.to_series().resample(t).mean()) * 100
+
+    # Passing each variable's attributes to the resampled dataset
+    vals = []
+    for c in df_d.columns:
+        if c in ds_h.data_vars:
+            vals.append(xr.DataArray(
+                data=df_d[c], dims=['time'],
+                coords={'time': df_d.index}, attrs=ds_h[c].attrs))
+        else:
+            vals.append(xr.DataArray(
+                data=df_d[c], dims=['time'],
+                coords={'time': df_d.index}, attrs=None))
+
+    ds_d = xr.Dataset(dict(zip(df_d.columns, vals)), attrs=ds_h.attrs)
+    return ds_d
+
+
+def calculateSaturationVaporPressure(t, T_0=273.15, T_100=373.15, es_0=6.1071,
+                                     es_100=1013.246, eps=0.622):
+    '''Calculate saturation vapour pressure (Goff-Gratch formulation)
+
+    Parameters
+    ----------
+    t : xarray.DataArray
+        Air temperature (C)
+    T_0 : float
+        Melting point temperature (K). Default is 273.15.
+    T_100 : float
+        Steam point temperature (K). Default is 373.15.
+    es_0 : float
+        Saturation vapour pressure at the melting point (hPa)
+    es_100 : float
+        Saturation vapour pressure at the steam point temperature (hPa)
+    eps : float
+        Ratio of molar masses of water vapour and dry air (unused here)
+
+    Returns
+    -------
+    xarray.DataArray
+        Saturation vapour pressure with regard to water above 0 C (hPa)
+    xarray.DataArray
+        Saturation vapour pressure where subfreezing timestamps are with
+        regard to ice (hPa)
+    '''
+    # Saturation vapour pressure above 0 C (hPa)
+    es_wtr = 10**(-7.90298 * (T_100 / (t + T_0) - 1) + 5.02808 * np.log10(T_100 / (t + T_0))
+                  - 1.3816E-7 * (10**(11.344 * (1 - (t + T_0) / T_100)) - 1)
+                  + 8.1328E-3 * (10**(-3.49149 * (T_100 / (t + T_0) - 1)) - 1) + np.log10(es_100))
+
+    # Saturation vapour pressure below 0 C (hPa)
+    es_ice = 10**(-9.09718 * (T_0 / (t + T_0) - 1) - 3.56654
+                  * np.log10(T_0 / (t + T_0)) + 0.876793
+                  * (1 - (t + T_0) / T_0)
+                  + np.log10(es_0))
+
+    # Saturation vapour pressure (hPa)
+    es_cor = xr.where(t < 0, es_ice, es_wtr)
+
+    return es_wtr, es_cor
+
+
+def _calcWindDir(wspd_x, wspd_y):
+    '''Calculate wind direction in degrees
+
+    Parameters
+    ----------
+    wspd_x : xarray.DataArray
+        Wind speed in X direction
+    wspd_y : xarray.DataArray
+        Wind speed in Y direction
+
+    Returns
+    -------
+    wdir : xarray.DataArray
+        Wind direction
+    '''
+    deg2rad = np.pi / 180
+    rad2deg = 1 / deg2rad
+    wdir = np.arctan2(wspd_x, wspd_y) * rad2deg
+    wdir = (wdir + 360) % 360
+    return wdir
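
The wind-direction block in resample_dataset averages wspd_x/wspd_y and then reconstructs wdir with _calcWindDir, rather than averaging wdir itself, because direction is circular and a plain mean is wrong across north. A self-contained illustration (not from the package, using the same sin/cos component convention as _calcWindDir):

```python
# Averaging wind direction directly fails across north; averaging the
# directional components, as resample_dataset does, gives the right answer
import numpy as np

wdir = np.array([350.0, 10.0])   # degrees, both roughly northerly
wspd = np.array([5.0, 5.0])      # m/s
wspd_x = wspd * np.sin(np.deg2rad(wdir))
wspd_y = wspd * np.cos(np.deg2rad(wdir))

naive = wdir.mean()  # 180.0 -- due south, clearly wrong
vector = (np.degrees(np.arctan2(wspd_x.mean(), wspd_y.mean())) + 360) % 360
print(naive, vector)  # 180.0 vs ~0.0 (north)
```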
pypromice/process/utilities.py
@@ -0,0 +1,68 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Utilities module for data formatting, populating and metadata handling
+"""
+import numpy as np
+
+
+def popCols(ds, names):
+    '''Populate dataset with all given variable names
+
+    Parameters
+    ----------
+    ds : xr.Dataset
+        Dataset
+    names : list
+        List of variable names to populate
+
+    Returns
+    -------
+    ds : xr.Dataset
+        Dataset with all given variable names present (missing ones as NaN)
+    '''
+    for v in names:
+        if v not in list(ds.variables):
+            ds[v] = (('time'), np.arange(ds['time'].size) * np.nan)
+    return ds
+
+
+def addBasicMeta(ds, vars_df):
+    '''Use a variable lookup table DataFrame to add the basic metadata
+    to the xarray dataset. This is later amended to finalise L3
+
+    Parameters
+    ----------
+    ds : xr.Dataset
+        Dataset to add metadata to
+    vars_df : pd.DataFrame
+        Metadata dataframe
+
+    Returns
+    -------
+    ds : xr.Dataset
+        Dataset with added metadata
+    '''
+    for v in vars_df.index:
+        if v == 'time':
+            continue  # coordinate variable, not normal var
+        if v not in list(ds.variables):
+            continue
+        for c in ['standard_name', 'long_name', 'units']:
+            if isinstance(vars_df[c][v], float) and np.isnan(vars_df[c][v]):
+                continue
+            ds[v].attrs[c] = vars_df[c][v]
+    return ds
+
+
+def populateMeta(ds, conf, skip):
+    '''Populate L0 Dataset with metadata dictionary
+
+    Parameters
+    ----------
+    ds : xarray.Dataset
+        L0 dataset
+    conf : dict
+        Metadata dictionary
+    skip : list
+        List of column names to skip parsing to metadata
+
+    Returns
+    -------
+    ds : xarray.Dataset
+        L0 dataset with metadata populated as Dataset attributes
+    '''
+    # e.g. skip = ["columns", "skiprows"]
+    for k in conf.keys():
+        if k not in skip:
+            ds.attrs[k] = conf[k]
+    return ds
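
Finally, a sketch of how these helpers might chain together during processing (variable names, lookup-table rows, and attribute keys are invented for illustration):

```python
# Hypothetical end-to-end use of the utilities helpers
import pandas as pd
import xarray as xr
from pypromice.process.utilities import popCols, addBasicMeta, populateMeta

ds = xr.Dataset(coords={"time": pd.date_range("2024-06-01", periods=3, freq="h")})
ds = popCols(ds, ["t_u", "p_u"])  # ensure expected variables exist, filled with NaN

# One lookup-table row per variable, in the spirit of resources/variables.csv
vars_df = pd.DataFrame(
    {"standard_name": ["air_temperature"],
     "long_name": ["Air temperature"],
     "units": ["degrees_C"]},
    index=["t_u"],
)
ds = addBasicMeta(ds, vars_df)  # attach CF-style attributes per variable

# Copy config entries onto the Dataset as global attributes
ds = populateMeta(ds, {"station_id": "QAS_L", "columns": ["t_u"]}, skip=["columns"])
print(ds["t_u"].attrs, ds.attrs)
```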