pypromice 1.5.3__py3-none-any.whl → 1.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of pypromice has been flagged as potentially problematic.
Files changed (67)
  1. pypromice/__init__.py +2 -0
  2. pypromice/{qc → core/qc}/github_data_issues.py +22 -13
  3. pypromice/{qc → core/qc}/percentiles/compute_thresholds.py +2 -2
  4. pypromice/{qc → core/qc}/persistence.py +22 -29
  5. pypromice/{process → core/qc}/value_clipping.py +3 -3
  6. pypromice/core/resampling.py +142 -0
  7. pypromice/core/variables/__init__.py +1 -0
  8. pypromice/core/variables/air_temperature.py +64 -0
  9. pypromice/core/variables/gps.py +221 -0
  10. pypromice/core/variables/humidity.py +111 -0
  11. pypromice/core/variables/precipitation.py +108 -0
  12. pypromice/core/variables/pressure_transducer_depth.py +79 -0
  13. pypromice/core/variables/radiation.py +422 -0
  14. pypromice/core/variables/station_boom_height.py +75 -0
  15. pypromice/core/variables/station_pose.py +375 -0
  16. pypromice/io/bufr/__init__.py +0 -0
  17. pypromice/{postprocess → io/bufr}/bufr_to_csv.py +1 -1
  18. pypromice/{postprocess → io/bufr}/create_bufr_files.py +2 -2
  19. pypromice/{postprocess → io/bufr}/get_bufr.py +6 -6
  20. pypromice/{postprocess → io/bufr}/real_time_utilities.py +3 -3
  21. pypromice/io/ingest/__init__.py +0 -0
  22. pypromice/{utilities → io/ingest}/git.py +1 -3
  23. pypromice/io/ingest/l0.py +294 -0
  24. pypromice/io/ingest/l0_repository.py +103 -0
  25. pypromice/io/ingest/toa5.py +87 -0
  26. pypromice/{process → io}/write.py +1 -1
  27. pypromice/pipeline/L0toL1.py +291 -0
  28. pypromice/pipeline/L1toL2.py +233 -0
  29. pypromice/{process → pipeline}/L2toL3.py +113 -118
  30. pypromice/pipeline/__init__.py +4 -0
  31. pypromice/{process → pipeline}/aws.py +10 -82
  32. pypromice/{process → pipeline}/get_l2.py +2 -2
  33. pypromice/{process → pipeline}/get_l2tol3.py +19 -22
  34. pypromice/{process → pipeline}/join_l2.py +31 -32
  35. pypromice/{process → pipeline}/join_l3.py +16 -14
  36. pypromice/{process → pipeline}/resample.py +75 -51
  37. pypromice/{process → pipeline}/utilities.py +0 -22
  38. pypromice/resources/file_attributes.csv +4 -4
  39. pypromice/resources/variable_aliases_GC-Net.csv +2 -2
  40. pypromice/resources/variables.csv +27 -24
  41. {pypromice-1.5.3.dist-info → pypromice-1.7.0.dist-info}/METADATA +1 -2
  42. pypromice-1.7.0.dist-info/RECORD +65 -0
  43. pypromice-1.7.0.dist-info/entry_points.txt +12 -0
  44. pypromice/get/__init__.py +0 -1
  45. pypromice/get/get.py +0 -211
  46. pypromice/get/get_promice_data.py +0 -56
  47. pypromice/process/L0toL1.py +0 -564
  48. pypromice/process/L1toL2.py +0 -824
  49. pypromice/process/__init__.py +0 -4
  50. pypromice/process/load.py +0 -161
  51. pypromice-1.5.3.dist-info/RECORD +0 -54
  52. pypromice-1.5.3.dist-info/entry_points.txt +0 -13
  53. /pypromice/{postprocess → core}/__init__.py +0 -0
  54. /pypromice/{utilities → core}/dependency_graph.py +0 -0
  55. /pypromice/{qc → core/qc}/__init__.py +0 -0
  56. /pypromice/{qc → core/qc}/percentiles/__init__.py +0 -0
  57. /pypromice/{qc → core/qc}/percentiles/outlier_detector.py +0 -0
  58. /pypromice/{qc → core/qc}/percentiles/thresholds.csv +0 -0
  59. /pypromice/{process → core/variables}/wind.py +0 -0
  60. /pypromice/{utilities → io}/__init__.py +0 -0
  61. /pypromice/{postprocess → io/bufr}/bufr_utilities.py +0 -0
  62. /pypromice/{postprocess → io/bufr}/positions_seed.csv +0 -0
  63. /pypromice/{station_configuration.py → io/bufr/station_configuration.py} +0 -0
  64. /pypromice/{postprocess → io}/make_metadata_csv.py +0 -0
  65. {pypromice-1.5.3.dist-info → pypromice-1.7.0.dist-info}/WHEEL +0 -0
  66. {pypromice-1.5.3.dist-info → pypromice-1.7.0.dist-info}/licenses/LICENSE.txt +0 -0
  67. {pypromice-1.5.3.dist-info → pypromice-1.7.0.dist-info}/top_level.txt +0 -0
pypromice/{process → pipeline}/L2toL3.py

@@ -6,11 +6,14 @@ import pandas as pd
  import numpy as np
  import xarray as xr
  from sklearn.linear_model import LinearRegression
- from pypromice.qc.github_data_issues import adjustData
  from scipy.interpolate import interp1d
  from pathlib import Path
+ from pypromice.core.qc.github_data_issues import adjustData
  import logging

+ from pypromice.core.qc.github_data_issues import adjustData
+ from pypromice.core.variables import humidity, station_boom_height
+
  logger = logging.getLogger(__name__)

  def toL3(L2,
@@ -24,7 +27,6 @@ def toL3(L2,
  - continuous surface height, ice surface height, snow height
  - thermistor depths

-
  Parameters
  ----------
  L2 : xarray:Dataset
@@ -50,16 +52,20 @@ def toL3(L2,
  # Upper boom bulk calculation
  T_h_u = ds['t_u'].copy() # Copy for processing
  p_h_u = ds['p_u'].copy()
- rh_h_u_wrt_ice_or_water = ds['rh_u_wrt_ice_or_water'].copy()

- q_h_u = calculate_specific_humidity(T_0, T_100, T_h_u, p_h_u, rh_h_u_wrt_ice_or_water) # Calculate specific humidity
+ # Calculate specific humidity
+ q_h_u = humidity.calculate_specific_humidity(ds["t_u"],
+ ds["p_u"],
+ ds["rh_u_wrt_ice_or_water"])
+
  if ('wspd_u' in ds.keys()) and \
  ('t_surf' in ds.keys()) and \
- ('z_boom_u' in ds.keys()):
+ ('z_boom_cor_u' in ds.keys()):
  WS_h_u = ds['wspd_u'].copy()
  Tsurf_h = ds['t_surf'].copy() # T surf from derived upper boom product. TODO is this okay to use with lower boom parameters?
- z_WS_u = ds['z_boom_u'].copy() + 0.4 # Get height of Anemometer
- z_T_u = ds['z_boom_u'].copy() - 0.1 # Get height of thermometer
+
+ z_WS_u = ds['z_boom_cor_u'].copy() + 0.4 # Get height of Anemometer
+ z_T_u = ds['z_boom_cor_u'].copy() - 0.1 # Get height of thermometer

  if not is_bedrock:
  SHF_h_u, LHF_h_u= calculate_tubulent_heat_fluxes(T_0, T_h_u, Tsurf_h, WS_h_u, # Calculate latent and sensible heat fluxes
@@ -68,12 +74,12 @@ def toL3(L2,
  ds['dshf_u'] = (('time'), SHF_h_u.data)
  ds['dlhf_u'] = (('time'), LHF_h_u.data)
  else:
- logger.info('wspd_u, t_surf or z_boom_u missing, cannot calulate tubrulent heat fluxes')
+ logger.info('wspd_u, t_surf or z_boom_cor_u missing, cannot calculate turbulent heat fluxes')

- q_h_u = 1000 * q_h_u # Convert sp.humid from kg/kg to g/kg
- ds['qh_u'] = (('time'), q_h_u.data)
+ # Convert specific humidity from kg/kg to g/kg
+ ds['qh_u'] = humidity.convert(q_h_u)
  else:
- logger.info('t_u, p_u or rh_u_wrt_ice_or_water missing, cannot calulate tubrulent heat fluxes')
+ logger.info('t_u, p_u or rh_u_wrt_ice_or_water missing, cannot calculate turbulent heat fluxes')

  # Lower boom bulk calculation
  if ds.attrs['number_of_booms']==2:
@@ -82,15 +88,19 @@ def toL3(L2,
  ('rh_l_wrt_ice_or_water' in ds.keys()):
  T_h_l = ds['t_l'].copy() # Copy for processing
  p_h_l = ds['p_l'].copy()
- rh_h_l_wrt_ice_or_water = ds['rh_l_wrt_ice_or_water'].copy()

- q_h_l = calculate_specific_humidity(T_0, T_100, T_h_l, p_h_l, rh_h_l_wrt_ice_or_water) # Calculate sp.humidity
+ # Calculate specific humidity
+ q_h_l = humidity.calculate_specific_humidity(ds["t_l"],
+ ds["p_l"],
+ ds["rh_l_wrt_ice_or_water"])

  if ('wspd_l' in ds.keys()) and \
  ('t_surf' in ds.keys()) and \
- ('z_boom_l' in ds.keys()):
- z_WS_l = ds['z_boom_l'].copy() + 0.4 # Get height of W
- z_T_l = ds['z_boom_l'].copy() - 0.1 # Get height of thermometer
+ ('z_boom_cor_l' in ds.keys()):
+ z_WS_l = ds['z_boom_cor_l'].copy() + 0.4 # Get height of radiometer
+ z_T_l = ds['z_boom_cor_l'].copy() - 0.1 # Get height of thermometer
+
+ # Get wind speed lower boom measurements
  WS_h_l = ds['wspd_l'].copy()

  if not is_bedrock:
@@ -100,12 +110,13 @@ def toL3(L2,
  ds['dshf_l'] = (('time'), SHF_h_l.data)
  ds['dlhf_l'] = (('time'), LHF_h_l.data)
  else:
- logger.info('wspd_l, t_surf or z_boom_l missing, cannot calulate tubrulent heat fluxes')
+ logger.info('wspd_l, t_surf or z_boom_cor_l missing, cannot calculate turbulent heat fluxes')
+
+ # Convert specific humidity from kg/kg to g/kg
+ ds['qh_l'] = humidity.convert(q_h_l)

- q_h_l = 1000 * q_h_l # Convert sp.humid from kg/kg to g/kg
- ds['qh_l'] = (('time'), q_h_l.data)
  else:
- logger.info('t_l, p_l or rh_l_wrt_ice_or_water missing, cannot calulate tubrulent heat fluxes')
+ logger.info('t_l, p_l or rh_l_wrt_ice_or_water missing, cannot calculate turbulent heat fluxes')

  if len(station_config)==0:
  logger.warning('\n***\nThe station configuration file is missing or improperly passed to pypromice. Some processing steps might fail.\n***\n')
@@ -161,12 +172,30 @@ def process_surface_height(ds, data_adjustments_dir, station_config={}):
  ds['z_surf_1'] = ('time', ds['z_boom_u'].data * np.nan)
  ds['z_surf_2'] = ('time', ds['z_boom_u'].data * np.nan)

+ z_boom_best_u = station_boom_height.include_uncorrected_values(
+ ds["z_boom_u"],
+ ds["z_boom_cor_u"],
+ ds["t_u"],
+ ds["t_l"] if "t_l" in ds.data_vars else None,
+ ds["t_rad"] if "t_rad" in ds.data_vars else None)
+
+
+
+ if 'z_stake' in ds.data_vars and ds.z_stake.notnull().any():
+ # Calculate stake boom height correction with uncorrected values where needed
+ z_stake_best = station_boom_height.include_uncorrected_values(
+ ds["z_stake"],
+ ds["z_stake_cor"],
+ ds["t_u"],
+ ds["t_l"] if "t_l" in ds.data_vars else None,
+ ds["t_rad"] if "t_rad" in ds.data_vars else None)
+
  if ds.attrs['site_type'] == 'ablation':
  # Calculate surface heights for ablation sites
- ds['z_surf_1'] = 2.6 - ds['z_boom_u']
+ ds['z_surf_1'] = 2.6 - z_boom_best_u
  if ds.z_stake.notnull().any():
- first_valid_index = ds.time.where((ds.z_stake + ds.z_boom_u).notnull(), drop=True).data[0]
- ds['z_surf_2'] = ds.z_surf_1.sel(time=first_valid_index) + ds.z_stake.sel(time=first_valid_index) - ds['z_stake']
+ first_valid_index = ds.time.where((z_stake_best + z_boom_best_u).notnull(), drop=True).data[0]
+ ds['z_surf_2'] = ds.z_surf_1.sel(time=first_valid_index) + z_stake_best.sel(time=first_valid_index) - z_stake_best

  # Use corrected point data if available
  if 'z_pt_cor' in ds.data_vars:
@@ -174,17 +203,28 @@ def process_surface_height(ds, data_adjustments_dir, station_config={}):

  else:
  # Calculate surface heights for other site types
- first_valid_index = ds.time.where(ds.z_boom_u.notnull(), drop=True).data[0]
- ds['z_surf_1'] = ds.z_boom_u.sel(time=first_valid_index) - ds['z_boom_u']
+ first_valid_index = ds.time.where(z_boom_best_u.notnull(), drop=True).data[0]
+ ds['z_surf_1'] = z_boom_best_u.sel(time=first_valid_index) - z_boom_best_u
+
  if 'z_stake' in ds.data_vars and ds.z_stake.notnull().any():
- first_valid_index = ds.time.where(ds.z_stake.notnull(), drop=True).data[0]
- ds['z_surf_2'] = ds.z_stake.sel(time=first_valid_index) - ds['z_stake']
+ first_valid_index = ds.time.where(z_stake_best.notnull(), drop=True).data[0]
+ ds['z_surf_2'] = z_stake_best.sel(time=first_valid_index) - z_stake_best
+
  if 'z_boom_l' in ds.data_vars:
- # need a combine first because KAN_U switches from having a z_stake
- # to having a z_boom_l
- first_valid_index = ds.time.where(ds.z_boom_l.notnull(), drop=True).data[0]
+
+ # Calculate lower boom height correction with uncorrected values where needed
+ z_boom_best_l = station_boom_height.include_uncorrected_values(
+ ds["z_boom_l"],
+ ds["z_boom_cor_l"],
+ ds["t_l"],
+ ds["t_u"] if "t_u" in ds.data_vars else None,
+ ds["t_rad"] if "t_rad" in ds.data_vars else None)
+
+ # need a combine first because KAN_U switches from having a z_stake_best
+ # to having a z_boom_best_l
+ first_valid_index = ds.time.where(z_boom_best_l.notnull(), drop=True).data[0]
  ds['z_surf_2'] = ds['z_surf_2'].combine_first(
- ds.z_boom_l.sel(time=first_valid_index) - ds['z_boom_l'])
+ z_boom_best_l.sel(time=first_valid_index) - z_boom_best_l)

  # Adjust data for the created surface height variables
  ds = adjustData(ds, data_adjustments_dir, var_list=['z_surf_1', 'z_surf_2', 'z_ice_surf'])
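The three call sites above all funnel through the new station_boom_height.include_uncorrected_values helper, which prefers the temperature-corrected sonic-ranger height and falls back to the raw reading where the correction is missing. The helper's body is not part of this diff; below is a minimal sketch of the fallback pattern its arguments suggest, assuming the standard sonic-ranger speed-of-sound correction sqrt((T + 273.15) / 273.15) and the same temperature-fallback order as the call sites. The function name and internals are illustrative, not the module's actual code:

    import numpy as np

    def include_uncorrected_values_sketch(z, z_cor, t_primary, t_secondary=None, t_rad=None):
        # Prefer the corrected boom height; where it is missing, apply the
        # speed-of-sound correction to the raw reading, taking air temperature
        # from the first available sensor (assumed fallback order).
        t = t_primary
        for t_fallback in (t_secondary, t_rad):
            if t_fallback is not None:
                t = t.fillna(t_fallback)
        z_from_raw = z * np.sqrt((t + 273.15) / 273.15)  # assumed correction
        return z_cor.fillna(z_from_raw)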
@@ -221,8 +261,10 @@ def process_surface_height(ds, data_adjustments_dir, station_config={}):
  .rolling('1D', center=True, min_periods=1)
  .median())

- z_ice_surf = z_ice_surf.loc[ds.time]
- # here we make sure that the periods where both z_stake and z_pt are
+ z_ice_surf = z_ice_surf.reindex(ds.time,
+ method=None).interpolate(method='time')
+
+ # here we make sure that the periods where both z_stake_best and z_pt are
  # missing are also missing in z_ice_surf
  msk = ds['z_ice_surf'].notnull() | ds['z_surf_2_adj'].notnull()
  z_ice_surf = z_ice_surf.where(msk)
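The switch from .loc[ds.time] to reindex plus time interpolation is more than a style change: .loc raises a KeyError when ds.time contains timestamps absent from the smoothed series, whereas reindex(..., method=None) inserts NaN at unmatched timestamps and interpolate(method='time') then fills them in proportion to the elapsed time. A self-contained illustration of that behaviour:

    import pandas as pd

    s = pd.Series([0.0, 2.0],
                  index=pd.to_datetime(["2024-01-01 00:00", "2024-01-01 02:00"]))
    target = pd.to_datetime(["2024-01-01 00:00", "2024-01-01 00:30", "2024-01-01 02:00"])

    # method=None leaves unmatched timestamps as NaN ...
    out = s.reindex(target, method=None)
    # ... and time-based interpolation fills them proportionally to the gap
    print(out.interpolate(method="time"))  # value at 00:30 becomes 0.5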
@@ -234,7 +276,7 @@ def process_surface_height(ds, data_adjustments_dir, station_config={}):
  # sides are less than 0.01 m appart

  # Forward and backward fill to identify bounds of gaps
- df_filled = z_ice_surf.fillna(method='ffill').fillna(method='bfill')
+ df_filled = z_ice_surf.ffill().bfill()

  # Identify gaps and their start and end dates
  gaps = pd.DataFrame(index=z_ice_surf[z_ice_surf.isna()].index)
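This one-line change tracks pandas itself: fillna(method='ffill') has been deprecated (since pandas 2.1) in favour of the dedicated ffill()/bfill() methods, which behave identically:

    import pandas as pd

    s = pd.Series([1.0, None, None, 4.0])
    # Equivalent to the old s.fillna(method='ffill').fillna(method='bfill')
    print(s.ffill().bfill())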
@@ -253,7 +295,7 @@ def process_surface_height(ds, data_adjustments_dir, station_config={}):
  z_ice_surf.loc[gaps_to_fill] = df_filled.loc[gaps_to_fill]

  # bringing the variable into the dataset
- ds['z_ice_surf'] = z_ice_surf
+ ds['z_ice_surf'] = ('time', z_ice_surf.values)

  ds['z_surf_combined'] = np.maximum(ds['z_surf_combined'], ds['z_ice_surf'])
  ds['snow_height'] = np.maximum(0, ds['z_surf_combined'] - ds['z_ice_surf'])
@@ -271,6 +313,7 @@ def process_surface_height(ds, data_adjustments_dir, station_config={}):
  ice_temp_vars = [v for v in ds.data_vars if 't_i_' in v]
  vars_out = [v.replace('t', 'd_t') for v in ice_temp_vars]
  vars_out.append('t_i_10m')
+
  df_out = get_thermistor_depth(
  ds[ice_temp_vars + ['z_surf_combined']].to_dataframe(),
  ds.attrs['station_id'],
@@ -289,7 +332,7 @@ def combine_surface_height(df, site_type, threshold_ablation = -0.0002):
  period is estimated each year (either the period when z_pt_cor decreases
  or JJA if no better estimate) then different adjustmnents are conducted
  to stitch the three time series together: z_ice_surface (adjusted from
- z_pt_cor) or if unvailable, z_surf_2 (adjusted from z_stake)
+ z_pt_cor) or if unavailable, z_surf_2 (adjusted from z_stake)
  are used in the ablation period while an average of z_surf_1 and z_surf_2
  are used otherwise, after they are being adjusted to z_ice_surf at the end
  of the ablation season.
@@ -344,22 +387,24 @@ def combine_surface_height(df, site_type, threshold_ablation = -0.0002):

  # defining ice ablation period from the decrease of a smoothed version of z_pt
  # meaning when smoothed_z_pt.diff() < threshold_ablation
- # first smoothing
- smoothed_PT = (df['z_ice_surf']
- .resample('h')
- .interpolate(limit=72)
- .rolling('14D',center=True, min_periods=1)
- .mean())
- # second smoothing
- smoothed_PT = smoothed_PT.rolling('14D', center=True, min_periods=1).mean()
-
- smoothed_PT = smoothed_PT.reindex(df.index,method='ffill')
- # smoothed_PT.loc[df.z_ice_surf.isnull()] = np.nan
-
- # logical index where ablation is detected
- ind_ablation = np.logical_and(smoothed_PT.diff().values < threshold_ablation,
- np.isin(smoothed_PT.diff().index.month, [6, 7, 8, 9]))
-
+ hourly_interp = (df["z_ice_surf"]
+ .resample("h")
+ .interpolate(limit=72))
+ once_smoothed = hourly_interp.rolling("14D", center=True, min_periods=1).mean()
+ smoothed_PT = once_smoothed.rolling("14D", center=True, min_periods=1).mean()
+
+ # ablation detection
+ diff_series = smoothed_PT.diff()
+ ind_ablation = np.full_like(diff_series, False, dtype=bool)
+ ind_ablation = np.logical_and(diff_series.values < threshold_ablation,
+ np.isin(diff_series.index.month, [6, 7, 8, 9]))
+ # making sure that we only qualify as ablation timestamps where we actually have ablation data
+ msk = np.isnan(smoothed_PT.values)
+ ind_ablation[msk] = False
+
+ # reindex back to df
+ smoothed_PT = smoothed_PT.reindex(df.index, method="ffill")
+ ind_ablation = pd.Series(ind_ablation, index=diff_series.index).reindex(df.index, fill_value=False).values

  # finding the beginning and end of each period with True
  idx = np.argwhere(np.diff(np.r_[False,ind_ablation, False])).reshape(-1, 2)
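The rework keeps the same two-pass 14-day smoothing but names each step and, crucially, adds a mask so that timestamps with no pressure-transducer data can no longer be flagged as ablation. A runnable sketch of the same pattern on synthetic data (names and values are illustrative):

    import numpy as np
    import pandas as pd

    idx = pd.date_range("2023-06-01", "2023-09-30", freq="h")
    z_ice_surf = pd.Series(np.linspace(0.0, -1.5, len(idx)), index=idx)  # steady lowering

    hourly_interp = z_ice_surf.resample("h").interpolate(limit=72)
    once_smoothed = hourly_interp.rolling("14D", center=True, min_periods=1).mean()
    smoothed = once_smoothed.rolling("14D", center=True, min_periods=1).mean()

    diff = smoothed.diff()
    ind_ablation = (diff.values < -0.0002) & np.isin(diff.index.month, [6, 7, 8, 9])
    ind_ablation[np.isnan(smoothed.values)] = False  # no data, no ablation flag
    print(ind_ablation.sum(), "hourly timestamps flagged as ablation")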
@@ -378,13 +423,12 @@ def combine_surface_height(df, site_type, threshold_ablation = -0.0002):
  # finding the beginning and end of each period with True
  idx = np.argwhere(np.diff(np.r_[False,ind_ablation, False])).reshape(-1, 2)
  idx[:, 1] -= 1
-
  # because the smooth_PT sees 7 days ahead, it starts showing a decline
- # 7 days in advance, we therefore need to exclude the first 7 days of
+ # 7 days in advance, we therefore need to exclude the first few days of
  # each ablation period
  for start, end in idx:
  period_start = df.index[start]
- period_end = period_start + pd.Timedelta(days=7)
+ period_end = period_start + pd.Timedelta(days=3)
  exclusion_period = (df.index >= period_start) & (df.index < period_end)
  ind_ablation[exclusion_period] = False

@@ -393,8 +437,6 @@ def combine_surface_height(df, site_type, threshold_ablation = -0.0002):
  z=df["z_ice_surf_adj"].interpolate(limit=24*2).copy()

  # the surface heights are adjusted so that they start at 0
-
-
  if any(~np.isnan(hs2.iloc[:24*7])):
  hs2 = hs2 - hs2.iloc[:24*7].mean()

@@ -470,9 +512,8 @@ def combine_surface_height(df, site_type, threshold_ablation = -0.0002):
  # to hs1 and hs2 the year after.

  for i, y in enumerate(years):
- # if y == 2014:
- # import pdb; pdb.set_trace()
- logger.debug(str(y))
+ logger.debug(f'{y}: Ablation from {z.index[ind_start[i]]} to {z.index[ind_end[i]]}')
+
  # defining subsets of hs1, hs2, z
  hs1_jja = hs1[str(y)+'-06-01':str(y)+'-09-01']
  hs2_jja = hs2[str(y)+'-06-01':str(y)+'-09-01']
@@ -588,7 +629,7 @@ def combine_surface_height(df, site_type, threshold_ablation = -0.0002):
  # import pdb; pdb.set_trace()
  # if there's ablation and
  # if there are PT data available at the end of the melt season
- if z.iloc[(ind_end[i]-24*7):(ind_end[i]+24*7)].notnull().any():
+ if z.iloc[(ind_end[i]-24*7):ind_end[i]].notnull().any():
  logger.debug('adjusting hs2 to z')
  # then we adjust hs2 to the end-of-ablation z
  # first trying at the end of melt season
@@ -605,7 +646,7 @@ def combine_surface_height(df, site_type, threshold_ablation = -0.0002):
  np.nanmean(hs2.iloc[(ind_start[i+1]-24*7):(ind_start[i+1]+24*7)]) + \
  np.nanmean(z.iloc[(ind_start[i+1]-24*7):(ind_start[i+1]+24*7)])
  else:
- logger.debug('no ablation')
+ logger.debug('no ablation data')
  hs1_following_winter = hs1[str(y)+'-09-01':str(y+1)+'-03-01'].copy()
  hs2_following_winter = hs2[str(y)+'-09-01':str(y+1)+'-03-01'].copy()
  if all(np.isnan(hs2_following_winter)):
@@ -880,14 +921,18 @@ def get_thermistor_depth(df_in, site, station_config):

  # removing negative depth
  df_in.loc[df_in[depth_cols_name[i]]<0, depth_cols_name[i]] = np.nan
- logger.info("interpolating 10 m firn/ice temperature")
- df_in['t_i_10m'] = interpolate_temperature(
- df_in.index.values,
- df_in[depth_cols_name].values.astype(float),
- df_in[temp_cols_name].values.astype(float),
+
+ logger.info("interpolating 10 m firn/ice temperature (on hourly values)")
+ df_in_h = df_in[depth_cols_name+temp_cols_name].resample('h').mean()
+ df_in_h['t_i_10m'] = interpolate_temperature(
+ df_in_h.index.values,
+ df_in_h[depth_cols_name].values.astype(float),
+ df_in_h[temp_cols_name].values.astype(float),
  kind="linear",
  min_diff_to_depth=1.5,
  ).set_index('date').values
+ df_in['t_i_10m'] = df_in_h['t_i_10m'].reindex(df_in.index,
+ method=None)

  # filtering
  ind_pos = df_in["t_i_10m"] > 0.1
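Interpolating the 10 m temperature on hourly means and then mapping back with reindex(..., method=None) means only timestamps that fall exactly on the hourly grid receive a value; anything off-grid stays NaN rather than being filled from a neighbour:

    import pandas as pd

    hourly = pd.Series([1.0, 2.0],
                       index=pd.to_datetime(["2024-01-01 00:00", "2024-01-01 01:00"]))
    raw_index = pd.to_datetime(["2024-01-01 00:00", "2024-01-01 00:10"])

    # 00:00 matches the hourly grid and is copied; 00:10 stays NaN
    print(hourly.reindex(raw_index, method=None))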
@@ -996,7 +1041,7 @@ def piecewise_smoothing_and_interpolation(data_series, breaks):

  Parameters
  ----------
- data_series : pandas.Series
+ data_series : pd.Series
  Series of observed latitude, longitude or elevation with datetime index.
  breaks: list
  List of timestamps of station relocation. First and last item should be
@@ -1228,56 +1273,6 @@ def calculate_viscosity(T_h, T_0, rho_atm):
  # Kinematic viscosity of air in m^2/s
  return mu / rho_atm

- def calculate_specific_humidity(T_0, T_100, T_h, p_h, rh_h_wrt_ice_or_water, es_0=6.1071, es_100=1013.246, eps=0.622):
- '''Calculate specific humidity
- Parameters
- ----------
- T_0 : float
- Steam point temperature. Default is 273.15.
- T_100 : float
- Steam point temperature in Kelvin
- T_h : xarray.DataArray
- Air temperature
- p_h : xarray.DataArray
- Air pressure
- rh_h_wrt_ice_or_water : xarray.DataArray
- Relative humidity corrected
- es_0 : float
- Saturation vapour pressure at the melting point (hPa)
- es_100 : float
- Saturation vapour pressure at steam point temperature (hPa)
- eps : int
- ratio of molar masses of vapor and dry air (0.622)
-
- Returns
- -------
- xarray.DataArray
- Specific humidity data array
- '''
- # Saturation vapour pressure above 0 C (hPa)
- es_wtr = 10**(-7.90298 * (T_100 / (T_h + T_0) - 1) + 5.02808 * np.log10(T_100 / (T_h + T_0))
- - 1.3816E-7 * (10**(11.344 * (1 - (T_h + T_0) / T_100)) - 1)
- + 8.1328E-3 * (10**(-3.49149 * (T_100 / (T_h + T_0) -1)) - 1) + np.log10(es_100))
-
- # Saturation vapour pressure below 0 C (hPa)
- es_ice = 10**(-9.09718 * (T_0 / (T_h + T_0) - 1) - 3.56654
- * np.log10(T_0 / (T_h + T_0)) + 0.876793
- * (1 - (T_h + T_0) / T_0)
- + np.log10(es_0))
-
- # Specific humidity at saturation (incorrect below melting point)
- q_sat = eps * es_wtr / (p_h - (1 - eps) * es_wtr)
-
- # Replace saturation specific humidity values below melting point
- freezing = T_h < 0
- q_sat[freezing] = eps * es_ice[freezing] / (p_h[freezing] - (1 - eps) * es_ice[freezing])
-
- q_nan = np.isnan(T_h) | np.isnan(p_h)
- q_sat[q_nan] = np.nan
-
- # Convert to kg/kg
- return rh_h_wrt_ice_or_water * q_sat / 100
-
  if __name__ == "__main__":
  # unittest.main()
  pass
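The deleted calculate_specific_humidity is superseded by pypromice.core.variables.humidity.calculate_specific_humidity, whose three-argument call sites appear earlier in this file. For reference, here is a condensed, runnable restatement of the removed physics (same Goff-Gratch constants as above); this paraphrases the old code and is not the new module's implementation:

    import numpy as np

    def specific_humidity(t_c, p_hpa, rh_pct, t_0=273.15, t_100=373.15,
                          es_0=6.1071, es_100=1013.246, eps=0.622):
        # Goff-Gratch saturation vapour pressure over water (hPa)
        es_wtr = 10**(-7.90298 * (t_100 / (t_c + t_0) - 1)
                      + 5.02808 * np.log10(t_100 / (t_c + t_0))
                      - 1.3816e-7 * (10**(11.344 * (1 - (t_c + t_0) / t_100)) - 1)
                      + 8.1328e-3 * (10**(-3.49149 * (t_100 / (t_c + t_0) - 1)) - 1)
                      + np.log10(es_100))
        # Goff-Gratch saturation vapour pressure over ice (hPa)
        es_ice = 10**(-9.09718 * (t_0 / (t_c + t_0) - 1)
                      - 3.56654 * np.log10(t_0 / (t_c + t_0))
                      + 0.876793 * (1 - (t_c + t_0) / t_0)
                      + np.log10(es_0))
        es = np.where(t_c < 0, es_ice, es_wtr)       # ice branch below 0 degC
        q_sat = eps * es / (p_hpa - (1 - eps) * es)  # kg/kg at saturation
        return rh_pct * q_sat / 100.0                # scale by relative humidity

    print(specific_humidity(np.array([-10.0]), np.array([850.0]), np.array([80.0])))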
pypromice/pipeline/__init__.py

@@ -0,0 +1,4 @@
+ from pypromice.pipeline.aws import *
+ from pypromice.pipeline import L0toL1
+ from pypromice.pipeline import L1toL2
+ from pypromice.pipeline import L2toL3
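Because the pypromice.process package is gone, downstream code importing from it must move to the new layout; the renames in the file list above map old to new locations one-to-one, for example:

    # pypromice 1.5.3
    from pypromice.process.aws import AWS
    from pypromice.process.L2toL3 import toL3

    # pypromice 1.7.0
    from pypromice.pipeline.aws import AWS
    from pypromice.pipeline.L2toL3 import toL3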
pypromice/{process → pipeline}/aws.py

@@ -16,11 +16,13 @@ from importlib import metadata


  import pypromice.resources
- from pypromice.process.L0toL1 import toL1
- from pypromice.process.L1toL2 import toL2
- from pypromice.process.L2toL3 import toL3
- from pypromice.process import write, load, utilities
- from pypromice.utilities.git import get_commit_hash_and_check_dirty
+ from pypromice.pipeline.L0toL1 import toL1
+ from pypromice.pipeline.L1toL2 import toL2
+ from pypromice.pipeline.L2toL3 import toL3
+ from pypromice.pipeline import utilities
+ from pypromice.io import write
+ from pypromice.io.ingest.l0 import (load_data_files, load_config)
+ from pypromice.io.ingest.git import get_commit_hash_and_check_dirty

  pd.set_option("display.precision", 2)
  xr.set_options(keep_attrs=True)
@@ -66,7 +68,6 @@ class AWS(object):
  )

  # Load config, variables CSF standards, and L0 files
- self.config = self.loadConfig(config_file, inpath)
  self.vars = pypromice.resources.load_variables(var_file)
  self.meta = pypromice.resources.load_metadata(meta_file)
  self.data_issues_repository = Path(data_issues_repository)
@@ -85,7 +86,9 @@ class AWS(object):
  self.meta["source"] = json.dumps(source_dict)

  # Load config file
- L0 = self.loadL0()
+ config = load_config(config_file, inpath)
+ L0 = load_data_files(config)
+
  self.L0 = []
  for l in L0:
  n = write.getColNames(self.vars, l)
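Config parsing and L0 file reading now live outside the AWS class, in pypromice.io.ingest.l0. Judging from the call site above, the pair can also be used standalone; a hedged sketch, where the paths are hypothetical and the return types are inferred from the old loadConfig/loadL0 docstrings removed below:

    from pypromice.io.ingest.l0 import load_config, load_data_files

    # Same arguments as the old AWS.loadConfig: a station TOML and the L0 folder
    config = load_config("aws-l0/raw/config/QAS_L.toml", "aws-l0/raw/QAS_L/")

    # Presumably a list of L0 xr.Dataset objects, as loadL0 used to return
    for ds in load_data_files(config):
        print(ds)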
@@ -148,78 +151,3 @@ class AWS(object):
  logger.info("Level 3 processing...")
  self.L3 = toL3(self.L2, data_adjustments_dir=self.data_issues_repository / "adjustments")

- def loadConfig(self, config_file, inpath):
- """Load configuration from .toml file
-
- Parameters
- ----------
- config_file : str
- TOML file path
- inpath : str
- Input folder directory where L0 files can be found
-
- Returns
- -------
- conf : dict
- Configuration parameters
- """
- conf = load.getConfig(config_file, inpath)
- return conf
-
- def loadL0(self):
- """Load level 0 (L0) data from associated TOML-formatted
- config file and L0 data file
-
- Try readL0file() using the config with msg_lat & msg_lon appended. The
- specific ParserError except will occur when the number of columns in
- the tx file does not match the expected columns. In this case, remove
- msg_lat & msg_lon from the config and call readL0file() again. These
- station files either have no data after Nov 2022 (when msg_lat &
- msg_lon were added to processing), or for whatever reason these fields
- did not exist in the modem message and were not added.
-
- Returns
- -------
- ds_list : list
- List of L0 xr.Dataset objects
- """
- ds_list = []
- for k in self.config.keys():
- target = self.config[k]
- try:
- ds_list.append(self.readL0file(target))
-
- except pd.errors.ParserError as e:
- # ParserError: Too many columns specified: expected 40 and found 38
- # logger.info(f'-----> No msg_lat or msg_lon for {k}')
- for item in ["msg_lat", "msg_lon"]:
- target["columns"].remove(item) # Also removes from self.config
- ds_list.append(self.readL0file(target))
- logger.info(f"L0 data successfully loaded from {k}")
- return ds_list
-
- def readL0file(self, conf):
- """Read L0 .txt file to Dataset object using config dictionary and
- populate with initial metadata
-
- Parameters
- ----------
- conf : dict
- Configuration parameters
-
- Returns
- -------
- ds : xr.Dataset
- L0 data
- """
- file_version = conf.get("file_version", -1)
- ds = load.getL0(
- conf["file"],
- conf["nodata"],
- conf["columns"],
- conf["skiprows"],
- file_version,
- time_offset=conf.get("time_offset"),
- )
- ds = utilities.populateMeta(ds, conf, ["columns", "skiprows", "modem"])
- return ds
pypromice/{process → pipeline}/get_l2.py

@@ -5,8 +5,8 @@ import sys
  from argparse import ArgumentParser
  from pathlib import Path

- from pypromice.process.aws import AWS
- from pypromice.process.write import prepare_and_write
+ from pypromice.pipeline.aws import AWS
+ from pypromice.io.write import prepare_and_write


  def parse_arguments_l2():
pypromice/{process → pipeline}/get_l2tol3.py

@@ -5,9 +5,9 @@ from pathlib import Path
  import xarray as xr
  from argparse import ArgumentParser
  import pypromice
- from pypromice.process.L2toL3 import toL3
+ from pypromice.pipeline.L2toL3 import toL3
  import pypromice.resources
- from pypromice.process.write import prepare_and_write
+ from pypromice.io.write import prepare_and_write
  logger = logging.getLogger(__name__)

  def parse_arguments_l2tol3(debug_args=None):
@@ -17,13 +17,13 @@ def parse_arguments_l2tol3(debug_args=None):
  parser.add_argument('-c', '--config_folder', type=str, required=True,
  default='../aws-l0/metadata/station_configurations/',
  help='Path to folder with sites configuration (TOML) files')
- parser.add_argument('-i', '--inpath', type=str, required=True,
+ parser.add_argument('-i', '--inpath', type=str, required=True,
  help='Path to Level 2 .nc data file')
- parser.add_argument('-o', '--outpath', default=None, type=str, required=False,
+ parser.add_argument('-o', '--outpath', default=None, type=str, required=False,
  help='Path where to write output')
- parser.add_argument('-v', '--variables', default=None, type=str,
+ parser.add_argument('-v', '--variables', default=None, type=str,
  required=False, help='File path to variables look-up table')
- parser.add_argument('-m', '--metadata', default=None, type=str,
+ parser.add_argument('-m', '--metadata', default=None, type=str,
  required=False, help='File path to metadata')
  parser.add_argument('--data_issues_path', '--issues', default=None, help="Path to data issues repository")

@@ -40,11 +40,11 @@ def get_l2tol3(config_folder: Path|str, inpath, outpath, variables, metadata, da
  level=logging.INFO,
  stream=sys.stdout,
  )
-
+
  # Define Level 2 dataset from file
  with xr.open_dataset(inpath) as l2:
  l2.load()
-
+
  # Remove encoding attributes from NetCDF
  for varname in l2.variables:
  if l2[varname].encoding!={}:
@@ -54,7 +54,7 @@ def get_l2tol3(config_folder: Path|str, inpath, outpath, variables, metadata, da
  l2.attrs['bedrock'] = l2.attrs['bedrock'] == 'True'
  if 'number_of_booms' in l2.attrs.keys():
  l2.attrs['number_of_booms'] = int(l2.attrs['number_of_booms'])
-
+
  # importing station_config (dict) from config_folder (str path)
  config_file = config_folder / (l2.attrs['station_id']+'.toml')

@@ -62,7 +62,7 @@ def get_l2tol3(config_folder: Path|str, inpath, outpath, variables, metadata, da
  # File exists, load the configuration
  station_config = toml.load(config_file)
  else:
- # File does not exist, initialize with standard info
+ # File does not exist, initialize with standard info
  # this was prefered by RSF over exiting with error
  logger.error("\n***\nNo station_configuration file for %s.\nPlease create one on AWS-L0/metadata/station_configurations.\n***"%l2.attrs['station_id'])
  station_config = {"stid":l2.attrs['station_id'],
@@ -70,7 +70,7 @@ def get_l2tol3(config_folder: Path|str, inpath, outpath, variables, metadata, da
  "project": "PROMICE",
  "location_type": "ice sheet",
  }
-
+
  # checking that the adjustement directory is properly given
  if data_issues_path is None:
  data_issues_path = Path("../PROMICE-AWS-data-issues")
@@ -82,7 +82,7 @@ def get_l2tol3(config_folder: Path|str, inpath, outpath, variables, metadata, da
  data_issues_path = Path(data_issues_path)

  data_adjustments_dir = data_issues_path / "adjustments"
-
+
  # Perform Level 3 processing
  l3 = toL3(l2, data_adjustments_dir, station_config)

@@ -92,20 +92,17 @@ def get_l2tol3(config_folder: Path|str, inpath, outpath, variables, metadata, da
  if outpath is not None:
  prepare_and_write(l3, outpath, v, m, '60min')
  prepare_and_write(l3, outpath, v, m, '1D')
- prepare_and_write(l3, outpath, v, m, 'M')
+ prepare_and_write(l3, outpath, v, m, 'MS')
  return l3

  def main():
  args = parse_arguments_l2tol3()
-
-
-
- _ = get_l2tol3(args.config_folder,
- args.inpath,
+ _ = get_l2tol3(args.config_folder,
+ args.inpath,
  args.outpath,
- args.variables,
- args.metadata,
+ args.variables,
+ args.metadata,
  args.data_issues_path)
-
- if __name__ == "__main__":
+
+ if __name__ == "__main__":
  main()
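The 'M' to 'MS' change in the monthly output call is worth noting: in pandas, 'M' meant month-end and was deprecated in favour of 'ME' in pandas 2.2, so 'MS' (month start) both avoids the deprecated alias and labels each monthly aggregate by the first day of the month (the motivation is inferred, not stated in this diff):

    import pandas as pd

    print(pd.date_range("2023-01-01", periods=3, freq="MS"))
    # DatetimeIndex(['2023-01-01', '2023-02-01', '2023-03-01'], dtype='datetime64[ns]', freq='MS')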