pypromice 1.3.2.tar.gz → 1.3.3.tar.gz


Potentially problematic release.



Files changed (56)
  1. {pypromice-1.3.2/src/pypromice.egg-info → pypromice-1.3.3}/PKG-INFO +1 -1
  2. {pypromice-1.3.2 → pypromice-1.3.3}/setup.py +1 -1
  3. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/process/L0toL1.py +40 -2
  4. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/process/value_clipping.py +2 -0
  5. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/qc/github_data_issues.py +55 -53
  6. {pypromice-1.3.2 → pypromice-1.3.3/src/pypromice.egg-info}/PKG-INFO +1 -1
  7. {pypromice-1.3.2 → pypromice-1.3.3}/LICENSE.txt +0 -0
  8. {pypromice-1.3.2 → pypromice-1.3.3}/MANIFEST.in +0 -0
  9. {pypromice-1.3.2 → pypromice-1.3.3}/README.md +0 -0
  10. {pypromice-1.3.2 → pypromice-1.3.3}/setup.cfg +0 -0
  11. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/__init__.py +0 -0
  12. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/get/__init__.py +0 -0
  13. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/get/get.py +0 -0
  14. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/get/get_promice_data.py +0 -0
  15. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/postprocess/__init__.py +0 -0
  16. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/postprocess/csv2bufr.py +0 -0
  17. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/postprocess/get_bufr.py +0 -0
  18. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/postprocess/wmo_config.py +0 -0
  19. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/process/L1toL2.py +0 -0
  20. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/process/L2toL3.py +0 -0
  21. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/process/__init__.py +0 -0
  22. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/process/aws.py +0 -0
  23. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/process/get_l3.py +0 -0
  24. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/process/join_l3.py +0 -0
  25. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/process/metadata.csv +0 -0
  26. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/process/variables.csv +0 -0
  27. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/qc/__init__.py +0 -0
  28. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/qc/percentiles/__init__.py +0 -0
  29. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/qc/percentiles/compute_thresholds.py +0 -0
  30. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/qc/percentiles/outlier_detector.py +0 -0
  31. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/qc/percentiles/thresholds.csv +0 -0
  32. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/qc/persistence.py +0 -0
  33. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/qc/persistence_test.py +0 -0
  34. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/test/test_config1.toml +0 -0
  35. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/test/test_config2.toml +0 -0
  36. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/test/test_email +0 -0
  37. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/test/test_payload_formats.csv +0 -0
  38. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/test/test_payload_types.csv +0 -0
  39. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/test/test_percentile.py +0 -0
  40. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/test/test_raw1.txt +0 -0
  41. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/test/test_raw_DataTable2.txt +0 -0
  42. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/test/test_raw_SlimTableMem1.txt +0 -0
  43. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/test/test_raw_transmitted1.txt +0 -0
  44. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/test/test_raw_transmitted2.txt +0 -0
  45. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/tx/__init__.py +0 -0
  46. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/tx/get_l0tx.py +0 -0
  47. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/tx/get_msg.py +0 -0
  48. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/tx/get_watsontx.py +0 -0
  49. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/tx/payload_formats.csv +0 -0
  50. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/tx/payload_types.csv +0 -0
  51. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/tx/tx.py +0 -0
  52. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice.egg-info/SOURCES.txt +0 -0
  53. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice.egg-info/dependency_links.txt +0 -0
  54. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice.egg-info/entry_points.txt +0 -0
  55. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice.egg-info/requires.txt +0 -0
  56. {pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice.egg-info/top_level.txt +0 -0
{pypromice-1.3.2/src/pypromice.egg-info → pypromice-1.3.3}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: pypromice
-Version: 1.3.2
+Version: 1.3.3
 Summary: PROMICE/GC-Net data processing toolbox
 Home-page: https://github.com/GEUS-Glaciology-and-Climate/pypromice
 Author: GEUS Glaciology and Climate
{pypromice-1.3.2 → pypromice-1.3.3}/setup.py

@@ -5,7 +5,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
 
 setuptools.setup(
     name="pypromice",
-    version="1.3.2",
+    version="1.3.3",
     author="GEUS Glaciology and Climate",
     description="PROMICE/GC-Net data processing toolbox",
     long_description=long_description,
{pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/process/L0toL1.py

@@ -57,7 +57,9 @@ def toL1(L0, vars_df, T_0=273.15, tilt_threshold=-100):
     ds['ulr'] = ((ds['ulr'] * 10) / ds.attrs['ulr_eng_coef']) + 5.67E-8*(ds['t_rad'] + T_0)**4
 
     ds['z_boom_u'] = _reformatArray(ds['z_boom_u'])                       # Reformat boom height
-    ds['z_boom_u'] = ds['z_boom_u'] * ((ds['t_u'] + T_0)/T_0)**0.5        # Adjust sonic ranger readings for sensitivity to air temperature
+
+    ds['t_u_interp'] = interpTemp(ds['t_u'], vars_df)
+    ds['z_boom_u'] = ds['z_boom_u'] * ((ds['t_u_interp'] + T_0)/T_0)**0.5 # Adjust sonic ranger readings for sensitivity to air temperature
 
     if ds['gps_lat'].dtype.kind == 'O':                                   # Decode and reformat GPS information
         if 'NH' in ds['gps_lat'].dropna(dim='time').values[1]:
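Note: the sonic ranger reports a distance that assumes the speed of sound at T_0, so readings are rescaled by sqrt((T + T_0)/T_0); with the raw t_u series, every temperature gap turned the corrected boom height into NaN, which is what the new interpTemp call avoids. A minimal sketch of the correction on made-up values (t_air and z_raw are illustrative names, not pypromice API):

import numpy as np

T_0 = 273.15                              # reference temperature, K
t_air = np.array([-20.0, np.nan, -18.5])  # air temperature, deg C
z_raw = np.array([2.61, 2.62, 2.60])      # raw sonic ranger distance, m

# Speed of sound scales with sqrt(T), so rescale the assumed-T_0 reading
z_adj = z_raw * ((t_air + T_0) / T_0) ** 0.5
print(z_adj)                              # the NaN temperature yields a NaN height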
@@ -113,7 +115,8 @@ def toL1(L0, vars_df, T_0=273.15, tilt_threshold=-100):
 
     elif ds.attrs['number_of_booms']==2:                                  # 2-boom processing
         ds['z_boom_l'] = _reformatArray(ds['z_boom_l'])                   # Reformat boom height
-        ds['z_boom_l'] = ds['z_boom_l'] * ((ds['t_l'] + T_0)/T_0)**0.5    # Adjust sonic ranger readings for sensitivity to air temperature
+        ds['t_l_interp'] = interpTemp(ds['t_l'], vars_df)
+        ds['z_boom_l'] = ds['z_boom_l'] * ((ds['t_l_interp'] + T_0)/T_0)**0.5 # Adjust sonic ranger readings for sensitivity to air temperature
 
     ds = clip_values(ds, vars_df)
     for key in ['hygroclip_t_offset', 'dsr_eng_coef', 'usr_eng_coef',
@@ -254,6 +257,41 @@ def getPressDepth(z_pt, p, pt_antifreeze, pt_z_factor, pt_z_coef, pt_z_p_coef):
 
     return z_pt_cor, z_pt
 
+
+def interpTemp(temp, var_configurations, max_interp=pd.Timedelta(12,'h')):
+    '''Clip and interpolate temperature dataset for use in corrections
+
+    Parameters
+    ----------
+    temp : `xarray.DataArray`
+        Array of temperature data
+    var_configurations : `pandas.DataFrame`
+        Dataframe to retrieve attribute hi-lo values from for temperature clipping
+    max_interp : `pandas.Timedelta`
+        Maximum time steps to interpolate across. The default is 12 hours.
+
+    Returns
+    -------
+    temp_interp : `xarray.DataArray`
+        Array of interpolated temperature data
+    '''
+    # Determine if upper or lower temperature array
+    var = temp.name.lower()
+
+    # Find range threshold and use it to clip measurements
+    cols = ["lo", "hi", "OOL"]
+    assert set(cols) <= set(var_configurations.columns)
+    variable_limits = var_configurations[cols].dropna(how="all")
+    temp = temp.where(temp >= variable_limits.loc[var, 'lo'])
+    temp = temp.where(temp <= variable_limits.loc[var, 'hi'])
+
+    # Drop duplicates and interpolate across NaN values
+    # temp_interp = temp.drop_duplicates(dim='time', keep='first')
+    temp_interp = temp.interpolate_na(dim='time', max_gap=max_interp)
+
+    return temp_interp
+
+
 def smoothTilt(tilt, win_size):
     '''Smooth tilt values using a rolling window. This is translated from the
     previous IDL/GDL smoothing algorithm:
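Note: interpTemp leans on xarray's interpolate_na, whose max_gap argument caps how long a run of NaNs may be bridged; gap length is measured between the valid samples that enclose it. A small sketch on synthetic data:

import numpy as np
import pandas as pd
import xarray as xr

time = pd.date_range("2023-06-01", periods=6, freq="6h")
temp = xr.DataArray([-5.0, np.nan, np.nan, -6.0, np.nan, -7.0],
                    dims="time", coords={"time": time}, name="t_u")

# The 18 h gap (two missing samples) stays NaN; the 12 h gap is filled.
filled = temp.interpolate_na(dim="time", max_gap=pd.Timedelta(12, "h"))
print(filled.values)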
{pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/process/value_clipping.py

@@ -29,6 +29,7 @@ def clip_values(
 
     variable_limits = var_configurations[cols].dropna(how="all")
     for var, row in variable_limits.iterrows():
+
         if var not in list(ds.variables):
             continue
 
@@ -56,4 +57,5 @@ def clip_values(
             ds[var] = ds[var].where(ds[var] >= row.lo)
         if ~np.isnan(row.hi):
             ds[var] = ds[var].where(ds[var] <= row.hi)
+
     return ds
{pypromice-1.3.2 → pypromice-1.3.3}/src/pypromice/qc/github_data_issues.py

@@ -36,7 +36,7 @@ def flagNAN(ds_in,
     ds : xr.Dataset
         Level 0 data with flagged data
     '''
-    ds = ds_in.copy()
+    ds = ds_in.copy(deep=True)
     df = None
 
     df = _getDF(flag_url + ds.attrs["station_id"] + ".csv",
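Note: xarray's Dataset.copy() defaults to deep=False, a shallow copy whose variables share the caller's data buffers, so in-place edits inside flagNAN could leak back into the input dataset; deep=True makes the copy independent. An illustrative sketch (not pypromice code):

import numpy as np
import xarray as xr

ds_in = xr.Dataset({"t_u": ("time", np.array([1.0, 2.0, 3.0]))})

shallow = ds_in.copy()        # deep=False by default: shared buffers
shallow["t_u"][0] = np.nan    # this write is visible through ds_in too
print(ds_in["t_u"].values)    # [nan  2.  3.]

ds_in["t_u"][0] = 1.0         # restore the value
deep = ds_in.copy(deep=True)  # independent copy of the data
deep["t_u"][0] = np.nan
print(ds_in["t_u"].values)    # [1. 2. 3.], caller's dataset unchanged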
@@ -71,7 +71,7 @@ def flagNAN(ds_in,
 
     for v in varlist:
         if v in list(ds.keys()):
-            logger.info(f'---> flagging {t0} {t1} {v}')
+            logger.info(f'---> flagging {v} between {t0} and {t1}')
             ds[v] = ds[v].where((ds['time'] < t0) | (ds['time'] > t1))
         else:
            logger.info(f'---> could not flag {v} not in dataset')
@@ -99,7 +99,7 @@ def adjustTime(ds,
     ds : xr.Dataset
         Level 0 data with flagged data
     '''
-    ds_out = ds.copy()
+    ds_out = ds.copy(deep=True)
     adj_info=None
 
     adj_info = _getDF(adj_url + ds.attrs["station_id"] + ".csv",
@@ -165,7 +165,7 @@ def adjustData(ds,
     ds : xr.Dataset
         Level 0 data with flagged data
     '''
-    ds_out = ds.copy()
+    ds_out = ds.copy(deep=True)
     adj_info=None
     adj_info = _getDF(adj_url + ds.attrs["station_id"] + ".csv",
                       os.path.join(adj_dir, ds.attrs["station_id"] + ".csv"),
@@ -176,13 +176,11 @@ def adjustData(ds,
     # removing potential time shifts from the adjustment list
     adj_info = adj_info.loc[adj_info.adjust_function != "time_shift", :]
 
-    # if t1 is left empty, then adjustment is applied until the end of the file
-    adj_info.loc[adj_info.t0.isnull(), "t0"] = ds_out.time.values[0]
-    adj_info.loc[adj_info.t1.isnull(), "t1"] = ds_out.time.values[-1]
-    # making all timestamps timezone naive (compatibility with xarray)
-    adj_info.t0 = pd.to_datetime(adj_info.t0).dt.tz_localize(None)
-    adj_info.t1 = pd.to_datetime(adj_info.t1).dt.tz_localize(None)
-
+    # making sure that t0 and t1 columns are object dtype, then replacing NaN with None
+    adj_info[['t0','t1']] = adj_info[['t0','t1']].astype(object)
+    adj_info.loc[adj_info.t1.isnull()|(adj_info.t1==''), "t1"] = None
+    adj_info.loc[adj_info.t0.isnull()|(adj_info.t0==''), "t0"] = None
+
     # if "*" is in the variable name then we interpret it as regex
     selec = adj_info['variable'].str.contains('\*') & (adj_info['variable'] != "*")
     for ind in adj_info.loc[selec, :].index:
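Note: keeping an empty t0/t1 as None works because slice(None, t1) and slice(t0, None) are naturally open-ended, so the dataset's first and last timestamps no longer need to be substituted in beforehand. Sketch on made-up data:

import numpy as np
import pandas as pd
import xarray as xr

time = pd.date_range("2023-01-01", periods=4, freq="D")
da = xr.DataArray(np.arange(4.0), dims="time", coords={"time": time})

print(da.loc[dict(time=slice(None, "2023-01-02"))].values)  # [0. 1.]
print(da.loc[dict(time=slice("2023-01-02", None))].values)  # [1. 2. 3.]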
@@ -217,88 +215,92 @@ def adjustData(ds,
             adj_info.loc[var].adjust_function,
             adj_info.loc[var].adjust_value,
         ):
-            if (t0 > pd.to_datetime(ds_out.time.values[-1])) | (t1 < pd.to_datetime(ds_out.time.values[0])):
+            # making all timestamps timezone naive (compatibility with xarray)
+            if isinstance(t0, str):
+                t0 = pd.to_datetime(t0, utc=True).tz_localize(None)
+            if isinstance(t1, str):
+                t1 = pd.to_datetime(t1, utc=True).tz_localize(None)
+
+            index_slice = dict(time=slice(t0, t1))
+
+            if len(ds_out[var].loc[index_slice].time.time) == 0:
+                logger.info("Time range does not intersect with dataset")
                 continue
-            logger.info(f'---> {t0} {t1} {var} {func} {val}')
+
+            logger.info(f'---> adjusting {var} between {t0} and {t1} ({func} {val})')
+
             if func == "add":
-                ds_out[var].loc[dict(time=slice(t0, t1))] = ds_out[var].loc[dict(time=slice(t0, t1))].values + val
+                ds_out[var].loc[index_slice] = ds_out[var].loc[index_slice].values + val
                 # flagging adjusted values
                 # if var + "_adj_flag" not in ds_out.columns:
                 #     ds_out[var + "_adj_flag"] = 0
-                # msk = ds_out[var].loc[dict(time=slice(t0, t1))])].notnull()
-                # ind = ds_out[var].loc[dict(time=slice(t0, t1))])].loc[msk].time
+                # msk = ds_out[var].loc[index_slice])].notnull()
+                # ind = ds_out[var].loc[index_slice])].loc[msk].time
                 # ds_out.loc[ind, var + "_adj_flag"] = 1
 
             if func == "multiply":
-                ds_out[var].loc[dict(time=slice(t0, t1))] = ds_out[var].loc[dict(time=slice(t0, t1))].values * val
+                ds_out[var].loc[index_slice] = ds_out[var].loc[index_slice].values * val
                 if "DW" in var:
-                    ds_out[var].loc[dict(time=slice(t0, t1))] = ds_out[var].loc[dict(time=slice(t0, t1))] % 360
+                    ds_out[var].loc[index_slice] = ds_out[var].loc[index_slice] % 360
                 # flagging adjusted values
                 # if var + "_adj_flag" not in ds_out.columns:
                 #     ds_out[var + "_adj_flag"] = 0
-                # msk = ds_out[var].loc[dict(time=slice(t0, t1))].notnull()
-                # ind = ds_out[var].loc[dict(time=slice(t0, t1))].loc[msk].time
+                # msk = ds_out[var].loc[index_slice].notnull()
+                # ind = ds_out[var].loc[index_slice].loc[msk].time
                 # ds_out.loc[ind, var + "_adj_flag"] = 1
 
             if func == "min_filter":
-                tmp = ds_out[var].loc[dict(time=slice(t0, t1))].values
+                tmp = ds_out[var].loc[index_slice].values
                 tmp[tmp < val] = np.nan
 
             if func == "max_filter":
-                tmp = ds_out[var].loc[dict(time=slice(t0, t1))].values
+                tmp = ds_out[var].loc[index_slice].values
                 tmp[tmp > val] = np.nan
-                ds_out[var].loc[dict(time=slice(t0, t1))] = tmp
+                ds_out[var].loc[index_slice] = tmp
 
             if func == "upper_perc_filter":
-                tmp = ds_out[var].loc[dict(time=slice(t0, t1))].copy()
-                df_w = ds_out[var].loc[dict(time=slice(t0, t1))].resample("14D").quantile(1 - val / 100)
-                df_w = ds_out[var].loc[dict(time=slice(t0, t1))].resample("14D").var()
+                tmp = ds_out[var].loc[index_slice].copy()
+                df_w = ds_out[var].loc[index_slice].resample(time="14D").quantile(1 - val / 100)
+                df_w = ds_out[var].loc[index_slice].resample(time="14D").var()
                 for m_start, m_end in zip(df_w.time[:-2], df_w.time[1:]):
                     msk = (tmp.time >= m_start) & (tmp.time < m_end)
                     values_month = tmp.loc[msk].values
                     values_month[values_month < df_w.loc[m_start]] = np.nan
                     tmp.loc[msk] = values_month
 
-                ds_out[var].loc[dict(time=slice(t0, t1))] = tmp.values
+                ds_out[var].loc[index_slice] = tmp.values
 
             if func == "biweekly_upper_range_filter":
-                tmp = ds_out[var].loc[dict(time=slice(t0, t1))].copy()
-                df_max = ds_out[var].loc[dict(time=slice(t0, t1))].resample("14D").max()
-                for m_start, m_end in zip(df_max.time[:-2], df_max.time[1:]):
-                    msk = (tmp.time >= m_start) & (tmp.time < m_end)
-                    lim = df_max.loc[m_start] - val
-                    values_month = tmp.loc[msk].values
-                    values_month[values_month < lim] = np.nan
-                    tmp.loc[msk] = values_month
-                # remaining samples following outside of the last 2 weeks window
-                msk = tmp.time >= m_end
-                lim = df_max.loc[m_start] - val
-                values_month = tmp.loc[msk].values
-                values_month[values_month < lim] = np.nan
-                tmp.loc[msk] = values_month
-                # updating original pandas
-                ds_out[var].loc[dict(time=slice(t0, t1))] = tmp.values
+                df_max = (
+                    ds_out[var].loc[index_slice].copy(deep=True)
+                    .resample(time="14D", offset='7D').max()
+                    .sel(time=ds_out[var].loc[index_slice].time.values, method='ffill')
+                )
+                df_max['time'] = ds_out[var].loc[index_slice].time
+                # updating original pandas
+                ds_out[var].loc[index_slice] = ds_out[var].loc[index_slice].where(ds_out[var].loc[index_slice] > df_max-val)
+
 
             if func == "hampel_filter":
-                tmp = ds_out[var].loc[dict(time=slice(t0, t1))]
+                tmp = ds_out[var].loc[index_slice]
                 tmp = _hampel(tmp, k=7 * 24, t0=val)
-                ds_out[var].loc[dict(time=slice(t0, t1))] = tmp.values
+                ds_out[var].loc[index_slice] = tmp.values
 
             if func == "grad_filter":
-                tmp = ds_out[var].loc[dict(time=slice(t0, t1))].copy()
-                msk = ds_out[var].loc[dict(time=slice(t0, t1))].copy().diff()
+                tmp = ds_out[var].loc[index_slice].copy()
+                msk = ds_out[var].loc[index_slice].copy().diff()
                 tmp[np.roll(msk.abs() > val, -1)] = np.nan
-                ds_out[var].loc[dict(time=slice(t0, t1))] = tmp
+                ds_out[var].loc[index_slice] = tmp
 
             if "swap_with_" in func:
                 var2 = func[10:]
-                val_var = ds_out[var].loc[dict(time=slice(t0, t1))].values.copy()
-                val_var2 = ds_out[var2].loc[dict(time=slice(t0, t1))].values.copy()
-                ds_out[var2].loc[dict(time=slice(t0, t1))] = val_var
-                ds_out[var].loc[dict(time=slice(t0, t1))] = val_var2
+                val_var = ds_out[var].loc[index_slice].values.copy()
+                val_var2 = ds_out[var2].loc[index_slice].values.copy()
+                ds_out[var2].loc[index_slice] = val_var
+                ds_out[var].loc[index_slice] = val_var2
 
             if func == "rotate":
-                ds_out[var].loc[dict(time=slice(t0, t1))] = (ds_out[var].loc[dict(time=slice(t0, t1))].values + val) % 360
+                ds_out[var].loc[index_slice] = (ds_out[var].loc[index_slice].values + val) % 360
 
     return ds_out
{pypromice-1.3.2 → pypromice-1.3.3/src/pypromice.egg-info}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: pypromice
-Version: 1.3.2
+Version: 1.3.3
 Summary: PROMICE/GC-Net data processing toolbox
 Home-page: https://github.com/GEUS-Glaciology-and-Climate/pypromice
 Author: GEUS Glaciology and Climate