PyPI - direl-ts-tool-kit - Versions diffs - 0.3.0__tar.gz → 0.4.1__tar.gz - Mend

direl-ts-tool-kit 0.3.0 (tar.gz) → 0.4.1 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16)

{direl_ts_tool_kit-0.3.0 → direl_ts_tool_kit-0.4.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: direl-ts-tool-kit
-Version: 0.3.0
+Version: 0.4.1
 Summary: A toolbox for time series analysis and visualization.
 Home-page: https://gitlab.com/direl/direl_tool_kit
 Author: Diego Restrepo-Leal

{direl_ts_tool_kit-0.3.0 → direl_ts_tool_kit-0.4.1}/direl_ts_tool_kit/plot/plot_ts.py RENAMED Viewed

@@ -100,6 +100,10 @@ def plot_time_series(
     if time_unit == "Day":
         ax.xaxis.set_major_locator(mdates.DayLocator())
         ax.xaxis.set_minor_locator(mdates.HourLocator())
+    if time_unit == "Hour":
+        ax.xaxis.set_major_locator(mdates.HourLocator())
+        ax.xaxis.set_minor_locator(mdates.MinuteLocator())
     ax.tick_params(axis="x", rotation=rot)
     ax.grid(which="both")

{direl_ts_tool_kit-0.3.0 → direl_ts_tool_kit-0.4.1}/direl_ts_tool_kit/utilities/data_prep.py RENAMED Viewed

@@ -1,12 +1,13 @@
+import numpy as np
 import pandas as pd
-def parse_datetime_index(df_raw, date_column="date"):
+def parse_datetime_index(df_raw, date_column="date", format=None):
     """
     Parses a specified column into datetime objects and sets it as the DataFrame index.
-    This function is crucial for preparing raw data (df_raw) for time series analysis
-    by ensuring the DataFrame is indexed by the correct datetime type.
+    This function prepares raw data for time series analysis by ensuring the
+    DataFrame is indexed by the correct datetime type.
     Parameters
     ----------
@@ -15,17 +16,24 @@ def parse_datetime_index(df_raw, date_column="date"):
     date_column : str, optional
         The name of the column in 'df_raw' that contains the date/time information.
         Defaults to "date".
+    format : str, optional
+        The explicit format string (e.g., '%Y%m%d', '%Y-%m-%d %H:%M:%S')
+        to parse the dates, passed to `pd.to_datetime`. If None (default),
+        Pandas attempts to infer the format automatically.
     Returns
     -------
     df_ts : pd.DataFrame
         A copy of the original DataFrame with the specified date column removed
-        and set as the DatetimeIndex. Ready for time series plotting.
+        and set as the DatetimeIndex. The returned DataFrame is ready for
+        time series operations.
     """
+    if not format:
+        date_parsed = pd.to_datetime(df_raw[date_column])
+    else:
+        date_parsed = pd.to_datetime(df_raw[date_column], format=format)
-    date_parsed = pd.to_datetime(df_raw[date_column])
     df_ts = df_raw.copy()
-    original_dates = df_raw[date_column]
     df_ts.drop(columns=[date_column], inplace=True)
     df_ts.set_index(date_parsed, inplace=True)
@@ -116,3 +124,38 @@ def reindex_and_aggregate(df_ts, column_name, freq="MS"):
     df_ts_new.notnull().apply(pd.Series.value_counts)
     return df_ts_new
+def remove_outliers_by_threshold(df_ts, column_name, lower_bound, upper_bound):
+    """
+    Replaces values in a specified column with NaN if they fall outside
+    a defined range (outlier removal).
+    This function identifies data points that are either below the lower
+    bound or above the upper bound and treats them as missing data.
+    Parameters
+    ----------
+    df_ts : pd.DataFrame
+        The time series DataFrame (must have a DatetimeIndex).
+    column_name : str
+        The name of the column where outlier detection will be performed (e.g., 'Temperature').
+    lower_bound : float or int
+        The minimum acceptable value. Values strictly below this bound are replaced by NaN.
+    upper_bound : float or int
+        The maximum acceptable value. Values strictly above this bound are replaced by NaN.
+    Returns
+    -------
+    pd.DataFrame
+        The DataFrame with outlier values in the specified column replaced by np.nan.
+    """
+    df_out = df_ts.copy()
+    outlier_index = df_out[
+        (df_out[column_name] < lower_bound) | (df_out[column_name] > upper_bound)
+    ].index
+    df_out.loc[outlier_index, column_name] = np.nan
+    return df_out

{direl_ts_tool_kit-0.3.0 → direl_ts_tool_kit-0.4.1}/direl_ts_tool_kit.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: direl-ts-tool-kit
-Version: 0.3.0
+Version: 0.4.1
 Summary: A toolbox for time series analysis and visualization.
 Home-page: https://gitlab.com/direl/direl_tool_kit
 Author: Diego Restrepo-Leal

{direl_ts_tool_kit-0.3.0 → direl_ts_tool_kit-0.4.1}/setup.py RENAMED Viewed

@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
 setup(
     name="direl-ts-tool-kit",
-    version="0.3.0",
+    version="0.4.1",
     description="A toolbox for time series analysis and visualization.",
     long_description=open("README.md", encoding="utf-8").read(),
     long_description_content_type="text/markdown",