PyPI - pytrendy - Versions diffs - 1.1.11.dev4__tar.gz → 1.2.0.dev1__tar.gz - Mend

pytrendy-1.1.11.dev4.tar.gz → pytrendy-1.2.0.dev1.tar.gz

This diff shows the differences between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.

Files changed (24)

{pytrendy-1.1.11.dev4 → pytrendy-1.2.0.dev1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pytrendy
-Version: 1.1.11.dev4
+Version: 1.2.0.dev1
 Summary: Trend Detection in Python. Applicable for real-world industry use cases in time series.
 License: MIT License

{pytrendy-1.1.11.dev4 → pytrendy-1.2.0.dev1}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "pytrendy"
-version = "1.1.11.dev4"
+version = "1.2.0.dev1"
 description = "Trend Detection in Python. Applicable for real-world industry use cases in time series."
 authors = [
     { name = "Russell Sammut Bonnici", email = "r.sammutbonnici@gmail.com" },

{pytrendy-1.1.11.dev4 → pytrendy-1.2.0.dev1}/pytrendy/detect_trends.py RENAMED Viewed

@@ -41,6 +41,7 @@ def detect_trends(df: pd.DataFrame, date_col: str, value_col: str, plot=True, me
             - **is_abrupt_padded** (`bool`): Whether to pad abrupt transitions between segments. Defaults to `False`.
             - **abrupt_padding** (`int`): Number of days to pad around abrupt transitions. Only referenced when `is_abrupt_padded` is `True`. Defaults to `28`.
+            - **avoid_noise** (`bool`): Whether to avoid noisy segments in trend detection. Defaults to `True`.
         debug (bool, optional):
             If `True` will run in debug mode, outputting various additional plots and print statements. Only recommended for developers of pytrendy.
             Defaults to `False`.
@@ -62,10 +63,11 @@ def detect_trends(df: pd.DataFrame, date_col: str, value_col: str, plot=True, me
     method_params = {
         'is_abrupt_padded': method_params.get('is_abrupt_padded', False),
         'abrupt_padding': method_params.get('abrupt_padding', 28),
+        'avoid_noise': method_params.get('avoid_noise', True),
     }
     # Core 5-step pipeline
-    df = process_signals(df, value_col, debug=debug)
+    df = process_signals(df, value_col, method_params, debug)
     segments = get_segments(df)
     segments = refine_segments(df, value_col, segments, method_params)
     segments = analyse_segments(df, value_col, segments)

{pytrendy-1.1.11.dev4 → pytrendy-1.2.0.dev1}/pytrendy/post_processing/segments_refine/artifact_cleanup.py RENAMED Viewed

@@ -20,6 +20,7 @@ def clean_artifacts(df: pd.DataFrame, value_col: str, segments_refined: list[dic
             - **is_abrupt_padded** (`bool`): If `True`, skips neighboring-noise checks around abrupt segments. Defaults to `False`.
             - **abrupt_padding** (`int`): Padding window in days used by abrupt refinement; included for pipeline consistency. Defaults to `28`.
+            - **avoid_noise** (`bool`): Whether to avoid noisy segments in trend detection. Defaults to `True`.
         inverse_only (bool): If True, only perform inverse checks and skip other artifact cleanups. Useful for final cleanup pass after flat fill ins.
     Returns:
@@ -182,49 +183,51 @@ def clean_artifacts(df: pd.DataFrame, value_col: str, segments_refined: list[dic
         segments_refined.append(segment)
     # Pass 3: Cleans partial overlaps with noise. Don't filter out completely when partial, adjust outside noise
-    segments = deepcopy(segments_refined)
-    segments_refined = []
-    for i, segment in enumerate(segments):
-        if (i < len(segments)-1 and has_partial_overlap_next(segment, segments[i+1])):
-            shifted_end = (pd.to_datetime(segments[i+1]['start']) - pd.Timedelta(days=1))
-            start = pd.to_datetime(segment['start'])
-            is_inverted = (shifted_end < start) # In case noise segment is <= 1 day in length
-            if is_inverted:
-                continue
-            # when gradual, follows similar logic to expand/contract selection.
-            end_df = df.loc[start:shifted_end]
-            if segments[i]['direction'] == 'Up':
-                new_end = end_df[value_col].idxmax()
-                segments[i]['end'] = new_end.strftime('%Y-%m-%d')
-            if segments[i]['direction'] == 'Down':
-                new_end = end_df[value_col].idxmin()
-                segments[i]['end'] = new_end.strftime('%Y-%m-%d')
+    # Only runs post-processing cleanup logic when avoid_noise is True, enabled by default.
+    if method_params['avoid_noise']:
+        segments = deepcopy(segments_refined)
+        segments_refined = []
+        for i, segment in enumerate(segments):
+            if (i < len(segments)-1 and has_partial_overlap_next(segment, segments[i+1])):
+                shifted_end = (pd.to_datetime(segments[i+1]['start']) - pd.Timedelta(days=1))
+                start = pd.to_datetime(segment['start'])
+                is_inverted = (shifted_end < start) # In case noise segment is <= 1 day in length
+                if is_inverted:
+                    continue
+                # when gradual, follows similar logic to expand/contract selection.
+                end_df = df.loc[start:shifted_end]
+                if segments[i]['direction'] == 'Up':
+                    new_end = end_df[value_col].idxmax()
+                    segments[i]['end'] = new_end.strftime('%Y-%m-%d')
+                if segments[i]['direction'] == 'Down':
+                    new_end = end_df[value_col].idxmin()
+                    segments[i]['end'] = new_end.strftime('%Y-%m-%d')
-            elif segments[i]['direction'] == 'Flat':
-                segments[i]['end'] = shifted_end.strftime('%Y-%m-%d')
+                elif segments[i]['direction'] == 'Flat':
+                    segments[i]['end'] = shifted_end.strftime('%Y-%m-%d')
-        if (i > 0 and has_partial_overlap_prev(segment, segments[i-1])):
+            if (i > 0 and has_partial_overlap_prev(segment, segments[i-1])):
-            shifted_start = (pd.to_datetime(segments[i-1]['end']) + pd.Timedelta(days=1))
-            end = pd.to_datetime(segment['end'])
-            # when gradual, follows similar logic to expand/contract selection.
-            start_df = df.loc[shifted_start:end]
-            if segments[i]['direction'] == 'Up':
-                new_start = start_df[value_col].iloc[::-1].idxmin() + pd.Timedelta(days=1)
-                segments[i]['start'] = new_start.strftime('%Y-%m-%d')
+                shifted_start = (pd.to_datetime(segments[i-1]['end']) + pd.Timedelta(days=1))
+                end = pd.to_datetime(segment['end'])
+                # when gradual, follows similar logic to expand/contract selection.
+                start_df = df.loc[shifted_start:end]
+                if segments[i]['direction'] == 'Up':
+                    new_start = start_df[value_col].iloc[::-1].idxmin() + pd.Timedelta(days=1)
+                    segments[i]['start'] = new_start.strftime('%Y-%m-%d')
-            if segments[i]['direction'] == 'Down':
-                new_start = start_df[value_col].iloc[::-1].idxmax() + pd.Timedelta(days=1)
-                segments[i]['start'] = new_start.strftime('%Y-%m-%d')
+                if segments[i]['direction'] == 'Down':
+                    new_start = start_df[value_col].iloc[::-1].idxmax() + pd.Timedelta(days=1)
+                    segments[i]['start'] = new_start.strftime('%Y-%m-%d')
-            elif segments[i]['direction'] == 'Flat':
-                segments[i]['start'] = shifted_start.strftime('%Y-%m-%d')
+                elif segments[i]['direction'] == 'Flat':
+                    segments[i]['start'] = shifted_start.strftime('%Y-%m-%d')
-        segments_refined.append(segment)
+            segments_refined.append(segment)
     # Pass 4: Cleans inverse AGAIN: in case any artifacts from overlap adjustments
     segments = deepcopy(segments_refined)
@@ -235,7 +238,7 @@ def clean_artifacts(df: pd.DataFrame, value_col: str, segments_refined: list[dic
         segments_refined.append(segment)
     # Pass 5:
-    # - Sets trends to noise when they have too low an SNR, too susceptible to noise, or not trendy enough
+    # - Sets trends to noise when they have too low an SNR, too susceptible to noise, or not trendy enough (enabled when avoid_noise is True)
     # - Sets trends to flat when too flat.
     segments = deepcopy(segments_refined)
     segments_refined = []
@@ -309,7 +312,8 @@ def clean_artifacts(df: pd.DataFrame, value_col: str, segments_refined: list[dic
                 trend_too_flat = not min_in_last_section
         # Reclassify as noise if either edge cases met
-        if too_noisy or (is_abrupt_near_noise and not trend_ends_too_close) or is_small_gradual_in_noise:
+        if method_params['avoid_noise'] and \
+            (too_noisy or (is_abrupt_near_noise and not trend_ends_too_close) or is_small_gradual_in_noise):
             segment['direction'] = 'Noise'
             if 'trend_class' in segment: del segment['trend_class']

{pytrendy-1.1.11.dev4 → pytrendy-1.2.0.dev1}/pytrendy/process_signals.py RENAMED Viewed

@@ -6,7 +6,7 @@ from scipy.signal import savgol_filter
 from scipy.stats import iqr
 from .post_processing.segments_refine.segment_grouping import GROUPING_DISTANCE
-def process_signals(df: pd.DataFrame, value_col: str, debug: bool=False) -> pd.DataFrame:
+def process_signals(df: pd.DataFrame, value_col: str, method_params: dict, debug: bool=False) -> pd.DataFrame:
     """
     Applies signal processing techniques to classify regions of a time series.
@@ -32,6 +32,12 @@ def process_signals(df: pd.DataFrame, value_col: str, debug: bool=False) -> pd.D
             Input time series data with a datetime index and signal column.
         value_col (str):
             Name of the column containing the signal to process.
+        method_params (dict, optional):
+            Optional parameters to customize detection heuristics. Supported keys:
+            - **is_abrupt_padded** (`bool`): Whether to pad abrupt transitions between segments. Defaults to `False`.
+            - **abrupt_padding** (`int`): Number of days to pad around abrupt transitions. Only referenced when `is_abrupt_padded` is `True`. Defaults to `28`.
+            - **avoid_noise** (`bool`): Whether to avoid noisy segments in trend detection. Defaults to `True`.
         debug (bool, optional):
             If `True` will run in debug mode, outputting various additional plots and print statements. Only recommended for developers of pytrendy. Defaults to `False`.
@@ -177,17 +183,19 @@ def process_signals(df: pd.DataFrame, value_col: str, debug: bool=False) -> pd.D
     df.loc[df['flat_flag'] == 1, 'trend_flag'] = -2
     df.loc[df['noise_flag'] == 1, 'trend_flag'] = -3
+    # Important condition to establish non-trend segments to avoid detecting trends over
+    avoid_condition = (df['flat_flag'] == 0) # flat is always avoided
+    if method_params['avoid_noise']: # noise can be optionally avoided, up to the user
+        avoid_condition &= (df['noise_flag'] == 0)
     derivative_limit = abs(iqr(df[value_col])) * THRESHOLD_SMOOTH
     df['smoothed_deriv'] = savgol_filter(df[value_col], window_length=WINDOW_SMOOTH, polyorder=1, deriv=1)
-    df.loc[(df['smoothed_deriv'] >= derivative_limit) & (df['flat_flag'] == 0) & (df['noise_flag'] == 0), 'trend_flag'] = 1
-    df.loc[(df['smoothed_deriv'] < -derivative_limit) & (df['flat_flag'] == 0) & (df['noise_flag'] == 0), 'trend_flag'] = -1
+    df.loc[(df['smoothed_deriv'] >= derivative_limit) & avoid_condition, 'trend_flag'] = 1
+    df.loc[(df['smoothed_deriv'] < -derivative_limit) & avoid_condition, 'trend_flag'] = -1
     if debug:
         import matplotlib.pyplot as plt
-        #df['smoothed_deriv'].hist()
-        #plt.show()
         ax = df[[value_col, 'snr']].plot(figsize=(20,3), secondary_y='snr')
         ax.right_ax.axhline(y=THRESHOLD_NOISE, color='gray', linestyle='--', linewidth=2)
         plt.title("Signal-Noise Ratio (SNR)")
@@ -214,8 +222,8 @@ def process_signals(df: pd.DataFrame, value_col: str, debug: bool=False) -> pd.D
         plt.show()
         ax = df[[value_col, 'smoothed_deriv']].plot(figsize=(20,3), secondary_y='smoothed_deriv')
-        ax.right_ax.axhline(y=THRESHOLD_SMOOTH, color='gray', linestyle='--', linewidth=2)
-        ax.right_ax.axhline(y=-THRESHOLD_SMOOTH, color='gray', linestyle=':', linewidth=2)
+        ax.right_ax.axhline(y=derivative_limit, color='gray', linestyle='--', linewidth=2)
+        ax.right_ax.axhline(y=-derivative_limit, color='gray', linestyle=':', linewidth=2)
         plt.title("Smoothed Derivative")
         plt.show()