pytrendy 1.1.11.dev3__tar.gz → 1.2.0.dev1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24) hide show
  1. {pytrendy-1.1.11.dev3 → pytrendy-1.2.0.dev1}/PKG-INFO +13 -12
  2. {pytrendy-1.1.11.dev3 → pytrendy-1.2.0.dev1}/README.md +12 -12
  3. {pytrendy-1.1.11.dev3 → pytrendy-1.2.0.dev1}/pyproject.toml +1 -1
  4. {pytrendy-1.1.11.dev3 → pytrendy-1.2.0.dev1}/pytrendy/detect_trends.py +3 -1
  5. {pytrendy-1.1.11.dev3 → pytrendy-1.2.0.dev1}/pytrendy/io/plot_pytrendy.py +1 -1
  6. {pytrendy-1.1.11.dev3 → pytrendy-1.2.0.dev1}/pytrendy/io/results_pytrendy.py +12 -6
  7. {pytrendy-1.1.11.dev3 → pytrendy-1.2.0.dev1}/pytrendy/post_processing/segments_analyse.py +7 -18
  8. {pytrendy-1.1.11.dev3 → pytrendy-1.2.0.dev1}/pytrendy/post_processing/segments_refine/artifact_cleanup.py +44 -40
  9. {pytrendy-1.1.11.dev3 → pytrendy-1.2.0.dev1}/pytrendy/post_processing/segments_refine/trend_classify.py +1 -1
  10. {pytrendy-1.1.11.dev3 → pytrendy-1.2.0.dev1}/pytrendy/process_signals.py +16 -8
  11. {pytrendy-1.1.11.dev3 → pytrendy-1.2.0.dev1}/LICENSE +0 -0
  12. {pytrendy-1.1.11.dev3 → pytrendy-1.2.0.dev1}/pytrendy/__init__.py +0 -0
  13. {pytrendy-1.1.11.dev3 → pytrendy-1.2.0.dev1}/pytrendy/io/__init__.py +0 -0
  14. {pytrendy-1.1.11.dev3 → pytrendy-1.2.0.dev1}/pytrendy/io/data/classes_signals.csv +0 -0
  15. {pytrendy-1.1.11.dev3 → pytrendy-1.2.0.dev1}/pytrendy/io/data/series_synthetic.csv +0 -0
  16. {pytrendy-1.1.11.dev3 → pytrendy-1.2.0.dev1}/pytrendy/io/data_loader.py +0 -0
  17. {pytrendy-1.1.11.dev3 → pytrendy-1.2.0.dev1}/pytrendy/post_processing/__init__.py +0 -0
  18. {pytrendy-1.1.11.dev3 → pytrendy-1.2.0.dev1}/pytrendy/post_processing/segments_get.py +0 -0
  19. {pytrendy-1.1.11.dev3 → pytrendy-1.2.0.dev1}/pytrendy/post_processing/segments_refine/__init__.py +0 -0
  20. {pytrendy-1.1.11.dev3 → pytrendy-1.2.0.dev1}/pytrendy/post_processing/segments_refine/abrupt_shaving.py +0 -0
  21. {pytrendy-1.1.11.dev3 → pytrendy-1.2.0.dev1}/pytrendy/post_processing/segments_refine/gradual_expand_contract.py +0 -0
  22. {pytrendy-1.1.11.dev3 → pytrendy-1.2.0.dev1}/pytrendy/post_processing/segments_refine/segment_grouping.py +0 -0
  23. {pytrendy-1.1.11.dev3 → pytrendy-1.2.0.dev1}/pytrendy/post_processing/segments_refine/update_neighbours.py +0 -0
  24. {pytrendy-1.1.11.dev3 → pytrendy-1.2.0.dev1}/pytrendy/simpledtw.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pytrendy
3
- Version: 1.1.11.dev3
3
+ Version: 1.2.0.dev1
4
4
  Summary: Trend Detection in Python. Applicable for real-world industry use cases in time series.
5
5
  License: MIT License
6
6
 
@@ -106,20 +106,21 @@ The best detected trend is Down between dates 2025-05-09 - 2025-06-17
106
106
 
107
107
  Full Results:
108
108
  -------------------------------------------------------------------------------
109
- direction start end days total_change change_rank
110
- time_index
111
- 1 Up 2025-01-02 2025-01-24 22 14.013348 5
112
- 2 Down 2025-01-25 2025-02-05 11 -13.564214 6
113
- 3 Flat 2025-02-06 2025-02-09 3 NaN 7
114
- 4 Up 2025-02-10 2025-03-14 32 24.632035 3
115
- 5 Flat 2025-03-15 2025-03-17 2 NaN 8
116
- 6 Down 2025-03-18 2025-04-01 14 -22.721861 4
117
- 7 Up 2025-04-02 2025-05-08 36 72.611833 2
118
- 8 Down 2025-05-09 2025-06-17 39 -73.253968 1
119
- 9 Flat 2025-06-18 2025-06-30 12 NaN 9
109
+ direction start end days total_change change_rank trend_class
110
+ time_index
111
+ 1 Up 2025-01-02 2025-01-24 22 14.013348 5 gradual
112
+ 2 Down 2025-01-25 2025-02-05 11 -13.564214 6 gradual
113
+ 3 Flat 2025-02-06 2025-02-09 3 -1.168831 9 NaN
114
+ 4 Up 2025-02-10 2025-03-14 32 24.632035 3 gradual
115
+ 5 Flat 2025-03-15 2025-03-17 2 5.660173 7 NaN
116
+ 6 Down 2025-03-18 2025-04-01 14 -22.721861 4 gradual
117
+ 7 Up 2025-04-02 2025-05-08 36 72.611833 2 gradual
118
+ 8 Down 2025-05-09 2025-06-17 39 -73.253968 1 gradual
119
+ 9 Flat 2025-06-18 2025-06-30 12 3.910534 8 NaN
120
120
  -------------------------------------------------------------------------------
121
121
  ```
122
122
 
123
123
  ---
124
124
 
125
125
  **Read more in the full documentation:** [russellsb.github.io/pytrendy/main](https://russellsb.github.io/pytrendy/main/)
126
+
@@ -49,20 +49,20 @@ The best detected trend is Down between dates 2025-05-09 - 2025-06-17
49
49
 
50
50
  Full Results:
51
51
  -------------------------------------------------------------------------------
52
- direction start end days total_change change_rank
53
- time_index
54
- 1 Up 2025-01-02 2025-01-24 22 14.013348 5
55
- 2 Down 2025-01-25 2025-02-05 11 -13.564214 6
56
- 3 Flat 2025-02-06 2025-02-09 3 NaN 7
57
- 4 Up 2025-02-10 2025-03-14 32 24.632035 3
58
- 5 Flat 2025-03-15 2025-03-17 2 NaN 8
59
- 6 Down 2025-03-18 2025-04-01 14 -22.721861 4
60
- 7 Up 2025-04-02 2025-05-08 36 72.611833 2
61
- 8 Down 2025-05-09 2025-06-17 39 -73.253968 1
62
- 9 Flat 2025-06-18 2025-06-30 12 NaN 9
52
+ direction start end days total_change change_rank trend_class
53
+ time_index
54
+ 1 Up 2025-01-02 2025-01-24 22 14.013348 5 gradual
55
+ 2 Down 2025-01-25 2025-02-05 11 -13.564214 6 gradual
56
+ 3 Flat 2025-02-06 2025-02-09 3 -1.168831 9 NaN
57
+ 4 Up 2025-02-10 2025-03-14 32 24.632035 3 gradual
58
+ 5 Flat 2025-03-15 2025-03-17 2 5.660173 7 NaN
59
+ 6 Down 2025-03-18 2025-04-01 14 -22.721861 4 gradual
60
+ 7 Up 2025-04-02 2025-05-08 36 72.611833 2 gradual
61
+ 8 Down 2025-05-09 2025-06-17 39 -73.253968 1 gradual
62
+ 9 Flat 2025-06-18 2025-06-30 12 3.910534 8 NaN
63
63
  -------------------------------------------------------------------------------
64
64
  ```
65
65
 
66
66
  ---
67
67
 
68
- **Read more in the full documentation:** [russellsb.github.io/pytrendy/main](https://russellsb.github.io/pytrendy/main/)
68
+ **Read more in the full documentation:** [russellsb.github.io/pytrendy/main](https://russellsb.github.io/pytrendy/main/)
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "pytrendy"
3
- version = "1.1.11.dev3"
3
+ version = "1.2.0.dev1"
4
4
  description = "Trend Detection in Python. Applicable for real-world industry use cases in time series."
5
5
  authors = [
6
6
  { name = "Russell Sammut Bonnici", email = "r.sammutbonnici@gmail.com" },
@@ -41,6 +41,7 @@ def detect_trends(df: pd.DataFrame, date_col: str, value_col: str, plot=True, me
41
41
 
42
42
  - **is_abrupt_padded** (`bool`): Whether to pad abrupt transitions between segments. Defaults to `False`.
43
43
  - **abrupt_padding** (`int`): Number of days to pad around abrupt transitions. Only referenced when `is_abrupt_padded` is `True`. Defaults to `28`.
44
+ - **avoid_noise** (`bool`): Whether to avoid noisy segments in trend detection. Defaults to `True`.
44
45
  debug (bool, optional):
45
46
  If `True` will run in debug mode, outputting various additional plots and print statements. Only recommended for developers of pytrendy.
46
47
  Defaults to `False`.
@@ -62,10 +63,11 @@ def detect_trends(df: pd.DataFrame, date_col: str, value_col: str, plot=True, me
62
63
  method_params = {
63
64
  'is_abrupt_padded': method_params.get('is_abrupt_padded', False),
64
65
  'abrupt_padding': method_params.get('abrupt_padding', 28),
66
+ 'avoid_noise': method_params.get('avoid_noise', True),
65
67
  }
66
68
 
67
69
  # Core 5-step pipeline
68
- df = process_signals(df, value_col, debug=debug)
70
+ df = process_signals(df, value_col, method_params, debug)
69
71
  segments = get_segments(df)
70
72
  segments = refine_segments(df, value_col, segments, method_params)
71
73
  segments = analyse_segments(df, value_col, segments)
@@ -111,7 +111,7 @@ def plot_pytrendy(df: pd.DataFrame, value_col: str, segments_enhanced: list[dict
111
111
  ax.fill_between(df.index[mask], ymin, ymax, color=color, alpha=0.4)
112
112
 
113
113
  # Add ranking if up/down trend
114
- if 'change_rank' in seg:
114
+ if 'change_rank' in seg and seg['direction'] in ['Up', 'Down']:
115
115
  mid_date = start + (end - start) / 2
116
116
  y_pos = ymax - (ymax - ymin) * 0.05
117
117
  ax.text(mid_date, y_pos, str(seg['change_rank']), fontsize=12,
@@ -23,6 +23,8 @@ class PyTrendyResults:
23
23
  List of dictionaries representing individual trend segments.
24
24
  """
25
25
  self.segments = segments
26
+ self.trend_segments = [seg for seg in self.segments if 'trend_class' in seg] # Get segments that are trends (exclude flats and noise)
27
+
26
28
  self.set_best()
27
29
  self.set_df()
28
30
  self.set_summary()
@@ -35,10 +37,10 @@ class PyTrendyResults:
35
37
  - Identifies the best trend segment based on steepness and duration.
36
38
  - The segment with the lowest `change_rank` is selected as the best.
37
39
  """
38
- if len(self.segments) == 0 or not any('change_rank' in segment for segment in self.segments):
40
+ if len(self.trend_segments) == 0:
39
41
  self.best = None
40
42
  return
41
- self.best = min(self.segments, key=lambda x: x.get('change_rank', math.inf))
43
+ self.best = min(self.trend_segments, key=lambda x: x.get('change_rank', math.inf))
42
44
 
43
45
  def set_summary(self) -> None:
44
46
  """
@@ -54,19 +56,23 @@ class PyTrendyResults:
54
56
  summary['df'] = pd.DataFrame()
55
57
  return
56
58
 
59
+ # Count the number of segments per direction type (Up, Down, Flat, Noise)
57
60
  direction_counts = Counter(seg["direction"] for seg in self.segments)
58
61
  summary["direction_counts"] = dict(direction_counts)
59
62
 
60
- trend_class_counts = Counter(seg["trend_class"] for seg in self.segments if "trend_class" in seg)
63
+ # Count number of segments per trend class (abrupt, gradual)
64
+ trend_class_counts = Counter(seg["trend_class"] for seg in self.trend_segments)
61
65
  summary["trend_class_counts"] = dict(trend_class_counts)
62
66
 
63
- changes = [seg.get("total_change", 0) for seg in self.segments if "total_change" in seg]
67
+ # Get array of total change from trends and get max (best) total change
68
+ changes = [seg.get("total_change", 0) for seg in self.trend_segments]
64
69
  summary['highest_total_change'] = np.max(changes) if len(changes) > 0 else None
65
70
 
66
71
  # Set summary df (without extra details)
67
72
  df = pd.DataFrame(self.segments)
68
- cols = ['time_index', 'direction', 'start', 'end', 'days']
69
- if len(changes) > 1: cols += ['total_change', 'change_rank', 'trend_class']
73
+ cols = ['time_index', 'direction', 'start', 'end', 'days', 'total_change', 'change_rank']
74
+ if len(changes) > 1: # only include trend_class if at least one trend exists
75
+ cols += ['trend_class']
70
76
  df = df[cols]
71
77
 
72
78
  df = df.set_index('time_index')
@@ -16,7 +16,7 @@ def analyse_segments(df: pd.DataFrame, value_col: str, segments: list[dict]) ->
16
16
 
17
17
  Metrics added include:
18
18
 
19
- - Absolute and percent change (based on min/max values)
19
+ - Absolute and percent change (based on start/end values)
20
20
 
21
21
  - Duration in days
22
22
 
@@ -47,19 +47,10 @@ def analyse_segments(df: pd.DataFrame, value_col: str, segments: list[dict]) ->
47
47
  df_segment = df.loc[segment['start']:segment['end']]
48
48
 
49
49
  # Calculate absolute and relative change from first point to last point of trend.
50
- # (Using min/max instead of first/last to be more robust to noise.)
51
- val_min = df_segment[value_col].min()
52
- val_max = df_segment[value_col].max()
53
- if segment['direction'] == 'Up': # max - min
54
- segment_enhanced['change'] = float(val_max - val_min)
55
- segment_enhanced['pct_change'] = (
56
- float(val_max / val_min - 1) if val_min != 0 else np.nan
57
- )
58
- elif segment['direction'] == 'Down': # min - max
59
- segment_enhanced['change'] = float(val_min - val_max)
60
- segment_enhanced['pct_change'] = (
61
- float(val_min / val_max - 1) if val_max != 0 else np.nan
62
- )
50
+ val_start = df_segment[value_col].iloc[0]
51
+ val_end = df_segment[value_col].iloc[-1]
52
+ segment_enhanced['change'] = float(val_end - val_start)
53
+ segment_enhanced['pct_change'] = (float(val_end / val_start - 1) if val_start != 0 else np.nan)
63
54
 
64
55
  # Calculate days & cumulative total change
65
56
  days = (pd.to_datetime(segment['end']) - pd.to_datetime(segment['start'])).days
@@ -68,8 +59,7 @@ def analyse_segments(df: pd.DataFrame, value_col: str, segments: list[dict]) ->
68
59
  segment_enhanced['days'] = days # set days
69
60
 
70
61
  # Calculate cumulative total change
71
- if segment['direction'] in ['Up', 'Down']:
72
- segment_enhanced['total_change'] = float(df_segment[value_col].diff().sum())
62
+ segment_enhanced['total_change'] = float(df_segment[value_col].diff().sum())
73
63
 
74
64
  # Calculate Signal to Noise Ratio
75
65
  signal_power = np.mean(df_segment['signal']**2)
@@ -83,8 +73,7 @@ def analyse_segments(df: pd.DataFrame, value_col: str, segments: list[dict]) ->
83
73
 
84
74
  # Rank change, by steepest to shallowest change
85
75
  sorted_segments = sorted(segments_enhanced, key=lambda x: abs(x.get('total_change', 0)), reverse=True)
86
- sorted_trends = [seg for seg in sorted_segments if 'total_change' in seg and abs(seg['total_change']) > 0]
87
- for i, seg in enumerate(sorted_trends):
76
+ for i, seg in enumerate(sorted_segments):
88
77
  j = seg['time_index'] - 1
89
78
  segments_enhanced[j]['change_rank'] = int(i+1)
90
79
 
@@ -20,6 +20,7 @@ def clean_artifacts(df: pd.DataFrame, value_col: str, segments_refined: list[dic
20
20
 
21
21
  - **is_abrupt_padded** (`bool`): If `True`, skips neighboring-noise checks around abrupt segments. Defaults to `False`.
22
22
  - **abrupt_padding** (`int`): Padding window in days used by abrupt refinement; included for pipeline consistency. Defaults to `28`.
23
+ - **avoid_noise** (`bool`): Whether to avoid noisy segments in trend detection. Defaults to `True`.
23
24
  inverse_only (bool): If True, only perform inverse checks and skip other artifact cleanups. Useful for final cleanup pass after flat fill ins.
24
25
 
25
26
  Returns:
@@ -182,49 +183,51 @@ def clean_artifacts(df: pd.DataFrame, value_col: str, segments_refined: list[dic
182
183
  segments_refined.append(segment)
183
184
 
184
185
  # Pass 3: Cleans partial overlaps with noise. Don't filter out completely when partial, adjust outside noise
185
- segments = deepcopy(segments_refined)
186
- segments_refined = []
187
- for i, segment in enumerate(segments):
188
- if (i < len(segments)-1 and has_partial_overlap_next(segment, segments[i+1])):
189
-
190
- shifted_end = (pd.to_datetime(segments[i+1]['start']) - pd.Timedelta(days=1))
191
- start = pd.to_datetime(segment['start'])
192
- is_inverted = (shifted_end < start) # In case noise segment is <= 1 day in length
193
- if is_inverted:
194
- continue
195
-
196
- # when gradual, follows similar logic to expand/contract selection.
197
- end_df = df.loc[start:shifted_end]
198
- if segments[i]['direction'] == 'Up':
199
- new_end = end_df[value_col].idxmax()
200
- segments[i]['end'] = new_end.strftime('%Y-%m-%d')
201
-
202
- if segments[i]['direction'] == 'Down':
203
- new_end = end_df[value_col].idxmin()
204
- segments[i]['end'] = new_end.strftime('%Y-%m-%d')
186
+ # Only runs post-processing cleanup logic when avoid_noise is True, enabled by default.
187
+ if method_params['avoid_noise']:
188
+ segments = deepcopy(segments_refined)
189
+ segments_refined = []
190
+ for i, segment in enumerate(segments):
191
+ if (i < len(segments)-1 and has_partial_overlap_next(segment, segments[i+1])):
192
+
193
+ shifted_end = (pd.to_datetime(segments[i+1]['start']) - pd.Timedelta(days=1))
194
+ start = pd.to_datetime(segment['start'])
195
+ is_inverted = (shifted_end < start) # In case noise segment is <= 1 day in length
196
+ if is_inverted:
197
+ continue
198
+
199
+ # when gradual, follows similar logic to expand/contract selection.
200
+ end_df = df.loc[start:shifted_end]
201
+ if segments[i]['direction'] == 'Up':
202
+ new_end = end_df[value_col].idxmax()
203
+ segments[i]['end'] = new_end.strftime('%Y-%m-%d')
204
+
205
+ if segments[i]['direction'] == 'Down':
206
+ new_end = end_df[value_col].idxmin()
207
+ segments[i]['end'] = new_end.strftime('%Y-%m-%d')
205
208
 
206
- elif segments[i]['direction'] == 'Flat':
207
- segments[i]['end'] = shifted_end.strftime('%Y-%m-%d')
209
+ elif segments[i]['direction'] == 'Flat':
210
+ segments[i]['end'] = shifted_end.strftime('%Y-%m-%d')
208
211
 
209
- if (i > 0 and has_partial_overlap_prev(segment, segments[i-1])):
212
+ if (i > 0 and has_partial_overlap_prev(segment, segments[i-1])):
210
213
 
211
- shifted_start = (pd.to_datetime(segments[i-1]['end']) + pd.Timedelta(days=1))
212
- end = pd.to_datetime(segment['end'])
213
-
214
- # when gradual, follows similar logic to expand/contract selection.
215
- start_df = df.loc[shifted_start:end]
216
- if segments[i]['direction'] == 'Up':
217
- new_start = start_df[value_col].iloc[::-1].idxmin() + pd.Timedelta(days=1)
218
- segments[i]['start'] = new_start.strftime('%Y-%m-%d')
214
+ shifted_start = (pd.to_datetime(segments[i-1]['end']) + pd.Timedelta(days=1))
215
+ end = pd.to_datetime(segment['end'])
216
+
217
+ # when gradual, follows similar logic to expand/contract selection.
218
+ start_df = df.loc[shifted_start:end]
219
+ if segments[i]['direction'] == 'Up':
220
+ new_start = start_df[value_col].iloc[::-1].idxmin() + pd.Timedelta(days=1)
221
+ segments[i]['start'] = new_start.strftime('%Y-%m-%d')
219
222
 
220
- if segments[i]['direction'] == 'Down':
221
- new_start = start_df[value_col].iloc[::-1].idxmax() + pd.Timedelta(days=1)
222
- segments[i]['start'] = new_start.strftime('%Y-%m-%d')
223
+ if segments[i]['direction'] == 'Down':
224
+ new_start = start_df[value_col].iloc[::-1].idxmax() + pd.Timedelta(days=1)
225
+ segments[i]['start'] = new_start.strftime('%Y-%m-%d')
223
226
 
224
- elif segments[i]['direction'] == 'Flat':
225
- segments[i]['start'] = shifted_start.strftime('%Y-%m-%d')
227
+ elif segments[i]['direction'] == 'Flat':
228
+ segments[i]['start'] = shifted_start.strftime('%Y-%m-%d')
226
229
 
227
- segments_refined.append(segment)
230
+ segments_refined.append(segment)
228
231
 
229
232
  # Pass 4: Cleans inverse AGAIN: in case any artifacts from overlap adjustments
230
233
  segments = deepcopy(segments_refined)
@@ -235,7 +238,7 @@ def clean_artifacts(df: pd.DataFrame, value_col: str, segments_refined: list[dic
235
238
  segments_refined.append(segment)
236
239
 
237
240
  # Pass 5:
238
- # - Sets trends to noise when they have too low an SNR, too susceptible to noise, or not trendy enough
241
+ # - Sets trends to noise when they have too low an SNR, too susceptible to noise, or not trendy enough (enabled when avoid_noise is True)
239
242
  # - Sets trends to flat when too flat.
240
243
  segments = deepcopy(segments_refined)
241
244
  segments_refined = []
@@ -309,7 +312,8 @@ def clean_artifacts(df: pd.DataFrame, value_col: str, segments_refined: list[dic
309
312
  trend_too_flat = not min_in_last_section
310
313
 
311
314
  # Reclassify as noise if either edge cases met
312
- if too_noisy or (is_abrupt_near_noise and not trend_ends_too_close) or is_small_gradual_in_noise:
315
+ if method_params['avoid_noise'] and \
316
+ (too_noisy or (is_abrupt_near_noise and not trend_ends_too_close) or is_small_gradual_in_noise):
313
317
  segment['direction'] = 'Noise'
314
318
  if 'trend_class' in segment: del segment['trend_class']
315
319
 
@@ -380,4 +384,4 @@ def fill_in_flats(df: pd.DataFrame, segments: list[dict]) -> list[dict]:
380
384
  direction='Flat'
381
385
  ))
382
386
 
383
- return segments_refined
387
+ return segments_refined
@@ -75,4 +75,4 @@ def classify_trends(df: pd.DataFrame, value_col: str, segments: list[dict]) -> l
75
75
  if segment_length < 3:
76
76
  segments_classified[i]['trend_class'] = 'abrupt'
77
77
 
78
- return segments_classified
78
+ return segments_classified
@@ -6,7 +6,7 @@ from scipy.signal import savgol_filter
6
6
  from scipy.stats import iqr
7
7
  from .post_processing.segments_refine.segment_grouping import GROUPING_DISTANCE
8
8
 
9
- def process_signals(df: pd.DataFrame, value_col: str, debug: bool=False) -> pd.DataFrame:
9
+ def process_signals(df: pd.DataFrame, value_col: str, method_params: dict, debug: bool=False) -> pd.DataFrame:
10
10
  """
11
11
  Applies signal processing techniques to classify regions of a time series.
12
12
 
@@ -32,6 +32,12 @@ def process_signals(df: pd.DataFrame, value_col: str, debug: bool=False) -> pd.D
32
32
  Input time series data with a datetime index and signal column.
33
33
  value_col (str):
34
34
  Name of the column containing the signal to process.
35
+ method_params (dict, optional):
36
+ Optional parameters to customize detection heuristics. Supported keys:
37
+
38
+ - **is_abrupt_padded** (`bool`): Whether to pad abrupt transitions between segments. Defaults to `False`.
39
+ - **abrupt_padding** (`int`): Number of days to pad around abrupt transitions. Only referenced when `is_abrupt_padded` is `True`. Defaults to `28`.
40
+ - **avoid_noise** (`bool`): Whether to avoid noisy segments in trend detection. Defaults to `True`.
35
41
  debug (bool, optional):
36
42
  If `True` will run in debug mode, outputting various additional plots and print statements. Only recommended for developers of pytrendy. Defaults to `False`.
37
43
 
@@ -177,17 +183,19 @@ def process_signals(df: pd.DataFrame, value_col: str, debug: bool=False) -> pd.D
177
183
  df.loc[df['flat_flag'] == 1, 'trend_flag'] = -2
178
184
  df.loc[df['noise_flag'] == 1, 'trend_flag'] = -3
179
185
 
186
+ # Important condition to establish non-trend segments to avoid detecting trends over
187
+ avoid_condition = (df['flat_flag'] == 0) # flat is always avoided
188
+ if method_params['avoid_noise']: # noise can be optionally avoided, up to the user
189
+ avoid_condition &= (df['noise_flag'] == 0)
190
+
180
191
  derivative_limit = abs(iqr(df[value_col])) * THRESHOLD_SMOOTH
181
192
  df['smoothed_deriv'] = savgol_filter(df[value_col], window_length=WINDOW_SMOOTH, polyorder=1, deriv=1)
182
- df.loc[(df['smoothed_deriv'] >= derivative_limit) & (df['flat_flag'] == 0) & (df['noise_flag'] == 0), 'trend_flag'] = 1
183
- df.loc[(df['smoothed_deriv'] < -derivative_limit) & (df['flat_flag'] == 0) & (df['noise_flag'] == 0), 'trend_flag'] = -1
193
+ df.loc[(df['smoothed_deriv'] >= derivative_limit) & avoid_condition, 'trend_flag'] = 1
194
+ df.loc[(df['smoothed_deriv'] < -derivative_limit) & avoid_condition, 'trend_flag'] = -1
184
195
 
185
196
  if debug:
186
197
  import matplotlib.pyplot as plt
187
198
 
188
- #df['smoothed_deriv'].hist()
189
- #plt.show()
190
-
191
199
  ax = df[[value_col, 'snr']].plot(figsize=(20,3), secondary_y='snr')
192
200
  ax.right_ax.axhline(y=THRESHOLD_NOISE, color='gray', linestyle='--', linewidth=2)
193
201
  plt.title("Signal-Noise Ratio (SNR)")
@@ -214,8 +222,8 @@ def process_signals(df: pd.DataFrame, value_col: str, debug: bool=False) -> pd.D
214
222
  plt.show()
215
223
 
216
224
  ax = df[[value_col, 'smoothed_deriv']].plot(figsize=(20,3), secondary_y='smoothed_deriv')
217
- ax.right_ax.axhline(y=THRESHOLD_SMOOTH, color='gray', linestyle='--', linewidth=2)
218
- ax.right_ax.axhline(y=-THRESHOLD_SMOOTH, color='gray', linestyle=':', linewidth=2)
225
+ ax.right_ax.axhline(y=derivative_limit, color='gray', linestyle='--', linewidth=2)
226
+ ax.right_ax.axhline(y=-derivative_limit, color='gray', linestyle=':', linewidth=2)
219
227
  plt.title("Smoothed Derivative")
220
228
  plt.show()
221
229
 
File without changes