pytrendy 1.1.11.dev3__tar.gz → 1.2.0.dev1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24) hide show
  1. {pytrendy-1.1.11.dev3 → pytrendy-1.2.0.dev1}/PKG-INFO +13 -12
  2. {pytrendy-1.1.11.dev3 → pytrendy-1.2.0.dev1}/README.md +12 -12
  3. {pytrendy-1.1.11.dev3 → pytrendy-1.2.0.dev1}/pyproject.toml +1 -1
  4. {pytrendy-1.1.11.dev3 → pytrendy-1.2.0.dev1}/pytrendy/detect_trends.py +3 -1
  5. {pytrendy-1.1.11.dev3 → pytrendy-1.2.0.dev1}/pytrendy/io/plot_pytrendy.py +1 -1
  6. {pytrendy-1.1.11.dev3 → pytrendy-1.2.0.dev1}/pytrendy/io/results_pytrendy.py +12 -6
  7. {pytrendy-1.1.11.dev3 → pytrendy-1.2.0.dev1}/pytrendy/post_processing/segments_analyse.py +7 -18
  8. {pytrendy-1.1.11.dev3 → pytrendy-1.2.0.dev1}/pytrendy/post_processing/segments_refine/artifact_cleanup.py +44 -40
  9. {pytrendy-1.1.11.dev3 → pytrendy-1.2.0.dev1}/pytrendy/post_processing/segments_refine/trend_classify.py +1 -1
  10. {pytrendy-1.1.11.dev3 → pytrendy-1.2.0.dev1}/pytrendy/process_signals.py +16 -8
  11. {pytrendy-1.1.11.dev3 → pytrendy-1.2.0.dev1}/LICENSE +0 -0
  12. {pytrendy-1.1.11.dev3 → pytrendy-1.2.0.dev1}/pytrendy/__init__.py +0 -0
  13. {pytrendy-1.1.11.dev3 → pytrendy-1.2.0.dev1}/pytrendy/io/__init__.py +0 -0
  14. {pytrendy-1.1.11.dev3 → pytrendy-1.2.0.dev1}/pytrendy/io/data/classes_signals.csv +0 -0
  15. {pytrendy-1.1.11.dev3 → pytrendy-1.2.0.dev1}/pytrendy/io/data/series_synthetic.csv +0 -0
  16. {pytrendy-1.1.11.dev3 → pytrendy-1.2.0.dev1}/pytrendy/io/data_loader.py +0 -0
  17. {pytrendy-1.1.11.dev3 → pytrendy-1.2.0.dev1}/pytrendy/post_processing/__init__.py +0 -0
  18. {pytrendy-1.1.11.dev3 → pytrendy-1.2.0.dev1}/pytrendy/post_processing/segments_get.py +0 -0
  19. {pytrendy-1.1.11.dev3 → pytrendy-1.2.0.dev1}/pytrendy/post_processing/segments_refine/__init__.py +0 -0
  20. {pytrendy-1.1.11.dev3 → pytrendy-1.2.0.dev1}/pytrendy/post_processing/segments_refine/abrupt_shaving.py +0 -0
  21. {pytrendy-1.1.11.dev3 → pytrendy-1.2.0.dev1}/pytrendy/post_processing/segments_refine/gradual_expand_contract.py +0 -0
  22. {pytrendy-1.1.11.dev3 → pytrendy-1.2.0.dev1}/pytrendy/post_processing/segments_refine/segment_grouping.py +0 -0
  23. {pytrendy-1.1.11.dev3 → pytrendy-1.2.0.dev1}/pytrendy/post_processing/segments_refine/update_neighbours.py +0 -0
  24. {pytrendy-1.1.11.dev3 → pytrendy-1.2.0.dev1}/pytrendy/simpledtw.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pytrendy
3
- Version: 1.1.11.dev3
3
+ Version: 1.2.0.dev1
4
4
  Summary: Trend Detection in Python. Applicable for real-world industry use cases in time series.
5
5
  License: MIT License
6
6
 
@@ -106,20 +106,21 @@ The best detected trend is Down between dates 2025-05-09 - 2025-06-17
106
106
 
107
107
  Full Results:
108
108
  -------------------------------------------------------------------------------
109
- direction start end days total_change change_rank
110
- time_index
111
- 1 Up 2025-01-02 2025-01-24 22 14.013348 5
112
- 2 Down 2025-01-25 2025-02-05 11 -13.564214 6
113
- 3 Flat 2025-02-06 2025-02-09 3 NaN 7
114
- 4 Up 2025-02-10 2025-03-14 32 24.632035 3
115
- 5 Flat 2025-03-15 2025-03-17 2 NaN 8
116
- 6 Down 2025-03-18 2025-04-01 14 -22.721861 4
117
- 7 Up 2025-04-02 2025-05-08 36 72.611833 2
118
- 8 Down 2025-05-09 2025-06-17 39 -73.253968 1
119
- 9 Flat 2025-06-18 2025-06-30 12 NaN 9
109
+ direction start end days total_change change_rank trend_class
110
+ time_index
111
+ 1 Up 2025-01-02 2025-01-24 22 14.013348 5 gradual
112
+ 2 Down 2025-01-25 2025-02-05 11 -13.564214 6 gradual
113
+ 3 Flat 2025-02-06 2025-02-09 3 -1.168831 9 NaN
114
+ 4 Up 2025-02-10 2025-03-14 32 24.632035 3 gradual
115
+ 5 Flat 2025-03-15 2025-03-17 2 5.660173 7 NaN
116
+ 6 Down 2025-03-18 2025-04-01 14 -22.721861 4 gradual
117
+ 7 Up 2025-04-02 2025-05-08 36 72.611833 2 gradual
118
+ 8 Down 2025-05-09 2025-06-17 39 -73.253968 1 gradual
119
+ 9 Flat 2025-06-18 2025-06-30 12 3.910534 8 NaN
120
120
  -------------------------------------------------------------------------------
121
121
  ```
122
122
 
123
123
  ---
124
124
 
125
125
  **Read more in the full documentation:** [russellsb.github.io/pytrendy/main](https://russellsb.github.io/pytrendy/main/)
126
+
@@ -49,20 +49,20 @@ The best detected trend is Down between dates 2025-05-09 - 2025-06-17
49
49
 
50
50
  Full Results:
51
51
  -------------------------------------------------------------------------------
52
- direction start end days total_change change_rank
53
- time_index
54
- 1 Up 2025-01-02 2025-01-24 22 14.013348 5
55
- 2 Down 2025-01-25 2025-02-05 11 -13.564214 6
56
- 3 Flat 2025-02-06 2025-02-09 3 NaN 7
57
- 4 Up 2025-02-10 2025-03-14 32 24.632035 3
58
- 5 Flat 2025-03-15 2025-03-17 2 NaN 8
59
- 6 Down 2025-03-18 2025-04-01 14 -22.721861 4
60
- 7 Up 2025-04-02 2025-05-08 36 72.611833 2
61
- 8 Down 2025-05-09 2025-06-17 39 -73.253968 1
62
- 9 Flat 2025-06-18 2025-06-30 12 NaN 9
52
+ direction start end days total_change change_rank trend_class
53
+ time_index
54
+ 1 Up 2025-01-02 2025-01-24 22 14.013348 5 gradual
55
+ 2 Down 2025-01-25 2025-02-05 11 -13.564214 6 gradual
56
+ 3 Flat 2025-02-06 2025-02-09 3 -1.168831 9 NaN
57
+ 4 Up 2025-02-10 2025-03-14 32 24.632035 3 gradual
58
+ 5 Flat 2025-03-15 2025-03-17 2 5.660173 7 NaN
59
+ 6 Down 2025-03-18 2025-04-01 14 -22.721861 4 gradual
60
+ 7 Up 2025-04-02 2025-05-08 36 72.611833 2 gradual
61
+ 8 Down 2025-05-09 2025-06-17 39 -73.253968 1 gradual
62
+ 9 Flat 2025-06-18 2025-06-30 12 3.910534 8 NaN
63
63
  -------------------------------------------------------------------------------
64
64
  ```
65
65
 
66
66
  ---
67
67
 
68
- **Read more in the full documentation:** [russellsb.github.io/pytrendy/main](https://russellsb.github.io/pytrendy/main/)
68
+ **Read more in the full documentation:** [russellsb.github.io/pytrendy/main](https://russellsb.github.io/pytrendy/main/)
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "pytrendy"
3
- version = "1.1.11.dev3"
3
+ version = "1.2.0.dev1"
4
4
  description = "Trend Detection in Python. Applicable for real-world industry use cases in time series."
5
5
  authors = [
6
6
  { name = "Russell Sammut Bonnici", email = "r.sammutbonnici@gmail.com" },
@@ -41,6 +41,7 @@ def detect_trends(df: pd.DataFrame, date_col: str, value_col: str, plot=True, me
41
41
 
42
42
  - **is_abrupt_padded** (`bool`): Whether to pad abrupt transitions between segments. Defaults to `False`.
43
43
  - **abrupt_padding** (`int`): Number of days to pad around abrupt transitions. Only referenced when `is_abrupt_padded` is `True`. Defaults to `28`.
44
+ - **avoid_noise** (`bool`): Whether to avoid noisy segments in trend detection. Defaults to `True`.
44
45
  debug (bool, optional):
45
46
  If `True` will run in debug mode, outputting various additional plots and print statements. Only recommended for developers of pytrendy.
46
47
  Defaults to `False`.
@@ -62,10 +63,11 @@ def detect_trends(df: pd.DataFrame, date_col: str, value_col: str, plot=True, me
62
63
  method_params = {
63
64
  'is_abrupt_padded': method_params.get('is_abrupt_padded', False),
64
65
  'abrupt_padding': method_params.get('abrupt_padding', 28),
66
+ 'avoid_noise': method_params.get('avoid_noise', True),
65
67
  }
66
68
 
67
69
  # Core 5-step pipeline
68
- df = process_signals(df, value_col, debug=debug)
70
+ df = process_signals(df, value_col, method_params, debug)
69
71
  segments = get_segments(df)
70
72
  segments = refine_segments(df, value_col, segments, method_params)
71
73
  segments = analyse_segments(df, value_col, segments)
@@ -111,7 +111,7 @@ def plot_pytrendy(df: pd.DataFrame, value_col: str, segments_enhanced: list[dict
111
111
  ax.fill_between(df.index[mask], ymin, ymax, color=color, alpha=0.4)
112
112
 
113
113
  # Add ranking if up/down trend
114
- if 'change_rank' in seg:
114
+ if 'change_rank' in seg and seg['direction'] in ['Up', 'Down']:
115
115
  mid_date = start + (end - start) / 2
116
116
  y_pos = ymax - (ymax - ymin) * 0.05
117
117
  ax.text(mid_date, y_pos, str(seg['change_rank']), fontsize=12,
@@ -23,6 +23,8 @@ class PyTrendyResults:
23
23
  List of dictionaries representing individual trend segments.
24
24
  """
25
25
  self.segments = segments
26
+ self.trend_segments = [seg for seg in self.segments if 'trend_class' in seg] # Get segments that are trends (exclude flats and noise)
27
+
26
28
  self.set_best()
27
29
  self.set_df()
28
30
  self.set_summary()
@@ -35,10 +37,10 @@ class PyTrendyResults:
35
37
  - Identifies the best trend segment based on steepness and duration.
36
38
  - The segment with the lowest `change_rank` is selected as the best.
37
39
  """
38
- if len(self.segments) == 0 or not any('change_rank' in segment for segment in self.segments):
40
+ if len(self.trend_segments) == 0:
39
41
  self.best = None
40
42
  return
41
- self.best = min(self.segments, key=lambda x: x.get('change_rank', math.inf))
43
+ self.best = min(self.trend_segments, key=lambda x: x.get('change_rank', math.inf))
42
44
 
43
45
  def set_summary(self) -> None:
44
46
  """
@@ -54,19 +56,23 @@ class PyTrendyResults:
54
56
  summary['df'] = pd.DataFrame()
55
57
  return
56
58
 
59
+ # Count the number of segments per direction type (Up, Down, Flat, Noise)
57
60
  direction_counts = Counter(seg["direction"] for seg in self.segments)
58
61
  summary["direction_counts"] = dict(direction_counts)
59
62
 
60
- trend_class_counts = Counter(seg["trend_class"] for seg in self.segments if "trend_class" in seg)
63
+ # Count number of segments per trend class (abrupt, gradual)
64
+ trend_class_counts = Counter(seg["trend_class"] for seg in self.trend_segments)
61
65
  summary["trend_class_counts"] = dict(trend_class_counts)
62
66
 
63
- changes = [seg.get("total_change", 0) for seg in self.segments if "total_change" in seg]
67
+ # Get array of total change from trends and get max (best) total change
68
+ changes = [seg.get("total_change", 0) for seg in self.trend_segments]
64
69
  summary['highest_total_change'] = np.max(changes) if len(changes) > 0 else None
65
70
 
66
71
  # Set summary df (without extra details)
67
72
  df = pd.DataFrame(self.segments)
68
- cols = ['time_index', 'direction', 'start', 'end', 'days']
69
- if len(changes) > 1: cols += ['total_change', 'change_rank', 'trend_class']
73
+ cols = ['time_index', 'direction', 'start', 'end', 'days', 'total_change', 'change_rank']
74
+ if len(changes) > 1: # only include trend_class if at least one trend exists
75
+ cols += ['trend_class']
70
76
  df = df[cols]
71
77
 
72
78
  df = df.set_index('time_index')
@@ -16,7 +16,7 @@ def analyse_segments(df: pd.DataFrame, value_col: str, segments: list[dict]) ->
16
16
 
17
17
  Metrics added include:
18
18
 
19
- - Absolute and percent change (based on min/max values)
19
+ - Absolute and percent change (based on start/end values)
20
20
 
21
21
  - Duration in days
22
22
 
@@ -47,19 +47,10 @@ def analyse_segments(df: pd.DataFrame, value_col: str, segments: list[dict]) ->
47
47
  df_segment = df.loc[segment['start']:segment['end']]
48
48
 
49
49
  # Calculate absolute and relative change from first point to last point of trend.
50
- # (Using min/max instead of first/last to be more robust to noise.)
51
- val_min = df_segment[value_col].min()
52
- val_max = df_segment[value_col].max()
53
- if segment['direction'] == 'Up': # max - min
54
- segment_enhanced['change'] = float(val_max - val_min)
55
- segment_enhanced['pct_change'] = (
56
- float(val_max / val_min - 1) if val_min != 0 else np.nan
57
- )
58
- elif segment['direction'] == 'Down': # min - max
59
- segment_enhanced['change'] = float(val_min - val_max)
60
- segment_enhanced['pct_change'] = (
61
- float(val_min / val_max - 1) if val_max != 0 else np.nan
62
- )
50
+ val_start = df_segment[value_col].iloc[0]
51
+ val_end = df_segment[value_col].iloc[-1]
52
+ segment_enhanced['change'] = float(val_end - val_start)
53
+ segment_enhanced['pct_change'] = (float(val_end / val_start - 1) if val_start != 0 else np.nan)
63
54
 
64
55
  # Calculate days & cumulative total change
65
56
  days = (pd.to_datetime(segment['end']) - pd.to_datetime(segment['start'])).days
@@ -68,8 +59,7 @@ def analyse_segments(df: pd.DataFrame, value_col: str, segments: list[dict]) ->
68
59
  segment_enhanced['days'] = days # set days
69
60
 
70
61
  # Calculate cumulative total change
71
- if segment['direction'] in ['Up', 'Down']:
72
- segment_enhanced['total_change'] = float(df_segment[value_col].diff().sum())
62
+ segment_enhanced['total_change'] = float(df_segment[value_col].diff().sum())
73
63
 
74
64
  # Calculate Signal to Noise Ratio
75
65
  signal_power = np.mean(df_segment['signal']**2)
@@ -83,8 +73,7 @@ def analyse_segments(df: pd.DataFrame, value_col: str, segments: list[dict]) ->
83
73
 
84
74
  # Rank change, by steepest to shallowest change
85
75
  sorted_segments = sorted(segments_enhanced, key=lambda x: abs(x.get('total_change', 0)), reverse=True)
86
- sorted_trends = [seg for seg in sorted_segments if 'total_change' in seg and abs(seg['total_change']) > 0]
87
- for i, seg in enumerate(sorted_trends):
76
+ for i, seg in enumerate(sorted_segments):
88
77
  j = seg['time_index'] - 1
89
78
  segments_enhanced[j]['change_rank'] = int(i+1)
90
79
 
@@ -20,6 +20,7 @@ def clean_artifacts(df: pd.DataFrame, value_col: str, segments_refined: list[dic
20
20
 
21
21
  - **is_abrupt_padded** (`bool`): If `True`, skips neighboring-noise checks around abrupt segments. Defaults to `False`.
22
22
  - **abrupt_padding** (`int`): Padding window in days used by abrupt refinement; included for pipeline consistency. Defaults to `28`.
23
+ - **avoid_noise** (`bool`): Whether to avoid noisy segments in trend detection. Defaults to `True`.
23
24
  inverse_only (bool): If True, only perform inverse checks and skip other artifact cleanups. Useful for final cleanup pass after flat fill ins.
24
25
 
25
26
  Returns:
@@ -182,49 +183,51 @@ def clean_artifacts(df: pd.DataFrame, value_col: str, segments_refined: list[dic
182
183
  segments_refined.append(segment)
183
184
 
184
185
  # Pass 3: Cleans partial overlaps with noise. Don't filter out completely when partial, adjust outside noise
185
- segments = deepcopy(segments_refined)
186
- segments_refined = []
187
- for i, segment in enumerate(segments):
188
- if (i < len(segments)-1 and has_partial_overlap_next(segment, segments[i+1])):
189
-
190
- shifted_end = (pd.to_datetime(segments[i+1]['start']) - pd.Timedelta(days=1))
191
- start = pd.to_datetime(segment['start'])
192
- is_inverted = (shifted_end < start) # In case noise segment is <= 1 day in length
193
- if is_inverted:
194
- continue
195
-
196
- # when gradual, follows similar logic to expand/contract selection.
197
- end_df = df.loc[start:shifted_end]
198
- if segments[i]['direction'] == 'Up':
199
- new_end = end_df[value_col].idxmax()
200
- segments[i]['end'] = new_end.strftime('%Y-%m-%d')
201
-
202
- if segments[i]['direction'] == 'Down':
203
- new_end = end_df[value_col].idxmin()
204
- segments[i]['end'] = new_end.strftime('%Y-%m-%d')
186
+ # Only runs post-processing cleanup logic when avoid_noise is True, enabled by default.
187
+ if method_params['avoid_noise']:
188
+ segments = deepcopy(segments_refined)
189
+ segments_refined = []
190
+ for i, segment in enumerate(segments):
191
+ if (i < len(segments)-1 and has_partial_overlap_next(segment, segments[i+1])):
192
+
193
+ shifted_end = (pd.to_datetime(segments[i+1]['start']) - pd.Timedelta(days=1))
194
+ start = pd.to_datetime(segment['start'])
195
+ is_inverted = (shifted_end < start) # In case noise segment is <= 1 day in length
196
+ if is_inverted:
197
+ continue
198
+
199
+ # when gradual, follows similar logic to expand/contract selection.
200
+ end_df = df.loc[start:shifted_end]
201
+ if segments[i]['direction'] == 'Up':
202
+ new_end = end_df[value_col].idxmax()
203
+ segments[i]['end'] = new_end.strftime('%Y-%m-%d')
204
+
205
+ if segments[i]['direction'] == 'Down':
206
+ new_end = end_df[value_col].idxmin()
207
+ segments[i]['end'] = new_end.strftime('%Y-%m-%d')
205
208
 
206
- elif segments[i]['direction'] == 'Flat':
207
- segments[i]['end'] = shifted_end.strftime('%Y-%m-%d')
209
+ elif segments[i]['direction'] == 'Flat':
210
+ segments[i]['end'] = shifted_end.strftime('%Y-%m-%d')
208
211
 
209
- if (i > 0 and has_partial_overlap_prev(segment, segments[i-1])):
212
+ if (i > 0 and has_partial_overlap_prev(segment, segments[i-1])):
210
213
 
211
- shifted_start = (pd.to_datetime(segments[i-1]['end']) + pd.Timedelta(days=1))
212
- end = pd.to_datetime(segment['end'])
213
-
214
- # when gradual, follows similar logic to expand/contract selection.
215
- start_df = df.loc[shifted_start:end]
216
- if segments[i]['direction'] == 'Up':
217
- new_start = start_df[value_col].iloc[::-1].idxmin() + pd.Timedelta(days=1)
218
- segments[i]['start'] = new_start.strftime('%Y-%m-%d')
214
+ shifted_start = (pd.to_datetime(segments[i-1]['end']) + pd.Timedelta(days=1))
215
+ end = pd.to_datetime(segment['end'])
216
+
217
+ # when gradual, follows similar logic to expand/contract selection.
218
+ start_df = df.loc[shifted_start:end]
219
+ if segments[i]['direction'] == 'Up':
220
+ new_start = start_df[value_col].iloc[::-1].idxmin() + pd.Timedelta(days=1)
221
+ segments[i]['start'] = new_start.strftime('%Y-%m-%d')
219
222
 
220
- if segments[i]['direction'] == 'Down':
221
- new_start = start_df[value_col].iloc[::-1].idxmax() + pd.Timedelta(days=1)
222
- segments[i]['start'] = new_start.strftime('%Y-%m-%d')
223
+ if segments[i]['direction'] == 'Down':
224
+ new_start = start_df[value_col].iloc[::-1].idxmax() + pd.Timedelta(days=1)
225
+ segments[i]['start'] = new_start.strftime('%Y-%m-%d')
223
226
 
224
- elif segments[i]['direction'] == 'Flat':
225
- segments[i]['start'] = shifted_start.strftime('%Y-%m-%d')
227
+ elif segments[i]['direction'] == 'Flat':
228
+ segments[i]['start'] = shifted_start.strftime('%Y-%m-%d')
226
229
 
227
- segments_refined.append(segment)
230
+ segments_refined.append(segment)
228
231
 
229
232
  # Pass 4: Cleans inverse AGAIN: in case any artifacts from overlap adjustments
230
233
  segments = deepcopy(segments_refined)
@@ -235,7 +238,7 @@ def clean_artifacts(df: pd.DataFrame, value_col: str, segments_refined: list[dic
235
238
  segments_refined.append(segment)
236
239
 
237
240
  # Pass 5:
238
- # - Sets trends to noise when they have too low an SNR, too susceptible to noise, or not trendy enough
241
+ # - Sets trends to noise when they have too low an SNR, too susceptible to noise, or not trendy enough (enabled when avoid_noise is True)
239
242
  # - Sets trends to flat when too flat.
240
243
  segments = deepcopy(segments_refined)
241
244
  segments_refined = []
@@ -309,7 +312,8 @@ def clean_artifacts(df: pd.DataFrame, value_col: str, segments_refined: list[dic
309
312
  trend_too_flat = not min_in_last_section
310
313
 
311
314
  # Reclassify as noise if either edge cases met
312
- if too_noisy or (is_abrupt_near_noise and not trend_ends_too_close) or is_small_gradual_in_noise:
315
+ if method_params['avoid_noise'] and \
316
+ (too_noisy or (is_abrupt_near_noise and not trend_ends_too_close) or is_small_gradual_in_noise):
313
317
  segment['direction'] = 'Noise'
314
318
  if 'trend_class' in segment: del segment['trend_class']
315
319
 
@@ -380,4 +384,4 @@ def fill_in_flats(df: pd.DataFrame, segments: list[dict]) -> list[dict]:
380
384
  direction='Flat'
381
385
  ))
382
386
 
383
- return segments_refined
387
+ return segments_refined
@@ -75,4 +75,4 @@ def classify_trends(df: pd.DataFrame, value_col: str, segments: list[dict]) -> l
75
75
  if segment_length < 3:
76
76
  segments_classified[i]['trend_class'] = 'abrupt'
77
77
 
78
- return segments_classified
78
+ return segments_classified
@@ -6,7 +6,7 @@ from scipy.signal import savgol_filter
6
6
  from scipy.stats import iqr
7
7
  from .post_processing.segments_refine.segment_grouping import GROUPING_DISTANCE
8
8
 
9
- def process_signals(df: pd.DataFrame, value_col: str, debug: bool=False) -> pd.DataFrame:
9
+ def process_signals(df: pd.DataFrame, value_col: str, method_params: dict, debug: bool=False) -> pd.DataFrame:
10
10
  """
11
11
  Applies signal processing techniques to classify regions of a time series.
12
12
 
@@ -32,6 +32,12 @@ def process_signals(df: pd.DataFrame, value_col: str, debug: bool=False) -> pd.D
32
32
  Input time series data with a datetime index and signal column.
33
33
  value_col (str):
34
34
  Name of the column containing the signal to process.
35
+ method_params (dict, optional):
36
+ Optional parameters to customize detection heuristics. Supported keys:
37
+
38
+ - **is_abrupt_padded** (`bool`): Whether to pad abrupt transitions between segments. Defaults to `False`.
39
+ - **abrupt_padding** (`int`): Number of days to pad around abrupt transitions. Only referenced when `is_abrupt_padded` is `True`. Defaults to `28`.
40
+ - **avoid_noise** (`bool`): Whether to avoid noisy segments in trend detection. Defaults to `True`.
35
41
  debug (bool, optional):
36
42
  If `True` will run in debug mode, outputting various additional plots and print statements. Only recommended for developers of pytrendy. Defaults to `False`.
37
43
 
@@ -177,17 +183,19 @@ def process_signals(df: pd.DataFrame, value_col: str, debug: bool=False) -> pd.D
177
183
  df.loc[df['flat_flag'] == 1, 'trend_flag'] = -2
178
184
  df.loc[df['noise_flag'] == 1, 'trend_flag'] = -3
179
185
 
186
+ # Important condition to establish non-trend segments to avoid detecting trends over
187
+ avoid_condition = (df['flat_flag'] == 0) # flat is always avoided
188
+ if method_params['avoid_noise']: # noise can be optionally avoided, up to the user
189
+ avoid_condition &= (df['noise_flag'] == 0)
190
+
180
191
  derivative_limit = abs(iqr(df[value_col])) * THRESHOLD_SMOOTH
181
192
  df['smoothed_deriv'] = savgol_filter(df[value_col], window_length=WINDOW_SMOOTH, polyorder=1, deriv=1)
182
- df.loc[(df['smoothed_deriv'] >= derivative_limit) & (df['flat_flag'] == 0) & (df['noise_flag'] == 0), 'trend_flag'] = 1
183
- df.loc[(df['smoothed_deriv'] < -derivative_limit) & (df['flat_flag'] == 0) & (df['noise_flag'] == 0), 'trend_flag'] = -1
193
+ df.loc[(df['smoothed_deriv'] >= derivative_limit) & avoid_condition, 'trend_flag'] = 1
194
+ df.loc[(df['smoothed_deriv'] < -derivative_limit) & avoid_condition, 'trend_flag'] = -1
184
195
 
185
196
  if debug:
186
197
  import matplotlib.pyplot as plt
187
198
 
188
- #df['smoothed_deriv'].hist()
189
- #plt.show()
190
-
191
199
  ax = df[[value_col, 'snr']].plot(figsize=(20,3), secondary_y='snr')
192
200
  ax.right_ax.axhline(y=THRESHOLD_NOISE, color='gray', linestyle='--', linewidth=2)
193
201
  plt.title("Signal-Noise Ratio (SNR)")
@@ -214,8 +222,8 @@ def process_signals(df: pd.DataFrame, value_col: str, debug: bool=False) -> pd.D
214
222
  plt.show()
215
223
 
216
224
  ax = df[[value_col, 'smoothed_deriv']].plot(figsize=(20,3), secondary_y='smoothed_deriv')
217
- ax.right_ax.axhline(y=THRESHOLD_SMOOTH, color='gray', linestyle='--', linewidth=2)
218
- ax.right_ax.axhline(y=-THRESHOLD_SMOOTH, color='gray', linestyle=':', linewidth=2)
225
+ ax.right_ax.axhline(y=derivative_limit, color='gray', linestyle='--', linewidth=2)
226
+ ax.right_ax.axhline(y=-derivative_limit, color='gray', linestyle=':', linewidth=2)
219
227
  plt.title("Smoothed Derivative")
220
228
  plt.show()
221
229
 
File without changes