pytrendy 1.1.11.dev4__tar.gz → 1.2.0.dev1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24) hide show
  1. {pytrendy-1.1.11.dev4 → pytrendy-1.2.0.dev1}/PKG-INFO +1 -1
  2. {pytrendy-1.1.11.dev4 → pytrendy-1.2.0.dev1}/pyproject.toml +1 -1
  3. {pytrendy-1.1.11.dev4 → pytrendy-1.2.0.dev1}/pytrendy/detect_trends.py +3 -1
  4. {pytrendy-1.1.11.dev4 → pytrendy-1.2.0.dev1}/pytrendy/post_processing/segments_refine/artifact_cleanup.py +43 -39
  5. {pytrendy-1.1.11.dev4 → pytrendy-1.2.0.dev1}/pytrendy/process_signals.py +16 -8
  6. {pytrendy-1.1.11.dev4 → pytrendy-1.2.0.dev1}/LICENSE +0 -0
  7. {pytrendy-1.1.11.dev4 → pytrendy-1.2.0.dev1}/README.md +0 -0
  8. {pytrendy-1.1.11.dev4 → pytrendy-1.2.0.dev1}/pytrendy/__init__.py +0 -0
  9. {pytrendy-1.1.11.dev4 → pytrendy-1.2.0.dev1}/pytrendy/io/__init__.py +0 -0
  10. {pytrendy-1.1.11.dev4 → pytrendy-1.2.0.dev1}/pytrendy/io/data/classes_signals.csv +0 -0
  11. {pytrendy-1.1.11.dev4 → pytrendy-1.2.0.dev1}/pytrendy/io/data/series_synthetic.csv +0 -0
  12. {pytrendy-1.1.11.dev4 → pytrendy-1.2.0.dev1}/pytrendy/io/data_loader.py +0 -0
  13. {pytrendy-1.1.11.dev4 → pytrendy-1.2.0.dev1}/pytrendy/io/plot_pytrendy.py +0 -0
  14. {pytrendy-1.1.11.dev4 → pytrendy-1.2.0.dev1}/pytrendy/io/results_pytrendy.py +0 -0
  15. {pytrendy-1.1.11.dev4 → pytrendy-1.2.0.dev1}/pytrendy/post_processing/__init__.py +0 -0
  16. {pytrendy-1.1.11.dev4 → pytrendy-1.2.0.dev1}/pytrendy/post_processing/segments_analyse.py +0 -0
  17. {pytrendy-1.1.11.dev4 → pytrendy-1.2.0.dev1}/pytrendy/post_processing/segments_get.py +0 -0
  18. {pytrendy-1.1.11.dev4 → pytrendy-1.2.0.dev1}/pytrendy/post_processing/segments_refine/__init__.py +0 -0
  19. {pytrendy-1.1.11.dev4 → pytrendy-1.2.0.dev1}/pytrendy/post_processing/segments_refine/abrupt_shaving.py +0 -0
  20. {pytrendy-1.1.11.dev4 → pytrendy-1.2.0.dev1}/pytrendy/post_processing/segments_refine/gradual_expand_contract.py +0 -0
  21. {pytrendy-1.1.11.dev4 → pytrendy-1.2.0.dev1}/pytrendy/post_processing/segments_refine/segment_grouping.py +0 -0
  22. {pytrendy-1.1.11.dev4 → pytrendy-1.2.0.dev1}/pytrendy/post_processing/segments_refine/trend_classify.py +0 -0
  23. {pytrendy-1.1.11.dev4 → pytrendy-1.2.0.dev1}/pytrendy/post_processing/segments_refine/update_neighbours.py +0 -0
  24. {pytrendy-1.1.11.dev4 → pytrendy-1.2.0.dev1}/pytrendy/simpledtw.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pytrendy
3
- Version: 1.1.11.dev4
3
+ Version: 1.2.0.dev1
4
4
  Summary: Trend Detection in Python. Applicable for real-world industry use cases in time series.
5
5
  License: MIT License
6
6
 
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "pytrendy"
3
- version = "1.1.11.dev4"
3
+ version = "1.2.0.dev1"
4
4
  description = "Trend Detection in Python. Applicable for real-world industry use cases in time series."
5
5
  authors = [
6
6
  { name = "Russell Sammut Bonnici", email = "r.sammutbonnici@gmail.com" },
@@ -41,6 +41,7 @@ def detect_trends(df: pd.DataFrame, date_col: str, value_col: str, plot=True, me
41
41
 
42
42
  - **is_abrupt_padded** (`bool`): Whether to pad abrupt transitions between segments. Defaults to `False`.
43
43
  - **abrupt_padding** (`int`): Number of days to pad around abrupt transitions. Only referenced when `is_abrupt_padded` is `True`. Defaults to `28`.
44
+ - **avoid_noise** (`bool`): Whether to avoid noisy segments in trend detection. Defaults to `True`.
44
45
  debug (bool, optional):
45
46
  If `True` will run in debug mode, outputting various additional plots and print statements. Only recommended for developers of pytrendy.
46
47
  Defaults to `False`.
@@ -62,10 +63,11 @@ def detect_trends(df: pd.DataFrame, date_col: str, value_col: str, plot=True, me
62
63
  method_params = {
63
64
  'is_abrupt_padded': method_params.get('is_abrupt_padded', False),
64
65
  'abrupt_padding': method_params.get('abrupt_padding', 28),
66
+ 'avoid_noise': method_params.get('avoid_noise', True),
65
67
  }
66
68
 
67
69
  # Core 5-step pipeline
68
- df = process_signals(df, value_col, debug=debug)
70
+ df = process_signals(df, value_col, method_params, debug)
69
71
  segments = get_segments(df)
70
72
  segments = refine_segments(df, value_col, segments, method_params)
71
73
  segments = analyse_segments(df, value_col, segments)
@@ -20,6 +20,7 @@ def clean_artifacts(df: pd.DataFrame, value_col: str, segments_refined: list[dic
20
20
 
21
21
  - **is_abrupt_padded** (`bool`): If `True`, skips neighboring-noise checks around abrupt segments. Defaults to `False`.
22
22
  - **abrupt_padding** (`int`): Padding window in days used by abrupt refinement; included for pipeline consistency. Defaults to `28`.
23
+ - **avoid_noise** (`bool`): Whether to avoid noisy segments in trend detection. Defaults to `True`.
23
24
  inverse_only (bool): If True, only perform inverse checks and skip other artifact cleanups. Useful for a final cleanup pass after flat fill-ins.
24
25
 
25
26
  Returns:
@@ -182,49 +183,51 @@ def clean_artifacts(df: pd.DataFrame, value_col: str, segments_refined: list[dic
182
183
  segments_refined.append(segment)
183
184
 
184
185
  # Pass 3: Cleans partial overlaps with noise. Don't filter out completely when partial, adjust outside noise
185
- segments = deepcopy(segments_refined)
186
- segments_refined = []
187
- for i, segment in enumerate(segments):
188
- if (i < len(segments)-1 and has_partial_overlap_next(segment, segments[i+1])):
189
-
190
- shifted_end = (pd.to_datetime(segments[i+1]['start']) - pd.Timedelta(days=1))
191
- start = pd.to_datetime(segment['start'])
192
- is_inverted = (shifted_end < start) # In case noise segment is <= 1 day in length
193
- if is_inverted:
194
- continue
195
-
196
- # when gradual, follows similar logic to expand/contract selection.
197
- end_df = df.loc[start:shifted_end]
198
- if segments[i]['direction'] == 'Up':
199
- new_end = end_df[value_col].idxmax()
200
- segments[i]['end'] = new_end.strftime('%Y-%m-%d')
201
-
202
- if segments[i]['direction'] == 'Down':
203
- new_end = end_df[value_col].idxmin()
204
- segments[i]['end'] = new_end.strftime('%Y-%m-%d')
186
+ # Only runs post-processing cleanup logic when avoid_noise is True, enabled by default.
187
+ if method_params['avoid_noise']:
188
+ segments = deepcopy(segments_refined)
189
+ segments_refined = []
190
+ for i, segment in enumerate(segments):
191
+ if (i < len(segments)-1 and has_partial_overlap_next(segment, segments[i+1])):
192
+
193
+ shifted_end = (pd.to_datetime(segments[i+1]['start']) - pd.Timedelta(days=1))
194
+ start = pd.to_datetime(segment['start'])
195
+ is_inverted = (shifted_end < start) # In case noise segment is <= 1 day in length
196
+ if is_inverted:
197
+ continue
198
+
199
+ # when gradual, follows similar logic to expand/contract selection.
200
+ end_df = df.loc[start:shifted_end]
201
+ if segments[i]['direction'] == 'Up':
202
+ new_end = end_df[value_col].idxmax()
203
+ segments[i]['end'] = new_end.strftime('%Y-%m-%d')
204
+
205
+ if segments[i]['direction'] == 'Down':
206
+ new_end = end_df[value_col].idxmin()
207
+ segments[i]['end'] = new_end.strftime('%Y-%m-%d')
205
208
 
206
- elif segments[i]['direction'] == 'Flat':
207
- segments[i]['end'] = shifted_end.strftime('%Y-%m-%d')
209
+ elif segments[i]['direction'] == 'Flat':
210
+ segments[i]['end'] = shifted_end.strftime('%Y-%m-%d')
208
211
 
209
- if (i > 0 and has_partial_overlap_prev(segment, segments[i-1])):
212
+ if (i > 0 and has_partial_overlap_prev(segment, segments[i-1])):
210
213
 
211
- shifted_start = (pd.to_datetime(segments[i-1]['end']) + pd.Timedelta(days=1))
212
- end = pd.to_datetime(segment['end'])
213
-
214
- # when gradual, follows similar logic to expand/contract selection.
215
- start_df = df.loc[shifted_start:end]
216
- if segments[i]['direction'] == 'Up':
217
- new_start = start_df[value_col].iloc[::-1].idxmin() + pd.Timedelta(days=1)
218
- segments[i]['start'] = new_start.strftime('%Y-%m-%d')
214
+ shifted_start = (pd.to_datetime(segments[i-1]['end']) + pd.Timedelta(days=1))
215
+ end = pd.to_datetime(segment['end'])
216
+
217
+ # when gradual, follows similar logic to expand/contract selection.
218
+ start_df = df.loc[shifted_start:end]
219
+ if segments[i]['direction'] == 'Up':
220
+ new_start = start_df[value_col].iloc[::-1].idxmin() + pd.Timedelta(days=1)
221
+ segments[i]['start'] = new_start.strftime('%Y-%m-%d')
219
222
 
220
- if segments[i]['direction'] == 'Down':
221
- new_start = start_df[value_col].iloc[::-1].idxmax() + pd.Timedelta(days=1)
222
- segments[i]['start'] = new_start.strftime('%Y-%m-%d')
223
+ if segments[i]['direction'] == 'Down':
224
+ new_start = start_df[value_col].iloc[::-1].idxmax() + pd.Timedelta(days=1)
225
+ segments[i]['start'] = new_start.strftime('%Y-%m-%d')
223
226
 
224
- elif segments[i]['direction'] == 'Flat':
225
- segments[i]['start'] = shifted_start.strftime('%Y-%m-%d')
227
+ elif segments[i]['direction'] == 'Flat':
228
+ segments[i]['start'] = shifted_start.strftime('%Y-%m-%d')
226
229
 
227
- segments_refined.append(segment)
230
+ segments_refined.append(segment)
228
231
 
229
232
  # Pass 4: Cleans inverse AGAIN: in case any artifacts from overlap adjustments
230
233
  segments = deepcopy(segments_refined)
@@ -235,7 +238,7 @@ def clean_artifacts(df: pd.DataFrame, value_col: str, segments_refined: list[dic
235
238
  segments_refined.append(segment)
236
239
 
237
240
  # Pass 5:
238
- # - Sets trends to noise when they have too low an SNR, too susceptible to noise, or not trendy enough
241
+ # - Sets trends to noise when they have too low an SNR, too susceptible to noise, or not trendy enough (enabled when avoid_noise is True)
239
242
  # - Sets trends to flat when too flat.
240
243
  segments = deepcopy(segments_refined)
241
244
  segments_refined = []
@@ -309,7 +312,8 @@ def clean_artifacts(df: pd.DataFrame, value_col: str, segments_refined: list[dic
309
312
  trend_too_flat = not min_in_last_section
310
313
 
311
314
  # Reclassify as noise if either edge cases met
312
- if too_noisy or (is_abrupt_near_noise and not trend_ends_too_close) or is_small_gradual_in_noise:
315
+ if method_params['avoid_noise'] and \
316
+ (too_noisy or (is_abrupt_near_noise and not trend_ends_too_close) or is_small_gradual_in_noise):
313
317
  segment['direction'] = 'Noise'
314
318
  if 'trend_class' in segment: del segment['trend_class']
315
319
 
@@ -6,7 +6,7 @@ from scipy.signal import savgol_filter
6
6
  from scipy.stats import iqr
7
7
  from .post_processing.segments_refine.segment_grouping import GROUPING_DISTANCE
8
8
 
9
- def process_signals(df: pd.DataFrame, value_col: str, debug: bool=False) -> pd.DataFrame:
9
+ def process_signals(df: pd.DataFrame, value_col: str, method_params: dict, debug: bool=False) -> pd.DataFrame:
10
10
  """
11
11
  Applies signal processing techniques to classify regions of a time series.
12
12
 
@@ -32,6 +32,12 @@ def process_signals(df: pd.DataFrame, value_col: str, debug: bool=False) -> pd.D
32
32
  Input time series data with a datetime index and signal column.
33
33
  value_col (str):
34
34
  Name of the column containing the signal to process.
35
+ method_params (dict, optional):
36
+ Optional parameters to customize detection heuristics. Supported keys:
37
+
38
+ - **is_abrupt_padded** (`bool`): Whether to pad abrupt transitions between segments. Defaults to `False`.
39
+ - **abrupt_padding** (`int`): Number of days to pad around abrupt transitions. Only referenced when `is_abrupt_padded` is `True`. Defaults to `28`.
40
+ - **avoid_noise** (`bool`): Whether to avoid noisy segments in trend detection. Defaults to `True`.
35
41
  debug (bool, optional):
36
42
  If `True` will run in debug mode, outputting various additional plots and print statements. Only recommended for developers of pytrendy. Defaults to `False`.
37
43
 
@@ -177,17 +183,19 @@ def process_signals(df: pd.DataFrame, value_col: str, debug: bool=False) -> pd.D
177
183
  df.loc[df['flat_flag'] == 1, 'trend_flag'] = -2
178
184
  df.loc[df['noise_flag'] == 1, 'trend_flag'] = -3
179
185
 
186
+ # Important condition to establish non-trend segments to avoid detecting trends over
187
+ avoid_condition = (df['flat_flag'] == 0) # flat is always avoided
188
+ if method_params['avoid_noise']: # noise can be optionally avoided, up to the user
189
+ avoid_condition &= (df['noise_flag'] == 0)
190
+
180
191
  derivative_limit = abs(iqr(df[value_col])) * THRESHOLD_SMOOTH
181
192
  df['smoothed_deriv'] = savgol_filter(df[value_col], window_length=WINDOW_SMOOTH, polyorder=1, deriv=1)
182
- df.loc[(df['smoothed_deriv'] >= derivative_limit) & (df['flat_flag'] == 0) & (df['noise_flag'] == 0), 'trend_flag'] = 1
183
- df.loc[(df['smoothed_deriv'] < -derivative_limit) & (df['flat_flag'] == 0) & (df['noise_flag'] == 0), 'trend_flag'] = -1
193
+ df.loc[(df['smoothed_deriv'] >= derivative_limit) & avoid_condition, 'trend_flag'] = 1
194
+ df.loc[(df['smoothed_deriv'] < -derivative_limit) & avoid_condition, 'trend_flag'] = -1
184
195
 
185
196
  if debug:
186
197
  import matplotlib.pyplot as plt
187
198
 
188
- #df['smoothed_deriv'].hist()
189
- #plt.show()
190
-
191
199
  ax = df[[value_col, 'snr']].plot(figsize=(20,3), secondary_y='snr')
192
200
  ax.right_ax.axhline(y=THRESHOLD_NOISE, color='gray', linestyle='--', linewidth=2)
193
201
  plt.title("Signal-Noise Ratio (SNR)")
@@ -214,8 +222,8 @@ def process_signals(df: pd.DataFrame, value_col: str, debug: bool=False) -> pd.D
214
222
  plt.show()
215
223
 
216
224
  ax = df[[value_col, 'smoothed_deriv']].plot(figsize=(20,3), secondary_y='smoothed_deriv')
217
- ax.right_ax.axhline(y=THRESHOLD_SMOOTH, color='gray', linestyle='--', linewidth=2)
218
- ax.right_ax.axhline(y=-THRESHOLD_SMOOTH, color='gray', linestyle=':', linewidth=2)
225
+ ax.right_ax.axhline(y=derivative_limit, color='gray', linestyle='--', linewidth=2)
226
+ ax.right_ax.axhline(y=-derivative_limit, color='gray', linestyle=':', linewidth=2)
219
227
  plt.title("Smoothed Derivative")
220
228
  plt.show()
221
229
 
File without changes
File without changes