pytrendy 1.1.11.dev4__tar.gz → 1.2.0.dev1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pytrendy-1.1.11.dev4 → pytrendy-1.2.0.dev1}/PKG-INFO +1 -1
- {pytrendy-1.1.11.dev4 → pytrendy-1.2.0.dev1}/pyproject.toml +1 -1
- {pytrendy-1.1.11.dev4 → pytrendy-1.2.0.dev1}/pytrendy/detect_trends.py +3 -1
- {pytrendy-1.1.11.dev4 → pytrendy-1.2.0.dev1}/pytrendy/post_processing/segments_refine/artifact_cleanup.py +43 -39
- {pytrendy-1.1.11.dev4 → pytrendy-1.2.0.dev1}/pytrendy/process_signals.py +16 -8
- {pytrendy-1.1.11.dev4 → pytrendy-1.2.0.dev1}/LICENSE +0 -0
- {pytrendy-1.1.11.dev4 → pytrendy-1.2.0.dev1}/README.md +0 -0
- {pytrendy-1.1.11.dev4 → pytrendy-1.2.0.dev1}/pytrendy/__init__.py +0 -0
- {pytrendy-1.1.11.dev4 → pytrendy-1.2.0.dev1}/pytrendy/io/__init__.py +0 -0
- {pytrendy-1.1.11.dev4 → pytrendy-1.2.0.dev1}/pytrendy/io/data/classes_signals.csv +0 -0
- {pytrendy-1.1.11.dev4 → pytrendy-1.2.0.dev1}/pytrendy/io/data/series_synthetic.csv +0 -0
- {pytrendy-1.1.11.dev4 → pytrendy-1.2.0.dev1}/pytrendy/io/data_loader.py +0 -0
- {pytrendy-1.1.11.dev4 → pytrendy-1.2.0.dev1}/pytrendy/io/plot_pytrendy.py +0 -0
- {pytrendy-1.1.11.dev4 → pytrendy-1.2.0.dev1}/pytrendy/io/results_pytrendy.py +0 -0
- {pytrendy-1.1.11.dev4 → pytrendy-1.2.0.dev1}/pytrendy/post_processing/__init__.py +0 -0
- {pytrendy-1.1.11.dev4 → pytrendy-1.2.0.dev1}/pytrendy/post_processing/segments_analyse.py +0 -0
- {pytrendy-1.1.11.dev4 → pytrendy-1.2.0.dev1}/pytrendy/post_processing/segments_get.py +0 -0
- {pytrendy-1.1.11.dev4 → pytrendy-1.2.0.dev1}/pytrendy/post_processing/segments_refine/__init__.py +0 -0
- {pytrendy-1.1.11.dev4 → pytrendy-1.2.0.dev1}/pytrendy/post_processing/segments_refine/abrupt_shaving.py +0 -0
- {pytrendy-1.1.11.dev4 → pytrendy-1.2.0.dev1}/pytrendy/post_processing/segments_refine/gradual_expand_contract.py +0 -0
- {pytrendy-1.1.11.dev4 → pytrendy-1.2.0.dev1}/pytrendy/post_processing/segments_refine/segment_grouping.py +0 -0
- {pytrendy-1.1.11.dev4 → pytrendy-1.2.0.dev1}/pytrendy/post_processing/segments_refine/trend_classify.py +0 -0
- {pytrendy-1.1.11.dev4 → pytrendy-1.2.0.dev1}/pytrendy/post_processing/segments_refine/update_neighbours.py +0 -0
- {pytrendy-1.1.11.dev4 → pytrendy-1.2.0.dev1}/pytrendy/simpledtw.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "pytrendy"
|
|
3
|
-
version = "1.1.11.dev4"
|
|
3
|
+
version = "1.2.0.dev1"
|
|
4
4
|
description = "Trend Detection in Python. Applicable for real-world industry use cases in time series."
|
|
5
5
|
authors = [
|
|
6
6
|
{ name = "Russell Sammut Bonnici", email = "r.sammutbonnici@gmail.com" },
|
|
@@ -41,6 +41,7 @@ def detect_trends(df: pd.DataFrame, date_col: str, value_col: str, plot=True, me
|
|
|
41
41
|
|
|
42
42
|
- **is_abrupt_padded** (`bool`): Whether to pad abrupt transitions between segments. Defaults to `False`.
|
|
43
43
|
- **abrupt_padding** (`int`): Number of days to pad around abrupt transitions. Only referenced when `is_abrupt_padded` is `True`. Defaults to `28`.
|
|
44
|
+
- **avoid_noise** (`bool`): Whether to avoid noisy segments in trend detection. Defaults to `True`.
|
|
44
45
|
debug (bool, optional):
|
|
45
46
|
If `True` will run in debug mode, outputting various additional plots and print statements. Only recommended for developers of pytrendy.
|
|
46
47
|
Defaults to `False`.
|
|
@@ -62,10 +63,11 @@ def detect_trends(df: pd.DataFrame, date_col: str, value_col: str, plot=True, me
|
|
|
62
63
|
method_params = {
|
|
63
64
|
'is_abrupt_padded': method_params.get('is_abrupt_padded', False),
|
|
64
65
|
'abrupt_padding': method_params.get('abrupt_padding', 28),
|
|
66
|
+
'avoid_noise': method_params.get('avoid_noise', True),
|
|
65
67
|
}
|
|
66
68
|
|
|
67
69
|
# Core 5-step pipeline
|
|
68
|
-
df = process_signals(df, value_col, debug)
|
|
70
|
+
df = process_signals(df, value_col, method_params, debug)
|
|
69
71
|
segments = get_segments(df)
|
|
70
72
|
segments = refine_segments(df, value_col, segments, method_params)
|
|
71
73
|
segments = analyse_segments(df, value_col, segments)
|
|
@@ -20,6 +20,7 @@ def clean_artifacts(df: pd.DataFrame, value_col: str, segments_refined: list[dic
|
|
|
20
20
|
|
|
21
21
|
- **is_abrupt_padded** (`bool`): If `True`, skips neighboring-noise checks around abrupt segments. Defaults to `False`.
|
|
22
22
|
- **abrupt_padding** (`int`): Padding window in days used by abrupt refinement; included for pipeline consistency. Defaults to `28`.
|
|
23
|
+
- **avoid_noise** (`bool`): Whether to avoid noisy segments in trend detection. Defaults to `True`.
|
|
23
24
|
inverse_only (bool): If True, only perform inverse checks and skip other artifact cleanups. Useful for final cleanup pass after flat fill ins.
|
|
24
25
|
|
|
25
26
|
Returns:
|
|
@@ -182,49 +183,51 @@ def clean_artifacts(df: pd.DataFrame, value_col: str, segments_refined: list[dic
|
|
|
182
183
|
segments_refined.append(segment)
|
|
183
184
|
|
|
184
185
|
# Pass 3: Cleans partial overlaps with noise. Don't filter out completely when partial, adjust outside noise
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
segments[i]['
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
segments[i]['
|
|
186
|
+
# Only runs post-processing cleanup logic when avoid_noise is True, enabled by default.
|
|
187
|
+
if method_params['avoid_noise']:
|
|
188
|
+
segments = deepcopy(segments_refined)
|
|
189
|
+
segments_refined = []
|
|
190
|
+
for i, segment in enumerate(segments):
|
|
191
|
+
if (i < len(segments)-1 and has_partial_overlap_next(segment, segments[i+1])):
|
|
192
|
+
|
|
193
|
+
shifted_end = (pd.to_datetime(segments[i+1]['start']) - pd.Timedelta(days=1))
|
|
194
|
+
start = pd.to_datetime(segment['start'])
|
|
195
|
+
is_inverted = (shifted_end < start) # In case noise segment is <= 1 day in length
|
|
196
|
+
if is_inverted:
|
|
197
|
+
continue
|
|
198
|
+
|
|
199
|
+
# when gradual, follows similar logic to expand/contract selection.
|
|
200
|
+
end_df = df.loc[start:shifted_end]
|
|
201
|
+
if segments[i]['direction'] == 'Up':
|
|
202
|
+
new_end = end_df[value_col].idxmax()
|
|
203
|
+
segments[i]['end'] = new_end.strftime('%Y-%m-%d')
|
|
204
|
+
|
|
205
|
+
if segments[i]['direction'] == 'Down':
|
|
206
|
+
new_end = end_df[value_col].idxmin()
|
|
207
|
+
segments[i]['end'] = new_end.strftime('%Y-%m-%d')
|
|
205
208
|
|
|
206
|
-
|
|
207
|
-
|
|
209
|
+
elif segments[i]['direction'] == 'Flat':
|
|
210
|
+
segments[i]['end'] = shifted_end.strftime('%Y-%m-%d')
|
|
208
211
|
|
|
209
|
-
|
|
212
|
+
if (i > 0 and has_partial_overlap_prev(segment, segments[i-1])):
|
|
210
213
|
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
214
|
+
shifted_start = (pd.to_datetime(segments[i-1]['end']) + pd.Timedelta(days=1))
|
|
215
|
+
end = pd.to_datetime(segment['end'])
|
|
216
|
+
|
|
217
|
+
# when gradual, follows similar logic to expand/contract selection.
|
|
218
|
+
start_df = df.loc[shifted_start:end]
|
|
219
|
+
if segments[i]['direction'] == 'Up':
|
|
220
|
+
new_start = start_df[value_col].iloc[::-1].idxmin() + pd.Timedelta(days=1)
|
|
221
|
+
segments[i]['start'] = new_start.strftime('%Y-%m-%d')
|
|
219
222
|
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
+
if segments[i]['direction'] == 'Down':
|
|
224
|
+
new_start = start_df[value_col].iloc[::-1].idxmax() + pd.Timedelta(days=1)
|
|
225
|
+
segments[i]['start'] = new_start.strftime('%Y-%m-%d')
|
|
223
226
|
|
|
224
|
-
|
|
225
|
-
|
|
227
|
+
elif segments[i]['direction'] == 'Flat':
|
|
228
|
+
segments[i]['start'] = shifted_start.strftime('%Y-%m-%d')
|
|
226
229
|
|
|
227
|
-
|
|
230
|
+
segments_refined.append(segment)
|
|
228
231
|
|
|
229
232
|
# Pass 4: Cleans inverse AGAIN: in case any artifacts from overlap adjustments
|
|
230
233
|
segments = deepcopy(segments_refined)
|
|
@@ -235,7 +238,7 @@ def clean_artifacts(df: pd.DataFrame, value_col: str, segments_refined: list[dic
|
|
|
235
238
|
segments_refined.append(segment)
|
|
236
239
|
|
|
237
240
|
# Pass 5:
|
|
238
|
-
# - Sets trends to noise when they have too low an SNR, too susceptible to noise, or not trendy enough
|
|
241
|
+
# - Sets trends to noise when they have too low an SNR, too susceptible to noise, or not trendy enough (enabled when avoid_noise is True)
|
|
239
242
|
# - Sets trends to flat when too flat.
|
|
240
243
|
segments = deepcopy(segments_refined)
|
|
241
244
|
segments_refined = []
|
|
@@ -309,7 +312,8 @@ def clean_artifacts(df: pd.DataFrame, value_col: str, segments_refined: list[dic
|
|
|
309
312
|
trend_too_flat = not min_in_last_section
|
|
310
313
|
|
|
311
314
|
# Reclassify as noise if either edge cases met
|
|
312
|
-
if too_noisy or (is_abrupt_near_noise and not trend_ends_too_close) or is_small_gradual_in_noise:
|
|
315
|
+
if method_params['avoid_noise'] and \
|
|
316
|
+
(too_noisy or (is_abrupt_near_noise and not trend_ends_too_close) or is_small_gradual_in_noise):
|
|
313
317
|
segment['direction'] = 'Noise'
|
|
314
318
|
if 'trend_class' in segment: del segment['trend_class']
|
|
315
319
|
|
|
@@ -6,7 +6,7 @@ from scipy.signal import savgol_filter
|
|
|
6
6
|
from scipy.stats import iqr
|
|
7
7
|
from .post_processing.segments_refine.segment_grouping import GROUPING_DISTANCE
|
|
8
8
|
|
|
9
|
-
def process_signals(df: pd.DataFrame, value_col: str, debug: bool=False) -> pd.DataFrame:
|
|
9
|
+
def process_signals(df: pd.DataFrame, value_col: str, method_params: dict, debug: bool=False) -> pd.DataFrame:
|
|
10
10
|
"""
|
|
11
11
|
Applies signal processing techniques to classify regions of a time series.
|
|
12
12
|
|
|
@@ -32,6 +32,12 @@ def process_signals(df: pd.DataFrame, value_col: str, debug: bool=False) -> pd.D
|
|
|
32
32
|
Input time series data with a datetime index and signal column.
|
|
33
33
|
value_col (str):
|
|
34
34
|
Name of the column containing the signal to process.
|
|
35
|
+
method_params (dict, optional):
|
|
36
|
+
Optional parameters to customize detection heuristics. Supported keys:
|
|
37
|
+
|
|
38
|
+
- **is_abrupt_padded** (`bool`): Whether to pad abrupt transitions between segments. Defaults to `False`.
|
|
39
|
+
- **abrupt_padding** (`int`): Number of days to pad around abrupt transitions. Only referenced when `is_abrupt_padded` is `True`. Defaults to `28`.
|
|
40
|
+
- **avoid_noise** (`bool`): Whether to avoid noisy segments in trend detection. Defaults to `True`.
|
|
35
41
|
debug (bool, optional):
|
|
36
42
|
If `True` will run in debug mode, outputting various additional plots and print statements. Only recommended for developers of pytrendy. Defaults to `False`.
|
|
37
43
|
|
|
@@ -177,17 +183,19 @@ def process_signals(df: pd.DataFrame, value_col: str, debug: bool=False) -> pd.D
|
|
|
177
183
|
df.loc[df['flat_flag'] == 1, 'trend_flag'] = -2
|
|
178
184
|
df.loc[df['noise_flag'] == 1, 'trend_flag'] = -3
|
|
179
185
|
|
|
186
|
+
# Important condition to establish non-trend segments to avoid detecting trends over
|
|
187
|
+
avoid_condition = (df['flat_flag'] == 0) # flat is always avoided
|
|
188
|
+
if method_params['avoid_noise']: # noise can be optionally avoided, up to the user
|
|
189
|
+
avoid_condition &= (df['noise_flag'] == 0)
|
|
190
|
+
|
|
180
191
|
derivative_limit = abs(iqr(df[value_col])) * THRESHOLD_SMOOTH
|
|
181
192
|
df['smoothed_deriv'] = savgol_filter(df[value_col], window_length=WINDOW_SMOOTH, polyorder=1, deriv=1)
|
|
182
|
-
df.loc[(df['smoothed_deriv'] >= derivative_limit) &
|
|
183
|
-
df.loc[(df['smoothed_deriv'] < -derivative_limit) &
|
|
193
|
+
df.loc[(df['smoothed_deriv'] >= derivative_limit) & avoid_condition, 'trend_flag'] = 1
|
|
194
|
+
df.loc[(df['smoothed_deriv'] < -derivative_limit) & avoid_condition, 'trend_flag'] = -1
|
|
184
195
|
|
|
185
196
|
if debug:
|
|
186
197
|
import matplotlib.pyplot as plt
|
|
187
198
|
|
|
188
|
-
#df['smoothed_deriv'].hist()
|
|
189
|
-
#plt.show()
|
|
190
|
-
|
|
191
199
|
ax = df[[value_col, 'snr']].plot(figsize=(20,3), secondary_y='snr')
|
|
192
200
|
ax.right_ax.axhline(y=THRESHOLD_NOISE, color='gray', linestyle='--', linewidth=2)
|
|
193
201
|
plt.title("Signal-Noise Ratio (SNR)")
|
|
@@ -214,8 +222,8 @@ def process_signals(df: pd.DataFrame, value_col: str, debug: bool=False) -> pd.D
|
|
|
214
222
|
plt.show()
|
|
215
223
|
|
|
216
224
|
ax = df[[value_col, 'smoothed_deriv']].plot(figsize=(20,3), secondary_y='smoothed_deriv')
|
|
217
|
-
ax.right_ax.axhline(y=
|
|
218
|
-
ax.right_ax.axhline(y=-
|
|
225
|
+
ax.right_ax.axhline(y=derivative_limit, color='gray', linestyle='--', linewidth=2)
|
|
226
|
+
ax.right_ax.axhline(y=-derivative_limit, color='gray', linestyle=':', linewidth=2)
|
|
219
227
|
plt.title("Smoothed Derivative")
|
|
220
228
|
plt.show()
|
|
221
229
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{pytrendy-1.1.11.dev4 → pytrendy-1.2.0.dev1}/pytrendy/post_processing/segments_refine/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|