pytrendy 1.1.11.dev2__tar.gz → 1.1.11.dev4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pytrendy-1.1.11.dev2 → pytrendy-1.1.11.dev4}/PKG-INFO +13 -12
- {pytrendy-1.1.11.dev2 → pytrendy-1.1.11.dev4}/README.md +12 -12
- {pytrendy-1.1.11.dev2 → pytrendy-1.1.11.dev4}/pyproject.toml +1 -1
- {pytrendy-1.1.11.dev2 → pytrendy-1.1.11.dev4}/pytrendy/__init__.py +10 -10
- {pytrendy-1.1.11.dev2 → pytrendy-1.1.11.dev4}/pytrendy/detect_trends.py +6 -3
- {pytrendy-1.1.11.dev2 → pytrendy-1.1.11.dev4}/pytrendy/io/__init__.py +3 -3
- {pytrendy-1.1.11.dev2 → pytrendy-1.1.11.dev4}/pytrendy/io/plot_pytrendy.py +1 -1
- {pytrendy-1.1.11.dev2 → pytrendy-1.1.11.dev4}/pytrendy/io/results_pytrendy.py +12 -6
- {pytrendy-1.1.11.dev2 → pytrendy-1.1.11.dev4}/pytrendy/post_processing/__init__.py +9 -9
- {pytrendy-1.1.11.dev2 → pytrendy-1.1.11.dev4}/pytrendy/post_processing/segments_analyse.py +7 -18
- {pytrendy-1.1.11.dev2 → pytrendy-1.1.11.dev4}/pytrendy/post_processing/segments_refine/artifact_cleanup.py +1 -1
- {pytrendy-1.1.11.dev2 → pytrendy-1.1.11.dev4}/pytrendy/post_processing/segments_refine/trend_classify.py +1 -1
- {pytrendy-1.1.11.dev2 → pytrendy-1.1.11.dev4}/pytrendy/process_signals.py +51 -34
- {pytrendy-1.1.11.dev2 → pytrendy-1.1.11.dev4}/LICENSE +0 -0
- {pytrendy-1.1.11.dev2 → pytrendy-1.1.11.dev4}/pytrendy/io/data/classes_signals.csv +0 -0
- {pytrendy-1.1.11.dev2 → pytrendy-1.1.11.dev4}/pytrendy/io/data/series_synthetic.csv +0 -0
- {pytrendy-1.1.11.dev2 → pytrendy-1.1.11.dev4}/pytrendy/io/data_loader.py +0 -0
- {pytrendy-1.1.11.dev2 → pytrendy-1.1.11.dev4}/pytrendy/post_processing/segments_get.py +0 -0
- {pytrendy-1.1.11.dev2 → pytrendy-1.1.11.dev4}/pytrendy/post_processing/segments_refine/__init__.py +0 -0
- {pytrendy-1.1.11.dev2 → pytrendy-1.1.11.dev4}/pytrendy/post_processing/segments_refine/abrupt_shaving.py +0 -0
- {pytrendy-1.1.11.dev2 → pytrendy-1.1.11.dev4}/pytrendy/post_processing/segments_refine/gradual_expand_contract.py +0 -0
- {pytrendy-1.1.11.dev2 → pytrendy-1.1.11.dev4}/pytrendy/post_processing/segments_refine/segment_grouping.py +0 -0
- {pytrendy-1.1.11.dev2 → pytrendy-1.1.11.dev4}/pytrendy/post_processing/segments_refine/update_neighbours.py +0 -0
- {pytrendy-1.1.11.dev2 → pytrendy-1.1.11.dev4}/pytrendy/simpledtw.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pytrendy
|
|
3
|
-
Version: 1.1.11.dev2
|
|
3
|
+
Version: 1.1.11.dev4
|
|
4
4
|
Summary: Trend Detection in Python. Applicable for real-world industry use cases in time series.
|
|
5
5
|
License: MIT License
|
|
6
6
|
|
|
@@ -106,20 +106,21 @@ The best detected trend is Down between dates 2025-05-09 - 2025-06-17
|
|
|
106
106
|
|
|
107
107
|
Full Results:
|
|
108
108
|
-------------------------------------------------------------------------------
|
|
109
|
-
direction start end days total_change change_rank
|
|
110
|
-
time_index
|
|
111
|
-
1 Up 2025-01-02 2025-01-24 22 14.013348 5
|
|
112
|
-
2 Down 2025-01-25 2025-02-05 11 -13.564214 6
|
|
113
|
-
3 Flat 2025-02-06 2025-02-09 3
|
|
114
|
-
4 Up 2025-02-10 2025-03-14 32 24.632035 3
|
|
115
|
-
5 Flat 2025-03-15 2025-03-17 2
|
|
116
|
-
6 Down 2025-03-18 2025-04-01 14 -22.721861 4
|
|
117
|
-
7 Up 2025-04-02 2025-05-08 36 72.611833 2
|
|
118
|
-
8 Down 2025-05-09 2025-06-17 39 -73.253968 1
|
|
119
|
-
9 Flat 2025-06-18 2025-06-30 12
|
|
109
|
+
direction start end days total_change change_rank trend_class
|
|
110
|
+
time_index
|
|
111
|
+
1 Up 2025-01-02 2025-01-24 22 14.013348 5 gradual
|
|
112
|
+
2 Down 2025-01-25 2025-02-05 11 -13.564214 6 gradual
|
|
113
|
+
3 Flat 2025-02-06 2025-02-09 3 -1.168831 9 NaN
|
|
114
|
+
4 Up 2025-02-10 2025-03-14 32 24.632035 3 gradual
|
|
115
|
+
5 Flat 2025-03-15 2025-03-17 2 5.660173 7 NaN
|
|
116
|
+
6 Down 2025-03-18 2025-04-01 14 -22.721861 4 gradual
|
|
117
|
+
7 Up 2025-04-02 2025-05-08 36 72.611833 2 gradual
|
|
118
|
+
8 Down 2025-05-09 2025-06-17 39 -73.253968 1 gradual
|
|
119
|
+
9 Flat 2025-06-18 2025-06-30 12 3.910534 8 NaN
|
|
120
120
|
-------------------------------------------------------------------------------
|
|
121
121
|
```
|
|
122
122
|
|
|
123
123
|
---
|
|
124
124
|
|
|
125
125
|
**Read more in the full documentation:** [russellsb.github.io/pytrendy/main](https://russellsb.github.io/pytrendy/main/)
|
|
126
|
+
|
|
@@ -49,20 +49,20 @@ The best detected trend is Down between dates 2025-05-09 - 2025-06-17
|
|
|
49
49
|
|
|
50
50
|
Full Results:
|
|
51
51
|
-------------------------------------------------------------------------------
|
|
52
|
-
direction start end days total_change change_rank
|
|
53
|
-
time_index
|
|
54
|
-
1 Up 2025-01-02 2025-01-24 22 14.013348 5
|
|
55
|
-
2 Down 2025-01-25 2025-02-05 11 -13.564214 6
|
|
56
|
-
3 Flat 2025-02-06 2025-02-09 3
|
|
57
|
-
4 Up 2025-02-10 2025-03-14 32 24.632035 3
|
|
58
|
-
5 Flat 2025-03-15 2025-03-17 2
|
|
59
|
-
6 Down 2025-03-18 2025-04-01 14 -22.721861 4
|
|
60
|
-
7 Up 2025-04-02 2025-05-08 36 72.611833 2
|
|
61
|
-
8 Down 2025-05-09 2025-06-17 39 -73.253968 1
|
|
62
|
-
9 Flat 2025-06-18 2025-06-30 12
|
|
52
|
+
direction start end days total_change change_rank trend_class
|
|
53
|
+
time_index
|
|
54
|
+
1 Up 2025-01-02 2025-01-24 22 14.013348 5 gradual
|
|
55
|
+
2 Down 2025-01-25 2025-02-05 11 -13.564214 6 gradual
|
|
56
|
+
3 Flat 2025-02-06 2025-02-09 3 -1.168831 9 NaN
|
|
57
|
+
4 Up 2025-02-10 2025-03-14 32 24.632035 3 gradual
|
|
58
|
+
5 Flat 2025-03-15 2025-03-17 2 5.660173 7 NaN
|
|
59
|
+
6 Down 2025-03-18 2025-04-01 14 -22.721861 4 gradual
|
|
60
|
+
7 Up 2025-04-02 2025-05-08 36 72.611833 2 gradual
|
|
61
|
+
8 Down 2025-05-09 2025-06-17 39 -73.253968 1 gradual
|
|
62
|
+
9 Flat 2025-06-18 2025-06-30 12 3.910534 8 NaN
|
|
63
63
|
-------------------------------------------------------------------------------
|
|
64
64
|
```
|
|
65
65
|
|
|
66
66
|
---
|
|
67
67
|
|
|
68
|
-
**Read more in the full documentation:** [russellsb.github.io/pytrendy/main](https://russellsb.github.io/pytrendy/main/)
|
|
68
|
+
**Read more in the full documentation:** [russellsb.github.io/pytrendy/main](https://russellsb.github.io/pytrendy/main/)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "pytrendy"
|
|
3
|
-
version = "1.1.11.dev2"
|
|
3
|
+
version = "1.1.11.dev4"
|
|
4
4
|
description = "Trend Detection in Python. Applicable for real-world industry use cases in time series."
|
|
5
5
|
authors = [
|
|
6
6
|
{ name = "Russell Sammut Bonnici", email = "r.sammutbonnici@gmail.com" },
|
|
@@ -10,7 +10,7 @@ apply classification heuristics, and access structured results for downstream an
|
|
|
10
10
|
|
|
11
11
|
This package is organized into the following modules:
|
|
12
12
|
|
|
13
|
-
# 1. [detect_trends](detect_trends
|
|
13
|
+
# 1. [detect_trends](detect_trends)
|
|
14
14
|
|
|
15
15
|
- Defines the primary pipeline function for executing PyTrendy's trend detection workflow.
|
|
16
16
|
- This function coordinates signal preprocessing, segment extraction, boundary refinement, metric analysis, and optional visualization in a single call.
|
|
@@ -23,7 +23,7 @@ The `io` module provides essential interfaces for interacting with the input and
|
|
|
23
23
|
It streamlines access to curated datasets, supports detailed visualization of trend segments, and offers structured result handling for downstream analysis.
|
|
24
24
|
Designed for both exploratory workflows and programmatic integration, this module enables users to efficiently load data, interpret results, and present findings.
|
|
25
25
|
|
|
26
|
-
## 2.1 [data_loader](io/data_loader
|
|
26
|
+
## 2.1 [data_loader](io/data_loader)
|
|
27
27
|
|
|
28
28
|
- Provides access to built-in datasets packaged with PyTrendy.
|
|
29
29
|
- Enables quick loading of synthetic time series and classification references.
|
|
@@ -31,7 +31,7 @@ Designed for both exploratory workflows and programmatic integration, this modul
|
|
|
31
31
|
- Delivers standardized input formats optimized for trend detection and segment analysis.
|
|
32
32
|
|
|
33
33
|
|
|
34
|
-
## 2.2 [plot_pytrendy](io/plot_pytrendy
|
|
34
|
+
## 2.2 [plot_pytrendy](io/plot_pytrendy)
|
|
35
35
|
|
|
36
36
|
- Generates annotated visualizations of trend segments over time series data.
|
|
37
37
|
- Highlights matplotlib plots with Up, Down, Flat, and Noise regions using shaded overlays and metadata.
|
|
@@ -39,7 +39,7 @@ Designed for both exploratory workflows and programmatic integration, this modul
|
|
|
39
39
|
- Makes the visualization ready for reporting, presentation, and analytical review.
|
|
40
40
|
|
|
41
41
|
|
|
42
|
-
## 2.3 [results_pytrendy](io/results_pytrendy
|
|
42
|
+
## 2.3 [results_pytrendy](io/results_pytrendy)
|
|
43
43
|
|
|
44
44
|
- Wraps detection output into a structured results object.
|
|
45
45
|
- Implements the `PyTrendyResults` class for segment filtering, ranking, and summarization.
|
|
@@ -48,13 +48,13 @@ Designed for both exploratory workflows and programmatic integration, this modul
|
|
|
48
48
|
|
|
49
49
|
|
|
50
50
|
|
|
51
|
-
# 3. [post_processing](post_processing
|
|
51
|
+
# 3. [post_processing](post_processing)
|
|
52
52
|
|
|
53
53
|
The `post_processing` module provides utilities for refining, classifying, and analyzing trend segments.
|
|
54
54
|
It transforms raw detections into interpretable, ranked structures by adjusting boundaries, labeling temporal behavior, and computing signal metrics.
|
|
55
55
|
This module ensures that the output is analytically robust and ready for downstream use.
|
|
56
56
|
|
|
57
|
-
## 3.1 [segments_get](post_processing/segments_get
|
|
57
|
+
## 3.1 [segments_get](post_processing/segments_get)
|
|
58
58
|
|
|
59
59
|
- Extracts continuous segments from the `trend_flag` column.
|
|
60
60
|
- Applies minimum length constraints to filter out noise.
|
|
@@ -62,7 +62,7 @@ This module ensures that the output is analytically robust and ready for downstr
|
|
|
62
62
|
- Serves as the first step in segment-level post-processing.
|
|
63
63
|
|
|
64
64
|
|
|
65
|
-
## 3.2 [segments_refine](post_processing/segments_refine
|
|
65
|
+
## 3.2 [segments_refine](post_processing/segments_refine)
|
|
66
66
|
|
|
67
67
|
- Adjusts segment boundaries based on local extrema and changepoint detection.
|
|
68
68
|
- Classifies segments as 'gradual' or 'abrupt' using DTW alignment.
|
|
@@ -70,7 +70,7 @@ This module ensures that the output is analytically robust and ready for downstr
|
|
|
70
70
|
- Groups short consecutive segments and removes artifacts.
|
|
71
71
|
|
|
72
72
|
|
|
73
|
-
## 3.3 [segments_analyse](post_processing/segments_analyse
|
|
73
|
+
## 3.3 [segments_analyse](post_processing/segments_analyse)
|
|
74
74
|
|
|
75
75
|
- Computes metrics for each segment, comparing pretreatment vs post-treatment behavior.
|
|
76
76
|
- Includes absolute and percent change, duration, and cumulative movement.
|
|
@@ -79,14 +79,14 @@ This module ensures that the output is analytically robust and ready for downstr
|
|
|
79
79
|
|
|
80
80
|
|
|
81
81
|
|
|
82
|
-
# 4. [process_signals](process_signals
|
|
82
|
+
# 4. [process_signals](process_signals)
|
|
83
83
|
|
|
84
84
|
- Implements core signal processing logic to identify meaningful regions within a time series.
|
|
85
85
|
- By applying Savitzky-Golay smoothing and rolling statistical measures, this module flags flat, noisy, and directional trends.
|
|
86
86
|
- These flags serve as the foundation for segment extraction and subsequent analysis within the PyTrendy pipeline.
|
|
87
87
|
|
|
88
88
|
|
|
89
|
-
# 5. [simpledtw](simpledtw
|
|
89
|
+
# 5. [simpledtw](simpledtw)
|
|
90
90
|
|
|
91
91
|
- Provides an efficient implementation of Dynamic Time Warping (DTW) for comparing time series segments.
|
|
92
92
|
- This module is used internally to classify trends by aligning detected segments with reference signals and evaluating similarity based on alignment cost.
|
|
@@ -8,7 +8,7 @@ from .post_processing.segments_analyse import analyse_segments
|
|
|
8
8
|
from .io.plot_pytrendy import plot_pytrendy
|
|
9
9
|
from .io.results_pytrendy import PyTrendyResults
|
|
10
10
|
|
|
11
|
-
def detect_trends(df:pd.DataFrame, date_col:str, value_col: str, plot=True, method_params:dict=None) -> PyTrendyResults:
|
|
11
|
+
def detect_trends(df: pd.DataFrame, date_col: str, value_col: str, plot=True, method_params: dict=None, debug: bool=False ) -> PyTrendyResults:
|
|
12
12
|
"""
|
|
13
13
|
This is the main function that runs trend detection end-to-end.
|
|
14
14
|
|
|
@@ -41,7 +41,10 @@ def detect_trends(df:pd.DataFrame, date_col:str, value_col: str, plot=True, meth
|
|
|
41
41
|
|
|
42
42
|
- **is_abrupt_padded** (`bool`): Whether to pad abrupt transitions between segments. Defaults to `False`.
|
|
43
43
|
- **abrupt_padding** (`int`): Number of days to pad around abrupt transitions. Only referenced when `is_abrupt_padded` is `True`. Defaults to `28`.
|
|
44
|
-
|
|
44
|
+
debug (bool, optional):
|
|
45
|
+
If `True` will run in debug mode, outputting various additional plots and print statements. Only recommended for developers of pytrendy.
|
|
46
|
+
Defaults to `False`.
|
|
47
|
+
|
|
45
48
|
Returns:
|
|
46
49
|
PyTrendyResults:
|
|
47
50
|
An object encapsulating the detected segments and associated metadata.
|
|
@@ -62,7 +65,7 @@ def detect_trends(df:pd.DataFrame, date_col:str, value_col: str, plot=True, meth
|
|
|
62
65
|
}
|
|
63
66
|
|
|
64
67
|
# Core 5-step pipeline
|
|
65
|
-
df = process_signals(df, value_col)
|
|
68
|
+
df = process_signals(df, value_col, debug=debug)
|
|
66
69
|
segments = get_segments(df)
|
|
67
70
|
segments = refine_segments(df, value_col, segments, method_params)
|
|
68
71
|
segments = analyse_segments(df, value_col, segments)
|
|
@@ -8,7 +8,7 @@ larger workflows.
|
|
|
8
8
|
|
|
9
9
|
# Included Modules
|
|
10
10
|
|
|
11
|
-
## 1. [data_loader](data_loader
|
|
11
|
+
## 1. [data_loader](data_loader)
|
|
12
12
|
Loads built-in datasets packaged with PyTrendy. These include:
|
|
13
13
|
|
|
14
14
|
- `'series_synthetic'`: A synthetic time series with embedded uptrends, downtrends, and flat regions.
|
|
@@ -16,13 +16,13 @@ Loads built-in datasets packaged with PyTrendy. These include:
|
|
|
16
16
|
|
|
17
17
|
Useful for testing, demos, and validating detection logic.
|
|
18
18
|
|
|
19
|
-
## 2. [plot_pytrendy](plot_pytrendy
|
|
19
|
+
## 2. [plot_pytrendy](plot_pytrendy)
|
|
20
20
|
|
|
21
21
|
- Generates annotated matplotlib plots of detected trend segments over the original signal.
|
|
22
22
|
- Highlights Up, Down, Flat, and Noise regions with shaded overlays and ranks significant trends.
|
|
23
23
|
- Supports visual debugging and presentation-ready output.
|
|
24
24
|
|
|
25
|
-
## 3. [results_pytrendy](results_pytrendy
|
|
25
|
+
## 3. [results_pytrendy](results_pytrendy)
|
|
26
26
|
Wraps the output of `detect_trends` into a structured `PyTrendyResults` object. It provides:
|
|
27
27
|
|
|
28
28
|
- Summary statistics (counts, rankings, best segment)
|
|
@@ -111,7 +111,7 @@ def plot_pytrendy(df: pd.DataFrame, value_col: str, segments_enhanced: list[dict
|
|
|
111
111
|
ax.fill_between(df.index[mask], ymin, ymax, color=color, alpha=0.4)
|
|
112
112
|
|
|
113
113
|
# Add ranking if up/down trend
|
|
114
|
-
if 'change_rank' in seg:
|
|
114
|
+
if 'change_rank' in seg and seg['direction'] in ['Up', 'Down']:
|
|
115
115
|
mid_date = start + (end - start) / 2
|
|
116
116
|
y_pos = ymax - (ymax - ymin) * 0.05
|
|
117
117
|
ax.text(mid_date, y_pos, str(seg['change_rank']), fontsize=12,
|
|
@@ -23,6 +23,8 @@ class PyTrendyResults:
|
|
|
23
23
|
List of dictionaries representing individual trend segments.
|
|
24
24
|
"""
|
|
25
25
|
self.segments = segments
|
|
26
|
+
self.trend_segments = [seg for seg in self.segments if 'trend_class' in seg] # Get segments that are trends (exclude flats and noise)
|
|
27
|
+
|
|
26
28
|
self.set_best()
|
|
27
29
|
self.set_df()
|
|
28
30
|
self.set_summary()
|
|
@@ -35,10 +37,10 @@ class PyTrendyResults:
|
|
|
35
37
|
- Identifies the best trend segment based on steepness and duration.
|
|
36
38
|
- The segment with the lowest `change_rank` is selected as the best.
|
|
37
39
|
"""
|
|
38
|
-
if len(self.
|
|
40
|
+
if len(self.trend_segments) == 0:
|
|
39
41
|
self.best = None
|
|
40
42
|
return
|
|
41
|
-
self.best = min(self.
|
|
43
|
+
self.best = min(self.trend_segments, key=lambda x: x.get('change_rank', math.inf))
|
|
42
44
|
|
|
43
45
|
def set_summary(self) -> None:
|
|
44
46
|
"""
|
|
@@ -54,19 +56,23 @@ class PyTrendyResults:
|
|
|
54
56
|
summary['df'] = pd.DataFrame()
|
|
55
57
|
return
|
|
56
58
|
|
|
59
|
+
# Count the number of segments per direction type (Up, Down, Flat, Noise)
|
|
57
60
|
direction_counts = Counter(seg["direction"] for seg in self.segments)
|
|
58
61
|
summary["direction_counts"] = dict(direction_counts)
|
|
59
62
|
|
|
60
|
-
|
|
63
|
+
# Count number of segments per trend class (abrupt, gradual)
|
|
64
|
+
trend_class_counts = Counter(seg["trend_class"] for seg in self.trend_segments)
|
|
61
65
|
summary["trend_class_counts"] = dict(trend_class_counts)
|
|
62
66
|
|
|
63
|
-
|
|
67
|
+
# Get array of total change from trends and get max (best) total change
|
|
68
|
+
changes = [seg.get("total_change", 0) for seg in self.trend_segments]
|
|
64
69
|
summary['highest_total_change'] = np.max(changes) if len(changes) > 0 else None
|
|
65
70
|
|
|
66
71
|
# Set summary df (without extra details)
|
|
67
72
|
df = pd.DataFrame(self.segments)
|
|
68
|
-
cols = ['time_index', 'direction', 'start', 'end', 'days']
|
|
69
|
-
if len(changes) > 1:
|
|
73
|
+
cols = ['time_index', 'direction', 'start', 'end', 'days', 'total_change', 'change_rank']
|
|
74
|
+
if len(changes) > 1: # only include trend_class if atleast one trend exists
|
|
75
|
+
cols += ['trend_class']
|
|
70
76
|
df = df[cols]
|
|
71
77
|
|
|
72
78
|
df = df.set_index('time_index')
|
|
@@ -9,7 +9,7 @@ boundary adjustments, classification heuristics, and quantitative analysis.
|
|
|
9
9
|
|
|
10
10
|
# Included Modules
|
|
11
11
|
|
|
12
|
-
## 1. [segments_get](segments_get
|
|
12
|
+
## 1. [segments_get](segments_get)
|
|
13
13
|
Extracts contiguous segments from the `trend_flag` column produced by signal processing.
|
|
14
14
|
|
|
15
15
|
Applies minimum length constraints to ensure meaningful segments are retained:
|
|
@@ -22,34 +22,34 @@ Applies minimum length constraints to ensure meaningful segments are retained:
|
|
|
22
22
|
|
|
23
23
|
The segment refinement functionality is organized under the `segments_refine` package:
|
|
24
24
|
|
|
25
|
-
### [segments_refine](segments_refine
|
|
25
|
+
### [segments_refine](segments_refine)
|
|
26
26
|
Main orchestration module with `refine_segments()` function that coordinates the full post-processing pipeline.
|
|
27
27
|
|
|
28
28
|
The `segments_refine` package contains focused sub-modules:
|
|
29
29
|
|
|
30
|
-
### [segments_refine.update_neighbours](segments_refine/update_neighbours
|
|
30
|
+
### [segments_refine.update_neighbours](segments_refine/update_neighbours)
|
|
31
31
|
Helper functions for adjusting segment boundaries when neighboring segments are updated:
|
|
32
32
|
- `update_prev_segment`: Adjusts the end of the previous segment
|
|
33
33
|
- `update_next_segment`: Adjusts the start of the next segment
|
|
34
34
|
|
|
35
|
-
### [segments_refine.gradual_expand_contract](segments_refine/gradual_expand_contract
|
|
35
|
+
### [segments_refine.gradual_expand_contract](segments_refine/gradual_expand_contract)
|
|
36
36
|
- `expand_contract_segments`: Adjusts boundaries based on local extrema (±7 days window)
|
|
37
37
|
|
|
38
|
-
### [segments_refine.trend_classify](segments_refine/trend_classify
|
|
38
|
+
### [segments_refine.trend_classify](segments_refine/trend_classify)
|
|
39
39
|
- `classify_trends`: Uses Dynamic Time Warping (DTW) to label segments as 'gradual' or 'abrupt'
|
|
40
40
|
|
|
41
|
-
### [segments_refine.abrupt_shaving](segments_refine/abrupt_shaving
|
|
41
|
+
### [segments_refine.abrupt_shaving](segments_refine/abrupt_shaving)
|
|
42
42
|
- `shave_abrupt_trends`: Detects changepoints in abrupt segments using z-score outliers
|
|
43
43
|
|
|
44
|
-
### [segments_refine.segment_grouping](segments_refine/segment_grouping
|
|
44
|
+
### [segments_refine.segment_grouping](segments_refine/segment_grouping)
|
|
45
45
|
- `group_segments`: Merges short, consecutive segments with the same direction
|
|
46
46
|
|
|
47
|
-
### [segments_refine.artifact_cleanup](segments_refine/artifact_cleanup
|
|
47
|
+
### [segments_refine.artifact_cleanup](segments_refine/artifact_cleanup)
|
|
48
48
|
- `clean_artifacts`: Removes invalid segments (inversions, overlaps)
|
|
49
49
|
- `fill_in_flats`: Fills gaps between segments with flat regions
|
|
50
50
|
|
|
51
51
|
|
|
52
|
-
## 3. [segments_analyse](segments_analyse
|
|
52
|
+
## 3. [segments_analyse](segments_analyse)
|
|
53
53
|
Adds quantitative descriptors to each segment, comparing pretreatment vs post-treatment behavior.
|
|
54
54
|
|
|
55
55
|
Metrics include:
|
|
@@ -16,7 +16,7 @@ def analyse_segments(df: pd.DataFrame, value_col: str, segments: list[dict]) ->
|
|
|
16
16
|
|
|
17
17
|
Metrics added include:
|
|
18
18
|
|
|
19
|
-
- Absolute and percent change (based on
|
|
19
|
+
- Absolute and percent change (based on start/end values)
|
|
20
20
|
|
|
21
21
|
- Duration in days
|
|
22
22
|
|
|
@@ -47,19 +47,10 @@ def analyse_segments(df: pd.DataFrame, value_col: str, segments: list[dict]) ->
|
|
|
47
47
|
df_segment = df.loc[segment['start']:segment['end']]
|
|
48
48
|
|
|
49
49
|
# Calculate absolute and relative change from first point to last point of trend.
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
segment_enhanced['change'] = float(val_max - val_min)
|
|
55
|
-
segment_enhanced['pct_change'] = (
|
|
56
|
-
float(val_max / val_min - 1) if val_min != 0 else np.nan
|
|
57
|
-
)
|
|
58
|
-
elif segment['direction'] == 'Down': # min - max
|
|
59
|
-
segment_enhanced['change'] = float(val_min - val_max)
|
|
60
|
-
segment_enhanced['pct_change'] = (
|
|
61
|
-
float(val_min / val_max - 1) if val_max != 0 else np.nan
|
|
62
|
-
)
|
|
50
|
+
val_start = df_segment[value_col].iloc[0]
|
|
51
|
+
val_end = df_segment[value_col].iloc[-1]
|
|
52
|
+
segment_enhanced['change'] = float(val_end - val_start)
|
|
53
|
+
segment_enhanced['pct_change'] = (float(val_end / val_start - 1) if val_start != 0 else np.nan)
|
|
63
54
|
|
|
64
55
|
# Calculate days & cumulative total change
|
|
65
56
|
days = (pd.to_datetime(segment['end']) - pd.to_datetime(segment['start'])).days
|
|
@@ -68,8 +59,7 @@ def analyse_segments(df: pd.DataFrame, value_col: str, segments: list[dict]) ->
|
|
|
68
59
|
segment_enhanced['days'] = days # set days
|
|
69
60
|
|
|
70
61
|
# Calculate cumulative total change
|
|
71
|
-
|
|
72
|
-
segment_enhanced['total_change'] = float(df_segment[value_col].diff().sum())
|
|
62
|
+
segment_enhanced['total_change'] = float(df_segment[value_col].diff().sum())
|
|
73
63
|
|
|
74
64
|
# Calculate Signal to Noise Ratio
|
|
75
65
|
signal_power = np.mean(df_segment['signal']**2)
|
|
@@ -83,8 +73,7 @@ def analyse_segments(df: pd.DataFrame, value_col: str, segments: list[dict]) ->
|
|
|
83
73
|
|
|
84
74
|
# Rank change, by steepest to shallowest change
|
|
85
75
|
sorted_segments = sorted(segments_enhanced, key=lambda x: abs(x.get('total_change', 0)), reverse=True)
|
|
86
|
-
|
|
87
|
-
for i, seg in enumerate(sorted_trends):
|
|
76
|
+
for i, seg in enumerate(sorted_segments):
|
|
88
77
|
j = seg['time_index'] - 1
|
|
89
78
|
segments_enhanced[j]['change_rank'] = int(i+1)
|
|
90
79
|
|
|
@@ -3,9 +3,10 @@
|
|
|
3
3
|
import pandas as pd
|
|
4
4
|
import numpy as np
|
|
5
5
|
from scipy.signal import savgol_filter
|
|
6
|
+
from scipy.stats import iqr
|
|
6
7
|
from .post_processing.segments_refine.segment_grouping import GROUPING_DISTANCE
|
|
7
8
|
|
|
8
|
-
def process_signals(df: pd.DataFrame, value_col: str) -> pd.DataFrame:
|
|
9
|
+
def process_signals(df: pd.DataFrame, value_col: str, debug: bool=False) -> pd.DataFrame:
|
|
9
10
|
"""
|
|
10
11
|
Applies signal processing techniques to classify regions of a time series.
|
|
11
12
|
|
|
@@ -31,6 +32,8 @@ def process_signals(df: pd.DataFrame, value_col: str) -> pd.DataFrame:
|
|
|
31
32
|
Input time series data with a datetime index and signal column.
|
|
32
33
|
value_col (str):
|
|
33
34
|
Name of the column containing the signal to process.
|
|
35
|
+
debug (bool, optional):
|
|
36
|
+
If `True` will run in debug mode, outputting various additional plots and print statements. Only recommended for developers of pytrendy. Defaults to `False`.
|
|
34
37
|
|
|
35
38
|
Returns:
|
|
36
39
|
`pd.DataFrame`: Modified DataFrame with additional columns.
|
|
@@ -43,7 +46,7 @@ def process_signals(df: pd.DataFrame, value_col: str) -> pd.DataFrame:
|
|
|
43
46
|
WINDOW_NOISE = int(WINDOW_SMOOTH*0.5)
|
|
44
47
|
|
|
45
48
|
THRESHOLD_NOISE = 2.5 # Sensitivity to detecting noise (recommended 0-10)
|
|
46
|
-
THRESHOLD_SMOOTH = 0.
|
|
49
|
+
THRESHOLD_SMOOTH = 0.001 # Sensitivity to detecting trends as fraction of iqr
|
|
47
50
|
|
|
48
51
|
# 1. Noise detection via SNR.
|
|
49
52
|
# 1.1 Compute the SNR
|
|
@@ -173,38 +176,52 @@ def process_signals(df: pd.DataFrame, value_col: str) -> pd.DataFrame:
|
|
|
173
176
|
df['trend_flag'] = 0
|
|
174
177
|
df.loc[df['flat_flag'] == 1, 'trend_flag'] = -2
|
|
175
178
|
df.loc[df['noise_flag'] == 1, 'trend_flag'] = -3
|
|
176
|
-
df['smoothed_deriv'] = savgol_filter(df[value_col], window_length=WINDOW_SMOOTH, polyorder=1, deriv=1)
|
|
177
|
-
df.loc[(df['smoothed_deriv'] >= THRESHOLD_SMOOTH) & (df['flat_flag'] == 0) & (df['noise_flag'] == 0), 'trend_flag'] = 1
|
|
178
|
-
df.loc[(df['smoothed_deriv'] < -THRESHOLD_SMOOTH) & (df['flat_flag'] == 0) & (df['noise_flag'] == 0), 'trend_flag'] = -1
|
|
179
|
-
|
|
180
|
-
# import matplotlib.pyplot as plt
|
|
181
|
-
|
|
182
|
-
# ax = df[[value_col, 'snr']].plot(figsize=(20,3), secondary_y='snr')
|
|
183
|
-
# ax.right_ax.axhline(y=0, color='gray', linestyle='--', linewidth=2)
|
|
184
|
-
# plt.show()
|
|
185
|
-
|
|
186
|
-
# ax = df[[value_col, 'noise_flag']].plot(figsize=(20,3), secondary_y='noise_flag')
|
|
187
|
-
# ax.right_ax.axhline(y=0, color='gray', linestyle='--', linewidth=2)
|
|
188
|
-
# plt.show()
|
|
189
|
-
|
|
190
|
-
# ax = df[[value_col, 'smoothed']].plot(figsize=(20,3), secondary_y='smoothed')
|
|
191
|
-
# ax.right_ax.axhline(y=0, color='gray', linestyle='--', linewidth=2)
|
|
192
|
-
# plt.show()
|
|
193
179
|
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
180
|
+
derivative_limit = abs(iqr(df[value_col])) * THRESHOLD_SMOOTH
|
|
181
|
+
df['smoothed_deriv'] = savgol_filter(df[value_col], window_length=WINDOW_SMOOTH, polyorder=1, deriv=1)
|
|
182
|
+
df.loc[(df['smoothed_deriv'] >= derivative_limit) & (df['flat_flag'] == 0) & (df['noise_flag'] == 0), 'trend_flag'] = 1
|
|
183
|
+
df.loc[(df['smoothed_deriv'] < -derivative_limit) & (df['flat_flag'] == 0) & (df['noise_flag'] == 0), 'trend_flag'] = -1
|
|
184
|
+
|
|
185
|
+
if debug:
|
|
186
|
+
import matplotlib.pyplot as plt
|
|
187
|
+
|
|
188
|
+
#df['smoothed_deriv'].hist()
|
|
189
|
+
#plt.show()
|
|
190
|
+
|
|
191
|
+
ax = df[[value_col, 'snr']].plot(figsize=(20,3), secondary_y='snr')
|
|
192
|
+
ax.right_ax.axhline(y=THRESHOLD_NOISE, color='gray', linestyle='--', linewidth=2)
|
|
193
|
+
plt.title("Signal-Noise Ratio (SNR)")
|
|
194
|
+
plt.show()
|
|
195
|
+
|
|
196
|
+
ax = df[[value_col, 'noise_flag']].plot(figsize=(20,3), secondary_y='noise_flag')
|
|
197
|
+
ax.right_ax.axhline(y=0, color='gray', linestyle='--', linewidth=2)
|
|
198
|
+
plt.title("Noise Flag")
|
|
199
|
+
plt.show()
|
|
200
|
+
|
|
201
|
+
ax = df[[value_col, 'smoothed']].plot(figsize=(20,3), secondary_y='smoothed')
|
|
202
|
+
ax.right_ax.axhline(y=0, color='gray', linestyle='--', linewidth=2)
|
|
203
|
+
plt.title("Smoothed")
|
|
204
|
+
plt.show()
|
|
205
|
+
|
|
206
|
+
ax = df[[value_col, 'smoothed_std']].plot(figsize=(20,3), secondary_y='smoothed_std')
|
|
207
|
+
ax.right_ax.axhline(y=0, color='gray', linestyle='--', linewidth=2)
|
|
208
|
+
plt.title("Smoothed Std")
|
|
209
|
+
plt.show()
|
|
210
|
+
|
|
211
|
+
ax = df[[value_col, 'flat_flag']].plot(figsize=(20,3), secondary_y='flat_flag')
|
|
212
|
+
ax.right_ax.axhline(y=0, color='gray', linestyle='--', linewidth=2)
|
|
213
|
+
plt.title("Flat Flag")
|
|
214
|
+
plt.show()
|
|
215
|
+
|
|
216
|
+
ax = df[[value_col, 'smoothed_deriv']].plot(figsize=(20,3), secondary_y='smoothed_deriv')
|
|
217
|
+
ax.right_ax.axhline(y=THRESHOLD_SMOOTH, color='gray', linestyle='--', linewidth=2)
|
|
218
|
+
ax.right_ax.axhline(y=-THRESHOLD_SMOOTH, color='gray', linestyle=':', linewidth=2)
|
|
219
|
+
plt.title("Smoothed Derivative")
|
|
220
|
+
plt.show()
|
|
221
|
+
|
|
222
|
+
ax = df[[value_col, 'trend_flag']].plot(figsize=(20,3), secondary_y='trend_flag')
|
|
223
|
+
ax.right_ax.axhline(y=0, color='gray', linestyle='--', linewidth=2)
|
|
224
|
+
plt.title("Trend Flag")
|
|
225
|
+
plt.show()
|
|
209
226
|
|
|
210
227
|
return df
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{pytrendy-1.1.11.dev2 → pytrendy-1.1.11.dev4}/pytrendy/post_processing/segments_refine/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|