direl-ts-tool-kit 0.4.9__tar.gz → 0.6.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {direl_ts_tool_kit-0.4.9 → direl_ts_tool_kit-0.6.0}/PKG-INFO +31 -1
- {direl_ts_tool_kit-0.4.9 → direl_ts_tool_kit-0.6.0}/README.md +28 -0
- {direl_ts_tool_kit-0.4.9 → direl_ts_tool_kit-0.6.0}/direl_ts_tool_kit/plot/plot_style.py +3 -0
- direl_ts_tool_kit-0.6.0/direl_ts_tool_kit/plot/plot_ts.py +414 -0
- {direl_ts_tool_kit-0.4.9 → direl_ts_tool_kit-0.6.0}/direl_ts_tool_kit.egg-info/PKG-INFO +31 -1
- {direl_ts_tool_kit-0.4.9 → direl_ts_tool_kit-0.6.0}/direl_ts_tool_kit.egg-info/requires.txt +2 -0
- {direl_ts_tool_kit-0.4.9 → direl_ts_tool_kit-0.6.0}/setup.py +3 -1
- direl_ts_tool_kit-0.4.9/direl_ts_tool_kit/plot/plot_ts.py +0 -153
- {direl_ts_tool_kit-0.4.9 → direl_ts_tool_kit-0.6.0}/LICENCE +0 -0
- {direl_ts_tool_kit-0.4.9 → direl_ts_tool_kit-0.6.0}/direl_ts_tool_kit/__init__.py +0 -0
- {direl_ts_tool_kit-0.4.9 → direl_ts_tool_kit-0.6.0}/direl_ts_tool_kit/plot/__init__.py +0 -0
- {direl_ts_tool_kit-0.4.9 → direl_ts_tool_kit-0.6.0}/direl_ts_tool_kit/utilities/__init__.py +0 -0
- {direl_ts_tool_kit-0.4.9 → direl_ts_tool_kit-0.6.0}/direl_ts_tool_kit/utilities/data_prep.py +0 -0
- {direl_ts_tool_kit-0.4.9 → direl_ts_tool_kit-0.6.0}/direl_ts_tool_kit.egg-info/SOURCES.txt +0 -0
- {direl_ts_tool_kit-0.4.9 → direl_ts_tool_kit-0.6.0}/direl_ts_tool_kit.egg-info/dependency_links.txt +0 -0
- {direl_ts_tool_kit-0.4.9 → direl_ts_tool_kit-0.6.0}/direl_ts_tool_kit.egg-info/top_level.txt +0 -0
- {direl_ts_tool_kit-0.4.9 → direl_ts_tool_kit-0.6.0}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: direl-ts-tool-kit
|
|
3
|
-
Version: 0.4.9
|
|
3
|
+
Version: 0.6.0
|
|
4
4
|
Summary: A toolbox for time series analysis and visualization.
|
|
5
5
|
Home-page: https://gitlab.com/direl/direl_tool_kit
|
|
6
6
|
Author: Diego Restrepo-Leal
|
|
@@ -18,6 +18,8 @@ Requires-Dist: pandas>=1.0.0
|
|
|
18
18
|
Requires-Dist: numpy>=1.18.0
|
|
19
19
|
Requires-Dist: matplotlib>=3.0.0
|
|
20
20
|
Requires-Dist: openpyxl
|
|
21
|
+
Requires-Dist: seaborn
|
|
22
|
+
Requires-Dist: scipy
|
|
21
23
|
Dynamic: author
|
|
22
24
|
Dynamic: author-email
|
|
23
25
|
Dynamic: classifier
|
|
@@ -87,6 +89,14 @@ This function automatically sets major and minor time-based locators
|
|
|
87
89
|
on the x-axis based on the specified time unit, and formats the y-axis
|
|
88
90
|
to use scientific notation.
|
|
89
91
|
|
|
92
|
+
#### plot_interpolation_analysis
|
|
93
|
+
`plot_interpolation_analysis(df_original, variable, units="", method="polynomial", order=2, imputation_se=None, time_unit="Year", rot=90)`
|
|
94
|
+
|
|
95
|
+
Performs interpolation on missing data (NaNs) in a specified column and
|
|
96
|
+
plots the result, highlighting the imputed points with confidence intervals
|
|
97
|
+
if the Imputation Standard Error (SE) is provided.
|
|
98
|
+
|
|
99
|
+
|
|
90
100
|
#### save_figure
|
|
91
101
|
`save_figure(fig, file_name, variable_name="", path="./")`
|
|
92
102
|
|
|
@@ -95,6 +105,26 @@ Saves a Matplotlib figure in three common high-quality formats (PNG, PDF, SVG).
|
|
|
95
105
|
The function creates a consistent file name structure:
|
|
96
106
|
{path}/{file_name}_{variable_name}.{extension}.
|
|
97
107
|
|
|
108
|
+
#### heat_map
|
|
109
|
+
`heat_map(X, y, colors="Blues")`
|
|
110
|
+
|
|
111
|
+
Generates a correlation heatmap plot for a set of features and a target variable.
|
|
112
|
+
|
|
113
|
+
This function concatenates the feature DataFrame (X) and the target Series (y)
|
|
114
|
+
to compute and visualize the full pairwise correlation matrix using Seaborn.
|
|
115
|
+
|
|
116
|
+
#### pair_plot
|
|
117
|
+
`pair_plot(X, y)`
|
|
118
|
+
|
|
119
|
+
Generates a cornered pair plot (scatterplot matrix) to visualize relationships
|
|
120
|
+
between features and the target variable.
|
|
121
|
+
|
|
122
|
+
The function combines the feature DataFrame (X) and the target Series (y)
|
|
123
|
+
and uses seaborn.pairplot to create a matrix of scatter plots and histograms.
|
|
124
|
+
It focuses on the lower triangular part (corner=True) and includes a
|
|
125
|
+
regression line for trend visualization.
|
|
126
|
+
|
|
127
|
+
|
|
98
128
|
# Examples
|
|
99
129
|
- [Example 1](https://gitlab.com/direl/direl_tool_kit/-/blob/main/example/example_01.md?ref_type=heads)
|
|
100
130
|
- [Example 2](https://gitlab.com/direl/direl_tool_kit/-/blob/main/example/example_02.md?ref_type=heads)
|
|
@@ -56,6 +56,14 @@ This function automatically sets major and minor time-based locators
|
|
|
56
56
|
on the x-axis based on the specified time unit, and formats the y-axis
|
|
57
57
|
to use scientific notation.
|
|
58
58
|
|
|
59
|
+
#### plot_interpolation_analysis
|
|
60
|
+
`plot_interpolation_analysis(df_original, variable, units="", method="polynomial", order=2, imputation_se=None, time_unit="Year", rot=90)`
|
|
61
|
+
|
|
62
|
+
Performs interpolation on missing data (NaNs) in a specified column and
|
|
63
|
+
plots the result, highlighting the imputed points with confidence intervals
|
|
64
|
+
if the Imputation Standard Error (SE) is provided.
|
|
65
|
+
|
|
66
|
+
|
|
59
67
|
#### save_figure
|
|
60
68
|
`save_figure(fig, file_name, variable_name="", path="./")`
|
|
61
69
|
|
|
@@ -64,6 +72,26 @@ Saves a Matplotlib figure in three common high-quality formats (PNG, PDF, SVG).
|
|
|
64
72
|
The function creates a consistent file name structure:
|
|
65
73
|
{path}/{file_name}_{variable_name}.{extension}.
|
|
66
74
|
|
|
75
|
+
#### heat_map
|
|
76
|
+
`heat_map(X, y, colors="Blues")`
|
|
77
|
+
|
|
78
|
+
Generates a correlation heatmap plot for a set of features and a target variable.
|
|
79
|
+
|
|
80
|
+
This function concatenates the feature DataFrame (X) and the target Series (y)
|
|
81
|
+
to compute and visualize the full pairwise correlation matrix using Seaborn.
|
|
82
|
+
|
|
83
|
+
#### pair_plot
|
|
84
|
+
`pair_plot(X, y)`
|
|
85
|
+
|
|
86
|
+
Generates a cornered pair plot (scatterplot matrix) to visualize relationships
|
|
87
|
+
between features and the target variable.
|
|
88
|
+
|
|
89
|
+
The function combines the feature DataFrame (X) and the target Series (y)
|
|
90
|
+
and uses seaborn.pairplot to create a matrix of scatter plots and histograms.
|
|
91
|
+
It focuses on the lower triangular part (corner=True) and includes a
|
|
92
|
+
regression line for trend visualization.
|
|
93
|
+
|
|
94
|
+
|
|
67
95
|
# Examples
|
|
68
96
|
- [Example 1](https://gitlab.com/direl/direl_tool_kit/-/blob/main/example/example_01.md?ref_type=heads)
|
|
69
97
|
- [Example 2](https://gitlab.com/direl/direl_tool_kit/-/blob/main/example/example_02.md?ref_type=heads)
|
|
@@ -0,0 +1,414 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import pandas as pd
|
|
3
|
+
from .plot_style import *
|
|
4
|
+
from scipy.stats import pearsonr
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def plot_time_series(
|
|
8
|
+
df_ts,
|
|
9
|
+
variable,
|
|
10
|
+
units="",
|
|
11
|
+
color="BLUE_LINES",
|
|
12
|
+
time_unit="Year",
|
|
13
|
+
rot=90,
|
|
14
|
+
auto_format_label=True,
|
|
15
|
+
):
|
|
16
|
+
"""
|
|
17
|
+
Plots a time series with custom styling and dual-level grid visibility.
|
|
18
|
+
|
|
19
|
+
This function automatically sets major and minor time-based locators
|
|
20
|
+
on the x-axis based on the specified time unit, and formats the y-axis
|
|
21
|
+
to use scientific notation.
|
|
22
|
+
|
|
23
|
+
Parameters
|
|
24
|
+
----------
|
|
25
|
+
df_ts : pd.DataFrame
|
|
26
|
+
The DataFrame containing the time series data. Must have a DatetimeIndex.
|
|
27
|
+
variable : str
|
|
28
|
+
The name of the column to plot. The label is automatically formatted
|
|
29
|
+
(e.g., 'total_sales' becomes 'Total Sales').
|
|
30
|
+
units : str, optional
|
|
31
|
+
Units to display next to the variable name on the y-axis (e.g., 'USD').
|
|
32
|
+
Defaults to "".
|
|
33
|
+
color : str, optional
|
|
34
|
+
Key corresponding to the line color in the global 'paper_colors' dictionary.
|
|
35
|
+
Defaults to "BLUE_LINES".
|
|
36
|
+
time_unit : str, optional
|
|
37
|
+
The time granularity of the data to define x-axis tick locators.
|
|
38
|
+
Options include 'Year', 'Month', 'Weekday', 'Day' or 'Hour'. Defaults to "Year".
|
|
39
|
+
rot : int, optional
|
|
40
|
+
Rotation angle (in degrees) for the x-axis tick labels. Defaults to 90.
|
|
41
|
+
auto_format_label : bool, optional
|
|
42
|
+
Used internally for label formatting logic. Defaults to True.
|
|
43
|
+
|
|
44
|
+
Returns
|
|
45
|
+
-------
|
|
46
|
+
matplotlib.figure.Figure
|
|
47
|
+
The generated matplotlib figure object.
|
|
48
|
+
|
|
49
|
+
Notes
|
|
50
|
+
-----
|
|
51
|
+
Major grid lines are displayed with a dashed line ('--'), and minor grid
|
|
52
|
+
lines are displayed with a dotted line (':') for detailed temporal analysis.
|
|
53
|
+
|
|
54
|
+
Available Colors
|
|
55
|
+
----------------
|
|
56
|
+
The 'color' parameter accepts any key from the 'paper_colors' dictionary.
|
|
57
|
+
|
|
58
|
+
Lines: 'BLUE_LINES', 'ORANGE_LINES', 'GREEN_LINES', 'RED_LINES',
|
|
59
|
+
'GRAY_LINES', 'PURPLE_LINES', 'MAROON_LINES', 'GOLD_LINES'.
|
|
60
|
+
|
|
61
|
+
Bars: 'BLUE_BARS', 'ORANGE_BARS', 'GREEN_BARS', 'RED_BARS',
|
|
62
|
+
'GRAY_BARS', 'PURPLE_BARS', 'MAROON_BARS', 'GOLD_BARS'.
|
|
63
|
+
"""
|
|
64
|
+
|
|
65
|
+
fig, ax = plt.subplots()
|
|
66
|
+
ax.plot(df_ts.index, df_ts[variable], linewidth=3, color=paper_colors[color])
|
|
67
|
+
|
|
68
|
+
if "-" in variable:
|
|
69
|
+
variable = "-".join(
|
|
70
|
+
[
|
|
71
|
+
j.title() if i == 0 else j.lower()
|
|
72
|
+
for i, j in enumerate(variable.split("-"))
|
|
73
|
+
]
|
|
74
|
+
)
|
|
75
|
+
elif "_" in variable:
|
|
76
|
+
variable = " ".join(
|
|
77
|
+
[
|
|
78
|
+
j.title() if i == 0 else j.lower()
|
|
79
|
+
for i, j in enumerate(variable.split("_"))
|
|
80
|
+
]
|
|
81
|
+
)
|
|
82
|
+
else:
|
|
83
|
+
variable = (
|
|
84
|
+
" ".join(
|
|
85
|
+
[
|
|
86
|
+
j.title() if i == 0 else j.lower()
|
|
87
|
+
for i, j in enumerate(variable.split())
|
|
88
|
+
]
|
|
89
|
+
)
|
|
90
|
+
if auto_format_label
|
|
91
|
+
else variable
|
|
92
|
+
)
|
|
93
|
+
|
|
94
|
+
ax.set(xlabel=f"{time_unit}", ylabel=f"{variable} {units}")
|
|
95
|
+
ax.ticklabel_format(style="sci", axis="y", scilimits=(0, 0))
|
|
96
|
+
|
|
97
|
+
if time_unit == "Year":
|
|
98
|
+
ax.xaxis.set_major_locator(mdates.YearLocator())
|
|
99
|
+
ax.xaxis.set_minor_locator(mdates.MonthLocator())
|
|
100
|
+
|
|
101
|
+
if time_unit == "Month":
|
|
102
|
+
ax.xaxis.set_major_locator(mdates.MonthLocator())
|
|
103
|
+
ax.xaxis.set_minor_locator(mdates.WeekdayLocator())
|
|
104
|
+
|
|
105
|
+
if time_unit == "Weekday":
|
|
106
|
+
ax.xaxis.set_major_locator(mdates.WeekdayLocator())
|
|
107
|
+
ax.xaxis.set_minor_locator(mdates.DayLocator())
|
|
108
|
+
|
|
109
|
+
if time_unit == "Day":
|
|
110
|
+
ax.xaxis.set_major_locator(mdates.DayLocator())
|
|
111
|
+
ax.xaxis.set_minor_locator(mdates.HourLocator())
|
|
112
|
+
|
|
113
|
+
if time_unit == "Hour":
|
|
114
|
+
ax.xaxis.set_major_locator(mdates.HourLocator())
|
|
115
|
+
ax.xaxis.set_minor_locator(mdates.MinuteLocator())
|
|
116
|
+
|
|
117
|
+
ax.tick_params(axis="x", rotation=rot)
|
|
118
|
+
ax.grid(which="both")
|
|
119
|
+
ax.grid(which="minor", alpha=0.6, linestyle=":")
|
|
120
|
+
ax.grid(which="major", alpha=0.8, linestyle="--")
|
|
121
|
+
|
|
122
|
+
return fig
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def plot_interpolation_analysis(
|
|
126
|
+
df_original,
|
|
127
|
+
variable,
|
|
128
|
+
units="",
|
|
129
|
+
method="polynomial",
|
|
130
|
+
order=2,
|
|
131
|
+
imputation_se=None,
|
|
132
|
+
time_unit="Year",
|
|
133
|
+
rot=90,
|
|
134
|
+
):
|
|
135
|
+
"""
|
|
136
|
+
Performs interpolation on missing data (NaNs) in a specified column and
|
|
137
|
+
plots the result, highlighting the imputed points with confidence intervals
|
|
138
|
+
if the Imputation Standard Error (SE) is provided.
|
|
139
|
+
|
|
140
|
+
Parameters
|
|
141
|
+
----------
|
|
142
|
+
df_original : pd.DataFrame
|
|
143
|
+
The DataFrame containing the original time series data.
|
|
144
|
+
variable : str
|
|
145
|
+
The name of the column to interpolate and plot (e.g., 'LPUE').
|
|
146
|
+
units : str, optional
|
|
147
|
+
Units to display next to the variable name on the y-axis. Defaults to "".
|
|
148
|
+
method : str, optional
|
|
149
|
+
The interpolation method (e.g., 'linear', 'polynomial', 'spline').
|
|
150
|
+
Defaults to 'polynomial'.
|
|
151
|
+
order : int, optional
|
|
152
|
+
The order of the interpolation (required for 'polynomial' or 'spline').
|
|
153
|
+
Defaults to 2.
|
|
154
|
+
imputation_se : pd.Series, float, or None, optional
|
|
155
|
+
The Standard Error (SE) of the imputation. This must be a single value
|
|
156
|
+
or a Series aligned with the DataFrame's index. If None, confidence
|
|
157
|
+
intervals will NOT be plotted. Defaults to None.
|
|
158
|
+
time_unit : str, optional
|
|
159
|
+
The time granularity for x-axis tick locators. Defaults to "Year".
|
|
160
|
+
rot : int, optional
|
|
161
|
+
Rotation angle (in degrees) for the x-axis tick labels. Defaults to 90.
|
|
162
|
+
|
|
163
|
+
Returns
|
|
164
|
+
-------
|
|
165
|
+
matplotlib.figure.Figure
|
|
166
|
+
The generated Matplotlib figure object with the plot.
|
|
167
|
+
"""
|
|
168
|
+
|
|
169
|
+
imputed_mask = df_original[variable].isnull()
|
|
170
|
+
df_interpolated = df_original.copy()
|
|
171
|
+
df_interpolated[variable] = df_interpolated[variable].interpolate(
|
|
172
|
+
method=method, order=order
|
|
173
|
+
)
|
|
174
|
+
|
|
175
|
+
color1 = paper_colors["RED_LINES"]
|
|
176
|
+
color2 = paper_colors["GREEN_LINES"]
|
|
177
|
+
|
|
178
|
+
col = np.where(imputed_mask, color1, color2)
|
|
179
|
+
|
|
180
|
+
fig, ax = plt.subplots()
|
|
181
|
+
|
|
182
|
+
if imputation_se is not None:
|
|
183
|
+
df_imputed_only = df_interpolated.copy()
|
|
184
|
+
df_imputed_only.loc[~imputed_mask, variable] = np.nan
|
|
185
|
+
|
|
186
|
+
Z_80 = 1.282
|
|
187
|
+
Z_95 = 1.96
|
|
188
|
+
|
|
189
|
+
error_80 = Z_80 * imputation_se
|
|
190
|
+
error_95 = Z_95 * imputation_se
|
|
191
|
+
|
|
192
|
+
ax.fill_between(
|
|
193
|
+
df_imputed_only.index,
|
|
194
|
+
df_imputed_only[variable] - error_95,
|
|
195
|
+
df_imputed_only[variable] + error_95,
|
|
196
|
+
color=paper_colors["GRAY_BARS"],
|
|
197
|
+
alpha=0.2,
|
|
198
|
+
edgecolor="none",
|
|
199
|
+
label="95% confidence interval",
|
|
200
|
+
)
|
|
201
|
+
|
|
202
|
+
ax.fill_between(
|
|
203
|
+
df_imputed_only.index,
|
|
204
|
+
df_imputed_only[variable] - error_80,
|
|
205
|
+
df_imputed_only[variable] + error_80,
|
|
206
|
+
color=paper_colors["GRAY_BARS"],
|
|
207
|
+
alpha=0.4,
|
|
208
|
+
edgecolor="none",
|
|
209
|
+
label="80% confidence interval",
|
|
210
|
+
)
|
|
211
|
+
|
|
212
|
+
ax.plot(
|
|
213
|
+
df_interpolated[variable],
|
|
214
|
+
linestyle="-.",
|
|
215
|
+
linewidth=1,
|
|
216
|
+
color=paper_colors["BLUE_LINES"],
|
|
217
|
+
)
|
|
218
|
+
|
|
219
|
+
ax.scatter(
|
|
220
|
+
df_interpolated.index,
|
|
221
|
+
df_interpolated[variable],
|
|
222
|
+
color=col,
|
|
223
|
+
s=10,
|
|
224
|
+
linewidth=4,
|
|
225
|
+
)
|
|
226
|
+
|
|
227
|
+
ax.set(xlabel=f"{time_unit}", ylabel=f"{variable} {units}")
|
|
228
|
+
ax.ticklabel_format(style="sci", axis="y", scilimits=(0, 0))
|
|
229
|
+
|
|
230
|
+
if time_unit == "Year":
|
|
231
|
+
ax.xaxis.set_major_locator(mdates.YearLocator())
|
|
232
|
+
ax.xaxis.set_minor_locator(mdates.MonthLocator())
|
|
233
|
+
|
|
234
|
+
if time_unit == "Month":
|
|
235
|
+
ax.xaxis.set_major_locator(mdates.MonthLocator())
|
|
236
|
+
ax.xaxis.set_minor_locator(mdates.WeekdayLocator())
|
|
237
|
+
|
|
238
|
+
if time_unit == "Weekday":
|
|
239
|
+
ax.xaxis.set_major_locator(mdates.WeekdayLocator())
|
|
240
|
+
ax.xaxis.set_minor_locator(mdates.DayLocator())
|
|
241
|
+
|
|
242
|
+
if time_unit == "Day":
|
|
243
|
+
ax.xaxis.set_major_locator(mdates.DayLocator())
|
|
244
|
+
ax.xaxis.set_minor_locator(mdates.HourLocator())
|
|
245
|
+
|
|
246
|
+
if time_unit == "Hour":
|
|
247
|
+
ax.xaxis.set_major_locator(mdates.HourLocator())
|
|
248
|
+
ax.xaxis.set_minor_locator(mdates.MinuteLocator())
|
|
249
|
+
|
|
250
|
+
ax.tick_params(axis="x", rotation=rot)
|
|
251
|
+
ax.grid(which="both")
|
|
252
|
+
ax.grid(which="minor", alpha=0.6, linestyle=":")
|
|
253
|
+
ax.grid(which="major", alpha=0.8, linestyle="--")
|
|
254
|
+
|
|
255
|
+
legend_elements = [
|
|
256
|
+
Line2D(
|
|
257
|
+
[0],
|
|
258
|
+
[0],
|
|
259
|
+
marker="o",
|
|
260
|
+
color=color2,
|
|
261
|
+
label="Current data",
|
|
262
|
+
linestyle="none",
|
|
263
|
+
),
|
|
264
|
+
Line2D(
|
|
265
|
+
[0],
|
|
266
|
+
[0],
|
|
267
|
+
marker="o",
|
|
268
|
+
color=color1,
|
|
269
|
+
label="Imputed data",
|
|
270
|
+
linestyle="none",
|
|
271
|
+
),
|
|
272
|
+
]
|
|
273
|
+
ax.legend(handles=legend_elements, loc="upper right")
|
|
274
|
+
|
|
275
|
+
return fig
|
|
276
|
+
|
|
277
|
+
|
|
278
|
+
def save_figure(
|
|
279
|
+
fig,
|
|
280
|
+
file_name,
|
|
281
|
+
variable_name="",
|
|
282
|
+
path="./",
|
|
283
|
+
):
|
|
284
|
+
"""
|
|
285
|
+
Saves a Matplotlib figure in three common high-quality formats (PNG, PDF, SVG).
|
|
286
|
+
|
|
287
|
+
The function creates a consistent file name structure:
|
|
288
|
+
{path}/{file_name}_{variable_name}.{extension}.
|
|
289
|
+
|
|
290
|
+
Parameters
|
|
291
|
+
----------
|
|
292
|
+
fig : matplotlib.figure.Figure
|
|
293
|
+
The Matplotlib figure object to be saved.
|
|
294
|
+
file_name : str
|
|
295
|
+
The primary name for the file (e.g., 'timeseries_report').
|
|
296
|
+
variable_name : str, optional
|
|
297
|
+
An optional secondary name, often the name of the plotted variable,
|
|
298
|
+
to be appended to the file name. Defaults to "".
|
|
299
|
+
path : str, optional
|
|
300
|
+
The directory path where the figure files will be saved.
|
|
301
|
+
Defaults to the current directory ('./').
|
|
302
|
+
|
|
303
|
+
Returns
|
|
304
|
+
-------
|
|
305
|
+
None
|
|
306
|
+
"""
|
|
307
|
+
|
|
308
|
+
if variable_name:
|
|
309
|
+
base_name = f"{path}/{file_name}_{variable_name}"
|
|
310
|
+
else:
|
|
311
|
+
base_name = f"{path}/{file_name}"
|
|
312
|
+
|
|
313
|
+
fig.savefig(f"{base_name}.png")
|
|
314
|
+
fig.savefig(f"{base_name}.pdf")
|
|
315
|
+
fig.savefig(f"{base_name}.svg")
|
|
316
|
+
|
|
317
|
+
|
|
318
|
+
def heat_map(X, y, colors="Blues"):
|
|
319
|
+
"""
|
|
320
|
+
Generates a correlation heatmap plot for a set of features and a target variable.
|
|
321
|
+
|
|
322
|
+
This function concatenates the feature DataFrame (X) and the target Series (y)
|
|
323
|
+
to compute and visualize the full pairwise correlation matrix using Seaborn.
|
|
324
|
+
|
|
325
|
+
Parameters
|
|
326
|
+
----------
|
|
327
|
+
X : pd.DataFrame
|
|
328
|
+
The DataFrame containing the feature variables.
|
|
329
|
+
y : pd.Series or pd.DataFrame
|
|
330
|
+
The target variable (must be concatenable with X).
|
|
331
|
+
colors : str or matplotlib.colors.Colormap, optional
|
|
332
|
+
The colormap to use for the heatmap, passed to the 'cmap' argument
|
|
333
|
+
in seaborn.heatmap. Defaults to "Blues".
|
|
334
|
+
|
|
335
|
+
Note: For standard correlation matrices (which include negative values),
|
|
336
|
+
a diverging colormap (e.g., "coolwarm", "vlag") is usually recommended.
|
|
337
|
+
|
|
338
|
+
Returns
|
|
339
|
+
-------
|
|
340
|
+
matplotlib.figure.Figure
|
|
341
|
+
The generated Matplotlib figure object containing the heatmap.
|
|
342
|
+
|
|
343
|
+
Notes
|
|
344
|
+
-----
|
|
345
|
+
The heatmap displays the Pearson correlation coefficient rounded to two
|
|
346
|
+
decimal places and includes annotations for improved readability.
|
|
347
|
+
"""
|
|
348
|
+
fig, ax = plt.subplots()
|
|
349
|
+
Z = pd.concat([X, y], axis=1)
|
|
350
|
+
|
|
351
|
+
ax = sns.heatmap(
|
|
352
|
+
Z.corr(),
|
|
353
|
+
cmap=colors,
|
|
354
|
+
annot=True,
|
|
355
|
+
linewidths=0.5,
|
|
356
|
+
fmt=".2f",
|
|
357
|
+
annot_kws={"size": 10},
|
|
358
|
+
)
|
|
359
|
+
|
|
360
|
+
return fig
|
|
361
|
+
|
|
362
|
+
|
|
363
|
+
def corrfunc(x, y, ax=None, **kws):
|
|
364
|
+
"""Plot the correlation coefficient in the top left hand corner of a plot."""
|
|
365
|
+
r, _ = pearsonr(x, y)
|
|
366
|
+
ax = ax or plt.gca()
|
|
367
|
+
ax.annotate(f"R = {r:.2f}", xy=(0.1, 0.9), fontsize=25, xycoords=ax.transAxes)
|
|
368
|
+
|
|
369
|
+
|
|
370
|
+
def pair_plot(X, y):
|
|
371
|
+
"""
|
|
372
|
+
Generates a cornered pair plot (scatterplot matrix) to visualize relationships
|
|
373
|
+
between features and the target variable.
|
|
374
|
+
|
|
375
|
+
The function combines the feature DataFrame (X) and the target Series (y)
|
|
376
|
+
and uses seaborn.pairplot to create a matrix of scatter plots and histograms.
|
|
377
|
+
It focuses on the lower triangular part (corner=True) and includes a
|
|
378
|
+
regression line for trend visualization.
|
|
379
|
+
|
|
380
|
+
Parameters
|
|
381
|
+
----------
|
|
382
|
+
X : pd.DataFrame
|
|
383
|
+
The DataFrame containing the feature variables.
|
|
384
|
+
y : pd.Series or pd.DataFrame
|
|
385
|
+
The target variable (must be concatenable with X).
|
|
386
|
+
|
|
387
|
+
Returns
|
|
388
|
+
-------
|
|
389
|
+
matplotlib.figure.Figure
|
|
390
|
+
The generated Matplotlib Figure object containing the cornered pair plot.
|
|
391
|
+
|
|
392
|
+
Notes
|
|
393
|
+
-----
|
|
394
|
+
1. **Dependency:** This function requires a previously defined custom function
|
|
395
|
+
`corrfunc` to be available in the local namespace, as it is used via
|
|
396
|
+
`svm.map_lower()`. This custom function is typically used to display
|
|
397
|
+
correlation coefficients (e.g., Pearson's r) in the lower panel.
|
|
398
|
+
2. **Aesthetics:** Uses a regression line (`kind="reg"`) with custom color
|
|
399
|
+
(RED_LINES) to highlight linear relationships.
|
|
400
|
+
3. **Output:** The returned Figure object can be manipulated further
|
|
401
|
+
or saved using methods like `fig.savefig()`.
|
|
402
|
+
"""
|
|
403
|
+
Z = pd.concat([X, y], axis=1)
|
|
404
|
+
svm = sns.pairplot(
|
|
405
|
+
Z,
|
|
406
|
+
corner=True,
|
|
407
|
+
kind="reg",
|
|
408
|
+
plot_kws={"line_kws": {"color": paper_colors["RED_LINES"]}},
|
|
409
|
+
)
|
|
410
|
+
svm.map_lower(corrfunc)
|
|
411
|
+
|
|
412
|
+
fig = svm.fig
|
|
413
|
+
|
|
414
|
+
return fig
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: direl-ts-tool-kit
|
|
3
|
-
Version: 0.4.9
|
|
3
|
+
Version: 0.6.0
|
|
4
4
|
Summary: A toolbox for time series analysis and visualization.
|
|
5
5
|
Home-page: https://gitlab.com/direl/direl_tool_kit
|
|
6
6
|
Author: Diego Restrepo-Leal
|
|
@@ -18,6 +18,8 @@ Requires-Dist: pandas>=1.0.0
|
|
|
18
18
|
Requires-Dist: numpy>=1.18.0
|
|
19
19
|
Requires-Dist: matplotlib>=3.0.0
|
|
20
20
|
Requires-Dist: openpyxl
|
|
21
|
+
Requires-Dist: seaborn
|
|
22
|
+
Requires-Dist: scipy
|
|
21
23
|
Dynamic: author
|
|
22
24
|
Dynamic: author-email
|
|
23
25
|
Dynamic: classifier
|
|
@@ -87,6 +89,14 @@ This function automatically sets major and minor time-based locators
|
|
|
87
89
|
on the x-axis based on the specified time unit, and formats the y-axis
|
|
88
90
|
to use scientific notation.
|
|
89
91
|
|
|
92
|
+
#### plot_interpolation_analysis
|
|
93
|
+
`plot_interpolation_analysis(df_original, variable, units="", method="polynomial", order=2, imputation_se=None, time_unit="Year", rot=90)`
|
|
94
|
+
|
|
95
|
+
Performs interpolation on missing data (NaNs) in a specified column and
|
|
96
|
+
plots the result, highlighting the imputed points with confidence intervals
|
|
97
|
+
if the Imputation Standard Error (SE) is provided.
|
|
98
|
+
|
|
99
|
+
|
|
90
100
|
#### save_figure
|
|
91
101
|
`save_figure(fig, file_name, variable_name="", path="./")`
|
|
92
102
|
|
|
@@ -95,6 +105,26 @@ Saves a Matplotlib figure in three common high-quality formats (PNG, PDF, SVG).
|
|
|
95
105
|
The function creates a consistent file name structure:
|
|
96
106
|
{path}/{file_name}_{variable_name}.{extension}.
|
|
97
107
|
|
|
108
|
+
#### heat_map
|
|
109
|
+
`heat_map(X, y, colors="Blues")`
|
|
110
|
+
|
|
111
|
+
Generates a correlation heatmap plot for a set of features and a target variable.
|
|
112
|
+
|
|
113
|
+
This function concatenates the feature DataFrame (X) and the target Series (y)
|
|
114
|
+
to compute and visualize the full pairwise correlation matrix using Seaborn.
|
|
115
|
+
|
|
116
|
+
#### pair_plot
|
|
117
|
+
`pair_plot(X, y)`
|
|
118
|
+
|
|
119
|
+
Generates a cornered pair plot (scatterplot matrix) to visualize relationships
|
|
120
|
+
between features and the target variable.
|
|
121
|
+
|
|
122
|
+
The function combines the feature DataFrame (X) and the target Series (y)
|
|
123
|
+
and uses seaborn.pairplot to create a matrix of scatter plots and histograms.
|
|
124
|
+
It focuses on the lower triangular part (corner=True) and includes a
|
|
125
|
+
regression line for trend visualization.
|
|
126
|
+
|
|
127
|
+
|
|
98
128
|
# Examples
|
|
99
129
|
- [Example 1](https://gitlab.com/direl/direl_tool_kit/-/blob/main/example/example_01.md?ref_type=heads)
|
|
100
130
|
- [Example 2](https://gitlab.com/direl/direl_tool_kit/-/blob/main/example/example_02.md?ref_type=heads)
|
|
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
|
|
|
2
2
|
|
|
3
3
|
setup(
|
|
4
4
|
name="direl-ts-tool-kit",
|
|
5
|
-
version="0.4.9",
|
|
5
|
+
version="0.6.0",
|
|
6
6
|
description="A toolbox for time series analysis and visualization.",
|
|
7
7
|
long_description=open("README.md", encoding="utf-8").read(),
|
|
8
8
|
long_description_content_type="text/markdown",
|
|
@@ -15,6 +15,8 @@ setup(
|
|
|
15
15
|
"numpy>=1.18.0",
|
|
16
16
|
"matplotlib>=3.0.0",
|
|
17
17
|
"openpyxl",
|
|
18
|
+
"seaborn",
|
|
19
|
+
"scipy"
|
|
18
20
|
],
|
|
19
21
|
classifiers=[
|
|
20
22
|
"Programming Language :: Python :: 3",
|
|
@@ -1,153 +0,0 @@
|
|
|
1
|
-
from .plot_style import *
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
def plot_time_series(
|
|
5
|
-
df_ts, variable, units="", color="BLUE_LINES", time_unit="Year", rot=90, auto_format_label=True
|
|
6
|
-
):
|
|
7
|
-
"""
|
|
8
|
-
Plots a time series with custom styling and dual-level grid visibility.
|
|
9
|
-
|
|
10
|
-
This function automatically sets major and minor time-based locators
|
|
11
|
-
on the x-axis based on the specified time unit, and formats the y-axis
|
|
12
|
-
to use scientific notation.
|
|
13
|
-
|
|
14
|
-
Parameters
|
|
15
|
-
----------
|
|
16
|
-
df_ts : pd.DataFrame
|
|
17
|
-
The DataFrame containing the time series data. Must have a DatetimeIndex.
|
|
18
|
-
variable : str
|
|
19
|
-
The name of the column to plot. The label is automatically formatted
|
|
20
|
-
(e.g., 'total_sales' becomes 'Total Sales').
|
|
21
|
-
units : str, optional
|
|
22
|
-
Units to display next to the variable name on the y-axis (e.g., 'USD').
|
|
23
|
-
Defaults to "".
|
|
24
|
-
color : str, optional
|
|
25
|
-
Key corresponding to the line color in the global 'paper_colors' dictionary.
|
|
26
|
-
Defaults to "BLUE_LINES".
|
|
27
|
-
time_unit : str, optional
|
|
28
|
-
The time granularity of the data to define x-axis tick locators.
|
|
29
|
-
Options include 'Year', 'Month', 'Weekday', 'Day' or 'Hour'. Defaults to "Year".
|
|
30
|
-
rot : int, optional
|
|
31
|
-
Rotation angle (in degrees) for the x-axis tick labels. Defaults to 90.
|
|
32
|
-
auto_format_label : bool, optional
|
|
33
|
-
Used internally for label formatting logic. Defaults to True.
|
|
34
|
-
|
|
35
|
-
Returns
|
|
36
|
-
-------
|
|
37
|
-
matplotlib.figure.Figure
|
|
38
|
-
The generated matplotlib figure object.
|
|
39
|
-
|
|
40
|
-
Notes
|
|
41
|
-
-----
|
|
42
|
-
Major grid lines are displayed with a dashed line ('--'), and minor grid
|
|
43
|
-
lines are displayed with a dotted line (':') for detailed temporal analysis.
|
|
44
|
-
|
|
45
|
-
Available Colors
|
|
46
|
-
----------------
|
|
47
|
-
The 'color' parameter accepts any key from the 'paper_colors' dictionary.
|
|
48
|
-
|
|
49
|
-
Lines: 'BLUE_LINES', 'ORANGE_LINES', 'GREEN_LINES', 'RED_LINES',
|
|
50
|
-
'GRAY_LINES', 'PURPLE_LINES', 'MAROON_LINES', 'GOLD_LINES'.
|
|
51
|
-
|
|
52
|
-
Bars: 'BLUE_BARS', 'ORANGE_BARS', 'GREEN_BARS', 'RED_BARS',
|
|
53
|
-
'GRAY_BARS', 'PURPLE_BARS', 'MAROON_BARS', 'GOLD_BARS'.
|
|
54
|
-
"""
|
|
55
|
-
|
|
56
|
-
fig, ax = plt.subplots()
|
|
57
|
-
ax.plot(df_ts.index, df_ts[variable], linewidth=3, color=paper_colors[color])
|
|
58
|
-
|
|
59
|
-
if "-" in variable:
|
|
60
|
-
variable = "-".join(
|
|
61
|
-
[
|
|
62
|
-
j.title() if i == 0 else j.lower()
|
|
63
|
-
for i, j in enumerate(variable.split("-"))
|
|
64
|
-
]
|
|
65
|
-
)
|
|
66
|
-
elif "_" in variable:
|
|
67
|
-
variable = " ".join(
|
|
68
|
-
[
|
|
69
|
-
j.title() if i == 0 else j.lower()
|
|
70
|
-
for i, j in enumerate(variable.split("_"))
|
|
71
|
-
]
|
|
72
|
-
)
|
|
73
|
-
else:
|
|
74
|
-
variable = (
|
|
75
|
-
" ".join(
|
|
76
|
-
[
|
|
77
|
-
j.title() if i == 0 else j.lower()
|
|
78
|
-
for i, j in enumerate(variable.split())
|
|
79
|
-
]
|
|
80
|
-
)
|
|
81
|
-
if auto_format_label
|
|
82
|
-
else variable
|
|
83
|
-
)
|
|
84
|
-
|
|
85
|
-
ax.set(xlabel=f"{time_unit}", ylabel=f"{variable} {units}")
|
|
86
|
-
ax.ticklabel_format(style="sci", axis="y", scilimits=(0, 0))
|
|
87
|
-
|
|
88
|
-
if time_unit == "Year":
|
|
89
|
-
ax.xaxis.set_major_locator(mdates.YearLocator())
|
|
90
|
-
ax.xaxis.set_minor_locator(mdates.MonthLocator())
|
|
91
|
-
|
|
92
|
-
if time_unit == "Month":
|
|
93
|
-
ax.xaxis.set_major_locator(mdates.MonthLocator())
|
|
94
|
-
ax.xaxis.set_minor_locator(mdates.WeekdayLocator())
|
|
95
|
-
|
|
96
|
-
if time_unit == "Weekday":
|
|
97
|
-
ax.xaxis.set_major_locator(mdates.WeekdayLocator())
|
|
98
|
-
ax.xaxis.set_minor_locator(mdates.DayLocator())
|
|
99
|
-
|
|
100
|
-
if time_unit == "Day":
|
|
101
|
-
ax.xaxis.set_major_locator(mdates.DayLocator())
|
|
102
|
-
ax.xaxis.set_minor_locator(mdates.HourLocator())
|
|
103
|
-
|
|
104
|
-
if time_unit == "Hour":
|
|
105
|
-
ax.xaxis.set_major_locator(mdates.HourLocator())
|
|
106
|
-
ax.xaxis.set_minor_locator(mdates.MinuteLocator())
|
|
107
|
-
|
|
108
|
-
ax.tick_params(axis="x", rotation=rot)
|
|
109
|
-
ax.grid(which="both")
|
|
110
|
-
ax.grid(which="minor", alpha=0.6, linestyle=":")
|
|
111
|
-
ax.grid(which="major", alpha=0.8, linestyle="--")
|
|
112
|
-
|
|
113
|
-
return fig
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
def save_figure(
|
|
117
|
-
fig,
|
|
118
|
-
file_name,
|
|
119
|
-
variable_name="",
|
|
120
|
-
path="./",
|
|
121
|
-
):
|
|
122
|
-
"""
|
|
123
|
-
Saves a Matplotlib figure in three common high-quality formats (PNG, PDF, SVG).
|
|
124
|
-
|
|
125
|
-
The function creates a consistent file name structure:
|
|
126
|
-
{path}/{file_name}_{variable_name}.{extension}.
|
|
127
|
-
|
|
128
|
-
Parameters
|
|
129
|
-
----------
|
|
130
|
-
fig : matplotlib.figure.Figure
|
|
131
|
-
The Matplotlib figure object to be saved.
|
|
132
|
-
file_name : str
|
|
133
|
-
The primary name for the file (e.g., 'timeseries_report').
|
|
134
|
-
variable_name : str, optional
|
|
135
|
-
An optional secondary name, often the name of the plotted variable,
|
|
136
|
-
to be appended to the file name. Defaults to "".
|
|
137
|
-
path : str, optional
|
|
138
|
-
The directory path where the figure files will be saved.
|
|
139
|
-
Defaults to the current directory ('./').
|
|
140
|
-
|
|
141
|
-
Returns
|
|
142
|
-
-------
|
|
143
|
-
None
|
|
144
|
-
"""
|
|
145
|
-
|
|
146
|
-
if variable_name:
|
|
147
|
-
base_name = f"{path}/{file_name}_{variable_name}"
|
|
148
|
-
else:
|
|
149
|
-
base_name = f"{path}/{file_name}"
|
|
150
|
-
|
|
151
|
-
fig.savefig(f"{base_name}.png")
|
|
152
|
-
fig.savefig(f"{base_name}.pdf")
|
|
153
|
-
fig.savefig(f"{base_name}.svg")
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{direl_ts_tool_kit-0.4.9 → direl_ts_tool_kit-0.6.0}/direl_ts_tool_kit/utilities/data_prep.py
RENAMED
|
File without changes
|
|
File without changes
|
{direl_ts_tool_kit-0.4.9 → direl_ts_tool_kit-0.6.0}/direl_ts_tool_kit.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
{direl_ts_tool_kit-0.4.9 → direl_ts_tool_kit-0.6.0}/direl_ts_tool_kit.egg-info/top_level.txt
RENAMED
|
File without changes
|
|
File without changes
|