direl-ts-tool-kit 0.5.0__tar.gz → 0.7.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {direl_ts_tool_kit-0.5.0 → direl_ts_tool_kit-0.7.0}/LICENCE +0 -0
- {direl_ts_tool_kit-0.5.0 → direl_ts_tool_kit-0.7.0}/PKG-INFO +17 -1
- {direl_ts_tool_kit-0.5.0 → direl_ts_tool_kit-0.7.0}/README.md +16 -0
- {direl_ts_tool_kit-0.5.0 → direl_ts_tool_kit-0.7.0}/direl_ts_tool_kit/__init__.py +0 -0
- {direl_ts_tool_kit-0.5.0 → direl_ts_tool_kit-0.7.0}/direl_ts_tool_kit/plot/__init__.py +0 -0
- {direl_ts_tool_kit-0.5.0 → direl_ts_tool_kit-0.7.0}/direl_ts_tool_kit/plot/plot_style.py +2 -0
- {direl_ts_tool_kit-0.5.0 → direl_ts_tool_kit-0.7.0}/direl_ts_tool_kit/plot/plot_ts.py +213 -0
- {direl_ts_tool_kit-0.5.0 → direl_ts_tool_kit-0.7.0}/direl_ts_tool_kit/utilities/__init__.py +0 -0
- {direl_ts_tool_kit-0.5.0 → direl_ts_tool_kit-0.7.0}/direl_ts_tool_kit/utilities/data_prep.py +0 -0
- {direl_ts_tool_kit-0.5.0 → direl_ts_tool_kit-0.7.0}/direl_ts_tool_kit.egg-info/PKG-INFO +17 -1
- {direl_ts_tool_kit-0.5.0 → direl_ts_tool_kit-0.7.0}/direl_ts_tool_kit.egg-info/SOURCES.txt +0 -0
- {direl_ts_tool_kit-0.5.0 → direl_ts_tool_kit-0.7.0}/direl_ts_tool_kit.egg-info/dependency_links.txt +0 -0
- {direl_ts_tool_kit-0.5.0 → direl_ts_tool_kit-0.7.0}/direl_ts_tool_kit.egg-info/requires.txt +0 -0
- {direl_ts_tool_kit-0.5.0 → direl_ts_tool_kit-0.7.0}/direl_ts_tool_kit.egg-info/top_level.txt +0 -0
- {direl_ts_tool_kit-0.5.0 → direl_ts_tool_kit-0.7.0}/setup.cfg +0 -0
- {direl_ts_tool_kit-0.5.0 → direl_ts_tool_kit-0.7.0}/setup.py +1 -1
|
File without changes
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: direl-ts-tool-kit
|
|
3
|
-
Version: 0.5.0
|
|
3
|
+
Version: 0.7.0
|
|
4
4
|
Summary: A toolbox for time series analysis and visualization.
|
|
5
5
|
Home-page: https://gitlab.com/direl/direl_tool_kit
|
|
6
6
|
Author: Diego Restrepo-Leal
|
|
@@ -89,6 +89,14 @@ This function automatically sets major and minor time-based locators
|
|
|
89
89
|
on the x-axis based on the specified time unit, and formats the y-axis
|
|
90
90
|
to use scientific notation.
|
|
91
91
|
|
|
92
|
+
#### plot_interpolation_analysis
|
|
93
|
+
`plot_interpolation_analysis(df_original, variable, units="", method="polynomial", order=2, imputation_se=None, time_unit="Year", rot=90)`
|
|
94
|
+
|
|
95
|
+
Performs interpolation on missing data (NaNs) in a specified column and
|
|
96
|
+
plots the result, highlighting the imputed points with confidence intervals
|
|
97
|
+
if the Imputation Standard Error (SE) is provided.
|
|
98
|
+
|
|
99
|
+
|
|
92
100
|
#### save_figure
|
|
93
101
|
`save_figure(fig, file_name, variable_name="", path="./")`
|
|
94
102
|
|
|
@@ -117,6 +125,14 @@ It focuses on the lower triangular part (corner=True) and includes a
|
|
|
117
125
|
regression line for trend visualization.
|
|
118
126
|
|
|
119
127
|
|
|
128
|
+
#### plot_histogram
|
|
129
|
+
`plot_histogram(df, variable, units="", density=True, color="BLUE_BARS", bins=30)`
|
|
130
|
+
|
|
131
|
+
Generates a histogram plot for a specified numerical variable.
|
|
132
|
+
|
|
133
|
+
The plot visualizes the distribution of the data, with the Y-axis dynamically
|
|
134
|
+
labeled as 'Density' or 'Count' based on the `density` parameter.
|
|
135
|
+
|
|
120
136
|
# Examples
|
|
121
137
|
- [Example 1](https://gitlab.com/direl/direl_tool_kit/-/blob/main/example/example_01.md?ref_type=heads)
|
|
122
138
|
- [Example 2](https://gitlab.com/direl/direl_tool_kit/-/blob/main/example/example_02.md?ref_type=heads)
|
|
@@ -56,6 +56,14 @@ This function automatically sets major and minor time-based locators
|
|
|
56
56
|
on the x-axis based on the specified time unit, and formats the y-axis
|
|
57
57
|
to use scientific notation.
|
|
58
58
|
|
|
59
|
+
#### plot_interpolation_analysis
|
|
60
|
+
`plot_interpolation_analysis(df_original, variable, units="", method="polynomial", order=2, imputation_se=None, time_unit="Year", rot=90)`
|
|
61
|
+
|
|
62
|
+
Performs interpolation on missing data (NaNs) in a specified column and
|
|
63
|
+
plots the result, highlighting the imputed points with confidence intervals
|
|
64
|
+
if the Imputation Standard Error (SE) is provided.
|
|
65
|
+
|
|
66
|
+
|
|
59
67
|
#### save_figure
|
|
60
68
|
`save_figure(fig, file_name, variable_name="", path="./")`
|
|
61
69
|
|
|
@@ -84,6 +92,14 @@ It focuses on the lower triangular part (corner=True) and includes a
|
|
|
84
92
|
regression line for trend visualization.
|
|
85
93
|
|
|
86
94
|
|
|
95
|
+
#### plot_histogram
|
|
96
|
+
`plot_histogram(df, variable, units="", density=True, color="BLUE_BARS", bins=30)`
|
|
97
|
+
|
|
98
|
+
Generates a histogram plot for a specified numerical variable.
|
|
99
|
+
|
|
100
|
+
The plot visualizes the distribution of the data, with the Y-axis dynamically
|
|
101
|
+
labeled as 'Density' or 'Count' based on the `density` parameter.
|
|
102
|
+
|
|
87
103
|
# Examples
|
|
88
104
|
- [Example 1](https://gitlab.com/direl/direl_tool_kit/-/blob/main/example/example_01.md?ref_type=heads)
|
|
89
105
|
- [Example 2](https://gitlab.com/direl/direl_tool_kit/-/blob/main/example/example_02.md?ref_type=heads)
|
|
File without changes
|
|
File without changes
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import numpy as np
|
|
1
2
|
import pandas as pd
|
|
2
3
|
from .plot_style import *
|
|
3
4
|
from scipy.stats import pearsonr
|
|
@@ -121,6 +122,159 @@ def plot_time_series(
|
|
|
121
122
|
return fig
|
|
122
123
|
|
|
123
124
|
|
|
125
|
+
def plot_interpolation_analysis(
    df_original,
    variable,
    units="",
    method="polynomial",
    order=2,
    imputation_se=None,
    time_unit="Year",
    rot=90,
):
    """
    Interpolate the missing values (NaNs) of one column and plot the result.

    Points that were missing in the input are drawn in a different color from
    the observed points. When an Imputation Standard Error (SE) is supplied,
    shaded 95% and 80% bands are drawn around the imputed points and listed
    in the legend.

    Parameters
    ----------
    df_original : pd.DataFrame
        The DataFrame containing the original time series data. It is copied,
        not modified.
    variable : str
        The name of the column to interpolate and plot (e.g., 'LPUE').
    units : str, optional
        Units to display next to the variable name on the y-axis. Defaults to "".
    method : str, optional
        The interpolation method forwarded to ``Series.interpolate``
        (e.g., 'linear', 'polynomial', 'spline'). Defaults to 'polynomial'.
    order : int, optional
        The order of the interpolation (required for 'polynomial' or 'spline').
        Defaults to 2.
    imputation_se : pd.Series, float, or None, optional
        The Standard Error (SE) of the imputation. This must be a single value
        or a Series aligned with the DataFrame's index. If None, confidence
        intervals are NOT plotted. Defaults to None.
    time_unit : str, optional
        The time granularity for x-axis tick locators; also used as the x-axis
        label. Recognized values are "Year", "Month", "Weekday", "Day" and
        "Hour"; any other value leaves the default locators in place.
        Defaults to "Year".
    rot : int, optional
        Rotation angle (in degrees) for the x-axis tick labels. Defaults to 90.

    Returns
    -------
    matplotlib.figure.Figure
        The generated Matplotlib figure object with the plot.
    """
    # Record which rows are missing *before* filling them, so the imputed
    # points can be told apart from the observed ones afterwards.
    imputed_mask = df_original[variable].isnull()
    df_interpolated = df_original.copy()
    df_interpolated[variable] = df_interpolated[variable].interpolate(
        method=method, order=order
    )

    color1 = paper_colors["RED_LINES"]  # imputed points
    color2 = paper_colors["GREEN_LINES"]  # observed points
    col = np.where(imputed_mask, color1, color2)

    fig, ax = plt.subplots()

    # Artists of the shaded bands, kept so they can be added to the legend.
    interval_handles = []

    if imputation_se is not None:
        # Keep only the imputed values; observed rows become NaN so that no
        # band is drawn around them.
        # NOTE(review): an imputed point whose neighbours are both observed
        # forms a zero-width band and is presumably not visible - confirm.
        df_imputed_only = df_interpolated.copy()
        df_imputed_only.loc[~imputed_mask, variable] = np.nan

        # (SE multiplier, opacity, legend label); the wider band is drawn
        # first so the narrower, darker one ends up on top of it.
        bands = (
            (1.96, 0.2, "95% confidence interval"),
            (1.282, 0.4, "80% confidence interval"),
        )
        for z_score, alpha, label in bands:
            error = z_score * imputation_se
            band = ax.fill_between(
                df_imputed_only.index,
                df_imputed_only[variable] - error,
                df_imputed_only[variable] + error,
                color=paper_colors["GRAY_BARS"],
                alpha=alpha,
                edgecolor="none",
                label=label,
            )
            interval_handles.append(band)

    ax.plot(
        df_interpolated[variable],
        linestyle="-.",
        linewidth=1,
        color=paper_colors["BLUE_LINES"],
    )

    ax.scatter(
        df_interpolated.index,
        df_interpolated[variable],
        color=col,
        s=10,
        linewidth=4,
    )

    ax.set(xlabel=f"{time_unit}", ylabel=f"{variable} {units}")
    ax.ticklabel_format(style="sci", axis="y", scilimits=(0, 0))

    # (major, minor) tick locator classes for each supported time unit.
    locators = {
        "Year": (mdates.YearLocator, mdates.MonthLocator),
        "Month": (mdates.MonthLocator, mdates.WeekdayLocator),
        "Weekday": (mdates.WeekdayLocator, mdates.DayLocator),
        "Day": (mdates.DayLocator, mdates.HourLocator),
        "Hour": (mdates.HourLocator, mdates.MinuteLocator),
    }
    if time_unit in locators:
        major_locator, minor_locator = locators[time_unit]
        ax.xaxis.set_major_locator(major_locator())
        ax.xaxis.set_minor_locator(minor_locator())

    ax.tick_params(axis="x", rotation=rot)
    ax.grid(which="both")
    ax.grid(which="minor", alpha=0.6, linestyle=":")
    ax.grid(which="major", alpha=0.8, linestyle="--")

    # The scatter uses per-point colors, so the two point categories need
    # hand-made proxy handles in the legend.
    legend_elements = [
        Line2D(
            [0],
            [0],
            marker="o",
            color=color2,
            label="Current data",
            linestyle="none",
        ),
        Line2D(
            [0],
            [0],
            marker="o",
            color=color1,
            label="Imputed data",
            linestyle="none",
        ),
    ]
    # Passing explicit handles hides every other labelled artist, so the
    # band artists are appended here for their labels to show up.
    ax.legend(handles=legend_elements + interval_handles, loc="upper right")

    return fig
|
|
276
|
+
|
|
277
|
+
|
|
124
278
|
def save_figure(
|
|
125
279
|
fig,
|
|
126
280
|
file_name,
|
|
@@ -258,3 +412,62 @@ def pair_plot(X, y):
|
|
|
258
412
|
fig = svm.fig
|
|
259
413
|
|
|
260
414
|
return fig
|
|
415
|
+
|
|
416
|
+
|
|
417
|
+
def plot_histogram(df, variable, units="", density=True, color="BLUE_BARS", bins=30):
    """
    Generates a histogram plot for a specified numerical variable.

    The plot visualizes the distribution of the data, with the Y-axis dynamically
    labeled as 'Density' or 'Count' based on the `density` parameter.

    Parameters
    ----------
    df : pd.DataFrame
        The DataFrame containing the data to be plotted.
    variable : str
        The name of the column in 'df' whose distribution will be plotted.
    units : str, optional
        Units to display next to the variable name on the X-axis. Defaults to "".
    density : bool, optional
        If True (default), the Y-axis is scaled to a Probability Density,
        meaning the area under the bars sums to 1. If False, the Y-axis
        displays the raw count of observations per bin.
    color : str, optional
        Key corresponding to the bar color in the global 'paper_colors' dictionary.
        Defaults to "BLUE_BARS".
    bins : int or sequence, optional
        The number of equal-width bins in the range to divide the data.
        Can be an integer (default is 30) or a sequence specifying the bin edges.

    Returns
    -------
    matplotlib.figure.Figure
        The generated Matplotlib Figure object containing the histogram.

    Raises
    ------
    KeyError
        If `variable` is not a column of `df` or `color` is not a key of
        'paper_colors'.

    Notes
    -----
    The plot applies a fixed style (alpha=0.7, white edge-color) and grid
    for visual consistency.
    """
    fig, ax = plt.subplots()

    ax.hist(
        df[variable],
        bins=bins,
        density=density,
        alpha=0.7,
        color=paper_colors[color],
        edgecolor="white",
    )

    ax.set_xlabel(f"{variable} {units}")
    # The y-axis label follows the normalization actually applied above.
    ax.set_ylabel("Density" if density else "Count")

    ax.grid(alpha=0.8, linestyle="--")

    return fig
|
|
File without changes
|
{direl_ts_tool_kit-0.5.0 → direl_ts_tool_kit-0.7.0}/direl_ts_tool_kit/utilities/data_prep.py
RENAMED
|
File without changes
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: direl-ts-tool-kit
|
|
3
|
-
Version: 0.5.0
|
|
3
|
+
Version: 0.7.0
|
|
4
4
|
Summary: A toolbox for time series analysis and visualization.
|
|
5
5
|
Home-page: https://gitlab.com/direl/direl_tool_kit
|
|
6
6
|
Author: Diego Restrepo-Leal
|
|
@@ -89,6 +89,14 @@ This function automatically sets major and minor time-based locators
|
|
|
89
89
|
on the x-axis based on the specified time unit, and formats the y-axis
|
|
90
90
|
to use scientific notation.
|
|
91
91
|
|
|
92
|
+
#### plot_interpolation_analysis
|
|
93
|
+
`plot_interpolation_analysis(df_original, variable, units="", method="polynomial", order=2, imputation_se=None, time_unit="Year", rot=90)`
|
|
94
|
+
|
|
95
|
+
Performs interpolation on missing data (NaNs) in a specified column and
|
|
96
|
+
plots the result, highlighting the imputed points with confidence intervals
|
|
97
|
+
if the Imputation Standard Error (SE) is provided.
|
|
98
|
+
|
|
99
|
+
|
|
92
100
|
#### save_figure
|
|
93
101
|
`save_figure(fig, file_name, variable_name="", path="./")`
|
|
94
102
|
|
|
@@ -117,6 +125,14 @@ It focuses on the lower triangular part (corner=True) and includes a
|
|
|
117
125
|
regression line for trend visualization.
|
|
118
126
|
|
|
119
127
|
|
|
128
|
+
#### plot_histogram
|
|
129
|
+
`plot_histogram(df, variable, units="", density=True, color="BLUE_BARS", bins=30)`
|
|
130
|
+
|
|
131
|
+
Generates a histogram plot for a specified numerical variable.
|
|
132
|
+
|
|
133
|
+
The plot visualizes the distribution of the data, with the Y-axis dynamically
|
|
134
|
+
labeled as 'Density' or 'Count' based on the `density` parameter.
|
|
135
|
+
|
|
120
136
|
# Examples
|
|
121
137
|
- [Example 1](https://gitlab.com/direl/direl_tool_kit/-/blob/main/example/example_01.md?ref_type=heads)
|
|
122
138
|
- [Example 2](https://gitlab.com/direl/direl_tool_kit/-/blob/main/example/example_02.md?ref_type=heads)
|
|
File without changes
|
{direl_ts_tool_kit-0.5.0 → direl_ts_tool_kit-0.7.0}/direl_ts_tool_kit.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
|
File without changes
|
{direl_ts_tool_kit-0.5.0 → direl_ts_tool_kit-0.7.0}/direl_ts_tool_kit.egg-info/top_level.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
|
|
|
2
2
|
|
|
3
3
|
setup(
|
|
4
4
|
name="direl-ts-tool-kit",
|
|
5
|
-
version="0.5.0",
|
|
5
|
+
version="0.7.0",
|
|
6
6
|
description="A toolbox for time series analysis and visualization.",
|
|
7
7
|
long_description=open("README.md", encoding="utf-8").read(),
|
|
8
8
|
long_description_content_type="text/markdown",
|