direl-ts-tool-kit 0.5.0__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,8 @@
1
1
  import seaborn as sns
2
2
  import matplotlib.pyplot as plt
3
3
  import matplotlib.dates as mdates
4
+ from matplotlib.lines import Line2D
5
+
4
6
 
5
7
  plt.style.use("fast")
6
8
 
@@ -1,3 +1,4 @@
1
+ import numpy as np
1
2
  import pandas as pd
2
3
  from .plot_style import *
3
4
  from scipy.stats import pearsonr
@@ -121,6 +122,159 @@ def plot_time_series(
121
122
  return fig
122
123
 
123
124
 
125
+ def plot_interpolation_analysis(
126
+ df_original,
127
+ variable,
128
+ units="",
129
+ method="polynomial",
130
+ order=2,
131
+ imputation_se=None,
132
+ time_unit="Year",
133
+ rot=90,
134
+ ):
135
+ """
136
+ Performs interpolation on missing data (NaNs) in a specified column and
137
+ plots the result, highlighting the imputed points with confidence intervals
138
+ if the Imputation Standard Error (SE) is provided.
139
+
140
+ Parameters
141
+ ----------
142
+ df_original : pd.DataFrame
143
+ The DataFrame containing the original time series data.
144
+ variable : str
145
+ The name of the column to interpolate and plot (e.g., 'LPUE').
146
+ units : str, optional
147
+ Units to display next to the variable name on the y-axis. Defaults to "".
148
+ method : str, optional
149
+ The interpolation method (e.g., 'linear', 'polynomial', 'spline').
150
+ Defaults to 'polynomial'.
151
+ order : int, optional
152
+ The order of the interpolation (required for 'polynomial' or 'spline').
153
+ Defaults to 2.
154
+ imputation_se : pd.Series, float, or None, optional
155
+ The Standard Error (SE) of the imputation. This must be a single value
156
+ or a Series aligned with the DataFrame's index. If None, confidence
157
+ intervals will NOT be plotted. Defaults to None.
158
+ time_unit : str, optional
159
+ The time granularity for x-axis tick locators. Defaults to "Year".
160
+ rot : int, optional
161
+ Rotation angle (in degrees) for the x-axis tick labels. Defaults to 90.
162
+
163
+ Returns
164
+ -------
165
+ matplotlib.figure.Figure
166
+ The generated Matplotlib figure object with the plot.
167
+ """
168
+
169
+ imputed_mask = df_original[variable].isnull()
170
+ df_interpolated = df_original.copy()
171
+ df_interpolated[variable] = df_interpolated[variable].interpolate(
172
+ method=method, order=order
173
+ )
174
+
175
+ color1 = paper_colors["RED_LINES"]
176
+ color2 = paper_colors["GREEN_LINES"]
177
+
178
+ col = np.where(imputed_mask, color1, color2)
179
+
180
+ fig, ax = plt.subplots()
181
+
182
+ if imputation_se is not None:
183
+ df_imputed_only = df_interpolated.copy()
184
+ df_imputed_only.loc[~imputed_mask, variable] = np.nan
185
+
186
+ Z_80 = 1.282
187
+ Z_95 = 1.96
188
+
189
+ error_80 = Z_80 * imputation_se
190
+ error_95 = Z_95 * imputation_se
191
+
192
+ ax.fill_between(
193
+ df_imputed_only.index,
194
+ df_imputed_only[variable] - error_95,
195
+ df_imputed_only[variable] + error_95,
196
+ color=paper_colors["GRAY_BARS"],
197
+ alpha=0.2,
198
+ edgecolor="none",
199
+ label="95% confidence interval",
200
+ )
201
+
202
+ ax.fill_between(
203
+ df_imputed_only.index,
204
+ df_imputed_only[variable] - error_80,
205
+ df_imputed_only[variable] + error_80,
206
+ color=paper_colors["GRAY_BARS"],
207
+ alpha=0.4,
208
+ edgecolor="none",
209
+ label="80% confidence interval",
210
+ )
211
+
212
+ ax.plot(
213
+ df_interpolated[variable],
214
+ linestyle="-.",
215
+ linewidth=1,
216
+ color=paper_colors["BLUE_LINES"],
217
+ )
218
+
219
+ ax.scatter(
220
+ df_interpolated.index,
221
+ df_interpolated[variable],
222
+ color=col,
223
+ s=10,
224
+ linewidth=4,
225
+ )
226
+
227
+ ax.set(xlabel=f"{time_unit}", ylabel=f"{variable} {units}")
228
+ ax.ticklabel_format(style="sci", axis="y", scilimits=(0, 0))
229
+
230
+ if time_unit == "Year":
231
+ ax.xaxis.set_major_locator(mdates.YearLocator())
232
+ ax.xaxis.set_minor_locator(mdates.MonthLocator())
233
+
234
+ if time_unit == "Month":
235
+ ax.xaxis.set_major_locator(mdates.MonthLocator())
236
+ ax.xaxis.set_minor_locator(mdates.WeekdayLocator())
237
+
238
+ if time_unit == "Weekday":
239
+ ax.xaxis.set_major_locator(mdates.WeekdayLocator())
240
+ ax.xaxis.set_minor_locator(mdates.DayLocator())
241
+
242
+ if time_unit == "Day":
243
+ ax.xaxis.set_major_locator(mdates.DayLocator())
244
+ ax.xaxis.set_minor_locator(mdates.HourLocator())
245
+
246
+ if time_unit == "Hour":
247
+ ax.xaxis.set_major_locator(mdates.HourLocator())
248
+ ax.xaxis.set_minor_locator(mdates.MinuteLocator())
249
+
250
+ ax.tick_params(axis="x", rotation=rot)
251
+ ax.grid(which="both")
252
+ ax.grid(which="minor", alpha=0.6, linestyle=":")
253
+ ax.grid(which="major", alpha=0.8, linestyle="--")
254
+
255
+ legend_elements = [
256
+ Line2D(
257
+ [0],
258
+ [0],
259
+ marker="o",
260
+ color=color2,
261
+ label="Current data",
262
+ linestyle="none",
263
+ ),
264
+ Line2D(
265
+ [0],
266
+ [0],
267
+ marker="o",
268
+ color=color1,
269
+ label="Imputed data",
270
+ linestyle="none",
271
+ ),
272
+ ]
273
+ ax.legend(handles=legend_elements, loc="upper right")
274
+
275
+ return fig
276
+
277
+
124
278
  def save_figure(
125
279
  fig,
126
280
  file_name,
@@ -258,3 +412,62 @@ def pair_plot(X, y):
258
412
  fig = svm.fig
259
413
 
260
414
  return fig
415
+
416
+
417
+ def plot_histogram(df, variable, units="", density=True, color="BLUE_BARS", bins=30):
418
+ """
419
+ Generates a histogram plot for a specified numerical variable.
420
+
421
+ The plot visualizes the distribution of the data, with the Y-axis dynamically
422
+ labeled as 'Density' or 'Count' based on the `density` parameter.
423
+
424
+ Parameters
425
+ ----------
426
+ df : pd.DataFrame
427
+ The DataFrame containing the data to be plotted.
428
+ variable : str
429
+ The name of the column in 'df' whose distribution will be plotted.
430
+ units : str, optional
431
+ Units to display next to the variable name on the X-axis. Defaults to "".
432
+ density : bool, optional
433
+ If True (default), the Y-axis is scaled to a Probability Density,
434
+ meaning the area under the bars sums to 1. If False, the Y-axis
435
+ displays the raw count of observations per bin.
436
+ color : str, optional
437
+ Key corresponding to the bar color in the global 'paper_colors' dictionary
438
+ (e.g., "BLUE_BARS"). Defaults to "BLUE_LINES".
439
+ bins : int or sequence, optional
440
+ The number of equal-width bins in the range to divide the data.
441
+ Can be an integer (default is 30) or a sequence specifying the bin edges.
442
+
443
+ Returns
444
+ -------
445
+ matplotlib.figure.Figure
446
+ The generated Matplotlib Figure object containing the histogram.
447
+
448
+ Notes
449
+ -----
450
+ The plot applies a fixed style (alpha=0.7, white edge-color) and grid
451
+ for visual consistency.
452
+ """
453
+ fig, ax = plt.subplots()
454
+
455
+ ax.hist(
456
+ df[variable],
457
+ bins=bins,
458
+ density=density,
459
+ alpha=0.7,
460
+ color=paper_colors[color],
461
+ edgecolor="white",
462
+ )
463
+
464
+ ax.set_xlabel(f"{variable} {units}")
465
+
466
+ if density:
467
+ ax.set_ylabel("Density")
468
+ else:
469
+ ax.set_ylabel("Count")
470
+
471
+ ax.grid(alpha=0.8, linestyle="--")
472
+
473
+ return fig
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: direl-ts-tool-kit
3
- Version: 0.5.0
3
+ Version: 0.7.0
4
4
  Summary: A toolbox for time series analysis and visualization.
5
5
  Home-page: https://gitlab.com/direl/direl_tool_kit
6
6
  Author: Diego Restrepo-Leal
@@ -89,6 +89,14 @@ This function automatically sets major and minor time-based locators
89
89
  on the x-axis based on the specified time unit, and formats the y-axis
90
90
  to use scientific notation.
91
91
 
92
+ #### plot_interpolation_analysis
93
+ `plot_interpolation_analysis(df_original, variable, units="", method="polynomial", order=2, imputation_se=None, time_unit="Year", rot=90)`
94
+
95
+ Performs interpolation on missing data (NaNs) in a specified column and
96
+ plots the result, highlighting the imputed points with confidence intervals
97
+ if the Imputation Standard Error (SE) is provided.
98
+
99
+
92
100
  #### save_figure
93
101
  `save_figure(fig, file_name, variable_name="", path="./")`
94
102
 
@@ -117,6 +125,14 @@ It focuses on the lower triangular part (corner=True) and includes a
117
125
  regression line for trend visualization.
118
126
 
119
127
 
128
+ #### plot_histogram
129
+ `plot_histogram(df, variable, units="", density=True, color="BLUE_BARS", bins=30)`
130
+
131
+ Generates a histogram plot for a specified numerical variable.
132
+
133
+ The plot visualizes the distribution of the data, with the Y-axis dynamically
134
+ labeled as 'Density' or 'Count' based on the `density` parameter.
135
+
120
136
  # Examples
121
137
  - [Example 1](https://gitlab.com/direl/direl_tool_kit/-/blob/main/example/example_01.md?ref_type=heads)
122
138
  - [Example 2](https://gitlab.com/direl/direl_tool_kit/-/blob/main/example/example_02.md?ref_type=heads)
@@ -0,0 +1,11 @@
1
+ direl_ts_tool_kit/__init__.py,sha256=W99Wd3BeEFKOxT51TApURElbDJvqIjD8u_-qDoCYSJ0,94
2
+ direl_ts_tool_kit/plot/__init__.py,sha256=CMwyv-kiE74nwr3MJPL7gWIJmcfZ8UQCRu7mBGGQ4rI,49
3
+ direl_ts_tool_kit/plot/plot_style.py,sha256=5YxoLXlYvzleTnBEGPwCmHQIJ0S96KPJspq_n-qMvpw,1069
4
+ direl_ts_tool_kit/plot/plot_ts.py,sha256=E8oswirgUyzojKRvUA39MiZqplCFMMmIJtYBDOOM4JM,14839
5
+ direl_ts_tool_kit/utilities/__init__.py,sha256=jMtxYZUtwlhgI99sxe_8MMzsDnxtbTP7Ivh9tUOeIwQ,25
6
+ direl_ts_tool_kit/utilities/data_prep.py,sha256=k3eOwQEEd5mxy2DtT_Gdo7BhkzEmSQqvMJ89y8mH5CQ,6024
7
+ direl_ts_tool_kit-0.7.0.dist-info/licenses/LICENCE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
+ direl_ts_tool_kit-0.7.0.dist-info/METADATA,sha256=UTxeb67A2nbjkUDpPd2tAP_7GgEc4KRmaawwDz2fzG4,5196
9
+ direl_ts_tool_kit-0.7.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
10
+ direl_ts_tool_kit-0.7.0.dist-info/top_level.txt,sha256=vMCRudnGnsdRg_6fUftnG8PF2Y1m0bjBDMf3pCAp6bc,18
11
+ direl_ts_tool_kit-0.7.0.dist-info/RECORD,,
@@ -1,11 +0,0 @@
1
- direl_ts_tool_kit/__init__.py,sha256=W99Wd3BeEFKOxT51TApURElbDJvqIjD8u_-qDoCYSJ0,94
2
- direl_ts_tool_kit/plot/__init__.py,sha256=CMwyv-kiE74nwr3MJPL7gWIJmcfZ8UQCRu7mBGGQ4rI,49
3
- direl_ts_tool_kit/plot/plot_style.py,sha256=WVcxr5LK0ht8RmFscUri7HcCB5Vd50UItocYNnlj3sA,1032
4
- direl_ts_tool_kit/plot/plot_ts.py,sha256=PbqclUAVTbYa7YTf59Q75XW1qqaW_I7qPuuTXMQmmxI,8340
5
- direl_ts_tool_kit/utilities/__init__.py,sha256=jMtxYZUtwlhgI99sxe_8MMzsDnxtbTP7Ivh9tUOeIwQ,25
6
- direl_ts_tool_kit/utilities/data_prep.py,sha256=k3eOwQEEd5mxy2DtT_Gdo7BhkzEmSQqvMJ89y8mH5CQ,6024
7
- direl_ts_tool_kit-0.5.0.dist-info/licenses/LICENCE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
- direl_ts_tool_kit-0.5.0.dist-info/METADATA,sha256=ctS91dCUFsS4i4HI80XFjhHVEaySpNVQw79wyvh_ipM,4509
9
- direl_ts_tool_kit-0.5.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
10
- direl_ts_tool_kit-0.5.0.dist-info/top_level.txt,sha256=vMCRudnGnsdRg_6fUftnG8PF2Y1m0bjBDMf3pCAp6bc,18
11
- direl_ts_tool_kit-0.5.0.dist-info/RECORD,,