direl-ts-tool-kit 0.2.0__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {direl_ts_tool_kit-0.2.0 → direl_ts_tool_kit-0.3.0}/LICENCE +0 -0
- {direl_ts_tool_kit-0.2.0 → direl_ts_tool_kit-0.3.0}/PKG-INFO +3 -2
- {direl_ts_tool_kit-0.2.0 → direl_ts_tool_kit-0.3.0}/README.md +0 -0
- {direl_ts_tool_kit-0.2.0 → direl_ts_tool_kit-0.3.0}/direl_ts_tool_kit/__init__.py +1 -0
- {direl_ts_tool_kit-0.2.0 → direl_ts_tool_kit-0.3.0}/direl_ts_tool_kit/plot/__init__.py +0 -0
- {direl_ts_tool_kit-0.2.0 → direl_ts_tool_kit-0.3.0}/direl_ts_tool_kit/plot/plot_style.py +0 -0
- {direl_ts_tool_kit-0.2.0 → direl_ts_tool_kit-0.3.0}/direl_ts_tool_kit/plot/plot_ts.py +55 -5
- direl_ts_tool_kit-0.3.0/direl_ts_tool_kit/utilities/__init__.py +1 -0
- direl_ts_tool_kit-0.3.0/direl_ts_tool_kit/utilities/data_prep.py +118 -0
- {direl_ts_tool_kit-0.2.0 → direl_ts_tool_kit-0.3.0}/direl_ts_tool_kit.egg-info/PKG-INFO +3 -2
- {direl_ts_tool_kit-0.2.0 → direl_ts_tool_kit-0.3.0}/direl_ts_tool_kit.egg-info/SOURCES.txt +3 -1
- {direl_ts_tool_kit-0.2.0 → direl_ts_tool_kit-0.3.0}/direl_ts_tool_kit.egg-info/dependency_links.txt +0 -0
- {direl_ts_tool_kit-0.2.0 → direl_ts_tool_kit-0.3.0}/direl_ts_tool_kit.egg-info/requires.txt +1 -0
- {direl_ts_tool_kit-0.2.0 → direl_ts_tool_kit-0.3.0}/direl_ts_tool_kit.egg-info/top_level.txt +0 -0
- {direl_ts_tool_kit-0.2.0 → direl_ts_tool_kit-0.3.0}/setup.cfg +0 -0
- {direl_ts_tool_kit-0.2.0 → direl_ts_tool_kit-0.3.0}/setup.py +3 -2
|
File without changes
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: direl-ts-tool-kit
|
|
3
|
-
Version: 0.2.0
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: A toolbox for time series analysis and visualization.
|
|
5
5
|
Home-page: https://gitlab.com/direl/direl_tool_kit
|
|
6
|
-
Author:
|
|
6
|
+
Author: Diego Restrepo-Leal
|
|
7
7
|
Author-email: diegorestrepoleal@gmail.com
|
|
8
8
|
Classifier: Programming Language :: Python :: 3
|
|
9
9
|
Classifier: Programming Language :: Python :: 3.9
|
|
@@ -17,6 +17,7 @@ License-File: LICENCE
|
|
|
17
17
|
Requires-Dist: pandas>=1.0.0
|
|
18
18
|
Requires-Dist: numpy>=1.18.0
|
|
19
19
|
Requires-Dist: matplotlib>=3.0.0
|
|
20
|
+
Requires-Dist: openpyxl
|
|
20
21
|
Dynamic: author
|
|
21
22
|
Dynamic: author-email
|
|
22
23
|
Dynamic: classifier
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
@@ -2,7 +2,7 @@ from .plot_style import *
|
|
|
2
2
|
|
|
3
3
|
|
|
4
4
|
def plot_time_series(
|
|
5
|
-
|
|
5
|
+
df_ts, variable, units="", color="BLUE_LINES", time_unit="Year", rot=90, auto_format_label=True
|
|
6
6
|
):
|
|
7
7
|
"""
|
|
8
8
|
Plots a time series with custom styling and dual-level grid visibility.
|
|
@@ -13,7 +13,7 @@ def plot_time_series(
|
|
|
13
13
|
|
|
14
14
|
Parameters
|
|
15
15
|
----------
|
|
16
|
-
|
|
16
|
+
df_ts : pd.DataFrame
|
|
17
17
|
The DataFrame containing the time series data. Must have a DatetimeIndex.
|
|
18
18
|
variable : str
|
|
19
19
|
The name of the column to plot. The label is automatically formatted
|
|
@@ -29,7 +29,7 @@ def plot_time_series(
|
|
|
29
29
|
Options include 'Year', 'Month', 'Weekday', or 'Day'. Defaults to "Year".
|
|
30
30
|
rot : int, optional
|
|
31
31
|
Rotation angle (in degrees) for the x-axis tick labels. Defaults to 90.
|
|
32
|
-
|
|
32
|
+
auto_format_label : bool, optional
|
|
33
33
|
Used internally for label formatting logic. Defaults to True.
|
|
34
34
|
|
|
35
35
|
Returns
|
|
@@ -41,10 +41,20 @@ def plot_time_series(
|
|
|
41
41
|
-----
|
|
42
42
|
Major grid lines are displayed with a dashed line ('--'), and minor grid
|
|
43
43
|
lines are displayed with a dotted line (':') for detailed temporal analysis.
|
|
44
|
+
|
|
45
|
+
Available Colors
|
|
46
|
+
----------------
|
|
47
|
+
The 'color' parameter accepts any key from the 'paper_colors' dictionary.
|
|
48
|
+
|
|
49
|
+
Lines: 'BLUE_LINES', 'ORANGE_LINES', 'GREEN_LINES', 'RED_LINES',
|
|
50
|
+
'GRAY_LINES', 'PURPLE_LINES', 'MAROON_LINES', 'GOLD_LINES'.
|
|
51
|
+
|
|
52
|
+
Bars: 'BLUE_BARS', 'ORANGE_BARS', 'GREEN_BARS', 'RED_BARS',
|
|
53
|
+
'GRAY_BARS', 'PURPLE_BARS', 'MAROON_BARS', 'GOLD_BARS'.
|
|
44
54
|
"""
|
|
45
55
|
|
|
46
56
|
fig, ax = plt.subplots()
|
|
47
|
-
ax.plot(
|
|
57
|
+
ax.plot(df_ts.index, df_ts[variable], linewidth=3, color=paper_colors[color])
|
|
48
58
|
|
|
49
59
|
if "-" in variable:
|
|
50
60
|
variable = "-".join(
|
|
@@ -68,7 +78,7 @@ def plot_time_series(
|
|
|
68
78
|
for i, j in enumerate(variable.split())
|
|
69
79
|
]
|
|
70
80
|
)
|
|
71
|
-
if
|
|
81
|
+
if auto_format_label
|
|
72
82
|
else variable
|
|
73
83
|
)
|
|
74
84
|
|
|
@@ -97,3 +107,43 @@ def plot_time_series(
|
|
|
97
107
|
ax.grid(which="major", alpha=0.8, linestyle="--")
|
|
98
108
|
|
|
99
109
|
return fig
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def save_figure(
    fig,
    file_name,
    variable_name="",
    path="./",
):
    """
    Saves a figure as PNG, PDF and SVG files sharing one base name.

    The base name is ``{file_name}_{variable_name}`` when ``variable_name``
    is non-empty and ``{file_name}`` otherwise. It is joined to ``path``
    with ``os.path.join``, so an empty ``path`` resolves to the current
    working directory and a trailing separator in ``path`` is not doubled.

    Parameters
    ----------
    fig : matplotlib.figure.Figure
        The figure object to be saved. Only its ``savefig`` method is used.
    file_name : str
        The primary name for the file (e.g., 'timeseries_report').
    variable_name : str, optional
        An optional secondary name, often the name of the plotted variable,
        appended to the file name after an underscore. Defaults to "".
    path : str, optional
        The directory where the figure files are written. The directory is
        not created by this function. Defaults to the current
        directory ('./').

    Returns
    -------
    None
    """
    # Local import: the module-level import block of this file is not
    # guaranteed to provide ``os``.
    import os

    stem = f"{file_name}_{variable_name}" if variable_name else file_name
    base_name = os.path.join(path, stem)

    for extension in ("png", "pdf", "svg"):
        fig.savefig(f"{base_name}.{extension}")
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .data_prep import *
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def parse_datetime_index(df_raw, date_column="date"):
    """
    Parses a column into datetimes and uses it as the DataFrame index.

    The values of ``date_column`` are converted with ``pd.to_datetime``;
    the column is then removed from the returned frame and the parsed
    values become its index. ``df_raw`` itself is not modified.

    Parameters
    ----------
    df_raw : pd.DataFrame
        The raw DataFrame, including the column holding the date values.
    date_column : str, optional
        The name of the column in 'df_raw' that contains the date/time
        information. Defaults to "date".

    Returns
    -------
    df_ts : pd.DataFrame
        A new DataFrame without ``date_column``, indexed by the parsed
        dates. The index keeps the name of ``date_column``.

    Raises
    ------
    KeyError
        If ``date_column`` is not a column of ``df_raw``.
    """
    date_parsed = pd.to_datetime(df_raw[date_column])

    # drop() and set_index() both return new objects, so the caller's
    # frame is left untouched without any in-place mutation.
    return df_raw.drop(columns=[date_column]).set_index(date_parsed)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def generate_dates(df_ts, freq="MS"):
    """
    Generates a regular DatetimeIndex covering the time span of a DataFrame.

    The earliest and latest entries of the DataFrame index are used as the
    boundaries of a new date sequence built with the given frequency.

    Parameters
    ----------
    df_ts : pd.DataFrame
        The time series DataFrame whose index determines the start and end
        of the new date range.
    freq : str, optional
        The frequency of the generated dates (e.g., 'D' for daily, 'MS' for
        Month Start). Defaults to "MS" (Month Start).

    Returns
    -------
    pd.DatetimeIndex
        A DatetimeIndex running from the smallest to the largest index
        entry of 'df_ts' at the requested frequency. An empty
        DatetimeIndex is returned when 'df_ts' has no rows.

    Notes
    -----
    The boundaries are taken from the minimum and maximum of the index, so
    the row order of 'df_ts' does not matter and 'df_ts' is not modified.
    """
    index = df_ts.index

    # No rows means there are no boundaries to build a range from.
    if index.empty:
        return pd.DatetimeIndex([])

    return pd.date_range(start=index.min(), end=index.max(), freq=freq)
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def reindex_and_aggregate(df_ts, column_name, freq="MS"):
    """
    Aggregates a time series by date and aligns it with a regular date range.

    Values of ``column_name`` that share the same index entry are summed.
    The aggregated values are then outer-merged with the date sequence
    returned by ``generate_dates(df_ts, freq=freq)``, so dates of that
    sequence that have no data appear with NaN.

    Parameters
    ----------
    df_ts : pd.DataFrame
        The input DataFrame. Its index is expected to hold the date
        information; the name of the index does not matter.
    column_name : str
        The name of the column containing the values to be aggregated.
    freq : str, optional
        The frequency passed to ``generate_dates`` for the target date
        sequence. Defaults to "MS" (Month Start).

    Returns
    -------
    pd.DataFrame
        A new DataFrame with the columns 'date_aux' and ``column_name``,
        indexed by 'date_aux'. Dates of the generated sequence without a
        matching observation hold NaN in ``column_name``.

    Notes
    -----
    1. **Dependency:** This function relies on `generate_dates()` to create
       the target date sequence.
    2. **Aggregation:** Entries sharing the same index value are summed.
    3. **Outer merge:** Because the merge is an outer join, observations
       whose date is not part of the generated sequence are kept; they end
       up with a missing 'date_aux' value.
    """
    date_aux = generate_dates(df_ts, freq=freq)
    df_date = pd.DataFrame({"date_aux": date_aux})

    # Group on the index itself rather than on a level that must be called
    # "date", then expose it as a "date" column for the merge below.
    aggregated = (
        df_ts[[column_name]]
        .groupby(level=0)
        .sum()
        .rename_axis("date")
        .reset_index()
    )

    result = pd.merge(
        aggregated,
        df_date,
        left_on="date",
        right_on="date_aux",
        how="outer",
    )

    # Work on an explicit copy so setting the index never touches `result`.
    df_ts_new = result[["date_aux", column_name]].copy()
    return df_ts_new.set_index(df_ts_new["date_aux"])
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: direl-ts-tool-kit
|
|
3
|
-
Version: 0.2.0
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: A toolbox for time series analysis and visualization.
|
|
5
5
|
Home-page: https://gitlab.com/direl/direl_tool_kit
|
|
6
|
-
Author:
|
|
6
|
+
Author: Diego Restrepo-Leal
|
|
7
7
|
Author-email: diegorestrepoleal@gmail.com
|
|
8
8
|
Classifier: Programming Language :: Python :: 3
|
|
9
9
|
Classifier: Programming Language :: Python :: 3.9
|
|
@@ -17,6 +17,7 @@ License-File: LICENCE
|
|
|
17
17
|
Requires-Dist: pandas>=1.0.0
|
|
18
18
|
Requires-Dist: numpy>=1.18.0
|
|
19
19
|
Requires-Dist: matplotlib>=3.0.0
|
|
20
|
+
Requires-Dist: openpyxl
|
|
20
21
|
Dynamic: author
|
|
21
22
|
Dynamic: author-email
|
|
22
23
|
Dynamic: classifier
|
|
@@ -9,4 +9,6 @@ direl_ts_tool_kit.egg-info/requires.txt
|
|
|
9
9
|
direl_ts_tool_kit.egg-info/top_level.txt
|
|
10
10
|
direl_ts_tool_kit/plot/__init__.py
|
|
11
11
|
direl_ts_tool_kit/plot/plot_style.py
|
|
12
|
-
direl_ts_tool_kit/plot/plot_ts.py
|
|
12
|
+
direl_ts_tool_kit/plot/plot_ts.py
|
|
13
|
+
direl_ts_tool_kit/utilities/__init__.py
|
|
14
|
+
direl_ts_tool_kit/utilities/data_prep.py
|
{direl_ts_tool_kit-0.2.0 → direl_ts_tool_kit-0.3.0}/direl_ts_tool_kit.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
{direl_ts_tool_kit-0.2.0 → direl_ts_tool_kit-0.3.0}/direl_ts_tool_kit.egg-info/top_level.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
@@ -2,11 +2,11 @@ from setuptools import setup, find_packages
|
|
|
2
2
|
|
|
3
3
|
setup(
|
|
4
4
|
name="direl-ts-tool-kit",
|
|
5
|
-
version="0.2.0",
|
|
5
|
+
version="0.3.0",
|
|
6
6
|
description="A toolbox for time series analysis and visualization.",
|
|
7
7
|
long_description=open("README.md", encoding="utf-8").read(),
|
|
8
8
|
long_description_content_type="text/markdown",
|
|
9
|
-
author="
|
|
9
|
+
author="Diego Restrepo-Leal",
|
|
10
10
|
author_email="diegorestrepoleal@gmail.com",
|
|
11
11
|
url="https://gitlab.com/direl/direl_tool_kit",
|
|
12
12
|
packages=find_packages(),
|
|
@@ -14,6 +14,7 @@ setup(
|
|
|
14
14
|
"pandas>=1.0.0",
|
|
15
15
|
"numpy>=1.18.0",
|
|
16
16
|
"matplotlib>=3.0.0",
|
|
17
|
+
"openpyxl",
|
|
17
18
|
],
|
|
18
19
|
classifiers=[
|
|
19
20
|
"Programming Language :: Python :: 3",
|