direl-ts-tool-kit 0.2.2__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- direl_ts_tool_kit/plot/plot_ts.py +5 -5
- direl_ts_tool_kit/utilities/data_prep.py +87 -4
- {direl_ts_tool_kit-0.2.2.dist-info → direl_ts_tool_kit-0.3.0.dist-info}/METADATA +1 -1
- direl_ts_tool_kit-0.3.0.dist-info/RECORD +11 -0
- {direl_ts_tool_kit-0.2.2.dist-info → direl_ts_tool_kit-0.3.0.dist-info}/licenses/LICENCE +0 -0
- {direl_ts_tool_kit-0.2.2.dist-info → direl_ts_tool_kit-0.3.0.dist-info}/top_level.txt +0 -0
- direl_ts_tool_kit-0.2.2.dist-info/RECORD +0 -11
- {direl_ts_tool_kit-0.2.2.dist-info → direl_ts_tool_kit-0.3.0.dist-info}/WHEEL +0 -0
|
@@ -2,7 +2,7 @@ from .plot_style import *
|
|
|
2
2
|
|
|
3
3
|
|
|
4
4
|
def plot_time_series(
|
|
5
|
-
|
|
5
|
+
df_ts, variable, units="", color="BLUE_LINES", time_unit="Year", rot=90, auto_format_label=True
|
|
6
6
|
):
|
|
7
7
|
"""
|
|
8
8
|
Plots a time series with custom styling and dual-level grid visibility.
|
|
@@ -13,7 +13,7 @@ def plot_time_series(
|
|
|
13
13
|
|
|
14
14
|
Parameters
|
|
15
15
|
----------
|
|
16
|
-
|
|
16
|
+
df_ts : pd.DataFrame
|
|
17
17
|
The DataFrame containing the time series data. Must have a DatetimeIndex.
|
|
18
18
|
variable : str
|
|
19
19
|
The name of the column to plot. The label is automatically formatted
|
|
@@ -29,7 +29,7 @@ def plot_time_series(
|
|
|
29
29
|
Options include 'Year', 'Month', 'Weekday', or 'Day'. Defaults to "Year".
|
|
30
30
|
rot : int, optional
|
|
31
31
|
Rotation angle (in degrees) for the x-axis tick labels. Defaults to 90.
|
|
32
|
-
|
|
32
|
+
auto_format_label : bool, optional
|
|
33
33
|
Used internally for label formatting logic. Defaults to True.
|
|
34
34
|
|
|
35
35
|
Returns
|
|
@@ -54,7 +54,7 @@ def plot_time_series(
|
|
|
54
54
|
"""
|
|
55
55
|
|
|
56
56
|
fig, ax = plt.subplots()
|
|
57
|
-
ax.plot(
|
|
57
|
+
ax.plot(df_ts.index, df_ts[variable], linewidth=3, color=paper_colors[color])
|
|
58
58
|
|
|
59
59
|
if "-" in variable:
|
|
60
60
|
variable = "-".join(
|
|
@@ -78,7 +78,7 @@ def plot_time_series(
|
|
|
78
78
|
for i, j in enumerate(variable.split())
|
|
79
79
|
]
|
|
80
80
|
)
|
|
81
|
-
if
|
|
81
|
+
if auto_format_label
|
|
82
82
|
else variable
|
|
83
83
|
)
|
|
84
84
|
|
|
@@ -21,9 +21,6 @@ def parse_datetime_index(df_raw, date_column="date"):
|
|
|
21
21
|
df_ts : pd.DataFrame
|
|
22
22
|
A copy of the original DataFrame with the specified date column removed
|
|
23
23
|
and set as the DatetimeIndex. Ready for time series plotting.
|
|
24
|
-
original_dates : pd.Series
|
|
25
|
-
The original Series containing the date strings/objects, which was used
|
|
26
|
-
to create the new index.
|
|
27
24
|
"""
|
|
28
25
|
|
|
29
26
|
date_parsed = pd.to_datetime(df_raw[date_column])
|
|
@@ -32,4 +29,90 @@ def parse_datetime_index(df_raw, date_column="date"):
|
|
|
32
29
|
df_ts.drop(columns=[date_column], inplace=True)
|
|
33
30
|
df_ts.set_index(date_parsed, inplace=True)
|
|
34
31
|
|
|
35
|
-
return df_ts
|
|
32
|
+
return df_ts
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def generate_dates(df_ts, freq="MS"):
    """
    Generate a regular DatetimeIndex covering the time span of a DataFrame.

    The earliest and latest entries of the DataFrame index are used as the
    boundaries of a new, evenly spaced date sequence built with the
    requested frequency.

    Parameters
    ----------
    df_ts : pd.DataFrame
        The time series DataFrame whose index determines the start and end
        of the new date range. The index does not need to be sorted, and
        the DataFrame is not modified.
    freq : str, optional
        The frequency of the generated dates (e.g., 'D' for daily, 'MS' for
        Month Start). Defaults to "MS" (Month Start).

    Returns
    -------
    pd.DatetimeIndex
        A new DatetimeIndex spanning from the earliest to the latest index
        entry of 'df_ts', using the specified frequency. An empty
        DatetimeIndex is returned when 'df_ts' has no rows.

    Notes
    -----
    The boundaries are taken with ``index.min()`` / ``index.max()`` rather
    than by sorting the DataFrame in place, so the caller's data keeps its
    original row order.
    """
    index = df_ts.index
    if len(index) == 0:
        # Nothing to span: return an empty index instead of failing on index[0].
        return pd.DatetimeIndex([])

    # min()/max() locate the boundaries regardless of the current row order,
    # without the side effect of re-sorting the caller's DataFrame.
    start_date = index.min()
    end_date = index.max()

    return pd.date_range(start=start_date, end=end_date, freq=freq)
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def reindex_and_aggregate(df_ts, column_name, freq="MS"):
    """
    Aggregate a time series per date and align it to a regular date range.

    The values are summed per date, then merged against a complete date
    sequence produced by `generate_dates()`, so that time steps with no
    data appear as NaN in the result.

    Parameters
    ----------
    df_ts : pd.DataFrame
        The input DataFrame. The dates are expected in the index; the
        aggregation groups on a key named 'date'. If neither a column nor
        an index level is called 'date', a single-level index is labelled
        'date' on a renamed copy before grouping.
    column_name : str
        The name of the column containing the values to be aggregated and
        re-indexed.
    freq : str, optional
        The frequency of the target date range, forwarded to
        `generate_dates()`. Defaults to "MS" (Month Start).

    Returns
    -------
    pd.DataFrame
        A new DataFrame indexed by 'date_aux' with the columns 'date_aux'
        and `column_name`. Dates of the generated range that have no data
        hold NaN in `column_name`.

    Notes
    -----
    1. **Dependency:** This function relies on `generate_dates()` to create
       the target date sequence.
    2. **Aggregation:** `.groupby(["date"]).sum()` means that if multiple
       entries share the same date, their values are summed.
    3. **Outer merge:** The merge uses `how="outer"`, so input dates that
       are not part of the generated range are kept as rows whose
       'date_aux' value is missing.
    """
    # The grouping key below is the literal name "date". When the frame has
    # no such column or index level, label a single-level index accordingly
    # (on a renamed copy, leaving the caller's object untouched).
    if (
        "date" not in df_ts.columns
        and "date" not in df_ts.index.names
        and df_ts.index.nlevels == 1
    ):
        df_ts = df_ts.rename_axis("date")

    # Honour the caller's frequency instead of always using month start.
    date_aux = generate_dates(df_ts, freq=freq)
    df_date = pd.DataFrame({"date_aux": date_aux})
    aggregated = df_ts.groupby(["date"]).sum().reset_index()

    result = pd.merge(
        aggregated, df_date, left_on="date", right_on="date_aux", how="outer"
    )
    df_ts_new = result[["date_aux", column_name]]

    # Passing the Series (not the column label) keeps 'date_aux' as a column
    # as well as the index; the non-inplace form avoids mutating a slice.
    return df_ts_new.set_index(df_ts_new["date_aux"])
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
direl_ts_tool_kit/__init__.py,sha256=W99Wd3BeEFKOxT51TApURElbDJvqIjD8u_-qDoCYSJ0,94
|
|
2
|
+
direl_ts_tool_kit/plot/__init__.py,sha256=CMwyv-kiE74nwr3MJPL7gWIJmcfZ8UQCRu7mBGGQ4rI,49
|
|
3
|
+
direl_ts_tool_kit/plot/plot_style.py,sha256=vhzcDa3LzgkHuy-GnliofGZ8TDntkm3_1C5kgl2Gx3E,1010
|
|
4
|
+
direl_ts_tool_kit/plot/plot_ts.py,sha256=UUeNk-4rk7rNhiXtVaHSBXw-P34ssh8wUJ2o3Jbj5Gk,4855
|
|
5
|
+
direl_ts_tool_kit/utilities/__init__.py,sha256=jMtxYZUtwlhgI99sxe_8MMzsDnxtbTP7Ivh9tUOeIwQ,25
|
|
6
|
+
direl_ts_tool_kit/utilities/data_prep.py,sha256=807c21Rk5Q0To6fDfoWgGghU9KQMLXelTOx5OE-YgIw,4375
|
|
7
|
+
direl_ts_tool_kit-0.3.0.dist-info/licenses/LICENCE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
8
|
+
direl_ts_tool_kit-0.3.0.dist-info/METADATA,sha256=6yKyemO_u8JoRX8ku3rE_CgZ244Jg8bm_KxkrLeuV2U,950
|
|
9
|
+
direl_ts_tool_kit-0.3.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
10
|
+
direl_ts_tool_kit-0.3.0.dist-info/top_level.txt,sha256=vMCRudnGnsdRg_6fUftnG8PF2Y1m0bjBDMf3pCAp6bc,18
|
|
11
|
+
direl_ts_tool_kit-0.3.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
@@ -1,11 +0,0 @@
|
|
|
1
|
-
direl_ts_tool_kit/__init__.py,sha256=W99Wd3BeEFKOxT51TApURElbDJvqIjD8u_-qDoCYSJ0,94
|
|
2
|
-
direl_ts_tool_kit/plot/__init__.py,sha256=CMwyv-kiE74nwr3MJPL7gWIJmcfZ8UQCRu7mBGGQ4rI,49
|
|
3
|
-
direl_ts_tool_kit/plot/plot_style.py,sha256=vhzcDa3LzgkHuy-GnliofGZ8TDntkm3_1C5kgl2Gx3E,1010
|
|
4
|
-
direl_ts_tool_kit/plot/plot_ts.py,sha256=uYkPWz-8aWIjZGW22n5uH19CYHv-bLGrZaHPW9f3-3I,4814
|
|
5
|
-
direl_ts_tool_kit/utilities/__init__.py,sha256=jMtxYZUtwlhgI99sxe_8MMzsDnxtbTP7Ivh9tUOeIwQ,25
|
|
6
|
-
direl_ts_tool_kit/utilities/data_prep.py,sha256=KqVICzfjvsQqbekYiL21MLdno6spjwzF2PXCv6D-kNc,1225
|
|
7
|
-
direl_ts_tool_kit-0.2.2.dist-info/licenses/LICENCE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
8
|
-
direl_ts_tool_kit-0.2.2.dist-info/METADATA,sha256=SwDvWNWSkGCfMnU6UlieG9lurQsvUZoeTxmFZjia-VA,950
|
|
9
|
-
direl_ts_tool_kit-0.2.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
10
|
-
direl_ts_tool_kit-0.2.2.dist-info/top_level.txt,sha256=vMCRudnGnsdRg_6fUftnG8PF2Y1m0bjBDMf3pCAp6bc,18
|
|
11
|
-
direl_ts_tool_kit-0.2.2.dist-info/RECORD,,
|
|
File without changes
|