direl-ts-tool-kit 0.2.2__py3-none-any.whl → 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,7 +2,7 @@ from .plot_style import *
2
2
 
3
3
 
4
4
  def plot_time_series(
5
- df1, variable, units="", color="BLUE_LINES", time_unit="Year", rot=90, method=True
5
+ df_ts, variable, units="", color="BLUE_LINES", time_unit="Year", rot=90, auto_format_label=True
6
6
  ):
7
7
  """
8
8
  Plots a time series with custom styling and dual-level grid visibility.
@@ -13,7 +13,7 @@ def plot_time_series(
13
13
 
14
14
  Parameters
15
15
  ----------
16
- df1 : pd.DataFrame
16
+ df_ts : pd.DataFrame
17
17
  The DataFrame containing the time series data. Must have a DatetimeIndex.
18
18
  variable : str
19
19
  The name of the column to plot. The label is automatically formatted
@@ -29,7 +29,7 @@ def plot_time_series(
29
29
  Options include 'Year', 'Month', 'Weekday', or 'Day'. Defaults to "Year".
30
30
  rot : int, optional
31
31
  Rotation angle (in degrees) for the x-axis tick labels. Defaults to 90.
32
- method : bool, optional
32
+ auto_format_label : bool, optional
33
33
  If True, the label built from `variable` is auto-formatted; if False, `variable` is used as-is. Defaults to True.
34
34
 
35
35
  Returns
@@ -54,7 +54,7 @@ def plot_time_series(
54
54
  """
55
55
 
56
56
  fig, ax = plt.subplots()
57
- ax.plot(df1.index, df1[variable], linewidth=3, color=paper_colors[color])
57
+ ax.plot(df_ts.index, df_ts[variable], linewidth=3, color=paper_colors[color])
58
58
 
59
59
  if "-" in variable:
60
60
  variable = "-".join(
@@ -78,7 +78,7 @@ def plot_time_series(
78
78
  for i, j in enumerate(variable.split())
79
79
  ]
80
80
  )
81
- if method
81
+ if auto_format_label
82
82
  else variable
83
83
  )
84
84
 
@@ -100,6 +100,10 @@ def plot_time_series(
100
100
  if time_unit == "Day":
101
101
  ax.xaxis.set_major_locator(mdates.DayLocator())
102
102
  ax.xaxis.set_minor_locator(mdates.HourLocator())
103
+
104
+ if time_unit == "Hour":
105
+ ax.xaxis.set_major_locator(mdates.HourLocator())
106
+ ax.xaxis.set_minor_locator(mdates.MinuteLocator())
103
107
 
104
108
  ax.tick_params(axis="x", rotation=rot)
105
109
  ax.grid(which="both")
@@ -1,12 +1,12 @@
1
1
  import pandas as pd
2
2
 
3
3
 
4
- def parse_datetime_index(df_raw, date_column="date"):
4
+ def parse_datetime_index(df_raw, date_column="date", format=None):
5
5
  """
6
6
  Parses a specified column into datetime objects and sets it as the DataFrame index.
7
7
 
8
- This function is crucial for preparing raw data (df_raw) for time series analysis
9
- by ensuring the DataFrame is indexed by the correct datetime type.
8
+ This function prepares raw data for time series analysis by ensuring the
9
+ DataFrame is indexed by the correct datetime type.
10
10
 
11
11
  Parameters
12
12
  ----------
@@ -15,21 +15,111 @@ def parse_datetime_index(df_raw, date_column="date"):
15
15
  date_column : str, optional
16
16
  The name of the column in 'df_raw' that contains the date/time information.
17
17
  Defaults to "date".
18
+ format : str, optional
19
+ The explicit format string (e.g., '%Y%m%d', '%Y-%m-%d %H:%M:%S')
20
+ to parse the dates, passed to `pd.to_datetime`. If None (default),
21
+ Pandas attempts to infer the format automatically.
18
22
 
19
23
  Returns
20
24
  -------
21
25
  df_ts : pd.DataFrame
22
- A copy of the original DataFrame with the specified date column removed
23
- and set as the DatetimeIndex. Ready for time series plotting.
24
- original_dates : pd.Series
25
- The original Series containing the date strings/objects, which was used
26
- to create the new index.
26
+ A copy of the original DataFrame with the specified date column removed
27
+ and set as the DatetimeIndex. The returned DataFrame is ready for
28
+ time series operations.
27
29
  """
30
+ if not format:
31
+ date_parsed = pd.to_datetime(df_raw[date_column])
32
+ else:
33
+ date_parsed = pd.to_datetime(df_raw[date_column], format=format)
28
34
 
29
- date_parsed = pd.to_datetime(df_raw[date_column])
30
35
  df_ts = df_raw.copy()
31
- original_dates = df_raw[date_column]
32
36
  df_ts.drop(columns=[date_column], inplace=True)
33
37
  df_ts.set_index(date_parsed, inplace=True)
34
38
 
35
- return df_ts, original_dates
39
+ return df_ts
40
+
41
+
42
+ def generate_dates(df_ts, freq="MS"):
43
+ """
44
+ Generates a continuous DatetimeIndex covering the time span of the input DataFrame.
45
+
46
+ The function determines the start and end dates from the existing DataFrame index
47
+ and creates a new, regular date sequence based on the specified frequency.
48
+
49
+ Parameters
50
+ ----------
51
+ df_ts : pd.DataFrame
52
+ The time series DataFrame whose index determines the start and end of the
53
+ new date range.
54
+ freq : str, optional
55
+ The frequency of the generated dates (e.g., 'D' for daily, 'MS' for Month Start).
56
+ Defaults to "MS" (Month Start).
57
+
58
+ Returns
59
+ -------
60
+ pd.DatetimeIndex
61
+ A new DatetimeIndex spanning from the first index entry to the last index entry
62
+ of 'df_ts', using the specified frequency.
63
+
64
+ Notes
65
+ -----
66
+ The function relies on the index of 'df_ts' to find the boundaries. It sorts
67
+ 'df_ts' in place first (`sort_index(inplace=True)`), so the caller's DataFrame is
68
+ reordered as a side effect; this ensures the earliest and latest dates are found.
69
+ """
70
+ df_ts.sort_index(inplace=True)
71
+ start_date = df_ts.index[0]
72
+ end_date = df_ts.index[-1]
73
+
74
+ dates = pd.date_range(start=start_date, end=end_date, freq=freq)
75
+
76
+ return dates
77
+
78
+
79
+ def reindex_and_aggregate(df_ts, column_name, freq="MS"):
80
+ """
81
+ Re-indexes a time series DataFrame to a regular frequency, aggregates values,
82
+ and introduces NaN for missing time steps.
83
+
84
+ This function first identifies the time range from the original (potentially irregular)
85
+ index, aggregates data if necessary (e.g., if multiple entries exist per time step),
86
+ and then merges the data onto a complete date range, effectively filling gaps
87
+ with NaN values.
88
+
89
+ Parameters
90
+ ----------
91
+ df_ts : pd.DataFrame
92
+ The input DataFrame, indexed by date. The index (or a column) must be named
93
+ 'date', because the function groups with `groupby(["date"])` and then merges
94
+ on the resulting 'date' column.
95
+ column_name : str
96
+ The name of the column containing the values to be aggregated and re-indexed.
97
+
98
+ Returns
99
+ -------
100
+ pd.DataFrame
101
+ A new DataFrame indexed by 'date_aux', the regular date sequence produced by
102
+ `generate_dates` (currently always called with freq='MS'; the `freq` argument
103
+ is not forwarded), with the aggregated values and NaN for missing time steps.
104
+
105
+ Notes
106
+ -----
107
+ 1. **Dependency:** This function relies on the external function `generate_dates()`
108
+ to create the target date sequence.
109
+ 2. **Aggregation:** The use of `.groupby(["date"]).sum()` implies that if
110
+ multiple entries share the same date, their values will be summed.
111
+ 3. **Index Handling:** For the merge operation to work, the original index
112
+ is converted to a column named 'date' (via the explicit `reset_index()`
113
+ call after the `groupby`).
114
+ """
115
+
116
+ date_aux = generate_dates(df_ts, freq="MS")
117
+ df_date = pd.DataFrame({"date_aux": date_aux})
118
+ df_ts = df_ts.groupby(["date"]).sum().reset_index()
119
+
120
+ result = pd.merge(df_ts, df_date, left_on="date", right_on="date_aux", how="outer")
121
+ df_ts_new = result[["date_aux", column_name]]
122
+ df_ts_new.set_index(df_ts_new["date_aux"], inplace=True)
123
+ df_ts_new.notnull().apply(pd.Series.value_counts)
124
+
125
+ return df_ts_new
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: direl-ts-tool-kit
3
- Version: 0.2.2
3
+ Version: 0.3.2
4
4
  Summary: A toolbox for time series analysis and visualization.
5
5
  Home-page: https://gitlab.com/direl/direl_tool_kit
6
6
  Author: Diego Restrepo-Leal
@@ -0,0 +1,11 @@
1
+ direl_ts_tool_kit/__init__.py,sha256=W99Wd3BeEFKOxT51TApURElbDJvqIjD8u_-qDoCYSJ0,94
2
+ direl_ts_tool_kit/plot/__init__.py,sha256=CMwyv-kiE74nwr3MJPL7gWIJmcfZ8UQCRu7mBGGQ4rI,49
3
+ direl_ts_tool_kit/plot/plot_style.py,sha256=vhzcDa3LzgkHuy-GnliofGZ8TDntkm3_1C5kgl2Gx3E,1010
4
+ direl_ts_tool_kit/plot/plot_ts.py,sha256=OPmdaXGUv_oNaXQ4epiPX1fKTtxyaWuoGfHPa5if96U,5008
5
+ direl_ts_tool_kit/utilities/__init__.py,sha256=jMtxYZUtwlhgI99sxe_8MMzsDnxtbTP7Ivh9tUOeIwQ,25
6
+ direl_ts_tool_kit/utilities/data_prep.py,sha256=IZJShsSGWxv6Q-rM7m69kGADNUhCzs-KfhphgqD29Ok,4703
7
+ direl_ts_tool_kit-0.3.2.dist-info/licenses/LICENCE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
+ direl_ts_tool_kit-0.3.2.dist-info/METADATA,sha256=n0O3607YFBQYXu6eu27dutF5o_T2xC11JyUucb8Oafs,950
9
+ direl_ts_tool_kit-0.3.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
10
+ direl_ts_tool_kit-0.3.2.dist-info/top_level.txt,sha256=vMCRudnGnsdRg_6fUftnG8PF2Y1m0bjBDMf3pCAp6bc,18
11
+ direl_ts_tool_kit-0.3.2.dist-info/RECORD,,
@@ -1,11 +0,0 @@
1
- direl_ts_tool_kit/__init__.py,sha256=W99Wd3BeEFKOxT51TApURElbDJvqIjD8u_-qDoCYSJ0,94
2
- direl_ts_tool_kit/plot/__init__.py,sha256=CMwyv-kiE74nwr3MJPL7gWIJmcfZ8UQCRu7mBGGQ4rI,49
3
- direl_ts_tool_kit/plot/plot_style.py,sha256=vhzcDa3LzgkHuy-GnliofGZ8TDntkm3_1C5kgl2Gx3E,1010
4
- direl_ts_tool_kit/plot/plot_ts.py,sha256=uYkPWz-8aWIjZGW22n5uH19CYHv-bLGrZaHPW9f3-3I,4814
5
- direl_ts_tool_kit/utilities/__init__.py,sha256=jMtxYZUtwlhgI99sxe_8MMzsDnxtbTP7Ivh9tUOeIwQ,25
6
- direl_ts_tool_kit/utilities/data_prep.py,sha256=KqVICzfjvsQqbekYiL21MLdno6spjwzF2PXCv6D-kNc,1225
7
- direl_ts_tool_kit-0.2.2.dist-info/licenses/LICENCE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
- direl_ts_tool_kit-0.2.2.dist-info/METADATA,sha256=SwDvWNWSkGCfMnU6UlieG9lurQsvUZoeTxmFZjia-VA,950
9
- direl_ts_tool_kit-0.2.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
10
- direl_ts_tool_kit-0.2.2.dist-info/top_level.txt,sha256=vMCRudnGnsdRg_6fUftnG8PF2Y1m0bjBDMf3pCAp6bc,18
11
- direl_ts_tool_kit-0.2.2.dist-info/RECORD,,