direl-ts-tool-kit 0.2.2__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (17)
  1. {direl_ts_tool_kit-0.2.2 → direl_ts_tool_kit-0.3.0}/LICENCE +0 -0
  2. {direl_ts_tool_kit-0.2.2 → direl_ts_tool_kit-0.3.0}/PKG-INFO +1 -1
  3. {direl_ts_tool_kit-0.2.2 → direl_ts_tool_kit-0.3.0}/README.md +0 -0
  4. {direl_ts_tool_kit-0.2.2 → direl_ts_tool_kit-0.3.0}/direl_ts_tool_kit/__init__.py +0 -0
  5. {direl_ts_tool_kit-0.2.2 → direl_ts_tool_kit-0.3.0}/direl_ts_tool_kit/plot/__init__.py +0 -0
  6. {direl_ts_tool_kit-0.2.2 → direl_ts_tool_kit-0.3.0}/direl_ts_tool_kit/plot/plot_style.py +0 -0
  7. {direl_ts_tool_kit-0.2.2 → direl_ts_tool_kit-0.3.0}/direl_ts_tool_kit/plot/plot_ts.py +5 -5
  8. {direl_ts_tool_kit-0.2.2 → direl_ts_tool_kit-0.3.0}/direl_ts_tool_kit/utilities/__init__.py +0 -0
  9. direl_ts_tool_kit-0.3.0/direl_ts_tool_kit/utilities/data_prep.py +118 -0
  10. {direl_ts_tool_kit-0.2.2 → direl_ts_tool_kit-0.3.0}/direl_ts_tool_kit.egg-info/PKG-INFO +1 -1
  11. {direl_ts_tool_kit-0.2.2 → direl_ts_tool_kit-0.3.0}/direl_ts_tool_kit.egg-info/SOURCES.txt +0 -0
  12. {direl_ts_tool_kit-0.2.2 → direl_ts_tool_kit-0.3.0}/direl_ts_tool_kit.egg-info/dependency_links.txt +0 -0
  13. {direl_ts_tool_kit-0.2.2 → direl_ts_tool_kit-0.3.0}/direl_ts_tool_kit.egg-info/requires.txt +0 -0
  14. {direl_ts_tool_kit-0.2.2 → direl_ts_tool_kit-0.3.0}/direl_ts_tool_kit.egg-info/top_level.txt +0 -0
  15. {direl_ts_tool_kit-0.2.2 → direl_ts_tool_kit-0.3.0}/setup.cfg +0 -0
  16. {direl_ts_tool_kit-0.2.2 → direl_ts_tool_kit-0.3.0}/setup.py +1 -1
  17. direl_ts_tool_kit-0.2.2/direl_ts_tool_kit/utilities/data_prep.py +0 -35
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: direl-ts-tool-kit
3
- Version: 0.2.2
3
+ Version: 0.3.0
4
4
  Summary: A toolbox for time series analysis and visualization.
5
5
  Home-page: https://gitlab.com/direl/direl_tool_kit
6
6
  Author: Diego Restrepo-Leal
@@ -2,7 +2,7 @@ from .plot_style import *
2
2
 
3
3
 
4
4
  def plot_time_series(
5
- df1, variable, units="", color="BLUE_LINES", time_unit="Year", rot=90, method=True
5
+ df_ts, variable, units="", color="BLUE_LINES", time_unit="Year", rot=90, auto_format_label=True
6
6
  ):
7
7
  """
8
8
  Plots a time series with custom styling and dual-level grid visibility.
@@ -13,7 +13,7 @@ def plot_time_series(
13
13
 
14
14
  Parameters
15
15
  ----------
16
- df1 : pd.DataFrame
16
+ df_ts : pd.DataFrame
17
17
  The DataFrame containing the time series data. Must have a DatetimeIndex.
18
18
  variable : str
19
19
  The name of the column to plot. The label is automatically formatted
@@ -29,7 +29,7 @@ def plot_time_series(
29
29
  Options include 'Year', 'Month', 'Weekday', or 'Day'. Defaults to "Year".
30
30
  rot : int, optional
31
31
  Rotation angle (in degrees) for the x-axis tick labels. Defaults to 90.
32
- method : bool, optional
32
+ auto_format_label : bool, optional
33
33
  Whether to apply the automatic formatting to the label built from `variable`; when False, `variable` is used unchanged. Defaults to True.
34
34
 
35
35
  Returns
@@ -54,7 +54,7 @@ def plot_time_series(
54
54
  """
55
55
 
56
56
  fig, ax = plt.subplots()
57
- ax.plot(df1.index, df1[variable], linewidth=3, color=paper_colors[color])
57
+ ax.plot(df_ts.index, df_ts[variable], linewidth=3, color=paper_colors[color])
58
58
 
59
59
  if "-" in variable:
60
60
  variable = "-".join(
@@ -78,7 +78,7 @@ def plot_time_series(
78
78
  for i, j in enumerate(variable.split())
79
79
  ]
80
80
  )
81
- if method
81
+ if auto_format_label
82
82
  else variable
83
83
  )
84
84
 
@@ -0,0 +1,118 @@
1
+ import pandas as pd
2
+
3
+
4
def parse_datetime_index(df_raw, date_column="date"):
    """
    Parses a specified column into datetime objects and sets it as the DataFrame index.

    The input DataFrame is not modified; a new DataFrame is returned.

    Parameters
    ----------
    df_raw : pd.DataFrame
        The raw DataFrame containing the data, including the column with the
        date values to parse.
    date_column : str, optional
        The name of the column in 'df_raw' that contains the date/time information.
        Defaults to "date".

    Returns
    -------
    df_ts : pd.DataFrame
        A new DataFrame with 'date_column' removed from the columns and its
        parsed values used as the index (the index keeps the name of
        'date_column').

    Raises
    ------
    KeyError
        If 'date_column' is not a column of 'df_raw'.
    """
    # Parse first so that an invalid or missing column fails before any
    # intermediate frame is built.
    date_parsed = pd.to_datetime(df_raw[date_column])

    # drop() returns a new frame, so 'df_raw' is left untouched. Passing the
    # parsed Series (rather than a column label) to set_index() installs its
    # values positionally as the new index.
    return df_raw.drop(columns=[date_column]).set_index(date_parsed)
33
+
34
+
35
def generate_dates(df_ts, freq="MS"):
    """
    Generates a continuous DatetimeIndex covering the time span of the input DataFrame.

    The function determines the earliest and latest dates from the existing
    DataFrame index and creates a new, regular date sequence based on the
    specified frequency.

    Parameters
    ----------
    df_ts : pd.DataFrame
        The time series DataFrame whose index determines the start and end of the
        new date range. It is not modified.
    freq : str, optional
        The frequency of the generated dates (e.g., 'D' for daily, 'MS' for Month Start).
        Defaults to "MS" (Month Start).

    Returns
    -------
    pd.DatetimeIndex
        A new DatetimeIndex spanning from the earliest index entry to the latest
        index entry of 'df_ts', using the specified frequency. An empty
        DatetimeIndex is returned when 'df_ts' has no rows.

    Notes
    -----
    The boundaries are taken as the minimum and maximum of the index, so the
    rows of 'df_ts' do not need to be sorted and their order is left as-is.
    """
    index = df_ts.index

    # Nothing to span: avoid indexing into an empty index.
    if len(index) == 0:
        return pd.DatetimeIndex([])

    # min()/max() find the boundaries without sorting (and therefore without
    # reordering) the caller's DataFrame.
    return pd.date_range(start=index.min(), end=index.max(), freq=freq)
70
+
71
+
72
def reindex_and_aggregate(df_ts, column_name, freq="MS"):
    """
    Re-indexes a time series DataFrame to a regular frequency, aggregates values,
    and introduces NaN for missing time steps.

    Values of 'column_name' that share the same index timestamp are summed, and
    the aggregated values are then merged onto a complete date range so that
    time steps without data appear as NaN.

    Parameters
    ----------
    df_ts : pd.DataFrame
        The input DataFrame, indexed by date. The index name does not matter.
    column_name : str
        The name of the column containing the values to be aggregated and re-indexed.
    freq : str, optional
        The frequency of the target date range (e.g., 'D' for daily, 'MS' for
        Month Start). Defaults to "MS".

    Returns
    -------
    pd.DataFrame
        A new DataFrame indexed by 'date_aux' (the target dates) with two
        columns: 'date_aux' and 'column_name'. Target dates without data hold
        NaN in 'column_name'.

    Raises
    ------
    KeyError
        If 'column_name' is not a column of 'df_ts'.

    Notes
    -----
    1. **Dependency:** This function relies on `generate_dates()` to create the
       target date sequence.
    2. **Aggregation:** Entries sharing the same timestamp are summed.
    3. **Off-grid timestamps:** The merge is an outer join, so an observation
       whose timestamp is not part of the generated date range is kept, but
       its 'date_aux' (and therefore its index label) is NaT.
    """
    # Aggregate only the requested column and group on the index itself, so
    # the function does not depend on the index being named "date" or on the
    # other columns being summable.
    aggregated = df_ts[column_name].groupby(level=0).sum()

    # Build the target dates from the locally-owned aggregated frame so the
    # helper never touches the caller's DataFrame.
    date_aux = generate_dates(aggregated.to_frame(), freq=freq)
    df_date = pd.DataFrame({"date_aux": date_aux})

    # Expose the index as an explicit "date" column for the merge.
    observed = aggregated.rename_axis("date").reset_index()

    result = pd.merge(
        observed, df_date, left_on="date", right_on="date_aux", how="outer"
    )

    # Passing the Series (not the column label) to set_index() keeps
    # 'date_aux' as a column as well as the index, and returns a new frame
    # instead of mutating a slice of 'result'.
    return result[["date_aux", column_name]].set_index(result["date_aux"])
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: direl-ts-tool-kit
3
- Version: 0.2.2
3
+ Version: 0.3.0
4
4
  Summary: A toolbox for time series analysis and visualization.
5
5
  Home-page: https://gitlab.com/direl/direl_tool_kit
6
6
  Author: Diego Restrepo-Leal
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
2
2
 
3
3
  setup(
4
4
  name="direl-ts-tool-kit",
5
- version="0.2.2",
5
+ version="0.3.0",
6
6
  description="A toolbox for time series analysis and visualization.",
7
7
  long_description=open("README.md", encoding="utf-8").read(),
8
8
  long_description_content_type="text/markdown",
@@ -1,35 +0,0 @@
1
- import pandas as pd
2
-
3
-
4
def parse_datetime_index(df_raw, date_column="date"):
    """
    Index a copy of the raw DataFrame by the parsed values of a date column.

    Parameters
    ----------
    df_raw : pd.DataFrame
        The raw DataFrame; it must contain 'date_column'.
    date_column : str, optional
        Name of the column holding the date/time information.
        Defaults to "date".

    Returns
    -------
    df_ts : pd.DataFrame
        A new DataFrame without 'date_column', indexed by its parsed
        datetime values.
    original_dates : pd.Series
        The unparsed 'date_column' Series taken from 'df_raw'.
    """
    original_dates = df_raw[date_column]
    datetime_values = pd.to_datetime(original_dates)

    # drop() yields a fresh frame; set_index() with a Series installs its
    # values as the index of that fresh frame.
    df_ts = df_raw.drop(columns=[date_column]).set_index(datetime_values)

    return df_ts, original_dates