mgplot 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,74 @@
1
+ """
2
+ seas_trend_plot.py
3
+ This module contains a function to create seasonal+trend plots.
4
+ """
5
+
6
+ # --- imports
7
+ from matplotlib.pyplot import Axes
8
+
9
+ from mgplot.settings import DataT
10
+ from mgplot.line_plot import line_plot
11
+ from mgplot.utilities import get_color_list, get_setting, check_clean_timeseries
12
+ from mgplot.kw_type_checking import report_kwargs
13
+
14
+
15
+ # --- constants
16
# Keyword names that seastrend_plot() gives defaults for before
# handing them on to line_plot().
COLOR = "color"
WIDTH = "width"
STYLE = "style"
ANNOTATE = "annotate"
ROUNDING = "rounding"
LEGEND = "legend"
DROPNA = "dropna"
23
+
24
+
25
+ # --- public functions
26
def seastrend_plot(data: DataT, **kwargs) -> Axes:
    """
    Plot a DataFrame, where the first column is seasonally
    adjusted data, and the second column is trend data.

    Arguments:
    - data: DataFrame - the data to plot with the first column
      being the seasonally adjusted data, and the second column
      being the trend data.
    - kwargs: the remaining arguments are the same as those passed
      to line_plot(). Where color, width, style, annotate, rounding,
      legend or dropna are not supplied, the seasonal+trend defaults
      set below are used instead.

    Raises:
    - ValueError: if data does not have at least 2 columns (this
      includes data with no columns attribute, such as a Series).

    Returns:
    - a matplotlib Axes object
    """

    # Note: we rely on the line_plot() function to do most of the work,
    # including constraining the data to the plot_from keyword argument.

    # --- sanity checks
    report_kwargs(called_from="seastrend_plot", **kwargs)
    data = check_clean_timeseries(data)
    # A Series has no .columns attribute; report it as "too few columns"
    # rather than letting an AttributeError escape.
    columns = getattr(data, "columns", None)
    if columns is None or len(columns) < 2:
        raise ValueError(
            "seastrend_plot() expects a DataFrame data item with at least 2 columns."
        )
    # let line_plot() handle validate_kwargs()

    # --- defaults, applied only where the caller has not supplied a value
    kwargs.setdefault(COLOR, get_color_list(2))
    kwargs.setdefault(WIDTH, [get_setting("line_normal"), get_setting("line_wide")])
    kwargs.setdefault(STYLE, ["-", "-"])
    kwargs.setdefault(ANNOTATE, [True, False])
    kwargs.setdefault(ROUNDING, True)
    kwargs.setdefault(LEGEND, True)
    # series breaks are common in seas-trend data
    kwargs.setdefault(DROPNA, False)

    return line_plot(data, **kwargs)
mgplot/settings.py ADDED
@@ -0,0 +1,164 @@
1
+ """
2
+ settings.py
3
+ This module provides a mechanism for managing global settings.
4
+ """
5
+
6
+ # --- imports
7
+ from typing import TypedDict, TypeVar, Any
8
+ from pathlib import Path
9
+
10
+ import matplotlib as mpl
11
+ import matplotlib.pyplot as plt
12
+ from pandas import Series, DataFrame
13
+
14
+
15
+ # --- default types
16
+ DataT = TypeVar("DataT", Series, DataFrame) # python 3.11+
17
+
18
+
19
+ # --- global settings
20
+ plt.style.use("fivethirtyeight")
21
+ mpl.rcParams["font.size"] = 11
22
+
23
+
24
+ # --- default settings
25
+ class _DefaultValues(TypedDict):
26
+ """
27
+ _DefaultValues is a dictionary of default values for the settings.
28
+ It is a TypedDict, which means that it knows a fixed set of keys
29
+ and their corresponding types.
30
+ """
31
+
32
+ file_type: str
33
+ figsize: tuple[float, float]
34
+ file_dpi: int
35
+
36
+ line_narrow: float
37
+ line_normal: float
38
+ line_wide: float
39
+
40
+ bar_width: float
41
+
42
+ legend_font_size: float | str
43
+ legend: dict[str, Any]
44
+
45
+ colors: dict[int, list[str]] # used by get_color_list()
46
+
47
+ chart_dir: str
48
+
49
+
50
# The live settings store: read by get_setting(), updated by set_setting().
_mgplot_defaults: _DefaultValues = {
    "file_type": "png",
    "figsize": (9.0, 4.5),
    "file_dpi": 300,
    "line_narrow": 0.75,
    "line_normal": 1.0,
    "line_wide": 2.0,
    "bar_width": 0.8,
    "legend_font_size": "small",
    "legend": {
        "loc": "best",
        "fontsize": "x-small",
    },
    # palettes keyed by an integer number of colors
    "colors": {
        1: ["#dd0000"],
        5: ["royalblue", "darkorange", "forestgreen", "#dd0000", "gray"],
        9: [
            "darkblue",
            "darkorange",
            "forestgreen",
            "#dd0000",
            "purple",
            "gold",
            "lightcoral",
            "lightseagreen",
            "gray",
        ],
    },
    "chart_dir": ".",
}
80
+
81
+
82
+ # --- get/change settings
83
+
84
+
85
def get_setting(setting: str) -> Any:
    """
    Look up one of the global settings.

    Arguments:
    - setting: str - name of the setting to get. The possible settings are:
        - file_type: str - the file type to use for saving plots
        - figsize: tuple[float, float] - the figure size to use for plots
        - file_dpi: int - the DPI to use for saving plots
        - line_narrow: float - the line width for narrow lines
        - line_normal: float - the line width for normal lines
        - line_wide: float - the line width for wide lines
        - bar_width: float - the width of bars in bar plots
        - legend_font_size: float | str - the font size for legends
        - legend: dict[str, Any] - the legend settings
        - colors: dict[int, list[str]] - a dictionary of colors for
          different numbers of lines
        - chart_dir: str - the directory to save charts in

    Raises:
    - KeyError: if the setting is not found

    Returns:
    - value: Any - the stored value itself (not a copy)
    """
    try:
        return _mgplot_defaults[setting]  # type: ignore[literal-required]
    except KeyError:
        # re-raise with a message that names the missing setting
        raise KeyError(f"Setting '{setting}' not found in _mgplot_defaults.") from None
113
+
114
+
115
def set_setting(setting: str, value: Any) -> None:
    """
    Change one of the global settings.

    Only settings that already exist can be changed; the value is
    stored as given.

    Arguments:
    - setting: str - name of the setting to set (see get_setting())
    - value: Any - the value to set the setting to

    Raises:
    - KeyError: if the setting is not found
    """

    if setting in _mgplot_defaults:
        _mgplot_defaults[setting] = value  # type: ignore[literal-required]
        return
    raise KeyError(f"Setting '{setting}' not found in _mgplot_defaults.")
128
+
129
+
130
def clear_chart_dir() -> None:
    """
    Delete the graph-image files held in the global chart_dir.

    Only regular files directly inside the directory whose names end
    in .png, .svg, .jpg or .jpeg are removed. The directory itself is
    kept, and it is created first if it does not already exist.
    """

    image_suffixes = ("png", "svg", "jpg", "jpeg")
    directory = Path(get_setting("chart_dir"))
    directory.mkdir(parents=True, exist_ok=True)
    for suffix in image_suffixes:
        for candidate in directory.glob(f"*.{suffix}"):
            if not candidate.is_file():
                continue  # leave anything that is not a regular file
            candidate.unlink()
144
+
145
+
146
def set_chart_dir(chart_dir: str) -> None:
    """
    Set the global chart directory used by finalise_plot(), so that it
    does not need to be passed in each call to finalise_plot().

    This wraps set_setting() for the chart_dir setting and also creates
    the directory if it does not exist. An empty chart_dir is treated
    as the current directory (".").

    Note: Path.mkdir() may raise an exception if a directory cannot be created.

    Arguments:
    - chart_dir: str - the directory to set as the chart directory
    """

    target = chart_dir if chart_dir else "."  # avoid the empty string
    Path(target).mkdir(parents=True, exist_ok=True)
    set_setting("chart_dir", target)
mgplot/summary_plot.py ADDED
@@ -0,0 +1,240 @@
1
+ """
2
+ summary_plot.py:
3
+ Produce a summary plot for the data in a given DataFrame.
4
+ The data is normalised to z-scores and scaled.
5
+ """
6
+
7
+ # --- imports
8
+ # system imports
9
+ from typing import Any
10
+
11
+ # from collections.abc import Sequence
12
+
13
+ # analytic third-party imports
14
+ from numpy import ndarray, array
15
+ from matplotlib.pyplot import Axes, subplots
16
+ from pandas import DataFrame, Period
17
+
18
+ # local imports
19
+ from mgplot.settings import DataT
20
+ from mgplot.utilities import constrain_data, check_clean_timeseries
21
+ from mgplot.kw_type_checking import (
22
+ report_kwargs,
23
+ ExpectedTypeDict,
24
+ validate_expected,
25
+ validate_kwargs,
26
+ )
27
+
28
+
29
+ # --- constants
30
# the plot_type values that summary_plot() distinguishes between
ZSCORES = "zscores"
ZSCALED = "zscaled"

# keyword arguments for summary_plot(), with the type(s) expected for each
SUMMARY_KW_TYPES: ExpectedTypeDict = {
    "verbose": bool,
    "middle": float,
    "plot_type": str,
    "plot_from": (int, Period, type(None)),
}
# check the table above once, when the module is imported
validate_expected(SUMMARY_KW_TYPES, "summary_plot")
40
+
41
+
42
+ # --- functions
43
+ def _calc_quantiles(middle: float) -> ndarray:
44
+ """Calculate the quantiles for the middle of the data."""
45
+ return array([(1 - middle) / 2.0, 1 - (1 - middle) / 2.0])
46
+
47
+
48
+ def _calculate_z(
49
+ original: DataFrame, # only contains the data points of interest
50
+ middle: float, # middle proportion of data to highlight (eg. 0.8)
51
+ verbose: bool = False, # print the summary data
52
+ ) -> tuple[DataFrame, DataFrame]:
53
+ """Calculate z-scores, scaled z-scores and middle quantiles.
54
+ Return z_scores, z_scaled, q (which are the quantiles for the
55
+ start/end of the middle proportion of data to highlight)."""
56
+
57
+ # calculate z-scores, scaled scores and middle quantiles
58
+ z_scores: DataFrame = (original - original.mean()) / original.std()
59
+ z_scaled: DataFrame = (
60
+ # scale z-scores between -1 and +1
61
+ (((z_scores - z_scores.min()) / (z_scores.max() - z_scores.min())) - 0.5)
62
+ * 2
63
+ )
64
+ q_middle = _calc_quantiles(middle)
65
+
66
+ if verbose:
67
+ frame = DataFrame(
68
+ {
69
+ "count": original.count(),
70
+ "mean": original.mean(),
71
+ "median": original.median(),
72
+ "min shaded": original.quantile(q=q_middle[0]),
73
+ "max shaded": original.quantile(q=q_middle[1]),
74
+ "z-scores": z_scores.iloc[-1],
75
+ "scaled": z_scaled.iloc[-1],
76
+ }
77
+ )
78
+ print(frame)
79
+
80
+ return DataFrame(z_scores), DataFrame(z_scaled) # syntactic sugar for type hinting
81
+
82
+
83
def _plot_middle_bars(
    adjusted: DataFrame,
    middle: float,
    kwargs: dict[str, Any],  # must be a dictionary, not a splat
) -> Axes:
    """Draw one horizontal bar per column, spanning the middle
    (typically 80%) of that column's data, on a new figure.

    Arguments:
    - adjusted: DataFrame - the data to take the quantiles from
    - middle: float - the proportion of data the bar should cover
    - kwargs: dict - updated in place; the x-axis limits are stored
      under "xlim" so the caller can read them back

    Returns the matplotlib Axes object.
    """

    bounds = _calc_quantiles(middle)
    lower_q, upper_q = bounds[0], bounds[1]
    # one row per column of adjusted, one column per quantile
    lo_hi: DataFrame = adjusted.quantile(q=bounds).T

    # x-axis limits: wide enough for the bars, the latest row and
    # +/- span, with some extra space at each end
    span, space = 1.15, 0.2
    latest = adjusted.iloc[-1]
    low = min(latest.min(), lo_hi.min().min(), -span) - space
    high = max(latest.max(), lo_hi.max().max(), span) + space
    kwargs["xlim"] = (low, high)  # remember the x-axis limits

    _fig, ax = subplots()
    bar_start = lo_hi[lower_q]
    bar_length = lo_hi[upper_q] - bar_start
    ax.barh(
        y=lo_hi.index,
        width=bar_length,
        left=bar_start,
        color="#bbbbbb",
        label=f"Middle {middle*100:0.0f}% of prints",
    )
    return ax
108
+
109
+
110
+ def _plot_latest_datapoint(
111
+ ax: Axes,
112
+ original: DataFrame,
113
+ adjusted: DataFrame,
114
+ f_size: int,
115
+ ) -> None:
116
+ """Add the latest datapoints to the summary plot"""
117
+
118
+ ax.scatter(adjusted.iloc[-1], adjusted.columns, color="darkorange", label="Latest")
119
+ f_size = 10
120
+ row = adjusted.index[-1]
121
+ for col_num, col_name in enumerate(original.columns):
122
+ ax.text(
123
+ x=adjusted.at[row, col_name],
124
+ y=col_num,
125
+ s=f"{original.at[row, col_name]:.1f}",
126
+ ha="center",
127
+ va="center",
128
+ size=f_size,
129
+ )
130
+
131
+
132
+ def _label_extremes(
133
+ ax: Axes,
134
+ data: tuple[DataFrame, DataFrame],
135
+ plot_type: str,
136
+ f_size: int,
137
+ kwargs: dict[str, Any], # must be a dictionary, not a splat
138
+ ) -> None:
139
+ """Label the extremes in the scaled plots.
+
+ Arguments:
+ - ax: Axes - the axes to draw on
+ - data: tuple[DataFrame, DataFrame] - unpacked as (original, adjusted)
+ - plot_type: str - compared against ZSCALED
+ - f_size: int - the size used for the text labels
+ - kwargs: dict - must contain "xlim", which is unpacked as (low, high)
+
+ The statements below draw dashed vertical lines at -1 and +1, mark the
+ median of each adjusted column with an "x", and write the minimum of
+ each original column at the low x-limit and its maximum at the high
+ x-limit, both to one decimal place.
+
+ NOTE(review): indentation has been flattened in this listing, so it is
+ not visible which of these statements sit under the plot_type check.
+ Confirm against the module source before relying on this description.
+ """
140
+
141
+ original, adjusted = data
142
+ low, high = kwargs["xlim"]
143
+ if plot_type == ZSCALED:
144
+ ax.axvline(-1, color="#555555", linewidth=0.5, linestyle="--")
145
+ ax.axvline(1, color="#555555", linewidth=0.5, linestyle="--")
146
+ ax.scatter(
147
+ adjusted.median(),
148
+ adjusted.columns,
149
+ color="darkorchid",
150
+ marker="x",
151
+ s=5,
152
+ label="Median",
153
+ )
154
+ for col_num, col_name in enumerate(original.columns):
155
+ ax.text(
156
+ low,
157
+ col_num,
158
+ f" {original[col_name].min():.1f}",
159
+ ha="left",
160
+ va="center",
161
+ size=f_size,
162
+ )
163
+ ax.text(
164
+ high,
165
+ col_num,
166
+ f"{original[col_name].max():.1f} ",
167
+ ha="right",
168
+ va="center",
169
+ size=f_size,
170
+ )
171
+
172
+
173
def _horizontal_bar_plot(
    original: DataFrame,
    adjusted: DataFrame,
    middle: float,
    plot_type: str,
    kwargs: dict[str, Any],  # must be a dictionary, not a splat
) -> Axes:
    """Build the summary chart: the middle-of-data bars, the latest
    datapoints and the extremes labels, drawn in that order.

    kwargs is passed as a dictionary rather than a splat because
    _plot_middle_bars() stores the x-axis limits in it, and
    _label_extremes() reads them back.

    Returns the matplotlib Axes object.
    """

    label_size = 10  # font size shared by all of the text labels
    axes = _plot_middle_bars(adjusted, middle, kwargs)
    _plot_latest_datapoint(axes, original, adjusted, label_size)
    _label_extremes(axes, (original, adjusted), plot_type, label_size, kwargs)
    return axes
194
+
195
+
196
+ # public
197
def summary_plot(
    data: DataT,  # summary data
    **kwargs,
) -> Axes:
    """Plot a summary of historical data for a given DataFrame.

    Arguments:
    - data: DataFrame containing the summary data. The column names are
      used as labels for the plot.
    - kwargs: additional arguments for the plot, including:
        - plot_from: int | Period | None
        - verbose: if True, print the summary data.
        - middle: proportion of data to highlight (default is 0.8).
        - plot_type: str - "zscores" (the default) or "zscaled".

    Raises:
    - TypeError: if data is not a pandas DataFrame
    - ValueError: if plot_type is not one of the values above

    Returns Axes.
    """

    # --- sanity checks
    data = check_clean_timeseries(data)
    if not isinstance(data, DataFrame):
        raise TypeError("data must be a pandas DataFrame for summary_plot()")
    df = DataFrame(data)  # syntactic sugar for type hinting

    # --- check the arguments
    report_kwargs("summary_plot", **kwargs)
    validate_kwargs(SUMMARY_KW_TYPES, "summary_plot", **kwargs)

    # --- optional arguments
    verbose = kwargs.pop("verbose", False)
    middle = float(kwargs.pop("middle", 0.8))
    plot_type = kwargs.pop("plot_type", ZSCORES)
    if plot_type not in (ZSCORES, ZSCALED):
        # a misspelt plot_type would otherwise silently plot the scaled data
        raise ValueError(
            f"plot_type must be '{ZSCORES}' or '{ZSCALED}', not {plot_type!r}"
        )

    # get the data, calculate z-scores and scaled scores based on the start period
    subset, kwargs = constrain_data(df, **kwargs)
    z_scores, z_scaled = _calculate_z(subset, middle, verbose=verbose)

    # plot as required by the plot_type argument
    adjusted = z_scores if plot_type == ZSCORES else z_scaled
    ax = _horizontal_bar_plot(subset, adjusted, middle, plot_type, kwargs)
    ax.tick_params(axis="y", labelsize="small")
    ax.set_xlim(kwargs.get("xlim", None))  # provide space for the labels
    return ax
mgplot/test.py ADDED
@@ -0,0 +1,31 @@
1
+ """
2
+ test.py
3
+
4
+ Used in the testing of mgplot modules.
5
+
6
+ This module is not intended to be used directly by the user.
7
+ """
8
+
9
+ # --- imports
10
+ from mgplot.settings import set_chart_dir, clear_chart_dir
11
+
12
+
13
+ # --- constants
14
+ TEST_CHART_DIR = "./zz-test-charts/"
15
+
16
+
17
+ # --- functions
18
def prepare_for_test(subdirectory: str = "unnamed") -> None:
    """
    Get the chart directory ready to receive test plot output.

    The global chart_dir is pointed at the named subdirectory of
    TEST_CHART_DIR (created if it does not exist), and any chart
    image files already in it are removed.

    Arguments:
    - subdirectory: str - the subdirectory to create
      in the test directory
    """

    set_chart_dir(f"{TEST_CHART_DIR}{subdirectory}")
    clear_chart_dir()