mgplot 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mgplot/finalisers.py ADDED
@@ -0,0 +1,364 @@
1
+ """
2
+ finalisers.py
3
+
4
+ Simple convenience functions to finalise and produce plots.
5
+ - bar_plot_finalise()
6
+ - line_plot_finalise()
7
+ - postcovid_plot_finalise()
8
+ - raw_growth_plot_finalise()
9
+ - revision_plot_finalise()
10
+ - run_plot_finalise()
11
+ - seastrend_plot_finalise()
12
+ - series_growth_plot_finalise()
13
+ - summary_plot_finalise()
14
+
15
+ Note: we keep these functions in a separate module to
16
+ stop circular imports
17
+
18
+ We also do most of the indicative code testing from this
19
+ module.
20
+ """
21
+
22
+ # --- imports
23
+ from pandas import DataFrame, period_range, Period, PeriodIndex, read_csv, Index
24
+ from numpy import random
25
+
26
+ from mgplot.test import prepare_for_test
27
+ from mgplot.settings import DataT
28
+ from mgplot.multi_plot import plot_then_finalise, multi_column, multi_start
29
+ from mgplot.line_plot import line_plot
30
+ from mgplot.bar_plot import bar_plot
31
+ from mgplot.seastrend_plot import seastrend_plot
32
+ from mgplot.postcovid_plot import postcovid_plot
33
+ from mgplot.revision_plot import revision_plot
34
+ from mgplot.run_plot import run_plot
35
+ from mgplot.growth_plot import series_growth_plot, raw_growth_plot
36
+ from mgplot.summary_plot import summary_plot, ZSCORES, ZSCALED
37
+
38
+
39
+ # --- public functions
40
def line_plot_finalise(data: DataT, **kwargs) -> None:
    """
    Draw a line plot of data and then finalise it.

    The data and keyword arguments are handed to plot_then_finalise()
    with line_plot as the plotting function. When data is a DataFrame,
    two defaults are filled in, but only if the caller has not already
    supplied them:
    - "legend" is set to True when there is more than one column.
    - "style" is set to a list of four line styles when there are
      more than four columns.
    """

    if isinstance(data, DataFrame):
        n_columns = len(data.columns)
        if n_columns > 1:
            # several lines on one chart: show a legend unless told otherwise
            kwargs.setdefault("legend", True)
        if n_columns > 4:
            # many lines: vary the line style unless told otherwise
            kwargs.setdefault("style", ["solid", "dashed", "dashdot", "dotted"])

    plot_then_finalise(data, function=line_plot, **kwargs)
63
+
64
+
65
def bar_plot_finalise(data: DataT, **kwargs) -> None:
    """
    Draw a bar plot of data and then finalise it.

    All keyword arguments are forwarded unchanged to
    plot_then_finalise(), with bar_plot as the plotting function.
    """

    plot_then_finalise(data, function=bar_plot, **kwargs)
79
+
80
+
81
def seastrend_plot_finalise(data: DataT, **kwargs) -> None:
    """
    Draw a seasonally-adjusted/trend plot of data and then finalise it.

    All keyword arguments are forwarded unchanged to
    plot_then_finalise(), with seastrend_plot as the plotting function.
    """

    plot_then_finalise(data, function=seastrend_plot, **kwargs)
94
+
95
+
96
def postcovid_plot_finalise(data: DataT, **kwargs) -> None:
    """
    Draw a post-COVID plot of data and then finalise it.

    All keyword arguments are forwarded unchanged to
    plot_then_finalise(), with postcovid_plot as the plotting function.
    """

    plot_then_finalise(data, function=postcovid_plot, **kwargs)
109
+
110
+
111
def revision_plot_finalise(data: DataT, **kwargs) -> None:
    """
    Draw a revision plot of data and then finalise it.

    The data and keyword arguments are handed to plot_then_finalise()
    with revision_plot as the plotting function. Two defaults are
    filled in when the caller has not supplied them:
    - "legend": a small, two column legend placed at the "best" location.
    - "style": a list of four line styles.
    """

    fallbacks = {
        "legend": {"loc": "best", "fontsize": "x-small", "ncol": 2},
        "style": ["solid", "dashed", "dashdot", "dotted"],
    }
    for key, fallback in fallbacks.items():
        kwargs.setdefault(key, fallback)

    plot_then_finalise(data, function=revision_plot, **kwargs)
128
+
129
+
130
def run_plot_finalise(data: DataT, **kwargs) -> None:
    """
    Draw a run plot of data and then finalise it.

    All keyword arguments are forwarded unchanged to
    plot_then_finalise(), with run_plot as the plotting function.
    """

    plot_then_finalise(data=data, function=run_plot, **kwargs)
143
+
144
+
145
def series_growth_plot_finalise(data: DataT, **kwargs) -> None:
    """
    Draw a growth plot from a series of levels and then finalise it.

    Use this when you are providing the underlying series and want
    mgplot to calculate the growth rates. The data and keyword
    arguments are handed to plot_then_finalise() with
    series_growth_plot as the plotting function.

    Defaults filled in when not supplied by the caller:
    - "ylabel": "Per cent Growth"
    - "xlabel": None
    """

    kwargs.setdefault("ylabel", "Per cent Growth")
    kwargs.setdefault("xlabel", None)
    plot_then_finalise(data=data, function=series_growth_plot, **kwargs)
159
+
160
+
161
def raw_growth_plot_finalise(data: DataT, **kwargs) -> None:
    """
    Draw a growth plot from pre-calculated growth data and then
    finalise it.

    Use this when you are providing the raw growth data yourself.
    The data and keyword arguments are handed to plot_then_finalise()
    with raw_growth_plot as the plotting function.

    Defaults filled in when not supplied by the caller:
    - "ylabel": "Growth Units unspecified" (so don't forget to set
      the ylabel in kwargs)
    - "xlabel": None
    """

    kwargs.setdefault("ylabel", "Growth Units unspecified")
    kwargs.setdefault("xlabel", None)
    plot_then_finalise(data=data, function=raw_growth_plot, **kwargs)
175
+
176
+
177
def summary_plot_finalise(
    data: DataT,
    **kwargs,
) -> None:
    """
    A convenience function to call summary_plot() via plot_then_finalise().
    This is more complex than most convenience methods: it always
    produces two plots from the same data, one for each plot type
    (ZSCORES first, then ZSCALED).

    Arguments
    - data: DataFrame containing the summary data. The index must be
      a PeriodIndex.
    - kwargs: additional arguments for the plot, including:
        - plot_from: int | Period | None. An int is treated as a
          position in data.index. None means plot from 1995-01-01
          (expressed in the frequency of the data index).
        - legend: defaults to an xx-small, four column legend placed
          below the x-axis label.
        - title: defaults to "Summary at <last period in the data>".
        - preserve_lims: defaults to True.
        - any other keyword arguments (for example verbose or middle)
          are passed through unchanged.

    Note: "plot_type", "pre_tag", "xlabel", "plot_from" and "x0" are
    set by this function for each plot, so any values the caller
    supplies for "plot_type", "pre_tag", "xlabel" or "x0" are overridden.

    Raises
    - ValueError if the data index is not a PeriodIndex.
    - ValueError if plot_from is None and the index frequency is
      not daily, monthly or quarterly.
    """

    # --- sanity checks
    if not isinstance(data.index, PeriodIndex):
        raise ValueError("data must have a PeriodIndex")

    # --- standard arguments (only applied when the caller has not set them)
    kwargs["legend"] = kwargs.get(
        "legend",
        {
            # put the legend below the x-axis label
            "loc": "upper center",
            "fontsize": "xx-small",
            "bbox_to_anchor": (0.5, -0.125),
            "ncol": 4,
        },
    )
    kwargs["title"] = kwargs.get("title", f"Summary at {data.index[-1]}")
    kwargs["preserve_lims"] = kwargs.get("preserve_lims", True)

    # --- resolve the start date once; it is the same for both plots
    start = kwargs.get("plot_from", None)
    if start is None:
        freq = data.index.freqstr[0]
        if freq not in ("D", "M", "Q"):
            raise ValueError(f"Unknown frequency {freq} for data index")
        start = Period("1995-01-01", freq=data.index.freqstr)
    elif isinstance(start, int):
        # an integer is a position in the index, convert it to a Period
        start = data.index[start]
    kwargs["plot_from"] = start

    # --- one plot per plot type
    for plot_type in (ZSCORES, ZSCALED):
        kwargs["plot_type"] = plot_type
        kwargs["pre_tag"] = plot_type  # necessary because the title is same

        if plot_type == ZSCORES:
            kwargs["xlabel"] = f"Z-scores for prints since {start}"
            kwargs["x0"] = True
        else:
            kwargs["xlabel"] = f"-1 to 1 scaled z-scores since {start}"
            # x0 was set for the z-scores plot; it does not apply here
            kwargs.pop("x0", None)

        plot_then_finalise(
            data,
            function=summary_plot,
            **kwargs,
        )
247
+
248
+
249
+ # --- test code
250
if __name__ == "__main__":
    # --- Preparation
    TEST_DATA_DIR = "./zz-test-data/"
    prepare_for_test("finalisers")

    def _read_monthly_csv(filename: str) -> DataFrame:
        """Read a CSV file from TEST_DATA_DIR and give it a monthly PeriodIndex."""
        frame = read_csv(
            f"{TEST_DATA_DIR}{filename}",
            index_col=0,
            parse_dates=True,
        )
        frame.index = PeriodIndex(frame.index, freq="M")
        return frame

    # - fake data: a noisy linear series, a clean linear series
    #   and a compounding series, all quarterly
    index = period_range(start="2010Q1", periods=60, freq="Q")
    test_frame = DataFrame(
        {
            "Series 1": [0.1] * len(index),
            "Series 2": [0.1] * len(index),
            "Series 3": [1.1] * len(index),
        },
        index=index,
    )
    test_frame["Series 1"] = test_frame["Series 1"].cumsum() + random.normal(
        0, 0.1, len(index)
    )
    test_frame["Series 2"] = test_frame["Series 2"].cumsum()
    test_frame["Series 3"] = test_frame["Series 3"].cumprod()

    # NOTE(review): the flattened source does not show which of the calls
    # below were nested under this guard; with SKIP = False every call runs
    # either way. Confirm the intended nesting before toggling SKIP.
    SKIP = False
    if not SKIP:

        line_plot_finalise(
            data=test_frame,
            title="Test Line Plot",
            ylabel="Value",
            xlabel=None,
        )

        multi_column(
            data=test_frame,
            function=line_plot_finalise,
            title="Test Multi Column Line Plot: ",
            ylabel="Value",
            xlabel=None,
        )

        multi_start(
            data=test_frame,
            function=line_plot_finalise,
            starts=[20, -10, Period("2018Q1")],
            title="Test Multi Start Line Plot: ",
            ylabel="Value",
            xlabel=None,
        )

        postcovid_plot_finalise(
            data=test_frame["Series 3"],
            title="Test Post-COVID Plot",
            ylabel="Value",
            xlabel=None,
        )

        st = test_frame[["Series 1", "Series 2"]].copy()
        st.columns = Index(["Seasonally Adjusted", "Trend"])
        multi_start(
            st,
            function=seastrend_plot_finalise,
            starts=[0, Period("2018Q1")],
            title="Test Multi Start Seas-Trend Plot",
            ylabel="Value",
            xlabel=None,
        )

        # - summary plot test
        summary_data = _read_monthly_csv("summary.csv")
        summary_plot_finalise(
            data=summary_data,
            title=f"Summary Plot at {summary_data.index[-1]}",
            ylabel="Value",
            xlabel=None,
        )

        multi_start(
            data=test_frame["Series 1"],
            function=series_growth_plot_finalise,
            starts=[0, -19],
            title="Test Multi Start Series Growth Plot: ",
            ylabel="Per cent Growth",
            xlabel=None,
        )

        # -- run plot test
        ocr_data = _read_monthly_csv("ocr_rba.csv")
        ocr_series = ocr_data[ocr_data.columns[0]]
        multi_start(
            data=ocr_series,
            function=[plot_then_finalise, run_plot],
            starts=[Period("2020-11", freq="M"), Period("2000-11", freq="M"), 1],
            title=f"Test Multi Start Run Plot at {ocr_series.index[-1]}",
            ylabel="Annual Per cent Growth",
            xlabel=None,
        )

        # -- revision plot test (path now built from TEST_DATA_DIR like the others)
        data_ = _read_monthly_csv("revisions.csv")
        revision_plot_finalise(
            data=data_,
            title="Test Revision Plot",
            ylabel="Units",
            xlabel=None,
            rounding=2,
        )
mgplot/growth_plot.py ADDED
@@ -0,0 +1,275 @@
1
+ """
2
+ growth_plot.py:
3
+ plot period and annual/through-the-year growth rates on the same axes.
4
+ - calc_growth()
5
+ - raw_growth_plot()
6
+ - series_growth_plot()
7
+ """
8
+
9
+ # --- imports
10
+ from pandas import Series, DataFrame, Index, Period, PeriodIndex, period_range
11
+ from numpy import nan
12
+ from matplotlib.pyplot import Axes
13
+ import matplotlib.patheffects as pe
14
+ from tabulate import tabulate
15
+
16
+ from mgplot.test import prepare_for_test
17
+ from mgplot.settings import get_setting, DataT
18
+ from mgplot.date_utils import set_labels
19
+ from mgplot.utilities import annotate_series, check_clean_timeseries
20
+ from mgplot.kw_type_checking import (
21
+ validate_kwargs,
22
+ report_kwargs,
23
+ validate_expected,
24
+ ExpectedTypeDict,
25
+ )
26
+
27
+
28
+ # --- constants
29
ANNUAL = "annual"
PERIODIC = "periodic"

# The keyword arguments accepted by the growth plots, mapped to the
# type (or tuple of types) expected for each. Widths accept both float
# and int so that a whole number (e.g. bar_width=1) is treated the same
# way for bars as it already is for lines.
GROWTH_KW_TYPES: ExpectedTypeDict = {
    "line_width": (float, int),
    "line_color": str,
    "line_style": str,
    "annotate_line": (type(None), int, str),
    "bar_width": (float, int),
    "bar_color": str,
    "annotate_bar": (type(None), int, str),
    "annotation_rounding": int,
    "plot_from": (type(None), Period, int),
    "max_ticks": int,
}
validate_expected(GROWTH_KW_TYPES, "growth_plot")
45
+
46
+
47
+ # --- functions
48
def calc_growth(series: Series) -> DataFrame:
    """
    Calculate annual and periodic growth for a pandas Series,
    where the index is a PeriodIndex.

    Args:
    - series: A pandas Series with an appropriate PeriodIndex.

    Returns a two column DataFrame, indexed by every period from the
    first to the last period in the series:
    - "Annual Growth": the percentage change on the same period one
      year earlier (4 quarters, 12 months or 365 days back).
    - "Quarterly Growth", "Monthly Growth" or "Daily Growth"
      (depending on the index frequency): the percentage change on
      the previous period.
    Periods missing from the input are inserted as NaN. Growth is NaN
    wherever either the current or the comparison value is missing;
    missing values are not padded forward.

    Raises
    - TypeError if the series is not a pandas Series.
    - TypeError if the series index is not a PeriodIndex.
    - ValueError if the series is empty.
    - ValueError if the series index does not have a frequency of Q, M, or D.
    - ValueError if the series index has duplicates.
    """

    # --- sanity checks
    if not isinstance(series, Series):
        raise TypeError("The series argument must be a pandas Series")
    if not isinstance(series.index, PeriodIndex):
        raise TypeError("The series index must be a pandas PeriodIndex")
    if series.empty:
        raise ValueError("The series argument must not be empty")
    freq_code = series.index.freqstr[:1]
    if freq_code not in ("Q", "M", "D"):
        raise ValueError("The series index must have a frequency of Q, M, or D")
    if series.index.has_duplicates:
        raise ValueError("The series index must not have duplicate values")

    # --- ensure the index is complete; reindexing against an ascending
    #     range also leaves the data sorted by date
    complete = period_range(start=series.index.min(), end=series.index.max())
    series = series.reindex(complete, fill_value=nan)

    # --- calculate annual and periodic growth
    # fill_method=None: the gaps made explicit above must stay as gaps,
    # rather than being padded with the last known value before the
    # change is calculated (which would report spurious growth rates).
    ppy = {"Q": 4, "M": 12, "D": 365}[freq_code]
    annual = series.pct_change(periods=ppy, fill_method=None) * 100
    periodic = series.pct_change(periods=1, fill_method=None) * 100
    periodic_name = {4: "Quarterly", 12: "Monthly", 365: "Daily"}[ppy] + " Growth"
    return DataFrame(
        {
            "Annual Growth": annual,
            periodic_name: periodic,
        }
    )
94
+
95
+
96
def _annotations(
    annual: Series,
    periodic: Series,
    axes: Axes,
    **kwargs,
) -> None:
    """
    Apply annotations to the annual and periodic growth series.

    Args:
    - annual: the annual growth series (plotted as a line).
    - periodic: the periodic growth series (plotted as bars).
    - axes: the matplotlib Axes the series were plotted on.
    - kwargs: the keyword arguments given to the growth plot. Those
      used here are annotate_line, annotate_bar, annotation_rounding,
      line_color and bar_color.

    The line is annotated via annotate_series(), unless annotate_line
    is None. Each bar is labelled with its value, unless annotate_bar
    is None or there are 30 or more bars. Bars without a value (NaN)
    are not labelled.
    """

    annotate_line = kwargs.get("annotate_line", "small")
    if annotate_line is not None:
        annotate_series(
            annual,
            axes,
            # NOTE(review): the default here is True, whereas the bar labels
            # below default to 1 decimal place - confirm against annotate_series()
            rounding=kwargs.get("annotation_rounding", True),
            fontsize=annotate_line,
            color=kwargs.get("line_color", "darkblue"),
        )

    annotate_bar = kwargs.get("annotate_bar", "small")
    max_annotations = 30
    if annotate_bar is None or len(periodic) >= max_annotations:
        # labels were not requested, or there are too many bars to label
        return

    annotation_rounding = kwargs.get("annotation_rounding", 1)
    bar_color = kwargs.get("bar_color", "indianred")
    # a small offset from the zero line, scaled to the range of the data
    adjustment = (periodic.max() - periodic.min()) * 0.005

    # dropna(): a missing value (e.g. the first period, which has no
    # predecessor to grow from) would otherwise be drawn as the text "nan"
    for position, value in periodic.dropna().items():
        above = value >= 0
        text = axes.text(
            position,
            adjustment if above else -adjustment,
            f"{value:.{annotation_rounding}f}",
            ha="center",
            va="bottom" if above else "top",
            fontsize=annotate_bar,
            fontname="Helvetica",
            color="white",
        )
        # outline the white text in the bar colour
        text.set_path_effects([pe.withStroke(linewidth=2, foreground=bar_color)])
142
+
143
+
144
def raw_growth_plot(
    data: DataT,
    **kwargs,
) -> Axes:
    """
    Plot annual (as a line) and periodic (as bars) growth on the
    same axes.

    Args:
    - data: A pandas DataFrame with two columns:
        - the first column is the annual growth series
        - the second column is the periodic growth series
      The index is expected to be a PeriodIndex with a daily, monthly
      or quarterly frequency. The caller's data is not modified.
    - kwargs:
        - line_width: The width of the line (default is the
          "line_wide" setting, or the "line_normal" setting where
          180 or more periods are plotted).
        - line_color: The color of the line (default is "darkblue").
        - line_style: The style of the line (default is "-").
        - annotate_line: None | int | str - fontsize to annotate the line
          (default is "small", which means the line is annotated with
          small text).
        - bar_width: The width of the bars (default is 0.8).
        - bar_color: The color of the bars (default is "indianred").
        - annotate_bar: None | int | str - fontsize to annotate the bars
          (default is "small", which means the bars are annotated with
          small text).
        - annotation_rounding: The number of decimal places to round the
          bar annotations to (default is 1).
        - plot_from: None | Period | int -- if:
            - None: the entire series is plotted
            - Period: the plot starts from this period
            - int: the plot starts from this +/- index position
        - max_ticks: The maximum number of ticks to show on the x-axis
          (default is 10).

    Returns:
    - axes: The matplotlib Axes object.

    Raises:
    - TypeError if the data is not a pandas DataFrame with two columns.
    """

    # --- sanity checks
    report_kwargs(called_from="raw_growth_plot", **kwargs)
    validate_kwargs(GROWTH_KW_TYPES, "raw_growth_plot", **kwargs)
    data = check_clean_timeseries(data)
    if not isinstance(data, DataFrame) or len(data.columns) != 2:
        raise TypeError("The data argument must be a pandas DataFrame with two columns")

    # --- get the series of interest ...
    # work on copies: the index and name of each series are replaced
    # below, and that must not leak back into the caller's DataFrame
    annual = data[data.columns[0]].copy()
    periodic = data[data.columns[1]].copy()

    # --- plot
    plot_from: None | Period | int = kwargs.get("plot_from", None)
    if plot_from is not None:
        if isinstance(plot_from, int):
            plot_from = annual.index[plot_from]
        annual = annual[annual.index >= plot_from]
        periodic = periodic[periodic.index >= plot_from]

    # bars are plotted against integer positions, so both series are
    # moved to a shared integer index; the period labels are restored
    # on the x-axis at the end
    save_index = PeriodIndex(annual.index).copy()
    annual.index = Index(range(len(annual)))
    annual.name = "Annual Growth"
    periodic.index = annual.index
    periodic.name = {"M": "Monthly", "Q": "Quarterly", "D": "Daily"}[
        PeriodIndex(save_index).freqstr[:1]
    ] + " Growth"
    axes = periodic.plot.bar(
        color=kwargs.get("bar_color", "indianred"),
        width=kwargs.get("bar_width", 0.8),
    )
    thin_threshold = 180
    annual.plot(
        ax=axes,
        color=kwargs.get("line_color", "darkblue"),
        lw=kwargs.get(
            "line_width",
            (
                get_setting("line_normal")
                if len(annual) >= thin_threshold
                else get_setting("line_wide")
            ),
        ),
        linestyle=kwargs.get("line_style", "-"),
    )
    _annotations(annual, periodic, axes, **kwargs)
    axes.set_ylabel("Per cent Growth")

    # --- fix the x-axis labels
    set_labels(axes, save_index, kwargs.get("max_ticks", 10))

    # --- and done ...
    return axes
236
+
237
+
238
def series_growth_plot(data: DataT, **kwargs) -> Axes:
    """
    Calculate annual and periodic growth from a pandas Series,
    and plot both on the same axes.

    Args:
    - data: A pandas Series with an appropriate PeriodIndex.
    - kwargs:
        - takes the same kwargs as raw_growth_plot(), which is
          where they are validated.

    Returns:
    - axes: The matplotlib Axes object from raw_growth_plot().

    Raises:
    - TypeError if the data is not a pandas Series.
    """

    # --- sanity checks (kwargs are validated later, in raw_growth_plot())
    report_kwargs(called_from="series_growth_plot", **kwargs)
    cleaned = check_clean_timeseries(data)
    if isinstance(cleaned, Series):
        # --- calculate growth and plot
        return raw_growth_plot(calc_growth(cleaned), **kwargs)

    raise TypeError(
        "The data argument to series_growth_plot() must be a pandas Series"
    )
265
+
266
+
267
+ # --- test code
268
if __name__ == "__main__":
    # Indicative test: print the growth table for a short quarterly series.
    print("Testing")
    prepare_for_test("growth_plot")
    values_ = [1, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0]
    series_ = Series(
        values_,
        index=period_range("2020Q1", periods=len(values_), freq="Q"),
    )
    growth_ = calc_growth(series_)
    print(
        tabulate(growth_, headers="keys", tablefmt="pipe")  # type: ignore[arg-type]
    )