PyPI - mgplot - Versions diffs - 0.1.0__py3-none-any.whl - Mend

mgplot 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

mgplot/__init__.py +121 -0
mgplot/bar_plot.py +107 -0
mgplot/colors.py +199 -0
mgplot/date_utils.py +324 -0
mgplot/finalise_plot.py +335 -0
mgplot/finalisers.py +364 -0
mgplot/growth_plot.py +275 -0
mgplot/kw_type_checking.py +460 -0
mgplot/line_plot.py +178 -0
mgplot/multi_plot.py +339 -0
mgplot/postcovid_plot.py +106 -0
mgplot/py.typed +1 -0
mgplot/revision_plot.py +60 -0
mgplot/run_plot.py +182 -0
mgplot/seastrend_plot.py +74 -0
mgplot/settings.py +164 -0
mgplot/summary_plot.py +240 -0
mgplot/test.py +31 -0
mgplot/utilities.py +254 -0
mgplot-0.1.0.dist-info/METADATA +53 -0
mgplot-0.1.0.dist-info/RECORD +24 -0
mgplot-0.1.0.dist-info/WHEEL +5 -0
mgplot-0.1.0.dist-info/licenses/LICENSE +8 -0
mgplot-0.1.0.dist-info/top_level.txt +1 -0

mgplot/seastrend_plot.py ADDED Viewed

@@ -0,0 +1,74 @@
+"""
+seastrend_plot.py
+This module contains a function to create seasonal+trend plots.
+"""
+# --- imports
+from matplotlib.pyplot import Axes
+from mgplot.settings import DataT
+from mgplot.line_plot import line_plot
+from mgplot.utilities import get_color_list, get_setting, check_clean_timeseries
+from mgplot.kw_type_checking import report_kwargs
+# --- constants
+# Names of the keyword arguments that seastrend_plot() gives its own
+# defaults to before handing everything on to line_plot().
+COLOR = "color"
+WIDTH = "width"
+STYLE = "style"
+ANNOTATE = "annotate"
+ROUNDING = "rounding"
+LEGEND = "legend"
+DROPNA = "dropna"
+# --- public functions
+def seastrend_plot(data: DataT, **kwargs) -> Axes:
+    """
+    Publish a DataFrame, where the first column is seasonally
+    adjusted data, and the second column is trend data.
+    Aguments:
+    - data: DataFrame - the data to plot with the first column
+      being the seasonally adjusted data, and the second column
+      being the trend data.
+    The remaining arguments are the same as those passed to
+    line_plot().
+    Returns:
+    - a matplotlib Axes object
+    """
+    # Note: we will rely on the line_plot() function to do most of the work.
+    # including constraining the data to the plot_from keyword argument.
+    # --- sanity checks
+    report_kwargs(called_from="seastrend_plot", **kwargs)
+    data = check_clean_timeseries(data)
+    if len(data.columns) < 2:
+        raise ValueError(
+            "seas_trend_plot() expects a DataFrame data item with at least 2 columns."
+        )
+    # let line_plot() handle validate_kwargs()
+    # --- defaults if not in kwargs
+    colors = kwargs.pop(COLOR, get_color_list(2))
+    widths = kwargs.pop(WIDTH, [get_setting("line_normal"), get_setting("line_wide")])
+    styles = kwargs.pop(STYLE, ["-", "-"])
+    annotations = kwargs.pop(ANNOTATE, [True, False])
+    rounding = kwargs.pop(ROUNDING, True)
+    legend = kwargs.pop(LEGEND, True)
+    # series breaks are common in seas-trend data
+    kwargs[DROPNA] = kwargs.pop(DROPNA, False)
+    return line_plot(
+        data,
+        color=colors,
+        width=widths,
+        style=styles,
+        annotate=annotations,
+        rounding=rounding,
+        legend=legend,
+        **kwargs,
+    )

mgplot/settings.py ADDED Viewed

@@ -0,0 +1,164 @@
+"""
+settings.py
+This module provides a mechanism for managing global settings.
+"""
+# --- imports
+from typing import TypedDict, TypeVar, Any
+from pathlib import Path
+import matplotlib as mpl
+import matplotlib.pyplot as plt
+from pandas import Series, DataFrame
+# --- default types
+# DataT is the type used to annotate the data argument of the plotting
+# functions: it is constrained to a pandas Series or DataFrame.
+DataT = TypeVar("DataT", Series, DataFrame)  # python 3.11+
+# --- global settings
+# NOTE: these two statements run at import time, so importing this module
+# changes the matplotlib style and the default font size for the process.
+plt.style.use("fivethirtyeight")
+mpl.rcParams["font.size"] = 11
+# --- default settings
+class _DefaultValues(TypedDict):
+    """
+    _DefaultValues is a dictionary of default values for the settings.
+    It is a TypedDict, which means that it knows a fixed set of keys
+    and their corresponding types. The keys declared here are the only
+    names accepted by get_setting() and set_setting().
+    """
+    file_type: str  # the file type to use for saving plots
+    figsize: tuple[float, float]  # the figure size to use for plots
+    file_dpi: int  # the DPI to use for saving plots
+    line_narrow: float  # the line width for narrow lines
+    line_normal: float  # the line width for normal lines
+    line_wide: float  # the line width for wide lines
+    bar_width: float  # the width of bars in bar plots
+    legend_font_size: float | str  # the font size for legends
+    legend: dict[str, Any]  # the legend settings
+    colors: dict[int, list[str]]  # used by get_color_list()
+    chart_dir: str  # the directory to save charts in
+# The single module-level settings store: get_setting() reads from it and
+# set_setting() overwrites entries in place, so changes are global.
+_mgplot_defaults = _DefaultValues(
+    file_type="png",
+    figsize=(9.0, 4.5),
+    file_dpi=300,
+    line_narrow=0.75,
+    line_normal=1.0,
+    line_wide=2.0,
+    bar_width=0.8,
+    legend_font_size="small",
+    legend={
+        "loc": "best",
+        "fontsize": "x-small",
+    },
+    # Each key is the number of colours in the list it maps to (1, 5 or 9).
+    # NOTE(review): presumably get_color_list() picks a palette by the number
+    # of lines requested - confirm in utilities.py.
+    colors={
+        1: ["#dd0000"],
+        5: ["royalblue", "darkorange", "forestgreen", "#dd0000", "gray"],
+        9: [
+            "darkblue",
+            "darkorange",
+            "forestgreen",
+            "#dd0000",
+            "purple",
+            "gold",
+            "lightcoral",
+            "lightseagreen",
+            "gray",
+        ],
+    },
+    chart_dir=".",
+)
+# --- get/change settings
+def get_setting(setting: str) -> Any:
+    """
+    Get a setting from the global settings.
+    Arguments:
+    - setting: str - name of the setting to get. The possible settings are:
+        - file_type: str - the file type to use for saving plots
+        - figsize: tuple[float, float] - the figure size to use for plots
+        - file_dpi: int - the DPI to use for saving plots
+        - line_narrow: float - the line width for narrow lines
+        - line_normal: float - the line width for normal lines
+        - line_wide: float - the line width for wide lines
+        - bar_width: float - the width of bars in bar plots
+        - legend_font_size: float | str - the font size for legends
+        - legend: dict[str, Any] - the legend settings
+        - colors: dict[int, list[str]] - a dictionary of colors for
+          different numbers of lines
+        - chart_dir: str - the directory to save charts in
+    Raises:
+        - KeyError: if the setting is not found
+    Returns:
+        - value: Any - the value of the setting
+    """
+    if setting not in _mgplot_defaults:
+        raise KeyError(f"Setting '{setting}' not found in _mgplot_defaults.")
+    return _mgplot_defaults[setting]  # type: ignore[literal-required]
+def set_setting(setting: str, value: Any) -> None:
+    """
+    Set a setting in the global settings.
+    Raises KeyError if the setting is not found.
+    Arguments:
+        - setting: str - name of the setting to set (see get_setting())
+        - value: Any - the value to set the setting to
+    """
+    if setting not in _mgplot_defaults:
+        raise KeyError(f"Setting '{setting}' not found in _mgplot_defaults.")
+    _mgplot_defaults[setting] = value  # type: ignore[literal-required]
+def clear_chart_dir() -> None:
+    """
+    Remove all graph-image files from the global chart_dir.
+    This is a convenience function to remove all files from the
+    chart_dir directory. It does not remove the directory itself.
+    Note: the function creates the directory if it does not exist.
+    """
+    chart_dir = get_setting("chart_dir")
+    Path(chart_dir).mkdir(parents=True, exist_ok=True)
+    for ext in ("png", "svg", "jpg", "jpeg"):
+        for fs_object in Path(chart_dir).glob(f"*.{ext}"):
+            if fs_object.is_file():
+                fs_object.unlink()
+def set_chart_dir(chart_dir: str) -> None:
+    """
+    A function to set a global chart directory for finalise_plot(),
+    so that it does not need to be included as an argument in each
+    call to finalise_plot(). Create the directory if it does not exist.
+    Note: Path.mkdir() may raise an exception if a directory cannot be created.
+    Note: This is a wrapper for set_setting() to set the chart_dir setting, and
+    create the directory if it does not exist.
+    Arguments:
+        - chart_dir: str - the directory to set as the chart directory
+    """
+    if not chart_dir:
+        chart_dir = "."  # avoid the empty string
+    Path(chart_dir).mkdir(parents=True, exist_ok=True)
+    set_setting("chart_dir", chart_dir)

mgplot/summary_plot.py ADDED Viewed

@@ -0,0 +1,240 @@
+"""
+summary_plot.py:
+Produce a summary plot for the data in a given DataFrame.
+The data is normalised to z-scores and scaled.
+"""
+# --- imports
+# system imports
+from typing import Any
+# from collections.abc import Sequence
+# analytic third-party imports
+from numpy import ndarray, array
+from matplotlib.pyplot import Axes, subplots
+from pandas import DataFrame, Period
+# local imports
+from mgplot.settings import DataT
+from mgplot.utilities import constrain_data, check_clean_timeseries
+from mgplot.kw_type_checking import (
+    report_kwargs,
+    ExpectedTypeDict,
+    validate_expected,
+    validate_kwargs,
+)
+# --- constants
+# The plot_type values that summary_plot() distinguishes between.
+ZSCORES = "zscores"
+ZSCALED = "zscaled"
+# Keyword arguments that summary_plot() accepts, with their expected types;
+# summary_plot() passes this table to validate_kwargs().
+SUMMARY_KW_TYPES: ExpectedTypeDict = {
+    "verbose": bool,
+    "middle": float,
+    "plot_type": str,
+    "plot_from": (int, Period, type(None)),
+}
+# Runs once at import time.
+# NOTE(review): presumably checks that the table above is well-formed -
+# confirm in kw_type_checking.py.
+validate_expected(SUMMARY_KW_TYPES, "summary_plot")
+# --- functions
+def _calc_quantiles(middle: float) -> ndarray:
+    """Calculate the quantiles for the middle of the data."""
+    return array([(1 - middle) / 2.0, 1 - (1 - middle) / 2.0])
+def _calculate_z(
+    original: DataFrame,  # only contains the data points of interest
+    middle: float,  # middle proportion of data to highlight (eg. 0.8)
+    verbose: bool = False,  # print the summary data
+) -> tuple[DataFrame, DataFrame]:
+    """Calculate z-scores, scaled z-scores and middle quantiles.
+    Return z_scores, z_scaled, q (which are the quantiles for the
+    start/end of the middle proportion of data to highlight)."""
+    # calculate z-scores, scaled scores and middle quantiles
+    z_scores: DataFrame = (original - original.mean()) / original.std()
+    z_scaled: DataFrame = (
+        # scale z-scores between -1 and +1
+        (((z_scores - z_scores.min()) / (z_scores.max() - z_scores.min())) - 0.5)
+        * 2
+    )
+    q_middle = _calc_quantiles(middle)
+    if verbose:
+        frame = DataFrame(
+            {
+                "count": original.count(),
+                "mean": original.mean(),
+                "median": original.median(),
+                "min shaded": original.quantile(q=q_middle[0]),
+                "max shaded": original.quantile(q=q_middle[1]),
+                "z-scores": z_scores.iloc[-1],
+                "scaled": z_scaled.iloc[-1],
+            }
+        )
+        print(frame)
+    return DataFrame(z_scores), DataFrame(z_scaled)  # syntactic sugar for type hinting
+def _plot_middle_bars(
+    adjusted: DataFrame,
+    middle: float,
+    kwargs: dict[str, Any],  # must be a dictionary, not a splat
+) -> Axes:
+    """Plot the middle (typically 80%) of the data as a bar.
+    A new figure is created, and one horizontal bar is drawn for each
+    column of adjusted, running between the two quantiles that bound
+    the middle proportion of that column.
+    Note: also sets the x-axis limits in kwargs (kwargs["xlim"]), which
+    is why kwargs must be passed as a dictionary the caller can read back.
+    Return the matplotlib Axes object."""
+    q = _calc_quantiles(middle)
+    # after the transpose there is one row per column of adjusted, and the
+    # two columns are labelled with the quantile values q[0] and q[1]
+    lo_hi: DataFrame = adjusted.quantile(q=q).T  # get the middle section of data
+    span = 1.15  # the limits always reach at least -span and +span ...
+    space = 0.2  # ... and are then padded by this much at each end
+    # the limits must cover the latest row as well as the bars
+    low = min(adjusted.iloc[-1].min(), lo_hi.min().min(), -span) - space
+    high = max(adjusted.iloc[-1].max(), lo_hi.max().max(), span) + space
+    kwargs["xlim"] = (low, high)  # remember the x-axis limits
+    _fig, ax = subplots()
+    ax.barh(
+        y=lo_hi.index,
+        width=lo_hi[q[1]] - lo_hi[q[0]],
+        left=lo_hi[q[0]],
+        color="#bbbbbb",
+        label=f"Middle {middle*100:0.0f}% of prints",
+    )
+    return ax
+def _plot_latest_datapoint(
+    ax: Axes,
+    original: DataFrame,
+    adjusted: DataFrame,
+    f_size: int,
+) -> None:
+    """Add the latest datapoints to the summary plot"""
+    ax.scatter(adjusted.iloc[-1], adjusted.columns, color="darkorange", label="Latest")
+    f_size = 10
+    row = adjusted.index[-1]
+    for col_num, col_name in enumerate(original.columns):
+        ax.text(
+            x=adjusted.at[row, col_name],
+            y=col_num,
+            s=f"{original.at[row, col_name]:.1f}",
+            ha="center",
+            va="center",
+            size=f_size,
+        )
+def _label_extremes(
+    ax: Axes,
+    data: tuple[DataFrame, DataFrame],
+    plot_type: str,
+    f_size: int,
+    kwargs: dict[str, Any],  # must be a dictionary, not a splat
+) -> None:
+    """Label the extremes in the scaled plots."""
+    original, adjusted = data
+    low, high = kwargs["xlim"]
+    if plot_type == ZSCALED:
+        ax.axvline(-1, color="#555555", linewidth=0.5, linestyle="--")
+        ax.axvline(1, color="#555555", linewidth=0.5, linestyle="--")
+        ax.scatter(
+            adjusted.median(),
+            adjusted.columns,
+            color="darkorchid",
+            marker="x",
+            s=5,
+            label="Median",
+        )
+        for col_num, col_name in enumerate(original.columns):
+            ax.text(
+                low,
+                col_num,
+                f" {original[col_name].min():.1f}",
+                ha="left",
+                va="center",
+                size=f_size,
+            )
+            ax.text(
+                high,
+                col_num,
+                f"{original[col_name].max():.1f} ",
+                ha="right",
+                va="center",
+                size=f_size,
+            )
+def _horizontal_bar_plot(
+    original: DataFrame,
+    adjusted: DataFrame,
+    middle: float,
+    plot_type: str,
+    kwargs: dict[str, Any],  # must be a dictionary, not a splat
+) -> Axes:
+    """Plot horizontal bars for the middle of the data."""
+    # kwargs is a dictionary, not a splat
+    # so that we can pass it to the Axes object and
+    # set the x-axis limits.
+    ax = _plot_middle_bars(adjusted, middle, kwargs)
+    f_size = 10
+    _plot_latest_datapoint(ax, original, adjusted, f_size)
+    _label_extremes(
+        ax, data=(original, adjusted), plot_type=plot_type, f_size=f_size, kwargs=kwargs
+    )
+    return ax
+# public
+def summary_plot(
+    data: DataT,  # summary data
+    **kwargs,
+) -> Axes:
+    """Plot a summary of historical data for a given DataFrame.
+    Args:
+    - data: DataFrame containing the summary data. The column names are
+      used as labels for the plot.
+    - kwargs: additional arguments for the plot, including:
+        - plot_from: int | Period | None
+        - verbose: if True, print the summary data.
+        - middle: proportion of data to highlight (default is 0.8).
+        - plot_type: str - "zscores" (the default) or "zscaled".
+    Raises TypeError if data is not a DataFrame.
+    Returns Axes.
+    """
+    # --- sanity checks
+    data = check_clean_timeseries(data)
+    if not isinstance(data, DataFrame):
+        raise TypeError("data must be a pandas DataFrame for summary_plot()")
+    df = DataFrame(data)  # syntactic sugar for type hinting
+    # --- check the arguments
+    report_kwargs("summary_plot", **kwargs)
+    validate_kwargs(SUMMARY_KW_TYPES, "summary_plot", **kwargs)
+    # --- optional arguments
+    verbose = kwargs.pop("verbose", False)
+    middle = float(kwargs.pop("middle", 0.8))
+    plot_type = kwargs.pop("plot_type", ZSCORES)
+    # get the data, calculate z-scores and scaled scores based on the start period
+    subset, kwargs = constrain_data(df, **kwargs)
+    z_scores, z_scaled = _calculate_z(subset, middle, verbose=verbose)
+    # plot as required by the plot_type argument
+    # NOTE(review): any plot_type other than ZSCORES selects the scaled data
+    adjusted = z_scores if plot_type == ZSCORES else z_scaled
+    ax = _horizontal_bar_plot(subset, adjusted, middle, plot_type, kwargs)
+    ax.tick_params(axis="y", labelsize="small")
+    # "xlim" was stored in kwargs while the bars were being plotted
+    ax.set_xlim(kwargs.get("xlim", None))  # provide space for the labels
+    return ax

mgplot/test.py ADDED Viewed

@@ -0,0 +1,31 @@
+"""
+test.py
+Used in the testing of mgplot modules.
+This module is not intended to be used directly by the user.
+"""
+# --- imports
+from mgplot.settings import set_chart_dir, clear_chart_dir
+# --- constants
+# Root directory for test output. The trailing slash matters:
+# prepare_for_test() appends the subdirectory name directly to this string.
+TEST_CHART_DIR = "./zz-test-charts/"
+# --- functions
+def prepare_for_test(subdirectory: str = "unnamed") -> None:
+    """
+    Prepare the chart directory to receive test plot output.
+    Create the directory if it does not exist.
+    Set the chart_dir to the test directory.
+    Arguments:
+    - subdirectory: str - the subdirectory to create
+      in the test directory
+    """
+    test_chart_dir = f"{TEST_CHART_DIR}{subdirectory}"
+    set_chart_dir(str(test_chart_dir))
+    clear_chart_dir()