PyPI - mgplot - Versions diffs - 0.1.0__py3-none-any.whl - Mend

mgplot 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

mgplot/__init__.py +121 -0
mgplot/bar_plot.py +107 -0
mgplot/colors.py +199 -0
mgplot/date_utils.py +324 -0
mgplot/finalise_plot.py +335 -0
mgplot/finalisers.py +364 -0
mgplot/growth_plot.py +275 -0
mgplot/kw_type_checking.py +460 -0
mgplot/line_plot.py +178 -0
mgplot/multi_plot.py +339 -0
mgplot/postcovid_plot.py +106 -0
mgplot/py.typed +1 -0
mgplot/revision_plot.py +60 -0
mgplot/run_plot.py +182 -0
mgplot/seastrend_plot.py +74 -0
mgplot/settings.py +164 -0
mgplot/summary_plot.py +240 -0
mgplot/test.py +31 -0
mgplot/utilities.py +254 -0
mgplot-0.1.0.dist-info/METADATA +53 -0
mgplot-0.1.0.dist-info/RECORD +24 -0
mgplot-0.1.0.dist-info/WHEEL +5 -0
mgplot-0.1.0.dist-info/licenses/LICENSE +8 -0
mgplot-0.1.0.dist-info/top_level.txt +1 -0

mgplot/multi_plot.py ADDED Viewed

@@ -0,0 +1,339 @@
+"""
+multi_plot.py
+This module provides functions to create multiple plots
+from a single dataset
+- multi_start()
+- multi_column()
+And to chain a plotting function with the finalise_plot() function.
+- plot_then_finalise()
+Underlying assumptions:
+- every plot function:
+    - has a mandatory data: DataFrame | Series argument first (noting
+      that some plotting functions only work with Series data, and they
+      will raise an error if they are passed a DataFrame).
+    - accepts an optional plot_from: int | Period keyword argument
+    - returns a matplotlib Axes object
+- the multi functions (all in this module)
+    - have a mandatory data: DataFrame | Series argument
+    - have a mandatory function: Callable | list[Callable] argument
+        and otherwise pass their kwargs to the next function
+        when execution is transferred to the next function.
+    - the multi functions can be chained together.
+    - return None.
+And why are these three public functions all in the same module?
+- They all work with the same underlying assumptions.
+- They all take a function argument/list to which execution is
+  passed.
+- They all use the same underlying logic to extract the first
+  function from the function argument, and to store any remaining
+  functions in the kwargs['function'] argument.
+Note: rather than passing the kwargs dict directly, we re-pack it (**kwargs) at each call.
+"""
+# --- imports
+from typing import Any, Callable, Final
+from collections.abc import Iterable
+from pandas import Period, DataFrame, Series, period_range
+from numpy import random
+from mgplot.kw_type_checking import (
+    limit_kwargs,
+    ExpectedTypeDict,
+    report_kwargs,
+)
+from mgplot.finalise_plot import finalise_plot, FINALISE_KW_TYPES
+from mgplot.settings import DataT
+from mgplot.test import prepare_for_test
+from mgplot.utilities import check_clean_timeseries
+from mgplot.line_plot import line_plot, LP_KW_TYPES
+from mgplot.bar_plot import bar_plot, BAR_PLOT_KW_TYPES
+from mgplot.seastrend_plot import seastrend_plot
+from mgplot.postcovid_plot import postcovid_plot
+from mgplot.revision_plot import revision_plot, REVISION_KW_TYPES
+from mgplot.run_plot import run_plot, RUN_KW_TYPES
+from mgplot.summary_plot import summary_plot, SUMMARY_KW_TYPES
+from mgplot.growth_plot import series_growth_plot, raw_growth_plot, GROWTH_KW_TYPES
+# --- constants
+EXPECTED_CALLABLES: Final[dict[Callable, ExpectedTypeDict]] = {
+    # Maps each known plotting function to the table of keyword
+    # arguments associated with it.
+    # used by plot_then_finalise() to (1) check the target function
+    # is one of the expected functions, and (2) to limit the kwargs
+    # passed on, to the expected keyword arguments for that function.
+    line_plot: LP_KW_TYPES,
+    bar_plot: BAR_PLOT_KW_TYPES,
+    seastrend_plot: LP_KW_TYPES,  # just calls line_plot under the hood
+    postcovid_plot: LP_KW_TYPES,  # just calls line_plot under the hood
+    revision_plot: REVISION_KW_TYPES,
+    # run_plot takes the line_plot keyword arguments plus its own
+    run_plot: LP_KW_TYPES | RUN_KW_TYPES,
+    summary_plot: SUMMARY_KW_TYPES,
+    # both growth plotting functions share one table
+    series_growth_plot: GROWTH_KW_TYPES,
+    raw_growth_plot: GROWTH_KW_TYPES,
+}
+# --- private functions
+def first_unchain(
+    function: Callable | list[Callable],
+    **kwargs,
+) -> tuple[Callable, dict[str, Any]]:
+    """
+    Split a chain of functions into its first function and the rest.
+    Extract the first Callable from function (which may be
+    a stand-alone Callable or a non-empty list of Callables).
+    Store the remaining Callables in kwargs['function'].
+    This allows for chaining multiple functions together.
+    Parameters
+    - function: Callable | list[Callable] - the function, or the chain
+      of functions, to which execution will be passed.
+    - **kwargs - the keyword arguments that accompany the chain.
+    Returns a tuple containing the first function and the updated kwargs.
+    If function is a list of Callables, the first function is removed
+    from the list, and the remaining functions (if any) are stored in a
+    new list under the key "function" in kwargs. The caller's list is
+    not modified.
+    Raises ValueError if function is an empty list, is not callable, or
+    is a list containing anything that is not callable.
+    Not intended for direct use by the user.
+    """
+    error_msg = "function must be a Callable or a non-empty list of Callables"
+    if isinstance(function, list):
+        if not function:
+            raise ValueError(error_msg)
+        first, *rest = function
+    else:
+        first, rest = function, []
+    # Validate every link now, so that a malformed chain fails before any
+    # plotting starts, rather than part way through with an opaque
+    # "object is not callable" TypeError.
+    if not callable(first) or not all(callable(link) for link in rest):
+        raise ValueError(error_msg)
+    if rest:
+        kwargs["function"] = rest
+    return first, kwargs
+# --- public functions
+def plot_then_finalise(
+    data: DataT,
+    function: Callable | list[Callable],
+    **kwargs,
+) -> None:
+    """
+    Run a plotting function and then hand its Axes to finalise_plot().
+    This is designed to be the last function in a chain.
+    Parameters
+    - data: Series | DataFrame - The data to be plotted.
+    - function: Callable | list[Callable] - The plotting function
+      to be used (the first entry, if a list is given).
+    - **kwargs: Keyword arguments shared between the plotting function
+      and finalise_plot(). For a plotting function listed in
+      EXPECTED_CALLABLES only its expected keyword arguments are passed
+      on; any other callable receives all of them. finalise_plot()
+      receives those limited by FINALISE_KW_TYPES.
+    Returns None.
+    """
+    # --- report and check the inputs
+    report_kwargs(called_from="plot_then_finalise", **kwargs)
+    data = check_clean_timeseries(data)
+    # --- peel the plotting function off the front of the chain
+    plotter, passthrough = first_unchain(function, **kwargs)
+    known_types = EXPECTED_CALLABLES.get(plotter)
+    if known_types is None:
+        # not a function we know about: pass everything and hope
+        print(f"Unknown proposed function: {plotter}; nonetheless, will give it a try.")
+        plot_kwargs = passthrough
+    else:
+        plot_kwargs = limit_kwargs(known_types, **kwargs)
+    # --- plot, then finalise
+    axes = plotter(data, **plot_kwargs)
+    finalise_plot(axes, **limit_kwargs(FINALISE_KW_TYPES, **kwargs))
+def multi_start(
+    data: DataT,
+    function: Callable | list[Callable],
+    starts: Iterable[None | Period | int],
+    **kwargs,
+) -> None:
+    """
+    Produce one plot per starting point from the same data.
+    Parameters
+    - data: Series | DataFrame - The data to be plotted.
+    - function: Callable | list[Callable] - The plotting function
+      (or chain of functions) to be used.
+    - starts: Iterable[Period | int | None] - The starting points
+      for each plot (None means use the entire data). Each one is
+      passed on as the plot_from keyword argument.
+    - **kwargs: Additional keyword arguments to be passed to
+      the plotting function.
+    Returns None.
+    Raises
+    - ValueError if starts is not an iterable.
+    Note: kwargs['tag'] is used as the stem of a unique tag for each
+    plot; the position of the start within starts is appended to it.
+    """
+    # --- report and check the inputs
+    report_kwargs(called_from="multi_start", **kwargs)
+    data = check_clean_timeseries(data)
+    if not isinstance(starts, Iterable):
+        raise ValueError("starts must be an iterable of None, Period or int")
+    # --- separate the next function from the rest of the chain
+    tag_stem: Final[str] = kwargs.get("tag", "")
+    target, shared_kwargs = first_unchain(function, **kwargs)
+    # --- one call per start, each with its own keyword arguments
+    for position, start in enumerate(starts):
+        per_plot = shared_kwargs | {
+            "tag": f"{tag_stem}_{position}",
+            "plot_from": start,  # rely on plotting function to constrain the data
+        }
+        target(data, **per_plot)
+def multi_column(
+    data: DataFrame,
+    function: Callable | list[Callable],
+    **kwargs,
+) -> None:
+    """
+    Create multiple plots, one for each column in a DataFrame.
+    The plot title will be the 'title' keyword argument (if any)
+    followed by the column name.
+    Parameters
+    - data: DataFrame - The data to be plotted.
+    - function: Callable | list[Callable] - The plotting function
+      (or chain of functions) to be used.
+    - **kwargs: Additional keyword arguments to be passed to
+      the plotting function.
+    Returns None.
+    Raises
+    - TypeError if data is not a DataFrame.
+    Note: kwargs['tag'] is used as the stem of a unique tag for each
+    plot; the position of the column is appended to it.
+    """
+    # --- sanity checks
+    report_kwargs(called_from="multi_column", **kwargs)
+    data = check_clean_timeseries(data)
+    if not isinstance(data, DataFrame):
+        # without this, a Series fails below with an AttributeError
+        # on data.columns, which is harder to interpret
+        raise TypeError("data must be a pandas DataFrame for multi_column()")
+    # --- check the function argument
+    title_stem = kwargs.get("title", "")
+    tag: Final[str] = kwargs.get("tag", "")
+    first, kwargs = first_unchain(function, **kwargs)
+    # --- iterate over the columns
+    for i, col in enumerate(data.columns):
+        frame = data[[col]]  # a one-column DataFrame, not a Series
+        kw = kwargs.copy()  # copy, so each plot gets its own kwargs
+        kw["title"] = f"{title_stem}{col}" if title_stem else col
+        kw["tag"] = f"_{tag}_{i}".replace("__", "_")
+        first(frame, **kw)
+# --- test: exercise the public functions when run as a script
+if __name__ == "__main__":
+    prepare_for_test("multi_plot")
+    # one year of daily random data in three columns
+    dates = period_range("2020-01-01", "2020-12-31", freq="D")
+    df = DataFrame(
+        {
+            "Series 1": random.rand(len(dates)),
+            "Series 2": random.rand(len(dates)),
+            "Series 3": random.rand(len(dates)),
+        },
+        index=dates,
+    )
+    # Test multi_start: starts given as None, as integers and as a Period
+    multi_start(
+        df,
+        function=[plot_then_finalise, line_plot],
+        starts=[None, 50, 100, Period("2020-06-01", freq="D")],
+        title="Test Multi Start: ",
+        tag="tag_test",
+    )
+    # Test multi_column
+    multi_column(
+        df, function=[plot_then_finalise, line_plot], title="Test Multi Column: "
+    )
+    # Test multi_column chained with multi_start
+    multi_column(
+        df,
+        [multi_start, plot_then_finalise, line_plot],
+        title="Test Multi Column / Multi start: ",
+        starts=[None, 180],
+        verbose=False,
+    )
+    # bar plot
+    # Test 1a: plot_then_finalise called directly with a single function
+    series_ = Series([1, 2, 3, 4, 5], index=list("ABCDE"))
+    plot_then_finalise(
+        series_,
+        function=bar_plot,
+        title="Bar Plot Example 1a",
+        xlabel="X-axis Label",
+        ylabel="Y-axis Label",
+        rotation=45,
+    )
+    # Test 1b: the same series through multi_start
+    multi_start(
+        series_,
+        function=[plot_then_finalise, bar_plot],
+        starts=[0, -2],
+        title="Multi-start Bar Plot Example 1b",
+        xlabel="X-axis Label",
+        ylabel="Y-axis Label",
+    )
+# --- test: calls that are expected to raise ValueError or TypeError
+if __name__ == "__main__":
+    # --- check that this fails (empty DataFrame)
+    try:
+        multi_start(
+            data=DataFrame(),
+            function=[plot_then_finalise],
+            starts=[0, 1],
+        )
+    except (ValueError, TypeError) as e:
+        print(f"Expected error: {e}")
+    # --- check that this fails (a Series where a DataFrame is required)
+    try:
+        multi_column(
+            data=Series([1, 2, 3]),  # type: ignore # Series is not a DataFrame
+            function=[plot_then_finalise],
+        )
+    except (ValueError, TypeError) as e:
+        print(f"Expected error: {e}")
+    # --- check that this fails (empty function list)
+    try:
+        plot_then_finalise(
+            data=Series([1, 2, 3]),
+            function=[],
+        )
+    except (ValueError, TypeError) as e:
+        print(f"Expected error: {e}")

mgplot/postcovid_plot.py ADDED Viewed

@@ -0,0 +1,106 @@
+"""
+postcovid_plot.py
+Plot the pre-COVID trajectory against the current trend.
+"""
+# --- imports
+from pandas import DataFrame, Series, Period, PeriodIndex
+from matplotlib.pyplot import Axes
+from numpy import arange, polyfit
+from mgplot.settings import DataT, get_setting
+from mgplot.line_plot import line_plot
+from mgplot.utilities import check_clean_timeseries
+from mgplot.kw_type_checking import report_kwargs
+# --- constants
+# names of keyword arguments given default values in postcovid_plot()
+WIDTH = "width"
+STYLE = "style"
+# names of the keyword arguments for the start and end of the
+# regression window (postcovid_plot() reads "start_r" and "end_r")
+START_R = "start_r"
+END_R = "end_r"
+# --- functions
+def get_projection(original: Series, to_period: Period) -> Series:
+    """
+    Fit a straight line to the start of a series and extend it.
+    The line is fitted (first-degree polyfit against the observation
+    number) to the data from the start of original up to and including
+    to_period. Returns that line evaluated over the whole index of
+    original.
+    """
+    fit_window = original[original.index <= to_period]
+    slope, intercept = polyfit(arange(len(fit_window)), fit_window, 1)
+    positions = arange(len(original))
+    return Series((positions * slope) + intercept, index=original.index)
+def postcovid_plot(data: DataT, **kwargs) -> Axes:
+    """
+    Plot a series against a linear projection of its pre-COVID trend.
+    Arguments
+    - data - the series to be plotted (note that this function
+      is designed to work with a single series, not a DataFrame).
+    - **kwargs - same as for line_plot() and finalise_plot(), plus
+        - start_r - the start of the regression period
+        - end_r - the end of the regression period
+      The defaults for these depend on the frequency of the index.
+      A plot_from argument is ignored (with a printed warning).
+    Returns the Axes produced by line_plot().
+    Raises:
+    - TypeError if series is not a pandas Series
+    - ValueError if series does not have a D, M or Q frequency
+    - ValueError if regression start is not before regression end
+    """
+    # --- sanity checks
+    report_kwargs(called_from="postcovid_plot", **kwargs)
+    data = check_clean_timeseries(data)
+    if not isinstance(data, Series):
+        raise TypeError("The series argument must be a pandas Series")
+    series: Series = data
+    freq = PeriodIndex(series.index).freqstr  # PeriodIndex() helps type hinting
+    if freq[:1] not in ("Q", "M", "D"):
+        raise ValueError("The series index must have a D, M or Q freq")
+    # rely on line_plot() to validate kwargs
+    if "plot_from" in kwargs:
+        print("Warning: the 'plot_from' argument is ignored in postcovid_plot().")
+        del kwargs["plot_from"]
+    # --- regression window: frequency-based defaults, unless overridden
+    default_windows = {
+        "Q": ("2014Q4", "2019Q4"),
+        "M": ("2015-01", "2020-01"),
+        "D": ("2015-01-01", "2020-01-01"),
+    }
+    default_start, default_end = (
+        Period(label, freq=freq) for label in default_windows[freq[0]]
+    )
+    start_regression = Period(kwargs.pop(START_R, default_start), freq=freq)
+    end_regression = Period(kwargs.pop(END_R, default_end), freq=freq)
+    if start_regression >= end_regression:
+        raise ValueError("Start period must be before end period")
+    # --- combine data and projection (the COVID counterfactual)
+    recent = series[series.index >= start_regression].copy()
+    recent.name = "Series"
+    projection = get_projection(recent, end_regression)
+    projection.name = "Pre-COVID projection"
+    data_set = DataFrame([projection, recent]).T
+    # --- defaults for line_plot(), which the caller may override
+    for key, default in (
+        (WIDTH, [get_setting("line_normal"), get_setting("line_wide")]),
+        (STYLE, ["--", "-"]),
+        ("legend", True),
+    ):
+        kwargs[key] = kwargs.pop(key, default)
+    return line_plot(data_set, **kwargs)

mgplot/py.typed ADDED Viewed

	@@ -0,0 +1 @@
1	+

mgplot/revision_plot.py ADDED Viewed

@@ -0,0 +1,60 @@
+"""
+revision_plot.py
+Plot ABS revisions to estimates over time.
+"""
+# --- imports
+from pandas import Series
+from matplotlib.pyplot import Axes
+from mgplot.utilities import annotate_series, check_clean_timeseries
+from mgplot.line_plot import LP_KW_TYPES, line_plot
+from mgplot.kw_type_checking import validate_kwargs, validate_expected
+from mgplot.kw_type_checking import report_kwargs
+from mgplot.settings import DataT
+from mgplot.kw_type_checking import ExpectedTypeDict
+# --- constants
+# name of the keyword argument read by revision_plot() for annotations
+ROUNDING = "rounding"
+# keyword arguments accepted by revision_plot(): rounding plus
+# everything in LP_KW_TYPES (on a shared key, the LP_KW_TYPES entry wins)
+REVISION_KW_TYPES: ExpectedTypeDict = {
+    ROUNDING: (int, bool),
+} | LP_KW_TYPES
+# runs at import time
+validate_expected(REVISION_KW_TYPES, "revision_plot")
+# --- functions
+def revision_plot(data: DataT, **kwargs) -> Axes:
+    """
+    Plot the revisions to ABS data, and annotate the last value
+    of each revision.
+    Arguments
+    data: pd.DataFrame - the data to plot, the DataFrame has a
+        column for each data revision
+    kwargs : dict :
+        -   plot_from: passed on to line_plot(); defaults to -18
+            when not supplied.
+        -   rounding: int | bool - if True apply default rounding, otherwise
+            apply int rounding (passed to annotate_series()).
+        -   the other keyword arguments in REVISION_KW_TYPES, which
+            are passed on to line_plot().
+    Returns the matplotlib Axes object from line_plot().
+    Raises TypeError if data is a Series rather than a DataFrame.
+    """
+    # --- sanity checks
+    data = check_clean_timeseries(data)
+    if isinstance(data, Series):
+        # fail before anything is drawn, rather than with an
+        # AttributeError on data.columns after the plot is made
+        raise TypeError("data must be a pandas DataFrame for revision_plot()")
+    report_kwargs(called_from="revision_plot", **kwargs)
+    validate_kwargs(REVISION_KW_TYPES, "revision_plot", **kwargs)
+    # --- critical defaults
+    kwargs.setdefault("plot_from", -18)
+    rounding: int | bool = kwargs.get(ROUNDING, True)
+    # --- plot (note: all kwargs, including rounding, go to line_plot)
+    axes = line_plot(data, **kwargs)
+    # --- Annotate the last value in each series ...
+    for c in data.columns:
+        col: Series = data.loc[:, c].dropna()
+        annotate_series(col, axes, color="#222222", rounding=rounding, fontsize="small")
+    return axes

mgplot/run_plot.py ADDED Viewed

@@ -0,0 +1,182 @@
+"""
+run_plot.py
+This module contains a function to plot a series and highlight
+the 'runs' in it.
+"""
+# --- imports
+from collections.abc import Sequence
+from pandas import Series, concat
+from matplotlib.pyplot import Axes
+from matplotlib import patheffects as pe
+from mgplot.settings import DataT
+from mgplot.line_plot import line_plot
+from mgplot.kw_type_checking import (
+    limit_kwargs,
+    ExpectedTypeDict,
+    validate_kwargs,
+    validate_expected,
+    report_kwargs,
+)
+from mgplot.line_plot import LP_KW_TYPES
+from mgplot.utilities import constrain_data, check_clean_timeseries
+# --- constants
+# names of the keyword arguments specific to run_plot()
+THRESHOLD = "threshold"
+ROUND = "round"
+HIGHLIGHT = "highlight"
+DIRECTION = "direction"
+# expected types for those keyword arguments
+# NOTE(review): the nested tuple for HIGHLIGHT presumably means
+# "a str, or a Sequence of str" - confirm against kw_type_checking
+RUN_KW_TYPES: ExpectedTypeDict = {
+    THRESHOLD: float,
+    ROUND: int,
+    HIGHLIGHT: (str, Sequence, (str,)),  # colors for highlighting the runs
+    DIRECTION: str,  # "up", "down" or "both"
+}
+# runs at import time; note the label differs from the function name run_plot
+validate_expected(RUN_KW_TYPES, "run_highlight_plot")
+# --- functions
+def _identify_runs(
+    series: Series,
+    threshold: float,
+    up: bool,  # False means down
+) -> tuple[Series, Series]:
+    """Identify monotonic increasing/decreasing runs.
+    Arguments
+    - series - the data in which to look for runs
+    - threshold - differences between consecutive values that are not
+      greater than threshold (in absolute size) are ignored
+    - up - True to look for upward runs, False for downward runs
+    Returns a tuple of two Series
+    - a run counter for those change points that face in the
+      requested direction (the caller iterates over it from 1), and
+    - the change points: the retained differences, in index order.
+    """
+    diffed = series.diff()
+    # keep only the differences that exceed the threshold, either way
+    change_points = concat(
+        [diffed[diffed.gt(threshold)], diffed[diffed.lt(-threshold)]]
+    ).sort_index()
+    # make sure the first index label is present, with a zero change
+    if series.index[0] not in change_points.index:
+        starting_point = Series([0], index=[series.index[0]])
+        change_points = concat([change_points, starting_point]).sort_index()
+    # True where the change is in the direction being looked for
+    facing = change_points > 0 if up else change_points < 0
+    # a run starts where facing is True and the previous entry was not;
+    # the cumulative sum of the starts numbers the runs
+    # NOTE(review): shift() leaves a missing first value which astype(bool)
+    # presumably turns into True, so the first entry never starts a run - confirm
+    cycles = (facing & ~facing.shift().astype(bool)).cumsum()
+    return cycles[facing], change_points
+def _plot_runs(
+    axes: Axes,
+    series: Series,
+    up: bool,
+    **kwargs,
+) -> None:
+    """Highlight the runs of a series.
+    Arguments
+    - axes - the Axes on which the series has been plotted
+    - series - the plotted series
+    - up - True to highlight upward runs, False for downward runs
+    - **kwargs - must include the threshold, round and highlight
+      keyword arguments (run_plot() supplies defaults for these).
+      highlight is either one color string, or a sequence of colors
+      in which the first is used for upward runs and the second
+      for downward runs.
+    Each run is shaded with axvspan() and labelled with the sum of
+    its changes, followed by " pp". Nothing is drawn if no run is found.
+    Raises ValueError if highlight is neither a str nor a Sequence.
+    """
+    threshold = kwargs[THRESHOLD]
+    match kwargs.get(HIGHLIGHT):  # make sure highlight is a color string
+        case str():
+            highlight = kwargs.get(HIGHLIGHT)
+        case Sequence():
+            highlight = kwargs[HIGHLIGHT][0] if up else kwargs[HIGHLIGHT][1]
+        case _:
+            raise ValueError(
+                f"Invalid type for highlight: {type(kwargs.get(HIGHLIGHT))}. "
+                "Expected str or Sequence."
+            )
+    # highlight the runs
+    stretches, change_points = _identify_runs(series, threshold, up=up)
+    if stretches.empty:
+        # no runs in this direction: max() of an empty Series is NaN,
+        # which range() below cannot accept
+        return
+    series_min, series_max = series.min(), series.max()  # loop invariant
+    for k in range(1, int(stretches.max()) + 1):
+        stretch = stretches[stretches == k]
+        axes.axvspan(
+            stretch.index.min(),
+            stretch.index.max(),
+            color=highlight,
+            zorder=-1,
+        )
+        # put the label at whichever of the top or bottom of the plot
+        # is further from the data in this run
+        space_above = series_max - series[stretch.index].max()
+        space_below = series[stretch.index].min() - series_min
+        y_pos, vert_align = (
+            (series_max, "top")
+            if space_above > space_below
+            else (series_min, "bottom")
+        )
+        text = axes.text(
+            x=stretch.index.min(),
+            y=y_pos,
+            s=(
+                change_points[stretch.index].sum().round(kwargs[ROUND]).astype(str)
+                + " pp"
+            ),
+            va=vert_align,
+            ha="left",
+            fontsize="small",
+            rotation=90,
+        )
+        # white outline keeps the label readable over the plotted line
+        text.set_path_effects([pe.withStroke(linewidth=5, foreground="w")])
+def run_plot(series: DataT, **kwargs) -> Axes:
+    """Plot a series as a line and highlight its runs.
+    Arguments
+     - series - ordered pandas Series of percentages, with PeriodIndex
+     - **kwargs
+        - threshold - float - used to ignore micro noise near zero
+          (for example, threshold=0.01); defaults to 0.1
+        - round - int - rounding for highlight text; defaults to 2
+        - highlight - str or Sequence[str] - color(s) for highlighting the
+          runs, two colors can be specified in a list if direction is "both"
+        - direction - str - whether the highlight is for an upward
+          or downward or both runs. Options are "up", "down" or "both";
+          defaults to "up"
+        - in addition the **kwargs for line_plot are accepted.
+    Return
+     - matplotlib Axes object
+    Raises
+     - TypeError if series is not a pandas Series
+     - ValueError if direction is not "up", "down" or "both" """
+    # --- sanity checks
+    series = check_clean_timeseries(series)
+    if not isinstance(series, Series):
+        raise TypeError("series must be a pandas Series for run_plot()")
+    series, kwargs = constrain_data(series, **kwargs)
+    # --- check the kwargs
+    report_kwargs(called_from="run_plot", **kwargs)
+    validate_kwargs(RUN_KW_TYPES | LP_KW_TYPES, "run_plot", **kwargs)
+    # --- fill in whatever the caller did not supply
+    kwargs.setdefault(THRESHOLD, 0.1)
+    kwargs.setdefault(ROUND, 2)
+    direct = kwargs.setdefault(DIRECTION, "up")
+    if direct == "up":
+        default_highlight, default_color = "gold", "#dd0000"
+    elif direct == "down":
+        default_highlight, default_color = "skyblue", "navy"
+    else:
+        default_highlight, default_color = ("gold", "skyblue"), "navy"
+    kwargs.setdefault(HIGHLIGHT, default_highlight)
+    kwargs.setdefault("color", default_color)
+    # defaults for line_plot
+    kwargs.setdefault("width", 2)
+    kwargs.setdefault("drawstyle", "steps-post")
+    # --- plot the line, with only the kwargs line_plot expects
+    axes = line_plot(series, **limit_kwargs(LP_KW_TYPES, **kwargs))
+    # --- plot the runs: one pass per direction requested
+    direction = kwargs[DIRECTION]
+    if direction == "up":
+        passes = (True,)
+    elif direction == "down":
+        passes = (False,)
+    elif direction == "both":
+        passes = (True, False)
+    else:
+        raise ValueError(
+            f"Invalid value for direction: {kwargs[DIRECTION]}. "
+            "Expected 'up', 'down', or 'both'."
+        )
+    for upward in passes:
+        _plot_runs(axes, series, up=upward, **kwargs)
+    return axes