PyPI - plot-misc - Versions diffs - 2.2.0__py3-none-any.whl → 2.2.1__py3-none-any.whl - Mend

plot-misc 2.2.0 py3-none-any.whl → 2.2.1 py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20)

plot_misc/_version.py +1 -1
plot_misc/barchart.py +71 -29
plot_misc/constants.py +4 -4
plot_misc/errors.py +3 -3
plot_misc/example_data/examples.py +232 -0
plot_misc/forest.py +47 -47
plot_misc/heatmap.py +17 -20
plot_misc/incidencematrix.py +6 -6
plot_misc/machine_learning.py +28 -28
plot_misc/piechart.py +9 -9
plot_misc/survival.py +6 -7
plot_misc/utils/formatting.py +10 -13
plot_misc/utils/utils.py +26 -27
plot_misc/volcano.py +7 -7
{plot_misc-2.2.0.dist-info → plot_misc-2.2.1.dist-info}/METADATA +14 -5
{plot_misc-2.2.0.dist-info → plot_misc-2.2.1.dist-info}/RECORD +19 -20
{plot_misc-2.2.0.dist-info → plot_misc-2.2.1.dist-info}/WHEEL +1 -1
plot_misc/example_data/example_datasets/string_data.txt +0 -1
{plot_misc-2.2.0.dist-info → plot_misc-2.2.1.dist-info}/licenses/LICENSE +0 -0
{plot_misc-2.2.0.dist-info → plot_misc-2.2.1.dist-info}/top_level.txt +0 -0

plot_misc/_version.py CHANGED Viewed

	@@ -1 +1 @@
1	- __version__ = '2.2.0'
1	+ __version__ = '2.2.1'

plot_misc/barchart.py CHANGED Viewed

@@ -30,11 +30,12 @@ from plot_misc.errors import (
     is_df,
     Error_MSG,
 )
-from typing import Any, Optional
+from typing import Any
 from plot_misc.constants import Real
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 def bar(data:pd.DataFrame, label:str, column:str,
+        positions:np.ndarray | list[Real] | None = None,
         error_max:str | None = None, error_min:str | None = None,
         colours:list[str]=['tab:blue', 'tab:pink'], transparency:float=0.7,
         wd:Real=1.0, edgecolour:str='black',
@@ -54,9 +55,13 @@ def bar(data:pd.DataFrame, label:str, column:str,
         The column name for the axes labels.
     column : `str`
         The column name for the bar height values.
-    error_max : `str`, default `NoneType`
+    positions : `np.ndarray` or `list` or `None`, default `None`
+        Numeric positions for the bars along the category axis. If None,
+        bars are placed at integer positions 0, 1, 2, ... with tick
+        labels taken from the `label` column.
+    error_max : `str`, default `None`
         column name for the upper value of the error line segment.
-    error_min : `str`, default `NoneType`
+    error_min : `str`, default `None`
         column name for the lower value of the error line segment.
     colours : `list` [`str`], default ['tab:blue', 'tab:pink']
         Colours for the bars; recycled if shorter than the number of bars.
@@ -68,7 +73,7 @@ def bar(data:pd.DataFrame, label:str, column:str,
         The bar edgecolour.
     horizontal : `bool`, default `False`
         Whether plot a horizontal bar chart.
-    ax : `plt.ax`, default `NoneType`
+    ax : `plt.ax`, default `None`
         The pyplot.axes object.
     figsize : `tuple` [`float`, `float`], default (2, 2),
         The figure size in inches, when ax is set to None.
@@ -96,6 +101,7 @@ def bar(data:pd.DataFrame, label:str, column:str,
     is_df(data)
     is_type(label, str)
     is_type(column, str)
+    is_type(positions, (type(None), np.ndarray, list))
     is_type(colours, list)
     is_type(transparency, float)
     is_type(wd, (float, int))
@@ -117,10 +123,19 @@ def bar(data:pd.DataFrame, label:str, column:str,
     # ### check input
     if any(data.isna().any()):
         raise ValueError(Error_MSG.MISSING_DF.format('data'))
-    # ### get labels
+    # ### get labels and positions
     labels = data[label]
+    if positions is None:
+        pos = np.arange(len(labels))
+    else:
+        pos = np.asarray(positions)
+        if len(pos) != len(labels):
+            raise ValueError(
+                f'Length of positions ({len(pos)}) does not match '
+                f'number of rows in data ({len(labels)}).'
+            )
     # ### plotting
-    if horizontal == False:
+    if not horizontal:
         # plotting vertical bar chart
         new_kwargs = _update_kwargs(update_dict=kwargs_bar,
                                     edgecolor=edgecolour,
@@ -128,8 +143,10 @@ def bar(data:pd.DataFrame, label:str, column:str,
                                     alpha=transparency,
                                     zorder=2,
                                     )
-        bars = ax.bar(labels, height=data[column], **new_kwargs,
+        bars = ax.bar(pos, height=data[column], **new_kwargs,
                       )
+        ax.set_xticks(pos)
+        ax.set_xticklabels(labels)
     else:
         # plotting horizontal bar chart
         new_kwargs = _update_kwargs(update_dict=kwargs_bar,
@@ -138,13 +155,15 @@ def bar(data:pd.DataFrame, label:str, column:str,
                                     alpha=transparency,
                                     zorder=2,
                                     )
-        bars = ax.barh(labels, width=data[column], **new_kwargs,
+        bars = ax.barh(pos, width=data[column], **new_kwargs,
                        )
+        ax.set_yticks(pos)
+        ax.set_yticklabels(labels)
     # do we need to plot error bars
     if error_min is not None or error_max is not None:
         # finding the mid points of the bars and
         # initialising the bounds, allowing for one-sided limits.
-        if horizontal == False:
+        if not horizontal:
             min_l = [b.get_y() + b.get_height() for b in bars]
             max_l = min_l.copy()
         else:
@@ -164,7 +183,7 @@ def bar(data:pd.DataFrame, label:str, column:str,
                                     color='black',
                                     zorder=1,
                                     )
-        if horizontal == False:
+        if not horizontal:
             mids = [b.get_x() + b.get_width() / 2 for b in bars]
             ax.vlines(mids, min_l, max_l, **new_kwargs_error,)
         else:
@@ -178,10 +197,11 @@ def bar(data:pd.DataFrame, label:str, column:str,
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 def stack_bar(data:pd.DataFrame, label:str, columns:list[str],
+              positions:np.ndarray | list[Real] | None = None,
               colours:list[str]=['tab:blue', 'tab:pink'],
               transparency:float=0.7, wd:Real=1.0, edgecolour:str='black',
               horizontal:bool = False, figsize:tuple[Real,Real] = (2,2),
-              ax:plt.Axes | None = None, **kwargs:Optional[Any],
+              ax:plt.Axes | None = None, **kwargs:Any,
               ) -> tuple[plt.Figure, plt.Axes]:
     """
     Plot a stacked bar chart with each bar divided into segments.
@@ -194,6 +214,9 @@ def stack_bar(data:pd.DataFrame, label:str, columns:list[str],
         Column name used for bar labels.
     columns : `list` [`str`]
         Column names representing bar segments to stack.
+    positions : `np.ndarray` or `list` or `None`, default `None`
+        Numeric positions for the bars along the category axis. If None,
+        bars are placed at integer positions 0, 1, 2, ...
     colours : `list` [`str`]
         List of colours for each stack segment.
     transparency : `float`, default 0.7
@@ -204,7 +227,7 @@ def stack_bar(data:pd.DataFrame, label:str, columns:list[str],
         Colour for bar borders.
     horizontal : `bool`, default `False`
         Whether plot a horizontal barchart.
-    ax : `plt.ax`, default `NoneType`
+    ax : `plt.ax`, default `None`
         The pyplot.axes object.
     figsize : `tuple` [`float`, `float`], default (2, 2),
         The figure size in inches, when ax is set to None.
@@ -222,6 +245,7 @@ def stack_bar(data:pd.DataFrame, label:str, columns:list[str],
     is_df(data)
     is_type(label, str)
     is_type(columns, list)
+    is_type(positions, (type(None), np.ndarray, list))
     is_type(colours, list)
     is_type(transparency, float)
     is_type(wd, (float, int))
@@ -254,17 +278,17 @@ def stack_bar(data:pd.DataFrame, label:str, columns:list[str],
                                     color=colours[idx],
                                     alpha=transparency,
                                     )
-        if horizontal == False:
+        if not horizontal:
             new_kwargs = _update_kwargs(new_kwargs, bottom=left,
                                         )
         else:
             new_kwargs = _update_kwargs(new_kwargs, left=left,
                                         )
         # The actual plotting
-        # NOTE adding wd here because it bar assigns it to either width or
+        # NOTE adding wd here because bar assigns it to either width or
         # height depending on horizontal.
-        _, ax = bar(data=data, label=label, column=name, horizontal=horizontal,
-                    wd=wd, ax=ax, kwargs_bar=new_kwargs,
+        _, ax = bar(data=data, label=label, column=name, positions=positions,
+                    horizontal=horizontal, wd=wd, ax=ax, kwargs_bar=new_kwargs,
                     )
         # updating the coordinate where the last bar stops
         left = left + data[name]
@@ -277,6 +301,7 @@ def stack_bar(data:pd.DataFrame, label:str, columns:list[str],
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 def subtotal_bar(data:pd.DataFrame, label:str, total_col:str,
                  subtotal_col: str | None = None,
+                 positions:np.ndarray | list[Real] | None = None,
                  colours:tuple[str,str]=('grey','tab:blue'),
                  transparency:tuple[float,float]=(0.7,0.9),
                  wd:tuple[float,float]=(1,0.6),
@@ -299,8 +324,11 @@ def subtotal_bar(data:pd.DataFrame, label:str, total_col:str,
         Column name for axis labels.
     total_col : `str`
         Column containing values for the base (total) bars.
-    subtotal_col : `str` or `None`, default `NoneType`
+    subtotal_col : `str` or `None`, default `None`
         Column containing values for (smaller) overlaid subtotal bars.
+    positions : `np.ndarray` or `list` or `None`, default `None`
+        Numeric positions for the bars along the category axis. If None,
+        bars are placed at integer positions 0, 1, 2, ...
     colours : `tuple` [`str`,`str`], default ("grey", "tab:blue")
         Colours for the total and subtotal bars.
     transparency : `tuple` [`float`,`float`], default (0.7, 0.9)
@@ -339,6 +367,7 @@ def subtotal_bar(data:pd.DataFrame, label:str, total_col:str,
     is_type(ax, (type(None), plt.Axes))
     is_type(total_col, str)
     is_type(subtotal_col, (str, type(None)))
+    is_type(positions, (type(None), np.ndarray, list))
     is_type(zorder, tuple)
     is_type(colours, tuple)
     is_type(transparency, tuple)
@@ -370,6 +399,7 @@ def subtotal_bar(data:pd.DataFrame, label:str, total_col:str,
         ax=ax,
         label=label,
         column=total_col,
+        positions=positions,
         colours=[colours[0]],
         transparency=transparency[0],
         wd=wd[0],
@@ -378,7 +408,7 @@ def subtotal_bar(data:pd.DataFrame, label:str, total_col:str,
         kwargs_bar=new_total_kwargs_bar,
     )
     # plot subtotal
-    if not subtotal_col is None:
+    if subtotal_col is not None:
         subtotal = data[subtotal_col]
         # updating kwargs
         new_subtotal_kwargs_bar = _update_kwargs(
@@ -390,6 +420,7 @@ def subtotal_bar(data:pd.DataFrame, label:str, total_col:str,
             ax=ax,
             label=label,
             column=subtotal_col,
+            positions=positions,
             colours=[colours[1]],
             transparency=transparency[1],
             wd=wd[1],
@@ -405,6 +436,7 @@ def subtotal_bar(data:pd.DataFrame, label:str, total_col:str,
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 def group_bar(data:pd.DataFrame, label:str, columns:list[str],
+              group_positions:np.ndarray | list[Real] | None = None,
               errors_max:list[str] | None = None,
               errors_min:list[str] | None = None,
               colours:list[str]=['tab:blue', 'tab:pink'],
@@ -433,10 +465,14 @@ def group_bar(data:pd.DataFrame, label:str, columns:list[str],
         Column name for group labels.
     column : `list` [`str`]
         Value columns to plot as grouped bars.
-    errors_max : `list` [`str`] or `None`, default `NoneType`
+    group_positions : `np.ndarray` or `list` or `None`, default `None`
+        Numeric positions for the group centres along the category axis.
+        If None, groups are placed at positions determined by
+        `group_spacing` (0, group_spacing, 2*group_spacing, ...).
+    errors_max : `list` [`str`] or `None`, default `None`
         Column names in `data` containing the upper values of the error bars.
         Should be structured similarly to `columns` if used.
-    errors_min : `list` [`str`] or `None` default `NoneType`
+    errors_min : `list` [`str`] or `None` default `None`
         Column names in `data` containing the lower values of the error bars.
     colours : `list` [`str`], default ['tab:blue', 'tab:pink']
         Colours for the bars. Recycled if fewer colours than `columns`.
@@ -448,7 +484,7 @@ def group_bar(data:pd.DataFrame, label:str, columns:list[str],
         The bar edge colours.
     horizontal : `bool`, default `False`
         Whether plot a horizontal barchart.
-    ax : `plt.ax`, default `NoneType`
+    ax : `plt.ax`, default `None`
         The pyplot.axes object.
     figsize : `tuple` [`float`, `float`], default (2, 2),
         The figure size in inches, when ax is set to None.
@@ -464,11 +500,10 @@ def group_bar(data:pd.DataFrame, label:str, columns:list[str],
     ax : plt.Axes
         The matplotlib Axes object with the plot.
     """
-    # constants
-    OFFSET_COL = "__offset__"
     # check input - most will be done by bar, just keeping the minimum
     is_df(data)
     is_type(columns, list)
+    is_type(group_positions, (type(None), np.ndarray, list))
     is_type(errors_max, (type(None),list))
     is_type(errors_min, (type(None),list))
     is_type(horizontal, bool)
@@ -483,8 +518,16 @@ def group_bar(data:pd.DataFrame, label:str, columns:list[str],
     # ### prepare the loop
     # the number of bars for each group
     n_bars = len(columns)
-    # the number of groups
-    base = np.arange(data.shape[0]) * group_spacing
+    # the base position of each group
+    if group_positions is None:
+        base = np.arange(data.shape[0]) * group_spacing
+    else:
+        base = np.asarray(group_positions)
+        if len(base) != data.shape[0]:
+            raise ValueError(
+                f'Length of group_positions ({len(base)}) does not '
+                f'match number of rows in data ({data.shape[0]}).'
+            )
     # the total width of all the bars in a single group
     spacing_per_bar = bar_spacing * wd
     total_spacing = spacing_per_bar * (n_bars - 1)
@@ -494,20 +537,19 @@ def group_bar(data:pd.DataFrame, label:str, columns:list[str],
     group_width = wd * n_bars + total_spacing
     tick_pos = base + (group_width - wd) / 2
     # looping
-    df_offset = data.copy()
     for i, column in enumerate(columns):
         # the location of the bar
         offset = base + i * (wd + spacing_per_bar)
-        df_offset[OFFSET_COL] = offset
         # cycling the colours
         col = colours[i % len(colours)]
         # the limits
         err_max = errors_max[i] if errors_max else None
         err_min = errors_min[i] if errors_min else None
         _ = bar(
-            data=df_offset,
-            label=OFFSET_COL,
+            data=data,
+            label=label,
             column=column,
+            positions=offset,
             error_max=err_max,
             error_min=err_min,
             colours=[col],

plot_misc/constants.py CHANGED Viewed

@@ -42,9 +42,9 @@ class ForestNames(object):
     PVALUE                         = 'p-value'
     CI                             = 'confidence_interval'
     data_table                     = 'data_table'
-    EmpericalSupport_Coverage      = 'coverage'
-    EmpericalSupport_Compatability = 'compatibility'
-    EmpericalSupportResults        = 'results_'
+    EmpiricalSupport_Coverage      = 'coverage'
+    EmpiricalSupport_Compatibility = 'compatibility'
+    EmpiricalSupportResults        = 'results_'
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 # Utils Names
@@ -103,7 +103,7 @@ class NamesIncidenceMatrix(object):
     GRID_POS_O   = 'outline'
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-class NamesMachineLearnig(object):
+class NamesMachineLearning(object):
     '''
     Names used by machinelearning.py
     '''

plot_misc/errors.py CHANGED Viewed

@@ -95,7 +95,7 @@ def is_type(param: Any, types: tuple[Type] | Type,
         Expected type(s) of the object.
     param_name : `str` or `None`
         Name of the parameter. Will attempt to infer the parameter name if set
-        to `NoneType`.
+        to `None`.
     Returns
     -------
@@ -185,7 +185,7 @@ def are_columns_in_df(
     missing_columns = expected_columns_set - set(df.columns)
     # return
     if missing_columns:
-        if warning == False:
+        if not warning:
             raise InputValidationError(
                 message.format(missing_columns)
             )
@@ -273,7 +273,7 @@ def same_len(object1: Any, object2: Any, object_names: list[str] | None = None,
     if object_names is None:
         object_names = ['object1', 'object2']
     elif len(object_names) !=2:
-        raise ValueError('`object_names` should be `NoneType` or contain '
+        raise ValueError('`object_names` should be `None` or contain '
                          'two strings')
     # the actual test
     if n1 != n2:

plot_misc/example_data/examples.py CHANGED Viewed

@@ -74,6 +74,7 @@ import os
 import re
 import pandas as pd
 import numpy as np
+import matplotlib.lines as mlines
 from plot_misc.constants import (
     UtilsNames,
     ForestNames,
@@ -724,3 +725,234 @@ def load_survival_table(**kwargs):
     # return
     return table
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+@dataset
+def load_forest_preprocessed(**kwargs):
+    """
+    Loads the forest example data with subgroup colour and model shape
+    columns attached, ready for direct use by the forest plot.
+    Returns
+    -------
+    pd.DataFrame
+    """
+    # mapping literals (mirrors resources/examples/forestplot.ipynb cell 2)
+    col_dict = {
+        'wo T2DM/CVD': 'orangered',
+        'w T2DM': 'blueviolet',
+        'w T2DM & CVD': 'limegreen',
+    }
+    shape_dict = {
+        'PGS only': 'o',
+        'PGS plus': 's',
+        'PGS extended': 'H',
+    }
+    # base data (already carries the hardcoded ForestNames.y_col)
+    df = load_forest_data(**kwargs)
+    # attach colour and shape columns
+    df['col'] = df['subgroup_name'].map(col_dict)
+    df['shape'] = df['model'].map(shape_dict)
+    # return
+    return df
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+@dataset
+def load_barchart_preprocessed(**kwargs):
+    """
+    Loads data counting the number of associations between cardiac chambers
+    (`LV`, `RV`, `LA`) and cardiac outcomes.
+    Returns
+    -------
+    pd.DataFrame
+    """
+    # files
+    data = load_barchart_data(**kwargs).T
+    data['labels'] = data.index
+    data = data.loc[["Heart failure", "HCM", "DCM", "AF"]]
+    # return
+    return data
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+@dataset
+def load_groupbar_preprocessed(**kwargs):
+    """
+    Loads data representing mean and SD percentage of sarcomere disruption
+    per knockdown gene and control in iPS-CM
+    Returns
+    -------
+    pd.DataFrame
+    """
+    # files
+    data = load_groupbar_data(**kwargs)
+    # compute max-error columns from mean + std
+    for gene in ['Control', 'AP4S1', 'LRRC39', 'ZFAND4']:
+        data[f'{gene}_max'] = data[f'{gene}_mean'] + data[f'{gene}_std']
+    # return
+    return data
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+@dataset
+def load_subtotal_preprocessed(**kwargs):
+    """
+    Reformatting the load_barchart data to create preprocssed subtotal charts.
+    Returns
+    -------
+    pd.DataFrame
+    """
+    # files
+    data_w = load_barchart_data(**kwargs)
+    data = data_w.T
+    label = "labels"
+    total_col = "total"
+    data[label] = data_w.T.index
+    data[total_col] = data.drop(columns=[label]).sum(axis=1)
+    data['sub'] = data["LV"]
+    data.drop(labels=["LA", "LV", "RV"], inplace=True, axis=1)
+    data.sort_values(by=total_col, ascending=True, inplace=True)
+    return data
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+@dataset
+def load_survival_preprocessed(**kwargs) -> tuple[dict, pd.DataFrame]:
+    """
+    Returns survival rate data along with a survival table.
+    """
+    surv_table = create_survival_data(nrows=24)
+    surv_table2 = create_survival_data(nrows=15, survival_rate=0.04,
+                                       ci_width=0.45)
+    # extract at-risk counts at three time points
+    # import plot_misc.survival as pltm_surv
+    # bottom_table1 = pltm_surv.extract_follow_up(
+    #     surv_table, at_risk_col='at_risk', points=[0, 50, 100])
+    # bottom_table2 = pltm_surv.extract_follow_up(
+    #     surv_table2, at_risk_col='at_risk', points=[0, 50, 100])
+    # sel_col = ['time', 'group_1_at_risk_format']
+    # col_names = ['time', 'group 1', 'group 2']
+    # bottom_table = pd.merge(
+    #     bottom_table1[sel_col], bottom_table2[sel_col], on=['time'],
+    #     **kwargs)
+    # bottom_table.columns = col_names
+    bottom_table = pd.DataFrame(
+        {
+            "time": [0, 50, 100],
+            "group 1": ["1,000", "379", "121"],
+            "group 2": ["1,000", "143", "1"],
+        }
+    )
+    # make data dict
+    data_dict = dict(
+        curve1 = [surv_table, 'steelblue'],
+        curve2 = [surv_table2,'crimson']
+    )
+    return data_dict, bottom_table
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+@dataset
+def load_heatmap_preprocessed(**kwargs) -> pd.DataFrame:
+    """
+    Returns an example correlation matrix.
+    """
+    labels = [
+        "Var1",
+        "Var2",
+        "Var3",
+        "Var4",
+        "Var5",
+        "Var6",
+    ]
+    corr = np.array([
+        [1.00, 0.92, 0.81, 0.55, 0.30, 0.10],
+        [0.92, 1.00, 0.76, 0.50, 0.28, 0.08],
+        [0.81, 0.76, 1.00, 0.45, 0.22, 0.05],
+        [0.55, 0.50, 0.45, 1.00, 0.18, 0.02],
+        [0.30, 0.28, 0.22, 0.18, 1.00, -0.75],
+        [0.10, 0.08, 0.05, 0.02, -0.75, 1.00],
+    ])
+    corr = pd.DataFrame(
+        corr,
+        index=labels,
+        columns=labels,
+        **kwargs,
+    )
+    return corr
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+@dataset
+def load_bubble_preprocessed(**kwargs) -> tuple[pd.DataFrame, list, list]:
+    """
+    Returns the data needed for a bubble chart
+    """
+    # create an 8x5 dataframe filled with zeros
+    table = pd.DataFrame(
+        np.zeros((8, 5), dtype=int),
+        columns=['GENE1', 'GENE2', 'GENE3', 'GENE4', 'GENE5'],
+        index=['CRP', 'SBP', 'BMI', 'HbA1c', 'eGFR', 'CHD',
+               'Stroke', 'T2DM']
+    )
+    # set two entries to 4
+    table.loc['SBP', 'GENE3'] = 4
+    table.loc['CHD', 'GENE4'] = 4
+    # set one entry to 3
+    table.loc['HbA1c', 'GENE2'] = 3
+    # set one entry to 4 (third entry with value 4)
+    table.loc['BMI', 'GENE5'] = 4
+    # set four entries to 1
+    table.loc['CRP', 'GENE1'] = 2
+    table.loc['eGFR', 'GENE2'] = 1
+    table.loc['Stroke', 'GENE4'] = 1
+    table.loc['T2DM', 'GENE5'] = 1
+    # Define cut-offs and mappings
+    DOT_COLOUR = [
+        ('#AAAAAA', 0.9),   # grey for (−inf, 0.2]
+        ('#d65db1', 1.9),
+        ('#ff6f91', 2.9),
+        ('#008f7a', 3.9),
+        ('#ffc75f', 4.9),
+    ]
+    # Size thresholds: 2 categories
+    DOT_SIZE = [
+        (0, .9),   # grey for (−inf, 0.2]
+        (20, 1.9),
+        (40, 2.9),
+        (60, 3.9),
+        (80, 4.9),
+    ]
+    # legend handle
+    _SCATTER_KW = dict(
+        marker='o', linestyle='none',
+        markeredgecolor='black', markeredgewidth=0.4,
+    )
+    _COLOURS = [c for c, _ in DOT_COLOUR[1:]]
+    _LABELS  = ['1', '2', '3', '4']
+    _DOT_SIZE_VALS = [s for s, _ in DOT_SIZE[1:]]
+    handles = [
+        mlines.Line2D(
+            [], [], markerfacecolor=col,
+            markersize=2 + 6 * s / 80 if s > 0 else 2,
+            label=lbl, **_SCATTER_KW,
+        )
+        for col, s, lbl in zip(_COLOURS, _DOT_SIZE_VALS, _LABELS)
+    ]
+    return table, [DOT_COLOUR, DOT_SIZE], handles
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+@dataset
+def load_forest_preprocessed(**kwargs) -> pd.DataFrame:
+    """
+    Returns the data needed for a forest plot
+    """
+    # subsetting the data on the exposure
+    data = load_mace_associations(**kwargs)
+    exposure = 'LDL-C (mmol/L)'
+    dat = data[data['Exposure'] == exposure].reset_index()
+    table = data[data['Exposure'] == 'Apo-B (g/L)'].reset_index()
+    table['Variable'] = [f'Variable {str(s)}' for s in range(1,table.shape[0]+1)]
+    # add y-coordinates
+    dat['Independent'] = ['Exposure ' + str(i+1) for i in range(dat.shape[0])]
+    dat['y_axis'] = [1.0, 5.0, 9.0, 13.0, 17.0, 21.0, 25.0]
+    return dat

plot-misc 2.2.0__py3-none-any.whl → 2.2.1__py3-none-any.whl

plot-misc 2.2.0 py3-none-any.whl → 2.2.1 py3-none-any.whl