forestplotx-1.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,9 @@
1
+ from .plot import forest_plot
2
+ from ._normalize import _normalize_model_output as normalize_model_output
3
+
4
+ __version__ = "1.0.0"
5
+
6
+ __all__ = [
7
+ "forest_plot",
8
+ "normalize_model_output",
9
+ ]
@@ -0,0 +1,272 @@
1
+ from collections.abc import Mapping
2
+ import math
3
+ import warnings
4
+ from typing import Any
5
+
6
+ import numpy as np
7
+ from matplotlib.axes import Axes
8
+ from matplotlib.ticker import FixedLocator, FuncFormatter, NullFormatter, NullLocator
9
+
10
+
11
def _nice_linear_step(raw_step: float) -> float:
    """Round *raw_step* up to a readable step size (1, 2, or 5 times 10^k)."""
    if raw_step <= 0:
        # Degenerate request; fall back to a unit step.
        return 1.0
    power = 10 ** math.floor(math.log10(raw_step))
    mantissa = raw_step / power
    for nice in (1, 2, 5):
        if mantissa <= nice:
            return nice * power
    return 10 * power
26
+
27
+
28
def _format_decimal(value: float, precision: int = 6) -> str:
    """Render *value* in plain positional notation (never scientific)."""
    # trim="-" drops trailing zeros and any dangling decimal point.
    formatted = np.format_float_positional(value, precision=precision, trim="-")
    return formatted
31
+
32
+
33
def _decimals_from_ticks(ticks: np.ndarray, max_decimals: int = 3) -> int:
    """Infer a readable fixed decimal count from adjacent tick spacing."""
    values = np.sort(np.asarray(ticks, dtype=float))
    if values.size < 2:
        # Not enough ticks to measure spacing; use a sensible default.
        return 2
    gaps = np.diff(values)
    gaps = gaps[np.isfinite(gaps) & (gaps > 0)]
    if gaps.size == 0:
        return 2
    smallest = float(gaps.min())
    # One decimal place per negative power of ten in the tightest gap.
    needed = int(max(0, -math.floor(math.log10(smallest))))
    return max(0, min(max_decimals, needed))
44
+
45
+
46
def _nice_log_step(raw_step: float) -> float:
    """Snap *raw_step* (in decades) up to the nearest readable log10 step."""
    # Smallest preset step that still covers the requested spacing; a step
    # wider than one decade is passed through unchanged.
    return next(
        (
            step
            for step in (0.005, 0.01, 0.02, 0.05, 0.1, 0.2, 0.25, 0.5, 1.0)
            if step >= raw_step
        ),
        raw_step,
    )
53
+
54
+
55
def configure_forest_axis(
    ax: Axes,
    model_type: str,
    link: str | None,
    thresholds: Mapping[str, Any] | None,
    num_ticks: int,
    font_size: int,
    show_general_stats: bool,
) -> Axes:
    """
    Configure forest-panel axis scaling, ticks, and visual styling.

    Draws the reference line, chooses a log or linear x-scale from the
    link/``thresholds`` configuration, builds symmetric ticks around the
    reference value, and hides the top/right/left spines.

    Parameters
    ----------
    ax : Axes
        Matplotlib axis for the forest panel.
    model_type : str
        Model family name (e.g., ``"binom"``, ``"gamma"``, ``"linear"``).
        Accepted for API symmetry; not read inside this function.
    link : str | None
        Link function name used by the model output normalization. Unknown
        or ``None`` links fall back to the ``"identity"`` defaults.
    thresholds : Mapping[str, Any] | None
        Explicit axis inputs. Supported keys include:
        ``reference_line``, ``x_label``, ``use_log``, ``lo_all``, ``hi_all``,
        ``eff_all``, ``y_limits``, ``tick_style`` (``"decimal"`` or
        ``"power10"``), ``clip_outliers``, and ``clip_quantiles``
        (``(low, high)`` pair, default ``(0.02, 0.98)``).
    num_ticks : int
        Target number of major ticks for linear locators (minimum 3; on the
        log scale an even count is reduced by one to keep ticks symmetric).
    font_size : int
        Axis label font size.
    show_general_stats : bool
        Included for API symmetry with plot orchestration.

    Returns
    -------
    Axes
        The configured axis. Returned early (reference line and label only)
        when no finite CI bounds are available.

    Raises
    ------
    ValueError
        If ``clip_quantiles`` is not an ordered pair within [0, 1], or the
        log scale is requested with a nonpositive reference value.
    """
    # Explicitly discard the unused symmetry parameter.
    _ = show_general_stats
    cfg = dict(thresholds or {})
    # Per-link axis defaults; any cfg key overrides these below.
    link_defaults = {
        "logit": {"reference_line": 1.0, "use_log": True, "x_label": "Odds Ratio"},
        "log": {"reference_line": 1.0, "use_log": True, "x_label": "Ratio"},
        "identity": {"reference_line": 0.0, "use_log": False, "x_label": "Effect Size"},
    }
    defaults = link_defaults.get(link or "identity", link_defaults["identity"])

    ref_val = float(cfg.get("reference_line", defaults["reference_line"]))
    use_log = bool(cfg.get("use_log", defaults["use_log"]))
    x_label = str(cfg.get("x_label", defaults["x_label"]))
    tick_style = str(cfg.get("tick_style", "decimal"))
    clip_outliers = bool(cfg.get("clip_outliers", False))
    clip_quantiles = cfg.get("clip_quantiles", (0.02, 0.98))
    # Lower/upper CI bounds across all rows; drive the axis range.
    lo_all = np.asarray(cfg.get("lo_all", []), dtype=float)
    hi_all = np.asarray(cfg.get("hi_all", []), dtype=float)
    y_limits = cfg.get("y_limits")

    # Reference line (no-effect marker) and a tickless y-axis.
    ax.axvline(ref_val, color="#910C07", lw=1.2, ls="--")
    ax.set_yticks([])
    if y_limits is not None:
        ax.set_ylim(y_limits[0], y_limits[1])

    ax.set_xlabel(x_label, fontsize=font_size)
    # Only configure the x-range when CI data was supplied at all.
    if len(lo_all) and len(hi_all):
        finite_lo = lo_all[np.isfinite(lo_all)]
        finite_hi = hi_all[np.isfinite(hi_all)]
        if not len(finite_lo) or not len(finite_hi):
            # All-NaN/inf bounds: keep the minimal styling done above.
            return ax

        if clip_outliers:
            q_low, q_high = clip_quantiles
            q_low = float(q_low)
            q_high = float(q_high)
            if not (0.0 <= q_low < q_high <= 1.0):
                raise ValueError("clip_quantiles must satisfy 0 <= low < high <= 1.")
            data_min = float(np.quantile(finite_lo, q_low))
            data_max = float(np.quantile(finite_hi, q_high))
        else:
            data_min = float(np.min(finite_lo))
            data_max = float(np.max(finite_hi))

        ax.set_xscale("log" if use_log else "linear")

        if use_log:
            if ref_val <= 0:
                raise ValueError(
                    "Log-scaled forest axis requires a positive reference value."
                )
            finite_eff = np.asarray(cfg.get("eff_all", []), dtype=float)
            finite_eff = finite_eff[np.isfinite(finite_eff)]
            has_nonpositive = bool(
                np.any(finite_lo <= 0)
                or np.any(finite_hi <= 0)
                or np.any(finite_eff <= 0)
            )
            if has_nonpositive:
                # Warn rather than raise: matplotlib will clip these points.
                warnings.warn(
                    "Log-scaled forest axis received nonpositive effect/CI values. "
                    "These values cannot be represented on a log axis and may be clipped. "
                    "Check whether your data is already exponentiated or set exponentiate=True "
                    "when input is on the link scale.",
                    UserWarning,
                    stacklevel=2,
                )
            positive_values = np.concatenate(
                [
                    finite_lo[finite_lo > 0],
                    finite_hi[finite_hi > 0],
                    finite_eff[finite_eff > 0],
                ]
            )
            # ref_val > 0 here, so this list is never empty in practice;
            # the guard below is defensive only.
            positive_candidates = [*positive_values.tolist(), ref_val]
            if not positive_candidates:
                raise ValueError(
                    "Log-scaled forest axis requires positive effect/CI values."
                )

            pmin = min(positive_candidates)
            pmax = max(positive_candidates)
            # Force an odd tick count so ticks sit symmetrically around ref.
            target_ticks = max(int(num_ticks), 3)
            if target_ticks % 2 == 0:
                target_ticks -= 1
            n_side_target = max((target_ticks - 1) // 2, 1)

            # Largest one-sided distance (in decades) from the reference,
            # padded 15% so markers do not touch the axis edge.
            span_decades = max(abs(math.log10(pmin / ref_val)), abs(math.log10(pmax / ref_val)))
            axis_span_decades = span_decades * 1.15
            # Keep very tight ranges readable around the reference line.
            axis_span_decades = max(axis_span_decades, 0.01)
            raw_step = axis_span_decades / n_side_target
            step_decades = _nice_log_step(raw_step)
            n_side = max(1, int(axis_span_decades / step_decades))
            # Ticks are ref_val * 10^(k * step) for k in [-n_side, n_side].
            exponents = np.arange(-n_side, n_side + 1, dtype=float) * step_decades
            ticks = ref_val * np.power(10.0, exponents)
            axis_ratio = 10 ** axis_span_decades
            xmin = ref_val / axis_ratio
            xmax = ref_val * axis_ratio
            ax.set_xlim(xmin, xmax)
            ticks_in = ticks[(ticks >= xmin) & (ticks <= xmax)]
            if len(ticks_in) < 3:
                # Guarantee at least edge + reference + edge ticks.
                ticks_in = np.array([xmin, ref_val, xmax], dtype=float)
            ax.xaxis.set_major_locator(FixedLocator(ticks_in))

            if tick_style == "power10":

                def _power10_formatter(x: float, _pos: int) -> str:
                    # Label ticks as powers of ten relative to the reference.
                    exp = math.log10(x / ref_val)
                    rounded = round(exp, 2)
                    if math.isclose(rounded, 0.0, abs_tol=1e-9):
                        rounded = 0.0
                    exp_txt = f"{rounded:.2f}".rstrip("0").rstrip(".")
                    if math.isclose(ref_val, 1.0):
                        return rf"$10^{{{exp_txt}}}$"
                    return rf"${_format_decimal(ref_val)}\times10^{{{exp_txt}}}$"

                ax.xaxis.set_major_formatter(FuncFormatter(_power10_formatter))
            else:
                # Plain decimal labels; bind decimals via default arg so the
                # lambda does not capture a later value.
                decimals = max(2, _decimals_from_ticks(ticks_in))
                ax.xaxis.set_major_formatter(
                    FuncFormatter(lambda x, _pos, d=decimals: f"{x:.{d}f}")
                )

            # Suppress matplotlib's automatic log-scale minor ticks.
            ax.xaxis.set_minor_locator(NullLocator())
            ax.xaxis.set_minor_formatter(NullFormatter())
        else:
            if clip_outliers:
                q_high = float(clip_quantiles[1])
                # Linear outliers are visually dominant; keep clipping robust by capping
                # the effective upper quantile used for span control.
                q_high = min(q_high, 0.90)
                distances = np.concatenate(
                    [
                        np.abs(finite_lo - ref_val),
                        np.abs(finite_hi - ref_val),
                    ]
                )
                distances = distances[np.isfinite(distances)]
                if len(distances):
                    span = float(np.quantile(distances, q_high))
                else:
                    span = max(abs(data_min - ref_val), abs(data_max - ref_val))
            else:
                span = max(abs(data_min - ref_val), abs(data_max - ref_val))
                # Flag outlier-dominated ranges where one extreme compresses the majority.
                distances = np.concatenate(
                    [
                        np.abs(finite_lo - ref_val),
                        np.abs(finite_hi - ref_val),
                    ]
                )
                distances = distances[np.isfinite(distances)]
                if len(distances) >= 8:
                    q95 = float(np.quantile(distances, 0.95))
                    if q95 > 0 and span / q95 >= 5:
                        warnings.warn(
                            "Linear axis appears outlier-dominated. Consider clip_outliers=True "
                            "to improve readability while preserving raw table values.",
                            UserWarning,
                            stacklevel=2,
                        )
            if span == 0:
                # All values equal the reference; invent a small visible span.
                span = max(1e-3, abs(ref_val) * 0.1)
            target_ticks = max(int(num_ticks), 3)
            raw_step = (2 * span) / max(target_ticks - 1, 1)
            step = _nice_linear_step(raw_step)
            kmax = max(1, math.ceil(span / step))
            # Symmetric ticks: ref_val +/- k*step for k in [0, kmax].
            ticks = ref_val + np.arange(-kmax, kmax + 1, dtype=float) * step
            xmin = ref_val - kmax * step
            xmax = ref_val + kmax * step

            ax.set_xlim(xmin, xmax)
            ax.xaxis.set_major_locator(FixedLocator(ticks))
            decimals = _decimals_from_ticks(ticks)
            ax.xaxis.set_major_formatter(
                FuncFormatter(lambda x, _pos, d=decimals: f"{x:.{d}f}")
            )

    # Keep only the bottom spine for a clean forest-panel look.
    for spine in ("top", "right", "left"):
        ax.spines[spine].set_visible(False)

    return ax
forestplotx/_layout.py ADDED
@@ -0,0 +1,70 @@
1
+ from typing import Any, TypedDict
2
+
3
+ import pandas as pd
4
+
5
+
6
class LayoutResult(TypedDict):
    """Structured row layout used by the forest plot renderer."""

    # Table rows in display order, with columns ``predictor``, ``is_cat``,
    # and ``category``.
    rows: pd.DataFrame
    # Integer y-position per row, index-aligned with ``rows`` order.
    y_positions: list[int]
    # Extra layout fields: ``n`` (row count), ``row_is_cat`` (header flags),
    # ``row_cats`` (category per row).
    meta: dict[str, Any]
12
+
13
+
14
def build_row_layout(df_final: pd.DataFrame) -> "LayoutResult":
    """
    Assemble row ordering and y-positions for forest plot table/points.

    Parameters
    ----------
    df_final : pd.DataFrame
        Normalized plotting dataframe expected to include a ``predictor``
        column and optionally a ``category`` column.

    Returns
    -------
    LayoutResult
        Dict with:
        - ``rows``: DataFrame with ``predictor``, ``is_cat``, ``category``.
        - ``y_positions``: Integer y-positions aligned with ``rows`` order.
        - ``meta``: Extra layout fields (`n`, `row_is_cat`, `row_cats`).

    Raises
    ------
    ValueError
        If the assembled layout contains zero rows.
    """
    table_rows: list[dict[str, Any]] = []
    row_is_cat: list[bool] = []
    row_cats: list[str] = []

    def _append(predictor: Any, is_cat: bool, category: str) -> None:
        # Single writer keeps the three parallel structures in sync.
        table_rows.append(
            {"predictor": predictor, "is_cat": is_cat, "category": category}
        )
        row_is_cat.append(is_cat)
        row_cats.append(category)

    if "category" in df_final.columns and df_final["category"].notna().any():
        for cat in df_final["category"].dropna().unique():
            # Category header row, followed by its predictors in input order.
            _append(cat, True, cat)
            preds = df_final.loc[df_final["category"] == cat, "predictor"].unique()
            for pred in preds:
                _append(pred, False, cat)

        # Bug fix: predictors with a missing category were previously dropped
        # from the layout whenever any other row was categorized. Group them
        # under "Uncategorized" so every input predictor is rendered.
        leftover = (
            df_final.loc[df_final["category"].isna(), "predictor"].dropna().unique()
        )
        if len(leftover):
            _append("Uncategorized", True, "Uncategorized")
            for pred in leftover:
                _append(pred, False, "Uncategorized")
    else:
        for pred in df_final["predictor"].dropna().unique():
            _append(pred, False, "Uncategorized")

    n = len(table_rows)
    if n == 0:
        raise ValueError("No rows to plot! Check DataFrame structure.")

    return {
        "rows": pd.DataFrame(table_rows),
        "y_positions": list(range(n)),
        "meta": {"n": n, "row_is_cat": row_is_cat, "row_cats": row_cats},
    }
@@ -0,0 +1,123 @@
1
+ import numpy as np
2
+ import warnings
3
+
4
# Default link function per model family; used when the caller does not
# pass an explicit ``link``.
DEFAULT_LINK = {
    "binom": "logit",
    "ordinal": "logit",
    "gamma": "log",
    "linear": "identity",
}


def _normalize_model_output(df, model_type, link=None, exponentiate=None):
    """
    Normalize model output to standardized columns and apply
    link-driven transformations.

    The input frame must contain exactly one recognized effect column
    (``OR``, ``Ratio``, ``Estimate``, ``beta``, ``Coef``, or ``effect``);
    ``CI_low``/``CI_high`` are renamed to ``ci_low``/``ci_high`` when
    present. Returns ``(df, config)`` where ``config`` carries the axis
    setup derived from the resolved link.
    """

    _EFFECT_CANDIDATES = ["OR", "Ratio", "Estimate", "beta", "Coef", "effect"]

    if model_type not in DEFAULT_LINK:
        raise ValueError(
            f"Unknown model_type '{model_type}'. "
            f"Use one of: {list(DEFAULT_LINK.keys())}"
        )

    # Resolve the link, then look up its axis configuration:
    # (reference_line, use_log, default_exponentiate).
    resolved_link = link or DEFAULT_LINK[model_type]
    link_settings = {
        "log": (1.0, True, True),
        "logit": (1.0, True, True),
        "identity": (0.0, False, False),
    }
    if resolved_link not in link_settings:
        raise ValueError(f"Unsupported link '{resolved_link}'")
    reference_line, use_log, default_exponentiate = link_settings[resolved_link]

    # An explicit exponentiate flag must be a real bool; None means
    # "follow the link default".
    if exponentiate is not None and not isinstance(exponentiate, bool):
        raise TypeError("exponentiate must be bool or None.")
    should_exponentiate = (
        default_exponentiate if exponentiate is None else exponentiate
    )

    df = df.copy()

    # Pick the first recognized effect column.
    effect_col = next((c for c in _EFFECT_CANDIDATES if c in df.columns), None)
    if effect_col is None:
        raise ValueError(
            f"No effect column found. Expected one of: {_EFFECT_CANDIDATES}"
        )

    # Map raw column names onto the standardized effect/ci_low/ci_high trio.
    rename = {
        old: new
        for old, new in (
            (effect_col, "effect"),
            ("CI_low", "ci_low"),
            ("CI_high", "ci_high"),
        )
        if old in df.columns and old != new
    }
    if rename:
        df = df.rename(columns=rename)

    # Ordinal fits report threshold/cutpoint/intercept rows; drop them.
    if model_type == "ordinal":
        if "predictor" not in df.columns:
            raise ValueError("Ordinal model requires a 'predictor' column.")
        threshold_rows = df["predictor"].str.contains(
            r"(?i)^(?:threshold|cutpoint|intercept)", na=False, regex=True
        )
        df = df.loc[~threshold_rows]

    # Move link-scale values onto the effect scale when requested.
    if should_exponentiate:
        for col in ("effect", "ci_low", "ci_high"):
            if col in df.columns:
                df[col] = np.exp(df[col])

    x_labels = {"logit": "Odds Ratio", "log": "Ratio", "identity": "Effect Size"}
    effect_labels = {"logit": "OR", "log": "Ratio", "identity": "Coef"}
    config = {
        "x_label": x_labels[resolved_link],
        "reference_line": reference_line,
        "use_log": use_log,
        "link": resolved_link,
        "effect_label": effect_labels[resolved_link],
        "exponentiated": should_exponentiate,
        "renamed_columns": dict(rename),
    }

    # Implicit exponentiation is easy to double-apply; tell the caller what
    # happened and how the columns were mapped.
    if exponentiate is None and should_exponentiate:
        effect_map = config["renamed_columns"].get(effect_col, "effect")
        ci_low_src = "CI_low" if "CI_low" in config["renamed_columns"] else "ci_low"
        ci_high_src = "CI_high" if "CI_high" in config["renamed_columns"] else "ci_high"
        warnings.warn(
            (
                f"Exponentiation applied automatically (model_type='{model_type}', "
                f"link='{resolved_link}', effect_label='{config['effect_label']}'). "
                "If your input data is already on the effect scale, set "
                "exponentiate=False to prevent double transformation. "
                f"Column mapping: {effect_col} -> {effect_map}; "
                f"{ci_low_src} + {ci_high_src} -> 95% CI."
            ),
            UserWarning,
            stacklevel=2,
        )

    return df, config