PyPI - pysofra - Versions diffs - 0.1.0a1__py3-none-any.whl - Mend

pysofra 0.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (50) hide show

pysofra/__init__.py +82 -0
pysofra/core/__init__.py +14 -0
pysofra/core/compose.py +167 -0
pysofra/core/format.py +155 -0
pysofra/core/frames.py +69 -0
pysofra/core/schema.py +128 -0
pysofra/core/table.py +924 -0
pysofra/io/__init__.py +1 -0
pysofra/models/__init__.py +6 -0
pysofra/models/extract.py +249 -0
pysofra/models/pool.py +119 -0
pysofra/models/regression.py +507 -0
pysofra/models/survival.py +395 -0
pysofra/models/uvregression.py +438 -0
pysofra/notebook/__init__.py +6 -0
pysofra/plot/__init__.py +23 -0
pysofra/plot/_backend.py +32 -0
pysofra/plot/forest.py +159 -0
pysofra/plot/inline.py +171 -0
pysofra/plot/km.py +249 -0
pysofra/render/__init__.py +28 -0
pysofra/render/_zip_determinism.py +57 -0
pysofra/render/base.py +22 -0
pysofra/render/docx.py +286 -0
pysofra/render/html.py +442 -0
pysofra/render/image.py +130 -0
pysofra/render/latex.py +253 -0
pysofra/render/markdown.py +128 -0
pysofra/render/pptx.py +340 -0
pysofra/render/xlsx.py +226 -0
pysofra/summary/__init__.py +6 -0
pysofra/summary/calibrate.py +214 -0
pysofra/summary/design.py +246 -0
pysofra/summary/effect_size.py +187 -0
pysofra/summary/extras.py +745 -0
pysofra/summary/smd.py +133 -0
pysofra/summary/stats.py +135 -0
pysofra/summary/tbl_cross.py +339 -0
pysofra/summary/tbl_one.py +1220 -0
pysofra/summary/tbl_summary.py +51 -0
pysofra/summary/tests.py +370 -0
pysofra/summary/typing.py +129 -0
pysofra/summary/weights.py +161 -0
pysofra/themes/__init__.py +5 -0
pysofra/themes/registry.py +272 -0
pysofra-0.1.0a1.dist-info/METADATA +301 -0
pysofra-0.1.0a1.dist-info/RECORD +50 -0
pysofra-0.1.0a1.dist-info/WHEEL +4 -0
pysofra-0.1.0a1.dist-info/licenses/LICENSE +674 -0
pysofra-0.1.0a1.dist-info/licenses/NOTICE +18 -0

pysofra/summary/weights.py ADDED Viewed

@@ -0,0 +1,161 @@
+"""Weighted summary statistics for frequency-weighted Table 1.
+These are *frequency* weights — each row carries a non-negative count.
+For complex survey designs (cluster sampling, post-stratification),
+users should pre-compute weights with a dedicated survey package and
+pass them here as a single column.
+Weighted statistics implemented:
+* mean: ``Σ w_i x_i / Σ w_i``
+* variance: unbiased frequency-weighted variance
+  ``Σ w_i (x_i - μ)² / (Σ w_i - 1)``
+* quantiles: linear-interpolation method on the weighted ECDF
+* proportions: ``Σ w_i 1{x_i = level} / Σ w_i``
+Weighted contingency tests use Rao–Scott-corrected chi-square, falling
+back to a regular chi-square on the weighted observed table when no
+design effect is available (which is the case for frequency weights —
+the weights *are* the counts).
+"""
+from __future__ import annotations
+from dataclasses import dataclass
+import numpy as np
+import pandas as pd
+@dataclass(frozen=True)
+class WeightedContinuousStats:
+    """Immutable frequency-weighted summary of one continuous variable.
+    Every field is a plain ``float``. The two totals are sums of weights,
+    so they need not be whole numbers.
+    """
+    n_eff: float       # effective sample size: summed weight of the rows used
+    n_missing: float   # summed weight of the rows whose value is missing
+    mean: float        # weighted mean
+    sd: float          # weighted standard deviation; may be NaN
+    median: float      # weighted 50th percentile
+    q1: float          # weighted 25th percentile
+    q3: float          # weighted 75th percentile
+    min: float         # smallest value among the rows used
+    max: float         # largest value among the rows used
+def weighted_continuous_stats(
+    values: pd.Series,
+    weights: pd.Series,
+) -> WeightedContinuousStats:
+    """Frequency-weighted summary of a continuous variable.
+    Parameters
+    ----------
+    values
+        Observations. Anything ``pd.to_numeric`` cannot parse becomes NaN
+        and is counted as missing.
+    weights
+        Frequency weight of each observation, paired with ``values`` by
+        position (both are converted to NumPy arrays, so index labels are
+        ignored). Rows whose weight is missing, zero or negative are
+        excluded from every statistic, including ``n_missing``.
+    Returns
+    -------
+    WeightedContinuousStats
+        When no row has both a value and a positive weight, ``n_eff`` is
+        ``0.0`` and all statistics are NaN.
+    Raises
+    ------
+    ValueError
+        If ``values`` and ``weights`` differ in length.
+    """
+    v = pd.to_numeric(values, errors="coerce").to_numpy(dtype=float)
+    w = pd.to_numeric(weights, errors="coerce").to_numpy(dtype=float)
+    if v.shape != w.shape:
+        raise ValueError("values and weights must have the same length")
+    # ``w > 0`` is False for NaN, so this single mask rejects missing, zero
+    # and negative weights alike.
+    usable_w = w > 0
+    missing_v = np.isnan(v)
+    valid = usable_w & ~missing_v
+    v_v = v[valid]
+    w_v = w[valid]
+    # Apply the same positive-weight rule to the missing tally; otherwise a
+    # stray negative weight on a missing row would shrink (or even negate)
+    # the reported count.
+    n_missing = float(np.sum(w[missing_v & usable_w]))
+    n_eff = float(np.sum(w_v))
+    if n_eff <= 0 or v_v.size == 0:
+        nan = float("nan")
+        return WeightedContinuousStats(
+            n_eff=0.0,
+            n_missing=n_missing,
+            mean=nan,
+            sd=nan,
+            median=nan,
+            q1=nan,
+            q3=nan,
+            min=nan,
+            max=nan,
+        )
+    mean = float(np.sum(w_v * v_v) / n_eff)
+    # Frequency-weighted unbiased variance is undefined when the effective
+    # sample size collapses to one (or fewer). NaN propagates through
+    # ``fmt_mean_sd`` so the cell shows ``—`` rather than ``(0.00)``.
+    var = (
+        float(np.sum(w_v * (v_v - mean) ** 2) / (n_eff - 1))
+        if n_eff > 1
+        else float("nan")
+    )
+    # ``max(..., 0.0)`` guards the square root against a tiny negative
+    # value produced by rounding.
+    sd = float(np.sqrt(max(var, 0.0))) if not np.isnan(var) else float("nan")
+    median, q1, q3 = (_weighted_quantile(v_v, w_v, q) for q in (0.5, 0.25, 0.75))
+    return WeightedContinuousStats(
+        n_eff=n_eff,
+        n_missing=n_missing,
+        mean=mean,
+        sd=sd,
+        median=median,
+        q1=q1,
+        q3=q3,
+        min=float(np.min(v_v)),
+        max=float(np.max(v_v)),
+    )
+def _weighted_quantile(values: np.ndarray, weights: np.ndarray, q: float) -> float:
+    """Interpolated quantile of ``values`` under per-observation ``weights``.
+    ``q`` is a probability level in ``[0, 1]``. Each sorted observation is
+    placed at the midpoint of its own slice of cumulative weight and the
+    result is read off by linear interpolation between those midpoints.
+    With equal weights this reproduces NumPy's
+    ``np.quantile(method='linear')``. Returns NaN for empty input or a
+    non-positive total weight.
+    NOTE(review): the target position is anchored on the weight of the
+    smallest value only, so with unequal end weights ``q = 1`` is not
+    guaranteed to land on the last midpoint — confirm this is intended.
+    """
+    if not (values.size and weights.size):
+        return float("nan")
+    rank = np.argsort(values)
+    sorted_vals, sorted_wts = values[rank], weights[rank]
+    running = np.cumsum(sorted_wts)
+    total = running[-1]
+    if total <= 0:
+        return float("nan")
+    first_w = sorted_wts[0]
+    # Where the requested probability falls on the cumulative-weight axis
+    # (shifted by half of the first weight to line up with the midpoints).
+    target = q * (total - first_w) + 0.5 * first_w
+    # Centre of each observation's slice of cumulative weight.
+    centres = running - 0.5 * sorted_wts
+    return float(np.interp(target, centres, sorted_vals))
+@dataclass(frozen=True)
+class WeightedCategoricalStats:
+    """Immutable frequency-weighted tabulation of one categorical variable."""
+    n_eff: float                 # total weight across all entries of ``counts``
+    n_missing: float             # summed weight of the rows whose value is missing
+    counts: dict[object, float]  # level -> summed weight
+    levels: tuple[object, ...]   # levels in reporting order
+def weighted_categorical_stats(
+    values: pd.Series,
+    weights: pd.Series,
+    levels: list[object] | tuple[object, ...] | None = None,
+) -> WeightedCategoricalStats:
+    """Frequency-weighted counts per level.
+    Parameters
+    ----------
+    values
+        Category of each observation; null entries count as missing.
+    weights
+        Frequency weight of each observation, coerced with
+        ``pd.to_numeric``. Rows whose weight is missing, zero or negative
+        are ignored, both for the level counts and for ``n_missing``.
+    levels
+        Levels to report, in display order. When omitted, the categories
+        of a categorical ``values`` are used as-is; otherwise the observed
+        values are sorted with ``_safe_sort_key``.
+    Returns
+    -------
+    WeightedCategoricalStats
+        ``counts`` holds the summed weight of every level (``0.0`` for a
+        level that never occurs) and ``n_eff`` is the total of those sums.
+        An observed value that is absent from an explicit ``levels`` still
+        receives a ``counts`` entry but is not added to ``levels``.
+    """
+    df = pd.DataFrame({"v": values, "w": pd.to_numeric(weights, errors="coerce")})
+    # Tally missing values with the same positive-weight rule used for the
+    # level counts below, so a stray negative weight cannot shrink (or
+    # negate) the reported number of missing observations.
+    missing_w = df.loc[df["v"].isna() & df["w"].notna(), "w"]
+    n_missing = float(missing_w[missing_w > 0].sum())
+    df = df.dropna()
+    df = df[df["w"] > 0]
+    if levels is None:
+        if isinstance(values.dtype, pd.CategoricalDtype):
+            level_list = list(values.cat.categories)
+        else:
+            level_list = sorted(df["v"].unique(), key=_safe_sort_key)
+    else:
+        level_list = list(levels)
+    # Seed every requested level with zero so unobserved levels still appear.
+    counts: dict[object, float] = {lvl: 0.0 for lvl in level_list}
+    # One grouped sum instead of materialising a sub-frame per level.
+    weight_by_level = df.groupby("v", observed=True)["w"].sum()
+    for lvl, total in weight_by_level.items():
+        counts[lvl] = float(total)
+    n_eff = float(sum(counts.values()))
+    return WeightedCategoricalStats(
+        n_eff=n_eff,
+        n_missing=n_missing,
+        counts=counts,
+        levels=tuple(level_list),
+    )
+def _safe_sort_key(x: object) -> tuple[int, float | str]:
+    """Return a sort key that orders mixed-type levels without raising.
+    Booleans and numbers sort first by numeric value, then strings
+    alphabetically, then everything else by ``repr``. The leading integer
+    keeps the three groups from ever being compared with each other.
+    """
+    # NumPy bool/integer scalars (what ``Series.unique()`` yields for bool
+    # and int64 columns) are not subclasses of the Python built-ins. Without
+    # the explicit NumPy types they would fall through to ``repr`` and sort
+    # lexicographically, e.g. 10 before 2.
+    if isinstance(x, (bool, np.bool_)):
+        return (0, float(int(x)))
+    if isinstance(x, (int, float, np.integer, np.floating)):
+        return (0, float(x))
+    if isinstance(x, str):
+        return (1, x)
+    return (2, repr(x))

pysofra/themes/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+"""Built-in themes for PySofra."""
+from .registry import Theme, available_themes, register_theme, resolve_theme
+__all__ = ["Theme", "available_themes", "register_theme", "resolve_theme"]

pysofra/themes/registry.py ADDED Viewed

@@ -0,0 +1,272 @@
+"""Theme registry.
+A theme is a :class:`Theme` instance carrying enough information for every
+renderer to produce a consistent visual style. Renderers consume the theme
+through three keyed dicts (``css``, ``docx``, ``pptx``); they do not parse
+arbitrary CSS strings, so theme definitions stay small and auditable.
+"""
+from __future__ import annotations
+from dataclasses import dataclass, field
+from typing import Any
+@dataclass(frozen=True)
+class Theme:
+    """A named visual theme.
+    ``css`` is a mapping of semantic keys to CSS declarations; the HTML
+    renderer assembles a scoped stylesheet from it. ``docx`` and ``pptx``
+    carry the corresponding hints for the Word / PowerPoint renderers
+    (font name, size, header shading, border weights, etc.).
+    ``frozen=True`` only blocks rebinding the attributes; the dicts they
+    refer to stay mutable, so treat a registered theme as read-only.
+    """
+    # Key under which the theme is stored in the registry.
+    name: str
+    # Selector / semantic key -> {CSS property -> value}.
+    css: dict[str, dict[str, str]] = field(default_factory=dict)
+    # Free-form hints for the Word renderer.
+    docx: dict[str, Any] = field(default_factory=dict)
+    # Free-form hints for the PowerPoint renderer.
+    pptx: dict[str, Any] = field(default_factory=dict)
+# ----------------------------------------------------------------------
+# Built-in themes
+# ----------------------------------------------------------------------
+# Sans-serif font stack used by the default theme's ``table`` rule.
+_BASE_FONT = (
+    '"Helvetica Neue", Helvetica, Arial, "Segoe UI", '
+    '"Liberation Sans", sans-serif'
+)
+# Faded variants of the surrounding text colour (25 % and 70 % of
+# ``currentColor`` mixed with transparent), used for separator borders
+# and footnotes. ``color-mix`` is supported by every notebook frontend we
+# target (Chrome ≥ 111, Safari ≥ 16.2, Firefox ≥ 113).
+# NOTE(review): older engines are expected to fall back to the plain
+# currentColor and stay readable — confirm, since CSS normally discards a
+# declaration whose value it cannot parse.
+_FADED_25 = "color-mix(in srgb, currentColor 25%, transparent)"
+_FADED_70 = "color-mix(in srgb, currentColor 70%, transparent)"
+# Base theme. Every other built-in theme is derived from it via
+# ``_override``, so a key added here is inherited by all of them.
+_DEFAULT = Theme(
+    name="default",
+    css={
+        "table": {
+            "border-collapse": "collapse",
+            "font-family": _BASE_FONT,
+            "font-size": "14px",
+            "line-height": "1.45",
+            # Inherit the surrounding text colour so we always have contrast
+            # against the actual page background — no prefers-color-scheme
+            # hacks that fight Jupyter's own theme.
+            "color": "inherit",
+            "background": "transparent",
+            "margin": "0.75em 0",
+        },
+        "caption": {
+            "caption-side": "top",
+            "text-align": "left",
+            "font-weight": "700",
+            "padding": "0.4em 0.2em",
+            "font-size": "15px",
+            "color": "inherit",
+        },
+        # Header cells: heavy rule above, lighter rule below.
+        "th": {
+            "padding": "0.55em 0.85em",
+            "text-align": "center",
+            "border-top": "2px solid currentColor",
+            "border-bottom": "1.25px solid currentColor",
+            "font-weight": "700",
+            "vertical-align": "bottom",
+            "color": "inherit",
+            "background": "transparent",
+        },
+        # Body cells: faint separator between rows.
+        "td": {
+            "padding": "0.4em 0.85em",
+            "border-bottom": f"1px solid {_FADED_25}",
+            "vertical-align": "top",
+            "color": "inherit",
+        },
+        # Closing rule under the final row.
+        "tr:last-child td": {
+            "border-bottom": "2px solid currentColor",
+        },
+        "tr.group-header td": {
+            "font-weight": "700",
+            "padding-top": "0.7em",
+        },
+        # Footnotes: smaller, faded, and without a rule.
+        "tfoot td": {
+            "font-size": "12px",
+            "color": _FADED_70,
+            "border-bottom": "none",
+            "padding-top": "0.55em",
+        },
+        # Utility classes.
+        ".pysofra-num": {"text-align": "right", "font-variant-numeric": "tabular-nums"},
+        ".pysofra-bold": {"font-weight": "700"},
+        ".pysofra-indent": {"padding-left": "1.75em"},
+        ".pysofra-spanning": {
+            "border-bottom": "1px solid currentColor",
+            "text-align": "center",
+            "font-weight": "700",
+            "padding": "0.35em 0.5em",
+        },
+    },
+    # Word renderer hints. NOTE(review): ``font_size`` is presumably in
+    # points — confirm against the docx renderer.
+    docx={
+        "font_name": "Calibri",
+        "font_size": 10,
+        "header_bold": True,
+        "header_bottom_border": True,
+        "outer_border": True,
+        "row_zebra": False,
+    },
+    # PowerPoint renderer hints.
+    pptx={"font_name": "Calibri", "font_size": 14},
+)
+def _override(
+    parent: Theme,
+    name: str,
+    css_overrides: dict[str, dict[str, str]],
+    docx_overrides: dict[str, Any] | None = None,
+    pptx_overrides: dict[str, Any] | None = None,
+) -> Theme:
+    """Derive a theme called ``name`` by layering overrides on ``parent``.
+    CSS is merged per rule: declarations under an existing key are updated
+    one property at a time, and unknown keys are appended as new rules.
+    The ``docx`` / ``pptx`` hints are merged key by key. ``parent`` itself
+    is never modified because every dict is copied before it is changed.
+    """
+    merged_css: dict[str, dict[str, str]] = {
+        selector: dict(rules) for selector, rules in parent.css.items()
+    }
+    for selector, rules in css_overrides.items():
+        if selector in merged_css:
+            merged_css[selector].update(rules)
+        else:
+            merged_css[selector] = dict(rules)
+    # ``or {}`` turns an omitted (or empty) override into a no-op merge.
+    merged_docx = {**parent.docx, **(docx_overrides or {})}
+    merged_pptx = {**parent.pptx, **(pptx_overrides or {})}
+    return Theme(name=name, css=merged_css, docx=merged_docx, pptx=merged_pptx)
+# "clinical": the default look with heavier header rules (2.5px / 1.5px)
+# and slightly roomier body cells. The font sizes and docx hints listed
+# here repeat the default theme's values.
+_CLINICAL = _override(
+    _DEFAULT,
+    "clinical",
+    {
+        "table": {"font-size": "14px"},
+        "caption": {"font-size": "15px"},
+        "th": {
+            "border-top": "2.5px solid currentColor",
+            "border-bottom": "1.5px solid currentColor",
+        },
+        "td": {"padding": "0.45em 0.9em"},
+    },
+    docx_overrides={"font_name": "Calibri", "font_size": 10, "header_bottom_border": True},
+)
+# "compact": smaller type (13px, docx size 9) and tighter cell padding.
+_COMPACT = _override(
+    _DEFAULT,
+    "compact",
+    {
+        "table": {"font-size": "13px"},
+        "th": {"padding": "0.35em 0.6em"},
+        "td": {"padding": "0.25em 0.6em"},
+    },
+    docx_overrides={"font_size": 9},
+)
+# "jama": Times New Roman serif throughout (table, caption, footnotes)
+# with heavier header rules.
+_JAMA = _override(
+    _DEFAULT,
+    "jama",
+    {
+        "table": {"font-family": '"Times New Roman", Times, serif', "font-size": "13.5px"},
+        "caption": {
+            "font-family": '"Times New Roman", Times, serif',
+            "font-weight": "700",
+            "font-size": "15px",
+        },
+        "th": {
+            "border-top": "2.5px solid currentColor",
+            "border-bottom": "1.5px solid currentColor",
+            "background": "transparent",
+        },
+        # JAMA-style: no internal row separators; strong bottom rule only.
+        "td": {"border-bottom": "none"},
+        "tr:last-child td": {"border-bottom": "2px solid currentColor"},
+        "tfoot td": {"font-family": '"Times New Roman", Times, serif'},
+    },
+    docx_overrides={"font_name": "Times New Roman", "font_size": 10, "outer_border": True},
+)
+# "nejm": Georgia serif table text, no internal row separators, and
+# slightly tighter body-cell padding than the default.
+_NEJM = _override(
+    _DEFAULT,
+    "nejm",
+    {
+        "table": {"font-family": '"Georgia", "Times New Roman", serif', "font-size": "13.5px"},
+        "th": {
+            "border-top": "2.5px solid currentColor",
+            "border-bottom": "1.25px solid currentColor",
+            "background": "transparent",
+        },
+        "td": {"border-bottom": "none", "padding": "0.35em 0.85em"},
+        "tr:last-child td": {"border-bottom": "2px solid currentColor"},
+    },
+    docx_overrides={"font_name": "Georgia", "font_size": 10, "outer_border": True},
+)
+# "minimal": no rule above the header, no row separators, a thin closing
+# rule under the last row, and no outer border in Word output.
+_MINIMAL = _override(
+    _DEFAULT,
+    "minimal",
+    {
+        "th": {
+            "border-top": "none",
+            "border-bottom": "1.25px solid currentColor",
+            "background": "transparent",
+        },
+        "td": {"border-bottom": "none"},
+        "tr:last-child td": {"border-bottom": "1.25px solid currentColor"},
+    },
+    docx_overrides={"header_bottom_border": True, "outer_border": False},
+)
+# Mutable name -> theme registry, seeded with the built-in themes.
+# ``register_theme`` writes into it; ``resolve_theme`` and
+# ``available_themes`` read from it.
+_THEMES: dict[str, Theme] = {
+    "default": _DEFAULT,
+    "clinical": _CLINICAL,
+    "compact": _COMPACT,
+    "jama": _JAMA,
+    "nejm": _NEJM,
+    "minimal": _MINIMAL,
+}
+def resolve_theme(name: str) -> Theme:
+    """Look up a registered theme by name.
+    Raises ``ValueError`` (chained from the underlying ``KeyError``) when
+    ``name`` is not registered; the message lists every available theme.
+    """
+    try:
+        theme = _THEMES[name]
+    except KeyError as missing:
+        known = ", ".join(sorted(_THEMES))
+        raise ValueError(f"Unknown theme {name!r}. Available themes: {known}") from missing
+    return theme
+# Names protected from accidental replacement by ``register_theme``.
+# Snapshot the registry here, while it still holds only the built-ins, so
+# this set cannot drift out of sync with ``_THEMES`` when a built-in theme
+# is added or renamed.
+_BUILTIN_THEME_NAMES = frozenset(_THEMES)
+def register_theme(theme: Theme, *, overwrite: bool = False) -> None:
+    """Add ``theme`` to the registry under ``theme.name``.
+    Replacing a built-in theme is refused with ``ValueError`` unless
+    ``overwrite=True`` is passed. Replacing a previously registered user
+    theme needs no flag — the guard exists only so that
+    ``ps.tbl_one(...).theme('clinical')`` cannot silently render with a
+    user replacement that doesn't match what the documentation says.
+    """
+    key = theme.name
+    shadows_builtin = key in _BUILTIN_THEME_NAMES
+    if shadows_builtin and not overwrite:
+        raise ValueError(
+            f"Theme {key!r} is a built-in. "
+            "Pass overwrite=True to replace it, or pick a different name."
+        )
+    _THEMES[key] = theme
+def available_themes() -> list[str]:
+    """List the names of all registered themes in alphabetical order.
+    Covers the six built-in themes (``default``, ``clinical``, ``jama``,
+    ``nejm``, ``compact``, ``minimal``) plus anything added through
+    :func:`register_theme`. Apply a theme with
+    :meth:`~pysofra.SofraTable.theme`.
+    Examples
+    --------
+    >>> import pysofra as ps
+    >>> ps.available_themes()
+    ['clinical', 'compact', 'default', 'jama', 'minimal', 'nejm']
+    """
+    names = list(_THEMES)
+    names.sort()
+    return names