pysofra 0.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. pysofra/__init__.py +82 -0
  2. pysofra/core/__init__.py +14 -0
  3. pysofra/core/compose.py +167 -0
  4. pysofra/core/format.py +155 -0
  5. pysofra/core/frames.py +69 -0
  6. pysofra/core/schema.py +128 -0
  7. pysofra/core/table.py +924 -0
  8. pysofra/io/__init__.py +1 -0
  9. pysofra/models/__init__.py +6 -0
  10. pysofra/models/extract.py +249 -0
  11. pysofra/models/pool.py +119 -0
  12. pysofra/models/regression.py +507 -0
  13. pysofra/models/survival.py +395 -0
  14. pysofra/models/uvregression.py +438 -0
  15. pysofra/notebook/__init__.py +6 -0
  16. pysofra/plot/__init__.py +23 -0
  17. pysofra/plot/_backend.py +32 -0
  18. pysofra/plot/forest.py +159 -0
  19. pysofra/plot/inline.py +171 -0
  20. pysofra/plot/km.py +249 -0
  21. pysofra/render/__init__.py +28 -0
  22. pysofra/render/_zip_determinism.py +57 -0
  23. pysofra/render/base.py +22 -0
  24. pysofra/render/docx.py +286 -0
  25. pysofra/render/html.py +442 -0
  26. pysofra/render/image.py +130 -0
  27. pysofra/render/latex.py +253 -0
  28. pysofra/render/markdown.py +128 -0
  29. pysofra/render/pptx.py +340 -0
  30. pysofra/render/xlsx.py +226 -0
  31. pysofra/summary/__init__.py +6 -0
  32. pysofra/summary/calibrate.py +214 -0
  33. pysofra/summary/design.py +246 -0
  34. pysofra/summary/effect_size.py +187 -0
  35. pysofra/summary/extras.py +745 -0
  36. pysofra/summary/smd.py +133 -0
  37. pysofra/summary/stats.py +135 -0
  38. pysofra/summary/tbl_cross.py +339 -0
  39. pysofra/summary/tbl_one.py +1220 -0
  40. pysofra/summary/tbl_summary.py +51 -0
  41. pysofra/summary/tests.py +370 -0
  42. pysofra/summary/typing.py +129 -0
  43. pysofra/summary/weights.py +161 -0
  44. pysofra/themes/__init__.py +5 -0
  45. pysofra/themes/registry.py +272 -0
  46. pysofra-0.1.0a1.dist-info/METADATA +301 -0
  47. pysofra-0.1.0a1.dist-info/RECORD +50 -0
  48. pysofra-0.1.0a1.dist-info/WHEEL +4 -0
  49. pysofra-0.1.0a1.dist-info/licenses/LICENSE +674 -0
  50. pysofra-0.1.0a1.dist-info/licenses/NOTICE +18 -0
@@ -0,0 +1,161 @@
1
+ """Weighted summary statistics for frequency-weighted Table 1.
2
+
3
+ These are *frequency* weights — each row carries a non-negative count.
4
+ For complex survey designs (cluster sampling, post-stratification),
5
+ users should pre-compute weights with a dedicated survey package and
6
+ pass them here as a single column.
7
+
8
+ Weighted statistics implemented:
9
+
10
+ * mean: ``Σ w_i x_i / Σ w_i``
11
+ * variance: unbiased frequency-weighted variance
12
+ ``Σ w_i (x_i - μ)² / (Σ w_i - 1)``
13
+ * quantiles: linear-interpolation method on the weighted ECDF
14
+ * proportions: ``Σ w_i 1{x_i = level} / Σ w_i``
15
+
16
+ Weighted contingency tests use Rao–Scott-corrected chi-square, falling
17
+ back to a regular chi-square on the weighted observed table when no
18
+ design effect is available (which is the case for frequency weights —
19
+ the weights *are* the counts).
20
+ """
21
+
22
+ from __future__ import annotations
23
+
24
+ from dataclasses import dataclass
25
+
26
+ import numpy as np
27
+ import pandas as pd
28
+
29
+
30
@dataclass(frozen=True)
class WeightedContinuousStats:
    """Immutable result record of a frequency-weighted continuous summary.

    The field order is part of the interface: the record can be built
    positionally as well as by keyword.
    """

    # Effective sample size, i.e. the sum of the weights.
    n_eff: float
    # Weighted count of the missing values.
    n_missing: float
    mean: float
    sd: float
    median: float
    q1: float
    q3: float
    min: float
    max: float
41
+
42
+
43
def weighted_continuous_stats(
    values: pd.Series,
    weights: pd.Series,
) -> WeightedContinuousStats:
    """Frequency-weighted summary of a continuous variable.

    Both inputs are coerced to float arrays (entries that cannot be parsed
    as numbers become NaN) and are paired by position.

    Parameters
    ----------
    values
        Observations. NaN or non-numeric entries are treated as missing.
    weights
        Per-row frequency weights. A row only counts, either towards the
        statistics or towards ``n_missing``, when its weight is a positive
        number; NaN, zero and negative weights are ignored throughout.

    Returns
    -------
    WeightedContinuousStats
        ``n_eff`` is the sum of the usable weights on non-missing values
        and ``n_missing`` the sum of the usable weights on missing values.
        When nothing usable remains, ``n_eff`` is ``0.0`` and every
        statistic is NaN. ``sd`` is NaN whenever ``n_eff <= 1``.

    Raises
    ------
    ValueError
        If ``values`` and ``weights`` do not have the same length.
    """
    v = pd.to_numeric(values, errors="coerce").to_numpy(dtype=float)
    w = pd.to_numeric(weights, errors="coerce").to_numpy(dtype=float)
    if v.shape != w.shape:
        raise ValueError("values and weights must have the same length")

    value_missing = np.isnan(v)
    # One definition of a usable weight, shared by the statistics and the
    # missing count. Summing every non-NaN weight for ``n_missing`` would
    # let a negative weight shrink the reported amount of missing data.
    weight_usable = ~np.isnan(w) & (w > 0)

    valid = ~value_missing & weight_usable
    v_v = v[valid]
    w_v = w[valid]

    n_missing = float(np.sum(w[value_missing & weight_usable]))
    n_eff = float(np.sum(w_v))

    if n_eff <= 0 or v_v.size == 0:
        nan = float("nan")
        return WeightedContinuousStats(
            n_eff=0.0,
            n_missing=n_missing,
            mean=nan,
            sd=nan,
            median=nan,
            q1=nan,
            q3=nan,
            min=nan,
            max=nan,
        )

    mean = float(np.sum(w_v * v_v) / n_eff)
    # Frequency-weighted unbiased variance is undefined when the effective
    # sample size collapses to one (or fewer). NaN propagates through
    # ``fmt_mean_sd`` so the cell shows ``—`` rather than ``(0.00)``.
    if n_eff > 1:
        var = float(np.sum(w_v * (v_v - mean) ** 2) / (n_eff - 1))
        # Clamp at zero so rounding noise can never reach the square root.
        sd = float(np.sqrt(max(var, 0.0)))
    else:
        sd = float("nan")

    median, q1, q3 = (_weighted_quantile(v_v, w_v, q) for q in (0.5, 0.25, 0.75))

    return WeightedContinuousStats(
        n_eff=n_eff,
        n_missing=n_missing,
        mean=mean,
        sd=sd,
        median=median,
        q1=q1,
        q3=q3,
        min=float(np.min(v_v)),
        max=float(np.max(v_v)),
    )
88
+
89
+
90
def _weighted_quantile(values: np.ndarray, weights: np.ndarray, q: float) -> float:
    """Interpolated quantile of ``values`` under ``weights``.

    After sorting by value, each observation is placed at the midpoint of
    its own weight interval on the cumulative-weight axis
    (``cumsum(w) - w / 2``). The probability level ``q`` (expected in
    ``[0, 1]``) is mapped to the position
    ``q * (W - w_first) + w_first / 2``, where ``W`` is the total weight
    and ``w_first`` the weight of the smallest value. The result is read
    off by linear interpolation between the midpoints; positions outside
    the midpoint range take the nearest end value.

    With equal weights the position rule reduces to that of
    ``np.quantile(..., method="linear")``. With unequal weights the
    position reached at ``q = 1`` is ``W - w_first / 2``, which coincides
    with the last midpoint only when the first and last sorted weights are
    equal.

    Returns NaN for empty input or a non-positive total weight.
    """
    undefined = float("nan")
    if values.size == 0 or weights.size == 0:
        return undefined

    rank = np.argsort(values)
    sorted_values = values[rank]
    sorted_weights = weights[rank]

    running_total = np.cumsum(sorted_weights)
    grand_total = running_total[-1]
    if grand_total <= 0:
        return undefined

    first_weight = sorted_weights[0]
    # Where the requested level falls on the cumulative-weight axis.
    position = q * (grand_total - first_weight) + 0.5 * first_weight
    # Centre of each observation's weight interval.
    centres = running_total - 0.5 * sorted_weights
    return float(np.interp(position, centres, sorted_values))
112
+
113
+
114
@dataclass(frozen=True)
class WeightedCategoricalStats:
    """Immutable result record of a frequency-weighted categorical summary."""

    # Sum of the per-level weighted counts.
    n_eff: float
    # Summed weight of the rows whose value was missing.
    n_missing: float
    # Level -> weighted count.
    counts: dict[object, float]
    # Levels in presentation order.
    levels: tuple[object, ...]
120
+
121
+
122
def weighted_categorical_stats(
    values: pd.Series,
    weights: pd.Series,
    levels: list[object] | tuple[object, ...] | None = None,
) -> WeightedCategoricalStats:
    """Frequency-weighted counts per level.

    Parameters
    ----------
    values
        Level of each row; null entries are treated as missing.
    weights
        Per-row frequency weights, coerced to numbers. A row only counts,
        either towards a level or towards ``n_missing``, when its weight
        is a positive number; NaN, zero and negative weights are ignored.
    levels
        Explicit level order. When omitted, the categories of a
        categorical ``values`` are used as-is; otherwise the observed
        levels are sorted with ``_safe_sort_key``.

    Returns
    -------
    WeightedCategoricalStats
        ``counts`` holds ``0.0`` for every listed level that never occurs.
        A level that occurs in the data but is absent from an explicit
        ``levels`` still receives an entry in ``counts`` and is included
        in ``n_eff``, although it is not part of the returned ``levels``.

    Notes
    -----
    The two inputs are combined into one ``DataFrame``, so when both are
    ``Series`` pandas pairs them by index label rather than by position.
    """
    df = pd.DataFrame({"v": values, "w": pd.to_numeric(weights, errors="coerce")})

    # Missing rows are counted with the same "positive weight" rule that
    # the level counts use below. ``NaN > 0`` is False, so NaN weights drop
    # out here too; a negative weight can no longer shrink the total.
    n_missing = float(df.loc[df["v"].isna() & (df["w"] > 0), "w"].sum())

    df = df.dropna()
    df = df[df["w"] > 0]

    if levels is not None:
        level_list = list(levels)
    elif isinstance(values.dtype, pd.CategoricalDtype):
        level_list = list(values.cat.categories)
    else:
        level_list = sorted(df["v"].unique(), key=_safe_sort_key)

    # Seed every known level with zero so unobserved levels are reported.
    counts: dict[object, float] = {lvl: 0.0 for lvl in level_list}
    for lvl, sub in df.groupby("v", observed=True):
        counts[lvl] = float(sub["w"].sum())

    n_eff = float(sum(counts.values()))
    return WeightedCategoricalStats(
        n_eff=n_eff,
        n_missing=n_missing,
        counts=counts,
        levels=tuple(level_list),
    )
152
+
153
+
154
def _safe_sort_key(x: object) -> tuple[int, float | str]:
    """Return a sort key that orders mixed-type levels without raising.

    Keys are ``(rank, payload)`` tuples: booleans and real numbers share
    rank 0 and compare by numeric value, strings take rank 1 and compare
    as text, and anything else takes rank 2 and compares by ``repr``.

    NumPy scalars are recognised explicitly. ``np.integer`` and
    ``np.bool_`` are not subclasses of the built-in ``int`` / ``bool``, so
    without these checks the elements of an integer array would fall
    through to the ``repr`` branch and sort lexicographically (``10``
    before ``2``).
    """
    if isinstance(x, (bool, np.bool_)):
        return (0, float(int(x)))
    if isinstance(x, (int, float, np.integer, np.floating)):
        return (0, float(x))
    if isinstance(x, str):
        return (1, x)
    return (2, repr(x))
@@ -0,0 +1,5 @@
1
+ """Built-in themes for PySofra."""
2
+
3
+ from .registry import Theme, available_themes, register_theme, resolve_theme
4
+
5
# Names re-exported from ``.registry`` as the public surface of this package.
__all__ = [
    "Theme",
    "available_themes",
    "register_theme",
    "resolve_theme",
]
@@ -0,0 +1,272 @@
1
+ """Theme registry.
2
+
3
+ A theme is a :class:`Theme` instance carrying enough information for every
4
+ renderer to produce a consistent visual style. Renderers consume the theme
5
+ through three keyed dicts (``css``, ``docx``, ``pptx``); they do not parse
6
+ arbitrary CSS strings, so theme definitions stay small and auditable.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from dataclasses import dataclass, field
12
+ from typing import Any
13
+
14
+
15
@dataclass(frozen=True)
class Theme:
    """Named bundle of styling hints shared by the renderers.

    ``css`` maps a semantic key to a dict of CSS declarations, from which
    the HTML renderer assembles a scoped stylesheet. ``docx`` and ``pptx``
    hold the matching hints for the Word and PowerPoint renderers (font
    name, size, header shading, border weights, and so on).

    Each mapping defaults to its own fresh empty dict. The instance is
    frozen, so its attributes cannot be rebound, but the dicts they refer
    to are ordinary mutable dicts.
    """

    name: str
    # Semantic key -> {CSS property: value}.
    css: dict[str, dict[str, str]] = field(default_factory=dict)
    # Hints for the Word renderer.
    docx: dict[str, Any] = field(default_factory=dict)
    # Hints for the PowerPoint renderer.
    pptx: dict[str, Any] = field(default_factory=dict)
29
+
30
+
31
+ # ----------------------------------------------------------------------
32
+ # Built-in themes
33
+ # ----------------------------------------------------------------------
34
+
35
# Sans-serif font stack used by the default theme.
_BASE_FONT = ", ".join(
    (
        '"Helvetica Neue"',
        "Helvetica",
        "Arial",
        '"Segoe UI"',
        '"Liberation Sans"',
        "sans-serif",
    )
)

# Faded variants of the surrounding text colour, used for separator borders
# and footnotes. The engines targeted for ``color-mix`` are Chrome ≥ 111,
# Safari ≥ 16.2 and Firefox ≥ 113.
# NOTE(review): an engine that cannot parse ``color-mix`` discards the whole
# declaration it appears in — confirm that outcome is acceptable for the
# ``border-bottom`` shorthand as well as for ``color``.
_FADED_25, _FADED_70 = (
    f"color-mix(in srgb, currentColor {percent}%, transparent)"
    for percent in (25, 70)
)

_DEFAULT = Theme(
    name="default",
    css={
        # Text colour is inherited and the background left transparent so
        # the table keeps its contrast against whatever page background is
        # in effect, without prefers-color-scheme rules that would fight
        # Jupyter's own theme.
        "table": {
            "border-collapse": "collapse",
            "font-family": _BASE_FONT,
            "font-size": "14px",
            "line-height": "1.45",
            "color": "inherit",
            "background": "transparent",
            "margin": "0.75em 0",
        },
        "caption": {
            "caption-side": "top",
            "text-align": "left",
            "font-weight": "700",
            "padding": "0.4em 0.2em",
            "font-size": "15px",
            "color": "inherit",
        },
        # Header cells: heavy rule above, lighter rule below.
        "th": {
            "padding": "0.55em 0.85em",
            "text-align": "center",
            "border-top": "2px solid currentColor",
            "border-bottom": "1.25px solid currentColor",
            "font-weight": "700",
            "vertical-align": "bottom",
            "color": "inherit",
            "background": "transparent",
        },
        # Body cells: faded separator between rows.
        "td": {
            "padding": "0.4em 0.85em",
            "border-bottom": "1px solid " + _FADED_25,
            "vertical-align": "top",
            "color": "inherit",
        },
        # Closing rule under the last row.
        "tr:last-child td": {
            "border-bottom": "2px solid currentColor",
        },
        "tr.group-header td": {
            "font-weight": "700",
            "padding-top": "0.7em",
        },
        # Footnotes: smaller, faded, no rule.
        "tfoot td": {
            "font-size": "12px",
            "color": _FADED_70,
            "border-bottom": "none",
            "padding-top": "0.55em",
        },
        ".pysofra-num": {
            "text-align": "right",
            "font-variant-numeric": "tabular-nums",
        },
        ".pysofra-bold": {"font-weight": "700"},
        ".pysofra-indent": {"padding-left": "1.75em"},
        ".pysofra-spanning": {
            "border-bottom": "1px solid currentColor",
            "text-align": "center",
            "font-weight": "700",
            "padding": "0.35em 0.5em",
        },
    },
    docx=dict(
        font_name="Calibri",
        font_size=10,
        header_bold=True,
        header_bottom_border=True,
        outer_border=True,
        row_zebra=False,
    ),
    pptx=dict(font_name="Calibri", font_size=14),
)
119
+
120
+
121
def _override(parent: Theme, name: str, css_overrides: dict[str, dict[str, str]],
              docx_overrides: dict[str, Any] | None = None,
              pptx_overrides: dict[str, Any] | None = None) -> Theme:
    """Derive a new theme called ``name`` from ``parent``.

    The parent's CSS is copied key by key, then each entry of
    ``css_overrides`` is merged into the declarations under the same key
    (the key is created when the parent lacks it). ``docx`` and ``pptx``
    are shallow copies of the parent's dicts updated with the respective
    overrides, when any are given. The parent's own dicts are not
    modified.
    """
    merged_css: dict[str, dict[str, str]] = {
        selector: dict(declarations) for selector, declarations in parent.css.items()
    }
    for selector, declarations in css_overrides.items():
        if selector not in merged_css:
            merged_css[selector] = {}
        merged_css[selector].update(declarations)

    merged_docx = {**parent.docx, **(docx_overrides or {})}
    merged_pptx = {**parent.pptx, **(pptx_overrides or {})}
    return Theme(name=name, css=merged_css, docx=merged_docx, pptx=merged_pptx)
134
+
135
+
136
# "clinical": heavier header rules and slightly wider body-cell padding.
_CLINICAL = _override(
    parent=_DEFAULT,
    name="clinical",
    css_overrides={
        "table": {"font-size": "14px"},
        "caption": {"font-size": "15px"},
        "th": {
            "border-top": "2.5px solid currentColor",
            "border-bottom": "1.5px solid currentColor",
        },
        "td": {"padding": "0.45em 0.9em"},
    },
    docx_overrides=dict(font_name="Calibri", font_size=10, header_bottom_border=True),
)

# "compact": smaller type and tighter cell padding.
_COMPACT = _override(
    parent=_DEFAULT,
    name="compact",
    css_overrides={
        "table": {"font-size": "13px"},
        "th": {"padding": "0.35em 0.6em"},
        "td": {"padding": "0.25em 0.6em"},
    },
    docx_overrides=dict(font_size=9),
)

# Serif stack shared by the table, caption and footnotes of the JAMA theme.
_TIMES_SERIF = '"Times New Roman", Times, serif'

# "jama": serif type, no internal row separators, strong bottom rule only.
_JAMA = _override(
    parent=_DEFAULT,
    name="jama",
    css_overrides={
        "table": {"font-family": _TIMES_SERIF, "font-size": "13.5px"},
        "caption": {
            "font-family": _TIMES_SERIF,
            "font-weight": "700",
            "font-size": "15px",
        },
        "th": {
            "border-top": "2.5px solid currentColor",
            "border-bottom": "1.5px solid currentColor",
            "background": "transparent",
        },
        "td": {"border-bottom": "none"},
        "tr:last-child td": {"border-bottom": "2px solid currentColor"},
        "tfoot td": {"font-family": _TIMES_SERIF},
    },
    docx_overrides=dict(font_name="Times New Roman", font_size=10, outer_border=True),
)

# "nejm": Georgia-first serif stack, no internal row separators.
_NEJM = _override(
    parent=_DEFAULT,
    name="nejm",
    css_overrides={
        "table": {
            "font-family": '"Georgia", "Times New Roman", serif',
            "font-size": "13.5px",
        },
        "th": {
            "border-top": "2.5px solid currentColor",
            "border-bottom": "1.25px solid currentColor",
            "background": "transparent",
        },
        "td": {"border-bottom": "none", "padding": "0.35em 0.85em"},
        "tr:last-child td": {"border-bottom": "2px solid currentColor"},
    },
    docx_overrides=dict(font_name="Georgia", font_size=10, outer_border=True),
)

# "minimal": no top rule, no row separators, light closing rule.
_MINIMAL = _override(
    parent=_DEFAULT,
    name="minimal",
    css_overrides={
        "th": {
            "border-top": "none",
            "border-bottom": "1.25px solid currentColor",
            "background": "transparent",
        },
        "td": {"border-bottom": "none"},
        "tr:last-child td": {"border-bottom": "1.25px solid currentColor"},
    },
    docx_overrides=dict(header_bottom_border=True, outer_border=False),
)


# Registry of themes keyed by name, seeded with the built-ins in this order.
_THEMES: dict[str, Theme] = {
    theme.name: theme
    for theme in (_DEFAULT, _CLINICAL, _COMPACT, _JAMA, _NEJM, _MINIMAL)
}
225
+
226
+
227
def resolve_theme(name: str) -> Theme:
    """Look up a registered theme by name.

    Raises ``ValueError`` (chained from the underlying ``KeyError``) when
    no theme is registered under ``name``; the message lists the
    registered names in sorted order.
    """
    try:
        theme = _THEMES[name]
    except KeyError as exc:
        known = ", ".join(sorted(_THEMES))
        message = f"Unknown theme {name!r}. Available themes: {known}"
        raise ValueError(message) from exc
    else:
        return theme
234
+
235
+
236
# Names protected from accidental replacement by ``register_theme``.
# Snapshotted from the registry at import time, when it holds only the
# built-in themes, so the protected set cannot drift from the registry the
# way a second hand-maintained list of names could.
_BUILTIN_THEME_NAMES = frozenset(_THEMES)
239
+
240
+
241
def register_theme(theme: Theme, *, overwrite: bool = False) -> None:
    """Add ``theme`` to the registry under ``theme.name``.

    A name that belongs to a built-in theme is only replaced when
    ``overwrite=True`` is passed; otherwise ``ValueError`` is raised. Any
    other name is stored unconditionally, so re-registering a user theme
    replaces it without the flag. The guard exists only to keep
    ``ps.tbl_one(...).theme('clinical')`` from silently rendering with a
    user replacement that doesn't match what the documentation says.
    """
    if overwrite or theme.name not in _BUILTIN_THEME_NAMES:
        _THEMES[theme.name] = theme
        return
    raise ValueError(
        f"Theme {theme.name!r} is a built-in. "
        "Pass overwrite=True to replace it, or pick a different name."
    )
256
+
257
+
258
def available_themes() -> list[str]:
    """Return the names of all registered themes, sorted.

    The list covers the six built-in themes (``default``, ``clinical``,
    ``jama``, ``nejm``, ``compact``, ``minimal``) as well as any user
    themes added via :func:`register_theme`. Apply a theme with
    :meth:`~pysofra.SofraTable.theme`.

    Examples
    --------
    >>> import pysofra as ps
    >>> ps.available_themes()
    ['clinical', 'compact', 'default', 'jama', 'minimal', 'nejm']
    """
    names = list(_THEMES)
    names.sort()
    return names