pysofra 0.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. pysofra/__init__.py +82 -0
  2. pysofra/core/__init__.py +14 -0
  3. pysofra/core/compose.py +167 -0
  4. pysofra/core/format.py +155 -0
  5. pysofra/core/frames.py +69 -0
  6. pysofra/core/schema.py +128 -0
  7. pysofra/core/table.py +924 -0
  8. pysofra/io/__init__.py +1 -0
  9. pysofra/models/__init__.py +6 -0
  10. pysofra/models/extract.py +249 -0
  11. pysofra/models/pool.py +119 -0
  12. pysofra/models/regression.py +507 -0
  13. pysofra/models/survival.py +395 -0
  14. pysofra/models/uvregression.py +438 -0
  15. pysofra/notebook/__init__.py +6 -0
  16. pysofra/plot/__init__.py +23 -0
  17. pysofra/plot/_backend.py +32 -0
  18. pysofra/plot/forest.py +159 -0
  19. pysofra/plot/inline.py +171 -0
  20. pysofra/plot/km.py +249 -0
  21. pysofra/render/__init__.py +28 -0
  22. pysofra/render/_zip_determinism.py +57 -0
  23. pysofra/render/base.py +22 -0
  24. pysofra/render/docx.py +286 -0
  25. pysofra/render/html.py +442 -0
  26. pysofra/render/image.py +130 -0
  27. pysofra/render/latex.py +253 -0
  28. pysofra/render/markdown.py +128 -0
  29. pysofra/render/pptx.py +340 -0
  30. pysofra/render/xlsx.py +226 -0
  31. pysofra/summary/__init__.py +6 -0
  32. pysofra/summary/calibrate.py +214 -0
  33. pysofra/summary/design.py +246 -0
  34. pysofra/summary/effect_size.py +187 -0
  35. pysofra/summary/extras.py +745 -0
  36. pysofra/summary/smd.py +133 -0
  37. pysofra/summary/stats.py +135 -0
  38. pysofra/summary/tbl_cross.py +339 -0
  39. pysofra/summary/tbl_one.py +1220 -0
  40. pysofra/summary/tbl_summary.py +51 -0
  41. pysofra/summary/tests.py +370 -0
  42. pysofra/summary/typing.py +129 -0
  43. pysofra/summary/weights.py +161 -0
  44. pysofra/themes/__init__.py +5 -0
  45. pysofra/themes/registry.py +272 -0
  46. pysofra-0.1.0a1.dist-info/METADATA +301 -0
  47. pysofra-0.1.0a1.dist-info/RECORD +50 -0
  48. pysofra-0.1.0a1.dist-info/WHEEL +4 -0
  49. pysofra-0.1.0a1.dist-info/licenses/LICENSE +674 -0
  50. pysofra-0.1.0a1.dist-info/licenses/NOTICE +18 -0
@@ -0,0 +1,438 @@
1
+ """Univariable regression — one model per predictor, stacked vertically.
2
+
3
+ Equivalent to R ``gtsummary::tbl_uvregression``. For each predictor,
4
+ a separate regression of ``outcome ~ predictor`` (optionally
5
+ ``+ adjust_for``) is fit; results are stacked vertically into a single
6
+ table.
7
+
8
+ Categorical predictors are dummy-encoded (first level = reference). A
9
+ group-header row is emitted for each multi-level predictor; each
10
+ non-reference level becomes its own indented body row, matching the
11
+ gtsummary layout.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ from collections.abc import Callable
17
+ from typing import Any
18
+
19
+ import pandas as pd
20
+
21
+ from ..core.frames import to_pandas
22
+ from ..core.schema import Cell, HeaderCell, HeaderRow, Row, make_cell
23
+ from ..core.table import SofraTable, TableSpec
24
+ from .extract import extract
25
+ from .regression import _default_estimate_label
26
+
27
+ # Separator used when dummy-encoding a categorical predictor's columns.
28
+ # Triple underscore makes accidental collisions with real column names
29
+ # vanishingly rare.
30
+ _DUMMY_SEP = "___"
31
+
32
+
33
def _is_continuous(col: pd.Series) -> bool:
    """Report whether ``col`` should be modelled as a continuous predictor.

    Boolean columns are rejected up front so they are routed to the
    categorical (dichotomous) path; any other dtype that pandas
    classifies as numeric counts as continuous.
    """
    dtype_checks = pd.api.types
    if dtype_checks.is_bool_dtype(col):
        return False
    return dtype_checks.is_numeric_dtype(col)
40
+
41
+
42
def _expand_predictor(
    sub: pd.DataFrame, pred: str,
) -> tuple[pd.DataFrame, list[tuple[str | None, str, bool]]]:
    """Return ``(design_frame, level_specs)`` for one predictor.

    For a numeric predictor this is the identity: one design column,
    one output row, no reference level.

    For a categorical / boolean predictor the *observed* levels are
    enumerated, the first observed level becomes the reference, and the
    remaining observed levels are dummy-encoded.  Levels that are
    declared but never observed (unused ``Categorical`` categories, the
    missing half of a constant boolean column) are ignored entirely.

    Parameters
    ----------
    sub
        Frame containing the column ``pred``.
    pred
        Name of the predictor column to expand.

    Returns
    -------
    design_frame
        Columns to place in the design matrix (no intercept).  Empty,
        with ``sub``'s index, when fewer than two levels are observed.
    level_specs
        List of ``(design_column_name_or_None, display_label,
        is_reference)`` tuples — one per *displayed* row, ordered
        top-to-bottom.  Empty when there is nothing to fit.
    """
    col = sub[pred]
    if _is_continuous(col):
        return pd.DataFrame({pred: col}), [(pred, pred, False)]

    # Categorical / boolean — enumerate the declared levels in display order.
    if isinstance(col.dtype, pd.CategoricalDtype):
        declared: list[Any] = list(col.cat.categories)
    elif pd.api.types.is_bool_dtype(col):
        declared = [False, True]
    else:
        declared = sorted(col.dropna().unique(), key=str)

    dummies = pd.get_dummies(col, prefix=pred, prefix_sep=_DUMMY_SEP, dtype=float)

    def _dummy_name(level: Any) -> str:
        # Mirrors the column naming used by ``pd.get_dummies`` above.
        return f"{pred}{_DUMMY_SEP}{level}"

    # Keep only levels that actually occur.  The reference must be an
    # observed level: if it were a phantom one, the remaining dummies
    # would sum to 1 on every row and be collinear with the intercept
    # (for a two-level predictor the lone dummy would *be* a constant).
    observed_cols = {c for c in dummies.columns if dummies[c].sum() > 0}
    levels = [lvl for lvl in declared if _dummy_name(lvl) in observed_cols]

    if len(levels) < 2:
        # Zero or one observed level → nothing to fit.
        return pd.DataFrame(index=sub.index), []

    ref = levels[0]
    # Drop the reference column together with any all-zero column that
    # belongs to a declared-but-unobserved level.
    dummies = dummies[[_dummy_name(lvl) for lvl in levels[1:]]]

    # Boolean levels get nicer display labels.
    def _fmt_level(x: Any) -> str:
        if isinstance(x, bool):
            return "Yes" if x else "No"
        return str(x)

    # Reference first (label only, no fit), then each non-reference level.
    level_specs: list[tuple[str | None, str, bool]] = [
        (None, _fmt_level(ref), True)
    ]
    level_specs.extend(
        (_dummy_name(lvl), _fmt_level(lvl), False) for lvl in levels[1:]
    )

    return dummies, level_specs
105
+
106
+
107
def tbl_uvregression(
    data: Any,
    *,
    outcome: str,
    predictors: list[str] | None = None,
    method: Callable[..., Any] | str = "OLS",
    method_kwargs: dict[str, Any] | None = None,
    adjust_for: list[str] | None = None,
    exponentiate: bool | None = None,
    conf_level: float = 0.95,
    digits: int = 2,
    labels: dict[str, str] | None = None,
) -> SofraTable:
    """Univariable regression — one model per predictor.

    Parameters
    ----------
    data
        Source dataframe (pandas or polars).
    outcome
        Column name of the response variable.  Rows where the outcome
        is missing are dropped before any model is fit.
    predictors
        Predictor columns.  Defaults to every column except ``outcome``
        and any ``adjust_for`` covariates (numeric *and* categorical).
    method
        One of the string aliases ``"OLS"``, ``"Logit"``, ``"Poisson"``,
        ``"GLM"``, or a callable.  A callable is invoked as
        ``method(y, X, **method_kwargs)`` and must return an *unfitted*
        statsmodels-style model: ``.fit(disp=False)`` is called on
        whatever it returns.
    method_kwargs
        Extra keyword arguments forwarded to the model class / callable.
    adjust_for
        Optional list of covariates included in every fit.  Adjustment
        covariates are themselves dummy-encoded if categorical.
    exponentiate
        If ``True``, exponentiate point estimates and CI bounds.
        ``None`` (default) uses the per-model default reported by the
        extracted summary.
    conf_level
        Confidence level for the CI column.
    digits
        Decimal places for estimates and CI bounds.
    labels
        Mapping from predictor name → display label.  Used for the
        single row of a numeric predictor and for the group-header row
        of a categorical predictor.

    Returns
    -------
    SofraTable
        Five columns: predictor, N, estimate, CI, p-value.  Predictors
        that could not be fit are listed in ``metadata["failed"]`` and
        in a footnote rather than raising.

    Raises
    ------
    ImportError
        If statsmodels is not installed.
    KeyError
        If ``outcome`` is not a column of ``data``.
    ValueError
        If a name appears in both ``predictors`` and ``adjust_for``, if
        ``outcome`` appears in either list, if ``method`` is an unknown
        alias, or if no row was produced and nothing was recorded as
        failed.

    Notes
    -----
    For a categorical predictor with K observed levels the result has
    ``K + 1`` rows: a header naming the variable, plus ``K`` indented
    rows (the reference level rendered as ``— ref``, and one row per
    non-reference level with its estimate / CI / p-value).
    """
    try:
        import statsmodels.api as sm
    except ImportError as e:  # pragma: no cover
        raise ImportError(
            "tbl_uvregression requires statsmodels. "
            "Install with `pip install statsmodels`."
        ) from e

    df = to_pandas(data)
    if outcome not in df.columns:
        raise KeyError(f"outcome column {outcome!r} not in data")
    # Reset to a unique positional index: the adjust block is re-aligned
    # to each per-predictor subset with ``.loc[sub.index]`` and glued on
    # with ``pd.concat(axis=1)``, both of which misbehave when the
    # caller's frame carries duplicated index labels.
    df = df.dropna(subset=[outcome]).reset_index(drop=True)

    adjust_for = list(adjust_for or [])
    if predictors is None:
        excluded = {outcome, *adjust_for}
        predictors = [c for c in df.columns if c not in excluded]

    # Predictor / adjust_for overlap doesn't make sense ("regress y on x
    # adjusted for x") and would also break design-matrix assembly
    # because pandas returns a DataFrame, not a Series, when a column
    # name is duplicated in a slice.
    overlap = sorted(set(predictors) & set(adjust_for))
    if overlap:
        raise ValueError(
            f"Predictor(s) {overlap} also appear in adjust_for; remove "
            f"from one of the two."
        )
    if outcome in predictors:
        raise ValueError(
            f"outcome {outcome!r} must not appear in predictors."
        )
    if outcome in adjust_for:
        raise ValueError(
            f"outcome {outcome!r} must not appear in adjust_for."
        )

    labels = dict(labels or {})
    method_kwargs = dict(method_kwargs or {})

    model_factory: Callable[..., Any]
    if callable(method):
        model_factory = method
    else:
        aliases: dict[str, Callable[..., Any]] = {
            "OLS": sm.OLS,
            "Logit": sm.Logit,
            "Poisson": sm.Poisson,
            "GLM": sm.GLM,
        }
        if not isinstance(method, str) or method not in aliases:
            raise ValueError(
                f"Unknown method {method!r}; pass a callable or one of "
                "'OLS', 'Logit', 'Poisson', 'GLM'."
            )
        model_factory = aliases[method]

    # Pre-expand the adjust_for block once — it's shared across rows.
    if adjust_for:
        sub_adjust = df[adjust_for]
        adjust_block_template = pd.concat(
            [_expand_predictor(sub_adjust, a)[0] for a in adjust_for],
            axis=1,
        )
    else:
        adjust_block_template = pd.DataFrame(index=df.index)

    # ------------------------------------------------------------------
    # Fit one regression per predictor.
    # ------------------------------------------------------------------
    rows: list[Row] = []
    # Families in first-seen order, so "the first family" is well defined
    # and does not depend on set iteration order.
    families: list[Any] = []
    exp_per: list[bool] = []
    failed: list[str] = []

    for pred in predictors:
        # Build the working sub-frame: outcome + adjust_for + this predictor.
        cols_needed = [outcome, pred, *adjust_for]
        sub = df[cols_needed].dropna()
        if sub.empty:
            failed.append(pred)
            continue

        pred_design, level_specs = _expand_predictor(sub, pred)
        if pred_design.empty or not level_specs:
            failed.append(pred)
            continue

        adjust_block = (
            adjust_block_template.loc[sub.index]
            if not adjust_block_template.empty else None
        )
        # Stitch predictor + adjust into a single design matrix.
        if adjust_block is not None and not adjust_block.empty:
            design_X = pd.concat([pred_design, adjust_block], axis=1)
        else:
            design_X = pred_design.copy()
        X = sm.add_constant(design_X)

        # Best-effort by design: a predictor whose model cannot be built
        # or fit is recorded in ``failed`` (and surfaced in a footnote)
        # instead of aborting the whole table.
        try:
            fit = model_factory(sub[outcome], X, **method_kwargs).fit(disp=False)
        except Exception:
            failed.append(pred)
            continue

        summary = extract(fit, conf_level=conf_level)
        if summary.family not in families:
            families.append(summary.family)
        exp = summary.natural_exponentiate if exponentiate is None else bool(exponentiate)
        exp_per.append(exp)

        n_sub = int(len(sub))
        display_label = labels.get(pred, pred)
        n_levels = len(level_specs)
        is_categorical_predictor = n_levels > 1 or (
            n_levels == 1 and level_specs[0][2]  # reference-only edge case
        )

        if not is_categorical_predictor:
            # Numeric / continuous → single row, no header.
            design_col = level_specs[0][0]
            assert design_col is not None
            if design_col not in summary.estimates.index:
                failed.append(pred)
                continue
            rows.append(_one_predictor_row(
                design_col, summary, exp=exp, digits=digits,
                label=display_label, n=n_sub, indent=0,
                source_name=pred,
            ))
            continue

        # Multi-level categorical → group header + one row per level.
        rows.append(_group_header_row(
            display_label, source_name=pred, n=n_sub, n_cols=5,
        ))
        for design_col, lvl_label, is_ref in level_specs:
            if is_ref:
                rows.append(_reference_row(lvl_label, source_name=pred))
                continue
            if design_col is None or design_col not in summary.estimates.index:  # pragma: no cover — would require a singular fit dropping a non-ref column
                failed.append(f"{pred}={lvl_label}")
                continue
            # Count the level by summing the dummy column (avoids
            # string-vs-bool comparison pitfalls when reversing the
            # mangle).
            level_n = int(pred_design[design_col].sum())
            rows.append(_one_predictor_row(
                design_col, summary, exp=exp, digits=digits,
                label=lvl_label, n=level_n,
                indent=1, source_name=pred,
            ))

    if not rows and not failed:
        raise ValueError("No predictors produced a coefficient.")

    # Estimate label uses the first family / first exponentiate setting.
    first_family = families[0] if families else None
    family_label = first_family if families else "?"
    est_label = _default_estimate_label(family_label, exp_per[0] if exp_per else False)
    conf_pct = int(round(conf_level * 100))

    headers = (HeaderRow(cells=(
        HeaderCell(text="Predictor", align="left"),
        HeaderCell(text="N"),
        HeaderCell(text=est_label),
        HeaderCell(text=f"{conf_pct}% CI"),
        HeaderCell(text="p-value"),
    )),)

    footnotes: list[str] = []
    if adjust_for:
        footnotes.append("Each variable's coefficient is adjusted for: "
                         f"{', '.join(adjust_for)}.")
    else:
        footnotes.append(
            "Each row is a univariable regression of the outcome on the "
            "named predictor."
        )
    if any(exp_per):
        footnotes.append(
            f"{est_label} = exponentiated coefficient; "
            f"CI = {conf_pct}% confidence interval."
        )
    else:
        footnotes.append(f"CI = {conf_pct}% confidence interval.")
    if families:
        footnotes.append(f"Model: {first_family}.")
    if failed:
        footnotes.append(
            f"{len(failed)} predictor(s) / level(s) failed to converge or "
            f"had no data: {', '.join(failed)}."
        )

    # Not every callable has ``__name__`` (``functools.partial`` objects
    # and callable instances do not); fall back to ``repr`` so a finished
    # run is not thrown away while recording the spec.
    method_name = (
        method if isinstance(method, str)
        else getattr(method, "__name__", repr(method))
    )
    spec = TableSpec(
        builder="tbl_uvregression",
        options={
            "outcome": outcome,
            "predictors": tuple(predictors),
            "method": method_name,
            "exponentiate": exponentiate,
            "conf_level": conf_level,
            "digits": digits,
        },
    )
    return SofraTable(
        rows=tuple(rows),
        headers=headers,
        footnotes=tuple(footnotes),
        metadata={
            "builder": "tbl_uvregression",
            "family": first_family,
            "failed": failed,
        },
        _spec=spec,
    )
376
+
377
+
378
def _group_header_row(label: str, *, source_name: str, n: int, n_cols: int) -> Row:
    """Build the bold header row that introduces a categorical predictor.

    The row holds the predictor label, the sample size ``n``, and then
    blank cells so that it spans ``n_cols`` columns in total.  The
    originating column name is stored under ``metadata["variable"]``.
    """
    name_cell = make_cell(label, align="left", bold=True)
    count_cell = make_cell(str(n), value=n, kind="numeric", align="right")
    # Two columns are already taken by the label and the count.
    blanks = [Cell(text="", value=None) for _ in range(n_cols - 2)]
    return Row(
        cells=(name_cell, count_cell, *blanks),
        is_group_header=True,
        metadata={"variable": source_name},
    )
386
+
387
+
388
def _reference_row(level_label: str, *, source_name: str) -> Row:
    """Build the indented row for a reference level.

    No model output exists for the reference, so the estimate column
    shows the marker ``— ref`` and the N, CI and p-value cells are blank.
    """
    label_cell = make_cell(level_label, align="left", indent=1)
    blank_n = Cell(text="", value=None)
    marker = make_cell("— ref", value=None, kind="numeric", align="right")
    blank_ci = Cell(text="", value=None, kind="ci")
    blank_p = Cell(text="", value=None, kind="p_value")
    return Row(
        cells=(label_cell, blank_n, marker, blank_ci, blank_p),
        metadata={"variable": source_name},
    )
397
+
398
+
399
def _one_predictor_row(
    design_col: str,
    summary: Any,  # ModelSummary
    *,
    exp: bool,
    digits: int,
    label: str,
    n: int,
    indent: int = 0,
    source_name: str | None = None,
) -> Row:
    """Build one body row (label, N, estimate, CI, p-value) for a coefficient.

    ``design_col`` selects the coefficient in ``summary``.  The estimate
    must be present (a missing key propagates from the lookup); CI
    bounds and the p-value fall back to NaN when absent.  With ``exp``
    set, the estimate and both bounds are exponentiated, saturating to
    ``inf`` / ``0.0`` on overflow.
    """
    import math

    from ..core.format import fmt_number, fmt_p_value

    def _value_or_nan(series: Any) -> float:
        # Optional statistics: NaN when the coefficient is not indexed.
        if design_col in series.index:
            return float(series[design_col])
        return float("nan")

    def _display(x: float) -> float:
        # Map a raw coefficient onto the scale that is shown.
        if not exp:
            return x
        try:
            return math.exp(x)
        except OverflowError:
            return float("inf") if x > 0 else 0.0

    raw_est = float(summary.estimates[design_col])
    raw_lo = _value_or_nan(summary.ci_lo)
    raw_hi = _value_or_nan(summary.ci_hi)
    p_val = _value_or_nan(summary.pvalues)

    shown_est = _display(raw_est)
    shown_lo = _display(raw_lo)
    shown_hi = _display(raw_hi)

    if source_name:
        row_meta = {"variable": source_name}
    else:
        row_meta = {}

    ci_text = f"{fmt_number(shown_lo, digits)}, {fmt_number(shown_hi, digits)}"
    row_cells = (
        make_cell(label, align="left", indent=indent),
        make_cell(str(n), value=n, kind="numeric", align="right"),
        make_cell(fmt_number(shown_est, digits), value=shown_est,
                  kind="numeric", align="right"),
        make_cell(ci_text, value=(shown_lo, shown_hi), kind="ci", align="right"),
        make_cell(fmt_p_value(p_val), value=p_val, kind="p_value", align="right"),
    )
    return Row(cells=row_cells, metadata=row_meta)
@@ -0,0 +1,6 @@
1
+ """Notebook helpers.
2
+
3
+ Most of the notebook integration lives on :class:`SofraTable._repr_html_`.
4
+ This module is reserved for future notebook-specific extensions (e.g.
5
+ ipywidgets-based controls).
6
+ """
@@ -0,0 +1,23 @@
1
+ """Matplotlib-backed plot generation for SofraTables.
2
+
3
+ Plot helpers return :class:`InlinePlot` objects carrying SVG, PNG, and
4
+ PDF serialisations of the same matplotlib figure so every renderer
5
+ (HTML, DOCX, PPTX, LaTeX) can embed the plot consistently.
6
+
7
+ * :func:`forest_plot` — point estimates + CIs from a regression table.
8
+ * :func:`km_curve` — Kaplan–Meier survival curves.
9
+
10
+ Both depend on ``matplotlib``, which is an optional dependency.
11
+ """
12
+
13
+ from .forest import forest_plot, forest_plot_svg
14
+ from .inline import InlinePlot
15
+ from .km import km_curve, km_curve_svg
16
+
17
+ __all__ = [
18
+ "InlinePlot",
19
+ "forest_plot",
20
+ "forest_plot_svg",
21
+ "km_curve",
22
+ "km_curve_svg",
23
+ ]
@@ -0,0 +1,32 @@
1
+ """Headless-safe matplotlib backend setup.
2
+
3
+ PySofra never opens a window — every figure is serialised to bytes
4
+ (PNG, SVG, PDF). We therefore force matplotlib's ``Agg`` backend
5
+ before pyplot first creates a figure. Without this, the default
6
+ backend on macOS is ``MacOSX`` which calls into the GUI subsystem;
7
+ in sandboxed environments (HOME=/nonexistent, no display, container
8
+ without X) this aborts with a Cocoa error during figure creation.
9
+
10
+ This helper is idempotent: ``matplotlib.use("Agg", force=True)`` is
11
+ a cheap dictionary update once the backend is loaded. Calling it
12
+ from every plot entry point is the simplest way to guarantee Agg
13
+ is in effect regardless of import order.
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+
19
def use_headless_backend() -> None:
    """Switch matplotlib to the non-interactive ``Agg`` backend.

    Intended to run right before ``import matplotlib.pyplot as plt`` in
    each plot / render entry point; repeated calls are fine.  When
    matplotlib cannot be imported the function returns silently and
    leaves the friendlier optional-dependency error to the caller.
    """
    try:
        import matplotlib as mpl
    except ImportError:  # pragma: no cover — caller raises a friendlier error
        return
    else:
        mpl.use("Agg", force=True)
pysofra/plot/forest.py ADDED
@@ -0,0 +1,159 @@
1
+ """Forest plot rendering for regression SofraTables."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import math
6
+ from typing import Any
7
+
8
+ from ..core.table import SofraTable
9
+ from .inline import InlinePlot, fig_to_svg, render_inline_plot
10
+
11
+
12
def forest_plot(
    table: SofraTable,
    *,
    log_x: bool = True,
    null_line: float = 1.0,
    width_in: float = 6.5,
    height_per_row_in: float = 0.42,
    color: str = "#0b3d91",
) -> InlinePlot:
    """Render a forest plot as an :class:`InlinePlot` (SVG + PNG + PDF).

    Use this when you want the plot embedded across multiple renderers
    (HTML, DOCX, PPTX, LaTeX). For the HTML-only string form, use
    :func:`forest_plot_svg`.

    The parameters are forwarded unchanged to the shared figure builder;
    see :func:`forest_plot_svg` for their meaning.  The matplotlib
    figure is always closed before returning, including when
    serialisation raises.
    """
    fig, height = _build_forest_figure(
        table, log_x=log_x, null_line=null_line,
        width_in=width_in, height_per_row_in=height_per_row_in, color=color,
    )
    try:
        return render_inline_plot(fig, width_in=width_in, height_in=height)
    finally:
        # Close in ``finally`` so a serialisation error does not leave the
        # figure registered with pyplot for the life of the process.
        try:
            import matplotlib.pyplot as plt
        except ImportError:  # pragma: no cover
            pass
        else:
            plt.close(fig)
38
+
39
+
40
def forest_plot_svg(
    table: SofraTable,
    *,
    log_x: bool = True,
    null_line: float = 1.0,
    width_in: float = 6.5,
    height_per_row_in: float = 0.42,
    color: str = "#0b3d91",
) -> str:
    """Render a forest plot for a regression :class:`SofraTable`.

    Reads point estimates and CI bounds from the body rows: it looks for
    a numeric cell of kind ``numeric`` (the point estimate) followed by
    a cell of kind ``ci`` carrying ``(lo, hi)`` tuples — exactly the
    layout produced by :func:`pysofra.tbl_regression`.

    Parameters
    ----------
    table
        A SofraTable produced by ``tbl_regression`` (single- or multi-model).
    log_x
        Plot on a log-scale x-axis. Default ``True`` because the natural
        scale for ORs / HRs / IRRs is multiplicative.
    null_line
        x-coordinate of the null reference (1 for exponentiated, 0 for raw).
    width_in
        Figure width in inches.
    height_per_row_in
        Vertical space per coefficient row.
    color
        Hex string for the point + CI segments.

    Returns
    -------
    str
        The figure serialised as SVG.  The matplotlib figure is always
        closed before returning, including when serialisation raises.
    """
    fig, _ = _build_forest_figure(
        table, log_x=log_x, null_line=null_line,
        width_in=width_in, height_per_row_in=height_per_row_in, color=color,
    )
    try:
        return fig_to_svg(fig)
    finally:
        # Close in ``finally`` so a serialisation error does not leave the
        # figure registered with pyplot for the life of the process.
        try:
            import matplotlib.pyplot as plt
        except ImportError:  # pragma: no cover
            pass
        else:
            plt.close(fig)
83
+
84
+
85
def _pick_estimate_and_ci(cells: Any) -> tuple[int, int] | None:
    """Return ``(estimate_index, ci_index)`` for one row, or ``None``.

    The CI cell is the first cell of kind ``ci`` whose value is a
    2-tuple.  The estimate is the numeric-valued ``numeric`` cell
    closest *before* that CI cell, so a leading count column (e.g. an
    ``N`` cell, which is also of kind ``numeric``) is not mistaken for
    the point estimate.  If no numeric cell precedes the CI, the first
    numeric cell in the row is used.
    """
    ci_idx = next(
        (i for i, c in enumerate(cells)
         if c.kind == "ci" and isinstance(c.value, tuple) and len(c.value) == 2),
        None,
    )
    if ci_idx is None:
        return None
    numeric_idx = [
        i for i, c in enumerate(cells)
        if c.kind == "numeric" and isinstance(c.value, (int, float))
    ]
    if not numeric_idx:
        return None
    preceding = [i for i in numeric_idx if i < ci_idx]
    est_idx = preceding[-1] if preceding else numeric_idx[0]
    return est_idx, ci_idx


def _build_forest_figure(
    table: SofraTable,
    *,
    log_x: bool,
    null_line: float,
    width_in: float,
    height_per_row_in: float,
    color: str,
) -> tuple[Any, float]:
    """Build the matplotlib figure shared by the forest-plot entry points.

    Every row of ``table`` that carries both a point estimate and a
    ``(lo, hi)`` CI is drawn as a square marker with a horizontal error
    bar; rows without such a pair, or with a NaN in it, are skipped.
    Rows are drawn top-to-bottom in table order.

    Returns
    -------
    (fig, height)
        The figure and its height in inches.  The caller owns the
        figure and is responsible for closing it.

    Raises
    ------
    ImportError
        If matplotlib is not installed.
    ValueError
        If no row yields a usable (estimate, CI) pair.
    """
    try:
        from ._backend import use_headless_backend
        use_headless_backend()
        import matplotlib.pyplot as plt
    except ImportError as e:  # pragma: no cover
        raise ImportError(
            "Forest plots require matplotlib. Install with "
            "`pip install matplotlib`."
        ) from e

    points: list[tuple[str, float, float, float]] = []
    est_col: int | None = None  # column of the first plotted estimate
    for r in table.rows:
        cells = r.cells
        picked = _pick_estimate_and_ci(cells)
        if picked is None:
            continue
        est_idx, ci_idx = picked
        est_value = cells[est_idx].value
        lo, hi = cells[ci_idx].value
        if any(_isnan(x) for x in (est_value, lo, hi)):
            continue
        if est_col is None:
            est_col = est_idx
        points.append((cells[0].text, float(est_value), float(lo), float(hi)))

    if not points:
        raise ValueError("No (estimate, CI) pairs found in table; "
                         "is this a regression table?")

    n = len(points)
    height = max(2.0, height_per_row_in * n + 1.0)
    fig, ax = plt.subplots(figsize=(width_in, height))

    labels = [p[0] for p in points]
    estimates = [p[1] for p in points]
    lows = [p[2] for p in points]
    highs = [p[3] for p in points]

    # First table row at the top of the axes.
    ys = list(range(n, 0, -1))
    ax.errorbar(
        estimates, ys,
        xerr=[[e - lo for e, lo in zip(estimates, lows, strict=True)],
              [hi - e for e, hi in zip(estimates, highs, strict=True)]],
        fmt="s", color=color, ecolor=color,
        elinewidth=1.5, capsize=4, markersize=7,
    )

    ax.axvline(null_line, color="#888", linewidth=1, linestyle="--", zorder=0)
    if log_x:
        ax.set_xscale("log")
    ax.set_yticks(ys)
    ax.set_yticklabels(labels)
    ax.set_ylim(0.5, n + 0.5)
    ax.tick_params(axis="y", left=False)
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)
    ax.spines["left"].set_visible(False)

    # Label the x-axis with the header of the column the estimates came
    # from; fall back to column 1, then to a generic label, rather than
    # raising on a short header row.
    xlabel = "Estimate"
    if table.headers:
        header_cells = table.headers[0].cells
        for idx in (est_col, 1):
            if idx is not None and idx < len(header_cells):
                xlabel = header_cells[idx].text
                break
    ax.set_xlabel(xlabel)

    return fig, height
153
+
154
+
155
def _isnan(x: Any) -> bool:
    """Return ``True`` only when ``x`` converts to a float that is NaN.

    Values that cannot be converted to ``float`` (raising ``TypeError``
    or ``ValueError``) are reported as not-NaN.
    """
    try:
        as_float = float(x)
    except (TypeError, ValueError):
        return False
    return math.isnan(as_float)