pysofra 0.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. pysofra/__init__.py +82 -0
  2. pysofra/core/__init__.py +14 -0
  3. pysofra/core/compose.py +167 -0
  4. pysofra/core/format.py +155 -0
  5. pysofra/core/frames.py +69 -0
  6. pysofra/core/schema.py +128 -0
  7. pysofra/core/table.py +924 -0
  8. pysofra/io/__init__.py +1 -0
  9. pysofra/models/__init__.py +6 -0
  10. pysofra/models/extract.py +249 -0
  11. pysofra/models/pool.py +119 -0
  12. pysofra/models/regression.py +507 -0
  13. pysofra/models/survival.py +395 -0
  14. pysofra/models/uvregression.py +438 -0
  15. pysofra/notebook/__init__.py +6 -0
  16. pysofra/plot/__init__.py +23 -0
  17. pysofra/plot/_backend.py +32 -0
  18. pysofra/plot/forest.py +159 -0
  19. pysofra/plot/inline.py +171 -0
  20. pysofra/plot/km.py +249 -0
  21. pysofra/render/__init__.py +28 -0
  22. pysofra/render/_zip_determinism.py +57 -0
  23. pysofra/render/base.py +22 -0
  24. pysofra/render/docx.py +286 -0
  25. pysofra/render/html.py +442 -0
  26. pysofra/render/image.py +130 -0
  27. pysofra/render/latex.py +253 -0
  28. pysofra/render/markdown.py +128 -0
  29. pysofra/render/pptx.py +340 -0
  30. pysofra/render/xlsx.py +226 -0
  31. pysofra/summary/__init__.py +6 -0
  32. pysofra/summary/calibrate.py +214 -0
  33. pysofra/summary/design.py +246 -0
  34. pysofra/summary/effect_size.py +187 -0
  35. pysofra/summary/extras.py +745 -0
  36. pysofra/summary/smd.py +133 -0
  37. pysofra/summary/stats.py +135 -0
  38. pysofra/summary/tbl_cross.py +339 -0
  39. pysofra/summary/tbl_one.py +1220 -0
  40. pysofra/summary/tbl_summary.py +51 -0
  41. pysofra/summary/tests.py +370 -0
  42. pysofra/summary/typing.py +129 -0
  43. pysofra/summary/weights.py +161 -0
  44. pysofra/themes/__init__.py +5 -0
  45. pysofra/themes/registry.py +272 -0
  46. pysofra-0.1.0a1.dist-info/METADATA +301 -0
  47. pysofra-0.1.0a1.dist-info/RECORD +50 -0
  48. pysofra-0.1.0a1.dist-info/WHEEL +4 -0
  49. pysofra-0.1.0a1.dist-info/licenses/LICENSE +674 -0
  50. pysofra-0.1.0a1.dist-info/licenses/NOTICE +18 -0
pysofra/core/table.py ADDED
@@ -0,0 +1,924 @@
1
+ """The core :class:`SofraTable` object.
2
+
3
+ A SofraTable is a backend-agnostic representation of a publication-ready
4
+ statistical table. Every builder (``tbl_one``, ``tbl_summary``,
5
+ ``tbl_regression``) produces a SofraTable; every renderer (HTML, Markdown,
6
+ DOCX) consumes one.
7
+
8
+ The object is immutable: every modifier method (``.theme()``, ``.set_caption()``,
9
+ ``.add_p()``, ...) returns a *new* SofraTable. This keeps results
10
+ deterministic and notebook-friendly.
11
+
12
+ Some modifier methods (``.add_p``, ``.add_smd``, ``.add_overall``) need to
13
+ recompute statistics from the original data. A SofraTable produced by a
14
+ builder therefore carries a private ``_context`` callable that can rebuild
15
+ the table under an updated spec. Tables that have no recompute context
16
+ (e.g. those produced by merge/stack operations) silently ignore those
17
+ operations or raise, as appropriate.
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ from collections.abc import Callable
23
+ from dataclasses import dataclass, field, replace
24
+ from pathlib import Path
25
+ from typing import TYPE_CHECKING, Any
26
+
27
+ from .schema import HeaderRow, Row, SpanningHeader
28
+
29
+
30
def _row_p_below(row: Row, threshold: float) -> bool:
    """Return whether the row's first numeric p-value cell is below ``threshold``.

    Only the first cell whose ``kind`` is ``"p_value"`` and whose ``value`` is
    an ``int`` or ``float`` is consulted. Rows without such a cell yield
    ``False``.
    """
    first_numeric_p = next(
        (
            cell.value
            for cell in row.cells
            if cell.kind == "p_value" and isinstance(cell.value, (int, float))
        ),
        None,
    )
    if first_numeric_p is None:
        return False
    return float(first_numeric_p) < threshold
36
+
37
# No type-checking-only imports are needed at present; the guard below is an
# inert placeholder.
if TYPE_CHECKING:
    pass


# Signature of the private rebuild callable carried by builder-produced
# tables: it takes a ``TableSpec`` and returns a ``SofraTable``.
RebuildFn = Callable[["TableSpec"], "SofraTable"]
42
+
43
+
44
@dataclass(frozen=True)
class TableSpec:
    """Recompute recipe attached to tables produced by a builder.

    ``options`` is an open-ended dict on purpose: builders expose different
    knobs, and each one reads only the keys it understands.
    """

    builder: str  # "tbl_one", "tbl_summary", "tbl_regression", ...
    options: dict[str, Any] = field(default_factory=dict)

    def updated(self, **changes: Any) -> TableSpec:
        """Return a new spec for the same builder with ``changes`` merged in.

        The current ``options`` dict is copied, never mutated; keys present in
        ``changes`` override existing entries.
        """
        return TableSpec(
            builder=self.builder,
            options={**self.options, **changes},
        )
59
+
60
+
61
+ @dataclass(frozen=True)
62
+ class SofraTable:
63
+ """A backend-agnostic publication-ready table.
64
+
65
+ Attributes
66
+ ----------
67
+ rows
68
+ The table body, one :class:`~pysofra.core.schema.Row` per visible line.
69
+ headers
70
+ Column header rows, top-to-bottom. Most tables have a single header
71
+ row; multi-level headers are supported via additional rows.
72
+ spanning_headers
73
+ Optional spanning headers above the column headers.
74
+ caption
75
+ Optional table caption (rendered as a title above the table).
76
+ footnotes
77
+ Tuple of footnote strings rendered below the table.
78
+ theme_name
79
+ Theme key registered in :mod:`pysofra.themes`. Defaults to
80
+ ``"default"``.
81
+ metadata
82
+ Free-form metadata dict carried for downstream consumers and tests
83
+ (e.g. raw p-values, SMDs, model objects).
84
+ """
85
+
86
+ rows: tuple[Row, ...] = ()
87
+ headers: tuple[HeaderRow, ...] = ()
88
+ spanning_headers: tuple[SpanningHeader, ...] = ()
89
+ caption: str | None = None
90
+ footnotes: tuple[str, ...] = ()
91
+ theme_name: str = "default"
92
+ metadata: dict[str, Any] = field(default_factory=dict)
93
+ inline_svg: str | None = None
94
+ inline_svg_position: str = "above" # "above" | "below"
95
+ inline_plot: Any = None # InlinePlot — typed loosely to avoid plot-import cycle
96
+ _spec: TableSpec | None = None
97
+ _rebuild: RebuildFn | None = None
98
+
99
+ # ------------------------------------------------------------------
100
+ # Pickling
101
+ # ------------------------------------------------------------------
102
+ #
103
+ # ``_rebuild`` is a closure produced by the builders (``tbl_one``
104
+ # etc.) that captures the original DataFrame and the spec, so that
105
+ # recomputation modifiers (``add_p``, ``add_smd``, ...) can re-run
106
+ # statistics over the source data. Closures are not pickleable.
107
+ # We strip it from the pickled state and replace it with ``None``
108
+ # on restore — the unpickled object keeps all rendered cell values
109
+ # and can be re-rendered to HTML/Markdown/LaTeX/DOCX/PPTX/XLSX/PNG
110
+ # without issue, but recomputation modifiers (the
111
+ # ``add_p``/``add_smd``/etc. family that requires the captured
112
+ # data) will raise a clear error explaining the limitation. The
113
+ # presentational modifiers (``theme``, ``set_caption``,
114
+ # ``with_footnotes``, ``bold_p``, ``with_forest_plot`` …) keep
115
+ # working because they don't need ``_rebuild``.
116
+
117
+ # ------------------------------------------------------------------
118
+ # Equality
119
+ # ------------------------------------------------------------------
120
+ #
121
+ # The dataclass-generated ``__eq__`` compares every field including
122
+ # ``_rebuild``, which is a closure with per-instance identity. Two
123
+ # tables built from the same data by the same builder therefore
124
+ # tested unequal, and a pickled-then-unpickled table tested unequal
125
+ # to its source (the closure is dropped on pickle).
126
+ #
127
+ # Define equality on the output-affecting fields only: rows,
128
+ # headers, spanning_headers, caption, footnotes, theme_name,
129
+ # inline_svg, inline_svg_position, inline_plot. Skip ``metadata``
130
+ # (free-form internal carry that can hold non-equal-comparable
131
+ # values such as model objects), ``_spec`` (build-time descriptor),
132
+ # and ``_rebuild`` (closure).
133
def __eq__(self, other: object) -> bool:
    """Compare two tables on their output-affecting fields only.

    ``metadata``, ``_spec`` and ``_rebuild`` are deliberately left out of the
    comparison. Non-``SofraTable`` operands yield ``NotImplemented`` so Python
    can try the reflected comparison.
    """
    if self is other:
        return True
    if not isinstance(other, SofraTable):
        return NotImplemented
    for name in (
        "rows",
        "headers",
        "spanning_headers",
        "caption",
        "footnotes",
        "theme_name",
        "inline_svg",
        "inline_svg_position",
    ):
        if not (getattr(self, name) == getattr(other, name)):
            return False
    # The final comparison is returned as-is rather than coerced to bool.
    return self.inline_plot == other.inline_plot
149
+
150
# ``@dataclass(eq=True, frozen=True)`` installs a field-based ``__hash__``
# whenever the class body leaves ``__hash__`` undefined *or* sets it to
# ``None`` next to a user-defined ``__eq__`` (the decorator cannot tell
# that apart from the ``None`` Python assigns implicitly). A plain
# ``__hash__ = None`` would therefore be replaced by the generated hash,
# which disagrees with the custom ``__eq__`` and only fails by accident
# on the ``metadata: dict`` field. An explicitly defined method is left
# untouched by the decorator, so the error is a sharp ``TypeError`` and
# never a silent inconsistency between equal-but-different-hash tables.
def __hash__(self) -> int:
    """Always raise ``TypeError``: tables are deliberately unhashable."""
    raise TypeError(f"unhashable type: {type(self).__name__!r}")
156
+
157
def __getstate__(self) -> dict[str, Any]:
    """Return the picklable state: every field, with ``_rebuild`` set to ``None``.

    The rebuild closure is never serialised; all other fields are carried
    over unchanged.
    """
    carried = (
        "rows",
        "headers",
        "spanning_headers",
        "caption",
        "footnotes",
        "theme_name",
        "metadata",
        "inline_svg",
        "inline_svg_position",
        "inline_plot",
        "_spec",
    )
    state: dict[str, Any] = {name: getattr(self, name) for name in carried}
    # Closures are not pickleable; see the "Pickling" comment in the class body.
    state["_rebuild"] = None
    return state
173
+
174
def __setstate__(self, state: dict[str, Any]) -> None:
    """Restore every entry of ``state`` as an attribute on this instance."""
    # The dataclass is frozen, so go through ``object.__setattr__`` to get
    # past its ``__setattr__`` block.
    assign = object.__setattr__
    for name in state:
        assign(self, name, state[name])
178
+
179
+ # ------------------------------------------------------------------
180
+ # Composition / styling modifiers
181
+ # ------------------------------------------------------------------
182
def theme(self, name: str) -> SofraTable:
    """Return a copy of the table that uses the theme registered as ``name``.

    See :mod:`pysofra.themes` for the available themes.
    """
    from ..themes.registry import resolve_theme as _check_theme

    # Called for validation only; the resolved theme itself is not stored.
    _check_theme(name)
    themed = replace(self, theme_name=name)
    return themed
188
+
189
def set_caption(self, text: str | None) -> SofraTable:
    """Return a copy of the table whose caption is ``text``.

    Any existing caption is replaced. The caption renders above the table
    in every backend (``<caption>`` in HTML, ``\\caption{}`` in LaTeX, a
    bold paragraph above the table in DOCX/PPTX, sheet header in XLSX).

    Parameters
    ----------
    text
        The new caption, or ``None`` to remove the current one.

    Returns
    -------
    SofraTable
        A new table carrying the caption; the receiver is left untouched.
    """
    captioned = replace(self, caption=text)
    return captioned
209
+
210
def add_footnote(self, text: str) -> SofraTable:
    """Return a copy of the table with ``text`` appended as the last footnote.

    Footnotes render below the table in every backend, in append order.
    Builders such as :func:`tbl_one` emit their own footnotes (e.g.
    "Tests:", "n (%) for categorical variables."), so footnotes added here
    come *after* the auto-generated ones.

    Parameters
    ----------
    text
        Plain footnote text; renderers escape special characters as their
        backend requires.

    Returns
    -------
    SofraTable
        A new table with the extra footnote. Use :meth:`with_footnotes`
        to replace the whole footnote list instead.
    """
    extended = (*self.footnotes, text)
    return replace(self, footnotes=extended)
234
+
235
def with_footnotes(self, footnotes: list[str] | tuple[str, ...]) -> SofraTable:
    """Return a copy of the table whose footnotes are exactly ``footnotes``."""
    replacement = tuple(footnotes)
    return replace(self, footnotes=replacement)
238
+
239
+ # ------------------------------------------------------------------
240
+ # Statistical modifiers (require a rebuild context)
241
+ # ------------------------------------------------------------------
242
def add_p(self, **overrides: Any) -> SofraTable:
    """Request a p-value column.

    What happens depends on the builder behind the table: ``tbl_one`` /
    ``tbl_summary`` pick a test automatically per row (see
    :mod:`pysofra.summary.tests`), whereas ``tbl_regression`` tables
    already carry p-values, making this a no-op there.

    Keyword arguments are forwarded, as a fresh dict, under the
    ``p_overrides`` option.
    """
    p_overrides = dict(overrides)
    return self._with_option(p_value=True, p_overrides=p_overrides)
252
+
253
def add_smd(self) -> SofraTable:
    """Request a standardized-mean-difference column (Table 1 only)."""
    rebuilt = self._with_option(smd=True)
    return rebuilt
256
+
257
def add_q(self, method: str = "fdr_bh") -> SofraTable:
    """Request a multiplicity-adjusted q-value column.

    ``method`` is handed to
    ``statsmodels.stats.multitest.multipletests``. Common choices are
    ``fdr_bh`` (Benjamini–Hochberg, the default), ``fdr_by``,
    ``bonferroni``, ``holm``, ``hommel`` and ``sidak``. P-values are
    switched on as well, in case they were not already.
    """
    options = {"p_value": True, "q_value": True, "q_method": method}
    return self._with_option(**options)
267
+
268
+ # ------------------------------------------------------------------
269
+ # Extras (gtsummary parity)
270
+ # ------------------------------------------------------------------
271
def add_significance_stars(
    self,
    *,
    thresholds: tuple[tuple[float, str], ...] = (
        (0.001, "***"),
        (0.01, "**"),
        (0.05, "*"),
    ),
) -> SofraTable:
    """Append a ``stars`` column holding ``*** / ** / *`` significance markers.

    ``thresholds`` holds ``(cutoff, marker)`` pairs ordered from the
    smallest cutoff upwards; a p-value receives the first marker whose
    cutoff it falls below.
    """
    # Imported lazily and aliased so the helper does not shadow this method.
    from ..summary.extras import add_significance_stars as _add_stars

    return _add_stars(self, thresholds=thresholds)
289
+
290
def add_n(self) -> SofraTable:
    """Append a per-row ``N`` column with the non-missing sample size."""
    from ..summary.extras import add_n as _add_n

    return _add_n(self)
295
+
296
def add_stat_label(self) -> SofraTable:
    """Append a ``Statistic`` column describing each row's summary form."""
    from ..summary.extras import add_stat_label as _add_stat_label

    return _add_stat_label(self)
301
+
302
def color_scale_if(
    self,
    *,
    column: int,
    palette: tuple[str, str, str] = ("#fff5f0", "#fcae91", "#cb181d"),
    skip_blank: bool = True,
) -> SofraTable:
    """Colour the cells of one numeric column heatmap-style (HTML only).

    All arguments are forwarded unchanged to
    ``pysofra.summary.extras.color_scale_if``.
    """
    from ..summary.extras import color_scale_if as _color_scale

    forwarded = {"column": column, "palette": palette, "skip_blank": skip_blank}
    return _color_scale(self, **forwarded)
314
+
315
def add_global_p(
    self, *, adjust_for: list[str] | tuple[str, ...] | None = None,
) -> SofraTable:
    """Add a joint Type-III p-value column.

    Works on :func:`tbl_regression` tables and on :func:`tbl_one` /
    :func:`tbl_summary` tables, through two different routes:

    * **tbl_regression** — every multi-level categorical predictor gets
      the joint Wald-F p-value from ``model.f_test`` on the contrast
      matrix that zeroes out all of its levels at once. Single-level
      coefficients simply repeat their existing p-value.
    * **tbl_one / tbl_summary** — for each variable a logistic
      regression is fit on the source data:
      ``Logit(by == reference_level ~ variable [+ adjust_for])``.
      The joint Wald p-value over that variable's coefficients becomes
      the "global p" cell. Covariates given via ``adjust_for=`` enter
      every per-variable fit, yielding covariate-adjusted joint
      p-values.

    Parameters
    ----------
    adjust_for
        (tbl_one / tbl_summary only) Optional covariate column names to
        add to each per-variable regression. Continuous numeric
        covariates enter as-is; non-numeric ones are dummy-coded.
        Ignored on :func:`tbl_regression` tables.

    Raises
    ------
    NotImplementedError
        On composition primitives (``tbl_merge`` / ``tbl_stack``) and on
        directly-constructed tables that carry neither a fitted
        ``model`` nor a re-runnable builder spec.
    """
    has_model = (self.metadata or {}).get("model") is not None
    spec = self._spec
    rebuildable = spec is not None and spec.builder in ("tbl_one", "tbl_summary")

    # tbl_one / tbl_summary path: route through the rebuild spec. An
    # attached model takes precedence over the spec.
    if not has_model and rebuildable:
        return self._with_option(
            global_p=True,
            global_p_adjust_for=tuple(adjust_for or ()),
        )

    # Either the tbl_regression path (model attached in metadata) or the
    # original error path, which differentiates tbl_cross / composition /
    # unpickled origins.
    from ..summary.extras import add_global_p as _add_global_p

    return _add_global_p(self)
369
+
370
def add_difference(self, *, digits: int = 2,
                   conf_level: float = 0.95) -> SofraTable:
    """Add a between-group difference column (continuous + dichotomous).

    Needs a 2-group Table 1. Continuous rows show the Welch
    mean-difference with its CI, dichotomous rows the proportion
    difference with a Wilson-score-based CI, and multi-level categorical
    rows show ``—``.
    """
    from ..summary.extras import add_difference as _add_difference

    forwarded = {"digits": digits, "conf_level": conf_level}
    return _add_difference(self, **forwarded)
382
+
383
def add_ci(self, *, conf_level: float = 0.95) -> SofraTable:
    """Append a confidence interval to each summary cell.

    Continuous cells gain ``[lo, hi]`` for the mean; dichotomous cells
    gain ``[lo%, hi%]`` for the proportion (Wilson score).
    """
    from ..summary.extras import add_ci as _add_ci

    return _add_ci(self, conf_level=conf_level)
392
+
393
def with_pvalue_fmt(self, fn: Callable[[float], str]) -> SofraTable:
    """Re-format every p-value cell using the callable ``fn``."""
    from ..summary.extras import with_pvalue_fmt as _reformat_p

    return _reformat_p(self, fn)
398
+
399
def with_estimate_fmt(self, fn: Callable[[float], str]) -> SofraTable:
    """Re-format every numeric estimate cell using the callable ``fn``."""
    from ..summary.extras import with_estimate_fmt as _reformat_estimate

    return _reformat_estimate(self, fn)
404
+
405
+ # ------------------------------------------------------------------
406
+ # Layout hints
407
+ # ------------------------------------------------------------------
408
def autofit(self, *, enable: bool = True) -> SofraTable:
    """Ask every renderer to size columns to their content.

    The flag is stored as ``metadata['autofit']``. HTML already sizes
    columns by content; XLSX auto-sizes column widths to the widest
    cell; the DOCX renderer sets ``table.autofit = True`` when the flag
    is on.
    """
    # Work on a copy so the receiver's metadata dict is never mutated.
    settings: dict[str, Any] = {}
    if self.metadata:
        settings.update(self.metadata)
    settings["autofit"] = bool(enable)
    return replace(self, metadata=settings)
418
+
419
+ # ------------------------------------------------------------------
420
+ # Rich-cell composition
421
+ # ------------------------------------------------------------------
422
def compose(
    self,
    row: int | str,
    column: int | str,
    parts: Any,
) -> SofraTable:
    """Replace a cell's content with multiple typographically distinct parts.

    ``parts`` is an iterable of :class:`~pysofra.core.schema.CellPart`
    — each carries its own ``bold`` / ``italic`` / ``superscript`` /
    ``subscript`` / ``color`` / ``link`` flags. Renderers concatenate
    the parts inside the same cell, honouring whichever flags the
    backend supports; the fallback ``text`` is set to the concatenated
    plain text so non-rich backends still print something readable.

    Parameters
    ----------
    row
        0-based row index, or a label matched against the text of each
        row's first cell (first match wins).
    column
        0-based column index, or a label matched against the cells of
        the last header row, either verbatim or with newlines replaced
        by spaces.
    parts
        Iterable of ``CellPart`` instances.

    Raises
    ------
    KeyError
        If the row or column label is not found, or an index is out of
        range (negative indices are rejected).
    TypeError
        If any element of ``parts`` is not a ``CellPart``.
    """
    from .schema import CellPart as _CellPart

    if isinstance(row, int):
        r_idx = row
    else:
        # Rows without cells cannot carry a label; skip them so the lookup
        # ends in the KeyError below rather than an IndexError.
        r_idx = next(
            (
                i for i, r in enumerate(self.rows)
                if r.cells and r.cells[0].text == row
            ),
            -1,
        )
        if r_idx == -1:
            raise KeyError(f"No row labelled {row!r}")
    if not 0 <= r_idx < len(self.rows):
        raise KeyError(f"row {row!r} out of range")

    if isinstance(column, int):
        c_idx = column
    else:
        header_cells = self.headers[-1].cells if self.headers else ()
        c_idx = next(
            (j for j, c in enumerate(header_cells)
             if c.text.replace("\n", " ") == column or c.text == column),
            -1,
        )
        if c_idx == -1:
            raise KeyError(f"No column labelled {column!r}")
    if not 0 <= c_idx < len(self.rows[r_idx].cells):
        raise KeyError(f"column {column!r} out of range")

    # Materialise once: ``parts`` may be a one-shot iterator and is
    # consumed three times below (validation, fallback text, storage).
    parts_tuple = tuple(parts)
    for p in parts_tuple:
        if not isinstance(p, _CellPart):
            raise TypeError(
                f"compose() parts must be CellPart instances; "
                f"got {type(p).__name__}."
            )

    old_row = self.rows[r_idx]
    new_cells = list(old_row.cells)
    fallback = "".join(p.text for p in parts_tuple)
    new_cells[c_idx] = replace(
        new_cells[c_idx],
        text=fallback,
        parts=parts_tuple,
    )
    new_rows = list(self.rows)
    new_rows[r_idx] = replace(old_row, cells=tuple(new_cells))
    return replace(self, rows=tuple(new_rows))
485
+
486
+ # ------------------------------------------------------------------
487
+ # Spanning headers (manual API)
488
+ # ------------------------------------------------------------------
489
def modify_spanning_header(
    self,
    label: str,
    *,
    start: int,
    end: int,
) -> SofraTable:
    """Place a spanning header labelled ``label`` above columns ``start``..``end``.

    Columns are 0-indexed and both ends of the range are inclusive. Any
    existing span that overlaps the new range is dropped; the new span
    is appended after the spans that remain.

    Raises
    ------
    ValueError
        If the range is empty or falls outside the table's columns.
    """
    from .schema import SpanningHeader

    # Column count comes from the first header row, else the first body
    # row, else defaults to a single column.
    if self.headers:
        ncols = len(self.headers[0].cells)
    elif self.rows:
        ncols = len(self.rows[0].cells)
    else:
        ncols = 1

    if start < 0 or end >= ncols or start > end:
        raise ValueError(
            f"start={start}, end={end} out of range for {ncols} columns."
        )

    survivors = [
        span
        for span in self.spanning_headers
        if span.end < start or span.start > end
    ]
    survivors.append(SpanningHeader(label=label, start=start, end=end))
    return replace(self, spanning_headers=tuple(survivors))
518
+
519
+ # ------------------------------------------------------------------
520
+ # Inline text extraction (in-prose)
521
+ # ------------------------------------------------------------------
522
def inline_text(self, *, row: int | str, column: int | str) -> str:
    """Pull the text of a single cell for inline use.

    Parameters
    ----------
    row
        0-based row index, or a label matched against the text of each
        row's first cell (first match wins).
    column
        0-based column index, or a label matched against the cells of
        the last header row, either verbatim or with newlines replaced
        by spaces.

    Returns
    -------
    str
        The ``text`` of the selected cell.

    Raises
    ------
    KeyError
        If no row or column matches the label, or an index is out of
        range (negative indices are rejected).
    """
    # Resolve row index.
    if isinstance(row, int):
        r_idx = row
    else:
        # Rows without cells cannot carry a label; skip them so the lookup
        # ends in the documented KeyError rather than an IndexError.
        r_idx = next(
            (
                i for i, r in enumerate(self.rows)
                if r.cells and r.cells[0].text == row
            ),
            -1,
        )
        if r_idx == -1:
            raise KeyError(f"No row labelled {row!r}")
    if not 0 <= r_idx < len(self.rows):
        raise KeyError(f"row {row!r} out of range")

    # Resolve column index.
    if isinstance(column, int):
        c_idx = column
    else:
        header_cells = self.headers[-1].cells if self.headers else ()
        c_idx = next(
            (j for j, c in enumerate(header_cells)
             if c.text.replace("\n", " ") == column or c.text == column),
            -1,
        )
        if c_idx == -1:
            raise KeyError(f"No column labelled {column!r}")

    cells = self.rows[r_idx].cells
    if not 0 <= c_idx < len(cells):
        raise KeyError(f"column {column!r} out of range")
    return cells[c_idx].text
559
+
560
+ # ------------------------------------------------------------------
561
+ # Raster export (PNG of the table itself)
562
+ # ------------------------------------------------------------------
563
def to_image(
    self,
    path: str | Path,
    *,
    scale: float = 2.0,
    dpi: int = 300,
) -> Path:
    """Render the table to a PNG image at ``path``.

    Rendering is done with matplotlib and yields a faithful raster of
    the HTML output — handy for quick previews, Slack attachments, and
    submission figures where reviewers want a visual.

    ``scale`` multiplies the pixel density (>= 1 recommended) and
    ``dpi`` sets the output resolution (300 by default, the usual
    print-quality target).
    """
    from ..render.image import write_image as _write_image

    target = Path(path)
    return _write_image(self, target, scale=scale, dpi=dpi)
583
+
584
def add_overall(self, label: str = "Overall") -> SofraTable:
    """Request an overall (unstratified) column headed ``label``."""
    options = {"overall": True, "overall_label": label}
    return self._with_option(**options)
587
+
588
def bold_p(self, threshold: float = 0.05) -> SofraTable:
    """Bold every row whose p-value cell holds a value below ``threshold``.

    Purely presentational: it applies to any SofraTable whose body rows
    contain a cell of kind ``p_value`` with a numeric ``value``.
    """
    # Coerce once, up front, so a non-numeric threshold fails here rather
    # than inside the per-row predicate.
    cutoff = float(threshold)

    def _is_significant(row: Row) -> bool:
        return _row_p_below(row, cutoff)

    return self.bold_if(_is_significant)
597
+
598
+ # ------------------------------------------------------------------
599
+ # Conditional formatting
600
+ # ------------------------------------------------------------------
601
def bold_if(self, predicate: Callable[[Row], bool]) -> SofraTable:
    """Bold the cells of every row for which ``predicate(row)`` is true.

    Cells with empty text are left untouched, and rows that fail the
    predicate are carried over as the same objects.

    Example::

        table.bold_if(lambda r: r.cells[0].text.startswith('age'))
    """

    def _embolden(row: Row) -> Row:
        cells = tuple(
            replace(cell, bold=True) if cell.text else cell
            for cell in row.cells
        )
        return replace(row, cells=cells)

    restyled = tuple(
        _embolden(row) if predicate(row) else row for row in self.rows
    )
    return replace(self, rows=restyled)
618
+
619
def highlight_if(
    self,
    predicate: Callable[[Row], bool],
    *,
    color: str = "#fff3cd",
) -> SofraTable:
    """Give rows satisfying ``predicate`` a background colour.

    The colour is recorded under the ``"highlight"`` key of a copy of
    the row's metadata. The HTML renderer consumes it; Markdown / LaTeX
    ignore it. ``color`` accepts any CSS colour string.
    """

    def _tint(row: Row) -> Row:
        # Copy first so the original row's metadata dict stays untouched.
        tagged: dict = {}
        if row.metadata:
            tagged.update(row.metadata)
        tagged["highlight"] = color
        return replace(row, metadata=tagged)

    recoloured = tuple(
        _tint(row) if predicate(row) else row for row in self.rows
    )
    return replace(self, rows=recoloured)
640
+
641
def style_if(
    self,
    predicate: Callable[[Row], bool],
    *,
    bold: bool = False,
    italic: bool = False,
    color: str | None = None,
) -> SofraTable:
    """Apply several conditional row styles in one call.

    Bundles :meth:`bold_if`, italic toggling, and an optional row
    background highlight, applied in that order. With every option
    left at its default the receiver itself is returned.
    """
    styled = self.bold_if(predicate) if bold else self

    if italic:
        # Same rule as bold_if: only cells with non-empty text are restyled.
        italicised = tuple(
            replace(
                row,
                cells=tuple(
                    replace(cell, italic=True) if cell.text else cell
                    for cell in row.cells
                ),
            )
            if predicate(row)
            else row
            for row in styled.rows
        )
        styled = replace(styled, rows=italicised)

    if color is None:
        return styled
    return styled.highlight_if(predicate, color=color)
671
+
672
+ # ------------------------------------------------------------------
673
+ # Export
674
+ # ------------------------------------------------------------------
675
def to_html(
    self,
    *,
    sticky_header: bool = False,
    max_height: str | None = None,
) -> str:
    """Render the table as a standalone HTML fragment.

    With ``sticky_header=True`` the column headers stay visible while
    the body scrolls; combine it with ``max_height`` (a CSS length such
    as ``"60vh"`` or ``"400px"``) to get the vertical scroll container.
    """
    from ..render.html import HtmlRenderer

    renderer = HtmlRenderer(
        sticky_header=sticky_header,
        max_height=max_height,
    )
    return renderer.render(self)
694
+
695
def to_markdown(self) -> str:
    """Render the table as GitHub-flavored Markdown."""
    from ..render.markdown import MarkdownRenderer

    renderer = MarkdownRenderer()
    return renderer.render(self)
700
+
701
def to_docx(self, path: str | Path) -> Path:
    """Write the table to a ``.docx`` file. Returns the resolved path."""
    from ..render.docx import DocxRenderer

    target = Path(path)
    return DocxRenderer().write(self, target)
706
+
707
def to_latex(self, *, booktabs: bool = True,
             float_position: str = "ht",
             centering: bool = True) -> str:
    """Render the table as a LaTeX ``table`` float (booktabs by default).

    The consuming document needs ``\\usepackage{booktabs}`` in its
    preamble. The LaTeX source is returned as a string; use
    :func:`pathlib.Path.write_text` to save it as a ``.tex`` file.

    Inline plots are only embedded by :meth:`to_latex_file`, which
    writes a sidecar PDF. For a plain LaTeX string, call this method
    and ignore any attached plot.
    """
    from ..render.latex import LatexRenderer

    renderer = LatexRenderer(
        booktabs=booktabs,
        float_position=float_position,
        centering=centering,
    )
    return renderer.render(self)
727
+
728
def to_latex_file(self, path: str | Path, *, booktabs: bool = True,
                  float_position: str = "ht",
                  centering: bool = True) -> Path:
    """Write a ``.tex`` file plus a sidecar PDF for any inline plot.

    When the table carries an :class:`~pysofra.plot.InlinePlot`, the
    plot is saved as ``<stem>_plot.pdf`` beside the ``.tex`` file and
    pulled in with ``\\includegraphics``; the consuming document then
    needs ``graphicx`` in its preamble.
    """
    from ..render.latex import LatexRenderer

    renderer = LatexRenderer(
        booktabs=booktabs,
        float_position=float_position,
        centering=centering,
    )
    written = renderer.write(self, Path(path))
    # Normalise to ``Path`` whatever the renderer hands back.
    return Path(written)
746
+
747
def to_pptx(self, path: str | Path, *,
            slide_title: str | None = None) -> Path:
    """Write the table to a single-slide ``.pptx`` file.

    Needs the optional ``python-pptx`` dependency
    (``pip install pysofra[pptx]``). When ``slide_title`` is omitted,
    the table's caption is used.
    """
    from ..render.pptx import PptxRenderer

    renderer = PptxRenderer(slide_title=slide_title)
    return renderer.write(self, Path(path))
758
+
759
def to_xlsx(self, path: str | Path, *, sheet_name: str = "Table") -> Path:
    """Write the table to an ``.xlsx`` file via ``xlsxwriter``.

    Raises
    ------
    ImportError
        If the renderer module exposes ``XlsxRenderer`` as ``None``
        (presumably when ``xlsxwriter`` is not installed — confirm in
        ``pysofra.render.xlsx``).
    """
    from ..render.xlsx import XlsxRenderer

    if XlsxRenderer is not None:
        renderer = XlsxRenderer(sheet_name=sheet_name)
        return renderer.write(self, Path(path))
    raise ImportError(  # pragma: no cover
        "Excel export requires xlsxwriter. "
        "Install with `pip install xlsxwriter`."
    )
769
+
770
+ # ------------------------------------------------------------------
771
+ # Inline plot attachment
772
+ # ------------------------------------------------------------------
773
def with_inline_svg(
    self,
    svg: str,
    *,
    position: str = "above",
) -> SofraTable:
    """Attach a raw inline-SVG string to this table.

    The HTML renderer places the SVG above the table (the default) or
    below it. Markdown ignores the SVG, as it has no in-line image
    syntax for SVG strings. When the plot must also travel through
    DOCX / LaTeX / PPTX, prefer :meth:`with_forest_plot` or
    :meth:`with_km_plot` — those serialise a matplotlib figure into
    SVG + PNG + PDF and each renderer picks the format it supports.

    Raises
    ------
    ValueError
        If ``position`` is neither ``"above"`` nor ``"below"``.
    """
    if position in ("above", "below"):
        return replace(self, inline_svg=svg, inline_svg_position=position)
    raise ValueError("position must be 'above' or 'below'")
792
+
793
def with_forest_plot(
    self,
    *,
    log_x: bool = True,
    null_line: float = 1.0,
    position: str = "above",
    **plot_kwargs: Any,
) -> SofraTable:
    """Attach a forest plot rendered from this regression table's coefficients.

    Only valid for tables produced by :func:`tbl_regression`. Reads
    the point estimate + CI cells directly from the table body so
    the plot is guaranteed to match the displayed numbers. The
    attached plot carries SVG / PNG / PDF serialisations so it
    embeds in HTML, DOCX, PPTX, and LaTeX output consistently.

    Parameters
    ----------
    log_x, null_line:
        Forwarded to ``forest_plot``.
    position:
        ``"above"`` (default) or ``"below"`` the table.
    **plot_kwargs:
        Extra keyword arguments passed through to ``forest_plot``.

    Returns
    -------
    SofraTable
        A copy with ``inline_svg``, ``inline_svg_position`` and
        ``inline_plot`` set.

    Raises
    ------
    ValueError
        If ``position`` is neither ``"above"`` nor ``"below"``.
    """
    # Validate first: a bad ``position`` should fail immediately rather
    # than after importing the plotting stack and rendering a figure
    # that is then thrown away.
    if position not in ("above", "below"):
        raise ValueError("position must be 'above' or 'below'")

    from ..plot.forest import forest_plot

    plot = forest_plot(self, log_x=log_x, null_line=null_line, **plot_kwargs)
    return replace(
        self,
        inline_svg=plot.svg,
        inline_svg_position=position,
        inline_plot=plot,
    )
820
+
821
def with_km_plot(self, *, position: str = "above", **plot_kwargs: Any) -> SofraTable:
    """Attach a Kaplan–Meier curve to a :func:`tbl_survival` result.

    The KM curves are refitted from the original data with
    ``lifelines``; the resulting plot is embedded as SVG + PNG + PDF
    so HTML, DOCX, PPTX, and LaTeX exports all show the same figure.

    All the work is delegated to
    ``pysofra.models.survival.attach_km_plot``, which receives this
    table, ``position`` and any extra ``plot_kwargs`` unchanged.
    """
    from ..models.survival import attach_km_plot

    with_curve = attach_km_plot(self, position=position, **plot_kwargs)
    return with_curve
836
+
837
def to_dict(self) -> dict[str, Any]:
    """Return a plain-dict snapshot of the table (handy for snapshot tests).

    Keys, in order: ``caption``, ``footnotes`` (copied into a list),
    ``theme``, ``headers`` (cell texts per header row),
    ``spanning_headers`` (``label`` / ``start`` / ``end`` dicts) and
    ``rows`` (cell texts per body row).
    """
    snapshot: dict[str, Any] = {"caption": self.caption}
    snapshot["footnotes"] = list(self.footnotes)
    snapshot["theme"] = self.theme_name

    header_texts = []
    for header_row in self.headers:
        header_texts.append([cell.text for cell in header_row.cells])
    snapshot["headers"] = header_texts

    spans = []
    for span in self.spanning_headers:
        spans.append({"label": span.label, "start": span.start, "end": span.end})
    snapshot["spanning_headers"] = spans

    body_texts = []
    for body_row in self.rows:
        body_texts.append([cell.text for cell in body_row.cells])
    snapshot["rows"] = body_texts

    return snapshot
850
+
851
+ # ------------------------------------------------------------------
852
+ # Notebook integration
853
+ # ------------------------------------------------------------------
854
def _repr_html_(self) -> str:  # noqa: D401 — Jupyter API
    """HTML representation picked up by Jupyter / Colab / VS Code notebooks.

    Renders through ``HtmlRenderer`` with ``notebook=True``.
    """
    from ..render.html import HtmlRenderer

    notebook_renderer = HtmlRenderer(notebook=True)
    return notebook_renderer.render(self)
859
+
860
+ def _repr_markdown_(self) -> str: # noqa: D401 — Jupyter API
861
+ """Markdown rendering used by Quarto and some markdown-first viewers."""
862
+ return self.to_markdown()
863
+
864
+ def _repr_latex_(self) -> str: # noqa: D401 — Jupyter API
865
+ """LaTeX rendering for environments that prefer it over HTML."""
866
+ return self.to_latex()
867
+
868
+ def __repr__(self) -> str: # pragma: no cover — repr is cosmetic
869
+ ncols = len(self.headers[0].cells) if self.headers else 0
870
+ return f"SofraTable(rows={len(self.rows)}, cols={ncols}, theme={self.theme_name!r})"
871
+
872
+ # ------------------------------------------------------------------
873
+ # Internal
874
+ # ------------------------------------------------------------------
875
+ def _with_option(self, **changes: Any) -> SofraTable:
876
+ if self._spec is None or self._rebuild is None:
877
+ # Three distinct routes produce this state; differentiate them
878
+ # so the error names the actual cause rather than guessing.
879
+ # (a) ``_spec is None`` — built by composition primitives
880
+ # (``tbl_merge`` / ``tbl_stack``) or constructed directly
881
+ # via ``SofraTable(...)``.
882
+ # (b) ``_spec.builder == 'tbl_cross'`` — a builder that
883
+ # deliberately doesn't capture a re-runnable spec.
884
+ # (c) otherwise — unpickled; ``_rebuild`` was stripped on
885
+ # pickle (see ``__getstate__``).
886
+ if self._spec is None:
887
+ cause = (
888
+ "either constructed directly via ``SofraTable(...)`` "
889
+ "or produced by a composition primitive "
890
+ "(``tbl_merge`` / ``tbl_stack``) — neither path carries "
891
+ "a re-runnable spec"
892
+ )
893
+ elif self._spec.builder == "tbl_cross": # pragma: no cover — tbl_cross now carries _rebuild; reachable only on an unpickled tbl_cross (rebuild closure stripped)
894
+ cause = (
895
+ "an unpickled ``tbl_cross`` table — the recomputation "
896
+ "closure was stripped on pickle (see "
897
+ "SofraTable.__getstate__). Re-run ``tbl_cross`` on "
898
+ "the source DataFrame to restore recomputation"
899
+ )
900
+ else:
901
+ cause = (
902
+ "unpickled — the recomputation closure ``_rebuild`` is "
903
+ "stripped on pickle (see SofraTable.__getstate__). "
904
+ "Re-run the original builder on the source DataFrame to "
905
+ "restore recomputation"
906
+ )
907
+ raise RuntimeError(
908
+ f"This SofraTable cannot apply statistical modifiers because "
909
+ f"it is {cause}. Presentational modifiers (theme, "
910
+ f"set_caption, with_footnotes, bold_p, with_forest_plot, "
911
+ f"etc.) and renderers still work."
912
+ )
913
+ new_spec = self._spec.updated(**changes)
914
+ rebuilt = self._rebuild(new_spec)
915
+ # Preserve presentational state (theme, caption) across rebuilds.
916
+ # Footnotes are *not* preserved: builders regenerate them based on
917
+ # the current spec (e.g. adding ``.add_p()`` introduces a "Tests:"
918
+ # line). Call ``.add_footnote()`` *after* statistical modifiers to
919
+ # append your own.
920
+ return replace(
921
+ rebuilt,
922
+ theme_name=self.theme_name,
923
+ caption=self.caption,
924
+ )