pysofra 0.1.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pysofra/__init__.py +82 -0
- pysofra/core/__init__.py +14 -0
- pysofra/core/compose.py +167 -0
- pysofra/core/format.py +155 -0
- pysofra/core/frames.py +69 -0
- pysofra/core/schema.py +128 -0
- pysofra/core/table.py +924 -0
- pysofra/io/__init__.py +1 -0
- pysofra/models/__init__.py +6 -0
- pysofra/models/extract.py +249 -0
- pysofra/models/pool.py +119 -0
- pysofra/models/regression.py +507 -0
- pysofra/models/survival.py +395 -0
- pysofra/models/uvregression.py +438 -0
- pysofra/notebook/__init__.py +6 -0
- pysofra/plot/__init__.py +23 -0
- pysofra/plot/_backend.py +32 -0
- pysofra/plot/forest.py +159 -0
- pysofra/plot/inline.py +171 -0
- pysofra/plot/km.py +249 -0
- pysofra/render/__init__.py +28 -0
- pysofra/render/_zip_determinism.py +57 -0
- pysofra/render/base.py +22 -0
- pysofra/render/docx.py +286 -0
- pysofra/render/html.py +442 -0
- pysofra/render/image.py +130 -0
- pysofra/render/latex.py +253 -0
- pysofra/render/markdown.py +128 -0
- pysofra/render/pptx.py +340 -0
- pysofra/render/xlsx.py +226 -0
- pysofra/summary/__init__.py +6 -0
- pysofra/summary/calibrate.py +214 -0
- pysofra/summary/design.py +246 -0
- pysofra/summary/effect_size.py +187 -0
- pysofra/summary/extras.py +745 -0
- pysofra/summary/smd.py +133 -0
- pysofra/summary/stats.py +135 -0
- pysofra/summary/tbl_cross.py +339 -0
- pysofra/summary/tbl_one.py +1220 -0
- pysofra/summary/tbl_summary.py +51 -0
- pysofra/summary/tests.py +370 -0
- pysofra/summary/typing.py +129 -0
- pysofra/summary/weights.py +161 -0
- pysofra/themes/__init__.py +5 -0
- pysofra/themes/registry.py +272 -0
- pysofra-0.1.0a1.dist-info/METADATA +301 -0
- pysofra-0.1.0a1.dist-info/RECORD +50 -0
- pysofra-0.1.0a1.dist-info/WHEEL +4 -0
- pysofra-0.1.0a1.dist-info/licenses/LICENSE +674 -0
- pysofra-0.1.0a1.dist-info/licenses/NOTICE +18 -0
pysofra/plot/inline.py
ADDED
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
"""Cross-backend inline plot representation.
|
|
2
|
+
|
|
3
|
+
When a plot is attached to a :class:`SofraTable`, we keep multiple
|
|
4
|
+
serialised forms so each renderer can pick the one it needs:
|
|
5
|
+
|
|
6
|
+
* ``svg`` — for the HTML renderer.
|
|
7
|
+
* ``png_bytes`` — for DOCX and PPTX.
|
|
8
|
+
* ``pdf_bytes`` — for LaTeX (written as a sidecar file at export time).
|
|
9
|
+
|
|
10
|
+
All three are rendered once from the same matplotlib figure, ensuring
|
|
11
|
+
the visual representation is consistent across formats.
|
|
12
|
+
|
|
13
|
+
**Determinism.** matplotlib by default embeds the current wall-clock
|
|
14
|
+
timestamp and a process-random hash salt into every SVG/PNG/PDF it
|
|
15
|
+
writes. That makes binary renders unstable across processes and
|
|
16
|
+
breaks PySofra's "byte-identical reproducibility" guarantee for
|
|
17
|
+
plot-embedded tables. The helpers in this module strip those
|
|
18
|
+
non-deterministic fields so the same figure always serialises to the
|
|
19
|
+
same bytes.
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
from __future__ import annotations
|
|
23
|
+
|
|
24
|
+
import hashlib
|
|
25
|
+
import io
|
|
26
|
+
import re
|
|
27
|
+
from dataclasses import dataclass
|
|
28
|
+
from typing import Any
|
|
29
|
+
|
|
30
|
+
# A fixed (project-stable) hash salt overrides matplotlib's random
# default for inline SVG/PDF element IDs. It is written to the
# ``svg.hashsalt`` rcParam by ``_configure_deterministic_matplotlib``.
_HASH_SALT = "pysofra-inline-plot"

# Constant timestamp baked into PDF metadata so two renders of the
# same figure are byte-identical. The literal predates the project
# and has no operational meaning. It is spliced between the parentheses
# of a PDF date entry by ``_strip_pdf_nondeterminism``, hence ``bytes``.
_PDF_FIXED_DATE = b"D:20260101000000Z"
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _configure_deterministic_matplotlib() -> None:
    """Install the project hash salt into matplotlib's global rcParams.

    Assigns ``_HASH_SALT`` to ``rcParams["svg.hashsalt"]``. When
    matplotlib cannot be imported the function returns without doing
    anything. Calling it again simply re-assigns the same value.
    """
    try:
        import matplotlib
    except ImportError:  # pragma: no cover — matplotlib is an optional extra
        return None
    else:
        matplotlib.rcParams["svg.hashsalt"] = _HASH_SALT
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
# Patterns used to locate the non-deterministic metadata fields in SVG
# and PDF outputs (PNG is handled chunk-by-chunk, without a regex).

# SVG: a ``<dc:date>…</dc:date>`` element (matched case-insensitively).
_SVG_DATE_RE = re.compile(
    rb"<dc:date>[^<]*</dc:date>",
    re.IGNORECASE,
)
# PDF: a ``/CreationDate`` or ``/ModDate`` entry whose value is a
# ``(D:<digits>)`` string, optionally suffixed with ``Z`` or a
# ``+HH'MM'`` / ``-HH'MM'`` offset.
_PDF_DATE_RE = re.compile(
    rb"/(?:CreationDate|ModDate)\s*\(D:\d+(?:Z|[+\-]\d{2}'\d{2}')?\)",
)
# PDF: an ``/ID`` array holding exactly two hex strings.
_PDF_ID_RE = re.compile(
    rb"/ID\s*\[\s*<[0-9A-Fa-f]+>\s*<[0-9A-Fa-f]+>\s*\]",
)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def _strip_svg_nondeterminism(svg_bytes: bytes) -> bytes:
    """Pin every ``<dc:date>`` element of an SVG byte stream to a constant.

    Each match of ``_SVG_DATE_RE`` is replaced by the same fixed
    ``<dc:date>`` element; all other bytes are returned untouched.
    """
    pinned_element = b"<dc:date>2026-01-01T00:00:00</dc:date>"
    return re.sub(_SVG_DATE_RE, pinned_element, svg_bytes)
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _strip_pdf_nondeterminism(pdf_bytes: bytes) -> bytes:
    """Pin ``/CreationDate``, ``/ModDate``, and ``/ID`` in a PDF byte stream.

    Every date entry matched by ``_PDF_DATE_RE`` keeps its own key (and
    the whitespace that followed it) and has its value replaced by
    ``_PDF_FIXED_DATE``. When the pinned entry is shorter than the
    original (e.g. the original carried a ``+HH'MM'`` offset) it is
    right-padded with spaces so the entry keeps its original byte length.

    Any ``/ID`` array matched by ``_PDF_ID_RE`` is then replaced by two
    copies of a digest derived from the date-pinned bytes.

    Parameters
    ----------
    pdf_bytes
        Raw PDF file content.

    Returns
    -------
    bytes
        The PDF content with the date and ID fields pinned.
    """

    def _pin_date(match: re.Match[bytes]) -> bytes:
        original = match.group(0)
        # Everything before "(" is the key plus its trailing whitespace;
        # reusing it keeps /ModDate a /ModDate instead of turning it into
        # a second /CreationDate key in the same dictionary.
        key_part = original.partition(b"(")[0]
        pinned = key_part + b"(" + _PDF_FIXED_DATE + b")"
        # Pad with spaces so bytes after this entry do not move: PDF
        # cross-reference data records absolute byte offsets.
        return pinned.ljust(len(original))

    out = _PDF_DATE_RE.sub(_pin_date, pdf_bytes)
    # We always emit a deterministic /ID derived from the file hash so the
    # two-id PDF reference is stable across runs.
    digest = hashlib.sha256(out).hexdigest()[:32].upper().encode()
    out = _PDF_ID_RE.sub(b"/ID [<" + digest + b"><" + digest + b">]", out)
    return out
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def _strip_png_nondeterminism(png_bytes: bytes) -> bytes:
    """Drop the ``tIME``, ``tEXt``, ``iTXt`` and ``zTXt`` chunks from a PNG.

    The input is walked chunk by chunk; every chunk whose 4-byte type is
    not one of the four listed above is copied through unchanged
    (length, type, data and CRC included). Input that does not start
    with the PNG signature is returned as-is. Walking stops as soon as
    fewer than eight bytes (one length + type header) remain.
    """
    # PNG layout: 8-byte signature, then repeated
    #   [length (4, big-endian) | type (4) | data (length) | crc (4)]
    signature = b"\x89PNG\r\n\x1a\n"
    if png_bytes[:8] != signature:
        return png_bytes  # pragma: no cover — not a PNG

    volatile_types = frozenset((b"tIME", b"tEXt", b"iTXt", b"zTXt"))
    total = len(png_bytes)
    kept = [signature]
    cursor = 8
    while cursor + 8 <= total:
        data_len = int.from_bytes(png_bytes[cursor:cursor + 4], "big")
        chunk_type = png_bytes[cursor + 4:cursor + 8]
        # 4 (length) + 4 (type) + data + 4 (crc)
        next_cursor = cursor + 12 + data_len
        if chunk_type not in volatile_types:
            kept.append(png_bytes[cursor:next_cursor])
        cursor = next_cursor
    return b"".join(kept)
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def fig_to_svg(fig: Any) -> str:
    """Serialise a matplotlib Figure to a deterministic inline SVG string.

    The figure is saved as SVG with ``bbox_inches="tight"``, its
    ``<dc:date>`` timestamp is pinned, and the result is decoded as
    UTF-8. Anything preceding the first ``<svg`` (the ``<?xml ...?>`` /
    ``<!DOCTYPE ...>`` headers) is dropped so the markup can be embedded
    directly into HTML. A ``max-width:100%;height:auto;`` style
    attribute is inserted into the first ``<svg `` tag; the tag's
    existing attributes are left in place.
    """
    _configure_deterministic_matplotlib()

    sink = io.BytesIO()
    fig.savefig(sink, format="svg", bbox_inches="tight")
    markup = _strip_svg_nondeterminism(sink.getvalue()).decode("utf-8")

    # Trim only when the root tag is found past position 0.
    root_at = markup.find("<svg")
    if root_at > 0:
        markup = markup[root_at:]

    responsive_open = '<svg style="max-width:100%;height:auto;" '
    return markup.replace("<svg ", responsive_open, 1)
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
@dataclass(frozen=True)
class InlinePlot:
    """A plot serialised for every backend PySofra supports.

    Instances are immutable (``frozen=True``). ``render_inline_plot``
    fills all five fields from a single matplotlib figure.
    """

    # Inline SVG markup, as returned by ``fig_to_svg``.
    svg: str
    # PNG file content after ``_strip_png_nondeterminism``.
    png_bytes: bytes
    # PDF file content after ``_strip_pdf_nondeterminism``.
    pdf_bytes: bytes
    # Caller-supplied width, stored as given (not measured from the
    # figure). Presumably inches, per the name — confirm against callers.
    width_in: float
    # Caller-supplied height, stored as given (see ``width_in``).
    height_in: float
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def render_inline_plot(fig: Any, *, width_in: float, height_in: float,
                       dpi: int = 200) -> InlinePlot:
    """Serialise one matplotlib figure to SVG, PNG and PDF.

    The SVG comes from :func:`fig_to_svg`. The PNG is saved at ``dpi``
    with the ``Software`` metadata key disabled, and the PDF is saved
    with ``CreationDate`` / ``ModDate`` disabled. Both byte streams are
    then passed through their ``_strip_*_nondeterminism`` helper.
    ``width_in`` and ``height_in`` are stored on the result unchanged.
    """
    _configure_deterministic_matplotlib()
    svg_markup = fig_to_svg(fig)

    # Raster form — for DOCX / PPTX.
    raster = io.BytesIO()
    fig.savefig(
        raster,
        format="png",
        bbox_inches="tight",
        dpi=dpi,
        metadata={"Software": None},
    )
    stable_png = _strip_png_nondeterminism(raster.getvalue())

    # Vector form — for the LaTeX sidecar.
    vector = io.BytesIO()
    fig.savefig(
        vector,
        format="pdf",
        bbox_inches="tight",
        metadata={"CreationDate": None, "ModDate": None},
    )
    stable_pdf = _strip_pdf_nondeterminism(vector.getvalue())

    return InlinePlot(
        svg=svg_markup,
        png_bytes=stable_png,
        pdf_bytes=stable_pdf,
        width_in=width_in,
        height_in=height_in,
    )
|
pysofra/plot/km.py
ADDED
|
@@ -0,0 +1,249 @@
|
|
|
1
|
+
"""Kaplan–Meier curve rendering for tbl_survival SofraTables.
|
|
2
|
+
|
|
3
|
+
This function does *not* re-extract the KM fits from the table — those
|
|
4
|
+
aren't preserved in the rendered structure. Instead it accepts the same
|
|
5
|
+
data the user passed to ``tbl_survival`` and fits curves freshly.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
from ..core.frames import to_pandas
|
|
13
|
+
from .inline import InlinePlot, fig_to_svg, render_inline_plot
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def km_curve(
    data: Any,
    *,
    time: str,
    event: str,
    by: str | None = None,
    width_in: float = 6.5,
    height_in: float = 4.0,
    ci: bool = True,
    xlabel: str = "Time",
    ylabel: str = "Survival probability",
    palette: list[str] | None = None,
    risk_times: list[float] | tuple[float, ...] | None = None,
) -> InlinePlot:
    """Render KM curves as an :class:`InlinePlot` (SVG + PNG + PDF).

    Builds the figure with :func:`_build_km_figure`, serialises it with
    :func:`render_inline_plot`, and closes the figure afterwards. The
    figure is closed even when serialisation raises, so a failed render
    does not leave it registered with pyplot.

    ``risk_times`` adds a numbers-at-risk table below the curves at the
    listed time points (no table is added when ``risk_times`` is None).

    ``width_in`` / ``height_in`` are forwarded to the figure builder and
    are also stored unchanged on the returned :class:`InlinePlot`.

    Raises
    ------
    ImportError
        Propagated from :func:`_build_km_figure` when matplotlib or
        lifelines is unavailable.
    """
    fig = _build_km_figure(
        data, time=time, event=event, by=by,
        width_in=width_in, height_in=height_in, ci=ci,
        xlabel=xlabel, ylabel=ylabel, palette=palette,
        risk_times=risk_times,
    )
    try:
        return render_inline_plot(fig, width_in=width_in, height_in=height_in)
    finally:
        # Release the figure on success *and* on failure; otherwise an
        # exception during rendering would leak it.
        try:
            import matplotlib.pyplot as plt
        except ImportError:  # pragma: no cover
            pass
        else:
            plt.close(fig)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def km_curve_svg(
    data: Any,
    *,
    time: str,
    event: str,
    by: str | None = None,
    width_in: float = 6.5,
    height_in: float = 4.0,
    ci: bool = True,
    xlabel: str = "Time",
    ylabel: str = "Survival probability",
    palette: list[str] | None = None,
    risk_times: list[float] | tuple[float, ...] | None = None,
) -> str:
    """Render Kaplan–Meier curves to an inline SVG string.

    Builds the figure with :func:`_build_km_figure`, converts it with
    :func:`fig_to_svg`, and closes the figure afterwards. The figure is
    closed even when the conversion raises, so a failed render does not
    leave it registered with pyplot.

    Raises
    ------
    ImportError
        Propagated from :func:`_build_km_figure` when matplotlib or
        lifelines is unavailable.
    """
    fig = _build_km_figure(
        data, time=time, event=event, by=by,
        width_in=width_in, height_in=height_in, ci=ci,
        xlabel=xlabel, ylabel=ylabel, palette=palette,
        risk_times=risk_times,
    )
    try:
        return fig_to_svg(fig)
    finally:
        # Release the figure on success *and* on failure; otherwise an
        # exception during conversion would leak it.
        try:
            import matplotlib.pyplot as plt
        except ImportError:  # pragma: no cover
            pass
        else:
            plt.close(fig)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def _build_km_figure(
    data: Any,
    *,
    time: str,
    event: str,
    by: str | None,
    width_in: float,
    height_in: float,
    ci: bool,
    xlabel: str,
    ylabel: str,
    palette: list[str] | None,
    risk_times: list[float] | tuple[float, ...] | None = None,
) -> Any:
    """Build the matplotlib figure holding one Kaplan–Meier curve per group.

    ``data`` is converted with ``to_pandas``; a lifelines
    ``KaplanMeierFitter`` is fitted on the ``time`` / ``event`` columns
    of each group (one ``"Overall"`` group when ``by`` is None) and its
    survival function is drawn on the main axes. When ``risk_times`` is
    non-empty, a second axes is added underneath that prints the at-risk
    count per group at each requested time.

    Returns the figure without rendering or closing it; ``km_curve`` and
    ``km_curve_svg`` close it after serialising.

    Raises
    ------
    ImportError
        If the headless-backend helper / matplotlib import fails, or if
        lifelines cannot be imported.
    """
    try:
        from ._backend import use_headless_backend
        use_headless_backend()
        import matplotlib.pyplot as plt
    except ImportError as e:  # pragma: no cover
        raise ImportError(
            "KM curves require matplotlib. Install with "
            "`pip install matplotlib`."
        ) from e
    try:
        from lifelines import KaplanMeierFitter
    except ImportError as e:  # pragma: no cover
        raise ImportError("KM curves require lifelines.") from e

    df = to_pandas(data)
    # One (label, sub-frame) pair per curve.
    df_groups = (
        [("Overall", df)]
        if by is None
        else list(df.groupby(by, observed=True))
    )

    # Default colours; a falsy ``palette`` (None or empty) selects them.
    palette = palette or [
        "#0b3d91", "#c1272d", "#198754", "#ffc107", "#6f42c1", "#fd7e14",
    ]

    # Risk-table augment: shrink the curve axes and add a small axes
    # underneath listing N at risk per group at each requested time.
    # NOTE: this counts groups *before* empty ones are skipped below, so
    # the height reserved for the table can exceed the rows drawn.
    n_groups_pred = len(df_groups)

    if risk_times:
        from matplotlib.gridspec import GridSpec

        # Each column needs roughly one digit's worth of breathing room
        # so adjacent two-/three-digit at-risk counts never touch.
        min_w = max(width_in, 1.5 + 0.55 * len(risk_times))
        # Each group row in the risk table needs ~0.28 in plus a small
        # cushion for the heading.
        risk_h_in = 0.28 * n_groups_pred + 0.55
        total_h = height_in + risk_h_in + 0.25
        fig = plt.figure(figsize=(min_w, total_h))
        gs = GridSpec(
            2, 1, figure=fig,
            height_ratios=[height_in, risk_h_in],
            hspace=0.32,
        )
        ax = fig.add_subplot(gs[0])
        # Shared x-axis keeps the table columns under the matching curve times.
        ax_risk = fig.add_subplot(gs[1], sharex=ax)
    else:
        fig, ax = plt.subplots(figsize=(width_in, height_in))
        ax_risk = None

    # Fitted estimators keyed by group label, in plotting order.
    fits: dict[str, Any] = {}
    for i, (label, sub) in enumerate(df_groups):
        # Rows missing either the time or the event value are excluded.
        sub = sub.dropna(subset=[time, event])
        if sub.empty:
            continue
        kmf = KaplanMeierFitter()
        kmf.fit(sub[time], sub[event], label=str(label))
        fits[str(label)] = kmf
        # Colours cycle when there are more groups than palette entries.
        color = palette[i % len(palette)]
        kmf.plot_survival_function(ax=ax, ci_show=ci, color=color)

    ax.set_ylabel(ylabel)
    ax.set_ylim(0, 1.02)
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)
    if by is None:
        # A single "Overall" curve needs no legend.
        legend = ax.get_legend()
        if legend is not None:
            legend.set_visible(False)

    if ax_risk is not None and risk_times:
        # Clamp the x-window to the range the user asked about. Without
        # this, a long-tailed survival curve (e.g. one censored case at
        # t=110 when risk_times stop at 30) inflates xlim and crams every
        # risk-table column into the leftmost slice of the figure.
        rt_min = float(min(risk_times))
        rt_max = float(max(risk_times))
        # Guard against a zero span when all requested times coincide.
        span = max(rt_max - rt_min, 1e-9)
        # Generous left pad so the first risk-table number sits *inside*
        # the axes (otherwise a centered "165" at t=0 punches through the
        # y-axis into the "Placebo"/"Treatment" label column).
        ax.set_xlim(rt_min - 0.08 * span, rt_max + 0.04 * span)

        # Clear the curve's bottom axis — the risk table owns the x-axis labels.
        ax.set_xlabel("")
        ax.tick_params(axis="x", labelbottom=False)

        # Risk-table axis. We want a *grid of numbers*: rows = groups, cols
        # = time points. Numbers sit at (t_pt, group_index) in data
        # coordinates so they line up vertically with the curve.
        n_groups = len(fits)
        ax_risk.set_xlim(ax.get_xlim())
        ax_risk.set_xticks(list(risk_times))
        # Force the tick labels to render as integers / floats with no
        # trailing zeroes so they don't collide.
        ax_risk.set_xticklabels([
            f"{int(t)}" if float(t).is_integer() else f"{t:g}"
            for t in risk_times
        ], fontsize=9)
        ax_risk.tick_params(axis="x", length=4, pad=2)

        # Group labels on y, ordered top-to-bottom to mirror the curve legend.
        ax_risk.set_yticks(range(n_groups))
        ax_risk.set_yticklabels(list(fits.keys()), fontsize=9)
        ax_risk.set_ylim(-0.5, n_groups - 0.5)
        ax_risk.invert_yaxis()
        ax_risk.tick_params(axis="y", length=0, pad=4)
        for spine in ("top", "right", "left"):
            ax_risk.spines[spine].set_visible(False)

        # Auto-scale font down for very dense risk tables.
        font_size = 9 if len(risk_times) <= 6 else 8

        # Render numbers at each (time, group) cell.
        for i, (_name, kmf) in enumerate(fits.items()):
            for t_pt in risk_times:
                n_at_risk = _n_at_risk(kmf, float(t_pt))
                ax_risk.text(
                    t_pt, i, f"{n_at_risk}",
                    ha="center", va="center", fontsize=font_size,
                )

        ax_risk.set_xlabel(xlabel)
        # "Number at risk" heading sits above the *numbers* portion (centered
        # over the columns), using axes coordinates so it never collides with
        # the curve plot or the values themselves.
        ax_risk.text(
            0.5, 1.20,
            "Number at risk",
            transform=ax_risk.transAxes,
            fontsize=9, fontweight="bold",
            ha="center", va="bottom",
        )
    else:
        # No risk table: the curve axes carries the x label itself.
        ax.set_xlabel(xlabel)

    return fig
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
def _n_at_risk(kmf: Any, t: float) -> int:
    """Number of individuals at risk *just before* time ``t``.

    Looks up the first row of ``kmf.event_table`` whose index is at or
    after ``t`` and returns its ``at_risk`` value as an ``int``. Returns
    0 when no such row exists, and also when anything in the lookup
    raises.

    See :func:`pysofra.models.survival._n_at_risk` for the convention
    rationale — per the original note this is the same implementation,
    duplicated to avoid a cross-module import in the matplotlib hot path.
    """
    try:
        events = kmf.event_table
        at_or_after = events.index[events.index >= t]
        if not len(at_or_after):
            return 0
        return int(events.loc[at_or_after.min(), "at_risk"])
    except Exception:  # pragma: no cover
        return 0
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
"""Renderers — backend-agnostic output of :class:`~pysofra.core.SofraTable`."""
|
|
2
|
+
|
|
3
|
+
from .docx import DocxRenderer
|
|
4
|
+
from .html import HtmlRenderer
|
|
5
|
+
from .latex import LatexRenderer
|
|
6
|
+
from .markdown import MarkdownRenderer
|
|
7
|
+
|
|
8
|
+
# PPTX and XLSX renderers are optional — gated on their backends.
|
|
9
|
+
PptxRenderer: type | None
|
|
10
|
+
XlsxRenderer: type | None
|
|
11
|
+
try:
|
|
12
|
+
from .pptx import PptxRenderer
|
|
13
|
+
except ImportError: # pragma: no cover
|
|
14
|
+
PptxRenderer = None
|
|
15
|
+
|
|
16
|
+
try:
|
|
17
|
+
from .xlsx import XlsxRenderer
|
|
18
|
+
except ImportError: # pragma: no cover
|
|
19
|
+
XlsxRenderer = None
|
|
20
|
+
|
|
21
|
+
__all__ = [
|
|
22
|
+
"DocxRenderer",
|
|
23
|
+
"HtmlRenderer",
|
|
24
|
+
"LatexRenderer",
|
|
25
|
+
"MarkdownRenderer",
|
|
26
|
+
"PptxRenderer",
|
|
27
|
+
"XlsxRenderer",
|
|
28
|
+
]
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
"""Make ZIP-based document outputs (.docx, .pptx) byte-deterministic
|
|
2
|
+
across processes.
|
|
3
|
+
|
|
4
|
+
Both ``python-docx`` and ``python-pptx`` save their documents as OOXML
|
|
5
|
+
ZIP archives. The XML *contents* are deterministic given a deterministic
|
|
6
|
+
input, but the underlying ``zipfile.ZipFile.writestr`` stamps each entry
|
|
7
|
+
with ``time.localtime()`` by default, so two saves at different wall
|
|
8
|
+
times produce different bytes even though every file inside is
|
|
9
|
+
identical.
|
|
10
|
+
|
|
11
|
+
PySofra's published claim is that every renderer produces
|
|
12
|
+
byte-deterministic output. To honour that for the OOXML formats, after
|
|
13
|
+
``python-docx`` / ``python-pptx`` finishes writing the archive we
|
|
14
|
+
rewrite it in-place with every entry's ``date_time`` pinned to a fixed
|
|
15
|
+
epoch and the compression level fixed.
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
import io
|
|
21
|
+
import zipfile
|
|
22
|
+
from pathlib import Path
|
|
23
|
+
|
|
24
|
+
# Fixed wall-clock used for every ZIP entry's ``date_time`` field. The
# ZIP format only stores DOS time (2-second granularity from 1980); we
# use a deterministic constant well clear of that lower bound.
_FIXED_DATE_TIME: tuple[int, int, int, int, int, int] = (2000, 1, 1, 0, 0, 0)


def make_zip_deterministic(path: Path) -> None:
    """Rewrite ``path`` (an OOXML zip) so its bytes are reproducible.

    Reads every entry, pins ``date_time`` to ``_FIXED_DATE_TIME``, and
    writes the archive back with each entry's original compression type,
    attributes and position in the archive. Idempotent — applying twice
    produces the same bytes.

    The whole archive is held in memory while it is rebuilt, and the
    file at ``path`` is overwritten in place.

    Parameters
    ----------
    path
        Path to a ZIP-format file (``.docx``, ``.pptx``, ``.xlsx``).
    """
    p = Path(path)
    raw = p.read_bytes()
    buf = io.BytesIO()
    with zipfile.ZipFile(io.BytesIO(raw), mode="r") as src, \
            zipfile.ZipFile(buf, mode="w") as dst:
        for info in src.infolist():
            new_info = zipfile.ZipInfo(
                filename=info.filename,
                date_time=_FIXED_DATE_TIME,
            )
            new_info.compress_type = info.compress_type
            new_info.external_attr = info.external_attr
            new_info.create_system = info.create_system
            new_info.internal_attr = info.internal_attr
            # Read via the ZipInfo itself, not its name: a name lookup
            # resolves to the *last* entry with that name, so archives
            # with duplicate member names would lose the earlier payloads.
            dst.writestr(new_info, src.read(info))
    p.write_bytes(buf.getvalue())
|
pysofra/render/base.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
"""Base renderer interface.
|
|
2
|
+
|
|
3
|
+
All renderers consume a :class:`~pysofra.core.SofraTable` and produce output
|
|
4
|
+
in their target format. Concrete renderers live in sibling modules.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from abc import ABC, abstractmethod
|
|
10
|
+
from typing import Generic, TypeVar
|
|
11
|
+
|
|
12
|
+
from ..core.table import SofraTable
|
|
13
|
+
|
|
14
|
+
T = TypeVar("T")
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class Renderer(ABC, Generic[T]):
    """Abstract base for all renderers.

    Generic over ``T``, the type returned by :meth:`render`. Subclasses
    must implement :meth:`render`.
    """

    @abstractmethod
    def render(self, table: SofraTable) -> T:  # pragma: no cover — interface
        """Render ``table`` and return the result as a ``T``."""
        ...
|