pysofra 0.1.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pysofra/__init__.py +82 -0
- pysofra/core/__init__.py +14 -0
- pysofra/core/compose.py +167 -0
- pysofra/core/format.py +155 -0
- pysofra/core/frames.py +69 -0
- pysofra/core/schema.py +128 -0
- pysofra/core/table.py +924 -0
- pysofra/io/__init__.py +1 -0
- pysofra/models/__init__.py +6 -0
- pysofra/models/extract.py +249 -0
- pysofra/models/pool.py +119 -0
- pysofra/models/regression.py +507 -0
- pysofra/models/survival.py +395 -0
- pysofra/models/uvregression.py +438 -0
- pysofra/notebook/__init__.py +6 -0
- pysofra/plot/__init__.py +23 -0
- pysofra/plot/_backend.py +32 -0
- pysofra/plot/forest.py +159 -0
- pysofra/plot/inline.py +171 -0
- pysofra/plot/km.py +249 -0
- pysofra/render/__init__.py +28 -0
- pysofra/render/_zip_determinism.py +57 -0
- pysofra/render/base.py +22 -0
- pysofra/render/docx.py +286 -0
- pysofra/render/html.py +442 -0
- pysofra/render/image.py +130 -0
- pysofra/render/latex.py +253 -0
- pysofra/render/markdown.py +128 -0
- pysofra/render/pptx.py +340 -0
- pysofra/render/xlsx.py +226 -0
- pysofra/summary/__init__.py +6 -0
- pysofra/summary/calibrate.py +214 -0
- pysofra/summary/design.py +246 -0
- pysofra/summary/effect_size.py +187 -0
- pysofra/summary/extras.py +745 -0
- pysofra/summary/smd.py +133 -0
- pysofra/summary/stats.py +135 -0
- pysofra/summary/tbl_cross.py +339 -0
- pysofra/summary/tbl_one.py +1220 -0
- pysofra/summary/tbl_summary.py +51 -0
- pysofra/summary/tests.py +370 -0
- pysofra/summary/typing.py +129 -0
- pysofra/summary/weights.py +161 -0
- pysofra/themes/__init__.py +5 -0
- pysofra/themes/registry.py +272 -0
- pysofra-0.1.0a1.dist-info/METADATA +301 -0
- pysofra-0.1.0a1.dist-info/RECORD +50 -0
- pysofra-0.1.0a1.dist-info/WHEEL +4 -0
- pysofra-0.1.0a1.dist-info/licenses/LICENSE +674 -0
- pysofra-0.1.0a1.dist-info/licenses/NOTICE +18 -0
pysofra/render/pptx.py
ADDED
|
@@ -0,0 +1,340 @@
|
|
|
1
|
+
"""PowerPoint (PPTX) rendering via ``python-pptx``.
|
|
2
|
+
|
|
3
|
+
Writes a single-slide ``.pptx`` containing one table. The caller never
|
|
4
|
+
manipulates python-pptx shapes directly — pass a :class:`SofraTable` and
|
|
5
|
+
a path and the renderer takes care of slide creation, table sizing,
|
|
6
|
+
font, header shading, and footnote textbox.
|
|
7
|
+
|
|
8
|
+
The renderer is gated on the optional ``python-pptx`` dependency:
|
|
9
|
+
|
|
10
|
+
.. code-block:: text
|
|
11
|
+
|
|
12
|
+
pip install pysofra[pptx]
|
|
13
|
+
|
|
14
|
+
If python-pptx isn't installed, calling ``.to_pptx`` raises
|
|
15
|
+
``ImportError`` with installation guidance.
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
from dataclasses import dataclass
|
|
21
|
+
from pathlib import Path
|
|
22
|
+
from typing import Any
|
|
23
|
+
|
|
24
|
+
from ..core.schema import HeaderRow, Row, SpanningHeader
|
|
25
|
+
from ..core.table import SofraTable
|
|
26
|
+
from ..themes.registry import resolve_theme
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@dataclass
class PptxRenderer:
    """Write a SofraTable to a single-slide ``.pptx`` file.

    Shapes are added to the slide in this order: optional title textbox,
    optional inline plot (position ``"above"``), the table, optional
    inline plot (position ``"below"``), and the footnotes textbox.
    """

    # Optional slide title; when unset the table caption (if any) is used.
    slide_title: str | None = None
    # Slide size in inches (13.333 x 7.5 is a 16:9 aspect ratio).
    slide_width_in: float = 13.333
    slide_height_in: float = 7.5

    def write(self, table: SofraTable, path: Path) -> Path:
        """Render ``table`` onto one slide and save the deck to ``path``.

        Args:
            table: The table to render.
            path: Destination file; missing parent directories are created.

        Returns:
            The written path as a :class:`~pathlib.Path`.

        Raises:
            ImportError: If ``python-pptx`` is not installed.
        """
        try:
            from pptx import Presentation
            from pptx.dml.color import RGBColor
            from pptx.enum.text import PP_ALIGN
            from pptx.util import Inches, Pt
        except ImportError as e:  # pragma: no cover
            raise ImportError(
                "PPTX export requires python-pptx. Install with "
                "`pip install pysofra[pptx]` or `pip install python-pptx`."
            ) from e

        import contextlib
        import io

        theme = resolve_theme(table.theme_name)
        d = theme.pptx
        font_name: str = d.get("font_name", "Calibri")
        font_size: int = int(d.get("font_size", 14))
        header_fill: str = d.get("header_fill", "F2F2F2")
        # Theme-configurable text colours; sensible high-contrast
        # defaults so the header doesn't render as white-on-light
        # under PowerPoint's default table style.
        header_text_color: str = d.get("header_text_color", "1A202C")
        body_text_color: str = d.get("body_text_color", "1A202C")

        ncols = _ncols(table)
        n_header_rows = (1 if table.spanning_headers else 0) + len(table.headers)
        n_body_rows = len(table.rows)
        # A table shape needs at least one row, even for an empty table.
        n_total_rows = max(1, n_header_rows + n_body_rows)

        prs = Presentation()
        prs.slide_width = Inches(self.slide_width_in)
        prs.slide_height = Inches(self.slide_height_in)
        blank = prs.slide_layouts[6]  # Blank layout
        slide = prs.slides.add_slide(blank)

        # Optional slide title.
        title_text = self.slide_title or table.caption
        if title_text:
            title_box = slide.shapes.add_textbox(
                Inches(0.5), Inches(0.3),
                Inches(self.slide_width_in - 1), Inches(0.6),
            )
            tf = title_box.text_frame
            tf.text = title_text
            run = tf.paragraphs[0].runs[0]
            run.font.name = font_name
            run.font.size = Pt(font_size + 6)
            run.font.bold = True

        # Table sizing: fit within the slide with margins.
        left = Inches(0.5)
        top_in = 1.1 if title_text else 0.5
        width_in = self.slide_width_in - 1
        avail_height_in = self.slide_height_in - (1.6 if title_text else 1.0)

        # Reserve vertical space for the footnotes textbox *before*
        # sizing the table, so footnotes never get clipped off the
        # bottom of the slide. PowerPoint applies a paragraph line
        # spacing roughly 1.5x the font height for body text; we use
        # 1.8x as a conservative reservation so the textbox never
        # clips its own contents even under PowerPoint's body-style
        # space-before/after defaults.
        fn_font_size = max(8, font_size - 4)
        fn_line_height_in = (fn_font_size * 1.8) / 72.0
        n_footnote_lines = len(table.footnotes)
        footnote_height_in = (
            n_footnote_lines * fn_line_height_in + 0.25
            if n_footnote_lines else 0.0
        )
        avail_height_in = max(1.0, avail_height_in - footnote_height_in)

        plot = getattr(table, "inline_plot", None)
        plot_png: bytes | None = (
            plot.png_bytes
            if plot is not None and getattr(plot, "png_bytes", None)
            else None
        )

        if plot_png is not None:
            # The plot gets at most 42% of the available vertical space;
            # 0.15in is the gap kept between the plot and the table.
            assert plot is not None  # paired with plot_png check above
            plot_height_in = min(float(plot.height_in), avail_height_in * 0.42)
            table_height_in = avail_height_in - plot_height_in - 0.15
        else:
            plot_height_in = 0.0
            table_height_in = avail_height_in

        # Only consult the position when there is actually a plot to place.
        plot_position = table.inline_svg_position if plot_png is not None else None
        plot_above = plot_position == "above"
        plot_below = plot_position == "below"

        if plot_png is not None and plot_above:
            slide.shapes.add_picture(
                io.BytesIO(plot_png),
                left, Inches(top_in),
                width=Inches(width_in),
                height=Inches(plot_height_in),
            )
            table_top_in = top_in + plot_height_in + 0.15
        else:
            table_top_in = top_in

        shape = slide.shapes.add_table(
            n_total_rows, ncols,
            left, Inches(table_top_in),
            Inches(width_in), Inches(table_height_in),
        )
        word_table = shape.table

        # PowerPoint applies a default "Medium Style 2 - Accent 1"
        # table style that paints the first row's text white. We
        # control every cell's fill and font colour explicitly, so
        # disable the style-driven overrides to avoid white-on-light
        # header text and zebra-striped body rows.
        for attr in ("first_row", "first_col", "horz_banding",
                     "vert_banding", "last_row", "last_col"):
            with contextlib.suppress(AttributeError, ValueError):  # pragma: no cover - defensive
                setattr(word_table, attr, False)

        if plot_png is not None and plot_below:
            slide.shapes.add_picture(
                io.BytesIO(plot_png),
                left, Inches(table_top_in + table_height_in + 0.15),
                width=Inches(width_in),
                height=Inches(plot_height_in),
            )

        # Fill the grid top to bottom: spanning row, header rows, body rows.
        row_idx = 0
        if table.spanning_headers:
            _write_spanning_row(
                word_table, row_idx, table.spanning_headers, ncols,
                font_name=font_name, font_size=font_size,
                header_fill=header_fill,
                header_text_color=header_text_color,
                RGBColor=RGBColor, Pt=Pt, PP_ALIGN=PP_ALIGN,
            )
            row_idx += 1
        for hr in table.headers:
            _write_header_row(
                word_table, row_idx, hr, ncols,
                font_name=font_name, font_size=font_size,
                header_fill=header_fill,
                header_text_color=header_text_color,
                RGBColor=RGBColor, Pt=Pt, PP_ALIGN=PP_ALIGN,
            )
            row_idx += 1
        for body_row in table.rows:
            _write_body_row(
                word_table, row_idx, body_row, ncols,
                font_name=font_name, font_size=font_size,
                body_text_color=body_text_color, RGBColor=RGBColor,
                Pt=Pt, PP_ALIGN=PP_ALIGN,
            )
            row_idx += 1

        # Footnotes textbox below the table (and below the plot when the
        # plot sits under the table). We:
        #   (1) reserve enough room ahead of time via footnote_height_in;
        #   (2) explicitly pin single-line spacing + zero space-before/after
        #       per paragraph so PowerPoint's body-text defaults don't
        #       silently inflate the rendered height;
        #   (3) enable SHAPE_TO_FIT_TEXT so the box grows if the
        #       heuristic still underestimates (belt and braces).
        if table.footnotes:
            from pptx.enum.text import MSO_AUTO_SIZE
            fn_top = Inches(table_top_in) + Inches(table_height_in) + Inches(0.05)
            if plot_below:
                # The picture occupies the band right under the table;
                # start the footnotes after it instead of on top of it.
                fn_top += Inches(0.15) + Inches(plot_height_in)
            fn_box = slide.shapes.add_textbox(
                left, fn_top, Inches(width_in), Inches(footnote_height_in),
            )
            tf = fn_box.text_frame
            tf.word_wrap = True
            tf.auto_size = MSO_AUTO_SIZE.SHAPE_TO_FIT_TEXT
            tf.margin_top = Inches(0.04)
            tf.margin_bottom = Inches(0.04)
            for i, fn in enumerate(table.footnotes):
                if i == 0:
                    # A new text frame already holds one empty paragraph.
                    tf.text = fn
                    para = tf.paragraphs[0]
                else:
                    para = tf.add_paragraph()
                    para.text = fn
                para.line_spacing = 1.0
                para.space_before = Pt(0)
                para.space_after = Pt(0)
                for run in para.runs:
                    run.font.name = font_name
                    run.font.size = Pt(fn_font_size)
                    run.font.italic = True
                    # ``RGBColor.from_string`` is python-pptx's own
                    # constructor; its stubs are not strict, so an
                    # explicit type-ignore documents that we accept
                    # the third-party untyped call here.
                    run.font.color.rgb = RGBColor.from_string(  # type: ignore[no-untyped-call]
                        body_text_color
                    )

        path = Path(path)
        path.parent.mkdir(parents=True, exist_ok=True)
        prs.save(str(path))
        # python-pptx stamps every ZIP entry with the current wall-clock,
        # which breaks cross-process byte-determinism. Rewrite with fixed
        # entry mtimes so identical input always yields identical bytes.
        from ._zip_determinism import make_zip_deterministic
        make_zip_deterministic(path)
        return path

    def render(self, table: SofraTable) -> str:  # pragma: no cover
        """Always raise: a PPTX has no string form, use :meth:`write`."""
        raise NotImplementedError("PptxRenderer writes to disk; use .write(table, path).")
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
def _ncols(table: SofraTable) -> int:
    """Return the column count used for the slide table.

    The first header row decides when there is one; otherwise the first
    body row does. A table with neither is treated as one column wide.
    """
    if table.headers:
        reference = table.headers[0]
    elif table.rows:
        reference = table.rows[0]
    else:
        return 1
    return len(reference.cells)
|
|
256
|
+
|
|
257
|
+
|
|
258
|
+
def _set_cell(cell: Any, text: str, *, bold: bool, italic: bool,
              font_name: str, font_size: int, align: str | None,
              fill_hex: str | None, font_color_hex: str | None,
              RGBColor: Any, Pt: Any, PP_ALIGN: Any) -> None:
    """Write ``text`` into ``cell`` as a single styled run.

    ``align`` picks the paragraph alignment ("right" or "center";
    anything else means left). ``font_color_hex`` and ``fill_hex`` are
    hex strings handed to ``RGBColor.from_string``; passing ``None``
    leaves the font colour / cell fill untouched. ``RGBColor``, ``Pt``
    and ``PP_ALIGN`` are the python-pptx objects, injected by the caller.
    """
    # Clear the cell first, then style its first paragraph.
    cell.text = ""
    paragraph = cell.text_frame.paragraphs[0]
    paragraph.alignment = (
        PP_ALIGN.RIGHT if align == "right"
        else PP_ALIGN.CENTER if align == "center"
        else PP_ALIGN.LEFT
    )

    styled = paragraph.add_run()
    styled.text = text
    font = styled.font
    font.name = font_name
    font.size = Pt(font_size)
    font.bold = bold
    font.italic = italic

    # Explicit colour. python-pptx's default table style paints header
    # text white; without an explicit override the header cell renders
    # white-on-light-fill, which is unreadable.
    if not (font_color_hex is None or RGBColor is None):
        font.color.rgb = RGBColor.from_string(font_color_hex)

    if fill_hex is None:
        return
    background = cell.fill
    background.solid()
    background.fore_color.rgb = RGBColor.from_string(fill_hex)
|
|
285
|
+
|
|
286
|
+
|
|
287
|
+
def _write_header_row(word_table: Any, idx: int, hr: HeaderRow, ncols: int, *,
                      font_name: str, font_size: int, header_fill: str,
                      header_text_color: str,
                      RGBColor: Any, Pt: Any, PP_ALIGN: Any) -> None:
    """Fill table row ``idx`` with the cells of header row ``hr``.

    At most ``ncols`` header cells are written. Every cell is bold,
    non-italic, shaded with ``header_fill`` and coloured with
    ``header_text_color``; line breaks in the header text become spaces.
    """
    # Styling shared by every cell of the row.
    shared_style = dict(
        bold=True, italic=False,
        font_name=font_name, font_size=font_size,
        fill_hex=header_fill,
        font_color_hex=header_text_color,
        RGBColor=RGBColor, Pt=Pt, PP_ALIGN=PP_ALIGN,
    )
    target_row = word_table.rows[idx]
    for col, header_cell in enumerate(hr.cells[:ncols]):
        slot = target_row.cells[col]
        label = header_cell.text.replace("\n", " ")
        _set_cell(slot, label, align=header_cell.align, **shared_style)
|
|
302
|
+
|
|
303
|
+
|
|
304
|
+
def _write_body_row(word_table: Any, idx: int, r: Row, ncols: int, *,
                    font_name: str, font_size: int,
                    body_text_color: str, RGBColor: Any,
                    Pt: Any, PP_ALIGN: Any) -> None:
    """Fill table row ``idx`` with the cells of body row ``r``.

    At most ``ncols`` cells are written. A cell with a positive
    ``indent`` gets that many leading spaces; cells are bold when the
    cell asks for it or the row is a group header. Body cells get no
    fill.
    """
    target_row = word_table.rows[idx]
    for col, body_cell in enumerate(r.cells[:ncols]):
        slot = target_row.cells[col]
        content = body_cell.text
        if body_cell.indent > 0:
            # Indentation is emulated with leading spaces in the text.
            padding = " " * body_cell.indent
            content = padding + content
        _set_cell(
            slot, content,
            bold=body_cell.bold or r.is_group_header,
            italic=body_cell.italic,
            font_name=font_name,
            font_size=font_size,
            align=body_cell.align,
            fill_hex=None,
            font_color_hex=body_text_color,
            RGBColor=RGBColor,
            Pt=Pt,
            PP_ALIGN=PP_ALIGN,
        )
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
def _write_spanning_row(word_table: Any, idx: int, spans: tuple[SpanningHeader, ...],
                        ncols: int, *, font_name: str, font_size: int,
                        header_fill: str, header_text_color: str,
                        RGBColor: Any, Pt: Any,
                        PP_ALIGN: Any) -> None:
    """Write the spanning-header row into table row ``idx``.

    Each span covers columns ``span.start`` to ``span.end`` inclusive.
    Its cells are merged into one and the label is written centred and
    bold, with the header fill and text colour. ``ncols`` is accepted
    for signature parity with the other row writers and is not used.
    """
    row = word_table.rows[idx]
    for span in spans:
        anchor = row.cells[span.start]
        # Merge the whole span in one call. python-pptx's public
        # ``merge`` takes the opposite corner of the range and raises
        # ValueError if the range already contains a merged cell, so
        # growing the merge one column at a time fails for any span
        # wider than two columns.
        if span.end > span.start:
            anchor.merge(row.cells[span.end])
        _set_cell(
            anchor, span.label, bold=True, italic=False,
            font_name=font_name, font_size=font_size, align="center",
            font_color_hex=header_text_color,
            fill_hex=header_fill, RGBColor=RGBColor, Pt=Pt, PP_ALIGN=PP_ALIGN,
        )
|
pysofra/render/xlsx.py
ADDED
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
"""Excel (.xlsx) rendering via ``xlsxwriter``.
|
|
2
|
+
|
|
3
|
+
Writes a single-sheet workbook containing one table. The caller never
|
|
4
|
+
touches the xlsx-writer API directly — pass a :class:`SofraTable` and a
|
|
5
|
+
path and the renderer takes care of formatting (fonts, borders, header
|
|
6
|
+
shading, row indentation, column widths, captions, footnotes).
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import datetime as _dt
|
|
12
|
+
import re
|
|
13
|
+
from dataclasses import dataclass
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
from typing import Any
|
|
16
|
+
|
|
17
|
+
from ..core.schema import HeaderRow, Row, SpanningHeader
|
|
18
|
+
from ..core.table import SofraTable
|
|
19
|
+
from ..themes.registry import resolve_theme
|
|
20
|
+
|
|
21
|
+
# A fixed wall-clock timestamp used in the workbook's docProps metadata.
|
|
22
|
+
# xlsxwriter would otherwise embed ``datetime.now()`` into core.xml's
|
|
23
|
+
# ``<dcterms:created>``/``<dcterms:modified>`` elements, breaking
|
|
24
|
+
# byte-determinism across processes (the same SofraTable would produce
|
|
25
|
+
# a different SHA-256 on each call). 2000-01-01T00:00:00Z is the epoch
|
|
26
|
+
# we pin for reproducible publication artefacts.
|
|
27
|
+
_DETERMINISTIC_CREATED = _dt.datetime(2000, 1, 1, 0, 0, 0)
|
|
28
|
+
|
|
29
|
+
# Matches a plain decimal — optional sign, digits, optional fractional
|
|
30
|
+
# part. Used to detect when ``Cell.text`` is the literal formatted
|
|
31
|
+
# value (so writing the numeric ``Cell.value`` to Excel will match
|
|
32
|
+
# what HTML/Markdown/LaTeX display).
|
|
33
|
+
_PLAIN_NUMBER_RE = re.compile(r"^[-+]?\d+(\.\d+)?$")


def _text_matches_number(text: str) -> bool:
    """Tell whether ``text`` is nothing but a plain decimal number.

    Surrounding whitespace is ignored. Anything carrying a marker such
    as ``<0.001`` / ``>0.99`` or an em-dash does not count.
    """
    candidate = text.strip()
    return _PLAIN_NUMBER_RE.match(candidate) is not None
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
@dataclass
class XlsxRenderer:
    """Write a SofraTable to an .xlsx file."""

    # Name of the single worksheet the table is written to.
    sheet_name: str = "Table"

    def write(self, table: SofraTable, path: Path) -> Path:
        """Write ``table`` to a one-sheet workbook at ``path``.

        Rows are emitted top to bottom: caption, spanning headers,
        header rows, body rows, footnotes.

        Args:
            table: The table to render.
            path: Destination file; missing parent directories are created.

        Returns:
            The written path as a :class:`~pathlib.Path`.

        Raises:
            ImportError: If ``xlsxwriter`` is not installed.
            OSError: If xlsxwriter cannot create the file on close.
        """
        try:
            import xlsxwriter
        except ImportError as e:  # pragma: no cover
            raise ImportError(
                "Excel export requires xlsxwriter. Install with "
                "`pip install xlsxwriter`."
            ) from e

        theme = resolve_theme(table.theme_name)
        d = theme.docx  # reuse the docx theme dict for font / sizing
        font_name: str = d.get("font_name", "Calibri")
        font_size: int = int(d.get("font_size", 11))

        path = Path(path)
        path.parent.mkdir(parents=True, exist_ok=True)

        wb = xlsxwriter.Workbook(str(path))
        # Pin the workbook's creation timestamp so the bytes are
        # reproducible across processes (see _DETERMINISTIC_CREATED).
        wb.set_properties({"created": _DETERMINISTIC_CREATED})
        ws = wb.add_worksheet(self.sheet_name)

        base_fmt = {"font_name": font_name, "font_size": font_size}
        fmt_caption = wb.add_format({**base_fmt, "bold": True,
                                     "font_size": font_size + 1})
        fmt_header = wb.add_format({**base_fmt, "bold": True,
                                    "align": "center", "valign": "vcenter",
                                    "bottom": 2, "top": 2,
                                    "text_wrap": True})
        fmt_spanning = wb.add_format({**base_fmt, "bold": True,
                                      "align": "center", "bottom": 1})
        fmt_footnote = wb.add_format({**base_fmt, "italic": True,
                                      "font_size": max(8, font_size - 1)})

        row_idx = 0
        ncols = _ncols(table)

        def write_full_width(at_row: int, text: str, fmt: Any) -> None:
            # xlsxwriter will not merge a single cell (it warns and
            # writes nothing), so a one-column table needs a plain
            # write or its caption and footnotes would be dropped.
            if ncols > 1:
                ws.merge_range(at_row, 0, at_row, ncols - 1, text, fmt)
            else:
                ws.write(at_row, 0, text, fmt)

        if table.caption:
            write_full_width(row_idx, table.caption, fmt_caption)
            row_idx += 1

        if table.spanning_headers:
            row_idx = _write_spanning_row(
                ws, row_idx, table.spanning_headers, ncols, fmt_spanning,
            )

        for hr in table.headers:
            row_idx = _write_header_row(ws, row_idx, hr, fmt_header)

        # The last body row is flagged so the helper can style it.
        last_body_row = row_idx + len(table.rows) - 1
        for r in table.rows:
            _write_body_row(ws, row_idx, r, wb, base_fmt,
                            is_last=(row_idx == last_body_row))
            row_idx += 1

        if table.footnotes:
            for fn in table.footnotes:
                write_full_width(row_idx, fn, fmt_footnote)
                row_idx += 1

        # Sensible column widths based on cell-text length.
        _autosize_columns(ws, table, ncols)

        # xlsxwriter writes the actual .xlsx on ``close()``; a permission
        # failure (read-only directory) surfaces here as
        # ``xlsxwriter.exceptions.FileCreateError``. Every other PySofra
        # renderer raises an ``OSError`` subclass on the same failure
        # mode (``PermissionError`` / ``IsADirectoryError`` / etc.). To
        # let callers handle "couldn't write the file" with a single
        # ``except OSError`` regardless of backend, re-raise as
        # ``OSError`` (chaining the original).
        try:
            wb.close()
        except xlsxwriter.exceptions.FileCreateError as e:
            raise OSError(str(e)) from e
        # xlsxwriter also stamps ZIP entry mtimes with the current
        # wall-clock; rewrite with fixed entry mtimes for cross-process
        # byte-determinism.
        from ._zip_determinism import make_zip_deterministic
        make_zip_deterministic(path)
        return path

    def render(self, table: SofraTable) -> str:  # pragma: no cover
        """Always raise: a workbook has no string form, use :meth:`write`."""
        raise NotImplementedError("XlsxRenderer writes to disk; use .write(table, path).")
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
# ----------------------------------------------------------------------
|
|
137
|
+
# Helpers
|
|
138
|
+
# ----------------------------------------------------------------------
|
|
139
|
+
|
|
140
|
+
def _ncols(table: SofraTable) -> int:
    """Return the worksheet column count for ``table``.

    Taken from the first header row, or from the first body row when
    there are no headers; a table with neither counts as one column.
    """
    leading_rows = table.headers or table.rows
    if not leading_rows:
        return 1
    return len(leading_rows[0].cells)
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def _write_spanning_row(ws: Any, row_idx: int,
                        spans: tuple[SpanningHeader, ...],
                        ncols: int, fmt: Any) -> int:
    """Write the spanning headers on ``row_idx``; return the next row index.

    A span covering more than one column is written as a merged range,
    a single-column span as an ordinary cell. ``ncols`` is not used.
    """
    del ncols
    for header in spans:
        first_col, last_col = header.start, header.end
        is_single_column = not last_col > first_col
        if is_single_column:
            ws.write(row_idx, first_col, header.label, fmt)
            continue
        ws.merge_range(row_idx, first_col, row_idx, last_col,
                       header.label, fmt)
    next_row = row_idx + 1
    return next_row
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def _write_header_row(ws: Any, row_idx: int, hr: HeaderRow, fmt: Any) -> int:
    """Write one header row at ``row_idx``; return the next row index.

    The row height is set to 30 so wrapped (multi-line) headers fit.
    Header text is written unchanged, embedded newlines included.
    """
    ws.set_row(row_idx, 30)  # taller for wrapped headers
    for col_idx, c in enumerate(hr.cells):
        ws.write_string(row_idx, col_idx, c.text, fmt)
    return row_idx + 1
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def _write_body_row(ws: Any, row_idx: int, r: Row, wb: Any,
                    base_fmt: dict[str, Any], *, is_last: bool) -> None:
    """Write every cell of body row ``r`` onto worksheet row ``row_idx``.

    Each cell gets its own format: ``base_fmt`` plus alignment, bold,
    italic, indent (first column only), a bottom border on the last
    row, a row-level highlight colour, and finally any ``style['xlsx']``
    overrides carried by the cell.
    """
    numeric_kinds = ("numeric", "p_value", "q_value")
    for col_idx, c in enumerate(r.cells):
        props: dict[str, Any] = {**base_fmt}
        props["align"] = (
            "right" if c.align == "right"
            else "center" if c.align == "center"
            else "left"
        )
        if c.bold or r.is_group_header:
            props["bold"] = True
        if c.italic:
            props["italic"] = True
        if c.indent > 0 and col_idx == 0:
            props["indent"] = c.indent
        if is_last:
            props["bottom"] = 2
        shade = (r.metadata or {}).get("highlight")
        if shade:
            props["bg_color"] = str(shade)
        # Cell-level style['xlsx'] overrides are applied last.
        overrides = c.style.get("xlsx") if c.style else None
        if isinstance(overrides, dict):
            props.update(overrides)
        cell_fmt = wb.add_format(props)

        # Keep the numeric type only when the rendered text is a plain
        # decimal. Once a journal-style threshold has fired
        # (``"<0.001"``, ``">0.99"``, em-dash for NA) the text no longer
        # matches the float, and writing the float would make Excel
        # disagree with HTML / Markdown / LaTeX; the formatted string
        # is written instead.
        written_as_number = False
        if (
            isinstance(c.value, (int, float))
            and c.kind in numeric_kinds
            and _text_matches_number(c.text)
        ):
            try:
                ws.write_number(row_idx, col_idx, float(c.value), cell_fmt)
            except Exception:  # pragma: no cover — xlsxwriter accepts every float
                pass
            else:
                written_as_number = True
        if not written_as_number:
            ws.write_string(row_idx, col_idx, c.text, cell_fmt)
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
def _autosize_columns(ws: Any, table: SofraTable, ncols: int) -> None:
    """Set each column's width from the longest text it holds.

    Widths start at 10 and are capped at 40. Header cells are measured
    by their longest line; body cells by their full text plus two units
    per indent level. Cells beyond ``ncols`` are ignored in header and
    body rows alike, so a row wider than the table cannot index past
    the width list.
    """
    widths = [10] * ncols
    for hr in table.headers:
        for j, hc in enumerate(hr.cells[:ncols]):
            longest_line = max(len(line) for line in hc.text.split("\n"))
            widths[j] = max(widths[j], min(40, longest_line + 2))
    for r in table.rows:
        for j, bc in enumerate(r.cells[:ncols]):
            widths[j] = max(widths[j], min(40, len(bc.text) + 2 + bc.indent * 2))
    for j, w in enumerate(widths):
        ws.set_column(j, j, w)
|