pysofra 0.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. pysofra/__init__.py +82 -0
  2. pysofra/core/__init__.py +14 -0
  3. pysofra/core/compose.py +167 -0
  4. pysofra/core/format.py +155 -0
  5. pysofra/core/frames.py +69 -0
  6. pysofra/core/schema.py +128 -0
  7. pysofra/core/table.py +924 -0
  8. pysofra/io/__init__.py +1 -0
  9. pysofra/models/__init__.py +6 -0
  10. pysofra/models/extract.py +249 -0
  11. pysofra/models/pool.py +119 -0
  12. pysofra/models/regression.py +507 -0
  13. pysofra/models/survival.py +395 -0
  14. pysofra/models/uvregression.py +438 -0
  15. pysofra/notebook/__init__.py +6 -0
  16. pysofra/plot/__init__.py +23 -0
  17. pysofra/plot/_backend.py +32 -0
  18. pysofra/plot/forest.py +159 -0
  19. pysofra/plot/inline.py +171 -0
  20. pysofra/plot/km.py +249 -0
  21. pysofra/render/__init__.py +28 -0
  22. pysofra/render/_zip_determinism.py +57 -0
  23. pysofra/render/base.py +22 -0
  24. pysofra/render/docx.py +286 -0
  25. pysofra/render/html.py +442 -0
  26. pysofra/render/image.py +130 -0
  27. pysofra/render/latex.py +253 -0
  28. pysofra/render/markdown.py +128 -0
  29. pysofra/render/pptx.py +340 -0
  30. pysofra/render/xlsx.py +226 -0
  31. pysofra/summary/__init__.py +6 -0
  32. pysofra/summary/calibrate.py +214 -0
  33. pysofra/summary/design.py +246 -0
  34. pysofra/summary/effect_size.py +187 -0
  35. pysofra/summary/extras.py +745 -0
  36. pysofra/summary/smd.py +133 -0
  37. pysofra/summary/stats.py +135 -0
  38. pysofra/summary/tbl_cross.py +339 -0
  39. pysofra/summary/tbl_one.py +1220 -0
  40. pysofra/summary/tbl_summary.py +51 -0
  41. pysofra/summary/tests.py +370 -0
  42. pysofra/summary/typing.py +129 -0
  43. pysofra/summary/weights.py +161 -0
  44. pysofra/themes/__init__.py +5 -0
  45. pysofra/themes/registry.py +272 -0
  46. pysofra-0.1.0a1.dist-info/METADATA +301 -0
  47. pysofra-0.1.0a1.dist-info/RECORD +50 -0
  48. pysofra-0.1.0a1.dist-info/WHEEL +4 -0
  49. pysofra-0.1.0a1.dist-info/licenses/LICENSE +674 -0
  50. pysofra-0.1.0a1.dist-info/licenses/NOTICE +18 -0
@@ -0,0 +1,253 @@
1
+ """LaTeX rendering — booktabs style.
2
+
3
+ Outputs a self-contained ``table`` float with ``\\caption``, ``\\toprule``,
4
+ ``\\midrule``, ``\\bottomrule``, and an ``\\addlinespace``-style spanning
5
+ header. The output is publication-ready and compiles with any modern
6
+ ``pdflatex`` / ``lualatex`` / ``xelatex`` engine, given a ``\\usepackage{booktabs}``
7
+ in the document preamble (also ``\\usepackage{array}`` for the alignment
8
+ column types we use).
9
+
10
+ The renderer is deliberately minimal: we do not try to replicate every
11
+ HTML styling decision in LaTeX. Captions, footnotes, spanning headers,
12
+ column alignment, indentation, and bold/italic cells are all preserved;
13
+ fonts and colors come from the surrounding LaTeX document, not from our
14
+ themes.
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ from dataclasses import dataclass
20
+ from typing import Any
21
+
22
+ from ..core.schema import Cell, HeaderCell, Row, SpanningHeader
23
+ from ..core.table import SofraTable
24
+ from .base import Renderer
25
+
26
+ # LaTeX special characters that must be escaped in text mode.
27
+ _LATEX_ESCAPES = {
28
+ "\\": r"\textbackslash{}",
29
+ "&": r"\&",
30
+ "%": r"\%",
31
+ "$": r"\$",
32
+ "#": r"\#",
33
+ "_": r"\_",
34
+ "{": r"\{",
35
+ "}": r"\}",
36
+ "~": r"\textasciitilde{}",
37
+ "^": r"\textasciicircum{}",
38
+ }
39
+
40
+
41
+ def _escape(text: str) -> str:
42
+ """Escape LaTeX special characters in plain text."""
43
+ out: list[str] = []
44
+ for ch in text:
45
+ out.append(_LATEX_ESCAPES.get(ch, ch))
46
+ return "".join(out)
47
+
48
+
49
+ def _align_char(c: HeaderCell | Cell) -> str:
50
+ if getattr(c, "align", None) == "right":
51
+ return "r"
52
+ if getattr(c, "align", None) == "center":
53
+ return "c"
54
+ return "l"
55
+
56
+
57
@dataclass
class LatexRenderer(Renderer[str]):
    """Render a :class:`SofraTable` as a LaTeX ``table`` float.

    Fields:
        booktabs: Emit booktabs rules (``\\toprule``, ``\\midrule``,
            ``\\bottomrule``, ``\\cmidrule``, ``\\addlinespace``). When
            false, only core LaTeX constructs (``\\hline``, ``\\cline``,
            ``\\noalign``) are emitted, so the output does not need the
            booktabs package.
        float_position: Placement specifier used in ``\\begin{table}[...]``.
        centering: Emit ``\\centering`` inside the float.
        image_basename: File name handed to ``\\includegraphics`` for the
            inline plot. :meth:`write` overwrites it with the name of the
            sidecar PDF it creates.
    """

    booktabs: bool = True
    float_position: str = "ht"
    centering: bool = True
    image_basename: str | None = None  # if set, the inline plot is written here.pdf

    def render(self, table: SofraTable) -> str:
        """Return the LaTeX source for ``table`` as one newline-terminated string.

        The inline plot is referenced only when the table carries a plot
        with non-empty ``pdf_bytes`` *and* ``image_basename`` is set.
        """
        ncols = _ncols(table)
        col_spec = _column_spec(table)

        toprule = r"\toprule" if self.booktabs else r"\hline"
        midrule = r"\midrule" if self.booktabs else r"\hline"
        bottomrule = r"\bottomrule" if self.booktabs else r"\hline"

        plot = getattr(table, "inline_plot", None)
        plot_command: str | None = None
        if (
            plot is not None
            and getattr(plot, "pdf_bytes", None)
            and self.image_basename
        ):
            plot_command = (
                rf"\includegraphics[width={plot.width_in:.2f}in]"
                rf"{{{self.image_basename}}}"
            )

        out: list[str] = []
        out.append(rf"\begin{{table}}[{self.float_position}]")
        if self.centering:
            out.append(r"\centering")
        if table.caption:
            out.append(rf"\caption{{{_escape(table.caption)}}}")

        if plot_command and table.inline_svg_position == "above":
            out.append(plot_command)
            out.append(r"\par\vspace{0.5em}")

        out.append(rf"\begin{{tabular}}{{{col_spec}}}")
        out.append(toprule)

        # Spanning headers
        out.extend(self._render_spanning_rows(table.spanning_headers, ncols))

        # Column headers
        for hr in table.headers:
            out.append(self._render_header_row(hr))
        if table.headers:
            out.append(midrule)

        # Body rows
        for r in table.rows:
            out.append(self._render_row(r))

        out.append(bottomrule)
        out.append(r"\end{tabular}")

        if plot_command and table.inline_svg_position == "below":
            out.append(r"\par\vspace{0.5em}")
            out.append(plot_command)

        # Footnotes — emitted *outside* the tabular as small italic paragraphs.
        if table.footnotes:
            out.append(r"\vspace{0.25em}")
            for fn in table.footnotes:
                out.append(rf"\par\noindent\small\textit{{{_escape(fn)}}}")

        out.append(r"\end{table}")
        return "\n".join(out) + "\n"

    def write(self, table: SofraTable, path: Any) -> Any:
        """Write LaTeX source to ``path``, plus a sidecar PDF if the table carries a plot.

        Use this when you want a self-contained LaTeX deliverable with
        the inline plot embedded. The sidecar PDF is named
        ``<stem>_plot.pdf`` and referenced from the LaTeX source via
        ``\\includegraphics``.

        The parent directory is created before either file is written, and
        the LaTeX source is always encoded as UTF-8. As a side effect,
        ``image_basename`` is set to the sidecar's file name when one is
        written. Returns ``path`` as a :class:`pathlib.Path`.
        """
        from pathlib import Path

        path = Path(path)
        # Create the directory first: the sidecar PDF lives next to the
        # LaTeX file and is written before it.
        path.parent.mkdir(parents=True, exist_ok=True)

        plot = getattr(table, "inline_plot", None)
        pdf_bytes = getattr(plot, "pdf_bytes", None) if plot is not None else None
        if pdf_bytes:
            pdf_path = path.with_name(f"{path.stem}_plot.pdf")
            pdf_path.write_bytes(pdf_bytes)
            self.image_basename = pdf_path.name

        # Explicit encoding keeps non-ASCII captions and labels identical
        # across platforms.
        path.write_text(self.render(table), encoding="utf-8")
        return path

    # ------------------------------------------------------------------
    def _render_spanning_rows(
        self, spans: tuple[SpanningHeader, ...], ncols: int
    ) -> list[str]:
        """Return the spanner row and its partial-rule line, or ``[]`` if no spans.

        ``span.start`` / ``span.end`` are used as zero-based inclusive column
        indices; the emitted rule ranges are one-based.
        """
        if not spans:
            return []
        # Build the row of \multicolumn cells in column order, padding gaps.
        ordered = sorted(spans, key=lambda s: s.start)
        cells: list[str] = []
        rule_parts: list[str] = []
        col = 0
        for span in ordered:
            while col < span.start:
                cells.append("")
                col += 1
            size = span.end - span.start + 1
            cells.append(
                rf"\multicolumn{{{size}}}{{c}}{{{_escape(span.label)}}}"
            )
            first, last = span.start + 1, span.end + 1
            if self.booktabs:
                rule_parts.append(rf"\cmidrule(lr){{{first}-{last}}}")
            else:
                # \cmidrule is defined by booktabs; \cline is core LaTeX.
                rule_parts.append(rf"\cline{{{first}-{last}}}")
            col = span.end + 1
        while col < ncols:
            cells.append("")
            col += 1
        return [" & ".join(cells) + r" \\", "".join(rule_parts)]

    def _render_header_row(self, hr: Any) -> str:
        """Join the rendered cells of one header row into a tabular line."""
        parts = [self._render_header_cell(c) for c in hr.cells]
        return " & ".join(parts) + r" \\"

    def _render_header_cell(self, c: HeaderCell) -> str:
        """Render one header cell; multi-line text becomes a ``\\shortstack``."""
        parts = c.text.split("\n")
        if len(parts) > 1:
            body = r" \\ ".join(_escape(p) for p in parts)
            text = rf"\shortstack{{{body}}}"
        else:
            text = _escape(c.text)
        if c.bold:
            return rf"\textbf{{{text}}}"
        return text

    def _render_row(self, r: Row) -> str:
        """Render one body row, adding extra space above group-header rows."""
        cells = [self._render_cell(c) for c in r.cells]
        line = " & ".join(cells) + r" \\"
        if r.is_group_header:
            if self.booktabs:
                spacer = r"\addlinespace[0.25em]"
            else:
                # \addlinespace is booktabs-only; \noalign is core LaTeX.
                spacer = r"\noalign{\vskip 0.25em}"
            line = spacer + " " + line
        return line

    def _render_cell(self, c: Cell) -> str:
        """Render one body cell with its indent, bold and italic styling."""
        text = (
            "".join(_render_part_tex(p) for p in c.parts)
            if c.parts
            else _escape(c.text)
        )
        if c.indent > 0:
            text = rf"\hspace{{{c.indent * 1.2:.2f}em}}{text}"
        if c.bold:
            text = rf"\textbf{{{text}}}"
        if c.italic:
            text = rf"\textit{{{text}}}"
        return text
213
+
214
+
215
def _render_part_tex(part: Any) -> str:
    """Render a CellPart as LaTeX.

    The escaped text is wrapped, innermost first, by one macro per truthy
    flag on ``part`` and finally by ``\\href`` when ``part.link`` is set.
    """
    # (flag attribute, macro name) pairs in wrapping order.
    wrappers = (
        ("code", "texttt"),
        ("superscript", "textsuperscript"),
        ("subscript", "textsubscript"),
        ("italic", "textit"),
        ("bold", "textbf"),
    )
    rendered = _escape(part.text)
    for flag, macro in wrappers:
        if getattr(part, flag):
            rendered = "\\" + macro + "{" + rendered + "}"
    if part.link:
        rendered = "\\href{" + f"{part.link}" + "}{" + rendered + "}"
    return rendered
231
+
232
+
233
+ def _ncols(table: SofraTable) -> int:
234
+ if table.headers:
235
+ return len(table.headers[0].cells)
236
+ if table.rows:
237
+ return len(table.rows[0].cells)
238
+ return 1
239
+
240
+
241
def _column_spec(table: SofraTable) -> str:
    """Build the ``tabular`` alignment spec from the first header row.

    Without headers every column is ``l``. Otherwise the first column (the
    label column) is always ``l``, and each later column follows its header
    cell's ``align``, falling back to ``c`` when that is unset.
    """
    if not table.headers:
        return "l" * _ncols(table)
    return "".join(
        "l" if position == 0 else (_align_char(cell) if cell.align else "c")
        for position, cell in enumerate(table.headers[0].cells)
    )
@@ -0,0 +1,128 @@
1
+ """Markdown rendering.
2
+
3
+ Outputs GitHub-flavored Markdown. The format is intentionally lossy:
4
+ indentation and styling do not survive across every Markdown flavour,
5
+ but the table structure, captions, footnotes, and spanning-header
6
+ labels are preserved.
7
+
8
+ Spanning headers are emitted as a **bold paragraph above the table**
9
+ (not as a pipe row) because Markdown's table grammar requires the
10
+ column-header row to be immediately followed by the alignment row;
11
+ inserting a row between them turns the spanner into the data header
12
+ and silently corrupts the table. Rendering spanners as a paragraph
13
+ keeps both the table valid and the information visible.
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import re
19
+ from dataclasses import dataclass
20
+
21
+ from ..core.schema import Cell, HeaderCell, Row, SpanningHeader
22
+ from ..core.table import SofraTable
23
+ from .base import Renderer
24
+
25
# Prefix repeated once per indent level in front of body-cell text.
# NOTE(review): the trailing comment mentions four spaces, but the literal
# shown here holds a single one — confirm which is intended.
_INDENT_CHAR = " " # 4 regular spaces — Markdown collapses these visually
26
+
27
+
28
@dataclass
class MarkdownRenderer(Renderer[str]):
    """Render a :class:`SofraTable` as a GitHub-flavored Markdown table."""

    def render(self, table: SofraTable) -> str:
        """Return the Markdown source for ``table``, ending in one newline.

        Output order: bold caption, spanning-header paragraph, the pipe
        table itself, then one italic line per footnote.
        """
        width = _ncols(table)
        chunks: list[str] = []

        if table.caption:
            chunks += [f"**{_escape(table.caption)}**", ""]

        if table.spanning_headers:
            chunks += [_render_spanning_paragraph(table.spanning_headers), ""]

        # Markdown tables allow a single header row, so only the *last*
        # header row is rendered; earlier header rows are not emitted.
        # A table without headers gets a row of empty header cells.
        if table.headers:
            header_cells = list(table.headers[-1].cells)
        else:
            header_cells = [HeaderCell(text="") for _ in range(width)]

        chunks.append("| " + " | ".join(map(_header_text, header_cells)) + " |")
        chunks.append("|" + "|".join(map(_align_marker, header_cells)) + "|")
        chunks.extend(_render_row(row) for row in table.rows)

        if table.footnotes:
            chunks.append("")
            chunks.extend(f"_{_escape(note)}_" for note in table.footnotes)

        return "\n".join(chunks).rstrip() + "\n"
62
+
63
+
64
def _render_spanning_paragraph(spans: tuple[SpanningHeader, ...]) -> str:
    """Describe all spanners in one italic paragraph placed above the table.

    Each spanner is written as its bold, escaped label followed by the
    one-based column range it covers, e.g.
    ``***Group 1** (cols 1–3) · **Group 2** (col 4)*``.
    """

    def describe(span: SpanningHeader) -> str:
        first, last = span.start + 1, span.end + 1
        if span.start == span.end:
            covered = f"col {first}"
        else:
            covered = f"cols {first}–{last}"
        return f"**{_escape(span.label)}** ({covered})"

    return "*" + " · ".join(describe(span) for span in spans) + "*"
77
+
78
+
79
def _header_text(c: HeaderCell) -> str:
    """Return the escaped header text with line breaks folded into `` · ``."""
    single_line = " · ".join(c.text.split("\n"))
    return _escape(single_line)
81
+
82
+
83
+ def _align_marker(c: HeaderCell) -> str:
84
+ if c.align == "right":
85
+ return " ---: "
86
+ if c.align == "center":
87
+ return " :---: "
88
+ return " :--- "
89
+
90
+
91
def _render_row(r: Row) -> str:
    """Render one body row as a pipe-delimited Markdown table line."""
    rendered_cells = [_cell_text(cell) for cell in r.cells]
    return "| " + " | ".join(rendered_cells) + " |"
93
+
94
+
95
def _cell_text(c: Cell) -> str:
    """Render one body cell: indent prefix, escaped text, then emphasis.

    Line breaks inside the cell text are folded into `` · `` because a pipe
    table row must stay on a single line; a raw newline would end the row
    early and corrupt the table. Header cells are folded the same way.
    """
    indent = _INDENT_CHAR * c.indent
    text = _escape(c.text.replace("\n", " · "))
    if c.bold:
        text = f"**{text}**"
    if c.italic:
        text = f"*{text}*"
    # The indent stays outside the emphasis markers.
    return f"{indent}{text}"
103
+
104
+
105
+ # Characters that have syntactic meaning in GitHub-flavored Markdown and
106
+ # need to be backslash-escaped inside table cells. The backslash itself
107
+ # is escaped FIRST so subsequent additions don't double-escape.
108
+ _MARKDOWN_SPECIALS = ("\\", "|", "`", "*", "_", "[", "]", "<", ">", "#")
109
+ _SPECIAL_RE = re.compile("|".join(re.escape(ch) for ch in _MARKDOWN_SPECIALS))
110
+
111
+
112
+ def _escape(s: str) -> str:
113
+ """Backslash-escape every character GitHub-flavored Markdown treats
114
+ as syntax inside table cells.
115
+
116
+ Without this, a cell text like ``gene*`` is rendered as italicised
117
+ ``gene``, ``a_b_c`` becomes underlined, and a stray ``[`` opens an
118
+ unmatched link.
119
+ """
120
+ return _SPECIAL_RE.sub(lambda m: "\\" + m.group(0), s)
121
+
122
+
123
+ def _ncols(table: SofraTable) -> int:
124
+ if table.headers:
125
+ return len(table.headers[0].cells)
126
+ if table.rows:
127
+ return len(table.rows[0].cells)
128
+ return 1