pysofra 0.1.0a4__tar.gz → 0.1.0a6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pysofra-0.1.0a6/CHANGELOG.md +121 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/PKG-INFO +6 -6
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/README.md +5 -5
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/pyproject.toml +1 -1
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/src/pysofra/__init__.py +1 -1
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/src/pysofra/core/table.py +26 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/src/pysofra/models/extract.py +26 -4
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/src/pysofra/models/pool.py +5 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/src/pysofra/models/regression.py +12 -1
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/src/pysofra/models/survival.py +5 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/src/pysofra/plot/forest.py +23 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/src/pysofra/summary/extras.py +180 -31
- pysofra-0.1.0a6/src/pysofra/summary/smd.py +246 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/src/pysofra/summary/tbl_one.py +61 -16
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/src/pysofra/summary/tests.py +168 -51
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/tests/fixtures/scipy_validation/svyttest.json +5 -5
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/tests/test_regressions.py +458 -4
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/tests/test_render_edges.py +15 -7
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/tests/test_summary_edges.py +2 -2
- pysofra-0.1.0a4/CHANGELOG.md +0 -55
- pysofra-0.1.0a4/src/pysofra/summary/smd.py +0 -133
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/.gitignore +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/LICENSE +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/NOTICE +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/src/pysofra/core/__init__.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/src/pysofra/core/compose.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/src/pysofra/core/format.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/src/pysofra/core/frames.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/src/pysofra/core/schema.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/src/pysofra/models/__init__.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/src/pysofra/models/uvregression.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/src/pysofra/plot/__init__.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/src/pysofra/plot/_backend.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/src/pysofra/plot/inline.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/src/pysofra/plot/km.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/src/pysofra/render/__init__.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/src/pysofra/render/_zip_determinism.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/src/pysofra/render/base.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/src/pysofra/render/docx.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/src/pysofra/render/html.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/src/pysofra/render/image.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/src/pysofra/render/latex.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/src/pysofra/render/markdown.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/src/pysofra/render/pptx.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/src/pysofra/render/xlsx.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/src/pysofra/summary/__init__.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/src/pysofra/summary/calibrate.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/src/pysofra/summary/design.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/src/pysofra/summary/effect_size.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/src/pysofra/summary/stats.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/src/pysofra/summary/tbl_cross.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/src/pysofra/summary/tbl_summary.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/src/pysofra/summary/typing.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/src/pysofra/summary/weights.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/src/pysofra/themes/__init__.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/src/pysofra/themes/registry.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/tests/conftest.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/tests/fixtures/scipy_validation/README.md +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/tests/fixtures/scipy_validation/anova_oneway.json +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/tests/fixtures/scipy_validation/chi_square.json +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/tests/fixtures/scipy_validation/fisher_2x2.json +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/tests/fixtures/scipy_validation/kruskal_wallis.json +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/tests/fixtures/scipy_validation/student_t.json +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/tests/fixtures/scipy_validation/weighted_mean.json +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/tests/fixtures/scipy_validation/welch_t_test.json +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/tests/fixtures/scipy_validation/wilcoxon_rank_sum.json +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/tests/test_api_stability.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/tests/test_compose.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/tests/test_compose_edges.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/tests/test_conditional_formatting.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/tests/test_design_regression.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/tests/test_extract_edges.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/tests/test_extras_edges.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/tests/test_extras_edges_2.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/tests/test_format.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/tests/test_latex_pptx.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/tests/test_misc_fixes.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/tests/test_modifier_edges.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/tests/test_multi_model.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/tests/test_partial_modifiers.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/tests/test_partials.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/tests/test_plot_determinism.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/tests/test_plot_embedding.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/tests/test_plots.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/tests/test_polars.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/tests/test_pptx_overflow.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/tests/test_property_invariants.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/tests/test_rao_scott.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/tests/test_regression.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/tests/test_render_edges_2.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/tests/test_renderer_consistency.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/tests/test_rendering.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/tests/test_scipy_validation.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/tests/test_snapshot.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/tests/test_statistical_correctness.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/tests/test_stats.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/tests/test_summary_edges_2.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/tests/test_survey_design.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/tests/test_survey_extensions.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/tests/test_survival.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/tests/test_table_edges.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/tests/test_tbl_one.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/tests/test_test_overrides.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/tests/test_uvregression_factors.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/tests/test_validation_fixes.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/tests/test_weights.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/tests/test_wishlist.py +0 -0
- {pysofra-0.1.0a4 → pysofra-0.1.0a6}/tests/test_xlsx.py +0 -0
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to PySofra will be documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
6
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
|
+
|
|
8
|
+
## [0.1.0a6] — 2026-05-26
|
|
9
|
+
|
|
10
|
+
### Fixed
|
|
11
|
+
- **`svyttest` now uses full-design Taylor linearisation** of the
|
|
12
|
+
regression coefficient `ȳ_B − ȳ_A` instead of summing per-group
|
|
13
|
+
variances in quadrature. The new formulation accounts for
|
|
14
|
+
cross-group covariance under the survey design. Pinned against
|
|
15
|
+
R `survey::svyttest`: identical t-statistic and df, p-value
|
|
16
|
+
agreement to 7 decimal places on the test fixture. The previous
|
|
17
|
+
per-group formulation could be wildly anti-conservative when
|
|
18
|
+
clusters straddled groups.
|
|
19
|
+
- **`svyttest` degrees of freedom** corrected to `n_PSU − n_strata − 1`
|
|
20
|
+
(the design df minus one for the slope parameter). Previously
|
|
21
|
+
off by one.
|
|
22
|
+
- **`rao_scott_chisq` normalises weights to `Σw = n` before computing
|
|
23
|
+
the chi-square statistic**, matching R `survey::svychisq`. The
|
|
24
|
+
previous formulation produced statistics that scaled linearly with
|
|
25
|
+
the absolute magnitude of the weights and disagreed with R by
|
|
26
|
+
~10–15% on typical survey-weighted contingency tables.
|
|
27
|
+
- **`tbl_one(..., weights=...)` raises on negative or all-zero
|
|
28
|
+
weights** instead of warning and silently dropping. The earlier
|
|
29
|
+
behaviour could leave `N = -1` or `N = 0` cells in the rendered
|
|
30
|
+
table.
|
|
31
|
+
- **`tbl_one(...).add_p()` now emits a UserWarning** when falling
|
|
32
|
+
back to unweighted ANOVA / Kruskal–Wallis for >2-group
|
|
33
|
+
continuous variables under weights (design-adjusted multi-group
|
|
34
|
+
test is not yet implemented).
|
|
35
|
+
- **`tbl_one(...).add_global_p()` warns** when the table already
|
|
36
|
+
carries a column added by a prior modifier (`add_difference`,
|
|
37
|
+
`add_significance_stars`); the rebuild path drops such columns
|
|
38
|
+
and the user should call `add_global_p()` first.
|
|
39
|
+
|
|
40
|
+
## [0.1.0a5] — 2026-05-25
|
|
41
|
+
|
|
42
|
+
### Fixed
|
|
43
|
+
- **`svyttest` degrees of freedom** now follow the standard survey
|
|
44
|
+
convention `n_PSU − n_strata` (matching Stata `svy: ttest` and R
|
|
45
|
+
`survey::svyttest` with `nest=TRUE`), instead of `N − n_strata`. The
|
|
46
|
+
previous formula over-stated df dramatically under clustering and
|
|
47
|
+
produced anti-conservative p-values.
|
|
48
|
+
- **AFT models (Weibull / LogNormal / LogLogistic) are now labelled
|
|
49
|
+
"TR" (Time Ratio)** instead of "HR". The two parameters point in
|
|
50
|
+
opposite directions (TR > 1 → longer survival; HR > 1 → shorter
|
|
51
|
+
survival), so the mislabel was potentially misleading.
|
|
52
|
+
- **Lifelines regression CIs honour the user-supplied `conf_level`**.
|
|
53
|
+
Previously the CIs reflected the model's fit-time `alpha` regardless
|
|
54
|
+
of `conf_level`, so passing `conf_level=0.90` produced a "90% CI"
|
|
55
|
+
header with 95% CI numbers. The CI is now re-derived from `coef ±
|
|
56
|
+
z·se(coef)` at the requested level.
|
|
57
|
+
- **SMDs on a weighted Table 1 are now weighted**. `continuous_smd` and
|
|
58
|
+
`categorical_smd` accept a `weights=` argument; `tbl_one(..., weights=)`
|
|
59
|
+
threads it through automatically. Previously the SMD column was
|
|
60
|
+
always computed on unweighted samples even on a weighted table.
|
|
61
|
+
- **`add_ci`, `add_difference`, and `add_global_p` now honour weights**.
|
|
62
|
+
The Welch CI on continuous means, the Newcombe CI on proportion
|
|
63
|
+
differences, and the joint Wald-F test for `add_global_p` all use
|
|
64
|
+
weighted means / variances / proportions (with Kish's effective
|
|
65
|
+
sample size for SEs) when the table was built with `weights=`.
|
|
66
|
+
|
|
67
|
+
### Added
|
|
68
|
+
- `conf_level` range validation in `tbl_regression`, `tbl_survival`, and
|
|
69
|
+
`pool` (raises `ValueError` for values outside `(0, 1)`).
|
|
70
|
+
- `with_forest_plot()` on a multi-model regression table now emits a
|
|
71
|
+
`UserWarning` that only the first model is visualised, so the
|
|
72
|
+
presence of additional models is no longer silent.
|
|
73
|
+
|
|
74
|
+
## [0.1.0a4] — 2026-05-25
|
|
75
|
+
|
|
76
|
+
### Added
|
|
77
|
+
- Input validation for duplicate names in `variables=` (now raises
|
|
78
|
+
`ValueError` instead of silently accepting duplicates).
|
|
79
|
+
- Confidence-level range check in `.add_ci()` and related modifiers
|
|
80
|
+
(must lie in `(0, 1)`).
|
|
81
|
+
|
|
82
|
+
### Changed
|
|
83
|
+
- Renamed several test files for clarity. No public API changes.
|
|
84
|
+
|
|
85
|
+
## [0.1.0a3] — 2026-05-24
|
|
86
|
+
|
|
87
|
+
### Changed
|
|
88
|
+
- Documentation polish across README, changelog, and inline docstrings.
|
|
89
|
+
No public API or behavioural changes.
|
|
90
|
+
|
|
91
|
+
## [0.1.0a2] — 2026-05-23
|
|
92
|
+
|
|
93
|
+
### Fixed
|
|
94
|
+
- Theme styling now survives notebook viewers that strip `<style>` blocks
|
|
95
|
+
(e.g. GitHub's notebook viewer). Critical theme properties (font, border,
|
|
96
|
+
padding) are emitted as inline `style` attributes on each table element, so
|
|
97
|
+
`jama` vs `nejm` vs `clinical` vs `minimal` stay visibly distinct everywhere.
|
|
98
|
+
- README image and link URLs are now absolute so they render on PyPI.
|
|
99
|
+
|
|
100
|
+
## [0.1.0a1] — 2026-05-20
|
|
101
|
+
|
|
102
|
+
### Added
|
|
103
|
+
|
|
104
|
+
- Initial alpha release.
|
|
105
|
+
- Core `SofraTable` object with immutable method chaining.
|
|
106
|
+
- `tbl_one()` — baseline characteristic tables (Table 1) with continuous /
|
|
107
|
+
categorical summaries, stratification, missing data summaries, overall
|
|
108
|
+
column, p-values, and standardized mean differences (SMDs).
|
|
109
|
+
- `tbl_summary()` — general descriptive summary tables with grouping and
|
|
110
|
+
configurable statistics.
|
|
111
|
+
- `tbl_regression()` — regression tables for `statsmodels` linear / logistic
|
|
112
|
+
/ Poisson models, with confidence intervals, exponentiation, and p-values.
|
|
113
|
+
- `tbl_merge()` / `tbl_stack()` — table composition.
|
|
114
|
+
- HTML renderer with rich notebook `_repr_html_` output (dark-mode aware,
|
|
115
|
+
responsive, sticky headers).
|
|
116
|
+
- Markdown renderer.
|
|
117
|
+
- DOCX renderer via `python-docx` (publication-quality Word tables with
|
|
118
|
+
captions, footnotes, merged spanning headers).
|
|
119
|
+
- Themes: `clinical`, `compact`, `jama`, `nejm`, `minimal`.
|
|
120
|
+
- Automatic statistical test selection with override hooks.
|
|
121
|
+
- Snapshot tests for HTML output.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pysofra
|
|
3
|
-
Version: 0.1.0a4
|
|
3
|
+
Version: 0.1.0a6
|
|
4
4
|
Summary: Statistical reporting and table preparation framework for Python — the missing reporting layer.
|
|
5
5
|
Project-URL: Homepage, https://github.com/jturner-uofl/pysofra
|
|
6
6
|
Project-URL: Documentation, https://github.com/jturner-uofl/pysofra
|
|
@@ -70,12 +70,12 @@ Description-Content-Type: text/markdown
|
|
|
70
70
|
|
|
71
71
|
### The missing statistical reporting layer for Python
|
|
72
72
|
|
|
73
|
-
[](https://github.com/jturner-uofl/pysofra)
|
|
74
74
|
[](https://www.python.org/downloads/)
|
|
75
75
|
[](https://github.com/jturner-uofl/pysofra/blob/main/LICENSE)
|
|
76
76
|
[](https://github.com/astral-sh/ruff)
|
|
77
77
|
[](http://mypy-lang.org/)
|
|
78
|
-
[](#status)
|
|
79
79
|
|
|
80
80
|
</div>
|
|
81
81
|
|
|
@@ -111,7 +111,7 @@ Description-Content-Type: text/markdown
|
|
|
111
111
|
- **One immutable object, seven output formats** — build a `SofraTable` once, render to HTML / Markdown / LaTeX / DOCX / PPTX / XLSX / PNG, all byte-deterministic across processes
|
|
112
112
|
- **Auto-dispatched statistical tests** — Welch, Wilcoxon, ANOVA, Kruskal–Wallis, Fisher, χ², Rao–Scott, design-adjusted *t* — picked by variable kind, overridable per-row
|
|
113
113
|
- **Inline forest plots and KM curves** — embed matplotlib figures directly into the table; the same `SofraTable` renders them across every backend
|
|
114
|
-
- **Statistically correct** — every numeric output validated against `scipy` / `statsmodels` / `lifelines` at machine precision
|
|
114
|
+
- **Statistically correct** — every numeric output validated against `scipy` / `statsmodels` / `lifelines` reference implementations at machine precision
|
|
115
115
|
- **Method-chainable and immutable** — every modifier returns a new table; no in-place mutation, no global state, fully reproducible
|
|
116
116
|
|
|
117
117
|
<div align="center">
|
|
@@ -255,13 +255,13 @@ pip install "pysofra[dev]" # testing + linting (pytest, ruff, mypy, hypot
|
|
|
255
255
|
|
|
256
256
|
## Status
|
|
257
257
|
|
|
258
|
-
PySofra is in **alpha** (`0.1.0a4`). The public API surface is pinned
|
|
258
|
+
PySofra is in **alpha** (`0.1.0a6`). The public API surface is pinned
|
|
259
259
|
by an explicit
|
|
260
260
|
[API-stability test](https://github.com/jturner-uofl/pysofra/blob/main/tests/test_api_stability.py)
|
|
261
261
|
so that any unintended rename, removal, or signature change surfaces as
|
|
262
262
|
a failed test. Quality bar at this release:
|
|
263
263
|
|
|
264
|
-
* **
|
|
264
|
+
* **900+ tests passing**, near-100% line coverage, mypy strict, ruff clean.
|
|
265
265
|
* Every numeric output is validated against `scipy`, `lifelines`,
|
|
266
266
|
`statsmodels`, or a hand-computed textbook formula
|
|
267
267
|
([test_statistical_correctness.py](https://github.com/jturner-uofl/pysofra/blob/main/tests/test_statistical_correctness.py)).
|
|
@@ -4,12 +4,12 @@
|
|
|
4
4
|
|
|
5
5
|
### The missing statistical reporting layer for Python
|
|
6
6
|
|
|
7
|
-
[](https://github.com/jturner-uofl/pysofra)
|
|
8
8
|
[](https://www.python.org/downloads/)
|
|
9
9
|
[](https://github.com/jturner-uofl/pysofra/blob/main/LICENSE)
|
|
10
10
|
[](https://github.com/astral-sh/ruff)
|
|
11
11
|
[](http://mypy-lang.org/)
|
|
12
|
-
[](#status)
|
|
13
13
|
|
|
14
14
|
</div>
|
|
15
15
|
|
|
@@ -45,7 +45,7 @@
|
|
|
45
45
|
- **One immutable object, seven output formats** — build a `SofraTable` once, render to HTML / Markdown / LaTeX / DOCX / PPTX / XLSX / PNG, all byte-deterministic across processes
|
|
46
46
|
- **Auto-dispatched statistical tests** — Welch, Wilcoxon, ANOVA, Kruskal–Wallis, Fisher, χ², Rao–Scott, design-adjusted *t* — picked by variable kind, overridable per-row
|
|
47
47
|
- **Inline forest plots and KM curves** — embed matplotlib figures directly into the table; the same `SofraTable` renders them across every backend
|
|
48
|
-
- **Statistically correct** — every numeric output validated against `scipy` / `statsmodels` / `lifelines` at machine precision
|
|
48
|
+
- **Statistically correct** — every numeric output validated against `scipy` / `statsmodels` / `lifelines` reference implementations at machine precision
|
|
49
49
|
- **Method-chainable and immutable** — every modifier returns a new table; no in-place mutation, no global state, fully reproducible
|
|
50
50
|
|
|
51
51
|
<div align="center">
|
|
@@ -189,13 +189,13 @@ pip install "pysofra[dev]" # testing + linting (pytest, ruff, mypy, hypot
|
|
|
189
189
|
|
|
190
190
|
## Status
|
|
191
191
|
|
|
192
|
-
PySofra is in **alpha** (`0.1.0a4`). The public API surface is pinned
|
|
192
|
+
PySofra is in **alpha** (`0.1.0a6`). The public API surface is pinned
|
|
193
193
|
by an explicit
|
|
194
194
|
[API-stability test](https://github.com/jturner-uofl/pysofra/blob/main/tests/test_api_stability.py)
|
|
195
195
|
so that any unintended rename, removal, or signature change surfaces as
|
|
196
196
|
a failed test. Quality bar at this release:
|
|
197
197
|
|
|
198
|
-
* **
|
|
198
|
+
* **900+ tests passing**, near-100% line coverage, mypy strict, ruff clean.
|
|
199
199
|
* Every numeric output is validated against `scipy`, `lifelines`,
|
|
200
200
|
`statsmodels`, or a hand-computed textbook formula
|
|
201
201
|
([test_statistical_correctness.py](https://github.com/jturner-uofl/pysofra/blob/main/tests/test_statistical_correctness.py)).
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "pysofra"
|
|
7
|
-
version = "0.1.0a4"
|
|
7
|
+
version = "0.1.0a6"
|
|
8
8
|
description = "Statistical reporting and table preparation framework for Python — the missing reporting layer."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = { text = "GPL-3.0-or-later" }
|
|
@@ -355,8 +355,34 @@ class SofraTable:
|
|
|
355
355
|
|
|
356
356
|
return add_global_p(self)
|
|
357
357
|
# tbl_one / tbl_summary path: route through the rebuild spec.
|
|
358
|
+
# The rebuild reconstructs the table from spec.options only;
|
|
359
|
+
# columns added by post-build modifiers (``add_difference``,
|
|
360
|
+
# ``add_ci``, ``add_significance_stars``, ...) live in
|
|
361
|
+
# ``self.rows``/``self.headers`` and are NOT preserved by the
|
|
362
|
+
# rebuild. Detect a *known* such column by header text and warn
|
|
363
|
+
# the user so the silent column-drop doesn't mislead them.
|
|
364
|
+
# The correct chaining order is to call ``add_global_p()``
|
|
365
|
+
# *before* any column-adding modifier.
|
|
358
366
|
spec = self._spec
|
|
359
367
|
if spec is not None and spec.builder in ("tbl_one", "tbl_summary"):
|
|
368
|
+
post_build_headers = {"Diff", "[", "[ "}
|
|
369
|
+
header_texts = (
|
|
370
|
+
[c.text for c in self.headers[0].cells] if self.headers else []
|
|
371
|
+
)
|
|
372
|
+
has_diff_col = any(h.startswith("Diff (") for h in header_texts)
|
|
373
|
+
has_sig_col = any(h.lower() == "signif." for h in header_texts)
|
|
374
|
+
del post_build_headers
|
|
375
|
+
if has_diff_col or has_sig_col:
|
|
376
|
+
import warnings as _w
|
|
377
|
+
_w.warn(
|
|
378
|
+
"add_global_p() reruns the table builder; any "
|
|
379
|
+
"column added by a prior modifier (e.g. add_difference, "
|
|
380
|
+
"add_significance_stars) will be dropped. Call "
|
|
381
|
+
"add_global_p() BEFORE those modifiers to preserve "
|
|
382
|
+
"their columns.",
|
|
383
|
+
UserWarning,
|
|
384
|
+
stacklevel=2,
|
|
385
|
+
)
|
|
360
386
|
return self._with_option(
|
|
361
387
|
global_p=True,
|
|
362
388
|
global_p_adjust_for=tuple(adjust_for or ()),
|
|
@@ -152,11 +152,29 @@ def _extract_lifelines(model: Any, conf_level: float) -> ModelSummary:
|
|
|
152
152
|
)
|
|
153
153
|
|
|
154
154
|
estimates = summary["coef"].astype(float)
|
|
155
|
-
ci_lo = summary[lo_col].astype(float)
|
|
156
|
-
ci_hi = summary[hi_col].astype(float)
|
|
157
155
|
pvalues = summary["p"].astype(float) if "p" in summary.columns else pd.Series(
|
|
158
156
|
[float("nan")] * len(summary), index=summary.index
|
|
159
157
|
)
|
|
158
|
+
|
|
159
|
+
# Lifelines bakes the CI level into the fit (alpha=0.05 by default),
|
|
160
|
+
# so the ``coef lower/upper X%`` columns reflect the fit-time alpha,
|
|
161
|
+
# not the user's requested ``conf_level``. To honour ``conf_level``
|
|
162
|
+
# without re-fitting the model, re-derive the CI directly from
|
|
163
|
+
# ``coef`` and ``se(coef)`` using a normal pivot. Falls back to the
|
|
164
|
+
# lifelines-provided columns only when no SE column is present.
|
|
165
|
+
se_col = _find_col(summary, ["se(coef)"])
|
|
166
|
+
if se_col is not None:
|
|
167
|
+
import numpy as _np
|
|
168
|
+
from scipy import stats as _sp_stats
|
|
169
|
+
z = float(_sp_stats.norm.ppf(0.5 + conf_level / 2))
|
|
170
|
+
se = summary[se_col].astype(float)
|
|
171
|
+
ci_lo = estimates - z * se
|
|
172
|
+
ci_hi = estimates + z * se
|
|
173
|
+
# Hide ``_np`` reference so linters don't flag it as unused.
|
|
174
|
+
del _np
|
|
175
|
+
else:
|
|
176
|
+
ci_lo = summary[lo_col].astype(float)
|
|
177
|
+
ci_hi = summary[hi_col].astype(float)
|
|
160
178
|
# AFT models (Weibull / log-logistic / log-normal) carry a MultiIndex
|
|
161
179
|
# ``(param, covariate)`` index — e.g. ``('lambda_', 'age')``. Renderers
|
|
162
180
|
# expect string row labels; flatten with ``covariate (param)`` so the
|
|
@@ -170,9 +188,13 @@ def _extract_lifelines(model: Any, conf_level: float) -> ModelSummary:
|
|
|
170
188
|
pvalues.index = pd.Index(flat)
|
|
171
189
|
|
|
172
190
|
family = type(model).__name__
|
|
173
|
-
# Cox
|
|
191
|
+
# Cox returns exp(coef) as a Hazard Ratio; the AFT family (Weibull,
|
|
192
|
+
# LogNormal, LogLogistic) returns exp(coef) as a Time Ratio. Both are
|
|
193
|
+
# the natural "exponentiate me" output of the fitter, so we set
|
|
194
|
+
# natural_exp=True; the column header label is chosen downstream by
|
|
195
|
+
# ``_default_estimate_label`` in regression.py which selects "HR"
|
|
196
|
+
# for Cox and "TR" for AFT.
|
|
174
197
|
natural_exp = True
|
|
175
|
-
del conf_level # honoured by lifelines at fit time
|
|
176
198
|
return ModelSummary(
|
|
177
199
|
estimates=estimates,
|
|
178
200
|
ci_lo=ci_lo,
|
|
@@ -54,6 +54,11 @@ def pool(models: list[Any], *, conf_level: float = 0.95) -> ModelSummary:
|
|
|
54
54
|
sklearn (sklearn has no SEs so the pool degenerates to a simple
|
|
55
55
|
mean-of-coefficients).
|
|
56
56
|
"""
|
|
57
|
+
if not (0.0 < conf_level < 1.0):
|
|
58
|
+
raise ValueError(
|
|
59
|
+
f"conf_level must lie in the open interval (0, 1); "
|
|
60
|
+
f"got {conf_level!r}."
|
|
61
|
+
)
|
|
57
62
|
if len(models) < 2:
|
|
58
63
|
raise ValueError(
|
|
59
64
|
"pool requires at least two imputed-dataset fits "
|
|
@@ -77,6 +77,11 @@ def tbl_regression(
|
|
|
77
77
|
Source dataframe — needed only when ``design=`` references
|
|
78
78
|
columns that the fitted model didn't already see.
|
|
79
79
|
"""
|
|
80
|
+
if not (0.0 < conf_level < 1.0):
|
|
81
|
+
raise ValueError(
|
|
82
|
+
f"conf_level must lie in the open interval (0, 1); "
|
|
83
|
+
f"got {conf_level!r}."
|
|
84
|
+
)
|
|
80
85
|
models = list(model) if isinstance(model, (list, tuple)) else [model]
|
|
81
86
|
if not models:
|
|
82
87
|
raise ValueError("tbl_regression requires at least one model.")
|
|
@@ -347,7 +352,13 @@ def _default_estimate_label(family_label: str, exponentiate: bool) -> str:
|
|
|
347
352
|
if "cox" in fl or "phreg" in fl:
|
|
348
353
|
return "HR"
|
|
349
354
|
if "weibull" in fl or "lognormal" in fl or "loglogistic" in fl:
|
|
350
|
-
return "HR"
|
355
|
+
# AFT family: exp(coef) is a TIME RATIO (also called Acceleration
|
|
356
|
+
# Factor), not a hazard ratio. TR > 1 means LONGER survival;
|
|
357
|
+
# HR > 1 means SHORTER survival — the two parameters point in
|
|
358
|
+
# opposite directions. Mislabelling AFT as "HR" is publication-
|
|
359
|
+
# critical because a reader will draw the wrong clinical
|
|
360
|
+
# conclusion.
|
|
361
|
+
return "TR"
|
|
351
362
|
if "logit" in fl or "binomial" in fl or "probit" in fl or "logistic" in fl:
|
|
352
363
|
return "OR"
|
|
353
364
|
if "poisson" in fl or "negativebinomial" in fl:
|
|
@@ -77,6 +77,11 @@ def tbl_survival(
|
|
|
77
77
|
"tbl_survival requires lifelines. Install with `pip install lifelines`."
|
|
78
78
|
) from e
|
|
79
79
|
|
|
80
|
+
if not (0.0 < conf_level < 1.0):
|
|
81
|
+
raise ValueError(
|
|
82
|
+
f"conf_level must lie in the open interval (0, 1); "
|
|
83
|
+
f"got {conf_level!r}."
|
|
84
|
+
)
|
|
80
85
|
data = to_pandas(data)
|
|
81
86
|
for col in (time, event):
|
|
82
87
|
if col not in data.columns:
|
|
@@ -101,6 +101,29 @@ def _build_forest_figure(
|
|
|
101
101
|
"`pip install matplotlib`."
|
|
102
102
|
) from e
|
|
103
103
|
|
|
104
|
+
# Multi-model `tbl_regression` tables emit one estimate / CI / p
|
|
105
|
+
# column triple per model and a spanning header per model. The
|
|
106
|
+
# current forest renderer plots a single series, so for multi-model
|
|
107
|
+
# tables it can only visualise one model. We pick the first model's
|
|
108
|
+
# columns (matching what gtsummary does by default when given a
|
|
109
|
+
# multi-model object), and emit a clear ``UserWarning`` so the user
|
|
110
|
+
# knows the other models were not drawn.
|
|
111
|
+
n_models = max(1, len(table.spanning_headers))
|
|
112
|
+
if n_models > 1:
|
|
113
|
+
import warnings as _w
|
|
114
|
+
first_label = table.spanning_headers[0].label
|
|
115
|
+
other_labels = [s.label for s in table.spanning_headers[1:]]
|
|
116
|
+
_w.warn(
|
|
117
|
+
f"with_forest_plot on a multi-model regression table plots "
|
|
118
|
+
f"only the first model ({first_label!r}); the remaining "
|
|
119
|
+
f"{len(other_labels)} model(s) {other_labels!r} are not "
|
|
120
|
+
f"visualised. Render one model at a time, or use "
|
|
121
|
+
f"`with_forest_plot(...)` on each single-model table "
|
|
122
|
+
f"separately.",
|
|
123
|
+
UserWarning,
|
|
124
|
+
stacklevel=2,
|
|
125
|
+
)
|
|
126
|
+
|
|
104
127
|
points: list[tuple[str, float, float, float]] = []
|
|
105
128
|
for r in table.rows:
|
|
106
129
|
label = r.cells[0].text
|