forestplotx 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- forestplotx-1.0.0/LICENSE +21 -0
- forestplotx-1.0.0/PKG-INFO +295 -0
- forestplotx-1.0.0/README.md +267 -0
- forestplotx-1.0.0/pyproject.toml +49 -0
- forestplotx-1.0.0/setup.cfg +4 -0
- forestplotx-1.0.0/src/forestplotx/__init__.py +9 -0
- forestplotx-1.0.0/src/forestplotx/_axes_config.py +272 -0
- forestplotx-1.0.0/src/forestplotx/_layout.py +70 -0
- forestplotx-1.0.0/src/forestplotx/_normalize.py +123 -0
- forestplotx-1.0.0/src/forestplotx/plot.py +563 -0
- forestplotx-1.0.0/src/forestplotx/py.typed +0 -0
- forestplotx-1.0.0/src/forestplotx.egg-info/PKG-INFO +295 -0
- forestplotx-1.0.0/src/forestplotx.egg-info/SOURCES.txt +18 -0
- forestplotx-1.0.0/src/forestplotx.egg-info/dependency_links.txt +1 -0
- forestplotx-1.0.0/src/forestplotx.egg-info/requires.txt +6 -0
- forestplotx-1.0.0/src/forestplotx.egg-info/top_level.txt +1 -0
- forestplotx-1.0.0/tests/test_axes_config.py +620 -0
- forestplotx-1.0.0/tests/test_layout.py +282 -0
- forestplotx-1.0.0/tests/test_normalization.py +157 -0
- forestplotx-1.0.0/tests/test_plot_smoke.py +65 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Shervin Taheripour
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,295 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: forestplotx
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Publication-ready forest plots for regression model outputs in Python.
|
|
5
|
+
Author-email: Shervin Taheripour <shervintaheripour@fastmail.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/shervin-taheripour/forestplotx
|
|
8
|
+
Project-URL: Repository, https://github.com/shervin-taheripour/forestplotx
|
|
9
|
+
Project-URL: Issues, https://github.com/shervin-taheripour/forestplotx/issues
|
|
10
|
+
Classifier: Development Status :: 3 - Alpha
|
|
11
|
+
Classifier: Intended Audience :: Science/Research
|
|
12
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
13
|
+
Classifier: Operating System :: OS Independent
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Topic :: Scientific/Engineering :: Visualization
|
|
19
|
+
Requires-Python: >=3.10
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
License-File: LICENSE
|
|
22
|
+
Requires-Dist: matplotlib>=3.7
|
|
23
|
+
Requires-Dist: numpy>=1.24
|
|
24
|
+
Requires-Dist: pandas>=2.0
|
|
25
|
+
Provides-Extra: dev
|
|
26
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
27
|
+
Dynamic: license-file
|
|
28
|
+
|
|
29
|
+
# forestplotx
|
|
30
|
+
|
|
31
|
+
Publication-ready forest plots for regression model outputs in Python.
|
|
32
|
+
|
|
33
|
+
`forestplotx` takes DataFrame output from logistic, linear, ordinal, or gamma regression models and produces a combined table + forest plot figure — ready for papers, reports, and presentations.
|
|
34
|
+
|
|
35
|
+

|
|
36
|
+
|
|
37
|
+
## Features
|
|
38
|
+
|
|
39
|
+
- **Multiple model types** — binomial (logistic), linear, gamma, and ordinal (cumulative logit)
|
|
40
|
+
- **Automatic effect-scale handling** — exponentiation, log-scale axes, and reference lines driven by link function
|
|
41
|
+
- **Flexible column detection** — accepts `OR`, `Ratio`, `Estimate`, `beta`, `Coef`, or `effect` as input
|
|
42
|
+
- **Dual-outcome layout** — side-by-side comparison of up to two outcomes
|
|
43
|
+
- **Category grouping** — optional row grouping with bold category headers
|
|
44
|
+
- **Deterministic layout presets** — fixed internal geometry for 4 core display cases
|
|
45
|
+
- **Adaptive small-table sizing** — compact height heuristic for low row counts
|
|
46
|
+
- **Static matplotlib output** — high-resolution, saveable figures
|
|
47
|
+
|
|
48
|
+
## Layout Examples
|
|
49
|
+
|
|
50
|
+
- `examples/layout_case1_general_true_two_outcomes.png`
|
|
51
|
+
- `examples/layout_case2_general_true_one_outcome.png`
|
|
52
|
+
- `examples/layout_case3_general_false_two_outcomes.png`
|
|
53
|
+
- `examples/layout_case4_general_false_one_outcome.png`
|
|
54
|
+
|
|
55
|
+
## Installation
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
pip install forestplotx
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
Requires Python ≥ 3.10. Dependencies: `matplotlib>=3.7`, `numpy>=1.24`, `pandas>=2.0`.
|
|
62
|
+
|
|
63
|
+
### Development install (reproducible environment)
|
|
64
|
+
|
|
65
|
+
```bash
|
|
66
|
+
pip install -r requirements.txt # install the exact pinned versions used during development
|
|
67
|
+
pip install -e ".[dev]" # install forestplotx itself in editable mode
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
`requirements.txt` pins the full transitive closure of runtime + test dependencies. `pyproject.toml` declares the minimum-version constraints used when installing normally.
|
|
71
|
+
|
|
72
|
+
## Quick Start
|
|
73
|
+
|
|
74
|
+
```python
|
|
75
|
+
import pandas as pd
|
|
76
|
+
import forestplotx as fpx
|
|
77
|
+
|
|
78
|
+
# Example: logistic regression output
|
|
79
|
+
df = pd.DataFrame({
|
|
80
|
+
"predictor": ["Age", "Sex", "BMI", "Smoking"],
|
|
81
|
+
"outcome": ["Mortality"] * 4,
|
|
82
|
+
"Estimate": [-0.12, 0.85, 0.30, 0.55], # log-odds (pre-exponentiation)
|
|
83
|
+
"CI_low": [-0.35, 0.42, 0.05, 0.20],
|
|
84
|
+
"CI_high": [ 0.11, 1.28, 0.55, 0.90],
|
|
85
|
+
"p_value": [0.300, 0.001, 0.020, 0.003],
|
|
86
|
+
})
|
|
87
|
+
|
|
88
|
+
fig, axes = fpx.forest_plot(df, model_type="binom")
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
## Supported Model Types
|
|
92
|
+
|
|
93
|
+
| `model_type` | Link | Effect label | X-axis | Reference line |
|
|
94
|
+
|:-------------|:-----|:-------------|:-------|:---------------|
|
|
95
|
+
| `"binom"` | logit | OR | Odds Ratio (log scale) | 1.0 |
|
|
96
|
+
| `"gamma"` | log | Ratio | Ratio (log scale) | 1.0 |
|
|
97
|
+
| `"linear"` | identity | Coef | Effect Size | 0.0 |
|
|
98
|
+
| `"ordinal"` | logit | OR | Odds Ratio (log scale) | 1.0 |
|
|
99
|
+
|
|
100
|
+
The `link` parameter can override the default — for example, `model_type="binom", link="identity"` will skip exponentiation and plot on a linear scale.
|
|
101
|
+
|
|
102
|
+
## Input DataFrame
|
|
103
|
+
|
|
104
|
+
### Required columns
|
|
105
|
+
|
|
106
|
+
| Column | Description |
|
|
107
|
+
|:-------|:------------|
|
|
108
|
+
| `predictor` | Row labels (predictor names) |
|
|
109
|
+
| `outcome` | Outcome name (used for column headers and filtering) |
|
|
110
|
+
| Effect column | One of: `OR`, `Ratio`, `Estimate`, `beta`, `Coef`, `effect` |
|
|
111
|
+
| `CI_low` / `ci_low` | Lower bound of 95% CI |
|
|
112
|
+
| `CI_high` / `ci_high` | Upper bound of 95% CI |
|
|
113
|
+
|
|
114
|
+
### Optional columns
|
|
115
|
+
|
|
116
|
+
| Column | Description |
|
|
117
|
+
|:-------|:------------|
|
|
118
|
+
| `p_value` | P-value (bold formatting applied when < 0.05) |
|
|
119
|
+
| `category` | Group predictors under category headers |
|
|
120
|
+
| `n` | Event count |
|
|
121
|
+
| `N` | Total count |
|
|
122
|
+
|
|
123
|
+
**Note:** For `logit`/`log` links, `exponentiate=None` applies model-based exponentiation with a warning; set `exponentiate=False` if your data is already on the effect scale.
|
|
124
|
+
Displayed CI values in the table use bracket notation: `[low,high]`.
|
|
125
|
+
|
|
126
|
+
## API Reference
|
|
127
|
+
|
|
128
|
+
### `forest_plot()`
|
|
129
|
+
|
|
130
|
+
```python
|
|
131
|
+
fig, axes = fpx.forest_plot(
|
|
132
|
+
df, # DataFrame with model output
|
|
133
|
+
model_type="binom", # "binom" | "gamma" | "linear" | "ordinal"
|
|
134
|
+
link=None, # Override default link function
|
|
135
|
+
exponentiate=None, # None=auto by link, True=force, False=disable
|
|
136
|
+
outcomes=None, # list[str], max 2; auto-detected if None
|
|
137
|
+
legend_labels=None, # list[str] override for legend entries
|
|
138
|
+
footer_text=None, # Italic footer (wrapped/capped internally)
|
|
139
|
+
show_general_stats=True, # Show n / N / Freq columns
|
|
140
|
+
bold_override=None, # Manual bold control per predictor/outcome
|
|
141
|
+
base_decimals=2, # Decimal places for effect / CI values
|
|
142
|
+
tick_style="decimal", # "decimal" or "power10" (readable log10 exponents)
|
|
143
|
+
clip_outliers=False, # Clip axis limits by quantiles (opt-in)
|
|
144
|
+
clip_quantiles=(0.02, 0.98), # Low/high quantiles used when clipping
|
|
145
|
+
point_colors=None, # list[str], up to 2 hex codes for outcome markers
|
|
146
|
+
table_only=False, # Render table without forest panel
|
|
147
|
+
show=True, # Call plt.show(); set False for programmatic use
|
|
148
|
+
save=None, # File path to save (e.g. "plot.png")
|
|
149
|
+
)
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
**Returns:** `(fig, axes)` — matplotlib Figure and axes tuple. When `show=False`, the figure is returned without being displayed, allowing further customization before you call `plt.show()` manually.
|
|
153
|
+
When `exponentiate=None`, auto exponentiation for log/logit links emits a warning so users can verify input scale.
|
|
154
|
+
|
|
155
|
+
### Layout Behavior (v1)
|
|
156
|
+
|
|
157
|
+
`forest_plot()` uses fixed internal layout presets (including internal font size) for:
|
|
158
|
+
|
|
159
|
+
1. `show_general_stats=True` + two outcomes
|
|
160
|
+
2. `show_general_stats=True` + one outcome
|
|
161
|
+
3. `show_general_stats=False` + two outcomes
|
|
162
|
+
4. `show_general_stats=False` + one outcome
|
|
163
|
+
|
|
164
|
+
This is intentional to keep output stable and publication-ready across common use cases.
|
|
165
|
+
`base_decimals` is capped at 3 internally to prevent table collisions in dense layouts.
|
|
166
|
+
For small row counts, figure height uses a tighter internal heuristic to reduce excessive whitespace.
|
|
167
|
+
Long footer text is wrapped and capped at 3 lines; any overflow is truncated with an ellipsis.
|
|
168
|
+
|
|
169
|
+
### Exponentiation Safety
|
|
170
|
+
|
|
171
|
+
- Use `exponentiate=None` (default) for model/link-based automatic handling.
|
|
172
|
+
- Use `exponentiate=False` if your input is already on the effect scale (e.g., OR/Ratio, not log-coefficients).
|
|
173
|
+
- Use `exponentiate=True` only when input is definitely on log scale and needs transformation.
|
|
174
|
+
- Read warnings: they include auto-exponentiation context and column mapping (effect column + `CI_low`/`CI_high` combined into `95% CI`).
|
|
175
|
+
|
|
176
|
+
### `normalize_model_output()`
|
|
177
|
+
|
|
178
|
+
```python
|
|
179
|
+
clean_df, config = fpx.normalize_model_output(
|
|
180
|
+
df, model_type="binom", link=None, exponentiate=None
|
|
181
|
+
)
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
Standardizes columns, applies exponentiation policy, and returns axis metadata.
|
|
185
|
+
`config` includes `exponentiated` and `renamed_columns` for transparency.
|
|
186
|
+
|
|
187
|
+
## Examples
|
|
188
|
+
|
|
189
|
+
### Category grouping
|
|
190
|
+
|
|
191
|
+
```python
|
|
192
|
+
df["category"] = ["Demographics", "Demographics", "Clinical", "Clinical"]
|
|
193
|
+
|
|
194
|
+
fig, axes = fpx.forest_plot(df, model_type="binom")
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
### Dual outcomes
|
|
198
|
+
|
|
199
|
+
```python
|
|
200
|
+
# DataFrame with two outcomes per predictor
|
|
201
|
+
fig, axes = fpx.forest_plot(
|
|
202
|
+
df_two_outcomes,
|
|
203
|
+
model_type="binom",
|
|
204
|
+
outcomes=["Mortality", "Readmission"],
|
|
205
|
+
legend_labels=["30-day mortality", "90-day readmission"],
|
|
206
|
+
)
|
|
207
|
+
```
|
|
208
|
+
|
|
209
|
+
### Custom marker colors
|
|
210
|
+
|
|
211
|
+
```python
|
|
212
|
+
fig, axes = fpx.forest_plot(
|
|
213
|
+
df_two_outcomes,
|
|
214
|
+
model_type="binom",
|
|
215
|
+
outcomes=["Mortality", "Readmission"],
|
|
216
|
+
point_colors=["#2C5F8A", "#D4763A"],
|
|
217
|
+
)
|
|
218
|
+
```
|
|
219
|
+
|
|
220
|
+
### Linear model
|
|
221
|
+
|
|
222
|
+
```python
|
|
223
|
+
fig, axes = fpx.forest_plot(df_linear, model_type="linear")
|
|
224
|
+
```
|
|
225
|
+
|
|
226
|
+
### Save to file
|
|
227
|
+
|
|
228
|
+
```python
|
|
229
|
+
fig, axes = fpx.forest_plot(df, model_type="binom", save="forest_plot.png")
|
|
230
|
+
```
|
|
231
|
+
|
|
232
|
+
### Programmatic use (no display)
|
|
233
|
+
|
|
234
|
+
```python
|
|
235
|
+
fig, axes = fpx.forest_plot(df, model_type="binom", show=False)
|
|
236
|
+
# Further customization...
|
|
237
|
+
fig.suptitle("My Forest Plot", fontsize=16)
|
|
238
|
+
fig.savefig("custom_plot.pdf", dpi=300)
|
|
239
|
+
```
|
|
240
|
+
|
|
241
|
+
In notebooks, `show=False` prevents the internal `plt.show()` call, but Jupyter may still auto-render
|
|
242
|
+
the returned figure object. Use `plt.close(fig)` to suppress display.
|
|
243
|
+
|
|
244
|
+
## Testing
|
|
245
|
+
|
|
246
|
+
The test suite lives in `tests/` and covers all internal modules with no image comparisons — structural and behavioral assertions only.
|
|
247
|
+
|
|
248
|
+
Install dev dependencies first (see [Installation](#installation)), then:
|
|
249
|
+
|
|
250
|
+
```bash
|
|
251
|
+
pytest
|
|
252
|
+
```
|
|
253
|
+
|
|
254
|
+
### Test files
|
|
255
|
+
|
|
256
|
+
| File | Module under test | Tests |
|
|
257
|
+
|:-----|:------------------|------:|
|
|
258
|
+
| `tests/test_normalization.py` | `_normalize.py` | 11 |
|
|
259
|
+
| `tests/test_layout.py` | `_layout.py` | 33 |
|
|
260
|
+
| `tests/test_axes_config.py` | `_axes_config.py` | 65 |
|
|
261
|
+
| `tests/test_plot_smoke.py` | `plot.py` | 2 |
|
|
262
|
+
|
|
263
|
+
### Coverage summary
|
|
264
|
+
|
|
265
|
+
**`test_layout.py`** — `build_row_layout()`
|
|
266
|
+
|
|
267
|
+
- Flat layout (no `category` column): sequential y-positions, correct row count, all `is_cat=False`, `"Uncategorized"` labels, predictor order preserved, required columns present
|
|
268
|
+
- NaN predictor rows dropped; empty DataFrame raises `ValueError`
|
|
269
|
+
- Categorized layout: category header rows inserted, total = categories + predictors (parametrized), correct `is_cat` flags and per-predictor category labels, all-NaN category falls back to flat
|
|
270
|
+
- Dual-outcome DataFrames: `unique()` deduplication keeps one row per predictor regardless of outcome count
|
|
271
|
+
|
|
272
|
+
**`test_axes_config.py`** — `configure_forest_axis()` and helpers
|
|
273
|
+
|
|
274
|
+
- `_nice_linear_step`: 8 parametrized input→output pairs, zero, negative, tiny positive values
|
|
275
|
+
- `_decimals_from_ticks`: empty/single-tick → 2, step-inferred decimals (0/1/2), `max_decimals` cap
|
|
276
|
+
- Reference line: `axvline` placed at correct x for logit (1.0), log (1.0), identity (0.0); `#910C07` color; dashed style; threshold override
|
|
277
|
+
- X-scale: `"log"` for logit/log links, `"linear"` for identity; empty data and `thresholds=None` do not crash
|
|
278
|
+
- X-label: correct label per link (`"Odds Ratio"` / `"Ratio"` / `"Effect Size"`), threshold override, font size propagated
|
|
279
|
+
- Y-ticks cleared; y-limits applied from `thresholds["y_limits"]`
|
|
280
|
+
- Spine visibility: top/right/left hidden, bottom visible
|
|
281
|
+
- X-limits contain full data range for log and linear axes; negative reference raises `ValueError`; span=0 edge case handled
|
|
282
|
+
- End-to-end parametrized across all four model types: binom, gamma, linear, ordinal
|
|
283
|
+
- `show_general_stats=True/False` both produce consistent output (documents no-op behaviour on axis)
|
|
284
|
+
- Tick count heuristic: `num_ticks` in {3, 5, 7} for log and linear axes
|
|
285
|
+
- `tick_style="power10"` uses readable rounded log10 exponents; single vs dual outcome `lo_all`/`hi_all` arrays both handled
|
|
286
|
+
|
|
287
|
+
## Scope
|
|
288
|
+
|
|
289
|
+
`forestplotx` v1.0 is intentionally focused. It produces static, publication-quality forest plots for common regression model types.
|
|
290
|
+
|
|
291
|
+
**Not included:** interactive plots, Cox/Poisson models, theming engine, or GUI.
|
|
292
|
+
|
|
293
|
+
## License
|
|
294
|
+
|
|
295
|
+
MIT
|
|
@@ -0,0 +1,267 @@
|
|
|
1
|
+
# forestplotx
|
|
2
|
+
|
|
3
|
+
Publication-ready forest plots for regression model outputs in Python.
|
|
4
|
+
|
|
5
|
+
`forestplotx` takes DataFrame output from logistic, linear, ordinal, or gamma regression models and produces a combined table + forest plot figure — ready for papers, reports, and presentations.
|
|
6
|
+
|
|
7
|
+

|
|
8
|
+
|
|
9
|
+
## Features
|
|
10
|
+
|
|
11
|
+
- **Multiple model types** — binomial (logistic), linear, gamma, and ordinal (cumulative logit)
|
|
12
|
+
- **Automatic effect-scale handling** — exponentiation, log-scale axes, and reference lines driven by link function
|
|
13
|
+
- **Flexible column detection** — accepts `OR`, `Ratio`, `Estimate`, `beta`, `Coef`, or `effect` as input
|
|
14
|
+
- **Dual-outcome layout** — side-by-side comparison of up to two outcomes
|
|
15
|
+
- **Category grouping** — optional row grouping with bold category headers
|
|
16
|
+
- **Deterministic layout presets** — fixed internal geometry for 4 core display cases
|
|
17
|
+
- **Adaptive small-table sizing** — compact height heuristic for low row counts
|
|
18
|
+
- **Static matplotlib output** — high-resolution, saveable figures
|
|
19
|
+
|
|
20
|
+
## Layout Examples
|
|
21
|
+
|
|
22
|
+
- `examples/layout_case1_general_true_two_outcomes.png`
|
|
23
|
+
- `examples/layout_case2_general_true_one_outcome.png`
|
|
24
|
+
- `examples/layout_case3_general_false_two_outcomes.png`
|
|
25
|
+
- `examples/layout_case4_general_false_one_outcome.png`
|
|
26
|
+
|
|
27
|
+
## Installation
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
pip install forestplotx
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
Requires Python ≥ 3.10. Dependencies: `matplotlib>=3.7`, `numpy>=1.24`, `pandas>=2.0`.
|
|
34
|
+
|
|
35
|
+
### Development install (reproducible environment)
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
pip install -r requirements.txt # install the exact pinned versions used during development
|
|
39
|
+
pip install -e ".[dev]" # install forestplotx itself in editable mode
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
`requirements.txt` pins the full transitive closure of runtime + test dependencies. `pyproject.toml` declares the minimum-version constraints used when installing normally.
|
|
43
|
+
|
|
44
|
+
## Quick Start
|
|
45
|
+
|
|
46
|
+
```python
|
|
47
|
+
import pandas as pd
|
|
48
|
+
import forestplotx as fpx
|
|
49
|
+
|
|
50
|
+
# Example: logistic regression output
|
|
51
|
+
df = pd.DataFrame({
|
|
52
|
+
"predictor": ["Age", "Sex", "BMI", "Smoking"],
|
|
53
|
+
"outcome": ["Mortality"] * 4,
|
|
54
|
+
"Estimate": [-0.12, 0.85, 0.30, 0.55], # log-odds (pre-exponentiation)
|
|
55
|
+
"CI_low": [-0.35, 0.42, 0.05, 0.20],
|
|
56
|
+
"CI_high": [ 0.11, 1.28, 0.55, 0.90],
|
|
57
|
+
"p_value": [0.300, 0.001, 0.020, 0.003],
|
|
58
|
+
})
|
|
59
|
+
|
|
60
|
+
fig, axes = fpx.forest_plot(df, model_type="binom")
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
## Supported Model Types
|
|
64
|
+
|
|
65
|
+
| `model_type` | Link | Effect label | X-axis | Reference line |
|
|
66
|
+
|:-------------|:-----|:-------------|:-------|:---------------|
|
|
67
|
+
| `"binom"` | logit | OR | Odds Ratio (log scale) | 1.0 |
|
|
68
|
+
| `"gamma"` | log | Ratio | Ratio (log scale) | 1.0 |
|
|
69
|
+
| `"linear"` | identity | Coef | Effect Size | 0.0 |
|
|
70
|
+
| `"ordinal"` | logit | OR | Odds Ratio (log scale) | 1.0 |
|
|
71
|
+
|
|
72
|
+
The `link` parameter can override the default — for example, `model_type="binom", link="identity"` will skip exponentiation and plot on a linear scale.
|
|
73
|
+
|
|
74
|
+
## Input DataFrame
|
|
75
|
+
|
|
76
|
+
### Required columns
|
|
77
|
+
|
|
78
|
+
| Column | Description |
|
|
79
|
+
|:-------|:------------|
|
|
80
|
+
| `predictor` | Row labels (predictor names) |
|
|
81
|
+
| `outcome` | Outcome name (used for column headers and filtering) |
|
|
82
|
+
| Effect column | One of: `OR`, `Ratio`, `Estimate`, `beta`, `Coef`, `effect` |
|
|
83
|
+
| `CI_low` / `ci_low` | Lower bound of 95% CI |
|
|
84
|
+
| `CI_high` / `ci_high` | Upper bound of 95% CI |
|
|
85
|
+
|
|
86
|
+
### Optional columns
|
|
87
|
+
|
|
88
|
+
| Column | Description |
|
|
89
|
+
|:-------|:------------|
|
|
90
|
+
| `p_value` | P-value (bold formatting applied when < 0.05) |
|
|
91
|
+
| `category` | Group predictors under category headers |
|
|
92
|
+
| `n` | Event count |
|
|
93
|
+
| `N` | Total count |
|
|
94
|
+
|
|
95
|
+
**Note:** For `logit`/`log` links, `exponentiate=None` applies model-based exponentiation with a warning; set `exponentiate=False` if your data is already on the effect scale.
|
|
96
|
+
Displayed CI values in the table use bracket notation: `[low,high]`.
|
|
97
|
+
|
|
98
|
+
## API Reference
|
|
99
|
+
|
|
100
|
+
### `forest_plot()`
|
|
101
|
+
|
|
102
|
+
```python
|
|
103
|
+
fig, axes = fpx.forest_plot(
|
|
104
|
+
df, # DataFrame with model output
|
|
105
|
+
model_type="binom", # "binom" | "gamma" | "linear" | "ordinal"
|
|
106
|
+
link=None, # Override default link function
|
|
107
|
+
exponentiate=None, # None=auto by link, True=force, False=disable
|
|
108
|
+
outcomes=None, # list[str], max 2; auto-detected if None
|
|
109
|
+
legend_labels=None, # list[str] override for legend entries
|
|
110
|
+
footer_text=None, # Italic footer (wrapped/capped internally)
|
|
111
|
+
show_general_stats=True, # Show n / N / Freq columns
|
|
112
|
+
bold_override=None, # Manual bold control per predictor/outcome
|
|
113
|
+
base_decimals=2, # Decimal places for effect / CI values
|
|
114
|
+
tick_style="decimal", # "decimal" or "power10" (readable log10 exponents)
|
|
115
|
+
clip_outliers=False, # Clip axis limits by quantiles (opt-in)
|
|
116
|
+
clip_quantiles=(0.02, 0.98), # Low/high quantiles used when clipping
|
|
117
|
+
point_colors=None, # list[str], up to 2 hex codes for outcome markers
|
|
118
|
+
table_only=False, # Render table without forest panel
|
|
119
|
+
show=True, # Call plt.show(); set False for programmatic use
|
|
120
|
+
save=None, # File path to save (e.g. "plot.png")
|
|
121
|
+
)
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
**Returns:** `(fig, axes)` — matplotlib Figure and axes tuple. When `show=False`, the figure is returned without being displayed, allowing further customization before you call `plt.show()` manually.
|
|
125
|
+
When `exponentiate=None`, auto exponentiation for log/logit links emits a warning so users can verify input scale.
|
|
126
|
+
|
|
127
|
+
### Layout Behavior (v1)
|
|
128
|
+
|
|
129
|
+
`forest_plot()` uses fixed internal layout presets (including internal font size) for:
|
|
130
|
+
|
|
131
|
+
1. `show_general_stats=True` + two outcomes
|
|
132
|
+
2. `show_general_stats=True` + one outcome
|
|
133
|
+
3. `show_general_stats=False` + two outcomes
|
|
134
|
+
4. `show_general_stats=False` + one outcome
|
|
135
|
+
|
|
136
|
+
This is intentional to keep output stable and publication-ready across common use cases.
|
|
137
|
+
`base_decimals` is capped at 3 internally to prevent table collisions in dense layouts.
|
|
138
|
+
For small row counts, figure height uses a tighter internal heuristic to reduce excessive whitespace.
|
|
139
|
+
Long footer text is wrapped and capped at 3 lines; any overflow is truncated with an ellipsis.
|
|
140
|
+
|
|
141
|
+
### Exponentiation Safety
|
|
142
|
+
|
|
143
|
+
- Use `exponentiate=None` (default) for model/link-based automatic handling.
|
|
144
|
+
- Use `exponentiate=False` if your input is already on the effect scale (e.g., OR/Ratio, not log-coefficients).
|
|
145
|
+
- Use `exponentiate=True` only when input is definitely on log scale and needs transformation.
|
|
146
|
+
- Read warnings: they include auto-exponentiation context and column mapping (effect column + `CI_low`/`CI_high` combined into `95% CI`).
|
|
147
|
+
|
|
148
|
+
### `normalize_model_output()`
|
|
149
|
+
|
|
150
|
+
```python
|
|
151
|
+
clean_df, config = fpx.normalize_model_output(
|
|
152
|
+
df, model_type="binom", link=None, exponentiate=None
|
|
153
|
+
)
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
Standardizes columns, applies exponentiation policy, and returns axis metadata.
|
|
157
|
+
`config` includes `exponentiated` and `renamed_columns` for transparency.
|
|
158
|
+
|
|
159
|
+
## Examples
|
|
160
|
+
|
|
161
|
+
### Category grouping
|
|
162
|
+
|
|
163
|
+
```python
|
|
164
|
+
df["category"] = ["Demographics", "Demographics", "Clinical", "Clinical"]
|
|
165
|
+
|
|
166
|
+
fig, axes = fpx.forest_plot(df, model_type="binom")
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
### Dual outcomes
|
|
170
|
+
|
|
171
|
+
```python
|
|
172
|
+
# DataFrame with two outcomes per predictor
|
|
173
|
+
fig, axes = fpx.forest_plot(
|
|
174
|
+
df_two_outcomes,
|
|
175
|
+
model_type="binom",
|
|
176
|
+
outcomes=["Mortality", "Readmission"],
|
|
177
|
+
legend_labels=["30-day mortality", "90-day readmission"],
|
|
178
|
+
)
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
### Custom marker colors
|
|
182
|
+
|
|
183
|
+
```python
|
|
184
|
+
fig, axes = fpx.forest_plot(
|
|
185
|
+
df_two_outcomes,
|
|
186
|
+
model_type="binom",
|
|
187
|
+
outcomes=["Mortality", "Readmission"],
|
|
188
|
+
point_colors=["#2C5F8A", "#D4763A"],
|
|
189
|
+
)
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
### Linear model
|
|
193
|
+
|
|
194
|
+
```python
|
|
195
|
+
fig, axes = fpx.forest_plot(df_linear, model_type="linear")
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
### Save to file
|
|
199
|
+
|
|
200
|
+
```python
|
|
201
|
+
fig, axes = fpx.forest_plot(df, model_type="binom", save="forest_plot.png")
|
|
202
|
+
```
|
|
203
|
+
|
|
204
|
+
### Programmatic use (no display)
|
|
205
|
+
|
|
206
|
+
```python
|
|
207
|
+
fig, axes = fpx.forest_plot(df, model_type="binom", show=False)
|
|
208
|
+
# Further customization...
|
|
209
|
+
fig.suptitle("My Forest Plot", fontsize=16)
|
|
210
|
+
fig.savefig("custom_plot.pdf", dpi=300)
|
|
211
|
+
```
|
|
212
|
+
|
|
213
|
+
In notebooks, `show=False` prevents the internal `plt.show()` call, but Jupyter may still auto-render
|
|
214
|
+
the returned figure object. Use `plt.close(fig)` to suppress display.
|
|
215
|
+
|
|
216
|
+
## Testing
|
|
217
|
+
|
|
218
|
+
The test suite lives in `tests/` and covers all internal modules with no image comparisons — structural and behavioral assertions only.
|
|
219
|
+
|
|
220
|
+
Install dev dependencies first (see [Installation](#installation)), then:
|
|
221
|
+
|
|
222
|
+
```bash
|
|
223
|
+
pytest
|
|
224
|
+
```
|
|
225
|
+
|
|
226
|
+
### Test files
|
|
227
|
+
|
|
228
|
+
| File | Module under test | Tests |
|
|
229
|
+
|:-----|:------------------|------:|
|
|
230
|
+
| `tests/test_normalization.py` | `_normalize.py` | 11 |
|
|
231
|
+
| `tests/test_layout.py` | `_layout.py` | 33 |
|
|
232
|
+
| `tests/test_axes_config.py` | `_axes_config.py` | 65 |
|
|
233
|
+
| `tests/test_plot_smoke.py` | `plot.py` | 2 |
|
|
234
|
+
|
|
235
|
+
### Coverage summary
|
|
236
|
+
|
|
237
|
+
**`test_layout.py`** — `build_row_layout()`
|
|
238
|
+
|
|
239
|
+
- Flat layout (no `category` column): sequential y-positions, correct row count, all `is_cat=False`, `"Uncategorized"` labels, predictor order preserved, required columns present
|
|
240
|
+
- NaN predictor rows dropped; empty DataFrame raises `ValueError`
|
|
241
|
+
- Categorized layout: category header rows inserted, total = categories + predictors (parametrized), correct `is_cat` flags and per-predictor category labels, all-NaN category falls back to flat
|
|
242
|
+
- Dual-outcome DataFrames: `unique()` deduplication keeps one row per predictor regardless of outcome count
|
|
243
|
+
|
|
244
|
+
**`test_axes_config.py`** — `configure_forest_axis()` and helpers
|
|
245
|
+
|
|
246
|
+
- `_nice_linear_step`: 8 parametrized input→output pairs, zero, negative, tiny positive values
|
|
247
|
+
- `_decimals_from_ticks`: empty/single-tick → 2, step-inferred decimals (0/1/2), `max_decimals` cap
|
|
248
|
+
- Reference line: `axvline` placed at correct x for logit (1.0), log (1.0), identity (0.0); `#910C07` color; dashed style; threshold override
|
|
249
|
+
- X-scale: `"log"` for logit/log links, `"linear"` for identity; empty data and `thresholds=None` do not crash
|
|
250
|
+
- X-label: correct label per link (`"Odds Ratio"` / `"Ratio"` / `"Effect Size"`), threshold override, font size propagated
|
|
251
|
+
- Y-ticks cleared; y-limits applied from `thresholds["y_limits"]`
|
|
252
|
+
- Spine visibility: top/right/left hidden, bottom visible
|
|
253
|
+
- X-limits contain full data range for log and linear axes; negative reference raises `ValueError`; span=0 edge case handled
|
|
254
|
+
- End-to-end parametrized across all four model types: binom, gamma, linear, ordinal
|
|
255
|
+
- `show_general_stats=True/False` both produce consistent output (documents no-op behaviour on axis)
|
|
256
|
+
- Tick count heuristic: `num_ticks` in {3, 5, 7} for log and linear axes
|
|
257
|
+
- `tick_style="power10"` uses readable rounded log10 exponents; single vs dual outcome `lo_all`/`hi_all` arrays both handled
|
|
258
|
+
|
|
259
|
+
## Scope
|
|
260
|
+
|
|
261
|
+
`forestplotx` v1.0 is intentionally focused. It produces static, publication-quality forest plots for common regression model types.
|
|
262
|
+
|
|
263
|
+
**Not included:** interactive plots, Cox/Poisson models, theming engine, or GUI.
|
|
264
|
+
|
|
265
|
+
## License
|
|
266
|
+
|
|
267
|
+
MIT
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "forestplotx"
|
|
7
|
+
version = "1.0.0"
|
|
8
|
+
description = "Publication-ready forest plots for regression model outputs in Python."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = { text = "MIT" }
|
|
11
|
+
authors = [
|
|
12
|
+
{ name = "Shervin Taheripour", email = "shervintaheripour@fastmail.com" },
|
|
13
|
+
]
|
|
14
|
+
requires-python = ">=3.10"
|
|
15
|
+
classifiers = [
|
|
16
|
+
"Development Status :: 3 - Alpha",
|
|
17
|
+
"Intended Audience :: Science/Research",
|
|
18
|
+
"License :: OSI Approved :: MIT License",
|
|
19
|
+
"Operating System :: OS Independent",
|
|
20
|
+
"Programming Language :: Python :: 3",
|
|
21
|
+
"Programming Language :: Python :: 3.10",
|
|
22
|
+
"Programming Language :: Python :: 3.11",
|
|
23
|
+
"Programming Language :: Python :: 3.12",
|
|
24
|
+
"Topic :: Scientific/Engineering :: Visualization",
|
|
25
|
+
]
|
|
26
|
+
dependencies = [
|
|
27
|
+
"matplotlib>=3.7",
|
|
28
|
+
"numpy>=1.24",
|
|
29
|
+
"pandas>=2.0",
|
|
30
|
+
]
|
|
31
|
+
|
|
32
|
+
[project.urls]
|
|
33
|
+
Homepage = "https://github.com/shervin-taheripour/forestplotx"
|
|
34
|
+
Repository = "https://github.com/shervin-taheripour/forestplotx"
|
|
35
|
+
Issues = "https://github.com/shervin-taheripour/forestplotx/issues"
|
|
36
|
+
|
|
37
|
+
[project.optional-dependencies]
|
|
38
|
+
dev = [
|
|
39
|
+
"pytest>=7.0",
|
|
40
|
+
]
|
|
41
|
+
|
|
42
|
+
[tool.setuptools.packages.find]
|
|
43
|
+
where = ["src"]
|
|
44
|
+
|
|
45
|
+
[tool.setuptools.package-data]
|
|
46
|
+
forestplotx = ["py.typed"]
|
|
47
|
+
|
|
48
|
+
[tool.pytest.ini_options]
|
|
49
|
+
pythonpath = ["src"]
|