forestplotx 1.0.1__tar.gz → 1.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- forestplotx-1.1.0/PKG-INFO +201 -0
- forestplotx-1.1.0/README.md +174 -0
- {forestplotx-1.0.1 → forestplotx-1.1.0}/pyproject.toml +1 -1
- {forestplotx-1.0.1 → forestplotx-1.1.0}/src/forestplotx/__init__.py +1 -1
- {forestplotx-1.0.1 → forestplotx-1.1.0}/src/forestplotx/_axes_config.py +144 -48
- {forestplotx-1.0.1 → forestplotx-1.1.0}/src/forestplotx/plot.py +200 -61
- forestplotx-1.1.0/src/forestplotx.egg-info/PKG-INFO +201 -0
- forestplotx-1.1.0/tests/test_plot_smoke.py +253 -0
- forestplotx-1.0.1/PKG-INFO +0 -328
- forestplotx-1.0.1/README.md +0 -301
- forestplotx-1.0.1/src/forestplotx.egg-info/PKG-INFO +0 -328
- forestplotx-1.0.1/tests/test_plot_smoke.py +0 -122
- {forestplotx-1.0.1 → forestplotx-1.1.0}/LICENSE +0 -0
- {forestplotx-1.0.1 → forestplotx-1.1.0}/setup.cfg +0 -0
- {forestplotx-1.0.1 → forestplotx-1.1.0}/src/forestplotx/_layout.py +0 -0
- {forestplotx-1.0.1 → forestplotx-1.1.0}/src/forestplotx/_normalize.py +0 -0
- {forestplotx-1.0.1 → forestplotx-1.1.0}/src/forestplotx/py.typed +0 -0
- {forestplotx-1.0.1 → forestplotx-1.1.0}/src/forestplotx.egg-info/SOURCES.txt +0 -0
- {forestplotx-1.0.1 → forestplotx-1.1.0}/src/forestplotx.egg-info/dependency_links.txt +0 -0
- {forestplotx-1.0.1 → forestplotx-1.1.0}/src/forestplotx.egg-info/requires.txt +0 -0
- {forestplotx-1.0.1 → forestplotx-1.1.0}/src/forestplotx.egg-info/top_level.txt +0 -0
- {forestplotx-1.0.1 → forestplotx-1.1.0}/tests/test_axes_config.py +0 -0
- {forestplotx-1.0.1 → forestplotx-1.1.0}/tests/test_layout.py +0 -0
- {forestplotx-1.0.1 → forestplotx-1.1.0}/tests/test_normalization.py +0 -0
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: forestplotx
|
|
3
|
+
Version: 1.1.0
|
|
4
|
+
Summary: Publication-ready forest plots for regression model outputs in Python.
|
|
5
|
+
Author-email: Shervin Taheripour <shervintaheripour@fastmail.com>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/shervin-taheripour/forestplotx
|
|
8
|
+
Project-URL: Repository, https://github.com/shervin-taheripour/forestplotx
|
|
9
|
+
Project-URL: Issues, https://github.com/shervin-taheripour/forestplotx/issues
|
|
10
|
+
Classifier: Development Status :: 3 - Alpha
|
|
11
|
+
Classifier: Intended Audience :: Science/Research
|
|
12
|
+
Classifier: Operating System :: OS Independent
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Topic :: Scientific/Engineering :: Visualization
|
|
18
|
+
Requires-Python: >=3.10
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
License-File: LICENSE
|
|
21
|
+
Requires-Dist: matplotlib>=3.7
|
|
22
|
+
Requires-Dist: numpy>=1.24
|
|
23
|
+
Requires-Dist: pandas>=2.0
|
|
24
|
+
Provides-Extra: dev
|
|
25
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
26
|
+
Dynamic: license-file
|
|
27
|
+
|
|
28
|
+
# forestplotx
|
|
29
|
+
|
|
30
|
+
`forestplotx` creates publication-style forest plots that combine a clean text table with a forest panel, with deterministic formatting for common regression outputs.
|
|
31
|
+
|
|
32
|
+
## Features
|
|
33
|
+
|
|
34
|
+
- Publication-style table + forest composition
|
|
35
|
+
- Supports `binom`, `gamma`, `linear`, and `ordinal` model outputs
|
|
36
|
+
- One or two outcomes per plot
|
|
37
|
+
- Deterministic internal layout presets for stable output
|
|
38
|
+
- Readable log-axis handling in both `decimal` and `power10` styles
|
|
39
|
+
- Optional footer text for manuscript-style notes
|
|
40
|
+
- Visible column-header and x-axis label overrides
|
|
41
|
+
|
|
42
|
+
**Note:** For `logit`/`log` links, `exponentiate=None` applies model-based exponentiation and emits a warning; set `exponentiate=False` if your data is already on the effect scale.
|
|
43
|
+
Displayed CI values in the table use bracket notation: `[low,high]`.
|
|
44
|
+
|
|
45
|
+
## API Reference
|
|
46
|
+
|
|
47
|
+
### `forest_plot()`
|
|
48
|
+
|
|
49
|
+
```python
|
|
50
|
+
fig, axes = fpx.forest_plot(
|
|
51
|
+
df, # DataFrame with model output
|
|
52
|
+
outcomes=None, # list[str], max 2; auto-detected if None
|
|
53
|
+
save=None, # File path to save (e.g. "plot.png")
|
|
54
|
+
model_type="binom", # "binom" | "gamma" | "linear" | "ordinal"
|
|
55
|
+
link=None, # Override default link function
|
|
56
|
+
exponentiate=None, # None=auto by link, True=force, False=disable
|
|
57
|
+
table_only=False, # Render table without forest panel
|
|
58
|
+
legend_labels=None, # list[str] override for legend entries
|
|
59
|
+
point_colors=None, # list[str], up to 2 hex codes for outcome markers
|
|
60
|
+
column_labels=None, # dict override for table column labels
|
|
61
|
+
x_label_override=None, # Override forest x-axis label
|
|
62
|
+
footer_text=None, # Italic footer (wrapped/capped internally)
|
|
63
|
+
tick_style="decimal", # "decimal" or "power10"
|
|
64
|
+
clip_outliers=False, # Opt-in clipping of extreme CI-driven axis outliers
|
|
65
|
+
clip_quantiles=(0.02, 0.98), # Quantile bounds for linear-axis clipping; log axes clip by magnitude instead
|
|
66
|
+
base_decimals=2, # Decimal places for effect / CI values
|
|
67
|
+
show=True, # Call plt.show(); set False for programmatic use
|
|
68
|
+
show_general_stats=True, # Show n / N / Freq columns
|
|
69
|
+
bold_override=None, # Manual bold control per predictor/outcome
|
|
70
|
+
)
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
**Returns:** `(fig, axes)` — matplotlib Figure and axes tuple. When `show=False`, the figure is returned without displaying, allowing further customization before calling `plt.show()` manually.
|
|
74
|
+
When `exponentiate=None`, auto exponentiation for log/logit links emits a warning so users can verify input scale.
|
|
75
|
+
|
|
76
|
+
### Layout Behavior (v1)
|
|
77
|
+
|
|
78
|
+
`forest_plot()` uses fixed internal layout presets (including internal font size) for:
|
|
79
|
+
|
|
80
|
+
1. `show_general_stats=True` + two outcomes
|
|
81
|
+
2. `show_general_stats=True` + one outcome
|
|
82
|
+
3. `show_general_stats=False` + two outcomes
|
|
83
|
+
4. `show_general_stats=False` + one outcome
|
|
84
|
+
|
|
85
|
+
This is intentional to keep output stable and publication-ready across common use cases.
|
|
86
|
+
`base_decimals` is capped at 3 internally to prevent table collisions in dense layouts.
|
|
87
|
+
For small row counts, figure height uses a tighter internal heuristic to reduce excessive whitespace.
|
|
88
|
+
Long footer text is wrapped and capped to 3 lines with ellipsis for overflow protection.
|
|
89
|
+
Within each layout case, deterministic pressure tiers are applied internally (`standard`, `expanded`, `max`) based on the final rendered string widths.
|
|
90
|
+
Predictor labels are truncated (with warning) when they exceed layout-specific caps:
|
|
91
|
+
1. `show_general_stats=True` + two outcomes: 21 chars
|
|
92
|
+
2. `show_general_stats=True` + one outcome: 24 chars
|
|
93
|
+
3. `show_general_stats=False` + two outcomes: 26 chars
|
|
94
|
+
4. `show_general_stats=False` + one outcome: 25 chars
|
|
95
|
+
When general stats are shown, large `n`/`N` values are compacted (e.g., `9.9k`) to preserve column readability.
|
|
96
|
+
Compaction activates only when counts reach `>= 1,000` and uses a shared unit across both `n` and `N` (`k`, `M`, `B`, `T`) for consistent within-row formatting.
|
|
97
|
+
Very large values beyond display range are capped as `>999T` with a warning.
|
|
98
|
+
Effect / CI display uses the same compact unit family (`k`, `M`, `B`, `T`) once values reach `>= 1,000`, followed by deterministic decimal trimming to keep tables readable.
|
|
99
|
+
Rows are fully grayed only when all displayed outcomes are missing; if at least one outcome is valid, only the missing outcome triplet (`effect`, `95% CI`, `p`) is blanked and gray-marked.
|
|
100
|
+
|
|
101
|
+
### Title Handling
|
|
102
|
+
|
|
103
|
+
`forest_plot()` intentionally does not include a `title` parameter in v1.
|
|
104
|
+
This is by design for publication workflows where figure titles/captions are managed in the manuscript rather than embedded inside the plot image.
|
|
105
|
+
If needed for slides or reports, add a title externally on the returned matplotlib figure object.
|
|
106
|
+
|
|
107
|
+
### Exponentiation Safety
|
|
108
|
+
|
|
109
|
+
- Use `exponentiate=None` (default) for model/link-based automatic handling.
|
|
110
|
+
- Use `exponentiate=False` if your input is already on the effect scale (e.g., OR/Ratio, not log-coefficients).
|
|
111
|
+
- Use `exponentiate=True` only when input is definitely on the log scale and needs transformation.
|
|
112
|
+
- Read warnings: they include auto-exponentiation context and column mapping (effect column + `CI_low`/`CI_high` combined into `95% CI`).
|
|
113
|
+
|
|
114
|
+
### Axis Behavior
|
|
115
|
+
|
|
116
|
+
- Log-axis limits are data-driven after optional clipping; they are not forced symmetric around the reference value.
|
|
117
|
+
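- On log axes, `clip_outliers=True` uses magnitude-based clipping centered on the median CI bounds: lower bounds more than 10x below the median lower bound, and upper bounds more than 10x above the median upper bound, are ignored when computing axis limits. This is more robust than quantile clipping for small samples with one extreme interval.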
|
|
118
|
+
- `tick_style="decimal"` uses readable decimal ticks:
|
|
119
|
+
- dense near-reference ticks for moderate spans
|
|
120
|
+
- `1-2-5` progression for wider spans
|
|
121
|
+
- compact notation for very large tick labels when needed
|
|
122
|
+
- `tick_style="power10"` keeps readable power-of-ten labels for very wide ratio ranges.
|
|
123
|
+
|
|
124
|
+
### Label Overrides
|
|
125
|
+
|
|
126
|
+
Use `column_labels` to override visible table headers without changing the underlying model type:
|
|
127
|
+
|
|
128
|
+
```python
|
|
129
|
+
fig, axes = fpx.forest_plot(
|
|
130
|
+
df,
|
|
131
|
+
model_type="gamma",
|
|
132
|
+
exponentiate=False,
|
|
133
|
+
column_labels={
|
|
134
|
+
"effect": "IRR",
|
|
135
|
+
"ci": "95% CI",
|
|
136
|
+
"p": "P",
|
|
137
|
+
"n": "Cases",
|
|
138
|
+
"N": "Total",
|
|
139
|
+
"Freq": "Share",
|
|
140
|
+
},
|
|
141
|
+
x_label_override="IRR",
|
|
142
|
+
)
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
Supported `column_labels` keys:
|
|
146
|
+
- `effect`
|
|
147
|
+
- `ci`
|
|
148
|
+
- `p`
|
|
149
|
+
- `n`
|
|
150
|
+
- `N`
|
|
151
|
+
- `Freq`
|
|
152
|
+
|
|
153
|
+
### `normalize_model_output()`
|
|
154
|
+
|
|
155
|
+
```python
|
|
156
|
+
clean_df, config = fpx.normalize_model_output(
|
|
157
|
+
df, model_type="binom", link=None, exponentiate=None
|
|
158
|
+
)
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
Standardizes columns, applies exponentiation policy, and returns axis metadata.
|
|
162
|
+
`config` includes `exponentiated` and `renamed_columns` for transparency.
|
|
163
|
+
|
|
164
|
+
## Examples
|
|
165
|
+
|
|
166
|
+
### Category grouping
|
|
167
|
+
|
|
168
|
+
```python
|
|
169
|
+
df["category"] = ["Demographics", "Demographics", "Clinical", "Clinical"]
|
|
170
|
+
|
|
171
|
+
fig, axes = fpx.forest_plot(df, model_type="binom")
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
### Dual outcomes
|
|
175
|
+
|
|
176
|
+
```python
|
|
177
|
+
# DataFrame with two outcomes per predictor
|
|
178
|
+
fig, axes = fpx.forest_plot(
|
|
179
|
+
df_two_outcomes,
|
|
180
|
+
model_type="binom",
|
|
181
|
+
outcomes=["Mortality", "Readmission"],
|
|
182
|
+
legend_labels=["30-day mortality", "90-day readmission"],
|
|
183
|
+
)
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
### Custom marker colors
|
|
187
|
+
|
|
188
|
+
```python
|
|
189
|
+
fig, axes = fpx.forest_plot(
|
|
190
|
+
df_two_outcomes,
|
|
191
|
+
model_type="binom",
|
|
192
|
+
outcomes=["Mortality", "Readmission"],
|
|
193
|
+
point_colors=["#2C5F8A", "#D4763A"],
|
|
194
|
+
)
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
### Linear model
|
|
198
|
+
|
|
199
|
+
```python
|
|
200
|
+
fig, axes = fpx.forest_plot(df_linear, model_type="linear")
|
|
201
|
+
```
|
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
# forestplotx
|
|
2
|
+
|
|
3
|
+
`forestplotx` creates publication-style forest plots that combine a clean text table with a forest panel, with deterministic formatting for common regression outputs.
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
- Publication-style table + forest composition
|
|
8
|
+
- Supports `binom`, `gamma`, `linear`, and `ordinal` model outputs
|
|
9
|
+
- One or two outcomes per plot
|
|
10
|
+
- Deterministic internal layout presets for stable output
|
|
11
|
+
- Readable log-axis handling in both `decimal` and `power10` styles
|
|
12
|
+
- Optional footer text for manuscript-style notes
|
|
13
|
+
- Visible column-header and x-axis label overrides
|
|
14
|
+
|
|
15
|
+
**Note:** For `logit`/`log` links, `exponentiate=None` applies model-based exponentiation and emits a warning; set `exponentiate=False` if your data is already on the effect scale.
|
|
16
|
+
Displayed CI values in the table use bracket notation: `[low,high]`.
|
|
17
|
+
|
|
18
|
+
## API Reference
|
|
19
|
+
|
|
20
|
+
### `forest_plot()`
|
|
21
|
+
|
|
22
|
+
```python
|
|
23
|
+
fig, axes = fpx.forest_plot(
|
|
24
|
+
df, # DataFrame with model output
|
|
25
|
+
outcomes=None, # list[str], max 2; auto-detected if None
|
|
26
|
+
save=None, # File path to save (e.g. "plot.png")
|
|
27
|
+
model_type="binom", # "binom" | "gamma" | "linear" | "ordinal"
|
|
28
|
+
link=None, # Override default link function
|
|
29
|
+
exponentiate=None, # None=auto by link, True=force, False=disable
|
|
30
|
+
table_only=False, # Render table without forest panel
|
|
31
|
+
legend_labels=None, # list[str] override for legend entries
|
|
32
|
+
point_colors=None, # list[str], up to 2 hex codes for outcome markers
|
|
33
|
+
column_labels=None, # dict override for table column labels
|
|
34
|
+
x_label_override=None, # Override forest x-axis label
|
|
35
|
+
footer_text=None, # Italic footer (wrapped/capped internally)
|
|
36
|
+
tick_style="decimal", # "decimal" or "power10"
|
|
37
|
+
clip_outliers=False, # Opt-in clipping of extreme CI-driven axis outliers
|
|
38
|
+
clip_quantiles=(0.02, 0.98), # Quantile bounds for linear-axis clipping; log axes clip by magnitude instead
|
|
39
|
+
base_decimals=2, # Decimal places for effect / CI values
|
|
40
|
+
show=True, # Call plt.show(); set False for programmatic use
|
|
41
|
+
show_general_stats=True, # Show n / N / Freq columns
|
|
42
|
+
bold_override=None, # Manual bold control per predictor/outcome
|
|
43
|
+
)
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
**Returns:** `(fig, axes)` — matplotlib Figure and axes tuple. When `show=False`, the figure is returned without displaying, allowing further customization before calling `plt.show()` manually.
|
|
47
|
+
When `exponentiate=None`, auto exponentiation for log/logit links emits a warning so users can verify input scale.
|
|
48
|
+
|
|
49
|
+
### Layout Behavior (v1)
|
|
50
|
+
|
|
51
|
+
`forest_plot()` uses fixed internal layout presets (including internal font size) for:
|
|
52
|
+
|
|
53
|
+
1. `show_general_stats=True` + two outcomes
|
|
54
|
+
2. `show_general_stats=True` + one outcome
|
|
55
|
+
3. `show_general_stats=False` + two outcomes
|
|
56
|
+
4. `show_general_stats=False` + one outcome
|
|
57
|
+
|
|
58
|
+
This is intentional to keep output stable and publication-ready across common use cases.
|
|
59
|
+
`base_decimals` is capped at 3 internally to prevent table collisions in dense layouts.
|
|
60
|
+
For small row counts, figure height uses a tighter internal heuristic to reduce excessive whitespace.
|
|
61
|
+
Long footer text is wrapped and capped to 3 lines with ellipsis for overflow protection.
|
|
62
|
+
Within each layout case, deterministic pressure tiers are applied internally (`standard`, `expanded`, `max`) based on the final rendered string widths.
|
|
63
|
+
Predictor labels are truncated (with warning) when they exceed layout-specific caps:
|
|
64
|
+
1. `show_general_stats=True` + two outcomes: 21 chars
|
|
65
|
+
2. `show_general_stats=True` + one outcome: 24 chars
|
|
66
|
+
3. `show_general_stats=False` + two outcomes: 26 chars
|
|
67
|
+
4. `show_general_stats=False` + one outcome: 25 chars
|
|
68
|
+
When general stats are shown, large `n`/`N` values are compacted (e.g., `9.9k`) to preserve column readability.
|
|
69
|
+
Compaction activates only when counts reach `>= 1,000` and uses a shared unit across both `n` and `N` (`k`, `M`, `B`, `T`) for consistent within-row formatting.
|
|
70
|
+
Very large values beyond display range are capped as `>999T` with a warning.
|
|
71
|
+
Effect / CI display uses the same compact unit family (`k`, `M`, `B`, `T`) once values reach `>= 1,000`, followed by deterministic decimal trimming to keep tables readable.
|
|
72
|
+
Rows are fully grayed only when all displayed outcomes are missing; if at least one outcome is valid, only the missing outcome triplet (`effect`, `95% CI`, `p`) is blanked and gray-marked.
|
|
73
|
+
|
|
74
|
+
### Title Handling
|
|
75
|
+
|
|
76
|
+
`forest_plot()` intentionally does not include a `title` parameter in v1.
|
|
77
|
+
This is by design for publication workflows where figure titles/captions are managed in the manuscript rather than embedded inside the plot image.
|
|
78
|
+
If needed for slides or reports, add a title externally on the returned matplotlib figure object.
|
|
79
|
+
|
|
80
|
+
### Exponentiation Safety
|
|
81
|
+
|
|
82
|
+
- Use `exponentiate=None` (default) for model/link-based automatic handling.
|
|
83
|
+
- Use `exponentiate=False` if your input is already on the effect scale (e.g., OR/Ratio, not log-coefficients).
|
|
84
|
+
- Use `exponentiate=True` only when input is definitely on the log scale and needs transformation.
|
|
85
|
+
- Read warnings: they include auto-exponentiation context and column mapping (effect column + `CI_low`/`CI_high` combined into `95% CI`).
|
|
86
|
+
|
|
87
|
+
### Axis Behavior
|
|
88
|
+
|
|
89
|
+
- Log-axis limits are data-driven after optional clipping; they are not forced symmetric around the reference value.
|
|
90
|
+
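- On log axes, `clip_outliers=True` uses magnitude-based clipping centered on the median CI bounds: lower bounds more than 10x below the median lower bound, and upper bounds more than 10x above the median upper bound, are ignored when computing axis limits. This is more robust than quantile clipping for small samples with one extreme interval.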
|
|
91
|
+
- `tick_style="decimal"` uses readable decimal ticks:
|
|
92
|
+
- dense near-reference ticks for moderate spans
|
|
93
|
+
- `1-2-5` progression for wider spans
|
|
94
|
+
- compact notation for very large tick labels when needed
|
|
95
|
+
- `tick_style="power10"` keeps readable power-of-ten labels for very wide ratio ranges.
|
|
96
|
+
|
|
97
|
+
### Label Overrides
|
|
98
|
+
|
|
99
|
+
Use `column_labels` to override visible table headers without changing the underlying model type:
|
|
100
|
+
|
|
101
|
+
```python
|
|
102
|
+
fig, axes = fpx.forest_plot(
|
|
103
|
+
df,
|
|
104
|
+
model_type="gamma",
|
|
105
|
+
exponentiate=False,
|
|
106
|
+
column_labels={
|
|
107
|
+
"effect": "IRR",
|
|
108
|
+
"ci": "95% CI",
|
|
109
|
+
"p": "P",
|
|
110
|
+
"n": "Cases",
|
|
111
|
+
"N": "Total",
|
|
112
|
+
"Freq": "Share",
|
|
113
|
+
},
|
|
114
|
+
x_label_override="IRR",
|
|
115
|
+
)
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
Supported `column_labels` keys:
|
|
119
|
+
- `effect`
|
|
120
|
+
- `ci`
|
|
121
|
+
- `p`
|
|
122
|
+
- `n`
|
|
123
|
+
- `N`
|
|
124
|
+
- `Freq`
|
|
125
|
+
|
|
126
|
+
### `normalize_model_output()`
|
|
127
|
+
|
|
128
|
+
```python
|
|
129
|
+
clean_df, config = fpx.normalize_model_output(
|
|
130
|
+
df, model_type="binom", link=None, exponentiate=None
|
|
131
|
+
)
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
Standardizes columns, applies exponentiation policy, and returns axis metadata.
|
|
135
|
+
`config` includes `exponentiated` and `renamed_columns` for transparency.
|
|
136
|
+
|
|
137
|
+
## Examples
|
|
138
|
+
|
|
139
|
+
### Category grouping
|
|
140
|
+
|
|
141
|
+
```python
|
|
142
|
+
df["category"] = ["Demographics", "Demographics", "Clinical", "Clinical"]
|
|
143
|
+
|
|
144
|
+
fig, axes = fpx.forest_plot(df, model_type="binom")
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
### Dual outcomes
|
|
148
|
+
|
|
149
|
+
```python
|
|
150
|
+
# DataFrame with two outcomes per predictor
|
|
151
|
+
fig, axes = fpx.forest_plot(
|
|
152
|
+
df_two_outcomes,
|
|
153
|
+
model_type="binom",
|
|
154
|
+
outcomes=["Mortality", "Readmission"],
|
|
155
|
+
legend_labels=["30-day mortality", "90-day readmission"],
|
|
156
|
+
)
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
### Custom marker colors
|
|
160
|
+
|
|
161
|
+
```python
|
|
162
|
+
fig, axes = fpx.forest_plot(
|
|
163
|
+
df_two_outcomes,
|
|
164
|
+
model_type="binom",
|
|
165
|
+
outcomes=["Mortality", "Readmission"],
|
|
166
|
+
point_colors=["#2C5F8A", "#D4763A"],
|
|
167
|
+
)
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
### Linear model
|
|
171
|
+
|
|
172
|
+
```python
|
|
173
|
+
fig, axes = fpx.forest_plot(df_linear, model_type="linear")
|
|
174
|
+
```
|
|
@@ -119,18 +119,6 @@ def configure_forest_axis(
|
|
|
119
119
|
if not len(finite_lo) or not len(finite_hi):
|
|
120
120
|
return ax
|
|
121
121
|
|
|
122
|
-
if clip_outliers:
|
|
123
|
-
q_low, q_high = clip_quantiles
|
|
124
|
-
q_low = float(q_low)
|
|
125
|
-
q_high = float(q_high)
|
|
126
|
-
if not (0.0 <= q_low < q_high <= 1.0):
|
|
127
|
-
raise ValueError("clip_quantiles must satisfy 0 <= low < high <= 1.")
|
|
128
|
-
data_min = float(np.quantile(finite_lo, q_low))
|
|
129
|
-
data_max = float(np.quantile(finite_hi, q_high))
|
|
130
|
-
else:
|
|
131
|
-
data_min = float(np.min(finite_lo))
|
|
132
|
-
data_max = float(np.max(finite_hi))
|
|
133
|
-
|
|
134
122
|
ax.set_xscale("log" if use_log else "linear")
|
|
135
123
|
|
|
136
124
|
if use_log:
|
|
@@ -154,12 +142,11 @@ def configure_forest_axis(
|
|
|
154
142
|
UserWarning,
|
|
155
143
|
stacklevel=2,
|
|
156
144
|
)
|
|
145
|
+
positive_lo = finite_lo[finite_lo > 0]
|
|
146
|
+
positive_hi = finite_hi[finite_hi > 0]
|
|
147
|
+
positive_eff = finite_eff[finite_eff > 0]
|
|
157
148
|
positive_values = np.concatenate(
|
|
158
|
-
[
|
|
159
|
-
finite_lo[finite_lo > 0],
|
|
160
|
-
finite_hi[finite_hi > 0],
|
|
161
|
-
finite_eff[finite_eff > 0],
|
|
162
|
-
]
|
|
149
|
+
[positive_lo, positive_hi, positive_eff]
|
|
163
150
|
)
|
|
164
151
|
positive_candidates = [*positive_values.tolist(), ref_val]
|
|
165
152
|
if not positive_candidates:
|
|
@@ -167,27 +154,113 @@ def configure_forest_axis(
|
|
|
167
154
|
"Log-scaled forest axis requires positive effect/CI values."
|
|
168
155
|
)
|
|
169
156
|
|
|
170
|
-
|
|
171
|
-
|
|
157
|
+
if clip_outliers and len(positive_values):
|
|
158
|
+
clip_factor = 10.0
|
|
159
|
+
|
|
160
|
+
if len(positive_lo):
|
|
161
|
+
lo_baseline = float(np.median(positive_lo))
|
|
162
|
+
lo_threshold = lo_baseline / clip_factor if lo_baseline > 0 else 0.0
|
|
163
|
+
lo_inliers = positive_lo[positive_lo >= lo_threshold]
|
|
164
|
+
clipped_pmin = float(np.min(lo_inliers)) if len(lo_inliers) else float(np.min(positive_lo))
|
|
165
|
+
else:
|
|
166
|
+
clipped_pmin = float(np.min(positive_values))
|
|
167
|
+
|
|
168
|
+
if len(positive_hi):
|
|
169
|
+
hi_baseline = float(np.median(positive_hi))
|
|
170
|
+
hi_threshold = hi_baseline * clip_factor
|
|
171
|
+
hi_inliers = positive_hi[positive_hi <= hi_threshold]
|
|
172
|
+
clipped_pmax = float(np.max(hi_inliers)) if len(hi_inliers) else float(np.max(positive_hi))
|
|
173
|
+
else:
|
|
174
|
+
clipped_pmax = float(np.max(positive_values))
|
|
175
|
+
|
|
176
|
+
pmin = min(clipped_pmin, ref_val)
|
|
177
|
+
pmax = max(clipped_pmax, ref_val)
|
|
178
|
+
else:
|
|
179
|
+
pmin = min(positive_candidates)
|
|
180
|
+
pmax = max(positive_candidates)
|
|
172
181
|
target_ticks = max(int(num_ticks), 3)
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
raw_step = axis_span_decades /
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
182
|
+
log_min = math.log10(pmin)
|
|
183
|
+
log_max = math.log10(pmax)
|
|
184
|
+
span_decades = max(log_max - log_min, 0.0)
|
|
185
|
+
pad_decades = max(0.08, min(0.25, span_decades * 0.08))
|
|
186
|
+
axis_log_min = log_min - pad_decades
|
|
187
|
+
axis_log_max = log_max + pad_decades
|
|
188
|
+
axis_span_decades = axis_log_max - axis_log_min
|
|
189
|
+
|
|
190
|
+
raw_step = axis_span_decades / max(target_ticks - 1, 1)
|
|
191
|
+
if span_decades > 3:
|
|
192
|
+
step_decades = max(1.0, _nice_log_step(raw_step))
|
|
193
|
+
else:
|
|
194
|
+
step_decades = _nice_log_step(raw_step)
|
|
195
|
+
|
|
196
|
+
tick_start = math.ceil(axis_log_min / step_decades) * step_decades
|
|
197
|
+
tick_end = math.floor(axis_log_max / step_decades) * step_decades
|
|
198
|
+
if tick_end < tick_start:
|
|
199
|
+
tick_logs = np.array([axis_log_min, 0.0, axis_log_max], dtype=float)
|
|
200
|
+
else:
|
|
201
|
+
tick_logs = np.arange(
|
|
202
|
+
tick_start,
|
|
203
|
+
tick_end + 0.5 * step_decades,
|
|
204
|
+
step_decades,
|
|
205
|
+
)
|
|
206
|
+
if not np.any(np.isclose(tick_logs, 0.0, atol=1e-9)):
|
|
207
|
+
tick_logs = np.sort(np.append(tick_logs, 0.0))
|
|
208
|
+
|
|
209
|
+
xmin = 10 ** axis_log_min
|
|
210
|
+
xmax = 10 ** axis_log_max
|
|
189
211
|
ax.set_xlim(xmin, xmax)
|
|
190
|
-
ticks_in =
|
|
212
|
+
ticks_in = np.power(10.0, tick_logs)
|
|
213
|
+
ticks_in = ticks_in[(ticks_in >= xmin) & (ticks_in <= xmax)]
|
|
214
|
+
ticks_in = np.unique(np.asarray(ticks_in, dtype=float))
|
|
215
|
+
|
|
216
|
+
tick_data_min = max(pmin, np.nextafter(0.0, 1.0))
|
|
217
|
+
tick_data_max = pmax
|
|
218
|
+
moderate_decimal_span = (
|
|
219
|
+
tick_style == "decimal"
|
|
220
|
+
and pmin >= 0.2
|
|
221
|
+
and pmax <= 10.0
|
|
222
|
+
and span_decades <= 1.4
|
|
223
|
+
)
|
|
224
|
+
if moderate_decimal_span:
|
|
225
|
+
readable_ticks = np.array(
|
|
226
|
+
[0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.2, 1.5, 2.0, 3.0, 4.0, 5.0, 7.0, 10.0],
|
|
227
|
+
dtype=float,
|
|
228
|
+
)
|
|
229
|
+
ticks_in = readable_ticks[(readable_ticks >= tick_data_min) & (readable_ticks <= tick_data_max)]
|
|
230
|
+
if len(ticks_in) > 8:
|
|
231
|
+
keep = []
|
|
232
|
+
for idx, tick in enumerate(ticks_in):
|
|
233
|
+
if idx % 2 == 0 or math.isclose(tick, 1.0, abs_tol=1e-9):
|
|
234
|
+
keep.append(tick)
|
|
235
|
+
ticks_in = np.array(sorted(set(keep)), dtype=float)
|
|
236
|
+
elif tick_style == "decimal":
|
|
237
|
+
decade_min = int(math.floor(axis_log_min))
|
|
238
|
+
decade_max = int(math.ceil(axis_log_max))
|
|
239
|
+
readable_ticks = []
|
|
240
|
+
for decade in range(decade_min, decade_max + 1):
|
|
241
|
+
base = 10.0 ** decade
|
|
242
|
+
for mult in (1.0, 2.0, 5.0):
|
|
243
|
+
tick = mult * base
|
|
244
|
+
if tick_data_min <= tick <= tick_data_max:
|
|
245
|
+
readable_ticks.append(tick)
|
|
246
|
+
if readable_ticks:
|
|
247
|
+
ticks_in = np.array(sorted(set(readable_ticks)), dtype=float)
|
|
248
|
+
if not np.any(np.isclose(ticks_in, ref_val, atol=1e-9)) and tick_data_min <= ref_val <= tick_data_max:
|
|
249
|
+
ticks_in = np.array(sorted(np.append(ticks_in, ref_val)), dtype=float)
|
|
250
|
+
if len(ticks_in) > 9:
|
|
251
|
+
min_log_gap = axis_span_decades / 7.0
|
|
252
|
+
keep = [float(ticks_in[0])]
|
|
253
|
+
for tick in ticks_in[1:-1]:
|
|
254
|
+
if math.isclose(tick, ref_val, abs_tol=1e-9):
|
|
255
|
+
keep.append(float(tick))
|
|
256
|
+
continue
|
|
257
|
+
if math.log10(float(tick)) - math.log10(float(keep[-1])) >= min_log_gap:
|
|
258
|
+
keep.append(float(tick))
|
|
259
|
+
keep.append(float(ticks_in[-1]))
|
|
260
|
+
if tick_data_min <= ref_val <= tick_data_max and not any(math.isclose(t, ref_val, abs_tol=1e-9) for t in keep):
|
|
261
|
+
keep.append(ref_val)
|
|
262
|
+
ticks_in = np.array(sorted(set(keep)), dtype=float)
|
|
263
|
+
|
|
191
264
|
if len(ticks_in) < 3:
|
|
192
265
|
ticks_in = np.array([xmin, ref_val, xmax], dtype=float)
|
|
193
266
|
ax.xaxis.set_major_locator(FixedLocator(ticks_in))
|
|
@@ -195,25 +268,48 @@ def configure_forest_axis(
|
|
|
195
268
|
if tick_style == "power10":
|
|
196
269
|
|
|
197
270
|
def _power10_formatter(x: float, _pos: int) -> str:
|
|
198
|
-
exp = math.log10(x
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
return rf"$10^{{{exp_txt}}}$"
|
|
205
|
-
return rf"${_format_decimal(ref_val)}\times10^{{{exp_txt}}}$"
|
|
271
|
+
exp = round(math.log10(x), 6)
|
|
272
|
+
if math.isclose(exp, round(exp), abs_tol=1e-9):
|
|
273
|
+
exp_txt = str(int(round(exp)))
|
|
274
|
+
else:
|
|
275
|
+
exp_txt = f"{exp:.2f}".rstrip("0").rstrip(".")
|
|
276
|
+
return rf"$10^{{{exp_txt}}}$"
|
|
206
277
|
|
|
207
278
|
ax.xaxis.set_major_formatter(FuncFormatter(_power10_formatter))
|
|
208
279
|
else:
|
|
209
|
-
decimals =
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
280
|
+
decimals = _decimals_from_ticks(ticks_in)
|
|
281
|
+
|
|
282
|
+
def _decimal_log_formatter(x: float, _pos: int, d: int = decimals) -> str:
|
|
283
|
+
abs_x = abs(float(x))
|
|
284
|
+
if abs_x >= 1e12:
|
|
285
|
+
return _format_decimal(x / 1e12, precision=1).rstrip("0").rstrip(".") + "T"
|
|
286
|
+
if abs_x >= 1e9:
|
|
287
|
+
return _format_decimal(x / 1e9, precision=1).rstrip("0").rstrip(".") + "B"
|
|
288
|
+
if abs_x >= 1e6:
|
|
289
|
+
return _format_decimal(x / 1e6, precision=1).rstrip("0").rstrip(".") + "M"
|
|
290
|
+
if abs_x >= 1e3:
|
|
291
|
+
return _format_decimal(x / 1e3, precision=1).rstrip("0").rstrip(".") + "k"
|
|
292
|
+
if abs_x >= 10:
|
|
293
|
+
return _format_decimal(x, precision=0)
|
|
294
|
+
return _format_decimal(x, precision=max(d + 1, 1))
|
|
295
|
+
|
|
296
|
+
ax.xaxis.set_major_formatter(FuncFormatter(_decimal_log_formatter))
|
|
213
297
|
|
|
214
298
|
ax.xaxis.set_minor_locator(NullLocator())
|
|
215
299
|
ax.xaxis.set_minor_formatter(NullFormatter())
|
|
216
300
|
else:
|
|
301
|
+
if clip_outliers:
|
|
302
|
+
q_low, q_high = clip_quantiles
|
|
303
|
+
q_low = float(q_low)
|
|
304
|
+
q_high = float(q_high)
|
|
305
|
+
if not (0.0 <= q_low < q_high <= 1.0):
|
|
306
|
+
raise ValueError("clip_quantiles must satisfy 0 <= low < high <= 1.")
|
|
307
|
+
data_min = float(np.quantile(finite_lo, q_low))
|
|
308
|
+
data_max = float(np.quantile(finite_hi, q_high))
|
|
309
|
+
else:
|
|
310
|
+
data_min = float(np.min(finite_lo))
|
|
311
|
+
data_max = float(np.max(finite_hi))
|
|
312
|
+
|
|
217
313
|
if clip_outliers:
|
|
218
314
|
q_high = float(clip_quantiles[1])
|
|
219
315
|
# Linear outliers are visually dominant; keep clipping robust by capping
|