forestplotx 1.0.1__tar.gz → 1.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24) hide show
  1. forestplotx-1.1.0/PKG-INFO +201 -0
  2. forestplotx-1.1.0/README.md +174 -0
  3. {forestplotx-1.0.1 → forestplotx-1.1.0}/pyproject.toml +1 -1
  4. {forestplotx-1.0.1 → forestplotx-1.1.0}/src/forestplotx/__init__.py +1 -1
  5. {forestplotx-1.0.1 → forestplotx-1.1.0}/src/forestplotx/_axes_config.py +144 -48
  6. {forestplotx-1.0.1 → forestplotx-1.1.0}/src/forestplotx/plot.py +200 -61
  7. forestplotx-1.1.0/src/forestplotx.egg-info/PKG-INFO +201 -0
  8. forestplotx-1.1.0/tests/test_plot_smoke.py +253 -0
  9. forestplotx-1.0.1/PKG-INFO +0 -328
  10. forestplotx-1.0.1/README.md +0 -301
  11. forestplotx-1.0.1/src/forestplotx.egg-info/PKG-INFO +0 -328
  12. forestplotx-1.0.1/tests/test_plot_smoke.py +0 -122
  13. {forestplotx-1.0.1 → forestplotx-1.1.0}/LICENSE +0 -0
  14. {forestplotx-1.0.1 → forestplotx-1.1.0}/setup.cfg +0 -0
  15. {forestplotx-1.0.1 → forestplotx-1.1.0}/src/forestplotx/_layout.py +0 -0
  16. {forestplotx-1.0.1 → forestplotx-1.1.0}/src/forestplotx/_normalize.py +0 -0
  17. {forestplotx-1.0.1 → forestplotx-1.1.0}/src/forestplotx/py.typed +0 -0
  18. {forestplotx-1.0.1 → forestplotx-1.1.0}/src/forestplotx.egg-info/SOURCES.txt +0 -0
  19. {forestplotx-1.0.1 → forestplotx-1.1.0}/src/forestplotx.egg-info/dependency_links.txt +0 -0
  20. {forestplotx-1.0.1 → forestplotx-1.1.0}/src/forestplotx.egg-info/requires.txt +0 -0
  21. {forestplotx-1.0.1 → forestplotx-1.1.0}/src/forestplotx.egg-info/top_level.txt +0 -0
  22. {forestplotx-1.0.1 → forestplotx-1.1.0}/tests/test_axes_config.py +0 -0
  23. {forestplotx-1.0.1 → forestplotx-1.1.0}/tests/test_layout.py +0 -0
  24. {forestplotx-1.0.1 → forestplotx-1.1.0}/tests/test_normalization.py +0 -0
@@ -0,0 +1,201 @@
1
+ Metadata-Version: 2.4
2
+ Name: forestplotx
3
+ Version: 1.1.0
4
+ Summary: Publication-ready forest plots for regression model outputs in Python.
5
+ Author-email: Shervin Taheripour <shervintaheripour@fastmail.com>
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/shervin-taheripour/forestplotx
8
+ Project-URL: Repository, https://github.com/shervin-taheripour/forestplotx
9
+ Project-URL: Issues, https://github.com/shervin-taheripour/forestplotx/issues
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Intended Audience :: Science/Research
12
+ Classifier: Operating System :: OS Independent
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3.10
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Topic :: Scientific/Engineering :: Visualization
18
+ Requires-Python: >=3.10
19
+ Description-Content-Type: text/markdown
20
+ License-File: LICENSE
21
+ Requires-Dist: matplotlib>=3.7
22
+ Requires-Dist: numpy>=1.24
23
+ Requires-Dist: pandas>=2.0
24
+ Provides-Extra: dev
25
+ Requires-Dist: pytest>=7.0; extra == "dev"
26
+ Dynamic: license-file
27
+
28
+ # forestplotx
29
+
30
+ `forestplotx` creates publication-style forest plots that combine a clean text table with a forest panel, with deterministic formatting for common regression outputs.
31
+
32
+ ## Features
33
+
34
+ - Publication-style table + forest composition
35
+ - Supports `binom`, `gamma`, `linear`, and `ordinal` model outputs
36
+ - One or two outcomes per plot
37
+ - Deterministic internal layout presets for stable output
38
+ - Readable log-axis handling in both `decimal` and `power10` styles
39
+ - Optional footer text for manuscript-style notes
40
+ - Visible column-header and x-axis label overrides
41
+
42
+ **Note:** For `logit`/`log` links, `exponentiate=None` applies model-based exponentiation with a warning; set `exponentiate=False` if your data is already on effect scale.
43
+ Displayed CI values in the table use bracket notation: `[low,high]`.
44
+
45
+ ## API Reference
46
+
47
+ ### `forest_plot()`
48
+
49
+ ```python
50
+ fig, axes = fpx.forest_plot(
51
+ df, # DataFrame with model output
52
+ outcomes=None, # list[str], max 2; auto-detected if None
53
+ save=None, # File path to save (e.g. "plot.png")
54
+ model_type="binom", # "binom" | "gamma" | "linear" | "ordinal"
55
+ link=None, # Override default link function
56
+ exponentiate=None, # None=auto by link, True=force, False=disable
57
+ table_only=False, # Render table without forest panel
58
+ legend_labels=None, # list[str] override for legend entries
59
+ point_colors=None, # list[str], up to 2 hex codes for outcome markers
60
+ column_labels=None, # dict override for table column labels
61
+ x_label_override=None, # Override forest x-axis label
62
+ footer_text=None, # Italic footer (wrapped/capped internally)
63
+ tick_style="decimal", # "decimal" or "power10"
64
+ clip_outliers=False, # Opt-in clipping of extreme CI-driven axis outliers
65
+ clip_quantiles=(0.02, 0.98), # Retained for API compatibility
66
+ base_decimals=2, # Decimal places for effect / CI values
67
+ show=True, # Call plt.show(); set False for programmatic use
68
+ show_general_stats=True, # Show n / N / Freq columns
69
+ bold_override=None, # Manual bold control per predictor/outcome
70
+ )
71
+ ```
72
+
73
+ **Returns:** `(fig, axes)` — matplotlib Figure and axes tuple. When `show=False`, the figure is returned without displaying, allowing further customization before calling `plt.show()` manually.
74
+ When `exponentiate=None`, auto exponentiation for log/logit links emits a warning so users can verify input scale.
75
+
76
+ ### Layout Behavior (v1)
77
+
78
+ `forest_plot()` uses fixed internal layout presets (including internal font size) for:
79
+
80
+ 1. `show_general_stats=True` + two outcomes
81
+ 2. `show_general_stats=True` + one outcome
82
+ 3. `show_general_stats=False` + two outcomes
83
+ 4. `show_general_stats=False` + one outcome
84
+
85
+ This is intentional to keep output stable and publication-ready across common use cases.
86
+ `base_decimals` is capped at 3 internally to prevent table collisions in dense layouts.
87
+ For small row counts, figure height uses a tighter internal heuristic to reduce excessive whitespace.
88
+ Long footer text is wrapped and capped to 3 lines with ellipsis for overflow protection.
89
+ Within each layout case, deterministic pressure tiers are applied internally (`standard`, `expanded`, `max`) based on the final rendered string widths.
90
+ Predictor labels are truncated (with warning) when they exceed layout-specific caps:
91
+ 1. `show_general_stats=True` + two outcomes: 21 chars
92
+ 2. `show_general_stats=True` + one outcome: 24 chars
93
+ 3. `show_general_stats=False` + two outcomes: 26 chars
94
+ 4. `show_general_stats=False` + one outcome: 25 chars
95
+ When general stats are shown, large `n`/`N` values are compacted (e.g., `9.9k`) to preserve column readability.
96
+ Compaction activates only when counts reach `>= 1,000` and uses a shared unit across both `n` and `N` (`k`, `M`, `B`, `T`) for consistent within-row formatting.
97
+ Very large values beyond display range are capped as `>999T` with a warning.
98
+ Effect / CI display uses the same compact unit family (`k`, `M`, `B`, `T`) once values reach `>= 1,000`, followed by deterministic decimal trimming to keep tables readable.
99
+ Rows are fully grayed only when all displayed outcomes are missing; if at least one outcome is valid, only the missing outcome triplet (`effect`, `95% CI`, `p`) is blanked and gray-marked.
100
+
101
+ ### Title Handling
102
+
103
+ `forest_plot()` intentionally does not include a `title` parameter in v1.
104
+ This is by design for publication workflows where figure titles/captions are managed in the manuscript rather than embedded inside the plot image.
105
+ If needed for slides or reports, add a title externally on the returned matplotlib figure object.
106
+
107
+ ### Exponentiation Safety
108
+
109
+ - Use `exponentiate=None` (default) for model/link-based automatic handling.
110
+ - Use `exponentiate=False` if your input is already on effect scale (e.g., OR/Ratio, not log-coefficients).
111
+ - Use `exponentiate=True` only when input is definitely on log scale and needs transformation.
112
+ - Read warnings: they include auto-exponentiation context and column mapping (effect column + `CI_low`/`CI_high` combined into `95% CI`).
113
+
114
+ ### Axis Behavior
115
+
116
+ - Log-axis limits are data-driven after optional clipping; they are not forced symmetric around the reference value.
117
+ `clip_outliers=True` uses magnitude-based clipping centered on the median CI bounds, which is more robust than quantile clipping for small samples containing a single extreme interval.
118
+ - `tick_style="decimal"` uses readable decimal ticks:
119
+ - dense near-reference ticks for moderate spans
120
+ - `1-2-5` progression for wider spans
121
+ - compact notation for very large tick labels when needed
122
+ - `tick_style="power10"` keeps readable power-of-ten labels for very wide ratio ranges.
123
+
124
+ ### Label Overrides
125
+
126
+ Use `column_labels` to override visible table headers without changing the underlying model type:
127
+
128
+ ```python
129
+ fig, axes = fpx.forest_plot(
130
+ df,
131
+ model_type="gamma",
132
+ exponentiate=False,
133
+ column_labels={
134
+ "effect": "IRR",
135
+ "ci": "95% CI",
136
+ "p": "P",
137
+ "n": "Cases",
138
+ "N": "Total",
139
+ "Freq": "Share",
140
+ },
141
+ x_label_override="IRR",
142
+ )
143
+ ```
144
+
145
+ Supported `column_labels` keys:
146
+ - `effect`
147
+ - `ci`
148
+ - `p`
149
+ - `n`
150
+ - `N`
151
+ - `Freq`
152
+
153
+ ### `normalize_model_output()`
154
+
155
+ ```python
156
+ clean_df, config = fpx.normalize_model_output(
157
+ df, model_type="binom", link=None, exponentiate=None
158
+ )
159
+ ```
160
+
161
+ Standardizes columns, applies exponentiation policy, and returns axis metadata.
162
+ `config` includes `exponentiated` and `renamed_columns` for transparency.
163
+
164
+ ## Examples
165
+
166
+ ### Category grouping
167
+
168
+ ```python
169
+ df["category"] = ["Demographics", "Demographics", "Clinical", "Clinical"]
170
+
171
+ fig, axes = fpx.forest_plot(df, model_type="binom")
172
+ ```
173
+
174
+ ### Dual outcomes
175
+
176
+ ```python
177
+ # DataFrame with two outcomes per predictor
178
+ fig, axes = fpx.forest_plot(
179
+ df_two_outcomes,
180
+ model_type="binom",
181
+ outcomes=["Mortality", "Readmission"],
182
+ legend_labels=["30-day mortality", "90-day readmission"],
183
+ )
184
+ ```
185
+
186
+ ### Custom marker colors
187
+
188
+ ```python
189
+ fig, axes = fpx.forest_plot(
190
+ df_two_outcomes,
191
+ model_type="binom",
192
+ outcomes=["Mortality", "Readmission"],
193
+ point_colors=["#2C5F8A", "#D4763A"],
194
+ )
195
+ ```
196
+
197
+ ### Linear model
198
+
199
+ ```python
200
+ fig, axes = fpx.forest_plot(df_linear, model_type="linear")
201
+ ```
@@ -0,0 +1,174 @@
1
+ # forestplotx
2
+
3
+ `forestplotx` creates publication-style forest plots that combine a clean text table with a forest panel, with deterministic formatting for common regression outputs.
4
+
5
+ ## Features
6
+
7
+ - Publication-style table + forest composition
8
+ - Supports `binom`, `gamma`, `linear`, and `ordinal` model outputs
9
+ - One or two outcomes per plot
10
+ - Deterministic internal layout presets for stable output
11
+ - Readable log-axis handling in both `decimal` and `power10` styles
12
+ - Optional footer text for manuscript-style notes
13
+ - Visible column-header and x-axis label overrides
14
+
15
+ **Note:** For `logit`/`log` links, `exponentiate=None` applies model-based exponentiation with a warning; set `exponentiate=False` if your data is already on effect scale.
16
+ Displayed CI values in the table use bracket notation: `[low,high]`.
17
+
18
+ ## API Reference
19
+
20
+ ### `forest_plot()`
21
+
22
+ ```python
23
+ fig, axes = fpx.forest_plot(
24
+ df, # DataFrame with model output
25
+ outcomes=None, # list[str], max 2; auto-detected if None
26
+ save=None, # File path to save (e.g. "plot.png")
27
+ model_type="binom", # "binom" | "gamma" | "linear" | "ordinal"
28
+ link=None, # Override default link function
29
+ exponentiate=None, # None=auto by link, True=force, False=disable
30
+ table_only=False, # Render table without forest panel
31
+ legend_labels=None, # list[str] override for legend entries
32
+ point_colors=None, # list[str], up to 2 hex codes for outcome markers
33
+ column_labels=None, # dict override for table column labels
34
+ x_label_override=None, # Override forest x-axis label
35
+ footer_text=None, # Italic footer (wrapped/capped internally)
36
+ tick_style="decimal", # "decimal" or "power10"
37
+ clip_outliers=False, # Opt-in clipping of extreme CI-driven axis outliers
38
+ clip_quantiles=(0.02, 0.98), # Retained for API compatibility
39
+ base_decimals=2, # Decimal places for effect / CI values
40
+ show=True, # Call plt.show(); set False for programmatic use
41
+ show_general_stats=True, # Show n / N / Freq columns
42
+ bold_override=None, # Manual bold control per predictor/outcome
43
+ )
44
+ ```
45
+
46
+ **Returns:** `(fig, axes)` — matplotlib Figure and axes tuple. When `show=False`, the figure is returned without displaying, allowing further customization before calling `plt.show()` manually.
47
+ When `exponentiate=None`, auto exponentiation for log/logit links emits a warning so users can verify input scale.
48
+
49
+ ### Layout Behavior (v1)
50
+
51
+ `forest_plot()` uses fixed internal layout presets (including internal font size) for:
52
+
53
+ 1. `show_general_stats=True` + two outcomes
54
+ 2. `show_general_stats=True` + one outcome
55
+ 3. `show_general_stats=False` + two outcomes
56
+ 4. `show_general_stats=False` + one outcome
57
+
58
+ This is intentional to keep output stable and publication-ready across common use cases.
59
+ `base_decimals` is capped at 3 internally to prevent table collisions in dense layouts.
60
+ For small row counts, figure height uses a tighter internal heuristic to reduce excessive whitespace.
61
+ Long footer text is wrapped and capped to 3 lines with ellipsis for overflow protection.
62
+ Within each layout case, deterministic pressure tiers are applied internally (`standard`, `expanded`, `max`) based on the final rendered string widths.
63
+ Predictor labels are truncated (with warning) when they exceed layout-specific caps:
64
+ 1. `show_general_stats=True` + two outcomes: 21 chars
65
+ 2. `show_general_stats=True` + one outcome: 24 chars
66
+ 3. `show_general_stats=False` + two outcomes: 26 chars
67
+ 4. `show_general_stats=False` + one outcome: 25 chars
68
+ When general stats are shown, large `n`/`N` values are compacted (e.g., `9.9k`) to preserve column readability.
69
+ Compaction activates only when counts reach `>= 1,000` and uses a shared unit across both `n` and `N` (`k`, `M`, `B`, `T`) for consistent within-row formatting.
70
+ Very large values beyond display range are capped as `>999T` with a warning.
71
+ Effect / CI display uses the same compact unit family (`k`, `M`, `B`, `T`) once values reach `>= 1,000`, followed by deterministic decimal trimming to keep tables readable.
72
+ Rows are fully grayed only when all displayed outcomes are missing; if at least one outcome is valid, only the missing outcome triplet (`effect`, `95% CI`, `p`) is blanked and gray-marked.
73
+
74
+ ### Title Handling
75
+
76
+ `forest_plot()` intentionally does not include a `title` parameter in v1.
77
+ This is by design for publication workflows where figure titles/captions are managed in the manuscript rather than embedded inside the plot image.
78
+ If needed for slides or reports, add a title externally on the returned matplotlib figure object.
79
+
80
+ ### Exponentiation Safety
81
+
82
+ - Use `exponentiate=None` (default) for model/link-based automatic handling.
83
+ - Use `exponentiate=False` if your input is already on effect scale (e.g., OR/Ratio, not log-coefficients).
84
+ - Use `exponentiate=True` only when input is definitely on log scale and needs transformation.
85
+ - Read warnings: they include auto-exponentiation context and column mapping (effect column + `CI_low`/`CI_high` combined into `95% CI`).
86
+
87
+ ### Axis Behavior
88
+
89
+ - Log-axis limits are data-driven after optional clipping; they are not forced symmetric around the reference value.
90
+ `clip_outliers=True` uses magnitude-based clipping centered on the median CI bounds, which is more robust than quantile clipping for small samples containing a single extreme interval.
91
+ - `tick_style="decimal"` uses readable decimal ticks:
92
+ - dense near-reference ticks for moderate spans
93
+ - `1-2-5` progression for wider spans
94
+ - compact notation for very large tick labels when needed
95
+ - `tick_style="power10"` keeps readable power-of-ten labels for very wide ratio ranges.
96
+
97
+ ### Label Overrides
98
+
99
+ Use `column_labels` to override visible table headers without changing the underlying model type:
100
+
101
+ ```python
102
+ fig, axes = fpx.forest_plot(
103
+ df,
104
+ model_type="gamma",
105
+ exponentiate=False,
106
+ column_labels={
107
+ "effect": "IRR",
108
+ "ci": "95% CI",
109
+ "p": "P",
110
+ "n": "Cases",
111
+ "N": "Total",
112
+ "Freq": "Share",
113
+ },
114
+ x_label_override="IRR",
115
+ )
116
+ ```
117
+
118
+ Supported `column_labels` keys:
119
+ - `effect`
120
+ - `ci`
121
+ - `p`
122
+ - `n`
123
+ - `N`
124
+ - `Freq`
125
+
126
+ ### `normalize_model_output()`
127
+
128
+ ```python
129
+ clean_df, config = fpx.normalize_model_output(
130
+ df, model_type="binom", link=None, exponentiate=None
131
+ )
132
+ ```
133
+
134
+ Standardizes columns, applies exponentiation policy, and returns axis metadata.
135
+ `config` includes `exponentiated` and `renamed_columns` for transparency.
136
+
137
+ ## Examples
138
+
139
+ ### Category grouping
140
+
141
+ ```python
142
+ df["category"] = ["Demographics", "Demographics", "Clinical", "Clinical"]
143
+
144
+ fig, axes = fpx.forest_plot(df, model_type="binom")
145
+ ```
146
+
147
+ ### Dual outcomes
148
+
149
+ ```python
150
+ # DataFrame with two outcomes per predictor
151
+ fig, axes = fpx.forest_plot(
152
+ df_two_outcomes,
153
+ model_type="binom",
154
+ outcomes=["Mortality", "Readmission"],
155
+ legend_labels=["30-day mortality", "90-day readmission"],
156
+ )
157
+ ```
158
+
159
+ ### Custom marker colors
160
+
161
+ ```python
162
+ fig, axes = fpx.forest_plot(
163
+ df_two_outcomes,
164
+ model_type="binom",
165
+ outcomes=["Mortality", "Readmission"],
166
+ point_colors=["#2C5F8A", "#D4763A"],
167
+ )
168
+ ```
169
+
170
+ ### Linear model
171
+
172
+ ```python
173
+ fig, axes = fpx.forest_plot(df_linear, model_type="linear")
174
+ ```
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "forestplotx"
7
- version = "1.0.1"
7
+ version = "1.1.0"
8
8
  description = "Publication-ready forest plots for regression model outputs in Python."
9
9
  readme = "README.md"
10
10
  license = "MIT"
@@ -1,7 +1,7 @@
1
1
  from .plot import forest_plot
2
2
  from ._normalize import _normalize_model_output as normalize_model_output
3
3
 
4
- __version__ = "1.0.1"
4
+ __version__ = "1.1.0"
5
5
 
6
6
  __all__ = [
7
7
  "forest_plot",
@@ -119,18 +119,6 @@ def configure_forest_axis(
119
119
  if not len(finite_lo) or not len(finite_hi):
120
120
  return ax
121
121
 
122
- if clip_outliers:
123
- q_low, q_high = clip_quantiles
124
- q_low = float(q_low)
125
- q_high = float(q_high)
126
- if not (0.0 <= q_low < q_high <= 1.0):
127
- raise ValueError("clip_quantiles must satisfy 0 <= low < high <= 1.")
128
- data_min = float(np.quantile(finite_lo, q_low))
129
- data_max = float(np.quantile(finite_hi, q_high))
130
- else:
131
- data_min = float(np.min(finite_lo))
132
- data_max = float(np.max(finite_hi))
133
-
134
122
  ax.set_xscale("log" if use_log else "linear")
135
123
 
136
124
  if use_log:
@@ -154,12 +142,11 @@ def configure_forest_axis(
154
142
  UserWarning,
155
143
  stacklevel=2,
156
144
  )
145
+ positive_lo = finite_lo[finite_lo > 0]
146
+ positive_hi = finite_hi[finite_hi > 0]
147
+ positive_eff = finite_eff[finite_eff > 0]
157
148
  positive_values = np.concatenate(
158
- [
159
- finite_lo[finite_lo > 0],
160
- finite_hi[finite_hi > 0],
161
- finite_eff[finite_eff > 0],
162
- ]
149
+ [positive_lo, positive_hi, positive_eff]
163
150
  )
164
151
  positive_candidates = [*positive_values.tolist(), ref_val]
165
152
  if not positive_candidates:
@@ -167,27 +154,113 @@ def configure_forest_axis(
167
154
  "Log-scaled forest axis requires positive effect/CI values."
168
155
  )
169
156
 
170
- pmin = min(positive_candidates)
171
- pmax = max(positive_candidates)
157
+ if clip_outliers and len(positive_values):
158
+ clip_factor = 10.0
159
+
160
+ if len(positive_lo):
161
+ lo_baseline = float(np.median(positive_lo))
162
+ lo_threshold = lo_baseline / clip_factor if lo_baseline > 0 else 0.0
163
+ lo_inliers = positive_lo[positive_lo >= lo_threshold]
164
+ clipped_pmin = float(np.min(lo_inliers)) if len(lo_inliers) else float(np.min(positive_lo))
165
+ else:
166
+ clipped_pmin = float(np.min(positive_values))
167
+
168
+ if len(positive_hi):
169
+ hi_baseline = float(np.median(positive_hi))
170
+ hi_threshold = hi_baseline * clip_factor
171
+ hi_inliers = positive_hi[positive_hi <= hi_threshold]
172
+ clipped_pmax = float(np.max(hi_inliers)) if len(hi_inliers) else float(np.max(positive_hi))
173
+ else:
174
+ clipped_pmax = float(np.max(positive_values))
175
+
176
+ pmin = min(clipped_pmin, ref_val)
177
+ pmax = max(clipped_pmax, ref_val)
178
+ else:
179
+ pmin = min(positive_candidates)
180
+ pmax = max(positive_candidates)
172
181
  target_ticks = max(int(num_ticks), 3)
173
- if target_ticks % 2 == 0:
174
- target_ticks -= 1
175
- n_side_target = max((target_ticks - 1) // 2, 1)
176
-
177
- span_decades = max(abs(math.log10(pmin / ref_val)), abs(math.log10(pmax / ref_val)))
178
- axis_span_decades = span_decades * 1.15
179
- # Keep very tight ranges readable around the reference line.
180
- axis_span_decades = max(axis_span_decades, 0.01)
181
- raw_step = axis_span_decades / n_side_target
182
- step_decades = _nice_log_step(raw_step)
183
- n_side = max(1, int(axis_span_decades / step_decades))
184
- exponents = np.arange(-n_side, n_side + 1, dtype=float) * step_decades
185
- ticks = ref_val * np.power(10.0, exponents)
186
- axis_ratio = 10 ** axis_span_decades
187
- xmin = ref_val / axis_ratio
188
- xmax = ref_val * axis_ratio
182
+ log_min = math.log10(pmin)
183
+ log_max = math.log10(pmax)
184
+ span_decades = max(log_max - log_min, 0.0)
185
+ pad_decades = max(0.08, min(0.25, span_decades * 0.08))
186
+ axis_log_min = log_min - pad_decades
187
+ axis_log_max = log_max + pad_decades
188
+ axis_span_decades = axis_log_max - axis_log_min
189
+
190
+ raw_step = axis_span_decades / max(target_ticks - 1, 1)
191
+ if span_decades > 3:
192
+ step_decades = max(1.0, _nice_log_step(raw_step))
193
+ else:
194
+ step_decades = _nice_log_step(raw_step)
195
+
196
+ tick_start = math.ceil(axis_log_min / step_decades) * step_decades
197
+ tick_end = math.floor(axis_log_max / step_decades) * step_decades
198
+ if tick_end < tick_start:
199
+ tick_logs = np.array([axis_log_min, 0.0, axis_log_max], dtype=float)
200
+ else:
201
+ tick_logs = np.arange(
202
+ tick_start,
203
+ tick_end + 0.5 * step_decades,
204
+ step_decades,
205
+ )
206
+ if not np.any(np.isclose(tick_logs, 0.0, atol=1e-9)):
207
+ tick_logs = np.sort(np.append(tick_logs, 0.0))
208
+
209
+ xmin = 10 ** axis_log_min
210
+ xmax = 10 ** axis_log_max
189
211
  ax.set_xlim(xmin, xmax)
190
- ticks_in = ticks[(ticks >= xmin) & (ticks <= xmax)]
212
+ ticks_in = np.power(10.0, tick_logs)
213
+ ticks_in = ticks_in[(ticks_in >= xmin) & (ticks_in <= xmax)]
214
+ ticks_in = np.unique(np.asarray(ticks_in, dtype=float))
215
+
216
+ tick_data_min = max(pmin, np.nextafter(0.0, 1.0))
217
+ tick_data_max = pmax
218
+ moderate_decimal_span = (
219
+ tick_style == "decimal"
220
+ and pmin >= 0.2
221
+ and pmax <= 10.0
222
+ and span_decades <= 1.4
223
+ )
224
+ if moderate_decimal_span:
225
+ readable_ticks = np.array(
226
+ [0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.2, 1.5, 2.0, 3.0, 4.0, 5.0, 7.0, 10.0],
227
+ dtype=float,
228
+ )
229
+ ticks_in = readable_ticks[(readable_ticks >= tick_data_min) & (readable_ticks <= tick_data_max)]
230
+ if len(ticks_in) > 8:
231
+ keep = []
232
+ for idx, tick in enumerate(ticks_in):
233
+ if idx % 2 == 0 or math.isclose(tick, 1.0, abs_tol=1e-9):
234
+ keep.append(tick)
235
+ ticks_in = np.array(sorted(set(keep)), dtype=float)
236
+ elif tick_style == "decimal":
237
+ decade_min = int(math.floor(axis_log_min))
238
+ decade_max = int(math.ceil(axis_log_max))
239
+ readable_ticks = []
240
+ for decade in range(decade_min, decade_max + 1):
241
+ base = 10.0 ** decade
242
+ for mult in (1.0, 2.0, 5.0):
243
+ tick = mult * base
244
+ if tick_data_min <= tick <= tick_data_max:
245
+ readable_ticks.append(tick)
246
+ if readable_ticks:
247
+ ticks_in = np.array(sorted(set(readable_ticks)), dtype=float)
248
+ if not np.any(np.isclose(ticks_in, ref_val, atol=1e-9)) and tick_data_min <= ref_val <= tick_data_max:
249
+ ticks_in = np.array(sorted(np.append(ticks_in, ref_val)), dtype=float)
250
+ if len(ticks_in) > 9:
251
+ min_log_gap = axis_span_decades / 7.0
252
+ keep = [float(ticks_in[0])]
253
+ for tick in ticks_in[1:-1]:
254
+ if math.isclose(tick, ref_val, abs_tol=1e-9):
255
+ keep.append(float(tick))
256
+ continue
257
+ if math.log10(float(tick)) - math.log10(float(keep[-1])) >= min_log_gap:
258
+ keep.append(float(tick))
259
+ keep.append(float(ticks_in[-1]))
260
+ if tick_data_min <= ref_val <= tick_data_max and not any(math.isclose(t, ref_val, abs_tol=1e-9) for t in keep):
261
+ keep.append(ref_val)
262
+ ticks_in = np.array(sorted(set(keep)), dtype=float)
263
+
191
264
  if len(ticks_in) < 3:
192
265
  ticks_in = np.array([xmin, ref_val, xmax], dtype=float)
193
266
  ax.xaxis.set_major_locator(FixedLocator(ticks_in))
@@ -195,25 +268,48 @@ def configure_forest_axis(
195
268
  if tick_style == "power10":
196
269
 
197
270
  def _power10_formatter(x: float, _pos: int) -> str:
198
- exp = math.log10(x / ref_val)
199
- rounded = round(exp, 2)
200
- if math.isclose(rounded, 0.0, abs_tol=1e-9):
201
- rounded = 0.0
202
- exp_txt = f"{rounded:.2f}".rstrip("0").rstrip(".")
203
- if math.isclose(ref_val, 1.0):
204
- return rf"$10^{{{exp_txt}}}$"
205
- return rf"${_format_decimal(ref_val)}\times10^{{{exp_txt}}}$"
271
+ exp = round(math.log10(x), 6)
272
+ if math.isclose(exp, round(exp), abs_tol=1e-9):
273
+ exp_txt = str(int(round(exp)))
274
+ else:
275
+ exp_txt = f"{exp:.2f}".rstrip("0").rstrip(".")
276
+ return rf"$10^{{{exp_txt}}}$"
206
277
 
207
278
  ax.xaxis.set_major_formatter(FuncFormatter(_power10_formatter))
208
279
  else:
209
- decimals = max(2, _decimals_from_ticks(ticks_in))
210
- ax.xaxis.set_major_formatter(
211
- FuncFormatter(lambda x, _pos, d=decimals: f"{x:.{d}f}")
212
- )
280
+ decimals = _decimals_from_ticks(ticks_in)
281
+
282
+ def _decimal_log_formatter(x: float, _pos: int, d: int = decimals) -> str:
283
+ abs_x = abs(float(x))
284
+ if abs_x >= 1e12:
285
+ return _format_decimal(x / 1e12, precision=1).rstrip("0").rstrip(".") + "T"
286
+ if abs_x >= 1e9:
287
+ return _format_decimal(x / 1e9, precision=1).rstrip("0").rstrip(".") + "B"
288
+ if abs_x >= 1e6:
289
+ return _format_decimal(x / 1e6, precision=1).rstrip("0").rstrip(".") + "M"
290
+ if abs_x >= 1e3:
291
+ return _format_decimal(x / 1e3, precision=1).rstrip("0").rstrip(".") + "k"
292
+ if abs_x >= 10:
293
+ return _format_decimal(x, precision=0)
294
+ return _format_decimal(x, precision=max(d + 1, 1))
295
+
296
+ ax.xaxis.set_major_formatter(FuncFormatter(_decimal_log_formatter))
213
297
 
214
298
  ax.xaxis.set_minor_locator(NullLocator())
215
299
  ax.xaxis.set_minor_formatter(NullFormatter())
216
300
  else:
301
+ if clip_outliers:
302
+ q_low, q_high = clip_quantiles
303
+ q_low = float(q_low)
304
+ q_high = float(q_high)
305
+ if not (0.0 <= q_low < q_high <= 1.0):
306
+ raise ValueError("clip_quantiles must satisfy 0 <= low < high <= 1.")
307
+ data_min = float(np.quantile(finite_lo, q_low))
308
+ data_max = float(np.quantile(finite_hi, q_high))
309
+ else:
310
+ data_min = float(np.min(finite_lo))
311
+ data_max = float(np.max(finite_hi))
312
+
217
313
  if clip_outliers:
218
314
  q_high = float(clip_quantiles[1])
219
315
  # Linear outliers are visually dominant; keep clipping robust by capping