ggh4x-python 0.3.1.9000__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ggh4x/__init__.py +140 -0
- ggh4x/_aimed_text_grob.py +432 -0
- ggh4x/_borrowed_ggplot2.py +273 -0
- ggh4x/_cli.py +84 -0
- ggh4x/_datasets.py +106 -0
- ggh4x/_download.py +111 -0
- ggh4x/_facet_helpers.py +313 -0
- ggh4x/_facet_utils.py +649 -0
- ggh4x/_gap_grobs.py +606 -0
- ggh4x/_registry.py +10 -0
- ggh4x/_rlang.py +93 -0
- ggh4x/_utils.py +150 -0
- ggh4x/_vctrs.py +233 -0
- ggh4x/conveniences.py +601 -0
- ggh4x/coord_axes_inside.py +380 -0
- ggh4x/element_part_rect.py +545 -0
- ggh4x/facet_grid2.py +1018 -0
- ggh4x/facet_manual.py +901 -0
- ggh4x/facet_nested.py +776 -0
- ggh4x/facet_nested_wrap.py +193 -0
- ggh4x/facet_wrap2.py +896 -0
- ggh4x/geom_box.py +536 -0
- ggh4x/geom_outline_point.py +444 -0
- ggh4x/geom_pointpath.py +259 -0
- ggh4x/geom_polygonraster.py +252 -0
- ggh4x/geom_rectrug.py +489 -0
- ggh4x/geom_text_aimed.py +279 -0
- ggh4x/guide_stringlegend.py +354 -0
- ggh4x/help_secondary.py +549 -0
- ggh4x/multiscale/__init__.py +51 -0
- ggh4x/multiscale/_multiscale_add.py +207 -0
- ggh4x/multiscale/scale_listed.py +167 -0
- ggh4x/multiscale/scale_manual.py +478 -0
- ggh4x/multiscale/scale_multi.py +393 -0
- ggh4x/panel_scales/__init__.py +58 -0
- ggh4x/panel_scales/at_panel.py +115 -0
- ggh4x/panel_scales/facetted_pos_scales.py +647 -0
- ggh4x/panel_scales/force_panelsize.py +411 -0
- ggh4x/panel_scales/scale_facet.py +222 -0
- ggh4x/position_disjoint_ranges.py +229 -0
- ggh4x/position_lineartrans.py +242 -0
- ggh4x/py.typed +0 -0
- ggh4x/resources/faithful.csv +273 -0
- ggh4x/resources/iris.csv +151 -0
- ggh4x/resources/mtcars.csv +33 -0
- ggh4x/resources/pressure.csv +20 -0
- ggh4x/resources/volcano.csv +87 -0
- ggh4x/save.py +255 -0
- ggh4x/stat_difference.py +388 -0
- ggh4x/stat_funxy.py +436 -0
- ggh4x/stat_rle.py +290 -0
- ggh4x/stat_rollingkernel.py +369 -0
- ggh4x/stat_theodensity.py +681 -0
- ggh4x/strip_nested.py +448 -0
- ggh4x/strip_split.py +687 -0
- ggh4x/strip_tag.py +636 -0
- ggh4x/strip_themed.py +232 -0
- ggh4x/strip_vanilla.py +1464 -0
- ggh4x/themes.py +31 -0
- ggh4x/themes_ggh4x.py +67 -0
- ggh4x_python-0.3.1.9000.dist-info/METADATA +40 -0
- ggh4x_python-0.3.1.9000.dist-info/RECORD +64 -0
- ggh4x_python-0.3.1.9000.dist-info/WHEEL +4 -0
- ggh4x_python-0.3.1.9000.dist-info/licenses/LICENSE +3 -0
ggh4x/save.py
ADDED
|
@@ -0,0 +1,255 @@
|
|
|
1
|
+
"""Save a ggplot with automatic size measurement (R source: save.R).
|
|
2
|
+
|
|
3
|
+
Port of ggh4x's :func:`save_plot`, a wrapper over :func:`ggplot2_py.save.ggsave`
|
|
4
|
+
that guesses the plot's physical size from the built gtable. The guess is only
|
|
5
|
+
meaningful when the panels have an absolute size -- set either via
|
|
6
|
+
``theme(panel.widths=, panel.heights=)`` or via ``force_panelsizes()`` -- because
|
|
7
|
+
with the default *null* (proportional) panel sizing the measured dimension is
|
|
8
|
+
undefined and the current graphics-device size is used instead.
|
|
9
|
+
|
|
10
|
+
Deviation from R
|
|
11
|
+
----------------
|
|
12
|
+
R's ``save_plot`` returns the file-name string with ``width`` and ``height``
|
|
13
|
+
attributes (in inches) attached via ``attr<-``. Python strings cannot carry
|
|
14
|
+
attributes, so this port returns a :class:`SavePlotResult` -- a thin ``str``
|
|
15
|
+
subclass that *is* the path everywhere a string is expected, but additionally
|
|
16
|
+
exposes ``.width`` and ``.height`` (inches, or ``None`` when the size could not
|
|
17
|
+
be measured, matching R's ``NA_real_``).
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from __future__ import annotations
|
|
21
|
+
|
|
22
|
+
from typing import Any, Optional
|
|
23
|
+
|
|
24
|
+
import numpy as np
|
|
25
|
+
|
|
26
|
+
from ._cli import cli_abort
|
|
27
|
+
from ._rlang import arg_match0
|
|
28
|
+
from ._utils import has_null_unit, height_cm, width_cm
|
|
29
|
+
|
|
30
|
+
__all__ = [
|
|
31
|
+
"save_plot",
|
|
32
|
+
"has_null_unit",
|
|
33
|
+
"SavePlotResult",
|
|
34
|
+
]
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
# Names of the units in which a caller may express a supplied width/height.
_VALID_UNITS = ("in", "cm", "mm", "px")

# Divisor that turns a length in the keyed unit into inches, following R's
# ``switch(units, `in` = 1, cm = 2.54, mm = 25.4, px = dpi)`` (save.R L52, L63).
# ``px`` deliberately has no entry: its divisor is the ``dpi`` value that is
# only known at call time.
_UNIT_DIVISOR = {
    "in": 1.0,
    "cm": 2.54,
    "mm": 25.4,
}
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class SavePlotResult(str):
    """Output path of :func:`save_plot` that also remembers the plot size.

    The object is a genuine :class:`str` (it compares, hashes and formats as
    the path text), with two extra instance attributes holding the plot
    dimensions. This stands in for R attaching ``width`` / ``height``
    attributes to the returned file-name string.

    Attributes
    ----------
    width : float or None
        Plot width in inches; ``None`` when no width was determined.
    height : float or None
        Plot height in inches; ``None`` when no height was determined.
    """

    width: Optional[float]
    height: Optional[float]

    def __new__(
        cls,
        value: str,
        width: Optional[float] = None,
        height: Optional[float] = None,
    ) -> "SavePlotResult":
        """Create the string from *value* and attach the two dimensions."""
        # ``str`` is immutable, so the text must be fixed in ``__new__``; the
        # size attributes are then stored on the freshly created instance.
        instance = super().__new__(cls, value)
        instance.width, instance.height = width, height
        return instance

    def __repr__(self) -> str:  # pragma: no cover - cosmetic
        """Show the path (as a plain-``str`` repr) together with both sizes."""
        path_repr = str.__repr__(self)
        return f"SavePlotResult({path_repr}, width={self.width!r}, height={self.height!r})"
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def _build_gtable(plot: Any) -> Any:
    """Return the gtable to measure for *plot* (R: ``ggplotGrob(plot)``).

    Parameters
    ----------
    plot : Any
        One of three things, checked in this order: an object for which
        ``is_ggplot`` is true, an object with a ``to_gtable`` attribute
        (e.g. a patchwork-style composition), or anything else.

    Returns
    -------
    Any
        ``ggplot_gtable(ggplot_build(plot))`` for a ggplot, the result of
        ``plot.to_gtable()`` for an object exposing that method, and *plot*
        itself otherwise (it is then assumed to already be a gtable/grob).
    """
    from ggplot2_py.plot import ggplot_build, ggplot_gtable, is_ggplot

    if is_ggplot(plot):
        return ggplot_gtable(ggplot_build(plot))

    # Compositions expose to_gtable(); anything without it is passed through
    # unchanged, which the original notes describe as matching ggsave's own
    # fallback.
    return plot.to_gtable() if hasattr(plot, "to_gtable") else plot
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def _measure_inches(track_units: Any, axis: str) -> Optional[float]:
    """Total a gtable track vector in inches, or give ``None`` for null units.

    Follows save.R L44-50 / L55-61: a track for which ``has_null_unit``
    reports a null unit has no defined size (R ``NA_real_``); otherwise the
    per-track centimetre lengths are summed and divided by 2.54.

    Parameters
    ----------
    track_units : Unit
        The gtable ``widths`` or ``heights`` vector.
    axis : str
        ``"width"`` measures with ``width_cm``; any other value (the caller
        passes ``"height"``) measures with ``height_cm``.

    Returns
    -------
    float or None
        Length in inches, or ``None`` when ``has_null_unit`` is true.
    """
    if has_null_unit(track_units):
        return None

    if axis == "width":
        lengths_cm = width_cm(track_units)
    else:
        lengths_cm = height_cm(track_units)

    total_cm = float(np.sum(np.asarray(lengths_cm, dtype=float)))
    return total_cm / 2.54
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def _supplied_to_inches(value: float, units: str, dpi: float) -> float:
    """Express a caller-supplied length in inches.

    Equivalent to R's ``value / switch(units, `in`=1, cm=2.54, mm=25.4,
    px=dpi)`` (save.R L52, L63).

    Parameters
    ----------
    value : float
        Length expressed in *units*.
    units : str
        ``"px"``, or a key of ``_UNIT_DIVISOR`` (``"in"``, ``"cm"``,
        ``"mm"``). Any other value raises ``KeyError`` from the table lookup.
    dpi : float
        Dots per inch; only consulted when ``units == "px"``.

    Returns
    -------
    float
        *value* divided by the divisor for *units*.
    """
    # Pixels are the one unit whose divisor is not a constant.
    divisor = dpi if units == "px" else _UNIT_DIVISOR[units]
    return value / divisor
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def save_plot(
    *args: Any,
    plot: Any = None,
    width: Optional[float] = None,
    height: Optional[float] = None,
    units: str = "in",
    dpi: float = 300,
    **kwargs: Any,
) -> SavePlotResult:
    """Save a plot, inferring any missing dimension from its gtable.

    Validates *units*, resolves the plot, builds its gtable, obtains the
    width and height in inches (converted when supplied, measured when not),
    and hands the actual writing to :func:`ggplot2_py.save.ggsave` with
    ``units="in"``. Measuring is only meaningful when the panels have an
    absolute size (``theme(panel.widths=, panel.heights=)`` or
    ``force_panelsizes()``); otherwise the measured value is ``None``.

    Parameters
    ----------
    *args
        Passed positionally to ``ggsave`` before the keyword arguments; the
        first is intended to be the output file name.
    plot : GGPlot or patchwork-like, optional
        Plot to save. When ``None`` it is looked up here with
        ``get_last_plot()`` so that the measured and the saved plot are the
        same object; if that is also ``None``, ``cli_abort`` is called.
    width, height : float, optional
        Size in *units*. ``None`` (default) means "measure the gtable"; the
        measurement is itself ``None`` when the track holds a null unit, and
        that ``None`` is forwarded to ``ggsave`` (expected to fall back to
        the device size, like R's ``NA``).
    units : {"in", "cm", "mm", "px"}, optional
        Unit of a supplied *width* / *height*. Default ``"in"``.
    dpi : float, optional
        Resolution forwarded to ``ggsave`` (default ``300``); also the
        px-to-inch divisor when ``units == "px"``.
    **kwargs
        Further keyword arguments forwarded unchanged to ``ggsave``
        (e.g. ``device``, ``path``, ``bg``, ``limitsize``, ``scale``).

    Returns
    -------
    SavePlotResult
        ``str()`` of the value returned by ``ggsave``, carrying ``.width``
        and ``.height`` in inches (``None`` when not determined).

    Raises
    ------
    ValueError
        Per the original documentation, when *units* is not one of the four
        accepted names; the check itself is performed by ``arg_match0``.

    Notes
    -----
    R attaches ``width`` / ``height`` attributes to the returned file name;
    a plain Python ``str`` cannot carry attributes, hence the
    :class:`SavePlotResult` return type.
    """
    units = arg_match0(units, _VALID_UNITS, arg_name="units")

    # Pin down the plot before measuring, so the gtable that is measured
    # belongs to the very plot that gets written.
    if plot is None:
        from ggplot2_py.plot import get_last_plot

        plot = get_last_plot()
        if plot is None:
            cli_abort("No plot to save. Supply `plot` or create a plot first.")

    gt = _build_gtable(plot)

    # Width first, then height (save.R L44-53 and L55-64 respectively).
    width = (
        _measure_inches(gt.widths, "width")
        if width is None
        else _supplied_to_inches(width, units, float(dpi))
    )
    height = (
        _measure_inches(gt.heights, "height")
        if height is None
        else _supplied_to_inches(height, units, float(dpi))
    )

    from ggplot2_py.save import ggsave

    # Both sizes are in inches by now, so ggsave is always told units="in".
    forwarded = dict(plot=plot, width=width, height=height, units="in", dpi=dpi)
    out_file = ggsave(*args, **forwarded, **kwargs)

    return SavePlotResult(str(out_file), width=width, height=height)
|
ggh4x/stat_difference.py
ADDED
|
@@ -0,0 +1,388 @@
|
|
|
1
|
+
"""Difference ribbon stat (R source: ``ggh4x/R/stat_difference.R``).
|
|
2
|
+
|
|
3
|
+
Port of ggh4x's :func:`stat_difference` and the :class:`StatDifference`
|
|
4
|
+
ggproto object onto the Bio-Babel ``ggplot2_py`` stack.
|
|
5
|
+
|
|
6
|
+
``stat_difference()`` builds a ribbon whose ``fill`` aesthetic encodes the sign
|
|
7
|
+
of the difference ``ymax - ymin`` (or ``xmax - xmin`` when the orientation is
|
|
8
|
+
flipped). The stat re-orders the ``group`` aesthetic so that the positive and
|
|
9
|
+
negative segments of the difference receive distinct fills, and it interpolates
|
|
10
|
+
the exact crossover positions so that the ribbon does not look "stumpy" where
|
|
11
|
+
the two series cross.
|
|
12
|
+
|
|
13
|
+
R reference behaviour was captured live via ``StatDifference$compute_group`` /
|
|
14
|
+
``StatDifference$compute_panel`` / ``StatDifference$setup_params`` and the
|
|
15
|
+
parity tests in ``tests/test_stat_difference.py``.
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
from typing import Any, Dict, Optional, Sequence, Tuple
|
|
21
|
+
|
|
22
|
+
import numpy as np
|
|
23
|
+
import pandas as pd
|
|
24
|
+
|
|
25
|
+
from ggplot2_py import ggproto_parent
|
|
26
|
+
from ggplot2_py.aes import AfterStat, aes
|
|
27
|
+
from ggplot2_py.stat import Stat, _flip_data, _has_flipped_aes
|
|
28
|
+
|
|
29
|
+
from ._vctrs import data_frame0, vec_rep_each, vec_unrep
|
|
30
|
+
|
|
31
|
+
__all__ = ["StatDifference", "stat_difference"]
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
# ---------------------------------------------------------------------------
|
|
35
|
+
# ggproto
|
|
36
|
+
# ---------------------------------------------------------------------------
|
|
37
|
+
class StatDifference(Stat):
    """Stat that turns two series into a sign-coloured difference ribbon.

    Counterpart of R's ``StatDifference`` ggproto object. Per group,
    :meth:`compute_group` sorts by ``x``, takes the sign of ``ymax - ymin``,
    run-length encodes it and inserts interpolated rows at the positions
    where consecutive runs meet. Every run is flagged with an ``id`` marker
    which :meth:`compute_panel` accumulates into a fresh ``group`` number,
    so each run is drawn as its own ribbon piece.

    Attributes
    ----------
    required_aes : list of str
        ``['x|y', 'ymin|xmin', 'ymax|xmax']``.
    default_aes : Mapping
        ``aes(fill=AfterStat("sign"))``.
    extra_params : list of str
        ``['na_rm', 'orientation', 'levels']``.
    """

    required_aes = ["x|y", "ymin|xmin", "ymax|xmax"]
    default_aes = aes(fill=AfterStat("sign"))
    extra_params = ["na_rm", "orientation", "levels"]

    def setup_params(self, data: pd.DataFrame, params: Dict[str, Any]) -> Dict[str, Any]:
        """Store the detected orientation in ``params["flipped_aes"]``.

        R equivalent::

            params$flipped_aes <- has_flipped_aes(
              data, params, main_is_orthogonal = FALSE, main_is_continuous = TRUE
            )

        Parameters
        ----------
        data : pandas.DataFrame
            Layer data.
        params : dict
            Layer parameters; updated in place.

        Returns
        -------
        dict
            The same *params* object, now holding ``flipped_aes``.
        """
        flipped = _has_flipped_aes(
            data,
            params,
            main_is_orthogonal=False,
            main_is_continuous=True,
        )
        params["flipped_aes"] = flipped
        return params

    def compute_panel(
        self,
        data: pd.DataFrame,
        scales: Any,
        flipped_aes: bool = False,
        **kwargs: Any,
    ) -> pd.DataFrame:
        """Run the per-group computation and renumber groups per sign run.

        R equivalent::

            data <- flip_data(data, flipped_aes)
            data <- ggproto_parent(Stat, self)$compute_panel(data, scales, ...)
            data$group <- cumsum(data$id)
            data$id <- NULL
            data$flipped_aes <- flipped_aes
            flip_data(data, flipped_aes)

        Parameters
        ----------
        data : pandas.DataFrame
            Data of a single panel.
        scales : Any
            Panel scales; only passed on to the parent method.
        flipped_aes : bool, optional
            Whether ``x`` and ``y`` roles are swapped.
        **kwargs
            Passed on to the parent ``compute_panel`` (and from there to
            :meth:`compute_group`), except ``orientation`` which is removed
            first because :meth:`compute_group` has no such parameter.

        Returns
        -------
        pandas.DataFrame
            Panel result with ``group`` set to the running sum of ``id``,
            ``id`` removed and a ``flipped_aes`` column added. An empty
            parent result is returned as-is; a ``None`` result becomes an
            empty frame.
        """
        kwargs.pop("orientation", None)

        flipped_in = _flip_data(data, flipped_aes)
        result = ggproto_parent(Stat, self).compute_panel(flipped_in, scales, **kwargs)
        if result is None:
            return pd.DataFrame()
        if result.empty:
            return result

        result = result.copy()
        # ``id`` is 1 on the first row of each sign run, so its running sum
        # numbers the runs 1, 2, 3, ... across the whole panel.
        run_starts = np.asarray(result["id"], dtype=float)
        result["group"] = np.cumsum(run_starts).astype(int)
        result = result.drop(columns=["id"])
        result["flipped_aes"] = flipped_aes
        return _flip_data(result, flipped_aes)

    def compute_group(
        self,
        data: pd.DataFrame,
        scales: Any,
        levels: Sequence[str] = ("+", "-"),
        na_rm: bool = False,
        flipped_aes: bool = False,
    ) -> pd.DataFrame:
        """Build the difference-ribbon rows for one group.

        Steps, following R's ``StatDifference$compute_group``:

        * stable sort by ``x``;
        * ``sign(ymax - ymin)`` and its run-length encoding;
        * for every run end ``d`` but the last, the crossover
          ``-y[d]*(x[d+1]-x[d])/(y[d+1]-y[d]) + x[d]``;
        * each crossover is emitted twice (closing one run, opening the
          next) with ``ymin`` / ``ymax`` from :func:`numpy.interp`;
        * rows are ordered by run and ``x``, rows with ``sign == 0`` are
          dropped afterwards, and ``sign`` is converted through
          ``_sign_factor`` (R: ``factor(sign, levels=c("1","-1"),
          labels=levels)``).

        Parameters
        ----------
        data : pandas.DataFrame
            One group's rows; columns ``x``, ``ymin`` and ``ymax`` are read.
        scales : Any
            Not used.
        levels : sequence of str, optional
            Labels for ``ymax > ymin`` (first) and ``ymax < ymin`` (second).
        na_rm : bool, optional
            Not used in this method.
        flipped_aes : bool, optional
            Not used in this method (flipping happens in
            :meth:`compute_panel`).

        Returns
        -------
        pandas.DataFrame
            Columns ``x``, ``ymin``, ``ymax``, ``id``, ``sign``.
        """
        # Stable sort on x, like R's order().
        by_x = np.argsort(data["x"].to_numpy(), kind="stable")
        data = data.iloc[by_x].reset_index(drop=True)

        x = data["x"].to_numpy(dtype=float)
        ymin = data["ymin"].to_numpy(dtype=float)
        ymax = data["ymax"].to_numpy(dtype=float)
        n_rows = len(data)

        diff = ymax - ymin
        sign = np.sign(diff)

        # Run-length encoding of the sign: one key and one length per run.
        runs = vec_unrep(sign)
        run_sign = np.asarray(runs["key"].to_numpy(), dtype=float)
        run_len = runs["times"].to_numpy()

        # Row counts at the end of every run except the last. These are
        # 1-based positions in R terms, so the 0-based neighbours around a
        # boundary are (value - 1) and (value).
        boundaries = np.cumsum(run_len)[:-1]
        if boundaries.size:
            left = boundaries.astype(int) - 1
            right = boundaries.astype(int)
            cross = (
                -diff[left] * (x[right] - x[left]) / (diff[right] - diff[left]) + x[left]
            )
        else:
            cross = np.empty(0, dtype=float)

        # Each crossover appears twice; interpolate both series there.
        x_cross = vec_rep_each(cross, 2)
        if x_cross.size:
            ymin_cross = np.interp(x_cross, x, ymin)
            ymax_cross = np.interp(x_cross, x, ymax)
        else:
            ymin_cross = np.empty(0, dtype=float)
            ymax_cross = np.empty(0, dtype=float)

        # Signs for the crossover rows: every run key doubled, minus the very
        # first and very last entry.
        sign_meta = vec_rep_each(run_sign, 2)
        sign_meta = sign_meta[1:-1] if sign_meta.size >= 2 else np.empty(0, dtype=float)

        # Crossover rows carry id 0 then 1, and an order key that places the
        # first copy in the run that ends and the second in the run that starts.
        n_cross = cross.size
        if n_cross:
            id_cross = np.tile(np.array([0, 1]), n_cross)
            ord_cross = np.cumsum(id_cross) + 1
        else:
            id_cross = np.empty(0, dtype=int)
            ord_cross = np.empty(0, dtype=int)

        # Order key of the input rows: 1-based run number, once per row.
        data_ord = vec_rep_each(np.arange(1, len(run_len) + 1), run_len)

        # Only the first input row is flagged as a run start.
        id_data = np.zeros(n_rows, dtype=int)
        id_data[:1] = 1

        assembled = data_frame0(
            x=np.concatenate([x, x_cross]),
            ymin=np.concatenate([ymin, ymin_cross]),
            ymax=np.concatenate([ymax, ymax_cross]),
            ord=np.concatenate([data_ord.astype(float), ord_cross.astype(float)]),
            id=np.concatenate([id_data, id_cross]).astype(int),
            sign=np.concatenate([sign, sign_meta]),
        )

        # Primary key ``ord``, secondary key ``x`` (lexsort takes the primary
        # key last), as in R's order(new$ord, new$x).
        row_order = np.lexsort((assembled["x"].to_numpy(), assembled["ord"].to_numpy()))
        assembled = assembled.iloc[row_order].reset_index(drop=True)

        # Rows where the two series coincide are removed only now.
        assembled = assembled[assembled["sign"].to_numpy() != 0].reset_index(drop=True)

        assembled["sign"] = _sign_factor(assembled["sign"].to_numpy(), levels)
        assembled = assembled.drop(columns=["ord"])
        return assembled
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
def _sign_factor(sign: np.ndarray, levels: Sequence[str]) -> pd.Categorical:
    """Turn numeric signs into a two-category, unordered categorical.

    Equivalent to R's ``factor(sign, levels = c("1", "-1"),
    labels = levels[1:2])``: ``+1`` becomes ``levels[0]``, ``-1`` becomes
    ``levels[1]`` and every other value (``0``, ``NaN``, ...) is missing.

    Parameters
    ----------
    sign : array-like
        Numeric signs. Any sequence is accepted; it is converted with
        :func:`numpy.asarray` before comparison.
    levels : sequence of str
        At least two labels; only the first two are used.

    Returns
    -------
    pandas.Categorical
        Unordered categorical with categories ``[levels[0], levels[1]]``,
        in that order, and one entry per element of *sign*.
    """
    # Coerce first: comparing a plain list with ``== 1`` yields a single
    # ``False`` rather than an element-wise mask, which would silently leave
    # every entry missing.
    sign = np.asarray(sign)

    cats = [levels[0], levels[1]]
    # ``None`` entries become missing values in the categorical.
    out = np.full(len(sign), None, dtype=object)
    out[sign == 1] = levels[0]
    out[sign == -1] = levels[1]
    return pd.Categorical(out, categories=cats, ordered=False)
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
# ---------------------------------------------------------------------------
|
|
300
|
+
# Constructor
|
|
301
|
+
# ---------------------------------------------------------------------------
|
|
302
|
+
def stat_difference(
    mapping: Optional[Any] = None,
    data: Any = None,
    geom: str = "ribbon",
    position: str = "identity",
    *,
    levels: Tuple[str, str] = ("+", "-"),
    na_rm: bool = False,
    orientation: Any = None,
    show_legend: Optional[bool] = None,
    inherit_aes: bool = True,
    **kwargs: Any,
) -> Any:
    """Create a difference-ribbon layer.

    The ribbon is filled according to whether ``ymax`` lies above or below
    ``ymin``, which is handy for comparing two series. To give the two signs
    separate fills the stat may renumber the ``group`` aesthetic, and it
    interpolates both series where they cross so the ribbon is not cut off
    short at the crossover.

    Parameters
    ----------
    mapping : Mapping, optional
        Aesthetic mapping built with ``aes()``.
    data : DataFrame or callable, optional
        Data for this layer.
    geom : str, optional
        Geom that draws the stat's output. Default ``"ribbon"``.
    position : str, optional
        Position adjustment. Default ``"identity"``.
    levels : tuple of str, optional
        Two labels used as ``fill`` levels: the first for ``ymax > ymin``,
        the second for ``ymax < ymin``. Default ``("+", "-")``.
    na_rm : bool, optional
        If ``False`` (default), missing values are removed with a warning.
    orientation : {None, "x", "y"}, optional
        Axis along which the stat runs; ``None`` (default) infers it from
        the aesthetics.
    show_legend : bool, optional
        Whether the layer appears in the legend.
    inherit_aes : bool, optional
        Whether the plot-level mapping is inherited.
    **kwargs
        Additional layer parameters (e.g. ``alpha``); merged into the
        layer's ``params`` after the named ones.

    Returns
    -------
    Layer
        The value returned by ``ggplot2_py.layer.layer``; add it to a plot.

    Notes
    -----
    Within a run of more than two zero-difference values the inner values
    are ignored (as in R).

    Examples
    --------
    >>> import numpy as np, pandas as pd
    >>> from ggplot2_py.aes import aes
    >>> from ggh4x.stat_difference import stat_difference
    >>> rng = np.random.default_rng(0)
    >>> df = pd.DataFrame({
    ...     "x": np.arange(1, 101),
    ...     "y": np.cumsum(rng.standard_normal(100)),
    ...     "z": np.cumsum(rng.standard_normal(100)),
    ... })
    >>> layer = stat_difference(aes(x="x", ymin="y", ymax="z"), data=df, alpha=0.3)
    """
    from ggplot2_py.layer import layer as _layer

    # Named parameters go in first; caller extras are merged afterwards and
    # therefore win on a key clash.
    layer_params = {
        "na_rm": na_rm,
        "orientation": orientation,
        "levels": levels,
    }
    layer_params.update(kwargs)

    return _layer(
        data=data,
        mapping=mapping,
        stat=StatDifference,
        geom=geom,
        position=position,
        show_legend=show_legend,
        inherit_aes=inherit_aes,
        params=layer_params,
    )
|