ggh4x-python 0.3.1.9000__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ggh4x/__init__.py +140 -0
- ggh4x/_aimed_text_grob.py +432 -0
- ggh4x/_borrowed_ggplot2.py +273 -0
- ggh4x/_cli.py +84 -0
- ggh4x/_datasets.py +106 -0
- ggh4x/_download.py +111 -0
- ggh4x/_facet_helpers.py +313 -0
- ggh4x/_facet_utils.py +649 -0
- ggh4x/_gap_grobs.py +606 -0
- ggh4x/_registry.py +10 -0
- ggh4x/_rlang.py +93 -0
- ggh4x/_utils.py +150 -0
- ggh4x/_vctrs.py +233 -0
- ggh4x/conveniences.py +601 -0
- ggh4x/coord_axes_inside.py +380 -0
- ggh4x/element_part_rect.py +545 -0
- ggh4x/facet_grid2.py +1018 -0
- ggh4x/facet_manual.py +901 -0
- ggh4x/facet_nested.py +776 -0
- ggh4x/facet_nested_wrap.py +193 -0
- ggh4x/facet_wrap2.py +896 -0
- ggh4x/geom_box.py +536 -0
- ggh4x/geom_outline_point.py +444 -0
- ggh4x/geom_pointpath.py +259 -0
- ggh4x/geom_polygonraster.py +252 -0
- ggh4x/geom_rectrug.py +489 -0
- ggh4x/geom_text_aimed.py +279 -0
- ggh4x/guide_stringlegend.py +354 -0
- ggh4x/help_secondary.py +549 -0
- ggh4x/multiscale/__init__.py +51 -0
- ggh4x/multiscale/_multiscale_add.py +207 -0
- ggh4x/multiscale/scale_listed.py +167 -0
- ggh4x/multiscale/scale_manual.py +478 -0
- ggh4x/multiscale/scale_multi.py +393 -0
- ggh4x/panel_scales/__init__.py +58 -0
- ggh4x/panel_scales/at_panel.py +115 -0
- ggh4x/panel_scales/facetted_pos_scales.py +647 -0
- ggh4x/panel_scales/force_panelsize.py +411 -0
- ggh4x/panel_scales/scale_facet.py +222 -0
- ggh4x/position_disjoint_ranges.py +229 -0
- ggh4x/position_lineartrans.py +242 -0
- ggh4x/py.typed +0 -0
- ggh4x/resources/faithful.csv +273 -0
- ggh4x/resources/iris.csv +151 -0
- ggh4x/resources/mtcars.csv +33 -0
- ggh4x/resources/pressure.csv +20 -0
- ggh4x/resources/volcano.csv +87 -0
- ggh4x/save.py +255 -0
- ggh4x/stat_difference.py +388 -0
- ggh4x/stat_funxy.py +436 -0
- ggh4x/stat_rle.py +290 -0
- ggh4x/stat_rollingkernel.py +369 -0
- ggh4x/stat_theodensity.py +681 -0
- ggh4x/strip_nested.py +448 -0
- ggh4x/strip_split.py +687 -0
- ggh4x/strip_tag.py +636 -0
- ggh4x/strip_themed.py +232 -0
- ggh4x/strip_vanilla.py +1464 -0
- ggh4x/themes.py +31 -0
- ggh4x/themes_ggh4x.py +67 -0
- ggh4x_python-0.3.1.9000.dist-info/METADATA +40 -0
- ggh4x_python-0.3.1.9000.dist-info/RECORD +64 -0
- ggh4x_python-0.3.1.9000.dist-info/WHEEL +4 -0
- ggh4x_python-0.3.1.9000.dist-info/licenses/LICENSE +3 -0
ggh4x/stat_rle.py
ADDED
|
@@ -0,0 +1,290 @@
|
|
|
1
|
+
"""Run length encoding stat (R source: ggh4x/R/stat_rle.R).
|
|
2
|
+
|
|
3
|
+
Port of ggh4x's :func:`stat_rle` / ``StatRle``. Run length encoding takes a
|
|
4
|
+
vector of values (the ``label`` aesthetic) and, after ordering the data on the
|
|
5
|
+
``x`` aesthetic, computes the lengths of consecutive repeated values, turning
|
|
6
|
+
each run into a rectangle spanning the run's ``x`` extent.
|
|
7
|
+
|
|
8
|
+
In contrast to :func:`base::rle`, ``NA`` values are considered equivalent
|
|
9
|
+
(consecutive ``NA`` form a single run), mirroring ``vctrs::vec_unrep`` as used
|
|
10
|
+
by the R implementation.
|
|
11
|
+
|
|
12
|
+
The computed columns are ``start``, ``end``, ``start_id``, ``end_id``,
|
|
13
|
+
``run_id``, ``runlength`` and ``runvalue``; the geom defaults map these onto a
|
|
14
|
+
``geom_rect`` via :func:`after_stat`.
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
from typing import Any, Dict, List, Optional
|
|
20
|
+
|
|
21
|
+
import numpy as np
|
|
22
|
+
import pandas as pd
|
|
23
|
+
|
|
24
|
+
from ggplot2_py import ggproto_parent # noqa: F401 (kept for parity / future use)
|
|
25
|
+
from ggplot2_py.aes import AfterStat
|
|
26
|
+
from ggplot2_py.layer import layer
|
|
27
|
+
from ggplot2_py.stat import Stat
|
|
28
|
+
|
|
29
|
+
from ._rlang import arg_match0
|
|
30
|
+
|
|
31
|
+
__all__ = ["stat_rle", "StatRle"]
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
# -- helpers ----------------------------------------------------------------
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _vec_unrep(x: "np.ndarray | pd.Series | List[Any]") -> pd.DataFrame:
    """Run-length encode consecutive equal values, treating ``NA`` as equal.

    Consecutive equal values collapse into one run, and consecutive missing
    values (``NA`` / ``NaN`` / ``None``) also collapse into a single run. A
    missing value never matches a non-missing neighbour.

    Values are compared with ``==`` only at positions where both neighbours
    are present. Missing entries are therefore never fed to ``==``, which
    keeps nullable extension dtypes (whose ``pd.NA`` has no truth value)
    from raising during the comparison.

    Parameters
    ----------
    x : numpy.ndarray, pandas.Series, or list
        Input vector. May contain ``NA``/``NaN``/``None``. A ``pandas.Series``
        is used as-is, so its dtype carries over to the returned ``key``.

    Returns
    -------
    pandas.DataFrame
        Two columns: ``key`` (the first value of each run, in order of
        appearance) and ``times`` (``int`` run length).

    Raises
    ------
    ValueError
        If ``x`` is ``None``.
    """
    if x is None:
        raise ValueError("`x` must be a vector, not `None`.")

    s = x if isinstance(x, pd.Series) else pd.Series(list(x))
    n = len(s)
    if n == 0:
        return pd.DataFrame(
            {
                "key": pd.Series([], dtype=s.dtype),
                "times": pd.Series([], dtype=int),
            }
        )

    # Work positionally on plain arrays; the Series index is irrelevant here.
    values = s.to_numpy()
    missing = s.isna().to_numpy()
    cur_na, prev_na = missing[1:], missing[:-1]

    # Compare values only where both neighbours are present, so that missing
    # markers such as ``pd.NA`` never reach ``==``.
    both_present = ~(cur_na | prev_na)
    same_value = np.zeros(n - 1, dtype=bool)
    if both_present.any():
        same_value[both_present] = (
            values[1:][both_present] == values[:-1][both_present]
        )

    # Two neighbours belong to the same run if both are missing or both are
    # present and equal.
    eq = (cur_na & prev_na) | same_value

    # A run starts at position 0 and wherever a value differs from its
    # predecessor.
    change = np.empty(n, dtype=bool)
    change[0] = True
    change[1:] = ~eq
    idx = np.flatnonzero(change)

    # Run length = distance to the next run start (or to the end of the data).
    times = np.diff(np.append(idx, n)).astype(int)
    key = s.iloc[idx].reset_index(drop=True)
    return pd.DataFrame({"key": key, "times": times})
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
# -- ggproto ----------------------------------------------------------------
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
class StatRle(Stat):
    """Stat that turns runs of equal ``label`` values into rectangles.

    Notes
    -----
    Port of ``StatRle`` (``ggh4x/R/stat_rle.R``). Each group is sorted on
    ``x``, its ``label`` column is run-length encoded with missing values
    treated as equal, and one output row is produced per run. The ``align``
    parameter (``none``/``centre``/``start``/``end``) selects which ``x``
    values are reported as the run's ``start`` and ``end``.
    """

    required_aes: List[str] = ["x", "label"]
    # By default a run spans ``start``..``end`` horizontally, the full panel
    # vertically, and is filled by the run's value.
    default_aes: Dict[str, Any] = {
        "xmin": AfterStat("start"),
        "xmax": AfterStat("end"),
        "ymin": AfterStat(lambda d: np.full(len(d), -np.inf)),
        "ymax": AfterStat(lambda d: np.full(len(d), np.inf)),
        "fill": AfterStat("runvalue"),
    }
    dropped_aes: List[str] = ["x", "label"]
    extra_params: List[str] = ["na_rm", "orientation", "align"]

    def setup_params(self, data: pd.DataFrame, params: Dict[str, Any]) -> Dict[str, Any]:
        """Derive ``flipped_aes`` from the ``orientation`` parameter.

        Parameters
        ----------
        data : pandas.DataFrame
            Layer data; not read by this method.
        params : dict
            Stat parameters; ``orientation`` is looked up and the dict is
            updated in place.

        Returns
        -------
        dict
            The same ``params`` object with ``flipped_aes`` set to ``True``
            when ``orientation`` equals ``"y"`` and ``False`` otherwise.
        """
        orientation = params.get("orientation")
        params["flipped_aes"] = orientation == "y"
        return params

    def compute_group(
        self,
        data: pd.DataFrame,
        scales: Any = None,
        flipped_aes: bool = False,
        align: str = "none",
        **kwargs: Any,
    ) -> pd.DataFrame:
        """Compute one row per run of ``label`` for a single group.

        Parameters
        ----------
        data : pandas.DataFrame
            Group data; the ``x`` and ``label`` columns are read.
        scales : Any, optional
            Not used by the computation.
        flipped_aes : bool, optional
            Not used by the computation.
        align : {'none', 'centre', 'start', 'end'}, optional
            Which ``x`` values bound each run. Neighbouring observations
            are clamped to the first/last row of the group.

            * ``none`` -- ``x`` of the run's first and last observation.
            * ``centre`` -- midpoint between the run's first observation and
              the one before it, and between the run's last observation and
              the one after it.
            * ``start`` -- ``x`` of the run's first observation up to ``x``
              of the observation following the run.
            * ``end`` -- ``x`` of the observation preceding the run up to
              ``x`` of the run's last observation.

            Any other value behaves like ``none``.
        **kwargs : Any
            Accepted and ignored.

        Returns
        -------
        pandas.DataFrame
            Columns ``start``, ``end``, ``start_id``, ``end_id``, ``run_id``,
            ``runlength`` and ``runvalue``. The ``*_id`` columns and
            ``run_id`` are 1-based.
        """
        # Stable sort so tied x values keep their input order.
        by_x = np.argsort(data["x"].to_numpy(), kind="stable")
        ordered = data.iloc[by_x].reset_index(drop=True)
        n_rows = len(ordered)
        positions = ordered["x"].to_numpy()

        runs = _vec_unrep(ordered["label"])
        lengths = runs["times"].to_numpy()

        # 1-based row numbers of the first and last observation of each run.
        last_row = np.cumsum(lengths)
        first_row = last_row - lengths + 1

        def x_at(rows_1based: np.ndarray) -> np.ndarray:
            # Translate 1-based row numbers into x values.
            return positions[rows_1based.astype(int) - 1]

        if align == "centre":
            start = (
                x_at(np.maximum(first_row, 1)) + x_at(np.maximum(first_row - 1, 1))
            ) / 2.0
            end = (
                x_at(np.minimum(last_row, n_rows))
                + x_at(np.minimum(last_row + 1, n_rows))
            ) / 2.0
        else:
            # "end" pulls the start back to the preceding observation;
            # "start" pushes the end forward to the following observation;
            # anything else uses the run's own boundary observations.
            if align == "end":
                start = x_at(np.maximum(first_row - 1, 1))
            else:
                start = x_at(first_row)
            if align == "start":
                end = x_at(np.minimum(last_row + 1, n_rows))
            else:
                end = x_at(last_row)

        run_number = np.arange(1, len(runs) + 1)

        columns = {
            "start": start,
            "end": end,
            "start_id": first_row.astype(int),
            "end_id": last_row.astype(int),
            "run_id": run_number.astype(int),
            "runlength": lengths.astype(int),
            "runvalue": runs["key"].reset_index(drop=True),
        }
        return pd.DataFrame(columns)
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
# -- constructor ------------------------------------------------------------
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
def stat_rle(
    mapping: Optional[Any] = None,
    data: Optional[Any] = None,
    geom: str = "rect",
    position: str = "identity",
    *,
    align: str = "none",
    na_rm: bool = False,
    orientation: str = "x",
    show_legend: Optional[bool] = None,
    inherit_aes: bool = True,
    **kwargs: Any,
) -> Any:
    """Create a run length encoding layer.

    The ``label`` aesthetic is run-length encoded after ordering the data on
    ``x``; with the default geom every run is drawn as a rectangle.

    Parameters
    ----------
    mapping : Mapping, optional
        Aesthetic mapping. Requires ``x`` and ``label``.
    data : DataFrame or callable, optional
        Layer data.
    geom : str, optional
        Geom to use. Defaults to ``"rect"``.
    position : str, optional
        Position adjustment. Defaults to ``"identity"``.
    align : {'none', 'centre', 'center', 'start', 'end'}, optional
        Affects the computed ``start`` and ``end`` variables. The value is
        checked with ``arg_match0`` against the listed choices, and
        ``"center"`` is rewritten to ``"centre"`` before being handed to the
        stat. Defaults to ``"none"``.
    na_rm : bool, optional
        If ``False`` (default), missing values are removed with a warning.
    orientation : {'x', 'y'}, optional
        Orientation of the stat. Defaults to ``"x"``.
    show_legend : bool, optional
        Whether to show a legend for this layer.
    inherit_aes : bool, optional
        Whether to inherit aesthetics from the plot. Defaults to ``True``.
    **kwargs : Any
        Additional parameters, merged into the layer parameters.

    Returns
    -------
    Layer
        The object returned by ``ggplot2_py.layer.layer``.
    """
    matched = arg_match0(align, ["none", "centre", "center", "start", "end"], "align")
    # Accept the American spelling but use one canonical name downstream.
    canonical_align = "centre" if matched == "center" else matched

    stat_params: Dict[str, Any] = {
        "na_rm": na_rm,
        "orientation": orientation,
        "align": canonical_align,
        **kwargs,
    }

    return layer(
        data=data,
        mapping=mapping,
        stat=StatRle,
        geom=geom,
        position=position,
        show_legend=show_legend,
        inherit_aes=inherit_aes,
        params=stat_params,
    )
|
|
@@ -0,0 +1,369 @@
|
|
|
1
|
+
"""Rolling-kernel smoother stat.
|
|
2
|
+
|
|
3
|
+
Python port of ``stat_roll.R`` from the R package **ggh4x**
|
|
4
|
+
(``StatRollingkernel`` ggproto object, the ``stat_rollingkernel`` constructor,
|
|
5
|
+
and the three kernel helpers ``.kernel_norm`` / ``.kernel_unif`` /
|
|
6
|
+
``.kernel_cauchy``).
|
|
7
|
+
|
|
8
|
+
A rolling kernel moves along one of the axes and assigns weights to datapoints
|
|
9
|
+
depending on the distance to the kernel's location. It then computes a weighted
|
|
10
|
+
average of the y-values, creating a trendline. Unlike a (weighted) rolling
|
|
11
|
+
average, the spacing between datapoints need not be constant.
|
|
12
|
+
|
|
13
|
+
The computation follows the R source verbatim:
|
|
14
|
+
|
|
15
|
+
#. ``flip_data`` the input when ``orientation == "y"``.
|
|
16
|
+
#. Resolve the bandwidth: a :class:`~ggplot2_py.Rel` object becomes
|
|
17
|
+
``bw.value * diff(range(x))``; a string names one of the ``stats::bw.*``
|
|
18
|
+
rules (delegated to :func:`ggplot2_py.stat._precompute_bw`).
|
|
19
|
+
#. Drop non-finite ``x``/``y`` rows.
|
|
20
|
+
#. Build an evaluation sequence of length ``n`` spanning
|
|
21
|
+
``mid +/- (1 + expand) * 0.5 * diff(range(x))``.
|
|
22
|
+
#. Form the outer difference matrix ``outer(x, seq, "-")``, apply the kernel,
|
|
23
|
+
column-normalize by ``colSums`` (this yields ``NaN`` columns where the kernel
|
|
24
|
+
has zero total weight, exactly as in R), multiply by ``y`` and sum down the
|
|
25
|
+
columns to obtain the smoothed value.
|
|
26
|
+
#. ``flip_data`` the result back.
|
|
27
|
+
|
|
28
|
+
The kernels mirror ``stats::dnorm`` / ``stats::dunif`` / ``stats::dcauchy``
|
|
29
|
+
through ``scipy.stats`` PDFs.
|
|
30
|
+
"""
|
|
31
|
+
|
|
32
|
+
from __future__ import annotations
|
|
33
|
+
|
|
34
|
+
from typing import Any, Callable, Dict, Optional
|
|
35
|
+
|
|
36
|
+
import numpy as np
|
|
37
|
+
import pandas as pd
|
|
38
|
+
|
|
39
|
+
from ggplot2_py import Rel
|
|
40
|
+
from ggplot2_py.layer import layer as _layer
|
|
41
|
+
from ggplot2_py.stat import Stat, _flip_data, _precompute_bw
|
|
42
|
+
|
|
43
|
+
from ._cli import cli_abort
|
|
44
|
+
|
|
45
|
+
__all__ = [
|
|
46
|
+
"StatRollingkernel",
|
|
47
|
+
"stat_rollingkernel",
|
|
48
|
+
"_kernel_norm",
|
|
49
|
+
"_kernel_unif",
|
|
50
|
+
"_kernel_cauchy",
|
|
51
|
+
]
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
# ---------------------------------------------------------------------------
|
|
55
|
+
# Kernels
|
|
56
|
+
# ---------------------------------------------------------------------------
|
|
57
|
+
|
|
58
|
+
def _kernel_norm(x: np.ndarray, bw: float) -> np.ndarray:
    """Evaluate a zero-centred Gaussian kernel.

    Port of ``.kernel_norm <- function(x, bw) dnorm(x, sd = bw)``.

    Parameters
    ----------
    x : numpy.ndarray
        Distances from the kernel location.
    bw : float
        Bandwidth, passed as the standard deviation of the normal density.

    Returns
    -------
    numpy.ndarray
        Normal density evaluated at ``x``, i.e. ``dnorm(x, sd = bw)``.
    """
    # scipy is imported on demand, inside the function.
    from scipy.stats import norm as _gaussian

    weights = _gaussian.pdf(x, scale=bw)
    return weights
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def _kernel_unif(x: np.ndarray, bw: float) -> np.ndarray:
    """Evaluate a uniform (boxcar) kernel centred on zero.

    Port of ``.kernel_unif <- function(x, bw) dunif(x, min = -0.5 * bw,
    max = 0.5 * bw)``. Every point inside the window gets the same weight,
    which makes the smoother a simple unweighted moving average.

    Parameters
    ----------
    x : numpy.ndarray
        Distances from the kernel location.
    bw : float
        Bandwidth; the window spans ``[-0.5 * bw, 0.5 * bw]``.

    Returns
    -------
    numpy.ndarray
        Uniform density evaluated at ``x``, i.e.
        ``dunif(x, -0.5 * bw, 0.5 * bw)``.
    """
    from scipy.stats import uniform as _boxcar

    # scipy describes the uniform distribution by its lower edge (``loc``)
    # and its width (``scale``) rather than by (min, max); the support
    # [loc, loc + scale] includes both endpoints.
    lower_edge = -0.5 * bw
    return _boxcar.pdf(x, loc=lower_edge, scale=bw)
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def _kernel_cauchy(x: np.ndarray, bw: float) -> np.ndarray:
    """Evaluate a zero-centred Cauchy kernel.

    Port of ``.kernel_cauchy <- function(x, bw) dcauchy(x, scale = bw)``.
    Compared with the normal kernel, the Cauchy density has heavier tails.

    Parameters
    ----------
    x : numpy.ndarray
        Distances from the kernel location.
    bw : float
        Bandwidth, passed as the scale of the Cauchy density; the location
        is left at its default.

    Returns
    -------
    numpy.ndarray
        Cauchy density evaluated at ``x``, i.e. ``dcauchy(x, scale = bw)``.
    """
    from scipy.stats import cauchy as _heavy_tailed

    weights = _heavy_tailed.pdf(x, scale=bw)
    return weights
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
# String names accepted for ``kernel`` (the names handled by the R ``switch``),
# each mapped to the kernel callable it selects. ``"norm"`` is an alias of
# ``"gaussian"`` and ``"mean"`` is an alias of ``"unif"``.
_KERNEL_LOOKUP: Dict[str, Callable[[np.ndarray, float], np.ndarray]] = dict(
    gaussian=_kernel_norm,
    norm=_kernel_norm,
    unif=_kernel_unif,
    mean=_kernel_unif,
    cauchy=_kernel_cauchy,
)
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
# ---------------------------------------------------------------------------
|
|
140
|
+
# ggproto
|
|
141
|
+
# ---------------------------------------------------------------------------
|
|
142
|
+
|
|
143
|
+
class StatRollingkernel(Stat):
    """Rolling-kernel smoother stat (port of ``StatRollingkernel``).

    Computed variables
    ------------------
    x : float
        Evenly spaced evaluation positions.
    y : float
        Kernel-weighted average of the input ``y`` at each position.
    weight : float
        Sum of the kernel weights at each position.
    scaled : float
        ``weight`` divided by the sum of ``weight`` over the group.
    """

    required_aes = ["x", "y"]
    extra_params = ["na_rm", "orientation"]

    def setup_params(
        self, data: pd.DataFrame, params: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Set ``flipped_aes`` and turn a kernel name into a callable.

        Parameters
        ----------
        data : pandas.DataFrame
            Layer data; not read by this method.
        params : dict
            Layer parameters; ``orientation`` and ``kernel`` are looked up
            and the dict is updated in place.

        Returns
        -------
        dict
            The same ``params`` object with ``flipped_aes`` set to whether
            ``orientation`` equals ``"y"``. When ``kernel`` was a string it
            is replaced by the matching entry of ``_KERNEL_LOOKUP``; a
            non-string ``kernel`` is left untouched.

        Raises
        ------
        Exception
            Via :func:`ggh4x._cli.cli_abort` if the kernel name is unknown.
        """
        params["flipped_aes"] = params.get("orientation") == "y"

        kernel = params.get("kernel")
        if not isinstance(kernel, str):
            # Anything that is not a name is passed through unchanged.
            return params

        resolved = _KERNEL_LOOKUP.get(kernel)
        if resolved is None:
            cli_abort(f"Unknown kernel specification: {kernel}.")
        params["kernel"] = resolved
        return params

    def compute_group(
        self,
        data: pd.DataFrame,
        scales: Any,
        n: int = 256,
        bw: Any = 0.02,
        expand: float = 0,
        kernel: Callable[[np.ndarray, float], np.ndarray] = _kernel_norm,
        flipped_aes: bool = False,
    ) -> pd.DataFrame:
        """Compute the rolling-kernel trendline for one group.

        Parameters
        ----------
        data : pandas.DataFrame
            Group data; the ``x`` and ``y`` columns are read.
        scales : Any
            Not used by the computation.
        n : int, default 256
            Number of evaluation points.
        bw : float, str, or ggplot2_py.Rel, default 0.02
            Bandwidth. A :class:`~ggplot2_py.Rel` is multiplied by the range
            of ``x``; a string must be one of ``nrd0``, ``nrd``, ``ucv``,
            ``bcv``, ``sj``, ``sj-ste`` or ``sj-dpi`` (case-insensitive) and
            is resolved through ``_precompute_bw``. Both are resolved before
            non-finite rows are dropped.
        expand : float, default 0
            Fraction by which the evaluation range is widened beyond the
            range of the finite ``x`` values.
        kernel : callable, default :func:`_kernel_norm`
            Kernel ``f(distances, bw) -> weights``.
        flipped_aes : bool, default False
            Passed to ``_flip_data`` for both the input and the result.

        Returns
        -------
        pandas.DataFrame
            Columns ``x``, ``y``, ``weight`` and ``scaled``, passed through
            ``_flip_data`` with ``flipped_aes``.

        Raises
        ------
        Exception
            Via :func:`ggh4x._cli.cli_abort` if ``bw`` is a string that does
            not name a supported bandwidth rule.
        """
        data = _flip_data(data, flipped_aes)
        x_all = np.asarray(data["x"], dtype=float)

        # -- Bandwidth: relative, rule-based, or already numeric ---------------
        if isinstance(bw, Rel):
            bw = bw.value * (np.nanmax(x_all) - np.nanmin(x_all))
        elif isinstance(bw, str):
            rule = bw.lower()
            supported_rules = ("nrd0", "nrd", "ucv", "bcv", "sj", "sj-ste", "sj-dpi")
            if rule not in supported_rules:
                cli_abort(f"Unknown bandwidth rule: {bw}.")
            bw = _precompute_bw(x_all, rule)
        bw = float(bw)

        # -- Keep only rows where both coordinates are finite ------------------
        y_all = np.asarray(data["y"], dtype=float)
        finite = np.isfinite(x_all) & np.isfinite(y_all)
        x = x_all[finite]
        y = y_all[finite]

        # -- Evaluation grid: centred on the data, widened by ``expand`` -------
        lo = float(x.min())
        hi = float(x.max())
        mid = (lo + hi) / 2.0
        half = (1.0 + expand) * (0.5 * (hi - lo))
        grid = np.linspace(mid - half, mid + half, int(n))

        # -- Kernel weights: rows are data points, columns are grid points -----
        distances = x[:, None] - grid[None, :]
        weights = np.asarray(kernel(distances, bw), dtype=float)

        # A grid point with zero total weight yields 0/0 = NaN; the warnings
        # are suppressed and the NaN is kept in the output.
        with np.errstate(divide="ignore", invalid="ignore"):
            weight = weights.sum(axis=0)
            normalised = weights / weight[None, :]
        y_out = (normalised * y[:, None]).sum(axis=0)

        with np.errstate(divide="ignore", invalid="ignore"):
            scaled = weight / weight.sum()

        trend = pd.DataFrame(
            {
                "x": grid,
                "y": y_out,
                "weight": weight,
                "scaled": scaled,
            }
        )
        return _flip_data(trend, flipped_aes)
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
# ---------------------------------------------------------------------------
|
|
290
|
+
# Constructor
|
|
291
|
+
# ---------------------------------------------------------------------------
|
|
292
|
+
|
|
293
|
+
def stat_rollingkernel(
    mapping: Optional[Any] = None,
    data: Any = None,
    geom: str = "line",
    position: str = "identity",
    *,
    bw: Any = "nrd",
    kernel: Any = "gaussian",
    n: int = 256,
    expand: float = 0.1,
    na_rm: bool = False,
    orientation: str = "x",
    show_legend: Optional[bool] = None,
    inherit_aes: bool = True,
    **kwargs: Any,
) -> Any:
    """Create a rolling-kernel trendline layer.

    A kernel is moved along one axis; at each position the datapoints are
    weighted by their distance to the kernel and the weighted average of the
    y-values is taken, which yields a trendline.

    Parameters
    ----------
    mapping : aes, optional
        Aesthetic mapping.
    data : DataFrame or callable, optional
        Layer data.
    geom : str, default ``"line"``
        Geom used to draw the trendline.
    position : str, default ``"identity"``
        Position adjustment.
    bw : float, str, or ggplot2_py.Rel, default ``"nrd"``
        Bandwidth: a number giving the kernel width in data units, a
        :class:`~ggplot2_py.Rel` giving a width relative to the group's data
        range, or a string naming one of the ``stats::bw.nrd`` family of
        rules.
    kernel : str or callable, default ``"gaussian"``
        A callable ``f(distances, bw) -> weights``, or one of the names
        ``"gaussian"``/``"norm"``, ``"unif"``/``"mean"`` or ``"cauchy"``.
    n : int, default 256
        Number of points to return per group.
    expand : float, default 0.1
        How far to extend the evaluation range beyond the extreme datapoints.
    na_rm : bool, default False
        Whether to silently remove missing values.
    orientation : str, default ``"x"``
        Axis along which the kernel rolls, either ``"x"`` or ``"y"``.
    show_legend : bool, optional
        Whether to show a legend.
    inherit_aes : bool, default True
        Whether to inherit aesthetics from the plot.
    **kwargs
        Additional parameters, merged into the layer parameters.

    Returns
    -------
    Layer
        The object returned by ``ggplot2_py.layer.layer``.
    """
    # Named arguments go first; extra keyword arguments are appended after.
    stat_params: Dict[str, Any] = dict(
        bw=bw,
        kernel=kernel,
        n=n,
        expand=expand,
        na_rm=na_rm,
        orientation=orientation,
    )
    stat_params.update(kwargs)

    return _layer(
        data=data,
        mapping=mapping,
        stat=StatRollingkernel,
        geom=geom,
        position=position,
        show_legend=show_legend,
        inherit_aes=inherit_aes,
        params=stat_params,
    )
|