ggh4x-python 0.3.1.9000__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ggh4x/__init__.py +140 -0
- ggh4x/_aimed_text_grob.py +432 -0
- ggh4x/_borrowed_ggplot2.py +273 -0
- ggh4x/_cli.py +84 -0
- ggh4x/_datasets.py +106 -0
- ggh4x/_download.py +111 -0
- ggh4x/_facet_helpers.py +313 -0
- ggh4x/_facet_utils.py +649 -0
- ggh4x/_gap_grobs.py +606 -0
- ggh4x/_registry.py +10 -0
- ggh4x/_rlang.py +93 -0
- ggh4x/_utils.py +150 -0
- ggh4x/_vctrs.py +233 -0
- ggh4x/conveniences.py +601 -0
- ggh4x/coord_axes_inside.py +380 -0
- ggh4x/element_part_rect.py +545 -0
- ggh4x/facet_grid2.py +1018 -0
- ggh4x/facet_manual.py +901 -0
- ggh4x/facet_nested.py +776 -0
- ggh4x/facet_nested_wrap.py +193 -0
- ggh4x/facet_wrap2.py +896 -0
- ggh4x/geom_box.py +536 -0
- ggh4x/geom_outline_point.py +444 -0
- ggh4x/geom_pointpath.py +259 -0
- ggh4x/geom_polygonraster.py +252 -0
- ggh4x/geom_rectrug.py +489 -0
- ggh4x/geom_text_aimed.py +279 -0
- ggh4x/guide_stringlegend.py +354 -0
- ggh4x/help_secondary.py +549 -0
- ggh4x/multiscale/__init__.py +51 -0
- ggh4x/multiscale/_multiscale_add.py +207 -0
- ggh4x/multiscale/scale_listed.py +167 -0
- ggh4x/multiscale/scale_manual.py +478 -0
- ggh4x/multiscale/scale_multi.py +393 -0
- ggh4x/panel_scales/__init__.py +58 -0
- ggh4x/panel_scales/at_panel.py +115 -0
- ggh4x/panel_scales/facetted_pos_scales.py +647 -0
- ggh4x/panel_scales/force_panelsize.py +411 -0
- ggh4x/panel_scales/scale_facet.py +222 -0
- ggh4x/position_disjoint_ranges.py +229 -0
- ggh4x/position_lineartrans.py +242 -0
- ggh4x/py.typed +0 -0
- ggh4x/resources/faithful.csv +273 -0
- ggh4x/resources/iris.csv +151 -0
- ggh4x/resources/mtcars.csv +33 -0
- ggh4x/resources/pressure.csv +20 -0
- ggh4x/resources/volcano.csv +87 -0
- ggh4x/save.py +255 -0
- ggh4x/stat_difference.py +388 -0
- ggh4x/stat_funxy.py +436 -0
- ggh4x/stat_rle.py +290 -0
- ggh4x/stat_rollingkernel.py +369 -0
- ggh4x/stat_theodensity.py +681 -0
- ggh4x/strip_nested.py +448 -0
- ggh4x/strip_split.py +687 -0
- ggh4x/strip_tag.py +636 -0
- ggh4x/strip_themed.py +232 -0
- ggh4x/strip_vanilla.py +1464 -0
- ggh4x/themes.py +31 -0
- ggh4x/themes_ggh4x.py +67 -0
- ggh4x_python-0.3.1.9000.dist-info/METADATA +40 -0
- ggh4x_python-0.3.1.9000.dist-info/RECORD +64 -0
- ggh4x_python-0.3.1.9000.dist-info/WHEEL +4 -0
- ggh4x_python-0.3.1.9000.dist-info/licenses/LICENSE +3 -0
ggh4x/conveniences.py
ADDED
|
@@ -0,0 +1,601 @@
|
|
|
1
|
+
"""Convenience helpers (R source: conveniences.R, plus ``sep_discrete`` from scale_manual.R).
|
|
2
|
+
|
|
3
|
+
Ports of ggh4x's small user-facing convenience functions:
|
|
4
|
+
|
|
5
|
+
* :func:`distribute_args` -- vectorised argument distributor that calls a function once per
|
|
6
|
+
"column" of arguments.
|
|
7
|
+
* :func:`elem_list_text` / :func:`elem_list_rect` -- thin wrappers over
|
|
8
|
+
:func:`distribute_args` that build lists of ``element_text`` / ``element_rect`` theme
|
|
9
|
+
elements.
|
|
10
|
+
* :func:`weave_factors` -- an ``interaction``-like factor builder with first-input-priority
|
|
11
|
+
level ordering.
|
|
12
|
+
* :func:`center_limits` -- a function factory producing symmetric scale limits.
|
|
13
|
+
* :func:`sep_discrete` -- a function factory that maps separator-delimited discrete labels to
|
|
14
|
+
numeric positions (R source: scale_manual.R).
|
|
15
|
+
|
|
16
|
+
All semantics were verified against a live R ``ggh4x`` session (level ordering, NA handling,
|
|
17
|
+
length-1 vs. length-n behaviour, and the run-length grouping in ``sep_discrete``).
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from __future__ import annotations
|
|
21
|
+
|
|
22
|
+
import inspect
|
|
23
|
+
from itertools import product
|
|
24
|
+
from typing import Any, Callable, List, Sequence
|
|
25
|
+
|
|
26
|
+
import numpy as np
|
|
27
|
+
import pandas as pd
|
|
28
|
+
|
|
29
|
+
from ._cli import cli_abort
|
|
30
|
+
from ._vctrs import vec_unrep
|
|
31
|
+
|
|
32
|
+
try: # pragma: no cover - import guard; ggplot2_py is always present in this env
|
|
33
|
+
from ggplot2_py.theme_elements import element_rect, element_text
|
|
34
|
+
except ImportError: # pragma: no cover
|
|
35
|
+
element_text = None # type: ignore[assignment]
|
|
36
|
+
element_rect = None # type: ignore[assignment]
|
|
37
|
+
|
|
38
|
+
# Names exported by ``from <module> import *``: the six public convenience functions
# defined below. Underscore-prefixed helpers are not listed.
__all__ = [
    "distribute_args",
    "elem_list_text",
    "elem_list_rect",
    "weave_factors",
    "center_limits",
    "sep_discrete",
]
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
# -- distribute_args -------------------------------------------------------------------------
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _is_na_scalar(x: Any) -> bool:
|
|
52
|
+
"""Return ``True`` for a scalar that represents R's ``NA`` / Python missing value.
|
|
53
|
+
|
|
54
|
+
Parameters
|
|
55
|
+
----------
|
|
56
|
+
x : Any
|
|
57
|
+
Candidate scalar.
|
|
58
|
+
|
|
59
|
+
Returns
|
|
60
|
+
-------
|
|
61
|
+
bool
|
|
62
|
+
``True`` if *x* is ``None`` or a float/NaT-style NaN.
|
|
63
|
+
"""
|
|
64
|
+
if x is None:
|
|
65
|
+
return True
|
|
66
|
+
if isinstance(x, float) and np.isnan(x):
|
|
67
|
+
return True
|
|
68
|
+
# numpy scalar NaN
|
|
69
|
+
if isinstance(x, np.floating) and np.isnan(x):
|
|
70
|
+
return True
|
|
71
|
+
try:
|
|
72
|
+
return bool(pd.isna(x))
|
|
73
|
+
except (TypeError, ValueError):
|
|
74
|
+
return False
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def _cell_is_atomic(cell: Any) -> bool:
|
|
78
|
+
"""Mirror R ``is.vector`` for a distribute_args matrix cell.
|
|
79
|
+
|
|
80
|
+
R only NA-collapses *vector* cells (atomic vectors / bare lists). Classed objects such
|
|
81
|
+
as ``margin`` and theme elements are not vectors and are left untouched.
|
|
82
|
+
|
|
83
|
+
Parameters
|
|
84
|
+
----------
|
|
85
|
+
cell : Any
|
|
86
|
+
A single cell value.
|
|
87
|
+
|
|
88
|
+
Returns
|
|
89
|
+
-------
|
|
90
|
+
bool
|
|
91
|
+
``True`` when *cell* should be subjected to the NA-collapse rule.
|
|
92
|
+
"""
|
|
93
|
+
if cell is None:
|
|
94
|
+
# NULL is *not* a vector in R (is.vector(NULL) == FALSE), but it is length-0 and gets
|
|
95
|
+
# dropped downstream regardless, so treat it as non-atomic here.
|
|
96
|
+
return False
|
|
97
|
+
if isinstance(cell, (str, bytes)):
|
|
98
|
+
return True
|
|
99
|
+
if isinstance(cell, (int, float, bool, complex)):
|
|
100
|
+
return True
|
|
101
|
+
if isinstance(cell, (np.generic,)):
|
|
102
|
+
return True
|
|
103
|
+
if isinstance(cell, (list, tuple, np.ndarray, pd.Series, range)):
|
|
104
|
+
return True
|
|
105
|
+
return False
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def _contains_na(cell: Any) -> bool:
|
|
109
|
+
"""Return ``True`` when an atomic cell contains any NA (R: ``any(is.na(x))``).
|
|
110
|
+
|
|
111
|
+
Parameters
|
|
112
|
+
----------
|
|
113
|
+
cell : Any
|
|
114
|
+
An atomic cell (scalar or sequence of scalars).
|
|
115
|
+
|
|
116
|
+
Returns
|
|
117
|
+
-------
|
|
118
|
+
bool
|
|
119
|
+
"""
|
|
120
|
+
if isinstance(cell, (str, bytes)):
|
|
121
|
+
return False
|
|
122
|
+
if isinstance(cell, (list, tuple, np.ndarray, pd.Series, range)):
|
|
123
|
+
return any(_is_na_scalar(v) for v in cell)
|
|
124
|
+
return _is_na_scalar(cell)
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def _as_cells(value: Any) -> List[Any]:
|
|
128
|
+
"""Split an argument into its per-column cells, mirroring R ``as.list(arg)``.
|
|
129
|
+
|
|
130
|
+
A list/tuple/Series is treated as the explicit sequence of cells (each element becomes
|
|
131
|
+
one cell; an element that is itself a sequence becomes a *vector* cell). Any other value
|
|
132
|
+
is a length-1 argument occupying a single cell.
|
|
133
|
+
|
|
134
|
+
Parameters
|
|
135
|
+
----------
|
|
136
|
+
value : Any
|
|
137
|
+
The raw argument value.
|
|
138
|
+
|
|
139
|
+
Returns
|
|
140
|
+
-------
|
|
141
|
+
list
|
|
142
|
+
The cells for this argument, in column order.
|
|
143
|
+
"""
|
|
144
|
+
if isinstance(value, (list, tuple)):
|
|
145
|
+
return list(value)
|
|
146
|
+
if isinstance(value, pd.Series):
|
|
147
|
+
return list(value)
|
|
148
|
+
if isinstance(value, np.ndarray) and value.ndim == 1:
|
|
149
|
+
return list(value)
|
|
150
|
+
return [value]
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def _signature_param_names(fun: Callable[..., Any]) -> List[str]:
|
|
154
|
+
"""Return the keyword-argument names of *fun*, mirroring R ``names(formals(.fun))``.
|
|
155
|
+
|
|
156
|
+
Parameters
|
|
157
|
+
----------
|
|
158
|
+
fun : callable
|
|
159
|
+
|
|
160
|
+
Returns
|
|
161
|
+
-------
|
|
162
|
+
list of str
|
|
163
|
+
Names of parameters that can be passed by keyword (``**kwargs`` is excluded, like R
|
|
164
|
+
cannot name a ``...`` formal).
|
|
165
|
+
"""
|
|
166
|
+
names: List[str] = []
|
|
167
|
+
for name, param in inspect.signature(fun).parameters.items():
|
|
168
|
+
if param.kind in (
|
|
169
|
+
inspect.Parameter.POSITIONAL_OR_KEYWORD,
|
|
170
|
+
inspect.Parameter.KEYWORD_ONLY,
|
|
171
|
+
):
|
|
172
|
+
names.append(name)
|
|
173
|
+
return names
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def distribute_args(
    *,
    fun: Callable[..., Any] = None,
    cull: bool = True,
    **kwargs: Any,
) -> Any:
    """Call *fun* once per "column" of vectorised keyword arguments.

    Port of ggh4x's ``distribute_args`` (conveniences.R). The ``i``-th cell of every
    argument goes to the ``i``-th call. Arguments are **not** recycled: one that has fewer
    cells than the longest argument simply does not appear in the later calls, so a
    length-1 argument only reaches the first call.

    Parameters
    ----------
    fun : callable, optional
        Function receiving the distributed arguments. When ``None``, the module-level
        ``element_text`` is used.
    cull : bool, default True
        When ``True``, keyword arguments that *fun* cannot accept by name (according to
        its signature) are silently discarded.
    **kwargs : Any
        Vectorised arguments. A list / tuple / 1-D array / Series supplies one cell per
        call (a nested sequence is one vector cell); any other value is a single cell.
        ``None`` cells are skipped, and an atomic cell containing any missing value is
        skipped as a whole (R: "NA vectors become NULL"). One-element list / tuple /
        1-D array cells are unwrapped to their single item before the call.

    Returns
    -------
    list or object
        A list with one *fun* result per column, or -- when no argument survives culling
        and the zero-length filter -- the result of a single bare ``fun()`` call.

    Notes
    -----
    Valid names come from *fun*'s signature, so with ``cull=True`` a function whose only
    keyword surface is ``**kwargs`` has all of its arguments dropped.
    """
    target = element_text if fun is None else fun

    # Keep only the keywords the target accepts by name (R: names(formals(.fun))).
    if cull:
        accepted = set(_signature_param_names(target))
        candidates = {key: val for key, val in kwargs.items() if key in accepted}
    else:
        candidates = dict(kwargs)

    # Discard what R would call length-0 arguments, then split the rest into cells.
    columns = {
        key: _as_cells(val)
        for key, val in candidates.items()
        if not _is_zero_length(val)
    }
    if not columns:
        return target()

    n_calls = max(len(cells) for cells in columns.values())
    outputs: List[Any] = []
    for idx in range(n_calls):
        call_kwargs: dict = {}
        for key, cells in columns.items():
            if idx >= len(cells):
                continue  # this argument ran out of cells; it is not recycled
            cell = cells[idx]
            if cell is None:
                continue  # explicit NULL
            if _cell_is_atomic(cell) and _contains_na(cell):
                continue  # NA-containing vector collapses to NULL
            call_kwargs[key] = _unwrap_singleton(cell)
        outputs.append(target(**call_kwargs))
    return outputs
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
def _is_zero_length(value: Any) -> bool:
|
|
254
|
+
"""Return ``True`` for an argument R would treat as length-0 (``lengths(x) == 0``).
|
|
255
|
+
|
|
256
|
+
Parameters
|
|
257
|
+
----------
|
|
258
|
+
value : Any
|
|
259
|
+
|
|
260
|
+
Returns
|
|
261
|
+
-------
|
|
262
|
+
bool
|
|
263
|
+
"""
|
|
264
|
+
if value is None:
|
|
265
|
+
return True
|
|
266
|
+
if isinstance(value, (list, tuple)):
|
|
267
|
+
return len(value) == 0
|
|
268
|
+
if isinstance(value, (np.ndarray, pd.Series, pd.Index)):
|
|
269
|
+
return len(value) == 0
|
|
270
|
+
return False
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
def _unwrap_singleton(cell: Any) -> Any:
|
|
274
|
+
"""Unwrap a one-element sequence cell to its scalar, leaving longer vectors intact.
|
|
275
|
+
|
|
276
|
+
Parameters
|
|
277
|
+
----------
|
|
278
|
+
cell : Any
|
|
279
|
+
|
|
280
|
+
Returns
|
|
281
|
+
-------
|
|
282
|
+
Any
|
|
283
|
+
"""
|
|
284
|
+
if isinstance(cell, (list, tuple)) and len(cell) == 1:
|
|
285
|
+
return cell[0]
|
|
286
|
+
if isinstance(cell, np.ndarray) and cell.ndim == 1 and cell.size == 1:
|
|
287
|
+
return cell.item()
|
|
288
|
+
return cell
|
|
289
|
+
|
|
290
|
+
|
|
291
|
+
def elem_list_text(**kwargs: Any) -> Any:
    """Create ``element_text`` theme elements from vectorised arguments.

    Shorthand for :func:`distribute_args` called with ``fun=element_text``.

    Parameters
    ----------
    **kwargs : Any
        Vectorised ``element_text`` arguments, forwarded unchanged to
        :func:`distribute_args`.

    Returns
    -------
    list or object
        Whatever :func:`distribute_args` returns: a list with one ``element_text`` result
        per column, or a single bare ``element_text()`` result when no arguments remain.
    """
    elements = distribute_args(fun=element_text, **kwargs)
    return elements
|
|
307
|
+
|
|
308
|
+
|
|
309
|
+
def elem_list_rect(**kwargs: Any) -> Any:
    """Create ``element_rect`` theme elements from vectorised arguments.

    Shorthand for :func:`distribute_args` called with ``fun=element_rect``.

    Parameters
    ----------
    **kwargs : Any
        Vectorised ``element_rect`` arguments, forwarded unchanged to
        :func:`distribute_args`.

    Returns
    -------
    list or object
        Whatever :func:`distribute_args` returns: a list with one ``element_rect`` result
        per column, or a single bare ``element_rect()`` result when no arguments remain.
    """
    elements = distribute_args(fun=element_rect, **kwargs)
    return elements
|
|
325
|
+
|
|
326
|
+
|
|
327
|
+
# -- weave_factors ---------------------------------------------------------------------------
|
|
328
|
+
|
|
329
|
+
|
|
330
|
+
def _is_factor(x: Any) -> bool:
|
|
331
|
+
"""Return ``True`` if *x* is a pandas Categorical / factor-like input."""
|
|
332
|
+
if isinstance(x, pd.Categorical):
|
|
333
|
+
return True
|
|
334
|
+
if isinstance(x, pd.Series) and isinstance(x.dtype, pd.CategoricalDtype):
|
|
335
|
+
return True
|
|
336
|
+
return False
|
|
337
|
+
|
|
338
|
+
|
|
339
|
+
def _as_categorical(x: Any) -> pd.Categorical:
|
|
340
|
+
"""Coerce *x* to a :class:`pandas.Categorical`."""
|
|
341
|
+
if isinstance(x, pd.Categorical):
|
|
342
|
+
return x
|
|
343
|
+
if isinstance(x, pd.Series) and isinstance(x.dtype, pd.CategoricalDtype):
|
|
344
|
+
return x.array # type: ignore[return-value]
|
|
345
|
+
return pd.Categorical(x)
|
|
346
|
+
|
|
347
|
+
|
|
348
|
+
def _r_as_character(x: Any) -> str:
    """Format a scalar the way R's ``as.character`` / ``paste`` would.

    Rules applied, in order:

    * a missing scalar becomes the literal string ``"NA"``;
    * booleans become ``"TRUE"`` / ``"FALSE"``;
    * infinite floats become ``"Inf"`` / ``"-Inf"`` (R's spelling, rather than Python's
      ``inf``);
    * integral floats drop the trailing ``.0`` (``10.0`` -> ``"10"``);
    * other floats use Python's ``repr``;
    * integers use their plain decimal form;
    * everything else goes through ``str``.

    R's switch to scientific notation for very large or very small magnitudes is not
    reproduced here.

    Parameters
    ----------
    x : Any
        Scalar to format.

    Returns
    -------
    str
    """
    if _is_na_scalar(x):
        return "NA"
    # bool must be tested before int: ``True`` is an ``int`` instance in Python.
    if isinstance(x, (bool, np.bool_)):
        return "TRUE" if x else "FALSE"
    if isinstance(x, (float, np.floating)):
        number = float(x)
        if np.isinf(number):
            # ``repr(float("inf"))`` is "inf"; R prints infinities capitalised.
            return "Inf" if number > 0 else "-Inf"
        if number.is_integer():
            return str(int(number))
        return repr(number)
    if isinstance(x, (int, np.integer)):
        return str(int(x))
    return str(x)
|
|
373
|
+
|
|
374
|
+
|
|
375
|
+
def weave_factors(
    *args: Any,
    drop: bool = True,
    sep: str = ".",
    replace_na: bool = True,
) -> pd.Categorical:
    """Weave several factors into one, giving earlier inputs priority in level order.

    Port of ggh4x's ``weave_factors`` (conveniences.R). It behaves like
    ``interaction(..., lex.order = TRUE)``, except that a non-factor input contributes
    its distinct labels in order of first appearance instead of sorted levels.

    Parameters
    ----------
    *args : array-like
        Input vectors: Categoricals / categorical Series, or plain sequences. Every
        input must have the common length or length 1 (length-1 inputs are repeated).
    drop : bool, default True
        Keep only the level combinations that actually occur in the data.
    sep : str, default "."
        String placed between the per-input labels of each combined label.
    replace_na : bool, default True
        Turn missing values into empty strings. A factor input gets an extra ``""``
        level (unless it already has one) to which its missing entries are routed; a
        non-factor input has each missing value replaced by ``""``. When ``False``,
        missing values are rendered as the text ``"NA"``.

    Returns
    -------
    pandas.Categorical
        The woven, unordered factor. ``None`` is returned when called with no inputs.

    Raises
    ------
    ValueError
        If the inputs do not all share a common length (or length 1).
    """
    if not args:
        return None  # type: ignore[return-value]

    sizes = [len(vec) for vec in args]
    target_len = max(sizes)
    if any(size != 1 and size != target_len for size in sizes):
        cli_abort(
            "All inputs to `weave_factors` should have the same length, "
            "or length 1."
        )

    # Stretch length-1 inputs to the common length (R's paste() recycles).
    recycled = []
    for vec in args:
        if len(vec) != target_len:
            if _is_factor(vec):
                vec = _as_categorical(vec).repeat(target_len)
            else:
                vec = list(vec) * target_len
        recycled.append(vec)

    # For every input: its per-row labels and its ordered level labels.
    labels_per_input: List[List[str]] = []
    levels_per_input: List[List[str]] = []
    for vec in recycled:
        if _is_factor(vec):
            factor = _as_categorical(vec)
            levels = [str(level) for level in factor.categories]
            codes = np.asarray(factor.codes, dtype=int)
            if replace_na:
                # Missing entries (negative codes) are redirected to the "" level, which
                # is appended at the end unless the factor already has one.
                if "" in levels:
                    blank_at = levels.index("")
                    levels = list(levels)
                else:
                    blank_at = len(levels)
                    levels = list(levels) + [""]
                redirected = np.where(codes < 0, blank_at, codes)
                labels = [levels[code] for code in redirected]
            else:
                labels = ["NA" if code < 0 else levels[code] for code in codes]
        else:
            if replace_na:
                labels = ["" if _is_na_scalar(item) else _r_as_character(item) for item in vec]
            else:
                labels = [_r_as_character(item) for item in vec]
            # levels() %||% as.character(unique(x)): distinct labels, first-seen order.
            levels = list(dict.fromkeys(labels))
        labels_per_input.append(labels)
        levels_per_input.append(levels)

    # Row-wise combined labels.
    woven = [
        sep.join(labels[row] for labels in labels_per_input)
        for row in range(target_len)
    ]

    # Candidate levels are the full cross of the per-input levels. itertools.product
    # varies its LAST iterable fastest, so the first input varies slowest -- the same
    # order R obtains from rev(expand.grid(rev(args))).
    candidates = [sep.join(combo) for combo in product(*levels_per_input)]
    if drop:
        observed = set(woven)
        candidates = [level for level in candidates if level in observed]

    # Separator collisions can produce the same combined label twice; keep the first.
    final_levels = list(dict.fromkeys(candidates))

    return pd.Categorical(woven, categories=final_levels, ordered=False)
|
|
510
|
+
|
|
511
|
+
|
|
512
|
+
# -- center_limits ---------------------------------------------------------------------------
|
|
513
|
+
|
|
514
|
+
|
|
515
|
+
def center_limits(around: float = 0) -> Callable[[Any], np.ndarray]:
    """Build a limits function that is symmetric about *around*.

    Port of ggh4x's ``center_limits`` (conveniences.R); handy, for instance, to centre
    log2 fold-change colour limits on zero.

    Parameters
    ----------
    around : float, default 0
        Midpoint of the limits the returned function produces.

    Returns
    -------
    callable
        A function that converts its input to a float array and returns the two-element
        array ``[-1, 1] * max(abs(input - around)) + around``. NaN entries are ignored
        when taking the maximum.
    """
    unit = np.array([-1.0, 1.0])

    def _limits(input: Any) -> np.ndarray:
        deviations = np.abs(np.asarray(input, dtype=float) - around)
        # Largest distance from the centre, mirrored to both sides of it.
        return unit * np.nanmax(deviations) + around

    return _limits
|
|
539
|
+
|
|
540
|
+
|
|
541
|
+
# -- sep_discrete ----------------------------------------------------------------------------
|
|
542
|
+
|
|
543
|
+
|
|
544
|
+
def sep_discrete(sep: str = ".", inv: bool = False) -> Callable[[Sequence[str]], np.ndarray]:
    """Return a function mapping separator-delimited discrete labels to numeric positions.

    Port of ggh4x's ``sep_discrete`` (scale_manual.R). Each label is split on the literal
    (non-regex) separator. The first column of the split contributes the base position
    ``1..n``; every further column contributes a run-length group index (0 for the first
    run of equal consecutive values, 1 for the next, ...), so each change in a deeper
    component pushes the following labels one extra unit along.

    Parameters
    ----------
    sep : str, default "."
        Literal separator used to split labels (no regular expressions).
    inv : bool, default False
        When ``True``, the split columns are reversed before scoring, which inverts the
        layering of groups.

    Returns
    -------
    callable
        A function accepting a sequence of ``str`` labels and returning a float
        ``numpy.ndarray`` with one position per label. An empty input yields an empty
        array.
    """

    def _positions(limits: Sequence[str]) -> np.ndarray:
        labels = list(limits)
        if not labels:
            # With no labels the split matrix has zero columns, and assigning the base
            # position to column 0 below would raise IndexError.
            return np.zeros(0, dtype=float)

        split = [lab.split(sep) for lab in labels]
        depth = max(len(parts) for parts in split)
        # Pad ragged splits with empty strings (R: c(lab, rep("", depth - length(lab)))).
        split = [parts + [""] * (depth - len(parts)) for parts in split]

        # Object matrix: one row per label, one column per split component.
        nrow = len(split)
        mat = np.empty((nrow, depth), dtype=object)
        for i, row in enumerate(split):
            for j, part in enumerate(row):
                mat[i, j] = part

        if inv:
            mat = mat[:, ::-1]

        # Per-column run-length group index (consecutive runs -> 0, 1, 2, ... repeated).
        # NOTE(review): vec_unrep is assumed to return a run-length encoding exposing
        # "key" and "times" entries -- confirm against ._vctrs.
        vals = np.zeros((nrow, depth), dtype=float)
        for j in range(depth):
            unrep = vec_unrep(mat[:, j])
            keys = list(unrep["key"])
            times = list(unrep["times"])
            vals[:, j] = np.repeat(np.arange(len(keys)), times)

        # The first column is replaced by the base position 1..nrow.
        vals[:, 0] = np.arange(1, nrow + 1)
        return vals.sum(axis=1)

    return _positions
|