scales-python 1.4.0.9000__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
scales/bounds.py ADDED
@@ -0,0 +1,512 @@
1
+ """
2
+ Bounds, rescaling, and out-of-bounds handling utilities.
3
+
4
+ Python port of ``scales::bounds.R`` from the R *scales* package
5
+ (https://github.com/r-lib/scales).
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from typing import Optional, Tuple, Union
11
+
12
+ import numpy as np
13
+
14
+ from ._utils import zero_range
15
+
16
__all__ = [
    # Rescalers
    "rescale",
    "rescale_mid",
    "rescale_max",
    "rescale_none",
    # Range-enforcement primitives
    "censor",
    "squish",
    "squish_infinite",
    "discard",
    # Out-of-bounds handlers sharing the ``(x, range)`` signature
    "oob_censor",
    "oob_censor_any",
    "oob_squish",
    "oob_squish_any",
    "oob_squish_infinite",
    "oob_keep",
    "oob_discard",
    # Transform-domain helpers
    "trim_to_domain",
    "trans_range",  # R alias: trans_range <- trim_to_domain
]

# ---------------------------------------------------------------------------
# Type aliases
# ---------------------------------------------------------------------------
# A ``(min, max)`` pair of range limits.
_RangeLike = Tuple[float, float]
40
+
41
+
42
+ # ---------------------------------------------------------------------------
43
+ # Helpers
44
+ # ---------------------------------------------------------------------------
45
+
46
+ def _as_numeric(x: np.ndarray) -> np.ndarray:
47
+ """Convert datetime64 arrays to float64 (nanoseconds since epoch).
48
+
49
+ Non-datetime arrays are returned as float64 without modification.
50
+ """
51
+ if np.issubdtype(x.dtype, np.datetime64):
52
+ return x.astype("datetime64[ns]").astype(np.float64)
53
+ return np.asarray(x, dtype=np.float64)
54
+
55
+
56
+ def _ensure_array(x: Union[np.ndarray, list, float]) -> np.ndarray:
57
+ """Coerce *x* to a NumPy array."""
58
+ return np.asarray(x)
59
+
60
+
61
+ # ---------------------------------------------------------------------------
62
+ # Rescaling
63
+ # ---------------------------------------------------------------------------
64
+
65
def rescale(
    x: Union[np.ndarray, list, float],
    to: _RangeLike = (0, 1),
    from_range: Optional[_RangeLike] = None,
) -> np.ndarray:
    """Linearly rescale a numeric vector to a new range.

    Parameters
    ----------
    x : array_like
        Numeric values to rescale.
    to : tuple of float, optional
        Output range ``(min, max)``.  Default ``(0, 1)``.
    from_range : tuple of float or None, optional
        Input range ``(min, max)``.  When *None* (default) the range is
        computed from ``x`` (ignoring NaN).

    Returns
    -------
    np.ndarray
        Rescaled ``float64`` values.  NaN entries stay NaN.  When the input
        range has zero width, every non-NaN entry maps to the midpoint of
        *to*.  Empty input with no explicit *from_range* yields an empty
        array.
    """
    x_num = _as_numeric(_ensure_array(x))

    if from_range is None:
        if x_num.size == 0:
            # np.nanmin / np.nanmax raise ValueError on empty input; there is
            # nothing to rescale, so hand back the empty float array.
            return x_num
        if np.all(np.isnan(x_num)):
            return np.full_like(x_num, np.nan)
        from_range = (np.nanmin(x_num), np.nanmax(x_num))

    from_min, from_max = float(from_range[0]), float(from_range[1])
    to_min, to_max = float(to[0]), float(to[1])

    if from_min == from_max:
        # Degenerate input range: map to the midpoint of ``to`` but keep
        # missing values missing instead of overwriting them.
        return np.where(np.isnan(x_num), np.nan, (to_min + to_max) / 2.0)

    return (x_num - from_min) / (from_max - from_min) * (to_max - to_min) + to_min
102
+
103
+
104
def rescale_mid(
    x: Union[np.ndarray, list, float],
    to: _RangeLike = (0, 1),
    from_range: Optional[_RangeLike] = None,
    mid: float = 0,
) -> np.ndarray:
    """Rescale numeric vector to new range with a specified midpoint.

    The *mid* value is mapped to the mean of *to*.

    Parameters
    ----------
    x : array_like
        Numeric values to rescale.
    to : tuple of float, optional
        Output range ``(min, max)``.  Default ``(0, 1)``.
    from_range : tuple of float or None, optional
        Input range ``(min, max)``.  Defaults to ``(min(x), max(x))``,
        ignoring NaN.
    mid : float, optional
        Value in the input domain that should be mapped to the midpoint
        of *to*.  Default ``0``.

    Returns
    -------
    np.ndarray
        Rescaled ``float64`` values.  NaN entries stay NaN.  Empty input
        with no explicit *from_range* yields an empty array.
    """
    x_num = _as_numeric(_ensure_array(x))

    if from_range is None:
        if x_num.size == 0:
            # np.nanmin / np.nanmax raise ValueError on empty input; there is
            # nothing to rescale, so hand back the empty float array.
            return x_num
        if np.all(np.isnan(x_num)):
            return np.full_like(x_num, np.nan)
        from_range = (np.nanmin(x_num), np.nanmax(x_num))

    from_min, from_max = float(from_range[0]), float(from_range[1])
    to_min, to_max = float(to[0]), float(to[1])

    # Mirrors R's rescale_mid.numeric:
    #   if (zero_range(from) || zero_range(to)) return mean(to) for non-NA
    #   extent <- 2 * max(abs(from - mid))
    #   (x - mid) / extent * diff(to) + mean(to)
    to_mean = (to_min + to_max) / 2.0
    if zero_range((from_min, from_max)) or zero_range((to_min, to_max)):
        return np.where(np.isnan(x_num), np.nan, to_mean)

    # Twice the larger distance from ``mid`` to either end of the input range,
    # so the far end lands on a limit of ``to`` and ``mid`` on its mean.
    extent = 2.0 * max(abs(from_min - mid), abs(from_max - mid))
    return (x_num - mid) / extent * (to_max - to_min) + to_mean
153
+
154
+
155
def rescale_max(
    x: Union[np.ndarray, list, float],
    to: _RangeLike = (0, 1),
    from_range: Optional[_RangeLike] = None,
) -> np.ndarray:
    """Rescale numeric vector relative to its maximum.

    Only the upper ends of *from_range* and *to* are used: the result is
    ``x / from_range[1] * to[1]``.

    Parameters
    ----------
    x : array_like
        Numeric values to rescale.
    to : tuple of float, optional
        Output range ``(min, max)``.  Default ``(0, 1)``.
    from_range : tuple of float or None, optional
        Input range ``(min, max)``.  Defaults to ``(0, max(x))``, ignoring
        NaN.

    Returns
    -------
    np.ndarray
        Rescaled ``float64`` values.  With no explicit *from_range*, empty
        input yields an empty array and all-NaN input yields all NaN.
    """
    x_num = _as_numeric(_ensure_array(x))

    if from_range is None:
        if x_num.size == 0:
            # np.nanmax raises ValueError on empty input; nothing to rescale.
            return x_num
        if np.all(np.isnan(x_num)):
            # Same result as dividing by a NaN maximum, without the
            # "All-NaN slice" RuntimeWarning from np.nanmax.
            return np.full_like(x_num, np.nan)
        from_range = (0.0, np.nanmax(x_num))

    return x_num / float(from_range[1]) * float(to[1])
183
+
184
+
185
def rescale_none(
    x: Union[np.ndarray, list, float],
    to: Optional[_RangeLike] = None,
    from_range: Optional[_RangeLike] = None,
) -> np.ndarray:
    """Pass *x* through without rescaling.

    Parameters
    ----------
    x : array_like
        Values to return.
    to : ignored
        Accepted only so the signature matches the other rescalers.
    from_range : ignored
        Accepted only so the signature matches the other rescalers.

    Returns
    -------
    np.ndarray
        *x* coerced to a NumPy array; the values are not altered.
    """
    untouched = _ensure_array(x)
    return untouched
204
+
205
+
206
+ # ---------------------------------------------------------------------------
207
+ # Censoring / squishing / discarding
208
+ # ---------------------------------------------------------------------------
209
+
210
def censor(
    x: Union[np.ndarray, list, float],
    range: _RangeLike = (0, 1),
    only_finite: bool = True,
) -> np.ndarray:
    """Blank out values that fall outside *range* by setting them to NaN.

    Parameters
    ----------
    x : array_like
        Numeric values.
    range : tuple of float, optional
        Allowed ``(min, max)`` range.  Default ``(0, 1)``.
    only_finite : bool, optional
        If *True* (default), only finite values are candidates for
        censoring, so infinite values pass through untouched.

    Returns
    -------
    np.ndarray
        A new ``float64`` array in which out-of-range values are ``np.nan``.
    """
    values = np.array(_ensure_array(x), dtype=np.float64)
    lower = float(range[0])
    upper = float(range[1])

    outside = (values < lower) | (values > upper)
    if only_finite:
        # Restrict the mask to finite entries so +/-inf survive.
        outside = np.isfinite(values) & outside

    values[outside] = np.nan
    return values
242
+
243
+
244
def squish(
    x: Union[np.ndarray, list, float],
    range: _RangeLike = (0, 1),
    only_finite: bool = True,
) -> np.ndarray:
    """Pull values lying outside *range* back onto the nearest limit.

    Parameters
    ----------
    x : array_like
        Numeric values.
    range : tuple of float, optional
        Allowed ``(min, max)`` range.  Default ``(0, 1)``.
    only_finite : bool, optional
        If *True* (default), only finite values are moved; infinite values
        are returned as they are.  If *False*, everything is clipped.

    Returns
    -------
    np.ndarray
        ``float64`` array with out-of-range values replaced by the closest
        limit.
    """
    values = np.array(_ensure_array(x), dtype=np.float64)
    lower = float(range[0])
    upper = float(range[1])

    if not only_finite:
        return np.clip(values, lower, upper)

    # The finiteness mask is taken once, before any replacement.
    is_finite = np.isfinite(values)
    values = np.where(is_finite & (values < lower), lower, values)
    return np.where(is_finite & (values > upper), upper, values)
275
+
276
+
277
def squish_infinite(
    x: Union[np.ndarray, list, float],
    range: _RangeLike = (0, 1),
) -> np.ndarray:
    """Swap infinite values for the matching limit of *range*.

    ``-inf`` becomes ``range[0]`` and ``+inf`` becomes ``range[1]``.  Every
    other value, ``np.nan`` included, is left as it is.

    Parameters
    ----------
    x : array_like
        Numeric values.
    range : tuple of float, optional
        ``(min, max)`` replacement values.  Default ``(0, 1)``.

    Returns
    -------
    np.ndarray
        A new ``float64`` array with the infinities replaced.
    """
    values = np.array(_ensure_array(x), dtype=np.float64)
    lower = float(range[0])
    upper = float(range[1])

    below = values == -np.inf
    values[below] = lower
    # Evaluated after the first replacement, matching the sequential order.
    above = values == np.inf
    values[above] = upper
    return values
301
+
302
+
303
def discard(
    x: Union[np.ndarray, list, float],
    range: _RangeLike = (0, 1),
) -> np.ndarray:
    """Drop every value that is not inside *range*.

    Parameters
    ----------
    x : array_like
        Numeric values.
    range : tuple of float, optional
        Allowed ``(min, max)`` range, limits included.  Default ``(0, 1)``.

    Returns
    -------
    np.ndarray
        ``float64`` array holding only the in-range values; it may be
        shorter than the input.  NaN never satisfies the comparison, so NaN
        entries are dropped as well.
    """
    values = np.array(_ensure_array(x), dtype=np.float64)
    lower = float(range[0])
    upper = float(range[1])
    in_range = (lower <= values) & (values <= upper)
    return values[in_range]
325
+
326
+
327
+ # ---------------------------------------------------------------------------
328
+ # OOB handler functions
329
+ # ---------------------------------------------------------------------------
330
+ # In the R package, oob_* functions are direct functions with signature
331
+ # ``(x, range)``. We mirror that here.
332
+
333
def oob_censor(
    x: Union[np.ndarray, list, float],
    range: _RangeLike = (0, 1),
) -> np.ndarray:
    """Out-of-bounds handler that replaces finite out-of-range values with NaN.

    Delegates to ``censor`` with ``only_finite=True``, so infinite values
    are left alone.

    Parameters
    ----------
    x : array_like
        Numeric values.
    range : tuple of float
        Allowed ``(min, max)`` range.

    Returns
    -------
    np.ndarray
    """
    censored = censor(x, range, only_finite=True)
    return censored
351
+
352
+
353
def oob_censor_any(
    x: Union[np.ndarray, list, float],
    range: _RangeLike = (0, 1),
) -> np.ndarray:
    """Out-of-bounds handler that replaces every out-of-range value with NaN.

    Delegates to ``censor`` with ``only_finite=False``, so infinite values
    are censored too.

    Parameters
    ----------
    x : array_like
        Numeric values.
    range : tuple of float
        Allowed ``(min, max)`` range.

    Returns
    -------
    np.ndarray
    """
    censored = censor(x, range, only_finite=False)
    return censored
369
+
370
+
371
def oob_squish(
    x: Union[np.ndarray, list, float],
    range: _RangeLike = (0, 1),
) -> np.ndarray:
    """Out-of-bounds handler that clamps finite values to the range limits.

    Delegates to ``squish`` with ``only_finite=True``, so infinite values
    are left alone.

    Parameters
    ----------
    x : array_like
        Numeric values.
    range : tuple of float
        Allowed ``(min, max)`` range.

    Returns
    -------
    np.ndarray
    """
    clamped = squish(x, range, only_finite=True)
    return clamped
389
+
390
+
391
def oob_squish_any(
    x: Union[np.ndarray, list, float],
    range: _RangeLike = (0, 1),
) -> np.ndarray:
    """Out-of-bounds handler that clamps every value to the range limits.

    Delegates to ``squish`` with ``only_finite=False``, so infinite values
    are clamped too.

    Parameters
    ----------
    x : array_like
        Numeric values.
    range : tuple of float
        Allowed ``(min, max)`` range.

    Returns
    -------
    np.ndarray
    """
    clamped = squish(x, range, only_finite=False)
    return clamped
407
+
408
+
409
def oob_squish_infinite(
    x: Union[np.ndarray, list, float],
    range: _RangeLike = (0, 1),
) -> np.ndarray:
    """Out-of-bounds handler that moves only infinite values onto the limits.

    Delegates to ``squish_infinite``; finite values, in range or not, are
    returned as they are.

    Parameters
    ----------
    x : array_like
        Numeric values.
    range : tuple of float
        ``(min, max)`` replacement values for ``-inf`` and ``+inf``.

    Returns
    -------
    np.ndarray
    """
    replaced = squish_infinite(x, range)
    return replaced
427
+
428
+
429
def oob_keep(
    x: Union[np.ndarray, list, float],
    range: _RangeLike = (0, 1),
) -> np.ndarray:
    """Out-of-bounds handler that leaves every value where it is.

    Parameters
    ----------
    x : array_like
        Numeric values.
    range : ignored
        Accepted only so the signature matches the other handlers.

    Returns
    -------
    np.ndarray
        A ``float64`` copy of *x*; no value is censored, clamped or dropped.
    """
    as_array = _ensure_array(x)
    return np.array(as_array, dtype=np.float64)
445
+
446
+
447
def oob_discard(
    x: Union[np.ndarray, list, float],
    range: _RangeLike = (0, 1),
) -> np.ndarray:
    """Out-of-bounds handler that removes out-of-range values.

    Delegates to ``discard``.

    Parameters
    ----------
    x : array_like
        Numeric values.
    range : tuple of float
        Allowed ``(min, max)`` range.

    Returns
    -------
    np.ndarray
        Array with the out-of-bounds values removed; it may be shorter than
        the input.
    """
    remaining = discard(x, range)
    return remaining
464
+
465
+
466
+ # ---------------------------------------------------------------------------
467
+ # Transform domain utilities
468
+ # ---------------------------------------------------------------------------
469
+
470
def trim_to_domain(
    transform: object,
    x: Union[np.ndarray, list, float],
) -> np.ndarray:
    """Return the transformed ``(min, max)`` of *x* after clamping to the domain.

    Modelled on R's ``trim_to_domain``:
    ``range(transform$transform(range(squish(x, transform$domain), na.rm = TRUE)))``.

    The steps are: clamp *x* to the transform's domain, discard non-finite
    values, take the min and max of what is left, push that pair through
    ``transform.transform`` and return the min and max of the outcome.

    Parameters
    ----------
    transform : object
        Object exposing a ``transform(x)`` method.  Its ``domain``
        ``(min, max)`` attribute is optional; without it the domain is
        taken to be ``(-inf, inf)``.
    x : array_like
        Values to summarise.

    Returns
    -------
    np.ndarray
        Length-2 array ``[trans_min, trans_max]``.  It is ``[nan, nan]``
        when *x* has no finite values after clamping, or when the
        transformed pair is empty or entirely non-finite.
    """
    values = np.array(_ensure_array(x), dtype=np.float64)

    # ``domain`` is looked up leniently; ``transform.transform`` below is
    # required, so ``transform`` must be an object providing that method.
    limits = getattr(transform, "domain", (-np.inf, np.inf))
    clamped = squish(values, range=limits, only_finite=True)

    usable = clamped[np.isfinite(clamped)]
    if usable.size == 0:
        return np.array([np.nan, np.nan], dtype=float)

    endpoints = np.array([usable.min(), usable.max()], dtype=float)
    mapped = np.asarray(transform.transform(endpoints), dtype=float)
    if mapped.size == 0 or not np.isfinite(mapped).any():
        return np.array([np.nan, np.nan], dtype=float)

    low = float(np.nanmin(mapped))
    high = float(np.nanmax(mapped))
    return np.array([low, high])


# R alias: trans_range <- trim_to_domain
trans_range = trim_to_domain