scales-python 1.4.0.9000__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
scales/breaks.py ADDED
@@ -0,0 +1,627 @@
1
+ """
2
+ Break generators for continuous scales.
3
+
4
+ Python port of the R scales package break generators
5
+ (https://github.com/r-lib/scales). Corresponds to:
6
+ - R/breaks.R
7
+ - R/breaks-retired.R
8
+
9
+ All public break generators are *closure factories*: they return a callable
10
+ that accepts ``(x, n=None)`` and returns a :class:`numpy.ndarray` of break
11
+ positions.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import math
17
+ import warnings
18
+ from typing import Any, Callable, Optional, Sequence, Tuple, Union
19
+
20
+ import numpy as np
21
+ from numpy.typing import ArrayLike
22
+
23
+ __all__ = [
24
+ "breaks_extended",
25
+ "breaks_pretty",
26
+ "breaks_width",
27
+ "breaks_timespan",
28
+ "breaks_exp",
29
+ "cbreaks",
30
+ # Legacy aliases
31
+ "extended_breaks",
32
+ "pretty_breaks",
33
+ ]
34
+
35
+
36
+ # ---------------------------------------------------------------------------
37
+ # Extended Wilkinson algorithm helpers
38
+ # ---------------------------------------------------------------------------
39
+
40
def _simplicity(q_idx: int, n_Q: int, j: int, lmin: float, lmax: float,
                lstep: float) -> float:
    """Simplicity score for a candidate labelling.

    Follows ``labeling::.simplicity`` from R::

        1 - (i - 1) / (n - 1) - j + v

    where ``v`` is 1 only when zero is actually one of the labels.

    Parameters
    ----------
    q_idx : int
        Zero-based position of the step multiple within ``Q``.
    n_Q : int
        Number of entries in ``Q``.
    j : int
        Skip factor (every ``j``-th multiple of ``q`` is used).
    lmin, lmax : float
        First and last label of the candidate sequence.
    lstep : float
        Distance between consecutive labels.

    Returns
    -------
    float
        The simplicity score; larger is better.
    """
    # Same tolerance as R: .Machine$double.eps * 100.
    eps = 100.0 * 2.0 ** -52

    # Zero counts as "included" only if the labels straddle it AND lmin sits
    # on the step grid.  A sequence such as -1, 1, 3 straddles zero without
    # ever labelling it and must not receive the bonus.
    v = 0.0
    if lstep > 0 and lmin <= 0 <= lmax:
        remainder = lmin % lstep
        if remainder < eps or lstep - remainder < eps:
            v = 1.0

    # With a single-entry Q there is no rank penalty (and no division by 0).
    rank_penalty = q_idx / (n_Q - 1.0) if n_Q > 1 else 0.0
    return 1.0 - rank_penalty - j + v
47
+
48
+
49
def _simplicity_max(q_idx: int, n_Q: int, j: int) -> float:
    """Best simplicity any labelling with this ``q`` and ``j`` can reach.

    This is the simplicity formula with the zero bonus assumed to be
    granted (``v = 1``).  A single-entry ``Q`` carries no rank penalty.
    """
    if n_Q <= 1:
        return 2.0 - j
    rank_penalty = q_idx / (n_Q - 1.0)
    return 1.0 - rank_penalty - j + 1.0
52
+
53
+
54
def _coverage(dmin: float, dmax: float, lmin: float, lmax: float) -> float:
    """Coverage score: how closely the label extent matches the data extent.

    Same formula as R's ``labeling::.coverage``::

        1 - 0.5 * ((dmax - lmax)^2 + (dmin - lmin)^2) / (0.1 * (dmax - dmin))^2

    The denominator is scaled by ``0.1``, not ``0.5``.  The smaller factor
    punishes labels that overshoot the data much harder; with ``0.5`` a
    loose labelling scores almost as well as a tight one, and data in
    ``[9, 35]`` would get ``[0, 10, 20, 30, 40]`` rather than
    ``[10, 15, ..., 35]``.

    A data range below ``1e-100`` is treated as degenerate and scores 1.
    """
    extent = dmax - dmin
    if extent < 1e-100:
        return 1.0
    scale = 0.1 * extent
    mismatch_sq = (dmax - lmax) ** 2 + (dmin - lmin) ** 2
    return 1.0 - 0.5 * mismatch_sq / (scale ** 2)
73
+
74
+
75
def _coverage_max(dmin: float, dmax: float, span: float) -> float:
    """Upper bound on coverage for a labelling of total length *span*.

    Mirrors R's ``labeling::.coverage.max``.  When the labels are longer
    than the data range, coverage is highest if the excess is shared
    equally between both ends, so each end overshoots by
    ``(span - range) / 2``::

        1 - 0.5 * (half^2 + half^2) / (0.1 * range)^2

    Charging the whole excess to one squared term (i.e. using
    ``(span - range)^2``) doubles the penalty and yields a value *below*
    coverage that a centred labelling really achieves, which would make the
    pruning in the search discard valid candidates.

    Parameters
    ----------
    dmin, dmax : float
        Data minimum and maximum.
    span : float
        Distance from the first to the last label.

    Returns
    -------
    float
        ``1.0`` when the labels fit inside the data range (or the range is
        below ``1e-100``), otherwise the centred-labelling coverage.
    """
    data_range = dmax - dmin
    if data_range < 1e-100:
        return 1.0
    if span <= data_range:
        return 1.0
    half = (span - data_range) / 2.0
    tenth = 0.1 * data_range
    return 1.0 - 0.5 * (half ** 2 + half ** 2) / (tenth ** 2)
88
+
89
+
90
def _density(k: int, m: int, dmin: float, dmax: float,
             lmin: float, lmax: float) -> float:
    """Density score: how close the tick density is to the requested one.

    ``k`` is the number of labels in the candidate, ``m`` the number the
    caller asked for.  The score is ``2 - max(r / rt, rt / r)`` where ``r``
    is the candidate's labels-per-unit and ``rt`` the target
    labels-per-unit over the union of data and label extents.
    """
    if lmax != lmin:
        label_rate = (k - 1.0) / (lmax - lmin)
    else:
        # Zero-length labelling: fall back to a neutral rate.
        label_rate = 1.0

    union_extent = max(lmax, dmax) - min(lmin, dmin)
    target_rate = (m - 1.0) / union_extent
    if target_rate == 0:
        # Happens when m == 1; no density preference can be expressed.
        return 1.0

    ratio = label_rate / target_rate
    return 2.0 - max(ratio, 1.0 / ratio)
99
+
100
+
101
def _density_max(k: int, m: int) -> float:
    """Best density score reachable with *k* labels when *m* were requested.

    Fewer labels than requested can still reach the perfect score of 1;
    with ``k >= m`` the bound drops linearly as ``2 - (k - 1) / (m - 1)``.
    ``m <= 1`` always yields 1 so the division is never by zero.
    """
    if k < m or m <= 1:
        return 1.0
    return 2.0 - (k - 1.0) / (m - 1.0)
106
+
107
+
108
def _legibility() -> float:
    """Return the legibility score.

    Label formatting is not evaluated in this module, so every candidate
    receives the same constant score.
    """
    constant_score = 1.0
    return constant_score
111
+
112
+
113
def _extended(
    dmin: float,
    dmax: float,
    n: int = 5,
    Q: Sequence[float] = (1, 5, 2, 2.5, 4, 3),
    only_loose: bool = False,
    w: Tuple[float, float, float, float] = (0.25, 0.2, 0.5, 0.05),
) -> np.ndarray:
    """
    Wilkinson's extended algorithm for nice axis breaks.

    Candidate labellings are enumerated by skip factor ``j``, step multiple
    ``q`` (in the preference order given by ``Q``), label count ``k`` and
    power of ten ``z``; each candidate has ``step = j * q * 10**z``.  Every
    loop level is abandoned as soon as an optimistic bound on the weighted
    score falls below the best score found so far.

    Parameters
    ----------
    dmin : float
        Data minimum.
    dmax : float
        Data maximum.  If ``dmin > dmax`` the two are swapped.
    n : int
        Desired number of breaks.
    Q : sequence of float
        Preference-ordered list of nice step multiples.
    only_loose : bool
        If ``True``, only labellings that enclose ``[dmin, dmax]`` are
        considered.
    w : tuple of float
        Weights for (simplicity, coverage, density, legibility).

    Returns
    -------
    numpy.ndarray
        The best-scoring break positions.  A range narrower than ``1e-10``
        yields the single value ``dmin``; if no candidate is accepted the
        result is ``numpy.linspace(dmin, dmax, n)``.
    """
    # R's labeling::extended swaps a reversed range; without this a reversed
    # range would be mistaken for a zero-width one below.
    if dmin > dmax:
        dmin, dmax = dmax, dmin

    if dmax - dmin < 1e-10:
        return np.array([dmin])

    n_Q = len(Q)
    best_score = -2.0
    # (lmin, lmax, step, k) of the best candidate; the array is built once
    # at the end instead of on every improvement.
    best_params: Optional[Tuple[float, float, float, int]] = None

    # ``step = j * q * 10**z`` with integer z, so each numeric step is
    # reachable under exactly one (j, q) pair and cannot borrow the
    # simplicity score of a different q.
    j = 1
    while j < 50:
        search_done = False
        for q_idx, q in enumerate(Q):
            sm = _simplicity_max(q_idx, n_Q, j)
            if (w[0] * sm + w[1] + w[2] + w[3]) < best_score:
                # No later q or larger j can beat the current best.
                search_done = True
                break

            k = 2
            while k < 50:
                dm = _density_max(k, n)
                if (w[0] * sm + w[1] + w[2] * dm + w[3]) < best_score:
                    break

                delta = (dmax - dmin) / (k + 1) / j / q
                if delta <= 0:
                    k += 1
                    continue
                z = int(math.ceil(math.log10(delta)))

                while z < 50:
                    step = j * q * (10.0 ** z)
                    if step < 1e-100:
                        z += 1
                        continue

                    cm = _coverage_max(dmin, dmax, step * (k - 1))
                    if (w[0] * sm + w[1] * cm + w[2] * dm + w[3]) < best_score:
                        break

                    min_start = math.floor(dmax / step) * j - (k - 1) * j
                    max_start = math.ceil(dmin / step) * j
                    if min_start > max_start:
                        z += 1
                        continue

                    for start in range(int(min_start), int(max_start) + 1):
                        lmin = start * (step / j)
                        lmax = lmin + step * (k - 1)

                        if only_loose and (lmin > dmin or lmax < dmax):
                            continue

                        s = _simplicity(q_idx, n_Q, j, lmin, lmax, step)
                        c = _coverage(dmin, dmax, lmin, lmax)
                        d = _density(k, n, dmin, dmax, lmin, lmax)
                        leg = _legibility()

                        score = (w[0] * s + w[1] * c
                                 + w[2] * d + w[3] * leg)

                        # Strict ">" keeps the first candidate on ties, so
                        # enumeration order is part of the result.
                        if score > best_score:
                            best_score = score
                            best_params = (lmin, lmax, step, k)
                    z += 1
                k += 1
        if search_done:
            break
        j += 1

    if best_params is None:
        # Fallback: evenly spaced values across the data range.
        return np.linspace(dmin, dmax, n)

    lmin, lmax, step, k = best_params
    best = np.arange(lmin, lmax + step * 0.5, step)[:k]

    # Clean up floating-point dust.  Ten decimals are used whenever the step
    # is at least 1e-4 (the historical behaviour); for smaller steps more
    # decimals are kept so that rounding cannot merge or distort breaks.
    decimals = max(10, 6 - math.floor(math.log10(step)))
    best = np.round(best, decimals=decimals)
    # Normalise values that are effectively zero (including -0.0).
    best[np.abs(best) < 1e-14] = 0.0
    return best
234
+
235
+
236
+ # ---------------------------------------------------------------------------
237
+ # Pretty breaks (R's pretty() algorithm)
238
+ # ---------------------------------------------------------------------------
239
+
240
def _pretty(dmin: float, dmax: float, n: int = 5) -> np.ndarray:
    """
    R-style ``pretty()`` for axis breaks.

    Chooses a step from ``{1, 2, 5, 10} * 10**k`` close to
    ``(dmax - dmin) / n`` using the same bias rule as R's ``pretty()``
    (``h = 1.5``, ``h5 = 0.5 + 1.5 * h``), then returns every multiple of
    that step needed to enclose ``[dmin, dmax]``.

    Parameters
    ----------
    dmin : float
        Data minimum.
    dmax : float
        Data maximum.
    n : int
        Desired number of intervals (not ticks).  Values of 1 or less are
        treated as a single interval.

    Returns
    -------
    numpy.ndarray
        Break positions.  Non-finite input is returned unchanged as
        ``[dmin, dmax]``; a range narrower than ``1e-10`` yields ``[dmin]``.
    """
    if not np.isfinite(dmin) or not np.isfinite(dmax):
        return np.array([dmin, dmax])
    if dmax - dmin < 1e-10:
        return np.array([dmin])

    h = 1.5                 # bias towards larger units
    h5 = 0.5 + 1.5 * h      # = 2.75, extra bias towards multiples of 5
    rounding_eps = 1e-10    # tolerance used by R when snapping to the grid

    dx = dmax - dmin
    # Initial cell size estimate; R only divides when n > 1.
    cell = dx / n if n > 1 else dx
    # Keep the cell from becoming negligible relative to the data magnitude.
    smallest_cell = 20 * 1e-07 * max(abs(dmin), abs(dmax))
    if cell < smallest_cell:
        cell = smallest_cell

    base = 10.0 ** math.floor(math.log10(cell))  # base <= cell < 10 * base

    # Pick the unit from {1, 2, 5, 10} * base.  Each larger unit is only
    # tried once the previous one has been accepted, exactly as in R.
    unit = base
    if 2 * base - cell < h * (cell - unit):
        unit = 2 * base
        if 5 * base - cell < h5 * (cell - unit):
            unit = 5 * base
            if 10 * base - cell < h * (cell - unit):
                unit = 10 * base

    # Grid indices of the outermost breaks.  The epsilon stops float dust
    # (e.g. 0.3 / 0.1 == 2.9999999999999996) from adding a spurious break.
    ns = math.floor(dmin / unit + rounding_eps)
    nu = math.ceil(dmax / unit - rounding_eps)

    # Nudge outwards until the data range is enclosed.
    while ns * unit > dmin + rounding_eps * unit:
        ns -= 1
    while nu * unit < dmax - rounding_eps * unit:
        nu += 1

    result = np.arange(ns, nu + 1, dtype=float) * unit

    # Clean up floating-point dust.  Ten decimals are used whenever the unit
    # is at least 1e-4; smaller units keep more decimals so rounding cannot
    # distort the breaks.
    decimals = max(10, 6 - math.floor(math.log10(unit)))
    result = np.round(result, decimals=decimals)
    # Normalise values that are effectively zero (including -0.0).
    result[np.abs(result) < 1e-14] = 0.0
    return result
313
+
314
+
315
+ # ---------------------------------------------------------------------------
316
+ # Public break generators
317
+ # ---------------------------------------------------------------------------
318
+
319
def breaks_extended(
    n: int = 5,
    *,
    Q: Sequence[float] = (1, 5, 2, 2.5, 4, 3),
    only_loose: bool = False,
) -> Callable[[ArrayLike, Optional[int]], np.ndarray]:
    """
    Build a break function based on Wilkinson's extended algorithm.

    Parameters
    ----------
    n : int, optional
        Desired number of breaks (default 5).
    Q : sequence of float, optional
        Preference-ordered list of "nice" step multiples
        (default ``(1, 5, 2, 2.5, 4, 3)``).
    only_loose : bool, optional
        If ``True``, only breaks that enclose the data range are
        considered (default ``False``).

    Returns
    -------
    callable
        A function ``(x, n=None) -> numpy.ndarray``.  Non-finite entries of
        *x* are ignored; if nothing finite remains an empty array is
        returned.  A second argument, when given, overrides *n* for that
        call.

    Examples
    --------
    >>> brk = breaks_extended(n=5)
    >>> brk([])
    array([], dtype=float64)
    """
    default_count = n

    def breaks_fn(x: ArrayLike, n_: Optional[int] = None) -> np.ndarray:
        values = np.asarray(x, dtype=float)
        finite = values[np.isfinite(values)]
        if len(finite) == 0:
            return np.array([])
        count = default_count if n_ is None else n_
        lower = float(finite.min())
        upper = float(finite.max())
        return _extended(lower, upper, n=count, Q=Q, only_loose=only_loose)

    return breaks_fn
362
+
363
+
364
def breaks_pretty(n: int = 5) -> Callable[[ArrayLike, Optional[int]], np.ndarray]:
    """
    Build a break function based on R's ``pretty()`` algorithm.

    Parameters
    ----------
    n : int, optional
        Desired number of breaks (default 5).

    Returns
    -------
    callable
        A function ``(x, n=None) -> numpy.ndarray``.  Non-finite entries of
        *x* are ignored; if nothing finite remains an empty array is
        returned.  A second argument, when given, overrides *n* for that
        call.

    Examples
    --------
    >>> brk = breaks_pretty(n=5)
    >>> brk([0.5, 9.3])
    array([ 0.,  2.,  4.,  6.,  8., 10.])
    """
    default_count = n

    def breaks_fn(x: ArrayLike, n_: Optional[int] = None) -> np.ndarray:
        values = np.asarray(x, dtype=float)
        finite = values[np.isfinite(values)]
        if len(finite) == 0:
            return np.array([])
        count = default_count if n_ is None else n_
        return _pretty(float(finite.min()), float(finite.max()), n=count)

    return breaks_fn
396
+
397
+
398
def breaks_width(
    width: float,
    offset: float = 0,
) -> Callable[[ArrayLike, Optional[int]], np.ndarray]:
    """
    Create a break function with fixed-width intervals.

    Parameters
    ----------
    width : float
        Distance between consecutive breaks.  Must be positive and finite.
    offset : float, optional
        Shift all breaks by this amount (default 0).

    Returns
    -------
    callable
        A function ``(x, n=None) -> numpy.ndarray`` that computes break
        positions for data *x*.  The second argument is accepted for
        interface compatibility and ignored.  Non-finite entries of *x* are
        dropped; if nothing finite remains an empty array is returned.

    Raises
    ------
    ValueError
        If *width* is not positive or not finite.

    Examples
    --------
    >>> brk = breaks_width(width=0.5)
    >>> brk([0.1, 2.4])
    array([0. , 0.5, 1. , 1.5, 2. , 2.5])
    """
    if width <= 0:
        raise ValueError("`width` must be positive")
    if not math.isfinite(width):
        # NaN/inf previously slipped through and failed later inside the
        # returned function; reject them up front instead.
        raise ValueError("`width` must be finite")

    # Ten decimals are used whenever width >= 1e-4 (the historical
    # behaviour).  Smaller widths keep more decimals; otherwise rounding
    # would collapse neighbouring breaks onto the same value.
    decimals = max(10, 6 - math.floor(math.log10(width)))

    def breaks_fn(x: ArrayLike, n_: Optional[int] = None) -> np.ndarray:
        x = np.asarray(x, dtype=float)
        x = x[np.isfinite(x)]
        if len(x) == 0:
            return np.array([])
        dmin, dmax = float(x.min()), float(x.max())

        # Shift by offset, snap outwards to the grid, shift back.
        lo = math.floor((dmin - offset) / width) * width + offset
        hi = math.ceil((dmax - offset) / width) * width + offset

        # The half-width margin makes ``hi`` itself part of the result.
        result = np.arange(lo, hi + width * 0.5, width)
        # Clean up floating-point dust.
        return np.round(result, decimals=decimals)

    return breaks_fn
444
+
445
+
446
# Number of seconds in each supported unit.  The insertion order is the
# order in which the names are listed in the "unknown unit" error message.
_TIMESPAN_UNITS = {
    "secs": 1,
    "mins": 60,
    "hours": 60 * 60,
    "days": 24 * 60 * 60,
    "weeks": 7 * 24 * 60 * 60,
}


def breaks_timespan(
    unit: str = "secs",
    n: int = 5,
) -> Callable[[ArrayLike, Optional[int]], np.ndarray]:
    """
    Build a break function for timespan (duration) data given in seconds.

    The data are divided by the number of seconds in *unit*, pretty breaks
    are computed on that scale, and the result is converted back to
    seconds.  The breaks therefore fall on round multiples of *unit*.

    Parameters
    ----------
    unit : str, optional
        One of ``"secs"``, ``"mins"``, ``"hours"``, ``"days"``,
        ``"weeks"`` (default ``"secs"``).
    n : int, optional
        Desired number of breaks (default 5).

    Returns
    -------
    callable
        A function ``(x, n=None) -> numpy.ndarray`` returning break
        positions in seconds.  Non-finite entries of *x* are ignored; if
        nothing finite remains an empty array is returned.

    Raises
    ------
    ValueError
        If *unit* is not one of the recognised values.

    Examples
    --------
    >>> brk = breaks_timespan(unit="mins", n=5)
    >>> brk([0, 7200])
    array([   0., 1200., 2400., 3600., 4800., 6000., 7200.])
    """
    seconds_per_unit = _TIMESPAN_UNITS.get(unit)
    if seconds_per_unit is None:
        raise ValueError(
            f"Unknown unit {unit!r}. Choose from: "
            f"{', '.join(_TIMESPAN_UNITS)}"
        )
    default_count = n

    def breaks_fn(x: ArrayLike, n_: Optional[int] = None) -> np.ndarray:
        values = np.asarray(x, dtype=float)
        finite = values[np.isfinite(values)]
        if len(finite) == 0:
            return np.array([])
        lower = float(finite.min())
        upper = float(finite.max())
        count = default_count if n_ is None else n_

        # Work in the requested unit, then convert the breaks to seconds.
        in_units = _pretty(lower / seconds_per_unit,
                           upper / seconds_per_unit, n=count)
        return in_units * seconds_per_unit

    return breaks_fn
512
+
513
+
514
def breaks_exp(
    n: int = 5,
) -> Callable[[ArrayLike, Optional[int]], np.ndarray]:
    """
    Create a break function suitable for exponential transformations.

    For non-negative data spanning at least two orders of magnitude the
    breaks are ``0`` followed by the last ``n - 1`` integer powers of 10
    up to the power that covers the maximum (powers below ``10**0`` are
    never used).  All other data fall back to the extended algorithm.

    Parameters
    ----------
    n : int, optional
        Desired number of breaks (default 5).

    Returns
    -------
    callable
        A function ``(x, n=None) -> numpy.ndarray`` that computes break
        positions for data *x*.  Non-finite entries of *x* are ignored; if
        nothing finite remains an empty array is returned.  A second
        argument, when given, overrides *n* for that call.

    Examples
    --------
    >>> brk = breaks_exp(n=4)
    >>> brk([0.01, 1000])
    array([   0.,   10.,  100., 1000.])
    """

    def breaks_fn(x: ArrayLike, n_: Optional[int] = None) -> np.ndarray:
        x = np.asarray(x, dtype=float)
        x = x[np.isfinite(x)]
        if len(x) == 0:
            return np.array([])
        dmin, dmax = float(x.min()), float(x.max())
        k = n_ if n_ is not None else n

        if dmax > 0 and dmin >= 0:
            # Clamp to 1e-100 so that a minimum of exactly 0 has a logarithm.
            log_max = math.log10(max(dmax, 1e-100))
            log_min = math.log10(max(dmin, 1e-100))
            max_power = int(math.ceil(log_max))

            # Powers of 10 are used only for wide ranges that reach at
            # least 10**0; with a smaller maximum there is no usable power
            # and the result would degenerate to the lone break 0.
            if log_max - log_min >= 2 and max_power >= 0:
                # Zero occupies one slot, leaving k - 1 for the powers.
                # (Taking k powers and trimming afterwards would drop the
                # zero.)
                first_power = max(0, max_power - k + 2)
                powers = range(first_power, max_power + 1)
                return np.array([0.0] + [10.0 ** p for p in powers])

        # Fall back to extended breaks for everything else.
        return _extended(dmin, dmax, n=k)

    return breaks_fn
573
+
574
+
575
def cbreaks(
    x: ArrayLike,
    breaks_fun: Optional[Callable] = None,
    labels_fun: Optional[Callable] = None,
) -> dict[str, Any]:
    """
    Compute breaks and their labels in one call (deprecated).

    .. deprecated:: 0.1.0
        Use the specific break generators directly instead.

    Parameters
    ----------
    x : array-like
        Data range (length-2 vector ``[min, max]``).
    breaks_fun : callable, optional
        Function mapping *x* to break positions.  Defaults to the function
        returned by :func:`breaks_extended` called with no arguments.
    labels_fun : callable, optional
        Function mapping the breaks to labels.  If ``None``, each break is
        converted with :class:`str`.

    Returns
    -------
    dict
        Dictionary with keys ``"breaks"`` and ``"labels"``.

    Warns
    -----
    DeprecationWarning
        Always, on every call.
    """
    warnings.warn(
        "cbreaks() is deprecated. Use the specific break generators directly.",
        DeprecationWarning,
        stacklevel=2,
    )

    compute_breaks = breaks_extended() if breaks_fun is None else breaks_fun
    positions = compute_breaks(x)

    if labels_fun is None:
        text = [str(position) for position in positions]
    else:
        text = labels_fun(positions)

    return {"breaks": positions, "labels": text}
617
+
618
+
619
+ # ---------------------------------------------------------------------------
620
+ # Legacy aliases
621
+ # ---------------------------------------------------------------------------
622
+
623
#: Backwards-compatible name bound to the same object as
#: :func:`breaks_extended`.
extended_breaks = breaks_extended

#: Backwards-compatible name bound to the same object as
#: :func:`breaks_pretty`.
pretty_breaks = breaks_pretty