scales-python 1.4.0.9000__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
scales/py.typed ADDED
File without changes
scales/range.py ADDED
@@ -0,0 +1,223 @@
1
+ """
2
+ Mutable range classes for accumulating scale domains.
3
+
4
+ Python port of ``R/range.R`` from the R *scales* package
5
+ (https://github.com/r-lib/scales). The R source uses R6 classes;
6
+ here we use plain Python classes with the same ``train`` / ``reset``
7
+ interface.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from typing import Optional, Sequence, Union
13
+
14
+ import numpy as np
15
+ from numpy.typing import ArrayLike
16
+
17
+ __all__ = [
18
+ "Range",
19
+ "ContinuousRange",
20
+ "DiscreteRange",
21
+ ]
22
+
23
+
24
class Range:
    """Common base for the mutable range accumulators.

    Subclasses supply :meth:`train`; this class only owns the storage
    slot and the ability to clear it.

    Attributes
    ----------
    range : object or None
        Whatever has been accumulated so far. Stays ``None`` until a
        subclass ``train()`` call stores something.
    """

    def __init__(self) -> None:
        # Start out empty; nothing has been trained yet.
        self.range: object = None

    def train(self, x: ArrayLike, **kwargs) -> None:  # pragma: no cover
        """Fold new data into the range (subclasses must override)."""
        raise NotImplementedError()

    def reset(self) -> None:
        """Discard the accumulated range, returning to the empty state."""
        self.range = None
44
+
45
+
46
class ContinuousRange(Range):
    """Numeric ``(min, max)`` range that widens as data is trained.

    Each :meth:`train` call extends the stored bounds so that they cover
    every finite value seen so far.

    Examples
    --------
    >>> rng = ContinuousRange()
    >>> rng.train([1, 5, 3])
    >>> rng.range
    (1.0, 5.0)
    >>> rng.train([0, 4])
    >>> rng.range
    (0.0, 5.0)
    """

    def __init__(self) -> None:
        super().__init__()
        self.range: Optional[tuple[float, float]] = None

    def train(self, x: ArrayLike) -> None:
        """Widen the range so it also covers the finite values of *x*.

        Parameters
        ----------
        x : array_like
            Numeric values, converted to a float array. ``NaN`` and
            infinities are discarded first; if nothing finite remains
            the stored range is left untouched.
        """
        values = np.asarray(x, dtype=float)
        finite = values[np.isfinite(values)]
        # Boolean-mask indexing always yields a 1-D array, so ``size`` is
        # the number of usable observations.
        if finite.size == 0:
            return

        low = float(np.min(finite))
        high = float(np.max(finite))
        if self.range is not None:
            low = min(self.range[0], low)
            high = max(self.range[1], high)
        self.range = (low, high)

    def reset(self) -> None:
        """Forget everything trained so far."""
        super().reset()
92
+
93
+
94
class DiscreteRange(Range):
    """Mutable discrete range (ordered list of unique levels).

    Mirrors R ``scales::discrete_range`` / ``clevels``
    (scales/R/scale-discrete.R:55-116):

    * Input exposing a ``categories`` attribute (e.g. a pandas
      Categorical, the analogue of an R factor) contributes its
      categories in their defined order.
    * Any other input contributes its unique values, **sorted**
      (R ``sort(unique(x))``), with ``None`` / ``NaN`` kept at the end.
    * When combined with an existing range, unseen levels are appended;
      the union is re-sorted only if neither side was a factor.

    Examples
    --------
    >>> rng = DiscreteRange()
    >>> rng.train(["b", "a", "c"])
    >>> rng.range
    ['a', 'b', 'c']
    >>> rng.train(["d", "a"])
    >>> rng.range
    ['a', 'b', 'c', 'd']
    """

    def __init__(self) -> None:
        super().__init__()
        self.range: Optional[list] = None
        # Becomes True once any trained batch was factor-like; from then
        # on the range is never re-sorted.
        self._is_factor: bool = False

    @staticmethod
    def _is_na(value: object) -> bool:
        """Return True for ``None`` and for any floating-point NaN.

        ``np.floating`` is checked alongside ``float`` because only
        ``np.float64`` subclasses the builtin ``float``; NaNs coming from
        ``float32`` / ``float16`` arrays would otherwise go undetected.
        """
        if value is None:
            return True
        return isinstance(value, (float, np.floating)) and bool(np.isnan(value))

    @classmethod
    def _na_key(cls, value: object) -> object:
        """Return a set key that collapses every NA variant to ``None``."""
        return None if cls._is_na(value) else value

    @classmethod
    def _sorted_levels(cls, levels: list) -> list:
        """Sort the non-NA levels and append the NA levels after them.

        NA values are split off first because ``sorted()`` cannot order
        ``None`` against strings and NaN breaks comparison-based sorting.
        If the remaining values are mutually incomparable their existing
        order is kept.
        """
        present = [v for v in levels if not cls._is_na(v)]
        missing = [v for v in levels if cls._is_na(v)]
        try:
            present = sorted(present)
        except TypeError:
            # Mixed incomparable types: keep insertion order.
            pass
        return present + missing

    def train(
        self,
        x: Union[ArrayLike, Sequence, "pd.Categorical"],
        drop: bool = False,
        na_rm: bool = False,
    ) -> None:
        """Update the range with new discrete data.

        Parameters
        ----------
        x : array_like or pandas.Categorical
            Discrete values. If *x* has a ``categories`` attribute its
            categories are used in their defined order. Otherwise the
            unique values are sorted, matching R's ``sort(unique(x))``
            behaviour in ``clevels()``.
        drop : bool, optional
            If ``True`` and *x* is categorical, unused categories are
            dropped before training (default ``False``).
        na_rm : bool, optional
            If ``True``, ``None`` / ``NaN`` values are removed from the
            new levels before training (default ``False``).
        """
        new_is_factor = hasattr(x, "categories")
        if new_is_factor:
            # Factor-style input: keep the category order as defined.
            if drop:
                x = x.remove_unused_categories()
            levels = list(x.categories)
        else:
            # R's clevels for non-factor input: sort(unique(x)).
            seen: set = set()
            uniq: list = []
            for val in np.asarray(x).flat:
                key = self._na_key(val)
                if key not in seen:
                    seen.add(key)
                    uniq.append(val)
            levels = self._sorted_levels(uniq)

        if na_rm:
            levels = [v for v in levels if not self._is_na(v)]

        if self.range is None:
            # First batch: remember whether it was a factor.
            self.range = levels
            self._is_factor = new_is_factor
            return

        # Union of the existing range and the new levels, existing first.
        known = {self._na_key(v) for v in self.range}
        combined = list(self.range)
        for v in levels:
            key = self._na_key(v)
            if key not in known:
                known.add(key)
                combined.append(v)

        if self._is_factor or new_is_factor:
            # Keep factor order if either side was a factor.
            self.range = combined
            self._is_factor = True
        else:
            self.range = self._sorted_levels(combined)

    def reset(self) -> None:
        """Reset to an empty range and forget the factor flag."""
        self.range = None
        self._is_factor = False
@@ -0,0 +1,146 @@
1
+ """
2
+ Continuous-scale helpers: apply and train continuous scales.
3
+
4
+ Python port of ``R/scale-continuous.R`` from the R *scales* package
5
+ (https://github.com/r-lib/scales).
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from typing import Any, Callable, Optional, Tuple, Union
11
+
12
+ import numpy as np
13
+ from numpy.typing import ArrayLike
14
+
15
+ from .bounds import censor, rescale
16
+ from .transforms import Transform, as_transform
17
+
18
+ __all__ = [
19
+ "cscale",
20
+ "train_continuous",
21
+ ]
22
+
23
+
24
def cscale(
    x: ArrayLike,
    palette: Callable[[np.ndarray], np.ndarray],
    na_value: Any = np.nan,
    trans: Optional[Union[Transform, str]] = None,
    oob: Callable[[np.ndarray], np.ndarray] = censor,
) -> np.ndarray:
    """Map numeric data through a continuous palette.

    The steps are: optionally transform *x*, rescale it onto ``[0, 1]``,
    run the *oob* handler over the rescaled values, feed them to
    *palette*, and finally overwrite every missing position with
    *na_value*. A position counts as missing when the (transformed)
    input is non-finite or when the *oob* output is non-finite there.

    Parameters
    ----------
    x : array_like
        Numeric values in data coordinates.
    palette : callable
        Continuous palette: receives the rescaled array and returns the
        mapped outputs (e.g. colours or sizes).
    na_value : any, optional
        Replacement written at missing positions (default ``np.nan``).
    trans : Transform or str, optional
        Transformation applied to *x* before rescaling. A string is
        resolved with :func:`~scales.transforms.as_transform`.
    oob : callable, optional
        Out-of-bounds handler applied to the rescaled values before the
        palette. Defaults to :func:`~scales.bounds.censor`.

    Returns
    -------
    numpy.ndarray
        The palette output with missing positions replaced.

    Examples
    --------
    >>> from scales.palettes import pal_seq_gradient
    >>> cscale([1, 5, 10], pal_seq_gradient("white", "blue"))
    """
    values = np.asarray(x, dtype=float)

    # Transform first, resolving a transform given by name.
    if trans is not None:
        transform = as_transform(trans) if isinstance(trans, str) else trans
        values = transform.transform(values)

    # Positions that are already unusable before any rescaling.
    missing = ~np.isfinite(values)

    # Rescale, then let the OOB handler deal with out-of-range values;
    # anything it turns non-finite is treated as missing as well.
    unit = rescale(values, to=(0.0, 1.0))
    unit = np.asarray(oob(unit), dtype=float)
    missing = missing | ~np.isfinite(unit)

    mapped = np.asarray(palette(unit))
    if not np.any(missing):
        return mapped

    # String-like outputs are widened to object so an arbitrary
    # na_value can be stored.
    if mapped.dtype.kind in ("U", "S", "O"):
        mapped = mapped.astype(object)
    mapped[missing] = na_value
    return mapped
99
+
100
+
101
def train_continuous(
    new: ArrayLike,
    existing: Optional[Tuple[float, float]] = None,
) -> Tuple[float, float]:
    """Train (update) a continuous range with new data.

    Combines the range of *new* with an *existing* ``(min, max)`` range
    to produce an updated range that spans both. The result is always a
    tuple of Python floats, even when *existing* was supplied as a list
    or held integers.

    Parameters
    ----------
    new : array_like
        New numeric observations. Non-finite values are ignored.
    existing : tuple of float or None, optional
        Previously computed ``(min, max)`` range. ``None`` indicates
        no prior range.

    Returns
    -------
    tuple of float
        Updated ``(min, max)`` range.

    Raises
    ------
    ValueError
        If *new* holds no finite values and *existing* is ``None``.

    Examples
    --------
    >>> train_continuous([1, 5, 3])
    (1.0, 5.0)
    >>> train_continuous([0, 4], existing=(1.0, 5.0))
    (0.0, 5.0)
    """
    values = np.asarray(new, dtype=float)
    finite = values[np.isfinite(values)]

    # Normalise the prior range once so every return path yields the
    # documented ``tuple of float``.
    prior: Optional[Tuple[float, float]] = None
    if existing is not None:
        prior = (float(existing[0]), float(existing[1]))

    if finite.size == 0:
        if prior is None:
            raise ValueError("Cannot train on empty data with no existing range.")
        return prior

    low = float(np.min(finite))
    high = float(np.max(finite))
    if prior is None:
        return (low, high)

    return (min(prior[0], low), max(prior[1], high))
@@ -0,0 +1,196 @@
1
+ """
2
+ Discrete-scale helpers: apply and train discrete scales.
3
+
4
+ Python port of ``R/scale-discrete.R`` from the R *scales* package
5
+ (https://github.com/r-lib/scales).
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from typing import Any, Callable, List, Optional, Sequence, Union
11
+
12
+ import numpy as np
13
+ from numpy.typing import ArrayLike
14
+
15
+ __all__ = [
16
+ "dscale",
17
+ "train_discrete",
18
+ ]
19
+
20
+
21
def dscale(
    x: ArrayLike,
    palette: Callable[[int], Any],
    na_value: Any = None,
) -> np.ndarray:
    """Apply a discrete scale to categorical data.

    Maps each unique level of *x* to a palette output, then broadcasts
    back to the full shape of *x*.

    Parameters
    ----------
    x : array_like
        Discrete (categorical) values. If *x* has a ``categories``
        attribute (e.g. a :class:`pandas.Categorical`) those categories
        are the levels; otherwise the levels are the non-NA unique
        values in order of first appearance.
    palette : callable
        A discrete palette function that takes an integer *n* (number
        of levels) and returns a sequence of output values.
    na_value : any, optional
        Value used for ``None`` / ``NaN`` entries in *x*, and for levels
        the palette returned no value for (default ``None``).

    Returns
    -------
    numpy.ndarray
        Palette-mapped values with the same shape as *x*. The array has
        the palette's dtype unless *na_value* had to be written and
        cannot be stored in that dtype unchanged, in which case an
        object array is returned so that *na_value* is preserved as-is.

    Examples
    --------
    >>> from scales.palettes import pal_brewer
    >>> dscale(["a", "b", "a", "c"], pal_brewer())
    """
    x_arr = np.asarray(x)

    # Determine levels (ordered unique values).
    if hasattr(x, "categories"):
        levels = list(x.categories)
    else:
        seen: set = set()
        levels = []
        for val in x_arr.flat:
            key = _na_key(val)
            if key not in seen:
                seen.add(key)
                if not _is_na(val):
                    levels.append(val)

    n = len(levels)
    if n == 0:
        return np.full(x_arr.shape, na_value, dtype=object)

    pal_values = np.asarray(palette(n))

    # zip() stops at the shorter input, so levels beyond the palette's
    # length get no entry and fall back to na_value below.
    lookup: dict = dict(zip(levels, pal_values))

    # Map every element first, noting whether na_value was needed.
    mapped: list = []
    used_na = False
    for val in x_arr.flat:
        if not _is_na(val) and val in lookup:
            mapped.append(lookup[val])
        else:
            mapped.append(na_value)
            used_na = True

    # Writing na_value into a typed array silently coerces it (None
    # becomes the text 'None', long strings are truncated) or raises
    # for integer dtypes, so switch to object storage when it would not
    # survive the assignment.
    out_dtype = pal_values.dtype if len(pal_values) > 0 else np.dtype(object)
    if used_na and not _na_value_fits(na_value, out_dtype):
        out_dtype = np.dtype(object)

    result = np.empty(x_arr.shape, dtype=out_dtype)
    # ``flat`` and ``ndindex`` both walk the array in C order.
    for idx, item in zip(np.ndindex(x_arr.shape), mapped):
        result[idx] = item

    return result


def _na_value_fits(na_value: Any, dtype: np.dtype) -> bool:
    """Return True if *na_value* can be stored in *dtype* without being altered."""
    if dtype.kind == "O":
        return True
    if dtype.kind == "f" and _is_na(na_value):
        # A missing marker in a float array is represented as NaN.
        return True
    probe = np.empty(1, dtype=dtype)
    try:
        probe[0] = na_value
        return bool(probe[0] == na_value)
    except (TypeError, ValueError):
        return False
93
+
94
+
95
def train_discrete(
    new: Union[ArrayLike, Sequence],
    existing: Optional[List] = None,
    drop: bool = False,
    na_rm: bool = False,
) -> list:
    """Train (update) a discrete range with new data.

    Combines the unique levels of *new* with an *existing* level list.
    Factor-like input (anything with a ``categories`` attribute) keeps
    its category order; otherwise the result is sorted in natural
    order, following R's ``sort(unique(x))``, with ``None`` / ``NaN``
    placed last.

    Parameters
    ----------
    new : array_like or sequence
        New discrete observations.
    existing : list or None, optional
        Previously computed list of levels. ``None`` indicates no
        prior levels.
    drop : bool, optional
        If ``True`` and *new* is a :class:`pandas.Categorical`,
        unused categories are dropped (default ``False``).
    na_rm : bool, optional
        If ``True``, ``None`` / ``NaN`` values are removed from the
        levels contributed by *new* (default ``False``).

    Returns
    -------
    list
        Updated list of unique levels.

    Examples
    --------
    >>> train_discrete(["a", "b", "c"])
    ['a', 'b', 'c']
    >>> train_discrete(["b", "d"], existing=["a", "b", "c"])
    ['a', 'b', 'c', 'd']
    """
    existing_is_factor = hasattr(existing, "categories")
    new_is_factor = hasattr(new, "categories")

    # Extract levels from the new data.
    if new_is_factor:
        if drop:
            new = new.remove_unused_categories()
        new_levels = list(new.categories)
    else:
        seen: set = set()
        new_levels = []
        for val in np.asarray(new).flat:
            key = _na_key(val)
            if key not in seen:
                seen.add(key)
                new_levels.append(val)

    if na_rm:
        new_levels = [v for v in new_levels if not _is_na(v)]

    if existing is None:
        if new_is_factor:
            return new_levels
        return _sort_levels(new_levels)

    # Append unseen levels after the existing ones.
    known = {_na_key(v) for v in existing}
    merged = list(existing)
    for v in new_levels:
        key = _na_key(v)
        if key not in known:
            known.add(key)
            merged.append(v)

    # When neither side is a factor, R re-sorts the union.
    if not (existing_is_factor or new_is_factor):
        merged = _sort_levels(merged)

    return merged


def _sort_levels(levels: list) -> list:
    """Sort non-NA levels in natural order and keep NA levels at the end.

    Sorting on ``str(v)`` alone would order numbers lexicographically
    (10 before 2) and file NaN under the text ``'nan'``, so values are
    compared directly. The string key is used only as a fallback for
    mutually incomparable types.
    """
    present = [v for v in levels if not _is_na(v)]
    missing = [v for v in levels if _is_na(v)]
    try:
        present = sorted(present)
    except TypeError:
        present = sorted(present, key=str)
    return present + missing
176
+
177
+
178
+ # ---------------------------------------------------------------------------
179
+ # Internal helpers
180
+ # ---------------------------------------------------------------------------
181
+
182
+ def _is_na(val: Any) -> bool:
183
+ """Check if a value is NA-like (None or NaN)."""
184
+ if val is None:
185
+ return True
186
+ try:
187
+ return np.isnan(val)
188
+ except (TypeError, ValueError):
189
+ return False
190
+
191
+
192
def _na_key(val: Any) -> Any:
    """Return *val* as a lookup key, with every NA variant collapsed to ``None``."""
    return None if _is_na(val) else val