rangeable 1.0.0__tar.gz → 2.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {rangeable-1.0.0 → rangeable-2.0.0}/PKG-INFO +7 -5
- {rangeable-1.0.0 → rangeable-2.0.0}/README.md +6 -4
- {rangeable-1.0.0 → rangeable-2.0.0}/pyproject.toml +1 -1
- {rangeable-1.0.0 → rangeable-2.0.0}/src/rangeable/__init__.py +1 -0
- rangeable-2.0.0/src/rangeable/_core.py +532 -0
- rangeable-2.0.0/src/rangeable/_disjoint_set.py +290 -0
- rangeable-1.0.0/src/rangeable/_core.py +0 -179
- rangeable-1.0.0/src/rangeable/_disjoint_set.py +0 -104
- {rangeable-1.0.0 → rangeable-2.0.0}/.gitignore +0 -0
- {rangeable-1.0.0 → rangeable-2.0.0}/CHANGELOG.md +0 -0
- {rangeable-1.0.0 → rangeable-2.0.0}/LICENSE +0 -0
- {rangeable-1.0.0 → rangeable-2.0.0}/src/rangeable/_boundary_index.py +0 -0
- {rangeable-1.0.0 → rangeable-2.0.0}/src/rangeable/_errors.py +0 -0
- {rangeable-1.0.0 → rangeable-2.0.0}/src/rangeable/_interval.py +0 -0
- {rangeable-1.0.0 → rangeable-2.0.0}/src/rangeable/_slot.py +0 -0
- {rangeable-1.0.0 → rangeable-2.0.0}/src/rangeable/_transition.py +0 -0
- {rangeable-1.0.0 → rangeable-2.0.0}/src/rangeable/py.typed +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: rangeable
|
|
3
|
-
Version: 1.0.0
|
|
3
|
+
Version: 2.0.0
|
|
4
4
|
Summary: Hashable-element interval set with first-insert ordered active queries.
|
|
5
5
|
Project-URL: Homepage, https://github.com/ZhgChgLi/PythonRangeable
|
|
6
6
|
Project-URL: Source, https://github.com/ZhgChgLi/PythonRangeable
|
|
@@ -108,14 +108,16 @@ See [RangeableRFC](https://github.com/ZhgChgLi/RangeableRFC) § 4 for normative
|
|
|
108
108
|
|
|
109
109
|
## Cross-language consistency
|
|
110
110
|
|
|
111
|
-
This Python implementation
|
|
111
|
+
This Python implementation joins the [Ruby](https://github.com/ZhgChgLi/RubyRangeable), [Swift](https://github.com/ZhgChgLi/SwiftRangeable), [JS](https://github.com/ZhgChgLi/JSRangeable), [Kotlin](https://github.com/ZhgChgLi/KotlinRangeable) and [Go](https://github.com/ZhgChgLi/GoRangeable) implementations. All six share a 160-op / 86-probe JSON fixture and produce byte-identical outputs.
|
|
112
112
|
|
|
113
113
|
## See also
|
|
114
114
|
|
|
115
115
|
- **[RangeableRFC](https://github.com/ZhgChgLi/RangeableRFC)** — normative specification.
|
|
116
|
-
- **[RubyRangeable](https://github.com/ZhgChgLi/RubyRangeable)** —
|
|
117
|
-
- **[SwiftRangeable](https://github.com/ZhgChgLi/SwiftRangeable)** —
|
|
118
|
-
- **[JSRangeable](https://github.com/ZhgChgLi/JSRangeable)** —
|
|
116
|
+
- **[RubyRangeable](https://github.com/ZhgChgLi/RubyRangeable)** — Ruby reference (`gem install rangeable`).
|
|
117
|
+
- **[SwiftRangeable](https://github.com/ZhgChgLi/SwiftRangeable)** — Swift reference (SPM).
|
|
118
|
+
- **[JSRangeable](https://github.com/ZhgChgLi/JSRangeable)** — TypeScript reference (`npm i rangeable-js`).
|
|
119
|
+
- **[KotlinRangeable](https://github.com/ZhgChgLi/KotlinRangeable)** — Kotlin/JVM reference (JitPack).
|
|
120
|
+
- **[GoRangeable](https://github.com/ZhgChgLi/GoRangeable)** — Go reference (`go get github.com/ZhgChgLi/GoRangeable`).
|
|
119
121
|
|
|
120
122
|
## Development
|
|
121
123
|
|
|
@@ -77,14 +77,16 @@ See [RangeableRFC](https://github.com/ZhgChgLi/RangeableRFC) § 4 for normative
|
|
|
77
77
|
|
|
78
78
|
## Cross-language consistency
|
|
79
79
|
|
|
80
|
-
This Python implementation
|
|
80
|
+
This Python implementation joins the [Ruby](https://github.com/ZhgChgLi/RubyRangeable), [Swift](https://github.com/ZhgChgLi/SwiftRangeable), [JS](https://github.com/ZhgChgLi/JSRangeable), [Kotlin](https://github.com/ZhgChgLi/KotlinRangeable) and [Go](https://github.com/ZhgChgLi/GoRangeable) implementations. All six share a 160-op / 86-probe JSON fixture and produce byte-identical outputs.
|
|
81
81
|
|
|
82
82
|
## See also
|
|
83
83
|
|
|
84
84
|
- **[RangeableRFC](https://github.com/ZhgChgLi/RangeableRFC)** — normative specification.
|
|
85
|
-
- **[RubyRangeable](https://github.com/ZhgChgLi/RubyRangeable)** —
|
|
86
|
-
- **[SwiftRangeable](https://github.com/ZhgChgLi/SwiftRangeable)** —
|
|
87
|
-
- **[JSRangeable](https://github.com/ZhgChgLi/JSRangeable)** —
|
|
85
|
+
- **[RubyRangeable](https://github.com/ZhgChgLi/RubyRangeable)** — Ruby reference (`gem install rangeable`).
|
|
86
|
+
- **[SwiftRangeable](https://github.com/ZhgChgLi/SwiftRangeable)** — Swift reference (SPM).
|
|
87
|
+
- **[JSRangeable](https://github.com/ZhgChgLi/JSRangeable)** — TypeScript reference (`npm i rangeable-js`).
|
|
88
|
+
- **[KotlinRangeable](https://github.com/ZhgChgLi/KotlinRangeable)** — Kotlin/JVM reference (JitPack).
|
|
89
|
+
- **[GoRangeable](https://github.com/ZhgChgLi/GoRangeable)** — Go reference (`go get github.com/ZhgChgLi/GoRangeable`).
|
|
88
90
|
|
|
89
91
|
## Development
|
|
90
92
|
|
|
@@ -0,0 +1,532 @@
|
|
|
1
|
+
"""Main Rangeable container."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Generic, Hashable, Iterator, TypeVar
|
|
6
|
+
|
|
7
|
+
from ._boundary_index import BoundaryIndex
|
|
8
|
+
from ._disjoint_set import (
|
|
9
|
+
DisjointSet,
|
|
10
|
+
InsertResult,
|
|
11
|
+
RemoveResult,
|
|
12
|
+
intersect_disjoint_lists,
|
|
13
|
+
merge_disjoint_lists,
|
|
14
|
+
subtract_disjoint_lists,
|
|
15
|
+
)
|
|
16
|
+
from ._errors import InvalidIntervalError
|
|
17
|
+
from ._interval import Interval
|
|
18
|
+
from ._slot import Slot
|
|
19
|
+
from ._transition import TransitionEvent
|
|
20
|
+
|
|
21
|
+
E = TypeVar("E", bound=Hashable)
|
|
22
|
+
|
|
23
|
+
_EMPTY_OBJS: tuple = ()
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class Rangeable(Generic[E]):
|
|
27
|
+
"""Generic, integer-coordinate, closed-interval set container.
|
|
28
|
+
|
|
29
|
+
Pairs hashable elements with their merged disjoint integer ranges
|
|
30
|
+
and supports three query families:
|
|
31
|
+
|
|
32
|
+
* by-element via :meth:`get_range`
|
|
33
|
+
* by-position via ``r[i]`` / :meth:`active_at`
|
|
34
|
+
* by-range via :meth:`transitions`
|
|
35
|
+
|
|
36
|
+
See `RFC §3 <https://github.com/ZhgChgLi/RangeableRFC>`_ for the
|
|
37
|
+
full normative API surface.
|
|
38
|
+
"""
|
|
39
|
+
|
|
40
|
+
__slots__ = (
|
|
41
|
+
"_intervals",
|
|
42
|
+
"_insertion_order",
|
|
43
|
+
"_ord",
|
|
44
|
+
"_version",
|
|
45
|
+
"_event_index",
|
|
46
|
+
)
|
|
47
|
+
|
|
48
|
+
def __init__(self) -> None:
|
|
49
|
+
self._intervals: dict[E, DisjointSet] = {}
|
|
50
|
+
self._insertion_order: list[E] = []
|
|
51
|
+
self._ord: dict[E, int] = {}
|
|
52
|
+
self._version: int = 0
|
|
53
|
+
self._event_index: BoundaryIndex[E] | None = None
|
|
54
|
+
|
|
55
|
+
@classmethod
def empty(cls) -> "Rangeable[E]":
    """Return a new container with nothing in it.

    Alternate constructor matching the RFC §3.1 ``Rangeable.empty()``
    alias; equivalent to calling the class with no arguments.
    """
    fresh = cls()
    return fresh
|
|
59
|
+
|
|
60
|
+
@property
def version(self) -> int:
    """Current value of the container's mutation counter."""
    current = self._version
    return current
|
|
63
|
+
|
|
64
|
+
def insert(self, element: E, *, start: int, end: int) -> "Rangeable[E]":
    """Add the closed interval ``[start, end]`` to ``element``'s ranges.

    The first insert of an element registers it at the tail of the
    insertion order. The version counter is bumped and the cached
    boundary index dropped only when the per-element set reports a real
    mutation; an insert that is already fully covered changes nothing
    (RFC §3.2 idempotence).

    Raises :class:`InvalidIntervalError` if ``start > end``.

    Returns ``self`` so calls can be chained.
    """
    if start > end:
        raise InvalidIntervalError(f"start ({start}) > end ({end})")

    try:
        ranges = self._intervals[element]
    except KeyError:
        # First sighting of this element: register it before inserting.
        ranges = self._intervals[element] = DisjointSet()
        self._insertion_order.append(element)
        self._ord[element] = len(self._insertion_order)

    if ranges.insert(start, end) == InsertResult.MUTATED:
        self._version += 1
        self._event_index = None
    return self
|
|
90
|
+
|
|
91
|
+
def __getitem__(self, i: int) -> Slot[E]:
    """Return the :class:`Slot` of elements active at coordinate ``i``.

    RFC §3.3. The boundary index is (re)built on demand first. A
    coordinate that falls in no segment yields an empty :class:`Slot`.
    The original author documents the lookup as
    O(log |segments| + r) once the index exists.
    """
    self._ensure_event_index_fresh()
    index = self._event_index
    assert index is not None
    segment = index.segment_at(i)
    active = _EMPTY_OBJS if segment is None else segment.active
    return Slot(active)
|
|
103
|
+
|
|
104
|
+
def active_at(self, *, index: int) -> Slot[E]:
    """Keyword-argument spelling of ``self[index]`` (RFC §3.3 name)."""
    slot = self[index]
    return slot
|
|
107
|
+
|
|
108
|
+
def get_range(self, element: E) -> list[tuple[int, int]]:
    """Return ``element``'s merged ranges as ``[(lo, hi), ...]``. RFC §3.4.

    An element that is not currently stored yields an empty list.
    """
    ranges = self._intervals.get(element)
    return [] if ranges is None else ranges.to_pairs()
|
|
117
|
+
|
|
118
|
+
def transitions(self, *, lo: int, hi: int | None) -> list[TransitionEvent[E]]:
    """List the open / close events inside the inclusive range ``[lo, hi]``.

    RFC §3.5. ``hi=None`` stands for +∞, i.e. no upper limit.

    Raises :class:`InvalidIntervalError` if ``lo`` is ``None`` or if
    ``lo > hi``.
    """
    if lo is None:
        raise InvalidIntervalError("transitions: lo must not be None")
    unbounded = hi is None
    if not unbounded and lo > hi:
        raise InvalidIntervalError(f"lo ({lo}) > hi ({hi})")

    self._ensure_event_index_fresh()
    index = self._event_index
    assert index is not None
    # The index is queried with ``hi + 1`` — presumably an exclusive upper
    # bound; confirm against BoundaryIndex.events_in_range.
    return index.events_in_range(lo, None if unbounded else hi + 1)
|
|
136
|
+
|
|
137
|
+
def __len__(self) -> int:
|
|
138
|
+
"""Number of distinct equivalence-class elements ever inserted."""
|
|
139
|
+
return len(self._insertion_order)
|
|
140
|
+
|
|
141
|
+
@property
def count(self) -> int:
    """Number of tracked elements; same value as ``len(self)``."""
    tracked = self._insertion_order
    return len(tracked)
|
|
144
|
+
|
|
145
|
+
class _EmptyAccessor:
    """Descriptor that lets the name ``empty`` serve both documented roles.

    The class body binds ``empty`` twice: first as an alternate
    constructor, later as the emptiness flag. A plain ``property`` here
    overwrites the constructor, so ``Rangeable.empty()`` fails with
    ``TypeError: 'property' object is not callable``. This descriptor
    keeps both uses working:

    * class access (``Rangeable.empty``) returns a zero-argument factory
      that builds a new instance of the accessed class;
    * instance access (``r.empty``) returns ``True`` when the container
      tracks no elements, exactly as the property did.
    """

    def __get__(self, instance, owner=None):
        if instance is None:
            # Accessed on the class: act as the alternate constructor.
            def make_empty():
                return owner()

            return make_empty
        # Accessed on an instance: behave like the read-only property.
        return not instance._insertion_order


empty = _EmptyAccessor()
|
|
148
|
+
|
|
149
|
+
def __bool__(self) -> bool:
|
|
150
|
+
return bool(self._insertion_order)
|
|
151
|
+
|
|
152
|
+
def __iter__(self) -> Iterator[tuple[E, list[tuple[int, int]]]]:
|
|
153
|
+
"""Yield ``(element, ranges)`` pairs in insertion-order ascending."""
|
|
154
|
+
for element in self._insertion_order:
|
|
155
|
+
yield element, self._intervals[element].to_pairs()
|
|
156
|
+
|
|
157
|
+
def copy(self) -> "Rangeable[E]":
    """Return an independent duplicate of this container.

    Every element's interval set is rebuilt in the duplicate, so later
    mutation of either container MUST NOT be visible through the other.
    The element objects themselves are reused, not copied. The
    duplicate carries the same ``version`` and the same ordinals.
    """
    clone = Rangeable[E]()
    for member in self._insertion_order:
        clone._replant(member, self._intervals[member], self._ord[member])
    clone._version = self._version
    return clone
|
|
166
|
+
|
|
167
|
+
def __copy__(self) -> "Rangeable[E]":
|
|
168
|
+
return self.copy()
|
|
169
|
+
|
|
170
|
+
def __deepcopy__(self, memo: dict) -> "Rangeable[E]":
|
|
171
|
+
return self.copy()
|
|
172
|
+
|
|
173
|
+
def _ensure_event_index_fresh(self) -> None:
|
|
174
|
+
if self._event_index is not None and self._event_index.version == self._version:
|
|
175
|
+
return
|
|
176
|
+
v_start = self._version
|
|
177
|
+
rebuilt = BoundaryIndex.build(self._intervals, self._ord, v_start)
|
|
178
|
+
if self._version == v_start:
|
|
179
|
+
self._event_index = rebuilt
|
|
180
|
+
|
|
181
|
+
def _replant(self, element: E, source_set: DisjointSet, source_ord: int) -> None:
    """Register ``element`` here with a rebuilt copy of ``source_set``.

    Each interval of ``source_set`` is re-inserted into a new
    :class:`DisjointSet`, the element is appended to the insertion
    order, and its ordinal is taken verbatim from ``source_ord``. The
    version counter is not touched.
    """
    rebuilt = DisjointSet()
    for span in source_set:
        rebuilt.insert(span.lo, span.hi)
    self._intervals[element] = rebuilt
    self._insertion_order.append(element)
    self._ord[element] = source_ord
|
|
188
|
+
|
|
189
|
+
# ------------------------------------------------------------------ #
|
|
190
|
+
# v2 — Removal API (RFC §6.6–§6.9, §10.B)
|
|
191
|
+
# ------------------------------------------------------------------ #
|
|
192
|
+
|
|
193
|
+
def remove(self, element: E, *, start: int, end: int) -> "Rangeable[E]":
    """Subtract the closed interval ``[start, end]`` from ``R(element)``.

    RFC §6.6. Nothing happens, and :attr:`version` is not bumped, when
    ``element`` is absent or the interval set reports the removal as
    idempotent (§4.10 N3).

    When the removal leaves ``R(element)`` empty the element is pruned
    right away (§4.10 N1): it is dropped from ``intervals`` and
    ``insertion_order`` and the surviving ordinals are renumbered
    densely.

    Raises :class:`InvalidIntervalError` if ``start > end``. Returns
    ``self`` so calls can be chained.
    """
    if start > end:
        raise InvalidIntervalError(f"start ({start}) > end ({end})")

    ranges = self._intervals.get(element)
    if ranges is None:
        return self  # §6.6 step 2: no R(e) to subtract from.

    outcome = ranges.remove(start, end)
    if outcome != RemoveResult.IDEMPOTENT:
        if outcome == RemoveResult.MUTATED_BECAME_EMPTY:
            self._excise_element(element)
        # One bump covers both the shrink-only and the became-empty case.
        self._version += 1
        self._event_index = None
    return self
|
|
224
|
+
|
|
225
|
+
def remove_element(self, element: E) -> "Rangeable[E]":
    """Drop ``element`` together with all of its ranges. RFC §6.7.

    An absent element is ignored without a version bump (§4.10 N3).
    Returns ``self`` so calls can be chained.
    """
    if element in self._intervals:
        self._excise_element(element)
        self._version += 1
        self._event_index = None
    return self
|
|
237
|
+
|
|
238
|
+
def __delitem__(self, key: E) -> None:
|
|
239
|
+
"""``del r[e]`` is sugar for :meth:`remove_element`. RFC §6.7.
|
|
240
|
+
|
|
241
|
+
Note: ``r[i]`` is the integer-subscript probe returning a
|
|
242
|
+
:class:`Slot`, so ``del r[e]`` only makes sense when ``e`` is an
|
|
243
|
+
element instance, not an index. Mixing the two APIs is a caller
|
|
244
|
+
bug.
|
|
245
|
+
"""
|
|
246
|
+
self.remove_element(key)
|
|
247
|
+
|
|
248
|
+
def clear(self) -> "Rangeable[E]":
    """Forget every element and range. RFC §6.8.

    Clearing a container that is already empty changes nothing and does
    not bump the version (§4.10 N3). Returns ``self`` for chaining.
    """
    if self._intervals:
        # Rebind to new objects rather than emptying the old ones in place.
        self._intervals, self._insertion_order, self._ord = {}, [], {}
        self._version += 1
        self._event_index = None
    return self
|
|
262
|
+
|
|
263
|
+
def remove_ranges(self, *, start: int, end: int) -> "Rangeable[E]":
    """Subtract ``[start, end]`` from every element in one operation.

    RFC §6.9. However many elements are affected, :attr:`version` is
    bumped at most once. Elements whose ranges become empty are pruned.

    Raises :class:`InvalidIntervalError` if ``start > end``; the check
    runs before anything is mutated. Returns ``self`` for chaining.
    """
    if start > end:
        raise InvalidIntervalError(f"start ({start}) > end ({end})")

    touched = False
    # Walk a snapshot of the order; `self._intervals` loses keys on the way.
    for member in tuple(self._insertion_order):
        outcome = self._intervals[member].remove(start, end)
        if outcome == RemoveResult.IDEMPOTENT:
            continue
        touched = True
        if outcome == RemoveResult.MUTATED_BECAME_EMPTY:
            # Only the intervals key goes now; order and ordinals are
            # rebuilt once after the loop to avoid O(E^2) work.
            del self._intervals[member]

    if touched:
        # Single-pass dense rebuild of insertion_order + ord (§6.9 step 4).
        kept = [m for m in self._insertion_order if m in self._intervals]
        self._insertion_order = kept
        self._ord = {m: position for position, m in enumerate(kept, start=1)}
        self._version += 1
        self._event_index = None
    return self
|
|
298
|
+
|
|
299
|
+
def _excise_element(self, element: E) -> None:
|
|
300
|
+
"""Excise ``element`` from intervals + insertion_order + ord with
|
|
301
|
+
a dense ``ord`` renumber over the survivors past its position.
|
|
302
|
+
|
|
303
|
+
Caller is responsible for the single ``version`` bump and
|
|
304
|
+
``event_index`` invalidation that wraps the whole op.
|
|
305
|
+
"""
|
|
306
|
+
del self._intervals[element]
|
|
307
|
+
idx = self._insertion_order.index(element)
|
|
308
|
+
del self._insertion_order[idx]
|
|
309
|
+
del self._ord[element]
|
|
310
|
+
# Densely renumber ord for survivors at positions >= idx.
|
|
311
|
+
for i in range(idx, len(self._insertion_order)):
|
|
312
|
+
self._ord[self._insertion_order[i]] -= 1
|
|
313
|
+
|
|
314
|
+
# ------------------------------------------------------------------ #
|
|
315
|
+
# v2 — Set Operations API (RFC §6.10–§6.13, §10.C–§10.G)
|
|
316
|
+
# ------------------------------------------------------------------ #
|
|
317
|
+
|
|
318
|
+
def union(self, other: "Rangeable[E]") -> "Rangeable[E]":
    """Build the per-element union of ``self`` and ``other``. RFC §6.10.

    The result is a new :class:`Rangeable` with ``version == 0``;
    neither operand is modified. Elements keep ``self``'s insertion
    order, followed by the elements only ``other`` has, in ``other``'s
    insertion order.
    """
    result: Rangeable[E] = Rangeable()
    # Pass 1: every element of self, merged with other's ranges if any.
    for member in self._insertion_order:
        mine = self._intervals[member]._entries
        counterpart = other._intervals.get(member)
        if counterpart is not None:
            combined = merge_disjoint_lists(mine, counterpart._entries)
        else:
            combined = list(mine)
        result._populate(member, combined)
    # Pass 2: elements that exist only in other are appended at the tail.
    for member in other._insertion_order:
        if member not in self._intervals:
            result._populate(member, list(other._intervals[member]._entries))
    return result
|
|
345
|
+
|
|
346
|
+
def intersection(self, other: "Rangeable[E]") -> "Rangeable[E]":
    """Build the per-element intersection with ``other``. RFC §6.11.

    Returns a new :class:`Rangeable`. An element is kept only if both
    operands hold it and their ranges overlap; empty results are pruned
    (§4.10 N1). Survivors follow ``self``'s insertion order and get
    dense ordinals.
    """
    result: Rangeable[E] = Rangeable()
    for member in self._insertion_order:
        counterpart = other._intervals.get(member)
        if counterpart is None:
            continue
        shared = intersect_disjoint_lists(
            self._intervals[member]._entries, counterpart._entries
        )
        if shared:  # an empty overlap is pruned (§4.10 N1)
            result._populate(member, shared)
    return result
|
|
365
|
+
|
|
366
|
+
def difference(self, other: "Rangeable[E]") -> "Rangeable[E]":
    """Build the per-element difference ``self ∖ other``. RFC §6.12.

    Returns a new :class:`Rangeable`. Elements whose ranges are fully
    subtracted away are pruned (§4.10). Survivors follow ``self``'s
    insertion order and get dense ordinals.
    """
    result: Rangeable[E] = Rangeable()
    for member in self._insertion_order:
        mine = self._intervals[member]._entries
        counterpart = other._intervals.get(member)
        if counterpart is not None and counterpart._entries:
            leftover = subtract_disjoint_lists(mine, counterpart._entries)
        else:
            # Nothing to subtract: carry a copy of self's ranges over.
            leftover = list(mine)
        if leftover:
            result._populate(member, leftover)
    return result
|
|
386
|
+
|
|
387
|
+
def symmetric_difference(self, other: "Rangeable[E]") -> "Rangeable[E]":
    """Build the per-element symmetric difference ``self △ other``.

    RFC §6.13. Returns a new :class:`Rangeable`. For an element held by
    both operands the result is ``(self ∖ other) ∪ (other ∖ self)``,
    combined with ``merge_disjoint_lists`` so that touching pieces
    collapse (§6.13 worked example: ``[(0,5)] △ [(6,10)] == [(0,10)]``).
    Elements held by only one operand are carried over unchanged.
    """
    result: Rangeable[E] = Rangeable()
    # Pass 1: elements of self, in self's order.
    for member in self._insertion_order:
        mine = self._intervals[member]._entries
        counterpart = other._intervals.get(member)
        if counterpart is None:
            # other has nothing for this element, so the result is self's.
            result._populate(member, list(mine))
            continue
        theirs = counterpart._entries
        only_mine = subtract_disjoint_lists(mine, theirs)
        only_theirs = subtract_disjoint_lists(theirs, mine)
        combined = merge_disjoint_lists(only_mine, only_theirs)
        if combined:  # identical ranges cancel out and are pruned (§4.10 N1)
            result._populate(member, combined)
    # Pass 2: elements only other holds, in other's order.
    for member in other._insertion_order:
        if member in self._intervals:
            continue
        carried = list(other._intervals[member]._entries)
        if carried:  # defensive; unreachable under (I1.4)
            result._populate(member, carried)
    return result
|
|
420
|
+
|
|
421
|
+
# -------------------------- Mutating set ops ---------------------- #
|
|
422
|
+
|
|
423
|
+
def update(self, other: "Rangeable[E]") -> "Rangeable[E]":
    """Replace ``self``'s contents with ``self ∪ other``. RFC §6.10.

    When the union is structurally equal to the current state nothing is
    adopted and the version MUST NOT bump (mirrors the §3.2 idempotence
    rule). Returns ``self`` for chaining.
    """
    self._adopt_if_changed(self.union(other))
    return self
|
|
433
|
+
|
|
434
|
+
def intersection_update(self, other: "Rangeable[E]") -> "Rangeable[E]":
    """Replace ``self``'s contents with ``self ∩ other``. RFC §6.11.

    Nothing is adopted when the intersection is structurally equal to
    the current state. Returns ``self``.
    """
    self._adopt_if_changed(self.intersection(other))
    return self
|
|
442
|
+
|
|
443
|
+
def difference_update(self, other: "Rangeable[E]") -> "Rangeable[E]":
    """Replace ``self``'s contents with ``self ∖ other``. RFC §6.12.

    Returns ``self``.
    """
    self._adopt_if_changed(self.difference(other))
    return self
|
|
448
|
+
|
|
449
|
+
def symmetric_difference_update(
    self, other: "Rangeable[E]"
) -> "Rangeable[E]":
    """Replace ``self``'s contents with ``self △ other``. RFC §6.13.

    Returns ``self``.
    """
    self._adopt_if_changed(self.symmetric_difference(other))
    return self
|
|
456
|
+
|
|
457
|
+
# ------------------------------- Operators ------------------------ #
|
|
458
|
+
|
|
459
|
+
def __or__(self, other: "Rangeable[E]") -> "Rangeable[E]":
|
|
460
|
+
return self.union(other)
|
|
461
|
+
|
|
462
|
+
def __and__(self, other: "Rangeable[E]") -> "Rangeable[E]":
|
|
463
|
+
return self.intersection(other)
|
|
464
|
+
|
|
465
|
+
def __sub__(self, other: "Rangeable[E]") -> "Rangeable[E]":
|
|
466
|
+
return self.difference(other)
|
|
467
|
+
|
|
468
|
+
def __xor__(self, other: "Rangeable[E]") -> "Rangeable[E]":
|
|
469
|
+
return self.symmetric_difference(other)
|
|
470
|
+
|
|
471
|
+
def __ior__(self, other: "Rangeable[E]") -> "Rangeable[E]":
|
|
472
|
+
self.update(other)
|
|
473
|
+
return self
|
|
474
|
+
|
|
475
|
+
def __iand__(self, other: "Rangeable[E]") -> "Rangeable[E]":
|
|
476
|
+
self.intersection_update(other)
|
|
477
|
+
return self
|
|
478
|
+
|
|
479
|
+
def __isub__(self, other: "Rangeable[E]") -> "Rangeable[E]":
|
|
480
|
+
self.difference_update(other)
|
|
481
|
+
return self
|
|
482
|
+
|
|
483
|
+
def __ixor__(self, other: "Rangeable[E]") -> "Rangeable[E]":
|
|
484
|
+
self.symmetric_difference_update(other)
|
|
485
|
+
return self
|
|
486
|
+
|
|
487
|
+
# ---------------------- Internal helpers (set ops) ---------------- #
|
|
488
|
+
|
|
489
|
+
def _populate(self, element: E, entries: list[Interval]) -> None:
    """Internal-only: register ``element`` with ready-made ``entries``.

    Meant for filling a freshly built result container. Unlike
    :meth:`insert` it neither bumps the version nor invalidates the
    event index, and it does not validate its input: ``entries`` MUST
    already be sorted, disjoint and non-adjacent. The set-op kernels
    (``merge_disjoint_lists`` / ``intersect_disjoint_lists`` /
    ``subtract_disjoint_lists``) are relied on for that. The list is
    copied, so the caller's list is not shared with the container.
    """
    holder = DisjointSet()
    holder._entries = list(entries)
    self._intervals[element] = holder
    order = self._insertion_order
    order.append(element)
    # The ordinal is the element's 1-based position in the insertion order.
    self._ord[element] = len(order)
|
|
504
|
+
|
|
505
|
+
def _structurally_equal(self, other: "Rangeable[E]") -> bool:
|
|
506
|
+
"""Structural equality test for the no-op-no-bump rule on the
|
|
507
|
+
mutating set ops (§6.10–§6.13 idempotence dual of §3.2).
|
|
508
|
+
|
|
509
|
+
Compares ``insertion_order`` (ordered) and per-element interval
|
|
510
|
+
tuples (already canonical under (I1)).
|
|
511
|
+
"""
|
|
512
|
+
if self._insertion_order != other._insertion_order:
|
|
513
|
+
return False
|
|
514
|
+
for element in self._insertion_order:
|
|
515
|
+
if (
|
|
516
|
+
self._intervals[element]._entries
|
|
517
|
+
!= other._intervals[element]._entries
|
|
518
|
+
):
|
|
519
|
+
return False
|
|
520
|
+
return True
|
|
521
|
+
|
|
522
|
+
def _adopt_if_changed(self, result: "Rangeable[E]") -> None:
|
|
523
|
+
"""Adopt ``result``'s state in-place when it differs structurally
|
|
524
|
+
from ``self``. Bumps version exactly once when adoption happens.
|
|
525
|
+
"""
|
|
526
|
+
if self._structurally_equal(result):
|
|
527
|
+
return # idempotent: no version bump per §3.2 dual.
|
|
528
|
+
self._intervals = result._intervals
|
|
529
|
+
self._insertion_order = result._insertion_order
|
|
530
|
+
self._ord = result._ord
|
|
531
|
+
self._version += 1
|
|
532
|
+
self._event_index = None
|
|
@@ -0,0 +1,290 @@
|
|
|
1
|
+
"""Sorted, disjoint, non-adjacent merged-interval list for one element."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import bisect
|
|
6
|
+
from enum import Enum
|
|
7
|
+
from typing import Iterator
|
|
8
|
+
|
|
9
|
+
from ._errors import InvalidIntervalError
|
|
10
|
+
from ._interval import Interval
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class InsertResult(Enum):
    """What :meth:`DisjointSet.insert` did to the set.

    ``MUTATED`` tells the owning :class:`Rangeable` to bump its version
    counter. ``IDEMPOTENT`` means the insert was absorbed by an existing
    entry and the canonical state is unchanged, so the version stays put
    (RFC Test #21, Lemma 6.5.B).
    """

    # The stored interval list changed.
    MUTATED = "mutated"
    # The interval was already covered; nothing changed.
    IDEMPOTENT = "idempotent"
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class RemoveResult(Enum):
    """What :meth:`DisjointSet.remove` did to the set.

    The owning :class:`Rangeable` bumps its version counter for
    ``MUTATED`` and ``MUTATED_BECAME_EMPTY`` and leaves it alone for
    ``IDEMPOTENT``.

    ``MUTATED_BECAME_EMPTY`` also tells the owner that the per-element
    list is now empty, so the element MUST be pruned eagerly per RFC
    §4.10 (N1): its key is excised from ``intervals``,
    ``insertion_order`` and ``ord``.
    """

    # Nothing overlapped; the set is unchanged.
    IDEMPOTENT = "idempotent"
    # Entries shrank or split, and at least one interval remains.
    MUTATED = "mutated"
    # The last remaining interval was removed.
    MUTATED_BECAME_EMPTY = "mutated_became_empty"
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class DisjointSet:
|
|
40
|
+
"""Maintains the RFC §5.1 (I1) invariant for one element:
|
|
41
|
+
|
|
42
|
+
* sorted by ``lo`` strictly ascending
|
|
43
|
+
* any two adjacent entries ``(lo1, hi1), (lo2, hi2)`` satisfy
|
|
44
|
+
``hi1 + 1 < lo2`` (no overlap, no integer adjacency)
|
|
45
|
+
* ``lo <= hi`` for every entry
|
|
46
|
+
|
|
47
|
+
Mirrors the Ruby reference implementation line-for-line, including
|
|
48
|
+
the §6.1 cleaner-variant containment fast-path.
|
|
49
|
+
"""
|
|
50
|
+
|
|
51
|
+
__slots__ = ("_entries",)
|
|
52
|
+
|
|
53
|
+
def __init__(self) -> None:
|
|
54
|
+
self._entries: list[Interval] = []
|
|
55
|
+
|
|
56
|
+
def __len__(self) -> int:
|
|
57
|
+
return len(self._entries)
|
|
58
|
+
|
|
59
|
+
def __iter__(self) -> Iterator[Interval]:
|
|
60
|
+
return iter(self._entries)
|
|
61
|
+
|
|
62
|
+
@property
def empty(self) -> bool:
    """``True`` when no interval is stored."""
    stored = self._entries
    return not stored
|
|
65
|
+
|
|
66
|
+
def to_pairs(self) -> list[tuple[int, int]]:
    """Return a new ``[(lo, hi), ...]`` list describing the stored intervals.

    The list is built on each call, so changing it does not affect the set.
    """
    return [(span.lo, span.hi) for span in self._entries]
|
|
69
|
+
|
|
70
|
+
def insert(self, lo: int, hi: int) -> InsertResult:
    """Union the closed interval ``[lo, hi]`` into the set (RFC §6.1).

    Existing entries that overlap ``[lo, hi]`` or touch it at an integer
    boundary are replaced by one merged entry.

    Returns :attr:`InsertResult.MUTATED` when the stored list changed
    (the caller should bump its version) and
    :attr:`InsertResult.IDEMPOTENT` when a single existing entry already
    covers ``[lo, hi]`` (the caller MUST NOT bump its version, per
    Test #21 and Lemma 6.5.B).

    Raises :class:`InvalidIntervalError` if ``lo > hi``.
    """
    if lo > hi:
        raise InvalidIntervalError(f"lo ({lo}) > hi ({hi})")

    entries = self._entries

    # §6.1 step 4: leftmost entry with ``iv.hi + 1 >= lo``, i.e. the first
    # one that can touch the new interval. The ``iv.hi + 1`` form (rather
    # than ``lo - 1``) mirrors the Ruby reference, which avoids underflow
    # at ``lo == Int.min`` (§4.7 C5); Python ints are unbounded.
    first_touch = bisect.bisect_left(entries, lo, key=lambda iv: iv.hi + 1)

    # §6.1 step 5: extend over the contiguous run with ``lo <= hi + 1``.
    total = len(entries)
    past_touch = first_touch
    while past_touch < total and entries[past_touch].lo <= hi + 1:
        past_touch += 1
    touched = entries[first_touch:past_touch]

    # §6.1 step 6: one touched entry that already covers [lo, hi] means
    # there is nothing to do — no mutation, no version bump.
    if len(touched) == 1:
        sole = touched[0]
        if sole.lo <= lo and hi <= sole.hi:
            return InsertResult.IDEMPOTENT

    # §6.1 step 7: widen the bounds over the touched run and splice the
    # single merged interval in its place.
    merged_lo, merged_hi = lo, hi
    if touched:
        if touched[0].lo < merged_lo:
            merged_lo = touched[0].lo
        if touched[-1].hi > merged_hi:
            merged_hi = touched[-1].hi
    entries[first_touch:past_touch] = [Interval(merged_lo, merged_hi)]
    return InsertResult.MUTATED
|
|
120
|
+
|
|
121
|
+
def remove(self, lo: int, hi: int) -> RemoveResult:
    """Remove the closed interval ``[lo, hi]`` per RFC §6.6.

    Args:
        lo: Inclusive lower bound of the range to remove.
        hi: Inclusive upper bound of the range to remove.

    Returns:
        :attr:`RemoveResult.IDEMPOTENT` when no stored entry overlaps
        ``[lo, hi]`` (caller MUST NOT bump version per §4.10 N3),
        :attr:`RemoveResult.MUTATED` when entries shrink but the list
        stays non-empty, and :attr:`RemoveResult.MUTATED_BECAME_EMPTY`
        when the last interval is removed and the owner MUST eagerly
        prune the key per §4.10 (N1).

    Raises:
        InvalidIntervalError: If ``lo > hi``.
    """
    if lo > hi:
        raise InvalidIntervalError(f"lo ({lo}) > hi ({hi})")

    entries = self._entries
    n = len(entries)

    # Step 3 of §6.6: bsearch for the leftmost entry with ``iv.hi >= lo``.
    # The key must be ``iv.hi`` itself. Insert's ``iv.hi + 1`` key also
    # selects an entry that merely *abuts* ``lo`` (``iv.hi == lo - 1``),
    # which would slip past the quick-exit below, be rewritten to an
    # identical interval, and be misreported as MUTATED.
    i = bisect.bisect_left(entries, lo, key=lambda iv: iv.hi)

    # Step 4: quick-exit when nothing stored overlaps [lo, hi].
    if i == n or entries[i].lo > hi:
        return RemoveResult.IDEMPOTENT

    # Step 5: sweep all overlapping entries, build replacements.
    to_replace_start = i
    replacements: list[Interval] = []
    while i < n and entries[i].lo <= hi:
        iv = entries[i]
        # Left residual only when iv.lo < lo, so ``lo - 1`` is never
        # computed for an entry that starts at or after ``lo``.
        if iv.lo < lo:
            replacements.append(Interval(iv.lo, lo - 1))
        # Right residual only when hi < iv.hi, so ``hi + 1`` is never
        # computed for an entry that ends at or before ``hi``.
        if hi < iv.hi:
            replacements.append(Interval(hi + 1, iv.hi))
        i += 1
    to_replace_end = i

    # Step 6: splice the residuals over the swept entries in one step.
    entries[to_replace_start:to_replace_end] = replacements

    # Step 7: signal eager-prune to caller when the list is now empty.
    if not entries:
        return RemoveResult.MUTATED_BECAME_EMPTY
    return RemoveResult.MUTATED
|
|
168
|
+
|
|
169
|
+
# ------------------------------------------------------------------ #
|
|
170
|
+
# List-level primitives for set operations (§6.10–§6.13).
|
|
171
|
+
# These operate on Interval lists, not on DisjointSet instances, so
|
|
172
|
+
# the union/intersection/difference/symmetric_difference paths can
|
|
173
|
+
# reuse them without per-call DisjointSet construction overhead.
|
|
174
|
+
# ------------------------------------------------------------------ #
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def _append_or_merge(out: list[Interval], iv: Interval) -> None:
    """Push ``iv`` onto ``out``, fusing it with the tail entry when they touch.

    Helper for the two-pointer merge of RFC §6.10. "Touch" means overlap
    or integer adjacency, i.e. ``out[-1].hi + 1 >= iv.lo``. When the tail
    already reaches at least as far as ``iv.hi`` nothing changes.
    ``out`` is modified in place.
    """
    if out:
        tail = out[-1]
        if tail.hi + 1 >= iv.lo:
            # Touching: widen the tail only if ``iv`` extends past it.
            if tail.hi < iv.hi:
                out[-1] = Interval(tail.lo, iv.hi)
            return
    # Empty output, or a real gap before ``iv``: start a new entry.
    out.append(iv)
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def merge_disjoint_lists(
    list_a: list[Interval], list_b: list[Interval]
) -> list[Interval]:
    """Union two (I1)-canonical interval lists in one linear pass (RFC §6.10).

    Entries are consumed in ascending ``lo`` order (ties favour
    ``list_a``) and fed through ``_append_or_merge``, which collapses
    overlapping or integer-adjacent neighbours. Runs in
    O(|list_a| + |list_b|) and returns a new list; the inputs are not
    modified.
    """
    merged: list[Interval] = []
    pos_a = pos_b = 0
    len_a = len(list_a)
    len_b = len(list_b)

    # Interleave while both inputs still have entries.
    while pos_a < len_a and pos_b < len_b:
        head_a = list_a[pos_a]
        head_b = list_b[pos_b]
        if head_a.lo <= head_b.lo:
            _append_or_merge(merged, head_a)
            pos_a += 1
        else:
            _append_or_merge(merged, head_b)
            pos_b += 1

    # At most one of these tails is non-empty.
    for leftover in list_a[pos_a:]:
        _append_or_merge(merged, leftover)
    for leftover in list_b[pos_b:]:
        _append_or_merge(merged, leftover)
    return merged
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
def intersect_disjoint_lists(
    list_a: list[Interval], list_b: list[Interval]
) -> list[Interval]:
    """Intersect two interval lists with a two-pointer sweep (RFC §6.11).

    For each pair of current heads the overlap
    ``[max(lo), min(hi)]`` is emitted when non-empty, then the head
    that ends first (``list_a`` on ties) is advanced. Runs in
    O(|list_a| + |list_b|). No adjacency-collapse step is performed;
    per Lemma 6.11.A consecutive outputs inherit a gap of at least two
    integers from canonical inputs.
    """
    common: list[Interval] = []
    pos_a = pos_b = 0
    len_a = len(list_a)
    len_b = len(list_b)

    while pos_a < len_a and pos_b < len_b:
        left = list_a[pos_a]
        right = list_b[pos_b]

        start = max(left.lo, right.lo)
        stop = min(left.hi, right.hi)
        if start <= stop:
            common.append(Interval(start, stop))

        # Drop whichever head cannot overlap anything further.
        if left.hi <= right.hi:
            pos_a += 1
        else:
            pos_b += 1
    return common
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
def subtract_disjoint_lists(
    list_a: list[Interval], list_b: list[Interval]
) -> list[Interval]:
    """Compute ``list_a ∖ list_b`` with a two-pointer sweep (RFC §6.12).

    Each entry of ``list_a`` is carved by the entries of ``list_b`` that
    overlap it; the surviving pieces are emitted left to right. Runs in
    O(|list_a| + |list_b|). ``cut.lo - 1`` is only computed when
    ``cut.lo > cur_lo`` and ``cut.hi + 1`` only when ``cut.hi < cur_hi``.
    Returns a new list; an empty ``list_b`` yields a shallow copy of
    ``list_a``.
    """
    out: list[Interval] = []
    if not list_a:
        return out
    if not list_b:
        return list(list_a)

    n_b = len(list_b)
    j = 0  # cursor into list_b, shared across all list_a entries
    for source in list_a:
        cur_lo, cur_hi = source.lo, source.hi
        while True:
            # Skip cutters that end before the remaining piece starts.
            while j < n_b and list_b[j].hi < cur_lo:
                j += 1
            if j == n_b or list_b[j].lo > cur_hi:
                # Nothing left to cut from this piece: emit it whole.
                out.append(Interval(cur_lo, cur_hi))
                break

            cut = list_b[j]
            if cut.lo > cur_lo:
                # Part to the left of the cutter survives.
                out.append(Interval(cur_lo, cut.lo - 1))
            if cut.hi >= cur_hi:
                # Cutter covers the rest of this piece; keep ``j`` in
                # place because it may also reach the next source entry.
                break
            # Continue with what remains to the right of the cutter.
            cur_lo = cut.hi + 1
            j += 1
    return out
|
|
@@ -1,179 +0,0 @@
|
|
|
1
|
-
"""Main Rangeable container."""
|
|
2
|
-
|
|
3
|
-
from __future__ import annotations
|
|
4
|
-
|
|
5
|
-
from typing import Generic, Hashable, Iterator, TypeVar
|
|
6
|
-
|
|
7
|
-
from ._boundary_index import BoundaryIndex
|
|
8
|
-
from ._disjoint_set import DisjointSet, InsertResult
|
|
9
|
-
from ._errors import InvalidIntervalError
|
|
10
|
-
from ._slot import Slot
|
|
11
|
-
from ._transition import TransitionEvent
|
|
12
|
-
|
|
13
|
-
E = TypeVar("E", bound=Hashable)

# Shared empty payload handed to ``Slot`` when a coordinate hits no segment.
_EMPTY_OBJS: tuple = ()


class _EmptyAccessor:
    """Descriptor backing ``Rangeable.empty`` at both class and instance level.

    The class previously defined ``empty`` twice (a classmethod factory
    and a bool property); the second definition replaced the first, so
    ``Rangeable.empty()`` raised ``TypeError``. This descriptor serves
    both uses from a single name.
    """

    __slots__ = ()

    def __get__(self, instance, owner=None):
        if instance is None:
            # Class-level access: return a zero-argument factory so that
            # ``Rangeable.empty()`` builds a fresh container of ``owner``.
            def _make_empty():
                return owner()

            return _make_empty
        # Instance-level access: behave like the read-only bool property.
        return not instance._insertion_order


class Rangeable(Generic[E]):
    """Generic, integer-coordinate, closed-interval set container.

    Pairs hashable elements with their merged disjoint integer ranges
    and supports three query families:

    * by-element via :meth:`get_range`
    * by-position via ``r[i]`` / :meth:`active_at`
    * by-range via :meth:`transitions`

    See `RFC §3 <https://github.com/ZhgChgLi/RangeableRFC>`_ for the
    full normative API surface.
    """

    __slots__ = (
        "_intervals",
        "_insertion_order",
        "_ord",
        "_version",
        "_event_index",
    )

    # ``Rangeable.empty()`` -> new empty container (RFC §3.1 alias);
    # ``instance.empty``    -> ``True`` when no element was ever inserted.
    empty = _EmptyAccessor()

    def __init__(self) -> None:
        # element -> its merged interval set
        self._intervals: dict[E, DisjointSet] = {}
        # elements in first-insert order
        self._insertion_order: list[E] = []
        # element -> 1-based first-insert rank
        self._ord: dict[E, int] = {}
        # bumped on every insert that reports MUTATED
        self._version: int = 0
        # lazily built query index; ``None`` means "needs rebuild"
        self._event_index: BoundaryIndex[E] | None = None

    @property
    def version(self) -> int:
        """Mutation counter; unchanged by idempotent inserts."""
        return self._version

    def insert(self, element: E, *, start: int, end: int) -> "Rangeable[E]":
        """Insert ``element`` covering the closed interval ``[start, end]``.

        Idempotent per RFC §3.2: re-inserting a sub-range that is already
        fully contained leaves the container unchanged and does NOT bump
        :attr:`version`.

        Raises :class:`InvalidIntervalError` if ``start > end``.

        Returns ``self`` for chaining.
        """
        # Validate before registering the element so a bad interval never
        # leaves a half-registered key behind.
        if start > end:
            raise InvalidIntervalError(f"start ({start}) > end ({end})")

        ds = self._intervals.get(element)
        if ds is None:
            ds = DisjointSet()
            self._intervals[element] = ds
            self._insertion_order.append(element)
            self._ord[element] = len(self._insertion_order)

        result = ds.insert(start, end)
        if result == InsertResult.MUTATED:
            self._version += 1
            # Drop the cached index; it is rebuilt on the next query.
            self._event_index = None
        return self

    def __getitem__(self, i: int) -> Slot[E]:
        """Active-element list at ``i``. RFC §3.3.

        O(log |segments| + r) once the index is built. Returns an empty
        :class:`Slot` for coordinates outside every segment.
        """
        self._ensure_event_index_fresh()
        assert self._event_index is not None
        seg = self._event_index.segment_at(i)
        if seg is None:
            return Slot(_EMPTY_OBJS)
        return Slot(seg.active)

    def active_at(self, *, index: int) -> Slot[E]:
        """Same as ``self[index]``, named to match RFC §3.3."""
        return self[index]

    def get_range(self, element: E) -> list[tuple[int, int]]:
        """Merged ranges for ``element`` as ``[(lo, hi), ...]``. RFC §3.4.

        Returns an empty list when the element has never been inserted.
        """
        ds = self._intervals.get(element)
        if ds is None:
            return []
        return ds.to_pairs()

    def transitions(self, *, lo: int, hi: int | None) -> list[TransitionEvent[E]]:
        """Open / close events within the inclusive coordinate range
        ``[lo, hi]``. RFC §3.5.

        ``hi=None`` means +∞ (include all events through the upper bound).

        Raises :class:`InvalidIntervalError` if ``lo > hi`` or ``lo`` is
        ``None``.
        """
        if lo is None:
            raise InvalidIntervalError("transitions: lo must not be None")
        if hi is not None and lo > hi:
            raise InvalidIntervalError(f"lo ({lo}) > hi ({hi})")

        self._ensure_event_index_fresh()
        assert self._event_index is not None
        # The index is queried with ``hi + 1`` (or ``None`` for unbounded).
        upper = None if hi is None else hi + 1
        return self._event_index.events_in_range(lo, upper)

    def __len__(self) -> int:
        """Number of distinct equivalence-class elements ever inserted."""
        return len(self._insertion_order)

    @property
    def count(self) -> int:
        """Same value as ``len(self)``."""
        return len(self._insertion_order)

    def __bool__(self) -> bool:
        """``True`` once at least one element has been inserted."""
        return bool(self._insertion_order)

    def __iter__(self) -> Iterator[tuple[E, list[tuple[int, int]]]]:
        """Yield ``(element, ranges)`` pairs in insertion-order ascending."""
        for element in self._insertion_order:
            yield element, self._intervals[element].to_pairs()

    def copy(self) -> "Rangeable[E]":
        """Deep copy. Mutation on the copy MUST NOT affect this instance,
        and vice versa.
        """
        dup = Rangeable[E]()
        for element in self._insertion_order:
            dup._replant(element, self._intervals[element], self._ord[element])
        dup._version = self._version
        return dup

    def __copy__(self) -> "Rangeable[E]":
        return self.copy()

    def __deepcopy__(self, memo: dict) -> "Rangeable[E]":
        return self.copy()

    def _ensure_event_index_fresh(self) -> None:
        """Rebuild the cached index unless it matches the current version."""
        if self._event_index is not None and self._event_index.version == self._version:
            return
        v_start = self._version
        rebuilt = BoundaryIndex.build(self._intervals, self._ord, v_start)
        # Only publish the rebuilt index if the version did not move
        # while it was being built.
        if self._version == v_start:
            self._event_index = rebuilt

    def _replant(self, element: E, source_set: DisjointSet, source_ord: int) -> None:
        """Register ``element`` with a fresh copy of ``source_set``,
        preserving its original rank ``source_ord``. Used by :meth:`copy`.
        """
        new_set = DisjointSet()
        for iv in source_set:
            new_set.insert(iv.lo, iv.hi)
        self._intervals[element] = new_set
        self._insertion_order.append(element)
        self._ord[element] = source_ord
|
|
@@ -1,104 +0,0 @@
|
|
|
1
|
-
"""Sorted, disjoint, non-adjacent merged-interval list for one element."""
|
|
2
|
-
|
|
3
|
-
from __future__ import annotations
|
|
4
|
-
|
|
5
|
-
import bisect
|
|
6
|
-
from enum import Enum
|
|
7
|
-
from typing import Iterator
|
|
8
|
-
|
|
9
|
-
from ._errors import InvalidIntervalError
|
|
10
|
-
from ._interval import Interval
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
class InsertResult(Enum):
    """Tells the caller whether :meth:`DisjointSet.insert` changed anything.

    ``MUTATED``: the stored intervals changed, so the owning
    :class:`Rangeable` bumps its version counter.

    ``IDEMPOTENT``: the insert was absorbed by an existing entry and the
    canonical state is untouched, so the version stays put (RFC Test #21,
    Lemma 6.5.B).
    """

    MUTATED = "mutated"
    IDEMPOTENT = "idempotent"
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
class DisjointSet:
    """Merged-interval list for a single element, kept in RFC §5.1 (I1) form:

    * entries are sorted by ``lo``, strictly ascending
    * neighbouring entries ``(lo1, hi1), (lo2, hi2)`` always satisfy
      ``hi1 + 1 < lo2``, i.e. they neither overlap nor sit on adjacent
      integers
    * every entry has ``lo <= hi``

    Follows the Ruby reference implementation, including the §6.1
    cleaner-variant containment fast-path.
    """

    __slots__ = ("_entries",)

    def __init__(self) -> None:
        self._entries: list[Interval] = []

    def __len__(self) -> int:
        """Number of merged intervals currently stored."""
        return len(self._entries)

    def __iter__(self) -> Iterator[Interval]:
        """Iterate the stored intervals in ascending order."""
        return iter(self._entries)

    @property
    def empty(self) -> bool:
        """``True`` when no interval is stored."""
        return not self._entries

    def to_pairs(self) -> list[tuple[int, int]]:
        """Return a fresh ``[(lo, hi), ...]`` snapshot of the stored intervals."""
        return [(entry.lo, entry.hi) for entry in self._entries]

    def insert(self, lo: int, hi: int) -> InsertResult:
        """Union ``[lo, hi]`` into the set, merging as required by RFC §6.1.

        Returns :attr:`InsertResult.MUTATED` when the stored intervals
        changed (caller should bump version) and
        :attr:`InsertResult.IDEMPOTENT` when a single existing entry
        already covers ``[lo, hi]`` (caller MUST NOT bump version, per
        Test #21 and Lemma 6.5.B).

        Raises :class:`InvalidIntervalError` if ``lo > hi``.
        """
        if lo > hi:
            raise InvalidIntervalError(f"lo ({lo}) > hi ({hi})")

        entries = self._entries

        # §6.1 step 4: leftmost entry that touches the new interval, i.e.
        # the first one with ``iv.hi + 1 >= lo``. Written with
        # ``iv.hi + 1`` rather than ``lo - 1`` to mirror the Ruby form,
        # which avoids underflow at ``Int.min`` (§4.7 C5).
        first_idx = bisect.bisect_left(entries, lo, key=lambda iv: iv.hi + 1)

        # §6.1 step 5: extend over every following entry that still
        # touches, i.e. while ``entry.lo <= hi + 1``.
        reach = hi + 1
        size = len(entries)
        stop_idx = first_idx
        while stop_idx < size and entries[stop_idx].lo <= reach:
            stop_idx += 1
        touched = entries[first_idx:stop_idx]

        # §6.1 step 6: a lone touched entry that already covers [lo, hi]
        # means nothing to do — no mutation, no version bump.
        if len(touched) == 1:
            only = touched[0]
            if only.lo <= lo and hi <= only.hi:
                return InsertResult.IDEMPOTENT

        # §6.1 step 7: widen to the outer bounds of everything touched
        # and replace the touched run with the single merged interval.
        merged_lo, merged_hi = lo, hi
        if touched:
            merged_lo = min(merged_lo, touched[0].lo)
            merged_hi = max(merged_hi, touched[-1].hi)
        entries[first_idx:stop_idx] = [Interval(merged_lo, merged_hi)]
        return InsertResult.MUTATED
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|