exonware-xwnode 0.0.1.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- exonware/__init__.py +14 -0
- exonware/xwnode/__init__.py +127 -0
- exonware/xwnode/base.py +676 -0
- exonware/xwnode/config.py +178 -0
- exonware/xwnode/contracts.py +730 -0
- exonware/xwnode/errors.py +503 -0
- exonware/xwnode/facade.py +460 -0
- exonware/xwnode/strategies/__init__.py +158 -0
- exonware/xwnode/strategies/advisor.py +463 -0
- exonware/xwnode/strategies/edges/__init__.py +32 -0
- exonware/xwnode/strategies/edges/adj_list.py +227 -0
- exonware/xwnode/strategies/edges/adj_matrix.py +391 -0
- exonware/xwnode/strategies/edges/base.py +169 -0
- exonware/xwnode/strategies/flyweight.py +328 -0
- exonware/xwnode/strategies/impls/__init__.py +13 -0
- exonware/xwnode/strategies/impls/_base_edge.py +403 -0
- exonware/xwnode/strategies/impls/_base_node.py +307 -0
- exonware/xwnode/strategies/impls/edge_adj_list.py +353 -0
- exonware/xwnode/strategies/impls/edge_adj_matrix.py +445 -0
- exonware/xwnode/strategies/impls/edge_bidir_wrapper.py +455 -0
- exonware/xwnode/strategies/impls/edge_block_adj_matrix.py +539 -0
- exonware/xwnode/strategies/impls/edge_coo.py +533 -0
- exonware/xwnode/strategies/impls/edge_csc.py +447 -0
- exonware/xwnode/strategies/impls/edge_csr.py +492 -0
- exonware/xwnode/strategies/impls/edge_dynamic_adj_list.py +503 -0
- exonware/xwnode/strategies/impls/edge_flow_network.py +555 -0
- exonware/xwnode/strategies/impls/edge_hyperedge_set.py +516 -0
- exonware/xwnode/strategies/impls/edge_neural_graph.py +650 -0
- exonware/xwnode/strategies/impls/edge_octree.py +574 -0
- exonware/xwnode/strategies/impls/edge_property_store.py +655 -0
- exonware/xwnode/strategies/impls/edge_quadtree.py +519 -0
- exonware/xwnode/strategies/impls/edge_rtree.py +820 -0
- exonware/xwnode/strategies/impls/edge_temporal_edgeset.py +558 -0
- exonware/xwnode/strategies/impls/edge_tree_graph_basic.py +271 -0
- exonware/xwnode/strategies/impls/edge_weighted_graph.py +411 -0
- exonware/xwnode/strategies/manager.py +775 -0
- exonware/xwnode/strategies/metrics.py +538 -0
- exonware/xwnode/strategies/migration.py +432 -0
- exonware/xwnode/strategies/nodes/__init__.py +50 -0
- exonware/xwnode/strategies/nodes/_base_node.py +307 -0
- exonware/xwnode/strategies/nodes/adjacency_list.py +267 -0
- exonware/xwnode/strategies/nodes/aho_corasick.py +345 -0
- exonware/xwnode/strategies/nodes/array_list.py +209 -0
- exonware/xwnode/strategies/nodes/base.py +247 -0
- exonware/xwnode/strategies/nodes/deque.py +200 -0
- exonware/xwnode/strategies/nodes/hash_map.py +135 -0
- exonware/xwnode/strategies/nodes/heap.py +307 -0
- exonware/xwnode/strategies/nodes/linked_list.py +232 -0
- exonware/xwnode/strategies/nodes/node_aho_corasick.py +520 -0
- exonware/xwnode/strategies/nodes/node_array_list.py +175 -0
- exonware/xwnode/strategies/nodes/node_avl_tree.py +371 -0
- exonware/xwnode/strategies/nodes/node_b_plus_tree.py +542 -0
- exonware/xwnode/strategies/nodes/node_bitmap.py +420 -0
- exonware/xwnode/strategies/nodes/node_bitset_dynamic.py +513 -0
- exonware/xwnode/strategies/nodes/node_bloom_filter.py +347 -0
- exonware/xwnode/strategies/nodes/node_btree.py +357 -0
- exonware/xwnode/strategies/nodes/node_count_min_sketch.py +470 -0
- exonware/xwnode/strategies/nodes/node_cow_tree.py +473 -0
- exonware/xwnode/strategies/nodes/node_cuckoo_hash.py +392 -0
- exonware/xwnode/strategies/nodes/node_fenwick_tree.py +301 -0
- exonware/xwnode/strategies/nodes/node_hash_map.py +269 -0
- exonware/xwnode/strategies/nodes/node_heap.py +191 -0
- exonware/xwnode/strategies/nodes/node_hyperloglog.py +407 -0
- exonware/xwnode/strategies/nodes/node_linked_list.py +409 -0
- exonware/xwnode/strategies/nodes/node_lsm_tree.py +400 -0
- exonware/xwnode/strategies/nodes/node_ordered_map.py +390 -0
- exonware/xwnode/strategies/nodes/node_ordered_map_balanced.py +565 -0
- exonware/xwnode/strategies/nodes/node_patricia.py +512 -0
- exonware/xwnode/strategies/nodes/node_persistent_tree.py +378 -0
- exonware/xwnode/strategies/nodes/node_radix_trie.py +452 -0
- exonware/xwnode/strategies/nodes/node_red_black_tree.py +497 -0
- exonware/xwnode/strategies/nodes/node_roaring_bitmap.py +570 -0
- exonware/xwnode/strategies/nodes/node_segment_tree.py +289 -0
- exonware/xwnode/strategies/nodes/node_set_hash.py +354 -0
- exonware/xwnode/strategies/nodes/node_set_tree.py +480 -0
- exonware/xwnode/strategies/nodes/node_skip_list.py +316 -0
- exonware/xwnode/strategies/nodes/node_splay_tree.py +393 -0
- exonware/xwnode/strategies/nodes/node_suffix_array.py +487 -0
- exonware/xwnode/strategies/nodes/node_treap.py +387 -0
- exonware/xwnode/strategies/nodes/node_tree_graph_hybrid.py +1434 -0
- exonware/xwnode/strategies/nodes/node_trie.py +252 -0
- exonware/xwnode/strategies/nodes/node_union_find.py +187 -0
- exonware/xwnode/strategies/nodes/node_xdata_optimized.py +369 -0
- exonware/xwnode/strategies/nodes/priority_queue.py +209 -0
- exonware/xwnode/strategies/nodes/queue.py +161 -0
- exonware/xwnode/strategies/nodes/sparse_matrix.py +206 -0
- exonware/xwnode/strategies/nodes/stack.py +152 -0
- exonware/xwnode/strategies/nodes/trie.py +274 -0
- exonware/xwnode/strategies/nodes/union_find.py +283 -0
- exonware/xwnode/strategies/pattern_detector.py +603 -0
- exonware/xwnode/strategies/performance_monitor.py +487 -0
- exonware/xwnode/strategies/queries/__init__.py +24 -0
- exonware/xwnode/strategies/queries/base.py +236 -0
- exonware/xwnode/strategies/queries/cql.py +201 -0
- exonware/xwnode/strategies/queries/cypher.py +181 -0
- exonware/xwnode/strategies/queries/datalog.py +70 -0
- exonware/xwnode/strategies/queries/elastic_dsl.py +70 -0
- exonware/xwnode/strategies/queries/eql.py +70 -0
- exonware/xwnode/strategies/queries/flux.py +70 -0
- exonware/xwnode/strategies/queries/gql.py +70 -0
- exonware/xwnode/strategies/queries/graphql.py +240 -0
- exonware/xwnode/strategies/queries/gremlin.py +181 -0
- exonware/xwnode/strategies/queries/hiveql.py +214 -0
- exonware/xwnode/strategies/queries/hql.py +70 -0
- exonware/xwnode/strategies/queries/jmespath.py +219 -0
- exonware/xwnode/strategies/queries/jq.py +66 -0
- exonware/xwnode/strategies/queries/json_query.py +66 -0
- exonware/xwnode/strategies/queries/jsoniq.py +248 -0
- exonware/xwnode/strategies/queries/kql.py +70 -0
- exonware/xwnode/strategies/queries/linq.py +238 -0
- exonware/xwnode/strategies/queries/logql.py +70 -0
- exonware/xwnode/strategies/queries/mql.py +68 -0
- exonware/xwnode/strategies/queries/n1ql.py +210 -0
- exonware/xwnode/strategies/queries/partiql.py +70 -0
- exonware/xwnode/strategies/queries/pig.py +215 -0
- exonware/xwnode/strategies/queries/promql.py +70 -0
- exonware/xwnode/strategies/queries/sparql.py +220 -0
- exonware/xwnode/strategies/queries/sql.py +275 -0
- exonware/xwnode/strategies/queries/xml_query.py +66 -0
- exonware/xwnode/strategies/queries/xpath.py +223 -0
- exonware/xwnode/strategies/queries/xquery.py +258 -0
- exonware/xwnode/strategies/queries/xwnode_executor.py +332 -0
- exonware/xwnode/strategies/queries/xwquery_strategy.py +424 -0
- exonware/xwnode/strategies/registry.py +604 -0
- exonware/xwnode/strategies/simple.py +273 -0
- exonware/xwnode/strategies/utils.py +532 -0
- exonware/xwnode/types.py +912 -0
- exonware/xwnode/version.py +78 -0
- exonware_xwnode-0.0.1.12.dist-info/METADATA +169 -0
- exonware_xwnode-0.0.1.12.dist-info/RECORD +132 -0
- exonware_xwnode-0.0.1.12.dist-info/WHEEL +4 -0
- exonware_xwnode-0.0.1.12.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,570 @@
|
|
1
|
+
"""
|
2
|
+
Roaring Bitmap Node Strategy Implementation
|
3
|
+
|
4
|
+
This module implements the ROARING_BITMAP strategy for highly compressed
|
5
|
+
bitmap operations with excellent performance for sparse data.
|
6
|
+
"""
|
7
|
+
|
8
|
+
import struct
from bisect import bisect_left, bisect_right
from collections import defaultdict
from typing import Any, Iterator, List, Dict, Optional, Set, Tuple

from .base import ANodeMatrixStrategy
from ...types import NodeMode, NodeTrait
|
13
|
+
|
14
|
+
|
15
|
+
class Container:
    """Common interface of the Roaring Bitmap containers.

    A container holds a set of integers and exposes the number of stored
    values through ``cardinality``. Every operation declared here raises
    ``NotImplementedError``; concrete containers override them.
    """

    def __init__(self):
        # Number of values currently stored; starts out empty.
        self.cardinality = 0

    def contains(self, x: int) -> bool:
        """Return True when ``x`` is stored in the container."""
        raise NotImplementedError()

    def add(self, x: int) -> bool:
        """Store ``x``; return True only when it was not already present."""
        raise NotImplementedError()

    def remove(self, x: int) -> bool:
        """Discard ``x``; return True only when it had been present."""
        raise NotImplementedError()

    def to_array(self) -> List[int]:
        """Return the stored values as a sorted list."""
        raise NotImplementedError()
|
36
|
+
|
37
|
+
|
38
|
+
class ArrayContainer(Container):
    """Array container for small sets (< 4096 elements).

    Values are kept in ``self.values`` as a strictly increasing list, so
    membership tests are a binary search and ``to_array`` is a plain copy.
    """

    # Cardinality at which the container reports it should become a bitmap.
    BITMAP_THRESHOLD = 4096

    def __init__(self):
        super().__init__()
        self.values: List[int] = []

    def _locate(self, x: int) -> Tuple[int, bool]:
        """Find where ``x`` lives (or would be inserted) in the sorted list.

        Returns:
            ``(index, present)`` where ``index`` is the leftmost insertion
            point for ``x`` and ``present`` tells whether ``x`` is already
            stored at that index.
        """
        index = bisect_left(self.values, x)
        present = index < len(self.values) and self.values[index] == x
        return index, present

    def contains(self, x: int) -> bool:
        """Binary search for value."""
        return self._locate(x)[1]

    def add(self, x: int) -> bool:
        """Insert value in sorted order. Returns True if value was new."""
        index, present = self._locate(x)
        if present:
            return False  # Already exists

        self.values.insert(index, x)
        self.cardinality += 1
        return True

    def remove(self, x: int) -> bool:
        """Remove value if present. Returns True if value existed."""
        index, present = self._locate(x)
        if not present:
            return False

        self.values.pop(index)
        self.cardinality -= 1
        return True

    def to_array(self) -> List[int]:
        """Return copy of values array (already sorted)."""
        return self.values.copy()

    def should_convert_to_bitmap(self) -> bool:
        """Check if should convert to bitmap container."""
        return self.cardinality >= self.BITMAP_THRESHOLD
|
97
|
+
|
98
|
+
|
99
|
+
class BitmapContainer(Container):
    """Bitmap container for dense sets (>= 4096 elements).

    Holds values in the range ``[0, 65535]``: value ``x`` is represented by
    bit ``x % 8`` of byte ``x // 8`` in a fixed 8192-byte buffer.
    """

    # Number of distinct values (bits) the container can represent.
    CAPACITY = 65536
    # Below this cardinality the container reports it should become an array.
    ARRAY_THRESHOLD = 4096

    def __init__(self):
        super().__init__()
        self.bitmap = bytearray(self.CAPACITY // 8)  # 65536 bits = 8192 bytes

    def _in_range(self, x: int) -> bool:
        """Return True when ``x`` addresses a bit of this bitmap."""
        return 0 <= x < self.CAPACITY

    def contains(self, x: int) -> bool:
        """Check bit at position x.

        Values outside ``[0, 65535]`` can never be stored, so they are
        reported as absent. Without the range check a negative ``x`` would
        wrap around through negative indexing and read an unrelated bit.
        """
        if not self._in_range(x):
            return False
        byte_index, bit_offset = divmod(x, 8)
        return bool(self.bitmap[byte_index] & (1 << bit_offset))

    def add(self, x: int) -> bool:
        """Set bit at position x. Returns True if the bit was not set before.

        Raises:
            IndexError: If ``x`` is outside ``[0, 65535]``.
        """
        if not self._in_range(x):
            raise IndexError(
                f"value {x} is outside the bitmap range [0, {self.CAPACITY})"
            )
        byte_index, bit_offset = divmod(x, 8)
        mask = 1 << bit_offset

        if self.bitmap[byte_index] & mask:
            return False  # Already set

        self.bitmap[byte_index] |= mask
        self.cardinality += 1
        return True

    def remove(self, x: int) -> bool:
        """Clear bit at position x. Returns True if the bit was set before.

        Values outside ``[0, 65535]`` are never stored, so removing them is
        a no-op that returns False.
        """
        if not self._in_range(x):
            return False
        byte_index, bit_offset = divmod(x, 8)
        mask = 1 << bit_offset

        if not (self.bitmap[byte_index] & mask):
            return False  # Not set

        # "& 0xFF" keeps the complemented mask inside a single byte.
        self.bitmap[byte_index] &= ~mask & 0xFF
        self.cardinality -= 1
        return True

    def to_array(self) -> List[int]:
        """Convert bitmap to array of set values, in ascending order."""
        result = []
        for byte_index, byte_value in enumerate(self.bitmap):
            if not byte_value:
                continue  # Skip empty bytes without testing each bit
            base = byte_index * 8
            for bit_offset in range(8):
                if byte_value & (1 << bit_offset):
                    result.append(base + bit_offset)
        return result

    def should_convert_to_array(self) -> bool:
        """Check if should convert to array container."""
        return self.cardinality < self.ARRAY_THRESHOLD
|
150
|
+
|
151
|
+
|
152
|
+
class RoaringBitmapStrategy(ANodeMatrixStrategy):
    """
    Roaring Bitmap node strategy for compressed sparse sets.

    Uses a hybrid approach with array containers for sparse data
    and bitmap containers for dense data, providing excellent
    compression and performance characteristics.

    Values are handled as 32-bit integers: the high 16 bits select a
    container and the low 16 bits are stored inside it. Integers outside
    32 bits are reduced modulo 2**32 by ``_split_value``.

    The key-based methods (``put``/``get``/``has``/...) map each string key
    to one 32-bit value; the range and set operations work directly on the
    32-bit values.
    """

    def __init__(self, traits: NodeTrait = NodeTrait.NONE, **options):
        """Initialize the Roaring Bitmap strategy.

        Args:
            traits: Trait flags forwarded to the base strategy.
            **options: Extra options forwarded to the base strategy.
        """
        super().__init__(NodeMode.ROARING_BITMAP, traits, **options)

        # Roaring bitmap structure: high 16 bits -> container
        self._containers: Dict[int, Container] = {}
        self._size = 0  # Total number of set values across all containers

        # Key-value mapping for compatibility
        self._key_to_value: Dict[str, int] = {}  # key -> 32-bit value
        self._value_to_key: Dict[int, str] = {}  # value -> key
        self._values: Dict[str, Any] = {}  # Associated data
        self._next_value = 0  # Next candidate for an auto-assigned value

    def get_supported_traits(self) -> NodeTrait:
        """Get the traits supported by the roaring bitmap strategy."""
        return (
            NodeTrait.COMPRESSED
            | NodeTrait.INDEXED
            | NodeTrait.STREAMING
            | NodeTrait.SPATIAL
        )

    def _split_value(self, value: int) -> Tuple[int, int]:
        """Split 32-bit value into high 16 bits and low 16 bits."""
        high = (value >> 16) & 0xFFFF
        low = value & 0xFFFF
        return high, low

    def _get_or_create_container(self, high: int) -> Container:
        """Get or create container for high bits (new ones start as arrays)."""
        if high not in self._containers:
            self._containers[high] = ArrayContainer()
        return self._containers[high]

    def _maybe_convert_container(self, high: int) -> None:
        """Swap the container for ``high`` to the other type when it asks to."""
        container = self._containers.get(high)
        if container is None:
            return

        if isinstance(container, ArrayContainer) and container.should_convert_to_bitmap():
            replacement: Container = BitmapContainer()
        elif isinstance(container, BitmapContainer) and container.should_convert_to_array():
            replacement = ArrayContainer()
        else:
            return

        for value in container.to_array():
            replacement.add(value)
        self._containers[high] = replacement

    def _add_value(self, value: int) -> bool:
        """Add a 32-bit value to the roaring bitmap. Returns True if new."""
        high, low = self._split_value(value)
        container = self._get_or_create_container(high)

        was_new = container.add(low)
        if was_new:
            self._size += 1
            self._maybe_convert_container(high)

        return was_new

    def _remove_value(self, value: int) -> bool:
        """Remove a 32-bit value from the roaring bitmap. Returns True if it existed."""
        high, low = self._split_value(value)

        container = self._containers.get(high)
        if container is None:
            return False

        was_removed = container.remove(low)
        if was_removed:
            self._size -= 1

            # Remove empty containers
            if container.cardinality == 0:
                del self._containers[high]
            else:
                self._maybe_convert_container(high)

        return was_removed

    def _contains_value(self, value: int) -> bool:
        """Check if 32-bit value is in the roaring bitmap."""
        high, low = self._split_value(value)
        container = self._containers.get(high)
        return container is not None and container.contains(low)

    def _allocate_value(self) -> int:
        """Return the next auto-assigned value that no key is mapped to."""
        while self._next_value in self._value_to_key:
            self._next_value += 1
        allocated = self._next_value
        self._next_value += 1
        return allocated

    def _resolve_value(self, key_str: str) -> int:
        """Return the 32-bit value of ``key_str``, assigning one on first use.

        A key that parses as an integer in ``[0, 0xFFFFFFFF]`` uses that
        integer, unless another key already owns it. Every other key gets an
        auto-assigned value. Checking ownership keeps two keys from ever
        sharing one bit.
        """
        if key_str in self._key_to_value:
            return self._key_to_value[key_str]

        try:
            bit_value = int(key_str)
        except ValueError:
            bit_value = self._allocate_value()
        else:
            out_of_range = bit_value < 0 or bit_value > 0xFFFFFFFF
            if out_of_range or bit_value in self._value_to_key:
                bit_value = self._allocate_value()

        self._key_to_value[key_str] = bit_value
        self._value_to_key[bit_value] = key_str
        return bit_value

    # ============================================================================
    # CORE OPERATIONS (Key-based interface for compatibility)
    # ============================================================================

    def put(self, key: Any, value: Any = None) -> None:
        """Set or clear the bit for ``key``.

        Args:
            key: Any object; its ``str()`` form is the key.
            value: Data to associate with the key. ``None`` (the default)
                and ``False`` clear the bit and drop any stored data; every
                other value, including falsy ones such as ``0``, sets the
                bit and is stored.
        """
        key_str = str(key)
        bit_value = self._resolve_value(key_str)

        if value is None or value is False:
            self._remove_value(bit_value)
            self._values.pop(key_str, None)
        else:
            self._add_value(bit_value)
            self._values[key_str] = value

    def get(self, key: Any, default: Any = None) -> Any:
        """Get value associated with key, or ``default`` if its bit is not set."""
        key_str = str(key)

        if key_str in self._key_to_value:
            bit_value = self._key_to_value[key_str]
            if self._contains_value(bit_value):
                # Bits set without stored data (e.g. via add_range) read as True.
                return self._values.get(key_str, True)

        return default

    def has(self, key: Any) -> bool:
        """Check if key is in the roaring bitmap."""
        bit_value = self._key_to_value.get(str(key))
        return bit_value is not None and self._contains_value(bit_value)

    def remove(self, key: Any) -> bool:
        """Remove key from the roaring bitmap. Returns True if its bit was set."""
        key_str = str(key)

        bit_value = self._key_to_value.get(key_str)
        if bit_value is not None and self._remove_value(bit_value):
            self._values.pop(key_str, None)
            return True

        return False

    def delete(self, key: Any) -> bool:
        """Remove key from the roaring bitmap (alias for remove)."""
        return self.remove(key)

    def clear(self) -> None:
        """Clear all data."""
        self._containers.clear()
        self._size = 0
        self._values.clear()
        # Keep key mappings for consistency

    def keys(self) -> Iterator[str]:
        """Get all keys with set bits."""
        for key_str, bit_value in self._key_to_value.items():
            if self._contains_value(bit_value):
                yield key_str

    def values(self) -> Iterator[Any]:
        """Get the data stored through ``put`` for keys whose bit is set."""
        return iter(self._values.values())

    def items(self) -> Iterator[tuple[str, Any]]:
        """Get all key-value pairs for set bits."""
        for key_str, bit_value in self._key_to_value.items():
            if self._contains_value(bit_value):
                yield (key_str, self._values.get(key_str, True))

    def __len__(self) -> int:
        """Get the number of set bits."""
        return self._size

    def to_native(self) -> Dict[str, bool]:
        """Convert to native Python dict of boolean values (one per known key)."""
        return {
            key_str: self._contains_value(bit_value)
            for key_str, bit_value in self._key_to_value.items()
        }

    @property
    def is_list(self) -> bool:
        """This can behave like a list for indexed access."""
        return True

    @property
    def is_dict(self) -> bool:
        """This can behave like a dict."""
        return True

    # ============================================================================
    # ROARING BITMAP SPECIFIC OPERATIONS
    # ============================================================================

    def add_range(self, start: int, end: int) -> int:
        """Add range of values [start, end). Returns number of new values."""
        return sum(1 for value in range(start, end) if self._add_value(value))

    def remove_range(self, start: int, end: int) -> int:
        """Remove range of values [start, end). Returns number of removed values."""
        return sum(1 for value in range(start, end) if self._remove_value(value))

    def union(self, other: 'RoaringBitmapStrategy') -> 'RoaringBitmapStrategy':
        """Union with another roaring bitmap.

        Returns a new strategy holding every value set in either operand.
        Key mappings and associated data are not copied.
        """
        result = RoaringBitmapStrategy(traits=self._traits)

        for source in (self, other):
            for high, container in source._containers.items():
                base = high << 16
                for low in container.to_array():
                    # _add_value ignores values already present in result.
                    result._add_value(base | low)

        return result

    def intersection(self, other: 'RoaringBitmapStrategy') -> 'RoaringBitmapStrategy':
        """Intersection with another roaring bitmap.

        Returns a new strategy holding the values set in both operands.
        Key mappings and associated data are not copied.
        """
        result = RoaringBitmapStrategy(traits=self._traits)

        # Only containers present on both sides can contribute values.
        common_highs = self._containers.keys() & other._containers.keys()

        for high in common_highs:
            other_lows = set(other._containers[high].to_array())
            base = high << 16
            for low in self._containers[high].to_array():
                if low in other_lows:
                    result._add_value(base | low)

        return result

    def difference(self, other: 'RoaringBitmapStrategy') -> 'RoaringBitmapStrategy':
        """Difference with another roaring bitmap.

        Returns a new strategy holding the values set in ``self`` but not in
        ``other``. Key mappings and associated data are not copied.
        """
        result = RoaringBitmapStrategy(traits=self._traits)

        for high, container in self._containers.items():
            other_container = other._containers.get(high)
            if other_container is None:
                excluded = frozenset()
            else:
                excluded = frozenset(other_container.to_array())

            base = high << 16
            for low in container.to_array():
                if low not in excluded:
                    result._add_value(base | low)

        return result

    def to_array(self) -> List[int]:
        """Convert to sorted array of all values."""
        result = []

        for high in sorted(self._containers):
            base = high << 16
            result.extend(base | low for low in self._containers[high].to_array())

        return result

    def rank(self, value: int) -> int:
        """Get rank of value (number of values <= value)."""
        high, low = self._split_value(value)
        total = 0

        for container_high, container in self._containers.items():
            if container_high < high:
                # Every value in a lower container is smaller than ``value``.
                total += container.cardinality
            elif container_high == high:
                # to_array() is sorted, so bisect_right counts entries <= low.
                total += bisect_right(container.to_array(), low)

        return total

    def select(self, rank: int) -> Optional[int]:
        """Get value at rank (0-indexed), or None if rank is out of bounds."""
        if rank < 0 or rank >= self._size:
            return None

        current_rank = 0

        for high in sorted(self._containers):
            container = self._containers[high]
            if current_rank + container.cardinality > rank:
                # Value is in this container
                low = container.to_array()[rank - current_rank]
                return (high << 16) | low

            current_rank += container.cardinality

        return None

    def _estimate_memory(self, per_container_overhead: int = 0) -> int:
        """Estimate container storage in bytes.

        Counts 2 bytes per value for array containers and a fixed 8192
        bytes for bitmap containers, plus ``per_container_overhead`` each.
        """
        memory_used = 0
        for container in self._containers.values():
            if isinstance(container, ArrayContainer):
                memory_used += len(container.values) * 2  # 2 bytes per value
            else:  # BitmapContainer
                memory_used += 8192  # Fixed 8KB
            memory_used += per_container_overhead
        return memory_used

    def get_compression_ratio(self) -> float:
        """Get compression ratio compared to storing 4 bytes per value.

        Returns 1.0 for an empty bitmap; smaller numbers mean better
        compression.
        """
        if self._size == 0:
            return 1.0

        # Uncompressed would need 4 bytes per value
        uncompressed_size = self._size * 4

        return self._estimate_memory() / max(1, uncompressed_size)

    # ============================================================================
    # PERFORMANCE CHARACTERISTICS
    # ============================================================================

    @property
    def backend_info(self) -> Dict[str, Any]:
        """Get backend implementation info."""
        array_containers = sum(
            1 for c in self._containers.values() if isinstance(c, ArrayContainer)
        )
        bitmap_containers = len(self._containers) - array_containers

        return {
            'strategy': 'ROARING_BITMAP',
            'backend': 'Hybrid Array/Bitmap containers',
            'total_containers': len(self._containers),
            'array_containers': array_containers,
            'bitmap_containers': bitmap_containers,
            'complexity': {
                'add': 'O(log n) for arrays, O(1) for bitmaps',
                'remove': 'O(log n) for arrays, O(1) for bitmaps',
                'contains': 'O(log n) for arrays, O(1) for bitmaps',
                'union': 'O(n + m)',
                'intersection': 'O(min(n, m))',
                'space': 'O(n) compressed'
            }
        }

    @property
    def metrics(self) -> Dict[str, Any]:
        """Get performance metrics."""
        compression_ratio = self.get_compression_ratio()

        # Estimate memory usage: container payload + 24 bytes overhead each
        memory_used = self._estimate_memory(per_container_overhead=24)

        # Integer keys and add_range() set bits without advancing
        # _next_value, so bound the universe by the number of set bits too;
        # otherwise the reported sparsity could become negative.
        universe = max(1, self._next_value, self._size)

        return {
            'set_bits': self._size,
            'containers': len(self._containers),
            'compression_ratio': f"{compression_ratio:.3f}",
            'memory_usage': f"{memory_used} bytes (estimated)",
            'memory_per_bit': f"{memory_used / max(1, self._size):.1f} bytes/bit",
            'sparsity': f"{(1 - (self._size / universe)) * 100:.1f}%"
        }
|