exonware-xwnode 0.0.1.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- exonware/__init__.py +14 -0
- exonware/xwnode/__init__.py +127 -0
- exonware/xwnode/base.py +676 -0
- exonware/xwnode/config.py +178 -0
- exonware/xwnode/contracts.py +730 -0
- exonware/xwnode/errors.py +503 -0
- exonware/xwnode/facade.py +460 -0
- exonware/xwnode/strategies/__init__.py +158 -0
- exonware/xwnode/strategies/advisor.py +463 -0
- exonware/xwnode/strategies/edges/__init__.py +32 -0
- exonware/xwnode/strategies/edges/adj_list.py +227 -0
- exonware/xwnode/strategies/edges/adj_matrix.py +391 -0
- exonware/xwnode/strategies/edges/base.py +169 -0
- exonware/xwnode/strategies/flyweight.py +328 -0
- exonware/xwnode/strategies/impls/__init__.py +13 -0
- exonware/xwnode/strategies/impls/_base_edge.py +403 -0
- exonware/xwnode/strategies/impls/_base_node.py +307 -0
- exonware/xwnode/strategies/impls/edge_adj_list.py +353 -0
- exonware/xwnode/strategies/impls/edge_adj_matrix.py +445 -0
- exonware/xwnode/strategies/impls/edge_bidir_wrapper.py +455 -0
- exonware/xwnode/strategies/impls/edge_block_adj_matrix.py +539 -0
- exonware/xwnode/strategies/impls/edge_coo.py +533 -0
- exonware/xwnode/strategies/impls/edge_csc.py +447 -0
- exonware/xwnode/strategies/impls/edge_csr.py +492 -0
- exonware/xwnode/strategies/impls/edge_dynamic_adj_list.py +503 -0
- exonware/xwnode/strategies/impls/edge_flow_network.py +555 -0
- exonware/xwnode/strategies/impls/edge_hyperedge_set.py +516 -0
- exonware/xwnode/strategies/impls/edge_neural_graph.py +650 -0
- exonware/xwnode/strategies/impls/edge_octree.py +574 -0
- exonware/xwnode/strategies/impls/edge_property_store.py +655 -0
- exonware/xwnode/strategies/impls/edge_quadtree.py +519 -0
- exonware/xwnode/strategies/impls/edge_rtree.py +820 -0
- exonware/xwnode/strategies/impls/edge_temporal_edgeset.py +558 -0
- exonware/xwnode/strategies/impls/edge_tree_graph_basic.py +271 -0
- exonware/xwnode/strategies/impls/edge_weighted_graph.py +411 -0
- exonware/xwnode/strategies/manager.py +775 -0
- exonware/xwnode/strategies/metrics.py +538 -0
- exonware/xwnode/strategies/migration.py +432 -0
- exonware/xwnode/strategies/nodes/__init__.py +50 -0
- exonware/xwnode/strategies/nodes/_base_node.py +307 -0
- exonware/xwnode/strategies/nodes/adjacency_list.py +267 -0
- exonware/xwnode/strategies/nodes/aho_corasick.py +345 -0
- exonware/xwnode/strategies/nodes/array_list.py +209 -0
- exonware/xwnode/strategies/nodes/base.py +247 -0
- exonware/xwnode/strategies/nodes/deque.py +200 -0
- exonware/xwnode/strategies/nodes/hash_map.py +135 -0
- exonware/xwnode/strategies/nodes/heap.py +307 -0
- exonware/xwnode/strategies/nodes/linked_list.py +232 -0
- exonware/xwnode/strategies/nodes/node_aho_corasick.py +520 -0
- exonware/xwnode/strategies/nodes/node_array_list.py +175 -0
- exonware/xwnode/strategies/nodes/node_avl_tree.py +371 -0
- exonware/xwnode/strategies/nodes/node_b_plus_tree.py +542 -0
- exonware/xwnode/strategies/nodes/node_bitmap.py +420 -0
- exonware/xwnode/strategies/nodes/node_bitset_dynamic.py +513 -0
- exonware/xwnode/strategies/nodes/node_bloom_filter.py +347 -0
- exonware/xwnode/strategies/nodes/node_btree.py +357 -0
- exonware/xwnode/strategies/nodes/node_count_min_sketch.py +470 -0
- exonware/xwnode/strategies/nodes/node_cow_tree.py +473 -0
- exonware/xwnode/strategies/nodes/node_cuckoo_hash.py +392 -0
- exonware/xwnode/strategies/nodes/node_fenwick_tree.py +301 -0
- exonware/xwnode/strategies/nodes/node_hash_map.py +269 -0
- exonware/xwnode/strategies/nodes/node_heap.py +191 -0
- exonware/xwnode/strategies/nodes/node_hyperloglog.py +407 -0
- exonware/xwnode/strategies/nodes/node_linked_list.py +409 -0
- exonware/xwnode/strategies/nodes/node_lsm_tree.py +400 -0
- exonware/xwnode/strategies/nodes/node_ordered_map.py +390 -0
- exonware/xwnode/strategies/nodes/node_ordered_map_balanced.py +565 -0
- exonware/xwnode/strategies/nodes/node_patricia.py +512 -0
- exonware/xwnode/strategies/nodes/node_persistent_tree.py +378 -0
- exonware/xwnode/strategies/nodes/node_radix_trie.py +452 -0
- exonware/xwnode/strategies/nodes/node_red_black_tree.py +497 -0
- exonware/xwnode/strategies/nodes/node_roaring_bitmap.py +570 -0
- exonware/xwnode/strategies/nodes/node_segment_tree.py +289 -0
- exonware/xwnode/strategies/nodes/node_set_hash.py +354 -0
- exonware/xwnode/strategies/nodes/node_set_tree.py +480 -0
- exonware/xwnode/strategies/nodes/node_skip_list.py +316 -0
- exonware/xwnode/strategies/nodes/node_splay_tree.py +393 -0
- exonware/xwnode/strategies/nodes/node_suffix_array.py +487 -0
- exonware/xwnode/strategies/nodes/node_treap.py +387 -0
- exonware/xwnode/strategies/nodes/node_tree_graph_hybrid.py +1434 -0
- exonware/xwnode/strategies/nodes/node_trie.py +252 -0
- exonware/xwnode/strategies/nodes/node_union_find.py +187 -0
- exonware/xwnode/strategies/nodes/node_xdata_optimized.py +369 -0
- exonware/xwnode/strategies/nodes/priority_queue.py +209 -0
- exonware/xwnode/strategies/nodes/queue.py +161 -0
- exonware/xwnode/strategies/nodes/sparse_matrix.py +206 -0
- exonware/xwnode/strategies/nodes/stack.py +152 -0
- exonware/xwnode/strategies/nodes/trie.py +274 -0
- exonware/xwnode/strategies/nodes/union_find.py +283 -0
- exonware/xwnode/strategies/pattern_detector.py +603 -0
- exonware/xwnode/strategies/performance_monitor.py +487 -0
- exonware/xwnode/strategies/queries/__init__.py +24 -0
- exonware/xwnode/strategies/queries/base.py +236 -0
- exonware/xwnode/strategies/queries/cql.py +201 -0
- exonware/xwnode/strategies/queries/cypher.py +181 -0
- exonware/xwnode/strategies/queries/datalog.py +70 -0
- exonware/xwnode/strategies/queries/elastic_dsl.py +70 -0
- exonware/xwnode/strategies/queries/eql.py +70 -0
- exonware/xwnode/strategies/queries/flux.py +70 -0
- exonware/xwnode/strategies/queries/gql.py +70 -0
- exonware/xwnode/strategies/queries/graphql.py +240 -0
- exonware/xwnode/strategies/queries/gremlin.py +181 -0
- exonware/xwnode/strategies/queries/hiveql.py +214 -0
- exonware/xwnode/strategies/queries/hql.py +70 -0
- exonware/xwnode/strategies/queries/jmespath.py +219 -0
- exonware/xwnode/strategies/queries/jq.py +66 -0
- exonware/xwnode/strategies/queries/json_query.py +66 -0
- exonware/xwnode/strategies/queries/jsoniq.py +248 -0
- exonware/xwnode/strategies/queries/kql.py +70 -0
- exonware/xwnode/strategies/queries/linq.py +238 -0
- exonware/xwnode/strategies/queries/logql.py +70 -0
- exonware/xwnode/strategies/queries/mql.py +68 -0
- exonware/xwnode/strategies/queries/n1ql.py +210 -0
- exonware/xwnode/strategies/queries/partiql.py +70 -0
- exonware/xwnode/strategies/queries/pig.py +215 -0
- exonware/xwnode/strategies/queries/promql.py +70 -0
- exonware/xwnode/strategies/queries/sparql.py +220 -0
- exonware/xwnode/strategies/queries/sql.py +275 -0
- exonware/xwnode/strategies/queries/xml_query.py +66 -0
- exonware/xwnode/strategies/queries/xpath.py +223 -0
- exonware/xwnode/strategies/queries/xquery.py +258 -0
- exonware/xwnode/strategies/queries/xwnode_executor.py +332 -0
- exonware/xwnode/strategies/queries/xwquery_strategy.py +424 -0
- exonware/xwnode/strategies/registry.py +604 -0
- exonware/xwnode/strategies/simple.py +273 -0
- exonware/xwnode/strategies/utils.py +532 -0
- exonware/xwnode/types.py +912 -0
- exonware/xwnode/version.py +78 -0
- exonware_xwnode-0.0.1.12.dist-info/METADATA +169 -0
- exonware_xwnode-0.0.1.12.dist-info/RECORD +132 -0
- exonware_xwnode-0.0.1.12.dist-info/WHEEL +4 -0
- exonware_xwnode-0.0.1.12.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,347 @@
|
|
1
|
+
"""
|
2
|
+
Bloom Filter Node Strategy Implementation
|
3
|
+
|
4
|
+
This module implements the BLOOM_FILTER strategy for memory-efficient
|
5
|
+
probabilistic membership testing with no false negatives.
|
6
|
+
"""
|
7
|
+
|
8
|
+
from typing import Any, Iterator, List, Dict, Optional
|
9
|
+
import hashlib
|
10
|
+
import math
|
11
|
+
from ._base_node import aNodeStrategy
|
12
|
+
from ...types import NodeMode, NodeTrait
|
13
|
+
|
14
|
+
|
15
|
+
class xBloomFilterStrategy(aNodeStrategy):
    """
    Bloom Filter node strategy for probabilistic membership testing.

    Provides memory-efficient approximate membership testing with:
    - No false negatives (if it says "no", it's definitely not there)
    - Possible false positives (if it says "yes", it might be there)
    - Configurable false positive rate

    Besides the bit array, every inserted key/value pair is also kept in a
    plain dict so that ``get``/``keys``/``values``/``items`` can return real
    data. Keys are always converted with ``str()`` before being hashed or
    stored.

    Options (keyword arguments):
        expected_elements: Anticipated number of insertions (default 1000).
        false_positive_rate: Target rate inside the open interval (0, 1).
            Defaults to 0.01; out-of-range values fall back to 0.01.
    """

    # Fallback used when the configured false positive rate is not in (0, 1).
    _DEFAULT_FALSE_POSITIVE_RATE = 0.01

    # Primes cycled through to derive one seed per hash function.
    _SEED_PRIMES = (2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47)

    def __init__(self, traits: NodeTrait = NodeTrait.NONE, **options):
        """Initialize the Bloom Filter strategy."""
        super().__init__(NodeMode.BLOOM_FILTER, traits, **options)

        # Bloom filter parameters
        self.expected_elements = options.get('expected_elements', 1000)
        rate = options.get('false_positive_rate', 0.01)
        # Normalise once so the sizing code and every report (backend_info,
        # metrics, optimize_for_insertions) agree on the effective target.
        if not 0 < rate < 1:
            rate = self._DEFAULT_FALSE_POSITIVE_RATE
        self.false_positive_rate = rate

        # Calculate optimal parameters
        self.bit_array_size = self._calculate_bit_array_size()
        self.num_hash_functions = self._calculate_num_hash_functions()

        # Core storage
        self._bit_array = [0] * self.bit_array_size
        self._values: Dict[str, Any] = {}  # Store actual values for retrieval
        self._size = 0        # number of distinct keys currently in _values
        self._insertions = 0  # number of put() calls, duplicates included

        # Hash functions
        self._hash_seeds = self._generate_hash_seeds()

    def get_supported_traits(self) -> NodeTrait:
        """Get the traits supported by the bloom filter strategy."""
        return (NodeTrait.PROBABILISTIC | NodeTrait.COMPRESSED | NodeTrait.STREAMING)

    def _calculate_bit_array_size(self) -> int:
        """Calculate optimal bit array size (always at least 1)."""
        # m = -(n * ln(p)) / (ln(2)^2)
        # where n = expected elements, p = false positive rate
        n = self.expected_elements
        p = self.false_positive_rate

        # Guard again here: the attribute may have been reassigned after
        # construction.
        if not 0 < p < 1:
            p = self._DEFAULT_FALSE_POSITIVE_RATE

        m = -(n * math.log(p)) / (math.log(2) ** 2)
        return max(1, int(math.ceil(m)))

    def _calculate_num_hash_functions(self) -> int:
        """Calculate optimal number of hash functions (always at least 1)."""
        # k = (m / n) * ln(2)
        # where m = bit array size, n = expected elements
        m = self.bit_array_size
        n = self.expected_elements

        # A non-positive element count would divide by zero (n == 0) or give
        # a negative k; one hash function is the smallest usable filter.
        if n <= 0:
            return 1

        k = (m / n) * math.log(2)
        return max(1, int(round(k)))

    def _generate_hash_seeds(self) -> List[int]:
        """Generate seeds for multiple hash functions."""
        primes = self._SEED_PRIMES
        return [
            primes[i % len(primes)] * (i + 1)
            for i in range(self.num_hash_functions)
        ]

    def _hash_element(self, element: str, seed: int) -> int:
        """Hash an element with a given seed into a bit-array index."""
        # MD5 is used purely as a mixing function, not for security.
        digest = hashlib.md5(f"{element}{seed}".encode()).hexdigest()
        return int(digest, 16) % self.bit_array_size

    def _get_bit_positions(self, element: str) -> List[int]:
        """Get all bit positions for an element (one per hash seed)."""
        return [self._hash_element(element, seed) for seed in self._hash_seeds]

    # ============================================================================
    # CORE OPERATIONS (Key-based interface for compatibility)
    # ============================================================================

    def put(self, key: Any, value: Any = None) -> None:
        """Add an element to the bloom filter.

        Args:
            key: Element to add; hashed and stored under ``str(key)``.
            value: Value kept for retrieval. When ``None`` the original
                ``key`` object is stored instead.
        """
        element = str(key)

        # Set bits for this element
        for pos in self._get_bit_positions(element):
            self._bit_array[pos] = 1

        # Store actual value for retrieval (optional)
        if element not in self._values:
            self._size += 1
        self._values[element] = value if value is not None else key

        self._insertions += 1

    def get(self, key: Any, default: Any = None) -> Any:
        """Return the stored value for ``key``, or ``default``.

        ``default`` is returned when the bit array rules the key out, or when
        the bits match but no value is stored under the key (for example a
        false positive, or a key that was removed).
        """
        element = str(key)

        if not self.has(element):
            # Definitely not present
            return default
        return self._values.get(element, default)

    def has(self, key: Any) -> bool:
        """Check if element might be present (probabilistic)."""
        # Any unset bit means "definitely not present"; all bits set means
        # "might be present" (could be a false positive).
        return all(
            self._bit_array[pos] != 0
            for pos in self._get_bit_positions(str(key))
        )

    def remove(self, key: Any) -> bool:
        """Remove from stored values (cannot remove from bloom filter).

        The bits stay set, so ``has`` keeps answering True for the key.

        Returns:
            True if a stored value was removed, False otherwise.
        """
        element = str(key)

        if element not in self._values:
            return False

        del self._values[element]
        self._size -= 1
        return True

    def delete(self, key: Any) -> bool:
        """Remove from stored values (alias for remove)."""
        return self.remove(key)

    def clear(self) -> None:
        """Clear all data (bit array, stored values and counters)."""
        self._bit_array = [0] * self.bit_array_size
        self._values.clear()
        self._size = 0
        self._insertions = 0

    def keys(self) -> Iterator[str]:
        """Get all stored keys (not all elements in filter)."""
        return iter(self._values)

    def values(self) -> Iterator[Any]:
        """Get all stored values."""
        return iter(self._values.values())

    def items(self) -> Iterator[tuple[str, Any]]:
        """Get all stored key-value pairs."""
        return iter(self._values.items())

    def __len__(self) -> int:
        """Get the number of stored elements."""
        return self._size

    def to_native(self) -> Dict[str, Any]:
        """Convert to native Python dict of stored values."""
        return dict(self._values)

    @property
    def is_list(self) -> bool:
        """This is not a list strategy."""
        return False

    @property
    def is_dict(self) -> bool:
        """This behaves like a dict but with probabilistic semantics."""
        return True

    # ============================================================================
    # BLOOM FILTER SPECIFIC OPERATIONS
    # ============================================================================

    def add(self, element: Any) -> None:
        """Add an element to the bloom filter."""
        self.put(element, element)

    def might_contain(self, element: Any) -> bool:
        """Check if element might be in the filter (same as has)."""
        return self.has(element)

    def definitely_not_contains(self, element: Any) -> bool:
        """Check if element is definitely not in the filter."""
        return not self.has(element)

    def get_false_positive_probability(self) -> float:
        """Calculate current false positive probability.

        Uses the number of ``put`` calls (duplicates included) as ``n``.
        Returns 0.0 for a filter with no insertions.
        """
        if self._insertions == 0:
            return 0.0

        # p = (1 - e^(-k*n/m))^k
        # where k = num hash functions, n = insertions, m = bit array size
        k = self.num_hash_functions
        n = self._insertions
        m = self.bit_array_size

        if m == 0:
            return 1.0

        try:
            probability = (1 - math.exp(-(k * n) / m)) ** k
        except (OverflowError, ValueError):
            return 1.0
        return min(1.0, max(0.0, probability))

    def get_capacity_utilization(self) -> float:
        """Get the fraction of bits currently set in the bit array."""
        if self.bit_array_size <= 0:
            return 0.0
        return sum(self._bit_array) / self.bit_array_size

    def union(self, other: 'xBloomFilterStrategy') -> 'xBloomFilterStrategy':
        """Create union of two bloom filters (must have same parameters).

        Raises:
            ValueError: If bit array size or hash function count differ.
        """
        if (self.bit_array_size != other.bit_array_size or
                self.num_hash_functions != other.num_hash_functions):
            raise ValueError("Bloom filters must have same parameters for union")

        # NOTE(review): self._traits is presumably set by the base class.
        result = xBloomFilterStrategy(
            traits=self._traits,
            expected_elements=max(self.expected_elements, other.expected_elements),
            false_positive_rate=max(self.false_positive_rate, other.false_positive_rate)
        )

        # The max() parameters above may size the result differently from the
        # operands; force the hashing geometry to match so that the OR-ed bits
        # stay meaningful for later lookups.
        result.bit_array_size = self.bit_array_size
        result.num_hash_functions = self.num_hash_functions
        result._hash_seeds = list(self._hash_seeds)

        # OR the bit arrays
        result._bit_array = [
            mine | theirs
            for mine, theirs in zip(self._bit_array, other._bit_array)
        ]

        # Combine stored values (entries from ``other`` win on key clashes)
        result._values.update(self._values)
        result._values.update(other._values)
        result._size = len(result._values)
        result._insertions = self._insertions + other._insertions

        return result

    def intersection_estimate(self, other: 'xBloomFilterStrategy') -> float:
        """Estimate intersection size (approximate heuristic).

        Raises:
            ValueError: If bit array size or hash function count differ.
        """
        if (self.bit_array_size != other.bit_array_size or
                self.num_hash_functions != other.num_hash_functions):
            raise ValueError("Bloom filters must have same parameters for intersection")

        # Count bits set in both filters
        intersection_bits = sum(
            1
            for mine, theirs in zip(self._bit_array, other._bit_array)
            if mine == 1 and theirs == 1
        )

        # Rough estimation (not mathematically precise)
        if intersection_bits == 0:
            return 0.0

        # Simple heuristic: intersection bits relative to total bits
        total_bits_set = sum(self._bit_array) + sum(other._bit_array)
        if total_bits_set == 0:
            return 0.0

        estimated_ratio = intersection_bits / (total_bits_set / 2)
        estimated_size = estimated_ratio * min(self._insertions, other._insertions)
        return max(0.0, estimated_size)

    def export_bit_array(self) -> List[int]:
        """Export a copy of the bit array for analysis or storage."""
        return self._bit_array.copy()

    def import_bit_array(self, bit_array: List[int]) -> None:
        """Import a bit array (must match current size).

        Entries are normalised to 0/1 so that bit counting and the equality
        checks elsewhere in the class keep working.

        Raises:
            ValueError: If the length differs from ``bit_array_size``.
        """
        if len(bit_array) != self.bit_array_size:
            raise ValueError(f"Bit array size mismatch: expected {self.bit_array_size}, got {len(bit_array)}")

        self._bit_array = [1 if int(bit) else 0 for bit in bit_array]

    def optimize_for_insertions(self, actual_insertions: int) -> 'xBloomFilterStrategy':
        """Create an optimized bloom filter based on actual insertion count.

        Only the stored key/value pairs are re-inserted; bits that were set
        by removed keys or imported arrays are not carried over.
        """
        optimized = xBloomFilterStrategy(
            traits=self._traits,
            expected_elements=actual_insertions,
            false_positive_rate=self.false_positive_rate
        )

        # Re-insert all stored values
        for key, value in self._values.items():
            optimized.put(key, value)

        return optimized

    # ============================================================================
    # PERFORMANCE CHARACTERISTICS
    # ============================================================================

    @property
    def backend_info(self) -> Dict[str, Any]:
        """Get backend implementation info."""
        return {
            'strategy': 'BLOOM_FILTER',
            'backend': 'Probabilistic bit array',
            'bit_array_size': self.bit_array_size,
            'num_hash_functions': self.num_hash_functions,
            'expected_elements': self.expected_elements,
            'target_false_positive_rate': self.false_positive_rate,
            'complexity': {
                'add': 'O(k)',
                'contains': 'O(k)',
                'space': 'O(m)',
                'false_negatives': '0%',
                'false_positives': f'{self.false_positive_rate * 100:.2f}% target'
            }
        }

    @property
    def metrics(self) -> Dict[str, Any]:
        """Get performance metrics."""
        current_fp_rate = self.get_false_positive_probability()
        capacity_util = self.get_capacity_utilization()

        # Round up: a partially used byte still has to be allocated.
        bit_array_bytes = (self.bit_array_size + 7) // 8

        return {
            'stored_elements': self._size,
            'total_insertions': self._insertions,
            'bit_array_size': self.bit_array_size,
            'bits_set': sum(self._bit_array),
            'capacity_utilization': f"{capacity_util * 100:.1f}%",
            'current_false_positive_rate': f"{current_fp_rate * 100:.2f}%",
            'target_false_positive_rate': f"{self.false_positive_rate * 100:.2f}%",
            'memory_usage': f"{bit_array_bytes} bytes (bit array)"
        }
|
@@ -0,0 +1,357 @@
|
|
1
|
+
"""
|
2
|
+
B-Tree Node Strategy Implementation
|
3
|
+
|
4
|
+
This module implements the B_TREE strategy for efficient range queries
|
5
|
+
and sorted key operations with guaranteed O(log n) performance.
|
6
|
+
"""
|
7
|
+
|
8
|
+
from typing import Any, Iterator, List, Optional, Union, Dict
|
9
|
+
from .base import ANodeTreeStrategy
|
10
|
+
from ...types import NodeMode, NodeTrait
|
11
|
+
|
12
|
+
|
13
|
+
class BTreeNode:
    """A node in the B-tree.

    ``keys`` and ``values`` are parallel lists kept in ascending key order.
    Internal nodes additionally hold child subtrees in ``children``; child
    ``i`` is searched for keys that sort before ``keys[i]``.
    """

    def __init__(self, degree: int, is_leaf: bool = False):
        # Minimum degree: a node is considered full at 2 * degree - 1 keys.
        self.degree = degree
        self.keys: List[str] = []
        self.values: List[Any] = []
        self.children: List['BTreeNode'] = []
        self.is_leaf = is_leaf

    def is_full(self) -> bool:
        """Check if node is full."""
        return len(self.keys) == 2 * self.degree - 1

    def search(self, key: str) -> Optional[Any]:
        """Search for a key in this subtree.

        Returns:
            The value stored under ``key``, or ``None`` when the key is not
            in this subtree (a stored ``None`` is indistinguishable).
        """
        # Index of the first key that is not smaller than ``key``.
        i = 0
        while i < len(self.keys) and key > self.keys[i]:
            i += 1

        if i < len(self.keys) and key == self.keys[i]:
            return self.values[i]

        if self.is_leaf:
            return None

        return self.children[i].search(key)

    def insert_non_full(self, key: str, value: Any):
        """Insert key-value pair into non-full node.

        The caller must make sure this node is not full; full children met
        on the way down are split before descending into them.
        """
        i = len(self.keys) - 1

        if self.is_leaf:
            # Walk left past every larger key, then insert right after the
            # last key that is <= ``key``.
            while i >= 0 and key < self.keys[i]:
                i -= 1
            self.keys.insert(i + 1, key)
            self.values.insert(i + 1, value)
            return

        # Find child to insert into
        while i >= 0 and key < self.keys[i]:
            i -= 1
        i += 1

        if self.children[i].is_full():
            self.split_child(i)
            # The promoted median now sits at keys[i]; pick the correct half.
            if key > self.keys[i]:
                i += 1

        self.children[i].insert_non_full(key, value)

    def split_child(self, i: int):
        """Split the full child at index i.

        The child's median entry moves up into this node at position ``i``;
        the entries after the median move into a new sibling inserted at
        ``children[i + 1]``.
        """
        full_child = self.children[i]
        new_child = BTreeNode(full_child.degree, full_child.is_leaf)

        mid = full_child.degree - 1

        # Capture the median BEFORE truncating the child: after the slice
        # below the child only has ``mid`` entries and index ``mid`` is gone.
        promoted_key = full_child.keys[mid]
        promoted_value = full_child.values[mid]

        # Move half of the keys/values to new child
        new_child.keys = full_child.keys[mid + 1:]
        new_child.values = full_child.values[mid + 1:]
        full_child.keys = full_child.keys[:mid]
        full_child.values = full_child.values[:mid]

        # Move children if not leaf
        if not full_child.is_leaf:
            new_child.children = full_child.children[mid + 1:]
            full_child.children = full_child.children[:mid + 1]

        # Insert new child and promote middle key
        self.children.insert(i + 1, new_child)
        self.keys.insert(i, promoted_key)
        self.values.insert(i, promoted_value)
|
89
|
+
|
90
|
+
|
91
|
+
class BTreeStrategy(ANodeTreeStrategy):
    """
    B-Tree node strategy for efficient sorted operations and range queries.

    Keys are converted with ``str()`` and kept in sorted (string) order.
    Search and insert walk a single root-to-leaf path. Removal is a
    simplified implementation that rebuilds the tree, and the range/prefix
    helpers scan the in-order sequence from the start.

    Options (keyword arguments):
        degree: Minimum degree of the tree (default 3, must be an int >= 2).
    """

    def __init__(self, traits: NodeTrait = NodeTrait.NONE, **options):
        """Initialize the B-tree strategy.

        Raises:
            ValueError: If ``degree`` is not an integer >= 2. Smaller degrees
                cannot form a valid B-tree (splits would yield empty nodes).
        """
        super().__init__(NodeMode.B_TREE, traits, **options)
        degree = options.get('degree', 3)  # Minimum degree
        if not isinstance(degree, int) or degree < 2:
            raise ValueError(f"B-tree degree must be an integer >= 2, got {degree!r}")
        self.degree = degree
        self.root: Optional[BTreeNode] = BTreeNode(self.degree, is_leaf=True)
        self._size = 0

    def get_supported_traits(self) -> NodeTrait:
        """Get the traits supported by the B-tree strategy."""
        return (NodeTrait.ORDERED | NodeTrait.INDEXED | NodeTrait.HIERARCHICAL)

    # ============================================================================
    # INTERNAL HELPERS
    # ============================================================================

    def _find(self, key: str) -> Optional[tuple[BTreeNode, int]]:
        """Locate ``key`` and return ``(node, index)``, or None if absent.

        Presence is decided by the key itself, so entries whose stored value
        is ``None`` are still found.
        """
        node = self.root
        while node is not None:
            i = 0
            while i < len(node.keys) and key > node.keys[i]:
                i += 1
            if i < len(node.keys) and key == node.keys[i]:
                return node, i
            if node.is_leaf:
                return None
            node = node.children[i]
        return None

    def _inorder(self, node: BTreeNode) -> Iterator[tuple[str, Any]]:
        """Yield the (key, value) pairs of ``node``'s subtree in key order."""
        if node.is_leaf:
            yield from zip(node.keys, node.values)
            return

        for i, pair in enumerate(zip(node.keys, node.values)):
            yield from self._inorder(node.children[i])
            yield pair
        if node.children:
            yield from self._inorder(node.children[-1])

    # ============================================================================
    # CORE OPERATIONS (Key-based interface for compatibility)
    # ============================================================================

    def put(self, key: Any, value: Any = None) -> None:
        """Store a value with the given key (overwrites an existing entry)."""
        key_str = str(key)

        # Check if key already exists (update case)
        hit = self._find(key_str)
        if hit is not None:
            node, index = hit
            node.values[index] = value
            return

        if self.root.is_full():
            # Grow the tree by one level: the old root becomes the first
            # child of a new root and is split immediately.
            new_root = BTreeNode(self.degree, is_leaf=False)
            new_root.children.append(self.root)
            new_root.split_child(0)
            self.root = new_root

        self.root.insert_non_full(key_str, value)
        self._size += 1

    def _update_existing(self, key: str, value: Any) -> None:
        """Update existing key with new value (no-op if the key is absent)."""
        hit = self._find(key)
        if hit is not None:
            node, index = hit
            node.values[index] = value

    def get(self, key: Any, default: Any = None) -> Any:
        """Retrieve a value by key.

        Returns:
            The stored value (which may be ``None``) when the key exists,
            otherwise ``default``.
        """
        hit = self._find(str(key))
        if hit is None:
            return default
        node, index = hit
        return node.values[index]

    def has(self, key: Any) -> bool:
        """Check if key exists in B-tree (regardless of its stored value)."""
        return self._find(str(key)) is not None

    def remove(self, key: Any) -> bool:
        """Remove value by key (simplified implementation).

        Returns:
            True if the key existed and was removed, False otherwise.
        """
        # Note: Full B-tree deletion is complex. This is a simplified version
        # that rebuilds the whole tree without the key, so it costs far more
        # than a single root-to-leaf walk.
        key_str = str(key)
        if not self.has(key_str):
            return False

        # Materialise first: clear() below replaces the tree being iterated.
        survivors = [(k, v) for k, v in self.items() if k != key_str]

        self.clear()
        for k, v in survivors:
            self.put(k, v)

        return True

    def delete(self, key: Any) -> bool:
        """Remove value by key (alias for remove)."""
        return self.remove(key)

    def clear(self) -> None:
        """Clear all data."""
        self.root = BTreeNode(self.degree, is_leaf=True)
        self._size = 0

    def keys(self) -> Iterator[str]:
        """Get all keys in sorted order."""
        for key, _ in self.items():
            yield key

    def values(self) -> Iterator[Any]:
        """Get all values in key-sorted order."""
        for _, value in self.items():
            yield value

    def items(self) -> Iterator[tuple[str, Any]]:
        """Get all key-value pairs in sorted order."""
        if self.root:
            yield from self._inorder(self.root)

    def __len__(self) -> int:
        """Get the number of items."""
        return self._size

    def to_native(self) -> Dict[str, Any]:
        """Convert to native Python dict."""
        return dict(self.items())

    @property
    def is_list(self) -> bool:
        """This is not a list strategy."""
        return False

    @property
    def is_dict(self) -> bool:
        """This is a dict-like strategy."""
        return True

    # ============================================================================
    # B-TREE SPECIFIC OPERATIONS
    # ============================================================================

    def range_query(self, start_key: str, end_key: str) -> List[tuple[str, Any]]:
        """Get all key-value pairs in the specified range [start_key, end_key]."""
        # In-order scan from the smallest key; stops once past end_key.
        result = []
        for key, value in self.items():
            if start_key <= key <= end_key:
                result.append((key, value))
            elif key > end_key:
                break
        return result

    def prefix_search(self, prefix: str) -> List[tuple[str, Any]]:
        """Find all keys that start with the given prefix."""
        result = []
        for key, value in self.items():
            if key.startswith(prefix):
                result.append((key, value))
            elif key > prefix:
                # Keys are sorted: a key that sorts after the prefix without
                # starting with it lies beyond the whole prefix range.
                break
        return result

    def min_key(self) -> Optional[str]:
        """Get the minimum key, or None when the tree is empty."""
        if not self.root or self._size == 0:
            return None

        node = self.root
        while not node.is_leaf:
            node = node.children[0]

        return node.keys[0] if node.keys else None

    def max_key(self) -> Optional[str]:
        """Get the maximum key, or None when the tree is empty."""
        if not self.root or self._size == 0:
            return None

        node = self.root
        while not node.is_leaf:
            node = node.children[-1]

        return node.keys[-1] if node.keys else None

    def successor(self, key: str) -> Optional[str]:
        """Find the successor of the given key.

        Returns None when ``key`` is not stored or is the largest key.
        """
        found = False
        for candidate in self.keys():
            if found and candidate > key:
                return candidate
            if candidate == key:
                found = True
        return None

    def predecessor(self, key: str) -> Optional[str]:
        """Find the predecessor of the given key.

        Returns None when ``key`` is not stored or is the smallest key.
        """
        previous = None
        for candidate in self.keys():
            if candidate == key:
                return previous
            previous = candidate
        return None

    # ============================================================================
    # PERFORMANCE CHARACTERISTICS
    # ============================================================================

    @property
    def backend_info(self) -> Dict[str, Any]:
        """Get backend implementation info."""
        # NOTE(review): 'delete' and 'range_query' below describe a full
        # B-tree; remove() currently rebuilds the tree and range_query()
        # scans from the first key. Strings kept as-is for compatibility.
        return {
            'strategy': 'B_TREE',
            'backend': 'Custom B-tree implementation',
            'degree': self.degree,
            'complexity': {
                'search': 'O(log n)',
                'insert': 'O(log n)',
                'delete': 'O(log n)',
                'range_query': 'O(log n + k)',
                'iteration': 'O(n)'
            }
        }

    @property
    def metrics(self) -> Dict[str, Any]:
        """Get performance metrics."""
        # Calculate tree height (number of edges on the leftmost path)
        height = 0
        if self.root:
            node = self.root
            while not node.is_leaf and node.children:
                node = node.children[0]
                height += 1

        return {
            'size': self._size,
            'degree': self.degree,
            'height': height,
            'memory_usage': f"{self._size * (24 + 16)} bytes (estimated)",
            'is_sorted': True
        }
|