exonware-xwnode 0.0.1.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- exonware/__init__.py +14 -0
- exonware/xwnode/__init__.py +127 -0
- exonware/xwnode/base.py +676 -0
- exonware/xwnode/config.py +178 -0
- exonware/xwnode/contracts.py +730 -0
- exonware/xwnode/errors.py +503 -0
- exonware/xwnode/facade.py +460 -0
- exonware/xwnode/strategies/__init__.py +158 -0
- exonware/xwnode/strategies/advisor.py +463 -0
- exonware/xwnode/strategies/edges/__init__.py +32 -0
- exonware/xwnode/strategies/edges/adj_list.py +227 -0
- exonware/xwnode/strategies/edges/adj_matrix.py +391 -0
- exonware/xwnode/strategies/edges/base.py +169 -0
- exonware/xwnode/strategies/flyweight.py +328 -0
- exonware/xwnode/strategies/impls/__init__.py +13 -0
- exonware/xwnode/strategies/impls/_base_edge.py +403 -0
- exonware/xwnode/strategies/impls/_base_node.py +307 -0
- exonware/xwnode/strategies/impls/edge_adj_list.py +353 -0
- exonware/xwnode/strategies/impls/edge_adj_matrix.py +445 -0
- exonware/xwnode/strategies/impls/edge_bidir_wrapper.py +455 -0
- exonware/xwnode/strategies/impls/edge_block_adj_matrix.py +539 -0
- exonware/xwnode/strategies/impls/edge_coo.py +533 -0
- exonware/xwnode/strategies/impls/edge_csc.py +447 -0
- exonware/xwnode/strategies/impls/edge_csr.py +492 -0
- exonware/xwnode/strategies/impls/edge_dynamic_adj_list.py +503 -0
- exonware/xwnode/strategies/impls/edge_flow_network.py +555 -0
- exonware/xwnode/strategies/impls/edge_hyperedge_set.py +516 -0
- exonware/xwnode/strategies/impls/edge_neural_graph.py +650 -0
- exonware/xwnode/strategies/impls/edge_octree.py +574 -0
- exonware/xwnode/strategies/impls/edge_property_store.py +655 -0
- exonware/xwnode/strategies/impls/edge_quadtree.py +519 -0
- exonware/xwnode/strategies/impls/edge_rtree.py +820 -0
- exonware/xwnode/strategies/impls/edge_temporal_edgeset.py +558 -0
- exonware/xwnode/strategies/impls/edge_tree_graph_basic.py +271 -0
- exonware/xwnode/strategies/impls/edge_weighted_graph.py +411 -0
- exonware/xwnode/strategies/manager.py +775 -0
- exonware/xwnode/strategies/metrics.py +538 -0
- exonware/xwnode/strategies/migration.py +432 -0
- exonware/xwnode/strategies/nodes/__init__.py +50 -0
- exonware/xwnode/strategies/nodes/_base_node.py +307 -0
- exonware/xwnode/strategies/nodes/adjacency_list.py +267 -0
- exonware/xwnode/strategies/nodes/aho_corasick.py +345 -0
- exonware/xwnode/strategies/nodes/array_list.py +209 -0
- exonware/xwnode/strategies/nodes/base.py +247 -0
- exonware/xwnode/strategies/nodes/deque.py +200 -0
- exonware/xwnode/strategies/nodes/hash_map.py +135 -0
- exonware/xwnode/strategies/nodes/heap.py +307 -0
- exonware/xwnode/strategies/nodes/linked_list.py +232 -0
- exonware/xwnode/strategies/nodes/node_aho_corasick.py +520 -0
- exonware/xwnode/strategies/nodes/node_array_list.py +175 -0
- exonware/xwnode/strategies/nodes/node_avl_tree.py +371 -0
- exonware/xwnode/strategies/nodes/node_b_plus_tree.py +542 -0
- exonware/xwnode/strategies/nodes/node_bitmap.py +420 -0
- exonware/xwnode/strategies/nodes/node_bitset_dynamic.py +513 -0
- exonware/xwnode/strategies/nodes/node_bloom_filter.py +347 -0
- exonware/xwnode/strategies/nodes/node_btree.py +357 -0
- exonware/xwnode/strategies/nodes/node_count_min_sketch.py +470 -0
- exonware/xwnode/strategies/nodes/node_cow_tree.py +473 -0
- exonware/xwnode/strategies/nodes/node_cuckoo_hash.py +392 -0
- exonware/xwnode/strategies/nodes/node_fenwick_tree.py +301 -0
- exonware/xwnode/strategies/nodes/node_hash_map.py +269 -0
- exonware/xwnode/strategies/nodes/node_heap.py +191 -0
- exonware/xwnode/strategies/nodes/node_hyperloglog.py +407 -0
- exonware/xwnode/strategies/nodes/node_linked_list.py +409 -0
- exonware/xwnode/strategies/nodes/node_lsm_tree.py +400 -0
- exonware/xwnode/strategies/nodes/node_ordered_map.py +390 -0
- exonware/xwnode/strategies/nodes/node_ordered_map_balanced.py +565 -0
- exonware/xwnode/strategies/nodes/node_patricia.py +512 -0
- exonware/xwnode/strategies/nodes/node_persistent_tree.py +378 -0
- exonware/xwnode/strategies/nodes/node_radix_trie.py +452 -0
- exonware/xwnode/strategies/nodes/node_red_black_tree.py +497 -0
- exonware/xwnode/strategies/nodes/node_roaring_bitmap.py +570 -0
- exonware/xwnode/strategies/nodes/node_segment_tree.py +289 -0
- exonware/xwnode/strategies/nodes/node_set_hash.py +354 -0
- exonware/xwnode/strategies/nodes/node_set_tree.py +480 -0
- exonware/xwnode/strategies/nodes/node_skip_list.py +316 -0
- exonware/xwnode/strategies/nodes/node_splay_tree.py +393 -0
- exonware/xwnode/strategies/nodes/node_suffix_array.py +487 -0
- exonware/xwnode/strategies/nodes/node_treap.py +387 -0
- exonware/xwnode/strategies/nodes/node_tree_graph_hybrid.py +1434 -0
- exonware/xwnode/strategies/nodes/node_trie.py +252 -0
- exonware/xwnode/strategies/nodes/node_union_find.py +187 -0
- exonware/xwnode/strategies/nodes/node_xdata_optimized.py +369 -0
- exonware/xwnode/strategies/nodes/priority_queue.py +209 -0
- exonware/xwnode/strategies/nodes/queue.py +161 -0
- exonware/xwnode/strategies/nodes/sparse_matrix.py +206 -0
- exonware/xwnode/strategies/nodes/stack.py +152 -0
- exonware/xwnode/strategies/nodes/trie.py +274 -0
- exonware/xwnode/strategies/nodes/union_find.py +283 -0
- exonware/xwnode/strategies/pattern_detector.py +603 -0
- exonware/xwnode/strategies/performance_monitor.py +487 -0
- exonware/xwnode/strategies/queries/__init__.py +24 -0
- exonware/xwnode/strategies/queries/base.py +236 -0
- exonware/xwnode/strategies/queries/cql.py +201 -0
- exonware/xwnode/strategies/queries/cypher.py +181 -0
- exonware/xwnode/strategies/queries/datalog.py +70 -0
- exonware/xwnode/strategies/queries/elastic_dsl.py +70 -0
- exonware/xwnode/strategies/queries/eql.py +70 -0
- exonware/xwnode/strategies/queries/flux.py +70 -0
- exonware/xwnode/strategies/queries/gql.py +70 -0
- exonware/xwnode/strategies/queries/graphql.py +240 -0
- exonware/xwnode/strategies/queries/gremlin.py +181 -0
- exonware/xwnode/strategies/queries/hiveql.py +214 -0
- exonware/xwnode/strategies/queries/hql.py +70 -0
- exonware/xwnode/strategies/queries/jmespath.py +219 -0
- exonware/xwnode/strategies/queries/jq.py +66 -0
- exonware/xwnode/strategies/queries/json_query.py +66 -0
- exonware/xwnode/strategies/queries/jsoniq.py +248 -0
- exonware/xwnode/strategies/queries/kql.py +70 -0
- exonware/xwnode/strategies/queries/linq.py +238 -0
- exonware/xwnode/strategies/queries/logql.py +70 -0
- exonware/xwnode/strategies/queries/mql.py +68 -0
- exonware/xwnode/strategies/queries/n1ql.py +210 -0
- exonware/xwnode/strategies/queries/partiql.py +70 -0
- exonware/xwnode/strategies/queries/pig.py +215 -0
- exonware/xwnode/strategies/queries/promql.py +70 -0
- exonware/xwnode/strategies/queries/sparql.py +220 -0
- exonware/xwnode/strategies/queries/sql.py +275 -0
- exonware/xwnode/strategies/queries/xml_query.py +66 -0
- exonware/xwnode/strategies/queries/xpath.py +223 -0
- exonware/xwnode/strategies/queries/xquery.py +258 -0
- exonware/xwnode/strategies/queries/xwnode_executor.py +332 -0
- exonware/xwnode/strategies/queries/xwquery_strategy.py +424 -0
- exonware/xwnode/strategies/registry.py +604 -0
- exonware/xwnode/strategies/simple.py +273 -0
- exonware/xwnode/strategies/utils.py +532 -0
- exonware/xwnode/types.py +912 -0
- exonware/xwnode/version.py +78 -0
- exonware_xwnode-0.0.1.12.dist-info/METADATA +169 -0
- exonware_xwnode-0.0.1.12.dist-info/RECORD +132 -0
- exonware_xwnode-0.0.1.12.dist-info/WHEEL +4 -0
- exonware_xwnode-0.0.1.12.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,392 @@
|
|
1
|
+
"""
|
2
|
+
Cuckoo Hash Node Strategy Implementation
|
3
|
+
|
4
|
+
This module implements the CUCKOO_HASH strategy for guaranteed O(1)
|
5
|
+
worst-case lookup time with efficient space utilization.
|
6
|
+
"""
|
7
|
+
|
8
|
+
from typing import Any, Iterator, List, Dict, Optional, Tuple
|
9
|
+
import hashlib
|
10
|
+
import random
|
11
|
+
from ._base_node import aNodeStrategy
|
12
|
+
from ...types import NodeMode, NodeTrait
|
13
|
+
|
14
|
+
|
15
|
+
class xCuckooHashStrategy(aNodeStrategy):
    """
    Cuckoo Hash node strategy with two-probe lookups.

    Every key lives in exactly one of two candidate slots: position
    ``_hash1(key)`` of table 1 or position ``_hash2(key)`` of table 2.
    Lookups and removals therefore inspect at most two slots. Insertion
    displaces ("evicts") occupants between the two tables; when the
    displacement chain exceeds ``max_evictions`` the insertion is rolled
    back and the tables are rebuilt with twice the capacity and freshly
    drawn hash functions.

    Keys are normalised with ``str(key)`` before being stored.

    Options:
        initial_capacity: slots per table at construction (default 16).
        load_factor: fraction of ``capacity`` at which ``put`` grows the
            tables before inserting a new key (default 0.5).
        max_evictions: maximum displacement steps per insertion (default 8).
    """

    def __init__(self, traits: NodeTrait = NodeTrait.NONE, **options):
        """Initialize the Cuckoo Hash strategy."""
        super().__init__(NodeMode.CUCKOO_HASH, traits, **options)

        # Cuckoo hash parameters
        self.initial_capacity = options.get('initial_capacity', 16)
        self.load_factor = options.get('load_factor', 0.5)  # Lower for cuckoo hashing
        self.max_evictions = options.get('max_evictions', 8)

        # Two hash tables. At least one slot per table is required because
        # the hash functions reduce modulo ``capacity``.
        self.capacity = max(1, self.initial_capacity)
        self._table1: List[Optional[Tuple[str, Any]]] = [None] * self.capacity
        self._table2: List[Optional[Tuple[str, Any]]] = [None] * self.capacity

        # Modulus of the (a * h + b) % p hash family. Both functions only
        # see ``hash(key) % p``, so keys sharing a residue collide in BOTH
        # tables no matter which coefficients or capacity are chosen. A
        # 61-bit Mersenne prime makes such residue collisions negligible,
        # whereas a ~10**6 modulus makes three-way collisions (which can
        # never be placed) likely for tables of a few thousand keys.
        self._prime = (1 << 61) - 1
        self._randomize_hash_functions()

        self._size = 0
        self._resize_threshold = int(self.capacity * self.load_factor)

        # Bookkeeping reported by get_eviction_stats().
        self._total_evictions = 0
        self._resize_count = 0

    def get_supported_traits(self) -> NodeTrait:
        """Get the traits supported by the cuckoo hash strategy."""
        return (NodeTrait.INDEXED | NodeTrait.HIERARCHICAL)

    def _randomize_hash_functions(self) -> None:
        """Draw new random coefficients for both hash functions."""
        top = self._prime - 1
        self._hash1_a = random.randint(1, top)
        self._hash1_b = random.randint(0, top)
        self._hash2_a = random.randint(1, top)
        self._hash2_b = random.randint(0, top)

    def _hash1(self, key: str) -> int:
        """First hash function: slot index of ``key`` in table 1."""
        key_hash = hash(key)
        return ((self._hash1_a * key_hash + self._hash1_b) % self._prime) % self.capacity

    def _hash2(self, key: str) -> int:
        """Second hash function: slot index of ``key`` in table 2."""
        key_hash = hash(key)
        return ((self._hash2_a * key_hash + self._hash2_b) % self._prime) % self.capacity

    def _locate(self, key_str: str) -> Optional[Tuple[List[Optional[Tuple[str, Any]]], int]]:
        """Return ``(table, position)`` of the slot holding ``key_str``, or None."""
        pos1 = self._hash1(key_str)
        entry = self._table1[pos1]
        if entry is not None and entry[0] == key_str:
            return self._table1, pos1

        pos2 = self._hash2(key_str)
        entry = self._table2[pos2]
        if entry is not None and entry[0] == key_str:
            return self._table2, pos2

        return None

    def _rebuild(self, entries: List[Tuple[str, Any]], capacity: int) -> None:
        """
        Re-create both tables with ``capacity`` slots each and reinsert ``entries``.

        New hash functions are drawn for every attempt. If any entry cannot
        be placed, the attempt is discarded, the capacity is doubled and the
        rebuild starts over, so no entry is ever dropped.
        """
        capacity = max(1, capacity)
        while True:
            self.capacity = capacity
            self._table1 = [None] * capacity
            self._table2 = [None] * capacity
            self._resize_threshold = int(capacity * self.load_factor)
            self._randomize_hash_functions()
            self._size = 0

            # all() stops at the first entry that could not be placed.
            if all(self._insert_internal(key, value) for key, value in entries):
                return

            capacity *= 2
            self._resize_count += 1

    def _resize(self) -> None:
        """Double the capacity of both tables and reinsert every entry."""
        entries = list(self.items())
        self._resize_count += 1
        self._rebuild(entries, self.capacity * 2)

    def _insert_internal(self, key: str, value: Any) -> bool:
        """
        Insert a key that is not yet stored, using cuckoo eviction.

        Returns:
            True when the entry was placed (``_size`` is incremented).
            False when the eviction budget was exhausted; in that case every
            displacement is undone, so the tables are exactly as before the
            call and the caller can grow the tables and retry.
        """
        # Try table 1 first
        pos1 = self._hash1(key)
        if self._table1[pos1] is None:
            self._table1[pos1] = (key, value)
            self._size += 1
            return True

        # Try table 2
        pos2 = self._hash2(key)
        if self._table2[pos2] is None:
            self._table2[pos2] = (key, value)
            self._size += 1
            return True

        # Both positions occupied, start cuckoo eviction from table 1.
        homeless: Tuple[str, Any] = (key, value)
        use_first = True
        # Each record is (table, position, previous occupant) so the chain
        # can be reverted if it does not terminate within the budget.
        undo: List[Tuple[List[Optional[Tuple[str, Any]]], int, Tuple[str, Any]]] = []

        for _ in range(self.max_evictions):
            if use_first:
                table, pos = self._table1, self._hash1(homeless[0])
            else:
                table, pos = self._table2, self._hash2(homeless[0])

            occupant = table[pos]
            table[pos] = homeless
            if occupant is None:
                self._size += 1
                self._total_evictions += len(undo)
                return True

            undo.append((table, pos, occupant))
            homeless = occupant
            use_first = not use_first

        # Budget exhausted: restore slots in reverse order. Reverse order is
        # required because a chain may visit the same slot more than once.
        for table, pos, occupant in reversed(undo):
            table[pos] = occupant
        return False

    # ============================================================================
    # CORE OPERATIONS (Key-based interface for compatibility)
    # ============================================================================

    def put(self, key: Any, value: Any = None) -> None:
        """Store a key-value pair, replacing the value if the key exists."""
        key_str = str(key)

        # Update in place when the key is already stored.
        slot = self._locate(key_str)
        if slot is not None:
            table, pos = slot
            table[pos] = (key_str, value)
            return

        # Check if resize is needed
        if self._size >= self._resize_threshold:
            self._resize()

        # A failed insertion leaves the tables untouched, so growing and
        # retrying cannot lose or duplicate entries.
        while not self._insert_internal(key_str, value):
            self._resize()

    def get(self, key: Any, default: Any = None) -> Any:
        """Retrieve a value by key (at most two slot probes)."""
        slot = self._locate(str(key))
        if slot is None:
            return default
        table, pos = slot
        return table[pos][1]

    def has(self, key: Any) -> bool:
        """Check if key exists (at most two slot probes)."""
        return self._locate(str(key)) is not None

    def remove(self, key: Any) -> bool:
        """Remove a key-value pair; return True if the key was present."""
        slot = self._locate(str(key))
        if slot is None:
            return False

        table, pos = slot
        table[pos] = None
        self._size -= 1
        return True

    def delete(self, key: Any) -> bool:
        """Remove a key-value pair (alias for remove)."""
        return self.remove(key)

    def clear(self) -> None:
        """Clear all data, keeping the current capacity and hash functions."""
        self._table1 = [None] * self.capacity
        self._table2 = [None] * self.capacity
        self._size = 0

    def keys(self) -> Iterator[str]:
        """Get all keys (table 1 first, then table 2, in slot order)."""
        for table in (self._table1, self._table2):
            for entry in table:
                if entry is not None:
                    yield entry[0]

    def values(self) -> Iterator[Any]:
        """Get all values (same order as keys())."""
        for table in (self._table1, self._table2):
            for entry in table:
                if entry is not None:
                    yield entry[1]

    def items(self) -> Iterator[Tuple[str, Any]]:
        """Get all key-value pairs (same order as keys())."""
        for table in (self._table1, self._table2):
            for entry in table:
                if entry is not None:
                    yield entry

    def __len__(self) -> int:
        """Get the number of key-value pairs."""
        return self._size

    def to_native(self) -> Dict[str, Any]:
        """Convert to native Python dict."""
        return dict(self.items())

    @property
    def is_list(self) -> bool:
        """This is not a list strategy."""
        return False

    @property
    def is_dict(self) -> bool:
        """This is a dict-like strategy."""
        return True

    # ============================================================================
    # CUCKOO HASH SPECIFIC OPERATIONS
    # ============================================================================

    def get_table_utilization(self) -> Tuple[float, float]:
        """Get the fraction of occupied slots of table 1 and table 2."""
        table1_used = sum(1 for entry in self._table1 if entry is not None)
        table2_used = sum(1 for entry in self._table2 if entry is not None)

        util1 = table1_used / self.capacity if self.capacity > 0 else 0
        util2 = table2_used / self.capacity if self.capacity > 0 else 0

        return util1, util2

    def get_max_probe_distance(self) -> int:
        """Get maximum probe distance (always 1 for cuckoo hashing)."""
        return 1  # One slot per table is inspected; there is no probe sequence

    def get_eviction_stats(self) -> Dict[str, int]:
        """
        Get statistics about evictions.

        Returns a dict with:
            total_evictions: displacements performed by successful insertions.
            max_eviction_chain: the configured ``max_evictions`` bound.
            resize_count: number of times the tables were grown.
        """
        return {
            'total_evictions': self._total_evictions,
            'max_eviction_chain': self.max_evictions,
            'resize_count': self._resize_count
        }

    def rehash(self) -> None:
        """Force a rehash with new hash functions (capacity grows only if needed)."""
        self._rebuild(list(self.items()), self.capacity)

    def analyze_distribution(self) -> Dict[str, Any]:
        """Analyze the distribution of elements across tables."""
        table1_count = sum(1 for entry in self._table1 if entry is not None)
        table2_count = sum(1 for entry in self._table2 if entry is not None)

        return {
            'table1_count': table1_count,
            'table2_count': table2_count,
            'table1_percentage': (table1_count / self._size * 100) if self._size > 0 else 0,
            'table2_percentage': (table2_count / self._size * 100) if self._size > 0 else 0,
            # The trailing 1 keeps the denominator non-zero for empty tables.
            'balance_ratio': min(table1_count, table2_count) / max(table1_count, table2_count, 1)
        }

    def compact(self) -> None:
        """Halve the capacity (not below ``initial_capacity``) if the load is very low."""
        current_load = self._size / (2 * self.capacity) if self.capacity > 0 else 0

        if current_load < self.load_factor / 4 and self.capacity > self.initial_capacity:
            target = max(self.initial_capacity, self.capacity // 2)
            self._rebuild(list(self.items()), target)

    # ============================================================================
    # PERFORMANCE CHARACTERISTICS
    # ============================================================================

    @property
    def backend_info(self) -> Dict[str, Any]:
        """Get backend implementation info."""
        util1, util2 = self.get_table_utilization()

        return {
            'strategy': 'CUCKOO_HASH',
            'backend': 'Dual hash tables with eviction',
            'capacity': self.capacity,
            'load_factor': self.load_factor,
            'max_evictions': self.max_evictions,
            'table1_utilization': f"{util1 * 100:.1f}%",
            'table2_utilization': f"{util2 * 100:.1f}%",
            'complexity': {
                'lookup': 'O(1) worst-case',
                'insert': 'O(1) amortized',
                'delete': 'O(1) worst-case',
                'space': 'O(n)',
                'probe_distance': '1 (guaranteed)'
            }
        }

    @property
    def metrics(self) -> Dict[str, Any]:
        """Get performance metrics."""
        overall_load = self._size / (2 * self.capacity) if self.capacity > 0 else 0
        distribution = self.analyze_distribution()

        return {
            'size': self._size,
            'capacity': self.capacity * 2,  # Total capacity across both tables
            'overall_load_factor': f"{overall_load * 100:.1f}%",
            'table_balance': f"{distribution['balance_ratio']:.2f}",
            'memory_usage': f"{self.capacity * 2 * 16} bytes (estimated)",
            'guaranteed_lookup_time': 'O(1)',
            'resize_threshold': self._resize_threshold
        }
|
@@ -0,0 +1,301 @@
|
|
1
|
+
"""
|
2
|
+
Fenwick Tree (Binary Indexed Tree) Node Strategy Implementation
|
3
|
+
|
4
|
+
This module implements the FENWICK_TREE strategy for efficient prefix sum
|
5
|
+
queries and point updates with O(log n) complexity.
|
6
|
+
"""
|
7
|
+
|
8
|
+
from typing import Any, Iterator, List, Dict, Union
|
9
|
+
from .base import ANodeTreeStrategy
|
10
|
+
from ...types import NodeMode, NodeTrait
|
11
|
+
|
12
|
+
|
13
|
+
class FenwickTreeStrategy(ANodeTreeStrategy):
    """
    Fenwick Tree node strategy for efficient prefix sum operations.

    Also known as Binary Indexed Tree (BIT). Each key is assigned a fixed
    slot (1-based tree index) in insertion order; the numeric form of its
    value (``float(value)``, or 0.0 when that conversion fails) is kept in
    the tree, while the original value object is kept in ``_values``.

    Slots are never reused or compacted: removing a key sets its slot to
    0.0 and leaves a gap. The index-based operations (``prefix_sum``,
    ``range_sum``, ``point_update``, ...) therefore address slots
    ``0 .. next_index - 2`` (0-based), not positions among remaining keys.
    """

    def __init__(self, traits: NodeTrait = NodeTrait.NONE, **options):
        """Initialize the Fenwick Tree strategy."""
        super().__init__(NodeMode.FENWICK_TREE, traits, **options)

        self.initial_size = options.get('initial_size', 1000)

        # Fenwick tree (1-indexed for easier bit operations; slot 0 unused)
        self._tree: List[float] = self._new_tree()
        self._values: Dict[str, Any] = {}  # Key-value storage for compatibility
        self._indices: Dict[str, int] = {}  # Map keys to tree indices
        self._reverse_indices: Dict[int, str] = {}  # Map indices to keys
        self._next_index = 1  # 1-indexed
        self._size = 0

    def _new_tree(self) -> List[float]:
        """Return an empty tree sized for ``initial_size`` slots (plus unused slot 0)."""
        return [0.0] * (max(0, self.initial_size) + 1)

    def _slot_count(self) -> int:
        """Number of slots handed out so far, including slots of removed keys."""
        return self._next_index - 1

    def get_supported_traits(self) -> NodeTrait:
        """Get the traits supported by the Fenwick tree strategy."""
        return (NodeTrait.INDEXED | NodeTrait.ORDERED | NodeTrait.STREAMING)

    # ============================================================================
    # CORE OPERATIONS (Key-based interface for compatibility)
    # ============================================================================

    def put(self, key: Any, value: Any = None) -> None:
        """
        Store a value at the given key.

        The tree receives ``float(value)``; values that cannot be converted
        (or ``None``) contribute 0.0. The original object is what ``get``
        returns.
        """
        key_str = str(key)

        # Convert value to numeric for tree operations
        try:
            numeric_value = float(value) if value is not None else 0.0
        except (ValueError, TypeError, OverflowError):
            numeric_value = 0.0

        if key_str in self._indices:
            # Update existing: apply the difference to the current slot value
            idx = self._indices[key_str]
            delta = numeric_value - self._get_point_value(idx)
            self._update_point(idx, delta)
        else:
            # Add new
            if self._next_index >= len(self._tree):
                self._resize_tree()

            idx = self._next_index
            self._indices[key_str] = idx
            self._reverse_indices[idx] = key_str
            self._next_index += 1
            self._size += 1

            self._update_point(idx, numeric_value)

        self._values[key_str] = value

    def get(self, key: Any, default: Any = None) -> Any:
        """Retrieve a value by key."""
        return self._values.get(str(key), default)

    def has(self, key: Any) -> bool:
        """Check if key exists."""
        return str(key) in self._values

    def remove(self, key: Any) -> bool:
        """Remove value by key; its slot stays allocated and contributes 0.0."""
        key_str = str(key)
        if key_str not in self._indices:
            return False

        idx = self._indices[key_str]
        old_value = self._get_point_value(idx)
        self._update_point(idx, -old_value)  # Set to 0

        del self._indices[key_str]
        del self._reverse_indices[idx]
        del self._values[key_str]
        self._size -= 1

        return True

    def delete(self, key: Any) -> bool:
        """Remove value by key (alias for remove)."""
        return self.remove(key)

    def clear(self) -> None:
        """Clear all data and release every slot."""
        self._tree = self._new_tree()
        self._values.clear()
        self._indices.clear()
        self._reverse_indices.clear()
        self._next_index = 1
        self._size = 0

    def keys(self) -> Iterator[str]:
        """Get all keys in index order."""
        # Snapshot sorted by slot so later mutations do not affect the iterator.
        ordered = sorted(self._indices, key=self._indices.__getitem__)
        return (key for key in ordered)

    def values(self) -> Iterator[Any]:
        """Get all values in index order."""
        for key in self.keys():
            yield self._values[key]

    def items(self) -> Iterator[tuple[str, Any]]:
        """Get all key-value pairs in index order."""
        for key in self.keys():
            yield (key, self._values[key])

    def __len__(self) -> int:
        """Get the number of items."""
        return self._size

    def to_native(self) -> List[Any]:
        """Convert to native Python list (preserving order)."""
        return [self._values[key] for key in self.keys()]

    @property
    def is_list(self) -> bool:
        """This behaves like a list (indexed)."""
        return True

    @property
    def is_dict(self) -> bool:
        """This can behave like a dict."""
        return True

    # ============================================================================
    # FENWICK TREE SPECIFIC OPERATIONS
    # ============================================================================

    def _resize_tree(self) -> None:
        """
        Double the internal tree while keeping existing sums valid.

        Node ``j`` of a Fenwick tree stores the sum of positions
        ``(j - lowbit(j), j]``. A newly added node can cover positions that
        already hold data, so it cannot simply start at zero: it is seeded
        with the sum of the existing positions in its range.
        """
        old_len = len(self._tree)
        new_len = max(2, old_len * 2)
        self._tree.extend([0.0] * (new_len - old_len))

        # Ascending order guarantees every node read by _prefix_sum is
        # already final. The new positions themselves hold 0, so the range
        # sum stops at j - 1.
        for j in range(max(1, old_len), new_len):
            low = j & (-j)
            self._tree[j] = self._prefix_sum(j - 1) - self._prefix_sum(j - low)

    def _update_point(self, idx: int, delta: float) -> None:
        """Add delta to position idx (1-indexed)."""
        if idx <= 0:
            # lowbit(0) == 0 would never advance the loop below.
            return
        while idx < len(self._tree):
            self._tree[idx] += delta
            idx += idx & (-idx)  # Add lowest set bit

    def _get_point_value(self, idx: int) -> float:
        """Get value at position idx (1-indexed) as a difference of prefix sums."""
        return self._prefix_sum(idx) - self._prefix_sum(idx - 1)

    def _prefix_sum(self, idx: int) -> float:
        """Get prefix sum from 1 to idx (inclusive, 1-indexed)."""
        if idx <= 0:
            return 0.0

        # Positions beyond the allocated tree hold nothing.
        idx = min(idx, len(self._tree) - 1)

        result = 0.0
        while idx > 0:
            result += self._tree[idx]
            idx -= idx & (-idx)  # Remove lowest set bit
        return result

    def _sync_stored_value(self, tree_idx: int, new_value: float) -> None:
        """
        Mirror a tree update into ``_values`` for the key owning ``tree_idx``.

        Only numeric (or ``None``) stored values are replaced; arbitrary
        objects that were stored with a 0.0 tree weight are left untouched.
        """
        key = self._reverse_indices.get(tree_idx)
        if key is None:
            return
        stored = self._values.get(key)
        if stored is None or isinstance(stored, (int, float)):
            self._values[key] = new_value

    def prefix_sum(self, index: int) -> float:
        """Public method: get prefix sum of slots 0..index (0-indexed, inclusive).

        Returns 0.0 when index is outside the allocated slots.
        """
        if index < 0 or index >= self._slot_count():
            return 0.0

        # Convert 0-indexed to 1-indexed
        return self._prefix_sum(index + 1)

    def range_sum(self, left: int, right: int) -> float:
        """Get sum of elements in range [left, right] (0-indexed slots)."""
        slots = self._slot_count()
        if left > right or right < 0 or left >= slots:
            return 0.0

        left = max(0, left)
        right = min(slots - 1, right)

        return self._prefix_sum(right + 1) - self._prefix_sum(left)

    def point_update(self, index: int, new_value: float) -> None:
        """Set the value of the slot at index (0-indexed).

        Out-of-range indices and slots whose key was removed are ignored.
        """
        if index < 0 or index >= self._slot_count():
            return

        tree_idx = index + 1  # Convert to 1-indexed
        if tree_idx not in self._reverse_indices:
            return  # Vacated slot: must stay at 0 so sums match stored keys

        delta = new_value - self._get_point_value(tree_idx)
        self._update_point(tree_idx, delta)
        self._sync_stored_value(tree_idx, new_value)

    def point_add(self, index: int, delta: float) -> None:
        """Add delta to the value of the slot at index (0-indexed).

        Out-of-range indices and slots whose key was removed are ignored.
        """
        if index < 0 or index >= self._slot_count():
            return

        tree_idx = index + 1  # Convert to 1-indexed
        key = self._reverse_indices.get(tree_idx)
        if key is None:
            return  # Vacated slot: must stay at 0 so sums match stored keys

        self._update_point(tree_idx, delta)

        stored = self._values.get(key)
        if isinstance(stored, (int, float)):
            self._sync_stored_value(tree_idx, stored + delta)
        else:
            self._sync_stored_value(tree_idx, self._get_point_value(tree_idx))

    def total_sum(self) -> float:
        """Get sum of all elements."""
        # Bound by allocated slots, not by _size: removals leave gaps, so
        # live values can sit at indices greater than _size.
        return self._prefix_sum(self._slot_count())

    def find_prefix_sum_index(self, target_sum: float) -> int:
        """
        Find the smallest 0-indexed slot whose prefix sum is >= target_sum.

        The search assumes non-negative values (monotone prefix sums). When
        target_sum exceeds the total, the result is an index at or beyond
        the number of allocated slots.
        """
        idx = 0
        current_sum = 0.0

        # Start from the highest power of 2 <= tree size
        bit_mask = 1
        while bit_mask <= len(self._tree):
            bit_mask <<= 1
        bit_mask >>= 1

        # Binary descent: keep every step whose accumulated sum stays below
        # the target; idx ends as the last 1-based position still below it.
        while bit_mask > 0:
            next_idx = idx + bit_mask
            if next_idx < len(self._tree) and current_sum + self._tree[next_idx] < target_sum:
                idx = next_idx
                current_sum += self._tree[idx]
            bit_mask >>= 1

        # idx is the last 1-based position below the target, which equals the
        # 0-based index of the first position reaching it.
        return idx

    def get_range_statistics(self, left: int, right: int) -> Dict[str, float]:
        """Get sum, slot count and average for the slot range [left, right] (0-indexed)."""
        slots = self._slot_count()
        if left > right or right < 0 or left >= slots:
            return {'sum': 0.0, 'count': 0, 'average': 0.0}

        left = max(0, left)
        right = min(slots - 1, right)

        range_sum = self.range_sum(left, right)
        count = right - left + 1
        average = range_sum / count if count > 0 else 0.0

        return {
            'sum': range_sum,
            'count': count,
            'average': average
        }

    def bulk_update(self, updates: List[tuple[int, float]]) -> None:
        """Perform multiple point updates; invalid indices are skipped."""
        for index, value in updates:
            self.point_update(index, value)

    # ============================================================================
    # PERFORMANCE CHARACTERISTICS
    # ============================================================================

    @property
    def backend_info(self) -> Dict[str, Any]:
        """Get backend implementation info."""
        return {
            'strategy': 'FENWICK_TREE',
            'backend': 'Binary Indexed Tree',
            'indexing': '1-based internal, 0-based external',
            'complexity': {
                'prefix_sum': 'O(log n)',
                'range_sum': 'O(log n)',
                'point_update': 'O(log n)',
                'point_add': 'O(log n)',
                'space': 'O(n)'
            }
        }

    @property
    def metrics(self) -> Dict[str, Any]:
        """Get performance metrics."""
        tree_utilization = self._size / max(1, len(self._tree) - 1) * 100

        return {
            'size': self._size,
            'tree_capacity': len(self._tree) - 1,
            'tree_utilization': f"{tree_utilization:.1f}%",
            'total_sum': self.total_sum(),
            'memory_usage': f"{len(self._tree) * 8 + self._size * 24} bytes (estimated)",
            'next_index': self._next_index
        }
|