exonware-xwnode 0.0.1.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- exonware/__init__.py +14 -0
- exonware/xwnode/__init__.py +127 -0
- exonware/xwnode/base.py +676 -0
- exonware/xwnode/config.py +178 -0
- exonware/xwnode/contracts.py +730 -0
- exonware/xwnode/errors.py +503 -0
- exonware/xwnode/facade.py +460 -0
- exonware/xwnode/strategies/__init__.py +158 -0
- exonware/xwnode/strategies/advisor.py +463 -0
- exonware/xwnode/strategies/edges/__init__.py +32 -0
- exonware/xwnode/strategies/edges/adj_list.py +227 -0
- exonware/xwnode/strategies/edges/adj_matrix.py +391 -0
- exonware/xwnode/strategies/edges/base.py +169 -0
- exonware/xwnode/strategies/flyweight.py +328 -0
- exonware/xwnode/strategies/impls/__init__.py +13 -0
- exonware/xwnode/strategies/impls/_base_edge.py +403 -0
- exonware/xwnode/strategies/impls/_base_node.py +307 -0
- exonware/xwnode/strategies/impls/edge_adj_list.py +353 -0
- exonware/xwnode/strategies/impls/edge_adj_matrix.py +445 -0
- exonware/xwnode/strategies/impls/edge_bidir_wrapper.py +455 -0
- exonware/xwnode/strategies/impls/edge_block_adj_matrix.py +539 -0
- exonware/xwnode/strategies/impls/edge_coo.py +533 -0
- exonware/xwnode/strategies/impls/edge_csc.py +447 -0
- exonware/xwnode/strategies/impls/edge_csr.py +492 -0
- exonware/xwnode/strategies/impls/edge_dynamic_adj_list.py +503 -0
- exonware/xwnode/strategies/impls/edge_flow_network.py +555 -0
- exonware/xwnode/strategies/impls/edge_hyperedge_set.py +516 -0
- exonware/xwnode/strategies/impls/edge_neural_graph.py +650 -0
- exonware/xwnode/strategies/impls/edge_octree.py +574 -0
- exonware/xwnode/strategies/impls/edge_property_store.py +655 -0
- exonware/xwnode/strategies/impls/edge_quadtree.py +519 -0
- exonware/xwnode/strategies/impls/edge_rtree.py +820 -0
- exonware/xwnode/strategies/impls/edge_temporal_edgeset.py +558 -0
- exonware/xwnode/strategies/impls/edge_tree_graph_basic.py +271 -0
- exonware/xwnode/strategies/impls/edge_weighted_graph.py +411 -0
- exonware/xwnode/strategies/manager.py +775 -0
- exonware/xwnode/strategies/metrics.py +538 -0
- exonware/xwnode/strategies/migration.py +432 -0
- exonware/xwnode/strategies/nodes/__init__.py +50 -0
- exonware/xwnode/strategies/nodes/_base_node.py +307 -0
- exonware/xwnode/strategies/nodes/adjacency_list.py +267 -0
- exonware/xwnode/strategies/nodes/aho_corasick.py +345 -0
- exonware/xwnode/strategies/nodes/array_list.py +209 -0
- exonware/xwnode/strategies/nodes/base.py +247 -0
- exonware/xwnode/strategies/nodes/deque.py +200 -0
- exonware/xwnode/strategies/nodes/hash_map.py +135 -0
- exonware/xwnode/strategies/nodes/heap.py +307 -0
- exonware/xwnode/strategies/nodes/linked_list.py +232 -0
- exonware/xwnode/strategies/nodes/node_aho_corasick.py +520 -0
- exonware/xwnode/strategies/nodes/node_array_list.py +175 -0
- exonware/xwnode/strategies/nodes/node_avl_tree.py +371 -0
- exonware/xwnode/strategies/nodes/node_b_plus_tree.py +542 -0
- exonware/xwnode/strategies/nodes/node_bitmap.py +420 -0
- exonware/xwnode/strategies/nodes/node_bitset_dynamic.py +513 -0
- exonware/xwnode/strategies/nodes/node_bloom_filter.py +347 -0
- exonware/xwnode/strategies/nodes/node_btree.py +357 -0
- exonware/xwnode/strategies/nodes/node_count_min_sketch.py +470 -0
- exonware/xwnode/strategies/nodes/node_cow_tree.py +473 -0
- exonware/xwnode/strategies/nodes/node_cuckoo_hash.py +392 -0
- exonware/xwnode/strategies/nodes/node_fenwick_tree.py +301 -0
- exonware/xwnode/strategies/nodes/node_hash_map.py +269 -0
- exonware/xwnode/strategies/nodes/node_heap.py +191 -0
- exonware/xwnode/strategies/nodes/node_hyperloglog.py +407 -0
- exonware/xwnode/strategies/nodes/node_linked_list.py +409 -0
- exonware/xwnode/strategies/nodes/node_lsm_tree.py +400 -0
- exonware/xwnode/strategies/nodes/node_ordered_map.py +390 -0
- exonware/xwnode/strategies/nodes/node_ordered_map_balanced.py +565 -0
- exonware/xwnode/strategies/nodes/node_patricia.py +512 -0
- exonware/xwnode/strategies/nodes/node_persistent_tree.py +378 -0
- exonware/xwnode/strategies/nodes/node_radix_trie.py +452 -0
- exonware/xwnode/strategies/nodes/node_red_black_tree.py +497 -0
- exonware/xwnode/strategies/nodes/node_roaring_bitmap.py +570 -0
- exonware/xwnode/strategies/nodes/node_segment_tree.py +289 -0
- exonware/xwnode/strategies/nodes/node_set_hash.py +354 -0
- exonware/xwnode/strategies/nodes/node_set_tree.py +480 -0
- exonware/xwnode/strategies/nodes/node_skip_list.py +316 -0
- exonware/xwnode/strategies/nodes/node_splay_tree.py +393 -0
- exonware/xwnode/strategies/nodes/node_suffix_array.py +487 -0
- exonware/xwnode/strategies/nodes/node_treap.py +387 -0
- exonware/xwnode/strategies/nodes/node_tree_graph_hybrid.py +1434 -0
- exonware/xwnode/strategies/nodes/node_trie.py +252 -0
- exonware/xwnode/strategies/nodes/node_union_find.py +187 -0
- exonware/xwnode/strategies/nodes/node_xdata_optimized.py +369 -0
- exonware/xwnode/strategies/nodes/priority_queue.py +209 -0
- exonware/xwnode/strategies/nodes/queue.py +161 -0
- exonware/xwnode/strategies/nodes/sparse_matrix.py +206 -0
- exonware/xwnode/strategies/nodes/stack.py +152 -0
- exonware/xwnode/strategies/nodes/trie.py +274 -0
- exonware/xwnode/strategies/nodes/union_find.py +283 -0
- exonware/xwnode/strategies/pattern_detector.py +603 -0
- exonware/xwnode/strategies/performance_monitor.py +487 -0
- exonware/xwnode/strategies/queries/__init__.py +24 -0
- exonware/xwnode/strategies/queries/base.py +236 -0
- exonware/xwnode/strategies/queries/cql.py +201 -0
- exonware/xwnode/strategies/queries/cypher.py +181 -0
- exonware/xwnode/strategies/queries/datalog.py +70 -0
- exonware/xwnode/strategies/queries/elastic_dsl.py +70 -0
- exonware/xwnode/strategies/queries/eql.py +70 -0
- exonware/xwnode/strategies/queries/flux.py +70 -0
- exonware/xwnode/strategies/queries/gql.py +70 -0
- exonware/xwnode/strategies/queries/graphql.py +240 -0
- exonware/xwnode/strategies/queries/gremlin.py +181 -0
- exonware/xwnode/strategies/queries/hiveql.py +214 -0
- exonware/xwnode/strategies/queries/hql.py +70 -0
- exonware/xwnode/strategies/queries/jmespath.py +219 -0
- exonware/xwnode/strategies/queries/jq.py +66 -0
- exonware/xwnode/strategies/queries/json_query.py +66 -0
- exonware/xwnode/strategies/queries/jsoniq.py +248 -0
- exonware/xwnode/strategies/queries/kql.py +70 -0
- exonware/xwnode/strategies/queries/linq.py +238 -0
- exonware/xwnode/strategies/queries/logql.py +70 -0
- exonware/xwnode/strategies/queries/mql.py +68 -0
- exonware/xwnode/strategies/queries/n1ql.py +210 -0
- exonware/xwnode/strategies/queries/partiql.py +70 -0
- exonware/xwnode/strategies/queries/pig.py +215 -0
- exonware/xwnode/strategies/queries/promql.py +70 -0
- exonware/xwnode/strategies/queries/sparql.py +220 -0
- exonware/xwnode/strategies/queries/sql.py +275 -0
- exonware/xwnode/strategies/queries/xml_query.py +66 -0
- exonware/xwnode/strategies/queries/xpath.py +223 -0
- exonware/xwnode/strategies/queries/xquery.py +258 -0
- exonware/xwnode/strategies/queries/xwnode_executor.py +332 -0
- exonware/xwnode/strategies/queries/xwquery_strategy.py +424 -0
- exonware/xwnode/strategies/registry.py +604 -0
- exonware/xwnode/strategies/simple.py +273 -0
- exonware/xwnode/strategies/utils.py +532 -0
- exonware/xwnode/types.py +912 -0
- exonware/xwnode/version.py +78 -0
- exonware_xwnode-0.0.1.12.dist-info/METADATA +169 -0
- exonware_xwnode-0.0.1.12.dist-info/RECORD +132 -0
- exonware_xwnode-0.0.1.12.dist-info/WHEEL +4 -0
- exonware_xwnode-0.0.1.12.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,512 @@
|
|
1
|
+
"""
|
2
|
+
PATRICIA Trie Node Strategy Implementation
|
3
|
+
|
4
|
+
This module implements the PATRICIA strategy (Practical Algorithm to
|
5
|
+
Retrieve Information Coded in Alphanumeric) for binary trie compression.
|
6
|
+
"""
|
7
|
+
|
8
|
+
from typing import Any, Iterator, List, Dict, Optional, Tuple
|
9
|
+
from .base import ANodeTreeStrategy
|
10
|
+
from ...types import NodeMode, NodeTrait
|
11
|
+
|
12
|
+
|
13
|
+
class PatriciaNode:
    """
    A single node of the PATRICIA (compressed binary) trie.

    A node is a leaf when it is created with ``bit_position == -1``; a leaf
    carries a full ``key`` and its ``value``.  Any other ``bit_position``
    makes the node internal: it names the bit to test, with ``left`` taken
    for a 0 bit and ``right`` for a 1 bit.
    """

    def __init__(self, bit_position: int = -1, key: str = "", value: Any = None):
        # Leaf-ness is decided once, at construction time, from the
        # sentinel bit position.
        self.is_leaf = bit_position == -1

        # Payload (meaningful for leaves).
        self.key = key
        self.value = value

        # Branching information (meaningful for internal nodes).
        self.bit_position = bit_position
        self.left: Optional['PatriciaNode'] = None   # followed on a 0 bit
        self.right: Optional['PatriciaNode'] = None  # followed on a 1 bit

    def is_internal(self) -> bool:
        """Return True when this node is a branching node rather than a leaf."""
        if self.is_leaf:
            return False
        return True
|
27
|
+
|
28
|
+
|
29
|
+
class PatriciaStrategy(ANodeTreeStrategy):
    """
    PATRICIA node strategy: a string-keyed map backed by a binary trie.

    Every key is turned into a bit string; internal nodes name one bit
    position to test and leaves hold the full key and its value.  A lookup
    follows the tested bits down to a single leaf and then compares the
    stored key with the requested one.

    Implementation notes:
        * Branching uses the key's bits followed by a single ``'1'``
          terminator.  Reads past the end of a bit string yield 0, so
          without the terminator two keys that differ only by trailing
          zero bits (``"1"`` / ``"10"``, ``"a"`` / ``"a\\x00"``) would have
          no distinguishing bit.
        * Removal physically unlinks the leaf and its parent branch node,
          so no placeholder leaves remain in the tree.
        * The bits tested along a path are not necessarily in increasing
          order, so a left-to-right walk is not sorted; ``keys()``,
          ``values()`` and ``items()`` sort explicitly.

    Options (keyword arguments):
        case_sensitive: when False, keys are lower-cased before use.
            Defaults to True.
        use_bit_strings: when True, keys are taken to already be strings
            of ``'0'``/``'1'`` characters.  Defaults to False.
    """

    def __init__(self, traits: NodeTrait = NodeTrait.NONE, **options):
        """Initialize an empty PATRICIA strategy with the given options."""
        super().__init__(NodeMode.PATRICIA, traits, **options)

        self.case_sensitive = options.get('case_sensitive', True)
        self.use_bit_strings = options.get('use_bit_strings', False)  # Keys are already binary

        # Core PATRICIA trie
        self._root: Optional[PatriciaNode] = None
        self._size = 0

        # Statistics
        self._total_nodes = 0       # leaves + internal nodes currently linked
        self._max_depth = 0         # refreshed by get_tree_depth()
        self._total_bits_saved = 0  # tracks the number of internal nodes

    def get_supported_traits(self) -> NodeTrait:
        """Get the traits supported by the PATRICIA strategy."""
        return (NodeTrait.ORDERED | NodeTrait.INDEXED | NodeTrait.COMPRESSED | NodeTrait.HIERARCHICAL)

    # ============================================================================
    # KEY ENCODING HELPERS
    # ============================================================================

    def _normalize_key(self, key: str) -> str:
        """Return the key unchanged, or lower-cased when case-insensitive."""
        return key if self.case_sensitive else key.lower()

    def _string_to_bits(self, s: str) -> str:
        """
        Convert a string to its binary representation.

        In bit-string mode the input is returned as-is.  Otherwise the
        string is encoded as UTF-8 and every byte is rendered as 8 bits, so
        distinct strings always produce distinct bit strings (for ASCII text
        this is the same as 8 bits per character).
        """
        if self.use_bit_strings:
            return s  # Already binary by contract

        # 'surrogatepass' keeps lone surrogates encodable instead of raising.
        encoded = s.encode('utf-8', errors='surrogatepass')
        return "".join(format(byte, '08b') for byte in encoded)

    def _branch_bits(self, normalized_key: str) -> str:
        """
        Return the bit string used for branching decisions.

        This is the key's bits followed by a ``'1'`` terminator, which keeps
        zero-padded encodings of different keys distinct.
        """
        return self._string_to_bits(normalized_key) + "1"

    def _is_valid_bit_key(self, normalized_key: str) -> bool:
        """Return False only for non-binary keys while in bit-string mode."""
        if not self.use_bit_strings:
            return True
        return set(normalized_key) <= {"0", "1"}

    def _get_bit(self, bit_string: str, position: int) -> int:
        """Get bit at position (0 or 1); returns 0 if position >= length."""
        if position >= len(bit_string):
            return 0
        return int(bit_string[position])

    def _find_first_differing_bit(self, key1: str, key2: str) -> int:
        """
        Find the first position where the branching bits of two keys differ.

        Both keys are encoded with ``_branch_bits`` and the shorter encoding
        is treated as zero-padded.  If no position differs (the keys are
        identical), the length of the longer encoding is returned.
        """
        bits1 = self._branch_bits(key1)
        bits2 = self._branch_bits(key2)
        longest = max(len(bits1), len(bits2))

        for i in range(longest):
            if self._get_bit(bits1, i) != self._get_bit(bits2, i):
                return i

        return longest

    # ============================================================================
    # TREE HELPERS
    # ============================================================================

    def _walk(self, node: Optional[PatriciaNode]) -> Iterator[PatriciaNode]:
        """
        Yield every node of the subtree, parents first and left before right.

        Uses an explicit stack so that deep tries cannot exhaust the
        interpreter's recursion limit.
        """
        stack = [node]
        while stack:
            current = stack.pop()
            if current is None:
                continue
            yield current
            if not current.is_leaf:
                # Push right first so the left subtree is visited first.
                stack.append(current.right)
                stack.append(current.left)

    def _search_node(self, key: str) -> Optional[PatriciaNode]:
        """Return the leaf storing ``key`` (after normalization), or None."""
        normalized_key = self._normalize_key(key)
        if self._root is None or not self._is_valid_bit_key(normalized_key):
            # A non-binary key can never have been stored in bit-string mode.
            return None

        bits = self._branch_bits(normalized_key)
        current = self._root

        # Follow the tested bits down to a leaf.
        while current is not None and current.is_internal():
            if self._get_bit(bits, current.bit_position) == 0:
                current = current.left
            else:
                current = current.right

        # The path only checks some bits; confirm with a full key comparison.
        if current is not None and current.key == normalized_key:
            return current
        return None

    def _insert_node(self, key: str, value: Any) -> None:
        """
        Insert a key-value pair, or overwrite the value of an existing key.

        Raises:
            ValueError: in bit-string mode, when the key contains characters
                other than ``'0'`` and ``'1'``.
        """
        normalized_key = self._normalize_key(key)
        if not self._is_valid_bit_key(normalized_key):
            raise ValueError(
                f"Key {normalized_key!r} is not a binary string; "
                "use_bit_strings=True requires keys made only of '0' and '1'"
            )

        if self._root is None:
            # First insertion: the tree is a single leaf.
            self._root = PatriciaNode(-1, normalized_key, value)
            self._size += 1
            self._total_nodes += 1
            return

        # Descend to the leaf this key's bits lead to, remembering the parent
        # and which side we took so the leaf can be replaced afterwards.
        bits = self._branch_bits(normalized_key)
        parent: Optional[PatriciaNode] = None
        went_left = False
        current: Optional[PatriciaNode] = self._root

        while current is not None and current.is_internal():
            parent = current
            went_left = self._get_bit(bits, current.bit_position) == 0
            current = current.left if went_left else current.right

        # Existing key: update in place, counters unchanged.
        if current is not None and current.key == normalized_key:
            current.value = value
            return

        new_leaf = PatriciaNode(-1, normalized_key, value)

        if current is None:
            # Defensive: an internal node with a missing child.  The root is
            # known to be non-None here, so ``parent`` is set.
            if went_left:
                parent.left = new_leaf
            else:
                parent.right = new_leaf
            self._size += 1
            self._total_nodes += 1
            return

        # Split the reached leaf: a new branch node tests the first bit on
        # which the new key and the leaf's key disagree.
        diff_bit = self._find_first_differing_bit(normalized_key, current.key)
        branch = PatriciaNode(diff_bit)

        if self._get_bit(bits, diff_bit) == 0:
            branch.left, branch.right = new_leaf, current
        else:
            branch.left, branch.right = current, new_leaf

        if parent is None:
            self._root = branch
        elif went_left:
            parent.left = branch
        else:
            parent.right = branch

        # One new leaf plus one new branch node.
        self._size += 1
        self._total_nodes += 2
        self._total_bits_saved += 1

    def _collect_all_pairs(self, node: Optional[PatriciaNode]) -> List[Tuple[str, Any]]:
        """Collect all (key, value) pairs of the subtree, leftmost leaf first."""
        return [(n.key, n.value) for n in self._walk(node) if n.is_leaf]

    def _collect_with_prefix(self, node: Optional[PatriciaNode], prefix: str) -> List[Tuple[str, Any]]:
        """Collect the (key, value) pairs of the subtree whose key starts with ``prefix``."""
        return [
            (stored_key, stored_value)
            for stored_key, stored_value in self._collect_all_pairs(node)
            if stored_key.startswith(prefix)
        ]

    def _sorted_pairs(self) -> List[Tuple[str, Any]]:
        """Return all (key, value) pairs sorted by key only (values are never compared)."""
        return sorted(self._collect_all_pairs(self._root), key=lambda pair: pair[0])

    # ============================================================================
    # CORE OPERATIONS
    # ============================================================================

    def put(self, key: Any, value: Any = None) -> None:
        """
        Add a key-value pair; the key is converted with ``str()`` first.

        Raises:
            ValueError: in bit-string mode, when the key is not binary.
        """
        key_str = str(key)
        self._insert_node(key_str, value)

    def get(self, key: Any, default: Any = None) -> Any:
        """
        Get the value stored for ``key``, or ``default`` when absent.

        Two key names are reserved and answered before any lookup:
        ``"trie_info"`` returns a dict of summary statistics and
        ``"all_keys"`` returns the list of stored keys in tree order.
        """
        key_str = str(key)

        if key_str == "trie_info":
            return {
                'size': self._size,
                'total_nodes': self._total_nodes,
                'max_depth': self.get_tree_depth(),
                'case_sensitive': self.case_sensitive,
                'use_bit_strings': self.use_bit_strings,
                'compression_ratio': self._total_bits_saved / max(1, self._total_nodes)
            }
        elif key_str == "all_keys":
            return [stored_key for stored_key, _ in self._collect_all_pairs(self._root)]

        node = self._search_node(key_str)
        return node.value if node else default

    def has(self, key: Any) -> bool:
        """Check if key exists (the reserved names always report True)."""
        key_str = str(key)

        if key_str in ["trie_info", "all_keys"]:
            return True

        return self._search_node(key_str) is not None

    def remove(self, key: Any) -> bool:
        """
        Remove ``key`` from the trie.

        The leaf is unlinked and its sibling subtree takes the place of the
        parent branch node, so no placeholder remains in the tree.

        Returns:
            True if the key was present and has been removed, else False.
        """
        normalized_key = self._normalize_key(str(key))
        if self._root is None or not self._is_valid_bit_key(normalized_key):
            return False

        bits = self._branch_bits(normalized_key)
        grandparent: Optional[PatriciaNode] = None
        parent: Optional[PatriciaNode] = None
        current: Optional[PatriciaNode] = self._root

        while current is not None and current.is_internal():
            grandparent, parent = parent, current
            if self._get_bit(bits, current.bit_position) == 0:
                current = current.left
            else:
                current = current.right

        if current is None or current.key != normalized_key:
            return False

        if parent is None:
            # The tree was a single leaf.
            self._root = None
            self._total_nodes -= 1
        else:
            sibling = parent.right if parent.left is current else parent.left
            if grandparent is None:
                self._root = sibling
            elif grandparent.left is parent:
                grandparent.left = sibling
            else:
                grandparent.right = sibling
            # Both the leaf and its parent branch node are gone.
            self._total_nodes -= 2
            self._total_bits_saved -= 1

        self._size -= 1
        return True

    def delete(self, key: Any) -> bool:
        """Remove key from trie (alias for remove)."""
        return self.remove(key)

    def clear(self) -> None:
        """Clear all data and reset the statistics."""
        self._root = None
        self._size = 0
        self._total_nodes = 0
        self._max_depth = 0
        self._total_bits_saved = 0

    def keys(self) -> Iterator[str]:
        """Iterate over all keys in lexicographic order."""
        for stored_key, _ in self._sorted_pairs():
            yield stored_key

    def values(self) -> Iterator[Any]:
        """Iterate over all values, ordered by their keys."""
        for _, stored_value in self._sorted_pairs():
            yield stored_value

    def items(self) -> Iterator[tuple[str, Any]]:
        """Iterate over all (key, value) pairs, ordered by key."""
        for stored_key, stored_value in self._sorted_pairs():
            yield (stored_key, stored_value)

    def __len__(self) -> int:
        """Get number of keys."""
        return self._size

    def to_native(self) -> Dict[str, Any]:
        """Convert to a native Python dict of key -> value."""
        return dict(self._collect_all_pairs(self._root))

    @property
    def is_list(self) -> bool:
        """This is not a list strategy."""
        return False

    @property
    def is_dict(self) -> bool:
        """This behaves like a dict."""
        return True

    # ============================================================================
    # PATRICIA SPECIFIC OPERATIONS
    # ============================================================================

    def find_with_prefix(self, prefix: str) -> List[Tuple[str, Any]]:
        """Find all (key, value) pairs whose key starts with the given prefix."""
        normalized_prefix = self._normalize_key(prefix)
        return self._collect_with_prefix(self._root, normalized_prefix)

    def get_keys_with_prefix(self, prefix: str) -> List[str]:
        """Get keys starting with given prefix."""
        return [stored_key for stored_key, _ in self.find_with_prefix(prefix)]

    def longest_common_prefix(self) -> str:
        """
        Find the longest common prefix of all stored keys.

        Returns "" for an empty trie and the key itself when only one key is
        stored.  In bit-string mode the result is a prefix of bits, since the
        keys themselves are bit strings.
        """
        stored_keys = [stored_key for stored_key, _ in self._collect_all_pairs(self._root)]
        if not stored_keys:
            return ""

        # The common prefix of a set of strings equals the common prefix of
        # its lexicographically smallest and largest members.
        lowest = min(stored_keys)
        highest = max(stored_keys)

        shared = 0
        for low_char, high_char in zip(lowest, highest):
            if low_char != high_char:
                break
            shared += 1
        return lowest[:shared]

    def get_tree_depth(self) -> int:
        """
        Calculate the maximum depth of the trie (root at depth 0).

        An empty trie and a single-leaf trie both have depth 0.  The result
        is also cached in ``self._max_depth``.
        """
        deepest = 0
        stack = [(self._root, 0)]

        while stack:
            node, depth = stack.pop()
            if node is None or node.is_leaf:
                if depth > deepest:
                    deepest = depth
                continue
            stack.append((node.left, depth + 1))
            stack.append((node.right, depth + 1))

        self._max_depth = deepest
        return deepest

    def get_compression_statistics(self) -> Dict[str, Any]:
        """
        Get detailed compression statistics.

        Node counts are taken from a walk over the tree;
        ``theoretical_bits`` is 8 bits for every character of every key.
        """
        internal_nodes = 0
        leaf_nodes = 0
        total_chars = 0

        for node in self._walk(self._root):
            if node.is_leaf:
                leaf_nodes += 1
                total_chars += len(node.key)
            else:
                internal_nodes += 1

        total_nodes = internal_nodes + leaf_nodes
        theoretical_bits = total_chars * 8  # Without compression

        return {
            'internal_nodes': internal_nodes,
            'leaf_nodes': leaf_nodes,
            'total_nodes': total_nodes,
            'theoretical_bits': theoretical_bits,
            'compression_achieved': self._total_bits_saved,
            'compression_ratio': self._total_bits_saved / max(1, theoretical_bits),
            'space_efficiency': leaf_nodes / max(1, total_nodes)
        }

    def get_statistics(self) -> Dict[str, Any]:
        """Get comprehensive PATRICIA statistics (ratios formatted as percentages)."""
        compression_stats = self.get_compression_statistics()

        return {
            'size': self._size,
            'total_nodes': self._total_nodes,
            'max_depth': self.get_tree_depth(),
            'case_sensitive': self.case_sensitive,
            'use_bit_strings': self.use_bit_strings,
            'compression_statistics': compression_stats,
            'compression_ratio': f"{compression_stats['compression_ratio']:.2%}",
            'space_efficiency': f"{compression_stats['space_efficiency']:.2%}"
        }

    def export_tree_structure(self) -> Dict[str, Any]:
        """
        Export the tree as nested dicts, together with the statistics.

        Nodes receive consecutive ids in pre-order (left before right).  A
        missing node is exported as an empty dict.  The nested output mirrors
        the tree, so this method recurses once per tree level.
        """
        def _export_node(node: Optional[PatriciaNode], node_id: int = 0) -> Tuple[Dict[str, Any], int]:
            if not node:
                return {}, node_id

            if node.is_leaf:
                return {
                    'id': node_id,
                    'type': 'leaf',
                    'key': node.key,
                    'value': str(node.value)
                }, node_id + 1

            left_data, next_id = _export_node(node.left, node_id + 1)
            right_data, final_id = _export_node(node.right, next_id)

            return {
                'id': node_id,
                'type': 'internal',
                'bit_position': node.bit_position,
                'left': left_data,
                'right': right_data
            }, final_id

        tree_data, _ = _export_node(self._root)
        return {
            'tree': tree_data,
            'statistics': self.get_statistics()
        }

    # ============================================================================
    # PERFORMANCE CHARACTERISTICS
    # ============================================================================

    @property
    def backend_info(self) -> Dict[str, Any]:
        """Get backend implementation info."""
        return {
            'strategy': 'PATRICIA',
            'backend': 'Compressed binary trie (PATRICIA algorithm)',
            'case_sensitive': self.case_sensitive,
            'use_bit_strings': self.use_bit_strings,
            'complexity': {
                'insert': 'O(k)',  # k = key length in bits
                'search': 'O(k)',
                'delete': 'O(k)',
                'prefix_search': 'O(k + m)',  # m = number of matches
                'space': 'O(n)',  # n = number of internal nodes
                'compression': 'Binary path compression'
            }
        }

    @property
    def metrics(self) -> Dict[str, Any]:
        """Get performance metrics derived from get_statistics()."""
        stats = self.get_statistics()
        comp_stats = stats['compression_statistics']

        return {
            'size': stats['size'],
            'total_nodes': stats['total_nodes'],
            'max_depth': stats['max_depth'],
            'compression_ratio': stats['compression_ratio'],
            'space_efficiency': stats['space_efficiency'],
            'internal_nodes': comp_stats['internal_nodes'],
            'memory_usage': f"{stats['total_nodes'] * 60} bytes (estimated)"
        }
|