exonware-xwnode 0.0.1.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132) hide show
  1. exonware/__init__.py +14 -0
  2. exonware/xwnode/__init__.py +127 -0
  3. exonware/xwnode/base.py +676 -0
  4. exonware/xwnode/config.py +178 -0
  5. exonware/xwnode/contracts.py +730 -0
  6. exonware/xwnode/errors.py +503 -0
  7. exonware/xwnode/facade.py +460 -0
  8. exonware/xwnode/strategies/__init__.py +158 -0
  9. exonware/xwnode/strategies/advisor.py +463 -0
  10. exonware/xwnode/strategies/edges/__init__.py +32 -0
  11. exonware/xwnode/strategies/edges/adj_list.py +227 -0
  12. exonware/xwnode/strategies/edges/adj_matrix.py +391 -0
  13. exonware/xwnode/strategies/edges/base.py +169 -0
  14. exonware/xwnode/strategies/flyweight.py +328 -0
  15. exonware/xwnode/strategies/impls/__init__.py +13 -0
  16. exonware/xwnode/strategies/impls/_base_edge.py +403 -0
  17. exonware/xwnode/strategies/impls/_base_node.py +307 -0
  18. exonware/xwnode/strategies/impls/edge_adj_list.py +353 -0
  19. exonware/xwnode/strategies/impls/edge_adj_matrix.py +445 -0
  20. exonware/xwnode/strategies/impls/edge_bidir_wrapper.py +455 -0
  21. exonware/xwnode/strategies/impls/edge_block_adj_matrix.py +539 -0
  22. exonware/xwnode/strategies/impls/edge_coo.py +533 -0
  23. exonware/xwnode/strategies/impls/edge_csc.py +447 -0
  24. exonware/xwnode/strategies/impls/edge_csr.py +492 -0
  25. exonware/xwnode/strategies/impls/edge_dynamic_adj_list.py +503 -0
  26. exonware/xwnode/strategies/impls/edge_flow_network.py +555 -0
  27. exonware/xwnode/strategies/impls/edge_hyperedge_set.py +516 -0
  28. exonware/xwnode/strategies/impls/edge_neural_graph.py +650 -0
  29. exonware/xwnode/strategies/impls/edge_octree.py +574 -0
  30. exonware/xwnode/strategies/impls/edge_property_store.py +655 -0
  31. exonware/xwnode/strategies/impls/edge_quadtree.py +519 -0
  32. exonware/xwnode/strategies/impls/edge_rtree.py +820 -0
  33. exonware/xwnode/strategies/impls/edge_temporal_edgeset.py +558 -0
  34. exonware/xwnode/strategies/impls/edge_tree_graph_basic.py +271 -0
  35. exonware/xwnode/strategies/impls/edge_weighted_graph.py +411 -0
  36. exonware/xwnode/strategies/manager.py +775 -0
  37. exonware/xwnode/strategies/metrics.py +538 -0
  38. exonware/xwnode/strategies/migration.py +432 -0
  39. exonware/xwnode/strategies/nodes/__init__.py +50 -0
  40. exonware/xwnode/strategies/nodes/_base_node.py +307 -0
  41. exonware/xwnode/strategies/nodes/adjacency_list.py +267 -0
  42. exonware/xwnode/strategies/nodes/aho_corasick.py +345 -0
  43. exonware/xwnode/strategies/nodes/array_list.py +209 -0
  44. exonware/xwnode/strategies/nodes/base.py +247 -0
  45. exonware/xwnode/strategies/nodes/deque.py +200 -0
  46. exonware/xwnode/strategies/nodes/hash_map.py +135 -0
  47. exonware/xwnode/strategies/nodes/heap.py +307 -0
  48. exonware/xwnode/strategies/nodes/linked_list.py +232 -0
  49. exonware/xwnode/strategies/nodes/node_aho_corasick.py +520 -0
  50. exonware/xwnode/strategies/nodes/node_array_list.py +175 -0
  51. exonware/xwnode/strategies/nodes/node_avl_tree.py +371 -0
  52. exonware/xwnode/strategies/nodes/node_b_plus_tree.py +542 -0
  53. exonware/xwnode/strategies/nodes/node_bitmap.py +420 -0
  54. exonware/xwnode/strategies/nodes/node_bitset_dynamic.py +513 -0
  55. exonware/xwnode/strategies/nodes/node_bloom_filter.py +347 -0
  56. exonware/xwnode/strategies/nodes/node_btree.py +357 -0
  57. exonware/xwnode/strategies/nodes/node_count_min_sketch.py +470 -0
  58. exonware/xwnode/strategies/nodes/node_cow_tree.py +473 -0
  59. exonware/xwnode/strategies/nodes/node_cuckoo_hash.py +392 -0
  60. exonware/xwnode/strategies/nodes/node_fenwick_tree.py +301 -0
  61. exonware/xwnode/strategies/nodes/node_hash_map.py +269 -0
  62. exonware/xwnode/strategies/nodes/node_heap.py +191 -0
  63. exonware/xwnode/strategies/nodes/node_hyperloglog.py +407 -0
  64. exonware/xwnode/strategies/nodes/node_linked_list.py +409 -0
  65. exonware/xwnode/strategies/nodes/node_lsm_tree.py +400 -0
  66. exonware/xwnode/strategies/nodes/node_ordered_map.py +390 -0
  67. exonware/xwnode/strategies/nodes/node_ordered_map_balanced.py +565 -0
  68. exonware/xwnode/strategies/nodes/node_patricia.py +512 -0
  69. exonware/xwnode/strategies/nodes/node_persistent_tree.py +378 -0
  70. exonware/xwnode/strategies/nodes/node_radix_trie.py +452 -0
  71. exonware/xwnode/strategies/nodes/node_red_black_tree.py +497 -0
  72. exonware/xwnode/strategies/nodes/node_roaring_bitmap.py +570 -0
  73. exonware/xwnode/strategies/nodes/node_segment_tree.py +289 -0
  74. exonware/xwnode/strategies/nodes/node_set_hash.py +354 -0
  75. exonware/xwnode/strategies/nodes/node_set_tree.py +480 -0
  76. exonware/xwnode/strategies/nodes/node_skip_list.py +316 -0
  77. exonware/xwnode/strategies/nodes/node_splay_tree.py +393 -0
  78. exonware/xwnode/strategies/nodes/node_suffix_array.py +487 -0
  79. exonware/xwnode/strategies/nodes/node_treap.py +387 -0
  80. exonware/xwnode/strategies/nodes/node_tree_graph_hybrid.py +1434 -0
  81. exonware/xwnode/strategies/nodes/node_trie.py +252 -0
  82. exonware/xwnode/strategies/nodes/node_union_find.py +187 -0
  83. exonware/xwnode/strategies/nodes/node_xdata_optimized.py +369 -0
  84. exonware/xwnode/strategies/nodes/priority_queue.py +209 -0
  85. exonware/xwnode/strategies/nodes/queue.py +161 -0
  86. exonware/xwnode/strategies/nodes/sparse_matrix.py +206 -0
  87. exonware/xwnode/strategies/nodes/stack.py +152 -0
  88. exonware/xwnode/strategies/nodes/trie.py +274 -0
  89. exonware/xwnode/strategies/nodes/union_find.py +283 -0
  90. exonware/xwnode/strategies/pattern_detector.py +603 -0
  91. exonware/xwnode/strategies/performance_monitor.py +487 -0
  92. exonware/xwnode/strategies/queries/__init__.py +24 -0
  93. exonware/xwnode/strategies/queries/base.py +236 -0
  94. exonware/xwnode/strategies/queries/cql.py +201 -0
  95. exonware/xwnode/strategies/queries/cypher.py +181 -0
  96. exonware/xwnode/strategies/queries/datalog.py +70 -0
  97. exonware/xwnode/strategies/queries/elastic_dsl.py +70 -0
  98. exonware/xwnode/strategies/queries/eql.py +70 -0
  99. exonware/xwnode/strategies/queries/flux.py +70 -0
  100. exonware/xwnode/strategies/queries/gql.py +70 -0
  101. exonware/xwnode/strategies/queries/graphql.py +240 -0
  102. exonware/xwnode/strategies/queries/gremlin.py +181 -0
  103. exonware/xwnode/strategies/queries/hiveql.py +214 -0
  104. exonware/xwnode/strategies/queries/hql.py +70 -0
  105. exonware/xwnode/strategies/queries/jmespath.py +219 -0
  106. exonware/xwnode/strategies/queries/jq.py +66 -0
  107. exonware/xwnode/strategies/queries/json_query.py +66 -0
  108. exonware/xwnode/strategies/queries/jsoniq.py +248 -0
  109. exonware/xwnode/strategies/queries/kql.py +70 -0
  110. exonware/xwnode/strategies/queries/linq.py +238 -0
  111. exonware/xwnode/strategies/queries/logql.py +70 -0
  112. exonware/xwnode/strategies/queries/mql.py +68 -0
  113. exonware/xwnode/strategies/queries/n1ql.py +210 -0
  114. exonware/xwnode/strategies/queries/partiql.py +70 -0
  115. exonware/xwnode/strategies/queries/pig.py +215 -0
  116. exonware/xwnode/strategies/queries/promql.py +70 -0
  117. exonware/xwnode/strategies/queries/sparql.py +220 -0
  118. exonware/xwnode/strategies/queries/sql.py +275 -0
  119. exonware/xwnode/strategies/queries/xml_query.py +66 -0
  120. exonware/xwnode/strategies/queries/xpath.py +223 -0
  121. exonware/xwnode/strategies/queries/xquery.py +258 -0
  122. exonware/xwnode/strategies/queries/xwnode_executor.py +332 -0
  123. exonware/xwnode/strategies/queries/xwquery_strategy.py +424 -0
  124. exonware/xwnode/strategies/registry.py +604 -0
  125. exonware/xwnode/strategies/simple.py +273 -0
  126. exonware/xwnode/strategies/utils.py +532 -0
  127. exonware/xwnode/types.py +912 -0
  128. exonware/xwnode/version.py +78 -0
  129. exonware_xwnode-0.0.1.12.dist-info/METADATA +169 -0
  130. exonware_xwnode-0.0.1.12.dist-info/RECORD +132 -0
  131. exonware_xwnode-0.0.1.12.dist-info/WHEEL +4 -0
  132. exonware_xwnode-0.0.1.12.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,520 @@
1
+ """
2
+ Aho-Corasick Node Strategy Implementation
3
+
4
+ This module implements the AHO_CORASICK strategy for efficient multi-pattern
5
+ string matching using the Aho-Corasick automaton algorithm.
6
+ """
7
+
8
+ from typing import Any, Iterator, List, Dict, Set, Optional, Tuple
9
+ from collections import deque, defaultdict
10
+ from .base import ANodeTreeStrategy
11
+ from ...types import NodeMode, NodeTrait
12
+
13
+
14
class ACNode:
    """Node in the Aho-Corasick trie.

    One node exists per distinct pattern prefix, so attributes are declared
    in ``__slots__`` to avoid a per-instance ``__dict__``.
    """

    __slots__ = ("children", "failure", "output", "pattern_indices", "depth")

    def __init__(self):
        # Outgoing trie edges, keyed by a single character.
        self.children: Dict[str, 'ACNode'] = {}
        # Failure link; None until the automaton's failure links are built.
        self.failure: Optional['ACNode'] = None
        # Patterns that end at this node.
        self.output: Set[str] = set()
        # Indices of the patterns in ``output``.
        self.pattern_indices: Set[int] = set()
        # Number of edges between the root and this node.
        self.depth: int = 0

    def is_leaf(self) -> bool:
        """Return True when this node has no children."""
        return not self.children
27
+
28
+
29
class AhoCorasickStrategy(ANodeTreeStrategy):
    """
    Aho-Corasick node strategy for multi-pattern string matching.

    Patterns are stored in a trie; failure links are built lazily before the
    first search that follows a change. Keys passed to the key-value
    interface are converted with ``str()`` and, when ``case_sensitive`` is
    False, lower-cased; values are stored under that normalized pattern.

    Options:
        case_sensitive: match case-sensitively (default True).
        enable_overlapping: stored and reported by ``backend_info``; the
            search methods in this class do not consult it.
        max_pattern_length: longest accepted pattern (default 1000).
    """

    # Only texts up to this many characters are cached by ``search_text``.
    _CACHEABLE_TEXT_LENGTH = 100
    # The search cache is emptied once it holds this many entries so that a
    # stream of distinct short texts cannot grow it without bound.
    _MAX_CACHE_ENTRIES = 1024

    def __init__(self, traits: NodeTrait = NodeTrait.NONE, **options):
        """Initialize the Aho-Corasick strategy."""
        super().__init__(NodeMode.AHO_CORASICK, traits, **options)

        self.case_sensitive = options.get('case_sensitive', True)
        self.enable_overlapping = options.get('enable_overlapping', True)
        self.max_pattern_length = options.get('max_pattern_length', 1000)

        # Core automaton
        self._root = ACNode()
        self._patterns: List[str] = []
        self._pattern_to_index: Dict[str, int] = {}
        self._automaton_built = False

        # Key-value mapping for compatibility (keyed by normalized pattern)
        self._values: Dict[str, Any] = {}
        self._size = 0

        # Statistics
        self._total_nodes = 1  # Root node
        self._max_depth = 0
        self._search_cache: Dict[str, List[Tuple[str, int, Any]]] = {}

    def get_supported_traits(self) -> NodeTrait:
        """Get the traits supported by the Aho-Corasick strategy."""
        return (NodeTrait.ORDERED | NodeTrait.INDEXED | NodeTrait.STREAMING)

    def _preprocess_pattern(self, pattern: str) -> str:
        """Return *pattern* lower-cased unless matching is case-sensitive."""
        if not self.case_sensitive:
            pattern = pattern.lower()
        return pattern

    def _preprocess_text(self, text: str) -> str:
        """Return *text* lower-cased unless matching is case-sensitive."""
        if not self.case_sensitive:
            text = text.lower()
        return text

    def _add_pattern_to_trie(self, pattern: str, pattern_index: int) -> None:
        """Insert *pattern* into the trie and mark its terminal node."""
        node = self._root

        for depth, char in enumerate(pattern, start=1):
            child = node.children.get(char)
            if child is None:
                child = ACNode()
                child.depth = depth
                node.children[char] = child
                self._total_nodes += 1
            node = child

        # Mark end of pattern
        node.output.add(pattern)
        node.pattern_indices.add(pattern_index)
        self._max_depth = max(self._max_depth, len(pattern))

    def _build_failure_links(self) -> None:
        """Build failure links breadth-first and merge inherited outputs."""
        queue = deque()

        # Depth-1 nodes always fall back to the root.
        for child in self._root.children.values():
            child.failure = self._root
            queue.append(child)

        while queue:
            current = queue.popleft()

            for char, child in current.children.items():
                queue.append(child)

                # Walk the parent's failure chain until a node with an
                # outgoing edge on ``char`` is found (or the chain ends).
                failure_node = current.failure
                while failure_node is not None and char not in failure_node.children:
                    failure_node = failure_node.failure

                if failure_node is not None:
                    child.failure = failure_node.children[char]
                else:
                    child.failure = self._root

                # The failure target is shallower, so breadth-first order has
                # already completed its output set.
                child.output.update(child.failure.output)
                child.pattern_indices.update(child.failure.pattern_indices)

    def _build_automaton(self) -> None:
        """Build failure links if they are out of date; no-op otherwise."""
        if self._automaton_built:
            return

        self._build_failure_links()
        self._automaton_built = True
        self._search_cache.clear()

    def _rebuild_automaton(self) -> None:
        """Discard the trie and rebuild it from ``self._patterns``."""
        self._root = ACNode()
        self._total_nodes = 1
        self._max_depth = 0
        self._automaton_built = False
        self._search_cache.clear()

        for i, pattern in enumerate(self._patterns):
            self._add_pattern_to_trie(pattern, i)

        self._build_automaton()

    def _step(self, state: ACNode, char: str) -> ACNode:
        """Advance from *state* on *char*.

        Follows failure links until a node with an edge on *char* is found.
        Returns the root when no such node exists; a successful transition
        never returns the root, so callers can tell the two cases apart.
        """
        node: Optional[ACNode] = state
        while node is not None and char not in node.children:
            node = node.failure
        if node is None:
            return self._root
        return node.children[char]

    # ============================================================================
    # CORE OPERATIONS (Key-based interface for compatibility)
    # ============================================================================

    def put(self, key: Any, value: Any = None) -> None:
        """Add a pattern, or update the value of an existing one.

        Args:
            key: pattern; converted with ``str()`` and normalized.
            value: metadata returned with matches. When None, the string
                form of *key* is stored instead.

        Raises:
            ValueError: if the normalized pattern is empty or longer than
                ``max_pattern_length``.
        """
        pattern = str(key)
        processed_pattern = self._preprocess_pattern(pattern)

        # An empty pattern would terminate at the root and leak into other
        # nodes' output sets through the failure links.
        if not processed_pattern:
            raise ValueError("Pattern must not be empty")

        if len(processed_pattern) > self.max_pattern_length:
            raise ValueError(f"Pattern length {len(processed_pattern)} exceeds maximum {self.max_pattern_length}")

        if processed_pattern not in self._pattern_to_index:
            pattern_index = len(self._patterns)
            self._patterns.append(processed_pattern)
            self._pattern_to_index[processed_pattern] = pattern_index

            self._add_pattern_to_trie(processed_pattern, pattern_index)
            self._automaton_built = False
            self._size += 1

        # Key by the normalized pattern: that is what searches look up.
        self._values[processed_pattern] = value if value is not None else pattern

        # Cached results may now miss this pattern or carry old metadata.
        self._search_cache.clear()

    def get(self, key: Any, default: Any = None) -> Any:
        """Get the value stored for a pattern.

        The keys ``"patterns"`` and ``"automaton_info"`` are reserved and
        return a copy of the pattern list and a summary dict respectively.
        """
        key_str = str(key)

        if key_str == "patterns":
            return self._patterns.copy()
        if key_str == "automaton_info":
            return {
                'total_nodes': self._total_nodes,
                'max_depth': self._max_depth,
                'automaton_built': self._automaton_built,
                'pattern_count': len(self._patterns)
            }

        return self._values.get(self._preprocess_pattern(key_str), default)

    def has(self, key: Any) -> bool:
        """Check whether the normalized form of *key* is a stored pattern."""
        return self._preprocess_pattern(str(key)) in self._pattern_to_index

    def remove(self, key: Any) -> bool:
        """Remove a pattern and rebuild the automaton.

        Returns:
            True if the pattern existed, False otherwise.
        """
        processed_pattern = self._preprocess_pattern(str(key))

        index = self._pattern_to_index.pop(processed_pattern, None)
        if index is None:
            return False

        del self._patterns[index]

        # Only patterns after the removed one changed position.
        for i in range(index, len(self._patterns)):
            self._pattern_to_index[self._patterns[i]] = i

        self._values.pop(processed_pattern, None)
        self._size -= 1

        self._rebuild_automaton()
        return True

    def delete(self, key: Any) -> bool:
        """Remove pattern (alias for remove)."""
        return self.remove(key)

    def clear(self) -> None:
        """Clear all patterns, values, cached searches and statistics."""
        self._root = ACNode()
        self._patterns.clear()
        self._pattern_to_index.clear()
        self._values.clear()
        self._search_cache.clear()

        self._total_nodes = 1
        self._max_depth = 0
        self._automaton_built = False
        self._size = 0

    def keys(self) -> Iterator[str]:
        """Yield every stored pattern, then the two reserved keys."""
        for pattern in self._patterns:
            yield pattern
        yield "patterns"
        yield "automaton_info"

    def values(self) -> Iterator[Any]:
        """Yield every stored value, then the reserved keys' values."""
        for value in self._values.values():
            yield value
        yield self._patterns.copy()
        yield self.get("automaton_info")

    def items(self) -> Iterator[Tuple[str, Any]]:
        """Yield (pattern, value) pairs, then the reserved entries."""
        for key, value in self._values.items():
            yield (key, value)
        yield ("patterns", self._patterns.copy())
        yield ("automaton_info", self.get("automaton_info"))

    def __len__(self) -> int:
        """Get number of patterns."""
        return self._size

    def to_native(self) -> Dict[str, Any]:
        """Convert to a plain dict of values plus the reserved entries."""
        result = dict(self._values)
        result["patterns"] = self._patterns.copy()
        result["automaton_info"] = self.get("automaton_info")
        return result

    @property
    def is_list(self) -> bool:
        """This can behave like a list for pattern access."""
        return True

    @property
    def is_dict(self) -> bool:
        """This behaves like a dict."""
        return True

    # ============================================================================
    # AHO-CORASICK SPECIFIC OPERATIONS
    # ============================================================================

    def add_pattern(self, pattern: str, metadata: Any = None) -> None:
        """Add pattern with optional metadata."""
        self.put(pattern, metadata)

    def search_text(self, text: str) -> List[Tuple[str, int, Any]]:
        """Search for all pattern matches in text.

        Returns:
            A new list of ``(pattern, start_position, metadata)`` tuples,
            where *pattern* is the normalized pattern. Matches that end at
            the same position come from a set, so their relative order is
            not defined.

        NOTE(review): positions index the preprocessed text; if lower-casing
        ever changes the text's length they would not line up with *text* --
        confirm for non-ASCII input.
        """
        if not text or not self._patterns:
            return []

        cacheable = len(text) <= self._CACHEABLE_TEXT_LENGTH
        if cacheable:
            cached = self._search_cache.get(text)
            if cached is not None:
                # Hand out a copy so callers cannot mutate the cache.
                return list(cached)

        processed_text = self._preprocess_text(text)
        self._build_automaton()

        matches = []
        current = self._root

        for i, char in enumerate(processed_text):
            current = self._step(current, char)
            if current is self._root:
                # No transition on this character; nothing ends here.
                continue

            # Report all patterns that end at this position
            for pattern in current.output:
                start_pos = i - len(pattern) + 1
                matches.append((pattern, start_pos, self._values.get(pattern)))

        if cacheable:
            if len(self._search_cache) >= self._MAX_CACHE_ENTRIES:
                self._search_cache.clear()
            self._search_cache[text] = list(matches)

        return matches

    def find_all_matches(self, text: str) -> Dict[str, List[int]]:
        """Map each matched pattern to the list of its start positions."""
        result = defaultdict(list)

        for pattern, position, _ in self.search_text(text):
            result[pattern].append(position)

        # Convert to regular dict
        return dict(result)

    def count_matches(self, text: str) -> Dict[str, int]:
        """Count occurrences of each pattern."""
        matches = self.find_all_matches(text)
        return {pattern: len(positions) for pattern, positions in matches.items()}

    def has_any_match(self, text: str) -> bool:
        """Check if text contains any of the patterns (stops at the first)."""
        if not text or not self._patterns:
            return False

        processed_text = self._preprocess_text(text)
        self._build_automaton()

        current = self._root

        for char in processed_text:
            current = self._step(current, char)
            if current is not self._root and current.output:
                return True

        return False

    def find_longest_match(self, text: str) -> Optional[Tuple[str, int, int]]:
        """Find the longest pattern match in text.

        Returns:
            ``(pattern, start_position, length)`` or None when nothing
            matches.
        """
        matches = self.search_text(text)

        if not matches:
            return None

        pattern, start_pos, _ = max(matches, key=lambda x: len(x[0]))
        return pattern, start_pos, len(pattern)

    def replace_patterns(self, text: str, replacement_func: callable = None) -> str:
        """Replace pattern matches in text.

        Matches are taken left to right; when several start at the same
        position the longest wins, and any match overlapping one already
        replaced is skipped.

        Args:
            text: text to rewrite.
            replacement_func: called as ``replacement_func(pattern,
                metadata)`` and must return the replacement string. Defaults
                to wrapping the pattern in square brackets.
        """
        if replacement_func is None:
            replacement_func = lambda pattern, metadata: f"[{pattern}]"

        matches = self.search_text(text)

        if not matches:
            return text

        # Leftmost first; longest first among matches sharing a start.
        matches.sort(key=lambda m: (m[1], -len(m[0])))

        pieces = []
        cursor = 0
        for pattern, start_pos, metadata in matches:
            if start_pos < cursor:
                # Overlaps text that has already been replaced.
                continue
            pieces.append(text[cursor:start_pos])
            pieces.append(replacement_func(pattern, metadata))
            cursor = start_pos + len(pattern)
        pieces.append(text[cursor:])

        return "".join(pieces)

    def get_pattern_statistics(self) -> Dict[str, Any]:
        """Get statistics about patterns and automaton.

        The same keys are returned whether or not any pattern is stored, so
        consumers such as ``metrics`` can index the result unconditionally.
        """
        pattern_lengths = [len(p) for p in self._patterns]
        unique_chars = set()
        for pattern in self._patterns:
            unique_chars.update(pattern)

        if pattern_lengths:
            avg_length = sum(pattern_lengths) / len(pattern_lengths)
        else:
            avg_length = 0

        return {
            'pattern_count': len(self._patterns),
            'total_nodes': self._total_nodes,
            'max_depth': self._max_depth,
            'avg_pattern_length': avg_length,
            'min_pattern_length': min(pattern_lengths, default=0),
            'max_pattern_length': max(pattern_lengths, default=0),
            'unique_characters': len(unique_chars),
            'alphabet_size': len(unique_chars),
            'automaton_built': self._automaton_built,
            'cache_size': len(self._search_cache)
        }

    def validate_automaton(self) -> bool:
        """Check that every non-root node has a failure link.

        The trie is walked with an explicit stack: its depth equals the
        longest pattern, which can exceed what recursion handles safely.
        """
        self._build_automaton()

        pending = [self._root]
        while pending:
            node = pending.pop()
            if node is not self._root and node.failure is None:
                return False
            pending.extend(node.children.values())

        return True

    def export_automaton(self) -> Dict[str, Any]:
        """Export automaton structure for analysis.

        Nodes are listed breadth-first; ids are assigned in the order nodes
        are discovered, with the root as 0.
        """
        self._build_automaton()

        nodes = []
        node_queue = deque([(self._root, 0)])
        next_id = 0

        while node_queue:
            node, current_id = node_queue.popleft()
            nodes.append({
                'id': current_id,
                'depth': node.depth,
                'children': list(node.children.keys()),
                'output': list(node.output),
                'has_failure': node.failure is not None
            })

            for child in node.children.values():
                next_id += 1
                node_queue.append((child, next_id))

        return {
            'nodes': nodes,
            'patterns': self._patterns.copy(),
            'statistics': self.get_pattern_statistics()
        }

    # ============================================================================
    # PERFORMANCE CHARACTERISTICS
    # ============================================================================

    @property
    def backend_info(self) -> Dict[str, Any]:
        """Get backend implementation info."""
        return {
            'strategy': 'AHO_CORASICK',
            'backend': 'Finite automaton with failure links',
            'case_sensitive': self.case_sensitive,
            'enable_overlapping': self.enable_overlapping,
            'max_pattern_length': self.max_pattern_length,
            # In the strings below, Σ|patterns| is the total length of all
            # patterns (a sum), not the alphabet size.
            'complexity': {
                'construction': 'O(Σ|patterns|)',
                'search': 'O(|text| + |matches|)',
                'space': 'O(Σ|patterns|)',
                'pattern_addition': 'O(|pattern|)',
                'pattern_removal': 'O(Σ|patterns|)'  # Requires rebuild
            }
        }

    @property
    def metrics(self) -> Dict[str, Any]:
        """Get performance metrics (memory figure is a rough estimate)."""
        stats = self.get_pattern_statistics()

        return {
            'patterns': stats['pattern_count'],
            'nodes': stats['total_nodes'],
            'max_depth': stats['max_depth'],
            'avg_pattern_length': f"{stats['avg_pattern_length']:.1f}",
            'alphabet_size': stats['alphabet_size'],
            'automaton_built': stats['automaton_built'],
            'cache_entries': stats['cache_size'],
            'memory_usage': f"{stats['total_nodes'] * 100 + len(self._patterns) * 50} bytes (estimated)"
        }
@@ -0,0 +1,175 @@
1
+ """
2
+ Array List Node Strategy Implementation
3
+
4
+ This module implements the ARRAY_LIST strategy for sequential data
5
+ with fast indexed access.
6
+ """
7
+
8
+ from typing import Any, Iterator, List, Union, Dict
9
+ from ._base_node import aNodeStrategy
10
+ from ...types import NodeMode, NodeTrait
11
+
12
+
13
class xArrayListStrategy(aNodeStrategy):
    """
    Array List node strategy for sequential data with O(1) indexed access.

    Backed by a Python list. ``None`` marks an empty slot: such slots are
    skipped by ``keys``/``values``/``items``/``to_native`` and are not
    counted by ``len()``.
    """

    def __init__(self, traits: NodeTrait = NodeTrait.NONE, **options):
        """Initialize the array list strategy."""
        super().__init__(NodeMode.ARRAY_LIST, traits, **options)
        self._data: List[Any] = []
        # Number of slots in ``_data`` that are not None.
        self._size = 0

    def get_supported_traits(self) -> NodeTrait:
        """Get the traits supported by the array list strategy."""
        return (NodeTrait.ORDERED | NodeTrait.INDEXED)

    # ============================================================================
    # CORE OPERATIONS (Key-based interface for compatibility)
    # ============================================================================

    def put(self, key: Any, value: Any = None) -> None:
        """Store a value at index, padding with empty slots if needed.

        Args:
            key: anything ``int()`` accepts. A negative index addresses an
                existing slot from the end, as with a plain list.
            value: value to store; None leaves the slot empty.

        Raises:
            TypeError: if *key* cannot be converted to an integer.
            IndexError: if a negative index is out of range.
        """
        try:
            index = int(key)
        except (ValueError, TypeError) as exc:
            raise TypeError(f"Array list requires numeric indices, got {type(key).__name__}") from exc

        # Grow in one step rather than one append per missing slot.
        shortfall = index + 1 - len(self._data)
        if shortfall > 0:
            self._data.extend([None] * shortfall)

        was_filled = self._data[index] is not None
        self._data[index] = value

        # Keep the count equal to the number of non-None slots.
        if value is not None and not was_filled:
            self._size += 1
        elif value is None and was_filled:
            self._size -= 1

    def get(self, key: Any, default: Any = None) -> Any:
        """Retrieve a value by index.

        Returns *default* when the key is not an integer, the index is
        negative or out of range, or the slot is empty.
        """
        try:
            index = int(key)
        except (ValueError, TypeError):
            return default

        if 0 <= index < len(self._data):
            value = self._data[index]
            return value if value is not None else default
        return default

    def has(self, key: Any) -> bool:
        """Check if index exists and has a value."""
        try:
            index = int(key)
        except (ValueError, TypeError):
            return False
        return 0 <= index < len(self._data) and self._data[index] is not None

    def remove(self, key: Any) -> bool:
        """Empty the slot at index without shifting later elements.

        Returns:
            True if a value was removed, False otherwise.
        """
        try:
            index = int(key)
        except (ValueError, TypeError):
            return False

        if 0 <= index < len(self._data) and self._data[index] is not None:
            self._data[index] = None
            self._size -= 1
            return True
        return False

    def delete(self, key: Any) -> bool:
        """Remove value at index (alias for remove)."""
        return self.remove(key)

    def clear(self) -> None:
        """Clear all data."""
        self._data.clear()
        self._size = 0

    def keys(self) -> Iterator[str]:
        """Get the indices of all filled slots, as strings."""
        return (str(i) for i, value in enumerate(self._data) if value is not None)

    def values(self) -> Iterator[Any]:
        """Get the values of all filled slots."""
        return (value for value in self._data if value is not None)

    def items(self) -> Iterator[tuple[str, Any]]:
        """Get (index-as-string, value) pairs for all filled slots."""
        return ((str(i), value) for i, value in enumerate(self._data) if value is not None)

    def __len__(self) -> int:
        """Get the number of non-None items."""
        return self._size

    def to_native(self) -> List[Any]:
        """Convert to a native Python list of the non-None values, in order."""
        return [value for value in self._data if value is not None]

    @property
    def is_list(self) -> bool:
        """This is always a list strategy."""
        return True

    @property
    def is_dict(self) -> bool:
        """This is never a dict strategy."""
        return False

    # ============================================================================
    # ARRAY-SPECIFIC OPERATIONS
    # ============================================================================

    def append(self, value: Any) -> None:
        """Append a value to the end (None adds an empty slot)."""
        self._data.append(value)
        if value is not None:
            self._size += 1

    def insert(self, index: int, value: Any) -> None:
        """Insert a value at the specified index (None adds an empty slot)."""
        self._data.insert(index, value)
        if value is not None:
            self._size += 1

    def pop(self, index: int = -1) -> Any:
        """Remove and return the slot at index, shifting later elements.

        Raises:
            IndexError: if the list is empty or *index* is out of range.
        """
        if not self._data:
            raise IndexError("pop from empty list")
        value = self._data.pop(index)
        if value is not None:
            self._size -= 1
        return value

    def extend(self, values: List[Any]) -> None:
        """Extend with multiple values; any iterable is accepted."""
        # Materialize once so one-shot iterables can be stored and counted.
        new_items = list(values)
        self._data.extend(new_items)
        self._size += sum(1 for item in new_items if item is not None)

    # ============================================================================
    # PERFORMANCE CHARACTERISTICS
    # ============================================================================

    @property
    def backend_info(self) -> Dict[str, Any]:
        """Get backend implementation info."""
        return {
            'strategy': 'ARRAY_LIST',
            'backend': 'Python list',
            'complexity': {
                'get': 'O(1)',
                'put': 'O(1) amortized',
                'append': 'O(1) amortized',
                'insert': 'O(n)',
                'pop': 'O(1) end, O(n) middle'
            }
        }

    @property
    def metrics(self) -> Dict[str, Any]:
        """Get performance metrics (memory figure is a rough estimate)."""
        return {
            'size': self._size,
            'capacity': len(self._data),
            'memory_usage': f"{len(self._data) * 8} bytes (estimated)",
            'utilization': f"{(self._size / max(1, len(self._data))) * 100:.1f}%"
        }