exonware-xwnode 0.0.1.22__py3-none-any.whl → 0.0.1.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (249)
  1. exonware/__init__.py +1 -1
  2. exonware/xwnode/__init__.py +18 -5
  3. exonware/xwnode/add_strategy_types.py +165 -0
  4. exonware/xwnode/common/__init__.py +1 -1
  5. exonware/xwnode/common/graph/__init__.py +30 -0
  6. exonware/xwnode/common/graph/caching.py +131 -0
  7. exonware/xwnode/common/graph/contracts.py +100 -0
  8. exonware/xwnode/common/graph/errors.py +44 -0
  9. exonware/xwnode/common/graph/indexing.py +260 -0
  10. exonware/xwnode/common/graph/manager.py +568 -0
  11. exonware/xwnode/common/management/__init__.py +3 -5
  12. exonware/xwnode/common/management/manager.py +2 -2
  13. exonware/xwnode/common/management/migration.py +3 -3
  14. exonware/xwnode/common/monitoring/__init__.py +3 -5
  15. exonware/xwnode/common/monitoring/metrics.py +6 -2
  16. exonware/xwnode/common/monitoring/pattern_detector.py +1 -1
  17. exonware/xwnode/common/monitoring/performance_monitor.py +5 -1
  18. exonware/xwnode/common/patterns/__init__.py +3 -5
  19. exonware/xwnode/common/patterns/flyweight.py +5 -1
  20. exonware/xwnode/common/patterns/registry.py +202 -183
  21. exonware/xwnode/common/utils/__init__.py +25 -11
  22. exonware/xwnode/common/utils/simple.py +1 -1
  23. exonware/xwnode/config.py +3 -8
  24. exonware/xwnode/contracts.py +4 -105
  25. exonware/xwnode/defs.py +413 -159
  26. exonware/xwnode/edges/strategies/__init__.py +86 -4
  27. exonware/xwnode/edges/strategies/_base_edge.py +2 -2
  28. exonware/xwnode/edges/strategies/adj_list.py +287 -121
  29. exonware/xwnode/edges/strategies/adj_matrix.py +316 -222
  30. exonware/xwnode/edges/strategies/base.py +1 -1
  31. exonware/xwnode/edges/strategies/{edge_bidir_wrapper.py → bidir_wrapper.py} +45 -4
  32. exonware/xwnode/edges/strategies/bitemporal.py +520 -0
  33. exonware/xwnode/edges/strategies/{edge_block_adj_matrix.py → block_adj_matrix.py} +77 -6
  34. exonware/xwnode/edges/strategies/bv_graph.py +664 -0
  35. exonware/xwnode/edges/strategies/compressed_graph.py +217 -0
  36. exonware/xwnode/edges/strategies/{edge_coo.py → coo.py} +46 -4
  37. exonware/xwnode/edges/strategies/{edge_csc.py → csc.py} +45 -4
  38. exonware/xwnode/edges/strategies/{edge_csr.py → csr.py} +94 -12
  39. exonware/xwnode/edges/strategies/{edge_dynamic_adj_list.py → dynamic_adj_list.py} +46 -4
  40. exonware/xwnode/edges/strategies/edge_list.py +168 -0
  41. exonware/xwnode/edges/strategies/edge_property_store.py +2 -2
  42. exonware/xwnode/edges/strategies/euler_tour.py +560 -0
  43. exonware/xwnode/edges/strategies/{edge_flow_network.py → flow_network.py} +2 -2
  44. exonware/xwnode/edges/strategies/graphblas.py +449 -0
  45. exonware/xwnode/edges/strategies/hnsw.py +637 -0
  46. exonware/xwnode/edges/strategies/hop2_labels.py +467 -0
  47. exonware/xwnode/edges/strategies/{edge_hyperedge_set.py → hyperedge_set.py} +2 -2
  48. exonware/xwnode/edges/strategies/incidence_matrix.py +250 -0
  49. exonware/xwnode/edges/strategies/k2_tree.py +613 -0
  50. exonware/xwnode/edges/strategies/link_cut.py +626 -0
  51. exonware/xwnode/edges/strategies/multiplex.py +532 -0
  52. exonware/xwnode/edges/strategies/{edge_neural_graph.py → neural_graph.py} +2 -2
  53. exonware/xwnode/edges/strategies/{edge_octree.py → octree.py} +69 -11
  54. exonware/xwnode/edges/strategies/{edge_quadtree.py → quadtree.py} +66 -10
  55. exonware/xwnode/edges/strategies/roaring_adj.py +438 -0
  56. exonware/xwnode/edges/strategies/{edge_rtree.py → rtree.py} +43 -5
  57. exonware/xwnode/edges/strategies/{edge_temporal_edgeset.py → temporal_edgeset.py} +24 -5
  58. exonware/xwnode/edges/strategies/{edge_tree_graph_basic.py → tree_graph_basic.py} +78 -7
  59. exonware/xwnode/edges/strategies/{edge_weighted_graph.py → weighted_graph.py} +188 -10
  60. exonware/xwnode/errors.py +3 -6
  61. exonware/xwnode/facade.py +20 -20
  62. exonware/xwnode/nodes/strategies/__init__.py +29 -9
  63. exonware/xwnode/nodes/strategies/adjacency_list.py +650 -177
  64. exonware/xwnode/nodes/strategies/aho_corasick.py +358 -183
  65. exonware/xwnode/nodes/strategies/array_list.py +36 -3
  66. exonware/xwnode/nodes/strategies/art.py +581 -0
  67. exonware/xwnode/nodes/strategies/{node_avl_tree.py → avl_tree.py} +77 -6
  68. exonware/xwnode/nodes/strategies/{node_b_plus_tree.py → b_plus_tree.py} +81 -40
  69. exonware/xwnode/nodes/strategies/{node_btree.py → b_tree.py} +79 -9
  70. exonware/xwnode/nodes/strategies/base.py +469 -98
  71. exonware/xwnode/nodes/strategies/{node_bitmap.py → bitmap.py} +12 -12
  72. exonware/xwnode/nodes/strategies/{node_bitset_dynamic.py → bitset_dynamic.py} +11 -11
  73. exonware/xwnode/nodes/strategies/{node_bloom_filter.py → bloom_filter.py} +15 -2
  74. exonware/xwnode/nodes/strategies/bloomier_filter.py +519 -0
  75. exonware/xwnode/nodes/strategies/bw_tree.py +531 -0
  76. exonware/xwnode/nodes/strategies/contracts.py +1 -1
  77. exonware/xwnode/nodes/strategies/{node_count_min_sketch.py → count_min_sketch.py} +3 -2
  78. exonware/xwnode/nodes/strategies/{node_cow_tree.py → cow_tree.py} +135 -13
  79. exonware/xwnode/nodes/strategies/crdt_map.py +629 -0
  80. exonware/xwnode/nodes/strategies/{node_cuckoo_hash.py → cuckoo_hash.py} +2 -2
  81. exonware/xwnode/nodes/strategies/{node_xdata_optimized.py → data_interchange_optimized.py} +21 -4
  82. exonware/xwnode/nodes/strategies/dawg.py +876 -0
  83. exonware/xwnode/nodes/strategies/deque.py +321 -153
  84. exonware/xwnode/nodes/strategies/extendible_hash.py +93 -0
  85. exonware/xwnode/nodes/strategies/{node_fenwick_tree.py → fenwick_tree.py} +111 -19
  86. exonware/xwnode/nodes/strategies/hamt.py +403 -0
  87. exonware/xwnode/nodes/strategies/hash_map.py +354 -67
  88. exonware/xwnode/nodes/strategies/heap.py +105 -5
  89. exonware/xwnode/nodes/strategies/hopscotch_hash.py +525 -0
  90. exonware/xwnode/nodes/strategies/{node_hyperloglog.py → hyperloglog.py} +6 -5
  91. exonware/xwnode/nodes/strategies/interval_tree.py +742 -0
  92. exonware/xwnode/nodes/strategies/kd_tree.py +703 -0
  93. exonware/xwnode/nodes/strategies/learned_index.py +533 -0
  94. exonware/xwnode/nodes/strategies/linear_hash.py +93 -0
  95. exonware/xwnode/nodes/strategies/linked_list.py +316 -119
  96. exonware/xwnode/nodes/strategies/{node_lsm_tree.py → lsm_tree.py} +219 -15
  97. exonware/xwnode/nodes/strategies/masstree.py +130 -0
  98. exonware/xwnode/nodes/strategies/{node_persistent_tree.py → persistent_tree.py} +149 -9
  99. exonware/xwnode/nodes/strategies/priority_queue.py +544 -132
  100. exonware/xwnode/nodes/strategies/queue.py +249 -120
  101. exonware/xwnode/nodes/strategies/{node_red_black_tree.py → red_black_tree.py} +183 -72
  102. exonware/xwnode/nodes/strategies/{node_roaring_bitmap.py → roaring_bitmap.py} +19 -6
  103. exonware/xwnode/nodes/strategies/rope.py +717 -0
  104. exonware/xwnode/nodes/strategies/{node_segment_tree.py → segment_tree.py} +106 -106
  105. exonware/xwnode/nodes/strategies/{node_set_hash.py → set_hash.py} +30 -29
  106. exonware/xwnode/nodes/strategies/{node_skip_list.py → skip_list.py} +74 -6
  107. exonware/xwnode/nodes/strategies/sparse_matrix.py +427 -131
  108. exonware/xwnode/nodes/strategies/{node_splay_tree.py → splay_tree.py} +55 -6
  109. exonware/xwnode/nodes/strategies/stack.py +244 -112
  110. exonware/xwnode/nodes/strategies/{node_suffix_array.py → suffix_array.py} +5 -1
  111. exonware/xwnode/nodes/strategies/t_tree.py +94 -0
  112. exonware/xwnode/nodes/strategies/{node_treap.py → treap.py} +75 -6
  113. exonware/xwnode/nodes/strategies/{node_tree_graph_hybrid.py → tree_graph_hybrid.py} +46 -5
  114. exonware/xwnode/nodes/strategies/trie.py +153 -9
  115. exonware/xwnode/nodes/strategies/union_find.py +111 -5
  116. exonware/xwnode/nodes/strategies/veb_tree.py +856 -0
  117. exonware/xwnode/strategies/__init__.py +5 -51
  118. exonware/xwnode/version.py +3 -3
  119. exonware_xwnode-0.0.1.24.dist-info/METADATA +900 -0
  120. exonware_xwnode-0.0.1.24.dist-info/RECORD +130 -0
  121. exonware/xwnode/edges/strategies/edge_adj_list.py +0 -353
  122. exonware/xwnode/edges/strategies/edge_adj_matrix.py +0 -445
  123. exonware/xwnode/nodes/strategies/_base_node.py +0 -307
  124. exonware/xwnode/nodes/strategies/node_aho_corasick.py +0 -525
  125. exonware/xwnode/nodes/strategies/node_array_list.py +0 -179
  126. exonware/xwnode/nodes/strategies/node_hash_map.py +0 -273
  127. exonware/xwnode/nodes/strategies/node_heap.py +0 -196
  128. exonware/xwnode/nodes/strategies/node_linked_list.py +0 -413
  129. exonware/xwnode/nodes/strategies/node_trie.py +0 -257
  130. exonware/xwnode/nodes/strategies/node_union_find.py +0 -192
  131. exonware/xwnode/queries/executors/__init__.py +0 -47
  132. exonware/xwnode/queries/executors/advanced/__init__.py +0 -37
  133. exonware/xwnode/queries/executors/advanced/aggregate_executor.py +0 -50
  134. exonware/xwnode/queries/executors/advanced/ask_executor.py +0 -50
  135. exonware/xwnode/queries/executors/advanced/construct_executor.py +0 -50
  136. exonware/xwnode/queries/executors/advanced/describe_executor.py +0 -50
  137. exonware/xwnode/queries/executors/advanced/for_loop_executor.py +0 -50
  138. exonware/xwnode/queries/executors/advanced/foreach_executor.py +0 -50
  139. exonware/xwnode/queries/executors/advanced/join_executor.py +0 -50
  140. exonware/xwnode/queries/executors/advanced/let_executor.py +0 -50
  141. exonware/xwnode/queries/executors/advanced/mutation_executor.py +0 -50
  142. exonware/xwnode/queries/executors/advanced/options_executor.py +0 -50
  143. exonware/xwnode/queries/executors/advanced/pipe_executor.py +0 -50
  144. exonware/xwnode/queries/executors/advanced/subscribe_executor.py +0 -50
  145. exonware/xwnode/queries/executors/advanced/subscription_executor.py +0 -50
  146. exonware/xwnode/queries/executors/advanced/union_executor.py +0 -50
  147. exonware/xwnode/queries/executors/advanced/window_executor.py +0 -51
  148. exonware/xwnode/queries/executors/advanced/with_cte_executor.py +0 -50
  149. exonware/xwnode/queries/executors/aggregation/__init__.py +0 -21
  150. exonware/xwnode/queries/executors/aggregation/avg_executor.py +0 -50
  151. exonware/xwnode/queries/executors/aggregation/count_executor.py +0 -38
  152. exonware/xwnode/queries/executors/aggregation/distinct_executor.py +0 -50
  153. exonware/xwnode/queries/executors/aggregation/group_executor.py +0 -50
  154. exonware/xwnode/queries/executors/aggregation/having_executor.py +0 -50
  155. exonware/xwnode/queries/executors/aggregation/max_executor.py +0 -50
  156. exonware/xwnode/queries/executors/aggregation/min_executor.py +0 -50
  157. exonware/xwnode/queries/executors/aggregation/sum_executor.py +0 -50
  158. exonware/xwnode/queries/executors/aggregation/summarize_executor.py +0 -50
  159. exonware/xwnode/queries/executors/array/__init__.py +0 -9
  160. exonware/xwnode/queries/executors/array/indexing_executor.py +0 -51
  161. exonware/xwnode/queries/executors/array/slicing_executor.py +0 -51
  162. exonware/xwnode/queries/executors/base.py +0 -257
  163. exonware/xwnode/queries/executors/capability_checker.py +0 -204
  164. exonware/xwnode/queries/executors/contracts.py +0 -166
  165. exonware/xwnode/queries/executors/core/__init__.py +0 -17
  166. exonware/xwnode/queries/executors/core/create_executor.py +0 -96
  167. exonware/xwnode/queries/executors/core/delete_executor.py +0 -99
  168. exonware/xwnode/queries/executors/core/drop_executor.py +0 -100
  169. exonware/xwnode/queries/executors/core/insert_executor.py +0 -39
  170. exonware/xwnode/queries/executors/core/select_executor.py +0 -152
  171. exonware/xwnode/queries/executors/core/update_executor.py +0 -102
  172. exonware/xwnode/queries/executors/data/__init__.py +0 -13
  173. exonware/xwnode/queries/executors/data/alter_executor.py +0 -50
  174. exonware/xwnode/queries/executors/data/load_executor.py +0 -50
  175. exonware/xwnode/queries/executors/data/merge_executor.py +0 -50
  176. exonware/xwnode/queries/executors/data/store_executor.py +0 -50
  177. exonware/xwnode/queries/executors/defs.py +0 -93
  178. exonware/xwnode/queries/executors/engine.py +0 -221
  179. exonware/xwnode/queries/executors/errors.py +0 -68
  180. exonware/xwnode/queries/executors/filtering/__init__.py +0 -25
  181. exonware/xwnode/queries/executors/filtering/between_executor.py +0 -80
  182. exonware/xwnode/queries/executors/filtering/filter_executor.py +0 -79
  183. exonware/xwnode/queries/executors/filtering/has_executor.py +0 -70
  184. exonware/xwnode/queries/executors/filtering/in_executor.py +0 -70
  185. exonware/xwnode/queries/executors/filtering/like_executor.py +0 -76
  186. exonware/xwnode/queries/executors/filtering/optional_executor.py +0 -76
  187. exonware/xwnode/queries/executors/filtering/range_executor.py +0 -80
  188. exonware/xwnode/queries/executors/filtering/term_executor.py +0 -77
  189. exonware/xwnode/queries/executors/filtering/values_executor.py +0 -71
  190. exonware/xwnode/queries/executors/filtering/where_executor.py +0 -44
  191. exonware/xwnode/queries/executors/graph/__init__.py +0 -15
  192. exonware/xwnode/queries/executors/graph/in_traverse_executor.py +0 -51
  193. exonware/xwnode/queries/executors/graph/match_executor.py +0 -51
  194. exonware/xwnode/queries/executors/graph/out_executor.py +0 -51
  195. exonware/xwnode/queries/executors/graph/path_executor.py +0 -51
  196. exonware/xwnode/queries/executors/graph/return_executor.py +0 -51
  197. exonware/xwnode/queries/executors/ordering/__init__.py +0 -9
  198. exonware/xwnode/queries/executors/ordering/by_executor.py +0 -50
  199. exonware/xwnode/queries/executors/ordering/order_executor.py +0 -51
  200. exonware/xwnode/queries/executors/projection/__init__.py +0 -9
  201. exonware/xwnode/queries/executors/projection/extend_executor.py +0 -50
  202. exonware/xwnode/queries/executors/projection/project_executor.py +0 -50
  203. exonware/xwnode/queries/executors/registry.py +0 -173
  204. exonware/xwnode/queries/parsers/__init__.py +0 -26
  205. exonware/xwnode/queries/parsers/base.py +0 -86
  206. exonware/xwnode/queries/parsers/contracts.py +0 -46
  207. exonware/xwnode/queries/parsers/errors.py +0 -53
  208. exonware/xwnode/queries/parsers/sql_param_extractor.py +0 -318
  209. exonware/xwnode/queries/strategies/__init__.py +0 -24
  210. exonware/xwnode/queries/strategies/base.py +0 -236
  211. exonware/xwnode/queries/strategies/cql.py +0 -201
  212. exonware/xwnode/queries/strategies/cypher.py +0 -181
  213. exonware/xwnode/queries/strategies/datalog.py +0 -70
  214. exonware/xwnode/queries/strategies/elastic_dsl.py +0 -70
  215. exonware/xwnode/queries/strategies/eql.py +0 -70
  216. exonware/xwnode/queries/strategies/flux.py +0 -70
  217. exonware/xwnode/queries/strategies/gql.py +0 -70
  218. exonware/xwnode/queries/strategies/graphql.py +0 -240
  219. exonware/xwnode/queries/strategies/gremlin.py +0 -181
  220. exonware/xwnode/queries/strategies/hiveql.py +0 -214
  221. exonware/xwnode/queries/strategies/hql.py +0 -70
  222. exonware/xwnode/queries/strategies/jmespath.py +0 -219
  223. exonware/xwnode/queries/strategies/jq.py +0 -66
  224. exonware/xwnode/queries/strategies/json_query.py +0 -66
  225. exonware/xwnode/queries/strategies/jsoniq.py +0 -248
  226. exonware/xwnode/queries/strategies/kql.py +0 -70
  227. exonware/xwnode/queries/strategies/linq.py +0 -238
  228. exonware/xwnode/queries/strategies/logql.py +0 -70
  229. exonware/xwnode/queries/strategies/mql.py +0 -68
  230. exonware/xwnode/queries/strategies/n1ql.py +0 -210
  231. exonware/xwnode/queries/strategies/partiql.py +0 -70
  232. exonware/xwnode/queries/strategies/pig.py +0 -215
  233. exonware/xwnode/queries/strategies/promql.py +0 -70
  234. exonware/xwnode/queries/strategies/sparql.py +0 -220
  235. exonware/xwnode/queries/strategies/sql.py +0 -275
  236. exonware/xwnode/queries/strategies/xml_query.py +0 -66
  237. exonware/xwnode/queries/strategies/xpath.py +0 -223
  238. exonware/xwnode/queries/strategies/xquery.py +0 -258
  239. exonware/xwnode/queries/strategies/xwnode_executor.py +0 -332
  240. exonware/xwnode/queries/strategies/xwquery.py +0 -456
  241. exonware_xwnode-0.0.1.22.dist-info/METADATA +0 -168
  242. exonware_xwnode-0.0.1.22.dist-info/RECORD +0 -214
  243. /exonware/xwnode/nodes/strategies/{node_ordered_map.py → ordered_map.py} +0 -0
  244. /exonware/xwnode/nodes/strategies/{node_ordered_map_balanced.py → ordered_map_balanced.py} +0 -0
  245. /exonware/xwnode/nodes/strategies/{node_patricia.py → patricia.py} +0 -0
  246. /exonware/xwnode/nodes/strategies/{node_radix_trie.py → radix_trie.py} +0 -0
  247. /exonware/xwnode/nodes/strategies/{node_set_tree.py → set_tree.py} +0 -0
  248. {exonware_xwnode-0.0.1.22.dist-info → exonware_xwnode-0.0.1.24.dist-info}/WHEEL +0 -0
  249. {exonware_xwnode-0.0.1.22.dist-info → exonware_xwnode-0.0.1.24.dist-info}/licenses/LICENSE +0 -0
@@ -27,7 +27,7 @@ class ACNode:
27
27
  return len(self.children) == 0
28
28
 
29
29
 
30
- class xAhoCorasickStrategy(ANodeTreeStrategy):
30
+ class AhoCorasickStrategy(ANodeTreeStrategy):
31
31
  """
32
32
  Aho-Corasick node strategy for multi-pattern string matching.
33
33
 
@@ -41,9 +41,7 @@ or linear-time matching.
41
41
 
42
42
  def __init__(self, traits: NodeTrait = NodeTrait.NONE, **options):
43
43
  """Initialize the Aho-Corasick strategy."""
44
- super().__init__(data=None, **options)
45
- self._mode = NodeMode.AHO_CORASICK
46
- self._traits = traits
44
+ super().__init__(NodeMode.AHO_CORASICK, traits, **options)
47
45
 
48
46
  self.case_sensitive = options.get('case_sensitive', True)
49
47
  self.enable_overlapping = options.get('enable_overlapping', True)
@@ -55,167 +53,324 @@ or linear-time matching.
55
53
  self._pattern_to_index: Dict[str, int] = {}
56
54
  self._automaton_built = False
57
55
 
58
- # Performance tracking
59
- self._size_tracker = 0
60
- self._access_tracker = 0
56
+ # Key-value mapping for compatibility
57
+ self._values: Dict[str, Any] = {}
58
+ self._size = 0
59
+
60
+ # Statistics
61
+ self._total_nodes = 1 # Root node
62
+ self._max_depth = 0
63
+ self._search_cache: Dict[str, List[Tuple[str, int]]] = {}
61
64
 
62
65
  def get_supported_traits(self) -> NodeTrait:
63
66
  """Get the traits supported by the Aho-Corasick strategy."""
64
- return (NodeTrait.ORDERED | NodeTrait.HIERARCHICAL | NodeTrait.INDEXED)
67
+ return (NodeTrait.ORDERED | NodeTrait.INDEXED | NodeTrait.STREAMING)
68
+
69
+ def _preprocess_pattern(self, pattern: str) -> str:
70
+ """Preprocess pattern based on settings."""
71
+ if not self.case_sensitive:
72
+ pattern = pattern.lower()
73
+ return pattern
74
+
75
+ def _preprocess_text(self, text: str) -> str:
76
+ """Preprocess text based on settings."""
77
+ if not self.case_sensitive:
78
+ text = text.lower()
79
+ return text
80
+
81
+ def _add_pattern_to_trie(self, pattern: str, pattern_index: int) -> None:
82
+ """Add pattern to the trie structure."""
83
+ current = self._root
84
+ depth = 0
85
+
86
+ for char in pattern:
87
+ if char not in current.children:
88
+ current.children[char] = ACNode()
89
+ current.children[char].depth = depth + 1
90
+ self._total_nodes += 1
91
+
92
+ current = current.children[char]
93
+ depth += 1
94
+
95
+ # Mark end of pattern
96
+ current.output.add(pattern)
97
+ current.pattern_indices.add(pattern_index)
98
+ self._max_depth = max(self._max_depth, depth)
99
+
100
+ def _build_failure_links(self) -> None:
101
+ """Build failure links using BFS."""
102
+ queue = deque()
103
+
104
+ # Initialize failure links for root's children
105
+ for child in self._root.children.values():
106
+ child.failure = self._root
107
+ queue.append(child)
108
+
109
+ # Build failure links for all other nodes
110
+ while queue:
111
+ current = queue.popleft()
112
+
113
+ for char, child in current.children.items():
114
+ queue.append(child)
115
+
116
+ # Find the failure link
117
+ failure_node = current.failure
118
+
119
+ while failure_node is not None and char not in failure_node.children:
120
+ failure_node = failure_node.failure
121
+
122
+ if failure_node is not None:
123
+ child.failure = failure_node.children[char]
124
+ else:
125
+ child.failure = self._root
126
+
127
+ # Add output from failure node
128
+ if child.failure:
129
+ child.output.update(child.failure.output)
130
+ child.pattern_indices.update(child.failure.pattern_indices)
131
+
132
+ def _build_automaton(self) -> None:
133
+ """Build the complete Aho-Corasick automaton."""
134
+ if self._automaton_built:
135
+ return
136
+
137
+ # Build failure links
138
+ self._build_failure_links()
139
+ self._automaton_built = True
140
+ self._search_cache.clear()
141
+
142
+ def _rebuild_automaton(self) -> None:
143
+ """Rebuild the automaton from scratch."""
144
+ # Reset automaton
145
+ self._root = ACNode()
146
+ self._total_nodes = 1
147
+ self._max_depth = 0
148
+ self._automaton_built = False
149
+ self._search_cache.clear()
150
+
151
+ # Rebuild trie
152
+ for i, pattern in enumerate(self._patterns):
153
+ self._add_pattern_to_trie(pattern, i)
154
+
155
+ # Build failure links
156
+ self._build_automaton()
65
157
 
66
158
  # ============================================================================
67
- # CORE OPERATIONS
159
+ # CORE OPERATIONS (Key-based interface for compatibility)
68
160
  # ============================================================================
69
161
 
70
- def insert(self, key: Any, value: Any) -> None:
71
- """Store a pattern (key should be string-like)."""
162
+ def put(self, key: Any, value: Any = None) -> None:
163
+ """Add pattern to automaton."""
72
164
  pattern = str(key)
73
- if not self.case_sensitive:
74
- pattern = pattern.lower()
165
+ processed_pattern = self._preprocess_pattern(pattern)
75
166
 
76
- if len(pattern) > self.max_pattern_length:
77
- raise ValueError(f"Pattern too long: {len(pattern)} > {self.max_pattern_length}")
167
+ if len(processed_pattern) > self.max_pattern_length:
168
+ raise ValueError(f"Pattern length {len(processed_pattern)} exceeds maximum {self.max_pattern_length}")
78
169
 
79
- if pattern not in self._pattern_to_index:
80
- self._patterns.append(pattern)
81
- self._pattern_to_index[pattern] = len(self._patterns) - 1
170
+ if processed_pattern not in self._pattern_to_index:
171
+ # Add new pattern
172
+ pattern_index = len(self._patterns)
173
+ self._patterns.append(processed_pattern)
174
+ self._pattern_to_index[processed_pattern] = pattern_index
175
+
176
+ # Add to trie
177
+ self._add_pattern_to_trie(processed_pattern, pattern_index)
82
178
  self._automaton_built = False
83
- self._size_tracker += 1
179
+ self._size += 1
180
+
181
+ # Store value
182
+ self._values[pattern] = value if value is not None else pattern
84
183
 
85
- def find(self, key: Any) -> Any:
86
- """Find pattern index."""
87
- pattern = str(key)
88
- if not self.case_sensitive:
89
- pattern = pattern.lower()
90
- return self._pattern_to_index.get(pattern)
184
+ def get(self, key: Any, default: Any = None) -> Any:
185
+ """Get value by key."""
186
+ key_str = str(key)
187
+
188
+ if key_str == "patterns":
189
+ return self._patterns.copy()
190
+ elif key_str == "automaton_info":
191
+ return {
192
+ 'total_nodes': self._total_nodes,
193
+ 'max_depth': self._max_depth,
194
+ 'automaton_built': self._automaton_built,
195
+ 'pattern_count': len(self._patterns)
196
+ }
197
+ elif key_str in self._values:
198
+ return self._values[key_str]
199
+
200
+ return default
91
201
 
92
- def delete(self, key: Any) -> bool:
93
- """Remove a pattern."""
202
+ def has(self, key: Any) -> bool:
203
+ """Check if key exists."""
204
+ key_str = str(key)
205
+ pattern = self._preprocess_pattern(key_str)
206
+ return pattern in self._pattern_to_index or key_str in self._values
207
+
208
+ def remove(self, key: Any) -> bool:
209
+ """Remove pattern (requires automaton rebuild)."""
94
210
  pattern = str(key)
95
- if not self.case_sensitive:
96
- pattern = pattern.lower()
211
+ processed_pattern = self._preprocess_pattern(pattern)
97
212
 
98
- if pattern in self._pattern_to_index:
99
- index = self._pattern_to_index[pattern]
100
- del self._patterns[index]
101
- del self._pattern_to_index[pattern]
102
- self._automaton_built = False
103
- self._size_tracker -= 1
213
+ if processed_pattern in self._pattern_to_index:
214
+ # Remove pattern
215
+ index = self._pattern_to_index[processed_pattern]
216
+ del self._pattern_to_index[processed_pattern]
217
+ self._patterns.pop(index)
218
+
219
+ # Update indices
220
+ for i, p in enumerate(self._patterns):
221
+ self._pattern_to_index[p] = i
222
+
223
+ # Remove value
224
+ self._values.pop(pattern, None)
225
+ self._size -= 1
226
+
227
+ # Rebuild automaton
228
+ self._rebuild_automaton()
104
229
  return True
230
+
105
231
  return False
106
232
 
107
- def size(self) -> int:
108
- """Get the number of patterns."""
109
- return self._size_tracker
110
-
111
- def is_empty(self) -> bool:
112
- """Check if the structure is empty."""
113
- return self._size_tracker == 0
114
-
115
- def to_native(self) -> Dict[str, Any]:
116
- """Convert to native Python dictionary."""
117
- return {pattern: index for pattern, index in self._pattern_to_index.items()}
233
+ def delete(self, key: Any) -> bool:
234
+ """Remove pattern (alias for remove)."""
235
+ return self.remove(key)
118
236
 
119
- # ============================================================================
120
- # TREE STRATEGY METHODS
121
- # ============================================================================
237
+ def clear(self) -> None:
238
+ """Clear all patterns."""
239
+ self._root = ACNode()
240
+ self._patterns.clear()
241
+ self._pattern_to_index.clear()
242
+ self._values.clear()
243
+ self._search_cache.clear()
244
+
245
+ self._total_nodes = 1
246
+ self._max_depth = 0
247
+ self._automaton_built = False
248
+ self._size = 0
122
249
 
123
- def traverse(self, order: str = 'inorder') -> List[Any]:
124
- """Traverse patterns in specified order."""
125
- return self._patterns.copy()
250
+ def keys(self) -> Iterator[str]:
251
+ """Get all pattern keys."""
252
+ for pattern in self._patterns:
253
+ yield pattern
254
+ yield "patterns"
255
+ yield "automaton_info"
126
256
 
127
- def get_min(self) -> Any:
128
- """Get minimum pattern."""
129
- return min(self._patterns) if self._patterns else None
257
+ def values(self) -> Iterator[Any]:
258
+ """Get all values."""
259
+ for value in self._values.values():
260
+ yield value
261
+ yield self._patterns.copy()
262
+ yield self.get("automaton_info")
130
263
 
131
- def get_max(self) -> Any:
132
- """Get maximum pattern."""
133
- return max(self._patterns) if self._patterns else None
264
+ def items(self) -> Iterator[tuple[str, Any]]:
265
+ """Get all key-value pairs."""
266
+ for key, value in self._values.items():
267
+ yield (key, value)
268
+ yield ("patterns", self._patterns.copy())
269
+ yield ("automaton_info", self.get("automaton_info"))
134
270
 
135
- # ============================================================================
136
- # AUTO-3 Phase 2 methods
137
- # ============================================================================
271
+ def __len__(self) -> int:
272
+ """Get number of patterns."""
273
+ return self._size
138
274
 
139
- def as_trie(self):
140
- """Provide Trie behavioral view."""
141
- return self
275
+ def to_native(self) -> Dict[str, Any]:
276
+ """Convert to native Python dict."""
277
+ result = dict(self._values)
278
+ result["patterns"] = self._patterns.copy()
279
+ result["automaton_info"] = self.get("automaton_info")
280
+ return result
142
281
 
143
- def as_heap(self):
144
- """Provide Heap behavioral view."""
145
- # TODO: Implement Heap view
146
- return self
282
+ @property
283
+ def is_list(self) -> bool:
284
+ """This can behave like a list for pattern access."""
285
+ return True
147
286
 
148
- def as_skip_list(self):
149
- """Provide SkipList behavioral view."""
150
- # TODO: Implement SkipList view
151
- return self
287
+ @property
288
+ def is_dict(self) -> bool:
289
+ """This behaves like a dict."""
290
+ return True
152
291
 
153
292
  # ============================================================================
154
293
  # AHO-CORASICK SPECIFIC OPERATIONS
155
294
  # ============================================================================
156
295
 
157
296
  def add_pattern(self, pattern: str, metadata: Any = None) -> None:
158
- """Add a pattern to the automaton."""
159
- self.insert(pattern, metadata)
297
+ """Add pattern with optional metadata."""
298
+ self.put(pattern, metadata)
160
299
 
161
300
  def search_text(self, text: str) -> List[Tuple[str, int, Any]]:
162
- """Search for all patterns in the given text."""
163
- if not self._automaton_built:
164
- self._build_automaton()
301
+ """Search for all pattern matches in text."""
302
+ if not text or not self._patterns:
303
+ return []
304
+
305
+ # Check cache
306
+ cache_key = text[:100] # Cache based on first 100 chars
307
+ if cache_key in self._search_cache and len(text) <= 100:
308
+ return self._search_cache[cache_key]
165
309
 
166
- results = []
310
+ processed_text = self._preprocess_text(text)
311
+ self._build_automaton()
312
+
313
+ matches = []
167
314
  current = self._root
168
315
 
169
- for i, char in enumerate(text):
170
- if not self.case_sensitive:
171
- char = char.lower()
172
-
173
- # Follow failure links if needed
174
- while current != self._root and char not in current.children:
316
+ for i, char in enumerate(processed_text):
317
+ # Follow failure links until we find a valid transition
318
+ while current is not None and char not in current.children:
175
319
  current = current.failure
176
320
 
177
- # Move to next state
178
- if char in current.children:
179
- current = current.children[char]
321
+ if current is None:
322
+ current = self._root
323
+ continue
180
324
 
181
- # Check for matches
325
+ current = current.children[char]
326
+
327
+ # Report all patterns that end at this position
182
328
  for pattern in current.output:
183
- pattern_index = self._pattern_to_index[pattern]
184
- results.append((pattern, i - len(pattern) + 1, pattern_index))
329
+ start_pos = i - len(pattern) + 1
330
+ metadata = self._values.get(pattern, None)
331
+ matches.append((pattern, start_pos, metadata))
332
+
333
+ # Cache small results
334
+ if len(text) <= 100:
335
+ self._search_cache[cache_key] = matches
185
336
 
186
- return results
337
+ return matches
187
338
 
188
339
  def find_all_matches(self, text: str) -> Dict[str, List[int]]:
189
- """Find all matches grouped by pattern."""
340
+ """Find all positions where each pattern matches."""
190
341
  matches = self.search_text(text)
191
342
  result = defaultdict(list)
192
343
 
193
344
  for pattern, position, _ in matches:
194
345
  result[pattern].append(position)
195
346
 
347
+ # Convert to regular dict
196
348
  return dict(result)
197
349
 
198
350
  def count_matches(self, text: str) -> Dict[str, int]:
199
- """Count matches for each pattern."""
200
- all_matches = self.find_all_matches(text)
201
- return {pattern: len(positions) for pattern, positions in all_matches.items()}
351
+ """Count occurrences of each pattern."""
352
+ matches = self.find_all_matches(text)
353
+ return {pattern: len(positions) for pattern, positions in matches.items()}
202
354
 
203
355
  def has_any_match(self, text: str) -> bool:
204
- """Check if any pattern matches in the text."""
205
- if not self._automaton_built:
206
- self._build_automaton()
356
+ """Check if text contains any of the patterns."""
357
+ if not text or not self._patterns:
358
+ return False
359
+
360
+ processed_text = self._preprocess_text(text)
361
+ self._build_automaton()
207
362
 
208
363
  current = self._root
209
364
 
210
- for char in text:
211
- if not self.case_sensitive:
212
- char = char.lower()
213
-
214
- while current != self._root and char not in current.children:
365
+ for char in processed_text:
366
+ while current is not None and char not in current.children:
215
367
  current = current.failure
216
368
 
217
- if char in current.children:
218
- current = current.children[char]
369
+ if current is None:
370
+ current = self._root
371
+ continue
372
+
373
+ current = current.children[char]
219
374
 
220
375
  if current.output:
221
376
  return True
@@ -223,104 +378,113 @@ or linear-time matching.
223
378
  return False
224
379
 
225
380
  def find_longest_match(self, text: str) -> Optional[Tuple[str, int, int]]:
226
- """Find the longest matching pattern."""
381
+ """Find the longest pattern match in text."""
227
382
  matches = self.search_text(text)
383
+
228
384
  if not matches:
229
385
  return None
230
386
 
231
- # Find the longest match
232
387
  longest = max(matches, key=lambda x: len(x[0]))
233
- return (longest[0], longest[1], longest[1] + len(longest[0]) - 1)
388
+ pattern, start_pos, _ = longest
389
+ return pattern, start_pos, len(pattern)
234
390
 
235
391
  def replace_patterns(self, text: str, replacement_func: callable = None) -> str:
236
392
  """Replace all pattern matches in text."""
393
+ if not replacement_func:
394
+ replacement_func = lambda pattern, metadata: f"[{pattern}]"
395
+
237
396
  matches = self.search_text(text)
397
+
238
398
  if not matches:
239
399
  return text
240
400
 
241
- # Sort matches by position (descending) to replace from end to start
401
+ # Sort matches by position (descending) to avoid index shifts
242
402
  matches.sort(key=lambda x: x[1], reverse=True)
243
403
 
244
404
  result = text
245
- for pattern, position, _ in matches:
246
- if replacement_func:
247
- replacement = replacement_func(pattern, position)
248
- else:
249
- replacement = f"[{pattern}]"
250
-
251
- result = result[:position] + replacement + result[position + len(pattern):]
405
+ for pattern, start_pos, metadata in matches:
406
+ end_pos = start_pos + len(pattern)
407
+ replacement = replacement_func(pattern, metadata)
408
+ result = result[:start_pos] + replacement + result[end_pos:]
252
409
 
253
410
  return result
254
411
 
255
- def _build_automaton(self) -> None:
256
- """Build the Aho-Corasick automaton."""
257
- # Build trie
258
- for pattern in self._patterns:
259
- self._add_pattern_to_trie(pattern)
412
+ def get_pattern_statistics(self) -> Dict[str, Any]:
413
+ """Get statistics about patterns and automaton."""
414
+ if not self._patterns:
415
+ return {'pattern_count': 0, 'total_nodes': 1, 'avg_pattern_length': 0}
260
416
 
261
- # Build failure links
262
- self._build_failure_links()
417
+ pattern_lengths = [len(p) for p in self._patterns]
418
+ unique_chars = set()
419
+ for pattern in self._patterns:
420
+ unique_chars.update(pattern)
263
421
 
264
- self._automaton_built = True
422
+ return {
423
+ 'pattern_count': len(self._patterns),
424
+ 'total_nodes': self._total_nodes,
425
+ 'max_depth': self._max_depth,
426
+ 'avg_pattern_length': sum(pattern_lengths) / len(pattern_lengths),
427
+ 'min_pattern_length': min(pattern_lengths),
428
+ 'max_pattern_length': max(pattern_lengths),
429
+ 'unique_characters': len(unique_chars),
430
+ 'alphabet_size': len(unique_chars),
431
+ 'automaton_built': self._automaton_built,
432
+ 'cache_size': len(self._search_cache)
433
+ }
265
434
 
266
- def _add_pattern_to_trie(self, pattern: str, pattern_index: int) -> None:
267
- """Add a pattern to the trie."""
268
- current = self._root
435
+ def validate_automaton(self) -> bool:
436
+ """Validate the automaton structure."""
437
+ self._build_automaton()
269
438
 
270
- for char in pattern:
271
- if char not in current.children:
272
- current.children[char] = ACNode()
273
- current.children[char].depth = current.depth + 1
439
+ def _validate_node(node: ACNode, visited: Set[ACNode]) -> bool:
440
+ if node in visited:
441
+ return True
274
442
 
275
- current = current.children[char]
443
+ visited.add(node)
444
+
445
+ # Check failure link
446
+ if node != self._root and node.failure is None:
447
+ return False
448
+
449
+ # Check children
450
+ for child in node.children.values():
451
+ if not _validate_node(child, visited):
452
+ return False
453
+
454
+ return True
276
455
 
277
- current.output.add(pattern)
278
- current.pattern_indices.add(pattern_index)
456
+ return _validate_node(self._root, set())
279
457
 
280
- def _build_failure_links(self) -> None:
281
- """Build failure links using BFS."""
282
- queue = deque()
458
+ def export_automaton(self) -> Dict[str, Any]:
459
+ """Export automaton structure for analysis."""
460
+ self._build_automaton()
283
461
 
284
- # Initialize failure links for root's children
285
- for child in self._root.children.values():
286
- child.failure = self._root
287
- queue.append(child)
462
+ def _export_node(node: ACNode, node_id: int) -> Dict[str, Any]:
463
+ return {
464
+ 'id': node_id,
465
+ 'depth': node.depth,
466
+ 'children': list(node.children.keys()),
467
+ 'output': list(node.output),
468
+ 'has_failure': node.failure is not None
469
+ }
288
470
 
289
- # Build failure links for remaining nodes
290
- while queue:
291
- current = queue.popleft()
471
+ nodes = []
472
+ node_queue = deque([(self._root, 0)])
473
+ node_id = 0
474
+
475
+ while node_queue:
476
+ node, current_id = node_queue.popleft()
477
+ nodes.append(_export_node(node, current_id))
292
478
 
293
- for char, child in current.children.items():
294
- queue.append(child)
295
-
296
- # Find failure link
297
- failure = current.failure
298
- while failure != self._root and char not in failure.children:
299
- failure = failure.failure
300
-
301
- if char in failure.children:
302
- child.failure = failure.children[char]
303
- else:
304
- child.failure = self._root
305
-
306
- # Merge output sets
307
- child.output.update(child.failure.output)
308
-
309
- # ============================================================================
310
- # ITERATION
311
- # ============================================================================
312
-
313
- def keys(self) -> Iterator[str]:
314
- """Get all patterns."""
315
- return iter(self._patterns)
316
-
317
- def values(self) -> Iterator[Any]:
318
- """Get all pattern indices."""
319
- return iter(range(len(self._patterns)))
320
-
321
- def items(self) -> Iterator[tuple[str, Any]]:
322
- """Get all pattern-index pairs."""
323
- return ((pattern, index) for pattern, index in self._pattern_to_index.items())
479
+ for child in node.children.values():
480
+ node_id += 1
481
+ node_queue.append((child, node_id))
482
+
483
+ return {
484
+ 'nodes': nodes,
485
+ 'patterns': self._patterns.copy(),
486
+ 'statistics': self.get_pattern_statistics()
487
+ }
324
488
 
325
489
  # ============================================================================
326
490
  # PERFORMANCE CHARACTERISTICS
@@ -331,20 +495,31 @@ or linear-time matching.
331
495
  """Get backend implementation info."""
332
496
  return {
333
497
  'strategy': 'AHO_CORASICK',
334
- 'backend': 'Aho-Corasick automaton',
498
+ 'backend': 'Finite automaton with failure links',
499
+ 'case_sensitive': self.case_sensitive,
500
+ 'enable_overlapping': self.enable_overlapping,
501
+ 'max_pattern_length': self.max_pattern_length,
335
502
  'complexity': {
336
- 'build': 'O(sum of pattern lengths)',
337
- 'search': 'O(text length + number of matches)',
338
- 'space': 'O(sum of pattern lengths)'
503
+ 'construction': 'O(Σ|patterns|)', # Σ = alphabet size
504
+ 'search': 'O(|text| + |matches|)',
505
+ 'space': 'O(Σ|patterns|)',
506
+ 'pattern_addition': 'O(|pattern|)',
507
+ 'pattern_removal': 'O(Σ|patterns|)' # Requires rebuild
339
508
  }
340
509
  }
341
510
 
342
511
  @property
343
512
  def metrics(self) -> Dict[str, Any]:
344
513
  """Get performance metrics."""
514
+ stats = self.get_pattern_statistics()
515
+
345
516
  return {
346
- 'patterns': len(self._patterns),
347
- 'automaton_built': self._automaton_built,
348
- 'case_sensitive': self.case_sensitive,
349
- 'max_pattern_length': self.max_pattern_length
517
+ 'patterns': stats['pattern_count'],
518
+ 'nodes': stats['total_nodes'],
519
+ 'max_depth': stats['max_depth'],
520
+ 'avg_pattern_length': f"{stats['avg_pattern_length']:.1f}",
521
+ 'alphabet_size': stats['alphabet_size'],
522
+ 'automaton_built': stats['automaton_built'],
523
+ 'cache_entries': stats['cache_size'],
524
+ 'memory_usage': f"{stats['total_nodes'] * 100 + len(self._patterns) * 50} bytes (estimated)"
350
525
  }