exonware-xwnode 0.0.1.21__py3-none-any.whl → 0.0.1.23__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (250)
  1. exonware/__init__.py +8 -1
  2. exonware/xwnode/__init__.py +18 -5
  3. exonware/xwnode/add_strategy_types.py +165 -0
  4. exonware/xwnode/base.py +7 -5
  5. exonware/xwnode/common/__init__.py +1 -1
  6. exonware/xwnode/common/graph/__init__.py +30 -0
  7. exonware/xwnode/common/graph/caching.py +131 -0
  8. exonware/xwnode/common/graph/contracts.py +100 -0
  9. exonware/xwnode/common/graph/errors.py +44 -0
  10. exonware/xwnode/common/graph/indexing.py +260 -0
  11. exonware/xwnode/common/graph/manager.py +568 -0
  12. exonware/xwnode/common/management/__init__.py +3 -5
  13. exonware/xwnode/common/management/manager.py +9 -9
  14. exonware/xwnode/common/management/migration.py +6 -6
  15. exonware/xwnode/common/monitoring/__init__.py +3 -5
  16. exonware/xwnode/common/monitoring/metrics.py +7 -3
  17. exonware/xwnode/common/monitoring/pattern_detector.py +2 -2
  18. exonware/xwnode/common/monitoring/performance_monitor.py +6 -2
  19. exonware/xwnode/common/patterns/__init__.py +3 -5
  20. exonware/xwnode/common/patterns/advisor.py +1 -1
  21. exonware/xwnode/common/patterns/flyweight.py +6 -2
  22. exonware/xwnode/common/patterns/registry.py +203 -184
  23. exonware/xwnode/common/utils/__init__.py +25 -11
  24. exonware/xwnode/common/utils/simple.py +1 -1
  25. exonware/xwnode/config.py +3 -8
  26. exonware/xwnode/contracts.py +4 -105
  27. exonware/xwnode/defs.py +413 -159
  28. exonware/xwnode/edges/strategies/__init__.py +86 -4
  29. exonware/xwnode/edges/strategies/_base_edge.py +2 -2
  30. exonware/xwnode/edges/strategies/adj_list.py +287 -121
  31. exonware/xwnode/edges/strategies/adj_matrix.py +316 -222
  32. exonware/xwnode/edges/strategies/base.py +1 -1
  33. exonware/xwnode/edges/strategies/{edge_bidir_wrapper.py → bidir_wrapper.py} +45 -4
  34. exonware/xwnode/edges/strategies/bitemporal.py +520 -0
  35. exonware/xwnode/edges/strategies/{edge_block_adj_matrix.py → block_adj_matrix.py} +77 -6
  36. exonware/xwnode/edges/strategies/bv_graph.py +664 -0
  37. exonware/xwnode/edges/strategies/compressed_graph.py +217 -0
  38. exonware/xwnode/edges/strategies/{edge_coo.py → coo.py} +46 -4
  39. exonware/xwnode/edges/strategies/{edge_csc.py → csc.py} +45 -4
  40. exonware/xwnode/edges/strategies/{edge_csr.py → csr.py} +94 -12
  41. exonware/xwnode/edges/strategies/{edge_dynamic_adj_list.py → dynamic_adj_list.py} +46 -4
  42. exonware/xwnode/edges/strategies/edge_list.py +168 -0
  43. exonware/xwnode/edges/strategies/edge_property_store.py +2 -2
  44. exonware/xwnode/edges/strategies/euler_tour.py +560 -0
  45. exonware/xwnode/edges/strategies/{edge_flow_network.py → flow_network.py} +2 -2
  46. exonware/xwnode/edges/strategies/graphblas.py +449 -0
  47. exonware/xwnode/edges/strategies/hnsw.py +637 -0
  48. exonware/xwnode/edges/strategies/hop2_labels.py +467 -0
  49. exonware/xwnode/edges/strategies/{edge_hyperedge_set.py → hyperedge_set.py} +2 -2
  50. exonware/xwnode/edges/strategies/incidence_matrix.py +250 -0
  51. exonware/xwnode/edges/strategies/k2_tree.py +613 -0
  52. exonware/xwnode/edges/strategies/link_cut.py +626 -0
  53. exonware/xwnode/edges/strategies/multiplex.py +532 -0
  54. exonware/xwnode/edges/strategies/{edge_neural_graph.py → neural_graph.py} +2 -2
  55. exonware/xwnode/edges/strategies/{edge_octree.py → octree.py} +69 -11
  56. exonware/xwnode/edges/strategies/{edge_quadtree.py → quadtree.py} +66 -10
  57. exonware/xwnode/edges/strategies/roaring_adj.py +438 -0
  58. exonware/xwnode/edges/strategies/{edge_rtree.py → rtree.py} +43 -5
  59. exonware/xwnode/edges/strategies/{edge_temporal_edgeset.py → temporal_edgeset.py} +24 -5
  60. exonware/xwnode/edges/strategies/{edge_tree_graph_basic.py → tree_graph_basic.py} +78 -7
  61. exonware/xwnode/edges/strategies/{edge_weighted_graph.py → weighted_graph.py} +188 -10
  62. exonware/xwnode/errors.py +3 -6
  63. exonware/xwnode/facade.py +20 -20
  64. exonware/xwnode/nodes/strategies/__init__.py +29 -9
  65. exonware/xwnode/nodes/strategies/adjacency_list.py +650 -177
  66. exonware/xwnode/nodes/strategies/aho_corasick.py +358 -183
  67. exonware/xwnode/nodes/strategies/array_list.py +36 -3
  68. exonware/xwnode/nodes/strategies/art.py +581 -0
  69. exonware/xwnode/nodes/strategies/{node_avl_tree.py → avl_tree.py} +77 -6
  70. exonware/xwnode/nodes/strategies/{node_b_plus_tree.py → b_plus_tree.py} +81 -40
  71. exonware/xwnode/nodes/strategies/{node_btree.py → b_tree.py} +79 -9
  72. exonware/xwnode/nodes/strategies/base.py +469 -98
  73. exonware/xwnode/nodes/strategies/{node_bitmap.py → bitmap.py} +12 -12
  74. exonware/xwnode/nodes/strategies/{node_bitset_dynamic.py → bitset_dynamic.py} +11 -11
  75. exonware/xwnode/nodes/strategies/{node_bloom_filter.py → bloom_filter.py} +15 -2
  76. exonware/xwnode/nodes/strategies/bloomier_filter.py +519 -0
  77. exonware/xwnode/nodes/strategies/bw_tree.py +531 -0
  78. exonware/xwnode/nodes/strategies/contracts.py +1 -1
  79. exonware/xwnode/nodes/strategies/{node_count_min_sketch.py → count_min_sketch.py} +3 -2
  80. exonware/xwnode/nodes/strategies/{node_cow_tree.py → cow_tree.py} +135 -13
  81. exonware/xwnode/nodes/strategies/crdt_map.py +629 -0
  82. exonware/xwnode/nodes/strategies/{node_cuckoo_hash.py → cuckoo_hash.py} +2 -2
  83. exonware/xwnode/nodes/strategies/{node_xdata_optimized.py → data_interchange_optimized.py} +21 -4
  84. exonware/xwnode/nodes/strategies/dawg.py +876 -0
  85. exonware/xwnode/nodes/strategies/deque.py +321 -153
  86. exonware/xwnode/nodes/strategies/extendible_hash.py +93 -0
  87. exonware/xwnode/nodes/strategies/{node_fenwick_tree.py → fenwick_tree.py} +111 -19
  88. exonware/xwnode/nodes/strategies/hamt.py +403 -0
  89. exonware/xwnode/nodes/strategies/hash_map.py +354 -67
  90. exonware/xwnode/nodes/strategies/heap.py +105 -5
  91. exonware/xwnode/nodes/strategies/hopscotch_hash.py +525 -0
  92. exonware/xwnode/nodes/strategies/{node_hyperloglog.py → hyperloglog.py} +6 -5
  93. exonware/xwnode/nodes/strategies/interval_tree.py +742 -0
  94. exonware/xwnode/nodes/strategies/kd_tree.py +703 -0
  95. exonware/xwnode/nodes/strategies/learned_index.py +533 -0
  96. exonware/xwnode/nodes/strategies/linear_hash.py +93 -0
  97. exonware/xwnode/nodes/strategies/linked_list.py +316 -119
  98. exonware/xwnode/nodes/strategies/{node_lsm_tree.py → lsm_tree.py} +219 -15
  99. exonware/xwnode/nodes/strategies/masstree.py +130 -0
  100. exonware/xwnode/nodes/strategies/{node_persistent_tree.py → persistent_tree.py} +149 -9
  101. exonware/xwnode/nodes/strategies/priority_queue.py +544 -132
  102. exonware/xwnode/nodes/strategies/queue.py +249 -120
  103. exonware/xwnode/nodes/strategies/{node_red_black_tree.py → red_black_tree.py} +183 -72
  104. exonware/xwnode/nodes/strategies/{node_roaring_bitmap.py → roaring_bitmap.py} +19 -6
  105. exonware/xwnode/nodes/strategies/rope.py +717 -0
  106. exonware/xwnode/nodes/strategies/{node_segment_tree.py → segment_tree.py} +106 -106
  107. exonware/xwnode/nodes/strategies/{node_set_hash.py → set_hash.py} +30 -29
  108. exonware/xwnode/nodes/strategies/{node_skip_list.py → skip_list.py} +74 -6
  109. exonware/xwnode/nodes/strategies/sparse_matrix.py +427 -131
  110. exonware/xwnode/nodes/strategies/{node_splay_tree.py → splay_tree.py} +55 -6
  111. exonware/xwnode/nodes/strategies/stack.py +244 -112
  112. exonware/xwnode/nodes/strategies/{node_suffix_array.py → suffix_array.py} +5 -1
  113. exonware/xwnode/nodes/strategies/t_tree.py +94 -0
  114. exonware/xwnode/nodes/strategies/{node_treap.py → treap.py} +75 -6
  115. exonware/xwnode/nodes/strategies/{node_tree_graph_hybrid.py → tree_graph_hybrid.py} +46 -5
  116. exonware/xwnode/nodes/strategies/trie.py +153 -9
  117. exonware/xwnode/nodes/strategies/union_find.py +111 -5
  118. exonware/xwnode/nodes/strategies/veb_tree.py +856 -0
  119. exonware/xwnode/strategies/__init__.py +5 -51
  120. exonware/xwnode/version.py +3 -3
  121. {exonware_xwnode-0.0.1.21.dist-info → exonware_xwnode-0.0.1.23.dist-info}/METADATA +23 -3
  122. exonware_xwnode-0.0.1.23.dist-info/RECORD +130 -0
  123. exonware/xwnode/edges/strategies/edge_adj_list.py +0 -353
  124. exonware/xwnode/edges/strategies/edge_adj_matrix.py +0 -445
  125. exonware/xwnode/nodes/strategies/_base_node.py +0 -307
  126. exonware/xwnode/nodes/strategies/node_aho_corasick.py +0 -525
  127. exonware/xwnode/nodes/strategies/node_array_list.py +0 -179
  128. exonware/xwnode/nodes/strategies/node_hash_map.py +0 -273
  129. exonware/xwnode/nodes/strategies/node_heap.py +0 -196
  130. exonware/xwnode/nodes/strategies/node_linked_list.py +0 -413
  131. exonware/xwnode/nodes/strategies/node_trie.py +0 -257
  132. exonware/xwnode/nodes/strategies/node_union_find.py +0 -192
  133. exonware/xwnode/queries/executors/__init__.py +0 -47
  134. exonware/xwnode/queries/executors/advanced/__init__.py +0 -37
  135. exonware/xwnode/queries/executors/advanced/aggregate_executor.py +0 -50
  136. exonware/xwnode/queries/executors/advanced/ask_executor.py +0 -50
  137. exonware/xwnode/queries/executors/advanced/construct_executor.py +0 -50
  138. exonware/xwnode/queries/executors/advanced/describe_executor.py +0 -50
  139. exonware/xwnode/queries/executors/advanced/for_loop_executor.py +0 -50
  140. exonware/xwnode/queries/executors/advanced/foreach_executor.py +0 -50
  141. exonware/xwnode/queries/executors/advanced/join_executor.py +0 -50
  142. exonware/xwnode/queries/executors/advanced/let_executor.py +0 -50
  143. exonware/xwnode/queries/executors/advanced/mutation_executor.py +0 -50
  144. exonware/xwnode/queries/executors/advanced/options_executor.py +0 -50
  145. exonware/xwnode/queries/executors/advanced/pipe_executor.py +0 -50
  146. exonware/xwnode/queries/executors/advanced/subscribe_executor.py +0 -50
  147. exonware/xwnode/queries/executors/advanced/subscription_executor.py +0 -50
  148. exonware/xwnode/queries/executors/advanced/union_executor.py +0 -50
  149. exonware/xwnode/queries/executors/advanced/window_executor.py +0 -51
  150. exonware/xwnode/queries/executors/advanced/with_cte_executor.py +0 -50
  151. exonware/xwnode/queries/executors/aggregation/__init__.py +0 -21
  152. exonware/xwnode/queries/executors/aggregation/avg_executor.py +0 -50
  153. exonware/xwnode/queries/executors/aggregation/count_executor.py +0 -38
  154. exonware/xwnode/queries/executors/aggregation/distinct_executor.py +0 -50
  155. exonware/xwnode/queries/executors/aggregation/group_executor.py +0 -50
  156. exonware/xwnode/queries/executors/aggregation/having_executor.py +0 -50
  157. exonware/xwnode/queries/executors/aggregation/max_executor.py +0 -50
  158. exonware/xwnode/queries/executors/aggregation/min_executor.py +0 -50
  159. exonware/xwnode/queries/executors/aggregation/sum_executor.py +0 -50
  160. exonware/xwnode/queries/executors/aggregation/summarize_executor.py +0 -50
  161. exonware/xwnode/queries/executors/array/__init__.py +0 -9
  162. exonware/xwnode/queries/executors/array/indexing_executor.py +0 -51
  163. exonware/xwnode/queries/executors/array/slicing_executor.py +0 -51
  164. exonware/xwnode/queries/executors/base.py +0 -257
  165. exonware/xwnode/queries/executors/capability_checker.py +0 -204
  166. exonware/xwnode/queries/executors/contracts.py +0 -166
  167. exonware/xwnode/queries/executors/core/__init__.py +0 -17
  168. exonware/xwnode/queries/executors/core/create_executor.py +0 -96
  169. exonware/xwnode/queries/executors/core/delete_executor.py +0 -99
  170. exonware/xwnode/queries/executors/core/drop_executor.py +0 -100
  171. exonware/xwnode/queries/executors/core/insert_executor.py +0 -39
  172. exonware/xwnode/queries/executors/core/select_executor.py +0 -152
  173. exonware/xwnode/queries/executors/core/update_executor.py +0 -102
  174. exonware/xwnode/queries/executors/data/__init__.py +0 -13
  175. exonware/xwnode/queries/executors/data/alter_executor.py +0 -50
  176. exonware/xwnode/queries/executors/data/load_executor.py +0 -50
  177. exonware/xwnode/queries/executors/data/merge_executor.py +0 -50
  178. exonware/xwnode/queries/executors/data/store_executor.py +0 -50
  179. exonware/xwnode/queries/executors/defs.py +0 -93
  180. exonware/xwnode/queries/executors/engine.py +0 -221
  181. exonware/xwnode/queries/executors/errors.py +0 -68
  182. exonware/xwnode/queries/executors/filtering/__init__.py +0 -25
  183. exonware/xwnode/queries/executors/filtering/between_executor.py +0 -80
  184. exonware/xwnode/queries/executors/filtering/filter_executor.py +0 -79
  185. exonware/xwnode/queries/executors/filtering/has_executor.py +0 -70
  186. exonware/xwnode/queries/executors/filtering/in_executor.py +0 -70
  187. exonware/xwnode/queries/executors/filtering/like_executor.py +0 -76
  188. exonware/xwnode/queries/executors/filtering/optional_executor.py +0 -76
  189. exonware/xwnode/queries/executors/filtering/range_executor.py +0 -80
  190. exonware/xwnode/queries/executors/filtering/term_executor.py +0 -77
  191. exonware/xwnode/queries/executors/filtering/values_executor.py +0 -71
  192. exonware/xwnode/queries/executors/filtering/where_executor.py +0 -44
  193. exonware/xwnode/queries/executors/graph/__init__.py +0 -15
  194. exonware/xwnode/queries/executors/graph/in_traverse_executor.py +0 -51
  195. exonware/xwnode/queries/executors/graph/match_executor.py +0 -51
  196. exonware/xwnode/queries/executors/graph/out_executor.py +0 -51
  197. exonware/xwnode/queries/executors/graph/path_executor.py +0 -51
  198. exonware/xwnode/queries/executors/graph/return_executor.py +0 -51
  199. exonware/xwnode/queries/executors/ordering/__init__.py +0 -9
  200. exonware/xwnode/queries/executors/ordering/by_executor.py +0 -50
  201. exonware/xwnode/queries/executors/ordering/order_executor.py +0 -51
  202. exonware/xwnode/queries/executors/projection/__init__.py +0 -9
  203. exonware/xwnode/queries/executors/projection/extend_executor.py +0 -50
  204. exonware/xwnode/queries/executors/projection/project_executor.py +0 -50
  205. exonware/xwnode/queries/executors/registry.py +0 -173
  206. exonware/xwnode/queries/parsers/__init__.py +0 -26
  207. exonware/xwnode/queries/parsers/base.py +0 -86
  208. exonware/xwnode/queries/parsers/contracts.py +0 -46
  209. exonware/xwnode/queries/parsers/errors.py +0 -53
  210. exonware/xwnode/queries/parsers/sql_param_extractor.py +0 -318
  211. exonware/xwnode/queries/strategies/__init__.py +0 -24
  212. exonware/xwnode/queries/strategies/base.py +0 -236
  213. exonware/xwnode/queries/strategies/cql.py +0 -201
  214. exonware/xwnode/queries/strategies/cypher.py +0 -181
  215. exonware/xwnode/queries/strategies/datalog.py +0 -70
  216. exonware/xwnode/queries/strategies/elastic_dsl.py +0 -70
  217. exonware/xwnode/queries/strategies/eql.py +0 -70
  218. exonware/xwnode/queries/strategies/flux.py +0 -70
  219. exonware/xwnode/queries/strategies/gql.py +0 -70
  220. exonware/xwnode/queries/strategies/graphql.py +0 -240
  221. exonware/xwnode/queries/strategies/gremlin.py +0 -181
  222. exonware/xwnode/queries/strategies/hiveql.py +0 -214
  223. exonware/xwnode/queries/strategies/hql.py +0 -70
  224. exonware/xwnode/queries/strategies/jmespath.py +0 -219
  225. exonware/xwnode/queries/strategies/jq.py +0 -66
  226. exonware/xwnode/queries/strategies/json_query.py +0 -66
  227. exonware/xwnode/queries/strategies/jsoniq.py +0 -248
  228. exonware/xwnode/queries/strategies/kql.py +0 -70
  229. exonware/xwnode/queries/strategies/linq.py +0 -238
  230. exonware/xwnode/queries/strategies/logql.py +0 -70
  231. exonware/xwnode/queries/strategies/mql.py +0 -68
  232. exonware/xwnode/queries/strategies/n1ql.py +0 -210
  233. exonware/xwnode/queries/strategies/partiql.py +0 -70
  234. exonware/xwnode/queries/strategies/pig.py +0 -215
  235. exonware/xwnode/queries/strategies/promql.py +0 -70
  236. exonware/xwnode/queries/strategies/sparql.py +0 -220
  237. exonware/xwnode/queries/strategies/sql.py +0 -275
  238. exonware/xwnode/queries/strategies/xml_query.py +0 -66
  239. exonware/xwnode/queries/strategies/xpath.py +0 -223
  240. exonware/xwnode/queries/strategies/xquery.py +0 -258
  241. exonware/xwnode/queries/strategies/xwnode_executor.py +0 -332
  242. exonware/xwnode/queries/strategies/xwquery.py +0 -456
  243. exonware_xwnode-0.0.1.21.dist-info/RECORD +0 -214
  244. /exonware/xwnode/nodes/strategies/{node_ordered_map.py → ordered_map.py} +0 -0
  245. /exonware/xwnode/nodes/strategies/{node_ordered_map_balanced.py → ordered_map_balanced.py} +0 -0
  246. /exonware/xwnode/nodes/strategies/{node_patricia.py → patricia.py} +0 -0
  247. /exonware/xwnode/nodes/strategies/{node_radix_trie.py → radix_trie.py} +0 -0
  248. /exonware/xwnode/nodes/strategies/{node_set_tree.py → set_tree.py} +0 -0
  249. {exonware_xwnode-0.0.1.21.dist-info → exonware_xwnode-0.0.1.23.dist-info}/WHEEL +0 -0
  250. {exonware_xwnode-0.0.1.21.dist-info → exonware_xwnode-0.0.1.23.dist-info}/licenses/LICENSE +0 -0
@@ -1,525 +0,0 @@
1
- """
2
- Aho-Corasick Node Strategy Implementation
3
-
4
- This module implements the AHO_CORASICK strategy for efficient multi-pattern
5
- string matching using the Aho-Corasick automaton algorithm.
6
- """
7
-
8
- from typing import Any, Iterator, List, Dict, Set, Optional, Tuple
9
- from collections import deque, defaultdict
10
- from .base import ANodeTreeStrategy
11
- from .contracts import NodeType
12
- from ...defs import NodeMode, NodeTrait
13
-
14
-
15
- class ACNode:
16
- """Node in the Aho-Corasick trie."""
17
-
18
- def __init__(self):
19
- self.children: Dict[str, 'ACNode'] = {}
20
- self.failure: Optional['ACNode'] = None
21
- self.output: Set[str] = set() # Patterns that end at this node
22
- self.pattern_indices: Set[int] = set() # Indices of patterns
23
- self.depth = 0
24
-
25
- def is_leaf(self) -> bool:
26
- """Check if this is a leaf node."""
27
- return len(self.children) == 0
28
-
29
-
30
- class AhoCorasickStrategy(ANodeTreeStrategy):
31
- """
32
- Aho-Corasick node strategy for multi-pattern string matching.
33
-
34
- Efficiently searches for multiple patterns simultaneously in a text
35
- using a finite automaton with failure links f
36
-
37
- # Strategy type classification
38
- STRATEGY_TYPE = NodeType.TREE
39
- or linear-time matching.
40
- """
41
-
42
- def __init__(self, traits: NodeTrait = NodeTrait.NONE, **options):
43
- """Initialize the Aho-Corasick strategy."""
44
- super().__init__(NodeMode.AHO_CORASICK, traits, **options)
45
-
46
- self.case_sensitive = options.get('case_sensitive', True)
47
- self.enable_overlapping = options.get('enable_overlapping', True)
48
- self.max_pattern_length = options.get('max_pattern_length', 1000)
49
-
50
- # Core automaton
51
- self._root = ACNode()
52
- self._patterns: List[str] = []
53
- self._pattern_to_index: Dict[str, int] = {}
54
- self._automaton_built = False
55
-
56
- # Key-value mapping for compatibility
57
- self._values: Dict[str, Any] = {}
58
- self._size = 0
59
-
60
- # Statistics
61
- self._total_nodes = 1 # Root node
62
- self._max_depth = 0
63
- self._search_cache: Dict[str, List[Tuple[str, int]]] = {}
64
-
65
- def get_supported_traits(self) -> NodeTrait:
66
- """Get the traits supported by the Aho-Corasick strategy."""
67
- return (NodeTrait.ORDERED | NodeTrait.INDEXED | NodeTrait.STREAMING)
68
-
69
- def _preprocess_pattern(self, pattern: str) -> str:
70
- """Preprocess pattern based on settings."""
71
- if not self.case_sensitive:
72
- pattern = pattern.lower()
73
- return pattern
74
-
75
- def _preprocess_text(self, text: str) -> str:
76
- """Preprocess text based on settings."""
77
- if not self.case_sensitive:
78
- text = text.lower()
79
- return text
80
-
81
- def _add_pattern_to_trie(self, pattern: str, pattern_index: int) -> None:
82
- """Add pattern to the trie structure."""
83
- current = self._root
84
- depth = 0
85
-
86
- for char in pattern:
87
- if char not in current.children:
88
- current.children[char] = ACNode()
89
- current.children[char].depth = depth + 1
90
- self._total_nodes += 1
91
-
92
- current = current.children[char]
93
- depth += 1
94
-
95
- # Mark end of pattern
96
- current.output.add(pattern)
97
- current.pattern_indices.add(pattern_index)
98
- self._max_depth = max(self._max_depth, depth)
99
-
100
- def _build_failure_links(self) -> None:
101
- """Build failure links using BFS."""
102
- queue = deque()
103
-
104
- # Initialize failure links for root's children
105
- for child in self._root.children.values():
106
- child.failure = self._root
107
- queue.append(child)
108
-
109
- # Build failure links for all other nodes
110
- while queue:
111
- current = queue.popleft()
112
-
113
- for char, child in current.children.items():
114
- queue.append(child)
115
-
116
- # Find the failure link
117
- failure_node = current.failure
118
-
119
- while failure_node is not None and char not in failure_node.children:
120
- failure_node = failure_node.failure
121
-
122
- if failure_node is not None:
123
- child.failure = failure_node.children[char]
124
- else:
125
- child.failure = self._root
126
-
127
- # Add output from failure node
128
- if child.failure:
129
- child.output.update(child.failure.output)
130
- child.pattern_indices.update(child.failure.pattern_indices)
131
-
132
- def _build_automaton(self) -> None:
133
- """Build the complete Aho-Corasick automaton."""
134
- if self._automaton_built:
135
- return
136
-
137
- # Build failure links
138
- self._build_failure_links()
139
- self._automaton_built = True
140
- self._search_cache.clear()
141
-
142
- def _rebuild_automaton(self) -> None:
143
- """Rebuild the automaton from scratch."""
144
- # Reset automaton
145
- self._root = ACNode()
146
- self._total_nodes = 1
147
- self._max_depth = 0
148
- self._automaton_built = False
149
- self._search_cache.clear()
150
-
151
- # Rebuild trie
152
- for i, pattern in enumerate(self._patterns):
153
- self._add_pattern_to_trie(pattern, i)
154
-
155
- # Build failure links
156
- self._build_automaton()
157
-
158
- # ============================================================================
159
- # CORE OPERATIONS (Key-based interface for compatibility)
160
- # ============================================================================
161
-
162
- def put(self, key: Any, value: Any = None) -> None:
163
- """Add pattern to automaton."""
164
- pattern = str(key)
165
- processed_pattern = self._preprocess_pattern(pattern)
166
-
167
- if len(processed_pattern) > self.max_pattern_length:
168
- raise ValueError(f"Pattern length {len(processed_pattern)} exceeds maximum {self.max_pattern_length}")
169
-
170
- if processed_pattern not in self._pattern_to_index:
171
- # Add new pattern
172
- pattern_index = len(self._patterns)
173
- self._patterns.append(processed_pattern)
174
- self._pattern_to_index[processed_pattern] = pattern_index
175
-
176
- # Add to trie
177
- self._add_pattern_to_trie(processed_pattern, pattern_index)
178
- self._automaton_built = False
179
- self._size += 1
180
-
181
- # Store value
182
- self._values[pattern] = value if value is not None else pattern
183
-
184
- def get(self, key: Any, default: Any = None) -> Any:
185
- """Get value by key."""
186
- key_str = str(key)
187
-
188
- if key_str == "patterns":
189
- return self._patterns.copy()
190
- elif key_str == "automaton_info":
191
- return {
192
- 'total_nodes': self._total_nodes,
193
- 'max_depth': self._max_depth,
194
- 'automaton_built': self._automaton_built,
195
- 'pattern_count': len(self._patterns)
196
- }
197
- elif key_str in self._values:
198
- return self._values[key_str]
199
-
200
- return default
201
-
202
- def has(self, key: Any) -> bool:
203
- """Check if key exists."""
204
- key_str = str(key)
205
- pattern = self._preprocess_pattern(key_str)
206
- return pattern in self._pattern_to_index or key_str in self._values
207
-
208
- def remove(self, key: Any) -> bool:
209
- """Remove pattern (requires automaton rebuild)."""
210
- pattern = str(key)
211
- processed_pattern = self._preprocess_pattern(pattern)
212
-
213
- if processed_pattern in self._pattern_to_index:
214
- # Remove pattern
215
- index = self._pattern_to_index[processed_pattern]
216
- del self._pattern_to_index[processed_pattern]
217
- self._patterns.pop(index)
218
-
219
- # Update indices
220
- for i, p in enumerate(self._patterns):
221
- self._pattern_to_index[p] = i
222
-
223
- # Remove value
224
- self._values.pop(pattern, None)
225
- self._size -= 1
226
-
227
- # Rebuild automaton
228
- self._rebuild_automaton()
229
- return True
230
-
231
- return False
232
-
233
- def delete(self, key: Any) -> bool:
234
- """Remove pattern (alias for remove)."""
235
- return self.remove(key)
236
-
237
- def clear(self) -> None:
238
- """Clear all patterns."""
239
- self._root = ACNode()
240
- self._patterns.clear()
241
- self._pattern_to_index.clear()
242
- self._values.clear()
243
- self._search_cache.clear()
244
-
245
- self._total_nodes = 1
246
- self._max_depth = 0
247
- self._automaton_built = False
248
- self._size = 0
249
-
250
- def keys(self) -> Iterator[str]:
251
- """Get all pattern keys."""
252
- for pattern in self._patterns:
253
- yield pattern
254
- yield "patterns"
255
- yield "automaton_info"
256
-
257
- def values(self) -> Iterator[Any]:
258
- """Get all values."""
259
- for value in self._values.values():
260
- yield value
261
- yield self._patterns.copy()
262
- yield self.get("automaton_info")
263
-
264
- def items(self) -> Iterator[tuple[str, Any]]:
265
- """Get all key-value pairs."""
266
- for key, value in self._values.items():
267
- yield (key, value)
268
- yield ("patterns", self._patterns.copy())
269
- yield ("automaton_info", self.get("automaton_info"))
270
-
271
- def __len__(self) -> int:
272
- """Get number of patterns."""
273
- return self._size
274
-
275
- def to_native(self) -> Dict[str, Any]:
276
- """Convert to native Python dict."""
277
- result = dict(self._values)
278
- result["patterns"] = self._patterns.copy()
279
- result["automaton_info"] = self.get("automaton_info")
280
- return result
281
-
282
- @property
283
- def is_list(self) -> bool:
284
- """This can behave like a list for pattern access."""
285
- return True
286
-
287
- @property
288
- def is_dict(self) -> bool:
289
- """This behaves like a dict."""
290
- return True
291
-
292
- # ============================================================================
293
- # AHO-CORASICK SPECIFIC OPERATIONS
294
- # ============================================================================
295
-
296
- def add_pattern(self, pattern: str, metadata: Any = None) -> None:
297
- """Add pattern with optional metadata."""
298
- self.put(pattern, metadata)
299
-
300
- def search_text(self, text: str) -> List[Tuple[str, int, Any]]:
301
- """Search for all pattern matches in text."""
302
- if not text or not self._patterns:
303
- return []
304
-
305
- # Check cache
306
- cache_key = text[:100] # Cache based on first 100 chars
307
- if cache_key in self._search_cache and len(text) <= 100:
308
- return self._search_cache[cache_key]
309
-
310
- processed_text = self._preprocess_text(text)
311
- self._build_automaton()
312
-
313
- matches = []
314
- current = self._root
315
-
316
- for i, char in enumerate(processed_text):
317
- # Follow failure links until we find a valid transition
318
- while current is not None and char not in current.children:
319
- current = current.failure
320
-
321
- if current is None:
322
- current = self._root
323
- continue
324
-
325
- current = current.children[char]
326
-
327
- # Report all patterns that end at this position
328
- for pattern in current.output:
329
- start_pos = i - len(pattern) + 1
330
- metadata = self._values.get(pattern, None)
331
- matches.append((pattern, start_pos, metadata))
332
-
333
- # Cache small results
334
- if len(text) <= 100:
335
- self._search_cache[cache_key] = matches
336
-
337
- return matches
338
-
339
- def find_all_matches(self, text: str) -> Dict[str, List[int]]:
340
- """Find all positions where each pattern matches."""
341
- matches = self.search_text(text)
342
- result = defaultdict(list)
343
-
344
- for pattern, position, _ in matches:
345
- result[pattern].append(position)
346
-
347
- # Convert to regular dict
348
- return dict(result)
349
-
350
- def count_matches(self, text: str) -> Dict[str, int]:
351
- """Count occurrences of each pattern."""
352
- matches = self.find_all_matches(text)
353
- return {pattern: len(positions) for pattern, positions in matches.items()}
354
-
355
- def has_any_match(self, text: str) -> bool:
356
- """Check if text contains any of the patterns."""
357
- if not text or not self._patterns:
358
- return False
359
-
360
- processed_text = self._preprocess_text(text)
361
- self._build_automaton()
362
-
363
- current = self._root
364
-
365
- for char in processed_text:
366
- while current is not None and char not in current.children:
367
- current = current.failure
368
-
369
- if current is None:
370
- current = self._root
371
- continue
372
-
373
- current = current.children[char]
374
-
375
- if current.output:
376
- return True
377
-
378
- return False
379
-
380
- def find_longest_match(self, text: str) -> Optional[Tuple[str, int, int]]:
381
- """Find the longest pattern match in text."""
382
- matches = self.search_text(text)
383
-
384
- if not matches:
385
- return None
386
-
387
- longest = max(matches, key=lambda x: len(x[0]))
388
- pattern, start_pos, _ = longest
389
- return pattern, start_pos, len(pattern)
390
-
391
- def replace_patterns(self, text: str, replacement_func: callable = None) -> str:
392
- """Replace all pattern matches in text."""
393
- if not replacement_func:
394
- replacement_func = lambda pattern, metadata: f"[{pattern}]"
395
-
396
- matches = self.search_text(text)
397
-
398
- if not matches:
399
- return text
400
-
401
- # Sort matches by position (descending) to avoid index shifts
402
- matches.sort(key=lambda x: x[1], reverse=True)
403
-
404
- result = text
405
- for pattern, start_pos, metadata in matches:
406
- end_pos = start_pos + len(pattern)
407
- replacement = replacement_func(pattern, metadata)
408
- result = result[:start_pos] + replacement + result[end_pos:]
409
-
410
- return result
411
-
412
- def get_pattern_statistics(self) -> Dict[str, Any]:
413
- """Get statistics about patterns and automaton."""
414
- if not self._patterns:
415
- return {'pattern_count': 0, 'total_nodes': 1, 'avg_pattern_length': 0}
416
-
417
- pattern_lengths = [len(p) for p in self._patterns]
418
- unique_chars = set()
419
- for pattern in self._patterns:
420
- unique_chars.update(pattern)
421
-
422
- return {
423
- 'pattern_count': len(self._patterns),
424
- 'total_nodes': self._total_nodes,
425
- 'max_depth': self._max_depth,
426
- 'avg_pattern_length': sum(pattern_lengths) / len(pattern_lengths),
427
- 'min_pattern_length': min(pattern_lengths),
428
- 'max_pattern_length': max(pattern_lengths),
429
- 'unique_characters': len(unique_chars),
430
- 'alphabet_size': len(unique_chars),
431
- 'automaton_built': self._automaton_built,
432
- 'cache_size': len(self._search_cache)
433
- }
434
-
435
- def validate_automaton(self) -> bool:
436
- """Validate the automaton structure."""
437
- self._build_automaton()
438
-
439
- def _validate_node(node: ACNode, visited: Set[ACNode]) -> bool:
440
- if node in visited:
441
- return True
442
-
443
- visited.add(node)
444
-
445
- # Check failure link
446
- if node != self._root and node.failure is None:
447
- return False
448
-
449
- # Check children
450
- for child in node.children.values():
451
- if not _validate_node(child, visited):
452
- return False
453
-
454
- return True
455
-
456
- return _validate_node(self._root, set())
457
-
458
- def export_automaton(self) -> Dict[str, Any]:
459
- """Export automaton structure for analysis."""
460
- self._build_automaton()
461
-
462
- def _export_node(node: ACNode, node_id: int) -> Dict[str, Any]:
463
- return {
464
- 'id': node_id,
465
- 'depth': node.depth,
466
- 'children': list(node.children.keys()),
467
- 'output': list(node.output),
468
- 'has_failure': node.failure is not None
469
- }
470
-
471
- nodes = []
472
- node_queue = deque([(self._root, 0)])
473
- node_id = 0
474
-
475
- while node_queue:
476
- node, current_id = node_queue.popleft()
477
- nodes.append(_export_node(node, current_id))
478
-
479
- for child in node.children.values():
480
- node_id += 1
481
- node_queue.append((child, node_id))
482
-
483
- return {
484
- 'nodes': nodes,
485
- 'patterns': self._patterns.copy(),
486
- 'statistics': self.get_pattern_statistics()
487
- }
488
-
489
- # ============================================================================
490
- # PERFORMANCE CHARACTERISTICS
491
- # ============================================================================
492
-
493
- @property
494
- def backend_info(self) -> Dict[str, Any]:
495
- """Get backend implementation info."""
496
- return {
497
- 'strategy': 'AHO_CORASICK',
498
- 'backend': 'Finite automaton with failure links',
499
- 'case_sensitive': self.case_sensitive,
500
- 'enable_overlapping': self.enable_overlapping,
501
- 'max_pattern_length': self.max_pattern_length,
502
- 'complexity': {
503
- 'construction': 'O(Σ|patterns|)', # Σ = alphabet size
504
- 'search': 'O(|text| + |matches|)',
505
- 'space': 'O(Σ|patterns|)',
506
- 'pattern_addition': 'O(|pattern|)',
507
- 'pattern_removal': 'O(Σ|patterns|)' # Requires rebuild
508
- }
509
- }
510
-
511
- @property
512
- def metrics(self) -> Dict[str, Any]:
513
- """Get performance metrics."""
514
- stats = self.get_pattern_statistics()
515
-
516
- return {
517
- 'patterns': stats['pattern_count'],
518
- 'nodes': stats['total_nodes'],
519
- 'max_depth': stats['max_depth'],
520
- 'avg_pattern_length': f"{stats['avg_pattern_length']:.1f}",
521
- 'alphabet_size': stats['alphabet_size'],
522
- 'automaton_built': stats['automaton_built'],
523
- 'cache_entries': stats['cache_size'],
524
- 'memory_usage': f"{stats['total_nodes'] * 100 + len(self._patterns) * 50} bytes (estimated)"
525
- }
@@ -1,179 +0,0 @@
1
- """
2
- Array List Node Strategy Implementation
3
-
4
- This module implements the ARRAY_LIST strategy for sequential data
5
- with fast indexed access.
6
- """
7
-
8
- from typing import Any, Iterator, List, Union, Dict
9
- from ._base_node import aNodeStrategy
10
- from ...defs import NodeMode, NodeTrait
11
-
12
-
13
- class xArrayListStrategy(aNodeStrategy):
14
- """
15
- Array List node strategy for sequential data with O(1) indexed access.
16
-
17
- Uses Python's built-in list for optimal performance with indexed operations.
18
- """
19
-
20
- # Strategy type classification
21
- STRATEGY_TYPE = NodeType.LINEAR
22
-
23
-
24
- def __init__(self, traits: NodeTrait = NodeTrait.NONE, **options):
25
- """Initialize the array list strategy."""
26
- super().__init__(NodeMode.ARRAY_LIST, traits, **options)
27
- self._data: List[Any] = []
28
- self._size = 0
29
-
30
- def get_supported_traits(self) -> NodeTrait:
31
- """Get the traits supported by the array list strategy."""
32
- return (NodeTrait.ORDERED | NodeTrait.INDEXED)
33
-
34
- # ============================================================================
35
- # CORE OPERATIONS (Key-based interface for compatibility)
36
- # ============================================================================
37
-
38
- def put(self, key: Any, value: Any = None) -> None:
39
- """Store a value at index (key must be numeric)."""
40
- try:
41
- index = int(key)
42
- except (ValueError, TypeError):
43
- raise TypeError(f"Array list requires numeric indices, got {type(key).__name__}")
44
-
45
- # Extend list if necessary
46
- while len(self._data) <= index:
47
- self._data.append(None)
48
-
49
- if self._data[index] is None:
50
- self._size += 1
51
- self._data[index] = value
52
-
53
- def get(self, key: Any, default: Any = None) -> Any:
54
- """Retrieve a value by index."""
55
- try:
56
- index = int(key)
57
- if 0 <= index < len(self._data):
58
- value = self._data[index]
59
- return value if value is not None else default
60
- return default
61
- except (ValueError, TypeError):
62
- return default
63
-
64
- def has(self, key: Any) -> bool:
65
- """Check if index exists and has a value."""
66
- try:
67
- index = int(key)
68
- return 0 <= index < len(self._data) and self._data[index] is not None
69
- except (ValueError, TypeError):
70
- return False
71
-
72
- def remove(self, key: Any) -> bool:
73
- """Remove value at index."""
74
- try:
75
- index = int(key)
76
- if 0 <= index < len(self._data) and self._data[index] is not None:
77
- self._data[index] = None
78
- self._size -= 1
79
- return True
80
- return False
81
- except (ValueError, TypeError):
82
- return False
83
-
84
- def delete(self, key: Any) -> bool:
85
- """Remove value at index (alias for remove)."""
86
- return self.remove(key)
87
-
88
- def clear(self) -> None:
89
- """Clear all data."""
90
- self._data.clear()
91
- self._size = 0
92
-
93
- def keys(self) -> Iterator[str]:
94
- """Get all valid indices as strings."""
95
- return (str(i) for i, value in enumerate(self._data) if value is not None)
96
-
97
- def values(self) -> Iterator[Any]:
98
- """Get all values."""
99
- return (value for value in self._data if value is not None)
100
-
101
- def items(self) -> Iterator[tuple[str, Any]]:
102
- """Get all index-value pairs."""
103
- return ((str(i), value) for i, value in enumerate(self._data) if value is not None)
104
-
105
- def __len__(self) -> int:
106
- """Get the number of non-None items."""
107
- return self._size
108
-
109
- def to_native(self) -> List[Any]:
110
- """Convert to native Python list."""
111
- # Return only non-None values in order
112
- return [value for value in self._data if value is not None]
113
-
114
- @property
115
- def is_list(self) -> bool:
116
- """This is always a list strategy."""
117
- return True
118
-
119
- @property
120
- def is_dict(self) -> bool:
121
- """This is never a dict strategy."""
122
- return False
123
-
124
- # ============================================================================
125
- # ARRAY-SPECIFIC OPERATIONS
126
- # ============================================================================
127
-
128
- def append(self, value: Any) -> None:
129
- """Append a value to the end."""
130
- self._data.append(value)
131
- self._size += 1
132
-
133
- def insert(self, index: int, value: Any) -> None:
134
- """Insert a value at the specified index."""
135
- self._data.insert(index, value)
136
- self._size += 1
137
-
138
- def pop(self, index: int = -1) -> Any:
139
- """Remove and return value at index."""
140
- if not self._data:
141
- raise IndexError("pop from empty list")
142
- value = self._data.pop(index)
143
- if value is not None:
144
- self._size -= 1
145
- return value
146
-
147
- def extend(self, values: List[Any]) -> None:
148
- """Extend with multiple values."""
149
- self._data.extend(values)
150
- self._size += len(values)
151
-
152
- # ============================================================================
153
- # PERFORMANCE CHARACTERISTICS
154
- # ============================================================================
155
-
156
- @property
157
- def backend_info(self) -> Dict[str, Any]:
158
- """Get backend implementation info."""
159
- return {
160
- 'strategy': 'ARRAY_LIST',
161
- 'backend': 'Python list',
162
- 'complexity': {
163
- 'get': 'O(1)',
164
- 'put': 'O(1) amortized',
165
- 'append': 'O(1) amortized',
166
- 'insert': 'O(n)',
167
- 'pop': 'O(1) end, O(n) middle'
168
- }
169
- }
170
-
171
- @property
172
- def metrics(self) -> Dict[str, Any]:
173
- """Get performance metrics."""
174
- return {
175
- 'size': self._size,
176
- 'capacity': len(self._data),
177
- 'memory_usage': f"{len(self._data) * 8} bytes (estimated)",
178
- 'utilization': f"{(self._size / max(1, len(self._data))) * 100:.1f}%"
179
- }