exonware-xwnode 0.0.1.21__py3-none-any.whl → 0.0.1.23__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- exonware/__init__.py +8 -1
- exonware/xwnode/__init__.py +18 -5
- exonware/xwnode/add_strategy_types.py +165 -0
- exonware/xwnode/base.py +7 -5
- exonware/xwnode/common/__init__.py +1 -1
- exonware/xwnode/common/graph/__init__.py +30 -0
- exonware/xwnode/common/graph/caching.py +131 -0
- exonware/xwnode/common/graph/contracts.py +100 -0
- exonware/xwnode/common/graph/errors.py +44 -0
- exonware/xwnode/common/graph/indexing.py +260 -0
- exonware/xwnode/common/graph/manager.py +568 -0
- exonware/xwnode/common/management/__init__.py +3 -5
- exonware/xwnode/common/management/manager.py +9 -9
- exonware/xwnode/common/management/migration.py +6 -6
- exonware/xwnode/common/monitoring/__init__.py +3 -5
- exonware/xwnode/common/monitoring/metrics.py +7 -3
- exonware/xwnode/common/monitoring/pattern_detector.py +2 -2
- exonware/xwnode/common/monitoring/performance_monitor.py +6 -2
- exonware/xwnode/common/patterns/__init__.py +3 -5
- exonware/xwnode/common/patterns/advisor.py +1 -1
- exonware/xwnode/common/patterns/flyweight.py +6 -2
- exonware/xwnode/common/patterns/registry.py +203 -184
- exonware/xwnode/common/utils/__init__.py +25 -11
- exonware/xwnode/common/utils/simple.py +1 -1
- exonware/xwnode/config.py +3 -8
- exonware/xwnode/contracts.py +4 -105
- exonware/xwnode/defs.py +413 -159
- exonware/xwnode/edges/strategies/__init__.py +86 -4
- exonware/xwnode/edges/strategies/_base_edge.py +2 -2
- exonware/xwnode/edges/strategies/adj_list.py +287 -121
- exonware/xwnode/edges/strategies/adj_matrix.py +316 -222
- exonware/xwnode/edges/strategies/base.py +1 -1
- exonware/xwnode/edges/strategies/{edge_bidir_wrapper.py → bidir_wrapper.py} +45 -4
- exonware/xwnode/edges/strategies/bitemporal.py +520 -0
- exonware/xwnode/edges/strategies/{edge_block_adj_matrix.py → block_adj_matrix.py} +77 -6
- exonware/xwnode/edges/strategies/bv_graph.py +664 -0
- exonware/xwnode/edges/strategies/compressed_graph.py +217 -0
- exonware/xwnode/edges/strategies/{edge_coo.py → coo.py} +46 -4
- exonware/xwnode/edges/strategies/{edge_csc.py → csc.py} +45 -4
- exonware/xwnode/edges/strategies/{edge_csr.py → csr.py} +94 -12
- exonware/xwnode/edges/strategies/{edge_dynamic_adj_list.py → dynamic_adj_list.py} +46 -4
- exonware/xwnode/edges/strategies/edge_list.py +168 -0
- exonware/xwnode/edges/strategies/edge_property_store.py +2 -2
- exonware/xwnode/edges/strategies/euler_tour.py +560 -0
- exonware/xwnode/edges/strategies/{edge_flow_network.py → flow_network.py} +2 -2
- exonware/xwnode/edges/strategies/graphblas.py +449 -0
- exonware/xwnode/edges/strategies/hnsw.py +637 -0
- exonware/xwnode/edges/strategies/hop2_labels.py +467 -0
- exonware/xwnode/edges/strategies/{edge_hyperedge_set.py → hyperedge_set.py} +2 -2
- exonware/xwnode/edges/strategies/incidence_matrix.py +250 -0
- exonware/xwnode/edges/strategies/k2_tree.py +613 -0
- exonware/xwnode/edges/strategies/link_cut.py +626 -0
- exonware/xwnode/edges/strategies/multiplex.py +532 -0
- exonware/xwnode/edges/strategies/{edge_neural_graph.py → neural_graph.py} +2 -2
- exonware/xwnode/edges/strategies/{edge_octree.py → octree.py} +69 -11
- exonware/xwnode/edges/strategies/{edge_quadtree.py → quadtree.py} +66 -10
- exonware/xwnode/edges/strategies/roaring_adj.py +438 -0
- exonware/xwnode/edges/strategies/{edge_rtree.py → rtree.py} +43 -5
- exonware/xwnode/edges/strategies/{edge_temporal_edgeset.py → temporal_edgeset.py} +24 -5
- exonware/xwnode/edges/strategies/{edge_tree_graph_basic.py → tree_graph_basic.py} +78 -7
- exonware/xwnode/edges/strategies/{edge_weighted_graph.py → weighted_graph.py} +188 -10
- exonware/xwnode/errors.py +3 -6
- exonware/xwnode/facade.py +20 -20
- exonware/xwnode/nodes/strategies/__init__.py +29 -9
- exonware/xwnode/nodes/strategies/adjacency_list.py +650 -177
- exonware/xwnode/nodes/strategies/aho_corasick.py +358 -183
- exonware/xwnode/nodes/strategies/array_list.py +36 -3
- exonware/xwnode/nodes/strategies/art.py +581 -0
- exonware/xwnode/nodes/strategies/{node_avl_tree.py → avl_tree.py} +77 -6
- exonware/xwnode/nodes/strategies/{node_b_plus_tree.py → b_plus_tree.py} +81 -40
- exonware/xwnode/nodes/strategies/{node_btree.py → b_tree.py} +79 -9
- exonware/xwnode/nodes/strategies/base.py +469 -98
- exonware/xwnode/nodes/strategies/{node_bitmap.py → bitmap.py} +12 -12
- exonware/xwnode/nodes/strategies/{node_bitset_dynamic.py → bitset_dynamic.py} +11 -11
- exonware/xwnode/nodes/strategies/{node_bloom_filter.py → bloom_filter.py} +15 -2
- exonware/xwnode/nodes/strategies/bloomier_filter.py +519 -0
- exonware/xwnode/nodes/strategies/bw_tree.py +531 -0
- exonware/xwnode/nodes/strategies/contracts.py +1 -1
- exonware/xwnode/nodes/strategies/{node_count_min_sketch.py → count_min_sketch.py} +3 -2
- exonware/xwnode/nodes/strategies/{node_cow_tree.py → cow_tree.py} +135 -13
- exonware/xwnode/nodes/strategies/crdt_map.py +629 -0
- exonware/xwnode/nodes/strategies/{node_cuckoo_hash.py → cuckoo_hash.py} +2 -2
- exonware/xwnode/nodes/strategies/{node_xdata_optimized.py → data_interchange_optimized.py} +21 -4
- exonware/xwnode/nodes/strategies/dawg.py +876 -0
- exonware/xwnode/nodes/strategies/deque.py +321 -153
- exonware/xwnode/nodes/strategies/extendible_hash.py +93 -0
- exonware/xwnode/nodes/strategies/{node_fenwick_tree.py → fenwick_tree.py} +111 -19
- exonware/xwnode/nodes/strategies/hamt.py +403 -0
- exonware/xwnode/nodes/strategies/hash_map.py +354 -67
- exonware/xwnode/nodes/strategies/heap.py +105 -5
- exonware/xwnode/nodes/strategies/hopscotch_hash.py +525 -0
- exonware/xwnode/nodes/strategies/{node_hyperloglog.py → hyperloglog.py} +6 -5
- exonware/xwnode/nodes/strategies/interval_tree.py +742 -0
- exonware/xwnode/nodes/strategies/kd_tree.py +703 -0
- exonware/xwnode/nodes/strategies/learned_index.py +533 -0
- exonware/xwnode/nodes/strategies/linear_hash.py +93 -0
- exonware/xwnode/nodes/strategies/linked_list.py +316 -119
- exonware/xwnode/nodes/strategies/{node_lsm_tree.py → lsm_tree.py} +219 -15
- exonware/xwnode/nodes/strategies/masstree.py +130 -0
- exonware/xwnode/nodes/strategies/{node_persistent_tree.py → persistent_tree.py} +149 -9
- exonware/xwnode/nodes/strategies/priority_queue.py +544 -132
- exonware/xwnode/nodes/strategies/queue.py +249 -120
- exonware/xwnode/nodes/strategies/{node_red_black_tree.py → red_black_tree.py} +183 -72
- exonware/xwnode/nodes/strategies/{node_roaring_bitmap.py → roaring_bitmap.py} +19 -6
- exonware/xwnode/nodes/strategies/rope.py +717 -0
- exonware/xwnode/nodes/strategies/{node_segment_tree.py → segment_tree.py} +106 -106
- exonware/xwnode/nodes/strategies/{node_set_hash.py → set_hash.py} +30 -29
- exonware/xwnode/nodes/strategies/{node_skip_list.py → skip_list.py} +74 -6
- exonware/xwnode/nodes/strategies/sparse_matrix.py +427 -131
- exonware/xwnode/nodes/strategies/{node_splay_tree.py → splay_tree.py} +55 -6
- exonware/xwnode/nodes/strategies/stack.py +244 -112
- exonware/xwnode/nodes/strategies/{node_suffix_array.py → suffix_array.py} +5 -1
- exonware/xwnode/nodes/strategies/t_tree.py +94 -0
- exonware/xwnode/nodes/strategies/{node_treap.py → treap.py} +75 -6
- exonware/xwnode/nodes/strategies/{node_tree_graph_hybrid.py → tree_graph_hybrid.py} +46 -5
- exonware/xwnode/nodes/strategies/trie.py +153 -9
- exonware/xwnode/nodes/strategies/union_find.py +111 -5
- exonware/xwnode/nodes/strategies/veb_tree.py +856 -0
- exonware/xwnode/strategies/__init__.py +5 -51
- exonware/xwnode/version.py +3 -3
- {exonware_xwnode-0.0.1.21.dist-info → exonware_xwnode-0.0.1.23.dist-info}/METADATA +23 -3
- exonware_xwnode-0.0.1.23.dist-info/RECORD +130 -0
- exonware/xwnode/edges/strategies/edge_adj_list.py +0 -353
- exonware/xwnode/edges/strategies/edge_adj_matrix.py +0 -445
- exonware/xwnode/nodes/strategies/_base_node.py +0 -307
- exonware/xwnode/nodes/strategies/node_aho_corasick.py +0 -525
- exonware/xwnode/nodes/strategies/node_array_list.py +0 -179
- exonware/xwnode/nodes/strategies/node_hash_map.py +0 -273
- exonware/xwnode/nodes/strategies/node_heap.py +0 -196
- exonware/xwnode/nodes/strategies/node_linked_list.py +0 -413
- exonware/xwnode/nodes/strategies/node_trie.py +0 -257
- exonware/xwnode/nodes/strategies/node_union_find.py +0 -192
- exonware/xwnode/queries/executors/__init__.py +0 -47
- exonware/xwnode/queries/executors/advanced/__init__.py +0 -37
- exonware/xwnode/queries/executors/advanced/aggregate_executor.py +0 -50
- exonware/xwnode/queries/executors/advanced/ask_executor.py +0 -50
- exonware/xwnode/queries/executors/advanced/construct_executor.py +0 -50
- exonware/xwnode/queries/executors/advanced/describe_executor.py +0 -50
- exonware/xwnode/queries/executors/advanced/for_loop_executor.py +0 -50
- exonware/xwnode/queries/executors/advanced/foreach_executor.py +0 -50
- exonware/xwnode/queries/executors/advanced/join_executor.py +0 -50
- exonware/xwnode/queries/executors/advanced/let_executor.py +0 -50
- exonware/xwnode/queries/executors/advanced/mutation_executor.py +0 -50
- exonware/xwnode/queries/executors/advanced/options_executor.py +0 -50
- exonware/xwnode/queries/executors/advanced/pipe_executor.py +0 -50
- exonware/xwnode/queries/executors/advanced/subscribe_executor.py +0 -50
- exonware/xwnode/queries/executors/advanced/subscription_executor.py +0 -50
- exonware/xwnode/queries/executors/advanced/union_executor.py +0 -50
- exonware/xwnode/queries/executors/advanced/window_executor.py +0 -51
- exonware/xwnode/queries/executors/advanced/with_cte_executor.py +0 -50
- exonware/xwnode/queries/executors/aggregation/__init__.py +0 -21
- exonware/xwnode/queries/executors/aggregation/avg_executor.py +0 -50
- exonware/xwnode/queries/executors/aggregation/count_executor.py +0 -38
- exonware/xwnode/queries/executors/aggregation/distinct_executor.py +0 -50
- exonware/xwnode/queries/executors/aggregation/group_executor.py +0 -50
- exonware/xwnode/queries/executors/aggregation/having_executor.py +0 -50
- exonware/xwnode/queries/executors/aggregation/max_executor.py +0 -50
- exonware/xwnode/queries/executors/aggregation/min_executor.py +0 -50
- exonware/xwnode/queries/executors/aggregation/sum_executor.py +0 -50
- exonware/xwnode/queries/executors/aggregation/summarize_executor.py +0 -50
- exonware/xwnode/queries/executors/array/__init__.py +0 -9
- exonware/xwnode/queries/executors/array/indexing_executor.py +0 -51
- exonware/xwnode/queries/executors/array/slicing_executor.py +0 -51
- exonware/xwnode/queries/executors/base.py +0 -257
- exonware/xwnode/queries/executors/capability_checker.py +0 -204
- exonware/xwnode/queries/executors/contracts.py +0 -166
- exonware/xwnode/queries/executors/core/__init__.py +0 -17
- exonware/xwnode/queries/executors/core/create_executor.py +0 -96
- exonware/xwnode/queries/executors/core/delete_executor.py +0 -99
- exonware/xwnode/queries/executors/core/drop_executor.py +0 -100
- exonware/xwnode/queries/executors/core/insert_executor.py +0 -39
- exonware/xwnode/queries/executors/core/select_executor.py +0 -152
- exonware/xwnode/queries/executors/core/update_executor.py +0 -102
- exonware/xwnode/queries/executors/data/__init__.py +0 -13
- exonware/xwnode/queries/executors/data/alter_executor.py +0 -50
- exonware/xwnode/queries/executors/data/load_executor.py +0 -50
- exonware/xwnode/queries/executors/data/merge_executor.py +0 -50
- exonware/xwnode/queries/executors/data/store_executor.py +0 -50
- exonware/xwnode/queries/executors/defs.py +0 -93
- exonware/xwnode/queries/executors/engine.py +0 -221
- exonware/xwnode/queries/executors/errors.py +0 -68
- exonware/xwnode/queries/executors/filtering/__init__.py +0 -25
- exonware/xwnode/queries/executors/filtering/between_executor.py +0 -80
- exonware/xwnode/queries/executors/filtering/filter_executor.py +0 -79
- exonware/xwnode/queries/executors/filtering/has_executor.py +0 -70
- exonware/xwnode/queries/executors/filtering/in_executor.py +0 -70
- exonware/xwnode/queries/executors/filtering/like_executor.py +0 -76
- exonware/xwnode/queries/executors/filtering/optional_executor.py +0 -76
- exonware/xwnode/queries/executors/filtering/range_executor.py +0 -80
- exonware/xwnode/queries/executors/filtering/term_executor.py +0 -77
- exonware/xwnode/queries/executors/filtering/values_executor.py +0 -71
- exonware/xwnode/queries/executors/filtering/where_executor.py +0 -44
- exonware/xwnode/queries/executors/graph/__init__.py +0 -15
- exonware/xwnode/queries/executors/graph/in_traverse_executor.py +0 -51
- exonware/xwnode/queries/executors/graph/match_executor.py +0 -51
- exonware/xwnode/queries/executors/graph/out_executor.py +0 -51
- exonware/xwnode/queries/executors/graph/path_executor.py +0 -51
- exonware/xwnode/queries/executors/graph/return_executor.py +0 -51
- exonware/xwnode/queries/executors/ordering/__init__.py +0 -9
- exonware/xwnode/queries/executors/ordering/by_executor.py +0 -50
- exonware/xwnode/queries/executors/ordering/order_executor.py +0 -51
- exonware/xwnode/queries/executors/projection/__init__.py +0 -9
- exonware/xwnode/queries/executors/projection/extend_executor.py +0 -50
- exonware/xwnode/queries/executors/projection/project_executor.py +0 -50
- exonware/xwnode/queries/executors/registry.py +0 -173
- exonware/xwnode/queries/parsers/__init__.py +0 -26
- exonware/xwnode/queries/parsers/base.py +0 -86
- exonware/xwnode/queries/parsers/contracts.py +0 -46
- exonware/xwnode/queries/parsers/errors.py +0 -53
- exonware/xwnode/queries/parsers/sql_param_extractor.py +0 -318
- exonware/xwnode/queries/strategies/__init__.py +0 -24
- exonware/xwnode/queries/strategies/base.py +0 -236
- exonware/xwnode/queries/strategies/cql.py +0 -201
- exonware/xwnode/queries/strategies/cypher.py +0 -181
- exonware/xwnode/queries/strategies/datalog.py +0 -70
- exonware/xwnode/queries/strategies/elastic_dsl.py +0 -70
- exonware/xwnode/queries/strategies/eql.py +0 -70
- exonware/xwnode/queries/strategies/flux.py +0 -70
- exonware/xwnode/queries/strategies/gql.py +0 -70
- exonware/xwnode/queries/strategies/graphql.py +0 -240
- exonware/xwnode/queries/strategies/gremlin.py +0 -181
- exonware/xwnode/queries/strategies/hiveql.py +0 -214
- exonware/xwnode/queries/strategies/hql.py +0 -70
- exonware/xwnode/queries/strategies/jmespath.py +0 -219
- exonware/xwnode/queries/strategies/jq.py +0 -66
- exonware/xwnode/queries/strategies/json_query.py +0 -66
- exonware/xwnode/queries/strategies/jsoniq.py +0 -248
- exonware/xwnode/queries/strategies/kql.py +0 -70
- exonware/xwnode/queries/strategies/linq.py +0 -238
- exonware/xwnode/queries/strategies/logql.py +0 -70
- exonware/xwnode/queries/strategies/mql.py +0 -68
- exonware/xwnode/queries/strategies/n1ql.py +0 -210
- exonware/xwnode/queries/strategies/partiql.py +0 -70
- exonware/xwnode/queries/strategies/pig.py +0 -215
- exonware/xwnode/queries/strategies/promql.py +0 -70
- exonware/xwnode/queries/strategies/sparql.py +0 -220
- exonware/xwnode/queries/strategies/sql.py +0 -275
- exonware/xwnode/queries/strategies/xml_query.py +0 -66
- exonware/xwnode/queries/strategies/xpath.py +0 -223
- exonware/xwnode/queries/strategies/xquery.py +0 -258
- exonware/xwnode/queries/strategies/xwnode_executor.py +0 -332
- exonware/xwnode/queries/strategies/xwquery.py +0 -456
- exonware_xwnode-0.0.1.21.dist-info/RECORD +0 -214
- /exonware/xwnode/nodes/strategies/{node_ordered_map.py → ordered_map.py} +0 -0
- /exonware/xwnode/nodes/strategies/{node_ordered_map_balanced.py → ordered_map_balanced.py} +0 -0
- /exonware/xwnode/nodes/strategies/{node_patricia.py → patricia.py} +0 -0
- /exonware/xwnode/nodes/strategies/{node_radix_trie.py → radix_trie.py} +0 -0
- /exonware/xwnode/nodes/strategies/{node_set_tree.py → set_tree.py} +0 -0
- {exonware_xwnode-0.0.1.21.dist-info → exonware_xwnode-0.0.1.23.dist-info}/WHEEL +0 -0
- {exonware_xwnode-0.0.1.21.dist-info → exonware_xwnode-0.0.1.23.dist-info}/licenses/LICENSE +0 -0
@@ -27,7 +27,7 @@ class ACNode:
|
|
27
27
|
return len(self.children) == 0
|
28
28
|
|
29
29
|
|
30
|
-
class
|
30
|
+
class AhoCorasickStrategy(ANodeTreeStrategy):
|
31
31
|
"""
|
32
32
|
Aho-Corasick node strategy for multi-pattern string matching.
|
33
33
|
|
@@ -41,9 +41,7 @@ or linear-time matching.
|
|
41
41
|
|
42
42
|
def __init__(self, traits: NodeTrait = NodeTrait.NONE, **options):
|
43
43
|
"""Initialize the Aho-Corasick strategy."""
|
44
|
-
super().__init__(
|
45
|
-
self._mode = NodeMode.AHO_CORASICK
|
46
|
-
self._traits = traits
|
44
|
+
super().__init__(NodeMode.AHO_CORASICK, traits, **options)
|
47
45
|
|
48
46
|
self.case_sensitive = options.get('case_sensitive', True)
|
49
47
|
self.enable_overlapping = options.get('enable_overlapping', True)
|
@@ -55,167 +53,324 @@ or linear-time matching.
|
|
55
53
|
self._pattern_to_index: Dict[str, int] = {}
|
56
54
|
self._automaton_built = False
|
57
55
|
|
58
|
-
#
|
59
|
-
self.
|
60
|
-
self.
|
56
|
+
# Key-value mapping for compatibility
|
57
|
+
self._values: Dict[str, Any] = {}
|
58
|
+
self._size = 0
|
59
|
+
|
60
|
+
# Statistics
|
61
|
+
self._total_nodes = 1 # Root node
|
62
|
+
self._max_depth = 0
|
63
|
+
self._search_cache: Dict[str, List[Tuple[str, int]]] = {}
|
61
64
|
|
62
65
|
def get_supported_traits(self) -> NodeTrait:
|
63
66
|
"""Get the traits supported by the Aho-Corasick strategy."""
|
64
|
-
return (NodeTrait.ORDERED | NodeTrait.
|
67
|
+
return (NodeTrait.ORDERED | NodeTrait.INDEXED | NodeTrait.STREAMING)
|
68
|
+
|
69
|
+
def _preprocess_pattern(self, pattern: str) -> str:
|
70
|
+
"""Preprocess pattern based on settings."""
|
71
|
+
if not self.case_sensitive:
|
72
|
+
pattern = pattern.lower()
|
73
|
+
return pattern
|
74
|
+
|
75
|
+
def _preprocess_text(self, text: str) -> str:
|
76
|
+
"""Preprocess text based on settings."""
|
77
|
+
if not self.case_sensitive:
|
78
|
+
text = text.lower()
|
79
|
+
return text
|
80
|
+
|
81
|
+
def _add_pattern_to_trie(self, pattern: str, pattern_index: int) -> None:
|
82
|
+
"""Add pattern to the trie structure."""
|
83
|
+
current = self._root
|
84
|
+
depth = 0
|
85
|
+
|
86
|
+
for char in pattern:
|
87
|
+
if char not in current.children:
|
88
|
+
current.children[char] = ACNode()
|
89
|
+
current.children[char].depth = depth + 1
|
90
|
+
self._total_nodes += 1
|
91
|
+
|
92
|
+
current = current.children[char]
|
93
|
+
depth += 1
|
94
|
+
|
95
|
+
# Mark end of pattern
|
96
|
+
current.output.add(pattern)
|
97
|
+
current.pattern_indices.add(pattern_index)
|
98
|
+
self._max_depth = max(self._max_depth, depth)
|
99
|
+
|
100
|
+
def _build_failure_links(self) -> None:
|
101
|
+
"""Build failure links using BFS."""
|
102
|
+
queue = deque()
|
103
|
+
|
104
|
+
# Initialize failure links for root's children
|
105
|
+
for child in self._root.children.values():
|
106
|
+
child.failure = self._root
|
107
|
+
queue.append(child)
|
108
|
+
|
109
|
+
# Build failure links for all other nodes
|
110
|
+
while queue:
|
111
|
+
current = queue.popleft()
|
112
|
+
|
113
|
+
for char, child in current.children.items():
|
114
|
+
queue.append(child)
|
115
|
+
|
116
|
+
# Find the failure link
|
117
|
+
failure_node = current.failure
|
118
|
+
|
119
|
+
while failure_node is not None and char not in failure_node.children:
|
120
|
+
failure_node = failure_node.failure
|
121
|
+
|
122
|
+
if failure_node is not None:
|
123
|
+
child.failure = failure_node.children[char]
|
124
|
+
else:
|
125
|
+
child.failure = self._root
|
126
|
+
|
127
|
+
# Add output from failure node
|
128
|
+
if child.failure:
|
129
|
+
child.output.update(child.failure.output)
|
130
|
+
child.pattern_indices.update(child.failure.pattern_indices)
|
131
|
+
|
132
|
+
def _build_automaton(self) -> None:
|
133
|
+
"""Build the complete Aho-Corasick automaton."""
|
134
|
+
if self._automaton_built:
|
135
|
+
return
|
136
|
+
|
137
|
+
# Build failure links
|
138
|
+
self._build_failure_links()
|
139
|
+
self._automaton_built = True
|
140
|
+
self._search_cache.clear()
|
141
|
+
|
142
|
+
def _rebuild_automaton(self) -> None:
|
143
|
+
"""Rebuild the automaton from scratch."""
|
144
|
+
# Reset automaton
|
145
|
+
self._root = ACNode()
|
146
|
+
self._total_nodes = 1
|
147
|
+
self._max_depth = 0
|
148
|
+
self._automaton_built = False
|
149
|
+
self._search_cache.clear()
|
150
|
+
|
151
|
+
# Rebuild trie
|
152
|
+
for i, pattern in enumerate(self._patterns):
|
153
|
+
self._add_pattern_to_trie(pattern, i)
|
154
|
+
|
155
|
+
# Build failure links
|
156
|
+
self._build_automaton()
|
65
157
|
|
66
158
|
# ============================================================================
|
67
|
-
# CORE OPERATIONS
|
159
|
+
# CORE OPERATIONS (Key-based interface for compatibility)
|
68
160
|
# ============================================================================
|
69
161
|
|
70
|
-
def
|
71
|
-
"""
|
162
|
+
def put(self, key: Any, value: Any = None) -> None:
|
163
|
+
"""Add pattern to automaton."""
|
72
164
|
pattern = str(key)
|
73
|
-
|
74
|
-
pattern = pattern.lower()
|
165
|
+
processed_pattern = self._preprocess_pattern(pattern)
|
75
166
|
|
76
|
-
if len(
|
77
|
-
raise ValueError(f"Pattern
|
167
|
+
if len(processed_pattern) > self.max_pattern_length:
|
168
|
+
raise ValueError(f"Pattern length {len(processed_pattern)} exceeds maximum {self.max_pattern_length}")
|
78
169
|
|
79
|
-
if
|
80
|
-
|
81
|
-
|
170
|
+
if processed_pattern not in self._pattern_to_index:
|
171
|
+
# Add new pattern
|
172
|
+
pattern_index = len(self._patterns)
|
173
|
+
self._patterns.append(processed_pattern)
|
174
|
+
self._pattern_to_index[processed_pattern] = pattern_index
|
175
|
+
|
176
|
+
# Add to trie
|
177
|
+
self._add_pattern_to_trie(processed_pattern, pattern_index)
|
82
178
|
self._automaton_built = False
|
83
|
-
self.
|
179
|
+
self._size += 1
|
180
|
+
|
181
|
+
# Store value
|
182
|
+
self._values[pattern] = value if value is not None else pattern
|
84
183
|
|
85
|
-
def
|
86
|
-
"""
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
184
|
+
def get(self, key: Any, default: Any = None) -> Any:
|
185
|
+
"""Get value by key."""
|
186
|
+
key_str = str(key)
|
187
|
+
|
188
|
+
if key_str == "patterns":
|
189
|
+
return self._patterns.copy()
|
190
|
+
elif key_str == "automaton_info":
|
191
|
+
return {
|
192
|
+
'total_nodes': self._total_nodes,
|
193
|
+
'max_depth': self._max_depth,
|
194
|
+
'automaton_built': self._automaton_built,
|
195
|
+
'pattern_count': len(self._patterns)
|
196
|
+
}
|
197
|
+
elif key_str in self._values:
|
198
|
+
return self._values[key_str]
|
199
|
+
|
200
|
+
return default
|
91
201
|
|
92
|
-
def
|
93
|
-
"""
|
202
|
+
def has(self, key: Any) -> bool:
|
203
|
+
"""Check if key exists."""
|
204
|
+
key_str = str(key)
|
205
|
+
pattern = self._preprocess_pattern(key_str)
|
206
|
+
return pattern in self._pattern_to_index or key_str in self._values
|
207
|
+
|
208
|
+
def remove(self, key: Any) -> bool:
|
209
|
+
"""Remove pattern (requires automaton rebuild)."""
|
94
210
|
pattern = str(key)
|
95
|
-
|
96
|
-
pattern = pattern.lower()
|
211
|
+
processed_pattern = self._preprocess_pattern(pattern)
|
97
212
|
|
98
|
-
if
|
99
|
-
|
100
|
-
|
101
|
-
del self._pattern_to_index[
|
102
|
-
self.
|
103
|
-
|
213
|
+
if processed_pattern in self._pattern_to_index:
|
214
|
+
# Remove pattern
|
215
|
+
index = self._pattern_to_index[processed_pattern]
|
216
|
+
del self._pattern_to_index[processed_pattern]
|
217
|
+
self._patterns.pop(index)
|
218
|
+
|
219
|
+
# Update indices
|
220
|
+
for i, p in enumerate(self._patterns):
|
221
|
+
self._pattern_to_index[p] = i
|
222
|
+
|
223
|
+
# Remove value
|
224
|
+
self._values.pop(pattern, None)
|
225
|
+
self._size -= 1
|
226
|
+
|
227
|
+
# Rebuild automaton
|
228
|
+
self._rebuild_automaton()
|
104
229
|
return True
|
230
|
+
|
105
231
|
return False
|
106
232
|
|
107
|
-
def
|
108
|
-
"""
|
109
|
-
return self.
|
110
|
-
|
111
|
-
def is_empty(self) -> bool:
|
112
|
-
"""Check if the structure is empty."""
|
113
|
-
return self._size_tracker == 0
|
114
|
-
|
115
|
-
def to_native(self) -> Dict[str, Any]:
|
116
|
-
"""Convert to native Python dictionary."""
|
117
|
-
return {pattern: index for pattern, index in self._pattern_to_index.items()}
|
233
|
+
def delete(self, key: Any) -> bool:
|
234
|
+
"""Remove pattern (alias for remove)."""
|
235
|
+
return self.remove(key)
|
118
236
|
|
119
|
-
|
120
|
-
|
121
|
-
|
237
|
+
def clear(self) -> None:
|
238
|
+
"""Clear all patterns."""
|
239
|
+
self._root = ACNode()
|
240
|
+
self._patterns.clear()
|
241
|
+
self._pattern_to_index.clear()
|
242
|
+
self._values.clear()
|
243
|
+
self._search_cache.clear()
|
244
|
+
|
245
|
+
self._total_nodes = 1
|
246
|
+
self._max_depth = 0
|
247
|
+
self._automaton_built = False
|
248
|
+
self._size = 0
|
122
249
|
|
123
|
-
def
|
124
|
-
"""
|
125
|
-
|
250
|
+
def keys(self) -> Iterator[str]:
|
251
|
+
"""Get all pattern keys."""
|
252
|
+
for pattern in self._patterns:
|
253
|
+
yield pattern
|
254
|
+
yield "patterns"
|
255
|
+
yield "automaton_info"
|
126
256
|
|
127
|
-
def
|
128
|
-
"""Get
|
129
|
-
|
257
|
+
def values(self) -> Iterator[Any]:
|
258
|
+
"""Get all values."""
|
259
|
+
for value in self._values.values():
|
260
|
+
yield value
|
261
|
+
yield self._patterns.copy()
|
262
|
+
yield self.get("automaton_info")
|
130
263
|
|
131
|
-
def
|
132
|
-
"""Get
|
133
|
-
|
264
|
+
def items(self) -> Iterator[tuple[str, Any]]:
|
265
|
+
"""Get all key-value pairs."""
|
266
|
+
for key, value in self._values.items():
|
267
|
+
yield (key, value)
|
268
|
+
yield ("patterns", self._patterns.copy())
|
269
|
+
yield ("automaton_info", self.get("automaton_info"))
|
134
270
|
|
135
|
-
|
136
|
-
|
137
|
-
|
271
|
+
def __len__(self) -> int:
|
272
|
+
"""Get number of patterns."""
|
273
|
+
return self._size
|
138
274
|
|
139
|
-
def
|
140
|
-
"""
|
141
|
-
|
275
|
+
def to_native(self) -> Dict[str, Any]:
|
276
|
+
"""Convert to native Python dict."""
|
277
|
+
result = dict(self._values)
|
278
|
+
result["patterns"] = self._patterns.copy()
|
279
|
+
result["automaton_info"] = self.get("automaton_info")
|
280
|
+
return result
|
142
281
|
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
return
|
282
|
+
@property
|
283
|
+
def is_list(self) -> bool:
|
284
|
+
"""This can behave like a list for pattern access."""
|
285
|
+
return True
|
147
286
|
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
return
|
287
|
+
@property
|
288
|
+
def is_dict(self) -> bool:
|
289
|
+
"""This behaves like a dict."""
|
290
|
+
return True
|
152
291
|
|
153
292
|
# ============================================================================
|
154
293
|
# AHO-CORASICK SPECIFIC OPERATIONS
|
155
294
|
# ============================================================================
|
156
295
|
|
157
296
|
def add_pattern(self, pattern: str, metadata: Any = None) -> None:
|
158
|
-
"""Add
|
159
|
-
self.
|
297
|
+
"""Add pattern with optional metadata."""
|
298
|
+
self.put(pattern, metadata)
|
160
299
|
|
161
300
|
def search_text(self, text: str) -> List[Tuple[str, int, Any]]:
|
162
|
-
"""Search for all
|
163
|
-
if not self.
|
164
|
-
|
301
|
+
"""Search for all pattern matches in text."""
|
302
|
+
if not text or not self._patterns:
|
303
|
+
return []
|
304
|
+
|
305
|
+
# Check cache
|
306
|
+
cache_key = text[:100] # Cache based on first 100 chars
|
307
|
+
if cache_key in self._search_cache and len(text) <= 100:
|
308
|
+
return self._search_cache[cache_key]
|
165
309
|
|
166
|
-
|
310
|
+
processed_text = self._preprocess_text(text)
|
311
|
+
self._build_automaton()
|
312
|
+
|
313
|
+
matches = []
|
167
314
|
current = self._root
|
168
315
|
|
169
|
-
for i, char in enumerate(
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
# Follow failure links if needed
|
174
|
-
while current != self._root and char not in current.children:
|
316
|
+
for i, char in enumerate(processed_text):
|
317
|
+
# Follow failure links until we find a valid transition
|
318
|
+
while current is not None and char not in current.children:
|
175
319
|
current = current.failure
|
176
320
|
|
177
|
-
|
178
|
-
|
179
|
-
|
321
|
+
if current is None:
|
322
|
+
current = self._root
|
323
|
+
continue
|
180
324
|
|
181
|
-
|
325
|
+
current = current.children[char]
|
326
|
+
|
327
|
+
# Report all patterns that end at this position
|
182
328
|
for pattern in current.output:
|
183
|
-
|
184
|
-
|
329
|
+
start_pos = i - len(pattern) + 1
|
330
|
+
metadata = self._values.get(pattern, None)
|
331
|
+
matches.append((pattern, start_pos, metadata))
|
332
|
+
|
333
|
+
# Cache small results
|
334
|
+
if len(text) <= 100:
|
335
|
+
self._search_cache[cache_key] = matches
|
185
336
|
|
186
|
-
return
|
337
|
+
return matches
|
187
338
|
|
188
339
|
def find_all_matches(self, text: str) -> Dict[str, List[int]]:
|
189
|
-
"""Find all
|
340
|
+
"""Find all positions where each pattern matches."""
|
190
341
|
matches = self.search_text(text)
|
191
342
|
result = defaultdict(list)
|
192
343
|
|
193
344
|
for pattern, position, _ in matches:
|
194
345
|
result[pattern].append(position)
|
195
346
|
|
347
|
+
# Convert to regular dict
|
196
348
|
return dict(result)
|
197
349
|
|
198
350
|
def count_matches(self, text: str) -> Dict[str, int]:
|
199
|
-
"""Count
|
200
|
-
|
201
|
-
return {pattern: len(positions) for pattern, positions in
|
351
|
+
"""Count occurrences of each pattern."""
|
352
|
+
matches = self.find_all_matches(text)
|
353
|
+
return {pattern: len(positions) for pattern, positions in matches.items()}
|
202
354
|
|
203
355
|
def has_any_match(self, text: str) -> bool:
|
204
|
-
"""Check if
|
205
|
-
if not self.
|
206
|
-
|
356
|
+
"""Check if text contains any of the patterns."""
|
357
|
+
if not text or not self._patterns:
|
358
|
+
return False
|
359
|
+
|
360
|
+
processed_text = self._preprocess_text(text)
|
361
|
+
self._build_automaton()
|
207
362
|
|
208
363
|
current = self._root
|
209
364
|
|
210
|
-
for char in
|
211
|
-
|
212
|
-
char = char.lower()
|
213
|
-
|
214
|
-
while current != self._root and char not in current.children:
|
365
|
+
for char in processed_text:
|
366
|
+
while current is not None and char not in current.children:
|
215
367
|
current = current.failure
|
216
368
|
|
217
|
-
if
|
218
|
-
current =
|
369
|
+
if current is None:
|
370
|
+
current = self._root
|
371
|
+
continue
|
372
|
+
|
373
|
+
current = current.children[char]
|
219
374
|
|
220
375
|
if current.output:
|
221
376
|
return True
|
@@ -223,104 +378,113 @@ or linear-time matching.
|
|
223
378
|
return False
|
224
379
|
|
225
380
|
def find_longest_match(self, text: str) -> Optional[Tuple[str, int, int]]:
|
226
|
-
"""Find the longest
|
381
|
+
"""Find the longest pattern match in text."""
|
227
382
|
matches = self.search_text(text)
|
383
|
+
|
228
384
|
if not matches:
|
229
385
|
return None
|
230
386
|
|
231
|
-
# Find the longest match
|
232
387
|
longest = max(matches, key=lambda x: len(x[0]))
|
233
|
-
|
388
|
+
pattern, start_pos, _ = longest
|
389
|
+
return pattern, start_pos, len(pattern)
|
234
390
|
|
235
391
|
def replace_patterns(self, text: str, replacement_func: callable = None) -> str:
|
236
392
|
"""Replace all pattern matches in text."""
|
393
|
+
if not replacement_func:
|
394
|
+
replacement_func = lambda pattern, metadata: f"[{pattern}]"
|
395
|
+
|
237
396
|
matches = self.search_text(text)
|
397
|
+
|
238
398
|
if not matches:
|
239
399
|
return text
|
240
400
|
|
241
|
-
# Sort matches by position (descending) to
|
401
|
+
# Sort matches by position (descending) to avoid index shifts
|
242
402
|
matches.sort(key=lambda x: x[1], reverse=True)
|
243
403
|
|
244
404
|
result = text
|
245
|
-
for pattern,
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
replacement = f"[{pattern}]"
|
250
|
-
|
251
|
-
result = result[:position] + replacement + result[position + len(pattern):]
|
405
|
+
for pattern, start_pos, metadata in matches:
|
406
|
+
end_pos = start_pos + len(pattern)
|
407
|
+
replacement = replacement_func(pattern, metadata)
|
408
|
+
result = result[:start_pos] + replacement + result[end_pos:]
|
252
409
|
|
253
410
|
return result
|
254
411
|
|
255
|
-
def
|
256
|
-
"""
|
257
|
-
|
258
|
-
|
259
|
-
self._add_pattern_to_trie(pattern)
|
412
|
+
def get_pattern_statistics(self) -> Dict[str, Any]:
|
413
|
+
"""Get statistics about patterns and automaton."""
|
414
|
+
if not self._patterns:
|
415
|
+
return {'pattern_count': 0, 'total_nodes': 1, 'avg_pattern_length': 0}
|
260
416
|
|
261
|
-
|
262
|
-
|
417
|
+
pattern_lengths = [len(p) for p in self._patterns]
|
418
|
+
unique_chars = set()
|
419
|
+
for pattern in self._patterns:
|
420
|
+
unique_chars.update(pattern)
|
263
421
|
|
264
|
-
|
422
|
+
return {
|
423
|
+
'pattern_count': len(self._patterns),
|
424
|
+
'total_nodes': self._total_nodes,
|
425
|
+
'max_depth': self._max_depth,
|
426
|
+
'avg_pattern_length': sum(pattern_lengths) / len(pattern_lengths),
|
427
|
+
'min_pattern_length': min(pattern_lengths),
|
428
|
+
'max_pattern_length': max(pattern_lengths),
|
429
|
+
'unique_characters': len(unique_chars),
|
430
|
+
'alphabet_size': len(unique_chars),
|
431
|
+
'automaton_built': self._automaton_built,
|
432
|
+
'cache_size': len(self._search_cache)
|
433
|
+
}
|
265
434
|
|
266
|
-
def
|
267
|
-
"""
|
268
|
-
|
435
|
+
def validate_automaton(self) -> bool:
|
436
|
+
"""Validate the automaton structure."""
|
437
|
+
self._build_automaton()
|
269
438
|
|
270
|
-
|
271
|
-
if
|
272
|
-
|
273
|
-
current.children[char].depth = current.depth + 1
|
439
|
+
def _validate_node(node: ACNode, visited: Set[ACNode]) -> bool:
|
440
|
+
if node in visited:
|
441
|
+
return True
|
274
442
|
|
275
|
-
|
443
|
+
visited.add(node)
|
444
|
+
|
445
|
+
# Check failure link
|
446
|
+
if node != self._root and node.failure is None:
|
447
|
+
return False
|
448
|
+
|
449
|
+
# Check children
|
450
|
+
for child in node.children.values():
|
451
|
+
if not _validate_node(child, visited):
|
452
|
+
return False
|
453
|
+
|
454
|
+
return True
|
276
455
|
|
277
|
-
|
278
|
-
current.pattern_indices.add(pattern_index)
|
456
|
+
return _validate_node(self._root, set())
|
279
457
|
|
280
|
-
def
|
281
|
-
"""
|
282
|
-
|
458
|
+
def export_automaton(self) -> Dict[str, Any]:
|
459
|
+
"""Export automaton structure for analysis."""
|
460
|
+
self._build_automaton()
|
283
461
|
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
462
|
+
def _export_node(node: ACNode, node_id: int) -> Dict[str, Any]:
|
463
|
+
return {
|
464
|
+
'id': node_id,
|
465
|
+
'depth': node.depth,
|
466
|
+
'children': list(node.children.keys()),
|
467
|
+
'output': list(node.output),
|
468
|
+
'has_failure': node.failure is not None
|
469
|
+
}
|
288
470
|
|
289
|
-
|
290
|
-
|
291
|
-
|
471
|
+
nodes = []
|
472
|
+
node_queue = deque([(self._root, 0)])
|
473
|
+
node_id = 0
|
474
|
+
|
475
|
+
while node_queue:
|
476
|
+
node, current_id = node_queue.popleft()
|
477
|
+
nodes.append(_export_node(node, current_id))
|
292
478
|
|
293
|
-
for
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
child.failure = failure.children[char]
|
303
|
-
else:
|
304
|
-
child.failure = self._root
|
305
|
-
|
306
|
-
# Merge output sets
|
307
|
-
child.output.update(child.failure.output)
|
308
|
-
|
309
|
-
# ============================================================================
|
310
|
-
# ITERATION
|
311
|
-
# ============================================================================
|
312
|
-
|
313
|
-
def keys(self) -> Iterator[str]:
|
314
|
-
"""Get all patterns."""
|
315
|
-
return iter(self._patterns)
|
316
|
-
|
317
|
-
def values(self) -> Iterator[Any]:
|
318
|
-
"""Get all pattern indices."""
|
319
|
-
return iter(range(len(self._patterns)))
|
320
|
-
|
321
|
-
def items(self) -> Iterator[tuple[str, Any]]:
|
322
|
-
"""Get all pattern-index pairs."""
|
323
|
-
return ((pattern, index) for pattern, index in self._pattern_to_index.items())
|
479
|
+
for child in node.children.values():
|
480
|
+
node_id += 1
|
481
|
+
node_queue.append((child, node_id))
|
482
|
+
|
483
|
+
return {
|
484
|
+
'nodes': nodes,
|
485
|
+
'patterns': self._patterns.copy(),
|
486
|
+
'statistics': self.get_pattern_statistics()
|
487
|
+
}
|
324
488
|
|
325
489
|
# ============================================================================
|
326
490
|
# PERFORMANCE CHARACTERISTICS
|
@@ -331,20 +495,31 @@ or linear-time matching.
|
|
331
495
|
"""Get backend implementation info."""
|
332
496
|
return {
|
333
497
|
'strategy': 'AHO_CORASICK',
|
334
|
-
'backend': '
|
498
|
+
'backend': 'Finite automaton with failure links',
|
499
|
+
'case_sensitive': self.case_sensitive,
|
500
|
+
'enable_overlapping': self.enable_overlapping,
|
501
|
+
'max_pattern_length': self.max_pattern_length,
|
335
502
|
'complexity': {
|
336
|
-
'
|
337
|
-
'search': 'O(text
|
338
|
-
'space': 'O(
|
503
|
+
'construction': 'O(Σ|patterns|)', # Σ = alphabet size
|
504
|
+
'search': 'O(|text| + |matches|)',
|
505
|
+
'space': 'O(Σ|patterns|)',
|
506
|
+
'pattern_addition': 'O(|pattern|)',
|
507
|
+
'pattern_removal': 'O(Σ|patterns|)' # Requires rebuild
|
339
508
|
}
|
340
509
|
}
|
341
510
|
|
342
511
|
@property
|
343
512
|
def metrics(self) -> Dict[str, Any]:
|
344
513
|
"""Get performance metrics."""
|
514
|
+
stats = self.get_pattern_statistics()
|
515
|
+
|
345
516
|
return {
|
346
|
-
'patterns':
|
347
|
-
'
|
348
|
-
'
|
349
|
-
'
|
517
|
+
'patterns': stats['pattern_count'],
|
518
|
+
'nodes': stats['total_nodes'],
|
519
|
+
'max_depth': stats['max_depth'],
|
520
|
+
'avg_pattern_length': f"{stats['avg_pattern_length']:.1f}",
|
521
|
+
'alphabet_size': stats['alphabet_size'],
|
522
|
+
'automaton_built': stats['automaton_built'],
|
523
|
+
'cache_entries': stats['cache_size'],
|
524
|
+
'memory_usage': f"{stats['total_nodes'] * 100 + len(self._patterns) * 50} bytes (estimated)"
|
350
525
|
}
|