exonware-xwnode 0.0.1.22__py3-none-any.whl → 0.0.1.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (249) hide show
  1. exonware/__init__.py +1 -1
  2. exonware/xwnode/__init__.py +18 -5
  3. exonware/xwnode/add_strategy_types.py +165 -0
  4. exonware/xwnode/common/__init__.py +1 -1
  5. exonware/xwnode/common/graph/__init__.py +30 -0
  6. exonware/xwnode/common/graph/caching.py +131 -0
  7. exonware/xwnode/common/graph/contracts.py +100 -0
  8. exonware/xwnode/common/graph/errors.py +44 -0
  9. exonware/xwnode/common/graph/indexing.py +260 -0
  10. exonware/xwnode/common/graph/manager.py +568 -0
  11. exonware/xwnode/common/management/__init__.py +3 -5
  12. exonware/xwnode/common/management/manager.py +2 -2
  13. exonware/xwnode/common/management/migration.py +3 -3
  14. exonware/xwnode/common/monitoring/__init__.py +3 -5
  15. exonware/xwnode/common/monitoring/metrics.py +6 -2
  16. exonware/xwnode/common/monitoring/pattern_detector.py +1 -1
  17. exonware/xwnode/common/monitoring/performance_monitor.py +5 -1
  18. exonware/xwnode/common/patterns/__init__.py +3 -5
  19. exonware/xwnode/common/patterns/flyweight.py +5 -1
  20. exonware/xwnode/common/patterns/registry.py +202 -183
  21. exonware/xwnode/common/utils/__init__.py +25 -11
  22. exonware/xwnode/common/utils/simple.py +1 -1
  23. exonware/xwnode/config.py +3 -8
  24. exonware/xwnode/contracts.py +4 -105
  25. exonware/xwnode/defs.py +413 -159
  26. exonware/xwnode/edges/strategies/__init__.py +86 -4
  27. exonware/xwnode/edges/strategies/_base_edge.py +2 -2
  28. exonware/xwnode/edges/strategies/adj_list.py +287 -121
  29. exonware/xwnode/edges/strategies/adj_matrix.py +316 -222
  30. exonware/xwnode/edges/strategies/base.py +1 -1
  31. exonware/xwnode/edges/strategies/{edge_bidir_wrapper.py → bidir_wrapper.py} +45 -4
  32. exonware/xwnode/edges/strategies/bitemporal.py +520 -0
  33. exonware/xwnode/edges/strategies/{edge_block_adj_matrix.py → block_adj_matrix.py} +77 -6
  34. exonware/xwnode/edges/strategies/bv_graph.py +664 -0
  35. exonware/xwnode/edges/strategies/compressed_graph.py +217 -0
  36. exonware/xwnode/edges/strategies/{edge_coo.py → coo.py} +46 -4
  37. exonware/xwnode/edges/strategies/{edge_csc.py → csc.py} +45 -4
  38. exonware/xwnode/edges/strategies/{edge_csr.py → csr.py} +94 -12
  39. exonware/xwnode/edges/strategies/{edge_dynamic_adj_list.py → dynamic_adj_list.py} +46 -4
  40. exonware/xwnode/edges/strategies/edge_list.py +168 -0
  41. exonware/xwnode/edges/strategies/edge_property_store.py +2 -2
  42. exonware/xwnode/edges/strategies/euler_tour.py +560 -0
  43. exonware/xwnode/edges/strategies/{edge_flow_network.py → flow_network.py} +2 -2
  44. exonware/xwnode/edges/strategies/graphblas.py +449 -0
  45. exonware/xwnode/edges/strategies/hnsw.py +637 -0
  46. exonware/xwnode/edges/strategies/hop2_labels.py +467 -0
  47. exonware/xwnode/edges/strategies/{edge_hyperedge_set.py → hyperedge_set.py} +2 -2
  48. exonware/xwnode/edges/strategies/incidence_matrix.py +250 -0
  49. exonware/xwnode/edges/strategies/k2_tree.py +613 -0
  50. exonware/xwnode/edges/strategies/link_cut.py +626 -0
  51. exonware/xwnode/edges/strategies/multiplex.py +532 -0
  52. exonware/xwnode/edges/strategies/{edge_neural_graph.py → neural_graph.py} +2 -2
  53. exonware/xwnode/edges/strategies/{edge_octree.py → octree.py} +69 -11
  54. exonware/xwnode/edges/strategies/{edge_quadtree.py → quadtree.py} +66 -10
  55. exonware/xwnode/edges/strategies/roaring_adj.py +438 -0
  56. exonware/xwnode/edges/strategies/{edge_rtree.py → rtree.py} +43 -5
  57. exonware/xwnode/edges/strategies/{edge_temporal_edgeset.py → temporal_edgeset.py} +24 -5
  58. exonware/xwnode/edges/strategies/{edge_tree_graph_basic.py → tree_graph_basic.py} +78 -7
  59. exonware/xwnode/edges/strategies/{edge_weighted_graph.py → weighted_graph.py} +188 -10
  60. exonware/xwnode/errors.py +3 -6
  61. exonware/xwnode/facade.py +20 -20
  62. exonware/xwnode/nodes/strategies/__init__.py +29 -9
  63. exonware/xwnode/nodes/strategies/adjacency_list.py +650 -177
  64. exonware/xwnode/nodes/strategies/aho_corasick.py +358 -183
  65. exonware/xwnode/nodes/strategies/array_list.py +36 -3
  66. exonware/xwnode/nodes/strategies/art.py +581 -0
  67. exonware/xwnode/nodes/strategies/{node_avl_tree.py → avl_tree.py} +77 -6
  68. exonware/xwnode/nodes/strategies/{node_b_plus_tree.py → b_plus_tree.py} +81 -40
  69. exonware/xwnode/nodes/strategies/{node_btree.py → b_tree.py} +79 -9
  70. exonware/xwnode/nodes/strategies/base.py +469 -98
  71. exonware/xwnode/nodes/strategies/{node_bitmap.py → bitmap.py} +12 -12
  72. exonware/xwnode/nodes/strategies/{node_bitset_dynamic.py → bitset_dynamic.py} +11 -11
  73. exonware/xwnode/nodes/strategies/{node_bloom_filter.py → bloom_filter.py} +15 -2
  74. exonware/xwnode/nodes/strategies/bloomier_filter.py +519 -0
  75. exonware/xwnode/nodes/strategies/bw_tree.py +531 -0
  76. exonware/xwnode/nodes/strategies/contracts.py +1 -1
  77. exonware/xwnode/nodes/strategies/{node_count_min_sketch.py → count_min_sketch.py} +3 -2
  78. exonware/xwnode/nodes/strategies/{node_cow_tree.py → cow_tree.py} +135 -13
  79. exonware/xwnode/nodes/strategies/crdt_map.py +629 -0
  80. exonware/xwnode/nodes/strategies/{node_cuckoo_hash.py → cuckoo_hash.py} +2 -2
  81. exonware/xwnode/nodes/strategies/{node_xdata_optimized.py → data_interchange_optimized.py} +21 -4
  82. exonware/xwnode/nodes/strategies/dawg.py +876 -0
  83. exonware/xwnode/nodes/strategies/deque.py +321 -153
  84. exonware/xwnode/nodes/strategies/extendible_hash.py +93 -0
  85. exonware/xwnode/nodes/strategies/{node_fenwick_tree.py → fenwick_tree.py} +111 -19
  86. exonware/xwnode/nodes/strategies/hamt.py +403 -0
  87. exonware/xwnode/nodes/strategies/hash_map.py +354 -67
  88. exonware/xwnode/nodes/strategies/heap.py +105 -5
  89. exonware/xwnode/nodes/strategies/hopscotch_hash.py +525 -0
  90. exonware/xwnode/nodes/strategies/{node_hyperloglog.py → hyperloglog.py} +6 -5
  91. exonware/xwnode/nodes/strategies/interval_tree.py +742 -0
  92. exonware/xwnode/nodes/strategies/kd_tree.py +703 -0
  93. exonware/xwnode/nodes/strategies/learned_index.py +533 -0
  94. exonware/xwnode/nodes/strategies/linear_hash.py +93 -0
  95. exonware/xwnode/nodes/strategies/linked_list.py +316 -119
  96. exonware/xwnode/nodes/strategies/{node_lsm_tree.py → lsm_tree.py} +219 -15
  97. exonware/xwnode/nodes/strategies/masstree.py +130 -0
  98. exonware/xwnode/nodes/strategies/{node_persistent_tree.py → persistent_tree.py} +149 -9
  99. exonware/xwnode/nodes/strategies/priority_queue.py +544 -132
  100. exonware/xwnode/nodes/strategies/queue.py +249 -120
  101. exonware/xwnode/nodes/strategies/{node_red_black_tree.py → red_black_tree.py} +183 -72
  102. exonware/xwnode/nodes/strategies/{node_roaring_bitmap.py → roaring_bitmap.py} +19 -6
  103. exonware/xwnode/nodes/strategies/rope.py +717 -0
  104. exonware/xwnode/nodes/strategies/{node_segment_tree.py → segment_tree.py} +106 -106
  105. exonware/xwnode/nodes/strategies/{node_set_hash.py → set_hash.py} +30 -29
  106. exonware/xwnode/nodes/strategies/{node_skip_list.py → skip_list.py} +74 -6
  107. exonware/xwnode/nodes/strategies/sparse_matrix.py +427 -131
  108. exonware/xwnode/nodes/strategies/{node_splay_tree.py → splay_tree.py} +55 -6
  109. exonware/xwnode/nodes/strategies/stack.py +244 -112
  110. exonware/xwnode/nodes/strategies/{node_suffix_array.py → suffix_array.py} +5 -1
  111. exonware/xwnode/nodes/strategies/t_tree.py +94 -0
  112. exonware/xwnode/nodes/strategies/{node_treap.py → treap.py} +75 -6
  113. exonware/xwnode/nodes/strategies/{node_tree_graph_hybrid.py → tree_graph_hybrid.py} +46 -5
  114. exonware/xwnode/nodes/strategies/trie.py +153 -9
  115. exonware/xwnode/nodes/strategies/union_find.py +111 -5
  116. exonware/xwnode/nodes/strategies/veb_tree.py +856 -0
  117. exonware/xwnode/strategies/__init__.py +5 -51
  118. exonware/xwnode/version.py +3 -3
  119. exonware_xwnode-0.0.1.24.dist-info/METADATA +900 -0
  120. exonware_xwnode-0.0.1.24.dist-info/RECORD +130 -0
  121. exonware/xwnode/edges/strategies/edge_adj_list.py +0 -353
  122. exonware/xwnode/edges/strategies/edge_adj_matrix.py +0 -445
  123. exonware/xwnode/nodes/strategies/_base_node.py +0 -307
  124. exonware/xwnode/nodes/strategies/node_aho_corasick.py +0 -525
  125. exonware/xwnode/nodes/strategies/node_array_list.py +0 -179
  126. exonware/xwnode/nodes/strategies/node_hash_map.py +0 -273
  127. exonware/xwnode/nodes/strategies/node_heap.py +0 -196
  128. exonware/xwnode/nodes/strategies/node_linked_list.py +0 -413
  129. exonware/xwnode/nodes/strategies/node_trie.py +0 -257
  130. exonware/xwnode/nodes/strategies/node_union_find.py +0 -192
  131. exonware/xwnode/queries/executors/__init__.py +0 -47
  132. exonware/xwnode/queries/executors/advanced/__init__.py +0 -37
  133. exonware/xwnode/queries/executors/advanced/aggregate_executor.py +0 -50
  134. exonware/xwnode/queries/executors/advanced/ask_executor.py +0 -50
  135. exonware/xwnode/queries/executors/advanced/construct_executor.py +0 -50
  136. exonware/xwnode/queries/executors/advanced/describe_executor.py +0 -50
  137. exonware/xwnode/queries/executors/advanced/for_loop_executor.py +0 -50
  138. exonware/xwnode/queries/executors/advanced/foreach_executor.py +0 -50
  139. exonware/xwnode/queries/executors/advanced/join_executor.py +0 -50
  140. exonware/xwnode/queries/executors/advanced/let_executor.py +0 -50
  141. exonware/xwnode/queries/executors/advanced/mutation_executor.py +0 -50
  142. exonware/xwnode/queries/executors/advanced/options_executor.py +0 -50
  143. exonware/xwnode/queries/executors/advanced/pipe_executor.py +0 -50
  144. exonware/xwnode/queries/executors/advanced/subscribe_executor.py +0 -50
  145. exonware/xwnode/queries/executors/advanced/subscription_executor.py +0 -50
  146. exonware/xwnode/queries/executors/advanced/union_executor.py +0 -50
  147. exonware/xwnode/queries/executors/advanced/window_executor.py +0 -51
  148. exonware/xwnode/queries/executors/advanced/with_cte_executor.py +0 -50
  149. exonware/xwnode/queries/executors/aggregation/__init__.py +0 -21
  150. exonware/xwnode/queries/executors/aggregation/avg_executor.py +0 -50
  151. exonware/xwnode/queries/executors/aggregation/count_executor.py +0 -38
  152. exonware/xwnode/queries/executors/aggregation/distinct_executor.py +0 -50
  153. exonware/xwnode/queries/executors/aggregation/group_executor.py +0 -50
  154. exonware/xwnode/queries/executors/aggregation/having_executor.py +0 -50
  155. exonware/xwnode/queries/executors/aggregation/max_executor.py +0 -50
  156. exonware/xwnode/queries/executors/aggregation/min_executor.py +0 -50
  157. exonware/xwnode/queries/executors/aggregation/sum_executor.py +0 -50
  158. exonware/xwnode/queries/executors/aggregation/summarize_executor.py +0 -50
  159. exonware/xwnode/queries/executors/array/__init__.py +0 -9
  160. exonware/xwnode/queries/executors/array/indexing_executor.py +0 -51
  161. exonware/xwnode/queries/executors/array/slicing_executor.py +0 -51
  162. exonware/xwnode/queries/executors/base.py +0 -257
  163. exonware/xwnode/queries/executors/capability_checker.py +0 -204
  164. exonware/xwnode/queries/executors/contracts.py +0 -166
  165. exonware/xwnode/queries/executors/core/__init__.py +0 -17
  166. exonware/xwnode/queries/executors/core/create_executor.py +0 -96
  167. exonware/xwnode/queries/executors/core/delete_executor.py +0 -99
  168. exonware/xwnode/queries/executors/core/drop_executor.py +0 -100
  169. exonware/xwnode/queries/executors/core/insert_executor.py +0 -39
  170. exonware/xwnode/queries/executors/core/select_executor.py +0 -152
  171. exonware/xwnode/queries/executors/core/update_executor.py +0 -102
  172. exonware/xwnode/queries/executors/data/__init__.py +0 -13
  173. exonware/xwnode/queries/executors/data/alter_executor.py +0 -50
  174. exonware/xwnode/queries/executors/data/load_executor.py +0 -50
  175. exonware/xwnode/queries/executors/data/merge_executor.py +0 -50
  176. exonware/xwnode/queries/executors/data/store_executor.py +0 -50
  177. exonware/xwnode/queries/executors/defs.py +0 -93
  178. exonware/xwnode/queries/executors/engine.py +0 -221
  179. exonware/xwnode/queries/executors/errors.py +0 -68
  180. exonware/xwnode/queries/executors/filtering/__init__.py +0 -25
  181. exonware/xwnode/queries/executors/filtering/between_executor.py +0 -80
  182. exonware/xwnode/queries/executors/filtering/filter_executor.py +0 -79
  183. exonware/xwnode/queries/executors/filtering/has_executor.py +0 -70
  184. exonware/xwnode/queries/executors/filtering/in_executor.py +0 -70
  185. exonware/xwnode/queries/executors/filtering/like_executor.py +0 -76
  186. exonware/xwnode/queries/executors/filtering/optional_executor.py +0 -76
  187. exonware/xwnode/queries/executors/filtering/range_executor.py +0 -80
  188. exonware/xwnode/queries/executors/filtering/term_executor.py +0 -77
  189. exonware/xwnode/queries/executors/filtering/values_executor.py +0 -71
  190. exonware/xwnode/queries/executors/filtering/where_executor.py +0 -44
  191. exonware/xwnode/queries/executors/graph/__init__.py +0 -15
  192. exonware/xwnode/queries/executors/graph/in_traverse_executor.py +0 -51
  193. exonware/xwnode/queries/executors/graph/match_executor.py +0 -51
  194. exonware/xwnode/queries/executors/graph/out_executor.py +0 -51
  195. exonware/xwnode/queries/executors/graph/path_executor.py +0 -51
  196. exonware/xwnode/queries/executors/graph/return_executor.py +0 -51
  197. exonware/xwnode/queries/executors/ordering/__init__.py +0 -9
  198. exonware/xwnode/queries/executors/ordering/by_executor.py +0 -50
  199. exonware/xwnode/queries/executors/ordering/order_executor.py +0 -51
  200. exonware/xwnode/queries/executors/projection/__init__.py +0 -9
  201. exonware/xwnode/queries/executors/projection/extend_executor.py +0 -50
  202. exonware/xwnode/queries/executors/projection/project_executor.py +0 -50
  203. exonware/xwnode/queries/executors/registry.py +0 -173
  204. exonware/xwnode/queries/parsers/__init__.py +0 -26
  205. exonware/xwnode/queries/parsers/base.py +0 -86
  206. exonware/xwnode/queries/parsers/contracts.py +0 -46
  207. exonware/xwnode/queries/parsers/errors.py +0 -53
  208. exonware/xwnode/queries/parsers/sql_param_extractor.py +0 -318
  209. exonware/xwnode/queries/strategies/__init__.py +0 -24
  210. exonware/xwnode/queries/strategies/base.py +0 -236
  211. exonware/xwnode/queries/strategies/cql.py +0 -201
  212. exonware/xwnode/queries/strategies/cypher.py +0 -181
  213. exonware/xwnode/queries/strategies/datalog.py +0 -70
  214. exonware/xwnode/queries/strategies/elastic_dsl.py +0 -70
  215. exonware/xwnode/queries/strategies/eql.py +0 -70
  216. exonware/xwnode/queries/strategies/flux.py +0 -70
  217. exonware/xwnode/queries/strategies/gql.py +0 -70
  218. exonware/xwnode/queries/strategies/graphql.py +0 -240
  219. exonware/xwnode/queries/strategies/gremlin.py +0 -181
  220. exonware/xwnode/queries/strategies/hiveql.py +0 -214
  221. exonware/xwnode/queries/strategies/hql.py +0 -70
  222. exonware/xwnode/queries/strategies/jmespath.py +0 -219
  223. exonware/xwnode/queries/strategies/jq.py +0 -66
  224. exonware/xwnode/queries/strategies/json_query.py +0 -66
  225. exonware/xwnode/queries/strategies/jsoniq.py +0 -248
  226. exonware/xwnode/queries/strategies/kql.py +0 -70
  227. exonware/xwnode/queries/strategies/linq.py +0 -238
  228. exonware/xwnode/queries/strategies/logql.py +0 -70
  229. exonware/xwnode/queries/strategies/mql.py +0 -68
  230. exonware/xwnode/queries/strategies/n1ql.py +0 -210
  231. exonware/xwnode/queries/strategies/partiql.py +0 -70
  232. exonware/xwnode/queries/strategies/pig.py +0 -215
  233. exonware/xwnode/queries/strategies/promql.py +0 -70
  234. exonware/xwnode/queries/strategies/sparql.py +0 -220
  235. exonware/xwnode/queries/strategies/sql.py +0 -275
  236. exonware/xwnode/queries/strategies/xml_query.py +0 -66
  237. exonware/xwnode/queries/strategies/xpath.py +0 -223
  238. exonware/xwnode/queries/strategies/xquery.py +0 -258
  239. exonware/xwnode/queries/strategies/xwnode_executor.py +0 -332
  240. exonware/xwnode/queries/strategies/xwquery.py +0 -456
  241. exonware_xwnode-0.0.1.22.dist-info/METADATA +0 -168
  242. exonware_xwnode-0.0.1.22.dist-info/RECORD +0 -214
  243. /exonware/xwnode/nodes/strategies/{node_ordered_map.py → ordered_map.py} +0 -0
  244. /exonware/xwnode/nodes/strategies/{node_ordered_map_balanced.py → ordered_map_balanced.py} +0 -0
  245. /exonware/xwnode/nodes/strategies/{node_patricia.py → patricia.py} +0 -0
  246. /exonware/xwnode/nodes/strategies/{node_radix_trie.py → radix_trie.py} +0 -0
  247. /exonware/xwnode/nodes/strategies/{node_set_tree.py → set_tree.py} +0 -0
  248. {exonware_xwnode-0.0.1.22.dist-info → exonware_xwnode-0.0.1.24.dist-info}/WHEEL +0 -0
  249. {exonware_xwnode-0.0.1.22.dist-info → exonware_xwnode-0.0.1.24.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,876 @@
1
+ """
2
+ #exonware/xwnode/src/exonware/xwnode/nodes/strategies/dawg.py
3
+
4
+ DAWG (Directed Acyclic Word Graph) Node Strategy Implementation
5
+
6
+ This module implements the DAWG strategy for minimal automaton representation
7
+ of string sets with massive memory savings over standard tries.
8
+
9
+ Company: eXonware.com
10
+ Author: Eng. Muhammad AlShehri
11
+ Email: connect@exonware.com
12
+ Version: 0.0.1.24
13
+ Generation Date: 12-Oct-2025
14
+ """
15
+
16
+ from typing import Any, Iterator, List, Dict, Optional, Set, Tuple
17
+ from collections import defaultdict
18
+ from .base import ANodeTreeStrategy
19
+ from .contracts import NodeType
20
+ from ...defs import NodeMode, NodeTrait
21
+ from ...errors import XWNodeError, XWNodeValueError
22
+
23
+
24
class DawgNode:
    """
    Node in the DAWG structure.

    WHY suffix sharing:
    - Multiple words can share common suffixes
    - Structurally identical nodes can be stored once instead of per word

    Hash and equality are structural, so a node can be used as a dict key
    to look up an already-registered equivalent node.
    """

    def __init__(self):
        """Initialize an empty, non-final DAWG node."""
        # Outgoing transitions: single character -> child node.
        self.edges: Dict[str, 'DawgNode'] = {}
        # True when a stored word ends at this node.
        self.is_final = False
        # Payload associated with the word ending at this node.
        self.value: Any = None
        # Cached structural hash; None means "recompute on next __hash__".
        self._hash: Optional[int] = None
        # Object identity captured at construction time.
        self._id = id(self)

    def __hash__(self) -> int:
        """
        Hash based on structure for suffix sharing.

        Children contribute by identity (``id``), final status and payload
        contribute by value. The result is cached until invalidate_hash()
        is called.

        WHY the TypeError fallback:
        - Payloads may be unhashable (lists, dicts, ...)
        - Hashing must not make such payloads unusable as DAWG values
        - Equality still compares the payload itself, so the fallback only
          makes the hash coarser, never wrong
        """
        if self._hash is None:
            # Hash based on edges and final status
            edge_tuple = tuple(sorted(
                (char, id(node)) for char, node in self.edges.items()
            ))
            try:
                self._hash = hash((edge_tuple, self.is_final, self.value))
            except TypeError:
                # Unhashable payload: hash its type instead of its content.
                self._hash = hash(
                    (edge_tuple, self.is_final, type(self.value))
                )
        return self._hash

    def __eq__(self, other: Any) -> bool:
        """
        Structural equality for suffix sharing.

        Two nodes are equal when they agree on final status, payload and
        on every outgoing edge (compared recursively).

        Args:
            other: Object to compare with

        Returns:
            True if both nodes are structurally equivalent
        """
        # Identity shortcut: shared sub-graphs need not be walked again.
        if self is other:
            return True

        if not isinstance(other, DawgNode):
            return False

        if self.is_final != other.is_final:
            return False

        if self.value != other.value:
            return False

        if len(self.edges) != len(other.edges):
            return False

        for char, node in self.edges.items():
            twin = other.edges.get(char)
            if twin is None:
                return False
            if node is not twin and node != twin:
                return False

        return True

    def invalidate_hash(self) -> None:
        """Invalidate cached hash after modification."""
        self._hash = None
90
+
91
+
92
+ class DawgStrategy(ANodeTreeStrategy):
93
+ """
94
+ DAWG (Directed Acyclic Word Graph) strategy for minimal string storage.
95
+
96
+ WHY DAWG:
97
+ - 10-100x memory reduction vs standard trie through suffix sharing
98
+ - Perfect for large dictionaries, lexicons, spell checkers
99
+ - Fast prefix queries while using minimal space
100
+ - Deterministic automaton enables efficient string matching
101
+ - Excellent for autocomplete with memory constraints
102
+
103
+ WHY this implementation:
104
+ - Incremental construction allows online updates
105
+ - Structural hashing enables automatic suffix detection
106
+ - Final state markers support both sets and maps
107
+ - Value storage enables key-value DAWG variant
108
+ - Lazy minimization balances construction time and space
109
+
110
+ Time Complexity:
111
+ - Insert: O(k) where k is string length (amortized with minimization)
112
+ - Search: O(k) where k is string length
113
+ - Prefix query: O(k + m) where m is result size
114
+ - Delete: O(k) with lazy minimization
115
+ - Minimization: O(n log n) where n is total nodes
116
+
117
+ Space Complexity: O(c) where c is total unique characters across all suffixes
118
+ (10-100x smaller than trie which is O(alphabet_size × total_chars))
119
+
120
+ Trade-offs:
121
+ - Advantage: Massive space savings (10-100x vs trie)
122
+ - Advantage: Still O(k) lookups like trie
123
+ - Advantage: Perfect for read-heavy dictionary workloads
124
+ - Limitation: Construction more complex than trie
125
+ - Limitation: Minimization step adds overhead
126
+ - Limitation: Best for static or slowly-changing dictionaries
127
+ - Compared to Trie: Much smaller, same lookup speed
128
+ - Compared to HashMap: Supports prefix queries, more memory efficient
129
+
130
+ Best for:
131
+ - Large dictionaries and lexicons (>100k words)
132
+ - Spell checkers and autocomplete systems
133
+ - Natural language processing applications
134
+ - Genomics sequence storage
135
+ - Memory-constrained environments
136
+ - Read-heavy string matching workloads
137
+
138
+ Not recommended for:
139
+ - Small string sets (<1000 words) - overhead not worth it
140
+ - Frequently updated dictionaries - minimization expensive
141
+ - Non-string keys
142
+ - Random access by index (use array instead)
143
+ - When trie memory usage is acceptable
144
+ - Real-time insertion requirements
145
+
146
+ Following eXonware Priorities:
147
+ 1. Security: Validates string inputs, prevents malicious data
148
+ 2. Usability: Simple API for dictionary operations, clear errors
149
+ 3. Maintainability: Clean automaton structure, well-documented
150
+ 4. Performance: O(k) operations with minimal memory
151
+ 5. Extensibility: Easy to add pattern matching, fuzzy search
152
+
153
+ Industry Best Practices:
154
+ - Follows Daciuk et al. incremental construction algorithm
155
+ - Implements structural hashing for suffix detection
156
+ - Supports both DAWG (set) and DAFSA (map) variants
157
+ - Provides lazy minimization for performance
158
+ - Compatible with Aho-Corasick for multi-pattern matching
159
+ """
160
+
161
+ # Tree node type for classification
162
+ STRATEGY_TYPE: NodeType = NodeType.TREE
163
+
164
def __init__(self, mode: NodeMode = NodeMode.DAWG,
             traits: NodeTrait = NodeTrait.NONE, **options):
    """
    Set up an empty DAWG.

    Args:
        mode: Node mode (DAWG)
        traits: Node traits
        **options: Additional options, forwarded to the base strategy
    """
    super().__init__(mode, traits, **options)

    # Graph state: a single root and two counters that start at zero.
    self._root = DawgNode()
    self._size = self._word_count = 0

    # Bookkeeping for incremental minimization:
    # - the word inserted most recently,
    # - the registry of already-minimized nodes,
    # - the (parent, char, child) triples not yet minimized.
    self._previous_word = ""
    self._minimized_nodes: Dict[DawgNode, DawgNode] = {}
    self._unchecked_nodes: List[Tuple[DawgNode, str, DawgNode]] = []
184
+
185
def get_supported_traits(self) -> NodeTrait:
    """Return the combined trait flags this strategy advertises."""
    supported = NodeTrait.HIERARCHICAL | NodeTrait.INDEXED
    supported = supported | NodeTrait.MEMORY_EFFICIENT
    supported = supported | NodeTrait.PREFIX_TREE
    return supported
189
+
190
+ # ============================================================================
191
+ # CORE OPERATIONS
192
+ # ============================================================================
193
+
194
+ def put(self, key: Any, value: Any = None) -> None:
195
+ """
196
+ Insert word into DAWG.
197
+
198
+ Args:
199
+ key: String key (word)
200
+ value: Associated value
201
+
202
+ Raises:
203
+ XWNodeValueError: If key is not a string
204
+ """
205
+ # Security: Type validation
206
+ if not isinstance(key, str):
207
+ raise XWNodeValueError(
208
+ f"DAWG requires string keys, got {type(key).__name__}"
209
+ )
210
+
211
+ # Security: Empty string validation
212
+ if not key:
213
+ raise XWNodeValueError("DAWG does not support empty string keys")
214
+
215
+ # Incremental insertion with minimization
216
+ self._insert_with_minimization(key, value)
217
+ self._size += 1
218
+ self._word_count += 1
219
+
220
def _insert_with_minimization(self, word: str, value: Any) -> None:
    """
    Insert word using incremental minimization algorithm.

    The unchecked stack mirrors the path of the previously inserted word.
    The part of that path not shared with ``word`` is minimized, then the
    remaining suffix of ``word`` is appended below the shared prefix.

    WHY incremental minimization:
    - Avoids full reconstruction after each insert
    - Balances construction time and space efficiency

    WHY copy-on-write for existing edges:
    - Nodes off the unchecked stack may already be shared between words,
      so they are copied rather than modified or overwritten
    - This keeps earlier words intact when keys arrive out of order or
      after finish_construction(); such inserts may leave unmerged
      duplicates, so full compression still needs sorted input

    Args:
        word: Non-empty string key to insert
        value: Value stored at the word's final node
    """
    pending = self._unchecked_nodes

    # Find common prefix with previous word
    common_prefix_len = 0
    for new_char, old_char in zip(word, self._previous_word):
        if new_char != old_char:
            break
        common_prefix_len += 1

    # The stack is empty after finish_construction() even though the
    # previous word is still remembered; never trust a prefix that the
    # stack can no longer resolve to nodes.
    common_prefix_len = min(common_prefix_len, len(pending))

    # Minimize nodes from previous word
    self._minimize(common_prefix_len)

    # After minimizing, the stack holds exactly the shared prefix, so its
    # last child is the node to extend (the root when nothing is shared).
    current_node = pending[-1][2] if pending else self._root

    # Add suffix for current word
    for char in word[common_prefix_len:]:
        existing = current_node.edges.get(char)
        next_node = DawgNode()
        if existing is not None:
            # Existing target may be shared: continue on a private copy.
            next_node.edges = dict(existing.edges)
            next_node.is_final = existing.is_final
            next_node.value = existing.value
        current_node.edges[char] = next_node
        current_node.invalidate_hash()
        pending.append((current_node, char, next_node))
        current_node = next_node

    # Mark as final and store value
    current_node.is_final = True
    current_node.value = value
    current_node.invalidate_hash()
    self._previous_word = word
256
+
257
+ def _minimize(self, down_to: int) -> None:
258
+ """
259
+ Minimize unchecked nodes down to specified prefix length.
260
+
261
+ Args:
262
+ down_to: Prefix length to minimize to
263
+
264
+ WHY minimization:
265
+ - Merges structurally equivalent nodes
266
+ - Achieves suffix sharing compression
267
+ - Maintains DAWG minimality property
268
+ """
269
+ # Pop unchecked nodes and minimize
270
+ while len(self._unchecked_nodes) > down_to:
271
+ parent, char, child = self._unchecked_nodes.pop()
272
+
273
+ # Check if equivalent node exists
274
+ if child in self._minimized_nodes:
275
+ # Replace with existing equivalent node
276
+ parent.edges[char] = self._minimized_nodes[child]
277
+ else:
278
+ # Add to minimized set
279
+ self._minimized_nodes[child] = child
280
+
281
+ parent.invalidate_hash()
282
+
283
+ def finish_construction(self) -> None:
284
+ """
285
+ Finish DAWG construction by minimizing all remaining nodes.
286
+
287
+ WHY explicit finish:
288
+ - Completes minimization for all inserted words
289
+ - Maximizes compression ratio
290
+ - Should be called after bulk inserts
291
+ """
292
+ self._minimize(0)
293
+
294
+ def get(self, key: Any, default: Any = None) -> Any:
295
+ """
296
+ Retrieve value by key.
297
+
298
+ Args:
299
+ key: String key
300
+ default: Default value if not found
301
+
302
+ Returns:
303
+ Value or default
304
+ """
305
+ if not isinstance(key, str):
306
+ return default
307
+
308
+ current_node = self._root
309
+
310
+ # Traverse DAWG
311
+ for char in key:
312
+ if char not in current_node.edges:
313
+ return default
314
+ current_node = current_node.edges[char]
315
+
316
+ # Check if final state
317
+ if current_node.is_final:
318
+ return current_node.value
319
+
320
+ return default
321
+
322
+ def has(self, key: Any) -> bool:
323
+ """
324
+ Check if key exists.
325
+
326
+ Args:
327
+ key: String key
328
+
329
+ Returns:
330
+ True if exists, False otherwise
331
+ """
332
+ if not isinstance(key, str):
333
+ return False
334
+
335
+ current_node = self._root
336
+
337
+ # Traverse DAWG
338
+ for char in key:
339
+ if char not in current_node.edges:
340
+ return False
341
+ current_node = current_node.edges[char]
342
+
343
+ return current_node.is_final
344
+
345
+ def delete(self, key: Any) -> bool:
346
+ """
347
+ Remove key from DAWG.
348
+
349
+ Args:
350
+ key: String key
351
+
352
+ Returns:
353
+ True if deleted, False if not found
354
+
355
+ Note: This is a simplified deletion. Full implementation
356
+ would rebuild DAWG for optimal compression.
357
+ """
358
+ if not isinstance(key, str):
359
+ return False
360
+
361
+ # Navigate to node
362
+ path: List[Tuple[DawgNode, str]] = []
363
+ current_node = self._root
364
+
365
+ for char in key:
366
+ if char not in current_node.edges:
367
+ return False
368
+ path.append((current_node, char))
369
+ current_node = current_node.edges[char]
370
+
371
+ # Check if it's a final node
372
+ if not current_node.is_final:
373
+ return False
374
+
375
+ # Unmark as final
376
+ current_node.is_final = False
377
+ current_node.value = None
378
+ current_node.invalidate_hash()
379
+
380
+ # Remove nodes if they have no children and aren't final
381
+ for i in range(len(path) - 1, -1, -1):
382
+ parent, char = path[i]
383
+ child = parent.edges[char]
384
+
385
+ if not child.edges and not child.is_final:
386
+ del parent.edges[char]
387
+ parent.invalidate_hash()
388
+ else:
389
+ break
390
+
391
+ self._size -= 1
392
+ self._word_count -= 1
393
+ return True
394
+
395
+ def keys(self) -> Iterator[Any]:
396
+ """
397
+ Get iterator over all keys in lexicographic order.
398
+
399
+ Returns:
400
+ Iterator of string keys
401
+ """
402
+ yield from self._collect_words(self._root, "")
403
+
404
+ def _collect_words(self, node: DawgNode, prefix: str) -> Iterator[str]:
405
+ """
406
+ Recursively collect all words from node.
407
+
408
+ Args:
409
+ node: Current DAWG node
410
+ prefix: Current prefix string
411
+
412
+ Yields:
413
+ Complete words in lexicographic order
414
+ """
415
+ if node.is_final:
416
+ yield prefix
417
+
418
+ # Traverse in sorted order for lexicographic output
419
+ for char in sorted(node.edges.keys()):
420
+ yield from self._collect_words(node.edges[char], prefix + char)
421
+
422
+ def values(self) -> Iterator[Any]:
423
+ """
424
+ Get iterator over all values in key-sorted order.
425
+
426
+ Returns:
427
+ Iterator of values
428
+ """
429
+ for key in self.keys():
430
+ yield self.get(key)
431
+
432
+ def items(self) -> Iterator[tuple[Any, Any]]:
433
+ """
434
+ Get iterator over all key-value pairs.
435
+
436
+ Returns:
437
+ Iterator of (key, value) tuples
438
+ """
439
+ for key in self.keys():
440
+ yield (key, self.get(key))
441
+
442
+ def __len__(self) -> int:
443
+ """Get number of words."""
444
+ return self._word_count
445
+
446
+ def to_native(self) -> Any:
447
+ """
448
+ Convert to native Python dict.
449
+
450
+ Returns:
451
+ Dictionary representation
452
+ """
453
+ return dict(self.items())
454
+
455
+ # ============================================================================
456
+ # DAWG-SPECIFIC OPERATIONS
457
+ # ============================================================================
458
+
459
+ def has_prefix(self, prefix: str) -> bool:
460
+ """
461
+ Check if any word starts with prefix.
462
+
463
+ Args:
464
+ prefix: Prefix string to check
465
+
466
+ Returns:
467
+ True if prefix exists, False otherwise
468
+
469
+ Raises:
470
+ XWNodeValueError: If prefix is not a string
471
+ """
472
+ if not isinstance(prefix, str):
473
+ raise XWNodeValueError(
474
+ f"Prefix must be string, got {type(prefix).__name__}"
475
+ )
476
+
477
+ current_node = self._root
478
+
479
+ for char in prefix:
480
+ if char not in current_node.edges:
481
+ return False
482
+ current_node = current_node.edges[char]
483
+
484
+ return True
485
+
486
def get_with_prefix(self, prefix: str) -> List[str]:
    """
    Collect every stored word that begins with ``prefix``.

    Args:
        prefix: Prefix string

    Returns:
        The words gathered by ``_collect_words`` from the node reached by
        the prefix, or an empty list when the prefix cannot be followed

    Raises:
        XWNodeValueError: If prefix is not a string
    """
    if not isinstance(prefix, str):
        raise XWNodeValueError(
            f"Prefix must be string, got {type(prefix).__name__}"
        )

    # Walk down to the node that represents the prefix
    node = self._root
    for symbol in prefix:
        children = node.edges
        if symbol not in children:
            return []
        node = children[symbol]

    # Everything reachable from here shares the prefix
    matches: List[str] = []
    matches.extend(self._collect_words(node, prefix))
    return matches
513
+
514
def longest_prefix(self, text: str) -> Optional[str]:
    """
    Find the longest stored word that is a prefix of ``text``.

    The walk follows ``text`` character by character and remembers the
    last position at which a final node was reached.

    Args:
        text: Text to search

    Returns:
        Longest matching prefix, or None when no final node is reached
        after consuming at least one character

    Raises:
        XWNodeValueError: If text is not a string
    """
    if not isinstance(text, str):
        raise XWNodeValueError(
            f"Text must be string, got {type(text).__name__}"
        )

    node = self._root
    matched_length = 0  # 0 means "no final node seen yet"

    for consumed, symbol in enumerate(text, start=1):
        children = node.edges
        if symbol not in children:
            break
        node = children[symbol]
        if node.is_final:
            matched_length = consumed

    if matched_length == 0:
        return None
    return text[:matched_length]
546
+
547
def count_words_with_prefix(self, prefix: str) -> int:
    """
    Report how many stored words begin with ``prefix``.

    Args:
        prefix: Prefix string

    Returns:
        Length of the list returned by ``get_with_prefix(prefix)``
    """
    matching_words = self.get_with_prefix(prefix)
    return len(matching_words)
558
+
559
+ # ============================================================================
560
+ # COMPRESSION STATISTICS
561
+ # ============================================================================
562
+
563
def get_node_count(self) -> int:
    """
    Count the distinct nodes reachable from the root.

    Returns:
        Whatever ``_count_nodes`` reports when started at the root with an
        empty visited set
    """
    return self._count_nodes(self._root, set())
572
+
573
def _count_nodes(self, node: DawgNode, visited: Set[int]) -> int:
    """
    Count the not-yet-visited nodes reachable from ``node``.

    The traversal uses an explicit stack instead of recursion, so the
    depth of the graph (the length of the longest stored word) cannot
    exhaust Python's recursion limit.

    Args:
        node: Node to start from
        visited: Set of node IDs already counted; it is updated in place
            with the ID of every node counted here

    Returns:
        Number of nodes newly counted (0 when ``node`` was already visited)
    """
    if node._id in visited:
        return 0

    visited.add(node._id)
    pending = [node]
    count = 0

    while pending:
        current = pending.pop()
        count += 1
        for child in current.edges.values():
            # Mark on push so a shared suffix node is queued only once
            if child._id not in visited:
                visited.add(child._id)
                pending.append(child)

    return count
594
+
595
def get_compression_ratio(self) -> float:
    """
    Estimate how much smaller the DAWG is than a standard trie.

    The trie size is approximated by the sum of all word lengths and is
    divided by the number of nodes actually present in the DAWG.

    Returns:
        Estimated compression ratio; 1.0 when there are no words or no
        nodes to compare against

    WHY this matters:
    - Quantifies space savings
    - Validates DAWG effectiveness
    - Helps choose between DAWG and trie
    """
    if not self._word_count:
        return 1.0

    # Trie estimate: one node per character of every word
    total_characters = 0
    for word in self.keys():
        total_characters += len(word)

    # Nodes actually present in the DAWG
    actual_nodes = self.get_node_count()

    return total_characters / actual_nodes if actual_nodes else 1.0
620
+
621
def get_statistics(self) -> Dict[str, Any]:
    """
    Get comprehensive DAWG statistics.

    The compression ratio is computed once and reused, because each call
    to ``get_compression_ratio()`` walks every key and every node.

    Returns:
        Statistics dictionary with the keys ``word_count``, ``node_count``,
        ``compression_ratio``, ``minimized_nodes``, ``unchecked_nodes`` and
        ``memory_saved_percent``. The last one is 0.0 when the ratio is not
        positive (e.g. only the empty word is stored), since no saving can
        be derived from it.
    """
    node_count = self.get_node_count()
    compression_ratio = self.get_compression_ratio()

    # A ratio of 0 would otherwise raise ZeroDivisionError below
    if compression_ratio > 0:
        memory_saved_percent = (1 - 1 / compression_ratio) * 100
    else:
        memory_saved_percent = 0.0

    return {
        'word_count': self._word_count,
        'node_count': node_count,
        'compression_ratio': compression_ratio,
        'minimized_nodes': len(self._minimized_nodes),
        'unchecked_nodes': len(self._unchecked_nodes),
        'memory_saved_percent': memory_saved_percent,
    }
636
+
637
+ # ============================================================================
638
+ # BULK OPERATIONS
639
+ # ============================================================================
640
+
641
def build_from_sorted_words(self, words: List[str], values: Optional[List[Any]] = None) -> None:
    """
    Replace the current contents with a DAWG built from a sorted word list.

    All validation happens before existing data is cleared, so a rejected
    input leaves the structure untouched.

    Args:
        words: Words in non-decreasing order
        values: Optional values, one per word; every word is stored with
            None when omitted

    Raises:
        XWNodeValueError: If a word is not a string, the words are not
            sorted, or the values length differs from the words length

    WHY sorted requirement:
    - Enables incremental minimization algorithm
    - Ensures optimal compression
    - O(n) construction vs O(n log n) for unsorted
    """
    # Security: Validation
    if any(not isinstance(candidate, str) for candidate in words):
        raise XWNodeValueError("All words must be strings")

    # Each neighbouring pair must be in order
    for earlier, later in zip(words, words[1:]):
        if earlier > later:
            raise XWNodeValueError(
                f"Words must be sorted, but '{earlier}' > '{later}'"
            )

    if values is not None and len(values) != len(words):
        raise XWNodeValueError(
            f"Values length ({len(values)}) must match words length ({len(words)})"
        )

    # Start from an empty structure
    self.clear()

    # Pair each word with its value (None when no values were supplied)
    paired_values = values if values else [None] * len(words)
    for word, value in zip(words, paired_values):
        self.put(word, value)

    # Final minimization
    self.finish_construction()
683
+
684
def get(self, key: Any, default: Any = None) -> Any:
    """
    Look up the value stored for ``key``.

    Args:
        key: String key
        default: Value handed back when nothing usable is stored

    Returns:
        The stored value, or ``default`` when the key is not a string, is
        not a stored word, or its stored value is None
    """
    if not isinstance(key, str):
        return default

    node = self._root
    for symbol in key:
        children = node.edges
        if symbol not in children:
            return default
        node = children[symbol]

    if not node.is_final:
        return default

    stored = node.value
    return default if stored is None else stored
709
+
710
def has(self, key: Any) -> bool:
    """Report whether ``key`` is a string that ends on a final node."""
    if not isinstance(key, str):
        return False

    node = self._root
    for symbol in key:
        children = node.edges
        if symbol in children:
            node = children[symbol]
        else:
            return False

    return node.is_final
723
+
724
+ # ============================================================================
725
+ # PATTERN MATCHING
726
+ # ============================================================================
727
+
728
def fuzzy_search(self, word: str, max_distance: int = 1) -> List[str]:
    """
    Find words within edit distance.

    The search walks the graph while spending an edit budget on
    deletions, substitutions and insertions. Each matching word is
    reported once, in the order it is first discovered, even when several
    edit sequences lead to it.

    Args:
        word: Search word
        max_distance: Maximum Levenshtein distance

    Returns:
        List of matching words without duplicates

    WHY fuzzy search:
    - Essential for spell checkers
    - Handles typos in autocomplete
    - Improves usability
    """
    # A dict keeps first-discovery order while dropping duplicates
    matches: dict[str, None] = {}
    # Cheapest cost at which each (prefix, position) state was explored.
    # The prefix determines the node, so revisiting a state at an equal or
    # higher cost cannot produce anything new.
    cheapest: dict[tuple[str, int], int] = {}
    word_length = len(word)

    def _explore(node: Any, prefix: str, position: int, cost: int) -> None:
        """Explore all edits from one (node, position) state."""
        state = (prefix, position)
        known_cost = cheapest.get(state)
        if known_cost is not None and known_cost <= cost:
            return
        cheapest[state] = cost

        # Search word fully consumed
        if position == word_length:
            if node.is_final and cost <= max_distance:
                matches.setdefault(prefix)
            # Continue for insertions at the end
            if cost < max_distance:
                for char, child in node.edges.items():
                    _explore(child, prefix + char, position, cost + 1)
            return

        expected = word[position]

        # Exact match costs nothing
        if expected in node.edges:
            _explore(node.edges[expected], prefix + expected, position + 1, cost)

        # Try edits if the budget allows
        if cost < max_distance:
            # Deletion: skip one character of the search word
            _explore(node, prefix, position + 1, cost + 1)

            for char, child in node.edges.items():
                # Substitution; the same character is already covered by
                # the free exact match above
                if char != expected:
                    _explore(child, prefix + char, position + 1, cost + 1)
                # Insertion: follow an edge without consuming the word
                _explore(child, prefix + char, position, cost + 1)

    _explore(self._root, "", 0, 0)
    return list(matches)
782
+
783
+ # ============================================================================
784
+ # UTILITY METHODS
785
+ # ============================================================================
786
+
787
def clear(self) -> None:
    """Reset the structure to an empty state with a fresh root node."""
    # Drop construction bookkeeping
    self._minimized_nodes.clear()
    self._unchecked_nodes.clear()
    self._previous_word = ""

    # Reset counters
    self._word_count = 0
    self._size = 0

    # Start over from a brand-new root
    self._root = DawgNode()
795
+
796
def is_empty(self) -> bool:
    """Return True when no words are stored."""
    return not self._word_count
799
+
800
def size(self) -> int:
    """Return the current word count."""
    stored_words = self._word_count
    return stored_words
803
+
804
def get_mode(self) -> NodeMode:
    """Return the ``mode`` attribute of this strategy."""
    strategy_mode = self.mode
    return strategy_mode
807
+
808
def get_traits(self) -> NodeTrait:
    """Return the ``traits`` attribute of this strategy."""
    strategy_traits = self.traits
    return strategy_traits
811
+
812
+ # ============================================================================
813
+ # COMPATIBILITY METHODS
814
+ # ============================================================================
815
+
816
def find(self, key: Any) -> Optional[Any]:
    """Compatibility alias: look up ``key`` through ``get()``."""
    found = self.get(key)
    return found
819
+
820
def insert(self, key: Any, value: Any = None) -> None:
    """Compatibility alias: store ``key`` with ``value`` through ``put()``."""
    store = self.put
    store(key, value)
823
+
824
def __str__(self) -> str:
    """Summarize word count, node count and compression ratio."""
    summary = self.get_statistics()
    words = summary['word_count']
    nodes = summary['node_count']
    ratio = summary['compression_ratio']
    return f"DawgStrategy(words={words}, nodes={nodes}, compression={ratio:.1f}x)"
830
+
831
def __repr__(self) -> str:
    """Show the mode name, word count and traits of this strategy."""
    mode_name = self.mode.name
    word_total = self._word_count
    traits = self.traits
    return f"DawgStrategy(mode={mode_name}, words={word_total}, traits={traits})"
834
+
835
+ # ============================================================================
836
+ # FACTORY METHOD
837
+ # ============================================================================
838
+
839
@classmethod
def create_from_data(cls, data: Any) -> 'DawgStrategy':
    """
    Create DAWG from data.

    Args:
        data: Dictionary with string keys, a list/tuple whose items are
            converted with ``str()`` and stored without values, or any
            other object, which is stored as a single word ``str(data)``
            with ``data`` as its value

    Returns:
        New DawgStrategy instance

    Raises:
        XWNodeValueError: If data is a dictionary containing non-string keys
    """
    instance = cls()

    if isinstance(data, dict):
        # Validate before sorting: sorted() on mixed key types (e.g. str
        # and int) raises a bare TypeError and would hide the real problem.
        for key in data:
            if not isinstance(key, str):
                raise XWNodeValueError(
                    f"DAWG requires string keys, found {type(key).__name__}"
                )
        # Sort keys for optimal compression
        for key in sorted(data):
            instance.put(key, data[key])
        instance.finish_construction()
    elif isinstance(data, (list, tuple)):
        # Treat as list of strings (set variant)
        for word in sorted(str(item) for item in data):
            instance.put(word, None)
        instance.finish_construction()
    else:
        # Store scalar as single word
        instance.put(str(data), data)

    return instance
876
+