exonware-xwnode 0.0.1.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132) hide show
  1. exonware/__init__.py +14 -0
  2. exonware/xwnode/__init__.py +127 -0
  3. exonware/xwnode/base.py +676 -0
  4. exonware/xwnode/config.py +178 -0
  5. exonware/xwnode/contracts.py +730 -0
  6. exonware/xwnode/errors.py +503 -0
  7. exonware/xwnode/facade.py +460 -0
  8. exonware/xwnode/strategies/__init__.py +158 -0
  9. exonware/xwnode/strategies/advisor.py +463 -0
  10. exonware/xwnode/strategies/edges/__init__.py +32 -0
  11. exonware/xwnode/strategies/edges/adj_list.py +227 -0
  12. exonware/xwnode/strategies/edges/adj_matrix.py +391 -0
  13. exonware/xwnode/strategies/edges/base.py +169 -0
  14. exonware/xwnode/strategies/flyweight.py +328 -0
  15. exonware/xwnode/strategies/impls/__init__.py +13 -0
  16. exonware/xwnode/strategies/impls/_base_edge.py +403 -0
  17. exonware/xwnode/strategies/impls/_base_node.py +307 -0
  18. exonware/xwnode/strategies/impls/edge_adj_list.py +353 -0
  19. exonware/xwnode/strategies/impls/edge_adj_matrix.py +445 -0
  20. exonware/xwnode/strategies/impls/edge_bidir_wrapper.py +455 -0
  21. exonware/xwnode/strategies/impls/edge_block_adj_matrix.py +539 -0
  22. exonware/xwnode/strategies/impls/edge_coo.py +533 -0
  23. exonware/xwnode/strategies/impls/edge_csc.py +447 -0
  24. exonware/xwnode/strategies/impls/edge_csr.py +492 -0
  25. exonware/xwnode/strategies/impls/edge_dynamic_adj_list.py +503 -0
  26. exonware/xwnode/strategies/impls/edge_flow_network.py +555 -0
  27. exonware/xwnode/strategies/impls/edge_hyperedge_set.py +516 -0
  28. exonware/xwnode/strategies/impls/edge_neural_graph.py +650 -0
  29. exonware/xwnode/strategies/impls/edge_octree.py +574 -0
  30. exonware/xwnode/strategies/impls/edge_property_store.py +655 -0
  31. exonware/xwnode/strategies/impls/edge_quadtree.py +519 -0
  32. exonware/xwnode/strategies/impls/edge_rtree.py +820 -0
  33. exonware/xwnode/strategies/impls/edge_temporal_edgeset.py +558 -0
  34. exonware/xwnode/strategies/impls/edge_tree_graph_basic.py +271 -0
  35. exonware/xwnode/strategies/impls/edge_weighted_graph.py +411 -0
  36. exonware/xwnode/strategies/manager.py +775 -0
  37. exonware/xwnode/strategies/metrics.py +538 -0
  38. exonware/xwnode/strategies/migration.py +432 -0
  39. exonware/xwnode/strategies/nodes/__init__.py +50 -0
  40. exonware/xwnode/strategies/nodes/_base_node.py +307 -0
  41. exonware/xwnode/strategies/nodes/adjacency_list.py +267 -0
  42. exonware/xwnode/strategies/nodes/aho_corasick.py +345 -0
  43. exonware/xwnode/strategies/nodes/array_list.py +209 -0
  44. exonware/xwnode/strategies/nodes/base.py +247 -0
  45. exonware/xwnode/strategies/nodes/deque.py +200 -0
  46. exonware/xwnode/strategies/nodes/hash_map.py +135 -0
  47. exonware/xwnode/strategies/nodes/heap.py +307 -0
  48. exonware/xwnode/strategies/nodes/linked_list.py +232 -0
  49. exonware/xwnode/strategies/nodes/node_aho_corasick.py +520 -0
  50. exonware/xwnode/strategies/nodes/node_array_list.py +175 -0
  51. exonware/xwnode/strategies/nodes/node_avl_tree.py +371 -0
  52. exonware/xwnode/strategies/nodes/node_b_plus_tree.py +542 -0
  53. exonware/xwnode/strategies/nodes/node_bitmap.py +420 -0
  54. exonware/xwnode/strategies/nodes/node_bitset_dynamic.py +513 -0
  55. exonware/xwnode/strategies/nodes/node_bloom_filter.py +347 -0
  56. exonware/xwnode/strategies/nodes/node_btree.py +357 -0
  57. exonware/xwnode/strategies/nodes/node_count_min_sketch.py +470 -0
  58. exonware/xwnode/strategies/nodes/node_cow_tree.py +473 -0
  59. exonware/xwnode/strategies/nodes/node_cuckoo_hash.py +392 -0
  60. exonware/xwnode/strategies/nodes/node_fenwick_tree.py +301 -0
  61. exonware/xwnode/strategies/nodes/node_hash_map.py +269 -0
  62. exonware/xwnode/strategies/nodes/node_heap.py +191 -0
  63. exonware/xwnode/strategies/nodes/node_hyperloglog.py +407 -0
  64. exonware/xwnode/strategies/nodes/node_linked_list.py +409 -0
  65. exonware/xwnode/strategies/nodes/node_lsm_tree.py +400 -0
  66. exonware/xwnode/strategies/nodes/node_ordered_map.py +390 -0
  67. exonware/xwnode/strategies/nodes/node_ordered_map_balanced.py +565 -0
  68. exonware/xwnode/strategies/nodes/node_patricia.py +512 -0
  69. exonware/xwnode/strategies/nodes/node_persistent_tree.py +378 -0
  70. exonware/xwnode/strategies/nodes/node_radix_trie.py +452 -0
  71. exonware/xwnode/strategies/nodes/node_red_black_tree.py +497 -0
  72. exonware/xwnode/strategies/nodes/node_roaring_bitmap.py +570 -0
  73. exonware/xwnode/strategies/nodes/node_segment_tree.py +289 -0
  74. exonware/xwnode/strategies/nodes/node_set_hash.py +354 -0
  75. exonware/xwnode/strategies/nodes/node_set_tree.py +480 -0
  76. exonware/xwnode/strategies/nodes/node_skip_list.py +316 -0
  77. exonware/xwnode/strategies/nodes/node_splay_tree.py +393 -0
  78. exonware/xwnode/strategies/nodes/node_suffix_array.py +487 -0
  79. exonware/xwnode/strategies/nodes/node_treap.py +387 -0
  80. exonware/xwnode/strategies/nodes/node_tree_graph_hybrid.py +1434 -0
  81. exonware/xwnode/strategies/nodes/node_trie.py +252 -0
  82. exonware/xwnode/strategies/nodes/node_union_find.py +187 -0
  83. exonware/xwnode/strategies/nodes/node_xdata_optimized.py +369 -0
  84. exonware/xwnode/strategies/nodes/priority_queue.py +209 -0
  85. exonware/xwnode/strategies/nodes/queue.py +161 -0
  86. exonware/xwnode/strategies/nodes/sparse_matrix.py +206 -0
  87. exonware/xwnode/strategies/nodes/stack.py +152 -0
  88. exonware/xwnode/strategies/nodes/trie.py +274 -0
  89. exonware/xwnode/strategies/nodes/union_find.py +283 -0
  90. exonware/xwnode/strategies/pattern_detector.py +603 -0
  91. exonware/xwnode/strategies/performance_monitor.py +487 -0
  92. exonware/xwnode/strategies/queries/__init__.py +24 -0
  93. exonware/xwnode/strategies/queries/base.py +236 -0
  94. exonware/xwnode/strategies/queries/cql.py +201 -0
  95. exonware/xwnode/strategies/queries/cypher.py +181 -0
  96. exonware/xwnode/strategies/queries/datalog.py +70 -0
  97. exonware/xwnode/strategies/queries/elastic_dsl.py +70 -0
  98. exonware/xwnode/strategies/queries/eql.py +70 -0
  99. exonware/xwnode/strategies/queries/flux.py +70 -0
  100. exonware/xwnode/strategies/queries/gql.py +70 -0
  101. exonware/xwnode/strategies/queries/graphql.py +240 -0
  102. exonware/xwnode/strategies/queries/gremlin.py +181 -0
  103. exonware/xwnode/strategies/queries/hiveql.py +214 -0
  104. exonware/xwnode/strategies/queries/hql.py +70 -0
  105. exonware/xwnode/strategies/queries/jmespath.py +219 -0
  106. exonware/xwnode/strategies/queries/jq.py +66 -0
  107. exonware/xwnode/strategies/queries/json_query.py +66 -0
  108. exonware/xwnode/strategies/queries/jsoniq.py +248 -0
  109. exonware/xwnode/strategies/queries/kql.py +70 -0
  110. exonware/xwnode/strategies/queries/linq.py +238 -0
  111. exonware/xwnode/strategies/queries/logql.py +70 -0
  112. exonware/xwnode/strategies/queries/mql.py +68 -0
  113. exonware/xwnode/strategies/queries/n1ql.py +210 -0
  114. exonware/xwnode/strategies/queries/partiql.py +70 -0
  115. exonware/xwnode/strategies/queries/pig.py +215 -0
  116. exonware/xwnode/strategies/queries/promql.py +70 -0
  117. exonware/xwnode/strategies/queries/sparql.py +220 -0
  118. exonware/xwnode/strategies/queries/sql.py +275 -0
  119. exonware/xwnode/strategies/queries/xml_query.py +66 -0
  120. exonware/xwnode/strategies/queries/xpath.py +223 -0
  121. exonware/xwnode/strategies/queries/xquery.py +258 -0
  122. exonware/xwnode/strategies/queries/xwnode_executor.py +332 -0
  123. exonware/xwnode/strategies/queries/xwquery_strategy.py +424 -0
  124. exonware/xwnode/strategies/registry.py +604 -0
  125. exonware/xwnode/strategies/simple.py +273 -0
  126. exonware/xwnode/strategies/utils.py +532 -0
  127. exonware/xwnode/types.py +912 -0
  128. exonware/xwnode/version.py +78 -0
  129. exonware_xwnode-0.0.1.12.dist-info/METADATA +169 -0
  130. exonware_xwnode-0.0.1.12.dist-info/RECORD +132 -0
  131. exonware_xwnode-0.0.1.12.dist-info/WHEEL +4 -0
  132. exonware_xwnode-0.0.1.12.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,487 @@
1
+ """
2
+ Suffix Array Node Strategy Implementation
3
+
4
+ This module implements the SUFFIX_ARRAY strategy for efficient substring
5
+ searches and string pattern matching. Construction currently uses a naive suffix sort; the linear-time algorithms (SA-IS, DC3) are not implemented yet.
6
+ """
7
+
8
+ from typing import Any, Iterator, List, Dict, Optional, Tuple
9
+ import bisect
10
+ from .base import ANodeTreeStrategy
11
+ from ...types import NodeMode, NodeTrait
12
+
13
+
14
class SuffixArrayStrategy(ANodeTreeStrategy):
    """
    Suffix Array node strategy for efficient string operations.

    Keeps one preprocessed text (optionally lower-cased, always terminated by
    ``separator``) together with its suffix array, its rank array and, when
    ``enable_lcp`` is set, its LCP array. The arrays are rebuilt lazily the
    first time a query needs them after the text changed.

    A small key/value facade (``put``/``get``/``has``/``remove``) is layered
    on top; the keys ``"text"``, ``"suffix_array"`` and ``"lcp_array"`` are
    answered from the index itself.
    """

    # Keys that ``get``/``has`` answer from the index instead of ``_values``.
    _RESERVED_KEYS = ("text", "suffix_array", "lcp_array")

    def __init__(self, traits: NodeTrait = NodeTrait.NONE, **options):
        """
        Initialize the Suffix Array strategy.

        Recognised options (all optional):
            enable_lcp: build the LCP array alongside the suffix array
                (default ``True``).
            case_sensitive: when ``False`` the text and every search pattern
                are lower-cased (default ``True``).
            separator: end-of-text marker appended to the text (default ``'$'``).
        """
        super().__init__(NodeMode.SUFFIX_ARRAY, traits, **options)

        self.enable_lcp = options.get('enable_lcp', True)  # Longest Common Prefix array
        self.case_sensitive = options.get('case_sensitive', True)
        self.separator = options.get('separator', '$')  # End-of-string marker

        # Core storage
        self._text = ""
        self._suffix_array: List[int] = []
        self._lcp_array: List[int] = []  # Longest Common Prefix
        self._rank: List[int] = []  # Inverse suffix array

        # Key-value mapping for compatibility
        self._key_to_pos: Dict[str, List[int]] = {}
        self._values: Dict[str, Any] = {}
        self._size = 0

        # Performance optimizations
        self._is_built = False
        self._pattern_cache: Dict[str, List[int]] = {}

    def get_supported_traits(self) -> NodeTrait:
        """Get the traits supported by the suffix array strategy."""
        return (NodeTrait.ORDERED | NodeTrait.INDEXED | NodeTrait.STREAMING)

    def _preprocess_text(self, text: str) -> str:
        """Lower-case ``text`` when case-insensitive and make it end with the separator."""
        if not self.case_sensitive:
            text = text.lower()

        # Ensure text ends with separator
        if not text.endswith(self.separator):
            text += self.separator

        return text

    @staticmethod
    def _sorted_suffix_positions(text: str) -> List[int]:
        """
        Return the start positions of all suffixes of ``text`` in sorted order.

        Every suffix is materialised as a slice and compared as a plain
        string, so this is the naive construction, not a linear-time one.
        """
        return sorted(range(len(text)), key=lambda start: text[start:])

    @staticmethod
    def _kasai_lcp(text: str, suffix_array: List[int], rank: List[int]) -> List[int]:
        """
        Compute the LCP array with Kasai's algorithm.

        ``lcp[i]`` is the length of the common prefix of the suffixes at
        sorted positions ``i`` and ``i + 1``; the last slot stays 0 because
        that suffix has no successor.
        """
        length = len(text)
        lcp = [0] * length
        matched = 0

        for start in range(length):
            order = rank[start]
            if order == length - 1:
                # Last suffix in sorted order: nothing to compare against.
                matched = 0
                continue

            neighbour = suffix_array[order + 1]
            while (start + matched < length and neighbour + matched < length and
                   text[start + matched] == text[neighbour + matched]):
                matched += 1

            lcp[order] = matched

            # The next suffix (start + 1) shares at least matched - 1 characters.
            if matched > 0:
                matched -= 1

        return lcp

    def _build_suffix_array_naive(self) -> None:
        """Build the suffix array by sorting all suffixes, then the rank and LCP arrays."""
        self._suffix_array = self._sorted_suffix_positions(self._text)
        self._build_rank_array()

        if self.enable_lcp:
            self._build_lcp_array()
        else:
            # Do not keep an LCP array that belongs to an earlier text.
            self._lcp_array = []

    def _build_suffix_array_optimized(self) -> None:
        """Build the suffix array; currently delegates to the naive builder."""
        # For simplicity, using naive approach - can be optimized with DC3/SA-IS algorithms
        self._build_suffix_array_naive()

    def _build_rank_array(self) -> None:
        """Build rank array (inverse of suffix array)."""
        self._rank = [0] * len(self._suffix_array)

        for order, start in enumerate(self._suffix_array):
            self._rank[start] = order

    def _build_lcp_array(self) -> None:
        """Build the Longest Common Prefix array for the current text."""
        self._lcp_array = self._kasai_lcp(self._text, self._suffix_array, self._rank)

    def _rebuild_if_needed(self) -> None:
        """Rebuild suffix array if text has changed."""
        if not self._is_built and self._text:
            self._build_suffix_array_optimized()
            self._is_built = True

    # ============================================================================
    # CORE OPERATIONS (Key-based interface for compatibility)
    # ============================================================================

    def put(self, key: Any, value: Any = None) -> None:
        """
        Add a string to the indexed text and remember ``value`` under ``key``.

        The string that goes into the text is ``str(value)``, or ``str(key)``
        when ``value`` is ``None``. The first call, or any call with the key
        ``"text"``, replaces the whole text; later calls append to it in
        front of the trailing separator. Either way the index is marked for
        rebuild and the pattern cache is dropped.
        """
        key_str = str(key)
        # Only a missing value falls back to the key; 0 or "" are real values.
        content = key_str if value is None else str(value)

        if not self._text or key_str == "text":
            # Replace entire text
            self._text = self._preprocess_text(content)
            self._key_to_pos.clear()
            self._size = 1
        else:
            # Append to text (less efficient, requires rebuild)
            if not self.case_sensitive:
                # Keep the appended part consistent with the lower-cased text.
                content = content.lower()

            marker = self.separator
            if marker and self._text.endswith(marker):
                base = self._text[:-len(marker)]
            else:
                base = self._text

            self._text = base + content + marker
            self._size += 1

        self._is_built = False
        self._pattern_cache.clear()
        self._values[key_str] = value

    def get(self, key: Any, default: Any = None) -> Any:
        """
        Get value by key.

        ``"text"`` returns the preprocessed text, ``"suffix_array"`` and
        ``"lcp_array"`` return copies of the (rebuilt if stale) arrays; any
        other key is looked up among the stored values and falls back to
        ``default``.
        """
        key_str = str(key)

        if key_str == "text":
            return self._text
        if key_str == "suffix_array":
            self._rebuild_if_needed()
            return self._suffix_array.copy()
        if key_str == "lcp_array":
            self._rebuild_if_needed()
            return self._lcp_array.copy()

        return self._values.get(key_str, default)

    def has(self, key: Any) -> bool:
        """Check if key exists (stored values plus the reserved index keys)."""
        key_str = str(key)
        return key_str in self._values or key_str in self._RESERVED_KEYS

    def remove(self, key: Any) -> bool:
        """
        Remove a stored value (limited support).

        Only the key/value entry is dropped; the indexed text is left as is.
        Returns ``True`` when the key was present.
        """
        key_str = str(key)

        if key_str not in self._values:
            return False

        del self._values[key_str]
        # A text replacement resets the counter to 1 while older values stay
        # stored, so never let the counter go below zero (len() would raise).
        if self._size > 0:
            self._size -= 1
        return True

    def delete(self, key: Any) -> bool:
        """Remove key (alias for remove)."""
        return self.remove(key)

    def clear(self) -> None:
        """Clear all data."""
        self._text = ""
        self._suffix_array.clear()
        self._lcp_array.clear()
        self._rank.clear()
        self._key_to_pos.clear()
        self._values.clear()
        self._pattern_cache.clear()
        self._size = 0
        self._is_built = False

    def keys(self) -> Iterator[str]:
        """Yield the reserved index keys followed by all stored keys."""
        yield "text"
        yield "suffix_array"
        if self.enable_lcp:
            yield "lcp_array"
        yield from self._values

    def values(self) -> Iterator[Any]:
        """Yield the values in the same order as ``keys()``."""
        yield self._text
        self._rebuild_if_needed()
        yield self._suffix_array.copy()
        if self.enable_lcp:
            yield self._lcp_array.copy()
        yield from self._values.values()

    def items(self) -> Iterator[Tuple[str, Any]]:
        """Yield ``(key, value)`` pairs in the same order as ``keys()``."""
        yield ("text", self._text)
        self._rebuild_if_needed()
        yield ("suffix_array", self._suffix_array.copy())
        if self.enable_lcp:
            yield ("lcp_array", self._lcp_array.copy())
        yield from self._values.items()

    def __len__(self) -> int:
        """Get number of stored items."""
        return self._size

    def to_native(self) -> Dict[str, Any]:
        """
        Convert to native Python dict.

        Stored values are merged last, so a stored key named like a reserved
        key overrides the index view in the result.
        """
        self._rebuild_if_needed()
        native: Dict[str, Any] = {
            "text": self._text,
            "suffix_array": self._suffix_array.copy(),
        }
        if self.enable_lcp:
            native["lcp_array"] = self._lcp_array.copy()
        native.update(self._values)
        return native

    @property
    def is_list(self) -> bool:
        """This can behave like a list for suffix access."""
        return True

    @property
    def is_dict(self) -> bool:
        """This behaves like a dict."""
        return True

    # ============================================================================
    # SUFFIX ARRAY SPECIFIC OPERATIONS
    # ============================================================================

    def set_text(self, text: str) -> None:
        """Replace the indexed text; the index is rebuilt lazily on next use."""
        self._text = self._preprocess_text(text)
        self._is_built = False
        self._pattern_cache.clear()
        self._size = 1

    def search_pattern(self, pattern: str) -> List[int]:
        """
        Return the sorted start positions of every occurrence of ``pattern``.

        The pattern is lower-cased first when the strategy is case-insensitive.
        Results are cached per normalised pattern until the text changes; the
        returned list is always a fresh copy, so callers may modify it.
        An empty pattern yields an empty list.
        """
        if not pattern:
            return []

        # Normalise before touching the cache so lookups and stores use one key.
        if not self.case_sensitive:
            pattern = pattern.lower()

        cached = self._pattern_cache.get(pattern)
        if cached is not None:
            return list(cached)

        self._rebuild_if_needed()

        if not self._suffix_array:
            return []

        first = self._binary_search_left(pattern)
        if first == -1:
            self._pattern_cache[pattern] = []
            return []

        last = self._binary_search_right(pattern)

        # All suffixes starting with the pattern are contiguous in sorted order.
        positions = sorted(self._suffix_array[first:last + 1])
        self._pattern_cache[pattern] = positions
        return list(positions)

    def _compare_prefix(self, suffix_pos: int, pattern: str) -> int:
        """
        Compare the suffix at ``suffix_pos`` with ``pattern``.

        Returns 0 when the suffix starts with the pattern, -1 when it sorts
        before the pattern and 1 when it sorts after. Only the first
        ``len(pattern)`` characters are sliced, which decides the ordering
        just like comparing the whole suffix would.
        """
        window = self._text[suffix_pos:suffix_pos + len(pattern)]
        if window == pattern:
            return 0
        return -1 if window < pattern else 1

    def _binary_search_left(self, pattern: str) -> int:
        """Return the smallest suffix-array index whose suffix starts with ``pattern``, or -1."""
        low, high = 0, len(self._suffix_array) - 1
        found = -1

        while low <= high:
            mid = (low + high) // 2
            order = self._compare_prefix(self._suffix_array[mid], pattern)

            if order == 0:
                found = mid
                high = mid - 1  # Continue searching left
            elif order < 0:
                low = mid + 1
            else:
                high = mid - 1

        return found

    def _binary_search_right(self, pattern: str) -> int:
        """Return the largest suffix-array index whose suffix starts with ``pattern``, or -1."""
        low, high = 0, len(self._suffix_array) - 1
        found = -1

        while low <= high:
            mid = (low + high) // 2
            order = self._compare_prefix(self._suffix_array[mid], pattern)

            if order == 0:
                found = mid
                low = mid + 1  # Continue searching right
            elif order < 0:
                low = mid + 1
            else:
                high = mid - 1

        return found

    def count_occurrences(self, pattern: str) -> int:
        """Count occurrences of pattern."""
        return len(self.search_pattern(pattern))

    def find_longest_common_substring(self, other_text: str) -> Tuple[str, int, int]:
        """
        Find longest common substring with another text.

        A temporary suffix array and LCP array are built over
        ``text + '#' + other_text + separator`` without touching the
        strategy's own text, arrays, cache or size. Matches are clipped so
        they never include the end-of-text separators.

        Returns:
            ``(substring, position, length)`` where ``position`` is the start
            of the match inside this strategy's text, or ``("", 0, 0)`` when
            either text is empty or nothing is shared.
        """
        if not self._text or not other_text:
            return "", 0, 0

        marker = self.separator
        other = other_text if self.case_sensitive else other_text.lower()

        # Same layout as the stored text followed by '#', the other text and a
        # final separator; self._text already carries its own separator.
        combined = self._text + "#" + other + marker

        # Boundaries of the real characters of each text inside ``combined``.
        if marker and self._text.endswith(marker):
            own_end = len(self._text) - len(marker)
        else:
            own_end = len(self._text)
        other_start = len(self._text) + 1
        other_end = other_start + len(other)

        suffix_array = self._sorted_suffix_positions(combined)
        rank = [0] * len(suffix_array)
        for order, start in enumerate(suffix_array):
            rank[start] = order
        # Built locally so the result does not depend on ``enable_lcp``.
        lcp = self._kasai_lcp(combined, suffix_array, rank)

        best_len = 0
        best_pos = 0

        for order in range(len(suffix_array) - 1):
            first = suffix_array[order]
            second = suffix_array[order + 1]

            # Only neighbours that start in different texts are candidates.
            if (first < other_start) == (second < other_start):
                continue

            own_pos, other_pos = (first, second) if first < other_start else (second, first)

            # Clip the shared prefix to the real characters of both texts;
            # suffixes starting on a separator or '#' end up non-positive.
            usable = min(lcp[order], own_end - own_pos, other_end - other_pos)
            if usable > best_len:
                best_len = usable
                best_pos = own_pos

        if best_len > 0:
            return combined[best_pos:best_pos + best_len], best_pos, best_len

        return "", 0, 0

    def get_suffix(self, index: int) -> str:
        """Get suffix starting at given index, or ``""`` when out of range."""
        if 0 <= index < len(self._text):
            return self._text[index:]
        return ""

    def get_sorted_suffixes(self) -> List[str]:
        """Get all suffixes in sorted order."""
        self._rebuild_if_needed()
        return [self._text[start:] for start in self._suffix_array]

    def find_repeated_substrings(self, min_length: int = 2) -> List[Tuple[str, int, List[int]]]:
        """
        Find repeated substrings using LCP array.

        Returns ``(substring, length, positions)`` tuples, one per distinct
        substring, ordered by length (longest first). Returns an empty list
        when ``enable_lcp`` is off.
        """
        self._rebuild_if_needed()

        if not self.enable_lcp:
            return []

        found: Dict[str, Tuple[int, List[int]]] = {}

        # lcp[i] relates sorted suffix i to suffix i + 1, so the last slot has
        # no partner and is skipped.
        for order in range(len(self._lcp_array) - 1):
            shared = self._lcp_array[order]
            if shared < min_length:
                continue

            start = self._suffix_array[order]
            substring = self._text[start:start + shared]
            if substring in found:
                continue

            # Find all occurrences of this substring
            positions = self.search_pattern(substring)
            if len(positions) > 1:
                found[substring] = (shared, positions)

        result = [(substring, data[0], data[1]) for substring, data in found.items()]
        result.sort(key=lambda entry: entry[1], reverse=True)

        return result

    def get_statistics(self) -> Dict[str, Any]:
        """
        Get comprehensive suffix array statistics.

        The same set of keys is returned for an empty text (with zero values)
        so that consumers such as ``metrics`` never hit a missing key.
        """
        self._rebuild_if_needed()

        if not self._text:
            return {
                'text_length': 0,
                'unique_characters': 0,
                'suffixes': 0,
                'avg_lcp': 0,
                'max_lcp': 0,
                'case_sensitive': self.case_sensitive,
                'pattern_cache_size': len(self._pattern_cache),
                'memory_usage': 0
            }

        lcp = self._lcp_array

        return {
            'text_length': len(self._text),
            'unique_characters': len(set(self._text)),
            'suffixes': len(self._suffix_array),
            'avg_lcp': sum(lcp) / len(lcp) if lcp else 0,
            'max_lcp': max(lcp) if lcp else 0,
            'case_sensitive': self.case_sensitive,
            'pattern_cache_size': len(self._pattern_cache),
            # Rough estimate: one unit per character, four per array entry.
            'memory_usage': len(self._text) + len(self._suffix_array) * 4 + len(lcp) * 4
        }

    # ============================================================================
    # PERFORMANCE CHARACTERISTICS
    # ============================================================================

    @property
    def backend_info(self) -> Dict[str, Any]:
        """Get backend implementation info."""
        # NOTE(review): 'construction' describes the intended algorithm; the
        # builder in this class currently sorts full suffix slices.
        return {
            'strategy': 'SUFFIX_ARRAY',
            'backend': 'Suffix array with LCP array and binary search',
            'enable_lcp': self.enable_lcp,
            'case_sensitive': self.case_sensitive,
            'separator': self.separator,
            'complexity': {
                'construction': 'O(n log n)',  # Can be optimized to O(n)
                'pattern_search': 'O(m log n + occ)',  # m = pattern length, occ = occurrences
                'space': 'O(n)',
                'lcp_construction': 'O(n)',
                'substring_queries': 'O(log n + occ)'
            }
        }

    @property
    def metrics(self) -> Dict[str, Any]:
        """Get performance metrics derived from ``get_statistics()``."""
        stats = self.get_statistics()

        return {
            'text_length': stats['text_length'],
            'suffixes': stats['suffixes'],
            'unique_chars': stats['unique_characters'],
            'avg_lcp': f"{stats['avg_lcp']:.2f}",
            'max_lcp': stats['max_lcp'],
            'cache_entries': stats['pattern_cache_size'],
            'memory_usage': f"{stats['memory_usage']} bytes"
        }