exonware-xwnode 0.0.1.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132) hide show
  1. exonware/__init__.py +14 -0
  2. exonware/xwnode/__init__.py +127 -0
  3. exonware/xwnode/base.py +676 -0
  4. exonware/xwnode/config.py +178 -0
  5. exonware/xwnode/contracts.py +730 -0
  6. exonware/xwnode/errors.py +503 -0
  7. exonware/xwnode/facade.py +460 -0
  8. exonware/xwnode/strategies/__init__.py +158 -0
  9. exonware/xwnode/strategies/advisor.py +463 -0
  10. exonware/xwnode/strategies/edges/__init__.py +32 -0
  11. exonware/xwnode/strategies/edges/adj_list.py +227 -0
  12. exonware/xwnode/strategies/edges/adj_matrix.py +391 -0
  13. exonware/xwnode/strategies/edges/base.py +169 -0
  14. exonware/xwnode/strategies/flyweight.py +328 -0
  15. exonware/xwnode/strategies/impls/__init__.py +13 -0
  16. exonware/xwnode/strategies/impls/_base_edge.py +403 -0
  17. exonware/xwnode/strategies/impls/_base_node.py +307 -0
  18. exonware/xwnode/strategies/impls/edge_adj_list.py +353 -0
  19. exonware/xwnode/strategies/impls/edge_adj_matrix.py +445 -0
  20. exonware/xwnode/strategies/impls/edge_bidir_wrapper.py +455 -0
  21. exonware/xwnode/strategies/impls/edge_block_adj_matrix.py +539 -0
  22. exonware/xwnode/strategies/impls/edge_coo.py +533 -0
  23. exonware/xwnode/strategies/impls/edge_csc.py +447 -0
  24. exonware/xwnode/strategies/impls/edge_csr.py +492 -0
  25. exonware/xwnode/strategies/impls/edge_dynamic_adj_list.py +503 -0
  26. exonware/xwnode/strategies/impls/edge_flow_network.py +555 -0
  27. exonware/xwnode/strategies/impls/edge_hyperedge_set.py +516 -0
  28. exonware/xwnode/strategies/impls/edge_neural_graph.py +650 -0
  29. exonware/xwnode/strategies/impls/edge_octree.py +574 -0
  30. exonware/xwnode/strategies/impls/edge_property_store.py +655 -0
  31. exonware/xwnode/strategies/impls/edge_quadtree.py +519 -0
  32. exonware/xwnode/strategies/impls/edge_rtree.py +820 -0
  33. exonware/xwnode/strategies/impls/edge_temporal_edgeset.py +558 -0
  34. exonware/xwnode/strategies/impls/edge_tree_graph_basic.py +271 -0
  35. exonware/xwnode/strategies/impls/edge_weighted_graph.py +411 -0
  36. exonware/xwnode/strategies/manager.py +775 -0
  37. exonware/xwnode/strategies/metrics.py +538 -0
  38. exonware/xwnode/strategies/migration.py +432 -0
  39. exonware/xwnode/strategies/nodes/__init__.py +50 -0
  40. exonware/xwnode/strategies/nodes/_base_node.py +307 -0
  41. exonware/xwnode/strategies/nodes/adjacency_list.py +267 -0
  42. exonware/xwnode/strategies/nodes/aho_corasick.py +345 -0
  43. exonware/xwnode/strategies/nodes/array_list.py +209 -0
  44. exonware/xwnode/strategies/nodes/base.py +247 -0
  45. exonware/xwnode/strategies/nodes/deque.py +200 -0
  46. exonware/xwnode/strategies/nodes/hash_map.py +135 -0
  47. exonware/xwnode/strategies/nodes/heap.py +307 -0
  48. exonware/xwnode/strategies/nodes/linked_list.py +232 -0
  49. exonware/xwnode/strategies/nodes/node_aho_corasick.py +520 -0
  50. exonware/xwnode/strategies/nodes/node_array_list.py +175 -0
  51. exonware/xwnode/strategies/nodes/node_avl_tree.py +371 -0
  52. exonware/xwnode/strategies/nodes/node_b_plus_tree.py +542 -0
  53. exonware/xwnode/strategies/nodes/node_bitmap.py +420 -0
  54. exonware/xwnode/strategies/nodes/node_bitset_dynamic.py +513 -0
  55. exonware/xwnode/strategies/nodes/node_bloom_filter.py +347 -0
  56. exonware/xwnode/strategies/nodes/node_btree.py +357 -0
  57. exonware/xwnode/strategies/nodes/node_count_min_sketch.py +470 -0
  58. exonware/xwnode/strategies/nodes/node_cow_tree.py +473 -0
  59. exonware/xwnode/strategies/nodes/node_cuckoo_hash.py +392 -0
  60. exonware/xwnode/strategies/nodes/node_fenwick_tree.py +301 -0
  61. exonware/xwnode/strategies/nodes/node_hash_map.py +269 -0
  62. exonware/xwnode/strategies/nodes/node_heap.py +191 -0
  63. exonware/xwnode/strategies/nodes/node_hyperloglog.py +407 -0
  64. exonware/xwnode/strategies/nodes/node_linked_list.py +409 -0
  65. exonware/xwnode/strategies/nodes/node_lsm_tree.py +400 -0
  66. exonware/xwnode/strategies/nodes/node_ordered_map.py +390 -0
  67. exonware/xwnode/strategies/nodes/node_ordered_map_balanced.py +565 -0
  68. exonware/xwnode/strategies/nodes/node_patricia.py +512 -0
  69. exonware/xwnode/strategies/nodes/node_persistent_tree.py +378 -0
  70. exonware/xwnode/strategies/nodes/node_radix_trie.py +452 -0
  71. exonware/xwnode/strategies/nodes/node_red_black_tree.py +497 -0
  72. exonware/xwnode/strategies/nodes/node_roaring_bitmap.py +570 -0
  73. exonware/xwnode/strategies/nodes/node_segment_tree.py +289 -0
  74. exonware/xwnode/strategies/nodes/node_set_hash.py +354 -0
  75. exonware/xwnode/strategies/nodes/node_set_tree.py +480 -0
  76. exonware/xwnode/strategies/nodes/node_skip_list.py +316 -0
  77. exonware/xwnode/strategies/nodes/node_splay_tree.py +393 -0
  78. exonware/xwnode/strategies/nodes/node_suffix_array.py +487 -0
  79. exonware/xwnode/strategies/nodes/node_treap.py +387 -0
  80. exonware/xwnode/strategies/nodes/node_tree_graph_hybrid.py +1434 -0
  81. exonware/xwnode/strategies/nodes/node_trie.py +252 -0
  82. exonware/xwnode/strategies/nodes/node_union_find.py +187 -0
  83. exonware/xwnode/strategies/nodes/node_xdata_optimized.py +369 -0
  84. exonware/xwnode/strategies/nodes/priority_queue.py +209 -0
  85. exonware/xwnode/strategies/nodes/queue.py +161 -0
  86. exonware/xwnode/strategies/nodes/sparse_matrix.py +206 -0
  87. exonware/xwnode/strategies/nodes/stack.py +152 -0
  88. exonware/xwnode/strategies/nodes/trie.py +274 -0
  89. exonware/xwnode/strategies/nodes/union_find.py +283 -0
  90. exonware/xwnode/strategies/pattern_detector.py +603 -0
  91. exonware/xwnode/strategies/performance_monitor.py +487 -0
  92. exonware/xwnode/strategies/queries/__init__.py +24 -0
  93. exonware/xwnode/strategies/queries/base.py +236 -0
  94. exonware/xwnode/strategies/queries/cql.py +201 -0
  95. exonware/xwnode/strategies/queries/cypher.py +181 -0
  96. exonware/xwnode/strategies/queries/datalog.py +70 -0
  97. exonware/xwnode/strategies/queries/elastic_dsl.py +70 -0
  98. exonware/xwnode/strategies/queries/eql.py +70 -0
  99. exonware/xwnode/strategies/queries/flux.py +70 -0
  100. exonware/xwnode/strategies/queries/gql.py +70 -0
  101. exonware/xwnode/strategies/queries/graphql.py +240 -0
  102. exonware/xwnode/strategies/queries/gremlin.py +181 -0
  103. exonware/xwnode/strategies/queries/hiveql.py +214 -0
  104. exonware/xwnode/strategies/queries/hql.py +70 -0
  105. exonware/xwnode/strategies/queries/jmespath.py +219 -0
  106. exonware/xwnode/strategies/queries/jq.py +66 -0
  107. exonware/xwnode/strategies/queries/json_query.py +66 -0
  108. exonware/xwnode/strategies/queries/jsoniq.py +248 -0
  109. exonware/xwnode/strategies/queries/kql.py +70 -0
  110. exonware/xwnode/strategies/queries/linq.py +238 -0
  111. exonware/xwnode/strategies/queries/logql.py +70 -0
  112. exonware/xwnode/strategies/queries/mql.py +68 -0
  113. exonware/xwnode/strategies/queries/n1ql.py +210 -0
  114. exonware/xwnode/strategies/queries/partiql.py +70 -0
  115. exonware/xwnode/strategies/queries/pig.py +215 -0
  116. exonware/xwnode/strategies/queries/promql.py +70 -0
  117. exonware/xwnode/strategies/queries/sparql.py +220 -0
  118. exonware/xwnode/strategies/queries/sql.py +275 -0
  119. exonware/xwnode/strategies/queries/xml_query.py +66 -0
  120. exonware/xwnode/strategies/queries/xpath.py +223 -0
  121. exonware/xwnode/strategies/queries/xquery.py +258 -0
  122. exonware/xwnode/strategies/queries/xwnode_executor.py +332 -0
  123. exonware/xwnode/strategies/queries/xwquery_strategy.py +424 -0
  124. exonware/xwnode/strategies/registry.py +604 -0
  125. exonware/xwnode/strategies/simple.py +273 -0
  126. exonware/xwnode/strategies/utils.py +532 -0
  127. exonware/xwnode/types.py +912 -0
  128. exonware/xwnode/version.py +78 -0
  129. exonware_xwnode-0.0.1.12.dist-info/METADATA +169 -0
  130. exonware_xwnode-0.0.1.12.dist-info/RECORD +132 -0
  131. exonware_xwnode-0.0.1.12.dist-info/WHEEL +4 -0
  132. exonware_xwnode-0.0.1.12.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,603 @@
1
+ #!/usr/bin/env python3
2
+ #exonware/xwnode/src/exonware/xwnode/strategies/pattern_detector.py
3
+ """
4
+ Data Pattern Detector for Strategy Selection
5
+
6
+ Intelligent pattern detection that analyzes data characteristics to recommend
7
+ the optimal strategy for different use cases. This enhances the AUTO mode
8
+ selection with sophisticated heuristics.
9
+
10
+ Company: eXonware.com
11
+ Author: Eng. Muhammad AlShehri
12
+ Email: connect@exonware.com
13
+ Version: 0.0.1.12
14
+ Generation Date: 07-Sep-2025
15
+ """
16
+
17
+ import re
18
+ import time
19
+ import threading
20
+ from typing import Any, Dict, List, Optional, Set, Tuple, Union
21
+ from dataclasses import dataclass
22
+ from enum import Enum
23
+ from exonware.xwsystem import get_logger
24
+
25
+ logger = get_logger(__name__)
26
+
27
+ from ..types import NodeMode, EdgeMode, NodeTrait, EdgeTrait
28
+
29
+
30
+
31
class DataPattern(Enum):
    """Closed set of data characteristics used when choosing a strategy.

    Each member's value is the lowercase form of its name. Members are
    used as set elements in a data profile and as rule conditions.
    """

    # Key-shape characteristics
    SEQUENTIAL_NUMERIC = "sequential_numeric"
    STRING_KEYS = "string_keys"
    MIXED_KEYS = "mixed_keys"
    PREFIX_HEAVY = "prefix_heavy"

    # Structural characteristics
    HIERARCHICAL = "hierarchical"
    FLAT_STRUCTURE = "flat_structure"

    # Volume characteristics
    LARGE_DATASET = "large_dataset"
    SMALL_DATASET = "small_dataset"

    # Workload characteristics
    FREQUENT_UPDATES = "frequent_updates"
    READ_HEAVY = "read_heavy"
    WRITE_HEAVY = "write_heavy"

    # Domain characteristics
    TEMPORAL_DATA = "temporal_data"
    SPATIAL_DATA = "spatial_data"
    GRAPH_STRUCTURE = "graph_structure"
47
+
48
+
49
@dataclass
class StrategyRecommendation:
    """Result of a strategy recommendation: the chosen mode plus metadata."""

    # Recommended node or edge mode.
    mode: Union[NodeMode, EdgeMode]
    # Confidence score attached to the recommendation.
    confidence: float
    # Human-readable explanation of why the mode was chosen.
    reasoning: str
    # Performance gain figure taken from the matching rule (0.0 for fallbacks).
    estimated_performance_gain: float
    # Whether the matching rule flags a risk of losing data.
    data_loss_risk: bool
    # Other modes that were also considered suitable.
    alternative_modes: List[Union[NodeMode, EdgeMode]]
58
+
59
+
60
@dataclass
class DataProfile:
    """Summary of a data structure's characteristics, used to pick a strategy."""

    # Number of top-level items.
    size: int
    # Maximum nesting depth found.
    depth: int
    # Python types seen among the keys.
    key_types: Set[type]
    # Python types seen among the values.
    value_types: Set[type]
    # Patterns detected in the data and its usage context.
    patterns: Set[DataPattern]
    # Caller-supplied access pattern label (e.g. 'mixed', 'read_heavy').
    access_pattern: str
    # Caller-supplied update frequency label (e.g. 'moderate', 'high').
    update_frequency: str
    # Estimated memory footprint in bytes.
    memory_usage_estimate: int
    # Complexity score in the range 0.0 to 1.0.
    complexity_score: float
72
+
73
+
74
+ class DataPatternDetector:
75
+ """
76
+ Intelligent data pattern detector that analyzes data characteristics
77
+ to recommend optimal strategies for different use cases.
78
+ """
79
+
80
+ def __init__(self, confidence_threshold: float = 0.7):
81
+ """
82
+ Initialize pattern detector.
83
+
84
+ Args:
85
+ confidence_threshold: Minimum confidence level for recommendations
86
+ """
87
+ self._confidence_threshold = confidence_threshold
88
+ self._pattern_weights = self._build_pattern_weights()
89
+ self._strategy_rules = self._build_strategy_rules()
90
+ self._stats = {
91
+ 'analyses_performed': 0,
92
+ 'recommendations_given': 0,
93
+ 'high_confidence_recommendations': 0,
94
+ 'average_analysis_time': 0.0
95
+ }
96
+
97
def analyze_data(self, data: Any, **context: Any) -> DataProfile:
    """
    Analyze data and create a comprehensive profile.

    Args:
        data: Data to analyze
        **context: Additional context; 'access_pattern' (default 'mixed')
            and 'update_frequency' (default 'moderate') are read here and
            the whole mapping is forwarded to pattern detection

    Returns:
        DataProfile with analysis results. If any step raises, the error
        is logged and a minimal profile (zero size, no patterns,
        'unknown' labels) is returned instead of propagating.
    """
    # perf_counter is monotonic, so the measured duration cannot go
    # negative or jump when the system clock is adjusted.
    started = time.perf_counter()

    try:
        # Basic characteristics
        size = self._calculate_size(data)
        depth = self._calculate_depth(data)
        key_types, value_types = self._analyze_types(data)

        # Pattern detection
        patterns = self._detect_patterns(data, context)

        # Performance characteristics
        access_pattern = context.get('access_pattern', 'mixed')
        update_frequency = context.get('update_frequency', 'moderate')
        memory_usage = self._estimate_memory_usage(data)
        complexity_score = self._calculate_complexity_score(data, patterns)

        profile = DataProfile(
            size=size,
            depth=depth,
            key_types=key_types,
            value_types=value_types,
            patterns=patterns,
            access_pattern=access_pattern,
            update_frequency=update_frequency,
            memory_usage_estimate=memory_usage,
            complexity_score=complexity_score
        )

        # Update statistics (only successful analyses are counted)
        analysis_time = time.perf_counter() - started
        self._update_stats(analysis_time)

        logger.debug(f"🔍 Data analysis completed in {analysis_time:.3f}s: {len(patterns)} patterns detected")
        return profile

    except Exception as e:
        # Deliberate best-effort: analysis must never break the caller.
        logger.error(f"❌ Data analysis failed: {e}")
        return DataProfile(
            size=0, depth=0, key_types=set(), value_types=set(),
            patterns=set(), access_pattern='unknown', update_frequency='unknown',
            memory_usage_estimate=0, complexity_score=0.0
        )
152
+
153
def recommend_node_strategy(self, profile: DataProfile, **options: Any) -> StrategyRecommendation:
    """
    Recommend optimal node strategy based on data profile.

    Every node rule is scored against the profile; rules at or above the
    confidence threshold become candidates and the highest-scoring one
    wins (ties keep rule-table order).

    Args:
        profile: Data profile from analysis
        **options: Additional options for recommendation (currently unused)

    Returns:
        Strategy recommendation with confidence score. When no rule
        qualifies, a HASH_MAP fallback with confidence 0.5 is returned.
    """
    candidates = []
    for rule in self._strategy_rules['node']:
        confidence = self._evaluate_rule(rule, profile)
        if confidence >= self._confidence_threshold:
            candidates.append((confidence, rule))

    if not candidates:
        # Fallback to default recommendation
        return StrategyRecommendation(
            mode=NodeMode.HASH_MAP,
            confidence=0.5,
            reasoning="No specific patterns detected, using default hash map strategy",
            estimated_performance_gain=0.0,
            data_loss_risk=False,
            alternative_modes=[NodeMode.ARRAY_LIST, NodeMode.TREE_GRAPH_HYBRID]
        )

    # Stable sort: equal confidences keep their rule-table order.
    candidates.sort(key=lambda pair: pair[0], reverse=True)
    best_confidence, best_rule = candidates[0]
    best_mode = best_rule['mode']

    # Several rules can map to the same mode, so alternatives must be
    # distinct and must not repeat the winning mode. Keep at most two.
    alternatives = []
    for _, rule in candidates[1:]:
        mode = rule['mode']
        if mode != best_mode and mode not in alternatives:
            alternatives.append(mode)
            if len(alternatives) == 2:
                break

    self._stats['recommendations_given'] += 1
    if best_confidence >= 0.8:
        self._stats['high_confidence_recommendations'] += 1

    logger.debug(f"🎯 Node strategy recommendation: {best_mode.name} (confidence: {best_confidence:.2f})")

    return StrategyRecommendation(
        mode=best_mode,
        confidence=best_confidence,
        reasoning=best_rule['reasoning'],
        estimated_performance_gain=best_rule.get('performance_gain', 0.0),
        data_loss_risk=best_rule.get('data_loss_risk', False),
        alternative_modes=alternatives
    )
210
+
211
def recommend_edge_strategy(self, profile: DataProfile, **options: Any) -> StrategyRecommendation:
    """
    Pick the best-matching edge strategy for a data profile.

    Args:
        profile: Data profile from analysis
        **options: Additional options for recommendation (currently unused)

    Returns:
        Strategy recommendation with confidence score; an ADJ_LIST
        fallback with confidence 0.5 when no edge rule qualifies
    """
    threshold = self._confidence_threshold

    # Score every edge rule and keep the ones that clear the threshold.
    scored = [(self._evaluate_rule(rule, profile), rule) for rule in self._strategy_rules['edge']]
    qualifying = [
        (
            rule['mode'],
            score,
            rule['reasoning'],
            rule.get('performance_gain', 0.0),
            rule.get('data_loss_risk', False),
        )
        for score, rule in scored
        if score >= threshold
    ]

    if len(qualifying) == 0:
        # Nothing matched well enough: use the default strategy.
        return StrategyRecommendation(
            mode=EdgeMode.ADJ_LIST,
            confidence=0.5,
            reasoning="No specific patterns detected, using default adjacency list strategy",
            estimated_performance_gain=0.0,
            data_loss_risk=False,
            alternative_modes=[EdgeMode.ADJ_MATRIX]
        )

    # Highest confidence first; ties keep rule-table order.
    qualifying.sort(key=lambda entry: entry[1], reverse=True)
    winner = qualifying[0]
    top_mode, top_confidence = winner[0], winner[1]

    # Only the single runner-up is offered as an alternative.
    runner_up_modes = [entry[0] for entry in qualifying[1:2]]

    self._stats['recommendations_given'] += 1
    if top_confidence >= 0.8:
        self._stats['high_confidence_recommendations'] += 1

    logger.debug(f"🎯 Edge strategy recommendation: {top_mode.name} (confidence: {top_confidence:.2f})")

    return StrategyRecommendation(
        mode=top_mode,
        confidence=top_confidence,
        reasoning=winner[2],
        estimated_performance_gain=winner[3],
        data_loss_risk=winner[4],
        alternative_modes=runner_up_modes
    )
268
+
269
+ def _calculate_size(self, data: Any) -> int:
270
+ """Calculate the size of the data structure."""
271
+ if isinstance(data, (dict, list)):
272
+ return len(data)
273
+ elif hasattr(data, '__len__'):
274
+ return len(data)
275
+ else:
276
+ return 1
277
+
278
+ def _calculate_depth(self, data: Any, current_depth: int = 0, max_depth: int = 10) -> int:
279
+ """Calculate the maximum nesting depth."""
280
+ if current_depth >= max_depth:
281
+ return current_depth
282
+
283
+ if isinstance(data, dict):
284
+ if not data:
285
+ return current_depth
286
+ return max(self._calculate_depth(v, current_depth + 1, max_depth) for v in data.values())
287
+ elif isinstance(data, list):
288
+ if not data:
289
+ return current_depth
290
+ return max(self._calculate_depth(item, current_depth + 1, max_depth) for item in data)
291
+ else:
292
+ return current_depth
293
+
294
+ def _analyze_types(self, data: Any) -> Tuple[Set[type], Set[type]]:
295
+ """Analyze key and value types in the data."""
296
+ key_types = set()
297
+ value_types = set()
298
+
299
+ if isinstance(data, dict):
300
+ for key, value in data.items():
301
+ key_types.add(type(key))
302
+ value_types.add(type(value))
303
+ elif isinstance(data, list):
304
+ for item in data:
305
+ value_types.add(type(item))
306
+ else:
307
+ value_types.add(type(data))
308
+
309
+ return key_types, value_types
310
+
311
def _detect_patterns(self, data: Any, context: Dict[str, Any]) -> Set[DataPattern]:
    """
    Detect patterns in the data.

    Args:
        data: Data to inspect; key-based patterns are only evaluated for dicts
        context: Usage hints; 'update_frequency' and 'access_pattern' are read

    Returns:
        Set of detected DataPattern members
    """
    patterns = set()

    if isinstance(data, dict):
        keys = list(data.keys())

        # Check for sequential numeric keys
        if self._is_sequential_numeric_keys(keys):
            patterns.add(DataPattern.SEQUENTIAL_NUMERIC)

        # Check for string keys (an empty dict has no string keys, so the
        # vacuous truth of all() must not count)
        if keys and all(isinstance(k, str) for k in keys):
            patterns.add(DataPattern.STRING_KEYS)

        # Check for prefix patterns. Only string keys can share a textual
        # prefix; passing ints here would raise TypeError in the helper.
        string_keys = [k for k in keys if isinstance(k, str)]
        if self._has_prefix_patterns(string_keys):
            patterns.add(DataPattern.PREFIX_HEAVY)

        # Check for mixed key types
        if len(set(type(k) for k in keys)) > 1:
            patterns.add(DataPattern.MIXED_KEYS)

    # Check for hierarchical structure
    if self._is_hierarchical(data):
        patterns.add(DataPattern.HIERARCHICAL)
    else:
        patterns.add(DataPattern.FLAT_STRUCTURE)

    # Size-based patterns (sizes from 100 to 1000 get neither label)
    size = self._calculate_size(data)
    if size > 1000:
        patterns.add(DataPattern.LARGE_DATASET)
    elif size < 100:
        patterns.add(DataPattern.SMALL_DATASET)

    # Context-based patterns. Update frequency and access pattern are
    # independent hints, so one must not suppress the other.
    if context.get('update_frequency') == 'high':
        patterns.add(DataPattern.FREQUENT_UPDATES)

    access_pattern = context.get('access_pattern')
    if access_pattern == 'read_heavy':
        patterns.add(DataPattern.READ_HEAVY)
    elif access_pattern == 'write_heavy':
        patterns.add(DataPattern.WRITE_HEAVY)

    return patterns
356
+
357
+ def _is_sequential_numeric_keys(self, keys: List[Any]) -> bool:
358
+ """Check if keys are sequential numeric indices."""
359
+ if not keys:
360
+ return False
361
+
362
+ try:
363
+ # Convert to integers and check if sequential
364
+ int_keys = [int(k) for k in keys if str(k).isdigit()]
365
+ if len(int_keys) != len(keys):
366
+ return False
367
+
368
+ int_keys.sort()
369
+ return int_keys == list(range(len(int_keys)))
370
+ except (ValueError, TypeError):
371
+ return False
372
+
373
+ def _has_prefix_patterns(self, keys: List[str]) -> bool:
374
+ """Check if keys have common prefixes."""
375
+ if len(keys) < 3:
376
+ return False
377
+
378
+ # Find common prefixes
379
+ common_prefixes = set()
380
+ for i, key1 in enumerate(keys):
381
+ for key2 in keys[i+1:]:
382
+ prefix = self._common_prefix(key1, key2)
383
+ if len(prefix) > 2: # Meaningful prefix
384
+ common_prefixes.add(prefix)
385
+
386
+ return len(common_prefixes) > 0
387
+
388
+ def _common_prefix(self, str1: str, str2: str) -> str:
389
+ """Find common prefix between two strings."""
390
+ prefix = ""
391
+ for i in range(min(len(str1), len(str2))):
392
+ if str1[i] == str2[i]:
393
+ prefix += str1[i]
394
+ else:
395
+ break
396
+ return prefix
397
+
398
+ def _is_hierarchical(self, data: Any, max_check: int = 5) -> bool:
399
+ """Check if data has hierarchical structure."""
400
+ if not isinstance(data, dict):
401
+ return False
402
+
403
+ checked = 0
404
+ for value in data.values():
405
+ if checked >= max_check:
406
+ break
407
+ if isinstance(value, (dict, list)):
408
+ return True
409
+ checked += 1
410
+
411
+ return False
412
+
413
+ def _estimate_memory_usage(self, data: Any) -> int:
414
+ """Estimate memory usage in bytes."""
415
+ try:
416
+ import sys
417
+ return sys.getsizeof(data)
418
+ except:
419
+ # Fallback estimation
420
+ size = self._calculate_size(data)
421
+ return size * 50 # Rough estimate: 50 bytes per item
422
+
423
def _calculate_complexity_score(self, data: Any, patterns: Set[DataPattern]) -> float:
    """
    Score structural complexity on a 0.0 to 1.0 scale.

    The score is a size bonus (0.1 to 0.3 for more than 100, 1000 or
    10000 items) plus a fixed bonus for each complexity-related pattern
    present, capped at 1.0.
    """
    item_count = self._calculate_size(data)

    # Size contribution: only the highest matching tier applies.
    if item_count > 10000:
        total = 0.3
    elif item_count > 1000:
        total = 0.2
    elif item_count > 100:
        total = 0.1
    else:
        total = 0.0

    # Pattern contributions, added in a fixed order.
    pattern_bonuses = (
        (DataPattern.MIXED_KEYS, 0.2),
        (DataPattern.HIERARCHICAL, 0.2),
        (DataPattern.PREFIX_HEAVY, 0.1),
        (DataPattern.FREQUENT_UPDATES, 0.1),
    )
    for pattern, bonus in pattern_bonuses:
        if pattern in patterns:
            total += bonus

    return min(total, 1.0)
447
+
448
def _build_pattern_weights(self) -> Dict[DataPattern, float]:
    """
    Create the pattern-to-weight table stored on the detector.

    Returns:
        A fresh dict mapping nine of the DataPattern members to a float
        weight
    """
    weighted_patterns = [
        (DataPattern.SEQUENTIAL_NUMERIC, 0.9),
        (DataPattern.STRING_KEYS, 0.7),
        (DataPattern.PREFIX_HEAVY, 0.8),
        (DataPattern.HIERARCHICAL, 0.6),
        (DataPattern.LARGE_DATASET, 0.5),
        (DataPattern.SMALL_DATASET, 0.3),
        (DataPattern.FREQUENT_UPDATES, 0.4),
        (DataPattern.READ_HEAVY, 0.3),
        (DataPattern.WRITE_HEAVY, 0.4),
    ]
    return dict(weighted_patterns)
461
+
462
def _build_strategy_rules(self) -> Dict[str, List[Dict[str, Any]]]:
    """
    Create the rule tables used to score node and edge strategies.

    Returns:
        Dict with 'node' and 'edge' lists. Each rule is a dict holding the
        target 'mode', the 'conditions' (patterns) it looks for, a
        'reasoning' string, a 'performance_gain' figure and a
        'data_loss_risk' flag.
    """
    def make_rule(mode, conditions, reasoning, gain):
        # None of the built-in rules carries a data loss risk.
        return {
            'mode': mode,
            'conditions': list(conditions),
            'reasoning': reasoning,
            'performance_gain': gain,
            'data_loss_risk': False,
        }

    node_rules = [
        make_rule(
            NodeMode.ARRAY_LIST,
            (DataPattern.SEQUENTIAL_NUMERIC, DataPattern.SMALL_DATASET),
            'Sequential numeric keys with small dataset - optimal for array list',
            0.3,
        ),
        make_rule(
            NodeMode.HASH_MAP,
            (DataPattern.STRING_KEYS, DataPattern.FLAT_STRUCTURE),
            'String keys with flat structure - optimal for hash map',
            0.2,
        ),
        make_rule(
            NodeMode.TREE_GRAPH_HYBRID,
            (DataPattern.PREFIX_HEAVY, DataPattern.HIERARCHICAL),
            'Prefix-heavy hierarchical data - optimal for tree structure',
            0.4,
        ),
        make_rule(
            NodeMode.HASH_MAP,
            (DataPattern.LARGE_DATASET, DataPattern.READ_HEAVY),
            'Large dataset with read-heavy access - optimized for data interchange',
            0.5,
        ),
    ]

    edge_rules = [
        make_rule(
            EdgeMode.ADJ_LIST,
            (DataPattern.GRAPH_STRUCTURE,),
            'Graph structure detected - optimal for adjacency list',
            0.3,
        ),
        make_rule(
            EdgeMode.ADJ_MATRIX,
            (DataPattern.LARGE_DATASET, DataPattern.SPATIAL_DATA),
            'Large spatial dataset - optimal for adjacency matrix',
            0.2,
        ),
    ]

    return {'node': node_rules, 'edge': edge_rules}
512
+
513
def _evaluate_rule(self, rule: Dict[str, Any], profile: DataProfile) -> float:
    """
    Score a rule against a profile.

    The base score is the fraction of the rule's conditions found in
    profile.patterns. A 0.1 bonus is added when the rule asks for a
    large (or small) dataset and the profile size agrees. The result is
    capped at 1.0; a rule without conditions scores 0.0.
    """
    required = rule.get('conditions', [])
    if not required:
        return 0.0

    satisfied = sum(1 for pattern in required if pattern in profile.patterns)
    score = satisfied / len(required)

    # Size agreement bonus (at most one of the two can apply).
    wants_large = DataPattern.LARGE_DATASET in required
    if profile.size > 1000 and wants_large:
        score += 0.1
    elif profile.size < 100 and DataPattern.SMALL_DATASET in required:
        score += 0.1

    return min(score, 1.0)
536
+
537
+ def _update_stats(self, analysis_time: float) -> None:
538
+ """Update internal statistics."""
539
+ self._stats['analyses_performed'] += 1
540
+
541
+ # Update average analysis time
542
+ total_time = self._stats['average_analysis_time'] * (self._stats['analyses_performed'] - 1)
543
+ self._stats['average_analysis_time'] = (total_time + analysis_time) / self._stats['analyses_performed']
544
+
545
def get_stats(self) -> Dict[str, Any]:
    """Return a shallow copy of the counters so callers cannot mutate them."""
    snapshot = dict(self._stats)
    return snapshot
548
+
549
+
550
# Process-wide detector, created lazily on first use.
_detector_instance: Optional[DataPatternDetector] = None
# Guards creation of the shared detector.
_detector_lock = threading.Lock()


def get_detector() -> DataPatternDetector:
    """
    Return the shared DataPatternDetector, creating it on first call.

    Returns:
        Global DataPatternDetector instance
    """
    global _detector_instance

    # Fast path: already created, no lock needed.
    if _detector_instance is not None:
        return _detector_instance

    with _detector_lock:
        # Re-check under the lock in case another thread created it meanwhile.
        if _detector_instance is None:
            _detector_instance = DataPatternDetector()
            logger.info("🔍 Initialized global data pattern detector")
        return _detector_instance
571
+
572
+
573
def analyze_data_patterns(data: Any, **context: Any) -> DataProfile:
    """
    Profile data with the shared detector.

    Args:
        data: Data to analyze
        **context: Additional context forwarded unchanged to the detector

    Returns:
        Data profile
    """
    shared_detector = get_detector()
    return shared_detector.analyze_data(data, **context)
585
+
586
+
587
def recommend_strategy(profile: DataProfile, strategy_type: str = 'node', **options: Any) -> StrategyRecommendation:
    """
    Get strategy recommendation using the global detector.

    Args:
        profile: Data profile
        strategy_type: 'node' or 'edge'. Any other value is still routed
            to the edge recommender (as before), but a warning is logged
            so typos such as 'nodes' are visible.
        **options: Additional options

    Returns:
        Strategy recommendation
    """
    detector = get_detector()
    if strategy_type == 'node':
        return detector.recommend_node_strategy(profile, **options)

    if strategy_type != 'edge':
        # Keep the historical fallback, but make the misuse visible.
        logger.warning(
            f"Unknown strategy_type {strategy_type!r}; expected 'node' or 'edge', "
            "falling back to edge strategy recommendation"
        )
    return detector.recommend_edge_strategy(profile, **options)