exonware-xwnode 0.0.1.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- exonware/__init__.py +14 -0
- exonware/xwnode/__init__.py +127 -0
- exonware/xwnode/base.py +676 -0
- exonware/xwnode/config.py +178 -0
- exonware/xwnode/contracts.py +730 -0
- exonware/xwnode/errors.py +503 -0
- exonware/xwnode/facade.py +460 -0
- exonware/xwnode/strategies/__init__.py +158 -0
- exonware/xwnode/strategies/advisor.py +463 -0
- exonware/xwnode/strategies/edges/__init__.py +32 -0
- exonware/xwnode/strategies/edges/adj_list.py +227 -0
- exonware/xwnode/strategies/edges/adj_matrix.py +391 -0
- exonware/xwnode/strategies/edges/base.py +169 -0
- exonware/xwnode/strategies/flyweight.py +328 -0
- exonware/xwnode/strategies/impls/__init__.py +13 -0
- exonware/xwnode/strategies/impls/_base_edge.py +403 -0
- exonware/xwnode/strategies/impls/_base_node.py +307 -0
- exonware/xwnode/strategies/impls/edge_adj_list.py +353 -0
- exonware/xwnode/strategies/impls/edge_adj_matrix.py +445 -0
- exonware/xwnode/strategies/impls/edge_bidir_wrapper.py +455 -0
- exonware/xwnode/strategies/impls/edge_block_adj_matrix.py +539 -0
- exonware/xwnode/strategies/impls/edge_coo.py +533 -0
- exonware/xwnode/strategies/impls/edge_csc.py +447 -0
- exonware/xwnode/strategies/impls/edge_csr.py +492 -0
- exonware/xwnode/strategies/impls/edge_dynamic_adj_list.py +503 -0
- exonware/xwnode/strategies/impls/edge_flow_network.py +555 -0
- exonware/xwnode/strategies/impls/edge_hyperedge_set.py +516 -0
- exonware/xwnode/strategies/impls/edge_neural_graph.py +650 -0
- exonware/xwnode/strategies/impls/edge_octree.py +574 -0
- exonware/xwnode/strategies/impls/edge_property_store.py +655 -0
- exonware/xwnode/strategies/impls/edge_quadtree.py +519 -0
- exonware/xwnode/strategies/impls/edge_rtree.py +820 -0
- exonware/xwnode/strategies/impls/edge_temporal_edgeset.py +558 -0
- exonware/xwnode/strategies/impls/edge_tree_graph_basic.py +271 -0
- exonware/xwnode/strategies/impls/edge_weighted_graph.py +411 -0
- exonware/xwnode/strategies/manager.py +775 -0
- exonware/xwnode/strategies/metrics.py +538 -0
- exonware/xwnode/strategies/migration.py +432 -0
- exonware/xwnode/strategies/nodes/__init__.py +50 -0
- exonware/xwnode/strategies/nodes/_base_node.py +307 -0
- exonware/xwnode/strategies/nodes/adjacency_list.py +267 -0
- exonware/xwnode/strategies/nodes/aho_corasick.py +345 -0
- exonware/xwnode/strategies/nodes/array_list.py +209 -0
- exonware/xwnode/strategies/nodes/base.py +247 -0
- exonware/xwnode/strategies/nodes/deque.py +200 -0
- exonware/xwnode/strategies/nodes/hash_map.py +135 -0
- exonware/xwnode/strategies/nodes/heap.py +307 -0
- exonware/xwnode/strategies/nodes/linked_list.py +232 -0
- exonware/xwnode/strategies/nodes/node_aho_corasick.py +520 -0
- exonware/xwnode/strategies/nodes/node_array_list.py +175 -0
- exonware/xwnode/strategies/nodes/node_avl_tree.py +371 -0
- exonware/xwnode/strategies/nodes/node_b_plus_tree.py +542 -0
- exonware/xwnode/strategies/nodes/node_bitmap.py +420 -0
- exonware/xwnode/strategies/nodes/node_bitset_dynamic.py +513 -0
- exonware/xwnode/strategies/nodes/node_bloom_filter.py +347 -0
- exonware/xwnode/strategies/nodes/node_btree.py +357 -0
- exonware/xwnode/strategies/nodes/node_count_min_sketch.py +470 -0
- exonware/xwnode/strategies/nodes/node_cow_tree.py +473 -0
- exonware/xwnode/strategies/nodes/node_cuckoo_hash.py +392 -0
- exonware/xwnode/strategies/nodes/node_fenwick_tree.py +301 -0
- exonware/xwnode/strategies/nodes/node_hash_map.py +269 -0
- exonware/xwnode/strategies/nodes/node_heap.py +191 -0
- exonware/xwnode/strategies/nodes/node_hyperloglog.py +407 -0
- exonware/xwnode/strategies/nodes/node_linked_list.py +409 -0
- exonware/xwnode/strategies/nodes/node_lsm_tree.py +400 -0
- exonware/xwnode/strategies/nodes/node_ordered_map.py +390 -0
- exonware/xwnode/strategies/nodes/node_ordered_map_balanced.py +565 -0
- exonware/xwnode/strategies/nodes/node_patricia.py +512 -0
- exonware/xwnode/strategies/nodes/node_persistent_tree.py +378 -0
- exonware/xwnode/strategies/nodes/node_radix_trie.py +452 -0
- exonware/xwnode/strategies/nodes/node_red_black_tree.py +497 -0
- exonware/xwnode/strategies/nodes/node_roaring_bitmap.py +570 -0
- exonware/xwnode/strategies/nodes/node_segment_tree.py +289 -0
- exonware/xwnode/strategies/nodes/node_set_hash.py +354 -0
- exonware/xwnode/strategies/nodes/node_set_tree.py +480 -0
- exonware/xwnode/strategies/nodes/node_skip_list.py +316 -0
- exonware/xwnode/strategies/nodes/node_splay_tree.py +393 -0
- exonware/xwnode/strategies/nodes/node_suffix_array.py +487 -0
- exonware/xwnode/strategies/nodes/node_treap.py +387 -0
- exonware/xwnode/strategies/nodes/node_tree_graph_hybrid.py +1434 -0
- exonware/xwnode/strategies/nodes/node_trie.py +252 -0
- exonware/xwnode/strategies/nodes/node_union_find.py +187 -0
- exonware/xwnode/strategies/nodes/node_xdata_optimized.py +369 -0
- exonware/xwnode/strategies/nodes/priority_queue.py +209 -0
- exonware/xwnode/strategies/nodes/queue.py +161 -0
- exonware/xwnode/strategies/nodes/sparse_matrix.py +206 -0
- exonware/xwnode/strategies/nodes/stack.py +152 -0
- exonware/xwnode/strategies/nodes/trie.py +274 -0
- exonware/xwnode/strategies/nodes/union_find.py +283 -0
- exonware/xwnode/strategies/pattern_detector.py +603 -0
- exonware/xwnode/strategies/performance_monitor.py +487 -0
- exonware/xwnode/strategies/queries/__init__.py +24 -0
- exonware/xwnode/strategies/queries/base.py +236 -0
- exonware/xwnode/strategies/queries/cql.py +201 -0
- exonware/xwnode/strategies/queries/cypher.py +181 -0
- exonware/xwnode/strategies/queries/datalog.py +70 -0
- exonware/xwnode/strategies/queries/elastic_dsl.py +70 -0
- exonware/xwnode/strategies/queries/eql.py +70 -0
- exonware/xwnode/strategies/queries/flux.py +70 -0
- exonware/xwnode/strategies/queries/gql.py +70 -0
- exonware/xwnode/strategies/queries/graphql.py +240 -0
- exonware/xwnode/strategies/queries/gremlin.py +181 -0
- exonware/xwnode/strategies/queries/hiveql.py +214 -0
- exonware/xwnode/strategies/queries/hql.py +70 -0
- exonware/xwnode/strategies/queries/jmespath.py +219 -0
- exonware/xwnode/strategies/queries/jq.py +66 -0
- exonware/xwnode/strategies/queries/json_query.py +66 -0
- exonware/xwnode/strategies/queries/jsoniq.py +248 -0
- exonware/xwnode/strategies/queries/kql.py +70 -0
- exonware/xwnode/strategies/queries/linq.py +238 -0
- exonware/xwnode/strategies/queries/logql.py +70 -0
- exonware/xwnode/strategies/queries/mql.py +68 -0
- exonware/xwnode/strategies/queries/n1ql.py +210 -0
- exonware/xwnode/strategies/queries/partiql.py +70 -0
- exonware/xwnode/strategies/queries/pig.py +215 -0
- exonware/xwnode/strategies/queries/promql.py +70 -0
- exonware/xwnode/strategies/queries/sparql.py +220 -0
- exonware/xwnode/strategies/queries/sql.py +275 -0
- exonware/xwnode/strategies/queries/xml_query.py +66 -0
- exonware/xwnode/strategies/queries/xpath.py +223 -0
- exonware/xwnode/strategies/queries/xquery.py +258 -0
- exonware/xwnode/strategies/queries/xwnode_executor.py +332 -0
- exonware/xwnode/strategies/queries/xwquery_strategy.py +424 -0
- exonware/xwnode/strategies/registry.py +604 -0
- exonware/xwnode/strategies/simple.py +273 -0
- exonware/xwnode/strategies/utils.py +532 -0
- exonware/xwnode/types.py +912 -0
- exonware/xwnode/version.py +78 -0
- exonware_xwnode-0.0.1.12.dist-info/METADATA +169 -0
- exonware_xwnode-0.0.1.12.dist-info/RECORD +132 -0
- exonware_xwnode-0.0.1.12.dist-info/WHEEL +4 -0
- exonware_xwnode-0.0.1.12.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,603 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
#exonware/xwnode/src/exonware/xwnode/strategies/pattern_detector.py
|
3
|
+
"""
|
4
|
+
Data Pattern Detector for Strategy Selection
|
5
|
+
|
6
|
+
Intelligent pattern detection that analyzes data characteristics to recommend
|
7
|
+
the optimal strategy for different use cases. This enhances the AUTO mode
|
8
|
+
selection with sophisticated heuristics.
|
9
|
+
|
10
|
+
Company: eXonware.com
|
11
|
+
Author: Eng. Muhammad AlShehri
|
12
|
+
Email: connect@exonware.com
|
13
|
+
Version: 0.0.1.12
|
14
|
+
Generation Date: 07-Sep-2025
|
15
|
+
"""
|
16
|
+
|
17
|
+
import re
|
18
|
+
import time
|
19
|
+
import threading
|
20
|
+
from typing import Any, Dict, List, Optional, Set, Tuple, Union
|
21
|
+
from dataclasses import dataclass
|
22
|
+
from enum import Enum
|
23
|
+
from exonware.xwsystem import get_logger
|
24
|
+
|
25
|
+
logger = get_logger(__name__)
|
26
|
+
|
27
|
+
from ..types import NodeMode, EdgeMode, NodeTrait, EdgeTrait
|
28
|
+
|
29
|
+
|
30
|
+
|
31
|
+
class DataPattern(Enum):
    """
    Labels describing characteristics of a data set.

    A profile carries a set of these labels; strategy rules list the labels
    they require. Each member's value is its lowercase string name.
    """

    # Key shape
    SEQUENTIAL_NUMERIC = "sequential_numeric"
    STRING_KEYS = "string_keys"
    MIXED_KEYS = "mixed_keys"
    PREFIX_HEAVY = "prefix_heavy"

    # Nesting
    HIERARCHICAL = "hierarchical"
    FLAT_STRUCTURE = "flat_structure"

    # Volume
    LARGE_DATASET = "large_dataset"
    SMALL_DATASET = "small_dataset"

    # Workload
    FREQUENT_UPDATES = "frequent_updates"
    READ_HEAVY = "read_heavy"
    WRITE_HEAVY = "write_heavy"

    # Domain
    TEMPORAL_DATA = "temporal_data"
    SPATIAL_DATA = "spatial_data"
    GRAPH_STRUCTURE = "graph_structure"
|
47
|
+
|
48
|
+
|
49
|
+
@dataclass
class StrategyRecommendation:
    """
    Outcome of a strategy selection.

    All fields are required and positional order matters to callers that
    construct instances without keywords.
    """

    # Recommended node or edge mode.
    mode: Union[NodeMode, EdgeMode]
    # Confidence attached to the recommendation.
    confidence: float
    # Human-readable explanation of why ``mode`` was chosen.
    reasoning: str
    # Performance gain estimate carried over from the matching rule.
    estimated_performance_gain: float
    # Whether the matching rule flags a risk of losing data.
    data_loss_risk: bool
    # Other modes that were also considered, best first.
    alternative_modes: List[Union[NodeMode, EdgeMode]]
|
58
|
+
|
59
|
+
|
60
|
+
@dataclass
class DataProfile:
    """
    Summary of a data set used as input to strategy selection.

    All fields are required and positional order matters to callers that
    construct instances without keywords.
    """

    # Number of top-level items.
    size: int
    # Maximum nesting depth that was measured.
    depth: int
    # Python types seen among top-level keys.
    key_types: Set[type]
    # Python types seen among top-level values.
    value_types: Set[type]
    # Pattern labels detected for the data.
    patterns: Set[DataPattern]
    # Access-pattern hint as a free-form string.
    access_pattern: str
    # Update-frequency hint as a free-form string.
    update_frequency: str
    # Estimated memory footprint in bytes.
    memory_usage_estimate: int
    # Complexity score.
    complexity_score: float
|
72
|
+
|
73
|
+
|
74
|
+
class DataPatternDetector:
    """
    Intelligent data pattern detector that analyzes data characteristics
    to recommend optimal strategies for different use cases.

    Typical flow: ``analyze_data`` builds a ``DataProfile`` from raw data and
    optional context hints; ``recommend_node_strategy`` /
    ``recommend_edge_strategy`` then score the built-in rules against that
    profile and return the best match (or a default when nothing qualifies).
    """

    # Item-count thresholds shared by pattern detection and rule scoring.
    _LARGE_DATASET_SIZE = 1000
    _SMALL_DATASET_SIZE = 100
    # Confidence at or above which a recommendation counts as "high confidence".
    _HIGH_CONFIDENCE = 0.8
    # Two keys share a "meaningful" prefix when at least this many leading
    # characters are equal.
    _MIN_PREFIX_LENGTH = 3

    def __init__(self, confidence_threshold: float = 0.7):
        """
        Initialize pattern detector.

        Args:
            confidence_threshold: Minimum confidence level for recommendations
        """
        self._confidence_threshold = confidence_threshold
        # NOTE: the weights are built here but not consulted by the rule
        # evaluation in this class.
        self._pattern_weights = self._build_pattern_weights()
        self._strategy_rules = self._build_strategy_rules()
        self._stats = {
            'analyses_performed': 0,
            'recommendations_given': 0,
            'high_confidence_recommendations': 0,
            'average_analysis_time': 0.0
        }

    def analyze_data(self, data: Any, **context: Any) -> DataProfile:
        """
        Analyze data and create a comprehensive profile.

        Never raises: if any step fails, the error is logged and an empty
        profile (size 0, no patterns, 'unknown' hints) is returned instead.

        Args:
            data: Data to analyze
            **context: Additional context. The keys read here are
                ``access_pattern`` (default 'mixed') and
                ``update_frequency`` (default 'moderate').

        Returns:
            DataProfile with analysis results
        """
        # perf_counter is monotonic, so the elapsed time cannot go negative
        # if the wall clock is adjusted mid-analysis.
        start_time = time.perf_counter()

        try:
            # Basic characteristics
            size = self._calculate_size(data)
            depth = self._calculate_depth(data)
            key_types, value_types = self._analyze_types(data)

            # Pattern detection
            patterns = self._detect_patterns(data, context)

            profile = DataProfile(
                size=size,
                depth=depth,
                key_types=key_types,
                value_types=value_types,
                patterns=patterns,
                access_pattern=context.get('access_pattern', 'mixed'),
                update_frequency=context.get('update_frequency', 'moderate'),
                memory_usage_estimate=self._estimate_memory_usage(data),
                complexity_score=self._calculate_complexity_score(data, patterns)
            )

            # Update statistics
            analysis_time = time.perf_counter() - start_time
            self._update_stats(analysis_time)

            logger.debug(f"🔍 Data analysis completed in {analysis_time:.3f}s: {len(patterns)} patterns detected")
            return profile

        except Exception as e:
            # Deliberate best-effort boundary: callers always get a profile.
            logger.error(f"❌ Data analysis failed: {e}")
            return DataProfile(
                size=0, depth=0, key_types=set(), value_types=set(),
                patterns=set(), access_pattern='unknown', update_frequency='unknown',
                memory_usage_estimate=0, complexity_score=0.0
            )

    def recommend_node_strategy(self, profile: DataProfile, **options: Any) -> StrategyRecommendation:
        """
        Recommend optimal node strategy based on data profile.

        Args:
            profile: Data profile from analysis
            **options: Additional options for recommendation (accepted for
                interface compatibility; not read by this implementation)

        Returns:
            Strategy recommendation with confidence score. When no rule
            reaches the confidence threshold, a HASH_MAP default with
            confidence 0.5 is returned.
        """
        return self._recommend(
            rule_set='node',
            label='Node',
            profile=profile,
            max_alternatives=2,
            fallback_mode=NodeMode.HASH_MAP,
            fallback_reasoning="No specific patterns detected, using default hash map strategy",
            fallback_alternatives=[NodeMode.ARRAY_LIST, NodeMode.TREE_GRAPH_HYBRID],
        )

    def recommend_edge_strategy(self, profile: DataProfile, **options: Any) -> StrategyRecommendation:
        """
        Recommend optimal edge strategy based on data profile.

        Args:
            profile: Data profile from analysis
            **options: Additional options for recommendation (accepted for
                interface compatibility; not read by this implementation)

        Returns:
            Strategy recommendation with confidence score. When no rule
            reaches the confidence threshold, an ADJ_LIST default with
            confidence 0.5 is returned.
        """
        return self._recommend(
            rule_set='edge',
            label='Edge',
            profile=profile,
            max_alternatives=1,
            fallback_mode=EdgeMode.ADJ_LIST,
            fallback_reasoning="No specific patterns detected, using default adjacency list strategy",
            fallback_alternatives=[EdgeMode.ADJ_MATRIX],
        )

    def _recommend(
        self,
        rule_set: str,
        label: str,
        profile: DataProfile,
        max_alternatives: int,
        fallback_mode: Any,
        fallback_reasoning: str,
        fallback_alternatives: List[Any],
    ) -> StrategyRecommendation:
        """Score one rule set against the profile and build the recommendation.

        Args:
            rule_set: Key into the strategy rules ('node' or 'edge')
            label: Capitalized name used in the debug log line
            profile: Data profile to score the rules against
            max_alternatives: Maximum number of runner-up modes to report
            fallback_mode: Mode returned when no rule qualifies
            fallback_reasoning: Reasoning text for the fallback
            fallback_alternatives: Alternatives reported with the fallback

        Returns:
            The highest-confidence qualifying rule as a recommendation, or
            the fallback. Fallbacks are not counted in the statistics.
        """
        qualified = []
        for rule in self._strategy_rules[rule_set]:
            confidence = self._evaluate_rule(rule, profile)
            if confidence >= self._confidence_threshold:
                qualified.append((confidence, rule))

        if not qualified:
            return StrategyRecommendation(
                mode=fallback_mode,
                confidence=0.5,
                reasoning=fallback_reasoning,
                estimated_performance_gain=0.0,
                data_loss_risk=False,
                alternative_modes=fallback_alternatives
            )

        # Stable sort: rules with equal confidence keep their declaration order.
        qualified.sort(key=lambda entry: entry[0], reverse=True)
        best_confidence, best_rule = qualified[0]
        best_mode = best_rule['mode']

        # Several rules may map to the same mode; never list the winner (or
        # the same runner-up twice) as an alternative to itself.
        alternatives = []
        for _, rule in qualified[1:]:
            if len(alternatives) >= max_alternatives:
                break
            mode = rule['mode']
            if mode != best_mode and mode not in alternatives:
                alternatives.append(mode)

        self._stats['recommendations_given'] += 1
        if best_confidence >= self._HIGH_CONFIDENCE:
            self._stats['high_confidence_recommendations'] += 1

        logger.debug(f"🎯 {label} strategy recommendation: {best_mode.name} (confidence: {best_confidence:.2f})")

        return StrategyRecommendation(
            mode=best_mode,
            confidence=best_confidence,
            reasoning=best_rule['reasoning'],
            estimated_performance_gain=best_rule.get('performance_gain', 0.0),
            data_loss_risk=best_rule.get('data_loss_risk', False),
            alternative_modes=alternatives
        )

    def _calculate_size(self, data: Any) -> int:
        """Return ``len(data)`` for sized objects, otherwise 1."""
        if hasattr(data, '__len__'):
            return len(data)
        return 1

    def _calculate_depth(self, data: Any, current_depth: int = 0, max_depth: int = 10) -> int:
        """Calculate the maximum nesting depth of dicts and lists.

        Recursion stops at ``max_depth``; deeper structures report
        ``max_depth``. Empty containers and non-containers do not add a level.
        """
        if current_depth >= max_depth:
            return current_depth

        if isinstance(data, dict):
            children = data.values()
        elif isinstance(data, list):
            children = data
        else:
            return current_depth

        if not data:
            return current_depth
        return max(
            self._calculate_depth(child, current_depth + 1, max_depth)
            for child in children
        )

    def _analyze_types(self, data: Any) -> Tuple[Set[type], Set[type]]:
        """Collect the types of top-level keys and values.

        Returns:
            ``(key_types, value_types)``. Lists contribute item types as
            value types; any other object contributes its own type.
        """
        if isinstance(data, dict):
            return {type(k) for k in data}, {type(v) for v in data.values()}
        if isinstance(data, list):
            return set(), {type(item) for item in data}
        return set(), {type(data)}

    def _detect_patterns(self, data: Any, context: Dict[str, Any]) -> Set[DataPattern]:
        """Detect patterns in the data.

        Key and nesting patterns are only evaluated for dicts; size patterns
        apply to any data; workload patterns come from ``context``.
        """
        patterns: Set[DataPattern] = set()

        if isinstance(data, dict):
            keys = list(data.keys())

            if self._is_sequential_numeric_keys(keys):
                patterns.add(DataPattern.SEQUENTIAL_NUMERIC)

            if all(isinstance(k, str) for k in keys):
                patterns.add(DataPattern.STRING_KEYS)

            if self._has_prefix_patterns(keys):
                patterns.add(DataPattern.PREFIX_HEAVY)

            if len({type(k) for k in keys}) > 1:
                patterns.add(DataPattern.MIXED_KEYS)

            if self._is_hierarchical(data):
                patterns.add(DataPattern.HIERARCHICAL)
            else:
                patterns.add(DataPattern.FLAT_STRUCTURE)

        # Size-based patterns
        size = self._calculate_size(data)
        if size > self._LARGE_DATASET_SIZE:
            patterns.add(DataPattern.LARGE_DATASET)
        elif size < self._SMALL_DATASET_SIZE:
            patterns.add(DataPattern.SMALL_DATASET)

        # Context-based patterns. Update frequency and access pattern are
        # independent hints, so a high update frequency must not hide the
        # access-pattern hint.
        if context.get('update_frequency') == 'high':
            patterns.add(DataPattern.FREQUENT_UPDATES)

        access_pattern = context.get('access_pattern')
        if access_pattern == 'read_heavy':
            patterns.add(DataPattern.READ_HEAVY)
        elif access_pattern == 'write_heavy':
            patterns.add(DataPattern.WRITE_HEAVY)

        return patterns

    def _is_sequential_numeric_keys(self, keys: List[Any]) -> bool:
        """Check if keys are exactly the indices 0..n-1 (in any order).

        Keys qualify when their string form is all digits, so both ``0`` and
        ``"0"`` count.
        """
        if not keys:
            return False

        try:
            int_keys = [int(k) for k in keys if str(k).isdigit()]
        except (ValueError, TypeError):
            # str.isdigit() accepts some characters int() rejects.
            return False

        if len(int_keys) != len(keys):
            return False
        return sorted(int_keys) == list(range(len(int_keys)))

    def _has_prefix_patterns(self, keys: List[Any]) -> bool:
        """Check if at least two string keys share a meaningful prefix.

        Non-string keys are ignored (they have no prefix); at least three
        string keys are required before any prefix is considered.
        """
        string_keys = [k for k in keys if isinstance(k, str)]
        if len(string_keys) < 3:
            return False

        # Two keys share a prefix of _MIN_PREFIX_LENGTH or more characters
        # exactly when their leading slices of that length are equal, so one
        # pass with a set replaces comparing every pair of keys.
        width = self._MIN_PREFIX_LENGTH
        seen_heads = set()
        for key in string_keys:
            if len(key) < width:
                continue
            head = key[:width]
            if head in seen_heads:
                return True
            seen_heads.add(head)
        return False

    def _common_prefix(self, str1: str, str2: str) -> str:
        """Find common prefix between two strings."""
        length = 0
        for left, right in zip(str1, str2):
            if left != right:
                break
            length += 1
        return str1[:length]

    def _is_hierarchical(self, data: Any, max_check: int = 5) -> bool:
        """Check if a dict contains nested dicts or lists.

        Only the first ``max_check`` values are inspected, so nesting that
        appears later in a large dict is not detected.
        """
        if not isinstance(data, dict):
            return False

        for index, value in enumerate(data.values()):
            if index >= max_check:
                break
            if isinstance(value, (dict, list)):
                return True
        return False

    def _estimate_memory_usage(self, data: Any) -> int:
        """Estimate memory usage in bytes.

        Uses ``sys.getsizeof``, which is shallow: contained objects are not
        included. Falls back to 50 bytes per item if the size is unavailable.
        """
        import sys
        try:
            return sys.getsizeof(data)
        except Exception:
            # Rough estimate: 50 bytes per item
            return self._calculate_size(data) * 50

    def _calculate_complexity_score(self, data: Any, patterns: Set[DataPattern]) -> float:
        """Calculate complexity score (0.0 to 1.0) from size and patterns."""
        score = 0.0

        # Base complexity from size
        size = self._calculate_size(data)
        if size > 10000:
            score += 0.3
        elif size > 1000:
            score += 0.2
        elif size > 100:
            score += 0.1

        # Pattern-based complexity
        if DataPattern.MIXED_KEYS in patterns:
            score += 0.2
        if DataPattern.HIERARCHICAL in patterns:
            score += 0.2
        if DataPattern.PREFIX_HEAVY in patterns:
            score += 0.1
        if DataPattern.FREQUENT_UPDATES in patterns:
            score += 0.1

        return min(score, 1.0)

    def _build_pattern_weights(self) -> Dict[DataPattern, float]:
        """Build weights for different patterns."""
        return {
            DataPattern.SEQUENTIAL_NUMERIC: 0.9,
            DataPattern.STRING_KEYS: 0.7,
            DataPattern.PREFIX_HEAVY: 0.8,
            DataPattern.HIERARCHICAL: 0.6,
            DataPattern.LARGE_DATASET: 0.5,
            DataPattern.SMALL_DATASET: 0.3,
            DataPattern.FREQUENT_UPDATES: 0.4,
            DataPattern.READ_HEAVY: 0.3,
            DataPattern.WRITE_HEAVY: 0.4,
        }

    def _build_strategy_rules(self) -> Dict[str, List[Dict[str, Any]]]:
        """Build strategy selection rules.

        Returns:
            Mapping with 'node' and 'edge' rule lists. Each rule names a
            ``mode``, the ``conditions`` (patterns) it looks for, and the
            ``reasoning``, ``performance_gain`` and ``data_loss_risk``
            reported when it wins.
        """
        return {
            'node': [
                {
                    'mode': NodeMode.ARRAY_LIST,
                    'conditions': [DataPattern.SEQUENTIAL_NUMERIC, DataPattern.SMALL_DATASET],
                    'reasoning': 'Sequential numeric keys with small dataset - optimal for array list',
                    'performance_gain': 0.3,
                    'data_loss_risk': False
                },
                {
                    'mode': NodeMode.HASH_MAP,
                    'conditions': [DataPattern.STRING_KEYS, DataPattern.FLAT_STRUCTURE],
                    'reasoning': 'String keys with flat structure - optimal for hash map',
                    'performance_gain': 0.2,
                    'data_loss_risk': False
                },
                {
                    'mode': NodeMode.TREE_GRAPH_HYBRID,
                    'conditions': [DataPattern.PREFIX_HEAVY, DataPattern.HIERARCHICAL],
                    'reasoning': 'Prefix-heavy hierarchical data - optimal for tree structure',
                    'performance_gain': 0.4,
                    'data_loss_risk': False
                },
                {
                    'mode': NodeMode.HASH_MAP,
                    'conditions': [DataPattern.LARGE_DATASET, DataPattern.READ_HEAVY],
                    'reasoning': 'Large dataset with read-heavy access - optimized for data interchange',
                    'performance_gain': 0.5,
                    'data_loss_risk': False
                }
            ],
            'edge': [
                {
                    'mode': EdgeMode.ADJ_LIST,
                    'conditions': [DataPattern.GRAPH_STRUCTURE],
                    'reasoning': 'Graph structure detected - optimal for adjacency list',
                    'performance_gain': 0.3,
                    'data_loss_risk': False
                },
                {
                    'mode': EdgeMode.ADJ_MATRIX,
                    'conditions': [DataPattern.LARGE_DATASET, DataPattern.SPATIAL_DATA],
                    'reasoning': 'Large spatial dataset - optimal for adjacency matrix',
                    'performance_gain': 0.2,
                    'data_loss_risk': False
                }
            ]
        }

    def _evaluate_rule(self, rule: Dict[str, Any], profile: DataProfile) -> float:
        """Evaluate how well a rule matches the profile.

        Returns:
            Fraction of the rule's conditions present in the profile, plus a
            0.1 bonus when the rule's size condition agrees with the
            profile's size, capped at 1.0. Rules without conditions score 0.0.
        """
        conditions = rule.get('conditions', [])
        if not conditions:
            return 0.0

        matches = sum(1 for condition in conditions if condition in profile.patterns)

        # Base confidence from pattern matches
        confidence = matches / len(conditions)

        # Adjust based on data characteristics
        if profile.size > self._LARGE_DATASET_SIZE and DataPattern.LARGE_DATASET in conditions:
            confidence += 0.1
        elif profile.size < self._SMALL_DATASET_SIZE and DataPattern.SMALL_DATASET in conditions:
            confidence += 0.1

        return min(confidence, 1.0)

    def _update_stats(self, analysis_time: float) -> None:
        """Record one completed analysis and refresh the running average time."""
        previous_count = self._stats['analyses_performed']
        previous_total = self._stats['average_analysis_time'] * previous_count

        self._stats['analyses_performed'] = previous_count + 1
        self._stats['average_analysis_time'] = (
            (previous_total + analysis_time) / self._stats['analyses_performed']
        )

    def get_stats(self) -> Dict[str, Any]:
        """Get a copy of the detector statistics."""
        return self._stats.copy()
|
548
|
+
|
549
|
+
|
550
|
+
# Process-wide detector, created on first use by get_detector().
_detector_instance: Optional[DataPatternDetector] = None
# Guards creation of the shared detector.
_detector_lock = threading.Lock()


def get_detector() -> DataPatternDetector:
    """
    Return the shared pattern detector, creating it on first call.

    The instance is checked once without the lock and again while holding
    it, so only one detector is created.

    Returns:
        Global DataPatternDetector instance
    """
    global _detector_instance

    if _detector_instance is not None:
        return _detector_instance

    with _detector_lock:
        # Another caller may have created the instance while we waited.
        if _detector_instance is None:
            _detector_instance = DataPatternDetector()
            logger.info("🔍 Initialized global data pattern detector")
        return _detector_instance
|
571
|
+
|
572
|
+
|
573
|
+
def analyze_data_patterns(data: Any, **context: Any) -> DataProfile:
    """
    Build a data profile with the shared global detector.

    Args:
        data: Data to analyze
        **context: Context hints forwarded unchanged to the detector

    Returns:
        Data profile
    """
    detector = get_detector()
    profile = detector.analyze_data(data, **context)
    return profile
|
585
|
+
|
586
|
+
|
587
|
+
def recommend_strategy(profile: DataProfile, strategy_type: str = 'node', **options: Any) -> StrategyRecommendation:
    """
    Ask the shared global detector for a strategy recommendation.

    Args:
        profile: Data profile
        strategy_type: 'node' selects the node recommender; any other value
            selects the edge recommender
        **options: Additional options forwarded unchanged

    Returns:
        Strategy recommendation
    """
    detector = get_detector()
    if strategy_type != 'node':
        return detector.recommend_edge_strategy(profile, **options)
    return detector.recommend_node_strategy(profile, **options)
|