code2llm 0.3.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- code2flow/__init__.py +47 -0
- code2flow/__main__.py +6 -0
- code2flow/analysis/__init__.py +23 -0
- code2flow/analysis/call_graph.py +210 -0
- code2flow/analysis/cfg.py +293 -0
- code2flow/analysis/coupling.py +77 -0
- code2flow/analysis/data_analysis.py +249 -0
- code2flow/analysis/dfg.py +224 -0
- code2flow/analysis/pipeline_detector.py +445 -0
- code2flow/analysis/side_effects.py +313 -0
- code2flow/analysis/smells.py +192 -0
- code2flow/analysis/type_inference.py +306 -0
- code2flow/cli.py +493 -0
- code2flow/core/__init__.py +36 -0
- code2flow/core/analyzer.py +765 -0
- code2flow/core/config.py +177 -0
- code2flow/core/models.py +194 -0
- code2flow/core/streaming_analyzer.py +666 -0
- code2flow/exporters/__init__.py +35 -0
- code2flow/exporters/base.py +13 -0
- code2flow/exporters/context_exporter.py +207 -0
- code2flow/exporters/flow_exporter.py +570 -0
- code2flow/exporters/json_exporter.py +17 -0
- code2flow/exporters/llm_exporter.py +12 -0
- code2flow/exporters/map_exporter.py +218 -0
- code2flow/exporters/mermaid_exporter.py +67 -0
- code2flow/exporters/toon.py +982 -0
- code2flow/exporters/yaml_exporter.py +108 -0
- code2flow/llm_flow_generator.py +451 -0
- code2flow/llm_task_generator.py +263 -0
- code2flow/mermaid_generator.py +481 -0
- code2flow/nlp/__init__.py +23 -0
- code2flow/nlp/config.py +174 -0
- code2flow/nlp/entity_resolution.py +326 -0
- code2flow/nlp/intent_matching.py +297 -0
- code2flow/nlp/normalization.py +122 -0
- code2flow/nlp/pipeline.py +388 -0
- code2flow/patterns/__init__.py +0 -0
- code2flow/patterns/detector.py +168 -0
- code2flow/refactor/__init__.py +0 -0
- code2flow/refactor/prompt_engine.py +150 -0
- code2flow/visualizers/__init__.py +0 -0
- code2flow/visualizers/graph.py +196 -0
- code2llm-0.3.7.dist-info/METADATA +604 -0
- code2llm-0.3.7.dist-info/RECORD +49 -0
- code2llm-0.3.7.dist-info/WHEEL +5 -0
- code2llm-0.3.7.dist-info/entry_points.txt +2 -0
- code2llm-0.3.7.dist-info/licenses/LICENSE +201 -0
- code2llm-0.3.7.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
"""Query Normalization - Steps 1a-1e.
|
|
2
|
+
|
|
3
|
+
1a. Lowercase conversion
|
|
4
|
+
1b. Punctuation removal
|
|
5
|
+
1c. Whitespace normalization
|
|
6
|
+
1d. Unicode normalization (NFKC)
|
|
7
|
+
1e. Stopword removal (optional)
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import re
|
|
11
|
+
import unicodedata
|
|
12
|
+
from typing import List, Dict, Optional
|
|
13
|
+
from dataclasses import dataclass, field
|
|
14
|
+
|
|
15
|
+
from .config import NormalizationConfig
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass
class NormalizationResult:
    """Result of query normalization."""
    # The raw input query, exactly as received.
    original: str
    # The query text after all enabled normalization steps were applied.
    normalized: str
    # Whitespace-delimited tokens of the normalized text.
    tokens: List[str] = field(default_factory=list)
    # Language code used for language-dependent steps (e.g. stopwords).
    language: str = "en"
    # Names of the normalization steps that were actually applied, in order.
    steps_applied: List[str] = field(default_factory=list)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class QueryNormalizer:
    """Normalize queries for consistent processing (steps 1a-1e).

    Step order inside :meth:`normalize`: Unicode NFKC (1d) runs first so
    the later case/punctuation steps see canonical code points, then
    lowercasing (1a), punctuation removal (1b), whitespace collapsing (1c)
    and optional stopword removal (1e).
    """

    def __init__(self, config: Optional[NormalizationConfig] = None):
        # Fall back to the default configuration when none is supplied.
        self.config = config or NormalizationConfig()

    def normalize(self, query: str, language: str = "en") -> NormalizationResult:
        """Apply the full normalization pipeline (1a-1e) to *query*.

        Returns a NormalizationResult carrying the original query, the
        normalized text, its tokens and the list of steps applied.
        """
        result = NormalizationResult(
            original=query,
            normalized=query,
            language=language,
        )

        # 1d. Unicode normalization (NFKC) - done first so subsequent
        # steps operate on canonical code points.
        if self.config.unicode_normalize:
            result.normalized = self._unicode_normalize(result.normalized)
            result.steps_applied.append("unicode_nfkc")

        # 1a. Lowercase conversion
        if self.config.lowercase:
            result.normalized = self._lowercase(result.normalized)
            result.steps_applied.append("lowercase")

        # 1b. Punctuation removal
        if self.config.remove_punctuation:
            result.normalized = self._remove_punctuation(result.normalized)
            result.steps_applied.append("remove_punctuation")

        # 1c. Whitespace normalization
        if self.config.normalize_whitespace:
            result.normalized = self._normalize_whitespace(result.normalized)
            result.steps_applied.append("normalize_whitespace")

        # 1e. Stopword removal
        if self.config.remove_stopwords:
            result.normalized = self._remove_stopwords(result.normalized, language)
            result.steps_applied.append("remove_stopwords")

        # Tokenize the final normalized text.
        result.tokens = self._tokenize(result.normalized)

        return result

    def _unicode_normalize(self, text: str) -> str:
        """1d. Normalize Unicode to NFKC form."""
        return unicodedata.normalize('NFKC', text)

    def _lowercase(self, text: str) -> str:
        """1a. Convert to lowercase."""
        return text.lower()

    def _remove_punctuation(self, text: str) -> str:
        """1b. Replace punctuation with spaces.

        Keeps alphanumerics, underscores, whitespace and dots so that
        qualified names like "pkg.mod.func" survive normalization.
        """
        return re.sub(r'[^\w\s\.]', ' ', text)

    def _normalize_whitespace(self, text: str) -> str:
        """1c. Collapse runs of whitespace into single spaces."""
        return ' '.join(text.split())

    def _remove_stopwords(self, text: str, language: str) -> str:
        """1e. Remove stopwords for *language* (unknown language = no-op)."""
        # Build a set once so membership tests are O(1) instead of
        # scanning the configured stopword list for every word.
        stopwords = set(self.config.stopwords.get(language, []))
        return ' '.join(w for w in text.split() if w not in stopwords)

    def _tokenize(self, text: str) -> List[str]:
        """Split normalized text into whitespace-delimited tokens."""
        return text.split()

    # Individual step methods for granular control. They delegate to the
    # private helpers so each transformation is defined in exactly one
    # place (previously the regex/logic was duplicated verbatim here).
    def step_1a_lowercase(self, text: str) -> str:
        """Step 1a: Convert to lowercase."""
        return self._lowercase(text)

    def step_1b_remove_punctuation(self, text: str) -> str:
        """Step 1b: Remove punctuation."""
        return self._remove_punctuation(text)

    def step_1c_normalize_whitespace(self, text: str) -> str:
        """Step 1c: Normalize whitespace."""
        return self._normalize_whitespace(text)

    def step_1d_unicode_normalize(self, text: str) -> str:
        """Step 1d: Unicode NFKC normalization."""
        return self._unicode_normalize(text)

    def step_1e_remove_stopwords(self, text: str, language: str = "en") -> str:
        """Step 1e: Remove stopwords."""
        return self._remove_stopwords(text, language)
|
|
@@ -0,0 +1,388 @@
|
|
|
1
|
+
"""Main NLP Pipeline - Integration of all components (Steps 4a-4e).
|
|
2
|
+
|
|
3
|
+
4a. Pipeline orchestration
|
|
4
|
+
4b. Result aggregation
|
|
5
|
+
4c. Confidence scoring
|
|
6
|
+
4d. Fallback handling
|
|
7
|
+
4e. Output formatting
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from dataclasses import dataclass, field
|
|
11
|
+
from typing import List, Dict, Optional, Any
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
|
|
14
|
+
from .config import NLPConfig, FAST_NLP_CONFIG
|
|
15
|
+
from .normalization import QueryNormalizer, NormalizationResult
|
|
16
|
+
from .intent_matching import IntentMatcher, IntentMatchingResult
|
|
17
|
+
from .entity_resolution import EntityResolver, EntityResolutionResult, Entity
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@dataclass
class PipelineStage:
    """Single pipeline stage result."""
    # Stage identifier, e.g. "normalization", "intent_matching",
    # "entity_resolution".
    stage_name: str
    # Whether the stage produced a usable result (criterion varies per stage).
    success: bool
    # The stage's raw result object (type depends on the stage).
    result: Any
    # Wall-clock time the stage took, in milliseconds.
    execution_time_ms: float = 0.0
    # Error description when the stage failed; None on success.
    error: Optional[str] = None
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dataclass
class NLPPipelineResult:
    """Complete NLP pipeline result (4b-4e aggregation)."""
    # 4b. Aggregated per-stage results
    original_query: str
    normalized_query: NormalizationResult
    intent_result: IntentMatchingResult
    entity_result: EntityResolutionResult

    # 4c. Confidence scoring
    overall_confidence: float = 0.0
    stage_confidences: Dict[str, float] = field(default_factory=dict)

    # 4d. Fallback information
    fallback_used: bool = False
    fallback_reason: Optional[str] = None

    # 4e. Formatted output
    formatted_response: Optional[str] = None
    action_recommendation: Optional[str] = None

    # Execution metadata
    stages: List[PipelineStage] = field(default_factory=list)
    total_execution_time_ms: float = 0.0

    def is_successful(self) -> bool:
        """Whether the pipeline produced an actionable result."""
        # Actionable means: no fallback was needed, an intent was found,
        # and the aggregate confidence clears the 0.5 bar.
        if self.fallback_used:
            return False
        if self.intent_result.primary_intent is None:
            return False
        return self.overall_confidence >= 0.5

    def get_intent(self) -> Optional[str]:
        """Return the resolved intent name, or None when nothing matched."""
        primary = self.intent_result.primary_intent
        return primary.intent if primary else None

    def get_entities(self) -> List[Entity]:
        """Return the entities resolved from the query."""
        return self.entity_result.entities

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the result to a plain dictionary (top 5 entities only)."""
        top_entities = [
            {
                "name": e.name,
                "type": e.entity_type,
                "confidence": e.confidence,
            }
            for e in self.entity_result.entities[:5]
        ]
        return {
            "original_query": self.original_query,
            "normalized": self.normalized_query.normalized,
            "intent": self.get_intent(),
            "intent_confidence": self.intent_result.get_confidence(),
            "entities": top_entities,
            "overall_confidence": self.overall_confidence,
            "fallback_used": self.fallback_used,
            "action": self.action_recommendation,
        }
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
class NLPPipeline:
    """Main NLP processing pipeline (4a-4e).

    Orchestrates normalization, intent matching and entity resolution
    (4a), aggregates their results (4b), scores confidence (4c), applies
    fallbacks when confidence is low (4d) and formats the output (4e).
    """

    # Maps a matched intent to the entity types that intent usually needs.
    # Hoisted to a class constant so it is built once, not per query.
    _INTENT_TO_ENTITIES = {
        "find_function": ["function"],
        "find_class": ["class"],
        "analyze_flow": ["function", "class"],
        "show_call_graph": ["function", "class", "module"],
        "find_dependencies": ["module", "file"],
        "explain_code": ["function", "class"],
    }

    def __init__(self, config: Optional[NLPConfig] = None):
        self.config = config or FAST_NLP_CONFIG

        # Initialize the three stage components from their sub-configs.
        # NOTE: each component keeps a reference to (not a copy of) its
        # sub-config, so e.g. self.intent_matcher.config IS
        # self.config.intent_matching.
        self.normalizer = QueryNormalizer(self.config.normalization)
        self.intent_matcher = IntentMatcher(self.config.intent_matching)
        self.entity_resolver = EntityResolver(self.config.entity_resolution)

        # Rolling window of recent queries, used as matching context.
        self.query_history: List[str] = []

    def process(self, query: str, language: str = "en") -> NLPPipelineResult:
        """Process query through full pipeline (4a-4e)."""
        import time

        start_time = time.perf_counter()

        # 4a. Pipeline orchestration - Step 1: Normalization (always
        # "succeeds": it is deterministic text processing).
        normalized, norm_stage = self._timed_stage(
            "normalization",
            lambda: self._step_normalize(query, language),
        )

        # Step 2: Intent matching (success = an intent was found).
        intent_result, intent_stage = self._timed_stage(
            "intent_matching",
            lambda: self._step_match_intent(normalized.normalized),
            success_check=lambda r: r.primary_intent is not None,
        )

        # Step 3: Entity resolution, guided by the matched intent
        # (success = at least one entity resolved).
        expected_types = self._infer_entity_types(intent_result)
        entity_result, entity_stage = self._timed_stage(
            "entity_resolution",
            lambda: self._step_resolve_entities(
                normalized.normalized,
                expected_types=expected_types,
                context=normalized.normalized,
            ),
            success_check=lambda r: len(r.entities) > 0,
        )

        # 4b. Result aggregation
        total_time = (time.perf_counter() - start_time) * 1000

        result = NLPPipelineResult(
            original_query=query,
            normalized_query=normalized,
            intent_result=intent_result,
            entity_result=entity_result,
            stages=[norm_stage, intent_stage, entity_stage],
            total_execution_time_ms=total_time,
        )

        # 4c. Confidence scoring
        result.overall_confidence = self._calculate_overall_confidence(result)
        result.stage_confidences = {
            "normalization": 1.0,  # Normalization is deterministic
            "intent": intent_result.get_confidence(),
            "entity": self._calculate_entity_confidence(entity_result),
        }

        # 4d. Fallback handling for low-confidence results
        if result.overall_confidence < 0.5:
            result = self._apply_fallback(result)

        # 4e. Output formatting
        result.action_recommendation = self._format_action(result)
        result.formatted_response = self._format_response(result)

        # Keep a bounded history (last 10 queries) for future context.
        self.query_history.append(query)
        if len(self.query_history) > 10:
            self.query_history.pop(0)

        return result

    def _timed_stage(self, name, fn, success_check=None):
        """Run one pipeline stage, timing it and wrapping the outcome.

        *fn* is a zero-argument callable producing the stage result;
        *success_check*, when given, maps that result to a bool (stages
        without a check are considered successful). Returns the tuple
        ``(stage_result, PipelineStage)``.
        """
        import time

        start = time.perf_counter()
        stage_result = fn()
        elapsed_ms = (time.perf_counter() - start) * 1000
        success = success_check(stage_result) if success_check else True
        return stage_result, PipelineStage(
            stage_name=name,
            success=success,
            result=stage_result,
            execution_time_ms=elapsed_ms,
        )

    def _step_normalize(self, query: str, language: str) -> NormalizationResult:
        """Step 1: Query normalization."""
        if not self.config.enable_normalization:
            # Skip normalization, return an identity result so downstream
            # stages still receive a NormalizationResult.
            return NormalizationResult(
                original=query,
                normalized=query,
                tokens=query.split(),
                language=language,
                steps_applied=["skipped"]
            )

        return self.normalizer.normalize(query, language)

    def _step_match_intent(self, normalized_query: str) -> IntentMatchingResult:
        """Step 2: Intent matching (empty result when disabled)."""
        if not self.config.enable_intent_matching:
            return IntentMatchingResult(query=normalized_query)

        # Pass the last few queries as conversational context.
        return self.intent_matcher.match(
            normalized_query,
            context=self.query_history[-3:] if self.query_history else None
        )

    def _step_resolve_entities(
        self,
        normalized_query: str,
        expected_types: Optional[List[str]] = None,
        context: Optional[str] = None
    ) -> EntityResolutionResult:
        """Step 3: Entity resolution (empty result when disabled)."""
        if not self.config.enable_entity_resolution:
            return EntityResolutionResult(query=normalized_query)

        return self.entity_resolver.resolve(
            normalized_query,
            context=context,
            expected_types=expected_types
        )

    def _infer_entity_types(
        self,
        intent_result: IntentMatchingResult
    ) -> Optional[List[str]]:
        """Infer expected entity types from the matched intent.

        Returns None when no intent matched or the intent is unmapped.
        """
        if not intent_result.primary_intent:
            return None

        return self._INTENT_TO_ENTITIES.get(intent_result.primary_intent.intent)

    def _calculate_overall_confidence(self, result: NLPPipelineResult) -> float:
        """4c. Calculate overall pipeline confidence.

        Weighted average: intent 0.5, entity 0.3, normalization 0.2
        (normalization is deterministic, so its confidence is 1.0).
        """
        weights = {
            "intent": 0.5,
            "entity": 0.3,
            "normalization": 0.2,
        }

        intent_conf = result.intent_result.get_confidence()
        entity_conf = self._calculate_entity_confidence(result.entity_result)
        norm_conf = 1.0  # Normalization is reliable

        overall = (
            weights["intent"] * intent_conf +
            weights["entity"] * entity_conf +
            weights["normalization"] * norm_conf
        )

        return round(overall, 3)

    def _calculate_entity_confidence(self, entity_result: EntityResolutionResult) -> float:
        """Calculate aggregate entity confidence (best single entity)."""
        if not entity_result.entities:
            return 0.0

        return max(e.confidence for e in entity_result.entities)

    def _apply_fallback(self, result: NLPPipelineResult) -> NLPPipelineResult:
        """4d. Apply fallback strategies when confidence is low.

        Strategy 1: re-match the intent with a lowered fuzzy threshold.
        Strategy 2: if confidence is still very low, fall back to a
        generic-search intent so the caller always gets an action.
        """
        result.fallback_used = True

        # Try keyword-only matching with a lower threshold as fallback.
        if result.intent_result.get_confidence() < 0.3:
            # BUG FIX: self.intent_matcher.config is the same object as
            # self.config.intent_matching (shared reference from __init__),
            # so "restoring" from self.config.intent_matching after the
            # mutation used to write back the lowered 0.5 value. Save the
            # original threshold first and restore it in a finally block
            # so it survives even if match() raises.
            original_threshold = self.intent_matcher.config.fuzzy_threshold
            self.intent_matcher.config.fuzzy_threshold = 0.5
            try:
                fallback_intent = self.intent_matcher.match(
                    result.normalized_query.normalized
                )
            finally:
                self.intent_matcher.config.fuzzy_threshold = original_threshold

            if fallback_intent.get_confidence() > result.intent_result.get_confidence():
                result.intent_result = fallback_intent
                result.fallback_reason = "lowered_threshold"

        # If still no usable intent, default to generic search.
        if result.intent_result.get_confidence() < 0.2:
            from .intent_matching import IntentMatch
            result.intent_result.primary_intent = IntentMatch(
                intent="generic_search",
                confidence=0.3,
                matched_phrase=result.normalized_query.normalized,
                match_type="fallback"
            )
            result.fallback_reason = "generic_search"

        return result

    def _format_action(self, result: NLPPipelineResult) -> Optional[str]:
        """4e. Format a one-line action recommendation for the result."""
        intent = result.get_intent()
        entities = result.get_entities()

        if not intent:
            return "Unable to determine action. Please clarify your query."

        # Format based on intent type.
        if intent == "find_function":
            if entities:
                return f"Search for function: {entities[0].name}"
            return "Search for functions"

        elif intent == "find_class":
            if entities:
                return f"Search for class: {entities[0].name}"
            return "Search for classes"

        elif intent == "analyze_flow":
            if entities:
                return f"Analyze control flow of: {entities[0].name}"
            return "Analyze control flow"

        elif intent == "show_call_graph":
            return "Generate call graph visualization"

        elif intent == "generic_search":
            return f"Search for: {result.normalized_query.normalized}"

        return f"Execute: {intent}"

    def _format_response(self, result: NLPPipelineResult) -> str:
        """4e. Format a human-readable multi-line summary of the result."""
        lines = [
            f"Query: {result.original_query}",
            f"Intent: {result.get_intent() or 'unknown'} (confidence: {result.overall_confidence:.2f})",
        ]

        if result.entity_result.entities:
            lines.append("Entities:")
            # Show at most the top 3 entities to keep the summary short.
            for e in result.entity_result.entities[:3]:
                lines.append(f"  - {e.name} ({e.entity_type}, {e.confidence:.2f})")

        if result.fallback_used:
            lines.append(f"[Fallback used: {result.fallback_reason}]")

        return "\n".join(lines)

    # Individual step methods for 4a-4e
    def step_4a_orchestrate(self, query: str) -> List[PipelineStage]:
        """Step 4a: Pipeline orchestration."""
        return self.process(query).stages

    def step_4b_aggregate(self, stages: List[PipelineStage]) -> NLPPipelineResult:
        """Step 4b: Result aggregation.

        Aggregation is performed inline by process(); this hook does
        nothing and returns None. NOTE(review): the annotation promises
        NLPPipelineResult — confirm no caller relies on a real value.
        """
        pass

    def step_4c_confidence(self, result: NLPPipelineResult) -> float:
        """Step 4c: Confidence scoring."""
        return self._calculate_overall_confidence(result)

    def step_4d_fallback(self, result: NLPPipelineResult) -> NLPPipelineResult:
        """Step 4d: Fallback handling."""
        return self._apply_fallback(result)

    def step_4e_format(self, result: NLPPipelineResult) -> str:
        """Step 4e: Output formatting."""
        return self._format_response(result)
|
|
File without changes
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
"""Pattern detection for behavioral analysis."""
|
|
2
|
+
|
|
3
|
+
from typing import List, Dict, Set
|
|
4
|
+
from collections import defaultdict
|
|
5
|
+
|
|
6
|
+
from ..core.config import Config
|
|
7
|
+
from ..core.models import AnalysisResult
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class PatternDetector:
    """Detect behavioral patterns in code.

    Every detector returns a list of pattern dicts carrying at least
    'type', 'name', 'confidence' and 'description' keys; detect_patterns
    concatenates them.
    """

    def __init__(self, config: Config):
        self.config = config

    def detect_patterns(self, result: AnalysisResult) -> List[Dict]:
        """Detect all behavioral patterns in an analysis result."""
        patterns = []

        # Recursion and state-machine detection are gated by configuration.
        if self.config.detect_recursion:
            patterns.extend(self._detect_recursion(result))

        if self.config.detect_state_machines:
            patterns.extend(self._detect_state_machines(result))

        # Design-pattern detectors always run.
        patterns.extend(self._detect_factory_pattern(result))
        patterns.extend(self._detect_singleton(result))
        patterns.extend(self._detect_strategy_pattern(result))

        return patterns

    def _detect_recursion(self, result: AnalysisResult) -> List[Dict]:
        """Detect direct recursion: a function listed among its own calls.

        (Mutual/indirect recursion is not covered here.)
        """
        patterns = []

        for func_name, func_info in result.functions.items():
            if func_name in func_info.calls:
                patterns.append({
                    'type': 'recursive',
                    'name': f'recursive_{func_name}',
                    'function': func_name,
                    'confidence': 1.0,
                    'description': f'Function {func_name} calls itself recursively'
                })

        return patterns

    def _detect_state_machines(self, result: AnalysisResult) -> List[Dict]:
        """Detect state machine patterns in classes.

        Heuristic: method names containing "state" and/or transition-like
        verbs. Confidence is higher when transition methods are present.
        """
        patterns = []

        for class_name, class_info in result.classes.items():
            has_state = False
            transition_methods = []

            for method in class_info.get('methods', []):
                method_lower = method.lower()
                if 'state' in method_lower:
                    has_state = True
                # NOTE: 'set' also matches names like "settings"/"reset";
                # this is a deliberate, recall-oriented heuristic.
                if any(word in method_lower for word in ['transition', 'change', 'set', 'next', 'prev']):
                    transition_methods.append(method)

            if has_state or transition_methods:
                patterns.append({
                    'type': 'state_machine',
                    'name': f'state_machine_{class_name}',
                    'class': class_name,
                    'transitions': transition_methods,
                    'confidence': 0.8 if transition_methods else 0.5,
                    'description': f'Class {class_name} appears to implement a state machine'
                })

        return patterns

    def _detect_factory_pattern(self, result: AnalysisResult) -> List[Dict]:
        """Detect factory method pattern.

        Candidates are functions whose names suggest object creation; a
        pattern is only reported when _check_returns_classes finds
        returned classes (currently a placeholder that returns none, so
        this detector never fires — see _check_returns_classes).
        """
        patterns = []

        for func_name, func_info in result.functions.items():
            name_lower = func_name.lower()
            if 'create' in name_lower or 'factory' in name_lower or 'build' in name_lower:
                returns_classes = self._check_returns_classes(result, func_name)
                if returns_classes:
                    patterns.append({
                        'type': 'factory',
                        'name': f'factory_{func_name}',
                        'function': func_name,
                        'creates': list(returns_classes),
                        'confidence': 0.7,
                        'description': f'Function {func_name} appears to be a factory'
                    })

        return patterns

    def _detect_singleton(self, result: AnalysisResult) -> List[Dict]:
        """Detect singleton pattern.

        Heuristic: an accessor named like getInstance, or a __new__
        override (presumably used to cache the single instance).
        """
        patterns = []

        for class_name, class_info in result.classes.items():
            methods = class_info.get('methods', [])

            has_get_instance = any(
                m.lower() in ('getinstance', 'get_instance', 'instance')
                for m in methods
            )
            has_new = '__new__' in methods

            if has_get_instance or has_new:
                patterns.append({
                    'type': 'singleton',
                    'name': f'singleton_{class_name}',
                    'class': class_name,
                    'confidence': 0.75,
                    'description': f'Class {class_name} appears to be a singleton'
                })

        return patterns

    def _detect_strategy_pattern(self, result: AnalysisResult) -> List[Dict]:
        """Detect strategy pattern.

        Candidates are classes exposing a common execute/run/process/apply
        method; a pattern is reported for the first function that calls
        more than one candidate (i.e. uses them interchangeably).
        """
        patterns = []

        # Collect interface-like classes with a common "do it" method.
        strategy_candidates = [
            class_name
            for class_name, class_info in result.classes.items()
            if any(m in class_info.get('methods', [])
                   for m in ['execute', 'run', 'process', 'apply'])
        ]

        # One candidate alone cannot be "interchangeable".
        if len(strategy_candidates) > 1:
            for func_name, func_info in result.functions.items():
                # BUG FIX: the previous check used `s in str(calls)`, a
                # substring match over the stringified collection, which
                # matched partial names (e.g. "Run" inside "Runner") and
                # repr punctuation. Compare each call name individually,
                # accepting exact or dotted-qualified ("mod.Strategy")
                # matches instead.
                call_names = [str(c) for c in func_info.calls]
                called_strategies = [
                    s for s in strategy_candidates
                    if any(c == s or c.endswith('.' + s) for c in call_names)
                ]

                if len(called_strategies) > 1:
                    patterns.append({
                        'type': 'strategy',
                        'name': f'strategy_in_{func_name}',
                        'context': func_name,
                        'strategies': called_strategies,
                        'confidence': 0.7,
                        'description': f'Function {func_name} uses strategy pattern'
                    })
                    # Only the first qualifying context is reported.
                    break

        return patterns

    def _check_returns_classes(self, result: AnalysisResult, func_name: str) -> Set[str]:
        """Check what classes a function might return.

        Placeholder: always returns an empty set, which means
        _detect_factory_pattern currently reports nothing. A full
        implementation would analyze the function's return statements.
        """
        return set()
|
|
File without changes
|