gitflow-analytics 1.0.0-py3-none-any.whl → 1.0.3-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gitflow_analytics/__init__.py +11 -9
- gitflow_analytics/_version.py +2 -2
- gitflow_analytics/cli.py +691 -243
- gitflow_analytics/cli_rich.py +353 -0
- gitflow_analytics/config.py +389 -96
- gitflow_analytics/core/analyzer.py +175 -78
- gitflow_analytics/core/branch_mapper.py +132 -132
- gitflow_analytics/core/cache.py +242 -173
- gitflow_analytics/core/identity.py +214 -178
- gitflow_analytics/extractors/base.py +13 -11
- gitflow_analytics/extractors/story_points.py +70 -59
- gitflow_analytics/extractors/tickets.py +111 -88
- gitflow_analytics/integrations/github_integration.py +91 -77
- gitflow_analytics/integrations/jira_integration.py +284 -0
- gitflow_analytics/integrations/orchestrator.py +99 -72
- gitflow_analytics/metrics/dora.py +183 -179
- gitflow_analytics/models/database.py +191 -54
- gitflow_analytics/qualitative/__init__.py +30 -0
- gitflow_analytics/qualitative/classifiers/__init__.py +13 -0
- gitflow_analytics/qualitative/classifiers/change_type.py +468 -0
- gitflow_analytics/qualitative/classifiers/domain_classifier.py +399 -0
- gitflow_analytics/qualitative/classifiers/intent_analyzer.py +436 -0
- gitflow_analytics/qualitative/classifiers/risk_analyzer.py +412 -0
- gitflow_analytics/qualitative/core/__init__.py +13 -0
- gitflow_analytics/qualitative/core/llm_fallback.py +653 -0
- gitflow_analytics/qualitative/core/nlp_engine.py +373 -0
- gitflow_analytics/qualitative/core/pattern_cache.py +457 -0
- gitflow_analytics/qualitative/core/processor.py +540 -0
- gitflow_analytics/qualitative/models/__init__.py +25 -0
- gitflow_analytics/qualitative/models/schemas.py +272 -0
- gitflow_analytics/qualitative/utils/__init__.py +13 -0
- gitflow_analytics/qualitative/utils/batch_processor.py +326 -0
- gitflow_analytics/qualitative/utils/cost_tracker.py +343 -0
- gitflow_analytics/qualitative/utils/metrics.py +347 -0
- gitflow_analytics/qualitative/utils/text_processing.py +243 -0
- gitflow_analytics/reports/analytics_writer.py +25 -8
- gitflow_analytics/reports/csv_writer.py +60 -32
- gitflow_analytics/reports/narrative_writer.py +21 -15
- gitflow_analytics/tui/__init__.py +5 -0
- gitflow_analytics/tui/app.py +721 -0
- gitflow_analytics/tui/screens/__init__.py +8 -0
- gitflow_analytics/tui/screens/analysis_progress_screen.py +487 -0
- gitflow_analytics/tui/screens/configuration_screen.py +547 -0
- gitflow_analytics/tui/screens/loading_screen.py +358 -0
- gitflow_analytics/tui/screens/main_screen.py +304 -0
- gitflow_analytics/tui/screens/results_screen.py +698 -0
- gitflow_analytics/tui/widgets/__init__.py +7 -0
- gitflow_analytics/tui/widgets/data_table.py +257 -0
- gitflow_analytics/tui/widgets/export_modal.py +301 -0
- gitflow_analytics/tui/widgets/progress_widget.py +192 -0
- gitflow_analytics-1.0.3.dist-info/METADATA +490 -0
- gitflow_analytics-1.0.3.dist-info/RECORD +62 -0
- gitflow_analytics-1.0.0.dist-info/METADATA +0 -201
- gitflow_analytics-1.0.0.dist-info/RECORD +0 -30
- {gitflow_analytics-1.0.0.dist-info → gitflow_analytics-1.0.3.dist-info}/WHEEL +0 -0
- {gitflow_analytics-1.0.0.dist-info → gitflow_analytics-1.0.3.dist-info}/entry_points.txt +0 -0
- {gitflow_analytics-1.0.0.dist-info → gitflow_analytics-1.0.3.dist-info}/licenses/LICENSE +0 -0
- {gitflow_analytics-1.0.0.dist-info → gitflow_analytics-1.0.3.dist-info}/top_level.txt +0 -0
gitflow_analytics/qualitative/classifiers/change_type.py (new file, @@ -0,0 +1,468 @@):

```python
"""Change type classifier using semantic analysis of commit messages."""

import logging
import re
from typing import Dict, List, Tuple, Set, Any, Optional
from pathlib import Path

from ..models.schemas import ChangeTypeConfig

try:
    import spacy
    from spacy.tokens import Doc
    SPACY_AVAILABLE = True
except ImportError:
    SPACY_AVAILABLE = False
    Doc = Any


class ChangeTypeClassifier:
    """Classify commits by change type using semantic analysis.

    This classifier determines the type of change represented by a commit
    (feature, bugfix, refactor, etc.) by analyzing the commit message semantics
    and file patterns.

    The classification uses a combination of:
    - Semantic keyword matching with action/object/context patterns
    - File pattern analysis for additional signals
    - Rule-based patterns for common commit message formats
    """

    def __init__(self, config: ChangeTypeConfig):
        """Initialize change type classifier.

        Args:
            config: Configuration for change type classification
        """
        self.config = config
        self.logger = logging.getLogger(__name__)

        # Define semantic patterns for each change type
        self.change_patterns = {
            'feature': {
                'action_words': {
                    'add', 'implement', 'create', 'build', 'introduce', 'develop',
                    'enable', 'support', 'allow', 'provide', 'include'
                },
                'object_words': {
                    'feature', 'functionality', 'capability', 'component', 'module',
                    'endpoint', 'api', 'service', 'interface', 'system'
                },
                'context_words': {
                    'new', 'initial', 'first', 'user', 'client', 'support',
                    'enhancement', 'improvement'
                }
            },
            'bugfix': {
                'action_words': {
                    'fix', 'resolve', 'correct', 'repair', 'patch', 'address',
                    'handle', 'solve', 'debug', 'prevent'
                },
                'object_words': {
                    'bug', 'issue', 'problem', 'error', 'defect', 'exception',
                    'crash', 'failure', 'leak', 'regression'
                },
                'context_words': {
                    'broken', 'failing', 'incorrect', 'wrong', 'invalid',
                    'missing', 'null', 'undefined'
                }
            },
            'refactor': {
                'action_words': {
                    'refactor', 'restructure', 'reorganize', 'cleanup', 'simplify',
                    'optimize', 'improve', 'enhance', 'streamline', 'consolidate'
                },
                'object_words': {
                    'code', 'structure', 'architecture', 'design', 'logic',
                    'method', 'function', 'class', 'module'
                },
                'context_words': {
                    'better', 'cleaner', 'simpler', 'efficient', 'maintainable',
                    'readable', 'performance'
                }
            },
            'docs': {
                'action_words': {
                    'update', 'add', 'improve', 'write', 'document', 'clarify',
                    'explain', 'describe', 'detail'
                },
                'object_words': {
                    'documentation', 'readme', 'docs', 'comment', 'docstring',
                    'guide', 'tutorial', 'example', 'specification'
                },
                'context_words': {
                    'explain', 'clarify', 'describe', 'instruction', 'help'
                }
            },
            'test': {
                'action_words': {
                    'add', 'update', 'fix', 'improve', 'write', 'create',
                    'enhance', 'extend'
                },
                'object_words': {
                    'test', 'spec', 'coverage', 'unit', 'integration', 'e2e',
                    'testing', 'mock', 'stub', 'fixture'
                },
                'context_words': {
                    'testing', 'verify', 'validate', 'check', 'ensure',
                    'coverage', 'assertion'
                }
            },
            'chore': {
                'action_words': {
                    'update', 'bump', 'upgrade', 'configure', 'setup', 'install',
                    'remove', 'delete', 'clean'
                },
                'object_words': {
                    'dependency', 'package', 'config', 'configuration', 'build',
                    'version', 'tool', 'script', 'workflow'
                },
                'context_words': {
                    'maintenance', 'housekeeping', 'routine', 'automated',
                    'ci', 'cd', 'pipeline'
                }
            },
            'security': {
                'action_words': {
                    'fix', 'secure', 'protect', 'validate', 'sanitize',
                    'encrypt', 'authenticate', 'authorize'
                },
                'object_words': {
                    'security', 'vulnerability', 'exploit', 'xss', 'csrf',
                    'injection', 'authentication', 'authorization', 'permission'
                },
                'context_words': {
                    'secure', 'safe', 'protected', 'validated', 'sanitized',
                    'encrypted', 'threat', 'attack'
                }
            },
            'hotfix': {
                'action_words': {
                    'hotfix', 'fix', 'patch', 'urgent', 'critical', 'emergency'
                },
                'object_words': {
                    'production', 'critical', 'urgent', 'emergency', 'hotfix',
                    'issue', 'bug', 'problem'
                },
                'context_words': {
                    'urgent', 'critical', 'immediate', 'production', 'live',
                    'emergency', 'asap'
                }
            },
            'config': {
                'action_words': {
                    'configure', 'setup', 'adjust', 'modify', 'change',
                    'update', 'tweak'
                },
                'object_words': {
                    'config', 'configuration', 'settings', 'environment',
                    'parameter', 'option', 'flag', 'variable'
                },
                'context_words': {
                    'environment', 'production', 'development', 'staging',
                    'deployment', 'setup'
                }
            }
        }

        # File pattern signals for change types
        self.file_patterns = {
            'test': [
                r'.*test.*\.py$', r'.*spec.*\.js$', r'.*test.*\.java$',
                r'test_.*\.py$', r'.*_test\.go$', r'.*\.test\.(js|ts)$',
                r'__tests__/.*', r'tests?/.*', r'spec/.*'
            ],
            'docs': [
                r'.*\.md$', r'.*\.rst$', r'.*\.txt$', r'README.*',
                r'CHANGELOG.*', r'docs?/.*', r'documentation/.*'
            ],
            'config': [
                r'.*\.ya?ml$', r'.*\.json$', r'.*\.toml$', r'.*\.ini$',
                r'.*\.env.*', r'Dockerfile.*', r'.*config.*', r'\.github/.*'
            ],
            'chore': [
                r'package.*\.json$', r'requirements.*\.txt$', r'Pipfile.*',
                r'pom\.xml$', r'build\.gradle$', r'.*\.lock$'
            ]
        }

        # Compile regex patterns for efficiency
        self._compile_file_patterns()

        # Common commit message prefixes
        self.prefix_patterns = {
            'feat': 'feature',
            'feature': 'feature',
            'fix': 'bugfix',
            'bugfix': 'bugfix',
            'refactor': 'refactor',
            'docs': 'docs',
            'test': 'test',
            'chore': 'chore',
            'security': 'security',
            'hotfix': 'hotfix',
            'config': 'config',
            'style': 'chore',  # Style changes are usually chores
            'perf': 'refactor',  # Performance improvements are refactoring
            'build': 'chore',
            'ci': 'chore'
        }

    def _compile_file_patterns(self) -> None:
        """Compile regex patterns for file matching."""
        self.compiled_file_patterns = {}
        for change_type, patterns in self.file_patterns.items():
            self.compiled_file_patterns[change_type] = [
                re.compile(pattern, re.IGNORECASE) for pattern in patterns
            ]

    def classify(self, message: str, doc: Doc, files: List[str]) -> Tuple[str, float]:
        """Classify commit change type with confidence score.

        Args:
            message: Commit message
            doc: spaCy processed document
            files: List of changed files

        Returns:
            Tuple of (change_type, confidence_score)
        """
        if not message:
            return 'unknown', 0.0

        # Step 1: Check for conventional commit prefixes
        prefix_result = self._check_conventional_prefix(message)
        if prefix_result:
            change_type, confidence = prefix_result
            if confidence >= self.config.min_confidence:
                return change_type, confidence

        # Step 2: Semantic analysis of message content
        semantic_scores = self._analyze_semantic_content(message, doc)

        # Step 3: File pattern analysis
        file_scores = self._analyze_file_patterns(files)

        # Step 4: Combine scores with weights
        combined_scores = self._combine_scores(semantic_scores, file_scores)

        # Step 5: Select best match
        if not combined_scores:
            return 'unknown', 0.0

        best_type = max(combined_scores.keys(), key=lambda k: combined_scores[k])
        confidence = combined_scores[best_type]

        # Apply confidence threshold
        if confidence < self.config.min_confidence:
            return 'unknown', confidence

        return best_type, confidence

    def _check_conventional_prefix(self, message: str) -> Optional[Tuple[str, float]]:
        """Check for conventional commit message prefixes.

        Args:
            message: Commit message

        Returns:
            Tuple of (change_type, confidence) if found, None otherwise
        """
        # Look for conventional commit format: type(scope): description
        conventional_pattern = r'^(\w+)(?:\([^)]*\))?\s*:\s*(.+)'
        match = re.match(conventional_pattern, message.strip(), re.IGNORECASE)

        if match:
            prefix = match.group(1).lower()
            if prefix in self.prefix_patterns:
                return self.prefix_patterns[prefix], 0.9  # High confidence for explicit prefixes

        # Check for simple prefixes at start of message
        words = message.lower().split()
        if words:
            first_word = words[0].rstrip(':').rstrip('-')
            if first_word in self.prefix_patterns:
                return self.prefix_patterns[first_word], 0.8

        return None

    def _analyze_semantic_content(self, message: str, doc: Doc) -> Dict[str, float]:
        """Analyze semantic content of commit message.

        Args:
            message: Commit message
            doc: spaCy processed document

        Returns:
            Dictionary of change_type -> confidence_score
        """
        if not SPACY_AVAILABLE or not doc:
            # Fallback to simple keyword matching
            return self._simple_keyword_analysis(message.lower())

        # Extract semantic features from spaCy doc
        features = self._extract_semantic_features(doc)

        # Calculate similarity to each change type
        scores = {}
        for change_type, patterns in self.change_patterns.items():
            similarity = self._calculate_semantic_similarity(features, patterns)
            if similarity > 0:
                scores[change_type] = similarity

        return scores

    def _extract_semantic_features(self, doc: Doc) -> Dict[str, Set[str]]:
        """Extract semantic features from spaCy document.

        Args:
            doc: spaCy processed document

        Returns:
            Dictionary of feature_type -> set_of_words
        """
        features = {
            'verbs': set(),
            'nouns': set(),
            'adjectives': set(),
            'entities': set(),
            'lemmas': set()
        }

        for token in doc:
            if token.is_stop or token.is_punct or len(token.text) < 2:
                continue

            lemma = token.lemma_.lower()
            features['lemmas'].add(lemma)

            if token.pos_ == 'VERB':
                features['verbs'].add(lemma)
            elif token.pos_ in ['NOUN', 'PROPN']:
                features['nouns'].add(lemma)
            elif token.pos_ == 'ADJ':
                features['adjectives'].add(lemma)

        # Add named entities
        for ent in doc.ents:
            features['entities'].add(ent.text.lower())

        return features

    def _calculate_semantic_similarity(self, features: Dict[str, Set[str]],
                                       patterns: Dict[str, Set[str]]) -> float:
        """Calculate semantic similarity between features and patterns.

        Args:
            features: Extracted semantic features
            patterns: Change type patterns

        Returns:
            Similarity score (0.0 to 1.0)
        """
        similarity_score = 0.0

        # Action words (verbs) - highest weight
        action_matches = len(features['verbs'].intersection(patterns['action_words']))
        if action_matches > 0:
            similarity_score += action_matches * 0.5

        # Object words (nouns) - medium weight
        object_matches = len(features['nouns'].intersection(patterns['object_words']))
        if object_matches > 0:
            similarity_score += object_matches * 0.3

        # Context words (any lemma) - lower weight
        all_lemmas = features['lemmas']
        context_matches = len(all_lemmas.intersection(patterns['context_words']))
        if context_matches > 0:
            similarity_score += context_matches * 0.2

        # Normalize by maximum possible score
        max_possible = len(patterns['action_words']) * 0.5 + \
                       len(patterns['object_words']) * 0.3 + \
                       len(patterns['context_words']) * 0.2

        return min(1.0, similarity_score / max_possible) if max_possible > 0 else 0.0

    def _simple_keyword_analysis(self, message: str) -> Dict[str, float]:
        """Simple keyword-based analysis fallback.

        Args:
            message: Lowercase commit message

        Returns:
            Dictionary of change_type -> confidence_score
        """
        scores = {}
        words = set(re.findall(r'\b\w+\b', message))

        for change_type, patterns in self.change_patterns.items():
            all_pattern_words = patterns['action_words'] | patterns['object_words'] | patterns['context_words']
            matches = len(words.intersection(all_pattern_words))

            if matches > 0:
                # Simple scoring based on keyword matches
                scores[change_type] = min(1.0, matches / 5.0)  # Scale to 0-1

        return scores

    def _analyze_file_patterns(self, files: List[str]) -> Dict[str, float]:
        """Analyze file patterns for change type signals.

        Args:
            files: List of changed file paths

        Returns:
            Dictionary of change_type -> confidence_score
        """
        if not files:
            return {}

        scores = {}

        for change_type, patterns in self.compiled_file_patterns.items():
            matching_files = 0

            for file_path in files:
                for pattern in patterns:
                    if pattern.search(file_path):
                        matching_files += 1
                        break  # Don't double-count same file

            if matching_files > 0:
                # File pattern confidence based on proportion of matching files
                confidence = min(1.0, matching_files / len(files))
                scores[change_type] = confidence

        return scores

    def _combine_scores(self, semantic_scores: Dict[str, float],
                        file_scores: Dict[str, float]) -> Dict[str, float]:
        """Combine semantic and file pattern scores.

        Args:
            semantic_scores: Scores from semantic analysis
            file_scores: Scores from file pattern analysis

        Returns:
            Combined scores dictionary
        """
        combined = {}
        all_types = set(semantic_scores.keys()) | set(file_scores.keys())

        for change_type in all_types:
            semantic_score = semantic_scores.get(change_type, 0.0)
            file_score = file_scores.get(change_type, 0.0)

            # Weighted combination
            combined_score = (
                semantic_score * self.config.semantic_weight +
                file_score * self.config.file_pattern_weight
            )

            if combined_score > 0:
                combined[change_type] = combined_score

        return combined
```
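For orientation, here is a minimal usage sketch of the classifier above, assuming gitflow-analytics 1.0.3 is installed. `StubConfig` is a hypothetical stand-in for the real `ChangeTypeConfig` (defined in `gitflow_analytics/qualitative/models/schemas.py`, not shown in this hunk); it models only the three attributes the classifier actually reads, and the values chosen here are illustrative, not the package defaults. Passing `doc=None` exercises the keyword fallback, so spaCy is not required:

```python
# Minimal sketch, not documented API usage: drive ChangeTypeClassifier with a
# stub config. Only min_confidence, semantic_weight, and file_pattern_weight
# are read by the classifier; values below are assumptions for illustration.
from dataclasses import dataclass

from gitflow_analytics.qualitative.classifiers.change_type import ChangeTypeClassifier


@dataclass
class StubConfig:  # hypothetical stand-in for ChangeTypeConfig
    min_confidence: float = 0.4       # below this, 'unknown' is returned
    semantic_weight: float = 0.7      # weight for message-keyword scores
    file_pattern_weight: float = 0.3  # weight for file-path scores


clf = ChangeTypeClassifier(StubConfig())

# A conventional-commit prefix short-circuits before any NLP work:
print(clf.classify("feat(auth): add login endpoint", doc=None, files=[]))
# -> ('feature', 0.9)

# Without a prefix (and with doc=None, so the spaCy path is skipped), the
# keyword fallback scores 'bugfix' at 3/5 = 0.6 from "resolve", "crash", and
# "missing", while the test-file pattern scores 'test' at 1/2 = 0.5; after
# weighting, bugfix wins: 0.6 * 0.7 = 0.42 vs 0.5 * 0.3 = 0.15.
print(clf.classify(
    "resolve crash when token is missing",
    doc=None,
    files=["src/auth/session.py", "tests/test_session.py"],
))
# -> ('bugfix', 0.42), up to float rounding
```

Note the ordering baked into `classify`: the prefix table resolves Conventional Commits deterministically at confidence 0.9 before any semantic or file analysis runs, so repositories that follow that convention get cheap, high-confidence labels, and the weighted combination only decides the ambiguous cases.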