gitflow-analytics 1.0.3__py3-none-any.whl → 1.3.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gitflow_analytics/_version.py +1 -1
- gitflow_analytics/classification/__init__.py +31 -0
- gitflow_analytics/classification/batch_classifier.py +752 -0
- gitflow_analytics/classification/classifier.py +464 -0
- gitflow_analytics/classification/feature_extractor.py +725 -0
- gitflow_analytics/classification/linguist_analyzer.py +574 -0
- gitflow_analytics/classification/model.py +455 -0
- gitflow_analytics/cli.py +4158 -350
- gitflow_analytics/cli_rich.py +198 -48
- gitflow_analytics/config/__init__.py +43 -0
- gitflow_analytics/config/errors.py +261 -0
- gitflow_analytics/config/loader.py +905 -0
- gitflow_analytics/config/profiles.py +264 -0
- gitflow_analytics/config/repository.py +124 -0
- gitflow_analytics/config/schema.py +444 -0
- gitflow_analytics/config/validator.py +154 -0
- gitflow_analytics/config.py +44 -508
- gitflow_analytics/core/analyzer.py +1209 -98
- gitflow_analytics/core/cache.py +1337 -29
- gitflow_analytics/core/data_fetcher.py +1285 -0
- gitflow_analytics/core/identity.py +363 -14
- gitflow_analytics/core/metrics_storage.py +526 -0
- gitflow_analytics/core/progress.py +372 -0
- gitflow_analytics/core/schema_version.py +269 -0
- gitflow_analytics/extractors/ml_tickets.py +1100 -0
- gitflow_analytics/extractors/story_points.py +8 -1
- gitflow_analytics/extractors/tickets.py +749 -11
- gitflow_analytics/identity_llm/__init__.py +6 -0
- gitflow_analytics/identity_llm/analysis_pass.py +231 -0
- gitflow_analytics/identity_llm/analyzer.py +464 -0
- gitflow_analytics/identity_llm/models.py +76 -0
- gitflow_analytics/integrations/github_integration.py +175 -11
- gitflow_analytics/integrations/jira_integration.py +461 -24
- gitflow_analytics/integrations/orchestrator.py +124 -1
- gitflow_analytics/metrics/activity_scoring.py +322 -0
- gitflow_analytics/metrics/branch_health.py +470 -0
- gitflow_analytics/metrics/dora.py +379 -20
- gitflow_analytics/models/database.py +843 -53
- gitflow_analytics/pm_framework/__init__.py +115 -0
- gitflow_analytics/pm_framework/adapters/__init__.py +50 -0
- gitflow_analytics/pm_framework/adapters/jira_adapter.py +1845 -0
- gitflow_analytics/pm_framework/base.py +406 -0
- gitflow_analytics/pm_framework/models.py +211 -0
- gitflow_analytics/pm_framework/orchestrator.py +652 -0
- gitflow_analytics/pm_framework/registry.py +333 -0
- gitflow_analytics/qualitative/__init__.py +9 -10
- gitflow_analytics/qualitative/chatgpt_analyzer.py +259 -0
- gitflow_analytics/qualitative/classifiers/__init__.py +3 -3
- gitflow_analytics/qualitative/classifiers/change_type.py +518 -244
- gitflow_analytics/qualitative/classifiers/domain_classifier.py +272 -165
- gitflow_analytics/qualitative/classifiers/intent_analyzer.py +321 -222
- gitflow_analytics/qualitative/classifiers/llm/__init__.py +35 -0
- gitflow_analytics/qualitative/classifiers/llm/base.py +193 -0
- gitflow_analytics/qualitative/classifiers/llm/batch_processor.py +383 -0
- gitflow_analytics/qualitative/classifiers/llm/cache.py +479 -0
- gitflow_analytics/qualitative/classifiers/llm/cost_tracker.py +435 -0
- gitflow_analytics/qualitative/classifiers/llm/openai_client.py +403 -0
- gitflow_analytics/qualitative/classifiers/llm/prompts.py +373 -0
- gitflow_analytics/qualitative/classifiers/llm/response_parser.py +287 -0
- gitflow_analytics/qualitative/classifiers/llm_commit_classifier.py +607 -0
- gitflow_analytics/qualitative/classifiers/risk_analyzer.py +215 -189
- gitflow_analytics/qualitative/core/__init__.py +4 -4
- gitflow_analytics/qualitative/core/llm_fallback.py +239 -235
- gitflow_analytics/qualitative/core/nlp_engine.py +157 -148
- gitflow_analytics/qualitative/core/pattern_cache.py +214 -192
- gitflow_analytics/qualitative/core/processor.py +381 -248
- gitflow_analytics/qualitative/enhanced_analyzer.py +2236 -0
- gitflow_analytics/qualitative/example_enhanced_usage.py +420 -0
- gitflow_analytics/qualitative/models/__init__.py +7 -7
- gitflow_analytics/qualitative/models/schemas.py +155 -121
- gitflow_analytics/qualitative/utils/__init__.py +4 -4
- gitflow_analytics/qualitative/utils/batch_processor.py +136 -123
- gitflow_analytics/qualitative/utils/cost_tracker.py +142 -140
- gitflow_analytics/qualitative/utils/metrics.py +172 -158
- gitflow_analytics/qualitative/utils/text_processing.py +146 -104
- gitflow_analytics/reports/__init__.py +100 -0
- gitflow_analytics/reports/analytics_writer.py +539 -14
- gitflow_analytics/reports/base.py +648 -0
- gitflow_analytics/reports/branch_health_writer.py +322 -0
- gitflow_analytics/reports/classification_writer.py +924 -0
- gitflow_analytics/reports/cli_integration.py +427 -0
- gitflow_analytics/reports/csv_writer.py +1676 -212
- gitflow_analytics/reports/data_models.py +504 -0
- gitflow_analytics/reports/database_report_generator.py +427 -0
- gitflow_analytics/reports/example_usage.py +344 -0
- gitflow_analytics/reports/factory.py +499 -0
- gitflow_analytics/reports/formatters.py +698 -0
- gitflow_analytics/reports/html_generator.py +1116 -0
- gitflow_analytics/reports/interfaces.py +489 -0
- gitflow_analytics/reports/json_exporter.py +2770 -0
- gitflow_analytics/reports/narrative_writer.py +2287 -158
- gitflow_analytics/reports/story_point_correlation.py +1144 -0
- gitflow_analytics/reports/weekly_trends_writer.py +389 -0
- gitflow_analytics/training/__init__.py +5 -0
- gitflow_analytics/training/model_loader.py +377 -0
- gitflow_analytics/training/pipeline.py +550 -0
- gitflow_analytics/tui/__init__.py +1 -1
- gitflow_analytics/tui/app.py +129 -126
- gitflow_analytics/tui/screens/__init__.py +3 -3
- gitflow_analytics/tui/screens/analysis_progress_screen.py +188 -179
- gitflow_analytics/tui/screens/configuration_screen.py +154 -178
- gitflow_analytics/tui/screens/loading_screen.py +100 -110
- gitflow_analytics/tui/screens/main_screen.py +89 -72
- gitflow_analytics/tui/screens/results_screen.py +305 -281
- gitflow_analytics/tui/widgets/__init__.py +2 -2
- gitflow_analytics/tui/widgets/data_table.py +67 -69
- gitflow_analytics/tui/widgets/export_modal.py +76 -76
- gitflow_analytics/tui/widgets/progress_widget.py +41 -46
- gitflow_analytics-1.3.11.dist-info/METADATA +1015 -0
- gitflow_analytics-1.3.11.dist-info/RECORD +122 -0
- gitflow_analytics-1.0.3.dist-info/METADATA +0 -490
- gitflow_analytics-1.0.3.dist-info/RECORD +0 -62
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.11.dist-info}/WHEEL +0 -0
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.11.dist-info}/entry_points.txt +0 -0
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.11.dist-info}/licenses/LICENSE +0 -0
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.11.dist-info}/top_level.txt +0 -0
gitflow_analytics/qualitative/classifiers/change_type.py

@@ -1,214 +1,482 @@

In 1.0.3 the spaCy import sat in a try/except that set SPACY_AVAILABLE = False on ImportError, pathlib.Path was imported, and the keyword sets were single-quoted literals packed several words per line. In 1.3.11 spaCy availability is detected via importlib.util.find_spec without importing the package, the pathlib import is gone, the literals are Black-formatted with double quotes, and several vocabularies pick up project-specific terms ("beacon", "posthog", "iubenda", "mena"). The updated module:

"""Change type classifier using semantic analysis of commit messages."""

import importlib.util
import logging
import re
from typing import Any, Optional

from ..models.schemas import ChangeTypeConfig

# Check if spacy is available without importing it
SPACY_AVAILABLE = importlib.util.find_spec("spacy") is not None

if SPACY_AVAILABLE:
    from spacy.tokens import Doc
else:
    Doc = Any


class ChangeTypeClassifier:
    """Classify commits by change type using semantic analysis.

    This classifier determines the type of change represented by a commit
    (feature, bugfix, refactor, etc.) by analyzing the commit message semantics
    and file patterns.

    The classification uses a combination of:
    - Semantic keyword matching with action/object/context patterns
    - File pattern analysis for additional signals
    - Rule-based patterns for common commit message formats
    """

    def __init__(self, config: ChangeTypeConfig):
        """Initialize change type classifier.

        Args:
            config: Configuration for change type classification
        """
        self.config = config
        self.logger = logging.getLogger(__name__)

        # Define semantic patterns for each change type
        self.change_patterns = {
            "feature": {
                "action_words": {
                    "add", "implement", "create", "build", "introduce", "develop",
                    "enable", "support", "allow", "provide", "include", "addition",
                    "initialize", "prepare", "extend",
                },
                "object_words": {
                    "feature", "functionality", "capability", "component", "module",
                    "endpoint", "api", "service", "interface", "system", "integration",
                    "column", "field", "property",
                },
                "context_words": {
                    "new", "initial", "first", "user", "client", "support",
                    "enhancement", "improvement", "missing", "space", "sticky",
                },
            },
            "bugfix": {
                "action_words": {
                    "fix", "resolve", "correct", "repair", "patch", "address",
                    "handle", "solve", "debug", "prevent", "corrected",
                },
                "object_words": {
                    "bug", "issue", "problem", "error", "defect", "exception",
                    "crash", "failure", "leak", "regression", "beacon", "beacons",
                },
                "context_words": {
                    "broken", "failing", "incorrect", "wrong", "invalid", "missing",
                    "null", "undefined", "not", "allowing",
                },
            },
            "refactor": {
                "action_words": {
                    "refactor", "restructure", "reorganize", "cleanup", "simplify",
                    "optimize", "improve", "enhance", "streamline", "consolidate",
                    "refine", "ensure", "replace", "improves",
                },
                "object_words": {
                    "code", "structure", "architecture", "design", "logic", "method",
                    "function", "class", "module", "combo", "behavior", "focus",
                },
                "context_words": {
                    "better", "cleaner", "simpler", "efficient", "maintainable",
                    "readable", "performance", "box", "hacking",
                },
            },
            "docs": {
                "action_words": {
                    "update", "add", "improve", "write", "document", "clarify",
                    "explain", "describe", "detail", "added",
                },
                "object_words": {
                    "documentation", "readme", "docs", "comment", "docstring",
                    "guide", "tutorial", "example", "specification", "translations",
                    "spanish", "label",
                },
                "context_words": {
                    "explain", "clarify", "describe", "instruction", "help",
                    "change", "dynamically", "language",
                },
            },
            "test": {
                "action_words": {
                    "add", "update", "fix", "improve", "write", "create", "enhance",
                    "extend",
                },
                "object_words": {
                    "test", "spec", "coverage", "unit", "integration", "e2e",
                    "testing", "mock", "stub", "fixture",
                },
                "context_words": {
                    "testing", "verify", "validate", "check", "ensure", "coverage",
                    "assertion",
                },
            },
            "chore": {
                "action_words": {
                    "update", "bump", "upgrade", "configure", "setup", "install",
                    "remove", "delete", "clean", "sync", "merge",
                },
                "object_words": {
                    "dependency", "package", "config", "configuration", "build",
                    "version", "tool", "script", "workflow", "console", "log", "main",
                },
                "context_words": {
                    "maintenance", "housekeeping", "routine", "automated", "ci",
                    "cd", "pipeline", "auto", "removal",
                },
            },
            "security": {
                "action_words": {
                    "fix", "secure", "protect", "validate", "sanitize", "encrypt",
                    "authenticate", "authorize",
                },
                "object_words": {
                    "security", "vulnerability", "exploit", "xss", "csrf",
                    "injection", "authentication", "authorization", "permission",
                },
                "context_words": {
                    "secure", "safe", "protected", "validated", "sanitized",
                    "encrypted", "threat", "attack",
                },
            },
            "hotfix": {
                "action_words": {"hotfix", "fix", "patch", "urgent", "critical", "emergency"},
                "object_words": {
                    "production", "critical", "urgent", "emergency", "hotfix",
                    "issue", "bug", "problem",
                },
                "context_words": {
                    "urgent", "critical", "immediate", "production", "live",
                    "emergency", "asap",
                },
            },
            "config": {
                "action_words": {
                    "configure", "setup", "adjust", "modify", "change", "update",
                    "tweak", "changing",
                },
                "object_words": {
                    "config", "configuration", "settings", "environment",
                    "parameter", "option", "flag", "variable", "roles", "user",
                    "schema", "access", "levels",
                },
                "context_words": {
                    "environment", "production", "development", "staging",
                    "deployment", "setup", "roles", "permission", "api",
                },
            },
            "integration": {
                "action_words": {
                    "integrate", "add", "implement", "connect", "setup", "remove",
                    "extend", "removing",
                },
                "object_words": {
                    "integration", "posthog", "iubenda", "auth0", "oauth", "api",
                    "service", "third-party", "external", "mena",
                },
                "context_words": {
                    "collection", "data", "privacy", "policy", "implementation",
                    "access", "redirect",
                },
            },
        }

        # File pattern signals for change types
        self.file_patterns = {
            "test": [
                r".*test.*\.py$", r".*spec.*\.js$", r".*test.*\.java$",
                r"test_.*\.py$", r".*_test\.go$", r".*\.test\.(js|ts)$",
                r"__tests__/.*", r"tests?/.*", r"spec/.*",
            ],
            "docs": [
                r".*\.md$", r".*\.rst$", r".*\.txt$", r"README.*", r"CHANGELOG.*",
                r"docs?/.*", r"documentation/.*",
            ],
            "config": [
                r".*\.ya?ml$", r".*\.json$", r".*\.toml$", r".*\.ini$",
                r".*\.env.*", r"Dockerfile.*", r".*config.*", r"\.github/.*",
            ],
            "chore": [
                r"package.*\.json$", r"requirements.*\.txt$", r"Pipfile.*",
                r"pom\.xml$", r"build\.gradle$", r".*\.lock$",
            ],
        }

        # Compile regex patterns for efficiency
        self._compile_file_patterns()

        # Common commit message prefixes
        self.prefix_patterns = {
            "feat": "feature",
            "feature": "feature",
            "fix": "bugfix",
            "bugfix": "bugfix",
            "refactor": "refactor",
            "docs": "docs",
            "test": "test",
            "chore": "chore",
            "security": "security",
            "hotfix": "hotfix",
            "config": "config",
            "integration": "integration",
            "integrate": "integration",
            "style": "chore",  # Style changes are usually chores
            "perf": "refactor",  # Performance improvements are refactoring
            "build": "chore",
            "ci": "chore",
        }

    def _compile_file_patterns(self) -> None:
        """Compile regex patterns for file matching."""
        self.compiled_file_patterns = {}
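Before the second hunk, a quick standalone illustration of the file-pattern signals defined above. This is a sketch, not package code: it uses a trimmed subset of the "test" and "docs" lists, and the compile-and-search loop mirrors _compile_file_patterns and _analyze_file_patterns shown below; the sample paths are invented.

import re

# A trimmed subset of the "test" and "docs" pattern lists defined above.
FILE_PATTERNS = {
    "test": [r".*test.*\.py$", r"test_.*\.py$", r"tests?/.*"],
    "docs": [r".*\.md$", r"README.*", r"docs?/.*"],
}

# Mirrors _compile_file_patterns: compile once, case-insensitive.
compiled = {
    change_type: [re.compile(p, re.IGNORECASE) for p in patterns]
    for change_type, patterns in FILE_PATTERNS.items()
}

# Mirrors the matching loop in _analyze_file_patterns: first hit per file wins.
for path in ["tests/test_cache.py", "docs/quickstart.md", "src/analyzer.py"]:
    hits = [ct for ct, pats in compiled.items() if any(p.search(path) for p in pats)]
    print(path, "->", hits or "no signal")
# tests/test_cache.py -> ['test']
# docs/quickstart.md -> ['docs']
# src/analyzer.py -> no signal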
@@ -216,253 +484,259 @@ class ChangeTypeClassifier:

In the methods, the visible intra-line changes are typing modernization (Tuple/Dict/List annotations become built-in generics such as tuple[str, float], dict[str, float], list[str]) and single- to double-quoted strings; the control flow is otherwise unchanged:

        for change_type, patterns in self.file_patterns.items():
            self.compiled_file_patterns[change_type] = [
                re.compile(pattern, re.IGNORECASE) for pattern in patterns
            ]

    def classify(self, message: str, doc: Doc, files: list[str]) -> tuple[str, float]:
        """Classify commit change type with confidence score.

        Args:
            message: Commit message
            doc: spaCy processed document
            files: List of changed files

        Returns:
            Tuple of (change_type, confidence_score)
        """
        if not message:
            return "unknown", 0.0

        # Step 1: Check for conventional commit prefixes
        prefix_result = self._check_conventional_prefix(message)
        if prefix_result:
            change_type, confidence = prefix_result
            if confidence >= self.config.min_confidence:
                return change_type, confidence

        # Step 2: Semantic analysis of message content
        semantic_scores = self._analyze_semantic_content(message, doc)

        # Step 3: File pattern analysis
        file_scores = self._analyze_file_patterns(files)

        # Step 4: Combine scores with weights
        combined_scores = self._combine_scores(semantic_scores, file_scores)

        # Step 5: Select best match
        if not combined_scores:
            return "unknown", 0.0

        best_type = max(combined_scores.keys(), key=lambda k: combined_scores[k])
        confidence = combined_scores[best_type]

        # Apply confidence threshold
        if confidence < self.config.min_confidence:
            return "unknown", confidence

        return best_type, confidence

    def _check_conventional_prefix(self, message: str) -> Optional[tuple[str, float]]:
        """Check for conventional commit message prefixes.

        Args:
            message: Commit message

        Returns:
            Tuple of (change_type, confidence) if found, None otherwise
        """
        # Look for conventional commit format: type(scope): description
        conventional_pattern = r"^(\w+)(?:\([^)]*\))?\s*:\s*(.+)"
        match = re.match(conventional_pattern, message.strip(), re.IGNORECASE)

        if match:
            prefix = match.group(1).lower()
            if prefix in self.prefix_patterns:
                return self.prefix_patterns[prefix], 0.9  # High confidence for explicit prefixes

        # Check for simple prefixes at start of message
        words = message.lower().split()
        if words:
            first_word = words[0].rstrip(":").rstrip("-")
            if first_word in self.prefix_patterns:
                return self.prefix_patterns[first_word], 0.8

        return None

    def _analyze_semantic_content(self, message: str, doc: Doc) -> dict[str, float]:
        """Analyze semantic content of commit message.

        Args:
            message: Commit message
            doc: spaCy processed document

        Returns:
            Dictionary of change_type -> confidence_score
        """
        if not SPACY_AVAILABLE or not doc:
            # Fallback to simple keyword matching
            return self._simple_keyword_analysis(message.lower())

        # Extract semantic features from spaCy doc
        features = self._extract_semantic_features(doc)

        # Calculate similarity to each change type
        scores = {}
        for change_type, patterns in self.change_patterns.items():
            similarity = self._calculate_semantic_similarity(features, patterns)
            if similarity > 0:
                scores[change_type] = similarity

        return scores

    def _extract_semantic_features(self, doc: Doc) -> dict[str, set[str]]:
        """Extract semantic features from spaCy document.

        Args:
            doc: spaCy processed document

        Returns:
            Dictionary of feature_type -> set_of_words
        """
        features = {
            "verbs": set(),
            "nouns": set(),
            "adjectives": set(),
            "entities": set(),
            "lemmas": set(),
        }

        for token in doc:
            if token.is_stop or token.is_punct or len(token.text) < 2:
                continue

            lemma = token.lemma_.lower()
            features["lemmas"].add(lemma)

            if token.pos_ == "VERB":
                features["verbs"].add(lemma)
            elif token.pos_ in ["NOUN", "PROPN"]:
                features["nouns"].add(lemma)
            elif token.pos_ == "ADJ":
                features["adjectives"].add(lemma)

        # Add named entities
        for ent in doc.ents:
            features["entities"].add(ent.text.lower())

        return features

    def _calculate_semantic_similarity(
        self, features: dict[str, set[str]], patterns: dict[str, set[str]]
    ) -> float:
        """Calculate semantic similarity between features and patterns.

        Args:
            features: Extracted semantic features
            patterns: Change type patterns

        Returns:
            Similarity score (0.0 to 1.0)
        """
        similarity_score = 0.0

        # Action words (verbs) - highest weight
        action_matches = len(features["verbs"].intersection(patterns["action_words"]))
        if action_matches > 0:
            similarity_score += action_matches * 0.5

        # Object words (nouns) - medium weight
        object_matches = len(features["nouns"].intersection(patterns["object_words"]))
        if object_matches > 0:
            similarity_score += object_matches * 0.3

        # Context words (any lemma) - lower weight
        all_lemmas = features["lemmas"]
        context_matches = len(all_lemmas.intersection(patterns["context_words"]))
        if context_matches > 0:
            similarity_score += context_matches * 0.2

        # Normalize by maximum possible score
        max_possible = (
            len(patterns["action_words"]) * 0.5
            + len(patterns["object_words"]) * 0.3
            + len(patterns["context_words"]) * 0.2
        )

        return min(1.0, similarity_score / max_possible) if max_possible > 0 else 0.0

    def _simple_keyword_analysis(self, message: str) -> dict[str, float]:
        """Simple keyword-based analysis fallback.

        Args:
            message: Lowercase commit message

        Returns:
            Dictionary of change_type -> confidence_score
        """
        scores = {}
        words = set(re.findall(r"\b\w+\b", message))

        for change_type, patterns in self.change_patterns.items():
            all_pattern_words = (
                patterns["action_words"] | patterns["object_words"] | patterns["context_words"]
            )
            matches = len(words.intersection(all_pattern_words))

            if matches > 0:
                # Simple scoring based on keyword matches
                scores[change_type] = min(1.0, matches / 5.0)  # Scale to 0-1

        return scores

    def _analyze_file_patterns(self, files: list[str]) -> dict[str, float]:
        """Analyze file patterns for change type signals.

        Args:
            files: List of changed file paths

        Returns:
            Dictionary of change_type -> confidence_score
        """
        if not files:
            return {}

        scores = {}

        for change_type, patterns in self.compiled_file_patterns.items():
            matching_files = 0

            for file_path in files:
                for pattern in patterns:
                    if pattern.search(file_path):
                        matching_files += 1
                        break  # Don't double-count same file

            if matching_files > 0:
                # File pattern confidence based on proportion of matching files
                confidence = min(1.0, matching_files / len(files))
                scores[change_type] = confidence

        return scores

    def _combine_scores(
        self, semantic_scores: dict[str, float], file_scores: dict[str, float]
    ) -> dict[str, float]:
        """Combine semantic and file pattern scores.

        Args:
            semantic_scores: Scores from semantic analysis
            file_scores: Scores from file pattern analysis

        Returns:
            Combined scores dictionary
        """
        combined = {}
        all_types = set(semantic_scores.keys()) | set(file_scores.keys())

        for change_type in all_types:
            semantic_score = semantic_scores.get(change_type, 0.0)
            file_score = file_scores.get(change_type, 0.0)

            # Weighted combination
            combined_score = (
                semantic_score * self.config.semantic_weight
                + file_score * self.config.file_pattern_weight
            )

            if combined_score > 0:
                combined[change_type] = combined_score

        return combined
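As a worked example of the weighting inside _calculate_semantic_similarity (assuming spaCy lemmatizes "fix" as a VERB and "crash" as a NOUN): scoring "fix crash in parser" against the bugfix patterns yields one action match (+0.5) and one object match (+0.3), so similarity_score = 0.8. The normalizer max_possible = 11 action words × 0.5 + 12 object words × 0.3 + 10 context words × 0.2 = 11.1, giving a final similarity of 0.8 / 11.1 ≈ 0.072. Because the score is divided by the size of the full vocabulary, semantic-only matches stay far below the 0.9/0.8 confidences returned for explicit prefixes.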
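And a minimal usage sketch tying the pieces together. The FakeConfig stub and its weight values are hypothetical; a real ChangeTypeConfig comes from gitflow_analytics.qualitative.models.schemas, and the stub only supplies the three attributes classify() and _combine_scores() read:

from gitflow_analytics.qualitative.classifiers.change_type import ChangeTypeClassifier

class FakeConfig:
    # Hypothetical stand-in for ChangeTypeConfig with made-up weights.
    min_confidence = 0.3
    semantic_weight = 0.7
    file_pattern_weight = 0.3

clf = ChangeTypeClassifier(FakeConfig())

# Step 1 short-circuits on a conventional-commit prefix at 0.9 confidence.
print(clf.classify("feat(api): add pagination endpoint", doc=None, files=[]))
# ('feature', 0.9)

# With no prefix and doc=None, the keyword fallback plus file signals apply:
# "resolve", "crash", "null" all hit the bugfix vocabulary (3/5 -> 0.6),
# weighted by semantic_weight to roughly ('bugfix', 0.42).
print(clf.classify("resolve crash when token is null", doc=None, files=["src/auth.py"]))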