gitflow-analytics: 1.0.3-py3-none-any.whl → 1.3.6-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116)
  1. gitflow_analytics/_version.py +1 -1
  2. gitflow_analytics/classification/__init__.py +31 -0
  3. gitflow_analytics/classification/batch_classifier.py +752 -0
  4. gitflow_analytics/classification/classifier.py +464 -0
  5. gitflow_analytics/classification/feature_extractor.py +725 -0
  6. gitflow_analytics/classification/linguist_analyzer.py +574 -0
  7. gitflow_analytics/classification/model.py +455 -0
  8. gitflow_analytics/cli.py +4108 -350
  9. gitflow_analytics/cli_rich.py +198 -48
  10. gitflow_analytics/config/__init__.py +43 -0
  11. gitflow_analytics/config/errors.py +261 -0
  12. gitflow_analytics/config/loader.py +904 -0
  13. gitflow_analytics/config/profiles.py +264 -0
  14. gitflow_analytics/config/repository.py +124 -0
  15. gitflow_analytics/config/schema.py +441 -0
  16. gitflow_analytics/config/validator.py +154 -0
  17. gitflow_analytics/config.py +44 -508
  18. gitflow_analytics/core/analyzer.py +1209 -98
  19. gitflow_analytics/core/cache.py +1337 -29
  20. gitflow_analytics/core/data_fetcher.py +1193 -0
  21. gitflow_analytics/core/identity.py +363 -14
  22. gitflow_analytics/core/metrics_storage.py +526 -0
  23. gitflow_analytics/core/progress.py +372 -0
  24. gitflow_analytics/core/schema_version.py +269 -0
  25. gitflow_analytics/extractors/ml_tickets.py +1100 -0
  26. gitflow_analytics/extractors/story_points.py +8 -1
  27. gitflow_analytics/extractors/tickets.py +749 -11
  28. gitflow_analytics/identity_llm/__init__.py +6 -0
  29. gitflow_analytics/identity_llm/analysis_pass.py +231 -0
  30. gitflow_analytics/identity_llm/analyzer.py +464 -0
  31. gitflow_analytics/identity_llm/models.py +76 -0
  32. gitflow_analytics/integrations/github_integration.py +175 -11
  33. gitflow_analytics/integrations/jira_integration.py +461 -24
  34. gitflow_analytics/integrations/orchestrator.py +124 -1
  35. gitflow_analytics/metrics/activity_scoring.py +322 -0
  36. gitflow_analytics/metrics/branch_health.py +470 -0
  37. gitflow_analytics/metrics/dora.py +379 -20
  38. gitflow_analytics/models/database.py +843 -53
  39. gitflow_analytics/pm_framework/__init__.py +115 -0
  40. gitflow_analytics/pm_framework/adapters/__init__.py +50 -0
  41. gitflow_analytics/pm_framework/adapters/jira_adapter.py +1845 -0
  42. gitflow_analytics/pm_framework/base.py +406 -0
  43. gitflow_analytics/pm_framework/models.py +211 -0
  44. gitflow_analytics/pm_framework/orchestrator.py +652 -0
  45. gitflow_analytics/pm_framework/registry.py +333 -0
  46. gitflow_analytics/qualitative/__init__.py +9 -10
  47. gitflow_analytics/qualitative/chatgpt_analyzer.py +259 -0
  48. gitflow_analytics/qualitative/classifiers/__init__.py +3 -3
  49. gitflow_analytics/qualitative/classifiers/change_type.py +518 -244
  50. gitflow_analytics/qualitative/classifiers/domain_classifier.py +272 -165
  51. gitflow_analytics/qualitative/classifiers/intent_analyzer.py +321 -222
  52. gitflow_analytics/qualitative/classifiers/llm/__init__.py +35 -0
  53. gitflow_analytics/qualitative/classifiers/llm/base.py +193 -0
  54. gitflow_analytics/qualitative/classifiers/llm/batch_processor.py +383 -0
  55. gitflow_analytics/qualitative/classifiers/llm/cache.py +479 -0
  56. gitflow_analytics/qualitative/classifiers/llm/cost_tracker.py +435 -0
  57. gitflow_analytics/qualitative/classifiers/llm/openai_client.py +403 -0
  58. gitflow_analytics/qualitative/classifiers/llm/prompts.py +373 -0
  59. gitflow_analytics/qualitative/classifiers/llm/response_parser.py +287 -0
  60. gitflow_analytics/qualitative/classifiers/llm_commit_classifier.py +607 -0
  61. gitflow_analytics/qualitative/classifiers/risk_analyzer.py +215 -189
  62. gitflow_analytics/qualitative/core/__init__.py +4 -4
  63. gitflow_analytics/qualitative/core/llm_fallback.py +239 -235
  64. gitflow_analytics/qualitative/core/nlp_engine.py +157 -148
  65. gitflow_analytics/qualitative/core/pattern_cache.py +214 -192
  66. gitflow_analytics/qualitative/core/processor.py +381 -248
  67. gitflow_analytics/qualitative/enhanced_analyzer.py +2236 -0
  68. gitflow_analytics/qualitative/example_enhanced_usage.py +420 -0
  69. gitflow_analytics/qualitative/models/__init__.py +7 -7
  70. gitflow_analytics/qualitative/models/schemas.py +155 -121
  71. gitflow_analytics/qualitative/utils/__init__.py +4 -4
  72. gitflow_analytics/qualitative/utils/batch_processor.py +136 -123
  73. gitflow_analytics/qualitative/utils/cost_tracker.py +142 -140
  74. gitflow_analytics/qualitative/utils/metrics.py +172 -158
  75. gitflow_analytics/qualitative/utils/text_processing.py +146 -104
  76. gitflow_analytics/reports/__init__.py +100 -0
  77. gitflow_analytics/reports/analytics_writer.py +539 -14
  78. gitflow_analytics/reports/base.py +648 -0
  79. gitflow_analytics/reports/branch_health_writer.py +322 -0
  80. gitflow_analytics/reports/classification_writer.py +924 -0
  81. gitflow_analytics/reports/cli_integration.py +427 -0
  82. gitflow_analytics/reports/csv_writer.py +1676 -212
  83. gitflow_analytics/reports/data_models.py +504 -0
  84. gitflow_analytics/reports/database_report_generator.py +427 -0
  85. gitflow_analytics/reports/example_usage.py +344 -0
  86. gitflow_analytics/reports/factory.py +499 -0
  87. gitflow_analytics/reports/formatters.py +698 -0
  88. gitflow_analytics/reports/html_generator.py +1116 -0
  89. gitflow_analytics/reports/interfaces.py +489 -0
  90. gitflow_analytics/reports/json_exporter.py +2770 -0
  91. gitflow_analytics/reports/narrative_writer.py +2287 -158
  92. gitflow_analytics/reports/story_point_correlation.py +1144 -0
  93. gitflow_analytics/reports/weekly_trends_writer.py +389 -0
  94. gitflow_analytics/training/__init__.py +5 -0
  95. gitflow_analytics/training/model_loader.py +377 -0
  96. gitflow_analytics/training/pipeline.py +550 -0
  97. gitflow_analytics/tui/__init__.py +1 -1
  98. gitflow_analytics/tui/app.py +129 -126
  99. gitflow_analytics/tui/screens/__init__.py +3 -3
  100. gitflow_analytics/tui/screens/analysis_progress_screen.py +188 -179
  101. gitflow_analytics/tui/screens/configuration_screen.py +154 -178
  102. gitflow_analytics/tui/screens/loading_screen.py +100 -110
  103. gitflow_analytics/tui/screens/main_screen.py +89 -72
  104. gitflow_analytics/tui/screens/results_screen.py +305 -281
  105. gitflow_analytics/tui/widgets/__init__.py +2 -2
  106. gitflow_analytics/tui/widgets/data_table.py +67 -69
  107. gitflow_analytics/tui/widgets/export_modal.py +76 -76
  108. gitflow_analytics/tui/widgets/progress_widget.py +41 -46
  109. gitflow_analytics-1.3.6.dist-info/METADATA +1015 -0
  110. gitflow_analytics-1.3.6.dist-info/RECORD +122 -0
  111. gitflow_analytics-1.0.3.dist-info/METADATA +0 -490
  112. gitflow_analytics-1.0.3.dist-info/RECORD +0 -62
  113. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/WHEEL +0 -0
  114. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/entry_points.txt +0 -0
  115. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/licenses/LICENSE +0 -0
  116. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/top_level.txt +0 -0
gitflow_analytics/qualitative/classifiers/change_type.py
@@ -1,214 +1,482 @@
 """Change type classifier using semantic analysis of commit messages."""
 
+import importlib.util
 import logging
 import re
-from typing import Dict, List, Tuple, Set, Any, Optional
-from pathlib import Path
+from typing import Any, Optional
 
 from ..models.schemas import ChangeTypeConfig
 
-try:
-    import spacy
+# Check if spacy is available without importing it
+SPACY_AVAILABLE = importlib.util.find_spec("spacy") is not None
+
+if SPACY_AVAILABLE:
     from spacy.tokens import Doc
-    SPACY_AVAILABLE = True
-except ImportError:
-    SPACY_AVAILABLE = False
+else:
     Doc = Any
 
 
 class ChangeTypeClassifier:
     """Classify commits by change type using semantic analysis.
-
+
     This classifier determines the type of change represented by a commit
     (feature, bugfix, refactor, etc.) by analyzing the commit message semantics
     and file patterns.
-
+
     The classification uses a combination of:
     - Semantic keyword matching with action/object/context patterns
     - File pattern analysis for additional signals
    - Rule-based patterns for common commit message formats
     """
-
+
     def __init__(self, config: ChangeTypeConfig):
         """Initialize change type classifier.
-
+
         Args:
             config: Configuration for change type classification
         """
         self.config = config
         self.logger = logging.getLogger(__name__)
-
+
         # Define semantic patterns for each change type
         self.change_patterns = {
-            'feature': {
-                'action_words': {
-                    'add', 'implement', 'create', 'build', 'introduce', 'develop',
-                    'enable', 'support', 'allow', 'provide', 'include'
+            "feature": {
+                "action_words": {
+                    "add",
+                    "implement",
+                    "create",
+                    "build",
+                    "introduce",
+                    "develop",
+                    "enable",
+                    "support",
+                    "allow",
+                    "provide",
+                    "include",
+                    "addition",
+                    "initialize",
+                    "prepare",
+                    "extend",
+                },
+                "object_words": {
+                    "feature",
+                    "functionality",
+                    "capability",
+                    "component",
+                    "module",
+                    "endpoint",
+                    "api",
+                    "service",
+                    "interface",
+                    "system",
+                    "integration",
+                    "column",
+                    "field",
+                    "property",
+                },
+                "context_words": {
+                    "new",
+                    "initial",
+                    "first",
+                    "user",
+                    "client",
+                    "support",
+                    "enhancement",
+                    "improvement",
+                    "missing",
+                    "space",
+                    "sticky",
+                },
+            },
+            "bugfix": {
+                "action_words": {
+                    "fix",
+                    "resolve",
+                    "correct",
+                    "repair",
+                    "patch",
+                    "address",
+                    "handle",
+                    "solve",
+                    "debug",
+                    "prevent",
+                    "corrected",
+                },
+                "object_words": {
+                    "bug",
+                    "issue",
+                    "problem",
+                    "error",
+                    "defect",
+                    "exception",
+                    "crash",
+                    "failure",
+                    "leak",
+                    "regression",
+                    "beacon",
+                    "beacons",
                 },
-                'object_words': {
-                    'feature', 'functionality', 'capability', 'component', 'module',
-                    'endpoint', 'api', 'service', 'interface', 'system'
+                "context_words": {
+                    "broken",
+                    "failing",
+                    "incorrect",
+                    "wrong",
+                    "invalid",
+                    "missing",
+                    "null",
+                    "undefined",
+                    "not",
+                    "allowing",
                 },
-                'context_words': {
-                    'new', 'initial', 'first', 'user', 'client', 'support',
-                    'enhancement', 'improvement'
-                }
             },
-            'bugfix': {
-                'action_words': {
-                    'fix', 'resolve', 'correct', 'repair', 'patch', 'address',
-                    'handle', 'solve', 'debug', 'prevent'
+            "refactor": {
+                "action_words": {
+                    "refactor",
+                    "restructure",
+                    "reorganize",
+                    "cleanup",
+                    "simplify",
+                    "optimize",
+                    "improve",
+                    "enhance",
+                    "streamline",
+                    "consolidate",
+                    "refine",
+                    "ensure",
+                    "replace",
+                    "improves",
                 },
-                'object_words': {
-                    'bug', 'issue', 'problem', 'error', 'defect', 'exception',
-                    'crash', 'failure', 'leak', 'regression'
+                "object_words": {
+                    "code",
+                    "structure",
+                    "architecture",
+                    "design",
+                    "logic",
+                    "method",
+                    "function",
+                    "class",
+                    "module",
+                    "combo",
+                    "behavior",
+                    "focus",
+                },
+                "context_words": {
+                    "better",
+                    "cleaner",
+                    "simpler",
+                    "efficient",
+                    "maintainable",
+                    "readable",
+                    "performance",
+                    "box",
+                    "hacking",
                 },
-                'context_words': {
-                    'broken', 'failing', 'incorrect', 'wrong', 'invalid',
-                    'missing', 'null', 'undefined'
-                }
             },
-            'refactor': {
-                'action_words': {
-                    'refactor', 'restructure', 'reorganize', 'cleanup', 'simplify',
-                    'optimize', 'improve', 'enhance', 'streamline', 'consolidate'
+            "docs": {
+                "action_words": {
+                    "update",
+                    "add",
+                    "improve",
+                    "write",
+                    "document",
+                    "clarify",
+                    "explain",
+                    "describe",
+                    "detail",
+                    "added",
+                },
+                "object_words": {
+                    "documentation",
+                    "readme",
+                    "docs",
+                    "comment",
+                    "docstring",
+                    "guide",
+                    "tutorial",
+                    "example",
+                    "specification",
+                    "translations",
+                    "spanish",
+                    "label",
                 },
-                'object_words': {
-                    'code', 'structure', 'architecture', 'design', 'logic',
-                    'method', 'function', 'class', 'module'
+                "context_words": {
+                    "explain",
+                    "clarify",
+                    "describe",
+                    "instruction",
+                    "help",
+                    "change",
+                    "dynamically",
+                    "language",
                 },
-                'context_words': {
-                    'better', 'cleaner', 'simpler', 'efficient', 'maintainable',
-                    'readable', 'performance'
-                }
             },
-            'docs': {
-                'action_words': {
-                    'update', 'add', 'improve', 'write', 'document', 'clarify',
-                    'explain', 'describe', 'detail'
+            "test": {
+                "action_words": {
+                    "add",
+                    "update",
+                    "fix",
+                    "improve",
+                    "write",
+                    "create",
+                    "enhance",
+                    "extend",
+                },
+                "object_words": {
+                    "test",
+                    "spec",
+                    "coverage",
+                    "unit",
+                    "integration",
+                    "e2e",
+                    "testing",
+                    "mock",
+                    "stub",
+                    "fixture",
                 },
-                'object_words': {
-                    'documentation', 'readme', 'docs', 'comment', 'docstring',
-                    'guide', 'tutorial', 'example', 'specification'
+                "context_words": {
+                    "testing",
+                    "verify",
+                    "validate",
+                    "check",
+                    "ensure",
+                    "coverage",
+                    "assertion",
                 },
-                'context_words': {
-                    'explain', 'clarify', 'describe', 'instruction', 'help'
-                }
             },
-            'test': {
-                'action_words': {
-                    'add', 'update', 'fix', 'improve', 'write', 'create',
-                    'enhance', 'extend'
+            "chore": {
+                "action_words": {
+                    "update",
+                    "bump",
+                    "upgrade",
+                    "configure",
+                    "setup",
+                    "install",
+                    "remove",
+                    "delete",
+                    "clean",
+                    "sync",
+                    "merge",
                 },
-                'object_words': {
-                    'test', 'spec', 'coverage', 'unit', 'integration', 'e2e',
-                    'testing', 'mock', 'stub', 'fixture'
+                "object_words": {
+                    "dependency",
+                    "package",
+                    "config",
+                    "configuration",
+                    "build",
+                    "version",
+                    "tool",
+                    "script",
+                    "workflow",
+                    "console",
+                    "log",
+                    "main",
+                },
+                "context_words": {
+                    "maintenance",
+                    "housekeeping",
+                    "routine",
+                    "automated",
+                    "ci",
+                    "cd",
+                    "pipeline",
+                    "auto",
+                    "removal",
                 },
-                'context_words': {
-                    'testing', 'verify', 'validate', 'check', 'ensure',
-                    'coverage', 'assertion'
-                }
             },
-            'chore': {
-                'action_words': {
-                    'update', 'bump', 'upgrade', 'configure', 'setup', 'install',
-                    'remove', 'delete', 'clean'
+            "security": {
+                "action_words": {
+                    "fix",
+                    "secure",
+                    "protect",
+                    "validate",
+                    "sanitize",
+                    "encrypt",
+                    "authenticate",
+                    "authorize",
+                },
+                "object_words": {
+                    "security",
+                    "vulnerability",
+                    "exploit",
+                    "xss",
+                    "csrf",
+                    "injection",
+                    "authentication",
+                    "authorization",
+                    "permission",
                 },
-                'object_words': {
-                    'dependency', 'package', 'config', 'configuration', 'build',
-                    'version', 'tool', 'script', 'workflow'
+                "context_words": {
+                    "secure",
+                    "safe",
+                    "protected",
+                    "validated",
+                    "sanitized",
+                    "encrypted",
+                    "threat",
+                    "attack",
                 },
-                'context_words': {
-                    'maintenance', 'housekeeping', 'routine', 'automated',
-                    'ci', 'cd', 'pipeline'
-                }
             },
-            'security': {
-                'action_words': {
-                    'fix', 'secure', 'protect', 'validate', 'sanitize',
-                    'encrypt', 'authenticate', 'authorize'
+            "hotfix": {
+                "action_words": {"hotfix", "fix", "patch", "urgent", "critical", "emergency"},
+                "object_words": {
+                    "production",
+                    "critical",
+                    "urgent",
+                    "emergency",
+                    "hotfix",
+                    "issue",
+                    "bug",
+                    "problem",
                 },
-                'object_words': {
-                    'security', 'vulnerability', 'exploit', 'xss', 'csrf',
-                    'injection', 'authentication', 'authorization', 'permission'
+                "context_words": {
+                    "urgent",
+                    "critical",
+                    "immediate",
+                    "production",
+                    "live",
+                    "emergency",
+                    "asap",
                 },
-                'context_words': {
-                    'secure', 'safe', 'protected', 'validated', 'sanitized',
-                    'encrypted', 'threat', 'attack'
-                }
             },
-            'hotfix': {
-                'action_words': {
-                    'hotfix', 'fix', 'patch', 'urgent', 'critical', 'emergency'
+            "config": {
+                "action_words": {
+                    "configure",
+                    "setup",
+                    "adjust",
+                    "modify",
+                    "change",
+                    "update",
+                    "tweak",
+                    "changing",
+                },
+                "object_words": {
+                    "config",
+                    "configuration",
+                    "settings",
+                    "environment",
+                    "parameter",
+                    "option",
+                    "flag",
+                    "variable",
+                    "roles",
+                    "user",
+                    "schema",
+                    "access",
+                    "levels",
                 },
-                'object_words': {
-                    'production', 'critical', 'urgent', 'emergency', 'hotfix',
-                    'issue', 'bug', 'problem'
+                "context_words": {
+                    "environment",
+                    "production",
+                    "development",
+                    "staging",
+                    "deployment",
+                    "setup",
+                    "roles",
+                    "permission",
+                    "api",
                 },
-                'context_words': {
-                    'urgent', 'critical', 'immediate', 'production', 'live',
-                    'emergency', 'asap'
-                }
             },
-            'config': {
-                'action_words': {
-                    'configure', 'setup', 'adjust', 'modify', 'change',
-                    'update', 'tweak'
+            "integration": {
+                "action_words": {
+                    "integrate",
+                    "add",
+                    "implement",
+                    "connect",
+                    "setup",
+                    "remove",
+                    "extend",
+                    "removing",
                 },
-                'object_words': {
-                    'config', 'configuration', 'settings', 'environment',
-                    'parameter', 'option', 'flag', 'variable'
+                "object_words": {
+                    "integration",
+                    "posthog",
+                    "iubenda",
+                    "auth0",
+                    "oauth",
+                    "api",
+                    "service",
+                    "third-party",
+                    "external",
+                    "mena",
                 },
-                'context_words': {
-                    'environment', 'production', 'development', 'staging',
-                    'deployment', 'setup'
-                }
-            }
+                "context_words": {
+                    "collection",
+                    "data",
+                    "privacy",
+                    "policy",
+                    "implementation",
+                    "access",
+                    "redirect",
+                },
+            },
         }
-
+
         # File pattern signals for change types
         self.file_patterns = {
-            'test': [
-                r'.*test.*\.py$', r'.*spec.*\.js$', r'.*test.*\.java$',
-                r'test_.*\.py$', r'.*_test\.go$', r'.*\.test\.(js|ts)$',
-                r'__tests__/.*', r'tests?/.*', r'spec/.*'
+            "test": [
+                r".*test.*\.py$",
+                r".*spec.*\.js$",
+                r".*test.*\.java$",
+                r"test_.*\.py$",
+                r".*_test\.go$",
+                r".*\.test\.(js|ts)$",
+                r"__tests__/.*",
+                r"tests?/.*",
+                r"spec/.*",
             ],
-            'docs': [
-                r'.*\.md$', r'.*\.rst$', r'.*\.txt$', r'README.*',
-                r'CHANGELOG.*', r'docs?/.*', r'documentation/.*'
+            "docs": [
+                r".*\.md$",
+                r".*\.rst$",
+                r".*\.txt$",
+                r"README.*",
+                r"CHANGELOG.*",
+                r"docs?/.*",
+                r"documentation/.*",
            ],
-            'config': [
-                r'.*\.ya?ml$', r'.*\.json$', r'.*\.toml$', r'.*\.ini$',
-                r'.*\.env.*', r'Dockerfile.*', r'.*config.*', r'\.github/.*'
+            "config": [
+                r".*\.ya?ml$",
+                r".*\.json$",
+                r".*\.toml$",
+                r".*\.ini$",
+                r".*\.env.*",
+                r"Dockerfile.*",
+                r".*config.*",
+                r"\.github/.*",
+            ],
+            "chore": [
+                r"package.*\.json$",
+                r"requirements.*\.txt$",
+                r"Pipfile.*",
+                r"pom\.xml$",
+                r"build\.gradle$",
+                r".*\.lock$",
             ],
-            'chore': [
-                r'package.*\.json$', r'requirements.*\.txt$', r'Pipfile.*',
-                r'pom\.xml$', r'build\.gradle$', r'.*\.lock$'
-            ]
         }
-
+
         # Compile regex patterns for efficiency
         self._compile_file_patterns()
-
+
         # Common commit message prefixes
         self.prefix_patterns = {
-            'feat': 'feature',
-            'feature': 'feature',
-            'fix': 'bugfix',
-            'bugfix': 'bugfix',
-            'refactor': 'refactor',
-            'docs': 'docs',
-            'test': 'test',
-            'chore': 'chore',
-            'security': 'security',
-            'hotfix': 'hotfix',
-            'config': 'config',
-            'style': 'chore', # Style changes are usually chores
-            'perf': 'refactor', # Performance improvements are refactoring
-            'build': 'chore',
-            'ci': 'chore'
+            "feat": "feature",
+            "feature": "feature",
+            "fix": "bugfix",
+            "bugfix": "bugfix",
+            "refactor": "refactor",
+            "docs": "docs",
+            "test": "test",
+            "chore": "chore",
+            "security": "security",
+            "hotfix": "hotfix",
+            "config": "config",
+            "integration": "integration",
+            "integrate": "integration",
+            "style": "chore",  # Style changes are usually chores
+            "perf": "refactor",  # Performance improvements are refactoring
+            "build": "chore",
+            "ci": "chore",
         }
-
+
     def _compile_file_patterns(self) -> None:
         """Compile regex patterns for file matching."""
         self.compiled_file_patterns = {}
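For reference, the "chore" file patterns added above feed `_analyze_file_patterns` (second hunk below), which scores a change type by the proportion of changed files matching its patterns. A minimal standalone sketch of that signal, under assumed names (`CHORE_PATTERNS` and `chore_file_score` are illustrative, not part of the package API):

import re

# Illustrative sketch: the new "chore" patterns from the hunk above, scored
# the way _analyze_file_patterns scores them (proportion of matching files).
CHORE_PATTERNS = [
    re.compile(p, re.IGNORECASE)
    for p in [
        r"package.*\.json$",
        r"requirements.*\.txt$",
        r"Pipfile.*",
        r"pom\.xml$",
        r"build\.gradle$",
        r".*\.lock$",
    ]
]

def chore_file_score(files: list[str]) -> float:
    """Confidence = share of changed files that look like dependency/build files."""
    if not files:
        return 0.0
    matching = sum(1 for f in files if any(p.search(f) for p in CHORE_PATTERNS))
    return min(1.0, matching / len(files))

print(chore_file_score(["package-lock.json", "src/app.ts"]))  # 0.5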
@@ -216,253 +484,259 @@ class ChangeTypeClassifier:
             self.compiled_file_patterns[change_type] = [
                 re.compile(pattern, re.IGNORECASE) for pattern in patterns
             ]
-
-    def classify(self, message: str, doc: Doc, files: List[str]) -> Tuple[str, float]:
+
+    def classify(self, message: str, doc: Doc, files: list[str]) -> tuple[str, float]:
         """Classify commit change type with confidence score.
-
+
         Args:
             message: Commit message
             doc: spaCy processed document
             files: List of changed files
-
+
         Returns:
             Tuple of (change_type, confidence_score)
         """
         if not message:
-            return 'unknown', 0.0
-
+            return "unknown", 0.0
+
         # Step 1: Check for conventional commit prefixes
         prefix_result = self._check_conventional_prefix(message)
         if prefix_result:
             change_type, confidence = prefix_result
             if confidence >= self.config.min_confidence:
                 return change_type, confidence
-
+
         # Step 2: Semantic analysis of message content
         semantic_scores = self._analyze_semantic_content(message, doc)
-
+
         # Step 3: File pattern analysis
         file_scores = self._analyze_file_patterns(files)
-
+
         # Step 4: Combine scores with weights
         combined_scores = self._combine_scores(semantic_scores, file_scores)
-
+
         # Step 5: Select best match
         if not combined_scores:
-            return 'unknown', 0.0
-
+            return "unknown", 0.0
+
         best_type = max(combined_scores.keys(), key=lambda k: combined_scores[k])
         confidence = combined_scores[best_type]
-
+
         # Apply confidence threshold
         if confidence < self.config.min_confidence:
-            return 'unknown', confidence
-
+            return "unknown", confidence
+
         return best_type, confidence
-
-    def _check_conventional_prefix(self, message: str) -> Optional[Tuple[str, float]]:
+
+    def _check_conventional_prefix(self, message: str) -> Optional[tuple[str, float]]:
         """Check for conventional commit message prefixes.
-
+
         Args:
             message: Commit message
-
+
         Returns:
             Tuple of (change_type, confidence) if found, None otherwise
         """
         # Look for conventional commit format: type(scope): description
-        conventional_pattern = r'^(\w+)(?:\([^)]*\))?\s*:\s*(.+)'
+        conventional_pattern = r"^(\w+)(?:\([^)]*\))?\s*:\s*(.+)"
         match = re.match(conventional_pattern, message.strip(), re.IGNORECASE)
-
+
         if match:
             prefix = match.group(1).lower()
             if prefix in self.prefix_patterns:
                 return self.prefix_patterns[prefix], 0.9  # High confidence for explicit prefixes
-
+
         # Check for simple prefixes at start of message
         words = message.lower().split()
         if words:
-            first_word = words[0].rstrip(':').rstrip('-')
+            first_word = words[0].rstrip(":").rstrip("-")
             if first_word in self.prefix_patterns:
                 return self.prefix_patterns[first_word], 0.8
-
+
         return None
-
-    def _analyze_semantic_content(self, message: str, doc: Doc) -> Dict[str, float]:
+
+    def _analyze_semantic_content(self, message: str, doc: Doc) -> dict[str, float]:
         """Analyze semantic content of commit message.
-
+
         Args:
             message: Commit message
             doc: spaCy processed document
-
+
         Returns:
             Dictionary of change_type -> confidence_score
         """
         if not SPACY_AVAILABLE or not doc:
             # Fallback to simple keyword matching
             return self._simple_keyword_analysis(message.lower())
-
+
         # Extract semantic features from spaCy doc
         features = self._extract_semantic_features(doc)
-
+
         # Calculate similarity to each change type
         scores = {}
         for change_type, patterns in self.change_patterns.items():
             similarity = self._calculate_semantic_similarity(features, patterns)
             if similarity > 0:
                 scores[change_type] = similarity
-
+
         return scores
-
-    def _extract_semantic_features(self, doc: Doc) -> Dict[str, Set[str]]:
+
+    def _extract_semantic_features(self, doc: Doc) -> dict[str, set[str]]:
         """Extract semantic features from spaCy document.
-
+
         Args:
             doc: spaCy processed document
-
+
         Returns:
             Dictionary of feature_type -> set_of_words
         """
         features = {
-            'verbs': set(),
-            'nouns': set(),
-            'adjectives': set(),
-            'entities': set(),
-            'lemmas': set()
+            "verbs": set(),
+            "nouns": set(),
+            "adjectives": set(),
+            "entities": set(),
+            "lemmas": set(),
         }
-
+
         for token in doc:
             if token.is_stop or token.is_punct or len(token.text) < 2:
                 continue
-
+
             lemma = token.lemma_.lower()
-            features['lemmas'].add(lemma)
-
-            if token.pos_ == 'VERB':
-                features['verbs'].add(lemma)
-            elif token.pos_ in ['NOUN', 'PROPN']:
-                features['nouns'].add(lemma)
-            elif token.pos_ == 'ADJ':
-                features['adjectives'].add(lemma)
-
+            features["lemmas"].add(lemma)
+
+            if token.pos_ == "VERB":
+                features["verbs"].add(lemma)
+            elif token.pos_ in ["NOUN", "PROPN"]:
+                features["nouns"].add(lemma)
+            elif token.pos_ == "ADJ":
+                features["adjectives"].add(lemma)
+
         # Add named entities
         for ent in doc.ents:
-            features['entities'].add(ent.text.lower())
-
+            features["entities"].add(ent.text.lower())
+
         return features
-
-    def _calculate_semantic_similarity(self, features: Dict[str, Set[str]],
-                                       patterns: Dict[str, Set[str]]) -> float:
+
+    def _calculate_semantic_similarity(
+        self, features: dict[str, set[str]], patterns: dict[str, set[str]]
+    ) -> float:
         """Calculate semantic similarity between features and patterns.
-
+
         Args:
             features: Extracted semantic features
             patterns: Change type patterns
-
+
         Returns:
             Similarity score (0.0 to 1.0)
         """
         similarity_score = 0.0
-
+
         # Action words (verbs) - highest weight
-        action_matches = len(features['verbs'].intersection(patterns['action_words']))
+        action_matches = len(features["verbs"].intersection(patterns["action_words"]))
         if action_matches > 0:
             similarity_score += action_matches * 0.5
-
-        # Object words (nouns) - medium weight
-        object_matches = len(features['nouns'].intersection(patterns['object_words']))
+
+        # Object words (nouns) - medium weight
+        object_matches = len(features["nouns"].intersection(patterns["object_words"]))
         if object_matches > 0:
             similarity_score += object_matches * 0.3
-
+
         # Context words (any lemma) - lower weight
-        all_lemmas = features['lemmas']
-        context_matches = len(all_lemmas.intersection(patterns['context_words']))
+        all_lemmas = features["lemmas"]
+        context_matches = len(all_lemmas.intersection(patterns["context_words"]))
         if context_matches > 0:
             similarity_score += context_matches * 0.2
-
+
         # Normalize by maximum possible score
-        max_possible = len(patterns['action_words']) * 0.5 + \
-                       len(patterns['object_words']) * 0.3 + \
-                       len(patterns['context_words']) * 0.2
-
+        max_possible = (
+            len(patterns["action_words"]) * 0.5
+            + len(patterns["object_words"]) * 0.3
+            + len(patterns["context_words"]) * 0.2
+        )
+
         return min(1.0, similarity_score / max_possible) if max_possible > 0 else 0.0
-
-    def _simple_keyword_analysis(self, message: str) -> Dict[str, float]:
+
+    def _simple_keyword_analysis(self, message: str) -> dict[str, float]:
         """Simple keyword-based analysis fallback.
-
+
         Args:
             message: Lowercase commit message
-
+
         Returns:
             Dictionary of change_type -> confidence_score
         """
         scores = {}
-        words = set(re.findall(r'\b\w+\b', message))
-
+        words = set(re.findall(r"\b\w+\b", message))
+
         for change_type, patterns in self.change_patterns.items():
-            all_pattern_words = patterns['action_words'] | patterns['object_words'] | patterns['context_words']
+            all_pattern_words = (
+                patterns["action_words"] | patterns["object_words"] | patterns["context_words"]
+            )
             matches = len(words.intersection(all_pattern_words))
-
+
             if matches > 0:
                 # Simple scoring based on keyword matches
                 scores[change_type] = min(1.0, matches / 5.0)  # Scale to 0-1
-
+
         return scores
-
-    def _analyze_file_patterns(self, files: List[str]) -> Dict[str, float]:
+
+    def _analyze_file_patterns(self, files: list[str]) -> dict[str, float]:
         """Analyze file patterns for change type signals.
-
+
         Args:
             files: List of changed file paths
-
+
         Returns:
             Dictionary of change_type -> confidence_score
         """
         if not files:
             return {}
-
+
         scores = {}
-
+
         for change_type, patterns in self.compiled_file_patterns.items():
             matching_files = 0
-
+
             for file_path in files:
                 for pattern in patterns:
                     if pattern.search(file_path):
                         matching_files += 1
                         break  # Don't double-count same file
-
+
             if matching_files > 0:
                 # File pattern confidence based on proportion of matching files
                 confidence = min(1.0, matching_files / len(files))
                 scores[change_type] = confidence
-
+
         return scores
-
-    def _combine_scores(self, semantic_scores: Dict[str, float],
-                        file_scores: Dict[str, float]) -> Dict[str, float]:
+
+    def _combine_scores(
+        self, semantic_scores: dict[str, float], file_scores: dict[str, float]
+    ) -> dict[str, float]:
         """Combine semantic and file pattern scores.
-
+
         Args:
             semantic_scores: Scores from semantic analysis
             file_scores: Scores from file pattern analysis
-
+
         Returns:
             Combined scores dictionary
         """
         combined = {}
         all_types = set(semantic_scores.keys()) | set(file_scores.keys())
-
+
         for change_type in all_types:
             semantic_score = semantic_scores.get(change_type, 0.0)
             file_score = file_scores.get(change_type, 0.0)
-
+
             # Weighted combination
             combined_score = (
-                semantic_score * self.config.semantic_weight +
-                file_score * self.config.file_pattern_weight
+                semantic_score * self.config.semantic_weight
+                + file_score * self.config.file_pattern_weight
             )
-
+
             if combined_score > 0:
                 combined[change_type] = combined_score
-
-        return combined
+
+        return combined
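The rewritten `classify()` resolves an explicit conventional-commit prefix first (0.9 confidence, 0.8 for a bare leading keyword) and otherwise blends semantic and file-pattern scores via `config.semantic_weight` and `config.file_pattern_weight`, gated by `config.min_confidence`. For scale, the normalized semantic similarity stays small: against the new bugfix patterns (11 action, 12 object, 10 context words), one action-word hit plus one object-word hit yields (1*0.5 + 1*0.3) / (11*0.5 + 12*0.3 + 10*0.2) = 0.8 / 11.1 ≈ 0.07 before combination. A condensed, self-contained sketch of the decision flow follows; the prefix table is abbreviated and the weight and threshold values are illustrative assumptions, not the package defaults:

import re

# Condensed sketch of classify() above. PREFIX_MAP is abbreviated and the
# default weights/threshold are assumed values for illustration only.
PREFIX_MAP = {"feat": "feature", "fix": "bugfix", "perf": "refactor", "ci": "chore"}
CONVENTIONAL = re.compile(r"^(\w+)(?:\([^)]*\))?\s*:\s*(.+)", re.IGNORECASE)

def classify(
    message: str,
    semantic: dict[str, float],
    file_signal: dict[str, float],
    semantic_weight: float = 0.7,
    file_pattern_weight: float = 0.3,
    min_confidence: float = 0.5,
) -> tuple[str, float]:
    # Step 1: an explicit conventional-commit prefix wins outright.
    match = CONVENTIONAL.match(message.strip())
    if match and match.group(1).lower() in PREFIX_MAP:
        return PREFIX_MAP[match.group(1).lower()], 0.9
    # Steps 2-4: weighted blend of semantic and file-pattern scores.
    combined = {
        t: semantic.get(t, 0.0) * semantic_weight
        + file_signal.get(t, 0.0) * file_pattern_weight
        for t in set(semantic) | set(file_signal)
    }
    if not combined:
        return "unknown", 0.0
    # Step 5: best match, gated by the confidence threshold.
    best = max(combined, key=lambda t: combined[t])
    if combined[best] < min_confidence:
        return "unknown", combined[best]
    return best, combined[best]

print(classify("feat(api): add rate limiting", {}, {}))               # ('feature', 0.9)
print(classify("clean up lockfiles", {"chore": 0.6}, {"chore": 0.5}))  # ('chore', ~0.57)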