gitflow-analytics 1.0.3__py3-none-any.whl → 1.3.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gitflow_analytics/_version.py +1 -1
- gitflow_analytics/classification/__init__.py +31 -0
- gitflow_analytics/classification/batch_classifier.py +752 -0
- gitflow_analytics/classification/classifier.py +464 -0
- gitflow_analytics/classification/feature_extractor.py +725 -0
- gitflow_analytics/classification/linguist_analyzer.py +574 -0
- gitflow_analytics/classification/model.py +455 -0
- gitflow_analytics/cli.py +4108 -350
- gitflow_analytics/cli_rich.py +198 -48
- gitflow_analytics/config/__init__.py +43 -0
- gitflow_analytics/config/errors.py +261 -0
- gitflow_analytics/config/loader.py +904 -0
- gitflow_analytics/config/profiles.py +264 -0
- gitflow_analytics/config/repository.py +124 -0
- gitflow_analytics/config/schema.py +441 -0
- gitflow_analytics/config/validator.py +154 -0
- gitflow_analytics/config.py +44 -508
- gitflow_analytics/core/analyzer.py +1209 -98
- gitflow_analytics/core/cache.py +1337 -29
- gitflow_analytics/core/data_fetcher.py +1193 -0
- gitflow_analytics/core/identity.py +363 -14
- gitflow_analytics/core/metrics_storage.py +526 -0
- gitflow_analytics/core/progress.py +372 -0
- gitflow_analytics/core/schema_version.py +269 -0
- gitflow_analytics/extractors/ml_tickets.py +1100 -0
- gitflow_analytics/extractors/story_points.py +8 -1
- gitflow_analytics/extractors/tickets.py +749 -11
- gitflow_analytics/identity_llm/__init__.py +6 -0
- gitflow_analytics/identity_llm/analysis_pass.py +231 -0
- gitflow_analytics/identity_llm/analyzer.py +464 -0
- gitflow_analytics/identity_llm/models.py +76 -0
- gitflow_analytics/integrations/github_integration.py +175 -11
- gitflow_analytics/integrations/jira_integration.py +461 -24
- gitflow_analytics/integrations/orchestrator.py +124 -1
- gitflow_analytics/metrics/activity_scoring.py +322 -0
- gitflow_analytics/metrics/branch_health.py +470 -0
- gitflow_analytics/metrics/dora.py +379 -20
- gitflow_analytics/models/database.py +843 -53
- gitflow_analytics/pm_framework/__init__.py +115 -0
- gitflow_analytics/pm_framework/adapters/__init__.py +50 -0
- gitflow_analytics/pm_framework/adapters/jira_adapter.py +1845 -0
- gitflow_analytics/pm_framework/base.py +406 -0
- gitflow_analytics/pm_framework/models.py +211 -0
- gitflow_analytics/pm_framework/orchestrator.py +652 -0
- gitflow_analytics/pm_framework/registry.py +333 -0
- gitflow_analytics/qualitative/__init__.py +9 -10
- gitflow_analytics/qualitative/chatgpt_analyzer.py +259 -0
- gitflow_analytics/qualitative/classifiers/__init__.py +3 -3
- gitflow_analytics/qualitative/classifiers/change_type.py +518 -244
- gitflow_analytics/qualitative/classifiers/domain_classifier.py +272 -165
- gitflow_analytics/qualitative/classifiers/intent_analyzer.py +321 -222
- gitflow_analytics/qualitative/classifiers/llm/__init__.py +35 -0
- gitflow_analytics/qualitative/classifiers/llm/base.py +193 -0
- gitflow_analytics/qualitative/classifiers/llm/batch_processor.py +383 -0
- gitflow_analytics/qualitative/classifiers/llm/cache.py +479 -0
- gitflow_analytics/qualitative/classifiers/llm/cost_tracker.py +435 -0
- gitflow_analytics/qualitative/classifiers/llm/openai_client.py +403 -0
- gitflow_analytics/qualitative/classifiers/llm/prompts.py +373 -0
- gitflow_analytics/qualitative/classifiers/llm/response_parser.py +287 -0
- gitflow_analytics/qualitative/classifiers/llm_commit_classifier.py +607 -0
- gitflow_analytics/qualitative/classifiers/risk_analyzer.py +215 -189
- gitflow_analytics/qualitative/core/__init__.py +4 -4
- gitflow_analytics/qualitative/core/llm_fallback.py +239 -235
- gitflow_analytics/qualitative/core/nlp_engine.py +157 -148
- gitflow_analytics/qualitative/core/pattern_cache.py +214 -192
- gitflow_analytics/qualitative/core/processor.py +381 -248
- gitflow_analytics/qualitative/enhanced_analyzer.py +2236 -0
- gitflow_analytics/qualitative/example_enhanced_usage.py +420 -0
- gitflow_analytics/qualitative/models/__init__.py +7 -7
- gitflow_analytics/qualitative/models/schemas.py +155 -121
- gitflow_analytics/qualitative/utils/__init__.py +4 -4
- gitflow_analytics/qualitative/utils/batch_processor.py +136 -123
- gitflow_analytics/qualitative/utils/cost_tracker.py +142 -140
- gitflow_analytics/qualitative/utils/metrics.py +172 -158
- gitflow_analytics/qualitative/utils/text_processing.py +146 -104
- gitflow_analytics/reports/__init__.py +100 -0
- gitflow_analytics/reports/analytics_writer.py +539 -14
- gitflow_analytics/reports/base.py +648 -0
- gitflow_analytics/reports/branch_health_writer.py +322 -0
- gitflow_analytics/reports/classification_writer.py +924 -0
- gitflow_analytics/reports/cli_integration.py +427 -0
- gitflow_analytics/reports/csv_writer.py +1676 -212
- gitflow_analytics/reports/data_models.py +504 -0
- gitflow_analytics/reports/database_report_generator.py +427 -0
- gitflow_analytics/reports/example_usage.py +344 -0
- gitflow_analytics/reports/factory.py +499 -0
- gitflow_analytics/reports/formatters.py +698 -0
- gitflow_analytics/reports/html_generator.py +1116 -0
- gitflow_analytics/reports/interfaces.py +489 -0
- gitflow_analytics/reports/json_exporter.py +2770 -0
- gitflow_analytics/reports/narrative_writer.py +2287 -158
- gitflow_analytics/reports/story_point_correlation.py +1144 -0
- gitflow_analytics/reports/weekly_trends_writer.py +389 -0
- gitflow_analytics/training/__init__.py +5 -0
- gitflow_analytics/training/model_loader.py +377 -0
- gitflow_analytics/training/pipeline.py +550 -0
- gitflow_analytics/tui/__init__.py +1 -1
- gitflow_analytics/tui/app.py +129 -126
- gitflow_analytics/tui/screens/__init__.py +3 -3
- gitflow_analytics/tui/screens/analysis_progress_screen.py +188 -179
- gitflow_analytics/tui/screens/configuration_screen.py +154 -178
- gitflow_analytics/tui/screens/loading_screen.py +100 -110
- gitflow_analytics/tui/screens/main_screen.py +89 -72
- gitflow_analytics/tui/screens/results_screen.py +305 -281
- gitflow_analytics/tui/widgets/__init__.py +2 -2
- gitflow_analytics/tui/widgets/data_table.py +67 -69
- gitflow_analytics/tui/widgets/export_modal.py +76 -76
- gitflow_analytics/tui/widgets/progress_widget.py +41 -46
- gitflow_analytics-1.3.6.dist-info/METADATA +1015 -0
- gitflow_analytics-1.3.6.dist-info/RECORD +122 -0
- gitflow_analytics-1.0.3.dist-info/METADATA +0 -490
- gitflow_analytics-1.0.3.dist-info/RECORD +0 -62
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/WHEEL +0 -0
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/entry_points.txt +0 -0
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/licenses/LICENSE +0 -0
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/top_level.txt +0 -0
gitflow_analytics/qualitative/core/nlp_engine.py +157 -148

@@ -2,19 +2,21 @@
 
 import logging
 import time
-from
+from datetime import datetime
+from typing import Any
 
-from ..models.schemas import NLPConfig, QualitativeCommitData
 from ..classifiers.change_type import ChangeTypeClassifier
 from ..classifiers.domain_classifier import DomainClassifier
 from ..classifiers.intent_analyzer import IntentAnalyzer
 from ..classifiers.risk_analyzer import RiskAnalyzer
-from ..
+from ..models.schemas import NLPConfig, QualitativeCommitData
 from ..utils.metrics import PerformanceMetrics
+from ..utils.text_processing import TextProcessor
 
 try:
     import spacy
     from spacy.tokens import Doc
+
     SPACY_AVAILABLE = True
 except ImportError:
     SPACY_AVAILABLE = False
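The import block above uses the standard optional-dependency guard: attempt the heavyweight import at module load, record the result in a flag, and defer the hard failure to the point of use. A minimal generic sketch of the pattern (the helper name `make_engine` and the model name are illustrative, not from the package):

```python
# Optional-dependency guard, as used above for spaCy.
try:
    import spacy  # heavyweight optional dependency

    SPACY_AVAILABLE = True
except ImportError:
    SPACY_AVAILABLE = False


def make_engine():
    """Fail at construction time, not import time, so the rest of the
    package stays importable without the optional extra installed."""
    if not SPACY_AVAILABLE:
        raise ImportError("spaCy is required for NLP processing. Install with: pip install spacy")
    return spacy.load("en_core_web_sm")  # model name is illustrative
```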
@@ -23,105 +25,118 @@ except ImportError:
 
 class NLPEngine:
     """Core NLP processing engine using spaCy for fast commit analysis.
-
+
     This engine provides the primary classification pipeline for commit analysis,
     handling 85-90% of commits through fast NLP processing without requiring
     expensive LLM calls.
-
+
     The engine orchestrates multiple specialized classifiers:
     - ChangeTypeClassifier: Determines commit type (feature, bugfix, etc.)
     - DomainClassifier: Identifies business domain (frontend, backend, etc.)
     - IntentAnalyzer: Extracts intent signals and urgency
     - RiskAnalyzer: Assesses commit risk level
     """
-
+
     def __init__(self, config: NLPConfig):
         """Initialize NLP engine with spaCy pipeline.
-
+
         Args:
             config: NLP configuration
-
+
         Raises:
             ImportError: If spaCy is not available
             OSError: If spaCy model is not installed
         """
         if not SPACY_AVAILABLE:
+            # Create a temporary logger since self.logger doesn't exist yet
+            temp_logger = logging.getLogger(__name__)
+            temp_logger.warning(
+                "spaCy is not available. NLP processing will be disabled. "
+                "To enable ML features, install spaCy: pip install spacy"
+            )
             raise ImportError(
                 "spaCy is required for NLP processing. Install with: pip install spacy"
             )
-
+
         self.config = config
         self.logger = logging.getLogger(__name__)
-
+
         # Initialize spaCy pipeline
         self._init_spacy_pipeline()
-
+
         # Initialize text processor
         self.text_processor = TextProcessor()
-
+
         # Initialize classifiers
         self.change_classifier = ChangeTypeClassifier(config.change_type_config)
         self.domain_classifier = DomainClassifier(config.domain_config)
         self.intent_analyzer = IntentAnalyzer(config.intent_config)
         self.risk_analyzer = RiskAnalyzer(config.risk_config)
-
+
         # Performance tracking
         self.metrics = PerformanceMetrics()
         self.processing_times = []
-
+
         self.logger.info(f"NLP engine initialized with model: {config.spacy_model}")
-
+
     def _init_spacy_pipeline(self) -> None:
         """Initialize spaCy NLP pipeline with optimizations."""
         try:
             self.nlp = spacy.load(self.config.spacy_model)
-
+
             # Optimize pipeline for speed if in fast mode
             if self.config.fast_mode:
                 # Disable expensive components we don't need
                 disabled_components = []
-                if
-                disabled_components.append(
-                if
-                disabled_components.append(
-
+                if "parser" in self.nlp.pipe_names:
+                    disabled_components.append("parser")
+                if "ner" in self.nlp.pipe_names:
+                    disabled_components.append("ner")
+
                 if disabled_components:
                     self.nlp.disable_pipes(*disabled_components)
                     self.logger.info(f"Disabled spaCy components for speed: {disabled_components}")
-
+
         except OSError as e:
+            self.logger.warning(
+                f"spaCy model '{self.config.spacy_model}' not found. "
+                f"ML features will be disabled. To enable, install with: python -m spacy download {self.config.spacy_model}"
+            )
+            # Raise the original error since the NLP engine requires spaCy
             raise OSError(
                 f"spaCy model '{self.config.spacy_model}' not found. "
                 f"Install with: python -m spacy download {self.config.spacy_model}"
             ) from e
-
-    def process_batch(self, commits:
+
+    def process_batch(self, commits: list[dict[str, Any]]) -> list[QualitativeCommitData]:
         """Process a batch of commits efficiently using spaCy pipeline.
-
+
         This method leverages spaCy's batch processing capabilities to analyze
         multiple commit messages simultaneously for maximum efficiency.
-
+
         Args:
             commits: List of commit dictionaries with message, files_changed, etc.
-
+
         Returns:
             List of QualitativeCommitData with analysis results
         """
         if not commits:
             return []
-
+
         start_time = time.time()
-
+
         # Extract messages for batch processing
-        messages = [commit.get(
-
+        messages = [commit.get("message", "") for commit in commits]
+
         # Process all messages through spaCy pipeline at once
         try:
-            docs = list(
-
-
-
-
+            docs = list(
+                self.nlp.pipe(
+                    messages,
+                    batch_size=self.config.spacy_batch_size,
+                    disable=[] if not self.config.fast_mode else ["parser", "ner"],
+                )
+            )
         except Exception as e:
             self.logger.error(f"spaCy processing failed: {e}")
             # Fallback to individual processing
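For readers unfamiliar with the spaCy idiom the reworked `process_batch` relies on, here is a minimal standalone sketch of the same pattern: load a model, disable pipeline components the classifiers never read, and stream texts through `nlp.pipe()` in batches. The model name and sample messages are assumptions for illustration only.

```python
import spacy

# Illustrative model; gitflow-analytics reads it from NLPConfig.spacy_model.
nlp = spacy.load("en_core_web_sm")

# Mirror of the fast_mode optimization: drop the dependency parser and NER,
# which are expensive and unused by keyword/lemma-based classifiers.
disabled = [name for name in ("parser", "ner") if name in nlp.pipe_names]
if disabled:
    nlp.disable_pipes(*disabled)

messages = [
    "fix: handle empty commit messages in batch classifier",
    "feat(reports): add weekly trends CSV writer",
]

# nlp.pipe() processes texts in batches rather than one Doc at a time,
# which is where the commits/sec throughput logged above comes from.
for doc in nlp.pipe(messages, batch_size=64):
    print([token.lemma_ for token in doc])
```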
@@ -132,7 +147,7 @@ class NLPEngine:
             except Exception:
                 # Create empty doc as fallback
                 docs.append(self.nlp(""))
-
+
         # Analyze each commit with its processed document
         results = []
         for commit, doc in zip(commits, docs):
@@ -143,211 +158,201 @@
                 self.logger.error(f"Error analyzing commit {commit.get('hash', 'unknown')}: {e}")
                 # Create fallback result
                 results.append(self._create_fallback_result(commit))
-
+
         # Track performance
         processing_time = (time.time() - start_time) * 1000  # ms
         self.processing_times.append(processing_time)
-
+
         # Record metrics
         avg_confidence = sum(r.confidence_score for r in results) / len(results) if results else 0.0
         self.metrics.record_processing(
-            operation=
+            operation="nlp_batch",
             processing_time_ms=processing_time,
             items_processed=len(commits),
             confidence_score=avg_confidence,
-            method_used=
+            method_used="nlp",
         )
-
+
         self.logger.debug(
             f"Processed {len(commits)} commits in {processing_time:.1f}ms "
             f"({len(commits) * 1000 / processing_time:.1f} commits/sec)"
         )
-
+
         return results
-
-    def _analyze_commit(self, commit:
+
+    def _analyze_commit(self, commit: dict[str, Any], doc: Doc) -> QualitativeCommitData:
         """Analyze a single commit with all classifiers.
-
+
         Args:
             commit: Commit dictionary with message, files, etc.
             doc: spaCy processed document
-
+
         Returns:
             QualitativeCommitData with analysis results
         """
         analysis_start = time.time()
-
+
         # Extract basic commit info
-        message = commit.get(
-        files_changed = commit.get(
-
+        message = commit.get("message", "")
+        files_changed = commit.get("files_changed", [])
+
         # Run all classifiers
         change_type, change_confidence = self.change_classifier.classify(
             message, doc, files_changed
         )
-
-        domain, domain_confidence = self.domain_classifier.classify(
-
-        )
-
+
+        domain, domain_confidence = self.domain_classifier.classify(message, doc, files_changed)
+
         intent_signals = self.intent_analyzer.analyze(message, doc)
-
+
         risk_assessment = self.risk_analyzer.assess(commit, doc)
-
+
         # Calculate overall confidence score
         overall_confidence = self._calculate_overall_confidence(
-            change_confidence,
-            domain_confidence,
-            intent_signals.get('confidence', 0.5)
+            change_confidence, domain_confidence, intent_signals.get("confidence", 0.5)
         )
-
+
         # Extract technical context
         technical_context = {
-
-
-            message, files_changed,
-            commit.get('insertions', 0),
-            commit.get('deletions', 0)
+            "file_patterns": self.text_processor.extract_file_patterns(files_changed),
+            "complexity_metrics": self.text_processor.calculate_commit_complexity(
+                message, files_changed, commit.get("insertions", 0), commit.get("deletions", 0)
             ),
-
+            "semantic_fingerprint": self.text_processor.create_semantic_fingerprint(
                 message, files_changed
-            )
+            ),
         }
-
+
         processing_time = (time.time() - analysis_start) * 1000  # ms
-
+
         return QualitativeCommitData(
             # Copy existing commit fields
-            hash=commit.get(
+            hash=commit.get("hash", ""),
             message=message,
-            author_name=commit.get(
-            author_email=commit.get(
-            timestamp=commit.get(
+            author_name=commit.get("author_name", ""),
+            author_email=commit.get("author_email", ""),
+            timestamp=commit.get("timestamp", datetime.now()),
             files_changed=files_changed,
-            insertions=commit.get(
-            deletions=commit.get(
-
+            insertions=commit.get("insertions", 0),
+            deletions=commit.get("deletions", 0),
             # Qualitative analysis results
             change_type=change_type,
             change_type_confidence=change_confidence,
             business_domain=domain,
             domain_confidence=domain_confidence,
-            risk_level=risk_assessment[
-            risk_factors=risk_assessment[
+            risk_level=risk_assessment["level"],
+            risk_factors=risk_assessment["factors"],
             intent_signals=intent_signals,
             collaboration_patterns={},  # TODO: Implement collaboration analysis
             technical_context=technical_context,
-
             # Processing metadata
-            processing_method=
+            processing_method="nlp",
             processing_time_ms=processing_time,
-            confidence_score=overall_confidence
+            confidence_score=overall_confidence,
         )
-
-    def _calculate_overall_confidence(
-
-
+
+    def _calculate_overall_confidence(
+        self, change_confidence: float, domain_confidence: float, intent_confidence: float
+    ) -> float:
         """Calculate weighted overall confidence score.
-
+
         Args:
             change_confidence: Change type classification confidence
             domain_confidence: Domain classification confidence
             intent_confidence: Intent analysis confidence
-
+
         Returns:
             Overall confidence score (0.0 to 1.0)
         """
         # Weighted average with change_type being most important
         weights = {
-
-
-
+            "change": 0.5,  # Change type is most critical
+            "domain": 0.3,  # Domain is important for reporting
+            "intent": 0.2,  # Intent is supplementary
         }
-
+
         overall = (
-            change_confidence * weights[
-            domain_confidence * weights[
-            intent_confidence * weights[
+            change_confidence * weights["change"]
+            + domain_confidence * weights["domain"]
+            + intent_confidence * weights["intent"]
         )
-
+
         return min(1.0, max(0.0, overall))
-
-    def _create_fallback_result(self, commit:
+
+    def _create_fallback_result(self, commit: dict[str, Any]) -> QualitativeCommitData:
         """Create a fallback result when analysis fails.
-
+
         Args:
             commit: Commit dictionary
-
+
         Returns:
             QualitativeCommitData with default values
         """
         return QualitativeCommitData(
             # Basic commit info
-            hash=commit.get(
-            message=commit.get(
-            author_name=commit.get(
-            author_email=commit.get(
-            timestamp=commit.get(
-            files_changed=commit.get(
-            insertions=commit.get(
-            deletions=commit.get(
-
+            hash=commit.get("hash", ""),
+            message=commit.get("message", ""),
+            author_name=commit.get("author_name", ""),
+            author_email=commit.get("author_email", ""),
+            timestamp=commit.get("timestamp", time.time()),
+            files_changed=commit.get("files_changed", []),
+            insertions=commit.get("insertions", 0),
+            deletions=commit.get("deletions", 0),
             # Default classifications
-            change_type=
+            change_type="unknown",
             change_type_confidence=0.0,
-            business_domain=
+            business_domain="unknown",
             domain_confidence=0.0,
-            risk_level=
-            risk_factors=[
-            intent_signals={
+            risk_level="medium",
+            risk_factors=["analysis_failed"],
+            intent_signals={"confidence": 0.0, "signals": []},
             collaboration_patterns={},
             technical_context={},
-
             # Processing metadata
-            processing_method=
+            processing_method="nlp",
             processing_time_ms=0.0,
-            confidence_score=0.0
+            confidence_score=0.0,
         )
-
-    def get_performance_stats(self) ->
+
+    def get_performance_stats(self) -> dict[str, Any]:
         """Get NLP engine performance statistics.
-
+
         Returns:
             Dictionary with performance metrics
         """
         if not self.processing_times:
             return {
-
-
-
-
-
+                "total_batches": 0,
+                "avg_processing_time_ms": 0.0,
+                "min_processing_time_ms": 0.0,
+                "max_processing_time_ms": 0.0,
+                "total_processing_time_ms": 0.0,
             }
-
+
         return {
-
-
-
-
-
-
-
-
+            "total_batches": len(self.processing_times),
+            "avg_processing_time_ms": sum(self.processing_times) / len(self.processing_times),
+            "min_processing_time_ms": min(self.processing_times),
+            "max_processing_time_ms": max(self.processing_times),
+            "total_processing_time_ms": sum(self.processing_times),
+            "spacy_model": self.config.spacy_model,
+            "fast_mode": self.config.fast_mode,
+            "batch_size": self.config.spacy_batch_size,
         }
-
-    def validate_setup(self) ->
+
+    def validate_setup(self) -> tuple[bool, list[str]]:
         """Validate NLP engine setup and dependencies.
-
+
         Returns:
             Tuple of (is_valid, list_of_issues)
         """
         issues = []
-
+
         # Check spaCy availability
         if not SPACY_AVAILABLE:
             issues.append("spaCy not installed")
             return False, issues
-
+
         # Check model availability
         try:
             test_nlp = spacy.load(self.config.spacy_model)
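Since `_calculate_overall_confidence` is a plain convex combination (the weights 0.5/0.3/0.2 sum to 1.0), a quick worked example with made-up scores shows how the change-type confidence dominates the blend:

```python
# Hypothetical classifier outputs, for illustration only.
change_confidence = 0.9  # weighted 0.5
domain_confidence = 0.6  # weighted 0.3
intent_confidence = 0.5  # weighted 0.2

overall = 0.5 * change_confidence + 0.3 * domain_confidence + 0.2 * intent_confidence
print(overall)  # 0.73; the real method additionally clamps to [0.0, 1.0]
```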
@@ -359,15 +364,19 @@
             issues.append(f"spaCy model '{self.config.spacy_model}' not installed")
         except Exception as e:
             issues.append(f"spaCy model error: {e}")
-
+
         # Check classifier initialization
         for classifier_name, classifier in [
-            (
-            (
-            (
-            (
+            ("change_type", self.change_classifier),
+            ("domain", self.domain_classifier),
+            ("intent", self.intent_analyzer),
+            ("risk", self.risk_analyzer),
         ]:
-            if
+            if (
+                not hasattr(classifier, "classify")
+                and not hasattr(classifier, "analyze")
+                and not hasattr(classifier, "assess")
+            ):
                 issues.append(f"{classifier_name} classifier not properly initialized")
-
-        return len(issues) == 0, issues
+
+        return len(issues) == 0, issues