gitflow-analytics 1.0.3__py3-none-any.whl → 1.3.11__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. The information is provided for informational purposes only.
- gitflow_analytics/_version.py +1 -1
- gitflow_analytics/classification/__init__.py +31 -0
- gitflow_analytics/classification/batch_classifier.py +752 -0
- gitflow_analytics/classification/classifier.py +464 -0
- gitflow_analytics/classification/feature_extractor.py +725 -0
- gitflow_analytics/classification/linguist_analyzer.py +574 -0
- gitflow_analytics/classification/model.py +455 -0
- gitflow_analytics/cli.py +4158 -350
- gitflow_analytics/cli_rich.py +198 -48
- gitflow_analytics/config/__init__.py +43 -0
- gitflow_analytics/config/errors.py +261 -0
- gitflow_analytics/config/loader.py +905 -0
- gitflow_analytics/config/profiles.py +264 -0
- gitflow_analytics/config/repository.py +124 -0
- gitflow_analytics/config/schema.py +444 -0
- gitflow_analytics/config/validator.py +154 -0
- gitflow_analytics/config.py +44 -508
- gitflow_analytics/core/analyzer.py +1209 -98
- gitflow_analytics/core/cache.py +1337 -29
- gitflow_analytics/core/data_fetcher.py +1285 -0
- gitflow_analytics/core/identity.py +363 -14
- gitflow_analytics/core/metrics_storage.py +526 -0
- gitflow_analytics/core/progress.py +372 -0
- gitflow_analytics/core/schema_version.py +269 -0
- gitflow_analytics/extractors/ml_tickets.py +1100 -0
- gitflow_analytics/extractors/story_points.py +8 -1
- gitflow_analytics/extractors/tickets.py +749 -11
- gitflow_analytics/identity_llm/__init__.py +6 -0
- gitflow_analytics/identity_llm/analysis_pass.py +231 -0
- gitflow_analytics/identity_llm/analyzer.py +464 -0
- gitflow_analytics/identity_llm/models.py +76 -0
- gitflow_analytics/integrations/github_integration.py +175 -11
- gitflow_analytics/integrations/jira_integration.py +461 -24
- gitflow_analytics/integrations/orchestrator.py +124 -1
- gitflow_analytics/metrics/activity_scoring.py +322 -0
- gitflow_analytics/metrics/branch_health.py +470 -0
- gitflow_analytics/metrics/dora.py +379 -20
- gitflow_analytics/models/database.py +843 -53
- gitflow_analytics/pm_framework/__init__.py +115 -0
- gitflow_analytics/pm_framework/adapters/__init__.py +50 -0
- gitflow_analytics/pm_framework/adapters/jira_adapter.py +1845 -0
- gitflow_analytics/pm_framework/base.py +406 -0
- gitflow_analytics/pm_framework/models.py +211 -0
- gitflow_analytics/pm_framework/orchestrator.py +652 -0
- gitflow_analytics/pm_framework/registry.py +333 -0
- gitflow_analytics/qualitative/__init__.py +9 -10
- gitflow_analytics/qualitative/chatgpt_analyzer.py +259 -0
- gitflow_analytics/qualitative/classifiers/__init__.py +3 -3
- gitflow_analytics/qualitative/classifiers/change_type.py +518 -244
- gitflow_analytics/qualitative/classifiers/domain_classifier.py +272 -165
- gitflow_analytics/qualitative/classifiers/intent_analyzer.py +321 -222
- gitflow_analytics/qualitative/classifiers/llm/__init__.py +35 -0
- gitflow_analytics/qualitative/classifiers/llm/base.py +193 -0
- gitflow_analytics/qualitative/classifiers/llm/batch_processor.py +383 -0
- gitflow_analytics/qualitative/classifiers/llm/cache.py +479 -0
- gitflow_analytics/qualitative/classifiers/llm/cost_tracker.py +435 -0
- gitflow_analytics/qualitative/classifiers/llm/openai_client.py +403 -0
- gitflow_analytics/qualitative/classifiers/llm/prompts.py +373 -0
- gitflow_analytics/qualitative/classifiers/llm/response_parser.py +287 -0
- gitflow_analytics/qualitative/classifiers/llm_commit_classifier.py +607 -0
- gitflow_analytics/qualitative/classifiers/risk_analyzer.py +215 -189
- gitflow_analytics/qualitative/core/__init__.py +4 -4
- gitflow_analytics/qualitative/core/llm_fallback.py +239 -235
- gitflow_analytics/qualitative/core/nlp_engine.py +157 -148
- gitflow_analytics/qualitative/core/pattern_cache.py +214 -192
- gitflow_analytics/qualitative/core/processor.py +381 -248
- gitflow_analytics/qualitative/enhanced_analyzer.py +2236 -0
- gitflow_analytics/qualitative/example_enhanced_usage.py +420 -0
- gitflow_analytics/qualitative/models/__init__.py +7 -7
- gitflow_analytics/qualitative/models/schemas.py +155 -121
- gitflow_analytics/qualitative/utils/__init__.py +4 -4
- gitflow_analytics/qualitative/utils/batch_processor.py +136 -123
- gitflow_analytics/qualitative/utils/cost_tracker.py +142 -140
- gitflow_analytics/qualitative/utils/metrics.py +172 -158
- gitflow_analytics/qualitative/utils/text_processing.py +146 -104
- gitflow_analytics/reports/__init__.py +100 -0
- gitflow_analytics/reports/analytics_writer.py +539 -14
- gitflow_analytics/reports/base.py +648 -0
- gitflow_analytics/reports/branch_health_writer.py +322 -0
- gitflow_analytics/reports/classification_writer.py +924 -0
- gitflow_analytics/reports/cli_integration.py +427 -0
- gitflow_analytics/reports/csv_writer.py +1676 -212
- gitflow_analytics/reports/data_models.py +504 -0
- gitflow_analytics/reports/database_report_generator.py +427 -0
- gitflow_analytics/reports/example_usage.py +344 -0
- gitflow_analytics/reports/factory.py +499 -0
- gitflow_analytics/reports/formatters.py +698 -0
- gitflow_analytics/reports/html_generator.py +1116 -0
- gitflow_analytics/reports/interfaces.py +489 -0
- gitflow_analytics/reports/json_exporter.py +2770 -0
- gitflow_analytics/reports/narrative_writer.py +2287 -158
- gitflow_analytics/reports/story_point_correlation.py +1144 -0
- gitflow_analytics/reports/weekly_trends_writer.py +389 -0
- gitflow_analytics/training/__init__.py +5 -0
- gitflow_analytics/training/model_loader.py +377 -0
- gitflow_analytics/training/pipeline.py +550 -0
- gitflow_analytics/tui/__init__.py +1 -1
- gitflow_analytics/tui/app.py +129 -126
- gitflow_analytics/tui/screens/__init__.py +3 -3
- gitflow_analytics/tui/screens/analysis_progress_screen.py +188 -179
- gitflow_analytics/tui/screens/configuration_screen.py +154 -178
- gitflow_analytics/tui/screens/loading_screen.py +100 -110
- gitflow_analytics/tui/screens/main_screen.py +89 -72
- gitflow_analytics/tui/screens/results_screen.py +305 -281
- gitflow_analytics/tui/widgets/__init__.py +2 -2
- gitflow_analytics/tui/widgets/data_table.py +67 -69
- gitflow_analytics/tui/widgets/export_modal.py +76 -76
- gitflow_analytics/tui/widgets/progress_widget.py +41 -46
- gitflow_analytics-1.3.11.dist-info/METADATA +1015 -0
- gitflow_analytics-1.3.11.dist-info/RECORD +122 -0
- gitflow_analytics-1.0.3.dist-info/METADATA +0 -490
- gitflow_analytics-1.0.3.dist-info/RECORD +0 -62
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.11.dist-info}/WHEEL +0 -0
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.11.dist-info}/entry_points.txt +0 -0
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.11.dist-info}/licenses/LICENSE +0 -0
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.11.dist-info}/top_level.txt +0 -0
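
Among the new modules above, gitflow_analytics/core/schema_version.py backs the incremental reprocessing that the processor.py diff below wires in through create_schema_manager(). The sketch here only restates the interface implied by the call sites visible in that diff (has_schema_changed, get_last_processed_date, update_schema_version, mark_date_processed); the parameter names, types, and the Protocol form are assumptions, not the module's published API.

from datetime import datetime
from pathlib import Path
from typing import Any, Optional, Protocol


class SchemaManager(Protocol):
    """Assumed shape of the schema-version manager as used by QualitativeProcessor."""

    def has_schema_changed(self, component: str, config: dict[str, Any]) -> bool:
        """True when the stored config/schema for `component` differs from `config`."""
        ...

    def get_last_processed_date(self, component: str) -> Optional[datetime]:
        """Most recent commit timestamp already processed for `component`, if any."""
        ...

    def update_schema_version(
        self, component: str, config: dict[str, Any], latest_date: datetime
    ) -> None:
        """Persist the current config together with the latest processed date."""
        ...

    def mark_date_processed(
        self, component: str, date: datetime, config: dict[str, Any]
    ) -> None:
        """Record that commits up to `date` were processed under `config`."""
        ...


def create_schema_manager(cache_dir: Path) -> SchemaManager:
    """Factory the processor imports; the real implementation lives in core/schema_version.py."""
    raise NotImplementedError("placeholder for illustration only")
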
gitflow_analytics/qualitative/core/processor.py
(note: a number of removed lines were truncated, some to nothing, by the upstream diff viewer and are reproduced here as-is)

@@ -2,260 +2,389 @@

 import logging
 import time
-from typing import Dict, List, Any, Tuple, Optional
-from concurrent.futures import ThreadPoolExecutor, as_completed
 from pathlib import Path
+from typing import Any, Optional

+from ...core.schema_version import create_schema_manager
 from ...models.database import Database
-from ..models.schemas import
-from .nlp_engine import NLPEngine
-from .llm_fallback import LLMFallback
-from .pattern_cache import PatternCache
+from ..models.schemas import QualitativeCommitData, QualitativeConfig
 from ..utils.batch_processor import BatchProcessor, ProgressTracker
 from ..utils.metrics import PerformanceMetrics
+from .llm_fallback import LLMFallback
+from .nlp_engine import NLPEngine
+from .pattern_cache import PatternCache


 class QualitativeProcessor:
     """Main orchestrator for qualitative analysis of Git commits.
-
+
     This processor coordinates the entire qualitative analysis pipeline:
     1. Pattern cache lookup for known commit patterns
     2. Fast NLP processing for most commits
     3. Strategic LLM fallback for uncertain cases
     4. Pattern learning and cache updates
     5. Performance monitoring and optimization
-
+
     The system is designed to process 10,000+ commits in under 60 seconds
     while maintaining high accuracy and keeping LLM costs low.
     """
-
-    def __init__(
+
+    def __init__(
+        self, config: QualitativeConfig, database: Database, cache_dir: Optional[Path] = None
+    ):
         """Initialize qualitative processor.
-
+
         Args:
             config: Configuration for qualitative analysis
             database: Database instance for caching and storage
+            cache_dir: Cache directory for schema versioning
         """
         self.config = config
         self.database = database
         self.logger = logging.getLogger(__name__)
-
+
+        # Initialize schema version manager
+        if cache_dir is None:
+            cache_dir = Path(config.cache_config.cache_dir)
+        self.schema_manager = create_schema_manager(cache_dir)
+
         # Initialize core components
         self.nlp_engine = NLPEngine(config.nlp_config)
         self.pattern_cache = PatternCache(config.cache_config, database)
-
+
         # Initialize LLM fallback if enabled
         self.llm_fallback = None
         if config.llm_config.openrouter_api_key:
             try:
-                self.llm_fallback = LLMFallback(config.llm_config)
+                self.llm_fallback = LLMFallback(config.llm_config, cache_dir)
                 self.logger.info("LLM fallback system initialized")
             except Exception as e:
                 self.logger.warning(f"LLM fallback initialization failed: {e}")
         else:
             self.logger.info("LLM fallback disabled (no API key configured)")
-
+
         # Initialize utilities
         self.batch_processor = BatchProcessor(
-            batch_size=config.batch_size,
-            max_workers=config.nlp_config.max_workers
+            batch_size=config.batch_size, max_workers=config.nlp_config.max_workers
         )
         self.metrics = PerformanceMetrics()
-
+
         # Processing statistics
         self.processing_stats = {
-
-
-
-
-
-
+            "total_processed": 0,
+            "cache_hits": 0,
+            "nlp_processed": 0,
+            "llm_processed": 0,
+            "processing_start_time": None,
+            "last_optimization": None,
         }
-
+
         self.logger.info("Qualitative processor initialized")
-
-    def
-
-
-
+
+    def _filter_commits_for_processing(
+        self, commits: list[dict[str, Any]], force_reprocess: bool = False
+    ) -> list[dict[str, Any]]:
+        """Filter commits to only those that need processing based on schema versioning."""
+        if force_reprocess:
+            return commits
+
+        # Convert config to dict for schema comparison
+        config_dict = {
+            "nlp_config": self.config.nlp_config.__dict__,
+            "llm_config": self.config.llm_config.__dict__,
+            "cache_config": self.config.cache_config.__dict__,
+            "confidence_threshold": self.config.confidence_threshold,
+            "max_llm_fallback_pct": self.config.max_llm_fallback_pct,
+        }
+
+        # Check if schema has changed
+        schema_changed = self.schema_manager.has_schema_changed("qualitative", config_dict)
+
+        if schema_changed:
+            self.logger.info("Qualitative analysis schema has changed, reprocessing all commits")
+            return commits
+
+        # Filter by date - only process commits after last processed date
+        last_processed = self.schema_manager.get_last_processed_date("qualitative")
+        if not last_processed:
+            self.logger.info("No previous processing date found, processing all commits")
+            return commits
+
+        # Filter commits by date
+        commits_to_process = []
+        for commit in commits:
+            commit_date = commit.get("timestamp")
+            if commit_date and commit_date > last_processed:
+                commits_to_process.append(commit)
+
+        return commits_to_process
+
+    def _get_existing_results(self, commits: list[dict[str, Any]]) -> list[QualitativeCommitData]:
+        """Get existing qualitative results for commits from the database."""
+        results = []
+
+        # Try to load existing results from database
+        # This is a simplified version - in practice you'd query the qualitative_commits table
+        for commit in commits:
+            # Create minimal result indicating no processing needed
+            result = QualitativeCommitData(
+                hash=commit.get("hash", ""),
+                message=commit.get("message", ""),
+                author_name=commit.get("author_name", ""),
+                author_email=commit.get("author_email", ""),
+                timestamp=commit.get("timestamp"),
+                files_changed=commit.get("files_changed", []),
+                insertions=commit.get("insertions", 0),
+                deletions=commit.get("deletions", 0),
+                change_type="unknown",
+                change_type_confidence=0.0,
+                business_domain="unknown",
+                domain_confidence=0.0,
+                risk_level="low",
+                risk_factors=[],
+                intent_signals={},
+                collaboration_patterns={},
+                technical_context={},
+                processing_method="cached",
+                processing_time_ms=0.0,
+                confidence_score=0.0,
+            )
+            results.append(result)
+
+        return results
+
+    def _update_schema_tracking(self, commits: list[dict[str, Any]]):
+        """Update schema version tracking after processing commits."""
+        if not commits:
+            return
+
+        # Convert config to dict for schema tracking
+        config_dict = {
+            "nlp_config": self.config.nlp_config.__dict__,
+            "llm_config": self.config.llm_config.__dict__,
+            "cache_config": self.config.cache_config.__dict__,
+            "confidence_threshold": self.config.confidence_threshold,
+            "max_llm_fallback_pct": self.config.max_llm_fallback_pct,
+        }
+
+        # Find the latest commit date
+        latest_date = max(commit.get("timestamp") for commit in commits if commit.get("timestamp"))
+
+        # Update schema version with latest processed date
+        self.schema_manager.update_schema_version("qualitative", config_dict, latest_date)
+        self.schema_manager.mark_date_processed("qualitative", latest_date, config_dict)
+
+    def process_commits(
+        self,
+        commits: list[dict[str, Any]],
+        show_progress: bool = True,
+        force_reprocess: bool = False,
+    ) -> list[QualitativeCommitData]:
+        """Process commits with qualitative analysis using incremental processing.
+
         Args:
             commits: List of commit dictionaries from GitFlow Analytics
             show_progress: Whether to show progress information
-
+            force_reprocess: Force reprocessing even if schema hasn't changed
+
         Returns:
             List of QualitativeCommitData with analysis results
         """
         if not commits:
             return []
-
+
         if not self.config.enabled:
             self.logger.info("Qualitative analysis disabled in configuration")
             return self._create_disabled_results(commits)
-
-
-        self.
-
+
+        # Filter commits for incremental processing
+        commits_to_process = self._filter_commits_for_processing(commits, force_reprocess)
+
+        if not commits_to_process:
+            self.logger.info("No commits require processing (all up-to-date)")
+            # Return existing results for all commits
+            return self._get_existing_results(commits)
+
+        self.processing_stats["processing_start_time"] = time.time()
+        self.logger.info(
+            f"Starting qualitative analysis of {len(commits_to_process)} commits "
+            f"({len(commits) - len(commits_to_process)} already processed)"
+        )
+
         # Setup progress tracking
-        progress_tracker =
-            total=len(commits),
-
-
-
+        progress_tracker = (
+            ProgressTracker(total=len(commits), description="Qualitative Analysis")
+            if show_progress
+            else None
+        )
+
         # Step 1: Check cache for known patterns
         cached_results, uncached_commits = self._check_cache(commits, progress_tracker)
-        self.logger.info(
-
+        self.logger.info(
+            f"Cache provided {len(cached_results)} results, processing {len(uncached_commits)} commits"
+        )
+
         # Step 2: Process uncached commits with NLP
         nlp_results = []
         if uncached_commits:
             nlp_results = self._process_with_nlp(uncached_commits, progress_tracker)
-
+
         # Step 3: Identify uncertain cases for LLM processing
         confident_results, uncertain_commits = self._separate_by_confidence(nlp_results)
-        self.logger.info(
-
+        self.logger.info(
+            f"NLP confident: {len(confident_results)}, uncertain: {len(uncertain_commits)}"
+        )
+
         # Step 4: Process uncertain cases with LLM if available
         llm_results = []
         if uncertain_commits and self.llm_fallback:
             llm_results = self._process_with_llm(uncertain_commits, progress_tracker)
         else:
             # If no LLM available, keep uncertain results with lower confidence
-            llm_results = [
-
+            llm_results = [
+                self._convert_to_uncertain_result(commit) for commit in uncertain_commits
+            ]
+
         # Step 5: Update cache with new high-confidence patterns
         all_new_results = confident_results + llm_results
         if self.config.cache_config.enable_pattern_learning:
             self.pattern_cache.learn_from_results(all_new_results)
-
+
         # Step 6: Combine all results
         all_results = cached_results + confident_results + llm_results
-
+
         # Update processing statistics
-        self._update_processing_stats(
-
-
+        self._update_processing_stats(
+            len(commits), len(cached_results), len(confident_results), len(llm_results)
+        )
+
         # Periodic cache optimization
         if self._should_optimize_cache():
             self._optimize_system()
-
-
+
+        # Update schema tracking after successful processing
+        self._update_schema_tracking(commits_to_process)
+
+        self.logger.info(
+            f"Qualitative analysis completed in {time.time() - self.processing_stats['processing_start_time']:.2f}s"
+        )
         return all_results
-
-    def _check_cache(
-
+
+    def _check_cache(
+        self, commits: list[dict[str, Any]], progress_tracker: Optional[ProgressTracker]
+    ) -> tuple[list[QualitativeCommitData], list[dict[str, Any]]]:
         """Check pattern cache for known commit patterns.
-
+
         Args:
             commits: List of commit dictionaries
             progress_tracker: Optional progress tracker
-
+
         Returns:
             Tuple of (cached_results, uncached_commits)
         """
         cached_results = []
         uncached_commits = []
-
+
         for commit in commits:
             cached_result = self.pattern_cache.lookup_pattern(
-                commit.get(
-                commit.get('files_changed', [])
+                commit.get("message", ""), commit.get("files_changed", [])
             )
-
+
             if cached_result:
                 # Convert cached result to QualitativeCommitData
                 result = self._create_result_from_cache(commit, cached_result)
                 cached_results.append(result)
-                self.processing_stats[
+                self.processing_stats["cache_hits"] += 1
             else:
                 uncached_commits.append(commit)
-
+
             if progress_tracker:
                 progress_tracker.update(1)
-
+
         return cached_results, uncached_commits
-
-    def _process_with_nlp(
-
+
+    def _process_with_nlp(
+        self, commits: list[dict[str, Any]], progress_tracker: Optional[ProgressTracker]
+    ) -> list[QualitativeCommitData]:
         """Process commits using NLP engine.
-
+
         Args:
             commits: List of commit dictionaries
            progress_tracker: Optional progress tracker
-
+
         Returns:
             List of QualitativeCommitData from NLP processing
         """
         if not commits:
             return []
-
-        def process_batch_with_progress(batch:
+
+        def process_batch_with_progress(batch: list[dict[str, Any]]) -> list[QualitativeCommitData]:
             results = self.nlp_engine.process_batch(batch)
             if progress_tracker:
                 progress_tracker.update(len(batch))
             return results
-
+
         # Use batch processing for efficiency
         if self.config.nlp_config.enable_parallel_processing and len(commits) > 1000:
             all_results = self.batch_processor.process_batches(
-                commits,
-                process_batch_with_progress,
-                parallel=True
+                commits, process_batch_with_progress, parallel=True
             )
         else:
             all_results = self.batch_processor.process_batches(
-                commits,
-                process_batch_with_progress,
-                parallel=False
+                commits, process_batch_with_progress, parallel=False
             )
-
-        self.processing_stats[
+
+        self.processing_stats["nlp_processed"] += len(commits)
         return all_results
-
-    def _separate_by_confidence(
+
+    def _separate_by_confidence(
+        self, results: list[QualitativeCommitData]
+    ) -> tuple[list[QualitativeCommitData], list[dict[str, Any]]]:
         """Separate results by confidence threshold.
-
-        Args:
+
+        Args:
             results: List of NLP analysis results
-
+
         Returns:
             Tuple of (confident_results, uncertain_commit_dicts)
         """
         confident_results = []
         uncertain_commits = []
-
+
         for result in results:
             if result.confidence_score >= self.config.confidence_threshold:
                 confident_results.append(result)
             else:
                 # Convert back to commit dict for LLM processing
                 commit_dict = {
-
-
-
-
-
-
-
-
+                    "hash": result.hash,
+                    "message": result.message,
+                    "author_name": result.author_name,
+                    "author_email": result.author_email,
+                    "timestamp": result.timestamp,
+                    "files_changed": result.files_changed,
+                    "insertions": result.insertions,
+                    "deletions": result.deletions,
                 }
                 uncertain_commits.append(commit_dict)
-
+
         return confident_results, uncertain_commits
-
-    def _process_with_llm(
-
+
+    def _process_with_llm(
+        self, commits: list[dict[str, Any]], progress_tracker: Optional[ProgressTracker]
+    ) -> list[QualitativeCommitData]:
         """Process uncertain commits with LLM fallback.
-
+
         Args:
             commits: List of uncertain commit dictionaries
             progress_tracker: Optional progress tracker
-
+
         Returns:
             List of QualitativeCommitData from LLM processing
         """
         if not commits or not self.llm_fallback:
             return []
-
+
         # Check LLM usage limits
         max_llm_commits = int(len(commits) * self.config.max_llm_fallback_pct)
         if len(commits) > max_llm_commits:

@@ -263,278 +392,282 @@ class QualitativeProcessor:
                 f"LLM limit reached: processing {max_llm_commits} of {len(commits)} uncertain commits"
             )
             commits = commits[:max_llm_commits]
-
+
         # Group similar commits for batch processing
         grouped_commits = self.llm_fallback.group_similar_commits(commits)
-        self.logger.debug(
-
+        self.logger.debug(
+            f"Grouped {len(commits)} commits into {len(grouped_commits)} groups for LLM processing"
+        )
+
         all_results = []
-
+
         for group in grouped_commits:
             try:
                 group_results = self.llm_fallback.process_group(group)
                 all_results.extend(group_results)
-
+
                 if progress_tracker:
                     progress_tracker.update(len(group))
-
+
             except Exception as e:
                 self.logger.error(f"LLM processing failed for group of {len(group)} commits: {e}")
                 # Create fallback results for this group
                 fallback_results = [self._convert_to_uncertain_result(commit) for commit in group]
                 all_results.extend(fallback_results)
-
+
                 if progress_tracker:
                     progress_tracker.update(len(group))
-
-        self.processing_stats[
+
+        self.processing_stats["llm_processed"] += len(commits)
         return all_results
-
-    def _create_result_from_cache(
-
+
+    def _create_result_from_cache(
+        self, commit: dict[str, Any], cached_data: dict[str, Any]
+    ) -> QualitativeCommitData:
         """Create QualitativeCommitData from cached pattern.
-
+
         Args:
             commit: Original commit dictionary
             cached_data: Cached classification data
-
+
         Returns:
             QualitativeCommitData object
         """
         return QualitativeCommitData(
             # Copy commit fields
-            hash=commit.get(
-            message=commit.get(
-            author_name=commit.get(
-            author_email=commit.get(
-            timestamp=commit.get(
-            files_changed=commit.get(
-            insertions=commit.get(
-            deletions=commit.get(
-
+            hash=commit.get("hash", ""),
+            message=commit.get("message", ""),
+            author_name=commit.get("author_name", ""),
+            author_email=commit.get("author_email", ""),
+            timestamp=commit.get("timestamp", time.time()),
+            files_changed=commit.get("files_changed", []),
+            insertions=commit.get("insertions", 0),
+            deletions=commit.get("deletions", 0),
             # Use cached classification data
-            change_type=cached_data.get(
-            change_type_confidence=cached_data.get(
-            business_domain=cached_data.get(
-            domain_confidence=cached_data.get(
-            risk_level=cached_data.get(
-            risk_factors=cached_data.get(
-            intent_signals=cached_data.get(
-            collaboration_patterns=cached_data.get(
-            technical_context={
-
+            change_type=cached_data.get("change_type", "unknown"),
+            change_type_confidence=cached_data.get("change_type_confidence", 0.5),
+            business_domain=cached_data.get("business_domain", "unknown"),
+            domain_confidence=cached_data.get("domain_confidence", 0.5),
+            risk_level=cached_data.get("risk_level", "medium"),
+            risk_factors=cached_data.get("risk_factors", []),
+            intent_signals=cached_data.get("intent_signals", {}),
+            collaboration_patterns=cached_data.get("collaboration_patterns", {}),
+            technical_context={"processing_method": "cached"},
             # Processing metadata
-            processing_method=
+            processing_method="cache",
             processing_time_ms=0.5,  # Very fast for cached results
-            confidence_score=cached_data.get(
+            confidence_score=cached_data.get("confidence_score", 0.5),
         )
-
-    def _convert_to_uncertain_result(self, commit:
+
+    def _convert_to_uncertain_result(self, commit: dict[str, Any]) -> QualitativeCommitData:
         """Convert commit to uncertain result when LLM is unavailable.
-
+
         Args:
             commit: Commit dictionary
-
+
         Returns:
             QualitativeCommitData with uncertain classifications
         """
         return QualitativeCommitData(
             # Copy commit fields
-            hash=commit.get(
-            message=commit.get(
-            author_name=commit.get(
-            author_email=commit.get(
-            timestamp=commit.get(
-            files_changed=commit.get(
-            insertions=commit.get(
-            deletions=commit.get(
-
+            hash=commit.get("hash", ""),
+            message=commit.get("message", ""),
+            author_name=commit.get("author_name", ""),
+            author_email=commit.get("author_email", ""),
+            timestamp=commit.get("timestamp", time.time()),
+            files_changed=commit.get("files_changed", []),
+            insertions=commit.get("insertions", 0),
+            deletions=commit.get("deletions", 0),
             # Uncertain classifications
-            change_type=
+            change_type="unknown",
             change_type_confidence=0.3,
-            business_domain=
+            business_domain="unknown",
             domain_confidence=0.3,
-            risk_level=
-            risk_factors=[
-            intent_signals={
+            risk_level="medium",
+            risk_factors=["low_confidence_classification"],
+            intent_signals={"confidence": 0.3},
             collaboration_patterns={},
-            technical_context={
-
+            technical_context={"processing_method": "uncertain_fallback"},
             # Processing metadata
-            processing_method=
+            processing_method="nlp",
             processing_time_ms=1.0,
-            confidence_score=0.3
+            confidence_score=0.3,
         )
-
-    def _create_disabled_results(
+
+    def _create_disabled_results(
+        self, commits: list[dict[str, Any]]
+    ) -> list[QualitativeCommitData]:
         """Create disabled results when qualitative analysis is turned off.
-
+
         Args:
             commits: List of commit dictionaries
-
+
         Returns:
             List of QualitativeCommitData with disabled status
         """
         results = []
-
+
         for commit in commits:
             result = QualitativeCommitData(
                 # Copy commit fields
-                hash=commit.get(
-                message=commit.get(
-                author_name=commit.get(
-                author_email=commit.get(
-                timestamp=commit.get(
-                files_changed=commit.get(
-                insertions=commit.get(
-                deletions=commit.get(
-
+                hash=commit.get("hash", ""),
+                message=commit.get("message", ""),
+                author_name=commit.get("author_name", ""),
+                author_email=commit.get("author_email", ""),
+                timestamp=commit.get("timestamp", time.time()),
+                files_changed=commit.get("files_changed", []),
+                insertions=commit.get("insertions", 0),
+                deletions=commit.get("deletions", 0),
                 # Disabled classifications
-                change_type=
+                change_type="disabled",
                 change_type_confidence=0.0,
-                business_domain=
+                business_domain="disabled",
                 domain_confidence=0.0,
-                risk_level=
-                risk_factors=[
-                intent_signals={
+                risk_level="unknown",
+                risk_factors=["qualitative_analysis_disabled"],
+                intent_signals={"disabled": True},
                 collaboration_patterns={},
-                technical_context={
-
+                technical_context={"processing_method": "disabled"},
                 # Processing metadata
-                processing_method=
+                processing_method="disabled",
                 processing_time_ms=0.0,
-                confidence_score=0.0
+                confidence_score=0.0,
             )
             results.append(result)
-
+
         return results
-
-    def _update_processing_stats(
-
+
+    def _update_processing_stats(
+        self, total_commits: int, cached: int, nlp_processed: int, llm_processed: int
+    ) -> None:
         """Update processing statistics.
-
+
         Args:
             total_commits: Total number of commits processed
             cached: Number of cache hits
             nlp_processed: Number processed by NLP
             llm_processed: Number processed by LLM
         """
-        self.processing_stats[
-        self.processing_stats[
-        self.processing_stats[
-        self.processing_stats[
-
+        self.processing_stats["total_processed"] += total_commits
+        self.processing_stats["cache_hits"] += cached
+        self.processing_stats["nlp_processed"] += nlp_processed
+        self.processing_stats["llm_processed"] += llm_processed
+
         # Log processing breakdown
         cache_pct = (cached / total_commits) * 100 if total_commits > 0 else 0
         nlp_pct = (nlp_processed / total_commits) * 100 if total_commits > 0 else 0
         llm_pct = (llm_processed / total_commits) * 100 if total_commits > 0 else 0
-
+
         self.logger.info(
             f"Processing breakdown: {cache_pct:.1f}% cached, "
             f"{nlp_pct:.1f}% NLP, {llm_pct:.1f}% LLM"
         )
-
+
     def _should_optimize_cache(self) -> bool:
         """Check if cache optimization should be performed.
-
+
         Returns:
             True if optimization should be performed
         """
         # Optimize every 10,000 commits or every hour
-
-
-
-
-
-
+        return bool(
+            self.processing_stats["total_processed"] % 10000 == 0
+            or self.processing_stats["last_optimization"] is None
+            or time.time() - self.processing_stats["last_optimization"] > 3600
+        )
+
     def _optimize_system(self) -> None:
         """Perform system optimization."""
         self.logger.info("Performing system optimization...")
-
+
         # Optimize pattern cache
         self.pattern_cache.optimize_cache()
-
+
         # Update last optimization time
-        self.processing_stats[
-
+        self.processing_stats["last_optimization"] = time.time()
+
         self.logger.info("System optimization completed")
-
-    def get_processing_statistics(self) ->
+
+    def get_processing_statistics(self) -> dict[str, Any]:
         """Get comprehensive processing statistics.
-
+
         Returns:
             Dictionary with processing statistics
         """
         # Get component statistics
         cache_stats = self.pattern_cache.get_cache_statistics()
         nlp_stats = self.nlp_engine.get_performance_stats()
-
+
         # Calculate processing rates
-        total_time = time.time() - (self.processing_stats[
-        commits_per_second =
-
+        total_time = time.time() - (self.processing_stats["processing_start_time"] or time.time())
+        commits_per_second = (
+            self.processing_stats["total_processed"] / total_time if total_time > 0 else 0
+        )
+
         # Calculate method percentages
-        total = self.processing_stats[
+        total = self.processing_stats["total_processed"]
         method_percentages = {
-
-
-
+            "cache": (self.processing_stats["cache_hits"] / total * 100) if total > 0 else 0,
+            "nlp": (self.processing_stats["nlp_processed"] / total * 100) if total > 0 else 0,
+            "llm": (self.processing_stats["llm_processed"] / total * 100) if total > 0 else 0,
         }
-
+
         stats = {
-
-
-
-
-
+            "processing_summary": {
+                "total_commits_processed": self.processing_stats["total_processed"],
+                "commits_per_second": commits_per_second,
+                "total_processing_time_seconds": total_time,
+                "method_breakdown": method_percentages,
+            },
+            "cache_statistics": cache_stats,
+            "nlp_statistics": nlp_stats,
+            "configuration": {
+                "enabled": self.config.enabled,
+                "confidence_threshold": self.config.confidence_threshold,
+                "max_llm_fallback_pct": self.config.max_llm_fallback_pct,
+                "batch_size": self.config.batch_size,
             },
-            'cache_statistics': cache_stats,
-            'nlp_statistics': nlp_stats,
-            'configuration': {
-                'enabled': self.config.enabled,
-                'confidence_threshold': self.config.confidence_threshold,
-                'max_llm_fallback_pct': self.config.max_llm_fallback_pct,
-                'batch_size': self.config.batch_size
-            }
         }
-
+
         # Add LLM statistics if available
         if self.llm_fallback:
-            stats[
-
-
+            stats["llm_statistics"] = {
+                "cost_tracking": self.llm_fallback.cost_tracker.get_usage_stats(),
+                "model_usage": "available",
             }
         else:
-            stats[
-
+            stats["llm_statistics"] = {"model_usage": "disabled"}
+
         return stats
-
-    def validate_setup(self) ->
+
+    def validate_setup(self) -> tuple[bool, list[str]]:
         """Validate processor setup and dependencies.
-
+
         Returns:
             Tuple of (is_valid, list_of_issues)
         """
         issues = []
-
+
         # Validate NLP engine
         nlp_valid, nlp_issues = self.nlp_engine.validate_setup()
         if not nlp_valid:
             issues.extend([f"NLP: {issue}" for issue in nlp_issues])
-
+
         # Validate LLM fallback if configured
         if self.config.llm_config.openrouter_api_key and self.llm_fallback is None:
             issues.append("LLM: API key configured but fallback system failed to initialize")
-
+
         # Validate configuration
         config_warnings = self.config.validate()
         issues.extend([f"Config: {warning}" for warning in config_warnings])
-
+
         # Test database connection
         try:
             with self.database.get_session() as session:
-
+                from sqlalchemy import text
+
+                session.execute(text("SELECT 1"))
         except Exception as e:
             issues.append(f"Database: Connection failed - {e}")
-
-        return len(issues) == 0, issues
+
+        return len(issues) == 0, issues