gitflow-analytics 1.0.3__py3-none-any.whl → 1.3.11__py3-none-any.whl
This diff compares two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only and reflects the changes between the package versions as published.
- gitflow_analytics/_version.py +1 -1
- gitflow_analytics/classification/__init__.py +31 -0
- gitflow_analytics/classification/batch_classifier.py +752 -0
- gitflow_analytics/classification/classifier.py +464 -0
- gitflow_analytics/classification/feature_extractor.py +725 -0
- gitflow_analytics/classification/linguist_analyzer.py +574 -0
- gitflow_analytics/classification/model.py +455 -0
- gitflow_analytics/cli.py +4158 -350
- gitflow_analytics/cli_rich.py +198 -48
- gitflow_analytics/config/__init__.py +43 -0
- gitflow_analytics/config/errors.py +261 -0
- gitflow_analytics/config/loader.py +905 -0
- gitflow_analytics/config/profiles.py +264 -0
- gitflow_analytics/config/repository.py +124 -0
- gitflow_analytics/config/schema.py +444 -0
- gitflow_analytics/config/validator.py +154 -0
- gitflow_analytics/config.py +44 -508
- gitflow_analytics/core/analyzer.py +1209 -98
- gitflow_analytics/core/cache.py +1337 -29
- gitflow_analytics/core/data_fetcher.py +1285 -0
- gitflow_analytics/core/identity.py +363 -14
- gitflow_analytics/core/metrics_storage.py +526 -0
- gitflow_analytics/core/progress.py +372 -0
- gitflow_analytics/core/schema_version.py +269 -0
- gitflow_analytics/extractors/ml_tickets.py +1100 -0
- gitflow_analytics/extractors/story_points.py +8 -1
- gitflow_analytics/extractors/tickets.py +749 -11
- gitflow_analytics/identity_llm/__init__.py +6 -0
- gitflow_analytics/identity_llm/analysis_pass.py +231 -0
- gitflow_analytics/identity_llm/analyzer.py +464 -0
- gitflow_analytics/identity_llm/models.py +76 -0
- gitflow_analytics/integrations/github_integration.py +175 -11
- gitflow_analytics/integrations/jira_integration.py +461 -24
- gitflow_analytics/integrations/orchestrator.py +124 -1
- gitflow_analytics/metrics/activity_scoring.py +322 -0
- gitflow_analytics/metrics/branch_health.py +470 -0
- gitflow_analytics/metrics/dora.py +379 -20
- gitflow_analytics/models/database.py +843 -53
- gitflow_analytics/pm_framework/__init__.py +115 -0
- gitflow_analytics/pm_framework/adapters/__init__.py +50 -0
- gitflow_analytics/pm_framework/adapters/jira_adapter.py +1845 -0
- gitflow_analytics/pm_framework/base.py +406 -0
- gitflow_analytics/pm_framework/models.py +211 -0
- gitflow_analytics/pm_framework/orchestrator.py +652 -0
- gitflow_analytics/pm_framework/registry.py +333 -0
- gitflow_analytics/qualitative/__init__.py +9 -10
- gitflow_analytics/qualitative/chatgpt_analyzer.py +259 -0
- gitflow_analytics/qualitative/classifiers/__init__.py +3 -3
- gitflow_analytics/qualitative/classifiers/change_type.py +518 -244
- gitflow_analytics/qualitative/classifiers/domain_classifier.py +272 -165
- gitflow_analytics/qualitative/classifiers/intent_analyzer.py +321 -222
- gitflow_analytics/qualitative/classifiers/llm/__init__.py +35 -0
- gitflow_analytics/qualitative/classifiers/llm/base.py +193 -0
- gitflow_analytics/qualitative/classifiers/llm/batch_processor.py +383 -0
- gitflow_analytics/qualitative/classifiers/llm/cache.py +479 -0
- gitflow_analytics/qualitative/classifiers/llm/cost_tracker.py +435 -0
- gitflow_analytics/qualitative/classifiers/llm/openai_client.py +403 -0
- gitflow_analytics/qualitative/classifiers/llm/prompts.py +373 -0
- gitflow_analytics/qualitative/classifiers/llm/response_parser.py +287 -0
- gitflow_analytics/qualitative/classifiers/llm_commit_classifier.py +607 -0
- gitflow_analytics/qualitative/classifiers/risk_analyzer.py +215 -189
- gitflow_analytics/qualitative/core/__init__.py +4 -4
- gitflow_analytics/qualitative/core/llm_fallback.py +239 -235
- gitflow_analytics/qualitative/core/nlp_engine.py +157 -148
- gitflow_analytics/qualitative/core/pattern_cache.py +214 -192
- gitflow_analytics/qualitative/core/processor.py +381 -248
- gitflow_analytics/qualitative/enhanced_analyzer.py +2236 -0
- gitflow_analytics/qualitative/example_enhanced_usage.py +420 -0
- gitflow_analytics/qualitative/models/__init__.py +7 -7
- gitflow_analytics/qualitative/models/schemas.py +155 -121
- gitflow_analytics/qualitative/utils/__init__.py +4 -4
- gitflow_analytics/qualitative/utils/batch_processor.py +136 -123
- gitflow_analytics/qualitative/utils/cost_tracker.py +142 -140
- gitflow_analytics/qualitative/utils/metrics.py +172 -158
- gitflow_analytics/qualitative/utils/text_processing.py +146 -104
- gitflow_analytics/reports/__init__.py +100 -0
- gitflow_analytics/reports/analytics_writer.py +539 -14
- gitflow_analytics/reports/base.py +648 -0
- gitflow_analytics/reports/branch_health_writer.py +322 -0
- gitflow_analytics/reports/classification_writer.py +924 -0
- gitflow_analytics/reports/cli_integration.py +427 -0
- gitflow_analytics/reports/csv_writer.py +1676 -212
- gitflow_analytics/reports/data_models.py +504 -0
- gitflow_analytics/reports/database_report_generator.py +427 -0
- gitflow_analytics/reports/example_usage.py +344 -0
- gitflow_analytics/reports/factory.py +499 -0
- gitflow_analytics/reports/formatters.py +698 -0
- gitflow_analytics/reports/html_generator.py +1116 -0
- gitflow_analytics/reports/interfaces.py +489 -0
- gitflow_analytics/reports/json_exporter.py +2770 -0
- gitflow_analytics/reports/narrative_writer.py +2287 -158
- gitflow_analytics/reports/story_point_correlation.py +1144 -0
- gitflow_analytics/reports/weekly_trends_writer.py +389 -0
- gitflow_analytics/training/__init__.py +5 -0
- gitflow_analytics/training/model_loader.py +377 -0
- gitflow_analytics/training/pipeline.py +550 -0
- gitflow_analytics/tui/__init__.py +1 -1
- gitflow_analytics/tui/app.py +129 -126
- gitflow_analytics/tui/screens/__init__.py +3 -3
- gitflow_analytics/tui/screens/analysis_progress_screen.py +188 -179
- gitflow_analytics/tui/screens/configuration_screen.py +154 -178
- gitflow_analytics/tui/screens/loading_screen.py +100 -110
- gitflow_analytics/tui/screens/main_screen.py +89 -72
- gitflow_analytics/tui/screens/results_screen.py +305 -281
- gitflow_analytics/tui/widgets/__init__.py +2 -2
- gitflow_analytics/tui/widgets/data_table.py +67 -69
- gitflow_analytics/tui/widgets/export_modal.py +76 -76
- gitflow_analytics/tui/widgets/progress_widget.py +41 -46
- gitflow_analytics-1.3.11.dist-info/METADATA +1015 -0
- gitflow_analytics-1.3.11.dist-info/RECORD +122 -0
- gitflow_analytics-1.0.3.dist-info/METADATA +0 -490
- gitflow_analytics-1.0.3.dist-info/RECORD +0 -62
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.11.dist-info}/WHEEL +0 -0
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.11.dist-info}/entry_points.txt +0 -0
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.11.dist-info}/licenses/LICENSE +0 -0
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.11.dist-info}/top_level.txt +0 -0
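Of everything listed above, the most instructive excerpt reproduced below is the rewritten `gitflow_analytics/qualitative/core/llm_fallback.py`, whose `ModelRouter` picks an OpenRouter model for each commit batch from a complexity score and the remaining daily budget. As a reading aid, here is a minimal standalone sketch of that routing decision; the thresholds (0.3 complexity, $0.50 and $2.00 of budget) are taken from the diff below, while the `RoutingConfig` dataclass and the concrete model names are illustrative stand-ins, not values shipped in the package.

```python
from dataclasses import dataclass


@dataclass
class RoutingConfig:
    """Illustrative stand-in for the package's LLMConfig; field names mirror the diff."""

    primary_model: str = "anthropic/claude-3-haiku"  # hypothetical model name
    complex_model: str = "anthropic/claude-3.5-sonnet"  # hypothetical model name
    fallback_model: str = "meta-llama/llama-3.1-8b-instruct:free"  # hypothetical model name
    complexity_threshold: float = 0.7  # assumed default, not confirmed by the diff


def select_model(cfg: RoutingConfig, complexity_score: float, remaining_budget: float) -> str:
    """Routing logic restated from ModelRouter.select_model in the diff below."""
    if remaining_budget <= 0:
        return cfg.fallback_model  # daily budget exhausted: free model only
    if complexity_score < 0.3 or remaining_budget < 0.50:
        return cfg.fallback_model  # trivial batch or tight budget: free model
    if complexity_score > cfg.complexity_threshold and remaining_budget > 2.0:
        return cfg.complex_model  # hard batch with budget headroom: premium model
    return cfg.primary_model  # default: fast, cheap primary model


print(select_model(RoutingConfig(), complexity_score=0.8, remaining_budget=5.0))
```

The effect is that spend gates quality: the premium model is reachable only while more than $2.00 of the daily budget remains, and everything degrades to the free fallback once the budget runs out.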
gitflow_analytics/qualitative/core/llm_fallback.py

@@ -1,13 +1,12 @@
 """LLM fallback system for uncertain commit classifications using OpenRouter."""

-import
+import hashlib
 import json
 import logging
-import time
-import uuid
-from typing import Dict, List, Optional, Tuple, Any
-import hashlib
 import os
+import time
+from pathlib import Path
+from typing import Any, Optional

 from ..models.schemas import LLMConfig, QualitativeCommitData
 from ..utils.cost_tracker import CostTracker
@@ -16,23 +15,26 @@ from ..utils.text_processing import TextProcessor
 try:
     import openai
     import tiktoken
+
     OPENAI_AVAILABLE = True
 except ImportError:
     OPENAI_AVAILABLE = False
+
     # Create mock objects for type hints when not available
     class MockOpenAI:
         class OpenAI:
             pass
+
     openai = MockOpenAI()
     tiktoken = None


 class ModelRouter:
     """Smart model selection based on complexity and cost constraints."""
-
+
     def __init__(self, config: LLMConfig, cost_tracker: CostTracker):
         """Initialize model router.
-
+
         Args:
             config: LLM configuration
             cost_tracker: Cost tracking instance
@@ -40,51 +42,52 @@ class ModelRouter:
         self.config = config
         self.cost_tracker = cost_tracker
         self.logger = logging.getLogger(__name__)
-
+
     def select_model(self, complexity_score: float, batch_size: int) -> str:
         """Select appropriate model based on complexity and budget.
-
+
         Args:
             complexity_score: Complexity score (0.0 to 1.0)
             batch_size: Number of commits in batch
-
+
         Returns:
             Selected model name
         """
         # Check daily budget remaining
         remaining_budget = self.cost_tracker.check_budget_remaining()
-
+
         # If we're over budget, use free model
         if remaining_budget <= 0:
             self.logger.warning("Daily budget exceeded, using free model")
             return self.config.fallback_model
-
+
         # For simple cases or when budget is tight, use free model
         if complexity_score < 0.3 or remaining_budget < 0.50:
             return self.config.fallback_model
-
+
         # For complex cases with sufficient budget, use premium model
         if complexity_score > self.config.complexity_threshold and remaining_budget > 2.0:
             return self.config.complex_model
-
+
         # Default to primary model (Claude Haiku - fast and cheap)
         return self.config.primary_model


 class LLMFallback:
     """Strategic LLM usage for uncertain cases via OpenRouter.
-
+
     This class provides intelligent fallback to LLM processing when NLP
     classification confidence is below the threshold. It uses OpenRouter
     to access multiple models cost-effectively.
     """
-
-    def __init__(self, config: LLMConfig):
+
+    def __init__(self, config: LLMConfig, cache_dir: Optional[Path] = None):
         """Initialize LLM fallback system.
-
+
         Args:
             config: LLM configuration
-
+            cache_dir: Cache directory for cost tracking (defaults to config cache_dir)
+
         Raises:
             ImportError: If OpenAI library is not available
         """
@@ -92,36 +95,41 @@ class LLMFallback:
             raise ImportError(
                 "OpenAI library required for LLM fallback. Install with: pip install openai"
             )
-
+
         self.config = config
         self.logger = logging.getLogger(__name__)
-
+
         # Initialize OpenRouter client
         self.client = self._initialize_openrouter_client()
-
-        # Initialize utilities
-
+
+        # Initialize utilities with proper cache directory
+        cost_cache_dir = cache_dir / ".qualitative_cache" if cache_dir else None
+        self.cost_tracker = CostTracker(
+            cache_dir=cost_cache_dir, daily_budget=config.max_daily_cost
+        )
         self.model_router = ModelRouter(config, self.cost_tracker)
         self.text_processor = TextProcessor()
-
+
         # Batch processing cache
         self.batch_cache = {}
-
+
         # Token encoder for cost estimation
         try:
             self.encoding = tiktoken.get_encoding("cl100k_base")  # GPT-4 encoding
         except Exception:
             self.encoding = None
-            self.logger.warning(
-
+            self.logger.warning(
+                "Could not load tiktoken encoder, token estimation may be inaccurate"
+            )
+
         self.logger.info("LLM fallback system initialized with OpenRouter")
-
+
     def _initialize_openrouter_client(self) -> openai.OpenAI:
         """Initialize OpenRouter client with API key.
-
+
         Returns:
             Configured OpenAI client for OpenRouter
-
+
         Raises:
             ValueError: If API key is not configured
         """
@@ -130,117 +138,116 @@ class LLMFallback:
             raise ValueError(
                 "OpenRouter API key not configured. Set OPENROUTER_API_KEY environment variable."
             )
-
+
         return openai.OpenAI(
             base_url=self.config.base_url,
             api_key=api_key,
             default_headers={
                 "HTTP-Referer": "https://github.com/bobmatnyc/gitflow-analytics",
-                "X-Title": "GitFlow Analytics - Qualitative Analysis"
-            }
+                "X-Title": "GitFlow Analytics - Qualitative Analysis",
+            },
         )
-
+
     def _resolve_api_key(self) -> Optional[str]:
         """Resolve OpenRouter API key from config or environment.
-
+
         Returns:
             API key string or None if not found
         """
         api_key = self.config.openrouter_api_key
-
+
         if api_key.startswith("${") and api_key.endswith("}"):
             env_var = api_key[2:-1]
             return os.environ.get(env_var)
         else:
             return api_key
-
-    def group_similar_commits(self, commits:
+
+    def group_similar_commits(self, commits: list[dict[str, Any]]) -> list[list[dict[str, Any]]]:
         """Group similar commits for efficient batch processing.
-
+
         Args:
             commits: List of commit dictionaries
-
+
         Returns:
             List of commit groups
         """
         if not commits:
             return []
-
+
         groups = []
         similarity_threshold = self.config.similarity_threshold
-
+
         for commit in commits:
             # Find similar group or create new one
             placed = False
-
+
             for group in groups:
                 if len(group) >= self.config.max_group_size:
                     continue  # Group is full
-
+
                 # Calculate similarity with first commit in group
                 similarity = self.text_processor.calculate_message_similarity(
-                    commit.get(
-                    group[0].get('message', '')
+                    commit.get("message", ""), group[0].get("message", "")
                 )
-
+
                 if similarity > similarity_threshold:
                     group.append(commit)
                     placed = True
                     break
-
+
             if not placed:
                 groups.append([commit])
-
+
         self.logger.debug(f"Grouped {len(commits)} commits into {len(groups)} groups")
         return groups
-
-    def process_group(self, commits:
+
+    def process_group(self, commits: list[dict[str, Any]]) -> list[QualitativeCommitData]:
         """Process a group of similar commits with OpenRouter.
-
+
         Args:
             commits: List of similar commit dictionaries
-
+
         Returns:
             List of QualitativeCommitData with LLM analysis
         """
         if not commits:
             return []
-
+
         start_time = time.time()
-
+
         # Check cache first
         cache_key = self._generate_group_cache_key(commits)
         if cache_key in self.batch_cache:
             self.logger.debug(f"Using cached result for {len(commits)} commits")
             template_result = self.batch_cache[cache_key]
             return self._apply_template_to_group(template_result, commits)
-
+
         # Assess complexity and select model
         complexity_score = self._assess_complexity(commits)
         selected_model = self.model_router.select_model(complexity_score, len(commits))
-
+
         self.logger.debug(
             f"Processing {len(commits)} commits with {selected_model} "
             f"(complexity: {complexity_score:.2f})"
         )
-
+
         # Build optimized prompt
         prompt = self._build_batch_classification_prompt(commits)
-
+
         # Estimate tokens and cost
         estimated_input_tokens = self._estimate_tokens(prompt)
         if not self.cost_tracker.can_afford_call(selected_model, estimated_input_tokens * 2):
             self.logger.warning("Cannot afford LLM call, using fallback model")
             selected_model = self.config.fallback_model
-
+
         # Make OpenRouter API call
         try:
             response = self._call_openrouter(prompt, selected_model)
             processing_time = time.time() - start_time
-
+
             # Parse response
             results = self._parse_llm_response(response, commits)
-
+
             # Track costs and performance
             estimated_output_tokens = self._estimate_tokens(response)
             self.cost_tracker.record_call(
@@ -249,22 +256,22 @@ class LLMFallback:
                 output_tokens=estimated_output_tokens,
                 processing_time=processing_time,
                 batch_size=len(commits),
-                success=len(results) > 0
+                success=len(results) > 0,
             )
-
+
             # Cache successful result
             if results:
                 self.batch_cache[cache_key] = self._create_template_from_results(results)
-
+
             # Update processing time in results
             for result in results:
                 result.processing_time_ms = (processing_time * 1000) / len(results)
-
+
             return results
-
+
         except Exception as e:
             self.logger.error(f"OpenRouter processing failed: {e}")
-
+
             # Record failed call
             self.cost_tracker.record_call(
                 model=selected_model,
@@ -273,25 +280,25 @@ class LLMFallback:
                 processing_time=time.time() - start_time,
                 batch_size=len(commits),
                 success=False,
-                error_message=str(e)
+                error_message=str(e),
             )
-
+
             # Try fallback model if primary failed
             if selected_model != self.config.fallback_model:
                 return self._retry_with_fallback_model(commits, prompt)
             else:
                 return self._create_fallback_results(commits)
-
+
     def _call_openrouter(self, prompt: str, model: str) -> str:
         """Make API call to OpenRouter with selected model.
-
+
         Args:
             prompt: Classification prompt
            model: Model to use
-
+
         Returns:
             Response content
-
+
         Raises:
             Exception: If API call fails
         """
@@ -300,52 +307,52 @@ class LLMFallback:
                 model=model,
                 messages=[
                     {
-                        "role": "system",
-                        "content": "You are an expert Git commit classifier. Analyze commits and respond only with valid JSON. Be concise but accurate."
+                        "role": "system",
+                        "content": "You are an expert Git commit classifier. Analyze commits and respond only with valid JSON. Be concise but accurate.",
                     },
-                    {"role": "user", "content": prompt}
+                    {"role": "user", "content": prompt},
                 ],
                 max_tokens=self.config.max_tokens,
                 temperature=self.config.temperature,
-                stream=False
+                stream=False,
             )
-
+
             return response.choices[0].message.content
-
+
         except Exception as e:
             self.logger.error(f"OpenRouter API call failed: {e}")
             raise
-
-    def _build_batch_classification_prompt(self, commits:
+
+    def _build_batch_classification_prompt(self, commits: list[dict[str, Any]]) -> str:
         """Build optimized prompt for OpenRouter batch processing.
-
+
         Args:
             commits: List of commit dictionaries
-
+
         Returns:
             Formatted prompt string
         """
         # Limit to max group size for token management
-        commits_to_process = commits[:self.config.max_group_size]
-
+        commits_to_process = commits[: self.config.max_group_size]
+
         commit_data = []
         for i, commit in enumerate(commits_to_process, 1):
-            message = commit.get(
-            files = commit.get(
-
+            message = commit.get("message", "")[:150]  # Truncate long messages
+            files = commit.get("files_changed", [])
+
             # Include key file context
             files_context = ""
             if files:
                 key_files = files[:5]  # Top 5 files
                 files_context = f" | Modified: {', '.join(key_files)}"
-
+
             # Add size context
-            insertions = commit.get(
-            deletions = commit.get(
+            insertions = commit.get("insertions", 0)
+            deletions = commit.get("deletions", 0)
             size_context = f" | +{insertions}/-{deletions}"
-
+
             commit_data.append(f"{i}. {message}{files_context}{size_context}")
-
+
         prompt = f"""Analyze these Git commits and classify each one. Consider the commit message, modified files, and change size.

 Commits to classify:
@@ -363,134 +370,132 @@ Respond with JSON array only:
 [{{"id": 1, "change_type": "feature", "business_domain": "frontend", "risk_level": "low", "confidence": 0.9, "urgency": "routine", "complexity": "moderate"}}]"""

         return prompt
-
-    def _parse_llm_response(
+
+    def _parse_llm_response(
+        self, response: str, commits: list[dict[str, Any]]
+    ) -> list[QualitativeCommitData]:
         """Parse LLM response into QualitativeCommitData objects.
-
+
         Args:
             response: JSON response from LLM
             commits: Original commit dictionaries
-
+
         Returns:
             List of QualitativeCommitData objects
         """
         try:
             # Clean response (remove any markdown formatting)
             cleaned_response = response.strip()
-            if cleaned_response.startswith(
+            if cleaned_response.startswith("```json"):
                 cleaned_response = cleaned_response[7:]
-            if cleaned_response.endswith(
+            if cleaned_response.endswith("```"):
                 cleaned_response = cleaned_response[:-3]
             cleaned_response = cleaned_response.strip()
-
+
             classifications = json.loads(cleaned_response)
-
+
             if not isinstance(classifications, list):
                 raise ValueError("Response is not a JSON array")
-
+
             results = []
-
+
             for i, commit in enumerate(commits):
                 if i < len(classifications):
                     classification = classifications[i]
                 else:
                     # Fallback if fewer classifications than commits
                     classification = {
-
-
-
-
-
-
+                        "change_type": "unknown",
+                        "business_domain": "unknown",
+                        "risk_level": "medium",
+                        "confidence": 0.5,
+                        "urgency": "routine",
+                        "complexity": "moderate",
                     }
-
+
                 result = QualitativeCommitData(
                     # Copy existing commit fields
-                    hash=commit.get(
-                    message=commit.get(
-                    author_name=commit.get(
-                    author_email=commit.get(
-                    timestamp=commit.get(
-                    files_changed=commit.get(
-                    insertions=commit.get(
-                    deletions=commit.get(
-
+                    hash=commit.get("hash", ""),
+                    message=commit.get("message", ""),
+                    author_name=commit.get("author_name", ""),
+                    author_email=commit.get("author_email", ""),
+                    timestamp=commit.get("timestamp", time.time()),
+                    files_changed=commit.get("files_changed", []),
+                    insertions=commit.get("insertions", 0),
+                    deletions=commit.get("deletions", 0),
                     # LLM-provided classifications
-                    change_type=classification.get(
-                    change_type_confidence=classification.get(
-                    business_domain=classification.get(
-                    domain_confidence=classification.get(
-                    risk_level=classification.get(
-                    risk_factors=classification.get(
-
+                    change_type=classification.get("change_type", "unknown"),
+                    change_type_confidence=classification.get("confidence", 0.5),
+                    business_domain=classification.get("business_domain", "unknown"),
+                    domain_confidence=classification.get("confidence", 0.5),
+                    risk_level=classification.get("risk_level", "medium"),
+                    risk_factors=classification.get("risk_factors", []),
                     # Intent signals from LLM analysis
                     intent_signals={
-
-
-
-
+                        "urgency": classification.get("urgency", "routine"),
+                        "complexity": classification.get("complexity", "moderate"),
+                        "confidence": classification.get("confidence", 0.5),
+                        "signals": [
+                            f"llm_classified:{classification.get('change_type', 'unknown')}"
+                        ],
                     },
                     collaboration_patterns={},
-                    technical_context={
-                        'llm_model': 'openrouter',
-                        'processing_method': 'batch'
-                    },
-
+                    technical_context={"llm_model": "openrouter", "processing_method": "batch"},
                     # Processing metadata
-                    processing_method=
+                    processing_method="llm",
                     processing_time_ms=0,  # Set by caller
-                    confidence_score=classification.get(
+                    confidence_score=classification.get("confidence", 0.5),
                 )
                 results.append(result)
-
+
             return results
-
+
         except (json.JSONDecodeError, KeyError, ValueError) as e:
             self.logger.error(f"Failed to parse LLM response: {e}")
             self.logger.debug(f"Raw response: {response}")
             return self._create_fallback_results(commits)
-
-    def _assess_complexity(self, commits:
+
+    def _assess_complexity(self, commits: list[dict[str, Any]]) -> float:
         """Assess complexity of commits for model selection.
-
+
         Args:
             commits: List of commit dictionaries
-
+
         Returns:
             Complexity score (0.0 to 1.0)
         """
         if not commits:
             return 0.0
-
+
         total_complexity = 0.0
-
+
         for commit in commits:
             # Message complexity
-            message = commit.get(
+            message = commit.get("message", "")
             message_complexity = min(1.0, len(message.split()) / 20.0)
-
+
             # File change complexity
-            files_changed = len(commit.get(
+            files_changed = len(commit.get("files_changed", []))
             file_complexity = min(1.0, files_changed / 15.0)
-
+
             # Size complexity
-            total_changes = commit.get(
+            total_changes = commit.get("insertions", 0) + commit.get("deletions", 0)
             size_complexity = min(1.0, total_changes / 200.0)
-
+
             # Combine complexities
-            commit_complexity = (
-
-
+            commit_complexity = (
+                message_complexity * 0.3 + file_complexity * 0.4 + size_complexity * 0.3
+            )
             total_complexity += commit_complexity
-
+
         return total_complexity / len(commits)
-
+
     def _estimate_tokens(self, text: str) -> int:
         """Estimate token count for text.
-
+
         Args:
             text: Text to count tokens for
-
+
         Returns:
             Estimated token count
         """
@@ -499,105 +504,104 @@ Respond with JSON array only:
             return len(self.encoding.encode(text))
         except Exception:
             pass
-
+
         # Fallback estimation (roughly 4 characters per token)
         return len(text) // 4
-
-    def _generate_group_cache_key(self, commits:
+
+    def _generate_group_cache_key(self, commits: list[dict[str, Any]]) -> str:
         """Generate cache key for a group of commits.
-
+
         Args:
             commits: List of commit dictionaries
-
+
         Returns:
             Cache key string
         """
         # Create fingerprint from commit messages and file patterns
         fingerprints = []
         for commit in commits:
-            message = commit.get(
-            files = commit.get(
+            message = commit.get("message", "")
+            files = commit.get("files_changed", [])
             fingerprint = self.text_processor.create_semantic_fingerprint(message, files)
             fingerprints.append(fingerprint)
-
-        combined_fingerprint =
+
+        combined_fingerprint = "|".join(sorted(fingerprints))
         return hashlib.md5(combined_fingerprint.encode()).hexdigest()
-
-    def _create_template_from_results(self, results:
+
+    def _create_template_from_results(self, results: list[QualitativeCommitData]) -> dict[str, Any]:
         """Create a template from successful results for caching.
-
+
         Args:
             results: List of analysis results
-
+
         Returns:
             Template dictionary
         """
         if not results:
             return {}
-
+
         # Use first result as template
         template = results[0]
         return {
-
-
-
-
+            "change_type": template.change_type,
+            "business_domain": template.business_domain,
+            "risk_level": template.risk_level,
+            "confidence_score": template.confidence_score,
         }
-
-    def _apply_template_to_group(
-
+
+    def _apply_template_to_group(
+        self, template: dict[str, Any], commits: list[dict[str, Any]]
+    ) -> list[QualitativeCommitData]:
         """Apply cached template to a group of commits.
-
+
         Args:
             template: Cached analysis template
             commits: List of commit dictionaries
-
+
         Returns:
             List of QualitativeCommitData using template
         """
         results = []
-
+
         for commit in commits:
             result = QualitativeCommitData(
                 # Copy existing commit fields
-                hash=commit.get(
-                message=commit.get(
-                author_name=commit.get(
-                author_email=commit.get(
-                timestamp=commit.get(
-                files_changed=commit.get(
-                insertions=commit.get(
-                deletions=commit.get(
-
+                hash=commit.get("hash", ""),
+                message=commit.get("message", ""),
+                author_name=commit.get("author_name", ""),
+                author_email=commit.get("author_email", ""),
+                timestamp=commit.get("timestamp", time.time()),
+                files_changed=commit.get("files_changed", []),
+                insertions=commit.get("insertions", 0),
+                deletions=commit.get("deletions", 0),
                 # Apply template values
-                change_type=template.get(
-                change_type_confidence=template.get(
-                business_domain=template.get(
-                domain_confidence=template.get(
-                risk_level=template.get(
+                change_type=template.get("change_type", "unknown"),
+                change_type_confidence=template.get("confidence_score", 0.5),
+                business_domain=template.get("business_domain", "unknown"),
+                domain_confidence=template.get("confidence_score", 0.5),
+                risk_level=template.get("risk_level", "medium"),
                 risk_factors=[],
-
-                intent_signals={'confidence': template.get('confidence_score', 0.5)},
+                intent_signals={"confidence": template.get("confidence_score", 0.5)},
                 collaboration_patterns={},
-                technical_context={
-
+                technical_context={"processing_method": "cached_template"},
                 # Processing metadata
-                processing_method=
+                processing_method="llm",
                 processing_time_ms=1.0,  # Very fast for cached results
-                confidence_score=template.get(
+                confidence_score=template.get("confidence_score", 0.5),
             )
             results.append(result)
-
+
         return results
-
-    def _retry_with_fallback_model(
-
+
+    def _retry_with_fallback_model(
+        self, commits: list[dict[str, Any]], prompt: str
+    ) -> list[QualitativeCommitData]:
         """Retry processing with fallback model.
-
+
         Args:
             commits: List of commit dictionaries
             prompt: Classification prompt
-
+
         Returns:
             List of QualitativeCommitData or fallback results
         """
@@ -608,46 +612,46 @@ Respond with JSON array only:
         except Exception as e:
             self.logger.error(f"Fallback model also failed: {e}")
             return self._create_fallback_results(commits)
-
-    def _create_fallback_results(
+
+    def _create_fallback_results(
+        self, commits: list[dict[str, Any]]
+    ) -> list[QualitativeCommitData]:
         """Create fallback results when LLM processing fails.
-
+
         Args:
             commits: List of commit dictionaries
-
+
         Returns:
             List of QualitativeCommitData with default values
         """
         results = []
-
+
         for commit in commits:
             result = QualitativeCommitData(
                 # Basic commit info
-                hash=commit.get(
-                message=commit.get(
-                author_name=commit.get(
-                author_email=commit.get(
-                timestamp=commit.get(
-                files_changed=commit.get(
-                insertions=commit.get(
-                deletions=commit.get(
-
+                hash=commit.get("hash", ""),
+                message=commit.get("message", ""),
+                author_name=commit.get("author_name", ""),
+                author_email=commit.get("author_email", ""),
+                timestamp=commit.get("timestamp", time.time()),
+                files_changed=commit.get("files_changed", []),
+                insertions=commit.get("insertions", 0),
+                deletions=commit.get("deletions", 0),
                # Default classifications
-                change_type=
+                change_type="unknown",
                 change_type_confidence=0.0,
-                business_domain=
+                business_domain="unknown",
                 domain_confidence=0.0,
-                risk_level=
-                risk_factors=[
-                intent_signals={
+                risk_level="medium",
+                risk_factors=["llm_processing_failed"],
+                intent_signals={"confidence": 0.0},
                 collaboration_patterns={},
-                technical_context={
-
+                technical_context={"processing_method": "fallback"},
                 # Processing metadata
-                processing_method=
+                processing_method="llm",
                 processing_time_ms=0.0,
-                confidence_score=0.0
+                confidence_score=0.0,
             )
             results.append(result)
-
-        return results
+
+        return results