gitflow-analytics 1.0.1__py3-none-any.whl → 1.3.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119) hide show
  1. gitflow_analytics/__init__.py +11 -11
  2. gitflow_analytics/_version.py +2 -2
  3. gitflow_analytics/classification/__init__.py +31 -0
  4. gitflow_analytics/classification/batch_classifier.py +752 -0
  5. gitflow_analytics/classification/classifier.py +464 -0
  6. gitflow_analytics/classification/feature_extractor.py +725 -0
  7. gitflow_analytics/classification/linguist_analyzer.py +574 -0
  8. gitflow_analytics/classification/model.py +455 -0
  9. gitflow_analytics/cli.py +4490 -378
  10. gitflow_analytics/cli_rich.py +503 -0
  11. gitflow_analytics/config/__init__.py +43 -0
  12. gitflow_analytics/config/errors.py +261 -0
  13. gitflow_analytics/config/loader.py +904 -0
  14. gitflow_analytics/config/profiles.py +264 -0
  15. gitflow_analytics/config/repository.py +124 -0
  16. gitflow_analytics/config/schema.py +441 -0
  17. gitflow_analytics/config/validator.py +154 -0
  18. gitflow_analytics/config.py +44 -398
  19. gitflow_analytics/core/analyzer.py +1320 -172
  20. gitflow_analytics/core/branch_mapper.py +132 -132
  21. gitflow_analytics/core/cache.py +1554 -175
  22. gitflow_analytics/core/data_fetcher.py +1193 -0
  23. gitflow_analytics/core/identity.py +571 -185
  24. gitflow_analytics/core/metrics_storage.py +526 -0
  25. gitflow_analytics/core/progress.py +372 -0
  26. gitflow_analytics/core/schema_version.py +269 -0
  27. gitflow_analytics/extractors/base.py +13 -11
  28. gitflow_analytics/extractors/ml_tickets.py +1100 -0
  29. gitflow_analytics/extractors/story_points.py +77 -59
  30. gitflow_analytics/extractors/tickets.py +841 -89
  31. gitflow_analytics/identity_llm/__init__.py +6 -0
  32. gitflow_analytics/identity_llm/analysis_pass.py +231 -0
  33. gitflow_analytics/identity_llm/analyzer.py +464 -0
  34. gitflow_analytics/identity_llm/models.py +76 -0
  35. gitflow_analytics/integrations/github_integration.py +258 -87
  36. gitflow_analytics/integrations/jira_integration.py +572 -123
  37. gitflow_analytics/integrations/orchestrator.py +206 -82
  38. gitflow_analytics/metrics/activity_scoring.py +322 -0
  39. gitflow_analytics/metrics/branch_health.py +470 -0
  40. gitflow_analytics/metrics/dora.py +542 -179
  41. gitflow_analytics/models/database.py +986 -59
  42. gitflow_analytics/pm_framework/__init__.py +115 -0
  43. gitflow_analytics/pm_framework/adapters/__init__.py +50 -0
  44. gitflow_analytics/pm_framework/adapters/jira_adapter.py +1845 -0
  45. gitflow_analytics/pm_framework/base.py +406 -0
  46. gitflow_analytics/pm_framework/models.py +211 -0
  47. gitflow_analytics/pm_framework/orchestrator.py +652 -0
  48. gitflow_analytics/pm_framework/registry.py +333 -0
  49. gitflow_analytics/qualitative/__init__.py +29 -0
  50. gitflow_analytics/qualitative/chatgpt_analyzer.py +259 -0
  51. gitflow_analytics/qualitative/classifiers/__init__.py +13 -0
  52. gitflow_analytics/qualitative/classifiers/change_type.py +742 -0
  53. gitflow_analytics/qualitative/classifiers/domain_classifier.py +506 -0
  54. gitflow_analytics/qualitative/classifiers/intent_analyzer.py +535 -0
  55. gitflow_analytics/qualitative/classifiers/llm/__init__.py +35 -0
  56. gitflow_analytics/qualitative/classifiers/llm/base.py +193 -0
  57. gitflow_analytics/qualitative/classifiers/llm/batch_processor.py +383 -0
  58. gitflow_analytics/qualitative/classifiers/llm/cache.py +479 -0
  59. gitflow_analytics/qualitative/classifiers/llm/cost_tracker.py +435 -0
  60. gitflow_analytics/qualitative/classifiers/llm/openai_client.py +403 -0
  61. gitflow_analytics/qualitative/classifiers/llm/prompts.py +373 -0
  62. gitflow_analytics/qualitative/classifiers/llm/response_parser.py +287 -0
  63. gitflow_analytics/qualitative/classifiers/llm_commit_classifier.py +607 -0
  64. gitflow_analytics/qualitative/classifiers/risk_analyzer.py +438 -0
  65. gitflow_analytics/qualitative/core/__init__.py +13 -0
  66. gitflow_analytics/qualitative/core/llm_fallback.py +657 -0
  67. gitflow_analytics/qualitative/core/nlp_engine.py +382 -0
  68. gitflow_analytics/qualitative/core/pattern_cache.py +479 -0
  69. gitflow_analytics/qualitative/core/processor.py +673 -0
  70. gitflow_analytics/qualitative/enhanced_analyzer.py +2236 -0
  71. gitflow_analytics/qualitative/example_enhanced_usage.py +420 -0
  72. gitflow_analytics/qualitative/models/__init__.py +25 -0
  73. gitflow_analytics/qualitative/models/schemas.py +306 -0
  74. gitflow_analytics/qualitative/utils/__init__.py +13 -0
  75. gitflow_analytics/qualitative/utils/batch_processor.py +339 -0
  76. gitflow_analytics/qualitative/utils/cost_tracker.py +345 -0
  77. gitflow_analytics/qualitative/utils/metrics.py +361 -0
  78. gitflow_analytics/qualitative/utils/text_processing.py +285 -0
  79. gitflow_analytics/reports/__init__.py +100 -0
  80. gitflow_analytics/reports/analytics_writer.py +550 -18
  81. gitflow_analytics/reports/base.py +648 -0
  82. gitflow_analytics/reports/branch_health_writer.py +322 -0
  83. gitflow_analytics/reports/classification_writer.py +924 -0
  84. gitflow_analytics/reports/cli_integration.py +427 -0
  85. gitflow_analytics/reports/csv_writer.py +1700 -216
  86. gitflow_analytics/reports/data_models.py +504 -0
  87. gitflow_analytics/reports/database_report_generator.py +427 -0
  88. gitflow_analytics/reports/example_usage.py +344 -0
  89. gitflow_analytics/reports/factory.py +499 -0
  90. gitflow_analytics/reports/formatters.py +698 -0
  91. gitflow_analytics/reports/html_generator.py +1116 -0
  92. gitflow_analytics/reports/interfaces.py +489 -0
  93. gitflow_analytics/reports/json_exporter.py +2770 -0
  94. gitflow_analytics/reports/narrative_writer.py +2289 -158
  95. gitflow_analytics/reports/story_point_correlation.py +1144 -0
  96. gitflow_analytics/reports/weekly_trends_writer.py +389 -0
  97. gitflow_analytics/training/__init__.py +5 -0
  98. gitflow_analytics/training/model_loader.py +377 -0
  99. gitflow_analytics/training/pipeline.py +550 -0
  100. gitflow_analytics/tui/__init__.py +5 -0
  101. gitflow_analytics/tui/app.py +724 -0
  102. gitflow_analytics/tui/screens/__init__.py +8 -0
  103. gitflow_analytics/tui/screens/analysis_progress_screen.py +496 -0
  104. gitflow_analytics/tui/screens/configuration_screen.py +523 -0
  105. gitflow_analytics/tui/screens/loading_screen.py +348 -0
  106. gitflow_analytics/tui/screens/main_screen.py +321 -0
  107. gitflow_analytics/tui/screens/results_screen.py +722 -0
  108. gitflow_analytics/tui/widgets/__init__.py +7 -0
  109. gitflow_analytics/tui/widgets/data_table.py +255 -0
  110. gitflow_analytics/tui/widgets/export_modal.py +301 -0
  111. gitflow_analytics/tui/widgets/progress_widget.py +187 -0
  112. gitflow_analytics-1.3.6.dist-info/METADATA +1015 -0
  113. gitflow_analytics-1.3.6.dist-info/RECORD +122 -0
  114. gitflow_analytics-1.0.1.dist-info/METADATA +0 -463
  115. gitflow_analytics-1.0.1.dist-info/RECORD +0 -31
  116. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/WHEEL +0 -0
  117. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/entry_points.txt +0 -0
  118. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/licenses/LICENSE +0 -0
  119. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,657 @@
1
+ """LLM fallback system for uncertain commit classifications using OpenRouter."""
2
+
3
+ import hashlib
4
+ import json
5
+ import logging
6
+ import os
7
+ import time
8
+ from pathlib import Path
9
+ from typing import Any, Optional
10
+
11
+ from ..models.schemas import LLMConfig, QualitativeCommitData
12
+ from ..utils.cost_tracker import CostTracker
13
+ from ..utils.text_processing import TextProcessor
14
+
15
+ try:
16
+ import openai
17
+ import tiktoken
18
+
19
+ OPENAI_AVAILABLE = True
20
+ except ImportError:
21
+ OPENAI_AVAILABLE = False
22
+
23
+ # Create mock objects for type hints when not available
24
+ class MockOpenAI:
25
+ class OpenAI:
26
+ pass
27
+
28
+ openai = MockOpenAI()
29
+ tiktoken = None
30
+
31
+
32
class ModelRouter:
    """Smart model selection based on complexity and cost constraints."""

    def __init__(self, config: LLMConfig, cost_tracker: CostTracker):
        """Initialize model router.

        Args:
            config: LLM configuration
            cost_tracker: Cost tracking instance
        """
        self.config = config
        self.cost_tracker = cost_tracker
        self.logger = logging.getLogger(__name__)

    def select_model(self, complexity_score: float, batch_size: int) -> str:
        """Select appropriate model based on complexity and budget.

        Args:
            complexity_score: Complexity score (0.0 to 1.0)
            batch_size: Number of commits in batch (currently unused by the
                selection heuristic but kept for interface stability)

        Returns:
            Selected model name
        """
        budget_left = self.cost_tracker.check_budget_remaining()

        # Hard stop: over budget means only the free model is allowed.
        if budget_left <= 0:
            self.logger.warning("Daily budget exceeded, using free model")
            return self.config.fallback_model

        # Easy work, or nearly-exhausted budget, also goes to the free model.
        if complexity_score < 0.3 or budget_left < 0.50:
            return self.config.fallback_model

        # Genuinely complex batches get the premium model — but only while
        # there is a comfortable budget cushion.
        is_complex = complexity_score > self.config.complexity_threshold
        if is_complex and budget_left > 2.0:
            return self.config.complex_model

        # Everything else uses the primary model (fast and cheap).
        return self.config.primary_model
74
+
75
+
76
+ class LLMFallback:
77
+ """Strategic LLM usage for uncertain cases via OpenRouter.
78
+
79
+ This class provides intelligent fallback to LLM processing when NLP
80
+ classification confidence is below the threshold. It uses OpenRouter
81
+ to access multiple models cost-effectively.
82
+ """
83
+
84
    def __init__(self, config: LLMConfig, cache_dir: Optional[Path] = None):
        """Initialize LLM fallback system.

        Args:
            config: LLM configuration
            cache_dir: Cache directory for cost tracking (defaults to config cache_dir)

        Raises:
            ImportError: If OpenAI library is not available
        """
        # The OpenAI SDK doubles as the OpenRouter client; without it this
        # subsystem cannot operate at all.
        if not OPENAI_AVAILABLE:
            raise ImportError(
                "OpenAI library required for LLM fallback. Install with: pip install openai"
            )

        self.config = config
        self.logger = logging.getLogger(__name__)

        # Initialize OpenRouter client (raises ValueError if no API key is set).
        self.client = self._initialize_openrouter_client()

        # Initialize utilities with proper cache directory
        cost_cache_dir = cache_dir / ".qualitative_cache" if cache_dir else None
        self.cost_tracker = CostTracker(
            cache_dir=cost_cache_dir, daily_budget=config.max_daily_cost
        )
        self.model_router = ModelRouter(config, self.cost_tracker)
        self.text_processor = TextProcessor()

        # Batch processing cache: maps group fingerprint -> classification
        # template so identical commit groups skip the API entirely.
        self.batch_cache = {}

        # Token encoder for cost estimation; None makes _estimate_tokens
        # fall back to a chars/4 heuristic.
        try:
            self.encoding = tiktoken.get_encoding("cl100k_base")  # GPT-4 encoding
        except Exception:
            self.encoding = None
            self.logger.warning(
                "Could not load tiktoken encoder, token estimation may be inaccurate"
            )

        self.logger.info("LLM fallback system initialized with OpenRouter")
126
+
127
+ def _initialize_openrouter_client(self) -> openai.OpenAI:
128
+ """Initialize OpenRouter client with API key.
129
+
130
+ Returns:
131
+ Configured OpenAI client for OpenRouter
132
+
133
+ Raises:
134
+ ValueError: If API key is not configured
135
+ """
136
+ api_key = self._resolve_api_key()
137
+ if not api_key:
138
+ raise ValueError(
139
+ "OpenRouter API key not configured. Set OPENROUTER_API_KEY environment variable."
140
+ )
141
+
142
+ return openai.OpenAI(
143
+ base_url=self.config.base_url,
144
+ api_key=api_key,
145
+ default_headers={
146
+ "HTTP-Referer": "https://github.com/bobmatnyc/gitflow-analytics",
147
+ "X-Title": "GitFlow Analytics - Qualitative Analysis",
148
+ },
149
+ )
150
+
151
+ def _resolve_api_key(self) -> Optional[str]:
152
+ """Resolve OpenRouter API key from config or environment.
153
+
154
+ Returns:
155
+ API key string or None if not found
156
+ """
157
+ api_key = self.config.openrouter_api_key
158
+
159
+ if api_key.startswith("${") and api_key.endswith("}"):
160
+ env_var = api_key[2:-1]
161
+ return os.environ.get(env_var)
162
+ else:
163
+ return api_key
164
+
165
+ def group_similar_commits(self, commits: list[dict[str, Any]]) -> list[list[dict[str, Any]]]:
166
+ """Group similar commits for efficient batch processing.
167
+
168
+ Args:
169
+ commits: List of commit dictionaries
170
+
171
+ Returns:
172
+ List of commit groups
173
+ """
174
+ if not commits:
175
+ return []
176
+
177
+ groups = []
178
+ similarity_threshold = self.config.similarity_threshold
179
+
180
+ for commit in commits:
181
+ # Find similar group or create new one
182
+ placed = False
183
+
184
+ for group in groups:
185
+ if len(group) >= self.config.max_group_size:
186
+ continue # Group is full
187
+
188
+ # Calculate similarity with first commit in group
189
+ similarity = self.text_processor.calculate_message_similarity(
190
+ commit.get("message", ""), group[0].get("message", "")
191
+ )
192
+
193
+ if similarity > similarity_threshold:
194
+ group.append(commit)
195
+ placed = True
196
+ break
197
+
198
+ if not placed:
199
+ groups.append([commit])
200
+
201
+ self.logger.debug(f"Grouped {len(commits)} commits into {len(groups)} groups")
202
+ return groups
203
+
204
    def process_group(self, commits: list[dict[str, Any]]) -> list[QualitativeCommitData]:
        """Process a group of similar commits with OpenRouter.

        Pipeline: cache lookup -> complexity scoring -> model selection ->
        affordability check -> API call -> response parsing -> cost
        recording -> template caching. On API failure the group is retried
        once with the fallback model; default results are produced if that
        also fails.

        Args:
            commits: List of similar commit dictionaries

        Returns:
            List of QualitativeCommitData with LLM analysis
        """
        if not commits:
            return []

        start_time = time.time()

        # Check cache first: an identical group fingerprint means the cached
        # classification template can be reused without another API call.
        cache_key = self._generate_group_cache_key(commits)
        if cache_key in self.batch_cache:
            self.logger.debug(f"Using cached result for {len(commits)} commits")
            template_result = self.batch_cache[cache_key]
            return self._apply_template_to_group(template_result, commits)

        # Assess complexity and select model (router also weighs remaining
        # daily budget).
        complexity_score = self._assess_complexity(commits)
        selected_model = self.model_router.select_model(complexity_score, len(commits))

        self.logger.debug(
            f"Processing {len(commits)} commits with {selected_model} "
            f"(complexity: {complexity_score:.2f})"
        )

        # Build optimized prompt
        prompt = self._build_batch_classification_prompt(commits)

        # Estimate tokens and cost; the "* 2" assumes the output is roughly
        # the same size as the input prompt.
        estimated_input_tokens = self._estimate_tokens(prompt)
        if not self.cost_tracker.can_afford_call(selected_model, estimated_input_tokens * 2):
            self.logger.warning("Cannot afford LLM call, using fallback model")
            selected_model = self.config.fallback_model

        # Make OpenRouter API call
        try:
            response = self._call_openrouter(prompt, selected_model)
            processing_time = time.time() - start_time

            # Parse response
            results = self._parse_llm_response(response, commits)

            # Track costs and performance
            estimated_output_tokens = self._estimate_tokens(response)
            self.cost_tracker.record_call(
                model=selected_model,
                input_tokens=estimated_input_tokens,
                output_tokens=estimated_output_tokens,
                processing_time=processing_time,
                batch_size=len(commits),
                success=len(results) > 0,
            )

            # Cache successful result so future identical groups skip the API.
            if results:
                self.batch_cache[cache_key] = self._create_template_from_results(results)

            # Update processing time in results (wall time split evenly
            # across the batch).
            for result in results:
                result.processing_time_ms = (processing_time * 1000) / len(results)

            return results

        except Exception as e:
            self.logger.error(f"OpenRouter processing failed: {e}")

            # Record failed call so the cost tracker still sees the attempt.
            self.cost_tracker.record_call(
                model=selected_model,
                input_tokens=estimated_input_tokens,
                output_tokens=0,
                processing_time=time.time() - start_time,
                batch_size=len(commits),
                success=False,
                error_message=str(e),
            )

            # Try fallback model if primary failed; if the fallback was
            # already the selected model, emit neutral default results.
            if selected_model != self.config.fallback_model:
                return self._retry_with_fallback_model(commits, prompt)
            else:
                return self._create_fallback_results(commits)
291
+
292
+ def _call_openrouter(self, prompt: str, model: str) -> str:
293
+ """Make API call to OpenRouter with selected model.
294
+
295
+ Args:
296
+ prompt: Classification prompt
297
+ model: Model to use
298
+
299
+ Returns:
300
+ Response content
301
+
302
+ Raises:
303
+ Exception: If API call fails
304
+ """
305
+ try:
306
+ response = self.client.chat.completions.create(
307
+ model=model,
308
+ messages=[
309
+ {
310
+ "role": "system",
311
+ "content": "You are an expert Git commit classifier. Analyze commits and respond only with valid JSON. Be concise but accurate.",
312
+ },
313
+ {"role": "user", "content": prompt},
314
+ ],
315
+ max_tokens=self.config.max_tokens,
316
+ temperature=self.config.temperature,
317
+ stream=False,
318
+ )
319
+
320
+ return response.choices[0].message.content
321
+
322
+ except Exception as e:
323
+ self.logger.error(f"OpenRouter API call failed: {e}")
324
+ raise
325
+
326
+ def _build_batch_classification_prompt(self, commits: list[dict[str, Any]]) -> str:
327
+ """Build optimized prompt for OpenRouter batch processing.
328
+
329
+ Args:
330
+ commits: List of commit dictionaries
331
+
332
+ Returns:
333
+ Formatted prompt string
334
+ """
335
+ # Limit to max group size for token management
336
+ commits_to_process = commits[: self.config.max_group_size]
337
+
338
+ commit_data = []
339
+ for i, commit in enumerate(commits_to_process, 1):
340
+ message = commit.get("message", "")[:150] # Truncate long messages
341
+ files = commit.get("files_changed", [])
342
+
343
+ # Include key file context
344
+ files_context = ""
345
+ if files:
346
+ key_files = files[:5] # Top 5 files
347
+ files_context = f" | Modified: {', '.join(key_files)}"
348
+
349
+ # Add size context
350
+ insertions = commit.get("insertions", 0)
351
+ deletions = commit.get("deletions", 0)
352
+ size_context = f" | +{insertions}/-{deletions}"
353
+
354
+ commit_data.append(f"{i}. {message}{files_context}{size_context}")
355
+
356
+ prompt = f"""Analyze these Git commits and classify each one. Consider the commit message, modified files, and change size.
357
+
358
+ Commits to classify:
359
+ {chr(10).join(commit_data)}
360
+
361
+ For each commit, provide:
362
+ - change_type: feature|bugfix|refactor|docs|test|chore|security|hotfix|config
363
+ - business_domain: frontend|backend|database|infrastructure|mobile|devops|unknown
364
+ - risk_level: low|medium|high|critical
365
+ - confidence: 0.0-1.0 (classification certainty)
366
+ - urgency: routine|important|urgent|critical
367
+ - complexity: simple|moderate|complex
368
+
369
+ Respond with JSON array only:
370
+ [{{"id": 1, "change_type": "feature", "business_domain": "frontend", "risk_level": "low", "confidence": 0.9, "urgency": "routine", "complexity": "moderate"}}]"""
371
+
372
+ return prompt
373
+
374
+ def _parse_llm_response(
375
+ self, response: str, commits: list[dict[str, Any]]
376
+ ) -> list[QualitativeCommitData]:
377
+ """Parse LLM response into QualitativeCommitData objects.
378
+
379
+ Args:
380
+ response: JSON response from LLM
381
+ commits: Original commit dictionaries
382
+
383
+ Returns:
384
+ List of QualitativeCommitData objects
385
+ """
386
+ try:
387
+ # Clean response (remove any markdown formatting)
388
+ cleaned_response = response.strip()
389
+ if cleaned_response.startswith("```json"):
390
+ cleaned_response = cleaned_response[7:]
391
+ if cleaned_response.endswith("```"):
392
+ cleaned_response = cleaned_response[:-3]
393
+ cleaned_response = cleaned_response.strip()
394
+
395
+ classifications = json.loads(cleaned_response)
396
+
397
+ if not isinstance(classifications, list):
398
+ raise ValueError("Response is not a JSON array")
399
+
400
+ results = []
401
+
402
+ for i, commit in enumerate(commits):
403
+ if i < len(classifications):
404
+ classification = classifications[i]
405
+ else:
406
+ # Fallback if fewer classifications than commits
407
+ classification = {
408
+ "change_type": "unknown",
409
+ "business_domain": "unknown",
410
+ "risk_level": "medium",
411
+ "confidence": 0.5,
412
+ "urgency": "routine",
413
+ "complexity": "moderate",
414
+ }
415
+
416
+ result = QualitativeCommitData(
417
+ # Copy existing commit fields
418
+ hash=commit.get("hash", ""),
419
+ message=commit.get("message", ""),
420
+ author_name=commit.get("author_name", ""),
421
+ author_email=commit.get("author_email", ""),
422
+ timestamp=commit.get("timestamp", time.time()),
423
+ files_changed=commit.get("files_changed", []),
424
+ insertions=commit.get("insertions", 0),
425
+ deletions=commit.get("deletions", 0),
426
+ # LLM-provided classifications
427
+ change_type=classification.get("change_type", "unknown"),
428
+ change_type_confidence=classification.get("confidence", 0.5),
429
+ business_domain=classification.get("business_domain", "unknown"),
430
+ domain_confidence=classification.get("confidence", 0.5),
431
+ risk_level=classification.get("risk_level", "medium"),
432
+ risk_factors=classification.get("risk_factors", []),
433
+ # Intent signals from LLM analysis
434
+ intent_signals={
435
+ "urgency": classification.get("urgency", "routine"),
436
+ "complexity": classification.get("complexity", "moderate"),
437
+ "confidence": classification.get("confidence", 0.5),
438
+ "signals": [
439
+ f"llm_classified:{classification.get('change_type', 'unknown')}"
440
+ ],
441
+ },
442
+ collaboration_patterns={},
443
+ technical_context={"llm_model": "openrouter", "processing_method": "batch"},
444
+ # Processing metadata
445
+ processing_method="llm",
446
+ processing_time_ms=0, # Set by caller
447
+ confidence_score=classification.get("confidence", 0.5),
448
+ )
449
+ results.append(result)
450
+
451
+ return results
452
+
453
+ except (json.JSONDecodeError, KeyError, ValueError) as e:
454
+ self.logger.error(f"Failed to parse LLM response: {e}")
455
+ self.logger.debug(f"Raw response: {response}")
456
+ return self._create_fallback_results(commits)
457
+
458
+ def _assess_complexity(self, commits: list[dict[str, Any]]) -> float:
459
+ """Assess complexity of commits for model selection.
460
+
461
+ Args:
462
+ commits: List of commit dictionaries
463
+
464
+ Returns:
465
+ Complexity score (0.0 to 1.0)
466
+ """
467
+ if not commits:
468
+ return 0.0
469
+
470
+ total_complexity = 0.0
471
+
472
+ for commit in commits:
473
+ # Message complexity
474
+ message = commit.get("message", "")
475
+ message_complexity = min(1.0, len(message.split()) / 20.0)
476
+
477
+ # File change complexity
478
+ files_changed = len(commit.get("files_changed", []))
479
+ file_complexity = min(1.0, files_changed / 15.0)
480
+
481
+ # Size complexity
482
+ total_changes = commit.get("insertions", 0) + commit.get("deletions", 0)
483
+ size_complexity = min(1.0, total_changes / 200.0)
484
+
485
+ # Combine complexities
486
+ commit_complexity = (
487
+ message_complexity * 0.3 + file_complexity * 0.4 + size_complexity * 0.3
488
+ )
489
+ total_complexity += commit_complexity
490
+
491
+ return total_complexity / len(commits)
492
+
493
+ def _estimate_tokens(self, text: str) -> int:
494
+ """Estimate token count for text.
495
+
496
+ Args:
497
+ text: Text to count tokens for
498
+
499
+ Returns:
500
+ Estimated token count
501
+ """
502
+ if self.encoding:
503
+ try:
504
+ return len(self.encoding.encode(text))
505
+ except Exception:
506
+ pass
507
+
508
+ # Fallback estimation (roughly 4 characters per token)
509
+ return len(text) // 4
510
+
511
+ def _generate_group_cache_key(self, commits: list[dict[str, Any]]) -> str:
512
+ """Generate cache key for a group of commits.
513
+
514
+ Args:
515
+ commits: List of commit dictionaries
516
+
517
+ Returns:
518
+ Cache key string
519
+ """
520
+ # Create fingerprint from commit messages and file patterns
521
+ fingerprints = []
522
+ for commit in commits:
523
+ message = commit.get("message", "")
524
+ files = commit.get("files_changed", [])
525
+ fingerprint = self.text_processor.create_semantic_fingerprint(message, files)
526
+ fingerprints.append(fingerprint)
527
+
528
+ combined_fingerprint = "|".join(sorted(fingerprints))
529
+ return hashlib.md5(combined_fingerprint.encode()).hexdigest()
530
+
531
+ def _create_template_from_results(self, results: list[QualitativeCommitData]) -> dict[str, Any]:
532
+ """Create a template from successful results for caching.
533
+
534
+ Args:
535
+ results: List of analysis results
536
+
537
+ Returns:
538
+ Template dictionary
539
+ """
540
+ if not results:
541
+ return {}
542
+
543
+ # Use first result as template
544
+ template = results[0]
545
+ return {
546
+ "change_type": template.change_type,
547
+ "business_domain": template.business_domain,
548
+ "risk_level": template.risk_level,
549
+ "confidence_score": template.confidence_score,
550
+ }
551
+
552
+ def _apply_template_to_group(
553
+ self, template: dict[str, Any], commits: list[dict[str, Any]]
554
+ ) -> list[QualitativeCommitData]:
555
+ """Apply cached template to a group of commits.
556
+
557
+ Args:
558
+ template: Cached analysis template
559
+ commits: List of commit dictionaries
560
+
561
+ Returns:
562
+ List of QualitativeCommitData using template
563
+ """
564
+ results = []
565
+
566
+ for commit in commits:
567
+ result = QualitativeCommitData(
568
+ # Copy existing commit fields
569
+ hash=commit.get("hash", ""),
570
+ message=commit.get("message", ""),
571
+ author_name=commit.get("author_name", ""),
572
+ author_email=commit.get("author_email", ""),
573
+ timestamp=commit.get("timestamp", time.time()),
574
+ files_changed=commit.get("files_changed", []),
575
+ insertions=commit.get("insertions", 0),
576
+ deletions=commit.get("deletions", 0),
577
+ # Apply template values
578
+ change_type=template.get("change_type", "unknown"),
579
+ change_type_confidence=template.get("confidence_score", 0.5),
580
+ business_domain=template.get("business_domain", "unknown"),
581
+ domain_confidence=template.get("confidence_score", 0.5),
582
+ risk_level=template.get("risk_level", "medium"),
583
+ risk_factors=[],
584
+ intent_signals={"confidence": template.get("confidence_score", 0.5)},
585
+ collaboration_patterns={},
586
+ technical_context={"processing_method": "cached_template"},
587
+ # Processing metadata
588
+ processing_method="llm",
589
+ processing_time_ms=1.0, # Very fast for cached results
590
+ confidence_score=template.get("confidence_score", 0.5),
591
+ )
592
+ results.append(result)
593
+
594
+ return results
595
+
596
+ def _retry_with_fallback_model(
597
+ self, commits: list[dict[str, Any]], prompt: str
598
+ ) -> list[QualitativeCommitData]:
599
+ """Retry processing with fallback model.
600
+
601
+ Args:
602
+ commits: List of commit dictionaries
603
+ prompt: Classification prompt
604
+
605
+ Returns:
606
+ List of QualitativeCommitData or fallback results
607
+ """
608
+ try:
609
+ self.logger.info(f"Retrying with fallback model: {self.config.fallback_model}")
610
+ response = self._call_openrouter(prompt, self.config.fallback_model)
611
+ return self._parse_llm_response(response, commits)
612
+ except Exception as e:
613
+ self.logger.error(f"Fallback model also failed: {e}")
614
+ return self._create_fallback_results(commits)
615
+
616
+ def _create_fallback_results(
617
+ self, commits: list[dict[str, Any]]
618
+ ) -> list[QualitativeCommitData]:
619
+ """Create fallback results when LLM processing fails.
620
+
621
+ Args:
622
+ commits: List of commit dictionaries
623
+
624
+ Returns:
625
+ List of QualitativeCommitData with default values
626
+ """
627
+ results = []
628
+
629
+ for commit in commits:
630
+ result = QualitativeCommitData(
631
+ # Basic commit info
632
+ hash=commit.get("hash", ""),
633
+ message=commit.get("message", ""),
634
+ author_name=commit.get("author_name", ""),
635
+ author_email=commit.get("author_email", ""),
636
+ timestamp=commit.get("timestamp", time.time()),
637
+ files_changed=commit.get("files_changed", []),
638
+ insertions=commit.get("insertions", 0),
639
+ deletions=commit.get("deletions", 0),
640
+ # Default classifications
641
+ change_type="unknown",
642
+ change_type_confidence=0.0,
643
+ business_domain="unknown",
644
+ domain_confidence=0.0,
645
+ risk_level="medium",
646
+ risk_factors=["llm_processing_failed"],
647
+ intent_signals={"confidence": 0.0},
648
+ collaboration_patterns={},
649
+ technical_context={"processing_method": "fallback"},
650
+ # Processing metadata
651
+ processing_method="llm",
652
+ processing_time_ms=0.0,
653
+ confidence_score=0.0,
654
+ )
655
+ results.append(result)
656
+
657
+ return results