gitflow-analytics 1.0.3__py3-none-any.whl → 1.3.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116)
  1. gitflow_analytics/_version.py +1 -1
  2. gitflow_analytics/classification/__init__.py +31 -0
  3. gitflow_analytics/classification/batch_classifier.py +752 -0
  4. gitflow_analytics/classification/classifier.py +464 -0
  5. gitflow_analytics/classification/feature_extractor.py +725 -0
  6. gitflow_analytics/classification/linguist_analyzer.py +574 -0
  7. gitflow_analytics/classification/model.py +455 -0
  8. gitflow_analytics/cli.py +4158 -350
  9. gitflow_analytics/cli_rich.py +198 -48
  10. gitflow_analytics/config/__init__.py +43 -0
  11. gitflow_analytics/config/errors.py +261 -0
  12. gitflow_analytics/config/loader.py +905 -0
  13. gitflow_analytics/config/profiles.py +264 -0
  14. gitflow_analytics/config/repository.py +124 -0
  15. gitflow_analytics/config/schema.py +444 -0
  16. gitflow_analytics/config/validator.py +154 -0
  17. gitflow_analytics/config.py +44 -508
  18. gitflow_analytics/core/analyzer.py +1209 -98
  19. gitflow_analytics/core/cache.py +1337 -29
  20. gitflow_analytics/core/data_fetcher.py +1285 -0
  21. gitflow_analytics/core/identity.py +363 -14
  22. gitflow_analytics/core/metrics_storage.py +526 -0
  23. gitflow_analytics/core/progress.py +372 -0
  24. gitflow_analytics/core/schema_version.py +269 -0
  25. gitflow_analytics/extractors/ml_tickets.py +1100 -0
  26. gitflow_analytics/extractors/story_points.py +8 -1
  27. gitflow_analytics/extractors/tickets.py +749 -11
  28. gitflow_analytics/identity_llm/__init__.py +6 -0
  29. gitflow_analytics/identity_llm/analysis_pass.py +231 -0
  30. gitflow_analytics/identity_llm/analyzer.py +464 -0
  31. gitflow_analytics/identity_llm/models.py +76 -0
  32. gitflow_analytics/integrations/github_integration.py +175 -11
  33. gitflow_analytics/integrations/jira_integration.py +461 -24
  34. gitflow_analytics/integrations/orchestrator.py +124 -1
  35. gitflow_analytics/metrics/activity_scoring.py +322 -0
  36. gitflow_analytics/metrics/branch_health.py +470 -0
  37. gitflow_analytics/metrics/dora.py +379 -20
  38. gitflow_analytics/models/database.py +843 -53
  39. gitflow_analytics/pm_framework/__init__.py +115 -0
  40. gitflow_analytics/pm_framework/adapters/__init__.py +50 -0
  41. gitflow_analytics/pm_framework/adapters/jira_adapter.py +1845 -0
  42. gitflow_analytics/pm_framework/base.py +406 -0
  43. gitflow_analytics/pm_framework/models.py +211 -0
  44. gitflow_analytics/pm_framework/orchestrator.py +652 -0
  45. gitflow_analytics/pm_framework/registry.py +333 -0
  46. gitflow_analytics/qualitative/__init__.py +9 -10
  47. gitflow_analytics/qualitative/chatgpt_analyzer.py +259 -0
  48. gitflow_analytics/qualitative/classifiers/__init__.py +3 -3
  49. gitflow_analytics/qualitative/classifiers/change_type.py +518 -244
  50. gitflow_analytics/qualitative/classifiers/domain_classifier.py +272 -165
  51. gitflow_analytics/qualitative/classifiers/intent_analyzer.py +321 -222
  52. gitflow_analytics/qualitative/classifiers/llm/__init__.py +35 -0
  53. gitflow_analytics/qualitative/classifiers/llm/base.py +193 -0
  54. gitflow_analytics/qualitative/classifiers/llm/batch_processor.py +383 -0
  55. gitflow_analytics/qualitative/classifiers/llm/cache.py +479 -0
  56. gitflow_analytics/qualitative/classifiers/llm/cost_tracker.py +435 -0
  57. gitflow_analytics/qualitative/classifiers/llm/openai_client.py +403 -0
  58. gitflow_analytics/qualitative/classifiers/llm/prompts.py +373 -0
  59. gitflow_analytics/qualitative/classifiers/llm/response_parser.py +287 -0
  60. gitflow_analytics/qualitative/classifiers/llm_commit_classifier.py +607 -0
  61. gitflow_analytics/qualitative/classifiers/risk_analyzer.py +215 -189
  62. gitflow_analytics/qualitative/core/__init__.py +4 -4
  63. gitflow_analytics/qualitative/core/llm_fallback.py +239 -235
  64. gitflow_analytics/qualitative/core/nlp_engine.py +157 -148
  65. gitflow_analytics/qualitative/core/pattern_cache.py +214 -192
  66. gitflow_analytics/qualitative/core/processor.py +381 -248
  67. gitflow_analytics/qualitative/enhanced_analyzer.py +2236 -0
  68. gitflow_analytics/qualitative/example_enhanced_usage.py +420 -0
  69. gitflow_analytics/qualitative/models/__init__.py +7 -7
  70. gitflow_analytics/qualitative/models/schemas.py +155 -121
  71. gitflow_analytics/qualitative/utils/__init__.py +4 -4
  72. gitflow_analytics/qualitative/utils/batch_processor.py +136 -123
  73. gitflow_analytics/qualitative/utils/cost_tracker.py +142 -140
  74. gitflow_analytics/qualitative/utils/metrics.py +172 -158
  75. gitflow_analytics/qualitative/utils/text_processing.py +146 -104
  76. gitflow_analytics/reports/__init__.py +100 -0
  77. gitflow_analytics/reports/analytics_writer.py +539 -14
  78. gitflow_analytics/reports/base.py +648 -0
  79. gitflow_analytics/reports/branch_health_writer.py +322 -0
  80. gitflow_analytics/reports/classification_writer.py +924 -0
  81. gitflow_analytics/reports/cli_integration.py +427 -0
  82. gitflow_analytics/reports/csv_writer.py +1676 -212
  83. gitflow_analytics/reports/data_models.py +504 -0
  84. gitflow_analytics/reports/database_report_generator.py +427 -0
  85. gitflow_analytics/reports/example_usage.py +344 -0
  86. gitflow_analytics/reports/factory.py +499 -0
  87. gitflow_analytics/reports/formatters.py +698 -0
  88. gitflow_analytics/reports/html_generator.py +1116 -0
  89. gitflow_analytics/reports/interfaces.py +489 -0
  90. gitflow_analytics/reports/json_exporter.py +2770 -0
  91. gitflow_analytics/reports/narrative_writer.py +2287 -158
  92. gitflow_analytics/reports/story_point_correlation.py +1144 -0
  93. gitflow_analytics/reports/weekly_trends_writer.py +389 -0
  94. gitflow_analytics/training/__init__.py +5 -0
  95. gitflow_analytics/training/model_loader.py +377 -0
  96. gitflow_analytics/training/pipeline.py +550 -0
  97. gitflow_analytics/tui/__init__.py +1 -1
  98. gitflow_analytics/tui/app.py +129 -126
  99. gitflow_analytics/tui/screens/__init__.py +3 -3
  100. gitflow_analytics/tui/screens/analysis_progress_screen.py +188 -179
  101. gitflow_analytics/tui/screens/configuration_screen.py +154 -178
  102. gitflow_analytics/tui/screens/loading_screen.py +100 -110
  103. gitflow_analytics/tui/screens/main_screen.py +89 -72
  104. gitflow_analytics/tui/screens/results_screen.py +305 -281
  105. gitflow_analytics/tui/widgets/__init__.py +2 -2
  106. gitflow_analytics/tui/widgets/data_table.py +67 -69
  107. gitflow_analytics/tui/widgets/export_modal.py +76 -76
  108. gitflow_analytics/tui/widgets/progress_widget.py +41 -46
  109. gitflow_analytics-1.3.11.dist-info/METADATA +1015 -0
  110. gitflow_analytics-1.3.11.dist-info/RECORD +122 -0
  111. gitflow_analytics-1.0.3.dist-info/METADATA +0 -490
  112. gitflow_analytics-1.0.3.dist-info/RECORD +0 -62
  113. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.11.dist-info}/WHEEL +0 -0
  114. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.11.dist-info}/entry_points.txt +0 -0
  115. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.11.dist-info}/licenses/LICENSE +0 -0
  116. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.11.dist-info}/top_level.txt +0 -0
@@ -1,13 +1,12 @@
  """LLM fallback system for uncertain commit classifications using OpenRouter."""

- import asyncio
+ import hashlib
  import json
  import logging
- import time
- import uuid
- from typing import Dict, List, Optional, Tuple, Any
- import hashlib
  import os
+ import time
+ from pathlib import Path
+ from typing import Any, Optional

  from ..models.schemas import LLMConfig, QualitativeCommitData
  from ..utils.cost_tracker import CostTracker
@@ -16,23 +15,26 @@ from ..utils.text_processing import TextProcessor
  try:
      import openai
      import tiktoken
+
      OPENAI_AVAILABLE = True
  except ImportError:
      OPENAI_AVAILABLE = False
+
      # Create mock objects for type hints when not available
      class MockOpenAI:
          class OpenAI:
              pass
+
      openai = MockOpenAI()
      tiktoken = None


  class ModelRouter:
      """Smart model selection based on complexity and cost constraints."""
-
+
      def __init__(self, config: LLMConfig, cost_tracker: CostTracker):
          """Initialize model router.
-
+
          Args:
              config: LLM configuration
              cost_tracker: Cost tracking instance
@@ -40,51 +42,52 @@ class ModelRouter:
          self.config = config
          self.cost_tracker = cost_tracker
          self.logger = logging.getLogger(__name__)
-
+
      def select_model(self, complexity_score: float, batch_size: int) -> str:
          """Select appropriate model based on complexity and budget.
-
+
          Args:
              complexity_score: Complexity score (0.0 to 1.0)
              batch_size: Number of commits in batch
-
+
          Returns:
              Selected model name
          """
          # Check daily budget remaining
          remaining_budget = self.cost_tracker.check_budget_remaining()
-
+
          # If we're over budget, use free model
          if remaining_budget <= 0:
              self.logger.warning("Daily budget exceeded, using free model")
              return self.config.fallback_model
-
+
          # For simple cases or when budget is tight, use free model
          if complexity_score < 0.3 or remaining_budget < 0.50:
              return self.config.fallback_model
-
+
          # For complex cases with sufficient budget, use premium model
          if complexity_score > self.config.complexity_threshold and remaining_budget > 2.0:
              return self.config.complex_model
-
+
          # Default to primary model (Claude Haiku - fast and cheap)
          return self.config.primary_model


  class LLMFallback:
      """Strategic LLM usage for uncertain cases via OpenRouter.
-
+
      This class provides intelligent fallback to LLM processing when NLP
      classification confidence is below the threshold. It uses OpenRouter
      to access multiple models cost-effectively.
      """
-
-     def __init__(self, config: LLMConfig):
+
+     def __init__(self, config: LLMConfig, cache_dir: Optional[Path] = None):
          """Initialize LLM fallback system.
-
+
          Args:
              config: LLM configuration
-
+             cache_dir: Cache directory for cost tracking (defaults to config cache_dir)
+
          Raises:
              ImportError: If OpenAI library is not available
          """
@@ -92,36 +95,41 @@ class LLMFallback:
              raise ImportError(
                  "OpenAI library required for LLM fallback. Install with: pip install openai"
              )
-
+
          self.config = config
          self.logger = logging.getLogger(__name__)
-
+
          # Initialize OpenRouter client
          self.client = self._initialize_openrouter_client()
-
-         # Initialize utilities
-         self.cost_tracker = CostTracker(daily_budget=config.max_daily_cost)
+
+         # Initialize utilities with proper cache directory
+         cost_cache_dir = cache_dir / ".qualitative_cache" if cache_dir else None
+         self.cost_tracker = CostTracker(
+             cache_dir=cost_cache_dir, daily_budget=config.max_daily_cost
+         )
          self.model_router = ModelRouter(config, self.cost_tracker)
          self.text_processor = TextProcessor()
-
+
          # Batch processing cache
          self.batch_cache = {}
-
+
          # Token encoder for cost estimation
          try:
              self.encoding = tiktoken.get_encoding("cl100k_base")  # GPT-4 encoding
          except Exception:
              self.encoding = None
-             self.logger.warning("Could not load tiktoken encoder, token estimation may be inaccurate")
-
+             self.logger.warning(
+                 "Could not load tiktoken encoder, token estimation may be inaccurate"
+             )
+
          self.logger.info("LLM fallback system initialized with OpenRouter")
-
+
      def _initialize_openrouter_client(self) -> openai.OpenAI:
          """Initialize OpenRouter client with API key.
-
+
          Returns:
              Configured OpenAI client for OpenRouter
-
+
          Raises:
              ValueError: If API key is not configured
          """
@@ -130,117 +138,116 @@
              raise ValueError(
                  "OpenRouter API key not configured. Set OPENROUTER_API_KEY environment variable."
              )
-
+
          return openai.OpenAI(
              base_url=self.config.base_url,
              api_key=api_key,
              default_headers={
                  "HTTP-Referer": "https://github.com/bobmatnyc/gitflow-analytics",
-                 "X-Title": "GitFlow Analytics - Qualitative Analysis"
-             }
+                 "X-Title": "GitFlow Analytics - Qualitative Analysis",
+             },
          )
-
+
      def _resolve_api_key(self) -> Optional[str]:
          """Resolve OpenRouter API key from config or environment.
-
+
          Returns:
              API key string or None if not found
          """
          api_key = self.config.openrouter_api_key
-
+
          if api_key.startswith("${") and api_key.endswith("}"):
              env_var = api_key[2:-1]
              return os.environ.get(env_var)
          else:
              return api_key
-
-     def group_similar_commits(self, commits: List[Dict[str, Any]]) -> List[List[Dict[str, Any]]]:
+
+     def group_similar_commits(self, commits: list[dict[str, Any]]) -> list[list[dict[str, Any]]]:
          """Group similar commits for efficient batch processing.
-
+
          Args:
              commits: List of commit dictionaries
-
+
          Returns:
              List of commit groups
          """
          if not commits:
              return []
-
+
          groups = []
          similarity_threshold = self.config.similarity_threshold
-
+
          for commit in commits:
              # Find similar group or create new one
              placed = False
-
+
              for group in groups:
                  if len(group) >= self.config.max_group_size:
                      continue  # Group is full
-
+
                  # Calculate similarity with first commit in group
                  similarity = self.text_processor.calculate_message_similarity(
-                     commit.get('message', ''),
-                     group[0].get('message', '')
+                     commit.get("message", ""), group[0].get("message", "")
                  )
-
+
                  if similarity > similarity_threshold:
                      group.append(commit)
                      placed = True
                      break
-
+
              if not placed:
                  groups.append([commit])
-
+
          self.logger.debug(f"Grouped {len(commits)} commits into {len(groups)} groups")
          return groups
-
-     def process_group(self, commits: List[Dict[str, Any]]) -> List[QualitativeCommitData]:
+
+     def process_group(self, commits: list[dict[str, Any]]) -> list[QualitativeCommitData]:
          """Process a group of similar commits with OpenRouter.
-
+
          Args:
              commits: List of similar commit dictionaries
-
+
          Returns:
              List of QualitativeCommitData with LLM analysis
          """
          if not commits:
              return []
-
+
          start_time = time.time()
-
+
          # Check cache first
          cache_key = self._generate_group_cache_key(commits)
          if cache_key in self.batch_cache:
              self.logger.debug(f"Using cached result for {len(commits)} commits")
              template_result = self.batch_cache[cache_key]
              return self._apply_template_to_group(template_result, commits)
-
+
          # Assess complexity and select model
          complexity_score = self._assess_complexity(commits)
          selected_model = self.model_router.select_model(complexity_score, len(commits))
-
+
          self.logger.debug(
              f"Processing {len(commits)} commits with {selected_model} "
              f"(complexity: {complexity_score:.2f})"
          )
-
+
          # Build optimized prompt
          prompt = self._build_batch_classification_prompt(commits)
-
+
          # Estimate tokens and cost
          estimated_input_tokens = self._estimate_tokens(prompt)
          if not self.cost_tracker.can_afford_call(selected_model, estimated_input_tokens * 2):
              self.logger.warning("Cannot afford LLM call, using fallback model")
              selected_model = self.config.fallback_model
-
+
          # Make OpenRouter API call
          try:
              response = self._call_openrouter(prompt, selected_model)
              processing_time = time.time() - start_time
-
+
              # Parse response
              results = self._parse_llm_response(response, commits)
-
+
              # Track costs and performance
              estimated_output_tokens = self._estimate_tokens(response)
              self.cost_tracker.record_call(
@@ -249,22 +256,22 @@ class LLMFallback:
                  output_tokens=estimated_output_tokens,
                  processing_time=processing_time,
                  batch_size=len(commits),
-                 success=len(results) > 0
+                 success=len(results) > 0,
              )
-
+
              # Cache successful result
              if results:
                  self.batch_cache[cache_key] = self._create_template_from_results(results)
-
+
              # Update processing time in results
              for result in results:
                  result.processing_time_ms = (processing_time * 1000) / len(results)
-
+
              return results
-
+
          except Exception as e:
              self.logger.error(f"OpenRouter processing failed: {e}")
-
+
              # Record failed call
              self.cost_tracker.record_call(
                  model=selected_model,
@@ -273,25 +280,25 @@ class LLMFallback:
                  processing_time=time.time() - start_time,
                  batch_size=len(commits),
                  success=False,
-                 error_message=str(e)
+                 error_message=str(e),
              )
-
+
              # Try fallback model if primary failed
              if selected_model != self.config.fallback_model:
                  return self._retry_with_fallback_model(commits, prompt)
              else:
                  return self._create_fallback_results(commits)
-
+
      def _call_openrouter(self, prompt: str, model: str) -> str:
          """Make API call to OpenRouter with selected model.
-
+
          Args:
              prompt: Classification prompt
              model: Model to use
-
+
          Returns:
              Response content
-
+
          Raises:
              Exception: If API call fails
          """
@@ -300,52 +307,52 @@ class LLMFallback:
                  model=model,
                  messages=[
                      {
-                         "role": "system",
-                         "content": "You are an expert Git commit classifier. Analyze commits and respond only with valid JSON. Be concise but accurate."
+                         "role": "system",
+                         "content": "You are an expert Git commit classifier. Analyze commits and respond only with valid JSON. Be concise but accurate.",
                      },
-                     {"role": "user", "content": prompt}
+                     {"role": "user", "content": prompt},
                  ],
                  max_tokens=self.config.max_tokens,
                  temperature=self.config.temperature,
-                 stream=False
+                 stream=False,
              )
-
+
              return response.choices[0].message.content
-
+
          except Exception as e:
              self.logger.error(f"OpenRouter API call failed: {e}")
              raise
-
-     def _build_batch_classification_prompt(self, commits: List[Dict[str, Any]]) -> str:
+
+     def _build_batch_classification_prompt(self, commits: list[dict[str, Any]]) -> str:
          """Build optimized prompt for OpenRouter batch processing.
-
+
          Args:
              commits: List of commit dictionaries
-
+
          Returns:
              Formatted prompt string
          """
          # Limit to max group size for token management
-         commits_to_process = commits[:self.config.max_group_size]
-
+         commits_to_process = commits[: self.config.max_group_size]
+
          commit_data = []
          for i, commit in enumerate(commits_to_process, 1):
-             message = commit.get('message', '')[:150]  # Truncate long messages
-             files = commit.get('files_changed', [])
-
+             message = commit.get("message", "")[:150]  # Truncate long messages
+             files = commit.get("files_changed", [])
+
              # Include key file context
              files_context = ""
              if files:
                  key_files = files[:5]  # Top 5 files
                  files_context = f" | Modified: {', '.join(key_files)}"
-
+
              # Add size context
-             insertions = commit.get('insertions', 0)
-             deletions = commit.get('deletions', 0)
+             insertions = commit.get("insertions", 0)
+             deletions = commit.get("deletions", 0)
              size_context = f" | +{insertions}/-{deletions}"
-
+
              commit_data.append(f"{i}. {message}{files_context}{size_context}")
-
+
          prompt = f"""Analyze these Git commits and classify each one. Consider the commit message, modified files, and change size.

  Commits to classify:
@@ -363,134 +370,132 @@ Respond with JSON array only:
  [{{"id": 1, "change_type": "feature", "business_domain": "frontend", "risk_level": "low", "confidence": 0.9, "urgency": "routine", "complexity": "moderate"}}]"""

          return prompt
-
-     def _parse_llm_response(self, response: str, commits: List[Dict[str, Any]]) -> List[QualitativeCommitData]:
+
+     def _parse_llm_response(
+         self, response: str, commits: list[dict[str, Any]]
+     ) -> list[QualitativeCommitData]:
          """Parse LLM response into QualitativeCommitData objects.
-
+
          Args:
              response: JSON response from LLM
              commits: Original commit dictionaries
-
+
          Returns:
              List of QualitativeCommitData objects
          """
          try:
              # Clean response (remove any markdown formatting)
              cleaned_response = response.strip()
-             if cleaned_response.startswith('```json'):
+             if cleaned_response.startswith("```json"):
                  cleaned_response = cleaned_response[7:]
-             if cleaned_response.endswith('```'):
+             if cleaned_response.endswith("```"):
                  cleaned_response = cleaned_response[:-3]
              cleaned_response = cleaned_response.strip()
-
+
              classifications = json.loads(cleaned_response)
-
+
              if not isinstance(classifications, list):
                  raise ValueError("Response is not a JSON array")
-
+
              results = []
-
+
              for i, commit in enumerate(commits):
                  if i < len(classifications):
                      classification = classifications[i]
                  else:
                      # Fallback if fewer classifications than commits
                      classification = {
-                         'change_type': 'unknown',
-                         'business_domain': 'unknown',
-                         'risk_level': 'medium',
-                         'confidence': 0.5,
-                         'urgency': 'routine',
-                         'complexity': 'moderate'
+                         "change_type": "unknown",
+                         "business_domain": "unknown",
+                         "risk_level": "medium",
+                         "confidence": 0.5,
+                         "urgency": "routine",
+                         "complexity": "moderate",
                      }
-
+
                  result = QualitativeCommitData(
                      # Copy existing commit fields
-                     hash=commit.get('hash', ''),
-                     message=commit.get('message', ''),
-                     author_name=commit.get('author_name', ''),
-                     author_email=commit.get('author_email', ''),
-                     timestamp=commit.get('timestamp', time.time()),
-                     files_changed=commit.get('files_changed', []),
-                     insertions=commit.get('insertions', 0),
-                     deletions=commit.get('deletions', 0),
-
+                     hash=commit.get("hash", ""),
+                     message=commit.get("message", ""),
+                     author_name=commit.get("author_name", ""),
+                     author_email=commit.get("author_email", ""),
+                     timestamp=commit.get("timestamp", time.time()),
+                     files_changed=commit.get("files_changed", []),
+                     insertions=commit.get("insertions", 0),
+                     deletions=commit.get("deletions", 0),
                      # LLM-provided classifications
-                     change_type=classification.get('change_type', 'unknown'),
-                     change_type_confidence=classification.get('confidence', 0.5),
-                     business_domain=classification.get('business_domain', 'unknown'),
-                     domain_confidence=classification.get('confidence', 0.5),
-                     risk_level=classification.get('risk_level', 'medium'),
-                     risk_factors=classification.get('risk_factors', []),
-
+                     change_type=classification.get("change_type", "unknown"),
+                     change_type_confidence=classification.get("confidence", 0.5),
+                     business_domain=classification.get("business_domain", "unknown"),
+                     domain_confidence=classification.get("confidence", 0.5),
+                     risk_level=classification.get("risk_level", "medium"),
+                     risk_factors=classification.get("risk_factors", []),
                      # Intent signals from LLM analysis
                      intent_signals={
-                         'urgency': classification.get('urgency', 'routine'),
-                         'complexity': classification.get('complexity', 'moderate'),
-                         'confidence': classification.get('confidence', 0.5),
-                         'signals': [f"llm_classified:{classification.get('change_type', 'unknown')}"]
+                         "urgency": classification.get("urgency", "routine"),
+                         "complexity": classification.get("complexity", "moderate"),
+                         "confidence": classification.get("confidence", 0.5),
+                         "signals": [
+                             f"llm_classified:{classification.get('change_type', 'unknown')}"
+                         ],
                      },
                      collaboration_patterns={},
-                     technical_context={
-                         'llm_model': 'openrouter',
-                         'processing_method': 'batch'
-                     },
-
+                     technical_context={"llm_model": "openrouter", "processing_method": "batch"},
                      # Processing metadata
-                     processing_method='llm',
+                     processing_method="llm",
                      processing_time_ms=0,  # Set by caller
-                     confidence_score=classification.get('confidence', 0.5)
+                     confidence_score=classification.get("confidence", 0.5),
                  )
                  results.append(result)
-
+
              return results
-
+
          except (json.JSONDecodeError, KeyError, ValueError) as e:
              self.logger.error(f"Failed to parse LLM response: {e}")
              self.logger.debug(f"Raw response: {response}")
              return self._create_fallback_results(commits)
-
-     def _assess_complexity(self, commits: List[Dict[str, Any]]) -> float:
+
+     def _assess_complexity(self, commits: list[dict[str, Any]]) -> float:
          """Assess complexity of commits for model selection.
-
+
          Args:
              commits: List of commit dictionaries
-
+
          Returns:
              Complexity score (0.0 to 1.0)
          """
          if not commits:
              return 0.0
-
+
          total_complexity = 0.0
-
+
          for commit in commits:
              # Message complexity
-             message = commit.get('message', '')
+             message = commit.get("message", "")
              message_complexity = min(1.0, len(message.split()) / 20.0)
-
+
              # File change complexity
-             files_changed = len(commit.get('files_changed', []))
+             files_changed = len(commit.get("files_changed", []))
              file_complexity = min(1.0, files_changed / 15.0)
-
+
              # Size complexity
-             total_changes = commit.get('insertions', 0) + commit.get('deletions', 0)
+             total_changes = commit.get("insertions", 0) + commit.get("deletions", 0)
              size_complexity = min(1.0, total_changes / 200.0)
-
+
              # Combine complexities
-             commit_complexity = (message_complexity * 0.3 +
-                                  file_complexity * 0.4 +
-                                  size_complexity * 0.3)
+             commit_complexity = (
+                 message_complexity * 0.3 + file_complexity * 0.4 + size_complexity * 0.3
+             )
              total_complexity += commit_complexity
-
+
          return total_complexity / len(commits)
-
+
      def _estimate_tokens(self, text: str) -> int:
          """Estimate token count for text.
-
+
          Args:
              text: Text to count tokens for
-
+
          Returns:
              Estimated token count
          """
@@ -499,105 +504,104 @@ Respond with JSON array only:
                  return len(self.encoding.encode(text))
              except Exception:
                  pass
-
+
          # Fallback estimation (roughly 4 characters per token)
          return len(text) // 4
-
-     def _generate_group_cache_key(self, commits: List[Dict[str, Any]]) -> str:
+
+     def _generate_group_cache_key(self, commits: list[dict[str, Any]]) -> str:
          """Generate cache key for a group of commits.
-
+
          Args:
              commits: List of commit dictionaries
-
+
          Returns:
              Cache key string
          """
          # Create fingerprint from commit messages and file patterns
          fingerprints = []
          for commit in commits:
-             message = commit.get('message', '')
-             files = commit.get('files_changed', [])
+             message = commit.get("message", "")
+             files = commit.get("files_changed", [])
              fingerprint = self.text_processor.create_semantic_fingerprint(message, files)
              fingerprints.append(fingerprint)
-
-         combined_fingerprint = '|'.join(sorted(fingerprints))
+
+         combined_fingerprint = "|".join(sorted(fingerprints))
          return hashlib.md5(combined_fingerprint.encode()).hexdigest()
-
-     def _create_template_from_results(self, results: List[QualitativeCommitData]) -> Dict[str, Any]:
+
+     def _create_template_from_results(self, results: list[QualitativeCommitData]) -> dict[str, Any]:
          """Create a template from successful results for caching.
-
+
          Args:
              results: List of analysis results
-
+
          Returns:
              Template dictionary
          """
          if not results:
              return {}
-
+
          # Use first result as template
          template = results[0]
          return {
-             'change_type': template.change_type,
-             'business_domain': template.business_domain,
-             'risk_level': template.risk_level,
-             'confidence_score': template.confidence_score
+             "change_type": template.change_type,
+             "business_domain": template.business_domain,
+             "risk_level": template.risk_level,
+             "confidence_score": template.confidence_score,
          }
-
-     def _apply_template_to_group(self, template: Dict[str, Any],
-                                  commits: List[Dict[str, Any]]) -> List[QualitativeCommitData]:
+
+     def _apply_template_to_group(
+         self, template: dict[str, Any], commits: list[dict[str, Any]]
+     ) -> list[QualitativeCommitData]:
          """Apply cached template to a group of commits.
-
+
          Args:
              template: Cached analysis template
              commits: List of commit dictionaries
-
+
          Returns:
              List of QualitativeCommitData using template
          """
          results = []
-
+
          for commit in commits:
              result = QualitativeCommitData(
                  # Copy existing commit fields
-                 hash=commit.get('hash', ''),
-                 message=commit.get('message', ''),
-                 author_name=commit.get('author_name', ''),
-                 author_email=commit.get('author_email', ''),
-                 timestamp=commit.get('timestamp', time.time()),
-                 files_changed=commit.get('files_changed', []),
-                 insertions=commit.get('insertions', 0),
-                 deletions=commit.get('deletions', 0),
-
+                 hash=commit.get("hash", ""),
+                 message=commit.get("message", ""),
+                 author_name=commit.get("author_name", ""),
+                 author_email=commit.get("author_email", ""),
+                 timestamp=commit.get("timestamp", time.time()),
+                 files_changed=commit.get("files_changed", []),
+                 insertions=commit.get("insertions", 0),
+                 deletions=commit.get("deletions", 0),
                  # Apply template values
-                 change_type=template.get('change_type', 'unknown'),
-                 change_type_confidence=template.get('confidence_score', 0.5),
-                 business_domain=template.get('business_domain', 'unknown'),
-                 domain_confidence=template.get('confidence_score', 0.5),
-                 risk_level=template.get('risk_level', 'medium'),
+                 change_type=template.get("change_type", "unknown"),
+                 change_type_confidence=template.get("confidence_score", 0.5),
+                 business_domain=template.get("business_domain", "unknown"),
+                 domain_confidence=template.get("confidence_score", 0.5),
+                 risk_level=template.get("risk_level", "medium"),
                  risk_factors=[],
-
-                 intent_signals={'confidence': template.get('confidence_score', 0.5)},
+                 intent_signals={"confidence": template.get("confidence_score", 0.5)},
                  collaboration_patterns={},
-                 technical_context={'processing_method': 'cached_template'},
-
+                 technical_context={"processing_method": "cached_template"},
                  # Processing metadata
-                 processing_method='llm',
+                 processing_method="llm",
                  processing_time_ms=1.0,  # Very fast for cached results
-                 confidence_score=template.get('confidence_score', 0.5)
+                 confidence_score=template.get("confidence_score", 0.5),
              )
              results.append(result)
-
+
          return results
-
-     def _retry_with_fallback_model(self, commits: List[Dict[str, Any]],
-                                    prompt: str) -> List[QualitativeCommitData]:
+
+     def _retry_with_fallback_model(
+         self, commits: list[dict[str, Any]], prompt: str
+     ) -> list[QualitativeCommitData]:
          """Retry processing with fallback model.
-
+
          Args:
              commits: List of commit dictionaries
              prompt: Classification prompt
-
+
          Returns:
              List of QualitativeCommitData or fallback results
          """
@@ -608,46 +612,46 @@ Respond with JSON array only:
          except Exception as e:
              self.logger.error(f"Fallback model also failed: {e}")
              return self._create_fallback_results(commits)
-
-     def _create_fallback_results(self, commits: List[Dict[str, Any]]) -> List[QualitativeCommitData]:
+
+     def _create_fallback_results(
+         self, commits: list[dict[str, Any]]
+     ) -> list[QualitativeCommitData]:
          """Create fallback results when LLM processing fails.
-
+
          Args:
              commits: List of commit dictionaries
-
+
          Returns:
              List of QualitativeCommitData with default values
          """
          results = []
-
+
          for commit in commits:
              result = QualitativeCommitData(
                  # Basic commit info
-                 hash=commit.get('hash', ''),
-                 message=commit.get('message', ''),
-                 author_name=commit.get('author_name', ''),
-                 author_email=commit.get('author_email', ''),
-                 timestamp=commit.get('timestamp', time.time()),
-                 files_changed=commit.get('files_changed', []),
-                 insertions=commit.get('insertions', 0),
-                 deletions=commit.get('deletions', 0),
-
+                 hash=commit.get("hash", ""),
+                 message=commit.get("message", ""),
+                 author_name=commit.get("author_name", ""),
+                 author_email=commit.get("author_email", ""),
+                 timestamp=commit.get("timestamp", time.time()),
+                 files_changed=commit.get("files_changed", []),
+                 insertions=commit.get("insertions", 0),
+                 deletions=commit.get("deletions", 0),
                  # Default classifications
-                 change_type='unknown',
+                 change_type="unknown",
                  change_type_confidence=0.0,
-                 business_domain='unknown',
+                 business_domain="unknown",
                  domain_confidence=0.0,
-                 risk_level='medium',
-                 risk_factors=['llm_processing_failed'],
-                 intent_signals={'confidence': 0.0},
+                 risk_level="medium",
+                 risk_factors=["llm_processing_failed"],
+                 intent_signals={"confidence": 0.0},
                  collaboration_patterns={},
-                 technical_context={'processing_method': 'fallback'},
-
+                 technical_context={"processing_method": "fallback"},
                  # Processing metadata
-                 processing_method='llm',
+                 processing_method="llm",
                  processing_time_ms=0.0,
-                 confidence_score=0.0
+                 confidence_score=0.0,
              )
              results.append(result)
-
-         return results
+
+         return results
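
The hunks above cover gitflow_analytics/qualitative/core/llm_fallback.py (entry 63 in the file list). Most of the churn is mechanical formatter output: double-quoted strings, trailing commas, and built-in generics (list[dict[str, Any]] in place of typing.List/Dict). The one behavioral change is the new optional cache_dir parameter on LLMFallback.__init__, which routes CostTracker persistence into <cache_dir>/.qualitative_cache; omitting it passes cache_dir=None through to CostTracker. A minimal usage sketch against the 1.3.11 API follows; the LLMConfig fields shown are the ones this diff itself references, and treating its remaining fields as defaults is an assumption, not something the diff confirms.

    # Sketch only, not taken from the package: exercises the new cache_dir
    # argument visible in the __init__ diff above.
    from pathlib import Path

    from gitflow_analytics.qualitative.core.llm_fallback import LLMFallback
    from gitflow_analytics.qualitative.models.schemas import LLMConfig

    config = LLMConfig(
        # "${VAR}" values are resolved from the environment by _resolve_api_key()
        openrouter_api_key="${OPENROUTER_API_KEY}",
        max_daily_cost=5.0,  # USD; enforced by CostTracker and ModelRouter
    )

    # New in 1.3.11: cost-tracking state persists under <cache_dir>/.qualitative_cache
    fallback = LLMFallback(config, cache_dir=Path(".gitflow-cache"))

    commits = [
        {
            "message": "fix: handle empty webhook payloads",
            "files_changed": ["api/webhooks.py"],
            "insertions": 12,
            "deletions": 3,
        },
    ]

    # Similar commits are grouped, then each group is classified in one batched call
    for group in fallback.group_similar_commits(commits):
        for data in fallback.process_group(group):
            print(data.change_type, data.confidence_score)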