gitflow-analytics 1.0.3__py3-none-any.whl → 1.3.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116)
  1. gitflow_analytics/_version.py +1 -1
  2. gitflow_analytics/classification/__init__.py +31 -0
  3. gitflow_analytics/classification/batch_classifier.py +752 -0
  4. gitflow_analytics/classification/classifier.py +464 -0
  5. gitflow_analytics/classification/feature_extractor.py +725 -0
  6. gitflow_analytics/classification/linguist_analyzer.py +574 -0
  7. gitflow_analytics/classification/model.py +455 -0
  8. gitflow_analytics/cli.py +4158 -350
  9. gitflow_analytics/cli_rich.py +198 -48
  10. gitflow_analytics/config/__init__.py +43 -0
  11. gitflow_analytics/config/errors.py +261 -0
  12. gitflow_analytics/config/loader.py +905 -0
  13. gitflow_analytics/config/profiles.py +264 -0
  14. gitflow_analytics/config/repository.py +124 -0
  15. gitflow_analytics/config/schema.py +444 -0
  16. gitflow_analytics/config/validator.py +154 -0
  17. gitflow_analytics/config.py +44 -508
  18. gitflow_analytics/core/analyzer.py +1209 -98
  19. gitflow_analytics/core/cache.py +1337 -29
  20. gitflow_analytics/core/data_fetcher.py +1285 -0
  21. gitflow_analytics/core/identity.py +363 -14
  22. gitflow_analytics/core/metrics_storage.py +526 -0
  23. gitflow_analytics/core/progress.py +372 -0
  24. gitflow_analytics/core/schema_version.py +269 -0
  25. gitflow_analytics/extractors/ml_tickets.py +1100 -0
  26. gitflow_analytics/extractors/story_points.py +8 -1
  27. gitflow_analytics/extractors/tickets.py +749 -11
  28. gitflow_analytics/identity_llm/__init__.py +6 -0
  29. gitflow_analytics/identity_llm/analysis_pass.py +231 -0
  30. gitflow_analytics/identity_llm/analyzer.py +464 -0
  31. gitflow_analytics/identity_llm/models.py +76 -0
  32. gitflow_analytics/integrations/github_integration.py +175 -11
  33. gitflow_analytics/integrations/jira_integration.py +461 -24
  34. gitflow_analytics/integrations/orchestrator.py +124 -1
  35. gitflow_analytics/metrics/activity_scoring.py +322 -0
  36. gitflow_analytics/metrics/branch_health.py +470 -0
  37. gitflow_analytics/metrics/dora.py +379 -20
  38. gitflow_analytics/models/database.py +843 -53
  39. gitflow_analytics/pm_framework/__init__.py +115 -0
  40. gitflow_analytics/pm_framework/adapters/__init__.py +50 -0
  41. gitflow_analytics/pm_framework/adapters/jira_adapter.py +1845 -0
  42. gitflow_analytics/pm_framework/base.py +406 -0
  43. gitflow_analytics/pm_framework/models.py +211 -0
  44. gitflow_analytics/pm_framework/orchestrator.py +652 -0
  45. gitflow_analytics/pm_framework/registry.py +333 -0
  46. gitflow_analytics/qualitative/__init__.py +9 -10
  47. gitflow_analytics/qualitative/chatgpt_analyzer.py +259 -0
  48. gitflow_analytics/qualitative/classifiers/__init__.py +3 -3
  49. gitflow_analytics/qualitative/classifiers/change_type.py +518 -244
  50. gitflow_analytics/qualitative/classifiers/domain_classifier.py +272 -165
  51. gitflow_analytics/qualitative/classifiers/intent_analyzer.py +321 -222
  52. gitflow_analytics/qualitative/classifiers/llm/__init__.py +35 -0
  53. gitflow_analytics/qualitative/classifiers/llm/base.py +193 -0
  54. gitflow_analytics/qualitative/classifiers/llm/batch_processor.py +383 -0
  55. gitflow_analytics/qualitative/classifiers/llm/cache.py +479 -0
  56. gitflow_analytics/qualitative/classifiers/llm/cost_tracker.py +435 -0
  57. gitflow_analytics/qualitative/classifiers/llm/openai_client.py +403 -0
  58. gitflow_analytics/qualitative/classifiers/llm/prompts.py +373 -0
  59. gitflow_analytics/qualitative/classifiers/llm/response_parser.py +287 -0
  60. gitflow_analytics/qualitative/classifiers/llm_commit_classifier.py +607 -0
  61. gitflow_analytics/qualitative/classifiers/risk_analyzer.py +215 -189
  62. gitflow_analytics/qualitative/core/__init__.py +4 -4
  63. gitflow_analytics/qualitative/core/llm_fallback.py +239 -235
  64. gitflow_analytics/qualitative/core/nlp_engine.py +157 -148
  65. gitflow_analytics/qualitative/core/pattern_cache.py +214 -192
  66. gitflow_analytics/qualitative/core/processor.py +381 -248
  67. gitflow_analytics/qualitative/enhanced_analyzer.py +2236 -0
  68. gitflow_analytics/qualitative/example_enhanced_usage.py +420 -0
  69. gitflow_analytics/qualitative/models/__init__.py +7 -7
  70. gitflow_analytics/qualitative/models/schemas.py +155 -121
  71. gitflow_analytics/qualitative/utils/__init__.py +4 -4
  72. gitflow_analytics/qualitative/utils/batch_processor.py +136 -123
  73. gitflow_analytics/qualitative/utils/cost_tracker.py +142 -140
  74. gitflow_analytics/qualitative/utils/metrics.py +172 -158
  75. gitflow_analytics/qualitative/utils/text_processing.py +146 -104
  76. gitflow_analytics/reports/__init__.py +100 -0
  77. gitflow_analytics/reports/analytics_writer.py +539 -14
  78. gitflow_analytics/reports/base.py +648 -0
  79. gitflow_analytics/reports/branch_health_writer.py +322 -0
  80. gitflow_analytics/reports/classification_writer.py +924 -0
  81. gitflow_analytics/reports/cli_integration.py +427 -0
  82. gitflow_analytics/reports/csv_writer.py +1676 -212
  83. gitflow_analytics/reports/data_models.py +504 -0
  84. gitflow_analytics/reports/database_report_generator.py +427 -0
  85. gitflow_analytics/reports/example_usage.py +344 -0
  86. gitflow_analytics/reports/factory.py +499 -0
  87. gitflow_analytics/reports/formatters.py +698 -0
  88. gitflow_analytics/reports/html_generator.py +1116 -0
  89. gitflow_analytics/reports/interfaces.py +489 -0
  90. gitflow_analytics/reports/json_exporter.py +2770 -0
  91. gitflow_analytics/reports/narrative_writer.py +2287 -158
  92. gitflow_analytics/reports/story_point_correlation.py +1144 -0
  93. gitflow_analytics/reports/weekly_trends_writer.py +389 -0
  94. gitflow_analytics/training/__init__.py +5 -0
  95. gitflow_analytics/training/model_loader.py +377 -0
  96. gitflow_analytics/training/pipeline.py +550 -0
  97. gitflow_analytics/tui/__init__.py +1 -1
  98. gitflow_analytics/tui/app.py +129 -126
  99. gitflow_analytics/tui/screens/__init__.py +3 -3
  100. gitflow_analytics/tui/screens/analysis_progress_screen.py +188 -179
  101. gitflow_analytics/tui/screens/configuration_screen.py +154 -178
  102. gitflow_analytics/tui/screens/loading_screen.py +100 -110
  103. gitflow_analytics/tui/screens/main_screen.py +89 -72
  104. gitflow_analytics/tui/screens/results_screen.py +305 -281
  105. gitflow_analytics/tui/widgets/__init__.py +2 -2
  106. gitflow_analytics/tui/widgets/data_table.py +67 -69
  107. gitflow_analytics/tui/widgets/export_modal.py +76 -76
  108. gitflow_analytics/tui/widgets/progress_widget.py +41 -46
  109. gitflow_analytics-1.3.11.dist-info/METADATA +1015 -0
  110. gitflow_analytics-1.3.11.dist-info/RECORD +122 -0
  111. gitflow_analytics-1.0.3.dist-info/METADATA +0 -490
  112. gitflow_analytics-1.0.3.dist-info/RECORD +0 -62
  113. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.11.dist-info}/WHEEL +0 -0
  114. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.11.dist-info}/entry_points.txt +0 -0
  115. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.11.dist-info}/licenses/LICENSE +0 -0
  116. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.11.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,403 @@
1
+ """OpenAI and OpenRouter API client for LLM classification.
2
+
3
+ This module handles all OpenAI-compatible API interactions, including
4
+ OpenRouter which provides access to multiple models through a unified API.
5
+
6
+ WHY: Separating API interaction logic from classification logic makes the
7
+ system more maintainable and allows easy addition of new providers.
8
+
9
+ DESIGN DECISIONS:
10
+ - Support both OpenAI direct and OpenRouter endpoints
11
+ - Implement exponential backoff for retries
12
+ - Handle rate limiting gracefully
13
+ - Track token usage and costs accurately
14
+ - Support different pricing models
15
+ """
16
+
17
+ import logging
18
+ import time
19
+ from dataclasses import dataclass
20
+ from pathlib import Path
21
+ from typing import Any, Optional
22
+
23
+ try:
24
+ import requests
25
+
26
+ REQUESTS_AVAILABLE = True
27
+ except ImportError:
28
+ REQUESTS_AVAILABLE = False
29
+ requests = None
30
+
31
+ from .base import BaseLLMClassifier, ClassificationResult, LLMProviderConfig
32
+ from .cost_tracker import CostTracker, ModelPricing
33
+ from .prompts import PromptGenerator, PromptVersion
34
+ from .response_parser import ResponseParser
35
+
36
+ logger = logging.getLogger(__name__)
37
+
38
+
39
@dataclass
class OpenAIConfig(LLMProviderConfig):
    """Provider configuration for OpenAI-compatible endpoints.

    WHY: OpenAI-compatible APIs have specific configuration needs
    beyond the base configuration.
    """

    # Endpoint selection; defaults to OpenRouter's OpenAI-compatible API.
    api_base_url: str = "https://openrouter.ai/api/v1"  # Default to OpenRouter
    organization: Optional[str] = None  # OpenAI organization ID

    # OpenRouter-specific attribution headers.
    site_url: str = "https://github.com/gitflow-analytics"
    app_name: str = "GitFlow Analytics"

    # When False, requests go straight to the OpenAI API instead of OpenRouter.
    use_openrouter: bool = True  # If False, use direct OpenAI API

    def validate(self) -> None:
        """Validate OpenAI-specific configuration on top of the base checks."""
        super().validate()

        if not REQUESTS_AVAILABLE:
            raise ImportError("requests library required for OpenAI/OpenRouter")

        # NOTE: the API key is deliberately NOT required here; without one the
        # classifier degrades gracefully instead of failing at config time.

        # Direct-OpenAI mode must never point at an OpenRouter URL.
        if not self.use_openrouter and "openrouter" in self.api_base_url:
            self.api_base_url = "https://api.openai.com/v1"
71
+
72
+
73
+ class OpenAIClassifier(BaseLLMClassifier):
74
+ """OpenAI/OpenRouter-based commit classifier.
75
+
76
+ WHY: OpenAI and OpenRouter provide high-quality language models
77
+ for classification. This implementation supports both providers
78
+ through their compatible APIs.
79
+ """
80
+
81
+ def __init__(
82
+ self,
83
+ config: OpenAIConfig,
84
+ cache_dir: Optional[Path] = None,
85
+ prompt_version: PromptVersion = PromptVersion.V3_CONTEXTUAL,
86
+ ):
87
+ """Initialize OpenAI classifier.
88
+
89
+ Args:
90
+ config: OpenAI-specific configuration
91
+ cache_dir: Directory for caching predictions
92
+ prompt_version: Version of prompts to use
93
+ """
94
+ super().__init__(config, cache_dir)
95
+ self.config: OpenAIConfig = config
96
+
97
+ # Initialize components
98
+ self.prompt_generator = PromptGenerator(prompt_version)
99
+ self.response_parser = ResponseParser()
100
+ self.cost_tracker = CostTracker()
101
+
102
+ # Set up model pricing
103
+ self._setup_pricing()
104
+
105
+ # Rate limiting state
106
+ self._last_request_time = 0
107
+ self._request_count = 0
108
+ self._minute_start = time.time()
109
+
110
+ logger.info(f"OpenAIClassifier initialized with model: {config.model}")
111
+
112
+ def _setup_pricing(self) -> None:
113
+ """Set up pricing information for the configured model.
114
+
115
+ WHY: Accurate cost tracking helps users monitor and control
116
+ their LLM usage expenses.
117
+ """
118
+ # Common model pricing (per 1M tokens)
119
+ pricing_map = {
120
+ "gpt-4": ModelPricing("gpt-4", 30.0, 60.0),
121
+ "gpt-4-turbo": ModelPricing("gpt-4-turbo", 10.0, 30.0),
122
+ "gpt-3.5-turbo": ModelPricing("gpt-3.5-turbo", 0.5, 1.5),
123
+ "mistralai/mistral-7b-instruct": ModelPricing("mistral-7b", 0.25, 0.25),
124
+ "meta-llama/llama-2-70b-chat": ModelPricing("llama-2-70b", 0.7, 0.9),
125
+ "anthropic/claude-2": ModelPricing("claude-2", 8.0, 24.0),
126
+ }
127
+
128
+ # Find matching pricing or use default
129
+ model_lower = self.config.model.lower()
130
+ for model_key, pricing in pricing_map.items():
131
+ if model_key in model_lower:
132
+ self.cost_tracker.set_model_pricing(pricing)
133
+ return
134
+
135
+ # Default pricing for unknown models
136
+ self.cost_tracker.set_model_pricing(ModelPricing(self.config.model, 1.0, 1.0))
137
+
138
+ def get_provider_name(self) -> str:
139
+ """Get the name of the LLM provider."""
140
+ if self.config.use_openrouter:
141
+ return "openrouter"
142
+ return "openai"
143
+
144
+ def classify_commit(
145
+ self, message: str, files_changed: Optional[list[str]] = None
146
+ ) -> ClassificationResult:
147
+ """Classify a single commit message.
148
+
149
+ Args:
150
+ message: Commit message to classify
151
+ files_changed: Optional list of changed files
152
+
153
+ Returns:
154
+ Classification result
155
+ """
156
+ start_time = time.time()
157
+
158
+ # Validate input
159
+ if not message or not message.strip():
160
+ return ClassificationResult(
161
+ category="maintenance",
162
+ confidence=0.3,
163
+ method="empty_message",
164
+ reasoning="Empty commit message",
165
+ model="none",
166
+ alternatives=[],
167
+ processing_time_ms=(time.time() - start_time) * 1000,
168
+ )
169
+
170
+ # Apply rate limiting
171
+ self._apply_rate_limiting()
172
+
173
+ # Generate prompt
174
+ system_prompt, user_prompt = self.prompt_generator.generate_prompt(message, files_changed)
175
+
176
+ # Make API request with retries
177
+ for attempt in range(self.config.max_retries):
178
+ try:
179
+ response_text, tokens_used = self._make_api_request(system_prompt, user_prompt)
180
+
181
+ # Parse response
182
+ category, confidence, reasoning = self.response_parser.parse_response(
183
+ response_text, self.prompt_generator.CATEGORIES
184
+ )
185
+
186
+ # Track costs
187
+ prompt_tokens = self._estimate_tokens(system_prompt + user_prompt)
188
+ completion_tokens = tokens_used - prompt_tokens if tokens_used else 50
189
+ cost = self.cost_tracker.track_usage(prompt_tokens, completion_tokens)
190
+
191
+ # Update statistics
192
+ self.total_tokens_used += (
193
+ tokens_used if tokens_used else prompt_tokens + completion_tokens
194
+ )
195
+ self.total_cost += cost
196
+ self.api_calls_made += 1
197
+
198
+ return ClassificationResult(
199
+ category=category,
200
+ confidence=confidence,
201
+ method="llm",
202
+ reasoning=reasoning,
203
+ model=self.config.model,
204
+ alternatives=[],
205
+ processing_time_ms=(time.time() - start_time) * 1000,
206
+ )
207
+
208
+ except Exception as e:
209
+ logger.warning(f"API request attempt {attempt + 1} failed: {e}")
210
+ if attempt < self.config.max_retries - 1:
211
+ time.sleep(self.config.retry_delay_seconds * (2**attempt))
212
+ else:
213
+ # Final attempt failed, return fallback
214
+ return ClassificationResult(
215
+ category="maintenance",
216
+ confidence=0.1,
217
+ method="llm_error",
218
+ reasoning=f"LLM classification failed: {str(e)}",
219
+ model="fallback",
220
+ alternatives=[],
221
+ processing_time_ms=(time.time() - start_time) * 1000,
222
+ )
223
+
224
+ # Should never reach here
225
+ return ClassificationResult(
226
+ category="maintenance",
227
+ confidence=0.1,
228
+ method="llm_error",
229
+ reasoning="Unexpected error in classification",
230
+ model="fallback",
231
+ alternatives=[],
232
+ processing_time_ms=(time.time() - start_time) * 1000,
233
+ )
234
+
235
+ def classify_commits_batch(
236
+ self, commits: list[dict[str, Any]], batch_id: Optional[str] = None
237
+ ) -> list[ClassificationResult]:
238
+ """Classify a batch of commits.
239
+
240
+ WHY: Batch processing can be more efficient for large numbers
241
+ of commits, though this implementation processes them serially
242
+ to respect rate limits.
243
+
244
+ Args:
245
+ commits: List of commit dictionaries
246
+ batch_id: Optional batch identifier
247
+
248
+ Returns:
249
+ List of classification results
250
+ """
251
+ results = []
252
+
253
+ for commit in commits:
254
+ message = commit.get("message", "")
255
+ files_changed = []
256
+
257
+ # Extract files from commit data
258
+ if "files_changed" in commit:
259
+ fc = commit["files_changed"]
260
+ if isinstance(fc, list):
261
+ files_changed = fc
262
+
263
+ # Classify individual commit
264
+ result = self.classify_commit(message, files_changed)
265
+
266
+ # Add batch ID if provided
267
+ if batch_id:
268
+ result.batch_id = batch_id
269
+
270
+ results.append(result)
271
+
272
+ return results
273
+
274
+ def _make_api_request(self, system_prompt: str, user_prompt: str) -> tuple[str, int]:
275
+ """Make API request to OpenAI/OpenRouter.
276
+
277
+ Args:
278
+ system_prompt: System prompt for the model
279
+ user_prompt: User prompt with the classification task
280
+
281
+ Returns:
282
+ Tuple of (response_text, tokens_used)
283
+
284
+ Raises:
285
+ Exception: If API request fails
286
+ """
287
+ if not self.config.api_key:
288
+ raise ValueError("API key not configured - cannot make LLM requests")
289
+
290
+ headers = {
291
+ "Authorization": f"Bearer {self.config.api_key}",
292
+ "Content-Type": "application/json",
293
+ }
294
+
295
+ # Add OpenRouter-specific headers
296
+ if self.config.use_openrouter:
297
+ headers["HTTP-Referer"] = self.config.site_url
298
+ headers["X-Title"] = self.config.app_name
299
+
300
+ # Add OpenAI organization if specified
301
+ if self.config.organization:
302
+ headers["OpenAI-Organization"] = self.config.organization
303
+
304
+ # Prepare request payload
305
+ payload = {
306
+ "model": self.config.model,
307
+ "messages": [
308
+ {"role": "system", "content": system_prompt},
309
+ {"role": "user", "content": user_prompt},
310
+ ],
311
+ "max_tokens": self.config.max_tokens,
312
+ "temperature": self.config.temperature,
313
+ }
314
+
315
+ # Make request
316
+ url = f"{self.config.api_base_url}/chat/completions"
317
+ response = requests.post(
318
+ url, headers=headers, json=payload, timeout=self.config.timeout_seconds
319
+ )
320
+
321
+ # Check response
322
+ if response.status_code != 200:
323
+ error_msg = f"API request failed with status {response.status_code}"
324
+ try:
325
+ error_data = response.json()
326
+ if "error" in error_data:
327
+ error_msg += f": {error_data['error'].get('message', 'Unknown error')}"
328
+ except Exception:
329
+ error_msg += f": {response.text}"
330
+ raise Exception(error_msg)
331
+
332
+ # Parse response
333
+ data = response.json()
334
+
335
+ if "choices" not in data or not data["choices"]:
336
+ raise Exception("No response choices in API response")
337
+
338
+ response_text = data["choices"][0]["message"]["content"].strip()
339
+
340
+ # Extract token usage if available
341
+ tokens_used = 0
342
+ if "usage" in data:
343
+ tokens_used = data["usage"].get("total_tokens", 0)
344
+
345
+ return response_text, tokens_used
346
+
347
+ def _apply_rate_limiting(self) -> None:
348
+ """Apply rate limiting to respect API limits.
349
+
350
+ WHY: Prevents hitting API rate limits which would cause
351
+ errors and potential account suspension.
352
+ """
353
+ current_time = time.time()
354
+
355
+ # Check if we're in a new minute
356
+ if current_time - self._minute_start >= 60:
357
+ self._request_count = 0
358
+ self._minute_start = current_time
359
+
360
+ # If we've hit the per-minute limit, wait
361
+ if self._request_count >= self.config.max_requests_per_minute:
362
+ sleep_time = 60 - (current_time - self._minute_start)
363
+ if sleep_time > 0:
364
+ logger.debug(f"Rate limiting: sleeping for {sleep_time:.1f} seconds")
365
+ time.sleep(sleep_time)
366
+ self._request_count = 0
367
+ self._minute_start = time.time()
368
+
369
+ # Increment request count
370
+ self._request_count += 1
371
+ self._last_request_time = time.time()
372
+
373
+ def _estimate_tokens(self, text: str) -> int:
374
+ """Estimate token count for text.
375
+
376
+ WHY: Token estimation helps track costs even when the API
377
+ doesn't return exact token counts.
378
+
379
+ Args:
380
+ text: Text to estimate tokens for
381
+
382
+ Returns:
383
+ Estimated token count
384
+ """
385
+ # Simple estimation: ~4 characters per token on average
386
+ # This is a rough approximation; actual tokenization varies
387
+ return len(text) // 4
388
+
389
+ def estimate_cost(self, text: str) -> float:
390
+ """Estimate the cost of classifying the given text.
391
+
392
+ Args:
393
+ text: Text to be classified
394
+
395
+ Returns:
396
+ Estimated cost in USD
397
+ """
398
+ # Estimate tokens for the full prompt
399
+ system_prompt = "You are a commit classification expert." # Simplified
400
+ prompt_tokens = self._estimate_tokens(system_prompt + text) + 100 # Add buffer
401
+ completion_tokens = self.config.max_tokens
402
+
403
+ return self.cost_tracker.calculate_cost(prompt_tokens, completion_tokens)