gitflow-analytics 1.0.3__py3-none-any.whl → 1.3.11__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- gitflow_analytics/_version.py +1 -1
- gitflow_analytics/classification/__init__.py +31 -0
- gitflow_analytics/classification/batch_classifier.py +752 -0
- gitflow_analytics/classification/classifier.py +464 -0
- gitflow_analytics/classification/feature_extractor.py +725 -0
- gitflow_analytics/classification/linguist_analyzer.py +574 -0
- gitflow_analytics/classification/model.py +455 -0
- gitflow_analytics/cli.py +4158 -350
- gitflow_analytics/cli_rich.py +198 -48
- gitflow_analytics/config/__init__.py +43 -0
- gitflow_analytics/config/errors.py +261 -0
- gitflow_analytics/config/loader.py +905 -0
- gitflow_analytics/config/profiles.py +264 -0
- gitflow_analytics/config/repository.py +124 -0
- gitflow_analytics/config/schema.py +444 -0
- gitflow_analytics/config/validator.py +154 -0
- gitflow_analytics/config.py +44 -508
- gitflow_analytics/core/analyzer.py +1209 -98
- gitflow_analytics/core/cache.py +1337 -29
- gitflow_analytics/core/data_fetcher.py +1285 -0
- gitflow_analytics/core/identity.py +363 -14
- gitflow_analytics/core/metrics_storage.py +526 -0
- gitflow_analytics/core/progress.py +372 -0
- gitflow_analytics/core/schema_version.py +269 -0
- gitflow_analytics/extractors/ml_tickets.py +1100 -0
- gitflow_analytics/extractors/story_points.py +8 -1
- gitflow_analytics/extractors/tickets.py +749 -11
- gitflow_analytics/identity_llm/__init__.py +6 -0
- gitflow_analytics/identity_llm/analysis_pass.py +231 -0
- gitflow_analytics/identity_llm/analyzer.py +464 -0
- gitflow_analytics/identity_llm/models.py +76 -0
- gitflow_analytics/integrations/github_integration.py +175 -11
- gitflow_analytics/integrations/jira_integration.py +461 -24
- gitflow_analytics/integrations/orchestrator.py +124 -1
- gitflow_analytics/metrics/activity_scoring.py +322 -0
- gitflow_analytics/metrics/branch_health.py +470 -0
- gitflow_analytics/metrics/dora.py +379 -20
- gitflow_analytics/models/database.py +843 -53
- gitflow_analytics/pm_framework/__init__.py +115 -0
- gitflow_analytics/pm_framework/adapters/__init__.py +50 -0
- gitflow_analytics/pm_framework/adapters/jira_adapter.py +1845 -0
- gitflow_analytics/pm_framework/base.py +406 -0
- gitflow_analytics/pm_framework/models.py +211 -0
- gitflow_analytics/pm_framework/orchestrator.py +652 -0
- gitflow_analytics/pm_framework/registry.py +333 -0
- gitflow_analytics/qualitative/__init__.py +9 -10
- gitflow_analytics/qualitative/chatgpt_analyzer.py +259 -0
- gitflow_analytics/qualitative/classifiers/__init__.py +3 -3
- gitflow_analytics/qualitative/classifiers/change_type.py +518 -244
- gitflow_analytics/qualitative/classifiers/domain_classifier.py +272 -165
- gitflow_analytics/qualitative/classifiers/intent_analyzer.py +321 -222
- gitflow_analytics/qualitative/classifiers/llm/__init__.py +35 -0
- gitflow_analytics/qualitative/classifiers/llm/base.py +193 -0
- gitflow_analytics/qualitative/classifiers/llm/batch_processor.py +383 -0
- gitflow_analytics/qualitative/classifiers/llm/cache.py +479 -0
- gitflow_analytics/qualitative/classifiers/llm/cost_tracker.py +435 -0
- gitflow_analytics/qualitative/classifiers/llm/openai_client.py +403 -0
- gitflow_analytics/qualitative/classifiers/llm/prompts.py +373 -0
- gitflow_analytics/qualitative/classifiers/llm/response_parser.py +287 -0
- gitflow_analytics/qualitative/classifiers/llm_commit_classifier.py +607 -0
- gitflow_analytics/qualitative/classifiers/risk_analyzer.py +215 -189
- gitflow_analytics/qualitative/core/__init__.py +4 -4
- gitflow_analytics/qualitative/core/llm_fallback.py +239 -235
- gitflow_analytics/qualitative/core/nlp_engine.py +157 -148
- gitflow_analytics/qualitative/core/pattern_cache.py +214 -192
- gitflow_analytics/qualitative/core/processor.py +381 -248
- gitflow_analytics/qualitative/enhanced_analyzer.py +2236 -0
- gitflow_analytics/qualitative/example_enhanced_usage.py +420 -0
- gitflow_analytics/qualitative/models/__init__.py +7 -7
- gitflow_analytics/qualitative/models/schemas.py +155 -121
- gitflow_analytics/qualitative/utils/__init__.py +4 -4
- gitflow_analytics/qualitative/utils/batch_processor.py +136 -123
- gitflow_analytics/qualitative/utils/cost_tracker.py +142 -140
- gitflow_analytics/qualitative/utils/metrics.py +172 -158
- gitflow_analytics/qualitative/utils/text_processing.py +146 -104
- gitflow_analytics/reports/__init__.py +100 -0
- gitflow_analytics/reports/analytics_writer.py +539 -14
- gitflow_analytics/reports/base.py +648 -0
- gitflow_analytics/reports/branch_health_writer.py +322 -0
- gitflow_analytics/reports/classification_writer.py +924 -0
- gitflow_analytics/reports/cli_integration.py +427 -0
- gitflow_analytics/reports/csv_writer.py +1676 -212
- gitflow_analytics/reports/data_models.py +504 -0
- gitflow_analytics/reports/database_report_generator.py +427 -0
- gitflow_analytics/reports/example_usage.py +344 -0
- gitflow_analytics/reports/factory.py +499 -0
- gitflow_analytics/reports/formatters.py +698 -0
- gitflow_analytics/reports/html_generator.py +1116 -0
- gitflow_analytics/reports/interfaces.py +489 -0
- gitflow_analytics/reports/json_exporter.py +2770 -0
- gitflow_analytics/reports/narrative_writer.py +2287 -158
- gitflow_analytics/reports/story_point_correlation.py +1144 -0
- gitflow_analytics/reports/weekly_trends_writer.py +389 -0
- gitflow_analytics/training/__init__.py +5 -0
- gitflow_analytics/training/model_loader.py +377 -0
- gitflow_analytics/training/pipeline.py +550 -0
- gitflow_analytics/tui/__init__.py +1 -1
- gitflow_analytics/tui/app.py +129 -126
- gitflow_analytics/tui/screens/__init__.py +3 -3
- gitflow_analytics/tui/screens/analysis_progress_screen.py +188 -179
- gitflow_analytics/tui/screens/configuration_screen.py +154 -178
- gitflow_analytics/tui/screens/loading_screen.py +100 -110
- gitflow_analytics/tui/screens/main_screen.py +89 -72
- gitflow_analytics/tui/screens/results_screen.py +305 -281
- gitflow_analytics/tui/widgets/__init__.py +2 -2
- gitflow_analytics/tui/widgets/data_table.py +67 -69
- gitflow_analytics/tui/widgets/export_modal.py +76 -76
- gitflow_analytics/tui/widgets/progress_widget.py +41 -46
- gitflow_analytics-1.3.11.dist-info/METADATA +1015 -0
- gitflow_analytics-1.3.11.dist-info/RECORD +122 -0
- gitflow_analytics-1.0.3.dist-info/METADATA +0 -490
- gitflow_analytics-1.0.3.dist-info/RECORD +0 -62
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.11.dist-info}/WHEEL +0 -0
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.11.dist-info}/entry_points.txt +0 -0
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.11.dist-info}/licenses/LICENSE +0 -0
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.11.dist-info}/top_level.txt +0 -0
gitflow_analytics/qualitative/classifiers/llm/prompts.py

@@ -0,0 +1,373 @@

"""Prompt templates and generation for LLM commit classification.

This module manages all prompt engineering for commit classification,
including templates, versioning, and context preparation.

WHY: Centralizing prompt management allows for easy experimentation,
A/B testing, and optimization without modifying classifier logic.

DESIGN DECISIONS:
- Version prompts for tracking and rollback capability
- Support template variables for dynamic content
- Separate system prompts from user prompts
- Include few-shot examples for better accuracy
- Make prompts provider-agnostic
"""

from dataclasses import dataclass
from enum import Enum
from pathlib import Path
from typing import Any, Optional


class PromptVersion(Enum):
    """Versions of prompt templates for A/B testing and evolution.

    WHY: Track prompt versions to measure performance improvements
    and enable rollback if newer versions perform worse.
    """

    V1_SIMPLE = "v1_simple"  # Original simple prompt
    V2_STRUCTURED = "v2_structured"  # More structured with examples
    V3_CONTEXTUAL = "v3_contextual"  # Enhanced with file context
    V4_FEWSHOT = "v4_fewshot"  # Few-shot learning with examples


@dataclass
class PromptTemplate:
    """Template for generating classification prompts.

    WHY: Structured templates ensure consistent prompt formatting
    and make it easy to swap different prompt strategies.
    """

    version: PromptVersion
    system_prompt: str
    user_prompt_template: str
    few_shot_examples: Optional[list[dict[str, str]]] = None

    def format(self, **kwargs) -> tuple[str, str]:
        """Format the prompt with provided variables.

        Args:
            **kwargs: Variables to substitute in the template

        Returns:
            Tuple of (system_prompt, user_prompt)
        """
        user_prompt = self.user_prompt_template.format(**kwargs)
        return self.system_prompt, user_prompt


class PromptGenerator:
    """Generates prompts for commit classification.

    WHY: Encapsulates all prompt engineering logic, making it easy
    to experiment with different prompt strategies and optimize
    classification accuracy.
    """

    # Streamlined categories optimized for enterprise workflows
    CATEGORIES = {
        "feature": "New functionality, capabilities, enhancements, additions",
        "bugfix": "Fixes, errors, issues, crashes, bugs, corrections",
        "maintenance": "Configuration, chores, dependencies, cleanup, refactoring, updates",
        "integration": "Third-party services, APIs, webhooks, external systems",
        "content": "Text, copy, documentation, README updates, comments",
        "media": "Video, audio, streaming, players, visual assets, images",
        "localization": "Translations, i18n, l10n, regional adaptations",
    }

    # Prompt templates for different versions
    TEMPLATES = {
        PromptVersion.V1_SIMPLE: PromptTemplate(
            version=PromptVersion.V1_SIMPLE,
            system_prompt="You are a commit classification expert.",
            user_prompt_template="""Classify this commit message into one of these 7 categories:

{categories_desc}

Commit message: "{message}"{context_info}

Respond with only: CATEGORY_NAME confidence_score reasoning
Example: feature 0.85 adds new user authentication system

Response:""",
        ),
        PromptVersion.V2_STRUCTURED: PromptTemplate(
            version=PromptVersion.V2_STRUCTURED,
            system_prompt="""You are an expert at classifying git commit messages.
Your task is to categorize commits accurately based on their content and context.
Be precise and consistent in your classifications.""",
            user_prompt_template="""Task: Classify the following git commit into exactly ONE category.

Available Categories:
{categories_desc}

Commit Information:
- Message: "{message}"
{context_info}

Output Format: CATEGORY confidence reasoning
- CATEGORY: One of the 7 categories above (lowercase)
- confidence: Float between 0.0 and 1.0
- reasoning: Brief explanation (max 10 words)

Response:""",
        ),
        PromptVersion.V3_CONTEXTUAL: PromptTemplate(
            version=PromptVersion.V3_CONTEXTUAL,
            system_prompt="""You are a specialized git commit classifier with deep understanding
of software development patterns. Consider both the commit message and file context
to make accurate classifications.""",
            user_prompt_template="""Analyze this commit and classify it into the most appropriate category.

Categories (choose ONE):
{categories_desc}

Commit Details:
Message: "{message}"
{context_info}

Classification Rules:
1. Focus on the PRIMARY purpose of the commit
2. Consider file types and patterns for additional context
3. If multiple categories apply, choose the most significant one
4. Be confident in clear cases, conservative when ambiguous

Format: CATEGORY confidence reasoning
Response:""",
        ),
        PromptVersion.V4_FEWSHOT: PromptTemplate(
            version=PromptVersion.V4_FEWSHOT,
            system_prompt="""You are an expert commit classifier. Classify commits based on
the examples provided and return results in the exact format shown.""",
            user_prompt_template="""Learn from these examples, then classify the new commit.

Examples:
{examples}

Categories:
{categories_desc}

Now classify this commit:
Message: "{message}"
{context_info}

Response (format: CATEGORY confidence reasoning):""",
            few_shot_examples=[
                {
                    "message": "feat: add user authentication",
                    "response": "feature 0.95 adds authentication functionality",
                },
                {
                    "message": "fix: resolve login crash",
                    "response": "bugfix 0.90 fixes crash issue",
                },
                {
                    "message": "chore: update dependencies",
                    "response": "maintenance 0.85 dependency updates",
                },
                {"message": "docs: update README", "response": "content 0.95 documentation update"},
                {
                    "message": "feat: add Spanish translations",
                    "response": "localization 0.90 adds language support",
                },
            ],
        ),
    }

    def __init__(self, version: PromptVersion = PromptVersion.V3_CONTEXTUAL):
        """Initialize prompt generator with specified version.

        Args:
            version: Prompt template version to use
        """
        self.version = version
        self.template = self.TEMPLATES[version]
        self.domain_terms = self._get_default_domain_terms()

    def _get_default_domain_terms(self) -> dict[str, list[str]]:
        """Get default domain-specific terms for context enhancement.

        WHY: Domain-specific terms help the LLM understand the context
        better and make more accurate classifications.
        """
        return {
            "media": [
                "video",
                "audio",
                "streaming",
                "player",
                "media",
                "content",
                "broadcast",
                "live",
                "recording",
                "episode",
                "program",
            ],
            "localization": [
                "translation",
                "i18n",
                "l10n",
                "locale",
                "language",
                "spanish",
                "french",
                "german",
                "italian",
                "portuguese",
                "multilingual",
            ],
            "integration": [
                "api",
                "webhook",
                "third-party",
                "external",
                "service",
                "integration",
                "sync",
                "import",
                "export",
                "connector",
            ],
        }

    def prepare_context(
        self, message: str, files_changed: Optional[list[str]] = None
    ) -> dict[str, Any]:
        """Prepare context information from commit data.

        Args:
            message: Commit message
            files_changed: Optional list of changed files

        Returns:
            Context dictionary with relevant information
        """
        context = {"file_extensions": [], "file_patterns": [], "domain_indicators": []}

        if files_changed:
            # Extract file extensions
            extensions = set()
            for file_path in files_changed:
                ext = Path(file_path).suffix.lower()
                if ext:
                    extensions.add(ext)
            context["file_extensions"] = list(extensions)

            # Look for specific file patterns
            patterns = []
            for file_path in files_changed:
                file_lower = file_path.lower()
                if any(
                    term in file_lower for term in ["config", "settings", ".env", ".yaml", ".json"]
                ):
                    patterns.append("configuration")
                elif any(term in file_lower for term in ["test", "spec", "__test__"]):
                    patterns.append("test")
                elif any(term in file_lower for term in ["doc", "readme", "changelog"]):
                    patterns.append("documentation")
                elif any(
                    term in file_lower for term in ["video", "audio", "media", ".mp4", ".mp3"]
                ):
                    patterns.append("media")
            context["file_patterns"] = list(set(patterns))

        # Check for domain-specific terms in message
        message_lower = message.lower()
        for domain, terms in self.domain_terms.items():
            if any(term in message_lower for term in terms):
                context["domain_indicators"].append(domain)

        return context

    def generate_prompt(
        self, message: str, files_changed: Optional[list[str]] = None, include_examples: bool = True
    ) -> tuple[str, str]:
        """Generate classification prompt for the given commit.

        Args:
            message: Commit message to classify
            files_changed: Optional list of changed files
            include_examples: Whether to include few-shot examples

        Returns:
            Tuple of (system_prompt, user_prompt)
        """
        # Prepare context
        context = self.prepare_context(message, files_changed)

        # Format context information
        context_info = self._format_context(context)

        # Format categories description
        categories_desc = "\n".join([f"- {cat}: {desc}" for cat, desc in self.CATEGORIES.items()])

        # Prepare examples if needed
        examples = ""
        if include_examples and self.template.few_shot_examples:
            examples = self._format_examples(self.template.few_shot_examples)

        # Format the prompt
        return self.template.format(
            message=message,
            context_info=context_info,
            categories_desc=categories_desc,
            examples=examples,
        )

    def _format_context(self, context: dict[str, Any]) -> str:
        """Format context information for inclusion in prompt.

        Args:
            context: Context dictionary

        Returns:
            Formatted context string
        """
        parts = []

        if context.get("file_extensions"):
            parts.append(f"File types: {', '.join(context['file_extensions'])}")

        if context.get("file_patterns"):
            parts.append(f"File patterns: {', '.join(context['file_patterns'])}")

        if context.get("domain_indicators"):
            parts.append(f"Domain indicators: {', '.join(context['domain_indicators'])}")

        if parts:
            return "\n" + "\n".join(parts)
        return ""

    def _format_examples(self, examples: list[dict[str, str]]) -> str:
        """Format few-shot examples for inclusion in prompt.

        Args:
            examples: List of example classifications

        Returns:
            Formatted examples string
        """
        formatted = []
        for i, example in enumerate(examples, 1):
            formatted.append(f"{i}. Message: \"{example['message']}\"")
            formatted.append(f"   Response: {example['response']}")
        return "\n".join(formatted)

    def get_version_info(self) -> dict[str, Any]:
        """Get information about the current prompt version.

        Returns:
            Dictionary with version information
        """
        return {
            "version": self.version.value,
            "has_few_shot": bool(self.template.few_shot_examples),
            "num_examples": (
                len(self.template.few_shot_examples) if self.template.few_shot_examples else 0
            ),
            "categories": list(self.CATEGORIES.keys()),
        }
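The prompts module above is self-contained string formatting, so it can be exercised without any LLM provider configured. Below is a minimal usage sketch, assuming gitflow-analytics 1.3.11 is installed; the import path comes from the file list above, while the sample commit message and file paths are invented for illustration.

# Minimal usage sketch (not part of the diff): drive the V4 few-shot template end to end.
# Assumes gitflow-analytics 1.3.11 is installed; the commit message and file paths are made up.
from gitflow_analytics.qualitative.classifiers.llm.prompts import PromptGenerator, PromptVersion

generator = PromptGenerator(version=PromptVersion.V4_FEWSHOT)

# files_changed feeds prepare_context(), which adds file-type, file-pattern and
# domain-indicator hints into the {context_info} slot of the template.
system_prompt, user_prompt = generator.generate_prompt(
    message="feat: add Spanish subtitles to the video player",
    files_changed=["src/player/subtitles.ts", "locales/es.json"],
    include_examples=True,
)

print(system_prompt)   # static instruction block sent as the system message
print(user_prompt)     # categories, few-shot examples, commit message, context hints
print(generator.get_version_info())  # e.g. {'version': 'v4_fewshot', 'has_few_shot': True, ...}

Because generate_prompt() only assembles strings, this runs offline; the provider-specific call happens elsewhere in the llm package.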
gitflow_analytics/qualitative/classifiers/llm/response_parser.py

@@ -0,0 +1,287 @@

"""Response parsing and validation for LLM outputs.

This module handles parsing of LLM responses into structured classification
results, including validation and error handling.

WHY: LLM responses can be unpredictable. Robust parsing with fallbacks
ensures the system remains stable even with unexpected outputs.

DESIGN DECISIONS:
- Support multiple response formats for flexibility
- Validate categories against known categories
- Extract confidence scores with bounds checking
- Parse reasoning text safely
- Provide detailed error messages for debugging
"""

import logging
import re
from typing import Optional

logger = logging.getLogger(__name__)


class ResponseParser:
    """Parses and validates LLM classification responses.

    WHY: Centralizing response parsing logic makes it easier to handle
    different response formats and add new parsing strategies.
    """

    def __init__(self):
        """Initialize response parser."""
        # Regex patterns for different response formats
        self.patterns = {
            "standard": re.compile(r"^(\w+)\s+([\d.]+)\s+(.*)$", re.IGNORECASE),
            "colon_separated": re.compile(r"^(\w+):\s*([\d.]+)[,\s]+(.*)$", re.IGNORECASE),
            "json_like": re.compile(
                r'["\']?category["\']?\s*:\s*["\']?(\w+)["\']?.*?["\']?confidence["\']?\s*:\s*([\d.]+)',
                re.IGNORECASE | re.DOTALL,
            ),
            "simple": re.compile(r"^(\w+)\s+([\d.]+)$", re.IGNORECASE),
        }

    def parse_response(
        self, response: str, valid_categories: dict[str, str]
    ) -> tuple[str, float, str]:
        """Parse LLM response to extract classification components.

        Args:
            response: Raw LLM response text
            valid_categories: Dictionary of valid category names

        Returns:
            Tuple of (category, confidence, reasoning)
        """
        if not response:
            logger.warning("Empty response from LLM")
            return self._fallback_result("Empty response")

        # Clean the response
        response = response.strip()

        # Try each parsing pattern
        for pattern_name, pattern in self.patterns.items():
            match = pattern.match(response)
            if match:
                return self._process_match(match, pattern_name, valid_categories)

        # Try to extract just the category if nothing else works
        category = self._extract_category_fuzzy(response, valid_categories)
        if category:
            logger.debug(f"Fuzzy matched category: {category} from response: {response}")
            return category, 0.5, "Fuzzy match from response"

        # Complete fallback
        logger.warning(f"Could not parse response: {response}")
        return self._fallback_result(f"Parse failed: {response[:50]}")

    def _process_match(
        self, match: re.Match, pattern_name: str, valid_categories: dict[str, str]
    ) -> tuple[str, float, str]:
        """Process a regex match to extract classification components.

        Args:
            match: Regex match object
            pattern_name: Name of the pattern that matched
            valid_categories: Dictionary of valid categories

        Returns:
            Tuple of (category, confidence, reasoning)
        """
        groups = match.groups()

        # Extract category
        category = groups[0].lower().strip()

        # Validate category
        if category not in valid_categories:
            # Try to find closest match
            category = self._find_closest_category(category, valid_categories)
            if not category:
                return self._fallback_result(f"Invalid category: {groups[0]}")

        # Extract confidence
        confidence = 0.5  # Default
        if len(groups) > 1:
            try:
                confidence = float(groups[1])
                # Clamp to valid range
                confidence = max(0.0, min(1.0, confidence))
            except (ValueError, TypeError):
                logger.debug(f"Could not parse confidence: {groups[1]}")

        # Extract reasoning
        reasoning = "No reasoning provided"
        if len(groups) > 2 and groups[2]:
            reasoning = groups[2].strip()
            # Clean up reasoning
            reasoning = self._clean_reasoning(reasoning)
        elif pattern_name == "simple":
            reasoning = f"Classified as {category}"

        return category, confidence, reasoning

    def _extract_category_fuzzy(
        self, response: str, valid_categories: dict[str, str]
    ) -> Optional[str]:
        """Try to extract a category using fuzzy matching.

        WHY: Sometimes LLMs include extra text or formatting that
        breaks strict parsing but the category is still identifiable.

        Args:
            response: Response text to search
            valid_categories: Dictionary of valid categories

        Returns:
            Matched category or None
        """
        response_lower = response.lower()

        # Look for exact category names in the response
        for category in valid_categories:
            if category in response_lower:
                # Check it's not part of another word
                pattern = r"\b" + re.escape(category) + r"\b"
                if re.search(pattern, response_lower):
                    return category

        # Look for category descriptions
        for category, description in valid_categories.items():
            # Check if key terms from description appear
            key_terms = description.lower().split(",")[0].split()
            if len(key_terms) > 0 and key_terms[0] in response_lower:
                return category

        return None

    def _find_closest_category(
        self, candidate: str, valid_categories: dict[str, str]
    ) -> Optional[str]:
        """Find the closest matching category for a candidate.

        WHY: Handle minor typos or variations in category names
        to improve robustness.

        Args:
            candidate: Candidate category name
            valid_categories: Dictionary of valid categories

        Returns:
            Closest matching category or None
        """
        candidate_lower = candidate.lower()

        # Check for common variations
        variations = {
            "bug": "bugfix",
            "fix": "bugfix",
            "bugs": "bugfix",
            "feat": "feature",
            "features": "feature",
            "maint": "maintenance",
            "maintain": "maintenance",
            "chore": "maintenance",
            "docs": "content",
            "documentation": "content",
            "doc": "content",
            "i18n": "localization",
            "l10n": "localization",
            "translation": "localization",
            "integrate": "integration",
            "api": "integration",
            "video": "media",
            "audio": "media",
        }

        if candidate_lower in variations:
            matched = variations[candidate_lower]
            if matched in valid_categories:
                return matched

        # Check for partial matches
        for category in valid_categories:
            if candidate_lower.startswith(category[:3]):
                return category
            if category.startswith(candidate_lower[:3]):
                return category

        return None

    def _clean_reasoning(self, reasoning: str) -> str:
        """Clean up reasoning text.

        Args:
            reasoning: Raw reasoning text

        Returns:
            Cleaned reasoning text
        """
        # Remove extra whitespace
        reasoning = " ".join(reasoning.split())

        # Remove quotes if present
        if reasoning.startswith('"') and reasoning.endswith('"'):
            reasoning = reasoning[1:-1]
        if reasoning.startswith("'") and reasoning.endswith("'"):
            reasoning = reasoning[1:-1]

        # Truncate if too long
        max_length = 200
        if len(reasoning) > max_length:
            reasoning = reasoning[:max_length] + "..."

        # Ensure it's not empty
        if not reasoning:
            reasoning = "No reasoning provided"

        return reasoning

    def _fallback_result(self, error_context: str) -> tuple[str, float, str]:
        """Generate a fallback result when parsing fails.

        Args:
            error_context: Context about the parsing failure

        Returns:
            Tuple of (category, confidence, reasoning)
        """
        return "maintenance", 0.1, f"Parse error: {error_context}"

    def validate_classification(
        self, category: str, confidence: float, valid_categories: dict[str, str]
    ) -> tuple[str, float, bool]:
        """Validate and potentially correct a classification.

        Args:
            category: Classified category
            confidence: Confidence score
            valid_categories: Dictionary of valid categories

        Returns:
            Tuple of (category, confidence, is_valid)
        """
        is_valid = True

        # Check category validity
        if category not in valid_categories:
            # Try to correct
            corrected = self._find_closest_category(category, valid_categories)
            if corrected:
                logger.debug(f"Corrected category {category} to {corrected}")
                category = corrected
                confidence *= 0.8  # Reduce confidence for correction
            else:
                logger.warning(f"Invalid category {category}, defaulting to maintenance")
                category = "maintenance"
                confidence = 0.1
                is_valid = False

        # Validate confidence bounds
        if confidence < 0 or confidence > 1:
            logger.warning(f"Invalid confidence {confidence}, clamping to [0, 1]")
            confidence = max(0.0, min(1.0, confidence))
            is_valid = False

        return category, confidence, is_valid
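ResponseParser pairs naturally with the CATEGORIES dictionary from the prompts module. Below is a minimal usage sketch, again assuming gitflow-analytics 1.3.11 is installed and using invented reply strings rather than real model output.

# Minimal usage sketch (not part of the diff): parse a few plausible LLM replies.
from gitflow_analytics.qualitative.classifiers.llm.prompts import PromptGenerator
from gitflow_analytics.qualitative.classifiers.llm.response_parser import ResponseParser

parser = ResponseParser()
categories = PromptGenerator.CATEGORIES

# A well-formed "CATEGORY confidence reasoning" reply matches the "standard" pattern.
print(parser.parse_response("feature 0.85 adds new user authentication system", categories))
# -> ('feature', 0.85, 'adds new user authentication system')

# A shorthand like "feat" is mapped back to "feature" via the variations table.
print(parser.parse_response("feat: 0.9, conventional-commit style reply", categories))

# Unparseable text degrades to the low-confidence maintenance default.
print(parser.parse_response("I am not sure about this one.", categories))
# -> ('maintenance', 0.1, 'Parse error: Parse failed: I am not sure about this one.')

# validate_classification() corrects near-miss categories and clamps confidence.
print(parser.validate_classification("docs", 1.4, categories))
# -> ('content', 1.0, False)

The hard fallback to ("maintenance", 0.1, ...) means a malformed reply degrades gracefully instead of raising, which is the stability goal stated in the module docstring.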