gitflow-analytics 1.0.3__py3-none-any.whl → 1.3.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gitflow_analytics/_version.py +1 -1
- gitflow_analytics/classification/__init__.py +31 -0
- gitflow_analytics/classification/batch_classifier.py +752 -0
- gitflow_analytics/classification/classifier.py +464 -0
- gitflow_analytics/classification/feature_extractor.py +725 -0
- gitflow_analytics/classification/linguist_analyzer.py +574 -0
- gitflow_analytics/classification/model.py +455 -0
- gitflow_analytics/cli.py +4108 -350
- gitflow_analytics/cli_rich.py +198 -48
- gitflow_analytics/config/__init__.py +43 -0
- gitflow_analytics/config/errors.py +261 -0
- gitflow_analytics/config/loader.py +904 -0
- gitflow_analytics/config/profiles.py +264 -0
- gitflow_analytics/config/repository.py +124 -0
- gitflow_analytics/config/schema.py +441 -0
- gitflow_analytics/config/validator.py +154 -0
- gitflow_analytics/config.py +44 -508
- gitflow_analytics/core/analyzer.py +1209 -98
- gitflow_analytics/core/cache.py +1337 -29
- gitflow_analytics/core/data_fetcher.py +1193 -0
- gitflow_analytics/core/identity.py +363 -14
- gitflow_analytics/core/metrics_storage.py +526 -0
- gitflow_analytics/core/progress.py +372 -0
- gitflow_analytics/core/schema_version.py +269 -0
- gitflow_analytics/extractors/ml_tickets.py +1100 -0
- gitflow_analytics/extractors/story_points.py +8 -1
- gitflow_analytics/extractors/tickets.py +749 -11
- gitflow_analytics/identity_llm/__init__.py +6 -0
- gitflow_analytics/identity_llm/analysis_pass.py +231 -0
- gitflow_analytics/identity_llm/analyzer.py +464 -0
- gitflow_analytics/identity_llm/models.py +76 -0
- gitflow_analytics/integrations/github_integration.py +175 -11
- gitflow_analytics/integrations/jira_integration.py +461 -24
- gitflow_analytics/integrations/orchestrator.py +124 -1
- gitflow_analytics/metrics/activity_scoring.py +322 -0
- gitflow_analytics/metrics/branch_health.py +470 -0
- gitflow_analytics/metrics/dora.py +379 -20
- gitflow_analytics/models/database.py +843 -53
- gitflow_analytics/pm_framework/__init__.py +115 -0
- gitflow_analytics/pm_framework/adapters/__init__.py +50 -0
- gitflow_analytics/pm_framework/adapters/jira_adapter.py +1845 -0
- gitflow_analytics/pm_framework/base.py +406 -0
- gitflow_analytics/pm_framework/models.py +211 -0
- gitflow_analytics/pm_framework/orchestrator.py +652 -0
- gitflow_analytics/pm_framework/registry.py +333 -0
- gitflow_analytics/qualitative/__init__.py +9 -10
- gitflow_analytics/qualitative/chatgpt_analyzer.py +259 -0
- gitflow_analytics/qualitative/classifiers/__init__.py +3 -3
- gitflow_analytics/qualitative/classifiers/change_type.py +518 -244
- gitflow_analytics/qualitative/classifiers/domain_classifier.py +272 -165
- gitflow_analytics/qualitative/classifiers/intent_analyzer.py +321 -222
- gitflow_analytics/qualitative/classifiers/llm/__init__.py +35 -0
- gitflow_analytics/qualitative/classifiers/llm/base.py +193 -0
- gitflow_analytics/qualitative/classifiers/llm/batch_processor.py +383 -0
- gitflow_analytics/qualitative/classifiers/llm/cache.py +479 -0
- gitflow_analytics/qualitative/classifiers/llm/cost_tracker.py +435 -0
- gitflow_analytics/qualitative/classifiers/llm/openai_client.py +403 -0
- gitflow_analytics/qualitative/classifiers/llm/prompts.py +373 -0
- gitflow_analytics/qualitative/classifiers/llm/response_parser.py +287 -0
- gitflow_analytics/qualitative/classifiers/llm_commit_classifier.py +607 -0
- gitflow_analytics/qualitative/classifiers/risk_analyzer.py +215 -189
- gitflow_analytics/qualitative/core/__init__.py +4 -4
- gitflow_analytics/qualitative/core/llm_fallback.py +239 -235
- gitflow_analytics/qualitative/core/nlp_engine.py +157 -148
- gitflow_analytics/qualitative/core/pattern_cache.py +214 -192
- gitflow_analytics/qualitative/core/processor.py +381 -248
- gitflow_analytics/qualitative/enhanced_analyzer.py +2236 -0
- gitflow_analytics/qualitative/example_enhanced_usage.py +420 -0
- gitflow_analytics/qualitative/models/__init__.py +7 -7
- gitflow_analytics/qualitative/models/schemas.py +155 -121
- gitflow_analytics/qualitative/utils/__init__.py +4 -4
- gitflow_analytics/qualitative/utils/batch_processor.py +136 -123
- gitflow_analytics/qualitative/utils/cost_tracker.py +142 -140
- gitflow_analytics/qualitative/utils/metrics.py +172 -158
- gitflow_analytics/qualitative/utils/text_processing.py +146 -104
- gitflow_analytics/reports/__init__.py +100 -0
- gitflow_analytics/reports/analytics_writer.py +539 -14
- gitflow_analytics/reports/base.py +648 -0
- gitflow_analytics/reports/branch_health_writer.py +322 -0
- gitflow_analytics/reports/classification_writer.py +924 -0
- gitflow_analytics/reports/cli_integration.py +427 -0
- gitflow_analytics/reports/csv_writer.py +1676 -212
- gitflow_analytics/reports/data_models.py +504 -0
- gitflow_analytics/reports/database_report_generator.py +427 -0
- gitflow_analytics/reports/example_usage.py +344 -0
- gitflow_analytics/reports/factory.py +499 -0
- gitflow_analytics/reports/formatters.py +698 -0
- gitflow_analytics/reports/html_generator.py +1116 -0
- gitflow_analytics/reports/interfaces.py +489 -0
- gitflow_analytics/reports/json_exporter.py +2770 -0
- gitflow_analytics/reports/narrative_writer.py +2287 -158
- gitflow_analytics/reports/story_point_correlation.py +1144 -0
- gitflow_analytics/reports/weekly_trends_writer.py +389 -0
- gitflow_analytics/training/__init__.py +5 -0
- gitflow_analytics/training/model_loader.py +377 -0
- gitflow_analytics/training/pipeline.py +550 -0
- gitflow_analytics/tui/__init__.py +1 -1
- gitflow_analytics/tui/app.py +129 -126
- gitflow_analytics/tui/screens/__init__.py +3 -3
- gitflow_analytics/tui/screens/analysis_progress_screen.py +188 -179
- gitflow_analytics/tui/screens/configuration_screen.py +154 -178
- gitflow_analytics/tui/screens/loading_screen.py +100 -110
- gitflow_analytics/tui/screens/main_screen.py +89 -72
- gitflow_analytics/tui/screens/results_screen.py +305 -281
- gitflow_analytics/tui/widgets/__init__.py +2 -2
- gitflow_analytics/tui/widgets/data_table.py +67 -69
- gitflow_analytics/tui/widgets/export_modal.py +76 -76
- gitflow_analytics/tui/widgets/progress_widget.py +41 -46
- gitflow_analytics-1.3.6.dist-info/METADATA +1015 -0
- gitflow_analytics-1.3.6.dist-info/RECORD +122 -0
- gitflow_analytics-1.0.3.dist-info/METADATA +0 -490
- gitflow_analytics-1.0.3.dist-info/RECORD +0 -62
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/WHEEL +0 -0
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/entry_points.txt +0 -0
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/licenses/LICENSE +0 -0
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/top_level.txt +0 -0
|
@@ -1,167 +1,199 @@
|
|
|
1
1
|
"""Data models and configuration schemas for qualitative analysis."""
|
|
2
2
|
|
|
3
|
-
import time
|
|
4
3
|
from dataclasses import dataclass, field
|
|
5
4
|
from datetime import datetime
|
|
6
|
-
from typing import Any
|
|
5
|
+
from typing import Any
|
|
7
6
|
|
|
8
7
|
|
|
9
8
|
@dataclass
|
|
10
9
|
class QualitativeCommitData:
|
|
11
10
|
"""Enhanced commit data with qualitative analysis results.
|
|
12
|
-
|
|
11
|
+
|
|
13
12
|
This class extends basic commit information with semantic analysis results
|
|
14
13
|
including change type, business domain, risk assessment, and processing metadata.
|
|
15
14
|
"""
|
|
16
|
-
|
|
15
|
+
|
|
17
16
|
# Existing commit data from GitFlow Analytics
|
|
18
17
|
hash: str
|
|
19
18
|
message: str
|
|
20
19
|
author_name: str
|
|
21
20
|
author_email: str
|
|
22
21
|
timestamp: datetime
|
|
23
|
-
files_changed:
|
|
22
|
+
files_changed: list[str]
|
|
24
23
|
insertions: int
|
|
25
24
|
deletions: int
|
|
26
|
-
|
|
25
|
+
|
|
27
26
|
# New qualitative analysis fields
|
|
28
27
|
change_type: str # feature|bugfix|refactor|docs|test|chore|security|hotfix|config
|
|
29
28
|
change_type_confidence: float # 0.0-1.0
|
|
30
29
|
business_domain: str # frontend|backend|database|infrastructure|mobile|devops|unknown
|
|
31
30
|
domain_confidence: float # 0.0-1.0
|
|
32
31
|
risk_level: str # low|medium|high|critical
|
|
33
|
-
risk_factors:
|
|
34
|
-
intent_signals:
|
|
35
|
-
collaboration_patterns:
|
|
36
|
-
technical_context:
|
|
37
|
-
|
|
32
|
+
risk_factors: list[str] # List of identified risk factors
|
|
33
|
+
intent_signals: dict[str, Any] # Intent analysis results
|
|
34
|
+
collaboration_patterns: dict[str, Any] # Team interaction patterns
|
|
35
|
+
technical_context: dict[str, Any] # Technical context information
|
|
36
|
+
|
|
38
37
|
# Processing metadata
|
|
39
38
|
processing_method: str # 'nlp' or 'llm'
|
|
40
39
|
processing_time_ms: float
|
|
41
40
|
confidence_score: float # Overall confidence in analysis
|
|
42
|
-
|
|
43
|
-
def to_dict(self) ->
|
|
41
|
+
|
|
42
|
+
def to_dict(self) -> dict[str, Any]:
|
|
44
43
|
"""Convert to dictionary for JSON serialization."""
|
|
45
44
|
return {
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
45
|
+
"hash": self.hash,
|
|
46
|
+
"message": self.message,
|
|
47
|
+
"author_name": self.author_name,
|
|
48
|
+
"author_email": self.author_email,
|
|
49
|
+
"timestamp": self.timestamp.isoformat(),
|
|
50
|
+
"files_changed": self.files_changed,
|
|
51
|
+
"insertions": self.insertions,
|
|
52
|
+
"deletions": self.deletions,
|
|
53
|
+
"change_type": self.change_type,
|
|
54
|
+
"change_type_confidence": self.change_type_confidence,
|
|
55
|
+
"business_domain": self.business_domain,
|
|
56
|
+
"domain_confidence": self.domain_confidence,
|
|
57
|
+
"risk_level": self.risk_level,
|
|
58
|
+
"risk_factors": self.risk_factors,
|
|
59
|
+
"intent_signals": self.intent_signals,
|
|
60
|
+
"collaboration_patterns": self.collaboration_patterns,
|
|
61
|
+
"technical_context": self.technical_context,
|
|
62
|
+
"processing_method": self.processing_method,
|
|
63
|
+
"processing_time_ms": self.processing_time_ms,
|
|
64
|
+
"confidence_score": self.confidence_score,
|
|
66
65
|
}
|
|
67
66
|
|
|
68
67
|
|
|
69
68
|
@dataclass
|
|
70
69
|
class ChangeTypeConfig:
|
|
71
70
|
"""Configuration for change type classification."""
|
|
72
|
-
|
|
71
|
+
|
|
73
72
|
min_confidence: float = 0.7
|
|
74
73
|
semantic_weight: float = 0.6 # Weight for semantic features
|
|
75
74
|
file_pattern_weight: float = 0.4 # Weight for file pattern signals
|
|
76
75
|
enable_custom_patterns: bool = True
|
|
77
|
-
custom_patterns:
|
|
76
|
+
custom_patterns: dict[str, dict[str, list[str]]] = field(default_factory=dict)
|
|
78
77
|
|
|
79
78
|
|
|
80
79
|
@dataclass
|
|
81
80
|
class IntentConfig:
|
|
82
81
|
"""Configuration for intent analysis."""
|
|
83
|
-
|
|
84
|
-
urgency_keywords:
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
82
|
+
|
|
83
|
+
urgency_keywords: dict[str, list[str]] = field(
|
|
84
|
+
default_factory=lambda: {
|
|
85
|
+
"critical": ["critical", "urgent", "hotfix", "emergency", "immediate"],
|
|
86
|
+
"important": ["important", "priority", "asap", "needed"],
|
|
87
|
+
"routine": ["routine", "regular", "normal", "standard"],
|
|
88
|
+
}
|
|
89
|
+
)
|
|
89
90
|
confidence_threshold: float = 0.6
|
|
90
91
|
sentiment_analysis: bool = True
|
|
91
92
|
|
|
92
93
|
|
|
93
|
-
@dataclass
|
|
94
|
+
@dataclass
|
|
94
95
|
class DomainConfig:
|
|
95
96
|
"""Configuration for domain classification."""
|
|
96
|
-
|
|
97
|
-
file_patterns:
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
97
|
+
|
|
98
|
+
file_patterns: dict[str, list[str]] = field(
|
|
99
|
+
default_factory=lambda: {
|
|
100
|
+
"frontend": ["*.js", "*.jsx", "*.ts", "*.tsx", "*.vue", "*.html", "*.css", "*.scss"],
|
|
101
|
+
"backend": ["*.py", "*.java", "*.go", "*.rb", "*.php", "*.cs", "*.cpp"],
|
|
102
|
+
"database": ["*.sql", "migrations/*", "schema/*", "**/models/**"],
|
|
103
|
+
"infrastructure": ["Dockerfile", "*.yaml", "*.yml", "terraform/*", "*.tf"],
|
|
104
|
+
"mobile": ["*.swift", "*.kt", "*.java", "android/*", "ios/*"],
|
|
105
|
+
"devops": ["*.yml", "*.yaml", "ci/*", ".github/*", "docker/*"],
|
|
106
|
+
}
|
|
107
|
+
)
|
|
108
|
+
keyword_patterns: dict[str, list[str]] = field(
|
|
109
|
+
default_factory=lambda: {
|
|
110
|
+
"frontend": ["ui", "component", "styling", "interface", "layout"],
|
|
111
|
+
"backend": ["api", "endpoint", "service", "server", "logic"],
|
|
112
|
+
"database": ["query", "schema", "migration", "data", "model"],
|
|
113
|
+
"infrastructure": ["deploy", "config", "environment", "setup"],
|
|
114
|
+
"mobile": ["android", "ios", "mobile", "app"],
|
|
115
|
+
"devops": ["build", "pipeline", "deploy", "ci", "docker"],
|
|
116
|
+
}
|
|
117
|
+
)
|
|
113
118
|
min_confidence: float = 0.6
|
|
114
119
|
|
|
115
120
|
|
|
116
121
|
@dataclass
|
|
117
122
|
class RiskConfig:
|
|
118
123
|
"""Configuration for risk analysis."""
|
|
119
|
-
|
|
120
|
-
high_risk_patterns:
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
124
|
+
|
|
125
|
+
high_risk_patterns: list[str] = field(
|
|
126
|
+
default_factory=lambda: [
|
|
127
|
+
# Security-related patterns
|
|
128
|
+
"password",
|
|
129
|
+
"secret",
|
|
130
|
+
"key",
|
|
131
|
+
"token",
|
|
132
|
+
"auth",
|
|
133
|
+
"security",
|
|
134
|
+
# Critical system patterns
|
|
135
|
+
"production",
|
|
136
|
+
"prod",
|
|
137
|
+
"critical",
|
|
138
|
+
"emergency",
|
|
139
|
+
# Infrastructure patterns
|
|
140
|
+
"database",
|
|
141
|
+
"migration",
|
|
142
|
+
"schema",
|
|
143
|
+
"deploy",
|
|
144
|
+
# Large change patterns
|
|
145
|
+
"refactor",
|
|
146
|
+
"rewrite",
|
|
147
|
+
"restructure",
|
|
148
|
+
]
|
|
149
|
+
)
|
|
150
|
+
medium_risk_patterns: list[str] = field(
|
|
151
|
+
default_factory=lambda: [
|
|
152
|
+
"config",
|
|
153
|
+
"configuration",
|
|
154
|
+
"settings",
|
|
155
|
+
"environment",
|
|
156
|
+
"api",
|
|
157
|
+
"endpoint",
|
|
158
|
+
"service",
|
|
159
|
+
"integration",
|
|
160
|
+
]
|
|
161
|
+
)
|
|
162
|
+
file_risk_patterns: dict[str, str] = field(
|
|
163
|
+
default_factory=lambda: {
|
|
164
|
+
# High risk file patterns
|
|
165
|
+
"**/*prod*": "high",
|
|
166
|
+
"**/migrations/**": "high",
|
|
167
|
+
"**/schema/**": "high",
|
|
168
|
+
"Dockerfile": "medium",
|
|
169
|
+
"*.yml": "medium",
|
|
170
|
+
"*.yaml": "medium",
|
|
171
|
+
"**/*config*": "medium",
|
|
172
|
+
}
|
|
173
|
+
)
|
|
174
|
+
size_thresholds: dict[str, int] = field(
|
|
175
|
+
default_factory=lambda: {
|
|
176
|
+
"large_commit_files": 20, # Files changed
|
|
177
|
+
"large_commit_lines": 500, # Lines changed
|
|
178
|
+
"massive_commit_lines": 2000, # Very large changes
|
|
179
|
+
}
|
|
180
|
+
)
|
|
149
181
|
|
|
150
182
|
|
|
151
183
|
@dataclass
|
|
152
184
|
class NLPConfig:
|
|
153
185
|
"""Configuration for NLP processing engine."""
|
|
154
|
-
|
|
186
|
+
|
|
155
187
|
spacy_model: str = "en_core_web_sm"
|
|
156
188
|
spacy_batch_size: int = 1000
|
|
157
189
|
fast_mode: bool = True # Disable parser/NER for speed
|
|
158
|
-
|
|
190
|
+
|
|
159
191
|
# Component configurations
|
|
160
192
|
change_type_config: ChangeTypeConfig = field(default_factory=ChangeTypeConfig)
|
|
161
193
|
intent_config: IntentConfig = field(default_factory=IntentConfig)
|
|
162
194
|
domain_config: DomainConfig = field(default_factory=DomainConfig)
|
|
163
195
|
risk_config: RiskConfig = field(default_factory=RiskConfig)
|
|
164
|
-
|
|
196
|
+
|
|
165
197
|
# Performance settings
|
|
166
198
|
enable_parallel_processing: bool = True
|
|
167
199
|
max_workers: int = 4
|
|
@@ -170,32 +202,32 @@ class NLPConfig:
|
|
|
170
202
|
@dataclass
|
|
171
203
|
class LLMConfig:
|
|
172
204
|
"""Configuration for LLM fallback processing via OpenRouter."""
|
|
173
|
-
|
|
205
|
+
|
|
174
206
|
# OpenRouter API settings
|
|
175
207
|
openrouter_api_key: str = "${OPENROUTER_API_KEY}"
|
|
176
208
|
base_url: str = "https://openrouter.ai/api/v1"
|
|
177
|
-
|
|
209
|
+
|
|
178
210
|
# Model selection strategy
|
|
179
211
|
primary_model: str = "anthropic/claude-3-haiku" # Fast, cheap classification
|
|
180
212
|
fallback_model: str = "meta-llama/llama-3.1-8b-instruct:free" # Free fallback
|
|
181
213
|
complex_model: str = "anthropic/claude-3-sonnet" # For complex cases
|
|
182
|
-
|
|
214
|
+
|
|
183
215
|
# Model routing thresholds
|
|
184
216
|
complexity_threshold: float = 0.5 # Route complex cases to better model
|
|
185
217
|
cost_threshold_per_1k: float = 0.01 # Max cost per 1k commits
|
|
186
|
-
|
|
218
|
+
|
|
187
219
|
# Processing settings
|
|
188
220
|
max_tokens: int = 1000
|
|
189
221
|
temperature: float = 0.1
|
|
190
|
-
|
|
191
|
-
# Batching settings
|
|
222
|
+
|
|
223
|
+
# Batching settings
|
|
192
224
|
max_group_size: int = 10 # Process up to 10 commits per batch
|
|
193
225
|
similarity_threshold: float = 0.8 # Group similar commits together
|
|
194
|
-
|
|
226
|
+
|
|
195
227
|
# Rate limiting
|
|
196
228
|
requests_per_minute: int = 200 # Higher limit with OpenRouter
|
|
197
229
|
max_retries: int = 3
|
|
198
|
-
|
|
230
|
+
|
|
199
231
|
# Cost control
|
|
200
232
|
max_daily_cost: float = 5.0 # Max daily spend in USD
|
|
201
233
|
enable_cost_tracking: bool = True
|
|
@@ -204,16 +236,16 @@ class LLMConfig:
|
|
|
204
236
|
@dataclass
|
|
205
237
|
class CacheConfig:
|
|
206
238
|
"""Configuration for qualitative analysis caching."""
|
|
207
|
-
|
|
239
|
+
|
|
208
240
|
cache_dir: str = ".qualitative_cache"
|
|
209
241
|
semantic_cache_size: int = 10000 # Max cached patterns
|
|
210
242
|
pattern_cache_ttl_hours: int = 168 # 1 week
|
|
211
|
-
|
|
243
|
+
|
|
212
244
|
# Learning settings
|
|
213
245
|
enable_pattern_learning: bool = True
|
|
214
246
|
learning_threshold: int = 10 # Min examples to learn pattern
|
|
215
247
|
confidence_boost_factor: float = 0.1 # Boost for learned patterns
|
|
216
|
-
|
|
248
|
+
|
|
217
249
|
# Cache optimization
|
|
218
250
|
enable_compression: bool = True
|
|
219
251
|
max_cache_size_mb: int = 100
|
|
@@ -222,51 +254,53 @@ class CacheConfig:
|
|
|
222
254
|
@dataclass
|
|
223
255
|
class QualitativeConfig:
|
|
224
256
|
"""Main configuration for qualitative analysis system.
|
|
225
|
-
|
|
257
|
+
|
|
226
258
|
This configuration orchestrates the entire qualitative analysis pipeline,
|
|
227
|
-
balancing performance, accuracy, and cost through intelligent NLP and
|
|
259
|
+
balancing performance, accuracy, and cost through intelligent NLP and
|
|
228
260
|
strategic LLM usage.
|
|
229
261
|
"""
|
|
230
|
-
|
|
262
|
+
|
|
231
263
|
# Processing settings
|
|
232
264
|
enabled: bool = True
|
|
233
265
|
batch_size: int = 1000 # Commits processed per batch
|
|
234
266
|
max_llm_fallback_pct: float = 0.15 # Max 15% of commits use LLM
|
|
235
267
|
confidence_threshold: float = 0.7 # Min confidence for NLP results
|
|
236
|
-
|
|
268
|
+
|
|
237
269
|
# Component configurations
|
|
238
270
|
nlp_config: NLPConfig = field(default_factory=NLPConfig)
|
|
239
271
|
llm_config: LLMConfig = field(default_factory=LLMConfig)
|
|
240
272
|
cache_config: CacheConfig = field(default_factory=CacheConfig)
|
|
241
|
-
|
|
273
|
+
|
|
242
274
|
# Performance monitoring
|
|
243
275
|
enable_performance_tracking: bool = True
|
|
244
276
|
target_processing_time_ms: float = 2.0 # Target per-commit processing time
|
|
245
|
-
|
|
277
|
+
|
|
246
278
|
# Quality settings
|
|
247
279
|
min_overall_confidence: float = 0.6 # Min confidence for any result
|
|
248
280
|
enable_quality_feedback: bool = True # Learn from corrections
|
|
249
|
-
|
|
250
|
-
def validate(self) ->
|
|
281
|
+
|
|
282
|
+
def validate(self) -> list[str]:
|
|
251
283
|
"""Validate configuration and return any warnings.
|
|
252
|
-
|
|
284
|
+
|
|
253
285
|
Returns:
|
|
254
286
|
List of validation warning messages.
|
|
255
287
|
"""
|
|
256
288
|
warnings = []
|
|
257
|
-
|
|
289
|
+
|
|
258
290
|
if self.max_llm_fallback_pct > 0.3:
|
|
259
291
|
warnings.append("LLM fallback percentage > 30% may result in high costs")
|
|
260
|
-
|
|
292
|
+
|
|
261
293
|
if self.confidence_threshold > 0.9:
|
|
262
294
|
warnings.append("Very high confidence threshold may route too many commits to LLM")
|
|
263
|
-
|
|
295
|
+
|
|
264
296
|
if self.batch_size > 5000:
|
|
265
297
|
warnings.append("Large batch size may cause memory issues")
|
|
266
|
-
|
|
298
|
+
|
|
267
299
|
# Validate LLM config if API key is set
|
|
268
|
-
if
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
300
|
+
if (
|
|
301
|
+
self.llm_config.openrouter_api_key
|
|
302
|
+
and self.llm_config.openrouter_api_key != "${OPENROUTER_API_KEY}"
|
|
303
|
+
) and self.llm_config.max_daily_cost < 1.0:
|
|
304
|
+
warnings.append("Very low daily cost limit may restrict LLM usage")
|
|
305
|
+
|
|
306
|
+
return warnings
|
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
"""Utility functions for qualitative analysis."""
|
|
2
2
|
|
|
3
|
-
from .
|
|
4
|
-
from .batch_processor import BatchProcessor
|
|
5
|
-
from .metrics import PerformanceMetrics
|
|
3
|
+
from .batch_processor import BatchProcessor
|
|
6
4
|
from .cost_tracker import CostTracker
|
|
5
|
+
from .metrics import PerformanceMetrics
|
|
6
|
+
from .text_processing import TextProcessor
|
|
7
7
|
|
|
8
8
|
__all__ = [
|
|
9
9
|
"TextProcessor",
|
|
10
10
|
"BatchProcessor",
|
|
11
11
|
"PerformanceMetrics",
|
|
12
12
|
"CostTracker",
|
|
13
|
-
]
|
|
13
|
+
]
|