gitflow-analytics 1.0.0-py3-none-any.whl → 1.0.3-py3-none-any.whl
This diff shows the contents of two publicly released package versions as they appear in their respective registries, and is provided for informational purposes only.
- gitflow_analytics/__init__.py +11 -9
- gitflow_analytics/_version.py +2 -2
- gitflow_analytics/cli.py +691 -243
- gitflow_analytics/cli_rich.py +353 -0
- gitflow_analytics/config.py +389 -96
- gitflow_analytics/core/analyzer.py +175 -78
- gitflow_analytics/core/branch_mapper.py +132 -132
- gitflow_analytics/core/cache.py +242 -173
- gitflow_analytics/core/identity.py +214 -178
- gitflow_analytics/extractors/base.py +13 -11
- gitflow_analytics/extractors/story_points.py +70 -59
- gitflow_analytics/extractors/tickets.py +111 -88
- gitflow_analytics/integrations/github_integration.py +91 -77
- gitflow_analytics/integrations/jira_integration.py +284 -0
- gitflow_analytics/integrations/orchestrator.py +99 -72
- gitflow_analytics/metrics/dora.py +183 -179
- gitflow_analytics/models/database.py +191 -54
- gitflow_analytics/qualitative/__init__.py +30 -0
- gitflow_analytics/qualitative/classifiers/__init__.py +13 -0
- gitflow_analytics/qualitative/classifiers/change_type.py +468 -0
- gitflow_analytics/qualitative/classifiers/domain_classifier.py +399 -0
- gitflow_analytics/qualitative/classifiers/intent_analyzer.py +436 -0
- gitflow_analytics/qualitative/classifiers/risk_analyzer.py +412 -0
- gitflow_analytics/qualitative/core/__init__.py +13 -0
- gitflow_analytics/qualitative/core/llm_fallback.py +653 -0
- gitflow_analytics/qualitative/core/nlp_engine.py +373 -0
- gitflow_analytics/qualitative/core/pattern_cache.py +457 -0
- gitflow_analytics/qualitative/core/processor.py +540 -0
- gitflow_analytics/qualitative/models/__init__.py +25 -0
- gitflow_analytics/qualitative/models/schemas.py +272 -0
- gitflow_analytics/qualitative/utils/__init__.py +13 -0
- gitflow_analytics/qualitative/utils/batch_processor.py +326 -0
- gitflow_analytics/qualitative/utils/cost_tracker.py +343 -0
- gitflow_analytics/qualitative/utils/metrics.py +347 -0
- gitflow_analytics/qualitative/utils/text_processing.py +243 -0
- gitflow_analytics/reports/analytics_writer.py +25 -8
- gitflow_analytics/reports/csv_writer.py +60 -32
- gitflow_analytics/reports/narrative_writer.py +21 -15
- gitflow_analytics/tui/__init__.py +5 -0
- gitflow_analytics/tui/app.py +721 -0
- gitflow_analytics/tui/screens/__init__.py +8 -0
- gitflow_analytics/tui/screens/analysis_progress_screen.py +487 -0
- gitflow_analytics/tui/screens/configuration_screen.py +547 -0
- gitflow_analytics/tui/screens/loading_screen.py +358 -0
- gitflow_analytics/tui/screens/main_screen.py +304 -0
- gitflow_analytics/tui/screens/results_screen.py +698 -0
- gitflow_analytics/tui/widgets/__init__.py +7 -0
- gitflow_analytics/tui/widgets/data_table.py +257 -0
- gitflow_analytics/tui/widgets/export_modal.py +301 -0
- gitflow_analytics/tui/widgets/progress_widget.py +192 -0
- gitflow_analytics-1.0.3.dist-info/METADATA +490 -0
- gitflow_analytics-1.0.3.dist-info/RECORD +62 -0
- gitflow_analytics-1.0.0.dist-info/METADATA +0 -201
- gitflow_analytics-1.0.0.dist-info/RECORD +0 -30
- {gitflow_analytics-1.0.0.dist-info → gitflow_analytics-1.0.3.dist-info}/WHEEL +0 -0
- {gitflow_analytics-1.0.0.dist-info → gitflow_analytics-1.0.3.dist-info}/entry_points.txt +0 -0
- {gitflow_analytics-1.0.0.dist-info → gitflow_analytics-1.0.3.dist-info}/licenses/LICENSE +0 -0
- {gitflow_analytics-1.0.0.dist-info → gitflow_analytics-1.0.3.dist-info}/top_level.txt +0 -0
gitflow_analytics/config.py
CHANGED
@@ -1,215 +1,508 @@
 """Configuration management for GitFlow Analytics."""
+
 import os
-import yaml
-from pathlib import Path
-from typing import Dict, Any, Optional, List
 from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Any, Optional
+
+import yaml
+from dotenv import load_dotenv
+
 
 @dataclass
 class RepositoryConfig:
     """Configuration for a single repository."""
+
     name: str
     path: Path
     github_repo: Optional[str] = None
     project_key: Optional[str] = None
     branch: Optional[str] = None
-
-    def __post_init__(self):
+
+    def __post_init__(self) -> None:
         self.path = Path(self.path).expanduser().resolve()
         if not self.project_key:
-            self.project_key = self.name.upper().replace(
+            self.project_key = self.name.upper().replace("-", "_")
+
 
 @dataclass
 class GitHubConfig:
     """GitHub API configuration."""
+
     token: Optional[str] = None
     owner: Optional[str] = None
+    organization: Optional[str] = None
     base_url: str = "https://api.github.com"
     max_retries: int = 3
     backoff_factor: int = 2
-
+
     def get_repo_full_name(self, repo_name: str) -> str:
         """Get full repository name including owner."""
-        if
+        if "/" in repo_name:
            return repo_name
         if self.owner:
             return f"{self.owner}/{repo_name}"
         raise ValueError(f"Repository {repo_name} needs owner specified")
 
+
 @dataclass
 class AnalysisConfig:
     """Analysis-specific configuration."""
-
-
-
+
+    story_point_patterns: list[str] = field(default_factory=list)
+    exclude_authors: list[str] = field(default_factory=list)
+    exclude_message_patterns: list[str] = field(default_factory=list)
+    exclude_paths: list[str] = field(default_factory=list)
     similarity_threshold: float = 0.85
-    manual_identity_mappings:
+    manual_identity_mappings: list[dict[str, Any]] = field(default_factory=list)
     default_ticket_platform: Optional[str] = None
-    branch_mapping_rules:
+    branch_mapping_rules: dict[str, list[str]] = field(default_factory=dict)
+    ticket_platforms: Optional[list[str]] = None
+
 
 @dataclass
 class OutputConfig:
     """Output configuration."""
+
     directory: Optional[Path] = None
-    formats:
+    formats: list[str] = field(default_factory=lambda: ["csv", "markdown"])
     csv_delimiter: str = ","
     csv_encoding: str = "utf-8"
     anonymize_enabled: bool = False
-    anonymize_fields:
+    anonymize_fields: list[str] = field(default_factory=list)
     anonymize_method: str = "hash"
 
+
 @dataclass
 class CacheConfig:
     """Cache configuration."""
+
     directory: Path = Path(".gitflow-cache")
     ttl_hours: int = 168
     max_size_mb: int = 500
 
+
+@dataclass
+class JIRAConfig:
+    """JIRA configuration."""
+
+    access_user: str
+    access_token: str
+    base_url: Optional[str] = None
+
+
+@dataclass
+class JIRAIntegrationConfig:
+    """JIRA integration specific configuration."""
+
+    enabled: bool = True
+    fetch_story_points: bool = True
+    project_keys: list[str] = field(default_factory=list)
+    story_point_fields: list[str] = field(
+        default_factory=lambda: ["customfield_10016", "customfield_10021", "Story Points"]
+    )
+
+
 @dataclass
 class Config:
     """Main configuration container."""
-
+
+    repositories: list[RepositoryConfig]
     github: GitHubConfig
     analysis: AnalysisConfig
     output: OutputConfig
     cache: CacheConfig
+    jira: Optional[JIRAConfig] = None
+    jira_integration: Optional[JIRAIntegrationConfig] = None
+    qualitative: Optional['QualitativeConfig'] = None
+
+    def discover_organization_repositories(
+        self, clone_base_path: Optional[Path] = None
+    ) -> list[RepositoryConfig]:
+        """Discover repositories from GitHub organization.
+
+        Args:
+            clone_base_path: Base directory where repos should be cloned/found.
+                If None, uses output directory.
+
+        Returns:
+            List of discovered repository configurations.
+        """
+        if not self.github.organization or not self.github.token:
+            return []
+
+        from github import Github
+
+        github_client = Github(self.github.token, base_url=self.github.base_url)
+
+        try:
+            org = github_client.get_organization(self.github.organization)
+            discovered_repos = []
+
+            base_path = clone_base_path or self.output.directory
+            if base_path is None:
+                raise ValueError("No base path available for repository cloning")
+
+            for repo in org.get_repos():
+                # Skip archived repositories
+                if repo.archived:
+                    continue
+
+                # Create repository configuration
+                repo_path = base_path / repo.name
+                repo_config = RepositoryConfig(
+                    name=repo.name,
+                    path=repo_path,
+                    github_repo=repo.full_name,
+                    project_key=repo.name.upper().replace("-", "_"),
+                    branch=repo.default_branch,
+                )
+                discovered_repos.append(repo_config)
+
+            return discovered_repos
+
+        except Exception as e:
+            raise ValueError(
+                f"Failed to discover repositories from organization {self.github.organization}: {e}"
+            ) from e
+
 
 class ConfigLoader:
     """Load and validate configuration from YAML files."""
-
-    @
-    def load(config_path: Path) -> Config:
+
+    @classmethod
+    def load(cls, config_path: Path) -> Config:
         """Load configuration from YAML file."""
-
+        # Load .env file from the same directory as the config file if it exists
+        config_dir = config_path.parent
+        env_file = config_dir / ".env"
+        if env_file.exists():
+            load_dotenv(env_file, override=True)
+            print(f"📋 Loaded environment variables from {env_file}")
+
+        with open(config_path) as f:
             data = yaml.safe_load(f)
-
+
         # Validate version
-        version = data.get(
-        if version not in [
+        version = data.get("version", "1.0")
+        if version not in ["1.0"]:
             raise ValueError(f"Unsupported config version: {version}")
-
+
         # Process GitHub config
-        github_data = data.get(
+        github_data = data.get("github", {})
+
+        # Resolve GitHub token
+        github_token = cls._resolve_env_var(github_data.get("token"))
+        if github_data.get("token") and not github_token:
+            raise ValueError(
+                "GitHub is configured but GITHUB_TOKEN environment variable is not set"
+            )
+
         github_config = GitHubConfig(
-            token=
-            owner=
-
-
-
+            token=github_token,
+            owner=cls._resolve_env_var(github_data.get("owner")),
+            organization=cls._resolve_env_var(github_data.get("organization")),
+            base_url=github_data.get("base_url", "https://api.github.com"),
+            max_retries=github_data.get("rate_limit", {}).get("max_retries", 3),
+            backoff_factor=github_data.get("rate_limit", {}).get("backoff_factor", 2),
         )
-
+
         # Process repositories
         repositories = []
-
-
-
-
-
-
-
-
-
-                github_repo=github_repo
-
-
-
-
-
-
-
-
+
+        # Handle organization-based repository discovery
+        if github_config.organization and not data.get("repositories"):
+            # Organization specified but no explicit repositories - will be discovered at runtime
+            pass
+        else:
+            # Process explicitly defined repositories
+            for repo_data in data.get("repositories", []):
+                # Handle github_repo with owner/organization fallback
+                github_repo = repo_data.get("github_repo")
+                if github_repo and "/" not in github_repo:
+                    if github_config.organization:
+                        github_repo = f"{github_config.organization}/{github_repo}"
+                    elif github_config.owner:
+                        github_repo = f"{github_config.owner}/{github_repo}"
+
+                repo_config = RepositoryConfig(
+                    name=repo_data["name"],
+                    path=repo_data["path"],
+                    github_repo=github_repo,
+                    project_key=repo_data.get("project_key"),
+                    branch=repo_data.get("branch"),
+                )
+                repositories.append(repo_config)
+
+        # Allow empty repositories list if organization is specified
+        if not repositories and not github_config.organization:
+            raise ValueError("No repositories defined and no organization specified for discovery")
+
         # Process analysis settings
-        analysis_data = data.get(
+        analysis_data = data.get("analysis", {})
+
+        # Default exclude paths for common boilerplate/generated files
+        default_exclude_paths = [
+            "**/node_modules/**",
+            "**/vendor/**",
+            "**/dist/**",
+            "**/build/**",
+            "**/.next/**",
+            "**/__pycache__/**",
+            "**/*.min.js",
+            "**/*.min.css",
+            "**/*.bundle.js",
+            "**/*.bundle.css",
+            "**/package-lock.json",
+            "**/yarn.lock",
+            "**/poetry.lock",
+            "**/Pipfile.lock",
+            "**/composer.lock",
+            "**/Gemfile.lock",
+            "**/Cargo.lock",
+            "**/go.sum",
+            "**/*.generated.*",
+            "**/generated/**",
+            "**/coverage/**",
+            "**/.coverage/**",
+            "**/htmlcov/**",
+            "**/*.map",
+        ]
+
+        # Merge user-provided paths with defaults (user paths take precedence)
+        user_exclude_paths = analysis_data.get("exclude", {}).get("paths", [])
+        exclude_paths = user_exclude_paths if user_exclude_paths else default_exclude_paths
+
         analysis_config = AnalysisConfig(
-            story_point_patterns=analysis_data.get(
-
-
-
-
-
-
-
-
-
-
-
-
+            story_point_patterns=analysis_data.get(
+                "story_point_patterns",
+                [
+                    r"(?:story\s*points?|sp|pts?)\s*[:=]\s*(\d+)",
+                    r"\[(\d+)\s*(?:sp|pts?)\]",
+                    r"#(\d+)sp",
+                ],
+            ),
+            exclude_authors=analysis_data.get("exclude", {}).get(
+                "authors", ["dependabot[bot]", "renovate[bot]"]
+            ),
+            exclude_message_patterns=analysis_data.get("exclude", {}).get("message_patterns", []),
+            exclude_paths=exclude_paths,
+            similarity_threshold=analysis_data.get("identity", {}).get(
+                "similarity_threshold", 0.85
+            ),
+            manual_identity_mappings=analysis_data.get("identity", {}).get("manual_mappings", []),
+            default_ticket_platform=analysis_data.get("default_ticket_platform"),
+            branch_mapping_rules=analysis_data.get("branch_mapping_rules", {}),
+            ticket_platforms=analysis_data.get("ticket_platforms"),
         )
-
+
         # Process output settings
-        output_data = data.get(
-        output_dir = output_data.get(
+        output_data = data.get("output", {})
+        output_dir = output_data.get("directory")
         if output_dir:
-            output_dir = Path(output_dir).expanduser()
-
+            output_dir = Path(output_dir).expanduser()
+            # If relative path, make it relative to config file directory
+            if not output_dir.is_absolute():
+                output_dir = config_path.parent / output_dir
+            output_dir = output_dir.resolve()
+        else:
+            # Default to config file directory if not specified
+            output_dir = config_path.parent
+
         output_config = OutputConfig(
             directory=output_dir,
-            formats=output_data.get(
-            csv_delimiter=output_data.get(
-            csv_encoding=output_data.get(
-            anonymize_enabled=output_data.get(
-            anonymize_fields=output_data.get(
-            anonymize_method=output_data.get(
+            formats=output_data.get("formats", ["csv", "markdown"]),
+            csv_delimiter=output_data.get("csv", {}).get("delimiter", ","),
+            csv_encoding=output_data.get("csv", {}).get("encoding", "utf-8"),
+            anonymize_enabled=output_data.get("anonymization", {}).get("enabled", False),
+            anonymize_fields=output_data.get("anonymization", {}).get("fields", []),
+            anonymize_method=output_data.get("anonymization", {}).get("method", "hash"),
         )
-
+
         # Process cache settings
-        cache_data = data.get(
+        cache_data = data.get("cache", {})
+        cache_dir = cache_data.get("directory", ".gitflow-cache")
+        cache_path = Path(cache_dir)
+        # If relative path, make it relative to config file directory
+        if not cache_path.is_absolute():
+            cache_path = config_path.parent / cache_path
+
         cache_config = CacheConfig(
-            directory=
-            ttl_hours=cache_data.get(
-            max_size_mb=cache_data.get(
+            directory=cache_path.resolve(),
+            ttl_hours=cache_data.get("ttl_hours", 168),
+            max_size_mb=cache_data.get("max_size_mb", 500),
        )
-
+
+        # Process JIRA settings
+        jira_config = None
+        jira_data = data.get("jira", {})
+        if jira_data:
+            access_user = cls._resolve_env_var(jira_data.get("access_user", ""))
+            access_token = cls._resolve_env_var(jira_data.get("access_token", ""))
+
+            # Validate JIRA credentials if JIRA is configured
+            if jira_data.get("access_user") and jira_data.get("access_token"):
+                if not access_user:
+                    raise ValueError(
+                        "JIRA is configured but JIRA_ACCESS_USER environment variable is not set"
+                    )
+                if not access_token:
+                    raise ValueError(
+                        "JIRA is configured but JIRA_ACCESS_TOKEN environment variable is not set"
+                    )
+
+            jira_config = JIRAConfig(
+                access_user=access_user,
+                access_token=access_token,
+                base_url=jira_data.get("base_url"),
+            )
+
+        # Process JIRA integration settings
+        jira_integration_config = None
+        jira_integration_data = data.get("jira_integration", {})
+        if jira_integration_data:
+            jira_integration_config = JIRAIntegrationConfig(
+                enabled=jira_integration_data.get("enabled", True),
+                fetch_story_points=jira_integration_data.get("fetch_story_points", True),
+                project_keys=jira_integration_data.get("project_keys", []),
+                story_point_fields=jira_integration_data.get(
+                    "story_point_fields", ["customfield_10016", "customfield_10021", "Story Points"]
+                ),
+            )
+
+        # Process qualitative analysis settings
+        qualitative_config = None
+        qualitative_data = data.get("qualitative", {})
+        if qualitative_data:
+            # Import here to avoid circular imports
+            try:
+                from .qualitative.models.schemas import (
+                    QualitativeConfig, NLPConfig, LLMConfig, CacheConfig as QualitativeCacheConfig,
+                    ChangeTypeConfig, IntentConfig, DomainConfig, RiskConfig
+                )
+
+                # Parse NLP configuration
+                nlp_data = qualitative_data.get("nlp", {})
+                nlp_config = NLPConfig(
+                    spacy_model=nlp_data.get("spacy_model", "en_core_web_sm"),
+                    spacy_batch_size=nlp_data.get("spacy_batch_size", 1000),
+                    fast_mode=nlp_data.get("fast_mode", True),
+                    enable_parallel_processing=nlp_data.get("enable_parallel_processing", True),
+                    max_workers=nlp_data.get("max_workers", 4),
+                    change_type_config=ChangeTypeConfig(**nlp_data.get("change_type", {})),
+                    intent_config=IntentConfig(**nlp_data.get("intent", {})),
+                    domain_config=DomainConfig(**nlp_data.get("domain", {})),
+                    risk_config=RiskConfig(**nlp_data.get("risk", {}))
+                )
+
+                # Parse LLM configuration
+                llm_data = qualitative_data.get("llm", {})
+                llm_config = LLMConfig(
+                    openrouter_api_key=cls._resolve_env_var(llm_data.get("openrouter_api_key", "${OPENROUTER_API_KEY}")),
+                    base_url=llm_data.get("base_url", "https://openrouter.ai/api/v1"),
+                    primary_model=llm_data.get("primary_model", "anthropic/claude-3-haiku"),
+                    fallback_model=llm_data.get("fallback_model", "meta-llama/llama-3.1-8b-instruct:free"),
+                    complex_model=llm_data.get("complex_model", "anthropic/claude-3-sonnet"),
+                    complexity_threshold=llm_data.get("complexity_threshold", 0.5),
+                    cost_threshold_per_1k=llm_data.get("cost_threshold_per_1k", 0.01),
+                    max_tokens=llm_data.get("max_tokens", 1000),
+                    temperature=llm_data.get("temperature", 0.1),
+                    max_group_size=llm_data.get("max_group_size", 10),
+                    similarity_threshold=llm_data.get("similarity_threshold", 0.8),
+                    requests_per_minute=llm_data.get("requests_per_minute", 200),
+                    max_retries=llm_data.get("max_retries", 3),
+                    max_daily_cost=llm_data.get("max_daily_cost", 5.0),
+                    enable_cost_tracking=llm_data.get("enable_cost_tracking", True)
+                )
+
+                # Parse cache configuration
+                cache_data = qualitative_data.get("cache", {})
+                qualitative_cache_config = QualitativeCacheConfig(
+                    cache_dir=cache_data.get("cache_dir", ".qualitative_cache"),
+                    semantic_cache_size=cache_data.get("semantic_cache_size", 10000),
+                    pattern_cache_ttl_hours=cache_data.get("pattern_cache_ttl_hours", 168),
+                    enable_pattern_learning=cache_data.get("enable_pattern_learning", True),
+                    learning_threshold=cache_data.get("learning_threshold", 10),
+                    confidence_boost_factor=cache_data.get("confidence_boost_factor", 0.1),
+                    enable_compression=cache_data.get("enable_compression", True),
+                    max_cache_size_mb=cache_data.get("max_cache_size_mb", 100)
+                )
+
+                # Create main qualitative configuration
+                qualitative_config = QualitativeConfig(
+                    enabled=qualitative_data.get("enabled", True),
+                    batch_size=qualitative_data.get("batch_size", 1000),
+                    max_llm_fallback_pct=qualitative_data.get("max_llm_fallback_pct", 0.15),
+                    confidence_threshold=qualitative_data.get("confidence_threshold", 0.7),
+                    nlp_config=nlp_config,
+                    llm_config=llm_config,
+                    cache_config=qualitative_cache_config,
+                    enable_performance_tracking=qualitative_data.get("enable_performance_tracking", True),
+                    target_processing_time_ms=qualitative_data.get("target_processing_time_ms", 2.0),
+                    min_overall_confidence=qualitative_data.get("min_overall_confidence", 0.6),
+                    enable_quality_feedback=qualitative_data.get("enable_quality_feedback", True)
+                )
+
+            except ImportError as e:
+                print(f"⚠️ Qualitative analysis dependencies missing: {e}")
+                print(" Install with: pip install spacy scikit-learn openai tiktoken")
+                qualitative_config = None
+            except Exception as e:
+                print(f"⚠️ Error parsing qualitative configuration: {e}")
+                qualitative_config = None
+
         return Config(
             repositories=repositories,
             github=github_config,
             analysis=analysis_config,
             output=output_config,
-            cache=cache_config
+            cache=cache_config,
+            jira=jira_config,
+            jira_integration=jira_integration_config,
+            qualitative=qualitative_config,
         )
-
+
     @staticmethod
     def _resolve_env_var(value: Optional[str]) -> Optional[str]:
         """Resolve environment variable references."""
         if not value:
             return None
-
-        if value.startswith(
+
+        if value.startswith("${") and value.endswith("}"):
             env_var = value[2:-1]
             resolved = os.environ.get(env_var)
             if not resolved:
                 raise ValueError(f"Environment variable {env_var} not set")
             return resolved
-
+
         return value
-
+
     @staticmethod
-    def validate_config(config: Config) ->
+    def validate_config(config: Config) -> list[str]:
         """Validate configuration and return list of warnings."""
         warnings = []
-
+
         # Check repository paths exist
         for repo in config.repositories:
             if not repo.path.exists():
                 warnings.append(f"Repository path does not exist: {repo.path}")
-            elif not (repo.path /
+            elif not (repo.path / ".git").exists():
                 warnings.append(f"Path is not a git repository: {repo.path}")
-
+
         # Check GitHub token if GitHub repos are specified
         has_github_repos = any(r.github_repo for r in config.repositories)
         if has_github_repos and not config.github.token:
             warnings.append("GitHub repositories specified but no GitHub token provided")
-
+
         # Check if owner is needed
         for repo in config.repositories:
-            if repo.github_repo and
+            if repo.github_repo and "/" not in repo.github_repo and not config.github.owner:
                 warnings.append(f"Repository {repo.github_repo} needs owner specified")
-
+
         # Check cache directory permissions
         try:
             config.cache.directory.mkdir(exist_ok=True, parents=True)
         except PermissionError:
             warnings.append(f"Cannot create cache directory: {config.cache.directory}")
-
-        return warnings
+
+        return warnings