gitflow_analytics-1.0.1-py3-none-any.whl → gitflow_analytics-1.0.3-py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only.
- gitflow_analytics/__init__.py +11 -11
- gitflow_analytics/_version.py +2 -2
- gitflow_analytics/cli.py +612 -258
- gitflow_analytics/cli_rich.py +353 -0
- gitflow_analytics/config.py +251 -141
- gitflow_analytics/core/analyzer.py +140 -103
- gitflow_analytics/core/branch_mapper.py +132 -132
- gitflow_analytics/core/cache.py +240 -169
- gitflow_analytics/core/identity.py +210 -173
- gitflow_analytics/extractors/base.py +13 -11
- gitflow_analytics/extractors/story_points.py +70 -59
- gitflow_analytics/extractors/tickets.py +101 -87
- gitflow_analytics/integrations/github_integration.py +84 -77
- gitflow_analytics/integrations/jira_integration.py +116 -104
- gitflow_analytics/integrations/orchestrator.py +86 -85
- gitflow_analytics/metrics/dora.py +181 -177
- gitflow_analytics/models/database.py +190 -53
- gitflow_analytics/qualitative/__init__.py +30 -0
- gitflow_analytics/qualitative/classifiers/__init__.py +13 -0
- gitflow_analytics/qualitative/classifiers/change_type.py +468 -0
- gitflow_analytics/qualitative/classifiers/domain_classifier.py +399 -0
- gitflow_analytics/qualitative/classifiers/intent_analyzer.py +436 -0
- gitflow_analytics/qualitative/classifiers/risk_analyzer.py +412 -0
- gitflow_analytics/qualitative/core/__init__.py +13 -0
- gitflow_analytics/qualitative/core/llm_fallback.py +653 -0
- gitflow_analytics/qualitative/core/nlp_engine.py +373 -0
- gitflow_analytics/qualitative/core/pattern_cache.py +457 -0
- gitflow_analytics/qualitative/core/processor.py +540 -0
- gitflow_analytics/qualitative/models/__init__.py +25 -0
- gitflow_analytics/qualitative/models/schemas.py +272 -0
- gitflow_analytics/qualitative/utils/__init__.py +13 -0
- gitflow_analytics/qualitative/utils/batch_processor.py +326 -0
- gitflow_analytics/qualitative/utils/cost_tracker.py +343 -0
- gitflow_analytics/qualitative/utils/metrics.py +347 -0
- gitflow_analytics/qualitative/utils/text_processing.py +243 -0
- gitflow_analytics/reports/analytics_writer.py +11 -4
- gitflow_analytics/reports/csv_writer.py +51 -31
- gitflow_analytics/reports/narrative_writer.py +16 -14
- gitflow_analytics/tui/__init__.py +5 -0
- gitflow_analytics/tui/app.py +721 -0
- gitflow_analytics/tui/screens/__init__.py +8 -0
- gitflow_analytics/tui/screens/analysis_progress_screen.py +487 -0
- gitflow_analytics/tui/screens/configuration_screen.py +547 -0
- gitflow_analytics/tui/screens/loading_screen.py +358 -0
- gitflow_analytics/tui/screens/main_screen.py +304 -0
- gitflow_analytics/tui/screens/results_screen.py +698 -0
- gitflow_analytics/tui/widgets/__init__.py +7 -0
- gitflow_analytics/tui/widgets/data_table.py +257 -0
- gitflow_analytics/tui/widgets/export_modal.py +301 -0
- gitflow_analytics/tui/widgets/progress_widget.py +192 -0
- {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.0.3.dist-info}/METADATA +31 -4
- gitflow_analytics-1.0.3.dist-info/RECORD +62 -0
- gitflow_analytics-1.0.1.dist-info/RECORD +0 -31
- {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.0.3.dist-info}/WHEEL +0 -0
- {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.0.3.dist-info}/entry_points.txt +0 -0
- {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.0.3.dist-info}/licenses/LICENSE +0 -0
- {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.0.3.dist-info}/top_level.txt +0 -0
gitflow_analytics/config.py
CHANGED
@@ -1,8 +1,9 @@
 """Configuration management for GitFlow Analytics."""
+
 import os
 from dataclasses import dataclass, field
 from pathlib import Path
-from typing import Any,
+from typing import Any, Optional
 
 import yaml
 from dotenv import load_dotenv
@@ -11,214 +12,235 @@ from dotenv import load_dotenv
 @dataclass
 class RepositoryConfig:
     """Configuration for a single repository."""
+
     name: str
     path: Path
     github_repo: Optional[str] = None
     project_key: Optional[str] = None
     branch: Optional[str] = None
-
-    def __post_init__(self):
+
+    def __post_init__(self) -> None:
         self.path = Path(self.path).expanduser().resolve()
         if not self.project_key:
-            self.project_key = self.name.upper().replace(
+            self.project_key = self.name.upper().replace("-", "_")
+
 
 @dataclass
 class GitHubConfig:
     """GitHub API configuration."""
+
     token: Optional[str] = None
     owner: Optional[str] = None
     organization: Optional[str] = None
     base_url: str = "https://api.github.com"
     max_retries: int = 3
     backoff_factor: int = 2
-
+
     def get_repo_full_name(self, repo_name: str) -> str:
         """Get full repository name including owner."""
-        if
+        if "/" in repo_name:
             return repo_name
         if self.owner:
             return f"{self.owner}/{repo_name}"
         raise ValueError(f"Repository {repo_name} needs owner specified")
 
+
 @dataclass
 class AnalysisConfig:
     """Analysis-specific configuration."""
-
-
-
-
+
+    story_point_patterns: list[str] = field(default_factory=list)
+    exclude_authors: list[str] = field(default_factory=list)
+    exclude_message_patterns: list[str] = field(default_factory=list)
+    exclude_paths: list[str] = field(default_factory=list)
     similarity_threshold: float = 0.85
-    manual_identity_mappings:
+    manual_identity_mappings: list[dict[str, Any]] = field(default_factory=list)
     default_ticket_platform: Optional[str] = None
-    branch_mapping_rules:
-    ticket_platforms: Optional[
+    branch_mapping_rules: dict[str, list[str]] = field(default_factory=dict)
+    ticket_platforms: Optional[list[str]] = None
+
 
 @dataclass
 class OutputConfig:
     """Output configuration."""
+
     directory: Optional[Path] = None
-    formats:
+    formats: list[str] = field(default_factory=lambda: ["csv", "markdown"])
     csv_delimiter: str = ","
     csv_encoding: str = "utf-8"
     anonymize_enabled: bool = False
-    anonymize_fields:
+    anonymize_fields: list[str] = field(default_factory=list)
     anonymize_method: str = "hash"
 
+
 @dataclass
 class CacheConfig:
     """Cache configuration."""
+
     directory: Path = Path(".gitflow-cache")
     ttl_hours: int = 168
     max_size_mb: int = 500
 
+
 @dataclass
 class JIRAConfig:
     """JIRA configuration."""
+
     access_user: str
     access_token: str
     base_url: Optional[str] = None
 
+
 @dataclass
 class JIRAIntegrationConfig:
     """JIRA integration specific configuration."""
+
     enabled: bool = True
     fetch_story_points: bool = True
-    project_keys:
-    story_point_fields:
-        "customfield_10016",
-
-
-    ])
+    project_keys: list[str] = field(default_factory=list)
+    story_point_fields: list[str] = field(
+        default_factory=lambda: ["customfield_10016", "customfield_10021", "Story Points"]
+    )
+
 
 @dataclass
 class Config:
    """Main configuration container."""
-
+
+    repositories: list[RepositoryConfig]
     github: GitHubConfig
     analysis: AnalysisConfig
     output: OutputConfig
     cache: CacheConfig
     jira: Optional[JIRAConfig] = None
     jira_integration: Optional[JIRAIntegrationConfig] = None
-
-
+    qualitative: Optional['QualitativeConfig'] = None
+
+    def discover_organization_repositories(
+        self, clone_base_path: Optional[Path] = None
+    ) -> list[RepositoryConfig]:
         """Discover repositories from GitHub organization.
-
+
         Args:
             clone_base_path: Base directory where repos should be cloned/found.
                 If None, uses output directory.
-
+
         Returns:
             List of discovered repository configurations.
         """
         if not self.github.organization or not self.github.token:
             return []
-
+
         from github import Github
-
+
         github_client = Github(self.github.token, base_url=self.github.base_url)
-
+
         try:
             org = github_client.get_organization(self.github.organization)
             discovered_repos = []
-
+
             base_path = clone_base_path or self.output.directory
             if base_path is None:
                 raise ValueError("No base path available for repository cloning")
-
+
             for repo in org.get_repos():
                 # Skip archived repositories
                 if repo.archived:
                     continue
-
+
                 # Create repository configuration
                 repo_path = base_path / repo.name
                 repo_config = RepositoryConfig(
                     name=repo.name,
                     path=repo_path,
                     github_repo=repo.full_name,
-                    project_key=repo.name.upper().replace(
-                    branch=repo.default_branch
+                    project_key=repo.name.upper().replace("-", "_"),
+                    branch=repo.default_branch,
                 )
                 discovered_repos.append(repo_config)
-
+
             return discovered_repos
-
+
         except Exception as e:
-            raise ValueError(
+            raise ValueError(
+                f"Failed to discover repositories from organization {self.github.organization}: {e}"
+            ) from e
+
 
 class ConfigLoader:
     """Load and validate configuration from YAML files."""
-
+
     @classmethod
     def load(cls, config_path: Path) -> Config:
         """Load configuration from YAML file."""
         # Load .env file from the same directory as the config file if it exists
         config_dir = config_path.parent
-        env_file = config_dir /
+        env_file = config_dir / ".env"
         if env_file.exists():
             load_dotenv(env_file, override=True)
             print(f"📋 Loaded environment variables from {env_file}")
-
+
         with open(config_path) as f:
             data = yaml.safe_load(f)
-
+
         # Validate version
-        version = data.get(
-        if version not in [
+        version = data.get("version", "1.0")
+        if version not in ["1.0"]:
             raise ValueError(f"Unsupported config version: {version}")
-
+
         # Process GitHub config
-        github_data = data.get(
-
+        github_data = data.get("github", {})
+
         # Resolve GitHub token
-        github_token = cls._resolve_env_var(github_data.get(
-        if github_data.get(
-            raise ValueError(
-
+        github_token = cls._resolve_env_var(github_data.get("token"))
+        if github_data.get("token") and not github_token:
+            raise ValueError(
+                "GitHub is configured but GITHUB_TOKEN environment variable is not set"
+            )
+
         github_config = GitHubConfig(
             token=github_token,
-            owner=cls._resolve_env_var(github_data.get(
-            organization=cls._resolve_env_var(github_data.get(
-            base_url=github_data.get(
-            max_retries=github_data.get(
-            backoff_factor=github_data.get(
+            owner=cls._resolve_env_var(github_data.get("owner")),
+            organization=cls._resolve_env_var(github_data.get("organization")),
+            base_url=github_data.get("base_url", "https://api.github.com"),
+            max_retries=github_data.get("rate_limit", {}).get("max_retries", 3),
+            backoff_factor=github_data.get("rate_limit", {}).get("backoff_factor", 2),
         )
-
+
         # Process repositories
         repositories = []
-
+
         # Handle organization-based repository discovery
-        if github_config.organization and not data.get(
+        if github_config.organization and not data.get("repositories"):
             # Organization specified but no explicit repositories - will be discovered at runtime
             pass
         else:
             # Process explicitly defined repositories
-            for repo_data in data.get(
+            for repo_data in data.get("repositories", []):
                 # Handle github_repo with owner/organization fallback
-                github_repo = repo_data.get(
-                if github_repo and
+                github_repo = repo_data.get("github_repo")
+                if github_repo and "/" not in github_repo:
                     if github_config.organization:
                         github_repo = f"{github_config.organization}/{github_repo}"
                     elif github_config.owner:
                         github_repo = f"{github_config.owner}/{github_repo}"
-
+
                 repo_config = RepositoryConfig(
-                    name=repo_data[
-                    path=repo_data[
+                    name=repo_data["name"],
+                    path=repo_data["path"],
                     github_repo=github_repo,
-                    project_key=repo_data.get(
-                    branch=repo_data.get(
+                    project_key=repo_data.get("project_key"),
+                    branch=repo_data.get("branch"),
                 )
                 repositories.append(repo_config)
-
+
         # Allow empty repositories list if organization is specified
         if not repositories and not github_config.organization:
             raise ValueError("No repositories defined and no organization specified for discovery")
-
+
         # Process analysis settings
-        analysis_data = data.get(
-
+        analysis_data = data.get("analysis", {})
+
         # Default exclude paths for common boilerplate/generated files
         default_exclude_paths = [
             "**/node_modules/**",
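The hunk above fixes the YAML shape this loader expects through its `data.get(...)` chains: GitHub settings live under `github` (retry settings nested under `rate_limit`), and repositories under `repositories`. A minimal sketch of those sections follows; only the key names come from the code, the names and paths are illustrative, and the `${GITHUB_TOKEN}` reference is resolved from the environment by `_resolve_env_var`:

version: "1.0"
github:
  token: "${GITHUB_TOKEN}"
  organization: "example-org"      # illustrative; enables runtime repository discovery
  base_url: "https://api.github.com"
  rate_limit:
    max_retries: 3
    backoff_factor: 2
repositories:
  - name: "example-repo"           # illustrative
    path: "~/repos/example-repo"
    github_repo: "example-repo"    # "organization/" or "owner/" prefix is added automatically
    branch: "main"                 # project_key defaults to EXAMPLE_REPO (upper-cased, "-" -> "_")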
@@ -244,35 +266,39 @@ class ConfigLoader:
             "**/coverage/**",
             "**/.coverage/**",
             "**/htmlcov/**",
-            "**/*.map"
+            "**/*.map",
         ]
-
+
         # Merge user-provided paths with defaults (user paths take precedence)
-        user_exclude_paths = analysis_data.get(
+        user_exclude_paths = analysis_data.get("exclude", {}).get("paths", [])
         exclude_paths = user_exclude_paths if user_exclude_paths else default_exclude_paths
-
+
         analysis_config = AnalysisConfig(
-            story_point_patterns=analysis_data.get(
-
-
-
-
-
-
-
-
+            story_point_patterns=analysis_data.get(
+                "story_point_patterns",
+                [
+                    r"(?:story\s*points?|sp|pts?)\s*[:=]\s*(\d+)",
+                    r"\[(\d+)\s*(?:sp|pts?)\]",
+                    r"#(\d+)sp",
+                ],
+            ),
+            exclude_authors=analysis_data.get("exclude", {}).get(
+                "authors", ["dependabot[bot]", "renovate[bot]"]
+            ),
+            exclude_message_patterns=analysis_data.get("exclude", {}).get("message_patterns", []),
             exclude_paths=exclude_paths,
-            similarity_threshold=analysis_data.get(
-
-
-
-
+            similarity_threshold=analysis_data.get("identity", {}).get(
+                "similarity_threshold", 0.85
+            ),
+            manual_identity_mappings=analysis_data.get("identity", {}).get("manual_mappings", []),
+            default_ticket_platform=analysis_data.get("default_ticket_platform"),
+            branch_mapping_rules=analysis_data.get("branch_mapping_rules", {}),
+            ticket_platforms=analysis_data.get("ticket_platforms"),
         )
-
+
         # Process output settings
-        output_data = data.get(
-        output_dir = output_data.get(
+        output_data = data.get("output", {})
+        output_dir = output_data.get("directory")
         if output_dir:
             output_dir = Path(output_dir).expanduser()
             # If relative path, make it relative to config file directory
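The `analysis` section parsed in this hunk nests exclusions under `exclude` and identity resolution under `identity`; the defaults visible above (bot authors, story-point regexes, the 0.85 threshold) apply when keys are omitted. A sketch with illustrative values, single-quoted so the regex backslashes survive YAML parsing:

analysis:
  story_point_patterns:
    - '(?:story\s*points?|sp|pts?)\s*[:=]\s*(\d+)'   # the loader's own default
  exclude:
    paths: []        # empty falls back to the default boilerplate globs above
    authors: ["dependabot[bot]", "renovate[bot]"]
    message_patterns: []
  identity:
    similarity_threshold: 0.85
    manual_mappings: []
  default_ticket_platform: "jira"   # illustrative value
  branch_mapping_rules: {}
  ticket_platforms: ["jira"]        # illustrative value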
@@ -282,66 +308,149 @@ class ConfigLoader:
         else:
             # Default to config file directory if not specified
             output_dir = config_path.parent
-
+
         output_config = OutputConfig(
             directory=output_dir,
-            formats=output_data.get(
-            csv_delimiter=output_data.get(
-            csv_encoding=output_data.get(
-            anonymize_enabled=output_data.get(
-            anonymize_fields=output_data.get(
-            anonymize_method=output_data.get(
+            formats=output_data.get("formats", ["csv", "markdown"]),
+            csv_delimiter=output_data.get("csv", {}).get("delimiter", ","),
+            csv_encoding=output_data.get("csv", {}).get("encoding", "utf-8"),
+            anonymize_enabled=output_data.get("anonymization", {}).get("enabled", False),
+            anonymize_fields=output_data.get("anonymization", {}).get("fields", []),
+            anonymize_method=output_data.get("anonymization", {}).get("method", "hash"),
         )
-
+
         # Process cache settings
-        cache_data = data.get(
-        cache_dir = cache_data.get(
+        cache_data = data.get("cache", {})
+        cache_dir = cache_data.get("directory", ".gitflow-cache")
         cache_path = Path(cache_dir)
         # If relative path, make it relative to config file directory
         if not cache_path.is_absolute():
             cache_path = config_path.parent / cache_path
-
+
         cache_config = CacheConfig(
             directory=cache_path.resolve(),
-            ttl_hours=cache_data.get(
-            max_size_mb=cache_data.get(
+            ttl_hours=cache_data.get("ttl_hours", 168),
+            max_size_mb=cache_data.get("max_size_mb", 500),
         )
-
+
         # Process JIRA settings
         jira_config = None
-        jira_data = data.get(
+        jira_data = data.get("jira", {})
         if jira_data:
-            access_user = cls._resolve_env_var(jira_data.get(
-            access_token = cls._resolve_env_var(jira_data.get(
-
+            access_user = cls._resolve_env_var(jira_data.get("access_user", ""))
+            access_token = cls._resolve_env_var(jira_data.get("access_token", ""))
+
             # Validate JIRA credentials if JIRA is configured
-            if jira_data.get(
+            if jira_data.get("access_user") and jira_data.get("access_token"):
                 if not access_user:
-                    raise ValueError(
+                    raise ValueError(
+                        "JIRA is configured but JIRA_ACCESS_USER environment variable is not set"
+                    )
                 if not access_token:
-                    raise ValueError(
-
+                    raise ValueError(
+                        "JIRA is configured but JIRA_ACCESS_TOKEN environment variable is not set"
+                    )
+
             jira_config = JIRAConfig(
                 access_user=access_user,
                 access_token=access_token,
-                base_url=jira_data.get(
+                base_url=jira_data.get("base_url"),
             )
-
+
         # Process JIRA integration settings
         jira_integration_config = None
-        jira_integration_data = data.get(
+        jira_integration_data = data.get("jira_integration", {})
         if jira_integration_data:
             jira_integration_config = JIRAIntegrationConfig(
-                enabled=jira_integration_data.get(
-                fetch_story_points=jira_integration_data.get(
-                project_keys=jira_integration_data.get(
-                story_point_fields=jira_integration_data.get(
-                    "customfield_10016",
-
-                    "Story Points"
-                ])
+                enabled=jira_integration_data.get("enabled", True),
+                fetch_story_points=jira_integration_data.get("fetch_story_points", True),
+                project_keys=jira_integration_data.get("project_keys", []),
+                story_point_fields=jira_integration_data.get(
+                    "story_point_fields", ["customfield_10016", "customfield_10021", "Story Points"]
+                ),
             )
-
+
+        # Process qualitative analysis settings
+        qualitative_config = None
+        qualitative_data = data.get("qualitative", {})
+        if qualitative_data:
+            # Import here to avoid circular imports
+            try:
+                from .qualitative.models.schemas import (
+                    QualitativeConfig, NLPConfig, LLMConfig, CacheConfig as QualitativeCacheConfig,
+                    ChangeTypeConfig, IntentConfig, DomainConfig, RiskConfig
+                )
+
+                # Parse NLP configuration
+                nlp_data = qualitative_data.get("nlp", {})
+                nlp_config = NLPConfig(
+                    spacy_model=nlp_data.get("spacy_model", "en_core_web_sm"),
+                    spacy_batch_size=nlp_data.get("spacy_batch_size", 1000),
+                    fast_mode=nlp_data.get("fast_mode", True),
+                    enable_parallel_processing=nlp_data.get("enable_parallel_processing", True),
+                    max_workers=nlp_data.get("max_workers", 4),
+                    change_type_config=ChangeTypeConfig(**nlp_data.get("change_type", {})),
+                    intent_config=IntentConfig(**nlp_data.get("intent", {})),
+                    domain_config=DomainConfig(**nlp_data.get("domain", {})),
+                    risk_config=RiskConfig(**nlp_data.get("risk", {}))
+                )
+
+                # Parse LLM configuration
+                llm_data = qualitative_data.get("llm", {})
+                llm_config = LLMConfig(
+                    openrouter_api_key=cls._resolve_env_var(llm_data.get("openrouter_api_key", "${OPENROUTER_API_KEY}")),
+                    base_url=llm_data.get("base_url", "https://openrouter.ai/api/v1"),
+                    primary_model=llm_data.get("primary_model", "anthropic/claude-3-haiku"),
+                    fallback_model=llm_data.get("fallback_model", "meta-llama/llama-3.1-8b-instruct:free"),
+                    complex_model=llm_data.get("complex_model", "anthropic/claude-3-sonnet"),
+                    complexity_threshold=llm_data.get("complexity_threshold", 0.5),
+                    cost_threshold_per_1k=llm_data.get("cost_threshold_per_1k", 0.01),
+                    max_tokens=llm_data.get("max_tokens", 1000),
+                    temperature=llm_data.get("temperature", 0.1),
+                    max_group_size=llm_data.get("max_group_size", 10),
+                    similarity_threshold=llm_data.get("similarity_threshold", 0.8),
+                    requests_per_minute=llm_data.get("requests_per_minute", 200),
+                    max_retries=llm_data.get("max_retries", 3),
+                    max_daily_cost=llm_data.get("max_daily_cost", 5.0),
+                    enable_cost_tracking=llm_data.get("enable_cost_tracking", True)
+                )
+
+                # Parse cache configuration
+                cache_data = qualitative_data.get("cache", {})
+                qualitative_cache_config = QualitativeCacheConfig(
+                    cache_dir=cache_data.get("cache_dir", ".qualitative_cache"),
+                    semantic_cache_size=cache_data.get("semantic_cache_size", 10000),
+                    pattern_cache_ttl_hours=cache_data.get("pattern_cache_ttl_hours", 168),
+                    enable_pattern_learning=cache_data.get("enable_pattern_learning", True),
+                    learning_threshold=cache_data.get("learning_threshold", 10),
+                    confidence_boost_factor=cache_data.get("confidence_boost_factor", 0.1),
+                    enable_compression=cache_data.get("enable_compression", True),
+                    max_cache_size_mb=cache_data.get("max_cache_size_mb", 100)
+                )
+
+                # Create main qualitative configuration
+                qualitative_config = QualitativeConfig(
+                    enabled=qualitative_data.get("enabled", True),
+                    batch_size=qualitative_data.get("batch_size", 1000),
+                    max_llm_fallback_pct=qualitative_data.get("max_llm_fallback_pct", 0.15),
+                    confidence_threshold=qualitative_data.get("confidence_threshold", 0.7),
+                    nlp_config=nlp_config,
+                    llm_config=llm_config,
+                    cache_config=qualitative_cache_config,
+                    enable_performance_tracking=qualitative_data.get("enable_performance_tracking", True),
+                    target_processing_time_ms=qualitative_data.get("target_processing_time_ms", 2.0),
+                    min_overall_confidence=qualitative_data.get("min_overall_confidence", 0.6),
+                    enable_quality_feedback=qualitative_data.get("enable_quality_feedback", True)
+                )
+
+            except ImportError as e:
+                print(f"⚠️ Qualitative analysis dependencies missing: {e}")
+                print(" Install with: pip install spacy scikit-learn openai tiktoken")
+                qualitative_config = None
+            except Exception as e:
+                print(f"⚠️ Error parsing qualitative configuration: {e}")
+                qualitative_config = None
+
         return Config(
             repositories=repositories,
             github=github_config,
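This hunk parses the remaining sections, including the new `qualitative` block whose `nlp`, `llm`, and `cache` sub-sections map onto the schemas imported from gitflow_analytics.qualitative.models.schemas. A sketch of those sections, using the loader's own defaults as values; the JIRA host and project key are illustrative, and the `${...}` credentials are the environment variables named in the loader's error messages and defaults:

output:
  directory: "./reports"            # illustrative
  formats: ["csv", "markdown"]
  csv: {delimiter: ",", encoding: "utf-8"}
  anonymization: {enabled: false, fields: [], method: "hash"}
cache:
  directory: ".gitflow-cache"
  ttl_hours: 168
  max_size_mb: 500
jira:
  access_user: "${JIRA_ACCESS_USER}"
  access_token: "${JIRA_ACCESS_TOKEN}"
  base_url: "https://example.atlassian.net"   # illustrative
jira_integration:
  enabled: true
  fetch_story_points: true
  project_keys: ["PROJ"]                      # illustrative
qualitative:
  enabled: true
  batch_size: 1000
  confidence_threshold: 0.7
  nlp:
    spacy_model: "en_core_web_sm"
    fast_mode: true
  llm:
    openrouter_api_key: "${OPENROUTER_API_KEY}"
    primary_model: "anthropic/claude-3-haiku"
    max_daily_cost: 5.0
  cache:
    cache_dir: ".qualitative_cache"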
@@ -349,50 +458,51 @@ class ConfigLoader:
             output=output_config,
             cache=cache_config,
             jira=jira_config,
-            jira_integration=jira_integration_config
+            jira_integration=jira_integration_config,
+            qualitative=qualitative_config,
         )
-
+
     @staticmethod
     def _resolve_env_var(value: Optional[str]) -> Optional[str]:
         """Resolve environment variable references."""
         if not value:
             return None
-
-        if value.startswith(
+
+        if value.startswith("${") and value.endswith("}"):
             env_var = value[2:-1]
             resolved = os.environ.get(env_var)
             if not resolved:
                 raise ValueError(f"Environment variable {env_var} not set")
             return resolved
-
+
         return value
-
+
     @staticmethod
-    def validate_config(config: Config) ->
+    def validate_config(config: Config) -> list[str]:
         """Validate configuration and return list of warnings."""
         warnings = []
-
+
         # Check repository paths exist
         for repo in config.repositories:
             if not repo.path.exists():
                 warnings.append(f"Repository path does not exist: {repo.path}")
-            elif not (repo.path /
+            elif not (repo.path / ".git").exists():
                 warnings.append(f"Path is not a git repository: {repo.path}")
-
+
         # Check GitHub token if GitHub repos are specified
         has_github_repos = any(r.github_repo for r in config.repositories)
         if has_github_repos and not config.github.token:
             warnings.append("GitHub repositories specified but no GitHub token provided")
-
+
         # Check if owner is needed
         for repo in config.repositories:
-            if repo.github_repo and
+            if repo.github_repo and "/" not in repo.github_repo and not config.github.owner:
                 warnings.append(f"Repository {repo.github_repo} needs owner specified")
-
+
         # Check cache directory permissions
         try:
             config.cache.directory.mkdir(exist_ok=True, parents=True)
         except PermissionError:
             warnings.append(f"Cannot create cache directory: {config.cache.directory}")
-
-        return warnings
+
+        return warnings