gitflow-analytics 1.0.0__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. gitflow_analytics/__init__.py +11 -9
  2. gitflow_analytics/_version.py +2 -2
  3. gitflow_analytics/cli.py +691 -243
  4. gitflow_analytics/cli_rich.py +353 -0
  5. gitflow_analytics/config.py +389 -96
  6. gitflow_analytics/core/analyzer.py +175 -78
  7. gitflow_analytics/core/branch_mapper.py +132 -132
  8. gitflow_analytics/core/cache.py +242 -173
  9. gitflow_analytics/core/identity.py +214 -178
  10. gitflow_analytics/extractors/base.py +13 -11
  11. gitflow_analytics/extractors/story_points.py +70 -59
  12. gitflow_analytics/extractors/tickets.py +111 -88
  13. gitflow_analytics/integrations/github_integration.py +91 -77
  14. gitflow_analytics/integrations/jira_integration.py +284 -0
  15. gitflow_analytics/integrations/orchestrator.py +99 -72
  16. gitflow_analytics/metrics/dora.py +183 -179
  17. gitflow_analytics/models/database.py +191 -54
  18. gitflow_analytics/qualitative/__init__.py +30 -0
  19. gitflow_analytics/qualitative/classifiers/__init__.py +13 -0
  20. gitflow_analytics/qualitative/classifiers/change_type.py +468 -0
  21. gitflow_analytics/qualitative/classifiers/domain_classifier.py +399 -0
  22. gitflow_analytics/qualitative/classifiers/intent_analyzer.py +436 -0
  23. gitflow_analytics/qualitative/classifiers/risk_analyzer.py +412 -0
  24. gitflow_analytics/qualitative/core/__init__.py +13 -0
  25. gitflow_analytics/qualitative/core/llm_fallback.py +653 -0
  26. gitflow_analytics/qualitative/core/nlp_engine.py +373 -0
  27. gitflow_analytics/qualitative/core/pattern_cache.py +457 -0
  28. gitflow_analytics/qualitative/core/processor.py +540 -0
  29. gitflow_analytics/qualitative/models/__init__.py +25 -0
  30. gitflow_analytics/qualitative/models/schemas.py +272 -0
  31. gitflow_analytics/qualitative/utils/__init__.py +13 -0
  32. gitflow_analytics/qualitative/utils/batch_processor.py +326 -0
  33. gitflow_analytics/qualitative/utils/cost_tracker.py +343 -0
  34. gitflow_analytics/qualitative/utils/metrics.py +347 -0
  35. gitflow_analytics/qualitative/utils/text_processing.py +243 -0
  36. gitflow_analytics/reports/analytics_writer.py +25 -8
  37. gitflow_analytics/reports/csv_writer.py +60 -32
  38. gitflow_analytics/reports/narrative_writer.py +21 -15
  39. gitflow_analytics/tui/__init__.py +5 -0
  40. gitflow_analytics/tui/app.py +721 -0
  41. gitflow_analytics/tui/screens/__init__.py +8 -0
  42. gitflow_analytics/tui/screens/analysis_progress_screen.py +487 -0
  43. gitflow_analytics/tui/screens/configuration_screen.py +547 -0
  44. gitflow_analytics/tui/screens/loading_screen.py +358 -0
  45. gitflow_analytics/tui/screens/main_screen.py +304 -0
  46. gitflow_analytics/tui/screens/results_screen.py +698 -0
  47. gitflow_analytics/tui/widgets/__init__.py +7 -0
  48. gitflow_analytics/tui/widgets/data_table.py +257 -0
  49. gitflow_analytics/tui/widgets/export_modal.py +301 -0
  50. gitflow_analytics/tui/widgets/progress_widget.py +192 -0
  51. gitflow_analytics-1.0.3.dist-info/METADATA +490 -0
  52. gitflow_analytics-1.0.3.dist-info/RECORD +62 -0
  53. gitflow_analytics-1.0.0.dist-info/METADATA +0 -201
  54. gitflow_analytics-1.0.0.dist-info/RECORD +0 -30
  55. {gitflow_analytics-1.0.0.dist-info → gitflow_analytics-1.0.3.dist-info}/WHEEL +0 -0
  56. {gitflow_analytics-1.0.0.dist-info → gitflow_analytics-1.0.3.dist-info}/entry_points.txt +0 -0
  57. {gitflow_analytics-1.0.0.dist-info → gitflow_analytics-1.0.3.dist-info}/licenses/LICENSE +0 -0
  58. {gitflow_analytics-1.0.0.dist-info → gitflow_analytics-1.0.3.dist-info}/top_level.txt +0 -0
@@ -1,215 +1,508 @@
1
1
  """Configuration management for GitFlow Analytics."""
2
+
2
3
  import os
3
- import yaml
4
- from pathlib import Path
5
- from typing import Dict, Any, Optional, List
6
4
  from dataclasses import dataclass, field
5
+ from pathlib import Path
6
+ from typing import Any, Optional
7
+
8
+ import yaml
9
+ from dotenv import load_dotenv
10
+
7
11
 
8
12
@dataclass
class RepositoryConfig:
    """Settings describing a single git repository to analyze."""

    name: str  # short repository name, e.g. "my-service"
    path: Path  # local checkout location; normalized in __post_init__
    github_repo: Optional[str] = None  # "owner/name" on GitHub, if linked
    project_key: Optional[str] = None  # report identifier; derived from name when absent
    branch: Optional[str] = None  # branch to analyze; None means repo default

    def __post_init__(self) -> None:
        # Normalize the path to an absolute, user-expanded form.
        self.path = Path(self.path).expanduser().resolve()
        # Fall back to an uppercased, underscore-joined form of the name
        # when no explicit project key was supplied.
        if not self.project_key:
            derived_key = self.name.upper().replace("-", "_")
            self.project_key = derived_key
21
27
 
22
28
@dataclass
class GitHubConfig:
    """GitHub API configuration."""

    token: Optional[str] = None  # personal access token
    owner: Optional[str] = None  # default owner used to qualify bare repo names
    organization: Optional[str] = None  # organization for automatic repo discovery
    base_url: str = "https://api.github.com"  # API endpoint (override for GH Enterprise)
    max_retries: int = 3  # retry attempts against the API
    backoff_factor: int = 2  # exponential backoff multiplier between retries

    def get_repo_full_name(self, repo_name: str) -> str:
        """Get full repository name including owner."""
        # Already qualified ("owner/name") — return as-is.
        if "/" in repo_name:
            return repo_name
        # Otherwise a default owner is required to qualify the bare name.
        if not self.owner:
            raise ValueError(f"Repository {repo_name} needs owner specified")
        return f"{self.owner}/{repo_name}"
38
46
 
47
+
39
48
@dataclass
class AnalysisConfig:
    """Analysis-specific configuration."""

    # Regex patterns used to extract story points from commit messages.
    story_point_patterns: list[str] = field(default_factory=list)
    # Commit authors to skip entirely (e.g. bot accounts).
    exclude_authors: list[str] = field(default_factory=list)
    # Regex patterns for commit messages to exclude.
    exclude_message_patterns: list[str] = field(default_factory=list)
    # Glob patterns for paths to exclude from analysis.
    exclude_paths: list[str] = field(default_factory=list)
    # Threshold for fuzzy identity matching.
    similarity_threshold: float = 0.85
    # Hand-curated identity mappings (shape defined by the identity resolver).
    manual_identity_mappings: list[dict[str, Any]] = field(default_factory=list)
    # Ticket platform assumed when a reference carries no explicit platform.
    default_ticket_platform: Optional[str] = None
    # Branch-classification rules: category -> list of matching patterns.
    branch_mapping_rules: dict[str, list[str]] = field(default_factory=dict)
    # When set, restricts ticket extraction to these platforms.
    ticket_platforms: Optional[list[str]] = None
61
+
49
62
 
50
63
@dataclass
class OutputConfig:
    """Output configuration."""

    directory: Optional[Path] = None  # where reports are written; loader fills a default
    formats: list[str] = field(default_factory=lambda: ["csv", "markdown"])  # report formats
    csv_delimiter: str = ","  # field separator for CSV output
    csv_encoding: str = "utf-8"  # encoding for CSV output
    anonymize_enabled: bool = False  # whether to anonymize report fields
    anonymize_fields: list[str] = field(default_factory=list)  # fields to anonymize
    anonymize_method: str = "hash"  # anonymization strategy name
60
74
 
75
+
61
76
@dataclass
class CacheConfig:
    """Cache configuration."""

    directory: Path = Path(".gitflow-cache")  # cache location; loader resolves relative paths
    ttl_hours: int = 168  # entry time-to-live (one week)
    max_size_mb: int = 500  # cache size cap in megabytes
67
83
 
84
+
85
@dataclass
class JIRAConfig:
    """JIRA configuration."""

    access_user: str  # account used for JIRA API authentication
    access_token: str  # API token paired with access_user
    base_url: Optional[str] = None  # JIRA server URL; None when not configured here
92
+
93
+
94
@dataclass
class JIRAIntegrationConfig:
    """JIRA integration specific configuration."""

    enabled: bool = True  # master switch for the integration
    fetch_story_points: bool = True  # whether to pull story points from JIRA
    project_keys: list[str] = field(default_factory=list)  # JIRA projects to query
    # Candidate fields that may hold story points, checked in order.
    story_point_fields: list[str] = field(
        default_factory=lambda: ["customfield_10016", "customfield_10021", "Story Points"]
    )
104
+
105
+
68
106
@dataclass
class Config:
    """Main configuration container."""

    repositories: list[RepositoryConfig]
    github: GitHubConfig
    analysis: AnalysisConfig
    output: OutputConfig
    cache: CacheConfig
    jira: Optional[JIRAConfig] = None
    jira_integration: Optional[JIRAIntegrationConfig] = None
    qualitative: Optional["QualitativeConfig"] = None

    def discover_organization_repositories(
        self, clone_base_path: Optional[Path] = None
    ) -> list[RepositoryConfig]:
        """Discover repositories from GitHub organization.

        Args:
            clone_base_path: Base directory where repos should be cloned/found.
                If None, uses output directory.

        Returns:
            List of discovered repository configurations. Empty when no
            organization or token is configured.

        Raises:
            ValueError: If no base path is available, or if the GitHub API
                query for the organization fails.
        """
        if not self.github.organization or not self.github.token:
            return []

        # BUGFIX: validate the base path *before* entering the try block.
        # Previously this ValueError was raised inside the try and then
        # swallowed and re-wrapped by the broad `except Exception` into the
        # misleading "Failed to discover repositories..." message.
        base_path = clone_base_path or self.output.directory
        if base_path is None:
            raise ValueError("No base path available for repository cloning")

        # Imported lazily so PyGithub is only required when discovery is used.
        from github import Github

        github_client = Github(self.github.token, base_url=self.github.base_url)

        try:
            org = github_client.get_organization(self.github.organization)
            discovered_repos = []

            for repo in org.get_repos():
                # Skip archived repositories
                if repo.archived:
                    continue

                # Create repository configuration
                repo_config = RepositoryConfig(
                    name=repo.name,
                    path=base_path / repo.name,
                    github_repo=repo.full_name,
                    project_key=repo.name.upper().replace("-", "_"),
                    branch=repo.default_branch,
                )
                discovered_repos.append(repo_config)

            return discovered_repos

        except Exception as e:
            # Surface any API/auth failure with the organization name attached.
            raise ValueError(
                f"Failed to discover repositories from organization {self.github.organization}: {e}"
            ) from e
+ ) from e
168
+
76
169
 
77
170
class ConfigLoader:
    """Load and validate configuration from YAML files."""

    @classmethod
    def load(cls, config_path: Path) -> "Config":
        """Load configuration from YAML file.

        A `.env` file sitting next to the configuration file is loaded first,
        and `${VAR}` references inside the YAML are resolved against the
        environment.

        Raises:
            ValueError: On an unsupported config version, unresolved required
                credentials, or when neither repositories nor an organization
                are configured.
        """
        # Load .env file from the same directory as the config file if it exists
        env_file = config_path.parent / ".env"
        if env_file.exists():
            load_dotenv(env_file, override=True)
            print(f"📋 Loaded environment variables from {env_file}")

        with open(config_path) as f:
            data = yaml.safe_load(f)

        # Validate version
        version = data.get("version", "1.0")
        if version not in ["1.0"]:
            raise ValueError(f"Unsupported config version: {version}")

        # Each section is parsed by a dedicated helper so the sections stay
        # independently readable and testable.
        github_config = cls._parse_github(data.get("github", {}))
        repositories = cls._parse_repositories(data, github_config)
        analysis_config = cls._parse_analysis(data.get("analysis", {}))
        output_config = cls._parse_output(data.get("output", {}), config_path)
        cache_config = cls._parse_cache(data.get("cache", {}), config_path)
        jira_config = cls._parse_jira(data.get("jira", {}))
        jira_integration_config = cls._parse_jira_integration(data.get("jira_integration", {}))
        qualitative_config = cls._parse_qualitative(data.get("qualitative", {}))

        return Config(
            repositories=repositories,
            github=github_config,
            analysis=analysis_config,
            output=output_config,
            cache=cache_config,
            jira=jira_config,
            jira_integration=jira_integration_config,
            qualitative=qualitative_config,
        )

    @classmethod
    def _parse_github(cls, github_data: dict[str, Any]) -> "GitHubConfig":
        """Build the GitHub section, resolving ${VAR} credential references."""
        github_token = cls._resolve_env_var(github_data.get("token"))
        if github_data.get("token") and not github_token:
            raise ValueError(
                "GitHub is configured but GITHUB_TOKEN environment variable is not set"
            )

        return GitHubConfig(
            token=github_token,
            owner=cls._resolve_env_var(github_data.get("owner")),
            organization=cls._resolve_env_var(github_data.get("organization")),
            base_url=github_data.get("base_url", "https://api.github.com"),
            max_retries=github_data.get("rate_limit", {}).get("max_retries", 3),
            backoff_factor=github_data.get("rate_limit", {}).get("backoff_factor", 2),
        )

    @staticmethod
    def _parse_repositories(
        data: dict[str, Any], github_config: "GitHubConfig"
    ) -> list["RepositoryConfig"]:
        """Build the explicit repository list.

        The list may be empty when an organization is configured, in which
        case repositories are discovered at runtime instead.
        """
        repositories: list[RepositoryConfig] = []

        if github_config.organization and not data.get("repositories"):
            # Organization specified but no explicit repositories -
            # they will be discovered at runtime.
            return repositories

        for repo_data in data.get("repositories", []):
            # Qualify a bare repo name with the organization (preferred)
            # or the default owner.
            github_repo = repo_data.get("github_repo")
            if github_repo and "/" not in github_repo:
                if github_config.organization:
                    github_repo = f"{github_config.organization}/{github_repo}"
                elif github_config.owner:
                    github_repo = f"{github_config.owner}/{github_repo}"

            repositories.append(
                RepositoryConfig(
                    name=repo_data["name"],
                    path=repo_data["path"],
                    github_repo=github_repo,
                    project_key=repo_data.get("project_key"),
                    branch=repo_data.get("branch"),
                )
            )

        # Allow an empty repositories list only if an organization is specified.
        if not repositories and not github_config.organization:
            raise ValueError("No repositories defined and no organization specified for discovery")

        return repositories

    @staticmethod
    def _parse_analysis(analysis_data: dict[str, Any]) -> "AnalysisConfig":
        """Build the analysis section, filling in pattern/exclusion defaults."""
        # Default exclude paths for common boilerplate/generated files.
        default_exclude_paths = [
            "**/node_modules/**",
            "**/vendor/**",
            "**/dist/**",
            "**/build/**",
            "**/.next/**",
            "**/__pycache__/**",
            "**/*.min.js",
            "**/*.min.css",
            "**/*.bundle.js",
            "**/*.bundle.css",
            "**/package-lock.json",
            "**/yarn.lock",
            "**/poetry.lock",
            "**/Pipfile.lock",
            "**/composer.lock",
            "**/Gemfile.lock",
            "**/Cargo.lock",
            "**/go.sum",
            "**/*.generated.*",
            "**/generated/**",
            "**/coverage/**",
            "**/.coverage/**",
            "**/htmlcov/**",
            "**/*.map",
        ]

        # NOTE: user-provided paths REPLACE the defaults entirely (they are
        # not merged) — an explicit list gives the user full control.
        user_exclude_paths = analysis_data.get("exclude", {}).get("paths", [])
        exclude_paths = user_exclude_paths if user_exclude_paths else default_exclude_paths

        return AnalysisConfig(
            story_point_patterns=analysis_data.get(
                "story_point_patterns",
                [
                    r"(?:story\s*points?|sp|pts?)\s*[:=]\s*(\d+)",
                    r"\[(\d+)\s*(?:sp|pts?)\]",
                    r"#(\d+)sp",
                ],
            ),
            exclude_authors=analysis_data.get("exclude", {}).get(
                "authors", ["dependabot[bot]", "renovate[bot]"]
            ),
            exclude_message_patterns=analysis_data.get("exclude", {}).get("message_patterns", []),
            exclude_paths=exclude_paths,
            similarity_threshold=analysis_data.get("identity", {}).get(
                "similarity_threshold", 0.85
            ),
            manual_identity_mappings=analysis_data.get("identity", {}).get("manual_mappings", []),
            default_ticket_platform=analysis_data.get("default_ticket_platform"),
            branch_mapping_rules=analysis_data.get("branch_mapping_rules", {}),
            ticket_platforms=analysis_data.get("ticket_platforms"),
        )

    @staticmethod
    def _parse_output(output_data: dict[str, Any], config_path: Path) -> "OutputConfig":
        """Build the output section; relative directories anchor at the config file."""
        output_dir = output_data.get("directory")
        if output_dir:
            output_dir = Path(output_dir).expanduser()
            # If relative path, make it relative to config file directory
            if not output_dir.is_absolute():
                output_dir = config_path.parent / output_dir
            output_dir = output_dir.resolve()
        else:
            # Default to config file directory if not specified
            output_dir = config_path.parent

        return OutputConfig(
            directory=output_dir,
            formats=output_data.get("formats", ["csv", "markdown"]),
            csv_delimiter=output_data.get("csv", {}).get("delimiter", ","),
            csv_encoding=output_data.get("csv", {}).get("encoding", "utf-8"),
            anonymize_enabled=output_data.get("anonymization", {}).get("enabled", False),
            anonymize_fields=output_data.get("anonymization", {}).get("fields", []),
            anonymize_method=output_data.get("anonymization", {}).get("method", "hash"),
        )

    @staticmethod
    def _parse_cache(cache_data: dict[str, Any], config_path: Path) -> "CacheConfig":
        """Build the cache section; relative directories anchor at the config file."""
        cache_path = Path(cache_data.get("directory", ".gitflow-cache"))
        # If relative path, make it relative to config file directory
        if not cache_path.is_absolute():
            cache_path = config_path.parent / cache_path

        return CacheConfig(
            directory=cache_path.resolve(),
            ttl_hours=cache_data.get("ttl_hours", 168),
            max_size_mb=cache_data.get("max_size_mb", 500),
        )

    @classmethod
    def _parse_jira(cls, jira_data: dict[str, Any]) -> Optional["JIRAConfig"]:
        """Build the optional JIRA credentials section; None when absent."""
        if not jira_data:
            return None

        access_user = cls._resolve_env_var(jira_data.get("access_user", ""))
        access_token = cls._resolve_env_var(jira_data.get("access_token", ""))

        # Validate JIRA credentials if JIRA is configured
        if jira_data.get("access_user") and jira_data.get("access_token"):
            if not access_user:
                raise ValueError(
                    "JIRA is configured but JIRA_ACCESS_USER environment variable is not set"
                )
            if not access_token:
                raise ValueError(
                    "JIRA is configured but JIRA_ACCESS_TOKEN environment variable is not set"
                )

        return JIRAConfig(
            access_user=access_user,
            access_token=access_token,
            base_url=jira_data.get("base_url"),
        )

    @staticmethod
    def _parse_jira_integration(
        jira_integration_data: dict[str, Any],
    ) -> Optional["JIRAIntegrationConfig"]:
        """Build the optional JIRA integration behavior section; None when absent."""
        if not jira_integration_data:
            return None

        return JIRAIntegrationConfig(
            enabled=jira_integration_data.get("enabled", True),
            fetch_story_points=jira_integration_data.get("fetch_story_points", True),
            project_keys=jira_integration_data.get("project_keys", []),
            story_point_fields=jira_integration_data.get(
                "story_point_fields", ["customfield_10016", "customfield_10021", "Story Points"]
            ),
        )

    @classmethod
    def _parse_qualitative(cls, qualitative_data: dict[str, Any]) -> Optional["QualitativeConfig"]:
        """Build the optional qualitative-analysis section.

        Returns None when the section is absent, when its optional
        dependencies are not installed, or when parsing fails (both failure
        modes are reported to stdout, matching the original best-effort
        behavior).
        """
        if not qualitative_data:
            return None

        # Import here to avoid circular imports
        try:
            from .qualitative.models.schemas import (
                CacheConfig as QualitativeCacheConfig,
                ChangeTypeConfig,
                DomainConfig,
                IntentConfig,
                LLMConfig,
                NLPConfig,
                QualitativeConfig,
                RiskConfig,
            )

            # Parse NLP configuration
            nlp_data = qualitative_data.get("nlp", {})
            nlp_config = NLPConfig(
                spacy_model=nlp_data.get("spacy_model", "en_core_web_sm"),
                spacy_batch_size=nlp_data.get("spacy_batch_size", 1000),
                fast_mode=nlp_data.get("fast_mode", True),
                enable_parallel_processing=nlp_data.get("enable_parallel_processing", True),
                max_workers=nlp_data.get("max_workers", 4),
                change_type_config=ChangeTypeConfig(**nlp_data.get("change_type", {})),
                intent_config=IntentConfig(**nlp_data.get("intent", {})),
                domain_config=DomainConfig(**nlp_data.get("domain", {})),
                risk_config=RiskConfig(**nlp_data.get("risk", {})),
            )

            # Parse LLM configuration
            llm_data = qualitative_data.get("llm", {})
            llm_config = LLMConfig(
                openrouter_api_key=cls._resolve_env_var(
                    llm_data.get("openrouter_api_key", "${OPENROUTER_API_KEY}")
                ),
                base_url=llm_data.get("base_url", "https://openrouter.ai/api/v1"),
                primary_model=llm_data.get("primary_model", "anthropic/claude-3-haiku"),
                fallback_model=llm_data.get(
                    "fallback_model", "meta-llama/llama-3.1-8b-instruct:free"
                ),
                complex_model=llm_data.get("complex_model", "anthropic/claude-3-sonnet"),
                complexity_threshold=llm_data.get("complexity_threshold", 0.5),
                cost_threshold_per_1k=llm_data.get("cost_threshold_per_1k", 0.01),
                max_tokens=llm_data.get("max_tokens", 1000),
                temperature=llm_data.get("temperature", 0.1),
                max_group_size=llm_data.get("max_group_size", 10),
                similarity_threshold=llm_data.get("similarity_threshold", 0.8),
                requests_per_minute=llm_data.get("requests_per_minute", 200),
                max_retries=llm_data.get("max_retries", 3),
                max_daily_cost=llm_data.get("max_daily_cost", 5.0),
                enable_cost_tracking=llm_data.get("enable_cost_tracking", True),
            )

            # Parse cache configuration. Named distinctly so it cannot be
            # confused with the top-level cache section (the original code
            # reused the name `cache_data` for both).
            qualitative_cache_data = qualitative_data.get("cache", {})
            qualitative_cache_config = QualitativeCacheConfig(
                cache_dir=qualitative_cache_data.get("cache_dir", ".qualitative_cache"),
                semantic_cache_size=qualitative_cache_data.get("semantic_cache_size", 10000),
                pattern_cache_ttl_hours=qualitative_cache_data.get("pattern_cache_ttl_hours", 168),
                enable_pattern_learning=qualitative_cache_data.get(
                    "enable_pattern_learning", True
                ),
                learning_threshold=qualitative_cache_data.get("learning_threshold", 10),
                confidence_boost_factor=qualitative_cache_data.get(
                    "confidence_boost_factor", 0.1
                ),
                enable_compression=qualitative_cache_data.get("enable_compression", True),
                max_cache_size_mb=qualitative_cache_data.get("max_cache_size_mb", 100),
            )

            # Create main qualitative configuration
            return QualitativeConfig(
                enabled=qualitative_data.get("enabled", True),
                batch_size=qualitative_data.get("batch_size", 1000),
                max_llm_fallback_pct=qualitative_data.get("max_llm_fallback_pct", 0.15),
                confidence_threshold=qualitative_data.get("confidence_threshold", 0.7),
                nlp_config=nlp_config,
                llm_config=llm_config,
                cache_config=qualitative_cache_config,
                enable_performance_tracking=qualitative_data.get(
                    "enable_performance_tracking", True
                ),
                target_processing_time_ms=qualitative_data.get("target_processing_time_ms", 2.0),
                min_overall_confidence=qualitative_data.get("min_overall_confidence", 0.6),
                enable_quality_feedback=qualitative_data.get("enable_quality_feedback", True),
            )

        except ImportError as e:
            print(f"⚠️ Qualitative analysis dependencies missing: {e}")
            print(" Install with: pip install spacy scikit-learn openai tiktoken")
            return None
        except Exception as e:
            print(f"⚠️ Error parsing qualitative configuration: {e}")
            return None

    @staticmethod
    def _resolve_env_var(value: Optional[str]) -> Optional[str]:
        """Resolve environment variable references.

        A value of the form "${NAME}" is replaced by the NAME environment
        variable; any other non-empty value is returned unchanged, and a
        falsy value yields None.

        Raises:
            ValueError: If a referenced environment variable is not set.
        """
        if not value:
            return None

        if value.startswith("${") and value.endswith("}"):
            env_var = value[2:-1]
            resolved = os.environ.get(env_var)
            if not resolved:
                raise ValueError(f"Environment variable {env_var} not set")
            return resolved

        return value

    @staticmethod
    def validate_config(config: "Config") -> list[str]:
        """Validate configuration and return list of warnings."""
        warnings = []

        # Check repository paths exist
        for repo in config.repositories:
            if not repo.path.exists():
                warnings.append(f"Repository path does not exist: {repo.path}")
            elif not (repo.path / ".git").exists():
                warnings.append(f"Path is not a git repository: {repo.path}")

        # Check GitHub token if GitHub repos are specified
        has_github_repos = any(r.github_repo for r in config.repositories)
        if has_github_repos and not config.github.token:
            warnings.append("GitHub repositories specified but no GitHub token provided")

        # Check if owner is needed
        for repo in config.repositories:
            if repo.github_repo and "/" not in repo.github_repo and not config.github.owner:
                warnings.append(f"Repository {repo.github_repo} needs owner specified")

        # Check cache directory permissions
        try:
            config.cache.directory.mkdir(exist_ok=True, parents=True)
        except PermissionError:
            warnings.append(f"Cannot create cache directory: {config.cache.directory}")

        return warnings