gitflow-analytics: 1.0.1-py3-none-any.whl → 1.0.3-py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as published to a supported public registry, and is provided for informational purposes only.
Files changed (57)
  1. gitflow_analytics/__init__.py +11 -11
  2. gitflow_analytics/_version.py +2 -2
  3. gitflow_analytics/cli.py +612 -258
  4. gitflow_analytics/cli_rich.py +353 -0
  5. gitflow_analytics/config.py +251 -141
  6. gitflow_analytics/core/analyzer.py +140 -103
  7. gitflow_analytics/core/branch_mapper.py +132 -132
  8. gitflow_analytics/core/cache.py +240 -169
  9. gitflow_analytics/core/identity.py +210 -173
  10. gitflow_analytics/extractors/base.py +13 -11
  11. gitflow_analytics/extractors/story_points.py +70 -59
  12. gitflow_analytics/extractors/tickets.py +101 -87
  13. gitflow_analytics/integrations/github_integration.py +84 -77
  14. gitflow_analytics/integrations/jira_integration.py +116 -104
  15. gitflow_analytics/integrations/orchestrator.py +86 -85
  16. gitflow_analytics/metrics/dora.py +181 -177
  17. gitflow_analytics/models/database.py +190 -53
  18. gitflow_analytics/qualitative/__init__.py +30 -0
  19. gitflow_analytics/qualitative/classifiers/__init__.py +13 -0
  20. gitflow_analytics/qualitative/classifiers/change_type.py +468 -0
  21. gitflow_analytics/qualitative/classifiers/domain_classifier.py +399 -0
  22. gitflow_analytics/qualitative/classifiers/intent_analyzer.py +436 -0
  23. gitflow_analytics/qualitative/classifiers/risk_analyzer.py +412 -0
  24. gitflow_analytics/qualitative/core/__init__.py +13 -0
  25. gitflow_analytics/qualitative/core/llm_fallback.py +653 -0
  26. gitflow_analytics/qualitative/core/nlp_engine.py +373 -0
  27. gitflow_analytics/qualitative/core/pattern_cache.py +457 -0
  28. gitflow_analytics/qualitative/core/processor.py +540 -0
  29. gitflow_analytics/qualitative/models/__init__.py +25 -0
  30. gitflow_analytics/qualitative/models/schemas.py +272 -0
  31. gitflow_analytics/qualitative/utils/__init__.py +13 -0
  32. gitflow_analytics/qualitative/utils/batch_processor.py +326 -0
  33. gitflow_analytics/qualitative/utils/cost_tracker.py +343 -0
  34. gitflow_analytics/qualitative/utils/metrics.py +347 -0
  35. gitflow_analytics/qualitative/utils/text_processing.py +243 -0
  36. gitflow_analytics/reports/analytics_writer.py +11 -4
  37. gitflow_analytics/reports/csv_writer.py +51 -31
  38. gitflow_analytics/reports/narrative_writer.py +16 -14
  39. gitflow_analytics/tui/__init__.py +5 -0
  40. gitflow_analytics/tui/app.py +721 -0
  41. gitflow_analytics/tui/screens/__init__.py +8 -0
  42. gitflow_analytics/tui/screens/analysis_progress_screen.py +487 -0
  43. gitflow_analytics/tui/screens/configuration_screen.py +547 -0
  44. gitflow_analytics/tui/screens/loading_screen.py +358 -0
  45. gitflow_analytics/tui/screens/main_screen.py +304 -0
  46. gitflow_analytics/tui/screens/results_screen.py +698 -0
  47. gitflow_analytics/tui/widgets/__init__.py +7 -0
  48. gitflow_analytics/tui/widgets/data_table.py +257 -0
  49. gitflow_analytics/tui/widgets/export_modal.py +301 -0
  50. gitflow_analytics/tui/widgets/progress_widget.py +192 -0
  51. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.0.3.dist-info}/METADATA +31 -4
  52. gitflow_analytics-1.0.3.dist-info/RECORD +62 -0
  53. gitflow_analytics-1.0.1.dist-info/RECORD +0 -31
  54. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.0.3.dist-info}/WHEEL +0 -0
  55. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.0.3.dist-info}/entry_points.txt +0 -0
  56. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.0.3.dist-info}/licenses/LICENSE +0 -0
  57. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.0.3.dist-info}/top_level.txt +0 -0
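
Of the 57 files, the largest single change is gitflow_analytics/config.py, whose full diff is reproduced below. A per-file diff like it can be reproduced locally with only the Python standard library, since wheels are ordinary zip archives. A minimal sketch, assuming both wheels have already been fetched (e.g. with pip download gitflow-analytics==1.0.1 --no-deps, and the same for 1.0.3):

    import difflib
    import zipfile

    # Hypothetical local filenames produced by `pip download ... --no-deps`.
    OLD = "gitflow_analytics-1.0.1-py3-none-any.whl"
    NEW = "gitflow_analytics-1.0.3-py3-none-any.whl"
    MEMBER = "gitflow_analytics/config.py"

    def read_member(wheel: str, member: str) -> list[str]:
        """Read one file out of a wheel; wheels are plain zip archives."""
        with zipfile.ZipFile(wheel) as zf:
            return zf.read(member).decode("utf-8").splitlines(keepends=True)

    print("".join(difflib.unified_diff(
        read_member(OLD, MEMBER),
        read_member(NEW, MEMBER),
        fromfile=f"1.0.1/{MEMBER}",
        tofile=f"1.0.3/{MEMBER}",
    )))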
gitflow_analytics/config.py
@@ -1,8 +1,9 @@
 """Configuration management for GitFlow Analytics."""
+
 import os
 from dataclasses import dataclass, field
 from pathlib import Path
-from typing import Any, Dict, List, Optional
+from typing import Any, Optional

 import yaml
 from dotenv import load_dotenv
@@ -11,214 +12,235 @@ from dotenv import load_dotenv
 @dataclass
 class RepositoryConfig:
     """Configuration for a single repository."""
+
     name: str
     path: Path
     github_repo: Optional[str] = None
     project_key: Optional[str] = None
     branch: Optional[str] = None
-
-    def __post_init__(self):
+
+    def __post_init__(self) -> None:
         self.path = Path(self.path).expanduser().resolve()
         if not self.project_key:
-            self.project_key = self.name.upper().replace('-', '_')
+            self.project_key = self.name.upper().replace("-", "_")
+

 @dataclass
 class GitHubConfig:
     """GitHub API configuration."""
+
     token: Optional[str] = None
     owner: Optional[str] = None
     organization: Optional[str] = None
     base_url: str = "https://api.github.com"
     max_retries: int = 3
     backoff_factor: int = 2
-
+
     def get_repo_full_name(self, repo_name: str) -> str:
         """Get full repository name including owner."""
-        if '/' in repo_name:
+        if "/" in repo_name:
             return repo_name
         if self.owner:
             return f"{self.owner}/{repo_name}"
         raise ValueError(f"Repository {repo_name} needs owner specified")

+
 @dataclass
 class AnalysisConfig:
     """Analysis-specific configuration."""
-    story_point_patterns: List[str] = field(default_factory=list)
-    exclude_authors: List[str] = field(default_factory=list)
-    exclude_message_patterns: List[str] = field(default_factory=list)
-    exclude_paths: List[str] = field(default_factory=list)
+
+    story_point_patterns: list[str] = field(default_factory=list)
+    exclude_authors: list[str] = field(default_factory=list)
+    exclude_message_patterns: list[str] = field(default_factory=list)
+    exclude_paths: list[str] = field(default_factory=list)
     similarity_threshold: float = 0.85
-    manual_identity_mappings: List[Dict[str, Any]] = field(default_factory=list)
+    manual_identity_mappings: list[dict[str, Any]] = field(default_factory=list)
     default_ticket_platform: Optional[str] = None
-    branch_mapping_rules: Dict[str, List[str]] = field(default_factory=dict)
-    ticket_platforms: Optional[List[str]] = None
+    branch_mapping_rules: dict[str, list[str]] = field(default_factory=dict)
+    ticket_platforms: Optional[list[str]] = None
+

 @dataclass
 class OutputConfig:
     """Output configuration."""
+
     directory: Optional[Path] = None
-    formats: List[str] = field(default_factory=lambda: ["csv", "markdown"])
+    formats: list[str] = field(default_factory=lambda: ["csv", "markdown"])
     csv_delimiter: str = ","
     csv_encoding: str = "utf-8"
     anonymize_enabled: bool = False
-    anonymize_fields: List[str] = field(default_factory=list)
+    anonymize_fields: list[str] = field(default_factory=list)
     anonymize_method: str = "hash"

+
 @dataclass
 class CacheConfig:
     """Cache configuration."""
+
     directory: Path = Path(".gitflow-cache")
     ttl_hours: int = 168
     max_size_mb: int = 500

+
 @dataclass
 class JIRAConfig:
     """JIRA configuration."""
+
     access_user: str
     access_token: str
     base_url: Optional[str] = None

+
 @dataclass
 class JIRAIntegrationConfig:
     """JIRA integration specific configuration."""
+
     enabled: bool = True
     fetch_story_points: bool = True
-    project_keys: List[str] = field(default_factory=list)
-    story_point_fields: List[str] = field(default_factory=lambda: [
-        "customfield_10016",
-        "customfield_10021",
-        "Story Points"
-    ])
+    project_keys: list[str] = field(default_factory=list)
+    story_point_fields: list[str] = field(
+        default_factory=lambda: ["customfield_10016", "customfield_10021", "Story Points"]
+    )
+

 @dataclass
 class Config:
     """Main configuration container."""
-    repositories: List[RepositoryConfig]
+
+    repositories: list[RepositoryConfig]
     github: GitHubConfig
     analysis: AnalysisConfig
     output: OutputConfig
     cache: CacheConfig
     jira: Optional[JIRAConfig] = None
     jira_integration: Optional[JIRAIntegrationConfig] = None
-
-    def discover_organization_repositories(self, clone_base_path: Optional[Path] = None) -> List[RepositoryConfig]:
+    qualitative: Optional['QualitativeConfig'] = None
+
+    def discover_organization_repositories(
+        self, clone_base_path: Optional[Path] = None
+    ) -> list[RepositoryConfig]:
         """Discover repositories from GitHub organization.
-
+
         Args:
             clone_base_path: Base directory where repos should be cloned/found.
                 If None, uses output directory.
-
+
         Returns:
             List of discovered repository configurations.
         """
         if not self.github.organization or not self.github.token:
             return []
-
+
         from github import Github
-
+
         github_client = Github(self.github.token, base_url=self.github.base_url)
-
+
         try:
             org = github_client.get_organization(self.github.organization)
             discovered_repos = []
-
+
             base_path = clone_base_path or self.output.directory
             if base_path is None:
                 raise ValueError("No base path available for repository cloning")
-
+
             for repo in org.get_repos():
                 # Skip archived repositories
                 if repo.archived:
                     continue
-
+
                 # Create repository configuration
                 repo_path = base_path / repo.name
                 repo_config = RepositoryConfig(
                     name=repo.name,
                     path=repo_path,
                     github_repo=repo.full_name,
-                    project_key=repo.name.upper().replace('-', '_'),
-                    branch=repo.default_branch
+                    project_key=repo.name.upper().replace("-", "_"),
+                    branch=repo.default_branch,
                 )
                 discovered_repos.append(repo_config)
-
+
             return discovered_repos
-
+
         except Exception as e:
-            raise ValueError(f"Failed to discover repositories from organization {self.github.organization}: {e}") from e
+            raise ValueError(
+                f"Failed to discover repositories from organization {self.github.organization}: {e}"
+            ) from e
+

 class ConfigLoader:
     """Load and validate configuration from YAML files."""
-
+
     @classmethod
     def load(cls, config_path: Path) -> Config:
         """Load configuration from YAML file."""
         # Load .env file from the same directory as the config file if it exists
         config_dir = config_path.parent
-        env_file = config_dir / '.env'
+        env_file = config_dir / ".env"
         if env_file.exists():
             load_dotenv(env_file, override=True)
             print(f"📋 Loaded environment variables from {env_file}")
-
+
         with open(config_path) as f:
             data = yaml.safe_load(f)
-
+
         # Validate version
-        version = data.get('version', '1.0')
-        if version not in ['1.0']:
+        version = data.get("version", "1.0")
+        if version not in ["1.0"]:
             raise ValueError(f"Unsupported config version: {version}")
-
+
         # Process GitHub config
-        github_data = data.get('github', {})
-
+        github_data = data.get("github", {})
+
         # Resolve GitHub token
-        github_token = cls._resolve_env_var(github_data.get('token'))
-        if github_data.get('token') and not github_token:
-            raise ValueError("GitHub is configured but GITHUB_TOKEN environment variable is not set")
-
+        github_token = cls._resolve_env_var(github_data.get("token"))
+        if github_data.get("token") and not github_token:
+            raise ValueError(
+                "GitHub is configured but GITHUB_TOKEN environment variable is not set"
+            )
+
         github_config = GitHubConfig(
             token=github_token,
-            owner=cls._resolve_env_var(github_data.get('owner')),
-            organization=cls._resolve_env_var(github_data.get('organization')),
-            base_url=github_data.get('base_url', 'https://api.github.com'),
-            max_retries=github_data.get('rate_limit', {}).get('max_retries', 3),
-            backoff_factor=github_data.get('rate_limit', {}).get('backoff_factor', 2)
+            owner=cls._resolve_env_var(github_data.get("owner")),
+            organization=cls._resolve_env_var(github_data.get("organization")),
+            base_url=github_data.get("base_url", "https://api.github.com"),
+            max_retries=github_data.get("rate_limit", {}).get("max_retries", 3),
+            backoff_factor=github_data.get("rate_limit", {}).get("backoff_factor", 2),
         )
-
+
         # Process repositories
         repositories = []
-
+
         # Handle organization-based repository discovery
-        if github_config.organization and not data.get('repositories'):
+        if github_config.organization and not data.get("repositories"):
             # Organization specified but no explicit repositories - will be discovered at runtime
             pass
         else:
             # Process explicitly defined repositories
-            for repo_data in data.get('repositories', []):
+            for repo_data in data.get("repositories", []):
                 # Handle github_repo with owner/organization fallback
-                github_repo = repo_data.get('github_repo')
-                if github_repo and '/' not in github_repo:
+                github_repo = repo_data.get("github_repo")
+                if github_repo and "/" not in github_repo:
                     if github_config.organization:
                         github_repo = f"{github_config.organization}/{github_repo}"
                     elif github_config.owner:
                         github_repo = f"{github_config.owner}/{github_repo}"
-
+
                 repo_config = RepositoryConfig(
-                    name=repo_data['name'],
-                    path=repo_data['path'],
+                    name=repo_data["name"],
+                    path=repo_data["path"],
                     github_repo=github_repo,
-                    project_key=repo_data.get('project_key'),
-                    branch=repo_data.get('branch')
+                    project_key=repo_data.get("project_key"),
+                    branch=repo_data.get("branch"),
                 )
                 repositories.append(repo_config)
-
+
         # Allow empty repositories list if organization is specified
         if not repositories and not github_config.organization:
             raise ValueError("No repositories defined and no organization specified for discovery")
-
+
         # Process analysis settings
-        analysis_data = data.get('analysis', {})
-
+        analysis_data = data.get("analysis", {})
+
         # Default exclude paths for common boilerplate/generated files
         default_exclude_paths = [
             "**/node_modules/**",
@@ -244,35 +266,39 @@ class ConfigLoader:
             "**/coverage/**",
             "**/.coverage/**",
             "**/htmlcov/**",
-            "**/*.map"
+            "**/*.map",
         ]
-
+
         # Merge user-provided paths with defaults (user paths take precedence)
-        user_exclude_paths = analysis_data.get('exclude', {}).get('paths', [])
+        user_exclude_paths = analysis_data.get("exclude", {}).get("paths", [])
         exclude_paths = user_exclude_paths if user_exclude_paths else default_exclude_paths
-
+
         analysis_config = AnalysisConfig(
-            story_point_patterns=analysis_data.get('story_point_patterns', [
-                r"(?:story\s*points?|sp|pts?)\s*[:=]\s*(\d+)",
-                r"\[(\d+)\s*(?:sp|pts?)\]",
-                r"#(\d+)sp"
-            ]),
-            exclude_authors=analysis_data.get('exclude', {}).get('authors', [
-                "dependabot[bot]",
-                "renovate[bot]"
-            ]),
-            exclude_message_patterns=analysis_data.get('exclude', {}).get('message_patterns', []),
+            story_point_patterns=analysis_data.get(
+                "story_point_patterns",
+                [
+                    r"(?:story\s*points?|sp|pts?)\s*[:=]\s*(\d+)",
+                    r"\[(\d+)\s*(?:sp|pts?)\]",
+                    r"#(\d+)sp",
+                ],
+            ),
+            exclude_authors=analysis_data.get("exclude", {}).get(
+                "authors", ["dependabot[bot]", "renovate[bot]"]
+            ),
+            exclude_message_patterns=analysis_data.get("exclude", {}).get("message_patterns", []),
             exclude_paths=exclude_paths,
-            similarity_threshold=analysis_data.get('identity', {}).get('similarity_threshold', 0.85),
-            manual_identity_mappings=analysis_data.get('identity', {}).get('manual_mappings', []),
-            default_ticket_platform=analysis_data.get('default_ticket_platform'),
-            branch_mapping_rules=analysis_data.get('branch_mapping_rules', {}),
-            ticket_platforms=analysis_data.get('ticket_platforms')
+            similarity_threshold=analysis_data.get("identity", {}).get(
+                "similarity_threshold", 0.85
+            ),
+            manual_identity_mappings=analysis_data.get("identity", {}).get("manual_mappings", []),
+            default_ticket_platform=analysis_data.get("default_ticket_platform"),
+            branch_mapping_rules=analysis_data.get("branch_mapping_rules", {}),
+            ticket_platforms=analysis_data.get("ticket_platforms"),
         )
-
+
         # Process output settings
-        output_data = data.get('output', {})
-        output_dir = output_data.get('directory')
+        output_data = data.get("output", {})
+        output_dir = output_data.get("directory")
         if output_dir:
             output_dir = Path(output_dir).expanduser()
             # If relative path, make it relative to config file directory
@@ -282,66 +308,149 @@ class ConfigLoader:
         else:
             # Default to config file directory if not specified
             output_dir = config_path.parent
-
+
         output_config = OutputConfig(
             directory=output_dir,
-            formats=output_data.get('formats', ['csv', 'markdown']),
-            csv_delimiter=output_data.get('csv', {}).get('delimiter', ','),
-            csv_encoding=output_data.get('csv', {}).get('encoding', 'utf-8'),
-            anonymize_enabled=output_data.get('anonymization', {}).get('enabled', False),
-            anonymize_fields=output_data.get('anonymization', {}).get('fields', []),
-            anonymize_method=output_data.get('anonymization', {}).get('method', 'hash')
+            formats=output_data.get("formats", ["csv", "markdown"]),
+            csv_delimiter=output_data.get("csv", {}).get("delimiter", ","),
+            csv_encoding=output_data.get("csv", {}).get("encoding", "utf-8"),
+            anonymize_enabled=output_data.get("anonymization", {}).get("enabled", False),
+            anonymize_fields=output_data.get("anonymization", {}).get("fields", []),
+            anonymize_method=output_data.get("anonymization", {}).get("method", "hash"),
         )
-
+
         # Process cache settings
-        cache_data = data.get('cache', {})
-        cache_dir = cache_data.get('directory', '.gitflow-cache')
+        cache_data = data.get("cache", {})
+        cache_dir = cache_data.get("directory", ".gitflow-cache")
         cache_path = Path(cache_dir)
         # If relative path, make it relative to config file directory
         if not cache_path.is_absolute():
             cache_path = config_path.parent / cache_path
-
+
         cache_config = CacheConfig(
             directory=cache_path.resolve(),
-            ttl_hours=cache_data.get('ttl_hours', 168),
-            max_size_mb=cache_data.get('max_size_mb', 500)
+            ttl_hours=cache_data.get("ttl_hours", 168),
+            max_size_mb=cache_data.get("max_size_mb", 500),
         )
-
+
         # Process JIRA settings
         jira_config = None
-        jira_data = data.get('jira', {})
+        jira_data = data.get("jira", {})
         if jira_data:
-            access_user = cls._resolve_env_var(jira_data.get('access_user', ''))
-            access_token = cls._resolve_env_var(jira_data.get('access_token', ''))
-
+            access_user = cls._resolve_env_var(jira_data.get("access_user", ""))
+            access_token = cls._resolve_env_var(jira_data.get("access_token", ""))
+
             # Validate JIRA credentials if JIRA is configured
-            if jira_data.get('access_user') and jira_data.get('access_token'):
+            if jira_data.get("access_user") and jira_data.get("access_token"):
                 if not access_user:
-                    raise ValueError("JIRA is configured but JIRA_ACCESS_USER environment variable is not set")
+                    raise ValueError(
+                        "JIRA is configured but JIRA_ACCESS_USER environment variable is not set"
+                    )
                 if not access_token:
-                    raise ValueError("JIRA is configured but JIRA_ACCESS_TOKEN environment variable is not set")
-
+                    raise ValueError(
+                        "JIRA is configured but JIRA_ACCESS_TOKEN environment variable is not set"
+                    )
+
             jira_config = JIRAConfig(
                 access_user=access_user,
                 access_token=access_token,
-                base_url=jira_data.get('base_url')
+                base_url=jira_data.get("base_url"),
             )
-
+
         # Process JIRA integration settings
         jira_integration_config = None
-        jira_integration_data = data.get('jira_integration', {})
+        jira_integration_data = data.get("jira_integration", {})
         if jira_integration_data:
             jira_integration_config = JIRAIntegrationConfig(
-                enabled=jira_integration_data.get('enabled', True),
-                fetch_story_points=jira_integration_data.get('fetch_story_points', True),
-                project_keys=jira_integration_data.get('project_keys', []),
-                story_point_fields=jira_integration_data.get('story_point_fields', [
-                    "customfield_10016",
-                    "customfield_10021",
-                    "Story Points"
-                ])
+                enabled=jira_integration_data.get("enabled", True),
+                fetch_story_points=jira_integration_data.get("fetch_story_points", True),
+                project_keys=jira_integration_data.get("project_keys", []),
+                story_point_fields=jira_integration_data.get(
+                    "story_point_fields", ["customfield_10016", "customfield_10021", "Story Points"]
+                ),
             )
-
+
+        # Process qualitative analysis settings
+        qualitative_config = None
+        qualitative_data = data.get("qualitative", {})
+        if qualitative_data:
+            # Import here to avoid circular imports
+            try:
+                from .qualitative.models.schemas import (
+                    QualitativeConfig, NLPConfig, LLMConfig, CacheConfig as QualitativeCacheConfig,
+                    ChangeTypeConfig, IntentConfig, DomainConfig, RiskConfig
+                )
+
+                # Parse NLP configuration
+                nlp_data = qualitative_data.get("nlp", {})
+                nlp_config = NLPConfig(
+                    spacy_model=nlp_data.get("spacy_model", "en_core_web_sm"),
+                    spacy_batch_size=nlp_data.get("spacy_batch_size", 1000),
+                    fast_mode=nlp_data.get("fast_mode", True),
+                    enable_parallel_processing=nlp_data.get("enable_parallel_processing", True),
+                    max_workers=nlp_data.get("max_workers", 4),
+                    change_type_config=ChangeTypeConfig(**nlp_data.get("change_type", {})),
+                    intent_config=IntentConfig(**nlp_data.get("intent", {})),
+                    domain_config=DomainConfig(**nlp_data.get("domain", {})),
+                    risk_config=RiskConfig(**nlp_data.get("risk", {}))
+                )
+
+                # Parse LLM configuration
+                llm_data = qualitative_data.get("llm", {})
+                llm_config = LLMConfig(
+                    openrouter_api_key=cls._resolve_env_var(llm_data.get("openrouter_api_key", "${OPENROUTER_API_KEY}")),
+                    base_url=llm_data.get("base_url", "https://openrouter.ai/api/v1"),
+                    primary_model=llm_data.get("primary_model", "anthropic/claude-3-haiku"),
+                    fallback_model=llm_data.get("fallback_model", "meta-llama/llama-3.1-8b-instruct:free"),
+                    complex_model=llm_data.get("complex_model", "anthropic/claude-3-sonnet"),
+                    complexity_threshold=llm_data.get("complexity_threshold", 0.5),
+                    cost_threshold_per_1k=llm_data.get("cost_threshold_per_1k", 0.01),
+                    max_tokens=llm_data.get("max_tokens", 1000),
+                    temperature=llm_data.get("temperature", 0.1),
+                    max_group_size=llm_data.get("max_group_size", 10),
+                    similarity_threshold=llm_data.get("similarity_threshold", 0.8),
+                    requests_per_minute=llm_data.get("requests_per_minute", 200),
+                    max_retries=llm_data.get("max_retries", 3),
+                    max_daily_cost=llm_data.get("max_daily_cost", 5.0),
+                    enable_cost_tracking=llm_data.get("enable_cost_tracking", True)
+                )
+
+                # Parse cache configuration
+                cache_data = qualitative_data.get("cache", {})
+                qualitative_cache_config = QualitativeCacheConfig(
+                    cache_dir=cache_data.get("cache_dir", ".qualitative_cache"),
+                    semantic_cache_size=cache_data.get("semantic_cache_size", 10000),
+                    pattern_cache_ttl_hours=cache_data.get("pattern_cache_ttl_hours", 168),
+                    enable_pattern_learning=cache_data.get("enable_pattern_learning", True),
+                    learning_threshold=cache_data.get("learning_threshold", 10),
+                    confidence_boost_factor=cache_data.get("confidence_boost_factor", 0.1),
+                    enable_compression=cache_data.get("enable_compression", True),
+                    max_cache_size_mb=cache_data.get("max_cache_size_mb", 100)
+                )
+
+                # Create main qualitative configuration
+                qualitative_config = QualitativeConfig(
+                    enabled=qualitative_data.get("enabled", True),
+                    batch_size=qualitative_data.get("batch_size", 1000),
+                    max_llm_fallback_pct=qualitative_data.get("max_llm_fallback_pct", 0.15),
+                    confidence_threshold=qualitative_data.get("confidence_threshold", 0.7),
+                    nlp_config=nlp_config,
+                    llm_config=llm_config,
+                    cache_config=qualitative_cache_config,
+                    enable_performance_tracking=qualitative_data.get("enable_performance_tracking", True),
+                    target_processing_time_ms=qualitative_data.get("target_processing_time_ms", 2.0),
+                    min_overall_confidence=qualitative_data.get("min_overall_confidence", 0.6),
+                    enable_quality_feedback=qualitative_data.get("enable_quality_feedback", True)
+                )
+
+            except ImportError as e:
+                print(f"⚠️ Qualitative analysis dependencies missing: {e}")
+                print(" Install with: pip install spacy scikit-learn openai tiktoken")
+                qualitative_config = None
+            except Exception as e:
+                print(f"⚠️ Error parsing qualitative configuration: {e}")
+                qualitative_config = None
+
         return Config(
             repositories=repositories,
             github=github_config,
@@ -349,50 +458,51 @@ class ConfigLoader:
             output=output_config,
             cache=cache_config,
             jira=jira_config,
-            jira_integration=jira_integration_config
+            jira_integration=jira_integration_config,
+            qualitative=qualitative_config,
         )
-
+
     @staticmethod
     def _resolve_env_var(value: Optional[str]) -> Optional[str]:
         """Resolve environment variable references."""
        if not value:
            return None
-
-        if value.startswith('${') and value.endswith('}'):
+
+        if value.startswith("${") and value.endswith("}"):
            env_var = value[2:-1]
            resolved = os.environ.get(env_var)
            if not resolved:
                raise ValueError(f"Environment variable {env_var} not set")
            return resolved
-
+
        return value
-
+
    @staticmethod
-    def validate_config(config: Config) -> List[str]:
+    def validate_config(config: Config) -> list[str]:
        """Validate configuration and return list of warnings."""
        warnings = []
-
+
        # Check repository paths exist
        for repo in config.repositories:
            if not repo.path.exists():
                warnings.append(f"Repository path does not exist: {repo.path}")
-            elif not (repo.path / '.git').exists():
+            elif not (repo.path / ".git").exists():
                warnings.append(f"Path is not a git repository: {repo.path}")
-
+
        # Check GitHub token if GitHub repos are specified
        has_github_repos = any(r.github_repo for r in config.repositories)
        if has_github_repos and not config.github.token:
            warnings.append("GitHub repositories specified but no GitHub token provided")
-
+
        # Check if owner is needed
        for repo in config.repositories:
-            if repo.github_repo and '/' not in repo.github_repo and not config.github.owner:
+            if repo.github_repo and "/" not in repo.github_repo and not config.github.owner:
                warnings.append(f"Repository {repo.github_repo} needs owner specified")
-
+
        # Check cache directory permissions
        try:
            config.cache.directory.mkdir(exist_ok=True, parents=True)
        except PermissionError:
            warnings.append(f"Cannot create cache directory: {config.cache.directory}")
-
-        return warnings
+
+        return warnings
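
For orientation, everything the new qualitative section reads is plain YAML keyed under "qualitative", with defaults applied for anything omitted. A minimal sketch of a configuration exercising it — the key names mirror the parsing code above, while the repository entry, paths, and values are illustrative; note that, per the except branches above, qualitative falls back to None rather than failing the load when the optional NLP/LLM dependencies or OPENROUTER_API_KEY are missing:

    from pathlib import Path
    from textwrap import dedent

    from gitflow_analytics.config import ConfigLoader

    # Illustrative config; "version", "repositories", and "qualitative"
    # match the keys ConfigLoader.load() reads above. Values are made up.
    Path("config.yaml").write_text(dedent("""\
        version: "1.0"
        repositories:
          - name: my-service            # hypothetical repository
            path: ~/repos/my-service
        qualitative:
          enabled: true
          batch_size: 1000
          nlp:
            spacy_model: en_core_web_sm
          llm:
            openrouter_api_key: "${OPENROUTER_API_KEY}"  # resolved by _resolve_env_var
            primary_model: anthropic/claude-3-haiku
          cache:
            cache_dir: .qualitative_cache
    """))

    config = ConfigLoader.load(Path("config.yaml"))
    # validate_config() collects warnings (missing paths, tokens, ...) instead of raising.
    for warning in ConfigLoader.validate_config(config):
        print(f"warning: {warning}")
    print(config.qualitative)  # None if the qualitative deps or API key are absent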