gitflow-analytics 1.0.1__py3-none-any.whl → 1.3.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119)
  1. gitflow_analytics/__init__.py +11 -11
  2. gitflow_analytics/_version.py +2 -2
  3. gitflow_analytics/classification/__init__.py +31 -0
  4. gitflow_analytics/classification/batch_classifier.py +752 -0
  5. gitflow_analytics/classification/classifier.py +464 -0
  6. gitflow_analytics/classification/feature_extractor.py +725 -0
  7. gitflow_analytics/classification/linguist_analyzer.py +574 -0
  8. gitflow_analytics/classification/model.py +455 -0
  9. gitflow_analytics/cli.py +4490 -378
  10. gitflow_analytics/cli_rich.py +503 -0
  11. gitflow_analytics/config/__init__.py +43 -0
  12. gitflow_analytics/config/errors.py +261 -0
  13. gitflow_analytics/config/loader.py +904 -0
  14. gitflow_analytics/config/profiles.py +264 -0
  15. gitflow_analytics/config/repository.py +124 -0
  16. gitflow_analytics/config/schema.py +441 -0
  17. gitflow_analytics/config/validator.py +154 -0
  18. gitflow_analytics/config.py +44 -398
  19. gitflow_analytics/core/analyzer.py +1320 -172
  20. gitflow_analytics/core/branch_mapper.py +132 -132
  21. gitflow_analytics/core/cache.py +1554 -175
  22. gitflow_analytics/core/data_fetcher.py +1193 -0
  23. gitflow_analytics/core/identity.py +571 -185
  24. gitflow_analytics/core/metrics_storage.py +526 -0
  25. gitflow_analytics/core/progress.py +372 -0
  26. gitflow_analytics/core/schema_version.py +269 -0
  27. gitflow_analytics/extractors/base.py +13 -11
  28. gitflow_analytics/extractors/ml_tickets.py +1100 -0
  29. gitflow_analytics/extractors/story_points.py +77 -59
  30. gitflow_analytics/extractors/tickets.py +841 -89
  31. gitflow_analytics/identity_llm/__init__.py +6 -0
  32. gitflow_analytics/identity_llm/analysis_pass.py +231 -0
  33. gitflow_analytics/identity_llm/analyzer.py +464 -0
  34. gitflow_analytics/identity_llm/models.py +76 -0
  35. gitflow_analytics/integrations/github_integration.py +258 -87
  36. gitflow_analytics/integrations/jira_integration.py +572 -123
  37. gitflow_analytics/integrations/orchestrator.py +206 -82
  38. gitflow_analytics/metrics/activity_scoring.py +322 -0
  39. gitflow_analytics/metrics/branch_health.py +470 -0
  40. gitflow_analytics/metrics/dora.py +542 -179
  41. gitflow_analytics/models/database.py +986 -59
  42. gitflow_analytics/pm_framework/__init__.py +115 -0
  43. gitflow_analytics/pm_framework/adapters/__init__.py +50 -0
  44. gitflow_analytics/pm_framework/adapters/jira_adapter.py +1845 -0
  45. gitflow_analytics/pm_framework/base.py +406 -0
  46. gitflow_analytics/pm_framework/models.py +211 -0
  47. gitflow_analytics/pm_framework/orchestrator.py +652 -0
  48. gitflow_analytics/pm_framework/registry.py +333 -0
  49. gitflow_analytics/qualitative/__init__.py +29 -0
  50. gitflow_analytics/qualitative/chatgpt_analyzer.py +259 -0
  51. gitflow_analytics/qualitative/classifiers/__init__.py +13 -0
  52. gitflow_analytics/qualitative/classifiers/change_type.py +742 -0
  53. gitflow_analytics/qualitative/classifiers/domain_classifier.py +506 -0
  54. gitflow_analytics/qualitative/classifiers/intent_analyzer.py +535 -0
  55. gitflow_analytics/qualitative/classifiers/llm/__init__.py +35 -0
  56. gitflow_analytics/qualitative/classifiers/llm/base.py +193 -0
  57. gitflow_analytics/qualitative/classifiers/llm/batch_processor.py +383 -0
  58. gitflow_analytics/qualitative/classifiers/llm/cache.py +479 -0
  59. gitflow_analytics/qualitative/classifiers/llm/cost_tracker.py +435 -0
  60. gitflow_analytics/qualitative/classifiers/llm/openai_client.py +403 -0
  61. gitflow_analytics/qualitative/classifiers/llm/prompts.py +373 -0
  62. gitflow_analytics/qualitative/classifiers/llm/response_parser.py +287 -0
  63. gitflow_analytics/qualitative/classifiers/llm_commit_classifier.py +607 -0
  64. gitflow_analytics/qualitative/classifiers/risk_analyzer.py +438 -0
  65. gitflow_analytics/qualitative/core/__init__.py +13 -0
  66. gitflow_analytics/qualitative/core/llm_fallback.py +657 -0
  67. gitflow_analytics/qualitative/core/nlp_engine.py +382 -0
  68. gitflow_analytics/qualitative/core/pattern_cache.py +479 -0
  69. gitflow_analytics/qualitative/core/processor.py +673 -0
  70. gitflow_analytics/qualitative/enhanced_analyzer.py +2236 -0
  71. gitflow_analytics/qualitative/example_enhanced_usage.py +420 -0
  72. gitflow_analytics/qualitative/models/__init__.py +25 -0
  73. gitflow_analytics/qualitative/models/schemas.py +306 -0
  74. gitflow_analytics/qualitative/utils/__init__.py +13 -0
  75. gitflow_analytics/qualitative/utils/batch_processor.py +339 -0
  76. gitflow_analytics/qualitative/utils/cost_tracker.py +345 -0
  77. gitflow_analytics/qualitative/utils/metrics.py +361 -0
  78. gitflow_analytics/qualitative/utils/text_processing.py +285 -0
  79. gitflow_analytics/reports/__init__.py +100 -0
  80. gitflow_analytics/reports/analytics_writer.py +550 -18
  81. gitflow_analytics/reports/base.py +648 -0
  82. gitflow_analytics/reports/branch_health_writer.py +322 -0
  83. gitflow_analytics/reports/classification_writer.py +924 -0
  84. gitflow_analytics/reports/cli_integration.py +427 -0
  85. gitflow_analytics/reports/csv_writer.py +1700 -216
  86. gitflow_analytics/reports/data_models.py +504 -0
  87. gitflow_analytics/reports/database_report_generator.py +427 -0
  88. gitflow_analytics/reports/example_usage.py +344 -0
  89. gitflow_analytics/reports/factory.py +499 -0
  90. gitflow_analytics/reports/formatters.py +698 -0
  91. gitflow_analytics/reports/html_generator.py +1116 -0
  92. gitflow_analytics/reports/interfaces.py +489 -0
  93. gitflow_analytics/reports/json_exporter.py +2770 -0
  94. gitflow_analytics/reports/narrative_writer.py +2289 -158
  95. gitflow_analytics/reports/story_point_correlation.py +1144 -0
  96. gitflow_analytics/reports/weekly_trends_writer.py +389 -0
  97. gitflow_analytics/training/__init__.py +5 -0
  98. gitflow_analytics/training/model_loader.py +377 -0
  99. gitflow_analytics/training/pipeline.py +550 -0
  100. gitflow_analytics/tui/__init__.py +5 -0
  101. gitflow_analytics/tui/app.py +724 -0
  102. gitflow_analytics/tui/screens/__init__.py +8 -0
  103. gitflow_analytics/tui/screens/analysis_progress_screen.py +496 -0
  104. gitflow_analytics/tui/screens/configuration_screen.py +523 -0
  105. gitflow_analytics/tui/screens/loading_screen.py +348 -0
  106. gitflow_analytics/tui/screens/main_screen.py +321 -0
  107. gitflow_analytics/tui/screens/results_screen.py +722 -0
  108. gitflow_analytics/tui/widgets/__init__.py +7 -0
  109. gitflow_analytics/tui/widgets/data_table.py +255 -0
  110. gitflow_analytics/tui/widgets/export_modal.py +301 -0
  111. gitflow_analytics/tui/widgets/progress_widget.py +187 -0
  112. gitflow_analytics-1.3.6.dist-info/METADATA +1015 -0
  113. gitflow_analytics-1.3.6.dist-info/RECORD +122 -0
  114. gitflow_analytics-1.0.1.dist-info/METADATA +0 -463
  115. gitflow_analytics-1.0.1.dist-info/RECORD +0 -31
  116. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/WHEEL +0 -0
  117. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/entry_points.txt +0 -0
  118. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/licenses/LICENSE +0 -0
  119. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,904 @@
1
+ """YAML configuration loading and environment variable expansion."""
2
+
3
+ import os
4
+ from pathlib import Path
5
+ from typing import Any, Optional, Union
6
+
7
+ import yaml
8
+ from dotenv import load_dotenv
9
+
10
+ from .errors import (
11
+ ConfigurationError,
12
+ EnvironmentVariableError,
13
+ InvalidValueError,
14
+ handle_yaml_error,
15
+ )
16
+ from .profiles import ProfileManager
17
+ from .repository import RepositoryManager
18
+ from .schema import (
19
+ AnalysisConfig,
20
+ BranchAnalysisConfig,
21
+ CacheConfig,
22
+ CommitClassificationConfig,
23
+ Config,
24
+ GitHubConfig,
25
+ JIRAConfig,
26
+ JIRAIntegrationConfig,
27
+ LLMClassificationConfig,
28
+ MLCategorization,
29
+ OutputConfig,
30
+ PMIntegrationConfig,
31
+ PMPlatformConfig,
32
+ RepositoryConfig,
33
+ )
34
+ from .validator import ConfigValidator
35
+
36
+
37
+ class ConfigLoader:
38
+ """Load and validate configuration from YAML files."""
39
+
40
# Default exclude paths for common boilerplate/generated files.
#
# The raw literal below is grouped by theme, which historically led to several
# exact repeats (e.g. "**/node_modules/**", "**/vendor/**", "**/*.min.js").
# Wrapping it in dict.fromkeys() collapses duplicates while preserving
# first-seen order, so glob matching behavior is unchanged but consumers no
# longer iterate redundant patterns.
DEFAULT_EXCLUDE_PATHS = list(
    dict.fromkeys(
        [
            "**/node_modules/**",
            "**/vendor/**",
            "**/dist/**",
            "**/build/**",
            "**/.next/**",
            "**/__pycache__/**",
            "**/*.min.js",
            "**/*.min.css",
            "**/*.bundle.js",
            "**/*.bundle.css",
            "**/package-lock.json",
            "**/yarn.lock",
            "**/poetry.lock",
            "**/Pipfile.lock",
            "**/composer.lock",
            "**/Gemfile.lock",
            "**/Cargo.lock",
            "**/go.sum",
            "**/*.generated.*",
            "**/generated/**",
            "**/coverage/**",
            "**/.coverage/**",
            "**/htmlcov/**",
            "**/*.map",
            # Additional framework/boilerplate patterns
            "**/public/assets/**",
            "**/public/css/**",
            "**/public/js/**",
            "**/public/fonts/**",
            "**/public/build/**",
            "**/storage/framework/**",
            "**/bootstrap/cache/**",
            "**/.nuxt/**",
            "**/.cache/**",
            "**/cache/**",
            "**/*.lock",
            "**/*.log",
            "**/logs/**",
            "**/tmp/**",
            "**/temp/**",
            "**/.sass-cache/**",
            "**/bower_components/**",
            # Database migrations and seeds (often auto-generated)
            "**/migrations/*.php",
            "**/database/migrations/**",
            "**/db/migrate/**",
            # Compiled assets
            "**/public/mix-manifest.json",
            "**/public/hot",
            "**/*.map.js",
            "**/webpack.mix.js",
            # IDE and OS files
            "**/.idea/**",
            "**/.vscode/**",
            "**/.DS_Store",
            "**/Thumbs.db",
            # Generated documentation (but not source docs)
            "**/docs/build/**",
            "**/docs/_build/**",
            "**/documentation/build/**",
            "**/site/**",  # For mkdocs generated sites
            # Test coverage
            "**/test-results/**",
            "**/.nyc_output/**",
            # Framework-specific
            "**/artisan",
            "**/spark",
            "**/.env",
            "**/.env.*",
            "**/storage/logs/**",
            "**/storage/debugbar/**",
            # CMS-specific patterns
            "**/wp-content/uploads/**",
            "**/wp-content/cache/**",
            "**/uploads/**",
            "**/media/**",
            "**/static/**",
            "**/staticfiles/**",
            # More aggressive filtering for generated content
            "**/*.sql",
            "**/*.dump",
            "**/backups/**",
            "**/backup/**",
            "**/*.bak",
            # Compiled/concatenated files (only in build/dist directories)
            "**/dist/**/all.js",
            "**/dist/**/all.css",
            "**/build/**/all.js",
            "**/build/**/all.css",
            "**/public/**/app.js",
            "**/public/**/app.css",
            "**/dist/**/app.js",
            "**/dist/**/app.css",
            "**/build/**/app.js",
            "**/build/**/app.css",
            "**/public/**/main.js",
            "**/public/**/main.css",
            "**/dist/**/main.js",
            "**/dist/**/main.css",
            "**/build/**/main.js",
            "**/build/**/main.css",
            "**/bundle.*",
            "**/chunk.*",
            "**/*-chunk-*",
            "**/*.chunk.*",
            # Framework scaffolding
            "**/scaffolding/**",
            "**/stubs/**",
            "**/templates/**",
            "**/views/vendor/**",
            "**/resources/views/vendor/**",
            # Package managers
            "**/packages/**",
            "**/.pnpm/**",
            "**/.yarn/**",
            # Build artifacts
            "**/out/**",
            "**/output/**",
            "**/.parcel-cache/**",
            "**/parcel-cache/**",
            # Large data files (only in specific directories)
            "**/data/*.csv",
            "**/data/*.json",
            "**/fixtures/*.json",
            "**/seeds/*.json",
            "**/*.geojson",
            "**/package.json.bak",
            "**/composer.json.bak",
            # Exclude large framework upgrades
            "**/upgrade/**",
            "**/upgrades/**",
            # Common CMS patterns (specific to avoid excluding legitimate source)
            "**/wordpress/wp-core/**",
            "**/drupal/core/**",
            "**/joomla/libraries/cms/**",
            "**/modules/**/tests/**",
            "**/plugins/**/vendor/**",
            "**/themes/**/vendor/**",
            "**/themes/**/node_modules/**",
            # Framework-specific third-party directories (not generic lib/libs)
            "**/vendors/**",
            # Only exclude specific known third-party package directories
            "**/third-party/packages/**",
            "**/third_party/packages/**",
            "**/external/vendor/**",
            "**/external/packages/**",
            # Generated assets
            "**/compiled/**",
            # Package lock files
            "**/pnpm-lock.yaml",
            # Documentation/assets
            "**/*.pdf",
            "**/*.doc",
            "**/*.docx",
            "**/fonts/**",
            "**/font/**",
        ]
    )
)
212
+
213
@classmethod
def load(cls, config_path: Union[Path, str]) -> Config:
    """Load configuration from YAML file.

    The pipeline is order-sensitive:

    1. Load a sibling ``.env`` file so ``${VAR}`` references resolve.
    2. Parse the YAML document.
    3. Apply a named ``profile`` (if any) before ``extends``, so profile
       defaults can themselves be overridden by a base-config merge.
    4. Merge a base config referenced by ``extends`` (current file wins —
       it is passed as the override side of the deep merge).
    5. Validate the schema version, then build each config section.

    Args:
        config_path: Path to the configuration file

    Returns:
        Loaded and validated configuration

    Raises:
        ConfigurationError: If configuration is invalid
        YAMLParseError: If YAML parsing fails
    """
    # Ensure config_path is a Path object
    config_path = Path(config_path)

    # Load environment variables
    cls._load_environment(config_path)

    # Load and parse YAML
    data = cls._load_yaml(config_path)

    # Check for configuration profile
    if "profile" in data:
        data = ProfileManager.apply_profile(data, data["profile"])

    # Check for base configuration extension
    if "extends" in data:
        base_data = cls._load_base_config(data["extends"], config_path)
        # NOTE(review): argument order implies `data` (the extending file)
        # overrides `base_data` — confirm against ProfileManager._deep_merge.
        data = ProfileManager._deep_merge(base_data, data)

    # Validate version
    cls._validate_version(data)

    # Process configuration sections. Each helper receives only its own
    # sub-dict except repositories, which also needs the GitHub settings
    # (for organization-based discovery).
    github_config = cls._process_github_config(data.get("github", {}), config_path)
    repositories = cls._process_repositories(data, github_config, config_path)
    analysis_config = cls._process_analysis_config(data.get("analysis", {}), config_path)
    output_config = cls._process_output_config(data.get("output", {}), config_path)
    cache_config = cls._process_cache_config(data.get("cache", {}), config_path)
    jira_config = cls._process_jira_config(data.get("jira", {}), config_path)
    jira_integration_config = cls._process_jira_integration_config(
        data.get("jira_integration", {})
    )
    qualitative_config = cls._process_qualitative_config(data.get("qualitative", {}))
    pm_config = cls._process_pm_config(data.get("pm", {}))
    pm_integration_config = cls._process_pm_integration_config(data.get("pm_integration", {}))

    # Create configuration object
    config = Config(
        repositories=repositories,
        github=github_config,
        analysis=analysis_config,
        output=output_config,
        cache=cache_config,
        jira=jira_config,
        jira_integration=jira_integration_config,
        pm=pm_config,
        pm_integration=pm_integration_config,
        qualitative=qualitative_config,
    )

    # Validate configuration: warnings are printed but non-fatal.
    warnings = ConfigValidator.validate_config(config)
    if warnings:
        for warning in warnings:
            print(f"⚠️ {warning}")

    return config
283
+
284
@classmethod
def _load_environment(cls, config_path: Path) -> None:
    """Load environment variables from a ``.env`` file next to the config.

    Args:
        config_path: Path to the configuration file whose directory is
            searched for a ``.env`` file.
    """
    dotenv_path = config_path.parent / ".env"
    if not dotenv_path.exists():
        return
    # override=True: values in .env win over already-set process variables.
    load_dotenv(dotenv_path, override=True)
    print(f"📋 Loaded environment variables from {dotenv_path}")
296
+
297
@classmethod
def _load_yaml(cls, config_path: Path) -> dict[str, Any]:
    """Load and parse YAML file.

    Maps low-level I/O and parse failures onto the package's configuration
    error types, and rejects empty or non-mapping documents.

    Args:
        config_path: Path to YAML file

    Returns:
        Parsed YAML data

    Raises:
        YAMLParseError: If YAML parsing fails
        ConfigurationError: If file is invalid
    """
    try:
        with open(config_path) as f:
            # safe_load: never executes arbitrary YAML tags.
            data = yaml.safe_load(f)
    except yaml.YAMLError as e:
        # NOTE(review): handle_yaml_error is assumed to always raise; if it
        # ever returned normally, `data` would be unbound below — confirm
        # against errors.handle_yaml_error.
        handle_yaml_error(e, config_path)
    except FileNotFoundError as e:
        raise ConfigurationError(
            f"Configuration file not found: {config_path}", config_path
        ) from e
    except PermissionError as e:
        raise ConfigurationError(
            f"Permission denied reading configuration file: {config_path}", config_path
        ) from e
    except Exception as e:
        raise ConfigurationError(f"Failed to read configuration file: {e}", config_path) from e

    # Handle empty or null YAML files (safe_load returns None for both).
    if data is None:
        raise ConfigurationError(
            "Configuration file is empty or contains only null values",
            config_path,
            suggestion=(
                "Add proper YAML configuration content to the file.\n"
                " Example minimal configuration:\n"
                " ```yaml\n"
                ' version: "1.0"\n'
                " github:\n"
                ' token: "${GITHUB_TOKEN}"\n'
                ' owner: "your-username"\n'
                " repositories:\n"
                ' - name: "your-repo"\n'
                ' path: "/path/to/repo"\n'
                " ```"
            ),
        )

    # Validate that data is a dictionary (a scalar or list at the root is
    # syntactically valid YAML but not a usable configuration).
    if not isinstance(data, dict):
        raise InvalidValueError(
            "root",
            type(data).__name__,
            "Configuration file must contain a YAML object (key-value pairs)",
            config_path,
        )

    return data
357
+
358
@classmethod
def _load_base_config(cls, base_path: str, config_path: Path) -> dict[str, Any]:
    """Load the base configuration referenced by an ``extends`` key.

    Args:
        base_path: Path to base configuration (relative or absolute)
        config_path: Path to current configuration file

    Returns:
        Base configuration data
    """
    resolved = Path(base_path)
    if not resolved.is_absolute():
        # Relative paths are interpreted relative to the extending config file.
        resolved = config_path.parent / resolved
    return cls._load_yaml(resolved)
376
+
377
@classmethod
def _validate_version(cls, data: dict[str, Any]) -> None:
    """Validate configuration version.

    A missing ``version`` key is treated as "1.0".

    Args:
        data: Configuration data

    Raises:
        InvalidValueError: If version is not supported
    """
    supported = ["1.0"]
    version = data.get("version", "1.0")
    if version in supported:
        return
    raise InvalidValueError(
        "version", version, "Unsupported configuration version", None, valid_values=supported
    )
392
+
393
@classmethod
def _process_github_config(cls, github_data: dict[str, Any], config_path: Path) -> GitHubConfig:
    """Process GitHub configuration section.

    Args:
        github_data: GitHub configuration data
        config_path: Path to configuration file

    Returns:
        GitHubConfig instance
    """
    raw_token = github_data.get("token")
    github_token = cls._resolve_env_var(raw_token)
    # A token was configured but its environment variable did not resolve.
    if raw_token and not github_token:
        raise EnvironmentVariableError("GITHUB_TOKEN", "GitHub", config_path)

    rate_limit = github_data.get("rate_limit", {})
    return GitHubConfig(
        token=github_token,
        owner=cls._resolve_env_var(github_data.get("owner")),
        organization=cls._resolve_env_var(github_data.get("organization")),
        base_url=github_data.get("base_url", "https://api.github.com"),
        max_retries=rate_limit.get("max_retries", 3),
        backoff_factor=rate_limit.get("backoff_factor", 2),
    )
417
+
418
@classmethod
def _process_repositories(
    cls, data: dict[str, Any], github_config: GitHubConfig, config_path: Path
) -> list[RepositoryConfig]:
    """Build the repository list from explicit config entries.

    Args:
        data: Configuration data
        github_config: GitHub configuration
        config_path: Path to configuration file

    Returns:
        List of RepositoryConfig instances
    """
    manager = RepositoryManager(github_config)
    repositories: list[RepositoryConfig] = []

    # When an organization is configured and no repositories are listed,
    # repositories are discovered at runtime, so nothing is built here.
    discover_at_runtime = github_config.organization and not data.get("repositories")
    if not discover_at_runtime:
        for index, entry in enumerate(data.get("repositories", [])):
            repositories.append(
                manager.process_repository_config(entry, index, config_path)
            )

    # Nothing explicit and no organization to discover from: unusable config.
    if not repositories and not github_config.organization:
        raise ConfigurationError(
            "No repositories defined and no organization specified for discovery",
            config_path,
            suggestion=(
                "Either define repositories explicitly or specify a GitHub organization:\n"
                " repositories:\n"
                ' - name: "repo-name"\n'
                ' path: "/path/to/repo"\n'
                " OR\n"
                " github:\n"
                ' organization: "your-org"'
            ),
        )

    return repositories
462
+
463
@classmethod
def _process_analysis_config(
    cls, analysis_data: dict[str, Any], config_path: Path
) -> AnalysisConfig:
    """Process analysis configuration section.

    Builds the nested ML-categorization, commit-classification, LLM, and
    branch-analysis sub-configs, then assembles the top-level AnalysisConfig.
    Every setting falls back to the hard-coded default shown at its
    ``.get(...)`` call site when absent from the YAML.

    Args:
        analysis_data: Analysis configuration data
        config_path: Path to configuration file

    Returns:
        AnalysisConfig instance
    """
    # Validate settings
    ConfigValidator.validate_analysis_config(analysis_data, config_path)

    # Process exclude paths: any user-provided list fully REPLACES the
    # defaults (no merging).
    user_exclude_paths = analysis_data.get("exclude", {}).get("paths", [])
    exclude_paths = user_exclude_paths if user_exclude_paths else cls.DEFAULT_EXCLUDE_PATHS

    # Process ML categorization settings
    ml_data = analysis_data.get("ml_categorization", {})
    ml_categorization_config = MLCategorization(
        enabled=ml_data.get("enabled", True),
        min_confidence=ml_data.get("min_confidence", 0.6),
        semantic_weight=ml_data.get("semantic_weight", 0.7),
        file_pattern_weight=ml_data.get("file_pattern_weight", 0.3),
        hybrid_threshold=ml_data.get("hybrid_threshold", 0.5),
        cache_duration_days=ml_data.get("cache_duration_days", 30),
        batch_size=ml_data.get("batch_size", 100),
        enable_caching=ml_data.get("enable_caching", True),
        spacy_model=ml_data.get("spacy_model", "en_core_web_sm"),
    )

    # Process commit classification settings
    classification_data = analysis_data.get("commit_classification", {})
    commit_classification_config = CommitClassificationConfig(
        enabled=classification_data.get("enabled", True),
        confidence_threshold=classification_data.get("confidence_threshold", 0.5),
        batch_size=classification_data.get("batch_size", 100),
        auto_retrain=classification_data.get("auto_retrain", True),
        retrain_threshold_days=classification_data.get("retrain_threshold_days", 30),
        model=classification_data.get("model", {}),
        feature_extraction=classification_data.get("feature_extraction", {}),
        training=classification_data.get("training", {}),
        categories=classification_data.get("categories", {}),
    )

    # Process LLM classification configuration (disabled by default; the
    # api_key may be an env-var reference and is resolved here).
    llm_classification_data = analysis_data.get("llm_classification", {})
    llm_classification_config = LLMClassificationConfig(
        enabled=llm_classification_data.get("enabled", False),
        api_key=cls._resolve_env_var(llm_classification_data.get("api_key")),
        api_base_url=llm_classification_data.get(
            "api_base_url", "https://openrouter.ai/api/v1"
        ),
        model=llm_classification_data.get("model", "mistralai/mistral-7b-instruct"),
        confidence_threshold=llm_classification_data.get("confidence_threshold", 0.7),
        max_tokens=llm_classification_data.get("max_tokens", 50),
        temperature=llm_classification_data.get("temperature", 0.1),
        timeout_seconds=llm_classification_data.get("timeout_seconds", 30.0),
        cache_duration_days=llm_classification_data.get("cache_duration_days", 90),
        enable_caching=llm_classification_data.get("enable_caching", True),
        max_daily_requests=llm_classification_data.get("max_daily_requests", 1000),
        domain_terms=llm_classification_data.get("domain_terms", {}),
    )

    # Process branch analysis settings: forwarded as keyword arguments, so
    # unknown keys in the YAML would raise a TypeError here.
    branch_data = analysis_data.get("branch_analysis", {})
    branch_analysis_config = (
        BranchAnalysisConfig(**branch_data) if branch_data else BranchAnalysisConfig()
    )

    return AnalysisConfig(
        story_point_patterns=analysis_data.get(
            "story_point_patterns",
            [
                r"(?:story\s*points?|sp|pts?)\s*[:=]\s*(\d+)",
                r"\[(\d+)\s*(?:sp|pts?)\]",
                r"#(\d+)sp",
            ],
        ),
        # Bot authors are excluded from analysis by default.
        exclude_authors=analysis_data.get("exclude", {}).get(
            "authors", ["dependabot[bot]", "renovate[bot]"]
        ),
        exclude_message_patterns=analysis_data.get("exclude", {}).get("message_patterns", []),
        exclude_paths=exclude_paths,
        similarity_threshold=analysis_data.get("identity", {}).get(
            "similarity_threshold", 0.85
        ),
        manual_identity_mappings=analysis_data.get("identity", {}).get("manual_mappings", []),
        default_ticket_platform=analysis_data.get("default_ticket_platform"),
        branch_mapping_rules=analysis_data.get("branch_mapping_rules", {}),
        ticket_platforms=analysis_data.get("ticket_platforms"),
        auto_identity_analysis=analysis_data.get("identity", {}).get("auto_analysis", True),
        branch_analysis=branch_analysis_config,
        ml_categorization=ml_categorization_config,
        commit_classification=commit_classification_config,
        llm_classification=llm_classification_config,
    )
563
+
564
@classmethod
def _process_output_config(cls, output_data: dict[str, Any], config_path: Path) -> OutputConfig:
    """Process output configuration section.

    Args:
        output_data: Output configuration data
        config_path: Path to configuration file

    Returns:
        OutputConfig instance
    """
    ConfigValidator.validate_output_config(output_data, config_path)

    raw_directory = output_data.get("directory")
    if raw_directory:
        directory = Path(raw_directory).expanduser()
        # Relative paths are anchored at the config file's directory.
        if not directory.is_absolute():
            directory = config_path.parent / directory
        directory = directory.resolve()
    else:
        # Default to config file directory if not specified.
        directory = config_path.parent

    csv_options = output_data.get("csv", {})
    anonymization = output_data.get("anonymization", {})
    return OutputConfig(
        directory=directory,
        formats=output_data.get("formats", ["csv", "markdown"]),
        csv_delimiter=csv_options.get("delimiter", ","),
        csv_encoding=csv_options.get("encoding", "utf-8"),
        anonymize_enabled=anonymization.get("enabled", False),
        anonymize_fields=anonymization.get("fields", []),
        anonymize_method=anonymization.get("method", "hash"),
    )
599
+
600
@classmethod
def _process_cache_config(cls, cache_data: dict[str, Any], config_path: Path) -> CacheConfig:
    """Process cache configuration section.

    Args:
        cache_data: Cache configuration data
        config_path: Path to configuration file

    Returns:
        CacheConfig instance
    """
    directory = Path(cache_data.get("directory", ".gitflow-cache"))
    # Relative cache directories are anchored at the config file's directory.
    if not directory.is_absolute():
        directory = config_path.parent / directory

    return CacheConfig(
        directory=directory.resolve(),
        ttl_hours=cache_data.get("ttl_hours", 168),
        max_size_mb=cache_data.get("max_size_mb", 500),
    )
622
+
623
@classmethod
def _process_jira_config(
    cls, jira_data: dict[str, Any], config_path: Path
) -> Optional[JIRAConfig]:
    """Process JIRA configuration section.

    Args:
        jira_data: JIRA configuration data
        config_path: Path to configuration file

    Returns:
        JIRAConfig instance or None
    """
    if not jira_data:
        return None

    access_user = cls._resolve_env_var(jira_data.get("access_user", ""))
    access_token = cls._resolve_env_var(jira_data.get("access_token", ""))

    # Both credentials configured: each must resolve to a non-empty value.
    both_configured = jira_data.get("access_user") and jira_data.get("access_token")
    if both_configured:
        if not access_user:
            raise EnvironmentVariableError("JIRA_ACCESS_USER", "JIRA", config_path)
        if not access_token:
            raise EnvironmentVariableError("JIRA_ACCESS_TOKEN", "JIRA", config_path)

    return JIRAConfig(
        access_user=access_user,
        access_token=access_token,
        base_url=jira_data.get("base_url"),
    )
654
+
655
@classmethod
def _process_jira_integration_config(
    cls, jira_integration_data: dict[str, Any]
) -> Optional[JIRAIntegrationConfig]:
    """Process JIRA integration configuration section.

    Args:
        jira_integration_data: JIRA integration configuration data

    Returns:
        JIRAIntegrationConfig instance or None
    """
    if not jira_integration_data:
        return None

    settings = jira_integration_data
    # Common JIRA story-point custom fields, tried in order.
    default_story_point_fields = ["customfield_10016", "customfield_10021", "Story Points"]
    return JIRAIntegrationConfig(
        enabled=settings.get("enabled", True),
        fetch_story_points=settings.get("fetch_story_points", True),
        project_keys=settings.get("project_keys", []),
        story_point_fields=settings.get("story_point_fields", default_story_point_fields),
    )
678
+
679
    @classmethod
    def _process_qualitative_config(cls, qualitative_data: dict[str, Any]) -> Optional[Any]:
        """Process qualitative analysis configuration section.

        Builds the full qualitative-analysis config aggregate (NLP, LLM, and
        cache sub-configs) from the raw mapping, supplying defaults for every
        missing key. The return type is ``Optional[Any]`` rather than the
        concrete class because the qualitative package is imported lazily and
        may not be installed.

        Args:
            qualitative_data: Qualitative configuration data (raw mapping from
                the config file; may be empty or None).

        Returns:
            QualitativeConfig instance, or None when the section is empty,
            the optional qualitative dependencies are missing, or the section
            fails to parse.
        """
        if not qualitative_data:
            return None

        # Import here to avoid circular imports (also keeps the qualitative
        # stack an optional dependency — handled by the ImportError below).
        try:
            from ..qualitative.models.schemas import CacheConfig as QualitativeCacheConfig
            from ..qualitative.models.schemas import (
                ChangeTypeConfig,
                DomainConfig,
                IntentConfig,
                LLMConfig,
                NLPConfig,
                QualitativeConfig,
                RiskConfig,
            )

            # Parse NLP configuration. The change_type/intent/domain/risk
            # sub-sections are forwarded verbatim as keyword arguments, so an
            # unknown key there surfaces as a TypeError caught further down.
            nlp_data = qualitative_data.get("nlp", {})
            nlp_config = NLPConfig(
                spacy_model=nlp_data.get("spacy_model", "en_core_web_sm"),
                spacy_batch_size=nlp_data.get("spacy_batch_size", 1000),
                fast_mode=nlp_data.get("fast_mode", True),
                enable_parallel_processing=nlp_data.get("enable_parallel_processing", True),
                max_workers=nlp_data.get("max_workers", 4),
                change_type_config=ChangeTypeConfig(**nlp_data.get("change_type", {})),
                intent_config=IntentConfig(**nlp_data.get("intent", {})),
                domain_config=DomainConfig(**nlp_data.get("domain", {})),
                risk_config=RiskConfig(**nlp_data.get("risk", {})),
            )

            # Parse LLM configuration. Several fields accept a legacy alias
            # ("api_key" for "openrouter_api_key", "model" for
            # "primary_model"); the newer key wins when both are present.
            llm_data = qualitative_data.get("llm", {})
            cost_tracking_data = qualitative_data.get("cost_tracking", {})
            llm_config = LLMConfig(
                openrouter_api_key=cls._resolve_env_var(
                    llm_data.get("openrouter_api_key")
                    or llm_data.get("api_key", "${OPENROUTER_API_KEY}")
                ),
                base_url=llm_data.get("base_url", "https://openrouter.ai/api/v1"),
                primary_model=llm_data.get("primary_model")
                or llm_data.get("model", "anthropic/claude-3-haiku"),
                fallback_model=llm_data.get(
                    "fallback_model", "meta-llama/llama-3.1-8b-instruct:free"
                ),
                complex_model=llm_data.get("complex_model", "anthropic/claude-3-sonnet"),
                complexity_threshold=llm_data.get("complexity_threshold", 0.5),
                cost_threshold_per_1k=llm_data.get("cost_threshold_per_1k", 0.01),
                max_tokens=llm_data.get("max_tokens", 1000),
                temperature=llm_data.get("temperature", 0.1),
                max_group_size=llm_data.get("max_group_size", 10),
                similarity_threshold=llm_data.get("similarity_threshold", 0.8),
                requests_per_minute=llm_data.get("requests_per_minute", 200),
                max_retries=llm_data.get("max_retries", 3),
                # Daily budget may come from the dedicated cost_tracking
                # section, falling back to the llm section's own key.
                max_daily_cost=cost_tracking_data.get("daily_budget_usd")
                or llm_data.get("max_daily_cost", 5.0),
                # Explicit is-not-None check so cost_tracking.enabled=False is
                # honored instead of falling through to the llm-level flag.
                enable_cost_tracking=(
                    cost_tracking_data.get("enabled")
                    if cost_tracking_data.get("enabled") is not None
                    else llm_data.get("enable_cost_tracking", True)
                ),
            )

            # Parse cache configuration for the qualitative pipeline.
            cache_data = qualitative_data.get("cache", {})
            qualitative_cache_config = QualitativeCacheConfig(
                cache_dir=cache_data.get("cache_dir", ".qualitative_cache"),
                semantic_cache_size=cache_data.get("semantic_cache_size", 10000),
                pattern_cache_ttl_hours=cache_data.get("pattern_cache_ttl_hours", 168),
                enable_pattern_learning=cache_data.get("enable_pattern_learning", True),
                learning_threshold=cache_data.get("learning_threshold", 10),
                confidence_boost_factor=cache_data.get("confidence_boost_factor", 0.1),
                enable_compression=cache_data.get("enable_compression", True),
                max_cache_size_mb=cache_data.get("max_cache_size_mb", 100),
            )

            # Create main qualitative configuration aggregating the sub-configs.
            return QualitativeConfig(
                enabled=qualitative_data.get("enabled", True),
                batch_size=qualitative_data.get("batch_size", 1000),
                max_llm_fallback_pct=qualitative_data.get("max_llm_fallback_pct", 0.15),
                confidence_threshold=qualitative_data.get("confidence_threshold", 0.7),
                nlp_config=nlp_config,
                llm_config=llm_config,
                cache_config=qualitative_cache_config,
                enable_performance_tracking=qualitative_data.get(
                    "enable_performance_tracking", True
                ),
                target_processing_time_ms=qualitative_data.get("target_processing_time_ms", 2.0),
                min_overall_confidence=qualitative_data.get("min_overall_confidence", 0.6),
                enable_quality_feedback=qualitative_data.get("enable_quality_feedback", True),
            )

        except ImportError as e:
            # Optional dependencies not installed: degrade gracefully to
            # "qualitative analysis disabled" rather than crashing startup.
            print(f"⚠️ Qualitative analysis dependencies missing: {e}")
            print(" Install with: pip install spacy scikit-learn openai tiktoken")
            return None
        except Exception as e:
            # Broad catch is deliberate: a malformed qualitative section
            # should not abort the whole configuration load.
            print(f"⚠️ Error parsing qualitative configuration: {e}")
            return None
788
+
789
+ @classmethod
790
+ def _process_pm_config(cls, pm_data: dict[str, Any]) -> Optional[Any]:
791
+ """Process PM configuration section.
792
+
793
+ Args:
794
+ pm_data: PM configuration data
795
+
796
+ Returns:
797
+ PM configuration object or None
798
+ """
799
+ if not pm_data:
800
+ return None
801
+
802
+ pm_config = type("PMConfig", (), {})() # Dynamic class
803
+
804
+ # Parse JIRA section within PM
805
+ if "jira" in pm_data:
806
+ jira_pm_data = pm_data["jira"]
807
+ pm_config.jira = type(
808
+ "PMJIRAConfig",
809
+ (),
810
+ {
811
+ "enabled": jira_pm_data.get("enabled", True),
812
+ "base_url": jira_pm_data.get("base_url"),
813
+ "username": cls._resolve_env_var(jira_pm_data.get("username")),
814
+ "api_token": cls._resolve_env_var(jira_pm_data.get("api_token")),
815
+ "story_point_fields": jira_pm_data.get(
816
+ "story_point_fields",
817
+ ["customfield_10016", "customfield_10021", "Story Points"],
818
+ ),
819
+ },
820
+ )()
821
+
822
+ return pm_config
823
+
824
+ @classmethod
825
+ def _process_pm_integration_config(
826
+ cls, pm_integration_data: dict[str, Any]
827
+ ) -> Optional[PMIntegrationConfig]:
828
+ """Process PM integration configuration section.
829
+
830
+ Args:
831
+ pm_integration_data: PM integration configuration data
832
+
833
+ Returns:
834
+ PMIntegrationConfig instance or None
835
+ """
836
+ if not pm_integration_data:
837
+ return None
838
+
839
+ # Parse platform configurations
840
+ platforms_config = {}
841
+ platforms_data = pm_integration_data.get("platforms", {})
842
+
843
+ for platform_name, platform_data in platforms_data.items():
844
+ platforms_config[platform_name] = PMPlatformConfig(
845
+ enabled=platform_data.get("enabled", True),
846
+ platform_type=platform_data.get("platform_type", platform_name),
847
+ config=platform_data.get("config", {}),
848
+ )
849
+
850
+ # Parse correlation settings with defaults
851
+ correlation_defaults = {
852
+ "fuzzy_matching": True,
853
+ "temporal_window_hours": 72,
854
+ "confidence_threshold": 0.8,
855
+ }
856
+ correlation_config = {**correlation_defaults, **pm_integration_data.get("correlation", {})}
857
+
858
+ return PMIntegrationConfig(
859
+ enabled=pm_integration_data.get("enabled", False),
860
+ primary_platform=pm_integration_data.get("primary_platform"),
861
+ correlation=correlation_config,
862
+ platforms=platforms_config,
863
+ )
864
+
865
+ @staticmethod
866
+ def _resolve_env_var(value: Optional[str]) -> Optional[str]:
867
+ """Resolve environment variable references.
868
+
869
+ Args:
870
+ value: Value that may contain environment variable reference
871
+
872
+ Returns:
873
+ Resolved value or None
874
+
875
+ Raises:
876
+ EnvironmentVariableError: If environment variable is not set
877
+ """
878
+ if not value:
879
+ return None
880
+
881
+ if value.startswith("${") and value.endswith("}"):
882
+ env_var = value[2:-1]
883
+ resolved = os.environ.get(env_var)
884
+ if not resolved:
885
+ # Note: We don't raise here directly, let the caller handle it
886
+ # based on whether the field is required
887
+ return None
888
+ return resolved
889
+
890
+ return value
891
+
892
+ @staticmethod
893
+ def validate_config(config: Config) -> list[str]:
894
+ """Validate configuration and return list of warnings.
895
+
896
+ This method is kept for backward compatibility.
897
+
898
+ Args:
899
+ config: Configuration to validate
900
+
901
+ Returns:
902
+ List of warning messages
903
+ """
904
+ return ConfigValidator.validate_config(config)