devsquad 3.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95) hide show
  1. devsquad-3.6.0.dist-info/METADATA +944 -0
  2. devsquad-3.6.0.dist-info/RECORD +95 -0
  3. devsquad-3.6.0.dist-info/WHEEL +5 -0
  4. devsquad-3.6.0.dist-info/entry_points.txt +2 -0
  5. devsquad-3.6.0.dist-info/licenses/LICENSE +21 -0
  6. devsquad-3.6.0.dist-info/top_level.txt +2 -0
  7. scripts/__init__.py +0 -0
  8. scripts/ai_semantic_matcher.py +512 -0
  9. scripts/alert_manager.py +505 -0
  10. scripts/api/__init__.py +43 -0
  11. scripts/api/models.py +386 -0
  12. scripts/api/routes/__init__.py +20 -0
  13. scripts/api/routes/dispatch.py +348 -0
  14. scripts/api/routes/lifecycle.py +330 -0
  15. scripts/api/routes/metrics_gates.py +347 -0
  16. scripts/api_server.py +318 -0
  17. scripts/auth.py +451 -0
  18. scripts/cli/__init__.py +1 -0
  19. scripts/cli/cli_visual.py +642 -0
  20. scripts/cli.py +1094 -0
  21. scripts/collaboration/__init__.py +212 -0
  22. scripts/collaboration/_version.py +1 -0
  23. scripts/collaboration/agent_briefing.py +656 -0
  24. scripts/collaboration/ai_semantic_matcher.py +260 -0
  25. scripts/collaboration/anchor_checker.py +281 -0
  26. scripts/collaboration/anti_rationalization.py +470 -0
  27. scripts/collaboration/async_integration_example.py +255 -0
  28. scripts/collaboration/batch_scheduler.py +149 -0
  29. scripts/collaboration/checkpoint_manager.py +561 -0
  30. scripts/collaboration/ci_feedback_adapter.py +351 -0
  31. scripts/collaboration/code_map_generator.py +247 -0
  32. scripts/collaboration/concern_pack_loader.py +352 -0
  33. scripts/collaboration/confidence_score.py +496 -0
  34. scripts/collaboration/config_loader.py +188 -0
  35. scripts/collaboration/consensus.py +244 -0
  36. scripts/collaboration/context_compressor.py +533 -0
  37. scripts/collaboration/coordinator.py +668 -0
  38. scripts/collaboration/dispatcher.py +1636 -0
  39. scripts/collaboration/dual_layer_context.py +128 -0
  40. scripts/collaboration/enhanced_worker.py +539 -0
  41. scripts/collaboration/feature_usage_tracker.py +206 -0
  42. scripts/collaboration/five_axis_consensus.py +334 -0
  43. scripts/collaboration/input_validator.py +401 -0
  44. scripts/collaboration/integration_example.py +287 -0
  45. scripts/collaboration/intent_workflow_mapper.py +350 -0
  46. scripts/collaboration/language_parsers.py +269 -0
  47. scripts/collaboration/lifecycle_protocol.py +1446 -0
  48. scripts/collaboration/llm_backend.py +453 -0
  49. scripts/collaboration/llm_cache.py +448 -0
  50. scripts/collaboration/llm_cache_async.py +347 -0
  51. scripts/collaboration/llm_retry.py +387 -0
  52. scripts/collaboration/llm_retry_async.py +389 -0
  53. scripts/collaboration/mce_adapter.py +597 -0
  54. scripts/collaboration/memory_bridge.py +1607 -0
  55. scripts/collaboration/models.py +537 -0
  56. scripts/collaboration/null_providers.py +297 -0
  57. scripts/collaboration/operation_classifier.py +289 -0
  58. scripts/collaboration/output_slicer.py +225 -0
  59. scripts/collaboration/performance_monitor.py +462 -0
  60. scripts/collaboration/permission_guard.py +865 -0
  61. scripts/collaboration/prompt_assembler.py +756 -0
  62. scripts/collaboration/prompt_variant_generator.py +483 -0
  63. scripts/collaboration/protocols.py +267 -0
  64. scripts/collaboration/report_formatter.py +352 -0
  65. scripts/collaboration/retrospective.py +279 -0
  66. scripts/collaboration/role_matcher.py +92 -0
  67. scripts/collaboration/role_template_market.py +352 -0
  68. scripts/collaboration/rule_collector.py +678 -0
  69. scripts/collaboration/scratchpad.py +346 -0
  70. scripts/collaboration/skill_registry.py +151 -0
  71. scripts/collaboration/skillifier.py +878 -0
  72. scripts/collaboration/standardized_role_template.py +317 -0
  73. scripts/collaboration/task_completion_checker.py +237 -0
  74. scripts/collaboration/test_quality_guard.py +695 -0
  75. scripts/collaboration/unified_gate_engine.py +598 -0
  76. scripts/collaboration/usage_tracker.py +309 -0
  77. scripts/collaboration/user_friendly_error.py +176 -0
  78. scripts/collaboration/verification_gate.py +312 -0
  79. scripts/collaboration/warmup_manager.py +635 -0
  80. scripts/collaboration/worker.py +513 -0
  81. scripts/collaboration/workflow_engine.py +684 -0
  82. scripts/dashboard.py +1088 -0
  83. scripts/generate_benchmark_report.py +786 -0
  84. scripts/history_manager.py +604 -0
  85. scripts/mcp_server.py +289 -0
  86. skills/__init__.py +32 -0
  87. skills/dispatch/handler.py +52 -0
  88. skills/intent/handler.py +59 -0
  89. skills/registry.py +67 -0
  90. skills/retrospective/__init__.py +0 -0
  91. skills/retrospective/handler.py +125 -0
  92. skills/review/handler.py +356 -0
  93. skills/security/handler.py +454 -0
  94. skills/test/__init__.py +0 -0
  95. skills/test/handler.py +78 -0
@@ -0,0 +1,496 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Confidence Score System
5
+
6
+ Provides confidence scoring for LLM responses to help agents make better decisions.
7
+
8
+ Features:
9
+ - Multi-factor confidence calculation
10
+ - Response quality assessment
11
+ - Uncertainty detection
12
+ - Threshold-based decision making
13
+ - Historical confidence tracking
14
+
15
+ Usage:
16
+ from scripts.collaboration.confidence_score import ConfidenceScorer
17
+
18
+ scorer = ConfidenceScorer()
19
+
20
+ # Calculate confidence for a response
21
+ score = scorer.calculate_confidence(
22
+ prompt="Design a REST API",
23
+ response="Here's the API design...",
24
+ metadata={"model": "gpt-4", "temperature": 0.7}
25
+ )
26
+
27
+ # Check if confidence meets threshold
28
+ if score.is_confident(threshold=0.7):
29
+ # Proceed with high confidence
30
+ pass
31
+ else:
32
+ # Request human review or retry
33
+ pass
34
+ """
35
+
36
+ import re
37
+ import logging
38
+ from typing import Dict, Any, List, Optional, Tuple
39
+ from dataclasses import dataclass, field
40
+ from datetime import datetime
41
+ from enum import Enum
42
+
43
+
44
+ logger = logging.getLogger(__name__)
45
+
46
+
47
class ConfidenceLevel(Enum):
    """Named confidence bands.

    The trailing comment on each member gives the overall-score range that
    maps to it.
    """

    VERY_HIGH = "very_high"  # score >= 0.9
    HIGH = "high"            # 0.7 <= score < 0.9
    MEDIUM = "medium"        # 0.5 <= score < 0.7
    LOW = "low"              # 0.3 <= score < 0.5
    VERY_LOW = "very_low"    # score < 0.3
54
+
55
+
56
@dataclass
class ConfidenceScore:
    """Result of scoring one response.

    Holds the weighted overall score, its level, the per-factor scores and
    the human-readable reasons collected while scoring.
    """

    overall_score: float            # 0.0 to 1.0
    level: "ConfidenceLevel"        # band derived from overall_score
    factors: Dict[str, float]       # individual factor scores, keyed by factor name
    reasoning: List[str]            # explanation lines, in the order they were produced
    metadata: Dict[str, Any] = field(default_factory=dict)
    # Creation time as a POSIX timestamp, taken when the instance is built
    timestamp: float = field(default_factory=lambda: datetime.now().timestamp())

    def is_confident(self, threshold: float = 0.7) -> bool:
        """Return True when the overall score is at least ``threshold``."""
        return threshold <= self.overall_score

    def to_dict(self) -> Dict[str, Any]:
        """Return the score as a plain dict, with ``level`` as its string value.

        ``factors``, ``reasoning`` and ``metadata`` are returned by reference,
        not copied.
        """
        return dict(
            overall_score=self.overall_score,
            level=self.level.value,
            factors=self.factors,
            reasoning=self.reasoning,
            metadata=self.metadata,
            timestamp=self.timestamp,
        )
80
+
81
+
82
class ConfidenceScorer:
    """
    Heuristic confidence scorer for LLM responses.

    Five factor scores, each in [0, 1], are combined into a weighted overall
    score:
    1. completeness  - response length and truncation markers
    2. certainty     - uncertainty phrases and hedging words
    3. specificity   - numbers, code, examples, lists
    4. consistency   - contradiction patterns and self-corrections
    5. model_quality - model name, temperature and token count from metadata

    Every score produced by ``calculate_confidence`` is appended to
    ``history``, which keeps only the most recent ``_max_history`` entries.
    """

    # Uncertainty phrases that indicate low confidence (matched as substrings)
    UNCERTAINTY_PHRASES = [
        "i think", "maybe", "perhaps", "possibly", "might be",
        "could be", "not sure", "uncertain", "unclear", "ambiguous",
        "i'm not certain", "i don't know", "hard to say",
        "it depends", "it's possible", "it seems", "appears to be"
    ]

    # Hedging words that reduce confidence (matched as whole words)
    HEDGING_WORDS = [
        "probably", "likely", "unlikely", "somewhat", "fairly",
        "relatively", "generally", "typically", "usually", "often"
    ]

    # Word boundaries keep "no" from matching inside "not"/"know"/"nothing"
    # and "yes" from matching inside "eyes".
    _CONTRADICTION_PATTERNS = [
        (r"\b(yes|true|correct)\b.*?\b(no|false|incorrect)\b", "yes/no contradiction"),
        (r"\b(always|never)\b.*?\b(sometimes|occasionally)\b", "absolute/conditional contradiction"),
        (r"\b(should|must)\b.*?\b(should not|must not)\b", "directive contradiction")
    ]

    # "like" must be a whole word so that "likely"/"unlike" do not count as
    # an example marker. "e.g." ends in a dot, so it gets no trailing \b.
    _EXAMPLE_PATTERN = r'\b(?:for example|such as|like)\b|\be\.g\.'

    def __init__(
        self,
        weights: Optional[Dict[str, float]] = None,
        min_response_length: int = 50
    ):
        """
        Initialize confidence scorer

        Args:
            weights: Custom weight per factor name. When omitted (or empty) the
                defaults are completeness 0.25, certainty 0.25,
                specificity 0.20, consistency 0.15, model_quality 0.15.
                Only the factors named in this dict contribute to the
                overall score.
            min_response_length: Minimum expected response length in characters
        """
        self.weights = weights or {
            "completeness": 0.25,
            "certainty": 0.25,
            "specificity": 0.20,
            "consistency": 0.15,
            "model_quality": 0.15
        }
        self.min_response_length = min_response_length
        self._max_history = 1000

        self.history: List["ConfidenceScore"] = []

    @staticmethod
    def _has_word(text: str, word: str) -> bool:
        """Return True when ``word`` occurs in ``text`` as a whole word."""
        return re.search(rf"(?<!\w){re.escape(word)}(?!\w)", text) is not None

    @staticmethod
    def _to_float(value: Any) -> Optional[float]:
        """Convert ``value`` to float, or return None when it is not numeric."""
        if value is None:
            return None
        try:
            return float(value)
        except (TypeError, ValueError, OverflowError):
            return None

    def calculate_confidence(
        self,
        prompt: str,
        response: str,
        metadata: Optional[Dict[str, Any]] = None
    ) -> "ConfidenceScore":
        """
        Calculate confidence score for a response

        Args:
            prompt: Original prompt
            response: LLM response
            metadata: Additional metadata (model, temperature, token_count)

        Returns:
            ConfidenceScore object; it is also appended to ``history``

        Raises:
            KeyError: if ``self.weights`` names a factor that is not one of
                the five computed factors
        """
        metadata = metadata or {}
        factors: Dict[str, float] = {}
        reasoning: List[str] = []

        # Factors are evaluated in a fixed order so that the reasoning lines
        # always appear in the same sequence.
        evaluations = (
            ("completeness", lambda: self._calculate_completeness(prompt, response)),
            ("certainty", lambda: self._calculate_certainty(response)),
            ("specificity", lambda: self._calculate_specificity(response)),
            ("consistency", lambda: self._calculate_consistency(response)),
            ("model_quality", lambda: self._calculate_model_quality(metadata)),
        )
        for name, evaluate in evaluations:
            value, notes = evaluate()
            factors[name] = value
            reasoning.extend(notes)

        # Weighted sum, clamped so that custom weights that do not sum to 1
        # (or float rounding) cannot push the score outside 0.0..1.0.
        weighted = sum(
            factors[name] * weight
            for name, weight in self.weights.items()
        )
        overall_score = min(max(weighted, 0.0), 1.0)

        score = ConfidenceScore(
            overall_score=overall_score,
            level=self._determine_level(overall_score),
            factors=factors,
            reasoning=reasoning,
            metadata=metadata
        )

        # Record the score and keep only the newest _max_history entries
        self.history.append(score)
        if len(self.history) > self._max_history:
            self.history = self.history[-self._max_history:]

        return score

    def _calculate_completeness(
        self,
        prompt: str,
        response: str
    ) -> Tuple[float, List[str]]:
        """Calculate response completeness from length and truncation markers.

        ``prompt`` is accepted for interface symmetry but is not used.
        """
        reasoning = []
        length = len(response)

        # Base score from response length
        if length < self.min_response_length:
            score = 0.3
            reasoning.append(f"Response too short ({length} chars)")
        elif length < self.min_response_length * 2:
            score = 0.6
            reasoning.append("Response length adequate")
        else:
            score = 0.9
            reasoning.append("Response length good")

        # Trailing whitespace is ignored so that "...\n" still counts as a
        # trailing ellipsis.
        if response.rstrip().endswith("...") or response.count("...") > 2:
            score *= 0.7
            reasoning.append("Response appears incomplete (ellipsis)")

        # Explicit truncation markers
        truncation_indicators = ["[truncated]", "[continued]", "...and more"]
        lowered = response.lower()
        if any(indicator in lowered for indicator in truncation_indicators):
            score *= 0.5
            reasoning.append("Response appears truncated")

        return min(score, 1.0), reasoning

    def _calculate_certainty(self, response: str) -> Tuple[float, List[str]]:
        """Calculate response certainty (inverse of uncertainty).

        Each distinct uncertainty phrase counts as 1 and each distinct hedging
        word as 0.5; the total selects one of four fixed score bands.
        """
        reasoning = []
        response_lower = response.lower()

        # Distinct uncertainty phrases present (substring match)
        uncertainty_count = sum(
            1 for phrase in self.UNCERTAINTY_PHRASES
            if phrase in response_lower
        )

        # Distinct hedging words present. Whole-word matching also catches
        # words followed by punctuation or a newline ("probably,").
        hedging_count = sum(
            1 for word in self.HEDGING_WORDS
            if self._has_word(response_lower, word)
        )

        total_uncertainty = uncertainty_count + (hedging_count * 0.5)

        # ":g" prints whole numbers without a fraction ("2") and keeps halves
        # ("0.5") instead of truncating them to a misleading "0".
        if total_uncertainty == 0:
            score = 1.0
            reasoning.append("No uncertainty indicators found")
        elif total_uncertainty <= 2:
            score = 0.8
            reasoning.append(f"Minor uncertainty ({total_uncertainty:g} indicators)")
        elif total_uncertainty <= 5:
            score = 0.5
            reasoning.append(f"Moderate uncertainty ({total_uncertainty:g} indicators)")
        else:
            score = 0.2
            reasoning.append(f"High uncertainty ({total_uncertainty:g} indicators)")

        return score, reasoning

    def _calculate_specificity(self, response: str) -> Tuple[float, List[str]]:
        """Calculate response specificity and detail level"""
        reasoning = []

        # Concrete-detail signals: numbers, code, example markers, list items
        has_numbers = bool(re.search(r'\d+', response))
        has_code = bool(re.search(r'```|`[^`]+`', response))
        has_examples = bool(re.search(self._EXAMPLE_PATTERN, response, re.IGNORECASE))
        has_lists = bool(re.search(r'^\s*[-*\d]+\.?\s', response, re.MULTILINE))

        specificity_indicators = sum([has_numbers, has_code, has_examples, has_lists])

        if specificity_indicators >= 3:
            score = 1.0
            reasoning.append("High specificity (numbers, code, examples, lists)")
        elif specificity_indicators == 2:
            score = 0.7
            reasoning.append("Good specificity (some concrete details)")
        elif specificity_indicators == 1:
            score = 0.5
            reasoning.append("Moderate specificity (limited details)")
        else:
            score = 0.3
            reasoning.append("Low specificity (mostly abstract)")

        # Vague language: counts how many of these phrases occur at least once
        # (substring match), not the number of occurrences.
        vague_phrases = ["something", "somehow", "various", "several", "many", "some"]
        lowered = response.lower()
        vague_count = sum(1 for phrase in vague_phrases if phrase in lowered)

        if vague_count > 5:
            score *= 0.7
            reasoning.append(f"Contains vague language ({vague_count} instances)")

        return score, reasoning

    def _calculate_consistency(self, response: str) -> Tuple[float, List[str]]:
        """Calculate internal consistency of response"""
        reasoning = []
        score = 1.0

        # Each contradiction pattern that matches anywhere in the response
        # multiplies the score by 0.6.
        for pattern, description in self._CONTRADICTION_PATTERNS:
            if re.search(pattern, response, re.IGNORECASE | re.DOTALL):
                score *= 0.6
                reasoning.append(f"Potential contradiction: {description}")

        # Self-corrections: each distinct phrase present costs a further 10%
        correction_phrases = ["actually", "correction", "i mean", "rather", "instead"]
        lowered = response.lower()
        correction_count = sum(
            1 for phrase in correction_phrases
            if phrase in lowered
        )

        if correction_count > 0:
            score *= (1.0 - (correction_count * 0.1))
            reasoning.append(f"Contains self-corrections ({correction_count})")

        if score == 1.0:
            reasoning.append("No consistency issues detected")

        return max(score, 0.0), reasoning

    def _calculate_model_quality(
        self,
        metadata: Dict[str, Any]
    ) -> Tuple[float, List[str]]:
        """Calculate score based on model metadata.

        Reads the optional keys ``model``, ``temperature`` and ``token_count``.
        Missing, None or non-numeric values are ignored rather than raising.
        """
        reasoning = []
        score = 0.7  # Default score when the model is unknown

        # Model quality tiers ("model": None is treated like a missing key)
        model = str(metadata.get("model") or "").lower()
        if "gpt-4" in model or "claude-3" in model:
            score = 0.95
            reasoning.append("High-quality model (GPT-4/Claude-3)")
        elif "gpt-3.5" in model or "claude-2" in model:
            score = 0.8
            reasoning.append("Good quality model (GPT-3.5/Claude-2)")
        elif model:
            score = 0.6
            reasoning.append(f"Standard model ({model})")
        else:
            reasoning.append("Model unknown (default score)")

        # Temperature adjustment; the message shows the value as supplied
        raw_temperature = metadata.get("temperature")
        temperature = self._to_float(raw_temperature)
        if temperature is not None:
            if temperature <= 0.3:
                score *= 1.1  # More deterministic = higher confidence
                reasoning.append(f"Low temperature ({raw_temperature}) increases confidence")
            elif temperature >= 0.9:
                score *= 0.9  # More creative = lower confidence
                reasoning.append(f"High temperature ({raw_temperature}) reduces confidence")

        # Token count (longer responses may indicate more thought)
        token_count = self._to_float(metadata.get("token_count", 0))
        if token_count is not None and token_count > 1000:
            score *= 1.05
            reasoning.append("Detailed response (high token count)")

        return min(score, 1.0), reasoning

    def _determine_level(self, score: float) -> "ConfidenceLevel":
        """Determine confidence level from score"""
        if score >= 0.9:
            return ConfidenceLevel.VERY_HIGH
        if score >= 0.7:
            return ConfidenceLevel.HIGH
        if score >= 0.5:
            return ConfidenceLevel.MEDIUM
        if score >= 0.3:
            return ConfidenceLevel.LOW
        return ConfidenceLevel.VERY_LOW

    def get_average_confidence(self, limit: Optional[int] = None) -> float:
        """Get average overall score from history.

        Args:
            limit: Average only the newest ``limit`` entries. None, 0 or a
                negative value averages the whole history.

        Returns:
            The mean overall score, or 0.0 when the history is empty.
        """
        if not self.history:
            return 0.0

        recent = self.history[-limit:] if limit and limit > 0 else self.history
        return sum(score.overall_score for score in recent) / len(recent)

    def get_confidence_trend(self, window: int = 10) -> str:
        """Get confidence trend over the newest ``window`` scores.

        Returns:
            "improving" / "declining" when the mean of the newer half differs
            from the older half by more than 0.1, "stable" otherwise, and
            "insufficient_data" when there are fewer than ``window`` scores or
            ``window`` is below 2 (no two halves to compare).
        """
        if window < 2 or len(self.history) < window:
            return "insufficient_data"

        recent = self.history[-window:]
        midpoint = window // 2
        first_half = recent[:midpoint]
        second_half = recent[midpoint:]

        first_avg = sum(s.overall_score for s in first_half) / len(first_half)
        second_avg = sum(s.overall_score for s in second_half) / len(second_half)

        diff = second_avg - first_avg

        if diff > 0.1:
            return "improving"
        if diff < -0.1:
            return "declining"
        return "stable"

    def export_stats(self) -> Dict[str, Any]:
        """Export confidence statistics computed from the current history"""
        if not self.history:
            return {
                "total_scores": 0,
                "average_confidence": 0.0,
                "trend": "no_data"
            }

        return {
            "total_scores": len(self.history),
            "average_confidence": self.get_average_confidence(),
            "recent_average": self.get_average_confidence(limit=10),
            "trend": self.get_confidence_trend(),
            "level_distribution": self._get_level_distribution(),
            "factor_averages": self._get_factor_averages()
        }

    def _get_level_distribution(self) -> Dict[str, int]:
        """Get the number of history entries per confidence level value"""
        distribution = {level.value: 0 for level in ConfidenceLevel}
        for score in self.history:
            distribution[score.level.value] += 1
        return distribution

    def _get_factor_averages(self) -> Dict[str, float]:
        """Get average scores for each factor across the history"""
        if not self.history:
            return {}

        factor_sums: Dict[str, float] = {}
        for score in self.history:
            for factor, value in score.factors.items():
                factor_sums[factor] = factor_sums.get(factor, 0) + value

        return {
            factor: total / len(self.history)
            for factor, total in factor_sums.items()
        }
461
+
462
+
463
# Shared scorer returned by get_confidence_scorer(); None until first requested
_scorer_instance: Optional[ConfidenceScorer] = None


def get_confidence_scorer(
    weights: Optional[Dict[str, float]] = None,
    min_response_length: int = 50
) -> ConfidenceScorer:
    """Return the shared ConfidenceScorer, creating it on first use.

    ``weights`` and ``min_response_length`` are only applied when the shared
    instance is created; once it exists they are ignored and the existing
    instance is returned unchanged.
    """
    global _scorer_instance

    if _scorer_instance is not None:
        return _scorer_instance

    _scorer_instance = ConfidenceScorer(
        weights=weights,
        min_response_length=min_response_length,
    )
    return _scorer_instance
481
+
482
+
483
def reset_scorer() -> None:
    """Drop the shared scorer so the next lookup builds a new one (for testing)."""
    # Rebind the module-level name to None
    globals()["_scorer_instance"] = None
487
+
488
+
489
# Module version string
__version__ = "1.0.0"

# Names exported by a star-import of this module
__all__ = [
    "ConfidenceScorer", "ConfidenceScore", "ConfidenceLevel",
    "get_confidence_scorer", "reset_scorer",
]
@@ -0,0 +1,188 @@
1
+ #!/usr/bin/env python3
2
+ import os
3
+ import logging
4
+ from pathlib import Path
5
+ from typing import Dict, Any, Optional
6
+ from dataclasses import dataclass, field
7
+
8
+ try:
9
+ import yaml
10
+ except ImportError:
11
+ yaml = None
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
# Candidate config files, checked in this order; the first one that exists is used.
# The two home-directory locations come before the two relative ones.
DEFAULT_CONFIG_PATHS = [
    Path.home().joinpath(".devsquad.yaml"),
    Path.home().joinpath(".devsquad", "config.yaml"),
    Path(".devsquad.yaml"),
    Path("devsquad.yaml"),
]
21
+
22
+
23
@dataclass
class DevSquadConfig:
    """Flat record of DevSquad settings; the values below are the built-in defaults."""

    # Backend selection
    backend: str = "mock"
    base_url: Optional[str] = None
    model: Optional[str] = None
    timeout: int = 120

    # Task limits and validation
    max_roles: int = 10
    max_task_length: int = 10000
    min_task_length: int = 5
    strict_validation: bool = False
    output_format: str = "structured"

    # Checkpoints, workflows and cache
    checkpoint_enabled: bool = True
    checkpoint_dir: str = "./checkpoints"
    workflow_enabled: bool = False
    workflow_dir: str = "./workflows"
    cache_enabled: bool = True
    cache_dir: str = "./data/llm_cache"

    # Logging
    log_level: str = "WARNING"

    def to_dict(self) -> Dict[str, Any]:
        """Return every field as a plain dict, keyed by field name in declaration order."""
        return {name: getattr(self, name) for name in self.__dataclass_fields__}
61
+
62
+
63
class ConfigManager:
    """
    Configuration manager for DevSquad.

    Settings are applied in this order, later sources overriding earlier ones:
    1. Built-in defaults from DevSquadConfig (lowest priority)
    2. A YAML file: the explicit ``config_path``, otherwise the first existing
       entry of DEFAULT_CONFIG_PATHS
    3. Environment variables listed in ENV_MAP (highest priority)
    """

    def _parse_bool(value):
        """Return True for "true", "1" or "yes" (case-insensitive, surrounding whitespace ignored)."""
        return str(value).strip().lower() in ("true", "1", "yes")

    # Maps an environment variable to either an attribute name (value stored
    # as the raw string) or an (attribute name, converter) tuple.
    ENV_MAP = {
        "DEVSQUAD_LLM_BACKEND": "backend",
        "DEVSQUAD_BACKEND": "backend",
        "DEVSQUAD_BASE_URL": "base_url",
        "DEVSQUAD_MODEL": "model",
        "DEVSQUAD_TIMEOUT": ("timeout", int),
        "DEVSQUAD_MAX_ROLES": ("max_roles", int),
        "DEVSQUAD_OUTPUT_FORMAT": "output_format",
        "DEVSQUAD_STRICT": ("strict_validation", _parse_bool),
        "DEVSQUAD_STRICT_VALIDATION": ("strict_validation", _parse_bool),
        "DEVSQUAD_LOG_LEVEL": "log_level",
        "DEVSQUAD_CHECKPOINT_DIR": "checkpoint_dir",
        "DEVSQUAD_CACHE_DIR": "cache_dir",
        "DEVSQUAD_CHECKPOINT_ENABLED": ("checkpoint_enabled", _parse_bool),
        "DEVSQUAD_CACHE_ENABLED": ("cache_enabled", _parse_bool),
    }

    # ENV_MAP above holds the plain function; rebinding as a staticmethod keeps
    # instance access (self._parse_bool) from passing self as the value.
    _parse_bool = staticmethod(_parse_bool)

    def __init__(self, config_path: Optional[str] = None):
        """Load configuration from defaults, an optional YAML file and the environment.

        Args:
            config_path: Explicit YAML file to use. When omitted, the first
                existing path in DEFAULT_CONFIG_PATHS is used, if any. A path
                that does not exist is remembered (``save`` will write to it)
                but nothing is loaded from it.
        """
        self.config = DevSquadConfig()
        self._config_path = None

        if config_path:
            self._config_path = Path(config_path)
        else:
            for path in DEFAULT_CONFIG_PATHS:
                if path.exists():
                    self._config_path = path
                    break

        self._load()

    def _field_names(self):
        """Return the set of setting names, i.e. the dataclass fields of the config."""
        from dataclasses import fields

        return {f.name for f in fields(self.config)}

    def _load(self):
        """Apply the config file (if present), then environment overrides."""
        if self._config_path and self._config_path.exists():
            self._load_from_file(self._config_path)

        self._load_from_env()

    def _load_from_file(self, path: Path):
        """Apply settings from the YAML file at ``path``.

        Settings are read from the top-level ``devsquad`` mapping when present,
        otherwise from the top level itself. Keys may use ``-`` in place of
        ``_``. Unknown keys are ignored; a value that cannot be converted to
        the field's type is logged and skipped without affecting other keys.
        Any failure to read or parse the file is logged, never raised.
        """
        if yaml is None:
            logger.warning("pyyaml not installed, skipping config file %s", path)
            return

        try:
            with open(path, 'r', encoding='utf-8') as f:
                data = yaml.safe_load(f) or {}
        except Exception as e:
            # Best-effort: an unreadable or malformed file must not stop startup
            logger.warning("Failed to load config from %s: %s", path, e)
            return

        if not isinstance(data, dict):
            logger.warning("Failed to load config from %s: %s", path, "top-level YAML value is not a mapping")
            return

        devsquad_data = data.get('devsquad', data)
        if devsquad_data is None:
            # An empty "devsquad:" section parses as None; treat it as no settings
            devsquad_data = {}
        if not isinstance(devsquad_data, dict):
            logger.warning("Failed to load config from %s: %s", path, "'devsquad' section is not a mapping")
            return

        known = self._field_names()
        for key, value in devsquad_data.items():
            if not isinstance(key, str):
                continue
            key_mapped = key.replace('-', '_')
            # Only dataclass fields may be set; a key such as "to_dict" must
            # not overwrite a method on the config object.
            if key_mapped not in known:
                continue

            current = getattr(self.config, key_mapped)
            # bool is checked first because bool is a subclass of int
            if isinstance(current, bool) and not isinstance(value, bool):
                value = self._parse_bool(value)
            elif isinstance(current, int) and not isinstance(value, int):
                try:
                    value = int(value)
                except (ValueError, TypeError, OverflowError):
                    logger.warning("Invalid int value for %s: %s", key, value)
                    continue
            elif isinstance(current, float) and not isinstance(value, (int, float)):
                try:
                    value = float(value)
                except (ValueError, TypeError, OverflowError):
                    logger.warning("Invalid float value for %s: %s", key, value)
                    continue
            setattr(self.config, key_mapped, value)

        logger.info("Config loaded from %s", path)

    def _load_from_env(self):
        """Apply every ENV_MAP variable that is set in the environment.

        A value whose converter raises ValueError or TypeError is logged and
        skipped, leaving the current setting unchanged.
        """
        for env_key, mapping in self.ENV_MAP.items():
            env_value = os.environ.get(env_key)
            if env_value is None:
                continue

            if not isinstance(mapping, tuple):
                # Plain attribute name: store the raw string
                setattr(self.config, mapping, env_value)
                continue

            attr_name, converter = mapping
            try:
                converted = converter(env_value)
            except (ValueError, TypeError):
                logger.warning("Invalid env value for %s: %s", env_key, env_value)
                continue
            setattr(self.config, attr_name, converted)

    def get(self, key: str, default: Any = None) -> Any:
        """Return the setting named ``key``, or ``default`` when ``key`` is not a setting."""
        if key not in self._field_names():
            return default
        return getattr(self.config, key)

    def set(self, key: str, value: Any):
        """Set the setting named ``key``; unknown names are logged and ignored."""
        if key in self._field_names():
            setattr(self.config, key, value)
        else:
            logger.warning("Unknown config key: %s", key)

    def save(self, path: Optional[str] = None):
        """Write the current settings as YAML under a top-level ``devsquad`` key.

        Args:
            path: Destination file. Defaults to the loaded config path, or
                ``~/.devsquad.yaml`` when none was loaded.

        Failures (including a missing PyYAML) are logged, never raised.
        """
        save_path = Path(path) if path else (self._config_path or Path.home() / ".devsquad.yaml")

        # Check for PyYAML first so nothing is created on disk when saving is impossible
        if yaml is None:
            logger.warning("pyyaml not installed, cannot save config to %s", save_path)
            return

        try:
            save_path.parent.mkdir(parents=True, exist_ok=True)
            data = {'devsquad': self.config.to_dict()}
            with open(save_path, 'w', encoding='utf-8') as f:
                yaml.dump(data, f, default_flow_style=False, allow_unicode=True)
        except Exception as e:
            logger.warning("Failed to save config: %s", e)
        else:
            logger.info("Config saved to %s", save_path)

    @property
    def config_path(self) -> Optional[str]:
        """Path of the config file in use, as a string, or None when there is none."""
        return str(self._config_path) if self._config_path else None

    def to_dict(self) -> Dict[str, Any]:
        """Return the current settings as a plain dict."""
        return self.config.to_dict()