devsquad 3.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95) hide show
  1. devsquad-3.6.0.dist-info/METADATA +944 -0
  2. devsquad-3.6.0.dist-info/RECORD +95 -0
  3. devsquad-3.6.0.dist-info/WHEEL +5 -0
  4. devsquad-3.6.0.dist-info/entry_points.txt +2 -0
  5. devsquad-3.6.0.dist-info/licenses/LICENSE +21 -0
  6. devsquad-3.6.0.dist-info/top_level.txt +2 -0
  7. scripts/__init__.py +0 -0
  8. scripts/ai_semantic_matcher.py +512 -0
  9. scripts/alert_manager.py +505 -0
  10. scripts/api/__init__.py +43 -0
  11. scripts/api/models.py +386 -0
  12. scripts/api/routes/__init__.py +20 -0
  13. scripts/api/routes/dispatch.py +348 -0
  14. scripts/api/routes/lifecycle.py +330 -0
  15. scripts/api/routes/metrics_gates.py +347 -0
  16. scripts/api_server.py +318 -0
  17. scripts/auth.py +451 -0
  18. scripts/cli/__init__.py +1 -0
  19. scripts/cli/cli_visual.py +642 -0
  20. scripts/cli.py +1094 -0
  21. scripts/collaboration/__init__.py +212 -0
  22. scripts/collaboration/_version.py +1 -0
  23. scripts/collaboration/agent_briefing.py +656 -0
  24. scripts/collaboration/ai_semantic_matcher.py +260 -0
  25. scripts/collaboration/anchor_checker.py +281 -0
  26. scripts/collaboration/anti_rationalization.py +470 -0
  27. scripts/collaboration/async_integration_example.py +255 -0
  28. scripts/collaboration/batch_scheduler.py +149 -0
  29. scripts/collaboration/checkpoint_manager.py +561 -0
  30. scripts/collaboration/ci_feedback_adapter.py +351 -0
  31. scripts/collaboration/code_map_generator.py +247 -0
  32. scripts/collaboration/concern_pack_loader.py +352 -0
  33. scripts/collaboration/confidence_score.py +496 -0
  34. scripts/collaboration/config_loader.py +188 -0
  35. scripts/collaboration/consensus.py +244 -0
  36. scripts/collaboration/context_compressor.py +533 -0
  37. scripts/collaboration/coordinator.py +668 -0
  38. scripts/collaboration/dispatcher.py +1636 -0
  39. scripts/collaboration/dual_layer_context.py +128 -0
  40. scripts/collaboration/enhanced_worker.py +539 -0
  41. scripts/collaboration/feature_usage_tracker.py +206 -0
  42. scripts/collaboration/five_axis_consensus.py +334 -0
  43. scripts/collaboration/input_validator.py +401 -0
  44. scripts/collaboration/integration_example.py +287 -0
  45. scripts/collaboration/intent_workflow_mapper.py +350 -0
  46. scripts/collaboration/language_parsers.py +269 -0
  47. scripts/collaboration/lifecycle_protocol.py +1446 -0
  48. scripts/collaboration/llm_backend.py +453 -0
  49. scripts/collaboration/llm_cache.py +448 -0
  50. scripts/collaboration/llm_cache_async.py +347 -0
  51. scripts/collaboration/llm_retry.py +387 -0
  52. scripts/collaboration/llm_retry_async.py +389 -0
  53. scripts/collaboration/mce_adapter.py +597 -0
  54. scripts/collaboration/memory_bridge.py +1607 -0
  55. scripts/collaboration/models.py +537 -0
  56. scripts/collaboration/null_providers.py +297 -0
  57. scripts/collaboration/operation_classifier.py +289 -0
  58. scripts/collaboration/output_slicer.py +225 -0
  59. scripts/collaboration/performance_monitor.py +462 -0
  60. scripts/collaboration/permission_guard.py +865 -0
  61. scripts/collaboration/prompt_assembler.py +756 -0
  62. scripts/collaboration/prompt_variant_generator.py +483 -0
  63. scripts/collaboration/protocols.py +267 -0
  64. scripts/collaboration/report_formatter.py +352 -0
  65. scripts/collaboration/retrospective.py +279 -0
  66. scripts/collaboration/role_matcher.py +92 -0
  67. scripts/collaboration/role_template_market.py +352 -0
  68. scripts/collaboration/rule_collector.py +678 -0
  69. scripts/collaboration/scratchpad.py +346 -0
  70. scripts/collaboration/skill_registry.py +151 -0
  71. scripts/collaboration/skillifier.py +878 -0
  72. scripts/collaboration/standardized_role_template.py +317 -0
  73. scripts/collaboration/task_completion_checker.py +237 -0
  74. scripts/collaboration/test_quality_guard.py +695 -0
  75. scripts/collaboration/unified_gate_engine.py +598 -0
  76. scripts/collaboration/usage_tracker.py +309 -0
  77. scripts/collaboration/user_friendly_error.py +176 -0
  78. scripts/collaboration/verification_gate.py +312 -0
  79. scripts/collaboration/warmup_manager.py +635 -0
  80. scripts/collaboration/worker.py +513 -0
  81. scripts/collaboration/workflow_engine.py +684 -0
  82. scripts/dashboard.py +1088 -0
  83. scripts/generate_benchmark_report.py +786 -0
  84. scripts/history_manager.py +604 -0
  85. scripts/mcp_server.py +289 -0
  86. skills/__init__.py +32 -0
  87. skills/dispatch/handler.py +52 -0
  88. skills/intent/handler.py +59 -0
  89. skills/registry.py +67 -0
  90. skills/retrospective/__init__.py +0 -0
  91. skills/retrospective/handler.py +125 -0
  92. skills/review/handler.py +356 -0
  93. skills/security/handler.py +454 -0
  94. skills/test/__init__.py +0 -0
  95. skills/test/handler.py +78 -0
@@ -0,0 +1,312 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ VerificationGate - Hardened verification requirements for TaskCompletionChecker
5
+
6
+ Enforces Agent Skills' principle: "Seems right" is NEVER sufficient.
7
+ Every completion claim must have supporting evidence.
8
+
9
+ Integration point: Called by TaskCompletionChecker.check_dispatch_result()
10
+ to validate Worker output quality before accepting completion claims.
11
+
12
+ Spec reference: SPEC_V35_Agent_Skills_Quality_Framework.md Section 6.2
13
+ """
14
+
15
+ import logging
16
+ from dataclasses import dataclass, field
17
+ from typing import Any, Callable, Dict, List, Optional
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
@dataclass
class RedFlag:
    """Warning signal raised when a Worker's output looks suspicious.

    Attributes:
        id: Unique identifier used to look the flag up.
        severity: One of "critical", "warning" or "info".
        description: Human-readable explanation of what the flag means.
        detection: Predicate called with the completion context; a truthy
            return value means the flag is triggered.
    """

    id: str
    severity: str  # "critical" / "warning" / "info"
    description: str
    detection: Callable[[Any], bool]
29
+
30
+
31
@dataclass
class EvidenceItem:
    """One piece of proof a Worker is expected to supply with a completion claim.

    Attributes:
        key: Name under which the evidence is looked up.
        required: True when the evidence is mandatory for every role.
        required_for: Optional list of role ids for which the evidence is
            mandatory; None means no role-specific requirement.
        description: Human-readable explanation of the evidence.
        format_hint: Example of what the evidence should look like.
    """

    key: str
    required: bool = False
    required_for: Optional[List[str]] = None
    description: str = ""
    format_hint: str = ""
39
+
40
+
41
@dataclass
class CompletionContext:
    """Facts about a single Worker result that the gate evaluates.

    Attributes:
        role_id: Identifier of the role that produced the result.
        has_code_changes: Whether the result contains code changes.
        has_test_changes: Whether the result contains test changes.
        is_bug_fix: Whether the task is a bug fix.
        has_repro_test: Whether a reproduction test accompanies the fix.
        test_run_count: Number of test runs observed.
        all_passed: Whether everything passed.
        tests_skipped: Number of skipped tests.
        coverage_delta: Change in coverage relative to the baseline.
        output_lines: Number of lines in the Worker output.
        was_sliced: Whether the output was sliced.
        claims_complete: Whether the Worker claims the task is complete.
        evidence: Evidence supplied by the Worker, keyed by evidence name.
    """

    role_id: str
    has_code_changes: bool = False
    has_test_changes: bool = False
    is_bug_fix: bool = False
    has_repro_test: bool = False
    test_run_count: int = 0
    all_passed: bool = False
    tests_skipped: int = 0
    coverage_delta: float = 0.0
    output_lines: int = 0
    was_sliced: bool = False
    claims_complete: bool = False
    # default_factory gives every context its own dict instead of a shared one
    evidence: Dict[str, Any] = field(default_factory=dict)
57
+
58
+
59
@dataclass
class GateResult:
    """Outcome of evaluating a CompletionContext with VerificationGate.

    Attributes:
        passed: Whether the gate accepted the completion claim.
        red_flags: Red Flags that were triggered.
        missing_evidence: Evidence items that were expected but absent.
        verdict: Verdict label; defaults to "APPROVE".
    """

    passed: bool
    # default_factory keeps the lists independent between results
    red_flags: List[RedFlag] = field(default_factory=list)
    missing_evidence: List[EvidenceItem] = field(default_factory=list)
    verdict: str = "APPROVE"
66
+
67
+
68
class VerificationGate:
    """
    Hardened verification requirements for TaskCompletionChecker.

    Enforces mandatory evidence requirements and detects Red Flags that
    indicate problems with Worker output quality.

    Design borrowed from Agent Skills (addyosmani/agent-skills):
    - Every skill ends with mandatory evidence checklist
    - Red Flags provide early warning signals
    - "Seems right" is NEVER sufficient as acceptance criteria

    Verdicts produced by ``check``:
    - "REJECT": a critical Red Flag fired, or evidence marked
      ``required=True`` is missing
    - "CONDITIONAL": only non-critical flags fired and/or only
      role-specific evidence is missing
    - "APPROVE": no flag fired and no evidence is missing
    """

    # Each detector receives the CompletionContext and returns truthy when
    # the flag applies.
    RED_FLAGS: List[RedFlag] = [
        RedFlag(
            id="no_test_for_new_behavior",
            severity="critical",
            description="Worker produced code changes without corresponding tests",
            detection=lambda ctx: ctx.has_code_changes and not ctx.has_test_changes,
        ),
        RedFlag(
            id="tests_pass_first_run",
            severity="warning",
            description=(
                "Tests pass on first run — may not be testing intended behavior"
            ),
            detection=lambda ctx: (
                ctx.test_run_count == 1 and ctx.all_passed and ctx.has_test_changes
            ),
        ),
        RedFlag(
            id="no_regression_test_for_bugfix",
            severity="critical",
            description="Bug fix task without failing reproduction test",
            detection=lambda ctx: ctx.is_bug_fix and not ctx.has_repro_test,
        ),
        RedFlag(
            id="tests_skipped_or_disabled",
            severity="critical",
            description="Tests were skipped or disabled to make suite pass",
            detection=lambda ctx: ctx.tests_skipped > 0,
        ),
        RedFlag(
            id="coverage_decreased",
            severity="warning",
            description="Code coverage decreased from baseline",
            detection=lambda ctx: ctx.coverage_delta < -0.01,
        ),
        RedFlag(
            id="output_exceeds_limit",
            severity="warning",
            description=(
                "Single Worker output exceeds 100 lines without slicing"
            ),
            detection=lambda ctx: ctx.output_lines > 100 and not ctx.was_sliced,
        ),
        RedFlag(
            id="no_evidence_provided",
            severity="critical",
            description="Worker claims completion without providing evidence",
            detection=lambda ctx: (
                ctx.claims_complete and len(ctx.evidence) == 0
            ),
        ),
    ]

    # ``required=True`` items apply to every role; ``required_for`` items
    # apply only to the listed role ids.
    MANDATORY_EVIDENCE: List[EvidenceItem] = [
        EvidenceItem(
            key="test_results",
            required=True,
            description="Test execution output showing pass/fail status",
            format_hint='e.g., "pytest: 142 passed, 0 failed in 3.2s"',
        ),
        EvidenceItem(
            key="build_status",
            required_for=["architect", "solo-coder"],
            description="Build success/failure with output",
            format_hint='e.g., "Build succeeded in 1.2s"',
        ),
        EvidenceItem(
            key="diff_summary",
            required=True,
            description="Summary of changes made (files affected, lines changed)",
            format_hint=(
                'e.g., "Modified: dispatcher.py (+23/-5), '
                'Added: ar_engine.py (+89)"'
            ),
        ),
    ]

    def __init__(self, strict_mode: bool = True):
        """
        Initialize VerificationGate.

        Args:
            strict_mode: If True, any critical flag or missing evidence blocks
                approval. If False, only logs warnings.
        """
        self.strict_mode = strict_mode

    def check(self, context: CompletionContext) -> GateResult:
        """
        Run verification gate against completion context.

        Every Red Flag detector is evaluated and mandatory evidence is
        checked. In strict mode ``passed`` is True only for an "APPROVE"
        verdict. In non-strict mode the findings and verdict are still
        computed and returned, but they are only logged: ``passed`` is
        always True.

        Args:
            context: CompletionContext with Worker result data

        Returns:
            GateResult with passed status, triggered flags,
            missing evidence, and verdict
        """
        triggered_flags: List[RedFlag] = []
        for flag in self.RED_FLAGS:
            try:
                fired = flag.detection(context)
            except Exception as exc:
                # Best effort: one broken detector must not abort the gate,
                # but a check that did not run has to be visible in the logs.
                logger.warning(
                    "Red flag detection error for %s: %s", flag.id, exc
                )
                continue
            if fired:
                triggered_flags.append(flag)
                logger.warning(
                    "Red Flag [%s]: %s (role=%s)",
                    flag.id, flag.description, context.role_id,
                )

        missing = self._check_missing_evidence(context)

        has_critical_flag = any(f.severity == "critical" for f in triggered_flags)
        # Role-specific (required_for) gaps are not critical on their own.
        has_critical_gap = any(item.required for item in missing)

        if has_critical_flag or has_critical_gap:
            verdict = "REJECT"
        elif triggered_flags or missing:
            verdict = "CONDITIONAL"
        else:
            verdict = "APPROVE"

        approved = verdict == "APPROVE"
        if not approved and not self.strict_mode:
            # Non-strict mode reports the verdict without enforcing it.
            logger.warning(
                "VerificationGate non-strict mode: verdict %s for role=%s "
                "reported but not enforced",
                verdict, context.role_id,
            )

        return GateResult(
            passed=approved or not self.strict_mode,
            red_flags=triggered_flags,
            missing_evidence=missing,
            verdict=verdict,
        )

    def _check_missing_evidence(self, context: CompletionContext) -> List[EvidenceItem]:
        """Return the mandatory evidence items absent from ``context.evidence``.

        An item applies when it is ``required`` for everyone, or when the
        context's role id is listed in its ``required_for``.
        """
        provided = context.evidence
        role = context.role_id
        return [
            item
            for item in self.MANDATORY_EVIDENCE
            if (item.required or (item.required_for and role in item.required_for))
            and item.key not in provided
        ]

    @staticmethod
    def _coerce_count(value: Any) -> int:
        """Convert a loosely typed counter to int, treating bad values as 0."""
        try:
            return int(value)
        except (TypeError, ValueError):
            return 0

    def build_context_from_worker_result(
        self, worker_result: Dict[str, Any]
    ) -> CompletionContext:
        """
        Build CompletionContext from a raw worker result dict.

        Extracts available fields heuristically from worker result structure.
        Most flags are keyword heuristics over the output text, so the
        resulting context is an approximation.

        Args:
            worker_result: Dict from DispatchResult.worker_results

        Returns:
            Populated CompletionContext
        """
        role_id = worker_result.get(
            "role_id", worker_result.get("role", "unknown")
        )

        # A missing/None output is empty text, not the literal string "None".
        raw_output = worker_result.get("output")
        output = "" if raw_output is None else str(raw_output)
        lowered = output.lower()  # computed once, reused by every heuristic
        output_lines = len(output.split("\n")) if output else 0

        success = bool(worker_result.get("success", False))
        errors = worker_result.get("errors", [])
        mentions_test = "test" in lowered

        # NOTE(review): forwarding an "evidence" dict and top-level mandatory
        # evidence keys assumes workers report evidence that way — confirm
        # against the producers of DispatchResult.worker_results. Results
        # without these keys behave exactly as before.
        evidence: Dict[str, Any] = {}
        supplied = worker_result.get("evidence")
        if isinstance(supplied, dict):
            evidence.update(supplied)
        for item in self.MANDATORY_EVIDENCE:
            value = worker_result.get(item.key)
            if value is not None and item.key not in evidence:
                evidence[item.key] = value

        # Only a passing verification record counts as evidence.
        verification = worker_result.get("verification")
        if isinstance(verification, dict) and verification.get("passed"):
            evidence["verification"] = verification

        return CompletionContext(
            role_id=role_id,
            has_code_changes=output_lines > 10 and success,
            # Only the first 500 characters are scanned for test changes.
            has_test_changes="test" in lowered[:500],
            is_bug_fix=self._is_likely_bug_fix(worker_result),
            has_repro_test="reproduce" in lowered or "test_" in lowered,
            test_run_count=1 if mentions_test else 0,
            all_passed=success and not errors,
            # None or junk would make the skipped-tests detector raise and
            # silently drop a critical flag, so normalise to int here.
            tests_skipped=self._coerce_count(worker_result.get("tests_skipped", 0)),
            coverage_delta=0.0,  # no coverage data is read from the result
            output_lines=output_lines,
            was_sliced=bool(worker_result.get("was_sliced", False)),
            claims_complete=success,
            evidence=evidence,
        )

    @staticmethod
    def _is_likely_bug_fix(worker_result: Dict[str, Any]) -> bool:
        """Heuristically determine if this looks like a bug fix task.

        Looks for bug-related keywords (English and Chinese) as substrings
        of ``task_description``, falling back to ``original_task``.
        """
        task_desc = str(
            worker_result.get("task_description", "")
            or worker_result.get("original_task", "")
        ).lower()
        bug_keywords = (
            "fix", "bug", "error", "fail", "crash", "broken",
            "修复", "错误", "失败", "崩溃", "异常",
        )
        return any(kw in task_desc for kw in bug_keywords)

    def get_red_flag_by_id(self, flag_id: str) -> Optional[RedFlag]:
        """Look up a specific RedFlag by ID; return None when not found."""
        return next((flag for flag in self.RED_FLAGS if flag.id == flag_id), None)

    @property
    def red_flag_count(self) -> int:
        """Total number of defined Red Flags."""
        return len(self.RED_FLAGS)

    @property
    def evidence_item_count(self) -> int:
        """Total number of defined EvidenceItems."""
        return len(self.MANDATORY_EVIDENCE)
296
+
297
+
298
def get_shared_gate(strict_mode: bool = True) -> VerificationGate:
    """
    Get or create shared singleton instance.

    The instance is cached as an attribute on this function. ``strict_mode``
    only takes effect on the call that creates the instance; a later call
    asking for a different mode gets the existing instance back and a
    warning is logged so the mismatch is not silent.

    Args:
        strict_mode: If True, critical flags block approval

    Returns:
        Shared VerificationGate instance
    """
    instance = getattr(get_shared_gate, "_instance", None)
    if instance is None:
        instance = VerificationGate(strict_mode=strict_mode)
        get_shared_gate._instance = instance
    elif instance.strict_mode != strict_mode:
        logger.warning(
            "get_shared_gate(strict_mode=%s) ignored: shared instance was "
            "already created with strict_mode=%s",
            strict_mode, instance.strict_mode,
        )
    return instance