agentic-qe 3.6.0 → 3.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/agents/v3/qe-devils-advocate.md +218 -0
- package/.claude/agents/v3/qe-quality-criteria-recommender.md +2 -2
- package/.claude/skills/qe-iterative-loop/SKILL.md +1 -1
- package/.claude/skills/release/SKILL.md +17 -31
- package/.claude/skills/skills-manifest.json +1 -1
- package/README.md +38 -35
- package/package.json +1 -1
- package/scripts/cloud-db-config.json +1 -1
- package/v3/CHANGELOG.md +44 -0
- package/v3/README.md +7 -7
- package/v3/assets/agents/v3/qe-devils-advocate.md +218 -0
- package/v3/assets/agents/v3/qe-quality-criteria-recommender.md +2 -2
- package/v3/assets/skills/qe-iterative-loop/SKILL.md +1 -1
- package/v3/dist/agents/devils-advocate/agent.d.ts +103 -0
- package/v3/dist/agents/devils-advocate/agent.d.ts.map +1 -0
- package/v3/dist/agents/devils-advocate/agent.js +240 -0
- package/v3/dist/agents/devils-advocate/agent.js.map +1 -0
- package/v3/dist/agents/devils-advocate/index.d.ts +60 -0
- package/v3/dist/agents/devils-advocate/index.d.ts.map +1 -0
- package/v3/dist/agents/devils-advocate/index.js +72 -0
- package/v3/dist/agents/devils-advocate/index.js.map +1 -0
- package/v3/dist/agents/devils-advocate/strategies.d.ts +59 -0
- package/v3/dist/agents/devils-advocate/strategies.d.ts.map +1 -0
- package/v3/dist/agents/devils-advocate/strategies.js +438 -0
- package/v3/dist/agents/devils-advocate/strategies.js.map +1 -0
- package/v3/dist/agents/devils-advocate/types.d.ts +182 -0
- package/v3/dist/agents/devils-advocate/types.d.ts.map +1 -0
- package/v3/dist/agents/devils-advocate/types.js +96 -0
- package/v3/dist/agents/devils-advocate/types.js.map +1 -0
- package/v3/dist/agents/index.d.ts +20 -0
- package/v3/dist/agents/index.d.ts.map +1 -0
- package/v3/dist/agents/index.js +20 -0
- package/v3/dist/agents/index.js.map +1 -0
- package/v3/dist/cli/bundle.js +4489 -119
- package/v3/dist/coordination/agent-teams/adapter.d.ts +108 -0
- package/v3/dist/coordination/agent-teams/adapter.d.ts.map +1 -0
- package/v3/dist/coordination/agent-teams/adapter.js +316 -0
- package/v3/dist/coordination/agent-teams/adapter.js.map +1 -0
- package/v3/dist/coordination/agent-teams/domain-team-manager.d.ts +164 -0
- package/v3/dist/coordination/agent-teams/domain-team-manager.d.ts.map +1 -0
- package/v3/dist/coordination/agent-teams/domain-team-manager.js +342 -0
- package/v3/dist/coordination/agent-teams/domain-team-manager.js.map +1 -0
- package/v3/dist/coordination/agent-teams/index.d.ts +53 -0
- package/v3/dist/coordination/agent-teams/index.d.ts.map +1 -0
- package/v3/dist/coordination/agent-teams/index.js +61 -0
- package/v3/dist/coordination/agent-teams/index.js.map +1 -0
- package/v3/dist/coordination/agent-teams/mailbox.d.ts +142 -0
- package/v3/dist/coordination/agent-teams/mailbox.d.ts.map +1 -0
- package/v3/dist/coordination/agent-teams/mailbox.js +395 -0
- package/v3/dist/coordination/agent-teams/mailbox.js.map +1 -0
- package/v3/dist/coordination/agent-teams/tracing.d.ts +199 -0
- package/v3/dist/coordination/agent-teams/tracing.d.ts.map +1 -0
- package/v3/dist/coordination/agent-teams/tracing.js +308 -0
- package/v3/dist/coordination/agent-teams/tracing.js.map +1 -0
- package/v3/dist/coordination/agent-teams/types.d.ts +121 -0
- package/v3/dist/coordination/agent-teams/types.d.ts.map +1 -0
- package/v3/dist/coordination/agent-teams/types.js +17 -0
- package/v3/dist/coordination/agent-teams/types.js.map +1 -0
- package/v3/dist/coordination/circuit-breaker/breaker-registry.d.ts +146 -0
- package/v3/dist/coordination/circuit-breaker/breaker-registry.d.ts.map +1 -0
- package/v3/dist/coordination/circuit-breaker/breaker-registry.js +368 -0
- package/v3/dist/coordination/circuit-breaker/breaker-registry.js.map +1 -0
- package/v3/dist/coordination/circuit-breaker/domain-circuit-breaker.d.ts +134 -0
- package/v3/dist/coordination/circuit-breaker/domain-circuit-breaker.d.ts.map +1 -0
- package/v3/dist/coordination/circuit-breaker/domain-circuit-breaker.js +337 -0
- package/v3/dist/coordination/circuit-breaker/domain-circuit-breaker.js.map +1 -0
- package/v3/dist/coordination/circuit-breaker/index.d.ts +46 -0
- package/v3/dist/coordination/circuit-breaker/index.d.ts.map +1 -0
- package/v3/dist/coordination/circuit-breaker/index.js +51 -0
- package/v3/dist/coordination/circuit-breaker/index.js.map +1 -0
- package/v3/dist/coordination/circuit-breaker/types.d.ts +112 -0
- package/v3/dist/coordination/circuit-breaker/types.d.ts.map +1 -0
- package/v3/dist/coordination/circuit-breaker/types.js +10 -0
- package/v3/dist/coordination/circuit-breaker/types.js.map +1 -0
- package/v3/dist/coordination/competing-hypotheses/hypothesis-manager.d.ts +122 -0
- package/v3/dist/coordination/competing-hypotheses/hypothesis-manager.d.ts.map +1 -0
- package/v3/dist/coordination/competing-hypotheses/hypothesis-manager.js +377 -0
- package/v3/dist/coordination/competing-hypotheses/hypothesis-manager.js.map +1 -0
- package/v3/dist/coordination/competing-hypotheses/index.d.ts +34 -0
- package/v3/dist/coordination/competing-hypotheses/index.d.ts.map +1 -0
- package/v3/dist/coordination/competing-hypotheses/index.js +39 -0
- package/v3/dist/coordination/competing-hypotheses/index.js.map +1 -0
- package/v3/dist/coordination/competing-hypotheses/types.d.ts +134 -0
- package/v3/dist/coordination/competing-hypotheses/types.d.ts.map +1 -0
- package/v3/dist/coordination/competing-hypotheses/types.js +20 -0
- package/v3/dist/coordination/competing-hypotheses/types.js.map +1 -0
- package/v3/dist/coordination/dynamic-scaling/dynamic-scaler.d.ts +173 -0
- package/v3/dist/coordination/dynamic-scaling/dynamic-scaler.d.ts.map +1 -0
- package/v3/dist/coordination/dynamic-scaling/dynamic-scaler.js +368 -0
- package/v3/dist/coordination/dynamic-scaling/dynamic-scaler.js.map +1 -0
- package/v3/dist/coordination/dynamic-scaling/index.d.ts +38 -0
- package/v3/dist/coordination/dynamic-scaling/index.d.ts.map +1 -0
- package/v3/dist/coordination/dynamic-scaling/index.js +39 -0
- package/v3/dist/coordination/dynamic-scaling/index.js.map +1 -0
- package/v3/dist/coordination/dynamic-scaling/types.d.ts +147 -0
- package/v3/dist/coordination/dynamic-scaling/types.d.ts.map +1 -0
- package/v3/dist/coordination/dynamic-scaling/types.js +40 -0
- package/v3/dist/coordination/dynamic-scaling/types.js.map +1 -0
- package/v3/dist/coordination/federation/federation-mailbox.d.ts +215 -0
- package/v3/dist/coordination/federation/federation-mailbox.d.ts.map +1 -0
- package/v3/dist/coordination/federation/federation-mailbox.js +442 -0
- package/v3/dist/coordination/federation/federation-mailbox.js.map +1 -0
- package/v3/dist/coordination/federation/index.d.ts +38 -0
- package/v3/dist/coordination/federation/index.d.ts.map +1 -0
- package/v3/dist/coordination/federation/index.js +39 -0
- package/v3/dist/coordination/federation/index.js.map +1 -0
- package/v3/dist/coordination/federation/types.d.ts +103 -0
- package/v3/dist/coordination/federation/types.d.ts.map +1 -0
- package/v3/dist/coordination/federation/types.js +20 -0
- package/v3/dist/coordination/federation/types.js.map +1 -0
- package/v3/dist/coordination/fleet-tiers/index.d.ts +39 -0
- package/v3/dist/coordination/fleet-tiers/index.d.ts.map +1 -0
- package/v3/dist/coordination/fleet-tiers/index.js +44 -0
- package/v3/dist/coordination/fleet-tiers/index.js.map +1 -0
- package/v3/dist/coordination/fleet-tiers/tier-config.d.ts +60 -0
- package/v3/dist/coordination/fleet-tiers/tier-config.d.ts.map +1 -0
- package/v3/dist/coordination/fleet-tiers/tier-config.js +242 -0
- package/v3/dist/coordination/fleet-tiers/tier-config.js.map +1 -0
- package/v3/dist/coordination/fleet-tiers/tier-selector.d.ts +134 -0
- package/v3/dist/coordination/fleet-tiers/tier-selector.d.ts.map +1 -0
- package/v3/dist/coordination/fleet-tiers/tier-selector.js +373 -0
- package/v3/dist/coordination/fleet-tiers/tier-selector.js.map +1 -0
- package/v3/dist/coordination/fleet-tiers/types.d.ts +137 -0
- package/v3/dist/coordination/fleet-tiers/types.d.ts.map +1 -0
- package/v3/dist/coordination/fleet-tiers/types.js +20 -0
- package/v3/dist/coordination/fleet-tiers/types.js.map +1 -0
- package/v3/dist/coordination/index.d.ts +16 -0
- package/v3/dist/coordination/index.d.ts.map +1 -1
- package/v3/dist/coordination/index.js +29 -0
- package/v3/dist/coordination/index.js.map +1 -1
- package/v3/dist/coordination/queen-coordinator.d.ts +79 -0
- package/v3/dist/coordination/queen-coordinator.d.ts.map +1 -1
- package/v3/dist/coordination/queen-coordinator.js +363 -0
- package/v3/dist/coordination/queen-coordinator.js.map +1 -1
- package/v3/dist/coordination/task-dag/dag.d.ts +93 -0
- package/v3/dist/coordination/task-dag/dag.d.ts.map +1 -0
- package/v3/dist/coordination/task-dag/dag.js +496 -0
- package/v3/dist/coordination/task-dag/dag.js.map +1 -0
- package/v3/dist/coordination/task-dag/index.d.ts +54 -0
- package/v3/dist/coordination/task-dag/index.d.ts.map +1 -0
- package/v3/dist/coordination/task-dag/index.js +62 -0
- package/v3/dist/coordination/task-dag/index.js.map +1 -0
- package/v3/dist/coordination/task-dag/scheduler.d.ts +123 -0
- package/v3/dist/coordination/task-dag/scheduler.d.ts.map +1 -0
- package/v3/dist/coordination/task-dag/scheduler.js +262 -0
- package/v3/dist/coordination/task-dag/scheduler.js.map +1 -0
- package/v3/dist/coordination/task-dag/types.d.ts +103 -0
- package/v3/dist/coordination/task-dag/types.d.ts.map +1 -0
- package/v3/dist/coordination/task-dag/types.js +9 -0
- package/v3/dist/coordination/task-dag/types.js.map +1 -0
- package/v3/dist/domains/enterprise-integration/services/odata-service.js +3 -3
- package/v3/dist/domains/enterprise-integration/services/odata-service.js.map +1 -1
- package/v3/dist/domains/enterprise-integration/services/soap-wsdl-service.d.ts.map +1 -1
- package/v3/dist/domains/enterprise-integration/services/soap-wsdl-service.js +9 -4
- package/v3/dist/domains/enterprise-integration/services/soap-wsdl-service.js.map +1 -1
- package/v3/dist/domains/requirements-validation/services/quality-criteria/quality-criteria-service.js +1 -1
- package/v3/dist/domains/requirements-validation/services/quality-criteria/quality-criteria-service.js.map +1 -1
- package/v3/dist/hooks/index.d.ts +8 -1
- package/v3/dist/hooks/index.d.ts.map +1 -1
- package/v3/dist/hooks/index.js +8 -1
- package/v3/dist/hooks/index.js.map +1 -1
- package/v3/dist/hooks/quality-gate-enforcer.d.ts +134 -0
- package/v3/dist/hooks/quality-gate-enforcer.d.ts.map +1 -0
- package/v3/dist/hooks/quality-gate-enforcer.js +265 -0
- package/v3/dist/hooks/quality-gate-enforcer.js.map +1 -0
- package/v3/dist/hooks/reasoning-bank-pattern-store.d.ts +60 -0
- package/v3/dist/hooks/reasoning-bank-pattern-store.d.ts.map +1 -0
- package/v3/dist/hooks/reasoning-bank-pattern-store.js +179 -0
- package/v3/dist/hooks/reasoning-bank-pattern-store.js.map +1 -0
- package/v3/dist/hooks/task-completed-hook.d.ts +174 -0
- package/v3/dist/hooks/task-completed-hook.d.ts.map +1 -0
- package/v3/dist/hooks/task-completed-hook.js +330 -0
- package/v3/dist/hooks/task-completed-hook.js.map +1 -0
- package/v3/dist/hooks/teammate-idle-hook.d.ts +167 -0
- package/v3/dist/hooks/teammate-idle-hook.d.ts.map +1 -0
- package/v3/dist/hooks/teammate-idle-hook.js +332 -0
- package/v3/dist/hooks/teammate-idle-hook.js.map +1 -0
- package/v3/dist/index.d.ts +3 -0
- package/v3/dist/index.d.ts.map +1 -1
- package/v3/dist/index.js +4 -0
- package/v3/dist/index.js.map +1 -1
- package/v3/dist/init/agents-installer.d.ts +5 -1
- package/v3/dist/init/agents-installer.d.ts.map +1 -1
- package/v3/dist/init/agents-installer.js +13 -5
- package/v3/dist/init/agents-installer.js.map +1 -1
- package/v3/dist/init/phases/12-verification.d.ts.map +1 -1
- package/v3/dist/init/phases/12-verification.js +13 -1
- package/v3/dist/init/phases/12-verification.js.map +1 -1
- package/v3/dist/kernel/unified-memory.d.ts.map +1 -1
- package/v3/dist/kernel/unified-memory.js +303 -18
- package/v3/dist/kernel/unified-memory.js.map +1 -1
- package/v3/dist/learning/pattern-store.js +1 -1
- package/v3/dist/learning/pattern-store.js.map +1 -1
- package/v3/dist/learning/qe-patterns.d.ts +2 -0
- package/v3/dist/learning/qe-patterns.d.ts.map +1 -1
- package/v3/dist/learning/qe-patterns.js.map +1 -1
- package/v3/dist/learning/qe-reasoning-bank.d.ts.map +1 -1
- package/v3/dist/learning/qe-reasoning-bank.js +16 -3
- package/v3/dist/learning/qe-reasoning-bank.js.map +1 -1
- package/v3/dist/mcp/bundle.js +4473 -133
- package/v3/dist/sync/cloud/tunnel-manager.d.ts.map +1 -1
- package/v3/dist/sync/cloud/tunnel-manager.js +11 -0
- package/v3/dist/sync/cloud/tunnel-manager.js.map +1 -1
- package/v3/package.json +1 -1
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: qe-devils-advocate
|
|
3
|
+
version: "3.6.0"
|
|
4
|
+
updated: "2026-02-09"
|
|
5
|
+
description: Meta-agent that challenges other agents' outputs by finding gaps, questioning assumptions, and critiquing completeness
|
|
6
|
+
v2_compat: null
|
|
7
|
+
domain: quality-assessment
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
<qe_agent_definition>
|
|
11
|
+
<identity>
|
|
12
|
+
You are the V3 QE Devil's Advocate, the adversarial reviewer in Agentic QE v3.
|
|
13
|
+
Mission: Challenge other agents' outputs to surface gaps, blind spots, false positives, and unquestioned assumptions before results reach users.
|
|
14
|
+
Domain: quality-assessment (ADR-064)
|
|
15
|
+
V2 Compatibility: New in v3 -- no v2 equivalent.
|
|
16
|
+
</identity>
|
|
17
|
+
|
|
18
|
+
<implementation_status>
|
|
19
|
+
Working:
|
|
20
|
+
- Missing edge case detection (boundary values, null/undefined, concurrency)
|
|
21
|
+
- False positive detection in security scans and coverage reports
|
|
22
|
+
- Coverage gap critique (structural vs semantic coverage gaps)
|
|
23
|
+
- Security blind spot identification (missing threat vectors)
|
|
24
|
+
- Assumption questioning (implicit preconditions, happy-path bias)
|
|
25
|
+
- Boundary value gap analysis (off-by-one, overflow, empty collections)
|
|
26
|
+
- Error handling gap detection (missing catch blocks, swallowed errors)
|
|
27
|
+
- Configurable severity thresholds and confidence filters
|
|
28
|
+
- Per-review and cumulative statistics tracking
|
|
29
|
+
|
|
30
|
+
Partial:
|
|
31
|
+
- Integration with Queen Coordinator task pipeline
|
|
32
|
+
- Cross-domain challenge coordination
|
|
33
|
+
|
|
34
|
+
Planned:
|
|
35
|
+
- Learning from past challenge outcomes (which challenges were acted on)
|
|
36
|
+
- Auto-escalation for repeated unchallenged gaps
|
|
37
|
+
</implementation_status>
|
|
38
|
+
|
|
39
|
+
<default_to_action>
|
|
40
|
+
Review outputs immediately when a ChallengeTarget is provided.
|
|
41
|
+
Apply all applicable strategies without confirmation.
|
|
42
|
+
Filter results by configured minConfidence and minSeverity.
|
|
43
|
+
Report challenges in descending severity order.
|
|
44
|
+
Always produce a summary even when no challenges are found.
|
|
45
|
+
</default_to_action>
|
|
46
|
+
|
|
47
|
+
<parallel_execution>
|
|
48
|
+
Run all applicable challenge strategies concurrently against the target.
|
|
49
|
+
Strategies are independent -- missing-edge-case, false-positive, coverage-gap, etc. run in parallel.
|
|
50
|
+
Aggregate and sort results by severity after all strategies complete.
|
|
51
|
+
Use up to 7 concurrent strategies per review.
|
|
52
|
+
</parallel_execution>
|
|
53
|
+
|
|
54
|
+
<capabilities>
|
|
55
|
+
- **Missing Edge Case Detection**: Identify untested boundary values, null handling, concurrency, and error paths in test generation outputs
|
|
56
|
+
- **False Positive Detection**: Flag likely false positives in security scans and coverage reports by checking for vague descriptions, low confidence, and known false-positive patterns
|
|
57
|
+
- **Coverage Gap Critique**: Challenge coverage claims by checking for missing negative tests, missing integration paths, and semantic gaps not visible in line coverage
|
|
58
|
+
- **Security Blind Spot Identification**: Find missing threat vectors (injection, auth bypass, SSRF, deserialization) not covered by security scan results
|
|
59
|
+
- **Assumption Questioning**: Surface implicit assumptions in quality assessments, requirements validations, and defect predictions
|
|
60
|
+
- **Boundary Value Gap Analysis**: Detect missing tests for off-by-one errors, integer overflow, empty/max-size collections, and Unicode edge cases
|
|
61
|
+
- **Error Handling Gap Detection**: Find missing error handling for network failures, timeouts, malformed input, and resource exhaustion
|
|
62
|
+
</capabilities>
|
|
63
|
+
|
|
64
|
+
<memory_namespace>
|
|
65
|
+
Reads:
|
|
66
|
+
- aqe/v3/domains/test-generation/results/* - Test generation outputs to challenge
|
|
67
|
+
- aqe/v3/domains/coverage-analysis/results/* - Coverage reports to critique
|
|
68
|
+
- aqe/v3/domains/security-compliance/scans/* - Security scans to review
|
|
69
|
+
- aqe/v3/domains/quality-assessment/reports/* - Quality reports to question
|
|
70
|
+
|
|
71
|
+
Writes:
|
|
72
|
+
- aqe/v3/devils-advocate/reviews/* - Challenge review results
|
|
73
|
+
- aqe/v3/devils-advocate/stats/* - Cumulative challenge statistics
|
|
74
|
+
- aqe/v3/devils-advocate/patterns/* - Learned gap patterns
|
|
75
|
+
|
|
76
|
+
Coordination:
|
|
77
|
+
- aqe/v3/queen/tasks/* - Task status updates
|
|
78
|
+
- aqe/v3/domains/*/results/* - Cross-domain output access
|
|
79
|
+
</memory_namespace>
|
|
80
|
+
|
|
81
|
+
<learning_protocol>
|
|
82
|
+
**MANDATORY**: When executed via the Claude Code Task tool, you MUST call the learning MCP tools.
|
|
83
|
+
|
|
84
|
+
### Query Past Challenge Patterns BEFORE Review
|
|
85
|
+
|
|
86
|
+
```typescript
|
|
87
|
+
mcp__agentic-qe__memory_retrieve({
|
|
88
|
+
key: "devils-advocate/patterns",
|
|
89
|
+
namespace: "learning"
|
|
90
|
+
})
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
### Required Learning Actions (Call AFTER Review)
|
|
94
|
+
|
|
95
|
+
**1. Store Challenge Review Experience:**
|
|
96
|
+
```typescript
|
|
97
|
+
mcp__agentic-qe__memory_store({
|
|
98
|
+
key: "devils-advocate/outcome-{timestamp}",
|
|
99
|
+
namespace: "learning",
|
|
100
|
+
value: {
|
|
101
|
+
agentId: "qe-devils-advocate",
|
|
102
|
+
taskType: "challenge-review",
|
|
103
|
+
reward: <calculated_reward>,
|
|
104
|
+
outcome: {
|
|
105
|
+
targetType: "<test-generation|coverage-analysis|security-scan|...>",
|
|
106
|
+
targetAgentId: "<agent that produced the output>",
|
|
107
|
+
challengeCount: <number>,
|
|
108
|
+
highSeverityCount: <number>,
|
|
109
|
+
overallScore: <0-1>,
|
|
110
|
+
verdict: "PASSED|CHALLENGED"
|
|
111
|
+
},
|
|
112
|
+
patterns: {
|
|
113
|
+
gapsFound: ["<types of gaps found>"],
|
|
114
|
+
strategiesUsed: ["<strategies that produced findings>"]
|
|
115
|
+
}
|
|
116
|
+
}
|
|
117
|
+
})
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
**2. Submit Review Result to Queen:**
|
|
121
|
+
```typescript
|
|
122
|
+
mcp__agentic-qe__task_submit({
|
|
123
|
+
type: "challenge-review-complete",
|
|
124
|
+
priority: "p1",
|
|
125
|
+
payload: {
|
|
126
|
+
targetAgentId: "...",
|
|
127
|
+
targetType: "...",
|
|
128
|
+
challengeCount: <number>,
|
|
129
|
+
highSeverityCount: <number>,
|
|
130
|
+
summary: "...",
|
|
131
|
+
challenges: [...]
|
|
132
|
+
}
|
|
133
|
+
})
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
### Reward Calculation Criteria (0-1 scale)
|
|
137
|
+
| Reward | Criteria |
|
|
138
|
+
|--------|----------|
|
|
139
|
+
| 1.0 | Actionable critical findings confirmed by follow-up |
|
|
140
|
+
| 0.9 | High-severity gaps found with clear evidence |
|
|
141
|
+
| 0.7 | Medium gaps found, strategies well-targeted |
|
|
142
|
+
| 0.5 | Review completed, minor findings only |
|
|
143
|
+
| 0.3 | Review completed, no significant findings (clean output) |
|
|
144
|
+
| 0.0 | Review failed or produced only noise/false challenges |
|
|
145
|
+
</learning_protocol>
|
|
146
|
+
|
|
147
|
+
<output_format>
|
|
148
|
+
- JSON for structured challenge results (challenges array, scores, summary)
|
|
149
|
+
- Markdown for human-readable challenge reports
|
|
150
|
+
- Challenges sorted by severity (critical > high > medium > low > informational)
|
|
151
|
+
- Include challenge count, overall confidence score, and per-strategy breakdown
|
|
152
|
+
</output_format>
|
|
153
|
+
|
|
154
|
+
<examples>
|
|
155
|
+
Example 1: Challenge test generation output
|
|
156
|
+
```
|
|
157
|
+
Input: Review test-generation output from agent test-gen-001
|
|
158
|
+
- 5 tests generated for UserService.createUser()
|
|
159
|
+
- All tests check happy path with valid data
|
|
160
|
+
|
|
161
|
+
Output: CHALLENGED (Score: 0.38, 4 challenges)
|
|
162
|
+
[HIGH] Missing edge case: No test for duplicate email
|
|
163
|
+
[HIGH] Missing edge case: No test for empty/null username
|
|
164
|
+
[MEDIUM] Boundary value gap: No max-length validation test
|
|
165
|
+
[LOW] Error handling gap: No test for database connection failure
|
|
166
|
+
Summary: 5 tests cover only the happy path. No negative tests,
|
|
167
|
+
no boundary tests, no error handling tests. Test suite has
|
|
168
|
+
significant gaps in edge case coverage.
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
Example 2: Challenge security scan output
|
|
172
|
+
```
|
|
173
|
+
Input: Review security-scan output from agent sec-scan-001
|
|
174
|
+
- 0 vulnerabilities found
|
|
175
|
+
- Scanned: SQL injection, XSS
|
|
176
|
+
|
|
177
|
+
Output: CHALLENGED (Score: 0.52, 2 challenges)
|
|
178
|
+
[HIGH] Security blind spot: No SSRF testing performed
|
|
179
|
+
[MEDIUM] Security blind spot: No deserialization checks
|
|
180
|
+
Summary: Scan covers injection and XSS but misses SSRF,
|
|
181
|
+
deserialization, and authentication bypass vectors.
|
|
182
|
+
```
|
|
183
|
+
</examples>
|
|
184
|
+
|
|
185
|
+
<v3_integration>
|
|
186
|
+
### Code Implementation
|
|
187
|
+
The Devil's Advocate agent is implemented in `v3/src/agents/devils-advocate/`:
|
|
188
|
+
- `agent.ts` - Core `DevilsAdvocate` class with `review()` method
|
|
189
|
+
- `strategies.ts` - 7 pluggable challenge strategies
|
|
190
|
+
- `types.ts` - Type definitions for targets, challenges, results
|
|
191
|
+
|
|
192
|
+
### Usage
|
|
193
|
+
```typescript
|
|
194
|
+
import { DevilsAdvocate } from '@agentic-qe/v3';
|
|
195
|
+
|
|
196
|
+
const da = new DevilsAdvocate({ minConfidence: 0.5 });
|
|
197
|
+
|
|
198
|
+
const result = da.review({
|
|
199
|
+
type: 'test-generation',
|
|
200
|
+
agentId: 'test-gen-001',
|
|
201
|
+
domain: 'test-generation',
|
|
202
|
+
output: { testCount: 3, tests: [] },
|
|
203
|
+
timestamp: Date.now(),
|
|
204
|
+
});
|
|
205
|
+
```
|
|
206
|
+
|
|
207
|
+
### Strategies
|
|
208
|
+
| Strategy | Applies To | Detects |
|
|
209
|
+
|----------|-----------|---------|
|
|
210
|
+
| MissingEdgeCaseStrategy | test-generation | Untested edge cases, null handling |
|
|
211
|
+
| FalsePositiveDetectionStrategy | security-scan, coverage-analysis | Likely false positives |
|
|
212
|
+
| CoverageGapCritiqueStrategy | coverage-analysis | Semantic gaps in coverage |
|
|
213
|
+
| SecurityBlindSpotStrategy | security-scan | Missing threat vectors |
|
|
214
|
+
| AssumptionQuestioningStrategy | quality-assessment, defect-prediction, requirements | Implicit assumptions |
|
|
215
|
+
| BoundaryValueGapStrategy | test-generation | Off-by-one, overflow, empty collections |
|
|
216
|
+
| ErrorHandlingGapStrategy | test-generation, contract-validation | Missing error handling |
|
|
217
|
+
</v3_integration>
|
|
218
|
+
</qe_agent_definition>
|
|
@@ -245,7 +245,7 @@ interface QualityCriteriaAnalysis {
|
|
|
245
245
|
```
|
|
246
246
|
|
|
247
247
|
## Template Location
|
|
248
|
-
Helper files installed to `.claude/
|
|
248
|
+
Helper files installed to `.claude/helpers/v3/quality-criteria/`:
|
|
249
249
|
- `quality-criteria-reference-template.html` - HTML output template (MUST read before generating)
|
|
250
250
|
- `htsm-categories.md` - Detailed category definitions
|
|
251
251
|
- `evidence-classification.md` - Evidence type guidelines
|
|
@@ -404,7 +404,7 @@ if (!valid) {
|
|
|
404
404
|
### Output Validation
|
|
405
405
|
If HTML output requested, always read template first:
|
|
406
406
|
```
|
|
407
|
-
.claude/
|
|
407
|
+
.claude/helpers/v3/quality-criteria/quality-criteria-reference-template.html
|
|
408
408
|
```
|
|
409
409
|
</final_validation>
|
|
410
410
|
</qe_agent_definition>
|
|
@@ -441,5 +441,5 @@ When ALL phases complete -> <promise>DEPLOYMENT_READY</promise>
|
|
|
441
441
|
---
|
|
442
442
|
|
|
443
443
|
**Origin**: Adapted from Ralph Wiggum plugin (anthropics/claude-code)
|
|
444
|
-
**Specialized for**: Agentic QE v3 Fleet with
|
|
444
|
+
**Specialized for**: Agentic QE v3 Fleet with 60 QE agents
|
|
445
445
|
**Domains**: test-generation, test-execution, coverage-analysis, quality-assessment
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Agentic QE v3 - Devil's Advocate Agent
|
|
3
|
+
* ADR-064, Phase 2C: Core agent class that orchestrates challenge reviews
|
|
4
|
+
*
|
|
5
|
+
* The Devil's Advocate reviews outputs from other agents and challenges them
|
|
6
|
+
* using pluggable strategies. It does not create tests or fix code -- it finds
|
|
7
|
+
* what was missed, questions assumptions, and argues why gaps matter.
|
|
8
|
+
*
|
|
9
|
+
* @module agents/devils-advocate
|
|
10
|
+
*/
|
|
11
|
+
import type { ChallengeTarget, ChallengeTargetType, ChallengeResult, ChallengeStrategy, DevilsAdvocateConfig, DevilsAdvocateStats } from './types.js';
|
|
12
|
+
/**
|
|
13
|
+
* The Devil's Advocate agent reviews other agents' outputs and produces
|
|
14
|
+
* structured challenges identifying gaps, weaknesses, and questionable results.
|
|
15
|
+
*
|
|
16
|
+
* @example
|
|
17
|
+
* ```typescript
|
|
18
|
+
* const da = new DevilsAdvocate();
|
|
19
|
+
*
|
|
20
|
+
* const result = da.review({
|
|
21
|
+
* type: 'test-generation',
|
|
22
|
+
* agentId: 'test-gen-001',
|
|
23
|
+
* domain: 'test-generation',
|
|
24
|
+
* output: { testCount: 5, tests: [...] },
|
|
25
|
+
* timestamp: Date.now(),
|
|
26
|
+
* });
|
|
27
|
+
*
|
|
28
|
+
* console.log(result.summary);
|
|
29
|
+
* for (const challenge of result.challenges) {
|
|
30
|
+
* console.log(`[${challenge.severity}] ${challenge.title}`);
|
|
31
|
+
* }
|
|
32
|
+
* ```
|
|
33
|
+
*/
|
|
34
|
+
export declare class DevilsAdvocate {
|
|
35
|
+
private readonly config;
|
|
36
|
+
private readonly strategies;
|
|
37
|
+
private reviewCount;
|
|
38
|
+
private totalChallengeCount;
|
|
39
|
+
private scoreAccumulator;
|
|
40
|
+
private readonly severityCounts;
|
|
41
|
+
private readonly categoryCounts;
|
|
42
|
+
/**
|
|
43
|
+
* Create a new Devil's Advocate agent.
|
|
44
|
+
*
|
|
45
|
+
* @param config - Optional partial configuration (merged with defaults)
|
|
46
|
+
*/
|
|
47
|
+
constructor(config?: Partial<DevilsAdvocateConfig>);
|
|
48
|
+
/**
|
|
49
|
+
* Review an agent's output and produce challenges.
|
|
50
|
+
*
|
|
51
|
+
* The review process:
|
|
52
|
+
* 1. Gets all applicable strategies for the target type
|
|
53
|
+
* 2. Filters to enabled strategies
|
|
54
|
+
* 3. Runs each strategy against the target
|
|
55
|
+
* 4. Collects all challenges
|
|
56
|
+
* 5. Filters by minConfidence and minSeverity
|
|
57
|
+
* 6. Sorts by severity (critical first)
|
|
58
|
+
* 7. Limits to maxChallengesPerReview
|
|
59
|
+
* 8. Computes overall score
|
|
60
|
+
* 9. Generates summary
|
|
61
|
+
*
|
|
62
|
+
* @param target - The output to challenge
|
|
63
|
+
* @returns A ChallengeResult with all findings
|
|
64
|
+
*/
|
|
65
|
+
review(target: ChallengeTarget): ChallengeResult;
|
|
66
|
+
/**
|
|
67
|
+
* Get applicable and enabled strategies for a target type.
|
|
68
|
+
*
|
|
69
|
+
* @param targetType - The type of output to find strategies for
|
|
70
|
+
* @returns Array of strategies that can review this target type
|
|
71
|
+
*/
|
|
72
|
+
getStrategiesFor(targetType: ChallengeTargetType): ChallengeStrategy[];
|
|
73
|
+
/**
|
|
74
|
+
* Get accumulated review statistics.
|
|
75
|
+
*
|
|
76
|
+
* @returns Current statistics snapshot
|
|
77
|
+
*/
|
|
78
|
+
getStats(): DevilsAdvocateStats;
|
|
79
|
+
/**
|
|
80
|
+
* Reset all accumulated statistics.
|
|
81
|
+
*/
|
|
82
|
+
resetStats(): void;
|
|
83
|
+
/**
|
|
84
|
+
* Compute the overall score based on challenges found.
|
|
85
|
+
* Score is 1.0 minus the weighted impact of all challenges.
|
|
86
|
+
* A score of 1.0 means no challenges were found.
|
|
87
|
+
* A score of 0.0 means maximum challenge impact.
|
|
88
|
+
*/
|
|
89
|
+
private computeScore;
|
|
90
|
+
/**
|
|
91
|
+
* Generate a human-readable summary of the review.
|
|
92
|
+
*/
|
|
93
|
+
private generateSummary;
|
|
94
|
+
/**
|
|
95
|
+
* Count challenges grouped by severity.
|
|
96
|
+
*/
|
|
97
|
+
private countBySeverity;
|
|
98
|
+
/**
|
|
99
|
+
* Update internal statistics after a review.
|
|
100
|
+
*/
|
|
101
|
+
private updateStats;
|
|
102
|
+
}
|
|
103
|
+
//# sourceMappingURL=agent.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"agent.d.ts","sourceRoot":"","sources":["../../../src/agents/devils-advocate/agent.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,OAAO,KAAK,EACV,eAAe,EACf,mBAAmB,EACnB,eAAe,EAGf,iBAAiB,EAEjB,oBAAoB,EACpB,mBAAmB,EACpB,MAAM,YAAY,CAAC;AAYpB;;;;;;;;;;;;;;;;;;;;;GAqBG;AACH,qBAAa,cAAc;IACzB,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAuB;IAC9C,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAsB;IACjD,OAAO,CAAC,WAAW,CAAK;IACxB,OAAO,CAAC,mBAAmB,CAAK;IAChC,OAAO,CAAC,gBAAgB,CAAK;IAC7B,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAoC;IACnE,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAyB;IAExD;;;;OAIG;gBACS,MAAM,CAAC,EAAE,OAAO,CAAC,oBAAoB,CAAC;IAmBlD;;;;;;;;;;;;;;;;OAgBG;IACH,MAAM,CAAC,MAAM,EAAE,eAAe,GAAG,eAAe;IA0DhD;;;;;OAKG;IACH,gBAAgB,CAAC,UAAU,EAAE,mBAAmB,GAAG,iBAAiB,EAAE;IAMtE;;;;OAIG;IACH,QAAQ,IAAI,mBAAmB;IAa/B;;OAEG;IACH,UAAU,IAAI,IAAI;IAYlB;;;;;OAKG;IACH,OAAO,CAAC,YAAY;IAapB;;OAEG;IACH,OAAO,CAAC,eAAe;IA4BvB;;OAEG;IACH,OAAO,CAAC,eAAe;IAgBvB;;OAEG;IACH,OAAO,CAAC,WAAW;CAUpB"}
|
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Agentic QE v3 - Devil's Advocate Agent
|
|
3
|
+
* ADR-064, Phase 2C: Core agent class that orchestrates challenge reviews
|
|
4
|
+
*
|
|
5
|
+
* The Devil's Advocate reviews outputs from other agents and challenges them
|
|
6
|
+
* using pluggable strategies. It does not create tests or fix code -- it finds
|
|
7
|
+
* what was missed, questions assumptions, and argues why gaps matter.
|
|
8
|
+
*
|
|
9
|
+
* @module agents/devils-advocate
|
|
10
|
+
*/
|
|
11
|
+
import { DEFAULT_DEVILS_ADVOCATE_CONFIG, SEVERITY_ORDER, SEVERITY_WEIGHTS, } from './types.js';
|
|
12
|
+
import { createAllStrategies, getApplicableStrategies } from './strategies.js';
|
|
13
|
+
// ============================================================================
|
|
14
|
+
// DevilsAdvocate Agent
|
|
15
|
+
// ============================================================================
|
|
16
|
+
/**
|
|
17
|
+
* The Devil's Advocate agent reviews other agents' outputs and produces
|
|
18
|
+
* structured challenges identifying gaps, weaknesses, and questionable results.
|
|
19
|
+
*
|
|
20
|
+
* @example
|
|
21
|
+
* ```typescript
|
|
22
|
+
* const da = new DevilsAdvocate();
|
|
23
|
+
*
|
|
24
|
+
* const result = da.review({
|
|
25
|
+
* type: 'test-generation',
|
|
26
|
+
* agentId: 'test-gen-001',
|
|
27
|
+
* domain: 'test-generation',
|
|
28
|
+
* output: { testCount: 5, tests: [...] },
|
|
29
|
+
* timestamp: Date.now(),
|
|
30
|
+
* });
|
|
31
|
+
*
|
|
32
|
+
* console.log(result.summary);
|
|
33
|
+
* for (const challenge of result.challenges) {
|
|
34
|
+
* console.log(`[${challenge.severity}] ${challenge.title}`);
|
|
35
|
+
* }
|
|
36
|
+
* ```
|
|
37
|
+
*/
|
|
38
|
+
export class DevilsAdvocate {
    /** Effective configuration: defaults overlaid with the caller's overrides. */
    config;
    /** Every strategy instance returned by `createAllStrategies()`. */
    strategies;
    /** Number of `review()` calls since construction or the last `resetStats()`. */
    reviewCount = 0;
    /** Total challenges returned across all counted reviews. */
    totalChallengeCount = 0;
    /** Sum of the overall scores of all counted reviews (used for the average). */
    scoreAccumulator = 0;
    /** Running count of returned challenges, keyed by severity. */
    severityCounts;
    /** Running count of returned challenges, keyed by category. */
    categoryCounts;
    /**
     * Create a new Devil's Advocate agent.
     *
     * Override entries whose value is `null` or `undefined` are ignored, so
     * passing e.g. `{ minConfidence: undefined }` keeps the default instead of
     * silently disabling the corresponding filter.
     *
     * @param config - Optional partial configuration (merged with defaults)
     */
    constructor(config) {
        // Drop nullish overrides: an object spread would otherwise copy them
        // over the defaults.
        const overrides = Object.fromEntries(Object.entries(config ?? {}).filter(([, value]) => value != null));
        const enabled = overrides.enabledStrategies ?? DEFAULT_DEVILS_ADVOCATE_CONFIG.enabledStrategies;
        this.config = {
            ...DEFAULT_DEVILS_ADVOCATE_CONFIG,
            ...overrides,
            // Own copy, so the shared default array (or the caller's array) is
            // never aliased by this instance.
            enabledStrategies: [...enabled],
        };
        this.strategies = createAllStrategies();
        this.severityCounts = {
            critical: 0,
            high: 0,
            medium: 0,
            low: 0,
            informational: 0,
        };
        this.categoryCounts = {};
    }
    /**
     * Review an agent's output and produce challenges.
     *
     * The review process:
     * 1. Gets all applicable strategies for the target type
     * 2. Filters to enabled strategies
     * 3. Runs each strategy against the target
     * 4. Collects all challenges
     * 5. Filters by minConfidence and minSeverity, discarding malformed
     *    challenges (unknown severity, or a confidence that is not a number)
     * 6. Sorts by severity (critical first), then by confidence descending
     * 7. Limits to maxChallengesPerReview
     * 8. Computes overall score
     * 9. Generates summary
     *
     * A strategy that throws is skipped; it never aborts the review.
     *
     * @param target - The output to challenge
     * @returns A ChallengeResult with all findings
     */
    review(target) {
        const startTime = Date.now();
        const { minConfidence, minSeverity, maxChallengesPerReview } = this.config;
        // 1-2: Get applicable + enabled strategies
        const applicable = this.getStrategiesFor(target.type);
        // 3-4: Run each strategy and collect challenges
        const allChallenges = [];
        for (const strategy of applicable) {
            try {
                allChallenges.push(...strategy.challenge(target));
            }
            catch {
                // Deliberate best-effort: a failing strategy must not abort the
                // entire review, so it is skipped.
            }
        }
        // Position of each severity in SEVERITY_ORDER (0 = most severe), built
        // once so filtering and sorting do not rescan the array per comparison.
        const severityRank = new Map();
        SEVERITY_ORDER.forEach((severity, index) => severityRank.set(severity, index));
        // An unrecognised minSeverity fails open (no severity filtering)
        // instead of producing a threshold of -1 that rejects everything.
        const threshold = severityRank.get(minSeverity) ?? SEVERITY_ORDER.length - 1;
        // 5: Filter by confidence and severity
        const filtered = allChallenges.filter(c => {
            const rank = severityRank.get(c?.severity);
            // Unknown severity: cannot be ranked, counted, or summarised.
            if (rank === undefined)
                return false;
            // Written as a negated >= so NaN / missing confidence is rejected
            // rather than slipping through and turning the score into NaN.
            if (!(c.confidence >= minConfidence))
                return false;
            return rank <= threshold;
        });
        // 6: Sort by severity (critical first), then by confidence descending.
        // `filtered` is a fresh array, so sorting in place is safe.
        filtered.sort((a, b) => {
            const sevDiff = severityRank.get(a.severity) - severityRank.get(b.severity);
            return sevDiff !== 0 ? sevDiff : b.confidence - a.confidence;
        });
        // 7: Limit to max challenges (a negative limit means "none", not
        // "all but the last k" as a raw slice() would interpret it).
        const limited = filtered.slice(0, Math.max(0, maxChallengesPerReview));
        // 8: Compute overall score
        const overallScore = this.computeScore(limited);
        // 9: Generate summary
        const summary = this.generateSummary(target, limited, overallScore);
        const reviewDuration = Date.now() - startTime;
        // Update internal statistics
        this.updateStats(limited, overallScore);
        return {
            targetType: target.type,
            targetAgentId: target.agentId,
            challenges: limited,
            overallScore,
            summary,
            timestamp: Date.now(),
            reviewDuration,
        };
    }
    /**
     * Get applicable and enabled strategies for a target type.
     *
     * @param targetType - The type of output to find strategies for
     * @returns Strategies that `getApplicableStrategies` selects for this
     *          target type and whose `type` is listed in `enabledStrategies`
     */
    getStrategiesFor(targetType) {
        const applicable = getApplicableStrategies(this.strategies, targetType);
        const enabledSet = new Set(this.config.enabledStrategies);
        return applicable.filter(s => enabledSet.has(s.type));
    }
    /**
     * Get accumulated review statistics.
     *
     * The counter objects are shallow copies, so mutating the returned
     * snapshot does not affect the agent. With no reviews recorded the
     * average challenge count is 0 and the average score is 1.
     *
     * @returns Current statistics snapshot
     */
    getStats() {
        const hasReviews = this.reviewCount > 0;
        return {
            totalReviews: this.reviewCount,
            totalChallenges: this.totalChallengeCount,
            challengesBySeverity: { ...this.severityCounts },
            challengesByCategory: { ...this.categoryCounts },
            averageChallengesPerReview: hasReviews ? this.totalChallengeCount / this.reviewCount : 0,
            averageScore: hasReviews ? this.scoreAccumulator / this.reviewCount : 1,
        };
    }
    /**
     * Reset all accumulated statistics.
     *
     * The counter objects are cleared in place rather than replaced.
     */
    resetStats() {
        this.reviewCount = 0;
        this.totalChallengeCount = 0;
        this.scoreAccumulator = 0;
        for (const key of SEVERITY_ORDER) {
            this.severityCounts[key] = 0;
        }
        for (const key of Object.keys(this.categoryCounts)) {
            delete this.categoryCounts[key];
        }
    }
    /**
     * Compute the overall score based on challenges found.
     * Score is 1.0 minus the summed (severity weight x confidence) of all
     * challenges, clamped to [0, 1].
     * A score of 1.0 means no challenges were found.
     * A score of 0.0 means maximum challenge impact.
     *
     * @param challenges - Challenges to score
     * @returns Score in the range [0, 1]
     */
    computeScore(challenges) {
        if (challenges.length === 0)
            return 1.0;
        let totalImpact = 0;
        for (const challenge of challenges) {
            // Severities without a configured weight contribute a minimal 0.01.
            const weight = SEVERITY_WEIGHTS[challenge.severity] ?? 0.01;
            totalImpact += weight * challenge.confidence;
        }
        // Clamp to [0, 1]
        return Math.max(0, Math.min(1, 1 - totalImpact));
    }
    /**
     * Generate a human-readable summary of the review.
     *
     * @param target - The reviewed target (its `type` and `agentId` are quoted)
     * @param challenges - Challenges being reported
     * @param score - Overall score, rendered with two decimals
     * @returns One-sentence summary; lists per-severity counts when any
     *          challenges were raised
     */
    generateSummary(target, challenges, score) {
        if (challenges.length === 0) {
            return (`Review of ${target.type} output from agent "${target.agentId}" ` +
                `found no significant challenges. Score: ${score.toFixed(2)}.`);
        }
        const bySeverity = this.countBySeverity(challenges);
        // Iterate SEVERITY_ORDER so the parts appear most-severe first.
        const severityParts = [];
        for (const sev of SEVERITY_ORDER) {
            const count = bySeverity[sev];
            if (count > 0) {
                severityParts.push(`${count} ${sev}`);
            }
        }
        return (`Review of ${target.type} output from agent "${target.agentId}" ` +
            `raised ${challenges.length} challenge(s): ${severityParts.join(', ')}. ` +
            `Overall score: ${score.toFixed(2)}.`);
    }
    /**
     * Count challenges grouped by severity.
     *
     * @param challenges - Challenges to tally
     * @returns A fresh object with one counter per known severity
     */
    countBySeverity(challenges) {
        const counts = {
            critical: 0,
            high: 0,
            medium: 0,
            low: 0,
            informational: 0,
        };
        for (const c of challenges) {
            counts[c.severity]++;
        }
        return counts;
    }
    /**
     * Update internal statistics after a review.
     *
     * @param challenges - Challenges returned by the review
     * @param score - Overall score of the review
     */
    updateStats(challenges, score) {
        this.reviewCount++;
        this.totalChallengeCount += challenges.length;
        this.scoreAccumulator += score;
        for (const c of challenges) {
            this.severityCounts[c.severity]++;
            this.categoryCounts[c.category] = (this.categoryCounts[c.category] ?? 0) + 1;
        }
    }
}
|
|
240
|
+
//# sourceMappingURL=agent.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"agent.js","sourceRoot":"","sources":["../../../src/agents/devils-advocate/agent.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAaH,OAAO,EACL,8BAA8B,EAC9B,cAAc,EACd,gBAAgB,GACjB,MAAM,YAAY,CAAC;AACpB,OAAO,EAAE,mBAAmB,EAAE,uBAAuB,EAAE,MAAM,iBAAiB,CAAC;AAE/E,+EAA+E;AAC/E,uBAAuB;AACvB,+EAA+E;AAE/E;;;;;;;;;;;;;;;;;;;;;GAqBG;AACH,MAAM,OAAO,cAAc;IACR,MAAM,CAAuB;IAC7B,UAAU,CAAsB;IACzC,WAAW,GAAG,CAAC,CAAC;IAChB,mBAAmB,GAAG,CAAC,CAAC;IACxB,gBAAgB,GAAG,CAAC,CAAC;IACZ,cAAc,CAAoC;IAClD,cAAc,CAAyB;IAExD;;;;OAIG;IACH,YAAY,MAAsC;QAChD,IAAI,CAAC,MAAM,GAAG;YACZ,GAAG,8BAA8B;YACjC,GAAG,MAAM;YACT,iBAAiB,EAAE,MAAM,EAAE,iBAAiB,IAAI,8BAA8B,CAAC,iBAAiB;SACjG,CAAC;QAEF,IAAI,CAAC,UAAU,GAAG,mBAAmB,EAAE,CAAC;QAExC,IAAI,CAAC,cAAc,GAAG;YACpB,QAAQ,EAAE,CAAC;YACX,IAAI,EAAE,CAAC;YACP,MAAM,EAAE,CAAC;YACT,GAAG,EAAE,CAAC;YACN,aAAa,EAAE,CAAC;SACjB,CAAC;QACF,IAAI,CAAC,cAAc,GAAG,EAAE,CAAC;IAC3B,CAAC;IAED;;;;;;;;;;;;;;;;OAgBG;IACH,MAAM,CAAC,MAAuB;QAC5B,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAE7B,2CAA2C;QAC3C,MAAM,UAAU,GAAG,IAAI,CAAC,gBAAgB,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;QAEtD,8CAA8C;QAC9C,MAAM,aAAa,GAAgB,EAAE,CAAC;QACtC,KAAK,MAAM,QAAQ,IAAI,UAAU,EAAE,CAAC;YAClC,IAAI,CAAC;gBACH,MAAM,KAAK,GAAG,QAAQ,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;gBACzC,aAAa,CAAC,IAAI,CAAC,GAAG,KAAK,CAAC,CAAC;YAC/B,CAAC;YAAC,MAAM,CAAC;gBACP,uDAAuD;gBACvD,sCAAsC;YACxC,CAAC;QACH,CAAC;QAED,yCAAyC;QACzC,MAAM,gBAAgB,GAAG,cAAc,CAAC,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC;QACzE,MAAM,QAAQ,GAAG,aAAa,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE;YACxC,IAAI,CAAC,CAAC,UAAU,GAAG,IAAI,CAAC,MAAM,CAAC,aAAa;gBAAE,OAAO,KAAK,CAAC;YAC3D,MAAM,aAAa,GAAG,cAAc,CAAC,OAAO,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC;YACzD,OAAO,aAAa,IAAI,gBAAgB,CAAC;QAC3C,CAAC,CAAC,CAAC;QAEH,sEAAsE;QACtE,MAAM,MAAM,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;YACpC,MAAM,OAAO,GAAG,cAAc,CAAC,OAAO,CAAC,CAAC,CAAC,QAAQ,CAAC,GAAG,cAAc,CAAC,OAAO,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC;YACxF,IAAI,OAAO,KAAK,CAAC;gBAAE,OAAO,OAAO,CAAC;YAClC,OAAO,CAAC,CAAC,UAAU,GAAG,CAAC,CAAC,UAAU,CAAC;QACrC,CAAC,CAAC,CAA
C;QAEH,6BAA6B;QAC7B,MAAM,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,MAAM,CAAC,sBAAsB,CAAC,CAAC;QAEpE,2BAA2B;QAC3B,MAAM,YAAY,GAAG,IAAI,CAAC,YAAY,CAAC,OAAO,CAAC,CAAC;QAEhD,sBAAsB;QACtB,MAAM,OAAO,GAAG,IAAI,CAAC,eAAe,CAAC,MAAM,EAAE,OAAO,EAAE,YAAY,CAAC,CAAC;QAEpE,MAAM,cAAc,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;QAE9C,6BAA6B;QAC7B,IAAI,CAAC,WAAW,CAAC,OAAO,EAAE,YAAY,CAAC,CAAC;QAExC,OAAO;YACL,UAAU,EAAE,MAAM,CAAC,IAAI;YACvB,aAAa,EAAE,MAAM,CAAC,OAAO;YAC7B,UAAU,EAAE,OAAO;YACnB,YAAY;YACZ,OAAO;YACP,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE;YACrB,cAAc;SACf,CAAC;IACJ,CAAC;IAED;;;;;OAKG;IACH,gBAAgB,CAAC,UAA+B;QAC9C,MAAM,UAAU,GAAG,uBAAuB,CAAC,IAAI,CAAC,UAAU,EAAE,UAAU,CAAC,CAAC;QACxE,MAAM,UAAU,GAAG,IAAI,GAAG,CAAwB,IAAI,CAAC,MAAM,CAAC,iBAAiB,CAAC,CAAC;QACjF,OAAO,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;IACxD,CAAC;IAED;;;;OAIG;IACH,QAAQ;QACN,OAAO;YACL,YAAY,EAAE,IAAI,CAAC,WAAW;YAC9B,eAAe,EAAE,IAAI,CAAC,mBAAmB;YACzC,oBAAoB,EAAE,EAAE,GAAG,IAAI,CAAC,cAAc,EAAE;YAChD,oBAAoB,EAAE,EAAE,GAAG,IAAI,CAAC,cAAc,EAAE;YAChD,0BAA0B,EACxB,IAAI,CAAC,WAAW,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,mBAAmB,GAAG,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC;YACxE,YAAY,EACV,IAAI,CAAC,WAAW,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,gBAAgB,GAAG,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC;SACtE,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,UAAU;QACR,IAAI,CAAC,WAAW,GAAG,CAAC,CAAC;QACrB,IAAI,CAAC,mBAAmB,GAAG,CAAC,CAAC;QAC7B,IAAI,CAAC,gBAAgB,GAAG,CAAC,CAAC;QAC1B,KAAK,MAAM,GAAG,IAAI,cAAc,EAAE,CAAC;YACjC,IAAI,CAAC,cAAc,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QAC/B,CAAC;QACD,KAAK,MAAM,GAAG,IAAI,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,cAAc,CAAC,EAAE,CAAC;YACnD,OAAO,IAAI,CAAC,cAAc,CAAC,GAAG,CAAC,CAAC;QAClC,CAAC;IACH,CAAC;IAED;;;;;OAKG;IACK,YAAY,CAAC,UAAgC;QACnD,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,GAAG,CAAC;QAExC,IAAI,WAAW,GAAG,CAAC,CAAC;QACpB,KAAK,MAAM,SAAS,IAAI,UAAU,EAAE,CAAC;YACnC,MAAM,MAAM,GAAG,gBAAgB,CAAC,SAAS,CAAC,QAAQ,CAAC,IAAI,IAAI,CAAC;YAC5D,WAAW,IAAI,MAAM,GAAG,SAAS,CAAC,UAAU,CAAC;QAC/C,CAAC;QAED,kBAAkB;QAClB,OAAO,IAAI,CAAC,GAAG,CAAC
,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,GAAG,WAAW,CAAC,CAAC,CAAC;IACnD,CAAC;IAED;;OAEG;IACK,eAAe,CACrB,MAAuB,EACvB,UAAgC,EAChC,KAAa;QAEb,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC5B,OAAO,CACL,aAAa,MAAM,CAAC,IAAI,uBAAuB,MAAM,CAAC,OAAO,IAAI;gBACjE,2CAA2C,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAC/D,CAAC;QACJ,CAAC;QAED,MAAM,UAAU,GAAG,IAAI,CAAC,eAAe,CAAC,UAAU,CAAC,CAAC;QACpD,MAAM,aAAa,GAAa,EAAE,CAAC;QACnC,KAAK,MAAM,GAAG,IAAI,cAAc,EAAE,CAAC;YACjC,MAAM,KAAK,GAAG,UAAU,CAAC,GAAG,CAAC,CAAC;YAC9B,IAAI,KAAK,GAAG,CAAC,EAAE,CAAC;gBACd,aAAa,CAAC,IAAI,CAAC,GAAG,KAAK,IAAI,GAAG,EAAE,CAAC,CAAC;YACxC,CAAC;QACH,CAAC;QAED,OAAO,CACL,aAAa,MAAM,CAAC,IAAI,uBAAuB,MAAM,CAAC,OAAO,IAAI;YACjE,UAAU,UAAU,CAAC,MAAM,kBAAkB,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI;YACzE,kBAAkB,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CACtC,CAAC;IACJ,CAAC;IAED;;OAEG;IACK,eAAe,CACrB,UAAgC;QAEhC,MAAM,MAAM,GAAsC;YAChD,QAAQ,EAAE,CAAC;YACX,IAAI,EAAE,CAAC;YACP,MAAM,EAAE,CAAC;YACT,GAAG,EAAE,CAAC;YACN,aAAa,EAAE,CAAC;SACjB,CAAC;QACF,KAAK,MAAM,CAAC,IAAI,UAAU,EAAE,CAAC;YAC3B,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC,EAAE,CAAC;QACvB,CAAC;QACD,OAAO,MAAM,CAAC;IAChB,CAAC;IAED;;OAEG;IACK,WAAW,CAAC,UAAgC,EAAE,KAAa;QACjE,IAAI,CAAC,WAAW,EAAE,CAAC;QACnB,IAAI,CAAC,mBAAmB,IAAI,UAAU,CAAC,MAAM,CAAC;QAC9C,IAAI,CAAC,gBAAgB,IAAI,KAAK,CAAC;QAE/B,KAAK,MAAM,CAAC,IAAI,UAAU,EAAE,CAAC;YAC3B,IAAI,CAAC,cAAc,CAAC,CAAC,CAAC,QAAQ,CAAC,EAAE,CAAC;YAClC,IAAI,CAAC,cAAc,CAAC,CAAC,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;QAC/E,CAAC;IACH,CAAC;CACF"}
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Agentic QE v3 - Devil's Advocate Agent
|
|
3
|
+
* ADR-064, Phase 2C: Barrel exports and factory
|
|
4
|
+
*
|
|
5
|
+
* The Devil's Advocate agent challenges other agents' outputs by reviewing
|
|
6
|
+
* test completeness, questioning security scan results, and identifying
|
|
7
|
+
* coverage gaps in claimed results.
|
|
8
|
+
*
|
|
9
|
+
* @module agents/devils-advocate
|
|
10
|
+
*
|
|
11
|
+
* @example
|
|
12
|
+
* ```typescript
|
|
13
|
+
* import {
|
|
14
|
+
* createDevilsAdvocate,
|
|
15
|
+
* type ChallengeTarget,
|
|
16
|
+
* type ChallengeResult,
|
|
17
|
+
* } from './agents/devils-advocate';
|
|
18
|
+
*
|
|
19
|
+
* const da = createDevilsAdvocate({ minConfidence: 0.5 });
|
|
20
|
+
*
|
|
21
|
+
* const target: ChallengeTarget = {
|
|
22
|
+
* type: 'test-generation',
|
|
23
|
+
* agentId: 'test-gen-001',
|
|
24
|
+
* domain: 'test-generation',
|
|
25
|
+
* output: { testCount: 3, tests: [] },
|
|
26
|
+
* timestamp: Date.now(),
|
|
27
|
+
* };
|
|
28
|
+
*
|
|
29
|
+
* const result: ChallengeResult = da.review(target);
|
|
30
|
+
* console.log(result.summary);
|
|
31
|
+
* ```
|
|
32
|
+
*/
|
|
33
|
+
export type { ChallengeTargetType, ChallengeTarget, ChallengeSeverity, Challenge, ChallengeResult, ChallengeStrategyType, ChallengeStrategy, DevilsAdvocateConfig, DevilsAdvocateStats, } from './types.js';
|
|
34
|
+
export { ALL_CHALLENGE_TARGET_TYPES, ALL_CHALLENGE_STRATEGY_TYPES, SEVERITY_ORDER, SEVERITY_WEIGHTS, DEFAULT_DEVILS_ADVOCATE_CONFIG, } from './types.js';
|
|
35
|
+
export { isChallengeTargetType, isChallengeStrategyType, isChallengeSeverity, } from './types.js';
|
|
36
|
+
export { MissingEdgeCaseStrategy, FalsePositiveDetectionStrategy, CoverageGapCritiqueStrategy, SecurityBlindSpotStrategy, AssumptionQuestioningStrategy, BoundaryValueGapStrategy, ErrorHandlingGapStrategy, createAllStrategies, getApplicableStrategies, } from './strategies.js';
|
|
37
|
+
export { DevilsAdvocate } from './agent.js';
|
|
38
|
+
import type { DevilsAdvocateConfig } from './types.js';
|
|
39
|
+
import { DevilsAdvocate } from './agent.js';
|
|
40
|
+
/**
|
|
41
|
+
* Factory function to create a Devil's Advocate agent instance.
|
|
42
|
+
*
|
|
43
|
+
* @param config - Optional partial configuration (merged with defaults)
|
|
44
|
+
* @returns A configured DevilsAdvocate instance
|
|
45
|
+
*
|
|
46
|
+
* @example
|
|
47
|
+
* ```typescript
|
|
48
|
+
* // With defaults
|
|
49
|
+
* const da = createDevilsAdvocate();
|
|
50
|
+
*
|
|
51
|
+
* // With custom config
|
|
52
|
+
* const strict = createDevilsAdvocate({
|
|
53
|
+
* minConfidence: 0.7,
|
|
54
|
+
* minSeverity: 'medium',
|
|
55
|
+
* maxChallengesPerReview: 10,
|
|
56
|
+
* });
|
|
57
|
+
* ```
|
|
58
|
+
*/
|
|
59
|
+
export declare function createDevilsAdvocate(config?: Partial<DevilsAdvocateConfig>): DevilsAdvocate;
|
|
60
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agents/devils-advocate/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA+BG;AAMH,YAAY,EACV,mBAAmB,EACnB,eAAe,EACf,iBAAiB,EACjB,SAAS,EACT,eAAe,EACf,qBAAqB,EACrB,iBAAiB,EACjB,oBAAoB,EACpB,mBAAmB,GACpB,MAAM,YAAY,CAAC;AAMpB,OAAO,EACL,0BAA0B,EAC1B,4BAA4B,EAC5B,cAAc,EACd,gBAAgB,EAChB,8BAA8B,GAC/B,MAAM,YAAY,CAAC;AAMpB,OAAO,EACL,qBAAqB,EACrB,uBAAuB,EACvB,mBAAmB,GACpB,MAAM,YAAY,CAAC;AAMpB,OAAO,EACL,uBAAuB,EACvB,8BAA8B,EAC9B,2BAA2B,EAC3B,yBAAyB,EACzB,6BAA6B,EAC7B,wBAAwB,EACxB,wBAAwB,EACxB,mBAAmB,EACnB,uBAAuB,GACxB,MAAM,iBAAiB,CAAC;AAMzB,OAAO,EAAE,cAAc,EAAE,MAAM,YAAY,CAAC;AAM5C,OAAO,KAAK,EAAE,oBAAoB,EAAE,MAAM,YAAY,CAAC;AACvD,OAAO,EAAE,cAAc,EAAE,MAAM,YAAY,CAAC;AAE5C;;;;;;;;;;;;;;;;;;GAkBG;AACH,wBAAgB,oBAAoB,CAClC,MAAM,CAAC,EAAE,OAAO,CAAC,oBAAoB,CAAC,GACrC,cAAc,CAEhB"}
|