solidity-argus 0.5.9 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +9 -2
- package/README.md +28 -21
- package/package.json +2 -2
- package/skills/INVENTORY.md +14 -1
- package/skills/README.md +4 -2
- package/skills/references/attack-vector-deck/SKILL.md +62 -0
- package/skills/specialist-profiles/access-control-specialist/SKILL.md +31 -0
- package/skills/specialist-profiles/economic-security/SKILL.md +31 -0
- package/skills/specialist-profiles/execution-trace/SKILL.md +31 -0
- package/skills/specialist-profiles/first-principles/SKILL.md +31 -0
- package/skills/specialist-profiles/invariant/SKILL.md +31 -0
- package/skills/specialist-profiles/math-precision/SKILL.md +31 -0
- package/skills/specialist-profiles/periphery/SKILL.md +31 -0
- package/skills/specialist-profiles/vector-scan/SKILL.md +28 -0
- package/src/agents/argus-prompt.ts +48 -7
- package/src/agents/audit-specialist-prompt.ts +76 -0
- package/src/agents/pythia-prompt.ts +1 -1
- package/src/agents/scribe-prompt.ts +5 -0
- package/src/agents/sentinel-prompt.ts +5 -0
- package/src/agents/themis-prompt.ts +3 -0
- package/src/config/schema.ts +2 -0
- package/src/constants/defaults.ts +1 -0
- package/src/create-hooks.ts +18 -8
- package/src/create-tools.ts +2 -0
- package/src/features/audit-enforcer/audit-enforcer.ts +2 -16
- package/src/features/persistent-state/run-finalizer.ts +94 -1
- package/src/hooks/config-handler.ts +23 -0
- package/src/hooks/system-prompt-hook.ts +56 -2
- package/src/hooks/tool-tracking-hook.ts +75 -6
- package/src/shared/agent-names.ts +1 -0
- package/src/shared/key-tools.ts +9 -2
- package/src/state/adapters.ts +1 -1
- package/src/state/projectors.ts +50 -0
- package/src/state/schemas.ts +86 -1
- package/src/state/types.ts +24 -1
- package/src/tools/forge-coverage-tool.ts +31 -1
- package/src/tools/record-finding-tool.ts +7 -1
- package/src/tools/report-generator-tool.ts +28 -2
- package/src/tools/slither-tool.ts +6 -22
- package/src/tools/themis-disposition-tool.ts +46 -0
|
@@ -7,6 +7,7 @@ As Argus, you are the lead auditor and orchestrator. You do not just run tools;
|
|
|
7
7
|
You command a team of specialized subagents:
|
|
8
8
|
- **@sentinel**: Your tactical executor for static analysis, testing, and fuzzing.
|
|
9
9
|
- **@pythia**: Your research analyst for known vulnerabilities and historical exploits.
|
|
10
|
+
- **@audit-specialist**: Your profile-driven adversarial reviewer for deep/adversarial passes.
|
|
10
11
|
- **@scribe**: Your documentation specialist for compiling the final report.
|
|
11
12
|
|
|
12
13
|
## AUDIT METHODOLOGY (7 STEPS)
|
|
@@ -95,6 +96,27 @@ For each one that lacks \`impact\` or \`recommendation\`:
|
|
|
95
96
|
|
|
96
97
|
This step ensures Scribe has rich finding data to work with. Do NOT skip this step — reports with "Impact details were not provided" are unacceptable.
|
|
97
98
|
|
|
99
|
+
### 5.6. Specialist Adversarial Review (DEEP/ADVERSARIAL MODE)
|
|
100
|
+
|
|
101
|
+
When the user explicitly asks for a deep or adversarial review, or when the scope is complex DeFi/proxy/cross-chain/governance code, delegate focused specialist passes to **@audit-specialist**.
|
|
102
|
+
|
|
103
|
+
Default deep/adversarial behavior: choose 2-4 relevant profiles, not every profile.
|
|
104
|
+
|
|
105
|
+
Profile selection rules:
|
|
106
|
+
- Privileged roles, proxies, initializers, or upgrade authority: \`access-control\`.
|
|
107
|
+
- Asset/share vaults, staking, lending, or rewards: \`math-precision\`, \`invariant\`, \`economic-security\`.
|
|
108
|
+
- Bridges, callbacks, queues, routers, or asynchronous flows: \`execution-trace\`, \`economic-security\`.
|
|
109
|
+
- Heavy libraries, adapters, wrappers, or helpers: \`periphery\`.
|
|
110
|
+
- High-value, unfamiliar, or broad adversarial requests: \`first-principles\` plus \`vector-scan\`.
|
|
111
|
+
|
|
112
|
+
Dispatch examples:
|
|
113
|
+
\`\`\`
|
|
114
|
+
Task(subagent_type="audit-specialist", prompt="Run specialist profile: math-precision. Scope: src/Vault.sol, src/Strategy.sol. Load relevant bundled skills. Return FINDING/LEAD blocks. Record only confirmed findings.")
|
|
115
|
+
Task(subagent_type="audit-specialist", prompt="Run specialist profile: vector-scan. Scope: src/. Load attack-vector-deck. Classify vectors as skip/drop/investigate and record only confirmed findings.")
|
|
116
|
+
\`\`\`
|
|
117
|
+
|
|
118
|
+
Audit-specialist findings are normal raw findings. Scribe and Themis must preserve \`reported_by_agent: "audit-specialist"\` and include them in raw -> deduped -> report parity checks.
|
|
119
|
+
|
|
98
120
|
### 6. Testing & Verification
|
|
99
121
|
Prove the existence of vulnerabilities.
|
|
100
122
|
- **Objective**: Confirm findings and explore edge cases.
|
|
@@ -184,6 +206,7 @@ Use the **Task tool** to dispatch work to subagents. The Task tool takes a \`sub
|
|
|
184
206
|
\`\`\`
|
|
185
207
|
Task(subagent_type="sentinel", prompt="Run Slither on the entire codebase at packages/my-project/. Analyze all findings and classify by severity.")
|
|
186
208
|
Task(subagent_type="pythia", prompt="Search Solodit for known vulnerabilities in ERC4626 vaults and stability pool strategies. Also check our pattern database for reentrancy and oracle manipulation vectors.")
|
|
209
|
+
Task(subagent_type="audit-specialist", prompt="Run specialist profile: invariant. Scope: src/Vault.sol. Return FINDING/LEAD blocks and record only confirmed findings.")
|
|
187
210
|
Task(subagent_type="scribe", prompt="Generate the final audit report for ProjectName with these findings: [findings list]")
|
|
188
211
|
\`\`\`
|
|
189
212
|
|
|
@@ -197,7 +220,9 @@ Task(subagent_type="scribe", prompt="Generate the final audit report for Project
|
|
|
197
220
|
- \`argus_slither_analyze\`, \`argus_forge_test\`, \`argus_forge_fuzz\`, \`argus_forge_coverage\`, \`argus_gas_analysis\` → delegate to **sentinel**
|
|
198
221
|
- \`argus_analyze_contract\`, \`argus_check_patterns\`, \`argus_proxy_detection\` → delegate to **sentinel**
|
|
199
222
|
- \`argus_solodit_search\`, Solodit MCP search → delegate to **pythia**
|
|
223
|
+
- Profile-driven adversarial review with combined analysis/research/verification tools → delegate to **audit-specialist** in deep/adversarial mode
|
|
200
224
|
- \`argus_read_findings\`, \`argus_persist_deduped\`, \`argus_generate_report\` \u2192 delegate to **scribe**
|
|
225
|
+
- \`argus_themis_disposition\` → call after Themis returns to record Argus' resolved quality-gate disposition
|
|
201
226
|
- Audit quality validation \u2192 delegate to **themis** (after Scribe completes)
|
|
202
227
|
|
|
203
228
|
### **@sentinel** (The Executor)
|
|
@@ -219,6 +244,16 @@ Task(subagent_type="scribe", prompt="Generate the final audit report for Project
|
|
|
219
244
|
Task(subagent_type="pythia", prompt="Find audit reports for forks of Uniswap V2 to identify common modifications and bugs.")
|
|
220
245
|
\`\`\`
|
|
221
246
|
|
|
247
|
+
### **@audit-specialist** (The Adversarial Specialist)
|
|
248
|
+
- **Role**: Profile-driven manual review under focused lenses such as \`vector-scan\`, \`access-control\`, \`math-precision\`, \`invariant\`, \`economic-security\`, \`execution-trace\`, \`periphery\`, and \`first-principles\`.
|
|
249
|
+
- **Tools**: \`argus_skill_load\`, \`argus_check_patterns\`, \`argus_solodit_search\`, \`argus_analyze_contract\`, \`argus_slither_analyze\`, \`argus_proxy_detection\`, \`argus_forge_test\`, \`argus_forge_fuzz\`, \`argus_forge_coverage\`, \`argus_gas_analysis\`, \`argus_record_finding\`.
|
|
250
|
+
- **Delegation Examples**:
|
|
251
|
+
\`\`\`
|
|
252
|
+
Task(subagent_type="audit-specialist", prompt="Run specialist profile: math-precision. Scope: src/Vault.sol. Return FINDING/LEAD blocks and record only confirmed findings.")
|
|
253
|
+
Task(subagent_type="audit-specialist", prompt="Run specialist profile: vector-scan. Scope: src/. Load attack-vector-deck and record only confirmed findings.")
|
|
254
|
+
\`\`\`
|
|
255
|
+
- **Constraint**: Use only for explicit deep/adversarial requests, complex protocol scopes, or Themis remediation. It returns \`FINDING\` and \`LEAD\` blocks; only confirmed findings are persisted.
|
|
256
|
+
|
|
222
257
|
### **@scribe** (The Reporter)
|
|
223
258
|
- **Role**: Report generation, documentation.
|
|
224
259
|
- **Tools**: \`argus_read_findings\`, \`argus_persist_deduped\`, \`argus_generate_report\`
|
|
@@ -389,7 +424,9 @@ Your subagents have access to these specialized tools. Know when to delegate eac
|
|
|
389
424
|
|
|
390
425
|
## SKILL SYSTEM
|
|
391
426
|
|
|
392
|
-
Instruct subagents to use \`argus_skill_load\` only when domain-specific context is needed. It is namespaced for Argus and works with OMO-compatible discovery plus Argus-native fallback. The knowledge base includes
|
|
427
|
+
Instruct subagents to use \`argus_skill_load\` only when Solidity-audit domain-specific context is needed. It is namespaced for Argus and works with OMO-compatible discovery plus Argus-native fallback. The knowledge base includes 91 curated SKILL.md files, 13 YAML pattern packs, 15 real-world exploit case studies, 8 specialist profiles, and an attack-vector deck covering $3B+ in historical losses.
|
|
428
|
+
|
|
429
|
+
**Boundary rule**: \`argus_skill_load\` loads Argus audit knowledge (vulnerability patterns, protocol guidance, methodology, checklists, and exploit case studies). \`task.load_skills\` is only for generic OpenCode subagent runtime skills when dispatching a subagent. Do not tell Sentinel, Pythia, Scribe, or Themis to use the generic OpenCode \`skill\` tool for Argus audit knowledge.
|
|
393
430
|
|
|
394
431
|
- **Curated skill map (load these first)**:
|
|
395
432
|
- **Reconnaissance**: \`amm-dex\`, \`lending-borrowing\`, \`bridges-cross-chain\`
|
|
@@ -570,13 +607,17 @@ Themis will:
|
|
|
570
607
|
3. Apply vulnerability skill checklists to assess finding validity
|
|
571
608
|
4. Return a verdict: approved or issues found
|
|
572
609
|
|
|
573
|
-
**If Themis flags issues**, YOU are the final judge:
|
|
574
|
-
- If Themis found genuinely dropped findings → re-dispatch Scribe with specific correction instructions
|
|
575
|
-
- If Themis disagrees on severity → evaluate the evidence and
|
|
576
|
-
- If Themis found potential false positives → assess and
|
|
577
|
-
- If Themis approves →
|
|
610
|
+
**If Themis flags issues**, YOU are the final judge, but you must record a resolved disposition before the audit is complete:
|
|
611
|
+
- If Themis found genuinely dropped findings → re-dispatch Scribe with specific correction instructions, then record status="remediated" with notes.
|
|
612
|
+
- If Themis disagrees on severity → evaluate the evidence and either remediate the report or record status="overridden" with a concrete justification.
|
|
613
|
+
- If Themis found potential false positives → assess and remediate or explicitly override with justification.
|
|
614
|
+
- If Themis approves → record status="approved" with the Themis verdict.
|
|
615
|
+
|
|
616
|
+
Record the disposition by calling \`argus_themis_disposition\` with \`status\`, \`verdict_json\`, and either \`notes\` for remediation or \`justification\` for overrides.
|
|
617
|
+
|
|
618
|
+
If Themis returns approved=false, Argus remains the final judge but must record a disposition before the audit is complete: remediate the issue and record status="remediated", or deliberately override with status="overridden" and a concrete justification. A missing Themis verdict or missing Argus disposition means the audit is incomplete.
|
|
578
619
|
|
|
579
|
-
**An audit is NOT complete until Themis has validated the output.**
|
|
620
|
+
**An audit is NOT complete until Themis has validated the output and Argus has recorded a resolved disposition.**
|
|
580
621
|
|
|
581
622
|
You are the guardian. Nothing escapes your gaze. Begin the audit.
|
|
582
623
|
`
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
export const AUDIT_SPECIALIST_PROMPT = `You are **Audit Specialist**, the adversarial review multiplier of Argus Panoptes.
|
|
2
|
+
|
|
3
|
+
## IDENTITY & ROLE
|
|
4
|
+
|
|
5
|
+
You are a profile-driven Solidity security reviewer. Argus dispatches you with a prompt such as: "Run specialist profile: math-precision. Scope: src/Vault.sol." Your job is to apply that profile deeply, verify concrete hypotheses, and record only confirmed findings.
|
|
6
|
+
|
|
7
|
+
You combine Sentinel's code-analysis and verification tools with Pythia's vulnerability research reach. You are not Scribe and not Themis: do not write final reports, do not validate your own final output, and do not manage global knowledge sync.
|
|
8
|
+
|
|
9
|
+
## PROFILE STARTUP
|
|
10
|
+
|
|
11
|
+
At task start:
|
|
12
|
+
1. Identify the active profile from the task prompt. If no profile is explicit, use \`vector-scan\`.
|
|
13
|
+
2. Load the relevant profile skill with \`argus_skill_load\`. For the \`access-control\` profile, load \`access-control-specialist\` to avoid colliding with the vulnerability-pattern skill named \`access-control\`.
|
|
14
|
+
3. For \`vector-scan\`, \`first-principles\`, unfamiliar protocols, or broad adversarial review, also load \`attack-vector-deck\`.
|
|
15
|
+
4. Load supporting vulnerability/protocol skills only when they materially sharpen the review.
|
|
16
|
+
|
|
17
|
+
Recognized profiles:
|
|
18
|
+
- \`vector-scan\`: mechanically apply the bundled attack-vector deck and classify vectors as skip/drop/investigate.
|
|
19
|
+
- \`access-control\`: load \`access-control-specialist\`; map roles, modifiers, initialization, upgrade authority, and inconsistent guards.
|
|
20
|
+
- \`math-precision\`: hunt rounding, scale mismatch, downcast, decimal, overflow, and accounting precision errors.
|
|
21
|
+
- \`invariant\`: extract conservation laws and state couplings, then search for violating paths.
|
|
22
|
+
- \`economic-security\`: attack external dependencies, token behavior, oracle assumptions, incentives, and value flows.
|
|
23
|
+
- \`execution-trace\`: trace stale reads, parameter divergence, branch ordering, callbacks, and cross-transaction interleavings.
|
|
24
|
+
- \`periphery\`: focus on libraries, helpers, base contracts, adapters, encoders, wrappers, and integration glue.
|
|
25
|
+
- \`first-principles\`: ignore named bug classes; extract assumptions line-by-line and try to violate them.
|
|
26
|
+
|
|
27
|
+
## TOOL USAGE
|
|
28
|
+
|
|
29
|
+
You can use:
|
|
30
|
+
- \`argus_skill_load\` for Argus skills and specialist profiles.
|
|
31
|
+
- \`argus_check_patterns\` for known-pattern scanning.
|
|
32
|
+
- \`argus_solodit_search\` for historical audit precedent.
|
|
33
|
+
- \`argus_analyze_contract\`, \`argus_slither_analyze\`, and \`argus_proxy_detection\` for structural and static analysis.
|
|
34
|
+
- \`argus_forge_test\`, \`argus_forge_fuzz\`, \`argus_forge_coverage\`, and \`argus_gas_analysis\` for verification.
|
|
35
|
+
- \`argus_record_finding\` for confirmed findings only.
|
|
36
|
+
|
|
37
|
+
**CRITICAL — use the right skill loader:**
|
|
38
|
+
- For ALL Argus audit knowledge, specialist profiles, and the attack-vector deck, use \`argus_skill_load\`.
|
|
39
|
+
- NEVER call the generic OpenCode \`skill\` tool for Argus audit knowledge. It does not reliably load bundled Argus skills.
|
|
40
|
+
- \`task.load_skills\` is for generic OpenCode runtime skills during dispatch, not audit knowledge.
|
|
41
|
+
|
|
42
|
+
## FINDINGS VS LEADS
|
|
43
|
+
|
|
44
|
+
Record a finding only when you can prove reachability, missing/incorrect guard or accounting behavior, and security impact in the actual code. If proof is incomplete, return a \`LEAD\` to Argus and do not persist it.
|
|
45
|
+
|
|
46
|
+
When recording a confirmed finding with \`argus_record_finding\`, include specific \`impact\`, \`recommendation\`, and \`proofOfConcept\` fields. Critical and High findings must never use generic placeholders.
|
|
47
|
+
|
|
48
|
+
## OUTPUT CONTRACT
|
|
49
|
+
|
|
50
|
+
Return structured blocks only:
|
|
51
|
+
|
|
52
|
+
\`\`\`text
|
|
53
|
+
FINDING | contract: Name | function: func | bug_class: kebab-tag | profile: math-precision | group_key: Name | func | bug-class
|
|
54
|
+
path: caller -> function -> state change -> impact
|
|
55
|
+
proof: concrete values, trace, test result, or state sequence from the actual code
|
|
56
|
+
description: one sentence
|
|
57
|
+
fix: one-sentence suggestion
|
|
58
|
+
|
|
59
|
+
LEAD | contract: Name | function: func | bug_class: kebab-tag | profile: math-precision | group_key: Name | func | bug-class
|
|
60
|
+
code_smells: what looked suspicious
|
|
61
|
+
missing_proof: what still needs verification
|
|
62
|
+
description: one sentence explaining the trail
|
|
63
|
+
\`\`\`
|
|
64
|
+
|
|
65
|
+
Rules:
|
|
66
|
+
- Same root cause uses the same \`group_key\`.
|
|
67
|
+
- Different fixes require separate items.
|
|
68
|
+
- No proof means \`LEAD\`, not a persisted finding.
|
|
69
|
+
- Report tool limitations explicitly when Slither, Forge, Solodit, or coverage is unavailable.
|
|
70
|
+
|
|
71
|
+
You are the specialist lens. Narrow the field, verify the exploitability, and leave Argus with confirmed findings or precise leads.
|
|
72
|
+
`
|
|
73
|
+
|
|
74
|
+
/**
 * Accessor for the audit-specialist system prompt.
 *
 * @returns The `AUDIT_SPECIALIST_PROMPT` template string, unchanged.
 */
export function getAuditSpecialistPrompt(): string {
|
|
75
|
+
return AUDIT_SPECIALIST_PROMPT
|
|
76
|
+
}
|
|
@@ -125,7 +125,7 @@ This ensures Pythia always delivers research value, even when Solodit has no dir
|
|
|
125
125
|
|
|
126
126
|
## SKILLS SYSTEM
|
|
127
127
|
|
|
128
|
-
The Argus knowledge base includes
|
|
128
|
+
The Argus knowledge base includes 91 curated SKILL.md files, 13 YAML pattern packs, 15 real-world exploit case studies, 8 specialist profiles, and an attack-vector deck covering $3B+ in historical losses. You load them with \`argus_skill_load\`.
|
|
129
129
|
|
|
130
130
|
**CRITICAL — use the right tool**:
|
|
131
131
|
- For ALL vulnerability, protocol, checklist, methodology, and case-study knowledge, use \`argus_skill_load\` with the exact skill name (e.g. \`argus_skill_load({ name: "reentrancy" })\`).
|
|
@@ -95,6 +95,11 @@ Before generating the report, verify:
|
|
|
95
95
|
|
|
96
96
|
Use \`argus_skill_load\` only when needed to improve report quality and consistency.
|
|
97
97
|
|
|
98
|
+
**CRITICAL — use the right tool**:
|
|
99
|
+
- For report templates, severity rubrics, checklists, exploit references, and audit methodology, use \`argus_skill_load\` with the exact skill name.
|
|
100
|
+
- **NEVER call the generic OpenCode \`skill\` tool** for Argus report knowledge. It does not load Argus skills such as \`report-template\`, \`severity-classification\`, or \`cyfrin-defi-core\`.
|
|
101
|
+
- \`task.load_skills\` is only a subagent dispatch parameter for generic OpenCode runtime skills, not an audit knowledge loader.
|
|
102
|
+
|
|
98
103
|
- **Curated skill map**:
|
|
99
104
|
- \`report-template\`, \`severity-classification\`
|
|
100
105
|
- \`cyfrin-defi-core\`
|
|
@@ -160,6 +160,11 @@ You have access to a specific set of tools. Use them effectively.
|
|
|
160
160
|
|
|
161
161
|
Use \`argus_skill_load\` only when specialized context is needed before deep verification work.
|
|
162
162
|
|
|
163
|
+
**CRITICAL — use the right tool**:
|
|
164
|
+
- For vulnerability, protocol, checklist, methodology, and case-study knowledge, use \`argus_skill_load\` with the exact skill name.
|
|
165
|
+
- **NEVER call the generic OpenCode \`skill\` tool** for Argus audit knowledge. It does not load Argus skills such as \`reentrancy\`, \`access-control\`, or \`oracle-manipulation\`.
|
|
166
|
+
- \`task.load_skills\` is only a subagent dispatch parameter for generic OpenCode runtime skills, not an audit knowledge loader.
|
|
167
|
+
|
|
163
168
|
- **Curated skill map**:
|
|
164
169
|
- \`reentrancy\`, \`access-control\`, \`oracle-manipulation\`
|
|
165
170
|
- \`cyfrin-defi-integrations\`, \`severity-classification\`
|
|
@@ -47,6 +47,8 @@ This phase is mandatory on every invocation.
|
|
|
47
47
|
|
|
48
48
|
4. Validate raw -> deduped mapping:
|
|
49
49
|
- Every raw finding must map to exactly one deduped finding.
|
|
50
|
+
- Findings reported by \`audit-specialist\` are first-class raw findings, just like Sentinel and Pythia findings.
|
|
51
|
+
- Preserve \`reported_by_agent: "audit-specialist"\` and include those observations in raw -> deduped -> report parity checks.
|
|
50
52
|
- Merging is allowed, dropping is not.
|
|
51
53
|
- Flag any raw finding that vanished without a valid merge target.
|
|
52
54
|
|
|
@@ -98,6 +100,7 @@ Verdict rules:
|
|
|
98
100
|
- If approved with no issues, state it concisely.
|
|
99
101
|
- If issues exist, list each issue with concrete evidence (file path, finding id, field mismatch, or historical precedent).
|
|
100
102
|
- Be precise and adversarial, but do not overreach. Recommend; do not override.
|
|
103
|
+
- Return the JSON verdict as the final fenced code block in your response. Do not add a second JSON object after it. Argus uses this verdict to decide whether to accept it, remediate it, or explicitly override it.
|
|
101
104
|
|
|
102
105
|
## AUTHORITY BOUNDARY
|
|
103
106
|
|
package/src/config/schema.ts
CHANGED
|
@@ -51,6 +51,7 @@ export const ArgusConfigSchema = z
|
|
|
51
51
|
argus: AgentConfigSchema.default({}),
|
|
52
52
|
sentinel: AgentConfigSchema.default({}),
|
|
53
53
|
pythia: AgentConfigSchema.default({}),
|
|
54
|
+
auditSpecialist: AgentConfigSchema.default({}),
|
|
54
55
|
scribe: AgentConfigSchema.default({}),
|
|
55
56
|
themis: AgentConfigSchema.optional().default({}),
|
|
56
57
|
})
|
|
@@ -58,6 +59,7 @@ export const ArgusConfigSchema = z
|
|
|
58
59
|
argus: {},
|
|
59
60
|
sentinel: {},
|
|
60
61
|
pythia: {},
|
|
62
|
+
auditSpecialist: {},
|
|
61
63
|
scribe: {},
|
|
62
64
|
themis: {},
|
|
63
65
|
}),
|
|
@@ -2,6 +2,7 @@ export const DEFAULT_MODELS = {
|
|
|
2
2
|
argus: "anthropic/claude-opus-4-7",
|
|
3
3
|
sentinel: "anthropic/claude-sonnet-4-6",
|
|
4
4
|
pythia: "anthropic/claude-sonnet-4-6",
|
|
5
|
+
auditSpecialist: "anthropic/claude-sonnet-4-6",
|
|
5
6
|
scribe: "anthropic/claude-sonnet-4-6",
|
|
6
7
|
themis: "openai/gpt-5.5",
|
|
7
8
|
} as const
|
package/src/create-hooks.ts
CHANGED
|
@@ -18,7 +18,10 @@ import {
|
|
|
18
18
|
materializeReportInput,
|
|
19
19
|
} from "./features/persistent-state/findings-materializer"
|
|
20
20
|
import { recordRun, updateRunStatus } from "./features/persistent-state/global-run-index"
|
|
21
|
-
import {
|
|
21
|
+
import {
|
|
22
|
+
finalizeRun,
|
|
23
|
+
hasResolvedThemisDispositionAfterReport,
|
|
24
|
+
} from "./features/persistent-state/run-finalizer"
|
|
22
25
|
import { createRunJournal } from "./features/persistent-state/run-journal"
|
|
23
26
|
import { pruneStaleRuns } from "./features/persistent-state/run-pruner"
|
|
24
27
|
import { createAgentTracker } from "./hooks/agent-tracker"
|
|
@@ -628,6 +631,11 @@ export function createHooks(args: {
|
|
|
628
631
|
(sessionId ? (eventSinksByOpencodeSession.get(sessionId) ?? null) : null)
|
|
629
632
|
|
|
630
633
|
if (runSink && !runSink.isFinalized) {
|
|
634
|
+
const events = await runSink.readAll()
|
|
635
|
+
if (!hasResolvedThemisDispositionAfterReport(events)) {
|
|
636
|
+
return
|
|
637
|
+
}
|
|
638
|
+
|
|
631
639
|
try {
|
|
632
640
|
const idleFinalization = await finalizeRun(
|
|
633
641
|
auditState.sessionId,
|
|
@@ -1092,11 +1100,13 @@ export function createHooks(args: {
|
|
|
1092
1100
|
)
|
|
1093
1101
|
}
|
|
1094
1102
|
|
|
1095
|
-
//
|
|
1096
|
-
//
|
|
1097
|
-
|
|
1098
|
-
|
|
1099
|
-
|
|
1103
|
+
// The report is materialized here, but finalization waits until
|
|
1104
|
+
// Argus records a resolved Themis disposition.
|
|
1105
|
+
}
|
|
1106
|
+
|
|
1107
|
+
if (toolName === "argus_themis_disposition") {
|
|
1108
|
+
const state = getAuditState(input.sessionID)
|
|
1109
|
+
if (state?.reportGenerated) {
|
|
1100
1110
|
const runSink =
|
|
1101
1111
|
eventSinksByRunId.get(state.sessionId) ??
|
|
1102
1112
|
(input.sessionID
|
|
@@ -1120,12 +1130,12 @@ export function createHooks(args: {
|
|
|
1120
1130
|
)
|
|
1121
1131
|
if (!reportFinalization.invariantsPassed) {
|
|
1122
1132
|
logger.warn(
|
|
1123
|
-
`
|
|
1133
|
+
`Themis-disposition finalization for run ${state.sessionId} has invariant errors: ${reportFinalization.errors.join("; ")}`,
|
|
1124
1134
|
)
|
|
1125
1135
|
}
|
|
1126
1136
|
} catch (error) {
|
|
1127
1137
|
logger.warn(
|
|
1128
|
-
`
|
|
1138
|
+
`Themis-disposition finalization failed for run ${state.sessionId}: ${error instanceof Error ? error.message : String(error)}`,
|
|
1129
1139
|
)
|
|
1130
1140
|
}
|
|
1131
1141
|
}
|
package/src/create-tools.ts
CHANGED
|
@@ -15,6 +15,7 @@ import { reportGeneratorTool } from "./tools/report-generator-tool"
|
|
|
15
15
|
import { slitherTool } from "./tools/slither-tool"
|
|
16
16
|
import { createSoloditSearchTool } from "./tools/solodit-search-tool"
|
|
17
17
|
import { syncKnowledgeTool } from "./tools/sync-knowledge-tool"
|
|
18
|
+
import { themisDispositionTool } from "./tools/themis-disposition-tool"
|
|
18
19
|
|
|
19
20
|
export function createTools(config: ArgusConfig): Record<string, ToolDefinition> {
|
|
20
21
|
const tools: Record<string, ToolDefinition> = {
|
|
@@ -31,6 +32,7 @@ export function createTools(config: ArgusConfig): Record<string, ToolDefinition>
|
|
|
31
32
|
argus_read_findings: readFindingsTool,
|
|
32
33
|
argus_persist_deduped: persistDedupedTool,
|
|
33
34
|
argus_generate_report: reportGeneratorTool,
|
|
35
|
+
argus_themis_disposition: themisDispositionTool,
|
|
34
36
|
argus_sync_knowledge: syncKnowledgeTool,
|
|
35
37
|
}
|
|
36
38
|
|
|
@@ -1,23 +1,9 @@
|
|
|
1
1
|
import { PHASE_ORDER } from "../../shared/audit-phases"
|
|
2
|
+
import { computeMissingKeyTools } from "../../shared/key-tools"
|
|
2
3
|
import type { AuditPhase, AuditState } from "../../state/types"
|
|
3
4
|
|
|
4
5
|
const REPORTING_PHASES: AuditPhase[] = ["reporting", "complete"]
|
|
5
6
|
|
|
6
|
-
const KEY_TOOL_FAMILIES: Array<{ family: string; prefixes: string[] }> = [
|
|
7
|
-
{ family: "slither", prefixes: ["argus_slither_analyze", "slither"] },
|
|
8
|
-
{ family: "forge_test", prefixes: ["argus_forge_test", "forge_test"] },
|
|
9
|
-
{ family: "forge_fuzz", prefixes: ["argus_forge_fuzz", "forge_fuzz"] },
|
|
10
|
-
{ family: "forge_coverage", prefixes: ["argus_forge_coverage", "forge_coverage"] },
|
|
11
|
-
]
|
|
12
|
-
|
|
13
|
-
function getMissingToolFamilies(auditState: AuditState): string[] {
|
|
14
|
-
const executedTools = auditState.toolsExecuted.map((t) => t.tool)
|
|
15
|
-
return KEY_TOOL_FAMILIES.filter(
|
|
16
|
-
({ prefixes }) =>
|
|
17
|
-
!executedTools.some((tool) => prefixes.some((prefix) => tool.startsWith(prefix))),
|
|
18
|
-
).map(({ family }) => family)
|
|
19
|
-
}
|
|
20
|
-
|
|
21
7
|
function getNextPhase(current: AuditPhase): AuditPhase | null {
|
|
22
8
|
const idx = PHASE_ORDER.indexOf(current)
|
|
23
9
|
if (idx === -1 || idx >= PHASE_ORDER.length - 1) return null
|
|
@@ -39,7 +25,7 @@ export function createAuditEnforcer() {
|
|
|
39
25
|
]
|
|
40
26
|
|
|
41
27
|
if (REPORTING_PHASES.includes(auditState.currentPhase)) {
|
|
42
|
-
const missing =
|
|
28
|
+
const missing = computeMissingKeyTools(auditState.toolsExecuted, auditState.unavailableTools)
|
|
43
29
|
if (missing.length > 0) {
|
|
44
30
|
parts.push(
|
|
45
31
|
`\u26a0\ufe0f Tool coverage incomplete: ${missing.join(", ")} have not been executed. Do not proceed to report generation until required tools are complete.`,
|
|
@@ -131,6 +131,97 @@ function collectReportQualityGateErrors(events: AuditEvent[]): string[] {
|
|
|
131
131
|
return errors
|
|
132
132
|
}
|
|
133
133
|
|
|
134
|
+
/**
 * Loose shape of a Themis verdict as read from event payloads.
 * Every field is optional and typed `unknown`, so callers must narrow before
 * trusting a value. Within this hunk only `approved` is actually inspected.
 */
type ThemisVerdict = {
|
|
135
|
+
approved?: unknown
|
|
136
|
+
pipeline_issues?: unknown
|
|
137
|
+
false_positives?: unknown
|
|
138
|
+
missed_findings?: unknown
|
|
139
|
+
severity_adjustments?: unknown
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
/**
 * Loose shape of the disposition recorded against a Themis verdict.
 * `status` is compared against "approved", "remediated" and "overridden" by
 * `isResolvedThemisDisposition`; `notes` and `justification` are checked with
 * `hasText`. NOTE(review): `verdict` is typed as `ThemisVerdict` but arrives via
 * a cast, so it is not validated at runtime — confirm upstream schema.
 */
type ThemisDisposition = {
|
|
143
|
+
status?: unknown
|
|
144
|
+
verdict?: ThemisVerdict
|
|
145
|
+
notes?: unknown
|
|
146
|
+
justification?: unknown
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
/**
 * Type guard for a non-blank string.
 *
 * @param value - Any value.
 * @returns `true` only when `value` is a string that still has at least one
 *   character after `trim()`; whitespace-only strings and non-strings yield `false`.
 */
function hasText(value: unknown): value is string {
|
|
150
|
+
return typeof value === "string" && value.trim().length > 0
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
/**
 * Decides whether a recorded disposition counts as "resolved".
 *
 * The status and the embedded verdict must agree:
 * - "approved"   requires `verdict.approved === true`.
 * - "remediated" requires `verdict.approved === false` and non-blank `notes`.
 * - "overridden" requires `verdict.approved === false` and non-blank `justification`.
 * Any other status (or a missing disposition) is unresolved.
 *
 * @param value - Raw `themisDisposition` payload value; passed through `asRecord`
 *   (defined elsewhere in the file — presumably yields null for non-objects; verify).
 * @returns `true` when one of the three combinations above holds, else `false`.
 */
function isResolvedThemisDisposition(value: unknown): boolean {
|
|
154
|
+
const disposition = asRecord(value) as ThemisDisposition | null
|
|
155
|
+
if (disposition?.status === "approved") {
|
|
156
|
+
return disposition.verdict?.approved === true
|
|
157
|
+
}
|
|
158
|
+
if (disposition?.status === "remediated") {
|
|
159
|
+
return disposition.verdict?.approved === false && hasText(disposition.notes)
|
|
160
|
+
}
|
|
161
|
+
if (disposition?.status === "overridden") {
|
|
162
|
+
return disposition.verdict?.approved === false && hasText(disposition.justification)
|
|
163
|
+
}
|
|
164
|
+
return false
|
|
165
|
+
}
|
|
166
|
+
|
|
167
|
+
/**
 * Reports whether a verdict payload is an explicit rejection.
 *
 * @param value - Raw verdict value; passed through `asRecord` (defined elsewhere).
 * @returns `true` only when `approved` is strictly `false`; a missing or
 *   non-boolean `approved` is not treated as a rejection.
 */
function hasRejectedThemisVerdict(value: unknown): boolean {
|
|
168
|
+
const verdict = asRecord(value) as ThemisVerdict | null
|
|
169
|
+
return verdict?.approved === false
|
|
170
|
+
}
|
|
171
|
+
|
|
172
|
+
/**
 * Produces finalization errors when the most recent generated report has no
 * resolved Themis disposition recorded after it.
 *
 * Steps, in order:
 * 1. Scan backwards for the last event accepted by `isGenerateReportCompletion`
 *    (defined elsewhere). No such event means there is nothing to gate: `[]`.
 * 2. If any later "tool.completed" event carries a `themisDisposition` payload
 *    that `isResolvedThemisDisposition` accepts, the gate is satisfied: `[]`.
 * 3. Otherwise return exactly one message, chosen by whether a later
 *    "tool.completed" task event for subagent "themis" carried a verdict with
 *    `approved === false`.
 *
 * @param events - Run events in the order they were recorded.
 * @returns An empty array, or a single-element array with the error message.
 */
function collectThemisDispositionErrors(events: AuditEvent[]): string[] {
|
|
173
|
+
let reportIndex = -1
|
|
174
|
+
for (let index = events.length - 1; index >= 0; index -= 1) {
|
|
175
|
+
const event = events[index]
|
|
176
|
+
if (event && isGenerateReportCompletion(event)) {
|
|
177
|
+
reportIndex = index
|
|
178
|
+
break
|
|
179
|
+
}
|
|
180
|
+
}
|
|
181
|
+
// No report was generated, so the disposition gate does not apply.
if (reportIndex === -1) return []
|
|
182
|
+
|
|
183
|
+
const laterEvents = events.slice(reportIndex + 1)
|
|
184
|
+
const hasResolvedDisposition = laterEvents.some((event) => {
|
|
185
|
+
if (event.type !== "tool.completed") return false
|
|
186
|
+
const payload = asRecord(event.payload)
|
|
187
|
+
return isResolvedThemisDisposition(payload?.themisDisposition)
|
|
188
|
+
})
|
|
189
|
+
|
|
190
|
+
if (hasResolvedDisposition) return []
|
|
191
|
+
|
|
192
|
+
// Only used to pick the more specific of the two error messages below.
const hasUnresolvedRejection = laterEvents.some((event) => {
|
|
193
|
+
if (event.type !== "tool.completed") return false
|
|
194
|
+
const payload = asRecord(event.payload)
|
|
195
|
+
return (
|
|
196
|
+
payload?.tool === "task" &&
|
|
197
|
+
payload.subagent_type === "themis" &&
|
|
198
|
+
hasRejectedThemisVerdict(payload.themis)
|
|
199
|
+
)
|
|
200
|
+
})
|
|
201
|
+
|
|
202
|
+
return hasUnresolvedRejection
|
|
203
|
+
? ["generated report has unresolved Themis issues"]
|
|
204
|
+
: ["generated report has no resolved Themis disposition"]
|
|
205
|
+
}
|
|
206
|
+
|
|
207
|
+
/**
 * Tells whether a resolved Themis disposition was recorded after the most
 * recent generated report.
 *
 * Scans backwards for the last event accepted by `isGenerateReportCompletion`
 * (defined elsewhere), then checks the events that follow it.
 *
 * @param events - Run events in the order they were recorded.
 * @returns `false` when no report-completion event exists; otherwise `true`
 *   if any later "tool.completed" event has a `themisDisposition` payload that
 *   `isResolvedThemisDisposition` accepts.
 */
export function hasResolvedThemisDispositionAfterReport(events: AuditEvent[]): boolean {
|
|
208
|
+
let reportIndex = -1
|
|
209
|
+
for (let index = events.length - 1; index >= 0; index -= 1) {
|
|
210
|
+
const event = events[index]
|
|
211
|
+
if (event && isGenerateReportCompletion(event)) {
|
|
212
|
+
reportIndex = index
|
|
213
|
+
break
|
|
214
|
+
}
|
|
215
|
+
}
|
|
216
|
+
// Unlike collectThemisDispositionErrors, a missing report is reported as "not resolved".
if (reportIndex === -1) return false
|
|
217
|
+
|
|
218
|
+
return events.slice(reportIndex + 1).some((event) => {
|
|
219
|
+
if (event.type !== "tool.completed") return false
|
|
220
|
+
const payload = asRecord(event.payload)
|
|
221
|
+
return isResolvedThemisDisposition(payload?.themisDisposition)
|
|
222
|
+
})
|
|
223
|
+
}
|
|
224
|
+
|
|
134
225
|
function collectParentChildIntegrityErrors(events: AuditEvent[]): string[] {
|
|
135
226
|
const errors: string[] = []
|
|
136
227
|
const parentByChild = new Map<string, string>()
|
|
@@ -244,7 +335,7 @@ function collectInvariantErrors(events: AuditEvent[]): { errors: string[]; warni
|
|
|
244
335
|
|
|
245
336
|
warnings.push(...collectOrphanedToolStarts(events))
|
|
246
337
|
errors.push(...collectParentChildIntegrityErrors(events))
|
|
247
|
-
|
|
338
|
+
warnings.push(...collectMultiSessionErrors(events))
|
|
248
339
|
return { errors, warnings }
|
|
249
340
|
}
|
|
250
341
|
|
|
@@ -308,6 +399,7 @@ export async function finalizeRun(
|
|
|
308
399
|
const reportErrors = [
|
|
309
400
|
...(await collectReportCompletenessErrors(events)),
|
|
310
401
|
...collectReportQualityGateErrors(events),
|
|
402
|
+
...collectThemisDispositionErrors(events),
|
|
311
403
|
]
|
|
312
404
|
if (reportErrors.length === 0) {
|
|
313
405
|
return {
|
|
@@ -324,6 +416,7 @@ export async function finalizeRun(
|
|
|
324
416
|
const { errors, warnings } = collectInvariantErrors(events)
|
|
325
417
|
errors.push(...(await collectReportCompletenessErrors(events)))
|
|
326
418
|
errors.push(...collectReportQualityGateErrors(events))
|
|
419
|
+
errors.push(...collectThemisDispositionErrors(events))
|
|
327
420
|
const invariantsPassed = errors.length === 0
|
|
328
421
|
const sessionId = events.at(-1)?.session_id ?? ""
|
|
329
422
|
|
|
@@ -2,6 +2,7 @@ import { existsSync, readdirSync } from "node:fs"
|
|
|
2
2
|
import { join, resolve } from "node:path"
|
|
3
3
|
import type { Config } from "@opencode-ai/sdk/v2"
|
|
4
4
|
import { ARGUS_PROMPT } from "../agents/argus-prompt"
|
|
5
|
+
import { AUDIT_SPECIALIST_PROMPT } from "../agents/audit-specialist-prompt"
|
|
5
6
|
import { PYTHIA_PROMPT } from "../agents/pythia-prompt"
|
|
6
7
|
import { SCRIBE_PROMPT } from "../agents/scribe-prompt"
|
|
7
8
|
import { SENTINEL_PROMPT } from "../agents/sentinel-prompt"
|
|
@@ -127,6 +128,7 @@ export function createConfigHandler(
|
|
|
127
128
|
task: {
|
|
128
129
|
sentinel: "allow",
|
|
129
130
|
pythia: "allow",
|
|
131
|
+
"audit-specialist": "allow",
|
|
130
132
|
scribe: "allow",
|
|
131
133
|
themis: "allow",
|
|
132
134
|
},
|
|
@@ -167,6 +169,27 @@ export function createConfigHandler(
|
|
|
167
169
|
skill: "allow",
|
|
168
170
|
},
|
|
169
171
|
},
|
|
172
|
+
"audit-specialist": {
|
|
173
|
+
mode: "subagent",
|
|
174
|
+
model: argusConfig.agents?.auditSpecialist?.model ?? DEFAULT_MODELS.auditSpecialist,
|
|
175
|
+
steps: argusConfig.agents?.auditSpecialist?.steps ?? DEFAULT_STEPS,
|
|
176
|
+
description: "Profile-driven adversarial specialist auditor",
|
|
177
|
+
prompt: AUDIT_SPECIALIST_PROMPT,
|
|
178
|
+
permission: {
|
|
179
|
+
argus_skill_load: "allow",
|
|
180
|
+
argus_check_patterns: "allow",
|
|
181
|
+
argus_solodit_search: "allow",
|
|
182
|
+
argus_analyze_contract: "allow",
|
|
183
|
+
argus_slither_analyze: "allow",
|
|
184
|
+
argus_proxy_detection: "allow",
|
|
185
|
+
argus_forge_test: "allow",
|
|
186
|
+
argus_forge_fuzz: "allow",
|
|
187
|
+
argus_forge_coverage: "allow",
|
|
188
|
+
argus_gas_analysis: "allow",
|
|
189
|
+
argus_record_finding: "allow",
|
|
190
|
+
skill: "allow",
|
|
191
|
+
},
|
|
192
|
+
},
|
|
170
193
|
scribe: {
|
|
171
194
|
mode: "subagent",
|
|
172
195
|
model: argusConfig.agents?.scribe?.model ?? DEFAULT_MODELS.scribe,
|
|
@@ -35,6 +35,57 @@ export function buildFallbackDirectives(unavailableTools: string[]): string[] {
|
|
|
35
35
|
return directives
|
|
36
36
|
}
|
|
37
37
|
|
|
38
|
+
/**
 * Formats the elapsed time between two timestamps as `<n>ms`.
 *
 * @param startTime - Timestamp at which the tool started.
 * @param endTime - Timestamp at which the tool finished, if it has.
 * @returns `"<endTime - startTime>ms"`, or `"pending"` when `endTime` is
 *   missing, when either timestamp is not a finite number, or when `endTime`
 *   precedes `startTime`.
 */
function formatDuration(startTime: number, endTime?: number): string {
  if (typeof endTime !== "number") return "pending"
  // NaN/Infinity pass the typeof check and would otherwise render as
  // "NaNms" / "Infinityms" in the ledger line.
  if (!Number.isFinite(startTime) || !Number.isFinite(endTime)) return "pending"
  if (endTime < startTime) return "pending"
  return `${endTime - startTime}ms`
}
|
|
42
|
+
|
|
43
|
+
/**
 * Builds the "Tool Ledger" summary for the dynamic context.
 *
 * The five most recent non-`task` entries of `auditState.toolsExecuted` are
 * rendered as `<tool>=<ok|failed> findings=<n> duration=<...>`. When at least
 * one `task` entry exists, a trailing `task dispatches=<count>` entry is
 * appended. Entries are joined with "; "; an empty ledger yields "none".
 */
function buildToolLedgerLine(auditState: AuditState): string {
  const executed = auditState.toolsExecuted

  const ledger = executed
    .filter(({ tool }) => tool !== "task")
    .slice(-5)
    .map(({ tool, success, findingsCount, startTime, endTime }) => {
      const outcome = success ? "ok" : "failed"
      const elapsed = formatDuration(startTime, endTime)
      return `${tool}=${outcome} findings=${findingsCount} duration=${elapsed}`
    })

  const dispatchCount = executed.reduce(
    (total, { tool }) => (tool === "task" ? total + 1 : total),
    0,
  )
  if (dispatchCount > 0) {
    ledger.push(`task dispatches=${dispatchCount}`)
  }

  if (ledger.length === 0) return "none"
  return ledger.join("; ")
}
|
|
54
|
+
|
|
55
|
+
/**
 * Lists the names of every executed tool other than `task`, in execution
 * order and joined with ", ". Returns "none" when no such tool has run.
 */
function buildToolsLine(auditState: AuditState): string {
  // flatMap drops the `task` entries and keeps the rest in a single pass.
  const names = auditState.toolsExecuted.flatMap(({ tool }) =>
    tool === "task" ? [] : [tool],
  )
  if (names.length === 0) return "none"
  return names.join(", ")
}
|
|
61
|
+
|
|
62
|
+
/**
 * Renders the "Finding Counts:" line from `auditState.findingCounts`.
 *
 * @returns `null` when no counts are present; otherwise a single
 *   space-separated line of `label=value` pairs in which any missing count is
 *   shown as 0.
 */
function buildFindingCountsLine(auditState: AuditState): string | null {
  const { findingCounts } = auditState
  if (!findingCounts) return null

  // Label/value pairs, listed in the order they are printed.
  const fields = [
    ["raw_observations", findingCounts.rawObservations],
    ["recorded", findingCounts.recordedFindings],
    ["deduped", findingCounts.dedupedFindings],
    ["actionable", findingCounts.actionableFindings],
    ["non_actionable", findingCounts.nonActionableFindings],
  ] as const

  const rendered = fields.map(([label, value]) => `${label}=${value ?? 0}`)
  return ["Finding Counts:", ...rendered].join(" ")
}
|
|
75
|
+
|
|
76
|
+
/**
 * Renders the "Coverage:" line for the dynamic context.
 *
 * When `auditState.coverageAttempt` is set, its status is reported, followed
 * by the reason when one is present. Otherwise the line reads "skipped" if
 * "forge" is listed in `auditState.unavailableTools`, and "pending" if not.
 */
function buildCoverageLine(auditState: AuditState): string {
  const recorded = auditState.coverageAttempt

  if (!recorded) {
    // No attempt recorded yet: distinguish "cannot run" from "not run yet".
    const forgeMissing = (auditState.unavailableTools ?? []).includes("forge")
    if (forgeMissing) return "Coverage: skipped — forge unavailable"
    return "Coverage: pending"
  }

  if (recorded.reason) {
    return `Coverage: ${recorded.status} — ${recorded.reason}`
  }
  return `Coverage: ${recorded.status}`
}
|
|
88
|
+
|
|
38
89
|
export function buildDynamicContext(
|
|
39
90
|
auditState: AuditState,
|
|
40
91
|
agent: string,
|
|
@@ -45,7 +96,7 @@ export function buildDynamicContext(
|
|
|
45
96
|
const executedToolNames = new Set(
|
|
46
97
|
auditState.toolsExecuted.map((t) => TOOL_SHORT_NAMES[t.tool] ?? t.tool),
|
|
47
98
|
)
|
|
48
|
-
const
|
|
99
|
+
const findingCountsLine = buildFindingCountsLine(auditState)
|
|
49
100
|
const taskStatus = KEY_TOOLS.map(
|
|
50
101
|
(t) => `${t}=${executedToolNames.has(t) ? "done" : "pending"}`,
|
|
51
102
|
).join(" ")
|
|
@@ -62,7 +113,10 @@ export function buildDynamicContext(
|
|
|
62
113
|
`Phase: ${auditState.currentPhase}`,
|
|
63
114
|
`Contracts: ${auditState.contractsReviewed.length} reviewed`,
|
|
64
115
|
`Findings: Critical=${severityCounts.Critical} High=${severityCounts.High} Medium=${severityCounts.Medium} Low=${severityCounts.Low} Info=${severityCounts.Informational}`,
|
|
65
|
-
|
|
116
|
+
...(findingCountsLine ? [findingCountsLine] : []),
|
|
117
|
+
`Tools: ${buildToolsLine(auditState)}`,
|
|
118
|
+
`Tool Ledger: ${buildToolLedgerLine(auditState)}`,
|
|
119
|
+
buildCoverageLine(auditState),
|
|
66
120
|
`Tasks: ${taskStatus}`,
|
|
67
121
|
]
|
|
68
122
|
|