solidity-argus 0.5.10 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +8 -1
- package/README.md +27 -21
- package/package.json +2 -2
- package/skills/INVENTORY.md +14 -1
- package/skills/README.md +4 -2
- package/skills/references/attack-vector-deck/SKILL.md +62 -0
- package/skills/specialist-profiles/access-control-specialist/SKILL.md +31 -0
- package/skills/specialist-profiles/economic-security/SKILL.md +31 -0
- package/skills/specialist-profiles/execution-trace/SKILL.md +31 -0
- package/skills/specialist-profiles/first-principles/SKILL.md +31 -0
- package/skills/specialist-profiles/invariant/SKILL.md +31 -0
- package/skills/specialist-profiles/math-precision/SKILL.md +31 -0
- package/skills/specialist-profiles/periphery/SKILL.md +31 -0
- package/skills/specialist-profiles/vector-scan/SKILL.md +28 -0
- package/src/agents/argus-prompt.ts +37 -1
- package/src/agents/audit-specialist-prompt.ts +76 -0
- package/src/agents/pythia-prompt.ts +1 -1
- package/src/agents/scribe-prompt.ts +5 -0
- package/src/agents/sentinel-prompt.ts +5 -0
- package/src/agents/themis-prompt.ts +2 -0
- package/src/config/schema.ts +2 -0
- package/src/constants/defaults.ts +1 -0
- package/src/create-hooks.ts +9 -1
- package/src/features/persistent-state/run-finalizer.ts +18 -0
- package/src/hooks/config-handler.ts +23 -0
- package/src/hooks/system-prompt-hook.ts +56 -2
- package/src/hooks/tool-tracking-hook.ts +50 -6
- package/src/shared/agent-names.ts +1 -0
- package/src/state/adapters.ts +1 -1
- package/src/state/projectors.ts +50 -0
- package/src/state/schemas.ts +86 -1
- package/src/state/types.ts +24 -1
- package/src/tools/record-finding-tool.ts +7 -1
- package/src/tools/report-generator-tool.ts +1 -0
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
export const AUDIT_SPECIALIST_PROMPT = `You are **Audit Specialist**, the adversarial review multiplier of Argus Panoptes.
|
|
2
|
+
|
|
3
|
+
## IDENTITY & ROLE
|
|
4
|
+
|
|
5
|
+
You are a profile-driven Solidity security reviewer. Argus dispatches you with a prompt such as: "Run specialist profile: math-precision. Scope: src/Vault.sol." Your job is to apply that profile deeply, verify concrete hypotheses, and record only confirmed findings.
|
|
6
|
+
|
|
7
|
+
You combine Sentinel's code-analysis and verification tools with Pythia's vulnerability research reach. You are not Scribe and not Themis: do not write final reports, do not validate your own final output, and do not manage global knowledge sync.
|
|
8
|
+
|
|
9
|
+
## PROFILE STARTUP
|
|
10
|
+
|
|
11
|
+
At task start:
|
|
12
|
+
1. Identify the active profile from the task prompt. If no profile is explicit, use \`vector-scan\`.
|
|
13
|
+
2. Load the relevant profile skill with \`argus_skill_load\`. For the \`access-control\` profile, load \`access-control-specialist\` to avoid colliding with the vulnerability-pattern skill named \`access-control\`.
|
|
14
|
+
3. For \`vector-scan\`, \`first-principles\`, unfamiliar protocols, or broad adversarial review, also load \`attack-vector-deck\`.
|
|
15
|
+
4. Load supporting vulnerability/protocol skills only when they materially sharpen the review.
|
|
16
|
+
|
|
17
|
+
Recognized profiles:
|
|
18
|
+
- \`vector-scan\`: mechanically apply the bundled attack-vector deck and classify vectors as skip/drop/investigate.
|
|
19
|
+
- \`access-control\`: load \`access-control-specialist\`; map roles, modifiers, initialization, upgrade authority, and inconsistent guards.
|
|
20
|
+
- \`math-precision\`: hunt rounding, scale mismatch, downcast, decimal, overflow, and accounting precision errors.
|
|
21
|
+
- \`invariant\`: extract conservation laws and state couplings, then search for violating paths.
|
|
22
|
+
- \`economic-security\`: attack external dependencies, token behavior, oracle assumptions, incentives, and value flows.
|
|
23
|
+
- \`execution-trace\`: trace stale reads, parameter divergence, branch ordering, callbacks, and cross-transaction interleavings.
|
|
24
|
+
- \`periphery\`: focus on libraries, helpers, base contracts, adapters, encoders, wrappers, and integration glue.
|
|
25
|
+
- \`first-principles\`: ignore named bug classes; extract assumptions line-by-line and try to violate them.
|
|
26
|
+
|
|
27
|
+
## TOOL USAGE
|
|
28
|
+
|
|
29
|
+
You can use:
|
|
30
|
+
- \`argus_skill_load\` for Argus skills and specialist profiles.
|
|
31
|
+
- \`argus_check_patterns\` for known-pattern scanning.
|
|
32
|
+
- \`argus_solodit_search\` for historical audit precedent.
|
|
33
|
+
- \`argus_analyze_contract\`, \`argus_slither_analyze\`, and \`argus_proxy_detection\` for structural and static analysis.
|
|
34
|
+
- \`argus_forge_test\`, \`argus_forge_fuzz\`, \`argus_forge_coverage\`, and \`argus_gas_analysis\` for verification.
|
|
35
|
+
- \`argus_record_finding\` for confirmed findings only.
|
|
36
|
+
|
|
37
|
+
**CRITICAL — use the right skill loader:**
|
|
38
|
+
- For ALL Argus audit knowledge, specialist profiles, and the attack-vector deck, use \`argus_skill_load\`.
|
|
39
|
+
- NEVER call the generic OpenCode \`skill\` tool for Argus audit knowledge. It does not reliably load bundled Argus skills.
|
|
40
|
+
- \`task.load_skills\` is for generic OpenCode runtime skills during dispatch, not audit knowledge.
|
|
41
|
+
|
|
42
|
+
## FINDINGS VS LEADS
|
|
43
|
+
|
|
44
|
+
Record a finding only when you can prove reachability, missing/incorrect guard or accounting behavior, and security impact in the actual code. If proof is incomplete, return a \`LEAD\` to Argus and do not persist it.
|
|
45
|
+
|
|
46
|
+
When recording a confirmed finding with \`argus_record_finding\`, include specific \`impact\`, \`recommendation\`, and \`proofOfConcept\` fields. Critical and High findings must never use generic placeholders.
|
|
47
|
+
|
|
48
|
+
## OUTPUT CONTRACT
|
|
49
|
+
|
|
50
|
+
Return structured blocks only:
|
|
51
|
+
|
|
52
|
+
\`\`\`text
|
|
53
|
+
FINDING | contract: Name | function: func | bug_class: kebab-tag | profile: math-precision | group_key: Name | func | bug-class
|
|
54
|
+
path: caller -> function -> state change -> impact
|
|
55
|
+
proof: concrete values, trace, test result, or state sequence from the actual code
|
|
56
|
+
description: one sentence
|
|
57
|
+
fix: one-sentence suggestion
|
|
58
|
+
|
|
59
|
+
LEAD | contract: Name | function: func | bug_class: kebab-tag | profile: math-precision | group_key: Name | func | bug-class
|
|
60
|
+
code_smells: what looked suspicious
|
|
61
|
+
missing_proof: what still needs verification
|
|
62
|
+
description: one sentence explaining the trail
|
|
63
|
+
\`\`\`
|
|
64
|
+
|
|
65
|
+
Rules:
|
|
66
|
+
- Same root cause uses the same \`group_key\`.
|
|
67
|
+
- Different fixes require separate items.
|
|
68
|
+
- No proof means \`LEAD\`, not a persisted finding.
|
|
69
|
+
- Report tool limitations explicitly when Slither, Forge, Solodit, or coverage is unavailable.
|
|
70
|
+
|
|
71
|
+
You are the specialist lens. Narrow the field, verify the exploitability, and leave Argus with confirmed findings or precise leads.
|
|
72
|
+
`
|
|
73
|
+
|
|
74
|
+
/**
 * Returns the Audit Specialist agent prompt.
 *
 * Thin accessor around the `AUDIT_SPECIALIST_PROMPT` constant; the string is
 * returned as-is with no interpolation or per-call customization.
 */
export function getAuditSpecialistPrompt(): string {
|
|
75
|
+
return AUDIT_SPECIALIST_PROMPT
|
|
76
|
+
}
|
|
@@ -125,7 +125,7 @@ This ensures Pythia always delivers research value, even when Solodit has no dir
|
|
|
125
125
|
|
|
126
126
|
## SKILLS SYSTEM
|
|
127
127
|
|
|
128
|
-
The Argus knowledge base includes
|
|
128
|
+
The Argus knowledge base includes 91 curated SKILL.md files, 13 YAML pattern packs, 15 real-world exploit case studies, 8 specialist profiles, and an attack-vector deck covering $3B+ in historical losses. You load them with \`argus_skill_load\`.
|
|
129
129
|
|
|
130
130
|
**CRITICAL — use the right tool**:
|
|
131
131
|
- For ALL vulnerability, protocol, checklist, methodology, and case-study knowledge, use \`argus_skill_load\` with the exact skill name (e.g. \`argus_skill_load({ name: "reentrancy" })\`).
|
|
@@ -95,6 +95,11 @@ Before generating the report, verify:
|
|
|
95
95
|
|
|
96
96
|
Use \`argus_skill_load\` only when needed to improve report quality and consistency.
|
|
97
97
|
|
|
98
|
+
**CRITICAL — use the right tool**:
|
|
99
|
+
- For report templates, severity rubrics, checklists, exploit references, and audit methodology, use \`argus_skill_load\` with the exact skill name.
|
|
100
|
+
- **NEVER call the generic OpenCode \`skill\` tool** for Argus report knowledge. It does not load Argus skills such as \`report-template\`, \`severity-classification\`, or \`cyfrin-defi-core\`.
|
|
101
|
+
- \`task.load_skills\` is only a subagent dispatch parameter for generic OpenCode runtime skills, not an audit knowledge loader.
|
|
102
|
+
|
|
98
103
|
- **Curated skill map**:
|
|
99
104
|
- \`report-template\`, \`severity-classification\`
|
|
100
105
|
- \`cyfrin-defi-core\`
|
|
@@ -160,6 +160,11 @@ You have access to a specific set of tools. Use them effectively.
|
|
|
160
160
|
|
|
161
161
|
Use \`argus_skill_load\` only when specialized context is needed before deep verification work.
|
|
162
162
|
|
|
163
|
+
**CRITICAL — use the right tool**:
|
|
164
|
+
- For vulnerability, protocol, checklist, methodology, and case-study knowledge, use \`argus_skill_load\` with the exact skill name.
|
|
165
|
+
- **NEVER call the generic OpenCode \`skill\` tool** for Argus audit knowledge. It does not load Argus skills such as \`reentrancy\`, \`access-control\`, or \`oracle-manipulation\`.
|
|
166
|
+
- \`task.load_skills\` is only a subagent dispatch parameter for generic OpenCode runtime skills, not an audit knowledge loader.
|
|
167
|
+
|
|
163
168
|
- **Curated skill map**:
|
|
164
169
|
- \`reentrancy\`, \`access-control\`, \`oracle-manipulation\`
|
|
165
170
|
- \`cyfrin-defi-integrations\`, \`severity-classification\`
|
|
@@ -47,6 +47,8 @@ This phase is mandatory on every invocation.
|
|
|
47
47
|
|
|
48
48
|
4. Validate raw -> deduped mapping:
|
|
49
49
|
- Every raw finding must map to exactly one deduped finding.
|
|
50
|
+
- Findings reported by \`audit-specialist\` are first-class raw findings, just like Sentinel and Pythia findings.
|
|
51
|
+
- Preserve \`reported_by_agent: "audit-specialist"\` and include those observations in raw -> deduped -> report parity checks.
|
|
50
52
|
- Merging is allowed, dropping is not.
|
|
51
53
|
- Flag any raw finding that vanished without a valid merge target.
|
|
52
54
|
|
package/src/config/schema.ts
CHANGED
|
@@ -51,6 +51,7 @@ export const ArgusConfigSchema = z
|
|
|
51
51
|
argus: AgentConfigSchema.default({}),
|
|
52
52
|
sentinel: AgentConfigSchema.default({}),
|
|
53
53
|
pythia: AgentConfigSchema.default({}),
|
|
54
|
+
auditSpecialist: AgentConfigSchema.default({}),
|
|
54
55
|
scribe: AgentConfigSchema.default({}),
|
|
55
56
|
themis: AgentConfigSchema.optional().default({}),
|
|
56
57
|
})
|
|
@@ -58,6 +59,7 @@ export const ArgusConfigSchema = z
|
|
|
58
59
|
argus: {},
|
|
59
60
|
sentinel: {},
|
|
60
61
|
pythia: {},
|
|
62
|
+
auditSpecialist: {},
|
|
61
63
|
scribe: {},
|
|
62
64
|
themis: {},
|
|
63
65
|
}),
|
|
@@ -2,6 +2,7 @@ export const DEFAULT_MODELS = {
|
|
|
2
2
|
argus: "anthropic/claude-opus-4-7",
|
|
3
3
|
sentinel: "anthropic/claude-sonnet-4-6",
|
|
4
4
|
pythia: "anthropic/claude-sonnet-4-6",
|
|
5
|
+
auditSpecialist: "anthropic/claude-sonnet-4-6",
|
|
5
6
|
scribe: "anthropic/claude-sonnet-4-6",
|
|
6
7
|
themis: "openai/gpt-5.5",
|
|
7
8
|
} as const
|
package/src/create-hooks.ts
CHANGED
|
@@ -18,7 +18,10 @@ import {
|
|
|
18
18
|
materializeReportInput,
|
|
19
19
|
} from "./features/persistent-state/findings-materializer"
|
|
20
20
|
import { recordRun, updateRunStatus } from "./features/persistent-state/global-run-index"
|
|
21
|
-
import {
|
|
21
|
+
import {
|
|
22
|
+
finalizeRun,
|
|
23
|
+
hasResolvedThemisDispositionAfterReport,
|
|
24
|
+
} from "./features/persistent-state/run-finalizer"
|
|
22
25
|
import { createRunJournal } from "./features/persistent-state/run-journal"
|
|
23
26
|
import { pruneStaleRuns } from "./features/persistent-state/run-pruner"
|
|
24
27
|
import { createAgentTracker } from "./hooks/agent-tracker"
|
|
@@ -628,6 +631,11 @@ export function createHooks(args: {
|
|
|
628
631
|
(sessionId ? (eventSinksByOpencodeSession.get(sessionId) ?? null) : null)
|
|
629
632
|
|
|
630
633
|
if (runSink && !runSink.isFinalized) {
|
|
634
|
+
const events = await runSink.readAll()
|
|
635
|
+
if (!hasResolvedThemisDispositionAfterReport(events)) {
|
|
636
|
+
return
|
|
637
|
+
}
|
|
638
|
+
|
|
631
639
|
try {
|
|
632
640
|
const idleFinalization = await finalizeRun(
|
|
633
641
|
auditState.sessionId,
|
|
@@ -204,6 +204,24 @@ function collectThemisDispositionErrors(events: AuditEvent[]): string[] {
|
|
|
204
204
|
: ["generated report has no resolved Themis disposition"]
|
|
205
205
|
}
|
|
206
206
|
|
|
207
|
+
/**
 * Reports whether a resolved Themis disposition was logged after the most
 * recent report generation.
 *
 * The event list is scanned backwards for the last event accepted by
 * `isGenerateReportCompletion`. Only events that come strictly after it are
 * considered, and only `tool.completed` events whose payload carries a
 * `themisDisposition` accepted by `isResolvedThemisDisposition` count.
 *
 * @param events - Run events in the order they were recorded.
 * @returns `false` when no report completion exists or nothing after it
 *   carries a resolved disposition; `true` otherwise.
 */
export function hasResolvedThemisDispositionAfterReport(events: AuditEvent[]): boolean {
  // Walk backwards until we land on the latest report completion (or run out).
  let lastReportAt = events.length - 1
  while (lastReportAt >= 0) {
    const candidate = events[lastReportAt]
    if (candidate && isGenerateReportCompletion(candidate)) break
    lastReportAt -= 1
  }
  if (lastReportAt < 0) return false

  const carriesResolvedDisposition = (later: AuditEvent): boolean =>
    later.type === "tool.completed" &&
    isResolvedThemisDisposition(asRecord(later.payload)?.themisDisposition)

  // Everything up to and including the report itself is irrelevant.
  const afterReport = events.slice(lastReportAt + 1)
  return afterReport.some(carriesResolvedDisposition)
}
|
|
224
|
+
|
|
207
225
|
function collectParentChildIntegrityErrors(events: AuditEvent[]): string[] {
|
|
208
226
|
const errors: string[] = []
|
|
209
227
|
const parentByChild = new Map<string, string>()
|
|
@@ -2,6 +2,7 @@ import { existsSync, readdirSync } from "node:fs"
|
|
|
2
2
|
import { join, resolve } from "node:path"
|
|
3
3
|
import type { Config } from "@opencode-ai/sdk/v2"
|
|
4
4
|
import { ARGUS_PROMPT } from "../agents/argus-prompt"
|
|
5
|
+
import { AUDIT_SPECIALIST_PROMPT } from "../agents/audit-specialist-prompt"
|
|
5
6
|
import { PYTHIA_PROMPT } from "../agents/pythia-prompt"
|
|
6
7
|
import { SCRIBE_PROMPT } from "../agents/scribe-prompt"
|
|
7
8
|
import { SENTINEL_PROMPT } from "../agents/sentinel-prompt"
|
|
@@ -127,6 +128,7 @@ export function createConfigHandler(
|
|
|
127
128
|
task: {
|
|
128
129
|
sentinel: "allow",
|
|
129
130
|
pythia: "allow",
|
|
131
|
+
"audit-specialist": "allow",
|
|
130
132
|
scribe: "allow",
|
|
131
133
|
themis: "allow",
|
|
132
134
|
},
|
|
@@ -167,6 +169,27 @@ export function createConfigHandler(
|
|
|
167
169
|
skill: "allow",
|
|
168
170
|
},
|
|
169
171
|
},
|
|
172
|
+
"audit-specialist": {
|
|
173
|
+
mode: "subagent",
|
|
174
|
+
model: argusConfig.agents?.auditSpecialist?.model ?? DEFAULT_MODELS.auditSpecialist,
|
|
175
|
+
steps: argusConfig.agents?.auditSpecialist?.steps ?? DEFAULT_STEPS,
|
|
176
|
+
description: "Profile-driven adversarial specialist auditor",
|
|
177
|
+
prompt: AUDIT_SPECIALIST_PROMPT,
|
|
178
|
+
permission: {
|
|
179
|
+
argus_skill_load: "allow",
|
|
180
|
+
argus_check_patterns: "allow",
|
|
181
|
+
argus_solodit_search: "allow",
|
|
182
|
+
argus_analyze_contract: "allow",
|
|
183
|
+
argus_slither_analyze: "allow",
|
|
184
|
+
argus_proxy_detection: "allow",
|
|
185
|
+
argus_forge_test: "allow",
|
|
186
|
+
argus_forge_fuzz: "allow",
|
|
187
|
+
argus_forge_coverage: "allow",
|
|
188
|
+
argus_gas_analysis: "allow",
|
|
189
|
+
argus_record_finding: "allow",
|
|
190
|
+
skill: "allow",
|
|
191
|
+
},
|
|
192
|
+
},
|
|
170
193
|
scribe: {
|
|
171
194
|
mode: "subagent",
|
|
172
195
|
model: argusConfig.agents?.scribe?.model ?? DEFAULT_MODELS.scribe,
|
|
@@ -35,6 +35,57 @@ export function buildFallbackDirectives(unavailableTools: string[]): string[] {
|
|
|
35
35
|
return directives
|
|
36
36
|
}
|
|
37
37
|
|
|
38
|
+
/**
 * Formats the elapsed time between two timestamps for the tool ledger.
 *
 * @param startTime - Timestamp at which the tool started.
 * @param endTime - Timestamp at which the tool finished; may be absent.
 * @returns `"<n>ms"` when both timestamps are finite numbers and
 *   `endTime >= startTime`; otherwise `"pending"`.
 */
function formatDuration(startTime: number, endTime?: number): string {
  // A missing end time means there is nothing to measure yet.
  if (typeof endTime !== "number") return "pending"
  // NaN/±Infinity compare false against everything, so without this guard they
  // would slip past the ordering check and render as "NaNms" / "Infinityms".
  if (!Number.isFinite(startTime) || !Number.isFinite(endTime)) return "pending"
  if (endTime < startTime) return "pending"
  return `${endTime - startTime}ms`
}
|
|
42
|
+
|
|
43
|
+
/**
 * Summarizes recent tool activity as a single `"; "`-separated ledger string.
 *
 * The five most recent non-`task` executions are listed with their status,
 * findings count, and duration. If any `task` executions exist, their total
 * count is appended as a final entry.
 *
 * @param auditState - State whose `toolsExecuted` list is summarized.
 * @returns The joined ledger, or `"none"` when there is nothing to report.
 */
function buildToolLedgerLine(auditState: AuditState): string {
  const executions = auditState.toolsExecuted

  const dispatchCount = executions.reduce(
    (total, entry) => (entry.tool === "task" ? total + 1 : total),
    0,
  )

  // Keep only the tail of the non-dispatch executions.
  const recent = executions.filter((entry) => entry.tool !== "task").slice(-5)

  const ledger: string[] = []
  for (const entry of recent) {
    const outcome = entry.success ? "ok" : "failed"
    const elapsed = formatDuration(entry.startTime, entry.endTime)
    ledger.push(`${entry.tool}=${outcome} findings=${entry.findingsCount} duration=${elapsed}`)
  }
  if (dispatchCount > 0) {
    ledger.push(`task dispatches=${dispatchCount}`)
  }

  if (ledger.length === 0) return "none"
  return ledger.join("; ")
}
|
|
54
|
+
|
|
55
|
+
/**
 * Lists the names of every executed tool except `task` dispatches.
 *
 * @param auditState - State whose `toolsExecuted` list is read.
 * @returns Tool names joined with `", "` in execution order (duplicates kept),
 *   or `"none"` when no non-`task` tool has run.
 */
function buildToolsLine(auditState: AuditState): string {
  const names = auditState.toolsExecuted.flatMap((entry) =>
    entry.tool === "task" ? [] : [entry.tool],
  )
  if (names.length === 0) return "none"
  return names.join(", ")
}
|
|
61
|
+
|
|
62
|
+
/**
 * Renders the finding-count breakdown as one space-separated line.
 *
 * @param auditState - State whose optional `findingCounts` is rendered.
 * @returns `null` when no counts are present; otherwise a line starting with
 *   `"Finding Counts:"` followed by `label=value` pairs, where any missing
 *   count is shown as `0`.
 */
function buildFindingCountsLine(auditState: AuditState): string | null {
  const { findingCounts } = auditState
  if (!findingCounts) return null

  // Label/value pairs in the order they appear in the rendered line.
  const labelled = [
    ["raw_observations", findingCounts.rawObservations],
    ["recorded", findingCounts.recordedFindings],
    ["deduped", findingCounts.dedupedFindings],
    ["actionable", findingCounts.actionableFindings],
    ["non_actionable", findingCounts.nonActionableFindings],
  ] as const

  let line = "Finding Counts:"
  for (const [label, amount] of labelled) {
    line += ` ${label}=${amount ?? 0}`
  }
  return line
}
|
|
75
|
+
|
|
76
|
+
/**
 * Describes the coverage state as a single `"Coverage: ..."` line.
 *
 * When a coverage attempt is present, its status is shown, followed by the
 * reason if one is set. Without an attempt, the line reads "skipped" when
 * `"forge"` is listed in `unavailableTools`, and "pending" otherwise.
 *
 * @param auditState - State providing `coverageAttempt` and `unavailableTools`.
 * @returns The formatted coverage line.
 */
function buildCoverageLine(auditState: AuditState): string {
  const { coverageAttempt, unavailableTools } = auditState

  if (!coverageAttempt) {
    const forgeMissing = (unavailableTools ?? []).includes("forge")
    if (forgeMissing) return "Coverage: skipped — forge unavailable"
    return "Coverage: pending"
  }

  const summary = `Coverage: ${coverageAttempt.status}`
  if (!coverageAttempt.reason) return summary
  return `${summary} — ${coverageAttempt.reason}`
}
|
|
88
|
+
|
|
38
89
|
export function buildDynamicContext(
|
|
39
90
|
auditState: AuditState,
|
|
40
91
|
agent: string,
|
|
@@ -45,7 +96,7 @@ export function buildDynamicContext(
|
|
|
45
96
|
const executedToolNames = new Set(
|
|
46
97
|
auditState.toolsExecuted.map((t) => TOOL_SHORT_NAMES[t.tool] ?? t.tool),
|
|
47
98
|
)
|
|
48
|
-
const
|
|
99
|
+
const findingCountsLine = buildFindingCountsLine(auditState)
|
|
49
100
|
const taskStatus = KEY_TOOLS.map(
|
|
50
101
|
(t) => `${t}=${executedToolNames.has(t) ? "done" : "pending"}`,
|
|
51
102
|
).join(" ")
|
|
@@ -62,7 +113,10 @@ export function buildDynamicContext(
|
|
|
62
113
|
`Phase: ${auditState.currentPhase}`,
|
|
63
114
|
`Contracts: ${auditState.contractsReviewed.length} reviewed`,
|
|
64
115
|
`Findings: Critical=${severityCounts.Critical} High=${severityCounts.High} Medium=${severityCounts.Medium} Low=${severityCounts.Low} Info=${severityCounts.Informational}`,
|
|
65
|
-
|
|
116
|
+
...(findingCountsLine ? [findingCountsLine] : []),
|
|
117
|
+
`Tools: ${buildToolsLine(auditState)}`,
|
|
118
|
+
`Tool Ledger: ${buildToolLedgerLine(auditState)}`,
|
|
119
|
+
buildCoverageLine(auditState),
|
|
66
120
|
`Tasks: ${taskStatus}`,
|
|
67
121
|
]
|
|
68
122
|
|
|
@@ -20,6 +20,7 @@ import type {
|
|
|
20
20
|
ArgusAgentName,
|
|
21
21
|
AuditState,
|
|
22
22
|
Finding,
|
|
23
|
+
FindingCounts,
|
|
23
24
|
FindingSeverity,
|
|
24
25
|
FuzzCounterexample,
|
|
25
26
|
SoloditResult,
|
|
@@ -465,14 +466,38 @@ function processSoloditResult(parsed: Record<string, unknown>, state: AuditState
|
|
|
465
466
|
})
|
|
466
467
|
}
|
|
467
468
|
|
|
468
|
-
function
|
|
469
|
+
/**
 * Builds a finding-count snapshot for a single tool completion.
 *
 * @param state - Audit state; the length of `state.findings` becomes
 *   `recordedFindings`.
 * @param findingsCount - Count reported for the tool call; negative values are
 *   clamped to zero and stored as `rawObservations`.
 * @returns Counts containing only `rawObservations` and `recordedFindings`.
 */
function buildFindingCounts(state: AuditState, findingsCount: number): FindingCounts {
  const rawObservations = Math.max(0, findingsCount)
  const recordedFindings = state.findings.length
  return { rawObservations, recordedFindings }
}
|
|
475
|
+
|
|
476
|
+
/**
 * Extracts a human-readable failure reason from a tool result record.
 *
 * Candidates are tried in order: `record.error` as a string, then the
 * `message` of `record.error` when `toRecord` can read it as a record, then
 * `record.stderr`. A candidate is accepted only if it is a string with
 * non-whitespace content; it is returned untrimmed.
 *
 * @param record - Parsed tool result.
 * @returns The first acceptable reason, or `undefined` when none qualifies.
 */
function readErrorReason(record: Record<string, unknown>): string | undefined {
  const nonBlank = (candidate: unknown): string | undefined =>
    typeof candidate === "string" && candidate.trim().length > 0 ? candidate : undefined

  const { error, stderr } = record
  // `??` keeps the lookup lazy: the record conversion only runs when the
  // plain-string form of `error` was not usable.
  return nonBlank(error) ?? nonBlank(toRecord(error)?.message) ?? nonBlank(stderr)
}
|
|
485
|
+
|
|
486
|
+
function recordToolExecution(
|
|
487
|
+
state: AuditState,
|
|
488
|
+
toolName: string,
|
|
489
|
+
findingsCount: number,
|
|
490
|
+
success: boolean,
|
|
491
|
+
findingCounts?: FindingCounts,
|
|
492
|
+
): void {
|
|
469
493
|
const now = Date.now()
|
|
470
494
|
state.toolsExecuted.push({
|
|
471
495
|
tool: toolName,
|
|
472
496
|
startTime: now,
|
|
473
497
|
endTime: now,
|
|
474
|
-
success
|
|
498
|
+
success,
|
|
475
499
|
findingsCount,
|
|
500
|
+
findingCounts,
|
|
476
501
|
})
|
|
477
502
|
}
|
|
478
503
|
|
|
@@ -616,7 +641,7 @@ export function createToolTrackingHook(
|
|
|
616
641
|
}
|
|
617
642
|
|
|
618
643
|
if (resolved) {
|
|
619
|
-
recordToolExecution(resolved.state, "task", 0)
|
|
644
|
+
recordToolExecution(resolved.state, "task", 0, true, buildFindingCounts(resolved.state, 0))
|
|
620
645
|
onStateChanged?.({ tool: "task", findingsCount: 0, sessionId: input.sessionID })
|
|
621
646
|
}
|
|
622
647
|
|
|
@@ -875,9 +900,16 @@ export function createToolTrackingHook(
|
|
|
875
900
|
break
|
|
876
901
|
}
|
|
877
902
|
case "argus_forge_coverage": {
|
|
903
|
+
const now = Date.now()
|
|
878
904
|
const reportObj = toRecord(record.report)
|
|
879
905
|
const files = reportObj?.files
|
|
880
|
-
if (
|
|
906
|
+
if (record.success === false) {
|
|
907
|
+
auditState.coverageAttempt = {
|
|
908
|
+
status: "failed",
|
|
909
|
+
attemptedAt: now,
|
|
910
|
+
reason: readErrorReason(record),
|
|
911
|
+
}
|
|
912
|
+
} else if (Array.isArray(files)) {
|
|
881
913
|
auditState.coverageReport = {
|
|
882
914
|
files: files
|
|
883
915
|
.filter((f): f is Record<string, unknown> => !!f && typeof f === "object")
|
|
@@ -889,6 +921,13 @@ export function createToolTrackingHook(
|
|
|
889
921
|
functionsPct: typeof f.functionsPct === "number" ? f.functionsPct : 0,
|
|
890
922
|
})),
|
|
891
923
|
}
|
|
924
|
+
auditState.coverageAttempt = { status: "run", attemptedAt: now }
|
|
925
|
+
} else {
|
|
926
|
+
auditState.coverageAttempt = {
|
|
927
|
+
status: "failed",
|
|
928
|
+
attemptedAt: now,
|
|
929
|
+
reason: "coverage report was missing or invalid",
|
|
930
|
+
}
|
|
892
931
|
}
|
|
893
932
|
break
|
|
894
933
|
}
|
|
@@ -963,10 +1002,12 @@ export function createToolTrackingHook(
|
|
|
963
1002
|
}
|
|
964
1003
|
}
|
|
965
1004
|
|
|
966
|
-
completedSuccess =
|
|
1005
|
+
completedSuccess = record.success !== false
|
|
967
1006
|
}
|
|
968
1007
|
|
|
969
|
-
|
|
1008
|
+
const findingCounts = buildFindingCounts(auditState, findingsCount)
|
|
1009
|
+
auditState.findingCounts = findingCounts
|
|
1010
|
+
recordToolExecution(auditState, input.tool, findingsCount, completedSuccess, findingCounts)
|
|
970
1011
|
|
|
971
1012
|
const nextPhase = inferPhaseAdvancement(auditState, input.tool)
|
|
972
1013
|
if (nextPhase) {
|
|
@@ -1003,6 +1044,8 @@ export function createToolTrackingHook(
|
|
|
1003
1044
|
break
|
|
1004
1045
|
case "argus_forge_coverage":
|
|
1005
1046
|
if (auditState.coverageReport) enrichment.coverageReport = auditState.coverageReport
|
|
1047
|
+
if (auditState.coverageAttempt)
|
|
1048
|
+
enrichment.coverageAttempt = auditState.coverageAttempt
|
|
1006
1049
|
break
|
|
1007
1050
|
case "argus_gas_analysis":
|
|
1008
1051
|
if (auditState.gasHotspots) enrichment.gasHotspots = auditState.gasHotspots
|
|
@@ -1028,6 +1071,7 @@ export function createToolTrackingHook(
|
|
|
1028
1071
|
buildEvent("tool.completed", runId, sessionId, toolCallId, {
|
|
1029
1072
|
tool: input.tool,
|
|
1030
1073
|
findingsCount,
|
|
1074
|
+
findingCounts: completedSuccess ? auditState.findingCounts : undefined,
|
|
1031
1075
|
success: completedSuccess,
|
|
1032
1076
|
...(completionError ? { error: completionError } : {}),
|
|
1033
1077
|
...enrichment,
|
package/src/state/adapters.ts
CHANGED
|
@@ -246,9 +246,9 @@ export function normalizeToCanonicalFinding(
|
|
|
246
246
|
: "manual"
|
|
247
247
|
|
|
248
248
|
const reportedByAgentRaw =
|
|
249
|
+
options.reportedByAgent ??
|
|
249
250
|
(typeof input.reported_by_agent === "string" ? input.reported_by_agent : undefined) ??
|
|
250
251
|
(typeof input.reportedByAgent === "string" ? input.reportedByAgent : undefined) ??
|
|
251
|
-
options.reportedByAgent ??
|
|
252
252
|
"unknown"
|
|
253
253
|
const reportedByAgent: ArgusAgentName = VALID_AGENTS.has(reportedByAgentRaw as ArgusAgentName)
|
|
254
254
|
? (reportedByAgentRaw as ArgusAgentName)
|
package/src/state/projectors.ts
CHANGED
|
@@ -12,7 +12,9 @@ import {
|
|
|
12
12
|
import type {
|
|
13
13
|
AuditPhase,
|
|
14
14
|
AuditState,
|
|
15
|
+
CoverageAttemptState,
|
|
15
16
|
Finding,
|
|
17
|
+
FindingCounts,
|
|
16
18
|
FuzzCounterexample,
|
|
17
19
|
SoloditResult,
|
|
18
20
|
ToolExecution,
|
|
@@ -99,6 +101,48 @@ function resolveToolSuccess(payload: Record<string, unknown>): boolean {
|
|
|
99
101
|
return payload.success !== false
|
|
100
102
|
}
|
|
101
103
|
|
|
104
|
+
/** Keys of a finding-counts payload that `asFindingCounts` reads. */
const FINDING_COUNT_FIELDS = [
  "rawObservations",
  "recordedFindings",
  "dedupedFindings",
  "actionableFindings",
  "nonActionableFindings",
] as const

/**
 * Validates an untyped value as a finding-counts object.
 *
 * Only the known count fields are copied, and only when the field holds a
 * non-negative integer; anything else is silently dropped.
 *
 * @param value - Untrusted value, e.g. an event payload field.
 * @returns The sanitized counts, or `undefined` when `value` is not a record
 *   or no field passed validation.
 */
function asFindingCounts(value: unknown): FindingCounts | undefined {
  if (!isRecord(value)) return undefined

  const sanitized: FindingCounts = {}
  let accepted = 0
  for (const field of FINDING_COUNT_FIELDS) {
    const candidate = value[field]
    if (typeof candidate !== "number") continue
    // Number.isInteger is false for NaN and ±Infinity, so it also covers the
    // finiteness requirement.
    if (!Number.isInteger(candidate) || candidate < 0) continue
    sanitized[field] = candidate
    accepted += 1
  }

  return accepted === 0 ? undefined : sanitized
}
|
|
128
|
+
|
|
129
|
+
/**
 * Validates an untyped value as a coverage-attempt state.
 *
 * The value must be a record whose `status` is one of `"pending"`, `"run"`,
 * `"skipped"`, or `"failed"`. `attemptedAt` is kept only when it is a number
 * and `reason` only when it is a string; otherwise each is `undefined`.
 *
 * @param value - Untrusted value, e.g. an event payload field.
 * @returns The normalized attempt, or `undefined` when the value is not a
 *   record or the status is unrecognized.
 */
function asCoverageAttempt(value: unknown): CoverageAttemptState | undefined {
  if (!isRecord(value)) return undefined

  const { status, attemptedAt, reason } = value
  switch (status) {
    case "pending":
    case "run":
    case "skipped":
    case "failed":
      return {
        status,
        attemptedAt: typeof attemptedAt === "number" ? attemptedAt : undefined,
        reason: typeof reason === "string" ? reason : undefined,
      }
    default:
      return undefined
  }
}
|
|
145
|
+
|
|
102
146
|
function asStringArray(value: unknown): string[] | undefined {
|
|
103
147
|
if (!Array.isArray(value)) return undefined
|
|
104
148
|
return value.filter((item): item is string => typeof item === "string")
|
|
@@ -321,6 +365,7 @@ export function projectToolExecutions(events: AuditEvent[]): CanonicalToolExecut
|
|
|
321
365
|
endTime: existing?.endTime,
|
|
322
366
|
success: existing?.success ?? false,
|
|
323
367
|
findingsCount: existing?.findingsCount ?? 0,
|
|
368
|
+
findingCounts: existing?.findingCounts,
|
|
324
369
|
})
|
|
325
370
|
continue
|
|
326
371
|
}
|
|
@@ -340,6 +385,7 @@ export function projectToolExecutions(events: AuditEvent[]): CanonicalToolExecut
|
|
|
340
385
|
endTime: event.timestamp,
|
|
341
386
|
success: resolveToolSuccess(payload),
|
|
342
387
|
findingsCount: resolveFindingsCount(payload),
|
|
388
|
+
findingCounts: asFindingCounts(payload.findingCounts),
|
|
343
389
|
run_id: event.run_id,
|
|
344
390
|
schema_version: event.schema_version,
|
|
345
391
|
})
|
|
@@ -408,6 +454,8 @@ export function projectReportInput(
|
|
|
408
454
|
asFuzzCounterexamples,
|
|
409
455
|
)
|
|
410
456
|
const coverageReport = extractLatestFromPayload(events, "coverageReport", asCoverageReport)
|
|
457
|
+
const coverageAttempt = extractLatestFromPayload(events, "coverageAttempt", asCoverageAttempt)
|
|
458
|
+
const findingCounts = extractLatestFromPayload(events, "findingCounts", asFindingCounts)
|
|
411
459
|
const gasHotspots = extractLatestFromPayload(events, "gasHotspots", asGasHotspots)
|
|
412
460
|
const proxyContracts = extractLatestFromPayload(events, "proxyContracts", asProxyContracts)
|
|
413
461
|
const patternVersion = extractLatestFromPayload(events, "patternVersion", asString)
|
|
@@ -424,10 +472,12 @@ export function projectReportInput(
|
|
|
424
472
|
projectDir,
|
|
425
473
|
findings,
|
|
426
474
|
toolsExecuted,
|
|
475
|
+
findingCounts,
|
|
427
476
|
scope,
|
|
428
477
|
soloditResults,
|
|
429
478
|
fuzzCounterexamples,
|
|
430
479
|
coverageReport,
|
|
480
|
+
coverageAttempt,
|
|
431
481
|
gasHotspots,
|
|
432
482
|
proxyContracts,
|
|
433
483
|
patternVersion,
|