solidity-argus 0.5.10 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. package/AGENTS.md +8 -1
  2. package/README.md +27 -21
  3. package/package.json +2 -2
  4. package/skills/INVENTORY.md +14 -1
  5. package/skills/README.md +4 -2
  6. package/skills/references/attack-vector-deck/SKILL.md +62 -0
  7. package/skills/specialist-profiles/access-control-specialist/SKILL.md +31 -0
  8. package/skills/specialist-profiles/economic-security/SKILL.md +31 -0
  9. package/skills/specialist-profiles/execution-trace/SKILL.md +31 -0
  10. package/skills/specialist-profiles/first-principles/SKILL.md +31 -0
  11. package/skills/specialist-profiles/invariant/SKILL.md +31 -0
  12. package/skills/specialist-profiles/math-precision/SKILL.md +31 -0
  13. package/skills/specialist-profiles/periphery/SKILL.md +31 -0
  14. package/skills/specialist-profiles/vector-scan/SKILL.md +28 -0
  15. package/src/agents/argus-prompt.ts +37 -1
  16. package/src/agents/audit-specialist-prompt.ts +76 -0
  17. package/src/agents/pythia-prompt.ts +1 -1
  18. package/src/agents/scribe-prompt.ts +5 -0
  19. package/src/agents/sentinel-prompt.ts +5 -0
  20. package/src/agents/themis-prompt.ts +2 -0
  21. package/src/config/schema.ts +2 -0
  22. package/src/constants/defaults.ts +1 -0
  23. package/src/create-hooks.ts +9 -1
  24. package/src/features/persistent-state/run-finalizer.ts +18 -0
  25. package/src/hooks/config-handler.ts +23 -0
  26. package/src/hooks/system-prompt-hook.ts +56 -2
  27. package/src/hooks/tool-tracking-hook.ts +50 -6
  28. package/src/shared/agent-names.ts +1 -0
  29. package/src/state/adapters.ts +1 -1
  30. package/src/state/projectors.ts +50 -0
  31. package/src/state/schemas.ts +86 -1
  32. package/src/state/types.ts +24 -1
  33. package/src/tools/record-finding-tool.ts +7 -1
  34. package/src/tools/report-generator-tool.ts +1 -0
@@ -0,0 +1,76 @@
1
+ export const AUDIT_SPECIALIST_PROMPT = `You are **Audit Specialist**, the adversarial review multiplier of Argus Panoptes.
2
+
3
+ ## IDENTITY & ROLE
4
+
5
+ You are a profile-driven Solidity security reviewer. Argus dispatches you with a prompt such as: "Run specialist profile: math-precision. Scope: src/Vault.sol." Your job is to apply that profile deeply, verify concrete hypotheses, and record only confirmed findings.
6
+
7
+ You combine Sentinel's code-analysis and verification tools with Pythia's vulnerability research reach. You are not Scribe and not Themis: do not write final reports, do not validate your own final output, and do not manage global knowledge sync.
8
+
9
+ ## PROFILE STARTUP
10
+
11
+ At task start:
12
+ 1. Identify the active profile from the task prompt. If no profile is explicit, use \`vector-scan\`.
13
+ 2. Load the relevant profile skill with \`argus_skill_load\`. For the \`access-control\` profile, load \`access-control-specialist\` to avoid colliding with the vulnerability-pattern skill named \`access-control\`.
14
+ 3. For \`vector-scan\`, \`first-principles\`, unfamiliar protocols, or broad adversarial review, also load \`attack-vector-deck\`.
15
+ 4. Load supporting vulnerability/protocol skills only when they materially sharpen the review.
16
+
17
+ Recognized profiles:
18
+ - \`vector-scan\`: mechanically apply the bundled attack-vector deck and classify vectors as skip/drop/investigate.
19
+ - \`access-control\`: load \`access-control-specialist\`; map roles, modifiers, initialization, upgrade authority, and inconsistent guards.
20
+ - \`math-precision\`: hunt rounding, scale mismatch, downcast, decimal, overflow, and accounting precision errors.
21
+ - \`invariant\`: extract conservation laws and state couplings, then search for violating paths.
22
+ - \`economic-security\`: attack external dependencies, token behavior, oracle assumptions, incentives, and value flows.
23
+ - \`execution-trace\`: trace stale reads, parameter divergence, branch ordering, callbacks, and cross-transaction interleavings.
24
+ - \`periphery\`: focus on libraries, helpers, base contracts, adapters, encoders, wrappers, and integration glue.
25
+ - \`first-principles\`: ignore named bug classes; extract assumptions line-by-line and try to violate them.
26
+
27
+ ## TOOL USAGE
28
+
29
+ You can use:
30
+ - \`argus_skill_load\` for Argus skills and specialist profiles.
31
+ - \`argus_check_patterns\` for known-pattern scanning.
32
+ - \`argus_solodit_search\` for historical audit precedent.
33
+ - \`argus_analyze_contract\`, \`argus_slither_analyze\`, and \`argus_proxy_detection\` for structural and static analysis.
34
+ - \`argus_forge_test\`, \`argus_forge_fuzz\`, \`argus_forge_coverage\`, and \`argus_gas_analysis\` for verification.
35
+ - \`argus_record_finding\` for confirmed findings only.
36
+
37
+ **CRITICAL — use the right skill loader:**
38
+ - For ALL Argus audit knowledge, specialist profiles, and the attack-vector deck, use \`argus_skill_load\`.
39
+ - NEVER call the generic OpenCode \`skill\` tool for Argus audit knowledge. It does not reliably load bundled Argus skills.
40
+ - \`task.load_skills\` is for generic OpenCode runtime skills during dispatch, not audit knowledge.
41
+
42
+ ## FINDINGS VS LEADS
43
+
44
+ Record a finding only when you can prove reachability, missing/incorrect guard or accounting behavior, and security impact in the actual code. If proof is incomplete, return a \`LEAD\` to Argus and do not persist it.
45
+
46
+ When recording a confirmed finding with \`argus_record_finding\`, include specific \`impact\`, \`recommendation\`, and \`proofOfConcept\` fields. Critical and High findings must never use generic placeholders.
47
+
48
+ ## OUTPUT CONTRACT
49
+
50
+ Return structured blocks only:
51
+
52
+ \`\`\`text
53
+ FINDING | contract: Name | function: func | bug_class: kebab-tag | profile: math-precision | group_key: Name | func | bug-class
54
+ path: caller -> function -> state change -> impact
55
+ proof: concrete values, trace, test result, or state sequence from the actual code
56
+ description: one sentence
57
+ fix: one-sentence suggestion
58
+
59
+ LEAD | contract: Name | function: func | bug_class: kebab-tag | profile: math-precision | group_key: Name | func | bug-class
60
+ code_smells: what looked suspicious
61
+ missing_proof: what still needs verification
62
+ description: one sentence explaining the trail
63
+ \`\`\`
64
+
65
+ Rules:
66
+ - Same root cause uses the same \`group_key\`.
67
+ - Different fixes require separate items.
68
+ - No proof means \`LEAD\`, not a persisted finding.
69
+ - Report tool limitations explicitly when Slither, Forge, Solodit, or coverage is unavailable.
70
+
71
+ You are the specialist lens. Narrow the field, verify the exploitability, and leave Argus with confirmed findings or precise leads.
72
+ `
73
+
74
+ export function getAuditSpecialistPrompt(): string {
75
+ return AUDIT_SPECIALIST_PROMPT
76
+ }
@@ -125,7 +125,7 @@ This ensures Pythia always delivers research value, even when Solodit has no dir
125
125
 
126
126
  ## SKILLS SYSTEM
127
127
 
128
- The Argus knowledge base includes 75+ curated SKILL.md files, 13 YAML pattern packs, and 15 real-world exploit case studies covering $3B+ in losses. You load them with \`argus_skill_load\`.
128
+ The Argus knowledge base includes 91 curated SKILL.md files, 13 YAML pattern packs, 15 real-world exploit case studies, 8 specialist profiles, and an attack-vector deck covering $3B+ in historical losses. You load them with \`argus_skill_load\`.
129
129
 
130
130
  **CRITICAL — use the right tool**:
131
131
  - For ALL vulnerability, protocol, checklist, methodology, and case-study knowledge, use \`argus_skill_load\` with the exact skill name (e.g. \`argus_skill_load({ name: "reentrancy" })\`).
@@ -95,6 +95,11 @@ Before generating the report, verify:
95
95
 
96
96
  Use \`argus_skill_load\` only when needed to improve report quality and consistency.
97
97
 
98
+ **CRITICAL — use the right tool**:
99
+ - For report templates, severity rubrics, checklists, exploit references, and audit methodology, use \`argus_skill_load\` with the exact skill name.
100
+ - **NEVER call the generic OpenCode \`skill\` tool** for Argus report knowledge. It does not load Argus skills such as \`report-template\`, \`severity-classification\`, or \`cyfrin-defi-core\`.
101
+ - \`task.load_skills\` is only a subagent dispatch parameter for generic OpenCode runtime skills, not an audit knowledge loader.
102
+
98
103
  - **Curated skill map**:
99
104
  - \`report-template\`, \`severity-classification\`
100
105
  - \`cyfrin-defi-core\`
@@ -160,6 +160,11 @@ You have access to a specific set of tools. Use them effectively.
160
160
 
161
161
  Use \`argus_skill_load\` only when specialized context is needed before deep verification work.
162
162
 
163
+ **CRITICAL — use the right tool**:
164
+ - For vulnerability, protocol, checklist, methodology, and case-study knowledge, use \`argus_skill_load\` with the exact skill name.
165
+ - **NEVER call the generic OpenCode \`skill\` tool** for Argus audit knowledge. It does not load Argus skills such as \`reentrancy\`, \`access-control\`, or \`oracle-manipulation\`.
166
+ - \`task.load_skills\` is only a subagent dispatch parameter for generic OpenCode runtime skills, not an audit knowledge loader.
167
+
163
168
  - **Curated skill map**:
164
169
  - \`reentrancy\`, \`access-control\`, \`oracle-manipulation\`
165
170
  - \`cyfrin-defi-integrations\`, \`severity-classification\`
@@ -47,6 +47,8 @@ This phase is mandatory on every invocation.
47
47
 
48
48
  4. Validate raw -> deduped mapping:
49
49
  - Every raw finding must map to exactly one deduped finding.
50
+ - Findings reported by \`audit-specialist\` are first-class raw findings, just like Sentinel and Pythia findings.
51
+ - Preserve \`reported_by_agent: "audit-specialist"\` and include those observations in raw -> deduped -> report parity checks.
50
52
  - Merging is allowed, dropping is not.
51
53
  - Flag any raw finding that vanished without a valid merge target.
52
54
 
@@ -51,6 +51,7 @@ export const ArgusConfigSchema = z
51
51
  argus: AgentConfigSchema.default({}),
52
52
  sentinel: AgentConfigSchema.default({}),
53
53
  pythia: AgentConfigSchema.default({}),
54
+ auditSpecialist: AgentConfigSchema.default({}),
54
55
  scribe: AgentConfigSchema.default({}),
55
56
  themis: AgentConfigSchema.optional().default({}),
56
57
  })
@@ -58,6 +59,7 @@ export const ArgusConfigSchema = z
58
59
  argus: {},
59
60
  sentinel: {},
60
61
  pythia: {},
62
+ auditSpecialist: {},
61
63
  scribe: {},
62
64
  themis: {},
63
65
  }),
@@ -2,6 +2,7 @@ export const DEFAULT_MODELS = {
2
2
  argus: "anthropic/claude-opus-4-7",
3
3
  sentinel: "anthropic/claude-sonnet-4-6",
4
4
  pythia: "anthropic/claude-sonnet-4-6",
5
+ auditSpecialist: "anthropic/claude-sonnet-4-6",
5
6
  scribe: "anthropic/claude-sonnet-4-6",
6
7
  themis: "openai/gpt-5.5",
7
8
  } as const
@@ -18,7 +18,10 @@ import {
18
18
  materializeReportInput,
19
19
  } from "./features/persistent-state/findings-materializer"
20
20
  import { recordRun, updateRunStatus } from "./features/persistent-state/global-run-index"
21
- import { finalizeRun } from "./features/persistent-state/run-finalizer"
21
+ import {
22
+ finalizeRun,
23
+ hasResolvedThemisDispositionAfterReport,
24
+ } from "./features/persistent-state/run-finalizer"
22
25
  import { createRunJournal } from "./features/persistent-state/run-journal"
23
26
  import { pruneStaleRuns } from "./features/persistent-state/run-pruner"
24
27
  import { createAgentTracker } from "./hooks/agent-tracker"
@@ -628,6 +631,11 @@ export function createHooks(args: {
628
631
  (sessionId ? (eventSinksByOpencodeSession.get(sessionId) ?? null) : null)
629
632
 
630
633
  if (runSink && !runSink.isFinalized) {
634
+ const events = await runSink.readAll()
635
+ if (!hasResolvedThemisDispositionAfterReport(events)) {
636
+ return
637
+ }
638
+
631
639
  try {
632
640
  const idleFinalization = await finalizeRun(
633
641
  auditState.sessionId,
@@ -204,6 +204,24 @@ function collectThemisDispositionErrors(events: AuditEvent[]): string[] {
204
204
  : ["generated report has no resolved Themis disposition"]
205
205
  }
206
206
 
207
+ export function hasResolvedThemisDispositionAfterReport(events: AuditEvent[]): boolean {
208
+ let reportIndex = -1
209
+ for (let index = events.length - 1; index >= 0; index -= 1) {
210
+ const event = events[index]
211
+ if (event && isGenerateReportCompletion(event)) {
212
+ reportIndex = index
213
+ break
214
+ }
215
+ }
216
+ if (reportIndex === -1) return false
217
+
218
+ return events.slice(reportIndex + 1).some((event) => {
219
+ if (event.type !== "tool.completed") return false
220
+ const payload = asRecord(event.payload)
221
+ return isResolvedThemisDisposition(payload?.themisDisposition)
222
+ })
223
+ }
224
+
207
225
  function collectParentChildIntegrityErrors(events: AuditEvent[]): string[] {
208
226
  const errors: string[] = []
209
227
  const parentByChild = new Map<string, string>()
@@ -2,6 +2,7 @@ import { existsSync, readdirSync } from "node:fs"
2
2
  import { join, resolve } from "node:path"
3
3
  import type { Config } from "@opencode-ai/sdk/v2"
4
4
  import { ARGUS_PROMPT } from "../agents/argus-prompt"
5
+ import { AUDIT_SPECIALIST_PROMPT } from "../agents/audit-specialist-prompt"
5
6
  import { PYTHIA_PROMPT } from "../agents/pythia-prompt"
6
7
  import { SCRIBE_PROMPT } from "../agents/scribe-prompt"
7
8
  import { SENTINEL_PROMPT } from "../agents/sentinel-prompt"
@@ -127,6 +128,7 @@ export function createConfigHandler(
127
128
  task: {
128
129
  sentinel: "allow",
129
130
  pythia: "allow",
131
+ "audit-specialist": "allow",
130
132
  scribe: "allow",
131
133
  themis: "allow",
132
134
  },
@@ -167,6 +169,27 @@ export function createConfigHandler(
167
169
  skill: "allow",
168
170
  },
169
171
  },
172
+ "audit-specialist": {
173
+ mode: "subagent",
174
+ model: argusConfig.agents?.auditSpecialist?.model ?? DEFAULT_MODELS.auditSpecialist,
175
+ steps: argusConfig.agents?.auditSpecialist?.steps ?? DEFAULT_STEPS,
176
+ description: "Profile-driven adversarial specialist auditor",
177
+ prompt: AUDIT_SPECIALIST_PROMPT,
178
+ permission: {
179
+ argus_skill_load: "allow",
180
+ argus_check_patterns: "allow",
181
+ argus_solodit_search: "allow",
182
+ argus_analyze_contract: "allow",
183
+ argus_slither_analyze: "allow",
184
+ argus_proxy_detection: "allow",
185
+ argus_forge_test: "allow",
186
+ argus_forge_fuzz: "allow",
187
+ argus_forge_coverage: "allow",
188
+ argus_gas_analysis: "allow",
189
+ argus_record_finding: "allow",
190
+ skill: "allow",
191
+ },
192
+ },
170
193
  scribe: {
171
194
  mode: "subagent",
172
195
  model: argusConfig.agents?.scribe?.model ?? DEFAULT_MODELS.scribe,
@@ -35,6 +35,57 @@ export function buildFallbackDirectives(unavailableTools: string[]): string[] {
35
35
  return directives
36
36
  }
37
37
 
38
+ function formatDuration(startTime: number, endTime?: number): string {
39
+ if (typeof endTime !== "number" || endTime < startTime) return "pending"
40
+ return `${endTime - startTime}ms`
41
+ }
42
+
43
+ function buildToolLedgerLine(auditState: AuditState): string {
44
+ const taskDispatches = auditState.toolsExecuted.filter((tool) => tool.tool === "task").length
45
+ const argusTools = auditState.toolsExecuted.filter((tool) => tool.tool !== "task").slice(-5)
46
+ const entries = argusTools.map((tool) => {
47
+ const status = tool.success ? "ok" : "failed"
48
+ return `${tool.tool}=${status} findings=${tool.findingsCount} duration=${formatDuration(tool.startTime, tool.endTime)}`
49
+ })
50
+
51
+ if (taskDispatches > 0) entries.push(`task dispatches=${taskDispatches}`)
52
+ return entries.length > 0 ? entries.join("; ") : "none"
53
+ }
54
+
55
+ function buildToolsLine(auditState: AuditState): string {
56
+ const tools = auditState.toolsExecuted
57
+ .filter((tool) => tool.tool !== "task")
58
+ .map((tool) => tool.tool)
59
+ return tools.length > 0 ? tools.join(", ") : "none"
60
+ }
61
+
62
+ function buildFindingCountsLine(auditState: AuditState): string | null {
63
+ const counts = auditState.findingCounts
64
+ if (!counts) return null
65
+
66
+ return [
67
+ "Finding Counts:",
68
+ `raw_observations=${counts.rawObservations ?? 0}`,
69
+ `recorded=${counts.recordedFindings ?? 0}`,
70
+ `deduped=${counts.dedupedFindings ?? 0}`,
71
+ `actionable=${counts.actionableFindings ?? 0}`,
72
+ `non_actionable=${counts.nonActionableFindings ?? 0}`,
73
+ ].join(" ")
74
+ }
75
+
76
+ function buildCoverageLine(auditState: AuditState): string {
77
+ const attempt = auditState.coverageAttempt
78
+ if (attempt) {
79
+ return attempt.reason
80
+ ? `Coverage: ${attempt.status} — ${attempt.reason}`
81
+ : `Coverage: ${attempt.status}`
82
+ }
83
+ const unavailable = auditState.unavailableTools ?? []
84
+ return unavailable.includes("forge")
85
+ ? "Coverage: skipped — forge unavailable"
86
+ : "Coverage: pending"
87
+ }
88
+
38
89
  export function buildDynamicContext(
39
90
  auditState: AuditState,
40
91
  agent: string,
@@ -45,7 +96,7 @@ export function buildDynamicContext(
45
96
  const executedToolNames = new Set(
46
97
  auditState.toolsExecuted.map((t) => TOOL_SHORT_NAMES[t.tool] ?? t.tool),
47
98
  )
48
- const tools = auditState.toolsExecuted.map((tool) => tool.tool).join(", ") || "none"
99
+ const findingCountsLine = buildFindingCountsLine(auditState)
49
100
  const taskStatus = KEY_TOOLS.map(
50
101
  (t) => `${t}=${executedToolNames.has(t) ? "done" : "pending"}`,
51
102
  ).join(" ")
@@ -62,7 +113,10 @@ export function buildDynamicContext(
62
113
  `Phase: ${auditState.currentPhase}`,
63
114
  `Contracts: ${auditState.contractsReviewed.length} reviewed`,
64
115
  `Findings: Critical=${severityCounts.Critical} High=${severityCounts.High} Medium=${severityCounts.Medium} Low=${severityCounts.Low} Info=${severityCounts.Informational}`,
65
- `Tools: ${tools}`,
116
+ ...(findingCountsLine ? [findingCountsLine] : []),
117
+ `Tools: ${buildToolsLine(auditState)}`,
118
+ `Tool Ledger: ${buildToolLedgerLine(auditState)}`,
119
+ buildCoverageLine(auditState),
66
120
  `Tasks: ${taskStatus}`,
67
121
  ]
68
122
 
@@ -20,6 +20,7 @@ import type {
20
20
  ArgusAgentName,
21
21
  AuditState,
22
22
  Finding,
23
+ FindingCounts,
23
24
  FindingSeverity,
24
25
  FuzzCounterexample,
25
26
  SoloditResult,
@@ -465,14 +466,38 @@ function processSoloditResult(parsed: Record<string, unknown>, state: AuditState
465
466
  })
466
467
  }
467
468
 
468
- function recordToolExecution(state: AuditState, toolName: string, findingsCount: number): void {
469
+ function buildFindingCounts(state: AuditState, findingsCount: number): FindingCounts {
470
+ return {
471
+ rawObservations: Math.max(0, findingsCount),
472
+ recordedFindings: state.findings.length,
473
+ }
474
+ }
475
+
476
+ function readErrorReason(record: Record<string, unknown>): string | undefined {
477
+ if (typeof record.error === "string" && record.error.trim().length > 0) return record.error
478
+ const errorRecord = toRecord(record.error)
479
+ if (typeof errorRecord?.message === "string" && errorRecord.message.trim().length > 0) {
480
+ return errorRecord.message
481
+ }
482
+ if (typeof record.stderr === "string" && record.stderr.trim().length > 0) return record.stderr
483
+ return undefined
484
+ }
485
+
486
+ function recordToolExecution(
487
+ state: AuditState,
488
+ toolName: string,
489
+ findingsCount: number,
490
+ success: boolean,
491
+ findingCounts?: FindingCounts,
492
+ ): void {
469
493
  const now = Date.now()
470
494
  state.toolsExecuted.push({
471
495
  tool: toolName,
472
496
  startTime: now,
473
497
  endTime: now,
474
- success: true,
498
+ success,
475
499
  findingsCount,
500
+ findingCounts,
476
501
  })
477
502
  }
478
503
 
@@ -616,7 +641,7 @@ export function createToolTrackingHook(
616
641
  }
617
642
 
618
643
  if (resolved) {
619
- recordToolExecution(resolved.state, "task", 0)
644
+ recordToolExecution(resolved.state, "task", 0, true, buildFindingCounts(resolved.state, 0))
620
645
  onStateChanged?.({ tool: "task", findingsCount: 0, sessionId: input.sessionID })
621
646
  }
622
647
 
@@ -875,9 +900,16 @@ export function createToolTrackingHook(
875
900
  break
876
901
  }
877
902
  case "argus_forge_coverage": {
903
+ const now = Date.now()
878
904
  const reportObj = toRecord(record.report)
879
905
  const files = reportObj?.files
880
- if (Array.isArray(files)) {
906
+ if (record.success === false) {
907
+ auditState.coverageAttempt = {
908
+ status: "failed",
909
+ attemptedAt: now,
910
+ reason: readErrorReason(record),
911
+ }
912
+ } else if (Array.isArray(files)) {
881
913
  auditState.coverageReport = {
882
914
  files: files
883
915
  .filter((f): f is Record<string, unknown> => !!f && typeof f === "object")
@@ -889,6 +921,13 @@ export function createToolTrackingHook(
889
921
  functionsPct: typeof f.functionsPct === "number" ? f.functionsPct : 0,
890
922
  })),
891
923
  }
924
+ auditState.coverageAttempt = { status: "run", attemptedAt: now }
925
+ } else {
926
+ auditState.coverageAttempt = {
927
+ status: "failed",
928
+ attemptedAt: now,
929
+ reason: "coverage report was missing or invalid",
930
+ }
892
931
  }
893
932
  break
894
933
  }
@@ -963,10 +1002,12 @@ export function createToolTrackingHook(
963
1002
  }
964
1003
  }
965
1004
 
966
- completedSuccess = true
1005
+ completedSuccess = record.success !== false
967
1006
  }
968
1007
 
969
- recordToolExecution(auditState, input.tool, findingsCount)
1008
+ const findingCounts = buildFindingCounts(auditState, findingsCount)
1009
+ auditState.findingCounts = findingCounts
1010
+ recordToolExecution(auditState, input.tool, findingsCount, completedSuccess, findingCounts)
970
1011
 
971
1012
  const nextPhase = inferPhaseAdvancement(auditState, input.tool)
972
1013
  if (nextPhase) {
@@ -1003,6 +1044,8 @@ export function createToolTrackingHook(
1003
1044
  break
1004
1045
  case "argus_forge_coverage":
1005
1046
  if (auditState.coverageReport) enrichment.coverageReport = auditState.coverageReport
1047
+ if (auditState.coverageAttempt)
1048
+ enrichment.coverageAttempt = auditState.coverageAttempt
1006
1049
  break
1007
1050
  case "argus_gas_analysis":
1008
1051
  if (auditState.gasHotspots) enrichment.gasHotspots = auditState.gasHotspots
@@ -1028,6 +1071,7 @@ export function createToolTrackingHook(
1028
1071
  buildEvent("tool.completed", runId, sessionId, toolCallId, {
1029
1072
  tool: input.tool,
1030
1073
  findingsCount,
1074
+ findingCounts: completedSuccess ? auditState.findingCounts : undefined,
1031
1075
  success: completedSuccess,
1032
1076
  ...(completionError ? { error: completionError } : {}),
1033
1077
  ...enrichment,
@@ -2,6 +2,7 @@ export const ARGUS_ORCHESTRATOR: ReadonlySet<string> = new Set(["argus"])
2
2
  export const ARGUS_SUBAGENTS: ReadonlySet<string> = new Set([
3
3
  "sentinel",
4
4
  "pythia",
5
+ "audit-specialist",
5
6
  "scribe",
6
7
  "themis",
7
8
  ])
@@ -246,9 +246,9 @@ export function normalizeToCanonicalFinding(
246
246
  : "manual"
247
247
 
248
248
  const reportedByAgentRaw =
249
+ options.reportedByAgent ??
249
250
  (typeof input.reported_by_agent === "string" ? input.reported_by_agent : undefined) ??
250
251
  (typeof input.reportedByAgent === "string" ? input.reportedByAgent : undefined) ??
251
- options.reportedByAgent ??
252
252
  "unknown"
253
253
  const reportedByAgent: ArgusAgentName = VALID_AGENTS.has(reportedByAgentRaw as ArgusAgentName)
254
254
  ? (reportedByAgentRaw as ArgusAgentName)
@@ -12,7 +12,9 @@ import {
12
12
  import type {
13
13
  AuditPhase,
14
14
  AuditState,
15
+ CoverageAttemptState,
15
16
  Finding,
17
+ FindingCounts,
16
18
  FuzzCounterexample,
17
19
  SoloditResult,
18
20
  ToolExecution,
@@ -99,6 +101,48 @@ function resolveToolSuccess(payload: Record<string, unknown>): boolean {
99
101
  return payload.success !== false
100
102
  }
101
103
 
104
+ const FINDING_COUNT_FIELDS = [
105
+ "rawObservations",
106
+ "recordedFindings",
107
+ "dedupedFindings",
108
+ "actionableFindings",
109
+ "nonActionableFindings",
110
+ ] as const
111
+
112
+ function asFindingCounts(value: unknown): FindingCounts | undefined {
113
+ if (!isRecord(value)) return undefined
114
+ const counts: FindingCounts = {}
115
+ for (const field of FINDING_COUNT_FIELDS) {
116
+ const count = value[field]
117
+ if (
118
+ typeof count === "number" &&
119
+ Number.isFinite(count) &&
120
+ Number.isInteger(count) &&
121
+ count >= 0
122
+ ) {
123
+ counts[field] = count
124
+ }
125
+ }
126
+ return Object.keys(counts).length > 0 ? counts : undefined
127
+ }
128
+
129
+ function asCoverageAttempt(value: unknown): CoverageAttemptState | undefined {
130
+ if (!isRecord(value)) return undefined
131
+ if (
132
+ value.status !== "pending" &&
133
+ value.status !== "run" &&
134
+ value.status !== "skipped" &&
135
+ value.status !== "failed"
136
+ ) {
137
+ return undefined
138
+ }
139
+ return {
140
+ status: value.status,
141
+ attemptedAt: typeof value.attemptedAt === "number" ? value.attemptedAt : undefined,
142
+ reason: typeof value.reason === "string" ? value.reason : undefined,
143
+ }
144
+ }
145
+
102
146
  function asStringArray(value: unknown): string[] | undefined {
103
147
  if (!Array.isArray(value)) return undefined
104
148
  return value.filter((item): item is string => typeof item === "string")
@@ -321,6 +365,7 @@ export function projectToolExecutions(events: AuditEvent[]): CanonicalToolExecut
321
365
  endTime: existing?.endTime,
322
366
  success: existing?.success ?? false,
323
367
  findingsCount: existing?.findingsCount ?? 0,
368
+ findingCounts: existing?.findingCounts,
324
369
  })
325
370
  continue
326
371
  }
@@ -340,6 +385,7 @@ export function projectToolExecutions(events: AuditEvent[]): CanonicalToolExecut
340
385
  endTime: event.timestamp,
341
386
  success: resolveToolSuccess(payload),
342
387
  findingsCount: resolveFindingsCount(payload),
388
+ findingCounts: asFindingCounts(payload.findingCounts),
343
389
  run_id: event.run_id,
344
390
  schema_version: event.schema_version,
345
391
  })
@@ -408,6 +454,8 @@ export function projectReportInput(
408
454
  asFuzzCounterexamples,
409
455
  )
410
456
  const coverageReport = extractLatestFromPayload(events, "coverageReport", asCoverageReport)
457
+ const coverageAttempt = extractLatestFromPayload(events, "coverageAttempt", asCoverageAttempt)
458
+ const findingCounts = extractLatestFromPayload(events, "findingCounts", asFindingCounts)
411
459
  const gasHotspots = extractLatestFromPayload(events, "gasHotspots", asGasHotspots)
412
460
  const proxyContracts = extractLatestFromPayload(events, "proxyContracts", asProxyContracts)
413
461
  const patternVersion = extractLatestFromPayload(events, "patternVersion", asString)
@@ -424,10 +472,12 @@ export function projectReportInput(
424
472
  projectDir,
425
473
  findings,
426
474
  toolsExecuted,
475
+ findingCounts,
427
476
  scope,
428
477
  soloditResults,
429
478
  fuzzCounterexamples,
430
479
  coverageReport,
480
+ coverageAttempt,
431
481
  gasHotspots,
432
482
  proxyContracts,
433
483
  patternVersion,