solidity-argus 0.5.9 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. package/AGENTS.md +9 -2
  2. package/README.md +28 -21
  3. package/package.json +2 -2
  4. package/skills/INVENTORY.md +14 -1
  5. package/skills/README.md +4 -2
  6. package/skills/references/attack-vector-deck/SKILL.md +62 -0
  7. package/skills/specialist-profiles/access-control-specialist/SKILL.md +31 -0
  8. package/skills/specialist-profiles/economic-security/SKILL.md +31 -0
  9. package/skills/specialist-profiles/execution-trace/SKILL.md +31 -0
  10. package/skills/specialist-profiles/first-principles/SKILL.md +31 -0
  11. package/skills/specialist-profiles/invariant/SKILL.md +31 -0
  12. package/skills/specialist-profiles/math-precision/SKILL.md +31 -0
  13. package/skills/specialist-profiles/periphery/SKILL.md +31 -0
  14. package/skills/specialist-profiles/vector-scan/SKILL.md +28 -0
  15. package/src/agents/argus-prompt.ts +48 -7
  16. package/src/agents/audit-specialist-prompt.ts +76 -0
  17. package/src/agents/pythia-prompt.ts +1 -1
  18. package/src/agents/scribe-prompt.ts +5 -0
  19. package/src/agents/sentinel-prompt.ts +5 -0
  20. package/src/agents/themis-prompt.ts +3 -0
  21. package/src/config/schema.ts +2 -0
  22. package/src/constants/defaults.ts +1 -0
  23. package/src/create-hooks.ts +18 -8
  24. package/src/create-tools.ts +2 -0
  25. package/src/features/audit-enforcer/audit-enforcer.ts +2 -16
  26. package/src/features/persistent-state/run-finalizer.ts +94 -1
  27. package/src/hooks/config-handler.ts +23 -0
  28. package/src/hooks/system-prompt-hook.ts +56 -2
  29. package/src/hooks/tool-tracking-hook.ts +75 -6
  30. package/src/shared/agent-names.ts +1 -0
  31. package/src/shared/key-tools.ts +9 -2
  32. package/src/state/adapters.ts +1 -1
  33. package/src/state/projectors.ts +50 -0
  34. package/src/state/schemas.ts +86 -1
  35. package/src/state/types.ts +24 -1
  36. package/src/tools/forge-coverage-tool.ts +31 -1
  37. package/src/tools/record-finding-tool.ts +7 -1
  38. package/src/tools/report-generator-tool.ts +28 -2
  39. package/src/tools/slither-tool.ts +6 -22
  40. package/src/tools/themis-disposition-tool.ts +46 -0
@@ -20,6 +20,7 @@ import type {
20
20
  ArgusAgentName,
21
21
  AuditState,
22
22
  Finding,
23
+ FindingCounts,
23
24
  FindingSeverity,
24
25
  FuzzCounterexample,
25
26
  SoloditResult,
@@ -426,6 +427,21 @@ function processFuzzResult(parsed: Record<string, unknown>, state: AuditState):
426
427
  }
427
428
  }
428
429
 
430
+ function countReadFindingsResult(parsed: Record<string, unknown>): number {
431
+ const summary = toRecord(parsed.summary)
432
+ if (
433
+ summary &&
434
+ typeof summary.findingsCount === "number" &&
435
+ Number.isFinite(summary.findingsCount)
436
+ ) {
437
+ return Math.max(0, summary.findingsCount)
438
+ }
439
+
440
+ const reportInput = toRecord(parsed.reportInput)
441
+ const findings = reportInput?.findings
442
+ return Array.isArray(findings) ? findings.length : 0
443
+ }
444
+
429
445
  function processSoloditResult(parsed: Record<string, unknown>, state: AuditState): void {
430
446
  const query = typeof parsed.query === "string" ? parsed.query : ""
431
447
  const results = Array.isArray(parsed.results) ? parsed.results : []
@@ -450,14 +466,38 @@ function processSoloditResult(parsed: Record<string, unknown>, state: AuditState
450
466
  })
451
467
  }
452
468
 
453
- function recordToolExecution(state: AuditState, toolName: string, findingsCount: number): void {
469
+ function buildFindingCounts(state: AuditState, findingsCount: number): FindingCounts {
470
+ return {
471
+ rawObservations: Math.max(0, findingsCount),
472
+ recordedFindings: state.findings.length,
473
+ }
474
+ }
475
+
476
+ function readErrorReason(record: Record<string, unknown>): string | undefined {
477
+ if (typeof record.error === "string" && record.error.trim().length > 0) return record.error
478
+ const errorRecord = toRecord(record.error)
479
+ if (typeof errorRecord?.message === "string" && errorRecord.message.trim().length > 0) {
480
+ return errorRecord.message
481
+ }
482
+ if (typeof record.stderr === "string" && record.stderr.trim().length > 0) return record.stderr
483
+ return undefined
484
+ }
485
+
486
+ function recordToolExecution(
487
+ state: AuditState,
488
+ toolName: string,
489
+ findingsCount: number,
490
+ success: boolean,
491
+ findingCounts?: FindingCounts,
492
+ ): void {
454
493
  const now = Date.now()
455
494
  state.toolsExecuted.push({
456
495
  tool: toolName,
457
496
  startTime: now,
458
497
  endTime: now,
459
- success: true,
498
+ success,
460
499
  findingsCount,
500
+ findingCounts,
461
501
  })
462
502
  }
463
503
 
@@ -601,7 +641,7 @@ export function createToolTrackingHook(
601
641
  }
602
642
 
603
643
  if (resolved) {
604
- recordToolExecution(resolved.state, "task", 0)
644
+ recordToolExecution(resolved.state, "task", 0, true, buildFindingCounts(resolved.state, 0))
605
645
  onStateChanged?.({ tool: "task", findingsCount: 0, sessionId: input.sessionID })
606
646
  }
607
647
 
@@ -709,6 +749,7 @@ export function createToolTrackingHook(
709
749
  let findingsCount = 0
710
750
  let completedSuccess = false
711
751
  let completionError: string | undefined
752
+ let completedRecord: Record<string, unknown> | null = null
712
753
 
713
754
  try {
714
755
  if (input.tool === "argus_skill_load") {
@@ -763,6 +804,7 @@ export function createToolTrackingHook(
763
804
  }
764
805
  return
765
806
  }
807
+ completedRecord = record
766
808
 
767
809
  switch (input.tool) {
768
810
  case "argus_slither_analyze": {
@@ -812,6 +854,9 @@ export function createToolTrackingHook(
812
854
  projectDir,
813
855
  )
814
856
  break
857
+ case "argus_read_findings":
858
+ findingsCount = countReadFindingsResult(record)
859
+ break
815
860
  case "argus_analyze_contract": {
816
861
  processContractAnalyzerResult(record, auditState)
817
862
  const filePath = (input.args as Record<string, unknown>)?.file_path as string
@@ -855,9 +900,16 @@ export function createToolTrackingHook(
855
900
  break
856
901
  }
857
902
  case "argus_forge_coverage": {
903
+ const now = Date.now()
858
904
  const reportObj = toRecord(record.report)
859
905
  const files = reportObj?.files
860
- if (Array.isArray(files)) {
906
+ if (record.success === false) {
907
+ auditState.coverageAttempt = {
908
+ status: "failed",
909
+ attemptedAt: now,
910
+ reason: readErrorReason(record),
911
+ }
912
+ } else if (Array.isArray(files)) {
861
913
  auditState.coverageReport = {
862
914
  files: files
863
915
  .filter((f): f is Record<string, unknown> => !!f && typeof f === "object")
@@ -869,6 +921,13 @@ export function createToolTrackingHook(
869
921
  functionsPct: typeof f.functionsPct === "number" ? f.functionsPct : 0,
870
922
  })),
871
923
  }
924
+ auditState.coverageAttempt = { status: "run", attemptedAt: now }
925
+ } else {
926
+ auditState.coverageAttempt = {
927
+ status: "failed",
928
+ attemptedAt: now,
929
+ reason: "coverage report was missing or invalid",
930
+ }
872
931
  }
873
932
  break
874
933
  }
@@ -943,10 +1002,12 @@ export function createToolTrackingHook(
943
1002
  }
944
1003
  }
945
1004
 
946
- completedSuccess = true
1005
+ completedSuccess = record.success !== false
947
1006
  }
948
1007
 
949
- recordToolExecution(auditState, input.tool, findingsCount)
1008
+ const findingCounts = buildFindingCounts(auditState, findingsCount)
1009
+ auditState.findingCounts = findingCounts
1010
+ recordToolExecution(auditState, input.tool, findingsCount, completedSuccess, findingCounts)
950
1011
 
951
1012
  const nextPhase = inferPhaseAdvancement(auditState, input.tool)
952
1013
  if (nextPhase) {
@@ -983,6 +1044,8 @@ export function createToolTrackingHook(
983
1044
  break
984
1045
  case "argus_forge_coverage":
985
1046
  if (auditState.coverageReport) enrichment.coverageReport = auditState.coverageReport
1047
+ if (auditState.coverageAttempt)
1048
+ enrichment.coverageAttempt = auditState.coverageAttempt
986
1049
  break
987
1050
  case "argus_gas_analysis":
988
1051
  if (auditState.gasHotspots) enrichment.gasHotspots = auditState.gasHotspots
@@ -996,6 +1059,11 @@ export function createToolTrackingHook(
996
1059
  case "argus_check_patterns":
997
1060
  if (auditState.patternVersion) enrichment.patternVersion = auditState.patternVersion
998
1061
  break
1062
+ case "argus_themis_disposition":
1063
+ if (completedRecord?.themisDisposition) {
1064
+ enrichment.themisDisposition = completedRecord.themisDisposition
1065
+ }
1066
+ break
999
1067
  }
1000
1068
  }
1001
1069
  await emitToSink(
@@ -1003,6 +1071,7 @@ export function createToolTrackingHook(
1003
1071
  buildEvent("tool.completed", runId, sessionId, toolCallId, {
1004
1072
  tool: input.tool,
1005
1073
  findingsCount,
1074
+ findingCounts: completedSuccess ? auditState.findingCounts : undefined,
1006
1075
  success: completedSuccess,
1007
1076
  ...(completionError ? { error: completionError } : {}),
1008
1077
  ...enrichment,
@@ -2,6 +2,7 @@ export const ARGUS_ORCHESTRATOR: ReadonlySet<string> = new Set(["argus"])
2
2
  export const ARGUS_SUBAGENTS: ReadonlySet<string> = new Set([
3
3
  "sentinel",
4
4
  "pythia",
5
+ "audit-specialist",
5
6
  "scribe",
6
7
  "themis",
7
8
  ])
@@ -23,15 +23,22 @@ export const UNAVAILABLE_TO_KEY_TOOL: Record<string, string> = {
23
23
  solodit: "solodit",
24
24
  }
25
25
 
26
+ type ToolCoverageRecord = {
27
+ tool: string
28
+ success?: boolean
29
+ }
30
+
26
31
  /**
27
32
  * Compute which key tools have not yet been executed, excusing any that are
28
33
  * declared unavailable.
29
34
  */
30
35
  export function computeMissingKeyTools(
31
- toolsExecuted: Array<{ tool: string }>,
36
+ toolsExecuted: ToolCoverageRecord[],
32
37
  unavailableTools?: string[],
33
38
  ): string[] {
34
- const executedShortNames = new Set(toolsExecuted.map((t) => TOOL_SHORT_NAMES[t.tool] ?? t.tool))
39
+ const executedShortNames = new Set(
40
+ toolsExecuted.filter((t) => t.success === true).map((t) => TOOL_SHORT_NAMES[t.tool] ?? t.tool),
41
+ )
35
42
  const excused = new Set(
36
43
  (unavailableTools ?? []).map((t) => UNAVAILABLE_TO_KEY_TOOL[t]).filter(Boolean),
37
44
  )
@@ -246,9 +246,9 @@ export function normalizeToCanonicalFinding(
246
246
  : "manual"
247
247
 
248
248
  const reportedByAgentRaw =
249
+ options.reportedByAgent ??
249
250
  (typeof input.reported_by_agent === "string" ? input.reported_by_agent : undefined) ??
250
251
  (typeof input.reportedByAgent === "string" ? input.reportedByAgent : undefined) ??
251
- options.reportedByAgent ??
252
252
  "unknown"
253
253
  const reportedByAgent: ArgusAgentName = VALID_AGENTS.has(reportedByAgentRaw as ArgusAgentName)
254
254
  ? (reportedByAgentRaw as ArgusAgentName)
@@ -12,7 +12,9 @@ import {
12
12
  import type {
13
13
  AuditPhase,
14
14
  AuditState,
15
+ CoverageAttemptState,
15
16
  Finding,
17
+ FindingCounts,
16
18
  FuzzCounterexample,
17
19
  SoloditResult,
18
20
  ToolExecution,
@@ -99,6 +101,48 @@ function resolveToolSuccess(payload: Record<string, unknown>): boolean {
99
101
  return payload.success !== false
100
102
  }
101
103
 
104
+ const FINDING_COUNT_FIELDS = [
105
+ "rawObservations",
106
+ "recordedFindings",
107
+ "dedupedFindings",
108
+ "actionableFindings",
109
+ "nonActionableFindings",
110
+ ] as const
111
+
112
+ function asFindingCounts(value: unknown): FindingCounts | undefined {
113
+ if (!isRecord(value)) return undefined
114
+ const counts: FindingCounts = {}
115
+ for (const field of FINDING_COUNT_FIELDS) {
116
+ const count = value[field]
117
+ if (
118
+ typeof count === "number" &&
119
+ Number.isFinite(count) &&
120
+ Number.isInteger(count) &&
121
+ count >= 0
122
+ ) {
123
+ counts[field] = count
124
+ }
125
+ }
126
+ return Object.keys(counts).length > 0 ? counts : undefined
127
+ }
128
+
129
+ function asCoverageAttempt(value: unknown): CoverageAttemptState | undefined {
130
+ if (!isRecord(value)) return undefined
131
+ if (
132
+ value.status !== "pending" &&
133
+ value.status !== "run" &&
134
+ value.status !== "skipped" &&
135
+ value.status !== "failed"
136
+ ) {
137
+ return undefined
138
+ }
139
+ return {
140
+ status: value.status,
141
+ attemptedAt: typeof value.attemptedAt === "number" ? value.attemptedAt : undefined,
142
+ reason: typeof value.reason === "string" ? value.reason : undefined,
143
+ }
144
+ }
145
+
102
146
  function asStringArray(value: unknown): string[] | undefined {
103
147
  if (!Array.isArray(value)) return undefined
104
148
  return value.filter((item): item is string => typeof item === "string")
@@ -321,6 +365,7 @@ export function projectToolExecutions(events: AuditEvent[]): CanonicalToolExecut
321
365
  endTime: existing?.endTime,
322
366
  success: existing?.success ?? false,
323
367
  findingsCount: existing?.findingsCount ?? 0,
368
+ findingCounts: existing?.findingCounts,
324
369
  })
325
370
  continue
326
371
  }
@@ -340,6 +385,7 @@ export function projectToolExecutions(events: AuditEvent[]): CanonicalToolExecut
340
385
  endTime: event.timestamp,
341
386
  success: resolveToolSuccess(payload),
342
387
  findingsCount: resolveFindingsCount(payload),
388
+ findingCounts: asFindingCounts(payload.findingCounts),
343
389
  run_id: event.run_id,
344
390
  schema_version: event.schema_version,
345
391
  })
@@ -408,6 +454,8 @@ export function projectReportInput(
408
454
  asFuzzCounterexamples,
409
455
  )
410
456
  const coverageReport = extractLatestFromPayload(events, "coverageReport", asCoverageReport)
457
+ const coverageAttempt = extractLatestFromPayload(events, "coverageAttempt", asCoverageAttempt)
458
+ const findingCounts = extractLatestFromPayload(events, "findingCounts", asFindingCounts)
411
459
  const gasHotspots = extractLatestFromPayload(events, "gasHotspots", asGasHotspots)
412
460
  const proxyContracts = extractLatestFromPayload(events, "proxyContracts", asProxyContracts)
413
461
  const patternVersion = extractLatestFromPayload(events, "patternVersion", asString)
@@ -424,10 +472,12 @@ export function projectReportInput(
424
472
  projectDir,
425
473
  findings,
426
474
  toolsExecuted,
475
+ findingCounts,
427
476
  scope,
428
477
  soloditResults,
429
478
  fuzzCounterexamples,
430
479
  coverageReport,
480
+ coverageAttempt,
431
481
  gasHotspots,
432
482
  proxyContracts,
433
483
  patternVersion,
@@ -8,7 +8,9 @@ import {
8
8
  import type {
9
9
  ArgusAgentName,
10
10
  AuditPhase,
11
+ CoverageAttemptState,
11
12
  Finding,
13
+ FindingCounts,
12
14
  FindingSeverity,
13
15
  FuzzCounterexample,
14
16
  SoloditResult,
@@ -111,6 +113,7 @@ export interface ReportInput {
111
113
  projectDir: string
112
114
  findings: CanonicalFinding[]
113
115
  toolsExecuted: CanonicalToolExecution[]
116
+ findingCounts?: FindingCounts
114
117
  scope: string[]
115
118
  soloditResults?: SoloditResult[]
116
119
  fuzzCounterexamples?: FuzzCounterexample[]
@@ -120,6 +123,82 @@ export interface ReportInput {
120
123
  patternVersion?: string
121
124
  skillsLoaded?: string[]
122
125
  unavailableTools?: string[]
126
+ coverageAttempt?: CoverageAttemptState
127
+ }
128
+
129
+ const FINDING_COUNT_FIELDS = [
130
+ "rawObservations",
131
+ "recordedFindings",
132
+ "dedupedFindings",
133
+ "actionableFindings",
134
+ "nonActionableFindings",
135
+ ] as const
136
+
137
+ const COVERAGE_ATTEMPT_STATUSES = new Set(["pending", "run", "skipped", "failed"])
138
+
139
+ function pushFindingCountsErrors(errors: ValidationError[], raw: unknown, prefix: string): void {
140
+ if (raw == null) return
141
+ if (!isRecord(raw)) {
142
+ errors.push({
143
+ field: prefix,
144
+ code: "invalid",
145
+ message: `${prefix} must be an object when provided`,
146
+ })
147
+ return
148
+ }
149
+
150
+ for (const field of FINDING_COUNT_FIELDS) {
151
+ const value = raw[field]
152
+ if (value == null) continue
153
+ if (typeof value !== "number" || !Number.isInteger(value) || value < 0) {
154
+ errors.push({
155
+ field: `${prefix}.${field}`,
156
+ code: "invalid",
157
+ message: `${prefix}.${field} must be a non-negative integer when provided`,
158
+ })
159
+ }
160
+ }
161
+ }
162
+
163
+ function pushCoverageAttemptErrors(errors: ValidationError[], raw: unknown): void {
164
+ if (raw == null) return
165
+ if (!isRecord(raw)) {
166
+ errors.push({
167
+ field: "coverageAttempt",
168
+ code: "invalid",
169
+ message: "coverageAttempt must be an object when provided",
170
+ })
171
+ return
172
+ }
173
+
174
+ if (typeof raw.status !== "string" || !COVERAGE_ATTEMPT_STATUSES.has(raw.status)) {
175
+ errors.push({
176
+ field: "coverageAttempt.status",
177
+ code: "enum",
178
+ message: "coverageAttempt.status must be one of: pending, run, skipped, failed",
179
+ })
180
+ }
181
+
182
+ if (
183
+ raw.attemptedAt != null &&
184
+ (typeof raw.attemptedAt !== "number" ||
185
+ !Number.isInteger(raw.attemptedAt) ||
186
+ raw.attemptedAt <= 0)
187
+ ) {
188
+ errors.push({
189
+ field: "coverageAttempt.attemptedAt",
190
+ code: "invalid",
191
+ message: "coverageAttempt.attemptedAt must be a positive integer when provided",
192
+ })
193
+ }
194
+
195
+ if (raw.reason != null && (typeof raw.reason !== "string" || raw.reason.trim().length === 0)) {
196
+ errors.push({
197
+ field: "coverageAttempt.reason",
198
+ code: "invalid",
199
+ message: "coverageAttempt.reason must be a non-empty string when provided",
200
+ })
201
+ }
123
202
  }
124
203
 
125
204
  function pushRequiredRootStringError(
@@ -253,7 +332,8 @@ export function validateCanonicalFinding(raw: unknown): ValidationResult<Canonic
253
332
  errors.push({
254
333
  field: "reported_by_agent",
255
334
  code: "enum",
256
- message: "reported_by_agent must be one of: argus, sentinel, pythia, scribe, unknown",
335
+ message:
336
+ "reported_by_agent must be one of: argus, sentinel, pythia, audit-specialist, scribe, unknown",
257
337
  })
258
338
  }
259
339
 
@@ -346,6 +426,8 @@ export function validateCanonicalToolExecution(
346
426
  })
347
427
  }
348
428
 
429
+ pushFindingCountsErrors(errors, raw.findingCounts, "findingCounts")
430
+
349
431
  if (typeof raw.run_id !== "string" || raw.run_id.trim().length === 0) {
350
432
  errors.push({
351
433
  field: "run_id",
@@ -400,6 +482,9 @@ export function validateReportInput(raw: unknown): ValidationResult<ReportInput>
400
482
  })
401
483
  }
402
484
 
485
+ pushFindingCountsErrors(errors, raw.findingCounts, "findingCounts")
486
+ pushCoverageAttemptErrors(errors, raw.coverageAttempt)
487
+
403
488
  if (!Array.isArray(raw.scope) || !raw.scope.every((item) => typeof item === "string")) {
404
489
  errors.push({
405
490
  field: "scope",
@@ -1,5 +1,11 @@
1
1
  export type FindingSeverity = "Critical" | "High" | "Medium" | "Low" | "Informational"
2
- export type ArgusAgentName = "argus" | "sentinel" | "pythia" | "scribe" | "unknown"
2
+ export type ArgusAgentName =
3
+ | "argus"
4
+ | "sentinel"
5
+ | "pythia"
6
+ | "audit-specialist"
7
+ | "scribe"
8
+ | "unknown"
3
9
  export type AuditPhase =
4
10
  | "reconnaissance"
5
11
  | "scanning"
@@ -88,6 +94,21 @@ export interface ToolExecution {
88
94
  endTime?: number
89
95
  success: boolean
90
96
  findingsCount: number
97
+ findingCounts?: FindingCounts
98
+ }
99
+
100
+ export interface FindingCounts {
101
+ rawObservations?: number
102
+ recordedFindings?: number
103
+ dedupedFindings?: number
104
+ actionableFindings?: number
105
+ nonActionableFindings?: number
106
+ }
107
+
108
+ export interface CoverageAttemptState {
109
+ status: "pending" | "run" | "skipped" | "failed"
110
+ attemptedAt?: number
111
+ reason?: string
91
112
  }
92
113
 
93
114
  export interface AuditState {
@@ -105,7 +126,9 @@ export interface AuditState {
105
126
  skillsLoaded?: string[]
106
127
  unavailableTools?: string[]
107
128
  reportGenerated?: boolean
129
+ findingCounts?: FindingCounts
108
130
  knowledgeSynced?: { success: boolean; timestamp: number }
131
+ coverageAttempt?: CoverageAttemptState
109
132
  coverageReport?: {
110
133
  files: Array<{
111
134
  path: string
@@ -5,10 +5,14 @@ import { resolveProjectDir } from "../shared/project-utils"
5
5
 
6
6
  type ForgeCoverageArgs = {
7
7
  target?: string
8
+ match_path?: string
9
+ ir_minimum?: boolean
8
10
  }
9
11
 
10
12
  type NormalizedForgeCoverageArgs = {
11
13
  target: string
14
+ match_path?: string
15
+ ir_minimum: boolean
12
16
  }
13
17
 
14
18
  type ForgeCoverageFile = {
@@ -53,9 +57,22 @@ const EMPTY_SUMMARY: ForgeCoverageSummary = {
53
57
  function normalizeArgs(args: ForgeCoverageArgs, context: ToolContext): NormalizedForgeCoverageArgs {
54
58
  return {
55
59
  target: args.target ?? resolveProjectDir(context),
60
+ match_path: args.match_path,
61
+ ir_minimum: args.ir_minimum ?? false,
56
62
  }
57
63
  }
58
64
 
65
+ function buildCoverageCommand(args: NormalizedForgeCoverageArgs, forceIrMinimum = false): string[] {
66
+ const command = ["forge", "coverage", "--report", "summary"]
67
+ if (args.match_path) command.push("--match-path", args.match_path)
68
+ if (args.ir_minimum || forceIrMinimum) command.push("--ir-minimum")
69
+ return command
70
+ }
71
+
72
+ function isStackTooDeep(stderr: string): boolean {
73
+ return /stack too deep/i.test(stderr)
74
+ }
75
+
59
76
  function parsePercent(input: string): number {
60
77
  const match = input.match(/(\d+(?:\.\d+)?)%/)
61
78
  if (!match?.[1]) {
@@ -156,11 +173,22 @@ export async function executeForgeCoverage(
156
173
  })
157
174
 
158
175
  try {
159
- const runResult = await runCommand(["forge", "coverage"], {
176
+ let runResult = await runCommand(buildCoverageCommand(normalizedArgs), {
160
177
  signal: context.abort,
161
178
  cwd: normalizedArgs.target,
162
179
  })
163
180
 
181
+ if (
182
+ runResult.exitCode !== 0 &&
183
+ !normalizedArgs.ir_minimum &&
184
+ isStackTooDeep(runResult.stderr)
185
+ ) {
186
+ runResult = await runCommand(buildCoverageCommand(normalizedArgs, true), {
187
+ signal: context.abort,
188
+ cwd: normalizedArgs.target,
189
+ })
190
+ }
191
+
164
192
  if (runResult.exitCode !== 0) {
165
193
  return fail(
166
194
  runResult.stderr.trim() || `forge coverage exited with code ${runResult.exitCode}`,
@@ -193,6 +221,8 @@ export const forgeCoverageTool = tool({
193
221
  "Run forge coverage analysis and return structured per-file coverage metrics (lines, statements, branches, functions).",
194
222
  args: {
195
223
  target: tool.schema.string().optional(),
224
+ match_path: tool.schema.string().optional(),
225
+ ir_minimum: tool.schema.boolean().optional(),
196
226
  },
197
227
  async execute(args, context) {
198
228
  const result = await executeForgeCoverage(args, context)
@@ -63,7 +63,13 @@ function parseFindingObject(raw: string, label: "finding" | "findings"): ParseRe
63
63
  }
64
64
 
65
65
  function normalizeAgent(value: string): ArgusAgentName {
66
- if (value === "argus" || value === "sentinel" || value === "pythia" || value === "scribe") {
66
+ if (
67
+ value === "argus" ||
68
+ value === "sentinel" ||
69
+ value === "pythia" ||
70
+ value === "audit-specialist" ||
71
+ value === "scribe"
72
+ ) {
67
73
  return value
68
74
  }
69
75
 
@@ -309,6 +309,7 @@ const VALID_AGENT_VALUES = new Set<ArgusAgentName>([
309
309
  "argus",
310
310
  "sentinel",
311
311
  "pythia",
312
+ "audit-specialist",
312
313
  "scribe",
313
314
  "unknown",
314
315
  ])
@@ -746,6 +747,22 @@ function formatLocation(finding: Finding): string {
746
747
  return `${finding.file}:${finding.lines[0]}-${finding.lines[1]}`
747
748
  }
748
749
 
750
+ function sourceExcerpt(projectDir: string, finding: Finding): string | null {
751
+ if (!finding.file || !Array.isArray(finding.lines) || finding.lines.length < 2) return null
752
+ const start = finding.lines[0]
753
+ const end = finding.lines[1]
754
+ if (!Number.isInteger(start) || !Number.isInteger(end) || start <= 0 || end < start) {
755
+ return null
756
+ }
757
+ const absolutePath = path.isAbsolute(finding.file)
758
+ ? finding.file
759
+ : path.join(projectDir, finding.file)
760
+ if (!existsSync(absolutePath) || !statSync(absolutePath).isFile()) return null
761
+ const contents = readFileSync(absolutePath, "utf-8").split(/\r?\n/)
762
+ const excerpt = contents.slice(start - 1, end).join("\n")
763
+ return excerpt.trim().length > 0 ? excerpt : null
764
+ }
765
+
749
766
  function shouldIncludeFinding(finding: Finding, threshold: SeverityThreshold): boolean {
750
767
  return FINDING_WEIGHT[finding.severity] >= THRESHOLD_WEIGHT[threshold]
751
768
  }
@@ -1005,7 +1022,7 @@ function buildRecommendations(counts: FindingsCount): string[] {
1005
1022
  return items
1006
1023
  }
1007
1024
 
1008
- function buildFindingsSection(findings: Finding[]): string {
1025
+ function buildFindingsSection(findings: Finding[], projectDir: string): string {
1009
1026
  if (findings.length === 0) {
1010
1027
  return "## Findings\nNo findings meet the configured severity threshold."
1011
1028
  }
@@ -1031,6 +1048,15 @@ function buildFindingsSection(findings: Finding[]): string {
1031
1048
  lines.push(`**Severity**: ${finding.severity}`)
1032
1049
  lines.push(`**Confidence**: ${finding.confidence}`)
1033
1050
  lines.push(`**Location**: ${formatLocation(finding)}`)
1051
+ const excerpt = sourceExcerpt(projectDir, finding)
1052
+ if (excerpt) {
1053
+ lines.push("")
1054
+ lines.push("**Source Excerpt**:")
1055
+ lines.push("")
1056
+ lines.push("```solidity")
1057
+ lines.push(excerpt)
1058
+ lines.push("```")
1059
+ }
1034
1060
  lines.push("")
1035
1061
  lines.push(`**Description**: ${finding.description}`)
1036
1062
  lines.push("")
@@ -1387,7 +1413,7 @@ export async function executeReportGeneration(
1387
1413
  "Approach: Findings are normalized, deterministically ordered by severity/file/line, and validated against report quality gates before emission.",
1388
1414
  )
1389
1415
 
1390
- sections.push(buildFindingsSection(findings))
1416
+ sections.push(buildFindingsSection(findings, reportInput.projectDir))
1391
1417
 
1392
1418
  sections.push("## Recommendations")
1393
1419
  for (const item of buildRecommendations(counts)) {
@@ -470,26 +470,6 @@ export async function executeSlitherAnalyze(
470
470
  }
471
471
  }
472
472
 
473
- if (args.via_ir) {
474
- const fallbackResult = await flattenFallback(args, context, {
475
- ...getDefaultFlattenDeps(),
476
- runCommand,
477
- cwd: projectDir,
478
- })
479
- if (fallbackResult) return fallbackResult
480
- return {
481
- success: false,
482
- findingsCount: 0,
483
- findings: [],
484
- executionTime: Date.now() - startedAt,
485
- errors: [
486
- "via_ir enabled — flatten fallback failed. Ensure forge and solc-select are installed.",
487
- ],
488
- error:
489
- "Project uses via_ir which is incompatible with Slither direct analysis. Flatten fallback also failed.",
490
- }
491
- }
492
-
493
473
  const command = buildCommand(args)
494
474
 
495
475
  try {
@@ -508,7 +488,7 @@ export async function executeSlitherAnalyze(
508
488
  payload = JSON.parse(runResult.stdout) as SlitherPayload
509
489
  } catch (error) {
510
490
  const message = error instanceof Error ? error.message : "Unknown parse error"
511
- if (shouldTryFlattenFallback(errors, runResult.stderr)) {
491
+ if (args.via_ir || shouldTryFlattenFallback(errors, runResult.stderr)) {
512
492
  const fallbackResult = await flattenFallback(args, context, {
513
493
  ...getDefaultFlattenDeps(),
514
494
  runCommand,
@@ -533,7 +513,11 @@ export async function executeSlitherAnalyze(
533
513
  const findings = parseFindings(payload)
534
514
  const success = findings.length > 0 || (runResult.exitCode === 0 && payload.success !== false)
535
515
 
536
- if (!success && findings.length === 0 && shouldTryFlattenFallback(errors, runResult.stderr)) {
516
+ if (
517
+ !success &&
518
+ findings.length === 0 &&
519
+ (args.via_ir || shouldTryFlattenFallback(errors, runResult.stderr))
520
+ ) {
537
521
  const fallbackResult = await flattenFallback(args, context, {
538
522
  ...getDefaultFlattenDeps(),
539
523
  runCommand,