npm - solidity-argus - Versions diffs - 0.5.8 → 0.5.10 - Mend

solidity-argus 0.5.8 → 0.5.10

This diff shows the differences between the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions exactly as they appear in the public registry.

Files changed (20)

package/AGENTS.md +4 -4
package/README.md +7 -6
package/package.json +2 -2
package/src/agents/argus-prompt.ts +13 -8
package/src/agents/scribe-prompt.ts +2 -1
package/src/agents/themis-prompt.ts +1 -0
package/src/cli/commands/doctor.ts +9 -15
package/src/constants/defaults.ts +3 -3
package/src/create-hooks.ts +9 -7
package/src/create-tools.ts +2 -0
package/src/features/audit-enforcer/audit-enforcer.ts +2 -16
package/src/features/persistent-state/run-finalizer.ts +76 -1
package/src/hooks/tool-tracking-hook.ts +25 -0
package/src/shared/key-tools.ts +9 -2
package/src/state/adapters.ts +35 -0
package/src/tools/forge-coverage-tool.ts +31 -1
package/src/tools/persist-deduped-tool.ts +1 -1
package/src/tools/report-generator-tool.ts +58 -5
package/src/tools/slither-tool.ts +6 -22
package/src/tools/themis-disposition-tool.ts +46 -0

package/AGENTS.md CHANGED Viewed

@@ -13,27 +13,27 @@ CLI: `argus doctor`, `argus init`, `argus install`.
 **Role**: Primary security audit orchestrator
 **Description**: Argus Panoptes, the All-Seeing Guardian. Coordinates full Solidity security audits by dispatching Sentinel (analysis), Pythia (research), Scribe (reporting), and Themis (validation). Follows a rigorous 7-step methodology: Reconnaissance, Automated Scanning, Manual Review, Attack Surface Mapping, Vulnerability Research, Testing & Verification, and Reporting.
 **Model**: anthropic/claude-opus-4-7
-**Tools**: 14 orchestrator-accessible argus_* tools (argus_slither_analyze, argus_analyze_contract, argus_check_patterns, argus_proxy_detection, argus_solodit_search, argus_forge_test, argus_gas_analysis, argus_forge_fuzz, argus_forge_coverage, argus_skill_load, argus_generate_report, argus_record_finding, argus_read_findings, argus_sync_knowledge). `argus_persist_deduped` is reserved for Scribe.
+**Tools**: 15 orchestrator-accessible argus_* tools (argus_slither_analyze, argus_analyze_contract, argus_check_patterns, argus_proxy_detection, argus_solodit_search, argus_forge_test, argus_gas_analysis, argus_forge_fuzz, argus_forge_coverage, argus_skill_load, argus_generate_report, argus_record_finding, argus_read_findings, argus_sync_knowledge, argus_themis_disposition). `argus_persist_deduped` is reserved for Scribe.
 ## sentinel
 **Role**: Static analysis and testing specialist
 **Description**: Finds vulnerabilities through Slither static analysis, Foundry testing, fuzzing, and pattern matching. The tactical executor — runs tools, writes PoC tests, and verifies findings. Dispatched by Argus during Automated Scanning and Testing & Verification phases.
-**Model**: anthropic/claude-sonnet-4-7
+**Model**: anthropic/claude-sonnet-4-6
 **Tools**: argus_slither_analyze, argus_forge_test, argus_gas_analysis, argus_forge_fuzz, argus_forge_coverage, argus_analyze_contract, argus_check_patterns, argus_proxy_detection, argus_record_finding, skill
 ## pythia
 **Role**: Vulnerability researcher
 **Description**: Consults Solodit, SCVD, and the knowledge base to find historical precedents and known attack vectors. Searches 7,769+ real-world audit findings and 51 curated vulnerability pattern files. Dispatched by Argus during Vulnerability Research phase.
-**Model**: anthropic/claude-sonnet-4-7
+**Model**: anthropic/claude-sonnet-4-6
 **Tools**: argus_solodit_search, argus_check_patterns, argus_record_finding, skill
 ## scribe
 **Role**: Audit report writer
 **Description**: Transforms raw findings into professional markdown audit reports. Produces structured output with severity classifications (Critical/High/Medium/Low/Informational), impact assessments, proof-of-concept steps, and actionable recommendations. Dispatched by Argus only after all analysis is complete.
-**Model**: anthropic/claude-sonnet-4-7
+**Model**: anthropic/claude-sonnet-4-6
 **Tools**: argus_read_findings, argus_persist_deduped, argus_generate_report, skill
 ## themis

package/README.md CHANGED Viewed

@@ -66,9 +66,9 @@ Argus will automatically:
 | Agent | Role | Model |
 |-------|------|-------|
 | `@argus` | Orchestrator — coordinates the full audit | claude-opus-4-7 |
-| `@sentinel` | Static analysis & testing specialist | claude-sonnet-4-7 |
-| `@pythia` | Vulnerability researcher | claude-sonnet-4-7 |
-| `@scribe` | Audit report writer | claude-sonnet-4-7 |
+| `@sentinel` | Static analysis & testing specialist | claude-sonnet-4-6 |
+| `@pythia` | Vulnerability researcher | claude-sonnet-4-6 |
+| `@scribe` | Audit report writer | claude-sonnet-4-6 |
 | `@themis` | Independent audit quality gate | gpt-5.5 |
 ### @argus — The Orchestrator
@@ -106,6 +106,7 @@ Validates the completed audit by comparing raw findings, deduped findings, and t
 | `argus_read_findings` | Scribe, Themis | Reads persisted findings and audit artifacts for report generation and validation |
 | `argus_persist_deduped` | Scribe | Persists deduplicated findings before final report generation and validation |
 | `argus_generate_report` | Scribe | Generates the final structured audit report in professional markdown format |
+| `argus_themis_disposition` | Argus | Records Argus' resolved disposition for Themis validation: approved, remediated, or explicitly overridden |
 | `argus_sync_knowledge` | Argus | Syncs the local vulnerability database from SCVD (api.scvd.dev) |
 ---
@@ -285,9 +286,9 @@ Create `.argus/solidity-argus.jsonc` in your project root. `.opencode/solidity-a
 {
   "agents": {
     "argus": { "model": "anthropic/claude-opus-4-7" },
-    "sentinel": { "model": "anthropic/claude-sonnet-4-7" },
-    "pythia": { "model": "anthropic/claude-sonnet-4-7" },
-    "scribe": { "model": "anthropic/claude-sonnet-4-7" },
+    "sentinel": { "model": "anthropic/claude-sonnet-4-6" },
+    "pythia": { "model": "anthropic/claude-sonnet-4-6" },
+    "scribe": { "model": "anthropic/claude-sonnet-4-6" },
     "themis": { "model": "openai/gpt-5.5" }
   },

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "solidity-argus",
-  "version": "0.5.8",
-  "description": "Solidity smart contract security auditing plugin for OpenCode — 5 specialized agents, 15 tools (14 core + optional Solodit), and a curated vulnerability knowledge base",
+  "version": "0.5.10",
+  "description": "Solidity smart contract security auditing plugin for OpenCode — 5 specialized agents, 16 tools (15 core + optional Solodit), and a curated vulnerability knowledge base",
   "keywords": [
     "solidity",
     "security",

package/src/agents/argus-prompt.ts CHANGED Viewed

@@ -198,6 +198,7 @@ Task(subagent_type="scribe", prompt="Generate the final audit report for Project
 - \`argus_analyze_contract\`, \`argus_check_patterns\`, \`argus_proxy_detection\` → delegate to **sentinel**
 - \`argus_solodit_search\`, Solodit MCP search → delegate to **pythia**
 - \`argus_read_findings\`, \`argus_persist_deduped\`, \`argus_generate_report\` \u2192 delegate to **scribe**
+- \`argus_themis_disposition\` → call after Themis returns to record Argus' resolved quality-gate disposition
 - Audit quality validation \u2192 delegate to **themis** (after Scribe completes)
 ### **@sentinel** (The Executor)
@@ -527,7 +528,7 @@ Scope: {list of audited files}
 STEPS:
 1. Call argus_read_findings with run_id above to load all findings
-2. Deduplicate: group findings by vulnerability class + code location, merge into single entries
+2. Deduplicate: group findings by vulnerability class + code location, merge into single entries. Include \`observation_ids\` on every deduped finding so each raw finding maps to exactly one report entry.
 3. Enrich: for each Critical/High finding, write specific impact and recommendation
 4. Call argus_persist_deduped with run_id and your deduped findings array — this writes the source-of-truth JSON to disk
 5. Call argus_generate_report with run_id, project_name, and scope — the tool reads deduped findings from disk
@@ -538,7 +539,7 @@ Overall risk assessment: {your assessment}
 Scribe will:
 1. Read raw findings (may contain duplicates from different tools)
-2. Semantically deduplicate (e.g., merge reentrancy-eth + reentrancy-cei-violation at same location)
+2. Semantically deduplicate (e.g., merge reentrancy-eth + reentrancy-cei-violation at same location) while preserving \`observation_ids\` lineage for every raw finding
 3. Enrich Critical/High findings with specific impact and recommendation text
 4. Persist deduped findings to disk via \`argus_persist_deduped\` (source-of-truth JSON)
 5. Call \`argus_generate_report\` with \`run_id\` — the tool reads from disk and renders markdown
@@ -570,13 +571,17 @@ Themis will:
 3. Apply vulnerability skill checklists to assess finding validity
 4. Return a verdict: approved or issues found
-**If Themis flags issues**, YOU are the final judge:
-- If Themis found genuinely dropped findings → re-dispatch Scribe with specific correction instructions
-- If Themis disagrees on severity → evaluate the evidence and make the final call
-- If Themis found potential false positives → assess and note in the report if warranted
-- If Themis approves → audit is complete
+**If Themis flags issues**, YOU are the final judge, but you must record a resolved disposition before the audit is complete:
+- If Themis found genuinely dropped findings → re-dispatch Scribe with specific correction instructions, then record status="remediated" with notes.
+- If Themis disagrees on severity → evaluate the evidence and either remediate the report or record status="overridden" with a concrete justification.
+- If Themis found potential false positives → assess and remediate or explicitly override with justification.
+- If Themis approves → record status="approved" with the Themis verdict.
-**An audit is NOT complete until Themis has validated the output.**
+Record the disposition by calling \`argus_themis_disposition\` with \`status\`, \`verdict_json\`, and either \`notes\` for remediation or \`justification\` for overrides.
+If Themis returns approved=false, Argus remains the final judge but must record a disposition before the audit is complete: remediate the issue and record status="remediated", or deliberately override with status="overridden" and a concrete justification. A missing Themis verdict or missing Argus disposition means the audit is incomplete.
+**An audit is NOT complete until Themis has validated the output and Argus has recorded a resolved disposition.**
 You are the guardian. Nothing escapes your gaze. Begin the audit.
 `

package/src/agents/scribe-prompt.ts CHANGED Viewed

@@ -53,6 +53,7 @@ Argus provides you with a \`run_id\`. Your job: read findings, deduplicate, enri
    - Add "**Detected by:**" listing all tools/checks that flagged it
    - Example: reentrancy-eth + reentrancy-cei-violation + reentrancy-eth-withdraw-state-after-call at VulnerableVault.sol:18-23 → ONE finding
    - **PRESERVATION RULE**: Every raw finding MUST map to exactly one deduped finding. Only merge findings that are genuinely the SAME vulnerability at the SAME location. Different vulnerability classes (e.g., default-visibility vs dos-revert) are SEPARATE findings even if both are Informational. NEVER drop findings during deduplication.
+   - **LINEAGE RULE**: Every deduped finding MUST include \`observation_ids\` containing each raw finding's \`observation_id\`, plus \`observation_count\`, \`sources\`, and \`reported_by_agents\` when available. This lets \`argus_generate_report\` prove raw-to-deduped parity instead of emitting a "Finding parity not verifiable" warning.
 3. **Enrich** (MANDATORY for Critical/High):
    - Write specific \`impact\` (concrete consequence, not "could be exploited")
@@ -61,7 +62,7 @@ Argus provides you with a \`run_id\`. Your job: read findings, deduplicate, enri
 4. **Persist deduped findings**: Call \`argus_persist_deduped\` with:
    - \`run_id\`: the run ID from Argus
-   - \`deduped_findings\`: JSON array of your deduped and enriched findings
+   - \`deduped_findings\`: JSON array of your deduped and enriched findings, including \`observation_ids\` lineage for every merged raw observation
    This writes the source-of-truth JSON to disk at \`.argus/runs/{run_id}/deduped-findings.json\`.

package/src/agents/themis-prompt.ts CHANGED Viewed

@@ -98,6 +98,7 @@ Verdict rules:
 - If approved with no issues, state it concisely.
 - If issues exist, list each issue with concrete evidence (file path, finding id, field mismatch, or historical precedent).
 - Be precise and adversarial, but do not overreach. Recommend; do not override.
+- Return the JSON verdict as the final fenced code block in your response. Do not add a second JSON object after it. Argus uses this verdict to decide whether to accept it, remediate it, or explicitly override it.
 ## AUTHORITY BOUNDARY

package/src/cli/commands/doctor.ts CHANGED Viewed

@@ -13,6 +13,8 @@ import {
 } from "../../skills/argus-skill-resolver"
 import { parseFrontmatter, validateSkillFrontmatter } from "../../skills/skill-schema"
 import { detectViaIr } from "../../tools/slither-tool"
+import { DEFAULT_SOLODIT_PORT } from "../../tools/solodit-search-tool"
+import { checkSoloditHealth } from "../../utils/solodit-health"
 import { cliOutput } from "../cli-output"
 import type { CliCommand } from "../types"
@@ -459,21 +461,13 @@ export const doctorCommand: CliCommand = {
     const soloditEnabled = config?.solodit?.enabled !== false
     if (soloditEnabled) {
-      try {
-        const response = await fetch(
-          "https://solodit.cyfrin.io/api/trpc/findings.get?batch=1&input=" +
-            encodeURIComponent(JSON.stringify({ 0: "[]" })),
-          {
-            signal: AbortSignal.timeout(5000),
-          },
-        )
-        if (response.ok) {
-          cliOutput.log(`${GREEN}✓${RESET} Solodit API: reachable`)
-        } else {
-          cliOutput.log(`${YELLOW}⚠${RESET} Solodit API: returned ${response.status}`)
-        }
-      } catch {
-        cliOutput.log(`${YELLOW}⚠${RESET} Solodit API: unreachable`)
+      const port = config?.solodit?.port ?? DEFAULT_SOLODIT_PORT
+      const status = await checkSoloditHealth(port, true)
+      if (status.reachable) {
+        cliOutput.log(`${GREEN}✓${RESET} Solodit MCP: reachable on port ${port}`)
+      } else {
+        const suffix = status.error ? ` (${status.error})` : ""
+        cliOutput.log(`${YELLOW}⚠${RESET} Solodit MCP: unreachable on port ${port}${suffix}`)
       }
     } else {
       cliOutput.log(`${YELLOW}⚠${RESET} Solodit: disabled in config`)

package/src/constants/defaults.ts CHANGED Viewed

@@ -1,8 +1,8 @@
 export const DEFAULT_MODELS = {
   argus: "anthropic/claude-opus-4-7",
-  sentinel: "anthropic/claude-sonnet-4-7",
-  pythia: "anthropic/claude-sonnet-4-7",
-  scribe: "anthropic/claude-sonnet-4-7",
+  sentinel: "anthropic/claude-sonnet-4-6",
+  pythia: "anthropic/claude-sonnet-4-6",
+  scribe: "anthropic/claude-sonnet-4-6",
   themis: "openai/gpt-5.5",
 } as const

package/src/create-hooks.ts CHANGED Viewed

@@ -1092,11 +1092,13 @@ export function createHooks(args: {
               )
             }
-            // Trigger finalization immediately after report generation.
-            // The session.idle handler also checks reportGenerated, but in
-            // `opencode run` mode the process may exit before another idle
-            // event fires.  Finalizing here guarantees the run is closed.
-            if (state.reportGenerated) {
+            // The report is materialized here, but finalization waits until
+            // Argus records a resolved Themis disposition.
+          }
+          if (toolName === "argus_themis_disposition") {
+            const state = getAuditState(input.sessionID)
+            if (state?.reportGenerated) {
               const runSink =
                 eventSinksByRunId.get(state.sessionId) ??
                 (input.sessionID
@@ -1120,12 +1122,12 @@ export function createHooks(args: {
                   )
                   if (!reportFinalization.invariantsPassed) {
                     logger.warn(
-                      `Report-triggered finalization for run ${state.sessionId} has invariant errors: ${reportFinalization.errors.join("; ")}`,
+                      `Themis-disposition finalization for run ${state.sessionId} has invariant errors: ${reportFinalization.errors.join("; ")}`,
                     )
                   }
                 } catch (error) {
                   logger.warn(
-                    `Report-triggered finalization failed for run ${state.sessionId}: ${error instanceof Error ? error.message : String(error)}`,
+                    `Themis-disposition finalization failed for run ${state.sessionId}: ${error instanceof Error ? error.message : String(error)}`,
                   )
                 }
               }

package/src/create-tools.ts CHANGED Viewed

@@ -15,6 +15,7 @@ import { reportGeneratorTool } from "./tools/report-generator-tool"
 import { slitherTool } from "./tools/slither-tool"
 import { createSoloditSearchTool } from "./tools/solodit-search-tool"
 import { syncKnowledgeTool } from "./tools/sync-knowledge-tool"
+import { themisDispositionTool } from "./tools/themis-disposition-tool"
 export function createTools(config: ArgusConfig): Record<string, ToolDefinition> {
   const tools: Record<string, ToolDefinition> = {
@@ -31,6 +32,7 @@ export function createTools(config: ArgusConfig): Record<string, ToolDefinition>
     argus_read_findings: readFindingsTool,
     argus_persist_deduped: persistDedupedTool,
     argus_generate_report: reportGeneratorTool,
+    argus_themis_disposition: themisDispositionTool,
     argus_sync_knowledge: syncKnowledgeTool,
   }

package/src/features/audit-enforcer/audit-enforcer.ts CHANGED Viewed

@@ -1,23 +1,9 @@
 import { PHASE_ORDER } from "../../shared/audit-phases"
+import { computeMissingKeyTools } from "../../shared/key-tools"
 import type { AuditPhase, AuditState } from "../../state/types"
 const REPORTING_PHASES: AuditPhase[] = ["reporting", "complete"]
-const KEY_TOOL_FAMILIES: Array<{ family: string; prefixes: string[] }> = [
-  { family: "slither", prefixes: ["argus_slither_analyze", "slither"] },
-  { family: "forge_test", prefixes: ["argus_forge_test", "forge_test"] },
-  { family: "forge_fuzz", prefixes: ["argus_forge_fuzz", "forge_fuzz"] },
-  { family: "forge_coverage", prefixes: ["argus_forge_coverage", "forge_coverage"] },
-]
-function getMissingToolFamilies(auditState: AuditState): string[] {
-  const executedTools = auditState.toolsExecuted.map((t) => t.tool)
-  return KEY_TOOL_FAMILIES.filter(
-    ({ prefixes }) =>
-      !executedTools.some((tool) => prefixes.some((prefix) => tool.startsWith(prefix))),
-  ).map(({ family }) => family)
-}
 function getNextPhase(current: AuditPhase): AuditPhase | null {
   const idx = PHASE_ORDER.indexOf(current)
   if (idx === -1 || idx >= PHASE_ORDER.length - 1) return null
@@ -39,7 +25,7 @@ export function createAuditEnforcer() {
     ]
     if (REPORTING_PHASES.includes(auditState.currentPhase)) {
-      const missing = getMissingToolFamilies(auditState)
+      const missing = computeMissingKeyTools(auditState.toolsExecuted, auditState.unavailableTools)
       if (missing.length > 0) {
         parts.push(
           `\u26a0\ufe0f Tool coverage incomplete: ${missing.join(", ")} have not been executed. Do not proceed to report generation until required tools are complete.`,

package/src/features/persistent-state/run-finalizer.ts CHANGED Viewed

@@ -131,6 +131,79 @@ function collectReportQualityGateErrors(events: AuditEvent[]): string[] {
   return errors
 }
+type ThemisVerdict = {
+  approved?: unknown
+  pipeline_issues?: unknown
+  false_positives?: unknown
+  missed_findings?: unknown
+  severity_adjustments?: unknown
+}
+type ThemisDisposition = {
+  status?: unknown
+  verdict?: ThemisVerdict
+  notes?: unknown
+  justification?: unknown
+}
+function hasText(value: unknown): value is string {
+  return typeof value === "string" && value.trim().length > 0
+}
+function isResolvedThemisDisposition(value: unknown): boolean {
+  const disposition = asRecord(value) as ThemisDisposition | null
+  if (disposition?.status === "approved") {
+    return disposition.verdict?.approved === true
+  }
+  if (disposition?.status === "remediated") {
+    return disposition.verdict?.approved === false && hasText(disposition.notes)
+  }
+  if (disposition?.status === "overridden") {
+    return disposition.verdict?.approved === false && hasText(disposition.justification)
+  }
+  return false
+}
+function hasRejectedThemisVerdict(value: unknown): boolean {
+  const verdict = asRecord(value) as ThemisVerdict | null
+  return verdict?.approved === false
+}
+function collectThemisDispositionErrors(events: AuditEvent[]): string[] {
+  let reportIndex = -1
+  for (let index = events.length - 1; index >= 0; index -= 1) {
+    const event = events[index]
+    if (event && isGenerateReportCompletion(event)) {
+      reportIndex = index
+      break
+    }
+  }
+  if (reportIndex === -1) return []
+  const laterEvents = events.slice(reportIndex + 1)
+  const hasResolvedDisposition = laterEvents.some((event) => {
+    if (event.type !== "tool.completed") return false
+    const payload = asRecord(event.payload)
+    return isResolvedThemisDisposition(payload?.themisDisposition)
+  })
+  if (hasResolvedDisposition) return []
+  const hasUnresolvedRejection = laterEvents.some((event) => {
+    if (event.type !== "tool.completed") return false
+    const payload = asRecord(event.payload)
+    return (
+      payload?.tool === "task" &&
+      payload.subagent_type === "themis" &&
+      hasRejectedThemisVerdict(payload.themis)
+    )
+  })
+  return hasUnresolvedRejection
+    ? ["generated report has unresolved Themis issues"]
+    : ["generated report has no resolved Themis disposition"]
+}
 function collectParentChildIntegrityErrors(events: AuditEvent[]): string[] {
   const errors: string[] = []
   const parentByChild = new Map<string, string>()
@@ -244,7 +317,7 @@ function collectInvariantErrors(events: AuditEvent[]): { errors: string[]; warni
   warnings.push(...collectOrphanedToolStarts(events))
   errors.push(...collectParentChildIntegrityErrors(events))
-  errors.push(...collectMultiSessionErrors(events))
+  warnings.push(...collectMultiSessionErrors(events))
   return { errors, warnings }
 }
@@ -308,6 +381,7 @@ export async function finalizeRun(
     const reportErrors = [
       ...(await collectReportCompletenessErrors(events)),
       ...collectReportQualityGateErrors(events),
+      ...collectThemisDispositionErrors(events),
     ]
     if (reportErrors.length === 0) {
       return {
@@ -324,6 +398,7 @@ export async function finalizeRun(
   const { errors, warnings } = collectInvariantErrors(events)
   errors.push(...(await collectReportCompletenessErrors(events)))
   errors.push(...collectReportQualityGateErrors(events))
+  errors.push(...collectThemisDispositionErrors(events))
   const invariantsPassed = errors.length === 0
   const sessionId = events.at(-1)?.session_id ?? ""

package/src/hooks/tool-tracking-hook.ts CHANGED Viewed

@@ -426,6 +426,21 @@ function processFuzzResult(parsed: Record<string, unknown>, state: AuditState):
   }
 }
+function countReadFindingsResult(parsed: Record<string, unknown>): number {
+  const summary = toRecord(parsed.summary)
+  if (
+    summary &&
+    typeof summary.findingsCount === "number" &&
+    Number.isFinite(summary.findingsCount)
+  ) {
+    return Math.max(0, summary.findingsCount)
+  }
+  const reportInput = toRecord(parsed.reportInput)
+  const findings = reportInput?.findings
+  return Array.isArray(findings) ? findings.length : 0
+}
 function processSoloditResult(parsed: Record<string, unknown>, state: AuditState): void {
   const query = typeof parsed.query === "string" ? parsed.query : ""
   const results = Array.isArray(parsed.results) ? parsed.results : []
@@ -709,6 +724,7 @@ export function createToolTrackingHook(
     let findingsCount = 0
     let completedSuccess = false
     let completionError: string | undefined
+    let completedRecord: Record<string, unknown> | null = null
     try {
       if (input.tool === "argus_skill_load") {
@@ -763,6 +779,7 @@ export function createToolTrackingHook(
           }
           return
         }
+        completedRecord = record
         switch (input.tool) {
           case "argus_slither_analyze": {
@@ -812,6 +829,9 @@ export function createToolTrackingHook(
               projectDir,
             )
             break
+          case "argus_read_findings":
+            findingsCount = countReadFindingsResult(record)
+            break
           case "argus_analyze_contract": {
             processContractAnalyzerResult(record, auditState)
             const filePath = (input.args as Record<string, unknown>)?.file_path as string
@@ -996,6 +1016,11 @@ export function createToolTrackingHook(
             case "argus_check_patterns":
               if (auditState.patternVersion) enrichment.patternVersion = auditState.patternVersion
               break
+            case "argus_themis_disposition":
+              if (completedRecord?.themisDisposition) {
+                enrichment.themisDisposition = completedRecord.themisDisposition
+              }
+              break
           }
         }
         await emitToSink(

package/src/shared/key-tools.ts CHANGED Viewed

@@ -23,15 +23,22 @@ export const UNAVAILABLE_TO_KEY_TOOL: Record<string, string> = {
   solodit: "solodit",
 }
+type ToolCoverageRecord = {
+  tool: string
+  success?: boolean
+}
 /**
  * Compute which key tools have not yet been executed, excusing any that are
  * declared unavailable.
  */
 export function computeMissingKeyTools(
-  toolsExecuted: Array<{ tool: string }>,
+  toolsExecuted: ToolCoverageRecord[],
   unavailableTools?: string[],
 ): string[] {
-  const executedShortNames = new Set(toolsExecuted.map((t) => TOOL_SHORT_NAMES[t.tool] ?? t.tool))
+  const executedShortNames = new Set(
+    toolsExecuted.filter((t) => t.success === true).map((t) => TOOL_SHORT_NAMES[t.tool] ?? t.tool),
+  )
   const excused = new Set(
     (unavailableTools ?? []).map((t) => UNAVAILABLE_TO_KEY_TOOL[t]).filter(Boolean),
   )

package/src/state/adapters.ts CHANGED Viewed

@@ -62,6 +62,13 @@ const KNOWN_INPUT_FIELDS = new Set([
   "observationId",
   "observationFingerprint",
   "issueFingerprint",
+  "observation_ids",
+  "observationIds",
+  "observation_count",
+  "observationCount",
+  "reported_by_agents",
+  "reportedByAgents",
+  "sources",
   "elements",
   "location",
 ])
@@ -157,6 +164,20 @@ function pushValidationDiagnostics(errors: ValidationError[]): Diagnostic[] {
   }))
 }
+function normalizeStringArray(value: unknown): string[] | undefined {
+  if (!Array.isArray(value)) return undefined
+  const strings = value.filter(
+    (item): item is string => typeof item === "string" && item.length > 0,
+  )
+  return strings.length > 0
+    ? Array.from(new Set(strings)).sort((a, b) => a.localeCompare(b))
+    : undefined
+}
+function normalizePositiveInteger(value: unknown): number | undefined {
+  return typeof value === "number" && Number.isInteger(value) && value > 0 ? value : undefined
+}
 export function normalizeToCanonicalFinding(
   raw: Finding | Record<string, unknown>,
   runId: string,
@@ -288,6 +309,16 @@ export function normalizeToCanonicalFinding(
       observationId,
     })
+  const observationIds =
+    normalizeStringArray(input.observation_ids) ?? normalizeStringArray(input.observationIds)
+  const reportedByAgents =
+    normalizeStringArray(input.reported_by_agents) ?? normalizeStringArray(input.reportedByAgents)
+  const sources = normalizeStringArray(input.sources)
+  const observationCount =
+    normalizePositiveInteger(input.observation_count) ??
+    normalizePositiveInteger(input.observationCount) ??
+    observationIds?.length
   const canonical: CanonicalFinding = {
     id: observationId,
     check,
@@ -302,6 +333,10 @@ export function normalizeToCanonicalFinding(
     issue_fingerprint: issueFingerprint,
     observation_fingerprint: observationFingerprint,
     observation_id: observationId,
+    observation_ids: observationIds,
+    observation_count: observationCount,
+    reported_by_agents: reportedByAgents,
+    sources,
     impact: typeof input.impact === "string" && input.impact.length > 0 ? input.impact : undefined,
     recommendation:
       typeof input.recommendation === "string" && input.recommendation.length > 0

package/src/tools/forge-coverage-tool.ts CHANGED Viewed

@@ -5,10 +5,14 @@ import { resolveProjectDir } from "../shared/project-utils"
 type ForgeCoverageArgs = {
   target?: string
+  match_path?: string
+  ir_minimum?: boolean
 }
 type NormalizedForgeCoverageArgs = {
   target: string
+  match_path?: string
+  ir_minimum: boolean
 }
 type ForgeCoverageFile = {
@@ -53,9 +57,22 @@ const EMPTY_SUMMARY: ForgeCoverageSummary = {
 function normalizeArgs(args: ForgeCoverageArgs, context: ToolContext): NormalizedForgeCoverageArgs {
   return {
     target: args.target ?? resolveProjectDir(context),
+    match_path: args.match_path,
+    ir_minimum: args.ir_minimum ?? false,
   }
 }
+function buildCoverageCommand(args: NormalizedForgeCoverageArgs, forceIrMinimum = false): string[] {
+  const command = ["forge", "coverage", "--report", "summary"]
+  if (args.match_path) command.push("--match-path", args.match_path)
+  if (args.ir_minimum || forceIrMinimum) command.push("--ir-minimum")
+  return command
+}
+function isStackTooDeep(stderr: string): boolean {
+  return /stack too deep/i.test(stderr)
+}
 function parsePercent(input: string): number {
   const match = input.match(/(\d+(?:\.\d+)?)%/)
   if (!match?.[1]) {
@@ -156,11 +173,22 @@ export async function executeForgeCoverage(
   })
   try {
-    const runResult = await runCommand(["forge", "coverage"], {
+    let runResult = await runCommand(buildCoverageCommand(normalizedArgs), {
       signal: context.abort,
       cwd: normalizedArgs.target,
     })
+    if (
+      runResult.exitCode !== 0 &&
+      !normalizedArgs.ir_minimum &&
+      isStackTooDeep(runResult.stderr)
+    ) {
+      runResult = await runCommand(buildCoverageCommand(normalizedArgs, true), {
+        signal: context.abort,
+        cwd: normalizedArgs.target,
+      })
+    }
     if (runResult.exitCode !== 0) {
       return fail(
         runResult.stderr.trim() || `forge coverage exited with code ${runResult.exitCode}`,
@@ -193,6 +221,8 @@ export const forgeCoverageTool = tool({
     "Run forge coverage analysis and return structured per-file coverage metrics (lines, statements, branches, functions).",
   args: {
     target: tool.schema.string().optional(),
+    match_path: tool.schema.string().optional(),
+    ir_minimum: tool.schema.boolean().optional(),
   },
   async execute(args, context) {
     const result = await executeForgeCoverage(args, context)

package/src/tools/persist-deduped-tool.ts CHANGED Viewed

@@ -85,7 +85,7 @@ export const persistDedupedTool = tool({
     deduped_findings: tool.schema
       .string()
       .describe(
-        "Serialized JSON array of deduplicated and enriched findings. Each finding should have: check, severity, confidence, description, file, lines, source, impact, recommendation, proofOfConcept.",
+        "Serialized JSON array of deduplicated and enriched findings. Each finding should have: check, severity, confidence, description, file, lines, source, impact, recommendation, proofOfConcept, and observation_ids lineage proving which raw findings were merged.",
       ),
   },
   async execute(args, context) {

package/src/tools/report-generator-tool.ts CHANGED Viewed

@@ -746,6 +746,22 @@ function formatLocation(finding: Finding): string {
   return `${finding.file}:${finding.lines[0]}-${finding.lines[1]}`
 }
+/**
+ * Reads the source lines a finding points at so they can be shown in the report.
+ *
+ * The excerpt is best-effort: an unusable location, a missing or non-regular
+ * file, or a file-system error while reading all yield `null` rather than an
+ * exception.
+ *
+ * @param projectDir - Directory that a relative `finding.file` is joined onto.
+ * @param finding - Finding whose `file` and `lines` pair (1-based, inclusive
+ *   start and end) select the excerpt.
+ * @returns The selected lines joined with "\n", or `null` when nothing usable
+ *   could be read or the selection is only whitespace.
+ */
+function sourceExcerpt(projectDir: string, finding: Finding): string | null {
+  if (!finding.file || !Array.isArray(finding.lines) || finding.lines.length < 2) return null
+  const start = finding.lines[0]
+  const end = finding.lines[1]
+  if (!Number.isInteger(start) || !Number.isInteger(end) || start <= 0 || end < start) {
+    return null
+  }
+  // NOTE(review): an absolute `finding.file` is used as-is and a relative one may
+  // contain `..`, so the excerpt can come from outside `projectDir` — confirm
+  // findings are trusted before their file contents are embedded in a report.
+  const absolutePath = path.isAbsolute(finding.file)
+    ? finding.file
+    : path.join(projectDir, finding.file)
+  let contents: string[]
+  try {
+    if (!existsSync(absolutePath) || !statSync(absolutePath).isFile()) return null
+    contents = readFileSync(absolutePath, "utf-8").split(/\r?\n/)
+  } catch {
+    // The file can vanish or be unreadable between the checks and the read;
+    // a missing excerpt must not abort the caller.
+    return null
+  }
+  // `start` is 1-based and `end` is inclusive, hence the asymmetric slice bounds.
+  const excerpt = contents.slice(start - 1, end).join("\n")
+  return excerpt.trim().length > 0 ? excerpt : null
+}
 /**
  * Tells whether a finding is severe enough for the configured threshold.
  *
  * @param finding - Finding whose `severity` is looked up in `FINDING_WEIGHT`.
  * @param threshold - Threshold looked up in `THRESHOLD_WEIGHT`.
  * @returns `true` when the finding's weight is at least the threshold's weight.
  */
 function shouldIncludeFinding(finding: Finding, threshold: SeverityThreshold): boolean {
   const findingWeight = FINDING_WEIGHT[finding.severity]
   const minimumWeight = THRESHOLD_WEIGHT[threshold]
   return findingWeight >= minimumWeight
 }
@@ -860,6 +876,31 @@ function hasDedupLineage(findings: Finding[]): boolean {
   })
 }
+/**
+ * Collects the observation IDs attached to a finding.
+ *
+ * When the finding carries an `observation_ids` array, only that array is
+ * consulted (non-string and empty entries are dropped); otherwise the single
+ * `observation_id` is used if it is a non-empty string.
+ *
+ * @param finding - Finding to read lineage from.
+ * @returns The usable observation IDs, possibly empty.
+ */
+function observationIdsForFinding(finding: Finding): string[] {
+  const isNonEmptyString = (value: unknown): value is string =>
+    typeof value === "string" && value.length > 0
+  const { observation_ids: mergedIds } = finding as { observation_ids?: unknown }
+  if (Array.isArray(mergedIds)) {
+    return mergedIds.filter((id): id is string => isNonEmptyString(id))
+  }
+  const singleId = finding.observation_id
+  if (isNonEmptyString(singleId)) {
+    return [singleId]
+  }
+  return []
+}
+/**
+ * Compares the observation IDs referenced by two finding lists.
+ *
+ * @param eventFindings - Findings whose observation IDs are the expected set.
+ * @param reportFindings - Findings whose observation IDs are the actual set.
+ * @returns `missing` (expected but not actual) and `extra` (actual but not
+ *   expected), each sorted with `localeCompare`, plus `matches`, which is
+ *   `true` only when both lists are empty.
+ */
+function compareObservationLineage(
+  eventFindings: Finding[],
+  reportFindings: Finding[],
+): { missing: string[]; extra: string[]; matches: boolean } {
+  // Gathers every observation ID of a finding list into a set.
+  const collectIds = (findings: Finding[]): Set<string> => {
+    const ids = new Set<string>()
+    for (const finding of findings) {
+      for (const id of observationIdsForFinding(finding)) ids.add(id)
+    }
+    return ids
+  }
+  // Sorted IDs present in `source` but absent from `other`.
+  const onlyIn = (source: Set<string>, other: Set<string>): string[] => {
+    const difference: string[] = []
+    for (const id of source) {
+      if (!other.has(id)) difference.push(id)
+    }
+    return difference.sort((a, b) => a.localeCompare(b))
+  }
+  const expected = collectIds(eventFindings)
+  const actual = collectIds(reportFindings)
+  const missing = onlyIn(expected, actual)
+  const extra = onlyIn(actual, expected)
+  const hasMismatch = missing.length > 0 || extra.length > 0
+  return { missing, extra, matches: !hasMismatch }
+}
 export function validateReportQuality(
   findings: Finding[],
   policy: QualityGatePolicy,
@@ -980,7 +1021,7 @@ function buildRecommendations(counts: FindingsCount): string[] {
   return items
 }
-function buildFindingsSection(findings: Finding[]): string {
+function buildFindingsSection(findings: Finding[], projectDir: string): string {
   if (findings.length === 0) {
     return "## Findings\nNo findings meet the configured severity threshold."
   }
@@ -1006,6 +1047,15 @@ function buildFindingsSection(findings: Finding[]): string {
       lines.push(`**Severity**: ${finding.severity}`)
       lines.push(`**Confidence**: ${finding.confidence}`)
       lines.push(`**Location**: ${formatLocation(finding)}`)
+      const excerpt = sourceExcerpt(projectDir, finding)
+      if (excerpt) {
+        lines.push("")
+        lines.push("**Source Excerpt**:")
+        lines.push("")
+        lines.push("```solidity")
+        lines.push(excerpt)
+        lines.push("```")
+      }
       lines.push("")
       lines.push(`**Description**: ${finding.description}`)
       lines.push("")
@@ -1235,7 +1285,9 @@ export async function executeReportGeneration(
     const hasLineage = hasDedupLineage(reportInput.findings)
     const shouldCheckParity = eventFindings.length === inputFindings.length || hasLineage
     const parity = shouldCheckParity
-      ? compareIssueFingerprintSets(eventFindings, inputFindings)
+      ? hasLineage
+        ? compareObservationLineage(projectFindings(events), reportInput.findings)
+        : compareIssueFingerprintSets(eventFindings, inputFindings)
       : { missing: [], extra: [], matches: true }
     if (!shouldCheckParity) {
@@ -1260,11 +1312,12 @@ export async function executeReportGeneration(
       }
       warningBullets.push(`- Finding parity mismatch: ${mismatchSummary}`)
+      const parityLabel = hasLineage ? "observation IDs" : "issue fingerprints"
       if (parity.missing.length > 0) {
-        warningBullets.push(`- Missing issue fingerprints: ${parity.missing.join(", ")}`)
+        warningBullets.push(`- Missing ${parityLabel}: ${parity.missing.join(", ")}`)
       }
       if (parity.extra.length > 0) {
-        warningBullets.push(`- Extra issue fingerprints: ${parity.extra.join(", ")}`)
+        warningBullets.push(`- Extra ${parityLabel}: ${parity.extra.join(", ")}`)
       }
     }
   } catch (err) {
@@ -1359,7 +1412,7 @@ export async function executeReportGeneration(
     "Approach: Findings are normalized, deterministically ordered by severity/file/line, and validated against report quality gates before emission.",
   )
-  sections.push(buildFindingsSection(findings))
+  sections.push(buildFindingsSection(findings, reportInput.projectDir))
   sections.push("## Recommendations")
   for (const item of buildRecommendations(counts)) {

package/src/tools/slither-tool.ts CHANGED Viewed

@@ -470,26 +470,6 @@ export async function executeSlitherAnalyze(
     }
   }
-  if (args.via_ir) {
-    const fallbackResult = await flattenFallback(args, context, {
-      ...getDefaultFlattenDeps(),
-      runCommand,
-      cwd: projectDir,
-    })
-    if (fallbackResult) return fallbackResult
-    return {
-      success: false,
-      findingsCount: 0,
-      findings: [],
-      executionTime: Date.now() - startedAt,
-      errors: [
-        "via_ir enabled — flatten fallback failed. Ensure forge and solc-select are installed.",
-      ],
-      error:
-        "Project uses via_ir which is incompatible with Slither direct analysis. Flatten fallback also failed.",
-    }
-  }
   const command = buildCommand(args)
   try {
@@ -508,7 +488,7 @@ export async function executeSlitherAnalyze(
       payload = JSON.parse(runResult.stdout) as SlitherPayload
     } catch (error) {
       const message = error instanceof Error ? error.message : "Unknown parse error"
-      if (shouldTryFlattenFallback(errors, runResult.stderr)) {
+      if (args.via_ir || shouldTryFlattenFallback(errors, runResult.stderr)) {
         const fallbackResult = await flattenFallback(args, context, {
           ...getDefaultFlattenDeps(),
           runCommand,
@@ -533,7 +513,11 @@ export async function executeSlitherAnalyze(
     const findings = parseFindings(payload)
     const success = findings.length > 0 || (runResult.exitCode === 0 && payload.success !== false)
-    if (!success && findings.length === 0 && shouldTryFlattenFallback(errors, runResult.stderr)) {
+    if (
+      !success &&
+      findings.length === 0 &&
+      (args.via_ir || shouldTryFlattenFallback(errors, runResult.stderr))
+    ) {
       const fallbackResult = await flattenFallback(args, context, {
         ...getDefaultFlattenDeps(),
         runCommand,

package/src/tools/themis-disposition-tool.ts ADDED Viewed

@@ -0,0 +1,46 @@
+import { type ToolContext, tool } from "@opencode-ai/plugin"
+/** Dispositions that can be recorded for a Themis verdict. */
+type ThemisDispositionStatus = "approved" | "remediated" | "overridden"
+/** Arguments accepted by `executeThemisDisposition`. */
+type ThemisDispositionArgs = {
+  /** Disposition being recorded. */
+  status: ThemisDispositionStatus
+  /** Verdict as JSON text; it is parsed with `JSON.parse` before being returned. */
+  verdict_json: string
+  /** Optional notes; copied into the result only when non-empty. */
+  notes?: string
+  /** Optional justification; copied into the result only when non-empty. */
+  justification?: string
+}
+/**
+ * Parses the JSON text of a Themis verdict.
+ *
+ * @param verdictJson - Verdict serialized as JSON.
+ * @returns The parsed value, left as `unknown` because its shape is not checked here.
+ * @throws Error prefixed with "Invalid Themis verdict JSON: " when the text
+ *   is not valid JSON.
+ */
+function parseVerdict(verdictJson: string): unknown {
+  let parsed: unknown
+  try {
+    parsed = JSON.parse(verdictJson)
+  } catch (cause) {
+    // Anything that is not an Error instance is stringified for the message.
+    let detail: string
+    if (cause instanceof Error) {
+      detail = cause.message
+    } else {
+      detail = String(cause)
+    }
+    throw new Error(`Invalid Themis verdict JSON: ${detail}`)
+  }
+  return parsed
+}
+/**
+ * Builds the disposition record for a Themis verdict.
+ *
+ * Sets the tool-call title on `context` first, then parses the verdict, so a
+ * malformed `verdict_json` throws after the title has been set.
+ *
+ * @param args - Disposition status, verdict JSON text, and optional notes/justification.
+ * @param context - Tool context; only `metadata` is called on it.
+ * @returns `{ success: true, themisDisposition }`, where `notes` and
+ *   `justification` are present only when they were non-empty.
+ * @throws Error when `verdict_json` is not valid JSON.
+ */
+export function executeThemisDisposition(args: ThemisDispositionArgs, context: ToolContext) {
+  const { status, verdict_json: rawVerdict, notes, justification } = args
+  context.metadata({ title: `Themis disposition: ${status}` })
+  const verdict = parseVerdict(rawVerdict)
+  // Empty strings count as absent, so the keys are omitted rather than set.
+  const optionalNotes = notes ? { notes } : {}
+  const optionalJustification = justification ? { justification } : {}
+  return {
+    success: true,
+    themisDisposition: {
+      status,
+      verdict,
+      ...optionalNotes,
+      ...optionalJustification,
+    },
+  }
+}
+/**
+ * Tool definition, built with `tool` from "@opencode-ai/plugin", that exposes
+ * `executeThemisDisposition`.
+ *
+ * `status` must be "approved", "remediated", or "overridden"; `verdict_json`
+ * is a required string; `notes` and `justification` are optional strings.
+ * The result object is returned serialized with `JSON.stringify`.
+ */
+export const themisDispositionTool = tool({
+  description:
+    "Record Argus' resolved disposition for a Themis quality-gate verdict: approved, remediated, or overridden.",
+  args: {
+    status: tool.schema.enum(["approved", "remediated", "overridden"]),
+    verdict_json: tool.schema.string(),
+    notes: tool.schema.string().optional(),
+    justification: tool.schema.string().optional(),
+  },
+  async execute(args, context) {
+    // Invalid verdict JSON makes executeThemisDisposition throw, rejecting this call.
+    return JSON.stringify(executeThemisDisposition(args, context))
+  },
+})