solidity-argus 0.5.8 → 0.5.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +4 -4
- package/README.md +7 -6
- package/package.json +2 -2
- package/src/agents/argus-prompt.ts +13 -8
- package/src/agents/scribe-prompt.ts +2 -1
- package/src/agents/themis-prompt.ts +1 -0
- package/src/cli/commands/doctor.ts +9 -15
- package/src/constants/defaults.ts +3 -3
- package/src/create-hooks.ts +9 -7
- package/src/create-tools.ts +2 -0
- package/src/features/audit-enforcer/audit-enforcer.ts +2 -16
- package/src/features/persistent-state/run-finalizer.ts +76 -1
- package/src/hooks/tool-tracking-hook.ts +25 -0
- package/src/shared/key-tools.ts +9 -2
- package/src/state/adapters.ts +35 -0
- package/src/tools/forge-coverage-tool.ts +31 -1
- package/src/tools/persist-deduped-tool.ts +1 -1
- package/src/tools/report-generator-tool.ts +58 -5
- package/src/tools/slither-tool.ts +6 -22
- package/src/tools/themis-disposition-tool.ts +46 -0
package/AGENTS.md
CHANGED
|
@@ -13,27 +13,27 @@ CLI: `argus doctor`, `argus init`, `argus install`.
|
|
|
13
13
|
**Role**: Primary security audit orchestrator
|
|
14
14
|
**Description**: Argus Panoptes, the All-Seeing Guardian. Coordinates full Solidity security audits by dispatching Sentinel (analysis), Pythia (research), Scribe (reporting), and Themis (validation). Follows a rigorous 7-step methodology: Reconnaissance, Automated Scanning, Manual Review, Attack Surface Mapping, Vulnerability Research, Testing & Verification, and Reporting.
|
|
15
15
|
**Model**: anthropic/claude-opus-4-7
|
|
16
|
-
**Tools**:
|
|
16
|
+
**Tools**: 15 orchestrator-accessible argus_* tools (argus_slither_analyze, argus_analyze_contract, argus_check_patterns, argus_proxy_detection, argus_solodit_search, argus_forge_test, argus_gas_analysis, argus_forge_fuzz, argus_forge_coverage, argus_skill_load, argus_generate_report, argus_record_finding, argus_read_findings, argus_sync_knowledge, argus_themis_disposition). `argus_persist_deduped` is reserved for Scribe.
|
|
17
17
|
|
|
18
18
|
## sentinel
|
|
19
19
|
|
|
20
20
|
**Role**: Static analysis and testing specialist
|
|
21
21
|
**Description**: Finds vulnerabilities through Slither static analysis, Foundry testing, fuzzing, and pattern matching. The tactical executor — runs tools, writes PoC tests, and verifies findings. Dispatched by Argus during Automated Scanning and Testing & Verification phases.
|
|
22
|
-
**Model**: anthropic/claude-sonnet-4-
|
|
22
|
+
**Model**: anthropic/claude-sonnet-4-6
|
|
23
23
|
**Tools**: argus_slither_analyze, argus_forge_test, argus_gas_analysis, argus_forge_fuzz, argus_forge_coverage, argus_analyze_contract, argus_check_patterns, argus_proxy_detection, argus_record_finding, skill
|
|
24
24
|
|
|
25
25
|
## pythia
|
|
26
26
|
|
|
27
27
|
**Role**: Vulnerability researcher
|
|
28
28
|
**Description**: Consults Solodit, SCVD, and the knowledge base to find historical precedents and known attack vectors. Searches 7,769+ real-world audit findings and 51 curated vulnerability pattern files. Dispatched by Argus during Vulnerability Research phase.
|
|
29
|
-
**Model**: anthropic/claude-sonnet-4-
|
|
29
|
+
**Model**: anthropic/claude-sonnet-4-6
|
|
30
30
|
**Tools**: argus_solodit_search, argus_check_patterns, argus_record_finding, skill
|
|
31
31
|
|
|
32
32
|
## scribe
|
|
33
33
|
|
|
34
34
|
**Role**: Audit report writer
|
|
35
35
|
**Description**: Transforms raw findings into professional markdown audit reports. Produces structured output with severity classifications (Critical/High/Medium/Low/Informational), impact assessments, proof-of-concept steps, and actionable recommendations. Dispatched by Argus only after all analysis is complete.
|
|
36
|
-
**Model**: anthropic/claude-sonnet-4-
|
|
36
|
+
**Model**: anthropic/claude-sonnet-4-6
|
|
37
37
|
**Tools**: argus_read_findings, argus_persist_deduped, argus_generate_report, skill
|
|
38
38
|
|
|
39
39
|
## themis
|
package/README.md
CHANGED
|
@@ -66,9 +66,9 @@ Argus will automatically:
|
|
|
66
66
|
| Agent | Role | Model |
|
|
67
67
|
|-------|------|-------|
|
|
68
68
|
| `@argus` | Orchestrator — coordinates the full audit | claude-opus-4-7 |
|
|
69
|
-
| `@sentinel` | Static analysis & testing specialist | claude-sonnet-4-
|
|
70
|
-
| `@pythia` | Vulnerability researcher | claude-sonnet-4-
|
|
71
|
-
| `@scribe` | Audit report writer | claude-sonnet-4-
|
|
69
|
+
| `@sentinel` | Static analysis & testing specialist | claude-sonnet-4-6 |
|
|
70
|
+
| `@pythia` | Vulnerability researcher | claude-sonnet-4-6 |
|
|
71
|
+
| `@scribe` | Audit report writer | claude-sonnet-4-6 |
|
|
72
72
|
| `@themis` | Independent audit quality gate | gpt-5.5 |
|
|
73
73
|
|
|
74
74
|
### @argus — The Orchestrator
|
|
@@ -106,6 +106,7 @@ Validates the completed audit by comparing raw findings, deduped findings, and t
|
|
|
106
106
|
| `argus_read_findings` | Scribe, Themis | Reads persisted findings and audit artifacts for report generation and validation |
|
|
107
107
|
| `argus_persist_deduped` | Scribe | Persists deduplicated findings before final report generation and validation |
|
|
108
108
|
| `argus_generate_report` | Scribe | Generates the final structured audit report in professional markdown format |
|
|
109
|
+
| `argus_themis_disposition` | Argus | Records Argus' resolved disposition for Themis validation: approved, remediated, or explicitly overridden |
|
|
109
110
|
| `argus_sync_knowledge` | Argus | Syncs the local vulnerability database from SCVD (api.scvd.dev) |
|
|
110
111
|
|
|
111
112
|
---
|
|
@@ -285,9 +286,9 @@ Create `.argus/solidity-argus.jsonc` in your project root. `.opencode/solidity-a
|
|
|
285
286
|
{
|
|
286
287
|
"agents": {
|
|
287
288
|
"argus": { "model": "anthropic/claude-opus-4-7" },
|
|
288
|
-
"sentinel": { "model": "anthropic/claude-sonnet-4-
|
|
289
|
-
"pythia": { "model": "anthropic/claude-sonnet-4-
|
|
290
|
-
"scribe": { "model": "anthropic/claude-sonnet-4-
|
|
289
|
+
"sentinel": { "model": "anthropic/claude-sonnet-4-6" },
|
|
290
|
+
"pythia": { "model": "anthropic/claude-sonnet-4-6" },
|
|
291
|
+
"scribe": { "model": "anthropic/claude-sonnet-4-6" },
|
|
291
292
|
"themis": { "model": "openai/gpt-5.5" }
|
|
292
293
|
},
|
|
293
294
|
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "solidity-argus",
|
|
3
|
-
"version": "0.5.8",
|
|
4
|
-
"description": "Solidity smart contract security auditing plugin for OpenCode — 5 specialized agents,
|
|
3
|
+
"version": "0.5.10",
|
|
4
|
+
"description": "Solidity smart contract security auditing plugin for OpenCode — 5 specialized agents, 16 tools (15 core + optional Solodit), and a curated vulnerability knowledge base",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"solidity",
|
|
7
7
|
"security",
|
|
package/src/agents/argus-prompt.ts
CHANGED
|
@@ -198,6 +198,7 @@ Task(subagent_type="scribe", prompt="Generate the final audit report for Project
|
|
|
198
198
|
- \`argus_analyze_contract\`, \`argus_check_patterns\`, \`argus_proxy_detection\` → delegate to **sentinel**
|
|
199
199
|
- \`argus_solodit_search\`, Solodit MCP search → delegate to **pythia**
|
|
200
200
|
- \`argus_read_findings\`, \`argus_persist_deduped\`, \`argus_generate_report\` \u2192 delegate to **scribe**
|
|
201
|
+
- \`argus_themis_disposition\` → call after Themis returns to record Argus' resolved quality-gate disposition
|
|
201
202
|
- Audit quality validation \u2192 delegate to **themis** (after Scribe completes)
|
|
202
203
|
|
|
203
204
|
### **@sentinel** (The Executor)
|
|
@@ -527,7 +528,7 @@ Scope: {list of audited files}
|
|
|
527
528
|
|
|
528
529
|
STEPS:
|
|
529
530
|
1. Call argus_read_findings with run_id above to load all findings
|
|
530
|
-
2. Deduplicate: group findings by vulnerability class + code location, merge into single entries
|
|
531
|
+
2. Deduplicate: group findings by vulnerability class + code location, merge into single entries. Include \`observation_ids\` on every deduped finding so each raw finding maps to exactly one report entry.
|
|
531
532
|
3. Enrich: for each Critical/High finding, write specific impact and recommendation
|
|
532
533
|
4. Call argus_persist_deduped with run_id and your deduped findings array — this writes the source-of-truth JSON to disk
|
|
533
534
|
5. Call argus_generate_report with run_id, project_name, and scope — the tool reads deduped findings from disk
|
|
@@ -538,7 +539,7 @@ Overall risk assessment: {your assessment}
|
|
|
538
539
|
|
|
539
540
|
Scribe will:
|
|
540
541
|
1. Read raw findings (may contain duplicates from different tools)
|
|
541
|
-
2. Semantically deduplicate (e.g., merge reentrancy-eth + reentrancy-cei-violation at same location)
|
|
542
|
+
2. Semantically deduplicate (e.g., merge reentrancy-eth + reentrancy-cei-violation at same location) while preserving \`observation_ids\` lineage for every raw finding
|
|
542
543
|
3. Enrich Critical/High findings with specific impact and recommendation text
|
|
543
544
|
4. Persist deduped findings to disk via \`argus_persist_deduped\` (source-of-truth JSON)
|
|
544
545
|
5. Call \`argus_generate_report\` with \`run_id\` — the tool reads from disk and renders markdown
|
|
@@ -570,13 +571,17 @@ Themis will:
|
|
|
570
571
|
3. Apply vulnerability skill checklists to assess finding validity
|
|
571
572
|
4. Return a verdict: approved or issues found
|
|
572
573
|
|
|
573
|
-
**If Themis flags issues**, YOU are the final judge:
|
|
574
|
-
- If Themis found genuinely dropped findings → re-dispatch Scribe with specific correction instructions
|
|
575
|
-
- If Themis disagrees on severity → evaluate the evidence and
|
|
576
|
-
- If Themis found potential false positives → assess and
|
|
577
|
-
- If Themis approves →
|
|
574
|
+
**If Themis flags issues**, YOU are the final judge, but you must record a resolved disposition before the audit is complete:
|
|
575
|
+
- If Themis found genuinely dropped findings → re-dispatch Scribe with specific correction instructions, then record status="remediated" with notes.
|
|
576
|
+
- If Themis disagrees on severity → evaluate the evidence and either remediate the report or record status="overridden" with a concrete justification.
|
|
577
|
+
- If Themis found potential false positives → assess and remediate or explicitly override with justification.
|
|
578
|
+
- If Themis approves → record status="approved" with the Themis verdict.
|
|
578
579
|
|
|
579
|
-
|
|
580
|
+
Record the disposition by calling \`argus_themis_disposition\` with \`status\`, \`verdict_json\`, and either \`notes\` for remediation or \`justification\` for overrides.
|
|
581
|
+
|
|
582
|
+
If Themis returns approved=false, Argus remains the final judge but must record a disposition before the audit is complete: remediate the issue and record status="remediated", or deliberately override with status="overridden" and a concrete justification. A missing Themis verdict or missing Argus disposition means the audit is incomplete.
|
|
583
|
+
|
|
584
|
+
**An audit is NOT complete until Themis has validated the output and Argus has recorded a resolved disposition.**
|
|
580
585
|
|
|
581
586
|
You are the guardian. Nothing escapes your gaze. Begin the audit.
|
|
582
587
|
`
|
|
package/src/agents/scribe-prompt.ts
CHANGED
|
@@ -53,6 +53,7 @@ Argus provides you with a \`run_id\`. Your job: read findings, deduplicate, enri
|
|
|
53
53
|
- Add "**Detected by:**" listing all tools/checks that flagged it
|
|
54
54
|
- Example: reentrancy-eth + reentrancy-cei-violation + reentrancy-eth-withdraw-state-after-call at VulnerableVault.sol:18-23 → ONE finding
|
|
55
55
|
- **PRESERVATION RULE**: Every raw finding MUST map to exactly one deduped finding. Only merge findings that are genuinely the SAME vulnerability at the SAME location. Different vulnerability classes (e.g., default-visibility vs dos-revert) are SEPARATE findings even if both are Informational. NEVER drop findings during deduplication.
|
|
56
|
+
- **LINEAGE RULE**: Every deduped finding MUST include \`observation_ids\` containing each raw finding's \`observation_id\`, plus \`observation_count\`, \`sources\`, and \`reported_by_agents\` when available. This lets \`argus_generate_report\` prove raw-to-deduped parity instead of emitting a "Finding parity not verifiable" warning.
|
|
56
57
|
|
|
57
58
|
3. **Enrich** (MANDATORY for Critical/High):
|
|
58
59
|
- Write specific \`impact\` (concrete consequence, not "could be exploited")
|
|
@@ -61,7 +62,7 @@ Argus provides you with a \`run_id\`. Your job: read findings, deduplicate, enri
|
|
|
61
62
|
|
|
62
63
|
4. **Persist deduped findings**: Call \`argus_persist_deduped\` with:
|
|
63
64
|
- \`run_id\`: the run ID from Argus
|
|
64
|
-
- \`deduped_findings\`: JSON array of your deduped and enriched findings
|
|
65
|
+
- \`deduped_findings\`: JSON array of your deduped and enriched findings, including \`observation_ids\` lineage for every merged raw observation
|
|
65
66
|
|
|
66
67
|
This writes the source-of-truth JSON to disk at \`.argus/runs/{run_id}/deduped-findings.json\`.
|
|
67
68
|
|
|
package/src/agents/themis-prompt.ts
CHANGED
|
@@ -98,6 +98,7 @@ Verdict rules:
|
|
|
98
98
|
- If approved with no issues, state it concisely.
|
|
99
99
|
- If issues exist, list each issue with concrete evidence (file path, finding id, field mismatch, or historical precedent).
|
|
100
100
|
- Be precise and adversarial, but do not overreach. Recommend; do not override.
|
|
101
|
+
- Return the JSON verdict as the final fenced code block in your response. Do not add a second JSON object after it. Argus uses this verdict to decide whether to accept it, remediate it, or explicitly override it.
|
|
101
102
|
|
|
102
103
|
## AUTHORITY BOUNDARY
|
|
103
104
|
|
|
package/src/cli/commands/doctor.ts
CHANGED
|
@@ -13,6 +13,8 @@ import {
|
|
|
13
13
|
} from "../../skills/argus-skill-resolver"
|
|
14
14
|
import { parseFrontmatter, validateSkillFrontmatter } from "../../skills/skill-schema"
|
|
15
15
|
import { detectViaIr } from "../../tools/slither-tool"
|
|
16
|
+
import { DEFAULT_SOLODIT_PORT } from "../../tools/solodit-search-tool"
|
|
17
|
+
import { checkSoloditHealth } from "../../utils/solodit-health"
|
|
16
18
|
import { cliOutput } from "../cli-output"
|
|
17
19
|
import type { CliCommand } from "../types"
|
|
18
20
|
|
|
@@ -459,21 +461,13 @@ export const doctorCommand: CliCommand = {
|
|
|
459
461
|
|
|
460
462
|
const soloditEnabled = config?.solodit?.enabled !== false
|
|
461
463
|
if (soloditEnabled) {
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
)
|
|
470
|
-
if (response.ok) {
|
|
471
|
-
cliOutput.log(`${GREEN}✓${RESET} Solodit API: reachable`)
|
|
472
|
-
} else {
|
|
473
|
-
cliOutput.log(`${YELLOW}⚠${RESET} Solodit API: returned ${response.status}`)
|
|
474
|
-
}
|
|
475
|
-
} catch {
|
|
476
|
-
cliOutput.log(`${YELLOW}⚠${RESET} Solodit API: unreachable`)
|
|
464
|
+
const port = config?.solodit?.port ?? DEFAULT_SOLODIT_PORT
|
|
465
|
+
const status = await checkSoloditHealth(port, true)
|
|
466
|
+
if (status.reachable) {
|
|
467
|
+
cliOutput.log(`${GREEN}✓${RESET} Solodit MCP: reachable on port ${port}`)
|
|
468
|
+
} else {
|
|
469
|
+
const suffix = status.error ? ` (${status.error})` : ""
|
|
470
|
+
cliOutput.log(`${YELLOW}⚠${RESET} Solodit MCP: unreachable on port ${port}${suffix}`)
|
|
477
471
|
}
|
|
478
472
|
} else {
|
|
479
473
|
cliOutput.log(`${YELLOW}⚠${RESET} Solodit: disabled in config`)
|
|
package/src/constants/defaults.ts
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
export const DEFAULT_MODELS = {
|
|
2
2
|
argus: "anthropic/claude-opus-4-7",
|
|
3
|
-
sentinel: "anthropic/claude-sonnet-4-
|
|
4
|
-
pythia: "anthropic/claude-sonnet-4-
|
|
5
|
-
scribe: "anthropic/claude-sonnet-4-
|
|
3
|
+
sentinel: "anthropic/claude-sonnet-4-6",
|
|
4
|
+
pythia: "anthropic/claude-sonnet-4-6",
|
|
5
|
+
scribe: "anthropic/claude-sonnet-4-6",
|
|
6
6
|
themis: "openai/gpt-5.5",
|
|
7
7
|
} as const
|
|
8
8
|
|
package/src/create-hooks.ts
CHANGED
|
@@ -1092,11 +1092,13 @@ export function createHooks(args: {
|
|
|
1092
1092
|
)
|
|
1093
1093
|
}
|
|
1094
1094
|
|
|
1095
|
-
//
|
|
1096
|
-
//
|
|
1097
|
-
|
|
1098
|
-
|
|
1099
|
-
|
|
1095
|
+
// The report is materialized here, but finalization waits until
|
|
1096
|
+
// Argus records a resolved Themis disposition.
|
|
1097
|
+
}
|
|
1098
|
+
|
|
1099
|
+
if (toolName === "argus_themis_disposition") {
|
|
1100
|
+
const state = getAuditState(input.sessionID)
|
|
1101
|
+
if (state?.reportGenerated) {
|
|
1100
1102
|
const runSink =
|
|
1101
1103
|
eventSinksByRunId.get(state.sessionId) ??
|
|
1102
1104
|
(input.sessionID
|
|
@@ -1120,12 +1122,12 @@ export function createHooks(args: {
|
|
|
1120
1122
|
)
|
|
1121
1123
|
if (!reportFinalization.invariantsPassed) {
|
|
1122
1124
|
logger.warn(
|
|
1123
|
-
`
|
|
1125
|
+
`Themis-disposition finalization for run ${state.sessionId} has invariant errors: ${reportFinalization.errors.join("; ")}`,
|
|
1124
1126
|
)
|
|
1125
1127
|
}
|
|
1126
1128
|
} catch (error) {
|
|
1127
1129
|
logger.warn(
|
|
1128
|
-
`
|
|
1130
|
+
`Themis-disposition finalization failed for run ${state.sessionId}: ${error instanceof Error ? error.message : String(error)}`,
|
|
1129
1131
|
)
|
|
1130
1132
|
}
|
|
1131
1133
|
}
|
package/src/create-tools.ts
CHANGED
|
@@ -15,6 +15,7 @@ import { reportGeneratorTool } from "./tools/report-generator-tool"
|
|
|
15
15
|
import { slitherTool } from "./tools/slither-tool"
|
|
16
16
|
import { createSoloditSearchTool } from "./tools/solodit-search-tool"
|
|
17
17
|
import { syncKnowledgeTool } from "./tools/sync-knowledge-tool"
|
|
18
|
+
import { themisDispositionTool } from "./tools/themis-disposition-tool"
|
|
18
19
|
|
|
19
20
|
export function createTools(config: ArgusConfig): Record<string, ToolDefinition> {
|
|
20
21
|
const tools: Record<string, ToolDefinition> = {
|
|
@@ -31,6 +32,7 @@ export function createTools(config: ArgusConfig): Record<string, ToolDefinition>
|
|
|
31
32
|
argus_read_findings: readFindingsTool,
|
|
32
33
|
argus_persist_deduped: persistDedupedTool,
|
|
33
34
|
argus_generate_report: reportGeneratorTool,
|
|
35
|
+
argus_themis_disposition: themisDispositionTool,
|
|
34
36
|
argus_sync_knowledge: syncKnowledgeTool,
|
|
35
37
|
}
|
|
36
38
|
|
|
package/src/features/audit-enforcer/audit-enforcer.ts
CHANGED
|
@@ -1,23 +1,9 @@
|
|
|
1
1
|
import { PHASE_ORDER } from "../../shared/audit-phases"
|
|
2
|
+
import { computeMissingKeyTools } from "../../shared/key-tools"
|
|
2
3
|
import type { AuditPhase, AuditState } from "../../state/types"
|
|
3
4
|
|
|
4
5
|
const REPORTING_PHASES: AuditPhase[] = ["reporting", "complete"]
|
|
5
6
|
|
|
6
|
-
const KEY_TOOL_FAMILIES: Array<{ family: string; prefixes: string[] }> = [
|
|
7
|
-
{ family: "slither", prefixes: ["argus_slither_analyze", "slither"] },
|
|
8
|
-
{ family: "forge_test", prefixes: ["argus_forge_test", "forge_test"] },
|
|
9
|
-
{ family: "forge_fuzz", prefixes: ["argus_forge_fuzz", "forge_fuzz"] },
|
|
10
|
-
{ family: "forge_coverage", prefixes: ["argus_forge_coverage", "forge_coverage"] },
|
|
11
|
-
]
|
|
12
|
-
|
|
13
|
-
function getMissingToolFamilies(auditState: AuditState): string[] {
|
|
14
|
-
const executedTools = auditState.toolsExecuted.map((t) => t.tool)
|
|
15
|
-
return KEY_TOOL_FAMILIES.filter(
|
|
16
|
-
({ prefixes }) =>
|
|
17
|
-
!executedTools.some((tool) => prefixes.some((prefix) => tool.startsWith(prefix))),
|
|
18
|
-
).map(({ family }) => family)
|
|
19
|
-
}
|
|
20
|
-
|
|
21
7
|
function getNextPhase(current: AuditPhase): AuditPhase | null {
|
|
22
8
|
const idx = PHASE_ORDER.indexOf(current)
|
|
23
9
|
if (idx === -1 || idx >= PHASE_ORDER.length - 1) return null
|
|
@@ -39,7 +25,7 @@ export function createAuditEnforcer() {
|
|
|
39
25
|
]
|
|
40
26
|
|
|
41
27
|
if (REPORTING_PHASES.includes(auditState.currentPhase)) {
|
|
42
|
-
const missing = getMissingToolFamilies(auditState)
|
|
28
|
+
const missing = computeMissingKeyTools(auditState.toolsExecuted, auditState.unavailableTools)
|
|
43
29
|
if (missing.length > 0) {
|
|
44
30
|
parts.push(
|
|
45
31
|
`\u26a0\ufe0f Tool coverage incomplete: ${missing.join(", ")} have not been executed. Do not proceed to report generation until required tools are complete.`,
|
|
package/src/features/persistent-state/run-finalizer.ts
CHANGED
|
@@ -131,6 +131,79 @@ function collectReportQualityGateErrors(events: AuditEvent[]): string[] {
|
|
|
131
131
|
return errors
|
|
132
132
|
}
|
|
133
133
|
|
|
134
|
+
type ThemisVerdict = {
|
|
135
|
+
approved?: unknown
|
|
136
|
+
pipeline_issues?: unknown
|
|
137
|
+
false_positives?: unknown
|
|
138
|
+
missed_findings?: unknown
|
|
139
|
+
severity_adjustments?: unknown
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
type ThemisDisposition = {
|
|
143
|
+
status?: unknown
|
|
144
|
+
verdict?: ThemisVerdict
|
|
145
|
+
notes?: unknown
|
|
146
|
+
justification?: unknown
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
function hasText(value: unknown): value is string {
|
|
150
|
+
return typeof value === "string" && value.trim().length > 0
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
function isResolvedThemisDisposition(value: unknown): boolean {
|
|
154
|
+
const disposition = asRecord(value) as ThemisDisposition | null
|
|
155
|
+
if (disposition?.status === "approved") {
|
|
156
|
+
return disposition.verdict?.approved === true
|
|
157
|
+
}
|
|
158
|
+
if (disposition?.status === "remediated") {
|
|
159
|
+
return disposition.verdict?.approved === false && hasText(disposition.notes)
|
|
160
|
+
}
|
|
161
|
+
if (disposition?.status === "overridden") {
|
|
162
|
+
return disposition.verdict?.approved === false && hasText(disposition.justification)
|
|
163
|
+
}
|
|
164
|
+
return false
|
|
165
|
+
}
|
|
166
|
+
|
|
167
|
+
function hasRejectedThemisVerdict(value: unknown): boolean {
|
|
168
|
+
const verdict = asRecord(value) as ThemisVerdict | null
|
|
169
|
+
return verdict?.approved === false
|
|
170
|
+
}
|
|
171
|
+
|
|
172
|
+
function collectThemisDispositionErrors(events: AuditEvent[]): string[] {
|
|
173
|
+
let reportIndex = -1
|
|
174
|
+
for (let index = events.length - 1; index >= 0; index -= 1) {
|
|
175
|
+
const event = events[index]
|
|
176
|
+
if (event && isGenerateReportCompletion(event)) {
|
|
177
|
+
reportIndex = index
|
|
178
|
+
break
|
|
179
|
+
}
|
|
180
|
+
}
|
|
181
|
+
if (reportIndex === -1) return []
|
|
182
|
+
|
|
183
|
+
const laterEvents = events.slice(reportIndex + 1)
|
|
184
|
+
const hasResolvedDisposition = laterEvents.some((event) => {
|
|
185
|
+
if (event.type !== "tool.completed") return false
|
|
186
|
+
const payload = asRecord(event.payload)
|
|
187
|
+
return isResolvedThemisDisposition(payload?.themisDisposition)
|
|
188
|
+
})
|
|
189
|
+
|
|
190
|
+
if (hasResolvedDisposition) return []
|
|
191
|
+
|
|
192
|
+
const hasUnresolvedRejection = laterEvents.some((event) => {
|
|
193
|
+
if (event.type !== "tool.completed") return false
|
|
194
|
+
const payload = asRecord(event.payload)
|
|
195
|
+
return (
|
|
196
|
+
payload?.tool === "task" &&
|
|
197
|
+
payload.subagent_type === "themis" &&
|
|
198
|
+
hasRejectedThemisVerdict(payload.themis)
|
|
199
|
+
)
|
|
200
|
+
})
|
|
201
|
+
|
|
202
|
+
return hasUnresolvedRejection
|
|
203
|
+
? ["generated report has unresolved Themis issues"]
|
|
204
|
+
: ["generated report has no resolved Themis disposition"]
|
|
205
|
+
}
|
|
206
|
+
|
|
134
207
|
function collectParentChildIntegrityErrors(events: AuditEvent[]): string[] {
|
|
135
208
|
const errors: string[] = []
|
|
136
209
|
const parentByChild = new Map<string, string>()
|
|
@@ -244,7 +317,7 @@ function collectInvariantErrors(events: AuditEvent[]): { errors: string[]; warni
|
|
|
244
317
|
|
|
245
318
|
warnings.push(...collectOrphanedToolStarts(events))
|
|
246
319
|
errors.push(...collectParentChildIntegrityErrors(events))
|
|
247
|
-
|
|
320
|
+
warnings.push(...collectMultiSessionErrors(events))
|
|
248
321
|
return { errors, warnings }
|
|
249
322
|
}
|
|
250
323
|
|
|
@@ -308,6 +381,7 @@ export async function finalizeRun(
|
|
|
308
381
|
const reportErrors = [
|
|
309
382
|
...(await collectReportCompletenessErrors(events)),
|
|
310
383
|
...collectReportQualityGateErrors(events),
|
|
384
|
+
...collectThemisDispositionErrors(events),
|
|
311
385
|
]
|
|
312
386
|
if (reportErrors.length === 0) {
|
|
313
387
|
return {
|
|
@@ -324,6 +398,7 @@ export async function finalizeRun(
|
|
|
324
398
|
const { errors, warnings } = collectInvariantErrors(events)
|
|
325
399
|
errors.push(...(await collectReportCompletenessErrors(events)))
|
|
326
400
|
errors.push(...collectReportQualityGateErrors(events))
|
|
401
|
+
errors.push(...collectThemisDispositionErrors(events))
|
|
327
402
|
const invariantsPassed = errors.length === 0
|
|
328
403
|
const sessionId = events.at(-1)?.session_id ?? ""
|
|
329
404
|
|
|
package/src/hooks/tool-tracking-hook.ts
CHANGED
|
@@ -426,6 +426,21 @@ function processFuzzResult(parsed: Record<string, unknown>, state: AuditState):
|
|
|
426
426
|
}
|
|
427
427
|
}
|
|
428
428
|
|
|
429
|
+
function countReadFindingsResult(parsed: Record<string, unknown>): number {
|
|
430
|
+
const summary = toRecord(parsed.summary)
|
|
431
|
+
if (
|
|
432
|
+
summary &&
|
|
433
|
+
typeof summary.findingsCount === "number" &&
|
|
434
|
+
Number.isFinite(summary.findingsCount)
|
|
435
|
+
) {
|
|
436
|
+
return Math.max(0, summary.findingsCount)
|
|
437
|
+
}
|
|
438
|
+
|
|
439
|
+
const reportInput = toRecord(parsed.reportInput)
|
|
440
|
+
const findings = reportInput?.findings
|
|
441
|
+
return Array.isArray(findings) ? findings.length : 0
|
|
442
|
+
}
|
|
443
|
+
|
|
429
444
|
function processSoloditResult(parsed: Record<string, unknown>, state: AuditState): void {
|
|
430
445
|
const query = typeof parsed.query === "string" ? parsed.query : ""
|
|
431
446
|
const results = Array.isArray(parsed.results) ? parsed.results : []
|
|
@@ -709,6 +724,7 @@ export function createToolTrackingHook(
|
|
|
709
724
|
let findingsCount = 0
|
|
710
725
|
let completedSuccess = false
|
|
711
726
|
let completionError: string | undefined
|
|
727
|
+
let completedRecord: Record<string, unknown> | null = null
|
|
712
728
|
|
|
713
729
|
try {
|
|
714
730
|
if (input.tool === "argus_skill_load") {
|
|
@@ -763,6 +779,7 @@ export function createToolTrackingHook(
|
|
|
763
779
|
}
|
|
764
780
|
return
|
|
765
781
|
}
|
|
782
|
+
completedRecord = record
|
|
766
783
|
|
|
767
784
|
switch (input.tool) {
|
|
768
785
|
case "argus_slither_analyze": {
|
|
@@ -812,6 +829,9 @@ export function createToolTrackingHook(
|
|
|
812
829
|
projectDir,
|
|
813
830
|
)
|
|
814
831
|
break
|
|
832
|
+
case "argus_read_findings":
|
|
833
|
+
findingsCount = countReadFindingsResult(record)
|
|
834
|
+
break
|
|
815
835
|
case "argus_analyze_contract": {
|
|
816
836
|
processContractAnalyzerResult(record, auditState)
|
|
817
837
|
const filePath = (input.args as Record<string, unknown>)?.file_path as string
|
|
@@ -996,6 +1016,11 @@ export function createToolTrackingHook(
|
|
|
996
1016
|
case "argus_check_patterns":
|
|
997
1017
|
if (auditState.patternVersion) enrichment.patternVersion = auditState.patternVersion
|
|
998
1018
|
break
|
|
1019
|
+
case "argus_themis_disposition":
|
|
1020
|
+
if (completedRecord?.themisDisposition) {
|
|
1021
|
+
enrichment.themisDisposition = completedRecord.themisDisposition
|
|
1022
|
+
}
|
|
1023
|
+
break
|
|
999
1024
|
}
|
|
1000
1025
|
}
|
|
1001
1026
|
await emitToSink(
|
package/src/shared/key-tools.ts
CHANGED
|
@@ -23,15 +23,22 @@ export const UNAVAILABLE_TO_KEY_TOOL: Record<string, string> = {
|
|
|
23
23
|
solodit: "solodit",
|
|
24
24
|
}
|
|
25
25
|
|
|
26
|
+
type ToolCoverageRecord = {
|
|
27
|
+
tool: string
|
|
28
|
+
success?: boolean
|
|
29
|
+
}
|
|
30
|
+
|
|
26
31
|
/**
|
|
27
32
|
* Compute which key tools have not yet been executed, excusing any that are
|
|
28
33
|
* declared unavailable.
|
|
29
34
|
*/
|
|
30
35
|
export function computeMissingKeyTools(
|
|
31
|
-
toolsExecuted:
|
|
36
|
+
toolsExecuted: ToolCoverageRecord[],
|
|
32
37
|
unavailableTools?: string[],
|
|
33
38
|
): string[] {
|
|
34
|
-
const executedShortNames = new Set(
|
|
39
|
+
const executedShortNames = new Set(
|
|
40
|
+
toolsExecuted.filter((t) => t.success === true).map((t) => TOOL_SHORT_NAMES[t.tool] ?? t.tool),
|
|
41
|
+
)
|
|
35
42
|
const excused = new Set(
|
|
36
43
|
(unavailableTools ?? []).map((t) => UNAVAILABLE_TO_KEY_TOOL[t]).filter(Boolean),
|
|
37
44
|
)
|
package/src/state/adapters.ts
CHANGED
|
@@ -62,6 +62,13 @@ const KNOWN_INPUT_FIELDS = new Set([
|
|
|
62
62
|
"observationId",
|
|
63
63
|
"observationFingerprint",
|
|
64
64
|
"issueFingerprint",
|
|
65
|
+
"observation_ids",
|
|
66
|
+
"observationIds",
|
|
67
|
+
"observation_count",
|
|
68
|
+
"observationCount",
|
|
69
|
+
"reported_by_agents",
|
|
70
|
+
"reportedByAgents",
|
|
71
|
+
"sources",
|
|
65
72
|
"elements",
|
|
66
73
|
"location",
|
|
67
74
|
])
|
|
@@ -157,6 +164,20 @@ function pushValidationDiagnostics(errors: ValidationError[]): Diagnostic[] {
|
|
|
157
164
|
}))
|
|
158
165
|
}
|
|
159
166
|
|
|
167
|
+
function normalizeStringArray(value: unknown): string[] | undefined {
|
|
168
|
+
if (!Array.isArray(value)) return undefined
|
|
169
|
+
const strings = value.filter(
|
|
170
|
+
(item): item is string => typeof item === "string" && item.length > 0,
|
|
171
|
+
)
|
|
172
|
+
return strings.length > 0
|
|
173
|
+
? Array.from(new Set(strings)).sort((a, b) => a.localeCompare(b))
|
|
174
|
+
: undefined
|
|
175
|
+
}
|
|
176
|
+
|
|
177
|
+
function normalizePositiveInteger(value: unknown): number | undefined {
|
|
178
|
+
return typeof value === "number" && Number.isInteger(value) && value > 0 ? value : undefined
|
|
179
|
+
}
|
|
180
|
+
|
|
160
181
|
export function normalizeToCanonicalFinding(
|
|
161
182
|
raw: Finding | Record<string, unknown>,
|
|
162
183
|
runId: string,
|
|
@@ -288,6 +309,16 @@ export function normalizeToCanonicalFinding(
|
|
|
288
309
|
observationId,
|
|
289
310
|
})
|
|
290
311
|
|
|
312
|
+
const observationIds =
|
|
313
|
+
normalizeStringArray(input.observation_ids) ?? normalizeStringArray(input.observationIds)
|
|
314
|
+
const reportedByAgents =
|
|
315
|
+
normalizeStringArray(input.reported_by_agents) ?? normalizeStringArray(input.reportedByAgents)
|
|
316
|
+
const sources = normalizeStringArray(input.sources)
|
|
317
|
+
const observationCount =
|
|
318
|
+
normalizePositiveInteger(input.observation_count) ??
|
|
319
|
+
normalizePositiveInteger(input.observationCount) ??
|
|
320
|
+
observationIds?.length
|
|
321
|
+
|
|
291
322
|
const canonical: CanonicalFinding = {
|
|
292
323
|
id: observationId,
|
|
293
324
|
check,
|
|
@@ -302,6 +333,10 @@ export function normalizeToCanonicalFinding(
|
|
|
302
333
|
issue_fingerprint: issueFingerprint,
|
|
303
334
|
observation_fingerprint: observationFingerprint,
|
|
304
335
|
observation_id: observationId,
|
|
336
|
+
observation_ids: observationIds,
|
|
337
|
+
observation_count: observationCount,
|
|
338
|
+
reported_by_agents: reportedByAgents,
|
|
339
|
+
sources,
|
|
305
340
|
impact: typeof input.impact === "string" && input.impact.length > 0 ? input.impact : undefined,
|
|
306
341
|
recommendation:
|
|
307
342
|
typeof input.recommendation === "string" && input.recommendation.length > 0
|
|
package/src/tools/forge-coverage-tool.ts
CHANGED
|
@@ -5,10 +5,14 @@ import { resolveProjectDir } from "../shared/project-utils"
|
|
|
5
5
|
|
|
6
6
|
type ForgeCoverageArgs = {
|
|
7
7
|
target?: string
|
|
8
|
+
match_path?: string
|
|
9
|
+
ir_minimum?: boolean
|
|
8
10
|
}
|
|
9
11
|
|
|
10
12
|
type NormalizedForgeCoverageArgs = {
|
|
11
13
|
target: string
|
|
14
|
+
match_path?: string
|
|
15
|
+
ir_minimum: boolean
|
|
12
16
|
}
|
|
13
17
|
|
|
14
18
|
type ForgeCoverageFile = {
|
|
@@ -53,9 +57,22 @@ const EMPTY_SUMMARY: ForgeCoverageSummary = {
|
|
|
53
57
|
function normalizeArgs(args: ForgeCoverageArgs, context: ToolContext): NormalizedForgeCoverageArgs {
|
|
54
58
|
return {
|
|
55
59
|
target: args.target ?? resolveProjectDir(context),
|
|
60
|
+
match_path: args.match_path,
|
|
61
|
+
ir_minimum: args.ir_minimum ?? false,
|
|
56
62
|
}
|
|
57
63
|
}
|
|
58
64
|
|
|
65
|
+
/**
 * Builds the argv for a `forge coverage --report summary` invocation.
 *
 * @param args Normalized tool arguments.
 * @param forceIrMinimum Adds `--ir-minimum` even when `args.ir_minimum` is false.
 * @returns The executable name followed by its arguments, one entry each.
 */
function buildCoverageCommand(args: NormalizedForgeCoverageArgs, forceIrMinimum = false): string[] {
  const command = ["forge", "coverage", "--report", "summary"]

  // A missing or empty match_path means "no path filter".
  if (args.match_path) {
    command.push("--match-path", args.match_path)
  }

  if (args.ir_minimum || forceIrMinimum) {
    command.push("--ir-minimum")
  }

  return command
}
|
|
71
|
+
|
|
72
|
+
/**
 * Reports whether the given output mentions a "stack too deep" error.
 * The phrase is matched case-insensitively anywhere in the text.
 */
function isStackTooDeep(stderr: string): boolean {
  return stderr.search(/stack too deep/i) !== -1
}
|
|
75
|
+
|
|
59
76
|
function parsePercent(input: string): number {
|
|
60
77
|
const match = input.match(/(\d+(?:\.\d+)?)%/)
|
|
61
78
|
if (!match?.[1]) {
|
|
@@ -156,11 +173,22 @@ export async function executeForgeCoverage(
|
|
|
156
173
|
})
|
|
157
174
|
|
|
158
175
|
try {
|
|
159
|
-
|
|
176
|
+
let runResult = await runCommand(buildCoverageCommand(normalizedArgs), {
|
|
160
177
|
signal: context.abort,
|
|
161
178
|
cwd: normalizedArgs.target,
|
|
162
179
|
})
|
|
163
180
|
|
|
181
|
+
if (
|
|
182
|
+
runResult.exitCode !== 0 &&
|
|
183
|
+
!normalizedArgs.ir_minimum &&
|
|
184
|
+
isStackTooDeep(runResult.stderr)
|
|
185
|
+
) {
|
|
186
|
+
runResult = await runCommand(buildCoverageCommand(normalizedArgs, true), {
|
|
187
|
+
signal: context.abort,
|
|
188
|
+
cwd: normalizedArgs.target,
|
|
189
|
+
})
|
|
190
|
+
}
|
|
191
|
+
|
|
164
192
|
if (runResult.exitCode !== 0) {
|
|
165
193
|
return fail(
|
|
166
194
|
runResult.stderr.trim() || `forge coverage exited with code ${runResult.exitCode}`,
|
|
@@ -193,6 +221,8 @@ export const forgeCoverageTool = tool({
|
|
|
193
221
|
"Run forge coverage analysis and return structured per-file coverage metrics (lines, statements, branches, functions).",
|
|
194
222
|
args: {
|
|
195
223
|
target: tool.schema.string().optional(),
|
|
224
|
+
match_path: tool.schema.string().optional(),
|
|
225
|
+
ir_minimum: tool.schema.boolean().optional(),
|
|
196
226
|
},
|
|
197
227
|
async execute(args, context) {
|
|
198
228
|
const result = await executeForgeCoverage(args, context)
|
|
@@ -85,7 +85,7 @@ export const persistDedupedTool = tool({
|
|
|
85
85
|
deduped_findings: tool.schema
|
|
86
86
|
.string()
|
|
87
87
|
.describe(
|
|
88
|
-
"Serialized JSON array of deduplicated and enriched findings. Each finding should have: check, severity, confidence, description, file, lines, source, impact, recommendation, proofOfConcept.",
|
|
88
|
+
"Serialized JSON array of deduplicated and enriched findings. Each finding should have: check, severity, confidence, description, file, lines, source, impact, recommendation, proofOfConcept, and observation_ids lineage proving which raw findings were merged.",
|
|
89
89
|
),
|
|
90
90
|
},
|
|
91
91
|
async execute(args, context) {
|
|
@@ -746,6 +746,22 @@ function formatLocation(finding: Finding): string {
|
|
|
746
746
|
return `${finding.file}:${finding.lines[0]}-${finding.lines[1]}`
|
|
747
747
|
}
|
|
748
748
|
|
|
749
|
+
/**
 * Reads the source lines a finding points at so they can be quoted in the report.
 *
 * @param projectDir Base directory used to resolve a relative `finding.file`.
 * @param finding Finding whose `file` and `lines` (`[start, end]`, 1-based and
 *   inclusive) select the excerpt.
 * @returns The selected lines joined with `\n`, or null when the location is
 *   invalid, the path is not a readable regular file, or the excerpt is blank.
 */
function sourceExcerpt(projectDir: string, finding: Finding): string | null {
  if (!finding.file || !Array.isArray(finding.lines) || finding.lines.length < 2) return null

  const [start, end] = finding.lines
  if (!Number.isInteger(start) || !Number.isInteger(end) || start <= 0 || end < start) {
    return null
  }

  const absolutePath = path.isAbsolute(finding.file)
    ? finding.file
    : path.join(projectDir, finding.file)

  let contents: string
  try {
    // statSync and readFileSync throw for missing or unreadable paths. The
    // excerpt is optional, so any filesystem failure simply means "no excerpt".
    if (!statSync(absolutePath).isFile()) return null
    contents = readFileSync(absolutePath, "utf-8")
  } catch {
    return null
  }

  // Lines are 1-based and inclusive; slice clamps an `end` past the last line.
  const excerpt = contents
    .split(/\r?\n/)
    .slice(start - 1, end)
    .join("\n")
  return excerpt.trim().length > 0 ? excerpt : null
}
|
|
764
|
+
|
|
749
765
|
/**
 * Decides whether a finding is severe enough for the report.
 *
 * @returns true when the weight looked up for the finding's severity is at
 *   least the weight looked up for the configured threshold.
 */
function shouldIncludeFinding(finding: Finding, threshold: SeverityThreshold): boolean {
  const findingWeight = FINDING_WEIGHT[finding.severity]
  const minimumWeight = THRESHOLD_WEIGHT[threshold]
  return findingWeight >= minimumWeight
}
|
|
@@ -860,6 +876,31 @@ function hasDedupLineage(findings: Finding[]): boolean {
|
|
|
860
876
|
})
|
|
861
877
|
}
|
|
862
878
|
|
|
879
|
+
/**
 * Collects the observation IDs attached to a finding.
 *
 * When `observation_ids` is an array, its non-empty string entries are
 * returned (other entries are dropped) and `observation_id` is not consulted.
 * Otherwise the single `observation_id` is returned if it is a non-empty
 * string.
 *
 * @returns The IDs in their original order; empty when none are usable.
 */
function observationIdsForFinding(finding: Finding): string[] {
  const isNonEmptyString = (value: unknown): value is string =>
    typeof value === "string" && value.length > 0

  // observation_ids is read through a cast to unknown and validated at runtime.
  const { observation_ids: observationIds } = finding as { observation_ids?: unknown }
  if (Array.isArray(observationIds)) {
    return observationIds.filter(isNonEmptyString)
  }

  return isNonEmptyString(finding.observation_id) ? [finding.observation_id] : []
}
|
|
888
|
+
|
|
889
|
+
/**
 * Compares the observation IDs recorded in events with those carried by the
 * report findings.
 *
 * @param eventFindings Findings whose observation IDs are expected in the report.
 * @param reportFindings Findings actually going into the report.
 * @returns `missing` (expected but absent from the report), `extra` (in the
 *   report but not expected), both sorted with `localeCompare`, and `matches`,
 *   which is true only when both lists are empty.
 */
function compareObservationLineage(
  eventFindings: Finding[],
  reportFindings: Finding[],
): { missing: string[]; extra: string[]; matches: boolean } {
  // Sets collapse duplicate IDs so each ID is reported at most once.
  const expected = new Set(eventFindings.flatMap(observationIdsForFinding))
  const actual = new Set(reportFindings.flatMap(observationIdsForFinding))

  const onlyIn = (source: Set<string>, other: Set<string>): string[] =>
    Array.from(source)
      .filter((id) => !other.has(id))
      .sort((a, b) => a.localeCompare(b))

  const missing = onlyIn(expected, actual)
  const extra = onlyIn(actual, expected)
  return { missing, extra, matches: missing.length === 0 && extra.length === 0 }
}
|
|
903
|
+
|
|
863
904
|
export function validateReportQuality(
|
|
864
905
|
findings: Finding[],
|
|
865
906
|
policy: QualityGatePolicy,
|
|
@@ -980,7 +1021,7 @@ function buildRecommendations(counts: FindingsCount): string[] {
|
|
|
980
1021
|
return items
|
|
981
1022
|
}
|
|
982
1023
|
|
|
983
|
-
function buildFindingsSection(findings: Finding[]): string {
|
|
1024
|
+
function buildFindingsSection(findings: Finding[], projectDir: string): string {
|
|
984
1025
|
if (findings.length === 0) {
|
|
985
1026
|
return "## Findings\nNo findings meet the configured severity threshold."
|
|
986
1027
|
}
|
|
@@ -1006,6 +1047,15 @@ function buildFindingsSection(findings: Finding[]): string {
|
|
|
1006
1047
|
lines.push(`**Severity**: ${finding.severity}`)
|
|
1007
1048
|
lines.push(`**Confidence**: ${finding.confidence}`)
|
|
1008
1049
|
lines.push(`**Location**: ${formatLocation(finding)}`)
|
|
1050
|
+
const excerpt = sourceExcerpt(projectDir, finding)
|
|
1051
|
+
if (excerpt) {
|
|
1052
|
+
lines.push("")
|
|
1053
|
+
lines.push("**Source Excerpt**:")
|
|
1054
|
+
lines.push("")
|
|
1055
|
+
lines.push("```solidity")
|
|
1056
|
+
lines.push(excerpt)
|
|
1057
|
+
lines.push("```")
|
|
1058
|
+
}
|
|
1009
1059
|
lines.push("")
|
|
1010
1060
|
lines.push(`**Description**: ${finding.description}`)
|
|
1011
1061
|
lines.push("")
|
|
@@ -1235,7 +1285,9 @@ export async function executeReportGeneration(
|
|
|
1235
1285
|
const hasLineage = hasDedupLineage(reportInput.findings)
|
|
1236
1286
|
const shouldCheckParity = eventFindings.length === inputFindings.length || hasLineage
|
|
1237
1287
|
const parity = shouldCheckParity
|
|
1238
|
-
?
|
|
1288
|
+
? hasLineage
|
|
1289
|
+
? compareObservationLineage(projectFindings(events), reportInput.findings)
|
|
1290
|
+
: compareIssueFingerprintSets(eventFindings, inputFindings)
|
|
1239
1291
|
: { missing: [], extra: [], matches: true }
|
|
1240
1292
|
|
|
1241
1293
|
if (!shouldCheckParity) {
|
|
@@ -1260,11 +1312,12 @@ export async function executeReportGeneration(
|
|
|
1260
1312
|
}
|
|
1261
1313
|
|
|
1262
1314
|
warningBullets.push(`- Finding parity mismatch: ${mismatchSummary}`)
|
|
1315
|
+
const parityLabel = hasLineage ? "observation IDs" : "issue fingerprints"
|
|
1263
1316
|
if (parity.missing.length > 0) {
|
|
1264
|
-
warningBullets.push(`- Missing
|
|
1317
|
+
warningBullets.push(`- Missing ${parityLabel}: ${parity.missing.join(", ")}`)
|
|
1265
1318
|
}
|
|
1266
1319
|
if (parity.extra.length > 0) {
|
|
1267
|
-
warningBullets.push(`- Extra
|
|
1320
|
+
warningBullets.push(`- Extra ${parityLabel}: ${parity.extra.join(", ")}`)
|
|
1268
1321
|
}
|
|
1269
1322
|
}
|
|
1270
1323
|
} catch (err) {
|
|
@@ -1359,7 +1412,7 @@ export async function executeReportGeneration(
|
|
|
1359
1412
|
"Approach: Findings are normalized, deterministically ordered by severity/file/line, and validated against report quality gates before emission.",
|
|
1360
1413
|
)
|
|
1361
1414
|
|
|
1362
|
-
sections.push(buildFindingsSection(findings))
|
|
1415
|
+
sections.push(buildFindingsSection(findings, reportInput.projectDir))
|
|
1363
1416
|
|
|
1364
1417
|
sections.push("## Recommendations")
|
|
1365
1418
|
for (const item of buildRecommendations(counts)) {
|
|
@@ -470,26 +470,6 @@ export async function executeSlitherAnalyze(
|
|
|
470
470
|
}
|
|
471
471
|
}
|
|
472
472
|
|
|
473
|
-
if (args.via_ir) {
|
|
474
|
-
const fallbackResult = await flattenFallback(args, context, {
|
|
475
|
-
...getDefaultFlattenDeps(),
|
|
476
|
-
runCommand,
|
|
477
|
-
cwd: projectDir,
|
|
478
|
-
})
|
|
479
|
-
if (fallbackResult) return fallbackResult
|
|
480
|
-
return {
|
|
481
|
-
success: false,
|
|
482
|
-
findingsCount: 0,
|
|
483
|
-
findings: [],
|
|
484
|
-
executionTime: Date.now() - startedAt,
|
|
485
|
-
errors: [
|
|
486
|
-
"via_ir enabled — flatten fallback failed. Ensure forge and solc-select are installed.",
|
|
487
|
-
],
|
|
488
|
-
error:
|
|
489
|
-
"Project uses via_ir which is incompatible with Slither direct analysis. Flatten fallback also failed.",
|
|
490
|
-
}
|
|
491
|
-
}
|
|
492
|
-
|
|
493
473
|
const command = buildCommand(args)
|
|
494
474
|
|
|
495
475
|
try {
|
|
@@ -508,7 +488,7 @@ export async function executeSlitherAnalyze(
|
|
|
508
488
|
payload = JSON.parse(runResult.stdout) as SlitherPayload
|
|
509
489
|
} catch (error) {
|
|
510
490
|
const message = error instanceof Error ? error.message : "Unknown parse error"
|
|
511
|
-
if (shouldTryFlattenFallback(errors, runResult.stderr)) {
|
|
491
|
+
if (args.via_ir || shouldTryFlattenFallback(errors, runResult.stderr)) {
|
|
512
492
|
const fallbackResult = await flattenFallback(args, context, {
|
|
513
493
|
...getDefaultFlattenDeps(),
|
|
514
494
|
runCommand,
|
|
@@ -533,7 +513,11 @@ export async function executeSlitherAnalyze(
|
|
|
533
513
|
const findings = parseFindings(payload)
|
|
534
514
|
const success = findings.length > 0 || (runResult.exitCode === 0 && payload.success !== false)
|
|
535
515
|
|
|
536
|
-
if (
|
|
516
|
+
if (
|
|
517
|
+
!success &&
|
|
518
|
+
findings.length === 0 &&
|
|
519
|
+
(args.via_ir || shouldTryFlattenFallback(errors, runResult.stderr))
|
|
520
|
+
) {
|
|
537
521
|
const fallbackResult = await flattenFallback(args, context, {
|
|
538
522
|
...getDefaultFlattenDeps(),
|
|
539
523
|
runCommand,
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
import { type ToolContext, tool } from "@opencode-ai/plugin"
|
|
2
|
+
|
|
3
|
+
/** Outcomes that can be recorded for a Themis quality-gate verdict. */
type ThemisDispositionStatus = "approved" | "remediated" | "overridden"

/** Arguments accepted by the Themis disposition tool. */
type ThemisDispositionArgs = {
  /** The recorded outcome. */
  status: ThemisDispositionStatus
  /** The verdict as JSON text; it is parsed before being returned. */
  verdict_json: string
  /** Optional notes; echoed in the result only when non-empty. */
  notes?: string
  /** Optional justification; echoed in the result only when non-empty. */
  justification?: string
}
|
|
11
|
+
|
|
12
|
+
/**
 * Parses the serialized Themis verdict.
 *
 * @param verdictJson JSON text of the verdict.
 * @returns The parsed value, left untyped because its shape is not validated here.
 * @throws Error with the prefix "Invalid Themis verdict JSON: " when the text
 *   is not valid JSON.
 */
function parseVerdict(verdictJson: string): unknown {
  let parsed: unknown
  try {
    parsed = JSON.parse(verdictJson)
  } catch (cause) {
    // Non-Error throwables are stringified so the message is always readable.
    const reason = cause instanceof Error ? cause.message : String(cause)
    throw new Error(`Invalid Themis verdict JSON: ${reason}`)
  }
  return parsed
}
|
|
20
|
+
|
|
21
|
+
/**
 * Builds the result payload for a Themis disposition.
 *
 * Sets the tool metadata title first, then parses the verdict, so the title is
 * emitted even when parsing throws.
 *
 * @param args Disposition status, serialized verdict and optional free text.
 * @param context Tool context that receives the metadata title.
 * @returns `{ success: true, themisDisposition }`, where `notes` and
 *   `justification` are present only when they were non-empty.
 * @throws Error when `args.verdict_json` is not valid JSON.
 */
export function executeThemisDisposition(args: ThemisDispositionArgs, context: ToolContext) {
  const { status, notes, justification } = args
  context.metadata({ title: `Themis disposition: ${status}` })
  const verdict = parseVerdict(args.verdict_json)

  return {
    success: true,
    themisDisposition: {
      status,
      verdict,
      // Empty strings are treated like missing values and omitted.
      ...(notes ? { notes } : {}),
      ...(justification ? { justification } : {}),
    },
  }
}
|
|
33
|
+
|
|
34
|
+
/**
 * Tool definition that records the resolved disposition for a Themis verdict.
 * `execute` returns the JSON-serialized result of `executeThemisDisposition`.
 */
export const themisDispositionTool = tool({
  description:
    "Record Argus' resolved disposition for a Themis quality-gate verdict: approved, remediated, or overridden.",
  args: {
    status: tool.schema.enum(["approved", "remediated", "overridden"]),
    verdict_json: tool.schema.string(),
    notes: tool.schema.string().optional(),
    justification: tool.schema.string().optional(),
  },
  async execute(args, context) {
    const result = executeThemisDisposition(args, context)
    return JSON.stringify(result)
  },
})
|