solidity-argus 0.3.6 → 0.5.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107)
  1. package/AGENTS.md +13 -6
  2. package/README.md +24 -12
  3. package/package.json +7 -3
  4. package/skills/checklists/cyfrin-best-practices-runtime/SKILL.md +1 -0
  5. package/skills/checklists/cyfrin-best-practices-upgrades/SKILL.md +1 -0
  6. package/skills/checklists/cyfrin-defi-core/SKILL.md +1 -0
  7. package/skills/checklists/cyfrin-defi-integrations/SKILL.md +1 -0
  8. package/skills/checklists/cyfrin-gas/SKILL.md +1 -0
  9. package/skills/checklists/general-audit/SKILL.md +1 -0
  10. package/skills/methodology/audit-workflow/SKILL.md +1 -0
  11. package/skills/methodology/report-template/SKILL.md +1 -0
  12. package/skills/methodology/severity-classification/SKILL.md +1 -0
  13. package/skills/protocol-patterns/amm-dex/SKILL.md +1 -0
  14. package/skills/protocol-patterns/bridges-cross-chain/SKILL.md +1 -0
  15. package/skills/protocol-patterns/dao-governance/SKILL.md +1 -0
  16. package/skills/protocol-patterns/lending-borrowing/SKILL.md +1 -0
  17. package/skills/protocol-patterns/staking-vesting/SKILL.md +1 -0
  18. package/skills/vulnerability-patterns/flash-loan-attacks/SKILL.md +0 -50
  19. package/skills/vulnerability-patterns/oracle-manipulation/SKILL.md +0 -63
  20. package/src/agents/argus-prompt.ts +98 -33
  21. package/src/agents/pythia-prompt.ts +18 -1
  22. package/src/agents/scribe-prompt.ts +32 -10
  23. package/src/agents/sentinel-prompt.ts +19 -0
  24. package/src/agents/themis-prompt.ts +110 -0
  25. package/src/cli/commands/doctor.ts +29 -17
  26. package/src/config/loader.ts +29 -5
  27. package/src/config/schema.ts +45 -45
  28. package/src/constants/defaults.ts +1 -0
  29. package/src/create-hooks.ts +851 -142
  30. package/src/create-managers.ts +4 -2
  31. package/src/create-tools.ts +5 -1
  32. package/src/features/audit-enforcer/audit-enforcer.ts +1 -11
  33. package/src/features/background-agent/background-manager.ts +32 -5
  34. package/src/features/error-recovery/tool-error-recovery.ts +1 -0
  35. package/src/features/persistent-state/audit-state-manager.ts +272 -29
  36. package/src/features/persistent-state/event-sink.ts +96 -25
  37. package/src/features/persistent-state/findings-materializer.ts +57 -3
  38. package/src/features/persistent-state/global-run-index.ts +86 -8
  39. package/src/features/persistent-state/index.ts +7 -1
  40. package/src/features/persistent-state/run-finalizer.ts +116 -7
  41. package/src/features/persistent-state/run-pruner.ts +93 -0
  42. package/src/hooks/agent-tracker.ts +14 -2
  43. package/src/hooks/compaction-hook.ts +7 -16
  44. package/src/hooks/config-handler.ts +83 -29
  45. package/src/hooks/context-budget.ts +4 -5
  46. package/src/hooks/event-hook.ts +213 -57
  47. package/src/hooks/knowledge-sync-hook.ts +2 -3
  48. package/src/hooks/safe-create-hook.ts +13 -1
  49. package/src/hooks/system-prompt-hook.ts +20 -39
  50. package/src/hooks/tool-tracking-hook.ts +606 -326
  51. package/src/index.ts +15 -1
  52. package/src/knowledge/scvd-client.ts +2 -4
  53. package/src/knowledge/scvd-errors.ts +25 -2
  54. package/src/knowledge/scvd-index.ts +7 -5
  55. package/src/knowledge/scvd-sync.ts +6 -6
  56. package/src/managers/types.ts +20 -2
  57. package/src/shared/agent-names.ts +23 -0
  58. package/src/shared/audit-artifact-resolver.ts +8 -3
  59. package/src/shared/audit-phases.ts +12 -0
  60. package/src/shared/cache-paths.ts +41 -0
  61. package/src/shared/drop-diagnostics.ts +2 -2
  62. package/src/shared/forge-errors.ts +31 -0
  63. package/src/shared/forge-runner.ts +30 -0
  64. package/src/shared/format-error.ts +3 -0
  65. package/src/shared/index.ts +9 -0
  66. package/src/shared/key-tools.ts +39 -0
  67. package/src/shared/logger.ts +7 -7
  68. package/src/shared/path-containment.ts +25 -0
  69. package/src/shared/path-utils.ts +11 -0
  70. package/src/shared/report-path-resolver.ts +4 -2
  71. package/src/shared/safe-emit.ts +24 -0
  72. package/src/shared/token-utils.ts +5 -0
  73. package/src/shared/type-guards.ts +8 -0
  74. package/src/shared/validation-constants.ts +52 -0
  75. package/src/skills/analysis/cluster.ts +1 -114
  76. package/src/skills/analysis/normalize.ts +2 -114
  77. package/src/skills/analysis/stopwords.ts +109 -0
  78. package/src/skills/argus-skill-resolver.ts +6 -3
  79. package/src/solodit-lifecycle.ts +153 -37
  80. package/src/state/adapters.ts +60 -66
  81. package/src/state/finding-aggregation.ts +6 -8
  82. package/src/state/finding-fingerprint.ts +1 -1
  83. package/src/state/finding-store.ts +31 -9
  84. package/src/state/index.ts +1 -1
  85. package/src/state/projectors.ts +27 -19
  86. package/src/state/schemas.ts +8 -32
  87. package/src/state/types.ts +3 -0
  88. package/src/tools/contract-analyzer-tool.ts +4 -6
  89. package/src/tools/forge-coverage-tool.ts +10 -35
  90. package/src/tools/forge-fuzz-tool.ts +21 -51
  91. package/src/tools/forge-test-tool.ts +25 -47
  92. package/src/tools/gas-analysis-tool.ts +12 -41
  93. package/src/tools/pattern-checker-tool.ts +37 -15
  94. package/src/tools/pattern-loader.ts +18 -4
  95. package/src/tools/persist-deduped-tool.ts +94 -0
  96. package/src/tools/proxy-detection-tool.ts +35 -34
  97. package/src/tools/read-findings-tool.ts +390 -0
  98. package/src/tools/record-finding-tool.ts +120 -25
  99. package/src/tools/report-generator-tool.ts +396 -328
  100. package/src/tools/report-preflight.ts +5 -1
  101. package/src/tools/slither-tool.ts +55 -16
  102. package/src/tools/solodit-search-tool.ts +260 -112
  103. package/src/tools/sync-knowledge-tool.ts +2 -3
  104. package/src/utils/solidity-parser.ts +39 -24
  105. package/src/features/migration/index.ts +0 -14
  106. package/src/features/migration/migration-adapter.ts +0 -151
  107. package/src/features/migration/parity-telemetry.ts +0 -133
@@ -1,5 +1,7 @@
1
1
  import { randomUUID } from "node:crypto"
2
2
  import type { EventSink } from "../features/persistent-state/event-sink"
3
+ import { isArgusFamily } from "../shared/agent-names"
4
+ import { PHASE_ORDER } from "../shared/audit-phases"
3
5
  import type {
4
6
  DropDiagnostic,
5
7
  DropDiagnosticsCollector,
@@ -7,6 +9,8 @@ import type {
7
9
  } from "../shared/drop-diagnostics"
8
10
  import { createDropDiagnosticsCollector } from "../shared/drop-diagnostics"
9
11
  import { createLogger } from "../shared/logger"
12
+ import { normalizeFilePath } from "../shared/path-utils"
13
+ import { safeEmitToSink } from "../shared/safe-emit"
10
14
  import { normalizeToCanonicalFinding } from "../state/adapters"
11
15
  import type { FindingStore } from "../state/finding-store"
12
16
  import { createFindingStore } from "../state/finding-store"
@@ -27,19 +31,27 @@ type ToolHookInput = {
27
31
  tool: string
28
32
  args: unknown
29
33
  result: string
34
+ sessionID?: string
35
+ callID?: string
30
36
  }
31
37
 
32
38
  type ToolExecutionMetadata = {
33
39
  tool: string
34
40
  findingsCount: number
41
+ sessionId?: string
35
42
  }
36
43
 
37
44
  export type ToolTrackingOptions = {
38
45
  getEventSink?: () => EventSink | null
46
+ getEventSinkForSession?: (sessionId: string) => EventSink | null
47
+ getEventSinkForRun?: (runId: string) => EventSink | null
48
+ getActiveRunSinks?: () => EventSink[]
39
49
  getSessionId?: () => string
40
50
  getAgentName?: () => ArgusAgentName | undefined
51
+ getAgentNameForSession?: (sessionId: string) => ArgusAgentName | undefined
41
52
  dropPolicy?: DropPolicy
42
53
  onChildSessionDetected?: (parentSessionId: string, childSessionId: string) => void
54
+ projectDir?: string
43
55
  }
44
56
 
45
57
  const VALID_SEVERITIES: ReadonlySet<string> = new Set([
@@ -100,22 +112,7 @@ function toFindingSource(value: unknown): Finding["source"] {
100
112
  return "manual"
101
113
  }
102
114
 
103
- async function emitToSink(
104
- sink: EventSink,
105
- event: AuditEvent,
106
- options?: { failFast?: boolean },
107
- ): Promise<void> {
108
- try {
109
- await sink.append(event)
110
- } catch (error) {
111
- const message = `Failed to emit ${event.type} event to sink: ${error instanceof Error ? error.message : String(error)}`
112
- logger.error(message)
113
-
114
- if (options?.failFast) {
115
- throw new Error(message)
116
- }
117
- }
118
- }
115
+ const emitToSink = safeEmitToSink
119
116
 
120
117
  function buildEvent(
121
118
  type: AuditEvent["type"],
@@ -143,6 +140,7 @@ function buildEvent(
143
140
  * or plain text with an embedded JSON fragment.
144
141
  */
145
142
  function parseChildSessionId(result: string): string | null {
143
+ // Strategy 1: Full JSON parse (structured tool output)
146
144
  try {
147
145
  const parsed = JSON.parse(result)
148
146
  if (typeof parsed === "object" && parsed !== null && !Array.isArray(parsed)) {
@@ -160,11 +158,32 @@ function parseChildSessionId(result: string): string | null {
160
158
  }
161
159
  }
162
160
  } catch {
163
- const match = result.match(/"session_id"\s*:\s*"([^"]+)"/)
164
- if (match?.[1]) {
165
- return match[1]
166
- }
161
+ // Not valid JSON — fall through to regex strategies
162
+ }
163
+
164
+ // Strategy 2: OpenCode task tool XML format
165
+ // <task_metadata>
166
+ // session_id: ses_xxx
167
+ // </task_metadata>
168
+ const xmlMatch = result.match(
169
+ /<task_metadata>[\s\S]*?session_id:\s*(ses_\S+)[\s\S]*?<\/task_metadata>/,
170
+ )
171
+ if (xmlMatch?.[1]) {
172
+ return xmlMatch[1]
173
+ }
174
+
175
+ // Strategy 3: JSON fragment in plain text
176
+ const jsonFragmentMatch = result.match(/"session_id"\s*:\s*"([^"]+)"/)
177
+ if (jsonFragmentMatch?.[1]) {
178
+ return jsonFragmentMatch[1]
179
+ }
180
+
181
+ // Strategy 4: Bare session_id line (e.g. "session_id: ses_xxx" outside XML tags)
182
+ const bareMatch = result.match(/session_id:\s*(ses_\S+)/)
183
+ if (bareMatch?.[1]) {
184
+ return bareMatch[1]
167
185
  }
186
+
168
187
  return null
169
188
  }
170
189
 
@@ -187,141 +206,100 @@ const SLITHER_REQUIRED = ["check", "description", "file", "lines"] as const
187
206
  const PATTERN_REQUIRED = ["pattern", "description", "file", "lines"] as const
188
207
  const MANUAL_REQUIRED = ["check", "description", "file", "lines"] as const
189
208
 
190
- function processSlitherResult(
191
- parsed: Record<string, unknown>,
192
- store: FindingStore,
193
- diag: DropDiagnosticsCollector,
194
- metadata: { reportedByAgent: ArgusAgentName; reportedBySessionId: string },
195
- ): number {
196
- const findings = parsed.findings
197
- if (!Array.isArray(findings)) return 0
198
-
199
- let count = 0
200
- for (const raw of findings) {
201
- const finding = toRecord(raw)
202
- if (!finding) continue
203
-
204
- const check = finding.check
205
- const description = finding.description
206
- const file = finding.file
207
- const lines = toLines(finding.lines)
209
+ type ProcessorConfig = {
210
+ toolLabel: string
211
+ arrayKey: string
212
+ nestedArrayKey?: string
213
+ primaryIdField: string
214
+ requiredFields: readonly string[]
215
+ sourceLabel: string | "dynamic"
216
+ confidenceMode: "read" | "fixed"
217
+ confidenceDefault?: string
218
+ extractOptionalFields: boolean
219
+ allowReportedByOverride: boolean
220
+ }
208
221
 
209
- if (
210
- typeof check !== "string" ||
211
- typeof description !== "string" ||
212
- typeof file !== "string" ||
213
- !lines
214
- ) {
215
- const missing = identifyMissingFields(finding, SLITHER_REQUIRED)
216
- diag.error(
217
- "MISSING_REQUIRED_FIELD",
218
- `Slither finding skipped: missing ${missing.join(", ")}`,
219
- missing[0],
220
- )
221
- continue
222
- }
222
+ const SLITHER_CONFIG: ProcessorConfig = {
223
+ toolLabel: "Slither",
224
+ arrayKey: "findings",
225
+ primaryIdField: "check",
226
+ requiredFields: SLITHER_REQUIRED,
227
+ sourceLabel: "slither",
228
+ confidenceMode: "read",
229
+ extractOptionalFields: false,
230
+ allowReportedByOverride: false,
231
+ }
223
232
 
224
- store.addFinding({
225
- check,
226
- severity: toSeverity(finding.severity),
227
- confidence: toConfidence(finding.confidence),
228
- description,
229
- file,
230
- lines,
231
- source: "slither",
232
- reported_by_agent: metadata.reportedByAgent,
233
- reported_by_session_id: metadata.reportedBySessionId,
234
- })
235
- count++
236
- }
233
+ const PATTERN_CONFIG: ProcessorConfig = {
234
+ toolLabel: "Pattern",
235
+ arrayKey: "sources",
236
+ nestedArrayKey: "matches",
237
+ primaryIdField: "pattern",
238
+ requiredFields: PATTERN_REQUIRED,
239
+ sourceLabel: "pattern",
240
+ confidenceMode: "fixed",
241
+ confidenceDefault: "Medium",
242
+ extractOptionalFields: false,
243
+ allowReportedByOverride: false,
244
+ }
237
245
 
238
- return count
246
+ const RECORDED_CONFIG: ProcessorConfig = {
247
+ toolLabel: "Recorded",
248
+ arrayKey: "findings",
249
+ primaryIdField: "check",
250
+ requiredFields: MANUAL_REQUIRED,
251
+ sourceLabel: "dynamic",
252
+ confidenceMode: "read",
253
+ extractOptionalFields: true,
254
+ allowReportedByOverride: true,
239
255
  }
240
256
 
241
- function processPatternResult(
257
+ function processToolResult(
242
258
  parsed: Record<string, unknown>,
243
259
  store: FindingStore,
244
260
  diag: DropDiagnosticsCollector,
245
261
  metadata: { reportedByAgent: ArgusAgentName; reportedBySessionId: string },
262
+ config: ProcessorConfig,
263
+ projectDir?: string,
246
264
  ): number {
247
- const sources = parsed.sources
248
- if (!Array.isArray(sources)) return 0
249
-
250
- let count = 0
251
- for (const rawSource of sources) {
252
- const source = toRecord(rawSource)
253
- if (!source) continue
254
-
255
- const matches = source.matches
256
- if (!Array.isArray(matches)) continue
257
-
258
- for (const rawMatch of matches) {
259
- const match = toRecord(rawMatch)
260
- if (!match) continue
261
-
262
- const pattern = match.pattern
263
- const description = match.description
264
- const file = match.file
265
- const lines = toLines(match.lines)
266
-
267
- if (
268
- typeof pattern !== "string" ||
269
- typeof description !== "string" ||
270
- typeof file !== "string" ||
271
- !lines
272
- ) {
273
- const missing = identifyMissingFields(match, PATTERN_REQUIRED)
274
- diag.error(
275
- "MISSING_REQUIRED_FIELD",
276
- `Pattern finding skipped: missing ${missing.join(", ")}`,
277
- missing[0],
278
- )
279
- continue
280
- }
281
-
282
- store.addFinding({
283
- check: pattern,
284
- severity: toSeverity(match.severity),
285
- confidence: "Medium",
286
- description,
287
- file,
288
- lines,
289
- source: "pattern",
290
- reported_by_agent: metadata.reportedByAgent,
291
- reported_by_session_id: metadata.reportedBySessionId,
292
- })
293
- count++
265
+ const topLevel = parsed[config.arrayKey]
266
+ if (!Array.isArray(topLevel)) {
267
+ if (config.toolLabel === "Recorded") {
268
+ diag.error(
269
+ "MISSING_REQUIRED_FIELD",
270
+ "argus_record_finding result missing findings array",
271
+ "findings",
272
+ )
294
273
  }
274
+ return 0
295
275
  }
296
276
 
297
- return count
298
- }
277
+ const items: unknown[] = []
278
+ if (config.nestedArrayKey) {
279
+ for (const rawOuter of topLevel) {
280
+ const outer = toRecord(rawOuter)
281
+ if (!outer) continue
299
282
 
300
- function processRecordedFindingResult(
301
- parsed: Record<string, unknown>,
302
- store: FindingStore,
303
- diag: DropDiagnosticsCollector,
304
- metadata: { reportedByAgent: ArgusAgentName; reportedBySessionId: string },
305
- ): number {
306
- const findings = parsed.findings
307
- if (!Array.isArray(findings)) {
308
- diag.error(
309
- "MISSING_REQUIRED_FIELD",
310
- "argus_record_finding result missing findings array",
311
- "findings",
312
- )
313
- return 0
283
+ const nested = outer[config.nestedArrayKey]
284
+ if (!Array.isArray(nested)) continue
285
+
286
+ items.push(...nested)
287
+ }
288
+ } else {
289
+ items.push(...topLevel)
314
290
  }
315
291
 
316
292
  let count = 0
317
- for (const raw of findings) {
318
- const finding = toRecord(raw)
319
- if (!finding) continue
293
+ for (const rawItem of items) {
294
+ const item = toRecord(rawItem)
295
+ if (!item) continue
320
296
 
321
- const check = finding.check
322
- const description = finding.description
323
- const file = finding.file
324
- const lines = toLines(finding.lines)
297
+ const check = item[config.primaryIdField]
298
+ const description = item.description
299
+ const rawFile = item.file
300
+ const file =
301
+ typeof rawFile === "string" && projectDir ? normalizeFilePath(rawFile, projectDir) : rawFile
302
+ const lines = toLines(item.lines)
325
303
 
326
304
  if (
327
305
  typeof check !== "string" ||
@@ -329,51 +307,60 @@ function processRecordedFindingResult(
329
307
  typeof file !== "string" ||
330
308
  !lines
331
309
  ) {
332
- const missing = identifyMissingFields(finding, MANUAL_REQUIRED)
310
+ const missing = identifyMissingFields(item, config.requiredFields)
333
311
  diag.error(
334
312
  "MISSING_REQUIRED_FIELD",
335
- `Recorded finding skipped: missing ${missing.join(", ")}`,
313
+ `${config.toolLabel} finding skipped: missing ${missing.join(", ")}`,
336
314
  missing[0],
337
315
  )
338
316
  continue
339
317
  }
340
318
 
341
- const reportedByAgentRaw = finding.reported_by_agent
319
+ const reportedByAgentRaw = item.reported_by_agent
342
320
  const reportedByAgent =
343
- reportedByAgentRaw === "argus" ||
344
- reportedByAgentRaw === "sentinel" ||
345
- reportedByAgentRaw === "pythia" ||
346
- reportedByAgentRaw === "scribe" ||
347
- reportedByAgentRaw === "unknown"
321
+ config.allowReportedByOverride &&
322
+ typeof reportedByAgentRaw === "string" &&
323
+ (isArgusFamily(reportedByAgentRaw) || reportedByAgentRaw === "unknown")
348
324
  ? (reportedByAgentRaw as ArgusAgentName)
349
325
  : metadata.reportedByAgent
350
326
 
351
- store.addFinding({
327
+ const findingPayload: Parameters<FindingStore["addFinding"]>[0] = {
352
328
  check,
353
- severity: toSeverity(finding.severity),
354
- confidence: toConfidence(finding.confidence),
329
+ severity: toSeverity(item.severity),
330
+ confidence:
331
+ config.confidenceMode === "read"
332
+ ? toConfidence(item.confidence)
333
+ : toConfidence(config.confidenceDefault),
355
334
  description,
356
335
  file,
357
336
  lines,
358
- source: toFindingSource(finding.source),
359
- remediation: typeof finding.remediation === "string" ? finding.remediation : undefined,
360
- exploitReference:
361
- typeof finding.exploitReference === "string" ? finding.exploitReference : undefined,
337
+ source:
338
+ config.sourceLabel === "dynamic"
339
+ ? toFindingSource(item.source)
340
+ : toFindingSource(config.sourceLabel),
362
341
  reported_by_agent: reportedByAgent,
363
342
  reported_by_session_id:
364
- typeof finding.reported_by_session_id === "string" &&
365
- finding.reported_by_session_id.length > 0
366
- ? finding.reported_by_session_id
343
+ config.allowReportedByOverride &&
344
+ typeof item.reported_by_session_id === "string" &&
345
+ item.reported_by_session_id.length > 0
346
+ ? item.reported_by_session_id
367
347
  : metadata.reportedBySessionId,
368
- issue_fingerprint:
369
- typeof finding.issue_fingerprint === "string" ? finding.issue_fingerprint : undefined,
370
- observation_fingerprint:
371
- typeof finding.observation_fingerprint === "string"
372
- ? finding.observation_fingerprint
373
- : undefined,
374
- observation_id:
375
- typeof finding.observation_id === "string" ? finding.observation_id : undefined,
376
- })
348
+ }
349
+
350
+ if (config.extractOptionalFields) {
351
+ findingPayload.remediation =
352
+ typeof item.remediation === "string" ? item.remediation : undefined
353
+ findingPayload.exploitReference =
354
+ typeof item.exploitReference === "string" ? item.exploitReference : undefined
355
+ findingPayload.issue_fingerprint =
356
+ typeof item.issue_fingerprint === "string" ? item.issue_fingerprint : undefined
357
+ findingPayload.observation_fingerprint =
358
+ typeof item.observation_fingerprint === "string" ? item.observation_fingerprint : undefined
359
+ findingPayload.observation_id =
360
+ typeof item.observation_id === "string" ? item.observation_id : undefined
361
+ }
362
+
363
+ store.addFinding(findingPayload)
377
364
  count++
378
365
  }
379
366
 
@@ -469,21 +456,77 @@ function recordToolExecution(state: AuditState, toolName: string, findingsCount:
469
456
  })
470
457
  }
471
458
 
459
+ const TOOL_PHASE_MAP: Record<string, AuditState["currentPhase"]> = {
460
+ argus_slither_analyze: "scanning",
461
+ argus_check_patterns: "scanning",
462
+ argus_analyze_contract: "scanning",
463
+ argus_proxy_detection: "scanning",
464
+ argus_solodit_search: "research",
465
+ argus_forge_test: "testing",
466
+ argus_forge_fuzz: "testing",
467
+ argus_forge_coverage: "testing",
468
+ argus_gas_analysis: "testing",
469
+ argus_generate_report: "reporting",
470
+ }
471
+
472
+ function inferPhaseAdvancement(
473
+ state: AuditState,
474
+ toolName: string,
475
+ ): AuditState["currentPhase"] | null {
476
+ const inferredPhase = TOOL_PHASE_MAP[toolName]
477
+ if (!inferredPhase) return null
478
+
479
+ const currentIdx = PHASE_ORDER.indexOf(state.currentPhase)
480
+ const inferredIdx = PHASE_ORDER.indexOf(inferredPhase)
481
+ if (inferredIdx <= currentIdx) return null
482
+
483
+ return inferredPhase
484
+ }
485
+
486
+ type OrphanEvent = {
487
+ event: AuditEvent
488
+ failFast: boolean
489
+ bufferedAt: number
490
+ }
491
+
492
+ const ORPHAN_BUFFER_TTL_MS = 60_000
493
+ const MAX_ORPHAN_EVENTS_PER_SESSION = 50
494
+
472
495
  export type ToolTrackingHook = {
473
496
  (input: ToolHookInput): Promise<void>
474
497
  getLastDiagnostics(): DropDiagnostic[]
498
+ flushOrphanEvents(sessionId: string, sink: EventSink): Promise<number>
475
499
  }
476
500
 
477
501
  export function createToolTrackingHook(
478
- getAuditState: () => AuditState | null,
502
+ getAuditState: (sessionId?: string) => AuditState | null,
479
503
  onStateChanged?: (metadata: ToolExecutionMetadata) => void,
480
504
  options?: ToolTrackingOptions,
481
505
  ): ToolTrackingHook {
506
+ const projectDir = options?.projectDir
482
507
  const storesByState = new WeakMap<AuditState, FindingStore>()
483
508
  let lastDiagnostics: DropDiagnostic[] = []
509
+ const orphanBuffer = new Map<string, OrphanEvent[]>()
484
510
 
485
- function resolveStateAndStore(): { state: AuditState; store: FindingStore } | null {
486
- const state = getAuditState()
511
+ function bufferOrphanEvent(sessionId: string, entry: OrphanEvent): void {
512
+ let entries = orphanBuffer.get(sessionId)
513
+ if (!entries) {
514
+ entries = []
515
+ orphanBuffer.set(sessionId, entries)
516
+ }
517
+ if (entries.length >= MAX_ORPHAN_EVENTS_PER_SESSION) {
518
+ logger.warn(
519
+ `Orphan event buffer full for session ${sessionId} (${MAX_ORPHAN_EVENTS_PER_SESSION} events) — dropping oldest`,
520
+ )
521
+ entries.shift()
522
+ }
523
+ entries.push(entry)
524
+ }
525
+
526
+ function resolveStateAndStore(
527
+ sessionId?: string,
528
+ ): { state: AuditState; store: FindingStore } | null {
529
+ const state = getAuditState(sessionId)
487
530
  if (!state) return null
488
531
 
489
532
  let store = storesByState.get(state)
@@ -500,15 +543,34 @@ export function createToolTrackingHook(
500
543
  if (input.tool === "task") {
501
544
  const childSessionId = parseChildSessionId(input.result)
502
545
  const correlationId = randomUUID()
503
- const resolved = resolveStateAndStore()
504
- const sink = options?.getEventSink?.()
505
- const sessionId = options?.getSessionId?.() ?? ""
546
+ const resolved = resolveStateAndStore(input.sessionID)
547
+ const sessionId = input.sessionID ?? options?.getSessionId?.() ?? ""
506
548
  const toolCallId = randomUUID()
507
549
 
508
550
  if (childSessionId) {
509
551
  options?.onChildSessionDetected?.(sessionId, childSessionId)
510
552
  }
511
553
 
554
+ let sink: EventSink | null =
555
+ (sessionId ? options?.getEventSinkForSession?.(sessionId) : null) ??
556
+ options?.getEventSink?.() ??
557
+ null
558
+
559
+ if (sink && resolved) {
560
+ const runId = resolved.state.sessionId
561
+ if (sink.runId !== runId) {
562
+ const runScopedSink = options?.getEventSinkForRun?.(runId) ?? null
563
+ if (runScopedSink && runScopedSink.runId === runId) {
564
+ sink = runScopedSink
565
+ } else {
566
+ logger.warn(
567
+ `Skipping task sink emission due to run mismatch: state run ${runId}, sink run ${sink.runId}`,
568
+ )
569
+ sink = null
570
+ }
571
+ }
572
+ }
573
+
512
574
  if (sink && resolved) {
513
575
  const runId = resolved.state.sessionId
514
576
  await emitToSink(
@@ -535,7 +597,7 @@ export function createToolTrackingHook(
535
597
 
536
598
  if (resolved) {
537
599
  recordToolExecution(resolved.state, "task", 0)
538
- onStateChanged?.({ tool: "task", findingsCount: 0 })
600
+ onStateChanged?.({ tool: "task", findingsCount: 0, sessionId: input.sessionID })
539
601
  }
540
602
 
541
603
  return
@@ -545,7 +607,7 @@ export function createToolTrackingHook(
545
607
  return
546
608
  }
547
609
 
548
- const resolved = resolveStateAndStore()
610
+ const resolved = resolveStateAndStore(input.sessionID)
549
611
  if (!resolved) {
550
612
  if (input.tool === "argus_record_finding") {
551
613
  throw new Error("argus_record_finding requires active audit state")
@@ -574,10 +636,38 @@ export function createToolTrackingHook(
574
636
  }
575
637
 
576
638
  const { state: auditState, store } = resolved
577
- const sink = options?.getEventSink?.()
578
639
  const runId = auditState.sessionId
579
- const sessionId = options?.getSessionId?.() ?? ""
580
- const reportedByAgent = options?.getAgentName?.() ?? "unknown"
640
+ const sessionId = input.sessionID ?? options?.getSessionId?.() ?? ""
641
+ let sink: EventSink | null =
642
+ (sessionId ? options?.getEventSinkForSession?.(sessionId) : null) ??
643
+ options?.getEventSink?.() ??
644
+ null
645
+ if (sink && sink.runId !== runId) {
646
+ const runScopedSink = options?.getEventSinkForRun?.(runId) ?? null
647
+ if (runScopedSink && runScopedSink.runId === runId) {
648
+ sink = runScopedSink
649
+ } else {
650
+ // Single-run coalescence: if exactly one active (non-finalized) sink
651
+ // exists, use it rather than dropping events silently.
652
+ const activeSinks = options?.getActiveRunSinks?.() ?? []
653
+ const coalescedSink = activeSinks.length === 1 ? activeSinks[0] : undefined
654
+ if (coalescedSink) {
655
+ logger.debug(
656
+ `Coalescing tool ${input.tool} from session ${sessionId} into active run ${coalescedSink.runId} (state run ${runId}, original sink run ${sink.runId})`,
657
+ )
658
+ sink = coalescedSink
659
+ } else {
660
+ logger.warn(
661
+ `Skipping sink emission for ${input.tool} due to run mismatch: state run ${runId}, sink run ${sink.runId}`,
662
+ )
663
+ sink = null
664
+ }
665
+ }
666
+ }
667
+ const reportedByAgent =
668
+ (input.sessionID ? options?.getAgentNameForSession?.(input.sessionID) : undefined) ??
669
+ options?.getAgentName?.() ??
670
+ "unknown"
581
671
  const findingMetadata = {
582
672
  reportedByAgent,
583
673
  reportedBySessionId: sessionId,
@@ -595,186 +685,376 @@ export function createToolTrackingHook(
595
685
  }),
596
686
  { failFast: input.tool === "argus_record_finding" },
597
687
  )
688
+ } else if (sessionId) {
689
+ const event = buildEvent("tool.started", runId, sessionId, toolCallId, {
690
+ tool: input.tool,
691
+ args: input.args,
692
+ })
693
+ bufferOrphanEvent(sessionId, {
694
+ event,
695
+ failFast: input.tool === "argus_record_finding",
696
+ bufferedAt: Date.now(),
697
+ })
698
+ logger.warn(
699
+ `Buffered orphan tool.started event for ${input.tool} from session ${sessionId} (run_id=${runId})`,
700
+ )
598
701
  }
599
702
 
600
703
  const findingsCountBefore = auditState.findings.length
704
+ let findingsCount = 0
705
+ let completedSuccess = false
706
+ let completionError: string | undefined
601
707
 
602
- if (input.tool === "argus_skill_load") {
603
- const nameMatch = input.result.match(/^##\s+Argus Skill:\s+(.+?)(?:\s+\[|$)/m)
604
- const skillName = nameMatch?.[1]?.trim()
605
- if (skillName) {
606
- auditState.skillsLoaded ??= []
607
- if (!auditState.skillsLoaded.includes(skillName)) {
608
- auditState.skillsLoaded.push(skillName)
708
+ try {
709
+ if (input.tool === "argus_skill_load") {
710
+ const nameMatch = input.result.match(/^##\s+Argus Skill:\s+(.+?)(?:\s+\[|$)/m)
711
+ const skillName = nameMatch?.[1]?.trim()
712
+ if (skillName) {
713
+ auditState.skillsLoaded ??= []
714
+ if (!auditState.skillsLoaded.includes(skillName)) {
715
+ auditState.skillsLoaded.push(skillName)
716
+ }
717
+ }
718
+ findingsCount = 0
719
+ completedSuccess = true
720
+ } else {
721
+ let parsed: unknown
722
+ try {
723
+ parsed = JSON.parse(input.result)
724
+ } catch {
725
+ // For large tool outputs (e.g. argus_check_patterns can produce 3MB+),
726
+ // OpenCode may truncate the result before it reaches this hook.
727
+ // Two truncation modes:
728
+ // 1. Partial JSON — first N bytes of valid JSON (check for "success": true)
729
+ // 2. OpenCode replacement — full output replaced with "...N bytes truncated..."
730
+ const successInPartialJson = input.result.match(/"success"\s*:\s*(true|false)/)
731
+ const opencodeTruncation = input.result.match(
732
+ /bytes truncated|output was truncated|tool call succeeded/i,
733
+ )
734
+ const truncatedSuccess = successInPartialJson?.[1] === "true" || !!opencodeTruncation
735
+ if (truncatedSuccess) {
736
+ diag.error(
737
+ "TRUNCATED_OUTPUT",
738
+ `${input.tool} output was truncated (${input.result.length} chars) — tool likely succeeded`,
739
+ )
740
+ logger.warn(
741
+ `Tool output truncated — findings may be incomplete (${input.tool}, ${input.result.length} chars)`,
742
+ )
743
+ completionError = "Tool output truncated — findings may be incomplete"
744
+ } else {
745
+ diag.error("MALFORMED_JSON", `Failed to parse JSON result from ${input.tool}`)
746
+ if (input.tool === "argus_record_finding") {
747
+ throw new Error("argus_record_finding returned malformed JSON")
748
+ }
749
+ }
750
+ diag.throwIfStrict()
751
+ return
609
752
  }
610
- }
611
- recordToolExecution(auditState, input.tool, 0)
612
- onStateChanged?.({ tool: input.tool, findingsCount: 0 })
613
-
614
- if (sink) {
615
- await emitToSink(
616
- sink,
617
- buildEvent("tool.completed", runId, sessionId, toolCallId, {
618
- tool: input.tool,
619
- findingsCount: 0,
620
- success: true,
621
- }),
622
- )
623
- }
624
753
 
625
- lastDiagnostics = diag.getDiagnostics()
626
- return
627
- }
754
+ const record = toRecord(parsed)
755
+ if (!record) {
756
+ if (input.tool === "argus_record_finding") {
757
+ throw new Error("argus_record_finding response must be a JSON object")
758
+ }
759
+ return
760
+ }
628
761
 
629
- let parsed: unknown
630
- try {
631
- parsed = JSON.parse(input.result)
632
- } catch {
633
- diag.error("MALFORMED_JSON", `Failed to parse JSON result from ${input.tool}`)
634
- lastDiagnostics = diag.getDiagnostics()
635
- if (input.tool === "argus_record_finding") {
636
- throw new Error("argus_record_finding returned malformed JSON")
637
- }
638
- diag.throwIfStrict()
639
- return
640
- }
762
+ switch (input.tool) {
763
+ case "argus_slither_analyze": {
764
+ findingsCount = processToolResult(
765
+ record,
766
+ store,
767
+ diag,
768
+ findingMetadata,
769
+ SLITHER_CONFIG,
770
+ projectDir,
771
+ )
772
+ if (auditState.scope.length === 0 && findingsCount > 0) {
773
+ const slitherFindings = Array.isArray(record.findings) ? record.findings : []
774
+ const files = [
775
+ ...new Set(
776
+ slitherFindings
777
+ .map((f: Record<string, unknown>) => f.file as string)
778
+ .filter(Boolean),
779
+ ),
780
+ ]
781
+ if (files.length > 0) {
782
+ auditState.scope = files
783
+ }
784
+ }
785
+ break
786
+ }
787
+ case "argus_check_patterns":
788
+ findingsCount = processToolResult(
789
+ record,
790
+ store,
791
+ diag,
792
+ findingMetadata,
793
+ PATTERN_CONFIG,
794
+ projectDir,
795
+ )
796
+ if (typeof record.patternVersion === "string") {
797
+ auditState.patternVersion = record.patternVersion
798
+ }
799
+ break
800
+ case "argus_record_finding":
801
+ findingsCount = processToolResult(
802
+ record,
803
+ store,
804
+ diag,
805
+ findingMetadata,
806
+ RECORDED_CONFIG,
807
+ projectDir,
808
+ )
809
+ break
810
+ case "argus_analyze_contract": {
811
+ processContractAnalyzerResult(record, auditState)
812
+ const filePath = (input.args as Record<string, unknown>)?.file_path as string
813
+ if (filePath && !auditState.scope.includes(filePath)) {
814
+ auditState.scope = [...auditState.scope, filePath]
815
+ }
816
+ break
817
+ }
818
+ case "argus_solodit_search":
819
+ processSoloditResult(record, auditState)
820
+ break
821
+ case "argus_forge_test": {
822
+ const summary = toRecord(record.summary)
823
+ if (summary && typeof summary.failed === "number") {
824
+ findingsCount = summary.failed
825
+ }
826
+ break
827
+ }
828
+ case "argus_forge_fuzz":
829
+ processFuzzResult(record, auditState)
830
+ break
831
+ case "argus_generate_report": {
832
+ const reportError = toRecord(record.error)
833
+ const filePath = record.filePath
834
+ if (reportError) {
835
+ const errorMessage =
836
+ typeof reportError.message === "string"
837
+ ? reportError.message
838
+ : "argus_generate_report reported an unknown error"
839
+ throw new Error(`argus_generate_report failed: ${errorMessage}`)
840
+ }
841
+ if (typeof filePath !== "string" || filePath.length === 0) {
842
+ throw new Error("argus_generate_report completed without filePath")
843
+ }
844
+ auditState.reportGenerated = true
845
+ break
846
+ }
847
+ case "argus_sync_knowledge": {
848
+ const success = record.success === true
849
+ auditState.knowledgeSynced = { success, timestamp: Date.now() }
850
+ break
851
+ }
852
+ case "argus_forge_coverage": {
853
+ const reportObj = toRecord(record.report)
854
+ const files = reportObj?.files
855
+ if (Array.isArray(files)) {
856
+ auditState.coverageReport = {
857
+ files: files
858
+ .filter((f): f is Record<string, unknown> => !!f && typeof f === "object")
859
+ .map((f) => ({
860
+ path: typeof f.path === "string" ? f.path : "unknown",
861
+ linesPct: typeof f.linesPct === "number" ? f.linesPct : 0,
862
+ statementsPct: typeof f.statementsPct === "number" ? f.statementsPct : 0,
863
+ branchesPct: typeof f.branchesPct === "number" ? f.branchesPct : 0,
864
+ functionsPct: typeof f.functionsPct === "number" ? f.functionsPct : 0,
865
+ })),
866
+ }
867
+ }
868
+ break
869
+ }
870
+ case "argus_proxy_detection": {
871
+ if (record.isProxy === true) {
872
+ auditState.proxyContracts ??= []
873
+ auditState.proxyContracts.push({
874
+ file: typeof record.file === "string" ? record.file : "unknown",
875
+ proxyType: typeof record.proxyType === "string" ? record.proxyType : "unknown",
876
+ indicators: Array.isArray(record.indicators)
877
+ ? record.indicators.filter((i): i is string => typeof i === "string")
878
+ : [],
879
+ })
880
+ }
881
+ break
882
+ }
883
+ case "argus_gas_analysis": {
884
+ const hotspots = record.hotspots
885
+ if (Array.isArray(hotspots)) {
886
+ auditState.gasHotspots = hotspots
887
+ .filter((h): h is Record<string, unknown> => !!h && typeof h === "object")
888
+ .map((h) => ({
889
+ contract: typeof h.contract === "string" ? h.contract : "unknown",
890
+ function: typeof h.function === "string" ? h.function : "unknown",
891
+ avgGas: typeof h.avgGas === "number" ? h.avgGas : 0,
892
+ }))
893
+ }
894
+ break
895
+ }
896
+ }
641
897
 
642
- const record = toRecord(parsed)
643
- if (!record) {
644
- lastDiagnostics = diag.getDiagnostics()
645
- if (input.tool === "argus_record_finding") {
646
- throw new Error("argus_record_finding response must be a JSON object")
647
- }
648
- return
649
- }
898
+ diag.throwIfStrict()
650
899
 
651
- let findingsCount = 0
900
+ if (input.tool === "argus_record_finding" && findingsCount === 0) {
901
+ throw new Error("argus_record_finding did not persist any findings")
902
+ }
652
903
 
653
- switch (input.tool) {
654
- case "argus_slither_analyze":
655
- findingsCount = processSlitherResult(record, store, diag, findingMetadata)
656
- break
657
- case "argus_check_patterns":
658
- findingsCount = processPatternResult(record, store, diag, findingMetadata)
659
- break
660
- case "argus_record_finding":
661
- findingsCount = processRecordedFindingResult(record, store, diag, findingMetadata)
662
- break
663
- case "argus_analyze_contract":
664
- processContractAnalyzerResult(record, auditState)
665
- break
666
- case "argus_solodit_search":
667
- processSoloditResult(record, auditState)
668
- break
669
- case "argus_forge_test": {
670
- const summary = toRecord(record.summary)
671
- if (summary && typeof summary.failed === "number") {
672
- findingsCount = summary.failed
904
+ if (input.tool === "argus_record_finding" && !sink) {
905
+ const newFindings = auditState.findings.slice(findingsCountBefore)
906
+ if (newFindings.length > 0) {
907
+ throw new Error(
908
+ `argus_record_finding produced ${newFindings.length} finding(s) but no event sink is available — findings would be lost from the report`,
909
+ )
910
+ }
911
+ diag.error(
912
+ "NO_EVENT_SINK",
913
+ "argus_record_finding: no active event sink — no new findings to emit",
914
+ )
673
915
  }
674
- break
675
- }
676
- case "argus_forge_fuzz":
677
- processFuzzResult(record, auditState)
678
- break
679
- case "argus_generate_report": {
680
- auditState.reportGenerated = true
681
- break
682
- }
683
- case "argus_sync_knowledge": {
684
- const success = record.success === true
685
- auditState.knowledgeSynced = { success, timestamp: Date.now() }
686
- break
687
- }
688
- case "argus_forge_coverage": {
689
- const reportObj = toRecord(record.report)
690
- const files = reportObj?.files
691
- if (Array.isArray(files)) {
692
- auditState.coverageReport = {
693
- files: files
694
- .filter((f): f is Record<string, unknown> => !!f && typeof f === "object")
695
- .map((f) => ({
696
- path: typeof f.path === "string" ? f.path : "unknown",
697
- linesPct: typeof f.linesPct === "number" ? f.linesPct : 0,
698
- statementsPct: typeof f.statementsPct === "number" ? f.statementsPct : 0,
699
- branchesPct: typeof f.branchesPct === "number" ? f.branchesPct : 0,
700
- functionsPct: typeof f.functionsPct === "number" ? f.functionsPct : 0,
701
- })),
916
+
917
+ if (sink) {
918
+ const failFast = input.tool === "argus_record_finding"
919
+ const newFindings = auditState.findings.slice(findingsCountBefore)
920
+ for (const [index, finding] of newFindings.entries()) {
921
+ const { data: canonical } = normalizeToCanonicalFinding(
922
+ finding,
923
+ runId,
924
+ 0,
925
+ {
926
+ reportedByAgent,
927
+ reportedBySessionId: sessionId,
928
+ toolCallId,
929
+ observationId: `${toolCallId}:${index + 1}`,
930
+ },
931
+ projectDir,
932
+ )
933
+ await emitToSink(
934
+ sink,
935
+ buildEvent("finding.added", runId, sessionId, toolCallId, canonical),
936
+ { failFast },
937
+ )
702
938
  }
703
939
  }
704
- break
940
+
941
+ completedSuccess = true
705
942
  }
706
- case "argus_proxy_detection": {
707
- if (record.isProxy === true) {
708
- auditState.proxyContracts ??= []
709
- auditState.proxyContracts.push({
710
- file: typeof record.file === "string" ? record.file : "unknown",
711
- proxyType: typeof record.proxyType === "string" ? record.proxyType : "unknown",
712
- indicators: Array.isArray(record.indicators)
713
- ? record.indicators.filter((i): i is string => typeof i === "string")
714
- : [],
715
- })
943
+
944
+ recordToolExecution(auditState, input.tool, findingsCount)
945
+
946
+ const nextPhase = inferPhaseAdvancement(auditState, input.tool)
947
+ if (nextPhase) {
948
+ auditState.currentPhase = nextPhase
949
+ if (sink) {
950
+ await emitToSink(
951
+ sink,
952
+ buildEvent("phase.changed", runId, sessionId, toolCallId, {
953
+ phase: nextPhase,
954
+ trigger: input.tool,
955
+ }),
956
+ )
716
957
  }
717
- break
718
958
  }
719
- case "argus_gas_analysis": {
720
- const hotspots = record.hotspots
721
- if (Array.isArray(hotspots)) {
722
- auditState.gasHotspots = hotspots
723
- .filter((h): h is Record<string, unknown> => !!h && typeof h === "object")
724
- .map((h) => ({
725
- contract: typeof h.contract === "string" ? h.contract : "unknown",
726
- function: typeof h.function === "string" ? h.function : "unknown",
727
- avgGas: typeof h.avgGas === "number" ? h.avgGas : 0,
728
- }))
959
+
960
+ onStateChanged?.({ tool: input.tool, findingsCount, sessionId: input.sessionID })
961
+ } catch (error) {
962
+ completionError = error instanceof Error ? error.message : String(error)
963
+ throw error
964
+ } finally {
965
+ lastDiagnostics = diag.getDiagnostics()
966
+ if (sink) {
967
+ const failFast = input.tool === "argus_record_finding"
968
+ // Enrichment data for event replay — projector extracts these from payloads
969
+ const enrichment: Record<string, unknown> = {}
970
+ if (completedSuccess) {
971
+ switch (input.tool) {
972
+ case "argus_solodit_search":
973
+ if (auditState.soloditResults) enrichment.soloditResults = auditState.soloditResults
974
+ break
975
+ case "argus_forge_fuzz":
976
+ if (auditState.fuzzCounterexamples)
977
+ enrichment.fuzzCounterexamples = auditState.fuzzCounterexamples
978
+ break
979
+ case "argus_forge_coverage":
980
+ if (auditState.coverageReport) enrichment.coverageReport = auditState.coverageReport
981
+ break
982
+ case "argus_gas_analysis":
983
+ if (auditState.gasHotspots) enrichment.gasHotspots = auditState.gasHotspots
984
+ break
985
+ case "argus_proxy_detection":
986
+ if (auditState.proxyContracts) enrichment.proxyContracts = auditState.proxyContracts
987
+ break
988
+ case "argus_skill_load":
989
+ if (auditState.skillsLoaded) enrichment.skillsLoaded = auditState.skillsLoaded
990
+ break
991
+ case "argus_check_patterns":
992
+ if (auditState.patternVersion) enrichment.patternVersion = auditState.patternVersion
993
+ break
994
+ }
729
995
  }
730
- break
996
+ await emitToSink(
997
+ sink,
998
+ buildEvent("tool.completed", runId, sessionId, toolCallId, {
999
+ tool: input.tool,
1000
+ findingsCount,
1001
+ success: completedSuccess,
1002
+ ...(completionError ? { error: completionError } : {}),
1003
+ ...enrichment,
1004
+ }),
1005
+ { failFast },
1006
+ )
1007
+ } else if (sessionId) {
1008
+ const enrichment: Record<string, unknown> = {}
1009
+ const event = buildEvent("tool.completed", runId, sessionId, toolCallId, {
1010
+ tool: input.tool,
1011
+ findingsCount,
1012
+ success: completedSuccess,
1013
+ ...(completionError ? { error: completionError } : {}),
1014
+ ...enrichment,
1015
+ })
1016
+ bufferOrphanEvent(sessionId, {
1017
+ event,
1018
+ failFast: input.tool === "argus_record_finding",
1019
+ bufferedAt: Date.now(),
1020
+ })
1021
+ logger.warn(
1022
+ `Buffered orphan tool.completed event for ${input.tool} from session ${sessionId} (run_id=${runId}, findings=${findingsCount})`,
1023
+ )
731
1024
  }
732
1025
  }
1026
+ }
733
1027
 
734
- lastDiagnostics = diag.getDiagnostics()
735
- diag.throwIfStrict()
1028
+ hookFn.getLastDiagnostics = (): DropDiagnostic[] => lastDiagnostics
736
1029
 
737
- if (input.tool === "argus_record_finding" && findingsCount === 0) {
738
- throw new Error("argus_record_finding did not persist any findings")
1030
+ hookFn.flushOrphanEvents = async (sessionId: string, sink: EventSink): Promise<number> => {
1031
+ const entries = orphanBuffer.get(sessionId)
1032
+ if (!entries || entries.length === 0) {
1033
+ return 0
739
1034
  }
740
1035
 
741
- recordToolExecution(auditState, input.tool, findingsCount)
742
- onStateChanged?.({ tool: input.tool, findingsCount })
1036
+ orphanBuffer.delete(sessionId)
1037
+ const now = Date.now()
1038
+ const fresh = entries.filter((e) => now - e.bufferedAt < ORPHAN_BUFFER_TTL_MS)
743
1039
 
744
- if (input.tool === "argus_record_finding" && !sink) {
745
- throw new Error("argus_record_finding requires an active event sink for durable persistence")
1040
+ if (fresh.length < entries.length) {
1041
+ logger.debug(
1042
+ `Discarded ${entries.length - fresh.length} stale orphan events for session ${sessionId}`,
1043
+ )
746
1044
  }
747
1045
 
748
- if (sink) {
749
- const failFast = input.tool === "argus_record_finding"
750
- const newFindings = auditState.findings.slice(findingsCountBefore)
751
- for (const [index, finding] of newFindings.entries()) {
752
- const { data: canonical } = normalizeToCanonicalFinding(finding, runId, 0, {
753
- reportedByAgent,
754
- reportedBySessionId: sessionId,
755
- toolCallId,
756
- observationId: `${toolCallId}:${index + 1}`,
757
- })
758
- await emitToSink(
759
- sink,
760
- buildEvent("finding.added", runId, sessionId, toolCallId, canonical),
761
- { failFast },
762
- )
763
- }
1046
+ let flushed = 0
1047
+ for (const entry of fresh) {
1048
+ await emitToSink(sink, entry.event, { failFast: entry.failFast })
1049
+ flushed++
1050
+ }
764
1051
 
765
- await emitToSink(
766
- sink,
767
- buildEvent("tool.completed", runId, sessionId, toolCallId, {
768
- tool: input.tool,
769
- findingsCount,
770
- success: true,
771
- }),
772
- { failFast },
773
- )
1052
+ if (flushed > 0) {
1053
+ logger.info(`Flushed ${flushed} orphan events for session ${sessionId} to sink ${sink.runId}`)
774
1054
  }
775
- }
776
1055
 
777
- hookFn.getLastDiagnostics = (): DropDiagnostic[] => lastDiagnostics
1056
+ return flushed
1057
+ }
778
1058
 
779
1059
  return hookFn
780
1060
  }