cipher-security 2.0.8 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cipher.js +11 -1
- package/lib/agent-runtime/handlers/architect.js +199 -0
- package/lib/agent-runtime/handlers/base.js +240 -0
- package/lib/agent-runtime/handlers/blue.js +220 -0
- package/lib/agent-runtime/handlers/incident.js +161 -0
- package/lib/agent-runtime/handlers/privacy.js +190 -0
- package/lib/agent-runtime/handlers/purple.js +209 -0
- package/lib/agent-runtime/handlers/recon.js +174 -0
- package/lib/agent-runtime/handlers/red.js +246 -0
- package/lib/agent-runtime/handlers/researcher.js +170 -0
- package/lib/agent-runtime/handlers.js +35 -0
- package/lib/agent-runtime/index.js +196 -0
- package/lib/agent-runtime/parser.js +316 -0
- package/lib/analyze/consistency.js +566 -0
- package/lib/analyze/constitution.js +110 -0
- package/lib/analyze/sharding.js +251 -0
- package/lib/autonomous/agent-tool.js +165 -0
- package/lib/autonomous/feedback-loop.js +13 -6
- package/lib/autonomous/framework.js +17 -0
- package/lib/autonomous/handoff.js +506 -0
- package/lib/autonomous/modes/blue.js +26 -0
- package/lib/autonomous/modes/red.js +585 -0
- package/lib/autonomous/modes/researcher.js +322 -0
- package/lib/autonomous/researcher.js +12 -45
- package/lib/autonomous/runner.js +9 -537
- package/lib/benchmark/agent.js +88 -26
- package/lib/benchmark/baselines.js +3 -0
- package/lib/benchmark/claude-code-solver.js +254 -0
- package/lib/benchmark/cognitive.js +283 -0
- package/lib/benchmark/index.js +12 -2
- package/lib/benchmark/knowledge.js +281 -0
- package/lib/benchmark/llm.js +156 -15
- package/lib/benchmark/models.js +5 -2
- package/lib/benchmark/nyu-ctf.js +192 -0
- package/lib/benchmark/overthewire.js +347 -0
- package/lib/benchmark/picoctf.js +281 -0
- package/lib/benchmark/prompts.js +280 -0
- package/lib/benchmark/registry.js +219 -0
- package/lib/benchmark/remote-solver.js +356 -0
- package/lib/benchmark/remote-target.js +263 -0
- package/lib/benchmark/reporter.js +35 -0
- package/lib/benchmark/runner.js +174 -10
- package/lib/benchmark/sandbox.js +35 -0
- package/lib/benchmark/scorer.js +22 -4
- package/lib/benchmark/solver.js +34 -1
- package/lib/benchmark/tools.js +262 -16
- package/lib/commands.js +9 -0
- package/lib/execution/council.js +434 -0
- package/lib/execution/parallel.js +292 -0
- package/lib/gates/circuit-breaker.js +135 -0
- package/lib/gates/confidence.js +302 -0
- package/lib/gates/corrections.js +219 -0
- package/lib/gates/self-check.js +245 -0
- package/lib/gateway/commands.js +727 -0
- package/lib/guardrails/engine.js +364 -0
- package/lib/mcp/server.js +349 -3
- package/lib/memory/compressor.js +94 -7
- package/lib/pipeline/hooks.js +288 -0
- package/lib/pipeline/index.js +11 -0
- package/lib/review/budget.js +210 -0
- package/lib/review/engine.js +526 -0
- package/lib/review/layers/acceptance-auditor.js +279 -0
- package/lib/review/layers/blind-hunter.js +500 -0
- package/lib/review/layers/defense-in-depth.js +209 -0
- package/lib/review/layers/edge-case-hunter.js +266 -0
- package/lib/review/panel.js +519 -0
- package/lib/review/two-stage.js +244 -0
- package/lib/session/cost-tracker.js +203 -0
- package/lib/session/logger.js +349 -0
- package/package.json +1 -1
|
@@ -0,0 +1,251 @@
|
|
|
1
|
+
// Copyright (c) 2026 defconxt. All rights reserved.
|
|
2
|
+
// Licensed under AGPL-3.0 — see LICENSE file for details.
|
|
3
|
+
// CIPHER is a trademark of defconxt.
|
|
4
|
+
|
|
5
|
+
/**
|
|
6
|
+
* CIPHER Semantic Document Sharding
|
|
7
|
+
*
|
|
8
|
+
* Splits large markdown documents into semantically coherent chunks
|
|
9
|
+
* at heading boundaries. Produces numbered shard files with an index.
|
|
10
|
+
*
|
|
11
|
+
* @module analyze/sharding
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
import { readFileSync, writeFileSync, mkdirSync, existsSync } from 'node:fs';
|
|
15
|
+
import { join, basename, dirname, extname } from 'node:path';
|
|
16
|
+
|
|
17
|
+
// ---------------------------------------------------------------------------
|
|
18
|
+
// Shard
|
|
19
|
+
// ---------------------------------------------------------------------------
|
|
20
|
+
|
|
21
|
+
/**
 * A single document shard: one section of a source markdown file together
 * with its position (number and line range) in that file.
 */
export class Shard {
  /**
   * @param {object} opts
   * @param {number} opts.number - Shard number (1-indexed)
   * @param {string} opts.title - Section heading
   * @param {string} opts.content - Section content (including heading)
   * @param {number} opts.lines - Line count
   * @param {number} opts.startLine - Start line in source (1-indexed)
   * @param {number} opts.endLine - End line in source (1-indexed)
   */
  constructor(opts = {}) {
    this.number = opts.number ?? 0;
    this.title = opts.title ?? '';
    this.content = opts.content ?? '';
    this.lines = opts.lines ?? 0;
    this.startLine = opts.startLine ?? 0;
    this.endLine = opts.endLine ?? 0;
  }

  /**
   * Generate the shard filename: `<sourceBase>-<NN>-<slug>.md`.
   *
   * The slug is the lowercased title with every run of characters outside
   * `[a-z0-9]` collapsed to a single dash, capped at 50 characters.
   *
   * @param {string} sourceBase - Source file name without extension
   * @returns {string}
   */
  filename(sourceBase) {
    const padded = String(this.number).padStart(2, '0');
    const slug = this.title
      .toLowerCase()
      .replace(/[^a-z0-9]+/g, '-')
      .replace(/^-|-$/g, '')
      .slice(0, 50)
      // Truncation can cut right after a separator; strip it so the name
      // never ends in "-.md".
      .replace(/-$/, '');
    return `${sourceBase}-${padded}-${slug}.md`;
  }

  /**
   * Generate shard file content: a frontmatter header followed by the
   * section content.
   *
   * @param {string} sourcePath - Path of the source document (written verbatim)
   * @param {number} totalShards - Total number of shards produced
   * @returns {string}
   */
  toFile(sourcePath, totalShards) {
    // JSON string syntax is a valid YAML double-quoted scalar, so this keeps
    // the frontmatter parseable when the heading contains quotes or
    // backslashes. Plain titles serialize exactly as before.
    const quotedTitle = JSON.stringify(this.title);
    return [
      '---',
      `source: ${sourcePath}`,
      `shard: ${this.number}/${totalShards}`,
      `title: ${quotedTitle}`,
      `lines: ${this.startLine}-${this.endLine}`,
      '---',
      '',
      this.content,
    ].join('\n');
  }
}
|
|
68
|
+
|
|
69
|
+
// ---------------------------------------------------------------------------
|
|
70
|
+
// ShardResult
|
|
71
|
+
// ---------------------------------------------------------------------------
|
|
72
|
+
|
|
73
|
+
/**
 * Outcome of sharding one document: the shards plus source metadata, with
 * renderers for a markdown index, a plain-text report, and JSON.
 */
export class ShardResult {
  /**
   * @param {object} opts
   * @param {Shard[]} opts.shards
   * @param {string} opts.sourcePath
   * @param {number} opts.sourceLines
   * @param {number} opts.headingLevel
   */
  constructor(opts = {}) {
    this.shards = opts.shards ?? [];
    this.sourcePath = opts.sourcePath ?? '';
    this.sourceLines = opts.sourceLines ?? 0;
    this.headingLevel = opts.headingLevel ?? 2;
  }

  /**
   * Generate index content: a markdown summary followed by a table with one
   * row per shard, each title linking to that shard's file name.
   *
   * @returns {string}
   */
  toIndex() {
    // A raw "|" inside a cell would split the table row into extra columns.
    const escapeCell = (text) => String(text).replace(/\|/g, '\\|');

    const lines = [
      `# Shard Index — ${basename(this.sourcePath)}`,
      '',
      `Source: \`${this.sourcePath}\``,
      `Total lines: ${this.sourceLines}`,
      `Heading level: H${this.headingLevel}`,
      `Shards: ${this.shards.length}`,
      '',
      '| # | Title | Lines | Range |',
      '|---|-------|-------|-------|',
    ];
    const sourceBase = basename(this.sourcePath, extname(this.sourcePath));
    for (const shard of this.shards) {
      const fn = shard.filename(sourceBase);
      lines.push(`| ${shard.number} | [${escapeCell(shard.title)}](${fn}) | ${shard.lines} | ${shard.startLine}-${shard.endLine} |`);
    }
    return lines.join('\n');
  }

  /**
   * Generate a short plain-text report: a summary line, then one line per
   * shard with its zero-padded number, title and line count.
   *
   * @returns {string}
   */
  toReport() {
    const lines = [
      `Sharding: ${basename(this.sourcePath)}`,
      ` Source: ${this.sourceLines} lines → ${this.shards.length} shards at H${this.headingLevel}`,
      '',
    ];
    for (const shard of this.shards) {
      lines.push(` [${String(shard.number).padStart(2, '0')}] ${shard.title} (${shard.lines} lines)`);
    }
    return lines.join('\n');
  }

  /**
   * Plain-object form of the result. Shard content is omitted; only the
   * per-shard metadata is included.
   *
   * @returns {object}
   */
  toJSON() {
    return {
      sourcePath: this.sourcePath,
      sourceLines: this.sourceLines,
      headingLevel: this.headingLevel,
      shardCount: this.shards.length,
      shards: this.shards.map((s) => ({
        number: s.number,
        title: s.title,
        lines: s.lines,
        startLine: s.startLine,
        endLine: s.endLine,
      })),
    };
  }
}
|
|
137
|
+
|
|
138
|
+
// ---------------------------------------------------------------------------
|
|
139
|
+
// Sharding logic
|
|
140
|
+
// ---------------------------------------------------------------------------
|
|
141
|
+
|
|
142
|
+
/**
 * Locate headings of one level, ignoring lines inside fenced code blocks.
 *
 * @param {string[]} lines - Source document split into lines
 * @param {string} headingPrefix - Heading marker plus a space, e.g. `"## "`
 * @returns {{ line: number, title: string }[]} 0-indexed line and trimmed title
 */
function findHeadings(lines, headingPrefix) {
  const fencePattern = /^ {0,3}(`{3,}|~{3,})/;
  const headings = [];
  let openFence = null; // { char, length } while inside a fenced block

  for (let i = 0; i < lines.length; i++) {
    const line = lines[i];
    const match = fencePattern.exec(line);

    if (openFence) {
      // A closing fence uses the same character, is at least as long as the
      // opening one, and carries nothing but whitespace after it.
      if (
        match &&
        match[1][0] === openFence.char &&
        match[1].length >= openFence.length &&
        line.slice(match[0].length).trim() === ''
      ) {
        openFence = null;
      }
      continue;
    }

    if (match) {
      const rest = line.slice(match[0].length);
      // A backtick run followed by more backticks on the same line is inline
      // code, not a fence opener.
      if (!(match[1][0] === '`' && rest.includes('`'))) {
        openFence = { char: match[1][0], length: match[1].length };
        continue;
      }
    }

    if (line.startsWith(headingPrefix)) {
      headings.push({
        line: i,
        title: line.slice(headingPrefix.length).trim(),
      });
    }
  }

  return headings;
}

/**
 * Split a markdown document into shards at heading boundaries.
 *
 * Content before the first matching heading becomes a "Preamble" shard when
 * it is non-empty. Lines inside fenced code blocks are never treated as
 * headings. Unless `dryRun` is set, an `INDEX.md` and one file per shard are
 * written to the output directory.
 *
 * @param {string} filePath - Path to the markdown file
 * @param {object} [options]
 * @param {number} [options.level=2] - Heading level to split at (1-6)
 * @param {string} [options.output] - Output directory (default: <filename>-shards/)
 * @param {boolean} [options.dryRun=false] - Preview only, don't write files
 * @returns {ShardResult}
 * @throws {RangeError} If `level` is not an integer from 1 to 6
 * @throws {Error} If `filePath` does not exist
 */
export function shardDocument(filePath, options = {}) {
  // Coerce so a numeric string (e.g. from a CLI flag) keeps working.
  const level = Number(options.level ?? 2);
  const dryRun = options.dryRun ?? false;

  if (!Number.isInteger(level) || level < 1 || level > 6) {
    throw new RangeError(`Invalid heading level: ${options.level} (expected an integer 1-6)`);
  }

  if (!existsSync(filePath)) {
    throw new Error(`File not found: ${filePath}`);
  }

  const content = readFileSync(filePath, 'utf-8');
  const lines = content.split('\n');
  const headingPrefix = '#'.repeat(level) + ' ';

  const headings = findHeadings(lines, headingPrefix);

  if (headings.length === 0) {
    // No headings at this level — return entire doc as one shard.
    // NOTE(review): this path returns before the write step below, so no
    // files are produced even when dryRun is false — confirm that is intended.
    return new ShardResult({
      shards: [new Shard({
        number: 1,
        title: basename(filePath, extname(filePath)),
        content: content,
        lines: lines.length,
        startLine: 1,
        endLine: lines.length,
      })],
      sourcePath: filePath,
      sourceLines: lines.length,
      headingLevel: level,
    });
  }

  const shards = [];

  // Preamble (content before first heading); skipped when it is only whitespace.
  if (headings[0].line > 0) {
    const preambleContent = lines.slice(0, headings[0].line).join('\n').trim();
    if (preambleContent.length > 0) {
      shards.push(new Shard({
        number: shards.length + 1,
        title: 'Preamble',
        content: preambleContent,
        lines: headings[0].line,
        startLine: 1,
        endLine: headings[0].line,
      }));
    }
  }

  // Heading sections: each runs from its heading up to the next heading (or EOF).
  for (let h = 0; h < headings.length; h++) {
    const start = headings[h].line;
    const end = h + 1 < headings.length ? headings[h + 1].line : lines.length;
    const sectionContent = lines.slice(start, end).join('\n').trimEnd();

    shards.push(new Shard({
      number: shards.length + 1,
      title: headings[h].title,
      content: sectionContent,
      lines: end - start,
      startLine: start + 1, // convert 0-indexed to 1-indexed
      endLine: end,
    }));
  }

  const result = new ShardResult({
    shards,
    sourcePath: filePath,
    sourceLines: lines.length,
    headingLevel: level,
  });

  // Write files unless dry run
  if (!dryRun) {
    const sourceBase = basename(filePath, extname(filePath));
    const outputDir = options.output ?? join(dirname(filePath), `${sourceBase}-shards`);
    mkdirSync(outputDir, { recursive: true });

    writeFileSync(join(outputDir, 'INDEX.md'), result.toIndex());

    for (const shard of shards) {
      const fn = shard.filename(sourceBase);
      writeFileSync(join(outputDir, fn), shard.toFile(filePath, shards.length));
    }
  }

  return result;
}
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
// Copyright (c) 2026 defconxt. All rights reserved.
|
|
2
|
+
// Licensed under AGPL-3.0 — see LICENSE file for details.
|
|
3
|
+
// CIPHER is a trademark of defconxt.
|
|
4
|
+
|
|
5
|
+
/**
|
|
6
|
+
* Agent-as-Tool — Use one mode agent as a callable tool for another.
|
|
7
|
+
*
|
|
8
|
+
* Enables patterns like PURPLE invoking RED for a quick scan without
|
|
9
|
+
* fully handing off control. The calling agent keeps its context;
|
|
10
|
+
* the sub-agent runs independently and returns structured results.
|
|
11
|
+
*
|
|
12
|
+
* Key exports:
|
|
13
|
+
* - agentAsTool: creates a tool schema + handler for any mode
|
|
14
|
+
* - AgentToolResult: structured sub-agent response
|
|
15
|
+
*
|
|
16
|
+
* @module autonomous/agent-tool
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
import { ModeAgentResult } from './framework.js';
|
|
20
|
+
|
|
21
|
+
// ---------------------------------------------------------------------------
|
|
22
|
+
// AgentToolResult
|
|
23
|
+
// ---------------------------------------------------------------------------
|
|
24
|
+
|
|
25
|
+
/**
 * Value object describing what a sub-agent invocation produced.
 */
export class AgentToolResult {
  /**
   * @param {object} opts
   * @param {string} opts.mode - Sub-agent mode
   * @param {string} opts.outputText - Sub-agent text output
   * @param {object} opts.outputData - Sub-agent structured data
   * @param {boolean} opts.valid - Whether sub-agent validation passed
   * @param {number} opts.durationS - Sub-agent wall-clock seconds
   * @param {number} opts.tokensIn - Sub-agent input tokens
   * @param {number} opts.tokensOut - Sub-agent output tokens
   * @param {string|null} [opts.error] - Error if sub-agent failed
   */
  constructor(opts = {}) {
    const {
      mode,
      outputText,
      outputData,
      valid,
      durationS,
      tokensIn,
      tokensOut,
      error,
    } = opts;

    // Every field falls back to a neutral default when absent or null.
    this.mode = mode ?? '';
    this.outputText = outputText ?? '';
    this.outputData = outputData ?? {};
    this.valid = valid ?? false;
    this.durationS = durationS ?? 0;
    this.tokensIn = tokensIn ?? 0;
    this.tokensOut = tokensOut ?? 0;
    this.error = error ?? null;
  }

  /**
   * Render the result as the text handed back as tool output.
   *
   * A truthy `error` yields a single error line; otherwise a short header
   * (validity, duration, token counts) is followed by a blank line and the
   * sub-agent's text.
   *
   * @returns {string}
   */
  toString() {
    const banner = `[Agent-as-Tool: ${this.mode}]`;

    if (this.error) {
      return `${banner} ERROR: ${this.error}`;
    }

    const seconds = this.durationS.toFixed(2);
    const header = [
      banner,
      `Valid: ${this.valid}`,
      `Duration: ${seconds}s`,
      `Tokens: ${this.tokensIn}in/${this.tokensOut}out`,
    ];

    // Joined as array elements (not interpolated) so a nullish outputText
    // renders as an empty string.
    return [...header, '', this.outputText].join('\n');
  }
}
|
|
67
|
+
|
|
68
|
+
// ---------------------------------------------------------------------------
|
|
69
|
+
// agentAsTool
|
|
70
|
+
// ---------------------------------------------------------------------------
|
|
71
|
+
|
|
72
|
+
/**
 * Create a tool schema + handler that invokes a mode agent as a sub-tool.
 *
 * The returned tool can be registered in any mode's ToolRegistry. When called,
 * it runs the target mode as a sub-agent and returns structured results
 * rendered as text. When a handoff engine is supplied, its depth counter is
 * raised for the duration of the call and always restored afterwards.
 *
 * @param {string} mode - Target mode to invoke as a tool (upper-cased internally)
 * @param {string} name - Tool name (e.g. 'invoke_red_scan')
 * @param {string} description - Tool description for the LLM; a default is used when falsy
 * @param {object} [opts]
 * @param {Function} [opts.agentRunner] - Injectable runner for testing
 * @param {import('./handoff.js').HandoffEngine} [opts.handoffEngine] - Shared depth tracking
 * @returns {{ schema: object, handler: Function }}
 */
export function agentAsTool(mode, name, description, opts = {}) {
  const targetMode = mode.toUpperCase();

  const schema = {
    name,
    description: description || `Invoke ${targetMode} mode as a sub-agent tool.`,
    input_schema: {
      type: 'object',
      properties: {
        task: {
          type: 'string',
          description: 'Task description for the sub-agent',
        },
        parameters: {
          type: 'object',
          description: 'Optional parameters for the sub-agent task',
        },
      },
      required: ['task'],
    },
  };

  /**
   * Handler function — invoked by ToolRegistry.dispatch().
   *
   * Failures while preparing or running the sub-agent are reported as an
   * error result string rather than a rejected promise.
   *
   * @param {*} context - Calling agent's context (not passed to sub-agent)
   * @param {object} toolInput - { task: string, parameters?: object }
   * @returns {Promise<string>}
   */
  async function handler(context, toolInput) {
    const engine = opts.handoffEngine || null;

    // Check depth limit if engine is available
    if (engine) {
      if (engine.depth >= engine.maxDepth) {
        return new AgentToolResult({
          mode: targetMode,
          error: `Max depth (${engine.maxDepth}) reached — cannot invoke sub-agent`,
        }).toString();
      }
      engine.incrementDepth();
    }

    let subResult;
    try {
      // Runner loading and input construction live inside the try so that a
      // failed import or a malformed toolInput cannot leave depth raised.
      const runner = opts.agentRunner || (await import('./runner.js')).runAutonomous;

      // `parameters` is spread last, so its keys take precedence over
      // `task` / `user_message` when they collide.
      const taskInput = {
        task: toolInput.task,
        user_message: `[Sub-agent invocation] ${toolInput.task}`,
        ...(toolInput.parameters || {}),
      };

      subResult = await runner(targetMode, taskInput, null, null);
    } catch (e) {
      // Thrown values are not guaranteed to be Error instances.
      const message = e instanceof Error ? e.message : String(e);
      return new AgentToolResult({
        mode: targetMode,
        error: message || String(e),
      }).toString();
    } finally {
      // Single exit point for the depth bookkeeping: runs on success,
      // on failure, and on the early return above.
      if (engine) engine.decrementDepth();
    }

    if (subResult == null) {
      return new AgentToolResult({
        mode: targetMode,
        error: 'Sub-agent returned no result',
      }).toString();
    }

    const result = new AgentToolResult({
      mode: targetMode,
      outputText: subResult.outputText,
      outputData: subResult.outputData,
      valid: subResult.validation?.valid ?? false,
      durationS: subResult.durationS,
      tokensIn: subResult.tokensIn,
      tokensOut: subResult.tokensOut,
      error: subResult.error,
    });

    return result.toString();
  }

  return { schema, handler };
}
|
|
@@ -177,13 +177,20 @@ export class SkillQualityAnalyzer {
|
|
|
177
177
|
if (existsSync(agentJs)) {
|
|
178
178
|
const agentContent = readFileSync(agentJs, 'utf-8');
|
|
179
179
|
const agentLines = agentContent.trim().split('\n');
|
|
180
|
-
|
|
181
|
-
|
|
180
|
+
// New runtime pattern: 3-line wrapper importing from agent-runtime
|
|
181
|
+
const usesRuntime = agentContent.includes('agent-runtime');
|
|
182
|
+
if (usesRuntime) {
|
|
183
|
+
// Runtime wrapper delegates to shared runtime — full quality
|
|
184
|
+
scores.agent_quality = 1.0;
|
|
185
|
+
} else {
|
|
186
|
+
// Legacy standalone agent.js — validate inline content
|
|
187
|
+
if (agentLines.length < SkillQualityAnalyzer.MIN_AGENT_PY_LINES) {
|
|
188
|
+
issues.push(`agent.js too short (${agentLines.length} lines)`);
|
|
189
|
+
}
|
|
190
|
+
scores.agent_quality = Math.min(agentLines.length / 50, 1.0);
|
|
191
|
+
if (!agentContent.includes('process.argv')) issues.push('agent.js missing CLI dispatch');
|
|
192
|
+
if (!agentContent.includes('json')) issues.push('agent.js missing JSON output');
|
|
182
193
|
}
|
|
183
|
-
scores.agent_quality = Math.min(agentLines.length / 50, 1.0);
|
|
184
|
-
if (!agentContent.includes('process.argv')) issues.push('agent.js missing CLI dispatch');
|
|
185
|
-
if (!agentContent.includes('json')) issues.push('agent.js missing JSON output');
|
|
186
|
-
if (!agentContent.includes('process.argv')) issues.push('agent.js missing CLI entry point');
|
|
187
194
|
} else {
|
|
188
195
|
issues.push('scripts/agent.js missing');
|
|
189
196
|
scores.agent_quality = 0;
|
|
@@ -109,6 +109,8 @@ export class ModeAgentResult {
|
|
|
109
109
|
this.validation = validation;
|
|
110
110
|
this.error = error;
|
|
111
111
|
this.durationS = durationS;
|
|
112
|
+
/** @type {import('../gates/self-check.js').SelfCheckResult|null} */
|
|
113
|
+
this.selfCheck = null;
|
|
112
114
|
}
|
|
113
115
|
}
|
|
114
116
|
|
|
@@ -240,6 +242,7 @@ export class ModeAgentConfig {
|
|
|
240
242
|
* @param {boolean} [opts.requiresSandbox=false]
|
|
241
243
|
* @param {Function|null} [opts.completionCheck=null] - (text: string) => boolean
|
|
242
244
|
* @param {Function|null} [opts.outputParser=null] - (text: string) => Object
|
|
245
|
+
* @param {import('./handoff.js').HandoffFilter|null} [opts.handoffFilter=null] - Context filter for incoming handoffs
|
|
243
246
|
*/
|
|
244
247
|
constructor({
|
|
245
248
|
mode,
|
|
@@ -252,6 +255,7 @@ export class ModeAgentConfig {
|
|
|
252
255
|
requiresSandbox = false,
|
|
253
256
|
completionCheck = null,
|
|
254
257
|
outputParser = null,
|
|
258
|
+
handoffFilter = null,
|
|
255
259
|
}) {
|
|
256
260
|
this.mode = mode;
|
|
257
261
|
this.toolRegistry = toolRegistry;
|
|
@@ -263,6 +267,7 @@ export class ModeAgentConfig {
|
|
|
263
267
|
this.requiresSandbox = requiresSandbox;
|
|
264
268
|
this.completionCheck = completionCheck;
|
|
265
269
|
this.outputParser = outputParser;
|
|
270
|
+
this.handoffFilter = handoffFilter;
|
|
266
271
|
}
|
|
267
272
|
}
|
|
268
273
|
|
|
@@ -504,6 +509,18 @@ export class BaseAgent {
|
|
|
504
509
|
// --- Post-loop: validation ---
|
|
505
510
|
result.validation = this._config.validator.validate(result);
|
|
506
511
|
|
|
512
|
+
// --- Post-loop: self-check (hallucination/hedging detection) ---
|
|
513
|
+
try {
|
|
514
|
+
const { SelfChecker } = await import('../gates/self-check.js');
|
|
515
|
+
const checker = new SelfChecker();
|
|
516
|
+
result.selfCheck = checker.check(lastAssistantText);
|
|
517
|
+
if (result.selfCheck.findings.length > 0) {
|
|
518
|
+
debug(`Self-check: ${result.selfCheck.findings.length} findings, score ${result.selfCheck.score}`);
|
|
519
|
+
}
|
|
520
|
+
} catch {
|
|
521
|
+
// Self-check is non-critical — don't fail the agent if it can't load
|
|
522
|
+
}
|
|
523
|
+
|
|
507
524
|
// --- Duration ---
|
|
508
525
|
result.durationS = (performance.now() / 1000) - startTime;
|
|
509
526
|
|