@polylogicai/polycode 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +24 -0
- package/README.md +107 -0
- package/bin/polycode.mjs +317 -0
- package/lib/agency-receipt.mjs +45 -0
- package/lib/agentic.mjs +505 -0
- package/lib/canon.mjs +123 -0
- package/lib/commitment.mjs +59 -0
- package/lib/compiler.mjs +166 -0
- package/lib/context-builder.mjs +79 -0
- package/lib/hooks.mjs +118 -0
- package/lib/inference-router.mjs +67 -0
- package/lib/intent.mjs +31 -0
- package/lib/repl-ui.mjs +91 -0
- package/lib/slash-commands.mjs +83 -0
- package/lib/witness/conservativity.mjs +90 -0
- package/lib/witness/g-fidelity.mjs +80 -0
- package/lib/witness/ground-truth.mjs +56 -0
- package/lib/witness/index.mjs +70 -0
- package/lib/witness/rule-compliance.mjs +82 -0
- package/lib/witness/secret-scrubber.mjs +51 -0
- package/package.json +45 -0
- package/rules/default.yaml +58 -0
package/lib/agentic.mjs
ADDED
|
@@ -0,0 +1,505 @@
|
|
|
1
|
+
// lib/agentic.mjs
|
|
2
|
+
// polycode agentic loop. Runs a standard intent -> plan -> act -> record cycle
|
|
3
|
+
// per user turn. The context for each turn is built from the session log via
|
|
4
|
+
// lib/compiler.mjs, passed through lib/witness/secret-scrubber.mjs before any
|
|
5
|
+
// network call, and every tool call lands as a signed row in the session log.
|
|
6
|
+
//
|
|
7
|
+
// polycode is inspired by Claude Code's public architecture at docs.claude.com.
|
|
8
|
+
// polycode is not affiliated with Anthropic. No Claude Code code or system
|
|
9
|
+
// prompts are copied. The hook contract event names match the public Claude
|
|
10
|
+
// Code hook spec so that portable hook scripts work in both tools.
|
|
11
|
+
|
|
12
|
+
import Groq from 'groq-sdk';
|
|
13
|
+
import { promises as fs } from 'node:fs';
|
|
14
|
+
import { exec } from 'node:child_process';
|
|
15
|
+
import { promisify } from 'node:util';
|
|
16
|
+
import { resolve, relative, sep, dirname } from 'node:path';
|
|
17
|
+
import { compilePacket } from './compiler.mjs';
|
|
18
|
+
import { mintCommitment } from './commitment.mjs';
|
|
19
|
+
import { ensureActiveIntent } from './intent.mjs';
|
|
20
|
+
import { scrubSecrets } from './witness/secret-scrubber.mjs';
|
|
21
|
+
|
|
22
|
+
// Promise-returning wrapper around child_process.exec, used by the shell-backed tools.
const execAsync = promisify(exec);

// Model used when the AgenticLoop constructor is not given one.
const DEFAULT_MODEL = 'moonshotai/kimi-k2-instruct';
// Model switched to (once per turn) after a completion request throws.
const FALLBACK_MODEL = 'llama-3.3-70b-versatile';

// Upper bound on dispatch iterations per turn when the caller passes none.
const DEFAULT_MAX_ITERATIONS = 12;

// Sampling settings sent with every completion request.
const TEMPERATURE = 0.2;
const MAX_TOKENS = 4096;

// Wall-clock limit for a single bash tool invocation, in milliseconds.
const MAX_BASH_TIMEOUT_MS = 30 * 1000;
// Default size cap, in bytes, applied to tool output by truncateStr.
const MAX_OUTPUT_BYTES = 3200;
|
|
31
|
+
|
|
32
|
+
// System message sent as the first chat message of every turn. Kept as one
// entry per output line; empty entries are the blank lines between sections.
const SYSTEM_PROMPT = [
  'You are polycode, a terminal coding agent. Each turn you receive the current user message along with a small set of context rows selected from the session log by a separate selection step. You do not need to hold conversation history in your own memory. Produce a short plan and the tool calls needed to address the current message, then call task_done with a one or two sentence summary.',
  '',
  'Discipline:',
  '- Use periods, commas, or colons. Not em dashes.',
  '- No hype words: no "revolutionary", "game-changer", "unprecedented".',
  '- Read files before asserting their content. Test before claiming something works.',
  '- Tools available: task_done, bash, read_file, write_file, edit_file, glob, grep.',
  '- When the current user message has been addressed, call task_done.',
  '',
  'Every tool call you make is checked by a deterministic verification layer before it is written to the session log. Checks include content grounding, file existence, rule compliance against a forbidden list, and secret scrubbing on tool output. If a check fails, the record is marked REFUTED and you should acknowledge and correct rather than retry the same action.',
].join('\n');
|
|
42
|
+
|
|
43
|
+
// Tool catalogue passed as `tools` on every completion request.
// Each row is [name, description, parameter names, required parameter names].
// Every parameter is declared as a JSON-schema string; the rows are expanded
// below into the { type: 'function', function: { ... } } entries.
const TOOL_SCHEMAS = [
  [
    'task_done',
    "Call this when the user's current message has been addressed. Summary is the final user-facing message for this turn.",
    ['summary'],
    ['summary'],
  ],
  [
    'bash',
    'Run a shell command in the working directory. 30 second timeout. Output truncated at 3200 bytes.',
    ['command'],
    ['command'],
  ],
  [
    'read_file',
    'Read a file relative to the working directory. Returns the first 3200 bytes.',
    ['path'],
    ['path'],
  ],
  [
    'write_file',
    'Write content to a file. Creates parent directories. Overwrites if exists.',
    ['path', 'content'],
    ['path', 'content'],
  ],
  [
    'edit_file',
    'Replace a single exact substring in a file. old_string must appear exactly once.',
    ['path', 'old_string', 'new_string'],
    ['path', 'old_string', 'new_string'],
  ],
  [
    'glob',
    'List files matching a glob rooted at the working directory. First 200 matches.',
    ['pattern'],
    ['pattern'],
  ],
  [
    'grep',
    'Search for a regex in files rooted at the working directory. First 100 matching lines.',
    ['pattern', 'glob'],
    ['pattern'],
  ],
].map(([name, description, paramNames, required]) => ({
  type: 'function',
  function: {
    name,
    description,
    parameters: {
      type: 'object',
      properties: Object.fromEntries(paramNames.map((param) => [param, { type: 'string' }])),
      required,
    },
  },
}));
|
|
139
|
+
|
|
140
|
+
/**
 * Resolve `targetPath` against `cwd` and refuse anything that lands outside it.
 *
 * The check is purely lexical: the path is normalised with `resolve` and then
 * expressed relative to `cwd`. Nothing here consults the filesystem, so
 * symbolic links are not followed or detected by this function.
 *
 * @param {string} cwd - Directory the tools are confined to.
 * @param {string} targetPath - Path supplied by the model, relative or absolute.
 * @returns {string} The resolved absolute path.
 * @throws {Error} When the resolved path is not `cwd` itself or a descendant of it.
 */
function ensureInsideCwd(cwd, targetPath) {
  const abs = resolve(cwd, targetPath);
  const rel = relative(cwd, abs);
  // `relative` returns a normalised path, so leaving `cwd` can only show up as
  // a leading `..` SEGMENT. Compare whole segments rather than raw text so
  // that legitimate names such as `..notes` or `v1../file` are not rejected.
  if (rel === '..' || rel.startsWith(`..${sep}`)) {
    throw new Error(`path escapes working directory: ${targetPath}`);
  }
  return abs;
}
|
|
148
|
+
|
|
149
|
+
/**
 * Cap a value's UTF-8 encoding at `bytes` bytes.
 *
 * Falsy input is treated as the empty string. When the encoded text is longer
 * than the cap, the cut point is moved back to the nearest character boundary
 * so a multi-byte sequence is never split (which would decode as U+FFFD), and
 * a marker stating how many bytes were dropped is appended.
 *
 * @param {*} s - Value to stringify and cap.
 * @param {number} [bytes=MAX_OUTPUT_BYTES] - Maximum number of bytes to keep.
 * @returns {string} The original text, or the kept prefix plus a truncation marker.
 */
function truncateStr(s, bytes = MAX_OUTPUT_BYTES) {
  const buf = Buffer.from(String(s || ''));
  if (buf.length <= bytes) return buf.toString();

  let end = Math.max(0, Math.floor(bytes));
  // buf[end] is the first byte that would be dropped. If it is a UTF-8
  // continuation byte (10xxxxxx) the cut is mid-character, so step back.
  while (end > 0 && (buf[end] & 0xc0) === 0x80) end--;

  return `${buf.subarray(0, end).toString()}\n...[truncated ${buf.length - end} bytes]`;
}
|
|
154
|
+
|
|
155
|
+
/**
 * Execute one tool requested by the model and return its result as text.
 *
 * Failures inside a tool are reported as a returned string (`error: ...`, or
 * an `exit=...` report for bash) rather than thrown. The exception is the
 * working-directory check for the file tools: `ensureInsideCwd` runs before
 * each `try` block, so a path outside `cwd` throws to the caller.
 *
 * @param {string} name - Tool name: bash, read_file, write_file, edit_file, glob or grep.
 * @param {object} args - Parsed tool arguments; only the fields of the chosen tool are read.
 * @param {string} cwd - Working directory the tool runs in and is confined to.
 * @returns {Promise<string>} Tool output, a status line, or an error description.
 */
async function runTool(name, args, cwd) {
  switch (name) {
    case 'bash': {
      const cmd = String(args.command ?? '');
      try {
        const { stdout, stderr } = await execAsync(cmd, {
          cwd,
          timeout: MAX_BASH_TIMEOUT_MS,
          maxBuffer: 10 * 1024 * 1024,
        });
        return truncateStr([stdout, stderr].filter(Boolean).join('\n') || '(no output)');
      } catch (err) {
        // Non-zero exit, timeout and spawn failures all land here; report
        // whatever output was captured alongside the failure.
        return truncateStr(`exit=${err.code ?? 'unknown'}\n${err.message}\nstdout=${err.stdout ?? ''}\nstderr=${err.stderr ?? ''}`);
      }
    }
    case 'read_file': {
      const abs = ensureInsideCwd(cwd, String(args.path ?? ''));
      try {
        return truncateStr(await fs.readFile(abs, 'utf8'));
      } catch (err) {
        return `error: ${err.message}`;
      }
    }
    case 'write_file': {
      const abs = ensureInsideCwd(cwd, String(args.path ?? ''));
      const content = String(args.content ?? '');
      try {
        await fs.mkdir(dirname(abs), { recursive: true });
        await fs.writeFile(abs, content, 'utf8');
        return `ok: wrote ${Buffer.byteLength(content)} bytes to ${args.path}`;
      } catch (err) {
        return `error: ${err.message}`;
      }
    }
    case 'edit_file': {
      const abs = ensureInsideCwd(cwd, String(args.path ?? ''));
      try {
        const content = await fs.readFile(abs, 'utf8');
        const oldStr = String(args.old_string ?? '');
        const newStr = String(args.new_string ?? '');
        // An empty needle "matches" between every character, so it can never
        // identify a single location to edit.
        if (oldStr === '') return 'error: old_string must not be empty';
        const occ = content.split(oldStr).length - 1;
        if (occ === 0) return `error: old_string not found in ${args.path}`;
        if (occ > 1) return `error: old_string appears ${occ} times in ${args.path}, must be unique`;
        // Use a replacer function so new_string is inserted verbatim. A string
        // replacement would expand sequences such as `$&`, `$1` and `$$`.
        await fs.writeFile(abs, content.replace(oldStr, () => newStr), 'utf8');
        return `ok: edited ${args.path} (${oldStr.length} -> ${newStr.length} bytes)`;
      } catch (err) {
        return `error: ${err.message}`;
      }
    }
    case 'glob': {
      // Escape single quotes so the pattern stays inside one shell word.
      const pattern = String(args.pattern ?? '').replace(/'/g, "'\\''");
      try {
        // NOTE(review): `find -name` is given the pattern as-is; it appears to
        // compare against file names only, so patterns containing a directory
        // part (e.g. `src/*.js`) likely never match. Confirm intended semantics.
        const { stdout } = await execAsync(`find . -type f -name '${pattern}' 2>/dev/null | head -200`, {
          cwd,
          timeout: MAX_BASH_TIMEOUT_MS,
        });
        return truncateStr(stdout || '(no matches)');
      } catch (err) {
        return `error: ${err.message}`;
      }
    }
    case 'grep': {
      const pattern = String(args.pattern ?? '').replace(/'/g, "'\\''");
      const glob = String(args.glob ?? '').replace(/'/g, "'\\''");
      try {
        // `-e` marks the next word as the pattern even when it starts with
        // `-`, so it cannot be mistaken for a grep option.
        const cmd = glob
          ? `grep -rn --include='${glob}' -E -e '${pattern}' . 2>/dev/null | head -100`
          : `grep -rn -E -e '${pattern}' . 2>/dev/null | head -100`;
        const { stdout } = await execAsync(cmd, { cwd, timeout: MAX_BASH_TIMEOUT_MS });
        return truncateStr(stdout || '(no matches)');
      } catch (err) {
        return `error: ${err.message}`;
      }
    }
    default:
      return `error: unknown tool "${name}"`;
  }
}
|
|
229
|
+
|
|
230
|
+
/**
 * Extract tool calls that a model wrote inline in its text, in the form
 * `<function=NAME>{...json...}</function>` (the `>` after the name is optional).
 *
 * Candidates whose argument text is not valid JSON are dropped. Each kept call
 * gets a synthetic id built from the current time and its position among the
 * kept calls.
 *
 * @param {*} content - Assistant message text; falsy values yield no calls.
 * @returns {{id: string, name: string, argsJson: string}[]} Recovered calls in order of appearance.
 */
function recoverInlineToolCalls(content) {
  if (!content) return [];

  const pattern = /<function=([\w_-]+)>?(\{[\s\S]*?\})\s*<\/function>/g;
  const recovered = [];

  for (const [, toolName, rawArgs] of String(content).matchAll(pattern)) {
    let isValidJson = true;
    try {
      JSON.parse(rawArgs);
    } catch {
      isValidJson = false;
    }
    if (isValidJson) {
      recovered.push({
        id: `recovered_${Date.now()}_${recovered.length}`,
        name: toolName,
        argsJson: rawArgs,
      });
    }
  }

  return recovered;
}
|
|
242
|
+
|
|
243
|
+
/**
 * Parse the JSON argument string of a tool call.
 *
 * @param {string} raw - The `arguments` string of a tool call.
 * @returns {object|null} The parsed object, or null when the text is not valid
 *   JSON or does not decode to a plain (non-array, non-null) object.
 */
function parseToolArgs(raw) {
  try {
    const parsed = JSON.parse(raw);
    const isPlainObject = parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed);
    return isPlainObject ? parsed : null;
  } catch {
    return null;
  }
}

/**
 * Runs one user turn as an intent -> ground -> scrub -> dispatch/act -> record
 * cycle against a Groq chat-completions model with tool calling.
 */
export class AgenticLoop {
  /**
   * @param {object} [options]
   * @param {string} [options.apiKey] - Groq API key; runTurn falls back to process.env.GROQ_API_KEY.
   * @param {string} [options.model] - Model id; defaults to DEFAULT_MODEL.
   * @param {object} [options.logger] - Stored on the instance; defaults to console.
   * @param {object} [options.rules] - Passed through to every minted commitment; defaults to {}.
   */
  constructor({ apiKey, model, logger, rules } = {}) {
    this.apiKey = apiKey;
    this.defaultModel = model || DEFAULT_MODEL;
    this.logger = logger || console;
    this.rules = rules || {};
  }

  /**
   * Process a single user message.
   *
   * @param {object} turn
   * @param {object} turn.canon - Session log; `append` is called on it here and it is handed to the helpers.
   * @param {string} turn.userMessage - The user's message for this turn.
   * @param {string} turn.cwd - Working directory for tools and verification.
   * @param {Function} [turn.onEvent] - Receives progress events (`{ phase, ... }`).
   * @param {number} [turn.maxIterations] - Dispatch limit; defaults to DEFAULT_MAX_ITERATIONS.
   * @returns {Promise<object>} `{ finalMessage, iterations, durationMs, commitments,
   *   primitivesList, intentId, turnId, promptTokensUsed }` plus either
   *   `scrubBlocked: true` (turn aborted before any model call) or `compilerProvider`.
   */
  async runTurn({ canon, userMessage, cwd, onEvent, maxIterations }) {
    const start = Date.now();
    const groq = new Groq({ apiKey: this.apiKey || process.env.GROQ_API_KEY });
    let model = this.defaultModel;
    let didFallback = false;
    const limit = maxIterations || DEFAULT_MAX_ITERATIONS;
    const emit = onEvent || (() => {});
    const primitivesList = [];
    const commitments = [];

    // Phase 1: INTENT
    emit({ phase: 'intent' });
    const intent = ensureActiveIntent(canon, userMessage);
    const intentId = intent.payload.intent_id;
    const turnId = `turn_${Date.now()}_${Math.random().toString(36).slice(2, 6)}`;
    canon.append('user_turn', { intent_id: intentId, turn_id: turnId, message: userMessage });

    // Mint a commitment for this turn, remember it, and announce it.
    const record = async (fields) => {
      const c = await mintCommitment(canon, {
        intentId,
        turnId,
        cwd,
        rules: this.rules,
        ...fields,
      });
      commitments.push(c);
      primitivesList.push(c.primitives);
      emit({ phase: 'record', commitment: c });
      return c;
    };

    // Phase 2: GROUND. Build the bounded turn context from the session log.
    const ctx = await compilePacket(canon, userMessage, cwd);
    canon.append('compile_result', {
      intent_id: intentId,
      turn_id: turnId,
      compiler_provider: ctx.compilerProvider,
      selected_rows: ctx.selectedRows,
      estimated_tokens: ctx.estimatedTokens,
      fallback: ctx.fallback,
      usage: ctx.compilerUsage,
    });
    emit({
      phase: 'ground_complete',
      estimatedTokens: ctx.estimatedTokens,
      compilerProvider: ctx.compilerProvider,
      selectedRows: ctx.selectedRows?.length || 0,
      fallback: ctx.fallback,
    });

    // Phase 2b: SCRUB -> non-LLM secret scrubber must PASS before any dispatch
    const scrub = scrubSecrets(ctx.prompt);
    if (scrub.blocked) {
      emit({ phase: 'scrub_blocked', findings: scrub.findings });
      await record({
        claim: `secret_bleed_blocked: ${scrub.reason}`,
        substrate: `(redacted: scrubber blocked dispatch on ${scrub.findings.length} secret patterns)`,
        toolName: null,
        toolArgs: null,
        toolResult: scrub.reason,
        packet: '(redacted)',
      });
      return {
        finalMessage: `(secret bleed blocked: ${scrub.reason}. turn aborted before dispatch, no network call was made.)`,
        iterations: 0,
        durationMs: Date.now() - start,
        commitments,
        primitivesList,
        intentId,
        turnId,
        promptTokensUsed: 0,
        scrubBlocked: true,
      };
    }

    // Phase 3 + Phase 4: DISPATCH and ACT
    const messages = [
      { role: 'system', content: SYSTEM_PROMPT },
      { role: 'user', content: ctx.prompt },
    ];

    let iteration = 0;
    let finalMessage = '';
    let done = false;

    while (iteration < limit && !done) {
      iteration++;
      emit({ phase: 'dispatch', iteration });

      let completion;
      try {
        completion = await groq.chat.completions.create({
          model,
          messages,
          tools: TOOL_SCHEMAS,
          tool_choice: 'auto',
          temperature: TEMPERATURE,
          max_tokens: MAX_TOKENS,
        });
      } catch (err) {
        // First failure on a non-fallback model: switch models and retry the
        // same iteration (the counter is rolled back so it is not consumed).
        if (!didFallback && model !== FALLBACK_MODEL) {
          didFallback = true;
          model = FALLBACK_MODEL;
          emit({ phase: 'fallback', to: FALLBACK_MODEL, reason: err.message });
          iteration--;
          continue;
        }
        emit({ phase: 'error', message: `groq call failed: ${err.message}` });
        break;
      }

      const choice = completion.choices?.[0];
      if (!choice) {
        emit({ phase: 'error', message: 'no choice in groq response' });
        break;
      }

      const assistantMsg = choice.message;
      const rawContent = assistantMsg.content;
      let toolCalls = assistantMsg.tool_calls;

      // Some models write tool calls into the text instead of tool_calls.
      if ((!toolCalls || toolCalls.length === 0) && rawContent) {
        const recovered = recoverInlineToolCalls(rawContent);
        if (recovered.length > 0) {
          toolCalls = recovered.map((r) => ({
            id: r.id,
            type: 'function',
            function: { name: r.name, arguments: r.argsJson },
          }));
        }
      }

      // The conversation sent back to the model keeps the assistant text as
      // the model produced it.
      messages.push({
        role: 'assistant',
        content: rawContent ?? null,
        tool_calls: toolCalls?.map((tc) => ({
          id: tc.id,
          type: 'function',
          function: { name: tc.function.name, arguments: tc.function.arguments },
        })),
      });

      // Anything shown to the user or written to the session log goes through
      // the scrubber first, the same way task_done summaries and tool results do.
      let shownContent = rawContent ?? '';
      if (rawContent) {
        const contentScrub = scrubSecrets(rawContent);
        if (contentScrub.blocked) {
          shownContent = `(secrets redacted in assistant message: ${contentScrub.findings.map((f) => f.pattern).join(', ')}) ${contentScrub.redacted}`;
        }
        const cleaned = shownContent
          .replace(/<function=[\w_-]+>?\{[\s\S]*?\}\s*<\/function>/g, '')
          .trim();
        if (cleaned) emit({ phase: 'act', kind: 'message', content: cleaned, iteration });
      }

      // No tool calls: the assistant text is the final answer for this turn.
      if (!toolCalls || toolCalls.length === 0) {
        finalMessage = shownContent;
        await record({
          claim: finalMessage || '(empty final message)',
          substrate: ctx.prompt,
          toolName: null,
          toolArgs: null,
          toolResult: null,
          packet: ctx.prompt,
        });
        done = true;
        break;
      }

      for (const tc of toolCalls) {
        // Arguments must decode to a JSON object. Anything else (bad JSON,
        // `null`, arrays, scalars) is reported back to the model instead of
        // being dereferenced below.
        const parsedArgs = parseToolArgs(tc.function.arguments);
        if (parsedArgs === null) {
          messages.push({ role: 'tool', tool_call_id: tc.id, content: JSON.stringify({ error: 'invalid JSON args' }) });
          continue;
        }

        emit({ phase: 'act', kind: 'tool_call', name: tc.function.name, args: parsedArgs, iteration });

        if (tc.function.name === 'task_done') {
          let summary = String(parsedArgs.summary ?? '');
          // Scrub secrets out of the task_done summary before it is printed or
          // stored in the canon.
          const summaryScrub = scrubSecrets(summary);
          if (summaryScrub.blocked) {
            summary = `(secrets redacted in summary: ${summaryScrub.findings.map((f) => f.pattern).join(', ')}) ${summaryScrub.redacted}`;
          }
          finalMessage = summary;
          await record({
            claim: summary,
            substrate: ctx.prompt,
            toolName: 'task_done',
            toolArgs: parsedArgs,
            toolResult: summary,
            packet: ctx.prompt,
          });
          messages.push({ role: 'tool', tool_call_id: tc.id, content: summary });
          done = true;
          break;
        }

        let result;
        try {
          result = await runTool(tc.function.name, parsedArgs, cwd);
        } catch (err) {
          result = `error: ${err.message}`;
        }

        // Scrub secrets out of the tool result BEFORE it reaches the terminal,
        // the canon, or the next model iteration. Tool results are the primary
        // vector for accidental secret bleed (.env files, private keys on disk,
        // hardcoded tokens). The scrubber redacts in place.
        const resultScrub = scrubSecrets(typeof result === 'string' ? result : String(result));
        if (resultScrub.blocked) {
          result = `(secrets redacted in tool result: ${resultScrub.findings.map((f) => f.pattern).join(', ')})\n${resultScrub.redacted}`;
        }

        emit({ phase: 'act', kind: 'tool_result', name: tc.function.name, result, iteration });

        const claim = rawContent ? shownContent.slice(0, 500) : `${tc.function.name} call`;
        await record({
          claim,
          substrate: typeof result === 'string' ? result : String(result),
          toolName: tc.function.name,
          toolArgs: parsedArgs,
          toolResult: result,
          packet: ctx.prompt,
        });

        messages.push({ role: 'tool', tool_call_id: tc.id, content: result });
      }
    }

    const durationMs = Date.now() - start;
    return {
      finalMessage,
      iterations: iteration,
      durationMs,
      commitments,
      primitivesList,
      intentId,
      turnId,
      promptTokensUsed: ctx.estimatedTokens,
      compilerProvider: ctx.compilerProvider,
    };
  }
}
|
package/lib/canon.mjs
ADDED
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
// lib/canon.mjs
|
|
2
|
+
// SHA-256 chained append-only JSONL session log. Every turn and every
|
|
3
|
+
// verified tool call is a row. Rows chain by prev_hash to row_hash so a
|
|
4
|
+
// single edit anywhere in the file breaks chain verification at that row.
|
|
5
|
+
// The session is replayable by walking the chain forward from the first row.
|
|
6
|
+
|
|
7
|
+
import { appendFileSync, existsSync, readFileSync, mkdirSync, writeFileSync } from 'node:fs';
|
|
8
|
+
import { dirname } from 'node:path';
|
|
9
|
+
import { createHash } from 'node:crypto';
|
|
10
|
+
|
|
11
|
+
// prev_hash of the first row in a log: 64 hex zeros.
const ZERO_HASH = '0000000000000000000000000000000000000000000000000000000000000000';

/**
 * Compute the SHA-256 hex digest that identifies a row.
 *
 * The digest covers the JSON encoding of row_index, ts, type, payload and the
 * previous row's hash, in that order. `row.prev_hash` and `row.row_hash` are
 * not read; the chain link is taken from the `prevHash` argument.
 *
 * @param {object} row - Row with row_index, ts, type and payload.
 * @param {string} prevHash - Hash of the preceding row (ZERO_HASH for the first).
 * @returns {string} Hex-encoded SHA-256 digest.
 */
function hashRow(row, prevHash) {
  const base = JSON.stringify({
    row_index: row.row_index,
    ts: row.ts,
    type: row.type,
    payload: row.payload,
    prev_hash: prevHash,
  });
  return createHash('sha256').update(base).digest('hex');
}

/**
 * Open (creating if needed) a hash-chained JSONL session log and return an
 * object for appending to, querying and verifying it.
 *
 * Existing rows are read once at creation time and then kept in memory; later
 * appends update both the file and the in-memory list.
 *
 * @param {string} canonFile - Path of the JSONL log file.
 * @returns {object} `{ append, queryByIntent, queryByType, activeIntent, size,
 *   tail, verify, path, lastHash }`.
 */
export function createCanon(canonFile) {
  const dir = dirname(canonFile);
  if (!existsSync(dir)) mkdirSync(dir, { recursive: true });
  if (!existsSync(canonFile)) writeFileSync(canonFile, '');

  let rows = loadRows();
  let lastHash = rows.length > 0 ? rows[rows.length - 1].row_hash : ZERO_HASH;
  let nextIndex = rows.length;

  /** Read and parse every usable line of the log file. */
  function loadRows() {
    if (!existsSync(canonFile)) return [];
    const raw = readFileSync(canonFile, 'utf8');
    const lines = raw.split('\n').filter(Boolean);
    const out = [];
    for (const l of lines) {
      try {
        const parsed = JSON.parse(l);
        // A line such as `null` or `42` is valid JSON but not a row; treating
        // it as one would crash every later property access. Skip it like any
        // other malformed line.
        if (parsed !== null && typeof parsed === 'object') out.push(parsed);
      } catch {
        // Skip malformed lines. The chain verify will catch any integrity break.
      }
    }
    return out;
  }

  /**
   * Walk the chain from the first row, checking each row's prev_hash link and
   * recomputing its row_hash.
   *
   * @returns {object} `{ valid: true, rows, last_hash }`, or
   *   `{ valid: false, broken_at, reason }` at the first mismatch.
   */
  function verify() {
    let prev = ZERO_HASH;
    for (let i = 0; i < rows.length; i++) {
      const r = rows[i];
      if (r.prev_hash !== prev) {
        return { valid: false, broken_at: i, reason: `prev_hash mismatch at row ${i}` };
      }
      const expected = hashRow(r, prev);
      if (r.row_hash !== expected) {
        return { valid: false, broken_at: i, reason: `row_hash mismatch at row ${i}` };
      }
      prev = r.row_hash;
    }
    return { valid: true, rows: rows.length, last_hash: prev };
  }

  /**
   * Append a row of the given type, chained to the current last row.
   *
   * @param {string} type - Row type.
   * @param {*} payload - JSON-serialisable payload.
   * @returns {object} The row that was written, including its row_hash.
   */
  function append(type, payload) {
    const row = {
      row_index: nextIndex,
      ts: new Date().toISOString(),
      type,
      payload,
      prev_hash: lastHash,
    };
    row.row_hash = hashRow(row, lastHash);
    // Write to disk first so the in-memory state only advances on success.
    appendFileSync(canonFile, JSON.stringify(row) + '\n');
    rows.push(row);
    lastHash = row.row_hash;
    nextIndex++;
    return row;
  }

  /** Most recent rows (up to `limit`) whose payload.intent_id matches, oldest first. */
  function queryByIntent(intentId, limit = 20) {
    const matched = [];
    for (let i = rows.length - 1; i >= 0 && matched.length < limit; i--) {
      const r = rows[i];
      if (r.payload && r.payload.intent_id === intentId) matched.push(r);
    }
    return matched.reverse();
  }

  /** Most recent rows (up to `limit`) of the given type, oldest first. */
  function queryByType(type, limit = 10000) {
    const matched = [];
    for (let i = rows.length - 1; i >= 0 && matched.length < limit; i--) {
      if (rows[i].type === type) matched.push(rows[i]);
    }
    return matched.reverse();
  }

  /** The most recent `user_intent` row, or null when there is none. */
  function activeIntent() {
    for (let i = rows.length - 1; i >= 0; i--) {
      if (rows[i].type === 'user_intent') return rows[i];
    }
    return null;
  }

  /** Number of rows currently held. */
  function size() {
    return rows.length;
  }

  /**
   * The last `n` rows, oldest first.
   *
   * @param {number} [n=5] - How many rows to return; values <= 0 yield [].
   */
  function tail(n = 5) {
    // slice(-0) is slice(0), which would return every row, so guard it.
    return n > 0 ? rows.slice(-n) : [];
  }

  return {
    append,
    queryByIntent,
    queryByType,
    activeIntent,
    size,
    tail,
    verify,
    path: canonFile,
    lastHash: () => lastHash,
  };
}
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
// lib/commitment.mjs
|
|
2
|
+
// The atomic commit-a-row-to-the-session-log operation. Every tool call and
|
|
3
|
+
// every final message lands in this function. The verification layer runs
|
|
4
|
+
// first; its verdict determines the trust delta and whether the row is
|
|
5
|
+
// marked VERIFIED or REFUTED. The row is then appended to the session log.
|
|
6
|
+
|
|
7
|
+
import { witness } from './witness/index.mjs';
|
|
8
|
+
|
|
9
|
+
/**
 * Map a verification verdict to the trust adjustment stored with its row.
 *
 * @param {string} verdict - Verdict name produced by the verification layer.
 * @returns {number} The adjustment for that verdict; 0 for any unrecognised value.
 */
function trustDelta(verdict) {
  switch (verdict) {
    case 'SURVIVED':
      return 3;
    case 'VERIFIED':
      return 1;
    case 'REFUTED':
      return -2;
    case 'CHALLENGED':
      return -1;
    case 'PENDING':
      return -0.1;
    default:
      return 0;
  }
}
|
|
18
|
+
|
|
19
|
+
/**
 * Verify one claim and append the outcome to the session log.
 *
 * The claim and its supporting data are passed to `witness`; the verdict it
 * returns is converted to a trust delta, and a `witnessed_commitment` row is
 * appended to `canon` holding the claim, the tool name and arguments, at most
 * the first 400 characters of a string tool result, the verdict, its
 * primitives and the delta.
 *
 * @param {object} canon - Session log; must provide `append(type, payload)`.
 * @param {object} details - intentId, turnId, claim, substrate, toolName,
 *   toolArgs, toolResult, cwd, packet and rules for this commitment.
 * @returns {Promise<object>} `{ row, verdict, primitives, trustDelta }`.
 */
export async function mintCommitment(canon, {
  intentId,
  turnId,
  claim,
  substrate,
  toolName,
  toolArgs,
  toolResult,
  cwd,
  packet,
  rules,
}) {
  // Verification runs first; nothing is logged until it has returned.
  const { verdict, primitives } = await witness({
    claim,
    substrate,
    toolName,
    toolArgs,
    toolResult,
    cwd,
    packet,
    canon,
    intentId,
    rules,
  });

  const delta = trustDelta(verdict);

  // Only string results are stored, and only their first 400 characters.
  let resultSnippet = null;
  if (typeof toolResult === 'string') {
    resultSnippet = toolResult.slice(0, 400);
  }

  const payload = {
    intent_id: intentId,
    turn_id: turnId,
    claim,
    tool_name: toolName || null,
    tool_args: toolArgs || null,
    tool_result_snippet: resultSnippet,
    verdict,
    primitives,
    trust_delta: delta,
  };
  const row = canon.append('witnessed_commitment', payload);

  return { row, verdict, primitives, trustDelta: delta };
}
|