@kbediako/codex-orchestrator 0.1.16 → 0.1.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/bin/codex-orchestrator.js +119 -2
- package/dist/orchestrator/src/cli/exec/experience.js +20 -2
- package/dist/orchestrator/src/cli/exec/tfgrpo.js +13 -1
- package/dist/orchestrator/src/cli/orchestrator.js +120 -2
- package/dist/orchestrator/src/cli/rlm/symbolic.js +494 -38
- package/dist/orchestrator/src/cli/rlmRunner.js +248 -15
- package/dist/orchestrator/src/cli/run/manifest.js +200 -4
- package/dist/orchestrator/src/cli/services/pipelineExperience.js +122 -0
- package/dist/orchestrator/src/cloud/CodexCloudTaskExecutor.js +34 -2
- package/docs/README.md +4 -1
- package/package.json +2 -1
- package/skills/collab-deliberation/SKILL.md +72 -8
- package/skills/delegate-early/SKILL.md +13 -41
- package/skills/delegation-usage/DELEGATION_GUIDE.md +18 -4
- package/skills/delegation-usage/SKILL.md +21 -6
- package/skills/docs-first/SKILL.md +1 -0
- package/skills/standalone-review/SKILL.md +13 -1
- package/templates/codex/AGENTS.md +30 -1
|
@@ -1,6 +1,7 @@
|
|
|
1
|
-
import { mkdir, writeFile } from 'node:fs/promises';
|
|
1
|
+
import { mkdir, readFile, writeFile } from 'node:fs/promises';
|
|
2
2
|
import { join, relative } from 'node:path';
|
|
3
3
|
const DEFAULT_ALLOWED_PURPOSES = new Set(['summarize', 'extract', 'classify', 'verify']);
|
|
4
|
+
const SUBCALL_POINTER_PREFIX = 'subcall:';
|
|
4
5
|
/**
 * Measure how many bytes a string occupies once encoded as UTF-8.
 *
 * @param {string | null | undefined} value - Text to measure; null and
 *   undefined are measured as the empty string.
 * @returns {number} Encoded size in bytes.
 */
function byteLength(value) {
    const text = value ?? '';
    return Buffer.byteLength(text, 'utf8');
}
|
|
@@ -40,6 +41,44 @@ function truncateUtf8ToBytes(value, maxBytes) {
|
|
|
40
41
|
}
|
|
41
42
|
return buffer.slice(0, end).toString('utf8');
|
|
42
43
|
}
|
|
44
|
+
/**
 * Tell whether a pointer uses the `ctx:` prefix.
 *
 * Pointers come from planner output, so a non-string value is reported as
 * "not a context pointer" instead of raising a TypeError.
 *
 * @param {unknown} pointer - Candidate pointer value.
 * @returns {boolean} True only for strings that start with `ctx:`.
 */
function isContextPointer(pointer) {
    return typeof pointer === 'string' && pointer.startsWith('ctx:');
}
|
|
47
|
+
/**
 * Parse a `subcall:<iteration>:<subcall_id>` pointer.
 *
 * The iteration must consist solely of decimal digits and be a positive safe
 * integer. `Number.parseInt` alone would accept inputs such as `1abc` or
 * `1.5`, so the digits are checked explicitly before conversion.
 *
 * @param {unknown} pointer - Candidate pointer value.
 * @returns {{ iteration: number, subcallId: string } | null} The parsed
 *   parts, or null when the value is not a well-formed subcall pointer.
 */
function parseSubcallPointer(pointer) {
    if (typeof pointer !== 'string' || !pointer.startsWith(SUBCALL_POINTER_PREFIX)) {
        return null;
    }
    const body = pointer.slice(SUBCALL_POINTER_PREFIX.length);
    const separatorIndex = body.indexOf(':');
    // Both the iteration and the id must be non-empty.
    if (separatorIndex <= 0 || separatorIndex >= body.length - 1) {
        return null;
    }
    const iterationRaw = body.slice(0, separatorIndex);
    if (!/^\d+$/.test(iterationRaw)) {
        return null;
    }
    const iteration = Number.parseInt(iterationRaw, 10);
    if (!Number.isSafeInteger(iteration) || iteration <= 0) {
        return null;
    }
    // Everything after the first separator is the id, so ids may contain ':'.
    const subcallId = body.slice(separatorIndex + 1);
    return { iteration, subcallId };
}
|
|
64
|
+
/**
 * Compose the pointer string that identifies one subcall's output.
 *
 * @param {number} iteration - Iteration in which the subcall ran.
 * @param {string} subcallId - Identifier of the subcall.
 * @returns {string} The subcall pointer prefix followed by
 *   `<iteration>:<subcallId>`.
 */
function buildSubcallPointer(iteration, subcallId) {
    const location = `${iteration}:${subcallId}`;
    return `${SUBCALL_POINTER_PREFIX}${location}`;
}
|
|
67
|
+
/**
 * Read a byte window from the stored output of a previous subcall.
 *
 * The window is aligned to UTF-8 character boundaries: continuation bytes at
 * the start are skipped and a character cut by the end is dropped, so the
 * result never contains U+FFFD replacement characters produced by slicing
 * through a multi-byte character.
 *
 * @param {{ get(pointer: string): ({ outputPath: string } | undefined) }} pointerStore
 *   Lookup from subcall pointer to its record.
 * @param {string} pointer - Subcall pointer to resolve.
 * @param {number} offset - Byte offset into the output; floored, and negative
 *   values are treated as 0.
 * @param {number} bytes - Maximum number of bytes to return; floored.
 * @returns {Promise<string>} The decoded window, or '' when the window is
 *   empty, starts past the end of the output, or a bound is NaN.
 * @throws {Error} `plan_validation_error` when the pointer has no record.
 */
async function readSubcallPointerSnippet(pointerStore, pointer, offset, bytes) {
    const record = pointerStore.get(pointer);
    if (!record) {
        throw new Error('plan_validation_error');
    }
    // Read raw bytes directly; offsets are byte offsets, so there is no need
    // to decode the file and re-encode it.
    const sourceBytes = await readFile(record.outputPath);
    const safeOffset = Math.max(0, Math.floor(offset));
    const safeBytes = Math.max(0, Math.floor(bytes));
    if (Number.isNaN(safeOffset) ||
        Number.isNaN(safeBytes) ||
        safeOffset >= sourceBytes.length ||
        safeBytes <= 0) {
        return '';
    }
    const isContinuationByte = (byte) => (byte & 0xc0) === 0x80;
    let start = safeOffset;
    let end = Math.min(sourceBytes.length, safeOffset + safeBytes);
    // The offset landed inside a character: move forward to the next lead byte.
    while (start < end && isContinuationByte(sourceBytes[start])) {
        start += 1;
    }
    // The window ends inside a character: step back to that character's lead
    // byte so the partial character is excluded.
    if (end < sourceBytes.length) {
        while (end > start && isContinuationByte(sourceBytes[end])) {
            end -= 1;
        }
    }
    return sourceBytes.subarray(start, end).toString('utf8');
}
|
|
43
82
|
async function mapWithConcurrency(items, maxConcurrency, worker) {
|
|
44
83
|
if (items.length === 0) {
|
|
45
84
|
return [];
|
|
@@ -203,6 +242,9 @@ function buildPlannerRetryPrompt(prompt, errors) {
|
|
|
203
242
|
if (errors.includes('final_requires_subcall')) {
|
|
204
243
|
headerLines.push('Do not return intent=final until after at least one subcall.');
|
|
205
244
|
}
|
|
245
|
+
if (errors.includes('final_var_unbound')) {
|
|
246
|
+
headerLines.push('Use final_var only when it matches a previously declared subcalls[].output_var.');
|
|
247
|
+
}
|
|
206
248
|
if (errors.length > 0) {
|
|
207
249
|
headerLines.push(`Previous error: ${errors.join('; ')}`);
|
|
208
250
|
}
|
|
@@ -230,10 +272,26 @@ function validatePlan(plan, budgets) {
|
|
|
230
272
|
if (!['continue', 'final', 'pause', 'fail'].includes(plan.intent)) {
|
|
231
273
|
throw new Error('plan_validation_error');
|
|
232
274
|
}
|
|
275
|
+
const finalVarRaw = plan.final_var;
|
|
276
|
+
const finalVar = typeof finalVarRaw === 'string' && finalVarRaw.trim().length > 0
|
|
277
|
+
? finalVarRaw.trim()
|
|
278
|
+
: undefined;
|
|
279
|
+
if (finalVarRaw !== undefined && !finalVar) {
|
|
280
|
+
throw new Error('plan_validation_error');
|
|
281
|
+
}
|
|
282
|
+
if (finalVar) {
|
|
283
|
+
plan.final_var = finalVar;
|
|
284
|
+
}
|
|
233
285
|
if (plan.intent === 'final') {
|
|
234
|
-
|
|
286
|
+
const finalAnswer = typeof plan.final_answer === 'string' && plan.final_answer.trim().length > 0
|
|
287
|
+
? plan.final_answer.trim()
|
|
288
|
+
: null;
|
|
289
|
+
if (!finalAnswer && !finalVar) {
|
|
235
290
|
throw new Error('plan_validation_error');
|
|
236
291
|
}
|
|
292
|
+
if (finalAnswer) {
|
|
293
|
+
plan.final_answer = finalAnswer;
|
|
294
|
+
}
|
|
237
295
|
}
|
|
238
296
|
const reads = [];
|
|
239
297
|
const rawReads = Array.isArray(plan.reads) ? plan.reads : [];
|
|
@@ -277,6 +335,23 @@ function validatePlan(plan, budgets) {
|
|
|
277
335
|
const rawSubcalls = Array.isArray(plan.subcalls) ? plan.subcalls : [];
|
|
278
336
|
for (const entry of rawSubcalls) {
|
|
279
337
|
const purposeInfo = normalizePurpose(entry?.purpose);
|
|
338
|
+
const parentPointerRaw = entry?.parent_pointer;
|
|
339
|
+
if (parentPointerRaw !== undefined && typeof parentPointerRaw !== 'string') {
|
|
340
|
+
throw new Error('plan_validation_error');
|
|
341
|
+
}
|
|
342
|
+
const outputVarRaw = entry?.output_var;
|
|
343
|
+
if (outputVarRaw !== undefined && typeof outputVarRaw !== 'string') {
|
|
344
|
+
throw new Error('plan_validation_error');
|
|
345
|
+
}
|
|
346
|
+
const parentPointer = typeof parentPointerRaw === 'string' && parentPointerRaw.trim().length > 0
|
|
347
|
+
? parentPointerRaw.trim()
|
|
348
|
+
: undefined;
|
|
349
|
+
const outputVar = typeof outputVarRaw === 'string' && outputVarRaw.trim().length > 0
|
|
350
|
+
? outputVarRaw.trim()
|
|
351
|
+
: undefined;
|
|
352
|
+
if (outputVarRaw !== undefined && !outputVar) {
|
|
353
|
+
throw new Error('plan_validation_error');
|
|
354
|
+
}
|
|
280
355
|
const maxInputBytes = toNumber(entry?.max_input_bytes);
|
|
281
356
|
if (!maxInputBytes || maxInputBytes <= 0) {
|
|
282
357
|
throw new Error('plan_validation_error');
|
|
@@ -320,6 +395,8 @@ function validatePlan(plan, budgets) {
|
|
|
320
395
|
subcalls.push({
|
|
321
396
|
id: '',
|
|
322
397
|
purpose: purposeInfo.value,
|
|
398
|
+
parent_pointer: parentPointer,
|
|
399
|
+
output_var: outputVar,
|
|
323
400
|
snippets,
|
|
324
401
|
spans,
|
|
325
402
|
max_input_bytes: Math.floor(maxInputBytes)
|
|
@@ -340,41 +417,116 @@ function validatePlan(plan, budgets) {
|
|
|
340
417
|
}
|
|
341
418
|
};
|
|
342
419
|
}
|
|
343
|
-
function validatePlanPointers(validation, contextStore) {
|
|
420
|
+
/**
 * Verify that every pointer referenced by a validated plan can be resolved.
 *
 * Checked pointers are each read's `pointer`, each subcall's
 * `parent_pointer`, and each subcall snippet's `pointer`. Falsy pointers are
 * skipped.
 *
 * @param {{ reads: Array<{ pointer?: string }>, subcalls: Array<{ parent_pointer?: string, snippets: Array<{ pointer?: string }> }> }} validation
 *   Result of plan validation.
 * @param {{ validatePointer(pointer: string): unknown }} contextStore
 *   Store consulted for `ctx:` pointers.
 * @param {{ has(pointer: string): boolean }} subcallPointers
 *   Known subcall output pointers.
 * @throws {Error} `plan_validation_error` when a pointer is neither a valid
 *   context pointer nor a known subcall pointer.
 */
function validatePlanPointers(validation, contextStore, subcallPointers) {
    const resolves = (pointer) => {
        if (isContextPointer(pointer)) {
            return Boolean(contextStore.validatePointer(pointer));
        }
        return parseSubcallPointer(pointer) ? subcallPointers.has(pointer) : false;
    };
    const requireResolvable = (pointer) => {
        if (pointer && !resolves(pointer)) {
            throw new Error('plan_validation_error');
        }
    };
    for (const { pointer } of validation.reads) {
        requireResolvable(pointer);
    }
    for (const subcall of validation.subcalls) {
        requireResolvable(subcall.parent_pointer);
        for (const { pointer } of subcall.snippets) {
            requireResolvable(pointer);
        }
    }
}
|
|
449
|
+
/**
 * Render one prior subcall as a single summary line for the planner prompt.
 *
 * The preview is cut to 160 bytes, then whitespace runs are collapsed to a
 * single space and the ends are trimmed. The `output_var` and `preview`
 * parts are appended only when present.
 *
 * @param {{ id: string, pointer: string, output_bytes: number, preview?: string, output_var?: string }} entry
 *   Prior subcall output record.
 * @returns {string} `<id>: <pointer> (<n> bytes)[ output_var=<name>][ preview="<text>"]`.
 */
function formatSubcallSummary(entry) {
    const clipped = truncateUtf8ToBytes(entry.preview ?? '', 160);
    const flattened = clipped.replace(/\s+/g, ' ').trim();
    const parts = [`${entry.id}: ${entry.pointer} (${entry.output_bytes} bytes)`];
    if (entry.output_var) {
        parts.push(`output_var=${entry.output_var}`);
    }
    if (flattened) {
        parts.push(`preview="${flattened}"`);
    }
    return parts.join(' ');
}
|
|
457
|
+
/**
 * Describe the two pointer shapes shown to the planner.
 *
 * @returns {string} The context pointer form and the subcall pointer form,
 *   separated by ` | `.
 */
function buildPointerReferenceHint() {
    const contextForm = 'ctx:<object_id>#chunk:<chunk_id>';
    const subcallForm = 'subcall:<iteration>:<subcall_id>';
    return `${contextForm} | ${subcallForm}`;
}
|
|
460
|
+
/**
 * Produce the schema fragment describing the optional subcall fields.
 *
 * @returns {string} JSON-like text for `parent_pointer` and `output_var`,
 *   each followed by a comma so the fragment can be spliced into a larger
 *   object literal.
 */
function buildSubcallHints() {
    const optionalFields = [
        '"parent_pointer": "optional pointer for recursion lineage"',
        '"output_var": "optional variable name"'
    ];
    return optionalFields.map((field) => `${field},`).join(' ');
}
|
|
463
|
+
/**
 * Tell whether a pointer parses as a subcall pointer.
 *
 * @param {string} pointer - Candidate pointer.
 * @returns {boolean} True when `parseSubcallPointer` returns a non-null result.
 */
function hasSubcallPointer(pointer) {
    const parsed = parseSubcallPointer(pointer);
    return parsed !== null;
}
|
|
466
|
+
/**
 * Fetch the text for one subcall snippet.
 *
 * A snippet with a pointer is read from the context store (`ctx:` pointers)
 * or from a prior subcall's output (subcall pointers). A snippet without a
 * pointer must carry a numeric `start_byte` and is read as a raw span.
 *
 * @param {{ snippet: { pointer?: string, offset?: number, bytes: number, start_byte?: number }, contextStore: object, subcallPointers: object }} params
 *   The snippet plus the stores used to resolve it.
 * @returns {Promise<string>} The snippet text.
 * @throws {Error} `plan_validation_error` when the pointer kind is not
 *   recognized, or when there is neither a pointer nor a numeric `start_byte`.
 */
async function resolveSnippetText(params) {
    const { snippet, contextStore, subcallPointers } = params;
    const { pointer } = snippet;
    if (!pointer) {
        if (typeof snippet.start_byte !== 'number') {
            throw new Error('plan_validation_error');
        }
        const span = await contextStore.readSpan(snippet.start_byte, snippet.bytes);
        return span.text;
    }
    if (isContextPointer(pointer)) {
        const chunk = await contextStore.read(pointer, snippet.offset ?? 0, snippet.bytes);
        return chunk.text;
    }
    if (!hasSubcallPointer(pointer)) {
        throw new Error('plan_validation_error');
    }
    return readSubcallPointerSnippet(subcallPointers, pointer, snippet.offset ?? 0, snippet.bytes);
}
|
|
484
|
+
/**
 * Resolve one planned read into a `{ pointer, excerpt }` record.
 *
 * Pointer reads come from the context store (`ctx:` pointers) or from a
 * prior subcall's output (subcall pointers). A read without a pointer but
 * with a numeric `start_byte` is read as a raw span and labelled
 * `start_byte=<n> bytes=<m>`. Every excerpt is cut to `maxBytes`.
 *
 * @param {{ read: { pointer?: string, offset?: number, bytes: number, start_byte?: number }, contextStore: object, subcallPointers: object, maxBytes: number }} params
 *   The read plus the stores and byte limit used to resolve it.
 * @returns {Promise<{ pointer: string, excerpt: string } | null>} The
 *   excerpt record, or null when the read has neither a pointer nor a
 *   numeric `start_byte`.
 * @throws {Error} `plan_validation_error` when the pointer kind is not recognized.
 */
async function resolveReadExcerpt(params) {
    const { read, contextStore, subcallPointers, maxBytes } = params;
    const toExcerpt = (label, text) => ({
        pointer: label,
        excerpt: truncateUtf8ToBytes(text, maxBytes)
    });
    if (!read.pointer) {
        if (typeof read.start_byte !== 'number') {
            return null;
        }
        const span = await contextStore.readSpan(read.start_byte, read.bytes);
        return toExcerpt(`start_byte=${read.start_byte} bytes=${read.bytes}`, span.text);
    }
    if (isContextPointer(read.pointer)) {
        const chunk = await contextStore.read(read.pointer, read.offset ?? 0, read.bytes);
        return toExcerpt(read.pointer, chunk.text);
    }
    if (hasSubcallPointer(read.pointer)) {
        const stored = await readSubcallPointerSnippet(subcallPointers, read.pointer, read.offset ?? 0, read.bytes);
        return toExcerpt(read.pointer, stored);
    }
    throw new Error('plan_validation_error');
}
|
|
361
512
|
function buildPlannerPrompt(params) {
|
|
362
|
-
const { goal, contextStore, budgets, priorReads, priorSearches, priorSubcalls } = params;
|
|
513
|
+
const { goal, contextStore, budgets, priorReads, priorSearches, priorSubcalls, deliberationBrief } = params;
|
|
363
514
|
const baseLines = [
|
|
364
515
|
'You are a symbolic RLM planner. Return JSON only (no prose).',
|
|
365
516
|
`Goal: ${goal}`,
|
|
366
517
|
`Context object_id: ${contextStore.objectId}`,
|
|
367
518
|
`Chunk count: ${contextStore.chunkCount}`,
|
|
368
|
-
`Pointer format:
|
|
519
|
+
`Pointer format: ${buildPointerReferenceHint()}`,
|
|
369
520
|
'',
|
|
370
521
|
'Schema (v1):',
|
|
371
522
|
'{',
|
|
372
523
|
' "schema_version": 1,',
|
|
373
524
|
' "intent": "continue | final | pause | fail",',
|
|
374
|
-
|
|
525
|
+
` "reads": [{ "pointer": "${buildPointerReferenceHint()}", "offset": 0, "bytes": 4096, "reason": "..." }],`,
|
|
375
526
|
' "searches": [{ "query": "...", "top_k": 20, "reason": "..." }],',
|
|
376
|
-
|
|
377
|
-
' "final_answer": "required when intent=final"',
|
|
527
|
+
` "subcalls": [{ "purpose": "summarize | extract | classify | verify", ${buildSubcallHints()} "snippets": [{ "pointer": "${buildPointerReferenceHint()}", "offset": 0, "bytes": 2048 }], "max_input_bytes": 120000, "expected_output": "short summary" }],`,
|
|
528
|
+
' "final_answer": "required when intent=final unless final_var is set",',
|
|
529
|
+
' "final_var": "optional variable name bound by subcalls[].output_var"',
|
|
378
530
|
'}',
|
|
379
531
|
'',
|
|
380
532
|
'Constraints:',
|
|
@@ -382,6 +534,7 @@ function buildPlannerPrompt(params) {
|
|
|
382
534
|
`- Max bytes per read: ${budgets.maxBytesPerChunkRead}.`,
|
|
383
535
|
`- Max bytes per snippet: ${budgets.maxBytesPerSnippet}.`,
|
|
384
536
|
'- Do not include full context; use pointers.',
|
|
537
|
+
'- Prefer prior subcall pointers for recursive chaining.',
|
|
385
538
|
'- Request at least one subcall before intent=final.',
|
|
386
539
|
''
|
|
387
540
|
];
|
|
@@ -410,12 +563,18 @@ function buildPlannerPrompt(params) {
|
|
|
410
563
|
sections.push({ key: 'reads_dropped', lines });
|
|
411
564
|
}
|
|
412
565
|
if (priorSubcalls.length > 0) {
|
|
413
|
-
const lines = ['Prior subcall
|
|
566
|
+
const lines = ['Prior subcall references:'];
|
|
414
567
|
for (const entry of priorSubcalls) {
|
|
415
|
-
lines.push(`- ${entry
|
|
568
|
+
lines.push(`- ${formatSubcallSummary(entry)}`);
|
|
416
569
|
}
|
|
417
570
|
sections.push({ key: 'subcalls_dropped', lines });
|
|
418
571
|
}
|
|
572
|
+
if (typeof deliberationBrief === 'string' && deliberationBrief.trim().length > 0) {
|
|
573
|
+
sections.push({
|
|
574
|
+
key: 'deliberation_dropped',
|
|
575
|
+
lines: ['Deliberation brief:', deliberationBrief.trim()]
|
|
576
|
+
});
|
|
577
|
+
}
|
|
419
578
|
let truncation = {};
|
|
420
579
|
let prompt = [...baseLines];
|
|
421
580
|
for (const section of sections) {
|
|
@@ -440,6 +599,176 @@ function buildPlannerPrompt(params) {
|
|
|
440
599
|
}
|
|
441
600
|
return { prompt: promptString, truncation };
|
|
442
601
|
}
|
|
602
|
+
/**
 * Normalize a deliberation trigger to one of the recognized labels.
 *
 * @param {unknown} reason - Trigger value to normalize.
 * @returns {string} The reason itself when it is `bootstrap`, `cadence`,
 *   `planner_recovery` or `no_subcall_progress`; otherwise `cadence`.
 */
function formatDeliberationReason(reason) {
    const recognized = ['bootstrap', 'cadence', 'planner_recovery', 'no_subcall_progress'];
    return recognized.includes(reason) ? reason : 'cadence';
}
|
|
616
|
+
/**
 * Make sure a thrown value is an Error and tag it with artifact paths.
 *
 * An existing Error is reused (and mutated); any other value is wrapped in a
 * new Error whose message is `String(value)`.
 *
 * @param {unknown} error - The thrown value.
 * @param {object | undefined} artifactPaths - Paths to attach as
 *   `artifactPaths`; nothing is attached when this is falsy.
 * @returns {Error} The original or wrapping Error.
 */
function attachDeliberationArtifactPaths(error, artifactPaths) {
    let wrapped = error;
    if (!(wrapped instanceof Error)) {
        wrapped = new Error(String(error));
    }
    if (!artifactPaths) {
        return wrapped;
    }
    wrapped.artifactPaths = artifactPaths;
    return wrapped;
}
|
|
623
|
+
/**
 * Recover the artifact paths attached to a deliberation error, if any.
 *
 * @param {unknown} error - A caught value.
 * @returns {{ prompt: string, output: string, meta: string } | undefined}
 *   A fresh object holding the three paths when `error.artifactPaths` is an
 *   object whose `prompt`, `output` and `meta` are all strings; otherwise
 *   undefined.
 */
function extractDeliberationArtifactPaths(error) {
    const isObjectLike = (value) => Boolean(value) && typeof value === 'object';
    if (!isObjectLike(error)) {
        return undefined;
    }
    const candidate = error.artifactPaths;
    if (!isObjectLike(candidate)) {
        return undefined;
    }
    const { prompt, output, meta } = candidate;
    const allStrings = [prompt, output, meta].every((entry) => typeof entry === 'string');
    return allStrings ? { prompt, output, meta } : undefined;
}
|
|
643
|
+
/**
 * Decide whether a deliberation step is due this iteration, and why.
 *
 * Checks run in priority order:
 *   1. `bootstrap` on iteration 1;
 *   2. `planner_recovery` when the previous iteration recorded planner errors;
 *   3. `no_subcall_progress` when the previous iteration had no searches,
 *      reads or subcalls;
 *   4. `cadence` when no deliberation has run yet, or at least
 *      `minIntervalIterations` iterations have passed since the last one.
 *
 * @param {{ iteration: number, previousIteration?: { planner_errors?: unknown[], searches?: unknown[], reads?: unknown[], subcalls?: unknown[] } | null, lastDeliberationIteration: number, minIntervalIterations: number }} params
 *   Loop state used for the decision.
 * @returns {'bootstrap' | 'planner_recovery' | 'no_subcall_progress' | 'cadence' | null}
 *   The trigger, or null when no deliberation is due.
 */
function selectDeliberationReason(params) {
    if (params.iteration === 1) {
        return 'bootstrap';
    }
    const previous = params.previousIteration;
    if ((previous?.planner_errors?.length ?? 0) > 0) {
        return 'planner_recovery';
    }
    const noSearches = (previous?.searches?.length ?? 0) === 0;
    const noReads = (previous?.reads?.length ?? 0) === 0;
    const noSubcalls = (previous?.subcalls?.length ?? 0) === 0;
    if (noSearches && noReads && noSubcalls) {
        return 'no_subcall_progress';
    }
    // Math.max(1, NaN) is NaN, which would make the comparison below always
    // false; a missing or non-numeric interval falls back to the minimum of 1.
    const requestedInterval = Math.floor(params.minIntervalIterations);
    const minInterval = Number.isNaN(requestedInterval) ? 1 : Math.max(1, requestedInterval);
    if (params.lastDeliberationIteration <= 0 ||
        params.iteration - params.lastDeliberationIteration >= minInterval) {
        return 'cadence';
    }
    return null;
}
|
|
663
|
+
/**
 * Build the prompt sent to the deliberation step.
 *
 * The prompt states the goal, iteration, trigger and context identity, asks
 * for exactly four labelled lines, and then appends the latest search, read
 * and subcall when any exist.
 *
 * @param {{ goal: string, iteration: number, reason: string, contextStore: { objectId: string, chunkCount: number }, priorReads: Array<{ pointer?: string }>, priorSearches: Array<{ query?: string, results?: unknown[] }>, priorSubcalls: Array<{ id?: string, pointer?: string, preview?: string }>, maxSummaryBytes: number }} params
 *   Loop state to describe in the prompt.
 * @returns {string} The prompt, one instruction per line.
 */
function buildDeliberationPrompt(params) {
    // The advertised budget is never below 256 bytes. Math.max(256, NaN) is
    // NaN, so a missing or non-numeric budget falls back to that floor rather
    // than printing "NaN bytes".
    const requestedBytes = Math.floor(params.maxSummaryBytes);
    const maxBytes = Number.isNaN(requestedBytes) ? 256 : Math.max(256, requestedBytes);
    const lines = [
        'You are a deliberation coordinator for an iterative symbolic planning loop.',
        `Goal: ${params.goal}`,
        `Iteration: ${params.iteration}`,
        `Trigger: ${formatDeliberationReason(params.reason)}`,
        `Context object_id: ${params.contextStore.objectId}`,
        `Context chunks: ${params.contextStore.chunkCount}`,
        'Respond with exactly four labeled lines:',
        'Decision focus: <what to optimize next>',
        'Risks: <top failure modes to avoid>',
        'Context gaps: <missing evidence/plans>',
        'Planner directives: <3 concise directives>',
        `Keep total output under ${maxBytes} bytes and avoid markdown tables.`
    ];
    if (params.priorSearches.length > 0) {
        const latestSearch = params.priorSearches[params.priorSearches.length - 1];
        const hitCount = latestSearch?.results?.length ?? 0;
        lines.push(`Latest search: query="${latestSearch?.query ?? ''}" hits=${hitCount}`);
    }
    if (params.priorReads.length > 0) {
        const latestRead = params.priorReads[params.priorReads.length - 1];
        lines.push(`Latest read pointer: ${latestRead?.pointer ?? ''}`);
    }
    if (params.priorSubcalls.length > 0) {
        const latestSubcall = params.priorSubcalls[params.priorSubcalls.length - 1];
        lines.push(`Latest subcall id: ${latestSubcall?.id ?? ''}`);
        lines.push(`Latest subcall pointer: ${latestSubcall?.pointer ?? ''}`);
        // Only a short preview is included to keep the prompt small.
        lines.push(`Latest subcall preview: ${truncateUtf8ToBytes(latestSubcall?.preview ?? '', 320)}`);
    }
    return lines.join('\n');
}
|
|
696
|
+
/**
 * Run one deliberation step and return its record and brief.
 *
 * When `options.logArtifacts` is exactly true, the prompt, output and a JSON
 * meta file are written under `<runDir>/deliberation/` as
 * `iteration-NNNN-{prompt.txt,output.txt,meta.json}`, and their paths
 * relative to `repoRoot` are reported in the record.
 *
 * If `options.run` fails, an empty output file and a meta file carrying the
 * error message are written (when logging), and the failure is rethrown with
 * the artifact paths attached.
 *
 * @param {{ options: { run: Function, logArtifacts?: boolean, strategy?: string, maxSummaryBytes: number }, goal: string, iteration: number, reason: string, runDir: string, repoRoot: string, contextStore: object, priorReads: unknown[], priorSearches: unknown[], priorSubcalls: unknown[] }} params
 *   Loop state and deliberation options.
 * @returns {Promise<{ record: { status: 'ran', reason: string, strategy: unknown, prompt_bytes: number, output_bytes: number, artifact_paths: object | undefined }, brief: string }>}
 *   The iteration record and the output truncated to `options.maxSummaryBytes`.
 */
async function runDeliberationStep(params) {
    const { options, iteration } = params;
    const prompt = buildDeliberationPrompt({
        goal: params.goal,
        iteration,
        reason: params.reason,
        contextStore: params.contextStore,
        priorReads: params.priorReads,
        priorSearches: params.priorSearches,
        priorSubcalls: params.priorSubcalls,
        maxSummaryBytes: options.maxSummaryBytes
    });
    const promptBytes = byteLength(prompt);

    // `artifacts` stays null unless artifact logging is switched on.
    let artifacts = null;
    if (options.logArtifacts === true) {
        const deliberationDir = join(params.runDir, 'deliberation');
        await mkdir(deliberationDir, { recursive: true });
        const stem = `iteration-${String(iteration).padStart(4, '0')}`;
        const files = {
            prompt: join(deliberationDir, `${stem}-prompt.txt`),
            output: join(deliberationDir, `${stem}-output.txt`),
            meta: join(deliberationDir, `${stem}-meta.json`)
        };
        await writeFile(files.prompt, prompt, 'utf8');
        artifacts = {
            files,
            relativePaths: {
                prompt: relative(params.repoRoot, files.prompt),
                output: relative(params.repoRoot, files.output),
                meta: relative(params.repoRoot, files.meta)
            }
        };
    }

    // Writes the output file first and the meta file second, in both the
    // success path and the failure path.
    const persistOutcome = async (text, outputBytes, extraMeta) => {
        await writeFile(artifacts.files.output, text, 'utf8');
        const meta = {
            iteration,
            reason: formatDeliberationReason(params.reason),
            strategy: options.strategy,
            prompt_bytes: promptBytes,
            output_bytes: outputBytes,
            ...extraMeta
        };
        await writeFile(artifacts.files.meta, JSON.stringify(meta, null, 2), 'utf8');
    };

    let output;
    try {
        output = await options.run(prompt, {
            iteration,
            reason: formatDeliberationReason(params.reason)
        });
    }
    catch (error) {
        if (artifacts) {
            const errorMessage = error instanceof Error ? error.message : String(error);
            await persistOutcome('', 0, { error: errorMessage });
        }
        throw attachDeliberationArtifactPaths(error, artifacts?.relativePaths);
    }

    const brief = truncateUtf8ToBytes(output ?? '', options.maxSummaryBytes);
    const outputBytes = byteLength(brief);
    if (artifacts) {
        await persistOutcome(brief, outputBytes, {});
    }
    return {
        record: {
            status: 'ran',
            reason: formatDeliberationReason(params.reason),
            strategy: options.strategy,
            prompt_bytes: promptBytes,
            output_bytes: outputBytes,
            artifact_paths: artifacts?.relativePaths
        },
        brief
    };
}
|
|
443
772
|
/**
 * Persist the loop state as pretty-printed JSON.
 *
 * @param {string} path - Destination file path.
 * @param {unknown} state - Value to serialize with two-space indentation.
 * @returns {Promise<void>} Resolves once the file has been written.
 */
async function writeState(path, state) {
    const serialized = JSON.stringify(state, null, 2);
    await writeFile(path, serialized, 'utf8');
}
|
|
@@ -466,6 +795,11 @@ export async function runSymbolicLoop(options) {
|
|
|
466
795
|
let priorReads = [];
|
|
467
796
|
let priorSearches = [];
|
|
468
797
|
let priorSubcalls = [];
|
|
798
|
+
const subcallPointers = new Map();
|
|
799
|
+
const variableBindings = new Map();
|
|
800
|
+
let lastDeliberationIteration = 0;
|
|
801
|
+
let deliberationRuns = 0;
|
|
802
|
+
let latestDeliberationBrief = null;
|
|
469
803
|
const finalize = async (status) => {
|
|
470
804
|
state.final = status ?? { status: 'error', exitCode: 10 };
|
|
471
805
|
await writeState(statePath, state);
|
|
@@ -476,13 +810,81 @@ export async function runSymbolicLoop(options) {
|
|
|
476
810
|
if (timeExceeded()) {
|
|
477
811
|
return await finalize({ status: 'max_minutes', exitCode: 3 });
|
|
478
812
|
}
|
|
813
|
+
const previousIteration = state.symbolic_iterations[state.symbolic_iterations.length - 1] ?? null;
|
|
814
|
+
let deliberation;
|
|
815
|
+
const deliberationOptions = options.deliberation;
|
|
816
|
+
if (!deliberationOptions?.enabled) {
|
|
817
|
+
if (deliberationOptions) {
|
|
818
|
+
deliberation = {
|
|
819
|
+
status: 'skipped',
|
|
820
|
+
reason: 'disabled',
|
|
821
|
+
strategy: deliberationOptions.strategy
|
|
822
|
+
};
|
|
823
|
+
}
|
|
824
|
+
}
|
|
825
|
+
else {
|
|
826
|
+
const reason = selectDeliberationReason({
|
|
827
|
+
iteration,
|
|
828
|
+
previousIteration,
|
|
829
|
+
lastDeliberationIteration,
|
|
830
|
+
minIntervalIterations: deliberationOptions.minIntervalIterations
|
|
831
|
+
});
|
|
832
|
+
if (!reason) {
|
|
833
|
+
deliberation = {
|
|
834
|
+
status: 'skipped',
|
|
835
|
+
reason: 'not_due',
|
|
836
|
+
strategy: deliberationOptions.strategy
|
|
837
|
+
};
|
|
838
|
+
}
|
|
839
|
+
else if (deliberationRuns >= deliberationOptions.maxRuns) {
|
|
840
|
+
deliberation = {
|
|
841
|
+
status: 'skipped',
|
|
842
|
+
reason: 'max_runs_reached',
|
|
843
|
+
strategy: deliberationOptions.strategy
|
|
844
|
+
};
|
|
845
|
+
}
|
|
846
|
+
else {
|
|
847
|
+
deliberationRuns += 1;
|
|
848
|
+
try {
|
|
849
|
+
const result = await runDeliberationStep({
|
|
850
|
+
options: deliberationOptions,
|
|
851
|
+
goal: options.goal,
|
|
852
|
+
iteration,
|
|
853
|
+
reason,
|
|
854
|
+
runDir,
|
|
855
|
+
repoRoot: options.repoRoot,
|
|
856
|
+
contextStore: options.contextStore,
|
|
857
|
+
priorReads,
|
|
858
|
+
priorSearches,
|
|
859
|
+
priorSubcalls
|
|
860
|
+
});
|
|
861
|
+
deliberation = result.record;
|
|
862
|
+
latestDeliberationBrief = result.brief;
|
|
863
|
+
lastDeliberationIteration = iteration;
|
|
864
|
+
log(`Deliberation ${formatDeliberationReason(reason)} ran for iteration ${iteration} (${result.record.strategy}).`);
|
|
865
|
+
}
|
|
866
|
+
catch (error) {
|
|
867
|
+
deliberation = {
|
|
868
|
+
status: 'error',
|
|
869
|
+
reason: formatDeliberationReason(reason),
|
|
870
|
+
strategy: deliberationOptions.strategy,
|
|
871
|
+
artifact_paths: extractDeliberationArtifactPaths(error),
|
|
872
|
+
error: error instanceof Error ? error.message : String(error)
|
|
873
|
+
};
|
|
874
|
+
log(`Deliberation ${formatDeliberationReason(reason)} failed for iteration ${iteration}: ${deliberation.error}`);
|
|
875
|
+
}
|
|
876
|
+
}
|
|
877
|
+
}
|
|
479
878
|
const promptResult = buildPlannerPrompt({
|
|
480
879
|
goal: options.goal,
|
|
481
880
|
contextStore: options.contextStore,
|
|
482
881
|
budgets: options.budgets,
|
|
483
882
|
priorReads,
|
|
484
883
|
priorSearches,
|
|
485
|
-
priorSubcalls
|
|
884
|
+
priorSubcalls,
|
|
885
|
+
deliberationBrief: deliberationOptions?.enabled && deliberationOptions.includeInPlannerPrompt
|
|
886
|
+
? latestDeliberationBrief
|
|
887
|
+
: null
|
|
486
888
|
});
|
|
487
889
|
const plannerPrompt = promptResult.prompt;
|
|
488
890
|
const plannerPromptBytes = byteLength(plannerPrompt);
|
|
@@ -512,7 +914,7 @@ export async function runSymbolicLoop(options) {
|
|
|
512
914
|
let validationError = null;
|
|
513
915
|
try {
|
|
514
916
|
validation = validatePlan(plan, options.budgets);
|
|
515
|
-
validatePlanPointers(validation, options.contextStore);
|
|
917
|
+
validatePlanPointers(validation, options.contextStore, subcallPointers);
|
|
516
918
|
}
|
|
517
919
|
catch {
|
|
518
920
|
validationError = 'plan_validation_error';
|
|
@@ -520,6 +922,11 @@ export async function runSymbolicLoop(options) {
|
|
|
520
922
|
if (!validationError && plan.intent === 'final' && !hasPriorSubcalls) {
|
|
521
923
|
validationError = 'final_requires_subcall';
|
|
522
924
|
}
|
|
925
|
+
if (!validationError && plan.intent === 'final' && plan.final_var) {
|
|
926
|
+
if (!variableBindings.has(plan.final_var)) {
|
|
927
|
+
validationError = 'final_var_unbound';
|
|
928
|
+
}
|
|
929
|
+
}
|
|
523
930
|
if (validationError) {
|
|
524
931
|
plannerErrors.push(validationError);
|
|
525
932
|
await recordPlannerFailure({
|
|
@@ -542,12 +949,27 @@ export async function runSymbolicLoop(options) {
|
|
|
542
949
|
const reads = [];
|
|
543
950
|
const subcalls = [];
|
|
544
951
|
const searches = [];
|
|
952
|
+
const iterationVariableBindings = [];
|
|
545
953
|
if (plan.intent === 'final') {
|
|
954
|
+
let finalAnswer = plan.final_answer;
|
|
955
|
+
if (plan.final_var) {
|
|
956
|
+
const binding = variableBindings.get(plan.final_var);
|
|
957
|
+
if (!binding) {
|
|
958
|
+
return await finalize({ status: 'invalid_config', exitCode: 5 });
|
|
959
|
+
}
|
|
960
|
+
try {
|
|
961
|
+
finalAnswer = await readFile(binding.outputPath, 'utf8');
|
|
962
|
+
}
|
|
963
|
+
catch {
|
|
964
|
+
return await finalize({ status: 'invalid_config', exitCode: 5 });
|
|
965
|
+
}
|
|
966
|
+
}
|
|
546
967
|
state.symbolic_iterations.push({
|
|
547
968
|
iteration,
|
|
548
969
|
planner_prompt_bytes: plannerPromptBytes,
|
|
549
970
|
reads,
|
|
550
971
|
subcalls,
|
|
972
|
+
deliberation,
|
|
551
973
|
searches,
|
|
552
974
|
planner_errors: plannerErrors.length > 0 ? plannerErrors : undefined,
|
|
553
975
|
clamped: {
|
|
@@ -558,7 +980,7 @@ export async function runSymbolicLoop(options) {
|
|
|
558
980
|
truncation: promptResult.truncation
|
|
559
981
|
});
|
|
560
982
|
await writeState(statePath, state);
|
|
561
|
-
return await finalize({ status: 'passed', exitCode: 0, final_answer:
|
|
983
|
+
return await finalize({ status: 'passed', exitCode: 0, final_answer: finalAnswer });
|
|
562
984
|
}
|
|
563
985
|
if (plan.intent === 'pause' || plan.intent === 'fail') {
|
|
564
986
|
state.symbolic_iterations.push({
|
|
@@ -566,6 +988,7 @@ export async function runSymbolicLoop(options) {
|
|
|
566
988
|
planner_prompt_bytes: plannerPromptBytes,
|
|
567
989
|
reads,
|
|
568
990
|
subcalls,
|
|
991
|
+
deliberation,
|
|
569
992
|
searches,
|
|
570
993
|
planner_errors: plannerErrors.length > 0 ? plannerErrors : undefined,
|
|
571
994
|
clamped: {
|
|
@@ -596,19 +1019,14 @@ export async function runSymbolicLoop(options) {
|
|
|
596
1019
|
priorSearches = currentSearches;
|
|
597
1020
|
const readExcerpts = [];
|
|
598
1021
|
for (const read of validation.reads) {
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
const result = await options.contextStore.readSpan(read.start_byte, read.bytes);
|
|
608
|
-
readExcerpts.push({
|
|
609
|
-
pointer: `start_byte=${read.start_byte} bytes=${read.bytes}`,
|
|
610
|
-
excerpt: truncateUtf8ToBytes(result.text, options.budgets.maxBytesPerChunkRead)
|
|
611
|
-
});
|
|
1022
|
+
const excerpt = await resolveReadExcerpt({
|
|
1023
|
+
read,
|
|
1024
|
+
contextStore: options.contextStore,
|
|
1025
|
+
subcallPointers,
|
|
1026
|
+
maxBytes: options.budgets.maxBytesPerChunkRead
|
|
1027
|
+
});
|
|
1028
|
+
if (excerpt) {
|
|
1029
|
+
readExcerpts.push(excerpt);
|
|
612
1030
|
}
|
|
613
1031
|
reads.push(read);
|
|
614
1032
|
}
|
|
@@ -650,14 +1068,12 @@ export async function runSymbolicLoop(options) {
|
|
|
650
1068
|
});
|
|
651
1069
|
const resolvedSnippets = [];
|
|
652
1070
|
for (const snippet of snippets) {
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
resolvedSnippets.push({ meta: snippet, text: result.text });
|
|
660
|
-
}
|
|
1071
|
+
const text = await resolveSnippetText({
|
|
1072
|
+
snippet,
|
|
1073
|
+
contextStore: options.contextStore,
|
|
1074
|
+
subcallPointers
|
|
1075
|
+
});
|
|
1076
|
+
resolvedSnippets.push({ meta: snippet, text });
|
|
661
1077
|
}
|
|
662
1078
|
for (const span of spans) {
|
|
663
1079
|
const spanBytes = Math.max(0, span.end_byte - span.start_byte);
|
|
@@ -697,9 +1113,12 @@ export async function runSymbolicLoop(options) {
|
|
|
697
1113
|
const promptPath = join(subcallDir, 'prompt.txt');
|
|
698
1114
|
const outputPath = join(subcallDir, 'output.txt');
|
|
699
1115
|
const metaPath = join(subcallDir, 'meta.json');
|
|
1116
|
+
const outputPointer = buildSubcallPointer(iteration, subcallId);
|
|
700
1117
|
await writeFile(inputPath, JSON.stringify({
|
|
701
1118
|
id: subcallId,
|
|
702
1119
|
purpose: rawSubcall.purpose,
|
|
1120
|
+
parent_pointer: rawSubcall.parent_pointer,
|
|
1121
|
+
output_var: rawSubcall.output_var,
|
|
703
1122
|
max_input_bytes: maxInput,
|
|
704
1123
|
snippets,
|
|
705
1124
|
spans
|
|
@@ -714,10 +1133,21 @@ export async function runSymbolicLoop(options) {
|
|
|
714
1133
|
clipped
|
|
715
1134
|
}, null, 2), 'utf8');
|
|
716
1135
|
const relativeBase = (filePath) => relative(options.repoRoot, filePath);
|
|
1136
|
+
const outputPreview = truncateUtf8ToBytes(output, options.budgets.maxPreviewBytes);
|
|
1137
|
+
subcallPointers.set(outputPointer, {
|
|
1138
|
+
pointer: outputPointer,
|
|
1139
|
+
iteration,
|
|
1140
|
+
subcallId,
|
|
1141
|
+
outputPath,
|
|
1142
|
+
outputBytes: byteLength(output)
|
|
1143
|
+
});
|
|
717
1144
|
return {
|
|
718
1145
|
subcall: {
|
|
719
1146
|
id: subcallId,
|
|
720
1147
|
purpose: rawSubcall.purpose,
|
|
1148
|
+
parent_pointer: rawSubcall.parent_pointer,
|
|
1149
|
+
output_pointer: outputPointer,
|
|
1150
|
+
output_var: rawSubcall.output_var,
|
|
721
1151
|
snippets: snippets.length > 0 ? snippets : undefined,
|
|
722
1152
|
spans: spans.length > 0 ? spans : undefined,
|
|
723
1153
|
max_input_bytes: rawSubcall.max_input_bytes,
|
|
@@ -735,12 +1165,36 @@ export async function runSymbolicLoop(options) {
|
|
|
735
1165
|
},
|
|
736
1166
|
output: {
|
|
737
1167
|
id: subcallId,
|
|
738
|
-
|
|
739
|
-
|
|
1168
|
+
pointer: outputPointer,
|
|
1169
|
+
preview: outputPreview,
|
|
1170
|
+
output_bytes: byteLength(output),
|
|
1171
|
+
output_var: rawSubcall.output_var
|
|
1172
|
+
},
|
|
1173
|
+
binding: rawSubcall.output_var
|
|
1174
|
+
? {
|
|
1175
|
+
name: rawSubcall.output_var,
|
|
1176
|
+
pointer: outputPointer,
|
|
1177
|
+
iteration,
|
|
1178
|
+
subcallId,
|
|
1179
|
+
outputPath,
|
|
1180
|
+
outputBytes: byteLength(output)
|
|
1181
|
+
}
|
|
1182
|
+
: null
|
|
740
1183
|
};
|
|
741
1184
|
});
|
|
742
1185
|
for (const result of subcallResults) {
|
|
743
1186
|
subcalls.push(result.subcall);
|
|
1187
|
+
if (result.binding) {
|
|
1188
|
+
variableBindings.set(result.binding.name, result.binding);
|
|
1189
|
+
iterationVariableBindings.push({
|
|
1190
|
+
name: result.binding.name,
|
|
1191
|
+
pointer: result.binding.pointer,
|
|
1192
|
+
iteration: result.binding.iteration,
|
|
1193
|
+
subcall_id: result.binding.subcallId,
|
|
1194
|
+
output_bytes: result.binding.outputBytes,
|
|
1195
|
+
output_path: relative(options.repoRoot, result.binding.outputPath)
|
|
1196
|
+
});
|
|
1197
|
+
}
|
|
744
1198
|
}
|
|
745
1199
|
priorSubcalls = subcallResults.map((result) => result.output);
|
|
746
1200
|
state.symbolic_iterations.push({
|
|
@@ -748,6 +1202,8 @@ export async function runSymbolicLoop(options) {
|
|
|
748
1202
|
planner_prompt_bytes: plannerPromptBytes,
|
|
749
1203
|
reads,
|
|
750
1204
|
subcalls,
|
|
1205
|
+
variable_bindings: iterationVariableBindings.length > 0 ? iterationVariableBindings : undefined,
|
|
1206
|
+
deliberation,
|
|
751
1207
|
searches,
|
|
752
1208
|
planner_errors: plannerErrors.length > 0 ? plannerErrors : undefined,
|
|
753
1209
|
clamped: {
|