@kbediako/codex-orchestrator 0.1.15 → 0.1.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -1
- package/dist/bin/codex-orchestrator.js +119 -2
- package/dist/orchestrator/src/cli/delegationServer.js +4 -1
- package/dist/orchestrator/src/cli/exec/experience.js +20 -2
- package/dist/orchestrator/src/cli/exec/tfgrpo.js +13 -1
- package/dist/orchestrator/src/cli/orchestrator.js +120 -2
- package/dist/orchestrator/src/cli/rlm/symbolic.js +439 -38
- package/dist/orchestrator/src/cli/rlmRunner.js +235 -4
- package/dist/orchestrator/src/cli/run/manifest.js +200 -4
- package/dist/orchestrator/src/cli/services/commandRunner.js +1 -0
- package/dist/orchestrator/src/cli/services/pipelineExperience.js +122 -0
- package/dist/orchestrator/src/cli/utils/delegationGuardRunner.js +80 -0
- package/dist/orchestrator/src/cloud/CodexCloudTaskExecutor.js +25 -0
- package/docs/README.md +4 -1
- package/package.json +2 -1
- package/skills/collab-deliberation/SKILL.md +72 -8
- package/skills/delegate-early/SKILL.md +13 -39
- package/skills/delegation-usage/DELEGATION_GUIDE.md +7 -4
- package/skills/delegation-usage/SKILL.md +17 -6
- package/templates/codex/AGENTS.md +30 -1
|
@@ -1,6 +1,7 @@
|
|
|
1
|
-
import { mkdir, writeFile } from 'node:fs/promises';
|
|
1
|
+
import { mkdir, readFile, writeFile } from 'node:fs/promises';
|
|
2
2
|
import { join, relative } from 'node:path';
|
|
3
3
|
const DEFAULT_ALLOWED_PURPOSES = new Set(['summarize', 'extract', 'classify', 'verify']);
|
|
4
|
+
const SUBCALL_POINTER_PREFIX = 'subcall:';
|
|
4
5
|
/**
 * Measure how many bytes a string occupies once encoded as UTF-8.
 *
 * @param {string | null | undefined} value - Text to measure; nullish input counts as ''.
 * @returns {number} Encoded size in bytes.
 */
function byteLength(value) {
    const text = value ?? '';
    return Buffer.byteLength(text, 'utf8');
}
|
|
@@ -40,6 +41,44 @@ function truncateUtf8ToBytes(value, maxBytes) {
|
|
|
40
41
|
}
|
|
41
42
|
return buffer.slice(0, end).toString('utf8');
|
|
42
43
|
}
|
|
44
|
+
/**
 * Tell whether a pointer uses the `ctx:` scheme.
 *
 * Non-string input returns false instead of throwing, so the predicate is
 * safe to call on values that came straight out of parsed planner JSON.
 *
 * @param {unknown} pointer - Candidate pointer value.
 * @returns {boolean} True only for strings that start with `ctx:`.
 */
function isContextPointer(pointer) {
    return typeof pointer === 'string' && pointer.startsWith('ctx:');
}
|
|
47
|
+
/**
 * Parse a `subcall:<iteration>:<subcall_id>` pointer.
 *
 * The iteration segment must consist solely of decimal digits and denote a
 * positive safe integer. `Number.parseInt` alone would accept inputs such as
 * `1abc` or ` 2`, so the segment is checked against a digit-only pattern
 * first. Everything after the second colon (further colons included) is the
 * subcall id.
 *
 * @param {unknown} pointer - Candidate pointer value.
 * @returns {{ iteration: number, subcallId: string } | null} The parsed
 *   parts, or null when the value is not a well-formed subcall pointer.
 */
function parseSubcallPointer(pointer) {
    if (typeof pointer !== 'string' || !pointer.startsWith(SUBCALL_POINTER_PREFIX)) {
        return null;
    }
    const body = pointer.slice(SUBCALL_POINTER_PREFIX.length);
    const separatorIndex = body.indexOf(':');
    // Both segments must be non-empty: the separator may be neither first nor last.
    if (separatorIndex <= 0 || separatorIndex >= body.length - 1) {
        return null;
    }
    const iterationRaw = body.slice(0, separatorIndex);
    const subcallId = body.slice(separatorIndex + 1);
    if (!/^\d+$/.test(iterationRaw)) {
        return null;
    }
    const iteration = Number.parseInt(iterationRaw, 10);
    if (!Number.isSafeInteger(iteration) || iteration <= 0) {
        return null;
    }
    return { iteration, subcallId };
}
|
|
64
|
+
/**
 * Compose the pointer string that names one subcall's output.
 *
 * @param {number} iteration - Iteration segment of the pointer.
 * @param {string} subcallId - Subcall id segment of the pointer.
 * @returns {string} `<prefix><iteration>:<subcallId>`, using the module's subcall prefix.
 */
function buildSubcallPointer(iteration, subcallId) {
    const locator = `${iteration}:${subcallId}`;
    return `${SUBCALL_POINTER_PREFIX}${locator}`;
}
|
|
67
|
+
/**
 * Read a byte window out of a stored subcall output file.
 *
 * The window is addressed in bytes, so its edges can land inside a multi-byte
 * UTF-8 character. Both edges are moved inward to the nearest character
 * boundary; decoding therefore never produces U+FFFD replacement characters
 * for a character that was merely cut by the window.
 *
 * @param {{ get(pointer: string): ({ outputPath: string } | undefined) }} pointerStore
 *   Lookup from pointer string to a record carrying the output file path.
 * @param {string} pointer - Key to look up in `pointerStore`.
 * @param {number} offset - Start of the window in bytes; floored, negatives and NaN count as 0.
 * @param {number} bytes - Maximum window size in bytes; floored, negatives and NaN count as 0.
 * @returns {Promise<string>} The decoded window, or '' when the window is empty
 *   or starts at/after the end of the file.
 * @throws {Error} `plan_validation_error` when the pointer has no record.
 */
async function readSubcallPointerSnippet(pointerStore, pointer, offset, bytes) {
    const record = pointerStore.get(pointer);
    if (!record) {
        throw new Error('plan_validation_error');
    }
    // Read raw bytes directly; a decode/re-encode round trip adds nothing.
    const sourceBytes = await readFile(record.outputPath);
    const toCount = (value) => {
        const floored = Math.floor(value);
        return Number.isNaN(floored) ? 0 : Math.max(0, floored);
    };
    const safeOffset = toCount(offset);
    const safeBytes = toCount(bytes);
    if (safeOffset >= sourceBytes.length || safeBytes <= 0) {
        return '';
    }
    // UTF-8 continuation bytes have the bit pattern 10xxxxxx.
    const isContinuation = (index) => (sourceBytes[index] & 0xc0) === 0x80;
    let start = safeOffset;
    let end = Math.min(sourceBytes.length, safeOffset + safeBytes);
    // Skip the tail of a character that began before the window.
    while (start < end && isContinuation(start)) {
        start += 1;
    }
    // Drop a character whose remaining bytes lie beyond the window.
    if (end < sourceBytes.length) {
        while (end > start && isContinuation(end)) {
            end -= 1;
        }
    }
    return sourceBytes.subarray(start, end).toString('utf8');
}
|
|
43
82
|
async function mapWithConcurrency(items, maxConcurrency, worker) {
|
|
44
83
|
if (items.length === 0) {
|
|
45
84
|
return [];
|
|
@@ -203,6 +242,9 @@ function buildPlannerRetryPrompt(prompt, errors) {
|
|
|
203
242
|
if (errors.includes('final_requires_subcall')) {
|
|
204
243
|
headerLines.push('Do not return intent=final until after at least one subcall.');
|
|
205
244
|
}
|
|
245
|
+
if (errors.includes('final_var_unbound')) {
|
|
246
|
+
headerLines.push('Use final_var only when it matches a previously declared subcalls[].output_var.');
|
|
247
|
+
}
|
|
206
248
|
if (errors.length > 0) {
|
|
207
249
|
headerLines.push(`Previous error: ${errors.join('; ')}`);
|
|
208
250
|
}
|
|
@@ -230,10 +272,26 @@ function validatePlan(plan, budgets) {
|
|
|
230
272
|
if (!['continue', 'final', 'pause', 'fail'].includes(plan.intent)) {
|
|
231
273
|
throw new Error('plan_validation_error');
|
|
232
274
|
}
|
|
275
|
+
const finalVarRaw = plan.final_var;
|
|
276
|
+
const finalVar = typeof finalVarRaw === 'string' && finalVarRaw.trim().length > 0
|
|
277
|
+
? finalVarRaw.trim()
|
|
278
|
+
: undefined;
|
|
279
|
+
if (finalVarRaw !== undefined && !finalVar) {
|
|
280
|
+
throw new Error('plan_validation_error');
|
|
281
|
+
}
|
|
282
|
+
if (finalVar) {
|
|
283
|
+
plan.final_var = finalVar;
|
|
284
|
+
}
|
|
233
285
|
if (plan.intent === 'final') {
|
|
234
|
-
|
|
286
|
+
const finalAnswer = typeof plan.final_answer === 'string' && plan.final_answer.trim().length > 0
|
|
287
|
+
? plan.final_answer.trim()
|
|
288
|
+
: null;
|
|
289
|
+
if (!finalAnswer && !finalVar) {
|
|
235
290
|
throw new Error('plan_validation_error');
|
|
236
291
|
}
|
|
292
|
+
if (finalAnswer) {
|
|
293
|
+
plan.final_answer = finalAnswer;
|
|
294
|
+
}
|
|
237
295
|
}
|
|
238
296
|
const reads = [];
|
|
239
297
|
const rawReads = Array.isArray(plan.reads) ? plan.reads : [];
|
|
@@ -277,6 +335,23 @@ function validatePlan(plan, budgets) {
|
|
|
277
335
|
const rawSubcalls = Array.isArray(plan.subcalls) ? plan.subcalls : [];
|
|
278
336
|
for (const entry of rawSubcalls) {
|
|
279
337
|
const purposeInfo = normalizePurpose(entry?.purpose);
|
|
338
|
+
const parentPointerRaw = entry?.parent_pointer;
|
|
339
|
+
if (parentPointerRaw !== undefined && typeof parentPointerRaw !== 'string') {
|
|
340
|
+
throw new Error('plan_validation_error');
|
|
341
|
+
}
|
|
342
|
+
const outputVarRaw = entry?.output_var;
|
|
343
|
+
if (outputVarRaw !== undefined && typeof outputVarRaw !== 'string') {
|
|
344
|
+
throw new Error('plan_validation_error');
|
|
345
|
+
}
|
|
346
|
+
const parentPointer = typeof parentPointerRaw === 'string' && parentPointerRaw.trim().length > 0
|
|
347
|
+
? parentPointerRaw.trim()
|
|
348
|
+
: undefined;
|
|
349
|
+
const outputVar = typeof outputVarRaw === 'string' && outputVarRaw.trim().length > 0
|
|
350
|
+
? outputVarRaw.trim()
|
|
351
|
+
: undefined;
|
|
352
|
+
if (outputVarRaw !== undefined && !outputVar) {
|
|
353
|
+
throw new Error('plan_validation_error');
|
|
354
|
+
}
|
|
280
355
|
const maxInputBytes = toNumber(entry?.max_input_bytes);
|
|
281
356
|
if (!maxInputBytes || maxInputBytes <= 0) {
|
|
282
357
|
throw new Error('plan_validation_error');
|
|
@@ -320,6 +395,8 @@ function validatePlan(plan, budgets) {
|
|
|
320
395
|
subcalls.push({
|
|
321
396
|
id: '',
|
|
322
397
|
purpose: purposeInfo.value,
|
|
398
|
+
parent_pointer: parentPointer,
|
|
399
|
+
output_var: outputVar,
|
|
323
400
|
snippets,
|
|
324
401
|
spans,
|
|
325
402
|
max_input_bytes: Math.floor(maxInputBytes)
|
|
@@ -340,41 +417,116 @@ function validatePlan(plan, budgets) {
|
|
|
340
417
|
}
|
|
341
418
|
};
|
|
342
419
|
}
|
|
343
|
-
function validatePlanPointers(validation, contextStore) {
|
|
420
|
+
/**
 * Check that every pointer referenced by a validated plan resolves to
 * something known.
 *
 * `ctx:` pointers are checked with `contextStore.validatePointer`; well-formed
 * subcall pointers must be present in `subcallPointers`; any other non-empty
 * pointer is rejected. Entries without a pointer are skipped.
 *
 * @param {{ reads: Array<{ pointer?: string }>, subcalls: Array<{ parent_pointer?: string, snippets: Array<{ pointer?: string }> }> }} validation
 *   Validated plan whose reads, subcall parents and subcall snippets are checked.
 * @param {{ validatePointer(pointer: string): boolean }} contextStore - Validator for `ctx:` pointers.
 * @param {{ has(pointer: string): boolean }} [subcallPointers] - Known subcall
 *   output pointers. Defaults to an empty map so callers using the earlier
 *   two-argument form still work; every subcall pointer is then unknown.
 * @returns {void}
 * @throws {Error} `plan_validation_error` on the first pointer that does not resolve.
 */
function validatePlanPointers(validation, contextStore, subcallPointers = new Map()) {
    const ensurePointer = (pointer) => {
        if (!pointer) {
            return;
        }
        let known = false;
        if (isContextPointer(pointer)) {
            known = Boolean(contextStore.validatePointer(pointer));
        }
        else if (parseSubcallPointer(pointer)) {
            known = subcallPointers.has(pointer);
        }
        if (!known) {
            throw new Error('plan_validation_error');
        }
    };
    for (const read of validation.reads) {
        ensurePointer(read.pointer);
    }
    for (const subcall of validation.subcalls) {
        ensurePointer(subcall.parent_pointer);
        for (const snippet of subcall.snippets) {
            ensurePointer(snippet.pointer);
        }
    }
}
|
|
449
|
+
/**
 * Render a one-line summary of a prior subcall output.
 *
 * The preview is cut to 160 bytes, then runs of whitespace are collapsed to a
 * single space and the result is trimmed. The `output_var` and `preview`
 * parts are appended only when they are non-empty.
 *
 * @param {{ id: string, pointer: string, output_bytes: number, preview?: string, output_var?: string }} entry
 *   Subcall output record to summarize.
 * @returns {string} `<id>: <pointer> (<n> bytes)[ output_var=<name>][ preview="<text>"]`.
 */
function formatSubcallSummary(entry) {
    const clipped = truncateUtf8ToBytes(entry.preview ?? '', 160);
    const preview = clipped.replace(/\s+/g, ' ').trim();
    const variablePart = entry.output_var ? ` output_var=${entry.output_var}` : '';
    const head = `${entry.id}: ${entry.pointer} (${entry.output_bytes} bytes)${variablePart}`;
    return preview ? `${head} preview="${preview}"` : head;
}
|
|
457
|
+
/**
 * Describe the two pointer shapes shown to the planner.
 *
 * @returns {string} The `ctx:` and `subcall:` templates separated by ` | `.
 */
function buildPointerReferenceHint() {
    const hint = 'ctx:<object_id>#chunk:<chunk_id> | subcall:<iteration>:<subcall_id>';
    return hint;
}
|
|
460
|
+
/**
 * Produce the schema fragment that documents the optional subcall fields.
 *
 * @returns {string} JSON-like text for `parent_pointer` and `output_var`,
 *   ending with a trailing comma.
 */
function buildSubcallHints() {
    const fragment = '"parent_pointer": "optional pointer for recursion lineage", "output_var": "optional variable name",';
    return fragment;
}
|
|
463
|
+
/**
 * Tell whether a pointer parses as a subcall pointer.
 *
 * @param {string} pointer - Candidate pointer.
 * @returns {boolean} True when `parseSubcallPointer` returns anything other than null.
 */
function hasSubcallPointer(pointer) {
    const parsed = parseSubcallPointer(pointer);
    return parsed !== null;
}
|
|
466
|
+
/**
 * Fetch the text for one subcall snippet.
 *
 * A snippet with a pointer is read from the context store (`ctx:` pointers) or
 * from a stored subcall output (subcall pointers), with the offset defaulting
 * to 0. A snippet without a pointer must carry a numeric `start_byte` and is
 * read as a span from the context store.
 *
 * @param {{ snippet: object, contextStore: object, subcallPointers: object }} params
 *   The snippet plus the two stores it may be resolved against.
 * @returns {Promise<string>} The snippet text.
 * @throws {Error} `plan_validation_error` when the pointer scheme is unknown
 *   or the snippet has neither a pointer nor a numeric `start_byte`.
 */
async function resolveSnippetText(params) {
    const { snippet, contextStore, subcallPointers } = params;
    const { pointer } = snippet;
    if (!pointer) {
        if (typeof snippet.start_byte !== 'number') {
            throw new Error('plan_validation_error');
        }
        const span = await contextStore.readSpan(snippet.start_byte, snippet.bytes);
        return span.text;
    }
    if (isContextPointer(pointer)) {
        const chunk = await contextStore.read(pointer, snippet.offset ?? 0, snippet.bytes);
        return chunk.text;
    }
    if (!hasSubcallPointer(pointer)) {
        throw new Error('plan_validation_error');
    }
    return readSubcallPointerSnippet(subcallPointers, pointer, snippet.offset ?? 0, snippet.bytes);
}
|
|
484
|
+
/**
 * Resolve one planned read into a labelled excerpt.
 *
 * Reads with a pointer go to the context store (`ctx:` pointers) or to a
 * stored subcall output (subcall pointers), with the offset defaulting to 0.
 * Reads without a pointer but with a numeric `start_byte` are read as a span
 * and labelled `start_byte=<n> bytes=<m>`. The text is cut to `maxBytes`.
 *
 * @param {{ read: object, contextStore: object, subcallPointers: object, maxBytes: number }} params
 *   The read, the stores it may be resolved against, and the excerpt byte cap.
 * @returns {Promise<{ pointer: string, excerpt: string } | null>} The excerpt,
 *   or null when the read has neither a pointer nor a numeric `start_byte`.
 * @throws {Error} `plan_validation_error` when the pointer scheme is unknown.
 */
async function resolveReadExcerpt(params) {
    const { read, contextStore, subcallPointers, maxBytes } = params;
    const { pointer } = read;
    if (!pointer) {
        if (typeof read.start_byte !== 'number') {
            return null;
        }
        const span = await contextStore.readSpan(read.start_byte, read.bytes);
        return {
            pointer: `start_byte=${read.start_byte} bytes=${read.bytes}`,
            excerpt: truncateUtf8ToBytes(span.text, maxBytes)
        };
    }
    let text;
    if (isContextPointer(pointer)) {
        ({ text } = await contextStore.read(pointer, read.offset ?? 0, read.bytes));
    }
    else if (hasSubcallPointer(pointer)) {
        text = await readSubcallPointerSnippet(subcallPointers, pointer, read.offset ?? 0, read.bytes);
    }
    else {
        throw new Error('plan_validation_error');
    }
    return { pointer, excerpt: truncateUtf8ToBytes(text, maxBytes) };
}
|
|
361
512
|
function buildPlannerPrompt(params) {
|
|
362
|
-
const { goal, contextStore, budgets, priorReads, priorSearches, priorSubcalls } = params;
|
|
513
|
+
const { goal, contextStore, budgets, priorReads, priorSearches, priorSubcalls, deliberationBrief } = params;
|
|
363
514
|
const baseLines = [
|
|
364
515
|
'You are a symbolic RLM planner. Return JSON only (no prose).',
|
|
365
516
|
`Goal: ${goal}`,
|
|
366
517
|
`Context object_id: ${contextStore.objectId}`,
|
|
367
518
|
`Chunk count: ${contextStore.chunkCount}`,
|
|
368
|
-
`Pointer format:
|
|
519
|
+
`Pointer format: ${buildPointerReferenceHint()}`,
|
|
369
520
|
'',
|
|
370
521
|
'Schema (v1):',
|
|
371
522
|
'{',
|
|
372
523
|
' "schema_version": 1,',
|
|
373
524
|
' "intent": "continue | final | pause | fail",',
|
|
374
|
-
|
|
525
|
+
` "reads": [{ "pointer": "${buildPointerReferenceHint()}", "offset": 0, "bytes": 4096, "reason": "..." }],`,
|
|
375
526
|
' "searches": [{ "query": "...", "top_k": 20, "reason": "..." }],',
|
|
376
|
-
|
|
377
|
-
' "final_answer": "required when intent=final"',
|
|
527
|
+
` "subcalls": [{ "purpose": "summarize | extract | classify | verify", ${buildSubcallHints()} "snippets": [{ "pointer": "${buildPointerReferenceHint()}", "offset": 0, "bytes": 2048 }], "max_input_bytes": 120000, "expected_output": "short summary" }],`,
|
|
528
|
+
' "final_answer": "required when intent=final unless final_var is set",',
|
|
529
|
+
' "final_var": "optional variable name bound by subcalls[].output_var"',
|
|
378
530
|
'}',
|
|
379
531
|
'',
|
|
380
532
|
'Constraints:',
|
|
@@ -382,6 +534,7 @@ function buildPlannerPrompt(params) {
|
|
|
382
534
|
`- Max bytes per read: ${budgets.maxBytesPerChunkRead}.`,
|
|
383
535
|
`- Max bytes per snippet: ${budgets.maxBytesPerSnippet}.`,
|
|
384
536
|
'- Do not include full context; use pointers.',
|
|
537
|
+
'- Prefer prior subcall pointers for recursive chaining.',
|
|
385
538
|
'- Request at least one subcall before intent=final.',
|
|
386
539
|
''
|
|
387
540
|
];
|
|
@@ -410,12 +563,18 @@ function buildPlannerPrompt(params) {
|
|
|
410
563
|
sections.push({ key: 'reads_dropped', lines });
|
|
411
564
|
}
|
|
412
565
|
if (priorSubcalls.length > 0) {
|
|
413
|
-
const lines = ['Prior subcall
|
|
566
|
+
const lines = ['Prior subcall references:'];
|
|
414
567
|
for (const entry of priorSubcalls) {
|
|
415
|
-
lines.push(`- ${entry
|
|
568
|
+
lines.push(`- ${formatSubcallSummary(entry)}`);
|
|
416
569
|
}
|
|
417
570
|
sections.push({ key: 'subcalls_dropped', lines });
|
|
418
571
|
}
|
|
572
|
+
if (typeof deliberationBrief === 'string' && deliberationBrief.trim().length > 0) {
|
|
573
|
+
sections.push({
|
|
574
|
+
key: 'deliberation_dropped',
|
|
575
|
+
lines: ['Deliberation brief:', deliberationBrief.trim()]
|
|
576
|
+
});
|
|
577
|
+
}
|
|
419
578
|
let truncation = {};
|
|
420
579
|
let prompt = [...baseLines];
|
|
421
580
|
for (const section of sections) {
|
|
@@ -440,6 +599,122 @@ function buildPlannerPrompt(params) {
|
|
|
440
599
|
}
|
|
441
600
|
return { prompt: promptString, truncation };
|
|
442
601
|
}
|
|
602
|
+
/**
 * Normalize a deliberation trigger to one of the four recognized labels.
 *
 * @param {unknown} reason - Trigger value to normalize.
 * @returns {string} `reason` itself when it is `bootstrap`, `cadence`,
 *   `planner_recovery` or `no_subcall_progress`; otherwise `cadence`.
 */
function formatDeliberationReason(reason) {
    const recognized = ['bootstrap', 'cadence', 'planner_recovery', 'no_subcall_progress'];
    return recognized.includes(reason) ? reason : 'cadence';
}
|
|
616
|
+
/**
 * Decide whether a deliberation step is due for this iteration, and why.
 *
 * Checks run in priority order:
 *  1. iteration 1 -> `bootstrap`;
 *  2. the previous iteration recorded planner errors -> `planner_recovery`;
 *  3. the previous iteration had no searches, reads or subcalls (or there is
 *     no previous iteration) -> `no_subcall_progress`;
 *  4. no deliberation has run yet, or at least the minimum interval has
 *     passed since the last one -> `cadence`.
 *
 * A missing or NaN `minIntervalIterations` falls back to an interval of 1.
 * Without that fallback the interval becomes NaN, every `>=` comparison
 * against it is false, and the cadence trigger is silently disabled.
 *
 * @param {{ iteration: number, previousIteration?: object | null, lastDeliberationIteration: number, minIntervalIterations?: number }} params
 *   Current iteration number, the previous iteration record, the iteration of
 *   the last deliberation (<= 0 when none ran) and the minimum spacing.
 * @returns {'bootstrap' | 'planner_recovery' | 'no_subcall_progress' | 'cadence' | null}
 *   The trigger, or null when no deliberation is due.
 */
function selectDeliberationReason(params) {
    if (params.iteration === 1) {
        return 'bootstrap';
    }
    const previous = params.previousIteration;
    if ((previous?.planner_errors?.length ?? 0) > 0) {
        return 'planner_recovery';
    }
    const isEmpty = (list) => (list?.length ?? 0) === 0;
    if (isEmpty(previous?.searches) && isEmpty(previous?.reads) && isEmpty(previous?.subcalls)) {
        return 'no_subcall_progress';
    }
    const requestedInterval = Math.floor(params.minIntervalIterations);
    // Infinity is kept as-is (it never satisfies the comparison); only NaN falls back.
    const minInterval = Number.isNaN(requestedInterval) ? 1 : Math.max(1, requestedInterval);
    const neverRan = params.lastDeliberationIteration <= 0;
    if (neverRan || params.iteration - params.lastDeliberationIteration >= minInterval) {
        return 'cadence';
    }
    return null;
}
|
|
636
|
+
/**
 * Assemble the text prompt for a deliberation step.
 *
 * The prompt has a fixed header (goal, iteration, normalized trigger, context
 * identity, the four-line response format and a byte limit of at least 256),
 * followed by optional lines for the most recent search, read and subcall.
 *
 * @param {{ goal: string, iteration: number, reason: string, contextStore: { objectId: string, chunkCount: number }, priorReads: object[], priorSearches: object[], priorSubcalls: object[], maxSummaryBytes: number }} params
 *   Values interpolated into the prompt.
 * @returns {string} Prompt lines joined with `\n`.
 */
function buildDeliberationPrompt(params) {
    const { contextStore, priorReads, priorSearches, priorSubcalls } = params;
    const byteCap = Math.max(256, Math.floor(params.maxSummaryBytes));
    const lastOf = (list) => list[list.length - 1];
    const header = [
        'You are a deliberation coordinator for an iterative symbolic planning loop.',
        `Goal: ${params.goal}`,
        `Iteration: ${params.iteration}`,
        `Trigger: ${formatDeliberationReason(params.reason)}`,
        `Context object_id: ${contextStore.objectId}`,
        `Context chunks: ${contextStore.chunkCount}`,
        'Respond with exactly four labeled lines:',
        'Decision focus: <what to optimize next>',
        'Risks: <top failure modes to avoid>',
        'Context gaps: <missing evidence/plans>',
        'Planner directives: <3 concise directives>',
        `Keep total output under ${byteCap} bytes and avoid markdown tables.`
    ];
    const evidence = [];
    if (priorSearches.length > 0) {
        const search = lastOf(priorSearches);
        const hits = search?.results?.length ?? 0;
        evidence.push(`Latest search: query="${search?.query ?? ''}" hits=${hits}`);
    }
    if (priorReads.length > 0) {
        const read = lastOf(priorReads);
        evidence.push(`Latest read pointer: ${read?.pointer ?? ''}`);
    }
    if (priorSubcalls.length > 0) {
        const subcall = lastOf(priorSubcalls);
        evidence.push(
            `Latest subcall id: ${subcall?.id ?? ''}`,
            `Latest subcall pointer: ${subcall?.pointer ?? ''}`,
            `Latest subcall preview: ${truncateUtf8ToBytes(subcall?.preview ?? '', 320)}`
        );
    }
    return [...header, ...evidence].join('\n');
}
|
|
669
|
+
/**
 * Run one deliberation step and persist its artifacts.
 *
 * Builds the prompt, writes it under `<runDir>/deliberation/`, invokes
 * `options.run(prompt, { iteration, reason })`, cuts the output to
 * `options.maxSummaryBytes`, then writes the output and a JSON meta file.
 * Artifacts are named `iteration-<4-digit iteration>-{prompt.txt,output.txt,meta.json}`.
 *
 * @param {{ options: { run: Function, maxSummaryBytes: number, strategy: string }, goal: string, iteration: number, reason: string, runDir: string, repoRoot: string, contextStore: object, priorReads: object[], priorSearches: object[], priorSubcalls: object[] }} params
 *   Deliberation options plus the loop state forwarded into the prompt.
 * @returns {Promise<{ record: object, brief: string }>} `record` describes the
 *   run (status `ran`, reason, strategy, byte counts, artifact paths relative
 *   to `repoRoot`); `brief` is the truncated output.
 */
async function runDeliberationStep(params) {
    const { options, iteration, runDir, repoRoot } = params;
    const prompt = buildDeliberationPrompt({
        goal: params.goal,
        iteration,
        reason: params.reason,
        contextStore: params.contextStore,
        priorReads: params.priorReads,
        priorSearches: params.priorSearches,
        priorSubcalls: params.priorSubcalls,
        maxSummaryBytes: options.maxSummaryBytes
    });
    const promptBytes = byteLength(prompt);
    const artifactDir = join(runDir, 'deliberation');
    await mkdir(artifactDir, { recursive: true });
    const stem = `iteration-${String(iteration).padStart(4, '0')}`;
    const artifactPath = (suffix) => join(artifactDir, `${stem}-${suffix}`);
    const promptPath = artifactPath('prompt.txt');
    const outputPath = artifactPath('output.txt');
    const metaPath = artifactPath('meta.json');
    // The prompt is persisted before the run so it exists even if the run throws.
    await writeFile(promptPath, prompt, 'utf8');
    const reasonLabel = formatDeliberationReason(params.reason);
    const output = await options.run(prompt, { iteration, reason: reasonLabel });
    const brief = truncateUtf8ToBytes(output ?? '', options.maxSummaryBytes);
    const outputBytes = byteLength(brief);
    await writeFile(outputPath, brief, 'utf8');
    const summary = {
        reason: reasonLabel,
        strategy: options.strategy,
        prompt_bytes: promptBytes,
        output_bytes: outputBytes
    };
    await writeFile(metaPath, JSON.stringify({ iteration, ...summary }, null, 2), 'utf8');
    const toRepoRelative = (filePath) => relative(repoRoot, filePath);
    return {
        record: {
            status: 'ran',
            ...summary,
            artifact_paths: {
                prompt: toRepoRelative(promptPath),
                output: toRepoRelative(outputPath),
                meta: toRepoRelative(metaPath)
            }
        },
        brief
    };
}
|
|
443
718
|
/**
 * Persist the loop state as pretty-printed (2-space) JSON.
 *
 * @param {string} path - Destination file path.
 * @param {unknown} state - Value to serialize with `JSON.stringify`.
 * @returns {Promise<void>} Resolves once the file has been written.
 */
async function writeState(path, state) {
    const serialized = JSON.stringify(state, null, 2);
    await writeFile(path, serialized, 'utf8');
}
|
|
@@ -466,6 +741,11 @@ export async function runSymbolicLoop(options) {
|
|
|
466
741
|
let priorReads = [];
|
|
467
742
|
let priorSearches = [];
|
|
468
743
|
let priorSubcalls = [];
|
|
744
|
+
const subcallPointers = new Map();
|
|
745
|
+
const variableBindings = new Map();
|
|
746
|
+
let lastDeliberationIteration = 0;
|
|
747
|
+
let deliberationRuns = 0;
|
|
748
|
+
let latestDeliberationBrief = null;
|
|
469
749
|
const finalize = async (status) => {
|
|
470
750
|
state.final = status ?? { status: 'error', exitCode: 10 };
|
|
471
751
|
await writeState(statePath, state);
|
|
@@ -476,13 +756,80 @@ export async function runSymbolicLoop(options) {
|
|
|
476
756
|
if (timeExceeded()) {
|
|
477
757
|
return await finalize({ status: 'max_minutes', exitCode: 3 });
|
|
478
758
|
}
|
|
759
|
+
const previousIteration = state.symbolic_iterations[state.symbolic_iterations.length - 1] ?? null;
|
|
760
|
+
let deliberation;
|
|
761
|
+
const deliberationOptions = options.deliberation;
|
|
762
|
+
if (!deliberationOptions?.enabled) {
|
|
763
|
+
if (deliberationOptions) {
|
|
764
|
+
deliberation = {
|
|
765
|
+
status: 'skipped',
|
|
766
|
+
reason: 'disabled',
|
|
767
|
+
strategy: deliberationOptions.strategy
|
|
768
|
+
};
|
|
769
|
+
}
|
|
770
|
+
}
|
|
771
|
+
else {
|
|
772
|
+
const reason = selectDeliberationReason({
|
|
773
|
+
iteration,
|
|
774
|
+
previousIteration,
|
|
775
|
+
lastDeliberationIteration,
|
|
776
|
+
minIntervalIterations: deliberationOptions.minIntervalIterations
|
|
777
|
+
});
|
|
778
|
+
if (!reason) {
|
|
779
|
+
deliberation = {
|
|
780
|
+
status: 'skipped',
|
|
781
|
+
reason: 'not_due',
|
|
782
|
+
strategy: deliberationOptions.strategy
|
|
783
|
+
};
|
|
784
|
+
}
|
|
785
|
+
else if (deliberationRuns >= deliberationOptions.maxRuns) {
|
|
786
|
+
deliberation = {
|
|
787
|
+
status: 'skipped',
|
|
788
|
+
reason: 'max_runs_reached',
|
|
789
|
+
strategy: deliberationOptions.strategy
|
|
790
|
+
};
|
|
791
|
+
}
|
|
792
|
+
else {
|
|
793
|
+
deliberationRuns += 1;
|
|
794
|
+
try {
|
|
795
|
+
const result = await runDeliberationStep({
|
|
796
|
+
options: deliberationOptions,
|
|
797
|
+
goal: options.goal,
|
|
798
|
+
iteration,
|
|
799
|
+
reason,
|
|
800
|
+
runDir,
|
|
801
|
+
repoRoot: options.repoRoot,
|
|
802
|
+
contextStore: options.contextStore,
|
|
803
|
+
priorReads,
|
|
804
|
+
priorSearches,
|
|
805
|
+
priorSubcalls
|
|
806
|
+
});
|
|
807
|
+
deliberation = result.record;
|
|
808
|
+
latestDeliberationBrief = result.brief;
|
|
809
|
+
lastDeliberationIteration = iteration;
|
|
810
|
+
log(`Deliberation ${formatDeliberationReason(reason)} ran for iteration ${iteration} (${result.record.strategy}).`);
|
|
811
|
+
}
|
|
812
|
+
catch (error) {
|
|
813
|
+
deliberation = {
|
|
814
|
+
status: 'error',
|
|
815
|
+
reason: formatDeliberationReason(reason),
|
|
816
|
+
strategy: deliberationOptions.strategy,
|
|
817
|
+
error: error instanceof Error ? error.message : String(error)
|
|
818
|
+
};
|
|
819
|
+
log(`Deliberation ${formatDeliberationReason(reason)} failed for iteration ${iteration}: ${deliberation.error}`);
|
|
820
|
+
}
|
|
821
|
+
}
|
|
822
|
+
}
|
|
479
823
|
const promptResult = buildPlannerPrompt({
|
|
480
824
|
goal: options.goal,
|
|
481
825
|
contextStore: options.contextStore,
|
|
482
826
|
budgets: options.budgets,
|
|
483
827
|
priorReads,
|
|
484
828
|
priorSearches,
|
|
485
|
-
priorSubcalls
|
|
829
|
+
priorSubcalls,
|
|
830
|
+
deliberationBrief: deliberationOptions?.enabled && deliberationOptions.includeInPlannerPrompt
|
|
831
|
+
? latestDeliberationBrief
|
|
832
|
+
: null
|
|
486
833
|
});
|
|
487
834
|
const plannerPrompt = promptResult.prompt;
|
|
488
835
|
const plannerPromptBytes = byteLength(plannerPrompt);
|
|
@@ -512,7 +859,7 @@ export async function runSymbolicLoop(options) {
|
|
|
512
859
|
let validationError = null;
|
|
513
860
|
try {
|
|
514
861
|
validation = validatePlan(plan, options.budgets);
|
|
515
|
-
validatePlanPointers(validation, options.contextStore);
|
|
862
|
+
validatePlanPointers(validation, options.contextStore, subcallPointers);
|
|
516
863
|
}
|
|
517
864
|
catch {
|
|
518
865
|
validationError = 'plan_validation_error';
|
|
@@ -520,6 +867,11 @@ export async function runSymbolicLoop(options) {
|
|
|
520
867
|
if (!validationError && plan.intent === 'final' && !hasPriorSubcalls) {
|
|
521
868
|
validationError = 'final_requires_subcall';
|
|
522
869
|
}
|
|
870
|
+
if (!validationError && plan.intent === 'final' && plan.final_var) {
|
|
871
|
+
if (!variableBindings.has(plan.final_var)) {
|
|
872
|
+
validationError = 'final_var_unbound';
|
|
873
|
+
}
|
|
874
|
+
}
|
|
523
875
|
if (validationError) {
|
|
524
876
|
plannerErrors.push(validationError);
|
|
525
877
|
await recordPlannerFailure({
|
|
@@ -542,12 +894,27 @@ export async function runSymbolicLoop(options) {
|
|
|
542
894
|
const reads = [];
|
|
543
895
|
const subcalls = [];
|
|
544
896
|
const searches = [];
|
|
897
|
+
const iterationVariableBindings = [];
|
|
545
898
|
if (plan.intent === 'final') {
|
|
899
|
+
let finalAnswer = plan.final_answer;
|
|
900
|
+
if (plan.final_var) {
|
|
901
|
+
const binding = variableBindings.get(plan.final_var);
|
|
902
|
+
if (!binding) {
|
|
903
|
+
return await finalize({ status: 'invalid_config', exitCode: 5 });
|
|
904
|
+
}
|
|
905
|
+
try {
|
|
906
|
+
finalAnswer = await readFile(binding.outputPath, 'utf8');
|
|
907
|
+
}
|
|
908
|
+
catch {
|
|
909
|
+
return await finalize({ status: 'invalid_config', exitCode: 5 });
|
|
910
|
+
}
|
|
911
|
+
}
|
|
546
912
|
state.symbolic_iterations.push({
|
|
547
913
|
iteration,
|
|
548
914
|
planner_prompt_bytes: plannerPromptBytes,
|
|
549
915
|
reads,
|
|
550
916
|
subcalls,
|
|
917
|
+
deliberation,
|
|
551
918
|
searches,
|
|
552
919
|
planner_errors: plannerErrors.length > 0 ? plannerErrors : undefined,
|
|
553
920
|
clamped: {
|
|
@@ -558,7 +925,7 @@ export async function runSymbolicLoop(options) {
|
|
|
558
925
|
truncation: promptResult.truncation
|
|
559
926
|
});
|
|
560
927
|
await writeState(statePath, state);
|
|
561
|
-
return await finalize({ status: 'passed', exitCode: 0, final_answer:
|
|
928
|
+
return await finalize({ status: 'passed', exitCode: 0, final_answer: finalAnswer });
|
|
562
929
|
}
|
|
563
930
|
if (plan.intent === 'pause' || plan.intent === 'fail') {
|
|
564
931
|
state.symbolic_iterations.push({
|
|
@@ -566,6 +933,7 @@ export async function runSymbolicLoop(options) {
|
|
|
566
933
|
planner_prompt_bytes: plannerPromptBytes,
|
|
567
934
|
reads,
|
|
568
935
|
subcalls,
|
|
936
|
+
deliberation,
|
|
569
937
|
searches,
|
|
570
938
|
planner_errors: plannerErrors.length > 0 ? plannerErrors : undefined,
|
|
571
939
|
clamped: {
|
|
@@ -596,19 +964,14 @@ export async function runSymbolicLoop(options) {
|
|
|
596
964
|
priorSearches = currentSearches;
|
|
597
965
|
const readExcerpts = [];
|
|
598
966
|
for (const read of validation.reads) {
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
const result = await options.contextStore.readSpan(read.start_byte, read.bytes);
|
|
608
|
-
readExcerpts.push({
|
|
609
|
-
pointer: `start_byte=${read.start_byte} bytes=${read.bytes}`,
|
|
610
|
-
excerpt: truncateUtf8ToBytes(result.text, options.budgets.maxBytesPerChunkRead)
|
|
611
|
-
});
|
|
967
|
+
const excerpt = await resolveReadExcerpt({
|
|
968
|
+
read,
|
|
969
|
+
contextStore: options.contextStore,
|
|
970
|
+
subcallPointers,
|
|
971
|
+
maxBytes: options.budgets.maxBytesPerChunkRead
|
|
972
|
+
});
|
|
973
|
+
if (excerpt) {
|
|
974
|
+
readExcerpts.push(excerpt);
|
|
612
975
|
}
|
|
613
976
|
reads.push(read);
|
|
614
977
|
}
|
|
@@ -650,14 +1013,12 @@ export async function runSymbolicLoop(options) {
|
|
|
650
1013
|
});
|
|
651
1014
|
const resolvedSnippets = [];
|
|
652
1015
|
for (const snippet of snippets) {
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
resolvedSnippets.push({ meta: snippet, text: result.text });
|
|
660
|
-
}
|
|
1016
|
+
const text = await resolveSnippetText({
|
|
1017
|
+
snippet,
|
|
1018
|
+
contextStore: options.contextStore,
|
|
1019
|
+
subcallPointers
|
|
1020
|
+
});
|
|
1021
|
+
resolvedSnippets.push({ meta: snippet, text });
|
|
661
1022
|
}
|
|
662
1023
|
for (const span of spans) {
|
|
663
1024
|
const spanBytes = Math.max(0, span.end_byte - span.start_byte);
|
|
@@ -697,9 +1058,12 @@ export async function runSymbolicLoop(options) {
|
|
|
697
1058
|
const promptPath = join(subcallDir, 'prompt.txt');
|
|
698
1059
|
const outputPath = join(subcallDir, 'output.txt');
|
|
699
1060
|
const metaPath = join(subcallDir, 'meta.json');
|
|
1061
|
+
const outputPointer = buildSubcallPointer(iteration, subcallId);
|
|
700
1062
|
await writeFile(inputPath, JSON.stringify({
|
|
701
1063
|
id: subcallId,
|
|
702
1064
|
purpose: rawSubcall.purpose,
|
|
1065
|
+
parent_pointer: rawSubcall.parent_pointer,
|
|
1066
|
+
output_var: rawSubcall.output_var,
|
|
703
1067
|
max_input_bytes: maxInput,
|
|
704
1068
|
snippets,
|
|
705
1069
|
spans
|
|
@@ -714,10 +1078,21 @@ export async function runSymbolicLoop(options) {
|
|
|
714
1078
|
clipped
|
|
715
1079
|
}, null, 2), 'utf8');
|
|
716
1080
|
const relativeBase = (filePath) => relative(options.repoRoot, filePath);
|
|
1081
|
+
const outputPreview = truncateUtf8ToBytes(output, options.budgets.maxPreviewBytes);
|
|
1082
|
+
subcallPointers.set(outputPointer, {
|
|
1083
|
+
pointer: outputPointer,
|
|
1084
|
+
iteration,
|
|
1085
|
+
subcallId,
|
|
1086
|
+
outputPath,
|
|
1087
|
+
outputBytes: byteLength(output)
|
|
1088
|
+
});
|
|
717
1089
|
return {
|
|
718
1090
|
subcall: {
|
|
719
1091
|
id: subcallId,
|
|
720
1092
|
purpose: rawSubcall.purpose,
|
|
1093
|
+
parent_pointer: rawSubcall.parent_pointer,
|
|
1094
|
+
output_pointer: outputPointer,
|
|
1095
|
+
output_var: rawSubcall.output_var,
|
|
721
1096
|
snippets: snippets.length > 0 ? snippets : undefined,
|
|
722
1097
|
spans: spans.length > 0 ? spans : undefined,
|
|
723
1098
|
max_input_bytes: rawSubcall.max_input_bytes,
|
|
@@ -735,12 +1110,36 @@ export async function runSymbolicLoop(options) {
|
|
|
735
1110
|
},
|
|
736
1111
|
output: {
|
|
737
1112
|
id: subcallId,
|
|
738
|
-
|
|
739
|
-
|
|
1113
|
+
pointer: outputPointer,
|
|
1114
|
+
preview: outputPreview,
|
|
1115
|
+
output_bytes: byteLength(output),
|
|
1116
|
+
output_var: rawSubcall.output_var
|
|
1117
|
+
},
|
|
1118
|
+
binding: rawSubcall.output_var
|
|
1119
|
+
? {
|
|
1120
|
+
name: rawSubcall.output_var,
|
|
1121
|
+
pointer: outputPointer,
|
|
1122
|
+
iteration,
|
|
1123
|
+
subcallId,
|
|
1124
|
+
outputPath,
|
|
1125
|
+
outputBytes: byteLength(output)
|
|
1126
|
+
}
|
|
1127
|
+
: null
|
|
740
1128
|
};
|
|
741
1129
|
});
|
|
742
1130
|
for (const result of subcallResults) {
|
|
743
1131
|
subcalls.push(result.subcall);
|
|
1132
|
+
if (result.binding) {
|
|
1133
|
+
variableBindings.set(result.binding.name, result.binding);
|
|
1134
|
+
iterationVariableBindings.push({
|
|
1135
|
+
name: result.binding.name,
|
|
1136
|
+
pointer: result.binding.pointer,
|
|
1137
|
+
iteration: result.binding.iteration,
|
|
1138
|
+
subcall_id: result.binding.subcallId,
|
|
1139
|
+
output_bytes: result.binding.outputBytes,
|
|
1140
|
+
output_path: relative(options.repoRoot, result.binding.outputPath)
|
|
1141
|
+
});
|
|
1142
|
+
}
|
|
744
1143
|
}
|
|
745
1144
|
priorSubcalls = subcallResults.map((result) => result.output);
|
|
746
1145
|
state.symbolic_iterations.push({
|
|
@@ -748,6 +1147,8 @@ export async function runSymbolicLoop(options) {
|
|
|
748
1147
|
planner_prompt_bytes: plannerPromptBytes,
|
|
749
1148
|
reads,
|
|
750
1149
|
subcalls,
|
|
1150
|
+
variable_bindings: iterationVariableBindings.length > 0 ? iterationVariableBindings : undefined,
|
|
1151
|
+
deliberation,
|
|
751
1152
|
searches,
|
|
752
1153
|
planner_errors: plannerErrors.length > 0 ? plannerErrors : undefined,
|
|
753
1154
|
clamped: {
|