create-walle 0.9.11 → 0.9.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -3
- package/package.json +2 -2
- package/template/bin/dev.sh +7 -1
- package/template/bin/setup.js +53 -9
- package/template/bin/sync-images.js +53 -0
- package/template/builder-journal.md +17 -0
- package/template/claude-task-manager/api-prompts.js +98 -13
- package/template/claude-task-manager/api-reviews.js +82 -5
- package/template/claude-task-manager/db.js +32 -5
- package/template/claude-task-manager/docs/session-capture-foundation-design.md +1273 -0
- package/template/claude-task-manager/lib/claude-desktop-sessions.js +696 -0
- package/template/claude-task-manager/lib/coding-agent-models.js +49 -1
- package/template/claude-task-manager/lib/session-capture.js +421 -0
- package/template/claude-task-manager/lib/session-history.js +135 -15
- package/template/claude-task-manager/lib/session-jobs.js +10 -5
- package/template/claude-task-manager/lib/session-stream.js +87 -19
- package/template/claude-task-manager/lib/setup-provider-config.js +115 -0
- package/template/claude-task-manager/lib/walle-ctm-history.js +72 -0
- package/template/claude-task-manager/lib/walle-session-context.js +61 -0
- package/template/claude-task-manager/lib/walle-transcript.js +176 -0
- package/template/claude-task-manager/public/css/setup.css +35 -8
- package/template/claude-task-manager/public/css/walle-session.css +56 -0
- package/template/claude-task-manager/public/css/walle.css +120 -0
- package/template/claude-task-manager/public/index.html +814 -181
- package/template/claude-task-manager/public/js/message-renderer.js +148 -19
- package/template/claude-task-manager/public/js/reviews.js +120 -62
- package/template/claude-task-manager/public/js/setup.js +75 -31
- package/template/claude-task-manager/public/js/stream-view.js +115 -55
- package/template/claude-task-manager/public/js/walle-session.js +84 -2
- package/template/claude-task-manager/public/js/walle.js +308 -54
- package/template/claude-task-manager/server.js +1092 -146
- package/template/claude-task-manager/session-integrity.js +181 -54
- package/template/claude-task-manager/session-utils.js +123 -41
- package/template/claude-task-manager/workers/state-detectors/codex.js +5 -2
- package/template/package.json +1 -1
- package/template/wall-e/adapters/ctm.js +39 -18
- package/template/wall-e/agent-runners/contract.js +17 -0
- package/template/wall-e/agent-runners/index.js +22 -0
- package/template/wall-e/agent-runtime/harness.js +212 -0
- package/template/wall-e/agent-runtime/index.js +8 -0
- package/template/wall-e/agent-runtime/registry.js +67 -0
- package/template/wall-e/agent-runtime/session-store.js +179 -0
- package/template/wall-e/agent-runtime/spawn.js +208 -0
- package/template/wall-e/api-walle.js +174 -7
- package/template/wall-e/brain.js +266 -28
- package/template/wall-e/channels/policy.js +88 -0
- package/template/wall-e/channels/registry.js +15 -1
- package/template/wall-e/channels/reply-dispatcher.js +70 -0
- package/template/wall-e/channels/session-bindings.js +51 -0
- package/template/wall-e/chat/code-review-context.js +29 -0
- package/template/wall-e/chat.js +188 -42
- package/template/wall-e/coding/acp-adapter.js +188 -0
- package/template/wall-e/coding/agent-catalog.js +129 -0
- package/template/wall-e/coding/compaction-service.js +247 -0
- package/template/wall-e/coding/execution-trace.js +3 -0
- package/template/wall-e/coding/instruction-service.js +224 -0
- package/template/wall-e/coding/model-message.js +67 -0
- package/template/wall-e/coding/permission-rules-store.js +111 -0
- package/template/wall-e/coding/permission-service.js +266 -0
- package/template/wall-e/coding/prompt-bundle.js +67 -0
- package/template/wall-e/coding/prompt-runtime.js +243 -0
- package/template/wall-e/coding/provider-transform.js +188 -0
- package/template/wall-e/coding/runtime-mode.js +132 -0
- package/template/wall-e/coding/snapshot-service.js +155 -0
- package/template/wall-e/coding/stream-processor.js +268 -0
- package/template/wall-e/coding/task-tool.js +255 -0
- package/template/wall-e/coding/tool-registry.js +361 -0
- package/template/wall-e/coding/transcript-writer.js +143 -0
- package/template/wall-e/coding/workspace-replay.js +324 -0
- package/template/wall-e/coding-context.js +4 -22
- package/template/wall-e/coding-orchestrator.js +307 -18
- package/template/wall-e/coding-prompts.js +44 -3
- package/template/wall-e/context/context-builder.js +43 -1
- package/template/wall-e/context/topic-matcher.js +1 -1
- package/template/wall-e/eval/agent-runner.js +59 -13
- package/template/wall-e/eval/benchmarks/memory-retrieval.json +155 -57
- package/template/wall-e/eval/benchmarks.js +100 -16
- package/template/wall-e/eval/eval-orchestrator.js +218 -8
- package/template/wall-e/eval/harvester.js +62 -5
- package/template/wall-e/eval/head-to-head.js +23 -2
- package/template/wall-e/eval/humaneval-adapter.js +30 -5
- package/template/wall-e/eval/livecodebench-adapter.js +29 -5
- package/template/wall-e/eval/manifest.js +186 -0
- package/template/wall-e/eval/run-agent-benchmarks.js +66 -2
- package/template/wall-e/eval/session-retrieval-benchmark.js +150 -0
- package/template/wall-e/eval/session-transcripts.js +57 -4
- package/template/wall-e/eval/swebench-adapter.js +109 -3
- package/template/wall-e/evaluation/agent-router.js +53 -1
- package/template/wall-e/evaluation/coding-quorum.js +48 -1
- package/template/wall-e/evaluation/router.js +4 -2
- package/template/wall-e/evaluation/tier-selector.js +11 -1
- package/template/wall-e/extraction/contradiction.js +2 -2
- package/template/wall-e/extraction/indexer.js +2 -1
- package/template/wall-e/extraction/knowledge-extractor.js +2 -2
- package/template/wall-e/hooks/cli.js +92 -0
- package/template/wall-e/hooks/discovery.js +119 -0
- package/template/wall-e/hooks/index.js +7 -0
- package/template/wall-e/hooks/manifest.js +55 -0
- package/template/wall-e/hooks/runtime.js +84 -0
- package/template/wall-e/hooks/session-memory.js +225 -0
- package/template/wall-e/http/auth.js +6 -2
- package/template/wall-e/http/chat-api.js +54 -8
- package/template/wall-e/integrations/claude-plugin/hooks/hooks.json +27 -0
- package/template/wall-e/integrations/claude-plugin/hooks/walle-precompact-hook.sh +5 -0
- package/template/wall-e/integrations/claude-plugin/hooks/walle-stop-hook.sh +5 -0
- package/template/wall-e/integrations/codex-plugin/hooks/walle-hook.sh +7 -0
- package/template/wall-e/integrations/codex-plugin/hooks.json +37 -0
- package/template/wall-e/listening/calendar.js +3 -1
- package/template/wall-e/llm/client.js +64 -10
- package/template/wall-e/llm/google.js +39 -5
- package/template/wall-e/llm/ollama.js +1 -1
- package/template/wall-e/llm/ollama.plugin.json +1 -1
- package/template/wall-e/llm/provider-availability.js +10 -0
- package/template/wall-e/llm/provider-error.js +269 -0
- package/template/wall-e/llm/tool-adapter.js +48 -12
- package/template/wall-e/loops/boot.js +2 -1
- package/template/wall-e/loops/initiative.js +2 -2
- package/template/wall-e/loops/tasks.js +8 -47
- package/template/wall-e/loops/workspace-prompts.js +20 -0
- package/template/wall-e/mcp-server.js +442 -1
- package/template/wall-e/memory/session-ingest-service.js +159 -0
- package/template/wall-e/memory/source-indexer.js +289 -0
- package/template/wall-e/plugins/discovery.js +83 -0
- package/template/wall-e/plugins/manifest-loader.js +50 -10
- package/template/wall-e/plugins/manifest-schema.js +69 -0
- package/template/wall-e/plugins/model-catalog.js +55 -0
- package/template/wall-e/prompts/coding/base.txt +2 -0
- package/template/wall-e/prompts/coding/deepseek.txt +1 -0
- package/template/wall-e/prompts/coding/memory-protocol.md +9 -0
- package/template/wall-e/prompts/coding/plan.txt +1 -0
- package/template/wall-e/runtime/execution-trace.js +220 -0
- package/template/wall-e/security/audit.js +266 -0
- package/template/wall-e/security/ssrf.js +236 -0
- package/template/wall-e/session-files.js +303 -0
- package/template/wall-e/skills/_bundled/slack-backfill/SKILL.md +3 -0
- package/template/wall-e/skills/_bundled/slack-sync/SKILL.md +3 -0
- package/template/wall-e/skills/internal-skill-registry.js +2 -2
- package/template/wall-e/skills/script-skill-runner.js +143 -0
- package/template/wall-e/skills/skill-executor.js +5 -6
- package/template/wall-e/skills/skill-fallback.js +3 -1
- package/template/wall-e/skills/skill-harness-registry.js +7 -8
- package/template/wall-e/skills/skill-planner.js +52 -4
- package/template/wall-e/skills/slack-ingest.js +11 -3
- package/template/wall-e/sources/base.js +90 -0
- package/template/wall-e/sources/builtin.js +33 -0
- package/template/wall-e/sources/claude-code-jsonl.js +78 -0
- package/template/wall-e/sources/codex-jsonl.js +125 -0
- package/template/wall-e/sources/coding-session-utils.js +117 -0
- package/template/wall-e/sources/contract-suite.js +59 -0
- package/template/wall-e/sources/gemini-jsonl.js +85 -0
- package/template/wall-e/sources/index.js +9 -0
- package/template/wall-e/sources/jsonl-utils.js +181 -0
- package/template/wall-e/sources/record-types.js +252 -0
- package/template/wall-e/sources/registry.js +92 -0
- package/template/wall-e/sources/transforms.js +100 -0
- package/template/wall-e/sources/walle-jsonl.js +108 -0
- package/template/wall-e/tools/coding-middleware.js +31 -1
- package/template/wall-e/tools/file-tracker.js +25 -1
- package/template/wall-e/tools/local-tools.js +75 -47
- package/template/wall-e/tools/session-sharing.js +68 -1
- package/template/wall-e/tools/shell-analyzer.js +1 -1
- package/template/wall-e/tools/shell-policy.js +47 -0
- package/template/wall-e/tools/snapshot.js +42 -0
- package/template/wall-e/training/harvester.js +62 -5
- package/template/wall-e/utils/repair.js +253 -1
- package/template/website/index.html +3 -3
- package/template/wall-e/skills/_bundled/slack-mentions/.watched-threads.json +0 -18
|
@@ -48,6 +48,12 @@ const {
|
|
|
48
48
|
buildPlanningPrompt,
|
|
49
49
|
buildSubtaskPrompt,
|
|
50
50
|
} = require('./coding-prompts');
|
|
51
|
+
const {
|
|
52
|
+
resolveRuntimeMode,
|
|
53
|
+
filterToolsForRuntimeMode,
|
|
54
|
+
shouldUseStreamProcessor,
|
|
55
|
+
} = require('./coding/runtime-mode');
|
|
56
|
+
const { createCodingTranscript } = require('./coding/transcript-writer');
|
|
51
57
|
|
|
52
58
|
const MAX_CUMULATIVE_CONTEXT = 4000;
|
|
53
59
|
const MAX_DIFF_SIZE = 50 * 1024; // 50KB
|
|
@@ -634,9 +640,51 @@ async function runAgentLoop(prompt, opts = {}) {
|
|
|
634
640
|
const modelId = resolveModelId(model, llm);
|
|
635
641
|
const resolvedCwd = realpathBestEffort(cwd || process.cwd());
|
|
636
642
|
const taskFileHints = extractTaskFileHints(prompt);
|
|
637
|
-
const
|
|
643
|
+
const runtimeMode = resolveRuntimeMode(opts);
|
|
644
|
+
const requestedTools = filterToolsForRuntimeMode(Array.isArray(tools) ? tools : CODING_TOOLS, runtimeMode);
|
|
645
|
+
const transcript = createCodingTranscript({
|
|
646
|
+
transcript: opts.transcript,
|
|
647
|
+
persistTranscript: opts.persistTranscript,
|
|
648
|
+
sessionId: sid,
|
|
649
|
+
cwd: resolvedCwd,
|
|
650
|
+
chatSessionId: opts.chatSessionId || '',
|
|
651
|
+
label: String(prompt || '').slice(0, 160),
|
|
652
|
+
modelId,
|
|
653
|
+
modelProvider: llm.type || '',
|
|
654
|
+
runtimeMode: runtimeMode.id,
|
|
655
|
+
agent: opts.agent || '',
|
|
656
|
+
mode: opts.mode || '',
|
|
657
|
+
});
|
|
658
|
+
if (transcript?.ensureSession) {
|
|
659
|
+
transcript.ensureSession({
|
|
660
|
+
sessionId: sid,
|
|
661
|
+
cwd: resolvedCwd,
|
|
662
|
+
chatSessionId: opts.chatSessionId || '',
|
|
663
|
+
label: String(prompt || '').slice(0, 160),
|
|
664
|
+
modelId,
|
|
665
|
+
modelProvider: llm.type || '',
|
|
666
|
+
runtimeMode: runtimeMode.id,
|
|
667
|
+
agent: opts.agent || '',
|
|
668
|
+
mode: opts.mode || '',
|
|
669
|
+
});
|
|
670
|
+
}
|
|
671
|
+
if (!opts._resumeMessages && transcript?.appendUserMessage) {
|
|
672
|
+
transcript.appendUserMessage(prompt, {
|
|
673
|
+
sessionId: sid,
|
|
674
|
+
cwd: resolvedCwd,
|
|
675
|
+
chatSessionId: opts.chatSessionId || '',
|
|
676
|
+
});
|
|
677
|
+
}
|
|
638
678
|
if (requestedTools.length > 0 && !providerSupportsToolCalls(llm)) {
|
|
639
679
|
const providerType = llm.type || 'unknown';
|
|
680
|
+
if (transcript?.appendPart) {
|
|
681
|
+
transcript.appendPart({
|
|
682
|
+
sessionId: sid,
|
|
683
|
+
cwd: resolvedCwd,
|
|
684
|
+
partType: 'error',
|
|
685
|
+
data: { message: `Provider ${providerType} does not support tool calls` },
|
|
686
|
+
});
|
|
687
|
+
}
|
|
640
688
|
return {
|
|
641
689
|
success: false,
|
|
642
690
|
output: '',
|
|
@@ -673,6 +721,7 @@ async function runAgentLoop(prompt, opts = {}) {
|
|
|
673
721
|
projectInfo: projectInfo && projectInfo.type !== 'unknown' ? projectInfo : null,
|
|
674
722
|
projectSkills,
|
|
675
723
|
taskFileHints,
|
|
724
|
+
runtimeMode,
|
|
676
725
|
});
|
|
677
726
|
|
|
678
727
|
// Resume support: use restored messages if resuming from checkpoint
|
|
@@ -705,6 +754,58 @@ async function runAgentLoop(prompt, opts = {}) {
|
|
|
705
754
|
return m;
|
|
706
755
|
})();
|
|
707
756
|
const events = opts.events || new CodingEvents();
|
|
757
|
+
const { PermissionService } = require('./coding/permission-service');
|
|
758
|
+
const permissionService = opts.permissionService || new PermissionService({
|
|
759
|
+
events,
|
|
760
|
+
timeoutMs: opts.permissionTimeoutMs,
|
|
761
|
+
headlessPolicy: opts.headlessPolicy || (opts.benchmark ? 'allow' : 'reject'),
|
|
762
|
+
});
|
|
763
|
+
const { AgentCatalog } = require('./coding/agent-catalog');
|
|
764
|
+
const agentCatalog = opts.agentCatalog || new AgentCatalog({ projectRoot: resolvedCwd });
|
|
765
|
+
const defaultTaskRunner = async ({ prompt: childPrompt, agent, taskId, ctx }) => {
|
|
766
|
+
const childTools = agentCatalog.toolsForAgent(requestedTools, agent);
|
|
767
|
+
const childResult = await runAgentLoop(childPrompt, {
|
|
768
|
+
cwd: ctx.cwd || resolvedCwd,
|
|
769
|
+
timeoutMs: agent.timeoutMs || agent.timeout_ms || opts.subtaskTimeoutMs || Math.min(timeout, 300000),
|
|
770
|
+
maxTurns: agent.maxSteps || agent.max_steps || opts.childMaxTurns || Math.min(turns, 10),
|
|
771
|
+
provider,
|
|
772
|
+
model: agent.model || model,
|
|
773
|
+
tools: childTools,
|
|
774
|
+
mode: agent.mode || opts.mode,
|
|
775
|
+
onProgress,
|
|
776
|
+
transcript,
|
|
777
|
+
events,
|
|
778
|
+
permissionService,
|
|
779
|
+
interactive: opts.interactive,
|
|
780
|
+
headless: opts.headless,
|
|
781
|
+
benchmark: opts.benchmark,
|
|
782
|
+
headlessPolicy: opts.headlessPolicy,
|
|
783
|
+
_resumeSessionId: taskId,
|
|
784
|
+
enableTaskTool: false,
|
|
785
|
+
});
|
|
786
|
+
return {
|
|
787
|
+
success: childResult.success,
|
|
788
|
+
output: childResult.output,
|
|
789
|
+
stderr: childResult.stderr,
|
|
790
|
+
exitCode: childResult.exitCode,
|
|
791
|
+
sessionId: childResult.sessionId,
|
|
792
|
+
usage: childResult.usage,
|
|
793
|
+
};
|
|
794
|
+
};
|
|
795
|
+
const { ToolRegistry } = require('./coding/tool-registry');
|
|
796
|
+
const toolRegistry = opts.toolRegistry || new ToolRegistry({
|
|
797
|
+
builtinTools: requestedTools,
|
|
798
|
+
middleware: mw,
|
|
799
|
+
localTools: getLocalTools(),
|
|
800
|
+
mcpClient: opts.mcpClient,
|
|
801
|
+
skillRunner: opts.skillRunner,
|
|
802
|
+
agentCatalog,
|
|
803
|
+
taskRunner: opts.taskRunner || defaultTaskRunner,
|
|
804
|
+
brain: opts.brain || null,
|
|
805
|
+
transcript,
|
|
806
|
+
projectRoot: resolvedCwd,
|
|
807
|
+
enableTaskTool: runtimeMode.canDelegate && opts.enableTaskTool !== false,
|
|
808
|
+
});
|
|
708
809
|
|
|
709
810
|
// ── Interactive Questions (B1) ──
|
|
710
811
|
// Inspired by OpenCode Question service (packages/opencode/src/question/index.ts)
|
|
@@ -712,6 +813,161 @@ async function runAgentLoop(prompt, opts = {}) {
|
|
|
712
813
|
|
|
713
814
|
// projectInfo already detected above (before system prompt)
|
|
714
815
|
|
|
816
|
+
// Stream-native runtime: model deltas, tool states, snapshots, permissions,
|
|
817
|
+
// and step boundaries are persisted as typed transcript parts while the loop
|
|
818
|
+
// runs. Set WALLE_CODING_STREAM_PROCESSOR=0 or useStreamProcessor:false to
|
|
819
|
+
// fall back to the legacy whole-response loop.
|
|
820
|
+
if (shouldUseStreamProcessor(opts)) {
|
|
821
|
+
const { StreamProcessor } = require('./coding/stream-processor');
|
|
822
|
+
const { SnapshotService } = require('./coding/snapshot-service');
|
|
823
|
+
const processor = new StreamProcessor({
|
|
824
|
+
provider: llm,
|
|
825
|
+
model: modelId,
|
|
826
|
+
transcript,
|
|
827
|
+
snapshotService: opts.snapshotService || new SnapshotService({ cwd: resolvedCwd }),
|
|
828
|
+
permissionService,
|
|
829
|
+
toolExecutor: async (call) => {
|
|
830
|
+
const input = { ...(call.input || {}) };
|
|
831
|
+
if (['read_file', 'write_file', 'edit_file'].includes(call.name)) {
|
|
832
|
+
if (input.file_path && !path.isAbsolute(input.file_path)) input.file_path = path.join(resolvedCwd, input.file_path);
|
|
833
|
+
if (!input.file_path && input.path) input.file_path = path.isAbsolute(input.path) ? input.path : path.join(resolvedCwd, input.path);
|
|
834
|
+
}
|
|
835
|
+
if (call.name === 'list_directory' && input.directory && !path.isAbsolute(input.directory)) {
|
|
836
|
+
input.directory = path.join(resolvedCwd, input.directory);
|
|
837
|
+
}
|
|
838
|
+
input.sessionId = sid;
|
|
839
|
+
input.projectRoot = resolvedCwd;
|
|
840
|
+
return toolRegistry.execute(call.name, input, { sessionId: sid, cwd: resolvedCwd, model: modelId, provider: llm.type });
|
|
841
|
+
},
|
|
842
|
+
});
|
|
843
|
+
processor.on('event', (evt) => emitProgress({
|
|
844
|
+
phase: opts.mode || 'executing',
|
|
845
|
+
step: 0,
|
|
846
|
+
message: evt.type,
|
|
847
|
+
detail: evt,
|
|
848
|
+
}));
|
|
849
|
+
|
|
850
|
+
let streamStatus = 'finished';
|
|
851
|
+
let streamStopReason = '';
|
|
852
|
+
let streamModel = modelId;
|
|
853
|
+
const streamErrors = [];
|
|
854
|
+
for (let turnIndex = opts._resumeTurn || 0; turnIndex < turns; turnIndex++) {
|
|
855
|
+
const remaining = deadline - Date.now();
|
|
856
|
+
if (remaining <= 0) {
|
|
857
|
+
streamStatus = 'error';
|
|
858
|
+
streamErrors.push('Timeout reached');
|
|
859
|
+
finalOutput += '\n[Timeout reached]';
|
|
860
|
+
break;
|
|
861
|
+
}
|
|
862
|
+
emitProgress({
|
|
863
|
+
phase: opts.mode || 'executing',
|
|
864
|
+
step: turnIndex,
|
|
865
|
+
message: `Stream turn ${turnIndex + 1}/${turns}...`,
|
|
866
|
+
});
|
|
867
|
+
const perTurnCap = opts.perTurnTimeoutMs || (/ollama|mlx/.test(llm.type || '') ? 600000 : 300000);
|
|
868
|
+
const ac = new AbortController();
|
|
869
|
+
const timer = setTimeout(() => ac.abort(), Math.min(remaining, perTurnCap));
|
|
870
|
+
let turn;
|
|
871
|
+
try {
|
|
872
|
+
const toolsForTurn = turnIndex >= turns - 1
|
|
873
|
+
? []
|
|
874
|
+
: await toolRegistry.getDefinitions({
|
|
875
|
+
provider: llm.type,
|
|
876
|
+
model: modelId,
|
|
877
|
+
mode: opts.mode,
|
|
878
|
+
runtimeMode: runtimeMode.id,
|
|
879
|
+
cwd: resolvedCwd,
|
|
880
|
+
});
|
|
881
|
+
turn = await processor.runTurn({
|
|
882
|
+
sessionId: sid,
|
|
883
|
+
cwd: resolvedCwd,
|
|
884
|
+
system: systemPrompt,
|
|
885
|
+
messages,
|
|
886
|
+
tools: toolsForTurn,
|
|
887
|
+
maxTokens: taskFileHints.length >= 4 ? 8192 : 4096,
|
|
888
|
+
signal: ac.signal,
|
|
889
|
+
});
|
|
890
|
+
} finally {
|
|
891
|
+
clearTimeout(timer);
|
|
892
|
+
}
|
|
893
|
+
|
|
894
|
+
totalInput += turn.usage?.input || 0;
|
|
895
|
+
totalOutput += turn.usage?.output || 0;
|
|
896
|
+
streamStatus = turn.status;
|
|
897
|
+
streamStopReason = turn.stopReason || streamStopReason;
|
|
898
|
+
streamModel = turn.model || streamModel;
|
|
899
|
+
if (turn.errors?.length) streamErrors.push(...turn.errors);
|
|
900
|
+
if (turn.text) finalOutput += turn.text;
|
|
901
|
+
const streamToolCalls = (turn.toolCalls || []).map(tc => ({ name: tc.name, input: tc.input }));
|
|
902
|
+
toolCallHistory.push(...streamToolCalls.map(tc => ({
|
|
903
|
+
name: tc.name,
|
|
904
|
+
inputHash: JSON.stringify(tc.input || {}).slice(0, 500),
|
|
905
|
+
})));
|
|
906
|
+
log.push({
|
|
907
|
+
turn: turnIndex,
|
|
908
|
+
model: turn.model || modelId,
|
|
909
|
+
provider: turn.provider || llm.type,
|
|
910
|
+
toolCalls: streamToolCalls,
|
|
911
|
+
content: turn.text,
|
|
912
|
+
stopReason: turn.stopReason,
|
|
913
|
+
});
|
|
914
|
+
|
|
915
|
+
if (turn.status === 'error') break;
|
|
916
|
+
if ((turn.toolCalls || []).length === 0) {
|
|
917
|
+
const editTools = new Set(['edit_file', 'write_file', 'apply_patch', 'multi_edit']);
|
|
918
|
+
const madeEdits = toolCallHistory.some(c => editTools.has(c.name));
|
|
919
|
+
const canFinishWithoutEdit = isLegitimateNoEditResponse(turn.text, toolCallHistory);
|
|
920
|
+
if (!madeEdits && !canFinishWithoutEdit && turnIndex < turns - 1 && turnIndex > 1 && !log._noEditNudged) {
|
|
921
|
+
log._noEditNudged = true;
|
|
922
|
+
const nudge = `[SYSTEM] You have not made any code changes yet. ` +
|
|
923
|
+
`The task requires you to use edit_file or write_file to modify code. ` +
|
|
924
|
+
`Do NOT just describe the fix — actually call edit_file with the old_string and new_string to make the change. ` +
|
|
925
|
+
`Here is the working directory: ${resolvedCwd}`;
|
|
926
|
+
if (turn.assistantMessage) messages.push(turn.assistantMessage);
|
|
927
|
+
messages.push({ role: 'user', content: nudge });
|
|
928
|
+
emitProgress({ phase: opts.mode || 'executing', step: turnIndex, message: 'No-edit guard: nudging agent to make changes' });
|
|
929
|
+
continue;
|
|
930
|
+
}
|
|
931
|
+
}
|
|
932
|
+
if (turn.assistantMessage) messages.push(turn.assistantMessage);
|
|
933
|
+
if (turn.toolResultMessage) messages.push(turn.toolResultMessage);
|
|
934
|
+
if (turn.next !== 'continue') break;
|
|
935
|
+
}
|
|
936
|
+
|
|
937
|
+
if (streamStatus === 'error' && transcript?.appendPart) {
|
|
938
|
+
transcript.appendPart({
|
|
939
|
+
sessionId: sid,
|
|
940
|
+
cwd: resolvedCwd,
|
|
941
|
+
partType: 'error',
|
|
942
|
+
data: { errors: streamErrors },
|
|
943
|
+
});
|
|
944
|
+
}
|
|
945
|
+
if (finalOutput && transcript?.appendAssistantMessage) {
|
|
946
|
+
transcript.appendAssistantMessage(finalOutput, {
|
|
947
|
+
sessionId: sid,
|
|
948
|
+
cwd: resolvedCwd,
|
|
949
|
+
chatSessionId: opts.chatSessionId || '',
|
|
950
|
+
model: streamModel,
|
|
951
|
+
provider: llm.type || '',
|
|
952
|
+
tokens: { input: totalInput, output: totalOutput },
|
|
953
|
+
});
|
|
954
|
+
}
|
|
955
|
+
|
|
956
|
+
return {
|
|
957
|
+
success: streamStatus !== 'error',
|
|
958
|
+
output: finalOutput,
|
|
959
|
+
stderr: streamErrors.join('\n'),
|
|
960
|
+
sessionId: sid,
|
|
961
|
+
exitCode: streamStatus === 'error' ? -1 : 0,
|
|
962
|
+
log,
|
|
963
|
+
usage: { input: totalInput, output: totalOutput },
|
|
964
|
+
provider: llm.type,
|
|
965
|
+
model: streamModel,
|
|
966
|
+
next: 'stop',
|
|
967
|
+
runtimeMode: runtimeMode.id,
|
|
968
|
+
};
|
|
969
|
+
}
|
|
970
|
+
|
|
715
971
|
// ── Bridge: event bus → middleware (A2) ──
|
|
716
972
|
// When the event bus fires, propagate to middleware's onEvent hook so
|
|
717
973
|
// registered middleware can react to file edits, reads, and context overflow.
|
|
@@ -818,10 +1074,10 @@ async function runAgentLoop(prompt, opts = {}) {
|
|
|
818
1074
|
|
|
819
1075
|
// Middleware: prepare LLM call
|
|
820
1076
|
const llmCtx = { params: { maxTokens: taskFileHints.length >= 4 ? 8192 : 4096 }, system: systemPrompt, cwd: resolvedCwd,
|
|
821
|
-
provider: llm.type, model: modelId, mode: opts.mode, claudeMd: opts.claudeMd, log: {} };
|
|
1077
|
+
provider: llm.type, model: modelId, mode: opts.mode, runtimeMode: runtimeMode.id, claudeMd: opts.claudeMd, log: {} };
|
|
822
1078
|
llmCtxRef.current = llmCtx; // expose to event bridge (A2)
|
|
823
1079
|
await mw.run('llm.before', llmCtx);
|
|
824
|
-
let adaptedTools = await
|
|
1080
|
+
let adaptedTools = await toolRegistry.getDefinitions(llmCtx);
|
|
825
1081
|
|
|
826
1082
|
// Context compaction (6b) -- prune old tool results when approaching context limit
|
|
827
1083
|
const estimateTokens = (msgs) => {
|
|
@@ -1034,26 +1290,24 @@ async function runAgentLoop(prompt, opts = {}) {
|
|
|
1034
1290
|
if (tc.name === 'run_shell') {
|
|
1035
1291
|
input.timeout_ms = input.timeout_ms || 30000;
|
|
1036
1292
|
input.cwd = input.cwd || resolvedCwd;
|
|
1293
|
+
}
|
|
1037
1294
|
|
|
1038
|
-
|
|
1039
|
-
const
|
|
1040
|
-
await initShellParser();
|
|
1041
|
-
const analysis = await analyzeShellCommand(input.command, input.cwd || resolvedCwd);
|
|
1042
|
-
const { checkPermission } = require('./tools/permission-checker');
|
|
1043
|
-
const permResult = await checkPermission({
|
|
1044
|
-
tool: 'run_shell',
|
|
1045
|
-
command: input.command,
|
|
1046
|
-
commandTokens: analysis.commandTokens.length > 0 ? analysis.commandTokens[0] : undefined,
|
|
1047
|
-
projectPath: resolvedCwd,
|
|
1295
|
+
if (['run_shell', 'read_file', 'write_file', 'edit_file', 'apply_patch', 'multi_edit', 'applescript', 'claude_code', 'mail_send', 'slack_send_message'].includes(tc.name)) {
|
|
1296
|
+
const permResult = await permissionService.authorize({
|
|
1048
1297
|
sessionId: sid,
|
|
1298
|
+
tool: tc.name,
|
|
1299
|
+
input,
|
|
1300
|
+
cwd: input.cwd || resolvedCwd,
|
|
1301
|
+
projectRoot: resolvedCwd,
|
|
1049
1302
|
mode: opts.mode,
|
|
1303
|
+
headless: Boolean(opts.headless || opts.benchmark),
|
|
1304
|
+
metadata: { toolCallId: tc.id || tc.toolCallId || '' },
|
|
1050
1305
|
});
|
|
1051
|
-
if (permResult.decision
|
|
1052
|
-
result = { error: `Permission denied: ${permResult.reason}` };
|
|
1306
|
+
if (permResult.decision !== 'allow') {
|
|
1307
|
+
result = { error: `Permission denied: ${permResult.reason || permResult.message || permResult.decision}` };
|
|
1053
1308
|
turnHadError = true;
|
|
1054
1309
|
throw new Error('path_blocked');
|
|
1055
1310
|
}
|
|
1056
|
-
// Note: 'ask' decisions would need CTM approval flow — for now, log and allow
|
|
1057
1311
|
}
|
|
1058
1312
|
|
|
1059
1313
|
// Middleware: before tool
|
|
@@ -1087,8 +1341,13 @@ async function runAgentLoop(prompt, opts = {}) {
|
|
|
1087
1341
|
}
|
|
1088
1342
|
}
|
|
1089
1343
|
} else {
|
|
1090
|
-
result = await
|
|
1091
|
-
|
|
1344
|
+
result = await toolRegistry.execute(tc.name, finalInput, {
|
|
1345
|
+
sessionId: sid,
|
|
1346
|
+
cwd: resolvedCwd,
|
|
1347
|
+
model: modelId,
|
|
1348
|
+
provider: llm.type,
|
|
1349
|
+
llmCtx,
|
|
1350
|
+
});
|
|
1092
1351
|
}
|
|
1093
1352
|
|
|
1094
1353
|
// Middleware: after tool
|
|
@@ -1204,6 +1463,24 @@ async function runAgentLoop(prompt, opts = {}) {
|
|
|
1204
1463
|
|
|
1205
1464
|
// Save error checkpoint for crash recovery
|
|
1206
1465
|
saveCheckpointToBrain(sid, Math.max(0, log.length - 1), messages, opts, totalInput, totalOutput, resolvedCwd, 'error');
|
|
1466
|
+
if (transcript?.appendPart) {
|
|
1467
|
+
transcript.appendPart({
|
|
1468
|
+
sessionId: sid,
|
|
1469
|
+
cwd: resolvedCwd,
|
|
1470
|
+
partType: 'error',
|
|
1471
|
+
data: { message: err.message },
|
|
1472
|
+
});
|
|
1473
|
+
}
|
|
1474
|
+
if (finalOutput && transcript?.appendAssistantMessage) {
|
|
1475
|
+
transcript.appendAssistantMessage(finalOutput, {
|
|
1476
|
+
sessionId: sid,
|
|
1477
|
+
cwd: resolvedCwd,
|
|
1478
|
+
chatSessionId: opts.chatSessionId || '',
|
|
1479
|
+
model: modelId,
|
|
1480
|
+
provider: llm?.type || '',
|
|
1481
|
+
tokens: { input: totalInput, output: totalOutput },
|
|
1482
|
+
});
|
|
1483
|
+
}
|
|
1207
1484
|
|
|
1208
1485
|
// Graceful cleanup (6r)
|
|
1209
1486
|
const cleanup = {
|
|
@@ -1252,6 +1529,7 @@ async function runAgentLoop(prompt, opts = {}) {
|
|
|
1252
1529
|
usage: { input: totalInput, output: totalOutput },
|
|
1253
1530
|
provider: llm?.type,
|
|
1254
1531
|
model: modelId,
|
|
1532
|
+
runtimeMode: runtimeMode.id,
|
|
1255
1533
|
turnCosts,
|
|
1256
1534
|
budgetUsed,
|
|
1257
1535
|
cleanup,
|
|
@@ -1279,6 +1557,16 @@ async function runAgentLoop(prompt, opts = {}) {
|
|
|
1279
1557
|
|
|
1280
1558
|
// Persist activity completion (Phase 2: Activity History)
|
|
1281
1559
|
try { getActivityLog().log({ session_id: sid, type: 'coding_complete', title: 'Coding session completed', body: finalOutput.slice(0, 500), detail: JSON.stringify({ turns: log.length, tokens: totalInput + totalOutput }) }); } catch {}
|
|
1560
|
+
if (finalOutput && transcript?.appendAssistantMessage) {
|
|
1561
|
+
transcript.appendAssistantMessage(finalOutput, {
|
|
1562
|
+
sessionId: sid,
|
|
1563
|
+
cwd: resolvedCwd,
|
|
1564
|
+
chatSessionId: opts.chatSessionId || '',
|
|
1565
|
+
model: modelId,
|
|
1566
|
+
provider: llm?.type || '',
|
|
1567
|
+
tokens: { input: totalInput, output: totalOutput },
|
|
1568
|
+
});
|
|
1569
|
+
}
|
|
1282
1570
|
|
|
1283
1571
|
// Save training log
|
|
1284
1572
|
try {
|
|
@@ -1298,6 +1586,7 @@ async function runAgentLoop(prompt, opts = {}) {
|
|
|
1298
1586
|
usage: { input: totalInput, output: totalOutput },
|
|
1299
1587
|
provider: llm?.type,
|
|
1300
1588
|
model: modelId,
|
|
1589
|
+
runtimeMode: runtimeMode.id,
|
|
1301
1590
|
turnCosts,
|
|
1302
1591
|
budgetUsed,
|
|
1303
1592
|
};
|
|
@@ -1,5 +1,9 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
|
+
const { buildPromptBundle, loadPromptText } = require('./coding/prompt-bundle');
|
|
4
|
+
const { buildPromptEnvelope, renderPromptEnvelope } = require('./coding/prompt-runtime');
|
|
5
|
+
const { runtimeModeInstructions } = require('./coding/runtime-mode');
|
|
6
|
+
|
|
3
7
|
/**
|
|
4
8
|
* Coding-agent system + subtask prompt builders.
|
|
5
9
|
*
|
|
@@ -51,7 +55,17 @@ function extractTaskFileHints(prompt, { limit = 24 } = {}) {
|
|
|
51
55
|
return [...matches];
|
|
52
56
|
}
|
|
53
57
|
|
|
54
|
-
function
|
|
58
|
+
function loadMemoryProtocolBlock({ available = true } = {}) {
|
|
59
|
+
if (!available) {
|
|
60
|
+
return `# Session Memory
|
|
61
|
+
|
|
62
|
+
Wall-E session memory tools are unavailable in this run. Do not claim you searched prior sessions or wrote a diary entry unless a tool result proves it. If past-session context is needed, say what information is missing and continue from the local codebase.`;
|
|
63
|
+
}
|
|
64
|
+
const protocol = loadPromptText('memory-protocol.md');
|
|
65
|
+
return protocol ? `# Session Memory\n\n${protocol}` : '';
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
function buildAgentSystemPrompt({ resolvedCwd, projectInfo, projectSkills, taskFileHints, mode, provider, model, runtimeMode, extraInstructions, returnEnvelope = false, runtimeContext = {} } = {}) {
|
|
55
69
|
const projectCtx = projectInfo
|
|
56
70
|
? `\nProject: ${projectInfo.language || 'unknown'}${projectInfo.framework ? ` (${projectInfo.framework})` : ''}${projectInfo.vcs ? `, vcs: ${projectInfo.vcs}` : ''}`
|
|
57
71
|
: '';
|
|
@@ -63,12 +77,20 @@ function buildAgentSystemPrompt({ resolvedCwd, projectInfo, projectSkills, taskF
|
|
|
63
77
|
? `\nLarge task file hints (${taskFileHints.length}): ${taskFileHints.join(', ')}`
|
|
64
78
|
: '';
|
|
65
79
|
|
|
66
|
-
|
|
80
|
+
const runtimeCtx = runtimeModeInstructions(runtimeMode);
|
|
81
|
+
const memoryToolsAvailable = runtimeContext.memoryToolsAvailable !== false;
|
|
82
|
+
const memoryProtocolCtx = loadMemoryProtocolBlock({ available: memoryToolsAvailable });
|
|
83
|
+
|
|
84
|
+
const body = `You are an expert software engineer executing a coding task. Use the provided tools to actually do the work — describing what to do is not completing the task.${projectCtx}${projectSkillCtx}
|
|
67
85
|
|
|
68
86
|
Working directory: ${resolvedCwd || '(not set)'}
|
|
69
87
|
${largeTaskCtx}
|
|
70
88
|
|
|
71
|
-
#
|
|
89
|
+
# Runtime role
|
|
90
|
+
|
|
91
|
+
${runtimeCtx}
|
|
92
|
+
|
|
93
|
+
${memoryProtocolCtx ? `${memoryProtocolCtx}\n\n` : ''}# Workflow
|
|
72
94
|
|
|
73
95
|
1. Explore first. Use read_file / list_directory / lsp_symbols to learn the relevant code BEFORE editing it. Reading three files cheaply beats one wrong edit.
|
|
74
96
|
2. When acting on a non-trivial change, call update_todos to lay out steps, then mark each completed as you go. This keeps long sessions on track.
|
|
@@ -104,6 +126,22 @@ ${largeTaskCtx}
|
|
|
104
126
|
- If the task is ambiguous, make a best-guess interpretation and note it explicitly: "I interpreted X as Y; tell me if you meant Z." Don't refuse and ask — answer + clarify.
|
|
105
127
|
- If you genuinely cannot complete the task, say what you tried, what you ruled out, and the narrowed-down problem. That's a useful failure; "I couldn't" alone is not.
|
|
106
128
|
- Use ask_user only when the answer is genuinely undeterminable from the codebase + reasonable defaults. Most "ambiguous" tasks aren't — pick the obvious interpretation and proceed.`;
|
|
129
|
+
|
|
130
|
+
const bundle = buildPromptBundle({ body, mode, provider, model, extraInstructions, returnEnvelope: true });
|
|
131
|
+
const envelope = buildPromptEnvelope({
|
|
132
|
+
stablePolicy: bundle.sections.find(section => section.id === 'stable-policy')?.content || '',
|
|
133
|
+
toolPolicy: bundle.sections.find(section => section.id === 'tool-policy')?.content || '',
|
|
134
|
+
providerContribution: runtimeContext.providerContribution || '',
|
|
135
|
+
channelContext: runtimeContext.channelContext || '',
|
|
136
|
+
parentRunContext: runtimeContext.parentRunContext || '',
|
|
137
|
+
childScope: runtimeContext.childScope || '',
|
|
138
|
+
runtimeOnlyContext: runtimeContext.runtimeOnlyContext || '',
|
|
139
|
+
nextTurnContext: runtimeContext.nextTurnContext || '',
|
|
140
|
+
userTask: runtimeContext.userTask || '',
|
|
141
|
+
sections: bundle.sections.filter(section => !['stable-policy', 'tool-policy'].includes(section.id)),
|
|
142
|
+
metadata: { mode, provider, model, resolvedCwd, memoryToolsAvailable },
|
|
143
|
+
});
|
|
144
|
+
return returnEnvelope ? envelope : renderPromptEnvelope(envelope);
|
|
107
145
|
}
|
|
108
146
|
|
|
109
147
|
/**
|
|
@@ -217,7 +255,10 @@ ${subtask.prompt}`);
|
|
|
217
255
|
|
|
218
256
|
module.exports = {
|
|
219
257
|
extractTaskFileHints,
|
|
258
|
+
loadMemoryProtocolBlock,
|
|
220
259
|
buildAgentSystemPrompt,
|
|
221
260
|
buildPlanningPrompt,
|
|
222
261
|
buildSubtaskPrompt,
|
|
262
|
+
buildPromptEnvelope,
|
|
263
|
+
renderPromptEnvelope,
|
|
223
264
|
};
|
|
@@ -515,7 +515,31 @@ function expandWithNeighbors(memory, radius = 1) {
|
|
|
515
515
|
} catch {}
|
|
516
516
|
}
|
|
517
517
|
|
|
518
|
-
// Strategy 2:
|
|
518
|
+
// Strategy 2: Source-adapter sessions — fetch adjacent records with the
|
|
519
|
+
// same stable source/session id. This keeps long coding-session snippets
|
|
520
|
+
// coherent even when individual memory rows are message-sized.
|
|
521
|
+
const stableSourceId = sourceIdForMemory(memory);
|
|
522
|
+
if (stableSourceId) {
|
|
523
|
+
try {
|
|
524
|
+
const db = brain.getDb();
|
|
525
|
+
const siblings = db.prepare(`
|
|
526
|
+
SELECT id, source_id, content, timestamp FROM memories
|
|
527
|
+
WHERE source = ? AND (source_id = ? OR source_id LIKE ?)
|
|
528
|
+
ORDER BY timestamp ASC, source_id ASC
|
|
529
|
+
LIMIT 200
|
|
530
|
+
`).all(memory.source, stableSourceId, `${stableSourceId}:%`);
|
|
531
|
+
if (siblings.length > 1) {
|
|
532
|
+
const idx = siblings.findIndex(s => s.id === memory.id);
|
|
533
|
+
if (idx >= 0) {
|
|
534
|
+
const start = Math.max(0, idx - radius);
|
|
535
|
+
const end = Math.min(siblings.length, idx + radius + 1);
|
|
536
|
+
return capNeighborContent(siblings.slice(start, end).map(s => s.content).join('\n---\n'));
|
|
537
|
+
}
|
|
538
|
+
}
|
|
539
|
+
} catch {}
|
|
540
|
+
}
|
|
541
|
+
|
|
542
|
+
// Strategy 3: Non-chunked memories — fetch time-adjacent from same source
|
|
519
543
|
if (memory.source && memory.timestamp) {
|
|
520
544
|
try {
|
|
521
545
|
const db = brain.getDb();
|
|
@@ -535,6 +559,24 @@ function expandWithNeighbors(memory, radius = 1) {
|
|
|
535
559
|
return memory.content;
|
|
536
560
|
}
|
|
537
561
|
|
|
562
|
+
/**
 * Resolve the stable source/session id used to group sibling memory rows.
 *
 * Resolution order:
 *   1. `metadata.sourceId`, where `metadata` is either a JSON string or an
 *      already-parsed object.
 *   2. For sources whose name ends in `-jsonl` and whose `source_id` has at
 *      least three colon-separated segments, the first two segments joined
 *      with `:`.
 *   3. Otherwise the empty string.
 *
 * @param {object|null|undefined} memory - Memory row; reads `metadata`,
 *   `source` and `source_id`.
 * @returns {string} The stable id, or `''` when none can be derived. A truthy
 *   `metadata.sourceId` is returned as stored.
 */
function sourceIdForMemory(memory) {
  // A missing row has no id; previously this threw on `memory.source_id`.
  if (!memory) return '';

  const rawMetadata = memory.metadata;
  try {
    // Accept pre-parsed metadata objects as well as the JSON text form;
    // passing an object to JSON.parse would throw and hide its sourceId.
    const metadata = rawMetadata && typeof rawMetadata === 'object'
      ? rawMetadata
      : JSON.parse(rawMetadata || '{}');
    if (metadata?.sourceId) return metadata.sourceId;
  } catch {
    // Best effort: malformed metadata falls through to the source_id parse.
  }

  const sourceId = String(memory.source_id || '');
  const parts = sourceId.split(':');
  if (parts.length >= 3 && /-jsonl$/.test(memory.source || '')) {
    return parts.slice(0, 2).join(':');
  }
  return '';
}
|
|
574
|
+
|
|
575
|
+
/**
 * Cap neighbor-context text at `max` characters.
 *
 * Text that fits is returned unchanged. Longer text is cut to `max - 30`
 * characters and suffixed with a truncation marker, so the result stays
 * within `max`. When `max` is too small to leave room for the marker, the
 * text is hard-cut to `max` characters instead.
 *
 * @param {*} content - Text to cap; falsy values become `''`, anything else
 *   is coerced with `String()`.
 * @param {number} [max=5000] - Maximum length of the returned string.
 * @returns {string} The original or truncated text.
 */
function capNeighborContent(content, max = 5000) {
  const text = String(content || '');
  if (!(text.length > max)) return text;

  // Characters reserved for the marker (kept at 30 to match existing output).
  const reserved = 30;
  const keep = max - reserved;
  if (keep < 0) {
    // A negative end index would make slice() count from the END of the
    // string and return far more than `max` characters; hard-cut instead.
    return text.slice(0, Math.max(0, max));
  }
  return `${text.slice(0, keep)}\n[neighbor context truncated]`;
}
|
|
579
|
+
|
|
538
580
|
/** G3: Essential Story — top high-confidence knowledge always loaded in core layer.
|
|
539
581
|
* Gives the AI "who you are" context from the first message. */
|
|
540
582
|
function buildEssentialStory(maxEntries = 15) {
|
|
@@ -25,7 +25,7 @@ const TOPIC_PATTERNS = {
|
|
|
25
25
|
},
|
|
26
26
|
technical: {
|
|
27
27
|
keywords: ['code', 'bug', 'deploy', 'api', 'database', 'server', 'git', 'pr', 'test'],
|
|
28
|
-
regex: /\b(code|code\s+review|review\s+(?:diff|patch|changes?|pr|pull\s+request)|bug|deploy|api|database|server|git|pr|pull\s+request|test|build|pipeline|docker|kubernetes|aws|gcp)\b/i,
|
|
28
|
+
regex: /\b(code|code\s+review|review\s+(?:(?:local|uncommitted|unstaged|staged|working\s+tree|workspace)\s+)?(?:diff|patch|changes?|pr|pull\s+request)|(?:check|show|run)\s+git\s+status|bug|deploy|api|database|server|git|pr|pull\s+request|test|build|pipeline|docker|kubernetes|aws|gcp)\b/i,
|
|
29
29
|
},
|
|
30
30
|
tools: {
|
|
31
31
|
keywords: ['tool', 'mcp', 'skill', 'fetch', 'execute', 'automation', 'disk', 'storage'],
|