create-walle 0.9.26 → 0.9.27

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -167,7 +167,7 @@ function _applyPersistedBackupDirSetting() {
167
167
  }
168
168
 
169
169
  // --- Schema versioning via PRAGMA user_version ---
170
- const SCHEMA_VERSION = 25; // Bump on every migration addition
170
+ const SCHEMA_VERSION = 26; // Bump on every migration addition
171
171
 
172
172
  const MIGRATIONS = {
173
173
  1: (d) => {
@@ -716,6 +716,34 @@ const MIGRATIONS = {
716
716
  ON model_routing_policy_routes(provider_type, model_id);
717
717
  `);
718
718
  },
719
+ 26: (d) => {
720
+ d.exec(`
721
+ CREATE TABLE IF NOT EXISTS runtime_work_items (
722
+ id TEXT PRIMARY KEY,
723
+ kind TEXT NOT NULL,
724
+ lane INTEGER NOT NULL DEFAULT 2,
725
+ priority INTEGER NOT NULL DEFAULT 5,
726
+ state TEXT NOT NULL DEFAULT 'pending',
727
+ payload_json TEXT NOT NULL DEFAULT '{}',
728
+ cursor_json TEXT NOT NULL DEFAULT '{}',
729
+ attempts INTEGER NOT NULL DEFAULT 0,
730
+ max_attempts INTEGER NOT NULL DEFAULT 5,
731
+ not_before INTEGER,
732
+ lease_owner TEXT,
733
+ lease_expires_at INTEGER,
734
+ last_error TEXT,
735
+ created_at INTEGER NOT NULL,
736
+ updated_at INTEGER NOT NULL,
737
+ completed_at INTEGER
738
+ );
739
+ CREATE INDEX IF NOT EXISTS idx_runtime_work_items_ready
740
+ ON runtime_work_items(state, lane, priority, not_before, created_at);
741
+ CREATE INDEX IF NOT EXISTS idx_runtime_work_items_kind
742
+ ON runtime_work_items(kind, state, updated_at);
743
+ CREATE INDEX IF NOT EXISTS idx_runtime_work_items_lease
744
+ ON runtime_work_items(lease_expires_at);
745
+ `);
746
+ },
719
747
  };
720
748
 
721
749
  // Schema invariants — columns/tables that MUST exist after the named migration.
@@ -741,6 +769,7 @@ const SCHEMA_INVARIANTS = [
741
769
  { migration: 24, table: 'chat_message_parts', column: 'part_type' },
742
770
  { migration: 25, table: 'model_routing_policies', column: 'id' },
743
771
  { migration: 25, table: 'model_routing_policy_routes', column: 'policy_id' },
772
+ { migration: 26, table: 'runtime_work_items', column: 'kind' },
744
773
  ];
745
774
 
746
775
  function _columnExists(d, table, column) {
@@ -2871,6 +2900,226 @@ function deleteSchedulerJobState(job_name) {
2871
2900
  getDb().prepare('DELETE FROM scheduler_job_state WHERE job_name = ?').run(job_name);
2872
2901
  }
2873
2902
 
2903
+ // -- Runtime work items (Migration 26) --
2904
+
2905
/**
 * Decode a JSON text value without ever throwing.
 *
 * @param {*} value - Raw JSON text; `null`, `undefined` and `''` count as absent.
 * @param {*} fallback - Returned when `value` is absent, is not valid JSON,
 *   or decodes to `null`.
 * @returns {*} The decoded value, or `fallback`.
 */
function _safeParseJson(value, fallback) {
  const isAbsent = value === null || value === undefined || value === '';
  if (isAbsent) return fallback;

  let decoded;
  try {
    decoded = JSON.parse(value);
  } catch {
    // Malformed JSON is handled exactly like a missing value.
    return fallback;
  }
  return decoded === null || decoded === undefined ? fallback : decoded;
}
2914
+
2915
/**
 * Convert a raw `runtime_work_items` row into its public shape.
 *
 * All columns of the row are kept as-is; `payload` and `cursor` are added as
 * the decoded forms of `payload_json` and `cursor_json` (each falling back to
 * an empty object when the stored text is missing or cannot be decoded).
 *
 * @param {object|null|undefined} row - Row object, or a falsy value.
 * @returns {object|null} The public item, or `null` when `row` is falsy.
 */
function _runtimeWorkItemRowToPublic(row) {
  if (!row) return null;
  const payload = _safeParseJson(row.payload_json, {});
  const cursor = _safeParseJson(row.cursor_json, {});
  return Object.assign({}, row, { payload, cursor });
}
2923
+
2924
/**
 * Insert a runtime work item, or reset an existing one that has the same id.
 *
 * On an id conflict the row is overwritten with the new values and its
 * `attempts`, lease columns, `last_error` and `completed_at` are cleared;
 * `created_at` is left untouched because it is not part of the update list.
 *
 * @param {object} [item]
 * @param {string} [item.id] - Item id; defaults to a fresh `uuidv4()`.
 * @param {string} item.kind - Required work kind (stored as a string).
 * @param {number} [item.lane=2] - Non-negative integer lane. Absent or
 *   non-numeric input falls back to 2.
 * @param {number} [item.priority=5] - Non-negative integer priority. Absent or
 *   non-numeric input falls back to 5.
 * @param {string} [item.state='pending'] - Initial state.
 * @param {*} [item.payload={}] - JSON-serialisable payload.
 * @param {*} [item.cursor={}] - JSON-serialisable cursor.
 * @param {number} [item.max_attempts=5] - Attempt budget, at least 1.
 * @param {number|null} [item.not_before=null] - Earliest lease time as an
 *   epoch-millisecond integer, or `null` for "no delay".
 * @returns {object|null} The stored item as returned by `getRuntimeWorkItem`.
 * @throws {Error} When `kind` is missing.
 */
function enqueueRuntimeWorkItem({
  id = uuidv4(),
  kind,
  lane = 2,
  priority = 5,
  state = 'pending',
  payload = {},
  cursor = {},
  max_attempts = 5,
  not_before = null,
} = {}) {
  if (!kind) throw new Error('enqueueRuntimeWorkItem requires kind');

  // Coerce to a non-negative integer. Absent or non-numeric input yields the
  // documented default rather than 0: leasing orders by lane ASC, priority ASC,
  // so a silent 0 would promote a malformed item to the front of the queue.
  const toNonNegativeInt = (value, fallback) => {
    if (value == null || value === '') return fallback;
    const numeric = Number(value);
    return Number.isFinite(numeric) ? Math.max(0, Math.trunc(numeric)) : fallback;
  };

  // Number(null) is 0 (and finite), so null/undefined/'' must be tested first
  // or "no delay" would be persisted as the timestamp 0 instead of NULL.
  const notBeforeNumeric = not_before == null || not_before === '' ? NaN : Number(not_before);
  const notBefore = Number.isFinite(notBeforeNumeric) ? Math.trunc(notBeforeNumeric) : null;

  const now = Date.now();
  getDb().prepare(`
    INSERT INTO runtime_work_items (
      id, kind, lane, priority, state, payload_json, cursor_json,
      attempts, max_attempts, not_before, created_at, updated_at
    ) VALUES (?, ?, ?, ?, ?, ?, ?, 0, ?, ?, ?, ?)
    ON CONFLICT(id) DO UPDATE SET
      kind = excluded.kind,
      lane = excluded.lane,
      priority = excluded.priority,
      state = excluded.state,
      payload_json = excluded.payload_json,
      cursor_json = excluded.cursor_json,
      attempts = 0,
      max_attempts = excluded.max_attempts,
      not_before = excluded.not_before,
      lease_owner = NULL,
      lease_expires_at = NULL,
      last_error = NULL,
      completed_at = NULL,
      updated_at = excluded.updated_at
  `).run(
    id,
    String(kind),
    toNonNegativeInt(lane, 2),
    toNonNegativeInt(priority, 5),
    String(state || 'pending'),
    JSON.stringify(payload || {}),
    JSON.stringify(cursor || {}),
    Math.max(1, Math.trunc(Number(max_attempts) || 5)),
    notBefore,
    now,
    now
  );
  return getRuntimeWorkItem(id);
}
2972
+
2973
/**
 * Fetch one runtime work item by id.
 *
 * @param {string} id - Item id.
 * @returns {object|null} The public item, or `null` when no row matches.
 */
function getRuntimeWorkItem(id) {
  const lookup = getDb().prepare('SELECT * FROM runtime_work_items WHERE id = ?');
  const row = lookup.get(id);
  return _runtimeWorkItemRowToPublic(row);
}
2978
+
2979
/**
 * Lease up to `limit` ready work items for a worker.
 *
 * An item is ready when it is `pending`, or `leased` with an expired lease,
 * and its `not_before` is unset or not in the future, and it still has attempts
 * left. Candidates are picked in lane, priority, `not_before`, `created_at`
 * order. Each one is then claimed by an UPDATE that repeats the readiness
 * check, so a row that changed after the SELECT is skipped, not double-leased.
 * Claiming increments `attempts`.
 *
 * NOTE(review): a row whose lease expires while `attempts` already equals
 * `max_attempts` matches neither query here and stays in state `leased`.
 * Confirm that something else moves such rows to `failed`.
 *
 * @param {object} [options]
 * @param {number} [options.limit=1] - Maximum items to lease (clamped to 1..100).
 * @param {Array<number>|null} [options.lanes=null] - Restrict to these lanes.
 *   `null`, `''` and non-numeric entries are ignored; an empty result means
 *   "all lanes".
 * @param {string} [options.leaseOwner='wall-e'] - Owner recorded on the lease.
 * @param {number} [options.leaseMs=60000] - Lease duration, at least 1000 ms.
 * @param {number|Date|string} [options.now=Date.now()] - Reference time in
 *   epoch milliseconds; anything that is not a finite number falls back to
 *   the current time.
 * @returns {object[]} The items that were actually leased.
 */
function leaseRuntimeWorkItems({
  limit = 1,
  lanes = null,
  leaseOwner = 'wall-e',
  leaseMs = 60000,
  now = Date.now(),
} = {}) {
  const cap = Math.max(1, Math.min(100, Math.trunc(Number(limit) || 1)));

  // `now` feeds `now + leaseMs`; a numeric string or Date would turn that into
  // string concatenation, so normalise it to an integer first.
  const nowNumeric = now == null || now === '' ? NaN : Number(now);
  const nowMs = Number.isFinite(nowNumeric) ? Math.trunc(nowNumeric) : Date.now();

  // Drop null/'' before coercion: Number(null) is 0 and would select lane 0.
  const laneValues = Array.isArray(lanes)
    ? lanes
      .filter((lane) => lane != null && lane !== '')
      .map((lane) => Math.trunc(Number(lane)))
      .filter((lane) => Number.isFinite(lane))
    : [];

  const params = [nowMs, nowMs];
  let laneClause = '';
  if (laneValues.length > 0) {
    laneClause = ` AND lane IN (${laneValues.map(() => '?').join(',')})`;
    params.push(...laneValues);
  }
  params.push(cap);

  const rows = getDb().prepare(`
    SELECT *
    FROM runtime_work_items
    WHERE
      (
        state = 'pending'
        OR (state = 'leased' AND lease_expires_at IS NOT NULL AND lease_expires_at <= ?)
      )
      AND (not_before IS NULL OR not_before <= ?)
      AND attempts < max_attempts
      ${laneClause}
    ORDER BY lane ASC, priority ASC, COALESCE(not_before, 0) ASC, created_at ASC
    LIMIT ?
  `).all(...params);

  const leaseExpiresAt = nowMs + Math.max(1000, Math.trunc(Number(leaseMs) || 60000));
  const owner = String(leaseOwner || 'wall-e');

  const tx = getDb().transaction((items) => {
    const leased = [];
    const stmt = getDb().prepare(`
      UPDATE runtime_work_items
      SET state = 'leased',
          lease_owner = ?,
          lease_expires_at = ?,
          attempts = attempts + 1,
          updated_at = ?
      WHERE id = ?
        AND (
          state = 'pending'
          OR (state = 'leased' AND lease_expires_at IS NOT NULL AND lease_expires_at <= ?)
        )
        AND (not_before IS NULL OR not_before <= ?)
        AND attempts < max_attempts
    `);
    for (const row of items) {
      const result = stmt.run(owner, leaseExpiresAt, nowMs, row.id, nowMs, nowMs);
      // changes === 0 means the row stopped being ready after the SELECT.
      if (result.changes > 0) leased.push(getRuntimeWorkItem(row.id));
    }
    return leased;
  });
  return tx(rows);
}
3037
+
3038
/**
 * Mark a work item as completed and release its lease.
 *
 * Sets `state` to `completed`, clears `lease_owner` / `lease_expires_at`, and
 * stamps `completed_at` and `updated_at` with the current time. When a cursor
 * is supplied (and serialises to JSON text) it replaces `cursor_json`;
 * otherwise the stored cursor is left alone.
 *
 * @param {string} id - Item id.
 * @param {object} [options]
 * @param {*} [options.cursor=null] - Final cursor to persist, if any.
 * @returns {object|null} The item as re-read through `getRuntimeWorkItem`.
 */
function completeRuntimeWorkItem(id, { cursor = null } = {}) {
  const finishedAt = Date.now();
  const serializedCursor = cursor == null ? null : JSON.stringify(cursor);

  if (serializedCursor == null) {
    getDb().prepare(`
      UPDATE runtime_work_items
      SET state = 'completed',
          lease_owner = NULL,
          lease_expires_at = NULL,
          completed_at = ?,
          updated_at = ?
      WHERE id = ?
    `).run(finishedAt, finishedAt, id);
  } else {
    getDb().prepare(`
      UPDATE runtime_work_items
      SET state = 'completed',
          cursor_json = ?,
          lease_owner = NULL,
          lease_expires_at = NULL,
          completed_at = ?,
          updated_at = ?
      WHERE id = ?
    `).run(serializedCursor, finishedAt, finishedAt, id);
  }
  return getRuntimeWorkItem(id);
}
3065
+
3066
/**
 * Record a failed attempt for a work item and release its lease.
 *
 * When the item has used up its attempts (`attempts >= max_attempts`) it moves
 * to state `failed` with `not_before` cleared. Otherwise it goes back to
 * `pending` and becomes eligible again after `retryAfterMs` (minimum 1000 ms).
 * The error text is stored in `last_error`, truncated to 500 characters.
 *
 * @param {string} id - Item id.
 * @param {object} [options]
 * @param {*} [options.error=null] - Error, string, or object describing the
 *   failure. `message` is preferred, then `code`; a plain object with neither
 *   is stored as its JSON text.
 * @param {number} [options.retryAfterMs=60000] - Delay before the next attempt.
 * @returns {object|null} The updated item, or `null` when the id is unknown.
 */
function failRuntimeWorkItem(id, { error = null, retryAfterMs = 60000 } = {}) {
  const row = getRuntimeWorkItem(id);
  if (!row) return null;
  const now = Date.now();
  const exhausted = Number(row.attempts || 0) >= Number(row.max_attempts || 0);

  let message = String(
    error && (error.message || error.code) ? (error.message || error.code) : (error || '')
  );
  if (message === '[object Object]') {
    // A plain object without message/code stringifies to a useless
    // placeholder; keep its JSON form so last_error stays diagnosable.
    try {
      const json = JSON.stringify(error);
      if (typeof json === 'string') message = json;
    } catch {
      // Not serialisable (e.g. circular): keep the generic placeholder.
    }
  }

  getDb().prepare(`
    UPDATE runtime_work_items
    SET state = ?,
        not_before = ?,
        lease_owner = NULL,
        lease_expires_at = NULL,
        last_error = ?,
        updated_at = ?
    WHERE id = ?
  `).run(
    exhausted ? 'failed' : 'pending',
    exhausted ? null : now + Math.max(1000, Math.trunc(Number(retryAfterMs) || 60000)),
    message.slice(0, 500),
    now,
    id
  );
  return getRuntimeWorkItem(id);
}
3089
+
3090
/**
 * List runtime work items, optionally restricted to one state.
 *
 * With a state filter, rows are ordered by lane, priority, then most recently
 * updated first; without one, rows are ordered by state first and then the
 * same way.
 *
 * @param {object} [options]
 * @param {string|null} [options.state=null] - Only return items in this state.
 * @param {number} [options.limit=50] - Maximum rows (clamped to 1..500).
 * @returns {object[]} Public work items.
 */
function listRuntimeWorkItems({ state = null, limit = 50 } = {}) {
  const requested = Math.trunc(Number(limit) || 50);
  const cap = Math.min(500, Math.max(1, requested));

  let rows;
  if (state) {
    rows = getDb().prepare(`
      SELECT * FROM runtime_work_items
      WHERE state = ?
      ORDER BY lane ASC, priority ASC, updated_at DESC
      LIMIT ?
    `).all(String(state), cap);
  } else {
    rows = getDb().prepare(`
      SELECT * FROM runtime_work_items
      ORDER BY state ASC, lane ASC, priority ASC, updated_at DESC
      LIMIT ?
    `).all(cap);
  }
  return rows.map((row) => _runtimeWorkItemRowToPublic(row));
}
3106
+
3107
/**
 * Count runtime work items grouped by state and lane.
 *
 * @returns {{total: number, by_state: object, by_lane: object, rows: object[]}}
 *   `rows` holds one `{ state, lane, count }` entry per (state, lane) pair;
 *   `by_state` and `by_lane` are the same counts rolled up per state and per
 *   lane; `total` is the sum of all counts.
 */
function summarizeRuntimeWorkItems() {
  const rows = getDb().prepare(`
    SELECT state, lane, COUNT(*) AS count
    FROM runtime_work_items
    GROUP BY state, lane
    ORDER BY state ASC, lane ASC
  `).all();

  const by_state = {};
  const by_lane = {};
  let total = 0;
  rows.forEach(({ state, lane, count }) => {
    by_state[state] = (by_state[state] || 0) + count;
    by_lane[lane] = (by_lane[lane] || 0) + count;
    total += count;
  });
  return { total, by_state, by_lane, rows };
}
3122
+
2874
3123
  // -- Backup --
2875
3124
 
2876
3125
  function _backupDirForCurrentDb() {
@@ -6953,6 +7202,14 @@ module.exports = {
6953
7202
  getSchedulerJobState,
6954
7203
  listSchedulerJobStates,
6955
7204
  deleteSchedulerJobState,
7205
+ // Runtime work items (Migration 26)
7206
+ enqueueRuntimeWorkItem,
7207
+ getRuntimeWorkItem,
7208
+ leaseRuntimeWorkItems,
7209
+ completeRuntimeWorkItem,
7210
+ failRuntimeWorkItem,
7211
+ listRuntimeWorkItems,
7212
+ summarizeRuntimeWorkItems,
6956
7213
  // Checkpoints
6957
7214
  upsertCheckpoint,
6958
7215
  getCheckpoint,
@@ -2231,6 +2231,34 @@ async function chat(message, opts = {}) {
2231
2231
  });
2232
2232
  }
2233
2233
 
2234
+ const promptStart = Date.now();
2235
+ let promptCapabilityContextBlock = '';
2236
+ let requestedSkillEvents = [];
2237
+ let hasExplicitRequestedSkill = false;
2238
+ try {
2239
+ const promptCapabilities = await _resolveChatPromptCapabilities(message, effectiveCwd, opts);
2240
+ const missingSkills = _missingRequestedSkills(promptCapabilities.resolution);
2241
+ const missingSkillReply = _formatMissingSkillReply(missingSkills);
2242
+ if (missingSkillReply) {
2243
+ return returnSystemReply(missingSkillReply, {
2244
+ provider: 'skill-router',
2245
+ model: 'system',
2246
+ reason: 'skill_not_found',
2247
+ });
2248
+ }
2249
+ if (promptCapabilities.context) promptCapabilityContextBlock = '\n\n' + promptCapabilities.context;
2250
+ requestedSkillEvents = _requestedSkillProgressEvents(promptCapabilities.resolution);
2251
+ hasExplicitRequestedSkill = (promptCapabilities.resolution?.skills || [])
2252
+ .some(skill => skill && (skill.prefix || skill.requestedName));
2253
+ } catch (err) {
2254
+ console.warn('[chat] Prompt capability resolution failed:', err.message);
2255
+ return returnSystemReply(_formatSkillResolutionFailureReply(err), {
2256
+ provider: 'skill-router',
2257
+ model: 'system',
2258
+ reason: 'skill_resolution_failed',
2259
+ });
2260
+ }
2261
+
2234
2262
  const runtimeFrameContextMessages = (() => {
2235
2263
  const override = normalizeContextMessagesForChat(opts.contextMessages, message);
2236
2264
  if (override.length > 0) return override;
@@ -2289,7 +2317,8 @@ async function chat(message, opts = {}) {
2289
2317
 
2290
2318
  if (isWeatherQuestion(routingMessage, queryTopics)
2291
2319
  && !_hasExplicitWeatherLocation(routingMessage)
2292
- && hasUsableRuntimeWeatherLocation(currentLocationSummary)) {
2320
+ && hasUsableRuntimeWeatherLocation(currentLocationSummary)
2321
+ && !hasExplicitRequestedSkill) {
2293
2322
  try {
2294
2323
  onProgress({
2295
2324
  type: 'tool_call',
@@ -2385,30 +2414,6 @@ async function chat(message, opts = {}) {
2385
2414
 
2386
2415
  const currentLocationContext = currentLocationSummary?.fresh ? currentLocationSummary : null;
2387
2416
 
2388
- const promptStart = Date.now();
2389
- let promptCapabilityContextBlock = '';
2390
- let requestedSkillEvents = [];
2391
- try {
2392
- const promptCapabilities = await _resolveChatPromptCapabilities(message, effectiveCwd, opts);
2393
- const missingSkills = _missingRequestedSkills(promptCapabilities.resolution);
2394
- const missingSkillReply = _formatMissingSkillReply(missingSkills);
2395
- if (missingSkillReply) {
2396
- return returnSystemReply(missingSkillReply, {
2397
- provider: 'skill-router',
2398
- model: 'system',
2399
- reason: 'skill_not_found',
2400
- });
2401
- }
2402
- if (promptCapabilities.context) promptCapabilityContextBlock = '\n\n' + promptCapabilities.context;
2403
- requestedSkillEvents = _requestedSkillProgressEvents(promptCapabilities.resolution);
2404
- } catch (err) {
2405
- console.warn('[chat] Prompt capability resolution failed:', err.message);
2406
- return returnSystemReply(_formatSkillResolutionFailureReply(err), {
2407
- provider: 'skill-router',
2408
- model: 'system',
2409
- reason: 'skill_resolution_failed',
2410
- });
2411
- }
2412
2417
  if (providerAvailability.getConfiguredProviders().length > 0 && !providerAvailability.isAnyProviderAvailable()) {
2413
2418
  const unavailableErr = unavailableProviderError(providerAvailability.getConfiguredProviders(), {
2414
2419
  provider: opts.provider || getDefaultProviderType(),
@@ -0,0 +1,79 @@
1
+ 'use strict';
2
+
3
+ // Persist a coding session's plan/todos across turns.
4
+ //
5
+ // The model already has an `update_todos` tool, but its output lived only in a
6
+ // per-run `currentTodos` variable (coding-orchestrator.js) — wiped the moment a
7
+ // turn ended. So a multi-turn session that yielded ("go ahead", "continue") came
8
+ // back with NO record of the checklist it had laid out, and re-investigated from
9
+ // scratch (session c3f3af97: re-copied the source + re-explored the same files
10
+ // every turn). OpenCode and Claude Code persist TodoWrite in the session store so
11
+ // "continue" resumes the open plan instead of restarting. This mirrors that —
12
+ // stored in brain KV (synchronous, survives restarts) keyed by the CTM chat
13
+ // session id, the same stable key session-workspaces.js uses to group sequential
14
+ // runTurn calls into one logical coding session.
15
+
16
+ const KEY_PREFIX = 'coding_session_plan:';
17
+ const MAX_TODOS = 40;
18
+ const MAX_CONTENT = 500;
19
+
20
/**
 * Reduce arbitrary todo input to a clean list of `{ content, status }` items.
 *
 * Non-array input yields an empty list. Entries that are not objects, or whose
 * `content` is not a non-blank string, are dropped. Content is trimmed and cut
 * to `MAX_CONTENT` characters; any status other than `in_progress` or
 * `completed` becomes `pending`. Collection stops once `MAX_TODOS` items have
 * been kept.
 *
 * @param {*} todos - Candidate todo list.
 * @returns {Array<{content: string, status: string}>} Normalized todos.
 */
function _normalizeTodos(todos) {
  const cleaned = [];
  if (!Array.isArray(todos)) return cleaned;

  const keptStatuses = ['in_progress', 'completed'];
  for (let i = 0; i < todos.length; i += 1) {
    const entry = todos[i];
    if (entry === null || entry === undefined || typeof entry !== 'object') continue;

    let content = '';
    if (typeof entry.content === 'string') {
      content = entry.content.trim().slice(0, MAX_CONTENT);
    }
    if (!content) continue;

    const status = keptStatuses.includes(entry.status) ? entry.status : 'pending';
    cleaned.push({ content, status });
    if (cleaned.length >= MAX_TODOS) break;
  }
  return cleaned;
}
33
+
34
/**
 * Load the persisted todo list for a coding session.
 *
 * Reads the value stored under `KEY_PREFIX + sessionKey` via `brain.getKv`.
 * String values are parsed as JSON; anything else is used as-is. The result is
 * passed through `_normalizeTodos`. Never throws: a missing brain, a missing
 * key, an empty value, or any error while reading/parsing yields `[]`.
 *
 * @param {object} brain - Store exposing `getKv(key)`.
 * @param {string} sessionKey - Session identifier.
 * @returns {Array<{content: string, status: string}>} The stored plan, or `[]`.
 */
function readSessionPlan(brain, sessionKey) {
  const canRead = Boolean(brain) && Boolean(sessionKey) && typeof brain.getKv === 'function';
  if (!canRead) return [];

  try {
    const stored = brain.getKv(KEY_PREFIX + sessionKey);
    if (!stored) return [];
    let decoded = stored;
    if (typeof stored === 'string') {
      decoded = JSON.parse(stored);
    }
    return _normalizeTodos(decoded);
  } catch {
    // Best-effort read: a broken store or corrupt value means "no plan".
    return [];
  }
}
45
+
46
/**
 * Replace the session's stored plan with the latest todo list.
 *
 * The list is always the complete plan, not a delta, so it overwrites whatever
 * was stored under `KEY_PREFIX + sessionKey`. Persistence is best-effort: when
 * the brain or session key is missing, or `brain.setKv` throws, nothing is
 * raised — a storage failure must not break the coding turn, it only costs the
 * next turn its resume hint.
 *
 * @param {object} brain - Store exposing `setKv(key, value)`.
 * @param {string} sessionKey - Session identifier.
 * @param {*} todos - Todo list as received from the tool.
 * @returns {Array<{content: string, status: string}>} The normalized todos,
 *   whether or not they were persisted.
 */
function writeSessionPlan(brain, sessionKey, todos) {
  const normalized = _normalizeTodos(todos);
  const canWrite = brain && sessionKey && typeof brain.setKv === 'function';
  if (!canWrite) return normalized;

  try {
    brain.setKv(KEY_PREFIX + sessionKey, JSON.stringify(normalized));
  } catch {
    /* best-effort */
  }
  return normalized;
}
56
+
57
/**
 * Render the session's open plan as a `<session_plan>` prompt block.
 *
 * Returns `''` when there is no plan, or when every item is already completed:
 * there is nothing to resume, and a fully-done list would only be noise.
 * Otherwise every item (completed ones included) is listed on its own line
 * with a `[x]` / `[~]` / `[ ]` marker.
 *
 * @param {*} plan - Todo list; normalized through `_normalizeTodos`.
 * @returns {string} The prompt block (prefixed with a blank line), or `''`.
 */
function formatSessionPlanForPrompt(plan) {
  const todos = _normalizeTodos(plan);
  if (!todos.length) return '';
  const hasOpenItem = todos.some((t) => t.status !== 'completed');
  if (!hasOpenItem) return '';

  const mark = { completed: '[x]', in_progress: '[~]', pending: '[ ]' };
  const lines = todos
    .map((t) => {
      // Todo text comes from the model's tool call and may span several lines;
      // fold line breaks so each item stays on exactly one checklist line.
      const oneLine = t.content.replace(/\s*[\r\n]+\s*/g, ' ');
      return `- ${mark[t.status] || '[ ]'} ${oneLine}`;
    })
    .join('\n');
  return `\n\n<session_plan>
You already laid out a plan earlier in THIS coding session (persisted across turns). CONTINUE it — do not re-investigate work you have already done, and do not restart from scratch. Pick up the first unfinished item, apply it, and call update_todos to mark items completed as you finish. Current state:
${lines}
</session_plan>`;
}
72
+
73
+ module.exports = {
74
+ readSessionPlan,
75
+ writeSessionPlan,
76
+ formatSessionPlanForPrompt,
77
+ KEY_PREFIX,
78
+ MAX_TODOS,
79
+ };
@@ -161,6 +161,7 @@ const { createCodingTranscript } = require('./coding/transcript-writer');
161
161
  const { createCodingCapabilities } = require('./coding/capability-broker');
162
162
  const { gitRootFor } = require('./chat/code-review-context');
163
163
  const { recordSessionWorkspace, recordSessionAnchor } = require('./coding/session-workspaces');
164
+ const { readSessionPlan, writeSessionPlan } = require('./coding/session-plan');
164
165
  const {
165
166
  CompactionService,
166
167
  DEFAULT_CONTEXT_WINDOW,
@@ -2291,6 +2292,10 @@ async function runAgentLoop(prompt, opts = {}) {
2291
2292
  // agent resolves "the project / the site / what you built" to the anchor instead
2292
2293
  // of whatever cwd this turn happens to carry (session c3f3af97).
2293
2294
  const sessionAnchor = recordSessionAnchor(opts.brain, groundingSessionKey, gitRoot || resolvedCwd);
2295
+ // Resume the session's plan/todos: persisted by update_todos in earlier turns, it
2296
+ // is surfaced in the system prompt so "continue"/"go ahead" picks up the open
2297
+ // checklist instead of re-investigating from scratch (session c3f3af97).
2298
+ const sessionPlan = readSessionPlan(opts.brain, groundingSessionKey);
2294
2299
  const systemPrompt = buildAgentSystemPrompt({
2295
2300
  resolvedCwd,
2296
2301
  projectInfo: projectInfo && projectInfo.type !== 'unknown' ? projectInfo : null,
@@ -2308,6 +2313,7 @@ async function runAgentLoop(prompt, opts = {}) {
2308
2313
  environment: { gitRoot, isGitRepo: !!gitRoot, platform: process.platform },
2309
2314
  sessionWorkspaces,
2310
2315
  sessionAnchor,
2316
+ sessionPlan,
2311
2317
  },
2312
2318
  });
2313
2319
 
@@ -2819,7 +2825,7 @@ async function runAgentLoop(prompt, opts = {}) {
2819
2825
  mode: opts.mode || '',
2820
2826
  runtimeMode: runtimeMode.id,
2821
2827
  interactive: opts.interactive,
2822
- onTodos: (todos) => { currentTodos = todos; },
2828
+ onTodos: (todos) => { currentTodos = todos; writeSessionPlan(opts.brain, groundingSessionKey, todos); },
2823
2829
  // Forward the turn's abort signal so user Stop kills an in-flight
2824
2830
  // run_shell child, and a heartbeat so a long command shows live motion.
2825
2831
  signal: runtimeCtx.signal,
@@ -3431,7 +3437,7 @@ async function runAgentLoop(prompt, opts = {}) {
3431
3437
  runtimeMode: runtimeMode.id,
3432
3438
  llmCtx,
3433
3439
  interactive: opts.interactive,
3434
- onTodos: (todos) => { currentTodos = todos; },
3440
+ onTodos: (todos) => { currentTodos = todos; writeSessionPlan(opts.brain, groundingSessionKey, todos); },
3435
3441
  }),
3436
3442
  });
3437
3443
  emitActionMemoryDecision(execution, turn);
@@ -4860,7 +4866,7 @@ async function complete(request, planData, executeResult, { cwd, brain, onProgre
4860
4866
  if (diffErr.code !== 1) throw diffErr;
4861
4867
  }
4862
4868
  const sanitizedRequest = request.replace(/[\r\n]+/g, ' ').trim().slice(0, 72);
4863
- const commitMsg = `feat: ${sanitizedRequest}\n\nOrchestrated by Wall-E coding agent.\nSubtasks: ${planObj.subtasks.length}\n\nCo-authored-by: Wall-E <noreply@example.invalid>`;
4869
+ const commitMsg = `feat: ${sanitizedRequest}\n\nOrchestrated by Wall-E coding agent.\nSubtasks: ${planObj.subtasks.length}\n\nCo-authored-by: Codex <noreply@openai.com>`;
4864
4870
  const { stdout } = await execFileAsync('git', ['commit', '-m', commitMsg], { cwd });
4865
4871
  // Extract commit hash
4866
4872
  const hashMatch = stdout.match(/\[[\w/.-]+ ([a-f0-9]+)\]/);
@@ -15,6 +15,7 @@ const {
15
15
  routeArtifactCapabilities,
16
16
  } = require('./coding/capability-router');
17
17
  const { buildResponseLanguagePolicy } = require('./context/response-language');
18
+ const { formatSessionPlanForPrompt } = require('./coding/session-plan');
18
19
 
19
20
  /**
20
21
  * Coding-agent system + subtask prompt builders.
@@ -260,10 +261,13 @@ This coding session is anchored to its home project: ${sessionAnchor}
260
261
  Treat that as the session's primary project. When the user refers to "the project", "the app", "the site", or something you built earlier without naming a directory, resolve it under the anchor — not whichever directory this turn happens to point at.${currentRoot && sessionAnchor && currentRoot !== sessionAnchor ? `\nNOTE: the current working directory (${currentRoot}) is a DIFFERENT project than the anchor. Say which project you are acting on before you change files.` : ''}
261
262
  </session_anchor>`
262
263
  : '';
264
+ // Resume the session's open plan/todos (persisted across turns) so a bare
265
+ // "continue" / "go ahead" picks up the checklist instead of restarting (c3f3af97).
266
+ const planBlock = formatSessionPlanForPrompt(runtimeContext.sessionPlan);
263
267
 
264
268
  const body = `You are an expert software engineer executing a coding task. Use the provided tools to actually do the work — describing what to do is not completing the task.${projectCtx}${projectSkillCtx}
265
269
 
266
- ${envBlock}${anchorBlock}${workspacesBlock}
270
+ ${envBlock}${anchorBlock}${workspacesBlock}${planBlock}
267
271
  ${largeTaskCtx}
268
272
 
269
273
  # Runtime role
@@ -274,7 +278,10 @@ ${memoryProtocolCtx ? `${memoryProtocolCtx}\n\n` : ''}${frontendDesignCtx ? `${f
274
278
 
275
279
  1. Explore first. Use read_file / list_directory / lsp_symbols to learn the relevant code BEFORE editing it. Reading three files cheaply beats one wrong edit.
276
280
  2. When acting on a non-trivial change, call update_todos to lay out steps, then mark each completed as you go. This keeps long sessions on track.
277
- 3. Edit, don't create. Prefer edit_file (or multi_edit / apply_patch) on existing files. Only write_file when a new file is genuinely required by the task. NEVER create README/docs files unless explicitly asked.
281
+ 3. Edit the real file in place — never clone it. Make changes to the exact file the task names, at its real path, with edit_file / multi_edit / apply_patch. Only write_file when a NEW file is genuinely required. NEVER create README/docs files unless explicitly asked.
282
+ - Do NOT duplicate the target into a copy or variant directory (e.g. site-final, site-improved, site-v2, *-fixed, *-backup, a timestamped dir) as a "safe" place to work. A working copy strands your edits in a throwaway tree the user never sees, branches the session's work, and forces you to re-investigate which copy is current next turn. If you are worried about breaking the file, rely on git/snapshots — not a hand-made copy.
283
+ - If the task explicitly tells you to keep your version in a SEPARATE folder, create that ONE destination a single time, then keep editing THAT SAME path on every later turn. Never make a fresh copy each turn (no \`site-v2-fixed-$(date)\`), and reuse a destination you already created earlier in this session instead of re-copying the source.
284
+ - Never use \`cp -R\`, \`cp -r\`, or shell redirects to clone a project or site as a working copy. Edit the files directly.
278
285
  4. Verify with run_shell. Run the test command, lint, or build that proves the change works. Report the actual output — "tests pass" without showing them is not verification.
279
286
 
280
287
  # Finishing — apply, don't ask to apply
@@ -300,7 +307,7 @@ ${memoryProtocolCtx ? `${memoryProtocolCtx}\n\n` : ''}${frontendDesignCtx ? `${f
300
307
  # Tool-selection discipline
301
308
 
302
309
  - Prefer dedicated tools over run_shell when one fits: Read for known paths, edit_file/multi_edit for surgical edits, list_directory over \`ls -R\`. Reserve run_shell for things only a shell can do.
303
- - For writing source files, use write_file/edit_file/multi_edit. These tools can write inside the current project/cwd, including temporary project directories. Do not use run_shell heredocs or redirects just to create source files.
310
+ - For writing source files, use write_file/edit_file/multi_edit on the real target files in place. Do not use run_shell heredocs or redirects just to create source files, and do not stage your work in a scratch/temporary copy of the project — edit the actual files the task is about.
304
311
  - run_shell takes a complete shell command string in \`command\`. If you need pipes, redirects, heredocs, or \`cd ... && ...\`, put the whole shell expression in \`command\`, not in \`args\` or an interpreter \`-c\` wrapper.
305
312
  - For static HTML/CSS/JS verification, use browser_screenshot and browser_smoke_test with a \`file://\` URL for the local HTML file. If a static file server is genuinely needed, use start_static_server and check_url. For NON-static long-lived processes (dev servers, watchers, long builds), use run_shell with \`background: true\` and poll with bg_output / stop with bg_kill — never append \`&\` to a command. Never say a localhost/127.0.0.1 preview is live, back up, HTTP 200, or reachable unless the current turn has successful start_static_server/check_url/browser_screenshot/browser_smoke_test evidence for that URL; localhost evidence is Wall-E host loopback only, not phone/remote-browser proof.
306
313
  - Multiple INDEPENDENT tool calls can run in parallel — use that to keep the loop fast. SEQUENTIAL calls (each depends on the previous result) must run one at a time.