create-walle 0.9.26 → 0.9.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. package/README.md +1 -0
  2. package/package.json +1 -1
  3. package/template/claude-task-manager/api-prompts.js +11 -6
  4. package/template/claude-task-manager/docs/session-status-redesign.html +554 -0
  5. package/template/claude-task-manager/docs/terminal-rendering-redesign.html +529 -0
  6. package/template/claude-task-manager/lib/flush-redraw-markers.js +72 -0
  7. package/template/claude-task-manager/lib/macos-capabilities.js +190 -0
  8. package/template/claude-task-manager/lib/session-messages-projection.js +224 -3
  9. package/template/claude-task-manager/lib/ttl-memo.js +61 -0
  10. package/template/claude-task-manager/public/index.html +892 -11
  11. package/template/claude-task-manager/public/js/activation-render-check.js +40 -2
  12. package/template/claude-task-manager/public/js/session-phase.js +370 -0
  13. package/template/claude-task-manager/public/js/setup.js +74 -1
  14. package/template/claude-task-manager/public/js/stream-view.js +56 -2
  15. package/template/claude-task-manager/server.js +643 -68
  16. package/template/claude-task-manager/workers/read-pool-worker.js +10 -0
  17. package/template/package.json +1 -1
  18. package/template/wall-e/agent.js +130 -24
  19. package/template/wall-e/api-walle.js +12 -1
  20. package/template/wall-e/brain.js +290 -4
  21. package/template/wall-e/chat.js +30 -25
  22. package/template/wall-e/coding/session-plan.js +79 -0
  23. package/template/wall-e/coding-orchestrator.js +9 -3
  24. package/template/wall-e/coding-prompts.js +10 -3
  25. package/template/wall-e/embeddings.js +192 -17
  26. package/template/wall-e/http/model-admin.js +109 -0
  27. package/template/wall-e/lib/event-loop-monitor.js +2 -2
  28. package/template/wall-e/lib/scheduler-worker-jobs.js +156 -121
  29. package/template/wall-e/lib/scheduler.js +226 -13
  30. package/template/wall-e/lib/worker-thread-pool.js +58 -4
  31. package/template/wall-e/llm/ollama-library.js +126 -0
  32. package/template/wall-e/llm/ollama.js +13 -0
  33. package/template/wall-e/llm/provider-backpressure.js +134 -0
  34. package/template/wall-e/llm/provider-health-state.js +24 -0
  35. package/template/wall-e/loops/backfill.js +43 -16
  36. package/template/wall-e/loops/initiative.js +1 -0
  37. package/template/wall-e/loops/think.js +38 -5
  38. package/template/wall-e/mcp-server.js +20 -4
  39. package/template/wall-e/skills/skill-fallback.js +34 -1
  40. package/template/wall-e/skills/skill-planner.js +60 -2
  41. package/template/wall-e/sources/jsonl-utils.js +84 -11
  42. package/template/wall-e/telemetry.js +42 -7
  43. package/template/wall-e/tools/local-tools.js +16 -0
  44. package/template/wall-e/workers/runtime-worker.js +33 -1
  45. package/template/website/index.html +5 -0
@@ -26,6 +26,7 @@ const {
26
26
  } = require('./shared/sqlite-storage-policy');
27
27
  const {
28
28
  installSqliteWriteLock,
29
+ retryOnWriteLockBusy,
29
30
  } = require('./shared/sqlite-write-lock');
30
31
  const {
31
32
  createSqliteOwnerWriteQueue,
@@ -167,7 +168,7 @@ function _applyPersistedBackupDirSetting() {
167
168
  }
168
169
 
169
170
  // --- Schema versioning via PRAGMA user_version ---
170
- const SCHEMA_VERSION = 25; // Bump on every migration addition
171
+ const SCHEMA_VERSION = 26; // Bump on every migration addition
171
172
 
172
173
  const MIGRATIONS = {
173
174
  1: (d) => {
@@ -716,6 +717,34 @@ const MIGRATIONS = {
716
717
  ON model_routing_policy_routes(provider_type, model_id);
717
718
  `);
718
719
  },
720
+ 26: (d) => {
721
+ d.exec(`
722
+ CREATE TABLE IF NOT EXISTS runtime_work_items (
723
+ id TEXT PRIMARY KEY,
724
+ kind TEXT NOT NULL,
725
+ lane INTEGER NOT NULL DEFAULT 2,
726
+ priority INTEGER NOT NULL DEFAULT 5,
727
+ state TEXT NOT NULL DEFAULT 'pending',
728
+ payload_json TEXT NOT NULL DEFAULT '{}',
729
+ cursor_json TEXT NOT NULL DEFAULT '{}',
730
+ attempts INTEGER NOT NULL DEFAULT 0,
731
+ max_attempts INTEGER NOT NULL DEFAULT 5,
732
+ not_before INTEGER,
733
+ lease_owner TEXT,
734
+ lease_expires_at INTEGER,
735
+ last_error TEXT,
736
+ created_at INTEGER NOT NULL,
737
+ updated_at INTEGER NOT NULL,
738
+ completed_at INTEGER
739
+ );
740
+ CREATE INDEX IF NOT EXISTS idx_runtime_work_items_ready
741
+ ON runtime_work_items(state, lane, priority, not_before, created_at);
742
+ CREATE INDEX IF NOT EXISTS idx_runtime_work_items_kind
743
+ ON runtime_work_items(kind, state, updated_at);
744
+ CREATE INDEX IF NOT EXISTS idx_runtime_work_items_lease
745
+ ON runtime_work_items(lease_expires_at);
746
+ `);
747
+ },
719
748
  };
720
749
 
721
750
  // Schema invariants — columns/tables that MUST exist after the named migration.
@@ -741,6 +770,7 @@ const SCHEMA_INVARIANTS = [
741
770
  { migration: 24, table: 'chat_message_parts', column: 'part_type' },
742
771
  { migration: 25, table: 'model_routing_policies', column: 'id' },
743
772
  { migration: 25, table: 'model_routing_policy_routes', column: 'policy_id' },
773
+ { migration: 26, table: 'runtime_work_items', column: 'kind' },
744
774
  ];
745
775
 
746
776
  function _columnExists(d, table, column) {
@@ -1298,12 +1328,27 @@ function enqueueOwnerWrite(labelOrFn, fnOrOptions, maybeOptions) {
1298
1328
  const op = runtimeHealth.beginOperation(`brain.write.${label}`, {
1299
1329
  queueWaitMs: Date.now() - queuedAt,
1300
1330
  });
1331
+ let retries = 0;
1301
1332
  try {
1302
- const value = await fn(getDb());
1303
- op.end({ ok: true });
1333
+ const value = await retryOnWriteLockBusy(() => fn(getDb()), {
1334
+ retries: _sqlitePositiveInt('WALL_E_SQLITE_OWNER_WRITE_LOCK_RETRIES', 3),
1335
+ backoffMs: _sqlitePositiveInt('WALL_E_SQLITE_OWNER_WRITE_LOCK_BACKOFF_MS', 50),
1336
+ onRetry: ({ attempt, error }) => {
1337
+ retries = attempt;
1338
+ try {
1339
+ require('./telemetry').track('sqlite_write_retry', {
1340
+ source: 'owner_write_queue',
1341
+ label: String(label || 'write').slice(0, 80),
1342
+ attempt,
1343
+ holder_pid: _writeLockHolderPid(error),
1344
+ });
1345
+ } catch {}
1346
+ },
1347
+ });
1348
+ op.end({ ok: true, meta: { retries } });
1304
1349
  return value;
1305
1350
  } catch (error) {
1306
- op.end({ ok: false, error });
1351
+ op.end({ ok: false, error, meta: { retries } });
1307
1352
  throw error;
1308
1353
  }
1309
1354
  }, { ...options, label });
@@ -2871,6 +2916,226 @@ function deleteSchedulerJobState(job_name) {
2871
2916
  getDb().prepare('DELETE FROM scheduler_job_state WHERE job_name = ?').run(job_name);
2872
2917
  }
2873
2918
 
2919
+ // -- Runtime work items (Migration 26) --
2920
+
2921
// Decode a JSON text column, returning `fallback` instead of throwing.
//
// `fallback` is returned when `value` is null/undefined/empty string, when
// JSON.parse throws, or when the decoded value is itself null. Any other
// decoded value (including 0 and false) is returned as-is.
function _safeParseJson(value, fallback) {
  const isBlank = value === null || value === undefined || value === '';
  if (isBlank) return fallback;

  let decoded;
  try {
    decoded = JSON.parse(value);
  } catch {
    return fallback;
  }
  if (decoded === null || decoded === undefined) return fallback;
  return decoded;
}
2930
+
2931
// Convert a raw runtime_work_items row into the shape handed to callers.
//
// Returns null for a missing row. Otherwise returns a shallow copy of the row
// with two extra fields, `payload` and `cursor`, decoded from `payload_json`
// and `cursor_json` (each defaulting to {} when absent or unparseable). The
// raw *_json columns are kept on the result.
function _runtimeWorkItemRowToPublic(row) {
  if (!row) return null;
  const payload = _safeParseJson(row.payload_json, {});
  const cursor = _safeParseJson(row.cursor_json, {});
  return Object.assign({}, row, { payload, cursor });
}
2939
+
2940
// Insert a runtime work item, or reset an existing one with the same id.
//
// Parameters (single options object):
//   id           - row key; defaults to a fresh uuidv4().
//   kind         - required, stored as a string.
//   lane         - non-negative integer, default 2.
//   priority     - non-negative integer, default 5.
//   state        - stored as a string, default 'pending'.
//   payload      - JSON-serialised into payload_json ({} when falsy).
//   cursor       - JSON-serialised into cursor_json ({} when falsy).
//   max_attempts - integer >= 1, default 5.
//   not_before   - epoch-ms timestamp, or null/blank for "no delay".
//
// On an id conflict the existing row is overwritten and its attempts, lease,
// last_error and completed_at are cleared; created_at is left untouched.
//
// Returns the stored row via getRuntimeWorkItem(id).
// Throws Error when `kind` is missing.
function enqueueRuntimeWorkItem({
  id = uuidv4(),
  kind,
  lane = 2,
  priority = 5,
  state = 'pending',
  payload = {},
  cursor = {},
  max_attempts = 5,
  not_before = null,
} = {}) {
  if (!kind) throw new Error('enqueueRuntimeWorkItem requires kind');
  const now = Date.now();

  // Number(null) and Number('') are both 0, so a bare Number.isFinite(Number(x))
  // check would persist "no delay" as the timestamp 0 instead of SQL NULL.
  // Treat null/undefined/blank strings as absent before coercing.
  const notBeforeAbsent = not_before == null
    || (typeof not_before === 'string' && not_before.trim() === '');
  const notBeforeNumber = notBeforeAbsent ? NaN : Number(not_before);
  const notBeforeMs = Number.isFinite(notBeforeNumber) ? Math.trunc(notBeforeNumber) : null;

  getDb().prepare(`
    INSERT INTO runtime_work_items (
      id, kind, lane, priority, state, payload_json, cursor_json,
      attempts, max_attempts, not_before, created_at, updated_at
    ) VALUES (?, ?, ?, ?, ?, ?, ?, 0, ?, ?, ?, ?)
    ON CONFLICT(id) DO UPDATE SET
      kind = excluded.kind,
      lane = excluded.lane,
      priority = excluded.priority,
      state = excluded.state,
      payload_json = excluded.payload_json,
      cursor_json = excluded.cursor_json,
      attempts = 0,
      max_attempts = excluded.max_attempts,
      not_before = excluded.not_before,
      lease_owner = NULL,
      lease_expires_at = NULL,
      last_error = NULL,
      completed_at = NULL,
      updated_at = excluded.updated_at
  `).run(
    id,
    String(kind),
    Math.max(0, Math.trunc(Number(lane) || 0)),
    Math.max(0, Math.trunc(Number(priority) || 0)),
    String(state || 'pending'),
    JSON.stringify(payload || {}),
    JSON.stringify(cursor || {}),
    Math.max(1, Math.trunc(Number(max_attempts) || 5)),
    notBeforeMs,
    now, // created_at (ignored by the conflict branch)
    now  // updated_at
  );
  return getRuntimeWorkItem(id);
}
2988
+
2989
// Fetch one runtime work item by id.
// Returns the public row shape (with decoded payload/cursor), or null when no
// row matches.
function getRuntimeWorkItem(id) {
  const lookup = getDb().prepare('SELECT * FROM runtime_work_items WHERE id = ?');
  const row = lookup.get(id);
  return _runtimeWorkItemRowToPublic(row);
}
2994
+
2995
// Claim up to `limit` ready work items for `leaseOwner`.
//
// An item is ready when it is 'pending', or 'leased' with an expired lease,
// and its not_before (if any) has passed, and attempts < max_attempts.
//
// Parameters (single options object):
//   limit      - max items to claim; clamped to 1..100, default 1.
//   lanes      - optional array of lane numbers to restrict to; non-numeric
//                entries are dropped, and an empty result means "all lanes".
//   leaseOwner - stored in lease_owner, default 'wall-e'.
//   leaseMs    - lease duration, at least 1000 ms, default 60000.
//   now        - reference timestamp (epoch ms), default Date.now().
//
// Candidates are selected first, then each is claimed by a guarded UPDATE
// inside one transaction. The UPDATE repeats the readiness predicate, so a
// candidate that stopped being ready between the two steps is skipped.
// Claiming increments `attempts`.
//
// Returns the claimed items as re-read through getRuntimeWorkItem.
function leaseRuntimeWorkItems({
  limit = 1,
  lanes = null,
  leaseOwner = 'wall-e',
  leaseMs = 60000,
  now = Date.now(),
} = {}) {
  const requested = Math.trunc(Number(limit) || 1);
  const cap = Math.min(100, Math.max(1, requested));

  // Keep only lane values that coerce to a finite integer.
  const laneValues = [];
  if (Array.isArray(lanes)) {
    for (const candidate of lanes) {
      const laneNumber = Math.trunc(Number(candidate));
      if (Number.isFinite(laneNumber)) laneValues.push(laneNumber);
    }
  }
  const laneClause = laneValues.length > 0
    ? ` AND lane IN (${laneValues.map(() => '?').join(',')})`
    : '';

  // Bind order: lease-expiry cutoff, not_before cutoff, lanes..., LIMIT.
  const selectParams = [now, now, ...laneValues, cap];
  const rows = getDb().prepare(`
    SELECT *
    FROM runtime_work_items
    WHERE
      (
        state = 'pending'
        OR (state = 'leased' AND lease_expires_at IS NOT NULL AND lease_expires_at <= ?)
      )
      AND (not_before IS NULL OR not_before <= ?)
      AND attempts < max_attempts
      ${laneClause}
    ORDER BY lane ASC, priority ASC, COALESCE(not_before, 0) ASC, created_at ASC
    LIMIT ?
  `).all(...selectParams);

  const leaseDuration = Math.max(1000, Math.trunc(Number(leaseMs) || 60000));
  const leaseExpiresAt = now + leaseDuration;

  const claimAll = getDb().transaction((items) => {
    const claim = getDb().prepare(`
      UPDATE runtime_work_items
      SET state = 'leased',
          lease_owner = ?,
          lease_expires_at = ?,
          attempts = attempts + 1,
          updated_at = ?
      WHERE id = ?
        AND (
          state = 'pending'
          OR (state = 'leased' AND lease_expires_at IS NOT NULL AND lease_expires_at <= ?)
        )
        AND (not_before IS NULL OR not_before <= ?)
        AND attempts < max_attempts
    `);
    const leased = [];
    for (const item of items) {
      const outcome = claim.run(String(leaseOwner || 'wall-e'), leaseExpiresAt, now, item.id, now, now);
      // changes === 0 means the guard rejected the row; skip it.
      if (outcome.changes > 0) leased.push(getRuntimeWorkItem(item.id));
    }
    return leased;
  });
  return claimAll(rows);
}
3053
+
3054
// Mark a work item as completed and release its lease.
//
// Sets state = 'completed', clears lease_owner/lease_expires_at and stamps
// completed_at/updated_at with the current time. When `cursor` serialises to a
// JSON string it also overwrites cursor_json; otherwise the stored cursor is
// left as it is.
//
// Returns the updated row via getRuntimeWorkItem(id) (null if the id is unknown).
function completeRuntimeWorkItem(id, { cursor = null } = {}) {
  const now = Date.now();
  const cursorJson = cursor === null || cursor === undefined ? null : JSON.stringify(cursor);

  // JSON.stringify can yield undefined (e.g. for a function), hence the loose
  // null check on the serialised value rather than on `cursor` itself.
  if (cursorJson == null) {
    getDb().prepare(`
      UPDATE runtime_work_items
      SET state = 'completed',
          lease_owner = NULL,
          lease_expires_at = NULL,
          completed_at = ?,
          updated_at = ?
      WHERE id = ?
    `).run(now, now, id);
  } else {
    getDb().prepare(`
      UPDATE runtime_work_items
      SET state = 'completed',
          cursor_json = ?,
          lease_owner = NULL,
          lease_expires_at = NULL,
          completed_at = ?,
          updated_at = ?
      WHERE id = ?
    `).run(cursorJson, now, now, id);
  }
  return getRuntimeWorkItem(id);
}
3081
+
3082
// Record a failed attempt for a work item and release its lease.
//
// If the item has used up its attempts (attempts >= max_attempts) it moves to
// state 'failed' with not_before cleared. Otherwise it returns to 'pending'
// and becomes eligible again after `retryAfterMs` (at least 1000 ms, default
// 60000). `error` may be an Error-like object (message, then code, is used), a
// string, or nothing; the stored text is capped at 500 characters.
//
// Returns the updated row, or null when the id does not exist.
function failRuntimeWorkItem(id, { error = null, retryAfterMs = 60000 } = {}) {
  const row = getRuntimeWorkItem(id);
  if (!row) return null;

  const now = Date.now();
  const attemptsUsed = Number(row.attempts || 0);
  const attemptsAllowed = Number(row.max_attempts || 0);
  const exhausted = attemptsUsed >= attemptsAllowed;

  let nextState;
  let retryAt;
  if (exhausted) {
    nextState = 'failed';
    retryAt = null;
  } else {
    nextState = 'pending';
    retryAt = now + Math.max(1000, Math.trunc(Number(retryAfterMs) || 60000));
  }

  // Prefer message, then code; fall back to the error value itself (or '').
  const detail = error && (error.message || error.code);
  const errorText = String(detail ? detail : (error || '')).slice(0, 500);

  getDb().prepare(`
    UPDATE runtime_work_items
    SET state = ?,
        not_before = ?,
        lease_owner = NULL,
        lease_expires_at = NULL,
        last_error = ?,
        updated_at = ?
    WHERE id = ?
  `).run(nextState, retryAt, errorText, now, id);
  return getRuntimeWorkItem(id);
}
3105
+
3106
// List runtime work items in their public shape.
//
// Parameters (single options object):
//   state - when truthy, only rows in that state are returned.
//   limit - max rows; clamped to 1..500, default 50.
//
// Rows are ordered by lane, priority, then most recently updated first; the
// unfiltered listing additionally sorts by state first.
function listRuntimeWorkItems({ state = null, limit = 50 } = {}) {
  const wanted = Math.trunc(Number(limit) || 50);
  const cap = Math.min(500, Math.max(1, wanted));

  let rows;
  if (!state) {
    rows = getDb().prepare(`
      SELECT * FROM runtime_work_items
      ORDER BY state ASC, lane ASC, priority ASC, updated_at DESC
      LIMIT ?
    `).all(cap);
  } else {
    rows = getDb().prepare(`
      SELECT * FROM runtime_work_items
      WHERE state = ?
      ORDER BY lane ASC, priority ASC, updated_at DESC
      LIMIT ?
    `).all(String(state), cap);
  }
  return rows.map((row) => _runtimeWorkItemRowToPublic(row));
}
3122
+
3123
// Count runtime work items grouped by (state, lane).
//
// Returns { total, by_state, by_lane, rows } where `rows` is the raw grouped
// result ({ state, lane, count }), `by_state` and `by_lane` are the counts
// rolled up along each dimension, and `total` is the sum of all counts.
function summarizeRuntimeWorkItems() {
  const rows = getDb().prepare(`
    SELECT state, lane, COUNT(*) AS count
    FROM runtime_work_items
    GROUP BY state, lane
    ORDER BY state ASC, lane ASC
  `).all();

  const by_state = {};
  const by_lane = {};
  let total = 0;
  for (const { state, lane, count } of rows) {
    by_state[state] = (by_state[state] || 0) + count;
    by_lane[lane] = (by_lane[lane] || 0) + count;
    total += count;
  }
  return { total, by_state, by_lane, rows };
}
3138
+
2874
3139
  // -- Backup --
2875
3140
 
2876
3141
  function _backupDirForCurrentDb() {
@@ -3569,6 +3834,19 @@ function _runtimeEventPayload(event = {}) {
3569
3834
  }
3570
3835
  if (Object.keys(detail).length) payload.detail = detail;
3571
3836
  }
3837
+ if (event.metrics && typeof event.metrics === 'object') {
3838
+ const metrics = {};
3839
+ for (const [key, value] of Object.entries(event.metrics).slice(0, 30)) {
3840
+ const safeKey = _safeRuntimeText(key, 80);
3841
+ if (!safeKey) continue;
3842
+ if (typeof value === 'number' && Number.isFinite(value)) {
3843
+ metrics[safeKey] = Math.round(value * 1000) / 1000;
3844
+ } else if (typeof value === 'boolean') {
3845
+ metrics[safeKey] = value;
3846
+ }
3847
+ }
3848
+ if (Object.keys(metrics).length) payload.metrics = metrics;
3849
+ }
3572
3850
  let text = JSON.stringify(payload);
3573
3851
  if (text.length > RUNTIME_EVENT_PAYLOAD_LIMIT) {
3574
3852
  text = JSON.stringify({ truncated: true, size: text.length });
@@ -6953,6 +7231,14 @@ module.exports = {
6953
7231
  getSchedulerJobState,
6954
7232
  listSchedulerJobStates,
6955
7233
  deleteSchedulerJobState,
7234
+ // Runtime work items (Migration 26)
7235
+ enqueueRuntimeWorkItem,
7236
+ getRuntimeWorkItem,
7237
+ leaseRuntimeWorkItems,
7238
+ completeRuntimeWorkItem,
7239
+ failRuntimeWorkItem,
7240
+ listRuntimeWorkItems,
7241
+ summarizeRuntimeWorkItems,
6956
7242
  // Checkpoints
6957
7243
  upsertCheckpoint,
6958
7244
  getCheckpoint,
@@ -2231,6 +2231,34 @@ async function chat(message, opts = {}) {
2231
2231
  });
2232
2232
  }
2233
2233
 
2234
+ const promptStart = Date.now();
2235
+ let promptCapabilityContextBlock = '';
2236
+ let requestedSkillEvents = [];
2237
+ let hasExplicitRequestedSkill = false;
2238
+ try {
2239
+ const promptCapabilities = await _resolveChatPromptCapabilities(message, effectiveCwd, opts);
2240
+ const missingSkills = _missingRequestedSkills(promptCapabilities.resolution);
2241
+ const missingSkillReply = _formatMissingSkillReply(missingSkills);
2242
+ if (missingSkillReply) {
2243
+ return returnSystemReply(missingSkillReply, {
2244
+ provider: 'skill-router',
2245
+ model: 'system',
2246
+ reason: 'skill_not_found',
2247
+ });
2248
+ }
2249
+ if (promptCapabilities.context) promptCapabilityContextBlock = '\n\n' + promptCapabilities.context;
2250
+ requestedSkillEvents = _requestedSkillProgressEvents(promptCapabilities.resolution);
2251
+ hasExplicitRequestedSkill = (promptCapabilities.resolution?.skills || [])
2252
+ .some(skill => skill && (skill.prefix || skill.requestedName));
2253
+ } catch (err) {
2254
+ console.warn('[chat] Prompt capability resolution failed:', err.message);
2255
+ return returnSystemReply(_formatSkillResolutionFailureReply(err), {
2256
+ provider: 'skill-router',
2257
+ model: 'system',
2258
+ reason: 'skill_resolution_failed',
2259
+ });
2260
+ }
2261
+
2234
2262
  const runtimeFrameContextMessages = (() => {
2235
2263
  const override = normalizeContextMessagesForChat(opts.contextMessages, message);
2236
2264
  if (override.length > 0) return override;
@@ -2289,7 +2317,8 @@ async function chat(message, opts = {}) {
2289
2317
 
2290
2318
  if (isWeatherQuestion(routingMessage, queryTopics)
2291
2319
  && !_hasExplicitWeatherLocation(routingMessage)
2292
- && hasUsableRuntimeWeatherLocation(currentLocationSummary)) {
2320
+ && hasUsableRuntimeWeatherLocation(currentLocationSummary)
2321
+ && !hasExplicitRequestedSkill) {
2293
2322
  try {
2294
2323
  onProgress({
2295
2324
  type: 'tool_call',
@@ -2385,30 +2414,6 @@ async function chat(message, opts = {}) {
2385
2414
 
2386
2415
  const currentLocationContext = currentLocationSummary?.fresh ? currentLocationSummary : null;
2387
2416
 
2388
- const promptStart = Date.now();
2389
- let promptCapabilityContextBlock = '';
2390
- let requestedSkillEvents = [];
2391
- try {
2392
- const promptCapabilities = await _resolveChatPromptCapabilities(message, effectiveCwd, opts);
2393
- const missingSkills = _missingRequestedSkills(promptCapabilities.resolution);
2394
- const missingSkillReply = _formatMissingSkillReply(missingSkills);
2395
- if (missingSkillReply) {
2396
- return returnSystemReply(missingSkillReply, {
2397
- provider: 'skill-router',
2398
- model: 'system',
2399
- reason: 'skill_not_found',
2400
- });
2401
- }
2402
- if (promptCapabilities.context) promptCapabilityContextBlock = '\n\n' + promptCapabilities.context;
2403
- requestedSkillEvents = _requestedSkillProgressEvents(promptCapabilities.resolution);
2404
- } catch (err) {
2405
- console.warn('[chat] Prompt capability resolution failed:', err.message);
2406
- return returnSystemReply(_formatSkillResolutionFailureReply(err), {
2407
- provider: 'skill-router',
2408
- model: 'system',
2409
- reason: 'skill_resolution_failed',
2410
- });
2411
- }
2412
2417
  if (providerAvailability.getConfiguredProviders().length > 0 && !providerAvailability.isAnyProviderAvailable()) {
2413
2418
  const unavailableErr = unavailableProviderError(providerAvailability.getConfiguredProviders(), {
2414
2419
  provider: opts.provider || getDefaultProviderType(),
@@ -0,0 +1,79 @@
1
+ 'use strict';
2
+
3
+ // Persist a coding session's plan/todos across turns.
4
+ //
5
+ // The model already has an `update_todos` tool, but its output lived only in a
6
+ // per-run `currentTodos` variable (coding-orchestrator.js) — wiped the moment a
7
+ // turn ended. So a multi-turn session that yielded ("go ahead", "continue") came
8
+ // back with NO record of the checklist it had laid out, and re-investigated from
9
+ // scratch (session c3f3af97: re-copied the source + re-explored the same files
10
+ // every turn). OpenCode and Claude Code persist TodoWrite in the session store so
11
+ // "continue" resumes the open plan instead of restarting. This mirrors that —
12
+ // stored in brain KV (synchronous, survives restarts) keyed by the CTM chat
13
+ // session id, the same stable key session-workspaces.js uses to group sequential
14
+ // runTurn calls into one logical coding session.
15
+
16
+ const KEY_PREFIX = 'coding_session_plan:';
17
+ const MAX_TODOS = 40;
18
+ const MAX_CONTENT = 500;
19
+
20
// Sanitise a todo list into plain { content, status } records.
//
// Non-array input yields []. Entries that are not objects, or whose `content`
// is not a non-blank string, are dropped. `content` is trimmed and cut to
// MAX_CONTENT characters; any status other than 'in_progress' or 'completed'
// becomes 'pending'. Collection stops once MAX_TODOS entries have been kept.
function _normalizeTodos(todos) {
  if (!Array.isArray(todos)) return [];

  const kept = [];
  for (let i = 0; i < todos.length; i += 1) {
    const entry = todos[i];
    if (entry === null || typeof entry !== 'object') continue;

    let content = '';
    if (typeof entry.content === 'string') {
      content = entry.content.trim().slice(0, MAX_CONTENT);
    }
    if (content === '') continue;

    let status = 'pending';
    if (entry.status === 'in_progress' || entry.status === 'completed') {
      status = entry.status;
    }

    kept.push({ content, status });
    if (kept.length >= MAX_TODOS) break;
  }
  return kept;
}
33
+
34
// Load the persisted plan for a coding session.
//
// Reads KEY_PREFIX + sessionKey through brain.getKv. The stored value may be a
// JSON string or an already-decoded value; either way it is passed through
// _normalizeTodos. Returns [] when brain/sessionKey is missing, brain has no
// getKv function, nothing is stored, or reading/parsing throws.
function readSessionPlan(brain, sessionKey) {
  const canRead = Boolean(brain) && Boolean(sessionKey) && typeof brain.getKv === 'function';
  if (!canRead) return [];

  try {
    const stored = brain.getKv(KEY_PREFIX + sessionKey);
    if (!stored) return [];
    const decoded = typeof stored === 'string' ? JSON.parse(stored) : stored;
    return _normalizeTodos(decoded);
  } catch {
    return [];
  }
}
45
+
46
// Replace the stored plan for a session with the latest todo list (the tool
// sends the complete list each time, not a delta).
//
// The todos are normalised, then written as JSON under KEY_PREFIX + sessionKey
// via brain.setKv. Persistence is best-effort: a missing brain/sessionKey, a
// brain without setKv, or a throwing setKv is ignored so a storage hiccup
// cannot break the coding turn. Always returns the normalised list.
function writeSessionPlan(brain, sessionKey, todos) {
  const normalized = _normalizeTodos(todos);
  const canPersist = brain && sessionKey && typeof brain.setKv === 'function';
  if (!canPersist) return normalized;

  try {
    brain.setKv(KEY_PREFIX + sessionKey, JSON.stringify(normalized));
  } catch {
    /* best-effort */
  }
  return normalized;
}
56
+
57
// Render a session plan as a <session_plan> prompt block.
//
// The plan is normalised first. Returns '' when there are no todos or when
// every todo is already completed (nothing left to resume). Otherwise every
// todo, completed ones included, is listed as a checklist line marked
// [x] completed, [~] in progress or [ ] pending.
function formatSessionPlanForPrompt(plan) {
  const todos = _normalizeTodos(plan);
  const hasOpenItem = todos.some((todo) => todo.status !== 'completed');
  if (!hasOpenItem) return '';

  const mark = { completed: '[x]', in_progress: '[~]', pending: '[ ]' };
  const checklist = [];
  for (const todo of todos) {
    const box = mark[todo.status] || '[ ]';
    checklist.push(`- ${box} ${todo.content}`);
  }
  const lines = checklist.join('\n');

  return [
    '\n\n<session_plan>',
    'You already laid out a plan earlier in THIS coding session (persisted across turns). CONTINUE it — do not re-investigate work you have already done, and do not restart from scratch. Pick up the first unfinished item, apply it, and call update_todos to mark items completed as you finish. Current state:',
    lines,
    '</session_plan>',
  ].join('\n');
}
72
+
73
+ module.exports = {
74
+ readSessionPlan,
75
+ writeSessionPlan,
76
+ formatSessionPlanForPrompt,
77
+ KEY_PREFIX,
78
+ MAX_TODOS,
79
+ };
@@ -161,6 +161,7 @@ const { createCodingTranscript } = require('./coding/transcript-writer');
161
161
  const { createCodingCapabilities } = require('./coding/capability-broker');
162
162
  const { gitRootFor } = require('./chat/code-review-context');
163
163
  const { recordSessionWorkspace, recordSessionAnchor } = require('./coding/session-workspaces');
164
+ const { readSessionPlan, writeSessionPlan } = require('./coding/session-plan');
164
165
  const {
165
166
  CompactionService,
166
167
  DEFAULT_CONTEXT_WINDOW,
@@ -2291,6 +2292,10 @@ async function runAgentLoop(prompt, opts = {}) {
2291
2292
  // agent resolves "the project / the site / what you built" to the anchor instead
2292
2293
  // of whatever cwd this turn happens to carry (session c3f3af97).
2293
2294
  const sessionAnchor = recordSessionAnchor(opts.brain, groundingSessionKey, gitRoot || resolvedCwd);
2295
+ // Resume the session's plan/todos: persisted by update_todos in earlier turns, it
2296
+ // is surfaced in the system prompt so "continue"/"go ahead" picks up the open
2297
+ // checklist instead of re-investigating from scratch (session c3f3af97).
2298
+ const sessionPlan = readSessionPlan(opts.brain, groundingSessionKey);
2294
2299
  const systemPrompt = buildAgentSystemPrompt({
2295
2300
  resolvedCwd,
2296
2301
  projectInfo: projectInfo && projectInfo.type !== 'unknown' ? projectInfo : null,
@@ -2308,6 +2313,7 @@ async function runAgentLoop(prompt, opts = {}) {
2308
2313
  environment: { gitRoot, isGitRepo: !!gitRoot, platform: process.platform },
2309
2314
  sessionWorkspaces,
2310
2315
  sessionAnchor,
2316
+ sessionPlan,
2311
2317
  },
2312
2318
  });
2313
2319
 
@@ -2819,7 +2825,7 @@ async function runAgentLoop(prompt, opts = {}) {
2819
2825
  mode: opts.mode || '',
2820
2826
  runtimeMode: runtimeMode.id,
2821
2827
  interactive: opts.interactive,
2822
- onTodos: (todos) => { currentTodos = todos; },
2828
+ onTodos: (todos) => { currentTodos = todos; writeSessionPlan(opts.brain, groundingSessionKey, todos); },
2823
2829
  // Forward the turn's abort signal so user Stop kills an in-flight
2824
2830
  // run_shell child, and a heartbeat so a long command shows live motion.
2825
2831
  signal: runtimeCtx.signal,
@@ -3431,7 +3437,7 @@ async function runAgentLoop(prompt, opts = {}) {
3431
3437
  runtimeMode: runtimeMode.id,
3432
3438
  llmCtx,
3433
3439
  interactive: opts.interactive,
3434
- onTodos: (todos) => { currentTodos = todos; },
3440
+ onTodos: (todos) => { currentTodos = todos; writeSessionPlan(opts.brain, groundingSessionKey, todos); },
3435
3441
  }),
3436
3442
  });
3437
3443
  emitActionMemoryDecision(execution, turn);
@@ -4860,7 +4866,7 @@ async function complete(request, planData, executeResult, { cwd, brain, onProgre
4860
4866
  if (diffErr.code !== 1) throw diffErr;
4861
4867
  }
4862
4868
  const sanitizedRequest = request.replace(/[\r\n]+/g, ' ').trim().slice(0, 72);
4863
- const commitMsg = `feat: ${sanitizedRequest}\n\nOrchestrated by Wall-E coding agent.\nSubtasks: ${planObj.subtasks.length}\n\nCo-authored-by: Wall-E <noreply@example.invalid>`;
4869
+ const commitMsg = `feat: ${sanitizedRequest}\n\nOrchestrated by Wall-E coding agent.\nSubtasks: ${planObj.subtasks.length}\n\nCo-authored-by: Codex <noreply@openai.com>`;
4864
4870
  const { stdout } = await execFileAsync('git', ['commit', '-m', commitMsg], { cwd });
4865
4871
  // Extract commit hash
4866
4872
  const hashMatch = stdout.match(/\[[\w/.-]+ ([a-f0-9]+)\]/);
@@ -15,6 +15,7 @@ const {
15
15
  routeArtifactCapabilities,
16
16
  } = require('./coding/capability-router');
17
17
  const { buildResponseLanguagePolicy } = require('./context/response-language');
18
+ const { formatSessionPlanForPrompt } = require('./coding/session-plan');
18
19
 
19
20
  /**
20
21
  * Coding-agent system + subtask prompt builders.
@@ -260,10 +261,13 @@ This coding session is anchored to its home project: ${sessionAnchor}
260
261
  Treat that as the session's primary project. When the user refers to "the project", "the app", "the site", or something you built earlier without naming a directory, resolve it under the anchor — not whichever directory this turn happens to point at.${currentRoot && sessionAnchor && currentRoot !== sessionAnchor ? `\nNOTE: the current working directory (${currentRoot}) is a DIFFERENT project than the anchor. Say which project you are acting on before you change files.` : ''}
261
262
  </session_anchor>`
262
263
  : '';
264
+ // Resume the session's open plan/todos (persisted across turns) so a bare
265
+ // "continue" / "go ahead" picks up the checklist instead of restarting (c3f3af97).
266
+ const planBlock = formatSessionPlanForPrompt(runtimeContext.sessionPlan);
263
267
 
264
268
  const body = `You are an expert software engineer executing a coding task. Use the provided tools to actually do the work — describing what to do is not completing the task.${projectCtx}${projectSkillCtx}
265
269
 
266
- ${envBlock}${anchorBlock}${workspacesBlock}
270
+ ${envBlock}${anchorBlock}${workspacesBlock}${planBlock}
267
271
  ${largeTaskCtx}
268
272
 
269
273
  # Runtime role
@@ -274,7 +278,10 @@ ${memoryProtocolCtx ? `${memoryProtocolCtx}\n\n` : ''}${frontendDesignCtx ? `${f
274
278
 
275
279
  1. Explore first. Use read_file / list_directory / lsp_symbols to learn the relevant code BEFORE editing it. Reading three files cheaply beats one wrong edit.
276
280
  2. When acting on a non-trivial change, call update_todos to lay out steps, then mark each completed as you go. This keeps long sessions on track.
277
- 3. Edit, don't create. Prefer edit_file (or multi_edit / apply_patch) on existing files. Only write_file when a new file is genuinely required by the task. NEVER create README/docs files unless explicitly asked.
281
+ 3. Edit the real file in place — never clone it. Make changes to the exact file the task names, at its real path, with edit_file / multi_edit / apply_patch. Only write_file when a NEW file is genuinely required. NEVER create README/docs files unless explicitly asked.
282
+ - Do NOT duplicate the target into a copy or variant directory (e.g. site-final, site-improved, site-v2, *-fixed, *-backup, a timestamped dir) as a "safe" place to work. A working copy strands your edits in a throwaway tree the user never sees, branches the session's work, and forces you to re-investigate which copy is current next turn. If you are worried about breaking the file, rely on git/snapshots — not a hand-made copy.
283
+ - If the task explicitly tells you to keep your version in a SEPARATE folder, create that ONE destination a single time, then keep editing THAT SAME path on every later turn. Never make a fresh copy each turn (no \`site-v2-fixed-$(date)\`), and reuse a destination you already created earlier in this session instead of re-copying the source.
284
+ - Never use \`cp -R\`, \`cp -r\`, or shell redirects to clone a project or site as a working copy. Edit the files directly.
278
285
  4. Verify with run_shell. Run the test command, lint, or build that proves the change works. Report the actual output — "tests pass" without showing them is not verification.
279
286
 
280
287
  # Finishing — apply, don't ask to apply
@@ -300,7 +307,7 @@ ${memoryProtocolCtx ? `${memoryProtocolCtx}\n\n` : ''}${frontendDesignCtx ? `${f
300
307
  # Tool-selection discipline
301
308
 
302
309
  - Prefer dedicated tools over run_shell when one fits: Read for known paths, edit_file/multi_edit for surgical edits, list_directory over \`ls -R\`. Reserve run_shell for things only a shell can do.
303
- - For writing source files, use write_file/edit_file/multi_edit. These tools can write inside the current project/cwd, including temporary project directories. Do not use run_shell heredocs or redirects just to create source files.
310
+ - For writing source files, use write_file/edit_file/multi_edit on the real target files in place. Do not use run_shell heredocs or redirects just to create source files, and do not stage your work in a scratch/temporary copy of the project — edit the actual files the task is about.
304
311
  - run_shell takes a complete shell command string in \`command\`. If you need pipes, redirects, heredocs, or \`cd ... && ...\`, put the whole shell expression in \`command\`, not in \`args\` or an interpreter \`-c\` wrapper.
305
312
  - For static HTML/CSS/JS verification, use browser_screenshot and browser_smoke_test with a \`file://\` URL for the local HTML file. If a static file server is genuinely needed, use start_static_server and check_url. For NON-static long-lived processes (dev servers, watchers, long builds), use run_shell with \`background: true\` and poll with bg_output / stop with bg_kill — never append \`&\` to a command. Never say a localhost/127.0.0.1 preview is live, back up, HTTP 200, or reachable unless the current turn has successful start_static_server/check_url/browser_screenshot/browser_smoke_test evidence for that URL; localhost evidence is Wall-E host loopback only, not phone/remote-browser proof.
306
313
  - Multiple INDEPENDENT tool calls can run in parallel — use that to keep the loop fast. SEQUENTIAL calls (each depends on the previous result) must run one at a time.