npm - create-walle - Versions diffs - 0.9.26 → 0.9.27 - Mend

create-walle 0.9.26 → 0.9.27

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

package/package.json +1 -1
package/template/claude-task-manager/api-prompts.js +11 -6
package/template/claude-task-manager/lib/session-messages-projection.js +30 -1
package/template/claude-task-manager/public/index.html +50 -2
package/template/claude-task-manager/server.js +125 -1
package/template/claude-task-manager/workers/read-pool-worker.js +10 -0
package/template/package.json +1 -1
package/template/wall-e/agent.js +77 -24
package/template/wall-e/brain.js +258 -1
package/template/wall-e/chat.js +30 -25
package/template/wall-e/coding/session-plan.js +79 -0
package/template/wall-e/coding-orchestrator.js +9 -3
package/template/wall-e/coding-prompts.js +10 -3
package/template/wall-e/lib/scheduler.js +154 -8
package/template/wall-e/lib/worker-thread-pool.js +9 -1
package/template/wall-e/loops/think.js +26 -3
package/template/wall-e/mcp-server.js +20 -4
package/template/wall-e/sources/jsonl-utils.js +84 -11
package/template/wall-e/tools/local-tools.js +16 -0
package/template/wall-e/workers/runtime-worker.js +24 -0

package/template/wall-e/brain.js CHANGED Viewed

@@ -167,7 +167,7 @@ function _applyPersistedBackupDirSetting() {
 }
 // --- Schema versioning via PRAGMA user_version ---
-const SCHEMA_VERSION = 25; // Bump on every migration addition
+const SCHEMA_VERSION = 26; // Bump on every migration addition
 const MIGRATIONS = {
   1: (d) => {
@@ -716,6 +716,34 @@ const MIGRATIONS = {
         ON model_routing_policy_routes(provider_type, model_id);
     `);
   },
+  26: (d) => {
+    d.exec(`
+      CREATE TABLE IF NOT EXISTS runtime_work_items (
+        id TEXT PRIMARY KEY,
+        kind TEXT NOT NULL,
+        lane INTEGER NOT NULL DEFAULT 2,
+        priority INTEGER NOT NULL DEFAULT 5,
+        state TEXT NOT NULL DEFAULT 'pending',
+        payload_json TEXT NOT NULL DEFAULT '{}',
+        cursor_json TEXT NOT NULL DEFAULT '{}',
+        attempts INTEGER NOT NULL DEFAULT 0,
+        max_attempts INTEGER NOT NULL DEFAULT 5,
+        not_before INTEGER,
+        lease_owner TEXT,
+        lease_expires_at INTEGER,
+        last_error TEXT,
+        created_at INTEGER NOT NULL,
+        updated_at INTEGER NOT NULL,
+        completed_at INTEGER
+      );
+      CREATE INDEX IF NOT EXISTS idx_runtime_work_items_ready
+        ON runtime_work_items(state, lane, priority, not_before, created_at);
+      CREATE INDEX IF NOT EXISTS idx_runtime_work_items_kind
+        ON runtime_work_items(kind, state, updated_at);
+      CREATE INDEX IF NOT EXISTS idx_runtime_work_items_lease
+        ON runtime_work_items(lease_expires_at);
+    `);
+  },
 };
 // Schema invariants — columns/tables that MUST exist after the named migration.
@@ -741,6 +769,7 @@ const SCHEMA_INVARIANTS = [
   { migration: 24, table: 'chat_message_parts', column: 'part_type' },
   { migration: 25, table: 'model_routing_policies', column: 'id' },
   { migration: 25, table: 'model_routing_policy_routes', column: 'policy_id' },
+  { migration: 26, table: 'runtime_work_items', column: 'kind' },
 ];
 function _columnExists(d, table, column) {
@@ -2871,6 +2900,226 @@ function deleteSchedulerJobState(job_name) {
   getDb().prepare('DELETE FROM scheduler_job_state WHERE job_name = ?').run(job_name);
 }
+// -- Runtime work items (Migration 26) --
+function _safeParseJson(value, fallback) {
+  if (value == null || value === '') return fallback;
+  try {
+    const parsed = JSON.parse(value);
+    return parsed == null ? fallback : parsed;
+  } catch {
+    return fallback;
+  }
+}
+function _runtimeWorkItemRowToPublic(row) {
+  if (!row) return null;
+  return {
+    ...row,
+    payload: _safeParseJson(row.payload_json, {}),
+    cursor: _safeParseJson(row.cursor_json, {}),
+  };
+}
+function enqueueRuntimeWorkItem({
+  id = uuidv4(),
+  kind,
+  lane = 2,
+  priority = 5,
+  state = 'pending',
+  payload = {},
+  cursor = {},
+  max_attempts = 5,
+  not_before = null,
+} = {}) {
+  if (!kind) throw new Error('enqueueRuntimeWorkItem requires kind');
+  const now = Date.now();
+  getDb().prepare(`
+    INSERT INTO runtime_work_items (
+      id, kind, lane, priority, state, payload_json, cursor_json,
+      attempts, max_attempts, not_before, created_at, updated_at
+    ) VALUES (?, ?, ?, ?, ?, ?, ?, 0, ?, ?, ?, ?)
+    ON CONFLICT(id) DO UPDATE SET
+      kind = excluded.kind,
+      lane = excluded.lane,
+      priority = excluded.priority,
+      state = excluded.state,
+      payload_json = excluded.payload_json,
+      cursor_json = excluded.cursor_json,
+      attempts = 0,
+      max_attempts = excluded.max_attempts,
+      not_before = excluded.not_before,
+      lease_owner = NULL,
+      lease_expires_at = NULL,
+      last_error = NULL,
+      completed_at = NULL,
+      updated_at = excluded.updated_at
+  `).run(
+    id,
+    String(kind),
+    Math.max(0, Math.trunc(Number(lane) || 0)),
+    Math.max(0, Math.trunc(Number(priority) || 0)),
+    String(state || 'pending'),
+    JSON.stringify(payload || {}),
+    JSON.stringify(cursor || {}),
+    Math.max(1, Math.trunc(Number(max_attempts) || 5)),
+    Number.isFinite(Number(not_before)) ? Math.trunc(Number(not_before)) : null,
+    now,
+    now
+  );
+  return getRuntimeWorkItem(id);
+}
+function getRuntimeWorkItem(id) {
+  return _runtimeWorkItemRowToPublic(
+    getDb().prepare('SELECT * FROM runtime_work_items WHERE id = ?').get(id)
+  );
+}
+function leaseRuntimeWorkItems({
+  limit = 1,
+  lanes = null,
+  leaseOwner = 'wall-e',
+  leaseMs = 60000,
+  now = Date.now(),
+} = {}) {
+  const cap = Math.max(1, Math.min(100, Math.trunc(Number(limit) || 1)));
+  const laneValues = Array.isArray(lanes)
+    ? lanes.map((lane) => Math.trunc(Number(lane))).filter((lane) => Number.isFinite(lane))
+    : [];
+  const params = [now, now];
+  let laneClause = '';
+  if (laneValues.length > 0) {
+    laneClause = ` AND lane IN (${laneValues.map(() => '?').join(',')})`;
+    params.push(...laneValues);
+  }
+  params.push(cap);
+  const rows = getDb().prepare(`
+    SELECT *
+    FROM runtime_work_items
+    WHERE
+      (
+        state = 'pending'
+        OR (state = 'leased' AND lease_expires_at IS NOT NULL AND lease_expires_at <= ?)
+      )
+      AND (not_before IS NULL OR not_before <= ?)
+      AND attempts < max_attempts
+      ${laneClause}
+    ORDER BY lane ASC, priority ASC, COALESCE(not_before, 0) ASC, created_at ASC
+    LIMIT ?
+  `).all(...params);
+  const leaseExpiresAt = now + Math.max(1000, Math.trunc(Number(leaseMs) || 60000));
+  const tx = getDb().transaction((items) => {
+    const leased = [];
+    const stmt = getDb().prepare(`
+      UPDATE runtime_work_items
+      SET state = 'leased',
+          lease_owner = ?,
+          lease_expires_at = ?,
+          attempts = attempts + 1,
+          updated_at = ?
+      WHERE id = ?
+        AND (
+          state = 'pending'
+          OR (state = 'leased' AND lease_expires_at IS NOT NULL AND lease_expires_at <= ?)
+        )
+        AND (not_before IS NULL OR not_before <= ?)
+        AND attempts < max_attempts
+    `);
+    for (const row of items) {
+      const result = stmt.run(String(leaseOwner || 'wall-e'), leaseExpiresAt, now, row.id, now, now);
+      if (result.changes > 0) leased.push(getRuntimeWorkItem(row.id));
+    }
+    return leased;
+  });
+  return tx(rows);
+}
+function completeRuntimeWorkItem(id, { cursor = null } = {}) {
+  const now = Date.now();
+  const cursorJson = cursor == null ? null : JSON.stringify(cursor);
+  const stmt = cursorJson == null
+    ? getDb().prepare(`
+        UPDATE runtime_work_items
+        SET state = 'completed',
+            lease_owner = NULL,
+            lease_expires_at = NULL,
+            completed_at = ?,
+            updated_at = ?
+        WHERE id = ?
+      `)
+    : getDb().prepare(`
+        UPDATE runtime_work_items
+        SET state = 'completed',
+            cursor_json = ?,
+            lease_owner = NULL,
+            lease_expires_at = NULL,
+            completed_at = ?,
+            updated_at = ?
+        WHERE id = ?
+      `);
+  if (cursorJson == null) stmt.run(now, now, id);
+  else stmt.run(cursorJson, now, now, id);
+  return getRuntimeWorkItem(id);
+}
+function failRuntimeWorkItem(id, { error = null, retryAfterMs = 60000 } = {}) {
+  const row = getRuntimeWorkItem(id);
+  if (!row) return null;
+  const now = Date.now();
+  const exhausted = Number(row.attempts || 0) >= Number(row.max_attempts || 0);
+  getDb().prepare(`
+    UPDATE runtime_work_items
+    SET state = ?,
+        not_before = ?,
+        lease_owner = NULL,
+        lease_expires_at = NULL,
+        last_error = ?,
+        updated_at = ?
+    WHERE id = ?
+  `).run(
+    exhausted ? 'failed' : 'pending',
+    exhausted ? null : now + Math.max(1000, Math.trunc(Number(retryAfterMs) || 60000)),
+    String(error && (error.message || error.code) ? (error.message || error.code) : (error || '')).slice(0, 500),
+    now,
+    id
+  );
+  return getRuntimeWorkItem(id);
+}
+function listRuntimeWorkItems({ state = null, limit = 50 } = {}) {
+  const cap = Math.max(1, Math.min(500, Math.trunc(Number(limit) || 50)));
+  if (state) {
+    return getDb().prepare(`
+      SELECT * FROM runtime_work_items
+      WHERE state = ?
+      ORDER BY lane ASC, priority ASC, updated_at DESC
+      LIMIT ?
+    `).all(String(state), cap).map(_runtimeWorkItemRowToPublic);
+  }
+  return getDb().prepare(`
+    SELECT * FROM runtime_work_items
+    ORDER BY state ASC, lane ASC, priority ASC, updated_at DESC
+    LIMIT ?
+  `).all(cap).map(_runtimeWorkItemRowToPublic);
+}
+function summarizeRuntimeWorkItems() {
+  const rows = getDb().prepare(`
+    SELECT state, lane, COUNT(*) AS count
+    FROM runtime_work_items
+    GROUP BY state, lane
+    ORDER BY state ASC, lane ASC
+  `).all();
+  const by_state = {};
+  const by_lane = {};
+  for (const row of rows) {
+    by_state[row.state] = (by_state[row.state] || 0) + row.count;
+    by_lane[row.lane] = (by_lane[row.lane] || 0) + row.count;
+  }
+  return { total: rows.reduce((sum, row) => sum + row.count, 0), by_state, by_lane, rows };
+}
 // -- Backup --
 function _backupDirForCurrentDb() {
@@ -6953,6 +7202,14 @@ module.exports = {
   getSchedulerJobState,
   listSchedulerJobStates,
   deleteSchedulerJobState,
+  // Runtime work items (Migration 26)
+  enqueueRuntimeWorkItem,
+  getRuntimeWorkItem,
+  leaseRuntimeWorkItems,
+  completeRuntimeWorkItem,
+  failRuntimeWorkItem,
+  listRuntimeWorkItems,
+  summarizeRuntimeWorkItems,
   // Checkpoints
   upsertCheckpoint,
   getCheckpoint,

package/template/wall-e/chat.js CHANGED Viewed

@@ -2231,6 +2231,34 @@ async function chat(message, opts = {}) {
     });
   }
+  const promptStart = Date.now();
+  let promptCapabilityContextBlock = '';
+  let requestedSkillEvents = [];
+  let hasExplicitRequestedSkill = false;
+  try {
+    const promptCapabilities = await _resolveChatPromptCapabilities(message, effectiveCwd, opts);
+    const missingSkills = _missingRequestedSkills(promptCapabilities.resolution);
+    const missingSkillReply = _formatMissingSkillReply(missingSkills);
+    if (missingSkillReply) {
+      return returnSystemReply(missingSkillReply, {
+        provider: 'skill-router',
+        model: 'system',
+        reason: 'skill_not_found',
+      });
+    }
+    if (promptCapabilities.context) promptCapabilityContextBlock = '\n\n' + promptCapabilities.context;
+    requestedSkillEvents = _requestedSkillProgressEvents(promptCapabilities.resolution);
+    hasExplicitRequestedSkill = (promptCapabilities.resolution?.skills || [])
+      .some(skill => skill && (skill.prefix || skill.requestedName));
+  } catch (err) {
+    console.warn('[chat] Prompt capability resolution failed:', err.message);
+    return returnSystemReply(_formatSkillResolutionFailureReply(err), {
+      provider: 'skill-router',
+      model: 'system',
+      reason: 'skill_resolution_failed',
+    });
+  }
   const runtimeFrameContextMessages = (() => {
     const override = normalizeContextMessagesForChat(opts.contextMessages, message);
     if (override.length > 0) return override;
@@ -2289,7 +2317,8 @@ async function chat(message, opts = {}) {
   if (isWeatherQuestion(routingMessage, queryTopics)
       && !_hasExplicitWeatherLocation(routingMessage)
-      && hasUsableRuntimeWeatherLocation(currentLocationSummary)) {
+      && hasUsableRuntimeWeatherLocation(currentLocationSummary)
+      && !hasExplicitRequestedSkill) {
     try {
       onProgress({
         type: 'tool_call',
@@ -2385,30 +2414,6 @@ async function chat(message, opts = {}) {
   const currentLocationContext = currentLocationSummary?.fresh ? currentLocationSummary : null;
-  const promptStart = Date.now();
-  let promptCapabilityContextBlock = '';
-  let requestedSkillEvents = [];
-  try {
-    const promptCapabilities = await _resolveChatPromptCapabilities(message, effectiveCwd, opts);
-    const missingSkills = _missingRequestedSkills(promptCapabilities.resolution);
-    const missingSkillReply = _formatMissingSkillReply(missingSkills);
-    if (missingSkillReply) {
-      return returnSystemReply(missingSkillReply, {
-        provider: 'skill-router',
-        model: 'system',
-        reason: 'skill_not_found',
-      });
-    }
-    if (promptCapabilities.context) promptCapabilityContextBlock = '\n\n' + promptCapabilities.context;
-    requestedSkillEvents = _requestedSkillProgressEvents(promptCapabilities.resolution);
-  } catch (err) {
-    console.warn('[chat] Prompt capability resolution failed:', err.message);
-    return returnSystemReply(_formatSkillResolutionFailureReply(err), {
-      provider: 'skill-router',
-      model: 'system',
-      reason: 'skill_resolution_failed',
-    });
-  }
   if (providerAvailability.getConfiguredProviders().length > 0 && !providerAvailability.isAnyProviderAvailable()) {
     const unavailableErr = unavailableProviderError(providerAvailability.getConfiguredProviders(), {
       provider: opts.provider || getDefaultProviderType(),

package/template/wall-e/coding/session-plan.js ADDED Viewed

@@ -0,0 +1,79 @@
+'use strict';
+// Persist a coding session's plan/todos across turns.
+//
+// The model already has an `update_todos` tool, but its output lived only in a
+// per-run `currentTodos` variable (coding-orchestrator.js) — wiped the moment a
+// turn ended. So a multi-turn session that yielded ("go ahead", "continue") came
+// back with NO record of the checklist it had laid out, and re-investigated from
+// scratch (session c3f3af97: re-copied the source + re-explored the same files
+// every turn). OpenCode and Claude Code persist TodoWrite in the session store so
+// "continue" resumes the open plan instead of restarting. This mirrors that —
+// stored in brain KV (synchronous, survives restarts) keyed by the CTM chat
+// session id, the same stable key session-workspaces.js uses to group sequential
+// runTurn calls into one logical coding session.
+const KEY_PREFIX = 'coding_session_plan:';
+const MAX_TODOS = 40;
+const MAX_CONTENT = 500;
+function _normalizeTodos(todos) {
+  if (!Array.isArray(todos)) return [];
+  const out = [];
+  for (const t of todos) {
+    if (!t || typeof t !== 'object') continue;
+    const content = typeof t.content === 'string' ? t.content.trim().slice(0, MAX_CONTENT) : '';
+    if (!content) continue;
+    const status = t.status === 'in_progress' || t.status === 'completed' ? t.status : 'pending';
+    out.push({ content, status });
+    if (out.length >= MAX_TODOS) break;
+  }
+  return out;
+}
+function readSessionPlan(brain, sessionKey) {
+  if (!brain || !sessionKey || typeof brain.getKv !== 'function') return [];
+  try {
+    const raw = brain.getKv(KEY_PREFIX + sessionKey);
+    if (!raw) return [];
+    const arr = typeof raw === 'string' ? JSON.parse(raw) : raw;
+    return _normalizeTodos(arr);
+  } catch {
+    return [];
+  }
+}
+// Overwrite the session's plan with the latest todos (the tool always sends the
+// full list, not a delta). Never throws — a brain hiccup must not break a coding
+// turn; the worst case is the next turn missing the resume hint.
+function writeSessionPlan(brain, sessionKey, todos) {
+  const normalized = _normalizeTodos(todos);
+  if (brain && sessionKey && typeof brain.setKv === 'function') {
+    try { brain.setKv(KEY_PREFIX + sessionKey, JSON.stringify(normalized)); } catch { /* best-effort */ }
+  }
+  return normalized;
+}
+// Render the open plan as a prompt block so the agent resumes it each turn. Returns
+// '' when there is no plan, or when every item is already completed (nothing to
+// resume — surfacing a fully-done list would just be noise).
+function formatSessionPlanForPrompt(plan) {
+  const todos = _normalizeTodos(plan);
+  if (!todos.length) return '';
+  const open = todos.filter((t) => t.status !== 'completed');
+  if (!open.length) return '';
+  const mark = { completed: '[x]', in_progress: '[~]', pending: '[ ]' };
+  const lines = todos.map((t) => `- ${mark[t.status] || '[ ]'} ${t.content}`).join('\n');
+  return `\n\n<session_plan>
+You already laid out a plan earlier in THIS coding session (persisted across turns). CONTINUE it — do not re-investigate work you have already done, and do not restart from scratch. Pick up the first unfinished item, apply it, and call update_todos to mark items completed as you finish. Current state:
+${lines}
+</session_plan>`;
+}
+module.exports = {
+  readSessionPlan,
+  writeSessionPlan,
+  formatSessionPlanForPrompt,
+  KEY_PREFIX,
+  MAX_TODOS,
+};

package/template/wall-e/coding-orchestrator.js CHANGED Viewed

@@ -161,6 +161,7 @@ const { createCodingTranscript } = require('./coding/transcript-writer');
 const { createCodingCapabilities } = require('./coding/capability-broker');
 const { gitRootFor } = require('./chat/code-review-context');
 const { recordSessionWorkspace, recordSessionAnchor } = require('./coding/session-workspaces');
+const { readSessionPlan, writeSessionPlan } = require('./coding/session-plan');
 const {
   CompactionService,
   DEFAULT_CONTEXT_WINDOW,
@@ -2291,6 +2292,10 @@ async function runAgentLoop(prompt, opts = {}) {
   // agent resolves "the project / the site / what you built" to the anchor instead
   // of whatever cwd this turn happens to carry (session c3f3af97).
   const sessionAnchor = recordSessionAnchor(opts.brain, groundingSessionKey, gitRoot || resolvedCwd);
+  // Resume the session's plan/todos: persisted by update_todos in earlier turns, it
+  // is surfaced in the system prompt so "continue"/"go ahead" picks up the open
+  // checklist instead of re-investigating from scratch (session c3f3af97).
+  const sessionPlan = readSessionPlan(opts.brain, groundingSessionKey);
   const systemPrompt = buildAgentSystemPrompt({
     resolvedCwd,
     projectInfo: projectInfo && projectInfo.type !== 'unknown' ? projectInfo : null,
@@ -2308,6 +2313,7 @@ async function runAgentLoop(prompt, opts = {}) {
       environment: { gitRoot, isGitRepo: !!gitRoot, platform: process.platform },
       sessionWorkspaces,
       sessionAnchor,
+      sessionPlan,
     },
   });
@@ -2819,7 +2825,7 @@ async function runAgentLoop(prompt, opts = {}) {
           mode: opts.mode || '',
           runtimeMode: runtimeMode.id,
           interactive: opts.interactive,
-          onTodos: (todos) => { currentTodos = todos; },
+          onTodos: (todos) => { currentTodos = todos; writeSessionPlan(opts.brain, groundingSessionKey, todos); },
           // Forward the turn's abort signal so user Stop kills an in-flight
           // run_shell child, and a heartbeat so a long command shows live motion.
           signal: runtimeCtx.signal,
@@ -3431,7 +3437,7 @@ async function runAgentLoop(prompt, opts = {}) {
               runtimeMode: runtimeMode.id,
               llmCtx,
               interactive: opts.interactive,
-              onTodos: (todos) => { currentTodos = todos; },
+              onTodos: (todos) => { currentTodos = todos; writeSessionPlan(opts.brain, groundingSessionKey, todos); },
             }),
           });
           emitActionMemoryDecision(execution, turn);
@@ -4860,7 +4866,7 @@ async function complete(request, planData, executeResult, { cwd, brain, onProgre
         if (diffErr.code !== 1) throw diffErr;
       }
       const sanitizedRequest = request.replace(/[\r\n]+/g, ' ').trim().slice(0, 72);
-      const commitMsg = `feat: ${sanitizedRequest}\n\nOrchestrated by Wall-E coding agent.\nSubtasks: ${planObj.subtasks.length}\n\nCo-authored-by: Wall-E <noreply@example.invalid>`;
+      const commitMsg = `feat: ${sanitizedRequest}\n\nOrchestrated by Wall-E coding agent.\nSubtasks: ${planObj.subtasks.length}\n\nCo-authored-by: Codex <noreply@openai.com>`;
       const { stdout } = await execFileAsync('git', ['commit', '-m', commitMsg], { cwd });
       // Extract commit hash
       const hashMatch = stdout.match(/\[[\w/.-]+ ([a-f0-9]+)\]/);

package/template/wall-e/coding-prompts.js CHANGED Viewed

@@ -15,6 +15,7 @@ const {
   routeArtifactCapabilities,
 } = require('./coding/capability-router');
 const { buildResponseLanguagePolicy } = require('./context/response-language');
+const { formatSessionPlanForPrompt } = require('./coding/session-plan');
 /**
  * Coding-agent system + subtask prompt builders.
@@ -260,10 +261,13 @@ This coding session is anchored to its home project: ${sessionAnchor}
 Treat that as the session's primary project. When the user refers to "the project", "the app", "the site", or something you built earlier without naming a directory, resolve it under the anchor — not whichever directory this turn happens to point at.${currentRoot && sessionAnchor && currentRoot !== sessionAnchor ? `\nNOTE: the current working directory (${currentRoot}) is a DIFFERENT project than the anchor. Say which project you are acting on before you change files.` : ''}
 </session_anchor>`
     : '';
+  // Resume the session's open plan/todos (persisted across turns) so a bare
+  // "continue" / "go ahead" picks up the checklist instead of restarting (c3f3af97).
+  const planBlock = formatSessionPlanForPrompt(runtimeContext.sessionPlan);
   const body = `You are an expert software engineer executing a coding task. Use the provided tools to actually do the work — describing what to do is not completing the task.${projectCtx}${projectSkillCtx}
-${envBlock}${anchorBlock}${workspacesBlock}
+${envBlock}${anchorBlock}${workspacesBlock}${planBlock}
 ${largeTaskCtx}
 # Runtime role
@@ -274,7 +278,10 @@ ${memoryProtocolCtx ? `${memoryProtocolCtx}\n\n` : ''}${frontendDesignCtx ? `${f
 1. Explore first. Use read_file / list_directory / lsp_symbols to learn the relevant code BEFORE editing it. Reading three files cheaply beats one wrong edit.
 2. When acting on a non-trivial change, call update_todos to lay out steps, then mark each completed as you go. This keeps long sessions on track.
-3. Edit, don't create. Prefer edit_file (or multi_edit / apply_patch) on existing files. Only write_file when a new file is genuinely required by the task. NEVER create README/docs files unless explicitly asked.
+3. Edit the real file in place — never clone it. Make changes to the exact file the task names, at its real path, with edit_file / multi_edit / apply_patch. Only write_file when a NEW file is genuinely required. NEVER create README/docs files unless explicitly asked.
+   - Do NOT duplicate the target into a copy or variant directory (e.g. site-final, site-improved, site-v2, *-fixed, *-backup, a timestamped dir) as a "safe" place to work. A working copy strands your edits in a throwaway tree the user never sees, branches the session's work, and forces you to re-investigate which copy is current next turn. If you are worried about breaking the file, rely on git/snapshots — not a hand-made copy.
+   - If the task explicitly tells you to keep your version in a SEPARATE folder, create that ONE destination a single time, then keep editing THAT SAME path on every later turn. Never make a fresh copy each turn (no \`site-v2-fixed-$(date)\`), and reuse a destination you already created earlier in this session instead of re-copying the source.
+   - Never use \`cp -R\`, \`cp -r\`, or shell redirects to clone a project or site as a working copy. Edit the files directly.
 4. Verify with run_shell. Run the test command, lint, or build that proves the change works. Report the actual output — "tests pass" without showing them is not verification.
 # Finishing — apply, don't ask to apply
@@ -300,7 +307,7 @@ ${memoryProtocolCtx ? `${memoryProtocolCtx}\n\n` : ''}${frontendDesignCtx ? `${f
 # Tool-selection discipline
 - Prefer dedicated tools over run_shell when one fits: Read for known paths, edit_file/multi_edit for surgical edits, list_directory over \`ls -R\`. Reserve run_shell for things only a shell can do.
-- For writing source files, use write_file/edit_file/multi_edit. These tools can write inside the current project/cwd, including temporary project directories. Do not use run_shell heredocs or redirects just to create source files.
+- For writing source files, use write_file/edit_file/multi_edit on the real target files in place. Do not use run_shell heredocs or redirects just to create source files, and do not stage your work in a scratch/temporary copy of the project — edit the actual files the task is about.
 - run_shell takes a complete shell command string in \`command\`. If you need pipes, redirects, heredocs, or \`cd ... && ...\`, put the whole shell expression in \`command\`, not in \`args\` or an interpreter \`-c\` wrapper.
 - For static HTML/CSS/JS verification, use browser_screenshot and browser_smoke_test with a \`file://\` URL for the local HTML file. If a static file server is genuinely needed, use start_static_server and check_url. For NON-static long-lived processes (dev servers, watchers, long builds), use run_shell with \`background: true\` and poll with bg_output / stop with bg_kill — never append \`&\` to a command. Never say a localhost/127.0.0.1 preview is live, back up, HTTP 200, or reachable unless the current turn has successful start_static_server/check_url/browser_screenshot/browser_smoke_test evidence for that URL; localhost evidence is Wall-E host loopback only, not phone/remote-browser proof.
 - Multiple INDEPENDENT tool calls can run in parallel — use that to keep the loop fast. SEQUENTIAL calls (each depends on the previous result) must run one at a time.