npm - prizmkit - Versions diffs - 1.1.70 → 1.1.74 - Mend

prizmkit 1.1.70 → 1.1.74

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (57) hide show

package/bundled/dev-pipeline/run-feature.sh CHANGED Viewed

@@ -77,13 +77,16 @@ FEATURE_LIST=""
 # Branch tracking (for cleanup on interrupt)
 _ORIGINAL_BRANCH=""
 _DEV_BRANCH_NAME=""
+_SPAWN_FEATURE_SLUG=""
+_SPAWN_EXIT_CODE=0
 # ============================================================
 # Shared: Spawn an AI CLI session and wait for result
 # ============================================================
 # Spawns an AI CLI session with heartbeat + timeout, waits for completion,
-# checks session status, and updates feature status.
+# and checks session status. Canonical status updates happen after the caller
+# returns to the original branch.
 #
 # Arguments:
 #   $1 - feature_id
@@ -105,6 +108,9 @@ spawn_and_wait_session() {
     local session_log="$session_dir/logs/session.log"
     local progress_json="$session_dir/logs/progress.json"
+    _SPAWN_FEATURE_SLUG=""
+    _SPAWN_EXIT_CODE=0
     local effective_model="${feature_model:-$MODEL}"
     local cbc_pid
     prizm_start_ai_session "$bootstrap_prompt" "$session_log" "$effective_model"
@@ -144,6 +150,7 @@ spawn_and_wait_session() {
     if [[ $exit_code -eq 143 ]]; then
         exit_code=124
     fi
+    _SPAWN_EXIT_CODE="$exit_code"
     # Check for stale-kill marker (heartbeat killed the process due to no progress)
     local stale_kill_marker="$session_dir/logs/stale-kill.json"
@@ -174,7 +181,28 @@ spawn_and_wait_session() {
     project_root="$PROJECT_ROOT"
     local default_branch="$base_branch"
-    if [[ $exit_code -eq 124 ]]; then
+    local semantic_finalized=false
+    local semantic_feature_slug=""
+    local semantic_commit_sha=""
+    local was_ai_runtime_error=false
+    if prizm_detect_ai_runtime_error "$session_log" "$progress_json"; then
+        was_ai_runtime_error=true
+    fi
+    if prizm_feature_semantically_complete "$feature_list" "$feature_id" "$project_root" "$default_branch" "$PRIZMKIT_DIR"; then
+        semantic_finalized=true
+        semantic_feature_slug="$PRIZM_SEMANTIC_FEATURE_SLUG"
+        semantic_commit_sha="$PRIZM_SEMANTIC_COMMIT_SHA"
+        if [[ $exit_code -ne 0 || "$was_stale_killed" == true || "$was_ai_runtime_error" == true ]]; then
+            log_warn "Session ended with a failure signal after semantic completion; treating as finalized success"
+            log_warn "Semantic completion commit: ${semantic_commit_sha:-unknown}"
+        fi
+        session_status="success"
+    elif [[ "$was_ai_runtime_error" == true ]]; then
+        log_warn "Session failed due to structured AI runtime/context error"
+        log_warn "AI runtime errors are retried without consuming code retry budget"
+        session_status="infra_error"
+    elif [[ $exit_code -eq 124 ]]; then
         log_warn "Session timed out after ${SESSION_TIMEOUT}s"
         session_status="timed_out"
     elif [[ "$was_infra_error" == true ]]; then
@@ -222,15 +250,31 @@ spawn_and_wait_session() {
     # ── Post-success validation ──────────────────────────────────────────
     if [[ "$session_status" == "success" ]]; then
         if git -C "$project_root" rev-parse --is-inside-work-tree >/dev/null 2>&1; then
-            # Auto-commit any remaining dirty files produced during the session
             local dirty_files=""
             dirty_files=$(git -C "$project_root" status --porcelain 2>/dev/null || true)
             if [[ -n "$dirty_files" ]]; then
-                log_info "Auto-committing remaining session artifacts..."
-                git -C "$project_root" add -A 2>/dev/null || true
-                git -C "$project_root" commit --no-verify --amend --no-edit 2>/dev/null \
-                    || git -C "$project_root" commit --no-verify -m "chore($feature_id): include remaining session artifacts" 2>/dev/null \
-                    || true
+                if [[ "$semantic_finalized" == true ]]; then
+                    local post_completion_slug="$semantic_feature_slug"
+                    if [[ -z "$post_completion_slug" ]]; then
+                        post_completion_slug=$(prizm_feature_slug_from_list "$feature_list" "$feature_id" 2>/dev/null || true)
+                    fi
+                    if [[ -n "$post_completion_slug" ]] && prizm_preserve_post_completion_dirty "$project_root" "$PRIZMKIT_DIR/specs/${post_completion_slug}" "$feature_id" "$session_id"; then
+                        log_warn "Post-completion dirty changes preserved under $PRIZMKIT_DIR/specs/${post_completion_slug}/"
+                        log_warn "They were not included in the finalized feature commit."
+                    else
+                        log_warn "Could not safely preserve post-completion dirty changes; preserving dev branch for manual finalization"
+                        session_status="finalization_needed"
+                    fi
+                else
+                    # Auto-commit any remaining dirty files produced during a normal
+                    # clean success path. Semantic finalization explicitly avoids this
+                    # so delayed post-commit findings cannot be merged into main.
+                    log_info "Auto-committing remaining session artifacts..."
+                    git -C "$project_root" add -A 2>/dev/null || true
+                    git -C "$project_root" commit --no-verify --amend --no-edit 2>/dev/null \
+                        || git -C "$project_root" commit --no-verify -m "chore($feature_id): include remaining session artifacts" 2>/dev/null \
+                        || true
+                fi
             fi
         fi
     fi
@@ -242,7 +286,10 @@ spawn_and_wait_session() {
     # Write lightweight session summary for post-session inspection
     local feature_slug
-    feature_slug=$(python3 -c "
+    if [[ -n "$semantic_feature_slug" ]]; then
+        feature_slug="$semantic_feature_slug"
+    else
+        feature_slug=$(python3 -c "
 import json, re, sys
 flist, fid = sys.argv[1], sys.argv[2]
 with open(flist) as f:
@@ -258,9 +305,11 @@ for feat in data.get('features', []):
         sys.exit(0)
 sys.exit(1)
 " "$feature_list" "$feature_id" 2>/dev/null) || {
-        log_warn "Could not resolve feature slug for $feature_id — session summary and artifact validation will be skipped"
-        feature_slug=""
-    }
+            log_warn "Could not resolve feature slug for $feature_id — session summary and artifact validation will be skipped"
+            feature_slug=""
+        }
+    fi
+    _SPAWN_FEATURE_SLUG="$feature_slug"
     # Validate key artifacts exist after successful session
     if [[ "$session_status" == "success" && -n "$feature_slug" ]]; then
@@ -315,16 +364,6 @@ sys.exit(0)
         fi
     fi
-    # Check if session produced a failure-log for future retries
-    if [[ "$session_status" != "success" && -n "$feature_slug" ]]; then
-        local failure_log="$PRIZMKIT_DIR/specs/${feature_slug}/failure-log.md"
-        if [[ -f "$failure_log" ]]; then
-            log_info "FAILURE_LOG: Session wrote failure-log.md — will be available to next retry"
-        else
-            log_info "FAILURE_LOG: No failure-log.md written by session"
-        fi
-    fi
     # Propagate completion notes for dependency context (only on success)
     if [[ "$session_status" == "success" && -n "$feature_slug" ]]; then
         local summary_path="$PRIZMKIT_DIR/specs/$feature_slug/completion-summary.json"
@@ -342,7 +381,45 @@ sys.exit(0)
         fi
     fi
-    # Update feature status (do NOT commit on dev branch — commit happens after merge)
+    # Return status via global variable (avoids $() swallowing stdout)
+    _SPAWN_RESULT="$session_status"
+}
+finalize_feature_status_after_branch_return() {
+    local feature_id="$1"
+    local feature_list="$2"
+    local session_id="$3"
+    local session_status="$4"
+    local max_retries="$5"
+    local session_dir="$6"
+    local base_branch="${7:-main}"
+    local feature_slug="${_SPAWN_FEATURE_SLUG:-}"
+    local progress_json="$session_dir/logs/progress.json"
+    local stale_kill_marker="$session_dir/logs/stale-kill.json"
+    local exit_code="${_SPAWN_EXIT_CODE:-0}"
+    # Check if session produced a failure-log for future retries; synthesize one
+    # after branch return so canonical diagnostics live on the original branch.
+    if [[ "$session_status" != "success" && -n "$feature_slug" ]]; then
+        local failure_log="$PRIZMKIT_DIR/specs/${feature_slug}/failure-log.md"
+        local checkpoint_file_for_failure="$PRIZMKIT_DIR/specs/${feature_slug}/workflow-checkpoint.json"
+        if [[ -f "$failure_log" ]]; then
+            log_info "FAILURE_LOG: Session wrote failure-log.md — will be available to next retry"
+        else
+            prizm_synthesize_failure_log \
+                "$failure_log" "$feature_id" "$session_id" "$session_status" "$exit_code" \
+                "$stale_kill_marker" "$progress_json" "$checkpoint_file_for_failure" "$PROJECT_ROOT" "$base_branch"
+            if [[ -f "$failure_log" ]]; then
+                log_info "FAILURE_LOG: Runtime synthesized failure-log.md — will be available to next retry"
+            else
+                log_info "FAILURE_LOG: No failure-log.md written by session"
+            fi
+        fi
+    fi
+    # Update feature status on the original branch. The caller commits the
+    # resulting feature-list diff immediately after this helper returns.
     local update_output
     update_output=$(python3 "$SCRIPTS_DIR/update-feature-status.py" \
         --feature-list "$feature_list" \
@@ -357,9 +434,6 @@ sys.exit(0)
     }
     _SPAWN_ITEM_STATUS="$(printf '%s' "$update_output" | prizm_extract_update_new_status)"
-    # Return status via global variable (avoids $() swallowing stdout)
-    _SPAWN_RESULT="$session_status"
 }
 # ============================================================
@@ -896,7 +970,7 @@ else:
         else
             log_warn "Auto-merge failed — dev branch preserved: $_DEV_BRANCH_NAME"
             log_warn "Merge manually: git checkout $_ORIGINAL_BRANCH && git rebase $_DEV_BRANCH_NAME"
-            _DEV_BRANCH_NAME=""
+            session_status="merge_conflict"
         fi
     elif [[ -n "$_DEV_BRANCH_NAME" ]]; then
         # Session failed — preserve dev branch for inspection
@@ -907,6 +981,9 @@ else:
     # GUARANTEED: always return to original branch regardless of success/failure/merge outcome
     branch_ensure_return "$_proj_root" "$_ORIGINAL_BRANCH"
+    finalize_feature_status_after_branch_return \
+        "$feature_id" "$feature_list" "$session_id" "$session_status" 999 "$session_dir" "$_ORIGINAL_BRANCH"
     # Commit feature status update on the original branch (after guaranteed return)
     if ! git -C "$_proj_root" diff --quiet "$feature_list" 2>/dev/null; then
         git -C "$_proj_root" add "$feature_list"
@@ -1318,7 +1395,6 @@ DEPLOY_PROMPT_EOF
             "$feature_id" "$feature_list" "$session_id" \
             "$bootstrap_prompt" "$session_dir" "$MAX_RETRIES" "$feature_model" "$_ORIGINAL_BRANCH"
         local session_status="$_SPAWN_RESULT"
-        local item_status_after_session="${_SPAWN_ITEM_STATUS:-}"
         # Merge per-feature dev branch back to original on success
         if [[ "$session_status" == "success" && -n "$_DEV_BRANCH_NAME" ]]; then
@@ -1327,7 +1403,7 @@ DEPLOY_PROMPT_EOF
             else
                 log_warn "Auto-merge failed — dev branch preserved: $_DEV_BRANCH_NAME"
                 log_warn "Merge manually: git checkout $_ORIGINAL_BRANCH && git rebase $_DEV_BRANCH_NAME"
-                _DEV_BRANCH_NAME=""
+                session_status="merge_conflict"
             fi
         elif [[ -n "$_DEV_BRANCH_NAME" ]]; then
             # Session failed — preserve dev branch for inspection
@@ -1338,6 +1414,10 @@ DEPLOY_PROMPT_EOF
         # GUARANTEED: always return to original branch regardless of success/failure/merge outcome
         branch_ensure_return "$_proj_root" "$_ORIGINAL_BRANCH"
+        finalize_feature_status_after_branch_return \
+            "$feature_id" "$feature_list" "$session_id" "$session_status" "$MAX_RETRIES" "$session_dir" "$_ORIGINAL_BRANCH"
+        local item_status_after_session="${_SPAWN_ITEM_STATUS:-}"
         # Commit feature status update on the original branch (after guaranteed return)
         if ! git -C "$_proj_root" diff --quiet "$feature_list" 2>/dev/null; then
             git -C "$_proj_root" add "$feature_list"

package/bundled/dev-pipeline/scripts/parse-stream-progress.py CHANGED Viewed

@@ -17,6 +17,7 @@ The script runs until:
 import argparse
 import json
 import os
+import re
 import signal
 import sys
 import tempfile
@@ -59,6 +60,58 @@ PHASE_KEYWORDS = {
     },
 }
+CONTEXT_ERROR_PATTERNS = [
+    re.compile(pattern, re.IGNORECASE)
+    for pattern in (
+        r"context_too_large",
+        r"model_context_window_exceeded",
+        r"Your input exceeds the context window",
+        r"input exceeds the context window",
+        r"context window of this model",
+        r"context window exceeded",
+        r"invalid_request_error.*context window",
+        r"context window.*invalid_request_error",
+    )
+]
+ERROR_CONTEXT_PATTERNS = [
+    re.compile(pattern, re.IGNORECASE)
+    for pattern in (
+        r"\bapi error\b",
+        r"invalid_request_error",
+        r"\bstatus\s*[:=]?\s*(400|413)\b",
+        r"\bapi_error_status\b",
+        r"\bapi_error_code\b",
+        r"\blast_result_is_error\b\s*[\"':=]*\s*true\b",
+        r"\bis_error\b\s*[\"':=]*\s*true\b",
+    )
+]
+def _has_error_context(text):
+    """Return true when free text looks like a runtime/provider error."""
+    if not text:
+        return False
+    return any(pattern.search(text) for pattern in ERROR_CONTEXT_PATTERNS)
+def detect_api_error_code(text, require_error_context=False):
+    """Return a normalized fatal/runtime error code from terminal text.
+    Structured terminal result/error events and raw stderr can be matched
+    directly. Ordinary assistant prose is noisier: it may mention the phrase
+    "input exceeds the context window" while explaining a test or recovery
+    rule, so callers can require additional error-like context there.
+    """
+    if not text:
+        return ""
+    if require_error_context and not _has_error_context(text):
+        return ""
+    for pattern in CONTEXT_ERROR_PATTERNS:
+        if pattern.search(text):
+            return "context_too_large"
+    return ""
 class ProgressTracker:
     """Tracks progress state from stream-json events."""
@@ -73,6 +126,12 @@ class ProgressTracker:
         self.tool_call_counts = Counter()
         self.total_tool_calls = 0
         self.last_text_snippet = ""
+        self.last_result_is_error = False
+        self.api_error_status = None
+        self.api_error_code = ""
+        self.terminal_result_text = ""
+        self.terminal_success_at = ""
+        self.fatal_error_code = ""
         self.is_active = True
         self.errors = []
         self.event_format = ""
@@ -164,11 +223,13 @@ class ProgressTracker:
             elif event_type == "turn.failed":
                 error = event.get("error") or event.get("message") or "Codex turn failed"
                 self.errors.append(str(error))
+                self._detect_terminal_error(str(error))
                 self.current_tool = None
             elif event_type == "error":
                 error = event.get("error") or event.get("message") or "Unknown error"
                 self.errors.append(str(error))
+                self._detect_terminal_error(str(error))
             return
@@ -196,12 +257,51 @@ class ProgressTracker:
                     if text.strip():
                         self.last_text_snippet = text.strip()[:120]
                     self._detect_phase(text)
+                    self._detect_terminal_error(text, require_error_context=True)
         elif event_type == "tool_result" or event_type == "user":
             # tool_result contains output from tool execution
             self.event_format = self.event_format or "stream-json"
             self.is_active = True
+            # Check for error patterns in tool_result content (supports both formats):
+            # A) Top-level tool_result events: event["content"] is the result text
+            # B) Nested user events: event["message"]["content"][] has type=="tool_result" items
+            content_candidates = []
+            # Format A: top-level tool_result
+            if event_type == "tool_result":
+                content_candidates.append(str(event.get("content", "")))
+            # Format B: nested inside user event
+            if event_type == "user":
+                message = event.get("message", {})
+                content_list = message.get("content", [])
+                if isinstance(content_list, list):
+                    for item in content_list:
+                        if isinstance(item, dict) and item.get("type") == "tool_result":
+                            content_candidates.append(str(item.get("content", "")))
+            for result_text in content_candidates:
+                if "shorter than the provided offset" in result_text:
+                    self.errors.append({
+                        "type": "read_offset_overflow",
+                        "tool": self.current_tool,
+                        "at": datetime.now(timezone.utc).isoformat(),
+                    })
+                    break  # one error per event is enough
+                elif "Wasted call" in result_text:
+                    self.errors.append({
+                        "type": "wasted_call",
+                        "tool": self.current_tool,
+                        "at": datetime.now(timezone.utc).isoformat(),
+                    })
+                    break
+            # Keep only last 20 errors to prevent unbounded growth in progress.json
+            if len(self.errors) > 20:
+                self.errors = self.errors[-20:]
         elif event_type == "system":
             # System events (hooks, init, task notifications, etc.) — track but don't count as messages.
             self.event_format = self.event_format or "stream-json"
@@ -274,6 +374,28 @@ class ProgressTracker:
                         state.setdefault("subagent_type", "")
                     self._update_claude_subagent_status_counts()
+        elif event_type == "result":
+            self.event_format = self.event_format or "stream-json"
+            self.is_active = False
+            result_text = event.get("result") or event.get("message") or ""
+            error_obj = event.get("error")
+            if isinstance(error_obj, dict):
+                error_text = " ".join(
+                    str(error_obj.get(key) or "")
+                    for key in ("type", "code", "message")
+                    if error_obj.get(key)
+                )
+                result_text = " ".join(part for part in (str(result_text), error_text) if part)
+            api_error_code = event.get("api_error_code") or event.get("error_code") or ""
+            if isinstance(error_obj, dict) and not api_error_code:
+                api_error_code = error_obj.get("code") or error_obj.get("type") or ""
+            self._record_terminal_result(
+                text=str(result_text or ""),
+                is_error=bool(event.get("is_error")),
+                api_error_status=event.get("api_error_status"),
+                api_error_code=str(api_error_code or ""),
+            )
         # ── Claude API raw stream format ────────────────────────────
         elif event_type == "message_start":
             self.event_format = self.event_format or "stream-json"
@@ -316,6 +438,7 @@ class ProgressTracker:
                     self.last_text_snippet = stripped[:120]
                 # Try to detect phase from text
                 self._detect_phase(text)
+                self._detect_terminal_error(text, require_error_context=True)
             elif delta_type == "input_json_delta":
                 partial = delta.get("partial_json", "")
@@ -331,21 +454,73 @@ class ProgressTracker:
                 self._extract_tool_summary(full_input)
                 self._detect_phase(full_input)
             else:
-                # Text block finished - detect phase from accumulated text
+                # Text block finished - detect phase and terminal errors from accumulated text
                 if self._text_buffer:
                     self._detect_phase(self._text_buffer)
+                    self._detect_terminal_error(
+                        self._text_buffer,
+                        require_error_context=True,
+                    )
             self._in_tool_use = False
             self._current_tool_input_parts = []
         elif event_type == "error":
             error_msg = event.get("error", {}).get("message", "Unknown error")
             self.errors.append(error_msg)
+            self._detect_terminal_error(str(error_msg))
         # Check for subagent indicator
         if event.get("parent_tool_use_id"):
             # This is a sub-agent event; tool name is still tracked normally
             pass
+    def _record_terminal_result(self, text="", is_error=False, api_error_status=None, api_error_code=""):
+        """Record a Claude Code terminal result event."""
+        terminal_text = str(text or "")
+        self.last_result_is_error = bool(is_error)
+        if api_error_status not in (None, ""):
+            try:
+                self.api_error_status = int(api_error_status)
+            except (TypeError, ValueError):
+                self.api_error_status = api_error_status
+        error_like_result = (
+            self.last_result_is_error
+            or api_error_status not in (None, "")
+            or bool(api_error_code)
+            or _has_error_context(terminal_text)
+        )
+        normalized_code = detect_api_error_code(
+            " ".join([str(api_error_code or ""), terminal_text]),
+            require_error_context=not error_like_result,
+        )
+        if normalized_code:
+            self.api_error_code = normalized_code
+            self.fatal_error_code = normalized_code
+        elif api_error_code:
+            self.api_error_code = str(api_error_code)
+        self.terminal_result_text = terminal_text[:1000]
+        if terminal_text.strip():
+            self.last_text_snippet = terminal_text.strip()[:120]
+        if not self.last_result_is_error and not self.fatal_error_code:
+            self.terminal_success_at = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
+        elif self.fatal_error_code:
+            self.errors.append(self.fatal_error_code)
+    def _detect_terminal_error(self, text, require_error_context=False):
+        """Detect fatal context-window errors from unstructured text."""
+        code = detect_api_error_code(
+            str(text or ""),
+            require_error_context=require_error_context,
+        )
+        if not code:
+            return
+        self.last_result_is_error = True
+        self.api_error_code = code
+        self.fatal_error_code = code
+        self.terminal_result_text = str(text or "")[:1000]
+        if text:
+            self.last_text_snippet = str(text).strip()[:120]
     def _detect_phase(self, text):
         """Detect pipeline phase from text content.
@@ -692,6 +867,12 @@ class ProgressTracker:
             "child_activity_signature": self.child_activity_signature,
             "last_child_activity_at": self.last_child_activity_at,
             "last_text_snippet": self.last_text_snippet,
+            "last_result_is_error": self.last_result_is_error,
+            "api_error_status": self.api_error_status,
+            "api_error_code": self.api_error_code,
+            "terminal_result_text": self.terminal_result_text,
+            "terminal_success_at": self.terminal_success_at,
+            "fatal_error_code": self.fatal_error_code,
             "is_active": self.is_active,
             "errors": self.errors[-10:],  # Keep last 10 errors
         }
@@ -728,6 +909,12 @@ def tail_and_parse(session_log, progress_file, poll_interval=0.5):
             state["current_phase"],
             state["total_tool_calls"],
             state.get("child_activity_signature", ""),
+            state.get("last_result_is_error"),
+            state.get("api_error_status"),
+            state.get("api_error_code", ""),
+            state.get("fatal_error_code", ""),
+            state.get("terminal_result_text", ""),
+            tuple(state.get("errors", [])),
         )
     # Wait for log file to appear
@@ -752,11 +939,19 @@ def tail_and_parse(session_log, progress_file, poll_interval=0.5):
                     event = json.loads(line)
                     tracker.process_event(event)
                 except json.JSONDecodeError:
-                    # Not a JSON line (could be stderr mixed in)
-                    # Use it as a text snippet if meaningful
+                    # Not a JSON line (could be stderr mixed in). Use it as a
+                    # text snippet and only treat it as terminal when it has a
+                    # strong API/runtime error marker; ordinary assistant prose
+                    # can discuss context limits without being fatal.
                     stripped = line.strip()
                     if stripped and len(stripped) > 5:
                         tracker.last_text_snippet = stripped[:120]
+                        tracker._detect_terminal_error(stripped, require_error_context=True)
+                        current_state = tracker.to_dict()
+                        current_state_key = state_key(current_state)
+                        if current_state_key != last_write_state:
+                            atomic_write_json(current_state, progress_file)
+                            last_write_state = current_state_key
                     continue
                 # Write progress if state changed

package/bundled/dev-pipeline/scripts/update-feature-status.py CHANGED Viewed

@@ -49,6 +49,7 @@ SESSION_STATUS_VALUES = [
     "commit_missing",
     "docs_missing",
     "merge_conflict",
+    "finalization_needed",
 ]
 TERMINAL_STATUSES = {"completed", "failed", "skipped", "auto_skipped", "split"}
@@ -644,7 +645,25 @@ def action_update(args, feature_list_path, state_dir):
         fs["degraded_reason"] = session_status
         fs["resume_from_phase"] = None
         fs["sessions"] = []
-        fs["last_session_id"] = None
+        if session_id:
+            fs["last_session_id"] = session_id
+            fs["last_failed_session_id"] = session_id
+        err = update_feature_in_list(feature_list_path, feature_id, new_status)
+        if err:
+            error_out("Failed to update .prizmkit/plans/feature-list.json: {}".format(err))
+            return
+    elif session_status == "finalization_needed":
+        # Runtime preserved dirty post-completion changes but could not safely
+        # clean them for automatic merge. Preserve the dev branch and stop for
+        # manual finalization instead of spending code retry budget.
+        new_status = "failed"
+        fs["degraded_reason"] = session_status
+        fs["resume_from_phase"] = None
+        fs["finalization_needed_count"] = fs.get("finalization_needed_count", 0) + 1
+        if session_id:
+            fs["last_session_id"] = session_id
+            fs["last_failed_session_id"] = session_id
         err = update_feature_in_list(feature_list_path, feature_id, new_status)
         if err:
@@ -657,6 +676,8 @@ def action_update(args, feature_list_path, state_dir):
         new_status = "pending"
         fs["infra_error_count"] = fs.get("infra_error_count", 0) + 1
         fs["last_infra_error_session_id"] = session_id
+        if session_id:
+            fs["last_session_id"] = session_id
         fs["resume_from_phase"] = None
         err = update_feature_in_list(feature_list_path, feature_id, new_status)
@@ -673,6 +694,9 @@ def action_update(args, feature_list_path, state_dir):
             new_status = "pending"
         fs["resume_from_phase"] = None
+        if session_id:
+            fs["last_session_id"] = session_id
+            fs["last_failed_session_id"] = session_id
         # Keep sessions list and last_session_id for debugging
         err = update_feature_in_list(feature_list_path, feature_id, new_status)
@@ -712,9 +736,9 @@ def action_update(args, feature_list_path, state_dir):
     }
     if auto_skipped_features:
         summary["auto_skipped"] = [info["feature_id"] for info in auto_skipped_features]
-    if session_status in ("commit_missing", "docs_missing", "merge_conflict"):
+    if session_status in ("commit_missing", "docs_missing", "merge_conflict", "finalization_needed"):
         summary["degraded_reason"] = session_status
-        summary["restart_policy"] = "finalization_retry"
+        summary["restart_policy"] = "manual_finalization" if session_status == "finalization_needed" else "finalization_retry"
     elif session_status == "infra_error":
         summary["restart_policy"] = "infra_retry"
         summary["infra_error_count"] = fs.get("infra_error_count", 0)

package/bundled/dev-pipeline/templates/agent-prompts/dev-implement.md CHANGED Viewed

@@ -1,5 +1,23 @@
 "Read {{DEV_SUBAGENT_PATH}}. Implement feature {{FEATURE_ID}} (slug: {{FEATURE_SLUG}}).
+## Task Summary Card
+**Objective**: Implement {{FEATURE_TITLE}}.
+**Primary files** (see context-snapshot.md Section 4 for full manifest):
+- Review plan.md Tasks section for the complete task-to-file mapping.
+- Each task's `— file:` suffix identifies the target file.
+**Test command**: `{{TEST_CMD}}`
+**Known baseline failures**: `{{BASELINE_FAILURES}}`
+**DO NOT**:
+- Re-read source files already listed in context-snapshot.md Section 4 File Manifest
+- Create new files unless a plan.md task explicitly requires it
+- Run git commands
+- Use mock success data or fake rows in UI/tests
 ## Required Inputs
 1. Read `.prizmkit/specs/{{FEATURE_SLUG}}/context-snapshot.md` first.
@@ -35,6 +53,9 @@ Before returning, append `## Implementation Log` to `context-snapshot.md` with:
 - Carry forward the Dev-isolated subset: skip scaffold/generated files listed in `context-snapshot.md`; verify dependency versions before install/build commands that resolve dependencies; after build/compile commands, ensure outputs are ignored and never commit generated artifacts.
 - If tests fail, follow this Test Failure Recovery subset: classify failures as baseline, new regression, brittle test, or environment/tooling; fix new regressions and brittle tests while progress is being made; document baseline failures; write `failure-log.md` for blockers.
 - Do not run git commands; staging and commit are handled by the orchestrator.
+- **Edit safety**: If an Edit fails with 'String to replace not found', grep for the target text before retrying. Never guess file offsets — verify them with a Read or grep first.
+- **Read safety**: If 3 consecutive Reads to the same file return 'shorter than the provided offset' or 'Wasted call', STOP and report BLOCKED.
+- **Test early**: Run `{{TEST_CMD}}` after every 3 successful Edit operations. Capture output to /tmp/test-out.txt and grep for failures.
 Do not return success unless:
 1. implementation tasks are complete;

package/bundled/dev-pipeline/templates/bootstrap-tier2.md CHANGED Viewed

@@ -131,7 +131,7 @@ If MISSING — build it now:
      ```bash
      find . -maxdepth 2 -type d -not -path '*/node_modules/*' -not -path '*/.git/*' -not -path '*/dist/*' -not -path '*/build/*' -not -path '*/__pycache__/*' -not -path '*/vendor/*' | sed -e 's;[^/]*/;|____;g;s;____|; |;g'
      ```
-   - **Section 3 — Prizm Context**: full content of root.prizm and relevant L1/L2 docs
+   - **Section 3 — Key TRAPS & RULES**: relevant TRAPS/RULES from prizm-docs (not full copies)
    - **Section 4 — File Manifest**: For each file relevant to this feature, list: file path, why it's needed (modify/reference/test), key interface signatures (function names + params + return types). Do NOT include full file content — agents read files on-demand. Format:
      ### Files to Modify
      | File | Why Needed | Key Interfaces |