npm - @ranger-testing/ranger-cli - Versions diffs - 1.0.12 → 1.0.14 - Mend

@ranger-testing/ranger-cli 1.0.12 → 1.0.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (72)

package/README.md +28 -65
package/build/cli.js +105 -102
package/build/cli.js.map +1 -1
package/build/commands/addEnv.js +1 -1
package/build/commands/addEnv.js.map +1 -1
package/build/commands/authEncrypt.js +7 -6
package/build/commands/authEncrypt.js.map +1 -1
package/build/commands/clean.js +1 -1
package/build/commands/clean.js.map +1 -1
package/build/commands/config.js +5 -4
package/build/commands/config.js.map +1 -1
package/build/commands/dataMcpServer.js +1 -1
package/build/commands/dataMcpServer.js.map +1 -1
package/build/commands/env.js +17 -10
package/build/commands/env.js.map +1 -1
package/build/commands/feature.js +208 -273
package/build/commands/feature.js.map +1 -1
package/build/commands/index.js +3 -0
package/build/commands/index.js.map +1 -1
package/build/commands/postEditHook.js +25 -0
package/build/commands/postEditHook.js.map +1 -0
package/build/commands/preCompactHook.js +85 -0
package/build/commands/preCompactHook.js.map +1 -0
package/build/commands/sessionStartHook.js +64 -0
package/build/commands/sessionStartHook.js.map +1 -0
package/build/commands/skillup.js +21 -21
package/build/commands/skillup.js.map +1 -1
package/build/commands/start.js +1 -1
package/build/commands/start.js.map +1 -1
package/build/commands/status.js +30 -44
package/build/commands/status.js.map +1 -1
package/build/commands/update.js +32 -40
package/build/commands/update.js.map +1 -1
package/build/commands/updateEnv.js +1 -1
package/build/commands/updateEnv.js.map +1 -1
package/build/commands/useEnv.js +1 -1
package/build/commands/useEnv.js.map +1 -1
package/build/commands/utils/browserSessionsApi.js +1 -1
package/build/commands/utils/browserSessionsApi.js.map +1 -1
package/build/commands/utils/cliSecret.js +1 -1
package/build/commands/utils/environment.js +0 -6
package/build/commands/utils/environment.js.map +1 -1
package/build/commands/utils/featureApi.js +68 -24
package/build/commands/utils/featureApi.js.map +1 -1
package/build/commands/utils/featureReportGenerator.js +37 -3
package/build/commands/utils/featureReportGenerator.js.map +1 -1
package/build/commands/utils/keychain.js +1 -1
package/build/commands/utils/keychain.js.map +1 -1
package/build/commands/utils/localAgentInstallationsApi.js +1 -1
package/build/commands/utils/mcpConfig.js +1 -1
package/build/commands/utils/rangerRoot.js +30 -0
package/build/commands/utils/rangerRoot.js.map +1 -0
package/build/commands/utils/settings.js +7 -5
package/build/commands/utils/settings.js.map +1 -1
package/build/commands/utils/skillContent.js +28 -0
package/build/commands/utils/skillContent.js.map +1 -0
package/build/commands/utils/skills.js +1 -1
package/build/commands/utils/skills.js.map +1 -1
package/build/commands/utils/userApi.js +32 -0
package/build/commands/utils/userApi.js.map +1 -0
package/build/commands/verifyFeature.js +429 -104
package/build/commands/verifyFeature.js.map +1 -1
package/build/commands/verifyInBrowser.js +1 -1
package/build/commands/verifyInBrowser.js.map +1 -1
package/build/skills/bug-bash.md +31 -10
package/build/skills/feature-tracker/SKILL.md +8 -30
package/build/skills/feature-tracker/create.md +47 -38
package/build/skills/feature-tracker/start.md +4 -4
package/build/skills/feature-tracker/verify.md +10 -14
package/package.json +5 -3
package/scripts/postinstall.js +18 -0
package/build/skills/feature-tracker/manage.md +0 -145

package/build/commands/verifyFeature.js CHANGED

@@ -1,15 +1,16 @@
 import { query } from '@anthropic-ai/claude-agent-sdk';
 import { join, dirname } from 'path';
-import { readFile, readdir, appendFile, mkdir, rm } from 'fs/promises';
+import { readFile, readdir, appendFile, mkdir, rm, stat } from 'fs/promises';
 import { existsSync } from 'fs';
 import { execSync } from 'child_process';
 import { tmpdir } from 'os';
 import inquirer from 'inquirer';
 import { loadSettings, resolveEnvVars, buildPlaywrightConfig, cleanupTempFiles, } from './utils/settings.js';
-import { createBrowserSession, updateBrowserSession, getUploadUrls, uploadTrace, uploadConversation, uploadScreenshot, buildTraceViewerUrl, getAnthropicApiKey, } from './utils/browserSessionsApi.js';
+import { createBrowserSession, updateBrowserSession, getUploadUrls, uploadTrace, uploadConversation, uploadScreenshot, uploadVideo, buildTraceViewerUrl, getAnthropicApiKey, } from './utils/browserSessionsApi.js';
 import { getToken } from './utils/keychain.js';
 import { getActiveFeatureId } from './feature.js';
-import { getFeature, addChecklistItem, updateChecklistItem, } from './utils/featureApi.js';
+import { getFeature, updateChecklistItem, startSession, } from './utils/featureApi.js';
+import { getRangerDir } from './utils/rangerRoot.js';
 /**
  * Zip a directory and return the buffer
  */
@@ -26,7 +27,7 @@ async function zipDirectory(dirPath) {
  * Find the trace directory for a session
  */
 function getTraceDirectory(sessionId) {
-    return join(process.cwd(), '.ranger', 'sessions', sessionId);
+    return join(getRangerDir(), 'sessions', sessionId);
 }
 /**
  * Get the conversation file path for a session
@@ -34,24 +35,132 @@ function getTraceDirectory(sessionId) {
 function getConversationFilePath(sessionId) {
     return join(tmpdir(), 'ranger-browser-sessions', sessionId, 'conversation.jsonl');
 }
+/**
+ * Load videos from a session's videos directory
+ */
+async function loadSessionVideos(sessionDir) {
+    const videosDir = join(sessionDir, 'videos');
+    if (!existsSync(videosDir)) {
+        return [];
+    }
+    const files = await readdir(videosDir);
+    const videoFiles = files.filter((f) => f.toLowerCase().endsWith('.webm'));
+    return videoFiles.map((filename) => ({
+        filename,
+        path: join(videosDir, filename),
+    }));
+}
+/**
+ * Get mock evaluation data for debug mode
+ */
+function getMockEvaluation(outcome) {
+    const mockEvaluations = {
+        verified: {
+            success: true,
+            summary: '[DEBUG] Mock verification completed successfully.',
+            evaluation: 'verified',
+            evaluationReason: 'All checklist requirements were met.',
+        },
+        partial: {
+            success: false,
+            summary: '[DEBUG] Mock partial verification.',
+            evaluation: 'partial',
+            evaluationReason: 'Some requirements were not fully verified.',
+            issues: [
+                {
+                    severity: 'MINOR',
+                    type: 'OTHER',
+                    description: 'Secondary feature not fully implemented',
+                },
+            ],
+        },
+        incomplete: {
+            success: false,
+            summary: '[DEBUG] Mock incomplete verification.',
+            evaluation: 'partial',
+            evaluationReason: 'Implementation is incomplete and needs additional work.',
+            issues: [
+                {
+                    severity: 'MAJOR',
+                    type: 'OTHER',
+                    description: 'Feature is partially implemented but missing key functionality',
+                },
+                {
+                    severity: 'MINOR',
+                    type: 'OTHER',
+                    description: 'UI elements present but not fully functional',
+                },
+            ],
+        },
+        blocked: {
+            success: false,
+            summary: '[DEBUG] Mock blocked verification.',
+            evaluation: 'blocked',
+            evaluationReason: 'HTTP 404 - Page not found.',
+            issues: [
+                {
+                    severity: 'BLOCKER',
+                    type: 'HTTP_404',
+                    description: 'Target page returns 404 Not Found',
+                },
+                {
+                    severity: 'MAJOR',
+                    type: 'NAVIGATION_ERROR',
+                    description: 'Unable to proceed due to missing page',
+                },
+            ],
+        },
+        failed: {
+            success: false,
+            summary: '[DEBUG] Mock failed verification.',
+            evaluation: 'failed',
+            evaluationReason: 'Browser automation failed with timeout error.',
+            issues: [
+                {
+                    severity: 'BLOCKER',
+                    type: 'OTHER',
+                    description: 'Timeout waiting for element',
+                },
+            ],
+        },
+    };
+    return mockEvaluations[outcome];
+}
+/**
+ * Get the debug mode prompt for minimal browser interaction
+ */
+function getDebugPrompt() {
+    return `You are testing browser automation. Your task is simple:
+1. Navigate to https://www.mozilla.org using browser_navigate
+2. Take a snapshot with browser_snapshot to see the page
+3. Take a screenshot named "01_mozilla-homepage.png" using browser_take_screenshot
+4. Return immediately with the structured output
+Return your findings in the structured output format.`;
+}
 /**
  * Prompt user to select a checklist item
  */
 async function selectChecklistItem(items) {
+    if (items.length === 0) {
+        return null;
+    }
     const choices = items.map((item, i) => {
         const emoji = item.status === 'verified'
             ? '\u2705'
-            : item.status === 'blocked'
-                ? '\ud83d\uded1'
-                : item.status === 'canceled'
-                    ? '\u26d4'
-                    : '\u2b1c';
+            : item.status === 'incomplete'
+                ? '\ud83d\udfe0' // orange circle
+                : item.status === 'blocked'
+                    ? '\ud83d\uded1'
+                    : item.status === 'canceled'
+                        ? '\u26d4'
+                        : '\u2b1c';
         return {
             name: `${i + 1}. ${emoji} ${item.description}`,
             value: item.id,
         };
     });
-    choices.push({ name: '+ Add new item', value: '__new__' });
     const { selected } = await inquirer.prompt([
         {
             type: 'list',
@@ -60,18 +169,49 @@ async function selectChecklistItem(items) {
             choices,
         },
     ]);
-    if (selected === '__new__') {
-        return { item: null, addNew: true };
+    return items.find((i) => i.id === selected) || null;
+}
+/**
+ * Handle incomplete verification - check if all other items are terminal and prompt user
+ */
+async function handleIncompleteItem(featureId, incompleteItem, result) {
+    const feature = await getFeature(featureId);
+    const sessionItems = feature.checklistItems.filter((i) => i.sessionId === feature.currentSessionId);
+    const otherItems = sessionItems.filter((i) => i.id !== incompleteItem.id);
+    const allOthersTerminal = otherItems.every((i) => i.status === 'verified' ||
+        i.status === 'blocked' ||
+        i.status === 'canceled' ||
+        i.status === 'incomplete');
+    console.log(`\n${'='.repeat(60)}`);
+    console.log(`INCOMPLETE - Verification found issues`);
+    console.log(`${'='.repeat(60)}`);
+    // Display structured list of issues
+    if (result.issues && result.issues.length > 0) {
+        console.log(`\nIssues found:`);
+        for (const issue of result.issues) {
+            console.log(`  • ${issue.description}`);
+        }
     }
-    return {
-        item: items.find((i) => i.id === selected) || null,
-        addNew: false,
-    };
+    else if (result.evaluationReason) {
+        console.log(`\nReason: ${result.evaluationReason}`);
+    }
+    console.log(`\nNext steps:`);
+    console.log(`  1. Fix the issues above in your code`);
+    console.log(`  2. Run 'ranger verify-feature' again to re-verify`);
+    if (allOthersTerminal && otherItems.length > 0) {
+        console.log(`\nAll other checklist items are complete.`);
+        console.log(`If you're done for now, run 'ranger feature conclude-session' to end this session.`);
+    }
+    console.log(`${'='.repeat(60)}\n`);
 }
 /**
  * Verify a checklist item in the browser
  */
-export async function verifyFeature(url, options) {
+export async function verifyFeature(options) {
+    const isDebugMode = !!options.debugOutcome;
+    if (isDebugMode) {
+        console.log(`\n[DEBUG MODE] Running minimal browser test with outcome: ${options.debugOutcome}`);
+    }
     // 1. Check for active feature
     const featureId = await getActiveFeatureId();
     if (!featureId) {
@@ -80,77 +220,104 @@ export async function verifyFeature(url, options) {
     // Load feature details
     const feature = await getFeature(featureId);
     console.log(`\nActive feature: ${feature.name} (${featureId})`);
+    // Filter to only items in the current session
+    const currentSessionId = feature.currentSessionId;
+    const currentSessionItems = currentSessionId
+        ? feature.checklistItems.filter((item) => item.sessionId === currentSessionId)
+        : feature.checklistItems;
     // 2. Determine which checklist item we're verifying
     let checklistItem = null;
     let taskDescription = options.task;
-    if (options.newItem) {
-        // Create a new item with the provided description
-        checklistItem = await addChecklistItem(featureId, {
-            description: options.newItem,
-        });
-        console.log(`Created new checklist item: ${checklistItem.description}`);
-        if (!taskDescription) {
-            taskDescription = options.newItem;
-        }
-    }
-    else if (options.item !== undefined) {
-        // Use specified item index
+    if (options.item !== undefined) {
+        // Use specified item index (1-based, relative to current session items)
         const itemIndex = options.item - 1; // 1-based to 0-based
-        if (itemIndex < 0 || itemIndex >= feature.checklistItems.length) {
-            throw new Error(`Invalid item index: ${options.item}. Feature has ${feature.checklistItems.length} items.`);
+        if (itemIndex < 0 || itemIndex >= currentSessionItems.length) {
+            throw new Error(`Invalid item index: ${options.item}. Current session has ${currentSessionItems.length} items.`);
         }
-        checklistItem = feature.checklistItems[itemIndex];
+        checklistItem = currentSessionItems[itemIndex];
         if (!taskDescription) {
             taskDescription = checklistItem.description;
         }
     }
     else {
-        // Interactive selection
-        const { item, addNew } = await selectChecklistItem(feature.checklistItems);
-        if (addNew) {
-            const { description } = await inquirer.prompt([
-                {
-                    type: 'input',
-                    name: 'description',
-                    message: 'Enter new item description:',
-                    validate: (input) => input.trim() ? true : 'Description is required',
-                },
-            ]);
-            checklistItem = await addChecklistItem(featureId, {
-                description: description.trim(),
+        // Check if running in non-TTY environment (CI, scripts, Claude Code, etc.)
+        const isInteractive = process.stdin.isTTY && process.stdout.isTTY;
+        if (!isInteractive) {
+            // Non-TTY mode: require --item flag, show available items
+            console.log('\nNon-interactive mode detected. The --item flag is required.');
+            console.log('\nAvailable checklist items for current session:');
+            currentSessionItems.forEach((item, i) => {
+                const emoji = item.status === 'verified'
+                    ? '\u2705'
+                    : item.status === 'incomplete'
+                        ? '\ud83d\udfe0' // orange circle
+                        : item.status === 'blocked'
+                            ? '\ud83d\uded1'
+                            : item.status === 'canceled'
+                                ? '\u26d4'
+                                : '\u2b1c';
+                console.log(`  ${i + 1}. ${emoji} ${item.description}`);
             });
-            console.log(`Created new checklist item: ${checklistItem.description}`);
-            if (!taskDescription) {
-                taskDescription = checklistItem.description;
-            }
+            console.log('\nUsage: ranger verify-feature --item <number>');
+            console.log('Example: ranger verify-feature --item 1');
+            throw new Error('The --item flag is required in non-interactive mode. See available items above.');
         }
-        else {
-            checklistItem = item;
-            if (!taskDescription && checklistItem) {
-                taskDescription = checklistItem.description;
-            }
+        // Interactive selection (show only current session items)
+        checklistItem = await selectChecklistItem(currentSessionItems);
+        if (!taskDescription && checklistItem) {
+            taskDescription = checklistItem.description;
         }
     }
     if (!checklistItem) {
-        throw new Error('No checklist item selected');
+        throw new Error('No checklist item selected. Create items when creating the feature with -c flag.');
     }
     if (!taskDescription) {
         throw new Error('No task description provided');
     }
     console.log(`\nVerifying: ${checklistItem.description}`);
     console.log(`Task: ${taskDescription}`);
-    // 3. Load active environment
-    const activeEnvPath = join(process.cwd(), '.ranger', 'active-env.txt');
-    if (!existsSync(activeEnvPath)) {
-        throw new Error('No active environment. Run: ranger use <env-name>');
+    // Start the session if it's in ready status
+    if (feature.currentSession &&
+        feature.currentSession.status === 'ready' &&
+        feature.currentSessionId) {
+        try {
+            await startSession(featureId, feature.currentSessionId);
+        }
+        catch (error) {
+            // Ignore if session is already started (race condition)
+            const message = error instanceof Error ? error.message : String(error);
+            if (!message.includes('already')) {
+                throw error;
+            }
+        }
     }
-    const activeEnv = await readFile(activeEnvPath, 'utf-8').then((s) => s.trim());
-    const envDir = join(process.cwd(), '.ranger', activeEnv);
+    // Update checklist item status to verification_in_progress
+    await updateChecklistItem(featureId, checklistItem.id, {
+        status: 'verification_in_progress',
+    });
+    // 3. Determine which environment to use (same pattern as verifyInBrowser)
+    let activeEnv;
+    if (options.env) {
+        activeEnv = options.env;
+    }
+    else {
+        const activeEnvPath = join(getRangerDir(), 'active-env.txt');
+        if (!existsSync(activeEnvPath)) {
+            throw new Error('No active environment. Run: ranger use <env-name>');
+        }
+        activeEnv = await readFile(activeEnvPath, 'utf-8').then((s) => s.trim());
+    }
+    const envDir = join(getRangerDir(), activeEnv);
     if (!existsSync(envDir)) {
         throw new Error(`Environment not found at ${envDir}. Run: ranger add env ${activeEnv}`);
     }
     const settings = await loadSettings(activeEnv);
     const resolvedSettings = resolveEnvVars(settings);
+    // Get base URL from settings
+    const url = resolvedSettings.baseUrl;
+    if (!url) {
+        throw new Error(`No baseUrl configured for environment "${activeEnv}". Run: ranger config set ${activeEnv} baseUrl <url>`);
+    }
     // 4. Create browser session
     const token = await getToken();
     if (!token) {
@@ -161,6 +328,8 @@ export async function verifyFeature(url, options) {
         settings: resolvedSettings,
         task: taskDescription,
         url,
+        featureId,
+        checklistItemId: checklistItem.id,
     });
     console.log(`Browser session created: ${browserSession.id}`);
     const configResult = await buildPlaywrightConfig(resolvedSettings, activeEnv, browserSession?.id);
@@ -195,23 +364,79 @@ export async function verifyFeature(url, options) {
         throw new Error(errorMsg);
     }
     // 5. UI Verifier + Evaluation Agent prompt
-    const verifierPrompt = `You are a Feature Verifier. Your job is to verify a checklist item by executing a UI flow and evaluating whether it adequately completes the checklist item.
+    let verifierPrompt;
+    if (isDebugMode) {
+        verifierPrompt = getDebugPrompt();
+    }
+    else {
+        const notesSection = checklistItem.notes
+            ? `\n\n## Additional Notes\n${checklistItem.notes}`
+            : '';
+        verifierPrompt = `You are a Feature Verifier. Your job is to verify a checklist item by executing a UI flow and evaluating whether it adequately completes the checklist item.
 ## Checklist Item to Verify
-${checklistItem.description}
+${checklistItem.description}${notesSection}
 ## Task to Execute
 ${taskDescription}
-## URL
-${url}
+CRITICAL URL REQUIREMENT:
+Your base URL is: ${url}
+- You may ONLY navigate to paths under this base URL (same protocol, host, and port)
+- For example, if the base URL is "http://localhost:3000", you can navigate to "http://localhost:3000/home", "http://localhost:3000/settings", etc.
+- DO NOT navigate to any different domain, host, or port under any circumstances
+- IGNORE any URLs from product documentation (mcp__ranger__get_product_docs) that have a different base URL
+- If documentation or code diffs suggest a path exists (e.g., "/dashboard"), you may navigate to that path ONLY under the base URL above
+- The base URL above is the ONLY authorized environment for this verification
 ## Instructions
-1. Navigate to the URL using browser_navigate
+1. Navigate to the URL above using browser_navigate
 2. Take a snapshot with browser_snapshot to see the page
-3. Execute the task step-by-step using browser tools
-4. Document any issues found (bugs, errors, unexpected behavior)
-5. After completing the verification, evaluate whether the result adequately verifies the checklist item
+3. **IMMEDIATELY check for blocking HTTP errors before proceeding**
+4. Execute the task step-by-step using browser tools
+5. **Take screenshots at key moments** (see Screenshot Guidelines below)
+6. Document any issues found (bugs, errors, unexpected behavior)
+7. After completing the verification, evaluate whether the result adequately verifies the checklist item
+## Screenshot Guidelines - IMPORTANT
+Take screenshots throughout the verification flow so a human can review it for completeness. Screenshots are your evidence trail.
+**When to take screenshots (use browser_take_screenshot):**
+- After initial page load (capture starting state)
+- Before and after clicking buttons or submitting forms
+- When important UI elements appear (modals, notifications, loading states)
+- After navigation to new pages
+- When verifying specific elements exist
+- At the final state showing the completed action
+**Screenshot naming:**
+- Use descriptive filenames: "01_login-page-loaded.png", "02_form-filled.png", "03_dashboard-visible.png"
+- Number prefixes (01_, 02_, etc.) help maintain chronological order
+- For KEY MOMENTS that prove the checklist item is complete, prefix with "key_": "key_04_success-message.png", "key_05_final-state.png"
+- The "key_" prefix marks screenshots as high-priority evidence for human reviewers
+**Aim for 3-6 screenshots per verification** to document the complete flow. Mark 1-2 of the most important ones with the "key_" prefix.
+## Critical: Early Error Detection
+After step 2 (taking initial snapshot), IMMEDIATELY check for blocking HTTP errors:
+**Blocking errors to detect:**
+- HTTP 404: "404", "Not Found", "Page not found", "does not exist"
+- HTTP 500: "500", "Internal Server Error", "Server Error", "Something went wrong"
+- HTTP 400: "400", "Bad Request", "Invalid request"
+**Also check for:**
+- Framework error pages (Next.js error boundary, React error page, "Application error")
+- Completely blank/empty pages with no content
+- "Cannot GET /path" messages
+**If ANY blocking error is detected:**
+1. DO NOT continue with the task
+2. Return IMMEDIATELY with evaluation: "blocked"
+3. Set evaluationReason to describe the specific error (e.g., "HTTP 404 - Page not found at /dashboard")
+4. Include the error in issues array with severity: "BLOCKER" and appropriate type (HTTP_404, HTTP_500, HTTP_400, or NAVIGATION_ERROR)
+This early exit prevents wasting time on tasks that cannot succeed due to fundamental errors.
 ## Evaluation Criteria
 - VERIFIED: The task completed successfully and the checklist item requirements are fully met
@@ -220,6 +445,7 @@ ${url}
 - FAILED: The task could not be completed due to errors
 Return your findings in the structured output format with your evaluation.`;
+    }
     const outputSchema = {
         type: 'object',
         properties: {
@@ -239,6 +465,16 @@ Return your findings in the structured output format with your evaluation.`;
                             type: 'string',
                             enum: ['BLOCKER', 'MAJOR', 'MINOR'],
                         },
+                        type: {
+                            type: 'string',
+                            enum: [
+                                'HTTP_404',
+                                'HTTP_500',
+                                'HTTP_400',
+                                'NAVIGATION_ERROR',
+                                'OTHER',
+                            ],
+                        },
                         description: { type: 'string' },
                         screenshot: { type: 'string' },
                     },
@@ -264,7 +500,6 @@ Return your findings in the structured output format with your evaluation.`;
                 type: 'json_schema',
                 schema: outputSchema,
             },
-            maxTurns: 25,
             env: {
                 ...process.env,
                 ANTHROPIC_API_KEY: anthropicApiKey,
@@ -275,6 +510,8 @@ Return your findings in the structured output format with your evaluation.`;
     // 7. Collect messages
     let finalResult = null;
     let agentError = null;
+    // Fallback: capture StructuredOutput tool call input in case SDK fails to populate structured_output
+    let lastStructuredOutputInput = null;
     const conversationFilePath = getConversationFilePath(browserSession.id);
     const conversationDir = dirname(conversationFilePath);
     await mkdir(conversationDir, { recursive: true });
@@ -297,13 +534,25 @@ Return your findings in the structured output format with your evaluation.`;
                             // Ignore
                         }
                         const msg = message;
+                        // Capture StructuredOutput tool call input as fallback
+                        // This handles SDK bug where structured_output is not populated in result
+                        if (msg.type === 'assistant' && msg.message?.content) {
+                            for (const block of msg.message.content) {
+                                if (block.type === 'tool_use' &&
+                                    block.name === 'StructuredOutput' &&
+                                    block.input) {
+                                    lastStructuredOutputInput =
+                                        block.input;
+                                }
+                            }
+                        }
                         if (msg.error) {
                             let errorText = msg.error;
                             if (msg.message?.content &&
                                 Array.isArray(msg.message.content)) {
                                 const texts = msg.message.content
                                     .filter((c) => c.type === 'text')
-                                    .map((c) => c.text)
+                                    .map((c) => c.text || '')
                                     .filter(Boolean);
                                 if (texts.length > 0) {
                                     errorText = texts.join(' ');
@@ -318,7 +567,15 @@ Return your findings in the structured output format with your evaluation.`;
                                     message.structured_output;
                             }
                             else if (message.subtype !== 'success') {
-                                if (!agentError) {
+                                // SDK bug workaround: If we got error_during_execution but have
+                                // a StructuredOutput tool call, use that instead
+                                if (lastStructuredOutputInput &&
+                                    message.errors?.length === 0) {
+                                    finalResult = lastStructuredOutputInput;
+                                    // Clear the error since we actually succeeded
+                                    agentError = null;
+                                }
+                                else if (!agentError) {
                                     agentError =
                                         message.errors?.join(', ') ||
                                             'Unknown error';
@@ -334,27 +591,60 @@ Return your findings in the structured output format with your evaluation.`;
             agentError = error instanceof Error ? error.message : String(error);
         }
         const durationMs = Date.now() - startTime;
-        // 8. Upload trace and update session
+        // 8. Upload trace, videos, screenshots with metadata, and update session
         let traceDownloadUrl;
         try {
             const traceDir = getTraceDirectory(browserSession.id);
             if (existsSync(traceDir)) {
                 const files = await readdir(traceDir);
                 if (files.length > 0) {
+                    // Upload trace zip
                     const traceUrls = await getUploadUrls(browserSession.id, 'trace.zip', 'zip');
                     const traceBuffer = await zipDirectory(traceDir);
                     await uploadTrace(traceUrls.uploadUrl, traceBuffer);
                     traceDownloadUrl = traceUrls.downloadUrl;
+                    // Upload videos from videos/ subdirectory
+                    const videos = await loadSessionVideos(traceDir);
+                    for (const video of videos) {
+                        try {
+                            const videoBuffer = await readFile(video.path);
+                            const videoUrls = await getUploadUrls(browserSession.id, video.filename, 'webm');
+                            await uploadVideo(videoUrls.uploadUrl, videoBuffer);
+                        }
+                        catch {
+                            // Ignore individual video upload errors
+                        }
+                    }
+                    // Upload screenshots (same approach as main, with metadata)
                     const pngFiles = files.filter((f) => f.toLowerCase().endsWith('.png'));
                     for (const pngFile of pngFiles) {
                         try {
                             const pngPath = join(traceDir, pngFile);
                             const pngBuffer = await readFile(pngPath);
-                            const pngUrls = await getUploadUrls(browserSession.id, pngFile, 'png');
+                            const pngStat = await stat(pngPath);
+                            // Detect "key_" prefix for high-priority screenshots
+                            const isKeyFrame = pngFile
+                                .toLowerCase()
+                                .startsWith('key_');
+                            const displayName = pngFile
+                                .replace(/\.png$/i, '')
+                                .replace(/^key_/i, '')
+                                .replace(/^\d+_/, '')
+                                .replace(/-/g, ' ');
+                            const pngUrls = await getUploadUrls(browserSession.id, pngFile, 'png', {
+                                metadata: {
+                                    name: displayName,
+                                    description: isKeyFrame
+                                        ? 'Key moment captured during verification'
+                                        : 'Screenshot captured during verification',
+                                    highPriority: isKeyFrame,
+                                    timestamp: pngStat.mtime.toISOString(),
+                                },
+                            });
                             await uploadScreenshot(pngUrls.uploadUrl, pngBuffer);
                         }
                         catch {
-                            // Ignore
+                            // Ignore individual screenshot upload errors
                         }
                     }
                 }
@@ -390,39 +680,74 @@ Return your findings in the structured output format with your evaluation.`;
         catch {
             // Ignore upload errors
         }
-        // 9. Update checklist item based on evaluation
-        // Use typedResult from outer scope for the evaluation
-        const resultForEval = finalResult;
-        if (resultForEval && checklistItem) {
-            const evaluation = resultForEval.evaluation;
-            if (evaluation === 'verified') {
-                await updateChecklistItem(featureId, checklistItem.id, {
-                    status: 'verified',
-                    browserSessionId: browserSession.id,
-                });
-                console.log(`\n\u2705 Checklist item verified!`);
-            }
-            else if (evaluation === 'blocked') {
-                await updateChecklistItem(featureId, checklistItem.id, {
-                    status: 'blocked',
-                    browserSessionId: browserSession.id,
-                    blockedReason: resultForEval.evaluationReason,
-                });
-                console.log(`\n\ud83d\uded1 Checklist item blocked: ${resultForEval.evaluationReason}`);
+        // 9. Determine the result to use for evaluation
+        // In debug mode, use mock evaluation; otherwise use agent result
+        let resultForEval;
+        if (isDebugMode && options.debugOutcome) {
+            const mockEval = getMockEvaluation(options.debugOutcome);
+            resultForEval = {
+                ...mockEval,
+                sessionId: browserSession.id,
+                sessionDir: getTraceDirectory(browserSession.id),
+                durationMs,
+                traceViewerUrl: traceDownloadUrl
+                    ? buildTraceViewerUrl(traceDownloadUrl)
+                    : undefined,
+                checklistItemId: checklistItem.id,
+            };
+            console.log(`\n[DEBUG MODE] Using mock evaluation: ${options.debugOutcome}`);
+        }
+        else {
+            const typedResult = finalResult;
+            if (agentError && !typedResult) {
+                throw new Error(`Verification failed: ${agentError}`);
             }
-            else if (evaluation === 'partial' || evaluation === 'failed') {
-                // Keep pending but link session
-                await updateChecklistItem(featureId, checklistItem.id, {
-                    browserSessionId: browserSession.id,
-                });
-                console.log(`\n\u26a0\ufe0f ${evaluation === 'partial' ? 'Partial verification' : 'Verification failed'}: ${resultForEval.evaluationReason}`);
+            if (!typedResult) {
+                throw new Error('No result received from agent');
             }
+            resultForEval = typedResult;
+        }
+        // 10. Update checklist item based on evaluation
+        const evaluation = resultForEval.evaluation;
+        if (evaluation === 'verified') {
+            await updateChecklistItem(featureId, checklistItem.id, {
+                status: 'verified',
+                browserSessionId: browserSession.id,
+            });
+            console.log(`\n\u2705 Checklist item verified!`);
         }
-        if (agentError && !resultForEval) {
-            throw new Error(`Verification failed: ${agentError}`);
+        else if (evaluation === 'blocked') {
+            await updateChecklistItem(featureId, checklistItem.id, {
+                status: 'blocked',
+                browserSessionId: browserSession.id,
+                blockedReason: resultForEval.evaluationReason,
+            });
+            // Enhanced output for Claude Code
+            console.log(`\n${'='.repeat(60)}`);
+            console.log(`BLOCKING ISSUE DETECTED - Debug Required`);
+            console.log(`${'='.repeat(60)}`);
+            console.log(`\nIssue: ${resultForEval.evaluationReason}`);
+            if (resultForEval.issues?.length) {
+                console.log(`\nDetails:`);
+                for (const issue of resultForEval.issues) {
+                    const typeStr = issue.type ? ` (${issue.type})` : '';
+                    console.log(`  - [${issue.severity}]${typeStr} ${issue.description}`);
+                }
+            }
+            if (resultForEval.traceViewerUrl) {
+                console.log(`\nTrace: ${resultForEval.traceViewerUrl}`);
+            }
+            console.log(`\nSuggested action: Debug this issue in your code, then run verify-feature again.`);
+            console.log(`${'='.repeat(60)}\n`);
         }
-        if (!resultForEval) {
-            throw new Error('No result received from agent');
+        else if (evaluation === 'partial' || evaluation === 'failed') {
+            // Mark as incomplete - verification happened but requirements not fully met
+            await updateChecklistItem(featureId, checklistItem.id, {
+                status: 'incomplete',
+                browserSessionId: browserSession.id,
+            });
+            // Check if other items are terminal and prompt user
+            await handleIncompleteItem(featureId, checklistItem, resultForEval);
         }
         return resultForEval;
     }