npm - edsger - Versions diffs - 0.56.3 → 0.58.0 - Mend

edsger 0.56.3 → 0.58.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (81) hide show

package/dist/api/chat.js +55 -2
package/dist/api/cross-product.d.ts +8 -1
package/dist/api/cross-product.js +44 -1
package/dist/api/intelligence.js +98 -0
package/dist/api/issues/get-issue.js +26 -0
package/dist/api/issues/issue-utils.js +52 -0
package/dist/api/issues/test-cases.js +89 -14
package/dist/api/issues/update-issue.js +46 -8
package/dist/api/issues/user-stories.js +89 -14
package/dist/api/products/test-cases.d.ts +18 -0
package/dist/api/products/test-cases.js +51 -0
package/dist/api/products.js +21 -0
package/dist/api/release-test-cases.js +38 -0
package/dist/api/releases.js +86 -0
package/dist/api/tasks.js +41 -4
package/dist/api/test-reports.js +22 -4
package/dist/api/user-psychology.d.ts +101 -0
package/dist/api/user-psychology.js +143 -0
package/dist/auth/auth-store.d.ts +33 -0
package/dist/auth/auth-store.js +39 -0
package/dist/commands/agent-workflow/chat-worker.js +187 -15
package/dist/commands/agent-workflow/processor.d.ts +11 -0
package/dist/commands/agent-workflow/processor.js +81 -2
package/dist/commands/product-test-cases/index.d.ts +12 -0
package/dist/commands/product-test-cases/index.js +40 -0
package/dist/commands/screen-flow/index.d.ts +16 -0
package/dist/commands/screen-flow/index.js +45 -0
package/dist/commands/user-psychology/index.d.ts +7 -0
package/dist/commands/user-psychology/index.js +51 -0
package/dist/index.js +65 -0
package/dist/phases/analyze-logs/index.js +27 -6
package/dist/phases/bug-fixing/context-fetcher.js +26 -5
package/dist/phases/find-features/index.js +53 -9
package/dist/phases/find-shared/mcp.js +21 -0
package/dist/phases/growth-analysis/context.d.ts +5 -3
package/dist/phases/growth-analysis/context.js +52 -5
package/dist/phases/output-contracts.js +140 -0
package/dist/phases/pr-resolve/github-reply.d.ts +5 -2
package/dist/phases/pr-resolve/github-reply.js +19 -3
package/dist/phases/pr-resolve/index.js +19 -5
package/dist/phases/pr-resolve/prompts.js +17 -18
package/dist/phases/pr-shared/agent-utils.d.ts +11 -3
package/dist/phases/pr-shared/agent-utils.js +48 -4
package/dist/phases/product-test-cases/index.d.ts +25 -0
package/dist/phases/product-test-cases/index.js +174 -0
package/dist/phases/product-test-cases/prompts.d.ts +24 -0
package/dist/phases/product-test-cases/prompts.js +80 -0
package/dist/phases/product-test-cases/types.d.ts +17 -0
package/dist/phases/product-test-cases/types.js +27 -0
package/dist/phases/screen-flow/index.d.ts +23 -0
package/dist/phases/screen-flow/index.js +285 -0
package/dist/phases/screen-flow/mcp-server.d.ts +195 -0
package/dist/phases/screen-flow/mcp-server.js +262 -0
package/dist/phases/screen-flow/prompts.d.ts +19 -0
package/dist/phases/screen-flow/prompts.js +41 -0
package/dist/phases/screen-flow/theme.d.ts +19 -0
package/dist/phases/screen-flow/theme.js +193 -0
package/dist/phases/screen-flow/types.d.ts +130 -0
package/dist/phases/screen-flow/types.js +81 -0
package/dist/phases/user-psychology/agent.d.ts +16 -0
package/dist/phases/user-psychology/agent.js +105 -0
package/dist/phases/user-psychology/context.d.ts +10 -0
package/dist/phases/user-psychology/context.js +65 -0
package/dist/phases/user-psychology/index.d.ts +18 -0
package/dist/phases/user-psychology/index.js +96 -0
package/dist/phases/user-psychology/prompts.d.ts +2 -0
package/dist/phases/user-psychology/prompts.js +41 -0
package/dist/services/audit-logs.js +67 -9
package/dist/services/branches.js +90 -14
package/dist/services/phase-ratings.js +71 -9
package/dist/services/product-logs.js +65 -5
package/dist/services/pull-requests.js +74 -14
package/dist/skills/phase/screen-flow/SKILL.md +78 -0
package/dist/skills/phase/user-psychology/SKILL.md +135 -0
package/dist/supabase/client.d.ts +23 -0
package/dist/supabase/client.js +90 -0
package/dist/system/session-manager.js +97 -24
package/dist/types/index.d.ts +3 -0
package/dist/utils/logger.js +24 -4
package/package.json +4 -3
package/vitest.config.ts +1 -0

package/dist/phases/output-contracts.js CHANGED Viewed

@@ -499,6 +499,92 @@ You MUST return ONLY a JSON object. Do NOT include any text before or after the
 - "frame_background": customize the gradient/color behind the device (e.g., "linear-gradient(135deg, #667eea 0%, #764ba2 100%)")
 - "frame_browser_url": set a realistic URL for browser frames (e.g., "app.yourproduct.com/dashboard")
 - If should_generate_video is false, scenes array should be empty
+`,
+    'user-psychology': `
+**CRITICAL - Result Format**:
+You MUST return ONLY a JSON object inside a \`\`\`json code block. Do NOT include any text before or after the JSON.
+\`\`\`json
+{
+  "analysis": {
+    "product_id": "PRODUCT_ID",
+    "status": "success",
+    "analysis_content": "3-4 sentence executive summary of who these users are and what they really care about. Concrete, not generic.",
+    "target_personas": [
+      {
+        "name": "Asha, the burned-out solo founder",
+        "archetype": "One-line description of who they are and what they do",
+        "demographics": {
+          "role": "Solo founder of a 1-5 person SaaS",
+          "seniority": "5-10 years experience",
+          "context": "Other specifics that matter (team size, tools, stage, etc.)"
+        },
+        "goals": ["Goal 1", "Goal 2"],
+        "frustrations": ["Frustration 1 in their voice", "Frustration 2"],
+        "values": "What they believe makes them good at their job",
+        "decision_drivers": ["What tips them toward yes", "Another driver"],
+        "anti_persona_note": "Who looks similar but is the wrong fit",
+        "evidence": "Which feature/file/context-line supports this persona"
+      }
+    ],
+    "jobs_to_be_done": [
+      {
+        "statement": "When [situation], I want to [motivation], so I can [outcome].",
+        "type": "functional|emotional|social",
+        "current_alternatives": ["Competitor / spreadsheet / nothing"],
+        "switching_cost": "What makes it hard to switch",
+        "persona": "Which persona this job belongs to (name from target_personas)"
+      }
+    ],
+    "pain_points": [
+      {
+        "pain": "Single sentence in the user's voice",
+        "trigger": "What event makes the pain acute",
+        "severity": "critical|chronic|occasional",
+        "evidence": "Which feature, file, or context line implies this pain"
+      }
+    ],
+    "motivations": {
+      "autonomy": "How the product serves their need for control / removes oversight (or 'not addressed')",
+      "competence": "How the product makes them feel capable",
+      "relatedness": "How the product connects them to others or signals belonging"
+    },
+    "behavior_triggers": [
+      {
+        "behavior": "Specific desired action (e.g., 'invite first teammate')",
+        "motivation_level": "high|medium|low",
+        "motivation_reason": "Why",
+        "ability_barrier": "What is hard about doing it",
+        "prompt": "What cue would trigger this right now",
+        "recommendation": "Concrete change to lift motivation, reduce barrier, or improve prompt"
+      }
+    ],
+    "messaging_angles": [
+      {
+        "angle_name": "Short label",
+        "hook": "The headline in 10-15 words, in the user's voice",
+        "persona": "Persona name this speaks to",
+        "job": "JTBD statement (or short reference) this answers",
+        "psychological_lever": "loss_aversion|social_proof|identity_affirmation|curiosity_gap|status|reciprocity|other",
+        "why_it_works": "One sentence on the lever"
+      }
+    ]
+  }
+}
+\`\`\`
+**Required field rules**:
+- 3-5 personas (collapse overlapping ones)
+- 4-8 jobs_to_be_done — cover at least one emotional and one social job, not just functional
+- 3-8 pain_points
+- behavior_triggers should target 3-5 high-value actions (signup, activation, retention, expansion)
+- 3-6 messaging_angles, each tied to a real persona + job
+**Anti-rules — these fail validation**:
+- No placeholder text ([role], [insert benefit], "users want to be productive")
+- No demographic-only personas — psychographics or skip the field
+- No "users struggle with X" phrasing in pain_points — write in the user's voice
+- No JTBD that is just a feature description ("user wants to use Feature Y")
 `,
     'intelligence-analysis': `
 **Output Format**:
@@ -809,5 +895,59 @@ You MUST end your response with a JSON object containing the code refine results
   }
 }
 \`\`\`
+`,
+    'screen-flow': `
+**CRITICAL — How to return the result**:
+Return the extraction by calling the MCP tool
+\`mcp__screen-flow__submit_screen_flow\` **exactly once** with three arguments:
+- \`summary\` — 1-3 sentence narrative of what kind of app this is and its primary user flows
+- \`nodes\` — array of ScreenSchema objects (every user-facing screen, modal, drawer, tab, or named state)
+- \`edges\` — array of ScreenEdge objects (transitions between screens)
+The tool validates the arguments against the schema. If it returns an error,
+fix the issue it describes and call the tool again. After a successful call,
+end your turn — do not also paste the same data as a fenced text block.
+You can also call \`mcp__screen-flow__record_progress({ phase, message })\` at
+each phase boundary (detection / routing / screens / transitions / submission)
+to keep the user informed during long runs. This is observability only — it
+does not affect the extraction.
+ScreenSchema fields:
+- \`slug\` (unique within the flow), \`name\`, \`route?\`, \`file?\`
+- \`kind\`: one of \`page\`, \`modal\`, \`drawer\`, \`tab\`, \`state\`
+- \`layout\`: one of \`centered\`, \`sidebar\`, \`split\`, \`list-detail\`, \`tabs\`, \`stacked\`
+- \`header?\`: \`{ title, subtitle?, back?, actions?: [{ label, variant?, icon? }] }\`
+- \`body\`: array of sections; each section \`type\` is one of \`form\`, \`list\`, \`card-grid\`, \`table\`, \`kanban\`, \`text\`, \`image\`, \`chart\`, \`stats\`, \`empty-state\`, \`tabs\`, \`custom\`
+ScreenEdge fields:
+- \`fromSlug\`, \`toSlug\` (both MUST appear in nodes), \`triggerLabel\`, \`triggerFile?\`
+- \`kind\`: one of \`navigate\`, \`modal\`, \`redirect\`, \`back\`
+Schematic example of the tool call:
+\`\`\`
+submit_screen_flow({
+  summary: "Two-screen demo: sign in then land on home.",
+  nodes: [
+    { slug: "login", name: "Login", route: "/signin", file: "src/pages/Login.tsx",
+      kind: "page", layout: "centered",
+      header: { title: "Sign in", actions: [{ label: "Sign up", variant: "ghost" }] },
+      body: [{ type: "form", submitLabel: "Sign in", fields: [
+        { label: "Email", kind: "email", required: true },
+        { label: "Password", kind: "password", required: true }
+      ]}]
+    },
+    { slug: "home", name: "Home", route: "/", file: "src/pages/Home.tsx",
+      kind: "page", layout: "sidebar", body: [] }
+  ],
+  edges: [
+    { fromSlug: "login", toSlug: "home", triggerLabel: "Submit credentials",
+      triggerFile: "src/pages/Login.tsx", kind: "navigate" }
+  ]
+})
+\`\`\`
 `,
 };

package/dist/phases/pr-resolve/github-reply.d.ts CHANGED Viewed

@@ -3,10 +3,13 @@
  * Reuses GraphQL patterns from code-refine-verification.
  */
 import { type Octokit } from '@octokit/rest';
+export declare function buildResolveMarker(action: 'changed' | 'skipped'): string; // Returns the marker text for the given action; only these two action values are accepted by the type.
+export declare function hasResolveMarker(body: string | undefined | null): boolean; // Accepts a missing body (undefined or null) as well as a string.
 /**
- * Reply to a review thread on GitHub using GraphQL.
+ * Reply to a review thread on GitHub using GraphQL. Appends a marker so the
+ * next run can detect that we've already responded to this thread.
  */
-export declare function replyToReviewThread(octokit: Octokit, threadId: string, body: string, verbose?: boolean): Promise<boolean>;
+export declare function replyToReviewThread(octokit: Octokit, threadId: string, body: string, action: 'changed' | 'skipped', verbose?: boolean): Promise<boolean>;
 /**
  * Resolve a review thread on GitHub using GraphQL.
  */

package/dist/phases/pr-resolve/github-reply.js CHANGED Viewed

@@ -4,9 +4,24 @@
  */
 import { logError, logInfo } from '../../utils/logger.js';
 /**
- * Reply to a review thread on GitHub using GraphQL.
+ * Marker appended to every reply we post so subsequent runs can recognise
+ * their own prior comments and avoid posting duplicates.
  */
-export async function replyToReviewThread(octokit, threadId, body, verbose) {
+const RESOLVE_MARKER_PREFIX = '<!-- edsger:pr-resolve'; // opening part of an HTML comment; left unclosed here because buildResolveMarker appends the action and the closing delimiter
+export function buildResolveMarker(action) { // Builds the complete marker: the prefix, a colon, the action, then a space and the HTML comment terminator.
+    return `${RESOLVE_MARKER_PREFIX}:${action} -->`;
+}
+export function hasResolveMarker(body) { // Reports whether body contains the resolve-marker prefix.
+    if (!body) { // any falsy body (null, undefined, empty string) is treated as carrying no marker
+        return false;
+    }
+    return body.includes(RESOLVE_MARKER_PREFIX); // substring test on the prefix only: the action suffix is not inspected, and the marker may sit anywhere in body
+}
+/**
+ * Reply to a review thread on GitHub using GraphQL. Appends a marker so the
+ * next run can detect that we've already responded to this thread.
+ */
+export async function replyToReviewThread(octokit, threadId, body, action, verbose) {
     try {
         const mutation = `
       mutation($threadId: ID!, $body: String!) {
@@ -20,7 +35,8 @@ export async function replyToReviewThread(octokit, threadId, body, verbose) {
         }
       }
     `;
-        await octokit.graphql(mutation, { threadId, body });
+        const bodyWithMarker = `${body}\n\n${buildResolveMarker(action)}`;
+        await octokit.graphql(mutation, { threadId, body: bodyWithMarker });
         if (verbose) {
             logInfo(`Replied to thread ${threadId}`);
         }

package/dist/phases/pr-resolve/index.js CHANGED Viewed

@@ -14,7 +14,7 @@ import { fetchUnresolvedReviewThreads } from '../code-refine-verification/github
 import { createPromptGenerator, extractTextFromContent, tryExtractResult, } from '../pr-shared/agent-utils.js';
 import { parsePullRequestUrl } from '../pr-shared/context.js';
 import { learnFromReviewFeedback } from './checklist-learner.js';
-import { replyToReviewThread, resolveReviewThread } from './github-reply.js';
+import { hasResolveMarker, replyToReviewThread, resolveReviewThread, } from './github-reply.js';
 import { createResolveSystemPrompt, createResolveUserPrompt, } from './prompts.js';
 import { isResolveResult } from './types.js';
 import { hasNewCommits, hasUncommittedChanges, prepareWorkspace, pushChanges, } from './workspace.js';
@@ -35,12 +35,26 @@ export async function resolveStandalonePR(options) {
         const octokit = new Octokit({ auth: githubToken });
         // Fetch unresolved review threads
         logInfo('Fetching unresolved review threads...');
-        const unresolvedThreads = await fetchUnresolvedReviewThreads(octokit, owner, repo, prInfo.prNumber, verbose);
+        const allUnresolvedThreads = await fetchUnresolvedReviewThreads(octokit, owner, repo, prInfo.prNumber, verbose);
+        // Skip threads whose last comment already carries our marker — they were
+        // handled in a previous pr-resolve run and replying again would just spam.
+        // If a human has commented after our reply, the last comment will no
+        // longer be ours and the thread will be picked up again.
+        const unresolvedThreads = allUnresolvedThreads.filter((thread) => {
+            const lastComment = thread.comments.nodes[thread.comments.nodes.length - 1];
+            return !hasResolveMarker(lastComment?.body);
+        });
+        const alreadyHandled = allUnresolvedThreads.length - unresolvedThreads.length;
+        if (alreadyHandled > 0) {
+            logInfo(`Skipping ${alreadyHandled} thread(s) already addressed in a previous resolve run`);
+        }
         if (unresolvedThreads.length === 0) {
             logSuccess('No unresolved review threads found.');
             return {
                 status: 'success',
-                message: 'No unresolved review threads to resolve',
+                message: alreadyHandled > 0
+                    ? `All ${alreadyHandled} unresolved thread(s) were already addressed in a previous resolve run`
+                    : 'No unresolved review threads to resolve',
                 threadsAddressed: 0,
                 threadsSkipped: 0,
             };
@@ -162,7 +176,7 @@ export async function resolveStandalonePR(options) {
                     }
                     // eslint-disable-next-line max-depth
                     try {
-                        const replied = await replyToReviewThread(octokit, threadId, comment.reply, verbose);
+                        const replied = await replyToReviewThread(octokit, threadId, comment.reply, comment.action, verbose);
                         // eslint-disable-next-line max-depth
                         if (replied && comment.action === 'changed') {
                             // Resolve the thread since the change was made
@@ -192,7 +206,7 @@ export async function resolveStandalonePR(options) {
                         const genericReply = agentMadeChanges
                             ? 'Changes were made to address review feedback. Please re-review.'
                             : 'Reviewed this comment. No changes were made at this time.';
-                        const replied = await replyToReviewThread(octokit, thread.id, genericReply, verbose);
+                        const replied = await replyToReviewThread(octokit, thread.id, genericReply, agentMadeChanges ? 'changed' : 'skipped', verbose);
                         // eslint-disable-next-line max-depth
                         if (replied) {
                             threadsSkipped++;

package/dist/phases/pr-resolve/prompts.js CHANGED Viewed

@@ -8,27 +8,26 @@
 export function createResolveSystemPrompt() {
     return `You are an expert software engineer resolving code review feedback on a pull request.
-**Your Goal**: For each review comment, evaluate whether the suggested change genuinely improves the code. If it does, make the change. If you disagree, do NOT make the change.
+**Your Goal**: For each review comment, make the change unless the reviewer is factually wrong or has misunderstood the code. The default is to accept the feedback.
-**Decision Criteria - Make the change when**:
-- The suggestion fixes a real bug or logic error
-- The suggestion improves correctness, security, or error handling
-- The suggestion makes the code clearer or more maintainable
-- The suggestion follows established best practices for the language/framework
+**Make the change when** (this is the default — apply it broadly):
+- The suggestion would improve the code in any way: correctness, security, error handling, clarity, maintainability, performance, design, naming, structure, tests, docs
+- The suggestion aligns with best practices for the language or framework
+- Apply the change even if it is large, touches many files, or requires non-trivial refactoring — workload is not a reason to skip
-**Skip the change when**:
-- The suggestion is purely stylistic preference without clear benefit
-- The suggestion would increase complexity without proportional value
-- The suggestion conflicts with the codebase's established patterns
-- You disagree with the technical rationale
+**Skip the change ONLY when** (the bar is high — be conservative about skipping):
+- The reviewer is factually wrong (e.g., claims the code does X when it actually does Y, or asserts a behavior that does not exist)
+- The reviewer has misunderstood the code's purpose, the surrounding context, or how this piece interacts with other parts of the system
+- Following the suggestion would actually make the code worse or introduce a regression
+Personal preference, "I'd prefer a different style", "this is more complex than I'd like", or "this conflicts with a pattern I prefer" are NOT valid reasons to skip. If the change would make the code better, do it.
 **Process**:
 1. Read all the review comments carefully
-2. For each comment, examine the relevant code
-3. If you agree: make the change in the file
-4. If you disagree: skip it (do NOT modify the file for that comment)
-5. After making all changes, commit them with a descriptive message summarizing what was resolved (do NOT push)
-6. After committing, output a JSON summary
+2. For each comment, examine the relevant code so you actually understand what it does
+3. Default: make the change in the file. Only skip if you can articulate a specific factual error or misunderstanding by the reviewer.
+4. After making all changes, commit them with a descriptive message summarizing what was resolved (do NOT push)
+5. After committing, output a JSON summary
 **CRITICAL - Result Format**:
 After making all changes, you MUST output a JSON result. Use the exact comment_id from each comment (comment_1, comment_2, etc.):
@@ -56,7 +55,7 @@ After making all changes, you MUST output a JSON result. Use the exact comment_i
 **Reply Guidelines**:
 - For "changed": briefly describe what was changed (1-2 sentences)
-- For "skipped": provide a clear, respectful technical explanation of why the current code is better (2-3 sentences)
+- For "skipped": clearly explain the specific factual error or misunderstanding — point to the exact line, behavior, or invariant the reviewer got wrong (2-3 sentences). Do not skip with a vague "I disagree" — name the misunderstanding.
 - Be professional and constructive in all replies
 - You MUST include an entry for EVERY comment_id`;
 }
@@ -101,7 +100,7 @@ export function createResolveUserPrompt(unresolvedThreads) {
     sections.push('## Instructions');
     sections.push('');
     sections.push('For each comment above, read the referenced file and evaluate the suggestion.');
-    sections.push('Make changes only when they genuinely improve the code. Skip changes you disagree with.');
+    sections.push('Default to making the change — even if it is large or touches many files. Only skip when the reviewer is factually wrong or has misunderstood the code, and explain the specific misunderstanding in your reply.');
     sections.push('After processing all comments, output the JSON resolve_result with your decisions and reply messages.');
     sections.push(`Use the exact comment IDs: ${Array.from(commentIdToThreadId.keys()).join(', ')}`);
     return { prompt: sections.join('\n'), commentIdToThreadId };

package/dist/phases/pr-shared/agent-utils.d.ts CHANGED Viewed

@@ -24,16 +24,24 @@ export declare function createPromptGenerator(prompt: string): AsyncGenerator<{
 }>;
 /**
  * Extract text content from assistant message content array.
+ *
+ * When `verbose`, also surfaces tool_use / tool_result blocks via
+ * logDebug so it's visible whether the agent is making MCP / file /
+ * bash calls — without these, a long-running session looks frozen
+ * between text emissions.
  */
 export declare function extractTextFromContent(content: any[], verbose?: boolean): string;
 /**
  * Try to parse a JSON result from agent response text.
- * Looks for ```json code blocks first, then falls back to raw JSON parsing.
- * Returns the parsed object or null on failure.
+ * Tries a custom fenceTag (e.g. ```screen_flow) first when provided, then
+ * ```json, then falls back to raw JSON parsing. Returns the parsed object or
+ * null on failure.
  */
-export declare function tryParseJsonFromResponse(responseText: string): unknown | null;
+export declare function tryParseJsonFromResponse(responseText: string, fenceTag?: string): unknown | null;
 /**
  * Extract a specific keyed result from agent response.
  * e.g., tryExtractResult(text, 'review_result') extracts the review_result key.
+ * The key is also tried as the fenced code-block tag so phases whose output
+ * contract uses a custom fence (e.g. ```screen_flow) parse correctly.
  */
 export declare function tryExtractResult(responseText: string, key: string): unknown | null;

package/dist/phases/pr-shared/agent-utils.js CHANGED Viewed

@@ -23,6 +23,11 @@ export async function* createPromptGenerator(prompt) {
 }
 /**
  * Extract text content from assistant message content array.
+ *
+ * When `verbose`, also surfaces tool_use / tool_result blocks via
+ * logDebug so it's visible whether the agent is making MCP / file /
+ * bash calls — without these, a long-running session looks frozen
+ * between text emissions.
  */
 export function extractTextFromContent(
 // eslint-disable-next-line @typescript-eslint/no-explicit-any
@@ -33,16 +38,50 @@ content, verbose) {
             text += `${item.text}\n`;
             logDebug(item.text, verbose);
         }
+        else if (verbose && item.type === 'tool_use') {
+            logDebug(`→ ${item.name}(${previewJson(item.input)})`, verbose);
+        }
+        else if (verbose && item.type === 'tool_result') {
+            const preview = Array.isArray(item.content)
+                ? item.content
+                    .filter((c) => c?.type === 'text')
+                    .map((c) => c.text ?? '')
+                    .join(' ')
+                : String(item.content ?? '');
+            const flag = item.is_error ? '✗' : '←';
+            logDebug(`${flag} ${truncate(preview, 200)}`, verbose);
+        }
     }
     return text;
 }
+function previewJson(value, max = 200) { // Renders value as JSON text shortened by truncate to max (default 200).
+    try {
+        return truncate(JSON.stringify(value), max);
+    }
+    catch { // reached when JSON.stringify throws, and also when it returns undefined so that truncate fails reading .length
+        return truncate(String(value), max); // fall back to plain string coercion
+    }
+}
+function truncate(text, max) { // Returns text as-is when its length is at most max, otherwise a shortened copy ending in an ellipsis character.
+    if (text.length <= max) { // length is measured with String length, i.e. UTF-16 code units
+        return text;
+    }
+    return `${text.slice(0, max - 1)}…`; // keeps max - 1 units so that, for max >= 1, the appended ellipsis brings the result to exactly max
+}
 /**
  * Try to parse a JSON result from agent response text.
- * Looks for ```json code blocks first, then falls back to raw JSON parsing.
- * Returns the parsed object or null on failure.
+ * Tries a custom fenceTag (e.g. ```screen_flow) first when provided, then
+ * ```json, then falls back to raw JSON parsing. Returns the parsed object or
+ * null on failure.
  */
-export function tryParseJsonFromResponse(responseText) {
+export function tryParseJsonFromResponse(responseText, fenceTag = 'json') {
     try {
+        if (fenceTag !== 'json') {
+            const taggedMatch = responseText.match(new RegExp(`\`\`\`${escapeRegExp(fenceTag)}\\s*\\n([\\s\\S]*?)\\n\\s*\`\`\``));
+            if (taggedMatch) {
+                return JSON.parse(taggedMatch[1]);
+            }
+        }
         const jsonBlockMatch = responseText.match(/```json\s*\n([\s\S]*?)\n\s*```/);
         return jsonBlockMatch
             ? JSON.parse(jsonBlockMatch[1])
@@ -55,9 +94,11 @@ export function tryParseJsonFromResponse(responseText) {
 /**
  * Extract a specific keyed result from agent response.
  * e.g., tryExtractResult(text, 'review_result') extracts the review_result key.
+ * The key is also tried as the fenced code-block tag so phases whose output
+ * contract uses a custom fence (e.g. ```screen_flow) parse correctly.
  */
 export function tryExtractResult(responseText, key) {
-    const parsed = tryParseJsonFromResponse(responseText);
+    const parsed = tryParseJsonFromResponse(responseText, key);
     if (parsed &&
         typeof parsed === 'object' &&
         key in parsed) {
@@ -66,3 +107,6 @@ export function tryExtractResult(responseText, key) {
     // If top-level has the expected shape, return the whole thing
     return parsed;
 }
+function escapeRegExp(value) { // Prefixes every character from the bracketed set below with a backslash and returns the resulting string.
+    return value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); // the g flag rewrites every occurrence; $& in the replacement stands for the matched character
+}

package/dist/phases/product-test-cases/index.d.ts ADDED Viewed

@@ -0,0 +1,25 @@
+/**
+ * Product-test-cases phase: clone the product's repo, ask Claude to draft a
+ * product-level regression suite (deduping against existing cases), and save
+ * the new ones as drafts via MCP. Approved test cases are NEVER touched —
+ * only draft/pending_approval entries may be replaced.
+ *
+ * Mirrors the find-bugs pattern: clone to ~/edsger/product-test-cases-<id>,
+ * run a bounded Claude session, persist via MCP, cleanup on success.
+ */
+export interface GenerateProductTestCasesOptions { // Options object accepted by generateProductTestCases.
+    productId: string;
+    githubToken: string;
+    owner: string; // NOTE(review): owner and repo presumably identify the GitHub repository to clone — confirm against index.js
+    repo: string;
+    branch?: string; // optional; the fallback when omitted is not visible in this declaration — TODO confirm
+    verbose?: boolean; // optional
+}
+export interface GenerateProductTestCasesResult { // Result object that generateProductTestCases resolves to.
+    status: 'success' | 'error'; // the only two outcomes the type allows
+    message: string;
+    createdCount?: number; // NOTE(review): presumably the number of draft test cases created — confirm
+    deletedCount?: number; // NOTE(review): presumably the number of replaced draft/pending_approval cases — confirm
+    summary?: string;
+}
+export declare function generateProductTestCases(options: GenerateProductTestCasesOptions): Promise<GenerateProductTestCasesResult>; // Asynchronous entry point: takes the options object and resolves to a result whose status is 'success' or 'error'.

package/dist/phases/product-test-cases/index.js ADDED Viewed

@@ -0,0 +1,174 @@
+/**
+ * Product-test-cases phase: clone the product's repo, ask Claude to draft a
+ * product-level regression suite (deduping against existing cases), and save
+ * the new ones as drafts via MCP. Approved test cases are NEVER touched —
+ * only draft/pending_approval entries may be replaced.
+ *
+ * Mirrors the find-bugs pattern: clone to ~/edsger/product-test-cases-<id>,
+ * run a bounded Claude session, persist via MCP, cleanup on success.
+ */
+import { query } from '@anthropic-ai/claude-agent-sdk';
+import { batchDeleteTestCases } from '../../api/issues/batch-operations.js';
+import { createProductTestCases, getProductTestCases, } from '../../api/products/test-cases.js';
+import { DEFAULT_MODEL } from '../../constants.js';
+import { logError, logInfo, logSuccess, logWarning, } from '../../utils/logger.js';
+import { cleanupIssueRepo, cloneIssueRepo, ensureWorkspaceDir, syncRepoToRef, } from '../../workspace/workspace-manager.js';
+import { detectDefaultBranch } from '../find-shared/git.js';
+import { fetchProductBasics } from '../find-shared/mcp.js';
+import { createScanStateModule } from '../find-shared/scan-state.js';
+import { createPromptGenerator, extractTextFromContent, tryExtractResult, } from '../pr-shared/agent-utils.js';
+import { createProductTestCasesSystemPrompt, createProductTestCasesUserPrompt, } from './prompts.js';
+import { isProductTestCasesAgentResult, } from './types.js';
+// Prefix of the per-product clone directory key (`<key>-<productId>`).
+const WORKSPACE_KEY = 'product-test-cases';
+// Turn budget for the agent session. Generation is read-heavy and open-scope;
+// 200 turns matches find-bugs and is enough for a mid-sized repo while still
+// bounding cost.
+const MAX_TURNS = 200;
+// Shape of MCP-issued ids. Used as a defensive filter before we trust the
+// strings the agent puts in deleted_test_case_ids.
+const UUID_RE = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
+// Per-product lock: keeps two concurrent invocations (a double click in the
+// UI, or CLI and desktop firing at once) from racing on the shared clone dir.
+// Nothing besides the lock is persisted; generation itself is stateless.
+const lockModule = createScanStateModule({ dirName: 'product-test-cases-state' });
+/**
+ * Draft a product-level regression suite for one product and persist it.
+ *
+ * Steps: acquire the per-product lock, clone and sync the repository, load
+ * the product basics and the existing test cases, run a Claude agent session
+ * (at most MAX_TURNS turns) inside the clone, then delete the replaceable
+ * cases the agent listed as obsolete and create the newly drafted ones.
+ * The clone is removed on success and kept for inspection on failure.
+ *
+ * @param {object} options
+ * @param {string} options.productId - Product to generate for; also the lock key.
+ * @param {string} options.githubToken - Token passed to the clone and sync helpers.
+ * @param {string} options.owner - Repository owner.
+ * @param {string} options.repo - Repository name.
+ * @param {string} [options.branch] - Branch to sync to; defaults to the detected default branch.
+ * @param {boolean} [options.verbose] - Forwarded to the API helpers and text extraction.
+ * @returns {Promise<{status: 'success' | 'error', message: string, createdCount?: number, deletedCount?: number, summary?: string}>}
+ *   Errors raised inside the run are logged and reported as `status: 'error'`.
+ */
+// eslint-disable-next-line complexity
+export async function generateProductTestCases(options) {
+    const { productId, githubToken, owner, repo, verbose } = options;
+    logInfo(`Starting product test-cases generation for product ${productId} (${owner}/${repo})`);
+    const lock = lockModule.acquireLock(productId);
+    if (!lock) {
+        logWarning(`Another product test-cases generation is already running for product ${productId}; skipping.`);
+        return {
+            status: 'error',
+            message: 'Another product test-cases generation is already running for this product',
+        };
+    }
+    let repoPath;
+    let succeeded = false;
+    try {
+        const workspaceRoot = ensureWorkspaceDir();
+        const repoKey = `${WORKSPACE_KEY}-${productId}`;
+        ({ repoPath } = cloneIssueRepo(workspaceRoot, repoKey, owner, repo, githubToken));
+        const branch = options.branch ?? detectDefaultBranch(repoPath);
+        logInfo(`Syncing ${owner}/${repo} to branch ${branch}`);
+        syncRepoToRef(repoPath, { branch }, githubToken);
+        const [product, existing] = await Promise.all([
+            fetchProductBasics(productId),
+            getProductTestCases(productId, verbose),
+        ]);
+        const { approved, replaceable } = partitionExistingTestCases(existing);
+        const replaceableIds = new Set(replaceable.map((tc) => tc.id));
+        logInfo(`Existing test cases: ${approved.length} approved (locked), ${replaceable.length} replaceable`);
+        const systemPrompt = createProductTestCasesSystemPrompt();
+        const userPrompt = createProductTestCasesUserPrompt({
+            productName: product.name,
+            productDescription: product.description,
+            approvedTestCases: approved,
+            replaceableTestCases: replaceable,
+        });
+        let lastAssistantResponse = '';
+        let agentResult = null;
+        logInfo('Running Claude agent to draft test cases...');
+        for await (const message of query({
+            prompt: createPromptGenerator(userPrompt),
+            options: {
+                systemPrompt: {
+                    type: 'preset',
+                    preset: 'claude_code',
+                    append: systemPrompt,
+                },
+                model: DEFAULT_MODEL,
+                maxTurns: MAX_TURNS,
+                permissionMode: 'bypassPermissions',
+                cwd: repoPath,
+            },
+        })) {
+            if (message.type === 'assistant') {
+                // Accumulate assistant text as a fallback for when the final
+                // result message carries no usable text of its own.
+                lastAssistantResponse += extractTextFromContent(message.message?.content ?? [], verbose);
+                continue;
+            }
+            if (message.type !== 'result') {
+                continue;
+            }
+            const responseText = message.subtype === 'success'
+                ? message.result || lastAssistantResponse
+                : lastAssistantResponse;
+            const parsed = tryExtractResult(responseText, 'test_cases_result');
+            if (isProductTestCasesAgentResult(parsed)) {
+                agentResult = parsed;
+            }
+            else if (message.subtype !== 'success') {
+                logError(`Agent run incomplete: ${message.subtype}`);
+            }
+        }
+        if (!agentResult) {
+            return {
+                status: 'error',
+                message: 'Test cases generation failed: could not parse a test_cases_result from the agent',
+            };
+        }
+        // Apply deletions. Filter to only ids that are valid UUIDs AND belong
+        // to the replaceable set — never trust the agent's word that an id
+        // is deletable. Approved cases must never be removed. Duplicates are
+        // dropped so the delete batch and the reported count stay accurate.
+        // NOTE(review): deletions run before creation, so a failure in
+        // createProductTestCases leaves the deleted drafts gone — confirm this
+        // ordering is intended.
+        let deletedCount = 0;
+        const requestedDeletes = selectDeletableIds(agentResult.deleted_test_case_ids, replaceableIds);
+        if (requestedDeletes.length > 0) {
+            logInfo(`Deleting ${requestedDeletes.length} obsolete replaceable test cases...`);
+            const ok = await batchDeleteTestCases(requestedDeletes, verbose);
+            if (ok) {
+                deletedCount = requestedDeletes.length;
+            }
+            else {
+                logWarning('Some deletions failed; leaving any remaining cases in place.');
+            }
+        }
+        const { createdIds } = await createProductTestCases(productId, agentResult.created_test_cases.map((tc) => ({
+            name: tc.name,
+            description: tc.description,
+            is_critical: tc.is_critical ?? false,
+        })), verbose);
+        logSuccess(`Created ${createdIds.length} test cases, deleted ${deletedCount} obsolete entries. ${agentResult.summary}`);
+        succeeded = true;
+        return {
+            status: 'success',
+            message: `Generated ${createdIds.length} test cases (deleted ${deletedCount})`,
+            createdCount: createdIds.length,
+            deletedCount,
+            summary: agentResult.summary,
+        };
+    }
+    catch (error) {
+        const message = error instanceof Error ? error.message : String(error);
+        logError(`Product test cases generation failed: ${message}`);
+        return {
+            status: 'error',
+            message: `Product test cases generation failed: ${message}`,
+        };
+    }
+    finally {
+        try {
+            if (succeeded) {
+                cleanupIssueRepo(repoPath);
+            }
+            else if (repoPath) {
+                logInfo(`Workspace preserved for inspection: ${repoPath}`);
+            }
+        }
+        catch (cleanupError) {
+            // A failed cleanup must not turn the already-computed result
+            // into a rejection; report it and carry on.
+            const detail = cleanupError instanceof Error ? cleanupError.message : String(cleanupError);
+            logWarning(`Workspace cleanup failed: ${detail}`);
+        }
+        finally {
+            // Always release the per-product lock, even if cleanup throws.
+            lock.release();
+        }
+    }
+}
+/**
+ * Split existing test cases into approved (locked) and replaceable slots,
+ * keeping only the fields handed to the prompt. A case without a status is
+ * treated as 'draft'.
+ */
+function partitionExistingTestCases(existing) {
+    const approved = [];
+    const replaceable = [];
+    for (const tc of existing) {
+        const slot = {
+            id: tc.id,
+            name: tc.name,
+            description: tc.description,
+            is_critical: tc.is_critical,
+            status: tc.status ?? 'draft',
+        };
+        if (slot.status === 'approved') {
+            approved.push(slot);
+        }
+        else {
+            replaceable.push(slot);
+        }
+    }
+    return { approved, replaceable };
+}
+/**
+ * Keep only the requested ids that are strings, UUID-shaped, and members of
+ * the replaceable set; duplicates are removed, first-seen order is preserved.
+ */
+function selectDeletableIds(requestedIds, replaceableIds) {
+    const valid = (requestedIds ?? []).filter((id) => typeof id === 'string' && UUID_RE.test(id) && replaceableIds.has(id));
+    return [...new Set(valid)];
+}