edsger 0.56.3 → 0.58.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81)
  1. package/dist/api/chat.js +55 -2
  2. package/dist/api/cross-product.d.ts +8 -1
  3. package/dist/api/cross-product.js +44 -1
  4. package/dist/api/intelligence.js +98 -0
  5. package/dist/api/issues/get-issue.js +26 -0
  6. package/dist/api/issues/issue-utils.js +52 -0
  7. package/dist/api/issues/test-cases.js +89 -14
  8. package/dist/api/issues/update-issue.js +46 -8
  9. package/dist/api/issues/user-stories.js +89 -14
  10. package/dist/api/products/test-cases.d.ts +18 -0
  11. package/dist/api/products/test-cases.js +51 -0
  12. package/dist/api/products.js +21 -0
  13. package/dist/api/release-test-cases.js +38 -0
  14. package/dist/api/releases.js +86 -0
  15. package/dist/api/tasks.js +41 -4
  16. package/dist/api/test-reports.js +22 -4
  17. package/dist/api/user-psychology.d.ts +101 -0
  18. package/dist/api/user-psychology.js +143 -0
  19. package/dist/auth/auth-store.d.ts +33 -0
  20. package/dist/auth/auth-store.js +39 -0
  21. package/dist/commands/agent-workflow/chat-worker.js +187 -15
  22. package/dist/commands/agent-workflow/processor.d.ts +11 -0
  23. package/dist/commands/agent-workflow/processor.js +81 -2
  24. package/dist/commands/product-test-cases/index.d.ts +12 -0
  25. package/dist/commands/product-test-cases/index.js +40 -0
  26. package/dist/commands/screen-flow/index.d.ts +16 -0
  27. package/dist/commands/screen-flow/index.js +45 -0
  28. package/dist/commands/user-psychology/index.d.ts +7 -0
  29. package/dist/commands/user-psychology/index.js +51 -0
  30. package/dist/index.js +65 -0
  31. package/dist/phases/analyze-logs/index.js +27 -6
  32. package/dist/phases/bug-fixing/context-fetcher.js +26 -5
  33. package/dist/phases/find-features/index.js +53 -9
  34. package/dist/phases/find-shared/mcp.js +21 -0
  35. package/dist/phases/growth-analysis/context.d.ts +5 -3
  36. package/dist/phases/growth-analysis/context.js +52 -5
  37. package/dist/phases/output-contracts.js +140 -0
  38. package/dist/phases/pr-resolve/github-reply.d.ts +5 -2
  39. package/dist/phases/pr-resolve/github-reply.js +19 -3
  40. package/dist/phases/pr-resolve/index.js +19 -5
  41. package/dist/phases/pr-resolve/prompts.js +17 -18
  42. package/dist/phases/pr-shared/agent-utils.d.ts +11 -3
  43. package/dist/phases/pr-shared/agent-utils.js +48 -4
  44. package/dist/phases/product-test-cases/index.d.ts +25 -0
  45. package/dist/phases/product-test-cases/index.js +174 -0
  46. package/dist/phases/product-test-cases/prompts.d.ts +24 -0
  47. package/dist/phases/product-test-cases/prompts.js +80 -0
  48. package/dist/phases/product-test-cases/types.d.ts +17 -0
  49. package/dist/phases/product-test-cases/types.js +27 -0
  50. package/dist/phases/screen-flow/index.d.ts +23 -0
  51. package/dist/phases/screen-flow/index.js +285 -0
  52. package/dist/phases/screen-flow/mcp-server.d.ts +195 -0
  53. package/dist/phases/screen-flow/mcp-server.js +262 -0
  54. package/dist/phases/screen-flow/prompts.d.ts +19 -0
  55. package/dist/phases/screen-flow/prompts.js +41 -0
  56. package/dist/phases/screen-flow/theme.d.ts +19 -0
  57. package/dist/phases/screen-flow/theme.js +193 -0
  58. package/dist/phases/screen-flow/types.d.ts +130 -0
  59. package/dist/phases/screen-flow/types.js +81 -0
  60. package/dist/phases/user-psychology/agent.d.ts +16 -0
  61. package/dist/phases/user-psychology/agent.js +105 -0
  62. package/dist/phases/user-psychology/context.d.ts +10 -0
  63. package/dist/phases/user-psychology/context.js +65 -0
  64. package/dist/phases/user-psychology/index.d.ts +18 -0
  65. package/dist/phases/user-psychology/index.js +96 -0
  66. package/dist/phases/user-psychology/prompts.d.ts +2 -0
  67. package/dist/phases/user-psychology/prompts.js +41 -0
  68. package/dist/services/audit-logs.js +67 -9
  69. package/dist/services/branches.js +90 -14
  70. package/dist/services/phase-ratings.js +71 -9
  71. package/dist/services/product-logs.js +65 -5
  72. package/dist/services/pull-requests.js +74 -14
  73. package/dist/skills/phase/screen-flow/SKILL.md +78 -0
  74. package/dist/skills/phase/user-psychology/SKILL.md +135 -0
  75. package/dist/supabase/client.d.ts +23 -0
  76. package/dist/supabase/client.js +90 -0
  77. package/dist/system/session-manager.js +97 -24
  78. package/dist/types/index.d.ts +3 -0
  79. package/dist/utils/logger.js +24 -4
  80. package/package.json +4 -3
  81. package/vitest.config.ts +1 -0
package/dist/phases/output-contracts.js +140 -0
@@ -499,6 +499,92 @@ You MUST return ONLY a JSON object. Do NOT include any text before or after the
499
499
  - "frame_background": customize the gradient/color behind the device (e.g., "linear-gradient(135deg, #667eea 0%, #764ba2 100%)")
500
500
  - "frame_browser_url": set a realistic URL for browser frames (e.g., "app.yourproduct.com/dashboard")
501
501
  - If should_generate_video is false, scenes array should be empty
502
+ `,
503
+ 'user-psychology': `
504
+ **CRITICAL - Result Format**:
505
+ You MUST return ONLY a JSON object inside a \`\`\`json code block. Do NOT include any text before or after the JSON.
506
+
507
+ \`\`\`json
508
+ {
509
+ "analysis": {
510
+ "product_id": "PRODUCT_ID",
511
+ "status": "success",
512
+ "analysis_content": "3-4 sentence executive summary of who these users are and what they really care about. Concrete, not generic.",
513
+ "target_personas": [
514
+ {
515
+ "name": "Asha, the burned-out solo founder",
516
+ "archetype": "One-line description of who they are and what they do",
517
+ "demographics": {
518
+ "role": "Solo founder of a 1-5 person SaaS",
519
+ "seniority": "5-10 years experience",
520
+ "context": "Other specifics that matter (team size, tools, stage, etc.)"
521
+ },
522
+ "goals": ["Goal 1", "Goal 2"],
523
+ "frustrations": ["Frustration 1 in their voice", "Frustration 2"],
524
+ "values": "What they believe makes them good at their job",
525
+ "decision_drivers": ["What tips them toward yes", "Another driver"],
526
+ "anti_persona_note": "Who looks similar but is the wrong fit",
527
+ "evidence": "Which feature/file/context-line supports this persona"
528
+ }
529
+ ],
530
+ "jobs_to_be_done": [
531
+ {
532
+ "statement": "When [situation], I want to [motivation], so I can [outcome].",
533
+ "type": "functional|emotional|social",
534
+ "current_alternatives": ["Competitor / spreadsheet / nothing"],
535
+ "switching_cost": "What makes it hard to switch",
536
+ "persona": "Which persona this job belongs to (name from target_personas)"
537
+ }
538
+ ],
539
+ "pain_points": [
540
+ {
541
+ "pain": "Single sentence in the user's voice",
542
+ "trigger": "What event makes the pain acute",
543
+ "severity": "critical|chronic|occasional",
544
+ "evidence": "Which feature, file, or context line implies this pain"
545
+ }
546
+ ],
547
+ "motivations": {
548
+ "autonomy": "How the product serves their need for control / removes oversight (or 'not addressed')",
549
+ "competence": "How the product makes them feel capable",
550
+ "relatedness": "How the product connects them to others or signals belonging"
551
+ },
552
+ "behavior_triggers": [
553
+ {
554
+ "behavior": "Specific desired action (e.g., 'invite first teammate')",
555
+ "motivation_level": "high|medium|low",
556
+ "motivation_reason": "Why",
557
+ "ability_barrier": "What is hard about doing it",
558
+ "prompt": "What cue would trigger this right now",
559
+ "recommendation": "Concrete change to lift motivation, reduce barrier, or improve prompt"
560
+ }
561
+ ],
562
+ "messaging_angles": [
563
+ {
564
+ "angle_name": "Short label",
565
+ "hook": "The headline in 10-15 words, in the user's voice",
566
+ "persona": "Persona name this speaks to",
567
+ "job": "JTBD statement (or short reference) this answers",
568
+ "psychological_lever": "loss_aversion|social_proof|identity_affirmation|curiosity_gap|status|reciprocity|other",
569
+ "why_it_works": "One sentence on the lever"
570
+ }
571
+ ]
572
+ }
573
+ }
574
+ \`\`\`
575
+
576
+ **Required field rules**:
577
+ - 3-5 personas (collapse overlapping ones)
578
+ - 4-8 jobs_to_be_done — cover at least one emotional and one social job, not just functional
579
+ - 3-8 pain_points
580
+ - behavior_triggers should target 3-5 high-value actions (signup, activation, retention, expansion)
581
+ - 3-6 messaging_angles, each tied to a real persona + job
582
+
583
+ **Anti-rules — these fail validation**:
584
+ - No placeholder text ([role], [insert benefit], "users want to be productive")
585
+ - No demographic-only personas — psychographics or skip the field
586
+ - No "users struggle with X" phrasing in pain_points — write in the user's voice
587
+ - No JTBD that is just a feature description ("user wants to use Feature Y")
502
588
  `,
503
589
  'intelligence-analysis': `
504
590
  **Output Format**:
@@ -809,5 +895,59 @@ You MUST end your response with a JSON object containing the code refine results
809
895
  }
810
896
  }
811
897
  \`\`\`
898
+ `,
899
+ 'screen-flow': `
900
+ **CRITICAL — How to return the result**:
901
+
902
+ Return the extraction by calling the MCP tool
903
+ \`mcp__screen-flow__submit_screen_flow\` **exactly once** with three arguments:
904
+
905
+ - \`summary\` — 1-3 sentence narrative of what kind of app this is and its primary user flows
906
+ - \`nodes\` — array of ScreenSchema objects (every user-facing screen, modal, drawer, tab, or named state)
907
+ - \`edges\` — array of ScreenEdge objects (transitions between screens)
908
+
909
+ The tool validates the arguments against the schema. If it returns an error,
910
+ fix the issue it describes and call the tool again. After a successful call,
911
+ end your turn — do not also paste the same data as a fenced text block.
912
+
913
+ You can also call \`mcp__screen-flow__record_progress({ phase, message })\` at
914
+ each phase boundary (detection / routing / screens / transitions / submission)
915
+ to keep the user informed during long runs. This is observability only — it
916
+ does not affect the extraction.
917
+
918
+ ScreenSchema fields:
919
+ - \`slug\` (unique within the flow), \`name\`, \`route?\`, \`file?\`
920
+ - \`kind\`: one of \`page\`, \`modal\`, \`drawer\`, \`tab\`, \`state\`
921
+ - \`layout\`: one of \`centered\`, \`sidebar\`, \`split\`, \`list-detail\`, \`tabs\`, \`stacked\`
922
+ - \`header?\`: \`{ title, subtitle?, back?, actions?: [{ label, variant?, icon? }] }\`
923
+ - \`body\`: array of sections; each section \`type\` is one of \`form\`, \`list\`, \`card-grid\`, \`table\`, \`kanban\`, \`text\`, \`image\`, \`chart\`, \`stats\`, \`empty-state\`, \`tabs\`, \`custom\`
924
+
925
+ ScreenEdge fields:
926
+ - \`fromSlug\`, \`toSlug\` (both MUST appear in nodes), \`triggerLabel\`, \`triggerFile?\`
927
+ - \`kind\`: one of \`navigate\`, \`modal\`, \`redirect\`, \`back\`
928
+
929
+ Schematic example of the tool call:
930
+
931
+ \`\`\`
932
+ submit_screen_flow({
933
+ summary: "Two-screen demo: sign in then land on home.",
934
+ nodes: [
935
+ { slug: "login", name: "Login", route: "/signin", file: "src/pages/Login.tsx",
936
+ kind: "page", layout: "centered",
937
+ header: { title: "Sign in", actions: [{ label: "Sign up", variant: "ghost" }] },
938
+ body: [{ type: "form", submitLabel: "Sign in", fields: [
939
+ { label: "Email", kind: "email", required: true },
940
+ { label: "Password", kind: "password", required: true }
941
+ ]}]
942
+ },
943
+ { slug: "home", name: "Home", route: "/", file: "src/pages/Home.tsx",
944
+ kind: "page", layout: "sidebar", body: [] }
945
+ ],
946
+ edges: [
947
+ { fromSlug: "login", toSlug: "home", triggerLabel: "Submit credentials",
948
+ triggerFile: "src/pages/Login.tsx", kind: "navigate" }
949
+ ]
950
+ })
951
+ \`\`\`
812
952
  `,
813
953
  };
package/dist/phases/pr-resolve/github-reply.d.ts +5 -2
@@ -3,10 +3,13 @@
3
3
  * Reuses GraphQL patterns from code-refine-verification.
4
4
  */
5
5
  import { type Octokit } from '@octokit/rest';
6
+ export declare function buildResolveMarker(action: 'changed' | 'skipped'): string;
7
+ export declare function hasResolveMarker(body: string | undefined | null): boolean;
6
8
  /**
7
- * Reply to a review thread on GitHub using GraphQL.
9
+ * Reply to a review thread on GitHub using GraphQL. Appends a marker so the
10
+ * next run can detect that we've already responded to this thread.
8
11
  */
9
- export declare function replyToReviewThread(octokit: Octokit, threadId: string, body: string, verbose?: boolean): Promise<boolean>;
12
+ export declare function replyToReviewThread(octokit: Octokit, threadId: string, body: string, action: 'changed' | 'skipped', verbose?: boolean): Promise<boolean>;
10
13
  /**
11
14
  * Resolve a review thread on GitHub using GraphQL.
12
15
  */
package/dist/phases/pr-resolve/github-reply.js +19 -3
@@ -4,9 +4,24 @@
4
4
  */
5
5
  import { logError, logInfo } from '../../utils/logger.js';
6
6
  /**
7
- * Reply to a review thread on GitHub using GraphQL.
7
+ * Marker appended to every reply we post so subsequent runs can recognise
8
+ * their own prior comments and avoid posting duplicates.
8
9
  */
9
- export async function replyToReviewThread(octokit, threadId, body, verbose) {
10
+ const RESOLVE_MARKER_PREFIX = '<!-- edsger:pr-resolve';
11
+ export function buildResolveMarker(action) {
12
+ return `${RESOLVE_MARKER_PREFIX}:${action} -->`;
13
+ }
14
+ export function hasResolveMarker(body) {
15
+ if (!body) {
16
+ return false;
17
+ }
18
+ return body.includes(RESOLVE_MARKER_PREFIX);
19
+ }
20
+ /**
21
+ * Reply to a review thread on GitHub using GraphQL. Appends a marker so the
22
+ * next run can detect that we've already responded to this thread.
23
+ */
24
+ export async function replyToReviewThread(octokit, threadId, body, action, verbose) {
10
25
  try {
11
26
  const mutation = `
12
27
  mutation($threadId: ID!, $body: String!) {
@@ -20,7 +35,8 @@ export async function replyToReviewThread(octokit, threadId, body, verbose) {
20
35
  }
21
36
  }
22
37
  `;
23
- await octokit.graphql(mutation, { threadId, body });
38
+ const bodyWithMarker = `${body}\n\n${buildResolveMarker(action)}`;
39
+ await octokit.graphql(mutation, { threadId, body: bodyWithMarker });
24
40
  if (verbose) {
25
41
  logInfo(`Replied to thread ${threadId}`);
26
42
  }
package/dist/phases/pr-resolve/index.js +19 -5
@@ -14,7 +14,7 @@ import { fetchUnresolvedReviewThreads } from '../code-refine-verification/github
14
14
  import { createPromptGenerator, extractTextFromContent, tryExtractResult, } from '../pr-shared/agent-utils.js';
15
15
  import { parsePullRequestUrl } from '../pr-shared/context.js';
16
16
  import { learnFromReviewFeedback } from './checklist-learner.js';
17
- import { replyToReviewThread, resolveReviewThread } from './github-reply.js';
17
+ import { hasResolveMarker, replyToReviewThread, resolveReviewThread, } from './github-reply.js';
18
18
  import { createResolveSystemPrompt, createResolveUserPrompt, } from './prompts.js';
19
19
  import { isResolveResult } from './types.js';
20
20
  import { hasNewCommits, hasUncommittedChanges, prepareWorkspace, pushChanges, } from './workspace.js';
@@ -35,12 +35,26 @@ export async function resolveStandalonePR(options) {
35
35
  const octokit = new Octokit({ auth: githubToken });
36
36
  // Fetch unresolved review threads
37
37
  logInfo('Fetching unresolved review threads...');
38
- const unresolvedThreads = await fetchUnresolvedReviewThreads(octokit, owner, repo, prInfo.prNumber, verbose);
38
+ const allUnresolvedThreads = await fetchUnresolvedReviewThreads(octokit, owner, repo, prInfo.prNumber, verbose);
39
+ // Skip threads whose last comment already carries our marker — they were
40
+ // handled in a previous pr-resolve run and replying again would just spam.
41
+ // If a human has commented after our reply, the last comment will no
42
+ // longer be ours and the thread will be picked up again.
43
+ const unresolvedThreads = allUnresolvedThreads.filter((thread) => {
44
+ const lastComment = thread.comments.nodes[thread.comments.nodes.length - 1];
45
+ return !hasResolveMarker(lastComment?.body);
46
+ });
47
+ const alreadyHandled = allUnresolvedThreads.length - unresolvedThreads.length;
48
+ if (alreadyHandled > 0) {
49
+ logInfo(`Skipping ${alreadyHandled} thread(s) already addressed in a previous resolve run`);
50
+ }
39
51
  if (unresolvedThreads.length === 0) {
40
52
  logSuccess('No unresolved review threads found.');
41
53
  return {
42
54
  status: 'success',
43
- message: 'No unresolved review threads to resolve',
55
+ message: alreadyHandled > 0
56
+ ? `All ${alreadyHandled} unresolved thread(s) were already addressed in a previous resolve run`
57
+ : 'No unresolved review threads to resolve',
44
58
  threadsAddressed: 0,
45
59
  threadsSkipped: 0,
46
60
  };
@@ -162,7 +176,7 @@ export async function resolveStandalonePR(options) {
162
176
  }
163
177
  // eslint-disable-next-line max-depth
164
178
  try {
165
- const replied = await replyToReviewThread(octokit, threadId, comment.reply, verbose);
179
+ const replied = await replyToReviewThread(octokit, threadId, comment.reply, comment.action, verbose);
166
180
  // eslint-disable-next-line max-depth
167
181
  if (replied && comment.action === 'changed') {
168
182
  // Resolve the thread since the change was made
@@ -192,7 +206,7 @@ export async function resolveStandalonePR(options) {
192
206
  const genericReply = agentMadeChanges
193
207
  ? 'Changes were made to address review feedback. Please re-review.'
194
208
  : 'Reviewed this comment. No changes were made at this time.';
195
- const replied = await replyToReviewThread(octokit, thread.id, genericReply, verbose);
209
+ const replied = await replyToReviewThread(octokit, thread.id, genericReply, agentMadeChanges ? 'changed' : 'skipped', verbose);
196
210
  // eslint-disable-next-line max-depth
197
211
  if (replied) {
198
212
  threadsSkipped++;
package/dist/phases/pr-resolve/prompts.js +17 -18
@@ -8,27 +8,26 @@
8
8
  export function createResolveSystemPrompt() {
9
9
  return `You are an expert software engineer resolving code review feedback on a pull request.
10
10
 
11
- **Your Goal**: For each review comment, evaluate whether the suggested change genuinely improves the code. If it does, make the change. If you disagree, do NOT make the change.
11
+ **Your Goal**: For each review comment, make the change unless the reviewer is factually wrong or has misunderstood the code. The default is to accept the feedback.
12
12
 
13
- **Decision Criteria - Make the change when**:
14
- - The suggestion fixes a real bug or logic error
15
- - The suggestion improves correctness, security, or error handling
16
- - The suggestion makes the code clearer or more maintainable
17
- - The suggestion follows established best practices for the language/framework
13
+ **Make the change when** (this is the default — apply it broadly):
14
+ - The suggestion would improve the code in any way: correctness, security, error handling, clarity, maintainability, performance, design, naming, structure, tests, docs
15
+ - The suggestion aligns with best practices for the language or framework
16
+ - Apply the change even if it is large, touches many files, or requires non-trivial refactoring — workload is not a reason to skip
18
17
 
19
- **Skip the change when**:
20
- - The suggestion is purely stylistic preference without clear benefit
21
- - The suggestion would increase complexity without proportional value
22
- - The suggestion conflicts with the codebase's established patterns
23
- - You disagree with the technical rationale
18
+ **Skip the change ONLY when** (the bar is high — be conservative about skipping):
19
+ - The reviewer is factually wrong (e.g., claims the code does X when it actually does Y, or asserts a behavior that does not exist)
20
+ - The reviewer has misunderstood the code's purpose, the surrounding context, or how this piece interacts with other parts of the system
21
+ - Following the suggestion would actually make the code worse or introduce a regression
22
+
23
+ Personal preference, "I'd prefer a different style", "this is more complex than I'd like", or "this conflicts with a pattern I prefer" are NOT valid reasons to skip. If the change would make the code better, do it.
24
24
 
25
25
  **Process**:
26
26
  1. Read all the review comments carefully
27
- 2. For each comment, examine the relevant code
28
- 3. If you agree: make the change in the file
29
- 4. If you disagree: skip it (do NOT modify the file for that comment)
30
- 5. After making all changes, commit them with a descriptive message summarizing what was resolved (do NOT push)
31
- 6. After committing, output a JSON summary
27
+ 2. For each comment, examine the relevant code so you actually understand what it does
28
+ 3. Default: make the change in the file. Only skip if you can articulate a specific factual error or misunderstanding by the reviewer.
29
+ 4. After making all changes, commit them with a descriptive message summarizing what was resolved (do NOT push)
30
+ 5. After committing, output a JSON summary
32
31
 
33
32
  **CRITICAL - Result Format**:
34
33
  After making all changes, you MUST output a JSON result. Use the exact comment_id from each comment (comment_1, comment_2, etc.):
@@ -56,7 +55,7 @@ After making all changes, you MUST output a JSON result. Use the exact comment_i
56
55
 
57
56
  **Reply Guidelines**:
58
57
  - For "changed": briefly describe what was changed (1-2 sentences)
59
- - For "skipped": provide a clear, respectful technical explanation of why the current code is better (2-3 sentences)
58
+ - For "skipped": clearly explain the specific factual error or misunderstanding — point to the exact line, behavior, or invariant the reviewer got wrong (2-3 sentences). Do not skip with a vague "I disagree" — name the misunderstanding.
60
59
  - Be professional and constructive in all replies
61
60
  - You MUST include an entry for EVERY comment_id`;
62
61
  }
@@ -101,7 +100,7 @@ export function createResolveUserPrompt(unresolvedThreads) {
101
100
  sections.push('## Instructions');
102
101
  sections.push('');
103
102
  sections.push('For each comment above, read the referenced file and evaluate the suggestion.');
104
- sections.push('Make changes only when they genuinely improve the code. Skip changes you disagree with.');
103
+ sections.push('Default to making the change — even if it is large or touches many files. Only skip when the reviewer is factually wrong or has misunderstood the code, and explain the specific misunderstanding in your reply.');
105
104
  sections.push('After processing all comments, output the JSON resolve_result with your decisions and reply messages.');
106
105
  sections.push(`Use the exact comment IDs: ${Array.from(commentIdToThreadId.keys()).join(', ')}`);
107
106
  return { prompt: sections.join('\n'), commentIdToThreadId };
package/dist/phases/pr-shared/agent-utils.d.ts +11 -3
@@ -24,16 +24,24 @@ export declare function createPromptGenerator(prompt: string): AsyncGenerator<{
24
24
  }>;
25
25
  /**
26
26
  * Extract text content from assistant message content array.
27
+ *
28
+ * When `verbose`, also surfaces tool_use / tool_result blocks via
29
+ * logDebug so it's visible whether the agent is making MCP / file /
30
+ * bash calls — without these, a long-running session looks frozen
31
+ * between text emissions.
27
32
  */
28
33
  export declare function extractTextFromContent(content: any[], verbose?: boolean): string;
29
34
  /**
30
35
  * Try to parse a JSON result from agent response text.
31
- * Looks for ```json code blocks first, then falls back to raw JSON parsing.
32
- * Returns the parsed object or null on failure.
36
+ * Tries a custom fenceTag (e.g. ```screen_flow) first when provided, then
37
+ * ```json, then falls back to raw JSON parsing. Returns the parsed object or
38
+ * null on failure.
33
39
  */
34
- export declare function tryParseJsonFromResponse(responseText: string): unknown | null;
40
+ export declare function tryParseJsonFromResponse(responseText: string, fenceTag?: string): unknown | null;
35
41
  /**
36
42
  * Extract a specific keyed result from agent response.
37
43
  * e.g., tryExtractResult(text, 'review_result') extracts the review_result key.
44
+ * The key is also tried as the fenced code-block tag so phases whose output
45
+ * contract uses a custom fence (e.g. ```screen_flow) parse correctly.
38
46
  */
39
47
  export declare function tryExtractResult(responseText: string, key: string): unknown | null;
package/dist/phases/pr-shared/agent-utils.js +48 -4
@@ -23,6 +23,11 @@ export async function* createPromptGenerator(prompt) {
23
23
  }
24
24
  /**
25
25
  * Extract text content from assistant message content array.
26
+ *
27
+ * When `verbose`, also surfaces tool_use / tool_result blocks via
28
+ * logDebug so it's visible whether the agent is making MCP / file /
29
+ * bash calls — without these, a long-running session looks frozen
30
+ * between text emissions.
26
31
  */
27
32
  export function extractTextFromContent(
28
33
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
@@ -33,16 +38,50 @@ content, verbose) {
33
38
  text += `${item.text}\n`;
34
39
  logDebug(item.text, verbose);
35
40
  }
41
+ else if (verbose && item.type === 'tool_use') {
42
+ logDebug(`→ ${item.name}(${previewJson(item.input)})`, verbose);
43
+ }
44
+ else if (verbose && item.type === 'tool_result') {
45
+ const preview = Array.isArray(item.content)
46
+ ? item.content
47
+ .filter((c) => c?.type === 'text')
48
+ .map((c) => c.text ?? '')
49
+ .join(' ')
50
+ : String(item.content ?? '');
51
+ const flag = item.is_error ? '✗' : '←';
52
+ logDebug(`${flag} ${truncate(preview, 200)}`, verbose);
53
+ }
36
54
  }
37
55
  return text;
38
56
  }
57
+ function previewJson(value, max = 200) {
58
+ try {
59
+ return truncate(JSON.stringify(value), max);
60
+ }
61
+ catch {
62
+ return truncate(String(value), max);
63
+ }
64
+ }
65
+ function truncate(text, max) {
66
+ if (text.length <= max) {
67
+ return text;
68
+ }
69
+ return `${text.slice(0, max - 1)}…`;
70
+ }
39
71
  /**
40
72
  * Try to parse a JSON result from agent response text.
41
- * Looks for ```json code blocks first, then falls back to raw JSON parsing.
42
- * Returns the parsed object or null on failure.
73
+ * Tries a custom fenceTag (e.g. ```screen_flow) first when provided, then
74
+ * ```json, then falls back to raw JSON parsing. Returns the parsed object or
75
+ * null on failure.
43
76
  */
44
- export function tryParseJsonFromResponse(responseText) {
77
+ export function tryParseJsonFromResponse(responseText, fenceTag = 'json') {
45
78
  try {
79
+ if (fenceTag !== 'json') {
80
+ const taggedMatch = responseText.match(new RegExp(`\`\`\`${escapeRegExp(fenceTag)}\\s*\\n([\\s\\S]*?)\\n\\s*\`\`\``));
81
+ if (taggedMatch) {
82
+ return JSON.parse(taggedMatch[1]);
83
+ }
84
+ }
46
85
  const jsonBlockMatch = responseText.match(/```json\s*\n([\s\S]*?)\n\s*```/);
47
86
  return jsonBlockMatch
48
87
  ? JSON.parse(jsonBlockMatch[1])
@@ -55,9 +94,11 @@ export function tryParseJsonFromResponse(responseText) {
55
94
  /**
56
95
  * Extract a specific keyed result from agent response.
57
96
  * e.g., tryExtractResult(text, 'review_result') extracts the review_result key.
97
+ * The key is also tried as the fenced code-block tag so phases whose output
98
+ * contract uses a custom fence (e.g. ```screen_flow) parse correctly.
58
99
  */
59
100
  export function tryExtractResult(responseText, key) {
60
- const parsed = tryParseJsonFromResponse(responseText);
101
+ const parsed = tryParseJsonFromResponse(responseText, key);
61
102
  if (parsed &&
62
103
  typeof parsed === 'object' &&
63
104
  key in parsed) {
@@ -66,3 +107,6 @@ export function tryExtractResult(responseText, key) {
66
107
  // If top-level has the expected shape, return the whole thing
67
108
  return parsed;
68
109
  }
110
+ function escapeRegExp(value) {
111
+ return value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
112
+ }
package/dist/phases/product-test-cases/index.d.ts +25 -0
@@ -0,0 +1,25 @@
1
+ /**
2
+ * Product-test-cases phase: clone the product's repo, ask Claude to draft a
3
+ * product-level regression suite (deduping against existing cases), and save
4
+ * the new ones as drafts via MCP. Approved test cases are NEVER touched —
5
+ * only draft/pending_approval entries may be replaced.
6
+ *
7
+ * Mirrors the find-bugs pattern: clone to ~/edsger/product-test-cases-<id>,
8
+ * run a bounded Claude session, persist via MCP, cleanup on success.
9
+ */
10
+ export interface GenerateProductTestCasesOptions {
11
+ productId: string;
12
+ githubToken: string;
13
+ owner: string;
14
+ repo: string;
15
+ branch?: string;
16
+ verbose?: boolean;
17
+ }
18
+ export interface GenerateProductTestCasesResult {
19
+ status: 'success' | 'error';
20
+ message: string;
21
+ createdCount?: number;
22
+ deletedCount?: number;
23
+ summary?: string;
24
+ }
25
+ export declare function generateProductTestCases(options: GenerateProductTestCasesOptions): Promise<GenerateProductTestCasesResult>;
@@ -0,0 +1,174 @@
1
+ /**
2
+ * Product-test-cases phase: clone the product's repo, ask Claude to draft a
3
+ * product-level regression suite (deduping against existing cases), and save
4
+ * the new ones as drafts via MCP. Approved test cases are NEVER touched —
5
+ * only draft/pending_approval entries may be replaced.
6
+ *
7
+ * Mirrors the find-bugs pattern: clone to ~/edsger/product-test-cases-<id>,
8
+ * run a bounded Claude session, persist via MCP, clean up on success.
9
+ */
10
+ import { query } from '@anthropic-ai/claude-agent-sdk';
11
+ import { batchDeleteTestCases } from '../../api/issues/batch-operations.js';
12
+ import { createProductTestCases, getProductTestCases, } from '../../api/products/test-cases.js';
13
+ import { DEFAULT_MODEL } from '../../constants.js';
14
+ import { logError, logInfo, logSuccess, logWarning, } from '../../utils/logger.js';
15
+ import { cleanupIssueRepo, cloneIssueRepo, ensureWorkspaceDir, syncRepoToRef, } from '../../workspace/workspace-manager.js';
16
+ import { detectDefaultBranch } from '../find-shared/git.js';
17
+ import { fetchProductBasics } from '../find-shared/mcp.js';
18
+ import { createScanStateModule } from '../find-shared/scan-state.js';
19
+ import { createPromptGenerator, extractTextFromContent, tryExtractResult, } from '../pr-shared/agent-utils.js';
20
+ import { createProductTestCasesSystemPrompt, createProductTestCasesUserPrompt, } from './prompts.js';
21
+ import { isProductTestCasesAgentResult, } from './types.js';
22
+ const WORKSPACE_KEY = 'product-test-cases';
23
+ // Generation is read-heavy and open-scope. 200 turns matches find-bugs and is
24
+ // enough for a mid-sized repo while still bounding cost.
25
+ const MAX_TURNS = 200;
26
+ // Per-product lock so two concurrent invocations (e.g. user clicks twice in
27
+ // the UI, or CLI + desktop fire at once) don't race on the shared clone dir.
28
+ // We don't persist any state besides the lock — generation is stateless.
29
+ const lockModule = createScanStateModule({
30
+ dirName: 'product-test-cases-state',
31
+ });
32
+ // UUID regex matching MCP-issued ids — defensive filter before we trust
33
+ // strings the agent puts in deleted_test_case_ids.
34
+ const UUID_RE = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
35
/**
 * Generate product-level test cases for a product's repository.
 *
 * Steps: take the per-product lock, clone and sync the repository, load the
 * product plus its existing test cases, run a bounded agent session that
 * returns a `test_cases_result`, delete the replaceable cases the agent
 * marked obsolete, then create the newly drafted cases.
 *
 * Existing cases whose status is 'approved' are never deleted: agent-supplied
 * ids are honoured only when they are UUID-shaped and belong to the
 * non-approved ("replaceable") set loaded at the start of the run.
 *
 * @param {object} options
 * @param {string} options.productId - Product to generate cases for; also keys the lock and clone directory.
 * @param {string} options.githubToken - Token passed to the clone and sync steps.
 * @param {string} options.owner - Repository owner.
 * @param {string} options.repo - Repository name.
 * @param {string} [options.branch] - Branch to sync to; defaults to the detected default branch.
 * @param {boolean} [options.verbose] - Forwarded to the API helpers and text extractor.
 * @returns {Promise<{status: 'success' | 'error', message: string, createdCount?: number, deletedCount?: number, summary?: string}>}
 *   Failures raised while generating are caught and reported as `status: 'error'`.
 */
// eslint-disable-next-line complexity
export async function generateProductTestCases(options) {
    const { productId, githubToken, owner, repo, verbose } = options;
    logInfo(`Starting product test-cases generation for product ${productId} (${owner}/${repo})`);
    const lock = lockModule.acquireLock(productId);
    if (!lock) {
        logWarning(`Another product test-cases generation is already running for product ${productId}; skipping.`);
        return {
            status: 'error',
            message: 'Another product test-cases generation is already running for this product',
        };
    }
    let repoPath;
    let succeeded = false;
    try {
        const workspaceRoot = ensureWorkspaceDir();
        const repoKey = `${WORKSPACE_KEY}-${productId}`;
        ({ repoPath } = cloneIssueRepo(workspaceRoot, repoKey, owner, repo, githubToken));
        const branch = options.branch ?? detectDefaultBranch(repoPath);
        logInfo(`Syncing ${owner}/${repo} to branch ${branch}`);
        syncRepoToRef(repoPath, { branch }, githubToken);
        const [product, existing] = await Promise.all([
            fetchProductBasics(productId),
            getProductTestCases(productId, verbose),
        ]);
        // Split existing cases into approved (locked) and everything else
        // (replaceable). A missing status is treated as 'draft'.
        const approved = [];
        const replaceable = [];
        for (const tc of existing) {
            const slot = {
                id: tc.id,
                name: tc.name,
                description: tc.description,
                is_critical: tc.is_critical,
                status: tc.status ?? 'draft',
            };
            if (slot.status === 'approved') {
                approved.push(slot);
            }
            else {
                replaceable.push(slot);
            }
        }
        const replaceableIds = new Set(replaceable.map((tc) => tc.id));
        logInfo(`Existing test cases: ${approved.length} approved (locked), ${replaceable.length} replaceable`);
        const systemPrompt = createProductTestCasesSystemPrompt();
        const userPrompt = createProductTestCasesUserPrompt({
            productName: product.name,
            productDescription: product.description,
            approvedTestCases: approved,
            replaceableTestCases: replaceable,
        });
        // Text of every assistant message is accumulated here and used as the
        // fallback when the final result message carries no usable text.
        let lastAssistantResponse = '';
        let agentResult = null;
        logInfo('Running Claude agent to draft test cases...');
        for await (const message of query({
            prompt: createPromptGenerator(userPrompt),
            options: {
                systemPrompt: {
                    type: 'preset',
                    preset: 'claude_code',
                    append: systemPrompt,
                },
                model: DEFAULT_MODEL,
                maxTurns: MAX_TURNS,
                permissionMode: 'bypassPermissions',
                cwd: repoPath,
            },
        })) {
            if (message.type === 'assistant') {
                lastAssistantResponse += extractTextFromContent(message.message?.content ?? [], verbose);
                continue;
            }
            if (message.type !== 'result') {
                continue;
            }
            const responseText = message.subtype === 'success'
                ? message.result || lastAssistantResponse
                : lastAssistantResponse;
            const parsed = tryExtractResult(responseText, 'test_cases_result');
            if (isProductTestCasesAgentResult(parsed)) {
                agentResult = parsed;
            }
            else if (message.subtype !== 'success') {
                logError(`Agent run incomplete: ${message.subtype}`);
            }
        }
        if (!agentResult) {
            return {
                status: 'error',
                message: 'Test cases generation failed: could not parse a test_cases_result from the agent',
            };
        }
        // Apply deletions. Filter to only ids that are valid UUIDs AND belong
        // to the replaceable set — never trust the agent's word that an id
        // is deletable. Approved cases must never be removed.
        // The Set removes ids the agent listed more than once so each case is
        // requested for deletion a single time and deletedCount stays accurate.
        let deletedCount = 0;
        const requestedDeletes = [
            ...new Set((agentResult.deleted_test_case_ids ?? []).filter((id) => typeof id === 'string' && UUID_RE.test(id) && replaceableIds.has(id))),
        ];
        if (requestedDeletes.length > 0) {
            logInfo(`Deleting ${requestedDeletes.length} obsolete replaceable test cases...`);
            const ok = await batchDeleteTestCases(requestedDeletes, verbose);
            if (ok) {
                deletedCount = requestedDeletes.length;
            }
            else {
                logWarning('Some deletions failed; leaving any remaining cases in place.');
            }
        }
        const { createdIds } = await createProductTestCases(productId, agentResult.created_test_cases.map((tc) => ({
            name: tc.name,
            description: tc.description,
            is_critical: tc.is_critical ?? false,
        })), verbose);
        logSuccess(`Created ${createdIds.length} test cases, deleted ${deletedCount} obsolete entries. ${agentResult.summary}`);
        succeeded = true;
        return {
            status: 'success',
            message: `Generated ${createdIds.length} test cases (deleted ${deletedCount})`,
            createdCount: createdIds.length,
            deletedCount,
            summary: agentResult.summary,
        };
    }
    catch (error) {
        const message = error instanceof Error ? error.message : String(error);
        logError(`Product test cases generation failed: ${message}`);
        return {
            status: 'error',
            message: `Product test cases generation failed: ${message}`,
        };
    }
    finally {
        // Workspace handling is best-effort: a failure here must neither mask
        // the result computed above nor leave the per-product lock held.
        // NOTE(review): assumes cleanupIssueRepo is synchronous, as it was
        // called without await originally — confirm.
        try {
            if (succeeded) {
                cleanupIssueRepo(repoPath);
            }
            else if (repoPath) {
                logInfo(`Workspace preserved for inspection: ${repoPath}`);
            }
        }
        catch (cleanupError) {
            const detail = cleanupError instanceof Error
                ? cleanupError.message
                : String(cleanupError);
            logWarning(`Workspace cleanup failed for ${repoPath}: ${detail}`);
        }
        finally {
            lock.release();
        }
    }
}