edsger 0.56.3 → 0.57.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/api/chat.js +55 -2
- package/dist/api/cross-product.d.ts +8 -1
- package/dist/api/cross-product.js +44 -1
- package/dist/api/intelligence.js +98 -0
- package/dist/api/issues/get-issue.js +26 -0
- package/dist/api/issues/issue-utils.js +52 -0
- package/dist/api/issues/test-cases.js +89 -14
- package/dist/api/issues/update-issue.js +46 -8
- package/dist/api/issues/user-stories.js +89 -14
- package/dist/api/products/test-cases.d.ts +18 -0
- package/dist/api/products/test-cases.js +51 -0
- package/dist/api/products.js +21 -0
- package/dist/api/release-test-cases.js +38 -0
- package/dist/api/releases.js +86 -0
- package/dist/api/tasks.js +41 -4
- package/dist/api/test-reports.js +22 -4
- package/dist/api/user-psychology.d.ts +101 -0
- package/dist/api/user-psychology.js +143 -0
- package/dist/auth/auth-store.d.ts +33 -0
- package/dist/auth/auth-store.js +39 -0
- package/dist/commands/agent-workflow/chat-worker.js +187 -15
- package/dist/commands/agent-workflow/processor.d.ts +11 -0
- package/dist/commands/agent-workflow/processor.js +81 -2
- package/dist/commands/product-test-cases/index.d.ts +12 -0
- package/dist/commands/product-test-cases/index.js +40 -0
- package/dist/commands/screen-flow/index.d.ts +16 -0
- package/dist/commands/screen-flow/index.js +45 -0
- package/dist/commands/user-psychology/index.d.ts +7 -0
- package/dist/commands/user-psychology/index.js +51 -0
- package/dist/index.js +65 -0
- package/dist/phases/analyze-logs/index.js +27 -6
- package/dist/phases/bug-fixing/context-fetcher.js +26 -5
- package/dist/phases/find-features/index.js +53 -9
- package/dist/phases/find-shared/mcp.js +21 -0
- package/dist/phases/growth-analysis/context.d.ts +5 -3
- package/dist/phases/growth-analysis/context.js +52 -5
- package/dist/phases/output-contracts.js +129 -0
- package/dist/phases/pr-resolve/github-reply.d.ts +5 -2
- package/dist/phases/pr-resolve/github-reply.js +19 -3
- package/dist/phases/pr-resolve/index.js +19 -5
- package/dist/phases/pr-resolve/prompts.js +17 -18
- package/dist/phases/product-test-cases/index.d.ts +25 -0
- package/dist/phases/product-test-cases/index.js +174 -0
- package/dist/phases/product-test-cases/prompts.d.ts +24 -0
- package/dist/phases/product-test-cases/prompts.js +80 -0
- package/dist/phases/product-test-cases/types.d.ts +17 -0
- package/dist/phases/product-test-cases/types.js +27 -0
- package/dist/phases/screen-flow/index.d.ts +23 -0
- package/dist/phases/screen-flow/index.js +229 -0
- package/dist/phases/screen-flow/prompts.d.ts +19 -0
- package/dist/phases/screen-flow/prompts.js +39 -0
- package/dist/phases/screen-flow/theme.d.ts +19 -0
- package/dist/phases/screen-flow/theme.js +182 -0
- package/dist/phases/screen-flow/types.d.ts +130 -0
- package/dist/phases/screen-flow/types.js +66 -0
- package/dist/phases/user-psychology/agent.d.ts +16 -0
- package/dist/phases/user-psychology/agent.js +105 -0
- package/dist/phases/user-psychology/context.d.ts +10 -0
- package/dist/phases/user-psychology/context.js +65 -0
- package/dist/phases/user-psychology/index.d.ts +18 -0
- package/dist/phases/user-psychology/index.js +96 -0
- package/dist/phases/user-psychology/prompts.d.ts +2 -0
- package/dist/phases/user-psychology/prompts.js +41 -0
- package/dist/services/audit-logs.js +67 -9
- package/dist/services/branches.js +90 -14
- package/dist/services/phase-ratings.js +71 -9
- package/dist/services/product-logs.js +65 -5
- package/dist/services/pull-requests.js +74 -14
- package/dist/skills/phase/screen-flow/SKILL.md +78 -0
- package/dist/skills/phase/user-psychology/SKILL.md +135 -0
- package/dist/supabase/client.d.ts +23 -0
- package/dist/supabase/client.js +90 -0
- package/dist/system/session-manager.js +97 -24
- package/dist/types/index.d.ts +3 -0
- package/dist/utils/logger.js +24 -4
- package/package.json +4 -3
- package/vitest.config.ts +1 -0
|
@@ -499,6 +499,92 @@ You MUST return ONLY a JSON object. Do NOT include any text before or after the
|
|
|
499
499
|
- "frame_background": customize the gradient/color behind the device (e.g., "linear-gradient(135deg, #667eea 0%, #764ba2 100%)")
|
|
500
500
|
- "frame_browser_url": set a realistic URL for browser frames (e.g., "app.yourproduct.com/dashboard")
|
|
501
501
|
- If should_generate_video is false, scenes array should be empty
|
|
502
|
+
`,
|
|
503
|
+
'user-psychology': `
|
|
504
|
+
**CRITICAL - Result Format**:
|
|
505
|
+
You MUST return ONLY a JSON object inside a \`\`\`json code block. Do NOT include any text before or after the JSON.
|
|
506
|
+
|
|
507
|
+
\`\`\`json
|
|
508
|
+
{
|
|
509
|
+
"analysis": {
|
|
510
|
+
"product_id": "PRODUCT_ID",
|
|
511
|
+
"status": "success",
|
|
512
|
+
"analysis_content": "3-4 sentence executive summary of who these users are and what they really care about. Concrete, not generic.",
|
|
513
|
+
"target_personas": [
|
|
514
|
+
{
|
|
515
|
+
"name": "Asha, the burned-out solo founder",
|
|
516
|
+
"archetype": "One-line description of who they are and what they do",
|
|
517
|
+
"demographics": {
|
|
518
|
+
"role": "Solo founder of a 1-5 person SaaS",
|
|
519
|
+
"seniority": "5-10 years experience",
|
|
520
|
+
"context": "Other specifics that matter (team size, tools, stage, etc.)"
|
|
521
|
+
},
|
|
522
|
+
"goals": ["Goal 1", "Goal 2"],
|
|
523
|
+
"frustrations": ["Frustration 1 in their voice", "Frustration 2"],
|
|
524
|
+
"values": "What they believe makes them good at their job",
|
|
525
|
+
"decision_drivers": ["What tips them toward yes", "Another driver"],
|
|
526
|
+
"anti_persona_note": "Who looks similar but is the wrong fit",
|
|
527
|
+
"evidence": "Which feature/file/context-line supports this persona"
|
|
528
|
+
}
|
|
529
|
+
],
|
|
530
|
+
"jobs_to_be_done": [
|
|
531
|
+
{
|
|
532
|
+
"statement": "When [situation], I want to [motivation], so I can [outcome].",
|
|
533
|
+
"type": "functional|emotional|social",
|
|
534
|
+
"current_alternatives": ["Competitor / spreadsheet / nothing"],
|
|
535
|
+
"switching_cost": "What makes it hard to switch",
|
|
536
|
+
"persona": "Which persona this job belongs to (name from target_personas)"
|
|
537
|
+
}
|
|
538
|
+
],
|
|
539
|
+
"pain_points": [
|
|
540
|
+
{
|
|
541
|
+
"pain": "Single sentence in the user's voice",
|
|
542
|
+
"trigger": "What event makes the pain acute",
|
|
543
|
+
"severity": "critical|chronic|occasional",
|
|
544
|
+
"evidence": "Which feature, file, or context line implies this pain"
|
|
545
|
+
}
|
|
546
|
+
],
|
|
547
|
+
"motivations": {
|
|
548
|
+
"autonomy": "How the product serves their need for control / removes oversight (or 'not addressed')",
|
|
549
|
+
"competence": "How the product makes them feel capable",
|
|
550
|
+
"relatedness": "How the product connects them to others or signals belonging"
|
|
551
|
+
},
|
|
552
|
+
"behavior_triggers": [
|
|
553
|
+
{
|
|
554
|
+
"behavior": "Specific desired action (e.g., 'invite first teammate')",
|
|
555
|
+
"motivation_level": "high|medium|low",
|
|
556
|
+
"motivation_reason": "Why",
|
|
557
|
+
"ability_barrier": "What is hard about doing it",
|
|
558
|
+
"prompt": "What cue would trigger this right now",
|
|
559
|
+
"recommendation": "Concrete change to lift motivation, reduce barrier, or improve prompt"
|
|
560
|
+
}
|
|
561
|
+
],
|
|
562
|
+
"messaging_angles": [
|
|
563
|
+
{
|
|
564
|
+
"angle_name": "Short label",
|
|
565
|
+
"hook": "The headline in 10-15 words, in the user's voice",
|
|
566
|
+
"persona": "Persona name this speaks to",
|
|
567
|
+
"job": "JTBD statement (or short reference) this answers",
|
|
568
|
+
"psychological_lever": "loss_aversion|social_proof|identity_affirmation|curiosity_gap|status|reciprocity|other",
|
|
569
|
+
"why_it_works": "One sentence on the lever"
|
|
570
|
+
}
|
|
571
|
+
]
|
|
572
|
+
}
|
|
573
|
+
}
|
|
574
|
+
\`\`\`
|
|
575
|
+
|
|
576
|
+
**Required field rules**:
|
|
577
|
+
- 3-5 personas (collapse overlapping ones)
|
|
578
|
+
- 4-8 jobs_to_be_done — cover at least one emotional and one social job, not just functional
|
|
579
|
+
- 3-8 pain_points
|
|
580
|
+
- behavior_triggers should target 3-5 high-value actions (signup, activation, retention, expansion)
|
|
581
|
+
- 3-6 messaging_angles, each tied to a real persona + job
|
|
582
|
+
|
|
583
|
+
**Anti-rules — these fail validation**:
|
|
584
|
+
- No placeholder text ([role], [insert benefit], "users want to be productive")
|
|
585
|
+
- No demographic-only personas — psychographics or skip the field
|
|
586
|
+
- No "users struggle with X" phrasing in pain_points — write in the user's voice
|
|
587
|
+
- No JTBD that is just a feature description ("user wants to use Feature Y")
|
|
502
588
|
`,
|
|
503
589
|
'intelligence-analysis': `
|
|
504
590
|
**Output Format**:
|
|
@@ -809,5 +895,48 @@ You MUST end your response with a JSON object containing the code refine results
|
|
|
809
895
|
}
|
|
810
896
|
}
|
|
811
897
|
\`\`\`
|
|
898
|
+
`,
|
|
899
|
+
'screen-flow': `
|
|
900
|
+
**CRITICAL — Output Format**:
|
|
901
|
+
|
|
902
|
+
After finishing your investigation, emit a single fenced code block tagged \`screen_flow\` containing the structured extraction. Do not emit any other JSON blocks.
|
|
903
|
+
|
|
904
|
+
\`\`\`screen_flow
|
|
905
|
+
{
|
|
906
|
+
"summary": "1-3 sentence narrative of what kind of app this is and its primary user flows",
|
|
907
|
+
"nodes": [
|
|
908
|
+
{
|
|
909
|
+
"slug": "login",
|
|
910
|
+
"name": "Login",
|
|
911
|
+
"route": "/signin",
|
|
912
|
+
"file": "src/pages/Login.tsx",
|
|
913
|
+
"kind": "page",
|
|
914
|
+
"layout": "centered",
|
|
915
|
+
"header": { "title": "Sign in", "actions": [{ "label": "Sign up", "variant": "ghost" }] },
|
|
916
|
+
"body": [
|
|
917
|
+
{
|
|
918
|
+
"type": "form",
|
|
919
|
+
"submitLabel": "Sign in",
|
|
920
|
+
"fields": [
|
|
921
|
+
{ "label": "Email", "kind": "email", "required": true },
|
|
922
|
+
{ "label": "Password", "kind": "password", "required": true }
|
|
923
|
+
]
|
|
924
|
+
}
|
|
925
|
+
]
|
|
926
|
+
}
|
|
927
|
+
],
|
|
928
|
+
"edges": [
|
|
929
|
+
{
|
|
930
|
+
"fromSlug": "login",
|
|
931
|
+
"toSlug": "home",
|
|
932
|
+
"triggerLabel": "Submit credentials",
|
|
933
|
+
"triggerFile": "src/pages/Login.tsx",
|
|
934
|
+
"kind": "navigate"
|
|
935
|
+
}
|
|
936
|
+
]
|
|
937
|
+
}
|
|
938
|
+
\`\`\`
|
|
939
|
+
|
|
940
|
+
All node \`slug\` values must be unique. Every \`fromSlug\` / \`toSlug\` in edges must reference a slug that appears in \`nodes\`. Section \`type\` values are restricted to: \`form\`, \`list\`, \`card-grid\`, \`table\`, \`kanban\`, \`text\`, \`image\`, \`chart\`, \`stats\`, \`empty-state\`, \`tabs\`, \`custom\`. Edge \`kind\` values are restricted to: \`navigate\`, \`modal\`, \`redirect\`, \`back\`.
|
|
812
941
|
`,
|
|
813
942
|
};
|
|
@@ -3,10 +3,13 @@
|
|
|
3
3
|
* Reuses GraphQL patterns from code-refine-verification.
|
|
4
4
|
*/
|
|
5
5
|
import { type Octokit } from '@octokit/rest';
|
|
6
|
+
/**
 * Builds the HTML-comment marker that is appended to review-thread replies.
 *
 * @param action - Outcome recorded in the marker: `'changed'` or `'skipped'`.
 * @returns The marker string to append to the reply body.
 */
export declare function buildResolveMarker(action: 'changed' | 'skipped'): string;
|
|
7
|
+
/**
 * Reports whether a comment body carries a pr-resolve marker.
 *
 * @param body - Comment text; may be `undefined` or `null`.
 * @returns `true` when a marker is present, `false` otherwise (including for
 *   a missing body).
 */
export declare function hasResolveMarker(body: string | undefined | null): boolean;
|
|
6
8
|
/**
|
|
7
|
-
* Reply to a review thread on GitHub using GraphQL.
|
|
9
|
+
* Reply to a review thread on GitHub using GraphQL. Appends a marker so the
|
|
10
|
+
* next run can detect that we've already responded to this thread.
|
|
8
11
|
*/
|
|
9
|
-
export declare function replyToReviewThread(octokit: Octokit, threadId: string, body: string, verbose?: boolean): Promise<boolean>;
|
|
12
|
+
export declare function replyToReviewThread(octokit: Octokit, threadId: string, body: string, action: 'changed' | 'skipped', verbose?: boolean): Promise<boolean>;
|
|
10
13
|
/**
|
|
11
14
|
* Resolve a review thread on GitHub using GraphQL.
|
|
12
15
|
*/
|
|
@@ -4,9 +4,24 @@
|
|
|
4
4
|
*/
|
|
5
5
|
import { logError, logInfo } from '../../utils/logger.js';
|
|
6
6
|
/**
|
|
7
|
-
*
|
|
7
|
+
* Marker appended to every reply we post so subsequent runs can recognise
|
|
8
|
+
* their own prior comments and avoid posting duplicates.
|
|
8
9
|
*/
|
|
9
|
-
|
|
10
|
+
const RESOLVE_MARKER_PREFIX = '<!-- edsger:pr-resolve';
/**
 * Build the HTML-comment marker for a reply: the shared prefix, a colon, the
 * action that was taken, and the comment terminator.
 *
 * @param action - Outcome to record in the marker (e.g. 'changed' or 'skipped').
 * @returns The complete marker string.
 */
export function buildResolveMarker(action) {
    const markerTail = ' -->';
    return RESOLVE_MARKER_PREFIX.concat(':', action, markerTail);
}
|
|
14
|
+
/**
 * Tell whether a comment body contains the pr-resolve marker prefix.
 *
 * @param body - Comment text; may be undefined, null, or empty.
 * @returns false for a missing/empty body, otherwise whether the marker
 *   prefix occurs anywhere in the text.
 */
export function hasResolveMarker(body) {
    // A missing or empty body cannot carry a marker.
    return body ? body.includes(RESOLVE_MARKER_PREFIX) : false;
}
|
|
20
|
+
/**
|
|
21
|
+
* Reply to a review thread on GitHub using GraphQL. Appends a marker so the
|
|
22
|
+
* next run can detect that we've already responded to this thread.
|
|
23
|
+
*/
|
|
24
|
+
export async function replyToReviewThread(octokit, threadId, body, action, verbose) {
|
|
10
25
|
try {
|
|
11
26
|
const mutation = `
|
|
12
27
|
mutation($threadId: ID!, $body: String!) {
|
|
@@ -20,7 +35,8 @@ export async function replyToReviewThread(octokit, threadId, body, verbose) {
|
|
|
20
35
|
}
|
|
21
36
|
}
|
|
22
37
|
`;
|
|
23
|
-
|
|
38
|
+
const bodyWithMarker = `${body}\n\n${buildResolveMarker(action)}`;
|
|
39
|
+
await octokit.graphql(mutation, { threadId, body: bodyWithMarker });
|
|
24
40
|
if (verbose) {
|
|
25
41
|
logInfo(`Replied to thread ${threadId}`);
|
|
26
42
|
}
|
|
@@ -14,7 +14,7 @@ import { fetchUnresolvedReviewThreads } from '../code-refine-verification/github
|
|
|
14
14
|
import { createPromptGenerator, extractTextFromContent, tryExtractResult, } from '../pr-shared/agent-utils.js';
|
|
15
15
|
import { parsePullRequestUrl } from '../pr-shared/context.js';
|
|
16
16
|
import { learnFromReviewFeedback } from './checklist-learner.js';
|
|
17
|
-
import { replyToReviewThread, resolveReviewThread } from './github-reply.js';
|
|
17
|
+
import { hasResolveMarker, replyToReviewThread, resolveReviewThread, } from './github-reply.js';
|
|
18
18
|
import { createResolveSystemPrompt, createResolveUserPrompt, } from './prompts.js';
|
|
19
19
|
import { isResolveResult } from './types.js';
|
|
20
20
|
import { hasNewCommits, hasUncommittedChanges, prepareWorkspace, pushChanges, } from './workspace.js';
|
|
@@ -35,12 +35,26 @@ export async function resolveStandalonePR(options) {
|
|
|
35
35
|
const octokit = new Octokit({ auth: githubToken });
|
|
36
36
|
// Fetch unresolved review threads
|
|
37
37
|
logInfo('Fetching unresolved review threads...');
|
|
38
|
-
const
|
|
38
|
+
const allUnresolvedThreads = await fetchUnresolvedReviewThreads(octokit, owner, repo, prInfo.prNumber, verbose);
|
|
39
|
+
// Skip threads whose last comment already carries our marker — they were
|
|
40
|
+
// handled in a previous pr-resolve run and replying again would just spam.
|
|
41
|
+
// If a human has commented after our reply, the last comment will no
|
|
42
|
+
// longer be ours and the thread will be picked up again.
|
|
43
|
+
const unresolvedThreads = allUnresolvedThreads.filter((thread) => {
|
|
44
|
+
const lastComment = thread.comments.nodes[thread.comments.nodes.length - 1];
|
|
45
|
+
return !hasResolveMarker(lastComment?.body);
|
|
46
|
+
});
|
|
47
|
+
const alreadyHandled = allUnresolvedThreads.length - unresolvedThreads.length;
|
|
48
|
+
if (alreadyHandled > 0) {
|
|
49
|
+
logInfo(`Skipping ${alreadyHandled} thread(s) already addressed in a previous resolve run`);
|
|
50
|
+
}
|
|
39
51
|
if (unresolvedThreads.length === 0) {
|
|
40
52
|
logSuccess('No unresolved review threads found.');
|
|
41
53
|
return {
|
|
42
54
|
status: 'success',
|
|
43
|
-
message:
|
|
55
|
+
message: alreadyHandled > 0
|
|
56
|
+
? `All ${alreadyHandled} unresolved thread(s) were already addressed in a previous resolve run`
|
|
57
|
+
: 'No unresolved review threads to resolve',
|
|
44
58
|
threadsAddressed: 0,
|
|
45
59
|
threadsSkipped: 0,
|
|
46
60
|
};
|
|
@@ -162,7 +176,7 @@ export async function resolveStandalonePR(options) {
|
|
|
162
176
|
}
|
|
163
177
|
// eslint-disable-next-line max-depth
|
|
164
178
|
try {
|
|
165
|
-
const replied = await replyToReviewThread(octokit, threadId, comment.reply, verbose);
|
|
179
|
+
const replied = await replyToReviewThread(octokit, threadId, comment.reply, comment.action, verbose);
|
|
166
180
|
// eslint-disable-next-line max-depth
|
|
167
181
|
if (replied && comment.action === 'changed') {
|
|
168
182
|
// Resolve the thread since the change was made
|
|
@@ -192,7 +206,7 @@ export async function resolveStandalonePR(options) {
|
|
|
192
206
|
const genericReply = agentMadeChanges
|
|
193
207
|
? 'Changes were made to address review feedback. Please re-review.'
|
|
194
208
|
: 'Reviewed this comment. No changes were made at this time.';
|
|
195
|
-
const replied = await replyToReviewThread(octokit, thread.id, genericReply, verbose);
|
|
209
|
+
const replied = await replyToReviewThread(octokit, thread.id, genericReply, agentMadeChanges ? 'changed' : 'skipped', verbose);
|
|
196
210
|
// eslint-disable-next-line max-depth
|
|
197
211
|
if (replied) {
|
|
198
212
|
threadsSkipped++;
|
|
@@ -8,27 +8,26 @@
|
|
|
8
8
|
export function createResolveSystemPrompt() {
|
|
9
9
|
return `You are an expert software engineer resolving code review feedback on a pull request.
|
|
10
10
|
|
|
11
|
-
**Your Goal**: For each review comment,
|
|
11
|
+
**Your Goal**: For each review comment, make the change unless the reviewer is factually wrong or has misunderstood the code. The default is to accept the feedback.
|
|
12
12
|
|
|
13
|
-
**
|
|
14
|
-
- The suggestion
|
|
15
|
-
- The suggestion
|
|
16
|
-
-
|
|
17
|
-
- The suggestion follows established best practices for the language/framework
|
|
13
|
+
**Make the change when** (this is the default — apply it broadly):
|
|
14
|
+
- The suggestion would improve the code in any way: correctness, security, error handling, clarity, maintainability, performance, design, naming, structure, tests, docs
|
|
15
|
+
- The suggestion aligns with best practices for the language or framework
|
|
16
|
+
- Apply the change even if it is large, touches many files, or requires non-trivial refactoring — workload is not a reason to skip
|
|
18
17
|
|
|
19
|
-
**Skip the change when
|
|
20
|
-
- The
|
|
21
|
-
- The
|
|
22
|
-
-
|
|
23
|
-
|
|
18
|
+
**Skip the change ONLY when** (the bar is high — be conservative about skipping):
|
|
19
|
+
- The reviewer is factually wrong (e.g., claims the code does X when it actually does Y, or asserts a behavior that does not exist)
|
|
20
|
+
- The reviewer has misunderstood the code's purpose, the surrounding context, or how this piece interacts with other parts of the system
|
|
21
|
+
- Following the suggestion would actually make the code worse or introduce a regression
|
|
22
|
+
|
|
23
|
+
Personal preference, "I'd prefer a different style", "this is more complex than I'd like", or "this conflicts with a pattern I prefer" are NOT valid reasons to skip. If the change would make the code better, do it.
|
|
24
24
|
|
|
25
25
|
**Process**:
|
|
26
26
|
1. Read all the review comments carefully
|
|
27
|
-
2. For each comment, examine the relevant code
|
|
28
|
-
3.
|
|
29
|
-
4.
|
|
30
|
-
5. After
|
|
31
|
-
6. After committing, output a JSON summary
|
|
27
|
+
2. For each comment, examine the relevant code so you actually understand what it does
|
|
28
|
+
3. Default: make the change in the file. Only skip if you can articulate a specific factual error or misunderstanding by the reviewer.
|
|
29
|
+
4. After making all changes, commit them with a descriptive message summarizing what was resolved (do NOT push)
|
|
30
|
+
5. After committing, output a JSON summary
|
|
32
31
|
|
|
33
32
|
**CRITICAL - Result Format**:
|
|
34
33
|
After making all changes, you MUST output a JSON result. Use the exact comment_id from each comment (comment_1, comment_2, etc.):
|
|
@@ -56,7 +55,7 @@ After making all changes, you MUST output a JSON result. Use the exact comment_i
|
|
|
56
55
|
|
|
57
56
|
**Reply Guidelines**:
|
|
58
57
|
- For "changed": briefly describe what was changed (1-2 sentences)
|
|
59
|
-
- For "skipped":
|
|
58
|
+
- For "skipped": clearly explain the specific factual error or misunderstanding — point to the exact line, behavior, or invariant the reviewer got wrong (2-3 sentences). Do not skip with a vague "I disagree" — name the misunderstanding.
|
|
60
59
|
- Be professional and constructive in all replies
|
|
61
60
|
- You MUST include an entry for EVERY comment_id`;
|
|
62
61
|
}
|
|
@@ -101,7 +100,7 @@ export function createResolveUserPrompt(unresolvedThreads) {
|
|
|
101
100
|
sections.push('## Instructions');
|
|
102
101
|
sections.push('');
|
|
103
102
|
sections.push('For each comment above, read the referenced file and evaluate the suggestion.');
|
|
104
|
-
sections.push('
|
|
103
|
+
sections.push('Default to making the change — even if it is large or touches many files. Only skip when the reviewer is factually wrong or has misunderstood the code, and explain the specific misunderstanding in your reply.');
|
|
105
104
|
sections.push('After processing all comments, output the JSON resolve_result with your decisions and reply messages.');
|
|
106
105
|
sections.push(`Use the exact comment IDs: ${Array.from(commentIdToThreadId.keys()).join(', ')}`);
|
|
107
106
|
return { prompt: sections.join('\n'), commentIdToThreadId };
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Product-test-cases phase: clone the product's repo, ask Claude to draft a
|
|
3
|
+
* product-level regression suite (deduping against existing cases), and save
|
|
4
|
+
* the new ones as drafts via MCP. Approved test cases are NEVER touched —
|
|
5
|
+
* only draft/pending_approval entries may be replaced.
|
|
6
|
+
*
|
|
7
|
+
* Mirrors the find-bugs pattern: clone to ~/edsger/product-test-cases-<id>,
|
|
8
|
+
* run a bounded Claude session, persist via MCP, cleanup on success.
|
|
9
|
+
*/
|
|
10
|
+
/** Inputs accepted by `generateProductTestCases`. */
export interface GenerateProductTestCasesOptions {
    /** Identifier of the product whose test cases are generated. */
    productId: string;
    /** GitHub token used when cloning and syncing the repository. */
    githubToken: string;
    /** Owner (user or organisation) of the GitHub repository. */
    owner: string;
    /** Name of the GitHub repository. */
    repo: string;
    /** Branch to sync to; when omitted, the repository's detected default branch is used. */
    branch?: string;
    /** Verbose flag forwarded to the API and agent helpers. */
    verbose?: boolean;
}
|
|
18
|
+
/** Outcome reported by `generateProductTestCases`. */
export interface GenerateProductTestCasesResult {
    /** `'success'` when generation completed, `'error'` otherwise. */
    status: 'success' | 'error';
    /** Human-readable description of the outcome. */
    message: string;
    /** Number of test cases created; set on success. */
    createdCount?: number;
    /** Number of replaceable test cases deleted; set on success. */
    deletedCount?: number;
    /** Summary text taken from the agent's result; set on success. */
    summary?: string;
}
|
|
25
|
+
/**
 * Runs the product-test-cases phase for one product repository.
 *
 * @param options - Product id, GitHub credentials and repository coordinates,
 *   plus an optional branch and verbose flag.
 * @returns A promise resolving to the outcome, with created/deleted counts
 *   and a summary on success.
 */
export declare function generateProductTestCases(options: GenerateProductTestCasesOptions): Promise<GenerateProductTestCasesResult>;
|
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Product-test-cases phase: clone the product's repo, ask Claude to draft a
|
|
3
|
+
* product-level regression suite (deduping against existing cases), and save
|
|
4
|
+
* the new ones as drafts via MCP. Approved test cases are NEVER touched —
|
|
5
|
+
* only draft/pending_approval entries may be replaced.
|
|
6
|
+
*
|
|
7
|
+
* Mirrors the find-bugs pattern: clone to ~/edsger/product-test-cases-<id>,
|
|
8
|
+
* run a bounded Claude session, persist via MCP, cleanup on success.
|
|
9
|
+
*/
|
|
10
|
+
import { query } from '@anthropic-ai/claude-agent-sdk';
|
|
11
|
+
import { batchDeleteTestCases } from '../../api/issues/batch-operations.js';
|
|
12
|
+
import { createProductTestCases, getProductTestCases, } from '../../api/products/test-cases.js';
|
|
13
|
+
import { DEFAULT_MODEL } from '../../constants.js';
|
|
14
|
+
import { logError, logInfo, logSuccess, logWarning, } from '../../utils/logger.js';
|
|
15
|
+
import { cleanupIssueRepo, cloneIssueRepo, ensureWorkspaceDir, syncRepoToRef, } from '../../workspace/workspace-manager.js';
|
|
16
|
+
import { detectDefaultBranch } from '../find-shared/git.js';
|
|
17
|
+
import { fetchProductBasics } from '../find-shared/mcp.js';
|
|
18
|
+
import { createScanStateModule } from '../find-shared/scan-state.js';
|
|
19
|
+
import { createPromptGenerator, extractTextFromContent, tryExtractResult, } from '../pr-shared/agent-utils.js';
|
|
20
|
+
import { createProductTestCasesSystemPrompt, createProductTestCasesUserPrompt, } from './prompts.js';
|
|
21
|
+
import { isProductTestCasesAgentResult, } from './types.js';
|
|
22
|
+
// Prefix of the per-product clone key; the product id is appended to it
// when the repository is cloned.
const WORKSPACE_KEY = 'product-test-cases';
// Generation is read-heavy and open-scope. 200 turns matches find-bugs and is
// enough for a mid-sized repo while still bounding cost.
const MAX_TURNS = 200;
// Per-product lock so two concurrent invocations (e.g. user clicks twice in
// the UI, or CLI + desktop fire at once) don't race on the shared clone dir.
// We don't persist any state besides the lock — generation is stateless.
const lockModule = createScanStateModule({
    dirName: 'product-test-cases-state',
});
// UUID regex matching MCP-issued ids — defensive filter before we trust
// strings the agent puts in deleted_test_case_ids.
// Case-insensitive; accepts any hex digits in the 8-4-4-4-12 layout.
const UUID_RE = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
|
|
35
|
+
/**
 * Generate product-level regression test cases for one product repository.
 *
 * Steps:
 *  1. Acquire the per-product lock; return an error result if it is held.
 *  2. Clone the repository and sync it to the requested (or default) branch.
 *  3. Load the product basics and its existing test cases, splitting them
 *     into approved (locked) and replaceable entries.
 *  4. Run a Claude agent session inside the clone and parse its
 *     `test_cases_result`.
 *  5. Delete the replaceable cases the agent marked obsolete, then create
 *     the newly drafted cases.
 *
 * Errors raised inside the main flow are caught and reported through the
 * returned object (`status: 'error'`). The lock is always released, even
 * when workspace cleanup fails.
 *
 * @param options - `productId`, `githubToken`, `owner`, `repo`, optional
 *   `branch` (falls back to the detected default branch) and `verbose`.
 * @returns A result object with `status` and `message`; on success also
 *   `createdCount`, `deletedCount` and the agent's `summary`.
 */
// eslint-disable-next-line complexity
export async function generateProductTestCases(options) {
    const { productId, githubToken, owner, repo, verbose } = options;
    logInfo(`Starting product test-cases generation for product ${productId} (${owner}/${repo})`);
    const lock = lockModule.acquireLock(productId);
    if (!lock) {
        logWarning(`Another product test-cases generation is already running for product ${productId}; skipping.`);
        return {
            status: 'error',
            message: 'Another product test-cases generation is already running for this product',
        };
    }
    let repoPath;
    let succeeded = false;
    try {
        const workspaceRoot = ensureWorkspaceDir();
        const repoKey = `${WORKSPACE_KEY}-${productId}`;
        ({ repoPath } = cloneIssueRepo(workspaceRoot, repoKey, owner, repo, githubToken));
        const branch = options.branch ?? detectDefaultBranch(repoPath);
        logInfo(`Syncing ${owner}/${repo} to branch ${branch}`);
        syncRepoToRef(repoPath, { branch }, githubToken);
        const [product, existing] = await Promise.all([
            fetchProductBasics(productId),
            getProductTestCases(productId, verbose),
        ]);
        // Split existing cases: approved ones are locked, everything else
        // (a missing status is treated as 'draft') may be replaced.
        const approved = [];
        const replaceable = [];
        for (const tc of existing) {
            const slot = {
                id: tc.id,
                name: tc.name,
                description: tc.description,
                is_critical: tc.is_critical,
                status: tc.status ?? 'draft',
            };
            if (slot.status === 'approved') {
                approved.push(slot);
            }
            else {
                replaceable.push(slot);
            }
        }
        const replaceableIds = new Set(replaceable.map((tc) => tc.id));
        logInfo(`Existing test cases: ${approved.length} approved (locked), ${replaceable.length} replaceable`);
        const systemPrompt = createProductTestCasesSystemPrompt();
        const userPrompt = createProductTestCasesUserPrompt({
            productName: product.name,
            productDescription: product.description,
            approvedTestCases: approved,
            replaceableTestCases: replaceable,
        });
        let lastAssistantResponse = '';
        let agentResult = null;
        logInfo('Running Claude agent to draft test cases...');
        for await (const message of query({
            prompt: createPromptGenerator(userPrompt),
            options: {
                systemPrompt: {
                    type: 'preset',
                    preset: 'claude_code',
                    append: systemPrompt,
                },
                model: DEFAULT_MODEL,
                maxTurns: MAX_TURNS,
                permissionMode: 'bypassPermissions',
                cwd: repoPath,
            },
        })) {
            if (message.type === 'assistant') {
                // Accumulate assistant text as a fallback source for the result.
                lastAssistantResponse += extractTextFromContent(message.message?.content ?? [], verbose);
                continue;
            }
            if (message.type !== 'result') {
                continue;
            }
            // Prefer the final result text; fall back to the accumulated
            // assistant text when the run did not end in success or the
            // result text is empty.
            const responseText = message.subtype === 'success'
                ? message.result || lastAssistantResponse
                : lastAssistantResponse;
            const parsed = tryExtractResult(responseText, 'test_cases_result');
            if (isProductTestCasesAgentResult(parsed)) {
                agentResult = parsed;
            }
            else if (message.subtype !== 'success') {
                logError(`Agent run incomplete: ${message.subtype}`);
            }
        }
        if (!agentResult) {
            return {
                status: 'error',
                message: 'Test cases generation failed: could not parse a test_cases_result from the agent',
            };
        }
        // Apply deletions. Filter to only ids that are valid UUIDs AND belong
        // to the replaceable set — never trust the agent's word that an id
        // is deletable. Approved cases must never be removed. The Set collapses
        // repeated ids so each case is requested (and counted) once.
        let deletedCount = 0;
        const requestedDeletes = [
            ...new Set((agentResult.deleted_test_case_ids ?? []).filter((id) => typeof id === 'string' && UUID_RE.test(id) && replaceableIds.has(id))),
        ];
        if (requestedDeletes.length > 0) {
            logInfo(`Deleting ${requestedDeletes.length} obsolete replaceable test cases...`);
            const ok = await batchDeleteTestCases(requestedDeletes, verbose);
            if (ok) {
                deletedCount = requestedDeletes.length;
            }
            else {
                logWarning('Some deletions failed; leaving any remaining cases in place.');
            }
        }
        // NOTE(review): deletions run before creation, so if creation throws
        // the obsolete drafts are already gone — confirm this ordering is intended.
        const { createdIds } = await createProductTestCases(productId, agentResult.created_test_cases.map((tc) => ({
            name: tc.name,
            description: tc.description,
            is_critical: tc.is_critical ?? false,
        })), verbose);
        logSuccess(`Created ${createdIds.length} test cases, deleted ${deletedCount} obsolete entries. ${agentResult.summary}`);
        succeeded = true;
        return {
            status: 'success',
            message: `Generated ${createdIds.length} test cases (deleted ${deletedCount})`,
            createdCount: createdIds.length,
            deletedCount,
            summary: agentResult.summary,
        };
    }
    catch (error) {
        const message = error instanceof Error ? error.message : String(error);
        logError(`Product test cases generation failed: ${message}`);
        return {
            status: 'error',
            message: `Product test cases generation failed: ${message}`,
        };
    }
    finally {
        // Workspace handling is best-effort: a cleanup failure must neither
        // replace the result being returned nor leave the per-product lock held.
        try {
            if (succeeded) {
                cleanupIssueRepo(repoPath);
            }
            else if (repoPath) {
                logInfo(`Workspace preserved for inspection: ${repoPath}`);
            }
        }
        catch (cleanupError) {
            const detail = cleanupError instanceof Error
                ? cleanupError.message
                : String(cleanupError);
            logWarning(`Failed to clean up workspace ${repoPath}: ${detail}`);
        }
        finally {
            lock.release();
        }
    }
}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Prompts for the product-level test cases phase.
|
|
3
|
+
*
|
|
4
|
+
* Agent's job: explore the cloned product repo and propose a regression-suite
|
|
5
|
+
* worth of test cases for the whole product (not a single issue). Existing
|
|
6
|
+
* test cases — both approved (locked) and unapproved (replaceable) — are
|
|
7
|
+
* included in the context so the agent doesn't duplicate work and so the
|
|
8
|
+
* unapproved set can be refreshed when the codebase has drifted.
|
|
9
|
+
*/
|
|
10
|
+
/**
 * A test case that already exists for the product and is shown to the agent
 * as context when building the prompt.
 */
export interface ExistingProductTestCase {
    /** Identifier of the test case; listed next to replaceable cases in the prompt. */
    id: string;

    /** Short title of the test case. */
    name: string;

    /** Body of the test case; may span multiple lines. */
    description: string;

    /** When true the prompt tags the entry with `[critical]`. */
    is_critical: boolean;

    /** Review state of the test case (presumably values such as `draft` or `pending_approval` — confirm against the API). */
    status: string;
}
|
|
17
|
+
/**
 * Everything the user prompt for the product-level test cases phase is built from.
 */
export interface ProductTestCasesPromptContext {
    /** Product name, rendered as the top-level heading of the prompt. */
    productName: string;

    /** Optional product description; the "Description" section is emitted only when this is set. */
    productDescription?: string;

    /** Existing test cases that are approved (locked): shown so they are not duplicated. */
    approvedTestCases: ExistingProductTestCase[];

    /** Existing unapproved test cases that the agent may propose for deletion. */
    replaceableTestCases: ExistingProductTestCase[];
}
|
|
23
|
+
/**
 * Returns the system prompt for the product-level test cases agent.
 *
 * Takes no arguments.
 *
 * @returns The system prompt text.
 */
export declare function createProductTestCasesSystemPrompt(): string;
|
|
24
|
+
/**
 * Builds the user prompt for the product-level test cases agent.
 *
 * @param context - Product name, optional description, and the approved and
 *   replaceable test cases to include in the prompt.
 * @returns The user prompt text.
 */
export declare function createProductTestCasesUserPrompt(context: ProductTestCasesPromptContext): string;
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Prompts for the product-level test cases phase.
|
|
3
|
+
*
|
|
4
|
+
* Agent's job: explore the cloned product repo and propose a regression-suite
|
|
5
|
+
* worth of test cases for the whole product (not a single issue). Existing
|
|
6
|
+
* test cases — both approved (locked) and unapproved (replaceable) — are
|
|
7
|
+
* included in the context so the agent doesn't duplicate work and so the
|
|
8
|
+
* unapproved set can be refreshed when the codebase has drifted.
|
|
9
|
+
*/
|
|
10
|
+
/**
 * Builds the system prompt for the product-level test cases agent.
 *
 * The text is static. It sets the QA-engineer role, tells the agent to explore
 * the cloned repository, spells out the `test_cases_result` JSON contract, and
 * lists the rules for creating and deleting test cases.
 *
 * @returns The prompt text, with lines separated by `\n`.
 */
export function createProductTestCasesSystemPrompt() {
    const role = 'You are a senior QA engineer drafting a product-level regression test suite.';
    const exploration = "The current working directory is a fresh clone of the product's repository. Use Glob/Grep/Read to explore it and understand the surface area: entry points, public APIs, user-facing flows, critical paths. Look at recent commits with Bash if it helps.";

    // Example of the JSON envelope the agent must emit.
    // NOTE(review): the example is reproduced flush-left, as it appears in this view — confirm the intended indentation.
    const outputContract = [
        'Output a single JSON result of the form:',
        '```json',
        '{',
        '"test_cases_result": {',
        '"summary": "Short 1-2 sentence summary of what you covered",',
        '"created_test_cases": [',
        '{',
        '"name": "Concise name (max 200 chars)",',
        '"description": "Markdown describing Given/When/Then steps + expected result",',
        '"is_critical": true',
        '}',
        '],',
        '"deleted_test_case_ids": [',
        '"uuid-of-replaceable-test-case-you-want-removed"',
        '],',
        '"deletion_reasons": {',
        '"uuid": "Why this test case is no longer relevant"',
        '}',
        '}',
        '}',
        '```',
    ];

    const rules = [
        'Rules:',
        '- DO NOT duplicate any test case in the "Approved test cases" or "Replaceable test cases" sections. If an existing case already covers a behaviour, skip it.',
        '- You MAY propose deletions ONLY for entries in the "Replaceable test cases" list. Approved test cases are locked by the human reviewer and cannot be touched.',
        '- Mark `is_critical: true` only for must-pass flows (auth, payments, data loss risks, primary user journeys).',
        '- Aim for 5-30 high-signal cases total. Quality over quantity.',
        '- Each test case must be executable by a human tester reading the description alone.',
        '- The "deleted_test_case_ids" and "deletion_reasons" fields are optional; omit or use [] / {} when you have nothing to delete.',
        '- Wrap the final JSON in a single ```json fenced block. No extra prose after it.',
    ];

    // The empty entries reproduce the blank lines that separate the prompt sections.
    return [role, '', exploration, '', ...outputContract, '', ...rules].join('\n');
}
|
|
46
|
+
/**
 * Builds the user prompt for the product-level test cases agent.
 *
 * The Markdown output contains, in order: the product heading, an optional
 * "Description" section, the approved (locked) test cases, the replaceable
 * test cases (each with its id and status so the agent can reference it in
 * `deleted_test_case_ids`), and the closing task instruction.
 *
 * @param context - Prompt inputs: `productName`, optional `productDescription`,
 *   and the `approvedTestCases` / `replaceableTestCases` lists. A missing list
 *   is treated as empty.
 * @returns The prompt text, with lines separated by `\n`.
 */
export function createProductTestCasesUserPrompt(context) {
    const criticalTag = (tc) => (tc.is_critical ? ' [critical]' : '');

    // Renders one "## ..." section: heading, then either the `_(none)_`
    // placeholder or a bullet plus indented description per test case.
    const renderSection = (heading, testCases, renderTitle) => {
        const section = ['', heading];
        if (testCases.length === 0) {
            section.push('_(none)_');
            return section;
        }
        for (const tc of testCases) {
            section.push(renderTitle(tc));
            // A row may arrive without a description; coerce instead of
            // throwing on `.replace` of null/undefined.
            const description = String(tc.description ?? '');
            if (description !== '') {
                // Indent every line by the width of the "- " marker so that
                // multi-line descriptions (including nested lists) stay inside
                // the bullet. `\r?\n` also normalizes CRLF input.
                section.push(`  ${description.replace(/\r?\n/g, '\n  ')}`);
            }
        }
        return section;
    };

    const lines = [`# Product: ${context.productName}`];
    if (context.productDescription) {
        lines.push('', '## Description', context.productDescription);
    }
    lines.push(...renderSection(
        '## Approved test cases (locked — do not duplicate, cannot delete)',
        context.approvedTestCases ?? [],
        (tc) => `- **${tc.name}**${criticalTag(tc)}`,
    ));
    lines.push(...renderSection(
        '## Replaceable test cases (draft / pending_approval — may be deleted if obsolete or duplicated)',
        context.replaceableTestCases ?? [],
        (tc) => `- \`${tc.id}\` (${tc.status})${criticalTag(tc)} — **${tc.name}**`,
    ));
    lines.push(
        '',
        '## Task',
        'Explore the repository (Glob/Grep/Read/Bash) and propose product-level regression test cases. Follow the JSON output contract in your system prompt.',
    );
    return lines.join('\n');
}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Wire shape for the product-test-cases agent output. The agent is asked to
|
|
3
|
+
* emit `{ test_cases_result: { ... } }`; we validate it loosely so a single
|
|
4
|
+
* stray field doesn't kill an otherwise-good run.
|
|
5
|
+
*/
|
|
6
|
+
/**
 * One test case proposed by the agent in `created_test_cases`.
 */
export interface ProductTestCaseDraft {
    /** Short title of the proposed test case. */
    name: string;

    /** Body of the proposed test case (the agent is asked for Markdown steps plus the expected result). */
    description: string;

    /** Whether the case is a must-pass flow; optional in the agent output. */
    is_critical?: boolean;
}
|
|
11
|
+
/**
 * Payload the agent returns under the `test_cases_result` key.
 */
export interface ProductTestCasesAgentResult {
    /** Short summary of what the agent covered. */
    summary: string;

    /** Test cases the agent proposes to create. */
    created_test_cases: ProductTestCaseDraft[];

    /** Ids of existing test cases the agent wants removed; may be omitted. */
    deleted_test_case_ids?: string[];

    /** Free-text reason per deletion (presumably keyed by test case id — confirm against the consumer); may be omitted. */
    deletion_reasons?: Record<string, string>;
}
|
|
17
|
+
/**
 * Type guard for the agent's `test_cases_result` payload.
 *
 * NOTE(review): the implementation is not visible here; per the file header the
 * validation is intentionally loose — confirm which fields are actually checked.
 *
 * @param value - Untrusted value to check.
 * @returns `true` when `value` can be treated as a `ProductTestCasesAgentResult`.
 */
export declare function isProductTestCasesAgentResult(value: unknown): value is ProductTestCasesAgentResult;
|