edsger 0.57.0 → 0.59.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/api/cross-product.js +0 -1
- package/dist/api/issues/issue-utils.js +0 -1
- package/dist/api/issues/update-issue.js +1 -1
- package/dist/commands/agent-workflow/chat-worker.js +1 -1
- package/dist/commands/checklists/index.js +1 -1
- package/dist/commands/product-techniques/index.d.ts +15 -0
- package/dist/commands/product-techniques/index.js +37 -0
- package/dist/commands/workflow/executors/phase-executor.js +1 -1
- package/dist/index.js +24 -1
- package/dist/phases/analyze-logs/index.js +1 -1
- package/dist/phases/bug-fixing/context-fetcher.js +4 -2
- package/dist/phases/find-features/index.js +1 -1
- package/dist/phases/output-contracts.js +47 -36
- package/dist/phases/pr-shared/agent-utils.d.ts +11 -3
- package/dist/phases/pr-shared/agent-utils.js +48 -4
- package/dist/phases/product-techniques/index.d.ts +52 -0
- package/dist/phases/product-techniques/index.js +268 -0
- package/dist/phases/product-techniques/mcp-server.d.ts +41 -0
- package/dist/phases/product-techniques/mcp-server.js +96 -0
- package/dist/phases/product-techniques/prompts.d.ts +19 -0
- package/dist/phases/product-techniques/prompts.js +66 -0
- package/dist/phases/product-techniques/types.d.ts +13 -0
- package/dist/phases/product-techniques/types.js +13 -0
- package/dist/phases/screen-flow/index.js +73 -17
- package/dist/phases/screen-flow/mcp-server.d.ts +195 -0
- package/dist/phases/screen-flow/mcp-server.js +262 -0
- package/dist/phases/screen-flow/prompts.js +3 -1
- package/dist/phases/screen-flow/theme.js +23 -12
- package/dist/phases/screen-flow/types.js +30 -15
- package/dist/services/branches.js +3 -3
- package/dist/services/phase-hooks/hook-executor.js +1 -1
- package/dist/services/phase-ratings.js +1 -1
- package/dist/services/product-logs.js +1 -1
- package/dist/services/pull-requests.js +3 -3
- package/package.json +1 -1
- package/vitest.config.ts +1 -0
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* product-techniques phase: clone the product's repo, ask Claude (via the
|
|
3
|
+
* `submit_techniques` MCP tool) to write a catalogue of the techniques the
|
|
4
|
+
* repo uses, and persist the result to product_techniques via the Supabase
|
|
5
|
+
* SDK.
|
|
6
|
+
*
|
|
7
|
+
* Mirrors the screen-flow pattern. Production-grade behaviours layered on
|
|
8
|
+
* top of the basic generate-and-write loop:
|
|
9
|
+
*
|
|
10
|
+
* - Heartbeat: `last_heartbeat_at` is refreshed on every assistant message
|
|
11
|
+
* so the reader can detect stalled / crashed runs (see services/db/
|
|
12
|
+
* product-techniques.ts for the lazy reaper).
|
|
13
|
+
* - Cancellation-safe writes: markRunning / markSuccess / markFailed only
|
|
14
|
+
* touch rows whose status is in {pending, running}. If the user clicked
|
|
15
|
+
* Stop and the row is now 'cancelled', the final write no-ops.
|
|
16
|
+
* - Tool-based submission: validated server-side via Zod + content checks
|
|
17
|
+
* (mcp-server.ts). Falls back to fenced JSON parsing for resilience.
|
|
18
|
+
*/
|
|
19
|
+
import { query } from '@anthropic-ai/claude-agent-sdk';
|
|
20
|
+
import { getGitHubConfigByProduct } from '../../api/github.js';
|
|
21
|
+
import { DEFAULT_MODEL } from '../../constants.js';
|
|
22
|
+
import { getSupabase } from '../../supabase/client.js';
|
|
23
|
+
import { logError, logInfo, logSuccess, logWarning, } from '../../utils/logger.js';
|
|
24
|
+
import { cleanupIssueRepo, cloneIssueRepo, ensureWorkspaceDir, } from '../../workspace/workspace-manager.js';
|
|
25
|
+
import { fetchProductBasics } from '../find-shared/mcp.js';
|
|
26
|
+
import { createPromptGenerator, extractTextFromContent, tryExtractResult, } from '../pr-shared/agent-utils.js';
|
|
27
|
+
import { createTechniquesCaptureState, createTechniquesMcpServer, validateContent, } from './mcp-server.js';
|
|
28
|
+
import { createProductTechniquesSystemPrompt, createProductTechniquesUserPrompt, } from './prompts.js';
|
|
29
|
+
import { isTechniquesExtraction, TECHNIQUES_CONTENT_MAX, TECHNIQUES_SUMMARY_MAX, } from './types.js';
|
|
30
|
+
const WORKSPACE_KEY = 'product-techniques';
|
|
31
|
+
const MAX_TURNS = 120;
|
|
32
|
+
// Heartbeat cadence: at most one DB write per HEARTBEAT_MIN_INTERVAL_MS.
|
|
33
|
+
// Triggered on every assistant message so a stalled agent (no messages
|
|
34
|
+
// flowing) lets the row go stale and the reader can mark it failed.
|
|
35
|
+
const HEARTBEAT_MIN_INTERVAL_MS = 15_000;
|
|
36
|
+
/**
 * Run the product-techniques phase for a single product.
 *
 * Flow, as implemented below:
 *   1. Claim the `product_techniques` row via `markRunning`; bail out with
 *      status 'cancelled' when the claim does not succeed.
 *   2. Resolve the GitHub config for the product; mark the row failed and
 *      return status 'error' when any of configured/token/owner/repo is
 *      missing.
 *   3. Clone the repo, build the prompts, and stream the agent session.
 *      Every assistant message appends to a text transcript and triggers a
 *      throttled heartbeat.
 *   4. On the first 'result' message, persist the tool-captured extraction
 *      (or the fenced-JSON fallback) via `markSuccess`, or record the
 *      failure via `markFailed`.
 *
 * The cloned repo is removed only when the run succeeded.
 *
 * @param {object} options
 * @param {string} options.productId - product whose repo is analysed
 * @param {string} options.techniquesId - id of the product_techniques row
 * @param {string} [options.guidance] - optional reviewer guidance for the prompt
 * @param {boolean} [options.verbose] - forwarded to helpers that log detail
 * @returns {Promise<{status: string, message: string, summary?: string}>}
 */
export async function runProductTechniquesPhase(options) {
    const { productId, techniquesId, guidance, verbose } = options;
    logInfo(`Starting product-techniques generation for product ${productId}`);
    const supabase = getSupabase();

    const claimed = await markRunning(supabase, techniquesId);
    if (!claimed) {
        return {
            status: 'cancelled',
            message: 'Techniques row is no longer in a runnable state (likely cancelled before the CLI started)',
        };
    }

    const githubConfig = await getGitHubConfigByProduct(productId, verbose);
    const repoIsUsable = githubConfig.configured &&
        githubConfig.token &&
        githubConfig.owner &&
        githubConfig.repo;
    if (!repoIsUsable) {
        const reason = githubConfig.message ||
            'GitHub repository not configured for this product. Connect a repo first.';
        await markFailed(supabase, techniquesId, reason);
        return { status: 'error', message: reason };
    }

    let repoPath;
    let succeeded = false;
    try {
        ({ repoPath } = cloneIssueRepo(ensureWorkspaceDir(), `${WORKSPACE_KEY}-${productId}`, githubConfig.owner, githubConfig.repo, githubConfig.token));
        const product = await fetchProductBasics(productId);
        const systemPrompt = createProductTechniquesSystemPrompt();
        const userPrompt = createProductTechniquesUserPrompt({
            productName: product.name,
            productDescription: product.description,
            guidance,
        });
        logInfo('Running Claude agent to write techniques catalogue...');
        const captureState = createTechniquesCaptureState();
        const mcpServer = createTechniquesMcpServer(captureState);

        const agentOptions = {
            systemPrompt: {
                type: 'preset',
                preset: 'claude_code',
                append: systemPrompt,
            },
            model: DEFAULT_MODEL,
            maxTurns: MAX_TURNS,
            permissionMode: 'bypassPermissions',
            cwd: repoPath,
            mcpServers: {
                'product-techniques': mcpServer,
            },
        };

        let transcript = '';
        let lastBeatAt = 0;
        // Awaited so heartbeat writes never pile up; skipped entirely while
        // the previous beat is younger than HEARTBEAT_MIN_INTERVAL_MS.
        const beatIfDue = async () => {
            const now = Date.now();
            if (now - lastBeatAt < HEARTBEAT_MIN_INTERVAL_MS) {
                return;
            }
            lastBeatAt = now;
            await heartbeat(supabase, techniquesId);
        };

        const stream = query({
            prompt: createPromptGenerator(userPrompt),
            options: agentOptions,
        });
        for await (const sdkMessage of stream) {
            if (sdkMessage.type === 'assistant') {
                transcript += extractTextFromContent(sdkMessage.message?.content ?? [], verbose);
                await beatIfDue();
                continue;
            }
            if (sdkMessage.type !== 'result') {
                continue;
            }

            // The tool-captured extraction wins; the fenced-JSON fallback is
            // consulted only when nothing was captured.
            const captured = captureState.captured;
            let recovered = null;
            if (!captured) {
                recovered = tryFallbackParse(sdkMessage, transcript);
            }
            const extraction = captured ?? recovered;

            if (!extraction) {
                let failure;
                if (sdkMessage.subtype === 'success') {
                    failure = 'Techniques generation failed: agent did not call submit_techniques and no parseable fallback was found';
                }
                else {
                    failure = `Techniques generation failed: agent ${sdkMessage.subtype}`;
                }
                const recorded = await markFailed(supabase, techniquesId, failure);
                if (recorded) {
                    return { status: 'error', message: failure };
                }
                return {
                    status: 'cancelled',
                    message: 'Generation was cancelled while the agent was running',
                };
            }

            if (recovered) {
                logWarning('Agent skipped submit_techniques; used fenced fallback JSON instead');
            }
            const stored = await markSuccess(supabase, techniquesId, extraction.summary, extraction.content);
            if (!stored) {
                return {
                    status: 'cancelled',
                    message: 'Generation was cancelled before the result could be written',
                };
            }
            succeeded = true;
            logSuccess(`Techniques catalogue generated (${extraction.content.length} chars of markdown)`);
            return {
                status: 'success',
                message: 'Techniques catalogue generated',
                summary: extraction.summary,
            };
        }

        // The stream finished without ever yielding a 'result' message.
        const noResult = 'Techniques generation ended without a result message';
        await markFailed(supabase, techniquesId, noResult);
        return { status: 'error', message: noResult };
    }
    catch (error) {
        const errorMessage = error instanceof Error ? error.message : String(error);
        logError(`Techniques generation failed: ${errorMessage}`);
        await markFailed(supabase, techniquesId, errorMessage);
        return { status: 'error', message: errorMessage };
    }
    finally {
        // The clone is only removed after a successful run.
        if (succeeded) {
            cleanupIssueRepo(repoPath);
        }
    }
}
|
|
160
|
+
// Backstop for runs where the agent never called submit_techniques: look for
// a fenced `techniques` JSON block in the final text and accept it only if it
// has the right shape, respects both length caps, and passes validateContent.
// Returns the parsed extraction, or null when any check fails.
function tryFallbackParse(resultMessage, assistantText) {
    // Use the SDK's final result text when the run succeeded and it is
    // non-empty; otherwise use the accumulated assistant text.
    let responseText = assistantText;
    if (resultMessage.subtype === 'success' && resultMessage.result) {
        responseText = resultMessage.result;
    }
    const candidate = tryExtractResult(responseText, 'techniques');
    const acceptable = isTechniquesExtraction(candidate) &&
        candidate.summary.length <= TECHNIQUES_SUMMARY_MAX &&
        candidate.content.length <= TECHNIQUES_CONTENT_MAX &&
        !validateContent(candidate.content).error;
    return acceptable ? candidate : null;
}
|
|
183
|
+
// ============================================================================
|
|
184
|
+
// Persistence — exported for unit tests
|
|
185
|
+
// ============================================================================
|
|
186
|
+
/**
 * Claim the techniques row for this run.
 *
 * Sets status to 'running', clears `error`, and stamps `last_heartbeat_at`,
 * but only for the row with the given id whose current status is 'pending'
 * or 'running'. Rows in any other status (e.g. 'cancelled') are left alone.
 *
 * @param supabase - Supabase client used for the update
 * @param techniquesId - id of the product_techniques row
 * @returns {Promise<boolean>} true when a row was updated; false when no row
 *   matched the filter or the update reported an error (which is logged as a
 *   warning).
 */
export async function markRunning(supabase, techniquesId) {
    const patch = {
        status: 'running',
        error: null,
        last_heartbeat_at: new Date().toISOString(),
    };
    const outcome = await supabase
        .from('product_techniques')
        .update(patch)
        .eq('id', techniquesId)
        .in('status', ['pending', 'running'])
        .select('id')
        .maybeSingle();
    if (outcome.error) {
        logWarning(`Could not mark techniques as running: ${outcome.error.message}`);
        return false;
    }
    return outcome.data !== null;
}
|
|
210
|
+
/**
 * Refresh `last_heartbeat_at` on the techniques row.
 *
 * The update is restricted to the row with the given id while its status is
 * 'running'. An error reported by the update is logged as a warning and
 * otherwise ignored; this function returns nothing either way.
 *
 * @param supabase - Supabase client used for the update
 * @param techniquesId - id of the product_techniques row
 * @returns {Promise<void>}
 */
export async function heartbeat(supabase, techniquesId) {
    const stamp = { last_heartbeat_at: new Date().toISOString() };
    const outcome = await supabase
        .from('product_techniques')
        .update(stamp)
        .eq('id', techniquesId)
        .eq('status', 'running');
    if (!outcome.error) {
        return;
    }
    logWarning(`Heartbeat failed: ${outcome.error.message}`);
}
|
|
225
|
+
/**
 * Record a failure on the techniques row, provided it is still active.
 *
 * Sets status to 'failed', stores the error message, and stamps
 * `completed_at`, but only for the row with the given id whose status is
 * 'pending' or 'running'.
 *
 * @param supabase - Supabase client used for the update
 * @param techniquesId - id of the product_techniques row
 * @param errorMessage - text stored in the row's `error` column
 * @returns {Promise<boolean>} true when a row was updated; false when no row
 *   matched the filter or the update reported an error (which is logged as a
 *   warning).
 */
export async function markFailed(supabase, techniquesId, errorMessage) {
    const patch = {
        status: 'failed',
        error: errorMessage,
        completed_at: new Date().toISOString(),
    };
    const outcome = await supabase
        .from('product_techniques')
        .update(patch)
        .eq('id', techniquesId)
        .in('status', ['pending', 'running'])
        .select('id')
        .maybeSingle();
    if (outcome.error) {
        logWarning(`Could not mark techniques as failed: ${outcome.error.message}`);
        return false;
    }
    return outcome.data !== null;
}
|
|
249
|
+
/**
 * Persist a finished catalogue on the techniques row, provided it is still
 * active.
 *
 * Sets status to 'success', stores summary and content, clears `error`, and
 * stamps `completed_at`, but only for the row with the given id whose status
 * is 'pending' or 'running'.
 *
 * @param supabase - Supabase client used for the update
 * @param techniquesId - id of the product_techniques row
 * @param summary - short summary text to store
 * @param content - markdown body to store
 * @returns {Promise<boolean>} true when a row was updated; false when no row
 *   matched the filter or the update reported an error (which is logged as a
 *   warning).
 */
export async function markSuccess(supabase, techniquesId, summary, content) {
    const patch = {
        status: 'success',
        summary,
        content,
        error: null,
        completed_at: new Date().toISOString(),
    };
    const outcome = await supabase
        .from('product_techniques')
        .update(patch)
        .eq('id', techniquesId)
        .in('status', ['pending', 'running'])
        .select('id')
        .maybeSingle();
    if (outcome.error) {
        logWarning(`Could not mark techniques as success: ${outcome.error.message}`);
        return false;
    }
    return outcome.data !== null;
}
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* In-process MCP server exposing a single tool — `submit_techniques` — that
|
|
3
|
+
* the Claude Agent SDK session calls to return the final markdown catalogue.
|
|
4
|
+
*
|
|
5
|
+
* Using a tool call instead of parsing a fenced JSON block lets the SDK
|
|
6
|
+
* enforce the schema (via Zod) and lets the agent self-correct when
|
|
7
|
+
* validation fails: the error message goes back as the tool result and the
|
|
8
|
+
* agent can re-call the tool with corrected data.
|
|
9
|
+
*
|
|
10
|
+
* The capture pattern: callers pass in a `TechniquesCaptureState`. The tool
|
|
11
|
+
* handler stores the validated args on `state.captured`. The orchestrator
|
|
12
|
+
* reads it after the SDK loop ends.
|
|
13
|
+
*/
|
|
14
|
+
import { z } from 'zod';
|
|
15
|
+
import { type TechniquesExtraction } from './types.js';
|
|
16
|
+
export interface TechniquesCaptureState {
|
|
17
|
+
captured: TechniquesExtraction | null;
|
|
18
|
+
}
|
|
19
|
+
export declare function createTechniquesCaptureState(): TechniquesCaptureState;
|
|
20
|
+
/**
|
|
21
|
+
* Validation that goes beyond the basic Zod schema: enforce that the
|
|
22
|
+
* markdown actually has the H2 sections the prompt asks for. The agent gets
|
|
23
|
+
* an actionable error and can re-submit.
|
|
24
|
+
*
|
|
25
|
+
* We don't require ALL six sections — agents on tiny repos often legitimately
|
|
26
|
+
* collapse some. We require at least the first one ("Languages & Runtime")
|
|
27
|
+
* plus "Notable Techniques", which the prompt calls out as the whole point.
|
|
28
|
+
*/
|
|
29
|
+
export declare function validateContent(content: string): {
|
|
30
|
+
error: string | null;
|
|
31
|
+
};
|
|
32
|
+
/**
|
|
33
|
+
* Build the `submit_techniques` tool. Exported separately from the server
|
|
34
|
+
* so tests can exercise the handler directly without going through MCP
|
|
35
|
+
* transport.
|
|
36
|
+
*/
|
|
37
|
+
export declare function createSubmitTechniquesTool(state: TechniquesCaptureState): import("@anthropic-ai/claude-agent-sdk").SdkMcpToolDefinition<{
|
|
38
|
+
summary: z.ZodString;
|
|
39
|
+
content: z.ZodString;
|
|
40
|
+
}>;
|
|
41
|
+
export declare function createTechniquesMcpServer(state: TechniquesCaptureState): import("@anthropic-ai/claude-agent-sdk").McpSdkServerConfigWithInstance;
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* In-process MCP server exposing a single tool — `submit_techniques` — that
|
|
3
|
+
* the Claude Agent SDK session calls to return the final markdown catalogue.
|
|
4
|
+
*
|
|
5
|
+
* Using a tool call instead of parsing a fenced JSON block lets the SDK
|
|
6
|
+
* enforce the schema (via Zod) and lets the agent self-correct when
|
|
7
|
+
* validation fails: the error message goes back as the tool result and the
|
|
8
|
+
* agent can re-call the tool with corrected data.
|
|
9
|
+
*
|
|
10
|
+
* The capture pattern: callers pass in a `TechniquesCaptureState`. The tool
|
|
11
|
+
* handler stores the validated args on `state.captured`. The orchestrator
|
|
12
|
+
* reads it after the SDK loop ends.
|
|
13
|
+
*/
|
|
14
|
+
import { createSdkMcpServer, tool } from '@anthropic-ai/claude-agent-sdk';
|
|
15
|
+
import { z } from 'zod';
|
|
16
|
+
import { TECHNIQUES_CONTENT_MAX, TECHNIQUES_SUMMARY_MAX, } from './types.js';
|
|
17
|
+
/**
 * Create a fresh capture holder for one agent session. `captured` starts as
 * null and is filled in by the submit_techniques tool handler.
 */
export function createTechniquesCaptureState() {
    const state = { captured: null };
    return state;
}
|
|
20
|
+
/**
 * Content validation beyond the Zod schema: the markdown must contain the
 * two H2 headings the prompt treats as mandatory — "Languages & Runtime" and
 * "Notable Techniques". The other sections are intentionally not enforced.
 *
 * Matching is case-insensitive and tolerant of extra spaces or tabs, but the
 * `##` marker and the heading text must sit on the same line. (Using `\s`
 * here would also match newlines, so a bare `##` line followed by the text
 * as a paragraph would wrongly pass.)
 *
 * @param {string} content - markdown body submitted by the agent
 * @returns {{error: string | null}} `error` is null when both headings are
 *   present; otherwise a message naming the first missing heading, suitable
 *   for returning to the agent as a tool error.
 */
export function validateContent(content) {
    const required = [
        {
            heading: '## Languages & Runtime',
            pattern: /^##[ \t]+Languages[ \t]*&[ \t]*Runtime\b/im,
        },
        {
            heading: '## Notable Techniques',
            pattern: /^##[ \t]+Notable[ \t]+Techniques\b/im,
        },
    ];
    const missing = required.find(({ pattern }) => !pattern.test(content));
    if (missing) {
        return {
            // Name the heading itself rather than the regex source so the
            // agent knows exactly what to add.
            error: `content is missing the required section "${missing.heading}". Add the section and re-call submit_techniques.`,
        };
    }
    return { error: null };
}
|
|
43
|
+
/**
 * Build the `submit_techniques` tool definition.
 *
 * The handler runs `validateContent` on the submitted markdown. On failure it
 * returns the validation message as an error result and leaves `state`
 * untouched; on success it stores `{ summary, content }` on `state.captured`
 * and returns a short confirmation.
 *
 * @param state - capture holder whose `captured` field receives the result
 * @returns the tool definition produced by the SDK's `tool` helper
 */
export function createSubmitTechniquesTool(state) {
    const description = [
        'Submit the final techniques catalogue. Call this EXACTLY once, when',
        'you have finished cataloguing every technique. Pass the summary and',
        'the full markdown content as arguments. After this call succeeds, end',
        'your turn — do NOT also paste the same content as a fenced code block.',
        'If validation fails, the error message tells you what to fix; call the',
        'tool again with corrected data.',
    ].join(' ');

    const inputShape = {
        summary: z
            .string()
            .min(1)
            .max(TECHNIQUES_SUMMARY_MAX)
            .describe('1-2 sentence summary suitable for a tab header. Plain text, no markdown.'),
        content: z
            .string()
            .min(1)
            .max(TECHNIQUES_CONTENT_MAX)
            .describe('Full markdown body with H2 sections: Languages & Runtime, Frameworks & Libraries, Architecture Patterns, State & Data Techniques, Build & Deploy Techniques, Notable Techniques.'),
    };

    const handleSubmit = async (args) => {
        const { summary, content } = args;
        const extraction = { summary, content };
        const verdict = validateContent(extraction.content);
        if (verdict.error) {
            // Rejected: report the problem back to the agent, capture nothing.
            return {
                content: [{ type: 'text', text: verdict.error }],
                isError: true,
            };
        }
        state.captured = extraction;
        const confirmation = {
            type: 'text',
            text: `Captured techniques catalogue (${extraction.content.length} chars). End your turn now.`,
        };
        return { content: [confirmation] };
    };

    return tool('submit_techniques', description, inputShape, handleSubmit);
}
|
|
90
|
+
/**
 * Create the in-process 'product-techniques' MCP server (version '1.0.0')
 * exposing the single `submit_techniques` tool bound to the given state.
 *
 * @param state - capture holder passed through to the tool handler
 */
export function createTechniquesMcpServer(state) {
    const submitTool = createSubmitTechniquesTool(state);
    const serverConfig = {
        name: 'product-techniques',
        version: '1.0.0',
        tools: [submitTool],
    };
    return createSdkMcpServer(serverConfig);
}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Prompts for the product-level techniques phase.
|
|
3
|
+
*
|
|
4
|
+
* Agent's job: explore the cloned product repo and write a markdown catalogue
|
|
5
|
+
* of the techniques it actually uses — languages, frameworks, patterns, state
|
|
6
|
+
* idioms, build/deploy choices, plus the clever non-obvious bits. Focused on
|
|
7
|
+
* what a new engineer needs to recognize and recreate, not feature lists.
|
|
8
|
+
*
|
|
9
|
+
* The final result is submitted via the `submit_techniques` MCP tool, NOT a
|
|
10
|
+
* fenced JSON block. Tool calls let the SDK enforce the schema with Zod and
|
|
11
|
+
* let the agent self-correct on validation errors. See mcp-server.ts.
|
|
12
|
+
*/
|
|
13
|
+
export interface ProductTechniquesPromptContext {
|
|
14
|
+
productName: string;
|
|
15
|
+
productDescription?: string;
|
|
16
|
+
guidance?: string;
|
|
17
|
+
}
|
|
18
|
+
export declare function createProductTechniquesSystemPrompt(): string;
|
|
19
|
+
export declare function createProductTechniquesUserPrompt(context: ProductTechniquesPromptContext): string;
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Prompts for the product-level techniques phase.
|
|
3
|
+
*
|
|
4
|
+
* Agent's job: explore the cloned product repo and write a markdown catalogue
|
|
5
|
+
* of the techniques it actually uses — languages, frameworks, patterns, state
|
|
6
|
+
* idioms, build/deploy choices, plus the clever non-obvious bits. Focused on
|
|
7
|
+
* what a new engineer needs to recognize and recreate, not feature lists.
|
|
8
|
+
*
|
|
9
|
+
* The final result is submitted via the `submit_techniques` MCP tool, NOT a
|
|
10
|
+
* fenced JSON block. Tool calls let the SDK enforce the schema with Zod and
|
|
11
|
+
* let the agent self-correct on validation errors. See mcp-server.ts.
|
|
12
|
+
*/
|
|
13
|
+
/**
 * Build the system prompt appended to the agent's preset prompt. It sets the
 * role, describes the `submit_techniques` output protocol, lists the six H2
 * sections expected in the markdown body, and states the writing rules.
 *
 * @returns {string} the prompt text, lines joined with '\n'
 */
export function createProductTechniquesSystemPrompt() {
    const promptLines = [
        "You are a senior staff engineer cataloguing the techniques a product's codebase uses.",
        '',
        "The current working directory is a fresh clone of the product's repository. Use Glob/Grep/Read (and Bash for git/log when helpful) to explore it before writing.",
        '',
        "Your audience: a new engineer joining the team. They need to recognize WHICH techniques this repo employs — what frameworks, what patterns, what idioms — so they can read code without surprise and contribute in the codebase's existing style. This is not a feature list and not an architecture proposal. Anchor every technique in the actual code; reference real file paths.",
        '',
        '## Output protocol',
        '',
        'When you are ready to submit, call the `submit_techniques` tool EXACTLY ONCE with:',
        '- `summary` — 1-2 sentence summary suitable for a tab header (plain text, no markdown).',
        '- `content` — the full markdown body, structured as described below.',
        '',
        'Do NOT also paste the same content as a fenced code block — the tool call is the only channel for the result. If validation fails, the tool response tells you what to fix; call the tool again with corrected data.',
        '',
        '## Required sections in `content`',
        '',
        'The markdown body MUST contain these H2 sections, in this order:',
        '',
        '1. **## Languages & Runtime** — languages used (with versions if pinned), runtime targets (Node / Bun / Deno / browser / native / etc), package manager.',
        "2. **## Frameworks & Libraries** — the major frameworks and libraries this repo depends on, and what each one is doing here. Don't just list package names; explain the role. Group by area (UI, server, data, infra) if it helps.",
        '3. **## Architecture Patterns** — the architectural choices the code embodies: layering / module boundaries / dependency direction / how features are organised. Include one Mermaid `flowchart TD` showing the dominant pattern (≤ 10 nodes). Name the pattern when you can ("repository pattern", "feature-sliced design", "hexagonal", "BFF", etc.).',
        '4. **## State & Data Techniques** — state management, data fetching, caching, optimistic updates, validation, type-safety across boundaries. How is server state vs client state handled? How are forms managed? How is data flowing between server and client?',
        '5. **## Build & Deploy Techniques** — bundler / compiler choices, code-splitting, environment handling, monorepo tooling, CI/CD specifics, deployment target (edge / serverless / containers / static).',
        "6. **## Notable Techniques** — the genuinely interesting bits that aren't obvious from the stack: clever abstractions, performance optimizations, security hardening, custom hooks/utilities worth knowing, intentional deviations from defaults, surprising workarounds. Each one: what it is, where it lives (file path), why it matters. This section is the whole point — be specific and useful here.",
        '',
        'The validator will reject content missing the "Languages & Runtime" or "Notable Techniques" headings.',
        '',
        '## Rules',
        '',
        '- Use relative file paths from the repo root (e.g. `src/services/auth.ts`). Link to specific files where the technique is most visible.',
        '- Mermaid blocks must be valid (no unclosed quotes, no unsupported node shapes). Prefer simple `flowchart TD` syntax.',
        `- Don't invent or guess. If a section has nothing distinctive ("just a stock CRA app"), say so briefly — don't pad.`,
        `- Focus on TECHNIQUES, not features. Bad: "the app has a login screen". Good: "uses next-auth's credentials provider with a custom JWT callback at src/lib/auth.ts:42".`,
        '- The whole document should fit in one screen-readable scroll — aim for ~1500-3000 words of content. Less is fine for small repos.',
    ];
    return promptLines.join('\n');
}
|
|
49
|
+
/**
 * Build the user prompt for one product.
 *
 * Always emits the product title and the task section. The description
 * section appears only when `productDescription` is truthy; the reviewer
 * guidance section appears only when `guidance` has non-whitespace text, and
 * the guidance is trimmed before inclusion.
 *
 * @param context - `{ productName, productDescription?, guidance? }`
 * @returns {string} the prompt text, lines joined with '\n'
 */
export function createProductTechniquesUserPrompt(context) {
    const sections = [`# Product: ${context.productName}`];
    if (context.productDescription) {
        sections.push('', '## Description', context.productDescription);
    }
    if (context.guidance && context.guidance.trim()) {
        sections.push('', '## Reviewer guidance (focus or exclusions)', context.guidance.trim());
    }
    sections.push('', '## Task', 'Explore the cloned repository and produce the techniques catalogue, then submit it via the `submit_techniques` MCP tool as specified in your system prompt. Pay particular attention to the "Notable Techniques" section — that is the real value to the reader.');
    return sections.join('\n');
}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shape returned by the agent via the `submit_techniques` MCP tool. Mirrors
|
|
3
|
+
* the Zod schema in `mcp-server.ts` — kept duplicated as a plain TS type so
|
|
4
|
+
* consumers (the phase orchestrator, persistence helpers, tests) don't have
|
|
5
|
+
* to inflate Zod into their dependency graph.
|
|
6
|
+
*/
|
|
7
|
+
export interface TechniquesExtraction {
|
|
8
|
+
summary: string;
|
|
9
|
+
content: string;
|
|
10
|
+
}
|
|
11
|
+
export declare function isTechniquesExtraction(v: unknown): v is TechniquesExtraction;
|
|
12
|
+
export declare const TECHNIQUES_SUMMARY_MAX = 500;
|
|
13
|
+
export declare const TECHNIQUES_CONTENT_MAX = 200000;
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
/**
 * Runtime shape check: true only for a non-null object whose `summary` and
 * `content` properties are both strings. Extra properties are ignored.
 *
 * @param v - any value, typically parsed JSON
 * @returns {boolean}
 */
export function isTechniquesExtraction(v) {
    const isObjectLike = Boolean(v) && typeof v === 'object';
    return isObjectLike &&
        ['summary', 'content'].every((field) => typeof v[field] === 'string');
}
|
|
8
|
+
// Defensive caps mirroring the DB CHECK constraints from
|
|
9
|
+
// 20260521000000_create_product_techniques.sql. Keeping the limits here
|
|
10
|
+
// too lets the MCP tool reject oversized output with an actionable error
|
|
11
|
+
// message instead of getting a Postgres constraint violation.
|
|
12
|
+
export const TECHNIQUES_SUMMARY_MAX = 500;
|
|
13
|
+
export const TECHNIQUES_CONTENT_MAX = 200_000;
|
|
@@ -15,6 +15,7 @@ import { logError, logInfo, logSuccess, logWarning } from '../../utils/logger.js
|
|
|
15
15
|
import { cleanupIssueRepo, cloneIssueRepo, ensureWorkspaceDir, } from '../../workspace/workspace-manager.js';
|
|
16
16
|
import { fetchProductBasics } from '../find-shared/mcp.js';
|
|
17
17
|
import { createPromptGenerator, extractTextFromContent, tryExtractResult, } from '../pr-shared/agent-utils.js';
|
|
18
|
+
import { createScreenFlowCaptureState, createScreenFlowMcpServer, validateConsistency, } from './mcp-server.js';
|
|
18
19
|
import { createScreenFlowSystemPrompt, createScreenFlowUserPrompt, } from './prompts.js';
|
|
19
20
|
import { extractTheme } from './theme.js';
|
|
20
21
|
import { isScreenFlowExtraction, } from './types.js';
|
|
@@ -61,6 +62,17 @@ export async function runScreenFlowPhase(options) {
|
|
|
61
62
|
guidance,
|
|
62
63
|
});
|
|
63
64
|
logInfo('Running Claude screen-flow extraction...');
|
|
65
|
+
// The agent submits the extraction by calling submit_screen_flow on the
|
|
66
|
+
// in-process MCP server. The handler validates with Zod + cross-field
|
|
67
|
+
// checks and stores the result in `captureState.captured`. If the agent
|
|
68
|
+
// never calls the tool, we fall back to parsing a fenced screen_flow
|
|
69
|
+
// block out of the assistant text.
|
|
70
|
+
const captureState = createScreenFlowCaptureState();
|
|
71
|
+
const mcpServer = createScreenFlowMcpServer(captureState, {
|
|
72
|
+
onProgress: ({ phase, message }) => {
|
|
73
|
+
logInfo(`[${phase}] ${message}`);
|
|
74
|
+
},
|
|
75
|
+
});
|
|
64
76
|
let lastAssistantResponse = '';
|
|
65
77
|
let extraction = null;
|
|
66
78
|
for await (const message of query({
|
|
@@ -75,28 +87,19 @@ export async function runScreenFlowPhase(options) {
|
|
|
75
87
|
maxTurns: MAX_TURNS,
|
|
76
88
|
permissionMode: 'bypassPermissions',
|
|
77
89
|
cwd: repoPath,
|
|
90
|
+
mcpServers: {
|
|
91
|
+
'screen-flow': mcpServer,
|
|
92
|
+
},
|
|
78
93
|
},
|
|
79
94
|
})) {
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
if (message.type !== 'result') {
|
|
85
|
-
continue;
|
|
86
|
-
}
|
|
87
|
-
const responseText = message.subtype === 'success'
|
|
88
|
-
? message.result || lastAssistantResponse
|
|
89
|
-
: lastAssistantResponse;
|
|
90
|
-
const parsed = tryExtractResult(responseText, 'screen_flow');
|
|
91
|
-
if (isScreenFlowExtraction(parsed)) {
|
|
92
|
-
extraction = parsed;
|
|
93
|
-
}
|
|
94
|
-
else if (message.subtype !== 'success') {
|
|
95
|
-
logError(`Extraction incomplete: ${message.subtype}`);
|
|
95
|
+
const { assistantBuffer, extraction: nextExtraction } = processSdkMessage(message, lastAssistantResponse, captureState, verbose);
|
|
96
|
+
lastAssistantResponse = assistantBuffer;
|
|
97
|
+
if (nextExtraction) {
|
|
98
|
+
extraction = nextExtraction;
|
|
96
99
|
}
|
|
97
100
|
}
|
|
98
101
|
if (!extraction) {
|
|
99
|
-
const msg = 'Screen flow extraction failed:
|
|
102
|
+
const msg = 'Screen flow extraction failed: agent did not call submit_screen_flow and no parseable screen_flow block was found in the response';
|
|
100
103
|
await markFlowFailed(supabase, flowId, msg);
|
|
101
104
|
return { status: 'error', message: msg };
|
|
102
105
|
}
|
|
@@ -125,6 +128,59 @@ export async function runScreenFlowPhase(options) {
|
|
|
125
128
|
}
|
|
126
129
|
}
|
|
127
130
|
}
|
|
131
|
+
// Per-message handler, kept outside the SDK loop so that runScreenFlowPhase
// stays under the eslint complexity ceiling.
//
// Returns `{ assistantBuffer, extraction }`:
//   - `assistantBuffer` is the running assistant text; it only grows on
//     'assistant' messages and is passed through unchanged otherwise.
//   - `extraction` is non-null only for a 'result' message that yields a
//     screen flow: the value stored in `captureState.captured` when present,
//     otherwise whatever tryFallbackParse recovers from the text.
function processSdkMessage(
// eslint-disable-next-line @typescript-eslint/no-explicit-any
message, assistantBuffer, captureState, verbose) {
    const withoutExtraction = (buffer) => ({ assistantBuffer: buffer, extraction: null });
    const withExtraction = (extraction) => ({ assistantBuffer, extraction });
    switch (message.type) {
        case 'assistant': {
            const blocks = message.message?.content ?? [];
            return withoutExtraction(assistantBuffer + extractTextFromContent(blocks, verbose));
        }
        case 'user': {
            if (verbose) {
                // Surface tool_result blocks (incl. submit_screen_flow
                // validation errors) so verbose mode shows the round-trip.
                // The returned text is intentionally discarded.
                const toolResults = message.message?.content;
                if (Array.isArray(toolResults)) {
                    extractTextFromContent(toolResults, verbose);
                }
            }
            return withoutExtraction(assistantBuffer);
        }
        case 'result':
            break;
        default:
            return withoutExtraction(assistantBuffer);
    }
    // From here on we are handling the 'result' message.
    // A captured tool submission takes priority over any text parsing.
    const submitted = captureState.captured;
    if (submitted) {
        return withExtraction(submitted);
    }
    const recovered = tryFallbackParse(message, assistantBuffer);
    if (recovered) {
        logWarning('Agent emitted a fenced screen_flow block instead of calling submit_screen_flow; using the parsed text as a fallback.');
        return withExtraction(recovered);
    }
    if (message.subtype !== 'success') {
        logError(`Extraction incomplete: ${message.subtype}`);
    }
    return withoutExtraction(assistantBuffer);
}
|
|
167
|
+
// Fallback parser: recover a screen_flow JSON block from the assistant's text
// when the agent skipped the submit_screen_flow tool call.
//
// Parameters:
//   resultMessage - the SDK 'result' message; `subtype` and `result` are read.
//   assistantText - assistant text accumulated over the whole run.
//
// Returns the parsed extraction, or null when no candidate text yields a block
// that passes both isScreenFlowExtraction and validateConsistency.
//
// Candidate order:
//   1. The final `result` string for a success result (or the accumulated
//      assistant text when `result` is empty / the run did not succeed).
//   2. The accumulated assistant text, if it is non-empty and differs from
//      candidate 1. This covers the case where the fenced block was emitted
//      in an earlier turn and the final result text is only a summary.
function tryFallbackParse(resultMessage, assistantText) {
    const primaryText = resultMessage.subtype === 'success'
        ? resultMessage.result || assistantText
        : assistantText;
    const candidates = [primaryText];
    if (assistantText && assistantText !== primaryText) {
        candidates.push(assistantText);
    }
    for (const candidateText of candidates) {
        const parsed = tryExtractResult(candidateText, 'screen_flow');
        if (!isScreenFlowExtraction(parsed)) {
            continue;
        }
        const { error } = validateConsistency(parsed);
        if (!error) {
            return parsed;
        }
        // An inconsistent block is reported but does not stop the search:
        // a later candidate may still hold a valid extraction.
        logWarning(`Fallback extraction failed consistency check: ${error}`);
    }
    return null;
}
|
|
128
184
|
// ============================================================================
|
|
129
185
|
// Persistence
|
|
130
186
|
// ============================================================================
|