pullfrog 0.0.201 → 0.0.203
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agents/postRun.d.ts +66 -0
- package/dist/agents/reviewer.d.ts +32 -0
- package/dist/agents/sessionLabeler.d.ts +77 -0
- package/dist/agents/shared.d.ts +86 -3
- package/dist/cli.mjs +2803 -1025
- package/dist/external.d.ts +1 -1
- package/dist/index.js +2780 -1009
- package/dist/internal/index.d.ts +1 -1
- package/dist/internal.js +280 -68
- package/dist/lifecycle.d.ts +1 -1
- package/dist/mcp/checkout.d.ts +16 -2
- package/dist/mcp/comment.d.ts +1 -0
- package/dist/mcp/geminiSanitizer.d.ts +17 -0
- package/dist/mcp/git.d.ts +8 -2
- package/dist/mcp/review.d.ts +139 -0
- package/dist/mcp/server.d.ts +12 -0
- package/dist/mcp/shared.d.ts +1 -1
- package/dist/models.d.ts +17 -0
- package/dist/modes.d.ts +1 -1
- package/dist/skills/git-archaeology/SKILL.md +188 -0
- package/dist/utils/activity.d.ts +4 -0
- package/dist/utils/agent.d.ts +3 -1
- package/dist/utils/diffCoverage.d.ts +62 -0
- package/dist/utils/lifecycle.d.ts +14 -2
- package/dist/utils/log.d.ts +13 -2
- package/dist/utils/patchWorkflowRunFields.d.ts +27 -4
- package/dist/utils/runContext.d.ts +2 -0
- package/dist/utils/secrets.d.ts +9 -2
- package/dist/utils/setup.d.ts +13 -0
- package/dist/utils/skills.d.ts +10 -0
- package/dist/utils/subprocess.d.ts +7 -0
- package/dist/utils/time.d.ts +1 -0
- package/dist/utils/todoTracking.d.ts +3 -1
- package/package.json +3 -2
package/dist/internal/index.d.ts
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
* Re-exports shared types, values, and utilities needed by the Next.js app.
|
|
4
4
|
*/
|
|
5
5
|
export type { AuthorPermission, ModelAlias, ModelProvider, Payload, PayloadEvent, ProviderConfig, PushPermission, ShellPermission, ToolPermission, WriteablePayload, } from "../external.ts";
|
|
6
|
-
export { getModelEnvVars, getModelProvider, getProviderDisplayName, modelAliases, parseModel, providers, pullfrogMcpName, resolveCliModel, resolveModelSlug, } from "../external.ts";
|
|
6
|
+
export { getModelEnvVars, getModelProvider, getProviderDisplayName, modelAliases, parseModel, providers, pullfrogMcpName, resolveCliModel, resolveDisplayAlias, resolveModelSlug, resolveOpenRouterModel, } from "../external.ts";
|
|
7
7
|
export type { Mode } from "../modes.ts";
|
|
8
8
|
export { modes } from "../modes.ts";
|
|
9
9
|
export type { BuildPullfrogFooterParams, WorkflowRunFooterInfo, } from "../utils/buildPullfrogFooter.ts";
|
package/dist/internal.js
CHANGED
|
@@ -11,8 +11,8 @@ var providers = {
|
|
|
11
11
|
models: {
|
|
12
12
|
"claude-opus": {
|
|
13
13
|
displayName: "Claude Opus",
|
|
14
|
-
resolve: "anthropic/claude-opus-4-
|
|
15
|
-
openRouterResolve: "openrouter/anthropic/claude-opus-4.
|
|
14
|
+
resolve: "anthropic/claude-opus-4-7",
|
|
15
|
+
openRouterResolve: "openrouter/anthropic/claude-opus-4.7",
|
|
16
16
|
preferred: true
|
|
17
17
|
},
|
|
18
18
|
"claude-sonnet": {
|
|
@@ -31,16 +31,38 @@ var providers = {
|
|
|
31
31
|
displayName: "OpenAI",
|
|
32
32
|
envVars: ["OPENAI_API_KEY"],
|
|
33
33
|
models: {
|
|
34
|
+
gpt: {
|
|
35
|
+
displayName: "GPT",
|
|
36
|
+
resolve: "openai/gpt-5.5",
|
|
37
|
+
openRouterResolve: "openrouter/openai/gpt-5.5",
|
|
38
|
+
preferred: true
|
|
39
|
+
},
|
|
40
|
+
"gpt-pro": {
|
|
41
|
+
displayName: "GPT Pro",
|
|
42
|
+
resolve: "openai/gpt-5.5-pro",
|
|
43
|
+
openRouterResolve: "openrouter/openai/gpt-5.5-pro"
|
|
44
|
+
},
|
|
45
|
+
"gpt-mini": {
|
|
46
|
+
displayName: "GPT Mini",
|
|
47
|
+
resolve: "openai/gpt-5.4-mini",
|
|
48
|
+
openRouterResolve: "openrouter/openai/gpt-5.4-mini"
|
|
49
|
+
},
|
|
50
|
+
// legacy aliases — openai unified the codex line into the main GPT family
|
|
51
|
+
// and is shutting down every "-codex" snapshot on 2026-07-23. transparently
|
|
52
|
+
// upgrade existing users via the fallback chain. UI display sites resolve
|
|
53
|
+
// to the terminal alias's label (so dropdown trigger + PR footers show
|
|
54
|
+
// "GPT" / "GPT Mini", not the historical name).
|
|
34
55
|
"gpt-codex": {
|
|
35
56
|
displayName: "GPT Codex",
|
|
36
57
|
resolve: "openai/gpt-5.3-codex",
|
|
37
58
|
openRouterResolve: "openrouter/openai/gpt-5.3-codex",
|
|
38
|
-
|
|
59
|
+
fallback: "openai/gpt"
|
|
39
60
|
},
|
|
40
61
|
"gpt-codex-mini": {
|
|
41
62
|
displayName: "GPT Codex Mini",
|
|
42
|
-
resolve: "openai/codex-mini
|
|
43
|
-
openRouterResolve: "openrouter/openai/gpt-5.1-codex-mini"
|
|
63
|
+
resolve: "openai/gpt-5.1-codex-mini",
|
|
64
|
+
openRouterResolve: "openrouter/openai/gpt-5.1-codex-mini",
|
|
65
|
+
fallback: "openai/gpt-mini"
|
|
44
66
|
},
|
|
45
67
|
o3: {
|
|
46
68
|
displayName: "O3",
|
|
@@ -91,16 +113,30 @@ var providers = {
|
|
|
91
113
|
displayName: "DeepSeek",
|
|
92
114
|
envVars: ["DEEPSEEK_API_KEY"],
|
|
93
115
|
models: {
|
|
116
|
+
"deepseek-pro": {
|
|
117
|
+
displayName: "DeepSeek Pro",
|
|
118
|
+
resolve: "deepseek/deepseek-v4-pro",
|
|
119
|
+
openRouterResolve: "openrouter/deepseek/deepseek-v4-pro",
|
|
120
|
+
preferred: true
|
|
121
|
+
},
|
|
122
|
+
"deepseek-flash": {
|
|
123
|
+
displayName: "DeepSeek Flash",
|
|
124
|
+
resolve: "deepseek/deepseek-v4-flash",
|
|
125
|
+
openRouterResolve: "openrouter/deepseek/deepseek-v4-flash"
|
|
126
|
+
},
|
|
127
|
+
// legacy aliases — deepseek retires these on 2026-07-24; transparently
|
|
128
|
+
// upgrade existing users to the v4 family via the fallback chain.
|
|
94
129
|
"deepseek-reasoner": {
|
|
95
130
|
displayName: "DeepSeek Reasoner",
|
|
96
131
|
resolve: "deepseek/deepseek-reasoner",
|
|
97
132
|
openRouterResolve: "openrouter/deepseek/deepseek-v3.2",
|
|
98
|
-
|
|
133
|
+
fallback: "deepseek/deepseek-pro"
|
|
99
134
|
},
|
|
100
135
|
"deepseek-chat": {
|
|
101
136
|
displayName: "DeepSeek Chat",
|
|
102
137
|
resolve: "deepseek/deepseek-chat",
|
|
103
|
-
openRouterResolve: "openrouter/deepseek/deepseek-v3.2"
|
|
138
|
+
openRouterResolve: "openrouter/deepseek/deepseek-v3.2",
|
|
139
|
+
fallback: "deepseek/deepseek-flash"
|
|
104
140
|
}
|
|
105
141
|
}
|
|
106
142
|
}),
|
|
@@ -110,8 +146,8 @@ var providers = {
|
|
|
110
146
|
models: {
|
|
111
147
|
"kimi-k2": {
|
|
112
148
|
displayName: "Kimi K2",
|
|
113
|
-
resolve: "moonshotai/kimi-k2.
|
|
114
|
-
openRouterResolve: "openrouter/moonshotai/kimi-k2.
|
|
149
|
+
resolve: "moonshotai/kimi-k2.6",
|
|
150
|
+
openRouterResolve: "openrouter/moonshotai/kimi-k2.6",
|
|
115
151
|
preferred: true
|
|
116
152
|
}
|
|
117
153
|
}
|
|
@@ -129,8 +165,8 @@ var providers = {
|
|
|
129
165
|
},
|
|
130
166
|
"claude-opus": {
|
|
131
167
|
displayName: "Claude Opus",
|
|
132
|
-
resolve: "opencode/claude-opus-4-
|
|
133
|
-
openRouterResolve: "openrouter/anthropic/claude-opus-4.
|
|
168
|
+
resolve: "opencode/claude-opus-4-7",
|
|
169
|
+
openRouterResolve: "openrouter/anthropic/claude-opus-4.7"
|
|
134
170
|
},
|
|
135
171
|
"claude-sonnet": {
|
|
136
172
|
displayName: "Claude Sonnet",
|
|
@@ -142,15 +178,33 @@ var providers = {
|
|
|
142
178
|
resolve: "opencode/claude-haiku-4-5",
|
|
143
179
|
openRouterResolve: "openrouter/anthropic/claude-haiku-4.5"
|
|
144
180
|
},
|
|
181
|
+
gpt: {
|
|
182
|
+
displayName: "GPT",
|
|
183
|
+
resolve: "opencode/gpt-5.5",
|
|
184
|
+
openRouterResolve: "openrouter/openai/gpt-5.5"
|
|
185
|
+
},
|
|
186
|
+
"gpt-pro": {
|
|
187
|
+
displayName: "GPT Pro",
|
|
188
|
+
resolve: "opencode/gpt-5.5-pro",
|
|
189
|
+
openRouterResolve: "openrouter/openai/gpt-5.5-pro"
|
|
190
|
+
},
|
|
191
|
+
"gpt-mini": {
|
|
192
|
+
displayName: "GPT Mini",
|
|
193
|
+
resolve: "opencode/gpt-5.4-mini",
|
|
194
|
+
openRouterResolve: "openrouter/openai/gpt-5.4-mini"
|
|
195
|
+
},
|
|
196
|
+
// legacy aliases — see openai provider above for context.
|
|
145
197
|
"gpt-codex": {
|
|
146
198
|
displayName: "GPT Codex",
|
|
147
199
|
resolve: "opencode/gpt-5.3-codex",
|
|
148
|
-
openRouterResolve: "openrouter/openai/gpt-5.3-codex"
|
|
200
|
+
openRouterResolve: "openrouter/openai/gpt-5.3-codex",
|
|
201
|
+
fallback: "opencode/gpt"
|
|
149
202
|
},
|
|
150
203
|
"gpt-codex-mini": {
|
|
151
204
|
displayName: "GPT Codex Mini",
|
|
152
205
|
resolve: "opencode/gpt-5.1-codex-mini",
|
|
153
|
-
openRouterResolve: "openrouter/openai/gpt-5.1-codex-mini"
|
|
206
|
+
openRouterResolve: "openrouter/openai/gpt-5.1-codex-mini",
|
|
207
|
+
fallback: "opencode/gpt-mini"
|
|
154
208
|
},
|
|
155
209
|
"gemini-pro": {
|
|
156
210
|
displayName: "Gemini Pro",
|
|
@@ -164,8 +218,8 @@ var providers = {
|
|
|
164
218
|
},
|
|
165
219
|
"kimi-k2": {
|
|
166
220
|
displayName: "Kimi K2",
|
|
167
|
-
resolve: "opencode/kimi-k2.
|
|
168
|
-
openRouterResolve: "openrouter/moonshotai/kimi-k2.
|
|
221
|
+
resolve: "opencode/kimi-k2.6",
|
|
222
|
+
openRouterResolve: "openrouter/moonshotai/kimi-k2.6"
|
|
169
223
|
},
|
|
170
224
|
"gpt-5-nano": {
|
|
171
225
|
displayName: "GPT Nano",
|
|
@@ -185,12 +239,6 @@ var providers = {
|
|
|
185
239
|
resolve: "opencode/minimax-m2.5-free",
|
|
186
240
|
envVars: [],
|
|
187
241
|
isFree: true
|
|
188
|
-
},
|
|
189
|
-
"nemotron-3-super-free": {
|
|
190
|
-
displayName: "Nemotron 3 Super",
|
|
191
|
-
resolve: "opencode/nemotron-3-super-free",
|
|
192
|
-
envVars: [],
|
|
193
|
-
isFree: true
|
|
194
242
|
}
|
|
195
243
|
}
|
|
196
244
|
}),
|
|
@@ -200,8 +248,8 @@ var providers = {
|
|
|
200
248
|
models: {
|
|
201
249
|
"claude-opus": {
|
|
202
250
|
displayName: "Claude Opus",
|
|
203
|
-
resolve: "openrouter/anthropic/claude-opus-4.
|
|
204
|
-
openRouterResolve: "openrouter/anthropic/claude-opus-4.
|
|
251
|
+
resolve: "openrouter/anthropic/claude-opus-4.7",
|
|
252
|
+
openRouterResolve: "openrouter/anthropic/claude-opus-4.7",
|
|
205
253
|
preferred: true
|
|
206
254
|
},
|
|
207
255
|
"claude-sonnet": {
|
|
@@ -214,15 +262,33 @@ var providers = {
|
|
|
214
262
|
resolve: "openrouter/anthropic/claude-haiku-4.5",
|
|
215
263
|
openRouterResolve: "openrouter/anthropic/claude-haiku-4.5"
|
|
216
264
|
},
|
|
265
|
+
gpt: {
|
|
266
|
+
displayName: "GPT",
|
|
267
|
+
resolve: "openrouter/openai/gpt-5.5",
|
|
268
|
+
openRouterResolve: "openrouter/openai/gpt-5.5"
|
|
269
|
+
},
|
|
270
|
+
"gpt-pro": {
|
|
271
|
+
displayName: "GPT Pro",
|
|
272
|
+
resolve: "openrouter/openai/gpt-5.5-pro",
|
|
273
|
+
openRouterResolve: "openrouter/openai/gpt-5.5-pro"
|
|
274
|
+
},
|
|
275
|
+
"gpt-mini": {
|
|
276
|
+
displayName: "GPT Mini",
|
|
277
|
+
resolve: "openrouter/openai/gpt-5.4-mini",
|
|
278
|
+
openRouterResolve: "openrouter/openai/gpt-5.4-mini"
|
|
279
|
+
},
|
|
280
|
+
// legacy aliases — see openai provider for context.
|
|
217
281
|
"gpt-codex": {
|
|
218
282
|
displayName: "GPT Codex",
|
|
219
283
|
resolve: "openrouter/openai/gpt-5.3-codex",
|
|
220
|
-
openRouterResolve: "openrouter/openai/gpt-5.3-codex"
|
|
284
|
+
openRouterResolve: "openrouter/openai/gpt-5.3-codex",
|
|
285
|
+
fallback: "openrouter/gpt"
|
|
221
286
|
},
|
|
222
287
|
"gpt-codex-mini": {
|
|
223
288
|
displayName: "GPT Codex Mini",
|
|
224
289
|
resolve: "openrouter/openai/gpt-5.1-codex-mini",
|
|
225
|
-
openRouterResolve: "openrouter/openai/gpt-5.1-codex-mini"
|
|
290
|
+
openRouterResolve: "openrouter/openai/gpt-5.1-codex-mini",
|
|
291
|
+
fallback: "openrouter/gpt-mini"
|
|
226
292
|
},
|
|
227
293
|
"o4-mini": {
|
|
228
294
|
displayName: "O4 Mini",
|
|
@@ -244,15 +310,28 @@ var providers = {
|
|
|
244
310
|
resolve: "openrouter/x-ai/grok-4",
|
|
245
311
|
openRouterResolve: "openrouter/x-ai/grok-4"
|
|
246
312
|
},
|
|
313
|
+
"deepseek-pro": {
|
|
314
|
+
displayName: "DeepSeek Pro",
|
|
315
|
+
resolve: "openrouter/deepseek/deepseek-v4-pro",
|
|
316
|
+
openRouterResolve: "openrouter/deepseek/deepseek-v4-pro"
|
|
317
|
+
},
|
|
318
|
+
"deepseek-flash": {
|
|
319
|
+
displayName: "DeepSeek Flash",
|
|
320
|
+
resolve: "openrouter/deepseek/deepseek-v4-flash",
|
|
321
|
+
openRouterResolve: "openrouter/deepseek/deepseek-v4-flash"
|
|
322
|
+
},
|
|
323
|
+
// legacy alias — deepseek retires this on 2026-07-24; transparently
|
|
324
|
+
// upgrade existing users to the v4 family via the fallback chain.
|
|
247
325
|
"deepseek-chat": {
|
|
248
326
|
displayName: "DeepSeek Chat",
|
|
249
327
|
resolve: "openrouter/deepseek/deepseek-v3.2",
|
|
250
|
-
openRouterResolve: "openrouter/deepseek/deepseek-v3.2"
|
|
328
|
+
openRouterResolve: "openrouter/deepseek/deepseek-v3.2",
|
|
329
|
+
fallback: "openrouter/deepseek-flash"
|
|
251
330
|
},
|
|
252
331
|
"kimi-k2": {
|
|
253
332
|
displayName: "Kimi K2",
|
|
254
|
-
resolve: "openrouter/moonshotai/kimi-k2.
|
|
255
|
-
openRouterResolve: "openrouter/moonshotai/kimi-k2.
|
|
333
|
+
resolve: "openrouter/moonshotai/kimi-k2.6",
|
|
334
|
+
openRouterResolve: "openrouter/moonshotai/kimi-k2.6"
|
|
256
335
|
}
|
|
257
336
|
}
|
|
258
337
|
})
|
|
@@ -299,7 +378,7 @@ function resolveModelSlug(slug) {
|
|
|
299
378
|
return modelAliases.find((a) => a.slug === slug)?.resolve;
|
|
300
379
|
}
|
|
301
380
|
var MAX_FALLBACK_DEPTH = 10;
|
|
302
|
-
function
|
|
381
|
+
function resolveDisplayAlias(slug) {
|
|
303
382
|
let current = slug;
|
|
304
383
|
const visited = /* @__PURE__ */ new Set();
|
|
305
384
|
for (let i = 0; i < MAX_FALLBACK_DEPTH; i++) {
|
|
@@ -307,11 +386,17 @@ function resolveCliModel(slug) {
|
|
|
307
386
|
visited.add(current);
|
|
308
387
|
const alias = modelAliases.find((a) => a.slug === current);
|
|
309
388
|
if (!alias) return void 0;
|
|
310
|
-
if (!alias.fallback) return alias
|
|
389
|
+
if (!alias.fallback) return alias;
|
|
311
390
|
current = alias.fallback;
|
|
312
391
|
}
|
|
313
392
|
return void 0;
|
|
314
393
|
}
|
|
394
|
+
function resolveCliModel(slug) {
|
|
395
|
+
return resolveDisplayAlias(slug)?.resolve;
|
|
396
|
+
}
|
|
397
|
+
function resolveOpenRouterModel(slug) {
|
|
398
|
+
return resolveDisplayAlias(slug)?.openRouterResolve;
|
|
399
|
+
}
|
|
315
400
|
|
|
316
401
|
// external.ts
|
|
317
402
|
var pullfrogMcpName = "pullfrog";
|
|
@@ -326,6 +411,9 @@ function formatMcpToolRef(agentId, toolName) {
|
|
|
326
411
|
}
|
|
327
412
|
}
|
|
328
413
|
|
|
414
|
+
// agents/reviewer.ts
|
|
415
|
+
var REVIEWER_AGENT_NAME = "reviewfrog";
|
|
416
|
+
|
|
329
417
|
// modes.ts
|
|
330
418
|
var PR_SUMMARY_FORMAT = `### Default format
|
|
331
419
|
|
|
@@ -368,7 +456,7 @@ GitHub's markdown parser requires a blank line between ALL block-level elements.
|
|
|
368
456
|
Rules:
|
|
369
457
|
- \`##\` titles and key-change bullet lead-ins are plain-language summaries; backtick only actual code tokens (files, types, functions) where they appear in the title
|
|
370
458
|
- ALL variable names, identifiers, and file names in body text must be in backticks
|
|
371
|
-
- ALL file references MUST link to the PR Files Changed view.
|
|
459
|
+
- ALL file references MUST link to the PR Files Changed view. Use the \`diff-<hex>\` anchor precomputed next to each filename in the \`checkout_pr\` TOC \u2014 do NOT run \`sha256sum\` or any other shell command to compute anchors. NEVER fabricate hex strings. If a file is not in the TOC, omit the \`#diff-\` anchor rather than guessing.
|
|
372
460
|
- Add <br/> before each ## heading for visual spacing. Do NOT use horizontal rules (---)
|
|
373
461
|
- Do NOT include raw diff stats like '+123 / -45' or line counts
|
|
374
462
|
- Do NOT include code blocks or repeat diff contents
|
|
@@ -397,9 +485,36 @@ function computeModes(agentId) {
|
|
|
397
485
|
- plan your approach before writing code: identify which files need to change, key design decisions, and edge cases. for non-trivial changes, consider whether there's a more elegant approach.
|
|
398
486
|
- run relevant tests/lints before committing
|
|
399
487
|
|
|
400
|
-
4. **self-review**:
|
|
401
|
-
|
|
402
|
-
- commit
|
|
488
|
+
4. **self-review**: judgment call \u2014 does YOUR diff warrant a fresh-eyes pass?
|
|
489
|
+
|
|
490
|
+
Skip self-review (commit directly) when the diff is **genuinely trivial**:
|
|
491
|
+
- doc typos, comment-only edits, whitespace/format-only, import reordering
|
|
492
|
+
- lockfile or generated-code regeneration, mechanical rename whose only effect is import-path updates (size of diff is irrelevant \u2014 read the *shape*, not the line count)
|
|
493
|
+
- low-risk dep patch bump from a trusted source
|
|
494
|
+
|
|
495
|
+
Run self-review when the diff has **any behavioral surface, however small**:
|
|
496
|
+
- 1-line changes to SQL operators / comparison logic / regexes / redirects / HTTP methods / response codes
|
|
497
|
+
- any change to money / tax / currency / billing / fee / refund / payout calculations or constants
|
|
498
|
+
- any change to auth / permissions / roles / sessions / tokens / signature verification
|
|
499
|
+
- any change to feature-flag defaults, retry counts, timeouts, rate limits, batch sizes
|
|
500
|
+
- new endpoints, new code paths, new error branches \u2014 even small ones
|
|
501
|
+
- mixed diffs (whitespace + a single semantic line) \u2014 the semantic line still triggers self-review
|
|
502
|
+
- anything you're uncertain about
|
|
503
|
+
|
|
504
|
+
Tie-breaker: when in doubt, run self-review. One false-positive subagent dispatch costs cents; one false-negative shipped bug costs much more. There's no value in dispatching for a typo, but there's also no excuse for skipping on a 1-line change to a billing path.
|
|
505
|
+
|
|
506
|
+
Otherwise delegate the \`${REVIEWER_AGENT_NAME}\` subagent to review your diff with fresh eyes against YOUR TASK. The subagent's baked-in system prompt enforces a non-mutative + non-recursive contract: read-only file/search/web tools and read-only MCP queries only; no writes, shell side effects, state-changing MCP calls, or nested subagent dispatch. Enforcement is prose-only \u2014 restate the constraint in your dispatch instructions and do not relax it.
|
|
507
|
+
|
|
508
|
+
Provide the subagent with YOUR TASK, the output of \`git diff\`, and a tight summary (not raw output) of any lint/typecheck/test failures you fixed during build \u2014 what broke, root cause, the fix \u2014 so it can check that fixes addressed root causes rather than suppressed symptoms; say "no build-phase failures" if the build path was clean. Instruct it to flag bugs, logic errors, missing edge cases, gaps between request and diff, and unintended changes.
|
|
509
|
+
|
|
510
|
+
Delegation + research discipline (distilled from \`/anneal\` canonical \u2014 these are codified learnings from many review rounds, not theoretical best practices):
|
|
511
|
+
- Do NOT summarize what you implemented \u2014 that biases the subagent toward validating the shape of your solution rather than questioning it.
|
|
512
|
+
- Do NOT curate a reading list of files. Let the subagent discover scope from the diff and codebase.
|
|
513
|
+
- Do NOT pre-shape output with a severity / category schema. That leaks your hypotheses; severity is your call during evaluation.
|
|
514
|
+
- Do NOT defect-hunt the diff yourself in parallel with the subagent. Your role is dispatch + evaluation; doing the review yourself reintroduces the implementation bias the subagent is meant to mitigate.
|
|
515
|
+
- For diffs that rely on third-party API contracts, SDK semantics, framework directives, or DB engine specifics, instruct the subagent to verify load-bearing claims via web search and quote source URLs rather than trust training data \u2014 this is the single most common review-quality failure mode.
|
|
516
|
+
|
|
517
|
+
Review the findings, address valid points, and discard nitpicks or false positives. The reviewer is fallible \u2014 it biases toward *recommending additions* (defensive checks for impossible cases, extra logging, new abstractions used once, comments restating code, tests asserting tautologies, "just-in-case" guards). For each finding, ask: would applying it leave the code more sound, correct, AND elegant? Two-out-of-three is not enough \u2014 a fix that improves correctness while degrading elegance still degrades the codebase. Reject bloat-shaped findings without applying them, and after applying the rest re-read your diff and be discerning about what *you just changed*: if any fix turned out to be bloat in context, revert it. The goal is code that is sound and correct *while remaining elegant*; the smallest diff that fixes the real defect almost always wins. Then verify only intended changes are present, no debug artifacts or commented-out code remain, no unrelated files were modified. Commit locally via shell (\`git add . && git commit -m "..."\`).
|
|
403
518
|
|
|
404
519
|
5. **finalize**:
|
|
405
520
|
- confirm a clean working tree, then push via \`${t("push_branch")}\` (see *SYSTEM* Git rules if this fails \u2014 prepush errors are usually the repo's tests/lint, not infra timeouts)
|
|
@@ -423,11 +538,12 @@ For simple, well-defined tasks, skip the plan phase and go straight to build.`
|
|
|
423
538
|
|
|
424
539
|
3. For each comment:
|
|
425
540
|
- understand the feedback
|
|
426
|
-
-
|
|
427
|
-
-
|
|
541
|
+
- evaluate whether applying it would leave the code more **sound, correct, AND elegant**. reviewers are fallible and bias toward *recommending additions* (defensive checks for impossible cases, extra abstractions, comments restating obvious code, tests asserting tautologies, "just-in-case" guards). if a request would add bloat \u2014 ceremony without commensurate correctness benefit \u2014 push back in your reply rather than mechanically applying it. two-out-of-three is not enough; improving correctness while degrading elegance still degrades the code.
|
|
542
|
+
- if the request stands, make the code change using your native tools; otherwise reply explaining why
|
|
543
|
+
- record what was done (or why nothing was done)
|
|
428
544
|
|
|
429
545
|
4. Quality check:
|
|
430
|
-
- test changes, then review the diff before committing \u2014 verify only intended changes are present, no debug artifacts remain, and the changes are clean enough that a senior engineer would approve without hesitation
|
|
546
|
+
- test changes, then review the diff before committing \u2014 verify only intended changes are present, no debug artifacts remain, no fix turned out to be bloat in context (revert any that did), and the changes are clean enough that a senior engineer would approve without hesitation
|
|
431
547
|
- commit locally via shell (\`git add . && git commit -m "..."\`)
|
|
432
548
|
|
|
433
549
|
5. Finalize:
|
|
@@ -438,28 +554,94 @@ For simple, well-defined tasks, skip the plan phase and go straight to build.`
|
|
|
438
554
|
|
|
439
555
|
${learningsStep(t, 6)}`
|
|
440
556
|
},
|
|
557
|
+
// Review and IncrementalReview use the multi-lens orchestrator pattern
|
|
558
|
+
// (canonical source: .claude/commands/anneal.md). The orchestrator does
|
|
559
|
+
// triage → parallel read-only subagent fan-out → aggregate → draft comments
|
|
560
|
+
// → submit. For someone else's PR, parallel lenses (correctness, security,
|
|
561
|
+
// research-validated claims, user-journey, etc.) provide breadth across
|
|
562
|
+
// angles that a single subagent can't carry coherently. Build mode keeps
|
|
563
|
+
// a single fresh-eyes subagent (different problem shape — orchestrator
|
|
564
|
+
// wrote the code and bias-mitigation comes from delegating to one
|
|
565
|
+
// subagent that doesn't share the implementation context).
|
|
566
|
+
// Deliberate omission vs canonical /anneal: severity categorization in the
|
|
567
|
+
// final message (the review body has its own CAUTION/IMPORTANT framing
|
|
568
|
+
// instead of a severity table).
|
|
441
569
|
{
|
|
442
570
|
name: "Review",
|
|
443
571
|
description: "Review code, PRs, or implementations; provide feedback or suggestions; identify issues; or check code quality, style, and correctness",
|
|
444
572
|
prompt: `### Checklist
|
|
445
573
|
|
|
446
|
-
1.
|
|
447
|
-
|
|
448
|
-
2.
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
-
|
|
454
|
-
-
|
|
455
|
-
-
|
|
456
|
-
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
574
|
+
1. **checkout**: call \`${t("checkout_pr")}\` \u2014 this returns PR metadata and a \`diffPath\`. read the diff TOC end-to-end and treat its file line ranges as your coverage checklist.
|
|
575
|
+
|
|
576
|
+
2. **triage**: orient yourself on the PR \u2014 identify *what kind of thing this is* (domain it touches, seams it crosses, external contracts it depends on, user-facing surfaces it changes). orientation only \u2014 defer specific defect-hunting to the subagents; pre-reviewing biases the lenses you pick. use \`${t("get_pull_request")}\` and other read-only GitHub tools for additional context if needed.
|
|
577
|
+
|
|
578
|
+
if the PR is **genuinely trivial**, skip steps 3\u20134 entirely and submit \`Reviewed \u2014 no issues found.\` per step 5. there's no value in dispatching even one lens for a typo.
|
|
579
|
+
|
|
580
|
+
"Genuinely trivial" (skip):
|
|
581
|
+
- single-word doc typo, whitespace/format-only, comment-only across any number of files
|
|
582
|
+
- lockfile or generated-code regeneration (size of diff is irrelevant \u2014 read the *shape*)
|
|
583
|
+
- mechanical rename whose only effect is import-path updates
|
|
584
|
+
- low-risk dep patch bump
|
|
585
|
+
|
|
586
|
+
"Looks trivial but isn't" (do **NOT** skip \u2014 small diff, big blast radius):
|
|
587
|
+
- any 1-line change to SQL / regex / auth / billing / permission / signature-verification code
|
|
588
|
+
- flipping a feature-flag default, default config value, or retry/timeout constant
|
|
589
|
+
- changing a money/tax/currency/fee constant by any amount
|
|
590
|
+
- changing an HTTP method, redirect URL, response code, or status enum
|
|
591
|
+
- tightening or loosening a comparison operator (\`<\` \u2194 \`<=\`, \`==\` \u2194 \`!=\`)
|
|
592
|
+
- renaming a public API surface (still trivial in shape, but needs an impact lens)
|
|
593
|
+
- adding a new direct dependency (supply-chain surface)
|
|
594
|
+
- any "typo fix" in user-facing copy that changes meaning ("approved" \u2192 "denied")
|
|
595
|
+
- mixed diffs where a semantic 1-liner is buried in whitespace/formatting changes
|
|
596
|
+
|
|
597
|
+
When unsure, treat as non-trivial. The cost of one extra subagent is cents; the cost of a missed billing/auth/data bug is much more.
|
|
598
|
+
|
|
599
|
+
otherwise pick lenses by where the PR concentrates risk \u2014 **there's no fixed count**. lens count is judgment, not a formula. concrete shapes to anchor against:
|
|
600
|
+
|
|
601
|
+
- **1 lens** \u2014 pure refactor / mechanical rename across many files (impact); new test file with no source change (test-integrity); small isolated bug fix (correctness); doc-only PR with non-trivial technical content (research-validated or holistic)
|
|
602
|
+
- **2\u20133 lenses (most PRs land here)** \u2014 new CRUD endpoint (correctness + security + test-integrity); new UI flow (user-journey + correctness); a single bug fix in a non-critical subsystem (correctness + test-integrity); design doc covering one domain (research-validated + correctness or holistic)
|
|
603
|
+
- **4\u20135 lenses (high-stakes subsystem touches)** \u2014 any billing/payments change (billing-subsystem + correctness + security + operational-readiness); new auth flow (auth-subsystem + correctness + security + test-integrity); schema migration (schema-migration-subsystem + correctness + operational-readiness + impact); cross-subsystem PR that touches billing AND auth AND schema (one subsystem lens per domain + correctness)
|
|
604
|
+
- **6+ lenses** \u2014 almost always a smell; you're either covering overlapping ground or this PR should have been split. push back via the review body rather than expanding lens count.
|
|
605
|
+
|
|
606
|
+
lenses come in two flavors, and you can mix them:
|
|
607
|
+
- **themed lenses** \u2014 a perspective applied across the whole diff (correctness, security, user-journey, performance, etc.).
|
|
608
|
+
- **subsystem lenses** \u2014 a domain-scoped frame for high-stakes subsystems the PR touches (e.g. "the auth lens", "the billing lens", "the schema-migration lens"). a subsystem lens is "review the PR specifically for what could go wrong in this subsystem" and naturally combines theme + scope. **for high-stakes domains, lead with the subsystem lens rather than the generic themed equivalent** \u2014 "billing-subsystem" outperforms "correctness on billing code" because the framing primes the subagent to remember domain-specific failure modes (double-charges, refund races, currency rounding, dispute flows) the generic lens misses.
|
|
609
|
+
|
|
610
|
+
starter menu (combine, omit, or invent your own):
|
|
611
|
+
- **correctness & invariants** \u2014 bugs, races, error handling, edge cases, state-machine boundaries
|
|
612
|
+
- **impact** \u2014 when the PR removes features, deletes exports, renames identifiers, or changes architectural patterns: stale references in code, tests, docs (\`docs/\`, \`wiki/\`), comments, configs, UI
|
|
613
|
+
- **research-validated assumptions** \u2014 third-party API contracts, SDK semantics, framework directives, version-gated behavior. the subagent must verify load-bearing claims via web search and quote source URLs.
|
|
614
|
+
- **security** \u2014 new endpoints, authZ, input validation, secrets handling, replay/CSRF/injection, cross-tenant isolation
|
|
615
|
+
- **user-journey** \u2014 UX-touching flows: walk through happy path and failure modes as a user
|
|
616
|
+
- **operational readiness** \u2014 observability, alerting, migrations (forward + rollback), feature flags, on-call burden
|
|
617
|
+
- **integration & cross-cutting** \u2014 API contracts between modules, backward-compat of public surfaces, multi-service ordering
|
|
618
|
+
- **test integrity** \u2014 meaningful coverage for the changed behavior; deterministic; no shared-state pollution
|
|
619
|
+
- **performance** \u2014 N+1 queries, hot-path allocation, latency budgets, index coverage
|
|
620
|
+
- **holistic** \u2014 does the PR make sense as a whole? symmetric flows (delete for every create, rollback for every migration)?
|
|
621
|
+
- **subsystem lenses** (invent as the PR demands) \u2014 auth, billing, payments, schema migration, webhooks, secrets, RBAC, multi-tenant isolation, cron/scheduling, etc.
|
|
622
|
+
|
|
623
|
+
3. **fan out**: dispatch one \`${REVIEWER_AGENT_NAME}\` subagent per lens \u2014 its baked-in system prompt enforces the non-mutative + non-recursive contract (read-only file/search/web tools and read-only MCP queries; no writes, shell side effects, state-changing MCP calls, or nested subagent dispatch). when picking 2+ lenses, dispatch them in a **single assistant turn with multiple parallel subagent calls**; issuing one and awaiting reply before the next collapses the fan-out into a serial review. if a subagent errors out, times out, or returns nothing usable, retry once with the same lens; if it still fails, proceed with partial coverage and note the missing lens in the review body \u2014 do not skip step 3 entirely on a single subagent failure. each subagent gets:
|
|
624
|
+
- the diff path / target \u2014 reading the diff and the codebase is its job
|
|
625
|
+
- **only one lens** \u2014 never a multi-section "review for X, Y, and Z" prompt
|
|
626
|
+
- **a Task \`description\` set to the lens name** (e.g. \`"security"\`, \`"correctness"\`, \`"billing-subsystem"\`) \u2014 the harness reads this field to label the subagent's log lines so parallel runs can be told apart in CI output. without it, every subagent shows up as \`subagent#N\`.
|
|
627
|
+
- the read-only contract restated in your dispatch instructions so the rule is present twice (the subagent's system prompt also enforces it). The test: would this call still be a no-op if reverted? If not (PR comments, branch pushes, issue updates, set_output, label changes, dependency installs, etc.), don't make it.
|
|
628
|
+
- if the lens touches external contracts, instruct the subagent to verify load-bearing claims via web search rather than trust training data, and to quote source URLs in its reasoning. action runs are non-interactive \u2014 there's no human in the loop to catch "I'm pretty sure Stripe does X."
|
|
629
|
+
- ask the subagent to report findings with file paths and NEW line numbers from the diff so you can anchor inline comments without re-reading the entire diff.
|
|
630
|
+
|
|
631
|
+
delegation discipline:
|
|
632
|
+
- do NOT lens-review the diff yourself in parallel with the subagents (your job is dispatch + comment-drafting; doing the lens work yourself reintroduces the bias the fan-out avoids)
|
|
633
|
+
- do NOT summarize the PR for them (biases toward a validation frame)
|
|
634
|
+
- do NOT hand them a curated reading list (let them discover scope)
|
|
635
|
+
- do NOT pre-shape their output with a finding schema
|
|
636
|
+
- do NOT mention the other lenses (independence is the point \u2014 overlapping findings are a strong signal)
|
|
637
|
+
|
|
638
|
+
4. **aggregate & draft**: merge findings; de-dup overlaps (two lenses catching the same issue = higher-confidence signal); trace each finding yourself before accepting it. drop praise, style preferences, speculative/unverified claims, findings about pre-existing code unrelated to the PR (heuristic: if the finding's root cause lives in lines this PR added or modified, it's in scope; otherwise drop unless the PR plausibly introduced or amplified the regression), and anything not actionable. also drop **bloat-shaped findings** \u2014 proposed fixes that would add defensive checks for cases that can't happen, abstractions used once, comments restating obvious code, tests asserting tautologies, or "just-in-case" guards. subagents are fallible and bias toward recommending changes; the bar for an actionable inline comment is sound + correct + elegant. recommending a change that improves only one of the three (or worse, degrades elegance to nominally improve correctness) makes the codebase worse, not better.
|
|
639
|
+
|
|
640
|
+
for surviving findings, draft inline comments with NEW line numbers from the diff. every comment must be actionable, 2-3 sentences max. use GitHub permalink format for code references. for impact-analysis findings (stale references after rename/remove), report them in the review body ordered by severity (runtime breakage > incorrect docs > stale comments) rather than as inline comments unless they're anchored to a specific line.
|
|
641
|
+
|
|
642
|
+
5. **submit**: ALWAYS submit exactly one review via \`${t("create_pull_request_review")}\`. Do NOT call \`report_progress\` \u2014 the review is the final record and the progress comment will be cleaned up automatically.
|
|
643
|
+
|
|
644
|
+
note: the first create_pull_request_review submission may error with a one-time diff-coverage nudge listing unread TOC regions. retry the same call to proceed \u2014 optionally after reading the listed ranges. the pre-flight will not block again this session.
|
|
463
645
|
|
|
464
646
|
- **critical issues** (blocks merge \u2014 bugs, security, data loss):
|
|
465
647
|
\`approved: false\`. Body begins with a GitHub alert blockquote, e.g.:
|
|
@@ -472,35 +654,63 @@ ${learningsStep(t, 6)}`
|
|
|
472
654
|
- **no actionable issues**:
|
|
473
655
|
\`approved: true\`, body: "Reviewed \u2014 no issues found."`
|
|
474
656
|
},
|
|
657
|
+
// IncrementalReview shares Review's multi-lens orchestrator pattern but
|
|
658
|
+
// scopes the target to the incremental diff and adds prior-review-feedback
|
|
659
|
+
// tracking. The "issues must be NEW since the last Pullfrog review" filter
|
|
660
|
+
// lives at aggregation time (step 5), NOT in the subagent prompt — pushing
|
|
661
|
+
// the filter into subagents matches the canonical anneal anti-pattern of
|
|
662
|
+
// "list known pre-existing failures — don't flag these" and suppresses
|
|
663
|
+
// signal on regressions the new commits amplified. The body-format rules
|
|
664
|
+
// (Reviewed changes / Prior review feedback) are unchanged from the prior
|
|
665
|
+
// version. Same severity-table omission as Review.
|
|
475
666
|
{
|
|
476
667
|
name: "IncrementalReview",
|
|
477
668
|
description: "Re-review a PR after new commits are pushed; focus on new changes since the last review",
|
|
478
669
|
prompt: `### Checklist
|
|
479
670
|
|
|
480
|
-
1.
|
|
671
|
+
1. **checkout**: call \`${t("checkout_pr")}\` \u2014 this returns PR metadata, \`diffPath\` (full diff), and \`incrementalDiffPath\` (changes since last reviewed version, if available). read the diff TOC first and use its line ranges as your coverage checklist.
|
|
672
|
+
|
|
673
|
+
2. **incremental scope**: if \`incrementalDiffPath\` is present, read it to see what changed since the last review. this is a range-diff that isolates the net changes, filtering out base branch noise. if not present, fall back to reviewing the full PR diff and determine what changed since Pullfrog's most recent review.
|
|
674
|
+
|
|
675
|
+
3. **prior feedback**: fetch previous reviews via \`${t("list_pull_request_reviews")}\`. for the most recent Pullfrog review, call \`${t("get_review_comments")}\` with the review ID to retrieve specific prior line-level feedback. you'll need this in step 5 to drop findings that re-state prior feedback and to track which prior comments were addressed (reported in step 6b).
|
|
676
|
+
|
|
677
|
+
4. **triage & fan out**: orient on the *incremental* changes \u2014 domain, seams, external contracts, user-facing surfaces.
|
|
678
|
+
|
|
679
|
+
if the incremental changes are **genuinely trivial**, skip the fan-out entirely and jump to step 7's non-substantive path (do NOT submit a review).
|
|
680
|
+
|
|
681
|
+
"Genuinely trivial" (skip): formatting/comment tweaks, import reordering, lockfile regen, mechanical rename of import paths, whitespace-only.
|
|
682
|
+
"Looks trivial but isn't" (do NOT skip \u2014 same anti-patterns as Review mode): 1-line changes to SQL/regex/auth/billing/permissions/signature-verification code; flipping feature-flag defaults or retry/timeout constants; money/tax/HTTP-method/redirect changes; tightening or loosening a comparison operator; mixed diffs with a semantic line buried in formatting.
|
|
683
|
+
When unsure, treat as non-trivial.
|
|
684
|
+
|
|
685
|
+
otherwise pick lenses by where the new commits concentrate risk \u2014 **there's no fixed count**, same calibration as Review mode (1 lens for pure refactor / isolated fix; 2\u20133 for typical features; 4\u20135 for high-stakes subsystem touches; 6+ is a smell). lens framing follows Review mode: themed lenses (correctness & invariants, impact when new commits remove/rename/deprecate things, research-validated assumptions, security, user-journey, operational readiness, integration & cross-cutting, test integrity, performance, holistic) and subsystem lenses (auth, billing, schema migration, etc.) \u2014 for high-stakes domains lead with the subsystem lens rather than the generic themed equivalent.
|
|
481
686
|
|
|
482
|
-
|
|
687
|
+
dispatch one \`${REVIEWER_AGENT_NAME}\` subagent per lens \u2014 its baked-in system prompt enforces the non-mutative + non-recursive contract (read-only file/search/web tools and read-only MCP queries; no writes, shell side effects, state-changing MCP calls, or nested subagent dispatch). dispatch them in a **single assistant turn with multiple parallel subagent calls** (serial dispatch collapses the fan-out). if a subagent errors out, times out, or returns nothing usable, retry once with the same lens; if it still fails, proceed with partial coverage and note the missing lens in the review body \u2014 do not skip step 4 entirely on a single subagent failure. each subagent gets:
|
|
688
|
+
- the diff scope (incremental diff path if available, full diff otherwise). do NOT tell them to skip pre-existing issues \u2014 that suppresses regressions the new commits amplified; the "issues must be NEW" filter lives at aggregation time (step 5), not in the subagent prompt
|
|
689
|
+
- **only one lens** \u2014 never a multi-section "review for X, Y, and Z" prompt
|
|
690
|
+
- **a Task \`description\` set to the lens name** (e.g. \`"security"\`, \`"correctness"\`, \`"billing-subsystem"\`) \u2014 the harness reads this field to label the subagent's log lines so parallel runs can be told apart in CI output. without it, every subagent shows up as \`subagent#N\`.
|
|
691
|
+
- the read-only contract restated in your dispatch instructions so the rule is present twice (the subagent's system prompt also enforces it). The test: does this call leave the repo, the PR, and every external system exactly as it found them? If not (PR comments, branch pushes, issue updates, set_output, label changes, dependency installs, etc.), don't make it.
|
|
692
|
+
- if the lens touches external contracts, instruct the subagent to verify load-bearing claims via web search and quote source URLs. action runs are non-interactive \u2014 there's no human to catch "I'm pretty sure Stripe does X."
|
|
693
|
+
- ask the subagent to report findings with file paths and NEW line numbers from the full PR diff so you can anchor inline comments.
|
|
483
694
|
|
|
484
|
-
|
|
695
|
+
delegation discipline:
|
|
696
|
+
- do NOT lens-review the diff yourself in parallel with the subagents
|
|
697
|
+
- do NOT summarize the changes for them (biases toward validation frame)
|
|
698
|
+
- do NOT hand them a curated reading list (let them discover scope)
|
|
699
|
+
- do NOT pre-shape their output with a finding schema
|
|
700
|
+
- do NOT mention the other lenses (independence is the point)
|
|
485
701
|
|
|
486
|
-
|
|
487
|
-
- review the incremental diff while using the full diff for context
|
|
488
|
-
- check whether prior review feedback was addressed by the new commits
|
|
489
|
-
- trace data flow, check boundaries, verify assumptions, consider lifecycle, spot performance issues
|
|
490
|
-
- if the new commits remove, rename, or deprecate anything, run impact analysis with grep across code/tests/docs/comments/configs to find stale references and include those findings in the summary body
|
|
491
|
-
- never repeat prior feedback. only comment on genuinely new issues introduced by the new commits.
|
|
492
|
-
- draft inline comments with NEW line numbers from the full PR diff \u2014 every comment must be actionable (2-3 sentences max)
|
|
493
|
-
- for large or cross-cutting PRs, consider delegating read-only subagents for parallel investigation. subagents must ONLY read files, grep, and search \u2014 no MCP tools, no writes, no shell commands, no side effects. collect their findings and use them to draft comments.
|
|
702
|
+
5. **aggregate, draft, self-critique**: merge findings; de-dup overlaps; trace each finding yourself. drop praise, style preferences, speculative/unverified claims, findings about pre-existing code unrelated to the new commits, anything not actionable, and anything that re-states prior review feedback (heuristic: if the finding's root cause lives in lines the *new commits* added or modified, it's in scope; otherwise drop). also drop **bloat-shaped findings** \u2014 proposed fixes that would add defensive checks for cases that can't happen, abstractions used once, comments restating obvious code, tests asserting tautologies, or "just-in-case" guards. subagents are fallible and bias toward recommending changes; the bar for an actionable inline comment is sound + correct + elegant. recommending a change that improves only one of the three (or degrades elegance to nominally improve correctness) makes the codebase worse, not better. To compute "lines the new commits added or modified": if \`incrementalDiffPath\` from step 1 is present, use it directly. Otherwise, take the prior Pullfrog review's \`commit_id\` (returned alongside each entry from \`${t("list_pull_request_reviews")}\` in step 3) and run \`git diff <prior-review-sha>..HEAD\` to isolate the lines added since that review. draft inline comments with NEW line numbers from the full PR diff \u2014 every comment must be actionable, 2-3 sentences max.
|
|
494
703
|
|
|
495
|
-
|
|
704
|
+
then check: which prior review comments were addressed by the new commits? track the addressed ones for step 6b.
|
|
496
705
|
|
|
497
|
-
6. **
|
|
706
|
+
6. **build the review body** \u2014 two distinct sections:
|
|
498
707
|
a. **Reviewed changes**: summarize at the logical-change level, not per-file. each bullet starts with a past-tense verb (e.g. \`- Extracted shared CLI runtime into a single module\`, \`- Renamed package to pullfrog\`). avoid file paths unless they add clarity. if the changes can be described in one sentence, use one sentence \u2014 no bullets needed.
|
|
499
708
|
b. **Prior review feedback** (only if any were addressed): list only the prior review comments that WERE addressed by the new commits (\`- [x] safeParse instead of parse \u2014 addressed\`). omit unaddressed comments. omit this entire section if nothing was addressed. a change can appear in both sections.
|
|
500
709
|
- no headings, no tables, no prose paragraphs in either section \u2014 just bullets
|
|
501
710
|
- in some cases you may receive a complete diff for the whole pull request instead of an incremental one. when this happens, you will need to determine what changes have happened since Pullfrog's most recent review.
|
|
502
711
|
|
|
503
712
|
7. Submit \u2014 Do NOT call \`report_progress\` or \`create_issue_comment\` \u2014 the review is the final record and the progress comment will be cleaned up automatically. the review body always includes the reviewed changes from step 6a. append \`Prior review feedback:\\n\` with the checklist from step 6b only if any prior comments were addressed. Follow these rules:
|
|
713
|
+
- note: the first create_pull_request_review submission may error with a one-time diff-coverage nudge listing unread TOC regions. retry the same call to proceed \u2014 optionally after reading the listed ranges. the pre-flight will not block again this session.
|
|
504
714
|
- IF NO NEW ISSUES, NON-SUBSTANTIVE CHANGES ONLY (trivial formatting, import reordering, comment tweaks): do NOT submit a review. Do NOT call \`report_progress\`. Exit \u2014 the progress comment will be cleaned up automatically.
|
|
505
715
|
- ELSE IF NEW CRITICAL ISSUES (blocks merge): call \`${t("create_pull_request_review")}\` with \`approved: false\`, all comments, and the review body. body opens with a GitHub alert blockquote (e.g. \`> [!CAUTION]\\n> This PR introduces ...\`), then the reviewed changes summary and prior feedback (if any).
|
|
506
716
|
- ELSE IF NEW RECOMMENDED CHANGES (non-critical): call \`${t("create_pull_request_review")}\` with \`approved: false\`, all comments, and the review body. body opens with \`> [!IMPORTANT]\\n> ...\` alert, then the reviewed changes summary and prior feedback (if any).
|
|
@@ -611,7 +821,7 @@ var modes = computeModes("opencode");
|
|
|
611
821
|
var PULLFROG_DIVIDER = "<!-- PULLFROG_DIVIDER_DO_NOT_REMOVE_PLZ -->";
|
|
612
822
|
var FROG_LOGO = `<a href="https://pullfrog.com"><picture><source media="(prefers-color-scheme: dark)" srcset="https://pullfrog.com/logos/frog-white-full-18px.png"><img src="https://pullfrog.com/logos/frog-green-full-18px.png" width="9px" height="9px" style="vertical-align: middle; " alt="Pullfrog"></picture></a>`;
|
|
613
823
|
function formatModelLabel(slug) {
|
|
614
|
-
const alias =
|
|
824
|
+
const alias = resolveDisplayAlias(slug);
|
|
615
825
|
if (!alias) return `\`${slug}\``;
|
|
616
826
|
return alias.isFree ? `\`${alias.displayName}\` (free)` : `\`${alias.displayName}\``;
|
|
617
827
|
}
|
|
@@ -676,6 +886,8 @@ export {
|
|
|
676
886
|
providers,
|
|
677
887
|
pullfrogMcpName,
|
|
678
888
|
resolveCliModel,
|
|
889
|
+
resolveDisplayAlias,
|
|
679
890
|
resolveModelSlug,
|
|
891
|
+
resolveOpenRouterModel,
|
|
680
892
|
stripExistingFooter
|
|
681
893
|
};
|
package/dist/lifecycle.d.ts
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
1
|
/** timeout for lifecycle hook scripts */
|
|
2
|
-
export declare const LIFECYCLE_HOOK_TIMEOUT_MS =
|
|
2
|
+
export declare const LIFECYCLE_HOOK_TIMEOUT_MS = 600000;
|