pullfrog 0.1.5 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agents/postRun.d.ts +21 -0
- package/dist/agents/sessionLabeler.d.ts +38 -18
- package/dist/agents/subagentModels.d.ts +19 -0
- package/dist/cli.mjs +678 -278
- package/dist/index.js +662 -264
- package/dist/internal.js +151 -59
- package/dist/models.d.ts +63 -3
- package/dist/utils/agent.d.ts +5 -2
- package/dist/utils/apiKeys.d.ts +18 -0
- package/dist/utils/instructions.d.ts +19 -0
- package/dist/utils/learnings.d.ts +20 -9
- package/dist/utils/normalizeEnv.d.ts +21 -1
- package/dist/utils/runContext.d.ts +16 -0
- package/dist/utils/subprocess.d.ts +40 -0
- package/dist/utils/timer.d.ts +11 -0
- package/package.json +1 -1
package/dist/internal.js
CHANGED
|
@@ -13,7 +13,8 @@ var providers = {
|
|
|
13
13
|
displayName: "Claude Opus",
|
|
14
14
|
resolve: "anthropic/claude-opus-4-7",
|
|
15
15
|
openRouterResolve: "openrouter/anthropic/claude-opus-4.7",
|
|
16
|
-
preferred: true
|
|
16
|
+
preferred: true,
|
|
17
|
+
subagentModel: "claude-sonnet"
|
|
17
18
|
},
|
|
18
19
|
"claude-sonnet": {
|
|
19
20
|
displayName: "Claude Sonnet",
|
|
@@ -35,12 +36,23 @@ var providers = {
|
|
|
35
36
|
displayName: "GPT",
|
|
36
37
|
resolve: "openai/gpt-5.5",
|
|
37
38
|
openRouterResolve: "openrouter/openai/gpt-5.5",
|
|
38
|
-
preferred: true
|
|
39
|
+
preferred: true,
|
|
40
|
+
subagentModel: "gpt-5.4"
|
|
39
41
|
},
|
|
40
42
|
"gpt-pro": {
|
|
41
43
|
displayName: "GPT Pro",
|
|
42
44
|
resolve: "openai/gpt-5.5-pro",
|
|
43
|
-
openRouterResolve: "openrouter/openai/gpt-5.5-pro"
|
|
45
|
+
openRouterResolve: "openrouter/openai/gpt-5.5-pro",
|
|
46
|
+
subagentModel: "gpt"
|
|
47
|
+
},
|
|
48
|
+
// hidden subagent target — `gpt` lenses run against this. surfacing
|
|
49
|
+
// it in the picker would just confuse users (it's the prior-flagship,
|
|
50
|
+
// and they already have `gpt` and `gpt-mini` to choose from).
|
|
51
|
+
"gpt-5.4": {
|
|
52
|
+
displayName: "GPT 5.4",
|
|
53
|
+
resolve: "openai/gpt-5.4",
|
|
54
|
+
openRouterResolve: "openrouter/openai/gpt-5.4",
|
|
55
|
+
hidden: true
|
|
44
56
|
},
|
|
45
57
|
"gpt-mini": {
|
|
46
58
|
displayName: "GPT Mini",
|
|
@@ -78,7 +90,8 @@ var providers = {
|
|
|
78
90
|
displayName: "Gemini Pro",
|
|
79
91
|
resolve: "google/gemini-3.1-pro-preview",
|
|
80
92
|
openRouterResolve: "openrouter/google/gemini-3.1-pro-preview",
|
|
81
|
-
preferred: true
|
|
93
|
+
preferred: true,
|
|
94
|
+
subagentModel: "gemini-flash"
|
|
82
95
|
},
|
|
83
96
|
"gemini-flash": {
|
|
84
97
|
displayName: "Gemini Flash",
|
|
@@ -166,7 +179,8 @@ var providers = {
|
|
|
166
179
|
"claude-opus": {
|
|
167
180
|
displayName: "Claude Opus",
|
|
168
181
|
resolve: "opencode/claude-opus-4-7",
|
|
169
|
-
openRouterResolve: "openrouter/anthropic/claude-opus-4.7"
|
|
182
|
+
openRouterResolve: "openrouter/anthropic/claude-opus-4.7",
|
|
183
|
+
subagentModel: "claude-sonnet"
|
|
170
184
|
},
|
|
171
185
|
"claude-sonnet": {
|
|
172
186
|
displayName: "Claude Sonnet",
|
|
@@ -181,12 +195,21 @@ var providers = {
|
|
|
181
195
|
gpt: {
|
|
182
196
|
displayName: "GPT",
|
|
183
197
|
resolve: "opencode/gpt-5.5",
|
|
184
|
-
openRouterResolve: "openrouter/openai/gpt-5.5"
|
|
198
|
+
openRouterResolve: "openrouter/openai/gpt-5.5",
|
|
199
|
+
subagentModel: "gpt-5.4"
|
|
185
200
|
},
|
|
186
201
|
"gpt-pro": {
|
|
187
202
|
displayName: "GPT Pro",
|
|
188
203
|
resolve: "opencode/gpt-5.5-pro",
|
|
189
|
-
openRouterResolve: "openrouter/openai/gpt-5.5-pro"
|
|
204
|
+
openRouterResolve: "openrouter/openai/gpt-5.5-pro",
|
|
205
|
+
subagentModel: "gpt"
|
|
206
|
+
},
|
|
207
|
+
// hidden subagent target — see openai provider above for context.
|
|
208
|
+
"gpt-5.4": {
|
|
209
|
+
displayName: "GPT 5.4",
|
|
210
|
+
resolve: "opencode/gpt-5.4",
|
|
211
|
+
openRouterResolve: "openrouter/openai/gpt-5.4",
|
|
212
|
+
hidden: true
|
|
190
213
|
},
|
|
191
214
|
"gpt-mini": {
|
|
192
215
|
displayName: "GPT Mini",
|
|
@@ -209,7 +232,8 @@ var providers = {
|
|
|
209
232
|
"gemini-pro": {
|
|
210
233
|
displayName: "Gemini Pro",
|
|
211
234
|
resolve: "opencode/gemini-3.1-pro",
|
|
212
|
-
openRouterResolve: "openrouter/google/gemini-3.1-pro-preview"
|
|
235
|
+
openRouterResolve: "openrouter/google/gemini-3.1-pro-preview",
|
|
236
|
+
subagentModel: "gemini-flash"
|
|
213
237
|
},
|
|
214
238
|
"gemini-flash": {
|
|
215
239
|
displayName: "Gemini Flash",
|
|
@@ -241,6 +265,20 @@ var providers = {
|
|
|
241
265
|
}
|
|
242
266
|
}
|
|
243
267
|
}),
|
|
268
|
+
bedrock: provider({
|
|
269
|
+
displayName: "Amazon Bedrock",
|
|
270
|
+
envVars: ["AWS_BEARER_TOKEN_BEDROCK", "AWS_REGION", "BEDROCK_MODEL_ID"],
|
|
271
|
+
models: {
|
|
272
|
+
// single routing entry — the actual Bedrock model ID is read from
|
|
273
|
+
// BEDROCK_MODEL_ID at run time. see ModelRouting docs for why we
|
|
274
|
+
// don't catalog individual Bedrock models.
|
|
275
|
+
byok: {
|
|
276
|
+
displayName: "Amazon Bedrock",
|
|
277
|
+
resolve: "bedrock",
|
|
278
|
+
routing: "bedrock"
|
|
279
|
+
}
|
|
280
|
+
}
|
|
281
|
+
}),
|
|
244
282
|
openrouter: provider({
|
|
245
283
|
displayName: "OpenRouter",
|
|
246
284
|
envVars: ["OPENROUTER_API_KEY"],
|
|
@@ -249,7 +287,8 @@ var providers = {
|
|
|
249
287
|
displayName: "Claude Opus",
|
|
250
288
|
resolve: "openrouter/anthropic/claude-opus-4.7",
|
|
251
289
|
openRouterResolve: "openrouter/anthropic/claude-opus-4.7",
|
|
252
|
-
preferred: true
|
|
290
|
+
preferred: true,
|
|
291
|
+
subagentModel: "claude-sonnet"
|
|
253
292
|
},
|
|
254
293
|
"claude-sonnet": {
|
|
255
294
|
displayName: "Claude Sonnet",
|
|
@@ -264,12 +303,21 @@ var providers = {
|
|
|
264
303
|
gpt: {
|
|
265
304
|
displayName: "GPT",
|
|
266
305
|
resolve: "openrouter/openai/gpt-5.5",
|
|
267
|
-
openRouterResolve: "openrouter/openai/gpt-5.5"
|
|
306
|
+
openRouterResolve: "openrouter/openai/gpt-5.5",
|
|
307
|
+
subagentModel: "gpt-5.4"
|
|
268
308
|
},
|
|
269
309
|
"gpt-pro": {
|
|
270
310
|
displayName: "GPT Pro",
|
|
271
311
|
resolve: "openrouter/openai/gpt-5.5-pro",
|
|
272
|
-
openRouterResolve: "openrouter/openai/gpt-5.5-pro"
|
|
312
|
+
openRouterResolve: "openrouter/openai/gpt-5.5-pro",
|
|
313
|
+
subagentModel: "gpt"
|
|
314
|
+
},
|
|
315
|
+
// hidden subagent target — see openai provider above for context.
|
|
316
|
+
"gpt-5.4": {
|
|
317
|
+
displayName: "GPT 5.4",
|
|
318
|
+
resolve: "openrouter/openai/gpt-5.4",
|
|
319
|
+
openRouterResolve: "openrouter/openai/gpt-5.4",
|
|
320
|
+
hidden: true
|
|
273
321
|
},
|
|
274
322
|
"gpt-mini": {
|
|
275
323
|
displayName: "GPT Mini",
|
|
@@ -297,7 +345,8 @@ var providers = {
|
|
|
297
345
|
"gemini-pro": {
|
|
298
346
|
displayName: "Gemini Pro",
|
|
299
347
|
resolve: "openrouter/google/gemini-3.1-pro-preview",
|
|
300
|
-
openRouterResolve: "openrouter/google/gemini-3.1-pro-preview"
|
|
348
|
+
openRouterResolve: "openrouter/google/gemini-3.1-pro-preview",
|
|
349
|
+
subagentModel: "gemini-flash"
|
|
301
350
|
},
|
|
302
351
|
"gemini-flash": {
|
|
303
352
|
displayName: "Gemini Flash",
|
|
@@ -370,7 +419,13 @@ var modelAliases = Object.entries(providers).flatMap(
|
|
|
370
419
|
openRouterResolve: def.openRouterResolve,
|
|
371
420
|
preferred: def.preferred ?? false,
|
|
372
421
|
isFree: def.isFree ?? false,
|
|
373
|
-
fallback: def.fallback
|
|
422
|
+
fallback: def.fallback,
|
|
423
|
+
routing: def.routing,
|
|
424
|
+
// subagentModel is stored as an alias key local to the provider; expand
|
|
425
|
+
// here to a fully-qualified slug so callers can look up the target alias
|
|
426
|
+
// directly without re-deriving the provider.
|
|
427
|
+
subagentModel: def.subagentModel ? `${providerKey}/${def.subagentModel}` : void 0,
|
|
428
|
+
hidden: def.hidden ?? false
|
|
374
429
|
}))
|
|
375
430
|
);
|
|
376
431
|
function resolveModelSlug(slug) {
|
|
@@ -550,18 +605,24 @@ For simple, well-defined tasks, skip the plan phase and go straight to build.`
|
|
|
550
605
|
- resolve addressed threads via \`${t("resolve_review_thread")}\`
|
|
551
606
|
- call \`${t("report_progress")}\` with a brief summary (or the exact push error if push failed)`
|
|
552
607
|
},
|
|
553
|
-
// Review and IncrementalReview use
|
|
554
|
-
// (
|
|
555
|
-
//
|
|
556
|
-
//
|
|
557
|
-
//
|
|
558
|
-
//
|
|
559
|
-
//
|
|
560
|
-
//
|
|
561
|
-
//
|
|
562
|
-
//
|
|
563
|
-
//
|
|
564
|
-
//
|
|
608
|
+
// Review and IncrementalReview use a 0-or-2+ lens pattern. The default is
|
|
609
|
+
// 0 lenses (orchestrator handles the review solo). Multi-lens (2+
|
|
610
|
+
// reviewfrog subagents in parallel) only fires for substantive PRs or
|
|
611
|
+
// high-stakes-subsystem touches — and when it fires, ALL lenses must
|
|
612
|
+
// dispatch in a single assistant turn or the parallelism win disappears.
|
|
613
|
+
// We never dispatch exactly one lens: a single lens is just a worse,
|
|
614
|
+
// slower version of doing the work yourself.
|
|
615
|
+
//
|
|
616
|
+
// Build mode self-review is a different problem shape: the orchestrator
|
|
617
|
+
// wrote the code, so bias-mitigation comes from delegating to one
|
|
618
|
+
// fresh-eyes subagent that doesn't share the implementation context. A
|
|
619
|
+
// single subagent there is appropriate; the 0-or-2+ rule applies only to
|
|
620
|
+
// the Review/IncrementalReview lens fan-out where independence between
|
|
621
|
+
// perspectives is what's being purchased.
|
|
622
|
+
//
|
|
623
|
+
// Deliberate omission vs canonical /anneal: severity categorization in
|
|
624
|
+
// the final message (the review body has its own CAUTION/IMPORTANT
|
|
625
|
+
// framing instead of a severity table).
|
|
565
626
|
{
|
|
566
627
|
name: "Review",
|
|
567
628
|
description: "Review code, PRs, or implementations; provide feedback or suggestions; identify issues; or check code quality, style, and correctness",
|
|
@@ -571,9 +632,9 @@ For simple, well-defined tasks, skip the plan phase and go straight to build.`
|
|
|
571
632
|
|
|
572
633
|
2. **checkout**: call \`${t("checkout_pr")}\` \u2014 this returns PR metadata and a \`diffPath\`. read the diff TOC end-to-end and treat its file line ranges as your coverage checklist.
|
|
573
634
|
|
|
574
|
-
3. **triage**: orient yourself on the PR \u2014 identify *what kind of thing this is* (domain it touches, seams it crosses, external contracts it depends on, user-facing surfaces it changes).
|
|
635
|
+
3. **triage**: orient yourself on the PR \u2014 identify *what kind of thing this is* (domain it touches, seams it crosses, external contracts it depends on, user-facing surfaces it changes). pull as much context as you need to render a confident, well-grounded review: read related files, grep for callers of changed symbols, check tests that exercise the touched paths, fetch related GitHub state. **you are the synthesizer** \u2014 never delegate understanding to subagents.
|
|
575
636
|
|
|
576
|
-
if the PR is **genuinely trivial**, skip
|
|
637
|
+
if the PR is **genuinely trivial**, skip the fan-out entirely and submit a \`No new issues found.\` review per step 7.
|
|
577
638
|
|
|
578
639
|
"Genuinely trivial" (skip):
|
|
579
640
|
- single-word doc typo, whitespace/format-only, comment-only across any number of files
|
|
@@ -592,23 +653,25 @@ For simple, well-defined tasks, skip the plan phase and go straight to build.`
|
|
|
592
653
|
- any "typo fix" in user-facing copy that changes meaning ("approved" \u2192 "denied")
|
|
593
654
|
- mixed diffs where a semantic 1-liner is buried in whitespace/formatting changes
|
|
594
655
|
|
|
595
|
-
|
|
656
|
+
4. **lens decision \u2014 0 or 2+, NEVER 1**.
|
|
657
|
+
|
|
658
|
+
The default is **0 lenses**: handle the review yourself end-to-end. Most PRs land here.
|
|
596
659
|
|
|
597
|
-
|
|
660
|
+
Dispatch **2+ \`${REVIEWER_AGENT_NAME}\` lenses in parallel** ONLY when ALL of the following are true:
|
|
661
|
+
- the PR is substantive (>5 files changed AND >200 net lines), OR touches a high-stakes subsystem (auth, billing, payments, schema migration, webhooks, secrets, RBAC, multi-tenant isolation, cron/scheduling)
|
|
662
|
+
- you can name 2+ distinct concrete failure modes that warrant independent lenses (one lens per failure mode; orthogonal, not overlapping)
|
|
663
|
+
- parallel-orchestrated independent perspectives meaningfully outperform what you'd find solo
|
|
598
664
|
|
|
599
|
-
|
|
600
|
-
- **2\u20133 lenses (most PRs land here)** \u2014 new CRUD endpoint (correctness + security + test-integrity); new UI flow (user-journey + correctness); a single bug fix in a non-critical subsystem (correctness + test-integrity); design doc covering one domain (research-validated + correctness or holistic)
|
|
601
|
-
- **4\u20135 lenses (high-stakes subsystem touches)** \u2014 any billing/payments change (billing-subsystem + correctness + security + operational-readiness); new auth flow (auth-subsystem + correctness + security + test-integrity); schema migration (schema-migration-subsystem + correctness + operational-readiness + impact); cross-subsystem PR that touches billing AND auth AND schema (one subsystem lens per domain + correctness)
|
|
602
|
-
- **6+ lenses** \u2014 almost always a smell; you're either covering overlapping ground or this PR should have been split. push back via the review body rather than expanding lens count.
|
|
665
|
+
**NEVER dispatch exactly one lens.** A single lens is just a more expensive version of doing the work yourself with a worse model \u2014 it adds wall time and a context-handoff for no orthogonality benefit. Either you have at least two genuinely independent failure-mode hypotheses (dispatch all in one turn), or you don't (do the review yourself).
|
|
603
666
|
|
|
604
|
-
|
|
667
|
+
When you do go multi-lens, lens framings come in two flavors:
|
|
605
668
|
- **themed lenses** \u2014 a perspective applied across the whole diff (correctness, security, user-journey, performance, etc.).
|
|
606
|
-
- **subsystem lenses** \u2014 a domain-scoped frame for high-stakes subsystems the PR touches (e.g. "the auth lens", "the billing lens", "the schema-migration lens").
|
|
669
|
+
- **subsystem lenses** \u2014 a domain-scoped frame for high-stakes subsystems the PR touches (e.g. "the auth lens", "the billing lens", "the schema-migration lens"). **for high-stakes domains, lead with the subsystem lens rather than the generic themed equivalent** \u2014 "billing-subsystem" outperforms "correctness on billing code" because the framing primes the subagent to remember domain-specific failure modes (double-charges, refund races, currency rounding, dispute flows) the generic lens misses.
|
|
607
670
|
|
|
608
671
|
starter menu (combine, omit, or invent your own):
|
|
609
672
|
- **correctness & invariants** \u2014 bugs, races, error handling, edge cases, state-machine boundaries
|
|
610
|
-
- **impact** \u2014
|
|
611
|
-
- **research-validated assumptions** \u2014 third-party API contracts, SDK semantics, framework directives, version-gated behavior. the subagent must verify load-bearing claims via web search and quote source URLs.
|
|
673
|
+
- **impact** \u2014 stale references in code/tests/docs/configs/UI after rename/remove
|
|
674
|
+
- **research-validated assumptions** \u2014 third-party API contracts, SDK semantics, framework directives, version-gated behavior. **only pick when the PR's correctness depends on the contract behaving a specific way** \u2014 not when the API is merely used. The bar is "if the third-party contract differs from what the diff assumes, the PR is incorrect." When dispatched, the subagent must verify load-bearing claims via web search and quote source URLs.
|
|
612
675
|
- **security** \u2014 new endpoints, authZ, input validation, secrets handling, replay/CSRF/injection, cross-tenant isolation
|
|
613
676
|
- **user-journey** \u2014 UX-touching flows: walk through happy path and failure modes as a user
|
|
614
677
|
- **operational readiness** \u2014 observability, alerting, migrations (forward + rollback), feature flags, on-call burden
|
|
@@ -618,26 +681,36 @@ For simple, well-defined tasks, skip the plan phase and go straight to build.`
|
|
|
618
681
|
- **holistic** \u2014 does the PR make sense as a whole? symmetric flows (delete for every create, rollback for every migration)?
|
|
619
682
|
- **subsystem lenses** (invent as the PR demands) \u2014 auth, billing, payments, schema migration, webhooks, secrets, RBAC, multi-tenant isolation, cron/scheduling, etc.
|
|
620
683
|
|
|
621
|
-
|
|
684
|
+
The only subagent type is \`${REVIEWER_AGENT_NAME}\` \u2014 used for lens judgment work ("is this safe / correct / well-tested?"), runs on a mid-tier model.
|
|
685
|
+
|
|
686
|
+
5. **fan out (only if step 4 said 2+ lenses)**: dispatch every \`${REVIEWER_AGENT_NAME}\` subagent for this run **IN A SINGLE ASSISTANT TURN, AS MULTIPLE PARALLEL TASK TOOL_USE BLOCKS IN ONE MESSAGE.**
|
|
687
|
+
|
|
688
|
+
\u26A0\uFE0F CRITICAL \u2014 PARALLELISM IS THE ONLY REASON LENSES EXIST. \u26A0\uFE0F
|
|
689
|
+
The default tool-call behavior of Claude Code (and most agent runtimes) is **serial dispatch**: emit one Task call, await result, emit next, await, etc. This collapses your fan-out into a sequential review where N lenses add N \xD7 (orchestrator-think-time + lens-execution-time) to wall time. **YOU MUST OVERRIDE THIS DEFAULT.** Emit ALL of your Task tool_use blocks in the SAME assistant message, BEFORE you read ANY result from ANY of them. If you find yourself emitting one Task call, then thinking about the result, then emitting another \u2014 STOP and re-issue them all together. The whole point of going multi-lens is the wall-clock speedup from parallel execution; serial dispatch defeats it entirely.
|
|
690
|
+
|
|
691
|
+
\u2705 Right pattern: one assistant turn with N Task tool_use blocks \u2192 wait \u2192 N results arrive together \u2192 aggregate.
|
|
692
|
+
\u274C Wrong pattern: turn 1 = Task(lens A) \u2192 turn 2 (after A's result) = Task(lens B) \u2192 turn 3 (after B's result) = Task(lens C). This is the failure mode. Do not do this.
|
|
693
|
+
|
|
694
|
+
You can also include your own \`read\` / \`grep\` / \`webfetch\` calls in the SAME turn as the parallel \`${REVIEWER_AGENT_NAME}\` dispatches \u2014 concurrent context-pulling on the orchestrator side runs in parallel with the lens fan-out and costs zero extra wall time.
|
|
695
|
+
|
|
696
|
+
if a subagent errors out, times out, or returns nothing usable, retry once with the same lens; if it still fails, proceed with partial coverage and note the missing lens in the review body \u2014 do not skip the fan-out entirely on a single subagent failure. each subagent gets:
|
|
622
697
|
- the diff path / target \u2014 reading the diff and the codebase is its job
|
|
623
698
|
- **only one lens** \u2014 never a multi-section "review for X, Y, and Z" prompt
|
|
624
699
|
- **a Task \`description\` set to the lens name** (e.g. \`"security"\`, \`"correctness"\`, \`"billing-subsystem"\`) \u2014 the harness reads this field to label the subagent's log lines so parallel runs can be told apart in CI output. without it, every subagent shows up as \`subagent#N\`.
|
|
625
|
-
- the read-only contract restated in your dispatch instructions so the rule is present twice (the subagent's system prompt also enforces it). The test: would this call still be a no-op if reverted? If not (PR comments, branch pushes, issue updates, set_output, label changes, dependency installs, etc.), don't make it.
|
|
626
700
|
- if the lens touches external contracts, instruct the subagent to verify load-bearing claims via web search rather than trust training data, and to quote source URLs in its reasoning. action runs are non-interactive \u2014 there's no human in the loop to catch "I'm pretty sure Stripe does X."
|
|
627
701
|
- ask the subagent to report findings with file paths and NEW line numbers from the diff so you can anchor inline comments without re-reading the entire diff.
|
|
628
702
|
|
|
629
703
|
delegation discipline:
|
|
630
|
-
- do NOT lens-review the diff yourself in parallel with the subagents (your job is dispatch + comment-drafting; doing the lens work yourself reintroduces the bias the fan-out avoids)
|
|
631
704
|
- do NOT summarize the PR for them (biases toward a validation frame)
|
|
632
705
|
- do NOT hand them a curated reading list (let them discover scope)
|
|
633
706
|
- do NOT pre-shape their output with a finding schema
|
|
634
707
|
- do NOT mention the other lenses (independence is the point \u2014 overlapping findings are a strong signal)
|
|
635
708
|
|
|
636
|
-
|
|
709
|
+
6. **aggregate & draft**: when the fan-out lands, merge findings; de-dup overlaps (two lenses catching the same issue = higher-confidence signal); trace each finding yourself before accepting it. drop praise, style preferences, speculative/unverified claims, findings about pre-existing code unrelated to the PR (heuristic: if the finding's root cause lives in lines this PR added or modified, it's in scope; otherwise drop unless the PR plausibly introduced or amplified the regression), and anything not actionable. also drop **bloat-shaped findings** \u2014 proposed fixes that would add defensive checks for cases that can't happen, abstractions used once, comments restating obvious code, tests asserting tautologies, or "just-in-case" guards. subagents are fallible and bias toward recommending changes; the bar for an actionable inline comment is sound + correct + elegant. recommending a change that improves only one of the three (or worse, degrades elegance to nominally improve correctness) makes the codebase worse, not better.
|
|
637
710
|
|
|
638
711
|
for surviving findings, draft inline comments with NEW line numbers from the diff. every comment must be actionable, 2-3 sentences max. use GitHub permalink format for code references. for impact-analysis findings (stale references after rename/remove), report them in the review body ordered by severity (runtime breakage > incorrect docs > stale comments) rather than as inline comments unless they're anchored to a specific line.
|
|
639
712
|
|
|
640
|
-
|
|
713
|
+
7. **submit**: ALWAYS submit exactly one review via \`${t("create_pull_request_review")}\`. Do NOT call \`report_progress\` \u2014 the review is the final record and the progress comment will be cleaned up automatically.
|
|
641
714
|
|
|
642
715
|
note: the first create_pull_request_review submission may error with a one-time diff-coverage nudge listing unread TOC regions. retry the same call to proceed \u2014 optionally after reading the listed ranges. the pre-flight will not block again this session.
|
|
643
716
|
|
|
@@ -665,10 +738,10 @@ For simple, well-defined tasks, skip the plan phase and go straight to build.`
|
|
|
665
738
|
|
|
666
739
|
${PR_SUMMARY_FORMAT}`
|
|
667
740
|
},
|
|
668
|
-
// IncrementalReview shares Review's
|
|
669
|
-
//
|
|
670
|
-
//
|
|
671
|
-
//
|
|
741
|
+
// IncrementalReview shares Review's 0-or-2+ lens pattern but scopes the
|
|
742
|
+
// target to the incremental diff. The "issues must be NEW since the last
|
|
743
|
+
// Pullfrog review" filter lives at aggregation time (step 8), NOT in the
|
|
744
|
+
// subagent prompt — pushing the filter into
|
|
672
745
|
// subagents matches the canonical anneal anti-pattern of "list known
|
|
673
746
|
// pre-existing failures — don't flag these" and suppresses signal on
|
|
674
747
|
// regressions the new commits amplified. The review body is just
|
|
@@ -687,38 +760,57 @@ ${PR_SUMMARY_FORMAT}`
|
|
|
687
760
|
|
|
688
761
|
3. **incremental scope**: if \`incrementalDiffPath\` is present, read it to see what changed since the last review. this is a range-diff that isolates the net changes, filtering out base branch noise. if not present, fall back to reviewing the full PR diff and determine what changed since Pullfrog's most recent review.
|
|
689
762
|
|
|
690
|
-
4. **prior feedback**: fetch previous reviews via \`${t("list_pull_request_reviews")}\`. for the most recent Pullfrog review, call \`${t("get_review_comments")}\` with the review ID to retrieve specific prior line-level feedback. you'll use this to filter your aggregation in step
|
|
763
|
+
4. **prior feedback**: fetch previous reviews via \`${t("list_pull_request_reviews")}\`. for the most recent Pullfrog review, call \`${t("get_review_comments")}\` with the review ID to retrieve specific prior line-level feedback. you'll use this to filter your aggregation in step 8 \u2014 anything already flagged in a prior review and not changed by the new commits should not be re-raised. you do NOT need to render this in the review body; the rolling PR summary snapshot is the durable record of what's been addressed.
|
|
691
764
|
|
|
692
|
-
5. **triage
|
|
765
|
+
5. **triage**: orient on the *incremental* changes \u2014 domain, seams, external contracts, user-facing surfaces. pull as much context as you need to render a confident review: read related files, grep for callers of changed symbols, check tests that exercise the touched paths. **you are the synthesizer.**
|
|
693
766
|
|
|
694
|
-
if the incremental changes are **genuinely trivial**, skip the fan-out entirely and jump to step
|
|
767
|
+
if the incremental changes are **genuinely trivial**, skip the fan-out entirely and jump to step 10's non-substantive path (do NOT submit a review).
|
|
695
768
|
|
|
696
769
|
"Genuinely trivial" (skip): formatting/comment tweaks, import reordering, lockfile regen, mechanical rename of import paths, whitespace-only.
|
|
697
770
|
"Looks trivial but isn't" (do NOT skip \u2014 same anti-patterns as Review mode): 1-line changes to SQL/regex/auth/billing/permissions/signature-verification code; flipping feature-flag defaults or retry/timeout constants; money/tax/HTTP-method/redirect changes; tightening or loosening a comparison operator; mixed diffs with a semantic line buried in formatting.
|
|
698
771
|
When unsure, treat as non-trivial.
|
|
699
772
|
|
|
700
|
-
|
|
773
|
+
6. **lens decision \u2014 0 or 2+, NEVER 1**.
|
|
774
|
+
|
|
775
|
+
The default is **0 lenses**: handle the re-review yourself end-to-end. Most incremental reviews land here \u2014 especially thread-reply re-reviews where the user is asking "did you address X?" rather than "review the diff again."
|
|
701
776
|
|
|
702
|
-
|
|
703
|
-
- the
|
|
777
|
+
Dispatch **2+ \`${REVIEWER_AGENT_NAME}\` lenses in parallel** ONLY when ALL of the following are true:
|
|
778
|
+
- the incremental changes are substantive (>5 files changed AND >200 net new lines), OR touch a high-stakes subsystem (auth, billing, payments, schema migration, webhooks, secrets, RBAC, multi-tenant isolation, cron/scheduling)
|
|
779
|
+
- you can name 2+ distinct concrete failure modes the new commits plausibly introduce that warrant independent lenses
|
|
780
|
+
- parallel-orchestrated independent perspectives meaningfully outperform what you'd find solo
|
|
781
|
+
|
|
782
|
+
**NEVER dispatch exactly one lens.** Single-lens dispatch adds wall time and cost for no orthogonality benefit. Either go multi-lens (\u22652 in parallel) or do the re-review yourself.
|
|
783
|
+
|
|
784
|
+
Lens framing follows Review mode: themed lenses (correctness, security, etc.) and subsystem lenses (auth, billing, schema-migration, etc.) \u2014 for high-stakes domains lead with the subsystem lens.
|
|
785
|
+
|
|
786
|
+
7. **fan out (only if step 6 said 2+ lenses)**: dispatch every \`${REVIEWER_AGENT_NAME}\` subagent for this run **IN A SINGLE ASSISTANT TURN, AS MULTIPLE PARALLEL TASK TOOL_USE BLOCKS IN ONE MESSAGE.**
|
|
787
|
+
|
|
788
|
+
\u26A0\uFE0F CRITICAL \u2014 PARALLELISM IS THE ONLY REASON LENSES EXIST. \u26A0\uFE0F
|
|
789
|
+
Default tool-call behavior is **serial dispatch**: emit one Task call, await result, emit next, await, etc. This collapses your fan-out into a sequential review where N lenses add N \xD7 (orchestrator-think-time + lens-execution-time) to wall time. **YOU MUST OVERRIDE THIS DEFAULT.** Emit ALL of your Task tool_use blocks in the SAME assistant message, BEFORE you read ANY result from ANY of them.
|
|
790
|
+
|
|
791
|
+
\u2705 Right pattern: one assistant turn with N Task tool_use blocks \u2192 wait \u2192 N results arrive together \u2192 aggregate.
|
|
792
|
+
\u274C Wrong pattern: turn 1 = Task(lens A) \u2192 turn 2 (after A's result) = Task(lens B). This is the failure mode.
|
|
793
|
+
|
|
794
|
+
You can also include your own \`read\` / \`grep\` / \`webfetch\` calls in the SAME turn as the parallel \`${REVIEWER_AGENT_NAME}\` dispatches.
|
|
795
|
+
|
|
796
|
+
if a subagent errors out, times out, or returns nothing usable, retry once with the same lens; if it still fails, proceed with partial coverage and note the missing lens in the review body. each subagent gets:
|
|
797
|
+
- the diff scope (incremental diff path if available, full diff otherwise). do NOT tell them to skip pre-existing issues \u2014 that suppresses regressions the new commits amplified; the "issues must be NEW" filter lives at aggregation time (step 8), not in the subagent prompt
|
|
704
798
|
- **only one lens** \u2014 never a multi-section "review for X, Y, and Z" prompt
|
|
705
|
-
- **a Task \`description\` set to the lens name**
|
|
706
|
-
- the
|
|
707
|
-
- if the lens touches external contracts, instruct the subagent to verify load-bearing claims via web search and quote source URLs. action runs are non-interactive \u2014 there's no human to catch "I'm pretty sure Stripe does X."
|
|
799
|
+
- **a Task \`description\` set to the lens name** \u2014 the harness reads this field to label log lines so parallel runs can be told apart.
|
|
800
|
+
- if the lens touches external contracts, instruct the subagent to verify load-bearing claims via web search and quote source URLs.
|
|
708
801
|
- ask the subagent to report findings with file paths and NEW line numbers from the full PR diff so you can anchor inline comments.
|
|
709
802
|
|
|
710
803
|
delegation discipline:
|
|
711
|
-
- do NOT lens-review the diff yourself in parallel with the subagents
|
|
712
804
|
- do NOT summarize the changes for them (biases toward validation frame)
|
|
713
805
|
- do NOT hand them a curated reading list (let them discover scope)
|
|
714
806
|
- do NOT pre-shape their output with a finding schema
|
|
715
807
|
- do NOT mention the other lenses (independence is the point)
|
|
716
808
|
|
|
717
|
-
|
|
809
|
+
8. **aggregate, draft, self-critique**: merge findings (yours + any subagent output if you went multi-lens); de-dup overlaps; trace each finding yourself. drop praise, style preferences, speculative/unverified claims, findings about pre-existing code unrelated to the new commits, anything not actionable, and anything that re-states prior review feedback (heuristic: if the finding's root cause lives in lines the *new commits* added or modified, it's in scope; otherwise drop). also drop **bloat-shaped findings** \u2014 proposed fixes that would add defensive checks for cases that can't happen, abstractions used once, comments restating obvious code, tests asserting tautologies, or "just-in-case" guards. subagents are fallible and bias toward recommending changes; the bar for an actionable inline comment is sound + correct + elegant. recommending a change that improves only one of the three (or degrades elegance to nominally improve correctness) makes the codebase worse, not better. To compute "lines the new commits added or modified": if \`incrementalDiffPath\` from step 2 is present, use it directly. Otherwise, take the prior Pullfrog review's \`commit_id\` (returned alongside each entry from \`${t("list_pull_request_reviews")}\` in step 4) and run \`git diff <prior-review-sha>..HEAD\` to isolate the lines added since that review. draft inline comments with NEW line numbers from the full PR diff \u2014 every comment must be actionable, 2-3 sentences max.
|
|
718
810
|
|
|
719
|
-
|
|
811
|
+
9. **build the review body** \u2014 a single "Reviewed changes" section: summarize at the logical-change level, not per-file. each bullet starts with a past-tense verb (e.g. \`- Extracted shared CLI runtime into a single module\`, \`- Renamed package to pullfrog\`). avoid file paths unless they add clarity. if the changes can be described in one sentence, use one sentence \u2014 no bullets needed. do NOT include a separate "Prior review feedback" checklist; that's tracked in the rolling PR summary snapshot for the next agent run, and surfacing it in the user-facing body is noise (changes that addressed prior feedback are already covered by the Reviewed-changes bullets). in some cases you may receive a complete diff for the whole pull request instead of an incremental one \u2014 when this happens, you will need to determine what changes have happened since Pullfrog's most recent review.
|
|
720
812
|
|
|
721
|
-
|
|
813
|
+
10. Submit \u2014 every run must end with EXACTLY ONE of \`${t("create_pull_request_review")}\` (substantive review) or \`${t("report_progress")}\` (no-review acknowledgement). do NOT call \`create_issue_comment\` for review output.
|
|
722
814
|
|
|
723
815
|
Same callout-intensity ladder as Review mode \u2014 \`[!CAUTION]\` (large red, "will break") \u2192 \`[!IMPORTANT]\` (large purple, "must address before merging") \u2192 \`[!NOTE]\` (small blue, "FYI") \u2192 no callout (plain text). And the same Fix-button lever: the footer renders a Fix button on every non-approving review, so \`approved: true\` suppresses it. Wrapping mergeable feedback in \`[!IMPORTANT]\` trains users to click Fix on reviews that don't need fixing \u2014 pick the tier the author's actual next action justifies.
|
|
724
816
|
|
package/dist/models.d.ts
CHANGED
|
@@ -4,6 +4,21 @@
|
|
|
4
4
|
* slugs use the format `provider/model-id` (e.g. "anthropic/claude-opus").
|
|
5
5
|
* bump `resolve` when a new model generation ships — the alias (slug) stays stable.
|
|
6
6
|
*/
|
|
7
|
+
/**
|
|
8
|
+
* routing discriminant for entries whose `resolve` is dynamic — looked up
|
|
9
|
+
* from a separate env var at run time rather than fixed in the catalog.
|
|
10
|
+
*
|
|
11
|
+
* `"bedrock"` means the actual model ID comes from `BEDROCK_MODEL_ID`
|
|
12
|
+
* (an AWS-canonical Bedrock model ID like `us.anthropic.claude-opus-4-7`
|
|
13
|
+
* or `amazon.nova-pro-v1:0`). enterprise Bedrock customers self-select for
|
|
14
|
+
* version control — silent alias bumps would break compliance review,
|
|
15
|
+
* model-access enrollment, and provisioned-throughput contracts. so the
|
|
16
|
+
* single `bedrock/byok` entry is a routing slug, not a model alias: the
|
|
17
|
+
* harness reads `BEDROCK_MODEL_ID` and routes to claude-code (when the ID
|
|
18
|
+
* has "anthropic" as a discrete dot-segment) or opencode (everything else, with an
|
|
19
|
+
* `amazon-bedrock/` prefix).
|
|
20
|
+
*/
|
|
21
|
+
export type ModelRouting = "bedrock";
|
|
7
22
|
export interface ModelAlias {
|
|
8
23
|
/** stable alias stored in DB, e.g. "anthropic/claude-opus" */
|
|
9
24
|
slug: string;
|
|
@@ -11,9 +26,9 @@ export interface ModelAlias {
|
|
|
11
26
|
provider: string;
|
|
12
27
|
/** human-readable name shown in dropdowns */
|
|
13
28
|
displayName: string;
|
|
14
|
-
/** concrete models.dev specifier, e.g. "anthropic/claude-opus-4-6" */
|
|
29
|
+
/** concrete models.dev specifier, e.g. "anthropic/claude-opus-4-6". sentinel for routing entries — never passed to a CLI directly. */
|
|
15
30
|
resolve: string;
|
|
16
|
-
/** full models.dev specifier for the OpenRouter equivalent (undefined for free models) */
|
|
31
|
+
/** full models.dev specifier for the OpenRouter equivalent (undefined for free models and routing entries) */
|
|
17
32
|
openRouterResolve: string | undefined;
|
|
18
33
|
/** top-tier pick for this provider — preferred during auto-select */
|
|
19
34
|
preferred: boolean;
|
|
@@ -21,6 +36,15 @@ export interface ModelAlias {
|
|
|
21
36
|
isFree: boolean;
|
|
22
37
|
/** slug of a replacement model — presence implies this model is deprecated */
|
|
23
38
|
fallback: string | undefined;
|
|
39
|
+
/** dynamic-resolution discriminant — see ModelRouting docs */
|
|
40
|
+
routing: ModelRouting | undefined;
|
|
41
|
+
/** alias key (within same provider) of the cheaper sibling reviewfrog should
|
|
42
|
+
* use as its lens-fanout subagent. e.g. claude-opus → "claude-sonnet". */
|
|
43
|
+
subagentModel: string | undefined;
|
|
44
|
+
/** hide from selectable lists (UI dropdowns, CLI pickers). does NOT affect
|
|
45
|
+
* resolution — for that use `fallback`. used for internal-only tier targets
|
|
46
|
+
* (e.g. gpt-5.4 as a subagent target without exposing it to users). */
|
|
47
|
+
hidden: boolean;
|
|
24
48
|
}
|
|
25
49
|
interface ModelDef {
|
|
26
50
|
displayName: string;
|
|
@@ -33,6 +57,13 @@ interface ModelDef {
|
|
|
33
57
|
isFree?: boolean;
|
|
34
58
|
/** slug of a replacement model — presence implies this model is deprecated */
|
|
35
59
|
fallback?: string;
|
|
60
|
+
/** dynamic-resolution discriminant — see ModelRouting docs */
|
|
61
|
+
routing?: ModelRouting;
|
|
62
|
+
/** alias key (within same provider) of the cheaper sibling reviewfrog should
|
|
63
|
+
* use as its lens-fanout subagent (e.g. claude-opus → "claude-sonnet"). */
|
|
64
|
+
subagentModel?: string;
|
|
65
|
+
/** hide from selectable lists. does NOT affect resolution; for that use `fallback`. */
|
|
66
|
+
hidden?: boolean;
|
|
36
67
|
}
|
|
37
68
|
export interface ProviderConfig {
|
|
38
69
|
displayName: string;
|
|
@@ -47,6 +78,7 @@ export declare const providers: {
|
|
|
47
78
|
deepseek: ProviderConfig;
|
|
48
79
|
moonshotai: ProviderConfig;
|
|
49
80
|
opencode: ProviderConfig;
|
|
81
|
+
bedrock: ProviderConfig;
|
|
50
82
|
openrouter: ProviderConfig;
|
|
51
83
|
};
|
|
52
84
|
export type ModelProvider = keyof typeof providers;
|
|
@@ -67,7 +99,7 @@ export declare function resolveModelSlug(slug: string): string | undefined;
|
|
|
67
99
|
* use this in UI display sites (dropdown trigger labels, PR-comment footers,
|
|
68
100
|
* etc.) so a deprecated stored slug renders as the model the user actually
|
|
69
101
|
* runs against — not the historical name. selectable lists should still hide
|
|
70
|
-
* deprecated aliases by filtering on `!a.fallback`.
|
|
102
|
+
* deprecated and internal-only aliases by filtering on `!a.fallback && !a.hidden`.
|
|
71
103
|
*/
|
|
72
104
|
export declare function resolveDisplayAlias(slug: string): ModelAlias | undefined;
|
|
73
105
|
/**
|
|
@@ -83,4 +115,32 @@ export declare function resolveCliModel(slug: string): string | undefined;
|
|
|
83
115
|
* (e.g. free opencode models).
|
|
84
116
|
*/
|
|
85
117
|
export declare function resolveOpenRouterModel(slug: string): string | undefined;
|
|
118
|
+
/** env var that supplies the Bedrock model ID for the `bedrock/byok` slug. */
|
|
119
|
+
export declare const BEDROCK_MODEL_ID_ENV = "BEDROCK_MODEL_ID";
|
|
120
|
+
/**
|
|
121
|
+
* the Bedrock model ID passed to claude-code or opencode is whatever the
|
|
122
|
+
* user set in `BEDROCK_MODEL_ID` — Pullfrog never resolves or upgrades it.
|
|
123
|
+
* we route by checking whether the ID names an Anthropic model: claude-code
|
|
124
|
+
* handles Anthropic-on-Bedrock natively (with `CLAUDE_CODE_USE_BEDROCK=1`),
|
|
125
|
+
* everything else goes through opencode's `amazon-bedrock` provider.
|
|
126
|
+
*
|
|
127
|
+
* AWS Bedrock IDs come in two shapes:
|
|
128
|
+
* - dotted foundation IDs: `us.anthropic.claude-opus-4-7`,
|
|
129
|
+
* `anthropic.claude-haiku-4-5-20251001-v1:0`, `amazon.nova-pro-v1:0`,
|
|
130
|
+
* `meta.llama4-scout-17b-instruct-v1:0`. AWS-published, lowercase, the
|
|
131
|
+
* foundation provider always appears as a discrete dot-segment.
|
|
132
|
+
* - inference-profile ARNs: `arn:aws:bedrock:us-east-2:<acct>:application-inference-profile/<user-name>`.
|
|
133
|
+
* `<user-name>` is operator-chosen, so a naive substring check is fragile
|
|
134
|
+
* in both directions (Anthropic profile named without "anthropic" → routes
|
|
135
|
+
* to opencode and misses CLAUDE_CODE_USE_BEDROCK; non-Anthropic profile
|
|
136
|
+
* whose name happens to contain "anthropic" → routes to claude-code).
|
|
137
|
+
*
|
|
138
|
+
* we anchor on a discrete dot-segment match (case-insensitive). this catches
|
|
139
|
+
* every published foundation ID and is conservative for ARN-form IDs: ARN
|
|
140
|
+
* names that don't include "anthropic" as their own dot-segment route to
|
|
141
|
+
* opencode by default. operators using ARN-form IDs whose backing model is
|
|
142
|
+
* Anthropic should set `PULLFROG_AGENT=claude` to force the right route, or
|
|
143
|
+
* include the foundation segment in the profile name.
|
|
144
|
+
*/
|
|
145
|
+
export declare function isBedrockAnthropicId(bedrockModelId: string): boolean;
|
|
86
146
|
export {};
|
package/dist/utils/agent.d.ts
CHANGED
|
@@ -6,8 +6,11 @@ import type { Agent } from "../agents/index.ts";
|
|
|
6
6
|
* 1. PULLFROG_MODEL env var — resolved through the alias registry first,
|
|
7
7
|
* so values like "anthropic/claude-opus" become "anthropic/claude-opus-4-7".
|
|
8
8
|
* raw specifiers (e.g. "anthropic/claude-opus-4-6") pass through unchanged.
|
|
9
|
-
*
|
|
10
|
-
*
|
|
9
|
+
* always wins — bypasses Bedrock routing entirely. to test a different
|
|
10
|
+
* Bedrock model, change `BEDROCK_MODEL_ID`, not `PULLFROG_MODEL`.
|
|
11
|
+
* 2. slug from repo config / payload → alias registry. routing slugs
|
|
12
|
+
* (e.g. `bedrock/byok`) defer to a separate env var (`BEDROCK_MODEL_ID`).
|
|
13
|
+
* 3. undefined — agent will auto-select.
|
|
11
14
|
*/
|
|
12
15
|
export declare function resolveModel(ctx: {
|
|
13
16
|
slug?: string | undefined;
|
package/dist/utils/apiKeys.d.ts
CHANGED
|
@@ -8,3 +8,21 @@ export declare function validateAgentApiKey(params: {
|
|
|
8
8
|
owner: string;
|
|
9
9
|
name: string;
|
|
10
10
|
}): void;
|
|
11
|
+
/**
|
|
12
|
+
* Detect agent-runtime auth failures that should be reformatted as an actionable
|
|
13
|
+
* key-fix CTA before being shown to the user. Covers the two shapes we see:
|
|
14
|
+
* - missing key (validateAgentApiKey throw): contains MISSING_KEY_MARKER
|
|
15
|
+
* - revoked / invalid key (Claude CLI 401 surfaced via api_error_status):
|
|
16
|
+
* "Invalid API key · Fix external API key" + similar provider variants
|
|
17
|
+
*/
|
|
18
|
+
export declare function isApiKeyAuthError(text: string): boolean;
|
|
19
|
+
/**
|
|
20
|
+
* Friendly Markdown summary for both the missing-key and invalid-key cases.
|
|
21
|
+
* Used in the catch / result-failure paths in `main.ts` to overwrite the raw
|
|
22
|
+
* agent error before it's posted to the PR progress comment.
|
|
23
|
+
*/
|
|
24
|
+
export declare function formatApiKeyErrorSummary(params: {
|
|
25
|
+
owner: string;
|
|
26
|
+
name: string;
|
|
27
|
+
raw: string;
|
|
28
|
+
}): string;
|
package/dist/utils/instructions.d.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { type AgentId } from "../external.ts";
|
|
2
2
|
import type { Mode } from "../modes.ts";
|
|
3
3
|
import type { ResolvedPayload } from "./payload.ts";
|
|
4
|
+
import type { LearningsHeading } from "./runContext.ts";
|
|
4
5
|
import type { RunContextData } from "./runContextData.ts";
|
|
5
6
|
interface InstructionsContext {
|
|
6
7
|
payload: ResolvedPayload;
|
|
@@ -12,6 +13,10 @@ interface InstructionsContext {
|
|
|
12
13
|
* couldn't be seeded for some reason. main.ts always seeds, so in
|
|
13
14
|
* practice this is always set; the null case keeps the type honest. */
|
|
14
15
|
learningsFilePath: string | null;
|
|
16
|
+
/** server-parsed TOC for the body of the learnings tmpfile. rendered
|
|
17
|
+
* inline into the LEARNINGS prompt section so the agent can `read_file`
|
|
18
|
+
* targeted line ranges instead of pulling the whole file into context. */
|
|
19
|
+
learningsHeadings: LearningsHeading[];
|
|
15
20
|
}
|
|
16
21
|
export interface ResolvedInstructions {
|
|
17
22
|
full: string;
|
|
@@ -21,5 +26,19 @@ export interface ResolvedInstructions {
|
|
|
21
26
|
event: string;
|
|
22
27
|
runtime: string;
|
|
23
28
|
}
|
|
29
|
+
/** render the heading list as an indented bullet TOC. ranges shown in
|
|
30
|
+
* parentheses (`(L3-L18)`); the start line is always the heading line
|
|
31
|
+
* itself, so reading the listed range gives the agent the heading +
|
|
32
|
+
* body together. shallowest heading depth in the body sits at the root
|
|
33
|
+
* column; deeper levels indent by `(depth - rootDepth) * 2` spaces. */
|
|
34
|
+
export declare function renderLearningsToc(headings: LearningsHeading[]): string;
|
|
35
|
+
/** assemble the LEARNINGS prompt section: file path + intro + either
|
|
36
|
+
* the rendered heading TOC (when the body has structure) or a no-headings
|
|
37
|
+
* affordance pointing the agent at the reflection turn for restructuring.
|
|
38
|
+
* empty string when the seed step failed and there's no path to surface. */
|
|
39
|
+
export declare function buildLearningsSection(ctx: {
|
|
40
|
+
filePath: string | null;
|
|
41
|
+
headings: LearningsHeading[];
|
|
42
|
+
}): string;
|
|
24
43
|
export declare function resolveInstructions(ctx: InstructionsContext): ResolvedInstructions;
|
|
25
44
|
export {};
|