jeo-code 0.6.29 → 0.6.30
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +10 -0
- package/README.ja.md +1 -1
- package/README.ko.md +1 -1
- package/README.md +1 -1
- package/README.zh.md +1 -1
- package/package.json +1 -1
- package/src/agent/AGENTS.md +1 -1
- package/src/agent/engine.ts +14 -33
- package/src/agent/loop-guards.ts +135 -0
- package/src/agent/tool-registry.ts +0 -54
package/CHANGELOG.md
CHANGED
|
@@ -6,6 +6,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
6
6
|
|
|
7
7
|
The README mirrors the latest 5 entries — regenerate with `bun run changelog:sync`.
|
|
8
8
|
|
|
9
|
+
## [0.6.30] - 2026-06-19
|
|
10
|
+
_gjc-style intermediate-judgment guard classification extracted from the engine loop, plus a re-verification that `jeo --tmux` does not leak bun memory or slow down._
|
|
11
|
+
|
|
12
|
+
### Changed
|
|
13
|
+
- **Loop intermediate-judgment guards extracted into a classified module (`src/agent/loop-guards.ts`).** The mid-run "continue / self-correct / stop" decisions that were inlined across `engine.ts`'s `while` loop as scattered booleans and message strings are now a named `GuardState` string-literal union taxonomy — jeo's descendant of gjc's `ultragoal-guard` `UltragoalGuardState` pattern. A single frozen `GUARD_LIMITS` object is the source of truth for every threshold (`MAX_REPEAT`, `MAX_FAILURES`, `MAX_REFUSAL_RETRIES`, `MAX_INVALID_CALLS`, `MAX_PARSE_BOUNCES`, `CYCLE_WINDOW`), and pure classifiers (`isVerificationSignal`, `repeatHint`, `classifyDoneGate`) are now independently testable. `engine.ts` still owns all control flow (history mutation, `step++`, `continue`, `return finish(...)`) — only the JUDGMENT moved, so behavior is unchanged (net −19 lines in `engine.ts`). Removed the now-unused `src/agent/tool-registry.ts` (including its `nearestToolName` helper).
|
|
14
|
+
|
|
15
|
+
### Verified
|
|
16
|
+
- **`jeo --tmux` has no bun memory leak and does not slow down.** An in-process probe streaming 5,000,000 SGR mouse-report escapes through `queuePromptInputChunk` (10 × 500k, `Bun.gc(true)` between batches) holds RSS flat (133.9 → 135.2 MB, slope ≈0.13 MB/round) with zero prompt-queue accumulation; a real `jeo --tmux` session flooded with 60k live mouse reports via `tmux send-keys` plateaus in RSS (129,456 → 129,472 KB). `jeo --tmux -p` end-to-end creates the profiled session, runs the turn, and tears down cleanly.
|
|
17
|
+
- **Full suite green:** `bun run typecheck` clean and `bun test` 1687 pass / 0 fail across 210 files (includes the new `test/loop-guards.test.ts`, 9 tests, and the signature-only Anthropic replay test).
|
|
18
|
+
|
|
9
19
|
## [0.6.29] - 2026-06-19
|
|
10
20
|
_Signature-only thinking-block replay (Anthropic opus-4-7/4-8), plus a tmux mouse-flood memory guard confirming `jeo --tmux` does not leak._
|
|
11
21
|
|
package/README.ja.md
CHANGED
|
@@ -200,11 +200,11 @@ CI は `.github/workflows/npm-publish.yml` で公開します — GitHub リリ
|
|
|
200
200
|
## 変更履歴 (Changelog)
|
|
201
201
|
|
|
202
202
|
<!-- CHANGELOG:START (auto-generated from CHANGELOG.md — run `bun run changelog:sync`) -->
|
|
203
|
+
- **[0.6.30]** (2026-06-19) — gjc-style intermediate-judgment guard classification extracted from the engine loop, plus a re-verification that `jeo --tmux` does not leak bun memory or slow down.
|
|
203
204
|
- **[0.6.29]** (2026-06-19) — Signature-only thinking-block replay (Anthropic opus-4-7/4-8), plus a tmux mouse-flood memory guard confirming `jeo --tmux` does not leak.
|
|
204
205
|
- **[0.6.28]** (2026-06-19) — Signed thinking-block replay: native reasoning is now sent BACK to providers across steps/turns, restoring multi-step reasoning continuity (gajae parity).
|
|
205
206
|
- **[0.6.27]** (2026-06-19) — Ponytail pass on the reasoning-tier mapper, plus a real-tmux verification of `jeo --tmux`.
|
|
206
207
|
- **[0.6.26]** (2026-06-19) — The forge emblem is redrawn again as the mascot crayfish, foregrounding its signature pincer claws (집게).
|
|
207
|
-
- **[0.6.25]** (2026-06-19) — Reasoning works at every thinking level (gajae parity), and the forge emblem is redrawn as the neon-lens coding wizard.
|
|
208
208
|
|
|
209
209
|
See [CHANGELOG.md](CHANGELOG.md) for the full history.
|
|
210
210
|
<!-- CHANGELOG:END -->
|
package/README.ko.md
CHANGED
|
@@ -200,11 +200,11 @@ CI는 `.github/workflows/npm-publish.yml`로 배포합니다 — GitHub 릴리
|
|
|
200
200
|
## 변경 이력 (Changelog)
|
|
201
201
|
|
|
202
202
|
<!-- CHANGELOG:START (auto-generated from CHANGELOG.md — run `bun run changelog:sync`) -->
|
|
203
|
+
- **[0.6.30]** (2026-06-19) — gjc-style intermediate-judgment guard classification extracted from the engine loop, plus a re-verification that `jeo --tmux` does not leak bun memory or slow down.
|
|
203
204
|
- **[0.6.29]** (2026-06-19) — Signature-only thinking-block replay (Anthropic opus-4-7/4-8), plus a tmux mouse-flood memory guard confirming `jeo --tmux` does not leak.
|
|
204
205
|
- **[0.6.28]** (2026-06-19) — Signed thinking-block replay: native reasoning is now sent BACK to providers across steps/turns, restoring multi-step reasoning continuity (gajae parity).
|
|
205
206
|
- **[0.6.27]** (2026-06-19) — Ponytail pass on the reasoning-tier mapper, plus a real-tmux verification of `jeo --tmux`.
|
|
206
207
|
- **[0.6.26]** (2026-06-19) — The forge emblem is redrawn again as the mascot crayfish, foregrounding its signature pincer claws (집게).
|
|
207
|
-
- **[0.6.25]** (2026-06-19) — Reasoning works at every thinking level (gajae parity), and the forge emblem is redrawn as the neon-lens coding wizard.
|
|
208
208
|
|
|
209
209
|
See [CHANGELOG.md](CHANGELOG.md) for the full history.
|
|
210
210
|
<!-- CHANGELOG:END -->
|
package/README.md
CHANGED
|
@@ -200,11 +200,11 @@ Required npm token permissions (repository secret `NPM_TOKEN`):
|
|
|
200
200
|
## Changelog
|
|
201
201
|
|
|
202
202
|
<!-- CHANGELOG:START (auto-generated from CHANGELOG.md — run `bun run changelog:sync`) -->
|
|
203
|
+
- **[0.6.30]** (2026-06-19) — gjc-style intermediate-judgment guard classification extracted from the engine loop, plus a re-verification that `jeo --tmux` does not leak bun memory or slow down.
|
|
203
204
|
- **[0.6.29]** (2026-06-19) — Signature-only thinking-block replay (Anthropic opus-4-7/4-8), plus a tmux mouse-flood memory guard confirming `jeo --tmux` does not leak.
|
|
204
205
|
- **[0.6.28]** (2026-06-19) — Signed thinking-block replay: native reasoning is now sent BACK to providers across steps/turns, restoring multi-step reasoning continuity (gajae parity).
|
|
205
206
|
- **[0.6.27]** (2026-06-19) — Ponytail pass on the reasoning-tier mapper, plus a real-tmux verification of `jeo --tmux`.
|
|
206
207
|
- **[0.6.26]** (2026-06-19) — The forge emblem is redrawn again as the mascot crayfish, foregrounding its signature pincer claws (집게).
|
|
207
|
-
- **[0.6.25]** (2026-06-19) — Reasoning works at every thinking level (gajae parity), and the forge emblem is redrawn as the neon-lens coding wizard.
|
|
208
208
|
|
|
209
209
|
See [CHANGELOG.md](CHANGELOG.md) for the full history.
|
|
210
210
|
<!-- CHANGELOG:END -->
|
package/README.zh.md
CHANGED
|
@@ -200,11 +200,11 @@ CI 通过 `.github/workflows/npm-publish.yml` 发布 — GitHub 发布 release
|
|
|
200
200
|
## 更新日志 (Changelog)
|
|
201
201
|
|
|
202
202
|
<!-- CHANGELOG:START (auto-generated from CHANGELOG.md — run `bun run changelog:sync`) -->
|
|
203
|
+
- **[0.6.30]** (2026-06-19) — gjc-style intermediate-judgment guard classification extracted from the engine loop, plus a re-verification that `jeo --tmux` does not leak bun memory or slow down.
|
|
203
204
|
- **[0.6.29]** (2026-06-19) — Signature-only thinking-block replay (Anthropic opus-4-7/4-8), plus a tmux mouse-flood memory guard confirming `jeo --tmux` does not leak.
|
|
204
205
|
- **[0.6.28]** (2026-06-19) — Signed thinking-block replay: native reasoning is now sent BACK to providers across steps/turns, restoring multi-step reasoning continuity (gajae parity).
|
|
205
206
|
- **[0.6.27]** (2026-06-19) — Ponytail pass on the reasoning-tier mapper, plus a real-tmux verification of `jeo --tmux`.
|
|
206
207
|
- **[0.6.26]** (2026-06-19) — The forge emblem is redrawn again as the mascot crayfish, foregrounding its signature pincer claws (집게).
|
|
207
|
-
- **[0.6.25]** (2026-06-19) — Reasoning works at every thinking level (gajae parity), and the forge emblem is redrawn as the neon-lens coding wizard.
|
|
208
208
|
|
|
209
209
|
See [CHANGELOG.md](CHANGELOG.md) for the full history.
|
|
210
210
|
<!-- CHANGELOG:END -->
|
package/package.json
CHANGED
package/src/agent/AGENTS.md
CHANGED
|
@@ -17,6 +17,7 @@ The core runtime loop, tool registry, session management, and state persistence
|
|
|
17
17
|
| `hooks.ts` | Brief description of purpose |
|
|
18
18
|
| `json.ts` | Brief description of purpose |
|
|
19
19
|
| `loop.ts` | The primary execution loop orchestrating model calls and tool execution |
|
|
20
|
+
| `loop-guards.ts` | Intermediate-judgment classification (gjc ultragoal-guard parity): named `GuardState` taxonomy, `GUARD_LIMITS` thresholds, and pure classifiers (`isVerificationSignal`, `repeatHint`, `classifyDoneGate`) consumed by `engine.ts` |
|
|
20
21
|
| `memory.ts` | OKF concept-bundle memory: session distill, query-aware budget injection, legacy MEMORY.md migration (`migrateLegacyMemory`) + `JEO_MEMORY_LEGACY` rollback toggle |
|
|
21
22
|
| `memory-okf.ts` | OKF v0.1 format layer: frontmatter parse/serialize, concept IDs, conformance validation |
|
|
22
23
|
| `memory-graph.ts` | Concept cross-link graph: build/expand (1-hop search), broken-link-tolerant lint, optional graphify detection |
|
|
@@ -35,7 +36,6 @@ The core runtime loop, tool registry, session management, and state persistence
|
|
|
35
36
|
| `todo-tool.ts` | Brief description of purpose |
|
|
36
37
|
| `tokenizer.ts` | Brief description of purpose |
|
|
37
38
|
| `tool-output.ts` | Brief description of purpose |
|
|
38
|
-
| `tool-registry.ts` | Brief description of purpose |
|
|
39
39
|
| `tools.ts` | Built-in tool definitions (bash, read, write, edit, etc.) |
|
|
40
40
|
| `web-search.ts` | Brief description of purpose |
|
|
41
41
|
|
package/src/agent/engine.ts
CHANGED
|
@@ -22,6 +22,7 @@ export { TOOL_OUTPUT_MAX, READ_OUTPUT_MAX, TOOL_SPILL_THRESHOLD, MAX_TOOL_ARTIFA
|
|
|
22
22
|
import { StepBudget, dynamicStepBudgetConfig, resolveStepBudgetConfig, hashSignature, type StepBudgetConfig } from "./step-budget";
|
|
23
23
|
import { historyTokens, trimToolResultsInPlace } from "./compaction";
|
|
24
24
|
import { jeoEnv } from "../util/env";
|
|
25
|
+
import { GUARD_LIMITS, isVerificationSignal, repeatHint, classifyDoneGate } from "./loop-guards";
|
|
25
26
|
|
|
26
27
|
|
|
27
28
|
async function invokeCallLlm(history: Message[], options: {
|
|
@@ -378,29 +379,15 @@ export async function runAgentLoop(history: Message[], opts: AgentLoopOptions):
|
|
|
378
379
|
} catch { /* best-effort; fall through to the plain stop message */ }
|
|
379
380
|
return finish({ done: false, steps: step, doneReason: `Stopped: ${stopReason}` });
|
|
380
381
|
};
|
|
381
|
-
// Result-aware repeat nudge (A): tell the model WHY repeating won't help and what to
|
|
382
|
-
// try instead, tailored to the repeated tool and its last actual result.
|
|
383
|
-
const repeatHint = (tool: string, prev?: { success: boolean; output: string }): string => {
|
|
384
|
-
const out = prev?.output ?? "";
|
|
385
|
-
const empty = !prev || !prev.success || out.trim() === "" || /no match|0 match|no result|not found|no file/i.test(out);
|
|
386
|
-
if (tool === "search" || tool === "find" || tool === "ls") {
|
|
387
|
-
return empty
|
|
388
|
-
? `That '${tool}' returned nothing useful and will again — BROADEN it (a looser pattern, a parent directory, or a different tool such as ${tool === "search" ? "find" : "search"}), or call done if this lookup isn't needed.`
|
|
389
|
-
: `That '${tool}' already returned results — open one of the hits with read, or move on; re-running it changes nothing.`;
|
|
390
|
-
}
|
|
391
|
-
if (tool === "read") return `You already read that and its content is unchanged — use what you read, or read a DIFFERENT file.`;
|
|
392
|
-
if (tool === "bash") return `That command already ran with the same output — change the command, or call done.`;
|
|
393
|
-
return `That call's result is unchanged — take a different action, or call done.`;
|
|
394
|
-
};
|
|
395
382
|
// No-progress guard: weak/local models often repeat the same tool call without
|
|
396
383
|
// ever emitting `done`. Two escalating corrections (B), then a consolidated stop.
|
|
397
|
-
const MAX_REPEAT =
|
|
384
|
+
const MAX_REPEAT = GUARD_LIMITS.MAX_REPEAT;
|
|
398
385
|
// Last executed step's per-call results — fed to repeatHint so a corrective bounce
|
|
399
386
|
// can cite the repeated call's ACTUAL last outcome (A).
|
|
400
387
|
let lastResults: { success: boolean; output: string; executed: boolean }[] = [];
|
|
401
388
|
// Consecutive-failure guard: a model that keeps emitting *different* but failing
|
|
402
389
|
// calls (bad edits, failing commands) would otherwise burn the whole step budget.
|
|
403
|
-
const MAX_FAILURES =
|
|
390
|
+
const MAX_FAILURES = GUARD_LIMITS.MAX_FAILURES;
|
|
404
391
|
let consecutiveFailures = 0;
|
|
405
392
|
// done-verification guard (plan/gjc-inheritance.md B4, a lightweight descendant of gjc's ultragoal-guard):
|
|
406
393
|
// a turn that MUTATED files but shows no verification signal gets ONE pushback on
|
|
@@ -424,16 +411,15 @@ export async function runAgentLoop(history: Message[], opts: AgentLoopOptions):
|
|
|
424
411
|
// as-is, then once more with an explicit re-grounding note; only a third
|
|
425
412
|
// refusal in the turn surfaces the (friendly) error. Bounded per turn so a
|
|
426
413
|
// genuinely refused request can never burn billed calls in a loop.
|
|
427
|
-
const MAX_REFUSAL_RETRIES =
|
|
414
|
+
const MAX_REFUSAL_RETRIES = GUARD_LIMITS.MAX_REFUSAL_RETRIES;
|
|
428
415
|
let refusalRetries = 0;
|
|
429
|
-
const VERIFY_SIGNAL_RE = /\b(test|tests|tsc|typecheck|lint|build|check|spec|pytest|vitest|jest)\b/i;
|
|
430
416
|
let lastSig = "";
|
|
431
417
|
let repeatCount = 0;
|
|
432
418
|
// Cycle guard (the A↔B ping-pong the exact-repeat guard cannot see): the recent
|
|
433
419
|
// executed step signatures, as fixed-size digests. When a full window cycles
|
|
434
420
|
// through ≤2 distinct calls, bounce ONCE with an explicit correction; a spin that
|
|
435
421
|
// persists through the correction stops the turn.
|
|
436
|
-
const CYCLE_WINDOW =
|
|
422
|
+
const CYCLE_WINDOW = GUARD_LIMITS.CYCLE_WINDOW;
|
|
437
423
|
const recentStepSigs: string[] = [];
|
|
438
424
|
let cycleBounceUsed = false;
|
|
439
425
|
// Invalid-tool-call guard: a model that returns JSON without a usable `tool`
|
|
@@ -441,10 +427,10 @@ export async function runAgentLoop(history: Message[], opts: AgentLoopOptions):
|
|
|
441
427
|
let invalidToolCalls = 0;
|
|
442
428
|
// A JSON reply with no usable `tool` field can't drive the loop — stop sooner than the
|
|
443
429
|
// repeat-spin guard (no escalating correction helps a model that isn't producing a call).
|
|
444
|
-
const MAX_INVALID_CALLS =
|
|
430
|
+
const MAX_INVALID_CALLS = GUARD_LIMITS.MAX_INVALID_CALLS;
|
|
445
431
|
// Prose-bounce guard: after this many invalid-JSON corrections, salvage the
|
|
446
432
|
// model's text as the final answer instead of burning the whole step budget.
|
|
447
|
-
const MAX_PARSE_BOUNCES =
|
|
433
|
+
const MAX_PARSE_BOUNCES = GUARD_LIMITS.MAX_PARSE_BOUNCES;
|
|
448
434
|
let parseFailures = 0;
|
|
449
435
|
while (true) {
|
|
450
436
|
if (turnBudgetMs > 0 && Date.now() - turnStartedAt > turnBudgetMs) {
|
|
@@ -703,19 +689,14 @@ export async function runAgentLoop(history: Message[], opts: AgentLoopOptions):
|
|
|
703
689
|
ev.onAssistant?.(responseText, toolCalls[0]);
|
|
704
690
|
|
|
705
691
|
if (toolCalls.length === 1 && toolCalls[0].tool === "done") {
|
|
706
|
-
|
|
692
|
+
// done-verification gate — jeo's descendant of gjc's ultragoal-guard completion
|
|
693
|
+
// state machine (plan/gjc-inheritance.md B4). The classifier owns the JUDGMENT
|
|
694
|
+
// (which named state, which message); the loop owns the once-pushback latch.
|
|
695
|
+
const doneGate = classifyDoneGate({ sawMutation, sawVerification, pendingHookFailure });
|
|
696
|
+
if (doneGate.block && !donePushbackUsed) {
|
|
707
697
|
donePushbackUsed = true; // second done always passes — escape hatch
|
|
708
698
|
pushAssistantTurn(history, responseText, reasonBuf, artifactBuf);
|
|
709
|
-
history.push({
|
|
710
|
-
role: "user",
|
|
711
|
-
content: pendingHookFailure !== null
|
|
712
|
-
? `Your latest mutation left the post-turn hook "${pendingHookFailure}" FAILING (non-zero exit) — its diagnostics were shown in the tool result above. ` +
|
|
713
|
-
"Fix the reported problems (the hook re-runs on your next mutation), then call done. " +
|
|
714
|
-
"If the hook failure is a false positive, call done again and say why in the reason."
|
|
715
|
-
: "You modified files this turn but ran NO verification (no test/build/typecheck command succeeded). " +
|
|
716
|
-
"Run the narrowest command that proves your change works, then call done. " +
|
|
717
|
-
"If verification is genuinely not applicable (docs/config-only change), call done again and say why in the reason.",
|
|
718
|
-
});
|
|
699
|
+
history.push({ role: "user", content: doneGate.message });
|
|
719
700
|
step++;
|
|
720
701
|
continue;
|
|
721
702
|
}
|
|
@@ -1039,7 +1020,7 @@ export async function runAgentLoop(history: Message[], opts: AgentLoopOptions):
|
|
|
1039
1020
|
if (t === "write" || t === "edit") sawMutation = true;
|
|
1040
1021
|
else if (t === "bash") {
|
|
1041
1022
|
const cmd = String(toolCalls[i].arguments?.command ?? "");
|
|
1042
|
-
if (
|
|
1023
|
+
if (isVerificationSignal(cmd, results[i].output)) sawVerification = true;
|
|
1043
1024
|
}
|
|
1044
1025
|
}
|
|
1045
1026
|
// F6 (round 4 architect, Low): judge the step by its NON-TRIVIAL calls — a
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Intermediate-judgment guards for the agent loop — the mid-run "should this turn
|
|
3
|
+
* continue, correct itself, or stop" decisions that run between model calls.
|
|
4
|
+
*
|
|
5
|
+
* gjc keeps this concern in its own layer: `gjc-runtime/ultragoal-guard.ts` computes a
|
|
6
|
+
* named `UltragoalGuardState` discriminated union PURELY, and the runtime merely acts on
|
|
7
|
+
* the verdict. jeo previously inlined the same logic inside `engine.ts`'s `while` loop as
|
|
8
|
+
* scattered booleans and message strings. This module gives jeo the same classification:
|
|
9
|
+
* a named `GuardState` taxonomy plus pure, independently-testable classifier functions.
|
|
10
|
+
* `engine.ts` still owns the control flow (history mutation, `step++`, `continue`,
|
|
11
|
+
* `return finish(...)`) — only the JUDGMENT moves here, so behavior is unchanged.
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
/**
|
|
15
|
+
* Named taxonomy of the loop's intermediate judgments — jeo's descendant of gjc's
|
|
16
|
+
* `UltragoalGuardState`. Each member names one decision the loop can reach mid-turn.
|
|
17
|
+
*/
|
|
18
|
+
export type GuardState =
|
|
19
|
+
| "ok" // proceed: emit / execute the tool call as-is
|
|
20
|
+
| "repeat_correct" // exact-repeat detected → ONE corrective bounce (skip execution)
|
|
21
|
+
| "repeat_stop" // exact-repeat survived the correction → consolidate-stop
|
|
22
|
+
| "cycle_correct" // A↔B alternation detected → ONE corrective bounce
|
|
23
|
+
| "cycle_stop" // cycle survived the correction → consolidate-stop
|
|
24
|
+
| "consecutive_failure_stop" // MAX_FAILURES different-but-failing steps → stop
|
|
25
|
+
| "invalid_tool_stop" // MAX_INVALID_CALLS replies with no usable tool field → stop
|
|
26
|
+
| "parse_salvage" // repeated non-JSON prose → salvage the text as the final answer
|
|
27
|
+
| "context_overflow_retry" // provider reported context overflow → ONE trim + retry
|
|
28
|
+
| "refusal_retry" // transient safety refusal → bounded resend ladder
|
|
29
|
+
| "done_unverified" // mutated files, no verification signal → pushback on done
|
|
30
|
+
| "done_hook_failing" // post-turn hook still failing → pushback on done
|
|
31
|
+
| "done_ok"; // done accepted — the turn is finished
|
|
32
|
+
|
|
33
|
+
/**
|
|
34
|
+
* Bounded thresholds for every loop guard — the single, named source of truth.
|
|
35
|
+
* Kept in one frozen object so the limits are discoverable and testable instead of
|
|
36
|
+
* sprinkled as bare literals through the loop body.
|
|
37
|
+
*/
|
|
38
|
+
export const GUARD_LIMITS = Object.freeze({
|
|
39
|
+
/** Identical step repeats tolerated before a consolidated stop (with corrections en route). */
|
|
40
|
+
MAX_REPEAT: 4,
|
|
41
|
+
/** Consecutive different-but-failing steps before the turn stops. */
|
|
42
|
+
MAX_FAILURES: 5,
|
|
43
|
+
/** Safety-refusal resends per turn before surfacing the friendly error. */
|
|
44
|
+
MAX_REFUSAL_RETRIES: 3,
|
|
45
|
+
/** Replies with no usable `tool`/`tools` field before the turn stops. */
|
|
46
|
+
MAX_INVALID_CALLS: 3,
|
|
47
|
+
/** Consecutive non-JSON parse failures before the prose is salvaged as the answer. */
|
|
48
|
+
MAX_PARSE_BOUNCES: 2,
|
|
49
|
+
/** Recent-signature window scanned for an A↔B (≤2 distinct calls) cycle. */
|
|
50
|
+
CYCLE_WINDOW: 6,
|
|
51
|
+
});
|
|
52
|
+
|
|
53
|
+
/**
|
|
54
|
+
* Commands (or their output) that count as a verification signal: a test, build,
|
|
55
|
+
* typecheck, or lint invocation. The done-verification guard treats a turn that mutated
|
|
56
|
+
* files without any such signal as "unverified".
|
|
57
|
+
*/
|
|
58
|
+
export const VERIFY_SIGNAL_RE = /\b(test|tests|tsc|typecheck|lint|build|check|spec|pytest|vitest|jest)\b/i;
|
|
59
|
+
|
|
60
|
+
/**
|
|
61
|
+
* True when a bash command (or the head of its output) matches VERIFY_SIGNAL_RE — a keyword heuristic suggesting the work was verified, not proof.
|
|
62
|
+
* Output is examined only up to the first 2000 chars — enough to catch a tool runner's
|
|
63
|
+
* banner without rescanning a megabyte of logs.
|
|
64
|
+
*/
|
|
65
|
+
export function isVerificationSignal(cmd: string, output = ""): boolean {
|
|
66
|
+
return VERIFY_SIGNAL_RE.test(cmd) || VERIFY_SIGNAL_RE.test(output.slice(0, 2000));
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
/**
|
|
70
|
+
* Result-aware repeat nudge: tells the model WHY repeating the call won't help and what
|
|
71
|
+
* to try instead, tailored to the repeated tool and its last actual result.
|
|
72
|
+
*/
|
|
73
|
+
export function repeatHint(tool: string, prev?: { success: boolean; output: string }): string {
|
|
74
|
+
const out = prev?.output ?? "";
|
|
75
|
+
const empty = !prev || !prev.success || out.trim() === "" || /no match|0 match|no result|not found|no file/i.test(out);
|
|
76
|
+
if (tool === "search" || tool === "find" || tool === "ls") {
|
|
77
|
+
return empty
|
|
78
|
+
? `That '${tool}' returned nothing useful and will again — BROADEN it (a looser pattern, a parent directory, or a different tool such as ${tool === "search" ? "find" : "search"}), or call done if this lookup isn't needed.`
|
|
79
|
+
: `That '${tool}' already returned results — open one of the hits with read, or move on; re-running it changes nothing.`;
|
|
80
|
+
}
|
|
81
|
+
if (tool === "read") return `You already read that and its content is unchanged — use what you read, or read a DIFFERENT file.`;
|
|
82
|
+
if (tool === "bash") return `That command already ran with the same output — change the command, or call done.`;
|
|
83
|
+
return `That call's result is unchanged — take a different action, or call done.`;
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
/** Inputs for the done-verification gate (jeo's descendant of gjc's ultragoal-guard). */
|
|
87
|
+
export interface DoneGateInput {
|
|
88
|
+
/** A write/edit succeeded this turn. */
|
|
89
|
+
sawMutation: boolean;
|
|
90
|
+
/** A test/build/typecheck/lint command succeeded this turn. */
|
|
91
|
+
sawVerification: boolean;
|
|
92
|
+
/** The run-command of the most recent still-failing post-turn hook, or null. */
|
|
93
|
+
pendingHookFailure: string | null;
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
/** Verdict from {@link classifyDoneGate}: whether to bounce `done`, and the message. */
|
|
97
|
+
export interface DoneGateVerdict {
|
|
98
|
+
state: Extract<GuardState, "done_ok" | "done_unverified" | "done_hook_failing">;
|
|
99
|
+
/** When true, `done` should be bounced ONCE with `message` (the caller owns the once-gate). */
|
|
100
|
+
block: boolean;
|
|
101
|
+
/** Corrective message to push back on `done`; empty when `state === "done_ok"`. */
|
|
102
|
+
message: string;
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
/**
|
|
106
|
+
* Classify whether a `done` should be accepted or bounced — the direct descendant of
|
|
107
|
+
* gjc's `ultragoal-guard` completion gate (plan/gjc-inheritance.md B4).
|
|
108
|
+
*
|
|
109
|
+
* A turn that MUTATED files but has either NO verification signal or a still-failing
|
|
110
|
+
* post-turn hook is blocked ONCE. The caller owns the single-pushback latch; a second
|
|
111
|
+
* `done` always passes (the escape hatch for genuinely-unverifiable docs/config changes).
|
|
112
|
+
*/
|
|
113
|
+
export function classifyDoneGate(input: DoneGateInput): DoneGateVerdict {
|
|
114
|
+
const hookFailing = input.pendingHookFailure !== null;
|
|
115
|
+
const block = input.sawMutation && (!input.sawVerification || hookFailing);
|
|
116
|
+
if (!block) return { state: "done_ok", block: false, message: "" };
|
|
117
|
+
if (hookFailing) {
|
|
118
|
+
return {
|
|
119
|
+
state: "done_hook_failing",
|
|
120
|
+
block: true,
|
|
121
|
+
message:
|
|
122
|
+
`Your latest mutation left the post-turn hook "${input.pendingHookFailure}" FAILING (non-zero exit) — its diagnostics were shown in the tool result above. ` +
|
|
123
|
+
"Fix the reported problems (the hook re-runs on your next mutation), then call done. " +
|
|
124
|
+
"If the hook failure is a false positive, call done again and say why in the reason.",
|
|
125
|
+
};
|
|
126
|
+
}
|
|
127
|
+
return {
|
|
128
|
+
state: "done_unverified",
|
|
129
|
+
block: true,
|
|
130
|
+
message:
|
|
131
|
+
"You modified files this turn but ran NO verification (no test/build/typecheck command succeeded). " +
|
|
132
|
+
"Run the narrowest command that proves your change works, then call done. " +
|
|
133
|
+
"If verification is genuinely not applicable (docs/config-only change), call done again and say why in the reason.",
|
|
134
|
+
};
|
|
135
|
+
}
|
|
@@ -1,54 +0,0 @@
|
|
|
1
|
-
import { readTool, writeTool, editTool, bashTool, findTool, searchTool, lsTool, type ToolResult } from "./tools";
|
|
2
|
-
|
|
3
|
-
export type ToolHandler = (args: Record<string, any>, cwd: string) => Promise<ToolResult>;
|
|
4
|
-
|
|
5
|
-
export const DEFAULT_TOOLS: Record<string, ToolHandler> = {
|
|
6
|
-
read: (a, cwd) => readTool(a.filePath ?? a.path, a.lineRange ?? a.range, cwd, !!a.raw),
|
|
7
|
-
write: (a, cwd) => writeTool(a.filePath ?? a.path, a.content ?? "", cwd),
|
|
8
|
-
edit: (a, cwd) => editTool(a.filePath ?? a.path, a.editBlock ?? a.edit ?? "", cwd),
|
|
9
|
-
bash: (a, cwd) => bashTool(a.command ?? a.cmd, cwd, typeof a.timeoutMs === "number" ? a.timeoutMs : undefined, typeof a.cwd === "string" ? a.cwd : (typeof a.subdir === "string" ? a.subdir : undefined), a.env && typeof a.env === "object" ? a.env : undefined),
|
|
10
|
-
find: (a, cwd) => findTool(a.globPattern ?? a.pattern, cwd),
|
|
11
|
-
search: (a, cwd) => searchTool(a.pattern, a.globPattern ?? "*", cwd, !!(a.ignoreCase ?? a.i), { before: a.before, after: a.after, context: a.context, maxMatches: a.maxMatches }),
|
|
12
|
-
ls: (a, cwd) => lsTool(a.dirPath ?? a.path ?? a.dir ?? ".", cwd),
|
|
13
|
-
};
|
|
14
|
-
|
|
15
|
-
export const TOOL_PROTOCOL = [
|
|
16
|
-
"You have these tools (call exactly ONE per step):",
|
|
17
|
-
"1. read {filePath, lineRange?, raw?} — read a file",
|
|
18
|
-
"2. write {filePath, content} — create/overwrite a file",
|
|
19
|
-
"3. edit {filePath, editBlock} — replace/insert lines",
|
|
20
|
-
"4. bash {command, timeoutMs?, cwd?, env?} — run a shell command",
|
|
21
|
-
"5. find {globPattern} — find files by name",
|
|
22
|
-
"6. search {pattern, globPattern?, ignoreCase?, context?, maxMatches?} — grep",
|
|
23
|
-
"7. ls {dirPath} — list a directory",
|
|
24
|
-
"8. done {reason?} — call when done",
|
|
25
|
-
"",
|
|
26
|
-
"Reply with STRICT JSON only:",
|
|
27
|
-
'{ "tool": "<name>", "arguments": { ... } }',
|
|
28
|
-
].join("\n");
|
|
29
|
-
|
|
30
|
-
export const READONLY_TOOL_PROTOCOL = [
|
|
31
|
-
"You have these READ-ONLY tools:",
|
|
32
|
-
"1. read {filePath, lineRange?} — read a file",
|
|
33
|
-
"2. find {globPattern} — find files by name",
|
|
34
|
-
"3. search {pattern, globPattern?, ignoreCase?} — grep",
|
|
35
|
-
"4. ls {dirPath} — list a directory",
|
|
36
|
-
"5. done {reason?} — call when complete",
|
|
37
|
-
"",
|
|
38
|
-
"Reply with STRICT JSON only:",
|
|
39
|
-
'{ "tool": "<name>", "arguments": { ... } }',
|
|
40
|
-
].join("\n");
|
|
41
|
-
|
|
42
|
-
export function nearestToolName(name: string, known: string[]): string | undefined {
|
|
43
|
-
const want = name.trim().toLowerCase();
|
|
44
|
-
if (!want) return undefined;
|
|
45
|
-
let best: string | undefined;
|
|
46
|
-
let bestD = Infinity;
|
|
47
|
-
for (const k of known) {
|
|
48
|
-
const kl = k.toLowerCase();
|
|
49
|
-
if (kl === want) return k;
|
|
50
|
-
const d = kl.startsWith(want) || want.startsWith(kl) ? 1 : 10;
|
|
51
|
-
if (d < bestD) { bestD = d; best = k; }
|
|
52
|
-
}
|
|
53
|
-
return bestD <= 2 ? best : undefined;
|
|
54
|
-
}
|