@agjs/tsforge 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/tsforge.js +2 -0
- package/package.json +35 -0
- package/src/agent/agent.constants.ts +382 -0
- package/src/agent/agent.types.ts +34 -0
- package/src/agent/index.ts +4 -0
- package/src/agent/model-agent.ts +297 -0
- package/src/agent/tool-repair.ts +194 -0
- package/src/agent/tools.ts +190 -0
- package/src/browser/checks.ts +96 -0
- package/src/browser/index.ts +8 -0
- package/src/browser/oracle.ts +303 -0
- package/src/classify.ts +48 -0
- package/src/cli.ts +1333 -0
- package/src/config/config.constants.ts +9 -0
- package/src/config/flags.ts +32 -0
- package/src/config/index.ts +8 -0
- package/src/config/tsforge-config.ts +301 -0
- package/src/constitution/baseline.ts +257 -0
- package/src/detect-gate.ts +498 -0
- package/src/eval/eval.types.ts +36 -0
- package/src/eval/index.ts +3 -0
- package/src/eval/judge.ts +62 -0
- package/src/eval/score.ts +39 -0
- package/src/files/create.ts +22 -0
- package/src/files/edit.ts +193 -0
- package/src/files/files.constants.ts +11 -0
- package/src/files/files.types.ts +81 -0
- package/src/files/hashline-format.ts +110 -0
- package/src/files/hashline.ts +689 -0
- package/src/files/index.ts +19 -0
- package/src/index.ts +8 -0
- package/src/inference/index.ts +6 -0
- package/src/inference/inference.constants.ts +34 -0
- package/src/inference/inference.types.ts +123 -0
- package/src/inference/openai-compatible.ts +113 -0
- package/src/inference/stream-guard.ts +161 -0
- package/src/inference/stream.ts +370 -0
- package/src/inference/transport.ts +78 -0
- package/src/inference/wire.ts +0 -0
- package/src/lib/fs/fs.ts +126 -0
- package/src/lib/fs/fs.types.ts +5 -0
- package/src/lib/fs/index.ts +3 -0
- package/src/lib/fs/process.ts +146 -0
- package/src/lib/guards/guards.ts +9 -0
- package/src/lib/guards/index.ts +1 -0
- package/src/lib/json/index.ts +1 -0
- package/src/lib/json/json.ts +12 -0
- package/src/lib/scope/index.ts +2 -0
- package/src/lib/scope/scope.constants.ts +3 -0
- package/src/lib/scope/scope.ts +40 -0
- package/src/loop/astgrep-fix.ts +228 -0
- package/src/loop/feedback/feedback.ts +138 -0
- package/src/loop/feedback/index.ts +8 -0
- package/src/loop/feedback/meta-rule-docs.ts +41 -0
- package/src/loop/feedback/meta-rule-feedback.ts +61 -0
- package/src/loop/feedback/rule-docs.generated.json +112 -0
- package/src/loop/feedback/rule-docs.ts +342 -0
- package/src/loop/index.ts +19 -0
- package/src/loop/loop.constants.ts +68 -0
- package/src/loop/loop.types.ts +99 -0
- package/src/loop/prompt/index.ts +2 -0
- package/src/loop/prompt/project-map.ts +69 -0
- package/src/loop/prompt/prompt.ts +107 -0
- package/src/loop/quality.ts +174 -0
- package/src/loop/rule-docs.generated.json +367 -0
- package/src/loop/run-spec.ts +88 -0
- package/src/loop/run.ts +400 -0
- package/src/loop/session.ts +1410 -0
- package/src/loop/tools/add-dependency.ts +71 -0
- package/src/loop/tools/condense.ts +498 -0
- package/src/loop/tools/edit-hashline.ts +80 -0
- package/src/loop/tools/execute-tool.ts +80 -0
- package/src/loop/tools/file-ops.ts +323 -0
- package/src/loop/tools/index.ts +2 -0
- package/src/loop/tools/lsp-ops.ts +222 -0
- package/src/loop/tools/scaffold-routes.ts +68 -0
- package/src/loop/tools/scaffold-ui.ts +62 -0
- package/src/loop/tools/scaffold-web.ts +35 -0
- package/src/loop/tools/tool-context.ts +126 -0
- package/src/loop/ttsr-defaults.ts +53 -0
- package/src/loop/ttsr.ts +322 -0
- package/src/loop/turn.ts +856 -0
- package/src/lsp/index.ts +2 -0
- package/src/lsp/lsp.types.ts +56 -0
- package/src/lsp/service.ts +500 -0
- package/src/meta-rules/context.ts +195 -0
- package/src/meta-rules/index.ts +9 -0
- package/src/meta-rules/meta-rules.types.ts +47 -0
- package/src/meta-rules/parsers/package-json-parser.ts +51 -0
- package/src/meta-rules/registry.ts +37 -0
- package/src/meta-rules/rules/ci/workflow-actions-pinned.ts +59 -0
- package/src/meta-rules/rules/ci/workflow-runner-pinned.ts +57 -0
- package/src/meta-rules/rules/ci/workflow-timeout-required.ts +114 -0
- package/src/meta-rules/rules/config/tsconfig-paths-exist.ts +117 -0
- package/src/meta-rules/rules/config/tsconfig-strict.ts +91 -0
- package/src/meta-rules/rules/source-text/no-eslint-disable-comments.ts +34 -0
- package/src/meta-rules/rules/source-text/no-ts-suppressions.ts +38 -0
- package/src/meta-rules/rules/supply-chain/no-overlapping-libs.ts +57 -0
- package/src/meta-rules/rules/supply-chain/package-exact-deps.ts +55 -0
- package/src/meta-rules/rules/testing/test-sibling-required.ts +110 -0
- package/src/meta-rules/runner.ts +64 -0
- package/src/models-config.ts +196 -0
- package/src/render/ansi.ts +289 -0
- package/src/render/banner.ts +113 -0
- package/src/render/box.ts +134 -0
- package/src/render/index.ts +7 -0
- package/src/render/markdown.ts +123 -0
- package/src/render/render.types.ts +21 -0
- package/src/render/stream-markdown.ts +128 -0
- package/src/render/style.ts +26 -0
- package/src/rule-packs/bullmq/index.ts +39 -0
- package/src/rule-packs/bullmq/rules/index.ts +7 -0
- package/src/rule-packs/bullmq/rules/job-name-must-be-constant.ts +141 -0
- package/src/rule-packs/bullmq/rules/job-options-must-set-attempts.ts +174 -0
- package/src/rule-packs/bullmq/rules/no-blocking-concurrency-zero.ts +103 -0
- package/src/rule-packs/bullmq/rules/queue-options-must-set-removeoncomplete.ts +130 -0
- package/src/rule-packs/bullmq/rules/queue-options-must-set-removeonfail.ts +130 -0
- package/src/rule-packs/bullmq/rules/worker-must-implement-close.ts +182 -0
- package/src/rule-packs/bullmq/rules/worker-must-listen-failed.ts +140 -0
- package/src/rule-packs/bullmq/utils.ts +334 -0
- package/src/rule-packs/code-flow/index.ts +25 -0
- package/src/rule-packs/code-flow/rules/index.ts +3 -0
- package/src/rule-packs/code-flow/rules/no-bare-date-now.ts +138 -0
- package/src/rule-packs/code-flow/rules/no-template-trim-empty-ternary.ts +87 -0
- package/src/rule-packs/code-flow/rules/prefer-early-return.ts +80 -0
- package/src/rule-packs/code-flow/utils/prefer-early-return.ts +132 -0
- package/src/rule-packs/comment-hygiene/index.ts +25 -0
- package/src/rule-packs/comment-hygiene/rules/index.ts +3 -0
- package/src/rule-packs/comment-hygiene/rules/no-historical-comments.ts +102 -0
- package/src/rule-packs/comment-hygiene/rules/no-narration-comments.ts +83 -0
- package/src/rule-packs/comment-hygiene/rules/no-pr-reference-comments.ts +90 -0
- package/src/rule-packs/create-rule.ts +9 -0
- package/src/rule-packs/drizzle/index.ts +41 -0
- package/src/rule-packs/drizzle/rules/account-scoped-tables-require-where.ts +371 -0
- package/src/rule-packs/drizzle/rules/index.ts +8 -0
- package/src/rule-packs/drizzle/rules/no-nested-db-transaction.ts +127 -0
- package/src/rule-packs/drizzle/rules/no-raw-sql-outside-allowlist.ts +100 -0
- package/src/rule-packs/drizzle/rules/relations-must-cover-fks.ts +209 -0
- package/src/rule-packs/drizzle/rules/schema-files-must-not-import-driver.ts +127 -0
- package/src/rule-packs/drizzle/rules/schema-files-must-only-export-schema.ts +149 -0
- package/src/rule-packs/drizzle/rules/tables-must-have-timestamps.ts +312 -0
- package/src/rule-packs/drizzle/rules/timestamp-must-specify-mode.ts +166 -0
- package/src/rule-packs/drizzle/utils.ts +115 -0
- package/src/rule-packs/elysia/index.ts +43 -0
- package/src/rule-packs/elysia/rules/consistent-status-via-set.ts +69 -0
- package/src/rule-packs/elysia/rules/no-decorate-state-collision.ts +276 -0
- package/src/rule-packs/elysia/rules/no-separate-model-interfaces.ts +144 -0
- package/src/rule-packs/elysia/rules/prefer-destructured-context.ts +155 -0
- package/src/rule-packs/elysia/rules/prefer-direct-return.ts +176 -0
- package/src/rule-packs/elysia/rules/prefer-static-services.ts +159 -0
- package/src/rule-packs/elysia/rules/prefer-throw-status.ts +151 -0
- package/src/rule-packs/elysia/rules/require-hooks-before-routes.ts +209 -0
- package/src/rule-packs/elysia/rules/require-plugin-name.ts +107 -0
- package/src/rule-packs/elysia/utils/elysiaChain.ts +306 -0
- package/src/rule-packs/env-access/index.ts +23 -0
- package/src/rule-packs/env-access/rules/index.ts +2 -0
- package/src/rule-packs/env-access/rules/no-direct-process-env.ts +133 -0
- package/src/rule-packs/env-access/rules/no-process-exit.ts +95 -0
- package/src/rule-packs/i18n-keys/index.ts +19 -0
- package/src/rule-packs/i18n-keys/rules/static-translation-key-exists.ts +173 -0
- package/src/rule-packs/index.ts +139 -0
- package/src/rule-packs/jwt-cookies/index.ts +25 -0
- package/src/rule-packs/jwt-cookies/rules/auth-cookie-must-be-httponly.ts +150 -0
- package/src/rule-packs/jwt-cookies/rules/auth-cookie-must-be-secure-in-prod.ts +149 -0
- package/src/rule-packs/jwt-cookies/rules/bcrypt-rounds-min.ts +195 -0
- package/src/rule-packs/jwt-cookies/utils.ts +188 -0
- package/src/rule-packs/oauth-security/index.ts +25 -0
- package/src/rule-packs/oauth-security/rules/pkce-required-for-oidc.ts +296 -0
- package/src/rule-packs/oauth-security/rules/state-must-be-redis-backed.ts +193 -0
- package/src/rule-packs/oauth-security/rules/state-ttl-bounded.ts +219 -0
- package/src/rule-packs/oauth-security/utils.ts +127 -0
- package/src/rule-packs/react-component-architecture/index.ts +35 -0
- package/src/rule-packs/react-component-architecture/rules/component-folder-structure.ts +123 -0
- package/src/rule-packs/react-component-architecture/rules/forwardref-display-name.ts +93 -0
- package/src/rule-packs/react-component-architecture/rules/index-must-reexport-default.ts +123 -0
- package/src/rule-packs/react-component-architecture/rules/max-hooks-per-file.ts +122 -0
- package/src/rule-packs/react-component-architecture/rules/no-cross-feature-imports.ts +170 -0
- package/src/rule-packs/react-component-architecture/rules/no-inline-jsx-functions.ts +66 -0
- package/src/rule-packs/react-component-architecture/utils.ts +47 -0
- package/src/rule-packs/rule-packs.types.ts +18 -0
- package/src/rule-packs/structured-logging/index.ts +26 -0
- package/src/rule-packs/structured-logging/rules/mask-pii-fields.ts +221 -0
- package/src/rule-packs/structured-logging/rules/no-error-stringify.ts +217 -0
- package/src/rule-packs/structured-logging/rules/require-event-field.ts +136 -0
- package/src/rule-packs/structured-logging/utils/logger.ts +104 -0
- package/src/rule-packs/tanstack-query/index.ts +20 -0
- package/src/rule-packs/tanstack-query/rules/prefix-query-key-must-use-set-queries-data.ts +321 -0
- package/src/rule-packs/test-conventions/index.ts +23 -0
- package/src/rule-packs/test-conventions/rules/index.ts +2 -0
- package/src/rule-packs/test-conventions/rules/no-focused-tests.ts +170 -0
- package/src/rule-packs/test-conventions/rules/test-file-mirrors-source.ts +127 -0
- package/src/rule-packs/utils.ts +142 -0
- package/src/session-store.ts +359 -0
- package/src/spec/generate-tests.ts +213 -0
- package/src/spec/index.ts +5 -0
- package/src/spec/parse.ts +152 -0
- package/src/spec/review-tests.ts +162 -0
- package/src/spec/spec.constants.ts +13 -0
- package/src/spec/spec.types.ts +79 -0
- package/src/stack-detection/detect.ts +246 -0
- package/src/stack-detection/index.ts +3 -0
- package/src/stack-detection/packs.ts +174 -0
- package/src/stack-detection/stack-detection.types.ts +47 -0
- package/src/validate/accept.ts +49 -0
- package/src/validate/errors.ts +35 -0
- package/src/validate/index.ts +12 -0
- package/src/validate/parse.ts +148 -0
- package/src/validate/run-tests.ts +59 -0
- package/src/validate/validate.ts +40 -0
- package/src/validate/validate.types.ts +52 -0
- package/src/web-components.ts +638 -0
- package/src/web-coverage.ts +89 -0
- package/src/web-routes.ts +151 -0
- package/src/web-templates.ts +1011 -0
- package/strict.eslint.config.mjs +84 -0
- package/strict.web.eslint.config.mjs +185 -0
|
@@ -0,0 +1,1410 @@
|
|
|
1
|
+
import type {
|
|
2
|
+
IChatMessage,
|
|
3
|
+
IModelResponse,
|
|
4
|
+
IProvider,
|
|
5
|
+
ITokenUsage,
|
|
6
|
+
} from "../inference";
|
|
7
|
+
import type { ITask } from "../spec";
|
|
8
|
+
import type { FileLinter } from "../detect-gate";
|
|
9
|
+
import {
|
|
10
|
+
SCAFFOLD_UI_TOOL,
|
|
11
|
+
SCAFFOLD_ROUTES_TOOL,
|
|
12
|
+
SCAFFOLD_WEB_TOOL,
|
|
13
|
+
SEARCH_TOOL,
|
|
14
|
+
ADD_DEPENDENCY_TOOL,
|
|
15
|
+
YIELD_STATUS_TOOL,
|
|
16
|
+
READ_ONLY_TOOL_NAMES,
|
|
17
|
+
TOOL_NAME,
|
|
18
|
+
} from "../agent";
|
|
19
|
+
import { flags } from "../config";
|
|
20
|
+
import { readFiles } from "../lib/fs";
|
|
21
|
+
import { validate, type ErrorParser } from "../validate";
|
|
22
|
+
import { detectStack } from "../stack-detection";
|
|
23
|
+
import {
|
|
24
|
+
loadTsforgeConfig,
|
|
25
|
+
normalizeRuleOverrides,
|
|
26
|
+
resolveActivePacks,
|
|
27
|
+
} from "../config/tsforge-config";
|
|
28
|
+
import { LOOP_LIMITS, RUN_STATUS } from "./loop.constants";
|
|
29
|
+
import type { Reporter } from "./loop.types";
|
|
30
|
+
import { CHAT_SYSTEM, COMPACT_SYSTEM } from "./prompt";
|
|
31
|
+
import {
|
|
32
|
+
buildTsService,
|
|
33
|
+
BUILD_NUDGE,
|
|
34
|
+
emitTiming,
|
|
35
|
+
type ILoopCtx,
|
|
36
|
+
type ILoopState,
|
|
37
|
+
isPhantomRouteError,
|
|
38
|
+
NO_TOOL_CALL_NUDGE,
|
|
39
|
+
runToolCalls,
|
|
40
|
+
settleGate,
|
|
41
|
+
toolsFor,
|
|
42
|
+
} from "./turn";
|
|
43
|
+
|
|
44
|
+
/**
 * A persistent, tool-using conversation against a working directory — the engine
 * behind the interactive CLI. Unlike `runTask` (one RED-first task driven to
 * green and returned), a Session lives across many user messages: each `send()`
 * runs the model until it stops calling tools, then — IF a gate is configured —
 * the deterministic gate confirms "done" (green = accept, red = errors fed back,
 * keep going). With no gate it's a plain conversational turn. Same `turn.ts`
 * primitives as `runTask`, so there is one tool-loop and one gate, not two.
 *
 * `ISessionConfig` is the configuration object the `Session` constructor takes.
 */
export interface ISessionConfig {
  /** Inference provider for this session. */
  provider: IProvider;
  /** Working directory the agent operates in. */
  cwd: string;
  /** Editable scope — edits/creates outside these are rejected. Empty = read-only.
   * NOTE(review): `systemPrompt` in this file tells the model it may edit ANY
   * file when this list is empty (or contains a whole-repo glob) — reconcile that with
   * "Empty = read-only". */
  files?: string[];
  /** Gate command. When set, a turn that ends without tool calls is gate-confirmed. */
  accept?: string;
  /** Auto-fix command run before re-validating (e.g. `eslint --fix`). */
  fix?: string;
  /** Read-only context files. */
  context?: string[];
  /** Error parser (type from `../validate`). NOTE(review): presumably applied to
   * the gate/check output — confirm against the consumer. */
  parse?: ErrorParser;
  /** Progress reporter. The `Session` constructor substitutes a no-op when omitted. */
  report?: Reporter;
  /** Sampling temperature. NOTE(review): presumably forwarded to the provider —
   * confirm; the consumer is outside this excerpt. */
  temperature?: number;
  /** Session-wide thinking toggle; `ISendOptions.enableThinking` overrides it
   * for a single send. */
  enableThinking?: boolean;
  /** NOTE(review): presumably a token cap on the model's thinking — confirm units
   * and semantics against the provider call. */
  thinkingTokenBudget?: number;
  /** Per-`send` turn cap (default LOOP_LIMITS.maxTurns). */
  maxTurns?: number;
  /** Resume from a saved conversation (incl. its system message) instead of
   * starting fresh — used by `--continue`. */
  history?: IChatMessage[];
  /** Extra opinionated guidance appended to the system prompt (e.g. a scaffold's
   * conventions: "this is a web app, the entry is app.ts…"). */
  guidance?: string;
  /** The model's context window (tokens). When set, the session auto-compacts
   * before a send once the held context exceeds `autoCompactAt` of it. 0/unset
   * disables auto-compaction. */
  contextWindow?: number;
  /** Fraction of `contextWindow` that triggers auto-compaction (default 0.8). */
  autoCompactAt?: number;
  /** A FAST check (e.g. `tsc --noEmit`) run every `checkEvery` edits WHILE the
   * model is still building — so errors surface a few edits after they're made,
   * not as a 100-error avalanche when it finally stops. Empty = off. */
  incrementalCheck?: string;
  /** Edits between incremental checks (default 3). */
  checkEvery?: number;
  /** Write-time single-file linter (the gate's eslint rules per write). When set,
   * the write-guard reports lint violations — the moat rules tsc can't see (`as`,
   * `I`-prefix) — inline, so they're fixed in-context not piled up at the gate. */
  lintFile?: FileLinter;
  /** Offer the `scaffold_ui` tool (themed UI primitives). Web builds only — keeps
   * it off the pure-TS/scratch tool list where it's meaningless noise. */
  scaffoldUi?: boolean;
  /** Offer the `scaffold_web` tool — a fresh INTERACTIVE session where the agent
   * decides whether to start a web app. Pair with `setSetupWeb`. */
  scaffoldWeb?: boolean;
  /** FORCED-TOOLS experiment (default: the TSFORGE_FORCE_TOOLS env flag): gated
   * build turns always run with tool_choice "required" + the `yield_status`
   * stop tool, so every turn is grammar-constrained and the malformed-call
   * class is impossible. Conversational (no-gate) and plan-mode turns are
   * unaffected (they should stream prose). */
  forceTools?: boolean;
}
|
|
107
|
+
|
|
108
|
+
/** The outcome of one `send`. `responded` = conversational (no gate); the gate
 * verdicts are `done`/`stuck` as in `runTask`; `interrupted` = the user aborted. */
export interface ISendResult {
  /** How the send ended — one of the four outcomes described above. */
  status: "responded" | "done" | "stuck" | "interrupted";
  /** NOTE(review): presumably the number of model turns this send consumed —
   * confirm against `send()`, which is outside this excerpt. */
  turns: number;
}
|
|
114
|
+
|
|
115
|
+
/** Optional per-call settings for a single `send`. Every field may be omitted. */
export interface ISendOptions {
  /** Caller cancellation (Ctrl-C). */
  signal?: AbortSignal;
  /** Drained at each turn boundary — any returned strings are injected as user
   * messages before the next model call, so the user can STEER a run in flight
   * ("actually use Tailwind") without aborting it. */
  steer?: () => string[];
  /** Per-send thinking override (beats cfg.enableThinking for this send only).
   * Used to keep thinking ON for the design phase (where reasoning earns its
   * keep) but OFF for the mechanical implement phase, where ~25k tokens of
   * pre-write reasoning per build is pure latency. */
  enableThinking?: boolean;
}
|
|
128
|
+
|
|
129
|
+
/** Fixed identifier string for a session. NOTE(review): its consumers are
 * outside this excerpt — confirm what it labels. */
const SESSION_ID = "session";

/** Default share of the context window that triggers auto-compaction (the
 * documented default of `ISessionConfig.autoCompactAt`). */
const AUTO_COMPACT_AT = 0.8;
|
|
133
|
+
|
|
134
|
+
/** Staged-build step 1: design the type contract FIRST, gate off. Constraining
 * the model to types before UI is the community-validated cure for random API
 * invention on local models (plan → interfaces → implementation).
 *
 * The prompt restricts the step to `.types.ts`/`.constants.ts` files and asks
 * for several `create` calls batched into one response.
 *
 * NOTE(review): the prompt text tells the model this phase's gate "checks ONLY
 * types (no build)", so "gate off" above presumably means the full build gate —
 * confirm and align the wording. */
const PLAN_TYPES_STEP =
  "STEP 1 of 2 — DESIGN FIRST, do not build the UI yet. In ONE short paragraph, " +
  "name the DOMAINS the app needs and the data each holds. Then lay out the type " +
  "contract the boringstack way: for each domain create its " +
  "`src/<domain>/<domain>.types.ts` (its `I`-prefixed interfaces) and, where it has " +
  "fixed registries/config, `src/<domain>/<domain>.constants.ts` (`as const`). Put " +
  "types shared across domains in `src/shared/shared.types.ts`. Do NOT create one " +
  "mega `src/types.ts`. THIS STEP IS TYPES/CONSTANTS ONLY: do NOT create components, " +
  "routes, services, seeds, or hooks, and do NOT call scaffold_routes or scaffold_ui " +
  "yet — the NEXT step builds ALL of that. This phase's gate checks ONLY types (no " +
  "build), so anything else you write now just risks errors and wastes turns. When " +
  "your `.types.ts`/`.constants.ts` files type-check, STOP.\n" +
  "SPEED: after the one-paragraph plan, write MANY files per turn — emit SEVERAL " +
  "`create` tool calls in a SINGLE response (batch all of a domain's type/constant " +
  "files at once). Do NOT write one file then stop and wait.";
|
|
152
|
+
|
|
153
|
+
/** Plan mode — emitted AFTER the design phase to surface the model's intent for a
 * human to review before phase 2 commits. Asks for a concise plan, NOT code.
 *
 * The requested markdown has four numbered sections (entities, routes/pages,
 * definition of done, decisions/assumptions), and the prompt explicitly forbids
 * preamble, tool calls, and code in the reply. */
const PLAN_SUMMARY_STEP =
  "Before building the UI, output your BUILD PLAN as concise markdown so it can be " +
  "reviewed. Cover, briefly:\n" +
  "1. ENTITIES — list each, and for each say whether it gets its OWN routes " +
  "(list/detail/create) or is NESTED/EMBEDDED in another (say where).\n" +
  "2. ROUTES/PAGES — the routes you will create.\n" +
  "3. DONE — what you consider a complete app for this spec.\n" +
  "4. DECISIONS/ASSUMPTIONS — any modeling choices a reviewer might want to change.\n" +
  "Output ONLY the markdown plan — no preamble, no tool calls, no code.";
|
|
164
|
+
|
|
165
|
+
/** GENERAL plan mode (the `/plan` toggle, any task — distinct from the staged
 * web build's PLAN_SUMMARY_STEP): rides the first user message after the mode
 * flips on. Read-only tools enforce the contract at the execute layer; this
 * note tells the model the workflow — explore, clarify, propose, wait.
 *
 * The text asks for the final plan under a `## Plan` heading, with no code dumps
 * or tool calls in that reply. */
const PLAN_MODE_NOTE =
  "[PLAN MODE — read-only. edit/create and write commands are disabled until " +
  "the user approves a plan.]\n" +
  "1. EXPLORE first: read/search the code this request touches.\n" +
  "2. If the request is ambiguous, ask your clarifying question(s) and STOP — " +
  "the user will answer.\n" +
  "3. When you know enough, reply with a concise plan under a `## Plan` " +
  "heading: each file to change and what to do in it, in order. No code dumps, " +
  "no tool calls in that reply.\n" +
  "The user will reply with feedback (revise the plan) or approve it; you " +
  "implement ONLY after approval.";

/** Sent when the user approves a plan-mode plan — the plan itself is already the
 * latest assistant message, so anchor it instead of re-pasting it. Exported
 * (unlike the other prompt constants here); NOTE(review): presumably so the
 * caller that handles approval can send it — confirm the importer. */
export const PLAN_APPROVED_NOTE =
  "Your plan is APPROVED — plan mode is off and all tools are available again. " +
  "Implement the approved plan above now, in order, starting with the first " +
  "step. Do not re-explore or restate the plan; emit the tool calls.";
|
|
187
|
+
|
|
188
|
+
/** Default edits between incremental checks (the documented default of
 * `ISessionConfig.checkEvery`). */
const CHECK_EVERY = 3;

/** How many times a send recovers from a repetition loop before giving up. */
const MAX_DEGENERATION_RECOVERIES = 2;

/** How many times a send recovers from a model-request TIMEOUT before giving up.
 * A single over-long turn (the model spiralled past the request timeout) must not
 * throw away many turns of real progress — re-steer toward a small, fast turn and
 * continue. Bounded so a server that's genuinely wedged still ends the run. */
const MAX_TIMEOUT_RECOVERIES = 2;

/** Pushed after a request timeout — the previous turn ran past the (generous)
 * request timeout, almost always from too-long reasoning or one huge file. Demand
 * a small, fast turn (paired with a forced, thinking-off tool call).
 * The text asks for exactly one small create/edit and no prose. */
const TIMEOUT_RESTEER =
  "Your previous response timed out — it ran too long (likely over-long reasoning " +
  "or one huge file). Make the SINGLE next tool call now: create or edit just ONE " +
  "file, kept small. Keep reasoning brief. No prose.";
|
|
207
|
+
|
|
208
|
+
/**
 * Classifies a thrown value as a model-request TIMEOUT (rather than a caller
 * abort or a dropped connection).
 *
 * Only `Error` instances can qualify. One counts as a timeout when its `name`
 * is `"TimeoutError"` (what `AbortSignal.timeout` fires) or, failing that, when
 * its message mentions "timed out" / "timeout" in any letter case. Every other
 * value — non-Error throwables included — is reported as not-a-timeout.
 */
function isModelTimeout(err: unknown): boolean {
  if (err instanceof Error) {
    // The name check comes first; the message is only consulted as a fallback.
    if (err.name === "TimeoutError") {
      return true;
    }

    return /timed out|timeout/i.test(err.message);
  }

  return false;
}
|
|
217
|
+
|
|
218
|
+
/** Pushed after a repetition loop — break the spiral by demanding ONE concrete
 * action (paired with a forced tool call, which can't loop in prose).
 * The text asks for a single create/edit of one file and no prose. */
const REPETITION_RESTEER =
  "You started repeating yourself. STOP — do not re-explain or re-decide. Emit " +
  "the SINGLE next tool call that makes concrete progress (create or edit ONE " +
  "file). No prose.";

/** Prefaces interim-check feedback so the model fixes real errors and ignores the
 * expected "module not found" noise from files it hasn't created yet.
 * The string ends with a colon. NOTE(review): the check output is presumably
 * appended after it — confirm at the call site. */
const INTERIM_CHECK_NOTE =
  "Interim type-check (NOT the final gate) — fix these now, while they are few, " +
  "before writing more. IGNORE any `Cannot find module './…'` for files you have " +
  "not created yet; fix the real type errors:";
|
|
231
|
+
|
|
232
|
+
/** Staged-build step 2: implement against the contract, gate on (drive to green).
 *
 * The prompt fixes a build order — data layer, routes (one `scaffold_routes`
 * call), shell, then feature-by-feature fill — and a pacing rule of one
 * coherent slice per turn, telling the model the gate builds and
 * browser-verifies after each slice. */
const IMPLEMENT_STEP =
  "STEP 2 of 2 — build the app in THIS ORDER, so every file compiles the moment " +
  "you write it (each step depends only on earlier ones — no forward references):\n" +
  "1) DATA LAYER — each domain's seed + service (`createCollection`). Small files; " +
  "emit them together.\n" +
  "2) ROUTES — call `scaffold_routes` ONCE with EVERY page the app needs (list, " +
  "detail with $param like /accounts/$accountId, and create/edit like " +
  "/deals/create). This writes all route files at once, so from here every " +
  "<Link to>/navigate target type-checks — NEVER hand-write a route file.\n" +
  "3) SHELL — the app-shell layout + nav linking those routes.\n" +
  "4) FILL, FEATURE BY FEATURE — replace each route's placeholder with its real " +
  "component (import your types + `useCollection(service)` + @/components/ui + " +
  "<Link> to any route). FINISH one feature before starting the next.\n" +
  "PACE: write ONE coherent slice per turn — a single feature's few files together " +
  "(or one file if it's large) — then let the gate check it. Do NOT dump the whole " +
  "app in one response (it gets cut off and the work is lost); do NOT trickle one " +
  "trivial file at a time either. The gate builds + browser-verifies; fix exactly " +
  "what it reports. Don't explain or plan in prose — just emit the tool calls.";
|
|
251
|
+
|
|
252
|
+
/**
 * Heuristic: has the model pasted whole files into its chat reply rather than
 * calling `create`?
 *
 * Counts the ``` fence markers in `content`. Four or more markers (at least two
 * fenced blocks) always trips it; two or three markers (one block) trips it
 * only when the message is longer than 1500 characters. A lone short snippet
 * in an ordinary answer therefore does not count as a dump.
 */
function looksLikeCodeDump(content: string): boolean {
  // Splitting on the marker yields one more piece than there are markers.
  const fenceCount = content.split("```").length - 1;

  if (fenceCount >= 4) {
    return true;
  }

  return fenceCount >= 2 && content.length > 1500;
}
|
|
263
|
+
|
|
264
|
+
/** Every `TOOL_NAME` value joined with `|`, ready to drop into a regex
 * alternation. NOTE(review): the names are interpolated WITHOUT regex escaping —
 * fine as long as tool names contain no regex metacharacters; confirm. */
const TOOL_NAMES_ALT = Object.values(TOOL_NAME).join("|");

/** Tool-call MARKUP leaked into the reply text: the known malformed variants
 * (`<function=`, `<tool_call`, `<parameter…`, `<|tool|>`, `<tool>` for a tool
 * we offer) — the server's parser left the call in content and salvage could
 * not rescue it (see malformed-toolcall-format + wire.ts salvage).
 *
 * Flags: `i` (case-insensitive) and `m`, so the `^` on the bare `<tool>` form
 * anchors at the start of any line. There is no `g` flag, so `.test()` keeps no
 * `lastIndex` state between calls. */
const LEAKED_CALL_RE = new RegExp(
  `<function=|<tool_call|<parameters?[=>]|<\\|(?:${TOOL_NAMES_ALT})\\|>|^<(?:${TOOL_NAMES_ALT})>`,
  "im"
);
|
|
274
|
+
|
|
275
|
+
/** The fully-degenerate invented-markup form: a short matched `<tag>…</tag>`
 * pair on its own lines (e.g. `<files>\n["…"]\n</files>`, captured live). A
 * legit prose answer with an HTML example could match — the cost is one
 * bounded nudge turn, while missing it strands the whole build.
 *
 * Details of the pattern: the tag name is lowercase letters/underscores only
 * (no `i` flag), the closing tag must repeat the same name (`\1`), each tag must
 * be alone on its line apart from trailing whitespace (`m` flag), and the
 * lazy `{0,400}` quantifier allows at most 400 characters before the closing tag's line. */
const TAG_PAIR_RE = /^<([a-z_]+)>\s*$[\s\S]{0,400}?^<\/\1>\s*$/m;
|
|
280
|
+
|
|
281
|
+
/**
 * Reports whether `content` contains a tool call written out as TEXT instead
 * of a real invocation: either one of the known leaked-call markers
 * (`LEAKED_CALL_RE`) or an invented `<tag>…</tag>` pair (`TAG_PAIR_RE`).
 * The leaked-call pattern is tried first; the tag-pair pattern only if it misses.
 */
function leaksToolMarkup(content: string): boolean {
  const detectors = [LEAKED_CALL_RE, TAG_PAIR_RE];

  return detectors.some((pattern) => pattern.test(content));
}
|
|
285
|
+
|
|
286
|
+
/** Pushed when a no-tool-call reply contained leaked tool markup — the model
 * believes it acted, but nothing ran. Paired with a FORCED tool call next turn
 * (constrained decoding ⇒ the retry always parses).
 * The text tells the model that no tool ran and asks it to re-issue the action
 * as a real tool call. */
const MALFORMED_CALL_NUDGE =
  "Your last reply contained tool-call markup as plain TEXT — the syntax was " +
  "malformed, so NO tool ran and nothing happened. Do not write tool syntax " +
  "in prose. Re-issue that action as a real tool call now.";
|
|
293
|
+
|
|
294
|
+
/**
 * Builds the session's system prompt: `CHAT_SYSTEM`, a blank line, then a
 * newline-joined orientation block made of
 *   1. the workspace path (`cfg.cwd`),
 *   2. the edit scope — "any file" when `cfg.files` is unset, empty, or contains
 *      the whole-repo glob; otherwise the comma-separated list of editable paths,
 *   3. a description of the gate, only when `cfg.accept` is a non-empty string,
 *   4. `cfg.guidance` verbatim, only when it is a non-empty string.
 *
 * NOTE(review): an EMPTY `files` list is announced here as "edit any file",
 * while the `files` comment on `ISessionConfig` describes empty as read-only —
 * confirm which one matches the scope enforcement and align the other.
 */
function systemPrompt(cfg: ISessionConfig): string {
  const workspaceLine = `Workspace: ${cfg.cwd}`;

  const editable = cfg.files ?? [];
  const unrestricted = editable.length === 0 || editable.includes("**/*");
  const scopeLine = unrestricted
    ? "You may read, run, and edit any file in the workspace."
    : `You may only edit: ${editable.join(", ")} (everything else is read-only).`;

  const orientation = [workspaceLine, scopeLine];

  const { accept } = cfg;

  if (accept !== undefined && accept.length > 0) {
    orientation.push(
      `A check is configured: \`${accept}\`. When you finish a change and ` +
        "stop calling tools, it runs automatically — if it fails you'll get the " +
        "errors and should fix them and continue until it passes."
    );
  }

  const { guidance } = cfg;

  if (guidance !== undefined && guidance.length > 0) {
    orientation.push(guidance);
  }

  return `${CHAT_SYSTEM}\n\n${orientation.join("\n")}`;
}
|
|
320
|
+
|
|
321
|
+
export class Session {
|
|
322
|
+
// Collaborators handed in through the session config.
private readonly provider: IProvider;
private readonly cfg: ISessionConfig;
private readonly report: Reporter;
// Tool definitions this session works with; the constructor picks the
// initial set from the config.
private tools: Array<
  | ReturnType<typeof toolsFor>[number]
  | typeof SCAFFOLD_UI_TOOL
  | typeof SCAFFOLD_ROUTES_TOOL
  | typeof SCAFFOLD_WEB_TOOL
  | typeof ADD_DEPENDENCY_TOOL
  | typeof YIELD_STATUS_TOOL
>;
// True while a non-empty gate command is configured.
private hasGate: boolean;
// Loop context (task, cwd, messages, …) and the per-session loop counters.
private readonly ctx: ILoopCtx;
private readonly state: ILoopState;
/** Usage reported by the latest model call. Its `promptTokens` is the real
 * size of the context the model last saw — it feeds the status gauge and the
 * auto-compaction threshold. */
private lastUsage?: ITokenUsage;
/** Command for the fast check run every few edits during a build (e.g. tsc).
 * The empty string disables it. */
private incrementalCheck: string;
/** Thinking override for the current `send`, taken from ISendOptions and
 * cleared when the send finishes. Lets the design phase think while the
 * implement phase does not. Undefined means: use cfg.enableThinking (the
 * server default). */
private activeThinking?: boolean;
/** ADAPTIVE THINKING flag: set while errors are outstanding (an interim check
 * or the gate came back RED). Measured: roughly 80% of build time is REPAIR,
 * and repairing with thinking OFF oscillates up to the turn cap without
 * converging, whereas thinking ON converges. Hence thinking is enabled ONLY
 * while repairing — quick thinking-off creation, convergent thinking-on
 * repair. */
private repairing = false;
/** GENERAL plan mode: explore read-only until the user approves a plan. The
 * flag is mirrored into ctx.readOnly (the execute-layer guarantee) and used to
 * filter the advertised tools on each call; `this.tools` is never mutated for
 * it, so switching the mode off restores everything with no bookkeeping. */
private planMode = false;
/** When set, PLAN_MODE_NOTE is attached to the NEXT send only — later revision
 * replies go without it. */
private planIntroPending = false;
/** FORCED-TOOLS experiment switch — see ISessionConfig.forceTools. */
private readonly forceTools: boolean;
/** Turn-cap override applied mid-session via setMaxTurns (a web scaffold
 * raises it). */
private maxTurnsOverride?: number;
|
|
363
|
+
|
|
364
|
+
private constructor(cfg: ISessionConfig, ctx: ILoopCtx) {
  this.provider = cfg.provider;
  this.cfg = cfg;
  this.report = cfg.report ?? ((): void => undefined);
  this.hasGate = (cfg.accept ?? "").length > 0;
  this.incrementalCheck = cfg.incrementalCheck ?? "";

  // Tool selection. Everything starts from the 4 BASE tools
  // (read/run/edit/create). Measured: the larger 11-tool list pushes this
  // model onto a malformed-tool-call boundary (unparseable formats that the
  // server leaves in content) — see malformed-toolcall-format. The base tools
  // suffice to work a repo; the LSP nav set can become opt-in once it is
  // confirmed to parse cleanly here.
  //
  //  - Interactive sessions (scaffoldWeb) additionally get `search` (ripgrep:
  //    read-only, needs no tsconfig, and the plan-mode explorer's main tool
  //    besides `read`) and `scaffold_web`, so the AGENT can choose to start a
  //    web app; the UI/routes tools ride along so they are ready once it
  //    scaffolds.
  //  - Headless web builds (scaffoldUi) scaffold up front, so they skip
  //    `scaffold_web` and only add the coarse `scaffold_ui` family — the model
  //    generates tested themed primitives instead of re-authoring a
  //    button/card on every build.
  //  - Headless/eval sessions keep the measured base set (see
  //    lsp-tools-regress-scratch: nav tools hurt from-scratch builds).
  const baseTools = toolsFor(false);

  if (cfg.scaffoldWeb === true) {
    this.tools = [
      ...baseTools,
      SEARCH_TOOL,
      SCAFFOLD_WEB_TOOL,
      SCAFFOLD_UI_TOOL,
      SCAFFOLD_ROUTES_TOOL,
      ADD_DEPENDENCY_TOOL,
    ];
  } else if (cfg.scaffoldUi === true) {
    this.tools = [
      ...baseTools,
      SCAFFOLD_UI_TOOL,
      SCAFFOLD_ROUTES_TOOL,
      ADD_DEPENDENCY_TOOL,
    ];
  } else {
    this.tools = baseTools;
  }

  this.forceTools = cfg.forceTools ?? flags.forceTools();

  if (this.forceTools) {
    // Build a new array rather than pushing: `baseTools` may be shared.
    this.tools = [...this.tools, YIELD_STATUS_TOOL];
  }

  this.ctx = ctx;
  this.state = {
    prevGateErrors: [],
    gateNoProgress: 0,
    lastGateCount: -1,
    edits: 0,
    regressions: 0,
    ttsrInterrupts: 0,
  };
}
|
|
419
|
+
|
|
420
|
+
/**
 * Async factory for a session — asynchronous because it spins up the TS
 * LanguageService (and detects the stack / loads the project config first).
 *
 * @param cfg Session configuration.
 * @returns A session whose history is either `cfg.history` (when non-empty) or
 *   a single system message built from the config.
 */
static async create(cfg: ISessionConfig): Promise<Session> {
  const report = cfg.report ?? ((): void => undefined);
  const task: ITask = {
    id: SESSION_ID,
    accept: cfg.accept ?? "",
    files: cfg.files ?? [],
    context: cfg.context,
    fix: cfg.fix,
  };

  // Resolve the stack and tsforge.config.json exactly as the eval path does
  // (resolveStackForRun in run.ts), so interactive users get identical pack
  // selection and rule-severity overrides.
  const detected = await detectStack(cfg.cwd);
  const projectConfig = await loadTsforgeConfig(cfg.cwd);
  const activePacks = resolveActivePacks(detected.packs, projectConfig);
  const stackProfile = { ...detected, packs: activePacks };
  const ruleOverrides = normalizeRuleOverrides(projectConfig);
  const hasRuleOverrides = Object.keys(ruleOverrides).length > 0;

  const tsService = await buildTsService(cfg.cwd);

  // Resume from a supplied history when there is one; otherwise seed the
  // conversation with the system prompt.
  const history = cfg.history ?? [];
  const messages: IChatMessage[] =
    history.length > 0
      ? [...history]
      : [{ role: "system", content: systemPrompt(cfg) }];

  const ctx: ILoopCtx = {
    task,
    cwd: cfg.cwd,
    tsService,
    ...(cfg.lintFile !== undefined ? { lintFile: cfg.lintFile } : {}),
    parse: cfg.parse,
    report,
    stackProfile,
    ...(hasRuleOverrides ? { ruleOverrides } : {}),
    messages,
    // Forward gate output as it arrives (the interactive CLI), so a slow gate
    // (vite build + chromium) shows progress instead of running silently.
    onGateChunk: (text) => {
      report({ kind: "token", task: SESSION_ID, message: text });
    },
  };

  return new Session(cfg, ctx);
}
|
|
464
|
+
|
|
465
|
+
/** Gate command currently in force; the empty string means no gate. */
get gate(): string {
  const { accept } = this.ctx.task;

  return accept;
}
|
|
469
|
+
|
|
470
|
+
/** Globs describing which files may be edited. */
get scope(): string[] {
  const { files } = this.ctx.task;

  return files;
}
|
|
474
|
+
|
|
475
|
+
/** Token usage reported by the latest model call. Undefined before the first
 * call, or when the server never reported any. */
get usage(): ITokenUsage | undefined {
  const { lastUsage } = this;

  return lastUsage;
}
|
|
480
|
+
|
|
481
|
+
/** Size of the context the model currently holds: the prompt-token count of
 * the latest call (the figure auto-compaction watches). 0 until a call has
 * reported usage. */
get contextTokens(): number {
  const latest = this.lastUsage;

  return latest?.promptTokens ?? 0;
}
|
|
486
|
+
|
|
487
|
+
/**
 * Percent of the context window in use, but only once the held context has
 * reached the auto-compact threshold (the value is shown in the notice).
 *
 * @returns The rounded percentage, or undefined when below the threshold, when
 *   no context window is configured, or when no usage has been recorded yet —
 *   the last two are both true on the first send, so it never fires early.
 */
private autoCompactPct(): number | undefined {
  const usage = this.lastUsage;
  const windowSize = this.cfg.contextWindow ?? 0;

  if (usage === undefined || windowSize <= 0) {
    return undefined;
  }

  const used = usage.promptTokens / windowSize;
  const limit = this.cfg.autoCompactAt ?? AUTO_COMPACT_AT;

  if (used >= limit) {
    return Math.round(used * 100);
  }

  return undefined;
}
|
|
502
|
+
|
|
503
|
+
/** Change the gate command mid-session; passing "" removes the gate. */
setGate(command: string): void {
  const { task } = this.ctx;

  task.accept = command;
  this.hasGate = command.length > 0;
}
|
|
508
|
+
|
|
509
|
+
/** Adjust the per-send turn cap mid-session. `scaffold_web` turns a chat
 * session into a from-scratch web build, whose heavy gate needs the larger
 * webMaxTurns budget. Passing 0 or undefined drops the override so the config
 * default applies again. */
setMaxTurns(n?: number): void {
  if (n !== undefined && n > 0) {
    this.maxTurnsOverride = n;
  } else {
    this.maxTurnsOverride = undefined;
  }
}
|
|
515
|
+
|
|
516
|
+
/** Switch GENERAL plan mode (read-only tools + plan-then-approve workflow) on
 * or off. When ON, the next send carries PLAN_MODE_NOTE, only the read-only
 * tool set is advertised, and the execute layer rejects mutating calls. */
setPlanMode(on: boolean): void {
  // ctx.readOnly is the hard guarantee enforced at the execute layer.
  this.ctx.readOnly = on;
  this.planMode = on;
  this.planIntroPending = on;
}
|
|
524
|
+
|
|
525
|
+
/** Change the auto-fix command that runs before each gate (for example a
 * scaffold's `eslint --fix`), so mechanical lint violations are removed
 * deterministically rather than costing model turns. "" clears it. */
setFix(command: string): void {
  const { task } = this.ctx;

  task.fix = command.length === 0 ? undefined : command;
}
|
|
531
|
+
|
|
532
|
+
/** Change the fast incremental check (e.g. `tsc --noEmit`) that runs every few
 * edits during a build so errors surface early instead of piling up. ""
 * turns it off. */
setIncrementalCheck(command: string): void {
  this.incrementalCheck = command;
}
|
|
537
|
+
|
|
538
|
+
/** Swap in a new set of editable scope globs mid-session. */
setScope(globs: string[]): void {
  const { task } = this.ctx;

  task.files = globs;
}
|
|
542
|
+
|
|
543
|
+
/** Register the web-setup callback that the `scaffold_web` tool calls once the
 * AGENT decides the task is a from-scratch web app: it scaffolds the stack and
 * switches this session over to the web gate/guidance. It is bound after
 * `create` because the callback closes over the session it reconfigures. */
setSetupWeb(fn: (framework: string) => Promise<void>): void {
  const { ctx } = this;

  ctx.setupWeb = fn;
}
|
|
550
|
+
|
|
551
|
+
/** Add opinionated guidance to the SYSTEM prompt (for instance after a fresh
 * request is classified as a web build). The text is merged into the existing
 * system message, because a second system message breaks some chat templates
 * (Qwen → 400). If the history does not start with a system message, one is
 * inserted at the front. */
guide(text: string): void {
  const { messages } = this.ctx;
  const head = messages[0];

  if (head?.role !== "system") {
    messages.unshift({ role: "system", content: text });

    return;
  }

  head.content = `${head.content}\n\n${text}`;
}
|
|
563
|
+
|
|
564
|
+
/**
 * Compress the conversation: ask the model to summarize everything so far, then
 * replace the history with [system, summary]. Frees context for long sessions
 * while preserving goals/decisions/changes.
 *
 * The history is left untouched when there is nothing but system messages to
 * summarize, and also when the model returns an empty summary — swapping the
 * whole conversation for a blank summary would silently discard all context.
 *
 * @param signal Optional abort signal forwarded to the summarizing model call.
 * @returns The message count before and after (equal when nothing changed).
 */
async compact(
  signal?: AbortSignal
): Promise<{ before: number; after: number }> {
  const { ctx } = this;
  const before = ctx.messages.length;
  const conversation = ctx.messages.filter((m) => m.role !== "system");

  if (conversation.length === 0) {
    return { before, after: before };
  }

  const transcript = conversation
    .map((m) => `[${m.role}] ${m.content}`)
    .join("\n\n");
  const res = await this.provider.complete(
    [
      { role: "system", content: COMPACT_SYSTEM },
      { role: "user", content: transcript },
    ],
    { temperature: 0, ...(signal === undefined ? {} : { signal }) }
  );

  // Guard against data loss: a blank summary carries no information, so keep
  // the existing history rather than replacing it with nothing.
  if (res.content.trim().length === 0) {
    return { before, after: before };
  }

  // Only the leading system message survives; the summary is stored as a user
  // message so the history never holds a second system message.
  const system = ctx.messages[0];
  const summary: IChatMessage = {
    role: "user",
    content: `[Summary of the earlier conversation]\n${res.content}`,
  };

  ctx.messages = system?.role === "system" ? [system, summary] : [summary];

  return { before, after: ctx.messages.length };
}
|
|
601
|
+
|
|
602
|
+
/** Read-only view of the live conversation: the system message followed by
 * every exchange so far. */
get messages(): readonly IChatMessage[] {
  const { messages } = this.ctx;

  return messages;
}
|
|
606
|
+
|
|
607
|
+
/**
 * Handle one user message: drive the model until it stops calling tools, then
 * confirm against the gate when one is set, looping on red gate feedback up to
 * the turn cap.
 *
 * Never rejects for a model/provider failure: an abort resolves to
 * `interrupted`, any other error to `stuck` (both with `turns: 0`), and the
 * cause is reported.
 *
 * @param text The user's message.
 * @param opts Per-send options (abort signal, thinking override).
 */
async send(text: string, opts: ISendOptions = {}): Promise<ISendResult> {
  const { ctx, report } = this;
  const sendStart = performance.now();
  const maxTurns =
    this.maxTurnsOverride ?? this.cfg.maxTurns ?? LOOP_LIMITS.maxTurns;

  // Cancellation is threaded to the tool `run` commands and the gate as well
  // as the model call, so Ctrl-C also kills in-flight child processes.
  ctx.signal = opts.signal;
  this.activeThinking = opts.enableThinking;
  // Every send begins in the fast, thinking-off creation mode.
  this.repairing = false;

  try {
    // Compact BEFORE appending the new message, so that message remains a
    // fresh turn after the summary, whenever the held context nears the window.
    const pct = this.autoCompactPct();

    if (pct !== undefined) {
      report({
        kind: "tool",
        task: SESSION_ID,
        message: `⊙ context ~${pct}% full — auto-compacting to free room`,
      });

      const { before, after } = await this.compact(opts.signal);

      report({
        kind: "tool",
        task: SESSION_ID,
        message: `⊙ compacted ${before} → ${after} messages`,
      });
    }

    // Only the FIRST message after plan mode flips on carries the workflow
    // note; revision replies go bare since the instruction stays in history.
    const withPlanNote = this.planMode && this.planIntroPending;

    if (withPlanNote) {
      this.planIntroPending = false;
    }

    ctx.messages.push({
      role: "user",
      content: withPlanNote ? `${text}\n\n${PLAN_MODE_NOTE}` : text,
    });

    return await this.drive(maxTurns, sendStart, opts);
  } catch (err) {
    const aborted = opts.signal?.aborted === true;

    // A provider/network error (request timeout, connection drop after
    // retries) ends the turn GRACEFULLY as stuck instead of crashing the
    // process, and is logged so it stays visible/debuggable. That keeps a long
    // autonomous run (and the interactive CLI) alive through a flaky model.
    const message = aborted
      ? "interrupted"
      : `⚠ model request failed: ${err instanceof Error ? err.message : String(err)}`;

    report({ kind: "stuck", task: SESSION_ID, message });

    return { status: aborted ? "interrupted" : "stuck", turns: 0 };
  } finally {
    ctx.signal = undefined;
    this.activeThinking = undefined;
  }
}
|
|
686
|
+
|
|
687
|
+
/**
 * Build a project from scratch in two STAGES — the shape that keeps local
 * models reliable:
 *  1. plan and write the type contract (`src/types.ts`) with the gate OFF,
 *     since a types-only app cannot build yet and gating would fail spuriously;
 *  2. implement against those types with the gate ON, driving to green.
 * This is the community-validated plan→interfaces→implementation pattern, with
 * our gate as the verification stage. The split is a soft constraint: if the
 * model ignores step 1 and builds everything, step 2 simply continues.
 *
 * @returns The design-phase result when that phase was interrupted, otherwise
 *   the implement-phase result.
 */
async buildStaged(
  request: string,
  opts: ISendOptions = {},
  designGate = ""
): Promise<ISendResult> {
  const designed = await this.designBuild(request, opts, designGate);
  // An abort during design must not roll on into implementation.
  const aborted = designed.status === "interrupted";

  return aborted ? designed : this.implementBuild("", opts);
}
|
|
710
|
+
|
|
711
|
+
/**
 * PHASE 1 — design the type contract only. Gates on TYPES (tsc + lint, no build)
 * when a `designGate` is given, so the contract is driven self-consistent BEFORE
 * components (catching as-const↔interface errors small, not as a final pile).
 * Withholds the app-building scaffold tools so the model CANNOT start the UI here
 * — a prompt-only "types only" was repeatedly ignored. Split out from
 * `buildStaged` so plan mode can insert a human review between the phases.
 *
 * The session's own gate and tool list are swapped out only for the duration of
 * this phase and are restored in a `finally`, so the session is ready for
 * `implementBuild` even if the design step throws.
 *
 * @param request The user's build request.
 * @param opts Per-send options forwarded to `send`.
 * @param designGate Gate command for this phase; "" runs it ungated.
 * @returns The phase-1 result.
 */
async designBuild(
  request: string,
  opts: ISendOptions = {},
  designGate = ""
): Promise<ISendResult> {
  const gate = this.ctx.task.accept;
  const phaseTwoTools = this.tools;

  try {
    this.setGate(designGate);
    this.tools = toolsFor(false);

    return await this.send(`${request}\n\n${PLAN_TYPES_STEP}`, opts);
  } finally {
    // Always hand back the phase-2 configuration, on success or failure.
    this.tools = phaseTwoTools;
    this.setGate(gate);
  }
}
|
|
739
|
+
|
|
740
|
+
/**
 * PHASE 2 — implement against the designed types, driving to green. If phase 1
 * already produced a fully-green app (it ignored "types only" and built
 * everything), this returns done WITHOUT rebuilding — else the model concludes
 * the prior phase did "only the data layer" and `rm -rf`s its own finished UI to
 * rebuild (observed: 23-00-52 went green at turn 146, then phase 2 wiped every
 * file).
 *
 * @param planNotes Human plan-mode edits, injected into the implement step.
 * @param opts Per-send options. `opts.signal` also cancels the up-front gate
 *   run, which happens outside `send`.
 * @returns `done` (0 turns) when the gate already passes, `interrupted` when
 *   the up-front gate run was aborted, otherwise the result of the implement
 *   `send`.
 */
async implementBuild(
  planNotes = "",
  opts: ISendOptions = {}
): Promise<ISendResult> {
  // `ctx.signal` is only populated while a `send` is in flight, so the caller's
  // signal is what makes this pre-check cancellable.
  const signal = opts.signal ?? this.ctx.signal;
  const fullGateTask: ITask = { ...this.ctx.task };
  let full: Awaited<ReturnType<typeof validate>>;

  try {
    full = await validate(
      fullGateTask,
      this.ctx.cwd,
      this.ctx.parse,
      signal === undefined ? {} : { signal }
    );
  } catch (err) {
    if (signal?.aborted !== true) {
      throw err;
    }

    // Mirror `send`: a user abort resolves as interrupted instead of rejecting.
    this.report({
      kind: "stuck",
      task: SESSION_ID,
      message: "interrupted",
    });

    return { status: "interrupted", turns: 0 };
  }

  if (full.passed) {
    this.report({
      kind: "tool",
      task: this.ctx.task.id,
      message:
        "phase 1 already produced a fully-green app — skipping phase 2 (no rebuild)",
    });

    return { status: "done", turns: 0 };
  }

  // Inject the EXACT type contract the design phase just wrote, fresh, right
  // before implementation. The 27b's #1 first-pass error is misremembering its
  // OWN types across many files/turns (a field shape it defined 30 turns ago) —
  // re-showing the precise current signatures cuts those consistency errors (so
  // less repair). Both phases run ADAPTIVE thinking (governed by `repairing`).
  const contract = await this.typeContract();
  const notes =
    planNotes.length > 0
      ? `\n\n## Approved plan — follow these decisions\n${planNotes}\n`
      : "";

  return this.send(`${contract}${IMPLEMENT_STEP}${notes}`, opts);
}
|
|
785
|
+
|
|
786
|
+
/**
 * Plan mode — after `designBuild`, ask the model to state its build PLAN as
 * markdown (entities + whether each is its own route or nested/embedded; the
 * routes/pages it will create; what it considers DONE; key modeling decisions)
 * so a human can review/correct it BEFORE phase 2 commits ~100 turns. A single
 * completion over the live conversation: no tools are offered, and neither the
 * history nor any file is modified.
 *
 * @param signal Optional abort signal for the completion. `ctx.signal` is only
 *   set while a `send` is running, so pass one here to make the call
 *   cancellable; `ctx.signal` remains the fallback.
 * @returns The trimmed plan text (empty string if the model returned nothing).
 */
async generatePlan(signal?: AbortSignal): Promise<string> {
  const abort = signal ?? this.ctx.signal;
  const res = await this.provider.complete(
    [...this.ctx.messages, { role: "user", content: PLAN_SUMMARY_STEP }],
    {
      temperature: 0,
      ...(abort === undefined ? {} : { signal: abort }),
    }
  );

  return res.content.trim();
}
|
|
805
|
+
|
|
806
|
+
/** Collect the per-domain `.types.ts`/`.constants.ts` files written by the
 * design phase and render them as one reference block for the implement phase,
 * so the model works from the EXACT current signatures rather than its (lossy)
 * memory of them. Returns "" when no such file exists yet. */
private async typeContract(): Promise<string> {
  const patterns = ["src/**/*.types.ts", "src/**/*.constants.ts"];
  const files = await readFiles(this.ctx.cwd, patterns);

  if (files.length === 0) {
    return "";
  }

  // One section per file: a path header comment followed by the trimmed source.
  const sections: string[] = [];

  for (const file of files) {
    sections.push(`// ${file.path}\n${file.content.trim()}`);
  }

  const blocks = sections.join("\n\n");

  return [
    "THE TYPE CONTRACT you just designed (use these EXACT names/shapes — do ",
    "NOT invent or misremember fields; import from these paths):\n\n```ts\n",
    `${blocks}\n`,
    "```\n\n",
  ].join("");
}
|
|
831
|
+
|
|
832
|
+
/** Run the incremental check once `editsSinceCheck` has reached `checkEvery`
 * and hand back a reset counter (0); below the threshold the counter is
 * returned unchanged. Exists to keep `drive` light on branches. */
private async checkAfterEdits(
  editsSinceCheck: number,
  checkEvery: number
): Promise<number> {
  const belowThreshold = editsSinceCheck < checkEvery;

  if (!belowThreshold) {
    await this.runIncrementalCheck();

    return 0;
  }

  return editsSinceCheck;
}
|
|
846
|
+
|
|
847
|
+
/** Execute the fast incremental check (e.g. tsc). When it reports real errors
 * they are pushed back immediately as a user message, so the model repairs
 * them before writing more instead of letting them accumulate for the final
 * gate. Also updates `repairing`. Does nothing when no check is configured. */
private async runIncrementalCheck(): Promise<void> {
  const command = this.incrementalCheck;

  if (command.length === 0) {
    return;
  }

  const { ctx } = this;
  const outcome = await validate(
    { ...ctx.task, accept: command },
    ctx.cwd,
    ctx.parse,
    ctx.signal === undefined ? {} : { signal: ctx.signal }
  );

  // Stub-route-tree phantoms are excluded: the build regenerates the tree at
  // the gate, so the model cannot fix them and should not be asked to.
  const errors = outcome.errors.filter(
    (e) => !isPhantomRouteError(e.message)
  );

  // Errors outstanding → the next turns think so repair converges; clean (or
  // phantoms only) → back to fast thinking-off creation.
  this.repairing = !outcome.passed && errors.length > 0;

  if (!this.repairing) {
    return;
  }

  const detail = errors
    .slice(0, 20)
    .map((e) => e.message)
    .join("\n");

  // Log the ACTUAL errors (not merely the count) so it is visible WHAT the
  // model fails at and the systematic failures can be targeted in the harness.
  ctx.report({
    kind: "tool",
    task: SESSION_ID,
    message: `⊙ interim check: ${String(errors.length)} error(s) — fixing now:\n${detail}`,
  });

  ctx.messages.push({
    role: "user",
    content: `${INTERIM_CHECK_NOTE}\n${detail}`,
  });
}
|
|
894
|
+
|
|
895
|
+
/** The turn loop — separated so `send` can wrap it in abort handling. */
|
|
896
|
+
/**
 * Perform a single model call: stream tokens live, append the assistant reply
 * to the history, and report usage, salvaged tool calls and the final answer.
 * Keeps the per-turn control flow in `drive` lean.
 *
 * @param signal Optional abort signal for the request.
 * @param toolChoice "required" forces a tool call; "auto" lets the model pick.
 * @param forceNoThinking When true, thinking is disabled for this call.
 * @returns The provider's response.
 */
private async askModel(
  signal?: AbortSignal,
  toolChoice: "auto" | "required" = "auto",
  forceNoThinking = false
): Promise<IModelResponse> {
  const { ctx, report } = this;

  // Thinking mode, in priority order:
  //  1. A forced recovery turn never thinks — it only needs one clean tool
  //     call, and thinking-on is a known source of prose-before-the-call
  //     malformed output on this model (`required` + thinking-off gives the
  //     cleanest tool call).
  //  2. ADAPTIVE: while REPAIRING (errors outstanding) always think, so repair
  //     converges.
  //  3. Otherwise honour the per-send override, then the config setting
  //     (off = fast creation).
  let enableThinking: boolean | undefined;

  if (forceNoThinking) {
    enableThinking = false;
  } else if (this.repairing) {
    enableThinking = true;
  } else {
    enableThinking = this.activeThinking ?? this.cfg.enableThinking;
  }

  // PLAN MODE advertises just the read-only tools plus `run` (whose handler
  // enforces a read-only command allowlist), so the model never sees a write
  // tool. The filter is applied per call: `this.tools` stays intact and
  // leaving the mode restores the full set with no bookkeeping.
  let offeredTools = this.tools;

  if (this.planMode) {
    offeredTools = this.tools.filter(
      ({ function: fn }) =>
        READ_ONLY_TOOL_NAMES.has(fn.name) || fn.name === TOOL_NAME.run
    );
  }

  const res = await this.provider.complete(ctx.messages, {
    tools: offeredTools,
    temperature: this.cfg.temperature ?? 0,
    toolChoice,
    ...(enableThinking === undefined ? {} : { enableThinking }),
    ...(this.cfg.thinkingTokenBudget === undefined
      ? {}
      : { thinkingTokenBudget: this.cfg.thinkingTokenBudget }),
    ...(signal === undefined ? {} : { signal }),
    onToken: (token, channel) => {
      // Everything is streamed live — thinking, tool calls as they are
      // written, and the answer (channel `content`) — so the user watches the
      // reply arrive rather than a frozen indicator. The renderer formats
      // content incrementally line-by-line; the consolidated `message` event
      // emitted further down remains the log's record (the interactive
      // renderer dedupes it).
      report({ kind: "token", task: SESSION_ID, message: token, channel });
    },
  });

  const { usage } = res;

  if (usage !== undefined) {
    this.lastUsage = usage;
    // Logged (not displayed) so the --log analyzer can compute
    // tokens-to-solution. `thinking` records THIS call's mode, letting
    // malformed-call rates be correlated with it (analyze-malformed).
    report({
      kind: "usage",
      task: SESSION_ID,
      message: `tokens ${usage.promptTokens} in / ${usage.completionTokens} out`,
      promptTokens: usage.promptTokens,
      completionTokens: usage.completionTokens,
      totalTokens: usage.totalTokens,
      ...(enableThinking === undefined ? {} : { thinking: enableThinking }),
    });
  }

  ctx.messages.push({
    role: "assistant",
    content: res.content,
    toolCalls: res.toolCalls,
  });

  const salvaged = res.salvaged ?? 0;

  if (salvaged > 0) {
    report({
      kind: "tool",
      task: SESSION_ID,
      message: `⚠ recovered ${salvaged} malformed tool call(s) (server tool-call parser mismatch)`,
      ...(enableThinking === undefined ? {} : { thinking: enableThinking }),
    });
  }

  if (res.content.length > 0) {
    report({ kind: "message", task: SESSION_ID, message: res.content });
  }

  return res;
}
|
|
982
|
+
|
|
983
|
+
/**
 * Interpret a turn that ended with NO tool calls (and no edits so far this
 * send). An ordinary answer — no gate set, or just a conversational reply — is
 * `responded`. With a gate set, a reply that DUMPS whole files as prose rather
 * than calling `create` is the narrate-instead-of-build failure (the content
 * never reaches disk), and leaked tool markup is a call that never parsed. In
 * both cases the model is nudged to act (`result: null`) up to a cap, after
 * which the send stops honestly as `stuck` rather than looping forever.
 *
 * Side effects — the nudge message and the reports — happen here; the caller
 * only emits timing and loops or returns.
 *
 * @param content The assistant reply text.
 * @param turn Current turn number, echoed in the result.
 * @param buildNudges Nudges already spent this send.
 */
private resolveNoEditYield(
  content: string,
  turn: number,
  buildNudges: number
): { result: ISendResult | null } {
  const responded: { result: ISendResult } = {
    result: { status: "responded", turns: turn },
  };

  // Plan mode is read-only, so a PLAN full of fenced snippets is the desired
  // output, never a narrate-instead-of-build failure. Without a gate there is
  // likewise nothing to nudge toward.
  if (this.planMode || !this.hasGate) {
    return responded;
  }

  // Leaked tool markup means the model TRIED to act but the call never parsed
  // (and salvage could not rescue it). Left alone, the turn would end as a
  // fake "responded" and the build would silently strand (captured live: a
  // scaffold_web emitted as text). The retry is a FORCED tool call, which is
  // grammar-constrained and therefore always parses.
  const leaked = leaksToolMarkup(content);

  if (!leaked && !looksLikeCodeDump(content)) {
    return responded;
  }

  const budgetSpent = buildNudges >= LOOP_LIMITS.maxBuildNudges;

  if (budgetSpent) {
    this.report({
      kind: "stuck",
      task: SESSION_ID,
      message: leaked
        ? "⚠ model kept emitting malformed tool-call text instead of real " +
          "calls — stopped. See malformed-toolcall-format (server parser)."
        : "⚠ model kept writing files as chat messages instead of creating " +
          "them — stopped. Try a smaller step (e.g. one file at a time).",
    });

    return { result: { status: "stuck", turns: turn } };
  }

  this.report({
    kind: "tool",
    task: SESSION_ID,
    message: leaked
      ? "↳ malformed tool-call text (no tool ran) — forcing a real call"
      : "↳ no files written — nudging the model to build with tools",
  });
  this.ctx.messages.push({
    role: "user",
    content: leaked ? MALFORMED_CALL_NUDGE : BUILD_NUDGE,
  });

  return { result: null };
}
|
|
1042
|
+
|
|
1043
|
+
/** React to a detected repetition loop. While recovery budget remains, the
 * model is re-steered toward one concrete action and null is returned, so the
 * caller forces a tool call on the next turn; once the budget is spent, a
 * stuck result is reported and returned. */
private degenerationRecovery(
  degenerations: number,
  turn: number
): ISendResult | null {
  const budgetSpent = degenerations >= MAX_DEGENERATION_RECOVERIES;

  if (!budgetSpent) {
    this.report({
      kind: "tool",
      task: SESSION_ID,
      message: "⚠ repetition loop — forcing a concrete next action",
    });
    this.ctx.messages.push({ role: "user", content: REPETITION_RESTEER });

    return null;
  }

  this.report({
    kind: "stuck",
    task: SESSION_ID,
    message:
      "⚠ repetition loop persisted after recovery attempts — stopped. Try a smaller step.",
  });

  return { status: "stuck", turns: turn };
}
|
|
1070
|
+
|
|
1071
|
+
/**
 * Triage an error thrown by a model call.
 *
 * The error is rethrown unchanged when the caller's signal is already aborted
 * or when `isModelTimeout(err)` is false; per the surrounding design notes,
 * send()'s handler deals with those terminal cases. A request timeout is
 * treated as recoverable: timing is emitted, and then either the send stops as
 * `stuck` (budget spent) or `TIMEOUT_RESTEER` is appended as a user message so
 * the loop can continue while keeping the turns already completed.
 *
 * @param err The value thrown by the model call.
 * @param timeouts Timeout recoveries already used during this send.
 * @param turn Current turn number.
 * @param turnStart Start timestamp of the current turn (passed to `emitTiming`).
 * @param sendStart Start timestamp of the whole send (passed to `emitTiming`).
 * @param signal Optional caller abort signal.
 * @returns A `stuck` result once `timeouts` has reached
 *   `MAX_TIMEOUT_RECOVERIES`, otherwise `null` (caller retries with a forced
 *   tool call).
 * @throws The original `err` for a caller abort or any non-timeout error.
 */
private recoverFromTimeout(
  err: unknown,
  timeouts: number,
  turn: number,
  turnStart: number,
  sendStart: number,
  signal?: AbortSignal
): ISendResult | null {
  const callerAborted = signal?.aborted === true;

  if (callerAborted) {
    throw err;
  }

  if (!isModelTimeout(err)) {
    throw err;
  }

  emitTiming(this.report, SESSION_ID, turn, turnStart, sendStart);

  // Keep the raw error text in the report so the origin of the timeout
  // (request-timeout ceiling vs. a server-side stream close) stays diagnosable
  // from the log rather than being swallowed.
  let detail: string;

  if (err instanceof Error) {
    detail = `${err.name}: ${err.message}`;
  } else {
    detail = String(err);
  }

  const budgetSpent = timeouts >= MAX_TIMEOUT_RECOVERIES;

  if (!budgetSpent) {
    this.report({
      kind: "tool",
      task: SESSION_ID,
      message: `⚠ model request timed out (${detail}) — re-steering to a smaller turn and continuing (${String(timeouts + 1)}/${String(MAX_TIMEOUT_RECOVERIES)})`,
    });
    this.ctx.messages.push({ role: "user", content: TIMEOUT_RESTEER });

    return null;
  }

  this.report({
    kind: "stuck",
    task: SESSION_ID,
    message: `⚠ model request timed out repeatedly (${detail}) — stopped. The server may be wedged or the task too large for one turn.`,
  });

  return { status: "stuck", turns: turn };
}
|
|
1115
|
+
|
|
1116
|
+
/**
 * Append mid-run steering messages to the conversation before the next turn.
 *
 * @param steer Optional callback returning the messages typed by the user
 *   while the run was in flight. Each one is pushed as a `user` message and
 *   announced through `report`, with the preview cut to the first 60
 *   characters.
 */
private injectSteer(steer?: () => string[]): void {
  if (steer == null) {
    return;
  }

  const queued = steer() ?? [];

  for (const text of queued) {
    this.ctx.messages.push({ role: "user", content: text });

    const preview = text.slice(0, 60);

    this.report({
      kind: "tool",
      task: SESSION_ID,
      message: `↳ steering: ${preview}`,
    });
  }
}
|
|
1127
|
+
|
|
1128
|
+
/**
 * Perform one model request on behalf of `drive`, folding timeout recovery
 * into a three-way outcome so the loop body stays small.
 *
 * @param forceTool When true the request uses tool choice "required" and the
 *   same flag is passed as the third `askModel` argument (thinking disabled,
 *   per the original note).
 * @param timeouts Timeout recoveries already used during this send.
 * @param turn Current turn number.
 * @param turnStart Start timestamp of the current turn.
 * @param sendStart Start timestamp of the whole send.
 * @param opts Send options; only `opts.signal` is read here.
 * @returns `{ kind: "ok", res }` with the model response,
 *   `{ kind: "stop", result }` when `recoverFromTimeout` produced a terminal
 *   result, or `{ kind: "retry" }` when it re-steered and returned `null`.
 * @throws Whatever `recoverFromTimeout` rethrows (caller abort or a
 *   non-timeout error).
 */
private async acquireResponse(
  forceTool: boolean,
  timeouts: number,
  turn: number,
  turnStart: number,
  sendStart: number,
  opts: ISendOptions
): Promise<
  | { kind: "ok"; res: IModelResponse }
  | { kind: "stop"; result: ISendResult }
  | { kind: "retry" }
> {
  try {
    // FORCED-TOOLS experiment: a gated, non-plan turn is always requested with
    // a required tool call (the model ends its turn via yield_status), so
    // malformed tool text cannot occur. A recovery force (`forceTool`)
    // additionally switches thinking off.
    const constrained =
      forceTool || (this.forceTools && this.hasGate && !this.planMode);
    const toolChoice = constrained ? "required" : "auto";
    const reply = await this.askModel(opts.signal, toolChoice, forceTool);

    return { kind: "ok", res: reply };
  } catch (err) {
    const terminal = this.recoverFromTimeout(
      err,
      timeouts,
      turn,
      turnStart,
      sendStart,
      opts.signal
    );

    if (terminal === null) {
      return { kind: "retry" };
    }

    return { kind: "stop", result: terminal };
  }
}
|
|
1172
|
+
|
|
1173
|
+
/**
 * Execute a turn's tool calls and refresh the edit bookkeeping used by `drive`.
 *
 * @param res Model response whose `toolCalls` are executed.
 * @param acc Edit accounting carried in from the loop: whether anything was
 *   edited so far, edits accumulated since the last incremental check, and
 *   the check interval.
 * @param turn Current turn number.
 * @param turnStart Start timestamp of the current turn.
 * @param sendStart Start timestamp of the whole send.
 * @returns The updated `edited` flag (sticky once true) and the
 *   edits-since-check value returned by `checkAfterEdits`.
 */
private async runEditTurn(
  res: IModelResponse,
  acc: { edited: boolean; editsSinceCheck: number; checkEvery: number },
  turn: number,
  turnStart: number,
  sendStart: number
): Promise<{ edited: boolean; editsSinceCheck: number }> {
  const session = this.ctx;
  const runState = this.state;
  const emit = this.report;

  // Snapshot the edit counter so the edits made by this turn can be derived.
  const editsBefore = runState.edits;
  const editedNow = await runToolCalls(res.toolCalls, session, runState);
  const edited = editedNow || acc.edited;

  emitTiming(emit, SESSION_ID, turn, turnStart, sendStart);

  // Check every few edits while building so errors surface early rather than
  // accumulating into one large failure at the end.
  const editsThisTurn = runState.edits - editsBefore;
  const editsSinceCheck = await this.checkAfterEdits(
    acc.editsSinceCheck + editsThisTurn,
    acc.checkEvery
  );

  return { edited, editsSinceCheck };
}
|
|
1199
|
+
|
|
1200
|
+
/**
 * Run the gate after the model stopped following edits, and map its outcome
 * onto a send result so `drive` does not carry the done/stuck mapping.
 *
 * @param turn Current turn number.
 * @param turnStart Start timestamp of the current turn.
 * @param sendStart Start timestamp of the whole send.
 * @returns `null` when `settleGate` returns `null` (per the original note: the
 *   gate is still red and `drive` continues); otherwise a terminal result
 *   whose status is "done" when the settled status equals `RUN_STATUS.done`
 *   and "stuck" for anything else.
 */
private async settleTurn(
  turn: number,
  turnStart: number,
  sendStart: number
): Promise<ISendResult | null> {
  const outcome = await settleGate(this.ctx, this.state, turn);

  emitTiming(this.report, SESSION_ID, turn, turnStart, sendStart);

  if (outcome !== null) {
    const status = outcome.status === RUN_STATUS.done ? "done" : "stuck";

    return { status, turns: turn };
  }

  return null;
}
|
|
1221
|
+
|
|
1222
|
+
/**
 * FORCED-TOOLS mode: turn a response consisting solely of `yield_status`
 * calls back into an ordinary "model stopped" turn.
 *
 * For such a response every yield call is acknowledged with a `tool` message
 * (so no tool call is left unanswered in the transcript), `res.toolCalls` is
 * emptied, and — when the reply has no content — the first yield's string
 * `summary` argument becomes the reply content and is reported as a message.
 * The regular no-tool-call handling in the caller then applies unchanged.
 *
 * A response with no yield call, or one that mixes a yield with real calls,
 * is left untouched: in the mixed case dispatch runs everything and, per the
 * original note, the yield's dispatch stub tells the model to finish the work
 * and yield alone on a later turn.
 *
 * @param res Model response; mutated in place as described above.
 */
private resolveYieldCalls(res: IModelResponse): void {
  const calls = res.toolCalls;
  const isYield = (call: (typeof calls)[number]): boolean =>
    call.name === TOOL_NAME.yieldStatus;

  const yields = calls.filter(isYield);
  const hasRealCall = calls.some((call) => !isYield(call));

  // Nothing to convert, or a mixed turn that dispatch should handle as-is.
  if (yields.length === 0 || hasRealCall) {
    return;
  }

  // Acknowledge each yield so the transcript has a reply for every call id.
  for (const call of yields) {
    this.ctx.messages.push({
      role: "tool",
      toolCallId: call.id ?? "",
      content: "(turn ended)",
    });
  }

  res.toolCalls = [];

  const summary = yields[0]?.arguments.summary;
  const replyIsEmpty = res.content.length === 0;

  if (replyIsEmpty && typeof summary === "string") {
    res.content = summary;
    this.report({ kind: "message", task: SESSION_ID, message: summary });
  }
}
|
|
1263
|
+
|
|
1264
|
+
/**
 * Turn loop for a single send: ask the model, run its tool calls, and stop on
 * a terminal outcome or when the turn cap is reached.
 *
 * @param maxTurns Maximum number of turns before the send is reported stuck.
 * @param sendStart Start timestamp of the whole send (used for timing reports).
 * @param opts Send options; `opts.steer` supplies mid-run user messages and
 *   `opts` is forwarded to `acquireResponse`.
 * @returns The first terminal result produced by a helper, or
 *   `{ status: "stuck", turns: maxTurns }` once the cap is hit.
 */
private async drive(
  maxTurns: number,
  sendStart: number,
  opts: ISendOptions
): Promise<ISendResult> {
  const session = this.ctx;
  const emit = this.report;

  // The gate confirms changes, not answers: it only runs once a file has been
  // edited during this send, so a pure question never triggers it.
  let hasEdited = false;
  // Count of turns in this send where the model answered without acting and
  // had to be nudged (the narrate-instead-of-build failure).
  let nudgeCount = 0;
  // When set, the NEXT request demands a tool call ("required" rather than
  // "auto") so the model cannot narrate or emit malformed tool text again.
  let mustCallTool = false;
  // Repetition-loop recoveries used so far in this send (bounded).
  let loopRecoveries = 0;
  // Request timeouts recovered so far in this send (bounded).
  let timeoutCount = 0;
  // Edits accumulated since the last incremental check.
  let uncheckedEdits = 0;
  const checkInterval = this.cfg.checkEvery ?? CHECK_EVERY;

  for (let turn = 1; turn <= maxTurns; turn += 1) {
    const turnStart = performance.now();

    // Messages typed while the run was in flight steer the upcoming turn
    // instead of waiting for the run to finish.
    this.injectSteer(opts.steer);

    emit({
      kind: "cycle",
      task: SESSION_ID,
      cycle: turn,
      message: `turn ${turn}: asking model`,
    });

    // One model request with timeout recovery folded in; a caller abort or any
    // other error propagates out of this method.
    const ask = await this.acquireResponse(
      mustCallTool,
      timeoutCount,
      turn,
      turnStart,
      sendStart,
      opts
    );

    if (ask.kind !== "ok") {
      if (ask.kind === "stop") {
        return ask.result;
      }

      // Timed out and re-steered: next turn is forced (thinking off) so the
      // model makes one small, clean call.
      timeoutCount += 1;
      mustCallTool = true;

      continue;
    }

    const reply = ask.res;

    mustCallTool = false;

    // The stream detected a degenerate repetition loop: attempt a bounded
    // recovery by forcing a concrete tool call next turn before giving up.
    if (reply.degenerated === true) {
      const terminal = this.degenerationRecovery(loopRecoveries, turn);

      emitTiming(emit, SESSION_ID, turn, turnStart, sendStart);

      if (terminal !== null) {
        return terminal;
      }

      loopRecoveries += 1;
      mustCallTool = true;

      continue;
    }

    // FORCED-TOOLS: a lone yield_status call is converted into a normal stop.
    this.resolveYieldCalls(reply);

    // Tool calls present — the model is still working; run them and loop. The
    // gate is consulted only once the model stops.
    if (reply.toolCalls.length > 0) {
      const progress = await this.runEditTurn(
        reply,
        {
          edited: hasEdited,
          editsSinceCheck: uncheckedEdits,
          checkEvery: checkInterval,
        },
        turn,
        turnStart,
        sendStart
      );

      hasEdited = progress.edited;
      uncheckedEdits = progress.editsSinceCheck;

      continue;
    }

    // The model stopped without tool calls. With no gate, or with a gate but
    // no edits in this send, resolveNoEditYield decides between a genuine
    // answer and the narrate-instead-of-build failure.
    const gateApplies = this.hasGate && hasEdited;

    if (!gateApplies) {
      const outcome = this.resolveNoEditYield(reply.content, turn, nudgeCount);

      emitTiming(emit, SESSION_ID, turn, turnStart, sendStart);

      if (outcome.result !== null) {
        return outcome.result;
      }

      nudgeCount += 1;
      mustCallTool = true; // it answered without acting — force a tool call

      continue;
    }

    // Let the gate confirm the edits: a non-null result is terminal, null
    // means the gate is still red.
    const verdict = await this.settleTurn(turn, turnStart, sendStart);

    if (verdict !== null) {
      return verdict;
    }

    // Gate came back red → switch to repair mode.
    this.repairing = true;

    // The model stopped while still red: nudge it to act, and force a tool
    // call on the next turn so it cannot merely narrate again.
    session.messages.push({ role: "user", content: NO_TOOL_CALL_NUDGE });
    mustCallTool = true;
  }

  emit({
    kind: "stuck",
    task: SESSION_ID,
    cycles: maxTurns,
    message: `stuck (hit ${maxTurns}-turn cap)`,
  });

  return { status: "stuck", turns: maxTurns };
}
|
|
1410
|
+
}
|