@agjs/tsforge 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (216)
  1. package/bin/tsforge.js +2 -0
  2. package/package.json +35 -0
  3. package/src/agent/agent.constants.ts +382 -0
  4. package/src/agent/agent.types.ts +34 -0
  5. package/src/agent/index.ts +4 -0
  6. package/src/agent/model-agent.ts +297 -0
  7. package/src/agent/tool-repair.ts +194 -0
  8. package/src/agent/tools.ts +190 -0
  9. package/src/browser/checks.ts +96 -0
  10. package/src/browser/index.ts +8 -0
  11. package/src/browser/oracle.ts +303 -0
  12. package/src/classify.ts +48 -0
  13. package/src/cli.ts +1333 -0
  14. package/src/config/config.constants.ts +9 -0
  15. package/src/config/flags.ts +32 -0
  16. package/src/config/index.ts +8 -0
  17. package/src/config/tsforge-config.ts +301 -0
  18. package/src/constitution/baseline.ts +257 -0
  19. package/src/detect-gate.ts +498 -0
  20. package/src/eval/eval.types.ts +36 -0
  21. package/src/eval/index.ts +3 -0
  22. package/src/eval/judge.ts +62 -0
  23. package/src/eval/score.ts +39 -0
  24. package/src/files/create.ts +22 -0
  25. package/src/files/edit.ts +193 -0
  26. package/src/files/files.constants.ts +11 -0
  27. package/src/files/files.types.ts +81 -0
  28. package/src/files/hashline-format.ts +110 -0
  29. package/src/files/hashline.ts +689 -0
  30. package/src/files/index.ts +19 -0
  31. package/src/index.ts +8 -0
  32. package/src/inference/index.ts +6 -0
  33. package/src/inference/inference.constants.ts +34 -0
  34. package/src/inference/inference.types.ts +123 -0
  35. package/src/inference/openai-compatible.ts +113 -0
  36. package/src/inference/stream-guard.ts +161 -0
  37. package/src/inference/stream.ts +370 -0
  38. package/src/inference/transport.ts +78 -0
  39. package/src/inference/wire.ts +0 -0
  40. package/src/lib/fs/fs.ts +126 -0
  41. package/src/lib/fs/fs.types.ts +5 -0
  42. package/src/lib/fs/index.ts +3 -0
  43. package/src/lib/fs/process.ts +146 -0
  44. package/src/lib/guards/guards.ts +9 -0
  45. package/src/lib/guards/index.ts +1 -0
  46. package/src/lib/json/index.ts +1 -0
  47. package/src/lib/json/json.ts +12 -0
  48. package/src/lib/scope/index.ts +2 -0
  49. package/src/lib/scope/scope.constants.ts +3 -0
  50. package/src/lib/scope/scope.ts +40 -0
  51. package/src/loop/astgrep-fix.ts +228 -0
  52. package/src/loop/feedback/feedback.ts +138 -0
  53. package/src/loop/feedback/index.ts +8 -0
  54. package/src/loop/feedback/meta-rule-docs.ts +41 -0
  55. package/src/loop/feedback/meta-rule-feedback.ts +61 -0
  56. package/src/loop/feedback/rule-docs.generated.json +112 -0
  57. package/src/loop/feedback/rule-docs.ts +342 -0
  58. package/src/loop/index.ts +19 -0
  59. package/src/loop/loop.constants.ts +68 -0
  60. package/src/loop/loop.types.ts +99 -0
  61. package/src/loop/prompt/index.ts +2 -0
  62. package/src/loop/prompt/project-map.ts +69 -0
  63. package/src/loop/prompt/prompt.ts +107 -0
  64. package/src/loop/quality.ts +174 -0
  65. package/src/loop/rule-docs.generated.json +367 -0
  66. package/src/loop/run-spec.ts +88 -0
  67. package/src/loop/run.ts +400 -0
  68. package/src/loop/session.ts +1410 -0
  69. package/src/loop/tools/add-dependency.ts +71 -0
  70. package/src/loop/tools/condense.ts +498 -0
  71. package/src/loop/tools/edit-hashline.ts +80 -0
  72. package/src/loop/tools/execute-tool.ts +80 -0
  73. package/src/loop/tools/file-ops.ts +323 -0
  74. package/src/loop/tools/index.ts +2 -0
  75. package/src/loop/tools/lsp-ops.ts +222 -0
  76. package/src/loop/tools/scaffold-routes.ts +68 -0
  77. package/src/loop/tools/scaffold-ui.ts +62 -0
  78. package/src/loop/tools/scaffold-web.ts +35 -0
  79. package/src/loop/tools/tool-context.ts +126 -0
  80. package/src/loop/ttsr-defaults.ts +53 -0
  81. package/src/loop/ttsr.ts +322 -0
  82. package/src/loop/turn.ts +856 -0
  83. package/src/lsp/index.ts +2 -0
  84. package/src/lsp/lsp.types.ts +56 -0
  85. package/src/lsp/service.ts +500 -0
  86. package/src/meta-rules/context.ts +195 -0
  87. package/src/meta-rules/index.ts +9 -0
  88. package/src/meta-rules/meta-rules.types.ts +47 -0
  89. package/src/meta-rules/parsers/package-json-parser.ts +51 -0
  90. package/src/meta-rules/registry.ts +37 -0
  91. package/src/meta-rules/rules/ci/workflow-actions-pinned.ts +59 -0
  92. package/src/meta-rules/rules/ci/workflow-runner-pinned.ts +57 -0
  93. package/src/meta-rules/rules/ci/workflow-timeout-required.ts +114 -0
  94. package/src/meta-rules/rules/config/tsconfig-paths-exist.ts +117 -0
  95. package/src/meta-rules/rules/config/tsconfig-strict.ts +91 -0
  96. package/src/meta-rules/rules/source-text/no-eslint-disable-comments.ts +34 -0
  97. package/src/meta-rules/rules/source-text/no-ts-suppressions.ts +38 -0
  98. package/src/meta-rules/rules/supply-chain/no-overlapping-libs.ts +57 -0
  99. package/src/meta-rules/rules/supply-chain/package-exact-deps.ts +55 -0
  100. package/src/meta-rules/rules/testing/test-sibling-required.ts +110 -0
  101. package/src/meta-rules/runner.ts +64 -0
  102. package/src/models-config.ts +196 -0
  103. package/src/render/ansi.ts +289 -0
  104. package/src/render/banner.ts +113 -0
  105. package/src/render/box.ts +134 -0
  106. package/src/render/index.ts +7 -0
  107. package/src/render/markdown.ts +123 -0
  108. package/src/render/render.types.ts +21 -0
  109. package/src/render/stream-markdown.ts +128 -0
  110. package/src/render/style.ts +26 -0
  111. package/src/rule-packs/bullmq/index.ts +39 -0
  112. package/src/rule-packs/bullmq/rules/index.ts +7 -0
  113. package/src/rule-packs/bullmq/rules/job-name-must-be-constant.ts +141 -0
  114. package/src/rule-packs/bullmq/rules/job-options-must-set-attempts.ts +174 -0
  115. package/src/rule-packs/bullmq/rules/no-blocking-concurrency-zero.ts +103 -0
  116. package/src/rule-packs/bullmq/rules/queue-options-must-set-removeoncomplete.ts +130 -0
  117. package/src/rule-packs/bullmq/rules/queue-options-must-set-removeonfail.ts +130 -0
  118. package/src/rule-packs/bullmq/rules/worker-must-implement-close.ts +182 -0
  119. package/src/rule-packs/bullmq/rules/worker-must-listen-failed.ts +140 -0
  120. package/src/rule-packs/bullmq/utils.ts +334 -0
  121. package/src/rule-packs/code-flow/index.ts +25 -0
  122. package/src/rule-packs/code-flow/rules/index.ts +3 -0
  123. package/src/rule-packs/code-flow/rules/no-bare-date-now.ts +138 -0
  124. package/src/rule-packs/code-flow/rules/no-template-trim-empty-ternary.ts +87 -0
  125. package/src/rule-packs/code-flow/rules/prefer-early-return.ts +80 -0
  126. package/src/rule-packs/code-flow/utils/prefer-early-return.ts +132 -0
  127. package/src/rule-packs/comment-hygiene/index.ts +25 -0
  128. package/src/rule-packs/comment-hygiene/rules/index.ts +3 -0
  129. package/src/rule-packs/comment-hygiene/rules/no-historical-comments.ts +102 -0
  130. package/src/rule-packs/comment-hygiene/rules/no-narration-comments.ts +83 -0
  131. package/src/rule-packs/comment-hygiene/rules/no-pr-reference-comments.ts +90 -0
  132. package/src/rule-packs/create-rule.ts +9 -0
  133. package/src/rule-packs/drizzle/index.ts +41 -0
  134. package/src/rule-packs/drizzle/rules/account-scoped-tables-require-where.ts +371 -0
  135. package/src/rule-packs/drizzle/rules/index.ts +8 -0
  136. package/src/rule-packs/drizzle/rules/no-nested-db-transaction.ts +127 -0
  137. package/src/rule-packs/drizzle/rules/no-raw-sql-outside-allowlist.ts +100 -0
  138. package/src/rule-packs/drizzle/rules/relations-must-cover-fks.ts +209 -0
  139. package/src/rule-packs/drizzle/rules/schema-files-must-not-import-driver.ts +127 -0
  140. package/src/rule-packs/drizzle/rules/schema-files-must-only-export-schema.ts +149 -0
  141. package/src/rule-packs/drizzle/rules/tables-must-have-timestamps.ts +312 -0
  142. package/src/rule-packs/drizzle/rules/timestamp-must-specify-mode.ts +166 -0
  143. package/src/rule-packs/drizzle/utils.ts +115 -0
  144. package/src/rule-packs/elysia/index.ts +43 -0
  145. package/src/rule-packs/elysia/rules/consistent-status-via-set.ts +69 -0
  146. package/src/rule-packs/elysia/rules/no-decorate-state-collision.ts +276 -0
  147. package/src/rule-packs/elysia/rules/no-separate-model-interfaces.ts +144 -0
  148. package/src/rule-packs/elysia/rules/prefer-destructured-context.ts +155 -0
  149. package/src/rule-packs/elysia/rules/prefer-direct-return.ts +176 -0
  150. package/src/rule-packs/elysia/rules/prefer-static-services.ts +159 -0
  151. package/src/rule-packs/elysia/rules/prefer-throw-status.ts +151 -0
  152. package/src/rule-packs/elysia/rules/require-hooks-before-routes.ts +209 -0
  153. package/src/rule-packs/elysia/rules/require-plugin-name.ts +107 -0
  154. package/src/rule-packs/elysia/utils/elysiaChain.ts +306 -0
  155. package/src/rule-packs/env-access/index.ts +23 -0
  156. package/src/rule-packs/env-access/rules/index.ts +2 -0
  157. package/src/rule-packs/env-access/rules/no-direct-process-env.ts +133 -0
  158. package/src/rule-packs/env-access/rules/no-process-exit.ts +95 -0
  159. package/src/rule-packs/i18n-keys/index.ts +19 -0
  160. package/src/rule-packs/i18n-keys/rules/static-translation-key-exists.ts +173 -0
  161. package/src/rule-packs/index.ts +139 -0
  162. package/src/rule-packs/jwt-cookies/index.ts +25 -0
  163. package/src/rule-packs/jwt-cookies/rules/auth-cookie-must-be-httponly.ts +150 -0
  164. package/src/rule-packs/jwt-cookies/rules/auth-cookie-must-be-secure-in-prod.ts +149 -0
  165. package/src/rule-packs/jwt-cookies/rules/bcrypt-rounds-min.ts +195 -0
  166. package/src/rule-packs/jwt-cookies/utils.ts +188 -0
  167. package/src/rule-packs/oauth-security/index.ts +25 -0
  168. package/src/rule-packs/oauth-security/rules/pkce-required-for-oidc.ts +296 -0
  169. package/src/rule-packs/oauth-security/rules/state-must-be-redis-backed.ts +193 -0
  170. package/src/rule-packs/oauth-security/rules/state-ttl-bounded.ts +219 -0
  171. package/src/rule-packs/oauth-security/utils.ts +127 -0
  172. package/src/rule-packs/react-component-architecture/index.ts +35 -0
  173. package/src/rule-packs/react-component-architecture/rules/component-folder-structure.ts +123 -0
  174. package/src/rule-packs/react-component-architecture/rules/forwardref-display-name.ts +93 -0
  175. package/src/rule-packs/react-component-architecture/rules/index-must-reexport-default.ts +123 -0
  176. package/src/rule-packs/react-component-architecture/rules/max-hooks-per-file.ts +122 -0
  177. package/src/rule-packs/react-component-architecture/rules/no-cross-feature-imports.ts +170 -0
  178. package/src/rule-packs/react-component-architecture/rules/no-inline-jsx-functions.ts +66 -0
  179. package/src/rule-packs/react-component-architecture/utils.ts +47 -0
  180. package/src/rule-packs/rule-packs.types.ts +18 -0
  181. package/src/rule-packs/structured-logging/index.ts +26 -0
  182. package/src/rule-packs/structured-logging/rules/mask-pii-fields.ts +221 -0
  183. package/src/rule-packs/structured-logging/rules/no-error-stringify.ts +217 -0
  184. package/src/rule-packs/structured-logging/rules/require-event-field.ts +136 -0
  185. package/src/rule-packs/structured-logging/utils/logger.ts +104 -0
  186. package/src/rule-packs/tanstack-query/index.ts +20 -0
  187. package/src/rule-packs/tanstack-query/rules/prefix-query-key-must-use-set-queries-data.ts +321 -0
  188. package/src/rule-packs/test-conventions/index.ts +23 -0
  189. package/src/rule-packs/test-conventions/rules/index.ts +2 -0
  190. package/src/rule-packs/test-conventions/rules/no-focused-tests.ts +170 -0
  191. package/src/rule-packs/test-conventions/rules/test-file-mirrors-source.ts +127 -0
  192. package/src/rule-packs/utils.ts +142 -0
  193. package/src/session-store.ts +359 -0
  194. package/src/spec/generate-tests.ts +213 -0
  195. package/src/spec/index.ts +5 -0
  196. package/src/spec/parse.ts +152 -0
  197. package/src/spec/review-tests.ts +162 -0
  198. package/src/spec/spec.constants.ts +13 -0
  199. package/src/spec/spec.types.ts +79 -0
  200. package/src/stack-detection/detect.ts +246 -0
  201. package/src/stack-detection/index.ts +3 -0
  202. package/src/stack-detection/packs.ts +174 -0
  203. package/src/stack-detection/stack-detection.types.ts +47 -0
  204. package/src/validate/accept.ts +49 -0
  205. package/src/validate/errors.ts +35 -0
  206. package/src/validate/index.ts +12 -0
  207. package/src/validate/parse.ts +148 -0
  208. package/src/validate/run-tests.ts +59 -0
  209. package/src/validate/validate.ts +40 -0
  210. package/src/validate/validate.types.ts +52 -0
  211. package/src/web-components.ts +638 -0
  212. package/src/web-coverage.ts +89 -0
  213. package/src/web-routes.ts +151 -0
  214. package/src/web-templates.ts +1011 -0
  215. package/strict.eslint.config.mjs +84 -0
  216. package/strict.web.eslint.config.mjs +185 -0
@@ -0,0 +1,1410 @@
1
+ import type {
2
+ IChatMessage,
3
+ IModelResponse,
4
+ IProvider,
5
+ ITokenUsage,
6
+ } from "../inference";
7
+ import type { ITask } from "../spec";
8
+ import type { FileLinter } from "../detect-gate";
9
+ import {
10
+ SCAFFOLD_UI_TOOL,
11
+ SCAFFOLD_ROUTES_TOOL,
12
+ SCAFFOLD_WEB_TOOL,
13
+ SEARCH_TOOL,
14
+ ADD_DEPENDENCY_TOOL,
15
+ YIELD_STATUS_TOOL,
16
+ READ_ONLY_TOOL_NAMES,
17
+ TOOL_NAME,
18
+ } from "../agent";
19
+ import { flags } from "../config";
20
+ import { readFiles } from "../lib/fs";
21
+ import { validate, type ErrorParser } from "../validate";
22
+ import { detectStack } from "../stack-detection";
23
+ import {
24
+ loadTsforgeConfig,
25
+ normalizeRuleOverrides,
26
+ resolveActivePacks,
27
+ } from "../config/tsforge-config";
28
+ import { LOOP_LIMITS, RUN_STATUS } from "./loop.constants";
29
+ import type { Reporter } from "./loop.types";
30
+ import { CHAT_SYSTEM, COMPACT_SYSTEM } from "./prompt";
31
+ import {
32
+ buildTsService,
33
+ BUILD_NUDGE,
34
+ emitTiming,
35
+ type ILoopCtx,
36
+ type ILoopState,
37
+ isPhantomRouteError,
38
+ NO_TOOL_CALL_NUDGE,
39
+ runToolCalls,
40
+ settleGate,
41
+ toolsFor,
42
+ } from "./turn";
43
+
44
+ /**
45
+ * A persistent, tool-using conversation against a working directory — the engine
46
+ * behind the interactive CLI. Unlike `runTask` (one RED-first task driven to
47
+ * green and returned), a Session lives across many user messages: each `send()`
48
+ * runs the model until it stops calling tools, then — IF a gate is configured —
49
+ * the deterministic gate confirms "done" (green = accept, red = errors fed back,
50
+ * keep going). With no gate it's a plain conversational turn. Same `turn.ts`
51
+ * primitives as `runTask`, so there is one tool-loop and one gate, not two.
52
+ */
53
+ export interface ISessionConfig {
54
+ provider: IProvider;
55
+ /** Working directory the agent operates in. */
56
+ cwd: string;
57
+ /** Editable scope — edits/creates outside these are rejected. Empty = read-only. */
58
+ files?: string[];
59
+ /** Gate command. When set, a turn that ends without tool calls is gate-confirmed. */
60
+ accept?: string;
61
+ /** Auto-fix command run before re-validating (e.g. `eslint --fix`). */
62
+ fix?: string;
63
+ /** Read-only context files. */
64
+ context?: string[];
65
+ parse?: ErrorParser;
66
+ report?: Reporter;
67
+ temperature?: number;
68
+ enableThinking?: boolean;
69
+ thinkingTokenBudget?: number;
70
+ /** Per-`send` turn cap (default LOOP_LIMITS.maxTurns). */
71
+ maxTurns?: number;
72
+ /** Resume from a saved conversation (incl. its system message) instead of
73
+ * starting fresh — used by `--continue`. */
74
+ history?: IChatMessage[];
75
+ /** Extra opinionated guidance appended to the system prompt (e.g. a scaffold's
76
+ * conventions: "this is a web app, the entry is app.ts…"). */
77
+ guidance?: string;
78
+ /** The model's context window (tokens). When set, the session auto-compacts
79
+ * before a send once the held context exceeds `autoCompactAt` of it. 0/unset
80
+ * disables auto-compaction. */
81
+ contextWindow?: number;
82
+ /** Fraction of `contextWindow` that triggers auto-compaction (default 0.8). */
83
+ autoCompactAt?: number;
84
+ /** A FAST check (e.g. `tsc --noEmit`) run every `checkEvery` edits WHILE the
85
+ * model is still building — so errors surface a few edits after they're made,
86
+ * not as a 100-error avalanche when it finally stops. Empty = off. */
87
+ incrementalCheck?: string;
88
+ /** Edits between incremental checks (default 3). */
89
+ checkEvery?: number;
90
+ /** Write-time single-file linter (the gate's eslint rules per write). When set,
91
+ * the write-guard reports lint violations — the moat rules tsc can't see (`as`,
92
+ * `I`-prefix) — inline, so they're fixed in-context not piled up at the gate. */
93
+ lintFile?: FileLinter;
94
+ /** Offer the `scaffold_ui` tool (themed UI primitives). Web builds only — keeps
95
+ * it off the pure-TS/scratch tool list where it's meaningless noise. */
96
+ scaffoldUi?: boolean;
97
+ /** Offer the `scaffold_web` tool — a fresh INTERACTIVE session where the agent
98
+ * decides whether to start a web app. Pair with `setSetupWeb`. */
99
+ scaffoldWeb?: boolean;
100
+ /** FORCED-TOOLS experiment (default: the TSFORGE_FORCE_TOOLS env flag): gated
101
+ * build turns always run with tool_choice "required" + the `yield_status`
102
+ * stop tool, so every turn is grammar-constrained and the malformed-call
103
+ * class is impossible. Conversational (no-gate) and plan-mode turns are
104
+ * unaffected (they should stream prose). */
105
+ forceTools?: boolean;
106
+ }
107
+
108
+ /** The outcome of one `send`. `responded` = conversational (no gate); the gate
109
+ * verdicts are `done`/`stuck` as in `runTask`; `interrupted` = the user aborted. */
110
+ export interface ISendResult {
111
+ status: "responded" | "done" | "stuck" | "interrupted";
112
+ turns: number;
113
+ }
114
+
115
+ export interface ISendOptions {
116
+ /** Caller cancellation (Ctrl-C). */
117
+ signal?: AbortSignal;
118
+ /** Drained at each turn boundary — any returned strings are injected as user
119
+ * messages before the next model call, so the user can STEER a run in flight
120
+ * ("actually use Tailwind") without aborting it. */
121
+ steer?: () => string[];
122
+ /** Per-send thinking override (beats cfg.enableThinking for this send only).
123
+ * Used to keep thinking ON for the design phase (where reasoning earns its
124
+ * keep) but OFF for the mechanical implement phase, where ~25k tokens of
125
+ * pre-write reasoning per build is pure latency. */
126
+ enableThinking?: boolean;
127
+ }
128
+
129
+ const SESSION_ID = "session";
130
+
131
+ /** Default share of the context window that triggers auto-compaction. */
132
+ const AUTO_COMPACT_AT = 0.8;
133
+
134
+ /** Staged-build step 1: design the type contract FIRST, gate off. Constraining
135
+ * the model to types before UI is the community-validated cure for random API
136
+ * invention on local models (plan → interfaces → implementation). */
137
+ const PLAN_TYPES_STEP =
138
+ "STEP 1 of 2 — DESIGN FIRST, do not build the UI yet. In ONE short paragraph, " +
139
+ "name the DOMAINS the app needs and the data each holds. Then lay out the type " +
140
+ "contract the boringstack way: for each domain create its " +
141
+ "`src/<domain>/<domain>.types.ts` (its `I`-prefixed interfaces) and, where it has " +
142
+ "fixed registries/config, `src/<domain>/<domain>.constants.ts` (`as const`). Put " +
143
+ "types shared across domains in `src/shared/shared.types.ts`. Do NOT create one " +
144
+ "mega `src/types.ts`. THIS STEP IS TYPES/CONSTANTS ONLY: do NOT create components, " +
145
+ "routes, services, seeds, or hooks, and do NOT call scaffold_routes or scaffold_ui " +
146
+ "yet — the NEXT step builds ALL of that. This phase's gate checks ONLY types (no " +
147
+ "build), so anything else you write now just risks errors and wastes turns. When " +
148
+ "your `.types.ts`/`.constants.ts` files type-check, STOP.\n" +
149
+ "SPEED: after the one-paragraph plan, write MANY files per turn — emit SEVERAL " +
150
+ "`create` tool calls in a SINGLE response (batch all of a domain's type/constant " +
151
+ "files at once). Do NOT write one file then stop and wait.";
152
+
153
+ /** Plan mode — emitted AFTER the design phase to surface the model's intent for a
154
+ * human to review before phase 2 commits. Asks for a concise plan, NOT code. */
155
+ const PLAN_SUMMARY_STEP =
156
+ "Before building the UI, output your BUILD PLAN as concise markdown so it can be " +
157
+ "reviewed. Cover, briefly:\n" +
158
+ "1. ENTITIES — list each, and for each say whether it gets its OWN routes " +
159
+ "(list/detail/create) or is NESTED/EMBEDDED in another (say where).\n" +
160
+ "2. ROUTES/PAGES — the routes you will create.\n" +
161
+ "3. DONE — what you consider a complete app for this spec.\n" +
162
+ "4. DECISIONS/ASSUMPTIONS — any modeling choices a reviewer might want to change.\n" +
163
+ "Output ONLY the markdown plan — no preamble, no tool calls, no code.";
164
+
165
+ /** GENERAL plan mode (the `/plan` toggle, any task — distinct from the staged
166
+ * web build's PLAN_SUMMARY_STEP): rides the first user message after the mode
167
+ * flips on. Read-only tools enforce the contract at the execute layer; this
168
+ * note tells the model the workflow — explore, clarify, propose, wait. */
169
+ const PLAN_MODE_NOTE =
170
+ "[PLAN MODE — read-only. edit/create and write commands are disabled until " +
171
+ "the user approves a plan.]\n" +
172
+ "1. EXPLORE first: read/search the code this request touches.\n" +
173
+ "2. If the request is ambiguous, ask your clarifying question(s) and STOP — " +
174
+ "the user will answer.\n" +
175
+ "3. When you know enough, reply with a concise plan under a `## Plan` " +
176
+ "heading: each file to change and what to do in it, in order. No code dumps, " +
177
+ "no tool calls in that reply.\n" +
178
+ "The user will reply with feedback (revise the plan) or approve it; you " +
179
+ "implement ONLY after approval.";
180
+
181
+ /** Sent when the user approves a plan-mode plan — the plan itself is already the
182
+ * latest assistant message, so anchor it instead of re-pasting it. */
183
+ export const PLAN_APPROVED_NOTE =
184
+ "Your plan is APPROVED — plan mode is off and all tools are available again. " +
185
+ "Implement the approved plan above now, in order, starting with the first " +
186
+ "step. Do not re-explore or restate the plan; emit the tool calls.";
187
+
188
+ /** Default edits between incremental checks. */
189
+ const CHECK_EVERY = 3;
190
+
191
+ /** How many times a send recovers from a repetition loop before giving up. */
192
+ const MAX_DEGENERATION_RECOVERIES = 2;
193
+
194
+ /** How many times a send recovers from a model-request TIMEOUT before giving up.
195
+ * A single over-long turn (the model spiralled past the request timeout) must not
196
+ * throw away many turns of real progress — re-steer toward a small, fast turn and
197
+ * continue. Bounded so a server that's genuinely wedged still ends the run. */
198
+ const MAX_TIMEOUT_RECOVERIES = 2;
199
+
200
+ /** Pushed after a request timeout — the previous turn ran past the (generous)
201
+ * request timeout, almost always from too-long reasoning or one huge file. Demand
202
+ * a small, fast turn (paired with a forced, thinking-off tool call). */
203
+ const TIMEOUT_RESTEER =
204
+ "Your previous response timed out — it ran too long (likely over-long reasoning " +
205
+ "or one huge file). Make the SINGLE next tool call now: create or edit just ONE " +
206
+ "file, kept small. Keep reasoning brief. No prose.";
207
+
208
/**
 * Reports whether `err` is a model-request TIMEOUT (AbortSignal.timeout fires
 * a `TimeoutError`) rather than a caller abort or a connection drop.
 *
 * A value that is not an `Error` instance is never treated as a timeout. An
 * `Error` qualifies when its `name` is exactly `"TimeoutError"`, or when its
 * message contains "timed out" or "timeout" (case-insensitive).
 */
function isModelTimeout(err: unknown): boolean {
  if (err instanceof Error) {
    const namedTimeout = err.name === "TimeoutError";

    // Fall back to the message text for timeouts that are not named as such.
    return namedTimeout || /timed out|timeout/i.test(err.message);
  }

  return false;
}
217
+
218
+ /** Pushed after a repetition loop — break the spiral by demanding ONE concrete
219
+ * action (paired with a forced tool call, which can't loop in prose). */
220
+ const REPETITION_RESTEER =
221
+ "You started repeating yourself. STOP — do not re-explain or re-decide. Emit " +
222
+ "the SINGLE next tool call that makes concrete progress (create or edit ONE " +
223
+ "file). No prose.";
224
+
225
+ /** Prefaces interim-check feedback so the model fixes real errors and ignores the
226
+ * expected "module not found" noise from files it hasn't created yet. */
227
+ const INTERIM_CHECK_NOTE =
228
+ "Interim type-check (NOT the final gate) — fix these now, while they are few, " +
229
+ "before writing more. IGNORE any `Cannot find module './…'` for files you have " +
230
+ "not created yet; fix the real type errors:";
231
+
232
+ /** Staged-build step 2: implement against the contract, gate on (drive to green). */
233
+ const IMPLEMENT_STEP =
234
+ "STEP 2 of 2 — build the app in THIS ORDER, so every file compiles the moment " +
235
+ "you write it (each step depends only on earlier ones — no forward references):\n" +
236
+ "1) DATA LAYER — each domain's seed + service (`createCollection`). Small files; " +
237
+ "emit them together.\n" +
238
+ "2) ROUTES — call `scaffold_routes` ONCE with EVERY page the app needs (list, " +
239
+ "detail with $param like /accounts/$accountId, and create/edit like " +
240
+ "/deals/create). This writes all route files at once, so from here every " +
241
+ "<Link to>/navigate target type-checks — NEVER hand-write a route file.\n" +
242
+ "3) SHELL — the app-shell layout + nav linking those routes.\n" +
243
+ "4) FILL, FEATURE BY FEATURE — replace each route's placeholder with its real " +
244
+ "component (import your types + `useCollection(service)` + @/components/ui + " +
245
+ "<Link> to any route). FINISH one feature before starting the next.\n" +
246
+ "PACE: write ONE coherent slice per turn — a single feature's few files together " +
247
+ "(or one file if it's large) — then let the gate check it. Do NOT dump the whole " +
248
+ "app in one response (it gets cut off and the work is lost); do NOT trickle one " +
249
+ "trivial file at a time either. The gate builds + browser-verifies; fix exactly " +
250
+ "what it reports. Don't explain or plan in prose — just emit the tool calls.";
251
+
252
/**
 * Heuristic for "the model pasted whole files into its chat message instead of
 * calling `create`".
 *
 * Counts the ``` fence markers in `content` and trips when there are at least
 * 4 of them (i.e. two or more fenced blocks), or when there are at least 2 and
 * the message is longer than 1500 characters (one big block in a long reply).
 * A single short illustrative snippet does NOT trip it, so ordinary Q&A
 * answers pass through.
 */
function looksLikeCodeDump(content: string): boolean {
  // Splitting on the marker yields one more piece than there are markers.
  const markerCount = content.split("```").length - 1;

  if (markerCount >= 4) {
    return true;
  }

  return markerCount >= 2 && content.length > 1500;
}
263
+
264
/** Every offered tool name, joined as a regex alternation (`a|b|c`). */
const TOOL_NAMES_ALT = Object.values(TOOL_NAME).join("|");

/** Detects tool-call MARKUP that leaked into the reply text. Covers the known
 * malformed variants — `<function=`, `<tool_call`, `<parameter…`, `<|tool|>`,
 * and a line-leading `<tool>` for a tool we offer — where the server's parser
 * left the call in content and salvage could not rescue it (see
 * malformed-toolcall-format + wire.ts salvage). Case-insensitive; `^` anchors
 * at any line start. */
const LEAKED_CALL_RE = new RegExp(
  [
    "<function=",
    "<tool_call",
    "<parameters?[=>]",
    `<\\|(?:${TOOL_NAMES_ALT})\\|>`,
    `^<(?:${TOOL_NAMES_ALT})>`,
  ].join("|"),
  "im"
);

/** Detects the fully-degenerate invented-markup form: a short matched
 * `<tag>…</tag>` pair, each tag alone on its own line, with at most 400
 * characters between them (e.g. `<files>\n["…"]\n</files>`, captured live). A
 * legitimate prose answer containing an HTML example could match too — that
 * costs one bounded nudge turn, whereas missing a real case strands the whole
 * build. */
const TAG_PAIR_RE = /^<([a-z_]+)>\s*$[\s\S]{0,400}?^<\/\1>\s*$/m;
280
+
281
/** Reports whether the model wrote a tool call as TEXT instead of invoking
 * one: true when `content` matches either the leaked-call pattern or the
 * matched-tag-pair pattern. */
function leaksToolMarkup(content: string): boolean {
  if (LEAKED_CALL_RE.test(content)) {
    return true;
  }

  return TAG_PAIR_RE.test(content);
}
285
+
286
+ /** Pushed when a no-tool-call reply contained leaked tool markup — the model
287
+ * believes it acted, but nothing ran. Paired with a FORCED tool call next turn
288
+ * (constrained decoding ⇒ the retry always parses). */
289
+ const MALFORMED_CALL_NUDGE =
290
+ "Your last reply contained tool-call markup as plain TEXT — the syntax was " +
291
+ "malformed, so NO tool ran and nothing happened. Do not write tool syntax " +
292
+ "in prose. Re-issue that action as a real tool call now.";
293
+
294
/**
 * Builds the session's system message: CHAT_SYSTEM, a blank line, then a short
 * newline-separated orientation to the workspace.
 *
 * The orientation always names the working directory and the edit scope. It
 * adds a line describing the check command when `cfg.accept` is a non-empty
 * string, and appends `cfg.guidance` verbatim when that is a non-empty string.
 */
function systemPrompt(cfg: ISessionConfig): string {
  const scope = cfg.files ?? [];
  // NOTE(review): an empty scope is presented to the model as "edit any file",
  // while ISessionConfig.files documents "Empty = read-only" — confirm which
  // of the two is intended.
  const scopeLine =
    scope.length === 0 || scope.includes("**/*")
      ? "You may read, run, and edit any file in the workspace."
      : `You may only edit: ${scope.join(", ")} (everything else is read-only).`;
  const orientation = [`Workspace: ${cfg.cwd}`, scopeLine];

  const gate = cfg.accept ?? "";

  if (gate.length > 0) {
    orientation.push(
      `A check is configured: \`${gate}\`. When you finish a change and ` +
        "stop calling tools, it runs automatically — if it fails you'll get the " +
        "errors and should fix them and continue until it passes."
    );
  }

  const extraGuidance = cfg.guidance ?? "";

  if (extraGuidance.length > 0) {
    orientation.push(extraGuidance);
  }

  return `${CHAT_SYSTEM}\n\n${orientation.join("\n")}`;
}
320
+
321
+ export class Session {
322
+ private readonly provider: IProvider;
323
+ private readonly cfg: ISessionConfig;
324
+ private readonly report: Reporter;
325
+ private tools: (
326
+ | ReturnType<typeof toolsFor>[number]
327
+ | typeof SCAFFOLD_UI_TOOL
328
+ | typeof SCAFFOLD_ROUTES_TOOL
329
+ | typeof SCAFFOLD_WEB_TOOL
330
+ | typeof ADD_DEPENDENCY_TOOL
331
+ | typeof YIELD_STATUS_TOOL
332
+ )[];
333
+ private hasGate: boolean;
334
+ private readonly ctx: ILoopCtx;
335
+ private readonly state: ILoopState;
336
+ /** Token usage from the most recent model call — `promptTokens` is the real
337
+ * size of the context the model last saw (drives the status gauge and, soon,
338
+ * auto-compaction). */
339
+ private lastUsage?: ITokenUsage;
340
+ /** Fast check run every few edits while building (e.g. tsc); "" = off. */
341
+ private incrementalCheck: string;
342
+ /** Per-send thinking override, set from ISendOptions for the duration of a
343
+ * `send` (cleared after). Lets the design phase think and the implement phase
344
+ * not. Undefined = fall back to cfg.enableThinking (server default). */
345
+ private activeThinking?: boolean;
346
+ /** ADAPTIVE THINKING: true while the model has outstanding errors to fix (an
347
+ * interim check or the gate came back RED). Measured: ~80% of build time is
348
+ * REPAIR, and thinking-OFF repair oscillates and never converges (churns to the
349
+ * turn cap), while thinking-ON repair converges. So we think ONLY while
350
+ * repairing — fast thinking-off creation, convergent thinking-on repair. */
351
+ private repairing = false;
352
+ /** GENERAL plan mode: read-only exploration until the user approves a plan.
353
+ * Mirrors into ctx.readOnly (the execute-layer guarantee) and filters the
354
+ * advertised tool list per call — `this.tools` itself is never mutated, so
355
+ * toggling off restores everything with zero bookkeeping. */
356
+ private planMode = false;
357
+ /** Attach PLAN_MODE_NOTE to the NEXT send only (not every revision reply). */
358
+ private planIntroPending = false;
359
+ /** FORCED-TOOLS experiment — see ISessionConfig.forceTools. */
360
+ private readonly forceTools: boolean;
361
+ /** Mid-session turn-cap override (setMaxTurns) — a web scaffold raises it. */
362
+ private maxTurnsOverride?: number;
363
+
364
/**
 * Wire a session up from its config and an already-built loop context.
 * Private: `Session.create` is the entry point (it assembles `ctx` first).
 *
 * @param cfg - Session configuration (provider, gate, tool-set switches, …).
 * @param ctx - Loop context shared with the tool and gate layers.
 */
private constructor(cfg: ISessionConfig, ctx: ILoopCtx) {
  this.provider = cfg.provider;
  this.cfg = cfg;
  this.ctx = ctx;
  this.report = cfg.report ?? ((): void => undefined);
  this.incrementalCheck = cfg.incrementalCheck ?? "";

  const accept = cfg.accept;

  this.hasGate = accept !== undefined && accept.length > 0;

  // Every session starts from the BASE tool list (`toolsFor(false)`). The
  // original authors measured that a larger list pushes the model onto a
  // malformed-tool-call boundary (see malformed-toolcall-format), so extra
  // tools are added only where a mode needs them:
  //  - scaffoldWeb (interactive): `search` (read-only, the plan-mode
  //    explorer's main tool besides `read`) plus `scaffold_web`, with the
  //    UI/routes/dependency tools riding along so they are ready once the
  //    agent scaffolds.
  //  - scaffoldUi (headless web build): scaffolded up front, so it gets the
  //    UI/routes/dependency tools but not `scaffold_web` or `search`.
  //  - otherwise: the base set only (see lsp-tools-regress-scratch: nav
  //    tools hurt from-scratch builds).
  const base = toolsFor(false);

  if (cfg.scaffoldWeb === true) {
    this.tools = [
      ...base,
      SEARCH_TOOL,
      SCAFFOLD_WEB_TOOL,
      SCAFFOLD_UI_TOOL,
      SCAFFOLD_ROUTES_TOOL,
      ADD_DEPENDENCY_TOOL,
    ];
  } else if (cfg.scaffoldUi === true) {
    this.tools = [
      ...base,
      SCAFFOLD_UI_TOOL,
      SCAFFOLD_ROUTES_TOOL,
      ADD_DEPENDENCY_TOOL,
    ];
  } else {
    this.tools = base;
  }

  // FORCED-TOOLS experiment: an explicit config value wins over the flag.
  // When on, the model needs `yield_status` as its way to stop.
  this.forceTools = cfg.forceTools ?? flags.forceTools();

  if (this.forceTools) {
    this.tools = [...this.tools, YIELD_STATUS_TOOL];
  }

  // Fresh per-session loop counters.
  this.state = {
    prevGateErrors: [],
    gateNoProgress: 0,
    lastGateCount: -1,
    edits: 0,
    regressions: 0,
    ttsrInterrupts: 0,
  };
}
419
+
420
/**
 * Async factory for a session. Resolves the stack profile and project config
 * for `cfg.cwd`, builds the TS service, and seeds the conversation before
 * handing everything to the private constructor.
 *
 * @param cfg - Session configuration.
 * @returns A ready-to-use session.
 */
static async create(cfg: ISessionConfig): Promise<Session> {
  const report = cfg.report ?? ((): void => undefined);
  const task: ITask = {
    id: SESSION_ID,
    accept: cfg.accept ?? "",
    files: cfg.files ?? [],
    context: cfg.context,
    fix: cfg.fix,
  };

  // Same stack + tsforge.config.json resolution as the eval path
  // (resolveStackForRun in run.ts), so interactive users get identical pack
  // selection and rule-severity overrides.
  const detected = await detectStack(cfg.cwd);
  const projectConfig = await loadTsforgeConfig(cfg.cwd);
  const packs = resolveActivePacks(detected.packs, projectConfig);
  const stackProfile = { ...detected, packs };
  const ruleOverrides = normalizeRuleOverrides(projectConfig);
  const hasOverrides = Object.keys(ruleOverrides).length > 0;

  const tsService = await buildTsService(cfg.cwd);

  // Resume from supplied history when there is any; otherwise start with just
  // the system prompt.
  const history = cfg.history;
  const messages =
    history !== undefined && history.length > 0
      ? [...history]
      : [{ role: "system", content: systemPrompt(cfg) }];

  const ctx: ILoopCtx = {
    task,
    cwd: cfg.cwd,
    tsService,
    ...(cfg.lintFile !== undefined ? { lintFile: cfg.lintFile } : {}),
    parse: cfg.parse,
    report,
    stackProfile,
    ...(hasOverrides ? { ruleOverrides } : {}),
    messages,
    // Forward gate output as live tokens, so a slow gate (vite build +
    // chromium) shows progress instead of running silently.
    onGateChunk: (text) => {
      report({ kind: "token", task: SESSION_ID, message: text });
    },
  };

  return new Session(cfg, ctx);
}
464
+
465
/** Gate command currently set on the task; the empty string means no gate. */
get gate(): string {
  const { accept } = this.ctx.task;

  return accept;
}
469
+
470
/** Globs describing which files are editable (the task's live array). */
get scope(): string[] {
  const { files } = this.ctx.task;

  return files;
}
474
+
475
/**
 * Token usage recorded from the latest model call. `undefined` before any
 * call has reported usage (including when the server never reports it).
 */
get usage(): ITokenUsage | undefined {
  const { lastUsage } = this;

  return lastUsage;
}
480
+
481
/**
 * Size of the context the model last saw: the prompt-token count of the most
 * recent call (the figure auto-compaction watches). 0 when no usage is known.
 */
get contextTokens(): number {
  const held = this.lastUsage?.promptTokens;

  return held ?? 0;
}
486
+
487
/**
 * Decide whether the held context has reached the auto-compact threshold.
 *
 * Requires both a positive configured context window and usage from a prior
 * call; without either (as on the first send) it reports nothing.
 *
 * @returns The rounded percent-full figure for the notice, or `undefined`
 *   when below the threshold or when it cannot be computed.
 */
private autoCompactPct(): number | undefined {
  const { cfg, lastUsage } = this;
  const window = cfg.contextWindow ?? 0;

  if (window <= 0 || lastUsage === undefined) {
    return undefined;
  }

  const threshold = cfg.autoCompactAt ?? AUTO_COMPACT_AT;
  const fraction = lastUsage.promptTokens / window;

  if (fraction >= threshold) {
    return Math.round(fraction * 100);
  }

  return undefined;
}
502
+
503
/**
 * Change the gate command mid-session and keep `hasGate` in sync.
 *
 * @param command - The new gate command; pass "" to clear the gate.
 */
setGate(command: string): void {
  const { task } = this.ctx;

  task.accept = command;
  this.hasGate = command.length > 0;
}
508
+
509
/**
 * Override the per-send turn cap mid-session. `scaffold_web` uses this when
 * it turns a chat session into a from-scratch web build, whose heavy gate
 * needs the larger webMaxTurns budget.
 *
 * @param n - The new cap; 0, a negative value, or `undefined` removes the
 *   override so the configured default applies again.
 */
setMaxTurns(n?: number): void {
  if (n !== undefined && n > 0) {
    this.maxTurnsOverride = n;
  } else {
    this.maxTurnsOverride = undefined;
  }
}
515
+
516
/**
 * Switch GENERAL plan mode on or off (read-only tools + plan-then-approve).
 *
 * Turning it on also arms the one-shot PLAN_MODE_NOTE for the next send and
 * sets `ctx.readOnly`, the flag the execute layer uses to reject mutating
 * calls. Turning it off clears all three.
 *
 * @param on - `true` to enter plan mode, `false` to leave it.
 */
setPlanMode(on: boolean): void {
  // The hard guarantee lives at the execute layer, not in the tool list.
  this.ctx.readOnly = on;
  this.planMode = on;
  this.planIntroPending = on;
}
524
+
525
/**
 * Change the auto-fix command that runs before each gate (e.g. a scaffold's
 * `eslint --fix`), so mechanical lint violations are removed
 * deterministically rather than costing the model turns.
 *
 * @param command - The new fix command; "" clears it (stored as `undefined`).
 */
setFix(command: string): void {
  const { task } = this.ctx;

  task.fix = command.length > 0 ? command : undefined;
}
531
+
532
/**
 * Change the fast incremental check (e.g. `tsc --noEmit`) that runs every few
 * edits during a build so errors surface early.
 *
 * @param command - The new check command; "" switches the check off.
 */
setIncrementalCheck(command: string): void {
  const next = command;

  this.incrementalCheck = next;
}
537
+
538
/**
 * Swap in a new set of editable scope globs mid-session.
 *
 * @param globs - The globs to use from now on (stored by reference).
 */
setScope(globs: string[]): void {
  const { task } = this.ctx;

  task.files = globs;
}
542
+
543
/**
 * Register the callback behind the `scaffold_web` tool — invoked when the
 * AGENT decides the task is a from-scratch web app, to scaffold the stack and
 * move this session onto the web gate/guidance.
 *
 * It is bound after `create` because the callback closes over this session
 * in order to reconfigure it.
 *
 * @param fn - Receives the chosen framework name; resolves when setup is done.
 */
setSetupWeb(fn: (framework: string) => Promise<void>): void {
  const { ctx } = this;

  ctx.setupWeb = fn;
}
550
+
551
/**
 * Append opinionated guidance to the SYSTEM prompt (e.g. after classifying a
 * fresh request as a web build).
 *
 * The text is folded into the existing system message rather than added as a
 * second one, because a second system message breaks some chat templates
 * (Qwen → 400). If the conversation has no leading system message, one is
 * inserted at the front.
 *
 * The leading message is REPLACED with an updated copy, never edited in
 * place: `create` only shallow-copies `cfg.history`, so the message object
 * may still be owned by the caller, and mutating it would silently rewrite
 * the caller's saved history.
 *
 * @param text - Guidance to append, separated from the prompt by a blank line.
 */
guide(text: string): void {
  const { messages } = this.ctx;
  const first = messages[0];

  if (first?.role === "system") {
    messages[0] = { ...first, content: `${first.content}\n\n${text}` };

    return;
  }

  messages.unshift({ role: "system", content: text });
}
563
+
564
/**
 * Compress the conversation: ask the model to summarize everything so far,
 * then replace the history with [system, summary]. Frees context for long
 * sessions while preserving goals/decisions/changes.
 *
 * Nothing is changed when there is no non-system message to summarize, or
 * when the model returns a blank summary (replacing the history with an empty
 * summary would discard the whole conversation for nothing).
 *
 * After a successful compaction the recorded token usage is cleared: it
 * described the pre-compaction context, and leaving it in place would make
 * the next `send` auto-compact the fresh summary again and keep the gauge at
 * the old size.
 *
 * @param signal - Optional abort signal forwarded to the summarizing call.
 * @returns Message counts before and after (equal when nothing changed).
 */
async compact(
  signal?: AbortSignal
): Promise<{ before: number; after: number }> {
  const { ctx } = this;
  const before = ctx.messages.length;
  const conversation = ctx.messages.filter((m) => m.role !== "system");

  if (conversation.length === 0) {
    return { before, after: before };
  }

  const transcript = conversation
    .map((m) => `[${m.role}] ${m.content}`)
    .join("\n\n");
  const res = await this.provider.complete(
    [
      { role: "system", content: COMPACT_SYSTEM },
      { role: "user", content: transcript },
    ],
    { temperature: 0, ...(signal === undefined ? {} : { signal }) }
  );

  // A blank summary carries no information — keep the real history instead.
  if (res.content.trim().length === 0) {
    return { before, after: before };
  }

  const system = ctx.messages[0];
  const summary: IChatMessage = {
    role: "user",
    content: `[Summary of the earlier conversation]\n${res.content}`,
  };

  ctx.messages = system?.role === "system" ? [system, summary] : [summary];

  // The old usage figure measured the context that was just thrown away.
  this.lastUsage = undefined;

  return { before, after: ctx.messages.length };
}
601
+
602
/**
 * The live conversation — the system message plus every exchange — exposed
 * through a read-only array type.
 */
get messages(): readonly IChatMessage[] {
  const { messages } = this.ctx;

  return messages;
}
606
+
607
/**
 * Handle one user message: drive the model until it stops calling tools, then
 * confirm against the gate when one is set, looping on red gate feedback up
 * to the turn cap.
 *
 * Errors never escape. An abort is reported and returned as `interrupted`;
 * any other failure (e.g. a provider/network error) is reported and returned
 * as `stuck`.
 *
 * @param text - The user's message.
 * @param opts - Per-send options (abort signal, thinking override, …).
 * @returns The outcome of the send.
 */
async send(text: string, opts: ISendOptions = {}): Promise<ISendResult> {
  const { ctx, report } = this;
  const maxTurns =
    this.maxTurnsOverride ?? this.cfg.maxTurns ?? LOOP_LIMITS.maxTurns;
  const sendStart = performance.now();

  // Cancellation must reach the tool `run` commands and the gate as well as
  // the model call, so Ctrl-C also kills in-flight child processes.
  ctx.signal = opts.signal;
  this.activeThinking = opts.enableThinking;
  // Every send begins in (fast, thinking-off) creation mode.
  this.repairing = false;

  try {
    // Compact BEFORE appending the new message, so that message remains a
    // fresh turn after the summary.
    const pct = this.autoCompactPct();

    if (pct !== undefined) {
      report({
        kind: "tool",
        task: SESSION_ID,
        message: `⊙ context ~${pct}% full — auto-compacting to free room`,
      });

      const { before, after } = await this.compact(opts.signal);

      report({
        kind: "tool",
        task: SESSION_ID,
        message: `⊙ compacted ${before} → ${after} messages`,
      });
    }

    // The plan-mode workflow note rides only the FIRST message after the mode
    // is switched on; later revision replies go bare, since the instruction
    // is already in the history.
    let content = text;

    if (this.planMode && this.planIntroPending) {
      this.planIntroPending = false;
      content = `${text}\n\n${PLAN_MODE_NOTE}`;
    }

    ctx.messages.push({ role: "user", content });

    return await this.drive(maxTurns, sendStart, opts);
  } catch (err) {
    const aborted = opts.signal?.aborted === true;

    if (aborted) {
      report({
        kind: "stuck",
        task: SESSION_ID,
        message: "interrupted",
      });

      return { status: "interrupted", turns: 0 };
    }

    // A provider/network error (request timeout, connection drop after
    // retries) ends the turn GRACEFULLY as stuck rather than crashing the
    // process. The message is reported so it stays visible and debuggable,
    // and a long autonomous run (or the interactive CLI) survives a flaky
    // model.
    const detail = err instanceof Error ? err.message : String(err);

    report({
      kind: "stuck",
      task: SESSION_ID,
      message: `⚠ model request failed: ${detail}`,
    });

    return { status: "stuck", turns: 0 };
  } finally {
    // Per-send state must not leak into the next send.
    ctx.signal = undefined;
    this.activeThinking = undefined;
  }
}
686
+
687
/**
 * Build a project from scratch in two STAGES:
 *  1. design — plan and write the type contract with the full gate swapped
 *     out (a types-only app cannot build yet, so gating on the build would
 *     fail spuriously);
 *  2. implement — build against those types with the gate on, driving to
 *     green.
 *
 * The original authors describe this as the plan→interfaces→implementation
 * pattern, with the gate as the verification stage. The staging is a soft
 * constraint: if the model ignores step 1 and builds everything, step 2
 * simply carries on.
 *
 * @param request - The user's build request.
 * @param opts - Per-send options shared by both stages.
 * @param designGate - Optional gate used during the design stage only.
 * @returns The design result if that stage was interrupted, otherwise the
 *   implementation result.
 */
async buildStaged(
  request: string,
  opts: ISendOptions = {},
  designGate = ""
): Promise<ISendResult> {
  const design = await this.designBuild(request, opts, designGate);
  const userAborted = design.status === "interrupted";

  // An aborted design step must not roll on into implementation.
  return userAborted ? design : this.implementBuild("", opts);
}
710
+
711
/**
 * PHASE 1 — design the type contract only.
 *
 * For the duration of the phase the gate is replaced with `designGate`
 * (types-only: tsc + lint, no build — or no gate when it is ""), so the
 * contract is driven self-consistent BEFORE components exist. The tool list
 * is cut back to the base set, because a prompt-only "types only"
 * instruction was repeatedly ignored and withholding the scaffold tools
 * makes starting the UI impossible here.
 *
 * Both the gate and the tool list are restored in a `finally`, so the
 * session is left ready for `implementBuild` even if the send throws. Kept
 * separate from `buildStaged` so plan mode can insert a human review between
 * the phases.
 *
 * @param request - The user's build request.
 * @param opts - Per-send options.
 * @param designGate - Gate command for this phase; "" disables gating.
 * @returns The phase-1 send result.
 */
async designBuild(
  request: string,
  opts: ISendOptions = {},
  designGate = ""
): Promise<ISendResult> {
  const gate = this.ctx.task.accept;
  const phaseTwoTools = this.tools;

  try {
    this.setGate(designGate);
    this.tools = toolsFor(false);

    return await this.send(`${request}\n\n${PLAN_TYPES_STEP}`, opts);
  } finally {
    // Always undo the phase-1 overrides, whatever happened above.
    this.tools = phaseTwoTools;
    this.setGate(gate);
  }
}
739
+
740
/**
 * PHASE 2 — implement against the designed types, driving to green.
 *
 * The full gate is run first. If phase 1 already produced a fully-green app
 * (it ignored "types only" and built everything), this returns `done`
 * WITHOUT sending anything. Otherwise the model concludes the prior phase did
 * "only the data layer" and `rm -rf`s its own finished UI to rebuild
 * (observed: 23-00-52 went green at turn 146, then phase 2 wiped every file).
 *
 * The pre-check takes its abort signal from `opts`. `ctx.signal` is only
 * populated while a `send` is in flight, so relying on it here would leave
 * this gate run uncancellable.
 *
 * @param planNotes - Human plan-mode edits to inject into the implement step.
 * @param opts - Per-send options; `opts.signal` also cancels the pre-check.
 * @returns `done` with 0 turns when already green, else the send result.
 */
async implementBuild(
  planNotes = "",
  opts: ISendOptions = {}
): Promise<ISendResult> {
  const signal = opts.signal ?? this.ctx.signal;
  // Validate against a shallow copy of the live task.
  const fullGateTask: ITask = { ...this.ctx.task };
  const full = await validate(
    fullGateTask,
    this.ctx.cwd,
    this.ctx.parse,
    signal === undefined ? {} : { signal }
  );

  if (full.passed) {
    this.report({
      kind: "tool",
      task: this.ctx.task.id,
      message:
        "phase 1 already produced a fully-green app — skipping phase 2 (no rebuild)",
    });

    return { status: "done", turns: 0 };
  }

  // Inject the EXACT type contract the design phase just wrote, fresh, right
  // before implementation. Per the original authors, the 27b's #1 first-pass
  // error is misremembering its OWN types across many files/turns, and
  // re-showing the current signatures cuts those consistency errors. Both
  // phases run ADAPTIVE thinking (governed by `repairing`).
  const contract = await this.typeContract();
  const notes =
    planNotes.length > 0
      ? `\n\n## Approved plan — follow these decisions\n${planNotes}\n`
      : "";

  return this.send(`${contract}${IMPLEMENT_STEP}${notes}`, opts);
}
785
+
786
/**
 * Plan mode — after `designBuild`, ask the model to state its build PLAN as
 * markdown (entities and whether each is its own route or nested/embedded;
 * the routes/pages it will create; what it considers DONE; key modeling
 * decisions), so a human can review or correct it BEFORE phase 2 commits
 * ~100 turns.
 *
 * This is a single completion over the live conversation plus the plan
 * prompt. No tools are offered and the conversation itself is not modified.
 *
 * @param signal - Optional abort signal for the completion. It is a
 *   parameter because `ctx.signal` is only populated while a `send` is in
 *   flight, which this call is not; `ctx.signal` remains the fallback.
 * @returns The trimmed plan text ("" if the model returned nothing).
 */
async generatePlan(signal?: AbortSignal): Promise<string> {
  const abort = signal ?? this.ctx.signal;
  const res = await this.provider.complete(
    [...this.ctx.messages, { role: "user", content: PLAN_SUMMARY_STEP }],
    {
      temperature: 0,
      ...(abort === undefined ? {} : { signal: abort }),
    }
  );

  return res.content.trim();
}
805
+
806
/**
 * Collect the `.types.ts` / `.constants.ts` files under `src/` and render
 * them as one fenced reference block for the implement phase, so the model
 * builds against the EXACT current signatures instead of its (lossy) memory
 * of them.
 *
 * @returns The reference block, or "" when no such files exist yet.
 */
private async typeContract(): Promise<string> {
  const patterns = ["src/**/*.types.ts", "src/**/*.constants.ts"];
  const files = await readFiles(this.ctx.cwd, patterns);

  if (files.length === 0) {
    return "";
  }

  // One section per file: a path comment followed by the trimmed source.
  const sections: string[] = [];

  for (const file of files) {
    sections.push(`// ${file.path}\n${file.content.trim()}`);
  }

  const blocks = sections.join("\n\n");
  const fence = "```";
  const header =
    "THE TYPE CONTRACT you just designed (use these EXACT names/shapes — do " +
    "NOT invent or misremember fields; import from these paths):";

  return `${header}\n\n${fence}ts\n${blocks}\n${fence}\n\n`;
}
831
+
832
/**
 * Run the incremental check once enough edits have accumulated. Exists to
 * keep `drive` light on branches.
 *
 * @param editsSinceCheck - Edits made since the last incremental check.
 * @param checkEvery - Threshold at which the check runs.
 * @returns 0 after a check ran; otherwise `editsSinceCheck` unchanged.
 */
private async checkAfterEdits(
  editsSinceCheck: number,
  checkEvery: number
): Promise<number> {
  const belowThreshold = editsSinceCheck < checkEvery;

  if (!belowThreshold) {
    await this.runIncrementalCheck();

    return 0;
  }

  return editsSinceCheck;
}
846
+
847
/**
 * Run the fast incremental check (e.g. tsc) and, when it reports real
 * errors, push them into the conversation NOW as a user message, so the
 * model fixes them before writing more instead of letting them pile up for
 * the final gate. Also sets `repairing` to match the outcome. Does nothing
 * when no incremental check is configured.
 */
private async runIncrementalCheck(): Promise<void> {
  const command = this.incrementalCheck;

  if (command.length === 0) {
    return;
  }

  const { ctx } = this;
  const interimTask: ITask = { ...ctx.task, accept: command };
  const outcome = await validate(
    interimTask,
    ctx.cwd,
    ctx.parse,
    ctx.signal === undefined ? {} : { signal: ctx.signal }
  );

  // Stub-route-tree phantoms are dropped: the build regenerates the tree at
  // the gate, so the model cannot fix them and should not be asked to.
  const actionable = outcome.errors.filter(
    (e) => !isPhantomRouteError(e.message)
  );
  const clean = outcome.passed || actionable.length === 0;

  // Clean (or phantoms only) → fast thinking-off creation; errors
  // outstanding → the next turns think so the repair converges.
  this.repairing = !clean;

  if (clean) {
    return;
  }

  // Cap the feedback at the first 20 errors.
  const detail = actionable
    .slice(0, 20)
    .map(({ message }) => message)
    .join("\n");

  // Report the ACTUAL errors (not just the count), so the log shows WHAT the
  // model fails at and the systematic ones can be targeted in the harness.
  ctx.report({
    kind: "tool",
    task: SESSION_ID,
    message: `⊙ interim check: ${String(actionable.length)} error(s) — fixing now:\n${detail}`,
  });

  ctx.messages.push({
    role: "user",
    content: `${INTERIM_CHECK_NOTE}\n${detail}`,
  });
}
894
+
895
+ /** NOTE: describes `drive` (the turn loop), not the method below — the loop is separated so `send` can wrap it in abort handling. */
896
/**
 * Make one model call: stream tokens live, record usage, append the
 * assistant reply to the conversation, and report salvage and the final
 * answer. Keeps `drive`'s per-turn control flow lean.
 *
 * @param signal - Optional abort signal for the request.
 * @param toolChoice - "required" forces a tool call; "auto" lets the model
 *   choose.
 * @param forceNoThinking - When true, thinking is disabled for this call
 *   regardless of every other setting.
 * @returns The provider's response.
 */
private async askModel(
  signal?: AbortSignal,
  toolChoice: "auto" | "required" = "auto",
  forceNoThinking = false
): Promise<IModelResponse> {
  const { ctx, report } = this;

  // Thinking mode, in priority order:
  //  1. a FORCED recovery turn never thinks — the model only needs to emit
  //     one clean tool call, and thinking-on is a known source of
  //     prose-before-the-call malformed output on this model;
  //  2. ADAPTIVE: while REPAIRING (errors outstanding) always think, so the
  //     repair converges;
  //  3. otherwise the per-send override, then the config value (`undefined`
  //     leaves the option off the request entirely).
  let enableThinking: boolean | undefined;

  if (forceNoThinking) {
    enableThinking = false;
  } else if (this.repairing) {
    enableThinking = true;
  } else {
    enableThinking = this.activeThinking ?? this.cfg.enableThinking;
  }

  // Attached to reports so malformed-call rates can be correlated with the
  // thinking mode of THIS call (analyze-malformed).
  const thinkingTag =
    enableThinking === undefined ? {} : { thinking: enableThinking };

  // PLAN MODE offers only the read-only tools plus `run` (whose handler
  // enforces a read-only command allowlist), so the model never sees a write
  // tool. The filter is applied per call; `this.tools` is left untouched, so
  // switching the mode off restores the full set with no bookkeeping.
  let offeredTools = this.tools;

  if (this.planMode) {
    offeredTools = offeredTools.filter(
      ({ function: fn }) =>
        READ_ONLY_TOOL_NAMES.has(fn.name) || fn.name === TOOL_NAME.run
    );
  }

  const res = await this.provider.complete(ctx.messages, {
    tools: offeredTools,
    temperature: this.cfg.temperature ?? 0,
    toolChoice,
    ...(enableThinking === undefined ? {} : { enableThinking }),
    ...(this.cfg.thinkingTokenBudget === undefined
      ? {}
      : { thinkingTokenBudget: this.cfg.thinkingTokenBudget }),
    ...(signal === undefined ? {} : { signal }),
    onToken: (token, channel) => {
      // Stream EVERYTHING live — thinking, the tool calls being written, and
      // the answer itself (channel `content`) — so the user watches the
      // reply arrive. The consolidated `message` event further down remains
      // the log's record (the interactive renderer dedupes it).
      report({ kind: "token", task: SESSION_ID, message: token, channel });
    },
  });

  if (res.usage !== undefined) {
    const { promptTokens, completionTokens, totalTokens } = res.usage;

    this.lastUsage = res.usage;
    // Logged (not shown) so the --log analyzer can compute
    // tokens-to-solution.
    report({
      kind: "usage",
      task: SESSION_ID,
      message: `tokens ${promptTokens} in / ${completionTokens} out`,
      promptTokens,
      completionTokens,
      totalTokens,
      ...thinkingTag,
    });
  }

  ctx.messages.push({
    role: "assistant",
    content: res.content,
    toolCalls: res.toolCalls,
  });

  if (res.salvaged !== undefined && res.salvaged > 0) {
    report({
      kind: "tool",
      task: SESSION_ID,
      message: `⚠ recovered ${res.salvaged} malformed tool call(s) (server tool-call parser mismatch)`,
      ...thinkingTag,
    });
  }

  if (res.content.length > 0) {
    report({ kind: "message", task: SESSION_ID, message: res.content });
  }

  return res;
}
982
+
983
/**
 * Interpret a turn that ended with NO tool calls (and no edits yet this
 * send).
 *
 * - Plan mode, no gate, or an ordinary reply → `responded`.
 * - With a gate set, a reply that leaks tool-call markup or dumps whole
 *   files as prose means the work never reached disk. The model is nudged to
 *   act (`result: null`) until `LOOP_LIMITS.maxBuildNudges` is reached, then
 *   the send stops as `stuck`.
 *
 * The nudge message and the reports are emitted here; the caller only emits
 * timing and loops or returns.
 *
 * @param content - The model's reply text.
 * @param turn - Current turn number, echoed into the result.
 * @param buildNudges - Nudges already spent in this send.
 * @returns `{ result }` holding a terminal result, or `null` to keep going.
 */
private resolveNoEditYield(
  content: string,
  turn: number,
  buildNudges: number
): { result: ISendResult | null } {
  const responded: { result: ISendResult } = {
    result: { status: "responded", turns: turn },
  };

  // Plan mode is read-only: a fenced-snippet-heavy PLAN is the wanted
  // output, not a narrate-instead-of-build failure, so never nudge it.
  if (this.planMode) {
    return responded;
  }

  // With no gate there is nothing to build toward — a plain answer.
  if (!this.hasGate) {
    return responded;
  }

  // Leaked tool markup = the model TRIED to act but the call never parsed
  // (and salvage could not rescue it). Without a nudge the turn would end as
  // a fake "responded" and the build would silently strand (captured live: a
  // scaffold_web emitted as text). The retry is a FORCED tool call, which is
  // grammar-constrained, so it always parses.
  const leaked = leaksToolMarkup(content);

  if (!leaked && !looksLikeCodeDump(content)) {
    return responded;
  }

  const budgetSpent = buildNudges >= LOOP_LIMITS.maxBuildNudges;

  if (budgetSpent) {
    const malformedStop =
      "⚠ model kept emitting malformed tool-call text instead of real " +
      "calls — stopped. See malformed-toolcall-format (server parser).";
    const narrationStop =
      "⚠ model kept writing files as chat messages instead of creating " +
      "them — stopped. Try a smaller step (e.g. one file at a time).";

    this.report({
      kind: "stuck",
      task: SESSION_ID,
      message: leaked ? malformedStop : narrationStop,
    });

    return { result: { status: "stuck", turns: turn } };
  }

  this.report({
    kind: "tool",
    task: SESSION_ID,
    message: leaked
      ? "↳ malformed tool-call text (no tool ran) — forcing a real call"
      : "↳ no files written — nudging the model to build with tools",
  });
  this.ctx.messages.push({
    role: "user",
    content: leaked ? MALFORMED_CALL_NUDGE : BUILD_NUDGE,
  });

  return { result: null };
}
1042
+
1043
/**
 * React to a detected repetition loop.
 *
 * While recovery budget remains, a re-steer message is pushed and `null` is
 * returned so the caller forces a tool call on the next turn. Once
 * `MAX_DEGENERATION_RECOVERIES` is reached, the send stops as `stuck`.
 *
 * @param degenerations - Recoveries already attempted in this send.
 * @param turn - Current turn number, echoed into a terminal result.
 * @returns A `stuck` result when the budget is spent, otherwise `null`.
 */
private degenerationRecovery(
  degenerations: number,
  turn: number
): ISendResult | null {
  const budgetSpent = degenerations >= MAX_DEGENERATION_RECOVERIES;

  if (!budgetSpent) {
    this.report({
      kind: "tool",
      task: SESSION_ID,
      message: "⚠ repetition loop — forcing a concrete next action",
    });
    this.ctx.messages.push({ role: "user", content: REPETITION_RESTEER });

    return null;
  }

  this.report({
    kind: "stuck",
    task: SESSION_ID,
    message:
      "⚠ repetition loop persisted after recovery attempts — stopped. Try a smaller step.",
  });

  return { status: "stuck", turns: turn };
}
1070
+
1071
/**
 * Deal with an error thrown by a model call.
 *
 * A caller abort or any non-timeout error is rethrown unchanged — terminal,
 * and `send`'s handler maps it to interrupted/stuck. A request TIMEOUT is
 * treated as recoverable: timing is emitted, then either the send stops as
 * `stuck` (budget spent) or a re-steer message is pushed and `null` is
 * returned, so the caller forces a small thinking-off tool call and
 * CONTINUES. That preserves the turns already done instead of abandoning the
 * whole build on one over-long turn.
 *
 * @param err - The thrown value.
 * @param timeouts - Timeout recoveries already used in this send.
 * @param turn - Current turn number.
 * @param turnStart - Turn start timestamp passed to `emitTiming`.
 * @param sendStart - Send start timestamp passed to `emitTiming`.
 * @param signal - The caller's abort signal, if any.
 * @returns A `stuck` result when the budget is spent, otherwise `null`.
 * @throws The original `err` when aborted or when it is not a model timeout.
 */
private recoverFromTimeout(
  err: unknown,
  timeouts: number,
  turn: number,
  turnStart: number,
  sendStart: number,
  signal?: AbortSignal
): ISendResult | null {
  const aborted = signal?.aborted === true;

  if (aborted || !isModelTimeout(err)) {
    throw err;
  }

  emitTiming(this.report, SESSION_ID, turn, turnStart, sendStart);

  // Keep the RAW error in the report, so the true source of the timeout
  // (request-timeout ceiling vs a server-side stream close) can be diagnosed
  // from the --log.
  let detail: string;

  if (err instanceof Error) {
    detail = `${err.name}: ${err.message}`;
  } else {
    detail = String(err);
  }

  const budgetSpent = timeouts >= MAX_TIMEOUT_RECOVERIES;

  if (budgetSpent) {
    this.report({
      kind: "stuck",
      task: SESSION_ID,
      message: `⚠ model request timed out repeatedly (${detail}) — stopped. The server may be wedged or the task too large for one turn.`,
    });

    return { status: "stuck", turns: turn };
  }

  this.report({
    kind: "tool",
    task: SESSION_ID,
    message: `⚠ model request timed out (${detail}) — re-steering to a smaller turn and continuing (${String(timeouts + 1)}/${String(MAX_TIMEOUT_RECOVERIES)})`,
  });
  this.ctx.messages.push({ role: "user", content: TIMEOUT_RESTEER });

  return null;
}
1115
+
1116
/**
 * Pull any steering messages the user typed mid-run and append each to the
 * conversation as a user message before the next turn, reporting a short
 * preview (first 60 characters) of each.
 *
 * @param steer - Optional supplier of pending steering messages.
 */
private injectSteer(steer?: () => string[]): void {
  const pending = steer?.() ?? [];

  for (const note of pending) {
    this.ctx.messages.push({ role: "user", content: note });
    this.report({
      kind: "tool",
      task: SESSION_ID,
      message: `↳ steering: ${note.slice(0, 60)}`,
    });
  }
}
1127
+
1128
/**
 * Perform one model turn for `drive`, with timeout recovery folded in so the
 * loop body stays lean.
 *
 * - `ok`    → the call succeeded; use `res`.
 * - `stop`  → terminal result (timeout budget spent).
 * - `retry` → timed out and a re-steer was applied; force a small tool call
 *   on the next turn.
 *
 * A caller abort or a non-timeout error propagates (through
 * `recoverFromTimeout`) to `send`'s handler.
 *
 * @param forceTool - Force a tool call this turn (also disables thinking).
 * @param timeouts - Timeout recoveries already used in this send.
 * @param turn - Current turn number.
 * @param turnStart - Turn start timestamp.
 * @param sendStart - Send start timestamp.
 * @param opts - Per-send options (supplies the abort signal).
 */
private async acquireResponse(
  forceTool: boolean,
  timeouts: number,
  turn: number,
  turnStart: number,
  sendStart: number,
  opts: ISendOptions
): Promise<
  | { kind: "ok"; res: IModelResponse }
  | { kind: "stop"; result: ISendResult }
  | { kind: "retry" }
> {
  try {
    // FORCED-TOOLS experiment: gated, non-plan turns are ALWAYS grammar-
    // constrained (the model stops via yield_status), so malformed tool text
    // cannot occur. A recovery force additionally disables thinking.
    const alwaysConstrained =
      this.forceTools && this.hasGate && !this.planMode;
    const toolChoice = forceTool || alwaysConstrained ? "required" : "auto";
    // The third argument turns thinking off on a forced tool turn, for a
    // clean call.
    const res = await this.askModel(opts.signal, toolChoice, forceTool);

    return { kind: "ok", res };
  } catch (err) {
    const terminal = this.recoverFromTimeout(
      err,
      timeouts,
      turn,
      turnStart,
      sendStart,
      opts.signal
    );

    if (terminal === null) {
      return { kind: "retry" };
    }

    return { kind: "stop", result: terminal };
  }
}
1172
+
1173
/**
 * Execute a turn's tool calls, update the edit accounting, emit timing, and
 * run the incremental check when enough edits have accumulated. Factored out
 * so `drive`'s loop body stays lean.
 *
 * @param res - The model response whose tool calls are run.
 * @param acc - Edit accounting carried in from the loop.
 * @param turn - Current turn number.
 * @param turnStart - Turn start timestamp.
 * @param sendStart - Send start timestamp.
 * @returns The updated `edited` flag and edits-since-check counter.
 */
private async runEditTurn(
  res: IModelResponse,
  acc: { edited: boolean; editsSinceCheck: number; checkEvery: number },
  turn: number,
  turnStart: number,
  sendStart: number
): Promise<{ edited: boolean; editsSinceCheck: number }> {
  const { ctx, state, report } = this;
  const editsBefore = state.edits;
  const editedNow = await runToolCalls(res.toolCalls, ctx, state);
  const edited = editedNow || acc.edited;

  emitTiming(report, SESSION_ID, turn, turnStart, sendStart);

  // Check every few edits WHILE building, so errors surface early rather
  // than piling up into a final avalanche the model cannot dig out of.
  const newEdits = state.edits - editsBefore;
  const editsSinceCheck = await this.checkAfterEdits(
    acc.editsSinceCheck + newEdits,
    acc.checkEvery
  );

  return { edited, editsSinceCheck };
}
1199
+
1200
/**
 * Settles the gate after the model has stopped following edits, and emits the
 * turn timing.
 *
 * @returns A terminal send result ("done" when the settled status equals
 *          `RUN_STATUS.done`, otherwise "stuck"), or null when `settleGate`
 *          yields null — in which case `drive` carries on with another turn.
 */
private async settleTurn(
  turn: number,
  turnStart: number,
  sendStart: number
): Promise<ISendResult | null> {
  const verdict = await settleGate(this.ctx, this.state, turn);

  emitTiming(this.report, SESSION_ID, turn, turnStart, sendStart);

  if (verdict !== null) {
    const status = verdict.status === RUN_STATUS.done ? "done" : "stuck";

    return { status, turns: turn };
  }

  return null;
}
1221
+
1222
/**
 * FORCED-TOOLS mode: rewrites a response consisting solely of `yield_status`
 * calls into an ordinary "model stopped" response, mutating `res` in place.
 *
 * For such a response this method:
 *  1. pushes a tool message per yield call (so no tool_call is left
 *     unanswered in the conversation),
 *  2. clears `res.toolCalls`,
 *  3. when `res.content` is empty and the first yield has a string `summary`,
 *     uses that summary as the content and reports it as a message.
 *
 * A response with no yield call, or one mixing yields with other calls, is
 * left untouched; in the mixed case dispatch runs every call and the yield's
 * own stub replies to it.
 */
private resolveYieldCalls(res: IModelResponse): void {
  const calls = res.toolCalls;

  // Proceed only when there is at least one call and every call is a yield.
  // That covers both early exits: "no yields" and "yields mixed with work".
  const onlyYields =
    calls.length > 0 &&
    calls.every((call) => call.name === TOOL_NAME.yieldStatus);

  if (!onlyYields) {
    return;
  }

  for (const call of calls) {
    this.ctx.messages.push({
      role: "tool",
      toolCallId: call.id ?? "",
      content: "(turn ended)",
    });
  }

  res.toolCalls = [];

  const summary = calls[0]?.arguments.summary;

  // Existing reply text wins; a non-string summary is ignored.
  if (res.content.length !== 0 || typeof summary !== "string") {
    return;
  }

  res.content = summary;
  this.report({ kind: "message", task: SESSION_ID, message: summary });
}
1263
+
1264
/**
 * Turn loop for one send. Each iteration injects pending steering messages,
 * asks the model, and then follows exactly one of these paths:
 *
 *  - request stopped / needs retry (see acquireResponse),
 *  - degenerate stream → bounded recovery (see degenerationRecovery),
 *  - tool calls present → run them and loop again,
 *  - no tool calls and (no gate, or nothing edited) → resolveNoEditYield,
 *  - no tool calls after edits → settle the gate; terminal or keep repairing.
 *
 * @param maxTurns   Upper bound on loop iterations.
 * @param sendStart  Timestamp at which the enclosing send began.
 * @param opts       Send options; `steer` and `signal` are read here.
 * @returns The terminal result of the send, or a "stuck" result once
 *          `maxTurns` iterations have elapsed.
 */
private async drive(
  maxTurns: number,
  sendStart: number,
  opts: ISendOptions
): Promise<ISendResult> {
  const { ctx, report } = this;

  // Whether any file has been edited during this send. The gate validates
  // changes rather than answers, so it only runs once this is true.
  let hasEdited = false;
  // Count of nudges issued because the model replied in chat rather than
  // building (the narrate-instead-of-build failure).
  let narrationNudges = 0;
  // When set, the next request requires a tool call instead of leaving the
  // choice to the model.
  let mustCallTool = false;
  // Number of degenerate (repetition-loop) responses recovered from so far.
  let loopRecoveries = 0;
  // Number of request timeouts recovered from so far.
  let timeoutCount = 0;
  // Edits accumulated since the last incremental check.
  let uncheckedEdits = 0;
  const checkEvery = this.cfg.checkEvery ?? CHECK_EVERY;

  for (let turn = 1; turn <= maxTurns; turn++) {
    const turnStart = performance.now();

    // Messages typed by the user mid-run are folded in now so they influence
    // the upcoming model turn.
    this.injectSteer(opts.steer);

    report({
      kind: "cycle",
      task: SESSION_ID,
      cycle: turn,
      message: `turn ${turn}: asking model`,
    });

    // One model request, with timeout recovery handled inside the helper so a
    // single over-long turn does not discard the progress made so far.
    const ask = await this.acquireResponse(
      mustCallTool,
      timeoutCount,
      turn,
      turnStart,
      sendStart,
      opts
    );

    if (ask.kind !== "ok") {
      if (ask.kind === "stop") {
        return ask.result;
      }

      // Timed out and re-steered: count it and force a tool call next turn.
      timeoutCount += 1;
      mustCallTool = true;

      continue;
    }

    const res = ask.res;

    mustCallTool = false;

    // Degenerate repetition loop detected in the stream: attempt a bounded
    // recovery by forcing a tool call on the next turn.
    if (res.degenerated === true) {
      const giveUp = this.degenerationRecovery(loopRecoveries, turn);

      emitTiming(report, SESSION_ID, turn, turnStart, sendStart);

      if (giveUp !== null) {
        return giveUp;
      }

      loopRecoveries += 1;
      mustCallTool = true;

      continue;
    }

    // FORCED-TOOLS: a response made only of yield_status calls is turned
    // into a plain stop before the checks below.
    this.resolveYieldCalls(res);

    // The model is still acting: run its calls, update the edit accounting,
    // and go round again. The gate is consulted only once it stops.
    if (res.toolCalls.length > 0) {
      const tally = await this.runEditTurn(
        res,
        { edited: hasEdited, editsSinceCheck: uncheckedEdits, checkEvery },
        turn,
        turnStart,
        sendStart
      );

      hasEdited = tally.edited;
      uncheckedEdits = tally.editsSinceCheck;

      continue;
    }

    // No tool calls. Without a gate, or with a gate but no edits this send,
    // resolveNoEditYield decides between a terminal result and another nudge.
    if (!this.hasGate || !hasEdited) {
      const outcome = this.resolveNoEditYield(
        res.content,
        turn,
        narrationNudges
      );

      emitTiming(report, SESSION_ID, turn, turnStart, sendStart);

      if (outcome.result !== null) {
        return outcome.result;
      }

      narrationNudges += 1;
      mustCallTool = true; // no terminal result → require a tool call next

      continue;
    }

    // Edits were made and the model stopped: let the gate decide. A non-null
    // value is terminal; null means the gate is still red.
    const terminal = await this.settleTurn(turn, turnStart, sendStart);

    if (terminal !== null) {
      return terminal;
    }

    // Still red → switch to repair mode.
    this.repairing = true;

    // The model stopped without acting while red: nudge it and require a
    // tool call on the following turn.
    ctx.messages.push({ role: "user", content: NO_TOOL_CALL_NUDGE });
    mustCallTool = true;
  }

  // Turn budget exhausted without a terminal result.
  report({
    kind: "stuck",
    task: SESSION_ID,
    cycles: maxTurns,
    message: `stuck (hit ${maxTurns}-turn cap)`,
  });

  return { status: "stuck", turns: maxTurns };
}
1410
+ }