@agjs/tsforge 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (216) hide show
  1. package/bin/tsforge.js +2 -0
  2. package/package.json +35 -0
  3. package/src/agent/agent.constants.ts +382 -0
  4. package/src/agent/agent.types.ts +34 -0
  5. package/src/agent/index.ts +4 -0
  6. package/src/agent/model-agent.ts +297 -0
  7. package/src/agent/tool-repair.ts +194 -0
  8. package/src/agent/tools.ts +190 -0
  9. package/src/browser/checks.ts +96 -0
  10. package/src/browser/index.ts +8 -0
  11. package/src/browser/oracle.ts +303 -0
  12. package/src/classify.ts +48 -0
  13. package/src/cli.ts +1333 -0
  14. package/src/config/config.constants.ts +9 -0
  15. package/src/config/flags.ts +32 -0
  16. package/src/config/index.ts +8 -0
  17. package/src/config/tsforge-config.ts +301 -0
  18. package/src/constitution/baseline.ts +257 -0
  19. package/src/detect-gate.ts +498 -0
  20. package/src/eval/eval.types.ts +36 -0
  21. package/src/eval/index.ts +3 -0
  22. package/src/eval/judge.ts +62 -0
  23. package/src/eval/score.ts +39 -0
  24. package/src/files/create.ts +22 -0
  25. package/src/files/edit.ts +193 -0
  26. package/src/files/files.constants.ts +11 -0
  27. package/src/files/files.types.ts +81 -0
  28. package/src/files/hashline-format.ts +110 -0
  29. package/src/files/hashline.ts +689 -0
  30. package/src/files/index.ts +19 -0
  31. package/src/index.ts +8 -0
  32. package/src/inference/index.ts +6 -0
  33. package/src/inference/inference.constants.ts +34 -0
  34. package/src/inference/inference.types.ts +123 -0
  35. package/src/inference/openai-compatible.ts +113 -0
  36. package/src/inference/stream-guard.ts +161 -0
  37. package/src/inference/stream.ts +370 -0
  38. package/src/inference/transport.ts +78 -0
  39. package/src/inference/wire.ts +0 -0
  40. package/src/lib/fs/fs.ts +126 -0
  41. package/src/lib/fs/fs.types.ts +5 -0
  42. package/src/lib/fs/index.ts +3 -0
  43. package/src/lib/fs/process.ts +146 -0
  44. package/src/lib/guards/guards.ts +9 -0
  45. package/src/lib/guards/index.ts +1 -0
  46. package/src/lib/json/index.ts +1 -0
  47. package/src/lib/json/json.ts +12 -0
  48. package/src/lib/scope/index.ts +2 -0
  49. package/src/lib/scope/scope.constants.ts +3 -0
  50. package/src/lib/scope/scope.ts +40 -0
  51. package/src/loop/astgrep-fix.ts +228 -0
  52. package/src/loop/feedback/feedback.ts +138 -0
  53. package/src/loop/feedback/index.ts +8 -0
  54. package/src/loop/feedback/meta-rule-docs.ts +41 -0
  55. package/src/loop/feedback/meta-rule-feedback.ts +61 -0
  56. package/src/loop/feedback/rule-docs.generated.json +112 -0
  57. package/src/loop/feedback/rule-docs.ts +342 -0
  58. package/src/loop/index.ts +19 -0
  59. package/src/loop/loop.constants.ts +68 -0
  60. package/src/loop/loop.types.ts +99 -0
  61. package/src/loop/prompt/index.ts +2 -0
  62. package/src/loop/prompt/project-map.ts +69 -0
  63. package/src/loop/prompt/prompt.ts +107 -0
  64. package/src/loop/quality.ts +174 -0
  65. package/src/loop/rule-docs.generated.json +367 -0
  66. package/src/loop/run-spec.ts +88 -0
  67. package/src/loop/run.ts +400 -0
  68. package/src/loop/session.ts +1410 -0
  69. package/src/loop/tools/add-dependency.ts +71 -0
  70. package/src/loop/tools/condense.ts +498 -0
  71. package/src/loop/tools/edit-hashline.ts +80 -0
  72. package/src/loop/tools/execute-tool.ts +80 -0
  73. package/src/loop/tools/file-ops.ts +323 -0
  74. package/src/loop/tools/index.ts +2 -0
  75. package/src/loop/tools/lsp-ops.ts +222 -0
  76. package/src/loop/tools/scaffold-routes.ts +68 -0
  77. package/src/loop/tools/scaffold-ui.ts +62 -0
  78. package/src/loop/tools/scaffold-web.ts +35 -0
  79. package/src/loop/tools/tool-context.ts +126 -0
  80. package/src/loop/ttsr-defaults.ts +53 -0
  81. package/src/loop/ttsr.ts +322 -0
  82. package/src/loop/turn.ts +856 -0
  83. package/src/lsp/index.ts +2 -0
  84. package/src/lsp/lsp.types.ts +56 -0
  85. package/src/lsp/service.ts +500 -0
  86. package/src/meta-rules/context.ts +195 -0
  87. package/src/meta-rules/index.ts +9 -0
  88. package/src/meta-rules/meta-rules.types.ts +47 -0
  89. package/src/meta-rules/parsers/package-json-parser.ts +51 -0
  90. package/src/meta-rules/registry.ts +37 -0
  91. package/src/meta-rules/rules/ci/workflow-actions-pinned.ts +59 -0
  92. package/src/meta-rules/rules/ci/workflow-runner-pinned.ts +57 -0
  93. package/src/meta-rules/rules/ci/workflow-timeout-required.ts +114 -0
  94. package/src/meta-rules/rules/config/tsconfig-paths-exist.ts +117 -0
  95. package/src/meta-rules/rules/config/tsconfig-strict.ts +91 -0
  96. package/src/meta-rules/rules/source-text/no-eslint-disable-comments.ts +34 -0
  97. package/src/meta-rules/rules/source-text/no-ts-suppressions.ts +38 -0
  98. package/src/meta-rules/rules/supply-chain/no-overlapping-libs.ts +57 -0
  99. package/src/meta-rules/rules/supply-chain/package-exact-deps.ts +55 -0
  100. package/src/meta-rules/rules/testing/test-sibling-required.ts +110 -0
  101. package/src/meta-rules/runner.ts +64 -0
  102. package/src/models-config.ts +196 -0
  103. package/src/render/ansi.ts +289 -0
  104. package/src/render/banner.ts +113 -0
  105. package/src/render/box.ts +134 -0
  106. package/src/render/index.ts +7 -0
  107. package/src/render/markdown.ts +123 -0
  108. package/src/render/render.types.ts +21 -0
  109. package/src/render/stream-markdown.ts +128 -0
  110. package/src/render/style.ts +26 -0
  111. package/src/rule-packs/bullmq/index.ts +39 -0
  112. package/src/rule-packs/bullmq/rules/index.ts +7 -0
  113. package/src/rule-packs/bullmq/rules/job-name-must-be-constant.ts +141 -0
  114. package/src/rule-packs/bullmq/rules/job-options-must-set-attempts.ts +174 -0
  115. package/src/rule-packs/bullmq/rules/no-blocking-concurrency-zero.ts +103 -0
  116. package/src/rule-packs/bullmq/rules/queue-options-must-set-removeoncomplete.ts +130 -0
  117. package/src/rule-packs/bullmq/rules/queue-options-must-set-removeonfail.ts +130 -0
  118. package/src/rule-packs/bullmq/rules/worker-must-implement-close.ts +182 -0
  119. package/src/rule-packs/bullmq/rules/worker-must-listen-failed.ts +140 -0
  120. package/src/rule-packs/bullmq/utils.ts +334 -0
  121. package/src/rule-packs/code-flow/index.ts +25 -0
  122. package/src/rule-packs/code-flow/rules/index.ts +3 -0
  123. package/src/rule-packs/code-flow/rules/no-bare-date-now.ts +138 -0
  124. package/src/rule-packs/code-flow/rules/no-template-trim-empty-ternary.ts +87 -0
  125. package/src/rule-packs/code-flow/rules/prefer-early-return.ts +80 -0
  126. package/src/rule-packs/code-flow/utils/prefer-early-return.ts +132 -0
  127. package/src/rule-packs/comment-hygiene/index.ts +25 -0
  128. package/src/rule-packs/comment-hygiene/rules/index.ts +3 -0
  129. package/src/rule-packs/comment-hygiene/rules/no-historical-comments.ts +102 -0
  130. package/src/rule-packs/comment-hygiene/rules/no-narration-comments.ts +83 -0
  131. package/src/rule-packs/comment-hygiene/rules/no-pr-reference-comments.ts +90 -0
  132. package/src/rule-packs/create-rule.ts +9 -0
  133. package/src/rule-packs/drizzle/index.ts +41 -0
  134. package/src/rule-packs/drizzle/rules/account-scoped-tables-require-where.ts +371 -0
  135. package/src/rule-packs/drizzle/rules/index.ts +8 -0
  136. package/src/rule-packs/drizzle/rules/no-nested-db-transaction.ts +127 -0
  137. package/src/rule-packs/drizzle/rules/no-raw-sql-outside-allowlist.ts +100 -0
  138. package/src/rule-packs/drizzle/rules/relations-must-cover-fks.ts +209 -0
  139. package/src/rule-packs/drizzle/rules/schema-files-must-not-import-driver.ts +127 -0
  140. package/src/rule-packs/drizzle/rules/schema-files-must-only-export-schema.ts +149 -0
  141. package/src/rule-packs/drizzle/rules/tables-must-have-timestamps.ts +312 -0
  142. package/src/rule-packs/drizzle/rules/timestamp-must-specify-mode.ts +166 -0
  143. package/src/rule-packs/drizzle/utils.ts +115 -0
  144. package/src/rule-packs/elysia/index.ts +43 -0
  145. package/src/rule-packs/elysia/rules/consistent-status-via-set.ts +69 -0
  146. package/src/rule-packs/elysia/rules/no-decorate-state-collision.ts +276 -0
  147. package/src/rule-packs/elysia/rules/no-separate-model-interfaces.ts +144 -0
  148. package/src/rule-packs/elysia/rules/prefer-destructured-context.ts +155 -0
  149. package/src/rule-packs/elysia/rules/prefer-direct-return.ts +176 -0
  150. package/src/rule-packs/elysia/rules/prefer-static-services.ts +159 -0
  151. package/src/rule-packs/elysia/rules/prefer-throw-status.ts +151 -0
  152. package/src/rule-packs/elysia/rules/require-hooks-before-routes.ts +209 -0
  153. package/src/rule-packs/elysia/rules/require-plugin-name.ts +107 -0
  154. package/src/rule-packs/elysia/utils/elysiaChain.ts +306 -0
  155. package/src/rule-packs/env-access/index.ts +23 -0
  156. package/src/rule-packs/env-access/rules/index.ts +2 -0
  157. package/src/rule-packs/env-access/rules/no-direct-process-env.ts +133 -0
  158. package/src/rule-packs/env-access/rules/no-process-exit.ts +95 -0
  159. package/src/rule-packs/i18n-keys/index.ts +19 -0
  160. package/src/rule-packs/i18n-keys/rules/static-translation-key-exists.ts +173 -0
  161. package/src/rule-packs/index.ts +139 -0
  162. package/src/rule-packs/jwt-cookies/index.ts +25 -0
  163. package/src/rule-packs/jwt-cookies/rules/auth-cookie-must-be-httponly.ts +150 -0
  164. package/src/rule-packs/jwt-cookies/rules/auth-cookie-must-be-secure-in-prod.ts +149 -0
  165. package/src/rule-packs/jwt-cookies/rules/bcrypt-rounds-min.ts +195 -0
  166. package/src/rule-packs/jwt-cookies/utils.ts +188 -0
  167. package/src/rule-packs/oauth-security/index.ts +25 -0
  168. package/src/rule-packs/oauth-security/rules/pkce-required-for-oidc.ts +296 -0
  169. package/src/rule-packs/oauth-security/rules/state-must-be-redis-backed.ts +193 -0
  170. package/src/rule-packs/oauth-security/rules/state-ttl-bounded.ts +219 -0
  171. package/src/rule-packs/oauth-security/utils.ts +127 -0
  172. package/src/rule-packs/react-component-architecture/index.ts +35 -0
  173. package/src/rule-packs/react-component-architecture/rules/component-folder-structure.ts +123 -0
  174. package/src/rule-packs/react-component-architecture/rules/forwardref-display-name.ts +93 -0
  175. package/src/rule-packs/react-component-architecture/rules/index-must-reexport-default.ts +123 -0
  176. package/src/rule-packs/react-component-architecture/rules/max-hooks-per-file.ts +122 -0
  177. package/src/rule-packs/react-component-architecture/rules/no-cross-feature-imports.ts +170 -0
  178. package/src/rule-packs/react-component-architecture/rules/no-inline-jsx-functions.ts +66 -0
  179. package/src/rule-packs/react-component-architecture/utils.ts +47 -0
  180. package/src/rule-packs/rule-packs.types.ts +18 -0
  181. package/src/rule-packs/structured-logging/index.ts +26 -0
  182. package/src/rule-packs/structured-logging/rules/mask-pii-fields.ts +221 -0
  183. package/src/rule-packs/structured-logging/rules/no-error-stringify.ts +217 -0
  184. package/src/rule-packs/structured-logging/rules/require-event-field.ts +136 -0
  185. package/src/rule-packs/structured-logging/utils/logger.ts +104 -0
  186. package/src/rule-packs/tanstack-query/index.ts +20 -0
  187. package/src/rule-packs/tanstack-query/rules/prefix-query-key-must-use-set-queries-data.ts +321 -0
  188. package/src/rule-packs/test-conventions/index.ts +23 -0
  189. package/src/rule-packs/test-conventions/rules/index.ts +2 -0
  190. package/src/rule-packs/test-conventions/rules/no-focused-tests.ts +170 -0
  191. package/src/rule-packs/test-conventions/rules/test-file-mirrors-source.ts +127 -0
  192. package/src/rule-packs/utils.ts +142 -0
  193. package/src/session-store.ts +359 -0
  194. package/src/spec/generate-tests.ts +213 -0
  195. package/src/spec/index.ts +5 -0
  196. package/src/spec/parse.ts +152 -0
  197. package/src/spec/review-tests.ts +162 -0
  198. package/src/spec/spec.constants.ts +13 -0
  199. package/src/spec/spec.types.ts +79 -0
  200. package/src/stack-detection/detect.ts +246 -0
  201. package/src/stack-detection/index.ts +3 -0
  202. package/src/stack-detection/packs.ts +174 -0
  203. package/src/stack-detection/stack-detection.types.ts +47 -0
  204. package/src/validate/accept.ts +49 -0
  205. package/src/validate/errors.ts +35 -0
  206. package/src/validate/index.ts +12 -0
  207. package/src/validate/parse.ts +148 -0
  208. package/src/validate/run-tests.ts +59 -0
  209. package/src/validate/validate.ts +40 -0
  210. package/src/validate/validate.types.ts +52 -0
  211. package/src/web-components.ts +638 -0
  212. package/src/web-coverage.ts +89 -0
  213. package/src/web-routes.ts +151 -0
  214. package/src/web-templates.ts +1011 -0
  215. package/strict.eslint.config.mjs +84 -0
  216. package/strict.web.eslint.config.mjs +185 -0
@@ -0,0 +1,34 @@
1
/**
 * Tuning knobs for the OpenAI-compatible client. They sit in the inference
 * domain, beside the provider that consumes them, rather than in a global
 * constants bucket.
 */
export const PROVIDER_LIMITS = {
  /**
   * Default output-token budget sent as `max_tokens`. This is the single source
   * of truth: the value used to be split between the CLI and the provider,
   * which silently truncated large generations.
   */
  maxTokens: 16_384,

  /**
   * Per-request timeout in milliseconds. Generous, because local generations
   * are slow; bounded, so a hung server cannot wedge an unattended run forever.
   */
  requestTimeoutMs: 600_000,

  /**
   * Base delay (ms) for linear retry backoff: attempt N waits N times this
   * value.
   */
  retryBackoffMs: 400,

  /**
   * Recommended vLLM repetition penalty, used only when you opt in through
   * TSFORGE_REPETITION_PENALTY. It is OFF by default: applied globally it also
   * penalizes the repetitive tool-call JSON tokens and pushes the model to
   * narrate instead of emitting tool calls (→ no files written). Degenerate
   * repetition LOOPS are handled by the StreamGuard
   * (inference/stream-guard.ts) instead, which only watches prose and cannot
   * affect tool-calling.
   */
  repetitionPenalty: 1.1,
} as const;
26
+
27
/**
 * Endpoint and model the local provider falls back to — the single source of
 * truth for both. Either can be overridden per run with TSFORGE_BASE_URL and
 * TSFORGE_MODEL.
 */
export const PROVIDER_DEFAULTS = {
  /** Root of the OpenAI-compatible API served locally. */
  baseUrl: "http://localhost:8000/v1",
  /** Model id requested from that server. */
  model: "qwen3.6-27b",
} as const;
@@ -0,0 +1,123 @@
1
/** Who authored a chat message. */
export type Role =
  | "system"
  | "user"
  | "assistant"
  | "tool";
2
+
3
/** One entry of the conversation history exchanged with the model. */
export interface IChatMessage {
  /** Author of the message. */
  role: Role;
  /** Text payload of the message. */
  content: string;
  /**
   * Assistant messages only: the tool calls the assistant emitted. They stay in
   * history so the model sees what it asked for alongside the results that
   * came back.
   */
  toolCalls?: Array<IToolCall>;
  /** Tool messages only: id of the call this message is the result of. */
  toolCallId?: string;
}
12
+
13
/** A tool call parsed from the model: its name plus decoded JSON arguments. */
export interface IToolCall {
  /** Correlation id that a tool-result message can point back to. */
  id?: string;
  /** Name of the tool being invoked. */
  name: string;
  /** Decoded JSON arguments, keyed by argument name. */
  arguments: Record<string, unknown>;
}
20
+
21
/**
 * Token accounting taken from the server's `usage` block. It feeds the status
 * line's context gauge and is intended to drive auto-compaction triggering.
 */
export interface ITokenUsage {
  /** Tokens in the prompt (the context the model saw). */
  promptTokens: number;
  /** Tokens the model generated. */
  completionTokens: number;
  /** Total reported by the server for the call. */
  totalTokens: number;
}
28
+
29
/** The assembled result of one model call. */
export interface IModelResponse {
  /** The model's answer text. */
  content: string;
  /** Tool calls the model emitted. */
  toolCalls: Array<IToolCall>;
  /**
   * Server-reported token usage for this call, when available. `promptTokens`
   * is the full context the model just saw — the figure auto-compaction is
   * meant to watch.
   */
  usage?: ITokenUsage;
  /**
   * Number of tool calls SALVAGED from malformed content (the server parser
   * left them as text). A value above 0 signals that the model emitted
   * unparseable tool syntax.
   */
  salvaged?: number;
  /**
   * Set when the stream was aborted because the model fell into a degenerate
   * repetition loop (same line/template until max_tokens). The loop driver
   * then stops the turn instead of nudging the model into another loop.
   */
  degenerated?: boolean;
  /**
   * Set when TTSR aborted the stream because a rule matched. Carries the rule
   * name and the guidance to append to the corrective retry message.
   */
  ttsrFired?: { ruleName: string; guidance: string };
}
46
+
47
/** Per-call options accepted by `IProvider.complete`. */
export interface ICompleteOptions {
  /** Sampling temperature for this request. */
  temperature?: number;
  /** OpenAI-style tool schemas to advertise (treated as opaque JSON). */
  tools?: Array<unknown>;
  /**
   * How strongly the model is pushed to call a tool. `required` forces a tool
   * call, which suppresses the chat-style "here is my answer" prose the harness
   * discards anyway. Defaults to `auto`. Ignored when no tools are advertised.
   */
  toolChoice?: "auto" | "required" | "none";
  /**
   * Per-request thinking toggle (Qwen `chat_template_kwargs.enable_thinking`).
   * Omitted = server default. Off for mechanical work, on for hard reasoning.
   */
  enableThinking?: boolean;
  /**
   * Cap on reasoning tokens before the model must answer (vLLM
   * `thinking_token_budget`). Omitted = unbounded. The lever for turn *time*.
   */
  thinkingTokenBudget?: number;
  /**
   * When provided, the request streams and every token is delivered here as it
   * arrives, tagged with its channel: `reasoning` (the model's thinking) vs
   * `content` (its actual answer). Lets a UI dim the thinking and format the
   * answer.
   */
  onToken?: (text: string, channel: TokenChannel) => void;
  /**
   * Caller cancellation: aborting it stops the request (and any stream)
   * mid-flight. Combined with the per-request timeout.
   */
  signal?: AbortSignal;
  /**
   * TTSR watcher for stream-interrupting rules (wired by the loop, not the
   * provider).
   */
  ttsrManager?: ITtsrWatcher;
}
73
+
74
/**
 * Structural view of the loop's TtsrManager. Declaring only the shape the
 * inference layer needs keeps it free of a hard dependency on loop internals
 * while remaining fully typed.
 */
export interface ITtsrWatcher {
  /**
   * Inspect a streamed delta.
   *
   * @param text - The delta text to check.
   * @param context - Where the delta came from (`content` or `tool-args`) and,
   *   optionally, the file currently being written.
   * @returns The matching rule's name and guidance, or `null` for no match.
   */
  checkDelta(
    text: string,
    context: { source: "content" | "tool-args"; currentFile?: string }
  ): { readonly name: string; readonly guidance: string } | null;
}
82
+
83
/**
 * The stream a token belongs to:
 * - `reasoning` — the model's thinking;
 * - `content` — its answer;
 * - `tool` — the tool calls it is emitting (the file it's writing), streamed so
 *   a long tool-call generation isn't silent dead air.
 */
export type TokenChannel =
  | "reasoning"
  | "content"
  | "tool";
87
+
88
/**
 * The model seam. Implementations talk to a local server (vLLM/Ollama/...).
 */
export interface IProvider {
  /**
   * Run one completion over the given history.
   *
   * @param messages - Conversation history to send.
   * @param opts - Optional per-call settings.
   * @returns The model's response.
   */
  complete(
    messages: Array<IChatMessage>,
    opts?: ICompleteOptions
  ): Promise<IModelResponse>;
}
95
+
96
/** Connection and generation settings for `OpenAICompatibleProvider`. */
export interface IOpenAICompatibleConfig {
  /** Root of the OpenAI-compatible API, e.g. http://localhost:11434/v1 */
  baseUrl: string;
  /** Model id, e.g. qwen3.6-35b-a3b */
  model: string;
  /** When set, sent as `Authorization: Bearer <apiKey>`. */
  apiKey?: string;
  /**
   * Abort a single request after this many milliseconds (default
   * `PROVIDER_LIMITS.requestTimeoutMs`). Generous because local generations are
   * slow, but bounded so a hung server can't wedge an unattended run forever.
   */
  timeoutMs?: number;
  /**
   * Hard cap on tokens per response (default `PROVIDER_LIMITS.maxTokens`).
   * Bounds a degenerate repetition loop so one runaway generation can't spew
   * until the context limit, while staying generous enough for whole-file
   * tool-call output.
   */
  maxTokens?: number;
  /**
   * vLLM repetition penalty (>1 discourages repeating tokens), aimed at the
   * degenerate loops this local model falls into at temp 0, where it repeats
   * the same line/JSON until max_tokens. Left out of the request entirely when
   * unset, which is the default. Around 1.1 is the suggested value, but see
   * the trade-off documented on `PROVIDER_LIMITS.repetitionPenalty`: applied
   * globally it can also discourage tool-call JSON.
   */
  repetitionPenalty?: number;
  /** Injectable for tests; defaults to global fetch. */
  fetch?: typeof fetch;
}
@@ -0,0 +1,113 @@
1
+ import type {
2
+ IChatMessage,
3
+ ICompleteOptions,
4
+ IModelResponse,
5
+ IProvider,
6
+ IOpenAICompatibleConfig,
7
+ } from "./inference.types";
8
+ import { PROVIDER_LIMITS } from "./inference.constants";
9
+ import { fetchWithRetry } from "./transport";
10
+ import { toWire, parseResponse } from "./wire";
11
+ import { streamResponse } from "./stream";
12
+
13
+ export { salvageToolCalls } from "./wire";
14
+
15
/**
 * Talks to any OpenAI-compatible `/chat/completions` endpoint — which Ollama,
 * vLLM, and llama.cpp all expose for a local Qwen3.6. Supports streaming: pass
 * `onToken` to receive tokens as they arrive. The wire mapping lives in ./wire,
 * the SSE assembly in ./stream, and connection retry in ./transport — this
 * class just orchestrates one request.
 */
export class OpenAICompatibleProvider implements IProvider {
  constructor(private cfg: IOpenAICompatibleConfig) {}

  /**
   * Hot-swap the endpoint/model/key (used by `/model` to switch live): the
   * running session keeps this provider reference and picks up the new config
   * on its next request — no restart.
   */
  reconfigure(cfg: IOpenAICompatibleConfig): void {
    this.cfg = cfg;
  }

  /** The current config — read by the CLI for the model/endpoint status line. */
  get config(): IOpenAICompatibleConfig {
    return this.cfg;
  }

  /**
   * Send one chat-completion request and return the assembled response.
   *
   * @param messages - Conversation history; each entry is mapped with `toWire`.
   * @param opts - Per-call options. Supplying `onToken` switches the request to
   *   streaming mode.
   * @returns The parsed (non-streaming) or stream-assembled model response.
   * @throws Error `model request failed: <status> <detail>` when the server
   *   answers with a non-2xx status. Failures raised by the transport are not
   *   caught here.
   */
  async complete(
    messages: IChatMessage[],
    opts: ICompleteOptions = {}
  ): Promise<IModelResponse> {
    // Snapshot the config so a single request is built from one consistent
    // configuration.
    const cfg = this.cfg;
    const doFetch = cfg.fetch ?? fetch;
    const streaming = opts.onToken !== undefined;
    // An empty tool list advertises nothing: sending `tools: []` together with
    // a `tool_choice` is rejected by strict servers, and the options contract
    // says toolChoice is ignored when no tools are advertised.
    const advertisesTools = opts.tools !== undefined && opts.tools.length > 0;
    const headers: Record<string, string> = {
      "content-type": "application/json",
    };

    if (cfg.apiKey !== undefined) {
      headers.authorization = `Bearer ${cfg.apiKey}`;
    }

    const body = JSON.stringify({
      model: cfg.model,
      messages: messages.map(toWire),
      max_tokens: cfg.maxTokens ?? PROVIDER_LIMITS.maxTokens,
      // `undefined` is dropped by JSON.stringify, leaving the server default.
      temperature: opts.temperature,
      ...(cfg.repetitionPenalty === undefined
        ? {}
        : { repetition_penalty: cfg.repetitionPenalty }),
      ...(advertisesTools
        ? { tools: opts.tools, tool_choice: opts.toolChoice ?? "auto" }
        : {}),
      ...(opts.enableThinking === undefined
        ? {}
        : { chat_template_kwargs: { enable_thinking: opts.enableThinking } }),
      ...(opts.thinkingTokenBudget === undefined
        ? {}
        : { thinking_token_budget: opts.thinkingTokenBudget }),
      // include_usage → the stream emits a final chunk carrying token `usage`
      // (otherwise a streamed response reports none). Non-stream replies carry it
      // by default.
      ...(streaming
        ? { stream: true, stream_options: { include_usage: true } }
        : {}),
    });

    // Tolerate a base URL configured with trailing slashes ("…/v1/"), which
    // would otherwise produce "…/v1//chat/completions".
    const endpoint = `${cfg.baseUrl.replace(/\/+$/, "")}/chat/completions`;

    // Retry transient CONNECTION blips (socket close / unable-to-connect) — the
    // connect happens before any stream starts, so retrying is safe for both
    // streaming and non-streaming. Essential for a long-running CLI; also stops
    // a network hiccup from wrecking an eval run.
    const res = await fetchWithRetry(
      doFetch,
      endpoint,
      headers,
      body,
      cfg.timeoutMs ?? PROVIDER_LIMITS.requestTimeoutMs,
      opts.signal
    );

    if (!res.ok) {
      const detail = await responseDetail(res);

      throw new Error(
        `model request failed: ${res.status}${detail.length > 0 ? ` ${detail}` : ""}`
      );
    }

    // Re-checked (rather than reusing `streaming`) so the compiler narrows
    // `opts.onToken` to a defined callback.
    if (opts.onToken !== undefined) {
      return streamResponse(res, opts.onToken, opts.ttsrManager);
    }

    const data: unknown = await res.json();

    return parseResponse(data);
  }
}
106
+
107
/**
 * Best-effort extraction of an error body for diagnostics.
 *
 * @param res - The failed response whose body should be read.
 * @returns The body text, trimmed and capped at 1000 characters, or an empty
 *   string when the body cannot be read.
 */
async function responseDetail(res: Response): Promise<string> {
  let raw: string;

  try {
    raw = await res.text();
  } catch {
    // An unreadable body must not mask the original HTTP failure.
    return "";
  }

  const trimmed = raw.trim();

  return trimmed.slice(0, 1000);
}
@@ -0,0 +1,161 @@
1
+ import type { TokenChannel } from "./inference.types";
2
+
3
/**
 * Thresholds for the StreamGuard, which detects the degenerate repetition loops
 * this local model falls into at temperature 0: it spews the same line (or a
 * fixed template like "I will ensure it is X.") until it hits max_tokens and
 * never emits a tool call. The guard lets the stream be aborted the moment a
 * loop is detected, instead of burning a full generation (and then a nudge,
 * and another loop).
 *
 * Only the prose channels (reasoning + content) are watched. File content is
 * carried in tool-call ARGUMENTS, never here, so code can't false-positive.
 *
 * The sliding-window checks only see repetition whose PERIOD fits inside
 * WINDOW. A model that re-prints a large block (e.g. a ~30-line function plus
 * paragraphs of "wait, I think I see the issue…") loops with a period far
 * bigger than the window, so every window slice looks distinct and those checks
 * miss it. The period-AGNOSTIC counter (LONG_LINE_LEN / GLOBAL_REPEAT_LIMIT)
 * covers that case: one long line emitted many times across the whole
 * generation is a loop no matter how big the repeating block is.
 */

/** Number of most-recent substantial lines kept per channel. */
const WINDOW = 24;

/**
 * Lines shorter than this after trimming (blanks, lone braces, indentation) are
 * ignored — only substantial lines count toward a loop.
 */
const MIN_LINE_LEN = 6;

/** At most this many distinct lines across the window ⇒ near-exact repetition. */
const MAX_DISTINCT = 3;

/** Number of leading words compared by the templated-repetition check. */
const PREFIX_WORDS = 4;

/** At least this many window lines sharing that prefix ⇒ templated repetition. */
const PREFIX_MATCH = 20;

/**
 * A line at least this long, repeated verbatim `GLOBAL_REPEAT_LIMIT` times
 * anywhere in the stream, is a loop — long lines don't recur exactly in real
 * prose/code.
 */
const LONG_LINE_LEN = 20;
const GLOBAL_REPEAT_LIMIT = 5;

/**
 * Markers showing the model has started emitting STRUCTURED tool calls into the
 * content channel. A server tool-call-parser mismatch (e.g. atlas-spark's
 * Qwen3.5-native `<function=…>` XML, which Atlas's parsers don't match) leaves
 * them in `content` instead of `tool_calls`. Once seen, content is no longer
 * prose, so the prose-loop guard must stand down for it (the leaked calls are
 * salvaged + deduped downstream).
 */
const TOOL_MARKUP_RE = /<function=|<tool_call>/i;

/** The two channels the guard inspects. */
type ProseChannel = "reasoning" | "content";
46
+
47
+ export class StreamGuard {
48
+ private readonly lines: Record<ProseChannel, string[]> = {
49
+ reasoning: [],
50
+ content: [],
51
+ };
52
+ private readonly partial: Record<ProseChannel, string> = {
53
+ reasoning: "",
54
+ content: "",
55
+ };
56
+ /** Per-channel count of every substantial line seen across the WHOLE stream —
57
+ * backs the period-agnostic large-block loop check. */
58
+ private readonly counts: Record<ProseChannel, Map<string, number>> = {
59
+ reasoning: new Map(),
60
+ content: new Map(),
61
+ };
62
+ /** Set once tool-call markup leaks into the content channel — thereafter the
63
+ * prose-loop guard stands down for content (see TOOL_MARKUP_RE). */
64
+ private contentIsToolMarkup = false;
65
+
66
+ /** Feed a streamed token; returns true once the channel has degenerated. Only
67
+ * the prose channels are watched — tool-call output is structured, not a loop
68
+ * we'd abort. */
69
+ observe(text: string, channel: TokenChannel): boolean {
70
+ if (channel === "tool") {
71
+ return false;
72
+ }
73
+
74
+ this.partial[channel] += text;
75
+
76
+ if (
77
+ channel === "content" &&
78
+ !this.contentIsToolMarkup &&
79
+ TOOL_MARKUP_RE.test(this.partial.content)
80
+ ) {
81
+ this.contentIsToolMarkup = true;
82
+ }
83
+
84
+ const segments = this.partial[channel].split("\n");
85
+
86
+ this.partial[channel] = segments.pop() ?? "";
87
+
88
+ // Content has become leaked tool-call markup, not prose — drain the buffer
89
+ // (so it stays bounded) but don't run the prose-loop checks on it.
90
+ if (channel === "content" && this.contentIsToolMarkup) {
91
+ return false;
92
+ }
93
+
94
+ for (const segment of segments) {
95
+ const trimmed = segment.trim();
96
+
97
+ if (trimmed.length < MIN_LINE_LEN) {
98
+ continue;
99
+ }
100
+
101
+ // Period-agnostic: a long line repeated many times anywhere in the stream
102
+ // is a loop even when the repeating BLOCK is larger than WINDOW (which the
103
+ // sliding-window checks below would miss).
104
+ if (trimmed.length >= LONG_LINE_LEN) {
105
+ const counts = this.counts[channel];
106
+ const seen = (counts.get(trimmed) ?? 0) + 1;
107
+
108
+ counts.set(trimmed, seen);
109
+
110
+ if (seen >= GLOBAL_REPEAT_LIMIT) {
111
+ return true;
112
+ }
113
+ }
114
+
115
+ const window = this.lines[channel];
116
+
117
+ window.push(trimmed);
118
+
119
+ if (window.length > WINDOW) {
120
+ window.shift();
121
+ }
122
+
123
+ if (window.length === WINDOW && isRepetitive(window)) {
124
+ return true;
125
+ }
126
+ }
127
+
128
+ return false;
129
+ }
130
+ }
131
+
132
+ function isRepetitive(window: string[]): boolean {
133
+ const distinct = new Set(window).size;
134
+
135
+ if (distinct <= MAX_DISTINCT) {
136
+ return true;
137
+ }
138
+
139
+ // Block repetition: the model loops a multi-line block (e.g. re-printing the
140
+ // same "cat X / npx tsc / echo …" sequence). The lines vary within the block,
141
+ // so the exact-line check above misses it — but half-or-more of the window
142
+ // being duplicates is a loop no real prose produces.
143
+ if (distinct <= Math.floor(WINDOW / 2)) {
144
+ return true;
145
+ }
146
+
147
+ const prefixCounts = new Map<string, number>();
148
+
149
+ for (const line of window) {
150
+ const prefix = line.split(/\s+/).slice(0, PREFIX_WORDS).join(" ");
151
+ const next = (prefixCounts.get(prefix) ?? 0) + 1;
152
+
153
+ prefixCounts.set(prefix, next);
154
+
155
+ if (next >= PREFIX_MATCH) {
156
+ return true;
157
+ }
158
+ }
159
+
160
+ return false;
161
+ }