@agjs/tsforge 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/tsforge.js +2 -0
- package/package.json +35 -0
- package/src/agent/agent.constants.ts +382 -0
- package/src/agent/agent.types.ts +34 -0
- package/src/agent/index.ts +4 -0
- package/src/agent/model-agent.ts +297 -0
- package/src/agent/tool-repair.ts +194 -0
- package/src/agent/tools.ts +190 -0
- package/src/browser/checks.ts +96 -0
- package/src/browser/index.ts +8 -0
- package/src/browser/oracle.ts +303 -0
- package/src/classify.ts +48 -0
- package/src/cli.ts +1333 -0
- package/src/config/config.constants.ts +9 -0
- package/src/config/flags.ts +32 -0
- package/src/config/index.ts +8 -0
- package/src/config/tsforge-config.ts +301 -0
- package/src/constitution/baseline.ts +257 -0
- package/src/detect-gate.ts +498 -0
- package/src/eval/eval.types.ts +36 -0
- package/src/eval/index.ts +3 -0
- package/src/eval/judge.ts +62 -0
- package/src/eval/score.ts +39 -0
- package/src/files/create.ts +22 -0
- package/src/files/edit.ts +193 -0
- package/src/files/files.constants.ts +11 -0
- package/src/files/files.types.ts +81 -0
- package/src/files/hashline-format.ts +110 -0
- package/src/files/hashline.ts +689 -0
- package/src/files/index.ts +19 -0
- package/src/index.ts +8 -0
- package/src/inference/index.ts +6 -0
- package/src/inference/inference.constants.ts +34 -0
- package/src/inference/inference.types.ts +123 -0
- package/src/inference/openai-compatible.ts +113 -0
- package/src/inference/stream-guard.ts +161 -0
- package/src/inference/stream.ts +370 -0
- package/src/inference/transport.ts +78 -0
- package/src/inference/wire.ts +0 -0
- package/src/lib/fs/fs.ts +126 -0
- package/src/lib/fs/fs.types.ts +5 -0
- package/src/lib/fs/index.ts +3 -0
- package/src/lib/fs/process.ts +146 -0
- package/src/lib/guards/guards.ts +9 -0
- package/src/lib/guards/index.ts +1 -0
- package/src/lib/json/index.ts +1 -0
- package/src/lib/json/json.ts +12 -0
- package/src/lib/scope/index.ts +2 -0
- package/src/lib/scope/scope.constants.ts +3 -0
- package/src/lib/scope/scope.ts +40 -0
- package/src/loop/astgrep-fix.ts +228 -0
- package/src/loop/feedback/feedback.ts +138 -0
- package/src/loop/feedback/index.ts +8 -0
- package/src/loop/feedback/meta-rule-docs.ts +41 -0
- package/src/loop/feedback/meta-rule-feedback.ts +61 -0
- package/src/loop/feedback/rule-docs.generated.json +112 -0
- package/src/loop/feedback/rule-docs.ts +342 -0
- package/src/loop/index.ts +19 -0
- package/src/loop/loop.constants.ts +68 -0
- package/src/loop/loop.types.ts +99 -0
- package/src/loop/prompt/index.ts +2 -0
- package/src/loop/prompt/project-map.ts +69 -0
- package/src/loop/prompt/prompt.ts +107 -0
- package/src/loop/quality.ts +174 -0
- package/src/loop/rule-docs.generated.json +367 -0
- package/src/loop/run-spec.ts +88 -0
- package/src/loop/run.ts +400 -0
- package/src/loop/session.ts +1410 -0
- package/src/loop/tools/add-dependency.ts +71 -0
- package/src/loop/tools/condense.ts +498 -0
- package/src/loop/tools/edit-hashline.ts +80 -0
- package/src/loop/tools/execute-tool.ts +80 -0
- package/src/loop/tools/file-ops.ts +323 -0
- package/src/loop/tools/index.ts +2 -0
- package/src/loop/tools/lsp-ops.ts +222 -0
- package/src/loop/tools/scaffold-routes.ts +68 -0
- package/src/loop/tools/scaffold-ui.ts +62 -0
- package/src/loop/tools/scaffold-web.ts +35 -0
- package/src/loop/tools/tool-context.ts +126 -0
- package/src/loop/ttsr-defaults.ts +53 -0
- package/src/loop/ttsr.ts +322 -0
- package/src/loop/turn.ts +856 -0
- package/src/lsp/index.ts +2 -0
- package/src/lsp/lsp.types.ts +56 -0
- package/src/lsp/service.ts +500 -0
- package/src/meta-rules/context.ts +195 -0
- package/src/meta-rules/index.ts +9 -0
- package/src/meta-rules/meta-rules.types.ts +47 -0
- package/src/meta-rules/parsers/package-json-parser.ts +51 -0
- package/src/meta-rules/registry.ts +37 -0
- package/src/meta-rules/rules/ci/workflow-actions-pinned.ts +59 -0
- package/src/meta-rules/rules/ci/workflow-runner-pinned.ts +57 -0
- package/src/meta-rules/rules/ci/workflow-timeout-required.ts +114 -0
- package/src/meta-rules/rules/config/tsconfig-paths-exist.ts +117 -0
- package/src/meta-rules/rules/config/tsconfig-strict.ts +91 -0
- package/src/meta-rules/rules/source-text/no-eslint-disable-comments.ts +34 -0
- package/src/meta-rules/rules/source-text/no-ts-suppressions.ts +38 -0
- package/src/meta-rules/rules/supply-chain/no-overlapping-libs.ts +57 -0
- package/src/meta-rules/rules/supply-chain/package-exact-deps.ts +55 -0
- package/src/meta-rules/rules/testing/test-sibling-required.ts +110 -0
- package/src/meta-rules/runner.ts +64 -0
- package/src/models-config.ts +196 -0
- package/src/render/ansi.ts +289 -0
- package/src/render/banner.ts +113 -0
- package/src/render/box.ts +134 -0
- package/src/render/index.ts +7 -0
- package/src/render/markdown.ts +123 -0
- package/src/render/render.types.ts +21 -0
- package/src/render/stream-markdown.ts +128 -0
- package/src/render/style.ts +26 -0
- package/src/rule-packs/bullmq/index.ts +39 -0
- package/src/rule-packs/bullmq/rules/index.ts +7 -0
- package/src/rule-packs/bullmq/rules/job-name-must-be-constant.ts +141 -0
- package/src/rule-packs/bullmq/rules/job-options-must-set-attempts.ts +174 -0
- package/src/rule-packs/bullmq/rules/no-blocking-concurrency-zero.ts +103 -0
- package/src/rule-packs/bullmq/rules/queue-options-must-set-removeoncomplete.ts +130 -0
- package/src/rule-packs/bullmq/rules/queue-options-must-set-removeonfail.ts +130 -0
- package/src/rule-packs/bullmq/rules/worker-must-implement-close.ts +182 -0
- package/src/rule-packs/bullmq/rules/worker-must-listen-failed.ts +140 -0
- package/src/rule-packs/bullmq/utils.ts +334 -0
- package/src/rule-packs/code-flow/index.ts +25 -0
- package/src/rule-packs/code-flow/rules/index.ts +3 -0
- package/src/rule-packs/code-flow/rules/no-bare-date-now.ts +138 -0
- package/src/rule-packs/code-flow/rules/no-template-trim-empty-ternary.ts +87 -0
- package/src/rule-packs/code-flow/rules/prefer-early-return.ts +80 -0
- package/src/rule-packs/code-flow/utils/prefer-early-return.ts +132 -0
- package/src/rule-packs/comment-hygiene/index.ts +25 -0
- package/src/rule-packs/comment-hygiene/rules/index.ts +3 -0
- package/src/rule-packs/comment-hygiene/rules/no-historical-comments.ts +102 -0
- package/src/rule-packs/comment-hygiene/rules/no-narration-comments.ts +83 -0
- package/src/rule-packs/comment-hygiene/rules/no-pr-reference-comments.ts +90 -0
- package/src/rule-packs/create-rule.ts +9 -0
- package/src/rule-packs/drizzle/index.ts +41 -0
- package/src/rule-packs/drizzle/rules/account-scoped-tables-require-where.ts +371 -0
- package/src/rule-packs/drizzle/rules/index.ts +8 -0
- package/src/rule-packs/drizzle/rules/no-nested-db-transaction.ts +127 -0
- package/src/rule-packs/drizzle/rules/no-raw-sql-outside-allowlist.ts +100 -0
- package/src/rule-packs/drizzle/rules/relations-must-cover-fks.ts +209 -0
- package/src/rule-packs/drizzle/rules/schema-files-must-not-import-driver.ts +127 -0
- package/src/rule-packs/drizzle/rules/schema-files-must-only-export-schema.ts +149 -0
- package/src/rule-packs/drizzle/rules/tables-must-have-timestamps.ts +312 -0
- package/src/rule-packs/drizzle/rules/timestamp-must-specify-mode.ts +166 -0
- package/src/rule-packs/drizzle/utils.ts +115 -0
- package/src/rule-packs/elysia/index.ts +43 -0
- package/src/rule-packs/elysia/rules/consistent-status-via-set.ts +69 -0
- package/src/rule-packs/elysia/rules/no-decorate-state-collision.ts +276 -0
- package/src/rule-packs/elysia/rules/no-separate-model-interfaces.ts +144 -0
- package/src/rule-packs/elysia/rules/prefer-destructured-context.ts +155 -0
- package/src/rule-packs/elysia/rules/prefer-direct-return.ts +176 -0
- package/src/rule-packs/elysia/rules/prefer-static-services.ts +159 -0
- package/src/rule-packs/elysia/rules/prefer-throw-status.ts +151 -0
- package/src/rule-packs/elysia/rules/require-hooks-before-routes.ts +209 -0
- package/src/rule-packs/elysia/rules/require-plugin-name.ts +107 -0
- package/src/rule-packs/elysia/utils/elysiaChain.ts +306 -0
- package/src/rule-packs/env-access/index.ts +23 -0
- package/src/rule-packs/env-access/rules/index.ts +2 -0
- package/src/rule-packs/env-access/rules/no-direct-process-env.ts +133 -0
- package/src/rule-packs/env-access/rules/no-process-exit.ts +95 -0
- package/src/rule-packs/i18n-keys/index.ts +19 -0
- package/src/rule-packs/i18n-keys/rules/static-translation-key-exists.ts +173 -0
- package/src/rule-packs/index.ts +139 -0
- package/src/rule-packs/jwt-cookies/index.ts +25 -0
- package/src/rule-packs/jwt-cookies/rules/auth-cookie-must-be-httponly.ts +150 -0
- package/src/rule-packs/jwt-cookies/rules/auth-cookie-must-be-secure-in-prod.ts +149 -0
- package/src/rule-packs/jwt-cookies/rules/bcrypt-rounds-min.ts +195 -0
- package/src/rule-packs/jwt-cookies/utils.ts +188 -0
- package/src/rule-packs/oauth-security/index.ts +25 -0
- package/src/rule-packs/oauth-security/rules/pkce-required-for-oidc.ts +296 -0
- package/src/rule-packs/oauth-security/rules/state-must-be-redis-backed.ts +193 -0
- package/src/rule-packs/oauth-security/rules/state-ttl-bounded.ts +219 -0
- package/src/rule-packs/oauth-security/utils.ts +127 -0
- package/src/rule-packs/react-component-architecture/index.ts +35 -0
- package/src/rule-packs/react-component-architecture/rules/component-folder-structure.ts +123 -0
- package/src/rule-packs/react-component-architecture/rules/forwardref-display-name.ts +93 -0
- package/src/rule-packs/react-component-architecture/rules/index-must-reexport-default.ts +123 -0
- package/src/rule-packs/react-component-architecture/rules/max-hooks-per-file.ts +122 -0
- package/src/rule-packs/react-component-architecture/rules/no-cross-feature-imports.ts +170 -0
- package/src/rule-packs/react-component-architecture/rules/no-inline-jsx-functions.ts +66 -0
- package/src/rule-packs/react-component-architecture/utils.ts +47 -0
- package/src/rule-packs/rule-packs.types.ts +18 -0
- package/src/rule-packs/structured-logging/index.ts +26 -0
- package/src/rule-packs/structured-logging/rules/mask-pii-fields.ts +221 -0
- package/src/rule-packs/structured-logging/rules/no-error-stringify.ts +217 -0
- package/src/rule-packs/structured-logging/rules/require-event-field.ts +136 -0
- package/src/rule-packs/structured-logging/utils/logger.ts +104 -0
- package/src/rule-packs/tanstack-query/index.ts +20 -0
- package/src/rule-packs/tanstack-query/rules/prefix-query-key-must-use-set-queries-data.ts +321 -0
- package/src/rule-packs/test-conventions/index.ts +23 -0
- package/src/rule-packs/test-conventions/rules/index.ts +2 -0
- package/src/rule-packs/test-conventions/rules/no-focused-tests.ts +170 -0
- package/src/rule-packs/test-conventions/rules/test-file-mirrors-source.ts +127 -0
- package/src/rule-packs/utils.ts +142 -0
- package/src/session-store.ts +359 -0
- package/src/spec/generate-tests.ts +213 -0
- package/src/spec/index.ts +5 -0
- package/src/spec/parse.ts +152 -0
- package/src/spec/review-tests.ts +162 -0
- package/src/spec/spec.constants.ts +13 -0
- package/src/spec/spec.types.ts +79 -0
- package/src/stack-detection/detect.ts +246 -0
- package/src/stack-detection/index.ts +3 -0
- package/src/stack-detection/packs.ts +174 -0
- package/src/stack-detection/stack-detection.types.ts +47 -0
- package/src/validate/accept.ts +49 -0
- package/src/validate/errors.ts +35 -0
- package/src/validate/index.ts +12 -0
- package/src/validate/parse.ts +148 -0
- package/src/validate/run-tests.ts +59 -0
- package/src/validate/validate.ts +40 -0
- package/src/validate/validate.types.ts +52 -0
- package/src/web-components.ts +638 -0
- package/src/web-coverage.ts +89 -0
- package/src/web-routes.ts +151 -0
- package/src/web-templates.ts +1011 -0
- package/strict.eslint.config.mjs +84 -0
- package/strict.web.eslint.config.mjs +185 -0
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
/**
 * Tuning knobs for the OpenAI-compatible client. They are kept inside the
 * inference domain, next to the provider that reads them, instead of in a
 * global constants bucket.
 */
export const PROVIDER_LIMITS = {
  /**
   * Default output-token budget for a model response. This is the single
   * place the number is defined, so the CLI and the provider cannot disagree
   * and silently truncate a large generation.
   */
  maxTokens: 16384,
  /**
   * Per-request timeout in milliseconds. Long enough for slow local
   * generations, but finite so a hung server cannot stall an unattended run
   * indefinitely.
   */
  requestTimeoutMs: 600_000,
  /** Base delay (ms) for linear retry backoff: attempt N waits N * this value. */
  retryBackoffMs: 400,
  /**
   * Suggested vLLM repetition penalty for callers that opt in through
   * TSFORGE_REPETITION_PENALTY. It is OFF by default: applied globally it
   * also penalizes the naturally repetitive tool-call JSON tokens, which
   * pushes the model to narrate rather than emit tool calls (so no files get
   * written). Degenerate repetition loops are handled by the StreamGuard
   * (inference/stream-guard.ts) instead, which watches prose only and cannot
   * interfere with tool calling.
   */
  repetitionPenalty: 1.1,
} as const;
|
|
26
|
+
|
|
27
|
+
/**
 * Endpoint and model used for the local provider when nothing else is
 * specified. This is the only place these defaults are defined; a single run
 * can override them with TSFORGE_BASE_URL / TSFORGE_MODEL.
 */
export const PROVIDER_DEFAULTS = {
  /** Root of the local OpenAI-compatible API. */
  baseUrl: "http://localhost:8000/v1",
  /** Model id requested from that endpoint. */
  model: "qwen3.6-27b",
} as const;
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
/** Who authored a chat message. */
export type Role =
  | "system"
  | "user"
  | "assistant"
  | "tool";
|
|
2
|
+
|
|
3
|
+
/** A single entry in the message list sent to the model. */
export interface IChatMessage {
  /** Author of the message. */
  role: Role;
  /** Text body of the message. */
  content: string;
  /**
   * Assistant messages only: the tool calls that message emitted. They stay
   * in the history so the model can see both what it asked for and the
   * results that came back.
   */
  toolCalls?: IToolCall[];
  /** Tool messages only: id of the call whose result this message carries. */
  toolCallId?: string;
}
|
|
12
|
+
|
|
13
|
+
/** One tool call parsed out of a model reply: the tool name plus its decoded JSON arguments. */
export interface IToolCall {
  /** Correlation id that a later tool-result message uses to point back at this call. */
  id?: string;
  /** Name of the tool being invoked. */
  name: string;
  /** Arguments already decoded from JSON into a plain object. */
  arguments: Record<string, unknown>;
}
|
|
20
|
+
|
|
21
|
+
/**
 * Token counts taken from the server's `usage` block. These real numbers
 * feed the status line's context gauge and are intended to drive
 * auto-compaction triggering later.
 */
export interface ITokenUsage {
  /** Tokens in the prompt the server processed. */
  promptTokens: number;
  /** Tokens the model generated. */
  completionTokens: number;
  /** Total token count as reported for the call. */
  totalTokens: number;
}
|
|
28
|
+
|
|
29
|
+
/** Result of one model call, as handed back by a provider. */
export interface IModelResponse {
  /** Text the model produced. */
  content: string;
  /** Tool calls parsed from the reply. */
  toolCalls: IToolCall[];
  /**
   * Token usage the server reported for this call, if it reported any.
   * `promptTokens` is the whole context the model just saw, which is the
   * figure auto-compaction is meant to watch.
   */
  usage?: ITokenUsage;
  /**
   * Number of tool calls SALVAGED from malformed content, i.e. calls the
   * server-side parser left behind as plain text. A value above 0 means the
   * model emitted tool syntax that could not be parsed normally.
   */
  salvaged?: number;
  /**
   * True when the stream was cut short because the model fell into a
   * degenerate repetition loop (the same line or template until max_tokens).
   * The loop driver then ends the turn rather than nudging the model into
   * another loop.
   */
  degenerated?: boolean;
  /**
   * Present when TTSR aborted the stream because a rule matched. Carries the
   * rule's name and the guidance to append to the corrective retry message.
   */
  ttsrFired?: { ruleName: string; guidance: string };
}
|
|
46
|
+
|
|
47
|
+
/** Per-call options accepted by a provider's `complete`. */
export interface ICompleteOptions {
  /** Sampling temperature for this call. */
  temperature?: number;
  /** Tool schemas to advertise, in OpenAI style. Treated as opaque JSON. */
  tools?: unknown[];
  /**
   * How strongly the model is pushed toward calling a tool. `required`
   * forces a tool call, which suppresses the chat-style "here is my answer"
   * prose the harness throws away anyway. Defaults to `auto`, and has no
   * effect when no tools are advertised.
   */
  toolChoice?: "auto" | "required" | "none";
  /**
   * Per-request thinking switch (Qwen `chat_template_kwargs.enable_thinking`).
   * Leave it out to use the server default; turn it off for mechanical work
   * and on for hard reasoning.
   */
  enableThinking?: boolean;
  /**
   * Upper bound on reasoning tokens before the model has to answer (vLLM
   * `thinking_token_budget`). Unbounded when omitted. This is the lever for
   * turn *time*.
   */
  thinkingTokenBudget?: number;
  /**
   * Supplying this makes the request stream. Every token is passed here as
   * it arrives, tagged with the channel it belongs to, so a UI can, for
   * example, dim the model's thinking and format its answer.
   */
  onToken?: (text: string, channel: TokenChannel) => void;
  /**
   * Caller-side cancellation: aborting it stops the request (and any stream)
   * mid-flight. It applies in addition to the per-request timeout.
   */
  signal?: AbortSignal;
  /** TTSR watcher for stream-interrupting rules. Wired up by the loop, not by the provider. */
  ttsrManager?: ITtsrWatcher;
}
|
|
73
|
+
|
|
74
|
+
/**
 * Structural shape of the loop's TtsrManager. Declaring it here lets the
 * inference layer stay fully typed without importing loop internals.
 */
export interface ITtsrWatcher {
  /**
   * Inspect a piece of streamed text.
   *
   * @param text    The streamed text to check.
   * @param context Where the text came from (`content` or `tool-args`) and,
   *                optionally, the file currently in play.
   * @returns The matched rule's name and guidance, or `null`.
   */
  checkDelta(
    text: string,
    context: { source: "content" | "tool-args"; currentFile?: string }
  ): { readonly name: string; readonly guidance: string } | null;
}
|
|
82
|
+
|
|
83
|
+
/**
 * The stream a token belongs to:
 * - `reasoning`: the model's thinking;
 * - `content`: its answer;
 * - `tool`: the tool calls it is emitting (e.g. the file it is writing),
 *   streamed so a long tool-call generation is not silent dead air.
 */
export type TokenChannel =
  | "reasoning"
  | "content"
  | "tool";
|
|
87
|
+
|
|
88
|
+
/** The seam between the harness and a model. Implementations talk to a local server (vLLM/Ollama/...). */
export interface IProvider {
  /**
   * Send the conversation to the model and resolve with its reply.
   *
   * @param messages Conversation to send.
   * @param opts     Optional per-call settings.
   */
  complete(
    messages: IChatMessage[],
    opts?: ICompleteOptions
  ): Promise<IModelResponse>;
}
|
|
95
|
+
|
|
96
|
+
/** Connection and generation settings for the OpenAI-compatible provider. */
export interface IOpenAICompatibleConfig {
  /** Root of the OpenAI-compatible API, e.g. http://localhost:11434/v1 */
  baseUrl: string;
  /** Model id, e.g. qwen3.6-35b-a3b */
  model: string;
  /** API key for the endpoint, if one is needed. */
  apiKey?: string;
  /**
   * Milliseconds after which a single request is aborted (default
   * PROVIDER_LIMITS.requestTimeoutMs). Generous, since local generations are
   * slow, yet bounded so a hung server cannot stall an unattended run
   * forever.
   */
  timeoutMs?: number;
  /**
   * Hard ceiling on tokens per response (default PROVIDER_LIMITS.maxTokens).
   * It bounds a degenerate repetition loop so one runaway generation cannot
   * keep going until the context limit, while staying large enough for
   * whole-file tool-call output.
   */
  maxTokens?: number;
  /**
   * vLLM repetition penalty; values above 1 discourage repeating tokens. It
   * targets the degenerate loops this local model falls into at temperature
   * 0, where it repeats the same line/JSON until max_tokens. Around 1.1
   * breaks loops without hurting correctness. Omitted by default (1.0 means
   * off); set it on code-gen providers.
   */
  repetitionPenalty?: number;
  /** Replaceable `fetch` implementation for tests; the global `fetch` is used otherwise. */
  fetch?: typeof fetch;
}
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
import type {
|
|
2
|
+
IChatMessage,
|
|
3
|
+
ICompleteOptions,
|
|
4
|
+
IModelResponse,
|
|
5
|
+
IProvider,
|
|
6
|
+
IOpenAICompatibleConfig,
|
|
7
|
+
} from "./inference.types";
|
|
8
|
+
import { PROVIDER_LIMITS } from "./inference.constants";
|
|
9
|
+
import { fetchWithRetry } from "./transport";
|
|
10
|
+
import { toWire, parseResponse } from "./wire";
|
|
11
|
+
import { streamResponse } from "./stream";
|
|
12
|
+
|
|
13
|
+
export { salvageToolCalls } from "./wire";
|
|
14
|
+
|
|
15
|
+
/**
 * Talks to any OpenAI-compatible `/chat/completions` endpoint — which Ollama,
 * vLLM, and llama.cpp all expose for a local Qwen3.6. Supports streaming: pass
 * `onToken` to receive tokens as they arrive. The wire mapping lives in
 * ./wire, the SSE assembly in ./stream, and connection retry in ./transport —
 * this class only orchestrates a single request.
 */
export class OpenAICompatibleProvider implements IProvider {
  constructor(private cfg: IOpenAICompatibleConfig) {}

  /**
   * Hot-swap the endpoint/model/key (used by `/model` to switch live). The
   * running session keeps this provider reference, so the next request picks
   * up the new config without a restart.
   */
  reconfigure(cfg: IOpenAICompatibleConfig): void {
    this.cfg = cfg;
  }

  /** The current config — read by the CLI for the model/endpoint status line. */
  get config(): IOpenAICompatibleConfig {
    return this.cfg;
  }

  /**
   * Send one chat-completion request and return the model's reply.
   *
   * @param messages Conversation to send; each entry is mapped through `toWire`.
   * @param opts     Per-call options. Supplying `opts.onToken` switches the
   *                 request to streaming.
   * @returns The reply assembled by `streamResponse` (streaming) or parsed by
   *          `parseResponse` (non-streaming).
   * @throws Error `model request failed: <status> <detail>` when the server
   *         answers with a non-OK status.
   */
  async complete(
    messages: IChatMessage[],
    opts: ICompleteOptions = {}
  ): Promise<IModelResponse> {
    const cfg = this.cfg;
    const doFetch = cfg.fetch ?? fetch;
    // Bound once so a single check both selects streaming and narrows the type.
    const onToken = opts.onToken;
    const headers: Record<string, string> = {
      "content-type": "application/json",
    };

    if (cfg.apiKey !== undefined) {
      headers.authorization = `Bearer ${cfg.apiKey}`;
    }

    // Optional fields are spread in only when set, so unset options never
    // reach the server and its own defaults apply.
    const body = JSON.stringify({
      model: cfg.model,
      messages: messages.map(toWire),
      max_tokens: cfg.maxTokens ?? PROVIDER_LIMITS.maxTokens,
      temperature: opts.temperature,
      ...(cfg.repetitionPenalty === undefined
        ? {}
        : { repetition_penalty: cfg.repetitionPenalty }),
      ...(opts.tools === undefined
        ? {}
        : { tools: opts.tools, tool_choice: opts.toolChoice ?? "auto" }),
      ...(opts.enableThinking === undefined
        ? {}
        : { chat_template_kwargs: { enable_thinking: opts.enableThinking } }),
      ...(opts.thinkingTokenBudget === undefined
        ? {}
        : { thinking_token_budget: opts.thinkingTokenBudget }),
      // include_usage → the stream emits a final chunk carrying token `usage`
      // (otherwise a streamed response reports none). Non-stream replies carry
      // it by default.
      ...(onToken === undefined
        ? {}
        : { stream: true, stream_options: { include_usage: true } }),
    });

    // Tolerate a base URL configured with trailing slash(es): joining it
    // verbatim would yield `…/v1//chat/completions`.
    const endpoint = `${cfg.baseUrl.replace(/\/+$/, "")}/chat/completions`;

    // Retry transient CONNECTION blips (socket close / unable-to-connect) — the
    // connect happens before any stream starts, so retrying is safe for both
    // streaming and non-streaming. Essential for a long-running CLI; also stops
    // a network hiccup from wrecking an eval run.
    const res = await fetchWithRetry(
      doFetch,
      endpoint,
      headers,
      body,
      cfg.timeoutMs ?? PROVIDER_LIMITS.requestTimeoutMs,
      opts.signal
    );

    if (!res.ok) {
      const detail = await responseDetail(res);

      throw new Error(
        `model request failed: ${res.status}${detail.length > 0 ? ` ${detail}` : ""}`
      );
    }

    if (onToken !== undefined) {
      return streamResponse(res, onToken, opts.ttsrManager);
    }

    const data: unknown = await res.json();

    return parseResponse(data);
  }
}
|
|
106
|
+
|
|
107
|
+
/**
 * Best-effort read of a response body for use in an error message.
 *
 * @param res Response whose body should be read.
 * @returns The body text, trimmed and cut to at most 1000 characters, or an
 *          empty string when the body cannot be read.
 */
async function responseDetail(res: Response): Promise<string> {
  let raw: string;

  try {
    raw = await res.text();
  } catch {
    // Deliberately best-effort: an unreadable body must not mask the status error.
    return "";
  }

  const cleaned = raw.trim();

  return cleaned.slice(0, 1000);
}
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
import type { TokenChannel } from "./inference.types";
|
|
2
|
+
|
|
3
|
+
/**
 * Tuning for the guard against degenerate repetition loops: the local model,
 * at temperature 0, can spew the same line (or a fixed template such as
 * "I will ensure it is X.") until it reaches max_tokens without ever emitting
 * a tool call. The guard exists so such a loop cannot hang the CLI: the stream
 * is aborted as soon as a loop is detected, instead of burning a full
 * 16k-token generation (followed by a nudge, and another loop).
 *
 * Only the prose channels (reasoning + content) are watched. File content
 * travels in tool-call ARGUMENTS, never in these channels, so code cannot
 * cause a false positive. Thresholds are deliberately high.
 *
 * Sliding-window checks can only notice repetition whose PERIOD fits inside
 * WINDOW. A model that re-prints a large block (say a ~30-line function plus
 * paragraphs of "wait, I think I see the issue…") loops with a period well
 * above 24, so every window slice looks distinct. The period-agnostic counter
 * (LONG_LINE_LEN / GLOBAL_REPEAT_LIMIT) covers that case: one long line
 * emitted many times across the whole generation is a loop regardless of how
 * large the repeating block is.
 */
const WINDOW = 24;
/**
 * Lines shorter than this after trimming (blanks, lone braces, indentation)
 * are ignored; only substantial lines count toward a loop.
 */
const MIN_LINE_LEN = 6;
/** At most this many distinct lines in a full window means near-exact repetition. */
const MAX_DISTINCT = 3;
/** At least this many window lines sharing their first PREFIX_WORDS words means templated repetition. */
const PREFIX_MATCH = 20;
/** Number of leading words compared by the templated-repetition check. */
const PREFIX_WORDS = 4;
/**
 * A line at least this long that recurs verbatim `GLOBAL_REPEAT_LIMIT` times
 * anywhere in the stream is treated as a loop — long lines do not recur
 * exactly in real prose/code.
 */
const LONG_LINE_LEN = 20;
/** Verbatim occurrences of one long line that count as a loop. */
const GLOBAL_REPEAT_LIMIT = 5;

/**
 * Markers showing the model has begun emitting STRUCTURED tool calls into the
 * content channel. A server tool-call-parser mismatch (e.g. atlas-spark's
 * Qwen3.5-native `<function=…>` XML, which Atlas's parsers don't match) leaves
 * the calls in `content` instead of `tool_calls`. From that point content is
 * no longer prose, so the prose-loop guard must stand down for it (the leaked
 * calls are salvaged + deduped downstream).
 */
const TOOL_MARKUP_RE = /<function=|<tool_call>/i;

/** The channels the guard watches. */
type ProseChannel = "reasoning" | "content";
|
|
46
|
+
|
|
47
|
+
/**
 * Watches the prose channels of a token stream and reports when one of them
 * has fallen into a repetition loop.
 */
export class StreamGuard {
  /** Most recent substantial lines per channel, capped at WINDOW entries. */
  private readonly lines: Record<ProseChannel, string[]> = {
    reasoning: [],
    content: [],
  };
  /** Text received since the last newline, per channel (the unfinished line). */
  private readonly partial: Record<ProseChannel, string> = {
    reasoning: "",
    content: "",
  };
  /**
   * Per-channel occurrence count of every long line (LONG_LINE_LEN or more)
   * seen across the WHOLE stream; backs the period-agnostic large-block check.
   */
  private readonly counts: Record<ProseChannel, Map<string, number>> = {
    reasoning: new Map(),
    content: new Map(),
  };
  /**
   * Latched once tool-call markup leaks into the content channel; from then
   * on the prose-loop checks are skipped for content (see TOOL_MARKUP_RE).
   */
  private contentIsToolMarkup = false;

  /**
   * Feed one streamed token.
   *
   * @param text    The token text.
   * @param channel Channel the token arrived on. `tool` is never inspected:
   *                tool-call output is structured, not a loop to abort.
   * @returns `true` once the channel has degenerated, otherwise `false`.
   */
  observe(text: string, channel: TokenChannel): boolean {
    if (channel === "tool") {
      return false;
    }

    const prose: ProseChannel = channel;
    const isContent = prose === "content";
    const buffered = this.partial[prose] + text;

    if (isContent && !this.contentIsToolMarkup && TOOL_MARKUP_RE.test(buffered)) {
      this.contentIsToolMarkup = true;
    }

    const lastBreak = buffered.lastIndexOf("\n");

    // No newline yet: nothing is complete, keep accumulating the current line.
    if (lastBreak === -1) {
      this.partial[prose] = buffered;

      return false;
    }

    // Whatever follows the final newline is the new unfinished line.
    this.partial[prose] = buffered.slice(lastBreak + 1);

    // Content has turned into leaked tool-call markup, not prose: the buffer
    // was drained above (so it stays bounded) but the loop checks are skipped.
    if (isContent && this.contentIsToolMarkup) {
      return false;
    }

    const completed = buffered.slice(0, lastBreak).split("\n");

    // `some` stops at the first line that trips a check.
    return completed.some((raw) => this.lineTripsGuard(raw.trim(), prose));
  }

  /** Record one completed, trimmed line and report whether it reveals a loop. */
  private lineTripsGuard(line: string, channel: ProseChannel): boolean {
    if (line.length < MIN_LINE_LEN) {
      return false;
    }

    // Period-agnostic check: a long line repeated many times anywhere in the
    // stream is a loop even when the repeating BLOCK is larger than WINDOW,
    // which the sliding-window checks would miss.
    if (line.length >= LONG_LINE_LEN) {
      const tally = this.counts[channel];
      const occurrences = (tally.get(line) ?? 0) + 1;

      tally.set(line, occurrences);

      if (occurrences >= GLOBAL_REPEAT_LIMIT) {
        return true;
      }
    }

    const recent = this.lines[channel];

    recent.push(line);

    if (recent.length > WINDOW) {
      recent.shift();
    }

    // The window checks only run once a full WINDOW of lines is available.
    return recent.length === WINDOW && isRepetitive(recent);
  }
}
|
|
131
|
+
|
|
132
|
+
/**
 * Decide whether a window of recent lines looks like a repetition loop.
 *
 * Three signals are checked in order:
 * 1. near-exact repetition: no more than MAX_DISTINCT distinct lines;
 * 2. block repetition: the model loops a multi-line block (e.g. re-printing
 *    the same "cat X / npx tsc / echo …" sequence), so individual lines vary
 *    but at most half of WINDOW's worth of lines are distinct;
 * 3. templated repetition: PREFIX_MATCH or more lines share their first
 *    PREFIX_WORDS whitespace-separated words.
 *
 * @param window Recent lines to examine.
 * @returns `true` when any of the signals fires.
 */
function isRepetitive(window: string[]): boolean {
  const uniqueLines = new Set(window).size;

  if (uniqueLines <= MAX_DISTINCT || uniqueLines <= Math.floor(WINDOW / 2)) {
    return true;
  }

  const stemTally = new Map<string, number>();

  // `some` stops as soon as one prefix reaches the threshold.
  return window.some((entry) => {
    const stem = entry.split(/\s+/).slice(0, PREFIX_WORDS).join(" ");
    const hits = (stemTally.get(stem) ?? 0) + 1;

    stemTally.set(stem, hits);

    return hits >= PREFIX_MATCH;
  });
}
|