@gajae-code/ai 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +2644 -0
- package/README.md +1181 -0
- package/dist/types/api-registry.d.ts +30 -0
- package/dist/types/auth-broker/client.d.ts +66 -0
- package/dist/types/auth-broker/index.d.ts +5 -0
- package/dist/types/auth-broker/refresher.d.ts +25 -0
- package/dist/types/auth-broker/remote-store.d.ts +96 -0
- package/dist/types/auth-broker/server.d.ts +32 -0
- package/dist/types/auth-broker/types.d.ts +105 -0
- package/dist/types/auth-broker/wire-schemas.d.ts +412 -0
- package/dist/types/auth-gateway/http.d.ts +39 -0
- package/dist/types/auth-gateway/index.d.ts +3 -0
- package/dist/types/auth-gateway/server.d.ts +17 -0
- package/dist/types/auth-gateway/types.d.ts +115 -0
- package/dist/types/auth-storage.d.ts +641 -0
- package/dist/types/cli.d.ts +2 -0
- package/dist/types/index.d.ts +49 -0
- package/dist/types/model-cache.d.ts +17 -0
- package/dist/types/model-manager.d.ts +62 -0
- package/dist/types/model-thinking.d.ts +71 -0
- package/dist/types/models.d.ts +12 -0
- package/dist/types/provider-details.d.ts +24 -0
- package/dist/types/provider-models/bundled-references.d.ts +4 -0
- package/dist/types/provider-models/descriptors.d.ts +48 -0
- package/dist/types/provider-models/google.d.ts +20 -0
- package/dist/types/provider-models/index.d.ts +5 -0
- package/dist/types/provider-models/ollama.d.ts +7 -0
- package/dist/types/provider-models/openai-compat.d.ts +237 -0
- package/dist/types/provider-models/special.d.ts +16 -0
- package/dist/types/providers/amazon-bedrock.d.ts +36 -0
- package/dist/types/providers/anthropic-messages-server-schema.d.ts +450 -0
- package/dist/types/providers/anthropic-messages-server.d.ts +17 -0
- package/dist/types/providers/anthropic.d.ts +188 -0
- package/dist/types/providers/aws-credentials.d.ts +43 -0
- package/dist/types/providers/aws-eventstream.d.ts +38 -0
- package/dist/types/providers/aws-sigv4.d.ts +55 -0
- package/dist/types/providers/azure-openai-responses.d.ts +15 -0
- package/dist/types/providers/cursor/gen/agent_pb.d.ts +13022 -0
- package/dist/types/providers/cursor.d.ts +42 -0
- package/dist/types/providers/error-message.d.ts +27 -0
- package/dist/types/providers/github-copilot-headers.d.ts +40 -0
- package/dist/types/providers/gitlab-duo.d.ts +27 -0
- package/dist/types/providers/google-auth.d.ts +24 -0
- package/dist/types/providers/google-gemini-cli.d.ts +72 -0
- package/dist/types/providers/google-gemini-headers.d.ts +18 -0
- package/dist/types/providers/google-shared.d.ts +163 -0
- package/dist/types/providers/google-types.d.ts +138 -0
- package/dist/types/providers/google-vertex.d.ts +7 -0
- package/dist/types/providers/google.d.ts +4 -0
- package/dist/types/providers/grammar.d.ts +1 -0
- package/dist/types/providers/kimi.d.ts +27 -0
- package/dist/types/providers/mock.d.ts +175 -0
- package/dist/types/providers/ollama.d.ts +6 -0
- package/dist/types/providers/openai-anthropic-shim.d.ts +31 -0
- package/dist/types/providers/openai-chat-server-schema.d.ts +814 -0
- package/dist/types/providers/openai-chat-server.d.ts +16 -0
- package/dist/types/providers/openai-codex/constants.d.ts +26 -0
- package/dist/types/providers/openai-codex/request-transformer.d.ts +49 -0
- package/dist/types/providers/openai-codex/response-handler.d.ts +17 -0
- package/dist/types/providers/openai-codex-responses.d.ts +67 -0
- package/dist/types/providers/openai-completions-compat.d.ts +25 -0
- package/dist/types/providers/openai-completions.d.ts +33 -0
- package/dist/types/providers/openai-responses-server-schema.d.ts +392 -0
- package/dist/types/providers/openai-responses-server.d.ts +17 -0
- package/dist/types/providers/openai-responses-shared.d.ts +89 -0
- package/dist/types/providers/openai-responses.d.ts +32 -0
- package/dist/types/providers/pi-native-client.d.ts +13 -0
- package/dist/types/providers/pi-native-server.d.ts +68 -0
- package/dist/types/providers/register-builtins.d.ts +31 -0
- package/dist/types/providers/synthetic.d.ts +26 -0
- package/dist/types/providers/transform-messages.d.ts +12 -0
- package/dist/types/providers/vision-guard.d.ts +8 -0
- package/dist/types/rate-limit-utils.d.ts +19 -0
- package/dist/types/stream.d.ts +24 -0
- package/dist/types/types.d.ts +746 -0
- package/dist/types/usage/claude.d.ts +3 -0
- package/dist/types/usage/gemini.d.ts +2 -0
- package/dist/types/usage/github-copilot.d.ts +7 -0
- package/dist/types/usage/google-antigravity.d.ts +2 -0
- package/dist/types/usage/kimi.d.ts +2 -0
- package/dist/types/usage/minimax-code.d.ts +2 -0
- package/dist/types/usage/openai-codex.d.ts +3 -0
- package/dist/types/usage/shared.d.ts +1 -0
- package/dist/types/usage/zai.d.ts +2 -0
- package/dist/types/usage.d.ts +258 -0
- package/dist/types/utils/abort.d.ts +19 -0
- package/dist/types/utils/anthropic-auth.d.ts +31 -0
- package/dist/types/utils/discovery/antigravity.d.ts +61 -0
- package/dist/types/utils/discovery/codex.d.ts +38 -0
- package/dist/types/utils/discovery/cursor.d.ts +23 -0
- package/dist/types/utils/discovery/gemini.d.ts +25 -0
- package/dist/types/utils/discovery/index.d.ts +4 -0
- package/dist/types/utils/discovery/openai-compatible.d.ts +72 -0
- package/dist/types/utils/event-stream.d.ts +28 -0
- package/dist/types/utils/fireworks-model-id.d.ts +10 -0
- package/dist/types/utils/foundry.d.ts +1 -0
- package/dist/types/utils/h2-fetch.d.ts +22 -0
- package/dist/types/utils/http-inspector.d.ts +31 -0
- package/dist/types/utils/idle-iterator.d.ts +67 -0
- package/dist/types/utils/json-parse.d.ts +10 -0
- package/dist/types/utils/oauth/alibaba-coding-plan.d.ts +18 -0
- package/dist/types/utils/oauth/anthropic.d.ts +22 -0
- package/dist/types/utils/oauth/api-key-login.d.ts +35 -0
- package/dist/types/utils/oauth/api-key-validation.d.ts +27 -0
- package/dist/types/utils/oauth/callback-server.d.ts +57 -0
- package/dist/types/utils/oauth/cerebras.d.ts +1 -0
- package/dist/types/utils/oauth/cloudflare-ai-gateway.d.ts +18 -0
- package/dist/types/utils/oauth/cursor.d.ts +15 -0
- package/dist/types/utils/oauth/deepseek.d.ts +10 -0
- package/dist/types/utils/oauth/firepass.d.ts +1 -0
- package/dist/types/utils/oauth/fireworks.d.ts +1 -0
- package/dist/types/utils/oauth/github-copilot.d.ts +38 -0
- package/dist/types/utils/oauth/gitlab-duo.d.ts +3 -0
- package/dist/types/utils/oauth/google-antigravity.d.ts +11 -0
- package/dist/types/utils/oauth/google-gemini-cli.d.ts +10 -0
- package/dist/types/utils/oauth/google-oauth-shared.d.ts +28 -0
- package/dist/types/utils/oauth/huggingface.d.ts +19 -0
- package/dist/types/utils/oauth/index.d.ts +38 -0
- package/dist/types/utils/oauth/kagi.d.ts +17 -0
- package/dist/types/utils/oauth/kilo.d.ts +5 -0
- package/dist/types/utils/oauth/kimi.d.ts +21 -0
- package/dist/types/utils/oauth/litellm.d.ts +18 -0
- package/dist/types/utils/oauth/lm-studio.d.ts +17 -0
- package/dist/types/utils/oauth/minimax-code.d.ts +28 -0
- package/dist/types/utils/oauth/moonshot.d.ts +1 -0
- package/dist/types/utils/oauth/nanogpt.d.ts +1 -0
- package/dist/types/utils/oauth/nvidia.d.ts +18 -0
- package/dist/types/utils/oauth/ollama-cloud.d.ts +2 -0
- package/dist/types/utils/oauth/ollama.d.ts +18 -0
- package/dist/types/utils/oauth/openai-codex.d.ts +21 -0
- package/dist/types/utils/oauth/opencode.d.ts +18 -0
- package/dist/types/utils/oauth/parallel.d.ts +17 -0
- package/dist/types/utils/oauth/perplexity.d.ts +9 -0
- package/dist/types/utils/oauth/pkce.d.ts +8 -0
- package/dist/types/utils/oauth/qianfan.d.ts +17 -0
- package/dist/types/utils/oauth/qwen-portal.d.ts +19 -0
- package/dist/types/utils/oauth/synthetic.d.ts +1 -0
- package/dist/types/utils/oauth/tavily.d.ts +17 -0
- package/dist/types/utils/oauth/together.d.ts +1 -0
- package/dist/types/utils/oauth/types.d.ts +44 -0
- package/dist/types/utils/oauth/venice.d.ts +18 -0
- package/dist/types/utils/oauth/vercel-ai-gateway.d.ts +18 -0
- package/dist/types/utils/oauth/vllm.d.ts +16 -0
- package/dist/types/utils/oauth/xiaomi.d.ts +19 -0
- package/dist/types/utils/oauth/zai.d.ts +18 -0
- package/dist/types/utils/oauth/zenmux.d.ts +1 -0
- package/dist/types/utils/overflow.d.ts +54 -0
- package/dist/types/utils/parse-bind.d.ts +23 -0
- package/dist/types/utils/provider-response.d.ts +3 -0
- package/dist/types/utils/retry-after.d.ts +3 -0
- package/dist/types/utils/retry.d.ts +26 -0
- package/dist/types/utils/schema/adapt.d.ts +24 -0
- package/dist/types/utils/schema/compatibility.d.ts +30 -0
- package/dist/types/utils/schema/dereference.d.ts +11 -0
- package/dist/types/utils/schema/draft.d.ts +10 -0
- package/dist/types/utils/schema/equality.d.ts +4 -0
- package/dist/types/utils/schema/fields.d.ts +49 -0
- package/dist/types/utils/schema/index.d.ts +13 -0
- package/dist/types/utils/schema/json-schema-validator.d.ts +12 -0
- package/dist/types/utils/schema/meta-validator.d.ts +2 -0
- package/dist/types/utils/schema/normalize.d.ts +93 -0
- package/dist/types/utils/schema/spill.d.ts +8 -0
- package/dist/types/utils/schema/stamps.d.ts +25 -0
- package/dist/types/utils/schema/types.d.ts +4 -0
- package/dist/types/utils/schema/wire.d.ts +54 -0
- package/dist/types/utils/schema/zod-decontaminate.d.ts +31 -0
- package/dist/types/utils/sse-debug.d.ts +10 -0
- package/dist/types/utils/tool-call-healing.d.ts +71 -0
- package/dist/types/utils/tool-choice.d.ts +50 -0
- package/dist/types/utils/validation.d.ts +17 -0
- package/dist/types/utils.d.ts +28 -0
- package/package.json +146 -0
- package/src/api-registry.ts +96 -0
- package/src/auth-broker/client.ts +358 -0
- package/src/auth-broker/index.ts +5 -0
- package/src/auth-broker/refresher.ts +127 -0
- package/src/auth-broker/remote-store.ts +623 -0
- package/src/auth-broker/server.ts +644 -0
- package/src/auth-broker/types.ts +127 -0
- package/src/auth-broker/wire-schemas.ts +200 -0
- package/src/auth-gateway/http.ts +194 -0
- package/src/auth-gateway/index.ts +3 -0
- package/src/auth-gateway/server.ts +717 -0
- package/src/auth-gateway/types.ts +134 -0
- package/src/auth-storage.ts +4104 -0
- package/src/cli.ts +262 -0
- package/src/index.ts +54 -0
- package/src/model-cache.ts +129 -0
- package/src/model-manager.ts +450 -0
- package/src/model-thinking.ts +691 -0
- package/src/models.json +73853 -0
- package/src/models.json.d.ts +9 -0
- package/src/models.ts +56 -0
- package/src/prompts/turn-aborted-guidance.md +4 -0
- package/src/provider-details.ts +90 -0
- package/src/provider-models/bundled-references.ts +38 -0
- package/src/provider-models/descriptors.ts +308 -0
- package/src/provider-models/google.ts +91 -0
- package/src/provider-models/index.ts +5 -0
- package/src/provider-models/ollama.ts +153 -0
- package/src/provider-models/openai-compat.ts +2275 -0
- package/src/provider-models/special.ts +67 -0
- package/src/providers/amazon-bedrock.ts +849 -0
- package/src/providers/anthropic-messages-server-schema.ts +229 -0
- package/src/providers/anthropic-messages-server.ts +677 -0
- package/src/providers/anthropic.ts +2696 -0
- package/src/providers/aws-credentials.ts +501 -0
- package/src/providers/aws-eventstream.ts +185 -0
- package/src/providers/aws-sigv4.ts +218 -0
- package/src/providers/azure-openai-responses.ts +337 -0
- package/src/providers/cursor/gen/agent_pb.ts +15274 -0
- package/src/providers/cursor/proto/agent.proto +3526 -0
- package/src/providers/cursor/proto/buf.gen.yaml +6 -0
- package/src/providers/cursor/proto/buf.yaml +17 -0
- package/src/providers/cursor.ts +2561 -0
- package/src/providers/error-message.ts +21 -0
- package/src/providers/github-copilot-headers.ts +140 -0
- package/src/providers/gitlab-duo.ts +372 -0
- package/src/providers/google-auth.ts +252 -0
- package/src/providers/google-gemini-cli.ts +795 -0
- package/src/providers/google-gemini-headers.ts +41 -0
- package/src/providers/google-shared.ts +902 -0
- package/src/providers/google-types.ts +167 -0
- package/src/providers/google-vertex.ts +88 -0
- package/src/providers/google.ts +41 -0
- package/src/providers/grammar.ts +70 -0
- package/src/providers/kimi.ts +52 -0
- package/src/providers/mock.ts +500 -0
- package/src/providers/ollama.ts +544 -0
- package/src/providers/openai-anthropic-shim.ts +138 -0
- package/src/providers/openai-chat-server-schema.ts +243 -0
- package/src/providers/openai-chat-server.ts +628 -0
- package/src/providers/openai-codex/constants.ts +43 -0
- package/src/providers/openai-codex/request-transformer.ts +161 -0
- package/src/providers/openai-codex/response-handler.ts +81 -0
- package/src/providers/openai-codex-responses.ts +2598 -0
- package/src/providers/openai-completions-compat.ts +279 -0
- package/src/providers/openai-completions.ts +1853 -0
- package/src/providers/openai-responses-server-schema.ts +290 -0
- package/src/providers/openai-responses-server.ts +1183 -0
- package/src/providers/openai-responses-shared.ts +800 -0
- package/src/providers/openai-responses.ts +621 -0
- package/src/providers/pi-native-client.ts +228 -0
- package/src/providers/pi-native-server.ts +210 -0
- package/src/providers/register-builtins.ts +412 -0
- package/src/providers/synthetic.ts +50 -0
- package/src/providers/transform-messages.ts +309 -0
- package/src/providers/vision-guard.ts +31 -0
- package/src/rate-limit-utils.ts +84 -0
- package/src/stream.ts +895 -0
- package/src/types.ts +884 -0
- package/src/usage/claude.ts +431 -0
- package/src/usage/gemini.ts +250 -0
- package/src/usage/github-copilot.ts +421 -0
- package/src/usage/google-antigravity.ts +201 -0
- package/src/usage/kimi.ts +271 -0
- package/src/usage/minimax-code.ts +31 -0
- package/src/usage/openai-codex.ts +503 -0
- package/src/usage/shared.ts +10 -0
- package/src/usage/zai.ts +247 -0
- package/src/usage.ts +183 -0
- package/src/utils/abort.ts +51 -0
- package/src/utils/anthropic-auth.ts +87 -0
- package/src/utils/discovery/antigravity.ts +261 -0
- package/src/utils/discovery/codex.ts +371 -0
- package/src/utils/discovery/cursor.ts +306 -0
- package/src/utils/discovery/gemini.ts +248 -0
- package/src/utils/discovery/index.ts +4 -0
- package/src/utils/discovery/openai-compatible.ts +224 -0
- package/src/utils/event-stream.ts +142 -0
- package/src/utils/fireworks-model-id.ts +30 -0
- package/src/utils/foundry.ts +8 -0
- package/src/utils/h2-fetch.ts +60 -0
- package/src/utils/http-inspector.ts +176 -0
- package/src/utils/idle-iterator.ts +250 -0
- package/src/utils/json-parse.ts +148 -0
- package/src/utils/oauth/alibaba-coding-plan.ts +59 -0
- package/src/utils/oauth/anthropic.ts +200 -0
- package/src/utils/oauth/api-key-login.ts +87 -0
- package/src/utils/oauth/api-key-validation.ts +92 -0
- package/src/utils/oauth/callback-server.ts +276 -0
- package/src/utils/oauth/cerebras.ts +16 -0
- package/src/utils/oauth/cloudflare-ai-gateway.ts +48 -0
- package/src/utils/oauth/cursor.ts +157 -0
- package/src/utils/oauth/deepseek.ts +53 -0
- package/src/utils/oauth/firepass.ts +24 -0
- package/src/utils/oauth/fireworks.ts +15 -0
- package/src/utils/oauth/github-copilot.ts +362 -0
- package/src/utils/oauth/gitlab-duo.ts +123 -0
- package/src/utils/oauth/google-antigravity.ts +200 -0
- package/src/utils/oauth/google-gemini-cli.ts +256 -0
- package/src/utils/oauth/google-oauth-shared.ts +110 -0
- package/src/utils/oauth/huggingface.ts +62 -0
- package/src/utils/oauth/index.ts +444 -0
- package/src/utils/oauth/kagi.ts +47 -0
- package/src/utils/oauth/kilo.ts +87 -0
- package/src/utils/oauth/kimi.ts +254 -0
- package/src/utils/oauth/litellm.ts +47 -0
- package/src/utils/oauth/lm-studio.ts +38 -0
- package/src/utils/oauth/minimax-code.ts +78 -0
- package/src/utils/oauth/moonshot.ts +16 -0
- package/src/utils/oauth/nanogpt.ts +15 -0
- package/src/utils/oauth/nvidia.ts +70 -0
- package/src/utils/oauth/oauth.html +199 -0
- package/src/utils/oauth/ollama-cloud.ts +28 -0
- package/src/utils/oauth/ollama.ts +47 -0
- package/src/utils/oauth/openai-codex.ts +299 -0
- package/src/utils/oauth/opencode.ts +49 -0
- package/src/utils/oauth/parallel.ts +46 -0
- package/src/utils/oauth/perplexity.ts +206 -0
- package/src/utils/oauth/pkce.ts +18 -0
- package/src/utils/oauth/qianfan.ts +58 -0
- package/src/utils/oauth/qwen-portal.ts +60 -0
- package/src/utils/oauth/synthetic.ts +16 -0
- package/src/utils/oauth/tavily.ts +46 -0
- package/src/utils/oauth/together.ts +16 -0
- package/src/utils/oauth/types.ts +94 -0
- package/src/utils/oauth/venice.ts +59 -0
- package/src/utils/oauth/vercel-ai-gateway.ts +47 -0
- package/src/utils/oauth/vllm.ts +40 -0
- package/src/utils/oauth/xiaomi.ts +137 -0
- package/src/utils/oauth/zai.ts +60 -0
- package/src/utils/oauth/zenmux.ts +15 -0
- package/src/utils/overflow.ts +137 -0
- package/src/utils/parse-bind.ts +54 -0
- package/src/utils/provider-response.ts +30 -0
- package/src/utils/retry-after.ts +110 -0
- package/src/utils/retry.ts +54 -0
- package/src/utils/schema/CONSTRAINTS.md +164 -0
- package/src/utils/schema/adapt.ts +36 -0
- package/src/utils/schema/compatibility.ts +435 -0
- package/src/utils/schema/dereference.ts +98 -0
- package/src/utils/schema/draft.ts +341 -0
- package/src/utils/schema/equality.ts +97 -0
- package/src/utils/schema/fields.ts +190 -0
- package/src/utils/schema/index.ts +13 -0
- package/src/utils/schema/json-schema-validator.ts +577 -0
- package/src/utils/schema/meta-validator.ts +167 -0
- package/src/utils/schema/normalize.ts +1588 -0
- package/src/utils/schema/spill.ts +43 -0
- package/src/utils/schema/stamps.ts +97 -0
- package/src/utils/schema/types.ts +11 -0
- package/src/utils/schema/wire.ts +213 -0
- package/src/utils/schema/zod-decontaminate.ts +331 -0
- package/src/utils/sse-debug.ts +289 -0
- package/src/utils/tool-call-healing.ts +271 -0
- package/src/utils/tool-choice.ts +99 -0
- package/src/utils/validation.ts +1019 -0
- package/src/utils.ts +166 -0
|
@@ -0,0 +1,289 @@
|
|
|
1
|
+
import type { ServerSentEvent } from "@gajae-code/utils";
|
|
2
|
+
import type { RawSseEvent } from "../types";
|
|
3
|
+
|
|
4
|
+
type FetchFunction = (input: string | URL | Request, init?: RequestInit) => Promise<Response>;
|
|
5
|
+
type FetchWithPreconnect = FetchFunction & { preconnect?: typeof fetch.preconnect };
|
|
6
|
+
|
|
7
|
+
type RawSseObserver = (event: RawSseEvent) => void;
|
|
8
|
+
|
|
9
|
+
/**
 * Forward one parsed SSE event to an optional diagnostic observer.
 *
 * Does nothing when no observer is attached. Anything the observer throws is
 * swallowed: raw stream observers are diagnostic only and must not affect
 * generation.
 *
 * The event is handed over as-is; `raw` is NOT cloned. The only wired observer
 * (`RawSseDebugBuffer.recordEvent`) treats `raw` as owned and never mutates
 * it; new observers must adhere to the same contract.
 *
 * @param observer - Diagnostic callback, or `undefined` when debugging is off.
 * @param event - The event to report.
 */
export function notifyRawSseEvent(observer: RawSseObserver | undefined, event: ServerSentEvent | RawSseEvent): void {
	if (observer) {
		// `ServerSentEvent` and `RawSseEvent` are structurally identical
		// (`event: string | null`, `data: string`, `raw: string[]`), so the
		// assertion only reconciles the two nominal names.
		const rawEvent = event as RawSseEvent;
		try {
			observer(rawEvent);
		} catch {
			// Intentionally ignored: a failing diagnostic hook must never
			// interfere with the stream being observed.
		}
	}
}
|
|
22
|
+
|
|
23
|
+
/**
 * Decide whether `response` is a successful Server-Sent-Events stream that
 * should be wrapped with the debug tee.
 *
 * @param response - The response returned by the underlying `fetch`.
 * @returns `true` only when the response is `ok`, has a body, and its
 *   `content-type` header names `text/event-stream` (in any letter case).
 */
function isSseResponse(response: Response): boolean {
	// `response.body` is non-null for any fetch Response with a body, but we
	// still guard because user-supplied `fetch` mocks may return `{ body: null }`
	// for empty responses and we don't want to wrap those.
	if (!response.ok || !response.body) return false;

	const contentType = response.headers.get("content-type");
	if (!contentType) return false;

	// All providers in this repo emit lowercase `text/event-stream` (anthropic,
	// openai-completions, openai-responses, azure-openai-responses,
	// google-shared, google-gemini-cli, openai-codex-responses,
	// pi-native-client, and the auth-gateway server). Media type names are
	// nevertheless case-insensitive, so normalise before matching: a proxy or
	// future provider that sends `Text/Event-Stream` still gets the debug tee
	// instead of silently falling back to the unwrapped fetch. This runs once
	// per response, so the extra `toLowerCase` is negligible.
	return contentType.toLowerCase().includes("text/event-stream");
}
|
|
37
|
+
|
|
38
|
+
// One decoder instance shared by every SSE line decode. Sharing is safe
// because lines are delimited by LF (0x0a), a single-byte ASCII value that
// never occurs inside a UTF-8 multi-byte sequence: every line is a complete
// UTF-8 run, so no decoder state has to survive from one call to the next.
const SSE_LINE_DECODER = new TextDecoder("utf-8");

/**
 * Decode the bytes `[start, end)` of `buf` as one UTF-8 SSE line.
 *
 * Performance history (kept so nobody repeats the experiment): an earlier
 * revision bypassed `TextDecoder` with an ASCII fast-path built on
 * `String.fromCharCode.apply` over chunked subarrays, hoping to recover the
 * ~9.7% `decode` self-time seen in profiles. It *regressed* total wall time:
 * `fromCharCode` became a new 7.8% hotspot, `Uint8Array` allocations grew
 * 5.3%, and `subarray` rose from 11.5% to 18.3% (a net loss of ~10pp). Bun's
 * `TextDecoder.decode` has a fast C++ ASCII path that beats chunked
 * `fromCharCode.apply` for the typical sub-1KB SSE line, so the decoder stays.
 *
 * @param buf - Buffer containing the line.
 * @param start - Inclusive start offset of the line within `buf`.
 * @param end - Exclusive end offset of the line within `buf`.
 * @returns The decoded line text.
 */
function decodeSseLine(buf: Uint8Array, start: number, end: number): string {
	// When the range spans the entire buffer, skip creating a subarray view.
	const spansWholeBuffer = start === 0 && end === buf.length;
	const lineBytes = spansWholeBuffer ? buf : buf.subarray(start, end);
	return SSE_LINE_DECODER.decode(lineBytes);
}
|
|
60
|
+
|
|
61
|
+
/**
 * Inline SSE event splitter used by the debug tee.
 *
 * Chunks are handed to `push` in arrival order while the very same bytes are
 * forwarded unchanged to the response consumer; every completed event is
 * reported to the debug observer. This replaces the previous `body.tee()` +
 * `readSseEvents` re-parse pipeline, so the byte stream is parsed exactly
 * once when a debug observer is attached.
 *
 * Field parsing intentionally mirrors `readSseEvents` in `@gajae-code/utils`:
 * only `event` and `data` are observed, `id`/`retry` are ignored, a CR
 * directly before the LF is stripped, one leading space after `:` is trimmed,
 * and multiple `data:` lines are joined with `\n`. Reusing `readSseEvents`
 * directly would require a second stream pipeline, which is exactly what this
 * class avoids.
 */
class SseTeeParser {
	readonly #observer: RawSseObserver;
	// Bytes of an unterminated line left over from the previous chunk.
	#partial: Uint8Array | null = null;
	// State of the event currently being assembled.
	#event: string | null = null;
	#data: string | null = null;
	#raw: string[] = [];

	constructor(observer: RawSseObserver) {
		this.#observer = observer;
	}

	/**
	 * Feed the next chunk of the byte stream. Every LF-terminated line in it
	 * is consumed; an unterminated tail is kept for the following chunk.
	 */
	push(chunk: Uint8Array): void {
		// Only when a partial line is pending do we pay for a concatenation,
		// so the LF scan always walks one contiguous buffer. The common case
		// (nothing pending) uses the incoming chunk directly.
		const carried = this.#partial;
		let bytes = chunk;
		if (carried) {
			bytes = new Uint8Array(carried.length + chunk.length);
			bytes.set(carried, 0);
			bytes.set(chunk, carried.length);
			this.#partial = null;
		}

		let lineStart = 0;
		while (lineStart < bytes.length) {
			const newline = bytes.indexOf(0x0a, lineStart);
			if (newline === -1) {
				// No terminator yet: stash a copy of the tail, since the
				// source `chunk` buffer may be reused upstream.
				this.#partial = bytes.slice(lineStart);
				return;
			}
			// Drop a CR that immediately precedes the LF (CRLF line endings).
			const endsWithCr = newline > lineStart && bytes[newline - 1] === 0x0d;
			this.#consumeLine(bytes, lineStart, endsWithCr ? newline - 1 : newline);
			lineStart = newline + 1;
		}
	}

	/**
	 * Signal end of stream: a pending unterminated line is treated as
	 * complete, and any event still being assembled is dispatched.
	 */
	flush(): void {
		const tail = this.#partial;
		if (tail) {
			this.#partial = null;
			const endsWithCr = tail.length > 0 && tail[tail.length - 1] === 0x0d;
			const end = endsWithCr ? tail.length - 1 : tail.length;
			if (end > 0) this.#consumeLine(tail, 0, end);
		}
		// Real services don't always close on a blank line, so emit whatever
		// event is pending.
		this.#dispatch();
	}

	// True when bytes [from, to) of `buf` spell exactly the ASCII string `name`.
	static #fieldIs(buf: Uint8Array, from: number, to: number, name: string): boolean {
		if (to - from !== name.length) return false;
		for (let k = 0; k < name.length; k++) {
			if (buf[from + k] !== name.charCodeAt(k)) return false;
		}
		return true;
	}

	// Process one line (terminator already stripped) spanning [start, end).
	#consumeLine(buf: Uint8Array, start: number, end: number): void {
		// An empty line marks the end of an event.
		if (start === end) {
			this.#dispatch();
			return;
		}

		// Decode the line exactly once. Raw observers (debug buffer) want the
		// text whatever the field kind, so comment, `id`, `retry` and unknown
		// lines pay only this decode and no extra slicing.
		const text = decodeSseLine(buf, start, end);
		this.#raw.push(text);

		// Comment line (SSE spec § 9.2.6): lines beginning with ':' are
		// heartbeats/comments and MUST NOT contribute to the event dispatch
		// state. They are kept verbatim in `raw` for diagnostic context only.
		// Heartbeats are the most common line type on long-poll provider
		// streams, so returning here skips roughly half the field parsing.
		if (buf[start] === 0x3a /* ':' */) return;

		// Byte-level field parse. Scanning bytes for the delimiter and
		// matching the field name byte-for-byte avoids `text.indexOf(':')`
		// plus two `String.slice` calls (~6% of CPU pre-optimization).
		// `colon` stops at the first ':' or at `end` when the line has none,
		// so [start, colon) is always the field name.
		let colon = start;
		while (colon < end && buf[colon] !== 0x3a) colon++;

		// The value begins after the colon; per the SSE spec a single leading
		// SP is stripped. A line without a colon has an empty value.
		let valueStart = colon < end ? colon + 1 : end;
		if (valueStart < end && buf[valueStart] === 0x20 /* ' ' */) valueStart++;

		const isEvent = SseTeeParser.#fieldIs(buf, start, colon, "event");
		const isData = !isEvent && SseTeeParser.#fieldIs(buf, start, colon, "data");
		// `id` and `retry` are intentionally ignored: providers don't use
		// them and reconnects are handled by the underlying transport.
		if (!isEvent && !isData) return;

		// `valueStart - start` is a byte offset into the line. Everything
		// before the value ("event:"/"data:" plus the optional SP) is pure
		// ASCII, so that byte offset equals the char offset in `text` and the
		// value can be sliced off without re-decoding.
		const value = valueStart === end ? "" : text.slice(valueStart - start);
		if (isEvent) {
			this.#event = value;
			return;
		}
		this.#data = this.#data === null ? value : `${this.#data}\n${value}`;
	}

	// Hands ownership of the accumulated `raw` array to the observer. The
	// observer (currently only `RawSseDebugBuffer.recordEvent`) MAY retain the
	// array; a fresh `#raw = []` is installed for the next event before the
	// observer runs, so there is no aliasing across dispatches. This contract
	// is mirrored in `notifyRawSseEvent` (no defensive clone).
	//
	// TODO(BufferOpt): once the buffer-side audit confirms it never mutates
	// `event.raw`, the defensive `[...event.raw]` clone in older call paths
	// (search for `notifyRawSseEvent`) can be dropped repository-wide.
	#dispatch(): void {
		const name = this.#event;
		const payload = this.#data;
		// Nothing was assembled (e.g. only comments or blank lines so far).
		if (name === null && payload === null) return;

		const event: RawSseEvent = { event: name, data: payload ?? "", raw: this.#raw };
		// Reset before calling out so a throwing observer cannot leave stale
		// state behind.
		this.#event = null;
		this.#data = null;
		this.#raw = [];
		try {
			this.#observer(event);
		} catch {
			// Raw stream observers are diagnostic only and must not affect generation.
		}
	}
}
|
|
225
|
+
|
|
226
|
+
/**
 * Wrap a `fetch` implementation so that every successful SSE response is
 * parsed in passing and its events are reported to `observer`.
 *
 * The response bytes reach the caller unchanged; non-SSE responses are
 * returned untouched. When `observer` is `undefined` the original `fetchImpl`
 * is returned as-is, so there is no overhead with debugging off.
 *
 * @param fetchImpl - The fetch function to wrap. An optional `preconnect`
 *   property is carried over to the wrapper.
 * @param observer - Diagnostic callback receiving each parsed SSE event.
 * @returns The wrapped fetch, or `fetchImpl` itself when no observer is given.
 */
export function wrapFetchForSseDebug(
	fetchImpl: FetchWithPreconnect,
	observer: RawSseObserver | undefined,
): FetchWithPreconnect {
	if (!observer) return fetchImpl;

	const wrapped = Object.assign(
		async (input: string | URL | Request, init?: RequestInit): Promise<Response> => {
			const response = await fetchImpl(input, init);
			if (!isSseResponse(response)) {
				return response;
			}

			// `isSseResponse` already checked the body; this repeats the check
			// so the compiler narrows `body` to non-null.
			const body = response.body;
			if (!body) return response;

			// Single-pass interception. Previously implemented as
			// `body.pipeThrough(new TransformStream({...}))`, but the WHATWG
			// TransformStream machinery imposes a per-chunk Promise boundary
			// (`#handleNumberResult` showed at 8.8% self-time in CPU profile).
			// A manual ReadableStream pulling directly from `body.getReader()`
			// skips that hop: every `read()` immediately feeds both the parser
			// and the controller in the same microtask.
			const parser = new SseTeeParser(observer);
			const reader = body.getReader();
			// Set once the consumer cancels. Cancelling the source reader
			// settles an in-flight `read()` with `done: true`, which would
			// otherwise be mistaken for a normal end of stream.
			let cancelled = false;
			const teed = new ReadableStream<Uint8Array>({
				async pull(controller) {
					try {
						const { done, value } = await reader.read();
						// The consumer went away while this read was pending:
						// do not flush a truncated event to the observer and
						// do not touch the already-cancelled controller.
						if (cancelled) return;
						if (done) {
							parser.flush();
							controller.close();
							return;
						}
						// Enqueue first so the consumer sees bytes ASAP; parser
						// dispatch is best-effort diagnostic and runs after.
						controller.enqueue(value);
						parser.push(value);
					} catch (err) {
						// Mirror TransformStream semantics: surface upstream
						// errors to the consumer; do not flush a partial event.
						controller.error(err);
					}
				},
				cancel(reason) {
					// Propagate downstream cancellation to the source body so the
					// underlying connection is released. Matches `pipeThrough`'s
					// cancel-propagation behavior; `flush()` is intentionally NOT
					// called (TransformStream skips `flush` on abort too). The
					// flag keeps a pending `pull` from flushing after the fact.
					cancelled = true;
					return reader.cancel(reason);
				},
			});

			return new Response(teed, {
				status: response.status,
				statusText: response.statusText,
				headers: response.headers,
			});
		},
		fetchImpl.preconnect ? { preconnect: fetchImpl.preconnect } : {},
	);

	return wrapped;
}
|
|
@@ -0,0 +1,271 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Streaming-safe filter for the Kimi K2 chat-template "tool-call section"
|
|
3
|
+
* grammar.
|
|
4
|
+
*
|
|
5
|
+
* Some providers hosting Kimi K2 (the native `kimi-code` API, OpenRouter,
|
|
6
|
+
* Fireworks, and others) leak the raw chat-template special tokens into
|
|
7
|
+
* `delta.content` instead of emitting structured `tool_calls`. Visually
|
|
8
|
+
* that looks like:
|
|
9
|
+
*
|
|
10
|
+
* <|tool_calls_section_begin|>
|
|
11
|
+
* <|tool_call_begin|>functions.read:0<|tool_call_argument_begin|>{"path":"foo"}<|tool_call_end|>
|
|
12
|
+
* <|tool_calls_section_end|>
|
|
13
|
+
*
|
|
14
|
+
* Without healing, the user sees the raw markers and the agent loop never
|
|
15
|
+
* sees a tool call. This module reconstructs the embedded calls and strips
|
|
16
|
+
* the markers from visible text. It is stream-aware: any partial token at
|
|
17
|
+
* the end of a chunk is held back until the next chunk arrives.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
import { parseJsonWithRepair } from "./json-parse";
|
|
21
|
+
|
|
22
|
+
// Kimi K2 chat-template marker tokens that may leak into streamed text.

/** Opens the tool-call section that wraps the individual calls. */
const TOK_SECTION_BEGIN = "<|tool_calls_section_begin|>";
/** Closes the tool-call section. */
const TOK_SECTION_END = "<|tool_calls_section_end|>";
/** Opens a single call; the text that follows is the call id (e.g. `functions.read:0`). */
const TOK_CALL_BEGIN = "<|tool_call_begin|>";
/** Closes a single call. */
const TOK_CALL_END = "<|tool_call_end|>";
/** Separates the call id from the argument payload that follows it. */
const TOK_ARG_BEGIN = "<|tool_call_argument_begin|>";

/** Every marker token, used when testing whether the buffered tail is an incomplete marker. */
const TOKENS = [TOK_SECTION_BEGIN, TOK_SECTION_END, TOK_CALL_BEGIN, TOK_CALL_END, TOK_ARG_BEGIN] as const;

/** Maximum buffered partial-token length before we give up holding back. */
const MAX_PARTIAL_HOLD = 64;
|
|
32
|
+
|
|
33
|
+
/** A tool call reconstructed from marker text that leaked into the visible stream. */
export interface HealedToolCall {
	/** Identifier assigned to the reconstructed call. */
	readonly id: string;
	/** Function name derived from the raw call id found between the markers. */
	readonly name: string;
	/** Call arguments serialized as a string (JSON when the payload could be parsed). */
	readonly arguments: string;
}
|
|
38
|
+
|
|
39
|
+
/**
 * State machine that consumes streamed text, emits visible text with all
 * Kimi tool-call markers stripped, and accumulates the embedded tool calls
 * for the caller to drain after each `feed()`.
 *
 * One instance per stream. Feed only the channel that may carry leaked
 * markers (typically `delta.content`); mixing reasoning + content into the
 * same accumulator corrupts the holdback buffer if both channels race in
 * the same chunk.
 */
export class ToolCallHealer {
	/** Raw input that has not been fully consumed yet. */
	#buffer = "";
	/** Scan position inside `#buffer`; everything before it is already processed. */
	#offset = 0;
	/** True between a section-begin and a section-end marker. */
	#inSection = false;
	/** True between a call-begin and a call-end marker (only entered inside a section). */
	#inCall = false;
	/** True once the argument marker of the current call has been seen. */
	#inArgs = false;
	/** Raw call id collected so far for the current call. */
	#pendingId = "";
	/** Raw argument payload collected so far for the current call. */
	#pendingArgs = "";
	/** Latched to true by the first section-end marker. */
	#sectionTerminated = false;
	/** Finished calls waiting to be drained by the caller. */
	readonly #completed: HealedToolCall[] = [];

	/**
	 * Feed a chunk of streamed text. Returns the portion safe to emit
	 * downstream (with all tokens stripped). Any partial token suffix is
	 * held back until the next chunk arrives or {@link flushPending} is
	 * called.
	 */
	feed(text: string): string {
		if (text.length === 0) return "";
		// Drop the already-consumed prefix before appending so the buffer
		// only ever holds the held-back tail plus the new chunk.
		this.#compact();
		this.#buffer += text;
		return this.#consume();
	}

	/**
	 * Like {@link feed}, but discards any tool calls that the chunk completes.
	 * Used when the upstream provider also emits structured `delta.tool_calls`
	 * for the same chunk: the healer still strips leaked marker text from the
	 * visible output, but the structured payload remains the single source of
	 * truth for the call list.
	 *
	 * Note that the whole pending list is cleared, including calls completed
	 * by earlier `feed()` calls that were never drained.
	 */
	consumeWithoutCalls(text: string): string {
		const clean = this.feed(text);
		if (this.#completed.length > 0) this.#completed.length = 0;
		return clean;
	}

	/**
	 * Drain accumulated tool calls. The internal list is cleared so a
	 * subsequent section in the same stream (rare) yields fresh calls.
	 */
	drainCompleted(): HealedToolCall[] {
		if (this.#completed.length === 0) return [];
		return this.#completed.splice(0, this.#completed.length);
	}

	/**
	 * Flush any held-back fragment when the stream ends. If we were mid-call
	 * the partial is dropped (emitting raw token bytes would surface markers
	 * to the user); otherwise the fragment is returned verbatim so a literal
	 * `<|` in prose is not silently lost.
	 */
	flushPending(): string {
		const tail = this.#remaining();
		this.#buffer = "";
		this.#offset = 0;
		if (this.#inCall || this.#inSection) return "";
		return tail;
	}

	/**
	 * True once a section-end marker has been consumed in this stream. The
	 * flag is latched and never resets.
	 */
	get sectionClosed(): boolean {
		return this.#sectionTerminated;
	}

	/** Unconsumed part of the buffer. */
	#remaining(): string {
		return this.#offset === 0 ? this.#buffer : this.#buffer.slice(this.#offset);
	}

	/** Discard the consumed prefix of the buffer and rewind the scan position. */
	#compact(): void {
		if (this.#offset === 0) return;
		this.#buffer = this.#buffer.slice(this.#offset);
		this.#offset = 0;
	}

	/**
	 * Scan the buffer from `#offset`, updating the section/call state on each
	 * marker and returning the text that is safe to show. Stops early when
	 * the remaining tail could still grow into a marker.
	 */
	#consume(): string {
		let clean = "";

		while (this.#offset < this.#buffer.length) {
			// The tail may be the start of a marker: wait for more input.
			if (this.#startsWithPartialToken()) break;

			if (this.#matches(TOK_SECTION_BEGIN)) {
				this.#inSection = true;
				this.#offset += TOK_SECTION_BEGIN.length;
				continue;
			}
			if (this.#matches(TOK_SECTION_END)) {
				// A call still open when its section closes can never be
				// completed. Drop it (as flushPending does for a truncated
				// stream); otherwise `#inCall` would stay set and every
				// character of the prose that follows would be swallowed
				// into the pending id/arguments.
				if (this.#inCall) this.#resetCall();
				this.#inSection = false;
				this.#sectionTerminated = true;
				this.#offset += TOK_SECTION_END.length;
				continue;
			}
			if (this.#matches(TOK_CALL_BEGIN)) {
				if (!this.#inSection) {
					// Literal mention outside a section — pass through as text so
					// docs/examples explaining tool tokens are not silently eaten.
					clean += TOK_CALL_BEGIN;
					this.#offset += TOK_CALL_BEGIN.length;
					continue;
				}
				this.#resetCall();
				this.#inCall = true;
				this.#offset += TOK_CALL_BEGIN.length;
				continue;
			}
			if (this.#matches(TOK_ARG_BEGIN)) {
				if (!this.#inSection) {
					clean += TOK_ARG_BEGIN;
					this.#offset += TOK_ARG_BEGIN.length;
					continue;
				}
				this.#inArgs = true;
				this.#offset += TOK_ARG_BEGIN.length;
				continue;
			}
			if (this.#matches(TOK_CALL_END)) {
				if (!this.#inSection || !this.#inCall) {
					// Token appeared outside an active call (e.g. an assistant
					// turn explaining the Kimi format). Emit it verbatim instead
					// of synthesizing a bogus empty tool call.
					clean += TOK_CALL_END;
					this.#offset += TOK_CALL_END.length;
					continue;
				}
				this.#finalizeCall();
				this.#offset += TOK_CALL_END.length;
				continue;
			}

			// Not a marker: the loop condition guarantees this index is in range.
			const ch = this.#buffer[this.#offset]!;
			this.#offset += 1;

			if (this.#inCall) {
				// Before the argument marker the text is the call id, after it
				// the argument payload.
				if (this.#inArgs) {
					this.#pendingArgs += ch;
				} else {
					this.#pendingId += ch;
				}
				continue;
			}

			// Inside the section but outside an individual call: swallow
			// inter-call whitespace/newlines. Outside the section: pass through.
			if (!this.#inSection) clean += ch;
		}

		return clean;
	}

	/** True if `token` occurs in the buffer exactly at the scan position. */
	#matches(token: string): boolean {
		return this.#buffer.startsWith(token, this.#offset);
	}

	/**
	 * True if the remaining buffer is a strict prefix of any known token —
	 * we need more bytes before deciding whether it's a token or prose.
	 * Capped so a stray `<|` in normal text can't grow the holdback
	 * unboundedly.
	 */
	#startsWithPartialToken(): boolean {
		const remainingLength = this.#buffer.length - this.#offset;
		if (remainingLength === 0 || remainingLength > MAX_PARTIAL_HOLD) return false;
		for (const token of TOKENS) {
			// A token no longer than the tail cannot have the tail as a strict prefix.
			if (token.length <= remainingLength) continue;
			if (this.#bufferIsPrefixOf(token, remainingLength)) return true;
		}
		return false;
	}

	/** True if the `remainingLength` characters at the scan position equal the start of `token`. */
	#bufferIsPrefixOf(token: string, remainingLength: number): boolean {
		for (let i = 0; i < remainingLength; i++) {
			if (this.#buffer[this.#offset + i] !== token[i]) return false;
		}
		return true;
	}

	/**
	 * Turn the collected id/arguments into a {@link HealedToolCall}, queue it
	 * for draining and leave the call state.
	 */
	#finalizeCall(): void {
		const rawId = this.#pendingId.trim();
		const rawArgs = this.#pendingArgs.trim();
		const name = normalizeFunctionName(rawId);
		const id = generateHealedToolCallId();

		let argsJson = rawArgs;
		if (rawArgs.length > 0) {
			try {
				// Round-trip to normalize whitespace and repair near-valid JSON.
				argsJson = JSON.stringify(parseJsonWithRepair<unknown>(rawArgs));
			} catch {
				// Leave raw; downstream parseStreamingJson absorbs the failure.
			}
		} else {
			// A call without a payload is reported with an empty object.
			argsJson = "{}";
		}

		this.#completed.push({ id, name, arguments: argsJson });
		this.#resetCall();
	}

	/** Leave the call state and forget any partially collected id/arguments. */
	#resetCall(): void {
		this.#inCall = false;
		this.#inArgs = false;
		this.#pendingId = "";
		this.#pendingArgs = "";
	}
}
|
|
252
|
+
|
|
253
|
+
/**
 * Decides whether the per-stream healer should be enabled for a model.
 *
 * The `kimi-code` and `moonshot` providers always qualify. For any other
 * provider the model id must contain `kimi` followed by `k2`, optionally
 * separated by a single `-`, `/`, `_` or `.` (case-insensitive).
 */
export function modelMayLeakKimiToolCalls(provider: string, modelId: string): boolean {
	switch (provider) {
		case "kimi-code":
		case "moonshot":
			return true;
		default:
			return /kimi[-/_.]?k2/i.test(modelId);
	}
}
|
|
262
|
+
|
|
263
|
+
/**
 * Reduce a raw call id such as `functions.read:0` to the bare function name
 * (`read`): a leading `functions.` is removed, then everything from the
 * first `:` onwards is cut off.
 */
function normalizeFunctionName(rawId: string): string {
	const prefix = "functions.";
	let name = rawId;
	if (name.startsWith(prefix)) {
		name = name.substring(prefix.length);
	}
	const cut = name.indexOf(":");
	if (cut === -1) return name;
	return name.substring(0, cut);
}
|
|
268
|
+
|
|
269
|
+
/**
 * Create an id for a reconstructed call: `call_` followed by the first 24
 * characters of a random UUID with its dashes removed.
 */
function generateHealedToolCallId(): string {
	const compactUuid = crypto.randomUUID().split("-").join("");
	return "call_" + compactUuid.substring(0, 24);
}
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Utility functions for mapping unified ToolChoice to provider-specific formats.
|
|
3
|
+
*/
|
|
4
|
+
import type { ToolChoice } from "../types";
|
|
5
|
+
|
|
6
|
+
/**
 * OpenAI Completions API tool choice format.
 * A pinned function is nested under a `function` key.
 */
export type OpenAICompletionsToolChoice =
	| "auto"
	| "none"
	| "required"
	| { type: "function"; function: { name: string } }
	| undefined;

/**
 * OpenAI Responses API tool choice format (flat structure): the pinned
 * tool's `name` sits directly beside `type`.
 */
export type OpenAIResponsesToolChoice =
	| "auto"
	| "none"
	| "required"
	| { type: "function"; name: string }
	| { type: "custom"; name: string }
	| undefined;

/**
 * Anthropic-compatible tool choice format. `"any"` takes the place of
 * OpenAI's `"required"`, and a pinned tool is written as `{ type: "tool", name }`.
 */
export type AnthropicToolChoice = "auto" | "none" | "any" | { type: "tool"; name: string } | undefined;
|
|
25
|
+
|
|
26
|
+
/**
 * Pull the pinned tool name out of a unified ToolChoice.
 *
 * Recognised shapes:
 * - `{ type: "tool", name }`
 * - `{ type: "function", function: { name } }` (nested form; wins when present)
 * - `{ type: "function", name }` (flat form)
 *
 * String choices and any other shape yield `undefined`.
 */
function extractFunctionName(choice: ToolChoice): string | undefined {
	if (typeof choice === "string") return undefined;

	switch (choice.type) {
		case "tool":
			return "name" in choice ? choice.name : undefined;
		case "function": {
			// A nested `function` object takes precedence over a flat `name`,
			// even when the nested object carries no name itself.
			if ("function" in choice && choice.function && typeof choice.function === "object") {
				const nested = choice.function as { name?: string };
				return nested.name;
			}
			return "name" in choice ? choice.name : undefined;
		}
		default:
			return undefined;
	}
}
|
|
40
|
+
|
|
41
|
+
/**
 * Translate a unified ToolChoice into the OpenAI Completions API shape.
 * - "any" → "required"
 * - "auto" / "none" / "required" are passed through
 * - a pinned tool → { type: "function", function: { name } }
 *
 * Returns `undefined` for a missing choice, an unrecognised string, or an
 * object from which no name can be extracted.
 */
export function mapToOpenAICompletionsToolChoice(choice?: ToolChoice): OpenAICompletionsToolChoice {
	if (!choice) return undefined;

	if (typeof choice !== "string") {
		const fnName = extractFunctionName(choice);
		if (!fnName) return undefined;
		return { type: "function", function: { name: fnName } };
	}

	switch (choice) {
		case "any":
			return "required";
		case "auto":
		case "none":
		case "required":
			return choice;
		default:
			return undefined;
	}
}
|
|
56
|
+
|
|
57
|
+
/**
 * Returns true when an OpenAI-completions `tool_choice` value forces a tool
 * call (`"required"` or a function-name pin), as opposed to leaving it open
 * (`"auto"`, `"none"`, or unset). Accepts `unknown` because the param shape
 * pulled from the OpenAI SDK (`ChatCompletionToolChoiceOption`) widens with
 * each release; this check only needs the open/forced bit.
 *
 * @param choice - The `tool_choice` value to classify.
 * @returns `false` for `undefined`, `null`, `"auto"` and `"none"`; `true`
 *   for every other value.
 */
export function isForcedToolChoice(choice: unknown): boolean {
	// `== null` matches both `undefined` and an explicit `null` (e.g. a JSON
	// payload carrying `"tool_choice": null`); both mean "unset", not "forced".
	if (choice == null) return false;
	return choice !== "auto" && choice !== "none";
}
|
|
68
|
+
|
|
69
|
+
/**
 * Translate a unified ToolChoice into the OpenAI Responses API shape.
 * - "any" → "required"
 * - "auto" / "none" / "required" are passed through
 * - a pinned tool → { type: "function", name } (flat structure)
 *
 * Returns `undefined` for a missing choice, an unrecognised string, or an
 * object from which no name can be extracted.
 */
export function mapToOpenAIResponsesToolChoice(choice?: ToolChoice): OpenAIResponsesToolChoice {
	if (!choice) return undefined;

	if (typeof choice !== "string") {
		const fnName = extractFunctionName(choice);
		if (!fnName) return undefined;
		return { type: "function", name: fnName };
	}

	switch (choice) {
		case "any":
			return "required";
		case "auto":
		case "none":
		case "required":
			return choice;
		default:
			return undefined;
	}
}
|
|
84
|
+
|
|
85
|
+
/**
 * Translate a unified ToolChoice into the Anthropic-compatible shape.
 * - "required" → "any"
 * - "auto" / "none" / "any" are passed through
 * - a pinned tool or function → { type: "tool", name }
 *
 * Returns `undefined` for a missing choice, an unrecognised string, or an
 * object from which no name can be extracted.
 */
export function mapToAnthropicToolChoice(choice?: ToolChoice): AnthropicToolChoice {
	if (!choice) return undefined;

	if (typeof choice !== "string") {
		const toolName = extractFunctionName(choice);
		if (!toolName) return undefined;
		return { type: "tool", name: toolName };
	}

	switch (choice) {
		case "required":
			return "any";
		case "auto":
		case "none":
		case "any":
			return choice;
		default:
			return undefined;
	}
}
|