veryfront 0.1.13 → 0.1.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (153) hide show
  1. package/esm/cli/app/data/slug-words.d.ts.map +1 -1
  2. package/esm/cli/app/data/slug-words.js +225 -90
  3. package/esm/cli/app/operations/project-creation.js +4 -3
  4. package/esm/cli/app/shell.js +1 -1
  5. package/esm/cli/app/utils.d.ts +5 -4
  6. package/esm/cli/app/utils.d.ts.map +1 -1
  7. package/esm/cli/app/utils.js +0 -23
  8. package/esm/cli/app/views/dashboard.d.ts +1 -1
  9. package/esm/cli/app/views/dashboard.d.ts.map +1 -1
  10. package/esm/cli/app/views/dashboard.js +22 -4
  11. package/esm/cli/auth/callback-server.d.ts.map +1 -1
  12. package/esm/cli/auth/callback-server.js +3 -2
  13. package/esm/cli/commands/dev/handler.d.ts.map +1 -1
  14. package/esm/cli/commands/dev/handler.js +2 -0
  15. package/esm/cli/commands/init/init-command.d.ts.map +1 -1
  16. package/esm/cli/commands/init/init-command.js +20 -3
  17. package/esm/cli/commands/init/interactive-wizard.d.ts +3 -2
  18. package/esm/cli/commands/init/interactive-wizard.d.ts.map +1 -1
  19. package/esm/cli/commands/init/interactive-wizard.js +55 -27
  20. package/esm/cli/mcp/remote-file-tools.d.ts +0 -6
  21. package/esm/cli/mcp/remote-file-tools.d.ts.map +1 -1
  22. package/esm/cli/mcp/remote-file-tools.js +37 -15
  23. package/esm/cli/shared/reserve-slug.d.ts.map +1 -1
  24. package/esm/cli/shared/reserve-slug.js +8 -3
  25. package/esm/cli/utils/env-prompt.d.ts.map +1 -1
  26. package/esm/cli/utils/env-prompt.js +3 -0
  27. package/esm/deno.d.ts +5 -1
  28. package/esm/deno.js +11 -4
  29. package/esm/src/agent/chat-handler.d.ts +4 -3
  30. package/esm/src/agent/chat-handler.d.ts.map +1 -1
  31. package/esm/src/agent/chat-handler.js +55 -4
  32. package/esm/src/agent/react/index.d.ts +1 -1
  33. package/esm/src/agent/react/index.d.ts.map +1 -1
  34. package/esm/src/agent/react/use-chat/browser-inference/browser-engine.d.ts +18 -0
  35. package/esm/src/agent/react/use-chat/browser-inference/browser-engine.d.ts.map +1 -0
  36. package/esm/src/agent/react/use-chat/browser-inference/browser-engine.js +54 -0
  37. package/esm/src/agent/react/use-chat/browser-inference/types.d.ts +43 -0
  38. package/esm/src/agent/react/use-chat/browser-inference/types.d.ts.map +1 -0
  39. package/esm/src/agent/react/use-chat/browser-inference/types.js +4 -0
  40. package/esm/src/agent/react/use-chat/browser-inference/worker-client.d.ts +23 -0
  41. package/esm/src/agent/react/use-chat/browser-inference/worker-client.d.ts.map +1 -0
  42. package/esm/src/agent/react/use-chat/browser-inference/worker-client.js +67 -0
  43. package/esm/src/agent/react/use-chat/browser-inference/worker-script.d.ts +8 -0
  44. package/esm/src/agent/react/use-chat/browser-inference/worker-script.d.ts.map +1 -0
  45. package/esm/src/agent/react/use-chat/browser-inference/worker-script.js +97 -0
  46. package/esm/src/agent/react/use-chat/index.d.ts +1 -1
  47. package/esm/src/agent/react/use-chat/index.d.ts.map +1 -1
  48. package/esm/src/agent/react/use-chat/types.d.ts +12 -0
  49. package/esm/src/agent/react/use-chat/types.d.ts.map +1 -1
  50. package/esm/src/agent/react/use-chat/use-chat.d.ts.map +1 -1
  51. package/esm/src/agent/react/use-chat/use-chat.js +120 -6
  52. package/esm/src/agent/runtime/index.d.ts.map +1 -1
  53. package/esm/src/agent/runtime/index.js +59 -7
  54. package/esm/src/build/production-build/templates.d.ts +2 -2
  55. package/esm/src/build/production-build/templates.d.ts.map +1 -1
  56. package/esm/src/build/production-build/templates.js +2 -68
  57. package/esm/src/chat/index.d.ts +1 -1
  58. package/esm/src/chat/index.d.ts.map +1 -1
  59. package/esm/src/errors/veryfront-error.d.ts +3 -0
  60. package/esm/src/errors/veryfront-error.d.ts.map +1 -1
  61. package/esm/src/platform/adapters/runtime/deno/adapter.d.ts.map +1 -1
  62. package/esm/src/platform/adapters/runtime/deno/adapter.js +24 -3
  63. package/esm/src/platform/compat/http/deno-server.d.ts.map +1 -1
  64. package/esm/src/platform/compat/http/deno-server.js +23 -2
  65. package/esm/src/provider/index.d.ts +1 -1
  66. package/esm/src/provider/index.d.ts.map +1 -1
  67. package/esm/src/provider/index.js +1 -1
  68. package/esm/src/provider/local/ai-sdk-adapter.d.ts +19 -0
  69. package/esm/src/provider/local/ai-sdk-adapter.d.ts.map +1 -0
  70. package/esm/src/provider/local/ai-sdk-adapter.js +164 -0
  71. package/esm/src/provider/local/env.d.ts +10 -0
  72. package/esm/src/provider/local/env.d.ts.map +1 -0
  73. package/esm/src/provider/local/env.js +23 -0
  74. package/esm/src/provider/local/local-engine.d.ts +61 -0
  75. package/esm/src/provider/local/local-engine.d.ts.map +1 -0
  76. package/esm/src/provider/local/local-engine.js +211 -0
  77. package/esm/src/provider/local/model-catalog.d.ts +30 -0
  78. package/esm/src/provider/local/model-catalog.d.ts.map +1 -0
  79. package/esm/src/provider/local/model-catalog.js +58 -0
  80. package/esm/src/provider/model-registry.d.ts +14 -0
  81. package/esm/src/provider/model-registry.d.ts.map +1 -1
  82. package/esm/src/provider/model-registry.js +58 -2
  83. package/esm/src/proxy/main.js +34 -6
  84. package/esm/src/proxy/server-resolver.d.ts +23 -0
  85. package/esm/src/proxy/server-resolver.d.ts.map +1 -0
  86. package/esm/src/proxy/server-resolver.js +124 -0
  87. package/esm/src/react/components/ai/chat/components/inference-badge.d.ts +8 -0
  88. package/esm/src/react/components/ai/chat/components/inference-badge.d.ts.map +1 -0
  89. package/esm/src/react/components/ai/chat/components/inference-badge.js +36 -0
  90. package/esm/src/react/components/ai/chat/components/upgrade-cta.d.ts +7 -0
  91. package/esm/src/react/components/ai/chat/components/upgrade-cta.d.ts.map +1 -0
  92. package/esm/src/react/components/ai/chat/components/upgrade-cta.js +33 -0
  93. package/esm/src/react/components/ai/chat/index.d.ts +7 -1
  94. package/esm/src/react/components/ai/chat/index.d.ts.map +1 -1
  95. package/esm/src/react/components/ai/chat/index.js +16 -4
  96. package/esm/src/sandbox/index.d.ts +31 -0
  97. package/esm/src/sandbox/index.d.ts.map +1 -0
  98. package/esm/src/sandbox/index.js +30 -0
  99. package/esm/src/sandbox/sandbox.d.ts +48 -0
  100. package/esm/src/sandbox/sandbox.d.ts.map +1 -0
  101. package/esm/src/sandbox/sandbox.js +178 -0
  102. package/esm/src/transforms/pipeline/stages/ssr-vf-modules/import-finder.d.ts.map +1 -1
  103. package/esm/src/transforms/pipeline/stages/ssr-vf-modules/import-finder.js +8 -2
  104. package/esm/src/transforms/pipeline/stages/ssr-vf-modules/index.d.ts +1 -0
  105. package/esm/src/transforms/pipeline/stages/ssr-vf-modules/index.d.ts.map +1 -1
  106. package/esm/src/transforms/pipeline/stages/ssr-vf-modules/index.js +1 -0
  107. package/esm/src/transforms/pipeline/stages/ssr-vf-modules/transform.d.ts.map +1 -1
  108. package/esm/src/transforms/pipeline/stages/ssr-vf-modules/transform.js +15 -1
  109. package/package.json +8 -1
  110. package/src/cli/app/data/slug-words.ts +225 -90
  111. package/src/cli/app/operations/project-creation.ts +3 -3
  112. package/src/cli/app/shell.ts +1 -1
  113. package/src/cli/app/utils.ts +0 -30
  114. package/src/cli/app/views/dashboard.ts +27 -4
  115. package/src/cli/auth/callback-server.ts +3 -2
  116. package/src/cli/commands/dev/handler.ts +2 -0
  117. package/src/cli/commands/init/init-command.ts +30 -3
  118. package/src/cli/commands/init/interactive-wizard.ts +62 -34
  119. package/src/cli/mcp/remote-file-tools.ts +50 -15
  120. package/src/cli/shared/reserve-slug.ts +9 -2
  121. package/src/cli/utils/env-prompt.ts +3 -0
  122. package/src/deno.js +11 -4
  123. package/src/src/agent/chat-handler.ts +57 -4
  124. package/src/src/agent/react/index.ts +2 -0
  125. package/src/src/agent/react/use-chat/browser-inference/browser-engine.ts +81 -0
  126. package/src/src/agent/react/use-chat/browser-inference/types.ts +52 -0
  127. package/src/src/agent/react/use-chat/browser-inference/worker-client.ts +89 -0
  128. package/src/src/agent/react/use-chat/browser-inference/worker-script.ts +98 -0
  129. package/src/src/agent/react/use-chat/index.ts +2 -0
  130. package/src/src/agent/react/use-chat/types.ts +20 -0
  131. package/src/src/agent/react/use-chat/use-chat.ts +148 -8
  132. package/src/src/agent/runtime/index.ts +72 -6
  133. package/src/src/build/production-build/templates.ts +2 -68
  134. package/src/src/chat/index.ts +2 -0
  135. package/src/src/errors/veryfront-error.ts +2 -1
  136. package/src/src/platform/adapters/runtime/deno/adapter.ts +25 -3
  137. package/src/src/platform/compat/http/deno-server.ts +28 -1
  138. package/src/src/provider/index.ts +1 -0
  139. package/src/src/provider/local/ai-sdk-adapter.ts +207 -0
  140. package/src/src/provider/local/env.ts +26 -0
  141. package/src/src/provider/local/local-engine.ts +288 -0
  142. package/src/src/provider/local/model-catalog.ts +73 -0
  143. package/src/src/provider/model-registry.ts +66 -2
  144. package/src/src/proxy/main.ts +41 -6
  145. package/src/src/proxy/server-resolver.ts +151 -0
  146. package/src/src/react/components/ai/chat/components/inference-badge.tsx +48 -0
  147. package/src/src/react/components/ai/chat/components/upgrade-cta.tsx +56 -0
  148. package/src/src/react/components/ai/chat/index.tsx +43 -6
  149. package/src/src/sandbox/index.ts +32 -0
  150. package/src/src/sandbox/sandbox.ts +236 -0
  151. package/src/src/transforms/pipeline/stages/ssr-vf-modules/import-finder.ts +9 -2
  152. package/src/src/transforms/pipeline/stages/ssr-vf-modules/index.ts +1 -0
  153. package/src/src/transforms/pipeline/stages/ssr-vf-modules/transform.ts +17 -0
@@ -0,0 +1,89 @@
1
+ /**
2
+ * BrowserInferenceClient — manages Web Worker lifecycle for browser-side inference.
3
+ *
4
+ * Singleton per session. Lazily creates Worker on first generate() call.
5
+ * Uses inline Blob URL approach — no separate build entry point needed.
6
+ */
7
+ import * as dntShim from "../../../../../_dnt.shims.js";
8
+
9
+
10
+ import type { WorkerRequest, WorkerResponse } from "./types.js";
11
+ import { WORKER_SCRIPT } from "./worker-script.js";
12
+
13
/** Lifecycle stages the inference Worker reports through "status" messages. */
type WorkerLifecycleStatus = "loading-runtime" | "downloading-model" | "ready" | "generating";

/**
 * Optional hooks that BrowserInferenceClient.generate() invokes as messages
 * arrive from the inference Worker. Every hook may be omitted.
 */
export interface GenerateCallbacks {
  /** Receives each status change reported by the Worker. */
  onStatus?: (status: WorkerLifecycleStatus) => void;
  /** Receives model download progress and, when reported, the file being fetched. */
  onDownloadProgress?: (progress: number, file?: string) => void;
  /** Receives the payload of each "token" message belonging to this request. */
  onToken?: (token: string) => void;
  /** Receives the final generated text when this request completes. */
  onDone?: (text: string) => void;
  /** Receives the error message when this request, or the Worker itself, fails. */
  onError?: (error: string) => void;
}
20
+
21
/** Module-wide client handed out by BrowserInferenceClient.getInstance(). */
let instance: BrowserInferenceClient | null = null;

/**
 * Owns the Web Worker used for browser-side inference.
 *
 * The Worker is created lazily from WORKER_SCRIPT via a Blob URL on the first
 * generate() call and is reused until stop() terminates it.
 */
export class BrowserInferenceClient {
  private worker: Worker | null = null;
  private blobUrl: string | null = null;

  /** Returns the shared client, creating it on first use. */
  static getInstance(): BrowserInferenceClient {
    if (instance === null) instance = new BrowserInferenceClient();
    return instance;
  }

  /**
   * Returns the live Worker, spawning it from a Blob URL when none exists.
   *
   * @throws Whatever the Worker constructor throws; the Blob URL created for
   *   the failed attempt is revoked first so it does not leak.
   */
  private ensureWorker(): Worker {
    if (this.worker) return this.worker;

    const blob = new dntShim.Blob([WORKER_SCRIPT], { type: "application/javascript" });
    const blobUrl = URL.createObjectURL(blob);

    try {
      this.worker = new Worker(blobUrl, { type: "module" });
    } catch (error) {
      // Nothing will ever load this URL, so release it before propagating.
      URL.revokeObjectURL(blobUrl);
      throw error;
    }

    this.blobUrl = blobUrl;
    return this.worker;
  }

  /**
   * Posts a "generate" request to the Worker and routes its replies to `callbacks`.
   *
   * Each call registers its own listeners, so overlapping requests keep their
   * callbacks instead of the newest call replacing the previous handlers. The
   * listeners are removed once this request reports "done" or "error", or the
   * Worker raises an error event.
   *
   * @param id - Request identifier; "token", "done" and "error" replies are
   *   only delivered when their id matches.
   * @param messages - Chat messages forwarded to the Worker unchanged.
   * @param options - Generation options forwarded to the Worker unchanged.
   * @param callbacks - Hooks invoked as replies arrive.
   */
  generate(
    id: string,
    messages: Array<{ role: string; content: string }>,
    options: { maxNewTokens?: number; temperature?: number; systemPrompt?: string },
    callbacks: GenerateCallbacks,
  ): void {
    const worker = this.ensureWorker();

    const detach = (): void => {
      worker.removeEventListener("message", handleMessage);
      worker.removeEventListener("error", handleError);
    };

    const handleMessage = (event: MessageEvent<WorkerResponse>): void => {
      const msg = event.data;
      switch (msg.type) {
        // Status and download progress carry no request id, so every
        // in-flight request is informed.
        case "status":
          callbacks.onStatus?.(msg.status);
          break;
        case "download-progress":
          callbacks.onDownloadProgress?.(msg.progress, msg.file);
          break;
        case "token":
          if (msg.id === id) callbacks.onToken?.(msg.token);
          break;
        case "done":
          if (msg.id === id) {
            detach();
            callbacks.onDone?.(msg.text);
          }
          break;
        case "error":
          if (msg.id === id) {
            detach();
            callbacks.onError?.(msg.error);
          }
          break;
      }
    };

    const handleError = (event: ErrorEvent): void => {
      detach();
      callbacks.onError?.(event.message || "Worker error");
    };

    worker.addEventListener("message", handleMessage);
    worker.addEventListener("error", handleError);

    const request: WorkerRequest = { type: "generate", id, messages, options };
    worker.postMessage(request);
  }

  /**
   * Terminates the Worker (if any) and revokes its Blob URL.
   * A later generate() call spawns a fresh Worker.
   */
  stop(): void {
    if (this.worker) {
      this.worker.terminate();
      this.worker = null;
    }
    if (this.blobUrl) {
      URL.revokeObjectURL(this.blobUrl);
      this.blobUrl = null;
    }
  }
}
@@ -0,0 +1,98 @@
1
/**
 * Inline Worker script as a string.
 *
 * Loaded via Blob URL — no separate build entry point needed.
 * Dynamically imports @huggingface/transformers from CDN inside the Worker.
 *
 * Protocol: the Worker accepts { type: "generate", id, messages, options }
 * and posts back "status", "download-progress", "token", "done" and "error"
 * messages. Requests of any other type are ignored.
 *
 * The pipeline is loaded once and shared: requests that arrive while the
 * first load is still running await that same load instead of starting
 * another runtime import and model download. A failed load is forgotten so
 * a later request can retry.
 *
 * NOTE(review): transformers.js 3.x documents streaming through a
 * TextStreamer passed as "streamer"; confirm that the "callback_function"
 * generation option is still invoked by the pinned version.
 */
export const WORKER_SCRIPT = /* js */ `
// In-flight or completed pipeline load, shared by every request.
let pipelinePromise = null;

// Imports the runtime from the CDN and builds the text-generation pipeline.
async function createTextPipeline(callbacks) {
  callbacks.onStatus("loading-runtime");

  const { pipeline: createPipeline, env } =
    await import("https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.4.2");

  env.useBrowserCache = true;
  env.allowLocalModels = false;

  callbacks.onStatus("downloading-model");

  const pipe = await createPipeline(
    "text-generation",
    "HuggingFaceTB/SmolLM2-135M-Instruct",
    {
      dtype: "q4",
      device: "wasm",
      progress_callback: (progress) => {
        if (progress.status === "progress" && progress.total) {
          callbacks.onProgress(Math.round((progress.loaded / progress.total) * 100), progress.file);
        }
      },
    },
  );

  callbacks.onStatus("ready");
  return pipe;
}

// Returns the shared pipeline, starting the load only when none is pending.
function loadPipeline(callbacks) {
  if (!pipelinePromise) {
    pipelinePromise = createTextPipeline(callbacks).catch((error) => {
      // Forget the failed attempt so the next request retries the load.
      pipelinePromise = null;
      throw error;
    });
  }
  return pipelinePromise;
}

// generated_text is a plain string for raw prompts but an array of
// {role, content} message objects when using chat format. Extract the
// last message's content in either case.
function extractText(generated) {
  if (typeof generated === "string") return generated;
  if (Array.isArray(generated)) {
    const last = generated[generated.length - 1];
    return last?.content ?? "";
  }
  return "";
}

self.onmessage = async (event) => {
  const request = event.data;
  if (request.type !== "generate") return;

  const { id, messages, options } = request;

  // Tracked per request so one request finishing cannot mute another.
  let active = false;

  try {
    const pipe = await loadPipeline({
      onStatus: (status) => self.postMessage({ type: "status", status }),
      onProgress: (progress, file) => self.postMessage({ type: "download-progress", progress, file }),
    });

    self.postMessage({ type: "status", status: "generating" });
    active = true;

    const chatMessages = [];
    if (options?.systemPrompt) {
      chatMessages.push({ role: "system", content: options.systemPrompt });
    }
    chatMessages.push(...messages);

    const result = await pipe(chatMessages, {
      max_new_tokens: options?.maxNewTokens ?? 512,
      temperature: options?.temperature ?? 0.7,
      do_sample: true,
      return_full_text: false,
      callback_function: (output) => {
        if (!active) return;
        const text = extractText(output?.[0]?.generated_text);
        if (text) {
          self.postMessage({ type: "token", id, token: text });
        }
      },
    });

    active = false;
    const finalText = extractText(result?.[0]?.generated_text);
    self.postMessage({ type: "done", id, text: finalText });
  } catch (error) {
    active = false;
    self.postMessage({ type: "error", id, error: error?.message ?? String(error) });
  }
};
`;
@@ -7,7 +7,9 @@
7
7
 
8
8
  export { useChat } from "./use-chat.js";
9
9
  export type {
10
+ BrowserInferenceStatus,
10
11
  DynamicToolUIPart,
12
+ InferenceMode,
11
13
  OnToolCallArg,
12
14
  ReasoningUIPart,
13
15
  TextUIPart,
@@ -1,6 +1,18 @@
1
1
  import * as dntShim from "../../../../_dnt.shims.js";
2
2
  export type StreamState = "streaming" | "done";
3
3
 
4
/**
 * Location where a chat response is generated:
 * - "cloud": normal server-side inference
 * - "server-local": the server runs a local model
 * - "browser": inference runs in a browser Web Worker
 */
export type InferenceMode =
  | "cloud"
  | "server-local"
  | "browser";

/** Lifecycle states of browser-side model loading and generation. */
export type BrowserInferenceStatus =
  | "idle" | "loading-runtime" | "downloading-model"
  | "ready" | "generating" | "error";
15
+
4
16
  export interface TextUIPart {
5
17
  type: "text";
6
18
  text: string;
@@ -90,6 +102,10 @@ export interface UseChatOptions {
90
102
  credentials?: RequestCredentials;
91
103
  /** Override model at runtime (e.g. "openai/gpt-4o", "anthropic/claude-sonnet-4-5-20250929") */
92
104
  model?: string;
105
+ /** System prompt for browser-side inference (server uses agent config) */
106
+ systemPrompt?: string;
107
+ /** Enable/disable browser fallback when server can't provide AI. Default: true */
108
+ browserFallback?: boolean;
93
109
  onResponse?: (response: dntShim.Response) => void;
94
110
  onFinish?: (message: UIMessage) => void;
95
111
  onError?: (error: Error) => void;
@@ -103,6 +119,10 @@ export interface UseChatResult {
103
119
  error: Error | null;
104
120
  /** Current model override (undefined = use agent default) */
105
121
  model: string | undefined;
122
+ /** Where inference is currently happening */
123
+ inferenceMode: InferenceMode;
124
+ /** Browser-side model loading/inference status (null when not using browser fallback) */
125
+ browserStatus: BrowserInferenceStatus | null;
106
126
  setInput: (input: string) => void;
107
127
  /** Change the model for subsequent requests */
108
128
  setModel: (model: string | undefined) => void;
@@ -4,6 +4,11 @@
4
4
  * Complete chat state management with zero UI.
5
5
  * Consumes the veryfront streaming protocol
6
6
  * (message-start/message-finish + step-start/step-end).
7
+ *
8
+ * Supports three inference modes:
9
+ * - cloud: API key present, normal server-side inference
10
+ * - server-local: No API key, server runs local model via ONNX
11
+ * - browser: Server can't run ONNX (compiled binary), falls back to browser Worker
7
12
  */
8
13
  import * as dntShim from "../../../../_dnt.shims.js";
9
14
 
@@ -12,7 +17,15 @@ import { useCallback, useRef, useState } from "react";
12
17
  import { createError, ensureError, toError } from "../../../errors/veryfront-error.js";
13
18
 
14
19
  import { handleStreamingResponse } from "./streaming/index.js";
15
- import type { ToolOutput, UIMessage, UseChatOptions, UseChatResult } from "./types.js";
20
+ import type {
21
+ BrowserInferenceStatus,
22
+ InferenceMode,
23
+ ToolOutput,
24
+ UIMessage,
25
+ UIMessagePart,
26
+ UseChatOptions,
27
+ UseChatResult,
28
+ } from "./types.js";
16
29
  import { generateClientId } from "./utils.js";
17
30
 
18
31
  /**
@@ -25,7 +38,16 @@ export function useChat(options: UseChatOptions): UseChatResult {
25
38
  const [error, setError] = useState<Error | null>(null);
26
39
  const [data, setData] = useState<unknown>(null);
27
40
  const [model, setModel] = useState<string | undefined>(options.model);
41
+ const [inferenceMode, setInferenceMode] = useState<InferenceMode>("cloud");
42
+ const [browserStatus, setBrowserStatus] = useState<BrowserInferenceStatus | null>(null);
28
43
  const abortControllerRef = useRef<AbortController | null>(null);
44
+ const browserInferenceActiveRef = useRef(false);
45
+ const browserInferenceRejectRef = useRef<((reason: Error) => void) | null>(null);
46
+
47
+ // System prompt for browser fallback (from 503 response or options)
48
+ const systemPromptRef = useRef<string>(
49
+ options.systemPrompt ?? "You are a helpful AI assistant.",
50
+ );
29
51
 
30
52
  // Track pending tool outputs for addToolOutput
31
53
  const pendingToolOutputsRef = useRef<Map<string, ToolOutput>>(new Map());
@@ -57,6 +79,64 @@ export function useChat(options: UseChatOptions): UseChatResult {
57
79
  );
58
80
  }, []);
59
81
 
82
+ /**
83
+ * Run inference in the browser via Web Worker.
84
+ * Lazily imports the browser-inference module to avoid bundling it
85
+ * when server-side inference works fine.
86
+ */
87
+ const doBrowserInference = useCallback(
88
+ async (allMessages: UIMessage[]) => {
89
+ browserInferenceActiveRef.current = true;
90
+
91
+ try {
92
+ const { runBrowserInference } = await import(
93
+ "./browser-inference/browser-engine.js"
94
+ );
95
+
96
+ await new Promise<void>((resolve, reject) => {
97
+ browserInferenceRejectRef.current = reject;
98
+ let hasAddedMessage = false;
99
+
100
+ runBrowserInference(allMessages, systemPromptRef.current, {
101
+ onUpdate: (parts: UIMessagePart[], messageId: string) => {
102
+ if (!hasAddedMessage) {
103
+ hasAddedMessage = true;
104
+ setMessages((prev) => [
105
+ ...prev,
106
+ { id: messageId, role: "assistant", parts },
107
+ ]);
108
+ return;
109
+ }
110
+ setMessages((prev) => prev.map((m) => (m.id === messageId ? { ...m, parts } : m)));
111
+ },
112
+ onMessage: (assistantMessage: UIMessage) => {
113
+ setMessages((prev) => {
114
+ if (!hasAddedMessage) return [...prev, assistantMessage];
115
+ return prev.map((m) => m.id === assistantMessage.id ? assistantMessage : m);
116
+ });
117
+ options.onFinish?.(assistantMessage);
118
+ browserInferenceRejectRef.current = null;
119
+ resolve();
120
+ },
121
+ onStatusChange: (status: BrowserInferenceStatus) => {
122
+ setBrowserStatus(status);
123
+ },
124
+ onDownloadProgress: () => {
125
+ // Progress is tracked via onStatusChange("downloading-model")
126
+ },
127
+ onError: (err: Error) => {
128
+ browserInferenceRejectRef.current = null;
129
+ reject(err);
130
+ },
131
+ });
132
+ });
133
+ } finally {
134
+ browserInferenceActiveRef.current = false;
135
+ }
136
+ },
137
+ [options],
138
+ );
139
+
60
140
  /**
61
141
  * Send a message and stream assistant updates.
62
142
  */
@@ -72,10 +152,18 @@ export function useChat(options: UseChatOptions): UseChatResult {
72
152
  setIsLoading(true);
73
153
  setError(null);
74
154
 
75
- const abortController = new AbortController();
76
- abortControllerRef.current = abortController;
77
-
78
155
  try {
156
+ const allMessages = [...messages, userMessage];
157
+
158
+ // If already in browser mode, skip fetch entirely
159
+ if (inferenceMode === "browser") {
160
+ await doBrowserInference(allMessages);
161
+ return;
162
+ }
163
+
164
+ const abortController = new AbortController();
165
+ abortControllerRef.current = abortController;
166
+
79
167
  const response = await dntShim.fetch(options.api, {
80
168
  method: "POST",
81
169
  headers: {
@@ -84,13 +172,31 @@ export function useChat(options: UseChatOptions): UseChatResult {
84
172
  },
85
173
  credentials: options.credentials,
86
174
  body: JSON.stringify({
87
- messages: [...messages, userMessage],
175
+ messages: allMessages,
88
176
  ...(model ? { model } : {}),
89
177
  ...options.body,
90
178
  }),
91
179
  signal: abortController.signal,
92
180
  });
93
181
 
182
+ // Handle 503 — server can't provide AI, fall back to browser
183
+ if (response.status === 503 && (options.browserFallback ?? true)) {
184
+ try {
185
+ const body = await response.json();
186
+ if (body.code === "NO_AI_AVAILABLE") {
187
+ if (body.systemPrompt) {
188
+ systemPromptRef.current = body.systemPrompt;
189
+ }
190
+ setInferenceMode("browser");
191
+ setBrowserStatus("idle");
192
+ await doBrowserInference(allMessages);
193
+ return;
194
+ }
195
+ } catch {
196
+ // If parsing fails, fall through to normal error handling
197
+ }
198
+ }
199
+
94
200
  if (!response.ok) {
95
201
  throw toError(
96
202
  createError({
@@ -116,7 +222,20 @@ export function useChat(options: UseChatOptions): UseChatResult {
116
222
  });
117
223
  options.onFinish?.(assistantMessage);
118
224
  },
119
- onData: setData,
225
+ onData: (eventData) => {
226
+ setData(eventData);
227
+ // Detect inference mode from server metadata
228
+ if (
229
+ eventData &&
230
+ typeof eventData === "object" &&
231
+ "inferenceMode" in eventData
232
+ ) {
233
+ const mode = (eventData as { inferenceMode: string }).inferenceMode;
234
+ if (mode === "server-local" || mode === "cloud") {
235
+ setInferenceMode(mode);
236
+ }
237
+ }
238
+ },
120
239
  onUpdate: (parts, messageId) => {
121
240
  const id = messageId ?? streamingMessageId;
122
241
 
@@ -153,7 +272,7 @@ export function useChat(options: UseChatOptions): UseChatResult {
153
272
  abortControllerRef.current = null;
154
273
  }
155
274
  },
156
- [messages, model, options],
275
+ [messages, model, options, inferenceMode, doBrowserInference],
157
276
  );
158
277
 
159
278
  /**
@@ -176,9 +295,28 @@ export function useChat(options: UseChatOptions): UseChatResult {
176
295
  /**
177
296
  * Stop generation
178
297
  */
179
- const stop = useCallback(() => {
298
+ const stop = useCallback(async () => {
180
299
  abortControllerRef.current?.abort();
181
300
  abortControllerRef.current = null;
301
+
302
+ // Also stop browser inference Worker if active
303
+ if (browserInferenceActiveRef.current) {
304
+ // Settle the pending doBrowserInference promise before terminating the Worker
305
+ browserInferenceRejectRef.current?.(new Error("Generation stopped by user"));
306
+ browserInferenceRejectRef.current = null;
307
+
308
+ try {
309
+ const { stopBrowserInference } = await import(
310
+ "./browser-inference/browser-engine.js"
311
+ );
312
+ stopBrowserInference();
313
+ } catch {
314
+ // Worker module may already be terminated or unavailable
315
+ }
316
+ browserInferenceActiveRef.current = false;
317
+ setBrowserStatus("ready");
318
+ }
319
+
182
320
  setIsLoading(false);
183
321
  }, []);
184
322
 
@@ -215,6 +353,8 @@ export function useChat(options: UseChatOptions): UseChatResult {
215
353
  isLoading,
216
354
  error,
217
355
  model,
356
+ inferenceMode,
357
+ browserStatus,
218
358
  setInput,
219
359
  setModel,
220
360
  sendMessage,
@@ -21,7 +21,7 @@ import {
21
21
  type MessagePart,
22
22
  type ToolCall,
23
23
  } from "../types.js";
24
- import { resolveModel } from "../../provider/index.js";
24
+ import { ensureModelReady, resolveModel } from "../../provider/index.js";
25
25
  import { executeTool } from "../../tool/index.js";
26
26
  import { generateId } from "../../utils/id.js";
27
27
  import { detectPlatform, getPlatformCapabilities } from "../../platform/core-platform.js";
@@ -36,7 +36,7 @@ import { convertToModelMessages } from "./model-message-converter.js";
36
36
  import { convertToolsToAISDK } from "./model-tool-converter.js";
37
37
  import { createStreamState, processStream } from "./ai-stream-handler.js";
38
38
  import { MiddlewareChain } from "../middleware/chain.js";
39
- import { generateText, streamText } from "ai";
39
+ import { generateText, type LanguageModel, streamText } from "ai";
40
40
 
41
41
  // Re-export from submodules
42
42
  export { generateMessageId, sendSSE } from "./sse-utils.js";
@@ -59,6 +59,27 @@ import { accumulateUsage, getMaxSteps, normalizeInput } from "./input-utils.js";
59
59
 
60
60
  const logger = serverLogger.component("agent");
61
61
 
62
/**
 * Report whether inference for a request runs on a local model.
 *
 * Covers both an explicit "local/*" request and the case where a different
 * model was requested but the resolved model identifies itself as local.
 *
 * @param model - The resolved language model (a string id or a model object).
 * @param requestedModel - The model string that was requested.
 * @returns true when the request or the resolved model is local.
 */
function isLocalInferenceModel(model: LanguageModel, requestedModel: string): boolean {
  const localPrefix = "local/";

  if (requestedModel.startsWith(localPrefix)) return true;

  // LanguageModel may be a bare string id; handle that before probing properties.
  if (typeof model === "string") return model.startsWith(localPrefix);

  return (
    ("provider" in model && model.provider === "local") ||
    ("modelId" in model &&
      typeof model.modelId === "string" &&
      model.modelId.startsWith(localPrefix))
  );
}
82
+
62
83
  export class AgentRuntime {
63
84
  private id: string;
64
85
  private config: AgentConfig;
@@ -125,6 +146,7 @@ export class AgentRuntime {
125
146
  modelOverride?: string,
126
147
  ): Promise<ReadableStream<Uint8Array>> {
127
148
  const modelString = modelOverride || this.config.model;
149
+ const requestedModel = modelString || this.config.model;
128
150
 
129
151
  for (const msg of messages) await this.memory.add(msg);
130
152
 
@@ -135,6 +157,18 @@ export class AgentRuntime {
135
157
  const toolContext = { agentId: this.id, ...context };
136
158
  const textPartId = generateId("text");
137
159
 
160
+ // Resolve model BEFORE creating the ReadableStream — if this throws
161
+ // (e.g., no_ai_available), the error propagates to the caller who can
162
+ // return a proper error response (503) instead of a 200 with an error event.
163
+ const languageModel = resolveModel(requestedModel);
164
+
165
+ // Eagerly verify the model runtime is available. For local models this
166
+ // checks that @huggingface/transformers can be imported. Must happen
167
+ // BEFORE creating the ReadableStream so no_ai_available errors propagate
168
+ // to the caller (createChatHandler) who returns a 503 with browser fallback
169
+ // info, instead of being swallowed as an in-band SSE error in a 200 response.
170
+ await ensureModelReady(languageModel);
171
+
138
172
  return new ReadableStream<Uint8Array>({
139
173
  start: async (controller) => {
140
174
  try {
@@ -142,6 +176,14 @@ export class AgentRuntime {
142
176
 
143
177
  const messageId = generateMessageId();
144
178
  sendSSE(controller, encoder, { type: "message-start", messageId });
179
+ sendSSE(controller, encoder, {
180
+ type: "data",
181
+ data: {
182
+ inferenceMode: isLocalInferenceModel(languageModel, requestedModel)
183
+ ? "server-local"
184
+ : "cloud",
185
+ },
186
+ });
145
187
  sendSSE(controller, encoder, { type: "text-start", id: textPartId });
146
188
 
147
189
  await this.executeAgentLoopStreaming(
@@ -153,6 +195,7 @@ export class AgentRuntime {
153
195
  textPartId,
154
196
  toolContext,
155
197
  modelString,
198
+ languageModel,
156
199
  );
157
200
 
158
201
  sendSSE(controller, encoder, { type: "text-end", id: textPartId });
@@ -181,17 +224,28 @@ export class AgentRuntime {
181
224
  return withSpan("agent.execution_loop", async (loopSpan) => {
182
225
  const { maxAgentSteps } = getPlatformCapabilities();
183
226
  const maxSteps = this.computeMaxSteps(maxAgentSteps);
184
- const languageModel = resolveModel(modelString || this.config.model);
227
+ const requestedModel = modelString || this.config.model;
228
+ const languageModel = resolveModel(requestedModel);
185
229
 
186
230
  const toolCalls: ToolCall[] = [];
187
231
  const currentMessages = [...messages];
188
232
  const totalUsage = { promptTokens: 0, completionTokens: 0, totalTokens: 0 };
189
233
 
234
+ // Local models can't reliably do function calling — skip tools gracefully.
235
+ const isLocal = isLocalInferenceModel(languageModel, requestedModel);
236
+ if (isLocal && this.config.tools) {
237
+ logger.warn(
238
+ `Agent "${this.id}" has tools configured but is using local model "${requestedModel}". ` +
239
+ "Local models don't support tool calling — tools will be skipped. " +
240
+ "Set OPENAI_API_KEY, ANTHROPIC_API_KEY, or GOOGLE_API_KEY for full tool support.",
241
+ );
242
+ }
243
+
190
244
  for (let step = 0; step < maxSteps; step++) {
191
245
  this.status = "thinking";
192
246
  addSpanEvent(loopSpan, "step_start", { step });
193
247
 
194
- const tools = getAvailableTools(this.config.tools);
248
+ const tools = isLocal ? [] : getAvailableTools(this.config.tools);
195
249
 
196
250
  const response = await withSpan("agent.generate_text", async (span) => {
197
251
  setSpanAttributes(span, {
@@ -350,19 +404,31 @@ export class AgentRuntime {
350
404
  textPartId?: string,
351
405
  toolContext?: Record<string, unknown>,
352
406
  modelString?: string,
407
+ resolvedModel?: LanguageModel,
353
408
  ): Promise<AgentResponse> {
354
409
  const { maxAgentSteps } = getPlatformCapabilities();
355
410
  const maxSteps = this.computeMaxSteps(maxAgentSteps);
356
- const languageModel = resolveModel(modelString || this.config.model);
411
+ const requestedModel = modelString || this.config.model;
412
+ const languageModel = resolvedModel ?? resolveModel(requestedModel);
357
413
 
358
414
  const toolCalls: ToolCall[] = [];
359
415
  const currentMessages = [...messages];
360
416
  const totalUsage = { promptTokens: 0, completionTokens: 0, totalTokens: 0 };
361
417
 
418
+ // Local models can't reliably do function calling — skip tools gracefully.
419
+ const isLocalStreaming = isLocalInferenceModel(languageModel, requestedModel);
420
+ if (isLocalStreaming && this.config.tools) {
421
+ logger.warn(
422
+ `Agent "${this.id}" has tools configured but is using local model "${requestedModel}". ` +
423
+ "Local models don't support tool calling — tools will be skipped. " +
424
+ "Set OPENAI_API_KEY, ANTHROPIC_API_KEY, or GOOGLE_API_KEY for full tool support.",
425
+ );
426
+ }
427
+
362
428
  for (let step = 0; step < maxSteps; step++) {
363
429
  sendSSE(controller, encoder, { type: "step-start" });
364
430
 
365
- const tools = getAvailableTools(this.config.tools);
431
+ const tools = isLocalStreaming ? [] : getAvailableTools(this.config.tools);
366
432
  const result = streamText({
367
433
  model: languageModel,
368
434
  system: systemPrompt,