veryfront 0.1.13 → 0.1.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/esm/cli/app/data/slug-words.d.ts.map +1 -1
- package/esm/cli/app/data/slug-words.js +225 -90
- package/esm/cli/app/operations/project-creation.js +4 -3
- package/esm/cli/app/shell.js +1 -1
- package/esm/cli/app/utils.d.ts +5 -4
- package/esm/cli/app/utils.d.ts.map +1 -1
- package/esm/cli/app/utils.js +0 -23
- package/esm/cli/app/views/dashboard.d.ts +1 -1
- package/esm/cli/app/views/dashboard.d.ts.map +1 -1
- package/esm/cli/app/views/dashboard.js +22 -4
- package/esm/cli/auth/callback-server.d.ts.map +1 -1
- package/esm/cli/auth/callback-server.js +3 -2
- package/esm/cli/commands/dev/handler.d.ts.map +1 -1
- package/esm/cli/commands/dev/handler.js +2 -0
- package/esm/cli/commands/init/init-command.d.ts.map +1 -1
- package/esm/cli/commands/init/init-command.js +20 -3
- package/esm/cli/commands/init/interactive-wizard.d.ts +3 -2
- package/esm/cli/commands/init/interactive-wizard.d.ts.map +1 -1
- package/esm/cli/commands/init/interactive-wizard.js +55 -27
- package/esm/cli/mcp/remote-file-tools.d.ts +0 -6
- package/esm/cli/mcp/remote-file-tools.d.ts.map +1 -1
- package/esm/cli/mcp/remote-file-tools.js +37 -15
- package/esm/cli/shared/reserve-slug.d.ts.map +1 -1
- package/esm/cli/shared/reserve-slug.js +8 -3
- package/esm/cli/utils/env-prompt.d.ts.map +1 -1
- package/esm/cli/utils/env-prompt.js +3 -0
- package/esm/deno.d.ts +5 -1
- package/esm/deno.js +11 -4
- package/esm/src/agent/chat-handler.d.ts +4 -3
- package/esm/src/agent/chat-handler.d.ts.map +1 -1
- package/esm/src/agent/chat-handler.js +55 -4
- package/esm/src/agent/react/index.d.ts +1 -1
- package/esm/src/agent/react/index.d.ts.map +1 -1
- package/esm/src/agent/react/use-chat/browser-inference/browser-engine.d.ts +18 -0
- package/esm/src/agent/react/use-chat/browser-inference/browser-engine.d.ts.map +1 -0
- package/esm/src/agent/react/use-chat/browser-inference/browser-engine.js +54 -0
- package/esm/src/agent/react/use-chat/browser-inference/types.d.ts +43 -0
- package/esm/src/agent/react/use-chat/browser-inference/types.d.ts.map +1 -0
- package/esm/src/agent/react/use-chat/browser-inference/types.js +4 -0
- package/esm/src/agent/react/use-chat/browser-inference/worker-client.d.ts +23 -0
- package/esm/src/agent/react/use-chat/browser-inference/worker-client.d.ts.map +1 -0
- package/esm/src/agent/react/use-chat/browser-inference/worker-client.js +67 -0
- package/esm/src/agent/react/use-chat/browser-inference/worker-script.d.ts +8 -0
- package/esm/src/agent/react/use-chat/browser-inference/worker-script.d.ts.map +1 -0
- package/esm/src/agent/react/use-chat/browser-inference/worker-script.js +97 -0
- package/esm/src/agent/react/use-chat/index.d.ts +1 -1
- package/esm/src/agent/react/use-chat/index.d.ts.map +1 -1
- package/esm/src/agent/react/use-chat/types.d.ts +12 -0
- package/esm/src/agent/react/use-chat/types.d.ts.map +1 -1
- package/esm/src/agent/react/use-chat/use-chat.d.ts.map +1 -1
- package/esm/src/agent/react/use-chat/use-chat.js +120 -6
- package/esm/src/agent/runtime/index.d.ts.map +1 -1
- package/esm/src/agent/runtime/index.js +59 -7
- package/esm/src/build/production-build/templates.d.ts +2 -2
- package/esm/src/build/production-build/templates.d.ts.map +1 -1
- package/esm/src/build/production-build/templates.js +2 -68
- package/esm/src/chat/index.d.ts +1 -1
- package/esm/src/chat/index.d.ts.map +1 -1
- package/esm/src/errors/veryfront-error.d.ts +3 -0
- package/esm/src/errors/veryfront-error.d.ts.map +1 -1
- package/esm/src/platform/adapters/runtime/deno/adapter.d.ts.map +1 -1
- package/esm/src/platform/adapters/runtime/deno/adapter.js +24 -3
- package/esm/src/platform/compat/http/deno-server.d.ts.map +1 -1
- package/esm/src/platform/compat/http/deno-server.js +23 -2
- package/esm/src/provider/index.d.ts +1 -1
- package/esm/src/provider/index.d.ts.map +1 -1
- package/esm/src/provider/index.js +1 -1
- package/esm/src/provider/local/ai-sdk-adapter.d.ts +19 -0
- package/esm/src/provider/local/ai-sdk-adapter.d.ts.map +1 -0
- package/esm/src/provider/local/ai-sdk-adapter.js +164 -0
- package/esm/src/provider/local/env.d.ts +10 -0
- package/esm/src/provider/local/env.d.ts.map +1 -0
- package/esm/src/provider/local/env.js +23 -0
- package/esm/src/provider/local/local-engine.d.ts +61 -0
- package/esm/src/provider/local/local-engine.d.ts.map +1 -0
- package/esm/src/provider/local/local-engine.js +211 -0
- package/esm/src/provider/local/model-catalog.d.ts +30 -0
- package/esm/src/provider/local/model-catalog.d.ts.map +1 -0
- package/esm/src/provider/local/model-catalog.js +58 -0
- package/esm/src/provider/model-registry.d.ts +14 -0
- package/esm/src/provider/model-registry.d.ts.map +1 -1
- package/esm/src/provider/model-registry.js +58 -2
- package/esm/src/proxy/main.js +34 -6
- package/esm/src/proxy/server-resolver.d.ts +23 -0
- package/esm/src/proxy/server-resolver.d.ts.map +1 -0
- package/esm/src/proxy/server-resolver.js +124 -0
- package/esm/src/react/components/ai/chat/components/inference-badge.d.ts +8 -0
- package/esm/src/react/components/ai/chat/components/inference-badge.d.ts.map +1 -0
- package/esm/src/react/components/ai/chat/components/inference-badge.js +36 -0
- package/esm/src/react/components/ai/chat/components/upgrade-cta.d.ts +7 -0
- package/esm/src/react/components/ai/chat/components/upgrade-cta.d.ts.map +1 -0
- package/esm/src/react/components/ai/chat/components/upgrade-cta.js +33 -0
- package/esm/src/react/components/ai/chat/index.d.ts +7 -1
- package/esm/src/react/components/ai/chat/index.d.ts.map +1 -1
- package/esm/src/react/components/ai/chat/index.js +16 -4
- package/esm/src/sandbox/index.d.ts +31 -0
- package/esm/src/sandbox/index.d.ts.map +1 -0
- package/esm/src/sandbox/index.js +30 -0
- package/esm/src/sandbox/sandbox.d.ts +48 -0
- package/esm/src/sandbox/sandbox.d.ts.map +1 -0
- package/esm/src/sandbox/sandbox.js +178 -0
- package/esm/src/transforms/pipeline/stages/ssr-vf-modules/import-finder.d.ts.map +1 -1
- package/esm/src/transforms/pipeline/stages/ssr-vf-modules/import-finder.js +8 -2
- package/esm/src/transforms/pipeline/stages/ssr-vf-modules/index.d.ts +1 -0
- package/esm/src/transforms/pipeline/stages/ssr-vf-modules/index.d.ts.map +1 -1
- package/esm/src/transforms/pipeline/stages/ssr-vf-modules/index.js +1 -0
- package/esm/src/transforms/pipeline/stages/ssr-vf-modules/transform.d.ts.map +1 -1
- package/esm/src/transforms/pipeline/stages/ssr-vf-modules/transform.js +15 -1
- package/package.json +8 -1
- package/src/cli/app/data/slug-words.ts +225 -90
- package/src/cli/app/operations/project-creation.ts +3 -3
- package/src/cli/app/shell.ts +1 -1
- package/src/cli/app/utils.ts +0 -30
- package/src/cli/app/views/dashboard.ts +27 -4
- package/src/cli/auth/callback-server.ts +3 -2
- package/src/cli/commands/dev/handler.ts +2 -0
- package/src/cli/commands/init/init-command.ts +30 -3
- package/src/cli/commands/init/interactive-wizard.ts +62 -34
- package/src/cli/mcp/remote-file-tools.ts +50 -15
- package/src/cli/shared/reserve-slug.ts +9 -2
- package/src/cli/utils/env-prompt.ts +3 -0
- package/src/deno.js +11 -4
- package/src/src/agent/chat-handler.ts +57 -4
- package/src/src/agent/react/index.ts +2 -0
- package/src/src/agent/react/use-chat/browser-inference/browser-engine.ts +81 -0
- package/src/src/agent/react/use-chat/browser-inference/types.ts +52 -0
- package/src/src/agent/react/use-chat/browser-inference/worker-client.ts +89 -0
- package/src/src/agent/react/use-chat/browser-inference/worker-script.ts +98 -0
- package/src/src/agent/react/use-chat/index.ts +2 -0
- package/src/src/agent/react/use-chat/types.ts +20 -0
- package/src/src/agent/react/use-chat/use-chat.ts +148 -8
- package/src/src/agent/runtime/index.ts +72 -6
- package/src/src/build/production-build/templates.ts +2 -68
- package/src/src/chat/index.ts +2 -0
- package/src/src/errors/veryfront-error.ts +2 -1
- package/src/src/platform/adapters/runtime/deno/adapter.ts +25 -3
- package/src/src/platform/compat/http/deno-server.ts +28 -1
- package/src/src/provider/index.ts +1 -0
- package/src/src/provider/local/ai-sdk-adapter.ts +207 -0
- package/src/src/provider/local/env.ts +26 -0
- package/src/src/provider/local/local-engine.ts +288 -0
- package/src/src/provider/local/model-catalog.ts +73 -0
- package/src/src/provider/model-registry.ts +66 -2
- package/src/src/proxy/main.ts +41 -6
- package/src/src/proxy/server-resolver.ts +151 -0
- package/src/src/react/components/ai/chat/components/inference-badge.tsx +48 -0
- package/src/src/react/components/ai/chat/components/upgrade-cta.tsx +56 -0
- package/src/src/react/components/ai/chat/index.tsx +43 -6
- package/src/src/sandbox/index.ts +32 -0
- package/src/src/sandbox/sandbox.ts +236 -0
- package/src/src/transforms/pipeline/stages/ssr-vf-modules/import-finder.ts +9 -2
- package/src/src/transforms/pipeline/stages/ssr-vf-modules/index.ts +1 -0
- package/src/src/transforms/pipeline/stages/ssr-vf-modules/transform.ts +17 -0
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* BrowserInferenceClient — manages Web Worker lifecycle for browser-side inference.
|
|
3
|
+
*
|
|
4
|
+
* Singleton per session. Lazily creates Worker on first generate() call.
|
|
5
|
+
* Uses inline Blob URL approach — no separate build entry point needed.
|
|
6
|
+
*/
|
|
7
|
+
import * as dntShim from "../../../../../_dnt.shims.js";
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
import type { WorkerRequest, WorkerResponse } from "./types.js";
|
|
11
|
+
import { WORKER_SCRIPT } from "./worker-script.js";
|
|
12
|
+
|
|
13
|
+
/**
 * Optional hooks a caller passes to `BrowserInferenceClient.generate()` to
 * observe a single generation request. Every member may be omitted.
 */
export interface GenerateCallbacks {
  /** Invoked for each "status" message posted by the Worker. */
  onStatus?: (
    status: "loading-runtime" | "downloading-model" | "ready" | "generating",
  ) => void;

  /** Invoked for each "download-progress" message; `file` may be absent. */
  onDownloadProgress?: (progress: number, file?: string) => void;

  /** Invoked for each "token" message belonging to this request. */
  onToken?: (token: string) => void;

  /** Invoked with the final text when this request reports "done". */
  onDone?: (text: string) => void;

  /** Invoked with an error message when this request or the Worker fails. */
  onError?: (error: string) => void;
}
|
|
20
|
+
|
|
21
|
+
let instance: BrowserInferenceClient | null = null;
|
|
22
|
+
|
|
23
|
+
/**
 * Owns the Web Worker used for browser-side inference.
 *
 * The Worker is created lazily from an inline Blob URL the first time
 * `generate()` is called and is destroyed by `stop()`. `getInstance()` hands
 * out one shared client so callers reuse the same Worker.
 */
export class BrowserInferenceClient {
  private worker: Worker | null = null;
  private blobUrl: string | null = null;

  /** Return the shared client, creating it on first use. */
  static getInstance(): BrowserInferenceClient {
    if (!instance) instance = new BrowserInferenceClient();
    return instance;
  }

  /**
   * Return the live Worker, creating it from `WORKER_SCRIPT` if needed.
   *
   * @throws whatever `new Worker(...)` throws; the Blob URL created for the
   *   failed attempt is revoked first so repeated failures do not leak URLs.
   */
  private ensureWorker(): Worker {
    if (this.worker) return this.worker;

    const blob = new dntShim.Blob([WORKER_SCRIPT], { type: "application/javascript" });
    const blobUrl = URL.createObjectURL(blob);

    try {
      this.worker = new Worker(blobUrl, { type: "module" });
    } catch (error) {
      // Construction failed: release the URL now, nothing else will.
      URL.revokeObjectURL(blobUrl);
      throw error;
    }

    // Only remember the URL once a Worker actually depends on it.
    this.blobUrl = blobUrl;
    return this.worker;
  }

  /**
   * Post a "generate" request to the Worker and route its replies.
   *
   * The Worker's message and error handlers are replaced on every call, so
   * only the most recent call's callbacks receive messages. "status" and
   * "download-progress" messages are forwarded unconditionally; "token",
   * "done" and "error" messages are forwarded only when their id equals `id`.
   *
   * @param id Identifier echoed back by the Worker for this request.
   * @param messages Chat history sent to the Worker.
   * @param options Generation settings forwarded unchanged to the Worker.
   * @param callbacks Hooks invoked as replies arrive.
   */
  generate(
    id: string,
    messages: Array<{ role: string; content: string }>,
    options: { maxNewTokens?: number; temperature?: number; systemPrompt?: string },
    callbacks: GenerateCallbacks,
  ): void {
    const worker = this.ensureWorker();

    worker.onmessage = (event: MessageEvent<WorkerResponse>) => {
      const msg = event.data;
      switch (msg.type) {
        case "status":
          callbacks.onStatus?.(msg.status);
          break;
        case "download-progress":
          callbacks.onDownloadProgress?.(msg.progress, msg.file);
          break;
        case "token":
          if (msg.id === id) callbacks.onToken?.(msg.token);
          break;
        case "done":
          if (msg.id === id) callbacks.onDone?.(msg.text);
          break;
        case "error":
          if (msg.id === id) callbacks.onError?.(msg.error);
          break;
      }
    };

    worker.onerror = (event) => {
      callbacks.onError?.(event.message || "Worker error");
    };

    const request: WorkerRequest = { type: "generate", id, messages, options };
    worker.postMessage(request);
  }

  /**
   * Terminate the Worker (if any) and revoke its Blob URL.
   *
   * Safe to call repeatedly. A later `generate()` creates a fresh Worker.
   */
  stop(): void {
    if (this.worker) {
      this.worker.terminate();
      this.worker = null;
    }
    if (this.blobUrl) {
      URL.revokeObjectURL(this.blobUrl);
      this.blobUrl = null;
    }
  }
}
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Inline Worker script as a string.
|
|
3
|
+
*
|
|
4
|
+
* Loaded via Blob URL — no separate build entry point needed.
|
|
5
|
+
* Dynamically imports @huggingface/transformers from CDN inside the Worker.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
/**
 * JavaScript source of the inference Worker, kept as a string so it can be
 * started from a Blob URL.
 *
 * Protocol (all via postMessage):
 * - in:  `{ type: "generate", id, messages, options }`; other types are ignored.
 * - out: `{ type: "status", status }`, `{ type: "download-progress", progress, file }`,
 *        `{ type: "token", id, token }`, `{ type: "done", id, text }`,
 *        `{ type: "error", id, error }`.
 *
 * The text-generation pipeline is created once per Worker. Overlapping
 * requests share a single in-flight load, and a failed load is forgotten so a
 * later request can retry.
 *
 * NOTE(review): confirm that the pinned @huggingface/transformers version
 * still invokes `callback_function` during generation; its documentation
 * describes a `streamer` option for incremental output.
 */
export const WORKER_SCRIPT = /* js */ `
let pipeline = null;
// In-flight load shared by overlapping requests so the model is fetched once.
let pipelineLoading = null;

async function createTextPipeline(callbacks) {
  callbacks.onStatus("loading-runtime");

  const { pipeline: createPipeline, env } =
    await import("https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.4.2");

  env.useBrowserCache = true;
  env.allowLocalModels = false;

  callbacks.onStatus("downloading-model");

  pipeline = await createPipeline(
    "text-generation",
    "HuggingFaceTB/SmolLM2-135M-Instruct",
    {
      dtype: "q4",
      device: "wasm",
      progress_callback: (progress) => {
        if (progress.status === "progress" && progress.total) {
          callbacks.onProgress(Math.round((progress.loaded / progress.total) * 100), progress.file);
        }
      },
    },
  );

  callbacks.onStatus("ready");
  return pipeline;
}

function loadPipeline(callbacks) {
  if (pipeline) return Promise.resolve(pipeline);

  if (!pipelineLoading) {
    pipelineLoading = createTextPipeline(callbacks).catch((error) => {
      // Forget the failed attempt so the next request can try again.
      pipelineLoading = null;
      throw error;
    });
  }
  return pipelineLoading;
}

// generated_text is a plain string for raw prompts but an array of
// {role, content} message objects when using chat format. Extract the
// last message's content in either case.
function extractText(generated) {
  if (typeof generated === "string") return generated;
  if (Array.isArray(generated)) {
    const last = generated[generated.length - 1];
    return last?.content ?? "";
  }
  return "";
}

self.onmessage = async (event) => {
  const request = event.data;
  if (request.type !== "generate") return;

  const { id, messages, options } = request;

  // Per-request flag: finishing one request must not mute another.
  let active = false;

  try {
    const pipe = await loadPipeline({
      onStatus: (status) => self.postMessage({ type: "status", status }),
      onProgress: (progress, file) => self.postMessage({ type: "download-progress", progress, file }),
    });

    self.postMessage({ type: "status", status: "generating" });
    active = true;

    const chatMessages = [];
    if (options?.systemPrompt) {
      chatMessages.push({ role: "system", content: options.systemPrompt });
    }
    chatMessages.push(...messages);

    const result = await pipe(chatMessages, {
      max_new_tokens: options?.maxNewTokens ?? 512,
      temperature: options?.temperature ?? 0.7,
      do_sample: true,
      return_full_text: false,
      callback_function: (output) => {
        if (!active) return;
        const text = extractText(output?.[0]?.generated_text);
        if (text) {
          self.postMessage({ type: "token", id, token: text });
        }
      },
    });

    active = false;
    const finalText = extractText(result?.[0]?.generated_text);
    self.postMessage({ type: "done", id, text: finalText });
  } catch (error) {
    active = false;
    self.postMessage({ type: "error", id, error: error?.message ?? String(error) });
  }
};
`;
|
|
@@ -1,6 +1,18 @@
|
|
|
1
1
|
import * as dntShim from "../../../../_dnt.shims.js";
|
|
2
2
|
export type StreamState = "streaming" | "done";
|
|
3
3
|
|
|
4
|
+
/**
 * Location of the model that answers a chat request: a cloud provider, a
 * local model on the server, or a model running in the browser.
 */
export type InferenceMode = "cloud" | "server-local" | "browser";

/** Lifecycle states reported while a browser-side model loads and generates. */
export type BrowserInferenceStatus =
  | "idle"
  | "loading-runtime"
  | "downloading-model"
  | "ready"
  | "generating"
  | "error";
|
|
15
|
+
|
|
4
16
|
export interface TextUIPart {
|
|
5
17
|
type: "text";
|
|
6
18
|
text: string;
|
|
@@ -90,6 +102,10 @@ export interface UseChatOptions {
|
|
|
90
102
|
credentials?: RequestCredentials;
|
|
91
103
|
/** Override model at runtime (e.g. "openai/gpt-4o", "anthropic/claude-sonnet-4-5-20250929") */
|
|
92
104
|
model?: string;
|
|
105
|
+
/** System prompt for browser-side inference (server uses agent config) */
|
|
106
|
+
systemPrompt?: string;
|
|
107
|
+
/** Enable/disable browser fallback when server can't provide AI. Default: true */
|
|
108
|
+
browserFallback?: boolean;
|
|
93
109
|
onResponse?: (response: dntShim.Response) => void;
|
|
94
110
|
onFinish?: (message: UIMessage) => void;
|
|
95
111
|
onError?: (error: Error) => void;
|
|
@@ -103,6 +119,10 @@ export interface UseChatResult {
|
|
|
103
119
|
error: Error | null;
|
|
104
120
|
/** Current model override (undefined = use agent default) */
|
|
105
121
|
model: string | undefined;
|
|
122
|
+
/** Where inference is currently happening */
|
|
123
|
+
inferenceMode: InferenceMode;
|
|
124
|
+
/** Browser-side model loading/inference status (null when not using browser fallback) */
|
|
125
|
+
browserStatus: BrowserInferenceStatus | null;
|
|
106
126
|
setInput: (input: string) => void;
|
|
107
127
|
/** Change the model for subsequent requests */
|
|
108
128
|
setModel: (model: string | undefined) => void;
|
|
@@ -4,6 +4,11 @@
|
|
|
4
4
|
* Complete chat state management with zero UI.
|
|
5
5
|
* Consumes the veryfront streaming protocol
|
|
6
6
|
* (message-start/message-finish + step-start/step-end).
|
|
7
|
+
*
|
|
8
|
+
* Supports three inference modes:
|
|
9
|
+
* - cloud: API key present, normal server-side inference
|
|
10
|
+
* - server-local: No API key, server runs local model via ONNX
|
|
11
|
+
* - browser: Server can't run ONNX (compiled binary), falls back to browser Worker
|
|
7
12
|
*/
|
|
8
13
|
import * as dntShim from "../../../../_dnt.shims.js";
|
|
9
14
|
|
|
@@ -12,7 +17,15 @@ import { useCallback, useRef, useState } from "react";
|
|
|
12
17
|
import { createError, ensureError, toError } from "../../../errors/veryfront-error.js";
|
|
13
18
|
|
|
14
19
|
import { handleStreamingResponse } from "./streaming/index.js";
|
|
15
|
-
import type {
|
|
20
|
+
import type {
|
|
21
|
+
BrowserInferenceStatus,
|
|
22
|
+
InferenceMode,
|
|
23
|
+
ToolOutput,
|
|
24
|
+
UIMessage,
|
|
25
|
+
UIMessagePart,
|
|
26
|
+
UseChatOptions,
|
|
27
|
+
UseChatResult,
|
|
28
|
+
} from "./types.js";
|
|
16
29
|
import { generateClientId } from "./utils.js";
|
|
17
30
|
|
|
18
31
|
/**
|
|
@@ -25,7 +38,16 @@ export function useChat(options: UseChatOptions): UseChatResult {
|
|
|
25
38
|
const [error, setError] = useState<Error | null>(null);
|
|
26
39
|
const [data, setData] = useState<unknown>(null);
|
|
27
40
|
const [model, setModel] = useState<string | undefined>(options.model);
|
|
41
|
+
const [inferenceMode, setInferenceMode] = useState<InferenceMode>("cloud");
|
|
42
|
+
const [browserStatus, setBrowserStatus] = useState<BrowserInferenceStatus | null>(null);
|
|
28
43
|
const abortControllerRef = useRef<AbortController | null>(null);
|
|
44
|
+
const browserInferenceActiveRef = useRef(false);
|
|
45
|
+
const browserInferenceRejectRef = useRef<((reason: Error) => void) | null>(null);
|
|
46
|
+
|
|
47
|
+
// System prompt for browser fallback (from 503 response or options)
|
|
48
|
+
const systemPromptRef = useRef<string>(
|
|
49
|
+
options.systemPrompt ?? "You are a helpful AI assistant.",
|
|
50
|
+
);
|
|
29
51
|
|
|
30
52
|
// Track pending tool outputs for addToolOutput
|
|
31
53
|
const pendingToolOutputsRef = useRef<Map<string, ToolOutput>>(new Map());
|
|
@@ -57,6 +79,64 @@ export function useChat(options: UseChatOptions): UseChatResult {
|
|
|
57
79
|
);
|
|
58
80
|
}, []);
|
|
59
81
|
|
|
82
|
+
/**
|
|
83
|
+
* Run inference in the browser via Web Worker.
|
|
84
|
+
* Lazily imports the browser-inference module to avoid bundling it
|
|
85
|
+
* when server-side inference works fine.
|
|
86
|
+
*/
|
|
87
|
+
const doBrowserInference = useCallback(
|
|
88
|
+
async (allMessages: UIMessage[]) => {
|
|
89
|
+
browserInferenceActiveRef.current = true;
|
|
90
|
+
|
|
91
|
+
try {
|
|
92
|
+
const { runBrowserInference } = await import(
|
|
93
|
+
"./browser-inference/browser-engine.js"
|
|
94
|
+
);
|
|
95
|
+
|
|
96
|
+
await new Promise<void>((resolve, reject) => {
|
|
97
|
+
browserInferenceRejectRef.current = reject;
|
|
98
|
+
let hasAddedMessage = false;
|
|
99
|
+
|
|
100
|
+
runBrowserInference(allMessages, systemPromptRef.current, {
|
|
101
|
+
onUpdate: (parts: UIMessagePart[], messageId: string) => {
|
|
102
|
+
if (!hasAddedMessage) {
|
|
103
|
+
hasAddedMessage = true;
|
|
104
|
+
setMessages((prev) => [
|
|
105
|
+
...prev,
|
|
106
|
+
{ id: messageId, role: "assistant", parts },
|
|
107
|
+
]);
|
|
108
|
+
return;
|
|
109
|
+
}
|
|
110
|
+
setMessages((prev) => prev.map((m) => (m.id === messageId ? { ...m, parts } : m)));
|
|
111
|
+
},
|
|
112
|
+
onMessage: (assistantMessage: UIMessage) => {
|
|
113
|
+
setMessages((prev) => {
|
|
114
|
+
if (!hasAddedMessage) return [...prev, assistantMessage];
|
|
115
|
+
return prev.map((m) => m.id === assistantMessage.id ? assistantMessage : m);
|
|
116
|
+
});
|
|
117
|
+
options.onFinish?.(assistantMessage);
|
|
118
|
+
browserInferenceRejectRef.current = null;
|
|
119
|
+
resolve();
|
|
120
|
+
},
|
|
121
|
+
onStatusChange: (status: BrowserInferenceStatus) => {
|
|
122
|
+
setBrowserStatus(status);
|
|
123
|
+
},
|
|
124
|
+
onDownloadProgress: () => {
|
|
125
|
+
// Progress is tracked via onStatusChange("downloading-model")
|
|
126
|
+
},
|
|
127
|
+
onError: (err: Error) => {
|
|
128
|
+
browserInferenceRejectRef.current = null;
|
|
129
|
+
reject(err);
|
|
130
|
+
},
|
|
131
|
+
});
|
|
132
|
+
});
|
|
133
|
+
} finally {
|
|
134
|
+
browserInferenceActiveRef.current = false;
|
|
135
|
+
}
|
|
136
|
+
},
|
|
137
|
+
[options],
|
|
138
|
+
);
|
|
139
|
+
|
|
60
140
|
/**
|
|
61
141
|
* Send a message and stream assistant updates.
|
|
62
142
|
*/
|
|
@@ -72,10 +152,18 @@ export function useChat(options: UseChatOptions): UseChatResult {
|
|
|
72
152
|
setIsLoading(true);
|
|
73
153
|
setError(null);
|
|
74
154
|
|
|
75
|
-
const abortController = new AbortController();
|
|
76
|
-
abortControllerRef.current = abortController;
|
|
77
|
-
|
|
78
155
|
try {
|
|
156
|
+
const allMessages = [...messages, userMessage];
|
|
157
|
+
|
|
158
|
+
// If already in browser mode, skip fetch entirely
|
|
159
|
+
if (inferenceMode === "browser") {
|
|
160
|
+
await doBrowserInference(allMessages);
|
|
161
|
+
return;
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
const abortController = new AbortController();
|
|
165
|
+
abortControllerRef.current = abortController;
|
|
166
|
+
|
|
79
167
|
const response = await dntShim.fetch(options.api, {
|
|
80
168
|
method: "POST",
|
|
81
169
|
headers: {
|
|
@@ -84,13 +172,31 @@ export function useChat(options: UseChatOptions): UseChatResult {
|
|
|
84
172
|
},
|
|
85
173
|
credentials: options.credentials,
|
|
86
174
|
body: JSON.stringify({
|
|
87
|
-
messages:
|
|
175
|
+
messages: allMessages,
|
|
88
176
|
...(model ? { model } : {}),
|
|
89
177
|
...options.body,
|
|
90
178
|
}),
|
|
91
179
|
signal: abortController.signal,
|
|
92
180
|
});
|
|
93
181
|
|
|
182
|
+
// Handle 503 — server can't provide AI, fall back to browser
|
|
183
|
+
if (response.status === 503 && (options.browserFallback ?? true)) {
|
|
184
|
+
try {
|
|
185
|
+
const body = await response.json();
|
|
186
|
+
if (body.code === "NO_AI_AVAILABLE") {
|
|
187
|
+
if (body.systemPrompt) {
|
|
188
|
+
systemPromptRef.current = body.systemPrompt;
|
|
189
|
+
}
|
|
190
|
+
setInferenceMode("browser");
|
|
191
|
+
setBrowserStatus("idle");
|
|
192
|
+
await doBrowserInference(allMessages);
|
|
193
|
+
return;
|
|
194
|
+
}
|
|
195
|
+
} catch {
|
|
196
|
+
// If parsing fails, fall through to normal error handling
|
|
197
|
+
}
|
|
198
|
+
}
|
|
199
|
+
|
|
94
200
|
if (!response.ok) {
|
|
95
201
|
throw toError(
|
|
96
202
|
createError({
|
|
@@ -116,7 +222,20 @@ export function useChat(options: UseChatOptions): UseChatResult {
|
|
|
116
222
|
});
|
|
117
223
|
options.onFinish?.(assistantMessage);
|
|
118
224
|
},
|
|
119
|
-
onData:
|
|
225
|
+
onData: (eventData) => {
|
|
226
|
+
setData(eventData);
|
|
227
|
+
// Detect inference mode from server metadata
|
|
228
|
+
if (
|
|
229
|
+
eventData &&
|
|
230
|
+
typeof eventData === "object" &&
|
|
231
|
+
"inferenceMode" in eventData
|
|
232
|
+
) {
|
|
233
|
+
const mode = (eventData as { inferenceMode: string }).inferenceMode;
|
|
234
|
+
if (mode === "server-local" || mode === "cloud") {
|
|
235
|
+
setInferenceMode(mode);
|
|
236
|
+
}
|
|
237
|
+
}
|
|
238
|
+
},
|
|
120
239
|
onUpdate: (parts, messageId) => {
|
|
121
240
|
const id = messageId ?? streamingMessageId;
|
|
122
241
|
|
|
@@ -153,7 +272,7 @@ export function useChat(options: UseChatOptions): UseChatResult {
|
|
|
153
272
|
abortControllerRef.current = null;
|
|
154
273
|
}
|
|
155
274
|
},
|
|
156
|
-
[messages, model, options],
|
|
275
|
+
[messages, model, options, inferenceMode, doBrowserInference],
|
|
157
276
|
);
|
|
158
277
|
|
|
159
278
|
/**
|
|
@@ -176,9 +295,28 @@ export function useChat(options: UseChatOptions): UseChatResult {
|
|
|
176
295
|
/**
|
|
177
296
|
* Stop generation
|
|
178
297
|
*/
|
|
179
|
-
const stop = useCallback(() => {
|
|
298
|
+
const stop = useCallback(async () => {
|
|
180
299
|
abortControllerRef.current?.abort();
|
|
181
300
|
abortControllerRef.current = null;
|
|
301
|
+
|
|
302
|
+
// Also stop browser inference Worker if active
|
|
303
|
+
if (browserInferenceActiveRef.current) {
|
|
304
|
+
// Settle the pending doBrowserInference promise before terminating the Worker
|
|
305
|
+
browserInferenceRejectRef.current?.(new Error("Generation stopped by user"));
|
|
306
|
+
browserInferenceRejectRef.current = null;
|
|
307
|
+
|
|
308
|
+
try {
|
|
309
|
+
const { stopBrowserInference } = await import(
|
|
310
|
+
"./browser-inference/browser-engine.js"
|
|
311
|
+
);
|
|
312
|
+
stopBrowserInference();
|
|
313
|
+
} catch {
|
|
314
|
+
// Worker module may already be terminated or unavailable
|
|
315
|
+
}
|
|
316
|
+
browserInferenceActiveRef.current = false;
|
|
317
|
+
setBrowserStatus("ready");
|
|
318
|
+
}
|
|
319
|
+
|
|
182
320
|
setIsLoading(false);
|
|
183
321
|
}, []);
|
|
184
322
|
|
|
@@ -215,6 +353,8 @@ export function useChat(options: UseChatOptions): UseChatResult {
|
|
|
215
353
|
isLoading,
|
|
216
354
|
error,
|
|
217
355
|
model,
|
|
356
|
+
inferenceMode,
|
|
357
|
+
browserStatus,
|
|
218
358
|
setInput,
|
|
219
359
|
setModel,
|
|
220
360
|
sendMessage,
|
|
@@ -21,7 +21,7 @@ import {
|
|
|
21
21
|
type MessagePart,
|
|
22
22
|
type ToolCall,
|
|
23
23
|
} from "../types.js";
|
|
24
|
-
import { resolveModel } from "../../provider/index.js";
|
|
24
|
+
import { ensureModelReady, resolveModel } from "../../provider/index.js";
|
|
25
25
|
import { executeTool } from "../../tool/index.js";
|
|
26
26
|
import { generateId } from "../../utils/id.js";
|
|
27
27
|
import { detectPlatform, getPlatformCapabilities } from "../../platform/core-platform.js";
|
|
@@ -36,7 +36,7 @@ import { convertToModelMessages } from "./model-message-converter.js";
|
|
|
36
36
|
import { convertToolsToAISDK } from "./model-tool-converter.js";
|
|
37
37
|
import { createStreamState, processStream } from "./ai-stream-handler.js";
|
|
38
38
|
import { MiddlewareChain } from "../middleware/chain.js";
|
|
39
|
-
import { generateText, streamText } from "ai";
|
|
39
|
+
import { generateText, type LanguageModel, streamText } from "ai";
|
|
40
40
|
|
|
41
41
|
// Re-export from submodules
|
|
42
42
|
export { generateMessageId, sendSSE } from "./sse-utils.js";
|
|
@@ -59,6 +59,27 @@ import { accumulateUsage, getMaxSteps, normalizeInput } from "./input-utils.js";
|
|
|
59
59
|
|
|
60
60
|
const logger = serverLogger.component("agent");
|
|
61
61
|
|
|
62
|
+
/**
 * Decide whether a request is served by local inference.
 *
 * Returns true when the requested model id starts with "local/", or when the
 * resolved model itself identifies as local: a string starting with "local/",
 * an object whose `provider` is "local", or an object whose string `modelId`
 * starts with "local/". The resolved-model checks cover the case where a
 * cloud model was requested but resolution fell back to a local one.
 *
 * @param model The model returned by model resolution.
 * @param requestedModel The model id that was asked for.
 * @returns true for local inference, false otherwise.
 */
function isLocalInferenceModel(model: LanguageModel, requestedModel: string): boolean {
  const localPrefix = "local/";

  if (requestedModel.startsWith(localPrefix)) return true;

  // LanguageModel is a union that includes string; handle that arm first.
  if (typeof model === "string") return model.startsWith(localPrefix);

  return (
    ("provider" in model && model.provider === "local") ||
    ("modelId" in model && typeof model.modelId === "string" &&
      model.modelId.startsWith(localPrefix))
  );
}
|
|
82
|
+
|
|
62
83
|
export class AgentRuntime {
|
|
63
84
|
private id: string;
|
|
64
85
|
private config: AgentConfig;
|
|
@@ -125,6 +146,7 @@ export class AgentRuntime {
|
|
|
125
146
|
modelOverride?: string,
|
|
126
147
|
): Promise<ReadableStream<Uint8Array>> {
|
|
127
148
|
const modelString = modelOverride || this.config.model;
|
|
149
|
+
const requestedModel = modelString || this.config.model;
|
|
128
150
|
|
|
129
151
|
for (const msg of messages) await this.memory.add(msg);
|
|
130
152
|
|
|
@@ -135,6 +157,18 @@ export class AgentRuntime {
|
|
|
135
157
|
const toolContext = { agentId: this.id, ...context };
|
|
136
158
|
const textPartId = generateId("text");
|
|
137
159
|
|
|
160
|
+
// Resolve model BEFORE creating the ReadableStream — if this throws
|
|
161
|
+
// (e.g., no_ai_available), the error propagates to the caller who can
|
|
162
|
+
// return a proper error response (503) instead of a 200 with an error event.
|
|
163
|
+
const languageModel = resolveModel(requestedModel);
|
|
164
|
+
|
|
165
|
+
// Eagerly verify the model runtime is available. For local models this
|
|
166
|
+
// checks that @huggingface/transformers can be imported. Must happen
|
|
167
|
+
// BEFORE creating the ReadableStream so no_ai_available errors propagate
|
|
168
|
+
// to the caller (createChatHandler) who returns a 503 with browser fallback
|
|
169
|
+
// info, instead of being swallowed as an in-band SSE error in a 200 response.
|
|
170
|
+
await ensureModelReady(languageModel);
|
|
171
|
+
|
|
138
172
|
return new ReadableStream<Uint8Array>({
|
|
139
173
|
start: async (controller) => {
|
|
140
174
|
try {
|
|
@@ -142,6 +176,14 @@ export class AgentRuntime {
|
|
|
142
176
|
|
|
143
177
|
const messageId = generateMessageId();
|
|
144
178
|
sendSSE(controller, encoder, { type: "message-start", messageId });
|
|
179
|
+
sendSSE(controller, encoder, {
|
|
180
|
+
type: "data",
|
|
181
|
+
data: {
|
|
182
|
+
inferenceMode: isLocalInferenceModel(languageModel, requestedModel)
|
|
183
|
+
? "server-local"
|
|
184
|
+
: "cloud",
|
|
185
|
+
},
|
|
186
|
+
});
|
|
145
187
|
sendSSE(controller, encoder, { type: "text-start", id: textPartId });
|
|
146
188
|
|
|
147
189
|
await this.executeAgentLoopStreaming(
|
|
@@ -153,6 +195,7 @@ export class AgentRuntime {
|
|
|
153
195
|
textPartId,
|
|
154
196
|
toolContext,
|
|
155
197
|
modelString,
|
|
198
|
+
languageModel,
|
|
156
199
|
);
|
|
157
200
|
|
|
158
201
|
sendSSE(controller, encoder, { type: "text-end", id: textPartId });
|
|
@@ -181,17 +224,28 @@ export class AgentRuntime {
|
|
|
181
224
|
return withSpan("agent.execution_loop", async (loopSpan) => {
|
|
182
225
|
const { maxAgentSteps } = getPlatformCapabilities();
|
|
183
226
|
const maxSteps = this.computeMaxSteps(maxAgentSteps);
|
|
184
|
-
const
|
|
227
|
+
const requestedModel = modelString || this.config.model;
|
|
228
|
+
const languageModel = resolveModel(requestedModel);
|
|
185
229
|
|
|
186
230
|
const toolCalls: ToolCall[] = [];
|
|
187
231
|
const currentMessages = [...messages];
|
|
188
232
|
const totalUsage = { promptTokens: 0, completionTokens: 0, totalTokens: 0 };
|
|
189
233
|
|
|
234
|
+
// Local models can't reliably do function calling — skip tools gracefully.
|
|
235
|
+
const isLocal = isLocalInferenceModel(languageModel, requestedModel);
|
|
236
|
+
if (isLocal && this.config.tools) {
|
|
237
|
+
logger.warn(
|
|
238
|
+
`Agent "${this.id}" has tools configured but is using local model "${requestedModel}". ` +
|
|
239
|
+
"Local models don't support tool calling — tools will be skipped. " +
|
|
240
|
+
"Set OPENAI_API_KEY, ANTHROPIC_API_KEY, or GOOGLE_API_KEY for full tool support.",
|
|
241
|
+
);
|
|
242
|
+
}
|
|
243
|
+
|
|
190
244
|
for (let step = 0; step < maxSteps; step++) {
|
|
191
245
|
this.status = "thinking";
|
|
192
246
|
addSpanEvent(loopSpan, "step_start", { step });
|
|
193
247
|
|
|
194
|
-
const tools = getAvailableTools(this.config.tools);
|
|
248
|
+
const tools = isLocal ? [] : getAvailableTools(this.config.tools);
|
|
195
249
|
|
|
196
250
|
const response = await withSpan("agent.generate_text", async (span) => {
|
|
197
251
|
setSpanAttributes(span, {
|
|
@@ -350,19 +404,31 @@ export class AgentRuntime {
|
|
|
350
404
|
textPartId?: string,
|
|
351
405
|
toolContext?: Record<string, unknown>,
|
|
352
406
|
modelString?: string,
|
|
407
|
+
resolvedModel?: LanguageModel,
|
|
353
408
|
): Promise<AgentResponse> {
|
|
354
409
|
const { maxAgentSteps } = getPlatformCapabilities();
|
|
355
410
|
const maxSteps = this.computeMaxSteps(maxAgentSteps);
|
|
356
|
-
const
|
|
411
|
+
const requestedModel = modelString || this.config.model;
|
|
412
|
+
const languageModel = resolvedModel ?? resolveModel(requestedModel);
|
|
357
413
|
|
|
358
414
|
const toolCalls: ToolCall[] = [];
|
|
359
415
|
const currentMessages = [...messages];
|
|
360
416
|
const totalUsage = { promptTokens: 0, completionTokens: 0, totalTokens: 0 };
|
|
361
417
|
|
|
418
|
+
// Local models can't reliably do function calling — skip tools gracefully.
|
|
419
|
+
const isLocalStreaming = isLocalInferenceModel(languageModel, requestedModel);
|
|
420
|
+
if (isLocalStreaming && this.config.tools) {
|
|
421
|
+
logger.warn(
|
|
422
|
+
`Agent "${this.id}" has tools configured but is using local model "${requestedModel}". ` +
|
|
423
|
+
"Local models don't support tool calling — tools will be skipped. " +
|
|
424
|
+
"Set OPENAI_API_KEY, ANTHROPIC_API_KEY, or GOOGLE_API_KEY for full tool support.",
|
|
425
|
+
);
|
|
426
|
+
}
|
|
427
|
+
|
|
362
428
|
for (let step = 0; step < maxSteps; step++) {
|
|
363
429
|
sendSSE(controller, encoder, { type: "step-start" });
|
|
364
430
|
|
|
365
|
-
const tools = getAvailableTools(this.config.tools);
|
|
431
|
+
const tools = isLocalStreaming ? [] : getAvailableTools(this.config.tools);
|
|
366
432
|
const result = streamText({
|
|
367
433
|
model: languageModel,
|
|
368
434
|
system: systemPrompt,
|