veryfront 0.1.13 → 0.1.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (135)
  1. package/esm/cli/app/data/slug-words.d.ts.map +1 -1
  2. package/esm/cli/app/data/slug-words.js +225 -90
  3. package/esm/cli/app/operations/project-creation.js +4 -3
  4. package/esm/cli/app/shell.js +1 -1
  5. package/esm/cli/app/utils.d.ts +5 -4
  6. package/esm/cli/app/utils.d.ts.map +1 -1
  7. package/esm/cli/app/utils.js +0 -23
  8. package/esm/cli/app/views/dashboard.d.ts +1 -1
  9. package/esm/cli/app/views/dashboard.d.ts.map +1 -1
  10. package/esm/cli/app/views/dashboard.js +22 -4
  11. package/esm/cli/auth/callback-server.d.ts.map +1 -1
  12. package/esm/cli/auth/callback-server.js +3 -2
  13. package/esm/cli/commands/dev/handler.d.ts.map +1 -1
  14. package/esm/cli/commands/dev/handler.js +2 -0
  15. package/esm/cli/commands/init/init-command.d.ts.map +1 -1
  16. package/esm/cli/commands/init/init-command.js +20 -3
  17. package/esm/cli/commands/init/interactive-wizard.d.ts +3 -2
  18. package/esm/cli/commands/init/interactive-wizard.d.ts.map +1 -1
  19. package/esm/cli/commands/init/interactive-wizard.js +55 -27
  20. package/esm/cli/mcp/remote-file-tools.d.ts +0 -6
  21. package/esm/cli/mcp/remote-file-tools.d.ts.map +1 -1
  22. package/esm/cli/mcp/remote-file-tools.js +37 -15
  23. package/esm/cli/shared/reserve-slug.d.ts.map +1 -1
  24. package/esm/cli/shared/reserve-slug.js +8 -3
  25. package/esm/cli/utils/env-prompt.d.ts.map +1 -1
  26. package/esm/cli/utils/env-prompt.js +3 -0
  27. package/esm/deno.d.ts +2 -1
  28. package/esm/deno.js +8 -4
  29. package/esm/src/agent/chat-handler.d.ts +4 -3
  30. package/esm/src/agent/chat-handler.d.ts.map +1 -1
  31. package/esm/src/agent/chat-handler.js +55 -4
  32. package/esm/src/agent/react/index.d.ts +1 -1
  33. package/esm/src/agent/react/index.d.ts.map +1 -1
  34. package/esm/src/agent/react/use-chat/browser-inference/browser-engine.d.ts +18 -0
  35. package/esm/src/agent/react/use-chat/browser-inference/browser-engine.d.ts.map +1 -0
  36. package/esm/src/agent/react/use-chat/browser-inference/browser-engine.js +54 -0
  37. package/esm/src/agent/react/use-chat/browser-inference/types.d.ts +43 -0
  38. package/esm/src/agent/react/use-chat/browser-inference/types.d.ts.map +1 -0
  39. package/esm/src/agent/react/use-chat/browser-inference/types.js +4 -0
  40. package/esm/src/agent/react/use-chat/browser-inference/worker-client.d.ts +23 -0
  41. package/esm/src/agent/react/use-chat/browser-inference/worker-client.d.ts.map +1 -0
  42. package/esm/src/agent/react/use-chat/browser-inference/worker-client.js +67 -0
  43. package/esm/src/agent/react/use-chat/browser-inference/worker-script.d.ts +8 -0
  44. package/esm/src/agent/react/use-chat/browser-inference/worker-script.d.ts.map +1 -0
  45. package/esm/src/agent/react/use-chat/browser-inference/worker-script.js +97 -0
  46. package/esm/src/agent/react/use-chat/index.d.ts +1 -1
  47. package/esm/src/agent/react/use-chat/index.d.ts.map +1 -1
  48. package/esm/src/agent/react/use-chat/types.d.ts +12 -0
  49. package/esm/src/agent/react/use-chat/types.d.ts.map +1 -1
  50. package/esm/src/agent/react/use-chat/use-chat.d.ts.map +1 -1
  51. package/esm/src/agent/react/use-chat/use-chat.js +120 -6
  52. package/esm/src/agent/runtime/index.d.ts.map +1 -1
  53. package/esm/src/agent/runtime/index.js +59 -7
  54. package/esm/src/build/production-build/templates.d.ts +2 -2
  55. package/esm/src/build/production-build/templates.d.ts.map +1 -1
  56. package/esm/src/build/production-build/templates.js +2 -68
  57. package/esm/src/chat/index.d.ts +1 -1
  58. package/esm/src/chat/index.d.ts.map +1 -1
  59. package/esm/src/errors/veryfront-error.d.ts +3 -0
  60. package/esm/src/errors/veryfront-error.d.ts.map +1 -1
  61. package/esm/src/platform/adapters/runtime/deno/adapter.d.ts.map +1 -1
  62. package/esm/src/platform/adapters/runtime/deno/adapter.js +5 -1
  63. package/esm/src/platform/compat/http/deno-server.d.ts.map +1 -1
  64. package/esm/src/platform/compat/http/deno-server.js +3 -2
  65. package/esm/src/provider/index.d.ts +1 -1
  66. package/esm/src/provider/index.d.ts.map +1 -1
  67. package/esm/src/provider/index.js +1 -1
  68. package/esm/src/provider/local/ai-sdk-adapter.d.ts +19 -0
  69. package/esm/src/provider/local/ai-sdk-adapter.d.ts.map +1 -0
  70. package/esm/src/provider/local/ai-sdk-adapter.js +164 -0
  71. package/esm/src/provider/local/env.d.ts +10 -0
  72. package/esm/src/provider/local/env.d.ts.map +1 -0
  73. package/esm/src/provider/local/env.js +23 -0
  74. package/esm/src/provider/local/local-engine.d.ts +61 -0
  75. package/esm/src/provider/local/local-engine.d.ts.map +1 -0
  76. package/esm/src/provider/local/local-engine.js +211 -0
  77. package/esm/src/provider/local/model-catalog.d.ts +30 -0
  78. package/esm/src/provider/local/model-catalog.d.ts.map +1 -0
  79. package/esm/src/provider/local/model-catalog.js +58 -0
  80. package/esm/src/provider/model-registry.d.ts +14 -0
  81. package/esm/src/provider/model-registry.d.ts.map +1 -1
  82. package/esm/src/provider/model-registry.js +58 -2
  83. package/esm/src/proxy/main.js +34 -6
  84. package/esm/src/proxy/server-resolver.d.ts +23 -0
  85. package/esm/src/proxy/server-resolver.d.ts.map +1 -0
  86. package/esm/src/proxy/server-resolver.js +124 -0
  87. package/esm/src/react/components/ai/chat/components/inference-badge.d.ts +8 -0
  88. package/esm/src/react/components/ai/chat/components/inference-badge.d.ts.map +1 -0
  89. package/esm/src/react/components/ai/chat/components/inference-badge.js +36 -0
  90. package/esm/src/react/components/ai/chat/components/upgrade-cta.d.ts +7 -0
  91. package/esm/src/react/components/ai/chat/components/upgrade-cta.d.ts.map +1 -0
  92. package/esm/src/react/components/ai/chat/components/upgrade-cta.js +33 -0
  93. package/esm/src/react/components/ai/chat/index.d.ts +7 -1
  94. package/esm/src/react/components/ai/chat/index.d.ts.map +1 -1
  95. package/esm/src/react/components/ai/chat/index.js +16 -4
  96. package/package.json +5 -1
  97. package/src/cli/app/data/slug-words.ts +225 -90
  98. package/src/cli/app/operations/project-creation.ts +3 -3
  99. package/src/cli/app/shell.ts +1 -1
  100. package/src/cli/app/utils.ts +0 -30
  101. package/src/cli/app/views/dashboard.ts +27 -4
  102. package/src/cli/auth/callback-server.ts +3 -2
  103. package/src/cli/commands/dev/handler.ts +2 -0
  104. package/src/cli/commands/init/init-command.ts +30 -3
  105. package/src/cli/commands/init/interactive-wizard.ts +62 -34
  106. package/src/cli/mcp/remote-file-tools.ts +50 -15
  107. package/src/cli/shared/reserve-slug.ts +9 -2
  108. package/src/cli/utils/env-prompt.ts +3 -0
  109. package/src/deno.js +8 -4
  110. package/src/src/agent/chat-handler.ts +57 -4
  111. package/src/src/agent/react/index.ts +2 -0
  112. package/src/src/agent/react/use-chat/browser-inference/browser-engine.ts +81 -0
  113. package/src/src/agent/react/use-chat/browser-inference/types.ts +52 -0
  114. package/src/src/agent/react/use-chat/browser-inference/worker-client.ts +89 -0
  115. package/src/src/agent/react/use-chat/browser-inference/worker-script.ts +98 -0
  116. package/src/src/agent/react/use-chat/index.ts +2 -0
  117. package/src/src/agent/react/use-chat/types.ts +20 -0
  118. package/src/src/agent/react/use-chat/use-chat.ts +148 -8
  119. package/src/src/agent/runtime/index.ts +72 -6
  120. package/src/src/build/production-build/templates.ts +2 -68
  121. package/src/src/chat/index.ts +2 -0
  122. package/src/src/errors/veryfront-error.ts +2 -1
  123. package/src/src/platform/adapters/runtime/deno/adapter.ts +5 -1
  124. package/src/src/platform/compat/http/deno-server.ts +3 -1
  125. package/src/src/provider/index.ts +1 -0
  126. package/src/src/provider/local/ai-sdk-adapter.ts +207 -0
  127. package/src/src/provider/local/env.ts +26 -0
  128. package/src/src/provider/local/local-engine.ts +288 -0
  129. package/src/src/provider/local/model-catalog.ts +73 -0
  130. package/src/src/provider/model-registry.ts +66 -2
  131. package/src/src/proxy/main.ts +41 -6
  132. package/src/src/proxy/server-resolver.ts +151 -0
  133. package/src/src/react/components/ai/chat/components/inference-badge.tsx +48 -0
  134. package/src/src/react/components/ai/chat/components/upgrade-cta.tsx +56 -0
  135. package/src/src/react/components/ai/chat/index.tsx +43 -6
package/src/src/provider/local/local-engine.ts
@@ -0,0 +1,288 @@
+/**
+ * Local Model Engine
+ *
+ * Singleton wrapper around `@huggingface/transformers` for server-side
+ * local LLM inference. Provides lazy model loading and streaming text
+ * generation via async generators.
+ *
+ * Uses ONNX Runtime for inference with q4 quantization — NOT q4f16
+ * due to a known ONNX bug with f16 LayerNorm on CPU.
+ *
+ * @module provider/local
+ */
+
+import { serverLogger } from "../../utils/index.js";
+import { createError, toError } from "../../errors/veryfront-error.js";
+import { DEFAULT_LOCAL_MODEL, type ModelInfo, resolveLocalModel } from "./model-catalog.js";
+import { isLocalAIDisabled } from "./env.js";
+
+const logger = serverLogger.component("local-llm");
+
+/** Chat message format expected by Transformers.js */
+export interface ChatMessage {
+  role: "system" | "user" | "assistant";
+  content: string;
+}
+
+/** Options for text generation */
+export interface GenerateOptions {
+  maxNewTokens?: number;
+  temperature?: number;
+  topP?: number;
+  topK?: number;
+  stopSequences?: string[];
+}
+
+// deno-lint-ignore no-explicit-any
+type TransformersModule = any;
+// deno-lint-ignore no-explicit-any
+type Pipeline = any;
+
+/** Cached pipeline instances keyed by HuggingFace model ID */
+const pipelineCache = new Map<string, Pipeline>();
+
+/** Whether a model is currently being loaded (prevents concurrent loads) */
+const loadingLocks = new Map<string, Promise<Pipeline>>();
+
+/** Lazily loaded @huggingface/transformers module */
+let transformersModule: TransformersModule | null = null;
+
+/**
+ * Lazily import @huggingface/transformers.
+ * Only loads when actually needed, keeping startup fast when API keys are present.
+ */
+async function getTransformers(): Promise<TransformersModule> {
+  if (transformersModule) return transformersModule;
+
+  if (isLocalAIDisabled()) {
+    throw toError(
+      createError({
+        type: "no_ai_available",
+        message: "Local AI disabled via VERYFRONT_DISABLE_LOCAL_AI environment variable.",
+      }),
+    );
+  }
+
+  logger.info("Loading @huggingface/transformers...");
+
+  try {
+    transformersModule = await import("@huggingface/transformers");
+  } catch {
+    throw toError(
+      createError({
+        type: "no_ai_available",
+        message:
+          "Local AI model unavailable — native ONNX Runtime is not supported in this environment " +
+          "(e.g. compiled binaries). Set OPENAI_API_KEY, ANTHROPIC_API_KEY, or GOOGLE_API_KEY " +
+          "in your .env file to use a cloud provider instead.",
+      }),
+    );
+  }
+
+  // Configure cache directory for model files
+  transformersModule.env.cacheDir = "./.cache/models";
+  // Disable browser-specific features in Node/Deno
+  transformersModule.env.useBrowserCache = false;
+
+  return transformersModule;
+}
+
+/**
+ * Load a text-generation pipeline for the given model.
+ * Returns a cached pipeline if already loaded.
+ */
+async function loadPipeline(modelInfo: ModelInfo): Promise<Pipeline> {
+  const cacheKey = modelInfo.hfId;
+
+  // Return cached pipeline
+  const cached = pipelineCache.get(cacheKey);
+  if (cached) return cached;
+
+  // Wait for existing load if in progress
+  const existingLock = loadingLocks.get(cacheKey);
+  if (existingLock) return existingLock;
+
+  // Start loading
+  const loadPromise = (async () => {
+    const transformers = await getTransformers();
+
+    logger.info(
+      `Loading local model: ${modelInfo.hfId} (${modelInfo.dtype}, ~${modelInfo.sizeMB}MB)...`,
+    );
+
+    const pipe = await transformers.pipeline(
+      "text-generation",
+      modelInfo.hfId,
+      {
+        dtype: modelInfo.dtype,
+        device: "cpu",
+      },
+    );
+
+    logger.info(`Model loaded: ${modelInfo.hfId}`);
+    pipelineCache.set(cacheKey, pipe);
+    loadingLocks.delete(cacheKey);
+    return pipe;
+  })();
+
+  loadingLocks.set(cacheKey, loadPromise);
+
+  try {
+    return await loadPromise;
+  } catch (error) {
+    loadingLocks.delete(cacheKey);
+
+    // Convert ONNX / native-addon errors to no_ai_available so they propagate
+    // correctly through the chat handler (503) instead of being swallowed as
+    // in-band SSE errors inside a 200 response stream.
+    const msg = error instanceof Error ? error.message : String(error);
+    if (
+      msg.includes("onnx") || msg.includes("ONNX") ||
+      msg.includes("dlopen") || msg.includes("dynamic linking") ||
+      msg.includes("native module") || msg.includes("SharedArrayBuffer")
+    ) {
+      transformersModule = null;
+      throw toError(
+        createError({
+          type: "no_ai_available",
+          message:
+            "Local AI model unavailable — native ONNX Runtime is not supported in this environment " +
+            "(e.g. compiled binaries). Set OPENAI_API_KEY, ANTHROPIC_API_KEY, or GOOGLE_API_KEY " +
+            "in your .env file to use a cloud provider instead.",
+        }),
+      );
+    }
+    throw error;
+  }
+}
+
+/**
+ * Eagerly verify that the local AI runtime (@huggingface/transformers + ONNX)
+ * is available by loading the default model pipeline.
+ *
+ * Call this *before* creating the HTTP response stream so that failures surface
+ * as a thrown error (→ 503) rather than being swallowed inside a ReadableStream
+ * (→ 200 with in-band SSE error).
+ *
+ * In compiled binaries, `import("@huggingface/transformers")` itself fails
+ * because `onnxruntime-node` eagerly `require()`s a native `.node` addon at
+ * import time and the addon isn't embedded in the binary. In dev mode (Deno)
+ * the native addon exists on disk so the import succeeds, but `pipeline()` can
+ * still fail if the ONNX model files are missing. Either way this function
+ * surfaces the error before the response stream is created. The pipeline is
+ * cached after the first successful call, so subsequent checks are instant.
+ */
+export async function verifyLocalRuntime(modelId?: string): Promise<void> {
+  const modelInfo = resolveLocalModel(modelId || DEFAULT_LOCAL_MODEL);
+  await loadPipeline(modelInfo);
+}
+
+/**
+ * Generate text in a streaming fashion using an async generator.
+ *
+ * Yields individual tokens as they are generated by the model.
+ */
+export async function* generateStream(
+  modelId: string,
+  messages: ChatMessage[],
+  options: GenerateOptions = {},
+): AsyncGenerator<string, void, undefined> {
+  const modelInfo = resolveLocalModel(modelId);
+  const pipe = await loadPipeline(modelInfo);
+  const transformers = await getTransformers();
+
+  const {
+    maxNewTokens = 512,
+    temperature = 0.7,
+    topP,
+    topK,
+  } = options;
+
+  // Use a queue to bridge TextStreamer callbacks → async generator
+  const tokenQueue: string[] = [];
+  let resolveWaiting: (() => void) | null = null;
+  let done = false;
+
+  const streamer = new transformers.TextStreamer(pipe.tokenizer, {
+    skip_prompt: true,
+    skip_special_tokens: true,
+    callback_function: (text: string) => {
+      tokenQueue.push(text);
+      if (resolveWaiting) {
+        resolveWaiting();
+        resolveWaiting = null;
+      }
+    },
+  });
+
+  // Start generation in the background
+  const generatePromise = pipe(messages, {
+    max_new_tokens: maxNewTokens,
+    temperature,
+    top_p: topP,
+    top_k: topK,
+    do_sample: temperature > 0,
+    streamer,
+  }).then(() => {
+    done = true;
+    if (resolveWaiting) {
+      resolveWaiting();
+      resolveWaiting = null;
+    }
+  }).catch((error: Error) => {
+    done = true;
+    if (resolveWaiting) {
+      resolveWaiting();
+      resolveWaiting = null;
+    }
+    throw error;
+  });
+
+  // Yield tokens as they arrive
+  while (true) {
+    while (tokenQueue.length > 0) {
+      yield tokenQueue.shift()!;
+    }
+
+    if (done) break;
+
+    // Wait for more tokens
+    await new Promise<void>((resolve) => {
+      resolveWaiting = resolve;
+    });
+  }
+
+  // Ensure generation has completed
+  await generatePromise;
+}
+
+/**
+ * Generate text without streaming (full completion).
+ */
+export async function generate(
+  modelId: string,
+  messages: ChatMessage[],
+  options: GenerateOptions = {},
+): Promise<string> {
+  const chunks: string[] = [];
+  for await (const token of generateStream(modelId, messages, options)) {
+    chunks.push(token);
+  }
+  return chunks.join("");
+}
+
+/**
+ * Preload a model into memory. Useful for warming up on server start.
+ */
+export async function preloadModel(modelId: string): Promise<void> {
+  const modelInfo = resolveLocalModel(modelId);
+  await loadPipeline(modelInfo);
+}
+
+/**
+ * Check if a model is currently loaded in memory.
+ */
+export function isModelLoaded(modelId: string): boolean {
+  const modelInfo = resolveLocalModel(modelId);
+  return pipelineCache.has(modelInfo.hfId);
+}
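The engine above bridges Transformers.js's callback-based `TextStreamer` into an `AsyncGenerator`, so callers consume tokens with a plain `for await` loop. A minimal consumption sketch, assuming the module resolves as `./local-engine.js` and that the SmolLM2 weights can be downloaded on first use:

```ts
import { generate, generateStream, preloadModel } from "./local-engine.js";

// Optional warm-up: loads the ONNX pipeline once so the first chat
// request doesn't pay the download/initialization cost.
await preloadModel("smollm2-135m");

// Stream tokens as the TextStreamer callback pushes them into the queue.
let streamed = "";
for await (const token of generateStream("smollm2-135m", [
  { role: "system", content: "You are a concise assistant." },
  { role: "user", content: "Say hello in five words." },
], { maxNewTokens: 64, temperature: 0.7 })) {
  streamed += token;
}

// Or collect the full completion in one call (wraps generateStream).
const full = await generate("smollm2-135m", [
  { role: "user", content: "One-line summary of ONNX?" },
]);
console.log(streamed, full);
```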
package/src/src/provider/local/model-catalog.ts
@@ -0,0 +1,73 @@
+/**
+ * Local Model Catalog
+ *
+ * Maps friendly model IDs to HuggingFace model repository IDs.
+ * Used by the local inference engine to resolve model names.
+ *
+ * @module provider/local
+ */
+
+export interface ModelInfo {
+  /** HuggingFace model repository ID */
+  hfId: string;
+  /** Quantization dtype for ONNX Runtime */
+  dtype: "q4" | "q8" | "fp32";
+  /** Approximate download size in MB */
+  sizeMB: number;
+  /** Human-readable description */
+  description: string;
+}
+
+/**
+ * Catalog of supported local models.
+ *
+ * **Important:** Only `q4` quantization is used — `q4f16` has a known
+ * ONNX Runtime bug with LayerNorm on CPU that produces NaN outputs.
+ */
+const MODEL_CATALOG: Record<string, ModelInfo> = {
+  "smollm2-135m": {
+    hfId: "HuggingFaceTB/SmolLM2-135M-Instruct",
+    dtype: "q4",
+    sizeMB: 100,
+    description: "SmolLM2 135M — fast, lightweight chat model",
+  },
+  "smollm2-360m": {
+    hfId: "HuggingFaceTB/SmolLM2-360M-Instruct",
+    dtype: "q4",
+    sizeMB: 250,
+    description: "SmolLM2 360M — better quality, still fast",
+  },
+  "smollm2-1.7b": {
+    hfId: "HuggingFaceTB/SmolLM2-1.7B-Instruct",
+    dtype: "q4",
+    sizeMB: 1000,
+    description: "SmolLM2 1.7B — highest quality local model",
+  },
+};
+
+/** Default model used when no specific model ID is provided */
+export const DEFAULT_LOCAL_MODEL = "smollm2-135m";
+
+/**
+ * Resolve a friendly model ID to its HuggingFace model info.
+ * Falls back to treating the ID as a raw HuggingFace repository ID.
+ */
+export function resolveLocalModel(modelId: string): ModelInfo {
+  const catalogEntry = MODEL_CATALOG[modelId];
+  if (catalogEntry) return catalogEntry;
+
+  // Treat as raw HuggingFace model ID (e.g. "HuggingFaceTB/SmolLM2-135M-Instruct")
+  return {
+    hfId: modelId,
+    dtype: "q4",
+    sizeMB: 0,
+    description: `Custom model: ${modelId}`,
+  };
+}
+
+/**
+ * Get all available local model IDs.
+ */
+export function getLocalModelIds(): string[] {
+  return Object.keys(MODEL_CATALOG);
+}
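Note that `resolveLocalModel` never throws: IDs missing from the catalog pass through as raw HuggingFace repository IDs with `q4` defaults. A small sketch of both paths (the custom repo ID is purely illustrative; the commented outputs follow directly from the code above):

```ts
import { DEFAULT_LOCAL_MODEL, getLocalModelIds, resolveLocalModel } from "./model-catalog.js";

// Catalog hit: friendly ID resolves to full model info.
const known = resolveLocalModel(DEFAULT_LOCAL_MODEL);
console.log(known.hfId);                // "HuggingFaceTB/SmolLM2-135M-Instruct"
console.log(known.dtype, known.sizeMB); // "q4" 100

// Catalog miss: treated as a raw HuggingFace repo ID with q4 defaults.
const custom = resolveLocalModel("Xenova/some-onnx-model");
console.log(custom.description); // "Custom model: Xenova/some-onnx-model"

console.log(getLocalModelIds()); // ["smollm2-135m", "smollm2-360m", "smollm2-1.7b"]
```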
package/src/src/provider/model-registry.ts
@@ -14,7 +14,7 @@
  */
 
 import type { LanguageModel } from "ai";
-import { createError, toError } from "../errors/veryfront-error.js";
+import { createError, fromError, toError } from "../errors/veryfront-error.js";
 import { createOpenAI } from "@ai-sdk/openai";
 import { createAnthropic } from "@ai-sdk/anthropic";
 import { createGoogleGenerativeAI } from "@ai-sdk/google";
@@ -24,6 +24,13 @@ import {
   getOpenAIEnvConfig,
 } from "../config/env.js";
 import { ProjectScopedRegistryManager } from "../ai/registry-manager.js";
+import { serverLogger } from "../utils/index.js";
+import { DEFAULT_LOCAL_MODEL } from "./local/model-catalog.js";
+import { createLocalModel } from "./local/ai-sdk-adapter.js";
+import { isLocalAIDisabled } from "./local/env.js";
+import { verifyLocalRuntime } from "./local/local-engine.js";
+
+const localLogger = serverLogger.component("local-llm");
 
 export type ModelProviderFactory = (modelId: string) => LanguageModel;
 
@@ -118,6 +125,16 @@ function autoInitializeFromEnv(): void {
       return createGoogleGenerativeAI({ apiKey: config.apiKey })(id);
     });
   }
+
+  // Register the local provider (always available, no API key needed).
+  // createLocalModel is a lightweight synchronous constructor — the actual
+  // @huggingface/transformers import and model loading happen lazily on
+  // the first doGenerate/doStream call, so this doesn't add startup overhead.
+  if (!manager.has("local")) {
+    manager.registerShared("local", (id) => {
+      return createLocalModel(id);
+    });
+  }
 }
 
 /**
@@ -168,7 +185,31 @@ export function resolveModel(modelString: string): LanguageModel {
     );
   }
 
-  return factory(modelId);
+  try {
+    return factory(modelId);
+  } catch (error) {
+    // Auto-fallback: when a cloud provider fails due to missing API key,
+    // transparently switch to the local model so chat works out of the box.
+    const errorData = fromError(error);
+    if (errorData?.type === "config" && providerName !== "local" && manager.has("local")) {
+      // Check if local AI is explicitly disabled (e.g., for testing)
+      if (isLocalAIDisabled()) {
+        throw toError(
+          createError({
+            type: "no_ai_available",
+            message: "Local AI disabled via VERYFRONT_DISABLE_LOCAL_AI environment variable.",
+          }),
+        );
+      }
+
+      localLogger.info(
+        `⚡ "${providerName}" unavailable (missing API key). Falling back to local model.`,
+      );
+      const localFactory = manager.get("local")!;
+      return localFactory(DEFAULT_LOCAL_MODEL);
+    }
+    throw error;
+  }
 }
 
 /**
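The `try/catch` above means a missing cloud API key no longer surfaces as a hard config error. A hypothetical call illustrating the degradation path (the model string is illustrative; the parsing of `modelString` into `providerName`/`modelId` is not shown in this hunk):

```ts
import { resolveModel } from "./model-registry.js";

// With OPENAI_API_KEY unset, the "openai" factory throws a "config" error,
// and resolveModel transparently returns the local provider instead, logging:
//   ⚡ "openai" unavailable (missing API key). Falling back to local model.
const model = resolveModel("openai/gpt-4o-mini");

// With VERYFRONT_DISABLE_LOCAL_AI set, the same call instead throws a
// "no_ai_available" error rather than silently degrading.
```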
@@ -187,6 +228,29 @@ export function getRegisteredModelProviders(): string[] {
   return manager.getAllIds();
 }
 
+/**
+ * Eagerly verify that the resolved model's runtime is available.
+ *
+ * For real local-engine models (created by `createLocalModel()`) this
+ * eagerly loads the ONNX pipeline to surface `no_ai_available` errors
+ * **before** the HTTP response stream is created. Must happen before the
+ * ReadableStream so the chat handler can return a proper 503 (with
+ * browser-fallback info) rather than a 200 with an in-band SSE error.
+ *
+ * Uses the `_isVfLocalModel` marker set by `createLocalModel()` to
+ * distinguish real local-engine models from mock/custom providers that
+ * happen to use `provider: "local"`.
+ */
+export async function ensureModelReady(
+  model: LanguageModel,
+): Promise<void> {
+  const m = model as Record<string, unknown>;
+  if (!m._isVfLocalModel) return;
+  // modelId is "local/<id>" — strip the prefix to get the catalog id.
+  const catalogId = typeof m.modelId === "string" ? m.modelId.replace(/^local\//, "") : undefined;
+  await verifyLocalRuntime(catalogId);
+}
+
 /**
  * Clear all registered model providers (for testing).
  */
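The intended call order is: resolve the model, await `ensureModelReady()`, and only then construct the response stream. A hypothetical handler sketch (the route shape and the `streamText` call are illustrative, not the package's actual chat handler, which lives in `src/agent/chat-handler.ts`):

```ts
import { streamText } from "ai";
import { ensureModelReady, resolveModel } from "./model-registry.js";

async function handleChat(req: Request): Promise<Response> {
  const { model: modelString, messages } = await req.json();
  const model = resolveModel(modelString); // may fall back to "local"

  try {
    // Loads the ONNX pipeline for local models; no-op for cloud models.
    await ensureModelReady(model);
  } catch {
    // Failure surfaces here, before any ReadableStream exists, so the
    // client gets a real 503 instead of a 200 with an in-band SSE error.
    return new Response("Local AI runtime unavailable", { status: 503 });
  }

  const result = streamText({ model, messages });
  return result.toTextStreamResponse();
}
```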
package/src/src/proxy/main.ts
@@ -15,6 +15,9 @@
  * - LOCAL_PROJECTS: JSON map of slug → filesystem path (for dev)
  * - CACHE_TYPE: "memory" (default) or "redis"
  * - REDIS_URL: Redis connection URL (required if CACHE_TYPE=redis)
+ * - VERYFRONT_API_INTERNAL_URL: API URL for internal endpoints (falls back to VERYFRONT_PROXY_API_BASE_URL)
+ * - VERYFRONT_API_INTERNAL_USER: Basic auth user for internal API
+ * - VERYFRONT_API_INTERNAL_PASS: Basic auth pass for internal API
  */
 import * as dntShim from "../../_dnt.shims.js";
 
@@ -36,6 +39,7 @@ import {
 import { proxyLogger, runWithProxyRequestContext } from "./logger.js";
 import { ErrorPages } from "../server/utils/error-html.js";
 import { RendererRouter } from "./renderer-router.js";
+import { ServerResolver } from "./server-resolver.js";
 import { parseProjectDomain } from "../server/utils/domain-parser.js";
 import { exit, getEnv, onSignal } from "../platform/compat/process.js";
 import { createHttpServer, upgradeWebSocket } from "../platform/compat/http/index.js";
@@ -83,6 +87,13 @@ const rendererRouter = (discoveryHost || staticTargets)
     parseInt(getEnv("VERYFRONT_SERVER_DISCOVERY_INTERVAL_MS") || "15000") || 15000,
   )
   : null;
+
+// Dedicated server resolver: routes environments to their dedicated server if assigned
+const apiInternalUrl = getEnv("VERYFRONT_API_INTERNAL_URL") || config.apiBaseUrl;
+const apiInternalUser = getEnv("VERYFRONT_API_INTERNAL_USER") || "";
+const apiInternalPass = getEnv("VERYFRONT_API_INTERNAL_PASS") || "";
+const serverResolver = new ServerResolver(apiInternalUrl, apiInternalUser, apiInternalPass);
+
 const { hostname: HOST, port: PORT } = resolveProxyBinding();
 const WS_CONNECT_TIMEOUT_MS = 30000;
 // Timeout for forwarding requests to production server (SSR can take time on cold start)
@@ -340,10 +351,17 @@ function forwardToServer(req: dntShim.Request): Promise<dntShim.Response> {
   const isIdempotent = ["GET", "HEAD", "OPTIONS"].includes(req.method);
   const maxRetries = isIdempotent ? VERYFRONT_SERVER_RETRY_COUNT : 0;
   let lastError: Error | null = null;
+  // After a retryable connection error to a dedicated server, fall back to shared pool
+  let skipDedicated = false;
 
   for (let attempt = 0; attempt <= maxRetries; attempt++) {
-    // Re-resolve on each attempt so retries can pick a different pod
-    const baseUrl = rendererRouter?.resolve(ctx.projectSlug) ?? PRODUCTION_SERVER_URL;
+    // Resolve dedicated server per attempt so retries can fall back to shared pool
+    const dedicatedServerUrl = skipDedicated
+      ? null
+      : await serverResolver.resolve(ctx.environmentId);
+    const baseUrl = dedicatedServerUrl ??
+      rendererRouter?.resolve(ctx.projectSlug) ??
+      PRODUCTION_SERVER_URL;
     const serverUrl = new URL(url.pathname + url.search, baseUrl);
     // Delay before retry (not on first attempt)
     if (attempt > 0) {
@@ -425,10 +443,26 @@ function forwardToServer(req: dntShim.Request): Promise<dntShim.Response> {
 
       // Check if this is a retryable error and we have retries left
       if (isRetryableConnectionError(error) && attempt < maxRetries) {
-        proxyLogger.warn(`[Retry] Retryable connection error on attempt ${attempt + 1}`, {
-          pathname: url.pathname,
-          error: error instanceof Error ? error.message : String(error),
-        });
+        // If we were targeting a dedicated server, fall back to shared pool on retry
+        if (dedicatedServerUrl) {
+          skipDedicated = true;
+          proxyLogger.warn(
+            `[Retry] Dedicated server unreachable, falling back to shared pool`,
+            {
+              pathname: url.pathname,
+              dedicatedServerUrl,
+              error: error instanceof Error ? error.message : String(error),
+            },
+          );
+        } else {
+          proxyLogger.warn(
+            `[Retry] Retryable connection error on attempt ${attempt + 1}`,
+            {
+              pathname: url.pathname,
+              error: error instanceof Error ? error.message : String(error),
+            },
+          );
+        }
         continue; // Try again
       }
 
@@ -551,6 +585,7 @@ function router(req: dntShim.Request): Promise<dntShim.Response> {
 async function shutdown(): Promise<void> {
   proxyLogger.info("Shutting down");
   rendererRouter?.close();
+  serverResolver.close();
   await proxyHandler.close();
   await shutdownOTLP();
   proxyLogger.info("Closed connections");