veryfront 0.1.13 → 0.1.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (153)
  1. package/esm/cli/app/data/slug-words.d.ts.map +1 -1
  2. package/esm/cli/app/data/slug-words.js +225 -90
  3. package/esm/cli/app/operations/project-creation.js +4 -3
  4. package/esm/cli/app/shell.js +1 -1
  5. package/esm/cli/app/utils.d.ts +5 -4
  6. package/esm/cli/app/utils.d.ts.map +1 -1
  7. package/esm/cli/app/utils.js +0 -23
  8. package/esm/cli/app/views/dashboard.d.ts +1 -1
  9. package/esm/cli/app/views/dashboard.d.ts.map +1 -1
  10. package/esm/cli/app/views/dashboard.js +22 -4
  11. package/esm/cli/auth/callback-server.d.ts.map +1 -1
  12. package/esm/cli/auth/callback-server.js +3 -2
  13. package/esm/cli/commands/dev/handler.d.ts.map +1 -1
  14. package/esm/cli/commands/dev/handler.js +2 -0
  15. package/esm/cli/commands/init/init-command.d.ts.map +1 -1
  16. package/esm/cli/commands/init/init-command.js +20 -3
  17. package/esm/cli/commands/init/interactive-wizard.d.ts +3 -2
  18. package/esm/cli/commands/init/interactive-wizard.d.ts.map +1 -1
  19. package/esm/cli/commands/init/interactive-wizard.js +55 -27
  20. package/esm/cli/mcp/remote-file-tools.d.ts +0 -6
  21. package/esm/cli/mcp/remote-file-tools.d.ts.map +1 -1
  22. package/esm/cli/mcp/remote-file-tools.js +37 -15
  23. package/esm/cli/shared/reserve-slug.d.ts.map +1 -1
  24. package/esm/cli/shared/reserve-slug.js +8 -3
  25. package/esm/cli/utils/env-prompt.d.ts.map +1 -1
  26. package/esm/cli/utils/env-prompt.js +3 -0
  27. package/esm/deno.d.ts +5 -1
  28. package/esm/deno.js +11 -4
  29. package/esm/src/agent/chat-handler.d.ts +4 -3
  30. package/esm/src/agent/chat-handler.d.ts.map +1 -1
  31. package/esm/src/agent/chat-handler.js +55 -4
  32. package/esm/src/agent/react/index.d.ts +1 -1
  33. package/esm/src/agent/react/index.d.ts.map +1 -1
  34. package/esm/src/agent/react/use-chat/browser-inference/browser-engine.d.ts +18 -0
  35. package/esm/src/agent/react/use-chat/browser-inference/browser-engine.d.ts.map +1 -0
  36. package/esm/src/agent/react/use-chat/browser-inference/browser-engine.js +54 -0
  37. package/esm/src/agent/react/use-chat/browser-inference/types.d.ts +43 -0
  38. package/esm/src/agent/react/use-chat/browser-inference/types.d.ts.map +1 -0
  39. package/esm/src/agent/react/use-chat/browser-inference/types.js +4 -0
  40. package/esm/src/agent/react/use-chat/browser-inference/worker-client.d.ts +23 -0
  41. package/esm/src/agent/react/use-chat/browser-inference/worker-client.d.ts.map +1 -0
  42. package/esm/src/agent/react/use-chat/browser-inference/worker-client.js +67 -0
  43. package/esm/src/agent/react/use-chat/browser-inference/worker-script.d.ts +8 -0
  44. package/esm/src/agent/react/use-chat/browser-inference/worker-script.d.ts.map +1 -0
  45. package/esm/src/agent/react/use-chat/browser-inference/worker-script.js +97 -0
  46. package/esm/src/agent/react/use-chat/index.d.ts +1 -1
  47. package/esm/src/agent/react/use-chat/index.d.ts.map +1 -1
  48. package/esm/src/agent/react/use-chat/types.d.ts +12 -0
  49. package/esm/src/agent/react/use-chat/types.d.ts.map +1 -1
  50. package/esm/src/agent/react/use-chat/use-chat.d.ts.map +1 -1
  51. package/esm/src/agent/react/use-chat/use-chat.js +120 -6
  52. package/esm/src/agent/runtime/index.d.ts.map +1 -1
  53. package/esm/src/agent/runtime/index.js +59 -7
  54. package/esm/src/build/production-build/templates.d.ts +2 -2
  55. package/esm/src/build/production-build/templates.d.ts.map +1 -1
  56. package/esm/src/build/production-build/templates.js +2 -68
  57. package/esm/src/chat/index.d.ts +1 -1
  58. package/esm/src/chat/index.d.ts.map +1 -1
  59. package/esm/src/errors/veryfront-error.d.ts +3 -0
  60. package/esm/src/errors/veryfront-error.d.ts.map +1 -1
  61. package/esm/src/platform/adapters/runtime/deno/adapter.d.ts.map +1 -1
  62. package/esm/src/platform/adapters/runtime/deno/adapter.js +24 -3
  63. package/esm/src/platform/compat/http/deno-server.d.ts.map +1 -1
  64. package/esm/src/platform/compat/http/deno-server.js +23 -2
  65. package/esm/src/provider/index.d.ts +1 -1
  66. package/esm/src/provider/index.d.ts.map +1 -1
  67. package/esm/src/provider/index.js +1 -1
  68. package/esm/src/provider/local/ai-sdk-adapter.d.ts +19 -0
  69. package/esm/src/provider/local/ai-sdk-adapter.d.ts.map +1 -0
  70. package/esm/src/provider/local/ai-sdk-adapter.js +164 -0
  71. package/esm/src/provider/local/env.d.ts +10 -0
  72. package/esm/src/provider/local/env.d.ts.map +1 -0
  73. package/esm/src/provider/local/env.js +23 -0
  74. package/esm/src/provider/local/local-engine.d.ts +61 -0
  75. package/esm/src/provider/local/local-engine.d.ts.map +1 -0
  76. package/esm/src/provider/local/local-engine.js +211 -0
  77. package/esm/src/provider/local/model-catalog.d.ts +30 -0
  78. package/esm/src/provider/local/model-catalog.d.ts.map +1 -0
  79. package/esm/src/provider/local/model-catalog.js +58 -0
  80. package/esm/src/provider/model-registry.d.ts +14 -0
  81. package/esm/src/provider/model-registry.d.ts.map +1 -1
  82. package/esm/src/provider/model-registry.js +58 -2
  83. package/esm/src/proxy/main.js +34 -6
  84. package/esm/src/proxy/server-resolver.d.ts +23 -0
  85. package/esm/src/proxy/server-resolver.d.ts.map +1 -0
  86. package/esm/src/proxy/server-resolver.js +124 -0
  87. package/esm/src/react/components/ai/chat/components/inference-badge.d.ts +8 -0
  88. package/esm/src/react/components/ai/chat/components/inference-badge.d.ts.map +1 -0
  89. package/esm/src/react/components/ai/chat/components/inference-badge.js +36 -0
  90. package/esm/src/react/components/ai/chat/components/upgrade-cta.d.ts +7 -0
  91. package/esm/src/react/components/ai/chat/components/upgrade-cta.d.ts.map +1 -0
  92. package/esm/src/react/components/ai/chat/components/upgrade-cta.js +33 -0
  93. package/esm/src/react/components/ai/chat/index.d.ts +7 -1
  94. package/esm/src/react/components/ai/chat/index.d.ts.map +1 -1
  95. package/esm/src/react/components/ai/chat/index.js +16 -4
  96. package/esm/src/sandbox/index.d.ts +31 -0
  97. package/esm/src/sandbox/index.d.ts.map +1 -0
  98. package/esm/src/sandbox/index.js +30 -0
  99. package/esm/src/sandbox/sandbox.d.ts +48 -0
  100. package/esm/src/sandbox/sandbox.d.ts.map +1 -0
  101. package/esm/src/sandbox/sandbox.js +178 -0
  102. package/esm/src/transforms/pipeline/stages/ssr-vf-modules/import-finder.d.ts.map +1 -1
  103. package/esm/src/transforms/pipeline/stages/ssr-vf-modules/import-finder.js +8 -2
  104. package/esm/src/transforms/pipeline/stages/ssr-vf-modules/index.d.ts +1 -0
  105. package/esm/src/transforms/pipeline/stages/ssr-vf-modules/index.d.ts.map +1 -1
  106. package/esm/src/transforms/pipeline/stages/ssr-vf-modules/index.js +1 -0
  107. package/esm/src/transforms/pipeline/stages/ssr-vf-modules/transform.d.ts.map +1 -1
  108. package/esm/src/transforms/pipeline/stages/ssr-vf-modules/transform.js +15 -1
  109. package/package.json +8 -1
  110. package/src/cli/app/data/slug-words.ts +225 -90
  111. package/src/cli/app/operations/project-creation.ts +3 -3
  112. package/src/cli/app/shell.ts +1 -1
  113. package/src/cli/app/utils.ts +0 -30
  114. package/src/cli/app/views/dashboard.ts +27 -4
  115. package/src/cli/auth/callback-server.ts +3 -2
  116. package/src/cli/commands/dev/handler.ts +2 -0
  117. package/src/cli/commands/init/init-command.ts +30 -3
  118. package/src/cli/commands/init/interactive-wizard.ts +62 -34
  119. package/src/cli/mcp/remote-file-tools.ts +50 -15
  120. package/src/cli/shared/reserve-slug.ts +9 -2
  121. package/src/cli/utils/env-prompt.ts +3 -0
  122. package/src/deno.js +11 -4
  123. package/src/src/agent/chat-handler.ts +57 -4
  124. package/src/src/agent/react/index.ts +2 -0
  125. package/src/src/agent/react/use-chat/browser-inference/browser-engine.ts +81 -0
  126. package/src/src/agent/react/use-chat/browser-inference/types.ts +52 -0
  127. package/src/src/agent/react/use-chat/browser-inference/worker-client.ts +89 -0
  128. package/src/src/agent/react/use-chat/browser-inference/worker-script.ts +98 -0
  129. package/src/src/agent/react/use-chat/index.ts +2 -0
  130. package/src/src/agent/react/use-chat/types.ts +20 -0
  131. package/src/src/agent/react/use-chat/use-chat.ts +148 -8
  132. package/src/src/agent/runtime/index.ts +72 -6
  133. package/src/src/build/production-build/templates.ts +2 -68
  134. package/src/src/chat/index.ts +2 -0
  135. package/src/src/errors/veryfront-error.ts +2 -1
  136. package/src/src/platform/adapters/runtime/deno/adapter.ts +25 -3
  137. package/src/src/platform/compat/http/deno-server.ts +28 -1
  138. package/src/src/provider/index.ts +1 -0
  139. package/src/src/provider/local/ai-sdk-adapter.ts +207 -0
  140. package/src/src/provider/local/env.ts +26 -0
  141. package/src/src/provider/local/local-engine.ts +288 -0
  142. package/src/src/provider/local/model-catalog.ts +73 -0
  143. package/src/src/provider/model-registry.ts +66 -2
  144. package/src/src/proxy/main.ts +41 -6
  145. package/src/src/proxy/server-resolver.ts +151 -0
  146. package/src/src/react/components/ai/chat/components/inference-badge.tsx +48 -0
  147. package/src/src/react/components/ai/chat/components/upgrade-cta.tsx +56 -0
  148. package/src/src/react/components/ai/chat/index.tsx +43 -6
  149. package/src/src/sandbox/index.ts +32 -0
  150. package/src/src/sandbox/sandbox.ts +236 -0
  151. package/src/src/transforms/pipeline/stages/ssr-vf-modules/import-finder.ts +9 -2
  152. package/src/src/transforms/pipeline/stages/ssr-vf-modules/index.ts +1 -0
  153. package/src/src/transforms/pipeline/stages/ssr-vf-modules/transform.ts +17 -0
@@ -0,0 +1,61 @@
1
+ /**
2
+ * Local Model Engine
3
+ *
4
+ * Singleton wrapper around `@huggingface/transformers` for server-side
5
+ * local LLM inference. Provides lazy model loading and streaming text
6
+ * generation via async generators.
7
+ *
8
+ * Uses ONNX Runtime for inference with q4 quantization — NOT q4f16
9
+ * due to a known ONNX bug with f16 LayerNorm on CPU.
10
+ *
11
+ * @module provider/local
12
+ */
13
+ /** Chat message format expected by Transformers.js */
14
+ export interface ChatMessage {
15
+ role: "system" | "user" | "assistant";
16
+ content: string;
17
+ }
18
+ /** Options for text generation */
19
+ export interface GenerateOptions {
20
+ maxNewTokens?: number;
21
+ temperature?: number;
22
+ topP?: number;
23
+ topK?: number;
24
+ stopSequences?: string[];
25
+ }
26
+ /**
27
+ * Eagerly verify that the local AI runtime (@huggingface/transformers + ONNX)
28
+ * is available by loading the default model pipeline.
29
+ *
30
+ * Call this *before* creating the HTTP response stream so that failures surface
31
+ * as a thrown error (→ 503) rather than being swallowed inside a ReadableStream
32
+ * (→ 200 with in-band SSE error).
33
+ *
34
+ * In compiled binaries, `import("@huggingface/transformers")` itself fails
35
+ * because `onnxruntime-node` eagerly `require()`s a native `.node` addon at
36
+ * import time and the addon isn't embedded in the binary. In dev mode (Deno)
37
+ * the native addon exists on disk so the import succeeds, but `pipeline()` can
38
+ * still fail if the ONNX model files are missing. Either way this function
39
+ * surfaces the error before the response stream is created. The pipeline is
40
+ * cached after the first successful call, so subsequent checks are instant.
41
+ */
42
+ export declare function verifyLocalRuntime(modelId?: string): Promise<void>;
43
+ /**
44
+ * Generate text in a streaming fashion using an async generator.
45
+ *
46
+ * Yields individual tokens as they are generated by the model.
47
+ */
48
+ export declare function generateStream(modelId: string, messages: ChatMessage[], options?: GenerateOptions): AsyncGenerator<string, void, undefined>;
49
+ /**
50
+ * Generate text without streaming (full completion).
51
+ */
52
+ export declare function generate(modelId: string, messages: ChatMessage[], options?: GenerateOptions): Promise<string>;
53
+ /**
54
+ * Preload a model into memory. Useful for warming up on server start.
55
+ */
56
+ export declare function preloadModel(modelId: string): Promise<void>;
57
+ /**
58
+ * Check if a model is currently loaded in memory.
59
+ */
60
+ export declare function isModelLoaded(modelId: string): boolean;
61
+ //# sourceMappingURL=local-engine.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"local-engine.d.ts","sourceRoot":"","sources":["../../../../src/src/provider/local/local-engine.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AASH,sDAAsD;AACtD,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,QAAQ,GAAG,MAAM,GAAG,WAAW,CAAC;IACtC,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,kCAAkC;AAClC,MAAM,WAAW,eAAe;IAC9B,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;CAC1B;AA6HD;;;;;;;;;;;;;;;GAeG;AACH,wBAAsB,kBAAkB,CAAC,OAAO,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAGxE;AAED;;;;GAIG;AACH,wBAAuB,cAAc,CACnC,OAAO,EAAE,MAAM,EACf,QAAQ,EAAE,WAAW,EAAE,EACvB,OAAO,GAAE,eAAoB,GAC5B,cAAc,CAAC,MAAM,EAAE,IAAI,EAAE,SAAS,CAAC,CAoEzC;AAED;;GAEG;AACH,wBAAsB,QAAQ,CAC5B,OAAO,EAAE,MAAM,EACf,QAAQ,EAAE,WAAW,EAAE,EACvB,OAAO,GAAE,eAAoB,GAC5B,OAAO,CAAC,MAAM,CAAC,CAMjB;AAED;;GAEG;AACH,wBAAsB,YAAY,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAGjE;AAED;;GAEG;AACH,wBAAgB,aAAa,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAGtD"}
@@ -0,0 +1,211 @@
1
+ /**
2
+ * Local Model Engine
3
+ *
4
+ * Singleton wrapper around `@huggingface/transformers` for server-side
5
+ * local LLM inference. Provides lazy model loading and streaming text
6
+ * generation via async generators.
7
+ *
8
+ * Uses ONNX Runtime for inference with q4 quantization — NOT q4f16
9
+ * due to a known ONNX bug with f16 LayerNorm on CPU.
10
+ *
11
+ * @module provider/local
12
+ */
13
+ import { serverLogger } from "../../utils/index.js";
14
+ import { createError, toError } from "../../errors/veryfront-error.js";
15
+ import { DEFAULT_LOCAL_MODEL, resolveLocalModel } from "./model-catalog.js";
16
+ import { isLocalAIDisabled } from "./env.js";
17
// Component-scoped logger for all local-inference messages.
const logger = serverLogger.component("local-llm");
// Successfully loaded pipelines, keyed by HuggingFace repo ID.
const pipelineCache = new Map();
// In-flight load promises (same key) used to dedupe concurrent loads.
const loadingLocks = new Map();
// The @huggingface/transformers module, imported on demand; null until loaded.
let transformersModule = null;
24
/**
 * Lazily import @huggingface/transformers, caching the module after the
 * first successful load so startup stays fast when cloud API keys exist.
 *
 * Throws a `no_ai_available` error when local AI is explicitly disabled or
 * when the native ONNX runtime cannot be imported in this environment.
 */
async function getTransformers() {
    // A previously loaded module wins over everything, including the
    // disable flag — preserves load-then-disable behavior.
    if (transformersModule) {
        return transformersModule;
    }
    if (isLocalAIDisabled()) {
        throw toError(createError({
            type: "no_ai_available",
            message: "Local AI disabled via VERYFRONT_DISABLE_LOCAL_AI environment variable.",
        }));
    }
    logger.info("Loading @huggingface/transformers...");
    let loaded;
    try {
        loaded = await import("@huggingface/transformers");
    }
    catch {
        throw toError(createError({
            type: "no_ai_available",
            message: "Local AI model unavailable — native ONNX Runtime is not supported in this environment " +
                "(e.g. compiled binaries). Set OPENAI_API_KEY, ANTHROPIC_API_KEY, or GOOGLE_API_KEY " +
                "in your .env file to use a cloud provider instead.",
        }));
    }
    transformersModule = loaded;
    // Model files are cached on disk; browser cache APIs don't exist in Node/Deno.
    transformersModule.env.cacheDir = "./.cache/models";
    transformersModule.env.useBrowserCache = false;
    return transformersModule;
}
55
/**
 * Obtain a text-generation pipeline for the given catalog entry, loading it
 * on first use. Concurrent callers for the same model share one in-flight
 * load; successful loads are cached for the process lifetime.
 */
async function loadPipeline(modelInfo) {
    const key = modelInfo.hfId;
    // Fast path: pipeline already resident in memory.
    const ready = pipelineCache.get(key);
    if (ready) {
        return ready;
    }
    // Join an in-progress load instead of starting a duplicate.
    const inFlight = loadingLocks.get(key);
    if (inFlight) {
        return inFlight;
    }
    // Kick off a fresh load and publish the promise as the lock.
    const load = (async () => {
        const transformers = await getTransformers();
        logger.info(`Loading local model: ${modelInfo.hfId} (${modelInfo.dtype}, ~${modelInfo.sizeMB}MB)...`);
        const pipe = await transformers.pipeline("text-generation", modelInfo.hfId, {
            dtype: modelInfo.dtype,
            device: "cpu",
        });
        logger.info(`Model loaded: ${modelInfo.hfId}`);
        pipelineCache.set(key, pipe);
        loadingLocks.delete(key);
        return pipe;
    })();
    loadingLocks.set(key, load);
    try {
        return await load;
    }
    catch (error) {
        loadingLocks.delete(key);
        // Map ONNX / native-addon failures to no_ai_available so the chat
        // handler returns a proper 503 instead of burying the error inside
        // a 200 response stream as an in-band SSE error.
        const msg = error instanceof Error ? error.message : String(error);
        const looksLikeNativeFailure = msg.includes("onnx") || msg.includes("ONNX") ||
            msg.includes("dlopen") || msg.includes("dynamic linking") ||
            msg.includes("native module") || msg.includes("SharedArrayBuffer");
        if (looksLikeNativeFailure) {
            transformersModule = null;
            throw toError(createError({
                type: "no_ai_available",
                message: "Local AI model unavailable — native ONNX Runtime is not supported in this environment " +
                    "(e.g. compiled binaries). Set OPENAI_API_KEY, ANTHROPIC_API_KEY, or GOOGLE_API_KEY " +
                    "in your .env file to use a cloud provider instead.",
            }));
        }
        throw error;
    }
}
106
/**
 * Eagerly verify that the local AI runtime (@huggingface/transformers + ONNX)
 * is available by loading the requested (or default) model pipeline.
 *
 * Call this *before* creating the HTTP response stream so failures surface as
 * a thrown error (→ 503) rather than being swallowed inside a ReadableStream
 * (→ 200 with in-band SSE error).
 *
 * In compiled binaries the `import("@huggingface/transformers")` itself fails
 * because `onnxruntime-node` eagerly `require()`s a native `.node` addon that
 * isn't embedded in the binary. In dev mode (Deno) the import succeeds but
 * `pipeline()` can still fail if model files are missing. Either way the error
 * surfaces here, before any stream exists. The pipeline is cached after the
 * first successful call, so subsequent checks are instant.
 */
export async function verifyLocalRuntime(modelId) {
    await loadPipeline(resolveLocalModel(modelId || DEFAULT_LOCAL_MODEL));
}
126
/**
 * Generate text in a streaming fashion using an async generator.
 *
 * Yields individual tokens as they are generated by the model. Tokens from
 * the TextStreamer callback are buffered in a queue and drained by the
 * generator loop; a one-shot resolver wakes the loop when tokens arrive or
 * generation completes.
 */
export async function* generateStream(modelId, messages, options = {}) {
    const modelInfo = resolveLocalModel(modelId);
    const pipe = await loadPipeline(modelInfo);
    const transformers = await getTransformers();
    const { maxNewTokens = 512, temperature = 0.7, topP, topK, } = options;
    // Queue bridging TextStreamer callbacks → async generator.
    const tokenQueue = [];
    let resolveWaiting = null;
    let done = false;
    let generationError = null;
    const wake = () => {
        if (resolveWaiting) {
            resolveWaiting();
            resolveWaiting = null;
        }
    };
    const streamer = new transformers.TextStreamer(pipe.tokenizer, {
        skip_prompt: true,
        skip_special_tokens: true,
        callback_function: (text) => {
            tokenQueue.push(text);
            wake();
        },
    });
    // Start generation in the background. Failures are CAPTURED rather than
    // rethrown inside .catch(): rethrowing there left `generatePromise` as a
    // rejected promise handled only by the trailing `await`, so a consumer
    // that stopped iterating early (break / generator.return()) triggered an
    // unhandled promise rejection.
    const generatePromise = pipe(messages, {
        max_new_tokens: maxNewTokens,
        temperature,
        top_p: topP,
        top_k: topK,
        do_sample: temperature > 0,
        streamer,
    }).catch((error) => {
        generationError = error instanceof Error ? error : new Error(String(error));
    }).finally(() => {
        done = true;
        wake();
    });
    // Yield tokens as they arrive.
    while (true) {
        while (tokenQueue.length > 0) {
            yield tokenQueue.shift();
        }
        if (done)
            break;
        // Wait for more tokens (or completion).
        await new Promise((resolve) => {
            resolveWaiting = resolve;
        });
    }
    // Ensure generation has fully completed, then surface any failure.
    await generatePromise;
    if (generationError) {
        throw generationError;
    }
}
188
/**
 * Generate text without streaming: drains the token stream and returns the
 * full completion as a single string.
 */
export async function generate(modelId, messages, options = {}) {
    let output = "";
    for await (const token of generateStream(modelId, messages, options)) {
        output += token;
    }
    return output;
}
198
/**
 * Preload a model into memory. Useful for warming up on server start.
 */
export async function preloadModel(modelId) {
    await loadPipeline(resolveLocalModel(modelId));
}
205
/**
 * Check whether the given model's pipeline is currently resident in memory.
 */
export function isModelLoaded(modelId) {
    const { hfId } = resolveLocalModel(modelId);
    return pipelineCache.has(hfId);
}
@@ -0,0 +1,30 @@
1
+ /**
2
+ * Local Model Catalog
3
+ *
4
+ * Maps friendly model IDs to HuggingFace model repository IDs.
5
+ * Used by the local inference engine to resolve model names.
6
+ *
7
+ * @module provider/local
8
+ */
9
+ export interface ModelInfo {
10
+ /** HuggingFace model repository ID */
11
+ hfId: string;
12
+ /** Quantization dtype for ONNX Runtime */
13
+ dtype: "q4" | "q8" | "fp32";
14
+ /** Approximate download size in MB */
15
+ sizeMB: number;
16
+ /** Human-readable description */
17
+ description: string;
18
+ }
19
+ /** Default model used when no specific model ID is provided */
20
+ export declare const DEFAULT_LOCAL_MODEL = "smollm2-135m";
21
+ /**
22
+ * Resolve a friendly model ID to its HuggingFace model info.
23
+ * Falls back to treating the ID as a raw HuggingFace repository ID.
24
+ */
25
+ export declare function resolveLocalModel(modelId: string): ModelInfo;
26
+ /**
27
+ * Get all available local model IDs.
28
+ */
29
+ export declare function getLocalModelIds(): string[];
30
+ //# sourceMappingURL=model-catalog.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"model-catalog.d.ts","sourceRoot":"","sources":["../../../../src/src/provider/local/model-catalog.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,MAAM,WAAW,SAAS;IACxB,sCAAsC;IACtC,IAAI,EAAE,MAAM,CAAC;IACb,0CAA0C;IAC1C,KAAK,EAAE,IAAI,GAAG,IAAI,GAAG,MAAM,CAAC;IAC5B,sCAAsC;IACtC,MAAM,EAAE,MAAM,CAAC;IACf,iCAAiC;IACjC,WAAW,EAAE,MAAM,CAAC;CACrB;AA6BD,+DAA+D;AAC/D,eAAO,MAAM,mBAAmB,iBAAiB,CAAC;AAElD;;;GAGG;AACH,wBAAgB,iBAAiB,CAAC,OAAO,EAAE,MAAM,GAAG,SAAS,CAW5D;AAED;;GAEG;AACH,wBAAgB,gBAAgB,IAAI,MAAM,EAAE,CAE3C"}
@@ -0,0 +1,58 @@
1
+ /**
2
+ * Local Model Catalog
3
+ *
4
+ * Maps friendly model IDs to HuggingFace model repository IDs.
5
+ * Used by the local inference engine to resolve model names.
6
+ *
7
+ * @module provider/local
8
+ */
9
/**
 * Catalog of supported local models, keyed by friendly ID.
 *
 * **Important:** Only `q4` quantization is used — `q4f16` has a known
 * ONNX Runtime bug with LayerNorm on CPU that produces NaN outputs.
 */
const MODEL_CATALOG = {
    "smollm2-135m": {
        hfId: "HuggingFaceTB/SmolLM2-135M-Instruct",
        dtype: "q4",
        sizeMB: 100,
        description: "SmolLM2 135M — fast, lightweight chat model",
    },
    "smollm2-360m": {
        hfId: "HuggingFaceTB/SmolLM2-360M-Instruct",
        dtype: "q4",
        sizeMB: 250,
        description: "SmolLM2 360M — better quality, still fast",
    },
    "smollm2-1.7b": {
        hfId: "HuggingFaceTB/SmolLM2-1.7B-Instruct",
        dtype: "q4",
        sizeMB: 1000,
        description: "SmolLM2 1.7B — highest quality local model",
    },
};
35
/** Catalog key of the model used when callers don't specify one. */
export const DEFAULT_LOCAL_MODEL = "smollm2-135m";
37
/**
 * Resolve a friendly model ID to its HuggingFace model info.
 * Unknown IDs are passed through as raw HuggingFace repository IDs.
 */
export function resolveLocalModel(modelId) {
    const known = MODEL_CATALOG[modelId];
    if (known) {
        return known;
    }
    // Not in the catalog — treat as a raw HF repo ID
    // (e.g. "HuggingFaceTB/SmolLM2-135M-Instruct").
    return {
        hfId: modelId,
        dtype: "q4",
        sizeMB: 0,
        description: `Custom model: ${modelId}`,
    };
}
53
/**
 * List the friendly IDs of every model in the local catalog.
 */
export function getLocalModelIds() {
    const ids = Object.keys(MODEL_CATALOG);
    return ids;
}
@@ -43,6 +43,20 @@ export declare function hasModelProvider(name: string): boolean;
43
43
  * Get list of registered model provider names (project-scoped + shared).
44
44
  */
45
45
  export declare function getRegisteredModelProviders(): string[];
46
+ /**
47
+ * Eagerly verify that the resolved model's runtime is available.
48
+ *
49
+ * For real local-engine models (created by `createLocalModel()`) this
50
+ * eagerly loads the ONNX pipeline to surface `no_ai_available` errors
51
+ * **before** the HTTP response stream is created. Must happen before the
52
+ * ReadableStream so the chat handler can return a proper 503 (with
53
+ * browser-fallback info) rather than a 200 with an in-band SSE error.
54
+ *
55
+ * Uses the `_isVfLocalModel` marker set by `createLocalModel()` to
56
+ * distinguish real local-engine models from mock/custom providers that
57
+ * happen to use `provider: "local"`.
58
+ */
59
+ export declare function ensureModelReady(model: LanguageModel): Promise<void>;
46
60
  /**
47
61
  * Clear all registered model providers (for testing).
48
62
  */
@@ -1 +1 @@
1
- {"version":3,"file":"model-registry.d.ts","sourceRoot":"","sources":["../../../src/src/provider/model-registry.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAEH,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,IAAI,CAAC;AAYxC,MAAM,MAAM,oBAAoB,GAAG,CAAC,OAAO,EAAE,MAAM,KAAK,aAAa,CAAC;AAOtE;;;;;;;;GAQG;AACH,wBAAgB,qBAAqB,CACnC,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,oBAAoB,GAC5B,IAAI,CAEN;AA0ED;;;;;;;;;GASG;AACH,wBAAgB,YAAY,CAAC,WAAW,EAAE,MAAM,GAAG,aAAa,CAuC/D;AAED;;GAEG;AACH,wBAAgB,gBAAgB,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAGtD;AAED;;GAEG;AACH,wBAAgB,2BAA2B,IAAI,MAAM,EAAE,CAGtD;AAED;;GAEG;AACH,wBAAgB,mBAAmB,IAAI,IAAI,CAG1C"}
1
+ {"version":3,"file":"model-registry.d.ts","sourceRoot":"","sources":["../../../src/src/provider/model-registry.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAEH,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,IAAI,CAAC;AAmBxC,MAAM,MAAM,oBAAoB,GAAG,CAAC,OAAO,EAAE,MAAM,KAAK,aAAa,CAAC;AAOtE;;;;;;;;GAQG;AACH,wBAAgB,qBAAqB,CACnC,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,oBAAoB,GAC5B,IAAI,CAEN;AAoFD;;;;;;;;;GASG;AACH,wBAAgB,YAAY,CAAC,WAAW,EAAE,MAAM,GAAG,aAAa,CA+D/D;AAED;;GAEG;AACH,wBAAgB,gBAAgB,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAGtD;AAED;;GAEG;AACH,wBAAgB,2BAA2B,IAAI,MAAM,EAAE,CAGtD;AAED;;;;;;;;;;;;GAYG;AACH,wBAAsB,gBAAgB,CACpC,KAAK,EAAE,aAAa,GACnB,OAAO,CAAC,IAAI,CAAC,CAMf;AAED;;GAEG;AACH,wBAAgB,mBAAmB,IAAI,IAAI,CAG1C"}
@@ -12,12 +12,18 @@
12
12
  *
13
13
  * @module
14
14
  */
15
- import { createError, toError } from "../errors/veryfront-error.js";
15
+ import { createError, fromError, toError } from "../errors/veryfront-error.js";
16
16
  import { createOpenAI } from "@ai-sdk/openai";
17
17
  import { createAnthropic } from "@ai-sdk/anthropic";
18
18
  import { createGoogleGenerativeAI } from "@ai-sdk/google";
19
19
  import { getAnthropicEnvConfig, getGoogleGenAIEnvConfig, getOpenAIEnvConfig, } from "../config/env.js";
20
20
  import { ProjectScopedRegistryManager } from "../ai/registry-manager.js";
21
+ import { serverLogger } from "../utils/index.js";
22
+ import { DEFAULT_LOCAL_MODEL } from "./local/model-catalog.js";
23
+ import { createLocalModel } from "./local/ai-sdk-adapter.js";
24
+ import { isLocalAIDisabled } from "./local/env.js";
25
+ import { verifyLocalRuntime } from "./local/local-engine.js";
26
+ const localLogger = serverLogger.component("local-llm");
21
27
  const manager = new ProjectScopedRegistryManager("model-provider");
22
28
  let autoInitialized = false;
23
29
  /**
@@ -88,6 +94,15 @@ function autoInitializeFromEnv() {
88
94
  return createGoogleGenerativeAI({ apiKey: config.apiKey })(id);
89
95
  });
90
96
  }
97
+ // Register the local provider (always available, no API key needed).
98
+ // createLocalModel is a lightweight synchronous constructor — the actual
99
+ // @huggingface/transformers import and model loading happen lazily on
100
+ // the first doGenerate/doStream call, so this doesn't add startup overhead.
101
+ if (!manager.has("local")) {
102
+ manager.registerShared("local", (id) => {
103
+ return createLocalModel(id);
104
+ });
105
+ }
91
106
  }
92
107
  /**
93
108
  * Resolve a "provider/model" string to an AI SDK LanguageModel instance.
@@ -124,7 +139,27 @@ export function resolveModel(modelString) {
124
139
  message: `Model provider "${providerName}" not registered. Available: ${available}`,
125
140
  }));
126
141
  }
127
- return factory(modelId);
142
+ try {
143
+ return factory(modelId);
144
+ }
145
+ catch (error) {
146
+ // Auto-fallback: when a cloud provider fails due to missing API key,
147
+ // transparently switch to the local model so chat works out of the box.
148
+ const errorData = fromError(error);
149
+ if (errorData?.type === "config" && providerName !== "local" && manager.has("local")) {
150
+ // Check if local AI is explicitly disabled (e.g., for testing)
151
+ if (isLocalAIDisabled()) {
152
+ throw toError(createError({
153
+ type: "no_ai_available",
154
+ message: "Local AI disabled via VERYFRONT_DISABLE_LOCAL_AI environment variable.",
155
+ }));
156
+ }
157
+ localLogger.info(`⚡ "${providerName}" unavailable (missing API key). Falling back to local model.`);
158
+ const localFactory = manager.get("local");
159
+ return localFactory(DEFAULT_LOCAL_MODEL);
160
+ }
161
+ throw error;
162
+ }
128
163
  }
129
164
  /**
130
165
  * Check if a model provider is registered (project-scoped or shared).
@@ -140,6 +175,27 @@ export function getRegisteredModelProviders() {
140
175
  autoInitializeFromEnv();
141
176
  return manager.getAllIds();
142
177
  }
178
/**
 * Eagerly verify that the resolved model's runtime is available.
 *
 * For real local-engine models (created by `createLocalModel()`) this
 * eagerly loads the ONNX pipeline to surface `no_ai_available` errors
 * **before** the HTTP response stream is created. Must happen before the
 * ReadableStream so the chat handler can return a proper 503 (with
 * browser-fallback info) rather than a 200 with an in-band SSE error.
 *
 * Uses the `_isVfLocalModel` marker set by `createLocalModel()` to
 * distinguish real local-engine models from mock/custom providers that
 * happen to use `provider: "local"`.
 */
export async function ensureModelReady(model) {
    const candidate = model;
    // Mock/custom "local" providers lack the marker and need no warmup.
    if (!candidate._isVfLocalModel) {
        return;
    }
    // modelId is "local/<id>" — strip the prefix to get the catalog id.
    const rawId = candidate.modelId;
    const catalogId = typeof rawId === "string" ? rawId.replace(/^local\//, "") : undefined;
    await verifyLocalRuntime(catalogId);
}
143
199
  /**
144
200
  * Clear all registered model providers (for testing).
145
201
  */
@@ -15,6 +15,9 @@
15
15
  * - LOCAL_PROJECTS: JSON map of slug → filesystem path (for dev)
16
16
  * - CACHE_TYPE: "memory" (default) or "redis"
17
17
  * - REDIS_URL: Redis connection URL (required if CACHE_TYPE=redis)
18
+ * - VERYFRONT_API_INTERNAL_URL: API URL for internal endpoints (falls back to VERYFRONT_PROXY_API_BASE_URL)
19
+ * - VERYFRONT_API_INTERNAL_USER: Basic auth user for internal API
20
+ * - VERYFRONT_API_INTERNAL_PASS: Basic auth pass for internal API
18
21
  */
19
22
  import * as dntShim from "../../_dnt.shims.js";
20
23
  import { createProxyHandler, INTERNAL_PROXY_HEADERS } from "./handler.js";
@@ -24,6 +27,7 @@ import { endSpan, extractContext, initializeOTLPWithApis, injectContext, ProxySp
24
27
  import { proxyLogger, runWithProxyRequestContext } from "./logger.js";
25
28
  import { ErrorPages } from "../server/utils/error-html.js";
26
29
  import { RendererRouter } from "./renderer-router.js";
30
+ import { ServerResolver } from "./server-resolver.js";
27
31
  import { parseProjectDomain } from "../server/utils/domain-parser.js";
28
32
  import { exit, getEnv, onSignal } from "../platform/compat/process.js";
29
33
  import { createHttpServer, upgradeWebSocket } from "../platform/compat/http/index.js";
@@ -60,6 +64,11 @@ const staticTargets = getEnv("VERYFRONT_SERVER_TARGETS");
60
64
  const rendererRouter = (discoveryHost || staticTargets)
61
65
  ? new RendererRouter(discoveryHost || "static-targets", PRODUCTION_SERVER_URL, parseInt(getEnv("VERYFRONT_SERVER_DISCOVERY_INTERVAL_MS") || "15000") || 15000)
62
66
  : null;
67
+ // Dedicated server resolver: routes environments to their dedicated server if assigned
68
+ const apiInternalUrl = getEnv("VERYFRONT_API_INTERNAL_URL") || config.apiBaseUrl;
69
+ const apiInternalUser = getEnv("VERYFRONT_API_INTERNAL_USER") || "";
70
+ const apiInternalPass = getEnv("VERYFRONT_API_INTERNAL_PASS") || "";
71
+ const serverResolver = new ServerResolver(apiInternalUrl, apiInternalUser, apiInternalPass);
63
72
  const { hostname: HOST, port: PORT } = resolveProxyBinding();
64
73
  const WS_CONNECT_TIMEOUT_MS = 30000;
65
74
  // Timeout for forwarding requests to production server (SSR can take time on cold start)
@@ -288,9 +297,16 @@ function forwardToServer(req) {
288
297
  const isIdempotent = ["GET", "HEAD", "OPTIONS"].includes(req.method);
289
298
  const maxRetries = isIdempotent ? VERYFRONT_SERVER_RETRY_COUNT : 0;
290
299
  let lastError = null;
300
+ // After a retryable connection error to a dedicated server, fall back to shared pool
301
+ let skipDedicated = false;
291
302
  for (let attempt = 0; attempt <= maxRetries; attempt++) {
292
- // Re-resolve on each attempt so retries can pick a different pod
293
- const baseUrl = rendererRouter?.resolve(ctx.projectSlug) ?? PRODUCTION_SERVER_URL;
303
+ // Resolve dedicated server per attempt so retries can fall back to shared pool
304
+ const dedicatedServerUrl = skipDedicated
305
+ ? null
306
+ : await serverResolver.resolve(ctx.environmentId);
307
+ const baseUrl = dedicatedServerUrl ??
308
+ rendererRouter?.resolve(ctx.projectSlug) ??
309
+ PRODUCTION_SERVER_URL;
294
310
  const serverUrl = new URL(url.pathname + url.search, baseUrl);
295
311
  // Delay before retry (not on first attempt)
296
312
  if (attempt > 0) {
@@ -352,10 +368,21 @@ function forwardToServer(req) {
352
368
  }
353
369
  // Check if this is a retryable error and we have retries left
354
370
  if (isRetryableConnectionError(error) && attempt < maxRetries) {
355
- proxyLogger.warn(`[Retry] Retryable connection error on attempt ${attempt + 1}`, {
356
- pathname: url.pathname,
357
- error: error instanceof Error ? error.message : String(error),
358
- });
371
+ // If we were targeting a dedicated server, fall back to shared pool on retry
372
+ if (dedicatedServerUrl) {
373
+ skipDedicated = true;
374
+ proxyLogger.warn(`[Retry] Dedicated server unreachable, falling back to shared pool`, {
375
+ pathname: url.pathname,
376
+ dedicatedServerUrl,
377
+ error: error instanceof Error ? error.message : String(error),
378
+ });
379
+ }
380
+ else {
381
+ proxyLogger.warn(`[Retry] Retryable connection error on attempt ${attempt + 1}`, {
382
+ pathname: url.pathname,
383
+ error: error instanceof Error ? error.message : String(error),
384
+ });
385
+ }
359
386
  continue; // Try again
360
387
  }
361
388
  // No more retries or non-retryable error
@@ -459,6 +486,7 @@ function router(req) {
459
486
  async function shutdown() {
460
487
  proxyLogger.info("Shutting down");
461
488
  rendererRouter?.close();
489
+ serverResolver.close();
462
490
  await proxyHandler.close();
463
491
  await shutdownOTLP();
464
492
  proxyLogger.info("Closed connections");
@@ -0,0 +1,23 @@
1
/**
 * Resolves environment IDs to dedicated renderer server URLs via the
 * internal API, falling back to the shared pool (null) when no dedicated
 * server is assigned.
 *
 * NOTE(review): this is a generated declaration file — member semantics
 * below are inferred from names and the construction site; confirm
 * against src/proxy/server-resolver.ts.
 */
export declare class ServerResolver {
    /** Base URL of the internal API used for dedicated-server lookups. */
    private apiInternalUrl;
    /** Basic-auth username for the internal API. */
    private apiUser;
    /** Basic-auth password for the internal API. */
    private apiPass;
    /** How long (ms) a resolution result stays cached. */
    private cacheTtlMs;
    // Cache of environmentId → resolution result. NOTE(review): value
    // shape (URL string vs. entry object with timestamp) inferred — verify.
    private cache;
    // Presumably in-flight lookups keyed by environment ID, used to
    // de-duplicate concurrent resolves — confirm in implementation.
    private pending;
    // Timer handle driving periodic cache eviction; presumably cleared
    // by close() — confirm.
    private cleanupTimer;
    constructor(apiInternalUrl: string, apiUser: string, apiPass: string, cacheTtlMs?: number);
    /**
     * Resolve an environment ID to a dedicated server URL, or null for shared pool.
     */
    resolve(environmentId: string | undefined): Promise<string | null>;
    /** Stops background work so the process can shut down cleanly. */
    close(): void;
    /**
     * Fetch dedicated server from API.
     * Returns DedicatedServer | null on success (null = no dedicated server assigned).
     * Throws ServerResolverError on transient failures (network, non-OK status).
     */
    private fetchServer;
    // Presumably evicts expired cache entries — inferred from name, confirm.
    private cleanup;
}
//# sourceMappingURL=server-resolver.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"server-resolver.d.ts","sourceRoot":"","sources":["../../../src/src/proxy/server-resolver.ts"],"names":[],"mappings":"AAoCA,qBAAa,cAAc;IAMvB,OAAO,CAAC,cAAc;IACtB,OAAO,CAAC,OAAO;IACf,OAAO,CAAC,OAAO;IACf,OAAO,CAAC,UAAU;IARpB,OAAO,CAAC,KAAK,CAAiC;IAC9C,OAAO,CAAC,OAAO,CAAsD;IACrE,OAAO,CAAC,YAAY,CAAuD;gBAGjE,cAAc,EAAE,MAAM,EACtB,OAAO,EAAE,MAAM,EACf,OAAO,EAAE,MAAM,EACf,UAAU,GAAE,MAAe;IAQrC;;OAEG;IACG,OAAO,CAAC,aAAa,EAAE,MAAM,GAAG,SAAS,GAAG,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC;IAwCxE,KAAK,IAAI,IAAI;IAQb;;;;OAIG;YACW,WAAW;IAmCzB,OAAO,CAAC,OAAO;CAMhB"}