veryfront 0.1.13 → 0.1.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (153)
  1. package/esm/cli/app/data/slug-words.d.ts.map +1 -1
  2. package/esm/cli/app/data/slug-words.js +225 -90
  3. package/esm/cli/app/operations/project-creation.js +4 -3
  4. package/esm/cli/app/shell.js +1 -1
  5. package/esm/cli/app/utils.d.ts +5 -4
  6. package/esm/cli/app/utils.d.ts.map +1 -1
  7. package/esm/cli/app/utils.js +0 -23
  8. package/esm/cli/app/views/dashboard.d.ts +1 -1
  9. package/esm/cli/app/views/dashboard.d.ts.map +1 -1
  10. package/esm/cli/app/views/dashboard.js +22 -4
  11. package/esm/cli/auth/callback-server.d.ts.map +1 -1
  12. package/esm/cli/auth/callback-server.js +3 -2
  13. package/esm/cli/commands/dev/handler.d.ts.map +1 -1
  14. package/esm/cli/commands/dev/handler.js +2 -0
  15. package/esm/cli/commands/init/init-command.d.ts.map +1 -1
  16. package/esm/cli/commands/init/init-command.js +20 -3
  17. package/esm/cli/commands/init/interactive-wizard.d.ts +3 -2
  18. package/esm/cli/commands/init/interactive-wizard.d.ts.map +1 -1
  19. package/esm/cli/commands/init/interactive-wizard.js +55 -27
  20. package/esm/cli/mcp/remote-file-tools.d.ts +0 -6
  21. package/esm/cli/mcp/remote-file-tools.d.ts.map +1 -1
  22. package/esm/cli/mcp/remote-file-tools.js +37 -15
  23. package/esm/cli/shared/reserve-slug.d.ts.map +1 -1
  24. package/esm/cli/shared/reserve-slug.js +8 -3
  25. package/esm/cli/utils/env-prompt.d.ts.map +1 -1
  26. package/esm/cli/utils/env-prompt.js +3 -0
  27. package/esm/deno.d.ts +5 -1
  28. package/esm/deno.js +11 -4
  29. package/esm/src/agent/chat-handler.d.ts +4 -3
  30. package/esm/src/agent/chat-handler.d.ts.map +1 -1
  31. package/esm/src/agent/chat-handler.js +55 -4
  32. package/esm/src/agent/react/index.d.ts +1 -1
  33. package/esm/src/agent/react/index.d.ts.map +1 -1
  34. package/esm/src/agent/react/use-chat/browser-inference/browser-engine.d.ts +18 -0
  35. package/esm/src/agent/react/use-chat/browser-inference/browser-engine.d.ts.map +1 -0
  36. package/esm/src/agent/react/use-chat/browser-inference/browser-engine.js +54 -0
  37. package/esm/src/agent/react/use-chat/browser-inference/types.d.ts +43 -0
  38. package/esm/src/agent/react/use-chat/browser-inference/types.d.ts.map +1 -0
  39. package/esm/src/agent/react/use-chat/browser-inference/types.js +4 -0
  40. package/esm/src/agent/react/use-chat/browser-inference/worker-client.d.ts +23 -0
  41. package/esm/src/agent/react/use-chat/browser-inference/worker-client.d.ts.map +1 -0
  42. package/esm/src/agent/react/use-chat/browser-inference/worker-client.js +67 -0
  43. package/esm/src/agent/react/use-chat/browser-inference/worker-script.d.ts +8 -0
  44. package/esm/src/agent/react/use-chat/browser-inference/worker-script.d.ts.map +1 -0
  45. package/esm/src/agent/react/use-chat/browser-inference/worker-script.js +97 -0
  46. package/esm/src/agent/react/use-chat/index.d.ts +1 -1
  47. package/esm/src/agent/react/use-chat/index.d.ts.map +1 -1
  48. package/esm/src/agent/react/use-chat/types.d.ts +12 -0
  49. package/esm/src/agent/react/use-chat/types.d.ts.map +1 -1
  50. package/esm/src/agent/react/use-chat/use-chat.d.ts.map +1 -1
  51. package/esm/src/agent/react/use-chat/use-chat.js +120 -6
  52. package/esm/src/agent/runtime/index.d.ts.map +1 -1
  53. package/esm/src/agent/runtime/index.js +59 -7
  54. package/esm/src/build/production-build/templates.d.ts +2 -2
  55. package/esm/src/build/production-build/templates.d.ts.map +1 -1
  56. package/esm/src/build/production-build/templates.js +2 -68
  57. package/esm/src/chat/index.d.ts +1 -1
  58. package/esm/src/chat/index.d.ts.map +1 -1
  59. package/esm/src/errors/veryfront-error.d.ts +3 -0
  60. package/esm/src/errors/veryfront-error.d.ts.map +1 -1
  61. package/esm/src/platform/adapters/runtime/deno/adapter.d.ts.map +1 -1
  62. package/esm/src/platform/adapters/runtime/deno/adapter.js +24 -3
  63. package/esm/src/platform/compat/http/deno-server.d.ts.map +1 -1
  64. package/esm/src/platform/compat/http/deno-server.js +23 -2
  65. package/esm/src/provider/index.d.ts +1 -1
  66. package/esm/src/provider/index.d.ts.map +1 -1
  67. package/esm/src/provider/index.js +1 -1
  68. package/esm/src/provider/local/ai-sdk-adapter.d.ts +19 -0
  69. package/esm/src/provider/local/ai-sdk-adapter.d.ts.map +1 -0
  70. package/esm/src/provider/local/ai-sdk-adapter.js +164 -0
  71. package/esm/src/provider/local/env.d.ts +10 -0
  72. package/esm/src/provider/local/env.d.ts.map +1 -0
  73. package/esm/src/provider/local/env.js +23 -0
  74. package/esm/src/provider/local/local-engine.d.ts +61 -0
  75. package/esm/src/provider/local/local-engine.d.ts.map +1 -0
  76. package/esm/src/provider/local/local-engine.js +211 -0
  77. package/esm/src/provider/local/model-catalog.d.ts +30 -0
  78. package/esm/src/provider/local/model-catalog.d.ts.map +1 -0
  79. package/esm/src/provider/local/model-catalog.js +58 -0
  80. package/esm/src/provider/model-registry.d.ts +14 -0
  81. package/esm/src/provider/model-registry.d.ts.map +1 -1
  82. package/esm/src/provider/model-registry.js +58 -2
  83. package/esm/src/proxy/main.js +34 -6
  84. package/esm/src/proxy/server-resolver.d.ts +23 -0
  85. package/esm/src/proxy/server-resolver.d.ts.map +1 -0
  86. package/esm/src/proxy/server-resolver.js +124 -0
  87. package/esm/src/react/components/ai/chat/components/inference-badge.d.ts +8 -0
  88. package/esm/src/react/components/ai/chat/components/inference-badge.d.ts.map +1 -0
  89. package/esm/src/react/components/ai/chat/components/inference-badge.js +36 -0
  90. package/esm/src/react/components/ai/chat/components/upgrade-cta.d.ts +7 -0
  91. package/esm/src/react/components/ai/chat/components/upgrade-cta.d.ts.map +1 -0
  92. package/esm/src/react/components/ai/chat/components/upgrade-cta.js +33 -0
  93. package/esm/src/react/components/ai/chat/index.d.ts +7 -1
  94. package/esm/src/react/components/ai/chat/index.d.ts.map +1 -1
  95. package/esm/src/react/components/ai/chat/index.js +16 -4
  96. package/esm/src/sandbox/index.d.ts +31 -0
  97. package/esm/src/sandbox/index.d.ts.map +1 -0
  98. package/esm/src/sandbox/index.js +30 -0
  99. package/esm/src/sandbox/sandbox.d.ts +48 -0
  100. package/esm/src/sandbox/sandbox.d.ts.map +1 -0
  101. package/esm/src/sandbox/sandbox.js +178 -0
  102. package/esm/src/transforms/pipeline/stages/ssr-vf-modules/import-finder.d.ts.map +1 -1
  103. package/esm/src/transforms/pipeline/stages/ssr-vf-modules/import-finder.js +8 -2
  104. package/esm/src/transforms/pipeline/stages/ssr-vf-modules/index.d.ts +1 -0
  105. package/esm/src/transforms/pipeline/stages/ssr-vf-modules/index.d.ts.map +1 -1
  106. package/esm/src/transforms/pipeline/stages/ssr-vf-modules/index.js +1 -0
  107. package/esm/src/transforms/pipeline/stages/ssr-vf-modules/transform.d.ts.map +1 -1
  108. package/esm/src/transforms/pipeline/stages/ssr-vf-modules/transform.js +15 -1
  109. package/package.json +8 -1
  110. package/src/cli/app/data/slug-words.ts +225 -90
  111. package/src/cli/app/operations/project-creation.ts +3 -3
  112. package/src/cli/app/shell.ts +1 -1
  113. package/src/cli/app/utils.ts +0 -30
  114. package/src/cli/app/views/dashboard.ts +27 -4
  115. package/src/cli/auth/callback-server.ts +3 -2
  116. package/src/cli/commands/dev/handler.ts +2 -0
  117. package/src/cli/commands/init/init-command.ts +30 -3
  118. package/src/cli/commands/init/interactive-wizard.ts +62 -34
  119. package/src/cli/mcp/remote-file-tools.ts +50 -15
  120. package/src/cli/shared/reserve-slug.ts +9 -2
  121. package/src/cli/utils/env-prompt.ts +3 -0
  122. package/src/deno.js +11 -4
  123. package/src/src/agent/chat-handler.ts +57 -4
  124. package/src/src/agent/react/index.ts +2 -0
  125. package/src/src/agent/react/use-chat/browser-inference/browser-engine.ts +81 -0
  126. package/src/src/agent/react/use-chat/browser-inference/types.ts +52 -0
  127. package/src/src/agent/react/use-chat/browser-inference/worker-client.ts +89 -0
  128. package/src/src/agent/react/use-chat/browser-inference/worker-script.ts +98 -0
  129. package/src/src/agent/react/use-chat/index.ts +2 -0
  130. package/src/src/agent/react/use-chat/types.ts +20 -0
  131. package/src/src/agent/react/use-chat/use-chat.ts +148 -8
  132. package/src/src/agent/runtime/index.ts +72 -6
  133. package/src/src/build/production-build/templates.ts +2 -68
  134. package/src/src/chat/index.ts +2 -0
  135. package/src/src/errors/veryfront-error.ts +2 -1
  136. package/src/src/platform/adapters/runtime/deno/adapter.ts +25 -3
  137. package/src/src/platform/compat/http/deno-server.ts +28 -1
  138. package/src/src/provider/index.ts +1 -0
  139. package/src/src/provider/local/ai-sdk-adapter.ts +207 -0
  140. package/src/src/provider/local/env.ts +26 -0
  141. package/src/src/provider/local/local-engine.ts +288 -0
  142. package/src/src/provider/local/model-catalog.ts +73 -0
  143. package/src/src/provider/model-registry.ts +66 -2
  144. package/src/src/proxy/main.ts +41 -6
  145. package/src/src/proxy/server-resolver.ts +151 -0
  146. package/src/src/react/components/ai/chat/components/inference-badge.tsx +48 -0
  147. package/src/src/react/components/ai/chat/components/upgrade-cta.tsx +56 -0
  148. package/src/src/react/components/ai/chat/index.tsx +43 -6
  149. package/src/src/sandbox/index.ts +32 -0
  150. package/src/src/sandbox/sandbox.ts +236 -0
  151. package/src/src/transforms/pipeline/stages/ssr-vf-modules/import-finder.ts +9 -2
  152. package/src/src/transforms/pipeline/stages/ssr-vf-modules/index.ts +1 -0
  153. package/src/src/transforms/pipeline/stages/ssr-vf-modules/transform.ts +17 -0
@@ -0,0 +1,61 @@
1
+ /**
2
+ * Local Model Engine
3
+ *
4
+ * Singleton wrapper around `@huggingface/transformers` for server-side
5
+ * local LLM inference. Provides lazy model loading and streaming text
6
+ * generation via async generators.
7
+ *
8
+ * Uses ONNX Runtime for inference with q4 quantization — NOT q4f16
9
+ * due to a known ONNX bug with f16 LayerNorm on CPU.
10
+ *
11
+ * @module provider/local
12
+ */
13
+ /** Chat message format expected by Transformers.js */
14
+ export interface ChatMessage {
15
+ role: "system" | "user" | "assistant";
16
+ content: string;
17
+ }
18
+ /** Options for text generation */
19
+ export interface GenerateOptions {
20
+ maxNewTokens?: number;
21
+ temperature?: number;
22
+ topP?: number;
23
+ topK?: number;
24
+ stopSequences?: string[];
25
+ }
26
+ /**
27
+ * Eagerly verify that the local AI runtime (@huggingface/transformers + ONNX)
28
+ * is available by loading the default model pipeline.
29
+ *
30
+ * Call this *before* creating the HTTP response stream so that failures surface
31
+ * as a thrown error (→ 503) rather than being swallowed inside a ReadableStream
32
+ * (→ 200 with in-band SSE error).
33
+ *
34
+ * In compiled binaries, `import("@huggingface/transformers")` itself fails
35
+ * because `onnxruntime-node` eagerly `require()`s a native `.node` addon at
36
+ * import time and the addon isn't embedded in the binary. In dev mode (Deno)
37
+ * the native addon exists on disk so the import succeeds, but `pipeline()` can
38
+ * still fail if the ONNX model files are missing. Either way this function
39
+ * surfaces the error before the response stream is created. The pipeline is
40
+ * cached after the first successful call, so subsequent checks are instant.
41
+ */
42
+ export declare function verifyLocalRuntime(modelId?: string): Promise<void>;
43
+ /**
44
+ * Generate text in a streaming fashion using an async generator.
45
+ *
46
+ * Yields individual tokens as they are generated by the model.
47
+ */
48
+ export declare function generateStream(modelId: string, messages: ChatMessage[], options?: GenerateOptions): AsyncGenerator<string, void, undefined>;
49
+ /**
50
+ * Generate text without streaming (full completion).
51
+ */
52
+ export declare function generate(modelId: string, messages: ChatMessage[], options?: GenerateOptions): Promise<string>;
53
+ /**
54
+ * Preload a model into memory. Useful for warming up on server start.
55
+ */
56
+ export declare function preloadModel(modelId: string): Promise<void>;
57
+ /**
58
+ * Check if a model is currently loaded in memory.
59
+ */
60
+ export declare function isModelLoaded(modelId: string): boolean;
61
+ //# sourceMappingURL=local-engine.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"local-engine.d.ts","sourceRoot":"","sources":["../../../../src/src/provider/local/local-engine.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AASH,sDAAsD;AACtD,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,QAAQ,GAAG,MAAM,GAAG,WAAW,CAAC;IACtC,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,kCAAkC;AAClC,MAAM,WAAW,eAAe;IAC9B,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;CAC1B;AA6HD;;;;;;;;;;;;;;;GAeG;AACH,wBAAsB,kBAAkB,CAAC,OAAO,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAGxE;AAED;;;;GAIG;AACH,wBAAuB,cAAc,CACnC,OAAO,EAAE,MAAM,EACf,QAAQ,EAAE,WAAW,EAAE,EACvB,OAAO,GAAE,eAAoB,GAC5B,cAAc,CAAC,MAAM,EAAE,IAAI,EAAE,SAAS,CAAC,CAoEzC;AAED;;GAEG;AACH,wBAAsB,QAAQ,CAC5B,OAAO,EAAE,MAAM,EACf,QAAQ,EAAE,WAAW,EAAE,EACvB,OAAO,GAAE,eAAoB,GAC5B,OAAO,CAAC,MAAM,CAAC,CAMjB;AAED;;GAEG;AACH,wBAAsB,YAAY,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAGjE;AAED;;GAEG;AACH,wBAAgB,aAAa,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAGtD"}
@@ -0,0 +1,211 @@
1
+ /**
2
+ * Local Model Engine
3
+ *
4
+ * Singleton wrapper around `@huggingface/transformers` for server-side
5
+ * local LLM inference. Provides lazy model loading and streaming text
6
+ * generation via async generators.
7
+ *
8
+ * Uses ONNX Runtime for inference with q4 quantization — NOT q4f16
9
+ * due to a known ONNX bug with f16 LayerNorm on CPU.
10
+ *
11
+ * @module provider/local
12
+ */
13
+ import { serverLogger } from "../../utils/index.js";
14
+ import { createError, toError } from "../../errors/veryfront-error.js";
15
+ import { DEFAULT_LOCAL_MODEL, resolveLocalModel } from "./model-catalog.js";
16
+ import { isLocalAIDisabled } from "./env.js";
17
// Component-scoped logger for all local-inference messages.
const logger = serverLogger.component("local-llm");
// Successfully loaded pipelines, keyed by HuggingFace repo ID.
const pipelineCache = new Map();
// In-flight load promises (same key) used to dedupe concurrent loads.
const loadingLocks = new Map();
// The @huggingface/transformers module, imported on demand; null until loaded.
let transformersModule = null;
24
/**
 * Lazily import @huggingface/transformers, caching the module after the
 * first successful load so startup stays fast when cloud API keys exist.
 *
 * Throws a `no_ai_available` error when local AI is explicitly disabled or
 * when the native ONNX runtime cannot be imported in this environment.
 */
async function getTransformers() {
    // A previously loaded module wins over everything, including the
    // disable flag — preserves load-then-disable behavior.
    if (transformersModule) {
        return transformersModule;
    }
    if (isLocalAIDisabled()) {
        throw toError(createError({
            type: "no_ai_available",
            message: "Local AI disabled via VERYFRONT_DISABLE_LOCAL_AI environment variable.",
        }));
    }
    logger.info("Loading @huggingface/transformers...");
    let loaded;
    try {
        loaded = await import("@huggingface/transformers");
    }
    catch {
        throw toError(createError({
            type: "no_ai_available",
            message: "Local AI model unavailable — native ONNX Runtime is not supported in this environment " +
                "(e.g. compiled binaries). Set OPENAI_API_KEY, ANTHROPIC_API_KEY, or GOOGLE_API_KEY " +
                "in your .env file to use a cloud provider instead.",
        }));
    }
    transformersModule = loaded;
    // Model files are cached on disk; browser cache APIs don't exist in Node/Deno.
    transformersModule.env.cacheDir = "./.cache/models";
    transformersModule.env.useBrowserCache = false;
    return transformersModule;
}
55
/**
 * Obtain a text-generation pipeline for the given catalog entry, loading it
 * on first use. Concurrent callers for the same model share one in-flight
 * load; successful loads are cached for the process lifetime.
 */
async function loadPipeline(modelInfo) {
    const key = modelInfo.hfId;
    // Fast path: pipeline already resident in memory.
    const ready = pipelineCache.get(key);
    if (ready) {
        return ready;
    }
    // Join an in-progress load instead of starting a duplicate.
    const inFlight = loadingLocks.get(key);
    if (inFlight) {
        return inFlight;
    }
    // Kick off a fresh load and publish the promise as the lock.
    const load = (async () => {
        const transformers = await getTransformers();
        logger.info(`Loading local model: ${modelInfo.hfId} (${modelInfo.dtype}, ~${modelInfo.sizeMB}MB)...`);
        const pipe = await transformers.pipeline("text-generation", modelInfo.hfId, {
            dtype: modelInfo.dtype,
            device: "cpu",
        });
        logger.info(`Model loaded: ${modelInfo.hfId}`);
        pipelineCache.set(key, pipe);
        loadingLocks.delete(key);
        return pipe;
    })();
    loadingLocks.set(key, load);
    try {
        return await load;
    }
    catch (error) {
        loadingLocks.delete(key);
        // Map ONNX / native-addon failures to no_ai_available so the chat
        // handler returns a proper 503 instead of burying the error inside
        // a 200 response stream as an in-band SSE error.
        const msg = error instanceof Error ? error.message : String(error);
        const looksLikeNativeFailure = msg.includes("onnx") || msg.includes("ONNX") ||
            msg.includes("dlopen") || msg.includes("dynamic linking") ||
            msg.includes("native module") || msg.includes("SharedArrayBuffer");
        if (looksLikeNativeFailure) {
            transformersModule = null;
            throw toError(createError({
                type: "no_ai_available",
                message: "Local AI model unavailable — native ONNX Runtime is not supported in this environment " +
                    "(e.g. compiled binaries). Set OPENAI_API_KEY, ANTHROPIC_API_KEY, or GOOGLE_API_KEY " +
                    "in your .env file to use a cloud provider instead.",
            }));
        }
        throw error;
    }
}
106
/**
 * Eagerly verify that the local AI runtime (@huggingface/transformers + ONNX)
 * is available by loading the requested (or default) model pipeline.
 *
 * Call this *before* creating the HTTP response stream so failures surface as
 * a thrown error (→ 503) rather than being swallowed inside a ReadableStream
 * (→ 200 with in-band SSE error).
 *
 * In compiled binaries the `import("@huggingface/transformers")` itself fails
 * because `onnxruntime-node` eagerly `require()`s a native `.node` addon that
 * isn't embedded in the binary. In dev mode (Deno) the import succeeds but
 * `pipeline()` can still fail if model files are missing. Either way the error
 * surfaces here, before any stream exists. The pipeline is cached after the
 * first successful call, so subsequent checks are instant.
 */
export async function verifyLocalRuntime(modelId) {
    await loadPipeline(resolveLocalModel(modelId || DEFAULT_LOCAL_MODEL));
}
126
/**
 * Generate text in a streaming fashion using an async generator.
 *
 * Yields individual tokens as they are generated by the model. Tokens from
 * the TextStreamer callback are buffered in a queue and drained by the
 * generator loop; a one-shot resolver wakes the loop when tokens arrive or
 * generation completes.
 */
export async function* generateStream(modelId, messages, options = {}) {
    const modelInfo = resolveLocalModel(modelId);
    const pipe = await loadPipeline(modelInfo);
    const transformers = await getTransformers();
    const { maxNewTokens = 512, temperature = 0.7, topP, topK, } = options;
    // Queue bridging TextStreamer callbacks → async generator.
    const tokenQueue = [];
    let resolveWaiting = null;
    let done = false;
    let generationError = null;
    const wake = () => {
        if (resolveWaiting) {
            resolveWaiting();
            resolveWaiting = null;
        }
    };
    const streamer = new transformers.TextStreamer(pipe.tokenizer, {
        skip_prompt: true,
        skip_special_tokens: true,
        callback_function: (text) => {
            tokenQueue.push(text);
            wake();
        },
    });
    // Start generation in the background. Failures are CAPTURED rather than
    // rethrown inside .catch(): rethrowing there left `generatePromise` as a
    // rejected promise handled only by the trailing `await`, so a consumer
    // that stopped iterating early (break / generator.return()) triggered an
    // unhandled promise rejection.
    const generatePromise = pipe(messages, {
        max_new_tokens: maxNewTokens,
        temperature,
        top_p: topP,
        top_k: topK,
        do_sample: temperature > 0,
        streamer,
    }).catch((error) => {
        generationError = error instanceof Error ? error : new Error(String(error));
    }).finally(() => {
        done = true;
        wake();
    });
    // Yield tokens as they arrive.
    while (true) {
        while (tokenQueue.length > 0) {
            yield tokenQueue.shift();
        }
        if (done)
            break;
        // Wait for more tokens (or completion).
        await new Promise((resolve) => {
            resolveWaiting = resolve;
        });
    }
    // Ensure generation has fully completed, then surface any failure.
    await generatePromise;
    if (generationError) {
        throw generationError;
    }
}
188
/**
 * Generate text without streaming: drains the token stream and returns the
 * full completion as a single string.
 */
export async function generate(modelId, messages, options = {}) {
    let output = "";
    for await (const token of generateStream(modelId, messages, options)) {
        output += token;
    }
    return output;
}
198
/**
 * Preload a model into memory. Useful for warming up on server start.
 */
export async function preloadModel(modelId) {
    await loadPipeline(resolveLocalModel(modelId));
}
205
/**
 * Check whether the given model's pipeline is currently resident in memory.
 */
export function isModelLoaded(modelId) {
    const { hfId } = resolveLocalModel(modelId);
    return pipelineCache.has(hfId);
}
@@ -0,0 +1,30 @@
1
+ /**
2
+ * Local Model Catalog
3
+ *
4
+ * Maps friendly model IDs to HuggingFace model repository IDs.
5
+ * Used by the local inference engine to resolve model names.
6
+ *
7
+ * @module provider/local
8
+ */
9
+ export interface ModelInfo {
10
+ /** HuggingFace model repository ID */
11
+ hfId: string;
12
+ /** Quantization dtype for ONNX Runtime */
13
+ dtype: "q4" | "q8" | "fp32";
14
+ /** Approximate download size in MB */
15
+ sizeMB: number;
16
+ /** Human-readable description */
17
+ description: string;
18
+ }
19
+ /** Default model used when no specific model ID is provided */
20
+ export declare const DEFAULT_LOCAL_MODEL = "smollm2-135m";
21
+ /**
22
+ * Resolve a friendly model ID to its HuggingFace model info.
23
+ * Falls back to treating the ID as a raw HuggingFace repository ID.
24
+ */
25
+ export declare function resolveLocalModel(modelId: string): ModelInfo;
26
+ /**
27
+ * Get all available local model IDs.
28
+ */
29
+ export declare function getLocalModelIds(): string[];
30
+ //# sourceMappingURL=model-catalog.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"model-catalog.d.ts","sourceRoot":"","sources":["../../../../src/src/provider/local/model-catalog.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,MAAM,WAAW,SAAS;IACxB,sCAAsC;IACtC,IAAI,EAAE,MAAM,CAAC;IACb,0CAA0C;IAC1C,KAAK,EAAE,IAAI,GAAG,IAAI,GAAG,MAAM,CAAC;IAC5B,sCAAsC;IACtC,MAAM,EAAE,MAAM,CAAC;IACf,iCAAiC;IACjC,WAAW,EAAE,MAAM,CAAC;CACrB;AA6BD,+DAA+D;AAC/D,eAAO,MAAM,mBAAmB,iBAAiB,CAAC;AAElD;;;GAGG;AACH,wBAAgB,iBAAiB,CAAC,OAAO,EAAE,MAAM,GAAG,SAAS,CAW5D;AAED;;GAEG;AACH,wBAAgB,gBAAgB,IAAI,MAAM,EAAE,CAE3C"}
@@ -0,0 +1,58 @@
1
+ /**
2
+ * Local Model Catalog
3
+ *
4
+ * Maps friendly model IDs to HuggingFace model repository IDs.
5
+ * Used by the local inference engine to resolve model names.
6
+ *
7
+ * @module provider/local
8
+ */
9
/**
 * Catalog of supported local models, keyed by friendly ID.
 *
 * **Important:** Only `q4` quantization is used — `q4f16` has a known
 * ONNX Runtime bug with LayerNorm on CPU that produces NaN outputs.
 */
const MODEL_CATALOG = {
    "smollm2-135m": {
        hfId: "HuggingFaceTB/SmolLM2-135M-Instruct",
        dtype: "q4",
        sizeMB: 100,
        description: "SmolLM2 135M — fast, lightweight chat model",
    },
    "smollm2-360m": {
        hfId: "HuggingFaceTB/SmolLM2-360M-Instruct",
        dtype: "q4",
        sizeMB: 250,
        description: "SmolLM2 360M — better quality, still fast",
    },
    "smollm2-1.7b": {
        hfId: "HuggingFaceTB/SmolLM2-1.7B-Instruct",
        dtype: "q4",
        sizeMB: 1000,
        description: "SmolLM2 1.7B — highest quality local model",
    },
};
35
/** Catalog key of the model used when callers don't specify one. */
export const DEFAULT_LOCAL_MODEL = "smollm2-135m";
37
/**
 * Resolve a friendly model ID to its HuggingFace model info.
 * Unknown IDs are passed through as raw HuggingFace repository IDs.
 */
export function resolveLocalModel(modelId) {
    const known = MODEL_CATALOG[modelId];
    if (known) {
        return known;
    }
    // Not in the catalog — treat as a raw HF repo ID
    // (e.g. "HuggingFaceTB/SmolLM2-135M-Instruct").
    return {
        hfId: modelId,
        dtype: "q4",
        sizeMB: 0,
        description: `Custom model: ${modelId}`,
    };
}
53
/**
 * List the friendly IDs of every model in the local catalog.
 */
export function getLocalModelIds() {
    const ids = Object.keys(MODEL_CATALOG);
    return ids;
}
@@ -43,6 +43,20 @@ export declare function hasModelProvider(name: string): boolean;
43
43
  * Get list of registered model provider names (project-scoped + shared).
44
44
  */
45
45
  export declare function getRegisteredModelProviders(): string[];
46
+ /**
47
+ * Eagerly verify that the resolved model's runtime is available.
48
+ *
49
+ * For real local-engine models (created by `createLocalModel()`) this
50
+ * eagerly loads the ONNX pipeline to surface `no_ai_available` errors
51
+ * **before** the HTTP response stream is created. Must happen before the
52
+ * ReadableStream so the chat handler can return a proper 503 (with
53
+ * browser-fallback info) rather than a 200 with an in-band SSE error.
54
+ *
55
+ * Uses the `_isVfLocalModel` marker set by `createLocalModel()` to
56
+ * distinguish real local-engine models from mock/custom providers that
57
+ * happen to use `provider: "local"`.
58
+ */
59
+ export declare function ensureModelReady(model: LanguageModel): Promise<void>;
46
60
  /**
47
61
  * Clear all registered model providers (for testing).
48
62
  */
@@ -1 +1 @@
1
- {"version":3,"file":"model-registry.d.ts","sourceRoot":"","sources":["../../../src/src/provider/model-registry.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAEH,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,IAAI,CAAC;AAYxC,MAAM,MAAM,oBAAoB,GAAG,CAAC,OAAO,EAAE,MAAM,KAAK,aAAa,CAAC;AAOtE;;;;;;;;GAQG;AACH,wBAAgB,qBAAqB,CACnC,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,oBAAoB,GAC5B,IAAI,CAEN;AA0ED;;;;;;;;;GASG;AACH,wBAAgB,YAAY,CAAC,WAAW,EAAE,MAAM,GAAG,aAAa,CAuC/D;AAED;;GAEG;AACH,wBAAgB,gBAAgB,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAGtD;AAED;;GAEG;AACH,wBAAgB,2BAA2B,IAAI,MAAM,EAAE,CAGtD;AAED;;GAEG;AACH,wBAAgB,mBAAmB,IAAI,IAAI,CAG1C"}
1
+ {"version":3,"file":"model-registry.d.ts","sourceRoot":"","sources":["../../../src/src/provider/model-registry.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAEH,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,IAAI,CAAC;AAmBxC,MAAM,MAAM,oBAAoB,GAAG,CAAC,OAAO,EAAE,MAAM,KAAK,aAAa,CAAC;AAOtE;;;;;;;;GAQG;AACH,wBAAgB,qBAAqB,CACnC,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,oBAAoB,GAC5B,IAAI,CAEN;AAoFD;;;;;;;;;GASG;AACH,wBAAgB,YAAY,CAAC,WAAW,EAAE,MAAM,GAAG,aAAa,CA+D/D;AAED;;GAEG;AACH,wBAAgB,gBAAgB,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAGtD;AAED;;GAEG;AACH,wBAAgB,2BAA2B,IAAI,MAAM,EAAE,CAGtD;AAED;;;;;;;;;;;;GAYG;AACH,wBAAsB,gBAAgB,CACpC,KAAK,EAAE,aAAa,GACnB,OAAO,CAAC,IAAI,CAAC,CAMf;AAED;;GAEG;AACH,wBAAgB,mBAAmB,IAAI,IAAI,CAG1C"}
@@ -12,12 +12,18 @@
12
12
  *
13
13
  * @module
14
14
  */
15
- import { createError, toError } from "../errors/veryfront-error.js";
15
+ import { createError, fromError, toError } from "../errors/veryfront-error.js";
16
16
  import { createOpenAI } from "@ai-sdk/openai";
17
17
  import { createAnthropic } from "@ai-sdk/anthropic";
18
18
  import { createGoogleGenerativeAI } from "@ai-sdk/google";
19
19
  import { getAnthropicEnvConfig, getGoogleGenAIEnvConfig, getOpenAIEnvConfig, } from "../config/env.js";
20
20
  import { ProjectScopedRegistryManager } from "../ai/registry-manager.js";
21
+ import { serverLogger } from "../utils/index.js";
22
+ import { DEFAULT_LOCAL_MODEL } from "./local/model-catalog.js";
23
+ import { createLocalModel } from "./local/ai-sdk-adapter.js";
24
+ import { isLocalAIDisabled } from "./local/env.js";
25
+ import { verifyLocalRuntime } from "./local/local-engine.js";
26
+ const localLogger = serverLogger.component("local-llm");
21
27
  const manager = new ProjectScopedRegistryManager("model-provider");
22
28
  let autoInitialized = false;
23
29
  /**
@@ -88,6 +94,15 @@ function autoInitializeFromEnv() {
88
94
  return createGoogleGenerativeAI({ apiKey: config.apiKey })(id);
89
95
  });
90
96
  }
97
+ // Register the local provider (always available, no API key needed).
98
+ // createLocalModel is a lightweight synchronous constructor — the actual
99
+ // @huggingface/transformers import and model loading happen lazily on
100
+ // the first doGenerate/doStream call, so this doesn't add startup overhead.
101
+ if (!manager.has("local")) {
102
+ manager.registerShared("local", (id) => {
103
+ return createLocalModel(id);
104
+ });
105
+ }
91
106
  }
92
107
  /**
93
108
  * Resolve a "provider/model" string to an AI SDK LanguageModel instance.
@@ -124,7 +139,27 @@ export function resolveModel(modelString) {
124
139
  message: `Model provider "${providerName}" not registered. Available: ${available}`,
125
140
  }));
126
141
  }
127
- return factory(modelId);
142
+ try {
143
+ return factory(modelId);
144
+ }
145
+ catch (error) {
146
+ // Auto-fallback: when a cloud provider fails due to missing API key,
147
+ // transparently switch to the local model so chat works out of the box.
148
+ const errorData = fromError(error);
149
+ if (errorData?.type === "config" && providerName !== "local" && manager.has("local")) {
150
+ // Check if local AI is explicitly disabled (e.g., for testing)
151
+ if (isLocalAIDisabled()) {
152
+ throw toError(createError({
153
+ type: "no_ai_available",
154
+ message: "Local AI disabled via VERYFRONT_DISABLE_LOCAL_AI environment variable.",
155
+ }));
156
+ }
157
+ localLogger.info(`⚡ "${providerName}" unavailable (missing API key). Falling back to local model.`);
158
+ const localFactory = manager.get("local");
159
+ return localFactory(DEFAULT_LOCAL_MODEL);
160
+ }
161
+ throw error;
162
+ }
128
163
  }
129
164
  /**
130
165
  * Check if a model provider is registered (project-scoped or shared).
@@ -140,6 +175,27 @@ export function getRegisteredModelProviders() {
140
175
  autoInitializeFromEnv();
141
176
  return manager.getAllIds();
142
177
  }
178
/**
 * Eagerly verify that the resolved model's runtime is available.
 *
 * For real local-engine models (created by `createLocalModel()`) this
 * eagerly loads the ONNX pipeline to surface `no_ai_available` errors
 * **before** the HTTP response stream is created. Must happen before the
 * ReadableStream so the chat handler can return a proper 503 (with
 * browser-fallback info) rather than a 200 with an in-band SSE error.
 *
 * Uses the `_isVfLocalModel` marker set by `createLocalModel()` to
 * distinguish real local-engine models from mock/custom providers that
 * happen to use `provider: "local"`.
 */
export async function ensureModelReady(model) {
    const candidate = model;
    // Mock/custom "local" providers lack the marker and need no warmup.
    if (!candidate._isVfLocalModel) {
        return;
    }
    // modelId is "local/<id>" — strip the prefix to get the catalog id.
    const rawId = candidate.modelId;
    const catalogId = typeof rawId === "string" ? rawId.replace(/^local\//, "") : undefined;
    await verifyLocalRuntime(catalogId);
}
143
199
  /**
144
200
  * Clear all registered model providers (for testing).
145
201
  */
@@ -15,6 +15,9 @@
15
15
  * - LOCAL_PROJECTS: JSON map of slug → filesystem path (for dev)
16
16
  * - CACHE_TYPE: "memory" (default) or "redis"
17
17
  * - REDIS_URL: Redis connection URL (required if CACHE_TYPE=redis)
18
+ * - VERYFRONT_API_INTERNAL_URL: API URL for internal endpoints (falls back to VERYFRONT_PROXY_API_BASE_URL)
19
+ * - VERYFRONT_API_INTERNAL_USER: Basic auth user for internal API
20
+ * - VERYFRONT_API_INTERNAL_PASS: Basic auth pass for internal API
18
21
  */
19
22
  import * as dntShim from "../../_dnt.shims.js";
20
23
  import { createProxyHandler, INTERNAL_PROXY_HEADERS } from "./handler.js";
@@ -24,6 +27,7 @@ import { endSpan, extractContext, initializeOTLPWithApis, injectContext, ProxySp
24
27
  import { proxyLogger, runWithProxyRequestContext } from "./logger.js";
25
28
  import { ErrorPages } from "../server/utils/error-html.js";
26
29
  import { RendererRouter } from "./renderer-router.js";
30
+ import { ServerResolver } from "./server-resolver.js";
27
31
  import { parseProjectDomain } from "../server/utils/domain-parser.js";
28
32
  import { exit, getEnv, onSignal } from "../platform/compat/process.js";
29
33
  import { createHttpServer, upgradeWebSocket } from "../platform/compat/http/index.js";
@@ -60,6 +64,11 @@ const staticTargets = getEnv("VERYFRONT_SERVER_TARGETS");
60
64
  const rendererRouter = (discoveryHost || staticTargets)
61
65
  ? new RendererRouter(discoveryHost || "static-targets", PRODUCTION_SERVER_URL, parseInt(getEnv("VERYFRONT_SERVER_DISCOVERY_INTERVAL_MS") || "15000") || 15000)
62
66
  : null;
67
+ // Dedicated server resolver: routes environments to their dedicated server if assigned
68
+ const apiInternalUrl = getEnv("VERYFRONT_API_INTERNAL_URL") || config.apiBaseUrl;
69
+ const apiInternalUser = getEnv("VERYFRONT_API_INTERNAL_USER") || "";
70
+ const apiInternalPass = getEnv("VERYFRONT_API_INTERNAL_PASS") || "";
71
+ const serverResolver = new ServerResolver(apiInternalUrl, apiInternalUser, apiInternalPass);
63
72
  const { hostname: HOST, port: PORT } = resolveProxyBinding();
64
73
  const WS_CONNECT_TIMEOUT_MS = 30000;
65
74
  // Timeout for forwarding requests to production server (SSR can take time on cold start)
@@ -288,9 +297,16 @@ function forwardToServer(req) {
288
297
  const isIdempotent = ["GET", "HEAD", "OPTIONS"].includes(req.method);
289
298
  const maxRetries = isIdempotent ? VERYFRONT_SERVER_RETRY_COUNT : 0;
290
299
  let lastError = null;
300
+ // After a retryable connection error to a dedicated server, fall back to shared pool
301
+ let skipDedicated = false;
291
302
  for (let attempt = 0; attempt <= maxRetries; attempt++) {
292
- // Re-resolve on each attempt so retries can pick a different pod
293
- const baseUrl = rendererRouter?.resolve(ctx.projectSlug) ?? PRODUCTION_SERVER_URL;
303
+ // Resolve dedicated server per attempt so retries can fall back to shared pool
304
+ const dedicatedServerUrl = skipDedicated
305
+ ? null
306
+ : await serverResolver.resolve(ctx.environmentId);
307
+ const baseUrl = dedicatedServerUrl ??
308
+ rendererRouter?.resolve(ctx.projectSlug) ??
309
+ PRODUCTION_SERVER_URL;
294
310
  const serverUrl = new URL(url.pathname + url.search, baseUrl);
295
311
  // Delay before retry (not on first attempt)
296
312
  if (attempt > 0) {
@@ -352,10 +368,21 @@ function forwardToServer(req) {
352
368
  }
353
369
  // Check if this is a retryable error and we have retries left
354
370
  if (isRetryableConnectionError(error) && attempt < maxRetries) {
355
- proxyLogger.warn(`[Retry] Retryable connection error on attempt ${attempt + 1}`, {
356
- pathname: url.pathname,
357
- error: error instanceof Error ? error.message : String(error),
358
- });
371
+ // If we were targeting a dedicated server, fall back to shared pool on retry
372
+ if (dedicatedServerUrl) {
373
+ skipDedicated = true;
374
+ proxyLogger.warn(`[Retry] Dedicated server unreachable, falling back to shared pool`, {
375
+ pathname: url.pathname,
376
+ dedicatedServerUrl,
377
+ error: error instanceof Error ? error.message : String(error),
378
+ });
379
+ }
380
+ else {
381
+ proxyLogger.warn(`[Retry] Retryable connection error on attempt ${attempt + 1}`, {
382
+ pathname: url.pathname,
383
+ error: error instanceof Error ? error.message : String(error),
384
+ });
385
+ }
359
386
  continue; // Try again
360
387
  }
361
388
  // No more retries or non-retryable error
@@ -459,6 +486,7 @@ function router(req) {
459
486
  async function shutdown() {
460
487
  proxyLogger.info("Shutting down");
461
488
  rendererRouter?.close();
489
+ serverResolver.close();
462
490
  await proxyHandler.close();
463
491
  await shutdownOTLP();
464
492
  proxyLogger.info("Closed connections");
@@ -0,0 +1,23 @@
1
/**
 * Resolves environment IDs to dedicated renderer server URLs via the
 * internal API, falling back to the shared pool (null) when no dedicated
 * server is assigned.
 *
 * NOTE(review): this is a generated declaration file — member semantics
 * below are inferred from names and the construction site; confirm
 * against src/proxy/server-resolver.ts.
 */
export declare class ServerResolver {
    /** Base URL of the internal API used for dedicated-server lookups. */
    private apiInternalUrl;
    /** Basic-auth username for the internal API. */
    private apiUser;
    /** Basic-auth password for the internal API. */
    private apiPass;
    /** How long (ms) a resolution result stays cached. */
    private cacheTtlMs;
    // Cache of environmentId → resolution result. NOTE(review): value
    // shape (URL string vs. entry object with timestamp) inferred — verify.
    private cache;
    // Presumably in-flight lookups keyed by environment ID, used to
    // de-duplicate concurrent resolves — confirm in implementation.
    private pending;
    // Timer handle driving periodic cache eviction; presumably cleared
    // by close() — confirm.
    private cleanupTimer;
    constructor(apiInternalUrl: string, apiUser: string, apiPass: string, cacheTtlMs?: number);
    /**
     * Resolve an environment ID to a dedicated server URL, or null for shared pool.
     */
    resolve(environmentId: string | undefined): Promise<string | null>;
    /** Stops background work so the process can shut down cleanly. */
    close(): void;
    /**
     * Fetch dedicated server from API.
     * Returns DedicatedServer | null on success (null = no dedicated server assigned).
     * Throws ServerResolverError on transient failures (network, non-OK status).
     */
    private fetchServer;
    // Presumably evicts expired cache entries — inferred from name, confirm.
    private cleanup;
}
//# sourceMappingURL=server-resolver.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"server-resolver.d.ts","sourceRoot":"","sources":["../../../src/src/proxy/server-resolver.ts"],"names":[],"mappings":"AAoCA,qBAAa,cAAc;IAMvB,OAAO,CAAC,cAAc;IACtB,OAAO,CAAC,OAAO;IACf,OAAO,CAAC,OAAO;IACf,OAAO,CAAC,UAAU;IARpB,OAAO,CAAC,KAAK,CAAiC;IAC9C,OAAO,CAAC,OAAO,CAAsD;IACrE,OAAO,CAAC,YAAY,CAAuD;gBAGjE,cAAc,EAAE,MAAM,EACtB,OAAO,EAAE,MAAM,EACf,OAAO,EAAE,MAAM,EACf,UAAU,GAAE,MAAe;IAQrC;;OAEG;IACG,OAAO,CAAC,aAAa,EAAE,MAAM,GAAG,SAAS,GAAG,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC;IAwCxE,KAAK,IAAI,IAAI;IAQb;;;;OAIG;YACW,WAAW;IAmCzB,OAAO,CAAC,OAAO;CAMhB"}