@webmcp-auto-ui/agent 2.5.27 → 2.5.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@webmcp-auto-ui/agent",
3
- "version": "2.5.27",
3
+ "version": "2.5.28",
4
4
  "description": "LLM agent loop + remote/WASM/local providers + MCP wrapper",
5
5
  "license": "AGPL-3.0-or-later",
6
6
  "type": "module",
@@ -11,7 +11,7 @@
11
11
  "import": "./src/index.ts"
12
12
  },
13
13
  "./server": {
14
- "import": "./src/server/llmProxy.ts"
14
+ "import": "./src/server/index.ts"
15
15
  }
16
16
  },
17
17
  "scripts": {
@@ -27,5 +27,13 @@
27
27
  "@webmcp-auto-ui/core": "file:../core",
28
28
  "onnxruntime-web": "^1.24.3",
29
29
  "typescript": "^5.0.0"
30
+ },
31
+ "peerDependencies": {
32
+ "vega-embed": "^6.24.0"
33
+ },
34
+ "peerDependenciesMeta": {
35
+ "vega-embed": {
36
+ "optional": true
37
+ }
30
38
  }
31
39
  }
@@ -5,21 +5,64 @@
5
5
  import { createWebMcpServer, parseFrontmatter } from '@webmcp-auto-ui/core';
6
6
  import { RAW_RECIPES } from './recipes/_generated.js';
7
7
 
8
- // Notebook widget recipes (vanilla renderers)
8
+ // Notebook widget recipes (vanilla renderers) — moved to @webmcp-auto-ui/ui
9
9
  // @ts-ignore — Vite raw imports, not resolved by tsc
10
- import compactRecipe from './notebook-widgets/recipes/compact.md?raw';
10
+ import compactRecipe from '@webmcp-auto-ui/ui/widgets/notebook/recipes/compact.md?raw';
11
11
  // @ts-ignore
12
- import workspaceRecipe from './notebook-widgets/recipes/workspace.md?raw';
12
+ import workspaceRecipe from '@webmcp-auto-ui/ui/widgets/notebook/recipes/workspace.md?raw';
13
13
  // @ts-ignore
14
- import documentRecipe from './notebook-widgets/recipes/document.md?raw';
14
+ import documentRecipe from '@webmcp-auto-ui/ui/widgets/notebook/recipes/document.md?raw';
15
15
  // @ts-ignore
16
- import editorialRecipe from './notebook-widgets/recipes/editorial.md?raw';
16
+ import editorialRecipe from '@webmcp-auto-ui/ui/widgets/notebook/recipes/editorial.md?raw';
17
+
18
+ // Notebook widget renderers (vanilla JS) — import via subpath to avoid pulling
19
+ // the .svelte exports of the ui package root through tsc.
20
+ import { render as renderCompact } from '@webmcp-auto-ui/ui/widgets/notebook/compact.js';
21
+ import { render as renderWorkspace } from '@webmcp-auto-ui/ui/widgets/notebook/workspace.js';
22
+ import { render as renderDocument } from '@webmcp-auto-ui/ui/widgets/notebook/document.js';
23
+ import { render as renderEditorial } from '@webmcp-auto-ui/ui/widgets/notebook/editorial.js';
24
+ import { render as renderRecipeBrowser } from '@webmcp-auto-ui/ui/widgets/notebook/recipe-browser.js';
25
+
26
+ // Inline recipe for recipe-browser (real vanilla widget)
27
+ const recipeBrowserRecipe = `---
28
+ widget: recipe-browser
29
+ description: Interactive recipe browser with search, kind/tag filters, preview and pick. Use when the user wants to browse, search, or select recipes from connected servers.
30
+ group: rich
31
+ schema:
32
+ type: object
33
+ required:
34
+ - recipes
35
+ properties:
36
+ recipes:
37
+ type: array
38
+ description: List of Recipe objects (id, name, description, body, servers, ...).
39
+ items:
40
+ type: object
41
+ filters:
42
+ type: object
43
+ description: Initial filters
44
+ properties:
45
+ q:
46
+ type: string
47
+ kind:
48
+ type: string
49
+ enum: [all, webmcp, mcp]
50
+ tags:
51
+ type: array
52
+ items:
53
+ type: string
54
+ layout:
55
+ type: string
56
+ enum: [list, grid]
57
+ description: Default layout (default list)
58
+ ---
59
+
60
+ ## When to use
61
+ When the user wants to browse, search, or pick a recipe — for example "show me the available recipes" or "let me choose a recipe".
17
62
 
18
- // Notebook widget renderers (vanilla JS)
19
- import { render as renderCompact } from './notebook-widgets/compact.js';
20
- import { render as renderWorkspace } from './notebook-widgets/workspace.js';
21
- import { render as renderDocument } from './notebook-widgets/document.js';
22
- import { render as renderEditorial } from './notebook-widgets/editorial.js';
63
+ ## How to use
64
+ Call widget_display({name: "recipe-browser", params: {recipes: [...], layout: "list"}}). The widget emits a bubbling 'widget:interact' CustomEvent with detail={action:"pick", payload: recipe} when the user clicks Pick.
65
+ `;
23
66
 
24
67
  // ---------------------------------------------------------------------------
25
68
  // Inline recipes (frontmatter + body)
@@ -936,77 +979,21 @@ Pour des visualisations custom, animations, ou prototypes interactifs en JS pur.
936
979
  Call widget_display({name: "js-sandbox", params: {code: "document.getElementById('root').innerHTML = '<h1>Hello</h1>'"}}).
937
980
  `,
938
981
 
939
- // ── recipe-browser ──────────────────────────────────────────────────────
940
- `---
941
- widget: recipe-browser
942
- description: Displays available recipes as interactive cards and allows browsing each recipe's details.
943
- group: rich
944
- schema:
945
- type: object
946
- required:
947
- - cards
948
- properties:
949
- title:
950
- type: string
951
- cards:
952
- type: array
953
- items:
954
- type: object
955
- required:
956
- - title
957
- properties:
958
- title:
959
- type: string
960
- description:
961
- type: string
962
- tags:
963
- type: array
964
- items:
965
- type: string
966
- meta:
967
- type: object
968
- properties:
969
- recipe_name:
970
- type: string
971
- server:
972
- type: string
973
- interactive:
974
- type: boolean
975
- ---
976
-
977
- ## When to use
978
- Quand l'utilisateur veut voir les recettes disponibles, explorer les possibilites du serveur, ou comprendre comment utiliser un widget specifique.
979
-
980
- ## Comment
981
-
982
- ### Etape 1 — Lister les recettes
983
- Appelle search_recipes() sur chaque serveur connecte (MCP et WebMCP) pour obtenir la liste des recettes.
984
-
985
- ### Etape 2 — Afficher en cartes interactives
986
- Utilise widget_display({name: "cards", params: {...}}) avec le parametre interactive: true pour rendre les cartes cliquables :
987
- widget_display({name: "cards", params: {title: "Recettes disponibles", cards: [{title: "Nom", description: "Description", tags: ["serveur"], meta: {recipe_name: "nom_technique", server: "nom_serveur"}}], interactive: true}})
988
-
989
- Le champ meta est important : il sera renvoye dans l'evenement d'interaction quand l'utilisateur clique sur la carte.
990
-
991
- ### Etape 3 — Reagir au clic
992
- Quand l'utilisateur clique sur une carte, tu recevras un message d'interaction contenant les donnees de meta. Utilise meta.recipe_name et meta.server pour :
993
- 1. Appeler get_recipe(meta.recipe_name) sur le bon serveur
994
- 2. Afficher le contenu dans un widget code avec lang: 'markdown'
995
- 3. Lier les deux widgets : reutiliser le widget detail existant via canvas('update', ...) au lieu d'en creer un nouveau a chaque clic.
996
-
997
- ## Common mistakes
998
- - Ne pas oublier interactive: true dans les cartes — sans ca, les clics ne remontent pas
999
- - Ne pas creer un nouveau widget detail a chaque clic — reutiliser l'existant via canvas('update', ...)
1000
- - Les recettes MCP et WebMCP ont des noms de serveur differents — utiliser le bon prefixe pour get_recipe()
1001
- `,
1002
982
  ];
1003
983
 
1004
984
  // ---------------------------------------------------------------------------
1005
985
  // Native widget names — derived from RECIPES frontmatter
1006
986
  // ---------------------------------------------------------------------------
1007
987
 
1008
- /** Derived from RECIPES frontmatter — always in sync with registered widgets */
1009
- export const NATIVE_WIDGET_NAMES = RECIPES.map(r => {
988
+ /** Derived from RECIPES + notebook widget recipes — always in sync with registered widgets */
989
+ const _NOTEBOOK_RECIPE_SOURCES: string[] = [
990
+ compactRecipe as string,
991
+ workspaceRecipe as string,
992
+ documentRecipe as string,
993
+ editorialRecipe as string,
994
+ recipeBrowserRecipe,
995
+ ];
996
+ export const NATIVE_WIDGET_NAMES = [...RECIPES, ..._NOTEBOOK_RECIPE_SOURCES].map(r => {
1010
997
  const match = r.match(/widget:\s*(\S+)/);
1011
998
  return match ? match[1] : '';
1012
999
  }).filter(Boolean) as string[];
@@ -1030,6 +1017,7 @@ const NOTEBOOK_WIDGETS: Array<[string, (container: HTMLElement, data: any) => an
1030
1017
  [workspaceRecipe as string, renderWorkspace],
1031
1018
  [documentRecipe as string, renderDocument],
1032
1019
  [editorialRecipe as string, renderEditorial],
1020
+ [recipeBrowserRecipe, renderRecipeBrowser],
1033
1021
  ];
1034
1022
  for (const [recipe, renderer] of NOTEBOOK_WIDGETS) {
1035
1023
  autoui.registerWidget(recipe, renderer as any);
package/src/index.ts CHANGED
@@ -11,12 +11,17 @@ export { TRANSFORMERS_MODELS, getTransformersModel, listTransformersModels } fro
11
11
  export type { TransformersModelEntry, TransformersFamily, ToolCallFormat } from './providers/transformers-models.js';
12
12
  export { parseToolCalls } from './prompts/tool-call-parsers.js';
13
13
  export type { ParseResult } from './prompts/tool-call-parsers.js';
14
- export { loadOrDownloadModel, clearModelCache } from './util/opfs-cache.js';
15
- export type { ModelFileSpec, CacheProgress } from './util/opfs-cache.js';
14
+ export { loadOrDownloadModel, clearModelCache, listCachedModels, clearAllModelCaches, walkDirectoryStats } from './util/opfs-cache.js';
15
+ export type { ModelFileSpec, CacheProgress, CachedModelInfo } from './util/opfs-cache.js';
16
+ export { listAllStorage, deleteStorageEntry, clearAllStorage } from './util/storage-inventory.js';
17
+ export type { StorageEntry, StorageSource } from './util/storage-inventory.js';
16
18
  export { buildGemmaPrompt } from './prompts/index.js';
17
19
  export type { BuildGemmaPromptInput } from './prompts/index.js';
18
20
  export { LocalLLMProvider } from './providers/local.js';
19
21
  export type { LocalLLMProviderOptions, LocalBackend } from './providers/local.js';
22
+ export { HawkProvider } from './providers/hawk.js';
23
+ export type { HawkLLMProviderOptions } from './providers/hawk.js';
24
+ export { HAWK_MODELS, listHawkModels, type HawkModelEntry } from './providers/hawk-models.js';
20
25
  export { createProvider } from './providers/factory.js';
21
26
  export type { LLMConfig } from './providers/factory.js';
22
27
 
package/src/loop.ts CHANGED
@@ -168,6 +168,9 @@ export async function runAgentLoop(
168
168
  // Use local alias maps (parallel-safe — no global singleton)
169
169
  const activatedServers = new Set<string>();
170
170
  const localAliasMap = new Map<string, string>();
171
+ // Snapshot pathMaps locally (parallel-safe). Reading the global flattenPathMaps
172
+ // singleton at dispatch-time races when two loops run concurrently.
173
+ const localPathMaps = new Map<string, Record<string, string[]>>(flattenPathMaps);
171
174
  const trace = new PipelineTrace();
172
175
 
173
176
  const disc = buildDiscoveryToolsWithAliases(options.layers ?? [], schemaOptions, trace);
@@ -228,17 +231,24 @@ export async function runAgentLoop(
228
231
  // After 5+ iterations without render, inject a nudge message (once)
229
232
  // Merge into existing user message if the last message is already role=user (to avoid consecutive user messages)
230
233
  if (iterationsWithoutRender >= 5 && !hasRendered && !nudgedOnce) {
231
- nudgedOnce = true;
232
234
  const nudgeText = 'STOP exploration. Use the data you already collected. Call widget_display() NOW to display results.';
233
235
  const lastMsg = messages[messages.length - 1];
234
- if (lastMsg && lastMsg.role === 'user') {
235
- if (typeof lastMsg.content === 'string') {
236
- lastMsg.content = [{ type: 'text', text: lastMsg.content }, { type: 'text', text: nudgeText }];
237
- } else if (Array.isArray(lastMsg.content)) {
238
- (lastMsg.content as ContentBlock[]).push({ type: 'text', text: nudgeText });
236
+ // Skip if last turn carries tool_result blocks — mixing raw text with tool_response
237
+ // in one turn violates Gemma spec §7 (the serializer would emit text + <|tool_response|>
238
+ // together). Defer the nudge to a later iteration where the turn is pure-user.
239
+ const lastHasToolResult = lastMsg && Array.isArray(lastMsg.content)
240
+ && (lastMsg.content as ContentBlock[]).some(b => b.type === 'tool_result');
241
+ if (!lastHasToolResult) {
242
+ nudgedOnce = true;
243
+ if (lastMsg && lastMsg.role === 'user') {
244
+ if (typeof lastMsg.content === 'string') {
245
+ lastMsg.content = [{ type: 'text', text: lastMsg.content }, { type: 'text', text: nudgeText }];
246
+ } else if (Array.isArray(lastMsg.content)) {
247
+ (lastMsg.content as ContentBlock[]).push({ type: 'text', text: nudgeText });
248
+ }
249
+ } else {
250
+ messages.push({ role: 'user', content: nudgeText });
239
251
  }
240
- } else {
241
- messages.push({ role: 'user', content: nudgeText });
242
252
  }
243
253
  }
244
254
 
@@ -401,8 +411,11 @@ export async function runAgentLoop(
401
411
  const protocol = tokenToProtocol(token);
402
412
 
403
413
  // Auto-repair + validate params before dispatch
414
+ // Resolve from the full activeTools (not iterationTools, which may be filtered
415
+ // to strip discovery tools after 4 iterations — would make toolDef undefined
416
+ // and silently skip auto-repair + schema validation).
404
417
  let toolInput = block.input as Record<string, unknown>;
405
- const toolDef = iterationTools.find(t => t.name === block.name);
418
+ const toolDef = activeTools.find(t => t.name === block.name);
406
419
  if (toolDef?.input_schema) {
407
420
  const repair = autoRepairParams(toolInput, toolDef.input_schema, realToolName);
408
421
  if (repair.fixes.length > 0) {
@@ -451,8 +464,9 @@ export async function runAgentLoop(
451
464
  result = `Error: no WebMCP server "${serverName}" found.`;
452
465
  } else {
453
466
  // Unflatten params if schema was flattened
467
+ // Use the local snapshot (parallel-safe) rather than the global singleton.
454
468
  if (schemaOptions?.flatten) {
455
- const pathMap = flattenPathMaps.get(block.name);
469
+ const pathMap = localPathMaps.get(block.name);
456
470
  if (pathMap) {
457
471
  toolInput = unflattenParams(toolInput, pathMap);
458
472
  }
@@ -638,18 +652,31 @@ export function trimConversationHistory(history: ChatMessage[], maxTokens: numbe
638
652
  total -= removed.reduce((s, m) => s + JSON.stringify(m).length, 0);
639
653
  }
640
654
 
641
- // Remove orphaned tool_result messages at the start — these reference
642
- // a tool_use in a message that was trimmed away, causing API errors.
643
- while (trimmed.length > 0) {
644
- const first = trimmed[0];
645
- if (first.role === 'system') break; // preserve system messages at the front
646
- const blocks = Array.isArray(first.content) ? first.content : [];
647
- const hasToolResult = blocks.some((b: any) => b.type === 'tool_result');
648
- if (hasToolResult) {
649
- trimmed.shift();
650
- } else {
651
- break;
655
+ // Remove orphaned tool_result blocks anywhere in history — strict providers
656
+ // (Anthropic, etc.) reject tool_result blocks whose tool_use_id does not
657
+ // correspond to an earlier assistant tool_use. Head-only pruning misses
658
+ // internal orphans caused by mid-history trims.
659
+ const validToolUseIds = new Set<string>();
660
+ for (let i = 0; i < trimmed.length; i++) {
661
+ const msg = trimmed[i];
662
+ // Collect tool_use ids from assistant messages seen so far
663
+ if (msg.role === 'assistant' && Array.isArray(msg.content)) {
664
+ for (const b of msg.content as any[]) {
665
+ if (b?.type === 'tool_use' && typeof b.id === 'string') validToolUseIds.add(b.id);
666
+ }
652
667
  }
668
+ // Filter out orphan tool_result blocks in user messages
669
+ if (msg.role === 'user' && Array.isArray(msg.content)) {
670
+ msg.content = (msg.content as any[]).filter(b => {
671
+ if (b?.type !== 'tool_result') return true;
672
+ return typeof b.tool_use_id === 'string' && validToolUseIds.has(b.tool_use_id);
673
+ }) as any;
674
+ }
675
+ }
676
+ // Drop user messages that became empty after orphan-pruning
677
+ for (let i = trimmed.length - 1; i >= 0; i--) {
678
+ const c = trimmed[i].content;
679
+ if (Array.isArray(c) && c.length === 0) trimmed.splice(i, 1);
653
680
  }
654
681
 
655
682
  // Ensure the first non-system message is role=user (API requirement)
@@ -3,18 +3,27 @@ import { RemoteLLMProvider } from './remote.js';
3
3
  import { WasmProvider } from './wasm.js';
4
4
  import { LocalLLMProvider, type LocalBackend } from './local.js';
5
5
  import { TransformersProvider } from './transformers.js';
6
+ import { HawkProvider } from './hawk.js';
6
7
 
7
8
  export type LLMConfig =
8
9
  | { type: 'remote'; model?: RemoteModelId; proxyUrl?: string; apiKey?: string }
9
10
  | { type: 'wasm'; model?: WasmModelId; onProgress?: (loaded: number, total: number) => void }
10
11
  | { type: 'transformers'; model: string; onProgress?: (loaded: number, total: number) => void }
11
- | { type: 'local'; model: string; baseUrl: string; backend?: LocalBackend };
12
+ | { type: 'local'; model: string; baseUrl: string; backend?: LocalBackend }
13
+ | { type: 'hawk'; model: string; proxyUrl?: string };
12
14
 
13
15
  export function createProvider(config: LLMConfig): LLMProvider {
14
16
  const base = typeof window !== 'undefined' ? (document.querySelector('base') as HTMLBaseElement | null)?.href ?? '' : '';
15
17
 
16
18
  // Prefix-based dispatch: a `transformers-*` model routes to TransformersProvider
17
19
  // regardless of the declared type (defensive).
20
+ if ('model' in config && typeof config.model === 'string' && config.model.startsWith('hawk-')) {
21
+ return new HawkProvider({
22
+ proxyUrl: (config as { proxyUrl?: string }).proxyUrl ?? `${base}api/hawk`,
23
+ model: config.model.slice(5),
24
+ });
25
+ }
26
+
18
27
  if ('model' in config && typeof config.model === 'string' && config.model.startsWith('transformers-')) {
19
28
  const onProgress = (config as { onProgress?: (loaded: number, total: number) => void }).onProgress;
20
29
  return new TransformersProvider({
@@ -46,5 +55,10 @@ export function createProvider(config: LLMConfig): LLMProvider {
46
55
  model: config.model,
47
56
  backend: config.backend,
48
57
  });
58
+ case 'hawk':
59
+ return new HawkProvider({
60
+ proxyUrl: config.proxyUrl ?? `${base}api/hawk`,
61
+ model: config.model,
62
+ });
49
63
  }
50
64
  }
@@ -0,0 +1,22 @@
1
+ export interface HawkModelEntry {
2
+ id: string; // ID Hawk (sans préfixe)
3
+ label: string; // Label humain pour le selector
4
+ tokps?: number; // Tokens/sec estimés (warm, indicatif)
5
+ }
6
+
7
+ export const HAWK_MODELS: HawkModelEntry[] = [
8
+ { id: 'qwen35-2b', label: 'Qwen 3.5 2B — 49 tok/s', tokps: 49 },
9
+ { id: 'bielik-1.5b-v3', label: 'Bielik 1.5B — 47 tok/s', tokps: 47 },
10
+ { id: 'gemma4-e2b', label: 'Gemma 4 E2B — 43 tok/s', tokps: 43 },
11
+ { id: 'ministral3-3b', label: 'Ministral 3B — 35 tok/s', tokps: 35 },
12
+ { id: 'qwen3-4b', label: 'Qwen 3 4B — 28 tok/s', tokps: 28 },
13
+ { id: 'gemma4-e4b', label: 'Gemma 4 E4B — 26 tok/s', tokps: 26 },
14
+ { id: 'qwen35-4b', label: 'Qwen 3.5 4B — 23 tok/s', tokps: 23 },
15
+ { id: 'qwen36-35b-a3b', label: 'Qwen 3.6 35B MoE — 22 tok/s', tokps: 22 },
16
+ { id: 'gemma4-26b-a4b', label: 'Gemma 4 26B MoE — 20 tok/s', tokps: 20 },
17
+ { id: 'ministral-8b', label: 'Ministral 8B — 16 tok/s', tokps: 16 },
18
+ ];
19
+
20
+ export function listHawkModels(): HawkModelEntry[] {
21
+ return HAWK_MODELS;
22
+ }
@@ -0,0 +1,181 @@
1
+ import type { LLMProvider, LLMResponse, ChatMessage, ProviderTool, ContentBlock } from '../types.js';
2
+
3
+ export interface HawkLLMProviderOptions {
4
+ proxyUrl: string; // SvelteKit proxy endpoint, e.g. '/api/hawk'
5
+ model: string; // e.g. 'qwen35-2b' (ID Hawk sans préfixe)
6
+ }
7
+
8
+ // ── OpenAI-compatible types ─────────────────────────────────────────
9
+
10
+ interface OaiTool {
11
+ type: 'function';
12
+ function: { name: string; description: string; parameters: Record<string, unknown> };
13
+ }
14
+
15
+ interface OaiMessage {
16
+ role: 'system' | 'user' | 'assistant' | 'tool';
17
+ content?: string | null;
18
+ tool_calls?: { id: string; type: 'function'; function: { name: string; arguments: string } }[];
19
+ tool_call_id?: string;
20
+ }
21
+
22
+ interface OaiChoice {
23
+ message: {
24
+ content?: string | null;
25
+ tool_calls?: { id: string; type: 'function'; function: { name: string; arguments: string } }[];
26
+ };
27
+ finish_reason: string;
28
+ }
29
+
30
+ // ── Helpers ─────────────────────────────────────────────────────────
31
+
32
+ let _counter = 0;
33
+ function hawkId(): string {
34
+ return 'hawk_' + (++_counter).toString(36) + '_' + Date.now().toString(36);
35
+ }
36
+
37
+ function toOaiTools(tools: ProviderTool[]): OaiTool[] {
38
+ return tools.map(t => ({
39
+ type: 'function' as const,
40
+ function: {
41
+ name: t.name,
42
+ description: t.description,
43
+ parameters: t.input_schema,
44
+ },
45
+ }));
46
+ }
47
+
48
+ function toOaiMessages(messages: ChatMessage[], system?: string): OaiMessage[] {
49
+ const out: OaiMessage[] = [];
50
+
51
+ if (system) out.push({ role: 'system', content: system });
52
+
53
+ for (const msg of messages) {
54
+ if (typeof msg.content === 'string') {
55
+ out.push({ role: msg.role === 'assistant' ? 'assistant' : 'user', content: msg.content });
56
+ continue;
57
+ }
58
+
59
+ const blocks = msg.content as ContentBlock[];
60
+ const textParts = blocks.filter(b => b.type === 'text').map(b => (b as { type: 'text'; text: string }).text);
61
+ const toolUses = blocks.filter(b => b.type === 'tool_use') as { type: 'tool_use'; id: string; name: string; input: Record<string, unknown> }[];
62
+ const toolResults = blocks.filter(b => b.type === 'tool_result') as { type: 'tool_result'; tool_use_id: string; content: string }[];
63
+
64
+ if (msg.role === 'assistant') {
65
+ const oai: OaiMessage = { role: 'assistant', content: textParts.join('\n') || null };
66
+ if (toolUses.length > 0) {
67
+ oai.tool_calls = toolUses.map(tu => ({
68
+ id: tu.id,
69
+ type: 'function' as const,
70
+ function: { name: tu.name, arguments: JSON.stringify(tu.input) },
71
+ }));
72
+ }
73
+ out.push(oai);
74
+ } else {
75
+ // User turn — may contain tool_result blocks (sent back after assistant tool_use)
76
+ for (const tr of toolResults) {
77
+ out.push({ role: 'tool', tool_call_id: tr.tool_use_id, content: tr.content });
78
+ }
79
+ if (textParts.length > 0) {
80
+ out.push({ role: 'user', content: textParts.join('\n') });
81
+ }
82
+ // If only tool_results and no text, we've already pushed them
83
+ if (toolResults.length === 0 && textParts.length === 0) {
84
+ out.push({ role: 'user', content: '' });
85
+ }
86
+ }
87
+ }
88
+ return out;
89
+ }
90
+
91
+ function parseArguments(raw: string): Record<string, unknown> {
92
+ try { return JSON.parse(raw); } catch { return { _raw: raw }; }
93
+ }
94
+
95
+ // ── Provider ────────────────────────────────────────────────────────
96
+
97
+ export class HawkProvider implements LLMProvider {
98
+ readonly name = 'hawk';
99
+ readonly model: string;
100
+ private proxyUrl: string;
101
+
102
+ constructor(options: HawkLLMProviderOptions) {
103
+ this.model = options.model;
104
+ this.proxyUrl = options.proxyUrl;
105
+ }
106
+
107
+ async chat(
108
+ messages: ChatMessage[],
109
+ tools: ProviderTool[],
110
+ options?: { signal?: AbortSignal; system?: string; maxTokens?: number; temperature?: number },
111
+ ): Promise<LLMResponse> {
112
+ const oaiMessages = toOaiMessages(messages, options?.system);
113
+ const oaiTools = tools.length > 0 ? toOaiTools(tools) : undefined;
114
+
115
+ // NOTE: `model` is NOT sent in the body — the server proxy injects it
116
+ // from the X-Model header into the upstream Hawk request.
117
+ const body: Record<string, unknown> = {
118
+ messages: oaiMessages,
119
+ stream: false,
120
+ };
121
+ if (oaiTools) body.tools = oaiTools;
122
+ if (options?.maxTokens) body.max_tokens = options.maxTokens;
123
+ if (options?.temperature != null) body.temperature = options.temperature;
124
+
125
+ const response = await fetch(this.proxyUrl, {
126
+ method: 'POST',
127
+ headers: {
128
+ 'Content-Type': 'application/json',
129
+ 'X-Model': this.model,
130
+ },
131
+ body: JSON.stringify(body),
132
+ signal: options?.signal,
133
+ });
134
+
135
+ if (!response.ok) {
136
+ const txt = await response.text().catch(() => '');
137
+ throw new Error(`Hawk LLM ${response.status}${txt ? ': ' + txt.slice(0, 200) : ''}`);
138
+ }
139
+
140
+ const data = await response.json() as { choices?: OaiChoice[]; usage?: { prompt_tokens?: number; completion_tokens?: number } };
141
+ const choice = data.choices?.[0];
142
+ if (!choice) throw new Error('Hawk LLM returned no choices');
143
+
144
+ const content: ContentBlock[] = [];
145
+ const toolCalls = choice.message.tool_calls;
146
+
147
+ if (choice.message.content) {
148
+ content.push({ type: 'text', text: choice.message.content });
149
+ }
150
+
151
+ if (toolCalls && toolCalls.length > 0) {
152
+ for (const tc of toolCalls) {
153
+ content.push({
154
+ type: 'tool_use',
155
+ id: tc.id || hawkId(),
156
+ name: tc.function.name,
157
+ input: parseArguments(tc.function.arguments),
158
+ });
159
+ }
160
+ }
161
+
162
+ // Ensure at least one block
163
+ if (content.length === 0) {
164
+ content.push({ type: 'text', text: '' });
165
+ }
166
+
167
+ const hasToolUse = content.some(b => b.type === 'tool_use');
168
+ const stopReason = hasToolUse ? 'tool_use'
169
+ : choice.finish_reason === 'tool_calls' ? 'tool_use'
170
+ : 'end_turn';
171
+
172
+ return {
173
+ content,
174
+ stopReason,
175
+ usage: data.usage ? {
176
+ input_tokens: data.usage.prompt_tokens ?? 0,
177
+ output_tokens: data.usage.completion_tokens ?? 0,
178
+ } : undefined,
179
+ };
180
+ }
181
+ }
@@ -114,28 +114,13 @@ async function parseToolCalls(
114
114
  }
115
115
 
116
116
  // --------------------------------------------------------------------------
117
- // OPFS cache loaded lazily with a best-effort fallback that defers entirely
118
- // to transformers.js's built-in HF cache (no OPFS intervention on our side).
117
+ // Cache note: transformers.js manages its own cache via Cache Storage API
118
+ // (enabled by `env.useBrowserCache = true` below). No OPFS pre-download from
119
+ // this worker — the generic OPFS helper requires an explicit file list that
120
+ // transformers.js doesn't expose. Progress is surfaced via `progress_callback`
121
+ // in `fromPretrainedOpts`.
119
122
  // --------------------------------------------------------------------------
120
123
 
121
- async function loadOrDownloadModel(
122
- _repo: string,
123
- _onProgress: (fileProgress: number, totalProgress: number, status: string, loaded?: number, total?: number) => void,
124
- ): Promise<void> {
125
- try {
126
- // Optional fallback import — module is shipped (../util/opfs-cache.ts);
127
- // the try/catch is defensive only, guarding against bundler quirks or
128
- // OPFS being unavailable in the worker (older browsers).
129
- const mod: any = await import('../util/opfs-cache.js');
130
- const fn = mod.loadOrDownloadModel ?? mod.default;
131
- if (typeof fn === 'function') return await fn(_repo, _onProgress);
132
- } catch {
133
- // Import/OPFS unavailable — transformers.js falls back to its internal
134
- // HTTP fetch + `caches` API. Progress arrives via from_pretrained's
135
- // progress_callback below.
136
- }
137
- }
138
-
139
124
  // --------------------------------------------------------------------------
140
125
  // Helpers
141
126
  // --------------------------------------------------------------------------
@@ -229,18 +214,6 @@ async function loadModel(modelEntry: TransformersModelEntry): Promise<void> {
229
214
 
230
215
  stoppingCriteria = new InterruptableStoppingCriteria();
231
216
 
232
- // Pre-download (OPFS-aware when the cache module is available).
233
- await loadOrDownloadModel(modelEntry.repo, (fp, tp, status, loaded, total) => {
234
- post({
235
- type: 'progress',
236
- fileProgress: fp,
237
- totalProgress: tp,
238
- status,
239
- loaded: loaded ?? 0,
240
- total: total ?? modelEntry.size,
241
- });
242
- });
243
-
244
217
  // Aggregated progress callback — sums loaded/total across every file we see,
245
218
  // emitting a monotonic aggregate ratio. Two guards eliminate flicker:
246
219
  // 1. Files with total < 1_000_000 bytes are ignored (configs, tokenizers,