npm - @tryhamster/gerbil - Versions diffs - 1.0.0-rc.9 → 1.0.1 - Mend

@tryhamster/gerbil 1.0.0-rc.9 → 1.0.1

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (179)

package/LICENSE +1 -1
package/README.md +318 -104
package/dist/architectures-C1I5V3Dt.mjs +6070 -0
package/dist/architectures-C1I5V3Dt.mjs.map +1 -0
package/dist/browser/index.d.ts +276 -590
package/dist/browser/index.d.ts.map +1 -1
package/dist/browser/index.js +592 -2334
package/dist/browser/index.js.map +1 -1
package/dist/cli.mjs +625 -1098
package/dist/cli.mjs.map +1 -1
package/dist/defaults-9komdrbY.mjs +24 -0
package/dist/defaults-9komdrbY.mjs.map +1 -0
package/dist/frameworks/express.d.mts +1 -3
package/dist/frameworks/express.d.mts.map +1 -1
package/dist/frameworks/express.mjs +7 -7
package/dist/frameworks/express.mjs.map +1 -1
package/dist/frameworks/fastify.d.mts +1 -1
package/dist/frameworks/fastify.d.mts.map +1 -1
package/dist/frameworks/fastify.mjs +3 -3
package/dist/frameworks/fastify.mjs.map +1 -1
package/dist/frameworks/hono.d.mts +1 -1
package/dist/frameworks/hono.d.mts.map +1 -1
package/dist/frameworks/hono.mjs +4 -4
package/dist/frameworks/hono.mjs.map +1 -1
package/dist/frameworks/next.d.mts +3 -2
package/dist/frameworks/next.d.mts.map +1 -1
package/dist/frameworks/next.mjs +4 -4
package/dist/frameworks/next.mjs.map +1 -1
package/dist/frameworks/react.d.mts +1 -1
package/dist/frameworks/trpc.d.mts +1 -1
package/dist/frameworks/trpc.d.mts.map +1 -1
package/dist/frameworks/trpc.mjs +4 -4
package/dist/frameworks/trpc.mjs.map +1 -1
package/dist/gerbil-BetB5xb0.d.mts +488 -0
package/dist/gerbil-BetB5xb0.d.mts.map +1 -0
package/dist/gerbil-CTZUa8EZ.mjs +4 -0
package/dist/gerbil-DNniplr4.mjs +1656 -0
package/dist/gerbil-DNniplr4.mjs.map +1 -0
package/dist/gpu/hooks.d.mts +640 -0
package/dist/gpu/hooks.d.mts.map +1 -0
package/dist/gpu/hooks.mjs +1369 -0
package/dist/gpu/hooks.mjs.map +1 -0
package/dist/gpu/index.d.mts +2 -0
package/dist/gpu/index.mjs +6 -0
package/dist/gpu-DFuglcEx.mjs +3790 -0
package/dist/gpu-DFuglcEx.mjs.map +1 -0
package/dist/index-Dgmb2kE3.d.mts +245 -0
package/dist/index-Dgmb2kE3.d.mts.map +1 -0
package/dist/index-DukkJRMj.d.mts +2114 -0
package/dist/index-DukkJRMj.d.mts.map +1 -0
package/dist/index.d.mts +22 -487
package/dist/index.d.mts.map +1 -1
package/dist/index.mjs +13 -8
package/dist/index.mjs.map +1 -1
package/dist/indexeddb-store-BWIMtxxH.mjs +103 -0
package/dist/indexeddb-store-BWIMtxxH.mjs.map +1 -0
package/dist/indexeddb-store-ClH12Xnl.mjs +4 -0
package/dist/integrations/ai-sdk.d.mts +75 -6
package/dist/integrations/ai-sdk.d.mts.map +1 -1
package/dist/integrations/ai-sdk.mjs +131 -15
package/dist/integrations/ai-sdk.mjs.map +1 -1
package/dist/integrations/langchain.d.mts +1 -1
package/dist/integrations/langchain.d.mts.map +1 -1
package/dist/integrations/langchain.mjs +5 -5
package/dist/integrations/langchain.mjs.map +1 -1
package/dist/integrations/llamaindex.d.mts +1 -1
package/dist/integrations/llamaindex.d.mts.map +1 -1
package/dist/integrations/llamaindex.mjs +5 -5
package/dist/integrations/llamaindex.mjs.map +1 -1
package/dist/integrations/mcp-client.mjs +3 -3
package/dist/integrations/mcp-client.mjs.map +1 -1
package/dist/integrations/mcp.d.mts +3 -2
package/dist/integrations/mcp.d.mts.map +1 -1
package/dist/integrations/mcp.mjs +5 -5
package/dist/{mcp-BvbriaBy.mjs → mcp-D2vvH1Xc.mjs} +4 -4
package/dist/mcp-D2vvH1Xc.mjs.map +1 -0
package/dist/memory/index.d.mts +3 -0
package/dist/memory/index.mjs +6 -0
package/dist/memory-D1P7Tmda.mjs +4 -0
package/dist/memory-DVN0MnIG.mjs +132 -0
package/dist/memory-DVN0MnIG.mjs.map +1 -0
package/dist/memory-Dj0J1v88.mjs +294 -0
package/dist/memory-Dj0J1v88.mjs.map +1 -0
package/dist/moonshine-stt-17dpP1kr.mjs +4 -0
package/dist/moonshine-stt-4ojLtMq7.mjs +11962 -0
package/dist/moonshine-stt-4ojLtMq7.mjs.map +1 -0
package/dist/{one-liner-s-lD8rCC.mjs → one-liner-JhdIPxzF.mjs} +14 -16
package/dist/one-liner-JhdIPxzF.mjs.map +1 -0
package/dist/repl-BDRkwPGX.mjs +9 -0
package/dist/skills/index.d.mts +270 -320
package/dist/skills/index.d.mts.map +1 -1
package/dist/skills/index.mjs +5 -5
package/dist/{skills-CD3Orlex.mjs → skills-CU694Dc8.mjs} +187 -32
package/dist/skills-CU694Dc8.mjs.map +1 -0
package/dist/{tools-Bi1P7Xoy.mjs → tools-DQ1mPUw5.mjs} +34 -22
package/dist/tools-DQ1mPUw5.mjs.map +1 -0
package/dist/types-DQBe2lFo.d.mts +165 -0
package/dist/types-DQBe2lFo.d.mts.map +1 -0
package/dist/{types-CiTc7ez3.d.mts → types-LlyYILII.d.mts} +112 -14
package/dist/types-LlyYILII.d.mts.map +1 -0
package/dist/{utils-CZBZ8dgR.mjs → utils-DKO55ZmZ.mjs} +1 -1
package/dist/{utils-CZBZ8dgR.mjs.map → utils-DKO55ZmZ.mjs.map} +1 -1
package/dist/vector-B0panuy6.mjs +95 -0
package/dist/vector-B0panuy6.mjs.map +1 -0
package/docs/PROJECT-STATE.md +321 -0
package/docs/adding-a-model-family.md +280 -0
package/docs/ai-sdk.md +70 -61
package/docs/architecture/overview.md +17 -7
package/docs/browser.md +203 -8
package/docs/embeddings.md +156 -0
package/docs/gerbil-site-native-migration.md +217 -0
package/docs/gpu-engine/architectures.md +398 -0
package/docs/gpu-engine/ir.md +372 -0
package/docs/gpu-engine/kernels.md +718 -0
package/docs/gpu-engine/paper.html +1759 -0
package/docs/gpu-engine/paper.md +2109 -0
package/docs/gpu-engine/safetensors.md +312 -0
package/docs/gpu-engine/tokenizer.md +302 -0
package/docs/memory-rag.md +91 -0
package/docs/metal-safari-intel.md +190 -0
package/docs/mobile-failure-diagnosis.md +124 -0
package/docs/mobile.md +99 -0
package/docs/observability.md +230 -0
package/docs/onnx-removal-plan.md +339 -0
package/docs/research/autoresearch-portable.md +904 -0
package/docs/research/dispatch-reduction-hivemind.md +84 -0
package/docs/research/ios-safari-model-caching.md +117 -0
package/docs/research/mobile-webgpu-speed-fusion.md +135 -0
package/docs/research/native-stt-model-selection.md +49 -0
package/docs/research/native-tts-model-selection.md +90 -0
package/docs/research/native-vs-chromium-decision.md +152 -0
package/docs/research/nemotron-mamba2-inference.md +910 -0
package/docs/research/qwen35-multimodal.md +293 -0
package/docs/research/qwen36-gemma4-targets.md +337 -0
package/docs/research/sota-embedding-models.md +179 -0
package/docs/research/sota-mobile-models-2026.md +263 -0
package/docs/research/sota-modality-models.md +202 -0
package/docs/research/tps-baselines.md +71 -0
package/docs/research/webgpu-m4-reference.md +104 -0
package/docs/site-update-plan.md +155 -0
package/docs/structured-output.md +123 -0
package/docs/stt.md +63 -446
package/docs/tts.md +77 -499
package/docs/vision.md +100 -338
package/package.json +22 -7
package/dist/chrome-backend-CORwaIyC.mjs +0 -1212
package/dist/chrome-backend-CORwaIyC.mjs.map +0 -1
package/dist/chrome-backend-DIKYoWj-.mjs +0 -3
package/dist/gerbil-CJ3ifloF.mjs +0 -4
package/dist/gerbil-Dw4Qj77e.mjs +0 -1631
package/dist/gerbil-Dw4Qj77e.mjs.map +0 -1
package/dist/gerbil-qOTe1nl2.d.mts +0 -431
package/dist/gerbil-qOTe1nl2.d.mts.map +0 -1
package/dist/kokoro-BNTb6egA.mjs +0 -20210
package/dist/kokoro-BNTb6egA.mjs.map +0 -1
package/dist/kokoro-CMOGDSgT.js +0 -20212
package/dist/kokoro-CMOGDSgT.js.map +0 -1
package/dist/mcp-BvbriaBy.mjs.map +0 -1
package/dist/one-liner-s-lD8rCC.mjs.map +0 -1
package/dist/repl-DveXw36T.mjs +0 -9
package/dist/skills-CD3Orlex.mjs.map +0 -1
package/dist/stt-Bu-E23Sc.js +0 -433
package/dist/stt-Bu-E23Sc.js.map +0 -1
package/dist/stt-CpLYbGFd.mjs +0 -433
package/dist/stt-CpLYbGFd.mjs.map +0 -1
package/dist/stt-DRPLEEHB.mjs +0 -3
package/dist/tools-Bi1P7Xoy.mjs.map +0 -1
package/dist/transformers.web-DiD1gTwk.js +0 -44695
package/dist/transformers.web-DiD1gTwk.js.map +0 -1
package/dist/transformers.web-u34VxRFM.js +0 -3
package/dist/tts-CqroPaSK.js +0 -724
package/dist/tts-CqroPaSK.js.map +0 -1
package/dist/tts-DXgsKGCe.mjs +0 -3
package/dist/tts-DeGANMNV.mjs +0 -730
package/dist/tts-DeGANMNV.mjs.map +0 -1
package/dist/types-CiTc7ez3.d.mts.map +0 -1
/package/dist/{auto-update-S9s5-g0C.mjs → auto-update-BVaLXcDE.mjs} +0 -0
/package/dist/{chunk-CkXuGtQK.mjs → chunk-B9cbKln6.mjs} +0 -0
/package/dist/{microphone-DaMZFRuR.mjs → microphone-Bqmoz9_K.mjs} +0 -0

package/dist/browser/index.d.ts CHANGED Viewed

@@ -41,8 +41,12 @@ type GenerateOptions = {
   system?: string;
   /** Enable thinking/reasoning mode (Qwen3) */
   thinking?: boolean;
-  /** Callback for each token (streaming) */
-  onToken?: (token: string) => void;
+  /** Callback for each token (streaming); `meta` carries live decode-only tok/s */
+  onToken?: (token: string, meta?: {
+    tokenIndex: number;
+    tps: number;
+    elapsedMs: number;
+  }) => void;
   /** Images to include (only used if model supports vision) */
   images?: ImageInput[];
   /** Enable response caching (default: false) */
@@ -92,16 +96,47 @@ type EmbedResult = {
   /** Time in ms */
   totalTime: number;
 };
+type SearchResult = {
+  /** The matched text */
+  text: string;
+  /** Similarity score (0-1, higher is more similar) */
+  score: number;
+  /** Index in the original corpus */
+  index: number;
+};
+type SimilarityResult = {
+  /** Similarity score (0-1, higher is more similar) */
+  score: number;
+  /** First text */
+  textA: string;
+  /** Second text */
+  textB: string;
+  /** Time in ms */
+  totalTime: number;
+};
 type LoadOptions = {
   /** Progress callback */
   onProgress?: (info: ProgressInfo) => void;
-  /** Device: 'auto', 'gpu', 'cpu', 'webgpu' (default: 'auto') */
-  device?: "auto" | "gpu" | "cpu" | "webgpu";
-  /** Quantization: 'q4', 'q8', 'fp16', 'fp32' (default: 'q4') */
+  /**
+   * Compute device. The only inference backend is the native WebGPU engine
+   * (Dawn in Node, WebGPU in the browser); "auto" resolves to "webgpu". There
+   * is no CPU/WASM or ONNX path.
+   */
+  device?: "auto" | "webgpu";
+  /**
+   * Weight quantization. The engine quantizes to INT4 ("q4") on load; the other
+   * values are accepted for forward-compat but currently map to q4.
+   */
   dtype?: "q4" | "q8" | "fp16" | "fp32";
   /** Override context length */
   contextLength?: number;
 };
+type PreloadOptions = {
+  /** Progress callback for download status */
+  onProgress?: (info: ProgressInfo) => void;
+  /** Keep model loaded in memory after preload (default: false - disposes to free memory) */
+  keepLoaded?: boolean;
+};
 type ProgressInfo = {
   status: string;
   progress?: number;
@@ -112,14 +147,18 @@ type ProgressInfo = {
 type GerbilConfig = {
   /** Default model */
   model?: string;
-  /** Default device */
-  device?: "auto" | "gpu" | "cpu";
-  /** Default quantization */
+  /** Default device (native WebGPU only; "auto" resolves to "webgpu") */
+  device?: "auto" | "webgpu";
+  /** Default quantization (engine uses INT4 "q4") */
   dtype?: "q4" | "q8" | "fp16" | "fp32";
   /** Cache configuration */
   cache?: CacheConfig;
   /** Fallback configuration */
   fallback?: FallbackConfig;
+  /** Telemetry hooks for observability (Sentry, logging, etc.) */
+  telemetry?: TelemetryConfig;
+  /** Concurrency control for request queuing */
+  concurrency?: ConcurrencyConfig;
 };
 type CacheConfig = {
   /** Enable caching (default: true) */
@@ -183,14 +222,14 @@ type SystemInfo = {
 type GerbilModelSettings = {
   /** Enable thinking mode */
   thinking?: boolean;
-  /** Device to use */
-  device?: "auto" | "gpu" | "cpu";
+  /** Device to use (native WebGPU only) */
+  device?: "auto" | "webgpu";
   /** Quantization level */
   dtype?: "q4" | "q8" | "fp16" | "fp32";
 };
 type GerbilProviderSettings = {
-  /** Default device */
-  device?: "auto" | "gpu" | "cpu";
+  /** Default device (native WebGPU only) */
+  device?: "auto" | "webgpu";
   /** Default quantization */
   dtype?: "q4" | "q8" | "fp16" | "fp32";
 };
@@ -348,381 +387,150 @@ type StreamingTranscriptionSession = {
   /** Reset session (clear buffer and transcript) */
   reset: () => void;
 };
-//#endregion
-//#region src/core/models.d.ts
-declare const BUILTIN_MODELS: Record<string, ModelConfig>;
-//#endregion
-//#region src/browser/index.d.ts
-type WorkerProgress = {
-  status: "loading" | "downloading" | "ready" | "error";
-  message?: string;
-  file?: string;
-  progress?: number;
-  /** Number of files being downloaded (0 = loading from cache) */
-  downloadCount?: number;
-  /** Total files to process */
-  totalFiles?: number;
-  error?: string;
-};
-type WorkerToken = {
-  status: "token";
-  text: string;
-  state: "thinking" | "answering";
-  numTokens: number;
-  tps: number;
-};
-type WorkerComplete = {
-  status: "complete";
-  text: string;
-  numTokens: number;
-  totalTime: number;
-  tps: number;
-};
-type GerbilWorkerOptions = {
-  /** Model ID to load (default: "qwen3-0.6b") */
-  modelId?: string;
-  /** Called during model loading with progress updates */
-  onProgress?: (progress: WorkerProgress) => void;
-  /** Called for each token during streaming generation */
-  onToken?: (token: WorkerToken) => void;
-  /** Called when generation is complete */
-  onComplete?: (result: WorkerComplete) => void;
-  /** Called on errors */
-  onError?: (error: string) => void;
-  /** Worker script URL (auto-detected if not provided) */
-  workerUrl?: string;
-};
-type GenerateStreamOptions = {
-  /** Maximum tokens to generate */
-  maxTokens?: number;
-  /** Temperature for sampling (0 = deterministic) */
-  temperature?: number;
-  /** Top-p nucleus sampling */
-  topP?: number;
-  /** Top-k sampling */
-  topK?: number;
-  /** Enable thinking mode (Qwen3) */
-  thinking?: boolean;
-  /** System prompt */
-  system?: string;
-  /** Image URLs or data URIs (for vision models) */
-  images?: string[];
-  /** Conversation history for multi-turn (includes all previous messages) */
-  history?: Array<{
-    role: "user" | "assistant" | "system";
-    content: string;
-  }>;
-};
-type GerbilWorker = {
-  /** Generate text with streaming */
-  generate: (prompt: string, options?: GenerateStreamOptions) => Promise<string>;
-  /** Interrupt current generation */
-  interrupt: () => void;
-  /** Reset conversation cache */
-  reset: () => void;
-  /** Terminate the worker */
-  terminate: () => void;
-  /** Check if model is loaded */
-  isReady: () => boolean;
-};
 /**
- * Create a Gerbil worker for streaming WebGPU inference
- *
- * Uses a Web Worker to keep the UI responsive during model loading
- * and text generation, with real-time token streaming.
+ * Telemetry hooks for production observability.
+ * Pass your own Sentry instance or custom logging functions.
  */
-declare function createGerbilWorker(options?: GerbilWorkerOptions): Promise<GerbilWorker>;
-/** Message in a chat conversation */
-type Message = {
-  id: string;
-  role: "user" | "assistant";
-  content: string;
-  thinking?: string;
-  /** Attached images (URLs or data URIs) - for vision models */
-  images?: string[];
-};
-/** Loading progress state */
-type LoadingProgress = {
-  status: "loading" | "downloading" | "ready" | "error";
-  message?: string;
-  file?: string;
-  progress?: number;
-  /** Number of files being downloaded (0 = loading from cache) */
-  downloadCount?: number;
-  /** Total files to process */
-  totalFiles?: number;
-};
-/** Options for useChat hook */
-type UseChatOptions = {
-  /** Model ID (default: "qwen3-0.6b") */
-  model?: string;
-  /** System prompt */
-  system?: string;
-  /** Enable thinking mode (Qwen3) */
-  thinking?: boolean;
-  /** Max tokens per response */
-  maxTokens?: number;
-  /** Temperature (0-2) */
-  temperature?: number;
-  /** Initial messages */
-  initialMessages?: Message[];
-  /** Auto-load model on mount (default: false - loads on first generate or load()) */
-  autoLoad?: boolean;
-  /** Called when model is ready */
-  onReady?: () => void;
-  /** Called on error */
-  onError?: (error: string) => void;
-};
-/** Return type for useChat hook */
-type UseChatReturn = {
-  /** Chat messages */
-  messages: Message[];
-  /** Current input value */
-  input: string;
-  /** Set input value */
-  setInput: (value: string) => void;
-  /** Submit current input */
-  handleSubmit: (e?: {
-    preventDefault?: () => void;
-  }) => void;
-  /** Whether model is loading */
-  isLoading: boolean;
-  /** Loading progress */
-  loadingProgress: LoadingProgress | null;
-  /** Whether generating a response */
-  isGenerating: boolean;
-  /** Current thinking content (streaming) */
-  thinking: string;
-  /** Stop generation */
-  stop: () => void;
-  /** Clear all messages */
-  clear: () => void;
-  /** Current tokens per second */
-  tps: number;
-  /** Whether model is ready */
-  isReady: boolean;
-  /** Error message if any */
-  error: string | null;
-  /** Load the model (only needed if lazy: true) */
-  load: () => void;
-  /** Currently attached images (for next message) */
-  attachedImages: string[];
-  /** Attach an image to the next message */
-  attachImage: (imageUrl: string) => void;
-  /** Remove an attached image */
-  removeImage: (index: number) => void;
-  /** Clear all attached images */
-  clearImages: () => void;
-  /** Send message with specific images (convenience method) */
-  sendWithImages: (text: string, images: string[]) => void;
+type TelemetryConfig = {
+  /**
+   * Called after successful generation with full result and timing.
+   * Use for logging, metrics, or analytics.
+   */
+  onGenerate?: (event: GenerateEvent) => void;
+  /**
+   * Called when any error occurs during Gerbil operations.
+   * Perfect for Sentry.captureException() or similar.
+   */
+  onError?: (error: Error, context: ErrorContext) => void;
+  /**
+   * Called after model loading completes (success or failure).
+   */
+  onModelLoad?: (event: ModelLoadEvent) => void;
+  /**
+   * Called when a request is queued (if concurrency limit reached).
+   */
+  onQueueWait?: (waitTimeMs: number) => void;
+};
+type GenerateEvent = {
+  /** Model used for generation */
+  modelId: string;
+  /** Generation result */
+  result: GenerateResult;
+  /** Whether response came from cache */
+  cached: boolean;
+  /** Time spent waiting in queue (if any) */
+  queueTimeMs?: number;
 };
 /**
- * React hook for chat with local LLM
- *
- * @example
- * ```tsx
- * import { useChat } from "@tryhamster/gerbil/browser";
- *
- * function Chat() {
- *   const { messages, input, setInput, handleSubmit, isLoading, isGenerating } = useChat();
- *
- *   if (isLoading) return <div>Loading model...</div>;
- *
- *   return (
- *     <div>
- *       {messages.map(m => (
- *         <div key={m.id}>{m.role}: {m.content}</div>
- *       ))}
- *       <form onSubmit={handleSubmit}>
- *         <input value={input} onChange={e => setInput(e.target.value)} />
- *         <button disabled={isGenerating}>Send</button>
- *       </form>
- *     </div>
- *   );
- * }
- * ```
+ * Context passed to telemetry onError callback.
+ * Flexible record to allow any relevant context data.
  */
-declare function useChat(options?: UseChatOptions): UseChatReturn;
-/** Options for useCompletion hook */
-type UseCompletionOptions = {
-  /** Model ID (default: "qwen3-0.6b") */
-  model?: string;
-  /** System prompt */
-  system?: string;
-  /** Enable thinking mode (Qwen3) */
-  thinking?: boolean;
-  /** Max tokens */
-  maxTokens?: number;
-  /** Temperature (0-2) */
-  temperature?: number;
-  /** Auto-load model on mount (default: false - loads on first complete() or load()) */
-  autoLoad?: boolean;
-  /** Called when model is ready */
-  onReady?: () => void;
-  /** Called on error */
-  onError?: (error: string) => void;
+type ErrorContext = Record<string, unknown>;
+type ModelLoadEvent = {
+  /** Model that was loaded */
+  modelId: string;
+  /** Time to load in ms */
+  loadTimeMs: number;
+  /** Whether loaded from cache */
+  fromCache: boolean;
+  /** Device used */
+  device: "webgpu" | "cpu" | "wasm";
+  /** Whether load succeeded */
+  success: boolean;
+  /** Error message if failed */
+  error?: string;
 };
-/** Options for single completion call */
-type CompleteOptions = {
-  /** Image URLs or data URIs to analyze (for vision models) */
-  images?: string[];
-};
-/** Return type for useCompletion hook */
-type UseCompletionReturn = {
-  /** Generated completion */
-  completion: string;
-  /** Thinking content (if enabled) */
-  thinking: string;
-  /** Generate completion (optionally with images for vision models) */
-  complete: (prompt: string, options?: CompleteOptions) => Promise<string>;
-  /** Whether model is loading */
-  isLoading: boolean;
-  /** Loading progress */
-  loadingProgress: LoadingProgress | null;
-  /** Whether generating */
-  isGenerating: boolean;
-  /** Stop generation */
-  stop: () => void;
-  /** Current tokens per second */
-  tps: number;
-  /** Whether model is ready */
-  isReady: boolean;
-  /** Error message if any */
-  error: string | null;
-  /** Load the model (only needed if lazy: true) */
-  load: () => void;
+type ConcurrencyConfig = {
+  /** Maximum concurrent generation requests (default: 1 for LLM) */
+  maxConcurrent?: number;
+  /** Request timeout in ms (default: 300000 = 5 min) */
+  timeout?: number;
 };
+//#endregion
+//#region src/core/models.d.ts
+declare const BUILTIN_MODELS: Record<string, ModelConfig>;
+//#endregion
+//#region src/browser/pwa.d.ts
 /**
- * React hook for text completion with local LLM
+ * Mobile / PWA storage helpers.
  *
- * @example
- * ```tsx
- * import { useCompletion } from "@tryhamster/gerbil/browser";
+ * On-device models are large (a 4-bit 0.8B is ~400 MB; vision/larger models are
+ * GBs). Mobile browsers — iOS Safari especially — wall a web origin off from the
+ * real disk with TWO independent ceilings:
  *
- * function App() {
- *   const { complete, completion, isLoading, isGenerating } = useCompletion();
+ *   1. **Storage quota** (disk for the model cache). An *uninstalled* Safari tab
+ *      gets only ~1 GB, best-effort and evictable, regardless of how much free
+ *      disk the device has. Exceed it and every cache write fails → the model
+ *      re-downloads on every visit.
+ *   2. **Tab memory** (RAM during load/inference) — a separate, smaller ceiling.
  *
- *   if (isLoading) return <div>Loading...</div>;
+ * The unlock for the storage ceiling is **persistent storage**, which iOS Safari
+ * grants when the site is **installed to the Home Screen** (a PWA). Installed, the
+ * quota jumps to a large fraction of actual disk and is never evicted — so models
+ * cache once and stay. These helpers let an app surface that to its users and
+ * request it, so on-device AI is actually practical on mobile.
  *
- *   return (
- *     <div>
- *       <button onClick={() => complete("Write a haiku")}>Generate</button>
- *       <p>{completion}</p>
- *     </div>
- *   );
- * }
- * ```
+ * All functions are SSR/Node-safe (guarded; return conservative defaults).
  */
-declare function useCompletion(options?: UseCompletionOptions): UseCompletionReturn;
-/** TTS loading progress */
-type TTSProgress = {
-  status: "idle" | "loading" | "downloading" | "ready" | "error";
-  message?: string;
-  file?: string;
-  progress?: number;
-  error?: string;
-};
-/** Available TTS models */
-type TTSModelId = "kokoro-82m" | "supertonic-66m";
-/** Voice info for TTS models */
-type BrowserVoiceInfo = {
-  id: string;
-  name: string;
-  gender: "male" | "female";
-  language: string;
-  description: string;
-};
-/** Options for useSpeech hook */
-type UseSpeechOptions = {
-  /** TTS model to use (default: "kokoro-82m") */
-  model?: TTSModelId;
-  /** Default voice ID (default: model's default voice) */
-  voice?: string;
-  /** Speech speed multiplier (default: 1.0) */
-  speed?: number;
-  /** Auto-load TTS model on mount (default: false) */
-  autoLoad?: boolean;
-  /** Called when model is ready */
-  onReady?: () => void;
-  /** Called on error */
-  onError?: (error: string) => void;
-  /** Called when speech starts */
-  onStart?: () => void;
-  /** Called when speech ends */
-  onEnd?: () => void;
-};
-/** Return type for useSpeech hook */
-type UseSpeechReturn = {
-  /** Speak text aloud */
-  speak: (text: string, options?: {
-    voice?: string;
-    speed?: number;
-  }) => Promise<void>;
-  /** Stop current speech */
-  stop: () => void;
-  /** Whether TTS model is loading */
-  isLoading: boolean;
-  /** Loading progress */
-  loadingProgress: TTSProgress | null;
-  /** Whether currently speaking */
-  isSpeaking: boolean;
-  /** Whether TTS model is ready */
-  isReady: boolean;
-  /** Load the TTS model */
-  load: () => void;
-  /** Error message if any */
-  error: string | null;
-  /** List available voices for current model */
-  listVoices: () => BrowserVoiceInfo[];
-  /** Current voice ID */
-  currentVoice: string;
-  /** Set current voice */
-  setVoice: (voiceId: string) => void;
-  /** Current speed */
-  currentSpeed: number;
-  /** Set speed */
-  setSpeed: (speed: number) => void;
-  /** Current TTS model ID */
-  currentModel: TTSModelId;
-  /** Sample rate for current model (24000 for Kokoro, 44100 for Supertonic) */
-  sampleRate: number;
+/** True when the page is running as an installed/standalone PWA (Home Screen). */
+declare function isStandalone(): boolean;
+/** True when running on iOS/iPadOS (where install is the quota unlock and the
+ * install flow is manual: Share → Add to Home Screen). iPadOS masquerades as
+ * macOS, so we also treat touch-capable WebKit-on-Mac as iOS. */
+declare function isIOS(): boolean;
+type StorageStatus = {
+  /** Total quota granted to this origin, in MB (best-effort estimate). */
+  quotaMB: number;
+  /** Bytes currently used by this origin, in MB. */
+  usageMB: number;
+  /** quota − usage, in MB. */
+  availableMB: number;
+  /** Storage is persistent (exempt from eviction). On iOS this is effectively
+   * only true once the site is installed to the Home Screen. */
+  persisted: boolean;
+  /** Running as an installed/standalone PWA. */
+  installed: boolean;
+  /** Platform is iOS/iPadOS (install is the quota unlock here). */
+  ios: boolean;
+};
+/** Snapshot of the origin's storage situation — quota, usage, persistence, and
+ * whether the app is installed. Use it to decide whether to recommend install
+ * before downloading a large model. */
+declare function getStorageStatus(): Promise<StorageStatus>;
+/**
+ * Request persistent storage (exempt from eviction). Returns whether the origin
+ * is persistent afterwards. Browsers grant this based on engagement/installation;
+ * on iOS Safari it is effectively granted only to an installed (Home Screen) PWA,
+ * so call this AND guide users to install when it returns false on iOS.
+ */
+declare function requestPersistentStorage(): Promise<boolean>;
+type ModelFit = {
+  /** The model likely fits in the currently-available quota. */
+  fits: boolean;
+  availableMB: number;
+  /** Caching durably would benefit from installing to the Home Screen — true when
+   * not installed on iOS, or when the model doesn't fit the current quota. */
+  recommendInstall: boolean;
 };
 /**
- * React hook for text-to-speech with Web Audio API playback
- *
- * Supports both Kokoro (24kHz, high quality) and Supertonic (44.1kHz, faster).
- *
- * @example
- * ```tsx
- * import { useSpeech } from "@tryhamster/gerbil/browser";
- *
- * function App() {
- *   // Default: Kokoro TTS
- *   const { speak, stop, isLoading, isSpeaking, listVoices, setVoice } = useSpeech();
- *
- *   // Or use Supertonic (44.1kHz, faster)
- *   // const { speak, listVoices } = useSpeech({ model: "supertonic-66m" });
- *
- *   if (isLoading) return <div>Loading TTS...</div>;
- *
- *   return (
- *     <div>
- *       <select onChange={e => setVoice(e.target.value)}>
- *         {listVoices().map(v => (
- *           <option key={v.id} value={v.id}>{v.name}</option>
- *         ))}
- *       </select>
- *       <button onClick={() => speak("Hello world!")}>
- *         {isSpeaking ? "Speaking..." : "Speak"}
- *       </button>
- *       {isSpeaking && <button onClick={stop}>Stop</button>}
- *     </div>
- *   );
- * }
- * ```
+ * Estimate whether a model of `sizeMB` will cache in the current quota, and
+ * whether you should recommend installing to the Home Screen first. Pair with a
+ * one-time "Install for offline use" prompt before a large download on mobile.
+ */
+declare function canCacheModel(sizeMB: number): Promise<ModelFit>;
+/**
+ * Platform-appropriate install guidance. iOS Safari has NO programmatic install
+ * prompt — installation is manual (Share → Add to Home Screen), so apps should
+ * show these instructions. Other platforms (Android/Chrome) fire
+ * `beforeinstallprompt`, which apps can capture for a one-tap button.
  */
-declare function useSpeech(options?: UseSpeechOptions): UseSpeechReturn;
+declare function getInstallGuidance(): {
+  installed: boolean;
+  manual: boolean;
+  steps: string;
+};
+//#endregion
+//#region src/browser/audio.d.ts
 /**
  * Play audio from Float32Array using Web Audio API
  *
@@ -763,247 +571,125 @@ declare function createAudioPlayer(sampleRate?: number): {
   stop: () => void;
   isPlaying: () => boolean;
 };
+//#endregion
+//#region src/browser/device-guards.d.ts
 /**
- * Progress info for STT loading
+ * Approximate on-device (INT4) memory footprint in MB for the models the native
+ * engine actually ships. Used for memory-aware selection and messaging.
  */
-type STTProgress = {
-  status: "downloading" | "loading" | "ready" | "error";
-  message?: string;
-  progress?: number;
-  file?: string;
-};
+declare const MODEL_SIZES: Record<string, number>;
 /**
- * Options for useVoiceInput hook
+ * Check if a model is safe to load on the current device.
+ * Returns guidance specific to iOS memory constraints. Matches on the real
+ * native-engine repo ids (MLX 4-bit / upstream Qwen / Liquid).
  */
-type UseVoiceInputOptions = {
-  /** STT model ID (default: whisper-tiny.en) */
-  model?: string;
-  /** Auto-load model on mount (default: false) */
-  autoLoad?: boolean;
-  /** Callback when model is ready */
-  onReady?: () => void;
-  /** Callback when transcription completes (or for each chunk in streaming mode) */
-  onTranscript?: (text: string) => void;
-  /** Callback on error */
-  onError?: (error: string) => void;
-  /** Callback during loading */
-  onProgress?: (progress: STTProgress) => void;
-  /** Enable streaming transcription - transcribes audio in chunks as you speak */
-  streaming?: boolean;
-  /** Chunk duration in ms for streaming mode (default: 3000 = 3 seconds) */
-  chunkDuration?: number;
-  /** Callback for each streaming chunk with partial transcript */
-  onChunk?: (text: string, chunkIndex: number) => void;
+declare function isModelSafeForDevice(modelId: string): {
+  safe: boolean;
+  /**
+   * Borderline: may run on the newest hardware but is prone to OOM. Reported
+   * unsafe (`safe: false`) so callers block by default; a UI can use `risky` to
+   * offer an explicit "load anyway" on capable devices.
+   */
+  risky: boolean;
+  reason: string;
+  recommendation?: string;
+  maxSafeModel?: string;
 };
 /**
- * Return type for useVoiceInput hook
+ * Get recommended models based on device memory and capabilities.
+ * Helps prevent OOM crashes on low-memory mobile devices.
  */
-type UseVoiceInputReturn = {
-  /** Start recording audio */
-  startRecording: () => Promise<void>;
-  /** Stop recording and transcribe */
-  stopRecording: () => Promise<string>;
-  /** Cancel recording without transcribing */
-  cancelRecording: () => void;
-  /** Transcribe raw audio data (Float32Array at 16kHz) */
-  transcribe: (audio: Float32Array) => Promise<string>;
-  /** Whether currently recording */
-  isRecording: boolean;
-  /** Whether transcribing */
-  isTranscribing: boolean;
-  /** Whether model is loading */
-  isLoading: boolean;
-  /** Whether model is ready */
-  isReady: boolean;
-  /** Latest transcription result (full transcript in streaming mode) */
-  transcript: string;
-  /** Current streaming chunk being transcribed (streaming mode only) */
-  streamingChunk: string;
-  /** Number of chunks transcribed so far (streaming mode only) */
-  chunkCount: number;
-  /** Loading progress */
-  loadingProgress: STTProgress | null;
-  /** Error message */
-  error: string | null;
-  /** Manually load the model */
-  load: () => void;
+declare function getRecommendedModels(): {
+  chat: string;
+  tts: string;
+  stt: string;
+  embedding: string;
+  reason: string;
+  deviceMemory: number | null;
+  isMobile: boolean;
+};
+type DownloadPhase = "idle" | "downloading" | "caching" | "initializing" | "ready" | "error";
+declare const SESSION_STORAGE_KEY = "gerbil_session_phase";
+type SessionState = {
+  phase: DownloadPhase;
+  modelId: string | null;
+  sessionId: string;
+  timestamp: number;
+  bytesDownloaded?: number;
+  totalBytes?: number;
 };
 /**
- * React hook for voice input with browser microphone
- *
- * Uses MediaRecorder to capture audio and Whisper for transcription.
- * Supports both one-shot and streaming transcription modes.
- *
- * @example Basic usage (one-shot)
- * ```tsx
- * function VoiceInput() {
- *   const { startRecording, stopRecording, isRecording, transcript } = useVoiceInput({
- *     onTranscript: (text) => console.log("User said:", text),
- *   });
- *
- *   return (
- *     <button onClick={isRecording ? stopRecording : startRecording}>
- *       {isRecording ? "Stop" : "Record"}
- *     </button>
- *   );
- * }
- * ```
- *
- * @example Streaming transcription (real-time)
- * ```tsx
- * function LiveTranscription() {
- *   const { startRecording, stopRecording, isRecording, transcript, streamingChunk } = useVoiceInput({
- *     streaming: true,           // Enable streaming mode
- *     chunkDuration: 1500,       // Transcribe every 1.5 seconds (default)
- *     onChunk: (text, idx) => console.log(`Chunk ${idx}: ${text}`),
- *   });
- *
- *   return (
- *     <div>
- *       <button onClick={isRecording ? stopRecording : startRecording}>
- *         {isRecording ? "Stop" : "Start Live Transcription"}
- *       </button>
- *       <p>Current chunk: {streamingChunk}</p>
- *       <p>Full transcript: {transcript}</p>
- *     </div>
- *   );
- * }
- * ```
+ * Set the current download/initialization phase.
+ * Used to detect if a reload happened during a critical operation.
  */
-declare function useVoiceInput(options?: UseVoiceInputOptions): UseVoiceInputReturn;
+declare function setDownloadPhase(phase: DownloadPhase, modelId?: string, progress?: {
+  bytesDownloaded: number;
+  totalBytes: number;
+}): void;
 /**
- * Options for useVoiceChat hook
+ * Get the last known download phase from storage.
  */
-type UseVoiceChatOptions = {
-  /** LLM model ID (default: qwen3-0.6b) */
-  llmModel?: string;
-  /** STT model ID (default: whisper-tiny.en) */
-  sttModel?: string;
-  /** TTS model ID (default: kokoro-82m, also supports supertonic-66m) */
-  ttsModel?: TTSModelId;
-  /** System prompt for LLM */
-  system?: string;
-  /** Enable thinking mode (default: false) */
-  thinking?: boolean;
-  /** TTS voice ID (default: model's default voice) */
-  voice?: string;
-  /** TTS speech speed (default: 1.0) */
-  speed?: number;
-  /** Auto-load all models on mount (default: false) */
-  autoLoad?: boolean;
-  /** Callback when user speaks */
-  onUserSpeak?: (text: string) => void;
-  /** Callback when assistant responds */
-  onAssistantSpeak?: (text: string) => void;
-  /** Callback on error */
-  onError?: (error: string) => void;
-};
+declare function getDownloadPhase(): SessionState | null;
 /**
- * Message in voice chat
+ * Detect if the page reloaded during a model download/initialization.
+ * This typically indicates an iOS memory crash.
+ *
+ * @returns Detection result with recommended action
  */
-type VoiceChatMessage = {
-  id: string;
-  role: "user" | "assistant";
-  content: string;
-  thinking?: string;
-  audioUrl?: string;
+declare function detectMemoryCrash(): {
+  crashed: boolean;
+  phase?: DownloadPhase;
+  modelId?: string;
+  timeSinceCrash?: number;
+  recommendation?: string;
 };
 /**
- * Return type for useVoiceChat hook
+ * Clear session phase (call when model loads successfully).
  */
-type UseVoiceChatReturn = {
-  /** Messages in the conversation */
-  messages: VoiceChatMessage[];
-  /** Start recording user speech */
-  startListening: () => Promise<void>;
-  /** Stop recording and process (STT → LLM → TTS) */
-  stopListening: () => Promise<void>;
-  /** Cancel current operation */
-  cancel: () => void;
-  /** Clear conversation history */
-  clear: () => void;
-  /** Whether recording user speech */
-  isListening: boolean;
-  /** Whether processing (STT/LLM/TTS) */
-  isProcessing: boolean;
-  /** Whether assistant is speaking */
-  isSpeaking: boolean;
-  /** Current stage: idle, listening, transcribing, thinking, speaking */
-  stage: "idle" | "listening" | "transcribing" | "thinking" | "speaking";
-  /** Whether all models are loaded */
-  isReady: boolean;
-  /** Whether loading models */
-  isLoading: boolean;
-  /** Loading progress message */
-  loadingMessage: string;
-  /** Error message */
-  error: string | null;
-  /** Manually load all models */
-  load: () => void;
-};
+declare function clearDownloadPhase(): void;
+//#endregion
+//#region src/browser/download.d.ts
+/** Chunk size for downloads: 1.5MB (safe for iOS IndexedDB transactions) */
+declare const CHUNK_SIZE_BYTES: number;
+/** IndexedDB database name for chunked downloads */
+declare const DOWNLOAD_DB_NAME = "gerbil-model-chunks";
 /**
- * React hook for voice conversation with STT + LLM + TTS
- *
- * Complete voice-to-voice conversation loop:
- * 1. User presses button to speak
- * 2. Speech is transcribed (Whisper)
- * 3. LLM generates response
- * 4. Response is spoken aloud (Kokoro or Supertonic TTS)
- *
- * @example
- * ```tsx
- * function VoiceChat() {
- *   const {
- *     messages,
- *     startListening,
- *     stopListening,
- *     isListening,
- *     isSpeaking,
- *     stage,
- *   } = useVoiceChat({
- *     system: "You are a helpful voice assistant.",
- *     voice: "af_bella",
- *     // Or use Supertonic for faster synthesis:
- *     // ttsModel: "supertonic-66m",
- *     // voice: "F1",
- *   });
- *
- *   return (
- *     <div>
- *       {messages.map(m => (
- *         <div key={m.id}>{m.role}: {m.content}</div>
- *       ))}
- *       <button
- *         onMouseDown={startListening}
- *         onMouseUp={stopListening}
- *       >
- *         {stage === "idle" ? "🎤 Hold to Speak" : stage}
- *       </button>
- *     </div>
- *   );
- * }
- * ```
+ * Chunked resumable downloader for large model files.
+ * Downloads in 1.5MB chunks to avoid iOS memory pressure.
  */
-declare function useVoiceChat(options?: UseVoiceChatOptions): UseVoiceChatReturn;
+declare function downloadModelChunked(url: string, modelId: string, options?: {
+  onProgress?: (info: {
+    phase: string;
+    bytesDownloaded: number;
+    totalBytes: number;
+    percent: number;
+  }) => void;
+  signal?: AbortSignal;
+}): Promise<ArrayBuffer>;
 /**
- * Check if WebGPU is supported
+ * Check if a model has an incomplete download.
  */
-declare function isWebGPUSupported(): boolean;
+declare function hasIncompleteDownload(modelId: string): Promise<{
+  incomplete: boolean;
+  bytesDownloaded?: number;
+  totalBytes?: number;
+  percent?: number;
+}>;
 /**
- * Get WebGPU adapter info
+ * Clear incomplete download data for a model.
  */
-declare function getWebGPUInfo(): Promise<{
-  supported: boolean;
-  adapter?: string;
-  device?: string;
-} | null>;
-declare const _default: {
-  isWebGPUSupported: typeof isWebGPUSupported;
-  getWebGPUInfo: typeof getWebGPUInfo;
-  createGerbilWorker: typeof createGerbilWorker;
-  playAudio: typeof playAudio;
-  createAudioPlayer: typeof createAudioPlayer;
-};
+declare function clearIncompleteDownload(modelId: string): Promise<void>;
+/**
+ * Check if there's enough storage quota for a model download.
+ * Returns estimated available space and whether download should proceed.
+ */
+declare function checkStorageQuota(requiredMB?: number): Promise<{
+  ok: boolean;
+  availableMB: number;
+  usedMB: number;
+  quotaMB: number;
+  message?: string;
+}>;
 //#endregion
-export { AudioChunk, BUILTIN_MODELS, BrowserVoiceInfo, CacheConfig, CompleteOptions, EmbedOptions, EmbedResult, FallbackConfig, GenerateOptions, GenerateResult, GenerateStreamOptions, GerbilConfig, GerbilModelSettings, GerbilProviderSettings, GerbilWorker, GerbilWorkerOptions, ImageInput, JsonOptions, LoadOptions, LoadSTTOptions, LoadTTSOptions, LoadingProgress, Message, ModelConfig, ModelSource, ModelStats, ProgressInfo, STTModelConfig, STTProgress, SessionStats, SpeakOptions, SpeakResult, StreamingTranscriptionOptions, StreamingTranscriptionSession, SystemInfo, TTSModelConfig, TTSModelId, TTSProgress, TranscribeOptions, TranscribeResult, TranscribeSegment, UseChatOptions, UseChatReturn, UseCompletionOptions, UseCompletionReturn, UseSpeechOptions, UseSpeechReturn, UseVoiceChatOptions, UseVoiceChatReturn, UseVoiceInputOptions, UseVoiceInputReturn, VoiceChatMessage, VoiceInfo, WorkerComplete, WorkerProgress, WorkerToken, createAudioPlayer, createGerbilWorker, _default as default, getWebGPUInfo, isWebGPUSupported, playAudio, useChat, useCompletion, useSpeech, useVoiceChat, useVoiceInput };
+export { AudioChunk, BUILTIN_MODELS, CHUNK_SIZE_BYTES, CacheConfig, ConcurrencyConfig, DOWNLOAD_DB_NAME, EmbedOptions, EmbedResult, ErrorContext, FallbackConfig, GenerateEvent, GenerateOptions, GenerateResult, GerbilConfig, GerbilModelSettings, GerbilProviderSettings, ImageInput, JsonOptions, LoadOptions, LoadSTTOptions, LoadTTSOptions, MODEL_SIZES, ModelConfig, type ModelFit, ModelLoadEvent, ModelSource, ModelStats, PreloadOptions, ProgressInfo, SESSION_STORAGE_KEY, STTModelConfig, SearchResult, type SessionState, SessionStats, SimilarityResult, SpeakOptions, SpeakResult, type StorageStatus, StreamingTranscriptionOptions, StreamingTranscriptionSession, SystemInfo, TTSModelConfig, TelemetryConfig, TranscribeOptions, TranscribeResult, TranscribeSegment, VoiceInfo, canCacheModel, checkStorageQuota, clearDownloadPhase, clearIncompleteDownload, createAudioPlayer, detectMemoryCrash, downloadModelChunked, getDownloadPhase, getInstallGuidance, getRecommendedModels, getStorageStatus, hasIncompleteDownload, isIOS, isModelSafeForDevice, isStandalone, playAudio, requestPersistentStorage, setDownloadPhase };
 //# sourceMappingURL=index.d.ts.map