@tryhamster/gerbil 1.0.0-rc.22 → 1.0.0-rc.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. package/dist/browser/index.d.ts +146 -2
  2. package/dist/browser/index.d.ts.map +1 -1
  3. package/dist/browser/index.js +496 -20
  4. package/dist/browser/index.js.map +1 -1
  5. package/dist/cli.mjs +7 -7
  6. package/dist/cli.mjs.map +1 -1
  7. package/dist/frameworks/express.d.mts +1 -3
  8. package/dist/frameworks/express.d.mts.map +1 -1
  9. package/dist/frameworks/express.mjs +3 -3
  10. package/dist/frameworks/express.mjs.map +1 -1
  11. package/dist/frameworks/fastify.d.mts +1 -1
  12. package/dist/frameworks/fastify.mjs +1 -1
  13. package/dist/frameworks/hono.d.mts +1 -1
  14. package/dist/frameworks/hono.mjs +1 -1
  15. package/dist/frameworks/next.d.mts +2 -2
  16. package/dist/frameworks/next.mjs +1 -1
  17. package/dist/frameworks/react.d.mts +1 -1
  18. package/dist/frameworks/react.d.mts.map +1 -1
  19. package/dist/frameworks/trpc.d.mts +1 -1
  20. package/dist/frameworks/trpc.mjs +1 -1
  21. package/dist/{gerbil-DJygY0sJ.d.mts → gerbil-CbnV_cG5.d.mts} +9 -2
  22. package/dist/gerbil-CbnV_cG5.d.mts.map +1 -0
  23. package/dist/{gerbil-PzPtcdeM.mjs → gerbil-DODVGr-u.mjs} +1 -1
  24. package/dist/{gerbil-DzZ-L6n8.mjs → gerbil-jO9anIh_.mjs} +90 -3
  25. package/dist/gerbil-jO9anIh_.mjs.map +1 -0
  26. package/dist/index.d.mts +3 -3
  27. package/dist/index.d.mts.map +1 -1
  28. package/dist/index.mjs +2 -2
  29. package/dist/index.mjs.map +1 -1
  30. package/dist/integrations/ai-sdk.d.mts +1 -1
  31. package/dist/integrations/ai-sdk.mjs +1 -1
  32. package/dist/integrations/langchain.d.mts +1 -1
  33. package/dist/integrations/langchain.mjs +1 -1
  34. package/dist/integrations/llamaindex.d.mts +1 -1
  35. package/dist/integrations/llamaindex.mjs +1 -1
  36. package/dist/integrations/mcp.d.mts +2 -2
  37. package/dist/integrations/mcp.mjs +4 -4
  38. package/dist/{mcp-D161vL_C.mjs → mcp-tavZtFY1.mjs} +3 -3
  39. package/dist/{mcp-D161vL_C.mjs.map → mcp-tavZtFY1.mjs.map} +1 -1
  40. package/dist/{one-liner-C-pRqDK2.mjs → one-liner-Ba58M_6j.mjs} +2 -2
  41. package/dist/{one-liner-C-pRqDK2.mjs.map → one-liner-Ba58M_6j.mjs.map} +1 -1
  42. package/dist/{repl-D9x3TnQc.mjs → repl-BGly-o_e.mjs} +3 -3
  43. package/dist/skills/index.d.mts +6 -6
  44. package/dist/skills/index.d.mts.map +1 -1
  45. package/dist/skills/index.mjs +3 -3
  46. package/dist/{skills-D14RwyUN.mjs → skills-BKxP2pex.mjs} +2 -2
  47. package/dist/{skills-D14RwyUN.mjs.map → skills-BKxP2pex.mjs.map} +1 -1
  48. package/dist/{types-evP8RShr.d.mts → types-6uG8lC7u.d.mts} +65 -2
  49. package/dist/types-6uG8lC7u.d.mts.map +1 -0
  50. package/docs/architecture/overview.md +2 -0
  51. package/docs/observability.md +230 -0
  52. package/package.json +5 -4
  53. package/dist/gerbil-DJygY0sJ.d.mts.map +0 -1
  54. package/dist/gerbil-DzZ-L6n8.mjs.map +0 -1
  55. package/dist/types-evP8RShr.d.mts.map +0 -1
@@ -257,21 +257,32 @@ async function createGerbilWorker(options = {}) {
  modelId.toLowerCase().includes("vision") ||
  modelId.toLowerCase().includes("vlm");
 
- const dtype = options.dtype || "q4f16";
+ // Detect mobile - must use q4 (not q4f16) due to memory/pagefile limitations
+ const isMobile = /iPhone|iPad|Android|Mobile/i.test(navigator.userAgent);
+
+ // q4f16 requires more memory and float16 GPU support
+ // Mobile devices MUST use q4 - pagefile cannot handle q4f16
+ let dtype = isMobile ? "q4" : (options.dtype || "q4f16");
+
+ if (isMobile && options.dtype === "q4f16") {
+ console.warn("Mobile detected: forcing q4 instead of q4f16 (memory constraints)");
+ dtype = "q4";
+ }
  let device = options.device || "webgpu";
  let usedFallback = false;
 
  // Helper to load model with WASM fallback
  async function loadWithFallback(loadFn, opts) {
  try {
- return await loadFn({ ...opts, device });
+ return await loadFn({ ...opts, device, dtype });
  } catch (webgpuError) {
  if (device === "webgpu") {
  console.warn("WebGPU failed, falling back to WASM:", webgpuError.message);
  self.postMessage({ status: "fallback", backend: "wasm", reason: webgpuError.message });
  device = "wasm";
+ dtype = "q4"; // WASM doesn't support q4f16
  usedFallback = true;
+ return await loadFn({ ...opts, device: "wasm", dtype: "q4" });
- return await loadFn({ ...opts, device: "wasm" });
  }
  throw webgpuError;
  }
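Editor's note on the hunk above: the new dtype selection means an explicit dtype: "q4f16" is honored only on desktop WebGPU; mobile user agents and the WASM fallback path both coerce it to "q4". A minimal consumer sketch of that behavior, assuming only the option names visible in this hunk:

    // Sketch only: illustrates the coercion rules added in this hunk.
    const worker = await createGerbilWorker({
      modelId: "qwen3-0.6b", // illustrative model ID (appears in this diff's safety lists)
      device: "webgpu",      // if WebGPU init throws: device -> "wasm" and dtype -> "q4"
      dtype: "q4f16",        // on /iPhone|iPad|Android|Mobile/i user agents: warned and forced to "q4"
    });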
@@ -279,7 +290,7 @@ async function createGerbilWorker(options = {}) {
 
  if (this.isVision) {
  // Load vision model components
- // Note: Don't specify dtype for vision models - let transformers.js pick defaults
+ // On mobile, force q4 to avoid memory issues
  if (!this.processor) {
  this.processor = await AutoProcessor.from_pretrained(modelId, {
  progress_callback: progressCallback,
@@ -288,7 +299,7 @@ async function createGerbilWorker(options = {}) {
  if (!this.visionModel) {
  this.visionModel = await loadWithFallback(
  (opts) => AutoModelForImageTextToText.from_pretrained(modelId, opts),
- { progress_callback: progressCallback }
+ { dtype, progress_callback: progressCallback }
  );
  }
  return {
@@ -755,6 +766,13 @@ function useChat(options = {}) {
  setIsLoading(true);
  setShouldLoad(true);
  }, [isLoading]);
+ useEffect(() => {
+ const crash = detectMemoryCrash();
+ if (crash.crashed) {
+ setError(crash.recommendation || "Previous model load failed due to device memory limits.");
+ onError?.(crash.recommendation || "Previous model load failed");
+ }
+ }, []);
  useEffect(() => {
  if (!shouldLoad) return;
  if (!isWebGPUSupported()) {
@@ -764,13 +782,27 @@ function useChat(options = {}) {
  onError?.(gpuError);
  return;
  }
+ const safetyCheck = isModelSafeForDevice(model);
+ if (!safetyCheck.safe) {
+ setError(safetyCheck.reason);
+ setIsLoading(false);
+ onError?.(safetyCheck.reason);
+ return;
+ }
  mountedRef.current = true;
+ setDownloadPhase("downloading", model);
  createGerbilWorker({
  modelId: model,
  onProgress: (p) => {
  if (!mountedRef.current) return;
  setLoadingProgress(p);
+ if (p.status === "downloading" && p.progress !== void 0) setDownloadPhase("downloading", model, {
+ bytesDownloaded: p.progress,
+ totalBytes: 100
+ });
+ else if (p.status === "loading") setDownloadPhase("initializing", model);
  if (p.status === "ready") {
+ clearDownloadPhase();
  setIsLoading(false);
  setIsReady(true);
  onReady?.();
@@ -788,6 +820,7 @@ function useChat(options = {}) {
  },
  onError: (err) => {
  if (!mountedRef.current) return;
+ setDownloadPhase("error", model);
  setError(err);
  setIsGenerating(false);
  onError?.(err);
@@ -797,6 +830,7 @@ function useChat(options = {}) {
  else worker.terminate();
  }).catch((err) => {
  if (mountedRef.current) {
+ setDownloadPhase("error", model);
  setError(err.message);
  setIsLoading(false);
  onError?.(err.message);
@@ -1024,6 +1058,13 @@ function useCompletion(options = {}) {
  setIsLoading(true);
  setShouldLoad(true);
  }, [isLoading]);
+ useEffect(() => {
+ const crash = detectMemoryCrash();
+ if (crash.crashed) {
+ setError(crash.recommendation || "Previous model load failed due to device memory limits.");
+ onError?.(crash.recommendation || "Previous model load failed");
+ }
+ }, []);
  useEffect(() => {
  if (!shouldLoad) return;
  if (!isWebGPUSupported()) {
@@ -1033,13 +1074,27 @@ function useCompletion(options = {}) {
  onError?.(gpuError);
  return;
  }
+ const safetyCheck = isModelSafeForDevice(model);
+ if (!safetyCheck.safe) {
+ setError(safetyCheck.reason);
+ setIsLoading(false);
+ onError?.(safetyCheck.reason);
+ return;
+ }
  mountedRef.current = true;
+ setDownloadPhase("downloading", model);
  createGerbilWorker({
  modelId: model,
  onProgress: (p) => {
  if (!mountedRef.current) return;
  setLoadingProgress(p);
+ if (p.status === "downloading" && p.progress !== void 0) setDownloadPhase("downloading", model, {
+ bytesDownloaded: p.progress,
+ totalBytes: 100
+ });
+ else if (p.status === "loading") setDownloadPhase("initializing", model);
  if (p.status === "ready") {
+ clearDownloadPhase();
  setIsLoading(false);
  setIsReady(true);
  onReady?.();
@@ -1059,6 +1114,7 @@ function useCompletion(options = {}) {
  },
  onError: (err) => {
  if (!mountedRef.current) return;
+ setDownloadPhase("error", model);
  setError(err);
  setIsGenerating(false);
  onError?.(err);
@@ -1068,6 +1124,7 @@ function useCompletion(options = {}) {
  else worker.terminate();
  }).catch((err) => {
  if (mountedRef.current) {
+ setDownloadPhase("error", model);
  setError(err.message);
  setIsLoading(false);
  onError?.(err.message);
@@ -1434,12 +1491,12 @@ const TTS_WORKER_CODE = `
  // Load Supertonic using transformers.js pipeline with WASM fallback
  let device = "webgpu";
  try {
- ttsInstance = await pipeline("text-to-speech", repo, {
+ ttsInstance = await pipeline("text-to-speech", repo, {
  device,
- progress_callback: (progress) => {
- self.postMessage({ type: "progress", payload: progress });
- },
- });
+ progress_callback: (progress) => {
+ self.postMessage({ type: "progress", payload: progress });
+ },
+ });
  } catch (webgpuError) {
  console.warn("WebGPU failed for TTS, falling back to WASM:", webgpuError.message);
  self.postMessage({ type: "fallback", payload: { backend: "wasm", reason: webgpuError.message } });
@@ -1480,12 +1537,26 @@ const TTS_WORKER_CODE = `
  const kokoroModule = await import("https://cdn.jsdelivr.net/npm/kokoro-js@1.2.1/dist/kokoro.web.min.js");
  const { KokoroTTS } = kokoroModule;
 
+ // Try WebGPU first, fallback to WASM
+ try {
  kokoroTTS = await KokoroTTS.from_pretrained(repo, {
  dtype: "fp32",
+ device: "webgpu",
+ progress_callback: (progress) => {
+ self.postMessage({ type: "progress", payload: progress });
+ },
+ });
+ } catch (webgpuError) {
+ console.warn("WebGPU failed for Kokoro TTS, falling back:", webgpuError.message);
+ self.postMessage({ type: "fallback", payload: { backend: "wasm", reason: webgpuError.message } });
+ kokoroTTS = await KokoroTTS.from_pretrained(repo, {
+ dtype: "fp32",
+ device: "wasm",
  progress_callback: (progress) => {
  self.postMessage({ type: "progress", payload: progress });
  },
  });
+ }
  }
 
  self.postMessage({ type: "ready" });
1604
1675
  setIsLoading(true);
1605
1676
  setShouldLoad(true);
1606
1677
  }, [isLoading]);
1678
+ useEffect(() => {
1679
+ const crash = detectMemoryCrash();
1680
+ if (crash.crashed) {
1681
+ setError(crash.recommendation || "Previous model load failed due to device memory limits.");
1682
+ onError?.(crash.recommendation || "Previous model load failed");
1683
+ }
1684
+ }, []);
1607
1685
  useEffect(() => {
1608
1686
  if (!shouldLoad) return;
1609
1687
  mountedRef.current = true;
1610
1688
  modelIdRef.current = modelId;
1689
+ setDownloadPhase("downloading", modelId);
1611
1690
  const config = TTS_MODELS[modelId];
1612
1691
  setLoadingProgress({
1613
1692
  status: "loading",
@@ -1624,6 +1703,7 @@ function useSpeech(options = {}) {
1624
1703
  progress: Math.round(payload.progress || 0)
1625
1704
  });
1626
1705
  if (type === "ready") {
1706
+ clearDownloadPhase();
1627
1707
  setIsLoading(false);
1628
1708
  setIsReady(true);
1629
1709
  setLoadingProgress({ status: "ready" });
@@ -1646,6 +1726,7 @@ function useSpeech(options = {}) {
1646
1726
  playAudioData(audio, sampleRate);
1647
1727
  }
1648
1728
  if (type === "error") {
1729
+ setDownloadPhase("error", modelId);
1649
1730
  const errorMsg = payload;
1650
1731
  setError(errorMsg);
1651
1732
  setIsLoading(false);
@@ -1659,6 +1740,7 @@ function useSpeech(options = {}) {
1659
1740
  };
1660
1741
  worker.onerror = (err) => {
1661
1742
  if (!mountedRef.current) return;
1743
+ setDownloadPhase("error", modelId);
1662
1744
  let errorMsg = err.message || "";
1663
1745
  if (!errorMsg || errorMsg === "Script error.") errorMsg = getWebGPUErrorMessage();
1664
1746
  setError(errorMsg);
@@ -1916,12 +1998,12 @@ const STT_WORKER_CODE = `
1916
1998
  // Load Whisper model with WASM fallback
1917
1999
  let device = "webgpu";
1918
2000
  try {
1919
- sttPipeline = await pipeline("automatic-speech-recognition", model, {
2001
+ sttPipeline = await pipeline("automatic-speech-recognition", model, {
1920
2002
  device,
1921
- progress_callback: (progress) => {
1922
- self.postMessage({ type: "progress", payload: progress });
1923
- },
1924
- });
2003
+ progress_callback: (progress) => {
2004
+ self.postMessage({ type: "progress", payload: progress });
2005
+ },
2006
+ });
1925
2007
  } catch (webgpuError) {
1926
2008
  console.warn("WebGPU failed for STT, falling back to WASM:", webgpuError.message);
1927
2009
  self.postMessage({ type: "fallback", payload: { backend: "wasm", reason: webgpuError.message } });
@@ -2040,9 +2122,17 @@ function useVoiceInput(options = {}) {
2040
2122
  "whisper-small.en": "onnx-community/whisper-small.en"
2041
2123
  }[modelId] || modelId;
2042
2124
  };
2125
+ useEffect(() => {
2126
+ const crash = detectMemoryCrash();
2127
+ if (crash.crashed) {
2128
+ setError(crash.recommendation || "Previous model load failed due to device memory limits.");
2129
+ onError?.(crash.recommendation || "Previous model load failed");
2130
+ }
2131
+ }, []);
2043
2132
  useEffect(() => {
2044
2133
  if (!shouldLoad || isReady) return;
2045
2134
  mountedRef.current = true;
2135
+ setDownloadPhase("downloading", model);
2046
2136
  setIsLoading(true);
2047
2137
  setLoadingProgress({
2048
2138
  status: "loading",
@@ -2068,6 +2158,7 @@ function useVoiceInput(options = {}) {
2068
2158
  onProgress?.(progress);
2069
2159
  }
2070
2160
  if (type === "ready") {
2161
+ clearDownloadPhase();
2071
2162
  setIsReady(true);
2072
2163
  setIsLoading(false);
2073
2164
  setLoadingProgress({ status: "ready" });
@@ -2084,6 +2175,7 @@ function useVoiceInput(options = {}) {
2084
2175
  }
2085
2176
  }
2086
2177
  if (type === "error") {
2178
+ setDownloadPhase("error", model);
2087
2179
  const errMsg = payload;
2088
2180
  setError(errMsg);
2089
2181
  setIsLoading(false);
@@ -2106,6 +2198,7 @@ function useVoiceInput(options = {}) {
2106
2198
  };
2107
2199
  worker.onerror = (err) => {
2108
2200
  if (!mountedRef.current) return;
2201
+ setDownloadPhase("error", model);
2109
2202
  let errMsg = err.message || "";
2110
2203
  if (!errMsg || errMsg === "Script error.") errMsg = getWebGPUErrorMessage();
2111
2204
  setError(errMsg);
@@ -3027,6 +3120,13 @@ function useEmbedding(options = {}) {
  if (magnitude === 0) return 0;
  return dotProduct / magnitude;
  }, []);
+ useEffect(() => {
+ const crash = detectMemoryCrash();
+ if (crash.crashed) {
+ setError(crash.recommendation || "Previous model load failed due to device memory limits.");
+ onError?.(crash.recommendation || "Previous model load failed");
+ }
+ }, []);
  const load = useCallback(() => {
  if (isReady && workerRef.current) return Promise.resolve();
  if (loadRequestedRef.current && readyPromiseRef.current) return readyPromiseRef.current;
@@ -3036,6 +3136,7 @@ function useEmbedding(options = {}) {
  status: "loading",
  message: "Loading embedding model..."
  });
+ setDownloadPhase("downloading", model);
  readyPromiseRef.current = new Promise((resolve) => {
  readyResolveRef.current = resolve;
  });
@@ -3050,18 +3151,21 @@ function useEmbedding(options = {}) {
  progress: Math.round(payload.loaded / payload.total * 100)
  });
  } else if (type === "ready") {
+ clearDownloadPhase();
  setIsLoading(false);
  setIsReady(true);
  setLoadingProgress({ status: "ready" });
  readyResolveRef.current?.();
  onReady?.();
  } else if (type === "error") {
+ setDownloadPhase("error", model);
  setIsLoading(false);
  setError(payload);
  onError?.(payload);
  }
  });
  worker.onerror = (err) => {
+ setDownloadPhase("error", model);
  setIsLoading(false);
  let errMsg = err.message || "";
  if (!errMsg || errMsg === "Script error.") errMsg = getWebGPUErrorMessage();
@@ -3566,11 +3670,11 @@ async function getBrowserDiagnostics() {
  } catch {
  moduleWorkers = false;
  }
- let indexedDB = false;
+ let indexedDB$1 = false;
  try {
- indexedDB = typeof window !== "undefined" && "indexedDB" in window;
+ indexedDB$1 = typeof window !== "undefined" && "indexedDB" in window;
  } catch {
- indexedDB = false;
+ indexedDB$1 = false;
  }
  return {
  browser,
@@ -3583,7 +3687,7 @@ async function getBrowserDiagnostics() {
  webgpuExpected,
  webgpu,
  moduleWorkers,
- indexedDB
+ indexedDB: indexedDB$1
  };
  }
  /**
@@ -3621,6 +3725,370 @@ function getRecommendedModels() {
  };
  }
  /**
+ * Maximum safe model sizes for iOS devices (in MB).
+ * Based on WKWebView effective memory limit of ~200-400MB.
+ */
+ const IOS_MODEL_LIMITS = {
+ safe: ["smollm2-135m", "smollm2-360m"],
+ risky: ["qwen3-0.6b"],
+ blocked: ["qwen3-1.7b", "qwen3-4b"],
+ maxBudgetMB: 350
+ };
+ /**
+ * Check if a model is safe to load on the current device.
+ * Returns guidance specific to iOS memory constraints.
+ */
+ function isModelSafeForDevice(modelId) {
+ const ua = typeof navigator !== "undefined" ? navigator.userAgent : "";
+ const isIOS = /iPhone|iPad|iPod/.test(ua);
+ const isIOSChrome = isIOS && /CriOS/.test(ua);
+ const deviceMemory = typeof navigator !== "undefined" ? navigator.deviceMemory : null;
+ const normalizedId = modelId.toLowerCase().replace(/[^a-z0-9]/g, "-");
+ if (isIOS) {
+ if (IOS_MODEL_LIMITS.blocked.some((m) => normalizedId.includes(m.toLowerCase().replace(/[^a-z0-9]/g, "-")))) return {
+ safe: false,
+ reason: `Model ${modelId} is too large for iOS devices${isIOSChrome ? " (iOS Chrome uses WKWebView, same limits as Safari)" : ""}. WKWebView memory limit (~300-400MB) will cause crashes.`,
+ recommendation: "Use smollm2-360m or qwen3-0.6b on iOS. For larger models, use desktop.",
+ maxSafeModel: "qwen3-0.6b"
+ };
+ if (IOS_MODEL_LIMITS.risky.some((m) => normalizedId.includes(m.toLowerCase().replace(/[^a-z0-9]/g, "-")))) {
+ if (!(deviceMemory && deviceMemory >= 4)) return {
+ safe: false,
+ reason: `Model ${modelId} may crash on older iOS devices. Your device reports ${deviceMemory || "unknown"}GB RAM.`,
+ recommendation: "Use smollm2-360m for reliable performance, or try on iPhone 14+ / iPad Pro.",
+ maxSafeModel: "smollm2-360m"
+ };
+ return {
+ safe: true,
+ reason: `Model ${modelId} should work on your high-memory iOS device, but may be slow.`
+ };
+ }
+ return {
+ safe: true,
+ reason: "Model is within iOS memory limits."
+ };
+ }
+ if (/Android/.test(ua)) {
+ if (normalizedId.includes("qwen3-4b") || normalizedId.includes("7b")) return {
+ safe: false,
+ reason: `Model ${modelId} is very large and may crash on Android devices.`,
+ recommendation: "Use qwen3-1.7b or smaller on Android.",
+ maxSafeModel: "qwen3-1.7b"
+ };
+ }
+ return {
+ safe: true,
+ reason: "Desktop browser has sufficient memory."
+ };
+ }
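Editor's note: isModelSafeForDevice returns a plain result object rather than throwing, so the reason and recommendation strings can be surfaced to the user directly, which is how the useChat/useCompletion hunks earlier in this diff consume it. A hedged standalone sketch using only the fields defined above:

    // Sketch: gate a model load on the device-safety check defined above.
    const check = isModelSafeForDevice("qwen3-1.7b"); // blocked on iOS per IOS_MODEL_LIMITS
    if (!check.safe) {
      console.warn(check.reason);          // human-readable explanation
      console.warn(check.recommendation);  // suggested alternative, when present
      // check.maxSafeModel (e.g. "qwen3-0.6b") can seed a retry with a smaller model
    }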
+ const SESSION_STORAGE_KEY = "gerbil_session_phase";
+ /**
+ * Generate a unique session ID for tracking across reloads.
+ */
+ function generateSessionId() {
+ return `${Date.now()}-${Math.random().toString(36).slice(2, 9)}`;
+ }
+ /**
+ * Get or create the current session ID.
+ */
+ function getSessionId() {
+ if (typeof localStorage === "undefined") return generateSessionId();
+ let sessionId = sessionStorage.getItem("gerbil_session_id");
+ if (!sessionId) {
+ sessionId = generateSessionId();
+ sessionStorage.setItem("gerbil_session_id", sessionId);
+ }
+ return sessionId;
+ }
+ /**
+ * Set the current download/initialization phase.
+ * Used to detect if a reload happened during a critical operation.
+ */
+ function setDownloadPhase(phase, modelId, progress) {
+ if (typeof localStorage === "undefined") return;
+ const state = {
+ phase,
+ modelId: modelId || null,
+ sessionId: getSessionId(),
+ timestamp: Date.now(),
+ bytesDownloaded: progress?.bytesDownloaded,
+ totalBytes: progress?.totalBytes
+ };
+ localStorage.setItem(SESSION_STORAGE_KEY, JSON.stringify(state));
+ }
+ /**
+ * Get the last known download phase from storage.
+ */
+ function getDownloadPhase() {
+ if (typeof localStorage === "undefined") return null;
+ try {
+ const raw = localStorage.getItem(SESSION_STORAGE_KEY);
+ if (!raw) return null;
+ return JSON.parse(raw);
+ } catch {
+ return null;
+ }
+ }
+ /**
+ * Detect if the page reloaded during a model download/initialization.
+ * This typically indicates an iOS memory crash.
+ *
+ * @returns Detection result with recommended action
+ */
+ function detectMemoryCrash() {
+ const lastState = getDownloadPhase();
+ const currentSessionId = getSessionId();
+ if (!lastState) return { crashed: false };
+ const wasInCriticalPhase = [
+ "downloading",
+ "caching",
+ "initializing"
+ ].includes(lastState.phase);
+ const sessionChanged = lastState.sessionId !== currentSessionId;
+ const timeSinceCrash = Date.now() - lastState.timestamp;
+ if (wasInCriticalPhase && sessionChanged && timeSinceCrash < 300 * 1e3) {
+ localStorage.removeItem(SESSION_STORAGE_KEY);
+ return {
+ crashed: true,
+ phase: lastState.phase,
+ modelId: lastState.modelId || void 0,
+ timeSinceCrash,
+ recommendation: lastState.modelId?.includes("1.7b") ? "The model was too large for your device. Try smollm2-360m or qwen3-0.6b instead." : "Your device ran out of memory. Try a smaller model or use a desktop browser."
+ };
+ }
+ return { crashed: false };
+ }
+ /**
+ * Clear session phase (call when model loads successfully).
+ */
+ function clearDownloadPhase() {
+ if (typeof localStorage === "undefined") return;
+ localStorage.removeItem(SESSION_STORAGE_KEY);
+ }
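Editor's note: the three phase helpers bracket any risky load: mark the phase before starting, clear it on success, and probe for a stale marker at the next startup. detectMemoryCrash only reports a crash when the persisted session ID differs from the current one and the marker is under five minutes old. A minimal lifecycle sketch (loadModel is a hypothetical stand-in, not an API from this package):

    // Run once at startup: a leftover "downloading"/"caching"/"initializing"
    // marker from another session within 5 minutes reads as a memory crash.
    const crash = detectMemoryCrash();
    if (crash.crashed) console.warn(crash.recommendation);

    setDownloadPhase("downloading", "smollm2-360m");
    try {
      await loadModel("smollm2-360m"); // hypothetical loader
      clearDownloadPhase();            // success: remove the marker
    } catch (err) {
      setDownloadPhase("error", "smollm2-360m");
      throw err;
    }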
+ /** Chunk size for downloads: 1.5MB (safe for iOS IndexedDB transactions) */
+ const CHUNK_SIZE_BYTES = 1.5 * 1024 * 1024;
+ /** IndexedDB database name for chunked downloads */
+ const DOWNLOAD_DB_NAME = "gerbil-model-chunks";
+ const DOWNLOAD_DB_VERSION = 1;
+ /**
+ * Open (or create) the IndexedDB for chunked downloads.
+ */
+ async function openDownloadDB() {
+ return new Promise((resolve, reject) => {
+ const request = indexedDB.open(DOWNLOAD_DB_NAME, DOWNLOAD_DB_VERSION);
+ request.onerror = () => reject(/* @__PURE__ */ new Error(`Failed to open download DB: ${request.error?.message}`));
+ request.onsuccess = () => resolve(request.result);
+ request.onupgradeneeded = (event) => {
+ const db = event.target.result;
+ if (!db.objectStoreNames.contains("manifests")) db.createObjectStore("manifests", { keyPath: "modelId" });
+ if (!db.objectStoreNames.contains("chunks")) db.createObjectStore("chunks");
+ };
+ });
+ }
+ /**
+ * Get download manifest for a model.
+ */
+ async function getManifest(db, modelId) {
+ return new Promise((resolve, reject) => {
+ const request = db.transaction("manifests", "readonly").objectStore("manifests").get(modelId);
+ request.onerror = () => reject(/* @__PURE__ */ new Error(`Failed to get manifest: ${request.error?.message}`));
+ request.onsuccess = () => resolve(request.result || null);
+ });
+ }
+ /**
+ * Save download manifest.
+ */
+ async function saveManifest(db, manifest) {
+ return new Promise((resolve, reject) => {
+ const request = db.transaction("manifests", "readwrite").objectStore("manifests").put(manifest);
+ request.onerror = () => reject(/* @__PURE__ */ new Error(`Failed to save manifest: ${request.error?.message}`));
+ request.onsuccess = () => resolve();
+ });
+ }
+ /**
+ * Save a single chunk.
+ */
+ async function saveChunk(db, modelId, chunkIndex, data) {
+ return new Promise((resolve, reject) => {
+ const store = db.transaction("chunks", "readwrite").objectStore("chunks");
+ const key = `${modelId}-${chunkIndex}`;
+ const request = store.put(data, key);
+ request.onerror = () => reject(/* @__PURE__ */ new Error(`Failed to save chunk ${chunkIndex}: ${request.error?.message}`));
+ request.onsuccess = () => resolve();
+ });
+ }
+ /**
+ * Get a single chunk.
+ */
+ async function getChunk(db, modelId, chunkIndex) {
+ return new Promise((resolve, reject) => {
+ const store = db.transaction("chunks", "readonly").objectStore("chunks");
+ const key = `${modelId}-${chunkIndex}`;
+ const request = store.get(key);
+ request.onerror = () => reject(/* @__PURE__ */ new Error(`Failed to get chunk ${chunkIndex}: ${request.error?.message}`));
+ request.onsuccess = () => resolve(request.result || null);
+ });
+ }
+ /**
+ * Delete all chunks and manifest for a model.
+ */
+ async function clearModelData(db, modelId) {
+ const manifest = await getManifest(db, modelId);
+ return new Promise((resolve, reject) => {
+ const tx = db.transaction(["manifests", "chunks"], "readwrite");
+ tx.objectStore("manifests").delete(modelId);
+ if (manifest) {
+ const totalChunks = Math.ceil(manifest.totalBytes / manifest.chunkSize);
+ const chunkStore = tx.objectStore("chunks");
+ for (let i = 0; i < totalChunks; i++) chunkStore.delete(`${modelId}-${i}`);
+ }
+ tx.oncomplete = () => resolve();
+ tx.onerror = () => reject(/* @__PURE__ */ new Error(`Failed to clear model data: ${tx.error?.message}`));
+ });
+ }
+ /**
+ * Chunked resumable downloader for large model files.
+ * Downloads in 1.5MB chunks to avoid iOS memory pressure.
+ */
+ async function downloadModelChunked(url, modelId, options = {}) {
+ const { onProgress, signal } = options;
+ setDownloadPhase("downloading", modelId);
+ const db = await openDownloadDB();
+ try {
+ let manifest = await getManifest(db, modelId);
+ const headResponse = await fetch(url, {
+ method: "HEAD",
+ signal
+ });
+ if (!headResponse.ok) throw new Error(`HEAD request failed: ${headResponse.status} ${headResponse.statusText}`);
+ const contentLength = parseInt(headResponse.headers.get("content-length") || "0", 10);
+ const etag = headResponse.headers.get("etag");
+ const acceptRanges = headResponse.headers.get("accept-ranges");
+ if (!contentLength) throw new Error("Server did not provide content-length");
+ if (manifest && manifest.etag !== etag) {
+ console.warn(`Model ${modelId} has been updated (etag mismatch). Clearing cached chunks.`);
+ await clearModelData(db, modelId);
+ manifest = null;
+ }
+ if (!(acceptRanges === "bytes")) {
+ console.warn(`Server doesn't support range requests for ${modelId}. Using regular download.`);
+ db.close();
+ const response = await fetch(url, { signal });
+ if (!response.ok) throw new Error(`Download failed: ${response.status}`);
+ setDownloadPhase("caching", modelId);
+ const buffer = await response.arrayBuffer();
+ setDownloadPhase("ready", modelId);
+ return buffer;
+ }
+ const totalChunks = Math.ceil(contentLength / CHUNK_SIZE_BYTES);
+ if (!manifest) {
+ manifest = {
+ modelId,
+ url,
+ etag,
+ totalBytes: contentLength,
+ chunkSize: CHUNK_SIZE_BYTES,
+ completedChunks: [],
+ createdAt: Date.now(),
+ updatedAt: Date.now()
+ };
+ await saveManifest(db, manifest);
+ }
+ for (let i = 0; i < totalChunks; i++) {
+ if (signal?.aborted) throw new Error("Download aborted");
+ if (manifest.completedChunks.includes(i)) {
+ const bytesDownloaded$1 = manifest.completedChunks.length / totalChunks * contentLength;
+ onProgress?.({
+ phase: "resuming",
+ bytesDownloaded: bytesDownloaded$1,
+ totalBytes: contentLength,
+ percent: Math.round(bytesDownloaded$1 / contentLength * 100)
+ });
+ continue;
+ }
+ const start = i * CHUNK_SIZE_BYTES;
+ const end = Math.min(start + CHUNK_SIZE_BYTES - 1, contentLength - 1);
+ const response = await fetch(url, {
+ headers: { Range: `bytes=${start}-${end}` },
+ signal
+ });
+ if (response.status !== 206) throw new Error(`Range request failed: ${response.status} (expected 206)`);
+ const chunkData = await response.arrayBuffer();
+ await saveChunk(db, modelId, i, chunkData);
+ manifest.completedChunks.push(i);
+ manifest.updatedAt = Date.now();
+ await saveManifest(db, manifest);
+ const bytesDownloaded = manifest.completedChunks.length * CHUNK_SIZE_BYTES;
+ setDownloadPhase("downloading", modelId, {
+ bytesDownloaded,
+ totalBytes: contentLength
+ });
+ onProgress?.({
+ phase: "downloading",
+ bytesDownloaded: Math.min(bytesDownloaded, contentLength),
+ totalBytes: contentLength,
+ percent: Math.round(manifest.completedChunks.length / totalChunks * 100)
+ });
+ response.body = null;
+ }
+ setDownloadPhase("caching", modelId);
+ onProgress?.({
+ phase: "assembling",
+ bytesDownloaded: contentLength,
+ totalBytes: contentLength,
+ percent: 100
+ });
+ const finalBuffer = new ArrayBuffer(contentLength);
+ const finalView = new Uint8Array(finalBuffer);
+ for (let i = 0; i < totalChunks; i++) {
+ const chunk = await getChunk(db, modelId, i);
+ if (!chunk) throw new Error(`Missing chunk ${i} during assembly`);
+ const offset = i * CHUNK_SIZE_BYTES;
+ finalView.set(new Uint8Array(chunk), offset);
+ }
+ await clearModelData(db, modelId);
+ db.close();
+ setDownloadPhase("ready", modelId);
+ return finalBuffer;
+ } catch (error) {
+ setDownloadPhase("error", modelId);
+ db.close();
+ throw error;
+ }
+ }
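Editor's note: downloadModelChunked persists every 1.5MB chunk plus a manifest in IndexedDB before advancing, so an interrupted transfer resumes from the last completed chunk and an etag change invalidates the cache. A hedged usage sketch (the URL is illustrative; the server must answer Range requests to take the chunked path):

    // Sketch: resumable chunked download with progress and cancellation.
    const controller = new AbortController();
    const buffer = await downloadModelChunked(
      "https://example.com/models/smollm2-360m.onnx", // hypothetical model URL
      "smollm2-360m",
      {
        signal: controller.signal, // abort() stops the loop; finished chunks stay cached
        onProgress: ({ phase, percent }) => console.log(phase, percent + "%"),
      }
    );
    // buffer is the fully assembled ArrayBuffer; chunks and manifest are cleared afterwards.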
+ /**
+ * Check if a model has an incomplete download.
+ */
+ async function hasIncompleteDownload(modelId) {
+ try {
+ const db = await openDownloadDB();
+ const manifest = await getManifest(db, modelId);
+ db.close();
+ if (!manifest) return { incomplete: false };
+ const totalChunks = Math.ceil(manifest.totalBytes / manifest.chunkSize);
+ const completedChunks = manifest.completedChunks.length;
+ if (completedChunks < totalChunks) return {
+ incomplete: true,
+ bytesDownloaded: completedChunks * manifest.chunkSize,
+ totalBytes: manifest.totalBytes,
+ percent: Math.round(completedChunks / totalChunks * 100)
+ };
+ return { incomplete: false };
+ } catch {
+ return { incomplete: false };
+ }
+ }
+ /**
+ * Clear incomplete download data for a model.
+ */
+ async function clearIncompleteDownload(modelId) {
+ try {
+ const db = await openDownloadDB();
+ await clearModelData(db, modelId);
+ db.close();
+ } catch {}
+ }
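Editor's note: paired with downloadModelChunked, these two helpers support a resume-or-restart prompt before kicking off a large transfer; a short sketch:

    // Sketch: detect and surface a partial download at startup.
    const status = await hasIncompleteDownload("smollm2-360m");
    if (status.incomplete) {
      console.log(`Partial download found: ${status.percent}% (${status.bytesDownloaded} bytes).`);
      // Re-calling downloadModelChunked resumes from here; to start fresh instead:
      // await clearIncompleteDownload("smollm2-360m");
    }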
+ /**
  * Check if there's enough storage quota for a model download.
  * Returns estimated available space and whether download should proceed.
  */
@@ -3668,6 +4136,14 @@ var browser_default = {
  getBrowserDiagnostics,
  getRecommendedModels,
  checkStorageQuota,
+ isModelSafeForDevice,
+ setDownloadPhase,
+ getDownloadPhase,
+ detectMemoryCrash,
+ clearDownloadPhase,
+ downloadModelChunked,
+ hasIncompleteDownload,
+ clearIncompleteDownload,
  createGerbilWorker,
  playAudio,
  createAudioPlayer,
@@ -3678,5 +4154,5 @@ var browser_default = {
  };
 
  //#endregion
- export { BUILTIN_MODELS, checkStorageQuota, checkWebGPUCapabilities, checkWebGPUReady, createAudioPlayer, createGerbilWorker, browser_default as default, getBrowserDiagnostics, getRecommendedModels, getWebGPUInfo, isWebGPUSupported, playAudio, preloadChatModel, preloadEmbeddingModel, preloadSTTModel, preloadTTSModel, useChat, useCompletion, useEmbedding, useSpeech, useVoiceChat, useVoiceInput };
+ export { BUILTIN_MODELS, checkStorageQuota, checkWebGPUCapabilities, checkWebGPUReady, clearDownloadPhase, clearIncompleteDownload, createAudioPlayer, createGerbilWorker, browser_default as default, detectMemoryCrash, downloadModelChunked, getBrowserDiagnostics, getDownloadPhase, getRecommendedModels, getWebGPUInfo, hasIncompleteDownload, isModelSafeForDevice, isWebGPUSupported, playAudio, preloadChatModel, preloadEmbeddingModel, preloadSTTModel, preloadTTSModel, setDownloadPhase, useChat, useCompletion, useEmbedding, useSpeech, useVoiceChat, useVoiceInput };
  //# sourceMappingURL=index.js.map
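Editor's note: for consumers, the visible surface of this release is the set of new named exports above; a hypothetical import (the /browser subpath is inferred from the dist layout, not confirmed by this diff — check package.json "exports"):

    // Assumed entry point based on dist/browser/index.js.
    import {
      isModelSafeForDevice,
      detectMemoryCrash,
      downloadModelChunked,
      hasIncompleteDownload,
      clearIncompleteDownload,
    } from "@tryhamster/gerbil/browser";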