@tryhamster/gerbil 1.0.0-rc.23 → 1.0.0-rc.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. package/dist/browser/index.d.ts +146 -2
  2. package/dist/browser/index.d.ts.map +1 -1
  3. package/dist/browser/index.js +507 -22
  4. package/dist/browser/index.js.map +1 -1
  5. package/dist/cli.mjs +7 -7
  6. package/dist/cli.mjs.map +1 -1
  7. package/dist/frameworks/express.d.mts +1 -3
  8. package/dist/frameworks/express.d.mts.map +1 -1
  9. package/dist/frameworks/express.mjs +3 -3
  10. package/dist/frameworks/express.mjs.map +1 -1
  11. package/dist/frameworks/fastify.d.mts +1 -1
  12. package/dist/frameworks/fastify.mjs +1 -1
  13. package/dist/frameworks/hono.d.mts +1 -1
  14. package/dist/frameworks/hono.mjs +1 -1
  15. package/dist/frameworks/next.d.mts +2 -2
  16. package/dist/frameworks/next.mjs +1 -1
  17. package/dist/frameworks/react.d.mts +1 -1
  18. package/dist/frameworks/react.d.mts.map +1 -1
  19. package/dist/frameworks/trpc.d.mts +1 -1
  20. package/dist/frameworks/trpc.mjs +1 -1
  21. package/dist/{gerbil-DJygY0sJ.d.mts → gerbil-CbnV_cG5.d.mts} +9 -2
  22. package/dist/gerbil-CbnV_cG5.d.mts.map +1 -0
  23. package/dist/{gerbil-PzPtcdeM.mjs → gerbil-DODVGr-u.mjs} +1 -1
  24. package/dist/{gerbil-DzZ-L6n8.mjs → gerbil-jO9anIh_.mjs} +90 -3
  25. package/dist/gerbil-jO9anIh_.mjs.map +1 -0
  26. package/dist/index.d.mts +3 -3
  27. package/dist/index.d.mts.map +1 -1
  28. package/dist/index.mjs +2 -2
  29. package/dist/index.mjs.map +1 -1
  30. package/dist/integrations/ai-sdk.d.mts +1 -1
  31. package/dist/integrations/ai-sdk.mjs +1 -1
  32. package/dist/integrations/langchain.d.mts +1 -1
  33. package/dist/integrations/langchain.mjs +1 -1
  34. package/dist/integrations/llamaindex.d.mts +1 -1
  35. package/dist/integrations/llamaindex.mjs +1 -1
  36. package/dist/integrations/mcp.d.mts +2 -2
  37. package/dist/integrations/mcp.mjs +4 -4
  38. package/dist/{mcp-D161vL_C.mjs → mcp-tavZtFY1.mjs} +3 -3
  39. package/dist/{mcp-D161vL_C.mjs.map → mcp-tavZtFY1.mjs.map} +1 -1
  40. package/dist/{one-liner-C-pRqDK2.mjs → one-liner-Ba58M_6j.mjs} +2 -2
  41. package/dist/{one-liner-C-pRqDK2.mjs.map → one-liner-Ba58M_6j.mjs.map} +1 -1
  42. package/dist/{repl-D9x3TnQc.mjs → repl-BGly-o_e.mjs} +3 -3
  43. package/dist/skills/index.d.mts +3 -3
  44. package/dist/skills/index.d.mts.map +1 -1
  45. package/dist/skills/index.mjs +3 -3
  46. package/dist/{skills-D14RwyUN.mjs → skills-BKxP2pex.mjs} +2 -2
  47. package/dist/{skills-D14RwyUN.mjs.map → skills-BKxP2pex.mjs.map} +1 -1
  48. package/dist/{types-evP8RShr.d.mts → types-6uG8lC7u.d.mts} +65 -2
  49. package/dist/types-6uG8lC7u.d.mts.map +1 -0
  50. package/docs/architecture/overview.md +2 -0
  51. package/docs/observability.md +230 -0
  52. package/package.json +5 -4
  53. package/dist/gerbil-DJygY0sJ.d.mts.map +0 -1
  54. package/dist/gerbil-DzZ-L6n8.mjs.map +0 -1
  55. package/dist/types-evP8RShr.d.mts.map +0 -1
@@ -230,7 +230,17 @@ async function createGerbilWorker(options = {}) {
230
230
 
231
231
  const { AutoTokenizer, AutoModelForCausalLM, AutoProcessor, AutoModelForImageTextToText, RawImage, TextStreamer, InterruptableStoppingCriteria, env } = transformers;
232
232
 
233
- // Enable IndexedDB caching for browser (prevents re-downloading models)
233
+ // Detect iOS (Safari, Chrome on iOS, etc.)
234
+ const isIOS = /iPhone|iPad|iPod/.test(navigator.userAgent);
235
+
236
+ // iOS fix: Force single-threaded WASM to prevent memory blowup during cache reads
237
+ // See: https://github.com/huggingface/transformers.js/issues/1242
238
+ if (isIOS && env.backends?.onnx?.wasm) {
239
+ console.log("[Gerbil] iOS detected: forcing single-threaded WASM to prevent crashes");
240
+ env.backends.onnx.wasm.numThreads = 1;
241
+ }
242
+
243
+ // Enable Cache API caching for browser (prevents re-downloading models)
234
244
  env.useBrowserCache = true;
235
245
  env.allowLocalModels = false;
236
246
 
@@ -766,6 +776,13 @@ function useChat(options = {}) {
766
776
  setIsLoading(true);
767
777
  setShouldLoad(true);
768
778
  }, [isLoading]);
779
+ useEffect(() => {
780
+ const crash = detectMemoryCrash();
781
+ if (crash.crashed) {
782
+ setError(crash.recommendation || "Previous model load failed due to device memory limits.");
783
+ onError?.(crash.recommendation || "Previous model load failed");
784
+ }
785
+ }, []);
769
786
  useEffect(() => {
770
787
  if (!shouldLoad) return;
771
788
  if (!isWebGPUSupported()) {
@@ -775,13 +792,27 @@ function useChat(options = {}) {
775
792
  onError?.(gpuError);
776
793
  return;
777
794
  }
795
+ const safetyCheck = isModelSafeForDevice(model);
796
+ if (!safetyCheck.safe) {
797
+ setError(safetyCheck.reason);
798
+ setIsLoading(false);
799
+ onError?.(safetyCheck.reason);
800
+ return;
801
+ }
778
802
  mountedRef.current = true;
803
+ setDownloadPhase("downloading", model);
779
804
  createGerbilWorker({
780
805
  modelId: model,
781
806
  onProgress: (p) => {
782
807
  if (!mountedRef.current) return;
783
808
  setLoadingProgress(p);
809
+ if (p.status === "downloading" && p.progress !== void 0) setDownloadPhase("downloading", model, {
810
+ bytesDownloaded: p.progress,
811
+ totalBytes: 100
812
+ });
813
+ else if (p.status === "loading") setDownloadPhase("initializing", model);
784
814
  if (p.status === "ready") {
815
+ clearDownloadPhase();
785
816
  setIsLoading(false);
786
817
  setIsReady(true);
787
818
  onReady?.();
@@ -799,6 +830,7 @@ function useChat(options = {}) {
799
830
  },
800
831
  onError: (err) => {
801
832
  if (!mountedRef.current) return;
833
+ setDownloadPhase("error", model);
802
834
  setError(err);
803
835
  setIsGenerating(false);
804
836
  onError?.(err);
@@ -808,6 +840,7 @@ function useChat(options = {}) {
808
840
  else worker.terminate();
809
841
  }).catch((err) => {
810
842
  if (mountedRef.current) {
843
+ setDownloadPhase("error", model);
811
844
  setError(err.message);
812
845
  setIsLoading(false);
813
846
  onError?.(err.message);
@@ -1035,6 +1068,13 @@ function useCompletion(options = {}) {
1035
1068
  setIsLoading(true);
1036
1069
  setShouldLoad(true);
1037
1070
  }, [isLoading]);
1071
+ useEffect(() => {
1072
+ const crash = detectMemoryCrash();
1073
+ if (crash.crashed) {
1074
+ setError(crash.recommendation || "Previous model load failed due to device memory limits.");
1075
+ onError?.(crash.recommendation || "Previous model load failed");
1076
+ }
1077
+ }, []);
1038
1078
  useEffect(() => {
1039
1079
  if (!shouldLoad) return;
1040
1080
  if (!isWebGPUSupported()) {
@@ -1044,13 +1084,27 @@ function useCompletion(options = {}) {
1044
1084
  onError?.(gpuError);
1045
1085
  return;
1046
1086
  }
1087
+ const safetyCheck = isModelSafeForDevice(model);
1088
+ if (!safetyCheck.safe) {
1089
+ setError(safetyCheck.reason);
1090
+ setIsLoading(false);
1091
+ onError?.(safetyCheck.reason);
1092
+ return;
1093
+ }
1047
1094
  mountedRef.current = true;
1095
+ setDownloadPhase("downloading", model);
1048
1096
  createGerbilWorker({
1049
1097
  modelId: model,
1050
1098
  onProgress: (p) => {
1051
1099
  if (!mountedRef.current) return;
1052
1100
  setLoadingProgress(p);
1101
+ if (p.status === "downloading" && p.progress !== void 0) setDownloadPhase("downloading", model, {
1102
+ bytesDownloaded: p.progress,
1103
+ totalBytes: 100
1104
+ });
1105
+ else if (p.status === "loading") setDownloadPhase("initializing", model);
1053
1106
  if (p.status === "ready") {
1107
+ clearDownloadPhase();
1054
1108
  setIsLoading(false);
1055
1109
  setIsReady(true);
1056
1110
  onReady?.();
@@ -1070,6 +1124,7 @@ function useCompletion(options = {}) {
1070
1124
  },
1071
1125
  onError: (err) => {
1072
1126
  if (!mountedRef.current) return;
1127
+ setDownloadPhase("error", model);
1073
1128
  setError(err);
1074
1129
  setIsGenerating(false);
1075
1130
  onError?.(err);
@@ -1079,6 +1134,7 @@ function useCompletion(options = {}) {
1079
1134
  else worker.terminate();
1080
1135
  }).catch((err) => {
1081
1136
  if (mountedRef.current) {
1137
+ setDownloadPhase("error", model);
1082
1138
  setError(err.message);
1083
1139
  setIsLoading(false);
1084
1140
  onError?.(err.message);
@@ -1424,6 +1480,14 @@ const TTS_WORKER_CODE = `
1424
1480
 
1425
1481
  const { pipeline, env } = transformers;
1426
1482
 
1483
+ // iOS fix: Force single-threaded WASM to prevent memory blowup during cache reads
1484
+ // See: https://github.com/huggingface/transformers.js/issues/1242
1485
+ const isIOS = /iPhone|iPad|iPod/.test(navigator.userAgent);
1486
+ if (isIOS && env.backends?.onnx?.wasm) {
1487
+ console.log("[Gerbil TTS] iOS detected: forcing single-threaded WASM");
1488
+ env.backends.onnx.wasm.numThreads = 1;
1489
+ }
1490
+
1427
1491
  // Configure environment
1428
1492
  env.useBrowserCache = true;
1429
1493
  env.allowLocalModels = false;
@@ -1445,12 +1509,12 @@ const TTS_WORKER_CODE = `
1445
1509
  // Load Supertonic using transformers.js pipeline with WASM fallback
1446
1510
  let device = "webgpu";
1447
1511
  try {
1448
- ttsInstance = await pipeline("text-to-speech", repo, {
1512
+ ttsInstance = await pipeline("text-to-speech", repo, {
1449
1513
  device,
1450
- progress_callback: (progress) => {
1451
- self.postMessage({ type: "progress", payload: progress });
1452
- },
1453
- });
1514
+ progress_callback: (progress) => {
1515
+ self.postMessage({ type: "progress", payload: progress });
1516
+ },
1517
+ });
1454
1518
  } catch (webgpuError) {
1455
1519
  console.warn("WebGPU failed for TTS, falling back to WASM:", webgpuError.message);
1456
1520
  self.postMessage({ type: "fallback", payload: { backend: "wasm", reason: webgpuError.message } });
@@ -1493,8 +1557,8 @@ const TTS_WORKER_CODE = `
1493
1557
 
1494
1558
  // Try WebGPU first, fallback to WASM
1495
1559
  try {
1496
- kokoroTTS = await KokoroTTS.from_pretrained(repo, {
1497
- dtype: "fp32",
1560
+ kokoroTTS = await KokoroTTS.from_pretrained(repo, {
1561
+ dtype: "fp32",
1498
1562
  device: "webgpu",
1499
1563
  progress_callback: (progress) => {
1500
1564
  self.postMessage({ type: "progress", payload: progress });
@@ -1506,10 +1570,10 @@ const TTS_WORKER_CODE = `
1506
1570
  kokoroTTS = await KokoroTTS.from_pretrained(repo, {
1507
1571
  dtype: "fp32",
1508
1572
  device: "wasm",
1509
- progress_callback: (progress) => {
1510
- self.postMessage({ type: "progress", payload: progress });
1511
- },
1512
- });
1573
+ progress_callback: (progress) => {
1574
+ self.postMessage({ type: "progress", payload: progress });
1575
+ },
1576
+ });
1513
1577
  }
1514
1578
  }
1515
1579
 
@@ -1629,10 +1693,18 @@ function useSpeech(options = {}) {
1629
1693
  setIsLoading(true);
1630
1694
  setShouldLoad(true);
1631
1695
  }, [isLoading]);
1696
+ useEffect(() => {
1697
+ const crash = detectMemoryCrash();
1698
+ if (crash.crashed) {
1699
+ setError(crash.recommendation || "Previous model load failed due to device memory limits.");
1700
+ onError?.(crash.recommendation || "Previous model load failed");
1701
+ }
1702
+ }, []);
1632
1703
  useEffect(() => {
1633
1704
  if (!shouldLoad) return;
1634
1705
  mountedRef.current = true;
1635
1706
  modelIdRef.current = modelId;
1707
+ setDownloadPhase("downloading", modelId);
1636
1708
  const config = TTS_MODELS[modelId];
1637
1709
  setLoadingProgress({
1638
1710
  status: "loading",
@@ -1649,6 +1721,7 @@ function useSpeech(options = {}) {
1649
1721
  progress: Math.round(payload.progress || 0)
1650
1722
  });
1651
1723
  if (type === "ready") {
1724
+ clearDownloadPhase();
1652
1725
  setIsLoading(false);
1653
1726
  setIsReady(true);
1654
1727
  setLoadingProgress({ status: "ready" });
@@ -1671,6 +1744,7 @@ function useSpeech(options = {}) {
1671
1744
  playAudioData(audio, sampleRate);
1672
1745
  }
1673
1746
  if (type === "error") {
1747
+ setDownloadPhase("error", modelId);
1674
1748
  const errorMsg = payload;
1675
1749
  setError(errorMsg);
1676
1750
  setIsLoading(false);
@@ -1684,6 +1758,7 @@ function useSpeech(options = {}) {
1684
1758
  };
1685
1759
  worker.onerror = (err) => {
1686
1760
  if (!mountedRef.current) return;
1761
+ setDownloadPhase("error", modelId);
1687
1762
  let errorMsg = err.message || "";
1688
1763
  if (!errorMsg || errorMsg === "Script error.") errorMsg = getWebGPUErrorMessage();
1689
1764
  setError(errorMsg);
@@ -1925,6 +2000,14 @@ const STT_WORKER_CODE = `
1925
2000
 
1926
2001
  const { pipeline, env } = transformers;
1927
2002
 
2003
+ // iOS fix: Force single-threaded WASM to prevent memory blowup during cache reads
2004
+ // See: https://github.com/huggingface/transformers.js/issues/1242
2005
+ const isIOS = /iPhone|iPad|iPod/.test(navigator.userAgent);
2006
+ if (isIOS && env.backends?.onnx?.wasm) {
2007
+ console.log("[Gerbil STT] iOS detected: forcing single-threaded WASM");
2008
+ env.backends.onnx.wasm.numThreads = 1;
2009
+ }
2010
+
1928
2011
  // Configure environment
1929
2012
  env.useBrowserCache = true;
1930
2013
  env.allowLocalModels = false;
@@ -1941,12 +2024,12 @@ const STT_WORKER_CODE = `
1941
2024
  // Load Whisper model with WASM fallback
1942
2025
  let device = "webgpu";
1943
2026
  try {
1944
- sttPipeline = await pipeline("automatic-speech-recognition", model, {
2027
+ sttPipeline = await pipeline("automatic-speech-recognition", model, {
1945
2028
  device,
1946
- progress_callback: (progress) => {
1947
- self.postMessage({ type: "progress", payload: progress });
1948
- },
1949
- });
2029
+ progress_callback: (progress) => {
2030
+ self.postMessage({ type: "progress", payload: progress });
2031
+ },
2032
+ });
1950
2033
  } catch (webgpuError) {
1951
2034
  console.warn("WebGPU failed for STT, falling back to WASM:", webgpuError.message);
1952
2035
  self.postMessage({ type: "fallback", payload: { backend: "wasm", reason: webgpuError.message } });
@@ -2065,9 +2148,17 @@ function useVoiceInput(options = {}) {
2065
2148
  "whisper-small.en": "onnx-community/whisper-small.en"
2066
2149
  }[modelId] || modelId;
2067
2150
  };
2151
+ useEffect(() => {
2152
+ const crash = detectMemoryCrash();
2153
+ if (crash.crashed) {
2154
+ setError(crash.recommendation || "Previous model load failed due to device memory limits.");
2155
+ onError?.(crash.recommendation || "Previous model load failed");
2156
+ }
2157
+ }, []);
2068
2158
  useEffect(() => {
2069
2159
  if (!shouldLoad || isReady) return;
2070
2160
  mountedRef.current = true;
2161
+ setDownloadPhase("downloading", model);
2071
2162
  setIsLoading(true);
2072
2163
  setLoadingProgress({
2073
2164
  status: "loading",
@@ -2093,6 +2184,7 @@ function useVoiceInput(options = {}) {
2093
2184
  onProgress?.(progress);
2094
2185
  }
2095
2186
  if (type === "ready") {
2187
+ clearDownloadPhase();
2096
2188
  setIsReady(true);
2097
2189
  setIsLoading(false);
2098
2190
  setLoadingProgress({ status: "ready" });
@@ -2109,6 +2201,7 @@ function useVoiceInput(options = {}) {
2109
2201
  }
2110
2202
  }
2111
2203
  if (type === "error") {
2204
+ setDownloadPhase("error", model);
2112
2205
  const errMsg = payload;
2113
2206
  setError(errMsg);
2114
2207
  setIsLoading(false);
@@ -2131,6 +2224,7 @@ function useVoiceInput(options = {}) {
2131
2224
  };
2132
2225
  worker.onerror = (err) => {
2133
2226
  if (!mountedRef.current) return;
2227
+ setDownloadPhase("error", model);
2134
2228
  let errMsg = err.message || "";
2135
2229
  if (!errMsg || errMsg === "Script error.") errMsg = getWebGPUErrorMessage();
2136
2230
  setError(errMsg);
@@ -2905,6 +2999,14 @@ const EMBEDDING_WORKER_CODE = `
2905
2999
 
2906
3000
  const { pipeline, env } = transformers;
2907
3001
 
3002
+ // iOS fix: Force single-threaded WASM to prevent memory blowup during cache reads
3003
+ // See: https://github.com/huggingface/transformers.js/issues/1242
3004
+ const isIOS = /iPhone|iPad|iPod/.test(navigator.userAgent);
3005
+ if (isIOS && env.backends?.onnx?.wasm) {
3006
+ console.log("[Gerbil Embedding] iOS detected: forcing single-threaded WASM");
3007
+ env.backends.onnx.wasm.numThreads = 1;
3008
+ }
3009
+
2908
3010
  // Configure environment
2909
3011
  env.useBrowserCache = true;
2910
3012
  env.allowLocalModels = false;
@@ -3052,6 +3154,13 @@ function useEmbedding(options = {}) {
3052
3154
  if (magnitude === 0) return 0;
3053
3155
  return dotProduct / magnitude;
3054
3156
  }, []);
3157
+ useEffect(() => {
3158
+ const crash = detectMemoryCrash();
3159
+ if (crash.crashed) {
3160
+ setError(crash.recommendation || "Previous model load failed due to device memory limits.");
3161
+ onError?.(crash.recommendation || "Previous model load failed");
3162
+ }
3163
+ }, []);
3055
3164
  const load = useCallback(() => {
3056
3165
  if (isReady && workerRef.current) return Promise.resolve();
3057
3166
  if (loadRequestedRef.current && readyPromiseRef.current) return readyPromiseRef.current;
@@ -3061,6 +3170,7 @@ function useEmbedding(options = {}) {
3061
3170
  status: "loading",
3062
3171
  message: "Loading embedding model..."
3063
3172
  });
3173
+ setDownloadPhase("downloading", model);
3064
3174
  readyPromiseRef.current = new Promise((resolve) => {
3065
3175
  readyResolveRef.current = resolve;
3066
3176
  });
@@ -3075,18 +3185,21 @@ function useEmbedding(options = {}) {
3075
3185
  progress: Math.round(payload.loaded / payload.total * 100)
3076
3186
  });
3077
3187
  } else if (type === "ready") {
3188
+ clearDownloadPhase();
3078
3189
  setIsLoading(false);
3079
3190
  setIsReady(true);
3080
3191
  setLoadingProgress({ status: "ready" });
3081
3192
  readyResolveRef.current?.();
3082
3193
  onReady?.();
3083
3194
  } else if (type === "error") {
3195
+ setDownloadPhase("error", model);
3084
3196
  setIsLoading(false);
3085
3197
  setError(payload);
3086
3198
  onError?.(payload);
3087
3199
  }
3088
3200
  });
3089
3201
  worker.onerror = (err) => {
3202
+ setDownloadPhase("error", model);
3090
3203
  setIsLoading(false);
3091
3204
  let errMsg = err.message || "";
3092
3205
  if (!errMsg || errMsg === "Script error.") errMsg = getWebGPUErrorMessage();
@@ -3591,11 +3704,11 @@ async function getBrowserDiagnostics() {
3591
3704
  } catch {
3592
3705
  moduleWorkers = false;
3593
3706
  }
3594
- let indexedDB = false;
3707
+ let indexedDB$1 = false;
3595
3708
  try {
3596
- indexedDB = typeof window !== "undefined" && "indexedDB" in window;
3709
+ indexedDB$1 = typeof window !== "undefined" && "indexedDB" in window;
3597
3710
  } catch {
3598
- indexedDB = false;
3711
+ indexedDB$1 = false;
3599
3712
  }
3600
3713
  return {
3601
3714
  browser,
@@ -3608,7 +3721,7 @@ async function getBrowserDiagnostics() {
3608
3721
  webgpuExpected,
3609
3722
  webgpu,
3610
3723
  moduleWorkers,
3611
- indexedDB
3724
+ indexedDB: indexedDB$1
3612
3725
  };
3613
3726
  }
3614
3727
  /**
@@ -3646,6 +3759,370 @@ function getRecommendedModels() {
3646
3759
  };
3647
3760
  }
3648
3761
  /**
3762
+ * Model tiers for iOS devices (safe / risky / blocked model IDs) plus a memory budget in MB.
3763
+ * Based on WKWebView effective memory limit of ~200-400MB.
3764
+ */
3765
const IOS_MODEL_LIMITS = {
	// Informational tier; isModelSafeForDevice does not consult this list.
	safe: ["smollm2-135m", "smollm2-360m"],
	// Allowed on iOS unless the device reports less than 4 GB of RAM.
	risky: ["qwen3-0.6b"],
	// Always rejected on iOS.
	blocked: ["qwen3-1.7b", "qwen3-4b"],
	maxBudgetMB: 350
};
/**
 * Check if a model is safe to load on the current device.
 *
 * The decision is based on the user agent (iOS, Android, or anything else)
 * and, for "risky" iOS models, on `navigator.deviceMemory` when the browser
 * reports it.
 *
 * @param {string} modelId - Model identifier; matched case-insensitively with
 *   punctuation normalized, so full repository IDs also match.
 * @returns {{ safe: boolean, reason: string, recommendation?: string, maxSafeModel?: string }}
 *   `recommendation` and `maxSafeModel` are only present when `safe` is false.
 */
function isModelSafeForDevice(modelId) {
	const hasNavigator = typeof navigator !== "undefined";
	const ua = hasNavigator ? navigator.userAgent : "";
	const isIOS = /iPhone|iPad|iPod/.test(ua);
	const isIOSChrome = isIOS && /CriOS/.test(ua);
	// deviceMemory is undefined (not null) where the API is unsupported, so
	// only trust it when it is actually a number.
	const reportedMemory = hasNavigator ? navigator.deviceMemory : void 0;
	const deviceMemory = typeof reportedMemory === "number" ? reportedMemory : null;
	const normalize = (id) => id.toLowerCase().replace(/[^a-z0-9]/g, "-");
	const loweredId = String(modelId ?? "").toLowerCase();
	const normalizedId = normalize(loweredId);
	const matchesAny = (ids) => ids.some((id) => normalizedId.includes(normalize(id)));
	if (isIOS) {
		if (matchesAny(IOS_MODEL_LIMITS.blocked)) return {
			safe: false,
			reason: `Model ${modelId} is too large for iOS devices${isIOSChrome ? " (iOS Chrome uses WKWebView, same limits as Safari)" : ""}. WKWebView memory limit (~300-400MB) will cause crashes.`,
			recommendation: "Use smollm2-360m or qwen3-0.6b on iOS. For larger models, use desktop.",
			maxSafeModel: "qwen3-0.6b"
		};
		if (matchesAny(IOS_MODEL_LIMITS.risky)) {
			if (deviceMemory !== null && deviceMemory < 4) return {
				safe: false,
				reason: `Model ${modelId} may crash on your device (${deviceMemory} GB RAM detected).`,
				recommendation: "Use smollm2-360m for reliable performance, or try on a newer device.",
				maxSafeModel: "smollm2-360m"
			};
			return {
				safe: true,
				reason: `Model ${modelId} should work on modern iOS devices.`
			};
		}
		return {
			safe: true,
			reason: "Model is within iOS memory limits."
		};
	}
	if (/Android/.test(ua)) {
		// Match "7b" only when it is not the fractional part of a size such as
		// "1.7b"; a plain substring test on the normalized ID ("qwen3-1-7b")
		// would reject the very model recommended below.
		const isSevenBillionClass = /(^|[^.])7b/.test(loweredId);
		if (normalizedId.includes("qwen3-4b") || isSevenBillionClass) return {
			safe: false,
			reason: `Model ${modelId} is very large and may crash on Android devices.`,
			recommendation: "Use qwen3-1.7b or smaller on Android.",
			maxSafeModel: "qwen3-1.7b"
		};
	}
	return {
		safe: true,
		reason: "Desktop browser has sufficient memory."
	};
}
3818
const SESSION_STORAGE_KEY = "gerbil_session_phase";
/**
 * Generate a session ID of the form `<epoch-ms>-<random base36 suffix>`.
 * Uses Math.random, so the ID is for bookkeeping only, not for security.
 *
 * @returns {string} A freshly generated session ID.
 */
function generateSessionId() {
	const randomSuffix = Math.random().toString(36).slice(2, 9);
	return `${Date.now()}-${randomSuffix}`;
}
/**
 * Get or create the current session ID.
 *
 * The ID is stored in sessionStorage under "gerbil_session_id". When
 * sessionStorage is missing or throws (for example when storage access is
 * denied), a fresh non-persisted ID is returned instead of propagating the
 * error.
 *
 * @returns {string} The stored session ID, or a newly generated one.
 */
function getSessionId() {
	try {
		// Probe the storage that is actually used below; the probe itself can
		// throw when access is blocked, hence it lives inside the try.
		if (typeof sessionStorage === "undefined") return generateSessionId();
		const existingId = sessionStorage.getItem("gerbil_session_id");
		if (existingId) return existingId;
		const createdId = generateSessionId();
		sessionStorage.setItem("gerbil_session_id", createdId);
		return createdId;
	} catch {
		return generateSessionId();
	}
}
3837
/**
 * Set the current download/initialization phase.
 *
 * Writes a JSON record (phase, model ID, session ID, timestamp and optional
 * byte counters) to localStorage under SESSION_STORAGE_KEY so that a later
 * page load can detect a reload that happened during a critical operation.
 *
 * This is best-effort bookkeeping: when localStorage is unavailable, denied
 * or full, the call is a no-op rather than an error, so it can never break
 * the model load it is tracking.
 *
 * @param {string} phase - Phase name, e.g. "downloading", "initializing", "error".
 * @param {string} [modelId] - Model being loaded; stored as null when omitted.
 * @param {{ bytesDownloaded?: number, totalBytes?: number }} [progress] - Optional byte counters.
 * @returns {void}
 */
function setDownloadPhase(phase, modelId, progress) {
	try {
		if (typeof localStorage === "undefined") return;
		const state = {
			phase,
			modelId: modelId || null,
			sessionId: getSessionId(),
			timestamp: Date.now(),
			bytesDownloaded: progress?.bytesDownloaded,
			totalBytes: progress?.totalBytes
		};
		localStorage.setItem(SESSION_STORAGE_KEY, JSON.stringify(state));
	} catch {
		// Storage may throw (quota exceeded, access denied). Losing the crash
		// marker is preferable to failing the caller's load/progress handler.
	}
}
3853
/**
 * Get the last known download phase from storage.
 *
 * Reads and parses the JSON record stored under SESSION_STORAGE_KEY.
 *
 * @returns {object | null} The stored phase record, or null when localStorage
 *   is unavailable or throws, nothing is stored, or the stored value is not a
 *   JSON object.
 */
function getDownloadPhase() {
	try {
		// The availability probe is inside the try because merely touching
		// localStorage can throw when storage access is denied.
		if (typeof localStorage === "undefined") return null;
		const raw = localStorage.getItem(SESSION_STORAGE_KEY);
		if (!raw) return null;
		const parsed = JSON.parse(raw);
		const isRecord = parsed !== null && typeof parsed === "object" && !Array.isArray(parsed);
		return isRecord ? parsed : null;
	} catch {
		return null;
	}
}
3866
/**
 * Detect if the page reloaded during a model download/initialization.
 *
 * A crash is reported when the stored phase record (see getDownloadPhase)
 * is in a critical phase ("downloading", "caching" or "initializing"), was
 * written by a different session ID than the current one, and is less than
 * five minutes old. On detection the stored record is removed so the same
 * crash is not reported twice.
 *
 * @returns {{ crashed: boolean, phase?: string, modelId?: string, timeSinceCrash?: number, recommendation?: string }}
 *   `{ crashed: false }` when nothing was detected; otherwise the details of
 *   the interrupted load plus a user-facing recommendation.
 */
function detectMemoryCrash() {
	const crashWindowMs = 300 * 1e3;
	const criticalPhases = [
		"downloading",
		"caching",
		"initializing"
	];
	const lastState = getDownloadPhase();
	const currentSessionId = getSessionId();
	if (!lastState) return { crashed: false };
	const wasInCriticalPhase = criticalPhases.includes(lastState.phase);
	const sessionChanged = lastState.sessionId !== currentSessionId;
	const timeSinceCrash = Date.now() - lastState.timestamp;
	if (!(wasInCriticalPhase && sessionChanged && timeSinceCrash < crashWindowMs)) return { crashed: false };
	try {
		localStorage.removeItem(SESSION_STORAGE_KEY);
	} catch {
		// Failing to clear the marker must not hide the crash from the caller.
	}
	// The record comes from storage, so do not assume modelId is a string.
	const wasLargeModel = typeof lastState.modelId === "string" && lastState.modelId.includes("1.7b");
	return {
		crashed: true,
		phase: lastState.phase,
		modelId: lastState.modelId || void 0,
		timeSinceCrash,
		recommendation: wasLargeModel ? "The model was too large for your device. Try smollm2-360m or qwen3-0.6b instead." : "Your device ran out of memory. Try a smaller model or use a desktop browser."
	};
}
3895
/**
 * Clear session phase (call when model loads successfully).
 *
 * Removes the record stored under SESSION_STORAGE_KEY. Best-effort: when
 * localStorage is unavailable or throws, the call is a no-op so that the
 * caller's "ready" handling is never interrupted.
 *
 * @returns {void}
 */
function clearDownloadPhase() {
	try {
		if (typeof localStorage === "undefined") return;
		localStorage.removeItem(SESSION_STORAGE_KEY);
	} catch {
		// Storage access denied; nothing useful to do with the failure.
	}
}
3902
/** Size of one download chunk in bytes (1.5 MiB). */
const CHUNK_SIZE_BYTES = 1.5 * 1024 * 1024;
/** Name of the IndexedDB database that holds chunked-download state. */
const DOWNLOAD_DB_NAME = "gerbil-model-chunks";
const DOWNLOAD_DB_VERSION = 1;
/**
 * Open the chunked-download IndexedDB database, creating its object stores
 * on first use.
 *
 * Two stores are ensured during an upgrade: "manifests" (keyed by the
 * record's `modelId` property) and "chunks" (out-of-line keys).
 *
 * @returns {Promise<IDBDatabase>} Resolves with the open database; rejects
 *   with an Error when the open request fails.
 */
async function openDownloadDB() {
	const ensureStores = (database) => {
		const existingStores = database.objectStoreNames;
		if (!existingStores.contains("manifests")) database.createObjectStore("manifests", { keyPath: "modelId" });
		if (!existingStores.contains("chunks")) database.createObjectStore("chunks");
	};
	return new Promise((resolve, reject) => {
		const openRequest = indexedDB.open(DOWNLOAD_DB_NAME, DOWNLOAD_DB_VERSION);
		openRequest.onupgradeneeded = (event) => {
			ensureStores(event.target.result);
		};
		openRequest.onsuccess = () => {
			resolve(openRequest.result);
		};
		openRequest.onerror = () => {
			reject(new Error(`Failed to open download DB: ${openRequest.error?.message}`));
		};
	});
}
3922
/**
 * Look up the download manifest stored for a model.
 *
 * @param {IDBDatabase} db - Open download database.
 * @param {string} modelId - Key of the manifest in the "manifests" store.
 * @returns {Promise<object | null>} The manifest record, or null when none
 *   is stored; rejects with an Error when the read request fails.
 */
async function getManifest(db, modelId) {
	return new Promise((resolve, reject) => {
		const manifestStore = db.transaction("manifests", "readonly").objectStore("manifests");
		const lookup = manifestStore.get(modelId);
		lookup.onsuccess = () => {
			const found = lookup.result;
			resolve(found ? found : null);
		};
		lookup.onerror = () => {
			reject(new Error(`Failed to get manifest: ${lookup.error?.message}`));
		};
	});
}
3932
/**
 * Save download manifest.
 *
 * Resolves only once the write transaction has completed. A successful
 * `put` request alone does not guarantee durability: the transaction can
 * still abort afterwards (for example on a quota error at commit time), and
 * the manifest must not claim progress that was never persisted.
 *
 * @param {IDBDatabase} db - Open download database.
 * @param {object} manifest - Manifest record; must carry the `modelId` key path.
 * @returns {Promise<void>} Rejects with an Error when the request fails or
 *   the transaction errors or aborts.
 */
async function saveManifest(db, manifest) {
	return new Promise((resolve, reject) => {
		const tx = db.transaction("manifests", "readwrite");
		const fail = (cause) => reject(new Error(`Failed to save manifest: ${cause?.message}`));
		tx.oncomplete = () => resolve();
		tx.onerror = (event) => fail(event?.target?.error ?? tx.error);
		tx.onabort = () => fail(tx.error);
		const request = tx.objectStore("manifests").put(manifest);
		request.onerror = () => fail(request.error);
	});
}
3942
/**
 * Save a single chunk.
 *
 * The chunk is stored in the "chunks" store under the key
 * `${modelId}-${chunkIndex}`. Resolves only once the write transaction has
 * completed, because a successful `put` request can still be rolled back if
 * the transaction aborts afterwards.
 *
 * @param {IDBDatabase} db - Open download database.
 * @param {string} modelId - Model the chunk belongs to.
 * @param {number} chunkIndex - Zero-based index of the chunk.
 * @param {ArrayBuffer} data - Chunk payload.
 * @returns {Promise<void>} Rejects with an Error when the request fails or
 *   the transaction errors or aborts.
 */
async function saveChunk(db, modelId, chunkIndex, data) {
	return new Promise((resolve, reject) => {
		const tx = db.transaction("chunks", "readwrite");
		const fail = (cause) => reject(new Error(`Failed to save chunk ${chunkIndex}: ${cause?.message}`));
		tx.oncomplete = () => resolve();
		tx.onerror = (event) => fail(event?.target?.error ?? tx.error);
		tx.onabort = () => fail(tx.error);
		const key = `${modelId}-${chunkIndex}`;
		const request = tx.objectStore("chunks").put(data, key);
		request.onerror = () => fail(request.error);
	});
}
3954
/**
 * Read one stored chunk of a model.
 *
 * @param {IDBDatabase} db - Open download database.
 * @param {string} modelId - Model the chunk belongs to.
 * @param {number} chunkIndex - Zero-based index of the chunk.
 * @returns {Promise<ArrayBuffer | null>} The value stored under
 *   `${modelId}-${chunkIndex}`, or null when nothing is stored; rejects with
 *   an Error when the read request fails.
 */
async function getChunk(db, modelId, chunkIndex) {
	return new Promise((resolve, reject) => {
		const chunkKey = `${modelId}-${chunkIndex}`;
		const chunkStore = db.transaction("chunks", "readonly").objectStore("chunks");
		const lookup = chunkStore.get(chunkKey);
		lookup.onsuccess = () => {
			const found = lookup.result;
			resolve(found ? found : null);
		};
		lookup.onerror = () => {
			reject(new Error(`Failed to get chunk ${chunkIndex}: ${lookup.error?.message}`));
		};
	});
}
3966
/**
 * Delete all chunks and manifest for a model.
 *
 * The number of chunk keys to delete is derived from the stored manifest
 * (`totalBytes / chunkSize`); when no manifest exists only the manifest key
 * is deleted.
 *
 * @param {IDBDatabase} db - Open download database.
 * @param {string} modelId - Model whose data should be removed.
 * @returns {Promise<void>} Resolves when the delete transaction completes;
 *   rejects with an Error when it errors or aborts.
 */
async function clearModelData(db, modelId) {
	const manifest = await getManifest(db, modelId);
	return new Promise((resolve, reject) => {
		const tx = db.transaction(["manifests", "chunks"], "readwrite");
		const fail = () => reject(new Error(`Failed to clear model data: ${tx.error?.message}`));
		tx.oncomplete = () => resolve();
		tx.onerror = fail;
		// Without an abort handler the promise would never settle when the
		// transaction is aborted.
		tx.onabort = fail;
		tx.objectStore("manifests").delete(modelId);
		if (manifest) {
			const rawCount = Math.ceil(manifest.totalBytes / manifest.chunkSize);
			// A zero or corrupt chunkSize yields Infinity/NaN; never loop on it.
			const totalChunks = Number.isFinite(rawCount) ? rawCount : 0;
			const chunkStore = tx.objectStore("chunks");
			for (let i = 0; i < totalChunks; i++) chunkStore.delete(`${modelId}-${i}`);
		}
	});
}
3983
/**
 * Chunked resumable downloader for large model files.
 *
 * Issues a HEAD request to learn the size, ETag and range support of `url`.
 * When the server supports byte ranges, the file is fetched chunk by chunk;
 * each chunk and an updated manifest are persisted in IndexedDB so that an
 * interrupted download can resume, and the chunks are finally assembled into
 * a single ArrayBuffer. When ranges are not supported the file is fetched in
 * one request. The download phase is recorded via setDownloadPhase
 * throughout.
 *
 * @param {string} url - URL of the file to download.
 * @param {string} modelId - Key under which manifest and chunks are stored.
 * @param {{ onProgress?: (progress: { phase: string, bytesDownloaded: number, totalBytes: number, percent: number }) => void, signal?: AbortSignal }} [options]
 *   Optional progress callback and abort signal.
 * @returns {Promise<ArrayBuffer>} The complete file contents.
 * @throws {Error} When a request fails, the server omits content-length, a
 *   range request does not answer 206, a chunk is missing during assembly,
 *   or the download is aborted.
 */
async function downloadModelChunked(url, modelId, options = {}) {
	const { onProgress, signal } = options;
	setDownloadPhase("downloading", modelId);
	const db = await openDownloadDB();
	try {
		let manifest = await getManifest(db, modelId);
		const headResponse = await fetch(url, {
			method: "HEAD",
			signal
		});
		if (!headResponse.ok) throw new Error(`HEAD request failed: ${headResponse.status} ${headResponse.statusText}`);
		const contentLength = Number.parseInt(headResponse.headers.get("content-length") || "0", 10);
		const etag = headResponse.headers.get("etag");
		const acceptRanges = headResponse.headers.get("accept-ranges");
		if (!contentLength) throw new Error("Server did not provide content-length");
		if (manifest && manifest.etag !== etag) {
			console.warn(`Model ${modelId} has been updated (etag mismatch). Clearing cached chunks.`);
			await clearModelData(db, modelId);
			manifest = null;
		}
		if (acceptRanges !== "bytes") {
			console.warn(`Server doesn't support range requests for ${modelId}. Using regular download.`);
			const response = await fetch(url, { signal });
			if (!response.ok) throw new Error(`Download failed: ${response.status}`);
			setDownloadPhase("caching", modelId);
			const buffer = await response.arrayBuffer();
			setDownloadPhase("ready", modelId);
			return buffer;
		}
		if (!manifest) {
			manifest = {
				modelId,
				url,
				etag,
				totalBytes: contentLength,
				chunkSize: CHUNK_SIZE_BYTES,
				completedChunks: [],
				createdAt: Date.now(),
				updatedAt: Date.now()
			};
			await saveManifest(db, manifest);
		}
		// Chunk boundaries must follow the size the stored chunks were cut
		// with, which for a resumed download is the manifest's, not
		// necessarily the current constant.
		const chunkSize = manifest.chunkSize || CHUNK_SIZE_BYTES;
		const totalChunks = Math.ceil(contentLength / chunkSize);
		for (let i = 0; i < totalChunks; i++) {
			if (signal?.aborted) throw new Error("Download aborted");
			if (manifest.completedChunks.includes(i)) {
				const resumedBytes = manifest.completedChunks.length / totalChunks * contentLength;
				onProgress?.({
					phase: "resuming",
					bytesDownloaded: resumedBytes,
					totalBytes: contentLength,
					percent: Math.round(resumedBytes / contentLength * 100)
				});
				continue;
			}
			const start = i * chunkSize;
			const end = Math.min(start + chunkSize - 1, contentLength - 1);
			const response = await fetch(url, {
				headers: { Range: `bytes=${start}-${end}` },
				signal
			});
			if (response.status !== 206) throw new Error(`Range request failed: ${response.status} (expected 206)`);
			const chunkData = await response.arrayBuffer();
			await saveChunk(db, modelId, i, chunkData);
			manifest.completedChunks.push(i);
			manifest.updatedAt = Date.now();
			await saveManifest(db, manifest);
			// The last chunk is usually short, so clamp to the real size.
			const bytesDownloaded = Math.min(manifest.completedChunks.length * chunkSize, contentLength);
			setDownloadPhase("downloading", modelId, {
				bytesDownloaded,
				totalBytes: contentLength
			});
			onProgress?.({
				phase: "downloading",
				bytesDownloaded,
				totalBytes: contentLength,
				percent: Math.round(manifest.completedChunks.length / totalChunks * 100)
			});
			// NOTE: Response.body is a read-only accessor; assigning to it
			// throws a TypeError in module (strict) code, so the response is
			// simply left to go out of scope here.
		}
		setDownloadPhase("caching", modelId);
		onProgress?.({
			phase: "assembling",
			bytesDownloaded: contentLength,
			totalBytes: contentLength,
			percent: 100
		});
		const finalBuffer = new ArrayBuffer(contentLength);
		const finalView = new Uint8Array(finalBuffer);
		for (let i = 0; i < totalChunks; i++) {
			const chunk = await getChunk(db, modelId, i);
			if (!chunk) throw new Error(`Missing chunk ${i} during assembly`);
			finalView.set(new Uint8Array(chunk), i * chunkSize);
		}
		await clearModelData(db, modelId);
		setDownloadPhase("ready", modelId);
		return finalBuffer;
	} catch (error) {
		setDownloadPhase("error", modelId);
		throw error;
	} finally {
		// Single release point for the database handle on every path.
		db.close();
	}
}
4093
/**
 * Check if a model has an incomplete download.
 *
 * Compares the number of completed chunks recorded in the stored manifest
 * with the number of chunks implied by its `totalBytes` and `chunkSize`.
 *
 * @param {string} modelId - Model to inspect.
 * @returns {Promise<{ incomplete: boolean, bytesDownloaded?: number, totalBytes?: number, percent?: number }>}
 *   Progress details when a partial download exists; `{ incomplete: false }`
 *   when there is none or when the database cannot be read.
 */
async function hasIncompleteDownload(modelId) {
	try {
		const db = await openDownloadDB();
		let manifest;
		try {
			manifest = await getManifest(db, modelId);
		} finally {
			// Release the handle even when the manifest lookup rejects.
			db.close();
		}
		if (!manifest) return { incomplete: false };
		const totalChunks = Math.ceil(manifest.totalBytes / manifest.chunkSize);
		const completedChunks = manifest.completedChunks.length;
		if (completedChunks < totalChunks) return {
			incomplete: true,
			bytesDownloaded: Math.min(completedChunks * manifest.chunkSize, manifest.totalBytes),
			totalBytes: manifest.totalBytes,
			percent: Math.round(completedChunks / totalChunks * 100)
		};
		return { incomplete: false };
	} catch {
		return { incomplete: false };
	}
}
4115
/**
 * Clear incomplete download data for a model.
 *
 * Best-effort: failures to open the database or delete the data are logged
 * as a warning and never thrown to the caller.
 *
 * @param {string} modelId - Model whose partial download should be removed.
 * @returns {Promise<void>}
 */
async function clearIncompleteDownload(modelId) {
	try {
		const db = await openDownloadDB();
		try {
			await clearModelData(db, modelId);
		} finally {
			// Release the handle even when the delete rejects.
			db.close();
		}
	} catch (error) {
		console.warn(`Failed to clear incomplete download for ${modelId}:`, error);
	}
}
4125
+ /**
3649
4126
  * Check if there's enough storage quota for a model download.
3650
4127
  * Returns estimated available space and whether download should proceed.
3651
4128
  */
@@ -3693,6 +4170,14 @@ var browser_default = {
3693
4170
  getBrowserDiagnostics,
3694
4171
  getRecommendedModels,
3695
4172
  checkStorageQuota,
4173
+ isModelSafeForDevice,
4174
+ setDownloadPhase,
4175
+ getDownloadPhase,
4176
+ detectMemoryCrash,
4177
+ clearDownloadPhase,
4178
+ downloadModelChunked,
4179
+ hasIncompleteDownload,
4180
+ clearIncompleteDownload,
3696
4181
  createGerbilWorker,
3697
4182
  playAudio,
3698
4183
  createAudioPlayer,
@@ -3703,5 +4188,5 @@ var browser_default = {
3703
4188
  };
3704
4189
 
3705
4190
  //#endregion
3706
- export { BUILTIN_MODELS, checkStorageQuota, checkWebGPUCapabilities, checkWebGPUReady, createAudioPlayer, createGerbilWorker, browser_default as default, getBrowserDiagnostics, getRecommendedModels, getWebGPUInfo, isWebGPUSupported, playAudio, preloadChatModel, preloadEmbeddingModel, preloadSTTModel, preloadTTSModel, useChat, useCompletion, useEmbedding, useSpeech, useVoiceChat, useVoiceInput };
4191
+ export { BUILTIN_MODELS, checkStorageQuota, checkWebGPUCapabilities, checkWebGPUReady, clearDownloadPhase, clearIncompleteDownload, createAudioPlayer, createGerbilWorker, browser_default as default, detectMemoryCrash, downloadModelChunked, getBrowserDiagnostics, getDownloadPhase, getRecommendedModels, getWebGPUInfo, hasIncompleteDownload, isModelSafeForDevice, isWebGPUSupported, playAudio, preloadChatModel, preloadEmbeddingModel, preloadSTTModel, preloadTTSModel, setDownloadPhase, useChat, useCompletion, useEmbedding, useSpeech, useVoiceChat, useVoiceInput };
3707
4192
  //# sourceMappingURL=index.js.map