web-llm-runner 0.1.9 → 0.1.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/index.js CHANGED
@@ -986,7 +986,7 @@ const prebuiltAppConfig = {
986
986
  modelVersion +
987
987
  "/Llama-3.2-1B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm",
988
988
  vram_required_MB: 1128.82,
989
- low_resource_required: true,
989
+ low_resource_required: false,
990
990
  overrides: {
991
991
  context_window_size: 4096,
992
992
  },
@@ -998,7 +998,7 @@ const prebuiltAppConfig = {
998
998
  modelVersion +
999
999
  "/Llama-3.2-1B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm",
1000
1000
  vram_required_MB: 879.04,
1001
- low_resource_required: true,
1001
+ low_resource_required: false,
1002
1002
  overrides: {
1003
1003
  context_window_size: 4096,
1004
1004
  },
@@ -1024,7 +1024,7 @@ const prebuiltAppConfig = {
1024
1024
  modelVersion +
1025
1025
  "/Llama-3.2-1B-Instruct-q0f16-ctx4k_cs1k-webgpu.wasm",
1026
1026
  vram_required_MB: 2573.13,
1027
- low_resource_required: true,
1027
+ low_resource_required: false,
1028
1028
  overrides: {
1029
1029
  context_window_size: 4096,
1030
1030
  },
@@ -1036,7 +1036,7 @@ const prebuiltAppConfig = {
1036
1036
  modelVersion +
1037
1037
  "/Llama-3.2-3B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm",
1038
1038
  vram_required_MB: 2951.51,
1039
- low_resource_required: true,
1039
+ low_resource_required: false,
1040
1040
  overrides: {
1041
1041
  context_window_size: 4096,
1042
1042
  },
@@ -1048,7 +1048,7 @@ const prebuiltAppConfig = {
1048
1048
  modelVersion +
1049
1049
  "/Llama-3.2-3B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm",
1050
1050
  vram_required_MB: 2263.69,
1051
- low_resource_required: true,
1051
+ low_resource_required: false,
1052
1052
  overrides: {
1053
1053
  context_window_size: 4096,
1054
1054
  },
@@ -1483,7 +1483,7 @@ const prebuiltAppConfig = {
1483
1483
  modelVersion +
1484
1484
  "/SmolLM2-360M-Instruct-q0f16-ctx4k_cs1k-webgpu.wasm",
1485
1485
  vram_required_MB: 871.99,
1486
- low_resource_required: true,
1486
+ low_resource_required: false,
1487
1487
  required_features: ["shader-f16"],
1488
1488
  overrides: {
1489
1489
  context_window_size: 4096,
@@ -1496,7 +1496,7 @@ const prebuiltAppConfig = {
1496
1496
  modelVersion +
1497
1497
  "/SmolLM2-360M-Instruct-q0f32-ctx4k_cs1k-webgpu.wasm",
1498
1498
  vram_required_MB: 1743.99,
1499
- low_resource_required: true,
1499
+ low_resource_required: false,
1500
1500
  overrides: {
1501
1501
  context_window_size: 4096,
1502
1502
  },
@@ -1508,7 +1508,7 @@ const prebuiltAppConfig = {
1508
1508
  modelVersion +
1509
1509
  "/SmolLM2-360M-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm",
1510
1510
  vram_required_MB: 376.06,
1511
- low_resource_required: true,
1511
+ low_resource_required: false,
1512
1512
  required_features: ["shader-f16"],
1513
1513
  overrides: {
1514
1514
  context_window_size: 4096,
@@ -1521,7 +1521,7 @@ const prebuiltAppConfig = {
1521
1521
  modelVersion +
1522
1522
  "/SmolLM2-360M-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm",
1523
1523
  vram_required_MB: 579.61,
1524
- low_resource_required: true,
1524
+ low_resource_required: false,
1525
1525
  overrides: {
1526
1526
  context_window_size: 4096,
1527
1527
  },
@@ -1533,7 +1533,7 @@ const prebuiltAppConfig = {
1533
1533
  modelVersion +
1534
1534
  "/SmolLM2-135M-Instruct-q0f16-ctx4k_cs1k-webgpu.wasm",
1535
1535
  vram_required_MB: 359.69,
1536
- low_resource_required: true,
1536
+ low_resource_required: false,
1537
1537
  required_features: ["shader-f16"],
1538
1538
  overrides: {
1539
1539
  context_window_size: 4096,
@@ -1546,7 +1546,7 @@ const prebuiltAppConfig = {
1546
1546
  modelVersion +
1547
1547
  "/SmolLM2-135M-Instruct-q0f32-ctx4k_cs1k-webgpu.wasm",
1548
1548
  vram_required_MB: 719.38,
1549
- low_resource_required: true,
1549
+ low_resource_required: false,
1550
1550
  overrides: {
1551
1551
  context_window_size: 4096,
1552
1552
  },
@@ -1554,14 +1554,14 @@ const prebuiltAppConfig = {
1554
1554
  // Transformers.js (ONNX) confirmed models from llm_runner_poc
1555
1555
  {
1556
1556
  model: "https://huggingface.co/Xenova/LaMini-Flan-T5-248M",
1557
- model_id: "LaMini-Flan-T5-248M",
1557
+ model_id: "LaMini-Flan-T5 (~248MB) — Instruction Tuned",
1558
1558
  onnx_id: "Xenova/LaMini-Flan-T5-248M",
1559
1559
  model_lib: "",
1560
1560
  low_resource_required: true,
1561
1561
  },
1562
1562
  {
1563
1563
  model: "https://huggingface.co/Xenova/flan-t5-small",
1564
- model_id: "flan-t5-small",
1564
+ model_id: "Flan-T5 Small (~300MB) — Instruction Tuned",
1565
1565
  onnx_id: "Xenova/flan-t5-small",
1566
1566
  model_lib: "",
1567
1567
  low_resource_required: true,
@@ -36727,7 +36727,14 @@ class ONNXEngine {
36727
36727
  this.initProgressCallback({
36728
36728
  progress: 0,
36729
36729
  timeElapsed: 0,
36730
- text: `Initializing ONNX fallback for ${repoId}...`
36730
+ text: `Initializing ONNX engine for ${repoId}...`
36731
+ });
36732
+ }
36733
+ if (this.initProgressCallback) {
36734
+ this.initProgressCallback({
36735
+ progress: 0.05,
36736
+ timeElapsed: 0,
36737
+ text: `Loading model configuration and tokenizer...`
36731
36738
  });
36732
36739
  }
36733
36740
  try {
@@ -37983,18 +37990,55 @@ class WebLLM {
37983
37990
  messages = [];
37984
37991
  downloadProgress = {};
37985
37992
  config;
37993
+ isLowResourceDevice = false;
37986
37994
  constructor(config = {}) {
37987
37995
  this.config = config;
37988
37996
  this.messages = this.loadContext();
37997
+ this.detectDevice();
37998
+ }
37999
+ detectDevice() {
38000
+ if (typeof navigator === "undefined")
38001
+ return;
38002
+ // Check for explicit override in config (the user can "tell" the module)
38003
+ if (this.config.is_low_resource !== undefined) {
38004
+ this.isLowResourceDevice = !!this.config.is_low_resource;
38005
+ console.log(`[WebLLM] Using explicit is_low_resource: ${this.isLowResourceDevice}`);
38006
+ return;
38007
+ }
38008
+ const isMobileUA = /Android|webOS|iPhone|iPad|iPod|BlackBerry|IEMobile|Opera Mini/i.test(navigator.userAgent);
38009
+ // Special check for iPad/modern tablets that report as desktop but have touch points
38010
+ const isTablet = navigator.maxTouchPoints > 1 && !window.MSStream;
38011
+ const isMobile = isMobileUA || isTablet;
38012
+ const hasWebGPU = !!navigator.gpu;
38013
+ // If it's mobile OR if WebGPU is missing, we consider it a low-resource/fallback device
38014
+ this.isLowResourceDevice = isMobile || !hasWebGPU;
38015
+ console.log(`[WebLLM] Device detection: isMobile=${isMobile}, hasWebGPU=${hasWebGPU} -> isLowResourceDevice=${this.isLowResourceDevice}`);
38016
+ }
38017
+ get is_low_resource() {
38018
+ return this.isLowResourceDevice;
37989
38019
  }
37990
38020
  // manage_model endpoints
37991
38021
  get models_available() {
37992
- return prebuiltAppConfig.model_list.map((m) => m.model_id);
38022
+ let list = prebuiltAppConfig.model_list;
38023
+ if (this.isLowResourceDevice) {
38024
+ // Filter for specific approved mobile models
38025
+ const approvedIds = [
38026
+ "LaMini-Flan-T5 (~248MB) — Instruction Tuned",
38027
+ "Flan-T5 Small (~300MB) — Instruction Tuned"
38028
+ ];
38029
+ list = list.filter(m => approvedIds.includes(m.model_id));
38030
+ }
38031
+ return list.map((m) => m.model_id);
37993
38032
  }
37994
38033
  async local_model_available(model_id) {
37995
38034
  return await hasModelInCache(model_id);
37996
38035
  }
37997
38036
  async download_model(model_id, progressCallback) {
38037
+ // Initial feedback
38038
+ const initText = "Initializing model engine...";
38039
+ this.downloadProgress[model_id] = initText;
38040
+ if (progressCallback)
38041
+ progressCallback(initText);
37998
38042
  this.engine = await CreateMLCEngine(model_id, {
37999
38043
  ...this.config,
38000
38044
  initProgressCallback: (progress) => {