web-llm-runner 0.1.9 → 0.1.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/index.js
CHANGED
|
@@ -986,7 +986,7 @@ const prebuiltAppConfig = {
|
|
|
986
986
|
modelVersion +
|
|
987
987
|
"/Llama-3.2-1B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm",
|
|
988
988
|
vram_required_MB: 1128.82,
|
|
989
|
-
low_resource_required:
|
|
989
|
+
low_resource_required: false,
|
|
990
990
|
overrides: {
|
|
991
991
|
context_window_size: 4096,
|
|
992
992
|
},
|
|
@@ -998,7 +998,7 @@ const prebuiltAppConfig = {
|
|
|
998
998
|
modelVersion +
|
|
999
999
|
"/Llama-3.2-1B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm",
|
|
1000
1000
|
vram_required_MB: 879.04,
|
|
1001
|
-
low_resource_required:
|
|
1001
|
+
low_resource_required: false,
|
|
1002
1002
|
overrides: {
|
|
1003
1003
|
context_window_size: 4096,
|
|
1004
1004
|
},
|
|
@@ -1024,7 +1024,7 @@ const prebuiltAppConfig = {
|
|
|
1024
1024
|
modelVersion +
|
|
1025
1025
|
"/Llama-3.2-1B-Instruct-q0f16-ctx4k_cs1k-webgpu.wasm",
|
|
1026
1026
|
vram_required_MB: 2573.13,
|
|
1027
|
-
low_resource_required:
|
|
1027
|
+
low_resource_required: false,
|
|
1028
1028
|
overrides: {
|
|
1029
1029
|
context_window_size: 4096,
|
|
1030
1030
|
},
|
|
@@ -1036,7 +1036,7 @@ const prebuiltAppConfig = {
|
|
|
1036
1036
|
modelVersion +
|
|
1037
1037
|
"/Llama-3.2-3B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm",
|
|
1038
1038
|
vram_required_MB: 2951.51,
|
|
1039
|
-
low_resource_required:
|
|
1039
|
+
low_resource_required: false,
|
|
1040
1040
|
overrides: {
|
|
1041
1041
|
context_window_size: 4096,
|
|
1042
1042
|
},
|
|
@@ -1048,7 +1048,7 @@ const prebuiltAppConfig = {
|
|
|
1048
1048
|
modelVersion +
|
|
1049
1049
|
"/Llama-3.2-3B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm",
|
|
1050
1050
|
vram_required_MB: 2263.69,
|
|
1051
|
-
low_resource_required:
|
|
1051
|
+
low_resource_required: false,
|
|
1052
1052
|
overrides: {
|
|
1053
1053
|
context_window_size: 4096,
|
|
1054
1054
|
},
|
|
@@ -1483,7 +1483,7 @@ const prebuiltAppConfig = {
|
|
|
1483
1483
|
modelVersion +
|
|
1484
1484
|
"/SmolLM2-360M-Instruct-q0f16-ctx4k_cs1k-webgpu.wasm",
|
|
1485
1485
|
vram_required_MB: 871.99,
|
|
1486
|
-
low_resource_required:
|
|
1486
|
+
low_resource_required: false,
|
|
1487
1487
|
required_features: ["shader-f16"],
|
|
1488
1488
|
overrides: {
|
|
1489
1489
|
context_window_size: 4096,
|
|
@@ -1496,7 +1496,7 @@ const prebuiltAppConfig = {
|
|
|
1496
1496
|
modelVersion +
|
|
1497
1497
|
"/SmolLM2-360M-Instruct-q0f32-ctx4k_cs1k-webgpu.wasm",
|
|
1498
1498
|
vram_required_MB: 1743.99,
|
|
1499
|
-
low_resource_required:
|
|
1499
|
+
low_resource_required: false,
|
|
1500
1500
|
overrides: {
|
|
1501
1501
|
context_window_size: 4096,
|
|
1502
1502
|
},
|
|
@@ -1508,7 +1508,7 @@ const prebuiltAppConfig = {
|
|
|
1508
1508
|
modelVersion +
|
|
1509
1509
|
"/SmolLM2-360M-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm",
|
|
1510
1510
|
vram_required_MB: 376.06,
|
|
1511
|
-
low_resource_required:
|
|
1511
|
+
low_resource_required: false,
|
|
1512
1512
|
required_features: ["shader-f16"],
|
|
1513
1513
|
overrides: {
|
|
1514
1514
|
context_window_size: 4096,
|
|
@@ -1521,7 +1521,7 @@ const prebuiltAppConfig = {
|
|
|
1521
1521
|
modelVersion +
|
|
1522
1522
|
"/SmolLM2-360M-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm",
|
|
1523
1523
|
vram_required_MB: 579.61,
|
|
1524
|
-
low_resource_required:
|
|
1524
|
+
low_resource_required: false,
|
|
1525
1525
|
overrides: {
|
|
1526
1526
|
context_window_size: 4096,
|
|
1527
1527
|
},
|
|
@@ -1533,7 +1533,7 @@ const prebuiltAppConfig = {
|
|
|
1533
1533
|
modelVersion +
|
|
1534
1534
|
"/SmolLM2-135M-Instruct-q0f16-ctx4k_cs1k-webgpu.wasm",
|
|
1535
1535
|
vram_required_MB: 359.69,
|
|
1536
|
-
low_resource_required:
|
|
1536
|
+
low_resource_required: false,
|
|
1537
1537
|
required_features: ["shader-f16"],
|
|
1538
1538
|
overrides: {
|
|
1539
1539
|
context_window_size: 4096,
|
|
@@ -1546,7 +1546,7 @@ const prebuiltAppConfig = {
|
|
|
1546
1546
|
modelVersion +
|
|
1547
1547
|
"/SmolLM2-135M-Instruct-q0f32-ctx4k_cs1k-webgpu.wasm",
|
|
1548
1548
|
vram_required_MB: 719.38,
|
|
1549
|
-
low_resource_required:
|
|
1549
|
+
low_resource_required: false,
|
|
1550
1550
|
overrides: {
|
|
1551
1551
|
context_window_size: 4096,
|
|
1552
1552
|
},
|
|
@@ -1554,14 +1554,14 @@ const prebuiltAppConfig = {
|
|
|
1554
1554
|
// Transformers.js (ONNX) confirmed models from llm_runner_poc
|
|
1555
1555
|
{
|
|
1556
1556
|
model: "https://huggingface.co/Xenova/LaMini-Flan-T5-248M",
|
|
1557
|
-
model_id: "LaMini-Flan-T5
|
|
1557
|
+
model_id: "LaMini-Flan-T5 (~248MB) — Instruction Tuned",
|
|
1558
1558
|
onnx_id: "Xenova/LaMini-Flan-T5-248M",
|
|
1559
1559
|
model_lib: "",
|
|
1560
1560
|
low_resource_required: true,
|
|
1561
1561
|
},
|
|
1562
1562
|
{
|
|
1563
1563
|
model: "https://huggingface.co/Xenova/flan-t5-small",
|
|
1564
|
-
model_id: "
|
|
1564
|
+
model_id: "Flan-T5 Small (~300MB) — Instruction Tuned",
|
|
1565
1565
|
onnx_id: "Xenova/flan-t5-small",
|
|
1566
1566
|
model_lib: "",
|
|
1567
1567
|
low_resource_required: true,
|
|
@@ -36727,7 +36727,14 @@ class ONNXEngine {
|
|
|
36727
36727
|
this.initProgressCallback({
|
|
36728
36728
|
progress: 0,
|
|
36729
36729
|
timeElapsed: 0,
|
|
36730
|
-
text: `Initializing ONNX
|
|
36730
|
+
text: `Initializing ONNX engine for ${repoId}...`
|
|
36731
|
+
});
|
|
36732
|
+
}
|
|
36733
|
+
if (this.initProgressCallback) {
|
|
36734
|
+
this.initProgressCallback({
|
|
36735
|
+
progress: 0.05,
|
|
36736
|
+
timeElapsed: 0,
|
|
36737
|
+
text: `Loading model configuration and tokenizer...`
|
|
36731
36738
|
});
|
|
36732
36739
|
}
|
|
36733
36740
|
try {
|
|
@@ -37983,18 +37990,55 @@ class WebLLM {
|
|
|
37983
37990
|
messages = [];
|
|
37984
37991
|
downloadProgress = {};
|
|
37985
37992
|
config;
|
|
37993
|
+
isLowResourceDevice = false;
|
|
37986
37994
|
constructor(config = {}) {
|
|
37987
37995
|
this.config = config;
|
|
37988
37996
|
this.messages = this.loadContext();
|
|
37997
|
+
this.detectDevice();
|
|
37998
|
+
}
|
|
37999
|
+
detectDevice() {
|
|
38000
|
+
if (typeof navigator === "undefined")
|
|
38001
|
+
return;
|
|
38002
|
+
// Check for explicit override in config (the user can "tell" the module)
|
|
38003
|
+
if (this.config.is_low_resource !== undefined) {
|
|
38004
|
+
this.isLowResourceDevice = !!this.config.is_low_resource;
|
|
38005
|
+
console.log(`[WebLLM] Using explicit is_low_resource: ${this.isLowResourceDevice}`);
|
|
38006
|
+
return;
|
|
38007
|
+
}
|
|
38008
|
+
const isMobileUA = /Android|webOS|iPhone|iPad|iPod|BlackBerry|IEMobile|Opera Mini/i.test(navigator.userAgent);
|
|
38009
|
+
// Special check for iPad/modern tablets that report as desktop but have touch points
|
|
38010
|
+
const isTablet = navigator.maxTouchPoints > 1 && !window.MSStream;
|
|
38011
|
+
const isMobile = isMobileUA || isTablet;
|
|
38012
|
+
const hasWebGPU = !!navigator.gpu;
|
|
38013
|
+
// If it's mobile OR if WebGPU is missing, we consider it a low-resource/fallback device
|
|
38014
|
+
this.isLowResourceDevice = isMobile || !hasWebGPU;
|
|
38015
|
+
console.log(`[WebLLM] Device detection: isMobile=${isMobile}, hasWebGPU=${hasWebGPU} -> isLowResourceDevice=${this.isLowResourceDevice}`);
|
|
38016
|
+
}
|
|
38017
|
+
get is_low_resource() {
|
|
38018
|
+
return this.isLowResourceDevice;
|
|
37989
38019
|
}
|
|
37990
38020
|
// manage_model endpoints
|
|
37991
38021
|
get models_available() {
|
|
37992
|
-
|
|
38022
|
+
let list = prebuiltAppConfig.model_list;
|
|
38023
|
+
if (this.isLowResourceDevice) {
|
|
38024
|
+
// Filter for specific approved mobile models
|
|
38025
|
+
const approvedIds = [
|
|
38026
|
+
"LaMini-Flan-T5 (~248MB) — Instruction Tuned",
|
|
38027
|
+
"Flan-T5 Small (~300MB) — Instruction Tuned"
|
|
38028
|
+
];
|
|
38029
|
+
list = list.filter(m => approvedIds.includes(m.model_id));
|
|
38030
|
+
}
|
|
38031
|
+
return list.map((m) => m.model_id);
|
|
37993
38032
|
}
|
|
37994
38033
|
async local_model_available(model_id) {
|
|
37995
38034
|
return await hasModelInCache(model_id);
|
|
37996
38035
|
}
|
|
37997
38036
|
async download_model(model_id, progressCallback) {
|
|
38037
|
+
// Initial feedback
|
|
38038
|
+
const initText = "Initializing model engine...";
|
|
38039
|
+
this.downloadProgress[model_id] = initText;
|
|
38040
|
+
if (progressCallback)
|
|
38041
|
+
progressCallback(initText);
|
|
37998
38042
|
this.engine = await CreateMLCEngine(model_id, {
|
|
37999
38043
|
...this.config,
|
|
38000
38044
|
initProgressCallback: (progress) => {
|