web-llm-runner 0.1.18 → 0.2.1

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -84,3 +84,19 @@ Safely removes a downloaded model completely from the browser's local cache to i
84
84
  ```javascript
85
85
  await llm.delete_model("TinyLlama-1.1B-Chat-v1.0-q4f16_1-MLC");
86
86
  ```
87
+
88
+ ---
89
+
90
+ ## 📱 Hybrid Inference & Device Sensitivity
91
+
92
+ `web-llm-runner` features an intelligent hybrid engine that automatically selects the best inference backend for your hardware:
93
+
94
+ - **WebGPU (Primary)**: Uses high-performance hardware acceleration on supported desktop and mobile browsers.
95
+ - **ONNX/WASM (Fallback)**: Automatically switches to CPU-based inference on older devices and on browsers without WebGPU support (such as many current mobile browsers).
96
+
97
+ > [!NOTE]
98
+ > **Streaming Support**: While WebGPU streaming is fully stable, ONNX-based streaming is currently in active development. Performance and responsiveness may vary on low-resource mobile devices.
99
+
100
+ ## ⚖ License
101
+
102
+ MIT
package/lib/index.js CHANGED
@@ -1554,14 +1554,14 @@ const prebuiltAppConfig = {
1554
1554
  // Transformers.js (ONNX) confirmed models from llm_runner_poc
1555
1555
  {
1556
1556
  model: "https://huggingface.co/Xenova/LaMini-Flan-T5-248M",
1557
- model_id: "LaMini-Flan-T5 (~248MB) — Instruction Tuned",
1557
+ model_id: "LaMini-Flan-T5 248M parameter",
1558
1558
  onnx_id: "Xenova/LaMini-Flan-T5-248M",
1559
1559
  model_lib: "",
1560
1560
  low_resource_required: true,
1561
1561
  },
1562
1562
  {
1563
1563
  model: "https://huggingface.co/Xenova/flan-t5-small",
1564
- model_id: "Flan-T5 Small (~300MB) — Instruction Tuned",
1564
+ model_id: "Flan-T5 Small 77M parameter",
1565
1565
  onnx_id: "Xenova/flan-t5-small",
1566
1566
  model_lib: "",
1567
1567
  low_resource_required: true,
@@ -38145,8 +38145,8 @@ class WebLLM {
38145
38145
  if (this.isLowResourceDevice) {
38146
38146
  // Filter for specific approved mobile models
38147
38147
  const approvedIds = [
38148
- "LaMini-Flan-T5 (~248MB) — Instruction Tuned",
38149
- "Flan-T5 Small (~300MB) — Instruction Tuned"
38148
+ "LaMini-Flan-T5 248M parameter",
38149
+ "Flan-T5 Small 77M parameter"
38150
38150
  ];
38151
38151
  list = list.filter(m => approvedIds.includes(m.model_id));
38152
38152
  }