@blank-utils/llm 0.5.3 → 0.5.4

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -56,43 +56,86 @@ async function logCapabilities() {
56
56
  // src/models.ts
57
57
  var DEFAULT_WEBLLM_MODEL = "Phi-3.5-mini-instruct-q4f16_1-MLC";
58
58
  var WEBLLM_MODELS = {
59
- // === Llama 3.2 Models (Meta) - Excellent quality, reasonable size ===
59
+ // === Llama 3.2 Models ===
60
60
  "llama-3.2-1b": "Llama-3.2-1B-Instruct-q4f16_1-MLC",
61
61
  "llama-3.2-3b": "Llama-3.2-3B-Instruct-q4f16_1-MLC",
62
- // === Llama 3.1 Models (Meta) - Larger, higher quality ===
62
+ // === Llama 3.1 & 3 Models ===
63
63
  "llama-3.1-8b": "Llama-3.1-8B-Instruct-q4f16_1-MLC",
64
64
  "llama-3.1-8b-1k": "Llama-3.1-8B-Instruct-q4f16_1-MLC-1k",
65
- // Smaller context for lower memory
66
- // === Phi Models (Microsoft) - Great balance of size/quality ===
65
+ "llama-3.1-70b": "Llama-3.1-70B-Instruct-q3f16_1-MLC",
66
+ "llama-3-8b": "Llama-3-8B-Instruct-q4f16_1-MLC",
67
+ "llama-3-8b-1k": "Llama-3-8B-Instruct-q4f16_1-MLC-1k",
68
+ "llama-3-70b": "Llama-3-70B-Instruct-q3f16_1-MLC",
69
+ // === Llama 2 Models ===
70
+ "llama-2-7b": "Llama-2-7b-chat-hf-q4f16_1-MLC",
71
+ "llama-2-7b-1k": "Llama-2-7b-chat-hf-q4f16_1-MLC-1k",
72
+ "llama-2-13b": "Llama-2-13b-chat-hf-q4f16_1-MLC",
73
+ // === Phi Models ===
67
74
  "phi-3.5-mini": "Phi-3.5-mini-instruct-q4f16_1-MLC",
68
75
  "phi-3.5-mini-1k": "Phi-3.5-mini-instruct-q4f16_1-MLC-1k",
69
- // Smaller context for lower memory
70
76
  "phi-3.5-vision": "Phi-3.5-vision-instruct-q4f16_1-MLC",
71
77
  // Vision model
72
- // === Qwen 2.5 Models (Alibaba) - Good multilingual support ===
78
+ "phi-3-mini-4k": "Phi-3-mini-4k-instruct-q4f16_1-MLC",
79
+ "phi-3-mini-4k-1k": "Phi-3-mini-4k-instruct-q4f16_1-MLC-1k",
80
+ "phi-2": "phi-2-q4f16_1-MLC",
81
+ "phi-1.5": "phi-1_5-q4f16_1-MLC",
82
+ // === Qwen 3 Models ===
83
+ "qwen3-0.6b": "Qwen3-0.6B-q4f16_1-MLC",
84
+ "qwen3-1.7b": "Qwen3-1.7B-q4f16_1-MLC",
85
+ "qwen3-4b": "Qwen3-4B-q4f16_1-MLC",
86
+ "qwen3-8b": "Qwen3-8B-q4f16_1-MLC",
87
+ // === Qwen 2.5 Models ===
73
88
  "qwen-2.5-0.5b": "Qwen2.5-0.5B-Instruct-q4f16_1-MLC",
74
89
  "qwen-2.5-1.5b": "Qwen2.5-1.5B-Instruct-q4f16_1-MLC",
75
90
  "qwen-2.5-3b": "Qwen2.5-3B-Instruct-q4f16_1-MLC",
76
91
  "qwen-2.5-7b": "Qwen2.5-7B-Instruct-q4f16_1-MLC",
77
92
  "qwen-2.5-coder-0.5b": "Qwen2.5-Coder-0.5B-Instruct-q4f16_1-MLC",
78
93
  "qwen-2.5-coder-1.5b": "Qwen2.5-Coder-1.5B-Instruct-q4f16_1-MLC",
79
- // === Gemma 2 Models (Google) - Efficient and capable ===
94
+ "qwen-2.5-coder-3b": "Qwen2.5-Coder-3B-Instruct-q4f16_1-MLC",
95
+ "qwen-2.5-coder-7b": "Qwen2.5-Coder-7B-Instruct-q4f16_1-MLC",
96
+ "qwen-2.5-math-1.5b": "Qwen2.5-Math-1.5B-Instruct-q4f16_1-MLC",
97
+ // === Qwen 2 Models ===
98
+ "qwen2-math-1.5b": "Qwen2-Math-1.5B-Instruct-q4f16_1-MLC",
99
+ "qwen2-math-7b": "Qwen2-Math-7B-Instruct-q4f16_1-MLC",
100
+ // === Gemma 2 Models ===
80
101
  "gemma-2-2b": "gemma-2-2b-it-q4f16_1-MLC",
81
102
  "gemma-2-2b-1k": "gemma-2-2b-it-q4f16_1-MLC-1k",
82
- // Smaller context for lower memory
83
103
  "gemma-2-9b": "gemma-2-9b-it-q4f16_1-MLC",
84
- // === SmolLM2 Models (HuggingFace) - Ultra lightweight ===
104
+ "gemma-2-2b-jpn": "gemma-2-2b-jpn-it-q4f16_1-MLC",
105
+ // === Gemma 1 Models ===
106
+ "gemma-2b": "gemma-2b-it-q4f16_1-MLC",
107
+ "gemma-2b-1k": "gemma-2b-it-q4f16_1-MLC-1k",
108
+ // === SmolLM2 Models ===
85
109
  "smollm2-135m": "SmolLM2-135M-Instruct-q0f16-MLC",
86
110
  "smollm2-360m": "SmolLM2-360M-Instruct-q4f16_1-MLC",
87
111
  "smollm2-1.7b": "SmolLM2-1.7B-Instruct-q4f16_1-MLC",
88
- // === Mistral Models - Good general purpose ===
112
+ // === Mistral & Ministral Models ===
89
113
  "mistral-7b": "Mistral-7B-Instruct-v0.3-q4f16_1-MLC",
90
- // === DeepSeek R1 Distill Models - Reasoning focused ===
114
+ "mistral-7b-v0.2": "Mistral-7B-Instruct-v0.2-q4f16_1-MLC",
115
+ "ministral-3-3b-base": "Ministral-3-3B-Base-2512-q4f16_1-MLC",
116
+ "ministral-3-3b-reasoning": "Ministral-3-3B-Reasoning-2512-q4f16_1-MLC",
117
+ "ministral-3-3b-instruct": "Ministral-3-3B-Instruct-2512-BF16-q4f16_1-MLC",
118
+ // === DeepSeek R1 Distill Models ===
91
119
  "deepseek-r1-qwen-7b": "DeepSeek-R1-Distill-Qwen-7B-q4f16_1-MLC",
92
120
  "deepseek-r1-llama-8b": "DeepSeek-R1-Distill-Llama-8B-q4f16_1-MLC",
93
- // === Hermes Models - Function calling capable ===
121
+ // === Hermes Models ===
94
122
  "hermes-3-llama-3.2-3b": "Hermes-3-Llama-3.2-3B-q4f16_1-MLC",
95
- "hermes-3-llama-3.1-8b": "Hermes-3-Llama-3.1-8B-q4f16_1-MLC"
123
+ "hermes-3-llama-3.1-8b": "Hermes-3-Llama-3.1-8B-q4f16_1-MLC",
124
+ "hermes-2-theta-llama-3-8b": "Hermes-2-Theta-Llama-3-8B-q4f16_1-MLC",
125
+ "hermes-2-pro-llama-3-8b": "Hermes-2-Pro-Llama-3-8B-q4f16_1-MLC",
126
+ "hermes-2-pro-mistral-7b": "Hermes-2-Pro-Mistral-7B-q4f16_1-MLC",
127
+ "openhermes-2.5-mistral-7b": "OpenHermes-2.5-Mistral-7B-q4f16_1-MLC",
128
+ "neuralhermes-2.5-mistral-7b": "NeuralHermes-2.5-Mistral-7B-q4f16_1-MLC",
129
+ // === Other Models ===
130
+ "tinyllama-1.1b": "TinyLlama-1.1B-Chat-v1.0-q4f16_1-MLC",
131
+ "tinyllama-1.1b-1k": "TinyLlama-1.1B-Chat-v1.0-q4f16_1-MLC-1k",
132
+ "tinyllama-1.1b-v0.4": "TinyLlama-1.1B-Chat-v0.4-q4f16_1-MLC",
133
+ "tinyllama-1.1b-v0.4-1k": "TinyLlama-1.1B-Chat-v0.4-q4f16_1-MLC-1k",
134
+ "redpajama-3b": "RedPajama-INCITE-Chat-3B-v1-q4f16_1-MLC",
135
+ "redpajama-3b-1k": "RedPajama-INCITE-Chat-3B-v1-q4f16_1-MLC-1k",
136
+ "stablelm-2-zephyr-1.6b": "stablelm-2-zephyr-1_6b-q4f16_1-MLC",
137
+ "stablelm-2-zephyr-1.6b-1k": "stablelm-2-zephyr-1_6b-q4f16_1-MLC-1k",
138
+ "wizardmath-7b": "WizardMath-7B-V1.1-q4f16_1-MLC"
96
139
  };
97
140
 
98
141
  // src/backends/webllm.ts
@@ -20,26 +20,67 @@ declare const WEBLLM_MODELS: {
20
20
  readonly 'llama-3.2-3b': "Llama-3.2-3B-Instruct-q4f16_1-MLC";
21
21
  readonly 'llama-3.1-8b': "Llama-3.1-8B-Instruct-q4f16_1-MLC";
22
22
  readonly 'llama-3.1-8b-1k': "Llama-3.1-8B-Instruct-q4f16_1-MLC-1k";
23
+ readonly 'llama-3.1-70b': "Llama-3.1-70B-Instruct-q3f16_1-MLC";
24
+ readonly 'llama-3-8b': "Llama-3-8B-Instruct-q4f16_1-MLC";
25
+ readonly 'llama-3-8b-1k': "Llama-3-8B-Instruct-q4f16_1-MLC-1k";
26
+ readonly 'llama-3-70b': "Llama-3-70B-Instruct-q3f16_1-MLC";
27
+ readonly 'llama-2-7b': "Llama-2-7b-chat-hf-q4f16_1-MLC";
28
+ readonly 'llama-2-7b-1k': "Llama-2-7b-chat-hf-q4f16_1-MLC-1k";
29
+ readonly 'llama-2-13b': "Llama-2-13b-chat-hf-q4f16_1-MLC";
23
30
  readonly 'phi-3.5-mini': "Phi-3.5-mini-instruct-q4f16_1-MLC";
24
31
  readonly 'phi-3.5-mini-1k': "Phi-3.5-mini-instruct-q4f16_1-MLC-1k";
25
32
  readonly 'phi-3.5-vision': "Phi-3.5-vision-instruct-q4f16_1-MLC";
33
+ readonly 'phi-3-mini-4k': "Phi-3-mini-4k-instruct-q4f16_1-MLC";
34
+ readonly 'phi-3-mini-4k-1k': "Phi-3-mini-4k-instruct-q4f16_1-MLC-1k";
35
+ readonly 'phi-2': "phi-2-q4f16_1-MLC";
36
+ readonly 'phi-1.5': "phi-1_5-q4f16_1-MLC";
37
+ readonly 'qwen3-0.6b': "Qwen3-0.6B-q4f16_1-MLC";
38
+ readonly 'qwen3-1.7b': "Qwen3-1.7B-q4f16_1-MLC";
39
+ readonly 'qwen3-4b': "Qwen3-4B-q4f16_1-MLC";
40
+ readonly 'qwen3-8b': "Qwen3-8B-q4f16_1-MLC";
26
41
  readonly 'qwen-2.5-0.5b': "Qwen2.5-0.5B-Instruct-q4f16_1-MLC";
27
42
  readonly 'qwen-2.5-1.5b': "Qwen2.5-1.5B-Instruct-q4f16_1-MLC";
28
43
  readonly 'qwen-2.5-3b': "Qwen2.5-3B-Instruct-q4f16_1-MLC";
29
44
  readonly 'qwen-2.5-7b': "Qwen2.5-7B-Instruct-q4f16_1-MLC";
30
45
  readonly 'qwen-2.5-coder-0.5b': "Qwen2.5-Coder-0.5B-Instruct-q4f16_1-MLC";
31
46
  readonly 'qwen-2.5-coder-1.5b': "Qwen2.5-Coder-1.5B-Instruct-q4f16_1-MLC";
47
+ readonly 'qwen-2.5-coder-3b': "Qwen2.5-Coder-3B-Instruct-q4f16_1-MLC";
48
+ readonly 'qwen-2.5-coder-7b': "Qwen2.5-Coder-7B-Instruct-q4f16_1-MLC";
49
+ readonly 'qwen-2.5-math-1.5b': "Qwen2.5-Math-1.5B-Instruct-q4f16_1-MLC";
50
+ readonly 'qwen2-math-1.5b': "Qwen2-Math-1.5B-Instruct-q4f16_1-MLC";
51
+ readonly 'qwen2-math-7b': "Qwen2-Math-7B-Instruct-q4f16_1-MLC";
32
52
  readonly 'gemma-2-2b': "gemma-2-2b-it-q4f16_1-MLC";
33
53
  readonly 'gemma-2-2b-1k': "gemma-2-2b-it-q4f16_1-MLC-1k";
34
54
  readonly 'gemma-2-9b': "gemma-2-9b-it-q4f16_1-MLC";
55
+ readonly 'gemma-2-2b-jpn': "gemma-2-2b-jpn-it-q4f16_1-MLC";
56
+ readonly 'gemma-2b': "gemma-2b-it-q4f16_1-MLC";
57
+ readonly 'gemma-2b-1k': "gemma-2b-it-q4f16_1-MLC-1k";
35
58
  readonly 'smollm2-135m': "SmolLM2-135M-Instruct-q0f16-MLC";
36
59
  readonly 'smollm2-360m': "SmolLM2-360M-Instruct-q4f16_1-MLC";
37
60
  readonly 'smollm2-1.7b': "SmolLM2-1.7B-Instruct-q4f16_1-MLC";
38
61
  readonly 'mistral-7b': "Mistral-7B-Instruct-v0.3-q4f16_1-MLC";
62
+ readonly 'mistral-7b-v0.2': "Mistral-7B-Instruct-v0.2-q4f16_1-MLC";
63
+ readonly 'ministral-3-3b-base': "Ministral-3-3B-Base-2512-q4f16_1-MLC";
64
+ readonly 'ministral-3-3b-reasoning': "Ministral-3-3B-Reasoning-2512-q4f16_1-MLC";
65
+ readonly 'ministral-3-3b-instruct': "Ministral-3-3B-Instruct-2512-BF16-q4f16_1-MLC";
39
66
  readonly 'deepseek-r1-qwen-7b': "DeepSeek-R1-Distill-Qwen-7B-q4f16_1-MLC";
40
67
  readonly 'deepseek-r1-llama-8b': "DeepSeek-R1-Distill-Llama-8B-q4f16_1-MLC";
41
68
  readonly 'hermes-3-llama-3.2-3b': "Hermes-3-Llama-3.2-3B-q4f16_1-MLC";
42
69
  readonly 'hermes-3-llama-3.1-8b': "Hermes-3-Llama-3.1-8B-q4f16_1-MLC";
70
+ readonly 'hermes-2-theta-llama-3-8b': "Hermes-2-Theta-Llama-3-8B-q4f16_1-MLC";
71
+ readonly 'hermes-2-pro-llama-3-8b': "Hermes-2-Pro-Llama-3-8B-q4f16_1-MLC";
72
+ readonly 'hermes-2-pro-mistral-7b': "Hermes-2-Pro-Mistral-7B-q4f16_1-MLC";
73
+ readonly 'openhermes-2.5-mistral-7b': "OpenHermes-2.5-Mistral-7B-q4f16_1-MLC";
74
+ readonly 'neuralhermes-2.5-mistral-7b': "NeuralHermes-2.5-Mistral-7B-q4f16_1-MLC";
75
+ readonly 'tinyllama-1.1b': "TinyLlama-1.1B-Chat-v1.0-q4f16_1-MLC";
76
+ readonly 'tinyllama-1.1b-1k': "TinyLlama-1.1B-Chat-v1.0-q4f16_1-MLC-1k";
77
+ readonly 'tinyllama-1.1b-v0.4': "TinyLlama-1.1B-Chat-v0.4-q4f16_1-MLC";
78
+ readonly 'tinyllama-1.1b-v0.4-1k': "TinyLlama-1.1B-Chat-v0.4-q4f16_1-MLC-1k";
79
+ readonly 'redpajama-3b': "RedPajama-INCITE-Chat-3B-v1-q4f16_1-MLC";
80
+ readonly 'redpajama-3b-1k': "RedPajama-INCITE-Chat-3B-v1-q4f16_1-MLC-1k";
81
+ readonly 'stablelm-2-zephyr-1.6b': "stablelm-2-zephyr-1_6b-q4f16_1-MLC";
82
+ readonly 'stablelm-2-zephyr-1.6b-1k': "stablelm-2-zephyr-1_6b-q4f16_1-MLC-1k";
83
+ readonly 'wizardmath-7b': "WizardMath-7B-V1.1-q4f16_1-MLC";
43
84
  };
44
85
  type WebLLMModelID = keyof typeof WEBLLM_MODELS;
45
86
  /**
package/dist/index.d.ts CHANGED
@@ -1,5 +1,5 @@
1
- import { B as BrowserCapabilities, L as LLMProvider, a as Backend, b as LoadProgressCallback, C as ChatMessage, G as GenerateOptions, S as StreamCallback, A as AttachOptions } from './index-DJmiDzY3.js';
2
- export { c as Chat, d as ChatInput, e as ChatInputProps, f as ChatProps, D as DEFAULT_WEBLLM_MODEL, g as Device, h as LLMConfig, i as LLMContextValue, j as LLMLoading, k as LLMLoadingProps, l as LLMProvider, m as LLMProviderProps, n as LLMReady, o as LLMReadyProps, p as LoadProgress, q as LocalLLM, M as MessageRole, Q as Quantization, U as UseChatOptions, r as UseChatReturn, s as UseCompletionOptions, t as UseCompletionReturn, u as UseStreamOptions, v as UseStreamReturn, W as WEBLLM_MODELS, w as createLLM, w as default, x as isWebGPUSupported, y as useChat, z as useCompletion, E as useLLM, F as useStream } from './index-DJmiDzY3.js';
1
+ import { B as BrowserCapabilities, L as LLMProvider, a as Backend, b as LoadProgressCallback, C as ChatMessage, G as GenerateOptions, S as StreamCallback, A as AttachOptions } from './index-3qtk9myB.js';
2
+ export { c as Chat, d as ChatInput, e as ChatInputProps, f as ChatProps, D as DEFAULT_WEBLLM_MODEL, g as Device, h as LLMConfig, i as LLMContextValue, j as LLMLoading, k as LLMLoadingProps, l as LLMProvider, m as LLMProviderProps, n as LLMReady, o as LLMReadyProps, p as LoadProgress, q as LocalLLM, M as MessageRole, Q as Quantization, U as UseChatOptions, r as UseChatReturn, s as UseCompletionOptions, t as UseCompletionReturn, u as UseStreamOptions, v as UseStreamReturn, W as WEBLLM_MODELS, w as createLLM, w as default, x as isWebGPUSupported, y as useChat, z as useCompletion, E as useLLM, F as useStream } from './index-3qtk9myB.js';
3
3
  import 'react/jsx-runtime';
4
4
  import 'react';
5
5
 
package/dist/index.js CHANGED
@@ -22,7 +22,7 @@ import {
22
22
  useCompletion,
23
23
  useLLM,
24
24
  useStream
25
- } from "./chunk-6EZY4F42.js";
25
+ } from "./chunk-OJCFVVNG.js";
26
26
  export {
27
27
  Chat,
28
28
  ChatInput,
@@ -1,3 +1,3 @@
1
- export { c as Chat, H as ChatApp, I as ChatAppProps, d as ChatInput, e as ChatInputProps, f as ChatProps, J as ImageAttachment, i as LLMContextValue, j as LLMLoading, k as LLMLoadingProps, l as LLMProvider, m as LLMProviderProps, n as LLMReady, o as LLMReadyProps, U as UseChatOptions, r as UseChatReturn, s as UseCompletionOptions, t as UseCompletionReturn, u as UseStreamOptions, v as UseStreamReturn, y as useChat, z as useCompletion, E as useLLM, F as useStream } from '../index-DJmiDzY3.js';
1
+ export { c as Chat, H as ChatApp, I as ChatAppProps, d as ChatInput, e as ChatInputProps, f as ChatProps, J as ImageAttachment, i as LLMContextValue, j as LLMLoading, k as LLMLoadingProps, l as LLMProvider, m as LLMProviderProps, n as LLMReady, o as LLMReadyProps, U as UseChatOptions, r as UseChatReturn, s as UseCompletionOptions, t as UseCompletionReturn, u as UseStreamOptions, v as UseStreamReturn, y as useChat, z as useCompletion, E as useLLM, F as useStream } from '../index-3qtk9myB.js';
2
2
  import 'react/jsx-runtime';
3
3
  import 'react';
@@ -9,7 +9,7 @@ import {
9
9
  useCompletion,
10
10
  useLLM,
11
11
  useStream
12
- } from "../chunk-6EZY4F42.js";
12
+ } from "../chunk-OJCFVVNG.js";
13
13
  export {
14
14
  Chat,
15
15
  ChatApp,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@blank-utils/llm",
3
- "version": "0.5.3",
3
+ "version": "0.5.4",
4
4
  "description": "Run LLMs directly in your browser with WebGPU acceleration. Supports React hooks and eager background loading.",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",