@blank-utils/llm 0.2.2 → 0.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +487 -89
- package/dist/index.js +1245 -65
- package/dist/react/index.js +1267 -66
- package/package.json +6 -3
- package/dist/backends/transformers.d.ts +0 -62
- package/dist/backends/transformers.d.ts.map +0 -1
- package/dist/backends/webllm.d.ts +0 -70
- package/dist/backends/webllm.d.ts.map +0 -1
- package/dist/core.d.ts +0 -71
- package/dist/core.d.ts.map +0 -1
- package/dist/detect.d.ts +0 -21
- package/dist/detect.d.ts.map +0 -1
- package/dist/helpers.d.ts +0 -35
- package/dist/helpers.d.ts.map +0 -1
- package/dist/index.d.ts +0 -27
- package/dist/index.d.ts.map +0 -1
- package/dist/react/index.d.ts +0 -309
- package/dist/react/index.d.ts.map +0 -1
- package/dist/types.d.ts +0 -178
- package/dist/types.d.ts.map +0 -1
package/README.md
CHANGED

<p align="center">
	<strong>@blank-utils/llm</strong>
</p>

<p align="center">
	Run LLMs directly in your browser — zero server, zero API keys.
</p>

<p align="center">
	<a href="https://www.npmjs.com/package/@blank-utils/llm"><img src="https://img.shields.io/npm/v/@blank-utils/llm?style=flat-square&color=0ea5e9" alt="npm"></a>
	<a href="https://github.com/kiritocode1/local-llm/blob/main/LICENSE"><img src="https://img.shields.io/npm/l/@blank-utils/llm?style=flat-square" alt="license"></a>
	<img src="https://img.shields.io/badge/react-%E2%89%A518-61dafb?style=flat-square" alt="react">
	<img src="https://img.shields.io/badge/webgpu-supported-brightgreen?style=flat-square" alt="webgpu">
</p>

---

## Features

- 🚀 **WebGPU acceleration** via [WebLLM](https://github.com/mlc-ai/web-llm) — falls back to WASM through [Transformers.js](https://github.com/huggingface/transformers.js)
- ⚛️ **React hooks** — `useChat`, `useStream`, `useCompletion` with eager background loading
- 🔤 **Type-safe model selection** — full autocomplete for 30+ supported models across both backends
- 📝 **Streaming support** — real-time token output with abort control
- 🔄 **Message queueing** — users can type while models download; messages are processed once ready
- 🧩 **Vanilla JS friendly** — works outside React with DOM helpers and a simple `createLLM()` API
- 📦 **Zero config** — auto-detects WebGPU/WASM and picks the best backend

## Installation

```bash
pnpm add @blank-utils/llm
# or
bun add @blank-utils/llm
```

> React is an **optional** peer dependency. The core API works without it.

---

## Quick Start

### Quick Chat (Fully Featured App)

The fastest way to get started. `<ChatApp>` includes the provider, model management, and UI in a single component:

```tsx
import { ChatApp } from "@blank-utils/llm/react";

export default function App() {
	return (
		<ChatApp
			defaultModel="qwen-2.5-0.5b"
			theme="dark" // 'dark' | 'light'
			systemPrompt="You are a helpful assistant."
		/>
	);
}
```

### Components (Custom Setup)

If you already have a provider or want more control, use the `<Chat>` component:

```tsx
import { LLMProvider, Chat } from "@blank-utils/llm/react";

export default function App() {
	return (
		<LLMProvider model="qwen-2.5-0.5b">
			<Chat
				theme="dark"
				systemPrompt="You are a helpful assistant."
				placeholder="Ask me anything..."
			/>
		</LLMProvider>
	);
}
```

### Custom UI with Hooks

Build your own interface from scratch using our hooks:

```tsx
import { LLMProvider, useChat, useLLM } from "@blank-utils/llm/react";

function App() {
	return (
		<LLMProvider model="qwen-2.5-0.5b">
			<ChatUI />
		</LLMProvider>
	);
}

function ChatUI() {
	const { isLoading, loadProgress } = useLLM();
	const {
		messages,
		// ...
		isGenerating,
		isPending,
		streamingText,
	} = useChat({
		systemPrompt: "You are a helpful assistant.",
		queueWhileLoading: true,
	});

	return (
		<div>
			{/* ... */}
			{messages.map((m, i) => (
				<div key={i}>
					<strong>{m.role}:</strong> {m.content}
				</div>
			))}
			{/* ... */}
		</div>
	);
}
```

```typescript
const llm = await createLLM({
	// ...
});

// Streaming
await llm.stream("Tell me a joke", (token, fullText) => {
	document.getElementById("output")!.textContent = fullText;
});

// Non-streaming
const response = await llm.chat("Hello!");
console.log(response);
```

### Attach to DOM Elements

```typescript
import { createLLM } from "@blank-utils/llm";

const llm = await createLLM({ model: "smollm2-360m" });

// Wire up an input + output with one call
const cleanup = llm.attachToInput("#prompt-input", "#response-output", {
	triggerOnEnter: true,
	clearOnSend: true,
});
```
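
For context, a minimal sketch of the host page the snippet above could attach to — the markup is illustrative and not part of the library; only the two element IDs matter, and the returned `cleanup()` detaches everything when you are done:

```typescript
// Illustrative host-page setup: create the two elements the example targets.
const promptInput = document.createElement("input");
promptInput.id = "prompt-input";
promptInput.placeholder = "Ask something...";

const responseOutput = document.createElement("div");
responseOutput.id = "response-output";

document.body.append(promptInput, responseOutput);

// Later, when tearing the UI down:
// cleanup();
```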

---

## Architecture

```
@blank-utils/llm
├── src/
│   ├── index.ts              # Main entry — re-exports everything
│   ├── core.ts               # createLLM() factory, LocalLLM interface
│   ├── models.ts             # Centralized model registry (single source of truth)
│   ├── types.ts              # All TypeScript interfaces & types
│   ├── detect.ts             # WebGPU / WASM capability detection
│   ├── helpers.ts            # DOM utilities (attachToElements, createChatUI, etc.)
│   ├── backends/
│   │   ├── webllm.ts         # WebLLM backend (WebGPU)
│   │   └── transformers.ts   # Transformers.js backend (WASM / WebGPU)
│   └── react/
│       ├── index.tsx         # React context, provider, hooks
│       ├── components.tsx    # <Chat> — ready-made chat interface
│       └── chat-input.tsx    # <ChatInput> — auto-resizing input widget
└── dist/                     # Built output (ESM)
```

### Dual Backend System

|                   | **WebLLM**                | **Transformers.js** |
| ----------------- | ------------------------- | ------------------- |
| **Engine**        | MLC / TVM compiled models | ONNX Runtime        |
| **Device**        | WebGPU only               | WebGPU or WASM      |
| **Performance**   | Best (GPU-native)         | Good (CPU fallback) |
| **Model source**  | MLC prebuilt cache        | HuggingFace Hub     |
| **Auto-detected** | ✅ when WebGPU present    | ✅ fallback         |

The library auto-selects the best backend via `detectCapabilities()`. You can also force a backend:

```tsx
<LLMProvider model="llama-3.2-3b" backend="webllm" />
```
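
Outside React the same kind of override works through `createLLM()`. A minimal sketch, assuming `createLLM()` accepts the same `backend` and `device` overrides that `<LLMProvider>` exposes:

```typescript
import { createLLM } from "@blank-utils/llm";

// Force the WASM path even on machines that have WebGPU,
// e.g. to exercise the fallback behaviour during testing.
const llm = await createLLM({
	model: "qwen-2.5-0.5b",
	backend: "transformers",
	device: "wasm", // assumption: mirrors the <LLMProvider> `device` prop
});
```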

---

## React API

### `<LLMProvider>`

Wrap your app to enable LLM functionality. All hooks must be used inside this provider.

```tsx
<LLMProvider
	model="qwen-2.5-0.5b"      // Model alias or full ID
	backend="auto"             // 'webllm' | 'transformers' | 'auto'
	autoLoad={true}            // Start loading on mount
	device="auto"              // 'webgpu' | 'wasm' | 'auto'
	quantization="q4"          // 'q4' | 'q8' | 'fp16' | 'fp32'
	systemPrompt="..."         // Default system prompt
	onProgress={(p) => {}}     // Loading progress
	onLoad={(llm) => {}}       // Called when model is ready
	onError={(err) => {}}      // Error handler
>
	{children}
</LLMProvider>
```

> **Tip:** Use `key={modelId}` on `<LLMProvider>` to force a full re-mount when switching models dynamically.
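
A small sketch of that pattern — the component and state names here are ours, only the provider, `<Chat>`, and `onModelChange` come from the library:

```tsx
import { useState } from "react";
import { LLMProvider, Chat } from "@blank-utils/llm/react";

function SwitchableChat() {
	const [modelId, setModelId] = useState("qwen-2.5-0.5b");

	return (
		// Changing `key` re-mounts the provider, so the newly selected model loads cleanly.
		<LLMProvider key={modelId} model={modelId}>
			<Chat onModelChange={setModelId} />
		</LLMProvider>
	);
}
```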

### `useLLM()`

Access the raw LLM instance and loading state:

```tsx
const {
	llm,          // LocalLLM | null
	isLoading,    // boolean — model is downloading
	isReady,      // boolean — model ready for inference
	loadProgress, // { progress: number, status: string } | null
	error,        // Error | null
	modelId,      // string | null — current model ID
	backend,      // 'webllm' | 'transformers' | null
	reload,       // () => Promise<void>
	unload,       // () => Promise<void>
} = useLLM();
```
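
As an illustration, a minimal status banner built from these fields — the component is ours and must render inside `<LLMProvider>`; only the hook and its documented fields come from the library:

```tsx
function LLMStatus() {
	const { isLoading, isReady, loadProgress, error } = useLLM();

	if (error) return <p role="alert">Failed to load model: {error.message}</p>;
	if (isLoading)
		return (
			<p>
				Downloading… {loadProgress?.progress ?? 0}% — {loadProgress?.status}
			</p>
		);
	return <p>{isReady ? "Model ready" : "Model not loaded"}</p>;
}
```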

### `useChat(options?)`

Full chat conversation management with **eager loading** — users can send messages while the model downloads. Messages are queued and processed automatically once the model is ready.

```tsx
const {
	messages,      // ChatMessage[]
	input,         // string — controlled input value
	setInput,      // (value: string) => void
	send,          // (content?: string) => Promise<string>
	isGenerating,  // boolean
	isPending,     // boolean — message queued, waiting for model
	streamingText, // string — current partial response
	stop,          // () => void
	clear,         // () => void
	append,        // (message: ChatMessage) => void
	reload,        // () => Promise<string> — regenerate last response
} = useChat({
	systemPrompt: "You are a helpful assistant.",
	queueWhileLoading: true, // default: true
	initialMessages: [],
	generateOptions: { temperature: 0.7, maxTokens: 512 },
	onStart: () => {},
	onToken: (token, fullText) => {},
	onFinish: (response) => {},
	onError: (error) => {},
});
```
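
A compact form wiring for the hook, assuming it renders inside `<LLMProvider>` and that `send()` with no argument submits the current `input` (per the signature above); the markup itself is illustrative:

```tsx
function ChatForm() {
	const { input, setInput, send, isGenerating, isPending } = useChat();

	return (
		<form
			onSubmit={(e) => {
				e.preventDefault();
				send(); // queued automatically if the model is still loading
			}}
		>
			<input value={input} onChange={(e) => setInput(e.target.value)} />
			<button type="submit" disabled={isGenerating}>
				{isPending ? "Queued…" : "Send"}
			</button>
		</form>
	);
}
```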

### `useStream(options?)`

Simple streaming generation without chat history management:

```tsx
const { text, isStreaming, stream, stop, clear } = useStream({
	onToken: (token, fullText) => {},
	onFinish: (response) => {},
	onError: (error) => {},
	generateOptions: { temperature: 0.7 },
});

await stream("Tell me a story");
// or with a message array:
await stream([{ role: "user", content: "Tell me a story" }]);
```
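
For example, a tiny component (ours, not part of the library) that kicks off a stream and renders the partial text as it arrives, using only the fields returned above:

```tsx
function StoryStreamer() {
	const { text, isStreaming, stream, stop } = useStream();

	return (
		<div>
			<button onClick={() => stream("Tell me a story")} disabled={isStreaming}>
				Generate
			</button>
			{isStreaming && <button onClick={stop}>Stop</button>}
			<pre>{text}</pre>
		</div>
	);
}
```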

### `useCompletion(options?)`

Non-streaming, single-shot completion:

```tsx
const { completion, isLoading, complete, clear } = useCompletion({
	generateOptions: { maxTokens: 256 },
});

const result = await complete("Summarize this text");
```

### `<LLMLoading>` / `<LLMReady>`

Conditional rendering components:

```tsx
<LLMLoading className="loading-state">
	<p>Downloading model...</p>
</LLMLoading>

<LLMReady fallback={<Spinner />}>
	<ChatInterface />
</LLMReady>
```

### The Default Chat Interface, Explained

The `<Chat>` and `<ChatApp>` components provide a production-ready interface with "Terminal Luxury" aesthetics.

**Key Features & Usage Points:**

- **✨ Zero Config**: Just drop it in. No CSS files to import, no state to manage.
- **🎨 Rich Text Rendering**:
  - **Markdown**: Bold, italics, lists, tables.
  - **Code Blocks**: Syntax highlighting for 20+ languages.
  - **Diagrams**: Renders `mermaid` diagrams automatically.
  - **Math**: Supports LaTeX expressions.
- **⚡ Eager Interaction**: Users can type and send messages _while_ the model is still initializing. The chat manages the queue for you.
- **🌗 Theming**: Built-in 'dark' (cherry red accents) and 'light' modes.
- **🔄 Model Switching**:
  - If using `<ChatApp />`, a model selector dropdown is included automatically.
  - If using `<Chat />`, pass `onModelChange` to enable the dropdown.
- **🛠️ Extensible Toolbar**: Use the `inputActions` prop to add your own buttons (e.g., upload, clear) to the input area.

### `<Chat>` Component API

```tsx
<Chat
	// Appearance
	theme="dark"               // 'dark' | 'light'
	maxHeight="600px"          // CSS max-height
	className="my-chat"        // Extra classes
	// Content
	systemPrompt="..."         // Default: "You are a helpful AI assistant..."
	welcomeMessage="..."       // Text shown when chat is empty
	placeholder="..."          // Input placeholder
	// Features
	showHeader={true}          // Toggle header/model info
	showProgress={true}        // Toggle loading progress bar
	// Callbacks
	onSend={(msg) => {}}       // Listen to user messages
	onResponse={(res) => {}}   // Listen to AI responses
	onModelChange={(id) => {}} // Enable model switching dropdown
	inputActions={             // Add custom buttons
		<>
			<button>Clear</button>
		</>
	}
/>
```

### `<ChatApp>` Component API

Wrapper that combines `LLMProvider` and `Chat`.

```tsx
<ChatApp
	defaultModel="qwen-2.5-0.5b"
	defaultBackend="auto"  // 'webllm' | 'transformers' | 'auto'
	autoLoad={true}        // Start downloading immediately
	{...chatProps}         // All <Chat> props are supported
/>
```

### `<ChatInput>`

Standalone auto-resizing input component. Use it to build custom chat layouts:

```tsx
import { ChatInput } from "@blank-utils/llm/react";

<ChatInput
	value={input}           // Controlled value
	onChange={setInput}     // Value change handler
	onSend={handleSend}     // Submit handler (Enter or button)
	onStop={handleStop}     // Stop generation
	disabled={false}        // Disable input
	isGenerating={false}    // Show stop button instead of send
	placeholder="Type..."   // Placeholder text
	maxRows={5}             // Max rows before scroll
	theme="dark"            // 'dark' | 'light'
	actions={<MyButtons />} // Custom toolbar actions
/>;
```

**Features:**

- 📝 Auto-resizes up to `maxRows`, then scrolls
- ⌨️ Enter to send, Shift+Enter for newline
- ⏹️ Stop button while generating
- 🎨 Dark/light theme support
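
`<ChatInput>` pairs naturally with `useChat` when building a custom layout. A sketch using only the documented props and hook fields (the surrounding component is ours and must sit inside `<LLMProvider>`):

```tsx
function CustomChat() {
	const { messages, input, setInput, send, stop, isGenerating } = useChat();

	return (
		<div>
			{messages.map((m, i) => (
				<p key={i}>
					<strong>{m.role}:</strong> {m.content}
				</p>
			))}
			<ChatInput
				value={input}
				onChange={setInput}
				onSend={() => send()}
				onStop={stop}
				isGenerating={isGenerating}
			/>
		</div>
	);
}
```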

---

## Vanilla JS API

### `createLLM(config?)`

Factory function that auto-detects capabilities, picks a backend, loads a model, and returns a ready-to-use `LocalLLM` instance:

```typescript
import { createLLM } from "@blank-utils/llm";

const llm = await createLLM({
	model: "phi-3.5-mini",
	backend: "auto",
	systemPrompt: "You are a helpful assistant.",
	onLoadProgress: (p) => console.log(`${p.status}: ${p.progress}%`),
});

// Chat (non-streaming)
const answer = await llm.chat("What is 2+2?");

// Stream
await llm.stream("Write a poem", (token, fullText) => {
	process.stdout.write(token);
});

// Attach to DOM
const cleanup = llm.attachToInput("#input", "#output");

// Free resources
await llm.unload();
```

### DOM Helpers

```typescript
import {
	createOutputStreamer,
	attachToElements,
	createChatUI,
	createLoadingIndicator,
} from "@blank-utils/llm";

// Auto-scroll streaming output into an element
const streamer = createOutputStreamer("#output", { scrollToBottom: true });

// Create a full chat UI in one call
const { input, output, sendButton, cleanup } = createChatUI("#container");

// Progress indicator
const loading = createLoadingIndicator("#loading-container");
loading.show();
loading.setProgress(50, "Downloading weights...");
loading.hide();
```

### Capability Detection

```typescript
import {
	detectCapabilities,
	logCapabilities,
	isWebGPUSupported,
} from "@blank-utils/llm";

const caps = await detectCapabilities();
// { webgpu: true, wasm: true, recommendedBackend: 'webllm', recommendedDevice: 'webgpu' }

await logCapabilities(); // Pretty-prints to console

if (await isWebGPUSupported()) {
	console.log("WebGPU is available!");
}
```
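
One practical use of the detection result is choosing a lighter model when only WASM is available. A sketch using the documented fields and model aliases — the particular size cut-off is just an example, not a recommendation from the library:

```typescript
import { createLLM, detectCapabilities } from "@blank-utils/llm";

const caps = await detectCapabilities();

// Prefer a larger model on WebGPU; fall back to a tiny one on WASM-only machines.
const llm = await createLLM({
	model: caps.webgpu ? "llama-3.2-1b" : "smollm2-360m",
	backend: caps.recommendedBackend,
});
```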

---

## Available Models

All models are defined in `src/models.ts` and exported as `WEBLLM_MODELS` and `TRANSFORMERS_MODELS`.
You can use either the **alias** (short name) or the **full model ID** when specifying a model.

### WebLLM Backend (WebGPU)

| Alias                   | Model                        | Notes                       |
| ----------------------- | ---------------------------- | --------------------------- |
| `llama-3.2-1b`          | Llama 3.2 1B Instruct        | Compact, great quality      |
| `llama-3.2-3b`          | Llama 3.2 3B Instruct        | Balanced                    |
| `llama-3.1-8b`          | Llama 3.1 8B Instruct        | High quality                |
| `llama-3.1-8b-1k`       | Llama 3.1 8B (1K ctx)        | Lower memory                |
| `phi-3.5-mini`          | Phi 3.5 Mini Instruct        | **Default** — great balance |
| `phi-3.5-mini-1k`       | Phi 3.5 Mini (1K ctx)        | Lower memory                |
| `phi-3.5-vision`        | Phi 3.5 Vision               | Vision model                |
| `qwen-2.5-0.5b`         | Qwen 2.5 0.5B                | Tiny, fast                  |
| `qwen-2.5-1.5b`         | Qwen 2.5 1.5B                | Small                       |
| `qwen-2.5-3b`           | Qwen 2.5 3B                  | Medium                      |
| `qwen-2.5-7b`           | Qwen 2.5 7B                  | Large                       |
| `qwen-2.5-coder-0.5b`   | Qwen 2.5 Coder 0.5B          | Code-focused                |
| `qwen-2.5-coder-1.5b`   | Qwen 2.5 Coder 1.5B          | Code-focused                |
| `qwen-3-0.6b`           | Qwen 3 0.6B                  | Latest gen                  |
| `qwen-3-1.7b`           | Qwen 3 1.7B                  | Latest gen                  |
| `qwen-3-4b`             | Qwen 3 4B                    | Latest gen                  |
| `qwen-3-8b`             | Qwen 3 8B                    | Latest gen                  |
| `gemma-2-2b`            | Gemma 2 2B                   | Google, efficient           |
| `gemma-2-2b-1k`         | Gemma 2 2B (1K ctx)          | Lower memory                |
| `gemma-2-9b`            | Gemma 2 9B                   | Large                       |
| `smollm2-135m`          | SmolLM2 135M                 | Ultra lightweight           |
| `smollm2-360m`          | SmolLM2 360M                 | Lightweight                 |
| `smollm2-1.7b`          | SmolLM2 1.7B                 | Small                       |
| `mistral-7b`            | Mistral 7B v0.3              | General purpose             |
| `deepseek-r1-qwen-7b`   | DeepSeek R1 Distill Qwen 7B  | Reasoning                   |
| `deepseek-r1-llama-8b`  | DeepSeek R1 Distill Llama 8B | Reasoning                   |
| `hermes-3-llama-3.2-3b` | Hermes 3 Llama 3.2 3B        | Function calling            |
| `hermes-3-llama-3.1-8b` | Hermes 3 Llama 3.1 8B        | Function calling            |

### Transformers.js Backend (CPU / WASM)

| Alias                 | HuggingFace Model ID                         | Notes        |
| --------------------- | -------------------------------------------- | ------------ |
| `qwen-2.5-0.5b`       | `onnx-community/Qwen2.5-0.5B-Instruct`       | **Default**  |
| `qwen-2.5-1.5b`       | `onnx-community/Qwen2.5-1.5B-Instruct`       | Good quality |
| `qwen-2.5-coder-0.5b` | `onnx-community/Qwen2.5-Coder-0.5B-Instruct` | Code         |
| `qwen-2.5-coder-1.5b` | `onnx-community/Qwen2.5-Coder-1.5B-Instruct` | Code         |
| `qwen-3-0.6b`         | `onnx-community/Qwen3-0.6B-ONNX`             | Latest gen   |
| `smollm2-135m`        | `HuggingFaceTB/SmolLM2-135M-Instruct`        | Ultra fast   |
| `smollm2-360m`        | `HuggingFaceTB/SmolLM2-360M-Instruct`        | Fast         |
| `smollm2-1.7b`        | `HuggingFaceTB/SmolLM2-1.7B-Instruct`        | Good         |
| `phi-3-mini`          | `Xenova/Phi-3-mini-4k-instruct`              | Strong       |
| `tinyllama`           | `Xenova/TinyLlama-1.1B-Chat-v1.0`            | Very fast    |

### Type-Safe Model Selection

The `model` prop accepts any key from `WEBLLM_MODELS` or `TRANSFORMERS_MODELS` with full autocomplete, while still allowing arbitrary strings for custom models:

```typescript
import type {
	SupportedModel,
	WebLLMModelID,
	TransformersModelID,
} from "@blank-utils/llm";

// Full autocomplete for known models
const model: SupportedModel = "qwen-2.5-0.5b"; // ✅ autocomplete

// Custom model IDs still work
const custom: SupportedModel = "my-org/custom-model-onnx"; // ✅ no error

// Import the model maps for programmatic use
import { WEBLLM_MODELS, TRANSFORMERS_MODELS } from "@blank-utils/llm";

Object.keys(WEBLLM_MODELS); // all WebLLM aliases
Object.keys(TRANSFORMERS_MODELS); // all Transformers.js aliases
```

---

## Build & Development

```bash
# Install dependencies
bun install

# Build (clean → bundle → assets → types)
bun run build

# Type-check only
bun run typecheck

# Run demo page
bun run demo

# Run tests
bun test
```

### Build Pipeline

| Script        | What it does                                                                                                        |
| ------------- | ------------------------------------------------------------------------------------------------------------------- |
| `clean`       | Removes `dist/`                                                                                                      |
| `build:js`    | Bundles `src/index.ts` → `dist/index.js` and `src/react/index.tsx` → `dist/react/index.js` (ESM, externals: react)  |
| `postbuild`   | Copies WASM + ONNX runtime assets into `dist/` and `dist/react/`                                                     |
| `build:types` | Generates `.d.ts` declaration files via `tsc`                                                                        |
| `build`       | Runs all of the above in sequence                                                                                    |

### Package Exports

```jsonc
{
	".": {
		"types": "./dist/index.d.ts",
		"import": "./dist/index.js",
	},
	"./react": {
		"types": "./dist/react/index.d.ts",
		"import": "./dist/react/index.js",
	},
}
```

---

## Browser Requirements

| Feature               | Minimum                    | Notes                          |
| --------------------- | -------------------------- | ------------------------------ |
| **WebGPU**            | Chrome 113+, Edge 113+     | Required for WebLLM backend    |
| **WebAssembly**       | All modern browsers        | Fallback for Transformers.js   |
| **SharedArrayBuffer** | Requires COOP/COEP headers | Needed for multi-threaded WASM |

The library automatically detects capabilities and picks the best backend. No manual configuration needed.
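
If you want the multi-threaded WASM path, your server must send the two cross-origin isolation headers mentioned above. As one illustration, with Vite's dev server this can be configured roughly as follows (a sketch — apply the same two headers on whatever server actually hosts your app):

```typescript
// vite.config.ts (illustrative)
import { defineConfig } from "vite";

export default defineConfig({
	server: {
		headers: {
			// Enables crossOriginIsolated, which SharedArrayBuffer requires.
			"Cross-Origin-Opener-Policy": "same-origin",
			"Cross-Origin-Embedder-Policy": "require-corp",
		},
	},
});
```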

---

## License

MIT © [blank](https://github.com/kiritocode1)