@agorapete/wllama 3.5.1-q2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. package/.gitmodules +3 -0
  2. package/.prettierignore +38 -0
  3. package/AGENTS.md +1 -0
  4. package/CMakeLists.txt +131 -0
  5. package/LICENCE +21 -0
  6. package/README-dev.md +178 -0
  7. package/README.md +225 -0
  8. package/README_banner.png +0 -0
  9. package/assets/screenshot_0.png +0 -0
  10. package/cpp/generate_glue_prototype.js +115 -0
  11. package/cpp/glue.hpp +664 -0
  12. package/cpp/test_glue.cpp +80 -0
  13. package/cpp/wllama-context.h +1172 -0
  14. package/cpp/wllama-fs.h +148 -0
  15. package/cpp/wllama.cpp +187 -0
  16. package/cpp/wllama.h +6 -0
  17. package/esm/cache-manager.d.ts +130 -0
  18. package/esm/debug.d.ts +28 -0
  19. package/esm/glue/glue.d.ts +22 -0
  20. package/esm/glue/messages.d.ts +146 -0
  21. package/esm/huggingface.d.ts +31 -0
  22. package/esm/index.cjs +3406 -0
  23. package/esm/index.d.ts +8 -0
  24. package/esm/index.js +3387 -0
  25. package/esm/index.min.js +1 -0
  26. package/esm/index.min.js.map +1 -0
  27. package/esm/model-manager.d.ts +136 -0
  28. package/esm/storage/cos.d.ts +36 -0
  29. package/esm/storage/index.d.ts +33 -0
  30. package/esm/storage/opfs.d.ts +12 -0
  31. package/esm/types/oai-compat.d.ts +278 -0
  32. package/esm/types/types.d.ts +112 -0
  33. package/esm/utils.d.ts +119 -0
  34. package/esm/wasm/source-map.d.ts +1 -0
  35. package/esm/wasm/wllama.wasm +0 -0
  36. package/esm/wasm-from-cdn.d.ts +8 -0
  37. package/esm/wllama.d.ts +397 -0
  38. package/esm/worker.d.ts +92 -0
  39. package/esm/workers-code/generated.d.ts +4 -0
  40. package/guides/intro-v2.md +132 -0
  41. package/guides/intro-v3.1.md +40 -0
  42. package/guides/intro-v3.md +230 -0
  43. package/index.ts +1 -0
  44. package/package.json +71 -0
  45. package/scripts/bisect_test.sh +33 -0
  46. package/scripts/build_hf_space.sh +26 -0
  47. package/scripts/build_source_map.js +269 -0
  48. package/scripts/build_wasm.sh +19 -0
  49. package/scripts/build_worker.sh +38 -0
  50. package/scripts/check_debug_build.js +30 -0
  51. package/scripts/check_package_size.js +25 -0
  52. package/scripts/docker-compose.yml +76 -0
  53. package/scripts/generate_wasm_from_cdn.js +24 -0
  54. package/scripts/http_server.js +44 -0
  55. package/scripts/post_build.sh +32 -0
  56. package/src/cache-manager.ts +358 -0
  57. package/src/debug.ts +111 -0
  58. package/src/glue/glue.ts +291 -0
  59. package/src/glue/messages.ts +773 -0
  60. package/src/huggingface.ts +151 -0
  61. package/src/index.ts +8 -0
  62. package/src/mjs.test.ts +44 -0
  63. package/src/model-manager.test.ts +200 -0
  64. package/src/model-manager.ts +359 -0
  65. package/src/storage/cos.test.ts +83 -0
  66. package/src/storage/cos.ts +171 -0
  67. package/src/storage/index.ts +40 -0
  68. package/src/storage/opfs.ts +119 -0
  69. package/src/types/oai-compat.ts +342 -0
  70. package/src/types/types.ts +133 -0
  71. package/src/utils.test.ts +231 -0
  72. package/src/utils.ts +403 -0
  73. package/src/wasm/source-map.ts +7 -0
  74. package/src/wasm/wllama.js +1 -0
  75. package/src/wasm/wllama.wasm +0 -0
  76. package/src/wasm-from-cdn.ts +13 -0
  77. package/src/wllama.test.ts +392 -0
  78. package/src/wllama.ts +1138 -0
  79. package/src/wllama.wgpu.test.ts +62 -0
  80. package/src/worker.ts +443 -0
  81. package/src/workers-code/generated.ts +11 -0
  82. package/src/workers-code/llama-cpp.js +511 -0
  83. package/src/workers-code/opfs-utils.js +150 -0
  84. package/tsconfig.build.json +34 -0
  85. package/tsup.config.ts +23 -0
  86. package/vitest.config.ts +61 -0
@@ -0,0 +1,151 @@
1
+ import { type ModelSource } from './model-manager';
2
+
3
/**
 * Options describing which GGUF model (and, optionally, which multimodal
 * projector file) to resolve inside a Hugging Face repo.
 */
export interface HuggingFaceParams {
  /**
   * The repo name, e.g. user/model
   */
  repo: string;
  /**
   * The file name or path to file in the repo. Only file or quant is needed.
   *
   * When provided, it takes precedence over `quant`.
   */
  file?: string;
  /**
   * The GGUF quantization name, e.g. Q4_K_M, Q8_0, etc. Only file or quant is needed.
   *
   * When neither `file` nor `quant` is given, a file whose path contains
   * Q4_K_M is preferred, then Q8_0. If neither is present, the first
   * non-mmproj `.gguf` file of the repo listing is used, whatever its
   * quantization.
   */
  quant?: string;
  /**
   * The file name or path to file in the repo for mmproj. Only mmprojFile or mmprojQuant is needed.
   *
   * No mmproj is resolved unless `mmprojFile` or `mmprojQuant` is provided.
   */
  mmprojFile?: string;
  /**
   * The GGUF quantization name for mmproj, e.g. Q4_K_M, Q8_0, etc. Only mmprojFile or mmprojQuant is needed.
   *
   * If no matching mmproj file exists in the repo, the model source is
   * returned without an mmproj URL rather than failing.
   */
  mmprojQuant?: string;
  /**
   * The Hugging Face token with permission to access the repo. It can be omitted if the repo is public.
   *
   * It is sent as a Bearer token when listing the repo and appended as a
   * `token` query parameter to the resolved URLs.
   */
  hfToken?: string;
}
31
+
32
/** Origin shared by the repo-listing API calls and the file URLs built in this module. */
const HF_BASE = 'https://huggingface.co';

/** Quantization names tried, in this order, when the caller does not request one. */
const DEFAULT_QUANTS = ['Q4_K_M', 'Q8_0'];
34
+
35
/**
 * One entry of the repo tree listing as this module types it.
 * The selection helpers in this file only read `type` and `path`.
 */
interface HFFileEntry {
  /** Entry kind; the selection logic only keeps entries equal to 'file'. */
  type: string;
  /** Path of the entry inside the repo. */
  path: string;
  size: number;
  oid: string;
  lfs?: {
    oid: string;
    size: number;
  };
}
42
+
43
/**
 * Request the recursive file listing of a model repo's `main` revision.
 *
 * @param repo Repo name, e.g. `user/model`; interpolated into the URL as-is.
 * @param token Optional access token, sent as a `Bearer` Authorization header.
 * @returns The parsed JSON body of a successful response.
 * @throws Error `HF API error (<status>): <message>` when the response is not OK.
 */
async function fetchRepoFiles(
  repo: string,
  token?: string
): Promise<HFFileEntry[]> {
  const requestHeaders: Record<string, string> = token
    ? { Accept: 'application/json', Authorization: `Bearer ${token}` }
    : { Accept: 'application/json' };

  const response = await fetch(
    `${HF_BASE}/api/models/${repo}/tree/main?recursive=true`,
    { headers: requestHeaders }
  );
  if (response.ok) {
    return response.json();
  }

  // Prefer the `error` field of the JSON body; keep the status text when
  // the body cannot be read as JSON or carries no such field.
  let reason = response.statusText;
  try {
    reason = (await response.json()).error ?? reason;
  } catch {
    /* ignore */
  }
  throw new Error(`HF API error (${response.status}): ${reason}`);
}
64
+
65
// Map a split-GGUF shard path (e.g. model-00003-of-00005.gguf) to the first
// shard of the same set, provided that first shard is present in `files`.
// Non-split paths, and shards whose first part is not listed, come back unchanged.
function firstShardPath(files: HFFileEntry[], path: string): string {
  const shard = /^(.+)-(\d{5})-of-(\d{5})\.gguf$/i.exec(path);
  if (shard === null) {
    return path;
  }

  const [, stem, , total] = shard;
  const candidate = `${stem}-00001-of-${total}.gguf`;
  for (const entry of files) {
    if (entry.path === candidate) {
      return candidate;
    }
  }
  return path;
}
73
+
74
/**
 * Pick a GGUF file path from a repo listing.
 *
 * Only entries of type 'file' ending in `.gguf` are considered. Entries whose
 * path contains "mmproj" are kept when `mmprojOnly` is true and excluded
 * otherwise. Split models are resolved to their first shard.
 *
 * @param files Repo listing to choose from.
 * @param quant Requested quantization name (case-insensitive). When empty or
 *   undefined, `DEFAULT_QUANTS` are tried in order, then the first candidate.
 * @param mmprojOnly Select among mmproj files instead of model files.
 * @returns The chosen path, or null when there is no candidate at all or the
 *   requested `quant` matches no candidate.
 */
function selectFile(
  files: HFFileEntry[],
  quant: string | undefined,
  mmprojOnly: boolean
): string | null {
  const candidates = files.filter((f) => {
    const lower = f.path.toLowerCase();
    if (f.type !== 'file' || !lower.endsWith('.gguf')) return false;
    return lower.includes('mmproj') === mmprojOnly;
  });

  if (candidates.length === 0) return null;

  // A plain substring test lets "F16" select a "BF16" file, or "Q4_K_M" an
  // "IQ4_K_M" file, whenever that file happens to be listed first. Prefer a
  // candidate where the name stands as its own token, and only then fall
  // back to the substring match so looser queries keep resolving.
  const findByQuant = (name: string): HFFileEntry | undefined => {
    const needle = name.toUpperCase();
    const escaped = needle.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const wholeToken = new RegExp(`(?<![A-Z0-9])${escaped}(?![A-Z0-9_])`);
    return (
      candidates.find((f) => wholeToken.test(f.path.toUpperCase())) ??
      candidates.find((f) => f.path.toUpperCase().includes(needle))
    );
  };

  if (quant) {
    const match = findByQuant(quant);
    return match ? firstShardPath(candidates, match.path) : null;
  }

  for (const q of DEFAULT_QUANTS) {
    const match = findByQuant(q);
    if (match) return firstShardPath(candidates, match.path);
  }

  // Fallback: first candidate
  return firstShardPath(candidates, candidates[0].path);
}
103
+
104
/**
 * Resolve a Hugging Face repo description into download URLs.
 *
 * The repo listing is fetched first. The model path is `config.file` when
 * given, otherwise it is chosen from the listing by `config.quant`. An mmproj
 * URL is added only when `mmprojFile` or `mmprojQuant` is provided and a
 * matching file can be determined.
 *
 * @param config Repo, file/quant selection and optional access token.
 * @returns A source whose `url` (and `mmprojUrl`, if resolved) point at the
 *   repo's `resolve/main/` endpoint; when a token is given it is appended to
 *   both as a `token` query parameter.
 * @throws Error when no model GGUF file can be determined, or when listing
 *   the repo fails.
 */
export async function getHFModelSource(
  config: HuggingFaceParams
): Promise<ModelSource> {
  const { repo, file, quant, mmprojFile, mmprojQuant, hfToken } = config;

  const files = await fetchRepoFiles(repo, hfToken);

  const resolveUrl = (path: string): string =>
    `${HF_BASE}/${repo}/resolve/main/${path}`;

  // `||` rather than `??`: an empty-string `file` means "not provided" and
  // must not mask the quant-based selection.
  const modelPath = file || selectFile(files, quant, false);
  if (!modelPath) {
    throw new Error(`No GGUF file found in repo "${repo}"`);
  }

  const source: ModelSource = {
    url: resolveUrl(modelPath),
  };

  if (mmprojFile || mmprojQuant !== undefined) {
    // Same reasoning as above: the guard treats an empty `mmprojFile` as
    // absent, so the selection has to as well.
    const mmpath = mmprojFile || selectFile(files, mmprojQuant, true);
    if (mmpath) {
      source.mmprojUrl = resolveUrl(mmpath);
    }
  }

  if (hfToken) {
    const params = new URLSearchParams({ token: hfToken });
    source.url += `?${params}`;
    if (source.mmprojUrl) {
      source.mmprojUrl += `?${params}`;
    }
  }

  return source;
}
137
+
138
/**
 * Best-effort lookup of a file's SHA-256 from its `/raw/` counterpart.
 *
 * The `/resolve/` segment of `url` is swapped for `/raw/` and the response
 * text is searched for a line of the form `oid sha256:<64 hex chars>`.
 *
 * @param url File URL; anything without a `/resolve/` segment yields undefined.
 * @param headers Headers forwarded with the request.
 * @returns The lowercase hex digest, or undefined when the URL is not a
 *   `/resolve/` URL, the request fails or is not OK, or no such line is found.
 */
export async function getHFFileSHA256(
  url: string,
  headers: Record<string, string>
): Promise<string | undefined> {
  if (!url.includes('/resolve/')) return undefined;
  const rawUrl = url.replace('/resolve/', '/raw/');
  try {
    const res = await fetch(rawUrl, { headers });
    // An error page is not a pointer file: do not read or parse its body.
    if (!res.ok) return undefined;
    const text = await res.text();
    const match = /^oid sha256:([0-9a-f]{64})$/m.exec(text);
    return match ? match[1] : undefined;
  } catch {
    // Deliberately best-effort: callers get undefined instead of an error.
    return undefined;
  }
}
package/src/index.ts ADDED
@@ -0,0 +1,8 @@
1
+ export * from './wllama';
2
+ export * from './cache-manager';
3
+ export * from './model-manager';
4
+ export * from './huggingface';
5
+ export * from './types/types';
6
+ export * from './types/oai-compat';
7
+ export { CacheManager } from './cache-manager';
8
+ export { isValidGgufFile } from './utils';
@@ -0,0 +1,44 @@
1
+ import { test, expect } from 'vitest';
2
+ import { Wllama as WllamaMJS } from '../esm/index.js';
3
+ import { Wllama as WllamaMJSMinified } from '../esm/index.min.js';
4
+
5
// Asset paths handed to the Wllama constructor in these tests.
const CONFIG_PATHS = { default: '/src/wasm/wllama.wasm' };

// Small model used by the completion scenario below.
const TINY_MODEL = 'https://huggingface.co/ggml-org/models/resolve/main/tinyllamas/stories15M-q4_0.gguf';
11
+
12
/**
 * Shared scenario for both bundles: load the tiny model, request a short
 * completion with a fixed seed and zero temperature, check the generated
 * text, then shut the instance down.
 */
const testFunc = async (wllama: WllamaMJS) => {
  await wllama.loadModelFromUrl(TINY_MODEL, { n_ctx: 1024 });

  const completion = await wllama.createCompletion({
    prompt: 'Once upon a time',
    max_tokens: 10,
    temperature: 0.0,
    top_p: 0.95,
    top_k: 40,
    seed: 42,
  });
  expect(completion).toBeDefined();

  const generated = completion.choices[0].text;
  expect(generated).toMatch(/(there|little|girl|Lily)+/);
  expect(generated.length).toBeGreaterThan(10);

  await wllama.exit();
};
32
+
33
// TODO: enable compat mode in tests once test infrastructure supports Safari/asyncify

// Registers one sequential test that builds an instance inside the test
// body, clears its compat setting and runs the shared scenario.
const registerCompletionTest = (
  title: string,
  create: () => WllamaMJS
): void => {
  test.sequential(title, async () => {
    const instance = create();
    instance.setCompat(null);
    await testFunc(instance);
  });
};

registerCompletionTest(
  '(mjs) generates completion',
  () => new WllamaMJS(CONFIG_PATHS)
);

// The minified bundle exposes its own class type, hence the cast.
registerCompletionTest(
  '(mjs/minified) generates completion',
  () => new WllamaMJSMinified(CONFIG_PATHS) as unknown as WllamaMJS
);
@@ -0,0 +1,200 @@
1
+ import { test, expect } from 'vitest';
2
+ import { ModelManager, Model, ModelValidationStatus } from './model-manager';
3
+
4
// Single-file model used by most download tests.
const TINY_MODEL = 'https://huggingface.co/ggml-org/models/resolve/main/tinyllamas/stories260K.gguf';

// First shard of a three-part split model.
const SPLIT_MODEL = 'https://huggingface.co/ngxson/tinyllama_split_test/resolve/main/stories15M-q8_0-00001-of-00003.gguf';
8
+
9
// A URL without a shard suffix is returned as a single-element list.
test.sequential('parseModelUrl handles single model URL', () => {
  const parsed = ModelManager.parseModelUrl(TINY_MODEL);
  expect(parsed.length).toBe(1);
  expect(parsed[0]).toBe(TINY_MODEL);
});

// A first-shard URL expands to one URL per shard.
test.sequential('parseModelUrl handles array of URLs', () => {
  const parsed = ModelManager.parseModelUrl(SPLIT_MODEL);
  expect(parsed.length).toBe(3);
  const [first, second, third] = parsed;
  expect(first).toMatch(/-00001-of-00003\.gguf$/);
  expect(second).toMatch(/-00002-of-00003\.gguf$/);
  expect(third).toMatch(/-00003-of-00003\.gguf$/);
});

// The query string must be carried over to every expanded shard URL.
test.sequential('parseModelUrl handles URLs with query parameters', () => {
  // Test with a simple query parameter
  {
    const parsed = ModelManager.parseModelUrl(
      'https://example.com/models/model-00001-of-00003.gguf?param=value'
    );
    expect(parsed.length).toBe(3);
    expect(parsed[0]).toBe(
      'https://example.com/models/model-00001-of-00003.gguf?param=value'
    );
    expect(parsed[1]).toBe(
      'https://example.com/models/model-00002-of-00003.gguf?param=value'
    );
    expect(parsed[2]).toBe(
      'https://example.com/models/model-00003-of-00003.gguf?param=value'
    );
  }

  // Test with multiple query parameters
  {
    const parsed = ModelManager.parseModelUrl(
      'https://example.com/models/model-00001-of-00002.gguf?param1=value1&param2=value2'
    );
    expect(parsed.length).toBe(2);
    expect(parsed[0]).toBe(
      'https://example.com/models/model-00001-of-00002.gguf?param1=value1&param2=value2'
    );
    expect(parsed[1]).toBe(
      'https://example.com/models/model-00002-of-00002.gguf?param1=value1&param2=value2'
    );
  }

  // Test with no-inline parameter (common in Vite)
  {
    const parsed = ModelManager.parseModelUrl(
      'https://example.com/models/model-00001-of-00002.gguf?no-inline'
    );
    expect(parsed.length).toBe(2);
    expect(parsed[0]).toBe(
      'https://example.com/models/model-00001-of-00002.gguf?no-inline'
    );
    expect(parsed[1]).toBe(
      'https://example.com/models/model-00002-of-00002.gguf?no-inline'
    );
  }
});
63
+
64
// Downloads all three shards and checks their order and byte sizes.
test.sequential('download split model', async () => {
  const manager = new ModelManager();
  const downloaded = await manager.downloadModel(SPLIT_MODEL);
  expect(downloaded.files.length).toBe(3);

  const [first, second, third] = downloaded.files;
  // check names
  expect(first.metadata.originalURL).toMatch(/-00001-of-00003\.gguf$/);
  expect(second.metadata.originalURL).toMatch(/-00002-of-00003\.gguf$/);
  expect(third.metadata.originalURL).toMatch(/-00003-of-00003\.gguf$/);
  // check sizes
  expect(first.size).toBe(10517152);
  expect(second.size).toBe(10381216);
  expect(third.size).toBe(5773312);
});

// Relies on the previous test having stored the split model.
test.sequential('get downloaded split model', async () => {
  const manager = new ModelManager();
  const stored = (await manager.getModels()).find(
    (m) => m.url === SPLIT_MODEL
  );
  expect(stored).toBeDefined();
  if (!stored) throw new Error();

  // check names
  const [first, second, third] = stored.files;
  expect(first.metadata.originalURL).toMatch(/-00001-of-00003\.gguf$/);
  expect(second.metadata.originalURL).toMatch(/-00002-of-00003\.gguf$/);
  expect(third.metadata.originalURL).toMatch(/-00003-of-00003\.gguf$/);
});
89
+
90
// skip on CI, only run locally with a slow connection
// Aborts once more than 80% has been reported, then expects the model to be
// listed only when invalid entries are included.
test.skip('interrupt download split model (partial files downloaded)', async () => {
  const manager = new ModelManager();
  await manager.clear();

  const aborter = new AbortController();
  const pending = manager.downloadModel(SPLIT_MODEL, {
    signal: aborter.signal,
    progressCallback: ({ loaded, total }) => {
      if (loaded / total > 0.8) {
        aborter.abort();
      }
    },
  });
  await expect(pending).rejects.toThrow('aborted');

  const validModels = await manager.getModels();
  expect(validModels.length).toBe(0);
  const allModels = await manager.getModels({ includeInvalid: true });
  expect(allModels.length).toBe(1);
});
108
+
109
// A download from an unusable URL must reject.
test.sequential('download invalid model URL', async () => {
  const manager = new ModelManager();
  const pending = manager.downloadModel(
    'https://invalid.example.com/model.gguf'
  );
  await expect(pending).rejects.toThrow();
});

// Aborting shortly after the start must reject and leave no valid model behind.
test.sequential('download with abort signal', async () => {
  const manager = new ModelManager();
  await manager.clear();

  const aborter = new AbortController();
  const pending = manager.downloadModel(TINY_MODEL, {
    signal: aborter.signal,
  });
  setTimeout(() => aborter.abort(), 10);

  // Log the rejection first, then assert on the same promise.
  await pending.catch(console.error);
  await expect(pending).rejects.toThrow('aborted');

  const remaining = await manager.getModels();
  expect(remaining.length).toBe(0);
});
127
+
128
// Progress reports must be positive, bounded by the total and non-decreasing.
test.sequential('download with progress callback', async () => {
  const manager = new ModelManager();
  await manager.clear();

  let sawProgress = false;
  let previousLoaded = 0;
  const onProgress = ({ loaded, total }: { loaded: number; total: number }) => {
    expect(loaded).toBeGreaterThan(0);
    expect(total).toBeGreaterThan(0);
    expect(loaded).toBeLessThanOrEqual(total);
    expect(loaded).toBeGreaterThanOrEqual(previousLoaded);
    sawProgress = true;
    previousLoaded = loaded;
  };

  const downloaded = await manager.downloadModel(TINY_MODEL, {
    progressCallback: onProgress,
  });

  expect(sawProgress).toBe(true);
  expect(downloaded).toBeDefined();
  expect(downloaded.size).toBeGreaterThan(0);
});
149
+
150
// A Model constructed directly, without downloading, validates as INVALID.
test.sequential('model validation status for new model', async () => {
  const manager = new ModelManager();
  const fresh = new Model(manager, TINY_MODEL);
  expect(await fresh.validate()).toBe(ModelValidationStatus.INVALID);
});

// A string that is not a model URL must be rejected.
test.sequential('downloadModel throws on invalid URL', async () => {
  const manager = new ModelManager();
  const pending = manager.downloadModel('invalid.txt');
  await expect(pending).rejects.toThrow();
});

// The reported size must equal the known byte size of the tiny model.
test.sequential('model size calculation', async () => {
  const manager = new ModelManager();
  const { size } = await manager.downloadModel(TINY_MODEL);
  expect(size).toBe(1185376);
});
167
+
168
// Removing a model invalidates the handle and hides it from listings.
test.sequential('remove model from cache', async () => {
  const manager = new ModelManager();
  await manager.clear();

  // Download model first
  const downloaded = await manager.downloadModel(TINY_MODEL);
  const afterDownload = await manager.getModels();
  expect(afterDownload.length).toBe(1);
  expect(downloaded.size).toBeGreaterThan(0);

  // Remove model
  await downloaded.remove();
  expect(downloaded.size).toBe(-1);

  // Try to open removed model
  await expect(downloaded.open()).rejects.toThrow('deleted from the cache');

  // Validate removed model
  expect(await downloaded.validate()).toBe(ModelValidationStatus.DELETED);

  // Cannot see it in list of models
  const afterRemoval = await manager.getModels();
  expect(afterRemoval.find((m) => m.url === TINY_MODEL)).toBeUndefined();
});

// clear() must leave the manager with no models.
test.sequential('clear model manager', async () => {
  const manager = new ModelManager();
  const downloaded = await manager.downloadModel(TINY_MODEL);
  expect(downloaded).toBeDefined();

  const beforeClear = await manager.getModels();
  expect(beforeClear.length).toBeGreaterThan(0);

  await manager.clear();
  const afterClear = await manager.getModels();
  expect(afterClear.length).toBe(0);
});