@agorapete/wllama 3.5.1-q2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.gitmodules +3 -0
- package/.prettierignore +38 -0
- package/AGENTS.md +1 -0
- package/CMakeLists.txt +131 -0
- package/LICENCE +21 -0
- package/README-dev.md +178 -0
- package/README.md +225 -0
- package/README_banner.png +0 -0
- package/assets/screenshot_0.png +0 -0
- package/cpp/generate_glue_prototype.js +115 -0
- package/cpp/glue.hpp +664 -0
- package/cpp/test_glue.cpp +80 -0
- package/cpp/wllama-context.h +1172 -0
- package/cpp/wllama-fs.h +148 -0
- package/cpp/wllama.cpp +187 -0
- package/cpp/wllama.h +6 -0
- package/esm/cache-manager.d.ts +130 -0
- package/esm/debug.d.ts +28 -0
- package/esm/glue/glue.d.ts +22 -0
- package/esm/glue/messages.d.ts +146 -0
- package/esm/huggingface.d.ts +31 -0
- package/esm/index.cjs +3406 -0
- package/esm/index.d.ts +8 -0
- package/esm/index.js +3387 -0
- package/esm/index.min.js +1 -0
- package/esm/index.min.js.map +1 -0
- package/esm/model-manager.d.ts +136 -0
- package/esm/storage/cos.d.ts +36 -0
- package/esm/storage/index.d.ts +33 -0
- package/esm/storage/opfs.d.ts +12 -0
- package/esm/types/oai-compat.d.ts +278 -0
- package/esm/types/types.d.ts +112 -0
- package/esm/utils.d.ts +119 -0
- package/esm/wasm/source-map.d.ts +1 -0
- package/esm/wasm/wllama.wasm +0 -0
- package/esm/wasm-from-cdn.d.ts +8 -0
- package/esm/wllama.d.ts +397 -0
- package/esm/worker.d.ts +92 -0
- package/esm/workers-code/generated.d.ts +4 -0
- package/guides/intro-v2.md +132 -0
- package/guides/intro-v3.1.md +40 -0
- package/guides/intro-v3.md +230 -0
- package/index.ts +1 -0
- package/package.json +71 -0
- package/scripts/bisect_test.sh +33 -0
- package/scripts/build_hf_space.sh +26 -0
- package/scripts/build_source_map.js +269 -0
- package/scripts/build_wasm.sh +19 -0
- package/scripts/build_worker.sh +38 -0
- package/scripts/check_debug_build.js +30 -0
- package/scripts/check_package_size.js +25 -0
- package/scripts/docker-compose.yml +76 -0
- package/scripts/generate_wasm_from_cdn.js +24 -0
- package/scripts/http_server.js +44 -0
- package/scripts/post_build.sh +32 -0
- package/src/cache-manager.ts +358 -0
- package/src/debug.ts +111 -0
- package/src/glue/glue.ts +291 -0
- package/src/glue/messages.ts +773 -0
- package/src/huggingface.ts +151 -0
- package/src/index.ts +8 -0
- package/src/mjs.test.ts +44 -0
- package/src/model-manager.test.ts +200 -0
- package/src/model-manager.ts +359 -0
- package/src/storage/cos.test.ts +83 -0
- package/src/storage/cos.ts +171 -0
- package/src/storage/index.ts +40 -0
- package/src/storage/opfs.ts +119 -0
- package/src/types/oai-compat.ts +342 -0
- package/src/types/types.ts +133 -0
- package/src/utils.test.ts +231 -0
- package/src/utils.ts +403 -0
- package/src/wasm/source-map.ts +7 -0
- package/src/wasm/wllama.js +1 -0
- package/src/wasm/wllama.wasm +0 -0
- package/src/wasm-from-cdn.ts +13 -0
- package/src/wllama.test.ts +392 -0
- package/src/wllama.ts +1138 -0
- package/src/wllama.wgpu.test.ts +62 -0
- package/src/worker.ts +443 -0
- package/src/workers-code/generated.ts +11 -0
- package/src/workers-code/llama-cpp.js +511 -0
- package/src/workers-code/opfs-utils.js +150 -0
- package/tsconfig.build.json +34 -0
- package/tsup.config.ts +23 -0
- package/vitest.config.ts +61 -0
package/cpp/wllama-fs.h
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
|
|
3
|
+
#ifdef __EMSCRIPTEN__
|
|
4
|
+
#include <emscripten/emscripten.h>
|
|
5
|
+
#endif
|
|
6
|
+
|
|
7
|
+
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <map>
#include <string>
#include <vector>
|
|
11
|
+
|
|
12
|
+
static std::map<FILE *, std::string> s_file_path_map;
|
|
13
|
+
|
|
14
|
+
// State and helpers for the optional asynchronous file-read path.
//
// NOTE: the variables and functions below are defined (not just declared) in
// this header without `inline`, so the header can only be included from a
// single translation unit.
namespace wllama_fs
{
    // Set to true once make_sure_ready() has run.
    bool ready = false;
    // True when the USE_ASYNC_FILE environment variable is present.
    bool use_async = false;

    static const size_t CACHE_SIZE = 1024 * 1024; // 1 MB read-ahead

    // Single read-ahead window: cache_data holds the bytes of cache_file
    // starting at file offset cache_start.
    std::vector<uint8_t> cache_data;
    size_t cache_start = 0;
    FILE *cache_file = nullptr;

    // Reads the USE_ASYNC_FILE environment variable the first time it is
    // called; later calls are no-ops.
    void make_sure_ready()
    {
        if (ready)
            return;
        use_async = getenv("USE_ASYNC_FILE") != nullptr;
        ready = true;
    }

    // Copies req_bytes bytes at file offset fpos out of the read-ahead window
    // into ptr when the whole range is cached for file f.
    //
    // Returns req_bytes on a hit and 0 on a miss (nothing is copied on a miss).
    size_t try_cache(FILE *f, char *ptr, size_t req_bytes, size_t fpos)
    {
        if (f != cache_file || cache_data.empty())
            return 0;
        if (fpos < cache_start)
            return 0;

        const size_t rel = fpos - cache_start;
        const size_t cached = cache_data.size();
        // Compare by subtraction: `fpos + req_bytes` can wrap around size_t
        // and make an out-of-range request look like a hit.
        if (rel > cached || req_bytes > cached - rel)
            return 0;

        memcpy(ptr, cache_data.data() + rel, req_bytes);
        return req_bytes;
    }
}
|
|
45
|
+
|
|
46
|
+
// Thin stub — real implementation lives in llama-cpp.js to avoid
|
|
47
|
+
// C++ formatter mangling the JS syntax inside EM_ASYNC_JS macros.
|
|
48
|
+
|
|
49
|
+
EM_ASYNC_JS(size_t, js_file_read, (const char *path_ptr, size_t offset, size_t req_size, void *out_ptr), {
|
|
50
|
+
return await _wllama_js_file_read(UTF8ToString(Number(path_ptr)), Number(offset), Number(req_size), Number(out_ptr));
|
|
51
|
+
});
|
|
52
|
+
|
|
53
|
+
extern "C"
|
|
54
|
+
{
|
|
55
|
+
// Underlying stdio entry points that the __wrap_* functions forward to.
// NOTE(review): presumably resolved by the linker's symbol-wrapping option
// (e.g. --wrap=fopen) — confirm against the build flags.
FILE *__real_fopen(const char *path, const char *mode);
int __real_fclose(FILE *f);
size_t __real_fread(void *ptr, size_t size, size_t nmemb, FILE *f);
int __real_fseek(FILE *f, long offset, int whence);
long __real_ftell(FILE *f);
|
|
60
|
+
|
|
61
|
+
FILE *__wrap_fopen(const char *path, const char *mode)
|
|
62
|
+
{
|
|
63
|
+
wllama_fs::make_sure_ready();
|
|
64
|
+
FILE *f = __real_fopen(path, mode);
|
|
65
|
+
if (f)
|
|
66
|
+
{
|
|
67
|
+
s_file_path_map[f] = path;
|
|
68
|
+
}
|
|
69
|
+
return f;
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
int __wrap_fclose(FILE *f)
|
|
73
|
+
{
|
|
74
|
+
if (wllama_fs::cache_file == f)
|
|
75
|
+
{
|
|
76
|
+
wllama_fs::cache_file = nullptr;
|
|
77
|
+
wllama_fs::cache_data.clear();
|
|
78
|
+
}
|
|
79
|
+
s_file_path_map.erase(f);
|
|
80
|
+
return __real_fclose(f);
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
int __wrap_fseek(FILE *f, long offset, int whence)
|
|
84
|
+
{
|
|
85
|
+
return __real_fseek(f, offset, whence);
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
long __wrap_ftell(FILE *f)
|
|
89
|
+
{
|
|
90
|
+
return __real_ftell(f);
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
size_t __wrap_fread(void *ptr, size_t size, size_t nmemb, FILE *f)
|
|
94
|
+
{
|
|
95
|
+
wllama_fs::make_sure_ready();
|
|
96
|
+
if (!wllama_fs::use_async)
|
|
97
|
+
return __real_fread(ptr, size, nmemb, f);
|
|
98
|
+
|
|
99
|
+
auto nit = s_file_path_map.find(f);
|
|
100
|
+
if (nit == s_file_path_map.end())
|
|
101
|
+
return __real_fread(ptr, size, nmemb, f);
|
|
102
|
+
|
|
103
|
+
size_t req_bytes = size * nmemb;
|
|
104
|
+
if (req_bytes == 0)
|
|
105
|
+
return 0;
|
|
106
|
+
|
|
107
|
+
size_t fpos = (size_t)__real_ftell(f);
|
|
108
|
+
|
|
109
|
+
// Large reads (>= 1 MB): write directly into ptr, skip cache entirely.
|
|
110
|
+
if (req_bytes >= wllama_fs::CACHE_SIZE)
|
|
111
|
+
{
|
|
112
|
+
size_t actual = (size_t)js_file_read(
|
|
113
|
+
nit->second.c_str(), fpos, req_bytes, ptr);
|
|
114
|
+
if (actual == 0)
|
|
115
|
+
return 0;
|
|
116
|
+
size_t copy_bytes = std::min(req_bytes, actual);
|
|
117
|
+
__real_fseek(f, fpos + copy_bytes, SEEK_SET);
|
|
118
|
+
return copy_bytes / size;
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
// Small reads: try cache first.
|
|
122
|
+
size_t cached = wllama_fs::try_cache(f, (char *)ptr, req_bytes, fpos);
|
|
123
|
+
if (cached == req_bytes)
|
|
124
|
+
{
|
|
125
|
+
__real_fseek(f, fpos + req_bytes, SEEK_SET);
|
|
126
|
+
return nmemb;
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
// Cache miss: fetch a full CACHE_SIZE block from main thread.
|
|
130
|
+
wllama_fs::cache_data.resize(wllama_fs::CACHE_SIZE);
|
|
131
|
+
size_t actual = (size_t)js_file_read(
|
|
132
|
+
nit->second.c_str(), fpos, wllama_fs::CACHE_SIZE,
|
|
133
|
+
wllama_fs::cache_data.data());
|
|
134
|
+
|
|
135
|
+
wllama_fs::cache_data.resize(actual);
|
|
136
|
+
wllama_fs::cache_file = f;
|
|
137
|
+
wllama_fs::cache_start = fpos;
|
|
138
|
+
|
|
139
|
+
if (actual == 0)
|
|
140
|
+
return 0;
|
|
141
|
+
|
|
142
|
+
size_t copy_bytes = std::min(req_bytes, actual);
|
|
143
|
+
memcpy(ptr, wllama_fs::cache_data.data(), copy_bytes);
|
|
144
|
+
__real_fseek(f, fpos + copy_bytes, SEEK_SET);
|
|
145
|
+
|
|
146
|
+
return copy_bytes / size;
|
|
147
|
+
}
|
|
148
|
+
}
|
package/cpp/wllama.cpp
ADDED
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
#include <iostream>
|
|
2
|
+
#include <vector>
|
|
3
|
+
#include <string>
|
|
4
|
+
#include <sstream>
|
|
5
|
+
#include <stdio.h>
|
|
6
|
+
|
|
7
|
+
#include <stdlib.h>
|
|
8
|
+
#include <unistd.h>
|
|
9
|
+
|
|
10
|
+
#ifdef __EMSCRIPTEN__
|
|
11
|
+
#include <malloc.h>
|
|
12
|
+
#include <emscripten/emscripten.h>
|
|
13
|
+
#endif
|
|
14
|
+
|
|
15
|
+
// #define GLUE_DEBUG(...) fprintf(stderr, "@@ERROR@@" __VA_ARGS__)
|
|
16
|
+
|
|
17
|
+
#include "llama.h"
|
|
18
|
+
#include "wllama-context.h"
|
|
19
|
+
#include "wllama-fs.h"
|
|
20
|
+
#include "wllama.h"
|
|
21
|
+
|
|
22
|
+
#define MAX(a, b) ((a) > (b) ? (a) : (b))
|
|
23
|
+
|
|
24
|
+
// Expands to one `else if` branch of the dispatch chain in wllama_action():
// when `action` equals the stringified name, app.action_<name>(req_raw) is
// called and its result is serialized into output_buffer.
// Must follow an `if`/`else if` and relies on `action`, `app`, `req_raw` and
// `output_buffer` being in scope at the expansion site.
#define WLLAMA_ACTION(name)                    \
    else if (action == #name)                  \
    {                                          \
        auto res = app.action_##name(req_raw); \
        res.handler.serialize(output_buffer);  \
    }
|
|
30
|
+
|
|
31
|
+
// Log sink: writes "<tag>@@<text>" to stderr, where <tag> is @@ERROR, @@WARN,
// @@INFO or (for every other level) @@DEBUG.
// Text that is empty or does not end in '\n' is dropped without output.
static void llama_log_callback_logTee(ggml_log_level level, const char *text, void *user_data)
{
    (void)user_data;

    const size_t n = strlen(text);
    const bool complete_line = n > 0 && text[n - 1] == '\n';
    if (!complete_line)
    {
        // do not print if the line does not terminate with \n
        return;
    }

    const char *tag;
    switch (level)
    {
    case GGML_LOG_LEVEL_ERROR:
        tag = "@@ERROR";
        break;
    case GGML_LOG_LEVEL_WARN:
        tag = "@@WARN";
        break;
    case GGML_LOG_LEVEL_INFO:
        tag = "@@INFO";
        break;
    default:
        tag = "@@DEBUG";
        break;
    }

    fprintf(stderr, "%s@@%s", tag, text);
}
|
|
55
|
+
|
|
56
|
+
// Logs one message through llama_log_callback_logTee, appending the trailing
// newline that the callback requires before it prints anything.
static void printStr(ggml_log_level level, const char *text)
{
    std::string line(text);
    line += "\n";
    llama_log_callback_logTee(level, line.c_str(), nullptr);
}
|
|
61
|
+
|
|
62
|
+
static glue_outbuf output_buffer;
|
|
63
|
+
static wllama_context app;
|
|
64
|
+
|
|
65
|
+
// Buffer handed out by wllama_malloc(); it only ever grows.
static std::vector<char> input_buffer;

// Returns a pointer to a shared buffer of at least `size` bytes, growing it
// when needed. The buffer is reused between calls, so a later call that has
// to grow it may invalidate pointers returned earlier.
// second argument is dummy
extern "C" const char *wllama_malloc(size_t size, uint32_t)
{
    const bool too_small = size > input_buffer.size();
    if (too_small)
    {
        input_buffer.resize(size);
    }
    return input_buffer.data();
}
|
|
75
|
+
|
|
76
|
+
extern "C" const char *wllama_start()
|
|
77
|
+
{
|
|
78
|
+
try
|
|
79
|
+
{
|
|
80
|
+
llama_backend_init();
|
|
81
|
+
// std::cerr << llama_print_system_info() << "\n";
|
|
82
|
+
llama_log_set(llama_log_callback_logTee, nullptr);
|
|
83
|
+
wllama_malloc(1024, 0);
|
|
84
|
+
|
|
85
|
+
wllama_fs::make_sure_ready();
|
|
86
|
+
if (wllama_fs::use_async)
|
|
87
|
+
{
|
|
88
|
+
printStr(GGML_LOG_LEVEL_INFO, "Using async file read");
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
return "{\"success\":true}";
|
|
92
|
+
}
|
|
93
|
+
catch (std::exception &e)
|
|
94
|
+
{
|
|
95
|
+
printStr(GGML_LOG_LEVEL_ERROR, e.what());
|
|
96
|
+
return "{\"error\":true}";
|
|
97
|
+
}
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
extern "C" const char *wllama_action(const char *name, const char *req_raw)
|
|
101
|
+
{
|
|
102
|
+
try
|
|
103
|
+
{
|
|
104
|
+
std::string action(name);
|
|
105
|
+
|
|
106
|
+
if (action.empty())
|
|
107
|
+
{
|
|
108
|
+
printStr(GGML_LOG_LEVEL_ERROR, "Empty action");
|
|
109
|
+
abort();
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
WLLAMA_ACTION(load)
|
|
113
|
+
WLLAMA_ACTION(completion)
|
|
114
|
+
WLLAMA_ACTION(embedding)
|
|
115
|
+
WLLAMA_ACTION(rerank)
|
|
116
|
+
WLLAMA_ACTION(get_result)
|
|
117
|
+
WLLAMA_ACTION(test_backend_ops)
|
|
118
|
+
|
|
119
|
+
else
|
|
120
|
+
{
|
|
121
|
+
printStr(GGML_LOG_LEVEL_ERROR, (std::string("Unknown action: ") + name).c_str());
|
|
122
|
+
abort();
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
// length of response is written inside input_buffer
|
|
126
|
+
uint32_t *output_len = (uint32_t *)req_raw;
|
|
127
|
+
output_len[0] = output_buffer.data.size();
|
|
128
|
+
return output_buffer.data.data();
|
|
129
|
+
}
|
|
130
|
+
catch (std::exception &e)
|
|
131
|
+
{
|
|
132
|
+
printStr(GGML_LOG_LEVEL_ERROR, e.what());
|
|
133
|
+
return nullptr;
|
|
134
|
+
}
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
extern "C" const char *wllama_exit()
|
|
138
|
+
{
|
|
139
|
+
try
|
|
140
|
+
{
|
|
141
|
+
// app.unload();
|
|
142
|
+
llama_backend_free();
|
|
143
|
+
return "{\"success\":true}";
|
|
144
|
+
}
|
|
145
|
+
catch (std::exception &e)
|
|
146
|
+
{
|
|
147
|
+
printStr(GGML_LOG_LEVEL_ERROR, e.what());
|
|
148
|
+
return "{\"error\":true}";
|
|
149
|
+
}
|
|
150
|
+
}
|
|
151
|
+
|
|
152
|
+
// Debug entry point. The memory-reporting JSON below is commented out, so the
// function currently always returns nullptr; the two helper lambdas are kept
// for that disabled code.
extern "C" const char *wllama_debug()
{
    // HEAP8.length under Emscripten, 0 otherwise.
    auto get_mem_total = [&]()
    {
#ifdef __EMSCRIPTEN__
        return EM_ASM_INT(return HEAP8.length);
#else
        return 0;
#endif
    };
    // Under Emscripten: bytes above the current sbrk break plus the free
    // bytes reported by mallinfo(); 0 otherwise.
    auto get_mem_free = [&]()
    {
#ifdef __EMSCRIPTEN__
        auto heap_info = mallinfo();
        size_t heap_total = get_mem_total();
        size_t heap_break = (size_t)sbrk(0);
        return heap_total - heap_break + heap_info.fordblks;
#else
        return 0;
#endif
    };
    /*json res = json{
        {"mem_total_MB", get_mem_total() / 1024 / 1024},
        {"mem_free_MB", get_mem_free() / 1024 / 1024},
        {"mem_used_MB", (get_mem_total() - get_mem_free()) / 1024 / 1024},
    };
    result = std::string(res.dump());
    return result.c_str();*/
    return nullptr;
}
|
|
182
|
+
|
|
183
|
+
// Placeholder entry point: prints "Unused" to stderr and exits with status 0.
// The functional entry points of this file are the extern "C" wllama_*
// functions.
int main()
{
    std::cerr << "Unused\n";
    return 0;
}
|
package/esm/cache-manager.d.ts
ADDED
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
import type { DownloadProgressCallback } from './model-manager';
|
|
2
|
+
import type { StorageBackend } from './storage/index';
|
|
3
|
+
/**
 * Optional settings for a download.
 */
export type DownloadOptions = {
  /** Callback invoked to report download progress. */
  progressCallback?: DownloadProgressCallback;
  /** Extra metadata to store together with the downloaded file. */
  metadataAdditional?: Record<string, any>;
  /** Custom request headers; useful for authentication (e.g. a Bearer token). */
  headers?: Record<string, string>;
  /** Signal used to abort the request. */
  signal?: AbortSignal;
};
|
|
21
|
+
export declare const POLYFILL_ETAG = "polyfill_for_older_version";
|
|
22
|
+
/**
 * One file held in the cache.
 */
export interface CacheEntry {
  /** Storage key of the file, formatted as `${hashSHA1(fullURL)}_${fileName}`. */
  name: string;
  /** File size in bytes. */
  size: number;
  /** Remaining metadata recorded for the file. */
  metadata: CacheEntryMetadata;
}
|
|
36
|
+
/**
 * Metadata stored alongside a cached file.
 */
export interface CacheEntryMetadata {
  /**
   * ETag header returned by the remote request.
   * https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/ETag
   */
  etag: string;
  /** Size of the remote file in bytes; used as an integrity check. */
  originalSize: number;
  /** URL the model was originally fetched from. Currently unused. */
  originalURL: string;
  /** URL of the mmproj file, when there is one. */
  mmprojURL?: string | undefined;
  /** Optional SHA256 digest, mostly used by the COS backend. */
  sha256?: string | undefined;
}
|
|
59
|
+
/**
 * Manages cached model files on top of a pluggable StorageBackend.
 *
 * OPFS (Origin Private File System) is the default backend.
 */
export declare class CacheManager {
  private sb;
  /**
   * @param backends Storage backends in order of preference: the first one that is available is used, otherwise the next one is tried.
   */
  constructor(backends?: StorageBackend[]);
  /**
   * Convert a URL into its storage key.
   *
   * Format: `${hashSHA1(fullURL)}_${fileName}`
   */
  getNameFromURL(url: string): Promise<string>;
  /**
   * Write a new file to the cache, overwriting any existing file.
   *
   * @deprecated Use `download()` instead
   *
   * @param name The file name returned by `getNameFromURL()` or `list()`
   */
  write(name: string, stream: ReadableStream, metadata: CacheEntryMetadata): Promise<void>;
  download(url: string, options?: DownloadOptions): Promise<void>;
  /**
   * Open a cached file for reading.
   *
   * @param nameOrURL The file name returned by `getNameFromURL()` or `list()`, or the original URL of the remote file
   * @returns Blob, or null if the file does not exist
   */
  open(nameOrURL: string): Promise<Blob | null>;
  /**
   * Get the size of a file stored in the cache.
   *
   * NOTE: if the download was interrupted mid-way (e.g. the user closed the browser tab), the file may be corrupted and its size may differ from `metadata.originalSize`
   *
   * @param name The file name returned by `getNameFromURL()` or `list()`
   * @returns number of bytes, or -1 if the file does not exist
   */
  getSize(name: string): Promise<number>;
  /**
   * Get the metadata of a cached file.
   */
  getMetadata(name: string): Promise<CacheEntryMetadata | null>;
  /**
   * List all files currently in the cache.
   */
  list(): Promise<CacheEntry[]>;
  /**
   * Remove all files currently in the cache.
   */
  clear(): Promise<void>;
  /**
   * Delete a single file from the cache.
   *
   * @param nameOrURL Either a URL or a name returned by `getNameFromURL()` or `list()`
   */
  delete(nameOrURL: string): Promise<void>;
  /**
   * Delete multiple files from the cache.
   *
   * @param predicate A predicate like the one passed to `array.filter(item => boolean)`
   */
  deleteMany(predicate: (e: CacheEntry) => boolean): Promise<void>;
  /**
   * Write the metadata of a file to disk.
   */
  writeMetadata(name: string, metadata: CacheEntryMetadata): Promise<void>;
}
|
|
130
|
+
export default CacheManager;
|
package/esm/debug.d.ts
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/**
 * Helpers for mapping wasm function indices back to symbol names.
 */
export declare const Debug: {
  /**
   * Resolve wasm function indices to their cleaned symbol names.
   */
  decodeFuncIds: (
    funcIds: number[],
    isCompatBuild: boolean
  ) => Promise<
    {
      funcId: number;
      name: string;
    }[]
  >;
  /**
   * Annotate a wasm stack trace string with resolved function names.
   *
   * Example input from Chrome:
   *     at http://localhost:8080/esm/wasm/wllama.wasm:wasm-function[775]:0x74251
   *     at async blob:http://localhost:8080/53a863cc-7227-45cc-8594-ddbbf5257f20:317:28
   *
   * Example input from Firefox:
   *     @http://localhost:8080/esm/wasm/wllama.wasm:wasm-function[796]:0x7dfe2
   *     at wModuleInit/WebAssembly.promising/< (9b6a2acd-d909-44e2-b021-d42fb9087cfb:15:32) index.js:1433:45
   *
   * Example input from Safari:
   *     2441@wasm-function[2441]
   *     at wrapper (d746f19e-4523-4f36-ba06-d0969acc0b05:22:126009)
   *
   * Example output:
   *     wasm-func[775] (server_response::send)
   */
  decodeStackTrace: (stack: string, isCompatBuild: boolean) => Promise<string>;
};
|
|
package/esm/glue/glue.d.ts
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import { type GlueMsg } from './messages';
|
|
2
|
+
/**
|
|
3
|
+
* Glue is a simple binary protocol for serializing and deserializing messages.
|
|
4
|
+
* It is inspired by protobuf, but much simpler.
|
|
5
|
+
*
|
|
6
|
+
* Interested in extending Glue? Open an issue on GitHub!
|
|
7
|
+
*/
|
|
8
|
+
type GlueType = 'str' | 'int' | 'float' | 'bool' | 'raw' | 'arr_str' | 'arr_int' | 'arr_float' | 'arr_bool' | 'arr_raw' | 'null';
|
|
9
|
+
/**
 * Description of a single field of a Glue message.
 */
export interface GlueField {
  /** Glue type of the field value. */
  type: GlueType;
  /** Field name. */
  name: string;
  /** Whether the field is nullable. */
  isNullable: boolean;
}
|
|
14
|
+
/**
 * Prototype (schema) of a Glue message: its names and its list of fields.
 */
export interface GlueMessageProto {
  name: string;
  structName: string;
  className: string;
  fields: GlueField[];
}
|
|
20
|
+
/** Deserialize a Glue-encoded byte buffer into a message. */
export declare function glueDeserialize(buf: Uint8Array): GlueMsg;
/** Serialize a message into its Glue byte representation. */
export declare function glueSerialize(msg: GlueMsg): Uint8Array;
|
|
22
|
+
export {};
|
|
package/esm/glue/messages.d.ts
ADDED
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
import type { GlueMessageProto } from './glue';
|
|
2
|
+
/** Version number of the Glue protocol. */
export declare const GLUE_VERSION = 1;
/** Message prototypes, keyed by name. */
export declare const GLUE_MESSAGE_PROTOTYPES: Record<string, GlueMessageProto>;
|
|
6
|
+
/** Error message, tagged `erro_evt`. */
export interface GlueMsgError {
  _name: "erro_evt";
  message: string;
}
|
|
10
|
+
/**
 * Model-load request, tagged `load_req`.
 *
 * Only the model paths, `n_ctx_auto`, `use_mmap`, `use_mlock`, `n_gpu_layers`,
 * `n_ctx` and `n_threads` are required; every other field is optional.
 */
export interface GlueMsgLoadReq {
  _name: "load_req";

  // Required fields
  model_paths: string[];
  mmproj_path?: string | undefined;
  n_ctx_auto: boolean;
  use_mmap: boolean;
  use_mlock: boolean;
  n_gpu_layers: number;
  n_ctx: number;
  n_threads: number;

  // Optional fields
  model_alias?: string | undefined;
  log_level?: number | undefined;
  embeddings?: boolean | undefined;
  offload_kqv?: boolean | undefined;
  n_batch?: number | undefined;
  n_ubatch?: number | undefined;
  n_parallel?: number | undefined;
  pooling_type?: string | undefined;
  rope_scaling_type?: string | undefined;
  rope_freq_base?: number | undefined;
  rope_freq_scale?: number | undefined;
  yarn_ext_factor?: number | undefined;
  yarn_attn_factor?: number | undefined;
  yarn_beta_fast?: number | undefined;
  yarn_beta_slow?: number | undefined;
  yarn_orig_ctx?: number | undefined;
  cache_type_k?: string | undefined;
  cache_type_v?: string | undefined;
  kv_unified?: boolean | undefined;
  flash_attn?: boolean | undefined;
  swa_full?: boolean | undefined;
  n_ctx_checkpoints?: number | undefined;
  checkpoint_min_step?: number | undefined;
  chat_template?: string | undefined;
  jinja?: boolean | undefined;
  default_template_kwargs_keys?: string[] | undefined;
  default_template_kwargs_vals?: string[] | undefined;
  reasoning?: boolean | undefined;
  image_min_tokens?: number | undefined;
  image_max_tokens?: number | undefined;
  warmup?: boolean | undefined;
  no_kv_offload?: boolean | undefined;
  mmproj_offload?: boolean | undefined;
  cont_batching?: boolean | undefined;
  n_keep?: number | undefined;
  ctx_shift?: boolean | undefined;
  cache_idle_slots?: boolean | undefined;
  n_cache_reuse?: number | undefined;
  lora_paths?: string[] | undefined;
  lora_scales?: number[] | undefined;
  lora_init_without_apply?: boolean | undefined;
  spec_draft_model?: string | undefined;
  spec_draft_ngl?: number | undefined;
  spec_draft_n_max?: number | undefined;
  spec_draft_n_min?: number | undefined;
  spec_draft_p_min?: number | undefined;
  spec_draft_threads?: number | undefined;
  spec_draft_threads_batch?: number | undefined;
  kv_overrides_keys?: string[] | undefined;
  kv_overrides_vals?: string[] | undefined;
  reasoning_budget_tokens?: number | undefined;
  reasoning_budget_message?: string | undefined;
  reasoning_format?: string | undefined;
  skip_chat_parsing?: boolean | undefined;
  prefill_assistant?: boolean | undefined;
}
|
|
76
|
+
/** Model-load response, tagged `load_res`. */
export interface GlueMsgLoadRes {
  _name: "load_res";
  success: boolean;

  // Sizes
  n_ctx: number;
  n_batch: number;
  n_ubatch: number;
  n_vocab: number;
  n_ctx_train: number;
  n_embd: number;
  n_layer: number;

  // Metadata, as two parallel arrays
  metadata_key: string[];
  metadata_val: string[];

  // Token ids and tokenizer flags
  token_bos: number;
  token_eos: number;
  token_eot: number;
  list_tokens_eog: number[];
  add_bos_token: boolean;
  add_eos_token: boolean;
  has_encoder: boolean;
  token_decoder_start: number;

  // Media input
  media_marker: string;
  has_image_input: boolean;
  has_audio_input: boolean;
}
|
|
100
|
+
/** Completion request, tagged `cmpl_req`. */
export interface GlueMsgCompletionReq {
  _name: "cmpl_req";
  is_chat: boolean;
  /** Request payload as a JSON string. */
  data_json: string;
  /** Binary attachments sent with the request. */
  files: Uint8Array[];
}
|
|
106
|
+
/** Completion response, tagged `cmpl_res`. */
export interface GlueMsgCompletionRes {
  _name: "cmpl_res";
  success: boolean;
}
|
|
110
|
+
/** Embedding request, tagged `embd_req`. */
export interface GlueMsgEmbeddingReq {
  _name: "embd_req";
  /** Request payload as a JSON string. */
  data_json: string;
  /** Binary attachments sent with the request. */
  files: Uint8Array[];
}
|
|
115
|
+
/** Embedding response, tagged `embd_res`. */
export interface GlueMsgEmbeddingRes {
  _name: "embd_res";
  success: boolean;
}
|
|
119
|
+
/** Rerank request, tagged `rrnk_req`. */
export interface GlueMsgRerankReq {
  _name: "rrnk_req";
  /** Request payload as a JSON string. */
  data_json: string;
}
|
|
123
|
+
/** Rerank response, tagged `rrnk_res`. */
export interface GlueMsgRerankRes {
  _name: "rrnk_res";
  success: boolean;
}
|
|
127
|
+
/** Get-result request, tagged `gres_req`; carries no payload. */
export interface GlueMsgGetResultReq {
  _name: "gres_req";
}
|
|
130
|
+
/** Get-result response, tagged `gres_res`. */
export interface GlueMsgGetResultRes {
  _name: "gres_res";
  success: boolean;
  has_more: boolean;
  is_error: boolean;
  /** Result payload as a JSON string. */
  data_json: string;
}
|
|
137
|
+
/** Backend-ops test request, tagged `tbop_req`. */
export interface GlueMsgTestBackendOpsReq {
  _name: "tbop_req";
  args: string[];
}
|
|
141
|
+
/** Backend-ops test response, tagged `tbop_res`. */
export interface GlueMsgTestBackendOpsRes {
  _name: "tbop_res";
  retcode: number;
  success: boolean;
}
|
|
146
|
+
/** Union of every Glue message; the `_name` field is the discriminant. */
export type GlueMsg =
  | GlueMsgError
  | GlueMsgLoadReq
  | GlueMsgLoadRes
  | GlueMsgCompletionReq
  | GlueMsgCompletionRes
  | GlueMsgEmbeddingReq
  | GlueMsgEmbeddingRes
  | GlueMsgRerankReq
  | GlueMsgRerankRes
  | GlueMsgGetResultReq
  | GlueMsgGetResultRes
  | GlueMsgTestBackendOpsReq
  | GlueMsgTestBackendOpsRes;
|
|
package/esm/huggingface.d.ts
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import { type ModelSource } from './model-manager';
|
|
2
|
+
/**
 * Parameters identifying a model hosted on Hugging Face.
 */
export interface HuggingFaceParams {
  /** Repo name, e.g. user/model */
  repo: string;
  /** File name, or path to the file, inside the repo. Only one of `file` or `quant` is needed. */
  file?: string;
  /**
   * GGUF quantization name, e.g. Q4_K_M, Q8_0, etc. Only one of `file` or `quant` is needed.
   *
   * Q4_K_M is used by default, falling back to Q8_0 and finally to the non-quantized version if no quantized version is found.
   */
  quant?: string;
  /** File name, or path to the file, of the mmproj inside the repo. Only one of `mmprojFile` or `mmprojQuant` is needed. */
  mmprojFile?: string;
  /** GGUF quantization name for the mmproj, e.g. Q4_K_M, Q8_0, etc. Only one of `mmprojFile` or `mmprojQuant` is needed. */
  mmprojQuant?: string;
  /** Hugging Face token with permission to access the repo. May be omitted when the repo is public. */
  hfToken?: string;
}
|
|
30
|
+
/** Resolve Hugging Face parameters into a `ModelSource`. */
export declare function getHFModelSource(config: HuggingFaceParams): Promise<ModelSource>;
/** Look up the SHA256 of the file at `url`; resolves to `undefined` when none is obtained. */
export declare function getHFFileSHA256(
  url: string,
  headers: Record<string, string>
): Promise<string | undefined>;
|