nuxt-edge-ai 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +141 -0
- package/dist/module.d.mts +23 -0
- package/dist/module.json +9 -0
- package/dist/module.mjs +121 -0
- package/dist/runtime/composables/useEdgeAI.d.ts +9 -0
- package/dist/runtime/composables/useEdgeAI.js +24 -0
- package/dist/runtime/plugin.d.ts +21 -0
- package/dist/runtime/plugin.js +31 -0
- package/dist/runtime/server/api/generate.post.d.ts +2 -0
- package/dist/runtime/server/api/generate.post.js +19 -0
- package/dist/runtime/server/api/health.get.d.ts +2 -0
- package/dist/runtime/server/api/health.get.js +6 -0
- package/dist/runtime/server/api/pull.post.d.ts +2 -0
- package/dist/runtime/server/api/pull.post.js +6 -0
- package/dist/runtime/server/tsconfig.json +3 -0
- package/dist/runtime/server/utils/edge-ai-engine.d.ts +4 -0
- package/dist/runtime/server/utils/edge-ai-engine.js +270 -0
- package/dist/runtime/server/vendor/huggingface/transformers.web.d.mts +857 -0
- package/dist/runtime/server/vendor/huggingface/transformers.web.js +38740 -0
- package/dist/runtime/server/vendor/licenses/huggingface-transformers.LICENSE +202 -0
- package/dist/runtime/server/vendor/onnxruntime/onnxruntime-common.d.mts +3 -0
- package/dist/runtime/server/vendor/onnxruntime/onnxruntime-common.js +3 -0
- package/dist/runtime/server/vendor/onnxruntime/onnxruntime-web.d.mts +3 -0
- package/dist/runtime/server/vendor/onnxruntime/onnxruntime-web.js +3 -0
- package/dist/runtime/server/vendor/onnxruntime/ort-wasm-simd-threaded.d.mts +2 -0
- package/dist/runtime/server/vendor/onnxruntime/ort-wasm-simd-threaded.js +59 -0
- package/dist/runtime/server/vendor/onnxruntime/ort-wasm-simd-threaded.wasm +0 -0
- package/dist/runtime/server/vendor/onnxruntime/ort.wasm.min.d.mts +11 -0
- package/dist/runtime/server/vendor/onnxruntime/ort.wasm.min.js +7 -0
- package/dist/runtime/types.d.ts +74 -0
- package/dist/runtime/types.js +0 -0
- package/dist/types.d.mts +5 -0
- package/package.json +74 -0
|
@@ -0,0 +1,270 @@
|
|
|
1
|
+
import { access, readFile } from "node:fs/promises";
|
|
2
|
+
import { createRequire } from "node:module";
|
|
3
|
+
import { dirname, extname, join, resolve } from "node:path";
|
|
4
|
+
import { pathToFileURL } from "node:url";
|
|
5
|
+
/**
 * Mutable engine state shared by the functions in this module.
 *
 * Only `loading` and `warmed` are seeded here. The other fields used in this
 * file (`runtime`, `initPromise`, `pipeline`, `pipelinePromise`, `modelKey`,
 * `lastError`) are attached on demand by the loader functions.
 */
const state = { loading: false, warmed: false };

// Resolver anchored at the `package.json` path under the current working
// directory, so lookups start from the directory the process runs in.
const require = createRequire(resolve(process.cwd(), "package.json"));
|
|
10
|
+
/**
 * Maps a file extension (including the leading dot) to a `content-type` value.
 *
 * The comparison is exact, so callers are expected to pass a lower-cased
 * extension. Anything not in the table is reported as a generic binary stream.
 *
 * @param {string} extension - Extension such as ".json" or ".wasm".
 * @returns {string} The matching content type.
 */
function contentTypeForExtension(extension) {
  const knownTypes = new Map([
    [".json", "application/json"],
    [".wasm", "application/wasm"],
    [".txt", "text/plain; charset=utf-8"],
  ]);
  return knownTypes.get(extension) ?? "application/octet-stream";
}
|
|
22
|
+
/**
 * Replaces `globalThis.fetch` with a wrapper that answers `file://` URLs by
 * reading the file from disk; every other request goes to the original fetch.
 *
 * Installation happens at most once per process: a marker flag on
 * `globalThis` makes later calls return without touching `fetch` again.
 */
function installFileFetchShim() {
  const root = globalThis;
  if (root.__nuxtEdgeAIFileFetchShim) {
    return;
  }

  const nativeFetch = globalThis.fetch.bind(globalThis);

  // `fetch` accepts a string, a URL, or a Request-like object exposing `.url`.
  const toHref = (target) => {
    if (typeof target === "string") {
      return target;
    }
    if (target instanceof URL) {
      return target.href;
    }
    return target.url;
  };

  globalThis.fetch = async (input, init) => {
    const href = toHref(input);
    if (!href.startsWith("file://")) {
      return nativeFetch(input, init);
    }

    const location = new URL(href);
    const contents = await readFile(location);
    // The extension is lower-cased so the content-type lookup is case-insensitive.
    const extension = extname(location.pathname).toLowerCase();
    return new Response(contents, {
      headers: {
        "content-type": contentTypeForExtension(extension),
      },
    });
  };

  root.__nuxtEdgeAIFileFetchShim = true;
}
|
|
43
|
+
/**
 * Finds the directory holding the vendored onnxruntime-web assets and returns
 * it as a `file://` URL with a trailing slash.
 *
 * Candidate directories are probed in this order:
 *   1. next to the resolved `nuxt-edge-ai` entry point (skipped when the
 *      package cannot be resolved),
 *   2. `node_modules/nuxt-edge-ai/dist/...` under the working directory,
 *   3. `dist/...` under the working directory,
 *   4. `src/...` under the working directory.
 * A candidate is accepted when `ort-wasm-simd-threaded.mjs` is accessible in it.
 *
 * NOTE(review): the published file list shows this loader as
 * `ort-wasm-simd-threaded.js`; confirm the `.mjs` probe matches what ships.
 *
 * @returns {Promise<string>} Base URL of the first matching directory.
 * @throws {Error} When none of the candidates contains the marker file.
 */
async function resolveVendoredOnnxRuntimeBaseUrl() {
  const vendorSegments = ["runtime", "server", "vendor", "onnxruntime"];
  const searchDirs = /* @__PURE__ */ new Set();

  try {
    const entryDir = dirname(require.resolve("nuxt-edge-ai"));
    searchDirs.add(join(entryDir, ...vendorSegments));
  } catch {
    // Resolution is best-effort; the working-directory fallbacks below still apply.
  }

  const cwdRelativeRoots = [
    ["node_modules", "nuxt-edge-ai", "dist"],
    ["dist"],
    ["src"],
  ];
  for (const root of cwdRelativeRoots) {
    searchDirs.add(resolve(process.cwd(), ...root, ...vendorSegments));
  }

  for (const dir of searchDirs) {
    const markerFile = join(dir, "ort-wasm-simd-threaded.mjs");
    const isPresent = await access(markerFile).then(
      () => true,
      () => false,
    );
    if (isPresent) {
      return `${pathToFileURL(dir).href}/`;
    }
  }

  throw new Error("Unable to locate vendored onnxruntime-web assets on disk.");
}
|
|
62
|
+
/**
 * Loads the vendored transformers runtime once and caches it on `state`.
 *
 * Concurrent callers share a single `state.initPromise`. When initialisation
 * fails, the error message is stored in `state.lastError` and the promise is
 * cleared so that a later call can try again.
 *
 * @returns {Promise<object>} The imported transformers module (`state.runtime`).
 */
async function loadTransformersRuntime() {
  if (state.runtime) {
    return state.runtime;
  }

  const bootstrap = async () => {
    installFileFetchShim();

    const importedOrt = await import("../vendor/onnxruntime/ort.wasm.min.mjs");
    // Use the namespace itself when it exposes InferenceSession, otherwise its
    // default export, falling back to the namespace if there is no default.
    const ort = (importedOrt.InferenceSession ? importedOrt : importedOrt.default) ?? importedOrt;

    // Registered globally before the transformers bundle is imported;
    // presumably the bundle looks it up under this symbol (confirm).
    globalThis[Symbol.for("onnxruntime")] = ort;

    state.runtime = await import("../vendor/huggingface/transformers.web.mjs");
  };

  const recordFailure = (error) => {
    state.lastError = error instanceof Error ? error.message : String(error);
    state.initPromise = void 0;
    throw error;
  };

  if (!state.initPromise) {
    state.initPromise = bootstrap().catch(recordFailure);
  }

  await state.initPromise;
  return state.runtime;
}
|
|
84
|
+
/**
 * Guarantees that `env.backends.onnx.wasm` exists and returns it.
 *
 * Missing (null or undefined) levels are created as empty objects in place;
 * levels that already exist are left untouched.
 *
 * @param {object} env - Environment object to mutate.
 * @returns {object} The `env.backends.onnx.wasm` settings object.
 */
function ensureOnnxWasmEnv(env) {
  let cursor = env;
  for (const key of ["backends", "onnx", "wasm"]) {
    cursor[key] ??= {};
    cursor = cursor[key];
  }
  return cursor;
}
|
|
91
|
+
/**
 * Describes which model should be loaded for a request.
 *
 * A non-empty (after trimming) `modelOverride` wins for both `id` and
 * `source`. Otherwise `source` is the configured local path when one is set,
 * and the configured model id when not.
 *
 * @param {object} config - Module config; `config.model` is read.
 * @param {string} [modelOverride] - Optional per-request model name.
 * @returns {{id: string, task: *, localPath: *, allowRemote: *, dtype: *, source: string}}
 */
function resolveModelSource(config, modelOverride) {
  const override = modelOverride?.trim();
  const { id, task, localPath, allowRemote, dtype } = config.model;
  return {
    id: override || id,
    task,
    localPath,
    allowRemote,
    dtype,
    source: override || localPath || id,
  };
}
|
|
103
|
+
/**
 * Builds a snapshot of the engine status for API responses.
 *
 * The mock runtime is always reported as ready and warmed; for any other
 * runtime those flags reflect the shared `state` object.
 *
 * @param {object} config - Module config; `runtime` and `cacheDir` are read.
 * @returns {{active: string, ready: boolean, warmed: boolean, loading: boolean, cacheDir: *, lastError: *}}
 */
function currentEngineState(config) {
  const { runtime, cacheDir } = config;
  const isMock = runtime === "mock";
  const { pipeline, warmed, loading, lastError } = state;
  return {
    active: runtime,
    ready: isMock || Boolean(pipeline),
    warmed: isMock || warmed,
    loading,
    cacheDir,
    lastError,
  };
}
|
|
113
|
+
/**
 * Configures the transformers runtime and builds a new pipeline for `model`.
 *
 * @param {object} config - Module config; `config.model` is read.
 * @param {{source: string, dtype: *}} model - Resolved model description.
 * @returns {Promise<Function>} The pipeline created by `transformers.pipeline`.
 */
async function createTransformersPipeline(config, model) {
  const transformers = await loadTransformersRuntime();

  transformers.env.allowLocalModels = Boolean(config.model.localPath);
  transformers.env.allowRemoteModels = config.model.allowRemote;
  // Filesystem access and the filesystem cache are switched off, so
  // `config.cacheDir` is never handed to the runtime.
  transformers.env.useFS = false;
  transformers.env.useFSCache = false;
  if (config.model.localPath) {
    transformers.env.localModelPath = config.model.localPath;
  }

  // The same WASM settings are applied to the transformers env and, when it is
  // registered on globalThis, to the onnxruntime env itself.
  const wasmSettings = {
    wasmPaths: await resolveVendoredOnnxRuntimeBaseUrl(),
    numThreads: 1,
    proxy: false,
  };
  Object.assign(ensureOnnxWasmEnv(transformers.env), wasmSettings);

  const ortRuntime = globalThis[Symbol.for("onnxruntime")];
  if (ortRuntime) {
    ortRuntime.env ??= {};
    ortRuntime.env.wasm ??= {};
    Object.assign(ortRuntime.env.wasm, wasmSettings);
  }

  return transformers.pipeline(config.model.task, model.source, {
    device: "wasm",
    dtype: model.dtype,
  });
}

/**
 * Returns a ready pipeline for the requested model, loading it when needed.
 *
 * - A pipeline already loaded for the same source/dtype is returned directly
 *   with `loadedNow: false`.
 * - Callers asking for a model that is currently being loaded share that one
 *   in-flight load. The in-flight key is tracked in `state.pendingModelKey`,
 *   because `state.modelKey` is only set once a load has finished.
 * - Only the most recently started load may update the shared state, so a
 *   superseded load cannot overwrite or clear the state of a newer one, nor
 *   reset `state.loading` while the newer one is still running.
 * - A failed load clears the in-flight promise so the next call retries.
 *
 * @param {object} config - Module config.
 * @param {string} [modelOverride] - Optional per-request model name.
 * @returns {Promise<{pipeline: Function, loadedNow: boolean, model: object}>}
 * @throws Whatever the runtime loader or `transformers.pipeline` rejects with.
 */
async function ensureTransformersPipeline(config, modelOverride) {
  const model = resolveModelSource(config, modelOverride);
  const modelKey = `${model.source}::${model.dtype ?? "default"}`;

  if (state.pipeline && state.modelKey === modelKey) {
    return { pipeline: state.pipeline, loadedNow: false, model };
  }

  if (!state.pipelinePromise || state.pendingModelKey !== modelKey) {
    state.loading = true;
    state.pendingModelKey = modelKey;

    // The callbacks below run asynchronously, after `pending` is assigned.
    const pending = createTransformersPipeline(config, model).then(
      (loadedPipeline) => {
        if (state.pipelinePromise === pending) {
          state.pipeline = loadedPipeline;
          state.modelKey = modelKey;
          state.warmed = true;
          state.lastError = void 0;
          state.loading = false;
        }
        return loadedPipeline;
      },
      (error) => {
        if (state.pipelinePromise === pending) {
          state.lastError = error instanceof Error ? error.message : String(error);
          state.pipeline = void 0;
          state.modelKey = void 0;
          state.pipelinePromise = void 0;
          state.pendingModelKey = void 0;
          state.loading = false;
        }
        throw error;
      },
    );
    state.pipelinePromise = pending;
  }

  const pipeline = await state.pipelinePromise;
  return { pipeline, loadedNow: true, model };
}
|
|
168
|
+
/**
 * Merges per-request generation overrides on top of the configured defaults.
 *
 * For each supported option the override is used unless it is null or
 * undefined, in which case the default applies. Unknown keys are ignored.
 *
 * @param {object} defaults - Configured generation defaults.
 * @param {object} [overrides] - Optional per-request values.
 * @returns {{maxNewTokens: *, temperature: *, topP: *, doSample: *, repetitionPenalty: *}}
 */
function resolveGenerationOptions(defaults, overrides) {
  const optionNames = ["maxNewTokens", "temperature", "topP", "doSample", "repetitionPenalty"];
  return Object.fromEntries(
    optionNames.map((name) => [name, overrides?.[name] ?? defaults[name]]),
  );
}
|
|
177
|
+
/**
 * Pulls the completion text out of a pipeline result.
 *
 * Only the first element is inspected when `output` is an array.
 * - An object with a `generated_text` key yields that text, trimmed, with a
 *   leading copy of `prompt` removed when present.
 * - A plain string is returned trimmed.
 * - Anything else is returned as the JSON serialisation of the whole `output`.
 *
 * @param {string} prompt - The prompt that was sent to the pipeline.
 * @param {*} output - Raw pipeline result.
 * @returns {string} The extracted completion.
 */
function extractGeneratedText(prompt, output) {
  const head = Array.isArray(output) ? output[0] : output;

  if (typeof head === "string") {
    return head.trim();
  }

  const carriesGeneratedText = Boolean(head) && typeof head === "object" && "generated_text" in head;
  if (!carriesGeneratedText) {
    return JSON.stringify(output);
  }

  const fullText = String(head.generated_text ?? "");
  const completion = fullText.startsWith(prompt) ? fullText.slice(prompt.length) : fullText;
  return completion.trim();
}
|
|
188
|
+
/**
 * Produces a canned response for the mock runtime without loading any model.
 *
 * The text echoes the requested model and prompt and tells the reader how to
 * enable the real runtime. Latency is always reported as 0.
 *
 * @param {object} config - Module config; `config.model` is read.
 * @param {{prompt: string, model?: string, generation?: object}} input - Request payload.
 * @returns {object} Response with `text`, `model`, `runtime`, `provider`,
 *   `generation` and `metrics`.
 */
function runMockInference(config, input) {
  const generation = resolveGenerationOptions(config.model.generation, input.generation);
  const modelName = input.model || config.model.id;

  const sentences = [];
  sentences.push(`Mock runtime for "${modelName}".`);
  sentences.push(`Prompt received: ${input.prompt}`);
  sentences.push("Switch `edgeAI.runtime` to `transformers-wasm` to run the real WASM model runtime.");
  const text = sentences.join(" ");

  const metrics = {
    latencyMs: 0,
    promptLength: input.prompt.length,
    completionLength: text.length,
  };

  return {
    text,
    model: modelName,
    runtime: "mock",
    provider: "mock",
    generation,
    metrics,
  };
}
|
|
208
|
+
/**
 * Reports the health of the engine, optionally warming the pipeline first.
 *
 * Warm-up runs only for the `transformers-wasm` runtime with `config.warmup`
 * enabled, and only when no pipeline is loaded and none is loading. A failed
 * warm-up does not fail the health check; the response still has status "ok".
 *
 * @param {object} config - Module config.
 * @returns {Promise<{status: string, runtime: string, model: object, defaults: object, engine: object}>}
 */
export async function getEdgeAIHealth(config) {
  const model = resolveModelSource(config);

  const isIdle = !state.pipeline && !state.loading;
  const shouldWarmUp = config.runtime === "transformers-wasm" && config.warmup && isIdle;
  if (shouldWarmUp) {
    try {
      await ensureTransformersPipeline(config);
    } catch {
      // Deliberately ignored so the endpoint still answers after a failed warm-up.
    }
  }

  const engine = currentEngineState(config);
  return {
    status: "ok",
    runtime: config.runtime,
    model,
    defaults: config.model.generation,
    engine,
  };
}
|
|
224
|
+
/**
 * Makes sure the configured model is loaded and reports the outcome.
 *
 * The mock runtime has nothing to load, so it answers immediately with
 * `loadedNow: false`. Any other runtime goes through the transformers
 * pipeline loader and reports the result as `transformers-wasm`.
 *
 * @param {object} config - Module config.
 * @returns {Promise<{status: string, runtime: string, model: object, engine: object, loadedNow: boolean}>}
 * @throws Whatever the pipeline loader rejects with.
 */
export async function pullEdgeAIModel(config) {
  const isMock = config.runtime === "mock";

  const { loadedNow, model } = isMock
    ? { loadedNow: false, model: resolveModelSource(config) }
    : await ensureTransformersPipeline(config);

  return {
    status: "ready",
    runtime: isMock ? "mock" : "transformers-wasm",
    model,
    // Taken after loading so it reflects the freshly loaded pipeline.
    engine: currentEngineState(config),
    loadedNow,
  };
}
|
|
243
|
+
/**
 * Generates text for a prompt with the configured runtime.
 *
 * The mock runtime is delegated to `runMockInference`. Otherwise the
 * transformers pipeline is obtained (loading it when necessary) and invoked
 * with the resolved generation options. The reported latency covers both the
 * pipeline lookup/load and the inference call.
 *
 * @param {object} config - Module config.
 * @param {{prompt: string, model?: string, generation?: object}} input - Request payload.
 * @returns {Promise<object>} Response with `text`, `model`, `runtime`,
 *   `provider`, `generation` and `metrics`.
 * @throws Whatever the pipeline loader or the pipeline call rejects with.
 */
export async function generateEdgeAIText(config, input) {
  if (config.runtime === "mock") {
    return runMockInference(config, input);
  }

  const generation = resolveGenerationOptions(config.model.generation, input.generation);
  const startedAt = performance.now();

  const loaded = await ensureTransformersPipeline(config, input.model);
  const runPipeline = loaded.pipeline;

  // The pipeline expects snake_case option names.
  const pipelineOptions = {
    max_new_tokens: generation.maxNewTokens,
    temperature: generation.temperature,
    top_p: generation.topP,
    do_sample: generation.doSample,
    repetition_penalty: generation.repetitionPenalty,
  };
  const rawOutput = await runPipeline(input.prompt, pipelineOptions);
  const text = extractGeneratedText(input.prompt, rawOutput);

  const elapsedMs = performance.now() - startedAt;
  return {
    text,
    model: loaded.model.id,
    runtime: "transformers-wasm",
    provider: "transformers.js-wasm",
    generation,
    metrics: {
      latencyMs: Number(elapsedMs.toFixed(2)),
      promptLength: input.prompt.length,
      completionLength: text.length,
    },
  };
}
|