nuxt-edge-ai 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +141 -0
  3. package/dist/module.d.mts +23 -0
  4. package/dist/module.json +9 -0
  5. package/dist/module.mjs +121 -0
  6. package/dist/runtime/composables/useEdgeAI.d.ts +9 -0
  7. package/dist/runtime/composables/useEdgeAI.js +24 -0
  8. package/dist/runtime/plugin.d.ts +21 -0
  9. package/dist/runtime/plugin.js +31 -0
  10. package/dist/runtime/server/api/generate.post.d.ts +2 -0
  11. package/dist/runtime/server/api/generate.post.js +19 -0
  12. package/dist/runtime/server/api/health.get.d.ts +2 -0
  13. package/dist/runtime/server/api/health.get.js +6 -0
  14. package/dist/runtime/server/api/pull.post.d.ts +2 -0
  15. package/dist/runtime/server/api/pull.post.js +6 -0
  16. package/dist/runtime/server/tsconfig.json +3 -0
  17. package/dist/runtime/server/utils/edge-ai-engine.d.ts +4 -0
  18. package/dist/runtime/server/utils/edge-ai-engine.js +270 -0
  19. package/dist/runtime/server/vendor/huggingface/transformers.web.d.mts +857 -0
  20. package/dist/runtime/server/vendor/huggingface/transformers.web.js +38740 -0
  21. package/dist/runtime/server/vendor/licenses/huggingface-transformers.LICENSE +202 -0
  22. package/dist/runtime/server/vendor/onnxruntime/onnxruntime-common.d.mts +3 -0
  23. package/dist/runtime/server/vendor/onnxruntime/onnxruntime-common.js +3 -0
  24. package/dist/runtime/server/vendor/onnxruntime/onnxruntime-web.d.mts +3 -0
  25. package/dist/runtime/server/vendor/onnxruntime/onnxruntime-web.js +3 -0
  26. package/dist/runtime/server/vendor/onnxruntime/ort-wasm-simd-threaded.d.mts +2 -0
  27. package/dist/runtime/server/vendor/onnxruntime/ort-wasm-simd-threaded.js +59 -0
  28. package/dist/runtime/server/vendor/onnxruntime/ort-wasm-simd-threaded.wasm +0 -0
  29. package/dist/runtime/server/vendor/onnxruntime/ort.wasm.min.d.mts +11 -0
  30. package/dist/runtime/server/vendor/onnxruntime/ort.wasm.min.js +7 -0
  31. package/dist/runtime/types.d.ts +74 -0
  32. package/dist/runtime/types.js +0 -0
  33. package/dist/types.d.mts +5 -0
  34. package/package.json +74 -0
@@ -0,0 +1,270 @@
1
+ import { access, readFile } from "node:fs/promises";
2
+ import { createRequire } from "node:module";
3
+ import { dirname, extname, join, resolve } from "node:path";
4
+ import { pathToFileURL } from "node:url";
5
/**
 * Mutable, module-wide engine state. It starts with only the two flags below;
 * other functions in this file attach further fields at runtime (for example
 * `runtime`, `initPromise`, `pipeline`, `pipelinePromise`, `modelKey` and
 * `lastError`).
 */
const state = { loading: false, warmed: false };

// CommonJS-style resolver anchored at the host project's package.json (the
// current working directory), used to look packages up from the app's side.
const require = createRequire(resolve(process.cwd(), "package.json"));
10
/**
 * Returns the `content-type` header value for a file extension.
 *
 * @param {string} extension - Extension including the leading dot. The match
 *   is exact, so callers are expected to lower-case it first.
 * @returns {string} The matching MIME type, or `application/octet-stream`
 *   for anything that is not recognised.
 */
function contentTypeForExtension(extension) {
  // A Map (rather than a plain object) keeps keys such as "constructor" from
  // accidentally matching inherited properties.
  const knownTypes = new Map([
    [".json", "application/json"],
    [".wasm", "application/wasm"],
    [".txt", "text/plain; charset=utf-8"],
  ]);
  return knownTypes.get(extension) ?? "application/octet-stream";
}
22
/**
 * Wraps `globalThis.fetch` so that `file://` URLs are served from the local
 * filesystem; every other request is forwarded to the original `fetch`.
 *
 * The wrapper is installed at most once per process: a marker property on
 * `globalThis` records that it is already in place.
 *
 * For `file://` requests the response carries a `content-type` derived from
 * the file extension. A file that does not exist yields a `404 Not Found`
 * response (mirroring HTTP semantics so that callers checking
 * `response.ok` / `response.status` keep working); any other filesystem
 * error is rethrown unchanged.
 *
 * @returns {void}
 */
function installFileFetchShim() {
  const globalWithShim = globalThis;
  if (globalWithShim.__nuxtEdgeAIFileFetchShim) {
    return;
  }
  // Bind so the original keeps working when invoked detached from globalThis.
  const originalFetch = globalThis.fetch.bind(globalThis);
  globalThis.fetch = async (input, init) => {
    // `input` may be a string, a URL, or a Request-like object exposing `.url`.
    const url = typeof input === "string" ? input : input instanceof URL ? input.href : input.url;
    if (!url.startsWith("file://")) {
      return originalFetch(input, init);
    }
    const fileUrl = new URL(url);
    let buffer;
    try {
      buffer = await readFile(fileUrl);
    } catch (error) {
      if (error?.code === "ENOENT") {
        // Report a missing file the way an HTTP server would instead of
        // rejecting with a raw filesystem error.
        return new Response(null, { status: 404, statusText: "Not Found" });
      }
      throw error;
    }
    return new Response(buffer, {
      headers: {
        "content-type": contentTypeForExtension(extname(fileUrl.pathname).toLowerCase())
      }
    });
  };
  globalWithShim.__nuxtEdgeAIFileFetchShim = true;
}
43
/**
 * Locates the directory holding the vendored onnxruntime-web assets and
 * returns it as a `file://` URL with a trailing slash.
 *
 * Candidate directories are tried in this order (duplicates are collapsed):
 *   1. next to the resolved `nuxt-edge-ai` entry point, when it resolves;
 *   2. `node_modules/nuxt-edge-ai/dist/...` under the working directory;
 *   3. `dist/...` under the working directory;
 *   4. `src/...` under the working directory.
 *
 * A candidate is accepted when it contains the threaded WASM loader script.
 * The loader is probed under both the `.mjs` and the `.js` extension so the
 * lookup works whichever extension the vendored file was emitted with.
 *
 * @returns {Promise<string>} Base URL of the first matching directory.
 * @throws {Error} When none of the candidates contains the loader script.
 */
async function resolveVendoredOnnxRuntimeBaseUrl() {
  const vendorSegments = ["runtime", "server", "vendor", "onnxruntime"];
  const loaderFileNames = ["ort-wasm-simd-threaded.mjs", "ort-wasm-simd-threaded.js"];
  const candidates = /* @__PURE__ */ new Set();
  try {
    const moduleEntry = require.resolve("nuxt-edge-ai");
    candidates.add(join(dirname(moduleEntry), ...vendorSegments));
  } catch {
    // Best effort: the package may not be resolvable from the working
    // directory; the path-based candidates below still apply.
  }
  candidates.add(resolve(process.cwd(), "node_modules", "nuxt-edge-ai", "dist", ...vendorSegments));
  candidates.add(resolve(process.cwd(), "dist", ...vendorSegments));
  candidates.add(resolve(process.cwd(), "src", ...vendorSegments));
  for (const candidate of candidates) {
    for (const loaderFileName of loaderFileNames) {
      try {
        await access(join(candidate, loaderFileName));
        return `${pathToFileURL(candidate).href}/`;
      } catch {
        // Not present under this name/location; keep probing.
      }
    }
  }
  throw new Error("Unable to locate vendored onnxruntime-web assets on disk.");
}
62
/**
 * Lazily loads the vendored onnxruntime and transformers modules and caches
 * the transformers module on `state.runtime`.
 *
 * Concurrent callers share one in-flight initialisation through
 * `state.initPromise`. When initialisation fails, the message is stored in
 * `state.lastError` and the shared promise is cleared so that a later call
 * starts over.
 *
 * @returns {Promise<object>} The imported transformers module namespace.
 */
async function loadTransformersRuntime() {
  if (state.runtime) {
    return state.runtime;
  }
  if (!state.initPromise) {
    const bootstrap = async () => {
      installFileFetchShim();
      // NOTE(review): confirm these `.mjs` specifiers match the extensions of
      // the vendored files that actually ship next to this module.
      const ortModule = await import("../vendor/onnxruntime/ort.wasm.min.mjs");
      // Accept either a namespace that exposes InferenceSession directly or
      // one that carries it on its default export.
      const ort = (ortModule.InferenceSession ? ortModule : ortModule.default) ?? ortModule;
      // Publish the runtime under a well-known global symbol before the
      // transformers module is evaluated.
      globalThis[Symbol.for("onnxruntime")] = ort;
      state.runtime = await import("../vendor/huggingface/transformers.web.mjs");
    };
    const rememberFailure = (error) => {
      state.lastError = error instanceof Error ? error.message : String(error);
      state.initPromise = void 0;
      throw error;
    };
    state.initPromise = bootstrap().catch(rememberFailure);
  }
  await state.initPromise;
  return state.runtime;
}
84
/**
 * Makes sure `env.backends.onnx.wasm` exists, creating any missing (null or
 * undefined) level as an empty object, and returns that innermost object.
 * Levels that already exist are left untouched.
 *
 * @param {object} env - Environment object to extend in place.
 * @returns {object} The `env.backends.onnx.wasm` object.
 */
function ensureOnnxWasmEnv(env) {
  let cursor = env;
  for (const segment of ["backends", "onnx", "wasm"]) {
    cursor = (cursor[segment] ??= {});
  }
  return cursor;
}
91
/**
 * Builds the model descriptor used for loading and reporting.
 *
 * @param {object} config - Module configuration; only `config.model` is read.
 * @param {string} [modelOverride] - Optional model requested by the caller.
 *   It is trimmed, and an empty result is treated as "no override".
 * @returns {{id: *, task: *, localPath: *, allowRemote: *, dtype: *, source: *}}
 *   `id` is the override or the configured id; `source` is the first
 *   non-empty value among the override, the configured local path and the
 *   configured id.
 */
function resolveModelSource(config, modelOverride) {
  const requestedModel = modelOverride?.trim();
  const { id, task, localPath, allowRemote, dtype } = config.model;
  return {
    id: requestedModel || id,
    task,
    localPath,
    allowRemote,
    dtype,
    source: requestedModel || localPath || id,
  };
}
103
/**
 * Produces a snapshot of the engine status for API responses.
 *
 * The "mock" runtime is always reported as ready and warmed; for any other
 * runtime those flags come from the shared module state.
 *
 * @param {object} config - Module configuration (`runtime`, `cacheDir`).
 * @returns {{active: *, ready: boolean, warmed: *, loading: *, cacheDir: *, lastError: *}}
 */
function currentEngineState(config) {
  const { runtime, cacheDir } = config;
  const isMock = runtime === "mock";
  const { pipeline, warmed, loading, lastError } = state;
  return {
    active: runtime,
    ready: isMock || Boolean(pipeline),
    warmed: isMock || warmed,
    loading,
    cacheDir,
    lastError,
  };
}
113
/**
 * Returns a ready transformers pipeline for the requested model, loading it
 * when necessary.
 *
 * Only one pipeline is cached at a time, keyed by `<source>::<dtype>`:
 *   - a cached pipeline with the same key is returned immediately
 *     (`loadedNow: false`);
 *   - a load already in flight for the same key is shared by every caller
 *     instead of being started again;
 *   - otherwise a new load starts and becomes the current one.
 *
 * Shared state (`pipeline`, `modelKey`, `warmed`, `lastError`, `loading`) is
 * only updated by the load that is still current when it settles, so an older
 * load that was superseded cannot overwrite the result of a newer one. The
 * in-flight promise is cleared once it settles, which lets the next call
 * retry after a failure.
 *
 * @param {object} config - Module configuration (`model`, `cacheDir`, ...).
 * @param {string} [modelOverride] - Optional model requested by the caller.
 * @returns {Promise<{pipeline: Function, loadedNow: boolean, model: object}>}
 *   `loadedNow` is `true` whenever the caller had to wait for a load.
 * @throws Rethrows whatever the runtime or pipeline load rejected with; the
 *   message is also recorded in `state.lastError`.
 */
async function ensureTransformersPipeline(config, modelOverride) {
  const model = resolveModelSource(config, modelOverride);
  const modelKey = `${model.source}::${model.dtype ?? "default"}`;
  if (state.pipeline && state.modelKey === modelKey) {
    return { pipeline: state.pipeline, loadedNow: false, model };
  }
  // `state.modelKey` is only set once a load has finished, so in-flight loads
  // are matched through `state.pendingModelKey` instead.
  if (!state.pipelinePromise || state.pendingModelKey !== modelKey) {
    state.loading = true;
    state.pendingModelKey = modelKey;
    const loadPromise = (async () => {
      const transformers = await loadTransformersRuntime();
      transformers.env.allowLocalModels = Boolean(config.model.localPath);
      transformers.env.allowRemoteModels = config.model.allowRemote;
      // Filesystem access and the filesystem cache are switched off, so
      // `config.cacheDir` is intentionally not applied here.
      transformers.env.useFS = false;
      transformers.env.useFSCache = false;
      if (config.model.localPath) {
        transformers.env.localModelPath = config.model.localPath;
      }
      const wasmBaseUrl = await resolveVendoredOnnxRuntimeBaseUrl();
      const onnxWasmEnv = ensureOnnxWasmEnv(transformers.env);
      onnxWasmEnv.wasmPaths = wasmBaseUrl;
      onnxWasmEnv.numThreads = 1;
      onnxWasmEnv.proxy = false;
      // Apply the same WASM settings to the runtime published on the global
      // symbol, when one is present.
      const ortRuntime = globalThis[Symbol.for("onnxruntime")];
      if (ortRuntime) {
        ortRuntime.env ??= {};
        ortRuntime.env.wasm ??= {};
        ortRuntime.env.wasm.wasmPaths = wasmBaseUrl;
        ortRuntime.env.wasm.numThreads = 1;
        ortRuntime.env.wasm.proxy = false;
      }
      return transformers.pipeline(config.model.task, model.source, {
        device: "wasm",
        dtype: model.dtype
      });
    })().then((loadedPipeline) => {
      if (state.pipelinePromise === loadPromise) {
        state.pipeline = loadedPipeline;
        state.modelKey = modelKey;
        state.warmed = true;
        state.lastError = void 0;
      }
      return loadedPipeline;
    }).catch((error) => {
      if (state.pipelinePromise === loadPromise) {
        state.lastError = error instanceof Error ? error.message : String(error);
        state.pipeline = void 0;
        state.modelKey = void 0;
      }
      throw error;
    }).finally(() => {
      if (state.pipelinePromise === loadPromise) {
        state.pipelinePromise = void 0;
        state.pendingModelKey = void 0;
        state.loading = false;
      }
    });
    state.pipelinePromise = loadPromise;
  }
  const pipeline = await state.pipelinePromise;
  return { pipeline, loadedNow: true, model };
}
168
/**
 * Merges per-request generation overrides over the configured defaults.
 * For each supported option, the override wins unless it is null or
 * undefined, in which case the default is used.
 *
 * @param {object} defaults - Configured generation defaults.
 * @param {object} [overrides] - Optional per-request values.
 * @returns {{maxNewTokens: *, temperature: *, topP: *, doSample: *, repetitionPenalty: *}}
 */
function resolveGenerationOptions(defaults, overrides) {
  const optionNames = ["maxNewTokens", "temperature", "topP", "doSample", "repetitionPenalty"];
  return Object.fromEntries(
    optionNames.map((name) => [name, overrides?.[name] ?? defaults[name]])
  );
}
177
/**
 * Extracts the completion text from a pipeline result. Always returns a
 * string.
 *
 * Handled shapes (for an array result only the first item is inspected):
 *   - an object with a string `generated_text`: a leading copy of the prompt
 *     is removed and the rest is trimmed;
 *   - an object whose `generated_text` is an array of chat messages: the
 *     `content` of the last message is used;
 *   - a plain string: trimmed and returned;
 *   - anything else: the JSON serialisation of the whole output, or an empty
 *     string when the output cannot be serialised (e.g. `undefined`).
 *
 * @param {string} prompt - Prompt that was sent to the pipeline.
 * @param {*} output - Raw pipeline output.
 * @returns {string} The extracted text.
 */
function extractGeneratedText(prompt, output) {
  const firstItem = Array.isArray(output) ? output[0] : output;
  if (firstItem && typeof firstItem === "object" && "generated_text" in firstItem) {
    const generated = firstItem.generated_text;
    if (Array.isArray(generated)) {
      // Chat-style result: the newest message holds the completion.
      const lastMessage = generated[generated.length - 1];
      return String(lastMessage?.content ?? "").trim();
    }
    const generatedText = String(generated ?? "");
    return generatedText.startsWith(prompt)
      ? generatedText.slice(prompt.length).trim()
      : generatedText.trim();
  }
  if (typeof firstItem === "string") {
    return firstItem.trim();
  }
  // JSON.stringify yields undefined for undefined/function inputs; callers
  // read `.length` on the result, so fall back to an empty string.
  return JSON.stringify(output) ?? "";
}
188
/**
 * Produces a canned response for the "mock" runtime without loading a model.
 *
 * @param {object} config - Module configuration; supplies the default model
 *   id and the generation defaults.
 * @param {object} input - Request payload (`prompt`, optional `model`,
 *   optional `generation`).
 * @returns {object} A result in the same shape as a real generation, with
 *   `runtime`/`provider` set to "mock" and a latency of 0.
 */
function runMockInference(config, input) {
  const generation = resolveGenerationOptions(config.model.generation, input.generation);
  const modelName = input.model || config.model.id;
  const sentences = [];
  sentences.push(`Mock runtime for "${modelName}".`);
  sentences.push(`Prompt received: ${input.prompt}`);
  sentences.push("Switch `edgeAI.runtime` to `transformers-wasm` to run the real WASM model runtime.");
  const text = sentences.join(" ");
  const metrics = {
    latencyMs: 0,
    promptLength: input.prompt.length,
    completionLength: text.length,
  };
  return {
    text,
    model: modelName,
    runtime: "mock",
    provider: "mock",
    generation,
    metrics,
  };
}
208
/**
 * Builds the health report for the module.
 *
 * When the real runtime is selected, warm-up is enabled, and no pipeline is
 * loaded or currently loading, this call first attempts to load the pipeline.
 * A failed warm-up does not fail the health check; the report is still
 * returned with status "ok".
 *
 * @param {object} config - Module configuration.
 * @returns {Promise<{status: string, runtime: *, model: object, defaults: *, engine: object}>}
 */
export async function getEdgeAIHealth(config) {
  const model = resolveModelSource(config);
  const { runtime } = config;
  const needsWarmup =
    runtime === "transformers-wasm" && config.warmup && !state.pipeline && !state.loading;
  if (needsWarmup) {
    try {
      await ensureTransformersPipeline(config);
    } catch {
      // Deliberately ignored: warm-up is best effort and must not turn the
      // health endpoint into an error.
    }
  }
  const engine = currentEngineState(config);
  return {
    status: "ok",
    runtime,
    model,
    defaults: config.model.generation,
    engine,
  };
}
224
/**
 * Makes sure the configured model is available and reports the outcome.
 *
 * For the "mock" runtime nothing is loaded and `loadedNow` is `false`.
 * Otherwise the pipeline is loaded (or reused) and the result reflects what
 * the loader reported.
 *
 * @param {object} config - Module configuration.
 * @returns {Promise<{status: string, runtime: string, model: object, engine: object, loadedNow: boolean}>}
 * @throws Propagates any error raised while loading the pipeline.
 */
export async function pullEdgeAIModel(config) {
  const isMock = config.runtime === "mock";
  const { loadedNow, model } = isMock
    ? { loadedNow: false, model: resolveModelSource(config) }
    : await ensureTransformersPipeline(config);
  return {
    status: "ready",
    runtime: isMock ? "mock" : "transformers-wasm",
    model,
    // Read after any load has finished so the snapshot reflects the result.
    engine: currentEngineState(config),
    loadedNow,
  };
}
243
/**
 * Generates text for a prompt using the configured runtime.
 *
 * The "mock" runtime short-circuits to the canned mock response. Otherwise
 * the pipeline is obtained (loading it if needed), invoked with the resolved
 * generation options, and the completion text is extracted from its output.
 * The reported latency covers everything from just before the pipeline is
 * obtained until the text has been extracted, rounded to two decimals.
 *
 * @param {object} config - Module configuration.
 * @param {object} input - Request payload (`prompt`, optional `model`,
 *   optional `generation`).
 * @returns {Promise<object>} Text, model id, runtime/provider labels, the
 *   generation options that were used, and basic metrics.
 * @throws Propagates errors from loading or running the pipeline.
 */
export async function generateEdgeAIText(config, input) {
  if (config.runtime === "mock") {
    return runMockInference(config, input);
  }
  const generation = resolveGenerationOptions(config.model.generation, input.generation);
  const startedAt = performance.now();
  const { pipeline: runPipeline, model } = await ensureTransformersPipeline(config, input.model);
  // Translate the camelCase option names into the snake_case names passed to
  // the pipeline call.
  const pipelineOptions = {
    max_new_tokens: generation.maxNewTokens,
    temperature: generation.temperature,
    top_p: generation.topP,
    do_sample: generation.doSample,
    repetition_penalty: generation.repetitionPenalty,
  };
  const output = await runPipeline(input.prompt, pipelineOptions);
  const text = extractGeneratedText(input.prompt, output);
  const elapsedMs = performance.now() - startedAt;
  return {
    text,
    model: model.id,
    runtime: "transformers-wasm",
    provider: "transformers.js-wasm",
    generation,
    metrics: {
      latencyMs: Number(elapsedMs.toFixed(2)),
      promptLength: input.prompt.length,
      completionLength: text.length,
    },
  };
}