npm - @elizaos/capacitor-llama - Versions diffs - 0.1.0 - Mend

@elizaos/capacitor-llama 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

package/README.md +68 -0
package/dist/esm/capacitor-llama-adapter.d.ts +5 -0
package/dist/esm/capacitor-llama-adapter.js +262 -0
package/dist/esm/definitions.d.ts +92 -0
package/dist/esm/definitions.js +10 -0
package/dist/esm/device-bridge-client.d.ts +48 -0
package/dist/esm/device-bridge-client.js +221 -0
package/dist/esm/index.d.ts +15 -0
package/dist/esm/index.js +15 -0
package/dist/esm/index.test.d.ts +1 -0
package/dist/esm/index.test.js +264 -0
package/dist/esm/load-capacitor-llama.d.ts +2 -0
package/dist/esm/load-capacitor-llama.js +9 -0
package/dist/esm/web.d.ts +11 -0
package/dist/esm/web.js +10 -0
package/dist/plugin.cjs.js +500 -0
package/dist/plugin.cjs.js.map +1 -0
package/dist/plugin.js +505 -0
package/dist/plugin.js.map +1 -0
package/package.json +52 -0

package/dist/esm/device-bridge-client.js ADDED Viewed

@@ -0,0 +1,221 @@
+/**
+ * Device-side half of the agent↔device inference bridge.
+ *
+ * Runs inside the mobile app (Capacitor iOS / Android) and dials out to
+ * the agent container over WebSocket. Receives `ping`, `load`, `unload`,
+ * and `generate` requests, forwards the inference calls to
+ * `capacitorLlama`, and returns the results. Auto-reconnects with
+ * jittered exponential backoff when the link drops.
+ *
+ * Mirrors the message envelope defined in
+ * `@elizaos/app-core/src/services/local-inference/device-bridge.ts`.
+ * Keep the two in sync by hand — the message shape is the bridge
+ * contract.
+ */
+import { loadCapacitorLlama } from "./load-capacitor-llama";
+const INITIAL_BACKOFF_MS = 1000; // Base reconnect delay in ms; doubled per failed attempt in computeBackoffMs().
+const MAX_BACKOFF_MS = 30000; // Upper bound in ms applied to the exponential delay before jitter is drawn.
+export class DeviceBridgeClient {
+    constructor(config) {
+        this.socket = null;
+        this.reconnectAttempt = 0;
+        this.stopped = false;
+        this.config = config;
+    }
+    start() {
+        this.stopped = false;
+        this.connect();
+    }
+    stop() {
+        this.stopped = true;
+        if (this.socket) {
+            try {
+                this.socket.close(1000, "client-stop");
+            }
+            catch (_a) {
+                /* best effort */
+            }
+            this.socket = null;
+        }
+    }
+    computeBackoffMs() {
+        const exp = Math.min(MAX_BACKOFF_MS, INITIAL_BACKOFF_MS * 2 ** Math.min(this.reconnectAttempt, 6));
+        // Full jitter: uniform random in [0, exp).
+        return Math.floor(Math.random() * exp);
+    }
+    connect() {
+        var _a, _b, _c, _d;
+        if (this.stopped)
+            return;
+        (_b = (_a = this.config).onStateChange) === null || _b === void 0 ? void 0 : _b.call(_a, "connecting");
+        const url = this.buildUrl();
+        let ws;
+        try {
+            ws = new WebSocket(url);
+        }
+        catch (err) {
+            (_d = (_c = this.config).onStateChange) === null || _d === void 0 ? void 0 : _d.call(_c, "error", err instanceof Error ? err.message : String(err));
+            this.scheduleReconnect();
+            return;
+        }
+        this.socket = ws;
+        ws.onopen = () => {
+            this.reconnectAttempt = 0;
+            void this.sendRegister(ws);
+        };
+        ws.onmessage = (event) => {
+            let msg;
+            try {
+                msg = JSON.parse(String(event.data));
+            }
+            catch (_a) {
+                return;
+            }
+            void this.handleAgentMessage(ws, msg);
+        };
+        ws.onerror = () => {
+            var _a, _b;
+            (_b = (_a = this.config).onStateChange) === null || _b === void 0 ? void 0 : _b.call(_a, "error", "websocket error");
+        };
+        ws.onclose = () => {
+            var _a, _b;
+            this.socket = null;
+            (_b = (_a = this.config).onStateChange) === null || _b === void 0 ? void 0 : _b.call(_a, "disconnected");
+            this.scheduleReconnect();
+        };
+    }
+    buildUrl() {
+        if (!this.config.pairingToken)
+            return this.config.agentUrl;
+        const hasQuery = this.config.agentUrl.includes("?");
+        const sep = hasQuery ? "&" : "?";
+        return `${this.config.agentUrl}${sep}token=${encodeURIComponent(this.config.pairingToken)}`;
+    }
+    scheduleReconnect() {
+        if (this.stopped)
+            return;
+        const delay = this.computeBackoffMs();
+        this.reconnectAttempt += 1;
+        setTimeout(() => this.connect(), delay);
+    }
+    async sendRegister(ws) {
+        var _a, _b;
+        const capacitorLlama = await loadCapacitorLlama();
+        const hardware = await capacitorLlama.getHardwareInfo();
+        const loaded = await capacitorLlama.isLoaded();
+        const msg = {
+            type: "register",
+            payload: {
+                deviceId: this.config.deviceId,
+                pairingToken: this.config.pairingToken,
+                capabilities: {
+                    platform: hardware.platform,
+                    deviceModel: hardware.deviceModel,
+                    totalRamGb: hardware.totalRamGb,
+                    cpuCores: hardware.cpuCores,
+                    gpu: hardware.gpu,
+                },
+                loadedPath: loaded.modelPath,
+            },
+        };
+        this.send(ws, msg);
+        (_b = (_a = this.config).onStateChange) === null || _b === void 0 ? void 0 : _b.call(_a, "connected");
+    }
+    send(ws, msg) {
+        if (ws.readyState !== WebSocket.OPEN)
+            return;
+        ws.send(JSON.stringify(msg));
+    }
+    async handleAgentMessage(ws, msg) {
+        if (msg.type === "ping") {
+            this.send(ws, { type: "pong", at: Date.now() });
+            return;
+        }
+        if (msg.type === "load") {
+            try {
+                const capacitorLlama = await loadCapacitorLlama();
+                await capacitorLlama.load({
+                    modelPath: msg.modelPath,
+                    contextSize: msg.contextSize,
+                    useGpu: msg.useGpu,
+                });
+                this.send(ws, {
+                    type: "loadResult",
+                    correlationId: msg.correlationId,
+                    ok: true,
+                    loadedPath: msg.modelPath,
+                });
+            }
+            catch (err) {
+                this.send(ws, {
+                    type: "loadResult",
+                    correlationId: msg.correlationId,
+                    ok: false,
+                    error: err instanceof Error ? err.message : String(err),
+                });
+            }
+            return;
+        }
+        if (msg.type === "unload") {
+            try {
+                const capacitorLlama = await loadCapacitorLlama();
+                await capacitorLlama.unload();
+                this.send(ws, {
+                    type: "unloadResult",
+                    correlationId: msg.correlationId,
+                    ok: true,
+                });
+            }
+            catch (err) {
+                this.send(ws, {
+                    type: "unloadResult",
+                    correlationId: msg.correlationId,
+                    ok: false,
+                    error: err instanceof Error ? err.message : String(err),
+                });
+            }
+            return;
+        }
+        if (msg.type === "generate") {
+            try {
+                const capacitorLlama = await loadCapacitorLlama();
+                const result = await capacitorLlama.generate({
+                    prompt: msg.prompt,
+                    stopSequences: msg.stopSequences,
+                    maxTokens: msg.maxTokens,
+                    temperature: msg.temperature,
+                });
+                this.send(ws, {
+                    type: "generateResult",
+                    correlationId: msg.correlationId,
+                    ok: true,
+                    text: result.text,
+                    promptTokens: result.promptTokens,
+                    outputTokens: result.outputTokens,
+                    durationMs: result.durationMs,
+                });
+            }
+            catch (err) {
+                this.send(ws, {
+                    type: "generateResult",
+                    correlationId: msg.correlationId,
+                    ok: false,
+                    error: err instanceof Error ? err.message : String(err),
+                });
+            }
+            return;
+        }
+    }
+}
+/**
+ * Convenience helper for the mobile bootstrap: starts a bridge client
+ * using values from the Eliza config or hardcoded env.
+ *
+ * The host app is expected to call this once during Capacitor bootstrap.
+ * `agentUrl` and `pairingToken` come from the user's pairing flow and
+ * should be persisted across launches.
+ */
+export function startDeviceBridgeClient(config) {
+    const client = new DeviceBridgeClient(config);
+    client.start();
+    return client;
+}

package/dist/esm/index.d.ts ADDED Viewed

@@ -0,0 +1,15 @@
+/**
+ * @elizaos/capacitor-llama
+ *
+ * Thin adapter that maps `llama-cpp-capacitor`'s contextId-based API onto
+ * Eliza's `LocalInferenceLoader` contract. At most one native context lives
+ * at a time; switching models disposes the previous context first so we
+ * never double-allocate VRAM.
+ *
+ * On web this package falls back to an "unavailable" stub. Mobile builds
+ * should call `registerCapacitorLlamaLoader(runtime)` during bootstrap to
+ * wire this adapter in as the runtime's `localInferenceLoader` service.
+ */
+export { capacitorLlama, registerCapacitorLlamaLoader, } from "./capacitor-llama-adapter";
+export * from "./definitions";
+export { DeviceBridgeClient, type DeviceBridgeClientConfig, startDeviceBridgeClient, } from "./device-bridge-client";

package/dist/esm/index.js ADDED Viewed

@@ -0,0 +1,15 @@
+/**
+ * @elizaos/capacitor-llama
+ *
+ * Thin adapter that maps `llama-cpp-capacitor`'s contextId-based API onto
+ * Eliza's `LocalInferenceLoader` contract. At most one native context lives
+ * at a time; switching models disposes the previous context first so we
+ * never double-allocate VRAM.
+ *
+ * On web this package falls back to an "unavailable" stub. Mobile builds
+ * should call `registerCapacitorLlamaLoader(runtime)` during bootstrap to
+ * wire this adapter in as the runtime's `localInferenceLoader` service.
+ */
+export { capacitorLlama, registerCapacitorLlamaLoader, } from "./capacitor-llama-adapter";
+export * from "./definitions";
+export { DeviceBridgeClient, startDeviceBridgeClient, } from "./device-bridge-client";

package/dist/esm/index.test.d.ts ADDED Viewed

	@@ -0,0 +1 @@
1	+ export {};

package/dist/esm/index.test.js ADDED Viewed

@@ -0,0 +1,264 @@
+import { afterEach, describe, expect, it, vi } from "vitest";
+function setNativeCapacitor(platform = "ios") {
+    globalThis.Capacitor = {
+        getPlatform: () => platform,
+        isNativePlatform: () => true,
+    };
+}
+function clearNativeCapacitor() { // Removes the stub `Capacitor` global so the next test starts without one.
+    delete globalThis.Capacitor;
+}
+function makePluginMock() {
+    let tokenListener = null;
+    const listenerHandle = { remove: vi.fn(async () => undefined) };
+    return {
+        initContext: vi.fn(async (_options) => ({
+            contextId: 1,
+            gpu: true,
+            reasonNoGPU: "",
+            model: {
+                desc: "test",
+                size: 1,
+                nEmbd: 1,
+                nParams: 1,
+                chatTemplates: {
+                    llamaChat: false,
+                    minja: {
+                        default: false,
+                        defaultCaps: {
+                            tools: false,
+                            toolCalls: false,
+                            toolResponses: false,
+                            systemRole: false,
+                            parallelToolCalls: false,
+                            toolCallId: false,
+                        },
+                        toolUse: false,
+                        toolUseCaps: {
+                            tools: false,
+                            toolCalls: false,
+                            toolResponses: false,
+                            systemRole: false,
+                            parallelToolCalls: false,
+                            toolCallId: false,
+                        },
+                    },
+                },
+                metadata: {},
+                isChatTemplateSupported: false,
+            },
+        })),
+        releaseContext: vi.fn(async (_options) => undefined),
+        releaseAllContexts: vi.fn(async () => undefined),
+        generateText: vi.fn(async (_options) => ({
+            text: "hello",
+            reasoning_content: "",
+            tool_calls: [],
+            content: "hello",
+            chat_format: 0,
+            tokens_predicted: 2,
+            tokens_evaluated: 3,
+            truncated: false,
+            stopped_eos: true,
+            stopped_word: "",
+            stopped_limit: 0,
+            stopping_word: "",
+            context_full: false,
+            interrupted: false,
+            tokens_cached: 0,
+            timings: {
+                prompt_n: 3,
+                prompt_ms: 1,
+                prompt_per_token_ms: 1,
+                prompt_per_second: 1,
+                predicted_n: 2,
+                predicted_ms: 7,
+                predicted_per_token_ms: 1,
+                predicted_per_second: 1,
+            },
+        })),
+        stopCompletion: vi.fn(async (_options) => undefined),
+        embedding: vi.fn(async (_options) => ({
+            embedding: [0.5, -0.25, 0.75],
+        })),
+        tokenize: vi.fn(async (_options) => ({
+            tokens: [1, 2, 3, 4],
+        })),
+        addListener: vi.fn(async (_event, listener) => {
+            tokenListener = listener;
+            return listenerHandle;
+        }),
+        emitToken(token) {
+            tokenListener === null || tokenListener === void 0 ? void 0 : tokenListener({ tokenResult: { token } });
+        },
+        listenerHandle,
+    };
+}
+let mockedPlugin = null;
+function currentPlugin() {
+    if (!mockedPlugin) {
+        throw new Error("llama-cpp-capacitor mock was not configured");
+    }
+    return mockedPlugin;
+}
+const llamaCppProxy = {
+    initContext(options) {
+        return currentPlugin().initContext(options);
+    },
+    releaseContext(options) {
+        return currentPlugin().releaseContext(options);
+    },
+    releaseAllContexts() {
+        return currentPlugin().releaseAllContexts();
+    },
+    generateText(options) {
+        return currentPlugin().generateText(options);
+    },
+    stopCompletion(options) {
+        return currentPlugin().stopCompletion(options);
+    },
+    embedding(options) {
+        return currentPlugin().embedding(options);
+    },
+    tokenize(options) {
+        return currentPlugin().tokenize(options);
+    },
+    addListener(event, listener) {
+        return currentPlugin().addListener(event, listener);
+    },
+};
+vi.mock("llama-cpp-capacitor", () => ({ LlamaCpp: llamaCppProxy }));
+afterEach(async () => {
+    var _a;
+    // Reset the module-level `capacitorLlama` singleton between tests so
+    // state from a prior load() doesn't leak into the next case.
+    try {
+        const mod = await import("./index");
+        const adapter = mod.capacitorLlama;
+        await ((_a = adapter.dispose) === null || _a === void 0 ? void 0 : _a.call(adapter));
+    }
+    catch (_b) {
+        /* dispose is best-effort */
+    }
+    clearNativeCapacitor();
+    mockedPlugin = null;
+});
+describe("@elizaos/capacitor-llama adapter", () => { // Exercises the adapter against the mocked llama-cpp-capacitor plugin.
+    it("rejects model loads outside native Capacitor", async () => { // No stub Capacitor global is installed in this case.
+        const plugin = makePluginMock();
+        mockedPlugin = plugin;
+        const { capacitorLlama } = await import("./index");
+        await expect(capacitorLlama.load({ modelPath: "/models/test.gguf" })).rejects.toThrow("only available on iOS and Android");
+        expect(plugin.initContext).not.toHaveBeenCalled(); // The rejection must happen before the plugin is touched.
+    });
+    it("loads, generates, streams tokens, and unloads through llama-cpp-capacitor", async () => {
+        setNativeCapacitor("ios");
+        const plugin = makePluginMock();
+        mockedPlugin = plugin;
+        const { capacitorLlama } = await import("./index");
+        const tokens = [];
+        const offToken = capacitorLlama.onToken((token, index) => {
+            tokens.push([token, index]);
+        });
+        await capacitorLlama.load({
+            modelPath: "/models/test.gguf",
+            contextSize: 2048,
+            useGpu: true,
+            maxThreads: 4,
+        });
+        plugin.emitToken("h"); // Simulated native token event; expected below to arrive as ["h", 1].
+        const result = await capacitorLlama.generate({
+            prompt: "Say hello",
+            maxTokens: 16,
+            temperature: 0.2,
+            topP: 0.8,
+            stream: true,
+            stopSequences: ["</s>"],
+        });
+        offToken(); // Calls the function onToken() returned (presumably unsubscribes the listener).
+        await capacitorLlama.unload();
+        expect(plugin.initContext).toHaveBeenCalledWith({
+            contextId: 1,
+            params: {
+                model: "/models/test.gguf",
+                n_ctx: 2048,
+                n_gpu_layers: 99, // Value the adapter is expected to pass for useGpu: true.
+                n_threads: 4,
+                use_mmap: true,
+            },
+        });
+        expect(plugin.generateText).toHaveBeenCalledWith({
+            contextId: 1,
+            prompt: "Say hello",
+            params: {
+                n_predict: 16,
+                temperature: 0.2,
+                top_p: 0.8,
+                stop: ["</s>"],
+                emit_partial_completion: true,
+            },
+        });
+        expect(result).toEqual({
+            text: "hello",
+            promptTokens: 3,
+            outputTokens: 2,
+            durationMs: 7, // Equals the mock's timings.predicted_ms.
+        });
+        expect(tokens).toEqual([["h", 1]]);
+        expect(capacitorLlama.currentModelPath()).toBe(null);
+        expect(plugin.releaseContext).toHaveBeenCalledWith({ contextId: 1 });
+    });
+    it("embeds via the native llama-cpp-capacitor embedding() method", async () => {
+        setNativeCapacitor("ios");
+        const plugin = makePluginMock();
+        mockedPlugin = plugin;
+        const { capacitorLlama } = await import("./index");
+        await capacitorLlama.load({ modelPath: "/models/test.gguf" });
+        const result = await capacitorLlama.embed({
+            input: "Embed this please",
+            embdNormalize: 2,
+        });
+        expect(plugin.embedding).toHaveBeenCalledWith({
+            contextId: 1,
+            text: "Embed this please",
+            params: { embd_normalize: 2 },
+        });
+        expect(plugin.tokenize).toHaveBeenCalledWith({
+            contextId: 1,
+            text: "Embed this please",
+        });
+        expect(result).toEqual({
+            embedding: [0.5, -0.25, 0.75],
+            tokens: 4, // The tokenize() mock resolves to four tokens.
+        });
+    });
+    it("throws when embed is called before load", async () => {
+        setNativeCapacitor("ios");
+        const plugin = makePluginMock();
+        mockedPlugin = plugin;
+        const { capacitorLlama } = await import("./index");
+        await expect(capacitorLlama.embed({ input: "x" })).rejects.toThrow(/No model loaded/);
+    });
+    it("registers a localInferenceLoader service without private field casts", async () => {
+        setNativeCapacitor("android");
+        const plugin = makePluginMock();
+        mockedPlugin = plugin;
+        const { registerCapacitorLlamaLoader } = await import("./index");
+        const services = new Map(); // Records whatever the stub runtime's registerService() receives.
+        registerCapacitorLlamaLoader({
+            registerService(name, impl) {
+                services.set(name, impl);
+            },
+        });
+        const loader = services.get("localInferenceLoader");
+        await loader.loadModel({ modelPath: "/models/mobile.gguf" });
+        expect(loader.currentModelPath()).toBe("/models/mobile.gguf");
+        await expect(loader.generate({ prompt: "Hello" })).resolves.toBe("hello");
+        await expect(loader.embed({ input: "Hi there" })).resolves.toEqual({
+            embedding: [0.5, -0.25, 0.75],
+            tokens: 4,
+        });
+        await loader.unloadModel();
+        expect(loader.currentModelPath()).toBe(null);
+    });
+});

package/dist/esm/load-capacitor-llama.d.ts ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ import type { LlamaAdapter } from "./definitions";
2	+ export declare function loadCapacitorLlama(): Promise<LlamaAdapter>;

package/dist/esm/load-capacitor-llama.js ADDED Viewed

@@ -0,0 +1,9 @@
+import { capacitorLlama } from "./capacitor-llama-adapter";
+let cachedAdapter = null;
+export async function loadCapacitorLlama() {
+    if (cachedAdapter) {
+        return cachedAdapter;
+    }
+    cachedAdapter = capacitorLlama;
+    return cachedAdapter;
+}

package/dist/esm/web.d.ts ADDED Viewed

@@ -0,0 +1,11 @@
+/**
+ * Web fallback for `@elizaos/capacitor-llama`.
+ *
+ * On web (Vite dev server, Electrobun renderer) this package resolves to the
+ * main adapter but its `load`/`generate` methods reject with a clear
+ * "unavailable" error. The standalone node-llama-cpp engine in
+ * `@elizaos/app-core` handles desktop inference; this stub only exists so
+ * the Capacitor plugin resolution never crashes during web bundling.
+ */
+export type { GenerateOptions, GenerateResult, HardwareInfo, LlamaAdapter, LoadOptions, } from "./definitions";
+export { capacitorLlama, registerCapacitorLlamaLoader } from "./index";

package/dist/esm/web.js ADDED Viewed

@@ -0,0 +1,10 @@
+/**
+ * Web fallback for `@elizaos/capacitor-llama`.
+ *
+ * On web (Vite dev server, Electrobun renderer) this package resolves to the
+ * main adapter but its `load`/`generate` methods reject with a clear
+ * "unavailable" error. The standalone node-llama-cpp engine in
+ * `@elizaos/app-core` handles desktop inference; this stub only exists so
+ * the Capacitor plugin resolution never crashes during web bundling.
+ */
+export { capacitorLlama, registerCapacitorLlamaLoader } from "./index";