npm - @elizaos/capacitor-llama - Versions diffs - 0.1.0 - Mend

@elizaos/capacitor-llama 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

package/README.md +68 -0
package/dist/esm/capacitor-llama-adapter.d.ts +5 -0
package/dist/esm/capacitor-llama-adapter.js +262 -0
package/dist/esm/definitions.d.ts +92 -0
package/dist/esm/definitions.js +10 -0
package/dist/esm/device-bridge-client.d.ts +48 -0
package/dist/esm/device-bridge-client.js +221 -0
package/dist/esm/index.d.ts +15 -0
package/dist/esm/index.js +15 -0
package/dist/esm/index.test.d.ts +1 -0
package/dist/esm/index.test.js +264 -0
package/dist/esm/load-capacitor-llama.d.ts +2 -0
package/dist/esm/load-capacitor-llama.js +9 -0
package/dist/esm/web.d.ts +11 -0
package/dist/esm/web.js +10 -0
package/dist/plugin.cjs.js +500 -0
package/dist/plugin.cjs.js.map +1 -0
package/dist/plugin.js +505 -0
package/dist/plugin.js.map +1 -0
package/package.json +52 -0

package/README.md ADDED Viewed

@@ -0,0 +1,68 @@
+# @elizaos/capacitor-llama
+Mobile llama.cpp adapter for Eliza. A **thin wrapper** over
+[`llama-cpp-capacitor`](https://github.com/arusatech/annadata-llama-cpp) that
+maps its contextId-based API onto Eliza's `LocalInferenceLoader` contract,
+so the standard `ActiveModelCoordinator` in `@elizaos/app-core` can switch
+between the desktop (node-llama-cpp) engine and mobile native inference
+transparently.
+## What it does
+- Registers as the runtime's `localInferenceLoader` service during the
+  Capacitor bootstrap.
+- Maps `loadModel({ modelPath })` → `initContext`.
+- Maps `unloadModel()` → `releaseContext` / `releaseAllContexts`.
+- Exposes a `generate()` surface matching the desktop engine.
+- Fans the native `@LlamaCpp_onToken` stream out to Eliza's token listeners.
+## What it does not do
+- It does not ship llama.cpp native binaries — `llama-cpp-capacitor`
+  handles iOS (arm64 + x86_64 with Metal) and Android (arm64-v8a,
+  armeabi-v7a, x86, x86_64) itself.
+- It does not run on web. On Electrobun / Vite we fall back to the
+  standalone `node-llama-cpp` engine in `@elizaos/app-core`.
+## Setup in apps/app
+1. Install the dependency (already declared here):
+   ```bash
+   bun install
+   ```
+2. Register the loader during Capacitor bootstrap. Add the call to
+   `apps/app`'s Capacitor init path (currently `src/capacitor-shell.ts`, or
+   whichever runtime bootstrap owns the mobile `AgentRuntime`):
+   ```ts
+   import { registerCapacitorLlamaLoader } from "@elizaos/capacitor-llama";
+   // After runtime boot, before the Model Hub is mounted:
+   registerCapacitorLlamaLoader(runtime);
+   ```
+3. Run `npx cap sync` in `apps/app` to pick up the native plugin. iOS and
+   Android builds will pull in `llama-cpp-capacitor`'s prebuilt native
+   libraries automatically.
+## Scope notes
+- Only **one model is loaded at a time**. `load()` disposes the previous
+  context first so we never double-allocate VRAM on device.
+- GGUF files are downloaded to the app sandbox by the
+  `@elizaos/app-core` downloader (shared with desktop). The mobile UI
+  filters the catalog to small/tiny bucket models only, since anything
+  larger won't realistically run on a phone.
+- Streaming tokens flow over Capacitor's native event bus
+  (`@LlamaCpp_onToken`). Subscribe via `capacitorLlama.onToken(listener)`.
+- For a full desktop-level feature set (embeddings, reranking, chat
+  templates, tool calling), read the upstream
+  [`llama-cpp-capacitor` README](https://github.com/arusatech/annadata-llama-cpp).
+  This adapter only wires the minimal slice needed for Eliza's agent
+  runtime; extend it as the mobile product grows.
+## Licensing
+MIT — matches `llama-cpp-capacitor` and llama.cpp upstream.

package/dist/esm/capacitor-llama-adapter.d.ts ADDED Viewed

@@ -0,0 +1,5 @@
+import type { LlamaAdapter } from "./definitions";
+/** Shared adapter instance exported by the module. */
+export declare const capacitorLlama: LlamaAdapter;
+/**
+ * Registers a `"localInferenceLoader"` service on `runtime` that delegates to
+ * `capacitorLlama`. Does nothing when `runtime.registerService` is not a
+ * function.
+ */
+export declare function registerCapacitorLlamaLoader(runtime: {
+    registerService?: (name: string, impl: unknown) => unknown;
+}): void;

package/dist/esm/capacitor-llama-adapter.js ADDED Viewed

@@ -0,0 +1,262 @@
+/** Fixed native context id passed to every plugin call made by this adapter. */
+const CONTEXT_ID = 1;
+function isObject(value) {
+    return typeof value === "object" && value !== null;
+}
+function isLlamaCppPluginLike(value) {
+    return (isObject(value) &&
+        typeof value.initContext === "function" &&
+        typeof value.releaseContext === "function" &&
+        typeof value.releaseAllContexts === "function" &&
+        typeof value.generateText === "function" &&
+        typeof value.stopCompletion === "function" &&
+        typeof value.addListener === "function");
+}
+function resolveLlamaCppPlugin(mod) {
+    if (!isObject(mod))
+        return null;
+    if (isLlamaCppPluginLike(mod.LlamaCpp))
+        return mod.LlamaCpp;
+    if (isLlamaCppPluginLike(mod.default))
+        return mod.default;
+    if (isObject(mod.default) && isLlamaCppPluginLike(mod.default.LlamaCpp)) {
+        return mod.default.LlamaCpp;
+    }
+    return null;
+}
+function isCapacitorNative() {
+    var _a;
+    const cap = globalThis.Capacitor;
+    return Boolean((_a = cap === null || cap === void 0 ? void 0 : cap.isNativePlatform) === null || _a === void 0 ? void 0 : _a.call(cap));
+}
+function detectPlatform() {
+    var _a;
+    const cap = globalThis.Capacitor;
+    const platform = (_a = cap === null || cap === void 0 ? void 0 : cap.getPlatform) === null || _a === void 0 ? void 0 : _a.call(cap);
+    if (platform === "ios")
+        return "ios";
+    if (platform === "android")
+        return "android";
+    return "web";
+}
+class CapacitorLlamaAdapter {
+    constructor() {
+        this.plugin = null;
+        /** Cached loader promise so concurrent `load()` calls don't race to register duplicate listeners. */
+        this.pluginLoadPromise = null;
+        this.loadedPath = null;
+        this.tokenIndex = 0;
+        this.tokenListeners = new Set();
+        this.pluginListenerHandle = null;
+    }
+    async loadPlugin() {
+        if (this.plugin)
+            return this.plugin;
+        if (this.pluginLoadPromise)
+            return this.pluginLoadPromise;
+        this.pluginLoadPromise = (async () => {
+            const plugin = resolveLlamaCppPlugin(await import("llama-cpp-capacitor"));
+            if (!plugin) {
+                throw new Error("llama-cpp-capacitor did not expose an initContext method");
+            }
+            const tokenListenerHandle = await plugin.addListener("@LlamaCpp_onToken", (data) => {
+                var _a, _b;
+                const token = (_b = (_a = data.tokenResult) === null || _a === void 0 ? void 0 : _a.token) !== null && _b !== void 0 ? _b : data.token;
+                if (!token)
+                    return;
+                this.tokenIndex += 1;
+                for (const listener of this.tokenListeners) {
+                    try {
+                        listener(token, this.tokenIndex);
+                    }
+                    catch (_c) {
+                        this.tokenListeners.delete(listener);
+                    }
+                }
+            });
+            this.pluginListenerHandle = tokenListenerHandle !== null && tokenListenerHandle !== void 0 ? tokenListenerHandle : null;
+            this.plugin = plugin;
+            return plugin;
+        })();
+        try {
+            return await this.pluginLoadPromise;
+        }
+        catch (err) {
+            this.pluginLoadPromise = null;
+            throw err;
+        }
+    }
+    async getHardwareInfo() {
+        var _a;
+        const platform = detectPlatform();
+        const nav = globalThis
+            .navigator;
+        return {
+            platform,
+            deviceModel: platform,
+            totalRamGb: 0,
+            availableRamGb: null,
+            cpuCores: (_a = nav === null || nav === void 0 ? void 0 : nav.hardwareConcurrency) !== null && _a !== void 0 ? _a : 0,
+            gpu: null,
+            gpuSupported: platform !== "web",
+        };
+    }
+    async isLoaded() {
+        return {
+            loaded: this.loadedPath !== null,
+            modelPath: this.loadedPath,
+        };
+    }
+    currentModelPath() {
+        return this.loadedPath;
+    }
+    async load(options) {
+        var _a, _b;
+        if (!isCapacitorNative()) {
+            throw new Error("capacitor-llama is only available on iOS and Android builds");
+        }
+        const plugin = await this.loadPlugin();
+        if (this.loadedPath && this.loadedPath !== options.modelPath) {
+            await plugin.releaseAllContexts();
+            this.loadedPath = null;
+        }
+        await plugin.initContext({
+            contextId: CONTEXT_ID,
+            params: {
+                model: options.modelPath,
+                n_ctx: (_a = options.contextSize) !== null && _a !== void 0 ? _a : 4096,
+                n_gpu_layers: options.useGpu === false ? 0 : 99,
+                n_threads: (_b = options.maxThreads) !== null && _b !== void 0 ? _b : 0,
+                use_mmap: true,
+            },
+        });
+        this.loadedPath = options.modelPath;
+    }
+    async unload() {
+        if (!this.plugin || !this.loadedPath)
+            return;
+        try {
+            await this.plugin.releaseContext({ contextId: CONTEXT_ID });
+        }
+        catch (_a) {
+            await this.plugin.releaseAllContexts();
+        }
+        this.loadedPath = null;
+    }
+    async generate(options) {
+        var _a, _b, _c, _d;
+        if (!this.plugin || !this.loadedPath) {
+            throw new Error("No model loaded. Call load() first.");
+        }
+        this.tokenIndex = 0;
+        const params = {
+            n_predict: (_a = options.maxTokens) !== null && _a !== void 0 ? _a : 2048,
+            temperature: (_b = options.temperature) !== null && _b !== void 0 ? _b : 0.7,
+            top_p: (_c = options.topP) !== null && _c !== void 0 ? _c : 0.9,
+        };
+        if (options.stopSequences && options.stopSequences.length > 0) {
+            params.stop = options.stopSequences;
+        }
+        if (options.stream) {
+            params.emit_partial_completion = true;
+        }
+        const started = Date.now();
+        const result = await this.plugin.generateText({
+            contextId: CONTEXT_ID,
+            prompt: options.prompt,
+            params,
+        });
+        const duration = ((_d = result.timings) === null || _d === void 0 ? void 0 : _d.predicted_ms) != null
+            ? Math.round(result.timings.predicted_ms)
+            : Date.now() - started;
+        return {
+            text: result.text,
+            promptTokens: result.tokens_evaluated,
+            outputTokens: result.tokens_predicted,
+            durationMs: duration,
+        };
+    }
+    async cancelGenerate() {
+        if (!this.plugin)
+            return;
+        await this.plugin.stopCompletion({ contextId: CONTEXT_ID });
+    }
+    async embed(options) {
+        var _a;
+        if (!this.plugin || !this.loadedPath) {
+            throw new Error("No model loaded. Call load() first.");
+        }
+        if (typeof this.plugin.embedding !== "function") {
+            throw new Error("llama-cpp-capacitor does not expose embedding() on this build; upgrade or use a cloud embedding provider");
+        }
+        const params = {
+            embd_normalize: (_a = options.embdNormalize) !== null && _a !== void 0 ? _a : 0,
+        };
+        const result = await this.plugin.embedding({
+            contextId: CONTEXT_ID,
+            text: options.input,
+            params,
+        });
+        let tokenCount = 0;
+        if (typeof this.plugin.tokenize === "function") {
+            try {
+                const tokenized = await this.plugin.tokenize({
+                    contextId: CONTEXT_ID,
+                    text: options.input,
+                });
+                tokenCount = tokenized.tokens.length;
+            }
+            catch (err) {
+                const message = err instanceof Error ? err.message : String(err);
+                console.debug("[capacitor-llama] tokenize fallback", {
+                    error: message,
+                });
+                tokenCount = 0;
+            }
+        }
+        return { embedding: result.embedding, tokens: tokenCount };
+    }
+    onToken(listener) {
+        this.tokenListeners.add(listener);
+        return () => {
+            this.tokenListeners.delete(listener);
+        };
+    }
+    async dispose() {
+        this.tokenListeners.clear();
+        if (this.pluginListenerHandle) {
+            await this.pluginListenerHandle.remove();
+            this.pluginListenerHandle = null;
+        }
+        await this.unload();
+        this.plugin = null;
+        this.pluginLoadPromise = null;
+    }
+}
+/** Module-level adapter instance shared by every importer of this module. */
+export const capacitorLlama = new CapacitorLlamaAdapter();
+export function registerCapacitorLlamaLoader(runtime) {
+    if (typeof runtime.registerService !== "function")
+        return;
+    runtime.registerService("localInferenceLoader", {
+        async loadModel(args) {
+            await capacitorLlama.load({ modelPath: args.modelPath });
+        },
+        async unloadModel() {
+            await capacitorLlama.unload();
+        },
+        currentModelPath() {
+            return capacitorLlama.currentModelPath();
+        },
+        async generate(args) {
+            const result = await capacitorLlama.generate({
+                prompt: args.prompt,
+                stopSequences: args.stopSequences,
+                maxTokens: args.maxTokens,
+                temperature: args.temperature,
+            });
+            return result.text;
+        },
+        async embed(args) {
+            return capacitorLlama.embed({ input: args.input });
+        },
+    });
+}

package/dist/esm/definitions.d.ts ADDED Viewed

@@ -0,0 +1,92 @@
+/**
+ * Eliza-flavoured Capacitor llama.cpp adapter contract.
+ *
+ * This mirrors the `LocalInferenceLoader` interface in @elizaos/app-core so
+ * `ActiveModelCoordinator` can swap between the desktop engine
+ * (node-llama-cpp) and the mobile Capacitor plugin without caring which is
+ * active. Native llama.cpp work is handled by `llama-cpp-capacitor`; this
+ * package is intentionally just a thin mapping layer.
+ */
+/** Options accepted by `LlamaAdapter.load()`. */
+export interface LoadOptions {
+    /**
+     * Absolute or sandbox path to a GGUF file on device storage. On iOS this
+     * lives under `Application Support/`. On Android under the app's internal
+     * files dir.
+     */
+    modelPath: string;
+    /** Context window size; default 4096, capped by model metadata. */
+    contextSize?: number;
+    /**
+     * Hint: when true, the native layer uses GPU/Metal/Vulkan where available.
+     * The Capacitor adapter only disables GPU offload when this is exactly
+     * `false`; omitting it behaves like `true`.
+     */
+    useGpu?: boolean;
+    /**
+     * Cap on native thread count; native layer picks a reasonable default otherwise.
+     * The Capacitor adapter forwards 0 when this is omitted.
+     */
+    maxThreads?: number;
+}
+/** Options accepted by `LlamaAdapter.generate()`. */
+export interface GenerateOptions {
+    /** Prompt text forwarded to the native plugin. */
+    prompt: string;
+    /** Maximum number of tokens to predict; the Capacitor adapter defaults to 2048. */
+    maxTokens?: number;
+    /** Sampling temperature; the Capacitor adapter defaults to 0.7. */
+    temperature?: number;
+    /** Nucleus sampling threshold; the Capacitor adapter defaults to 0.9. */
+    topP?: number;
+    /** Stop strings; only forwarded when the array is non-empty. */
+    stopSequences?: string[];
+    /** When true, token events are delivered to listeners registered via `onToken()`. */
+    stream?: boolean;
+}
+/** Result returned by `LlamaAdapter.generate()`. */
+export interface GenerateResult {
+    /** Generated completion text. */
+    text: string;
+    /** Number of prompt tokens evaluated, as reported by the native plugin. */
+    promptTokens: number;
+    /** Number of tokens predicted, as reported by the native plugin. */
+    outputTokens: number;
+    /**
+     * Generation time in milliseconds. The Capacitor adapter uses the plugin's
+     * reported prediction time when available and wall-clock elapsed time otherwise.
+     */
+    durationMs: number;
+}
+/**
+ * Hardware description returned by `LlamaAdapter.getHardwareInfo()`.
+ *
+ * NOTE(review): the Capacitor adapter in this package currently fills several
+ * fields with placeholders (`deviceModel` = platform name, `totalRamGb` = 0,
+ * `availableRamGb` = null, `gpu` = null) — treat them as best-effort.
+ */
+export interface HardwareInfo {
+    /** Platform the adapter is running on. */
+    platform: "ios" | "android" | "web";
+    /** Human-readable device model when the OS exposes one. */
+    deviceModel: string;
+    /** Total device RAM in GB. */
+    totalRamGb: number;
+    /** Currently available RAM in GB, or null when unknown. */
+    availableRamGb: number | null;
+    /** Logical CPU core count. */
+    cpuCores: number;
+    /** GPU backend details, or null when not reported. */
+    gpu: {
+        backend: "metal" | "vulkan" | "gpu-delegate";
+        available: boolean;
+    } | null;
+    /** True when the underlying llama.cpp build has GPU support compiled in. */
+    gpuSupported: boolean;
+}
+/** Options accepted by `LlamaAdapter.embed()`. */
+export interface EmbedOptions {
+    /** Raw text to embed. The adapter forwards this verbatim to the native plugin. */
+    input: string;
+    /**
+     * Optional L2 normalisation passed through to llama-cpp-capacitor's
+     * `embd_normalize` parameter. Native default is 0 (off); set to 2 for
+     * L2-normalised vectors that match most cloud embedding APIs.
+     * The Capacitor adapter sends 0 when this is omitted.
+     */
+    embdNormalize?: number;
+}
+/** Result returned by `LlamaAdapter.embed()`. */
+export interface EmbedResult {
+    /** Embedding vector as returned by the native plugin. */
+    embedding: number[];
+    /**
+     * Token count of the embedded input. The native plugin doesn't return
+     * this directly so adapters may estimate via `tokenize` and report 0
+     * when an estimate is unavailable. Always present so downstream
+     * accounting code doesn't have to special-case undefined.
+     */
+    tokens: number;
+}
+/** Contract implemented by the llama.cpp adapter exported as `capacitorLlama`. */
+export interface LlamaAdapter {
+    /** Reports platform and coarse hardware information. */
+    getHardwareInfo(): Promise<HardwareInfo>;
+    /** Resolves with whether a model is currently loaded and, if so, its path. */
+    isLoaded(): Promise<{
+        loaded: boolean;
+        modelPath: string | null;
+    }>;
+    /** Synchronous accessor for the loaded model's path; null when nothing is loaded. */
+    currentModelPath(): string | null;
+    /** Loads the model described by `options`. */
+    load(options: LoadOptions): Promise<void>;
+    /** Releases the currently loaded model, if any. */
+    unload(): Promise<void>;
+    /** Runs a completion against the loaded model. */
+    generate(options: GenerateOptions): Promise<GenerateResult>;
+    /** Requests that the in-flight `generate()` call stop. */
+    cancelGenerate(): Promise<void>;
+    /**
+     * Fires when `generate({ stream: true })` emits a new token.
+     * The returned function unregisters the listener.
+     */
+    onToken(listener: (token: string, index: number) => void): () => void;
+    /**
+     * Compute a single sentence embedding. Returns the raw float vector and
+     * (when known) the input token count. Throws when the underlying plugin
+     * does not expose an embedding method on the active platform.
+     */
+    embed(options: EmbedOptions): Promise<EmbedResult>;
+}

package/dist/esm/definitions.js ADDED Viewed

@@ -0,0 +1,10 @@
+/**
+ * Eliza-flavoured Capacitor llama.cpp adapter contract.
+ *
+ * This mirrors the `LocalInferenceLoader` interface in @elizaos/app-core so
+ * `ActiveModelCoordinator` can swap between the desktop engine
+ * (node-llama-cpp) and the mobile Capacitor plugin without caring which is
+ * active. Native llama.cpp work is handled by `llama-cpp-capacitor`; this
+ * package is intentionally just a thin mapping layer.
+ */
+export {};

package/dist/esm/device-bridge-client.d.ts ADDED Viewed

@@ -0,0 +1,48 @@
+/**
+ * Device-side half of the agent↔device inference bridge.
+ *
+ * Runs inside the mobile app (Capacitor iOS / Android) and dials out to
+ * the agent container over WebSocket. Receives `generate` requests,
+ * forwards to `capacitorLlama`, returns results. Auto-reconnects with
+ * exponential backoff when the link drops.
+ *
+ * Mirrors the message envelope defined in
+ * `@elizaos/app-core/src/services/local-inference/device-bridge.ts`.
+ * Keep the two in sync by hand — the message shape is the bridge
+ * contract.
+ */
+/** Configuration passed to `DeviceBridgeClient` and `startDeviceBridgeClient`. */
+export interface DeviceBridgeClientConfig {
+    /** Absolute WS URL of the agent: `wss://agent.example.com/api/local-inference/device-bridge`. */
+    agentUrl: string;
+    /** Shared pairing secret. Passed both as a `?token=` query param and in the register payload. */
+    pairingToken?: string;
+    /** Stable device identifier. Survives reinstalls when persisted by the host app. */
+    deviceId: string;
+    /** Called on state transitions so the host app can show a pairing UI. */
+    onStateChange?: (state: "connecting" | "connected" | "disconnected" | "error", detail?: string) => void;
+}
+/**
+ * Device-side bridge client. Per the module description it dials the agent
+ * over WebSocket, forwards `generate` requests to `capacitorLlama`, and
+ * reconnects with exponential backoff when the link drops.
+ *
+ * Only `start()` and `stop()` are public; the remaining members are private
+ * implementation details.
+ */
+export declare class DeviceBridgeClient {
+    private socket;
+    private reconnectAttempt;
+    private stopped;
+    private readonly config;
+    constructor(config: DeviceBridgeClientConfig);
+    /** Starts the client. NOTE(review): presumably opens the first connection — confirm against the implementation. */
+    start(): void;
+    /** Stops the client. NOTE(review): presumably closes the socket and cancels reconnects — confirm against the implementation. */
+    stop(): void;
+    private computeBackoffMs;
+    private connect;
+    private buildUrl;
+    private scheduleReconnect;
+    private sendRegister;
+    private send;
+    private handleAgentMessage;
+}
+/**
+ * Convenience helper for the mobile bootstrap: starts a bridge client
+ * using values from the Eliza config or hardcoded env.
+ *
+ * The host app is expected to call this once during Capacitor bootstrap.
+ * `agentUrl` and `pairingToken` come from the user's pairing flow and
+ * should be persisted across launches.
+ *
+ * @param config - Connection settings for the bridge client.
+ * @returns The `DeviceBridgeClient` instance, so the caller can `stop()` it later.
+ *   NOTE(review): presumably already started on return — confirm against the implementation.
+ */
+export declare function startDeviceBridgeClient(config: DeviceBridgeClientConfig): DeviceBridgeClient;