@runanywhere/llamacpp 0.16.0

Files changed (50)
  1. package/RunAnywhereLlama.podspec +131 -0
  2. package/android/CMakeLists.txt +105 -0
  3. package/android/build.gradle +288 -0
  4. package/android/src/main/AndroidManifest.xml +3 -0
  5. package/android/src/main/cpp/cpp-adapter.cpp +14 -0
  6. package/android/src/main/java/com/margelo/nitro/runanywhere/llama/RunAnywhereLlamaPackage.kt +35 -0
  7. package/android/src/main/jniLibs/arm64-v8a/librac_backend_llamacpp.so +0 -0
  8. package/android/src/main/jniLibs/arm64-v8a/librac_backend_llamacpp_jni.so +0 -0
  9. package/android/src/main/jniLibs/arm64-v8a/librunanywhere_llamacpp.so +0 -0
  10. package/cpp/HybridRunAnywhereLlama.cpp +346 -0
  11. package/cpp/HybridRunAnywhereLlama.hpp +107 -0
  12. package/cpp/bridges/LLMBridge.cpp +209 -0
  13. package/cpp/bridges/LLMBridge.hpp +109 -0
  14. package/cpp/bridges/StructuredOutputBridge.cpp +151 -0
  15. package/cpp/bridges/StructuredOutputBridge.hpp +66 -0
  16. package/cpp/rac_llm_llamacpp.h +34 -0
  17. package/ios/.testlocal +0 -0
  18. package/ios/Frameworks/RABackendLLAMACPP.xcframework/Info.plist +44 -0
  19. package/ios/Frameworks/RABackendLLAMACPP.xcframework/ios-arm64/RABackendLLAMACPP.framework/Headers/RABackendLLAMACPP.h +2 -0
  20. package/ios/Frameworks/RABackendLLAMACPP.xcframework/ios-arm64/RABackendLLAMACPP.framework/Info.plist +11 -0
  21. package/ios/Frameworks/RABackendLLAMACPP.xcframework/ios-arm64/RABackendLLAMACPP.framework/Modules/module.modulemap +5 -0
  22. package/ios/Frameworks/RABackendLLAMACPP.xcframework/ios-arm64/RABackendLLAMACPP.framework/RABackendLLAMACPP +0 -0
  23. package/ios/Frameworks/RABackendLLAMACPP.xcframework/ios-arm64_x86_64-simulator/RABackendLLAMACPP.framework/Headers/RABackendLLAMACPP.h +2 -0
  24. package/ios/Frameworks/RABackendLLAMACPP.xcframework/ios-arm64_x86_64-simulator/RABackendLLAMACPP.framework/Info.plist +11 -0
  25. package/ios/Frameworks/RABackendLLAMACPP.xcframework/ios-arm64_x86_64-simulator/RABackendLLAMACPP.framework/Modules/module.modulemap +5 -0
  26. package/ios/Frameworks/RABackendLLAMACPP.xcframework/ios-arm64_x86_64-simulator/RABackendLLAMACPP.framework/RABackendLLAMACPP +0 -0
  27. package/ios/LlamaCPPBackend.podspec +127 -0
  28. package/nitro.json +16 -0
  29. package/nitrogen/generated/.gitattributes +1 -0
  30. package/nitrogen/generated/android/kotlin/com/margelo/nitro/runanywhere/llama/runanywherellamaOnLoad.kt +35 -0
  31. package/nitrogen/generated/android/runanywherellama+autolinking.cmake +81 -0
  32. package/nitrogen/generated/android/runanywherellama+autolinking.gradle +27 -0
  33. package/nitrogen/generated/android/runanywherellamaOnLoad.cpp +44 -0
  34. package/nitrogen/generated/android/runanywherellamaOnLoad.hpp +25 -0
  35. package/nitrogen/generated/ios/RunAnywhereLlama+autolinking.rb +60 -0
  36. package/nitrogen/generated/ios/RunAnywhereLlama-Swift-Cxx-Bridge.cpp +17 -0
  37. package/nitrogen/generated/ios/RunAnywhereLlama-Swift-Cxx-Bridge.hpp +27 -0
  38. package/nitrogen/generated/ios/RunAnywhereLlama-Swift-Cxx-Umbrella.hpp +38 -0
  39. package/nitrogen/generated/ios/RunAnywhereLlamaAutolinking.mm +35 -0
  40. package/nitrogen/generated/ios/RunAnywhereLlamaAutolinking.swift +12 -0
  41. package/nitrogen/generated/shared/c++/HybridRunAnywhereLlamaSpec.cpp +33 -0
  42. package/nitrogen/generated/shared/c++/HybridRunAnywhereLlamaSpec.hpp +77 -0
  43. package/package.json +60 -0
  44. package/react-native.config.js +14 -0
  45. package/src/LlamaCPP.ts +206 -0
  46. package/src/LlamaCppProvider.ts +120 -0
  47. package/src/index.ts +59 -0
  48. package/src/native/NativeRunAnywhereLlama.ts +58 -0
  49. package/src/native/index.ts +11 -0
  50. package/src/specs/RunAnywhereLlama.nitro.ts +160 -0
package/src/LlamaCPP.ts ADDED
@@ -0,0 +1,206 @@
+ /**
+  * @runanywhere/llamacpp - LlamaCPP Module
+  *
+  * LlamaCPP module wrapper for RunAnywhere React Native SDK.
+  * Provides public API for module registration and model declaration.
+  *
+  * This mirrors the Swift SDK's LlamaCPP module pattern:
+  * - LlamaCPP.register() - Register the module with ServiceRegistry
+  * - LlamaCPP.addModel() - Declare a model for this module
+  *
+  * Reference: sdk/runanywhere-swift/Sources/LlamaCPPRuntime/LlamaCPPServiceProvider.swift
+  */
+
+ import { LlamaCppProvider } from './LlamaCppProvider';
+ import {
+   ModelRegistry,
+   FileSystem,
+   LLMFramework,
+   ModelCategory,
+   ModelFormat,
+   ConfigurationSource,
+   SDKLogger,
+   type ModelInfo,
+ } from '@runanywhere/core';
+
+ // SDKLogger instance for this module
+ const log = new SDKLogger('LLM.LlamaCpp');
+
+ /**
+  * Model registration options for LlamaCPP models
+  *
+  * Matches iOS: LlamaCPP.addModel() parameter structure
+  */
+ export interface LlamaCPPModelOptions {
+   /** Unique model ID. If not provided, generated from URL filename */
+   id?: string;
+   /** Display name for the model */
+   name: string;
+   /** Download URL for the model */
+   url: string;
+   /** Model category (defaults to Language for LLM models) */
+   modality?: ModelCategory;
+   /** Memory requirement in bytes */
+   memoryRequirement?: number;
+   /** Whether model supports reasoning/thinking tokens */
+   supportsThinking?: boolean;
+ }
+
+ /**
+  * LlamaCPP Module
+  *
+  * Public API for registering LlamaCPP module and declaring GGUF models.
+  * This provides the same developer experience as the iOS SDK.
+  *
+  * ## Usage
+  *
+  * ```typescript
+  * import { LlamaCPP } from '@runanywhere/llamacpp';
+  *
+  * // Register module
+  * LlamaCPP.register();
+  *
+  * // Add models
+  * LlamaCPP.addModel({
+  *   id: 'smollm2-360m-q8_0',
+  *   name: 'SmolLM2 360M Q8_0',
+  *   url: 'https://huggingface.co/prithivMLmods/SmolLM2-360M-GGUF/resolve/main/SmolLM2-360M.Q8_0.gguf',
+  *   memoryRequirement: 500_000_000
+  * });
+  * ```
+  *
+  * Matches iOS: public enum LlamaCPP: RunAnywhereModule
+  */
+ export const LlamaCPP = {
+   /**
+    * Module metadata
+    * Matches iOS: static let moduleId, moduleName, inferenceFramework
+    */
+   moduleId: 'llamacpp',
+   moduleName: 'LlamaCPP',
+   inferenceFramework: LLMFramework.LlamaCpp,
+   capabilities: ['llm'] as const,
+   defaultPriority: 100,
+
+   /**
+    * Register LlamaCPP module with the SDK
+    *
+    * This registers the LlamaCPP provider with ServiceRegistry,
+    * enabling it to handle GGUF models.
+    *
+    * Matches iOS: static func register(priority: Int = defaultPriority)
+    *
+    * @example
+    * ```typescript
+    * LlamaCPP.register();
+    * ```
+    */
+   register(): void {
+     log.debug('Registering LlamaCPP module');
+     LlamaCppProvider.register();
+     log.info('LlamaCPP module registered');
+   },
+
+   /**
+    * Add a model to this module
+    *
+    * Registers a GGUF model with the ModelRegistry.
+    * The model will use LlamaCPP framework automatically.
+    *
+    * Matches iOS: static func addModel(id:name:url:modality:memoryRequirement:supportsThinking:)
+    *
+    * @param options - Model registration options
+    * @returns Promise resolving to the created ModelInfo
+    *
+    * @example
+    * ```typescript
+    * await LlamaCPP.addModel({
+    *   id: 'llama-2-7b-chat-q4_k_m',
+    *   name: 'Llama 2 7B Chat Q4_K_M',
+    *   url: 'https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf',
+    *   memoryRequirement: 4_000_000_000
+    * });
+    * ```
+    */
+   async addModel(options: LlamaCPPModelOptions): Promise<ModelInfo> {
+     // Generate stable ID from URL if not provided
+     const modelId = options.id ?? this._generateModelId(options.url);
+
+     // Determine modality (default to Language for LLM)
+     const category = options.modality ?? ModelCategory.Language;
+
+     // Infer format from URL
+     const format = options.url.toLowerCase().includes('.gguf')
+       ? ModelFormat.GGUF
+       : ModelFormat.GGML;
+
+     const now = new Date().toISOString();
+
+     // Check if model already exists on disk (persistence across sessions)
+     let isDownloaded = false;
+     let localPath: string | undefined;
+
+     if (FileSystem.isAvailable()) {
+       try {
+         const exists = await FileSystem.modelExists(modelId, 'LlamaCpp');
+         if (exists) {
+           localPath = await FileSystem.getModelPath(modelId, 'LlamaCpp');
+           isDownloaded = true;
+           log.debug(`Model ${modelId} found on disk: ${localPath}`);
+         }
+       } catch (error) {
+         // Ignore errors checking for existing model
+         log.debug(`Could not check for existing model ${modelId}: ${error}`);
+       }
+     }
+
+     const modelInfo: ModelInfo = {
+       id: modelId,
+       name: options.name,
+       category,
+       format,
+       downloadURL: options.url,
+       localPath,
+       downloadSize: undefined,
+       memoryRequired: options.memoryRequirement,
+       compatibleFrameworks: [LLMFramework.LlamaCpp],
+       preferredFramework: LLMFramework.LlamaCpp,
+       supportsThinking: options.supportsThinking ?? false,
+       metadata: { tags: [] },
+       source: ConfigurationSource.Local,
+       createdAt: now,
+       updatedAt: now,
+       syncPending: false,
+       usageCount: 0,
+       isDownloaded,
+       isAvailable: true,
+     };
+
+     // Register with ModelRegistry and wait for completion
+     await ModelRegistry.registerModel(modelInfo);
+
+     log.info(`Added model: ${modelId} (${options.name})`, {
+       modelId,
+       isDownloaded,
+     });
+
+     return modelInfo;
+   },
+
+   /**
+    * Generate a stable model ID from URL
+    * @internal
+    */
+   _generateModelId(url: string): string {
+     try {
+       const urlObj = new URL(url);
+       const pathname = urlObj.pathname;
+       const filename = pathname.split('/').pop() ?? 'model';
+       // Remove common extensions
+       return filename.replace(/\.(gguf|ggml|bin)$/i, '');
+     } catch {
+       // Fallback for invalid URLs
+       return `model-${Date.now()}`;
+     }
+   },
+ };
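A note on the ID and format derivation above: when no `id` is supplied, `addModel` takes the URL's filename and strips a `.gguf`/`.ggml`/`.bin` extension, and the format is inferred from whether `.gguf` appears in the URL. A minimal sketch of that behavior, with an illustrative (not official) URL:

```typescript
// Sketch only: mirrors the ID/format derivation in LlamaCPP.addModel above.
// The URL is illustrative, not a real model link.
const url = 'https://example.com/models/SmolLM2-360M.Q8_0.gguf';

const filename = new URL(url).pathname.split('/').pop() ?? 'model';
const modelId = filename.replace(/\.(gguf|ggml|bin)$/i, ''); // 'SmolLM2-360M.Q8_0'
const isGguf = url.toLowerCase().includes('.gguf');          // true -> ModelFormat.GGUF

console.log({ modelId, isGguf });
```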
package/src/LlamaCppProvider.ts ADDED
@@ -0,0 +1,120 @@
+ /**
+  * @runanywhere/llamacpp - LlamaCPP Provider
+  *
+  * LlamaCPP module registration for React Native SDK.
+  * Thin wrapper that triggers C++ backend registration.
+  *
+  * Reference: sdk/runanywhere-swift/Sources/LlamaCPPRuntime/LlamaCPP.swift
+  */
+
+ import { requireNativeLlamaModule, isNativeLlamaModuleAvailable } from './native/NativeRunAnywhereLlama';
+ import { SDKLogger } from '@runanywhere/core';
+
+ // SDKLogger instance for this module
+ const log = new SDKLogger('LLM.LlamaCppProvider');
+
+ /**
+  * LlamaCPP Module
+  *
+  * Provides LLM capabilities using llama.cpp with GGUF models.
+  * The actual service is provided by the C++ backend.
+  *
+  * ## Registration
+  *
+  * ```typescript
+  * import { LlamaCppProvider } from '@runanywhere/llamacpp';
+  *
+  * // Register the backend
+  * await LlamaCppProvider.register();
+  * ```
+  */
+ export class LlamaCppProvider {
+   static readonly moduleId = 'llamacpp';
+   static readonly moduleName = 'LlamaCPP';
+   static readonly version = '2.0.0';
+
+   private static isRegistered = false;
+
+   /**
+    * Register LlamaCPP backend with the C++ service registry.
+    * Calls rac_backend_llamacpp_register() to register the
+    * LlamaCPP service provider with the C++ commons layer.
+    * Safe to call multiple times - subsequent calls are no-ops.
+    * @returns Promise<boolean> true if registered successfully
+    */
+   static async register(): Promise<boolean> {
+     if (this.isRegistered) {
+       log.debug('LlamaCPP already registered, returning');
+       return true;
+     }
+
+     if (!isNativeLlamaModuleAvailable()) {
+       log.warning('LlamaCPP native module not available');
+       return false;
+     }
+
+     log.debug('Registering LlamaCPP backend with C++ registry');
+
+     try {
+       const native = requireNativeLlamaModule();
+       // Call the native registration method from the Llama module
+       const success = await native.registerBackend();
+       if (success) {
+         this.isRegistered = true;
+         log.info('LlamaCPP backend registered successfully');
+       }
+       return success;
+     } catch (error) {
+       const msg = error instanceof Error ? error.message : String(error);
+       log.warning(`LlamaCPP registration failed: ${msg}`);
+       return false;
+     }
+   }
+
+   /**
+    * Unregister the LlamaCPP backend from C++ registry.
+    * @returns Promise<boolean> true if unregistered successfully
+    */
+   static async unregister(): Promise<boolean> {
+     if (!this.isRegistered) {
+       return true;
+     }
+
+     if (!isNativeLlamaModuleAvailable()) {
+       return false;
+     }
+
+     try {
+       const native = requireNativeLlamaModule();
+       const success = await native.unregisterBackend();
+       if (success) {
+         this.isRegistered = false;
+         log.debug('LlamaCPP backend unregistered');
+       }
+       return success;
+     } catch (error) {
+       log.error(`LlamaCPP unregistration failed: ${error instanceof Error ? error.message : String(error)}`);
+       return false;
+     }
+   }
+
+   /**
+    * Check if LlamaCPP can handle a given model
+    */
+   static canHandle(modelId: string | null | undefined): boolean {
+     if (!modelId) {
+       return false;
+     }
+     const lowercased = modelId.toLowerCase();
+     return lowercased.includes('gguf') || lowercased.endsWith('.gguf');
+   }
+ }
+
+ /**
+  * Auto-register when module is imported
+  */
+ export function autoRegister(): void {
+   LlamaCppProvider.register().catch(() => {
+     // Silently handle registration failure during auto-registration
+   });
+ }
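Since `register()` resolves to `false` rather than throwing when the native module is missing or registration fails, callers that need a hard failure have to check the boolean themselves. A short sketch of that pattern (the error message wording is ours, not from the package):

```typescript
import { LlamaCppProvider } from '@runanywhere/llamacpp';

// Register the backend and fail fast if the native module is unavailable.
const registered = await LlamaCppProvider.register();
if (!registered) {
  throw new Error('LlamaCPP backend could not be registered (native module missing?)');
}

// canHandle() is a simple heuristic over the model identifier string.
LlamaCppProvider.canHandle('smollm2-360m.gguf'); // true
LlamaCppProvider.canHandle('whisper-base.bin');  // false
```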
package/src/index.ts ADDED
@@ -0,0 +1,59 @@
+ /**
+  * @runanywhere/llamacpp - LlamaCPP Backend for RunAnywhere React Native SDK
+  *
+  * This package provides the LlamaCPP backend for on-device LLM inference.
+  * It supports GGUF models and provides the same API as the iOS SDK.
+  *
+  * ## Usage
+  *
+  * ```typescript
+  * import { RunAnywhere } from '@runanywhere/core';
+  * import { LlamaCPP, LlamaCppProvider } from '@runanywhere/llamacpp';
+  *
+  * // Initialize core SDK
+  * await RunAnywhere.initialize({ apiKey: 'your-key' });
+  *
+  * // Register LlamaCPP backend (calls native rac_backend_llamacpp_register)
+  * await LlamaCppProvider.register();
+  *
+  * // Add a model
+  * LlamaCPP.addModel({
+  *   id: 'smollm2-360m-q8_0',
+  *   name: 'SmolLM2 360M Q8_0',
+  *   url: 'https://huggingface.co/.../SmolLM2-360M.Q8_0.gguf',
+  *   memoryRequirement: 500_000_000
+  * });
+  *
+  * // Download and use
+  * await RunAnywhere.downloadModel('smollm2-360m-q8_0');
+  * await RunAnywhere.loadModel('smollm2-360m-q8_0');
+  * const result = await RunAnywhere.generate('Hello, world!');
+  * ```
+  *
+  * @packageDocumentation
+  */
+
+ // =============================================================================
+ // Main API
+ // =============================================================================
+
+ export { LlamaCPP, type LlamaCPPModelOptions } from './LlamaCPP';
+ export { LlamaCppProvider, autoRegister } from './LlamaCppProvider';
+
+ // =============================================================================
+ // Native Module
+ // =============================================================================
+
+ export {
+   NativeRunAnywhereLlama,
+   getNativeLlamaModule,
+   requireNativeLlamaModule,
+   isNativeLlamaModuleAvailable,
+ } from './native/NativeRunAnywhereLlama';
+ export type { NativeRunAnywhereLlamaModule } from './native/NativeRunAnywhereLlama';
+
+ // =============================================================================
+ // Nitrogen Spec Types
+ // =============================================================================
+
+ export type { RunAnywhereLlama } from './specs/RunAnywhereLlama.nitro';
package/src/native/NativeRunAnywhereLlama.ts ADDED
@@ -0,0 +1,58 @@
+ /**
+  * NativeRunAnywhereLlama.ts
+  *
+  * Exports the native RunAnywhereLlama Hybrid Object from Nitro Modules.
+  * This module provides Llama-based text generation capabilities.
+  */
+
+ import { NitroModules } from 'react-native-nitro-modules';
+ import type { RunAnywhereLlama } from '../specs/RunAnywhereLlama.nitro';
+
+ /**
+  * The native RunAnywhereLlama module type
+  */
+ export type NativeRunAnywhereLlamaModule = RunAnywhereLlama;
+
+ /**
+  * Get the native RunAnywhereLlama Hybrid Object
+  */
+ export function requireNativeLlamaModule(): NativeRunAnywhereLlamaModule {
+   return NitroModules.createHybridObject<RunAnywhereLlama>('RunAnywhereLlama');
+ }
+
+ /**
+  * Check if the native Llama module is available
+  */
+ export function isNativeLlamaModuleAvailable(): boolean {
+   try {
+     requireNativeLlamaModule();
+     return true;
+   } catch {
+     return false;
+   }
+ }
+
+ /**
+  * Singleton instance of the native module (lazy initialized)
+  */
+ let _nativeModule: NativeRunAnywhereLlamaModule | undefined;
+
+ /**
+  * Get the singleton native module instance
+  */
+ export function getNativeLlamaModule(): NativeRunAnywhereLlamaModule {
+   if (!_nativeModule) {
+     _nativeModule = requireNativeLlamaModule();
+   }
+   return _nativeModule;
+ }
+
+ /**
+  * Default export - the native module getter
+  */
+ export const NativeRunAnywhereLlama = {
+   get: getNativeLlamaModule,
+   isAvailable: isNativeLlamaModuleAvailable,
+ };
+
+ export default NativeRunAnywhereLlama;
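A short usage sketch of the accessors exported above: guard with `isNativeLlamaModuleAvailable()` before touching the hybrid object, then reuse the lazy singleton from `getNativeLlamaModule()`.

```typescript
import {
  getNativeLlamaModule,
  isNativeLlamaModuleAvailable,
} from '@runanywhere/llamacpp';

// Only touch the hybrid object if Nitro could create it on this platform.
if (isNativeLlamaModuleAvailable()) {
  const llama = getNativeLlamaModule();     // lazily created singleton
  const ok = await llama.registerBackend(); // see the Nitrogen spec below
  console.log('backend registered:', ok);
}
```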
package/src/native/index.ts ADDED
@@ -0,0 +1,11 @@
+ /**
+  * Native module exports for @runanywhere/llamacpp
+  */
+
+ export {
+   NativeRunAnywhereLlama,
+   getNativeLlamaModule,
+   requireNativeLlamaModule,
+   isNativeLlamaModuleAvailable,
+ } from './NativeRunAnywhereLlama';
+ export type { NativeRunAnywhereLlamaModule } from './NativeRunAnywhereLlama';
package/src/specs/RunAnywhereLlama.nitro.ts ADDED
@@ -0,0 +1,160 @@
+ /**
+  * RunAnywhereLlama Nitrogen Spec
+  *
+  * LlamaCPP backend interface for Llama-based text generation:
+  * - Backend Registration
+  * - Model Loading/Unloading
+  * - Text Generation (non-streaming and streaming)
+  * - Structured Output (JSON schema generation)
+  *
+  * Matches Swift SDK: LlamaCPPRuntime/LlamaCPP.swift + CppBridge+LLM.swift
+  */
+ import type { HybridObject } from 'react-native-nitro-modules';
+
+ /**
+  * Llama text generation native interface
+  *
+  * This interface provides Llama-based LLM capabilities.
+  * Requires @runanywhere/core to be initialized first.
+  */
+ export interface RunAnywhereLlama
+   extends HybridObject<{
+     ios: 'c++';
+     android: 'c++';
+   }> {
+   // ============================================================================
+   // Backend Registration
+   // Matches Swift: LlamaCPP.register(), LlamaCPP.unregister()
+   // ============================================================================
+
+   /**
+    * Register the LlamaCPP backend with the C++ service registry.
+    * Calls rac_backend_llamacpp_register() from runanywhere-binaries.
+    * Safe to call multiple times - subsequent calls are no-ops.
+    * @returns true if registered successfully (or already registered)
+    */
+   registerBackend(): Promise<boolean>;
+
+   /**
+    * Unregister the LlamaCPP backend from the C++ service registry.
+    * @returns true if unregistered successfully
+    */
+   unregisterBackend(): Promise<boolean>;
+
+   /**
+    * Check if the LlamaCPP backend is registered
+    * @returns true if backend is registered
+    */
+   isBackendRegistered(): Promise<boolean>;
+
+   // ============================================================================
+   // Model Loading
+   // Matches Swift: CppBridge+LLM.swift loadTextModel/unloadTextModel
+   // ============================================================================
+
+   /**
+    * Load a Llama model for text generation
+    * @param path Path to the model file (.gguf)
+    * @param modelId Optional unique identifier for the model
+    * @param modelName Optional human-readable name for the model
+    * @param configJson Optional JSON configuration (context_length, gpu_layers, etc.)
+    * @returns true if loaded successfully
+    */
+   loadModel(
+     path: string,
+     modelId?: string,
+     modelName?: string,
+     configJson?: string
+   ): Promise<boolean>;
+
+   /**
+    * Check if a Llama model is loaded
+    */
+   isModelLoaded(): Promise<boolean>;
+
+   /**
+    * Unload the current Llama model
+    */
+   unloadModel(): Promise<boolean>;
+
+   /**
+    * Get info about the currently loaded model
+    * @returns JSON with model info or empty if not loaded
+    */
+   getModelInfo(): Promise<string>;
+
+   // ============================================================================
+   // Text Generation
+   // Matches Swift: RunAnywhere+TextGeneration.swift
+   // ============================================================================
+
+   /**
+    * Generate text (non-streaming)
+    * @param prompt The prompt text
+    * @param optionsJson JSON string with generation options:
+    *   - max_tokens: Maximum tokens to generate (default: 512)
+    *   - temperature: Sampling temperature (default: 0.7)
+    *   - top_p: Nucleus sampling parameter (default: 0.9)
+    *   - top_k: Top-k sampling parameter (default: 40)
+    *   - system_prompt: Optional system prompt
+    * @returns JSON string with generation result:
+    *   - text: Generated text
+    *   - tokensUsed: Number of tokens generated
+    *   - latencyMs: Generation time in milliseconds
+    *   - cancelled: Whether generation was cancelled
+    */
+   generate(prompt: string, optionsJson?: string): Promise<string>;
+
+   /**
+    * Generate text with streaming callback
+    * @param prompt The prompt text
+    * @param optionsJson JSON string with generation options
+    * @param callback Called for each token with (token, isComplete)
+    * @returns Complete generated text
+    */
+   generateStream(
+     prompt: string,
+     optionsJson: string,
+     callback: (token: string, isComplete: boolean) => void
+   ): Promise<string>;
+
+   /**
+    * Cancel ongoing text generation
+    * @returns true if cancellation was successful
+    */
+   cancelGeneration(): Promise<boolean>;
+
+   // ============================================================================
+   // Structured Output
+   // Matches Swift: RunAnywhere+StructuredOutput.swift
+   // ============================================================================
+
+   /**
+    * Generate structured output following a JSON schema
+    * Uses constrained generation to ensure output conforms to schema
+    * @param prompt The prompt text
+    * @param schema JSON schema string defining the output structure
+    * @param optionsJson Optional generation options
+    * @returns JSON string conforming to the provided schema
+    */
+   generateStructured(
+     prompt: string,
+     schema: string,
+     optionsJson?: string
+   ): Promise<string>;
+
+   // ============================================================================
+   // Utilities
+   // ============================================================================
+
+   /**
+    * Get the last error message from the Llama backend
+    */
+   getLastError(): Promise<string>;
+
+   /**
+    * Get current memory usage of the Llama backend
+    * @returns Memory usage in bytes
+    */
+   getMemoryUsage(): Promise<number>;
+ }
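Because `generate` and `generateStream` exchange options and results as JSON strings, a caller is expected to serialize the options object and parse the returned JSON. A hedged sketch based only on the fields documented in the spec above (the exact result shape is defined by the C++ backend):

```typescript
import { requireNativeLlamaModule } from '@runanywhere/llamacpp';

const llama = requireNativeLlamaModule();

// Options and results cross the bridge as JSON strings (see generate() docs above).
const options = JSON.stringify({ max_tokens: 256, temperature: 0.7, top_p: 0.9 });

// Non-streaming: parse the documented result fields.
const raw = await llama.generate('Write a haiku about GGUF files.', options);
const { text, tokensUsed, latencyMs } = JSON.parse(raw);
console.log(text, tokensUsed, latencyMs);

// Streaming: tokens arrive via the callback; the promise resolves with the full text.
let streamed = '';
const full = await llama.generateStream('Hello!', options, (token, isComplete) => {
  if (!isComplete) streamed += token;
});
console.log(full === streamed || full.length > 0);
```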