npm - react-native-gemma-agent - Versions diffs - 0.1.0 - Mend

react-native-gemma-agent 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

package/LICENSE +21 -0
package/README.md +457 -0
package/package.json +52 -0
package/skills/calculator.ts +47 -0
package/skills/deviceLocation.ts +180 -0
package/skills/index.ts +3 -0
package/skills/queryWikipedia.ts +96 -0
package/skills/readCalendar.ts +74 -0
package/skills/webSearch.ts +75 -0
package/src/AgentOrchestrator.ts +315 -0
package/src/BM25Scorer.ts +118 -0
package/src/FunctionCallParser.ts +113 -0
package/src/GemmaAgentProvider.tsx +101 -0
package/src/InferenceEngine.ts +301 -0
package/src/ModelManager.ts +244 -0
package/src/SkillRegistry.ts +60 -0
package/src/SkillSandbox.tsx +155 -0
package/src/index.ts +52 -0
package/src/types.ts +197 -0
package/src/useGemmaAgent.ts +222 -0
package/src/useModelDownload.ts +80 -0
package/src/useSkillRegistry.ts +58 -0

package/src/InferenceEngine.ts ADDED Viewed

@@ -0,0 +1,301 @@
+import {
+  initLlama,
+  releaseAllLlama,
+  type LlamaContext,
+  type TokenData,
+  type NativeCompletionResult,
+} from 'llama.rn';
+import type {
+  Message,
+  CompletionResult,
+  CompletionTimings,
+  ContextUsage,
+  GenerateOptions,
+  TokenEvent,
+  ToolCall,
+  ToolDefinition,
+  InferenceEngineConfig,
+} from './types';
+const DEFAULT_CONFIG: Required<InferenceEngineConfig> = { // Engine defaults; the InferenceEngine constructor spreads caller config over these.
+  contextSize: 4096, // forwarded to initLlama as n_ctx; also the `total` reported by getContextUsage()
+  batchSize: 512, // forwarded to initLlama as n_batch
+  threads: 4, // forwarded to initLlama as n_threads
+  flashAttn: 'auto', // forwarded to initLlama as flash_attn_type
+  useMlock: true, // forwarded to initLlama as use_mlock
+  gpuLayers: -1, // forwarded to initLlama as n_gpu_layers
+};
+const DEFAULT_GENERATE: Required<Pick<GenerateOptions, 'maxTokens' | 'temperature' | 'topP' | 'topK'>> = { // Per-call sampling fallbacks used by generate() when an option is omitted.
+  maxTokens: 1024, // sent as n_predict
+  temperature: 0.7, // sent as temperature
+  topP: 0.9, // sent as top_p
+  topK: 40, // sent as top_k
+};
+type LoadedModelInfo = { // Snapshot taken from the llama context right after loadModel() succeeds.
+  gpu: boolean; // copied from context.gpu
+  reasonNoGPU: string; // copied from context.reasonNoGPU
+  description: string | null; // context.model.desc, or null when absent
+  nParams: number | null; // context.model.nParams, or null when absent
+};
+/**
+ * Wraps a llama.rn context: loads a GGUF model, runs chat completions
+ * (optionally with tool definitions) and exposes model / context statistics.
+ */
+export class InferenceEngine {
+  private context: LlamaContext | null = null;
+  private config: Required<InferenceEngineConfig>;
+  private modelInfo: LoadedModelInfo | null = null;
+  private _isGenerating = false;
+  // Token counts from the most recent completed generate() call; cleared by unload().
+  private _lastPromptTokens = 0;
+  private _lastPredictedTokens = 0;
+  /**
+   * @param config — optional overrides; any key left out falls back to DEFAULT_CONFIG
+   */
+  constructor(config?: InferenceEngineConfig) {
+    this.config = { ...DEFAULT_CONFIG, ...config };
+  }
+  /** True while a context created by loadModel() is held (i.e. until unload()). */
+  get isLoaded(): boolean {
+    return this.context !== null;
+  }
+  /** True while a generate() call is in flight. */
+  get isGenerating(): boolean {
+    return this._isGenerating;
+  }
+  /** GPU flag reported by the loaded context; false when nothing is loaded. */
+  get gpu(): boolean {
+    return this.modelInfo?.gpu ?? false;
+  }
+  /**
+   * Load a GGUF model into memory.
+   * @param modelPath — absolute path to the .gguf file on device
+   * @param onProgress — loading progress callback (0-100)
+   * @returns load time in ms
+   * @throws Error if a model is already loaded; also rejects with whatever initLlama rejects with
+   */
+  async loadModel(
+    modelPath: string,
+    onProgress?: (percent: number) => void,
+  ): Promise<number> {
+    if (this.context) {
+      throw new Error('Model already loaded. Call unload() first.');
+    }
+    const start = Date.now();
+    this.context = await initLlama(
+      {
+        model: modelPath,
+        n_ctx: this.config.contextSize,
+        n_batch: this.config.batchSize,
+        n_threads: this.config.threads,
+        flash_attn_type: this.config.flashAttn,
+        use_mlock: this.config.useMlock,
+        n_gpu_layers: this.config.gpuLayers,
+      },
+      (progress: number) => {
+        onProgress?.(progress);
+      },
+    );
+    const loadTimeMs = Date.now() - start;
+    // Snapshot the fields getInfo() reports so they can be read without touching the context.
+    this.modelInfo = {
+      gpu: this.context.gpu,
+      reasonNoGPU: this.context.reasonNoGPU,
+      description: this.context.model?.desc ?? null,
+      nParams: this.context.model?.nParams ?? null,
+    };
+    return loadTimeMs;
+  }
+  /**
+   * Run inference with messages and optional tools.
+   * Returns the full completion result including any tool calls.
+   * @param messages — conversation history, converted to the plain objects llama.rn expects
+   * @param options — sampling / tool options; omitted values fall back to DEFAULT_GENERATE
+   * @param onToken — invoked for every streamed chunk that carries a non-empty token
+   * @throws Error if no model is loaded or another generation is still running
+   */
+  async generate(
+    messages: Message[],
+    options?: GenerateOptions,
+    onToken?: (event: TokenEvent) => void,
+  ): Promise<CompletionResult> {
+    if (!this.context) {
+      throw new Error('No model loaded. Call loadModel() first.');
+    }
+    if (this._isGenerating) {
+      throw new Error('Generation already in progress. Call stopGeneration() first.');
+    }
+    this._isGenerating = true;
+    try {
+      const llamaMessages = messages.map(msg => {
+        const m: Record<string, unknown> = {
+          role: msg.role,
+          content: msg.content ?? '',
+        };
+        // Only include fields with actual string values — undefined/null
+        // fields become JSON null and crash llama.cpp's Jinja parser
+        if (msg.tool_calls && msg.tool_calls.length > 0) {
+          m.tool_calls = msg.tool_calls.map((tc, i) => ({
+            type: tc.type,
+            // Index-based fallback (same scheme as mapResult) so several
+            // id-less calls in one message never share an id.
+            id: tc.id ?? `call_${i}`,
+            function: {
+              name: tc.function.name,
+              arguments: tc.function.arguments ?? '{}',
+            },
+          }));
+        }
+        if (typeof msg.tool_call_id === 'string') {
+          m.tool_call_id = msg.tool_call_id;
+        }
+        if (typeof msg.name === 'string') {
+          m.name = msg.name;
+        }
+        return m;
+      });
+      const completionParams: Record<string, unknown> = {
+        messages: llamaMessages,
+        n_predict: options?.maxTokens ?? DEFAULT_GENERATE.maxTokens,
+        temperature: options?.temperature ?? DEFAULT_GENERATE.temperature,
+        top_p: options?.topP ?? DEFAULT_GENERATE.topP,
+        top_k: options?.topK ?? DEFAULT_GENERATE.topK,
+        stop: options?.stop ?? ['<end_of_turn>', '<eos>'],
+      };
+      // Tool keys are only attached when at least one tool is supplied.
+      if (options?.tools && options.tools.length > 0) {
+        completionParams.tools = options.tools;
+        completionParams.tool_choice = options.toolChoice ?? 'auto';
+      }
+      const result: NativeCompletionResult = await this.context.completion(
+        completionParams as any,
+        (data: TokenData) => {
+          if (onToken && data.token) {
+            onToken({
+              token: data.token,
+              toolCalls: data.tool_calls as ToolCall[] | undefined,
+            });
+          }
+        },
+      );
+      const mapped = this.mapResult(result);
+      // Remember the token counts so getContextUsage() can report on this turn.
+      this._lastPromptTokens = mapped.timings.promptTokens;
+      this._lastPredictedTokens = mapped.timings.predictedTokens;
+      return mapped;
+    } finally {
+      this._isGenerating = false;
+    }
+  }
+  /**
+   * Stop an in-progress generation.
+   * No-op when nothing is loaded or no generation is running.
+   */
+  async stopGeneration(): Promise<void> {
+    if (this.context && this._isGenerating) {
+      await this.context.stopCompletion();
+      this._isGenerating = false;
+    }
+  }
+  /**
+   * Unload the model and free memory.
+   * Also clears the cached model info and the token counters, so
+   * getContextUsage() does not keep reporting the previous session.
+   * NOTE(review): this calls releaseAllLlama(), which by its name releases
+   * every llama.rn context, not only this engine's — confirm that is intended
+   * if several engines can coexist.
+   */
+  async unload(): Promise<void> {
+    if (this.context) {
+      await releaseAllLlama();
+      this.context = null;
+      this.modelInfo = null;
+      this._isGenerating = false;
+      this._lastPromptTokens = 0;
+      this._lastPredictedTokens = 0;
+    }
+  }
+  /**
+   * Get info about the loaded model.
+   * Every field except `loaded` falls back to false / null when no model is loaded.
+   */
+  getInfo(): {
+    loaded: boolean;
+    gpu: boolean;
+    reasonNoGPU: string | null;
+    description: string | null;
+    nParams: number | null;
+  } {
+    return {
+      loaded: this.isLoaded,
+      gpu: this.modelInfo?.gpu ?? false,
+      reasonNoGPU: this.modelInfo?.reasonNoGPU ?? null,
+      description: this.modelInfo?.description ?? null,
+      nParams: this.modelInfo?.nParams ?? null,
+    };
+  }
+  /**
+   * Get current context window usage.
+   * `used` is the prompt token count plus the predicted token count of the
+   * last completed generate() call — an estimate, not a live reading.
+   * `total` is the configured contextSize and `percent` is rounded to an
+   * integer. All counters are zero before the first generation and after
+   * unload().
+   */
+  getContextUsage(): ContextUsage {
+    const total = this.config.contextSize;
+    const used = this._lastPromptTokens + this._lastPredictedTokens;
+    const percent = total > 0 ? Math.round((used / total) * 100) : 0;
+    return { used, total, percent };
+  }
+  /**
+   * Run a benchmark.
+   * Returns prompt processing and token generation speeds.
+   * The four arguments are passed straight through to the context's bench().
+   * @returns null when no model is loaded or when the benchmark throws
+   */
+  async bench(
+    pp = 512,
+    tg = 128,
+    pl = 1,
+    nr = 3,
+  ): Promise<{ ppSpeed: number; tgSpeed: number; flashAttn: boolean } | null> {
+    if (!this.context) {
+      return null;
+    }
+    try {
+      const result = await this.context.bench(pp, tg, pl, nr);
+      return {
+        ppSpeed: result.speedPp ?? 0,
+        tgSpeed: result.speedTg ?? 0,
+        flashAttn: Boolean(result.flashAttn),
+      };
+    } catch {
+      // Benchmarking is best-effort: a failure is reported as "no result".
+      return null;
+    }
+  }
+  /**
+   * Convert llama.rn's snake_case completion result into the camelCase
+   * CompletionResult shape, normalising tool calls along the way.
+   */
+  private mapResult(result: NativeCompletionResult): CompletionResult {
+    const timings: CompletionTimings = {
+      promptTokens: result.timings.prompt_n,
+      promptMs: result.timings.prompt_ms,
+      promptPerSecond: result.timings.prompt_per_second,
+      predictedTokens: result.timings.predicted_n,
+      predictedMs: result.timings.predicted_ms,
+      predictedPerSecond: result.timings.predicted_per_second,
+    };
+    // Every tool call gets an id and an arguments string, even if the
+    // native layer left them out.
+    const toolCalls: ToolCall[] = (result.tool_calls ?? []).map((tc, i) => ({
+      type: 'function' as const,
+      id: tc.id ?? `call_${i}`,
+      function: {
+        name: tc.function.name,
+        arguments: tc.function.arguments ?? '{}',
+      },
+    }));
+    return {
+      text: result.text,
+      content: result.content ?? result.text,
+      // Empty reasoning is reported as null rather than ''.
+      reasoning: result.reasoning_content || null,
+      toolCalls,
+      timings,
+      stoppedEos: result.stopped_eos,
+      stoppedLimit: result.stopped_limit > 0,
+      contextFull: result.context_full,
+    };
+  }
+}

package/src/ModelManager.ts ADDED Viewed

@@ -0,0 +1,244 @@
+import RNFS from 'react-native-fs';
+import type { ModelStatus, ModelConfig, DownloadProgress, ModelInfo } from './types';
+const DEFAULT_HF_BASE = 'https://huggingface.co'; // origin used by ModelManager.buildDownloadUrl()
+const DOWNLOAD_CHUNK_TIMEOUT = 60_000; // passed to RNFS.downloadFile as readTimeout
+type StatusListener = (status: ModelStatus) => void; // callback shape accepted by ModelManager.onStatusChange()
+type ProgressListener = (progress: DownloadProgress) => void; // callback shape accepted by ModelManager.download()
+/**
+ * Tracks where the model file lives on disk, drives download / cancel /
+ * delete, and notifies subscribers whenever the status changes.
+ */
+export class ModelManager {
+  private _status: ModelStatus = 'not_downloaded';
+  private _modelPath: string | null = null;
+  private _config: ModelConfig;
+  private _downloadJobId: number | null = null;
+  // Job id most recently handed to cancelDownload(); lets download() tell a
+  // user-requested abort apart from a genuine failure.
+  private _cancelledJobId: number | null = null;
+  private _statusListeners: Set<StatusListener> = new Set();
+  private _sizeBytes: number | null = null;
+  /**
+   * @param config — model description; this class reads filename, repoId and expectedSize from it
+   */
+  constructor(config: ModelConfig) {
+    this._config = config;
+  }
+  /** Current lifecycle status. */
+  get status(): ModelStatus {
+    return this._status;
+  }
+  /** Path of the located / downloaded model file, or null if none is known. */
+  get modelPath(): string | null {
+    return this._modelPath;
+  }
+  /**
+   * Subscribe to status changes.
+   * @returns a function that removes the listener again
+   */
+  onStatusChange(listener: StatusListener): () => void {
+    this._statusListeners.add(listener);
+    return () => this._statusListeners.delete(listener);
+  }
+  /** Store the new status and notify every registered listener. */
+  private setStatus(status: ModelStatus): void {
+    this._status = status;
+    for (const listener of this._statusListeners) {
+      listener(status);
+    }
+  }
+  /**
+   * Check if the model file exists at common locations.
+   * Returns the path if found, null otherwise.
+   * Side effect: on a hit the file size and path are recorded and the status
+   * becomes 'ready'. On a miss the status is left untouched.
+   */
+  async findModel(): Promise<string | null> {
+    // Probed in order: documents dir, /data/local/tmp, caches dir.
+    const candidates = [
+      `${RNFS.DocumentDirectoryPath}/${this._config.filename}`,
+      `/data/local/tmp/${this._config.filename}`,
+      `${RNFS.CachesDirectoryPath}/${this._config.filename}`,
+    ];
+    for (const path of candidates) {
+      if (await RNFS.exists(path)) {
+        const stat = await RNFS.stat(path);
+        this._sizeBytes = Number(stat.size);
+        this._modelPath = path;
+        this.setStatus('ready');
+        return path;
+      }
+    }
+    return null;
+  }
+  /**
+   * Check if model exists and update status accordingly.
+   * @returns true when found (status 'ready'), false otherwise (status 'not_downloaded')
+   */
+  async checkModel(): Promise<boolean> {
+    const path = await this.findModel();
+    if (path) {
+      return true;
+    }
+    this.setStatus('not_downloaded');
+    return false;
+  }
+  /**
+   * Download model from HuggingFace with progress.
+   *
+   * The file is written to `<filename>.partial` and only moved to its final
+   * name after a 2xx response, so an interrupted transfer is never mistaken
+   * for a usable model. A leftover `.partial` from an earlier attempt is
+   * discarded and the download starts again from byte 0: react-native-fs's
+   * downloadFile (re)creates `toFile` instead of appending to it, so sending
+   * a Range header would leave only the tail of the model on disk.
+   *
+   * @param onProgress — receives byte counts and a rounded percentage
+   * @returns path of the downloaded (or already present) model file
+   * @throws Error if a download is already running, if the server answers
+   *   with a non-2xx status, or whatever react-native-fs rejects with
+   *   (including after cancelDownload())
+   */
+  async download(onProgress?: ProgressListener): Promise<string> {
+    if (this._status === 'downloading') {
+      throw new Error('Download already in progress');
+    }
+    // Check if already downloaded
+    const existing = await this.findModel();
+    if (existing) {
+      const size = this._sizeBytes ?? 0;
+      onProgress?.({ bytesDownloaded: size, totalBytes: size, percent: 100 });
+      return existing;
+    }
+    this.setStatus('downloading');
+    const destPath = `${RNFS.DocumentDirectoryPath}/${this._config.filename}`;
+    const partialPath = `${destPath}.partial`;
+    const url = this.buildDownloadUrl();
+    let jobId: number | null = null;
+    try {
+      // A stale partial cannot be resumed (see above); drop it up front so
+      // its space is available for the fresh download.
+      if (await RNFS.exists(partialPath)) {
+        await RNFS.unlink(partialPath);
+      }
+      const expectedBytes = this._config.expectedSize ?? 0;
+      const result = RNFS.downloadFile({
+        fromUrl: url,
+        toFile: partialPath,
+        // NOTE(review): intentionally empty, kept from the original code —
+        // presumably registered because some react-native-fs versions only
+        // deliver progress events when a begin callback exists. Confirm
+        // before removing.
+        begin: () => {},
+        progress: (res) => {
+          const downloaded = res.bytesWritten;
+          // Prefer the configured size; otherwise use the server's Content-Length.
+          const total = expectedBytes || res.contentLength;
+          const percent = total > 0 ? Math.round((downloaded / total) * 100) : 0;
+          onProgress?.({ bytesDownloaded: downloaded, totalBytes: total, percent });
+        },
+        progressInterval: 500,
+        progressDivider: 0,
+        readTimeout: DOWNLOAD_CHUNK_TIMEOUT,
+        connectionTimeout: 15_000,
+      });
+      jobId = result.jobId;
+      this._downloadJobId = jobId;
+      const response = await result.promise;
+      if (response.statusCode >= 200 && response.statusCode < 300) {
+        // Move partial to final destination
+        if (await RNFS.exists(destPath)) {
+          await RNFS.unlink(destPath);
+        }
+        await RNFS.moveFile(partialPath, destPath);
+        this._modelPath = destPath;
+        const stat = await RNFS.stat(destPath);
+        const finalSize = Number(stat.size);
+        this._sizeBytes = finalSize;
+        this._downloadJobId = null;
+        this.setStatus('ready');
+        onProgress?.({ bytesDownloaded: finalSize, totalBytes: finalSize, percent: 100 });
+        return destPath;
+      }
+      throw new Error(`Download failed with status ${response.statusCode}`);
+    } catch (err) {
+      const wasCancelled = jobId !== null && jobId === this._cancelledJobId;
+      if (!wasCancelled) {
+        this._downloadJobId = null;
+        if (this._status !== 'ready') {
+          this.setStatus('error');
+        }
+      }
+      // When cancelled, cancelDownload() has already set 'not_downloaded';
+      // leave status and job id alone so a cancel is not reported as an error.
+      // Any leftover .partial is removed by the next download() or deleteModel().
+      throw err;
+    }
+  }
+  /**
+   * Cancel an in-progress download.
+   * Sets the status to 'not_downloaded'; the pending download() promise
+   * still rejects, but no longer flips the status to 'error'.
+   * No-op when no download job is registered.
+   */
+  cancelDownload(): void {
+    if (this._downloadJobId !== null) {
+      const jobId = this._downloadJobId;
+      // Record the id before stopping so download()'s catch can recognise the abort.
+      this._cancelledJobId = jobId;
+      this._downloadJobId = null;
+      RNFS.stopDownload(jobId);
+      this.setStatus('not_downloaded');
+    }
+  }
+  /**
+   * Set a custom model path (for pre-downloaded models).
+   * Records the file size and sets the status to 'ready'.
+   * @throws Error if nothing exists at `path`
+   */
+  async setModelPath(path: string): Promise<void> {
+    if (!(await RNFS.exists(path))) {
+      throw new Error(`Model file not found at ${path}`);
+    }
+    const stat = await RNFS.stat(path);
+    this._sizeBytes = Number(stat.size);
+    this._modelPath = path;
+    this.setStatus('ready');
+  }
+  /**
+   * Delete the downloaded model file.
+   * Also removes a leftover `.partial` file in the documents directory and
+   * resets path, size and status.
+   */
+  async deleteModel(): Promise<void> {
+    if (this._modelPath && (await RNFS.exists(this._modelPath))) {
+      await RNFS.unlink(this._modelPath);
+    }
+    // Also clean up partial downloads
+    const partialPath = `${RNFS.DocumentDirectoryPath}/${this._config.filename}.partial`;
+    if (await RNFS.exists(partialPath)) {
+      await RNFS.unlink(partialPath);
+    }
+    this._modelPath = null;
+    this._sizeBytes = null;
+    this.setStatus('not_downloaded');
+  }
+  /**
+   * Get model info.
+   * description / nParams / nEmbd are always null here; this class only
+   * knows about the file, not about the loaded model.
+   */
+  getInfo(): ModelInfo {
+    return {
+      status: this._status,
+      path: this._modelPath,
+      sizeBytes: this._sizeBytes,
+      description: null,
+      nParams: null,
+      nEmbd: null,
+    };
+  }
+  /**
+   * Check available storage space.
+   * `required` is the configured expectedSize (0 when unknown). When it is
+   * unknown the check always reports `sufficient: true`.
+   */
+  async checkStorage(): Promise<{ available: number; required: number; sufficient: boolean }> {
+    const fsInfo = await RNFS.getFSInfo();
+    const required = this._config.expectedSize ?? 0;
+    return {
+      available: fsInfo.freeSpace,
+      required,
+      sufficient: required === 0 || fsInfo.freeSpace > required * 1.1, // 10% buffer
+    };
+  }
+  /** Build the `<base>/<repoId>/resolve/main/<filename>` download URL. */
+  private buildDownloadUrl(): string {
+    const { repoId, filename } = this._config;
+    return `${DEFAULT_HF_BASE}/${repoId}/resolve/main/${filename}`;
+  }
+}

package/src/SkillRegistry.ts ADDED Viewed

@@ -0,0 +1,60 @@
+import type { SkillManifest, ToolDefinition } from './types';
+/**
+ * In-memory catalogue of skills keyed by name, plus a converter that turns
+ * the catalogue into tool definitions for the inference engine.
+ */
+export class SkillRegistry {
+  private skills: Map<string, SkillManifest> = new Map();
+  /**
+   * Add a skill, replacing any existing entry with the same name.
+   * @throws Error when a 'js' skill has no html, or a 'native' skill has no execute
+   */
+  registerSkill(skill: SkillManifest): void {
+    const isJs = skill.type === 'js';
+    const isNative = skill.type === 'native';
+    if (isJs && !skill.html) {
+      throw new Error(`JS skill "${skill.name}" requires an html field`);
+    }
+    if (isNative && !skill.execute) {
+      throw new Error(`Native skill "${skill.name}" requires an execute function`);
+    }
+    this.skills.set(skill.name, skill);
+  }
+  /** Remove the skill registered under `name`, if there is one. */
+  unregisterSkill(name: string): void {
+    this.skills.delete(name);
+  }
+  /** Look a skill up by name; null when it is not registered. */
+  getSkill(name: string): SkillManifest | null {
+    const found = this.skills.get(name);
+    return found == null ? null : found;
+  }
+  /** All registered skills, in the Map's iteration order. */
+  getSkills(): SkillManifest[] {
+    return [...this.skills.values()];
+  }
+  /** Whether a skill with this name is registered. */
+  hasSkill(name: string): boolean {
+    return this.skills.has(name);
+  }
+  /**
+   * Describe every registered skill as a function-type tool definition
+   * suitable for the `tools` option of InferenceEngine.generate().
+   * The skill's instructions, when present, are appended to its description
+   * on a new line.
+   */
+  toToolDefinitions(): ToolDefinition[] {
+    const definitions: ToolDefinition[] = [];
+    for (const skill of this.getSkills()) {
+      const extra = skill.instructions ? `\n${skill.instructions}` : '';
+      definitions.push({
+        type: 'function' as const,
+        function: {
+          name: skill.name,
+          description: skill.description + extra,
+          parameters: {
+            type: 'object' as const,
+            properties: skill.parameters,
+            required: skill.requiredParameters,
+          },
+        },
+      });
+    }
+    return definitions;
+  }
+  /** Drop every registered skill. */
+  clear(): void {
+    this.skills.clear();
+  }
+}