npm - react-native-litert-lm - Versions diffs - 0.3.7 → 0.4.1 - Mend

react-native-litert-lm 0.3.7 → 0.4.1

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (88)

package/README.md +153 -135
package/android/build.gradle +12 -0
package/android/src/main/AndroidManifest.xml +8 -0
package/android/src/main/java/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLM.kt +276 -62
package/android/src/main/java/dev/litert/litertlm/LiteRTLMPackage.kt +19 -2
package/android/src/test/java/com/margelo/nitro/core/Promise.kt +46 -0
package/android/src/test/java/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLMTest.kt +105 -0
package/ios/HybridLiteRTLM.swift +1344 -0
package/ios/Tests/HybridLiteRTLMTests.swift +113 -0
package/lib/__mocks__/react-native-nitro-modules.d.ts +65 -0
package/lib/__mocks__/react-native-nitro-modules.js +60 -0
package/lib/__tests__/hooks.test.d.ts +1 -0
package/lib/__tests__/hooks.test.js +124 -0
package/lib/__tests__/memoryTracker.test.d.ts +1 -0
package/lib/__tests__/memoryTracker.test.js +74 -0
package/lib/__tests__/modelFactory.test.d.ts +1 -0
package/lib/__tests__/modelFactory.test.js +68 -0
package/lib/hooks.js +27 -3
package/lib/index.d.ts +6 -2
package/lib/index.js +8 -8
package/lib/modelFactory.js +82 -63
package/lib/specs/LiteRTLM.nitro.d.ts +87 -2
package/nitrogen/generated/android/LiteRTLMOnLoad.cpp +2 -2
package/nitrogen/generated/android/c++/JHybridLiteRTLMSpec.cpp +94 -9
package/nitrogen/generated/android/c++/JHybridLiteRTLMSpec.hpp +5 -1
package/nitrogen/generated/android/c++/JLLMConfig.hpp +40 -3
package/nitrogen/generated/android/c++/JMultimodalPart.hpp +74 -0
package/nitrogen/generated/android/c++/JPartType.hpp +61 -0
package/nitrogen/generated/android/c++/JToolDefinition.hpp +65 -0
package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/GenerationStats.kt +23 -0
package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLMSpec.kt +28 -2
package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/LLMConfig.kt +46 -3
package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/MemoryUsage.kt +19 -0
package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/Message.kt +15 -0
package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/MultimodalPart.kt +66 -0
package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/PartType.kt +24 -0
package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/ToolDefinition.kt +61 -0
package/nitrogen/generated/ios/LiteRTLM-Swift-Cxx-Bridge.cpp +57 -1
package/nitrogen/generated/ios/LiteRTLM-Swift-Cxx-Bridge.hpp +414 -3
package/nitrogen/generated/ios/LiteRTLM-Swift-Cxx-Umbrella.hpp +41 -3
package/nitrogen/generated/ios/LiteRTLMAutolinking.mm +4 -6
package/nitrogen/generated/ios/LiteRTLMAutolinking.swift +10 -0
package/nitrogen/generated/ios/c++/HybridLiteRTLMSpecSwift.cpp +11 -0
package/nitrogen/generated/ios/c++/HybridLiteRTLMSpecSwift.hpp +240 -0
package/nitrogen/generated/ios/swift/Backend.swift +44 -0
package/nitrogen/generated/ios/swift/Func_void.swift +46 -0
package/nitrogen/generated/ios/swift/Func_void_double.swift +46 -0
package/nitrogen/generated/ios/swift/Func_void_std__exception_ptr.swift +46 -0
package/nitrogen/generated/ios/swift/Func_void_std__string.swift +46 -0
package/nitrogen/generated/ios/swift/Func_void_std__string_bool.swift +46 -0
package/nitrogen/generated/ios/swift/GenerationStats.swift +54 -0
package/nitrogen/generated/ios/swift/HybridLiteRTLMSpec.swift +71 -0
package/nitrogen/generated/ios/swift/HybridLiteRTLMSpec_cxx.swift +431 -0
package/nitrogen/generated/ios/swift/LLMConfig.swift +203 -0
package/nitrogen/generated/ios/swift/MemoryUsage.swift +44 -0
package/nitrogen/generated/ios/swift/Message.swift +34 -0
package/nitrogen/generated/ios/swift/MultimodalPart.swift +83 -0
package/nitrogen/generated/ios/swift/PartType.swift +44 -0
package/nitrogen/generated/ios/swift/Role.swift +44 -0
package/nitrogen/generated/ios/swift/ToolDefinition.swift +39 -0
package/nitrogen/generated/shared/c++/HybridLiteRTLMSpec.cpp +4 -0
package/nitrogen/generated/shared/c++/HybridLiteRTLMSpec.hpp +9 -2
package/nitrogen/generated/shared/c++/LLMConfig.hpp +22 -2
package/nitrogen/generated/shared/c++/MultimodalPart.hpp +99 -0
package/nitrogen/generated/shared/c++/PartType.hpp +80 -0
package/nitrogen/generated/shared/c++/ToolDefinition.hpp +91 -0
package/package.json +22 -11
package/react-native-litert-lm.podspec +17 -19
package/scripts/download-ios-frameworks.sh +17 -50
package/scripts/framework-source.js +46 -0
package/scripts/postinstall.js +40 -18
package/src/__mocks__/react-native-nitro-modules.ts +58 -0
package/src/__tests__/hooks.test.ts +153 -0
package/src/__tests__/memoryTracker.test.ts +87 -0
package/src/__tests__/modelFactory.test.ts +96 -0
package/src/hooks.ts +29 -7
package/src/index.ts +7 -10
package/src/modelFactory.ts +104 -80
package/src/specs/LiteRTLM.nitro.ts +106 -2
package/cpp/HybridLiteRTLM.cpp +0 -939
package/cpp/HybridLiteRTLM.hpp +0 -169
package/cpp/IOSDownloadHelper.h +0 -24
package/ios/IOSDownloadHelper.mm +0 -129
package/scripts/build-ios-engine.sh +0 -302
package/scripts/stubs/cxx_bridge_stubs.cc +0 -224
package/scripts/stubs/gemma_model_constraint_provider.cc +0 -46
package/scripts/stubs/llguidance_stubs.c +0 -101
package/src/templates.ts +0 -105

package/src/__tests__/memoryTracker.test.ts ADDED

@@ -0,0 +1,87 @@
+import { createMemoryTracker, createNativeBuffer } from '../memoryTracker';
+import { NitroModules } from 'react-native-nitro-modules';
+describe('MemoryTracker Unit Tests', () => {
+  beforeEach(() => {
+    jest.clearAllMocks();
+  });
+  it('should allocate correct native-backed ArrayBuffer size on initialization', () => {
+    const tracker = createMemoryTracker(10);
+    expect(NitroModules.createNativeArrayBuffer).toHaveBeenCalledWith(10 * 4 * 8); // 10 snapshots * 4 fields * 8 bytes/Float64
+    expect(tracker.getCapacity()).toBe(10);
+    expect(tracker.getSnapshotCount()).toBe(0);
+  });
+  it('should record snapshots correctly and retrieve them', () => {
+    const tracker = createMemoryTracker(5);
+    const snapshot1 = {
+      timestamp: 1000,
+      nativeHeapBytes: 100,
+      residentBytes: 200,
+      availableMemoryBytes: 500,
+    };
+    expect(tracker.record(snapshot1)).toBe(true);
+    expect(tracker.getSnapshotCount()).toBe(1);
+    expect(tracker.getLatestSnapshot()).toEqual(snapshot1);
+    const snapshots = tracker.getSnapshots();
+    expect(snapshots).toHaveLength(1);
+    expect(snapshots[0]).toEqual(snapshot1);
+  });
+  it('should reject new snapshots and return false when capacity is reached', () => {
+    const tracker = createMemoryTracker(2);
+    expect(tracker.record({ timestamp: 1, nativeHeapBytes: 10, residentBytes: 20, availableMemoryBytes: 50 })).toBe(true);
+    expect(tracker.record({ timestamp: 2, nativeHeapBytes: 20, residentBytes: 30, availableMemoryBytes: 40 })).toBe(true);
+    expect(tracker.record({ timestamp: 3, nativeHeapBytes: 30, residentBytes: 40, availableMemoryBytes: 30 })).toBe(false);
+    expect(tracker.getSnapshotCount()).toBe(2);
+  });
+  it('should calculate correct peak resident memory size', () => {
+    const tracker = createMemoryTracker(5);
+    tracker.record({ timestamp: 1, nativeHeapBytes: 100, residentBytes: 150, availableMemoryBytes: 1000 });
+    tracker.record({ timestamp: 2, nativeHeapBytes: 120, residentBytes: 300, availableMemoryBytes: 1000 });
+    tracker.record({ timestamp: 3, nativeHeapBytes: 110, residentBytes: 200, availableMemoryBytes: 1000 });
+    expect(tracker.getPeakMemory()).toBe(300);
+  });
+  it('should calculate accurate memory summary statistics', () => {
+    const tracker = createMemoryTracker(5);
+    tracker.record({ timestamp: 1, nativeHeapBytes: 50, residentBytes: 100, availableMemoryBytes: 1000 });
+    tracker.record({ timestamp: 2, nativeHeapBytes: 150, residentBytes: 300, availableMemoryBytes: 800 });
+    tracker.record({ timestamp: 3, nativeHeapBytes: 100, residentBytes: 200, availableMemoryBytes: 900 });
+    const summary = tracker.getSummary();
+    expect(summary.snapshotCount).toBe(3);
+    expect(summary.peakResidentBytes).toBe(300);
+    expect(summary.averageResidentBytes).toBe(200); // (100 + 300 + 200) / 3
+    expect(summary.currentResidentBytes).toBe(200);
+    expect(summary.peakNativeHeapBytes).toBe(150);
+    expect(summary.currentNativeHeapBytes).toBe(100);
+    expect(summary.residentDeltaBytes).toBe(100); // currentRss(200) - firstRss(100)
+    expect(summary.trackerBufferSizeBytes).toBe(5 * 4 * 8);
+  });
+  it('should preserve buffer but reset internal state when reset() is called', () => {
+    const tracker = createMemoryTracker(5);
+    tracker.record({ timestamp: 1, nativeHeapBytes: 50, residentBytes: 100, availableMemoryBytes: 1000 });
+    expect(tracker.getSnapshotCount()).toBe(1);
+    tracker.reset();
+    expect(tracker.getSnapshotCount()).toBe(0);
+    expect(tracker.getLatestSnapshot()).toBeUndefined();
+    expect(tracker.getSnapshots()).toEqual([]);
+  });
+  it('should allow standalone native ArrayBuffer allocation via createNativeBuffer', () => {
+    const size = 128;
+    const buffer = createNativeBuffer(size);
+    expect(NitroModules.createNativeArrayBuffer).toHaveBeenCalledWith(size);
+    expect(buffer.byteLength).toBe(size);
+  });
+});

package/src/__tests__/modelFactory.test.ts ADDED

@@ -0,0 +1,96 @@
+import { createLLM } from '../modelFactory';
+import { mockLiteRTLM } from '../__mocks__/react-native-nitro-modules';
+describe('modelFactory Security & Proxy Unit Tests', () => {
+  let llm: ReturnType<typeof createLLM>;
+  beforeEach(() => {
+    jest.clearAllMocks();
+    llm = createLLM({ enableMemoryTracking: true });
+  });
+  it('should block insecure HTTP downloads', async () => {
+    await expect(llm.loadModel('http://example.com/model.litertlm'))
+      .rejects.toThrow('Insecure HTTP URLs are not allowed for model downloads');
+  });
+  it('should allow secure HTTPS downloads and strip query parameters', async () => {
+    await llm.loadModel('https://example.com/model.litertlm?token=123');
+    expect(mockLiteRTLM.downloadModel).toHaveBeenCalledWith(
+      'https://example.com/model.litertlm?token=123',
+      'model.litertlm',
+      expect.any(Function)
+    );
+    expect(mockLiteRTLM.loadModel).toHaveBeenCalledWith('/mock/path/model.litertlm', undefined);
+  });
+  it('should throw an error for invalid model URL', async () => {
+    await expect(llm.loadModel('https://example.com/'))
+      .rejects.toThrow('Invalid model URL: https://example.com/');
+  });
+  it('should successfully proxy sendMessage and record memory metrics', async () => {
+    const response = await llm.sendMessage("Test prompt");
+    expect(response).toBe("Mock response");
+    expect(mockLiteRTLM.sendMessage).toHaveBeenCalledWith("Test prompt");
+    expect(mockLiteRTLM.getMemoryUsage).toHaveBeenCalled();
+    expect(llm.memoryTracker?.getSnapshotCount()).toBe(1); // sendMessage records one
+  });
+  it('should successfully proxy resetConversation and record memory metrics', async () => {
+    await llm.resetConversation();
+    expect(mockLiteRTLM.resetConversation).toHaveBeenCalled();
+    expect(mockLiteRTLM.getMemoryUsage).toHaveBeenCalled();
+  });
+  it('should successfully proxy sendMessageAsync and record memory metrics when done', async () => {
+    const onToken = jest.fn();
+    await llm.sendMessageAsync("Async prompt", onToken);
+    expect(onToken).toHaveBeenCalledWith("Mock ", false);
+    expect(onToken).toHaveBeenCalledWith("token", true);
+    expect(mockLiteRTLM.sendMessageAsync).toHaveBeenCalled();
+    expect(mockLiteRTLM.getMemoryUsage).toHaveBeenCalled();
+  });
+  it('should successfully proxy sendMessageWithImageAsync and record memory metrics when done', async () => {
+    const onToken = jest.fn();
+    await llm.sendMessageWithImageAsync("Vision prompt", "/path/to/image.jpg", onToken);
+    expect(onToken).toHaveBeenCalledWith("Mock vision ", false);
+    expect(onToken).toHaveBeenCalledWith("token", true);
+    expect(mockLiteRTLM.sendMessageWithImageAsync).toHaveBeenCalledWith(
+      "Vision prompt",
+      "/path/to/image.jpg",
+      expect.any(Function)
+    );
+    expect(mockLiteRTLM.getMemoryUsage).toHaveBeenCalled();
+  });
+  it('should successfully proxy sendMessageWithAudioAsync and record memory metrics when done', async () => {
+    const onToken = jest.fn();
+    await llm.sendMessageWithAudioAsync("Audio prompt", "/path/to/audio.wav", onToken);
+    expect(onToken).toHaveBeenCalledWith("Mock audio ", false);
+    expect(onToken).toHaveBeenCalledWith("token", true);
+    expect(mockLiteRTLM.sendMessageWithAudioAsync).toHaveBeenCalledWith(
+      "Audio prompt",
+      "/path/to/audio.wav",
+      expect.any(Function)
+    );
+    expect(mockLiteRTLM.getMemoryUsage).toHaveBeenCalled();
+  });
+  it('should successfully access memoryTracker and getSnapshots when memory tracking is enabled', () => {
+    expect(llm.memoryTracker).toBeDefined();
+    expect(llm.memoryTracker?.getCapacity()).toBe(256);
+  });
+  it('should not initialize memoryTracker when enableMemoryTracking option is false', () => {
+    const untrackedLLM = createLLM({ enableMemoryTracking: false });
+    expect(untrackedLLM.memoryTracker).toBeUndefined();
+  });
+});

package/src/hooks.ts CHANGED

@@ -64,7 +64,8 @@ export function useModel(
   const [isGenerating, setIsGenerating] = useState(false);
   const [downloadProgress, setDownloadProgress] = useState(0);
   const [error, setError] = useState<string | null>(null);
-  const [memorySummary, setMemorySummary] = useState<MemoryTrackerSummary | null>(null);
+  const [memorySummary, setMemorySummary] =
+    useState<MemoryTrackerSummary | null>(null);
   // Destructure config into primitive values for stable dependency arrays.
   // This prevents infinite re-render loops when consumers pass inline config
@@ -78,6 +79,11 @@ export function useModel(
   const temperature = config?.temperature;
   const topK = config?.topK;
   const topP = config?.topP;
+  const validate = config?.validate;
+  const multimodal = config?.multimodal;
+  const tools = config?.tools;
+  const enableSpeculativeDecoding = config?.enableSpeculativeDecoding;
+  const toolsKey = tools ? JSON.stringify(tools) : undefined;
   // Build a stable config object from the destructured primitives
   const nativeConfig = useMemo<LLMConfig>(
@@ -88,8 +94,25 @@ export function useModel(
       ...(temperature !== undefined && { temperature }),
       ...(topK !== undefined && { topK }),
       ...(topP !== undefined && { topP }),
+      ...(validate !== undefined && { validate }),
+      ...(multimodal !== undefined && { multimodal }),
+      ...(tools !== undefined && { tools }),
+      ...(enableSpeculativeDecoding !== undefined && {
+        enableSpeculativeDecoding,
+      }),
     }),
-    [backend, systemPrompt, maxTokens, temperature, topK, topP],
+    [
+      backend,
+      systemPrompt,
+      maxTokens,
+      temperature,
+      topK,
+      topP,
+      validate,
+      multimodal,
+      toolsKey,
+      enableSpeculativeDecoding,
+    ],
   );
   /**
@@ -165,16 +188,15 @@ export function useModel(
         return new Promise<string>((resolve, reject) => {
           let fullResponse = "";
           try {
-            modelRef.current?.sendMessageAsync(
-              prompt,
-              (token: string, done: boolean) => {
+            modelRef.current
+              ?.sendMessageAsync(prompt, (token: string, done: boolean) => {
                 fullResponse += token;
                 if (done) {
                   refreshMemorySummary();
                   resolve(fullResponse);
                 }
-              },
-            );
+              })
+              .catch(reject);
           } catch (e: any) {
             reject(e);
           }

package/src/index.ts CHANGED

@@ -20,13 +20,6 @@ export type {
   MemoryUsage,
 } from "./specs/LiteRTLM.nitro";
-// Re-export template utilities
-export type { ChatMessage } from "./templates";
-export {
-  applyGemmaTemplate,
-  applyPhiTemplate,
-  applyLlamaTemplate,
-} from "./templates";
 // Re-export memory tracking utilities (uses NitroModules.createNativeArrayBuffer v0.35+)
 export type {
@@ -158,6 +151,12 @@ export function checkBackendSupport(backend: Backend): string | undefined {
  * Check if multimodal features (image/audio) are supported on the current platform.
  * Returns an error message if not supported, undefined if OK.
  *
+ * Both iOS (v0.12.0 CLiteRTLM xcframework) and Android (LiteRT-LM SDK) ship the
+ * vision/audio executor ops, so there is no platform-level block. Whether a
+ * given call succeeds depends on the **loaded model**: only multimodal models
+ * (e.g. Gemma 3n) bundle the vision/audio executors. Pass `multimodal: true` to
+ * `loadModel` for such models, or rely on filename sniffing ("3n"/"gemma3").
+ *
  * @returns Error message if multimodal is not supported, undefined if OK
  *
  * @example
@@ -172,9 +171,7 @@ export function checkBackendSupport(backend: Backend): string | undefined {
  * ```
  */
 export function checkMultimodalSupport(): string | undefined {
-  if (Platform.OS === "ios") {
-    return "Multimodal (image/audio) is not available on iOS. The XCFramework lacks compiled vision and audio executor ops.";
-  }
+  // Supported on both platforms with a multimodal model loaded.
   return undefined;
 }

package/src/modelFactory.ts CHANGED

@@ -54,92 +54,116 @@ export function createLLM(options?: {
     }
   };
-  return {
-    ...native,
-    memoryTracker: tracker,
-    loadModel: async (
-      pathOrUrl: string,
-      config?: LLMConfig,
-      onDownloadProgress?: (progress: number) => void,
-    ) => {
-      let modelPath = pathOrUrl;
-      // Check if it's a URL — enforce HTTPS for model downloads
-      if (pathOrUrl.startsWith("http://") || pathOrUrl.startsWith("https://")) {
-        if (pathOrUrl.startsWith("http://")) {
-          throw new Error(
-            "Insecure HTTP URLs are not allowed for model downloads. " +
-              "Use HTTPS instead: " +
-              pathOrUrl.replace("http://", "https://"),
-          );
-        }
-        // Extract filename from URL
-        const fileName = pathOrUrl.split("/").pop();
-        if (!fileName) {
-          throw new Error(`Invalid model URL: ${pathOrUrl}`);
-        }
-        console.log(`Checking model at ${pathOrUrl}...`);
-        modelPath = await native.downloadModel(
-          pathOrUrl,
-          fileName,
-          (progress) => {
-            onDownloadProgress?.(progress);
-          },
+  const augmentedLoadModel = async (
+    pathOrUrl: string,
+    config?: LLMConfig,
+    onDownloadProgress?: (progress: number) => void,
+  ) => {
+    let modelPath = pathOrUrl;
+    // Check if it's a URL — enforce HTTPS for model downloads
+    if (pathOrUrl.startsWith("http://") || pathOrUrl.startsWith("https://")) {
+      if (pathOrUrl.startsWith("http://")) {
+        throw new Error(
+          "Insecure HTTP URLs are not allowed for model downloads. " +
+            "Use HTTPS instead: " +
+            pathOrUrl.replace("http://", "https://"),
         );
-        console.log(`Model downloaded to: ${modelPath}`);
       }
-      const result = await native.loadModel(modelPath, config);
-      // Record initial memory snapshot after model load
-      if (tracker) {
-        tracker.reset();
-        recordMemorySnapshot();
+      // Extract filename from URL, stripping query parameters
+      const urlWithoutQuery = pathOrUrl.split("?")[0];
+      const fileName = urlWithoutQuery.split("/").pop();
+      if (!fileName) {
+        throw new Error(`Invalid model URL: ${pathOrUrl}`);
       }
-      return result;
-    },
-    sendMessage: async (...args: Parameters<typeof native.sendMessage>) => {
-      const result = await native.sendMessage(...args);
+      console.log(`Checking model at ${pathOrUrl}...`);
+      modelPath = await native.downloadModel(
+        pathOrUrl,
+        fileName,
+        (progress) => {
+          onDownloadProgress?.(progress);
+        },
+      );
+      console.log(`Model downloaded to: ${modelPath}`);
+    }
+    const result = await native.loadModel(modelPath, config);
+    // Record initial memory snapshot after model load
+    if (tracker) {
+      tracker.reset();
       recordMemorySnapshot();
-      return result;
-    },
-    sendMessageAsync: (...args: Parameters<typeof native.sendMessageAsync>) => {
-      const [message, onToken] = args;
-      native.sendMessageAsync(message, (token, done) => {
-        onToken(token, done);
-        if (done) {
+    }
+    return result;
+  };
+  const SNAPSHOT_TRIGGERS = new Set([
+    "sendMessage",
+    "sendMessageWithImage",
+    "sendMessageWithAudio",
+    "resetConversation",
+  ]);
+  return new Proxy(native, {
+    get(target, prop, receiver) {
+      if (prop === "memoryTracker") {
+        return tracker;
+      }
+      if (prop === "loadModel") {
+        return augmentedLoadModel;
+      }
+      const original = Reflect.get(target, prop, receiver);
+      if (typeof original !== "function") {
+        return original;
+      }
+      if (prop === "sendMessageAsync") {
+        return (message: string, onToken: (token: string, done: boolean) => void) => {
+          return original.call(target, message, (token: string, done: boolean) => {
+            onToken(token, done);
+            if (done) {
+              recordMemorySnapshot();
+            }
+          });
+        };
+      }
+      if (prop === "sendMessageWithImageAsync") {
+        return (message: string, imagePath: string, onToken: (token: string, done: boolean) => void) => {
+          return original.call(target, message, imagePath, (token: string, done: boolean) => {
+            onToken(token, done);
+            if (done) {
+              recordMemorySnapshot();
+            }
+          });
+        };
+      }
+      if (prop === "sendMessageWithAudioAsync") {
+        return (message: string, audioPath: string, onToken: (token: string, done: boolean) => void) => {
+          return original.call(target, message, audioPath, (token: string, done: boolean) => {
+            onToken(token, done);
+            if (done) {
+              recordMemorySnapshot();
+            }
+          });
+        };
+      }
+      if (SNAPSHOT_TRIGGERS.has(prop as string)) {
+        return async (...args: any[]) => {
+          const result = await original.apply(target, args);
           recordMemorySnapshot();
-        }
-      });
-    },
-    sendMessageWithImage: async (
-      ...args: Parameters<typeof native.sendMessageWithImage>
-    ) => {
-      const result = await native.sendMessageWithImage(...args);
-      recordMemorySnapshot();
-      return result;
-    },
-    sendMessageWithAudio: async (
-      ...args: Parameters<typeof native.sendMessageWithAudio>
-    ) => {
-      const result = await native.sendMessageWithAudio(...args);
-      recordMemorySnapshot();
-      return result;
-    },
-    getHistory: native.getHistory.bind(native),
-    resetConversation: () => {
-      native.resetConversation();
-      // KV cache is cleared on reset, record the drop
-      recordMemorySnapshot();
+          return result;
+        };
+      }
+      return original.bind(target);
     },
-    isReady: native.isReady.bind(native),
-    getStats: native.getStats.bind(native),
-    getMemoryUsage: native.getMemoryUsage.bind(native),
-    close: native.close.bind(native),
-    downloadModel: native.downloadModel.bind(native),
-    deleteModel: native.deleteModel.bind(native),
-  };
+  }) as unknown as LiteRTLMInstance;
 }

package/src/specs/LiteRTLM.nitro.ts CHANGED

@@ -17,6 +17,37 @@ export type Backend = "cpu" | "gpu" | "npu";
  */
 export type Role = "user" | "model" | "system";
+/**
+ * Definition for a function/tool that the model can request to execute.
+ */
+export interface ToolDefinition {
+  /** Name of the function/tool */
+  name: string;
+  /** Human-readable description of what the function/tool does */
+  description: string;
+  /** JSON schema defining parameter names and types (stringified) */
+  parametersJson: string;
+}
+/**
+ * The part type for a multimodal message content part.
+ */
+export type PartType = "text" | "image" | "audio";
+/**
+ * A part of a unified multimodal message payload.
+ */
+export interface MultimodalPart {
+  /** The part type: 'text', 'image', or 'audio' */
+  type: PartType;
+  /** The plain text content, if type is 'text' */
+  text?: string;
+  /** Raw image binary data, if type is 'image' (zero-copy ArrayBuffer mapping) */
+  imageBuffer?: ArrayBuffer;
+  /** Raw audio binary data, if type is 'audio' (zero-copy ArrayBuffer mapping) */
+  audioBuffer?: ArrayBuffer;
+}
 /**
  * Configuration options for loading an LLM.
  */
@@ -68,6 +99,41 @@ export interface LLMConfig {
    * @default 0.95
    */
   topP?: number;
+  /**
+   * Whether to run engine validation after loading the model.
+   * When enabled, sends a quick test inference ("Hi") and waits up to 30s
+   * for a response to confirm the backend works. This is useful for GPU/NPU
+   * backends that may silently fail during inference (they can initialize
+   * without error but produce no tokens).
+   *
+   * Validation is **always a no-op on CPU** — the CPU backend is inherently
+   * reliable and never needs validation.
+   *
+   * Disabled by default because it adds significant latency (5-30s) to model loading.
+   * Enable only to catch GPU/NPU silent failure issues during development.
+   *
+   * @default false
+   */
+  validate?: boolean;
+  /**
+   * Whether this is a multimodal model.
+   * When enabled, the engine handles image/audio tokens properly.
+   * If not specified, the system will fall back to filename sniffing.
+   */
+  multimodal?: boolean;
+  /**
+   * List of tools/functions that the model can call.
+   */
+  tools?: ToolDefinition[];
+  /**
+   * Whether to enable speculative decoding (multi-token prediction) if supported by the model.
+   * @default false
+   */
+  enableSpeculativeDecoding?: boolean;
 }
 /**
@@ -135,7 +201,7 @@ export interface MemoryUsage {
  * ```
  */
 export interface LiteRTLM extends HybridObject<{
-  ios: "c++";
+  ios: "swift";
   android: "kotlin";
 }> {
   /**
@@ -160,6 +226,19 @@ export interface LiteRTLM extends HybridObject<{
    */
   sendMessageWithImage(message: string, imagePath: string): Promise<string>;
+  /**
+   * Send a text message with an image and get a streaming response.
+   * Tokens are delivered via callback as they are generated.
+   * @param message User message text.
+   * @param imagePath Absolute path to an image file.
+   * @param onToken Callback invoked for each token (token, isDone).
+   */
+  sendMessageWithImageAsync(
+    message: string,
+    imagePath: string,
+    onToken: (token: string, done: boolean) => void,
+  ): Promise<void>;
   /**
    * Download a model file from a URL.
    * @param url URL to download from.
@@ -187,6 +266,26 @@ export interface LiteRTLM extends HybridObject<{
    */
   sendMessageWithAudio(message: string, audioPath: string): Promise<string>;
+  /**
+   * Send a text message with audio and get a streaming response.
+   * Tokens are delivered via callback as they are generated.
+   * @param message User message text.
+   * @param audioPath Absolute path to an audio file (WAV).
+   * @param onToken Callback invoked for each token (token, isDone).
+   */
+  sendMessageWithAudioAsync(
+    message: string,
+    audioPath: string,
+    onToken: (token: string, done: boolean) => void,
+  ): Promise<void>;
+  /**
+   * Send a unified multimodal message containing text and/or zero-copy binary buffers.
+   * @param parts The message content parts (text, image, and/or audio).
+   * @returns The model's response text.
+   */
+  sendMultimodalMessage(parts: MultimodalPart[]): Promise<string>;
   /**
    * Send a message with streaming response.
    * Tokens are delivered via callback as they are generated.
@@ -196,7 +295,7 @@ export interface LiteRTLM extends HybridObject<{
   sendMessageAsync(
     message: string,
     onToken: (token: string, done: boolean) => void,
-  ): void;
+  ): Promise<void>;
   /**
    * Get the current conversation history.
@@ -219,6 +318,11 @@ export interface LiteRTLM extends HybridObject<{
    */
   getStats(): GenerationStats;
+  /**
+   * Count tokens in a text string. Returns -1 if unavailable.
+   */
+  countTokens(text: string): number;
   /**
    * Get real memory usage from the native runtime.
    * Uses OS-level APIs to report actual memory consumption.