npm - react-native-litert-lm - Versions diffs - 0.2.0 → 0.2.2 - Mend

react-native-litert-lm 0.2.0 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (38) hide show

package/README.md +245 -29
package/android/src/main/java/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLM.kt +301 -58
package/cpp/HybridLiteRTLM.cpp +109 -9
package/cpp/HybridLiteRTLM.hpp +16 -0
package/cpp/cpp-adapter.cpp +10 -2
package/lib/hooks.d.ts +41 -0
package/lib/hooks.js +131 -0
package/lib/index.d.ts +30 -3
package/lib/index.js +53 -6
package/lib/memoryTracker.d.ts +128 -0
package/lib/memoryTracker.js +155 -0
package/lib/modelFactory.d.ts +18 -0
package/lib/modelFactory.js +104 -0
package/lib/specs/LiteRTLM.nitro.d.ts +38 -0
package/lib/templates.d.ts +51 -0
package/lib/templates.js +81 -0
package/nitrogen/generated/android/LiteRTLMOnLoad.cpp +22 -17
package/nitrogen/generated/android/LiteRTLMOnLoad.hpp +13 -4
package/nitrogen/generated/android/c++/JFunc_void_double.hpp +75 -0
package/nitrogen/generated/android/c++/JHybridLiteRTLMSpec.cpp +42 -1
package/nitrogen/generated/android/c++/JHybridLiteRTLMSpec.hpp +3 -0
package/nitrogen/generated/android/c++/JLLMConfig.hpp +6 -1
package/nitrogen/generated/android/c++/JMemoryUsage.hpp +69 -0
package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/Func_void_double.kt +80 -0
package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLMSpec.kt +17 -0
package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/LLMConfig.kt +5 -2
package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/MemoryUsage.kt +47 -0
package/nitrogen/generated/shared/c++/HybridLiteRTLMSpec.cpp +3 -0
package/nitrogen/generated/shared/c++/HybridLiteRTLMSpec.hpp +6 -0
package/nitrogen/generated/shared/c++/LLMConfig.hpp +7 -2
package/nitrogen/generated/shared/c++/MemoryUsage.hpp +95 -0
package/package.json +3 -3
package/src/hooks.ts +195 -0
package/src/index.ts +51 -3
package/src/memoryTracker.ts +268 -0
package/src/modelFactory.ts +120 -0
package/src/specs/LiteRTLM.nitro.ts +47 -0
package/src/templates.ts +105 -0

package/README.md CHANGED Viewed

@@ -12,6 +12,8 @@ High-performance LLM inference for React Native powered by [LiteRT-LM](https://g
 - 📱 **Cross-Platform** - Android API 26+ today (iOS support coming soon)
 - 🖼️ **Multimodal** - Image and audio input support (Android Beta, iOS coming soon)
 - 🧵 **Async API** - Non-blocking inference to prevent UI freezes
+- 📊 **Real Memory Tracking** - OS-level memory metrics (RSS, native heap, available memory) via native APIs
+- 🧮 **Zero-Copy Buffers** - Memory snapshots are stored in native ArrayBuffers via `NitroModules.createNativeArrayBuffer()` (requires react-native-nitro-modules 0.34.1+)
 ## Status
@@ -54,13 +56,44 @@ cd android && ./gradlew clean
 cd ios && pod install  # iOS coming soon
 ```
+## Example App
+The repository includes a fully functional example app in the `example/` directory with a dark-themed diagnostic UI that demonstrates model loading, inference, memory tracking, and performance stats.
+To run it:
+1.  **Build the library** (compiles TypeScript to `lib/`):
+    ```bash
+    npm run build
+    ```
+2.  **Navigate to the example directory and install dependencies:**
+    ```bash
+    cd example
+    npm install
+    ```
+3.  **Create a development build and run on Android:**
+    ```bash
+    npx expo prebuild --clean
+    npx expo run:android
+    ```
+> **Note:** If you change native code (C++/Kotlin), you must run `npx expo prebuild --clean` again.
 ## Model Management
 LiteRT-LM models (like Gemma 3n) are large files (3GB+) and cannot be bundled directly into your app's binary. You must download them at runtime to a writable directory (e.g., `DocumentDirectory`).
-### Downloading Models
+### Automatic Downloading
+The library supports automatic downloading when you pass a URL to `loadModel` or `useModel`.
-We recommend using `rn-fetch-blob` or `expo-file-system` to download models.
+### Manual Downloading (Optional)
+If you prefer to manage downloads manually (e.g., using `react-native-file-access`, `rn-fetch-blob`, or `expo-file-system`), download the file to a local path and pass that path to the library. The example below uses `react-native-file-access`:
 ```typescript
 import { FileSystem } from "react-native-file-access";
@@ -80,18 +113,53 @@ async function downloadModel() {
 ## Usage
-### Basic Generation
+### React Hook (Recommended)
+The `useModel` hook manages the model lifecycle, including downloading, loading, and unloading.
+```typescript
+import { useModel, GEMMA_3N_E2B_IT_INT4 } from "react-native-litert-lm";
+function App() {
+  const {
+    model,
+    isReady,
+    downloadProgress,
+    load,   // Manually trigger load
+    deleteModel // Delete model file
+  } = useModel(
+    GEMMA_3N_E2B_IT_INT4,
+    {
+      backend: "cpu",
+      autoLoad: true, // Default: true. Set false to load manually.
+      systemPrompt: "You are a helpful assistant."
+    }
+  );
+  if (!isReady) {
+    return <Text>Loading... {Math.round(downloadProgress * 100)}%</Text>;
+  }
+  const generate = async () => {
+    const response = await model.sendMessage("Hello!");
+    console.log(response);
+  };
+  return <Button title="Generate" onPress={generate} />;
+}
+```
+### Manual Usage
 ```typescript
 import { createLLM } from "react-native-litert-lm";
 const llm = createLLM();
-// Load a Gemma 3n model (async)
-await llm.loadModel("/path/to/gemma-3n-e2b.litertlm", {
+// Load a model from URL (auto-downloads) or local path
+await llm.loadModel("https://example.com/model.litertlm", {
   backend: "gpu",
-  temperature: 0.7,
-  maxTokens: 512,
+  systemPrompt: "You are a helpful assistant.",
 });
 // Generate response (async)
@@ -114,18 +182,26 @@ llm.sendMessageAsync("Tell me a story", (token, done) => {
 ### Multimodal (Image/Audio)
 ```typescript
-// Image input (for vision models like Gemma 3n)
-// ⚠️ Ensure model is loaded with { maxTokens: 1024+ }
-const response = await llm.sendMessageWithImage(
-  "What's in this image?",
-  "/path/to/image.jpg",
-);
-// Audio input (for audio models)
-const transcription = await llm.sendMessageWithAudio(
-  "Transcribe this audio",
-  "/path/to/audio.wav",
-);
+import { checkMultimodalSupport } from "react-native-litert-lm";
+// Check platform support first
+const error = checkMultimodalSupport();
+if (error) {
+  console.warn(error); // iOS not yet supported
+} else {
+  // Image input (for vision models like Gemma 3n)
+  // Images >1024px are automatically resized to prevent OOM
+  const response = await llm.sendMessageWithImage(
+    "What's in this image?",
+    "/path/to/image.jpg",
+  );
+  // Audio input (for audio models)
+  const transcription = await llm.sendMessageWithAudio(
+    "Transcribe this audio",
+    "/path/to/audio.wav",
+  );
+}
 ```
 ### Check Performance
@@ -136,17 +212,98 @@ console.log(`Generated ${stats.completionTokens} tokens`);
 console.log(`Speed: ${stats.tokensPerSecond.toFixed(1)} tokens/sec`);
 ```
+### Memory Tracking
+The library provides real OS-level memory usage data. You can query memory at any time, or enable automatic tracking to record snapshots after each inference call.
+#### Direct Memory Query
+```typescript
+// Get a single real-time snapshot from native APIs
+const usage = llm.getMemoryUsage();
+console.log(`Native heap: ${(usage.nativeHeapBytes / 1024 / 1024).toFixed(1)} MB`);
+console.log(`RSS: ${(usage.residentBytes / 1024 / 1024).toFixed(1)} MB`);
+console.log(`Available: ${(usage.availableMemoryBytes / 1024 / 1024).toFixed(1)} MB`);
+console.log(`Low memory: ${usage.isLowMemory}`);
+```
+#### Automatic Tracking with Native Buffers
+Enable memory tracking to automatically record snapshots in a native-backed `ArrayBuffer` (allocated via `NitroModules.createNativeArrayBuffer()`) after every inference call:
+```typescript
+import { createLLM } from 'react-native-litert-lm';
+const llm = createLLM({
+  enableMemoryTracking: true,
+  maxMemorySnapshots: 256, // default
+});
+await llm.loadModel('/path/to/model.litertlm', { backend: 'cpu' });
+await llm.sendMessage('Hello!');
+// Review tracked data
+const summary = llm.memoryTracker!.getSummary();
+console.log(`Peak RSS: ${(summary.peakResidentBytes / 1024 / 1024).toFixed(1)} MB`);
+console.log(`Peak Native Heap: ${(summary.peakNativeHeapBytes / 1024 / 1024).toFixed(1)} MB`);
+console.log(`RSS Delta: ${(summary.residentDeltaBytes / 1024 / 1024).toFixed(1)} MB`);
+console.log(`Snapshots: ${summary.snapshotCount}`);
+```
+#### Using the `useModel` Hook with Memory Tracking
+```typescript
+import { useModel } from 'react-native-litert-lm';
+const { model, isReady, memorySummary, memoryTracker } = useModel(modelUrl, {
+  enableMemoryTracking: true,
+  maxMemorySnapshots: 100,
+});
+// memorySummary auto-updates after each inference call
+if (memorySummary) {
+  console.log(`Current RSS: ${memorySummary.currentResidentBytes}`);
+  console.log(`Peak RSS: ${memorySummary.peakResidentBytes}`);
+}
+```
+#### Standalone Memory Tracker
+```typescript
+import { createMemoryTracker, createNativeBuffer } from 'react-native-litert-lm';
+// Create a tracker backed by a native ArrayBuffer
+const tracker = createMemoryTracker(100);
+// Manually record snapshots
+tracker.record({
+  timestamp: Date.now(),
+  nativeHeapBytes: 50_000_000,
+  residentBytes: 200_000_000,
+  availableMemoryBytes: 4_000_000_000,
+});
+// Access the underlying native buffer (for zero-copy transfer to native code)
+const buffer = tracker.getNativeBuffer();
+// Create a standalone native buffer for custom use
+const customBuffer = createNativeBuffer(1024);
+```
 ## Supported Models
-Download `.litertlm` models from [HuggingFace](https://huggingface.co/litert-community):
+`.litertlm` models can be downloaded automatically by passing one of the exported model constants, or manually from [HuggingFace](https://huggingface.co/litert-community):
-| Model         | Size   | Min Device RAM | Use Case                  |
-| ------------- | ------ | -------------- | ------------------------- |
-| Gemma 3n E2B  | ~3GB   | 4GB+           | Efficient, fast responses |
-| Gemma 3n E4B  | ~4GB   | 8GB+           | Higher quality            |
-| Gemma 3 1B    | ~1GB   | 4GB+           | Smallest, fastest         |
-| Phi-4 Mini    | ~2GB   | 4GB+           | Microsoft's small LLM     |
-| Qwen 2.5 1.5B | ~1.5GB | 4GB+           | Multilingual              |
+| Model Constant         | Description                            | Size | Min Device RAM |
+| :--------------------- | :------------------------------------- | :--- | :------------- |
+| `GEMMA_3N_E2B_IT_INT4` | Gemma 3n E2B (Instruction Tuned, Int4) | ~3GB | 4GB+           |
+| Other Models  | Size   | Min Device RAM | Use Case              |
+| ------------- | ------ | -------------- | --------------------- |
+| Gemma 3n E4B  | ~4GB   | 8GB+           | Higher quality        |
+| Gemma 3 1B    | ~1GB   | 4GB+           | Smallest, fastest     |
+| Phi-4 Mini    | ~2GB   | 4GB+           | Microsoft's small LLM |
+| Qwen 2.5 1.5B | ~1.5GB | 4GB+           | Multilingual          |
 ## API Reference
@@ -156,7 +313,8 @@ Creates a new LLM inference engine instance.
 ### `loadModel(path, config?): Promise<void>`
-- `path: string` - Absolute path to `.litertlm` file
+- `path: string` - Absolute path to a `.litertlm` file, or a public URL (http/https). If a URL is provided, the model is downloaded automatically before loading.
+- `config.systemPrompt` - System prompt to guide model behavior (e.g., "You are a helpful assistant.")
 - `config.backend` - `'cpu'` | `'gpu'` | `'npu'` (default: `'gpu'`)
 - `config.temperature` - Sampling temperature (default: 0.7)
 - `config.topK` - Top-K sampling (default: 40)
@@ -190,6 +348,19 @@ Send a message with an image attachment (for vision models).
 Send a message with an audio attachment (for audio models).
+### `getMemoryUsage(): MemoryUsage`
+Returns real OS-level memory usage statistics from native APIs. No estimation — reads directly from `mach_task_basic_info` (iOS) / `Debug.getNativeHeapAllocatedSize()` + `/proc/self/status` (Android).
+```typescript
+interface MemoryUsage {
+  nativeHeapBytes: number;      // Native heap allocated bytes
+  residentBytes: number;        // Process RSS in bytes
+  availableMemoryBytes: number; // Available system memory in bytes
+  isLowMemory: boolean;         // Whether the system considers memory low
+}
+```
 ### `getHistory(): Message[]`
 Get conversation history.
@@ -202,6 +373,10 @@ Clear context and start fresh.
 Release all native resources.
+### `deleteModel(fileName): Promise<void>`
+Deletes a model file from the app's internal storage and cleans up the engine instance.
 ### `getRecommendedBackend(): Backend`
 Returns the recommended backend for the current platform (usually `'gpu'`).
@@ -219,10 +394,51 @@ if (warning) {
 }
 ```
+### `checkMultimodalSupport(): string | undefined`
+Returns an error message if multimodal (image/audio) input is not supported on the current platform, or `undefined` if it is supported.
+```typescript
+import { checkMultimodalSupport } from "react-native-litert-lm";
+const error = checkMultimodalSupport();
+if (error) {
+  console.warn(error); // iOS multimodal not yet supported
+}
+```
+### Prompt Templates
+For advanced use cases where you need to manually format prompts:
+```typescript
+import {
+  applyGemmaTemplate,
+  applyPhiTemplate,
+  applyLlamaTemplate,
+  ChatMessage,
+} from "react-native-litert-lm";
+const history: ChatMessage[] = [
+  { role: "user", content: "Hello!" },
+  { role: "model", content: "Hi there!" },
+  { role: "user", content: "Tell me a joke" },
+];
+// For Gemma models
+const gemmaPrompt = applyGemmaTemplate(history, "You are a comedian.");
+// For Phi models
+const phiPrompt = applyPhiTemplate(history);
+// For Llama models
+const llamaPrompt = applyLlamaTemplate(history, "You are helpful.");
+```
 ## Requirements
 - React Native 0.76+
-- react-native-nitro-modules 0.33.2+
+- react-native-nitro-modules **0.34.1+** (required for `createNativeArrayBuffer` and memory tracking)
 - Android API 26+ (ARM64 only)
 - **LiteRT-LM Android SDK**: `0.9.0-alpha01` (bundled automatically)
 - iOS 15.0+ (coming soon)