react-native-litert-lm 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +245 -29
- package/android/src/main/java/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLM.kt +301 -58
- package/cpp/HybridLiteRTLM.cpp +109 -9
- package/cpp/HybridLiteRTLM.hpp +16 -0
- package/cpp/cpp-adapter.cpp +10 -2
- package/lib/hooks.d.ts +41 -0
- package/lib/hooks.js +131 -0
- package/lib/index.d.ts +30 -3
- package/lib/index.js +53 -6
- package/lib/memoryTracker.d.ts +128 -0
- package/lib/memoryTracker.js +155 -0
- package/lib/modelFactory.d.ts +18 -0
- package/lib/modelFactory.js +104 -0
- package/lib/specs/LiteRTLM.nitro.d.ts +38 -0
- package/lib/templates.d.ts +51 -0
- package/lib/templates.js +81 -0
- package/nitrogen/generated/android/LiteRTLMOnLoad.cpp +22 -17
- package/nitrogen/generated/android/LiteRTLMOnLoad.hpp +13 -4
- package/nitrogen/generated/android/c++/JFunc_void_double.hpp +75 -0
- package/nitrogen/generated/android/c++/JHybridLiteRTLMSpec.cpp +42 -1
- package/nitrogen/generated/android/c++/JHybridLiteRTLMSpec.hpp +3 -0
- package/nitrogen/generated/android/c++/JLLMConfig.hpp +6 -1
- package/nitrogen/generated/android/c++/JMemoryUsage.hpp +69 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/Func_void_double.kt +80 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLMSpec.kt +17 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/LLMConfig.kt +5 -2
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/MemoryUsage.kt +47 -0
- package/nitrogen/generated/shared/c++/HybridLiteRTLMSpec.cpp +3 -0
- package/nitrogen/generated/shared/c++/HybridLiteRTLMSpec.hpp +6 -0
- package/nitrogen/generated/shared/c++/LLMConfig.hpp +7 -2
- package/nitrogen/generated/shared/c++/MemoryUsage.hpp +95 -0
- package/package.json +3 -3
- package/src/hooks.ts +195 -0
- package/src/index.ts +51 -3
- package/src/memoryTracker.ts +268 -0
- package/src/modelFactory.ts +120 -0
- package/src/specs/LiteRTLM.nitro.ts +47 -0
- package/src/templates.ts +105 -0
package/README.md
CHANGED
|
@@ -12,6 +12,8 @@ High-performance LLM inference for React Native powered by [LiteRT-LM](https://g
|
|
|
12
12
|
- 📱 **Cross-Platform** - Android API 26+
|
|
13
13
|
- 🖼️ **Multimodal** - Image and audio input support (Android Beta, iOS coming soon)
|
|
14
14
|
- 🧵 **Async API** - Non-blocking inference to prevent UI freezes
|
|
15
|
+
- 📊 **Real Memory Tracking** - OS-level memory metrics (RSS, native heap, available memory) via native APIs
|
|
16
|
+
- 🧮 **Zero-Copy Buffers** - Memory snapshots stored in native ArrayBuffers via `NitroModules.createNativeArrayBuffer()` (v0.34+)
|
|
15
17
|
|
|
16
18
|
## Status
|
|
17
19
|
|
|
@@ -54,13 +56,44 @@ cd android && ./gradlew clean
|
|
|
54
56
|
cd ios && pod install # iOS coming soon
|
|
55
57
|
```
|
|
56
58
|
|
|
59
|
+
## Example App
|
|
60
|
+
|
|
61
|
+
The repository includes a fully functional example app in the `example/` directory with a dark-themed diagnostic UI that demonstrates model loading, inference, memory tracking, and performance stats.
|
|
62
|
+
|
|
63
|
+
To run it:
|
|
64
|
+
|
|
65
|
+
1. **Build the library** (compiles TypeScript to `lib/`):
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
npm run build
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
2. **Navigate to the example directory and install dependencies:**
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
cd example
|
|
75
|
+
npm install
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
3. **Create a development build and run on Android:**
|
|
79
|
+
```bash
|
|
80
|
+
npx expo prebuild --clean
|
|
81
|
+
npx expo run:android
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
> **Note:** If you change native code (C++/Kotlin), you must run `npx expo prebuild --clean` again.
|
|
85
|
+
|
|
57
86
|
## Model Management
|
|
58
87
|
|
|
59
88
|
LiteRT-LM models (like Gemma 3n) are large files (3GB+) and cannot be bundled directly into your app's binary. You must download them at runtime to a writable directory (e.g., `DocumentDirectory`).
|
|
60
89
|
|
|
61
|
-
### Downloading
|
|
90
|
+
### Automatic Downloading
|
|
91
|
+
|
|
92
|
+
The library supports automatic downloading when you pass a URL to `loadModel` or `useModel`.
|
|
62
93
|
|
|
63
|
-
|
|
94
|
+
### Manual Downloading (Optional)
|
|
95
|
+
|
|
96
|
+
If you prefer to manage downloads manually (e.g., using `rn-fetch-blob` or `expo-file-system`), you can download the file to a local path and pass that path to the library.
|
|
64
97
|
|
|
65
98
|
```typescript
|
|
66
99
|
import { FileSystem } from "react-native-file-access";
|
|
@@ -80,18 +113,53 @@ async function downloadModel() {
|
|
|
80
113
|
|
|
81
114
|
## Usage
|
|
82
115
|
|
|
83
|
-
###
|
|
116
|
+
### React Hook (Recommended)
|
|
117
|
+
|
|
118
|
+
The `useModel` hook manages the model lifecycle, including downloading, loading, and unloading.
|
|
119
|
+
|
|
120
|
+
```typescript
|
|
121
|
+
import { useModel, GEMMA_3N_E2B_IT_INT4 } from "react-native-litert-lm";
|
|
122
|
+
|
|
123
|
+
function App() {
|
|
124
|
+
const {
|
|
125
|
+
model,
|
|
126
|
+
isReady,
|
|
127
|
+
downloadProgress,
|
|
128
|
+
load, // Manually trigger load
|
|
129
|
+
deleteModel // Delete model file
|
|
130
|
+
} = useModel(
|
|
131
|
+
GEMMA_3N_E2B_IT_INT4,
|
|
132
|
+
{
|
|
133
|
+
backend: "cpu",
|
|
134
|
+
autoLoad: true, // Default: true. Set false to load manually.
|
|
135
|
+
systemPrompt: "You are a helpful assistant."
|
|
136
|
+
}
|
|
137
|
+
);
|
|
138
|
+
|
|
139
|
+
if (!isReady) {
|
|
140
|
+
return <Text>Loading... {Math.round(downloadProgress * 100)}%</Text>;
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
const generate = async () => {
|
|
144
|
+
const response = await model.sendMessage("Hello!");
|
|
145
|
+
console.log(response);
|
|
146
|
+
};
|
|
147
|
+
|
|
148
|
+
return <Button title="Generate" onPress={generate} />;
|
|
149
|
+
}
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
### Manual Usage
|
|
84
153
|
|
|
85
154
|
```typescript
|
|
86
155
|
import { createLLM } from "react-native-litert-lm";
|
|
87
156
|
|
|
88
157
|
const llm = createLLM();
|
|
89
158
|
|
|
90
|
-
// Load a
|
|
91
|
-
await llm.loadModel("/
|
|
159
|
+
// Load a model from URL (auto-downloads) or local path
|
|
160
|
+
await llm.loadModel("https://example.com/model.litertlm", {
|
|
92
161
|
backend: "gpu",
|
|
93
|
-
|
|
94
|
-
maxTokens: 512,
|
|
162
|
+
systemPrompt: "You are a helpful assistant.",
|
|
95
163
|
});
|
|
96
164
|
|
|
97
165
|
// Generate response (async)
|
|
@@ -114,18 +182,26 @@ llm.sendMessageAsync("Tell me a story", (token, done) => {
|
|
|
114
182
|
### Multimodal (Image/Audio)
|
|
115
183
|
|
|
116
184
|
```typescript
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
);
|
|
123
|
-
|
|
124
|
-
//
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
185
|
+
import { checkMultimodalSupport } from "react-native-litert-lm";
|
|
186
|
+
|
|
187
|
+
// Check platform support first
|
|
188
|
+
const error = checkMultimodalSupport();
|
|
189
|
+
if (error) {
|
|
190
|
+
console.warn(error); // iOS not yet supported
|
|
191
|
+
} else {
|
|
192
|
+
// Image input (for vision models like Gemma 3n)
|
|
193
|
+
// Images >1024px are automatically resized to prevent OOM
|
|
194
|
+
const response = await llm.sendMessageWithImage(
|
|
195
|
+
"What's in this image?",
|
|
196
|
+
"/path/to/image.jpg",
|
|
197
|
+
);
|
|
198
|
+
|
|
199
|
+
// Audio input (for audio models)
|
|
200
|
+
const transcription = await llm.sendMessageWithAudio(
|
|
201
|
+
"Transcribe this audio",
|
|
202
|
+
"/path/to/audio.wav",
|
|
203
|
+
);
|
|
204
|
+
}
|
|
129
205
|
```
|
|
130
206
|
|
|
131
207
|
### Check Performance
|
|
@@ -136,17 +212,98 @@ console.log(`Generated ${stats.completionTokens} tokens`);
|
|
|
136
212
|
console.log(`Speed: ${stats.tokensPerSecond.toFixed(1)} tokens/sec`);
|
|
137
213
|
```
|
|
138
214
|
|
|
215
|
+
### Memory Tracking
|
|
216
|
+
|
|
217
|
+
The library provides real OS-level memory usage data. You can query memory at any time, or enable automatic tracking to record snapshots after each inference call.
|
|
218
|
+
|
|
219
|
+
#### Direct Memory Query
|
|
220
|
+
|
|
221
|
+
```typescript
|
|
222
|
+
// Get a single real-time snapshot from native APIs
|
|
223
|
+
const usage = llm.getMemoryUsage();
|
|
224
|
+
console.log(`Native heap: ${(usage.nativeHeapBytes / 1024 / 1024).toFixed(1)} MB`);
|
|
225
|
+
console.log(`RSS: ${(usage.residentBytes / 1024 / 1024).toFixed(1)} MB`);
|
|
226
|
+
console.log(`Available: ${(usage.availableMemoryBytes / 1024 / 1024).toFixed(1)} MB`);
|
|
227
|
+
console.log(`Low memory: ${usage.isLowMemory}`);
|
|
228
|
+
```
|
|
229
|
+
|
|
230
|
+
#### Automatic Tracking with Native Buffers
|
|
231
|
+
|
|
232
|
+
Enable memory tracking to automatically record snapshots in a native-backed `ArrayBuffer` (allocated via `NitroModules.createNativeArrayBuffer()`) after every inference call:
|
|
233
|
+
|
|
234
|
+
```typescript
|
|
235
|
+
import { createLLM } from 'react-native-litert-lm';
|
|
236
|
+
|
|
237
|
+
const llm = createLLM({
|
|
238
|
+
enableMemoryTracking: true,
|
|
239
|
+
maxMemorySnapshots: 256, // default
|
|
240
|
+
});
|
|
241
|
+
|
|
242
|
+
await llm.loadModel('/path/to/model.litertlm', { backend: 'cpu' });
|
|
243
|
+
await llm.sendMessage('Hello!');
|
|
244
|
+
|
|
245
|
+
// Review tracked data
|
|
246
|
+
const summary = llm.memoryTracker!.getSummary();
|
|
247
|
+
console.log(`Peak RSS: ${(summary.peakResidentBytes / 1024 / 1024).toFixed(1)} MB`);
|
|
248
|
+
console.log(`Peak Native Heap: ${(summary.peakNativeHeapBytes / 1024 / 1024).toFixed(1)} MB`);
|
|
249
|
+
console.log(`RSS Delta: ${(summary.residentDeltaBytes / 1024 / 1024).toFixed(1)} MB`);
|
|
250
|
+
console.log(`Snapshots: ${summary.snapshotCount}`);
|
|
251
|
+
```
|
|
252
|
+
|
|
253
|
+
#### Using the `useModel` Hook with Memory Tracking
|
|
254
|
+
|
|
255
|
+
```typescript
|
|
256
|
+
import { useModel } from 'react-native-litert-lm';
|
|
257
|
+
|
|
258
|
+
const { model, isReady, memorySummary, memoryTracker } = useModel(modelUrl, {
|
|
259
|
+
enableMemoryTracking: true,
|
|
260
|
+
maxMemorySnapshots: 100,
|
|
261
|
+
});
|
|
262
|
+
|
|
263
|
+
// memorySummary auto-updates after each inference call
|
|
264
|
+
if (memorySummary) {
|
|
265
|
+
console.log(`Current RSS: ${memorySummary.currentResidentBytes}`);
|
|
266
|
+
console.log(`Peak RSS: ${memorySummary.peakResidentBytes}`);
|
|
267
|
+
}
|
|
268
|
+
```
|
|
269
|
+
|
|
270
|
+
#### Standalone Memory Tracker
|
|
271
|
+
|
|
272
|
+
```typescript
|
|
273
|
+
import { createMemoryTracker, createNativeBuffer } from 'react-native-litert-lm';
|
|
274
|
+
|
|
275
|
+
// Create a tracker backed by a native ArrayBuffer
|
|
276
|
+
const tracker = createMemoryTracker(100);
|
|
277
|
+
|
|
278
|
+
// Manually record snapshots
|
|
279
|
+
tracker.record({
|
|
280
|
+
timestamp: Date.now(),
|
|
281
|
+
nativeHeapBytes: 50_000_000,
|
|
282
|
+
residentBytes: 200_000_000,
|
|
283
|
+
availableMemoryBytes: 4_000_000_000,
|
|
284
|
+
});
|
|
285
|
+
|
|
286
|
+
// Access the underlying native buffer (for zero-copy transfer to native code)
|
|
287
|
+
const buffer = tracker.getNativeBuffer();
|
|
288
|
+
|
|
289
|
+
// Create a standalone native buffer for custom use
|
|
290
|
+
const customBuffer = createNativeBuffer(1024);
|
|
291
|
+
```
|
|
292
|
+
|
|
139
293
|
## Supported Models
|
|
140
294
|
|
|
141
|
-
Download `.litertlm` models from [HuggingFace](https://huggingface.co/litert-community):
|
|
295
|
+
Download `.litertlm` models automatically using the exported model constants, or manually from [HuggingFace](https://huggingface.co/litert-community):
|
|
142
296
|
|
|
143
|
-
| Model | Size
|
|
144
|
-
|
|
|
145
|
-
| Gemma 3n E2B
|
|
146
|
-
|
|
147
|
-
|
|
|
148
|
-
|
|
|
149
|
-
|
|
|
297
|
+
| Model Constant | Description | Size | Min Device RAM |
|
|
298
|
+
| :--------------------- | :------------------------------------- | :--- | :------------- |
|
|
299
|
+
| `GEMMA_3N_E2B_IT_INT4` | Gemma 3n E2B (Instruction Tuned, Int4) | ~3GB | 4GB+ |
|
|
300
|
+
|
|
301
|
+
| Other Models | Size | Min Device RAM | Use Case |
|
|
302
|
+
| ------------- | ------ | -------------- | --------------------- |
|
|
303
|
+
| Gemma 3n E4B | ~4GB | 8GB+ | Higher quality |
|
|
304
|
+
| Gemma 3 1B | ~1GB | 4GB+ | Smallest, fastest |
|
|
305
|
+
| Phi-4 Mini | ~2GB | 4GB+ | Microsoft's small LLM |
|
|
306
|
+
| Qwen 2.5 1.5B | ~1.5GB | 4GB+ | Multilingual |
|
|
150
307
|
|
|
151
308
|
## API Reference
|
|
152
309
|
|
|
@@ -156,7 +313,8 @@ Creates a new LLM inference engine instance.
|
|
|
156
313
|
|
|
157
314
|
### `loadModel(path, config?): Promise<void>`
|
|
158
315
|
|
|
159
|
-
- `path: string` - Absolute path to `.litertlm` file
|
|
316
|
+
- `path: string` - Absolute path to a `.litertlm` file, or a public URL (http/https). If a URL is provided, the model will be downloaded automatically.
|
|
317
|
+
- `config.systemPrompt` - System prompt to guide model behavior (e.g., "You are a helpful assistant.")
|
|
160
318
|
- `config.backend` - `'cpu'` | `'gpu'` | `'npu'` (default: `'gpu'`)
|
|
161
319
|
- `config.temperature` - Sampling temperature (default: 0.7)
|
|
162
320
|
- `config.topK` - Top-K sampling (default: 40)
|
|
@@ -190,6 +348,19 @@ Send a message with an image attachment (for vision models).
|
|
|
190
348
|
|
|
191
349
|
Send a message with an audio attachment (for audio models).
|
|
192
350
|
|
|
351
|
+
### `getMemoryUsage(): MemoryUsage`
|
|
352
|
+
|
|
353
|
+
Returns real OS-level memory usage statistics from native APIs. No estimation — reads directly from `mach_task_basic_info` (iOS) / `Debug.getNativeHeapAllocatedSize()` + `/proc/self/status` (Android).
|
|
354
|
+
|
|
355
|
+
```typescript
|
|
356
|
+
interface MemoryUsage {
|
|
357
|
+
nativeHeapBytes: number; // Native heap allocated bytes
|
|
358
|
+
residentBytes: number; // Process RSS in bytes
|
|
359
|
+
availableMemoryBytes: number; // Available system memory in bytes
|
|
360
|
+
isLowMemory: boolean; // Whether the system considers memory low
|
|
361
|
+
}
|
|
362
|
+
```
|
|
363
|
+
|
|
193
364
|
### `getHistory(): Message[]`
|
|
194
365
|
|
|
195
366
|
Get conversation history.
|
|
@@ -202,6 +373,10 @@ Clear context and start fresh.
|
|
|
202
373
|
|
|
203
374
|
Release all native resources.
|
|
204
375
|
|
|
376
|
+
### `deleteModel(fileName): Promise<void>`
|
|
377
|
+
|
|
378
|
+
Deletes a model file from the app's internal storage and cleans up the engine instance.
|
|
379
|
+
|
|
205
380
|
### `getRecommendedBackend(): Backend`
|
|
206
381
|
|
|
207
382
|
Returns the recommended backend for the current platform (usually `'gpu'`).
|
|
@@ -219,10 +394,51 @@ if (warning) {
|
|
|
219
394
|
}
|
|
220
395
|
```
|
|
221
396
|
|
|
397
|
+
### `checkMultimodalSupport(): string | undefined`
|
|
398
|
+
|
|
399
|
+
Returns an error message if multimodal (image/audio) is not supported on the current platform, or `undefined` if OK.
|
|
400
|
+
|
|
401
|
+
```typescript
|
|
402
|
+
import { checkMultimodalSupport } from "react-native-litert-lm";
|
|
403
|
+
|
|
404
|
+
const error = checkMultimodalSupport();
|
|
405
|
+
if (error) {
|
|
406
|
+
console.warn(error); // iOS multimodal not yet supported
|
|
407
|
+
}
|
|
408
|
+
```
|
|
409
|
+
|
|
410
|
+
### Prompt Templates
|
|
411
|
+
|
|
412
|
+
For advanced use cases where you need to manually format prompts:
|
|
413
|
+
|
|
414
|
+
```typescript
|
|
415
|
+
import {
|
|
416
|
+
applyGemmaTemplate,
|
|
417
|
+
applyPhiTemplate,
|
|
418
|
+
applyLlamaTemplate,
|
|
419
|
+
ChatMessage,
|
|
420
|
+
} from "react-native-litert-lm";
|
|
421
|
+
|
|
422
|
+
const history: ChatMessage[] = [
|
|
423
|
+
{ role: "user", content: "Hello!" },
|
|
424
|
+
{ role: "model", content: "Hi there!" },
|
|
425
|
+
{ role: "user", content: "Tell me a joke" },
|
|
426
|
+
];
|
|
427
|
+
|
|
428
|
+
// For Gemma models
|
|
429
|
+
const gemmaPrompt = applyGemmaTemplate(history, "You are a comedian.");
|
|
430
|
+
|
|
431
|
+
// For Phi models
|
|
432
|
+
const phiPrompt = applyPhiTemplate(history);
|
|
433
|
+
|
|
434
|
+
// For Llama models
|
|
435
|
+
const llamaPrompt = applyLlamaTemplate(history, "You are helpful.");
|
|
436
|
+
```
|
|
437
|
+
|
|
222
438
|
## Requirements
|
|
223
439
|
|
|
224
440
|
- React Native 0.76+
|
|
225
|
-
- react-native-nitro-modules 0.
|
|
441
|
+
- react-native-nitro-modules **0.34.1+** (required for `createNativeArrayBuffer` and memory tracking)
|
|
226
442
|
- Android API 26+ (ARM64 only)
|
|
227
443
|
- **LiteRT-LM Android SDK**: `0.9.0-alpha01` (bundled automatically)
|
|
228
444
|
- iOS 15.0+ (coming soon)
|