react-native-litert-lm 0.3.6 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +207 -158
- package/android/build.gradle +12 -0
- package/android/src/main/AndroidManifest.xml +5 -0
- package/android/src/main/java/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLM.kt +316 -63
- package/android/src/main/java/dev/litert/litertlm/LiteRTLMPackage.kt +19 -2
- package/android/src/test/java/com/margelo/nitro/core/Promise.kt +46 -0
- package/android/src/test/java/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLMTest.kt +83 -0
- package/cpp/include/README.md +9 -11
- package/ios/HybridLiteRTLM.swift +1058 -0
- package/ios/Tests/HybridLiteRTLMTests.swift +67 -0
- package/lib/__mocks__/react-native-nitro-modules.d.ts +61 -0
- package/lib/__mocks__/react-native-nitro-modules.js +50 -0
- package/lib/__tests__/hooks.test.d.ts +1 -0
- package/lib/__tests__/hooks.test.js +124 -0
- package/lib/__tests__/memoryTracker.test.d.ts +1 -0
- package/lib/__tests__/memoryTracker.test.js +74 -0
- package/lib/__tests__/modelFactory.test.d.ts +1 -0
- package/lib/__tests__/modelFactory.test.js +52 -0
- package/lib/hooks.js +1 -1
- package/lib/index.d.ts +2 -4
- package/lib/index.js +12 -7
- package/lib/modelFactory.js +62 -63
- package/lib/specs/LiteRTLM.nitro.d.ts +71 -2
- package/nitrogen/generated/android/c++/JHybridLiteRTLMSpec.cpp +62 -7
- package/nitrogen/generated/android/c++/JHybridLiteRTLMSpec.hpp +3 -1
- package/nitrogen/generated/android/c++/JLLMConfig.hpp +40 -3
- package/nitrogen/generated/android/c++/JMultimodalPart.hpp +74 -0
- package/nitrogen/generated/android/c++/JPartType.hpp +61 -0
- package/nitrogen/generated/android/c++/JToolDefinition.hpp +65 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/GenerationStats.kt +23 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLMSpec.kt +10 -2
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/LLMConfig.kt +46 -3
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/MemoryUsage.kt +19 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/Message.kt +15 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/MultimodalPart.kt +66 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/PartType.kt +24 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/ToolDefinition.kt +61 -0
- package/nitrogen/generated/ios/LiteRTLM-Swift-Cxx-Bridge.cpp +57 -1
- package/nitrogen/generated/ios/LiteRTLM-Swift-Cxx-Bridge.hpp +414 -3
- package/nitrogen/generated/ios/LiteRTLM-Swift-Cxx-Umbrella.hpp +41 -3
- package/nitrogen/generated/ios/LiteRTLMAutolinking.mm +4 -6
- package/nitrogen/generated/ios/LiteRTLMAutolinking.swift +10 -0
- package/nitrogen/generated/ios/c++/HybridLiteRTLMSpecSwift.cpp +11 -0
- package/nitrogen/generated/ios/c++/HybridLiteRTLMSpecSwift.hpp +224 -0
- package/nitrogen/generated/ios/swift/Backend.swift +44 -0
- package/nitrogen/generated/ios/swift/Func_void.swift +46 -0
- package/nitrogen/generated/ios/swift/Func_void_double.swift +46 -0
- package/nitrogen/generated/ios/swift/Func_void_std__exception_ptr.swift +46 -0
- package/nitrogen/generated/ios/swift/Func_void_std__string.swift +46 -0
- package/nitrogen/generated/ios/swift/Func_void_std__string_bool.swift +46 -0
- package/nitrogen/generated/ios/swift/GenerationStats.swift +54 -0
- package/nitrogen/generated/ios/swift/HybridLiteRTLMSpec.swift +69 -0
- package/nitrogen/generated/ios/swift/HybridLiteRTLMSpec_cxx.swift +383 -0
- package/nitrogen/generated/ios/swift/LLMConfig.swift +203 -0
- package/nitrogen/generated/ios/swift/MemoryUsage.swift +44 -0
- package/nitrogen/generated/ios/swift/Message.swift +34 -0
- package/nitrogen/generated/ios/swift/MultimodalPart.swift +83 -0
- package/nitrogen/generated/ios/swift/PartType.swift +44 -0
- package/nitrogen/generated/ios/swift/Role.swift +44 -0
- package/nitrogen/generated/ios/swift/ToolDefinition.swift +39 -0
- package/nitrogen/generated/shared/c++/HybridLiteRTLMSpec.cpp +2 -0
- package/nitrogen/generated/shared/c++/HybridLiteRTLMSpec.hpp +7 -2
- package/nitrogen/generated/shared/c++/LLMConfig.hpp +22 -2
- package/nitrogen/generated/shared/c++/MultimodalPart.hpp +99 -0
- package/nitrogen/generated/shared/c++/PartType.hpp +80 -0
- package/nitrogen/generated/shared/c++/ToolDefinition.hpp +91 -0
- package/package.json +16 -8
- package/react-native-litert-lm.podspec +15 -19
- package/scripts/download-ios-frameworks.sh +14 -48
- package/scripts/postinstall.js +1 -2
- package/src/__mocks__/react-native-nitro-modules.ts +48 -0
- package/src/__tests__/hooks.test.ts +153 -0
- package/src/__tests__/memoryTracker.test.ts +87 -0
- package/src/__tests__/modelFactory.test.ts +68 -0
- package/src/hooks.ts +1 -1
- package/src/index.ts +12 -9
- package/src/modelFactory.ts +82 -80
- package/src/specs/LiteRTLM.nitro.ts +80 -2
- package/cpp/HybridLiteRTLM.cpp +0 -838
- package/cpp/HybridLiteRTLM.hpp +0 -167
- package/cpp/IOSDownloadHelper.h +0 -24
- package/ios/IOSDownloadHelper.mm +0 -129
- package/scripts/build-ios-engine.sh +0 -302
- package/scripts/stubs/cxx_bridge_stubs.cc +0 -224
- package/scripts/stubs/gemma_model_constraint_provider.cc +0 -46
- package/scripts/stubs/llguidance_stubs.c +0 -101
- package/src/templates.ts +0 -105
package/README.md
CHANGED
|
@@ -4,16 +4,21 @@ High-performance on-device LLM inference for React Native, powered by [LiteRT-LM
|
|
|
4
4
|
|
|
5
5
|
## Features
|
|
6
6
|
|
|
7
|
-
- 🚀 **Native
|
|
8
|
-
-
|
|
9
|
-
- ⚡ **
|
|
10
|
-
-
|
|
11
|
-
-
|
|
12
|
-
-
|
|
13
|
-
-
|
|
14
|
-
- 📊 **Real Memory Tracking** — OS-level memory metrics (RSS, native heap, available memory) via native APIs
|
|
15
|
-
-
|
|
16
|
-
|
|
7
|
+
- 🚀 **Native Swift Bridge (iOS)** — Bypasses Swift actor deadlocks (User Rule #1; see "Key Design Commitments" below) via direct C FFI dispatched on a serial `dev.litert.engine` background queue.
|
|
8
|
+
- 🤖 **Stateless Kotlin Bridge (Android)** — Fully conforms to `HybridLiteRTLMSpec` using direct JSI memory access.
|
|
9
|
+
- ⚡ **Zero-Copy Multimodal API** — Native-owned `ArrayBuffer` mapping straight to FFI inputs for image/audio data without copy overhead (complying with User Rule #2).
|
|
10
|
+
- 🧠 **Speculative Decoding** — Active multi-token prediction support with pre-flight model capability validation.
|
|
11
|
+
- 🛠️ **Function / Tool Calling** — Native JSON-encoded schema specification support for structured outputs.
|
|
12
|
+
- 🏎️ **GPU Acceleration** — Metal (iOS), OpenCL GPU delegate (Android, Pixel devices).
|
|
13
|
+
- 🔄 **Streaming Support** — Non-blocking token-by-token callbacks.
|
|
14
|
+
- 📊 **Real Memory Tracking** — OS-level memory metrics (RSS, native heap, available memory) via native APIs (complying with User Rule #3).
|
|
15
|
+
- 📥 **Automatic Model Download** — Downloads models from URL with progress tracking and local caching.
|
|
16
|
+
|
|
17
|
+
## Demo
|
|
18
|
+
|
|
19
|
+
> Gemma 4 E2B running on-device on a Samsung Galaxy S22 (Snapdragon 8 Gen 1, 4 GB RAM) — CPU backend, streaming inference.
|
|
20
|
+
|
|
21
|
+
<video src="https://github.com/user-attachments/assets/1da527ce-0432-4f8b-8899-474f81b2feea" width="300" controls></video>
|
|
17
22
|
|
|
18
23
|
## Installation
|
|
19
24
|
|
|
@@ -65,7 +70,8 @@ The `example/` directory contains a fully functional test app with a dark-themed
|
|
|
65
70
|
- Multi-turn conversation with context retention
|
|
66
71
|
- Performance benchmarking (tokens/sec, latency)
|
|
67
72
|
- Real-time memory tracking
|
|
68
|
-
-
|
|
73
|
+
- Speculative decoding & tool calling settings toggles
|
|
74
|
+
- Zero-copy multimodal inference loading images/audio directly into ArrayBuffers
|
|
69
75
|
|
|
70
76
|
### Running the Example
|
|
71
77
|
|
|
@@ -87,42 +93,45 @@ The `example/` directory contains a fully functional test app with a dark-themed
|
|
|
87
93
|
```bash
|
|
88
94
|
npx expo prebuild --clean
|
|
89
95
|
npx expo run:android # Android
|
|
90
|
-
npx expo run:ios # iOS (
|
|
96
|
+
npx expo run:ios # iOS (pre-linked with CLiteRTLM.xcframework)
|
|
91
97
|
```
|
|
92
98
|
|
|
93
|
-
> **Note:** If you change native code (
|
|
99
|
+
> **Note:** If you change native code (Swift/Kotlin), you must run `npx expo prebuild --clean` again before rebuilding.
|
|
94
100
|
|
|
95
101
|
## Model Management
|
|
96
102
|
|
|
97
|
-
LiteRT-LM models (like Gemma 4) are large files (
|
|
103
|
+
LiteRT-LM models (like Gemma 4) are large files (1–4 GB) and cannot be bundled into your app binary. They are downloaded at runtime.
|
|
98
104
|
|
|
99
105
|
### Automatic Downloading
|
|
100
106
|
|
|
101
|
-
|
|
107
|
+
Pass an HTTPS URL to `useModel()` or `loadModel()` — the library handles the rest:
|
|
102
108
|
|
|
103
109
|
- **Progress tracking** — real-time download percentage via callbacks
|
|
104
110
|
- **Local caching** — downloaded models are cached and reused across app launches
|
|
105
|
-
- **Android**: app-
|
|
111
|
+
- **Android**: `files/models/` (app-private)
|
|
106
112
|
- **iOS**: `Library/Caches/litert_models/` (survives app relaunch; reclaimable by iOS under storage pressure)
|
|
107
113
|
- **HTTPS enforcement** — only secure URLs are accepted
|
|
108
114
|
|
|
109
|
-
### Manual Downloading
|
|
115
|
+
### Manual Downloading
|
|
110
116
|
|
|
111
|
-
If you
|
|
117
|
+
If you need custom control over downloads (e.g., authentication headers for private model hosting, resumable downloads, or custom caching), use your preferred HTTP client and pass the local file path:
|
|
112
118
|
|
|
113
119
|
```typescript
|
|
114
|
-
import
|
|
115
|
-
import {
|
|
120
|
+
import { fetch } from "expo/fetch";
|
|
121
|
+
import { File, Paths } from "expo-file-system";
|
|
122
|
+
import { useModel } from "react-native-litert-lm";
|
|
116
123
|
|
|
117
|
-
const
|
|
124
|
+
const MODEL_URL = "https://example.com/private-model.litertlm";
|
|
118
125
|
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
126
|
+
// Download with custom headers using expo/fetch
|
|
127
|
+
const response = await fetch(MODEL_URL, {
|
|
128
|
+
headers: { Authorization: `Bearer ${token}` },
|
|
129
|
+
});
|
|
130
|
+
const modelFile = new File(Paths.cache, "my-model.litertlm");
|
|
131
|
+
modelFile.write(await response.bytes());
|
|
122
132
|
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
}
|
|
133
|
+
// Pass the local path — no download occurs
|
|
134
|
+
const { model, isReady } = useModel(modelFile.uri, { backend: "cpu" });
|
|
126
135
|
```
|
|
127
136
|
|
|
128
137
|
## Usage
|
|
@@ -194,32 +203,80 @@ llm.sendMessageAsync("Tell me a story", (token, done) => {
|
|
|
194
203
|
});
|
|
195
204
|
```
|
|
196
205
|
|
|
197
|
-
### Multimodal (Image / Audio)
|
|
206
|
+
### Multimodal (Image / Audio) & Zero-Copy Buffers
|
|
207
|
+
|
|
208
|
+
Multimodal features are fully supported via standard file paths or high-performance zero-copy `ArrayBuffer` objects:
|
|
198
209
|
|
|
199
|
-
|
|
210
|
+
#### 1. Zero-Copy Multimodal Messages (Recommended)
|
|
211
|
+
This API uses Nitro Modules' native-backed `ArrayBuffer` directly mapped to native memory buffers, avoiding any base64 heap copying overhead (User Rule #2):
|
|
200
212
|
|
|
201
213
|
```typescript
|
|
202
214
|
import { checkMultimodalSupport } from "react-native-litert-lm";
|
|
203
215
|
|
|
204
216
|
const warning = checkMultimodalSupport();
|
|
205
217
|
if (warning) {
|
|
206
|
-
console.warn(warning); // Experimental on iOS
|
|
218
|
+
console.warn(warning); // Experimental or unsupported on current platform (e.g. iOS simulator)
|
|
207
219
|
} else {
|
|
208
|
-
//
|
|
209
|
-
|
|
210
|
-
const
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
"Transcribe this audio",
|
|
218
|
-
"/path/to/audio.wav",
|
|
219
|
-
);
|
|
220
|
+
// Read local assets or files straight into ArrayBuffers using fetch
|
|
221
|
+
const response = await fetch(Image.resolveAssetSource(require("./test.jpeg")).uri);
|
|
222
|
+
const imageBuffer = await response.arrayBuffer();
|
|
223
|
+
|
|
224
|
+
const reply = await llm.sendMultimodalMessage([
|
|
225
|
+
{ type: "image", imageBuffer },
|
|
226
|
+
{ type: "text", text: "Describe what is in this image." }
|
|
227
|
+
]);
|
|
228
|
+
console.log(reply);
|
|
220
229
|
}
|
|
221
230
|
```
|
|
222
231
|
|
|
232
|
+
#### 2. Path-Based Multimodal Messages
|
|
233
|
+
```typescript
|
|
234
|
+
// Image input
|
|
235
|
+
const response = await llm.sendMessageWithImage(
|
|
236
|
+
"What's in this image?",
|
|
237
|
+
"/path/to/image.jpg",
|
|
238
|
+
);
|
|
239
|
+
|
|
240
|
+
// Audio input
|
|
241
|
+
const transcription = await llm.sendMessageWithAudio(
|
|
242
|
+
"Transcribe this audio",
|
|
243
|
+
"/path/to/audio.wav",
|
|
244
|
+
);
|
|
245
|
+
```
|
|
246
|
+
|
|
247
|
+
### Speculative Decoding & Tools
|
|
248
|
+
|
|
249
|
+
#### 1. Speculative Decoding (MTP)
|
|
250
|
+
Enable speculative decoding in `LLMConfig` to accelerate inference using multi-token prediction when supported by your model:
|
|
251
|
+
|
|
252
|
+
```typescript
|
|
253
|
+
const { model } = useModel(GEMMA_4_E2B_IT, {
|
|
254
|
+
enableSpeculativeDecoding: true,
|
|
255
|
+
});
|
|
256
|
+
```
|
|
257
|
+
|
|
258
|
+
#### 2. Function / Tool Calling
|
|
259
|
+
Inject tools as an array of definitions, specifying parameter validation using standard JSON schema format:
|
|
260
|
+
|
|
261
|
+
```typescript
|
|
262
|
+
const { model } = useModel(GEMMA_4_E2B_IT, {
|
|
263
|
+
tools: [
|
|
264
|
+
{
|
|
265
|
+
name: "get_current_weather",
|
|
266
|
+
description: "Get the current weather for a location",
|
|
267
|
+
parametersJson: JSON.stringify({
|
|
268
|
+
type: "object",
|
|
269
|
+
properties: {
|
|
270
|
+
location: { type: "string", description: "The city and state, e.g. San Francisco, CA" },
|
|
271
|
+
unit: { type: "string", enum: ["celsius", "fahrenheit"] }
|
|
272
|
+
},
|
|
273
|
+
required: ["location"]
|
|
274
|
+
})
|
|
275
|
+
}
|
|
276
|
+
]
|
|
277
|
+
});
|
|
278
|
+
```
|
|
279
|
+
|
|
223
280
|
### Performance Stats
|
|
224
281
|
|
|
225
282
|
```typescript
|
|
@@ -229,6 +286,8 @@ console.log(`Speed: ${stats.tokensPerSecond.toFixed(1)} tokens/sec`);
|
|
|
229
286
|
console.log(`Time to first token: ${stats.timeToFirstToken.toFixed(0)} ms`);
|
|
230
287
|
```
|
|
231
288
|
|
|
289
|
+
> **Note**: Stats are available for both sync (`sendMessage`) and streaming (`sendMessageAsync`) on both platforms. iOS uses real benchmark data from the C API; Android uses heuristic token counts with precise timing.
|
|
290
|
+
|
|
232
291
|
### Memory Tracking
|
|
233
292
|
|
|
234
293
|
The library provides real OS-level memory data — no estimation. It reads directly from `mach_task_basic_info` (iOS) and `Debug.getNativeHeapAllocatedSize()` + `/proc/self/status` (Android).
|
|
@@ -307,19 +366,19 @@ const buffer = tracker.getNativeBuffer();
|
|
|
307
366
|
|
|
308
367
|
## Supported Models
|
|
309
368
|
|
|
310
|
-
|
|
369
|
+
All exported model URLs are **public — no authentication required**. Pass them directly to `useModel()` or `loadModel()` for automatic downloading with progress tracking and local caching.
|
|
311
370
|
|
|
312
|
-
| Constant | Model | Size | Min RAM |
|
|
313
|
-
| :--------------------- | :------------------------------ | :------ | :------ |
|
|
314
|
-
| `GEMMA_4_E2B_IT` | Gemma 4 E2B (Multimodal, IT) | 2.58 GB | 4 GB+ |
|
|
315
|
-
| `GEMMA_4_E4B_IT` | Gemma 4 E4B (Higher Quality) | 3.65 GB | 6 GB+ |
|
|
316
|
-
| `GEMMA_3N_E2B_IT_INT4` | Gemma 3n E2B (Int4, Multimodal) | ~1.3 GB | 4 GB+ |
|
|
371
|
+
| Constant | Model | Size | Min RAM | Source |
|
|
372
|
+
| :--------------------- | :------------------------------ | :------ | :------ | :---------- |
|
|
373
|
+
| `GEMMA_4_E2B_IT` | Gemma 4 E2B (Multimodal, IT) | 2.58 GB | 4 GB+ | HuggingFace |
|
|
374
|
+
| `GEMMA_4_E4B_IT` | Gemma 4 E4B (Higher Quality) | 3.65 GB | 6 GB+ | HuggingFace |
|
|
375
|
+
| `GEMMA_3N_E2B_IT_INT4` | Gemma 3n E2B (Int4, Multimodal) | ~1.3 GB | 4 GB+ | litert.dev |
|
|
317
376
|
|
|
318
|
-
> **Recommended:** Use `GEMMA_4_E2B_IT` for most use cases
|
|
377
|
+
> **Recommended:** Use `GEMMA_4_E2B_IT` for most use cases — multimodal (text + vision + audio) and the best quality-to-size ratio.
|
|
319
378
|
>
|
|
320
|
-
> **iOS Note:** Models larger than ~2 GB
|
|
379
|
+
> **iOS Note:** Models larger than ~2 GB require the `com.apple.developer.kernel.extended-virtual-addressing` entitlement. See [iOS Entitlements](#ios-entitlements) below. Gemma 3n E2B (~1.3 GB) works without it.
|
|
321
380
|
|
|
322
|
-
**Other compatible models** (download manually from HuggingFace):
|
|
381
|
+
**Other compatible models** (download `.litertlm` files manually from [HuggingFace](https://huggingface.co/litert-community)):
|
|
323
382
|
|
|
324
383
|
| Model | Size | Min RAM | Notes |
|
|
325
384
|
| ------------- | ------- | ------- | --------------------- |
|
|
@@ -352,13 +411,15 @@ Loads a model from a local path or HTTPS URL.
|
|
|
352
411
|
|
|
353
412
|
#### Backend Options
|
|
354
413
|
|
|
355
|
-
| Backend | Engine
|
|
356
|
-
| ------- |
|
|
357
|
-
| `'cpu'` | CPU inference
|
|
358
|
-
| `'gpu'` |
|
|
359
|
-
| `'npu'` | NPU / Neural Engine
|
|
414
|
+
| Backend | Engine | Speed | Notes |
|
|
415
|
+
| ------- | ------------------------------ | ------- | ---------------------------------------------------------------------------------- |
|
|
416
|
+
| `'cpu'` | CPU inference | Slowest | Always available on all devices |
|
|
417
|
+
| `'gpu'` | Metal (iOS) / OpenCL (Android) | Fast | iOS: always available. Android: requires OpenCL (Pixel only, not Samsung/Qualcomm) |
|
|
418
|
+
| `'npu'` | NPU / Neural Engine | Fastest | Requires supported hardware; experimental |
|
|
360
419
|
|
|
361
|
-
> **iOS**: `'cpu'`
|
|
420
|
+
> **iOS**: Both `'cpu'` and `'gpu'` (Metal) are supported. The engine automatically tries fallback backend combinations if the primary one fails.
|
|
421
|
+
>
|
|
422
|
+
> **Android GPU**: The GPU backend requires OpenCL, which is **not available on most Samsung and Qualcomm devices**. Use `checkBackendSupport('gpu')` to check before loading. The engine will throw a clear error if GPU is unsupported.
|
|
362
423
|
|
|
363
424
|
### `sendMessage(message): Promise<string>`
|
|
364
425
|
|
|
@@ -380,30 +441,10 @@ Send a message with audio (for audio-capable models like Gemma 4 E2B).
|
|
|
380
441
|
|
|
381
442
|
Returns performance metrics from the last inference call.
|
|
382
443
|
|
|
383
|
-
```typescript
|
|
384
|
-
interface GenerationStats {
|
|
385
|
-
tokensPerSecond: number;
|
|
386
|
-
totalTime: number; // seconds
|
|
387
|
-
timeToFirstToken: number; // seconds
|
|
388
|
-
promptTokens: number;
|
|
389
|
-
completionTokens: number;
|
|
390
|
-
prefillSpeed: number; // tokens/sec
|
|
391
|
-
}
|
|
392
|
-
```
|
|
393
|
-
|
|
394
444
|
### `getMemoryUsage(): MemoryUsage`
|
|
395
445
|
|
|
396
446
|
Returns real OS-level memory usage.
|
|
397
447
|
|
|
398
|
-
```typescript
|
|
399
|
-
interface MemoryUsage {
|
|
400
|
-
nativeHeapBytes: number;
|
|
401
|
-
residentBytes: number;
|
|
402
|
-
availableMemoryBytes: number;
|
|
403
|
-
isLowMemory: boolean;
|
|
404
|
-
}
|
|
405
|
-
```
|
|
406
|
-
|
|
407
448
|
### `getHistory(): Message[]`
|
|
408
449
|
|
|
409
450
|
Returns the conversation history.
|
|
@@ -427,21 +468,19 @@ import {
|
|
|
427
468
|
checkBackendSupport,
|
|
428
469
|
checkMultimodalSupport,
|
|
429
470
|
getRecommendedBackend,
|
|
430
|
-
applyGemmaTemplate,
|
|
431
|
-
applyPhiTemplate,
|
|
432
|
-
applyLlamaTemplate,
|
|
433
471
|
} from "react-native-litert-lm";
|
|
434
472
|
|
|
435
|
-
// Check if
|
|
436
|
-
const
|
|
473
|
+
// Check if GPU is supported on this device
|
|
474
|
+
const gpuWarning = checkBackendSupport("gpu");
|
|
475
|
+
|
|
476
|
+
// Check NPU support
|
|
477
|
+
const npuWarning = checkBackendSupport("npu"); // string | undefined
|
|
478
|
+
|
|
479
|
+
// Check multimodal support
|
|
437
480
|
const mmError = checkMultimodalSupport(); // string | undefined
|
|
438
|
-
const backend = getRecommendedBackend(); // 'gpu' | 'cpu'
|
|
439
481
|
|
|
440
|
-
//
|
|
441
|
-
const
|
|
442
|
-
[{ role: "user", content: "Hello!" }],
|
|
443
|
-
"You are helpful.",
|
|
444
|
-
);
|
|
482
|
+
// Get recommended backend
|
|
483
|
+
const backend = getRecommendedBackend(); // 'cpu'
|
|
445
484
|
```
|
|
446
485
|
|
|
447
486
|
## Requirements
|
|
@@ -452,29 +491,29 @@ const prompt = applyGemmaTemplate(
|
|
|
452
491
|
| react-native-nitro-modules | 0.35.0+ |
|
|
453
492
|
| Android API | 26+ (ARM64) |
|
|
454
493
|
| iOS | 15.0+ (ARM64) |
|
|
455
|
-
| LiteRT-LM Engine | 0.
|
|
494
|
+
| LiteRT-LM Engine | 0.12.0 |
|
|
456
495
|
|
|
457
496
|
## Platform Support
|
|
458
497
|
|
|
459
|
-
| Platform | Status | Architecture | Backends
|
|
460
|
-
| -------- | -------- | ------------ |
|
|
461
|
-
| Android | ✅ Ready | arm64-v8a | CPU, GPU, NPU
|
|
462
|
-
| iOS | ✅ Ready | arm64 | CPU, GPU (Metal)
|
|
498
|
+
| Platform | Status | Architecture | Backends |
|
|
499
|
+
| -------- | -------- | ------------ | ------------------------------------------------- |
|
|
500
|
+
| Android | ✅ Ready | arm64-v8a | CPU (all devices), GPU (OpenCL devices only), NPU |
|
|
501
|
+
| iOS | ✅ Ready | arm64 | CPU, GPU (Metal — always available) |
|
|
463
502
|
|
|
464
503
|
### iOS Feature Matrix
|
|
465
504
|
|
|
466
|
-
| Feature | Status | Notes
|
|
467
|
-
| ---------------------------- | ------ |
|
|
468
|
-
| Text inference (blocking) | ✅ |
|
|
469
|
-
| Text inference (streaming) | ✅ | Token-by-token callbacks
|
|
470
|
-
| CPU inference | ✅ |
|
|
471
|
-
| GPU inference (Metal/MPS) | ✅ | Supported via `backend: 'gpu'`
|
|
472
|
-
| Model download with progress | ✅ |
|
|
473
|
-
| Memory tracking | ✅ |
|
|
474
|
-
| Multi-turn conversation | ✅ | Context retained across turns
|
|
475
|
-
| Multimodal (image/audio) |
|
|
476
|
-
|
|
|
477
|
-
| Function
|
|
505
|
+
| Feature | Status | Notes |
|
|
506
|
+
| ---------------------------- | ------ | ------------------------------------------------------ |
|
|
507
|
+
| Text inference (blocking) | ✅ | Direct FFI using `dev.litert.engine` background queue |
|
|
508
|
+
| Text inference (streaming) | ✅ | Token-by-token callbacks |
|
|
509
|
+
| CPU inference | ✅ | Safe fallback default |
|
|
510
|
+
| GPU inference (Metal/MPS) | ✅ | Supported via `backend: 'gpu'` |
|
|
511
|
+
| Model download with progress | ✅ | URLSession-based, cached in `Caches/` |
|
|
512
|
+
| Memory tracking | ✅ | Real-time Resident Set Size (RSS) tracking |
|
|
513
|
+
| Multi-turn conversation | ✅ | Context retained across turns |
|
|
514
|
+
| Multimodal (image/audio) | ✅ | Zero-copy `ArrayBuffer` mapping to FFI input buffers |
|
|
515
|
+
| Speculative Decoding | ✅ | Dynamic capabilities check during model pre-load |
|
|
516
|
+
| Function / Tool Calling | ✅ | Supported via JSON-encoded schema specification |
|
|
478
517
|
|
|
479
518
|
### iOS Entitlements
|
|
480
519
|
|
|
@@ -489,78 +528,88 @@ Add to your app's `.entitlements` file:
|
|
|
489
528
|
|
|
490
529
|
> **Note:** This entitlement requires a **paid Apple Developer account** ($99/year). Gemma 3n E2B (~1.3 GB) works without it.
|
|
491
530
|
|
|
492
|
-
##
|
|
531
|
+
## iOS FFI Architecture & Integration
|
|
493
532
|
|
|
494
|
-
The
|
|
533
|
+
The library uses a highly optimized Swift Direct-FFI bridge that links directly with the pre-compiled C library `CLiteRTLM.xcframework`.
|
|
495
534
|
|
|
496
|
-
###
|
|
535
|
+
### Key Design Commitments
|
|
497
536
|
|
|
498
|
-
|
|
499
|
-
-
|
|
537
|
+
1. **JSI Thread Safety (User Rule #1)**:
|
|
538
|
+
- The JSI/JS thread must never be blocked by native synchronous lock-waiting operations.
|
|
539
|
+
- We dispatch all FFI calls to a serial background `dev.litert.engine` queue, executing callbacks asynchronously to prevent deadlocking JSI execution.
|
|
500
540
|
|
|
501
|
-
|
|
541
|
+
2. **Zero-Copy Memory Pipelines (User Rule #2)**:
|
|
542
|
+
- Heavy media assets such as images and audio are passed as Nitro Modules `ArrayBuffer` objects that reference native memory directly (`ArrayBuffer.data`), rather than being copied.
|
|
502
543
|
|
|
503
|
-
|
|
504
|
-
|
|
544
|
+
3. **Manual FFI Resource Management (User Rule #3)**:
|
|
545
|
+
- Raw pointers (`LiteRtLmEngine*`, `LiteRtLmConversation*`) are manually allocated and explicitly released in Swift `deinit` and `close()` so that native resources are not leaked during prolonged inference sessions.
|
|
546
|
+
|
|
547
|
+
### Architecture Topology
|
|
548
|
+
|
|
549
|
+
```
|
|
550
|
+
┌──────────────────────────────────────────────────────────┐
|
|
551
|
+
│ React Native (TypeScript / JavaScript) │
|
|
552
|
+
├──────────────────────────────────────────────────────────┤
|
|
553
|
+
│ Nitro Modules JSI Bindings (`HybridLiteRTLMSpec`) │
|
|
554
|
+
├─────────────────────────────┬────────────────────────────┤
|
|
555
|
+
│ Android (Kotlin) │ iOS (Swift Direct FFI) │
|
|
556
|
+
│ `HybridLiteRTLM.kt` │ `HybridLiteRTLM.swift` │
|
|
557
|
+
│ `litertlm-android` AAR │ `CLiteRTLM.xcframework` │
|
|
558
|
+
└─────────────────────────────┴────────────────────────────┘
|
|
505
559
|
```
|
|
506
560
|
|
|
507
|
-
|
|
561
|
+
#### Android Bridging
|
|
562
|
+
- Conforms fully to `HybridLiteRTLMSpec` using Kotlin.
|
|
563
|
+
- Incorporates Proguard keep rules to prevent dynamic JSI/JNI code stripping.
|
|
564
|
+
- Declares `<uses-native-library android:name="libOpenCL.so" android:required="false" />` to load dynamic OpenCL for GPU delegate acceleration on Android 12+ without throwing platform installer exceptions.
|
|
508
565
|
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
5. Compile C/C++ stubs for unavailable Rust dependencies
|
|
514
|
-
6. Merge ~1,909 object files into a static library via `libtool`
|
|
515
|
-
7. Package into `ios/Frameworks/LiteRTLM.xcframework`
|
|
566
|
+
#### iOS Bridging
|
|
567
|
+
- Entirely written in native Swift (`HybridLiteRTLM.swift`) calling direct FFI.
|
|
568
|
+
- Avoids the upstream Swift SDK `actor` lock-blocking deadlocks by utilizing low-level C functions directly.
|
|
569
|
+
- Implements custom `getMemoryUsage` that queries the OS directly via `mach_task_basic_info` to get precise real-time Resident Set Size (RSS) metrics.
|
|
516
570
|
|
|
517
|
-
|
|
571
|
+
## Testing
|
|
518
572
|
|
|
519
|
-
|
|
520
|
-
ios/Frameworks/LiteRTLM.xcframework/
|
|
521
|
-
├── Info.plist
|
|
522
|
-
├── ios-arm64/LiteRTLM.framework/ # Device
|
|
523
|
-
│ ├── LiteRTLM # ~82 MB static library
|
|
524
|
-
│ └── Headers/litert_lm_engine.h
|
|
525
|
-
└── ios-arm64-simulator/LiteRTLM.framework/ # Simulator
|
|
526
|
-
├── LiteRTLM # ~84 MB static library
|
|
527
|
-
└── Headers/litert_lm_engine.h
|
|
528
|
-
```
|
|
573
|
+
The library includes a comprehensive multi-tier unit testing suite designed to run quickly on host machines (CI runners or local development environments) without requiring a physical test device.
|
|
529
574
|
|
|
530
|
-
###
|
|
575
|
+
### 1. JavaScript / TypeScript Layer (Jest)
|
|
531
576
|
|
|
532
|
-
|
|
577
|
+
The JS/TS layer uses Jest to validate the `useModel` hook, download progress callbacks, URL query scrubbing, file storage helpers, and the zero-copy native memory tracker buffer allocations.
|
|
533
578
|
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
579
|
+
* **Setup & Mocking**: Includes an active stub (`src/__mocks__/react-native-nitro-modules.ts`) that mocks the Nitro Modules `HybridObject` architecture.
|
|
580
|
+
* **How to run**:
|
|
581
|
+
```bash
|
|
582
|
+
npm run test
|
|
583
|
+
```
|
|
539
584
|
|
|
540
|
-
|
|
585
|
+
### 2. Android Kotlin Layer (Robolectric)
|
|
541
586
|
|
|
542
|
-
|
|
587
|
+
The Android layer uses local JUnit Robolectric tests to run Android code on the JVM, sandboxing OS dependencies. It validates HTTPS scheme constraints, path traversal mitigations, and initial telemetry states.
|
|
543
588
|
|
|
544
|
-
|
|
589
|
+
* **Setup & Mocking**: Uses a local shadow `Promise` implementation to test thread-asynchronous errors.
|
|
590
|
+
* **How to run**:
|
|
591
|
+
```bash
|
|
592
|
+
cd example/android
|
|
593
|
+
./gradlew :react-native-litert-lm:testDebugUnitTest
|
|
594
|
+
```
|
|
545
595
|
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
│ useModel() / createLLM() / sendMessage() │
|
|
550
|
-
├─────────────────────────────────────────────────┤
|
|
551
|
-
│ Nitro Modules JSI Bridge │
|
|
552
|
-
├──────────────────────┬──────────────────────────┤
|
|
553
|
-
│ Android (Kotlin) │ iOS (C++) │
|
|
554
|
-
│ HybridLiteRTLM.kt │ HybridLiteRTLM.cpp │
|
|
555
|
-
│ litertlm-android │ LiteRTLM C API │
|
|
556
|
-
│ AAR (GPU delegate) │ XCFramework (Metal) │
|
|
557
|
-
└──────────────────────┴──────────────────────────┘
|
|
558
|
-
```
|
|
596
|
+
### 3. iOS Swift Layer (XCTest)
|
|
597
|
+
|
|
598
|
+
The iOS layer leverages native XCTests integrated directly into CocoaPods via standard development test specs. It verifies FFI path traversal blocking, non-HTTPS download blocks, automatic `deinit` cleanup, and Mach-based telemetry bounds.
|
|
559
599
|
|
|
560
|
-
|
|
561
|
-
|
|
600
|
+
* **How to run**:
|
|
601
|
+
1. Boot your preferred iOS simulator (e.g., iPhone 16 running iOS 18.6).
|
|
602
|
+
2. Run the tests using `xcodebuild`:
|
|
603
|
+
```bash
|
|
604
|
+
cd example/ios
|
|
605
|
+
xcodebuild test -workspace LLMTest.xcworkspace -scheme react-native-litert-lm-Unit-Tests -sdk iphonesimulator -destination 'platform=iOS Simulator,name=iPhone 16'
|
|
606
|
+
```
|
|
562
607
|
|
|
563
|
-
|
|
608
|
+
### Security & Sanitization Protections Checked
|
|
609
|
+
Every test run automatically asserts:
|
|
610
|
+
- **Defense in depth for download boundaries**: Blocks non-HTTPS schemes at both JS model factory and low-level native layers.
|
|
611
|
+
- **Path Traversal protections**: Prevents directory traversal attacks (`..`, `/`, `\`) in download and deletion APIs.
|
|
612
|
+
- **Telemetry sanity**: Ensures reported memory-usage telemetry values start in a valid state and stay within expected bounds.
|
|
564
613
|
|
|
565
614
|
## License
|
|
566
615
|
|
package/android/build.gradle
CHANGED
|
@@ -19,6 +19,7 @@ android {
|
|
|
19
19
|
|
|
20
20
|
defaultConfig {
|
|
21
21
|
minSdk 26 // LiteRT-LM requires API 26+
|
|
22
|
+
consumerProguardFiles 'consumer-rules.pro'
|
|
22
23
|
|
|
23
24
|
externalNativeBuild {
|
|
24
25
|
cmake {
|
|
@@ -66,6 +67,12 @@ android {
|
|
|
66
67
|
keepDebugSymbols.add("**/*.so")
|
|
67
68
|
}
|
|
68
69
|
}
|
|
70
|
+
|
|
71
|
+
testOptions {
|
|
72
|
+
unitTests {
|
|
73
|
+
includeAndroidResources = true
|
|
74
|
+
}
|
|
75
|
+
}
|
|
69
76
|
}
|
|
70
77
|
|
|
71
78
|
repositories {
|
|
@@ -89,4 +96,9 @@ dependencies {
|
|
|
89
96
|
|
|
90
97
|
// LiteRT-LM Kotlin API
|
|
91
98
|
implementation "com.google.ai.edge.litertlm:litertlm-android:${litertLmVersion}"
|
|
99
|
+
|
|
100
|
+
// Testing Dependencies
|
|
101
|
+
testImplementation 'junit:junit:4.13.2'
|
|
102
|
+
testImplementation 'org.robolectric:robolectric:4.11.1'
|
|
103
|
+
testImplementation 'org.jetbrains.kotlinx:kotlinx-coroutines-test:1.7.3'
|
|
92
104
|
}
|
|
@@ -7,5 +7,10 @@
|
|
|
7
7
|
android:authorities="${applicationId}.litertlm.init"
|
|
8
8
|
android:exported="false"
|
|
9
9
|
android:initOrder="100" />
|
|
10
|
+
|
|
11
|
+
<!-- Allow dynamic loading of OpenCL for GPU delegate acceleration on Android 12+ -->
|
|
12
|
+
<uses-native-library
|
|
13
|
+
android:name="libOpenCL.so"
|
|
14
|
+
android:required="false" />
|
|
10
15
|
</application>
|
|
11
16
|
</manifest>
|