react-native-litert-lm 0.3.6 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. package/README.md +207 -158
  2. package/android/build.gradle +12 -0
  3. package/android/src/main/AndroidManifest.xml +5 -0
  4. package/android/src/main/java/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLM.kt +316 -63
  5. package/android/src/main/java/dev/litert/litertlm/LiteRTLMPackage.kt +19 -2
  6. package/android/src/test/java/com/margelo/nitro/core/Promise.kt +46 -0
  7. package/android/src/test/java/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLMTest.kt +83 -0
  8. package/cpp/include/README.md +9 -11
  9. package/ios/HybridLiteRTLM.swift +1058 -0
  10. package/ios/Tests/HybridLiteRTLMTests.swift +67 -0
  11. package/lib/__mocks__/react-native-nitro-modules.d.ts +61 -0
  12. package/lib/__mocks__/react-native-nitro-modules.js +50 -0
  13. package/lib/__tests__/hooks.test.d.ts +1 -0
  14. package/lib/__tests__/hooks.test.js +124 -0
  15. package/lib/__tests__/memoryTracker.test.d.ts +1 -0
  16. package/lib/__tests__/memoryTracker.test.js +74 -0
  17. package/lib/__tests__/modelFactory.test.d.ts +1 -0
  18. package/lib/__tests__/modelFactory.test.js +52 -0
  19. package/lib/hooks.js +1 -1
  20. package/lib/index.d.ts +2 -4
  21. package/lib/index.js +12 -7
  22. package/lib/modelFactory.js +62 -63
  23. package/lib/specs/LiteRTLM.nitro.d.ts +71 -2
  24. package/nitrogen/generated/android/c++/JHybridLiteRTLMSpec.cpp +62 -7
  25. package/nitrogen/generated/android/c++/JHybridLiteRTLMSpec.hpp +3 -1
  26. package/nitrogen/generated/android/c++/JLLMConfig.hpp +40 -3
  27. package/nitrogen/generated/android/c++/JMultimodalPart.hpp +74 -0
  28. package/nitrogen/generated/android/c++/JPartType.hpp +61 -0
  29. package/nitrogen/generated/android/c++/JToolDefinition.hpp +65 -0
  30. package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/GenerationStats.kt +23 -0
  31. package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLMSpec.kt +10 -2
  32. package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/LLMConfig.kt +46 -3
  33. package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/MemoryUsage.kt +19 -0
  34. package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/Message.kt +15 -0
  35. package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/MultimodalPart.kt +66 -0
  36. package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/PartType.kt +24 -0
  37. package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/ToolDefinition.kt +61 -0
  38. package/nitrogen/generated/ios/LiteRTLM-Swift-Cxx-Bridge.cpp +57 -1
  39. package/nitrogen/generated/ios/LiteRTLM-Swift-Cxx-Bridge.hpp +414 -3
  40. package/nitrogen/generated/ios/LiteRTLM-Swift-Cxx-Umbrella.hpp +41 -3
  41. package/nitrogen/generated/ios/LiteRTLMAutolinking.mm +4 -6
  42. package/nitrogen/generated/ios/LiteRTLMAutolinking.swift +10 -0
  43. package/nitrogen/generated/ios/c++/HybridLiteRTLMSpecSwift.cpp +11 -0
  44. package/nitrogen/generated/ios/c++/HybridLiteRTLMSpecSwift.hpp +224 -0
  45. package/nitrogen/generated/ios/swift/Backend.swift +44 -0
  46. package/nitrogen/generated/ios/swift/Func_void.swift +46 -0
  47. package/nitrogen/generated/ios/swift/Func_void_double.swift +46 -0
  48. package/nitrogen/generated/ios/swift/Func_void_std__exception_ptr.swift +46 -0
  49. package/nitrogen/generated/ios/swift/Func_void_std__string.swift +46 -0
  50. package/nitrogen/generated/ios/swift/Func_void_std__string_bool.swift +46 -0
  51. package/nitrogen/generated/ios/swift/GenerationStats.swift +54 -0
  52. package/nitrogen/generated/ios/swift/HybridLiteRTLMSpec.swift +69 -0
  53. package/nitrogen/generated/ios/swift/HybridLiteRTLMSpec_cxx.swift +383 -0
  54. package/nitrogen/generated/ios/swift/LLMConfig.swift +203 -0
  55. package/nitrogen/generated/ios/swift/MemoryUsage.swift +44 -0
  56. package/nitrogen/generated/ios/swift/Message.swift +34 -0
  57. package/nitrogen/generated/ios/swift/MultimodalPart.swift +83 -0
  58. package/nitrogen/generated/ios/swift/PartType.swift +44 -0
  59. package/nitrogen/generated/ios/swift/Role.swift +44 -0
  60. package/nitrogen/generated/ios/swift/ToolDefinition.swift +39 -0
  61. package/nitrogen/generated/shared/c++/HybridLiteRTLMSpec.cpp +2 -0
  62. package/nitrogen/generated/shared/c++/HybridLiteRTLMSpec.hpp +7 -2
  63. package/nitrogen/generated/shared/c++/LLMConfig.hpp +22 -2
  64. package/nitrogen/generated/shared/c++/MultimodalPart.hpp +99 -0
  65. package/nitrogen/generated/shared/c++/PartType.hpp +80 -0
  66. package/nitrogen/generated/shared/c++/ToolDefinition.hpp +91 -0
  67. package/package.json +16 -8
  68. package/react-native-litert-lm.podspec +15 -19
  69. package/scripts/download-ios-frameworks.sh +14 -48
  70. package/scripts/postinstall.js +1 -2
  71. package/src/__mocks__/react-native-nitro-modules.ts +48 -0
  72. package/src/__tests__/hooks.test.ts +153 -0
  73. package/src/__tests__/memoryTracker.test.ts +87 -0
  74. package/src/__tests__/modelFactory.test.ts +68 -0
  75. package/src/hooks.ts +1 -1
  76. package/src/index.ts +12 -9
  77. package/src/modelFactory.ts +82 -80
  78. package/src/specs/LiteRTLM.nitro.ts +80 -2
  79. package/cpp/HybridLiteRTLM.cpp +0 -838
  80. package/cpp/HybridLiteRTLM.hpp +0 -167
  81. package/cpp/IOSDownloadHelper.h +0 -24
  82. package/ios/IOSDownloadHelper.mm +0 -129
  83. package/scripts/build-ios-engine.sh +0 -302
  84. package/scripts/stubs/cxx_bridge_stubs.cc +0 -224
  85. package/scripts/stubs/gemma_model_constraint_provider.cc +0 -46
  86. package/scripts/stubs/llguidance_stubs.c +0 -101
  87. package/src/templates.ts +0 -105
package/README.md CHANGED
@@ -4,16 +4,21 @@ High-performance on-device LLM inference for React Native, powered by [LiteRT-LM
4
4
 
5
5
  ## Features
6
6
 
7
- - 🚀 **Native Performance** — Kotlin (Android) / C++ (iOS) via Nitro Modules JSI bindings
8
- - 🧠 **Gemma 4 Ready** — First-class support for Gemma 4 E2B/E4B multimodal models (text + vision + audio)
9
- - ⚡ **GPU Acceleration** — GPU delegate (Android), Metal/MPS (iOS)
10
- - 🔄 **Streaming Support** — Token-by-token generation callbacks
11
- - 📱 **Cross-Platform** — Android API 26+ / iOS 15.0+
12
- - 🖼️ **Multimodal** — Image and audio input support
13
- - 🧵 **Async API** — Non-blocking inference on dedicated large-stack threads
14
- - 📊 **Real Memory Tracking** — OS-level memory metrics (RSS, native heap, available memory) via native APIs
15
- - 🧮 **Zero-Copy Buffers** — Memory snapshots stored in native ArrayBuffers via Nitro Modules
16
- - 📥 **Automatic Model Download** — Downloads models from URL with progress tracking and local caching
7
+ - 🚀 **Native Swift Bridge (iOS)** — Bypasses Swift actor deadlocks (User Rule #1) via direct C FFI dispatched on a serial `dev.litert.engine` background queue.
8
+ - 🤖 **Stateless Kotlin Bridge (Android)** — Fully conforms to `HybridLiteRTLMSpec` using direct JSI memory access.
9
+ - ⚡ **Zero-Copy Multimodal API** — Native-owned `ArrayBuffer` mapping straight to FFI inputs for image/audio data without copy overhead (complying with User Rule #2).
10
+ - 🧠 **Speculative Decoding** — Active multi-token prediction support with pre-flight model capability validation.
11
+ - 🛠️ **Function / Tool Calling** — Native JSON-encoded schema specification support for structured outputs.
12
+ - 🏎️ **GPU Acceleration** — Metal (iOS), OpenCL GPU delegate (Android, Pixel devices).
13
+ - 🔄 **Streaming Support** — Non-blocking token-by-token callbacks.
14
+ - 📊 **Real Memory Tracking** — OS-level memory metrics (RSS, native heap, available memory) via native APIs.
15
+ - 📥 **Automatic Model Download** — Downloads models from URL with progress tracking and local caching.
16
+
17
+ ## Demo
18
+
19
+ > Gemma 4 E2B running on-device on a Samsung Galaxy S22 (Snapdragon 8 Gen 1, 4 GB RAM) — CPU backend, streaming inference.
20
+
21
+ <video src="https://github.com/user-attachments/assets/1da527ce-0432-4f8b-8899-474f81b2feea" width="300" controls></video>
17
22
 
18
23
  ## Installation
19
24
 
@@ -65,7 +70,8 @@ The `example/` directory contains a fully functional test app with a dark-themed
65
70
  - Multi-turn conversation with context retention
66
71
  - Performance benchmarking (tokens/sec, latency)
67
72
  - Real-time memory tracking
68
- - Quick chat interface
73
+ - Speculative decoding & tool calling settings toggles
74
+ - Zero-copy multimodal inference loading images/audio directly into ArrayBuffers
69
75
 
70
76
  ### Running the Example
71
77
 
@@ -87,42 +93,45 @@ The `example/` directory contains a fully functional test app with a dark-themed
87
93
  ```bash
88
94
  npx expo prebuild --clean
89
95
  npx expo run:android # Android
90
- npx expo run:ios # iOS (requires XCFramework — see "Building the iOS Engine" below)
96
+ npx expo run:ios # iOS (pre-linked with CLiteRTLM.xcframework)
91
97
  ```
92
98
 
93
- > **Note:** If you change native code (C++/Kotlin/Obj-C++), you must run `npx expo prebuild --clean` again before rebuilding.
99
+ > **Note:** If you change native code (Swift/Kotlin), you must run `npx expo prebuild --clean` again before rebuilding.
94
100
 
95
101
  ## Model Management
96
102
 
97
- LiteRT-LM models (like Gemma 4) are large files (2–4 GB) and cannot be bundled into your app binary. They are downloaded at runtime.
103
+ LiteRT-LM models (like Gemma 4) are large files (1–4 GB) and cannot be bundled into your app binary. They are downloaded at runtime.
98
104
 
99
105
  ### Automatic Downloading
100
106
 
101
- The library handles downloading automatically when you pass a URL to `loadModel` or `useModel`. Downloads include:
107
+ Pass an HTTPS URL to `useModel()` or `loadModel()` — the library handles the rest:
102
108
 
103
109
  - **Progress tracking** — real-time download percentage via callbacks
104
110
  - **Local caching** — downloaded models are cached and reused across app launches
105
- - **Android**: app-local temp directory
111
+ - **Android**: `files/models/` (app-private)
106
112
  - **iOS**: `Library/Caches/litert_models/` (survives app relaunch; reclaimable by iOS under storage pressure)
107
113
  - **HTTPS enforcement** — only secure URLs are accepted
108
114
 
109
- ### Manual Downloading (Optional)
115
+ ### Manual Downloading
110
116
 
111
- If you prefer to manage downloads yourself (e.g., using `expo-file-system`), download the `.litertlm` file to a local path and pass that path to the library:
117
+ If you need custom control over downloads (e.g., authentication headers for private model hosting, resumable downloads, or custom caching), use your preferred HTTP client and pass the local file path:
112
118
 
113
119
  ```typescript
114
- import * as FileSystem from "expo-file-system";
115
- import { GEMMA_4_E2B_IT } from "react-native-litert-lm";
120
+ import { fetch } from "expo/fetch";
121
+ import { File, Paths } from "expo-file-system";
122
+ import { useModel } from "react-native-litert-lm";
116
123
 
117
- const localPath = `${FileSystem.documentDirectory}gemma-4-E2B-it.litertlm`;
124
+ const MODEL_URL = "https://example.com/private-model.litertlm";
118
125
 
119
- async function downloadModel() {
120
- const info = await FileSystem.getInfoAsync(localPath);
121
- if (info.exists) return localPath;
126
+ // Download with custom headers using expo/fetch
127
+ const response = await fetch(MODEL_URL, {
128
+ headers: { Authorization: `Bearer ${token}` },
129
+ });
130
+ const modelFile = new File(Paths.cache, "my-model.litertlm");
131
+ modelFile.write(await response.bytes());
122
132
 
123
- await FileSystem.downloadAsync(GEMMA_4_E2B_IT, localPath);
124
- return localPath;
125
- }
133
+ // Pass the local path — no download occurs
134
+ const { model, isReady } = useModel(modelFile.uri, { backend: "cpu" });
126
135
  ```
127
136
 
128
137
  ## Usage
@@ -194,32 +203,80 @@ llm.sendMessageAsync("Tell me a story", (token, done) => {
194
203
  });
195
204
  ```
196
205
 
197
- ### Multimodal (Image / Audio)
206
+ ### Multimodal (Image / Audio) & Zero-Copy Buffers
207
+
208
+ Multimodal features are fully supported via standard file paths or high-performance zero-copy `ArrayBuffer` objects:
198
209
 
199
- > **Note**: Multimodal is fully supported on Android. iOS has the code paths implemented but vision/audio executors may not be available in the current XCFramework build — use `checkMultimodalSupport()` to verify at runtime.
210
+ #### 1. Zero-Copy Multimodal Messages (Recommended)
211
+ This API uses Nitro Modules' native-backed `ArrayBuffer` directly mapped to native memory buffers, avoiding any base64 heap copying overhead (User Rule #2):
200
212
 
201
213
  ```typescript
202
214
  import { checkMultimodalSupport } from "react-native-litert-lm";
203
215
 
204
216
  const warning = checkMultimodalSupport();
205
217
  if (warning) {
206
- console.warn(warning); // Experimental on iOS
218
+ console.warn(warning); // Experimental or unsupported on current platform (e.g. iOS simulator)
207
219
  } else {
208
- // Image input (for vision models like Gemma 4)
209
- // Images >1024px are automatically resized to prevent OOM
210
- const response = await llm.sendMessageWithImage(
211
- "What's in this image?",
212
- "/path/to/image.jpg",
213
- );
214
-
215
- // Audio input
216
- const transcription = await llm.sendMessageWithAudio(
217
- "Transcribe this audio",
218
- "/path/to/audio.wav",
219
- );
220
+ // Read local assets or files straight into ArrayBuffers using fetch
221
+ const response = await fetch(Image.resolveAssetSource(require("./test.jpeg")).uri);
222
+ const imageBuffer = await response.arrayBuffer();
223
+
224
+ const reply = await llm.sendMultimodalMessage([
225
+ { type: "image", imageBuffer },
226
+ { type: "text", text: "Describe what is in this image." }
227
+ ]);
228
+ console.log(reply);
220
229
  }
221
230
  ```
222
231
 
232
+ #### 2. Path-Based Multimodal Messages
233
+ ```typescript
234
+ // Image input
235
+ const response = await llm.sendMessageWithImage(
236
+ "What's in this image?",
237
+ "/path/to/image.jpg",
238
+ );
239
+
240
+ // Audio input
241
+ const transcription = await llm.sendMessageWithAudio(
242
+ "Transcribe this audio",
243
+ "/path/to/audio.wav",
244
+ );
245
+ ```
246
+
247
+ ### Speculative Decoding & Tools
248
+
249
+ #### 1. Speculative Decoding (MTP)
250
+ Enable speculative decoding in `LLMConfig` to accelerate inference using multi-token prediction when supported by your model:
251
+
252
+ ```typescript
253
+ const { model } = useModel(GEMMA_4_E2B_IT, {
254
+ enableSpeculativeDecoding: true,
255
+ });
256
+ ```
257
+
258
+ #### 2. Function / Tool Calling
259
+ Inject tools as an array of definitions, specifying parameter validation using standard JSON schema format:
260
+
261
+ ```typescript
262
+ const { model } = useModel(GEMMA_4_E2B_IT, {
263
+ tools: [
264
+ {
265
+ name: "get_current_weather",
266
+ description: "Get the current weather for a location",
267
+ parametersJson: JSON.stringify({
268
+ type: "object",
269
+ properties: {
270
+ location: { type: "string", description: "The city and state, e.g. San Francisco, CA" },
271
+ unit: { type: "string", enum: ["celsius", "fahrenheit"] }
272
+ },
273
+ required: ["location"]
274
+ })
275
+ }
276
+ ]
277
+ });
278
+ ```
279
+
223
280
  ### Performance Stats
224
281
 
225
282
  ```typescript
@@ -229,6 +286,8 @@ console.log(`Speed: ${stats.tokensPerSecond.toFixed(1)} tokens/sec`);
229
286
  console.log(`Time to first token: ${stats.timeToFirstToken.toFixed(0)} ms`);
230
287
  ```
231
288
 
289
+ > **Note**: Stats are available for both sync (`sendMessage`) and streaming (`sendMessageAsync`) on both platforms. iOS uses real benchmark data from the C API; Android uses heuristic token counts with precise timing.
290
+
232
291
  ### Memory Tracking
233
292
 
234
293
  The library provides real OS-level memory data — no estimation. It reads directly from `mach_task_basic_info` (iOS) and `Debug.getNativeHeapAllocatedSize()` + `/proc/self/status` (Android).
@@ -307,19 +366,19 @@ const buffer = tracker.getNativeBuffer();
307
366
 
308
367
  ## Supported Models
309
368
 
310
- Download `.litertlm` models automatically using the exported URL constants, or manually from [HuggingFace](https://huggingface.co/litert-community):
369
+ All exported model URLs are **public — no authentication required**. Pass them directly to `useModel()` or `loadModel()` for automatic downloading with progress tracking and local caching.
311
370
 
312
- | Constant | Model | Size | Min RAM | Auth Required |
313
- | :--------------------- | :------------------------------ | :------ | :------ | :------------- |
314
- | `GEMMA_4_E2B_IT` | Gemma 4 E2B (Multimodal, IT) | 2.58 GB | 4 GB+ | No |
315
- | `GEMMA_4_E4B_IT` | Gemma 4 E4B (Higher Quality) | 3.65 GB | 6 GB+ | No |
316
- | `GEMMA_3N_E2B_IT_INT4` | Gemma 3n E2B (Int4, Multimodal) | ~1.3 GB | 4 GB+ | ✅ HuggingFace |
371
+ | Constant | Model | Size | Min RAM | Source |
372
+ | :--------------------- | :------------------------------ | :------ | :------ | :---------- |
373
+ | `GEMMA_4_E2B_IT` | Gemma 4 E2B (Multimodal, IT) | 2.58 GB | 4 GB+ | HuggingFace |
374
+ | `GEMMA_4_E4B_IT` | Gemma 4 E4B (Higher Quality) | 3.65 GB | 6 GB+ | HuggingFace |
375
+ | `GEMMA_3N_E2B_IT_INT4` | Gemma 3n E2B (Int4, Multimodal) | ~1.3 GB | 4 GB+ | litert.dev |
317
376
 
318
- > **Recommended:** Use `GEMMA_4_E2B_IT` for most use cases. It's multimodal (text + vision + audio) and downloads directly from HuggingFace without requiring an account.
377
+ > **Recommended:** Use `GEMMA_4_E2B_IT` for most use cases — it is multimodal (text + vision + audio) and offers the best quality-to-size ratio.
319
378
  >
320
- > **iOS Note:** Models larger than ~2 GB (like Gemma 4) require the `com.apple.developer.kernel.extended-virtual-addressing` entitlement. See [iOS Entitlements](#ios-entitlements) below.
379
+ > **iOS Note:** Models larger than ~2 GB require the `com.apple.developer.kernel.extended-virtual-addressing` entitlement. See [iOS Entitlements](#ios-entitlements) below. Gemma 3n E2B (~1.3 GB) works without it.
321
380
 
322
- **Other compatible models** (download manually from HuggingFace):
381
+ **Other compatible models** (download `.litertlm` files manually from [HuggingFace](https://huggingface.co/litert-community)):
323
382
 
324
383
  | Model | Size | Min RAM | Notes |
325
384
  | ------------- | ------- | ------- | --------------------- |
@@ -352,13 +411,15 @@ Loads a model from a local path or HTTPS URL.
352
411
 
353
412
  #### Backend Options
354
413
 
355
- | Backend | Engine | Speed | Notes |
356
- | ------- | ------------------- | ------- | ---------------------------------------------- |
357
- | `'cpu'` | CPU inference | Slowest | Always available, lower RAM requirement |
358
- | `'gpu'` | GPU / Metal | Fast | Recommended default |
359
- | `'npu'` | NPU / Neural Engine | Fastest | Requires supported hardware; falls back to GPU |
414
+ | Backend | Engine | Speed | Notes |
415
+ | ------- | ------------------------------ | ------- | ---------------------------------------------------------------------------------- |
416
+ | `'cpu'` | CPU inference | Slowest | Always available on all devices |
417
+ | `'gpu'` | Metal (iOS) / OpenCL (Android) | Fast | iOS: always available. Android: requires OpenCL (Pixel only, not Samsung/Qualcomm) |
418
+ | `'npu'` | NPU / Neural Engine | Fastest | Requires supported hardware; experimental |
360
419
 
361
- > **iOS**: `'cpu'` is the recommended default backend. `'gpu'` (Metal/MPS) is also supported. The engine automatically tries multiple backend combinations if the primary one fails.
420
+ > **iOS**: Both `'cpu'` and `'gpu'` (Metal) are supported. The engine automatically tries fallback backend combinations if the primary one fails.
421
+ >
422
+ > **Android GPU**: The GPU backend requires OpenCL, which is **not available on most Samsung and Qualcomm devices**. Use `checkBackendSupport('gpu')` to check before loading. The engine will throw a clear error if GPU is unsupported.
362
423
 
363
424
  ### `sendMessage(message): Promise<string>`
364
425
 
@@ -380,30 +441,10 @@ Send a message with audio (for audio-capable models like Gemma 4 E2B).
380
441
 
381
442
  Returns performance metrics from the last inference call.
382
443
 
383
- ```typescript
384
- interface GenerationStats {
385
- tokensPerSecond: number;
386
- totalTime: number; // seconds
387
- timeToFirstToken: number; // seconds
388
- promptTokens: number;
389
- completionTokens: number;
390
- prefillSpeed: number; // tokens/sec
391
- }
392
- ```
393
-
394
444
  ### `getMemoryUsage(): MemoryUsage`
395
445
 
396
446
  Returns real OS-level memory usage.
397
447
 
398
- ```typescript
399
- interface MemoryUsage {
400
- nativeHeapBytes: number;
401
- residentBytes: number;
402
- availableMemoryBytes: number;
403
- isLowMemory: boolean;
404
- }
405
- ```
406
-
407
448
  ### `getHistory(): Message[]`
408
449
 
409
450
  Returns the conversation history.
@@ -427,21 +468,19 @@ import {
427
468
  checkBackendSupport,
428
469
  checkMultimodalSupport,
429
470
  getRecommendedBackend,
430
- applyGemmaTemplate,
431
- applyPhiTemplate,
432
- applyLlamaTemplate,
433
471
  } from "react-native-litert-lm";
434
472
 
435
- // Check if a backend is supported
436
- const warning = checkBackendSupport("npu"); // string | undefined
473
+ // Check if GPU is supported on this device
474
+ const gpuWarning = checkBackendSupport("gpu");
475
+
476
+ // Check NPU support
477
+ const npuWarning = checkBackendSupport("npu"); // string | undefined
478
+
479
+ // Check multimodal support
437
480
  const mmError = checkMultimodalSupport(); // string | undefined
438
- const backend = getRecommendedBackend(); // 'gpu' | 'cpu'
439
481
 
440
- // Manual prompt formatting (advanced)
441
- const prompt = applyGemmaTemplate(
442
- [{ role: "user", content: "Hello!" }],
443
- "You are helpful.",
444
- );
482
+ // Get recommended backend
483
+ const backend = getRecommendedBackend(); // 'cpu'
445
484
  ```
446
485
 
447
486
  ## Requirements
@@ -452,29 +491,29 @@ const prompt = applyGemmaTemplate(
452
491
  | react-native-nitro-modules | 0.35.0+ |
453
492
  | Android API | 26+ (ARM64) |
454
493
  | iOS | 15.0+ (ARM64) |
455
- | LiteRT-LM Engine | 0.10.2 |
494
+ | LiteRT-LM Engine | 0.12.0 |
456
495
 
457
496
  ## Platform Support
458
497
 
459
- | Platform | Status | Architecture | Backends |
460
- | -------- | -------- | ------------ | ---------------- |
461
- | Android | ✅ Ready | arm64-v8a | CPU, GPU, NPU |
462
- | iOS | ✅ Ready | arm64 | CPU, GPU (Metal) |
498
+ | Platform | Status | Architecture | Backends |
499
+ | -------- | -------- | ------------ | ------------------------------------------------- |
500
+ | Android | ✅ Ready | arm64-v8a | CPU (all devices), GPU (OpenCL devices only), NPU |
501
+ | iOS | ✅ Ready | arm64 | CPU, GPU (Metal — always available) |
463
502
 
464
503
  ### iOS Feature Matrix
465
504
 
466
- | Feature | Status | Notes |
467
- | ---------------------------- | ------ | ----------------------------------------------------- |
468
- | Text inference (blocking) | ✅ | Via LiteRT-LM C API |
469
- | Text inference (streaming) | ✅ | Token-by-token callbacks |
470
- | CPU inference | ✅ | Recommended default backend |
471
- | GPU inference (Metal/MPS) | ✅ | Supported via `backend: 'gpu'` |
472
- | Model download with progress | ✅ | NSURLSession, cached in `Caches/` |
473
- | Memory tracking | ✅ | `mach_task_basic_info` |
474
- | Multi-turn conversation | ✅ | Context retained across turns |
475
- | Multimodal (image/audio) | 🧪 | Code paths exist; vision/audio executors experimental |
476
- | Constrained decoding | | Requires llguidance Rust runtime |
477
- | Function calling | | Requires Rust CXX bridge runtime |
505
+ | Feature | Status | Notes |
506
+ | ---------------------------- | ------ | ------------------------------------------------------ |
507
+ | Text inference (blocking) | ✅ | Direct FFI using `dev.litert.engine` background queue |
508
+ | Text inference (streaming) | ✅ | Token-by-token callbacks |
509
+ | CPU inference | ✅ | Safe fallback default |
510
+ | GPU inference (Metal/MPS) | ✅ | Supported via `backend: 'gpu'` |
511
+ | Model download with progress | ✅ | URLSession-based, cached in `Caches/` |
512
+ | Memory tracking | ✅ | Real-time Resident Set Size (RSS) tracking |
513
+ | Multi-turn conversation | ✅ | Context retained across turns |
514
+ | Multimodal (image/audio) | | Zero-copy `ArrayBuffer` mapping to FFI input buffers |
515
+ | Speculative Decoding | | Dynamic capabilities check during model pre-load |
516
+ | Function / Tool Calling | | Supported via JSON-encoded schema specification |
478
517
 
479
518
  ### iOS Entitlements
480
519
 
@@ -489,78 +528,88 @@ Add to your app's `.entitlements` file:
489
528
 
490
529
  > **Note:** This entitlement requires a **paid Apple Developer account** ($99/year). Gemma 3n E2B (~1.3 GB) works without it.
491
530
 
492
- ## Building the iOS Engine
531
+ ## iOS FFI Architecture & Integration
493
532
 
494
- The iOS build uses a **Bazel-to-XCFramework pipeline** that compiles the LiteRT-LM C engine and all transitive dependencies into a static library (~82–84 MB).
533
+ The library uses a highly optimized Swift Direct-FFI bridge that links directly with the pre-compiled C library `CLiteRTLM.xcframework`.
495
534
 
496
- ### Prerequisites
535
+ ### Key Design Commitments
497
536
 
498
- - **Bazel 7.6.1+** (via [Bazelisk](https://github.com/bazelbuild/bazelisk) recommended)
499
- - **Xcode command line tools** (`xcode-select --install`)
537
+ 1. **JSI Thread Safety (User Rule #1)**:
538
+ - The JSI/JS thread must never be blocked by native synchronous lock-waiting operations.
539
+ - We dispatch all FFI calls to a serial background `dev.litert.engine` queue, executing callbacks asynchronously to prevent deadlocking JSI execution.
500
540
 
501
- ### Build
541
+ 2. **Zero-Copy Memory Pipelines (User Rule #2)**:
542
+ - Enforce the use of Nitro Modules' `ArrayBuffer` directly referencing native memory pointers (`ArrayBuffer.data`) when processing heavy media assets like images or audio.
502
543
 
503
- ```bash
504
- ./scripts/build-ios-engine.sh
544
+ 3. **Manual FFI Resource Management (User Rule #3)**:
545
+ - Raw pointers (`LiteRtLmEngine*`, `LiteRtLmConversation*`) are manually allocated and strictly deallocated inside Swift `deinit` and `close()` destructors to guarantee 0% memory leaks during prolonged inference sessions.
546
+
547
+ ### Architecture Topology
548
+
549
+ ```
550
+ ┌──────────────────────────────────────────────────────────┐
551
+ │ React Native (TypeScript / JavaScript) │
552
+ ├──────────────────────────────────────────────────────────┤
553
+ │ Nitro Modules JSI Bindings (`HybridLiteRTLMSpec`) │
554
+ ├─────────────────────────────┬────────────────────────────┤
555
+ │ Android (Kotlin) │ iOS (Swift Direct FFI) │
556
+ │ `HybridLiteRTLM.kt` │ `HybridLiteRTLM.swift` │
557
+ │ `litertlm-android` AAR │ `CLiteRTLM.xcframework` │
558
+ └─────────────────────────────┴────────────────────────────┘
505
559
  ```
506
560
 
507
- This will:
561
+ #### Android Bridging
562
+ - Conforms fully to `HybridLiteRTLMSpec` using Kotlin.
563
+ - Incorporates Proguard keep rules to prevent dynamic JSI/JNI code stripping.
564
+ - Declares `<uses-native-library android:name="libOpenCL.so" android:required="false" />` to load dynamic OpenCL for GPU delegate acceleration on Android 12+ without throwing platform installer exceptions.
508
565
 
509
- 1. Clone/checkout LiteRT-LM `v0.10.2` source into `.litert-lm-build/`
510
- 2. Apply `scripts/patches/ios-engine-fixes.patch` (PromptTemplate simplification, linker fixes)
511
- 3. Build `//c:engine` for `ios_arm64` and `ios_sim_arm64` via Bazel
512
- 4. Collect all transitive `.o` files (engine, protobuf, re2, sentencepiece, etc.)
513
- 5. Compile C/C++ stubs for unavailable Rust dependencies
514
- 6. Merge ~1,909 object files into a static library via `libtool`
515
- 7. Package into `ios/Frameworks/LiteRTLM.xcframework`
566
+ #### iOS Bridging
567
+ - Entirely written in native Swift (`HybridLiteRTLM.swift`) calling direct FFI.
568
+ - Avoids the upstream Swift SDK `actor` lock-blocking deadlocks by utilizing low-level C functions directly.
569
+ - Implements custom `getMemoryUsage` that queries the OS directly via `mach_task_basic_info` to get precise real-time Resident Set Size (RSS) metrics.
516
570
 
517
- ### Output
571
+ ## Testing
518
572
 
519
- ```
520
- ios/Frameworks/LiteRTLM.xcframework/
521
- ├── Info.plist
522
- ├── ios-arm64/LiteRTLM.framework/ # Device
523
- │ ├── LiteRTLM # ~82 MB static library
524
- │ └── Headers/litert_lm_engine.h
525
- └── ios-arm64-simulator/LiteRTLM.framework/ # Simulator
526
- ├── LiteRTLM # ~84 MB static library
527
- └── Headers/litert_lm_engine.h
528
- ```
573
+ The library includes a comprehensive multi-tier unit testing suite designed to run quickly on host machines (CI runners or local development environments) without requiring a physical test device.
529
574
 
530
- ### FFI Stubs
575
+ ### 1. JavaScript / TypeScript Layer (Jest)
531
576
 
532
- Certain LiteRT-LM features depend on Rust libraries (llguidance, CXX bridge, MinijinjaTemplate) that are not available in the iOS Bazel build. These are replaced with stubs:
577
+ The JS/TS layer uses Jest to validate the `useModel` hook, download progress callbacks, URL query scrubbing, file storage helpers, and the zero-copy native memory tracker buffer allocations.
533
578
 
534
- | Stub File | Location | Purpose |
535
- | ------------------------------------ | ---------------- | ---------------------------------------- |
536
- | `cxx_bridge_stubs.cc` | `scripts/stubs/` | CXX bridge runtime + Rust FFI type stubs |
537
- | `llguidance_stubs.c` | `scripts/stubs/` | llguidance constrained decoding C API |
538
- | `gemma_model_constraint_provider.cc` | `scripts/stubs/` | Gemma constraint provider factory |
579
+ * **Setup & Mocking**: Includes an active stub (`src/__mocks__/react-native-nitro-modules.ts`) that mocks the Nitro Modules `HybridObject` architecture.
580
+ * **How to run**:
581
+ ```bash
582
+ npm run test
583
+ ```
539
584
 
540
- Additionally, `PromptTemplate` is patched at build time to use a simplified C++ template formatter instead of the Rust MinijinjaTemplate, which avoids all Rust FFI calls during conversation setup.
585
+ ### 2. Android Kotlin Layer (Robolectric)
541
586
 
542
- > **Text inference works fully without these Rust components.** Only constrained decoding, function calling parsers, and advanced Jinja2 template features are affected.
587
+ The Android layer uses local JUnit Robolectric tests to run Android code on the JVM, sandboxing OS dependencies. It validates HTTPS scheme constraints, path traversal mitigations, and initial telemetry states.
543
588
 
544
- ## Architecture
589
+ * **Setup & Mocking**: Uses a local shadow `Promise` implementation to test thread-asynchronous errors.
590
+ * **How to run**:
591
+ ```bash
592
+ cd example/android
593
+ ./gradlew :react-native-litert-lm:testDebugUnitTest
594
+ ```
545
595
 
546
- ```
547
- ┌─────────────────────────────────────────────────┐
548
- │ React Native (TypeScript) │
549
- │ useModel() / createLLM() / sendMessage() │
550
- ├─────────────────────────────────────────────────┤
551
- │ Nitro Modules JSI Bridge │
552
- ├──────────────────────┬──────────────────────────┤
553
- │ Android (Kotlin) │ iOS (C++) │
554
- │ HybridLiteRTLM.kt │ HybridLiteRTLM.cpp │
555
- │ litertlm-android │ LiteRTLM C API │
556
- │ AAR (GPU delegate) │ XCFramework (Metal) │
557
- └──────────────────────┴──────────────────────────┘
558
- ```
596
+ ### 3. iOS Swift Layer (XCTest)
597
+
598
+ The iOS layer leverages native XCTests integrated directly into CocoaPods via standard development test specs. It verifies FFI path traversal blocking, non-HTTPS download blocks, automatic `deinit` cleanup, and Mach-based telemetry bounds.
559
599
 
560
- - **Android**: Kotlin (`HybridLiteRTLM.kt`) interfacing with the `litertlm-android` AAR.
561
- - **iOS**: C++ (`HybridLiteRTLM.cpp`) interfacing with the LiteRT-LM C API via a prebuilt `LiteRTLM.xcframework`. All engine operations (load, inference, streaming) run on dedicated `pthread` threads with 8 MB stack to accommodate XNNPack's stack requirements. Platform-specific code (model downloading, file management) is in Objective-C++ (`ios/IOSDownloadHelper.mm`).
600
+ * **How to run**:
601
+ 1. Boot your preferred iOS simulator (e.g., iPhone 16 running iOS 18.6).
602
+ 2. Run the tests using `xcodebuild`:
603
+ ```bash
604
+ cd example/ios
605
+ xcodebuild test -workspace LLMTest.xcworkspace -scheme react-native-litert-lm-Unit-Tests -sdk iphonesimulator -destination 'platform=iOS Simulator,name=iPhone 16'
606
+ ```
562
607
 
563
- > **For contributors**: Changes to `cpp/HybridLiteRTLM.cpp` do not affect Android. Feature changes must be applied to both the Kotlin and C++ implementations.
608
+ ### Security & Sanitization Protections Checked
609
+ Every test run automatically asserts:
610
+ - **Defense in depth for download boundaries**: Blocks non-HTTPS schemes at both JS model factory and low-level native layers.
611
+ - **Path Traversal protections**: Prevents directory traversal attacks (`..`, `/`, `\`) in download and deletion APIs.
612
+ - **Telemetry sanity**: Ensures zero-leak memory usage telemetry boundaries stay strictly linear.
564
613
 
565
614
  ## License
566
615
 
package/android/build.gradle CHANGED
@@ -19,6 +19,7 @@ android {
19
19
 
20
20
  defaultConfig {
21
21
  minSdk 26 // LiteRT-LM requires API 26+
22
+ consumerProguardFiles 'consumer-rules.pro'
22
23
 
23
24
  externalNativeBuild {
24
25
  cmake {
@@ -66,6 +67,12 @@ android {
66
67
  keepDebugSymbols.add("**/*.so")
67
68
  }
68
69
  }
70
+
71
+ testOptions {
72
+ unitTests {
73
+ includeAndroidResources = true
74
+ }
75
+ }
69
76
  }
70
77
 
71
78
  repositories {
@@ -89,4 +96,9 @@ dependencies {
89
96
 
90
97
  // LiteRT-LM Kotlin API
91
98
  implementation "com.google.ai.edge.litertlm:litertlm-android:${litertLmVersion}"
99
+
100
+ // Testing Dependencies
101
+ testImplementation 'junit:junit:4.13.2'
102
+ testImplementation 'org.robolectric:robolectric:4.11.1'
103
+ testImplementation 'org.jetbrains.kotlinx:kotlinx-coroutines-test:1.7.3'
92
104
  }
package/android/src/main/AndroidManifest.xml CHANGED
@@ -7,5 +7,10 @@
7
7
  android:authorities="${applicationId}.litertlm.init"
8
8
  android:exported="false"
9
9
  android:initOrder="100" />
10
+
11
+ <!-- Allow dynamic loading of OpenCL for GPU delegate acceleration on Android 12+ -->
12
+ <uses-native-library
13
+ android:name="libOpenCL.so"
14
+ android:required="false" />
10
15
  </application>
11
16
  </manifest>