react-native-litert-lm 0.3.7 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88)
  1. package/README.md +153 -135
  2. package/android/build.gradle +12 -0
  3. package/android/src/main/AndroidManifest.xml +8 -0
  4. package/android/src/main/java/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLM.kt +276 -62
  5. package/android/src/main/java/dev/litert/litertlm/LiteRTLMPackage.kt +19 -2
  6. package/android/src/test/java/com/margelo/nitro/core/Promise.kt +46 -0
  7. package/android/src/test/java/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLMTest.kt +105 -0
  8. package/ios/HybridLiteRTLM.swift +1344 -0
  9. package/ios/Tests/HybridLiteRTLMTests.swift +113 -0
  10. package/lib/__mocks__/react-native-nitro-modules.d.ts +65 -0
  11. package/lib/__mocks__/react-native-nitro-modules.js +60 -0
  12. package/lib/__tests__/hooks.test.d.ts +1 -0
  13. package/lib/__tests__/hooks.test.js +124 -0
  14. package/lib/__tests__/memoryTracker.test.d.ts +1 -0
  15. package/lib/__tests__/memoryTracker.test.js +74 -0
  16. package/lib/__tests__/modelFactory.test.d.ts +1 -0
  17. package/lib/__tests__/modelFactory.test.js +68 -0
  18. package/lib/hooks.js +27 -3
  19. package/lib/index.d.ts +6 -2
  20. package/lib/index.js +8 -8
  21. package/lib/modelFactory.js +82 -63
  22. package/lib/specs/LiteRTLM.nitro.d.ts +87 -2
  23. package/nitrogen/generated/android/LiteRTLMOnLoad.cpp +2 -2
  24. package/nitrogen/generated/android/c++/JHybridLiteRTLMSpec.cpp +94 -9
  25. package/nitrogen/generated/android/c++/JHybridLiteRTLMSpec.hpp +5 -1
  26. package/nitrogen/generated/android/c++/JLLMConfig.hpp +40 -3
  27. package/nitrogen/generated/android/c++/JMultimodalPart.hpp +74 -0
  28. package/nitrogen/generated/android/c++/JPartType.hpp +61 -0
  29. package/nitrogen/generated/android/c++/JToolDefinition.hpp +65 -0
  30. package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/GenerationStats.kt +23 -0
  31. package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLMSpec.kt +28 -2
  32. package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/LLMConfig.kt +46 -3
  33. package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/MemoryUsage.kt +19 -0
  34. package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/Message.kt +15 -0
  35. package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/MultimodalPart.kt +66 -0
  36. package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/PartType.kt +24 -0
  37. package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/ToolDefinition.kt +61 -0
  38. package/nitrogen/generated/ios/LiteRTLM-Swift-Cxx-Bridge.cpp +57 -1
  39. package/nitrogen/generated/ios/LiteRTLM-Swift-Cxx-Bridge.hpp +414 -3
  40. package/nitrogen/generated/ios/LiteRTLM-Swift-Cxx-Umbrella.hpp +41 -3
  41. package/nitrogen/generated/ios/LiteRTLMAutolinking.mm +4 -6
  42. package/nitrogen/generated/ios/LiteRTLMAutolinking.swift +10 -0
  43. package/nitrogen/generated/ios/c++/HybridLiteRTLMSpecSwift.cpp +11 -0
  44. package/nitrogen/generated/ios/c++/HybridLiteRTLMSpecSwift.hpp +240 -0
  45. package/nitrogen/generated/ios/swift/Backend.swift +44 -0
  46. package/nitrogen/generated/ios/swift/Func_void.swift +46 -0
  47. package/nitrogen/generated/ios/swift/Func_void_double.swift +46 -0
  48. package/nitrogen/generated/ios/swift/Func_void_std__exception_ptr.swift +46 -0
  49. package/nitrogen/generated/ios/swift/Func_void_std__string.swift +46 -0
  50. package/nitrogen/generated/ios/swift/Func_void_std__string_bool.swift +46 -0
  51. package/nitrogen/generated/ios/swift/GenerationStats.swift +54 -0
  52. package/nitrogen/generated/ios/swift/HybridLiteRTLMSpec.swift +71 -0
  53. package/nitrogen/generated/ios/swift/HybridLiteRTLMSpec_cxx.swift +431 -0
  54. package/nitrogen/generated/ios/swift/LLMConfig.swift +203 -0
  55. package/nitrogen/generated/ios/swift/MemoryUsage.swift +44 -0
  56. package/nitrogen/generated/ios/swift/Message.swift +34 -0
  57. package/nitrogen/generated/ios/swift/MultimodalPart.swift +83 -0
  58. package/nitrogen/generated/ios/swift/PartType.swift +44 -0
  59. package/nitrogen/generated/ios/swift/Role.swift +44 -0
  60. package/nitrogen/generated/ios/swift/ToolDefinition.swift +39 -0
  61. package/nitrogen/generated/shared/c++/HybridLiteRTLMSpec.cpp +4 -0
  62. package/nitrogen/generated/shared/c++/HybridLiteRTLMSpec.hpp +9 -2
  63. package/nitrogen/generated/shared/c++/LLMConfig.hpp +22 -2
  64. package/nitrogen/generated/shared/c++/MultimodalPart.hpp +99 -0
  65. package/nitrogen/generated/shared/c++/PartType.hpp +80 -0
  66. package/nitrogen/generated/shared/c++/ToolDefinition.hpp +91 -0
  67. package/package.json +22 -11
  68. package/react-native-litert-lm.podspec +17 -19
  69. package/scripts/download-ios-frameworks.sh +17 -50
  70. package/scripts/framework-source.js +46 -0
  71. package/scripts/postinstall.js +40 -18
  72. package/src/__mocks__/react-native-nitro-modules.ts +58 -0
  73. package/src/__tests__/hooks.test.ts +153 -0
  74. package/src/__tests__/memoryTracker.test.ts +87 -0
  75. package/src/__tests__/modelFactory.test.ts +96 -0
  76. package/src/hooks.ts +29 -7
  77. package/src/index.ts +7 -10
  78. package/src/modelFactory.ts +104 -80
  79. package/src/specs/LiteRTLM.nitro.ts +106 -2
  80. package/cpp/HybridLiteRTLM.cpp +0 -939
  81. package/cpp/HybridLiteRTLM.hpp +0 -169
  82. package/cpp/IOSDownloadHelper.h +0 -24
  83. package/ios/IOSDownloadHelper.mm +0 -129
  84. package/scripts/build-ios-engine.sh +0 -302
  85. package/scripts/stubs/cxx_bridge_stubs.cc +0 -224
  86. package/scripts/stubs/gemma_model_constraint_provider.cc +0 -46
  87. package/scripts/stubs/llguidance_stubs.c +0 -101
  88. package/src/templates.ts +0 -105
package/README.md CHANGED
@@ -4,16 +4,15 @@ High-performance on-device LLM inference for React Native, powered by [LiteRT-LM
4
4
 
5
5
  ## Features
6
6
 
7
- - 🚀 **Native Performance** — Kotlin (Android) / C++ (iOS) via Nitro Modules JSI bindings
8
- - 🧠 **Gemma 4 Ready** — First-class support for Gemma 4 E2B/E4B multimodal models (text + vision + audio)
9
- - ⚡ **GPU Acceleration** — Metal (iOS), OpenCL GPU delegate (Android, Pixel devices)
10
- - 🔄 **Streaming Support** — Token-by-token generation callbacks
11
- - 📱 **Cross-Platform** — Android API 26+ / iOS 15.0+
12
- - 🖼️ **Multimodal** — Image and audio input support
13
- - 🧵 **Async API** — Non-blocking inference on dedicated large-stack threads
14
- - 📊 **Real Memory Tracking** — OS-level memory metrics (RSS, native heap, available memory) via native APIs
15
- - 🧮 **Zero-Copy Buffers** — Memory snapshots stored in native ArrayBuffers via Nitro Modules
16
- - 📥 **Automatic Model Download** — Downloads models from URL with progress tracking and local caching
7
+ - 🚀 **Native Swift Bridge (iOS)** — Bypasses Swift actor deadlocks (User Rule #1) via direct C FFI dispatched on a serial `dev.litert.engine` background queue.
8
+ - 🤖 **Stateless Kotlin Bridge (Android)** — Fully conforms to `HybridLiteRTLMSpec` using direct JSI memory access.
9
+ - ⚡ **Zero-Copy Multimodal API** — Native-owned `ArrayBuffer` mapping straight to FFI inputs for image/audio data without copy overhead (complying with User Rule #2).
10
+ - 🧠 **Speculative Decoding** — Active multi-token prediction support with pre-flight model capability validation.
11
+ - 🛠️ **Function / Tool Calling** — Native JSON-encoded schema specification support for structured outputs.
12
+ - 🏎️ **GPU Acceleration** — Metal (iOS), OpenCL GPU delegate (Android, Pixel devices).
13
+ - 🔄 **Streaming Support** — Non-blocking token-by-token callbacks.
14
+ - 📊 **Real Memory Tracking** — OS-level memory metrics (RSS, native heap, available memory) via native APIs.
15
+ - 📥 **Automatic Model Download** — Downloads models from URL with progress tracking and local caching.
17
16
 
18
17
  ## Demo
19
18
 
@@ -71,7 +70,8 @@ The `example/` directory contains a fully functional test app with a dark-themed
71
70
  - Multi-turn conversation with context retention
72
71
  - Performance benchmarking (tokens/sec, latency)
73
72
  - Real-time memory tracking
74
- - Quick chat interface
73
+ - Speculative decoding & tool calling settings toggles
74
+ - Zero-copy multimodal inference loading images/audio directly into ArrayBuffers
75
75
 
76
76
  ### Running the Example
77
77
 
@@ -93,10 +93,10 @@ The `example/` directory contains a fully functional test app with a dark-themed
93
93
  ```bash
94
94
  npx expo prebuild --clean
95
95
  npx expo run:android # Android
96
- npx expo run:ios # iOS (requires XCFramework — see "Building the iOS Engine" below)
96
+ npx expo run:ios # iOS (pre-linked with CLiteRTLM.xcframework)
97
97
  ```
98
98
 
99
- > **Note:** If you change native code (C++/Kotlin/Obj-C++), you must run `npx expo prebuild --clean` again before rebuilding.
99
+ > **Note:** If you change native code (Swift/Kotlin), you must run `npx expo prebuild --clean` again before rebuilding.
100
100
 
101
101
  ## Model Management
102
102
 
@@ -203,32 +203,80 @@ llm.sendMessageAsync("Tell me a story", (token, done) => {
203
203
  });
204
204
  ```
205
205
 
206
- ### Multimodal (Image / Audio)
206
+ ### Multimodal (Image / Audio) & Zero-Copy Buffers
207
207
 
208
- > **Note**: Multimodal is fully supported on Android. iOS has the code paths implemented but vision/audio executors may not be available in the current XCFramework build — use `checkMultimodalSupport()` to verify at runtime.
208
+ Multimodal features are fully supported via standard file paths or high-performance zero-copy `ArrayBuffer` objects:
209
+
210
+ #### 1. Zero-Copy Multimodal Messages (Recommended)
211
+ This API uses Nitro Modules' native-backed `ArrayBuffer` directly mapped to native memory buffers, avoiding any base64 heap copying overhead (User Rule #2):
209
212
 
210
213
  ```typescript
211
214
  import { checkMultimodalSupport } from "react-native-litert-lm";
212
215
 
213
216
  const warning = checkMultimodalSupport();
214
217
  if (warning) {
215
- console.warn(warning); // Experimental on iOS
218
+ console.warn(warning); // Experimental or unsupported on current platform (e.g. iOS simulator)
216
219
  } else {
217
- // Image input (for vision models like Gemma 4)
218
- // Images >1024px are automatically resized to prevent OOM
219
- const response = await llm.sendMessageWithImage(
220
- "What's in this image?",
221
- "/path/to/image.jpg",
222
- );
223
-
224
- // Audio input
225
- const transcription = await llm.sendMessageWithAudio(
226
- "Transcribe this audio",
227
- "/path/to/audio.wav",
228
- );
220
+ // Read local assets or files straight into ArrayBuffers using fetch
221
+ const response = await fetch(Image.resolveAssetSource(require("./test.jpeg")).uri);
222
+ const imageBuffer = await response.arrayBuffer();
223
+
224
+ const reply = await llm.sendMultimodalMessage([
225
+ { type: "image", imageBuffer },
226
+ { type: "text", text: "Describe what is in this image." }
227
+ ]);
228
+ console.log(reply);
229
229
  }
230
230
  ```
231
231
 
232
+ #### 2. Path-Based Multimodal Messages
233
+ ```typescript
234
+ // Image input
235
+ const response = await llm.sendMessageWithImage(
236
+ "What's in this image?",
237
+ "/path/to/image.jpg",
238
+ );
239
+
240
+ // Audio input
241
+ const transcription = await llm.sendMessageWithAudio(
242
+ "Transcribe this audio",
243
+ "/path/to/audio.wav",
244
+ );
245
+ ```
246
+
247
+ ### Speculative Decoding & Tools
248
+
249
+ #### 1. Speculative Decoding (MTP)
250
+ Enable speculative decoding in `LLMConfig` to accelerate inference using multi-token prediction when supported by your model:
251
+
252
+ ```typescript
253
+ const { model } = useModel(GEMMA_4_E2B_IT, {
254
+ enableSpeculativeDecoding: true,
255
+ });
256
+ ```
257
+
258
+ #### 2. Function / Tool Calling
259
+ Inject tools as an array of definitions, specifying parameter validation using standard JSON schema format:
260
+
261
+ ```typescript
262
+ const { model } = useModel(GEMMA_4_E2B_IT, {
263
+ tools: [
264
+ {
265
+ name: "get_current_weather",
266
+ description: "Get the current weather for a location",
267
+ parametersJson: JSON.stringify({
268
+ type: "object",
269
+ properties: {
270
+ location: { type: "string", description: "The city and state, e.g. San Francisco, CA" },
271
+ unit: { type: "string", enum: ["celsius", "fahrenheit"] }
272
+ },
273
+ required: ["location"]
274
+ })
275
+ }
276
+ ]
277
+ });
278
+ ```
279
+
232
280
  ### Performance Stats
233
281
 
234
282
  ```typescript
@@ -238,6 +286,8 @@ console.log(`Speed: ${stats.tokensPerSecond.toFixed(1)} tokens/sec`);
238
286
  console.log(`Time to first token: ${stats.timeToFirstToken.toFixed(0)} ms`);
239
287
  ```
240
288
 
289
+ > **Note**: Stats are available for both sync (`sendMessage`) and streaming (`sendMessageAsync`) on both platforms. iOS uses real benchmark data from the C API; Android uses heuristic token counts with precise timing.
290
+
241
291
  ### Memory Tracking
242
292
 
243
293
  The library provides real OS-level memory data — no estimation. It reads directly from `mach_task_basic_info` (iOS) and `Debug.getNativeHeapAllocatedSize()` + `/proc/self/status` (Android).
@@ -391,32 +441,10 @@ Send a message with audio (for audio-capable models like Gemma 4 E2B).
391
441
 
392
442
  Returns performance metrics from the last inference call.
393
443
 
394
- ```typescript
395
- interface GenerationStats {
396
- tokensPerSecond: number;
397
- totalTime: number; // milliseconds
398
- timeToFirstToken: number; // milliseconds
399
- promptTokens: number;
400
- completionTokens: number;
401
- totalTokens: number;
402
- }
403
- ```
404
-
405
- > **Note**: Stats are available for both sync (`sendMessage`) and streaming (`sendMessageAsync`) on both platforms. iOS uses real benchmark data from the C API; Android uses heuristic token counts (~4 chars/token) with precise timing.
406
-
407
444
  ### `getMemoryUsage(): MemoryUsage`
408
445
 
409
446
  Returns real OS-level memory usage.
410
447
 
411
- ```typescript
412
- interface MemoryUsage {
413
- nativeHeapBytes: number;
414
- residentBytes: number;
415
- availableMemoryBytes: number;
416
- isLowMemory: boolean;
417
- }
418
- ```
419
-
420
448
  ### `getHistory(): Message[]`
421
449
 
422
450
  Returns the conversation history.
@@ -440,17 +468,10 @@ import {
440
468
  checkBackendSupport,
441
469
  checkMultimodalSupport,
442
470
  getRecommendedBackend,
443
- applyGemmaTemplate,
444
- applyPhiTemplate,
445
- applyLlamaTemplate,
446
471
  } from "react-native-litert-lm";
447
472
 
448
473
  // Check if GPU is supported on this device
449
474
  const gpuWarning = checkBackendSupport("gpu");
450
- if (gpuWarning) {
451
- console.warn(gpuWarning);
452
- // "GPU backend requires OpenCL support, which is unavailable on most Samsung and Qualcomm devices."
453
- }
454
475
 
455
476
  // Check NPU support
456
477
  const npuWarning = checkBackendSupport("npu"); // string | undefined
@@ -460,12 +481,6 @@ const mmError = checkMultimodalSupport(); // string | undefined
460
481
 
461
482
  // Get recommended backend
462
483
  const backend = getRecommendedBackend(); // 'cpu'
463
-
464
- // Manual prompt formatting (advanced)
465
- const prompt = applyGemmaTemplate(
466
- [{ role: "user", content: "Hello!" }],
467
- "You are helpful.",
468
- );
469
484
  ```
470
485
 
471
486
  ## Requirements
@@ -476,7 +491,7 @@ const prompt = applyGemmaTemplate(
476
491
  | react-native-nitro-modules | 0.35.0+ |
477
492
  | Android API | 26+ (ARM64) |
478
493
  | iOS | 15.0+ (ARM64) |
479
- | LiteRT-LM Engine | 0.10.2 |
494
+ | LiteRT-LM Engine | 0.12.0 |
480
495
 
481
496
  ## Platform Support
482
497
 
@@ -487,18 +502,18 @@ const prompt = applyGemmaTemplate(
487
502
 
488
503
  ### iOS Feature Matrix
489
504
 
490
- | Feature | Status | Notes |
491
- | ---------------------------- | ------ | ----------------------------------------------------- |
492
- | Text inference (blocking) | ✅ | Via LiteRT-LM C API |
493
- | Text inference (streaming) | ✅ | Token-by-token callbacks |
494
- | CPU inference | ✅ | Recommended default backend |
495
- | GPU inference (Metal/MPS) | ✅ | Supported via `backend: 'gpu'` |
496
- | Model download with progress | ✅ | NSURLSession, cached in `Caches/` |
497
- | Memory tracking | ✅ | `mach_task_basic_info` |
498
- | Multi-turn conversation | ✅ | Context retained across turns |
499
- | Multimodal (image/audio) | 🧪 | Code paths exist; vision/audio executors experimental |
500
- | Constrained decoding | | Requires llguidance Rust runtime |
501
- | Function calling | | Requires Rust CXX bridge runtime |
505
+ | Feature | Status | Notes |
506
+ | ---------------------------- | ------ | ------------------------------------------------------ |
507
+ | Text inference (blocking) | ✅ | Direct FFI using `dev.litert.engine` background queue |
508
+ | Text inference (streaming) | ✅ | Token-by-token callbacks |
509
+ | CPU inference | ✅ | Safe fallback default |
510
+ | GPU inference (Metal/MPS) | ✅ | Supported via `backend: 'gpu'` |
511
+ | Model download with progress | ✅ | URLSession-based, cached in `Caches/` |
512
+ | Memory tracking | ✅ | Real-time Resident Set Size (RSS) tracking |
513
+ | Multi-turn conversation | ✅ | Context retained across turns |
514
+ | Multimodal (image/audio) | | Zero-copy `ArrayBuffer` mapping to FFI input buffers |
515
+ | Speculative Decoding | | Dynamic capabilities check during model pre-load |
516
+ | Function / Tool Calling | | Supported via JSON-encoded schema specification |
502
517
 
503
518
  ### iOS Entitlements
504
519
 
@@ -513,85 +528,88 @@ Add to your app's `.entitlements` file:
513
528
 
514
529
  > **Note:** This entitlement requires a **paid Apple Developer account** ($99/year). Gemma 3n E2B (~1.3 GB) works without it.
515
530
 
516
- ## Building the iOS Engine
531
+ ## iOS FFI Architecture & Integration
517
532
 
518
- The iOS build uses a **Bazel-to-XCFramework pipeline** that compiles the LiteRT-LM C engine and all transitive dependencies into a static library (~82–84 MB).
533
+ The library uses a highly optimized Swift Direct-FFI bridge that links directly with the pre-compiled C library `CLiteRTLM.xcframework`.
519
534
 
520
- ### Prerequisites
535
+ ### Key Design Commitments
521
536
 
522
- - **Bazel 7.6.1+** (via [Bazelisk](https://github.com/bazelbuild/bazelisk) recommended)
523
- - **Xcode command line tools** (`xcode-select --install`)
537
+ 1. **JSI Thread Safety (User Rule #1)**:
538
+ - The JSI/JS thread must never be blocked by native synchronous lock-waiting operations.
539
+ - We dispatch all FFI calls to a serial background `dev.litert.engine` queue, executing callbacks asynchronously to prevent deadlocking JSI execution.
524
540
 
525
- ### Build
541
+ 2. **Zero-Copy Memory Pipelines (User Rule #2)**:
542
+ - Enforce the use of Nitro Modules' `ArrayBuffer` directly referencing native memory pointers (`ArrayBuffer.data`) when processing heavy media assets like images or audio.
543
+
544
+ 3. **Manual FFI Resource Management (User Rule #3)**:
545
+ - Raw pointers (`LiteRtLmEngine*`, `LiteRtLmConversation*`) are manually allocated and explicitly released in Swift `deinit` and in `close()`, so native handles do not leak during prolonged inference sessions.
546
+
547
+ ### Architecture Topology
526
548
 
527
- ```bash
528
- ./scripts/build-ios-engine.sh
549
+ ```
550
+ ┌──────────────────────────────────────────────────────────┐
551
+ │ React Native (TypeScript / JavaScript) │
552
+ ├──────────────────────────────────────────────────────────┤
553
+ │ Nitro Modules JSI Bindings (`HybridLiteRTLMSpec`) │
554
+ ├─────────────────────────────┬────────────────────────────┤
555
+ │ Android (Kotlin) │ iOS (Swift Direct FFI) │
556
+ │ `HybridLiteRTLM.kt` │ `HybridLiteRTLM.swift` │
557
+ │ `litertlm-android` AAR │ `CLiteRTLM.xcframework` │
558
+ └─────────────────────────────┴────────────────────────────┘
529
559
  ```
530
560
 
531
- This will:
561
+ #### Android Bridging
562
+ - Conforms fully to `HybridLiteRTLMSpec` using Kotlin.
563
+ - Incorporates Proguard keep rules to prevent dynamic JSI/JNI code stripping.
564
+ - Declares `<uses-native-library android:name="libOpenCL.so" android:required="false" />` to load dynamic OpenCL for GPU delegate acceleration on Android 12+ without throwing platform installer exceptions.
532
565
 
533
- 1. Clone/checkout LiteRT-LM `v0.10.2` source into `.litert-lm-build/`
534
- 2. Apply `scripts/patches/ios-engine-fixes.patch` (PromptTemplate simplification, linker fixes)
535
- 3. Build `//c:engine` for `ios_arm64` and `ios_sim_arm64` via Bazel
536
- 4. Collect all transitive `.o` files (engine, protobuf, re2, sentencepiece, etc.)
537
- 5. Compile C/C++ stubs for unavailable Rust dependencies
538
- 6. Merge ~1,909 object files into a static library via `libtool`
539
- 7. Package into `ios/Frameworks/LiteRTLM.xcframework`
566
+ #### iOS Bridging
567
+ - Entirely written in native Swift (`HybridLiteRTLM.swift`) calling direct FFI.
568
+ - Avoids the upstream Swift SDK `actor` lock-blocking deadlocks by utilizing low-level C functions directly.
569
+ - Implements custom `getMemoryUsage` that queries the OS directly via `mach_task_basic_info` to get precise real-time Resident Set Size (RSS) metrics.
540
570
 
541
- ### Output
571
+ ## Testing
542
572
 
543
- ```
544
- ios/Frameworks/LiteRTLM.xcframework/
545
- ├── Info.plist
546
- ├── ios-arm64/LiteRTLM.framework/ # Device
547
- │ ├── LiteRTLM # ~82 MB static library
548
- │ └── Headers/litert_lm_engine.h
549
- └── ios-arm64-simulator/LiteRTLM.framework/ # Simulator
550
- ├── LiteRTLM # ~84 MB static library
551
- └── Headers/litert_lm_engine.h
552
- ```
573
+ The library includes a comprehensive multi-tier unit testing suite designed to run quickly on host machines (CI runners or local development environments) without requiring a physical test device.
553
574
 
554
- ### FFI Stubs
575
+ ### 1. JavaScript / TypeScript Layer (Jest)
555
576
 
556
- Certain LiteRT-LM features depend on Rust libraries (llguidance, CXX bridge, MinijinjaTemplate) that are not available in the iOS Bazel build. These are replaced with stubs:
577
+ The JS/TS layer uses Jest to validate the `useModel` hook, download progress callbacks, URL query scrubbing, file storage helpers, and the zero-copy native memory tracker buffer allocations.
557
578
 
558
- | Stub File | Location | Purpose |
559
- | ------------------------------------ | ---------------- | ---------------------------------------- |
560
- | `cxx_bridge_stubs.cc` | `scripts/stubs/` | CXX bridge runtime + Rust FFI type stubs |
561
- | `llguidance_stubs.c` | `scripts/stubs/` | llguidance constrained decoding C API |
562
- | `gemma_model_constraint_provider.cc` | `scripts/stubs/` | Gemma constraint provider factory |
579
+ * **Setup & Mocking**: Includes an active stub (`src/__mocks__/react-native-nitro-modules.ts`) that mocks the Nitro Modules `HybridObject` architecture.
580
+ * **How to run**:
581
+ ```bash
582
+ npm run test
583
+ ```
563
584
 
564
- Additionally, `PromptTemplate` is patched at build time to use a simplified C++ template formatter instead of the Rust MinijinjaTemplate, which avoids all Rust FFI calls during conversation setup.
585
+ ### 2. Android Kotlin Layer (Robolectric)
565
586
 
566
- > **Text inference works fully without these Rust components.** Only constrained decoding, function calling parsers, and advanced Jinja2 template features are affected.
587
+ The Android layer uses local JUnit Robolectric tests to run Android code on the JVM, sandboxing OS dependencies. It validates HTTPS scheme constraints, path traversal mitigations, and initial telemetry states.
567
588
 
568
- ## Architecture
589
+ * **Setup & Mocking**: Uses a local shadow `Promise` implementation to test thread-asynchronous errors.
590
+ * **How to run**:
591
+ ```bash
592
+ cd example/android
593
+ ./gradlew :react-native-litert-lm:testDebugUnitTest
594
+ ```
569
595
 
570
- ```
571
- ┌─────────────────────────────────────────────────┐
572
- │ React Native (TypeScript) │
573
- │ useModel() / createLLM() / sendMessage() │
574
- ├─────────────────────────────────────────────────┤
575
- │ Nitro Modules JSI Bridge │
576
- ├──────────────────────┬──────────────────────────┤
577
- │ Android (Kotlin) │ iOS (C++) │
578
- │ HybridLiteRTLM.kt │ HybridLiteRTLM.cpp │
579
- │ litertlm-android │ LiteRT-LM C API │
580
- │ AAR (GPU delegate) │ XCFramework (Metal) │
581
- └──────────────────────┴──────────────────────────┘
582
- ```
596
+ ### 3. iOS Swift Layer (XCTest)
583
597
 
584
- - **Android**: Kotlin (`HybridLiteRTLM.kt`) interfacing with the `litertlm-android` AAR via the **Kotlin SDK**. The SDK handles control token stripping and turn management automatically. Engine validation probes for OpenCL availability before GPU initialization. `ConversationConfig` with `SamplerConfig` is passed for all conversations (matching the Gallery app pattern).
585
- - **iOS**: C++ (`HybridLiteRTLM.cpp`) interfacing with the LiteRT-LM **C API** via a prebuilt `LiteRTLM.xcframework`. Unlike the Kotlin SDK, the C API emits raw tokens including control sequences (`<end_of_turn>`, `<start_of_turn>`) and echoed user messages. The C++ layer implements a robust sanitization pipeline:
586
- - **Accumulation-and-diff** — buffers the full response and emits only new deltas
587
- - **`stripControlTokens()`** — removes all control sequences from the accumulated buffer
588
- - **`safeEmitLength()`** — look-ahead buffering that withholds partial control tokens (e.g., `<end_of_tur`) from emission until the full token is received or the stream terminates
589
- - **Echo mitigation** — strips echoed user messages from the raw stream
590
- - **Final flush** — mandatory clean-and-flush step on stream termination
598
+ The iOS layer leverages native XCTests integrated directly into CocoaPods via standard development test specs. It verifies FFI path traversal blocking, non-HTTPS download blocks, automatic `deinit` cleanup, and Mach-based telemetry bounds.
591
599
 
592
- Platform-specific code (model downloading, file management) is in Objective-C++ (`ios/IOSDownloadHelper.mm`).
600
+ * **How to run**:
601
+ 1. Boot your preferred iOS simulator (e.g., iPhone 16 running iOS 18.6).
602
+ 2. Run the tests using `xcodebuild`:
603
+ ```bash
604
+ cd example/ios
605
+ xcodebuild test -workspace LLMTest.xcworkspace -scheme react-native-litert-lm-Unit-Tests -sdk iphonesimulator -destination 'platform=iOS Simulator,name=iPhone 16'
606
+ ```
593
607
 
594
- > **For contributors**: Changes to `cpp/HybridLiteRTLM.cpp` do not affect Android. Feature changes must be applied to both the Kotlin and C++ implementations.
608
+ ### Security & Sanitization Protections Checked
609
+ Every test run automatically asserts:
610
+ - **Defense in depth for download boundaries**: Blocks non-HTTPS schemes at both JS model factory and low-level native layers.
611
+ - **Path Traversal protections**: Prevents directory traversal attacks (`..`, `/`, `\`) in download and deletion APIs.
612
+ - **Telemetry sanity**: Ensures zero-leak memory usage telemetry boundaries stay strictly linear.
595
613
 
596
614
  ## License
597
615
 
@@ -19,6 +19,7 @@ android {
19
19
 
20
20
  defaultConfig {
21
21
  minSdk 26 // LiteRT-LM requires API 26+
22
+ consumerProguardFiles 'consumer-rules.pro'
22
23
 
23
24
  externalNativeBuild {
24
25
  cmake {
@@ -66,6 +67,12 @@ android {
66
67
  keepDebugSymbols.add("**/*.so")
67
68
  }
68
69
  }
70
+
71
+ testOptions {
72
+ unitTests {
73
+ includeAndroidResources = true
74
+ }
75
+ }
69
76
  }
70
77
 
71
78
  repositories {
@@ -89,4 +96,9 @@ dependencies {
89
96
 
90
97
  // LiteRT-LM Kotlin API
91
98
  implementation "com.google.ai.edge.litertlm:litertlm-android:${litertLmVersion}"
99
+
100
+ // Testing Dependencies
101
+ testImplementation 'junit:junit:4.13.2'
102
+ testImplementation 'org.robolectric:robolectric:4.11.1'
103
+ testImplementation 'org.jetbrains.kotlinx:kotlinx-coroutines-test:1.7.3'
92
104
  }
@@ -7,5 +7,13 @@
7
7
  android:authorities="${applicationId}.litertlm.init"
8
8
  android:exported="false"
9
9
  android:initOrder="100" />
10
+
11
+ <!-- Allow dynamic loading of OpenCL for GPU delegate acceleration on Android 12+ -->
12
+ <uses-native-library
13
+ android:name="libOpenCL.so"
14
+ android:required="false" />
15
+ <uses-native-library
16
+ android:name="libvndksupport.so"
17
+ android:required="false" />
10
18
  </application>
11
19
  </manifest>