react-native-litert-lm 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +103 -6
- package/android/src/main/java/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLM.kt +47 -0
- package/cpp/HybridLiteRTLM.cpp +79 -0
- package/cpp/HybridLiteRTLM.hpp +12 -0
- package/cpp/cpp-adapter.cpp +10 -2
- package/lib/hooks.d.ts +25 -0
- package/lib/hooks.js +21 -4
- package/lib/index.d.ts +3 -1
- package/lib/index.js +4 -1
- package/lib/memoryTracker.d.ts +128 -0
- package/lib/memoryTracker.js +155 -0
- package/lib/modelFactory.d.ts +14 -1
- package/lib/modelFactory.js +70 -8
- package/lib/specs/LiteRTLM.nitro.d.ts +19 -0
- package/nitrogen/generated/android/LiteRTLMOnLoad.cpp +21 -18
- package/nitrogen/generated/android/LiteRTLMOnLoad.hpp +13 -4
- package/nitrogen/generated/android/c++/JHybridLiteRTLMSpec.cpp +9 -0
- package/nitrogen/generated/android/c++/JHybridLiteRTLMSpec.hpp +1 -0
- package/nitrogen/generated/android/c++/JMemoryUsage.hpp +69 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLMSpec.kt +4 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/MemoryUsage.kt +47 -0
- package/nitrogen/generated/shared/c++/HybridLiteRTLMSpec.cpp +1 -0
- package/nitrogen/generated/shared/c++/HybridLiteRTLMSpec.hpp +4 -0
- package/nitrogen/generated/shared/c++/MemoryUsage.hpp +95 -0
- package/package.json +3 -3
- package/src/hooks.ts +48 -5
- package/src/index.ts +10 -0
- package/src/memoryTracker.ts +268 -0
- package/src/modelFactory.ts +79 -8
- package/src/specs/LiteRTLM.nitro.ts +21 -0
package/README.md
CHANGED
|
@@ -12,6 +12,8 @@ High-performance LLM inference for React Native powered by [LiteRT-LM](https://g
|
|
|
12
12
|
- 📱 **Cross-Platform** - Android API 26+
|
|
13
13
|
- 🖼️ **Multimodal** - Image and audio input support (Android Beta, iOS coming soon)
|
|
14
14
|
- 🧵 **Async API** - Non-blocking inference to prevent UI freezes
|
|
15
|
+
- 📊 **Real Memory Tracking** - OS-level memory metrics (RSS, native heap, available memory) via native APIs
|
|
16
|
+
- 🧮 **Zero-Copy Buffers** - Memory snapshots stored in native ArrayBuffers via `NitroModules.createNativeArrayBuffer()` (v0.34+)
|
|
15
17
|
|
|
16
18
|
## Status
|
|
17
19
|
|
|
@@ -56,27 +58,31 @@ cd ios && pod install # iOS coming soon
|
|
|
56
58
|
|
|
57
59
|
## Example App
|
|
58
60
|
|
|
59
|
-
The repository includes a fully functional example app in the `example/` directory.
|
|
61
|
+
The repository includes a fully functional example app in the `example/` directory with a dark-themed diagnostic UI that demonstrates model loading, inference, memory tracking, and performance stats.
|
|
60
62
|
|
|
61
63
|
To run it:
|
|
62
64
|
|
|
63
|
-
1. **
|
|
65
|
+
1. **Build the library** (compiles TypeScript to `lib/`):
|
|
64
66
|
|
|
65
67
|
```bash
|
|
66
|
-
|
|
68
|
+
npm run build
|
|
67
69
|
```
|
|
68
70
|
|
|
69
|
-
2. **
|
|
71
|
+
2. **Navigate to the example directory and install dependencies:**
|
|
70
72
|
|
|
71
73
|
```bash
|
|
74
|
+
cd example
|
|
72
75
|
npm install
|
|
73
76
|
```
|
|
74
77
|
|
|
75
|
-
3. **
|
|
78
|
+
3. **Create a development build and run on Android:**
|
|
76
79
|
```bash
|
|
80
|
+
npx expo prebuild --clean
|
|
77
81
|
npx expo run:android
|
|
78
82
|
```
|
|
79
83
|
|
|
84
|
+
> **Note:** If you change native code (C++/Kotlin), you must run `npx expo prebuild --clean` again.
|
|
85
|
+
|
|
80
86
|
## Model Management
|
|
81
87
|
|
|
82
88
|
LiteRT-LM models (like Gemma 3n) are large files (3GB+) and cannot be bundled directly into your app's binary. You must download them at runtime to a writable directory (e.g., `DocumentDirectory`).
|
|
@@ -206,6 +212,84 @@ console.log(`Generated ${stats.completionTokens} tokens`);
|
|
|
206
212
|
console.log(`Speed: ${stats.tokensPerSecond.toFixed(1)} tokens/sec`);
|
|
207
213
|
```
|
|
208
214
|
|
|
215
|
+
### Memory Tracking
|
|
216
|
+
|
|
217
|
+
The library provides real OS-level memory usage data. You can query memory at any time, or enable automatic tracking to record snapshots after each inference call.
|
|
218
|
+
|
|
219
|
+
#### Direct Memory Query
|
|
220
|
+
|
|
221
|
+
```typescript
|
|
222
|
+
// Get a single real-time snapshot from native APIs
|
|
223
|
+
const usage = llm.getMemoryUsage();
|
|
224
|
+
console.log(`Native heap: ${(usage.nativeHeapBytes / 1024 / 1024).toFixed(1)} MB`);
|
|
225
|
+
console.log(`RSS: ${(usage.residentBytes / 1024 / 1024).toFixed(1)} MB`);
|
|
226
|
+
console.log(`Available: ${(usage.availableMemoryBytes / 1024 / 1024).toFixed(1)} MB`);
|
|
227
|
+
console.log(`Low memory: ${usage.isLowMemory}`);
|
|
228
|
+
```
|
|
229
|
+
|
|
230
|
+
#### Automatic Tracking with Native Buffers
|
|
231
|
+
|
|
232
|
+
Enable memory tracking to automatically record snapshots in a native-backed `ArrayBuffer` (allocated via `NitroModules.createNativeArrayBuffer()`) after every inference call:
|
|
233
|
+
|
|
234
|
+
```typescript
|
|
235
|
+
import { createLLM } from 'react-native-litert-lm';
|
|
236
|
+
|
|
237
|
+
const llm = createLLM({
|
|
238
|
+
enableMemoryTracking: true,
|
|
239
|
+
maxMemorySnapshots: 256, // default
|
|
240
|
+
});
|
|
241
|
+
|
|
242
|
+
await llm.loadModel('/path/to/model.litertlm', { backend: 'cpu' });
|
|
243
|
+
await llm.sendMessage('Hello!');
|
|
244
|
+
|
|
245
|
+
// Review tracked data
|
|
246
|
+
const summary = llm.memoryTracker!.getSummary();
|
|
247
|
+
console.log(`Peak RSS: ${(summary.peakResidentBytes / 1024 / 1024).toFixed(1)} MB`);
|
|
248
|
+
console.log(`Peak Native Heap: ${(summary.peakNativeHeapBytes / 1024 / 1024).toFixed(1)} MB`);
|
|
249
|
+
console.log(`RSS Delta: ${(summary.residentDeltaBytes / 1024 / 1024).toFixed(1)} MB`);
|
|
250
|
+
console.log(`Snapshots: ${summary.snapshotCount}`);
|
|
251
|
+
```
|
|
252
|
+
|
|
253
|
+
#### Using the `useModel` Hook with Memory Tracking
|
|
254
|
+
|
|
255
|
+
```typescript
|
|
256
|
+
import { useModel } from 'react-native-litert-lm';
|
|
257
|
+
|
|
258
|
+
const { model, isReady, memorySummary, memoryTracker } = useModel(modelUrl, {
|
|
259
|
+
enableMemoryTracking: true,
|
|
260
|
+
maxMemorySnapshots: 100,
|
|
261
|
+
});
|
|
262
|
+
|
|
263
|
+
// memorySummary auto-updates after each inference call
|
|
264
|
+
if (memorySummary) {
|
|
265
|
+
console.log(`Current RSS: ${memorySummary.currentResidentBytes}`);
|
|
266
|
+
console.log(`Peak RSS: ${memorySummary.peakResidentBytes}`);
|
|
267
|
+
}
|
|
268
|
+
```
|
|
269
|
+
|
|
270
|
+
#### Standalone Memory Tracker
|
|
271
|
+
|
|
272
|
+
```typescript
|
|
273
|
+
import { createMemoryTracker, createNativeBuffer } from 'react-native-litert-lm';
|
|
274
|
+
|
|
275
|
+
// Create a tracker backed by a native ArrayBuffer
|
|
276
|
+
const tracker = createMemoryTracker(100);
|
|
277
|
+
|
|
278
|
+
// Manually record snapshots
|
|
279
|
+
tracker.record({
|
|
280
|
+
timestamp: Date.now(),
|
|
281
|
+
nativeHeapBytes: 50_000_000,
|
|
282
|
+
residentBytes: 200_000_000,
|
|
283
|
+
availableMemoryBytes: 4_000_000_000,
|
|
284
|
+
});
|
|
285
|
+
|
|
286
|
+
// Access the underlying native buffer (for zero-copy transfer to native code)
|
|
287
|
+
const buffer = tracker.getNativeBuffer();
|
|
288
|
+
|
|
289
|
+
// Create a standalone native buffer for custom use
|
|
290
|
+
const customBuffer = createNativeBuffer(1024);
|
|
291
|
+
```
|
|
292
|
+
|
|
209
293
|
## Supported Models
|
|
210
294
|
|
|
211
295
|
Download `.litertlm` models automatically using the exported constants or from [HuggingFace](https://huggingface.co/litert-community):
|
|
@@ -264,6 +348,19 @@ Send a message with an image attachment (for vision models).
|
|
|
264
348
|
|
|
265
349
|
Send a message with an audio attachment (for audio models).
|
|
266
350
|
|
|
351
|
+
### `getMemoryUsage(): MemoryUsage`
|
|
352
|
+
|
|
353
|
+
Returns real OS-level memory usage statistics from native APIs. No estimation — reads directly from `mach_task_basic_info` (iOS) / `Debug.getNativeHeapAllocatedSize()` + `/proc/self/status` (Android).
|
|
354
|
+
|
|
355
|
+
```typescript
|
|
356
|
+
interface MemoryUsage {
|
|
357
|
+
nativeHeapBytes: number; // Native heap allocated bytes
|
|
358
|
+
residentBytes: number; // Process RSS in bytes
|
|
359
|
+
availableMemoryBytes: number; // Available system memory in bytes
|
|
360
|
+
isLowMemory: boolean; // Whether the system considers memory low
|
|
361
|
+
}
|
|
362
|
+
```
|
|
363
|
+
|
|
267
364
|
### `getHistory(): Message[]`
|
|
268
365
|
|
|
269
366
|
Get conversation history.
|
|
@@ -341,7 +438,7 @@ const llamaPrompt = applyLlamaTemplate(history, "You are helpful.");
|
|
|
341
438
|
## Requirements
|
|
342
439
|
|
|
343
440
|
- React Native 0.76+
|
|
344
|
-
- react-native-nitro-modules 0.
|
|
441
|
+
- react-native-nitro-modules **0.34.1+** (required for `createNativeArrayBuffer` and memory tracking)
|
|
345
442
|
- Android API 26+ (ARM64 only)
|
|
346
443
|
- **LiteRT-LM Android SDK**: `0.9.0-alpha01` (bundled automatically)
|
|
347
444
|
- iOS 15.0+ (coming soon)
|
|
@@ -6,6 +6,9 @@
|
|
|
6
6
|
package com.margelo.nitro.dev.litert.litertlm
|
|
7
7
|
|
|
8
8
|
import android.util.Log
|
|
9
|
+
import android.os.Debug
|
|
10
|
+
import android.app.ActivityManager
|
|
11
|
+
import android.content.Context
|
|
9
12
|
import androidx.annotation.Keep
|
|
10
13
|
import com.facebook.proguard.annotations.DoNotStrip
|
|
11
14
|
import dev.litert.litertlm.LiteRTLMInitProvider
|
|
@@ -496,6 +499,50 @@ class HybridLiteRTLM : HybridLiteRTLMSpec() {
|
|
|
496
499
|
return lastStats
|
|
497
500
|
}
|
|
498
501
|
|
|
502
|
+
override fun getMemoryUsage(): MemoryUsage {
|
|
503
|
+
// Native heap: allocated bytes from Debug APIs (most accurate for native allocations)
|
|
504
|
+
val nativeHeapBytes = Debug.getNativeHeapAllocatedSize().toDouble()
|
|
505
|
+
|
|
506
|
+
// Process RSS: read from /proc/self/status (VmRSS) in kB
|
|
507
|
+
var residentBytes = 0.0
|
|
508
|
+
try {
|
|
509
|
+
java.io.File("/proc/self/status").forEachLine { line ->
|
|
510
|
+
if (line.startsWith("VmRSS:")) {
|
|
511
|
+
val kb = line.substringAfter("VmRSS:").trim().split("\\s+".toRegex())[0].toDoubleOrNull()
|
|
512
|
+
if (kb != null) {
|
|
513
|
+
residentBytes = kb * 1024.0
|
|
514
|
+
}
|
|
515
|
+
return@forEachLine
|
|
516
|
+
}
|
|
517
|
+
}
|
|
518
|
+
} catch (e: Exception) {
|
|
519
|
+
Log.w(TAG, "Failed to read /proc/self/status: ${e.message}")
|
|
520
|
+
}
|
|
521
|
+
|
|
522
|
+
// Available memory and low-memory flag from ActivityManager
|
|
523
|
+
var availableMemoryBytes = 0.0
|
|
524
|
+
var isLowMemory = false
|
|
525
|
+
try {
|
|
526
|
+
val context = LiteRTLMInitProvider.applicationContext
|
|
527
|
+
if (context != null) {
|
|
528
|
+
val activityManager = context.getSystemService(Context.ACTIVITY_SERVICE) as ActivityManager
|
|
529
|
+
val memInfo = ActivityManager.MemoryInfo()
|
|
530
|
+
activityManager.getMemoryInfo(memInfo)
|
|
531
|
+
availableMemoryBytes = memInfo.availMem.toDouble()
|
|
532
|
+
isLowMemory = memInfo.lowMemory
|
|
533
|
+
}
|
|
534
|
+
} catch (e: Exception) {
|
|
535
|
+
Log.w(TAG, "Failed to get ActivityManager memory info: ${e.message}")
|
|
536
|
+
}
|
|
537
|
+
|
|
538
|
+
return MemoryUsage(
|
|
539
|
+
nativeHeapBytes = nativeHeapBytes,
|
|
540
|
+
residentBytes = residentBytes,
|
|
541
|
+
availableMemoryBytes = availableMemoryBytes,
|
|
542
|
+
isLowMemory = isLowMemory
|
|
543
|
+
)
|
|
544
|
+
}
|
|
545
|
+
|
|
499
546
|
override fun close() {
|
|
500
547
|
Log.d(TAG, "Closing resources")
|
|
501
548
|
isClosed = true
|
package/cpp/HybridLiteRTLM.cpp
CHANGED
|
@@ -518,6 +518,85 @@ GenerationStats HybridLiteRTLM::getStats() {
|
|
|
518
518
|
return lastStats_;
|
|
519
519
|
}
|
|
520
520
|
|
|
521
|
+
//------------------------------------------------------------------------------
|
|
522
|
+
// getMemoryUsage - Return real memory usage from OS
|
|
523
|
+
//------------------------------------------------------------------------------
|
|
524
|
+
MemoryUsage HybridLiteRTLM::getMemoryUsage() {
|
|
525
|
+
double nativeHeapBytes = 0;
|
|
526
|
+
double residentBytes = 0;
|
|
527
|
+
double availableMemoryBytes = 0;
|
|
528
|
+
bool isLowMemory = false;
|
|
529
|
+
|
|
530
|
+
#ifdef __APPLE__
|
|
531
|
+
// Get process memory info via Mach APIs
|
|
532
|
+
struct mach_task_basic_info taskInfo;
|
|
533
|
+
mach_msg_type_number_t infoCount = MACH_TASK_BASIC_INFO_COUNT;
|
|
534
|
+
if (task_info(mach_task_self(), MACH_TASK_BASIC_INFO,
|
|
535
|
+
(task_info_t)&taskInfo, &infoCount) == KERN_SUCCESS) {
|
|
536
|
+
residentBytes = static_cast<double>(taskInfo.resident_size);
|
|
537
|
+
}
|
|
538
|
+
|
|
539
|
+
// Get system-wide memory pressure
|
|
540
|
+
vm_statistics64_data_t vmStats;
|
|
541
|
+
mach_msg_type_number_t vmCount = HOST_VM_INFO64_COUNT;
|
|
542
|
+
if (host_statistics64(mach_host_self(), HOST_VM_INFO64,
|
|
543
|
+
(host_info64_t)&vmStats, &vmCount) == KERN_SUCCESS) {
|
|
544
|
+
vm_size_t pageSize;
|
|
545
|
+
host_page_size(mach_host_self(), &pageSize);
|
|
546
|
+
availableMemoryBytes = static_cast<double>(vmStats.free_count) * pageSize;
|
|
547
|
+
// Consider low memory if free pages < 10% of total active+inactive+free
|
|
548
|
+
uint64_t totalPages = vmStats.active_count + vmStats.inactive_count + vmStats.free_count;
|
|
549
|
+
isLowMemory = (totalPages > 0) &&
|
|
550
|
+
(static_cast<double>(vmStats.free_count) / totalPages < 0.1);
|
|
551
|
+
}
|
|
552
|
+
|
|
553
|
+
// malloc_size is per-allocation; use resident_size as native heap proxy
|
|
554
|
+
nativeHeapBytes = residentBytes;
|
|
555
|
+
#endif
|
|
556
|
+
|
|
557
|
+
#ifdef __ANDROID__
|
|
558
|
+
// Parse /proc/self/status for VmRSS (resident set size)
|
|
559
|
+
std::ifstream statusFile("/proc/self/status");
|
|
560
|
+
if (statusFile.is_open()) {
|
|
561
|
+
std::string line;
|
|
562
|
+
while (std::getline(statusFile, line)) {
|
|
563
|
+
if (line.rfind("VmRSS:", 0) == 0) {
|
|
564
|
+
// Format: "VmRSS: 123456 kB"
|
|
565
|
+
std::istringstream iss(line.substr(6));
|
|
566
|
+
double kbValue = 0;
|
|
567
|
+
iss >> kbValue;
|
|
568
|
+
residentBytes = kbValue * 1024.0;
|
|
569
|
+
break;
|
|
570
|
+
}
|
|
571
|
+
}
|
|
572
|
+
}
|
|
573
|
+
|
|
574
|
+
// Use mallinfo for native heap
|
|
575
|
+
struct mallinfo mi = mallinfo();
|
|
576
|
+
nativeHeapBytes = static_cast<double>(mi.uordblks); // total allocated space
|
|
577
|
+
|
|
578
|
+
// Parse /proc/meminfo for available memory
|
|
579
|
+
std::ifstream memFile("/proc/meminfo");
|
|
580
|
+
if (memFile.is_open()) {
|
|
581
|
+
std::string line;
|
|
582
|
+
while (std::getline(memFile, line)) {
|
|
583
|
+
if (line.rfind("MemAvailable:", 0) == 0) {
|
|
584
|
+
std::istringstream iss(line.substr(13));
|
|
585
|
+
double kbValue = 0;
|
|
586
|
+
iss >> kbValue;
|
|
587
|
+
availableMemoryBytes = kbValue * 1024.0;
|
|
588
|
+
break;
|
|
589
|
+
}
|
|
590
|
+
}
|
|
591
|
+
}
|
|
592
|
+
|
|
593
|
+
// Consider low if available < 256MB
|
|
594
|
+
isLowMemory = availableMemoryBytes > 0 && availableMemoryBytes < 256.0 * 1024 * 1024;
|
|
595
|
+
#endif
|
|
596
|
+
|
|
597
|
+
return MemoryUsage{nativeHeapBytes, residentBytes, availableMemoryBytes, isLowMemory};
|
|
598
|
+
}
|
|
599
|
+
|
|
521
600
|
//------------------------------------------------------------------------------
|
|
522
601
|
// close - Release all native resources
|
|
523
602
|
//------------------------------------------------------------------------------
|
package/cpp/HybridLiteRTLM.hpp
CHANGED
|
@@ -21,6 +21,16 @@
|
|
|
21
21
|
#include "litert/lm/types.h"
|
|
22
22
|
#endif
|
|
23
23
|
|
|
24
|
+
// Memory usage headers
|
|
25
|
+
#ifdef __APPLE__
|
|
26
|
+
#include <mach/mach.h>
|
|
27
|
+
#include <mach/mach_host.h>
|
|
28
|
+
#endif
|
|
29
|
+
#ifdef __ANDROID__
|
|
30
|
+
#include <malloc.h>
|
|
31
|
+
#include <fstream>
|
|
32
|
+
#endif
|
|
33
|
+
|
|
24
34
|
#include <string>
|
|
25
35
|
#include <optional>
|
|
26
36
|
#include <vector>
|
|
@@ -79,6 +89,8 @@ public:
|
|
|
79
89
|
|
|
80
90
|
GenerationStats getStats() override;
|
|
81
91
|
|
|
92
|
+
MemoryUsage getMemoryUsage() override;
|
|
93
|
+
|
|
82
94
|
void close() override;
|
|
83
95
|
|
|
84
96
|
private:
|
package/cpp/cpp-adapter.cpp
CHANGED
|
@@ -2,12 +2,20 @@
|
|
|
2
2
|
/// cpp-adapter.cpp
|
|
3
3
|
/// JNI Entry Point - Required by Nitrogen to register Kotlin HybridObjects
|
|
4
4
|
///
|
|
5
|
+
/// Updated for react-native-nitro-modules v0.34+:
|
|
6
|
+
/// Uses facebook::jni::initialize() directly with registerAllNatives().
|
|
7
|
+
///
|
|
5
8
|
|
|
6
9
|
#include <jni.h>
|
|
10
|
+
#include <fbjni/fbjni.h>
|
|
7
11
|
#include "LiteRTLMOnLoad.hpp"
|
|
8
12
|
|
|
9
13
|
// JNI_OnLoad is called when the native library is loaded via System.loadLibrary()
|
|
10
|
-
// This is where we initialize the Nitrogen bridge and register all Kotlin HybridObjects
|
|
14
|
+
// This is where we initialize the Nitrogen bridge and register all Kotlin HybridObjects.
|
|
15
|
+
// The new v0.34 API allows registering custom C++ native JNI classes/functions
|
|
16
|
+
// alongside Nitrogen's auto-generated registrations.
|
|
11
17
|
JNIEXPORT jint JNICALL JNI_OnLoad(JavaVM* vm, void*) {
|
|
12
|
-
return
|
|
18
|
+
return facebook::jni::initialize(vm, []() {
|
|
19
|
+
margelo::nitro::litertlm::registerAllNatives();
|
|
20
|
+
});
|
|
13
21
|
}
|
package/lib/hooks.d.ts
CHANGED
|
@@ -1,6 +1,20 @@
|
|
|
1
1
|
import { LiteRTLM, LLMConfig } from "./index";
|
|
2
|
+
import type { MemoryTracker, MemoryTrackerSummary } from "./memoryTracker";
|
|
2
3
|
export interface UseModelConfig extends LLMConfig {
|
|
3
4
|
autoLoad?: boolean;
|
|
5
|
+
/**
|
|
6
|
+
* Enable memory tracking using native ArrayBuffers (v0.34+).
|
|
7
|
+
* When enabled, memory usage is tracked after each inference call
|
|
8
|
+
* using `NitroModules.createNativeArrayBuffer()` for zero-copy storage.
|
|
9
|
+
* @default false
|
|
10
|
+
*/
|
|
11
|
+
enableMemoryTracking?: boolean;
|
|
12
|
+
/**
|
|
13
|
+
* Maximum number of memory snapshots to store.
|
|
14
|
+
* Each snapshot uses 32 bytes of native memory.
|
|
15
|
+
* @default 256
|
|
16
|
+
*/
|
|
17
|
+
maxMemorySnapshots?: number;
|
|
4
18
|
}
|
|
5
19
|
export interface UseModelResult {
|
|
6
20
|
model: LiteRTLM | null;
|
|
@@ -12,5 +26,16 @@ export interface UseModelResult {
|
|
|
12
26
|
reset: () => void;
|
|
13
27
|
deleteModel: (fileName: string) => Promise<void>;
|
|
14
28
|
load: () => Promise<void>;
|
|
29
|
+
/**
|
|
30
|
+
* Memory tracker instance (available when enableMemoryTracking is true).
|
|
31
|
+
* Uses native ArrayBuffers allocated via `NitroModules.createNativeArrayBuffer()`
|
|
32
|
+
* for efficient, zero-copy memory usage tracking.
|
|
33
|
+
*/
|
|
34
|
+
memoryTracker: MemoryTracker | null;
|
|
35
|
+
/**
|
|
36
|
+
* Current memory tracking summary (null if tracking is disabled).
|
|
37
|
+
* Updates automatically after each inference call.
|
|
38
|
+
*/
|
|
39
|
+
memorySummary: MemoryTrackerSummary | null;
|
|
15
40
|
}
|
|
16
41
|
export declare function useModel(pathOrUrl: string, config?: UseModelConfig): UseModelResult;
|
package/lib/hooks.js
CHANGED
|
@@ -9,11 +9,25 @@ function useModel(pathOrUrl, config) {
|
|
|
9
9
|
const [isGenerating, setIsGenerating] = (0, react_1.useState)(false);
|
|
10
10
|
const [downloadProgress, setDownloadProgress] = (0, react_1.useState)(0);
|
|
11
11
|
const [error, setError] = (0, react_1.useState)(null);
|
|
12
|
-
|
|
12
|
+
const [memorySummary, setMemorySummary] = (0, react_1.useState)(null);
|
|
13
|
+
// Extract autoLoad (default true) and memory tracking options
|
|
13
14
|
const autoLoad = config?.autoLoad ?? true;
|
|
15
|
+
const enableMemoryTracking = config?.enableMemoryTracking ?? false;
|
|
16
|
+
const maxMemorySnapshots = config?.maxMemorySnapshots ?? 256;
|
|
17
|
+
/**
|
|
18
|
+
* Refresh memory summary from the tracker's native buffer.
|
|
19
|
+
*/
|
|
20
|
+
const refreshMemorySummary = (0, react_1.useCallback)(() => {
|
|
21
|
+
if (modelRef.current?.memoryTracker) {
|
|
22
|
+
setMemorySummary(modelRef.current.memoryTracker.getSummary());
|
|
23
|
+
}
|
|
24
|
+
}, []);
|
|
14
25
|
// Initialize the model instance
|
|
15
26
|
(0, react_1.useEffect)(() => {
|
|
16
|
-
modelRef.current = (0, modelFactory_1.createLLM)(
|
|
27
|
+
modelRef.current = (0, modelFactory_1.createLLM)({
|
|
28
|
+
enableMemoryTracking,
|
|
29
|
+
maxMemorySnapshots,
|
|
30
|
+
});
|
|
17
31
|
let isMounted = true;
|
|
18
32
|
// Cleanup on unmount
|
|
19
33
|
return () => {
|
|
@@ -25,7 +39,7 @@ function useModel(pathOrUrl, config) {
|
|
|
25
39
|
console.warn("Failed to close model", e);
|
|
26
40
|
}
|
|
27
41
|
};
|
|
28
|
-
}, []);
|
|
42
|
+
}, [enableMemoryTracking, maxMemorySnapshots]);
|
|
29
43
|
const load = (0, react_1.useCallback)(async () => {
|
|
30
44
|
setIsReady(false);
|
|
31
45
|
setError(null);
|
|
@@ -71,6 +85,7 @@ function useModel(pathOrUrl, config) {
|
|
|
71
85
|
modelRef.current?.sendMessageAsync(prompt, (token, done) => {
|
|
72
86
|
fullResponse += token;
|
|
73
87
|
if (done) {
|
|
88
|
+
refreshMemorySummary();
|
|
74
89
|
resolve(fullResponse);
|
|
75
90
|
}
|
|
76
91
|
});
|
|
@@ -87,7 +102,7 @@ function useModel(pathOrUrl, config) {
|
|
|
87
102
|
finally {
|
|
88
103
|
setIsGenerating(false);
|
|
89
104
|
}
|
|
90
|
-
}, [isReady]);
|
|
105
|
+
}, [isReady, refreshMemorySummary]);
|
|
91
106
|
const reset = (0, react_1.useCallback)(() => {
|
|
92
107
|
if (modelRef.current) {
|
|
93
108
|
modelRef.current.resetConversation();
|
|
@@ -110,5 +125,7 @@ function useModel(pathOrUrl, config) {
|
|
|
110
125
|
reset,
|
|
111
126
|
deleteModel,
|
|
112
127
|
load,
|
|
128
|
+
memoryTracker: modelRef.current?.memoryTracker ?? null,
|
|
129
|
+
memorySummary,
|
|
113
130
|
};
|
|
114
131
|
}
|
package/lib/index.d.ts
CHANGED
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
import type { Backend } from "./specs/LiteRTLM.nitro";
|
|
2
|
-
export type { LiteRTLM, LLMConfig, Message, Backend, Role, GenerationStats, } from "./specs/LiteRTLM.nitro";
|
|
2
|
+
export type { LiteRTLM, LLMConfig, Message, Backend, Role, GenerationStats, MemoryUsage, } from "./specs/LiteRTLM.nitro";
|
|
3
3
|
export type { ChatMessage } from "./templates";
|
|
4
4
|
export { applyGemmaTemplate, applyPhiTemplate, applyLlamaTemplate, } from "./templates";
|
|
5
|
+
export type { MemorySnapshot, MemoryTracker, MemoryTrackerSummary, } from "./memoryTracker";
|
|
6
|
+
export { createMemoryTracker, createNativeBuffer } from "./memoryTracker";
|
|
5
7
|
export * from "./hooks";
|
|
6
8
|
/**
|
|
7
9
|
* Creates a new LiteRT-LM inference engine instance.
|
package/lib/index.js
CHANGED
|
@@ -14,7 +14,7 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
|
|
|
14
14
|
for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
|
|
15
15
|
};
|
|
16
16
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
17
|
-
exports.GEMMA_3N_E2B_IT_INT4 = exports.Models = exports.createLLM = exports.applyLlamaTemplate = exports.applyPhiTemplate = exports.applyGemmaTemplate = void 0;
|
|
17
|
+
exports.GEMMA_3N_E2B_IT_INT4 = exports.Models = exports.createLLM = exports.createNativeBuffer = exports.createMemoryTracker = exports.applyLlamaTemplate = exports.applyPhiTemplate = exports.applyGemmaTemplate = void 0;
|
|
18
18
|
exports.getRecommendedBackend = getRecommendedBackend;
|
|
19
19
|
exports.checkBackendSupport = checkBackendSupport;
|
|
20
20
|
exports.checkMultimodalSupport = checkMultimodalSupport;
|
|
@@ -23,6 +23,9 @@ var templates_1 = require("./templates");
|
|
|
23
23
|
Object.defineProperty(exports, "applyGemmaTemplate", { enumerable: true, get: function () { return templates_1.applyGemmaTemplate; } });
|
|
24
24
|
Object.defineProperty(exports, "applyPhiTemplate", { enumerable: true, get: function () { return templates_1.applyPhiTemplate; } });
|
|
25
25
|
Object.defineProperty(exports, "applyLlamaTemplate", { enumerable: true, get: function () { return templates_1.applyLlamaTemplate; } });
|
|
26
|
+
var memoryTracker_1 = require("./memoryTracker");
|
|
27
|
+
Object.defineProperty(exports, "createMemoryTracker", { enumerable: true, get: function () { return memoryTracker_1.createMemoryTracker; } });
|
|
28
|
+
Object.defineProperty(exports, "createNativeBuffer", { enumerable: true, get: function () { return memoryTracker_1.createNativeBuffer; } });
|
|
26
29
|
__exportStar(require("./hooks"), exports);
|
|
27
30
|
/**
|
|
28
31
|
* Creates a new LiteRT-LM inference engine instance.
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Memory tracking utilities for LiteRT-LM using real native memory metrics.
|
|
3
|
+
*
|
|
4
|
+
* Records real memory usage from OS-level APIs via `getMemoryUsage()`,
|
|
5
|
+
* and stores snapshots in a native-backed ArrayBuffer allocated via
|
|
6
|
+
* `NitroModules.createNativeArrayBuffer()` (v0.34+) for zero-copy interop.
|
|
7
|
+
*
|
|
8
|
+
* @example
|
|
9
|
+
* ```typescript
|
|
10
|
+
* import { createMemoryTracker } from 'react-native-litert-lm';
|
|
11
|
+
*
|
|
12
|
+
* const tracker = createMemoryTracker(100);
|
|
13
|
+
*
|
|
14
|
+
* // Record a real snapshot (typically called internally after inference)
|
|
15
|
+
* tracker.record({
|
|
16
|
+
* timestamp: Date.now(),
|
|
17
|
+
* nativeHeapBytes: usage.nativeHeapBytes,
|
|
18
|
+
* residentBytes: usage.residentBytes,
|
|
19
|
+
* availableMemoryBytes: usage.availableMemoryBytes,
|
|
20
|
+
* });
|
|
21
|
+
*
|
|
22
|
+
* console.log(`Peak RSS: ${tracker.getPeakMemory()} bytes`);
|
|
23
|
+
* ```
|
|
24
|
+
*/
|
|
25
|
+
/**
|
|
26
|
+
* A single memory usage snapshot with real data from OS APIs.
|
|
27
|
+
*/
|
|
28
|
+
export interface MemorySnapshot {
|
|
29
|
+
/** Unix timestamp in milliseconds */
|
|
30
|
+
timestamp: number;
|
|
31
|
+
/** Native heap allocated bytes (Debug.getNativeHeapAllocatedSize on Android, task_info on iOS) */
|
|
32
|
+
nativeHeapBytes: number;
|
|
33
|
+
/** Process resident set size (RSS) in bytes */
|
|
34
|
+
residentBytes: number;
|
|
35
|
+
/** Available system memory in bytes */
|
|
36
|
+
availableMemoryBytes: number;
|
|
37
|
+
}
|
|
38
|
+
/**
|
|
39
|
+
* Memory tracker that stores snapshots in a native-backed ArrayBuffer.
|
|
40
|
+
*
|
|
41
|
+
* Uses `NitroModules.createNativeArrayBuffer()` to allocate the backing
|
|
42
|
+
* buffer in native (C++) memory, ensuring zero-copy interop with native
|
|
43
|
+
* methods and keeping memory tracking data off the JS heap.
|
|
44
|
+
*/
|
|
45
|
+
export interface MemoryTracker {
|
|
46
|
+
/**
|
|
47
|
+
* Record a new memory snapshot.
|
|
48
|
+
* @param snapshot The memory usage data to record
|
|
49
|
+
* @returns true if recorded, false if buffer is full
|
|
50
|
+
*/
|
|
51
|
+
record(snapshot: MemorySnapshot): boolean;
|
|
52
|
+
/**
|
|
53
|
+
* Get all recorded snapshots as structured objects.
|
|
54
|
+
*/
|
|
55
|
+
getSnapshots(): MemorySnapshot[];
|
|
56
|
+
/**
|
|
57
|
+
* Get the number of recorded snapshots.
|
|
58
|
+
*/
|
|
59
|
+
getSnapshotCount(): number;
|
|
60
|
+
/**
|
|
61
|
+
* Get the maximum number of snapshots this tracker can hold.
|
|
62
|
+
*/
|
|
63
|
+
getCapacity(): number;
|
|
64
|
+
/**
|
|
65
|
+
* Get the peak resident set size across all snapshots.
|
|
66
|
+
*/
|
|
67
|
+
getPeakMemory(): number;
|
|
68
|
+
/**
|
|
69
|
+
* Get the latest memory snapshot, or undefined if none recorded.
|
|
70
|
+
*/
|
|
71
|
+
getLatestSnapshot(): MemorySnapshot | undefined;
|
|
72
|
+
/**
|
|
73
|
+
* Get the underlying native ArrayBuffer.
|
|
74
|
+
* This buffer is allocated via `NitroModules.createNativeArrayBuffer()`
|
|
75
|
+
* and lives in native memory, enabling zero-copy transfer to native methods.
|
|
76
|
+
*/
|
|
77
|
+
getNativeBuffer(): ArrayBuffer;
|
|
78
|
+
/**
|
|
79
|
+
* Get the Float64Array view over the native buffer.
|
|
80
|
+
*/
|
|
81
|
+
getView(): Float64Array;
|
|
82
|
+
/**
|
|
83
|
+
* Reset the tracker, clearing all recorded snapshots.
|
|
84
|
+
* The native buffer is preserved (not reallocated).
|
|
85
|
+
*/
|
|
86
|
+
reset(): void;
|
|
87
|
+
/**
|
|
88
|
+
* Get a summary of memory usage statistics.
|
|
89
|
+
*/
|
|
90
|
+
getSummary(): MemoryTrackerSummary;
|
|
91
|
+
}
|
|
92
|
+
/**
|
|
93
|
+
* Summary statistics from the memory tracker.
|
|
94
|
+
*/
|
|
95
|
+
export interface MemoryTrackerSummary {
|
|
96
|
+
/** Number of snapshots recorded */
|
|
97
|
+
snapshotCount: number;
|
|
98
|
+
/** Peak resident set size in bytes */
|
|
99
|
+
peakResidentBytes: number;
|
|
100
|
+
/** Average resident set size in bytes */
|
|
101
|
+
averageResidentBytes: number;
|
|
102
|
+
/** Latest resident set size in bytes */
|
|
103
|
+
currentResidentBytes: number;
|
|
104
|
+
/** Peak native heap allocated in bytes */
|
|
105
|
+
peakNativeHeapBytes: number;
|
|
106
|
+
/** Latest native heap allocated in bytes */
|
|
107
|
+
currentNativeHeapBytes: number;
|
|
108
|
+
/** RSS delta from first to last snapshot in bytes */
|
|
109
|
+
residentDeltaBytes: number;
|
|
110
|
+
/** Size of the native tracking buffer itself in bytes */
|
|
111
|
+
trackerBufferSizeBytes: number;
|
|
112
|
+
}
|
|
113
|
+
/**
|
|
114
|
+
* Create a new memory tracker backed by a native ArrayBuffer.
|
|
115
|
+
*
|
|
116
|
+
* @param maxSnapshots Maximum number of snapshots to store (default: 256)
|
|
117
|
+
* @returns A MemoryTracker instance
|
|
118
|
+
*/
|
|
119
|
+
export declare function createMemoryTracker(maxSnapshots?: number): MemoryTracker;
|
|
120
|
+
/**
|
|
121
|
+
* Create a native ArrayBuffer for efficient data transfer.
|
|
122
|
+
*
|
|
123
|
+
* A convenience wrapper around `NitroModules.createNativeArrayBuffer()`.
|
|
124
|
+
*
|
|
125
|
+
* @param size Size in bytes
|
|
126
|
+
* @returns A native-backed ArrayBuffer
|
|
127
|
+
*/
|
|
128
|
+
export declare function createNativeBuffer(size: number): ArrayBuffer;
|