react-native-litert-lm 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +103 -6
- package/android/src/main/java/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLM.kt +47 -0
- package/cpp/HybridLiteRTLM.cpp +79 -0
- package/cpp/HybridLiteRTLM.hpp +12 -0
- package/cpp/cpp-adapter.cpp +10 -2
- package/lib/hooks.d.ts +25 -0
- package/lib/hooks.js +21 -4
- package/lib/index.d.ts +3 -1
- package/lib/index.js +4 -1
- package/lib/memoryTracker.d.ts +128 -0
- package/lib/memoryTracker.js +155 -0
- package/lib/modelFactory.d.ts +14 -1
- package/lib/modelFactory.js +70 -8
- package/lib/specs/LiteRTLM.nitro.d.ts +19 -0
- package/nitrogen/generated/android/LiteRTLMOnLoad.cpp +21 -18
- package/nitrogen/generated/android/LiteRTLMOnLoad.hpp +13 -4
- package/nitrogen/generated/android/c++/JHybridLiteRTLMSpec.cpp +9 -0
- package/nitrogen/generated/android/c++/JHybridLiteRTLMSpec.hpp +1 -0
- package/nitrogen/generated/android/c++/JMemoryUsage.hpp +69 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLMSpec.kt +4 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/MemoryUsage.kt +47 -0
- package/nitrogen/generated/shared/c++/HybridLiteRTLMSpec.cpp +1 -0
- package/nitrogen/generated/shared/c++/HybridLiteRTLMSpec.hpp +4 -0
- package/nitrogen/generated/shared/c++/MemoryUsage.hpp +95 -0
- package/package.json +3 -3
- package/src/hooks.ts +48 -5
- package/src/index.ts +10 -0
- package/src/memoryTracker.ts +268 -0
- package/src/modelFactory.ts +79 -8
- package/src/specs/LiteRTLM.nitro.ts +21 -0
|
package/nitrogen/generated/shared/c++/MemoryUsage.hpp
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
///
|
|
2
|
+
/// MemoryUsage.hpp
|
|
3
|
+
/// This file was generated by nitrogen. DO NOT MODIFY THIS FILE.
|
|
4
|
+
/// https://github.com/mrousavy/nitro
|
|
5
|
+
/// Copyright © Marc Rousavy @ Margelo
|
|
6
|
+
///
|
|
7
|
+
|
|
8
|
+
#pragma once
|
|
9
|
+
|
|
10
|
+
#if __has_include(<NitroModules/JSIConverter.hpp>)
|
|
11
|
+
#include <NitroModules/JSIConverter.hpp>
|
|
12
|
+
#else
|
|
13
|
+
#error NitroModules cannot be found! Are you sure you installed NitroModules properly?
|
|
14
|
+
#endif
|
|
15
|
+
#if __has_include(<NitroModules/NitroDefines.hpp>)
|
|
16
|
+
#include <NitroModules/NitroDefines.hpp>
|
|
17
|
+
#else
|
|
18
|
+
#error NitroModules cannot be found! Are you sure you installed NitroModules properly?
|
|
19
|
+
#endif
|
|
20
|
+
#if __has_include(<NitroModules/JSIHelpers.hpp>)
|
|
21
|
+
#include <NitroModules/JSIHelpers.hpp>
|
|
22
|
+
#else
|
|
23
|
+
#error NitroModules cannot be found! Are you sure you installed NitroModules properly?
|
|
24
|
+
#endif
|
|
25
|
+
#if __has_include(<NitroModules/PropNameIDCache.hpp>)
|
|
26
|
+
#include <NitroModules/PropNameIDCache.hpp>
|
|
27
|
+
#else
|
|
28
|
+
#error NitroModules cannot be found! Are you sure you installed NitroModules properly?
|
|
29
|
+
#endif
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
namespace margelo::nitro::litertlm {
|
|
36
|
+
|
|
37
|
+
/**
|
|
38
|
+
* A struct which can be represented as a JavaScript object (MemoryUsage).
|
|
39
|
+
*/
|
|
40
|
+
struct MemoryUsage final {
|
|
41
|
+
public:
|
|
42
|
+
double nativeHeapBytes SWIFT_PRIVATE;
|
|
43
|
+
double residentBytes SWIFT_PRIVATE;
|
|
44
|
+
double availableMemoryBytes SWIFT_PRIVATE;
|
|
45
|
+
bool isLowMemory SWIFT_PRIVATE;
|
|
46
|
+
|
|
47
|
+
public:
|
|
48
|
+
MemoryUsage() = default;
|
|
49
|
+
explicit MemoryUsage(double nativeHeapBytes, double residentBytes, double availableMemoryBytes, bool isLowMemory): nativeHeapBytes(nativeHeapBytes), residentBytes(residentBytes), availableMemoryBytes(availableMemoryBytes), isLowMemory(isLowMemory) {}
|
|
50
|
+
|
|
51
|
+
public:
|
|
52
|
+
friend bool operator==(const MemoryUsage& lhs, const MemoryUsage& rhs) = default;
|
|
53
|
+
};
|
|
54
|
+
|
|
55
|
+
} // namespace margelo::nitro::litertlm
|
|
56
|
+
|
|
57
|
+
namespace margelo::nitro {
|
|
58
|
+
|
|
59
|
+
// C++ MemoryUsage <> JS MemoryUsage (object)
|
|
60
|
+
template <>
|
|
61
|
+
struct JSIConverter<margelo::nitro::litertlm::MemoryUsage> final {
|
|
62
|
+
static inline margelo::nitro::litertlm::MemoryUsage fromJSI(jsi::Runtime& runtime, const jsi::Value& arg) {
|
|
63
|
+
jsi::Object obj = arg.asObject(runtime);
|
|
64
|
+
return margelo::nitro::litertlm::MemoryUsage(
|
|
65
|
+
JSIConverter<double>::fromJSI(runtime, obj.getProperty(runtime, PropNameIDCache::get(runtime, "nativeHeapBytes"))),
|
|
66
|
+
JSIConverter<double>::fromJSI(runtime, obj.getProperty(runtime, PropNameIDCache::get(runtime, "residentBytes"))),
|
|
67
|
+
JSIConverter<double>::fromJSI(runtime, obj.getProperty(runtime, PropNameIDCache::get(runtime, "availableMemoryBytes"))),
|
|
68
|
+
JSIConverter<bool>::fromJSI(runtime, obj.getProperty(runtime, PropNameIDCache::get(runtime, "isLowMemory")))
|
|
69
|
+
);
|
|
70
|
+
}
|
|
71
|
+
static inline jsi::Value toJSI(jsi::Runtime& runtime, const margelo::nitro::litertlm::MemoryUsage& arg) {
|
|
72
|
+
jsi::Object obj(runtime);
|
|
73
|
+
obj.setProperty(runtime, PropNameIDCache::get(runtime, "nativeHeapBytes"), JSIConverter<double>::toJSI(runtime, arg.nativeHeapBytes));
|
|
74
|
+
obj.setProperty(runtime, PropNameIDCache::get(runtime, "residentBytes"), JSIConverter<double>::toJSI(runtime, arg.residentBytes));
|
|
75
|
+
obj.setProperty(runtime, PropNameIDCache::get(runtime, "availableMemoryBytes"), JSIConverter<double>::toJSI(runtime, arg.availableMemoryBytes));
|
|
76
|
+
obj.setProperty(runtime, PropNameIDCache::get(runtime, "isLowMemory"), JSIConverter<bool>::toJSI(runtime, arg.isLowMemory));
|
|
77
|
+
return obj;
|
|
78
|
+
}
|
|
79
|
+
static inline bool canConvert(jsi::Runtime& runtime, const jsi::Value& value) {
|
|
80
|
+
if (!value.isObject()) {
|
|
81
|
+
return false;
|
|
82
|
+
}
|
|
83
|
+
jsi::Object obj = value.getObject(runtime);
|
|
84
|
+
if (!nitro::isPlainObject(runtime, obj)) {
|
|
85
|
+
return false;
|
|
86
|
+
}
|
|
87
|
+
if (!JSIConverter<double>::canConvert(runtime, obj.getProperty(runtime, PropNameIDCache::get(runtime, "nativeHeapBytes")))) return false;
|
|
88
|
+
if (!JSIConverter<double>::canConvert(runtime, obj.getProperty(runtime, PropNameIDCache::get(runtime, "residentBytes")))) return false;
|
|
89
|
+
if (!JSIConverter<double>::canConvert(runtime, obj.getProperty(runtime, PropNameIDCache::get(runtime, "availableMemoryBytes")))) return false;
|
|
90
|
+
if (!JSIConverter<bool>::canConvert(runtime, obj.getProperty(runtime, PropNameIDCache::get(runtime, "isLowMemory")))) return false;
|
|
91
|
+
return true;
|
|
92
|
+
}
|
|
93
|
+
};
|
|
94
|
+
|
|
95
|
+
} // namespace margelo::nitro
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "react-native-litert-lm",
|
|
3
|
-
"version": "0.2.1",
|
|
3
|
+
"version": "0.2.2",
|
|
4
4
|
"description": "High-performance LLM inference for React Native using LiteRT-LM. Optimized for Gemma 3n and other on-device language models.",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"author": "Hugh Chen (https://github.com/hung-yueh)",
|
|
@@ -65,7 +65,7 @@
|
|
|
65
65
|
"@expo/config-plugins": "~54.0.4",
|
|
66
66
|
"@types/react": "~19.1.10",
|
|
67
67
|
"expo": "^54.0.31",
|
|
68
|
-
"nitrogen": "^0.
|
|
68
|
+
"nitrogen": "^0.34.1",
|
|
69
69
|
"react": "19.1.0",
|
|
70
70
|
"react-native": "0.81.5",
|
|
71
71
|
"release-it": "^19.2.4",
|
|
@@ -82,6 +82,6 @@
|
|
|
82
82
|
}
|
|
83
83
|
},
|
|
84
84
|
"dependencies": {
|
|
85
|
-
"react-native-nitro-modules": "^0.
|
|
85
|
+
"react-native-nitro-modules": "^0.34.1"
|
|
86
86
|
}
|
|
87
87
|
}
|
package/src/hooks.ts
CHANGED
|
@@ -1,9 +1,23 @@
|
|
|
1
1
|
import { useState, useEffect, useRef, useCallback } from "react";
|
|
2
2
|
import { LiteRTLM, LLMConfig } from "./index";
|
|
3
3
|
import { createLLM } from "./modelFactory";
|
|
4
|
+
import type { MemoryTracker, MemoryTrackerSummary } from "./memoryTracker";
|
|
4
5
|
|
|
5
6
|
export interface UseModelConfig extends LLMConfig {
|
|
6
7
|
autoLoad?: boolean;
|
|
8
|
+
/**
|
|
9
|
+
* Enable memory tracking using native ArrayBuffers (v0.34+).
|
|
10
|
+
* When enabled, memory usage is tracked after each inference call
|
|
11
|
+
* using `NitroModules.createNativeArrayBuffer()` for zero-copy storage.
|
|
12
|
+
* @default false
|
|
13
|
+
*/
|
|
14
|
+
enableMemoryTracking?: boolean;
|
|
15
|
+
/**
|
|
16
|
+
* Maximum number of memory snapshots to store.
|
|
17
|
+
* Each snapshot uses 32 bytes of native memory.
|
|
18
|
+
* @default 256
|
|
19
|
+
*/
|
|
20
|
+
maxMemorySnapshots?: number;
|
|
7
21
|
}
|
|
8
22
|
|
|
9
23
|
export interface UseModelResult {
|
|
@@ -16,24 +30,50 @@ export interface UseModelResult {
|
|
|
16
30
|
reset: () => void;
|
|
17
31
|
deleteModel: (fileName: string) => Promise<void>;
|
|
18
32
|
load: () => Promise<void>;
|
|
33
|
+
/**
|
|
34
|
+
* Memory tracker instance (available when enableMemoryTracking is true).
|
|
35
|
+
* Uses native ArrayBuffers allocated via `NitroModules.createNativeArrayBuffer()`
|
|
36
|
+
* for efficient, zero-copy memory usage tracking.
|
|
37
|
+
*/
|
|
38
|
+
memoryTracker: MemoryTracker | null;
|
|
39
|
+
/**
|
|
40
|
+
* Current memory tracking summary (null if tracking is disabled).
|
|
41
|
+
* Updates automatically after each inference call.
|
|
42
|
+
*/
|
|
43
|
+
memorySummary: MemoryTrackerSummary | null;
|
|
19
44
|
}
|
|
20
45
|
|
|
21
46
|
export function useModel(
|
|
22
47
|
pathOrUrl: string,
|
|
23
48
|
config?: UseModelConfig,
|
|
24
49
|
): UseModelResult {
|
|
25
|
-
const modelRef = useRef<LiteRTLM | null>(null);
|
|
50
|
+
const modelRef = useRef<(LiteRTLM & { memoryTracker?: MemoryTracker }) | null>(null);
|
|
26
51
|
const [isReady, setIsReady] = useState(false);
|
|
27
52
|
const [isGenerating, setIsGenerating] = useState(false);
|
|
28
53
|
const [downloadProgress, setDownloadProgress] = useState(0);
|
|
29
54
|
const [error, setError] = useState<string | null>(null);
|
|
55
|
+
const [memorySummary, setMemorySummary] = useState<MemoryTrackerSummary | null>(null);
|
|
30
56
|
|
|
31
|
-
// Extract autoLoad (default true)
|
|
57
|
+
// Extract autoLoad (default true) and memory tracking options
|
|
32
58
|
const autoLoad = config?.autoLoad ?? true;
|
|
59
|
+
const enableMemoryTracking = config?.enableMemoryTracking ?? false;
|
|
60
|
+
const maxMemorySnapshots = config?.maxMemorySnapshots ?? 256;
|
|
61
|
+
|
|
62
|
+
/**
|
|
63
|
+
* Refresh memory summary from the tracker's native buffer.
|
|
64
|
+
*/
|
|
65
|
+
const refreshMemorySummary = useCallback(() => {
|
|
66
|
+
if (modelRef.current?.memoryTracker) {
|
|
67
|
+
setMemorySummary(modelRef.current.memoryTracker.getSummary());
|
|
68
|
+
}
|
|
69
|
+
}, []);
|
|
33
70
|
|
|
34
71
|
// Initialize the model instance
|
|
35
72
|
useEffect(() => {
|
|
36
|
-
modelRef.current = createLLM();
|
|
73
|
+
modelRef.current = createLLM({
|
|
74
|
+
enableMemoryTracking,
|
|
75
|
+
maxMemorySnapshots,
|
|
76
|
+
});
|
|
37
77
|
let isMounted = true;
|
|
38
78
|
|
|
39
79
|
// Cleanup on unmount
|
|
@@ -45,7 +85,7 @@ export function useModel(
|
|
|
45
85
|
console.warn("Failed to close model", e);
|
|
46
86
|
}
|
|
47
87
|
};
|
|
48
|
-
}, []);
|
|
88
|
+
}, [enableMemoryTracking, maxMemorySnapshots]);
|
|
49
89
|
|
|
50
90
|
const load = useCallback(async () => {
|
|
51
91
|
setIsReady(false);
|
|
@@ -106,6 +146,7 @@ export function useModel(
|
|
|
106
146
|
(token: string, done: boolean) => {
|
|
107
147
|
fullResponse += token;
|
|
108
148
|
if (done) {
|
|
149
|
+
refreshMemorySummary();
|
|
109
150
|
resolve(fullResponse);
|
|
110
151
|
}
|
|
111
152
|
},
|
|
@@ -121,7 +162,7 @@ export function useModel(
|
|
|
121
162
|
setIsGenerating(false);
|
|
122
163
|
}
|
|
123
164
|
},
|
|
124
|
-
[isReady],
|
|
165
|
+
[isReady, refreshMemorySummary],
|
|
125
166
|
);
|
|
126
167
|
|
|
127
168
|
const reset = useCallback(() => {
|
|
@@ -148,5 +189,7 @@ export function useModel(
|
|
|
148
189
|
reset,
|
|
149
190
|
deleteModel,
|
|
150
191
|
load,
|
|
192
|
+
memoryTracker: modelRef.current?.memoryTracker ?? null,
|
|
193
|
+
memorySummary,
|
|
151
194
|
};
|
|
152
195
|
}
|
package/src/index.ts
CHANGED
|
@@ -7,6 +7,7 @@ import type {
|
|
|
7
7
|
Backend,
|
|
8
8
|
Role,
|
|
9
9
|
GenerationStats,
|
|
10
|
+
MemoryUsage,
|
|
10
11
|
} from "./specs/LiteRTLM.nitro";
|
|
11
12
|
|
|
12
13
|
export type {
|
|
@@ -16,6 +17,7 @@ export type {
|
|
|
16
17
|
Backend,
|
|
17
18
|
Role,
|
|
18
19
|
GenerationStats,
|
|
20
|
+
MemoryUsage,
|
|
19
21
|
} from "./specs/LiteRTLM.nitro";
|
|
20
22
|
|
|
21
23
|
// Re-export template utilities
|
|
@@ -26,6 +28,14 @@ export {
|
|
|
26
28
|
applyLlamaTemplate,
|
|
27
29
|
} from "./templates";
|
|
28
30
|
|
|
31
|
+
// Re-export memory tracking utilities (uses NitroModules.createNativeArrayBuffer v0.34+)
|
|
32
|
+
export type {
|
|
33
|
+
MemorySnapshot,
|
|
34
|
+
MemoryTracker,
|
|
35
|
+
MemoryTrackerSummary,
|
|
36
|
+
} from "./memoryTracker";
|
|
37
|
+
export { createMemoryTracker, createNativeBuffer } from "./memoryTracker";
|
|
38
|
+
|
|
29
39
|
export * from "./hooks";
|
|
30
40
|
|
|
31
41
|
/**
|
|
package/src/memoryTracker.ts
ADDED
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Memory tracking utilities for LiteRT-LM using real native memory metrics.
|
|
3
|
+
*
|
|
4
|
+
* Records real memory usage from OS-level APIs via `getMemoryUsage()`,
|
|
5
|
+
* and stores snapshots in a native-backed ArrayBuffer allocated via
|
|
6
|
+
* `NitroModules.createNativeArrayBuffer()` (v0.34+) for zero-copy interop.
|
|
7
|
+
*
|
|
8
|
+
* @example
|
|
9
|
+
* ```typescript
|
|
10
|
+
* import { createMemoryTracker } from 'react-native-litert-lm';
|
|
11
|
+
*
|
|
12
|
+
* const tracker = createMemoryTracker(100);
|
|
13
|
+
*
|
|
14
|
+
* // Record a real snapshot (typically called internally after inference)
|
|
15
|
+
* tracker.record({
|
|
16
|
+
* timestamp: Date.now(),
|
|
17
|
+
* nativeHeapBytes: usage.nativeHeapBytes,
|
|
18
|
+
* residentBytes: usage.residentBytes,
|
|
19
|
+
* availableMemoryBytes: usage.availableMemoryBytes,
|
|
20
|
+
* });
|
|
21
|
+
*
|
|
22
|
+
* console.log(`Peak RSS: ${tracker.getPeakMemory()} bytes`);
|
|
23
|
+
* ```
|
|
24
|
+
*/
|
|
25
|
+
|
|
26
|
+
import { NitroModules } from "react-native-nitro-modules";
|
|
27
|
+
|
|
28
|
+
/**
|
|
29
|
+
* A single memory usage snapshot with real data from OS APIs.
|
|
30
|
+
*/
|
|
31
|
+
export interface MemorySnapshot {
|
|
32
|
+
/** Unix timestamp in milliseconds */
|
|
33
|
+
timestamp: number;
|
|
34
|
+
/** Native heap allocated bytes (Debug.getNativeHeapAllocatedSize on Android, task_info on iOS) */
|
|
35
|
+
nativeHeapBytes: number;
|
|
36
|
+
/** Process resident set size (RSS) in bytes */
|
|
37
|
+
residentBytes: number;
|
|
38
|
+
/** Available system memory in bytes */
|
|
39
|
+
availableMemoryBytes: number;
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
/** Number of Float64 fields per snapshot */
|
|
43
|
+
const FIELDS_PER_SNAPSHOT = 4;
|
|
44
|
+
/** Bytes per Float64 value */
|
|
45
|
+
const BYTES_PER_FIELD = Float64Array.BYTES_PER_ELEMENT; // 8
|
|
46
|
+
|
|
47
|
+
/**
|
|
48
|
+
* Memory tracker that stores snapshots in a native-backed ArrayBuffer.
|
|
49
|
+
*
|
|
50
|
+
* Uses `NitroModules.createNativeArrayBuffer()` to allocate the backing
|
|
51
|
+
* buffer in native (C++) memory, ensuring zero-copy interop with native
|
|
52
|
+
* methods and keeping memory tracking data off the JS heap.
|
|
53
|
+
*/
|
|
54
|
+
export interface MemoryTracker {
|
|
55
|
+
/**
|
|
56
|
+
* Record a new memory snapshot.
|
|
57
|
+
* @param snapshot The memory usage data to record
|
|
58
|
+
* @returns true if recorded, false if buffer is full
|
|
59
|
+
*/
|
|
60
|
+
record(snapshot: MemorySnapshot): boolean;
|
|
61
|
+
|
|
62
|
+
/**
|
|
63
|
+
* Get all recorded snapshots as structured objects.
|
|
64
|
+
*/
|
|
65
|
+
getSnapshots(): MemorySnapshot[];
|
|
66
|
+
|
|
67
|
+
/**
|
|
68
|
+
* Get the number of recorded snapshots.
|
|
69
|
+
*/
|
|
70
|
+
getSnapshotCount(): number;
|
|
71
|
+
|
|
72
|
+
/**
|
|
73
|
+
* Get the maximum number of snapshots this tracker can hold.
|
|
74
|
+
*/
|
|
75
|
+
getCapacity(): number;
|
|
76
|
+
|
|
77
|
+
/**
|
|
78
|
+
* Get the peak resident set size across all snapshots.
|
|
79
|
+
*/
|
|
80
|
+
getPeakMemory(): number;
|
|
81
|
+
|
|
82
|
+
/**
|
|
83
|
+
* Get the latest memory snapshot, or undefined if none recorded.
|
|
84
|
+
*/
|
|
85
|
+
getLatestSnapshot(): MemorySnapshot | undefined;
|
|
86
|
+
|
|
87
|
+
/**
|
|
88
|
+
* Get the underlying native ArrayBuffer.
|
|
89
|
+
* This buffer is allocated via `NitroModules.createNativeArrayBuffer()`
|
|
90
|
+
* and lives in native memory, enabling zero-copy transfer to native methods.
|
|
91
|
+
*/
|
|
92
|
+
getNativeBuffer(): ArrayBuffer;
|
|
93
|
+
|
|
94
|
+
/**
|
|
95
|
+
* Get the Float64Array view over the native buffer.
|
|
96
|
+
*/
|
|
97
|
+
getView(): Float64Array;
|
|
98
|
+
|
|
99
|
+
/**
|
|
100
|
+
* Reset the tracker, clearing all recorded snapshots.
|
|
101
|
+
* The native buffer is preserved (not reallocated).
|
|
102
|
+
*/
|
|
103
|
+
reset(): void;
|
|
104
|
+
|
|
105
|
+
/**
|
|
106
|
+
* Get a summary of memory usage statistics.
|
|
107
|
+
*/
|
|
108
|
+
getSummary(): MemoryTrackerSummary;
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
/**
|
|
112
|
+
* Summary statistics from the memory tracker.
|
|
113
|
+
*/
|
|
114
|
+
export interface MemoryTrackerSummary {
|
|
115
|
+
/** Number of snapshots recorded */
|
|
116
|
+
snapshotCount: number;
|
|
117
|
+
/** Peak resident set size in bytes */
|
|
118
|
+
peakResidentBytes: number;
|
|
119
|
+
/** Average resident set size in bytes */
|
|
120
|
+
averageResidentBytes: number;
|
|
121
|
+
/** Latest resident set size in bytes */
|
|
122
|
+
currentResidentBytes: number;
|
|
123
|
+
/** Peak native heap allocated in bytes */
|
|
124
|
+
peakNativeHeapBytes: number;
|
|
125
|
+
/** Latest native heap allocated in bytes */
|
|
126
|
+
currentNativeHeapBytes: number;
|
|
127
|
+
/** RSS delta from first to last snapshot in bytes */
|
|
128
|
+
residentDeltaBytes: number;
|
|
129
|
+
/** Size of the native tracking buffer itself in bytes */
|
|
130
|
+
trackerBufferSizeBytes: number;
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
/**
|
|
134
|
+
* Create a new memory tracker backed by a native ArrayBuffer.
|
|
135
|
+
*
|
|
136
|
+
* @param maxSnapshots Maximum number of snapshots to store (default: 256)
|
|
137
|
+
* @returns A MemoryTracker instance
|
|
138
|
+
*/
|
|
139
|
+
export function createMemoryTracker(maxSnapshots: number = 256): MemoryTracker {
|
|
140
|
+
const bufferSize = maxSnapshots * FIELDS_PER_SNAPSHOT * BYTES_PER_FIELD;
|
|
141
|
+
|
|
142
|
+
// Use NitroModules.createNativeArrayBuffer for native-backed allocation.
|
|
143
|
+
const nativeBuffer = NitroModules.createNativeArrayBuffer(bufferSize);
|
|
144
|
+
const view = new Float64Array(nativeBuffer);
|
|
145
|
+
|
|
146
|
+
let currentIndex = 0;
|
|
147
|
+
|
|
148
|
+
return {
|
|
149
|
+
record(snapshot: MemorySnapshot): boolean {
|
|
150
|
+
if (currentIndex >= maxSnapshots) {
|
|
151
|
+
return false;
|
|
152
|
+
}
|
|
153
|
+
|
|
154
|
+
const offset = currentIndex * FIELDS_PER_SNAPSHOT;
|
|
155
|
+
view[offset] = snapshot.timestamp;
|
|
156
|
+
view[offset + 1] = snapshot.nativeHeapBytes;
|
|
157
|
+
view[offset + 2] = snapshot.residentBytes;
|
|
158
|
+
view[offset + 3] = snapshot.availableMemoryBytes;
|
|
159
|
+
currentIndex++;
|
|
160
|
+
|
|
161
|
+
return true;
|
|
162
|
+
},
|
|
163
|
+
|
|
164
|
+
getSnapshots(): MemorySnapshot[] {
|
|
165
|
+
const snapshots: MemorySnapshot[] = [];
|
|
166
|
+
for (let i = 0; i < currentIndex; i++) {
|
|
167
|
+
const offset = i * FIELDS_PER_SNAPSHOT;
|
|
168
|
+
snapshots.push({
|
|
169
|
+
timestamp: view[offset]!,
|
|
170
|
+
nativeHeapBytes: view[offset + 1]!,
|
|
171
|
+
residentBytes: view[offset + 2]!,
|
|
172
|
+
availableMemoryBytes: view[offset + 3]!,
|
|
173
|
+
});
|
|
174
|
+
}
|
|
175
|
+
return snapshots;
|
|
176
|
+
},
|
|
177
|
+
|
|
178
|
+
getSnapshotCount(): number {
|
|
179
|
+
return currentIndex;
|
|
180
|
+
},
|
|
181
|
+
|
|
182
|
+
getCapacity(): number {
|
|
183
|
+
return maxSnapshots;
|
|
184
|
+
},
|
|
185
|
+
|
|
186
|
+
getPeakMemory(): number {
|
|
187
|
+
let peak = 0;
|
|
188
|
+
for (let i = 0; i < currentIndex; i++) {
|
|
189
|
+
const rss = view[i * FIELDS_PER_SNAPSHOT + 2]!;
|
|
190
|
+
if (rss > peak) {
|
|
191
|
+
peak = rss;
|
|
192
|
+
}
|
|
193
|
+
}
|
|
194
|
+
return peak;
|
|
195
|
+
},
|
|
196
|
+
|
|
197
|
+
getLatestSnapshot(): MemorySnapshot | undefined {
|
|
198
|
+
if (currentIndex === 0) return undefined;
|
|
199
|
+
const offset = (currentIndex - 1) * FIELDS_PER_SNAPSHOT;
|
|
200
|
+
return {
|
|
201
|
+
timestamp: view[offset]!,
|
|
202
|
+
nativeHeapBytes: view[offset + 1]!,
|
|
203
|
+
residentBytes: view[offset + 2]!,
|
|
204
|
+
availableMemoryBytes: view[offset + 3]!,
|
|
205
|
+
};
|
|
206
|
+
},
|
|
207
|
+
|
|
208
|
+
getNativeBuffer(): ArrayBuffer {
|
|
209
|
+
return nativeBuffer;
|
|
210
|
+
},
|
|
211
|
+
|
|
212
|
+
getView(): Float64Array {
|
|
213
|
+
return view;
|
|
214
|
+
},
|
|
215
|
+
|
|
216
|
+
reset(): void {
|
|
217
|
+
view.fill(0);
|
|
218
|
+
currentIndex = 0;
|
|
219
|
+
},
|
|
220
|
+
|
|
221
|
+
getSummary(): MemoryTrackerSummary {
|
|
222
|
+
let peakRss = 0;
|
|
223
|
+
let peakHeap = 0;
|
|
224
|
+
let sumRss = 0;
|
|
225
|
+
let firstRss = 0;
|
|
226
|
+
let lastRss = 0;
|
|
227
|
+
let lastHeap = 0;
|
|
228
|
+
|
|
229
|
+
for (let i = 0; i < currentIndex; i++) {
|
|
230
|
+
const offset = i * FIELDS_PER_SNAPSHOT;
|
|
231
|
+
const heap = view[offset + 1]!;
|
|
232
|
+
const rss = view[offset + 2]!;
|
|
233
|
+
|
|
234
|
+
if (rss > peakRss) peakRss = rss;
|
|
235
|
+
if (heap > peakHeap) peakHeap = heap;
|
|
236
|
+
sumRss += rss;
|
|
237
|
+
if (i === 0) firstRss = rss;
|
|
238
|
+
if (i === currentIndex - 1) {
|
|
239
|
+
lastRss = rss;
|
|
240
|
+
lastHeap = heap;
|
|
241
|
+
}
|
|
242
|
+
}
|
|
243
|
+
|
|
244
|
+
return {
|
|
245
|
+
snapshotCount: currentIndex,
|
|
246
|
+
peakResidentBytes: peakRss,
|
|
247
|
+
averageResidentBytes: currentIndex > 0 ? sumRss / currentIndex : 0,
|
|
248
|
+
currentResidentBytes: lastRss,
|
|
249
|
+
peakNativeHeapBytes: peakHeap,
|
|
250
|
+
currentNativeHeapBytes: lastHeap,
|
|
251
|
+
residentDeltaBytes: lastRss - firstRss,
|
|
252
|
+
trackerBufferSizeBytes: bufferSize,
|
|
253
|
+
};
|
|
254
|
+
},
|
|
255
|
+
};
|
|
256
|
+
}
|
|
257
|
+
|
|
258
|
+
/**
|
|
259
|
+
* Create a native ArrayBuffer for efficient data transfer.
|
|
260
|
+
*
|
|
261
|
+
* A convenience wrapper around `NitroModules.createNativeArrayBuffer()`.
|
|
262
|
+
*
|
|
263
|
+
* @param size Size in bytes
|
|
264
|
+
* @returns A native-backed ArrayBuffer
|
|
265
|
+
*/
|
|
266
|
+
export function createNativeBuffer(size: number): ArrayBuffer {
|
|
267
|
+
return NitroModules.createNativeArrayBuffer(size);
|
|
268
|
+
}
|
package/src/modelFactory.ts
CHANGED
|
@@ -1,14 +1,49 @@
|
|
|
1
1
|
import { NitroModules } from "react-native-nitro-modules";
|
|
2
2
|
import { LiteRTLM, LLMConfig } from "./specs/LiteRTLM.nitro";
|
|
3
|
+
import { createMemoryTracker, MemoryTracker } from "./memoryTracker";
|
|
3
4
|
|
|
4
5
|
/**
|
|
5
6
|
* Creates a new LiteRT-LM inference engine instance.
|
|
7
|
+
*
|
|
8
|
+
* Optionally creates a native-backed memory tracker using
|
|
9
|
+
* `NitroModules.createNativeArrayBuffer()` (v0.34+) for efficient
|
|
10
|
+
* zero-copy memory usage tracking.
|
|
11
|
+
*
|
|
12
|
+
* @param options.enableMemoryTracking Enable automatic memory tracking (default: false)
|
|
13
|
+
* @param options.maxMemorySnapshots Maximum number of memory snapshots to store (default: 256)
|
|
6
14
|
*/
|
|
7
|
-
export function createLLM(): LiteRTLM {
|
|
15
|
+
export function createLLM(options?: {
|
|
16
|
+
enableMemoryTracking?: boolean;
|
|
17
|
+
maxMemorySnapshots?: number;
|
|
18
|
+
}): LiteRTLM & { memoryTracker?: MemoryTracker } {
|
|
8
19
|
const native = NitroModules.createHybridObject<LiteRTLM>("LiteRTLM");
|
|
9
20
|
|
|
21
|
+
const enableTracking = options?.enableMemoryTracking ?? false;
|
|
22
|
+
const tracker = enableTracking
|
|
23
|
+
? createMemoryTracker(options?.maxMemorySnapshots ?? 256)
|
|
24
|
+
: undefined;
|
|
25
|
+
|
|
26
|
+
/**
|
|
27
|
+
* Record a real memory snapshot using OS-level APIs via getMemoryUsage().
|
|
28
|
+
*/
|
|
29
|
+
const recordMemorySnapshot = () => {
|
|
30
|
+
if (!tracker) return;
|
|
31
|
+
try {
|
|
32
|
+
const usage = native.getMemoryUsage();
|
|
33
|
+
tracker.record({
|
|
34
|
+
timestamp: Date.now(),
|
|
35
|
+
nativeHeapBytes: usage.nativeHeapBytes,
|
|
36
|
+
residentBytes: usage.residentBytes,
|
|
37
|
+
availableMemoryBytes: usage.availableMemoryBytes,
|
|
38
|
+
});
|
|
39
|
+
} catch {
|
|
40
|
+
// Ignore errors during memory tracking - it's non-critical
|
|
41
|
+
}
|
|
42
|
+
};
|
|
43
|
+
|
|
10
44
|
return {
|
|
11
45
|
...native,
|
|
46
|
+
memoryTracker: tracker,
|
|
12
47
|
loadModel: async (pathOrUrl: string, config?: LLMConfig) => {
|
|
13
48
|
let modelPath = pathOrUrl;
|
|
14
49
|
|
|
@@ -31,17 +66,53 @@ export function createLLM(): LiteRTLM {
|
|
|
31
66
|
console.log(`Model downloaded to: ${modelPath}`);
|
|
32
67
|
}
|
|
33
68
|
|
|
34
|
-
|
|
69
|
+
const result = await native.loadModel(modelPath, config);
|
|
70
|
+
|
|
71
|
+
// Record initial memory snapshot after model load
|
|
72
|
+
if (tracker) {
|
|
73
|
+
tracker.reset();
|
|
74
|
+
recordMemorySnapshot();
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
return result;
|
|
78
|
+
},
|
|
79
|
+
sendMessage: async (...args: Parameters<typeof native.sendMessage>) => {
|
|
80
|
+
const result = await native.sendMessage(...args);
|
|
81
|
+
recordMemorySnapshot();
|
|
82
|
+
return result;
|
|
83
|
+
},
|
|
84
|
+
sendMessageAsync: (...args: Parameters<typeof native.sendMessageAsync>) => {
|
|
85
|
+
const [message, onToken] = args;
|
|
86
|
+
native.sendMessageAsync(message, (token, done) => {
|
|
87
|
+
onToken(token, done);
|
|
88
|
+
if (done) {
|
|
89
|
+
recordMemorySnapshot();
|
|
90
|
+
}
|
|
91
|
+
});
|
|
92
|
+
},
|
|
93
|
+
sendMessageWithImage: async (
|
|
94
|
+
...args: Parameters<typeof native.sendMessageWithImage>
|
|
95
|
+
) => {
|
|
96
|
+
const result = await native.sendMessageWithImage(...args);
|
|
97
|
+
recordMemorySnapshot();
|
|
98
|
+
return result;
|
|
99
|
+
},
|
|
100
|
+
sendMessageWithAudio: async (
|
|
101
|
+
...args: Parameters<typeof native.sendMessageWithAudio>
|
|
102
|
+
) => {
|
|
103
|
+
const result = await native.sendMessageWithAudio(...args);
|
|
104
|
+
recordMemorySnapshot();
|
|
105
|
+
return result;
|
|
35
106
|
},
|
|
36
|
-
// Bind valid methods to native instance
|
|
37
|
-
sendMessage: native.sendMessage.bind(native),
|
|
38
|
-
sendMessageAsync: native.sendMessageAsync.bind(native),
|
|
39
|
-
sendMessageWithImage: native.sendMessageWithImage.bind(native),
|
|
40
|
-
sendMessageWithAudio: native.sendMessageWithAudio.bind(native),
|
|
41
107
|
getHistory: native.getHistory.bind(native),
|
|
42
|
-
resetConversation: native.resetConversation.bind(native),
|
|
108
|
+
resetConversation: () => {
|
|
109
|
+
native.resetConversation();
|
|
110
|
+
// KV cache is cleared on reset, record the drop
|
|
111
|
+
recordMemorySnapshot();
|
|
112
|
+
},
|
|
43
113
|
isReady: native.isReady.bind(native),
|
|
44
114
|
getStats: native.getStats.bind(native),
|
|
115
|
+
getMemoryUsage: native.getMemoryUsage.bind(native),
|
|
45
116
|
close: native.close.bind(native),
|
|
46
117
|
downloadModel: native.downloadModel.bind(native),
|
|
47
118
|
deleteModel: native.deleteModel.bind(native),
|
|
package/src/specs/LiteRTLM.nitro.ts
CHANGED
|
@@ -99,6 +99,21 @@ export interface GenerationStats {
|
|
|
99
99
|
tokensPerSecond: number;
|
|
100
100
|
}
|
|
101
101
|
|
|
102
|
+
/**
|
|
103
|
+
* Real memory usage statistics from the native runtime.
|
|
104
|
+
* Measured from OS-level APIs, not estimated.
|
|
105
|
+
*/
|
|
106
|
+
export interface MemoryUsage {
|
|
107
|
+
/** Native heap allocated bytes (Debug.getNativeHeapAllocatedSize on Android, malloc_size on iOS) */
|
|
108
|
+
nativeHeapBytes: number;
|
|
109
|
+
/** Total process resident set size (RSS) in bytes */
|
|
110
|
+
residentBytes: number;
|
|
111
|
+
/** Available system memory in bytes */
|
|
112
|
+
availableMemoryBytes: number;
|
|
113
|
+
/** Whether the system considers memory low */
|
|
114
|
+
isLowMemory: boolean;
|
|
115
|
+
}
|
|
116
|
+
|
|
102
117
|
/**
|
|
103
118
|
* LiteRT-LM: High-performance LLM inference engine.
|
|
104
119
|
* Supports Gemma 3n, Phi-4, Qwen, and other .litertlm models.
|
|
@@ -204,6 +219,12 @@ export interface LiteRTLM extends HybridObject<{
|
|
|
204
219
|
*/
|
|
205
220
|
getStats(): GenerationStats;
|
|
206
221
|
|
|
222
|
+
/**
|
|
223
|
+
* Get real memory usage from the native runtime.
|
|
224
|
+
* Uses OS-level APIs to report actual memory consumption.
|
|
225
|
+
*/
|
|
226
|
+
getMemoryUsage(): MemoryUsage;
|
|
227
|
+
|
|
207
228
|
/**
|
|
208
229
|
* Release all native resources.
|
|
209
230
|
* Call this when done with the LLM instance.
|