react-native-litert-lm 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +331 -150
- package/android/build.gradle +1 -1
- package/android/src/main/java/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLM.kt +140 -37
- package/app.plugin.js +33 -0
- package/cpp/HybridLiteRTLM.cpp +577 -378
- package/cpp/HybridLiteRTLM.hpp +66 -23
- package/cpp/IOSDownloadHelper.h +24 -0
- package/cpp/cpp-adapter.cpp +10 -2
- package/cpp/include/litert_lm_engine.h +502 -0
- package/ios/IOSDownloadHelper.mm +129 -0
- package/ios/LiteRTLMAutolinking.mm +30 -0
- package/lib/hooks.d.ts +33 -3
- package/lib/hooks.js +54 -23
- package/lib/index.d.ts +4 -1
- package/lib/index.js +6 -6
- package/lib/memoryTracker.d.ts +128 -0
- package/lib/memoryTracker.js +155 -0
- package/lib/modelFactory.d.ts +21 -2
- package/lib/modelFactory.js +78 -11
- package/lib/specs/LiteRTLM.nitro.d.ts +19 -0
- package/nitrogen/generated/android/LiteRTLMOnLoad.cpp +28 -18
- package/nitrogen/generated/android/LiteRTLMOnLoad.hpp +13 -4
- package/nitrogen/generated/android/c++/JHybridLiteRTLMSpec.cpp +39 -36
- package/nitrogen/generated/android/c++/JHybridLiteRTLMSpec.hpp +20 -22
- package/nitrogen/generated/android/c++/JMemoryUsage.hpp +69 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLMSpec.kt +19 -18
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/MemoryUsage.kt +47 -0
- package/nitrogen/generated/shared/c++/HybridLiteRTLMSpec.cpp +1 -0
- package/nitrogen/generated/shared/c++/HybridLiteRTLMSpec.hpp +4 -0
- package/nitrogen/generated/shared/c++/MemoryUsage.hpp +95 -0
- package/package.json +12 -5
- package/react-native-litert-lm.podspec +20 -7
- package/scripts/build-ios-engine.sh +283 -0
- package/scripts/download-ios-frameworks.sh +72 -0
- package/scripts/postinstall.js +116 -0
- package/scripts/stubs/cxx_bridge_stubs.cc +224 -0
- package/scripts/stubs/gemma_model_constraint_provider.cc +46 -0
- package/scripts/stubs/llguidance_stubs.c +101 -0
- package/src/hooks.ts +107 -41
- package/src/index.ts +13 -6
- package/src/memoryTracker.ts +268 -0
- package/src/modelFactory.ts +107 -11
- package/src/specs/LiteRTLM.nitro.ts +21 -0
package/lib/hooks.d.ts
CHANGED
|
@@ -1,16 +1,46 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { LLMConfig } from "./index";
|
|
2
|
+
import type { LiteRTLMInstance } from "./modelFactory";
|
|
3
|
+
import type { MemoryTracker, MemoryTrackerSummary } from "./memoryTracker";
|
|
2
4
|
export interface UseModelConfig extends LLMConfig {
|
|
3
5
|
autoLoad?: boolean;
|
|
6
|
+
/**
|
|
7
|
+
* Enable memory tracking using native ArrayBuffers (v0.35+).
|
|
8
|
+
* When enabled, memory usage is tracked after each inference call
|
|
9
|
+
* using `NitroModules.createNativeArrayBuffer()` for zero-copy storage.
|
|
10
|
+
* @default false
|
|
11
|
+
*/
|
|
12
|
+
enableMemoryTracking?: boolean;
|
|
13
|
+
/**
|
|
14
|
+
* Maximum number of memory snapshots to store.
|
|
15
|
+
* Each snapshot uses 32 bytes of native memory.
|
|
16
|
+
* @default 256
|
|
17
|
+
*/
|
|
18
|
+
maxMemorySnapshots?: number;
|
|
4
19
|
}
|
|
5
20
|
export interface UseModelResult {
|
|
6
|
-
model:
|
|
21
|
+
model: LiteRTLMInstance | null;
|
|
7
22
|
isReady: boolean;
|
|
8
23
|
isGenerating: boolean;
|
|
9
24
|
downloadProgress: number;
|
|
10
25
|
error: string | null;
|
|
11
26
|
generate: (prompt: string) => Promise<string>;
|
|
12
27
|
reset: () => void;
|
|
13
|
-
|
|
28
|
+
/**
|
|
29
|
+
* Delete the model file. If no fileName is provided, derives it from
|
|
30
|
+
* the URL/path passed to useModel.
|
|
31
|
+
*/
|
|
32
|
+
deleteModel: (fileName?: string) => Promise<void>;
|
|
14
33
|
load: () => Promise<void>;
|
|
34
|
+
/**
|
|
35
|
+
* Memory tracker instance (available when enableMemoryTracking is true).
|
|
36
|
+
* Uses native ArrayBuffers allocated via `NitroModules.createNativeArrayBuffer()`
|
|
37
|
+
* for efficient, zero-copy memory usage tracking.
|
|
38
|
+
*/
|
|
39
|
+
memoryTracker: MemoryTracker | null;
|
|
40
|
+
/**
|
|
41
|
+
* Current memory tracking summary (null if tracking is disabled).
|
|
42
|
+
* Updates automatically after each inference call.
|
|
43
|
+
*/
|
|
44
|
+
memorySummary: MemoryTrackerSummary | null;
|
|
15
45
|
}
|
|
16
46
|
export declare function useModel(pathOrUrl: string, config?: UseModelConfig): UseModelResult;
|
package/lib/hooks.js
CHANGED
|
@@ -3,21 +3,56 @@ Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
3
3
|
exports.useModel = useModel;
|
|
4
4
|
const react_1 = require("react");
|
|
5
5
|
const modelFactory_1 = require("./modelFactory");
|
|
6
|
+
/**
|
|
7
|
+
* Extract a filename from a URL or file path.
|
|
8
|
+
*/
|
|
9
|
+
function extractFileName(pathOrUrl) {
|
|
10
|
+
return pathOrUrl.split("/").pop() || "model.bin";
|
|
11
|
+
}
|
|
6
12
|
function useModel(pathOrUrl, config) {
|
|
7
13
|
const modelRef = (0, react_1.useRef)(null);
|
|
8
14
|
const [isReady, setIsReady] = (0, react_1.useState)(false);
|
|
9
15
|
const [isGenerating, setIsGenerating] = (0, react_1.useState)(false);
|
|
10
16
|
const [downloadProgress, setDownloadProgress] = (0, react_1.useState)(0);
|
|
11
17
|
const [error, setError] = (0, react_1.useState)(null);
|
|
12
|
-
|
|
18
|
+
const [memorySummary, setMemorySummary] = (0, react_1.useState)(null);
|
|
19
|
+
// Destructure config into primitive values for stable dependency arrays.
|
|
20
|
+
// This prevents infinite re-render loops when consumers pass inline config
|
|
21
|
+
// objects (e.g. useModel(url, { backend: 'cpu' })) without useMemo.
|
|
13
22
|
const autoLoad = config?.autoLoad ?? true;
|
|
23
|
+
const enableMemoryTracking = config?.enableMemoryTracking ?? false;
|
|
24
|
+
const maxMemorySnapshots = config?.maxMemorySnapshots ?? 256;
|
|
25
|
+
const backend = config?.backend;
|
|
26
|
+
const systemPrompt = config?.systemPrompt;
|
|
27
|
+
const maxTokens = config?.maxTokens;
|
|
28
|
+
const temperature = config?.temperature;
|
|
29
|
+
const topK = config?.topK;
|
|
30
|
+
const topP = config?.topP;
|
|
31
|
+
// Build a stable config object from the destructured primitives
|
|
32
|
+
const nativeConfig = (0, react_1.useMemo)(() => ({
|
|
33
|
+
...(backend !== undefined && { backend }),
|
|
34
|
+
...(systemPrompt !== undefined && { systemPrompt }),
|
|
35
|
+
...(maxTokens !== undefined && { maxTokens }),
|
|
36
|
+
...(temperature !== undefined && { temperature }),
|
|
37
|
+
...(topK !== undefined && { topK }),
|
|
38
|
+
...(topP !== undefined && { topP }),
|
|
39
|
+
}), [backend, systemPrompt, maxTokens, temperature, topK, topP]);
|
|
40
|
+
/**
|
|
41
|
+
* Refresh memory summary from the tracker's native buffer.
|
|
42
|
+
*/
|
|
43
|
+
const refreshMemorySummary = (0, react_1.useCallback)(() => {
|
|
44
|
+
if (modelRef.current?.memoryTracker) {
|
|
45
|
+
setMemorySummary(modelRef.current.memoryTracker.getSummary());
|
|
46
|
+
}
|
|
47
|
+
}, []);
|
|
14
48
|
// Initialize the model instance
|
|
15
49
|
(0, react_1.useEffect)(() => {
|
|
16
|
-
modelRef.current = (0, modelFactory_1.createLLM)(
|
|
17
|
-
|
|
50
|
+
modelRef.current = (0, modelFactory_1.createLLM)({
|
|
51
|
+
enableMemoryTracking,
|
|
52
|
+
maxMemorySnapshots,
|
|
53
|
+
});
|
|
18
54
|
// Cleanup on unmount
|
|
19
55
|
return () => {
|
|
20
|
-
isMounted = false;
|
|
21
56
|
try {
|
|
22
57
|
modelRef.current?.close();
|
|
23
58
|
}
|
|
@@ -25,27 +60,19 @@ function useModel(pathOrUrl, config) {
|
|
|
25
60
|
console.warn("Failed to close model", e);
|
|
26
61
|
}
|
|
27
62
|
};
|
|
28
|
-
}, []);
|
|
63
|
+
}, [enableMemoryTracking, maxMemorySnapshots]);
|
|
29
64
|
const load = (0, react_1.useCallback)(async () => {
|
|
30
65
|
setIsReady(false);
|
|
31
66
|
setError(null);
|
|
32
67
|
setDownloadProgress(0);
|
|
33
68
|
try {
|
|
34
|
-
let modelPath = pathOrUrl;
|
|
35
|
-
// Handle URL download manually to capture progress
|
|
36
|
-
if (pathOrUrl.startsWith("http://") || pathOrUrl.startsWith("https://")) {
|
|
37
|
-
const fileName = pathOrUrl.split("/").pop() || "model.bin";
|
|
38
|
-
if (modelRef.current) {
|
|
39
|
-
modelPath = await modelRef.current.downloadModel(pathOrUrl, fileName, (progress) => {
|
|
40
|
-
setDownloadProgress(progress);
|
|
41
|
-
});
|
|
42
|
-
}
|
|
43
|
-
}
|
|
44
69
|
if (modelRef.current) {
|
|
45
|
-
//
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
await modelRef.current.loadModel(
|
|
70
|
+
// Delegate URL handling + download to the factory's loadModel,
|
|
71
|
+
// passing our progress setter as the callback (eliminates
|
|
72
|
+
// duplicate download logic that was previously in this hook).
|
|
73
|
+
await modelRef.current.loadModel(pathOrUrl, nativeConfig, (progress) => {
|
|
74
|
+
setDownloadProgress(progress);
|
|
75
|
+
});
|
|
49
76
|
setIsReady(true);
|
|
50
77
|
}
|
|
51
78
|
}
|
|
@@ -53,7 +80,7 @@ function useModel(pathOrUrl, config) {
|
|
|
53
80
|
setError(e.message || "Failed to load model");
|
|
54
81
|
console.error(e);
|
|
55
82
|
}
|
|
56
|
-
}, [pathOrUrl,
|
|
83
|
+
}, [pathOrUrl, nativeConfig]);
|
|
57
84
|
(0, react_1.useEffect)(() => {
|
|
58
85
|
if (autoLoad) {
|
|
59
86
|
load();
|
|
@@ -71,6 +98,7 @@ function useModel(pathOrUrl, config) {
|
|
|
71
98
|
modelRef.current?.sendMessageAsync(prompt, (token, done) => {
|
|
72
99
|
fullResponse += token;
|
|
73
100
|
if (done) {
|
|
101
|
+
refreshMemorySummary();
|
|
74
102
|
resolve(fullResponse);
|
|
75
103
|
}
|
|
76
104
|
});
|
|
@@ -87,7 +115,7 @@ function useModel(pathOrUrl, config) {
|
|
|
87
115
|
finally {
|
|
88
116
|
setIsGenerating(false);
|
|
89
117
|
}
|
|
90
|
-
}, [isReady]);
|
|
118
|
+
}, [isReady, refreshMemorySummary]);
|
|
91
119
|
const reset = (0, react_1.useCallback)(() => {
|
|
92
120
|
if (modelRef.current) {
|
|
93
121
|
modelRef.current.resetConversation();
|
|
@@ -95,11 +123,12 @@ function useModel(pathOrUrl, config) {
|
|
|
95
123
|
}, []);
|
|
96
124
|
const deleteModel = (0, react_1.useCallback)(async (fileName) => {
|
|
97
125
|
if (modelRef.current) {
|
|
98
|
-
|
|
126
|
+
const resolvedName = fileName ?? extractFileName(pathOrUrl);
|
|
127
|
+
await modelRef.current.deleteModel(resolvedName);
|
|
99
128
|
setIsReady(false);
|
|
100
129
|
setDownloadProgress(0);
|
|
101
130
|
}
|
|
102
|
-
}, []);
|
|
131
|
+
}, [pathOrUrl]);
|
|
103
132
|
return {
|
|
104
133
|
model: modelRef.current,
|
|
105
134
|
isReady,
|
|
@@ -110,5 +139,7 @@ function useModel(pathOrUrl, config) {
|
|
|
110
139
|
reset,
|
|
111
140
|
deleteModel,
|
|
112
141
|
load,
|
|
142
|
+
memoryTracker: modelRef.current?.memoryTracker ?? null,
|
|
143
|
+
memorySummary,
|
|
113
144
|
};
|
|
114
145
|
}
|
package/lib/index.d.ts
CHANGED
|
@@ -1,7 +1,10 @@
|
|
|
1
1
|
import type { Backend } from "./specs/LiteRTLM.nitro";
|
|
2
|
-
export type { LiteRTLM, LLMConfig, Message, Backend, Role, GenerationStats, } from "./specs/LiteRTLM.nitro";
|
|
2
|
+
export type { LiteRTLM, LLMConfig, Message, Backend, Role, GenerationStats, MemoryUsage, } from "./specs/LiteRTLM.nitro";
|
|
3
3
|
export type { ChatMessage } from "./templates";
|
|
4
4
|
export { applyGemmaTemplate, applyPhiTemplate, applyLlamaTemplate, } from "./templates";
|
|
5
|
+
export type { MemorySnapshot, MemoryTracker, MemoryTrackerSummary, } from "./memoryTracker";
|
|
6
|
+
export { createMemoryTracker, createNativeBuffer } from "./memoryTracker";
|
|
7
|
+
export type { LiteRTLMInstance } from "./modelFactory";
|
|
5
8
|
export * from "./hooks";
|
|
6
9
|
/**
|
|
7
10
|
* Creates a new LiteRT-LM inference engine instance.
|
package/lib/index.js
CHANGED
|
@@ -14,7 +14,7 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
|
|
|
14
14
|
for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
|
|
15
15
|
};
|
|
16
16
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
17
|
-
exports.GEMMA_3N_E2B_IT_INT4 = exports.Models = exports.createLLM = exports.applyLlamaTemplate = exports.applyPhiTemplate = exports.applyGemmaTemplate = void 0;
|
|
17
|
+
exports.GEMMA_3N_E2B_IT_INT4 = exports.Models = exports.createLLM = exports.createNativeBuffer = exports.createMemoryTracker = exports.applyLlamaTemplate = exports.applyPhiTemplate = exports.applyGemmaTemplate = void 0;
|
|
18
18
|
exports.getRecommendedBackend = getRecommendedBackend;
|
|
19
19
|
exports.checkBackendSupport = checkBackendSupport;
|
|
20
20
|
exports.checkMultimodalSupport = checkMultimodalSupport;
|
|
@@ -23,6 +23,9 @@ var templates_1 = require("./templates");
|
|
|
23
23
|
Object.defineProperty(exports, "applyGemmaTemplate", { enumerable: true, get: function () { return templates_1.applyGemmaTemplate; } });
|
|
24
24
|
Object.defineProperty(exports, "applyPhiTemplate", { enumerable: true, get: function () { return templates_1.applyPhiTemplate; } });
|
|
25
25
|
Object.defineProperty(exports, "applyLlamaTemplate", { enumerable: true, get: function () { return templates_1.applyLlamaTemplate; } });
|
|
26
|
+
var memoryTracker_1 = require("./memoryTracker");
|
|
27
|
+
Object.defineProperty(exports, "createMemoryTracker", { enumerable: true, get: function () { return memoryTracker_1.createMemoryTracker; } });
|
|
28
|
+
Object.defineProperty(exports, "createNativeBuffer", { enumerable: true, get: function () { return memoryTracker_1.createNativeBuffer; } });
|
|
26
29
|
__exportStar(require("./hooks"), exports);
|
|
27
30
|
/**
|
|
28
31
|
* Creates a new LiteRT-LM inference engine instance.
|
|
@@ -113,12 +116,9 @@ function checkBackendSupport(backend) {
|
|
|
113
116
|
return "NPU backend requires compatible hardware (Qualcomm Hexagon, MediaTek APU, etc.). Will fall back to GPU if unavailable.";
|
|
114
117
|
}
|
|
115
118
|
if (react_native_1.Platform.OS === "ios") {
|
|
116
|
-
return "NPU
|
|
119
|
+
return "NPU (Neural Engine) is not yet supported on iOS. Use 'gpu' (Metal) or 'cpu' instead.";
|
|
117
120
|
}
|
|
118
121
|
}
|
|
119
|
-
if (react_native_1.Platform.OS === "ios" && backend !== "cpu") {
|
|
120
|
-
return "LiteRT-LM iOS is not yet released. Only CPU backend may work via fallback.";
|
|
121
|
-
}
|
|
122
122
|
return undefined;
|
|
123
123
|
}
|
|
124
124
|
/**
|
|
@@ -140,7 +140,7 @@ function checkBackendSupport(backend) {
|
|
|
140
140
|
*/
|
|
141
141
|
function checkMultimodalSupport() {
|
|
142
142
|
if (react_native_1.Platform.OS === "ios") {
|
|
143
|
-
return "Multimodal (image/audio) is
|
|
143
|
+
return "Multimodal (image/audio) is experimental on iOS. Vision and audio executors may not be available in the current build.";
|
|
144
144
|
}
|
|
145
145
|
return undefined;
|
|
146
146
|
}
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Memory tracking utilities for LiteRT-LM using real native memory metrics.
|
|
3
|
+
*
|
|
4
|
+
* Records real memory usage from OS-level APIs via `getMemoryUsage()`,
|
|
5
|
+
* and stores snapshots in a native-backed ArrayBuffer allocated via
|
|
6
|
+
* `NitroModules.createNativeArrayBuffer()` (v0.35+) for zero-copy interop.
|
|
7
|
+
*
|
|
8
|
+
* @example
|
|
9
|
+
* ```typescript
|
|
10
|
+
* import { createMemoryTracker } from 'react-native-litert-lm';
|
|
11
|
+
*
|
|
12
|
+
* const tracker = createMemoryTracker(100);
|
|
13
|
+
*
|
|
14
|
+
* // Record a real snapshot (typically called internally after inference)
|
|
15
|
+
* tracker.record({
|
|
16
|
+
* timestamp: Date.now(),
|
|
17
|
+
* nativeHeapBytes: usage.nativeHeapBytes,
|
|
18
|
+
* residentBytes: usage.residentBytes,
|
|
19
|
+
* availableMemoryBytes: usage.availableMemoryBytes,
|
|
20
|
+
* });
|
|
21
|
+
*
|
|
22
|
+
* console.log(`Peak RSS: ${tracker.getPeakMemory()} bytes`);
|
|
23
|
+
* ```
|
|
24
|
+
*/
|
|
25
|
+
/**
|
|
26
|
+
* A single memory usage snapshot with real data from OS APIs.
|
|
27
|
+
*/
|
|
28
|
+
export interface MemorySnapshot {
|
|
29
|
+
/** Unix timestamp in milliseconds */
|
|
30
|
+
timestamp: number;
|
|
31
|
+
/** Native heap allocated bytes (Debug.getNativeHeapAllocatedSize on Android, task_info on iOS) */
|
|
32
|
+
nativeHeapBytes: number;
|
|
33
|
+
/** Process resident set size (RSS) in bytes */
|
|
34
|
+
residentBytes: number;
|
|
35
|
+
/** Available system memory in bytes */
|
|
36
|
+
availableMemoryBytes: number;
|
|
37
|
+
}
|
|
38
|
+
/**
|
|
39
|
+
* Memory tracker that stores snapshots in a native-backed ArrayBuffer.
|
|
40
|
+
*
|
|
41
|
+
* Uses `NitroModules.createNativeArrayBuffer()` to allocate the backing
|
|
42
|
+
* buffer in native (C++) memory, ensuring zero-copy interop with native
|
|
43
|
+
* methods and keeping memory tracking data off the JS heap.
|
|
44
|
+
*/
|
|
45
|
+
export interface MemoryTracker {
|
|
46
|
+
/**
|
|
47
|
+
* Record a new memory snapshot.
|
|
48
|
+
* @param snapshot The memory usage data to record
|
|
49
|
+
* @returns true if recorded, false if buffer is full
|
|
50
|
+
*/
|
|
51
|
+
record(snapshot: MemorySnapshot): boolean;
|
|
52
|
+
/**
|
|
53
|
+
* Get all recorded snapshots as structured objects.
|
|
54
|
+
*/
|
|
55
|
+
getSnapshots(): MemorySnapshot[];
|
|
56
|
+
/**
|
|
57
|
+
* Get the number of recorded snapshots.
|
|
58
|
+
*/
|
|
59
|
+
getSnapshotCount(): number;
|
|
60
|
+
/**
|
|
61
|
+
* Get the maximum number of snapshots this tracker can hold.
|
|
62
|
+
*/
|
|
63
|
+
getCapacity(): number;
|
|
64
|
+
/**
|
|
65
|
+
* Get the peak resident set size across all snapshots.
|
|
66
|
+
*/
|
|
67
|
+
getPeakMemory(): number;
|
|
68
|
+
/**
|
|
69
|
+
* Get the latest memory snapshot, or undefined if none recorded.
|
|
70
|
+
*/
|
|
71
|
+
getLatestSnapshot(): MemorySnapshot | undefined;
|
|
72
|
+
/**
|
|
73
|
+
* Get the underlying native ArrayBuffer.
|
|
74
|
+
* This buffer is allocated via `NitroModules.createNativeArrayBuffer()`
|
|
75
|
+
* and lives in native memory, enabling zero-copy transfer to native methods.
|
|
76
|
+
*/
|
|
77
|
+
getNativeBuffer(): ArrayBuffer;
|
|
78
|
+
/**
|
|
79
|
+
* Get the Float64Array view over the native buffer.
|
|
80
|
+
*/
|
|
81
|
+
getView(): Float64Array;
|
|
82
|
+
/**
|
|
83
|
+
* Reset the tracker, clearing all recorded snapshots.
|
|
84
|
+
* The native buffer is preserved (not reallocated).
|
|
85
|
+
*/
|
|
86
|
+
reset(): void;
|
|
87
|
+
/**
|
|
88
|
+
* Get a summary of memory usage statistics.
|
|
89
|
+
*/
|
|
90
|
+
getSummary(): MemoryTrackerSummary;
|
|
91
|
+
}
|
|
92
|
+
/**
|
|
93
|
+
* Summary statistics from the memory tracker.
|
|
94
|
+
*/
|
|
95
|
+
export interface MemoryTrackerSummary {
|
|
96
|
+
/** Number of snapshots recorded */
|
|
97
|
+
snapshotCount: number;
|
|
98
|
+
/** Peak resident set size in bytes */
|
|
99
|
+
peakResidentBytes: number;
|
|
100
|
+
/** Average resident set size in bytes */
|
|
101
|
+
averageResidentBytes: number;
|
|
102
|
+
/** Latest resident set size in bytes */
|
|
103
|
+
currentResidentBytes: number;
|
|
104
|
+
/** Peak native heap allocated in bytes */
|
|
105
|
+
peakNativeHeapBytes: number;
|
|
106
|
+
/** Latest native heap allocated in bytes */
|
|
107
|
+
currentNativeHeapBytes: number;
|
|
108
|
+
/** RSS delta from first to last snapshot in bytes */
|
|
109
|
+
residentDeltaBytes: number;
|
|
110
|
+
/** Size of the native tracking buffer itself in bytes */
|
|
111
|
+
trackerBufferSizeBytes: number;
|
|
112
|
+
}
|
|
113
|
+
/**
|
|
114
|
+
* Create a new memory tracker backed by a native ArrayBuffer.
|
|
115
|
+
*
|
|
116
|
+
* @param maxSnapshots Maximum number of snapshots to store (default: 256)
|
|
117
|
+
* @returns A MemoryTracker instance
|
|
118
|
+
*/
|
|
119
|
+
export declare function createMemoryTracker(maxSnapshots?: number): MemoryTracker;
|
|
120
|
+
/**
|
|
121
|
+
* Create a native ArrayBuffer for efficient data transfer.
|
|
122
|
+
*
|
|
123
|
+
* A convenience wrapper around `NitroModules.createNativeArrayBuffer()`.
|
|
124
|
+
*
|
|
125
|
+
* @param size Size in bytes
|
|
126
|
+
* @returns A native-backed ArrayBuffer
|
|
127
|
+
*/
|
|
128
|
+
export declare function createNativeBuffer(size: number): ArrayBuffer;
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Memory tracking utilities for LiteRT-LM using real native memory metrics.
|
|
4
|
+
*
|
|
5
|
+
* Records real memory usage from OS-level APIs via `getMemoryUsage()`,
|
|
6
|
+
* and stores snapshots in a native-backed ArrayBuffer allocated via
|
|
7
|
+
* `NitroModules.createNativeArrayBuffer()` (v0.35+) for zero-copy interop.
|
|
8
|
+
*
|
|
9
|
+
* @example
|
|
10
|
+
* ```typescript
|
|
11
|
+
* import { createMemoryTracker } from 'react-native-litert-lm';
|
|
12
|
+
*
|
|
13
|
+
* const tracker = createMemoryTracker(100);
|
|
14
|
+
*
|
|
15
|
+
* // Record a real snapshot (typically called internally after inference)
|
|
16
|
+
* tracker.record({
|
|
17
|
+
* timestamp: Date.now(),
|
|
18
|
+
* nativeHeapBytes: usage.nativeHeapBytes,
|
|
19
|
+
* residentBytes: usage.residentBytes,
|
|
20
|
+
* availableMemoryBytes: usage.availableMemoryBytes,
|
|
21
|
+
* });
|
|
22
|
+
*
|
|
23
|
+
* console.log(`Peak RSS: ${tracker.getPeakMemory()} bytes`);
|
|
24
|
+
* ```
|
|
25
|
+
*/
|
|
26
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
27
|
+
exports.createMemoryTracker = createMemoryTracker;
|
|
28
|
+
exports.createNativeBuffer = createNativeBuffer;
|
|
29
|
+
const react_native_nitro_modules_1 = require("react-native-nitro-modules");
|
|
30
|
+
/** Number of Float64 fields per snapshot */
|
|
31
|
+
const FIELDS_PER_SNAPSHOT = 4;
|
|
32
|
+
/** Bytes per Float64 value */
|
|
33
|
+
const BYTES_PER_FIELD = Float64Array.BYTES_PER_ELEMENT; // 8
|
|
34
|
+
/**
|
|
35
|
+
* Create a new memory tracker backed by a native ArrayBuffer.
|
|
36
|
+
*
|
|
37
|
+
* @param maxSnapshots Maximum number of snapshots to store (default: 256)
|
|
38
|
+
* @returns A MemoryTracker instance
|
|
39
|
+
*/
|
|
40
|
+
function createMemoryTracker(maxSnapshots = 256) {
|
|
41
|
+
const bufferSize = maxSnapshots * FIELDS_PER_SNAPSHOT * BYTES_PER_FIELD;
|
|
42
|
+
// Use NitroModules.createNativeArrayBuffer for native-backed allocation.
|
|
43
|
+
const nativeBuffer = react_native_nitro_modules_1.NitroModules.createNativeArrayBuffer(bufferSize);
|
|
44
|
+
const view = new Float64Array(nativeBuffer);
|
|
45
|
+
let currentIndex = 0;
|
|
46
|
+
return {
|
|
47
|
+
record(snapshot) {
|
|
48
|
+
if (currentIndex >= maxSnapshots) {
|
|
49
|
+
return false;
|
|
50
|
+
}
|
|
51
|
+
const offset = currentIndex * FIELDS_PER_SNAPSHOT;
|
|
52
|
+
view[offset] = snapshot.timestamp;
|
|
53
|
+
view[offset + 1] = snapshot.nativeHeapBytes;
|
|
54
|
+
view[offset + 2] = snapshot.residentBytes;
|
|
55
|
+
view[offset + 3] = snapshot.availableMemoryBytes;
|
|
56
|
+
currentIndex++;
|
|
57
|
+
return true;
|
|
58
|
+
},
|
|
59
|
+
getSnapshots() {
|
|
60
|
+
const snapshots = [];
|
|
61
|
+
for (let i = 0; i < currentIndex; i++) {
|
|
62
|
+
const offset = i * FIELDS_PER_SNAPSHOT;
|
|
63
|
+
snapshots.push({
|
|
64
|
+
timestamp: view[offset],
|
|
65
|
+
nativeHeapBytes: view[offset + 1],
|
|
66
|
+
residentBytes: view[offset + 2],
|
|
67
|
+
availableMemoryBytes: view[offset + 3],
|
|
68
|
+
});
|
|
69
|
+
}
|
|
70
|
+
return snapshots;
|
|
71
|
+
},
|
|
72
|
+
getSnapshotCount() {
|
|
73
|
+
return currentIndex;
|
|
74
|
+
},
|
|
75
|
+
getCapacity() {
|
|
76
|
+
return maxSnapshots;
|
|
77
|
+
},
|
|
78
|
+
getPeakMemory() {
|
|
79
|
+
let peak = 0;
|
|
80
|
+
for (let i = 0; i < currentIndex; i++) {
|
|
81
|
+
const rss = view[i * FIELDS_PER_SNAPSHOT + 2];
|
|
82
|
+
if (rss > peak) {
|
|
83
|
+
peak = rss;
|
|
84
|
+
}
|
|
85
|
+
}
|
|
86
|
+
return peak;
|
|
87
|
+
},
|
|
88
|
+
getLatestSnapshot() {
|
|
89
|
+
if (currentIndex === 0)
|
|
90
|
+
return undefined;
|
|
91
|
+
const offset = (currentIndex - 1) * FIELDS_PER_SNAPSHOT;
|
|
92
|
+
return {
|
|
93
|
+
timestamp: view[offset],
|
|
94
|
+
nativeHeapBytes: view[offset + 1],
|
|
95
|
+
residentBytes: view[offset + 2],
|
|
96
|
+
availableMemoryBytes: view[offset + 3],
|
|
97
|
+
};
|
|
98
|
+
},
|
|
99
|
+
getNativeBuffer() {
|
|
100
|
+
return nativeBuffer;
|
|
101
|
+
},
|
|
102
|
+
getView() {
|
|
103
|
+
return view;
|
|
104
|
+
},
|
|
105
|
+
reset() {
|
|
106
|
+
view.fill(0);
|
|
107
|
+
currentIndex = 0;
|
|
108
|
+
},
|
|
109
|
+
getSummary() {
|
|
110
|
+
let peakRss = 0;
|
|
111
|
+
let peakHeap = 0;
|
|
112
|
+
let sumRss = 0;
|
|
113
|
+
let firstRss = 0;
|
|
114
|
+
let lastRss = 0;
|
|
115
|
+
let lastHeap = 0;
|
|
116
|
+
for (let i = 0; i < currentIndex; i++) {
|
|
117
|
+
const offset = i * FIELDS_PER_SNAPSHOT;
|
|
118
|
+
const heap = view[offset + 1];
|
|
119
|
+
const rss = view[offset + 2];
|
|
120
|
+
if (rss > peakRss)
|
|
121
|
+
peakRss = rss;
|
|
122
|
+
if (heap > peakHeap)
|
|
123
|
+
peakHeap = heap;
|
|
124
|
+
sumRss += rss;
|
|
125
|
+
if (i === 0)
|
|
126
|
+
firstRss = rss;
|
|
127
|
+
if (i === currentIndex - 1) {
|
|
128
|
+
lastRss = rss;
|
|
129
|
+
lastHeap = heap;
|
|
130
|
+
}
|
|
131
|
+
}
|
|
132
|
+
return {
|
|
133
|
+
snapshotCount: currentIndex,
|
|
134
|
+
peakResidentBytes: peakRss,
|
|
135
|
+
averageResidentBytes: currentIndex > 0 ? sumRss / currentIndex : 0,
|
|
136
|
+
currentResidentBytes: lastRss,
|
|
137
|
+
peakNativeHeapBytes: peakHeap,
|
|
138
|
+
currentNativeHeapBytes: lastHeap,
|
|
139
|
+
residentDeltaBytes: lastRss - firstRss,
|
|
140
|
+
trackerBufferSizeBytes: bufferSize,
|
|
141
|
+
};
|
|
142
|
+
},
|
|
143
|
+
};
|
|
144
|
+
}
|
|
145
|
+
/**
|
|
146
|
+
* Create a native ArrayBuffer for efficient data transfer.
|
|
147
|
+
*
|
|
148
|
+
* A convenience wrapper around `NitroModules.createNativeArrayBuffer()`.
|
|
149
|
+
*
|
|
150
|
+
* @param size Size in bytes
|
|
151
|
+
* @returns A native-backed ArrayBuffer
|
|
152
|
+
*/
|
|
153
|
+
function createNativeBuffer(size) {
|
|
154
|
+
return react_native_nitro_modules_1.NitroModules.createNativeArrayBuffer(size);
|
|
155
|
+
}
|
package/lib/modelFactory.d.ts
CHANGED
|
@@ -1,5 +1,24 @@
|
|
|
1
|
-
import { LiteRTLM } from "./specs/LiteRTLM.nitro";
|
|
1
|
+
import { LiteRTLM, LLMConfig } from "./specs/LiteRTLM.nitro";
|
|
2
|
+
import { MemoryTracker } from "./memoryTracker";
|
|
3
|
+
/**
|
|
4
|
+
* Extended LiteRT-LM instance with optional memory tracking and
|
|
5
|
+
* augmented loadModel that accepts a download progress callback.
|
|
6
|
+
*/
|
|
7
|
+
export type LiteRTLMInstance = Omit<LiteRTLM, "loadModel"> & {
|
|
8
|
+
memoryTracker?: MemoryTracker;
|
|
9
|
+
loadModel: (pathOrUrl: string, config?: LLMConfig, onDownloadProgress?: (progress: number) => void) => Promise<void>;
|
|
10
|
+
};
|
|
2
11
|
/**
|
|
3
12
|
* Creates a new LiteRT-LM inference engine instance.
|
|
13
|
+
*
|
|
14
|
+
* Optionally creates a native-backed memory tracker using
|
|
15
|
+
* `NitroModules.createNativeArrayBuffer()` (v0.35+) for efficient
|
|
16
|
+
* zero-copy memory usage tracking.
|
|
17
|
+
*
|
|
18
|
+
* @param options.enableMemoryTracking Enable automatic memory tracking (default: false)
|
|
19
|
+
* @param options.maxMemorySnapshots Maximum number of memory snapshots to store (default: 256)
|
|
4
20
|
*/
|
|
5
|
-
export declare function createLLM(
|
|
21
|
+
export declare function createLLM(options?: {
|
|
22
|
+
enableMemoryTracking?: boolean;
|
|
23
|
+
maxMemorySnapshots?: number;
|
|
24
|
+
}): LiteRTLMInstance;
|
package/lib/modelFactory.js
CHANGED
|
@@ -2,17 +2,54 @@
|
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports.createLLM = createLLM;
|
|
4
4
|
const react_native_nitro_modules_1 = require("react-native-nitro-modules");
|
|
5
|
+
const memoryTracker_1 = require("./memoryTracker");
|
|
5
6
|
/**
|
|
6
7
|
* Creates a new LiteRT-LM inference engine instance.
|
|
8
|
+
*
|
|
9
|
+
* Optionally creates a native-backed memory tracker using
|
|
10
|
+
* `NitroModules.createNativeArrayBuffer()` (v0.35+) for efficient
|
|
11
|
+
* zero-copy memory usage tracking.
|
|
12
|
+
*
|
|
13
|
+
* @param options.enableMemoryTracking Enable automatic memory tracking (default: false)
|
|
14
|
+
* @param options.maxMemorySnapshots Maximum number of memory snapshots to store (default: 256)
|
|
7
15
|
*/
|
|
8
|
-
function createLLM() {
|
|
16
|
+
function createLLM(options) {
|
|
9
17
|
const native = react_native_nitro_modules_1.NitroModules.createHybridObject("LiteRTLM");
|
|
18
|
+
const enableTracking = options?.enableMemoryTracking ?? false;
|
|
19
|
+
const tracker = enableTracking
|
|
20
|
+
? (0, memoryTracker_1.createMemoryTracker)(options?.maxMemorySnapshots ?? 256)
|
|
21
|
+
: undefined;
|
|
22
|
+
/**
|
|
23
|
+
* Record a real memory snapshot using OS-level APIs via getMemoryUsage().
|
|
24
|
+
*/
|
|
25
|
+
const recordMemorySnapshot = () => {
|
|
26
|
+
if (!tracker)
|
|
27
|
+
return;
|
|
28
|
+
try {
|
|
29
|
+
const usage = native.getMemoryUsage();
|
|
30
|
+
tracker.record({
|
|
31
|
+
timestamp: Date.now(),
|
|
32
|
+
nativeHeapBytes: usage.nativeHeapBytes,
|
|
33
|
+
residentBytes: usage.residentBytes,
|
|
34
|
+
availableMemoryBytes: usage.availableMemoryBytes,
|
|
35
|
+
});
|
|
36
|
+
}
|
|
37
|
+
catch {
|
|
38
|
+
// Ignore errors during memory tracking - it's non-critical
|
|
39
|
+
}
|
|
40
|
+
};
|
|
10
41
|
return {
|
|
11
42
|
...native,
|
|
12
|
-
|
|
43
|
+
memoryTracker: tracker,
|
|
44
|
+
loadModel: async (pathOrUrl, config, onDownloadProgress) => {
|
|
13
45
|
let modelPath = pathOrUrl;
|
|
14
|
-
// Check if it's a URL
|
|
46
|
+
// Check if it's a URL — enforce HTTPS for model downloads
|
|
15
47
|
if (pathOrUrl.startsWith("http://") || pathOrUrl.startsWith("https://")) {
|
|
48
|
+
if (pathOrUrl.startsWith("http://")) {
|
|
49
|
+
throw new Error("Insecure HTTP URLs are not allowed for model downloads. " +
|
|
50
|
+
"Use HTTPS instead: " +
|
|
51
|
+
pathOrUrl.replace("http://", "https://"));
|
|
52
|
+
}
|
|
16
53
|
// Extract filename from URL
|
|
17
54
|
const fileName = pathOrUrl.split("/").pop();
|
|
18
55
|
if (!fileName) {
|
|
@@ -20,21 +57,51 @@ function createLLM() {
|
|
|
20
57
|
}
|
|
21
58
|
console.log(`Checking model at ${pathOrUrl}...`);
|
|
22
59
|
modelPath = await native.downloadModel(pathOrUrl, fileName, (progress) => {
|
|
23
|
-
|
|
60
|
+
onDownloadProgress?.(progress);
|
|
24
61
|
});
|
|
25
62
|
console.log(`Model downloaded to: ${modelPath}`);
|
|
26
63
|
}
|
|
27
|
-
|
|
64
|
+
const result = await native.loadModel(modelPath, config);
|
|
65
|
+
// Record initial memory snapshot after model load
|
|
66
|
+
if (tracker) {
|
|
67
|
+
tracker.reset();
|
|
68
|
+
recordMemorySnapshot();
|
|
69
|
+
}
|
|
70
|
+
return result;
|
|
71
|
+
},
|
|
72
|
+
sendMessage: async (...args) => {
|
|
73
|
+
const result = await native.sendMessage(...args);
|
|
74
|
+
recordMemorySnapshot();
|
|
75
|
+
return result;
|
|
76
|
+
},
|
|
77
|
+
sendMessageAsync: (...args) => {
|
|
78
|
+
const [message, onToken] = args;
|
|
79
|
+
native.sendMessageAsync(message, (token, done) => {
|
|
80
|
+
onToken(token, done);
|
|
81
|
+
if (done) {
|
|
82
|
+
recordMemorySnapshot();
|
|
83
|
+
}
|
|
84
|
+
});
|
|
85
|
+
},
|
|
86
|
+
sendMessageWithImage: async (...args) => {
|
|
87
|
+
const result = await native.sendMessageWithImage(...args);
|
|
88
|
+
recordMemorySnapshot();
|
|
89
|
+
return result;
|
|
90
|
+
},
|
|
91
|
+
sendMessageWithAudio: async (...args) => {
|
|
92
|
+
const result = await native.sendMessageWithAudio(...args);
|
|
93
|
+
recordMemorySnapshot();
|
|
94
|
+
return result;
|
|
28
95
|
},
|
|
29
|
-
// Bind valid methods to native instance
|
|
30
|
-
sendMessage: native.sendMessage.bind(native),
|
|
31
|
-
sendMessageAsync: native.sendMessageAsync.bind(native),
|
|
32
|
-
sendMessageWithImage: native.sendMessageWithImage.bind(native),
|
|
33
|
-
sendMessageWithAudio: native.sendMessageWithAudio.bind(native),
|
|
34
96
|
getHistory: native.getHistory.bind(native),
|
|
35
|
-
resetConversation:
|
|
97
|
+
resetConversation: () => {
|
|
98
|
+
native.resetConversation();
|
|
99
|
+
// KV cache is cleared on reset, record the drop
|
|
100
|
+
recordMemorySnapshot();
|
|
101
|
+
},
|
|
36
102
|
isReady: native.isReady.bind(native),
|
|
37
103
|
getStats: native.getStats.bind(native),
|
|
104
|
+
getMemoryUsage: native.getMemoryUsage.bind(native),
|
|
38
105
|
close: native.close.bind(native),
|
|
39
106
|
downloadModel: native.downloadModel.bind(native),
|
|
40
107
|
deleteModel: native.deleteModel.bind(native),
|