react-native-litert-lm 0.3.7 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +153 -135
- package/android/build.gradle +12 -0
- package/android/src/main/AndroidManifest.xml +8 -0
- package/android/src/main/java/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLM.kt +276 -62
- package/android/src/main/java/dev/litert/litertlm/LiteRTLMPackage.kt +19 -2
- package/android/src/test/java/com/margelo/nitro/core/Promise.kt +46 -0
- package/android/src/test/java/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLMTest.kt +105 -0
- package/ios/HybridLiteRTLM.swift +1344 -0
- package/ios/Tests/HybridLiteRTLMTests.swift +113 -0
- package/lib/__mocks__/react-native-nitro-modules.d.ts +65 -0
- package/lib/__mocks__/react-native-nitro-modules.js +60 -0
- package/lib/__tests__/hooks.test.d.ts +1 -0
- package/lib/__tests__/hooks.test.js +124 -0
- package/lib/__tests__/memoryTracker.test.d.ts +1 -0
- package/lib/__tests__/memoryTracker.test.js +74 -0
- package/lib/__tests__/modelFactory.test.d.ts +1 -0
- package/lib/__tests__/modelFactory.test.js +68 -0
- package/lib/hooks.js +27 -3
- package/lib/index.d.ts +6 -2
- package/lib/index.js +8 -8
- package/lib/modelFactory.js +82 -63
- package/lib/specs/LiteRTLM.nitro.d.ts +87 -2
- package/nitrogen/generated/android/LiteRTLMOnLoad.cpp +2 -2
- package/nitrogen/generated/android/c++/JHybridLiteRTLMSpec.cpp +94 -9
- package/nitrogen/generated/android/c++/JHybridLiteRTLMSpec.hpp +5 -1
- package/nitrogen/generated/android/c++/JLLMConfig.hpp +40 -3
- package/nitrogen/generated/android/c++/JMultimodalPart.hpp +74 -0
- package/nitrogen/generated/android/c++/JPartType.hpp +61 -0
- package/nitrogen/generated/android/c++/JToolDefinition.hpp +65 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/GenerationStats.kt +23 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLMSpec.kt +28 -2
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/LLMConfig.kt +46 -3
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/MemoryUsage.kt +19 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/Message.kt +15 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/MultimodalPart.kt +66 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/PartType.kt +24 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/ToolDefinition.kt +61 -0
- package/nitrogen/generated/ios/LiteRTLM-Swift-Cxx-Bridge.cpp +57 -1
- package/nitrogen/generated/ios/LiteRTLM-Swift-Cxx-Bridge.hpp +414 -3
- package/nitrogen/generated/ios/LiteRTLM-Swift-Cxx-Umbrella.hpp +41 -3
- package/nitrogen/generated/ios/LiteRTLMAutolinking.mm +4 -6
- package/nitrogen/generated/ios/LiteRTLMAutolinking.swift +10 -0
- package/nitrogen/generated/ios/c++/HybridLiteRTLMSpecSwift.cpp +11 -0
- package/nitrogen/generated/ios/c++/HybridLiteRTLMSpecSwift.hpp +240 -0
- package/nitrogen/generated/ios/swift/Backend.swift +44 -0
- package/nitrogen/generated/ios/swift/Func_void.swift +46 -0
- package/nitrogen/generated/ios/swift/Func_void_double.swift +46 -0
- package/nitrogen/generated/ios/swift/Func_void_std__exception_ptr.swift +46 -0
- package/nitrogen/generated/ios/swift/Func_void_std__string.swift +46 -0
- package/nitrogen/generated/ios/swift/Func_void_std__string_bool.swift +46 -0
- package/nitrogen/generated/ios/swift/GenerationStats.swift +54 -0
- package/nitrogen/generated/ios/swift/HybridLiteRTLMSpec.swift +71 -0
- package/nitrogen/generated/ios/swift/HybridLiteRTLMSpec_cxx.swift +431 -0
- package/nitrogen/generated/ios/swift/LLMConfig.swift +203 -0
- package/nitrogen/generated/ios/swift/MemoryUsage.swift +44 -0
- package/nitrogen/generated/ios/swift/Message.swift +34 -0
- package/nitrogen/generated/ios/swift/MultimodalPart.swift +83 -0
- package/nitrogen/generated/ios/swift/PartType.swift +44 -0
- package/nitrogen/generated/ios/swift/Role.swift +44 -0
- package/nitrogen/generated/ios/swift/ToolDefinition.swift +39 -0
- package/nitrogen/generated/shared/c++/HybridLiteRTLMSpec.cpp +4 -0
- package/nitrogen/generated/shared/c++/HybridLiteRTLMSpec.hpp +9 -2
- package/nitrogen/generated/shared/c++/LLMConfig.hpp +22 -2
- package/nitrogen/generated/shared/c++/MultimodalPart.hpp +99 -0
- package/nitrogen/generated/shared/c++/PartType.hpp +80 -0
- package/nitrogen/generated/shared/c++/ToolDefinition.hpp +91 -0
- package/package.json +22 -11
- package/react-native-litert-lm.podspec +17 -19
- package/scripts/download-ios-frameworks.sh +17 -50
- package/scripts/framework-source.js +46 -0
- package/scripts/postinstall.js +40 -18
- package/src/__mocks__/react-native-nitro-modules.ts +58 -0
- package/src/__tests__/hooks.test.ts +153 -0
- package/src/__tests__/memoryTracker.test.ts +87 -0
- package/src/__tests__/modelFactory.test.ts +96 -0
- package/src/hooks.ts +29 -7
- package/src/index.ts +7 -10
- package/src/modelFactory.ts +104 -80
- package/src/specs/LiteRTLM.nitro.ts +106 -2
- package/cpp/HybridLiteRTLM.cpp +0 -939
- package/cpp/HybridLiteRTLM.hpp +0 -169
- package/cpp/IOSDownloadHelper.h +0 -24
- package/ios/IOSDownloadHelper.mm +0 -129
- package/scripts/build-ios-engine.sh +0 -302
- package/scripts/stubs/cxx_bridge_stubs.cc +0 -224
- package/scripts/stubs/gemma_model_constraint_provider.cc +0 -46
- package/scripts/stubs/llguidance_stubs.c +0 -101
- package/src/templates.ts +0 -105
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
import { createMemoryTracker, createNativeBuffer } from '../memoryTracker';
|
|
2
|
+
import { NitroModules } from 'react-native-nitro-modules';
|
|
3
|
+
|
|
4
|
+
describe('MemoryTracker Unit Tests', () => {
|
|
5
|
+
beforeEach(() => {
|
|
6
|
+
jest.clearAllMocks();
|
|
7
|
+
});
|
|
8
|
+
|
|
9
|
+
it('should allocate correct native-backed ArrayBuffer size on initialization', () => {
|
|
10
|
+
const tracker = createMemoryTracker(10);
|
|
11
|
+
expect(NitroModules.createNativeArrayBuffer).toHaveBeenCalledWith(10 * 4 * 8); // 10 snapshots * 4 fields * 8 bytes/Float64
|
|
12
|
+
expect(tracker.getCapacity()).toBe(10);
|
|
13
|
+
expect(tracker.getSnapshotCount()).toBe(0);
|
|
14
|
+
});
|
|
15
|
+
|
|
16
|
+
it('should record snapshots correctly and retrieve them', () => {
|
|
17
|
+
const tracker = createMemoryTracker(5);
|
|
18
|
+
const snapshot1 = {
|
|
19
|
+
timestamp: 1000,
|
|
20
|
+
nativeHeapBytes: 100,
|
|
21
|
+
residentBytes: 200,
|
|
22
|
+
availableMemoryBytes: 500,
|
|
23
|
+
};
|
|
24
|
+
|
|
25
|
+
expect(tracker.record(snapshot1)).toBe(true);
|
|
26
|
+
expect(tracker.getSnapshotCount()).toBe(1);
|
|
27
|
+
expect(tracker.getLatestSnapshot()).toEqual(snapshot1);
|
|
28
|
+
|
|
29
|
+
const snapshots = tracker.getSnapshots();
|
|
30
|
+
expect(snapshots).toHaveLength(1);
|
|
31
|
+
expect(snapshots[0]).toEqual(snapshot1);
|
|
32
|
+
});
|
|
33
|
+
|
|
34
|
+
it('should reject new snapshots and return false when capacity is reached', () => {
|
|
35
|
+
const tracker = createMemoryTracker(2);
|
|
36
|
+
|
|
37
|
+
expect(tracker.record({ timestamp: 1, nativeHeapBytes: 10, residentBytes: 20, availableMemoryBytes: 50 })).toBe(true);
|
|
38
|
+
expect(tracker.record({ timestamp: 2, nativeHeapBytes: 20, residentBytes: 30, availableMemoryBytes: 40 })).toBe(true);
|
|
39
|
+
expect(tracker.record({ timestamp: 3, nativeHeapBytes: 30, residentBytes: 40, availableMemoryBytes: 30 })).toBe(false);
|
|
40
|
+
|
|
41
|
+
expect(tracker.getSnapshotCount()).toBe(2);
|
|
42
|
+
});
|
|
43
|
+
|
|
44
|
+
it('should calculate correct peak resident memory size', () => {
|
|
45
|
+
const tracker = createMemoryTracker(5);
|
|
46
|
+
tracker.record({ timestamp: 1, nativeHeapBytes: 100, residentBytes: 150, availableMemoryBytes: 1000 });
|
|
47
|
+
tracker.record({ timestamp: 2, nativeHeapBytes: 120, residentBytes: 300, availableMemoryBytes: 1000 });
|
|
48
|
+
tracker.record({ timestamp: 3, nativeHeapBytes: 110, residentBytes: 200, availableMemoryBytes: 1000 });
|
|
49
|
+
|
|
50
|
+
expect(tracker.getPeakMemory()).toBe(300);
|
|
51
|
+
});
|
|
52
|
+
|
|
53
|
+
it('should calculate accurate memory summary statistics', () => {
|
|
54
|
+
const tracker = createMemoryTracker(5);
|
|
55
|
+
tracker.record({ timestamp: 1, nativeHeapBytes: 50, residentBytes: 100, availableMemoryBytes: 1000 });
|
|
56
|
+
tracker.record({ timestamp: 2, nativeHeapBytes: 150, residentBytes: 300, availableMemoryBytes: 800 });
|
|
57
|
+
tracker.record({ timestamp: 3, nativeHeapBytes: 100, residentBytes: 200, availableMemoryBytes: 900 });
|
|
58
|
+
|
|
59
|
+
const summary = tracker.getSummary();
|
|
60
|
+
expect(summary.snapshotCount).toBe(3);
|
|
61
|
+
expect(summary.peakResidentBytes).toBe(300);
|
|
62
|
+
expect(summary.averageResidentBytes).toBe(200); // (100 + 300 + 200) / 3
|
|
63
|
+
expect(summary.currentResidentBytes).toBe(200);
|
|
64
|
+
expect(summary.peakNativeHeapBytes).toBe(150);
|
|
65
|
+
expect(summary.currentNativeHeapBytes).toBe(100);
|
|
66
|
+
expect(summary.residentDeltaBytes).toBe(100); // currentRss(200) - firstRss(100)
|
|
67
|
+
expect(summary.trackerBufferSizeBytes).toBe(5 * 4 * 8);
|
|
68
|
+
});
|
|
69
|
+
|
|
70
|
+
it('should preserve buffer but reset internal state when reset() is called', () => {
|
|
71
|
+
const tracker = createMemoryTracker(5);
|
|
72
|
+
tracker.record({ timestamp: 1, nativeHeapBytes: 50, residentBytes: 100, availableMemoryBytes: 1000 });
|
|
73
|
+
|
|
74
|
+
expect(tracker.getSnapshotCount()).toBe(1);
|
|
75
|
+
tracker.reset();
|
|
76
|
+
expect(tracker.getSnapshotCount()).toBe(0);
|
|
77
|
+
expect(tracker.getLatestSnapshot()).toBeUndefined();
|
|
78
|
+
expect(tracker.getSnapshots()).toEqual([]);
|
|
79
|
+
});
|
|
80
|
+
|
|
81
|
+
it('should allow standalone native ArrayBuffer allocation via createNativeBuffer', () => {
|
|
82
|
+
const size = 128;
|
|
83
|
+
const buffer = createNativeBuffer(size);
|
|
84
|
+
expect(NitroModules.createNativeArrayBuffer).toHaveBeenCalledWith(size);
|
|
85
|
+
expect(buffer.byteLength).toBe(size);
|
|
86
|
+
});
|
|
87
|
+
});
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
import { createLLM } from '../modelFactory';
|
|
2
|
+
import { mockLiteRTLM } from '../__mocks__/react-native-nitro-modules';
|
|
3
|
+
|
|
4
|
+
describe('modelFactory Security & Proxy Unit Tests', () => {
|
|
5
|
+
let llm: ReturnType<typeof createLLM>;
|
|
6
|
+
|
|
7
|
+
beforeEach(() => {
|
|
8
|
+
jest.clearAllMocks();
|
|
9
|
+
llm = createLLM({ enableMemoryTracking: true });
|
|
10
|
+
});
|
|
11
|
+
|
|
12
|
+
it('should block insecure HTTP downloads', async () => {
|
|
13
|
+
await expect(llm.loadModel('http://example.com/model.litertlm'))
|
|
14
|
+
.rejects.toThrow('Insecure HTTP URLs are not allowed for model downloads');
|
|
15
|
+
});
|
|
16
|
+
|
|
17
|
+
it('should allow secure HTTPS downloads and strip query parameters', async () => {
|
|
18
|
+
await llm.loadModel('https://example.com/model.litertlm?token=123');
|
|
19
|
+
|
|
20
|
+
expect(mockLiteRTLM.downloadModel).toHaveBeenCalledWith(
|
|
21
|
+
'https://example.com/model.litertlm?token=123',
|
|
22
|
+
'model.litertlm',
|
|
23
|
+
expect.any(Function)
|
|
24
|
+
);
|
|
25
|
+
expect(mockLiteRTLM.loadModel).toHaveBeenCalledWith('/mock/path/model.litertlm', undefined);
|
|
26
|
+
});
|
|
27
|
+
|
|
28
|
+
it('should throw an error for invalid model URL', async () => {
|
|
29
|
+
await expect(llm.loadModel('https://example.com/'))
|
|
30
|
+
.rejects.toThrow('Invalid model URL: https://example.com/');
|
|
31
|
+
});
|
|
32
|
+
|
|
33
|
+
it('should successfully proxy sendMessage and record memory metrics', async () => {
|
|
34
|
+
const response = await llm.sendMessage("Test prompt");
|
|
35
|
+
|
|
36
|
+
expect(response).toBe("Mock response");
|
|
37
|
+
expect(mockLiteRTLM.sendMessage).toHaveBeenCalledWith("Test prompt");
|
|
38
|
+
expect(mockLiteRTLM.getMemoryUsage).toHaveBeenCalled();
|
|
39
|
+
expect(llm.memoryTracker?.getSnapshotCount()).toBe(1); // sendMessage records one
|
|
40
|
+
});
|
|
41
|
+
|
|
42
|
+
it('should successfully proxy resetConversation and record memory metrics', async () => {
|
|
43
|
+
await llm.resetConversation();
|
|
44
|
+
|
|
45
|
+
expect(mockLiteRTLM.resetConversation).toHaveBeenCalled();
|
|
46
|
+
expect(mockLiteRTLM.getMemoryUsage).toHaveBeenCalled();
|
|
47
|
+
});
|
|
48
|
+
|
|
49
|
+
it('should successfully proxy sendMessageAsync and record memory metrics when done', async () => {
|
|
50
|
+
const onToken = jest.fn();
|
|
51
|
+
await llm.sendMessageAsync("Async prompt", onToken);
|
|
52
|
+
|
|
53
|
+
expect(onToken).toHaveBeenCalledWith("Mock ", false);
|
|
54
|
+
expect(onToken).toHaveBeenCalledWith("token", true);
|
|
55
|
+
expect(mockLiteRTLM.sendMessageAsync).toHaveBeenCalled();
|
|
56
|
+
expect(mockLiteRTLM.getMemoryUsage).toHaveBeenCalled();
|
|
57
|
+
});
|
|
58
|
+
|
|
59
|
+
it('should successfully proxy sendMessageWithImageAsync and record memory metrics when done', async () => {
|
|
60
|
+
const onToken = jest.fn();
|
|
61
|
+
await llm.sendMessageWithImageAsync("Vision prompt", "/path/to/image.jpg", onToken);
|
|
62
|
+
|
|
63
|
+
expect(onToken).toHaveBeenCalledWith("Mock vision ", false);
|
|
64
|
+
expect(onToken).toHaveBeenCalledWith("token", true);
|
|
65
|
+
expect(mockLiteRTLM.sendMessageWithImageAsync).toHaveBeenCalledWith(
|
|
66
|
+
"Vision prompt",
|
|
67
|
+
"/path/to/image.jpg",
|
|
68
|
+
expect.any(Function)
|
|
69
|
+
);
|
|
70
|
+
expect(mockLiteRTLM.getMemoryUsage).toHaveBeenCalled();
|
|
71
|
+
});
|
|
72
|
+
|
|
73
|
+
it('should successfully proxy sendMessageWithAudioAsync and record memory metrics when done', async () => {
|
|
74
|
+
const onToken = jest.fn();
|
|
75
|
+
await llm.sendMessageWithAudioAsync("Audio prompt", "/path/to/audio.wav", onToken);
|
|
76
|
+
|
|
77
|
+
expect(onToken).toHaveBeenCalledWith("Mock audio ", false);
|
|
78
|
+
expect(onToken).toHaveBeenCalledWith("token", true);
|
|
79
|
+
expect(mockLiteRTLM.sendMessageWithAudioAsync).toHaveBeenCalledWith(
|
|
80
|
+
"Audio prompt",
|
|
81
|
+
"/path/to/audio.wav",
|
|
82
|
+
expect.any(Function)
|
|
83
|
+
);
|
|
84
|
+
expect(mockLiteRTLM.getMemoryUsage).toHaveBeenCalled();
|
|
85
|
+
});
|
|
86
|
+
|
|
87
|
+
it('should successfully access memoryTracker and getSnapshots when memory tracking is enabled', () => {
|
|
88
|
+
expect(llm.memoryTracker).toBeDefined();
|
|
89
|
+
expect(llm.memoryTracker?.getCapacity()).toBe(256);
|
|
90
|
+
});
|
|
91
|
+
|
|
92
|
+
it('should not initialize memoryTracker when enableMemoryTracking option is false', () => {
|
|
93
|
+
const untrackedLLM = createLLM({ enableMemoryTracking: false });
|
|
94
|
+
expect(untrackedLLM.memoryTracker).toBeUndefined();
|
|
95
|
+
});
|
|
96
|
+
});
|
package/src/hooks.ts
CHANGED
|
@@ -64,7 +64,8 @@ export function useModel(
|
|
|
64
64
|
const [isGenerating, setIsGenerating] = useState(false);
|
|
65
65
|
const [downloadProgress, setDownloadProgress] = useState(0);
|
|
66
66
|
const [error, setError] = useState<string | null>(null);
|
|
67
|
-
const [memorySummary, setMemorySummary] =
|
|
67
|
+
const [memorySummary, setMemorySummary] =
|
|
68
|
+
useState<MemoryTrackerSummary | null>(null);
|
|
68
69
|
|
|
69
70
|
// Destructure config into primitive values for stable dependency arrays.
|
|
70
71
|
// This prevents infinite re-render loops when consumers pass inline config
|
|
@@ -78,6 +79,11 @@ export function useModel(
|
|
|
78
79
|
const temperature = config?.temperature;
|
|
79
80
|
const topK = config?.topK;
|
|
80
81
|
const topP = config?.topP;
|
|
82
|
+
const validate = config?.validate;
|
|
83
|
+
const multimodal = config?.multimodal;
|
|
84
|
+
const tools = config?.tools;
|
|
85
|
+
const enableSpeculativeDecoding = config?.enableSpeculativeDecoding;
|
|
86
|
+
const toolsKey = tools ? JSON.stringify(tools) : undefined;
|
|
81
87
|
|
|
82
88
|
// Build a stable config object from the destructured primitives
|
|
83
89
|
const nativeConfig = useMemo<LLMConfig>(
|
|
@@ -88,8 +94,25 @@ export function useModel(
|
|
|
88
94
|
...(temperature !== undefined && { temperature }),
|
|
89
95
|
...(topK !== undefined && { topK }),
|
|
90
96
|
...(topP !== undefined && { topP }),
|
|
97
|
+
...(validate !== undefined && { validate }),
|
|
98
|
+
...(multimodal !== undefined && { multimodal }),
|
|
99
|
+
...(tools !== undefined && { tools }),
|
|
100
|
+
...(enableSpeculativeDecoding !== undefined && {
|
|
101
|
+
enableSpeculativeDecoding,
|
|
102
|
+
}),
|
|
91
103
|
}),
|
|
92
|
-
[
|
|
104
|
+
[
|
|
105
|
+
backend,
|
|
106
|
+
systemPrompt,
|
|
107
|
+
maxTokens,
|
|
108
|
+
temperature,
|
|
109
|
+
topK,
|
|
110
|
+
topP,
|
|
111
|
+
validate,
|
|
112
|
+
multimodal,
|
|
113
|
+
toolsKey,
|
|
114
|
+
enableSpeculativeDecoding,
|
|
115
|
+
],
|
|
93
116
|
);
|
|
94
117
|
|
|
95
118
|
/**
|
|
@@ -165,16 +188,15 @@ export function useModel(
|
|
|
165
188
|
return new Promise<string>((resolve, reject) => {
|
|
166
189
|
let fullResponse = "";
|
|
167
190
|
try {
|
|
168
|
-
modelRef.current
|
|
169
|
-
prompt,
|
|
170
|
-
(token: string, done: boolean) => {
|
|
191
|
+
modelRef.current
|
|
192
|
+
?.sendMessageAsync(prompt, (token: string, done: boolean) => {
|
|
171
193
|
fullResponse += token;
|
|
172
194
|
if (done) {
|
|
173
195
|
refreshMemorySummary();
|
|
174
196
|
resolve(fullResponse);
|
|
175
197
|
}
|
|
176
|
-
}
|
|
177
|
-
|
|
198
|
+
})
|
|
199
|
+
.catch(reject);
|
|
178
200
|
} catch (e: any) {
|
|
179
201
|
reject(e);
|
|
180
202
|
}
|
package/src/index.ts
CHANGED
|
@@ -20,13 +20,6 @@ export type {
|
|
|
20
20
|
MemoryUsage,
|
|
21
21
|
} from "./specs/LiteRTLM.nitro";
|
|
22
22
|
|
|
23
|
-
// Re-export template utilities
|
|
24
|
-
export type { ChatMessage } from "./templates";
|
|
25
|
-
export {
|
|
26
|
-
applyGemmaTemplate,
|
|
27
|
-
applyPhiTemplate,
|
|
28
|
-
applyLlamaTemplate,
|
|
29
|
-
} from "./templates";
|
|
30
23
|
|
|
31
24
|
// Re-export memory tracking utilities (uses NitroModules.createNativeArrayBuffer v0.35+)
|
|
32
25
|
export type {
|
|
@@ -158,6 +151,12 @@ export function checkBackendSupport(backend: Backend): string | undefined {
|
|
|
158
151
|
* Check if multimodal features (image/audio) are supported on the current platform.
|
|
159
152
|
* Returns an error message if not supported, undefined if OK.
|
|
160
153
|
*
|
|
154
|
+
* Both iOS (v0.12.0 CLiteRTLM xcframework) and Android (LiteRT-LM SDK) ship the
|
|
155
|
+
* vision/audio executor ops, so there is no platform-level block. Whether a
|
|
156
|
+
* given call succeeds depends on the **loaded model**: only multimodal models
|
|
157
|
+
* (e.g. Gemma 3n) bundle the vision/audio executors. Pass `multimodal: true` to
|
|
158
|
+
* `loadModel` for such models, or rely on filename sniffing ("3n"/"gemma3").
|
|
159
|
+
*
|
|
161
160
|
* @returns Error message if multimodal is not supported, undefined if OK
|
|
162
161
|
*
|
|
163
162
|
* @example
|
|
@@ -172,9 +171,7 @@ export function checkBackendSupport(backend: Backend): string | undefined {
|
|
|
172
171
|
* ```
|
|
173
172
|
*/
|
|
174
173
|
export function checkMultimodalSupport(): string | undefined {
|
|
175
|
-
|
|
176
|
-
return "Multimodal (image/audio) is not available on iOS. The XCFramework lacks compiled vision and audio executor ops.";
|
|
177
|
-
}
|
|
174
|
+
// Supported on both platforms with a multimodal model loaded.
|
|
178
175
|
return undefined;
|
|
179
176
|
}
|
|
180
177
|
|
package/src/modelFactory.ts
CHANGED
|
@@ -54,92 +54,116 @@ export function createLLM(options?: {
|
|
|
54
54
|
}
|
|
55
55
|
};
|
|
56
56
|
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
"Insecure HTTP URLs are not allowed for model downloads. " +
|
|
72
|
-
"Use HTTPS instead: " +
|
|
73
|
-
pathOrUrl.replace("http://", "https://"),
|
|
74
|
-
);
|
|
75
|
-
}
|
|
76
|
-
|
|
77
|
-
// Extract filename from URL
|
|
78
|
-
const fileName = pathOrUrl.split("/").pop();
|
|
79
|
-
if (!fileName) {
|
|
80
|
-
throw new Error(`Invalid model URL: ${pathOrUrl}`);
|
|
81
|
-
}
|
|
82
|
-
|
|
83
|
-
console.log(`Checking model at ${pathOrUrl}...`);
|
|
84
|
-
modelPath = await native.downloadModel(
|
|
85
|
-
pathOrUrl,
|
|
86
|
-
fileName,
|
|
87
|
-
(progress) => {
|
|
88
|
-
onDownloadProgress?.(progress);
|
|
89
|
-
},
|
|
57
|
+
const augmentedLoadModel = async (
|
|
58
|
+
pathOrUrl: string,
|
|
59
|
+
config?: LLMConfig,
|
|
60
|
+
onDownloadProgress?: (progress: number) => void,
|
|
61
|
+
) => {
|
|
62
|
+
let modelPath = pathOrUrl;
|
|
63
|
+
|
|
64
|
+
// Check if it's a URL — enforce HTTPS for model downloads
|
|
65
|
+
if (pathOrUrl.startsWith("http://") || pathOrUrl.startsWith("https://")) {
|
|
66
|
+
if (pathOrUrl.startsWith("http://")) {
|
|
67
|
+
throw new Error(
|
|
68
|
+
"Insecure HTTP URLs are not allowed for model downloads. " +
|
|
69
|
+
"Use HTTPS instead: " +
|
|
70
|
+
pathOrUrl.replace("http://", "https://"),
|
|
90
71
|
);
|
|
91
|
-
console.log(`Model downloaded to: ${modelPath}`);
|
|
92
72
|
}
|
|
93
73
|
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
if (
|
|
98
|
-
|
|
99
|
-
recordMemorySnapshot();
|
|
74
|
+
// Extract filename from URL, stripping query parameters
|
|
75
|
+
const urlWithoutQuery = pathOrUrl.split("?")[0];
|
|
76
|
+
const fileName = urlWithoutQuery.split("/").pop();
|
|
77
|
+
if (!fileName) {
|
|
78
|
+
throw new Error(`Invalid model URL: ${pathOrUrl}`);
|
|
100
79
|
}
|
|
101
80
|
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
81
|
+
console.log(`Checking model at ${pathOrUrl}...`);
|
|
82
|
+
modelPath = await native.downloadModel(
|
|
83
|
+
pathOrUrl,
|
|
84
|
+
fileName,
|
|
85
|
+
(progress) => {
|
|
86
|
+
onDownloadProgress?.(progress);
|
|
87
|
+
},
|
|
88
|
+
);
|
|
89
|
+
console.log(`Model downloaded to: ${modelPath}`);
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
const result = await native.loadModel(modelPath, config);
|
|
93
|
+
|
|
94
|
+
// Record initial memory snapshot after model load
|
|
95
|
+
if (tracker) {
|
|
96
|
+
tracker.reset();
|
|
106
97
|
recordMemorySnapshot();
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
return result;
|
|
101
|
+
};
|
|
102
|
+
|
|
103
|
+
const SNAPSHOT_TRIGGERS = new Set([
|
|
104
|
+
"sendMessage",
|
|
105
|
+
"sendMessageWithImage",
|
|
106
|
+
"sendMessageWithAudio",
|
|
107
|
+
"resetConversation",
|
|
108
|
+
]);
|
|
109
|
+
|
|
110
|
+
return new Proxy(native, {
|
|
111
|
+
get(target, prop, receiver) {
|
|
112
|
+
if (prop === "memoryTracker") {
|
|
113
|
+
return tracker;
|
|
114
|
+
}
|
|
115
|
+
if (prop === "loadModel") {
|
|
116
|
+
return augmentedLoadModel;
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
const original = Reflect.get(target, prop, receiver);
|
|
120
|
+
if (typeof original !== "function") {
|
|
121
|
+
return original;
|
|
122
|
+
}
|
|
123
|
+
|
|
124
|
+
if (prop === "sendMessageAsync") {
|
|
125
|
+
return (message: string, onToken: (token: string, done: boolean) => void) => {
|
|
126
|
+
return original.call(target, message, (token: string, done: boolean) => {
|
|
127
|
+
onToken(token, done);
|
|
128
|
+
if (done) {
|
|
129
|
+
recordMemorySnapshot();
|
|
130
|
+
}
|
|
131
|
+
});
|
|
132
|
+
};
|
|
133
|
+
}
|
|
134
|
+
|
|
135
|
+
if (prop === "sendMessageWithImageAsync") {
|
|
136
|
+
return (message: string, imagePath: string, onToken: (token: string, done: boolean) => void) => {
|
|
137
|
+
return original.call(target, message, imagePath, (token: string, done: boolean) => {
|
|
138
|
+
onToken(token, done);
|
|
139
|
+
if (done) {
|
|
140
|
+
recordMemorySnapshot();
|
|
141
|
+
}
|
|
142
|
+
});
|
|
143
|
+
};
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
if (prop === "sendMessageWithAudioAsync") {
|
|
147
|
+
return (message: string, audioPath: string, onToken: (token: string, done: boolean) => void) => {
|
|
148
|
+
return original.call(target, message, audioPath, (token: string, done: boolean) => {
|
|
149
|
+
onToken(token, done);
|
|
150
|
+
if (done) {
|
|
151
|
+
recordMemorySnapshot();
|
|
152
|
+
}
|
|
153
|
+
});
|
|
154
|
+
};
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
if (SNAPSHOT_TRIGGERS.has(prop as string)) {
|
|
158
|
+
return async (...args: any[]) => {
|
|
159
|
+
const result = await original.apply(target, args);
|
|
114
160
|
recordMemorySnapshot();
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
) => {
|
|
121
|
-
const result = await native.sendMessageWithImage(...args);
|
|
122
|
-
recordMemorySnapshot();
|
|
123
|
-
return result;
|
|
124
|
-
},
|
|
125
|
-
sendMessageWithAudio: async (
|
|
126
|
-
...args: Parameters<typeof native.sendMessageWithAudio>
|
|
127
|
-
) => {
|
|
128
|
-
const result = await native.sendMessageWithAudio(...args);
|
|
129
|
-
recordMemorySnapshot();
|
|
130
|
-
return result;
|
|
131
|
-
},
|
|
132
|
-
getHistory: native.getHistory.bind(native),
|
|
133
|
-
resetConversation: () => {
|
|
134
|
-
native.resetConversation();
|
|
135
|
-
// KV cache is cleared on reset, record the drop
|
|
136
|
-
recordMemorySnapshot();
|
|
161
|
+
return result;
|
|
162
|
+
};
|
|
163
|
+
}
|
|
164
|
+
|
|
165
|
+
return original.bind(target);
|
|
137
166
|
},
|
|
138
|
-
|
|
139
|
-
getStats: native.getStats.bind(native),
|
|
140
|
-
getMemoryUsage: native.getMemoryUsage.bind(native),
|
|
141
|
-
close: native.close.bind(native),
|
|
142
|
-
downloadModel: native.downloadModel.bind(native),
|
|
143
|
-
deleteModel: native.deleteModel.bind(native),
|
|
144
|
-
};
|
|
167
|
+
}) as unknown as LiteRTLMInstance;
|
|
145
168
|
}
|
|
169
|
+
|
|
@@ -17,6 +17,37 @@ export type Backend = "cpu" | "gpu" | "npu";
|
|
|
17
17
|
*/
|
|
18
18
|
export type Role = "user" | "model" | "system";
|
|
19
19
|
|
|
20
|
+
/**
|
|
21
|
+
* Definition for a function/tool that the model can request to execute.
|
|
22
|
+
*/
|
|
23
|
+
export interface ToolDefinition {
|
|
24
|
+
/** Name of the function/tool */
|
|
25
|
+
name: string;
|
|
26
|
+
/** Human-readable description of what the function/tool does */
|
|
27
|
+
description: string;
|
|
28
|
+
/** JSON schema defining parameter names and types (stringified) */
|
|
29
|
+
parametersJson: string;
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
/**
|
|
33
|
+
* The part type for a multimodal message content part.
|
|
34
|
+
*/
|
|
35
|
+
export type PartType = "text" | "image" | "audio";
|
|
36
|
+
|
|
37
|
+
/**
|
|
38
|
+
* A part of a unified multimodal message payload.
|
|
39
|
+
*/
|
|
40
|
+
export interface MultimodalPart {
|
|
41
|
+
/** The part type: 'text', 'image', or 'audio' */
|
|
42
|
+
type: PartType;
|
|
43
|
+
/** The plain text content, if type is 'text' */
|
|
44
|
+
text?: string;
|
|
45
|
+
/** Raw image binary data, if type is 'image' (zero-copy ArrayBuffer mapping) */
|
|
46
|
+
imageBuffer?: ArrayBuffer;
|
|
47
|
+
/** Raw audio binary data, if type is 'audio' (zero-copy ArrayBuffer mapping) */
|
|
48
|
+
audioBuffer?: ArrayBuffer;
|
|
49
|
+
}
|
|
50
|
+
|
|
20
51
|
/**
|
|
21
52
|
* Configuration options for loading an LLM.
|
|
22
53
|
*/
|
|
@@ -68,6 +99,41 @@ export interface LLMConfig {
|
|
|
68
99
|
* @default 0.95
|
|
69
100
|
*/
|
|
70
101
|
topP?: number;
|
|
102
|
+
|
|
103
|
+
/**
|
|
104
|
+
* Whether to run engine validation after loading the model.
|
|
105
|
+
* When enabled, sends a quick test inference ("Hi") and waits up to 30s
|
|
106
|
+
* for a response to confirm the backend works. This is useful for GPU/NPU
|
|
107
|
+
* backends that may silently fail during inference (they can initialize
|
|
108
|
+
* without error but produce no tokens).
|
|
109
|
+
*
|
|
110
|
+
* Validation is **always a no-op on CPU** — the CPU backend is inherently
|
|
111
|
+
* reliable and never needs validation.
|
|
112
|
+
*
|
|
113
|
+
* Disabled by default because it adds significant latency (5-30s) to model loading.
|
|
114
|
+
* Enable only to catch GPU/NPU silent failure issues during development.
|
|
115
|
+
*
|
|
116
|
+
* @default false
|
|
117
|
+
*/
|
|
118
|
+
validate?: boolean;
|
|
119
|
+
|
|
120
|
+
/**
|
|
121
|
+
* Whether this is a multimodal model.
|
|
122
|
+
* When enabled, the engine handles image/audio tokens properly.
|
|
123
|
+
* If not specified, the system will fall back to filename sniffing.
|
|
124
|
+
*/
|
|
125
|
+
multimodal?: boolean;
|
|
126
|
+
|
|
127
|
+
/**
|
|
128
|
+
* List of tools/functions that the model can call.
|
|
129
|
+
*/
|
|
130
|
+
tools?: ToolDefinition[];
|
|
131
|
+
|
|
132
|
+
/**
|
|
133
|
+
* Whether to enable speculative decoding (multi-token prediction) if supported by the model.
|
|
134
|
+
* @default false
|
|
135
|
+
*/
|
|
136
|
+
enableSpeculativeDecoding?: boolean;
|
|
71
137
|
}
|
|
72
138
|
|
|
73
139
|
/**
|
|
@@ -135,7 +201,7 @@ export interface MemoryUsage {
|
|
|
135
201
|
* ```
|
|
136
202
|
*/
|
|
137
203
|
export interface LiteRTLM extends HybridObject<{
|
|
138
|
-
ios: "
|
|
204
|
+
ios: "swift";
|
|
139
205
|
android: "kotlin";
|
|
140
206
|
}> {
|
|
141
207
|
/**
|
|
@@ -160,6 +226,19 @@ export interface LiteRTLM extends HybridObject<{
|
|
|
160
226
|
*/
|
|
161
227
|
sendMessageWithImage(message: string, imagePath: string): Promise<string>;
|
|
162
228
|
|
|
229
|
+
/**
|
|
230
|
+
* Send a text message with an image and get a streaming response.
|
|
231
|
+
* Tokens are delivered via callback as they are generated.
|
|
232
|
+
* @param message User message text.
|
|
233
|
+
* @param imagePath Absolute path to an image file.
|
|
234
|
+
* @param onToken Callback invoked for each token (token, isDone).
|
|
235
|
+
*/
|
|
236
|
+
sendMessageWithImageAsync(
|
|
237
|
+
message: string,
|
|
238
|
+
imagePath: string,
|
|
239
|
+
onToken: (token: string, done: boolean) => void,
|
|
240
|
+
): Promise<void>;
|
|
241
|
+
|
|
163
242
|
/**
|
|
164
243
|
* Download a model file from a URL.
|
|
165
244
|
* @param url URL to download from.
|
|
@@ -187,6 +266,26 @@ export interface LiteRTLM extends HybridObject<{
|
|
|
187
266
|
*/
|
|
188
267
|
sendMessageWithAudio(message: string, audioPath: string): Promise<string>;
|
|
189
268
|
|
|
269
|
+
/**
|
|
270
|
+
* Send a text message with audio and get a streaming response.
|
|
271
|
+
* Tokens are delivered via callback as they are generated.
|
|
272
|
+
* @param message User message text.
|
|
273
|
+
* @param audioPath Absolute path to an audio file (WAV).
|
|
274
|
+
* @param onToken Callback invoked for each token (token, isDone).
|
|
275
|
+
*/
|
|
276
|
+
sendMessageWithAudioAsync(
|
|
277
|
+
message: string,
|
|
278
|
+
audioPath: string,
|
|
279
|
+
onToken: (token: string, done: boolean) => void,
|
|
280
|
+
): Promise<void>;
|
|
281
|
+
|
|
282
|
+
/**
|
|
283
|
+
* Send a unified multimodal message containing text and/or zero-copy binary buffers.
|
|
284
|
+
* @param parts The message content parts (text, image, and/or audio).
|
|
285
|
+
* @returns The model's response text.
|
|
286
|
+
*/
|
|
287
|
+
sendMultimodalMessage(parts: MultimodalPart[]): Promise<string>;
|
|
288
|
+
|
|
190
289
|
/**
|
|
191
290
|
* Send a message with streaming response.
|
|
192
291
|
* Tokens are delivered via callback as they are generated.
|
|
@@ -196,7 +295,7 @@ export interface LiteRTLM extends HybridObject<{
|
|
|
196
295
|
sendMessageAsync(
|
|
197
296
|
message: string,
|
|
198
297
|
onToken: (token: string, done: boolean) => void,
|
|
199
|
-
): void
|
|
298
|
+
): Promise<void>;
|
|
200
299
|
|
|
201
300
|
/**
|
|
202
301
|
* Get the current conversation history.
|
|
@@ -219,6 +318,11 @@ export interface LiteRTLM extends HybridObject<{
|
|
|
219
318
|
*/
|
|
220
319
|
getStats(): GenerationStats;
|
|
221
320
|
|
|
321
|
+
/**
|
|
322
|
+
* Count tokens in a text string. Returns -1 if unavailable.
|
|
323
|
+
*/
|
|
324
|
+
countTokens(text: string): number;
|
|
325
|
+
|
|
222
326
|
/**
|
|
223
327
|
* Get real memory usage from the native runtime.
|
|
224
328
|
* Uses OS-level APIs to report actual memory consumption.
|