react-native-litert-lm 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +331 -150
- package/android/build.gradle +1 -1
- package/android/src/main/java/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLM.kt +140 -37
- package/app.plugin.js +33 -0
- package/cpp/HybridLiteRTLM.cpp +577 -378
- package/cpp/HybridLiteRTLM.hpp +66 -23
- package/cpp/IOSDownloadHelper.h +24 -0
- package/cpp/cpp-adapter.cpp +10 -2
- package/cpp/include/litert_lm_engine.h +502 -0
- package/ios/IOSDownloadHelper.mm +129 -0
- package/ios/LiteRTLMAutolinking.mm +30 -0
- package/lib/hooks.d.ts +33 -3
- package/lib/hooks.js +54 -23
- package/lib/index.d.ts +4 -1
- package/lib/index.js +6 -6
- package/lib/memoryTracker.d.ts +128 -0
- package/lib/memoryTracker.js +155 -0
- package/lib/modelFactory.d.ts +21 -2
- package/lib/modelFactory.js +78 -11
- package/lib/specs/LiteRTLM.nitro.d.ts +19 -0
- package/nitrogen/generated/android/LiteRTLMOnLoad.cpp +28 -18
- package/nitrogen/generated/android/LiteRTLMOnLoad.hpp +13 -4
- package/nitrogen/generated/android/c++/JHybridLiteRTLMSpec.cpp +39 -36
- package/nitrogen/generated/android/c++/JHybridLiteRTLMSpec.hpp +20 -22
- package/nitrogen/generated/android/c++/JMemoryUsage.hpp +69 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLMSpec.kt +19 -18
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/MemoryUsage.kt +47 -0
- package/nitrogen/generated/shared/c++/HybridLiteRTLMSpec.cpp +1 -0
- package/nitrogen/generated/shared/c++/HybridLiteRTLMSpec.hpp +4 -0
- package/nitrogen/generated/shared/c++/MemoryUsage.hpp +95 -0
- package/package.json +12 -5
- package/react-native-litert-lm.podspec +20 -7
- package/scripts/build-ios-engine.sh +283 -0
- package/scripts/download-ios-frameworks.sh +72 -0
- package/scripts/postinstall.js +116 -0
- package/scripts/stubs/cxx_bridge_stubs.cc +224 -0
- package/scripts/stubs/gemma_model_constraint_provider.cc +46 -0
- package/scripts/stubs/llguidance_stubs.c +101 -0
- package/src/hooks.ts +107 -41
- package/src/index.ts +13 -6
- package/src/memoryTracker.ts +268 -0
- package/src/modelFactory.ts +107 -11
- package/src/specs/LiteRTLM.nitro.ts +21 -0
package/android/build.gradle
CHANGED
|
@@ -84,5 +84,5 @@ dependencies {
|
|
|
84
84
|
implementation 'org.jetbrains.kotlinx:kotlinx-coroutines-android:1.7.3'
|
|
85
85
|
|
|
86
86
|
// LiteRT-LM Kotlin API
|
|
87
|
-
implementation 'com.google.ai.edge.litertlm:litertlm-android:0.9.0
|
|
87
|
+
implementation 'com.google.ai.edge.litertlm:litertlm-android:0.9.0'
|
|
88
88
|
}
|
package/android/src/main/java/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLM.kt
CHANGED
|
@@ -6,6 +6,10 @@
|
|
|
6
6
|
package com.margelo.nitro.dev.litert.litertlm
|
|
7
7
|
|
|
8
8
|
import android.util.Log
|
|
9
|
+
import android.os.Debug
|
|
10
|
+
import android.app.ActivityManager
|
|
11
|
+
import android.content.Context
|
|
12
|
+
import java.util.Collections
|
|
9
13
|
import androidx.annotation.Keep
|
|
10
14
|
import com.facebook.proguard.annotations.DoNotStrip
|
|
11
15
|
import dev.litert.litertlm.LiteRTLMInitProvider
|
|
@@ -27,6 +31,44 @@ import com.google.ai.edge.litertlm.Content
|
|
|
27
31
|
// Alias to avoid confusion
|
|
28
32
|
typealias LiteRTMessage = com.google.ai.edge.litertlm.Message
|
|
29
33
|
|
|
34
|
+
/**
|
|
35
|
+
* Named implementation of the LiteRT-LM MessageCallback for streaming inference.
|
|
36
|
+
*
|
|
37
|
+
* Extracted from the anonymous inline class in sendMessageAsync for testability.
|
|
38
|
+
* Accumulates response chunks, forwards tokens to JS, and appends the final
|
|
39
|
+
* response to the conversation history.
|
|
40
|
+
*/
|
|
41
|
+
internal class StreamingCallbackListener(
|
|
42
|
+
private val onToken: (String, Boolean) -> Unit,
|
|
43
|
+
private val responseBuilder: StringBuilder,
|
|
44
|
+
private val history: MutableList<Message>,
|
|
45
|
+
) : com.google.ai.edge.litertlm.MessageCallback {
|
|
46
|
+
|
|
47
|
+
override fun onMessage(responseMsg: com.google.ai.edge.litertlm.LiteRTMessage) {
|
|
48
|
+
val chunk = responseMsg.contents
|
|
49
|
+
.filterIsInstance<com.google.ai.edge.litertlm.Content.Text>()
|
|
50
|
+
.joinToString("") { it.text }
|
|
51
|
+
|
|
52
|
+
onToken(chunk, false)
|
|
53
|
+
|
|
54
|
+
if (chunk.isNotEmpty()) {
|
|
55
|
+
responseBuilder.append(chunk)
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
override fun onDone() {
|
|
60
|
+
onToken("", true)
|
|
61
|
+
val fullResponse = responseBuilder.toString()
|
|
62
|
+
history.add(Message(Role.MODEL, fullResponse))
|
|
63
|
+
Log.d("StreamingCallbackListener", "Streaming done. Length: ${fullResponse.length}")
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
override fun onError(t: Throwable) {
|
|
67
|
+
Log.e("StreamingCallbackListener", "Async generation failed", t)
|
|
68
|
+
onToken("Error: ${t.message}", true)
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
|
|
30
72
|
/**
|
|
31
73
|
* Kotlin implementation of LiteRTLM using the LiteRT-LM Android SDK.
|
|
32
74
|
* This class bridges between React Native (via Nitro) and the Google LiteRT-LM Engine.
|
|
@@ -65,7 +107,10 @@ class HybridLiteRTLM : HybridLiteRTLMSpec() {
|
|
|
65
107
|
private var isClosed = false
|
|
66
108
|
|
|
67
109
|
// Conversation history for getHistory()
|
|
68
|
-
|
|
110
|
+
// Synchronized to prevent ConcurrentModificationException: history is
|
|
111
|
+
// written from Promise.parallel workers and sendMessageAsync SDK callbacks,
|
|
112
|
+
// and read from getHistory() which may be called from the JS thread.
|
|
113
|
+
private val history: MutableList<Message> = Collections.synchronizedList(mutableListOf())
|
|
69
114
|
|
|
70
115
|
// Last generation stats
|
|
71
116
|
private var lastStats = GenerationStats(
|
|
@@ -83,6 +128,7 @@ class HybridLiteRTLM : HybridLiteRTLMSpec() {
|
|
|
83
128
|
private var topK: Int = 40
|
|
84
129
|
private var topP: Double = 0.95
|
|
85
130
|
private var maxTokens: Int = 1024
|
|
131
|
+
private var systemPrompt: String? = null
|
|
86
132
|
|
|
87
133
|
override val memorySize: Long
|
|
88
134
|
get() = 1024L * 1024L * 1024L // ~1GB (models are large)
|
|
@@ -111,6 +157,7 @@ class HybridLiteRTLM : HybridLiteRTLMSpec() {
|
|
|
111
157
|
cfg.topK?.let { topK = it.toInt() }
|
|
112
158
|
cfg.topP?.let { topP = it }
|
|
113
159
|
cfg.maxTokens?.let { maxTokens = it.toInt() }
|
|
160
|
+
cfg.systemPrompt?.let { systemPrompt = it }
|
|
114
161
|
}
|
|
115
162
|
|
|
116
163
|
try {
|
|
@@ -182,7 +229,9 @@ class HybridLiteRTLM : HybridLiteRTLMSpec() {
|
|
|
182
229
|
|
|
183
230
|
// Blocking inference (safe here because we are in Promise.parallel worker thread)
|
|
184
231
|
val userMsg = LiteRTMessage.of(message)
|
|
232
|
+
val startTime = System.nanoTime()
|
|
185
233
|
val responseMsg = conversation!!.sendMessage(userMsg)
|
|
234
|
+
val elapsedMs = (System.nanoTime() - startTime) / 1_000_000.0
|
|
186
235
|
|
|
187
236
|
// Extract text
|
|
188
237
|
val response = responseMsg.contents
|
|
@@ -192,14 +241,16 @@ class HybridLiteRTLM : HybridLiteRTLMSpec() {
|
|
|
192
241
|
// Add model response to history
|
|
193
242
|
history.add(Message(Role.MODEL, response))
|
|
194
243
|
|
|
195
|
-
// Update stats
|
|
244
|
+
// Update stats with real timing data
|
|
245
|
+
val promptTokens = message.length / 4.0
|
|
246
|
+
val completionTokens = response.length / 4.0
|
|
196
247
|
lastStats = GenerationStats(
|
|
197
|
-
promptTokens =
|
|
198
|
-
completionTokens =
|
|
199
|
-
totalTokens =
|
|
200
|
-
timeToFirstToken = 0.0,
|
|
201
|
-
totalTime =
|
|
202
|
-
tokensPerSecond = 0.0
|
|
248
|
+
promptTokens = promptTokens,
|
|
249
|
+
completionTokens = completionTokens,
|
|
250
|
+
totalTokens = promptTokens + completionTokens,
|
|
251
|
+
timeToFirstToken = 0.0, // Not available from sync API
|
|
252
|
+
totalTime = elapsedMs,
|
|
253
|
+
tokensPerSecond = if (elapsedMs > 0) completionTokens / (elapsedMs / 1000.0) else 0.0
|
|
203
254
|
)
|
|
204
255
|
|
|
205
256
|
response // Return the string
|
|
@@ -221,38 +272,17 @@ class HybridLiteRTLM : HybridLiteRTLMSpec() {
|
|
|
221
272
|
|
|
222
273
|
val fullResponseBuilder = StringBuilder()
|
|
223
274
|
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
.joinToString("") { it.text }
|
|
230
|
-
|
|
231
|
-
onToken(chunk, false)
|
|
232
|
-
|
|
233
|
-
if (chunk.isNotEmpty()) {
|
|
234
|
-
fullResponseBuilder.append(chunk)
|
|
235
|
-
}
|
|
236
|
-
}
|
|
237
|
-
|
|
238
|
-
override fun onDone() {
|
|
239
|
-
onToken("", true)
|
|
240
|
-
val fullResponse = fullResponseBuilder.toString()
|
|
241
|
-
history.add(Message(Role.MODEL, fullResponse))
|
|
242
|
-
Log.d(TAG, "sendMessageAsync done. Length: ${fullResponse.length}")
|
|
243
|
-
}
|
|
244
|
-
|
|
245
|
-
override fun onError(t: Throwable) {
|
|
246
|
-
Log.e(TAG, "Async generation failed", t)
|
|
247
|
-
onToken("Error: ${t.message}", true)
|
|
248
|
-
}
|
|
249
|
-
}
|
|
275
|
+
val listener = StreamingCallbackListener(
|
|
276
|
+
onToken = onToken,
|
|
277
|
+
responseBuilder = fullResponseBuilder,
|
|
278
|
+
history = history,
|
|
279
|
+
)
|
|
250
280
|
|
|
251
281
|
try {
|
|
252
282
|
val userMsg = LiteRTMessage.of(message)
|
|
253
283
|
conversation!!.sendMessageAsync(userMsg, listener)
|
|
254
284
|
} catch (e: Exception) {
|
|
255
|
-
Log.e(TAG, "Failed
|
|
285
|
+
Log.e(TAG, "Failed to initiate async generation", e)
|
|
256
286
|
onToken("Error: ${e.message}", true)
|
|
257
287
|
}
|
|
258
288
|
}
|
|
@@ -330,6 +360,15 @@ class HybridLiteRTLM : HybridLiteRTLMSpec() {
|
|
|
330
360
|
.joinToString("") { it.text }
|
|
331
361
|
|
|
332
362
|
history.add(Message(Role.MODEL, response))
|
|
363
|
+
|
|
364
|
+
// Clean up temp resized image to prevent cache dir bloat
|
|
365
|
+
if (processedImagePath != imagePath) {
|
|
366
|
+
try {
|
|
367
|
+
java.io.File(processedImagePath).delete()
|
|
368
|
+
} catch (e: Exception) {
|
|
369
|
+
Log.w(TAG, "Failed to clean up temp image: ${e.message}")
|
|
370
|
+
}
|
|
371
|
+
}
|
|
333
372
|
|
|
334
373
|
response
|
|
335
374
|
}
|
|
@@ -476,11 +515,16 @@ class HybridLiteRTLM : HybridLiteRTLMSpec() {
|
|
|
476
515
|
// Helpers
|
|
477
516
|
// -------------------------------------------------------------------------
|
|
478
517
|
override fun getHistory(): Array<Message> {
|
|
479
|
-
|
|
518
|
+
// Synchronized list requires manual sync for iteration/copy
|
|
519
|
+
synchronized(history) {
|
|
520
|
+
return history.toTypedArray()
|
|
521
|
+
}
|
|
480
522
|
}
|
|
481
523
|
|
|
482
524
|
override fun resetConversation() {
|
|
483
|
-
history
|
|
525
|
+
synchronized(history) {
|
|
526
|
+
history.clear()
|
|
527
|
+
}
|
|
484
528
|
createNewConversation()
|
|
485
529
|
}
|
|
486
530
|
|
|
@@ -496,6 +540,50 @@ class HybridLiteRTLM : HybridLiteRTLMSpec() {
|
|
|
496
540
|
return lastStats
|
|
497
541
|
}
|
|
498
542
|
|
|
543
|
+
override fun getMemoryUsage(): MemoryUsage {
|
|
544
|
+
// Native heap: allocated bytes from Debug APIs (most accurate for native allocations)
|
|
545
|
+
val nativeHeapBytes = Debug.getNativeHeapAllocatedSize().toDouble()
|
|
546
|
+
|
|
547
|
+
// Process RSS: read from /proc/self/status (VmRSS) in kB
|
|
548
|
+
var residentBytes = 0.0
|
|
549
|
+
try {
|
|
550
|
+
java.io.File("/proc/self/status").forEachLine { line ->
|
|
551
|
+
if (line.startsWith("VmRSS:")) {
|
|
552
|
+
val kb = line.substringAfter("VmRSS:").trim().split("\\s+".toRegex())[0].toDoubleOrNull()
|
|
553
|
+
if (kb != null) {
|
|
554
|
+
residentBytes = kb * 1024.0
|
|
555
|
+
}
|
|
556
|
+
return@forEachLine
|
|
557
|
+
}
|
|
558
|
+
}
|
|
559
|
+
} catch (e: Exception) {
|
|
560
|
+
Log.w(TAG, "Failed to read /proc/self/status: ${e.message}")
|
|
561
|
+
}
|
|
562
|
+
|
|
563
|
+
// Available memory and low-memory flag from ActivityManager
|
|
564
|
+
var availableMemoryBytes = 0.0
|
|
565
|
+
var isLowMemory = false
|
|
566
|
+
try {
|
|
567
|
+
val context = LiteRTLMInitProvider.applicationContext
|
|
568
|
+
if (context != null) {
|
|
569
|
+
val activityManager = context.getSystemService(Context.ACTIVITY_SERVICE) as ActivityManager
|
|
570
|
+
val memInfo = ActivityManager.MemoryInfo()
|
|
571
|
+
activityManager.getMemoryInfo(memInfo)
|
|
572
|
+
availableMemoryBytes = memInfo.availMem.toDouble()
|
|
573
|
+
isLowMemory = memInfo.lowMemory
|
|
574
|
+
}
|
|
575
|
+
} catch (e: Exception) {
|
|
576
|
+
Log.w(TAG, "Failed to get ActivityManager memory info: ${e.message}")
|
|
577
|
+
}
|
|
578
|
+
|
|
579
|
+
return MemoryUsage(
|
|
580
|
+
nativeHeapBytes = nativeHeapBytes,
|
|
581
|
+
residentBytes = residentBytes,
|
|
582
|
+
availableMemoryBytes = availableMemoryBytes,
|
|
583
|
+
isLowMemory = isLowMemory
|
|
584
|
+
)
|
|
585
|
+
}
|
|
586
|
+
|
|
499
587
|
override fun close() {
|
|
500
588
|
Log.d(TAG, "Closing resources")
|
|
501
589
|
isClosed = true
|
|
@@ -533,6 +621,21 @@ class HybridLiteRTLM : HybridLiteRTLMSpec() {
|
|
|
533
621
|
ensureLoaded()
|
|
534
622
|
// Dispose old conversation if needed
|
|
535
623
|
conversation = engine!!.createConversation()
|
|
624
|
+
// Apply system prompt/instruction if set
|
|
625
|
+
systemPrompt?.let { prompt ->
|
|
626
|
+
if (prompt.isNotEmpty()) {
|
|
627
|
+
try {
|
|
628
|
+
// Send system instruction as the first turn to prime the conversation.
|
|
629
|
+
// LiteRT-LM's Conversation API handles chat template formatting,
|
|
630
|
+
// including Gemma's <start_of_turn>system block.
|
|
631
|
+
val systemMsg = LiteRTMessage.of(listOf(Content.Text(prompt)))
|
|
632
|
+
conversation!!.sendMessage(systemMsg)
|
|
633
|
+
Log.i(TAG, "System prompt applied (${prompt.length} chars)")
|
|
634
|
+
} catch (e: Exception) {
|
|
635
|
+
Log.w(TAG, "Failed to apply system prompt: ${e.message}")
|
|
636
|
+
}
|
|
637
|
+
}
|
|
638
|
+
}
|
|
536
639
|
}
|
|
537
640
|
|
|
538
641
|
|
package/app.plugin.js
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Expo config plugin for react-native-litert-lm.
|
|
3
|
+
*
|
|
4
|
+
* Ensures correct build settings for the LiteRT-LM native module:
|
|
5
|
+
* - Android: minSdkVersion 26, arm64-v8a ABI filter
|
|
6
|
+
* - iOS: deployment target 15.0
|
|
7
|
+
*/
|
|
8
|
+
const { withGradleProperties, withXcodeProject } = require('@expo/config-plugins');
|
|
9
|
+
|
|
10
|
+
function withLiteRTLM(config) {
|
|
11
|
+
// Android: Ensure minSdkVersion is at least 26
|
|
12
|
+
config = withGradleProperties(config, (config) => {
|
|
13
|
+
const props = config.modResults;
|
|
14
|
+
|
|
15
|
+
// Set minSdkVersion if not already high enough
|
|
16
|
+
const minSdkProp = props.find((p) => p.key === 'android.minSdkVersion');
|
|
17
|
+
if (!minSdkProp) {
|
|
18
|
+
props.push({
|
|
19
|
+
type: 'property',
|
|
20
|
+
key: 'android.minSdkVersion',
|
|
21
|
+
value: '26',
|
|
22
|
+
});
|
|
23
|
+
} else if (parseInt(minSdkProp.value, 10) < 26) {
|
|
24
|
+
minSdkProp.value = '26';
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
return config;
|
|
28
|
+
});
|
|
29
|
+
|
|
30
|
+
return config;
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
module.exports = withLiteRTLM;
|