react-native-litert-lm 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. package/README.md +331 -150
  2. package/android/build.gradle +1 -1
  3. package/android/src/main/java/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLM.kt +140 -37
  4. package/app.plugin.js +33 -0
  5. package/cpp/HybridLiteRTLM.cpp +577 -378
  6. package/cpp/HybridLiteRTLM.hpp +66 -23
  7. package/cpp/IOSDownloadHelper.h +24 -0
  8. package/cpp/cpp-adapter.cpp +10 -2
  9. package/cpp/include/litert_lm_engine.h +502 -0
  10. package/ios/IOSDownloadHelper.mm +129 -0
  11. package/ios/LiteRTLMAutolinking.mm +30 -0
  12. package/lib/hooks.d.ts +33 -3
  13. package/lib/hooks.js +54 -23
  14. package/lib/index.d.ts +4 -1
  15. package/lib/index.js +6 -6
  16. package/lib/memoryTracker.d.ts +128 -0
  17. package/lib/memoryTracker.js +155 -0
  18. package/lib/modelFactory.d.ts +21 -2
  19. package/lib/modelFactory.js +78 -11
  20. package/lib/specs/LiteRTLM.nitro.d.ts +19 -0
  21. package/nitrogen/generated/android/LiteRTLMOnLoad.cpp +28 -18
  22. package/nitrogen/generated/android/LiteRTLMOnLoad.hpp +13 -4
  23. package/nitrogen/generated/android/c++/JHybridLiteRTLMSpec.cpp +39 -36
  24. package/nitrogen/generated/android/c++/JHybridLiteRTLMSpec.hpp +20 -22
  25. package/nitrogen/generated/android/c++/JMemoryUsage.hpp +69 -0
  26. package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLMSpec.kt +19 -18
  27. package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/MemoryUsage.kt +47 -0
  28. package/nitrogen/generated/shared/c++/HybridLiteRTLMSpec.cpp +1 -0
  29. package/nitrogen/generated/shared/c++/HybridLiteRTLMSpec.hpp +4 -0
  30. package/nitrogen/generated/shared/c++/MemoryUsage.hpp +95 -0
  31. package/package.json +12 -5
  32. package/react-native-litert-lm.podspec +20 -7
  33. package/scripts/build-ios-engine.sh +283 -0
  34. package/scripts/download-ios-frameworks.sh +72 -0
  35. package/scripts/postinstall.js +116 -0
  36. package/scripts/stubs/cxx_bridge_stubs.cc +224 -0
  37. package/scripts/stubs/gemma_model_constraint_provider.cc +46 -0
  38. package/scripts/stubs/llguidance_stubs.c +101 -0
  39. package/src/hooks.ts +107 -41
  40. package/src/index.ts +13 -6
  41. package/src/memoryTracker.ts +268 -0
  42. package/src/modelFactory.ts +107 -11
  43. package/src/specs/LiteRTLM.nitro.ts +21 -0
@@ -84,5 +84,5 @@ dependencies {
84
84
  implementation 'org.jetbrains.kotlinx:kotlinx-coroutines-android:1.7.3'
85
85
 
86
86
  // LiteRT-LM Kotlin API
87
- implementation 'com.google.ai.edge.litertlm:litertlm-android:0.9.0-alpha01'
87
+ implementation 'com.google.ai.edge.litertlm:litertlm-android:0.9.0'
88
88
  }
@@ -6,6 +6,10 @@
6
6
  package com.margelo.nitro.dev.litert.litertlm
7
7
 
8
8
  import android.util.Log
9
+ import android.os.Debug
10
+ import android.app.ActivityManager
11
+ import android.content.Context
12
+ import java.util.Collections
9
13
  import androidx.annotation.Keep
10
14
  import com.facebook.proguard.annotations.DoNotStrip
11
15
  import dev.litert.litertlm.LiteRTLMInitProvider
@@ -27,6 +31,44 @@ import com.google.ai.edge.litertlm.Content
27
31
  // Alias to avoid confusion
28
32
  typealias LiteRTMessage = com.google.ai.edge.litertlm.Message
29
33
 
34
/**
 * Named implementation of the LiteRT-LM `MessageCallback` used for streaming inference.
 *
 * Each streamed message is reduced to its text parts, forwarded to [onToken] with the
 * "done" flag set to `false`, and accumulated in [responseBuilder]. On completion the
 * accumulated text is appended to [history] as a [Role.MODEL] message and [onToken] is
 * invoked once more with an empty chunk and the flag set to `true`. On failure the error
 * is logged and reported through [onToken] as a final `"Error: …"` chunk; nothing is
 * added to [history] in that case.
 *
 * @param onToken sink for streamed text; the second argument is `true` only on the final call.
 * @param responseBuilder accumulator for the full response text, supplied by the caller.
 * @param history conversation history that receives the completed model response.
 */
internal class StreamingCallbackListener(
    private val onToken: (String, Boolean) -> Unit,
    private val responseBuilder: StringBuilder,
    private val history: MutableList<Message>,
) : com.google.ai.edge.litertlm.MessageCallback {

    // The parameter type is the file-level `LiteRTMessage` alias. A typealias lives in the
    // package of the file that declares it, so it must not be qualified with the SDK package.
    override fun onMessage(responseMsg: LiteRTMessage) {
        val chunk = responseMsg.contents
            .filterIsInstance<com.google.ai.edge.litertlm.Content.Text>()
            .joinToString("") { it.text }

        // Empty chunks are still forwarded to the consumer; they just never reach the builder.
        onToken(chunk, false)
        if (chunk.isNotEmpty()) {
            responseBuilder.append(chunk)
        }
    }

    override fun onDone() {
        val fullResponse = responseBuilder.toString()

        // Record the response before signalling completion: a consumer that reacts to the
        // final token then already finds it in the history, and a throwing callback can no
        // longer cause the response to be dropped.
        history.add(Message(Role.MODEL, fullResponse))
        onToken("", true)

        Log.d(TAG, "Streaming done. Length: ${fullResponse.length}")
    }

    override fun onError(t: Throwable) {
        Log.e(TAG, "Async generation failed", t)
        // The failure is surfaced as the terminal chunk so the consumer stops waiting.
        onToken("Error: ${t.message}", true)
    }

    private companion object {
        const val TAG = "StreamingCallbackListener"
    }
}
71
+
30
72
  /**
31
73
  * Kotlin implementation of LiteRTLM using the LiteRT-LM Android SDK.
32
74
  * This class bridges between React Native (via Nitro) and the Google LiteRT-LM Engine.
@@ -65,7 +107,10 @@ class HybridLiteRTLM : HybridLiteRTLMSpec() {
65
107
  private var isClosed = false
66
108
 
67
109
  // Conversation history for getHistory()
68
- private val history = mutableListOf<Message>()
110
+ // Synchronized to prevent ConcurrentModificationException: history is
111
+ // written from Promise.parallel workers and sendMessageAsync SDK callbacks,
112
+ // and read from getHistory() which may be called from the JS thread.
113
+ private val history: MutableList<Message> = Collections.synchronizedList(mutableListOf())
69
114
 
70
115
  // Last generation stats
71
116
  private var lastStats = GenerationStats(
@@ -83,6 +128,7 @@ class HybridLiteRTLM : HybridLiteRTLMSpec() {
83
128
  private var topK: Int = 40
84
129
  private var topP: Double = 0.95
85
130
  private var maxTokens: Int = 1024
131
+ private var systemPrompt: String? = null
86
132
 
87
133
  override val memorySize: Long
88
134
  get() = 1024L * 1024L * 1024L // ~1GB (models are large)
@@ -111,6 +157,7 @@ class HybridLiteRTLM : HybridLiteRTLMSpec() {
111
157
  cfg.topK?.let { topK = it.toInt() }
112
158
  cfg.topP?.let { topP = it }
113
159
  cfg.maxTokens?.let { maxTokens = it.toInt() }
160
+ cfg.systemPrompt?.let { systemPrompt = it }
114
161
  }
115
162
 
116
163
  try {
@@ -182,7 +229,9 @@ class HybridLiteRTLM : HybridLiteRTLMSpec() {
182
229
 
183
230
  // Blocking inference (safe here because we are in Promise.parallel worker thread)
184
231
  val userMsg = LiteRTMessage.of(message)
232
+ val startTime = System.nanoTime()
185
233
  val responseMsg = conversation!!.sendMessage(userMsg)
234
+ val elapsedMs = (System.nanoTime() - startTime) / 1_000_000.0
186
235
 
187
236
  // Extract text
188
237
  val response = responseMsg.contents
@@ -192,14 +241,16 @@ class HybridLiteRTLM : HybridLiteRTLMSpec() {
192
241
  // Add model response to history
193
242
  history.add(Message(Role.MODEL, response))
194
243
 
195
- // Update stats
244
+ // Update stats with real timing data
245
+ val promptTokens = message.length / 4.0
246
+ val completionTokens = response.length / 4.0
196
247
  lastStats = GenerationStats(
197
- promptTokens = message.length / 4.0,
198
- completionTokens = response.length / 4.0,
199
- totalTokens = (message.length + response.length) / 4.0,
200
- timeToFirstToken = 0.0,
201
- totalTime = 0.0,
202
- tokensPerSecond = 0.0
248
+ promptTokens = promptTokens,
249
+ completionTokens = completionTokens,
250
+ totalTokens = promptTokens + completionTokens,
251
+ timeToFirstToken = 0.0, // Not available from sync API
252
+ totalTime = elapsedMs,
253
+ tokensPerSecond = if (elapsedMs > 0) completionTokens / (elapsedMs / 1000.0) else 0.0
203
254
  )
204
255
 
205
256
  response // Return the string
@@ -221,38 +272,17 @@ class HybridLiteRTLM : HybridLiteRTLMSpec() {
221
272
 
222
273
  val fullResponseBuilder = StringBuilder()
223
274
 
224
- // Define callback
225
- val listener = object : com.google.ai.edge.litertlm.MessageCallback {
226
- override fun onMessage(responseMsg: LiteRTMessage) {
227
- val chunk = responseMsg.contents
228
- .filterIsInstance<com.google.ai.edge.litertlm.Content.Text>()
229
- .joinToString("") { it.text }
230
-
231
- onToken(chunk, false)
232
-
233
- if (chunk.isNotEmpty()) {
234
- fullResponseBuilder.append(chunk)
235
- }
236
- }
237
-
238
- override fun onDone() {
239
- onToken("", true)
240
- val fullResponse = fullResponseBuilder.toString()
241
- history.add(Message(Role.MODEL, fullResponse))
242
- Log.d(TAG, "sendMessageAsync done. Length: ${fullResponse.length}")
243
- }
244
-
245
- override fun onError(t: Throwable) {
246
- Log.e(TAG, "Async generation failed", t)
247
- onToken("Error: ${t.message}", true)
248
- }
249
- }
275
+ val listener = StreamingCallbackListener(
276
+ onToken = onToken,
277
+ responseBuilder = fullResponseBuilder,
278
+ history = history,
279
+ )
250
280
 
251
281
  try {
252
282
  val userMsg = LiteRTMessage.of(message)
253
283
  conversation!!.sendMessageAsync(userMsg, listener)
254
284
  } catch (e: Exception) {
255
- Log.e(TAG, "Failed into initiate async generation", e)
285
+ Log.e(TAG, "Failed to initiate async generation", e)
256
286
  onToken("Error: ${e.message}", true)
257
287
  }
258
288
  }
@@ -330,6 +360,15 @@ class HybridLiteRTLM : HybridLiteRTLMSpec() {
330
360
  .joinToString("") { it.text }
331
361
 
332
362
  history.add(Message(Role.MODEL, response))
363
+
364
+ // Clean up temp resized image to prevent cache dir bloat
365
+ if (processedImagePath != imagePath) {
366
+ try {
367
+ java.io.File(processedImagePath).delete()
368
+ } catch (e: Exception) {
369
+ Log.w(TAG, "Failed to clean up temp image: ${e.message}")
370
+ }
371
+ }
333
372
 
334
373
  response
335
374
  }
@@ -476,11 +515,16 @@ class HybridLiteRTLM : HybridLiteRTLMSpec() {
476
515
  // Helpers
477
516
  // -------------------------------------------------------------------------
478
517
  override fun getHistory(): Array<Message> {
479
- return history.toTypedArray()
518
+ // Synchronized list requires manual sync for iteration/copy
519
+ synchronized(history) {
520
+ return history.toTypedArray()
521
+ }
480
522
  }
481
523
 
482
524
  override fun resetConversation() {
483
- history.clear()
525
+ synchronized(history) {
526
+ history.clear()
527
+ }
484
528
  createNewConversation()
485
529
  }
486
530
 
@@ -496,6 +540,50 @@ class HybridLiteRTLM : HybridLiteRTLMSpec() {
496
540
  return lastStats
497
541
  }
498
542
 
543
/**
 * Returns a snapshot of this process's memory usage and the device's available memory.
 *
 * Every probe is best-effort: a failing probe is logged as a warning and its fields keep
 * their defaults (`0.0` / `false`) instead of failing the whole call.
 *
 * @return a [MemoryUsage] where
 *  - `nativeHeapBytes` is the value of [Debug.getNativeHeapAllocatedSize];
 *  - `residentBytes` is the `VmRSS` entry of `/proc/self/status`, converted from kB to bytes;
 *  - `availableMemoryBytes` and `isLowMemory` are `availMem` and `lowMemory` from
 *    [ActivityManager.MemoryInfo], or the defaults when no application context or
 *    activity manager is available.
 */
override fun getMemoryUsage(): MemoryUsage {
    val nativeHeapBytes = Debug.getNativeHeapAllocatedSize().toDouble()

    val residentBytes = try {
        // useLines + firstOrNull stops reading at the VmRSS line and closes the file.
        // (`return@forEachLine` only ends the lambda for the current line, so the
        // remainder of the file would still be scanned.)
        val vmRssLine = java.io.File("/proc/self/status").useLines { lines ->
            lines.firstOrNull { it.startsWith("VmRSS:") }
        }
        vmRssLine
            ?.substringAfter("VmRSS:")
            ?.trim()
            ?.split(Regex("\\s+"))
            ?.firstOrNull()
            ?.toDoubleOrNull()
            ?.times(1024.0) // the value is expressed in kB
            ?: 0.0
    } catch (e: Exception) {
        Log.w(TAG, "Failed to read /proc/self/status: ${e.message}")
        0.0
    }

    val memInfo = try {
        // Safe cast: a missing context or service yields null (defaults below) rather than
        // an exception.
        val activityManager = LiteRTLMInitProvider.applicationContext
            ?.getSystemService(Context.ACTIVITY_SERVICE) as? ActivityManager
        activityManager?.let { manager ->
            ActivityManager.MemoryInfo().also { info -> manager.getMemoryInfo(info) }
        }
    } catch (e: Exception) {
        Log.w(TAG, "Failed to get ActivityManager memory info: ${e.message}")
        null
    }

    return MemoryUsage(
        nativeHeapBytes = nativeHeapBytes,
        residentBytes = residentBytes,
        availableMemoryBytes = memInfo?.availMem?.toDouble() ?: 0.0,
        isLowMemory = memInfo?.lowMemory ?: false,
    )
}
586
+
499
587
  override fun close() {
500
588
  Log.d(TAG, "Closing resources")
501
589
  isClosed = true
@@ -533,6 +621,21 @@ class HybridLiteRTLM : HybridLiteRTLMSpec() {
533
621
  ensureLoaded()
534
622
  // Dispose old conversation if needed
535
623
  conversation = engine!!.createConversation()
624
+ // Apply system prompt/instruction if set
625
+ systemPrompt?.let { prompt ->
626
+ if (prompt.isNotEmpty()) {
627
+ try {
628
+ // Send system instruction as the first turn to prime the conversation.
629
+ // LiteRT-LM's Conversation API handles chat template formatting,
630
+ // including Gemma's <start_of_turn>system block.
631
+ val systemMsg = LiteRTMessage.of(listOf(Content.Text(prompt)))
632
+ conversation!!.sendMessage(systemMsg)
633
+ Log.i(TAG, "System prompt applied (${prompt.length} chars)")
634
+ } catch (e: Exception) {
635
+ Log.w(TAG, "Failed to apply system prompt: ${e.message}")
636
+ }
637
+ }
638
+ }
536
639
  }
537
640
 
538
641
 
package/app.plugin.js ADDED
@@ -0,0 +1,33 @@
1
/**
 * Expo config plugin for react-native-litert-lm.
 *
 * Ensures the Android Gradle property `android.minSdkVersion` is at least 26
 * for the LiteRT-LM native module.
 *
 * This plugin does not set an ABI filter or an iOS deployment target;
 * `withXcodeProject` is imported but currently unused.
 */
const { withGradleProperties, withXcodeProject } = require('@expo/config-plugins');

/** Gradle property that carries the Android minimum SDK level. */
const MIN_SDK_KEY = 'android.minSdkVersion';

/** Lowest Android API level this plugin enforces. */
const MIN_SDK_VERSION = 26;

/**
 * Makes sure `props` contains `android.minSdkVersion` with a value >= `minimum`.
 * Mutates `props` in place: adds the property when absent, raises it when it is
 * lower than `minimum` or not a number, and leaves a higher value untouched.
 *
 * @param {Array<{type: string, key?: string, value?: string}>} props gradle.properties entries
 * @param {number} minimum required minimum SDK level
 */
function ensureMinSdkVersion(props, minimum) {
  const existing = props.find((p) => p.key === MIN_SDK_KEY);

  if (!existing) {
    props.push({
      type: 'property',
      key: MIN_SDK_KEY,
      value: String(minimum),
    });
    return;
  }

  const current = Number.parseInt(existing.value, 10);
  // NaN (blank or non-numeric value) fails every `<` comparison, so it has to be
  // tested explicitly or an invalid value would be left in place.
  if (Number.isNaN(current) || current < minimum) {
    existing.value = String(minimum);
  }
}

/**
 * Config plugin entry point.
 *
 * @param {object} config Expo config passed in by the plugin pipeline
 * @returns {object} the config returned by `withGradleProperties`
 */
function withLiteRTLM(config) {
  return withGradleProperties(config, (gradleConfig) => {
    ensureMinSdkVersion(gradleConfig.modResults, MIN_SDK_VERSION);
    return gradleConfig;
  });
}

module.exports = withLiteRTLM;