react-native-litert-lm 0.2.2 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. package/README.md +269 -186
  2. package/android/build.gradle +1 -1
  3. package/android/src/main/java/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLM.kt +93 -37
  4. package/app.plugin.js +33 -0
  5. package/cpp/HybridLiteRTLM.cpp +604 -450
  6. package/cpp/HybridLiteRTLM.hpp +54 -23
  7. package/cpp/IOSDownloadHelper.h +24 -0
  8. package/cpp/cpp-adapter.cpp +2 -2
  9. package/cpp/include/litert_lm_engine.h +509 -0
  10. package/ios/IOSDownloadHelper.mm +129 -0
  11. package/ios/LiteRTLMAutolinking.mm +30 -0
  12. package/lib/hooks.d.ts +9 -4
  13. package/lib/hooks.js +34 -20
  14. package/lib/index.d.ts +1 -0
  15. package/lib/index.js +2 -5
  16. package/lib/memoryTracker.d.ts +1 -1
  17. package/lib/memoryTracker.js +1 -1
  18. package/lib/modelFactory.d.ts +11 -5
  19. package/lib/modelFactory.js +9 -4
  20. package/nitrogen/generated/android/LiteRTLMOnLoad.cpp +11 -4
  21. package/nitrogen/generated/android/c++/JHybridLiteRTLMSpec.cpp +31 -37
  22. package/nitrogen/generated/android/c++/JHybridLiteRTLMSpec.hpp +19 -22
  23. package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLMSpec.kt +15 -18
  24. package/package.json +12 -5
  25. package/react-native-litert-lm.podspec +20 -7
  26. package/scripts/build-ios-engine.sh +302 -0
  27. package/scripts/download-ios-frameworks.sh +72 -0
  28. package/scripts/postinstall.js +116 -0
  29. package/scripts/stubs/cxx_bridge_stubs.cc +224 -0
  30. package/scripts/stubs/gemma_model_constraint_provider.cc +46 -0
  31. package/scripts/stubs/llguidance_stubs.c +101 -0
  32. package/src/hooks.ts +62 -39
  33. package/src/index.ts +4 -7
  34. package/src/memoryTracker.ts +1 -1
  35. package/src/modelFactory.ts +30 -5
@@ -84,5 +84,5 @@ dependencies {
84
84
  implementation 'org.jetbrains.kotlinx:kotlinx-coroutines-android:1.7.3'
85
85
 
86
86
  // LiteRT-LM Kotlin API
87
- implementation 'com.google.ai.edge.litertlm:litertlm-android:0.9.0-alpha01'
87
+ implementation 'com.google.ai.edge.litertlm:litertlm-android:0.9.0'
88
88
  }
@@ -9,6 +9,7 @@ import android.util.Log
9
9
  import android.os.Debug
10
10
  import android.app.ActivityManager
11
11
  import android.content.Context
12
+ import java.util.Collections
12
13
  import androidx.annotation.Keep
13
14
  import com.facebook.proguard.annotations.DoNotStrip
14
15
  import dev.litert.litertlm.LiteRTLMInitProvider
@@ -30,6 +31,44 @@ import com.google.ai.edge.litertlm.Content
30
31
  // Alias to avoid confusion
31
32
  typealias LiteRTMessage = com.google.ai.edge.litertlm.Message
32
33
 
34
+ /**
35
+ * Named implementation of the LiteRT-LM MessageCallback for streaming inference.
36
+ *
37
+ * Extracted from the anonymous inline class in sendMessageAsync for testability.
38
+ * Accumulates response chunks, forwards tokens to JS, and appends the final
39
+ * response to the conversation history.
40
+ */
41
+ internal class StreamingCallbackListener(
42
+ private val onToken: (String, Boolean) -> Unit,
43
+ private val responseBuilder: StringBuilder,
44
+ private val history: MutableList<Message>,
45
+ ) : com.google.ai.edge.litertlm.MessageCallback {
46
+
47
+ override fun onMessage(responseMsg: com.google.ai.edge.litertlm.LiteRTMessage) {
48
+ val chunk = responseMsg.contents
49
+ .filterIsInstance<com.google.ai.edge.litertlm.Content.Text>()
50
+ .joinToString("") { it.text }
51
+
52
+ onToken(chunk, false)
53
+
54
+ if (chunk.isNotEmpty()) {
55
+ responseBuilder.append(chunk)
56
+ }
57
+ }
58
+
59
+ override fun onDone() {
60
+ onToken("", true)
61
+ val fullResponse = responseBuilder.toString()
62
+ history.add(Message(Role.MODEL, fullResponse))
63
+ Log.d("StreamingCallbackListener", "Streaming done. Length: ${fullResponse.length}")
64
+ }
65
+
66
+ override fun onError(t: Throwable) {
67
+ Log.e("StreamingCallbackListener", "Async generation failed", t)
68
+ onToken("Error: ${t.message}", true)
69
+ }
70
+ }
71
+
33
72
  /**
34
73
  * Kotlin implementation of LiteRTLM using the LiteRT-LM Android SDK.
35
74
  * This class bridges between React Native (via Nitro) and the Google LiteRT-LM Engine.
@@ -68,7 +107,10 @@ class HybridLiteRTLM : HybridLiteRTLMSpec() {
68
107
  private var isClosed = false
69
108
 
70
109
  // Conversation history for getHistory()
71
- private val history = mutableListOf<Message>()
110
+ // Synchronized to prevent ConcurrentModificationException: history is
111
+ // written from Promise.parallel workers and sendMessageAsync SDK callbacks,
112
+ // and read from getHistory() which may be called from the JS thread.
113
+ private val history: MutableList<Message> = Collections.synchronizedList(mutableListOf())
72
114
 
73
115
  // Last generation stats
74
116
  private var lastStats = GenerationStats(
@@ -86,6 +128,7 @@ class HybridLiteRTLM : HybridLiteRTLMSpec() {
86
128
  private var topK: Int = 40
87
129
  private var topP: Double = 0.95
88
130
  private var maxTokens: Int = 1024
131
+ private var systemPrompt: String? = null
89
132
 
90
133
  override val memorySize: Long
91
134
  get() = 1024L * 1024L * 1024L // ~1GB (models are large)
@@ -114,6 +157,7 @@ class HybridLiteRTLM : HybridLiteRTLMSpec() {
114
157
  cfg.topK?.let { topK = it.toInt() }
115
158
  cfg.topP?.let { topP = it }
116
159
  cfg.maxTokens?.let { maxTokens = it.toInt() }
160
+ cfg.systemPrompt?.let { systemPrompt = it }
117
161
  }
118
162
 
119
163
  try {
@@ -185,7 +229,9 @@ class HybridLiteRTLM : HybridLiteRTLMSpec() {
185
229
 
186
230
  // Blocking inference (safe here because we are in Promise.parallel worker thread)
187
231
  val userMsg = LiteRTMessage.of(message)
232
+ val startTime = System.nanoTime()
188
233
  val responseMsg = conversation!!.sendMessage(userMsg)
234
+ val elapsedMs = (System.nanoTime() - startTime) / 1_000_000.0
189
235
 
190
236
  // Extract text
191
237
  val response = responseMsg.contents
@@ -195,14 +241,16 @@ class HybridLiteRTLM : HybridLiteRTLMSpec() {
195
241
  // Add model response to history
196
242
  history.add(Message(Role.MODEL, response))
197
243
 
198
- // Update stats
244
+ // Update stats with real timing data
245
+ val promptTokens = message.length / 4.0
246
+ val completionTokens = response.length / 4.0
199
247
  lastStats = GenerationStats(
200
- promptTokens = message.length / 4.0,
201
- completionTokens = response.length / 4.0,
202
- totalTokens = (message.length + response.length) / 4.0,
203
- timeToFirstToken = 0.0,
204
- totalTime = 0.0,
205
- tokensPerSecond = 0.0
248
+ promptTokens = promptTokens,
249
+ completionTokens = completionTokens,
250
+ totalTokens = promptTokens + completionTokens,
251
+ timeToFirstToken = 0.0, // Not available from sync API
252
+ totalTime = elapsedMs,
253
+ tokensPerSecond = if (elapsedMs > 0) completionTokens / (elapsedMs / 1000.0) else 0.0
206
254
  )
207
255
 
208
256
  response // Return the string
@@ -224,38 +272,17 @@ class HybridLiteRTLM : HybridLiteRTLMSpec() {
224
272
 
225
273
  val fullResponseBuilder = StringBuilder()
226
274
 
227
- // Define callback
228
- val listener = object : com.google.ai.edge.litertlm.MessageCallback {
229
- override fun onMessage(responseMsg: LiteRTMessage) {
230
- val chunk = responseMsg.contents
231
- .filterIsInstance<com.google.ai.edge.litertlm.Content.Text>()
232
- .joinToString("") { it.text }
233
-
234
- onToken(chunk, false)
235
-
236
- if (chunk.isNotEmpty()) {
237
- fullResponseBuilder.append(chunk)
238
- }
239
- }
240
-
241
- override fun onDone() {
242
- onToken("", true)
243
- val fullResponse = fullResponseBuilder.toString()
244
- history.add(Message(Role.MODEL, fullResponse))
245
- Log.d(TAG, "sendMessageAsync done. Length: ${fullResponse.length}")
246
- }
247
-
248
- override fun onError(t: Throwable) {
249
- Log.e(TAG, "Async generation failed", t)
250
- onToken("Error: ${t.message}", true)
251
- }
252
- }
275
+ val listener = StreamingCallbackListener(
276
+ onToken = onToken,
277
+ responseBuilder = fullResponseBuilder,
278
+ history = history,
279
+ )
253
280
 
254
281
  try {
255
282
  val userMsg = LiteRTMessage.of(message)
256
283
  conversation!!.sendMessageAsync(userMsg, listener)
257
284
  } catch (e: Exception) {
258
- Log.e(TAG, "Failed into initiate async generation", e)
285
+ Log.e(TAG, "Failed to initiate async generation", e)
259
286
  onToken("Error: ${e.message}", true)
260
287
  }
261
288
  }
@@ -333,6 +360,15 @@ class HybridLiteRTLM : HybridLiteRTLMSpec() {
333
360
  .joinToString("") { it.text }
334
361
 
335
362
  history.add(Message(Role.MODEL, response))
363
+
364
+ // Clean up temp resized image to prevent cache dir bloat
365
+ if (processedImagePath != imagePath) {
366
+ try {
367
+ java.io.File(processedImagePath).delete()
368
+ } catch (e: Exception) {
369
+ Log.w(TAG, "Failed to clean up temp image: ${e.message}")
370
+ }
371
+ }
336
372
 
337
373
  response
338
374
  }
@@ -479,11 +515,16 @@ class HybridLiteRTLM : HybridLiteRTLMSpec() {
479
515
  // Helpers
480
516
  // -------------------------------------------------------------------------
481
517
  override fun getHistory(): Array<Message> {
482
- return history.toTypedArray()
518
+ // Synchronized list requires manual sync for iteration/copy
519
+ synchronized(history) {
520
+ return history.toTypedArray()
521
+ }
483
522
  }
484
523
 
485
524
  override fun resetConversation() {
486
- history.clear()
525
+ synchronized(history) {
526
+ history.clear()
527
+ }
487
528
  createNewConversation()
488
529
  }
489
530
 
@@ -580,6 +621,21 @@ class HybridLiteRTLM : HybridLiteRTLMSpec() {
580
621
  ensureLoaded()
581
622
  // Dispose old conversation if needed
582
623
  conversation = engine!!.createConversation()
624
+ // Apply system prompt/instruction if set
625
+ systemPrompt?.let { prompt ->
626
+ if (prompt.isNotEmpty()) {
627
+ try {
628
+ // Send system instruction as the first turn to prime the conversation.
629
+ // LiteRT-LM's Conversation API handles chat template formatting,
630
+ // including Gemma's <start_of_turn>system block.
631
+ val systemMsg = LiteRTMessage.of(listOf(Content.Text(prompt)))
632
+ conversation!!.sendMessage(systemMsg)
633
+ Log.i(TAG, "System prompt applied (${prompt.length} chars)")
634
+ } catch (e: Exception) {
635
+ Log.w(TAG, "Failed to apply system prompt: ${e.message}")
636
+ }
637
+ }
638
+ }
583
639
  }
584
640
 
585
641
 
package/app.plugin.js ADDED
@@ -0,0 +1,33 @@
1
+ /**
2
+ * Expo config plugin for react-native-litert-lm.
3
+ *
4
+ * Ensures correct build settings for the LiteRT-LM native module:
5
+ * - Android: minSdkVersion 26, arm64-v8a ABI filter
6
+ * - iOS: deployment target 15.0
7
+ */
8
+ const { withGradleProperties, withXcodeProject } = require('@expo/config-plugins');
9
+
10
+ function withLiteRTLM(config) {
11
+ // Android: Ensure minSdkVersion is at least 26
12
+ config = withGradleProperties(config, (config) => {
13
+ const props = config.modResults;
14
+
15
+ // Set minSdkVersion if not already high enough
16
+ const minSdkProp = props.find((p) => p.key === 'android.minSdkVersion');
17
+ if (!minSdkProp) {
18
+ props.push({
19
+ type: 'property',
20
+ key: 'android.minSdkVersion',
21
+ value: '26',
22
+ });
23
+ } else if (parseInt(minSdkProp.value, 10) < 26) {
24
+ minSdkProp.value = '26';
25
+ }
26
+
27
+ return config;
28
+ });
29
+
30
+ return config;
31
+ }
32
+
33
+ module.exports = withLiteRTLM;