react-native-litert-lm 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +245 -29
- package/android/src/main/java/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLM.kt +301 -58
- package/cpp/HybridLiteRTLM.cpp +109 -9
- package/cpp/HybridLiteRTLM.hpp +16 -0
- package/cpp/cpp-adapter.cpp +10 -2
- package/lib/hooks.d.ts +41 -0
- package/lib/hooks.js +131 -0
- package/lib/index.d.ts +30 -3
- package/lib/index.js +53 -6
- package/lib/memoryTracker.d.ts +128 -0
- package/lib/memoryTracker.js +155 -0
- package/lib/modelFactory.d.ts +18 -0
- package/lib/modelFactory.js +104 -0
- package/lib/specs/LiteRTLM.nitro.d.ts +38 -0
- package/lib/templates.d.ts +51 -0
- package/lib/templates.js +81 -0
- package/nitrogen/generated/android/LiteRTLMOnLoad.cpp +22 -17
- package/nitrogen/generated/android/LiteRTLMOnLoad.hpp +13 -4
- package/nitrogen/generated/android/c++/JFunc_void_double.hpp +75 -0
- package/nitrogen/generated/android/c++/JHybridLiteRTLMSpec.cpp +42 -1
- package/nitrogen/generated/android/c++/JHybridLiteRTLMSpec.hpp +3 -0
- package/nitrogen/generated/android/c++/JLLMConfig.hpp +6 -1
- package/nitrogen/generated/android/c++/JMemoryUsage.hpp +69 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/Func_void_double.kt +80 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLMSpec.kt +17 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/LLMConfig.kt +5 -2
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/MemoryUsage.kt +47 -0
- package/nitrogen/generated/shared/c++/HybridLiteRTLMSpec.cpp +3 -0
- package/nitrogen/generated/shared/c++/HybridLiteRTLMSpec.hpp +6 -0
- package/nitrogen/generated/shared/c++/LLMConfig.hpp +7 -2
- package/nitrogen/generated/shared/c++/MemoryUsage.hpp +95 -0
- package/package.json +3 -3
- package/src/hooks.ts +195 -0
- package/src/index.ts +51 -3
- package/src/memoryTracker.ts +268 -0
- package/src/modelFactory.ts +120 -0
- package/src/specs/LiteRTLM.nitro.ts +47 -0
- package/src/templates.ts +105 -0
|
@@ -6,6 +6,9 @@
|
|
|
6
6
|
package com.margelo.nitro.dev.litert.litertlm
|
|
7
7
|
|
|
8
8
|
import android.util.Log
|
|
9
|
+
import android.os.Debug
|
|
10
|
+
import android.app.ActivityManager
|
|
11
|
+
import android.content.Context
|
|
9
12
|
import androidx.annotation.Keep
|
|
10
13
|
import com.facebook.proguard.annotations.DoNotStrip
|
|
11
14
|
import dev.litert.litertlm.LiteRTLMInitProvider
|
|
@@ -37,6 +40,20 @@ class HybridLiteRTLM : HybridLiteRTLMSpec() {
|
|
|
37
40
|
|
|
38
41
|
companion object {
    private const val TAG = "HybridLiteRTLM"

    // Monitor shared by every instance of this class; loadModel() holds it so
    // that only one model initialization runs at a time.
    private val initLock = Any()

    /**
     * Initialize the native library through the generated OnLoad entry point.
     *
     * Must be called from Application.onCreate() to register the HybridObject.
     * Any failure (including errors, not just exceptions) is logged and
     * swallowed rather than propagated to the caller.
     */
    fun initialize() {
        runCatching { LiteRTLMOnLoad.initializeNative() }
            .onFailure { Log.e(TAG, "Failed to initialize LiteRTLM native library", it) }
    }
}
|
|
41
58
|
|
|
42
59
|
init {
|
|
@@ -46,6 +63,9 @@ class HybridLiteRTLM : HybridLiteRTLMSpec() {
|
|
|
46
63
|
// LiteRT-LM Engine and Conversation
|
|
47
64
|
private var engine: Engine? = null
|
|
48
65
|
private var conversation: Conversation? = null
|
|
66
|
+
|
|
67
|
+
@Volatile
|
|
68
|
+
private var isClosed = false
|
|
49
69
|
|
|
50
70
|
// Conversation history for getHistory()
|
|
51
71
|
private val history = mutableListOf<Message>()
|
|
@@ -75,64 +95,74 @@ class HybridLiteRTLM : HybridLiteRTLMSpec() {
|
|
|
75
95
|
// -------------------------------------------------------------------------
|
|
76
96
|
/**
 * Loads the model at [modelPath], replacing any previously loaded one, and
 * starts a fresh conversation.
 *
 * The work runs inside [Promise.parallel] and holds `initLock` for its whole
 * duration, so model loads are serialized.
 *
 * @param modelPath path handed to the engine configuration as the model location.
 * @param config optional overrides; only its non-null fields replace the
 *   current backend / sampling settings.
 * @return a promise that rejects with [RuntimeException] when the instance is
 *   already closed or when engine/conversation setup fails. If the instance is
 *   closed while the load is being prepared, the load is abandoned and the
 *   promise resolves without a model being loaded.
 */
override fun loadModel(modelPath: String, config: LLMConfig?): Promise<Unit> {
    return Promise.parallel {
        // Serialize initialization to prevent OOM from concurrent loads
        synchronized(initLock) {
            if (isClosed) {
                throw RuntimeException("Cannot load model: LiteRTLM instance is closed")
            }

            Log.i(TAG, "loadModel: $modelPath")

            // Release whatever was loaded before. cleanupInternal() does not set
            // isClosed, so the instance stays usable afterwards.
            cleanupInternal()

            // Apply configuration
            config?.let { cfg ->
                cfg.backend?.let { backend = it }
                cfg.temperature?.let { temperature = it }
                cfg.topK?.let { topK = it.toInt() }
                cfg.topP?.let { topP = it }
                cfg.maxTokens?.let { maxTokens = it.toInt() }
            }

            try {
                // Map our Backend enum to LiteRT-LM Backend enum
                val lmBackend = when (backend) {
                    Backend.GPU -> com.google.ai.edge.litertlm.Backend.GPU
                    Backend.NPU -> {
                        Log.i(TAG, "NPU backend requested - requires hardware support")
                        com.google.ai.edge.litertlm.Backend.NPU
                    }
                    else -> com.google.ai.edge.litertlm.Backend.CPU
                }

                // Vision backend: hardcoded to GPU (required by Gemma 3n)
                val lmVisionBackend = com.google.ai.edge.litertlm.Backend.GPU

                // Audio backend: hardcoded to CPU (optimal for audio processing)
                val lmAudioBackend = com.google.ai.edge.litertlm.Backend.CPU

                Log.i(TAG, "Backend config: main=$lmBackend, vision=$lmVisionBackend (hardcoded), audio=$lmAudioBackend (hardcoded)")

                // Get cache directory from application context
                val cacheDirectory = LiteRTLMInitProvider.applicationContext?.cacheDir?.absolutePath
                Log.i(TAG, "Using cache directory: $cacheDirectory")

                // Create Engine configuration
                val engineConfig = EngineConfig(
                    modelPath = modelPath,
                    backend = lmBackend,
                    visionBackend = lmVisionBackend,
                    audioBackend = lmAudioBackend,
                    maxNumTokens = maxTokens,
                    cacheDir = cacheDirectory
                )

                // close() may have run since the check at the top; skip the
                // expensive engine initialization in that case.
                if (isClosed) return@synchronized

                // Initialize Engine
                engine = Engine(engineConfig).also { it.initialize() }
                Log.i(TAG, "Engine created and initialized successfully")

                // Create Conversation
                createNewConversation()
                Log.i(TAG, "Conversation created successfully")

            } catch (e: Exception) {
                Log.e(TAG, "Failed to load model: ${e.message}", e)
                // Do not keep a half-set-up engine (e.g. conversation creation
                // failed after the engine was initialized) holding native memory
                // while the caller is told the load failed.
                cleanupInternal()
                throw RuntimeException("Failed to load model: ${e.message}", e)
            }
        }
    }
}
|
|
@@ -233,18 +263,62 @@ class HybridLiteRTLM : HybridLiteRTLMSpec() {
|
|
|
233
263
|
// -------------------------------------------------------------------------
|
|
234
264
|
// Multimodal methods
|
|
235
265
|
// -------------------------------------------------------------------------
|
|
266
|
+
|
|
267
|
+
/**
 * Resize image if dimensions exceed maxDimension to prevent OOM.
 * Gemma 3n's vision encoder is optimized for 512x512 or 1024x1024.
 * Passing larger images can spike memory 500MB+.
 *
 * The image is first probed for its dimensions only; pixels are decoded
 * (subsampled) only when a resize is actually required, so a large photo is
 * never held in memory at full resolution.
 *
 * @param imagePath path of the source image file.
 * @param maxDimension largest allowed width/height in pixels.
 * @return [imagePath] unchanged when the image already fits, otherwise the
 *   absolute path of a JPEG written to the application cache directory.
 * @throws RuntimeException when the image cannot be decoded, the application
 *   context is unavailable, or the resized image cannot be written.
 *
 * NOTE(review): the temporary file is not deleted here; confirm the caller
 * removes it once the message has been sent.
 */
private fun resizeImageIfNeeded(imagePath: String, maxDimension: Int = 1024): String {
    // Pass 1: read the dimensions without allocating pixel memory.
    val bounds = android.graphics.BitmapFactory.Options().apply { inJustDecodeBounds = true }
    android.graphics.BitmapFactory.decodeFile(imagePath, bounds)
    val width = bounds.outWidth
    val height = bounds.outHeight
    if (width <= 0 || height <= 0) {
        throw RuntimeException("Failed to decode image: $imagePath")
    }

    // If already within bounds, return original path
    if (width <= maxDimension && height <= maxDimension) {
        return imagePath
    }

    Log.i(TAG, "Resizing image from ${width}x${height} to fit ${maxDimension}px")

    val scale = maxDimension.toFloat() / maxOf(width, height)
    // Extreme aspect ratios can round the short side down to 0, which
    // createScaledBitmap rejects; clamp to at least one pixel.
    val newWidth = (width * scale).toInt().coerceAtLeast(1)
    val newHeight = (height * scale).toInt().coerceAtLeast(1)

    // Resolve the output directory before any bitmap is allocated.
    val cacheDir = LiteRTLMInitProvider.applicationContext?.cacheDir
        ?: throw RuntimeException("Application context not available for image resizing")

    // Pass 2: decode subsampled by the largest power of two that still keeps
    // both sides at or above the target size.
    var sampleSize = 1
    while (width / (sampleSize * 2) >= newWidth && height / (sampleSize * 2) >= newHeight) {
        sampleSize *= 2
    }
    val decodeOptions = android.graphics.BitmapFactory.Options().apply { inSampleSize = sampleSize }
    val sampledBitmap = android.graphics.BitmapFactory.decodeFile(imagePath, decodeOptions)
        ?: throw RuntimeException("Failed to decode image: $imagePath")

    val resizedBitmap = try {
        android.graphics.Bitmap.createScaledBitmap(sampledBitmap, newWidth, newHeight, true)
    } catch (e: Throwable) {
        sampledBitmap.recycle()
        throw e
    }
    // createScaledBitmap may hand back the source itself when no scaling was
    // needed; only recycle the source when it is a distinct bitmap.
    if (resizedBitmap !== sampledBitmap) {
        sampledBitmap.recycle()
    }

    // Save to temp file (unique name, so concurrent calls cannot collide)
    val tempFile = java.io.File.createTempFile("resized_", ".jpg", cacheDir)
    try {
        val written = java.io.FileOutputStream(tempFile).use { out ->
            resizedBitmap.compress(android.graphics.Bitmap.CompressFormat.JPEG, 90, out)
        }
        if (!written) {
            throw RuntimeException("Failed to write resized image: ${tempFile.absolutePath}")
        }
    } catch (e: Exception) {
        // Do not leave a partial or empty file behind in the cache.
        tempFile.delete()
        throw e
    } finally {
        resizedBitmap.recycle()
    }

    Log.i(TAG, "Resized image saved to: ${tempFile.absolutePath} (${newWidth}x${newHeight})")
    return tempFile.absolutePath
}
|
|
306
|
+
|
|
236
307
|
override fun sendMessageWithImage(message: String, imagePath: String): Promise<String> {
|
|
237
308
|
return Promise.parallel {
|
|
238
309
|
ensureLoaded()
|
|
239
310
|
Log.i(TAG, "sendMessageWithImage: $message, path=$imagePath")
|
|
240
311
|
|
|
312
|
+
// Resize image to prevent OOM on high-resolution photos
|
|
313
|
+
val processedImagePath = resizeImageIfNeeded(imagePath)
|
|
314
|
+
|
|
241
315
|
// Create multimodal message
|
|
242
316
|
// Use factory method Message.of passing a list of Content
|
|
243
317
|
val textContent = Content.Text(message)
|
|
244
318
|
|
|
245
319
|
val contentList = listOf(
|
|
246
320
|
textContent,
|
|
247
|
-
Content.ImageFile(
|
|
321
|
+
Content.ImageFile(processedImagePath)
|
|
248
322
|
)
|
|
249
323
|
|
|
250
324
|
val userMsg = LiteRTMessage.of(contentList)
|
|
@@ -264,6 +338,115 @@ class HybridLiteRTLM : HybridLiteRTLMSpec() {
|
|
|
264
338
|
}
|
|
265
339
|
}
|
|
266
340
|
|
|
341
|
+
/**
 * Downloads [url] into the app's `files/models` directory as [fileName].
 *
 * The body is streamed to `<fileName>.tmp` and renamed to the final name only
 * after the transfer finished, so a partially downloaded model is never
 * visible under its final name. If a non-empty file with the final name
 * already exists, nothing is downloaded.
 *
 * @param url HTTP(S) location of the model.
 * @param fileName name of the file inside the models directory; names that
 *   resolve outside that directory are rejected.
 * @param onProgress optional callback receiving a fraction; 0.0 before the
 *   transfer, 1.0 on completion, and intermediate values at most roughly
 *   every 100 ms when the server reports a content length.
 * @return a promise resolving to the absolute path of the model file; it
 *   rejects with [RuntimeException] on any failure.
 */
override fun downloadModel(url: String, fileName: String, onProgress: ((Double) -> Unit)?): Promise<String> {
    return Promise.parallel {
        Log.i(TAG, "downloadModel: $url -> $fileName")

        val context = LiteRTLMInitProvider.applicationContext ?: throw RuntimeException("Context not available")
        val modelsDir = java.io.File(context.filesDir, "models")
        if (!modelsDir.exists()) {
            modelsDir.mkdirs()
        }

        val modelFile = java.io.File(modelsDir, fileName)
        // fileName comes from the JS side; refuse names such as "../x" that
        // would place the file outside the models directory.
        if (!modelFile.canonicalPath.startsWith(modelsDir.canonicalPath + java.io.File.separator)) {
            throw RuntimeException("Invalid model file name: $fileName")
        }
        val tempFile = java.io.File(modelsDir, "$fileName.tmp")

        // Check if file exists and has content
        if (modelFile.exists() && modelFile.length() > 0) {
            Log.i(TAG, "Model already exists at: ${modelFile.absolutePath}")
            onProgress?.invoke(1.0)
            return@parallel modelFile.absolutePath
        }

        Log.i(TAG, "Downloading model to temp file: ${tempFile.absolutePath}")
        onProgress?.invoke(0.0)

        try {
            val connection = java.net.URL(url).openConnection() as java.net.HttpURLConnection
            try {
                connection.connectTimeout = 15000 // 15s
                connection.readTimeout = 0 // Infinite for large files
                connection.doInput = true
                connection.connect()

                if (connection.responseCode != java.net.HttpURLConnection.HTTP_OK) {
                    throw RuntimeException("Failed to download model: HTTP ${connection.responseCode}")
                }

                val contentLength = connection.contentLengthLong // Use long for large files
                var totalBytesRead = 0L
                var lastProgressUpdate = 0L

                // use {} closes both streams on every exit path, including errors.
                connection.inputStream.use { input ->
                    java.io.FileOutputStream(tempFile).use { output ->
                        val buffer = ByteArray(8 * 1024)
                        while (true) {
                            val bytesRead = input.read(buffer)
                            if (bytesRead == -1) break
                            output.write(buffer, 0, bytesRead)
                            totalBytesRead += bytesRead

                            if (contentLength > 0 && onProgress != null) {
                                val currentTime = System.currentTimeMillis()
                                // Update roughly every 100ms to avoid flooding JS bridge
                                if (currentTime - lastProgressUpdate > 100) {
                                    val progress = totalBytesRead.toDouble() / contentLength.toDouble()
                                    onProgress.invoke(progress)
                                    lastProgressUpdate = currentTime
                                }
                            }
                        }
                        output.flush()
                    }
                }

                // A short body must not be promoted to the final file name,
                // otherwise later calls would treat it as an already-downloaded model.
                if (contentLength > 0 && totalBytesRead != contentLength) {
                    throw RuntimeException("Incomplete download: received $totalBytesRead of $contentLength bytes")
                }
            } finally {
                connection.disconnect()
            }

            // Atomic rename
            if (tempFile.renameTo(modelFile)) {
                Log.i(TAG, "Download complete and renamed to: ${modelFile.absolutePath}")
                onProgress?.invoke(1.0)
                return@parallel modelFile.absolutePath
            } else {
                throw RuntimeException("Failed to rename temp file to model file")
            }

        } catch (e: Exception) {
            Log.e(TAG, "Download failed", e)
            if (tempFile.exists()) {
                tempFile.delete()
            }
            throw RuntimeException("Download failed: ${e.message}", e)
        }
    }
}
|
|
422
|
+
|
|
423
|
+
/**
 * Deletes [fileName] from the app's `files/models` directory.
 *
 * A missing file is only logged. After a successful delete, any currently
 * loaded engine is released, regardless of which model file it was loaded from.
 *
 * @param fileName name of the file inside the models directory; names that
 *   resolve outside that directory are rejected.
 * @return a promise that rejects with [RuntimeException] when the context is
 *   unavailable, the name is invalid, or the file exists but cannot be deleted.
 */
override fun deleteModel(fileName: String): Promise<Unit> {
    return Promise.parallel {
        Log.i(TAG, "deleteModel: $fileName")
        val context = LiteRTLMInitProvider.applicationContext ?: throw RuntimeException("Context not available")
        val modelsDir = java.io.File(context.filesDir, "models")
        val modelFile = java.io.File(modelsDir, fileName)

        // fileName comes from the JS side; refuse names such as "../x" that
        // would delete a file outside the models directory.
        if (!modelFile.canonicalPath.startsWith(modelsDir.canonicalPath + java.io.File.separator)) {
            throw RuntimeException("Invalid model file name: $fileName")
        }

        if (modelFile.exists()) {
            val deleted = modelFile.delete()
            if (deleted) {
                Log.i(TAG, "Deleted model: ${modelFile.absolutePath}")
                // The loaded model's path is not compared with the deleted file,
                // so the engine is released whenever one is loaded.
                // We use cleanupInternal() which releases resources WITHOUT marking the instance as closed.
                if (engine != null) {
                    Log.i(TAG, "Cleaning up engine after deleting model file.")
                    cleanupInternal()
                }
            } else {
                Log.e(TAG, "Failed to delete model: ${modelFile.absolutePath}")
                throw RuntimeException("Failed to delete model: ${modelFile.absolutePath}")
            }
        } else {
            Log.w(TAG, "Model not found for deletion: ${modelFile.absolutePath}")
        }
    }
}
|
|
449
|
+
|
|
267
450
|
override fun sendMessageWithAudio(message: String, audioPath: String): Promise<String> {
|
|
268
451
|
return Promise.parallel {
|
|
269
452
|
ensureLoaded()
|
|
@@ -316,12 +499,72 @@ class HybridLiteRTLM : HybridLiteRTLMSpec() {
|
|
|
316
499
|
return lastStats
|
|
317
500
|
}
|
|
318
501
|
|
|
502
|
+
/**
 * Takes a snapshot of the process and system memory state.
 *
 * Each source is queried independently; when one of them fails, the failure
 * is logged and the corresponding fields keep their defaults (0.0 / false).
 *
 * @return native heap bytes from [Debug], resident bytes from the `VmRSS`
 *   entry of `/proc/self/status`, and available memory plus the low-memory
 *   flag from [ActivityManager].
 */
override fun getMemoryUsage(): MemoryUsage {
    // Native heap: allocated bytes from Debug APIs (most accurate for native allocations)
    val nativeHeapBytes = Debug.getNativeHeapAllocatedSize().toDouble()

    // Process RSS: read from /proc/self/status (VmRSS) in kB.
    // useLines + firstOrNull stops reading at the VmRSS line instead of
    // scanning the remainder of the file.
    val residentBytes = try {
        java.io.File("/proc/self/status").useLines { lines ->
            lines.firstOrNull { it.startsWith("VmRSS:") }
                ?.substringAfter("VmRSS:")
                ?.trim()
                ?.takeWhile { !it.isWhitespace() } // numeric field before the "kB" unit
                ?.toDoubleOrNull()
                ?.times(1024.0)
        } ?: 0.0
    } catch (e: Exception) {
        Log.w(TAG, "Failed to read /proc/self/status: ${e.message}")
        0.0
    }

    // Available memory and low-memory flag from ActivityManager
    var availableMemoryBytes = 0.0
    var isLowMemory = false
    try {
        val context = LiteRTLMInitProvider.applicationContext
        if (context != null) {
            val activityManager = context.getSystemService(Context.ACTIVITY_SERVICE) as ActivityManager
            val memInfo = ActivityManager.MemoryInfo()
            activityManager.getMemoryInfo(memInfo)
            availableMemoryBytes = memInfo.availMem.toDouble()
            isLowMemory = memInfo.lowMemory
        }
    } catch (e: Exception) {
        Log.w(TAG, "Failed to get ActivityManager memory info: ${e.message}")
    }

    return MemoryUsage(
        nativeHeapBytes = nativeHeapBytes,
        residentBytes = residentBytes,
        availableMemoryBytes = availableMemoryBytes,
        isLowMemory = isLowMemory
    )
}
|
|
545
|
+
|
|
319
546
|
/**
 * Marks this instance as closed and releases the conversation and engine.
 *
 * Once closed, loadModel() rejects further load requests.
 */
override fun close() {
    Log.d(TAG, "Closing resources")
    // Raise the flag before releasing anything, so a load that is already in
    // progress can observe the shutdown at its isClosed checks.
    isClosed = true
    cleanupInternal()
}
|
|
551
|
+
|
|
552
|
+
private fun cleanupInternal() {
|
|
321
553
|
try {
|
|
322
554
|
conversation = null
|
|
323
|
-
engine
|
|
324
|
-
//
|
|
555
|
+
// Explicitly close engine if it supports it to free native memory immediately
|
|
556
|
+
// Assuming Engine implements AutoCloseable or has close()
|
|
557
|
+
if (engine is AutoCloseable) {
|
|
558
|
+
(engine as AutoCloseable).close()
|
|
559
|
+
} else {
|
|
560
|
+
// Try reflection or just null it if no close method
|
|
561
|
+
try {
|
|
562
|
+
engine?.javaClass?.getMethod("close")?.invoke(engine)
|
|
563
|
+
} catch (e: Exception) {
|
|
564
|
+
// Method not found, rely on GC
|
|
565
|
+
}
|
|
566
|
+
}
|
|
567
|
+
engine = null
|
|
325
568
|
} catch (e: Exception) {
|
|
326
569
|
Log.e(TAG, "Error closing resources", e)
|
|
327
570
|
}
|
package/cpp/HybridLiteRTLM.cpp
CHANGED
|
@@ -283,16 +283,34 @@ std::string HybridLiteRTLM::sendMessageWithImage(
|
|
|
283
283
|
return response->content;
|
|
284
284
|
|
|
285
285
|
#else
|
|
286
|
-
//
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
286
|
+
// iOS: LiteRT-LM SDK not yet available, throw clear error
|
|
287
|
+
throw std::runtime_error(
|
|
288
|
+
"sendMessageWithImage is not supported on iOS. "
|
|
289
|
+
"LiteRT-LM iOS SDK is not yet available. "
|
|
290
|
+
"Please use text-only sendMessage() for now.");
|
|
291
|
+
#endif
|
|
292
|
+
}
|
|
293
|
+
|
|
293
294
|
#endif
|
|
294
295
|
}
|
|
295
296
|
|
|
297
|
+
//------------------------------------------------------------------------------
|
|
298
|
+
// downloadModel - Download model file from URL
|
|
299
|
+
//------------------------------------------------------------------------------
|
|
300
|
+
std::future<std::string> HybridLiteRTLM::downloadModel(
|
|
301
|
+
const std::string& url,
|
|
302
|
+
const std::string& fileName,
|
|
303
|
+
const std::optional<std::function<void(double)>>& onProgress) {
|
|
304
|
+
|
|
305
|
+
// Return a future that throws an exception
|
|
306
|
+
return std::async(std::launch::async, []() -> std::string {
|
|
307
|
+
throw std::runtime_error(
|
|
308
|
+
"downloadModel is not supported on iOS yet. "
|
|
309
|
+
"Please download the model manually using a separate library."
|
|
310
|
+
);
|
|
311
|
+
});
|
|
312
|
+
}
|
|
313
|
+
|
|
296
314
|
//------------------------------------------------------------------------------
|
|
297
315
|
// sendMessageWithAudio - Multimodal audio + text
|
|
298
316
|
//------------------------------------------------------------------------------
|
|
@@ -349,8 +367,11 @@ std::string HybridLiteRTLM::sendMessageWithAudio(
|
|
|
349
367
|
return response->content;
|
|
350
368
|
|
|
351
369
|
#else
|
|
352
|
-
//
|
|
353
|
-
|
|
370
|
+
// iOS: LiteRT-LM SDK not yet available, throw clear error
|
|
371
|
+
throw std::runtime_error(
|
|
372
|
+
"sendMessageWithAudio is not supported on iOS. "
|
|
373
|
+
"LiteRT-LM iOS SDK is not yet available. "
|
|
374
|
+
"Please use text-only sendMessage() for now.");
|
|
354
375
|
#endif
|
|
355
376
|
}
|
|
356
377
|
|
|
@@ -497,6 +518,85 @@ GenerationStats HybridLiteRTLM::getStats() {
|
|
|
497
518
|
return lastStats_;
|
|
498
519
|
}
|
|
499
520
|
|
|
521
|
+
//------------------------------------------------------------------------------
|
|
522
|
+
// getMemoryUsage - Return real memory usage from OS
|
|
523
|
+
//------------------------------------------------------------------------------
|
|
524
|
+
MemoryUsage HybridLiteRTLM::getMemoryUsage() {
|
|
525
|
+
double nativeHeapBytes = 0;
|
|
526
|
+
double residentBytes = 0;
|
|
527
|
+
double availableMemoryBytes = 0;
|
|
528
|
+
bool isLowMemory = false;
|
|
529
|
+
|
|
530
|
+
#ifdef __APPLE__
|
|
531
|
+
// Get process memory info via Mach APIs
|
|
532
|
+
struct mach_task_basic_info taskInfo;
|
|
533
|
+
mach_msg_type_number_t infoCount = MACH_TASK_BASIC_INFO_COUNT;
|
|
534
|
+
if (task_info(mach_task_self(), MACH_TASK_BASIC_INFO,
|
|
535
|
+
(task_info_t)&taskInfo, &infoCount) == KERN_SUCCESS) {
|
|
536
|
+
residentBytes = static_cast<double>(taskInfo.resident_size);
|
|
537
|
+
}
|
|
538
|
+
|
|
539
|
+
// Get system-wide memory pressure
|
|
540
|
+
vm_statistics64_data_t vmStats;
|
|
541
|
+
mach_msg_type_number_t vmCount = HOST_VM_INFO64_COUNT;
|
|
542
|
+
if (host_statistics64(mach_host_self(), HOST_VM_INFO64,
|
|
543
|
+
(host_info64_t)&vmStats, &vmCount) == KERN_SUCCESS) {
|
|
544
|
+
vm_size_t pageSize;
|
|
545
|
+
host_page_size(mach_host_self(), &pageSize);
|
|
546
|
+
availableMemoryBytes = static_cast<double>(vmStats.free_count) * pageSize;
|
|
547
|
+
// Consider low memory if free pages < 10% of total active+inactive+free
|
|
548
|
+
uint64_t totalPages = vmStats.active_count + vmStats.inactive_count + vmStats.free_count;
|
|
549
|
+
isLowMemory = (totalPages > 0) &&
|
|
550
|
+
(static_cast<double>(vmStats.free_count) / totalPages < 0.1);
|
|
551
|
+
}
|
|
552
|
+
|
|
553
|
+
// malloc_size is per-allocation; use resident_size as native heap proxy
|
|
554
|
+
nativeHeapBytes = residentBytes;
|
|
555
|
+
#endif
|
|
556
|
+
|
|
557
|
+
#ifdef __ANDROID__
|
|
558
|
+
// Parse /proc/self/status for VmRSS (resident set size)
|
|
559
|
+
std::ifstream statusFile("/proc/self/status");
|
|
560
|
+
if (statusFile.is_open()) {
|
|
561
|
+
std::string line;
|
|
562
|
+
while (std::getline(statusFile, line)) {
|
|
563
|
+
if (line.rfind("VmRSS:", 0) == 0) {
|
|
564
|
+
// Format: "VmRSS: 123456 kB"
|
|
565
|
+
std::istringstream iss(line.substr(6));
|
|
566
|
+
double kbValue = 0;
|
|
567
|
+
iss >> kbValue;
|
|
568
|
+
residentBytes = kbValue * 1024.0;
|
|
569
|
+
break;
|
|
570
|
+
}
|
|
571
|
+
}
|
|
572
|
+
}
|
|
573
|
+
|
|
574
|
+
// Use mallinfo for native heap
|
|
575
|
+
struct mallinfo mi = mallinfo();
|
|
576
|
+
nativeHeapBytes = static_cast<double>(mi.uordblks); // total allocated space
|
|
577
|
+
|
|
578
|
+
// Parse /proc/meminfo for available memory
|
|
579
|
+
std::ifstream memFile("/proc/meminfo");
|
|
580
|
+
if (memFile.is_open()) {
|
|
581
|
+
std::string line;
|
|
582
|
+
while (std::getline(memFile, line)) {
|
|
583
|
+
if (line.rfind("MemAvailable:", 0) == 0) {
|
|
584
|
+
std::istringstream iss(line.substr(13));
|
|
585
|
+
double kbValue = 0;
|
|
586
|
+
iss >> kbValue;
|
|
587
|
+
availableMemoryBytes = kbValue * 1024.0;
|
|
588
|
+
break;
|
|
589
|
+
}
|
|
590
|
+
}
|
|
591
|
+
}
|
|
592
|
+
|
|
593
|
+
// Consider low if available < 256MB
|
|
594
|
+
isLowMemory = availableMemoryBytes > 0 && availableMemoryBytes < 256.0 * 1024 * 1024;
|
|
595
|
+
#endif
|
|
596
|
+
|
|
597
|
+
return MemoryUsage{nativeHeapBytes, residentBytes, availableMemoryBytes, isLowMemory};
|
|
598
|
+
}
|
|
599
|
+
|
|
500
600
|
//------------------------------------------------------------------------------
|
|
501
601
|
// close - Release all native resources
|
|
502
602
|
//------------------------------------------------------------------------------
|
package/cpp/HybridLiteRTLM.hpp
CHANGED
|
@@ -21,6 +21,16 @@
|
|
|
21
21
|
#include "litert/lm/types.h"
|
|
22
22
|
#endif
|
|
23
23
|
|
|
24
|
+
// Memory usage headers
|
|
25
|
+
#ifdef __APPLE__
|
|
26
|
+
#include <mach/mach.h>
|
|
27
|
+
#include <mach/mach_host.h>
|
|
28
|
+
#endif
|
|
29
|
+
#ifdef __ANDROID__
|
|
30
|
+
#include <malloc.h>
|
|
31
|
+
#include <fstream>
|
|
32
|
+
#endif
|
|
33
|
+
|
|
24
34
|
#include <string>
|
|
25
35
|
#include <optional>
|
|
26
36
|
#include <vector>
|
|
@@ -58,6 +68,10 @@ public:
|
|
|
58
68
|
|
|
59
69
|
std::string sendMessageWithImage(const std::string& message,
|
|
60
70
|
const std::string& imagePath) override;
|
|
71
|
+
|
|
72
|
+
std::future<std::string> downloadModel(const std::string& url,
|
|
73
|
+
const std::string& fileName,
|
|
74
|
+
const std::optional<std::function<void(double)>>& onProgress) override;
|
|
61
75
|
|
|
62
76
|
std::string sendMessageWithAudio(const std::string& message,
|
|
63
77
|
const std::string& audioPath) override;
|
|
@@ -75,6 +89,8 @@ public:
|
|
|
75
89
|
|
|
76
90
|
GenerationStats getStats() override;
|
|
77
91
|
|
|
92
|
+
MemoryUsage getMemoryUsage() override;
|
|
93
|
+
|
|
78
94
|
void close() override;
|
|
79
95
|
|
|
80
96
|
private:
|
package/cpp/cpp-adapter.cpp
CHANGED
|
@@ -2,12 +2,20 @@
|
|
|
2
2
|
/// cpp-adapter.cpp
|
|
3
3
|
/// JNI Entry Point - Required by Nitrogen to register Kotlin HybridObjects
|
|
4
4
|
///
|
|
5
|
+
/// Updated for react-native-nitro-modules v0.34+:
|
|
6
|
+
/// Uses facebook::jni::initialize() directly with registerAllNatives().
|
|
7
|
+
///
|
|
5
8
|
|
|
6
9
|
#include <jni.h>
|
|
10
|
+
#include <fbjni/fbjni.h>
|
|
7
11
|
#include "LiteRTLMOnLoad.hpp"
|
|
8
12
|
|
|
9
13
|
// JNI_OnLoad is called when the native library is loaded via System.loadLibrary()
|
|
10
|
-
// This is where we initialize the Nitrogen bridge and register all Kotlin HybridObjects
|
|
14
|
+
// This is where we initialize the Nitrogen bridge and register all Kotlin HybridObjects.
|
|
15
|
+
// The new v0.34 API allows registering custom C++ native JNI classes/functions
|
|
16
|
+
// alongside Nitrogen's auto-generated registrations.
|
|
11
17
|
JNIEXPORT jint JNICALL JNI_OnLoad(JavaVM* vm, void*) {
|
|
12
|
-
return
|
|
18
|
+
return facebook::jni::initialize(vm, []() {
|
|
19
|
+
margelo::nitro::litertlm::registerAllNatives();
|
|
20
|
+
});
|
|
13
21
|
}
|
package/lib/hooks.d.ts
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import { LiteRTLM, LLMConfig } from "./index";
|
|
2
|
+
import type { MemoryTracker, MemoryTrackerSummary } from "./memoryTracker";
|
|
3
|
+
/**
 * Options accepted by `useModel`: every `LLMConfig` field plus the
 * hook-specific switches declared below.
 */
export interface UseModelConfig extends LLMConfig {
    /**
     * NOTE(review): presumably controls whether the model is loaded
     * automatically instead of waiting for `load()` - confirm against hooks.ts.
     */
    autoLoad?: boolean;
    /**
     * Enable memory tracking using native ArrayBuffers (v0.34+).
     * When enabled, memory usage is tracked after each inference call
     * using `NitroModules.createNativeArrayBuffer()` for zero-copy storage.
     * @default false
     */
    enableMemoryTracking?: boolean;
    /**
     * Maximum number of memory snapshots to store.
     * Each snapshot uses 32 bytes of native memory.
     * @default 256
     */
    maxMemorySnapshots?: number;
}
|
|
19
|
+
/**
 * Value returned by `useModel`: the model handle, status fields, and the
 * actions that can be performed on it.
 */
export interface UseModelResult {
    /** The underlying model instance, or `null` when none is available. */
    model: LiteRTLM | null;
    /** NOTE(review): presumably true once the model can accept prompts - confirm. */
    isReady: boolean;
    /** NOTE(review): presumably true while a `generate` call is in flight - confirm. */
    isGenerating: boolean;
    /** Download progress value; NOTE(review): scale (0-1 vs 0-100) not visible here - confirm. */
    downloadProgress: number;
    /** Last error message, or `null` when there is none. */
    error: string | null;
    /** Sends `prompt` to the model and resolves with the generated text. */
    generate: (prompt: string) => Promise<string>;
    /** NOTE(review): reset semantics are not visible in this declaration - confirm. */
    reset: () => void;
    /** Deletes the stored model file named `fileName`. */
    deleteModel: (fileName: string) => Promise<void>;
    /** Starts loading the model; resolves when loading has finished. */
    load: () => Promise<void>;
    /**
     * Memory tracker instance (available when enableMemoryTracking is true).
     * Uses native ArrayBuffers allocated via `NitroModules.createNativeArrayBuffer()`
     * for efficient, zero-copy memory usage tracking.
     */
    memoryTracker: MemoryTracker | null;
    /**
     * Current memory tracking summary (null if tracking is disabled).
     * Updates automatically after each inference call.
     */
    memorySummary: MemoryTrackerSummary | null;
}
|
|
41
|
+
/**
 * React hook exposing a LiteRT-LM model together with its status and actions.
 *
 * @param pathOrUrl location of the model, given either as a path or as a URL.
 * @param config optional model and hook settings.
 * @returns the model handle, status fields and actions described by `UseModelResult`.
 */
export declare function useModel(pathOrUrl: string, config?: UseModelConfig): UseModelResult;
|