react-native-litert-lm 0.3.7 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88) hide show
  1. package/README.md +153 -135
  2. package/android/build.gradle +12 -0
  3. package/android/src/main/AndroidManifest.xml +8 -0
  4. package/android/src/main/java/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLM.kt +276 -62
  5. package/android/src/main/java/dev/litert/litertlm/LiteRTLMPackage.kt +19 -2
  6. package/android/src/test/java/com/margelo/nitro/core/Promise.kt +46 -0
  7. package/android/src/test/java/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLMTest.kt +105 -0
  8. package/ios/HybridLiteRTLM.swift +1344 -0
  9. package/ios/Tests/HybridLiteRTLMTests.swift +113 -0
  10. package/lib/__mocks__/react-native-nitro-modules.d.ts +65 -0
  11. package/lib/__mocks__/react-native-nitro-modules.js +60 -0
  12. package/lib/__tests__/hooks.test.d.ts +1 -0
  13. package/lib/__tests__/hooks.test.js +124 -0
  14. package/lib/__tests__/memoryTracker.test.d.ts +1 -0
  15. package/lib/__tests__/memoryTracker.test.js +74 -0
  16. package/lib/__tests__/modelFactory.test.d.ts +1 -0
  17. package/lib/__tests__/modelFactory.test.js +68 -0
  18. package/lib/hooks.js +27 -3
  19. package/lib/index.d.ts +6 -2
  20. package/lib/index.js +8 -8
  21. package/lib/modelFactory.js +82 -63
  22. package/lib/specs/LiteRTLM.nitro.d.ts +87 -2
  23. package/nitrogen/generated/android/LiteRTLMOnLoad.cpp +2 -2
  24. package/nitrogen/generated/android/c++/JHybridLiteRTLMSpec.cpp +94 -9
  25. package/nitrogen/generated/android/c++/JHybridLiteRTLMSpec.hpp +5 -1
  26. package/nitrogen/generated/android/c++/JLLMConfig.hpp +40 -3
  27. package/nitrogen/generated/android/c++/JMultimodalPart.hpp +74 -0
  28. package/nitrogen/generated/android/c++/JPartType.hpp +61 -0
  29. package/nitrogen/generated/android/c++/JToolDefinition.hpp +65 -0
  30. package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/GenerationStats.kt +23 -0
  31. package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLMSpec.kt +28 -2
  32. package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/LLMConfig.kt +46 -3
  33. package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/MemoryUsage.kt +19 -0
  34. package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/Message.kt +15 -0
  35. package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/MultimodalPart.kt +66 -0
  36. package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/PartType.kt +24 -0
  37. package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/ToolDefinition.kt +61 -0
  38. package/nitrogen/generated/ios/LiteRTLM-Swift-Cxx-Bridge.cpp +57 -1
  39. package/nitrogen/generated/ios/LiteRTLM-Swift-Cxx-Bridge.hpp +414 -3
  40. package/nitrogen/generated/ios/LiteRTLM-Swift-Cxx-Umbrella.hpp +41 -3
  41. package/nitrogen/generated/ios/LiteRTLMAutolinking.mm +4 -6
  42. package/nitrogen/generated/ios/LiteRTLMAutolinking.swift +10 -0
  43. package/nitrogen/generated/ios/c++/HybridLiteRTLMSpecSwift.cpp +11 -0
  44. package/nitrogen/generated/ios/c++/HybridLiteRTLMSpecSwift.hpp +240 -0
  45. package/nitrogen/generated/ios/swift/Backend.swift +44 -0
  46. package/nitrogen/generated/ios/swift/Func_void.swift +46 -0
  47. package/nitrogen/generated/ios/swift/Func_void_double.swift +46 -0
  48. package/nitrogen/generated/ios/swift/Func_void_std__exception_ptr.swift +46 -0
  49. package/nitrogen/generated/ios/swift/Func_void_std__string.swift +46 -0
  50. package/nitrogen/generated/ios/swift/Func_void_std__string_bool.swift +46 -0
  51. package/nitrogen/generated/ios/swift/GenerationStats.swift +54 -0
  52. package/nitrogen/generated/ios/swift/HybridLiteRTLMSpec.swift +71 -0
  53. package/nitrogen/generated/ios/swift/HybridLiteRTLMSpec_cxx.swift +431 -0
  54. package/nitrogen/generated/ios/swift/LLMConfig.swift +203 -0
  55. package/nitrogen/generated/ios/swift/MemoryUsage.swift +44 -0
  56. package/nitrogen/generated/ios/swift/Message.swift +34 -0
  57. package/nitrogen/generated/ios/swift/MultimodalPart.swift +83 -0
  58. package/nitrogen/generated/ios/swift/PartType.swift +44 -0
  59. package/nitrogen/generated/ios/swift/Role.swift +44 -0
  60. package/nitrogen/generated/ios/swift/ToolDefinition.swift +39 -0
  61. package/nitrogen/generated/shared/c++/HybridLiteRTLMSpec.cpp +4 -0
  62. package/nitrogen/generated/shared/c++/HybridLiteRTLMSpec.hpp +9 -2
  63. package/nitrogen/generated/shared/c++/LLMConfig.hpp +22 -2
  64. package/nitrogen/generated/shared/c++/MultimodalPart.hpp +99 -0
  65. package/nitrogen/generated/shared/c++/PartType.hpp +80 -0
  66. package/nitrogen/generated/shared/c++/ToolDefinition.hpp +91 -0
  67. package/package.json +22 -11
  68. package/react-native-litert-lm.podspec +17 -19
  69. package/scripts/download-ios-frameworks.sh +17 -50
  70. package/scripts/framework-source.js +46 -0
  71. package/scripts/postinstall.js +40 -18
  72. package/src/__mocks__/react-native-nitro-modules.ts +58 -0
  73. package/src/__tests__/hooks.test.ts +153 -0
  74. package/src/__tests__/memoryTracker.test.ts +87 -0
  75. package/src/__tests__/modelFactory.test.ts +96 -0
  76. package/src/hooks.ts +29 -7
  77. package/src/index.ts +7 -10
  78. package/src/modelFactory.ts +104 -80
  79. package/src/specs/LiteRTLM.nitro.ts +106 -2
  80. package/cpp/HybridLiteRTLM.cpp +0 -939
  81. package/cpp/HybridLiteRTLM.hpp +0 -169
  82. package/cpp/IOSDownloadHelper.h +0 -24
  83. package/ios/IOSDownloadHelper.mm +0 -129
  84. package/scripts/build-ios-engine.sh +0 -302
  85. package/scripts/stubs/cxx_bridge_stubs.cc +0 -224
  86. package/scripts/stubs/gemma_model_constraint_provider.cc +0 -46
  87. package/scripts/stubs/llguidance_stubs.c +0 -101
  88. package/src/templates.ts +0 -105
@@ -0,0 +1,1344 @@
1
+ //
2
+ // HybridLiteRTLM.swift
3
+ // react-native-litert-lm
4
+ //
5
+ // Created by Antigravity on 2026-05-19.
6
+ // Copyright © 2026 Margelo. All rights reserved.
7
+ //
8
+
9
+ import Foundation
10
+ import NitroModules
11
+ import CLiteRTLM
12
+ import os
13
+
14
/// Per-request state handed to the C streaming callback through its opaque user-data pointer.
///
/// One instance is created for every streaming call. The callback reaches the JS token
/// handler, the promise to settle and the owning module through this object, and uses the
/// mutable fields to accumulate the response between chunks.
private class StreamContext {
    // MARK: Request inputs (fixed for the lifetime of the stream)

    /// Module that started the stream; gives the callback access to its helpers and queue.
    let parent: HybridLiteRTLM
    /// Promise settled once the stream completes or reports an error.
    let promise: Promise<Void>
    /// JS handler invoked with each text delta and a completion flag.
    let onToken: (_ token: String, _ done: Bool) -> Void
    /// Wall-clock time at which the request was issued; used to compute total duration.
    let startTime: Date
    /// Prompt text sent by the user; used to strip an echoed prompt and to estimate prompt tokens.
    let userMessage: String

    // MARK: Streaming state (mutated by the callback)

    /// Number of chunk deltas emitted to `onToken` so far.
    var tokenCount: Int = 0
    /// Character count of the cleaned response that has already been emitted.
    var lastEmittedLength: Int = 0
    /// Cleaned, complete response; filled in when the final callback arrives.
    var fullResponse: String = ""
    /// Concatenation of the text extracted from every chunk received so far.
    var rawResponse: String = ""

    init(
        userMessage: String,
        startTime: Date,
        onToken: @escaping (_ token: String, _ done: Bool) -> Void,
        promise: Promise<Void>,
        parent: HybridLiteRTLM
    ) {
        self.parent = parent
        self.promise = promise
        self.onToken = onToken
        self.startTime = startTime
        self.userMessage = userMessage
    }
}
41
+
42
+ public class HybridLiteRTLM: HybridLiteRTLMSpec_base, HybridLiteRTLMSpec_protocol {
43
+
44
// MARK: - Execution context

/// Serial queue on which engine work is scheduled so that long-running native calls do not
/// execute on the calling (JS) thread. The mutable state below is accessed through it.
private let queue = DispatchQueue(label: "dev.litert.engine", qos: .userInteractive)

// MARK: - Native handles

/// Handle to the LiteRT-LM C engine, set by `loadModel`.
private var engine: OpaquePointer? = nil

/// Handle to the conversation currently in use for sending messages.
private var conversation: OpaquePointer? = nil

// MARK: - Session state

/// Whether a model has been loaded successfully. This is a plain flag; it is safe only
/// because it is read and written on `queue`.
private var isLoaded: Bool = false

/// User and model turns recorded for the current conversation.
private var history: [Message] = []

/// Statistics of the most recent generation; all zeros until a turn completes.
private var lastStats = GenerationStats(
    promptTokens: 0,
    completionTokens: 0,
    totalTokens: 0,
    timeToFirstToken: 0,
    totalTime: 0,
    tokensPerSecond: 0
)

// MARK: - Configuration (defaults, overridden by `loadModel`)

/// Requested execution backend.
private var backend: Backend = .cpu
/// Sampling temperature.
private var temperature: Double = 0.7
/// Top-K sampling cutoff.
private var topK: Int = 40
/// Top-P (nucleus) sampling threshold.
private var topP: Double = 0.95
/// Maximum number of tokens passed to the engine settings.
private var maxTokens: Int = 1024
/// Optional system prompt.
private var systemPrompt: String? = nil
/// Optional tool definitions supplied with the configuration.
private var tools: [ToolDefinition]? = nil
/// Whether speculative decoding should be enabled when the model file supports it.
private var enableSpeculativeDecoding: Bool = false
79
/// Fixed estimate of the native memory held by this object, exposed so the JS engine's
/// garbage collector can account for it. Always 1 GiB, independent of the loaded model.
public var memorySize: Int {
    1 << 30 // 1024 * 1024 * 1024 bytes
}
83
+
84
/// Runs the shared teardown routine (`closeInternal()`, defined elsewhere in this file)
/// when the object is deallocated.
deinit {
    self.closeInternal()
}
87
+
88
+ // MARK: - Core Hybrid Object API
89
+
90
/// Reports whether a model has been loaded.
///
/// The flag is read synchronously on the serial engine queue, so this call waits for any
/// work already enqueued there before it returns.
///
/// - Returns: `true` once `loadModel` has completed successfully.
public func isReady() throws -> Bool {
    var ready = false
    queue.sync { ready = isLoaded }
    return ready
}
93
+
94
/// Returns a snapshot of the recorded conversation turns.
///
/// The array is copied out synchronously on the serial engine queue.
///
/// - Returns: The user and model messages recorded so far, in order.
public func getHistory() throws -> [Message] {
    var snapshot: [Message] = []
    queue.sync { snapshot = history }
    return snapshot
}
97
+
98
/// Clears the recorded history and statistics and, when a model is loaded, starts a new
/// native conversation.
///
/// Runs synchronously on the serial engine queue.
public func resetConversation() throws {
    let zeroedStats = GenerationStats(
        promptTokens: 0.0,
        completionTokens: 0.0,
        totalTokens: 0.0,
        timeToFirstToken: 0.0,
        totalTime: 0.0,
        tokensPerSecond: 0.0
    )

    queue.sync {
        history.removeAll()
        lastStats = zeroedStats

        // Without a loaded engine there is no native conversation to recreate.
        guard isLoaded, engine != nil else { return }
        createNewConversation()
    }
}
114
+
115
/// Returns the statistics recorded for the most recent generation.
///
/// The value is read synchronously on the serial engine queue.
///
/// - Returns: The latest `GenerationStats`; all zeros if nothing has been generated yet.
public func getStats() throws -> GenerationStats {
    var snapshot = lastStats
    queue.sync { snapshot = lastStats }
    return snapshot
}
118
+
119
/// Counts the tokens the loaded engine's tokenizer produces for `text`.
///
/// Runs synchronously on the serial engine queue.
///
/// - Parameter text: Text to tokenize.
/// - Returns: The token count, or `-1` when no engine is loaded or tokenization fails.
public func countTokens(text: String) throws -> Double {
    return queue.sync {
        guard let activeEngine = self.engine,
              let tokenized = litert_lm_engine_tokenize(activeEngine, text) else {
            return -1.0
        }
        // The tokenize result is owned by us and must be freed on every exit path.
        defer { litert_lm_tokenize_result_delete(tokenized) }

        return Double(litert_lm_tokenize_result_get_num_tokens(tokenized))
    }
}
132
+
133
/// Samples the process's current memory footprint.
///
/// The resident set size comes from Mach basic task info and is reported for both
/// `residentBytes` and `nativeHeapBytes`; both stay `0` if the query fails. The available
/// headroom comes from `os_proc_available_memory()`.
///
/// - Returns: A `MemoryUsage` whose `isLowMemory` flag is set when less than ~200 MB of
///   headroom remains.
public func getMemoryUsage() throws -> MemoryUsage {
    var taskInfo = mach_task_basic_info()
    // task_info expects the buffer size expressed in `integer_t` words.
    var wordCount = mach_msg_type_number_t(
        MemoryLayout<mach_task_basic_info>.size / MemoryLayout<integer_t>.size
    )

    let status = withUnsafeMutablePointer(to: &taskInfo) { infoPointer in
        infoPointer.withMemoryRebound(to: integer_t.self, capacity: Int(wordCount)) { words in
            task_info(mach_task_self_, task_flavor_t(MACH_TASK_BASIC_INFO), words, &wordCount)
        }
    }

    let residentSize: Double = (status == KERN_SUCCESS) ? Double(taskInfo.resident_size) : 0.0

    // Headroom left before the system terminates the process for memory pressure (iOS 13+).
    let headroom = Double(os_proc_available_memory())
    let lowMemoryThreshold = 200.0 * 1024.0 * 1024.0

    return MemoryUsage(
        nativeHeapBytes: residentSize,
        residentBytes: residentSize,
        availableMemoryBytes: headroom,
        isLowMemory: headroom < lowMemoryThreshold
    )
}
164
+
165
/// Tears down the current session by running `closeInternal()` synchronously on the serial
/// engine queue, after any work already enqueued there.
public func close() throws {
    queue.sync(execute: { self.closeInternal() })
}
170
+
171
+ // MARK: - Async Operations
172
+
173
/// Loads a model file and prepares a fresh conversation.
///
/// All work runs asynchronously on the serial engine queue. Any previous session is torn
/// down first. If a GPU/NPU main backend fails to initialise, progressively more
/// conservative CPU configurations are tried before the load is reported as failed.
///
/// - Parameters:
///   - modelPath: Filesystem path of the model. Its parent directory is used as the engine
///     cache directory.
///   - config: Optional overrides for the stored defaults. `topK` and `maxTokens` are ignored
///     when not finite and clamped to the `Int32` range, instead of trapping on conversion.
/// - Returns: A promise that resolves once the engine and conversation exist, or rejects
///   with code 500 when either cannot be created.
public func loadModel(modelPath: String, config: LLMConfig?) throws -> Promise<Void> {
    let promise = Promise<Void>()

    queue.async {
        // Teardown any previous contexts
        self.closeInternal()

        // Numbers coming from JS are floating point. `Int(_:)` traps on NaN, ±infinity and
        // out-of-range values, so reject non-finite input and clamp the rest to the Int32
        // range (maxTokens is later handed to the C API as an Int32).
        let safeInt: (Double) -> Int? = { value in
            guard value.isFinite else { return nil }
            let clamped = min(max(value, Double(Int32.min)), Double(Int32.max))
            return Int(clamped)
        }

        // Extract configurations
        if let config = config {
            if let b = config.backend { self.backend = b }
            if let t = config.temperature { self.temperature = t }
            if let k = config.topK, let sanitized = safeInt(Double(k)) { self.topK = sanitized }
            if let p = config.topP { self.topP = p }
            if let m = config.maxTokens, let sanitized = safeInt(Double(m)) { self.maxTokens = sanitized }
            if let s = config.systemPrompt { self.systemPrompt = s }
            self.tools = config.tools
            self.enableSpeculativeDecoding = config.enableSpeculativeDecoding ?? false
        } else {
            self.tools = nil
            self.enableSpeculativeDecoding = false
        }

        // Map the main backend to the engine's string identifier; NPU is mapped to "gpu".
        let mainBackendStr = self.backend == .gpu ? "gpu" : (self.backend == .npu ? "gpu" : "cpu")

        // Multimodal support: explicit config wins, otherwise sniff the model file name.
        let loweredPath = modelPath.lowercased()
        let isMultimodal = config?.multimodal ?? (loweredPath.contains("3n") || loweredPath.contains("gemma3"))
        let visionBackend = isMultimodal ? "gpu" : nil
        let audioBackend = isMultimodal ? "cpu" : nil

        // Set LiteRT C Log Level to WARNING (2) for clean production output
        litert_lm_set_min_log_level(2)

        // Builds an engine for one backend combination. The C strings are only valid inside
        // the nested closures, so the settings object is created within that scope.
        let createEngine = { (main: String, vision: String?, audio: String?) -> OpaquePointer? in
            let settings = modelPath.withCString { modelC in
                self.withOptionalCString(main) { mainC in
                    self.withOptionalCString(vision) { visionC in
                        self.withOptionalCString(audio) { audioC in
                            return litert_lm_engine_settings_create(modelC, mainC, visionC, audioC)
                        }
                    }
                }
            }

            guard let s = settings else { return nil }
            defer { litert_lm_engine_settings_delete(s) }

            // `clamping` keeps an oversized stored value from trapping on conversion.
            litert_lm_engine_settings_set_max_num_tokens(s, Int32(clamping: self.maxTokens))
            litert_lm_engine_settings_enable_benchmark(s)

            // Only enable speculative decoding when the model file reports support for it.
            if self.enableSpeculativeDecoding {
                if let loadedFile = litert_lm_loaded_file_create((modelPath as NSString).utf8String) {
                    let hasMtp = litert_lm_loaded_file_has_speculative_decoding_support(loadedFile)
                    litert_lm_loaded_file_delete(loadedFile)
                    if hasMtp {
                        litert_lm_engine_settings_set_enable_speculative_decoding(s, true)
                    }
                }
            }

            // Cache dir set to parent directory of model path
            let cacheDir = (modelPath as NSString).deletingLastPathComponent
            cacheDir.withCString { cacheC in
                litert_lm_engine_settings_set_cache_dir(s, cacheC)
            }

            return litert_lm_engine_create(s)
        }

        // Attempt primary backend configuration
        var rawEngine: OpaquePointer? = createEngine(mainBackendStr, visionBackend, audioBackend)

        // Fallback sequence if GPU/NPU fails to initialize
        if rawEngine == nil && mainBackendStr != "cpu" {
            let fallbacks: [(main: String, vision: String?, audio: String?)] = [
                ("cpu", "gpu", "cpu"), // CPU text with GPU-accelerated vision
                ("cpu", "cpu", "cpu"), // Full CPU execution for all modalities
                ("cpu", nil, nil)      // Text-only CPU execution (no vision/audio executors)
            ]
            for candidate in fallbacks {
                rawEngine = createEngine(candidate.main, candidate.vision, candidate.audio)
                if rawEngine != nil { break }
            }

            // Record that the session ended up on the CPU backend.
            if rawEngine != nil {
                self.backend = .cpu
            }
        }

        guard let engine = rawEngine else {
            promise.reject(withError: NSError(domain: "LiteRTLM", code: 500, userInfo: [NSLocalizedDescriptionKey: "Failed to construct LiteRT-LM engine. Checked backends and fallback chains."]))
            return
        }

        self.engine = engine
        self.createNewConversation()

        guard self.conversation != nil else {
            self.closeInternal()
            promise.reject(withError: NSError(domain: "LiteRTLM", code: 500, userInfo: [NSLocalizedDescriptionKey: "Failed to create conversation context."]))
            return
        }

        self.isLoaded = true
        promise.resolve()
    }

    return promise
}
288
+
289
/// Sends a text message and resolves with the complete model reply.
///
/// The blocking native call runs on the serial engine queue. On success the turn is appended
/// to the history and `lastStats` is updated from the engine's benchmark info, falling back
/// to a rough four-characters-per-token estimate when the engine reports no decode tokens.
///
/// - Parameter message: The user's prompt text.
/// - Returns: A promise resolving with the trimmed reply. It rejects with code 400 when no
///   conversation exists and with code 500 when the native call fails.
public func sendMessage(message: String) throws -> Promise<String> {
    let promise = Promise<String>()

    queue.async {
        guard let conversation = self.conversation else {
            promise.reject(withError: NSError(domain: "LiteRTLM", code: 400, userInfo: [NSLocalizedDescriptionKey: "LiteRTLM: No model loaded. Call loadModel() first."]))
            return
        }

        let msgJson = self.buildTextMessageJson(text: message)
        let startTime = Date()

        // Synchronous FFI call blocks only this interactive queue
        guard let response = litert_lm_conversation_send_message(conversation, msgJson, nil, nil) else {
            promise.reject(withError: NSError(domain: "LiteRTLM", code: 500, userInfo: [NSLocalizedDescriptionKey: "LiteRT-LM: sendMessage failed"]))
            return
        }
        defer { litert_lm_json_response_delete(response) }

        var result = ""
        if let responseStr = litert_lm_json_response_get_string(response) {
            result = self.extractTextFromResponse(String(cString: responseStr))
                .trimmingCharacters(in: .whitespacesAndNewlines)
        }

        let totalTime = Date().timeIntervalSince(startTime)

        var completionTokens = 0.0
        var tokensPerSecond = 0.0
        var ttft = 0.0

        // Prefer the engine's own measurements for the most recent decode turn.
        if let benchInfo = litert_lm_conversation_get_benchmark_info(conversation) {
            let numDecodeTurns = litert_lm_benchmark_info_get_num_decode_turns(benchInfo)
            if numDecodeTurns > 0 {
                let lastIdx = numDecodeTurns - 1
                tokensPerSecond = litert_lm_benchmark_info_get_decode_tokens_per_sec_at(benchInfo, lastIdx)
                completionTokens = Double(litert_lm_benchmark_info_get_decode_token_count_at(benchInfo, lastIdx))
            }
            ttft = litert_lm_benchmark_info_get_time_to_first_token(benchInfo)
            litert_lm_benchmark_info_delete(benchInfo)
        }

        // Rough estimate: about four characters per token.
        let promptTokens = Double(message.count) / 4.0
        if completionTokens == 0.0 {
            completionTokens = Double(result.count) / 4.0
        }

        // `Date` is wall-clock time, so the interval can be zero or negative after a clock
        // adjustment. Never divide by a non-positive duration.
        let effectiveRate: Double
        if tokensPerSecond > 0.0 {
            effectiveRate = tokensPerSecond
        } else if totalTime > 0.0 {
            effectiveRate = completionTokens / totalTime
        } else {
            effectiveRate = 0.0
        }

        self.lastStats = GenerationStats(
            promptTokens: promptTokens,
            completionTokens: completionTokens,
            totalTokens: promptTokens + completionTokens,
            timeToFirstToken: ttft,
            totalTime: totalTime,
            tokensPerSecond: effectiveRate
        )

        self.history.append(Message(role: .user, content: message))
        self.history.append(Message(role: .model, content: result))

        promise.resolve(withResult: result)
    }

    return promise
}
354
+
355
/// Sends a text message and streams the reply to `onToken` as it is generated.
///
/// The stream is started on the serial engine queue. Chunks are cleaned and forwarded from
/// the native callback. When the final callback arrives, statistics and history are
/// committed back on the engine queue before the promise resolves.
///
/// - Parameters:
///   - message: The user's prompt text.
///   - onToken: Called with each new text delta and `done == false`, then once with an empty
///     string and `done == true`. On error it is called once with `"Error: …"` and `true`.
/// - Returns: A promise that resolves when the stream has finished, and rejects when no
///   conversation exists (400), the stream cannot be started, or the engine reports an error (500).
public func sendMessageAsync(
    message: String,
    onToken: @escaping (_ token: String, _ done: Bool) -> Void
) throws -> Promise<Void> {
    let promise = Promise<Void>()

    queue.async {
        guard let conversation = self.conversation else {
            promise.reject(withError: NSError(domain: "LiteRTLM", code: 400, userInfo: [NSLocalizedDescriptionKey: "LiteRTLM: No model loaded. Call loadModel() first."]))
            return
        }

        let msgJson = self.buildTextMessageJson(text: message)

        let context = StreamContext(
            userMessage: message,
            startTime: Date(),
            onToken: onToken,
            promise: promise,
            parent: self
        )

        // Retained for the C API; released exactly once on the error, final, or start-failure path.
        let callbackData = Unmanaged.passRetained(context).toOpaque()

        // The callback must not capture local variables: everything it needs comes from `ctx`.
        let callback: LiteRtLmStreamCallback = { rawContext, chunk, isFinal, errorMsg in
            guard let rawContext = rawContext else { return }
            let ctx = Unmanaged<StreamContext>.fromOpaque(rawContext).takeUnretainedValue()

            if let errorMsg = errorMsg {
                let errorStr = String(cString: errorMsg)
                ctx.onToken("Error: \(errorStr)", true)
                ctx.promise.reject(withError: NSError(domain: "LiteRTLM", code: 500, userInfo: [NSLocalizedDescriptionKey: errorStr]))
                Unmanaged<StreamContext>.fromOpaque(rawContext).release()
                return
            }

            if isFinal {
                let totalTime = Date().timeIntervalSince(ctx.startTime)

                var finalCleaned = ctx.parent.stripControlTokens(ctx.rawResponse)
                    .trimmingCharacters(in: .whitespacesAndNewlines)
                // Drop the prompt if the model echoed it at the start of the reply.
                if !ctx.userMessage.isEmpty && finalCleaned.hasPrefix(ctx.userMessage) {
                    finalCleaned = String(finalCleaned.dropFirst(ctx.userMessage.count))
                        .trimmingCharacters(in: .whitespacesAndNewlines)
                }

                // Flush whatever was held back while streaming.
                if finalCleaned.count > ctx.lastEmittedLength {
                    ctx.onToken(String(finalCleaned.dropFirst(ctx.lastEmittedLength)), false)
                }
                ctx.fullResponse = finalCleaned

                // This callback fires on an engine-internal thread (the C API
                // returns once the stream *starts*), so commit the shared
                // lastStats/history — and the conversation benchmark read — on
                // the serial engine queue to avoid racing getStats()/getHistory().
                // Resolving inside the same block guarantees JS observes the
                // final turn before the promise settles.
                ctx.parent.queue.async {
                    var completionTokens = Double(ctx.tokenCount)
                    var tokensPerSecond = 0.0
                    var ttft = 0.0

                    // The session may have been closed while the stream was finishing;
                    // never hand a nil conversation handle to the C API.
                    if let activeConversation = ctx.parent.conversation,
                       let benchInfo = litert_lm_conversation_get_benchmark_info(activeConversation) {
                        let numDecodeTurns = litert_lm_benchmark_info_get_num_decode_turns(benchInfo)
                        if numDecodeTurns > 0 {
                            let lastIdx = numDecodeTurns - 1
                            tokensPerSecond = litert_lm_benchmark_info_get_decode_tokens_per_sec_at(benchInfo, lastIdx)
                            completionTokens = Double(litert_lm_benchmark_info_get_decode_token_count_at(benchInfo, lastIdx))
                        }
                        ttft = litert_lm_benchmark_info_get_time_to_first_token(benchInfo)
                        litert_lm_benchmark_info_delete(benchInfo)
                    }

                    // Rough estimate: about four characters per token.
                    let promptTokens = Double(ctx.userMessage.count) / 4.0
                    if completionTokens == 0.0 {
                        completionTokens = Double(ctx.fullResponse.count) / 4.0
                    }

                    // Wall-clock intervals can be zero or negative; never divide by them.
                    let effectiveRate: Double
                    if tokensPerSecond > 0.0 {
                        effectiveRate = tokensPerSecond
                    } else if totalTime > 0.0 {
                        effectiveRate = completionTokens / totalTime
                    } else {
                        effectiveRate = 0.0
                    }

                    ctx.parent.lastStats = GenerationStats(
                        promptTokens: promptTokens,
                        completionTokens: completionTokens,
                        totalTokens: promptTokens + completionTokens,
                        timeToFirstToken: ttft,
                        totalTime: totalTime,
                        tokensPerSecond: effectiveRate
                    )

                    ctx.parent.history.append(Message(role: .user, content: ctx.userMessage))
                    ctx.parent.history.append(Message(role: .model, content: ctx.fullResponse))

                    ctx.onToken("", true)
                    ctx.promise.resolve()
                    Unmanaged<StreamContext>.fromOpaque(rawContext).release()
                }
                return
            }

            if let chunk = chunk {
                let token = String(cString: chunk)
                // Some chunks arrive as a JSON message envelope; unwrap those to plain text.
                let raw: String
                if token.hasPrefix("{") && token.contains("\"role\"") {
                    raw = ctx.parent.extractTextFromResponse(token)
                } else {
                    raw = token
                }

                ctx.rawResponse += raw
                var processed = ctx.parent.stripControlTokens(ctx.rawResponse)
                    .trimmingLeadingCharacters(in: .whitespacesAndNewlines)
                if !ctx.userMessage.isEmpty && processed.hasPrefix(ctx.userMessage) {
                    processed = String(processed.dropFirst(ctx.userMessage.count))
                        .trimmingLeadingCharacters(in: .whitespacesAndNewlines)
                }

                // Emit only the part of the cleaned text that is new and safe to show.
                let safeLen = ctx.parent.safeEmitLength(processed)
                if safeLen > ctx.lastEmittedLength {
                    let newText = String(
                        processed
                            .dropFirst(ctx.lastEmittedLength)
                            .prefix(safeLen - ctx.lastEmittedLength)
                    )
                    ctx.lastEmittedLength = safeLen
                    ctx.tokenCount += 1
                    ctx.onToken(newText, false)
                }
            }
        }

        let status = litert_lm_conversation_send_message_stream(
            conversation,
            msgJson,
            nil,
            nil,
            callback,
            callbackData
        )

        // The stream never started, so no callback will release the context for us.
        if status != 0 {
            Unmanaged<StreamContext>.fromOpaque(callbackData).release()
            promise.reject(withError: NSError(domain: "LiteRTLM", code: Int(status), userInfo: [NSLocalizedDescriptionKey: "Failed to start streaming conversation."]))
        }
    }

    return promise
}
503
+
504
/// Sends a text message together with an image and resolves with the complete model reply.
///
/// The blocking native call runs on the serial engine queue. As in `sendMessage`, statistics
/// come from the engine's benchmark info when available and otherwise from a rough
/// four-characters-per-token estimate. The history entry for the user turn records the image
/// path as a textual suffix.
///
/// - Parameters:
///   - message: The user's prompt text.
///   - imagePath: Filesystem path of the image; must exist.
/// - Returns: A promise resolving with the trimmed reply. It rejects with code 400 when no
///   conversation exists, 404 when the image is missing and 500 when the native call fails.
public func sendMessageWithImage(message: String, imagePath: String) throws -> Promise<String> {
    let promise = Promise<String>()

    queue.async {
        guard let conversation = self.conversation else {
            promise.reject(withError: NSError(domain: "LiteRTLM", code: 400, userInfo: [NSLocalizedDescriptionKey: "LiteRTLM: No model loaded. Call loadModel() first."]))
            return
        }

        guard FileManager.default.fileExists(atPath: imagePath) else {
            promise.reject(withError: NSError(domain: "LiteRTLM", code: 404, userInfo: [NSLocalizedDescriptionKey: "Image file not found: \(imagePath)"]))
            return
        }

        let msgJson = self.buildImageMessageJson(text: message, imagePath: imagePath)
        let startTime = Date()

        guard let response = litert_lm_conversation_send_message(conversation, msgJson, nil, nil) else {
            promise.reject(withError: NSError(domain: "LiteRTLM", code: 500, userInfo: [NSLocalizedDescriptionKey: "LiteRT-LM: sendMessageWithImage failed"]))
            return
        }
        defer { litert_lm_json_response_delete(response) }

        var result = ""
        if let responseStr = litert_lm_json_response_get_string(response) {
            result = self.extractTextFromResponse(String(cString: responseStr))
                .trimmingCharacters(in: .whitespacesAndNewlines)
        }

        let totalTime = Date().timeIntervalSince(startTime)

        var completionTokens = 0.0
        var tokensPerSecond = 0.0
        var ttft = 0.0

        // Use the engine's measurements for the latest decode turn, as the text and
        // streaming paths do, instead of always estimating.
        if let benchInfo = litert_lm_conversation_get_benchmark_info(conversation) {
            let numDecodeTurns = litert_lm_benchmark_info_get_num_decode_turns(benchInfo)
            if numDecodeTurns > 0 {
                let lastIdx = numDecodeTurns - 1
                tokensPerSecond = litert_lm_benchmark_info_get_decode_tokens_per_sec_at(benchInfo, lastIdx)
                completionTokens = Double(litert_lm_benchmark_info_get_decode_token_count_at(benchInfo, lastIdx))
            }
            ttft = litert_lm_benchmark_info_get_time_to_first_token(benchInfo)
            litert_lm_benchmark_info_delete(benchInfo)
        }

        // Rough estimate: about four characters per token.
        let promptTokens = Double(message.count) / 4.0
        if completionTokens == 0.0 {
            completionTokens = Double(result.count) / 4.0
        }

        // Wall-clock intervals can be zero or negative; never divide by them.
        let effectiveRate: Double
        if tokensPerSecond > 0.0 {
            effectiveRate = tokensPerSecond
        } else if totalTime > 0.0 {
            effectiveRate = completionTokens / totalTime
        } else {
            effectiveRate = 0.0
        }

        self.lastStats = GenerationStats(
            promptTokens: promptTokens,
            completionTokens: completionTokens,
            totalTokens: promptTokens + completionTokens,
            timeToFirstToken: ttft,
            totalTime: totalTime,
            tokensPerSecond: effectiveRate
        )

        self.history.append(Message(role: .user, content: message + " [image: \(imagePath)]"))
        self.history.append(Message(role: .model, content: result))

        promise.resolve(withResult: result)
    }

    return promise
}
553
+
554
/// Sends a text message together with an image and streams the reply to `onToken`.
///
/// Mirrors `sendMessageAsync`: the stream is started on the serial engine queue, chunks are
/// cleaned and forwarded from the native callback, and the final statistics/history commit
/// is performed back on the engine queue before the promise resolves.
///
/// - Parameters:
///   - message: The user's prompt text.
///   - imagePath: Filesystem path of the image; must exist.
///   - onToken: Called with each new text delta and `done == false`, then once with an empty
///     string and `done == true`. On error it is called once with `"Error: …"` and `true`.
/// - Returns: A promise that resolves when the stream has finished, and rejects when no
///   conversation exists (400), the image is missing (404), the stream cannot be started,
///   or the engine reports an error (500).
public func sendMessageWithImageAsync(message: String, imagePath: String, onToken: @escaping (_ token: String, _ done: Bool) -> Void) throws -> Promise<Void> {
    /// Bundles the stream state with this turn's history text so the native callback can
    /// obtain both through its opaque pointer instead of capturing a local variable.
    final class ImageStreamBox {
        let stream: StreamContext
        let historyUserContent: String

        init(stream: StreamContext, historyUserContent: String) {
            self.stream = stream
            self.historyUserContent = historyUserContent
        }
    }

    let promise = Promise<Void>()

    queue.async {
        guard let conversation = self.conversation else {
            promise.reject(withError: NSError(domain: "LiteRTLM", code: 400, userInfo: [NSLocalizedDescriptionKey: "LiteRTLM: No model loaded. Call loadModel() first."]))
            return
        }

        guard FileManager.default.fileExists(atPath: imagePath) else {
            promise.reject(withError: NSError(domain: "LiteRTLM", code: 404, userInfo: [NSLocalizedDescriptionKey: "Image file not found: \(imagePath)"]))
            return
        }

        let msgJson = self.buildImageMessageJson(text: message, imagePath: imagePath)

        let context = StreamContext(
            userMessage: message,
            startTime: Date(),
            onToken: onToken,
            promise: promise,
            parent: self
        )
        let streamBox = ImageStreamBox(
            stream: context,
            historyUserContent: message + " [image: \(imagePath)]"
        )

        // Retained for the C API; released exactly once on the error, final, or start-failure path.
        let callbackData = Unmanaged.passRetained(streamBox).toOpaque()

        // The callback must not capture local variables: everything it needs comes from `box`.
        let callback: LiteRtLmStreamCallback = { rawContext, chunk, isFinal, errorMsg in
            guard let rawContext = rawContext else { return }
            let box = Unmanaged<ImageStreamBox>.fromOpaque(rawContext).takeUnretainedValue()
            let ctx = box.stream

            if let errorMsg = errorMsg {
                let errorStr = String(cString: errorMsg)
                ctx.onToken("Error: \(errorStr)", true)
                ctx.promise.reject(withError: NSError(domain: "LiteRTLM", code: 500, userInfo: [NSLocalizedDescriptionKey: errorStr]))
                Unmanaged<ImageStreamBox>.fromOpaque(rawContext).release()
                return
            }

            if isFinal {
                let totalTime = Date().timeIntervalSince(ctx.startTime)

                var finalCleaned = ctx.parent.stripControlTokens(ctx.rawResponse)
                    .trimmingCharacters(in: .whitespacesAndNewlines)
                // Drop the prompt if the model echoed it at the start of the reply.
                if !ctx.userMessage.isEmpty && finalCleaned.hasPrefix(ctx.userMessage) {
                    finalCleaned = String(finalCleaned.dropFirst(ctx.userMessage.count))
                        .trimmingCharacters(in: .whitespacesAndNewlines)
                }

                // Flush whatever was held back while streaming.
                if finalCleaned.count > ctx.lastEmittedLength {
                    ctx.onToken(String(finalCleaned.dropFirst(ctx.lastEmittedLength)), false)
                }
                ctx.fullResponse = finalCleaned

                // The text streaming path documents that this callback runs on an
                // engine-internal thread. Commit lastStats/history, and read the
                // conversation's benchmark info, on the serial engine queue so this does not
                // race getStats()/getHistory(). Resolving in the same block lets JS observe
                // the final turn before the promise settles.
                ctx.parent.queue.async {
                    var completionTokens = Double(ctx.tokenCount)
                    var tokensPerSecond = 0.0
                    var ttft = 0.0

                    // The session may have been closed while the stream was finishing;
                    // never hand a nil conversation handle to the C API.
                    if let activeConversation = ctx.parent.conversation,
                       let benchInfo = litert_lm_conversation_get_benchmark_info(activeConversation) {
                        let numDecodeTurns = litert_lm_benchmark_info_get_num_decode_turns(benchInfo)
                        if numDecodeTurns > 0 {
                            let lastIdx = numDecodeTurns - 1
                            tokensPerSecond = litert_lm_benchmark_info_get_decode_tokens_per_sec_at(benchInfo, lastIdx)
                            completionTokens = Double(litert_lm_benchmark_info_get_decode_token_count_at(benchInfo, lastIdx))
                        }
                        ttft = litert_lm_benchmark_info_get_time_to_first_token(benchInfo)
                        litert_lm_benchmark_info_delete(benchInfo)
                    }

                    // Rough estimate: about four characters per token.
                    let promptTokens = Double(ctx.userMessage.count) / 4.0
                    if completionTokens == 0.0 {
                        completionTokens = Double(ctx.fullResponse.count) / 4.0
                    }

                    // Wall-clock intervals can be zero or negative; never divide by them.
                    let effectiveRate: Double
                    if tokensPerSecond > 0.0 {
                        effectiveRate = tokensPerSecond
                    } else if totalTime > 0.0 {
                        effectiveRate = completionTokens / totalTime
                    } else {
                        effectiveRate = 0.0
                    }

                    ctx.parent.lastStats = GenerationStats(
                        promptTokens: promptTokens,
                        completionTokens: completionTokens,
                        totalTokens: promptTokens + completionTokens,
                        timeToFirstToken: ttft,
                        totalTime: totalTime,
                        tokensPerSecond: effectiveRate
                    )

                    ctx.parent.history.append(Message(role: .user, content: box.historyUserContent))
                    ctx.parent.history.append(Message(role: .model, content: ctx.fullResponse))

                    ctx.onToken("", true)
                    ctx.promise.resolve()
                    Unmanaged<ImageStreamBox>.fromOpaque(rawContext).release()
                }
                return
            }

            if let chunk = chunk {
                let token = String(cString: chunk)
                // Some chunks arrive as a JSON message envelope; unwrap those to plain text.
                let raw: String
                if token.hasPrefix("{") && token.contains("\"role\"") {
                    raw = ctx.parent.extractTextFromResponse(token)
                } else {
                    raw = token
                }

                ctx.rawResponse += raw
                var processed = ctx.parent.stripControlTokens(ctx.rawResponse)
                    .trimmingLeadingCharacters(in: .whitespacesAndNewlines)
                if !ctx.userMessage.isEmpty && processed.hasPrefix(ctx.userMessage) {
                    processed = String(processed.dropFirst(ctx.userMessage.count))
                        .trimmingLeadingCharacters(in: .whitespacesAndNewlines)
                }

                // Emit only the part of the cleaned text that is new and safe to show.
                let safeLen = ctx.parent.safeEmitLength(processed)
                if safeLen > ctx.lastEmittedLength {
                    let newText = String(
                        processed
                            .dropFirst(ctx.lastEmittedLength)
                            .prefix(safeLen - ctx.lastEmittedLength)
                    )
                    ctx.lastEmittedLength = safeLen
                    ctx.tokenCount += 1
                    ctx.onToken(newText, false)
                }
            }
        }

        let status = litert_lm_conversation_send_message_stream(
            conversation,
            msgJson,
            nil,
            nil,
            callback,
            callbackData
        )

        // The stream never started, so no callback will release the box for us.
        if status != 0 {
            Unmanaged<ImageStreamBox>.fromOpaque(callbackData).release()
            promise.reject(withError: NSError(domain: "LiteRTLM", code: Int(status), userInfo: [NSLocalizedDescriptionKey: "Failed to start streaming conversation."]))
        }
    }

    return promise
}
692
+
693
/// Streams the model's reply to a text prompt paired with an audio file.
///
/// The work runs on `queue`. Newly produced text is delivered through
/// `onToken(text, false)`; completion is signalled with `onToken("", true)`
/// just before the promise resolves. When the stream reports an error the
/// listener receives `"Error: …"` with `done == true` and the promise rejects.
///
/// - Parameters:
///   - message: User text. Also used to strip an echoed prompt from the reply
///     and to estimate the prompt token count.
///   - audioPath: Path of the audio file; the promise rejects with code 404 if
///     nothing exists at that path.
///   - onToken: Receives each newly emitted text fragment and the `done` flag.
/// - Returns: A promise that resolves once the final chunk has been processed.
public func sendMessageWithAudioAsync(message: String, audioPath: String, onToken: @escaping (_ token: String, _ done: Bool) -> Void) throws -> Promise<Void> {
    // Everything the stream callback needs travels through the opaque
    // `callbackData` pointer. The callback must not read locals of the
    // enclosing scope: if `LiteRtLmStreamCallback` is a C function pointer
    // type, a capturing closure cannot be converted to it.
    final class AudioStreamState {
        let stream: StreamContext
        let historyUserContent: String

        init(stream: StreamContext, historyUserContent: String) {
            self.stream = stream
            self.historyUserContent = historyUserContent
        }
    }

    let promise = Promise<Void>()

    queue.async {
        guard let conversation = self.conversation else {
            promise.reject(withError: NSError(domain: "LiteRTLM", code: 400, userInfo: [NSLocalizedDescriptionKey: "LiteRTLM: No model loaded. Call loadModel() first."]))
            return
        }

        if !FileManager.default.fileExists(atPath: audioPath) {
            promise.reject(withError: NSError(domain: "LiteRTLM", code: 404, userInfo: [NSLocalizedDescriptionKey: "Audio file not found: \(audioPath)"]))
            return
        }

        let msgJson = self.buildAudioMessageJson(text: message, audioPath: audioPath)
        let startTime = Date()

        let state = AudioStreamState(
            stream: StreamContext(
                userMessage: message,
                startTime: startTime,
                onToken: onToken,
                promise: promise,
                parent: self
            ),
            historyUserContent: message + " [audio: \(audioPath)]"
        )

        // +1 retain that is balanced by exactly one release: on error, on the
        // final chunk, or below when the stream fails to start.
        let callbackData = Unmanaged.passRetained(state).toOpaque()

        let callback: LiteRtLmStreamCallback = { callbackData, chunk, isFinal, errorMsg in
            guard let callbackData = callbackData else { return }
            let state = Unmanaged<AudioStreamState>.fromOpaque(callbackData).takeUnretainedValue()
            let ctx = state.stream

            if let errorMsg = errorMsg {
                let errorStr = String(cString: errorMsg)
                ctx.onToken("Error: \(errorStr)", true)
                ctx.promise.reject(withError: NSError(domain: "LiteRTLM", code: 500, userInfo: [NSLocalizedDescriptionKey: errorStr]))
                Unmanaged<AudioStreamState>.fromOpaque(callbackData).release()
                return
            }

            if isFinal {
                let endTime = Date()
                let totalTime = endTime.timeIntervalSince(ctx.startTime)

                let cleaned = ctx.parent.stripControlTokens(ctx.rawResponse)
                var finalCleaned = cleaned.trimmingCharacters(in: .whitespacesAndNewlines)
                if !ctx.userMessage.isEmpty && finalCleaned.hasPrefix(ctx.userMessage) {
                    finalCleaned = String(finalCleaned.dropFirst(ctx.userMessage.count))
                        .trimmingCharacters(in: .whitespacesAndNewlines)
                }

                // Flush whatever was held back while streaming.
                if finalCleaned.count > ctx.lastEmittedLength {
                    let startIdx = finalCleaned.index(finalCleaned.startIndex, offsetBy: ctx.lastEmittedLength)
                    let remaining = String(finalCleaned[startIdx...])
                    ctx.onToken(remaining, false)
                }
                ctx.fullResponse = finalCleaned

                var completionTokens = Double(ctx.tokenCount)
                var tokensPerSecond = 0.0
                var ttft = 0.0
                if let benchInfo = litert_lm_conversation_get_benchmark_info(ctx.parent.conversation) {
                    let numDecodeTurns = litert_lm_benchmark_info_get_num_decode_turns(benchInfo)
                    if numDecodeTurns > 0 {
                        let lastIdx = numDecodeTurns - 1
                        tokensPerSecond = litert_lm_benchmark_info_get_decode_tokens_per_sec_at(benchInfo, lastIdx)
                        completionTokens = Double(litert_lm_benchmark_info_get_decode_token_count_at(benchInfo, lastIdx))
                    }
                    ttft = litert_lm_benchmark_info_get_time_to_first_token(benchInfo)
                    litert_lm_benchmark_info_delete(benchInfo)
                }

                // Rough estimates (four characters per token) when no measured
                // count is available.
                let promptTokens = Double(ctx.userMessage.count) / 4.0
                if completionTokens == 0.0 {
                    completionTokens = Double(ctx.fullResponse.count) / 4.0
                }
                // Never divide by a zero elapsed time: that would publish
                // inf/NaN as the throughput.
                let fallbackTokensPerSecond = totalTime > 0.0 ? completionTokens / totalTime : 0.0
                ctx.parent.lastStats = GenerationStats(
                    promptTokens: promptTokens,
                    completionTokens: completionTokens,
                    totalTokens: promptTokens + completionTokens,
                    timeToFirstToken: ttft,
                    totalTime: totalTime,
                    tokensPerSecond: tokensPerSecond > 0.0 ? tokensPerSecond : fallbackTokensPerSecond
                )
                ctx.parent.history.append(Message(role: .user, content: state.historyUserContent))
                ctx.parent.history.append(Message(role: .model, content: ctx.fullResponse))
                ctx.onToken("", true)
                ctx.promise.resolve()
                Unmanaged<AudioStreamState>.fromOpaque(callbackData).release()
                return
            }

            if let chunk = chunk {
                let token = String(cString: chunk)
                let raw: String
                if token.hasPrefix("{") && token.contains("\"role\"") {
                    raw = ctx.parent.extractTextFromResponse(token)
                } else {
                    raw = token
                }

                ctx.rawResponse += raw
                let cleaned = ctx.parent.stripControlTokens(ctx.rawResponse)
                    .trimmingLeadingCharacters(in: .whitespacesAndNewlines)

                var processed = cleaned
                if !ctx.userMessage.isEmpty && processed.hasPrefix(ctx.userMessage) {
                    processed = String(processed.dropFirst(ctx.userMessage.count))
                        .trimmingLeadingCharacters(in: .whitespacesAndNewlines)
                }

                // Emit only the characters between the last emitted position
                // and the safe length reported by `safeEmitLength`.
                let safeLen = ctx.parent.safeEmitLength(processed)
                if safeLen > ctx.lastEmittedLength {
                    let newText = String(
                        processed
                            .dropFirst(ctx.lastEmittedLength)
                            .prefix(safeLen - ctx.lastEmittedLength)
                    )
                    ctx.lastEmittedLength = safeLen
                    ctx.tokenCount += 1
                    ctx.onToken(newText, false)
                }
            }
        }

        let status = litert_lm_conversation_send_message_stream(
            conversation,
            msgJson,
            nil,
            nil,
            callback,
            callbackData
        )
        if status != 0 {
            Unmanaged<AudioStreamState>.fromOpaque(callbackData).release()
            promise.reject(withError: NSError(domain: "LiteRTLM", code: Int(status), userInfo: [NSLocalizedDescriptionKey: "Failed to start streaming conversation."]))
        }
    }

    return promise
}
831
+
832
/// Sends a text prompt together with an audio file and resolves with the
/// complete reply.
///
/// Runs on `queue`. On success the exchange is appended to `history` and
/// `lastStats` is refreshed with character-based token estimates (four
/// characters per token).
///
/// - Parameters:
///   - message: User text that accompanies the audio.
///   - audioPath: Path of the audio file; the promise rejects with code 404 if
///     nothing exists at that path.
/// - Returns: A promise resolving to the trimmed reply text.
public func sendMessageWithAudio(message: String, audioPath: String) throws -> Promise<String> {
    let promise = Promise<String>()

    queue.async {
        guard let conversation = self.conversation else {
            promise.reject(withError: NSError(domain: "LiteRTLM", code: 400, userInfo: [NSLocalizedDescriptionKey: "LiteRTLM: No model loaded. Call loadModel() first."]))
            return
        }

        if !FileManager.default.fileExists(atPath: audioPath) {
            promise.reject(withError: NSError(domain: "LiteRTLM", code: 404, userInfo: [NSLocalizedDescriptionKey: "Audio file not found: \(audioPath)"]))
            return
        }

        let msgJson = self.buildAudioMessageJson(text: message, audioPath: audioPath)
        let startTime = Date()

        guard let response = litert_lm_conversation_send_message(conversation, msgJson, nil, nil) else {
            promise.reject(withError: NSError(domain: "LiteRTLM", code: 500, userInfo: [NSLocalizedDescriptionKey: "LiteRT-LM: sendMessageWithAudio failed"]))
            return
        }
        defer { litert_lm_json_response_delete(response) }

        var result = ""
        if let responseStr = litert_lm_json_response_get_string(response) {
            result = self.extractTextFromResponse(String(cString: responseStr))
                .trimmingCharacters(in: .whitespacesAndNewlines)
        }

        let totalTime = Date().timeIntervalSince(startTime)

        let promptTokens = Double(message.count) / 4.0
        let completionTokens = Double(result.count) / 4.0
        self.lastStats = GenerationStats(
            promptTokens: promptTokens,
            completionTokens: completionTokens,
            totalTokens: Double(message.count + result.count) / 4.0,
            timeToFirstToken: 0.0,
            totalTime: totalTime,
            // Guard the division so a zero elapsed time cannot yield inf/NaN.
            tokensPerSecond: totalTime > 0.0 ? completionTokens / totalTime : 0.0
        )

        self.history.append(Message(role: .user, content: message + " [audio: \(audioPath)]"))
        self.history.append(Message(role: .model, content: result))

        promise.resolve(withResult: result)
    }

    return promise
}
881
+
882
/// Runs a one-off inference over an ordered list of text, image and audio
/// parts and resolves with the first candidate's text.
///
/// A fresh session is created on the loaded engine for every call and deleted
/// afterwards. On success the exchange is appended to `history` (buffers are
/// represented by placeholders) and `lastStats` is refreshed.
///
/// - Parameter parts: Inputs in the order they should be fed to the model.
///   Parts whose payload is missing are skipped.
/// - Returns: A promise resolving to the trimmed reply text.
public func sendMultimodalMessage(parts: [MultimodalPart]) throws -> Promise<String> {
    let promise = Promise<String>()

    // Copies `count` bytes into freshly allocated storage, adding a trailing
    // NUL so text payloads remain valid C strings.
    func makeOwnedCopy(of bytes: UnsafeRawPointer?, count: Int) -> UnsafeMutableRawPointer {
        let storage = UnsafeMutableRawPointer.allocate(byteCount: count + 1, alignment: 16)
        if let bytes = bytes, count > 0 {
            storage.copyMemory(from: bytes, byteCount: count)
        }
        storage.storeBytes(of: 0, toByteOffset: count, as: UInt8.self)
        return storage
    }

    // Snapshot every payload before hopping queues. Buffers handed over by
    // the caller are only dereferenced here, synchronously; the background
    // work below reads the private copies instead.
    // NOTE(review): Nitro documents that JS-provided ArrayBuffers must be
    // copied before asynchronous use — confirm against the pinned version.
    var inputs: [LiteRtLmInputData] = []
    var ownedStorage: [UnsafeMutableRawPointer] = []

    for part in parts {
        switch part.type {
        case .text:
            if let text = part.text {
                let utf8 = Array(text.utf8)
                let storage = utf8.withUnsafeBytes { makeOwnedCopy(of: $0.baseAddress, count: $0.count) }
                ownedStorage.append(storage)
                inputs.append(LiteRtLmInputData(type: kLiteRtLmInputDataTypeText, data: storage, size: utf8.count))
            }
        case .image:
            if let imageBuffer = part.imageBuffer {
                let byteCount = imageBuffer.size
                let storage = makeOwnedCopy(of: imageBuffer.data, count: byteCount)
                ownedStorage.append(storage)
                inputs.append(LiteRtLmInputData(type: kLiteRtLmInputDataTypeImage, data: storage, size: byteCount))
            }
        case .audio:
            if let audioBuffer = part.audioBuffer {
                let byteCount = audioBuffer.size
                let storage = makeOwnedCopy(of: audioBuffer.data, count: byteCount)
                ownedStorage.append(storage)
                inputs.append(LiteRtLmInputData(type: kLiteRtLmInputDataTypeAudio, data: storage, size: byteCount))
            }
        }
    }

    // Prompt size estimate and the textual stand-in stored in the history.
    let totalInputLen = parts.reduce(0) { $0 + ($1.text?.count ?? 0) }
    var userTextRepresentation = ""
    for part in parts {
        if part.type == .text, let text = part.text {
            userTextRepresentation += text + " "
        } else if part.type == .image {
            userTextRepresentation += "[Image Buffer] "
        } else if part.type == .audio {
            userTextRepresentation += "[Audio Buffer] "
        }
    }
    userTextRepresentation = userTextRepresentation.trimmingCharacters(in: .whitespacesAndNewlines)

    let preparedInputs = inputs
    let allocations = ownedStorage
    let historyUserContent = userTextRepresentation

    queue.async {
        // Registered first so it runs last, after the session is gone.
        defer {
            for pointer in allocations {
                pointer.deallocate()
            }
        }

        guard let engine = self.engine else {
            promise.reject(withError: NSError(domain: "LiteRTLM", code: 400, userInfo: [NSLocalizedDescriptionKey: "LiteRTLM: No model loaded. Call loadModel() first."]))
            return
        }

        // Create session config
        guard let sessionConfig = litert_lm_session_config_create() else {
            promise.reject(withError: NSError(domain: "LiteRTLM", code: 500, userInfo: [NSLocalizedDescriptionKey: "LiteRTLM: Failed to create session config."]))
            return
        }
        defer { litert_lm_session_config_delete(sessionConfig) }

        litert_lm_session_config_set_max_output_tokens(sessionConfig, Int32(self.maxTokens))

        var sampler = LiteRtLmSamplerParams()
        sampler.type = kLiteRtLmSamplerTypeTopP
        sampler.top_k = Int32(self.topK)
        sampler.top_p = Float(self.topP)
        sampler.temperature = Float(self.temperature)
        sampler.seed = 0
        withUnsafePointer(to: &sampler) { samplerPtr in
            litert_lm_session_config_set_sampler_params(sessionConfig, samplerPtr)
        }

        guard let session = litert_lm_engine_create_session(engine, sessionConfig) else {
            promise.reject(withError: NSError(domain: "LiteRTLM", code: 500, userInfo: [NSLocalizedDescriptionKey: "LiteRTLM: Failed to create session."]))
            return
        }
        defer { litert_lm_session_delete(session) }

        let startTime = Date()

        // Run session inference
        guard let responses = litert_lm_session_generate_content(session, preparedInputs, preparedInputs.count) else {
            promise.reject(withError: NSError(domain: "LiteRTLM", code: 500, userInfo: [NSLocalizedDescriptionKey: "LiteRTLM: Session generate content failed."]))
            return
        }
        defer { litert_lm_responses_delete(responses) }

        var result = ""
        let numCandidates = litert_lm_responses_get_num_candidates(responses)
        if numCandidates > 0 {
            if let responseStr = litert_lm_responses_get_response_text_at(responses, 0) {
                result = String(cString: responseStr).trimmingCharacters(in: .whitespacesAndNewlines)
            }
        }

        let totalTime = Date().timeIntervalSince(startTime)

        // Update last stats using benchmark info from session
        var completionTokens = 0.0
        var tokensPerSecond = 0.0
        var ttft = 0.0

        if let benchInfo = litert_lm_session_get_benchmark_info(session) {
            let numDecodeTurns = litert_lm_benchmark_info_get_num_decode_turns(benchInfo)
            if numDecodeTurns > 0 {
                let lastIdx = numDecodeTurns - 1
                tokensPerSecond = litert_lm_benchmark_info_get_decode_tokens_per_sec_at(benchInfo, lastIdx)
                completionTokens = Double(litert_lm_benchmark_info_get_decode_token_count_at(benchInfo, lastIdx))
            }
            ttft = litert_lm_benchmark_info_get_time_to_first_token(benchInfo)
            litert_lm_benchmark_info_delete(benchInfo)
        }

        let promptTokens = Double(totalInputLen) / 4.0
        if completionTokens == 0.0 {
            completionTokens = Double(result.count) / 4.0
        }
        // Guard the fallback so a zero elapsed time cannot yield inf/NaN.
        let fallbackTokensPerSecond = totalTime > 0.0 ? completionTokens / totalTime : 0.0

        self.lastStats = GenerationStats(
            promptTokens: promptTokens,
            completionTokens: completionTokens,
            totalTokens: promptTokens + completionTokens,
            timeToFirstToken: ttft,
            totalTime: totalTime,
            tokensPerSecond: tokensPerSecond > 0.0 ? tokensPerSecond : fallbackTokensPerSecond
        )

        // Append to history
        self.history.append(Message(role: .user, content: historyUserContent))
        self.history.append(Message(role: .model, content: result))

        promise.resolve(withResult: result)
    }

    return promise
}
1017
+
1018
/// Downloads a model file over HTTPS into `<Caches>/litert_models/<fileName>`.
///
/// If a non-empty file already exists at the destination, the download is
/// skipped and the cached path is returned. Progress is reported in the range
/// 0...1, throttled to roughly ten updates per second while bytes arrive.
///
/// - Parameters:
///   - url: Source URL; must use the `https` scheme.
///   - fileName: Plain file name for the cached model. Empty names, `"."`,
///     `".."` sequences and path separators are rejected.
///   - onProgress: Optional progress listener.
/// - Returns: A promise resolving to the absolute path of the model file.
public func downloadModel(
    url: String,
    fileName: String,
    onProgress: ((Double) -> Void)?
) throws -> Promise<String> {
    let promise = Promise<String>()

    queue.async {
        do {
            // An empty name or "." resolves to the models directory itself,
            // which would then pass the cache check below and be handed back
            // as if it were a downloaded model.
            if fileName.isEmpty || fileName == "." {
                promise.reject(withError: NSError(domain: "LiteRTLM", code: 400, userInfo: [NSLocalizedDescriptionKey: "Invalid filename: a non-empty file name is required."]))
                return
            }

            if fileName.contains("..") || fileName.contains("/") || fileName.contains("\\") {
                promise.reject(withError: NSError(domain: "LiteRTLM", code: 400, userInfo: [NSLocalizedDescriptionKey: "Invalid filename: path traversal or directory separators are not allowed."]))
                return
            }

            let cachesDir = NSSearchPathForDirectoriesInDomains(.cachesDirectory, .userDomainMask, true).first ?? NSTemporaryDirectory()
            let modelsDir = (cachesDir as NSString).appendingPathComponent("litert_models")

            let fileManager = FileManager.default
            if !fileManager.fileExists(atPath: modelsDir) {
                try fileManager.createDirectory(atPath: modelsDir, withIntermediateDirectories: true, attributes: nil)
            }

            let destPath = (modelsDir as NSString).appendingPathComponent(fileName)

            // Fast cache check
            if fileManager.fileExists(atPath: destPath) {
                let attrs = try fileManager.attributesOfItem(atPath: destPath)
                if let fileSize = attrs[.size] as? UInt64, fileSize > 0 {
                    onProgress?(1.0)
                    promise.resolve(withResult: destPath)
                    return
                }
            }

            guard let downloadUrl = URL(string: url), downloadUrl.scheme?.lowercased() == "https" else {
                promise.reject(withError: NSError(domain: "LiteRTLM", code: 400, userInfo: [NSLocalizedDescriptionKey: "Invalid download URL: HTTPS is required for security."]))
                return
            }

            onProgress?(0.0)

            let sessionConfig = URLSessionConfiguration.default
            sessionConfig.timeoutIntervalForRequest = 30
            sessionConfig.timeoutIntervalForResource = 3600

            let session = URLSession(configuration: sessionConfig)
            var progressHandler: NSKeyValueObservation?

            let task = session.downloadTask(with: downloadUrl) { location, response, error in
                progressHandler?.invalidate()

                if let error = error {
                    promise.reject(withError: error)
                    return
                }

                if let httpResponse = response as? HTTPURLResponse, httpResponse.statusCode >= 400 {
                    promise.reject(withError: NSError(domain: "LiteRTLM", code: httpResponse.statusCode, userInfo: [NSLocalizedDescriptionKey: "HTTP \(httpResponse.statusCode)"]))
                    return
                }

                guard let location = location else {
                    promise.reject(withError: NSError(domain: "LiteRTLM", code: 500, userInfo: [NSLocalizedDescriptionKey: "No download location found."]))
                    return
                }

                do {
                    // Replace any stale (e.g. zero-byte) file at the destination.
                    if fileManager.fileExists(atPath: destPath) {
                        try fileManager.removeItem(atPath: destPath)
                    }
                    try fileManager.moveItem(at: location, to: URL(fileURLWithPath: destPath))
                    onProgress?(1.0)
                    promise.resolve(withResult: destPath)
                } catch {
                    promise.reject(withError: error)
                }
            }

            if let onProgress = onProgress {
                var lastUpdate = Date()
                progressHandler = task.observe(\.countOfBytesReceived, options: [.new]) { task, _ in
                    let expected = task.countOfBytesExpectedToReceive
                    if expected > 0 {
                        let now = Date()
                        // Throttled progress notifications to 10Hz
                        if now.timeIntervalSince(lastUpdate) > 0.1 {
                            let progress = Double(task.countOfBytesReceived) / Double(expected)
                            onProgress(progress)
                            lastUpdate = now
                        }
                    }
                }
            }

            task.resume()
            // Lets the running task finish, then releases the session.
            session.finishTasksAndInvalidate()
        } catch {
            promise.reject(withError: error)
        }
    }

    return promise
}
1121
+
1122
/// Deletes a cached model file from `<Caches>/litert_models`.
///
/// If the file existed and a model is currently loaded, the engine is closed
/// via `closeInternal()`. Deleting a file that does not exist is not an error.
///
/// - Parameter fileName: Plain file name of the cached model. Empty names,
///   `"."`, `".."` sequences and path separators are rejected.
/// - Returns: A promise that resolves once the file is gone.
public func deleteModel(fileName: String) throws -> Promise<Void> {
    let promise = Promise<Void>()

    queue.async {
        do {
            // An empty name or "." resolves to the models directory itself;
            // removing that would wipe every cached model.
            if fileName.isEmpty || fileName == "." {
                promise.reject(withError: NSError(domain: "LiteRTLM", code: 400, userInfo: [NSLocalizedDescriptionKey: "Invalid filename: a non-empty file name is required."]))
                return
            }

            if fileName.contains("..") || fileName.contains("/") || fileName.contains("\\") {
                promise.reject(withError: NSError(domain: "LiteRTLM", code: 400, userInfo: [NSLocalizedDescriptionKey: "Invalid filename: path traversal or directory separators are not allowed."]))
                return
            }

            let cachesDir = NSSearchPathForDirectoriesInDomains(.cachesDirectory, .userDomainMask, true).first ?? NSTemporaryDirectory()
            let modelsDir = (cachesDir as NSString).appendingPathComponent("litert_models")
            let destPath = (modelsDir as NSString).appendingPathComponent(fileName)

            let fileManager = FileManager.default
            if fileManager.fileExists(atPath: destPath) {
                try fileManager.removeItem(atPath: destPath)
                if self.isLoaded {
                    self.closeInternal()
                }
            }
            promise.resolve()
        } catch {
            promise.reject(withError: error)
        }
    }

    return promise
}
1151
+
1152
+ // MARK: - Internal Engine Helpers
1153
+
1154
/// Replaces the current conversation with a fresh one built on the loaded engine.
///
/// The new conversation is configured from `maxTokens`, `topK`, `topP`,
/// `temperature`, the optional `systemPrompt` and the optional `tools`.
/// Returns without a conversation when no engine is loaded or when a config
/// object cannot be created.
private func createNewConversation() {
    guard let engine = self.engine else { return }

    // Dispose of the previous conversation before building its replacement.
    if let previous = self.conversation {
        litert_lm_conversation_delete(previous)
        self.conversation = nil
    }

    guard let conversationConfig = litert_lm_conversation_config_create() else { return }
    defer { litert_lm_conversation_config_delete(conversationConfig) }

    guard let sessionConfig = litert_lm_session_config_create() else { return }
    defer { litert_lm_session_config_delete(sessionConfig) }

    litert_lm_session_config_set_max_output_tokens(sessionConfig, Int32(self.maxTokens))

    var samplerParams = LiteRtLmSamplerParams()
    samplerParams.type = kLiteRtLmSamplerTypeTopP
    samplerParams.top_k = Int32(self.topK)
    samplerParams.top_p = Float(self.topP)
    samplerParams.temperature = Float(self.temperature)
    samplerParams.seed = 0
    withUnsafePointer(to: &samplerParams) { paramsPointer in
        litert_lm_session_config_set_sampler_params(sessionConfig, paramsPointer)
    }

    litert_lm_conversation_config_set_session_config(conversationConfig, sessionConfig)

    // Optional system message, serialized as a single JSON object.
    if let prompt = self.systemPrompt {
        let systemMessage = "{\"role\":\"system\",\"content\":\"\(escapeJson(prompt))\"}"
        systemMessage.withCString { messagePointer in
            litert_lm_conversation_config_set_system_message(conversationConfig, messagePointer)
        }
    }

    // Optional tool declarations, serialized as a JSON array of
    // `{"type": "function", "function": {...}}` entries. A tool whose
    // `parametersJson` cannot be parsed is declared without parameters.
    if let tools = self.tools, !tools.isEmpty {
        let declarations: [[String: Any]] = tools.map { tool -> [String: Any] in
            var function: [String: Any] = ["name": tool.name, "description": tool.description]
            if let rawParameters = tool.parametersJson.data(using: .utf8),
               let parameters = try? JSONSerialization.jsonObject(with: rawParameters, options: []) {
                function["parameters"] = parameters
            }
            return ["type": "function", "function": function]
        }
        if let encoded = try? JSONSerialization.data(withJSONObject: declarations, options: []),
           let toolsJson = String(data: encoded, encoding: .utf8) {
            toolsJson.withCString { toolsPointer in
                litert_lm_conversation_config_set_tools(conversationConfig, toolsPointer)
            }
        }
    }

    self.conversation = litert_lm_conversation_create(engine, conversationConfig)
}
1209
+
1210
/// Tears down the loaded model: marks it unloaded, clears the history,
/// deletes the native conversation and engine, and zeroes `lastStats`.
private func closeInternal() {
    isLoaded = false
    history.removeAll()

    // Conversation first, then the engine.
    if let activeConversation = conversation {
        litert_lm_conversation_delete(activeConversation)
        conversation = nil
    }
    if let activeEngine = engine {
        litert_lm_engine_delete(activeEngine)
        engine = nil
    }

    let zero = 0.0
    lastStats = GenerationStats(
        promptTokens: zero,
        completionTokens: zero,
        totalTokens: zero,
        timeToFirstToken: zero,
        totalTime: zero,
        tokensPerSecond: zero
    )
}
1232
+
1233
+ // MARK: - String and JSON Preprocessing Helpers
1234
+
1235
/// Turn and end-of-sequence markers that are removed from model output.
/// The role-qualified `<start_of_turn>…` variants come before the bare
/// `<start_of_turn>` marker; `stripControlTokens` applies them in this order.
private let kControlTokens: [String] = [
    "<end_of_turn>",
    "<start_of_turn>model",
    "<start_of_turn>user",
    "<start_of_turn>",
    "<eos>",
]
1242
+
1243
/// Escapes `input` so it can be placed between the quotes of a JSON string.
///
/// Works on Unicode scalars rather than `Character`s: a grapheme cluster such
/// as `"\r\n"`, or a quote followed by a combining mark, is a single
/// `Character` that would match none of the cases and slip through unescaped.
/// Control characters below U+0020 without a short form are written as `\u00XX`.
///
/// - Parameter input: Raw text.
/// - Returns: The escaped text, without surrounding quotes.
private func escapeJson(_ input: String) -> String {
    var output = ""
    output.reserveCapacity(input.utf8.count)
    for scalar in input.unicodeScalars {
        switch scalar {
        case "\"": output += "\\\""
        case "\\": output += "\\\\"
        case "\n": output += "\\n"
        case "\r": output += "\\r"
        case "\t": output += "\\t"
        case "\u{0008}": output += "\\b"
        case "\u{000C}": output += "\\f"
        default:
            if scalar.value < 0x20 {
                // JSON forbids raw control characters inside strings.
                let hex = String(scalar.value, radix: 16)
                output += "\\u" + String(repeating: "0", count: 4 - hex.count) + hex
            } else {
                output.unicodeScalars.append(scalar)
            }
        }
    }
    return output
}
1259
+
1260
/// Builds the JSON for a plain-text user message: `{"role":"user","content":"…"}`.
private func buildTextMessageJson(text: String) -> String {
    let escapedText = escapeJson(text)
    return "{\"role\":\"user\",\"content\":\"\(escapedText)\"}"
}
1263
+
1264
/// Builds the JSON for a user message whose content is a text part followed
/// by an image part referenced by file path.
private func buildImageMessageJson(text: String, imagePath: String) -> String {
    let textPart = "{\"type\":\"text\",\"text\":\"\(escapeJson(text))\"}"
    let imagePart = "{\"type\":\"image\",\"path\":\"\(escapeJson(imagePath))\"}"
    return "{\"role\":\"user\",\"content\":[\(textPart),\(imagePart)]}"
}
1270
+
1271
/// Builds the JSON for a user message whose content is a text part followed
/// by an audio part referenced by file path.
private func buildAudioMessageJson(text: String, audioPath: String) -> String {
    let textPart = "{\"type\":\"text\",\"text\":\"\(escapeJson(text))\"}"
    let audioPart = "{\"type\":\"audio\",\"path\":\"\(escapeJson(audioPath))\"}"
    return "{\"role\":\"user\",\"content\":[\(textPart),\(audioPart)]}"
}
1277
+
1278
/// Returns `text` with every occurrence of each entry of `kControlTokens`
/// removed, applying the entries in array order.
private func stripControlTokens(_ text: String) -> String {
    return kControlTokens.reduce(text) { partial, marker in
        partial.replacingOccurrences(of: marker, with: "")
    }
}
1285
+
1286
/// Returns how many leading characters of `text` can be emitted without
/// cutting a control token in half.
///
/// If the text from the last `<` onwards is a strict prefix of some entry of
/// `kControlTokens`, the result is the character offset of that `<`;
/// otherwise it is the full character count.
private func safeEmitLength(_ text: String) -> Int {
    guard let angleIndex = text.lastIndex(of: "<") else {
        return text.count
    }
    let tail = String(text[angleIndex...])
    let tailLength = tail.count
    let mayBecomeToken = kControlTokens.contains { marker in
        marker.hasPrefix(tail) && tailLength < marker.count
    }
    if mayBecomeToken {
        return text.distance(from: text.startIndex, to: angleIndex)
    }
    return text.count
}
1299
+
1300
/// Pulls the reply text out of a JSON message and strips control tokens.
///
/// `content` may be a plain string or an array of parts; for an array, the
/// `text` values of all parts with `"type": "text"` are concatenated. When the
/// input is not a JSON object, has no `content`, or `content` has another
/// shape, the raw input is returned with control tokens stripped.
private func extractTextFromResponse(_ jsonResponse: String) -> String {
    let decoded = jsonResponse.data(using: .utf8).flatMap { payload in
        try? JSONSerialization.jsonObject(with: payload, options: [])
    }
    guard let message = decoded as? [String: Any], let content = message["content"] else {
        return stripControlTokens(jsonResponse)
    }

    if let plainText = content as? String {
        return stripControlTokens(plainText)
    }

    if let segments = content as? [[String: Any]] {
        let joined = segments.compactMap { segment -> String? in
            guard let kind = segment["type"] as? String, kind == "text" else { return nil }
            return segment["text"] as? String
        }.joined()
        return stripControlTokens(joined)
    }

    return stripControlTokens(jsonResponse)
}
1323
+
1324
/// Calls `block` with a C-string view of `string`, or with `nil` when
/// `string` is `nil`, and returns the block's result. The pointer is only
/// valid for the duration of the call.
private func withOptionalCString<R>(_ string: String?, _ block: (UnsafePointer<CChar>?) -> R) -> R {
    guard let value = string else {
        return block(nil)
    }
    return value.withCString { pointer in
        block(pointer)
    }
}
1331
+ }
1332
+
1333
+ // MARK: - String Trimming Extension
1334
+
1335
private extension String {
    /// Returns the string with leading characters removed for as long as every
    /// Unicode scalar of the character belongs to `characterSet`. Trailing
    /// characters are left untouched; a string made up entirely of such
    /// characters yields `""`.
    func trimmingLeadingCharacters(in characterSet: CharacterSet) -> String {
        let remainder = drop { character in
            character.unicodeScalars.allSatisfy { scalar in characterSet.contains(scalar) }
        }
        return String(remainder)
    }
}