@inferrlm/react-native-mlx 0.4.2-alpha.2 → 0.4.2-alpha.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ios/Sources/HybridLLM.swift +155 -25
- package/package.json +1 -1
|
@@ -230,10 +230,6 @@ class HybridLLM: HybridLLMSpec {
|
|
|
230
230
|
}
|
|
231
231
|
|
|
232
232
|
return Promise.async { [self] in
|
|
233
|
-
if self.manageHistory {
|
|
234
|
-
self.messageHistory.append(LLMMessage(role: "user", content: prompt))
|
|
235
|
-
}
|
|
236
|
-
|
|
237
233
|
let task = Task<String, Error> {
|
|
238
234
|
log("Generating response for: \(prompt.prefix(50))...")
|
|
239
235
|
let result = try await session.respond(to: prompt)
|
|
@@ -247,6 +243,7 @@ class HybridLLM: HybridLLMSpec {
|
|
|
247
243
|
let result = try await task.value
|
|
248
244
|
|
|
249
245
|
if self.manageHistory {
|
|
246
|
+
self.messageHistory.append(LLMMessage(role: "user", content: prompt))
|
|
250
247
|
self.messageHistory.append(LLMMessage(role: "assistant", content: result))
|
|
251
248
|
}
|
|
252
249
|
|
|
@@ -266,15 +263,14 @@ class HybridLLM: HybridLLMSpec {
|
|
|
266
263
|
}
|
|
267
264
|
|
|
268
265
|
return Promise.async { [self] in
|
|
269
|
-
if self.manageHistory {
|
|
270
|
-
self.messageHistory.append(LLMMessage(role: "user", content: prompt))
|
|
271
|
-
}
|
|
272
|
-
|
|
273
266
|
let task = Task<String, Error> {
|
|
274
267
|
let startTime = Date()
|
|
275
268
|
var firstTokenTime: Date?
|
|
276
269
|
var tokenCount = 0
|
|
277
270
|
|
|
271
|
+
log("stream_start prompt=\(prompt.count)chars history=\(self.messageHistory.count) manageHistory=\(self.manageHistory)")
|
|
272
|
+
log("stream_prompt: \(prompt.prefix(300))")
|
|
273
|
+
|
|
278
274
|
let result = try await self.performGeneration(
|
|
279
275
|
container: container,
|
|
280
276
|
prompt: prompt,
|
|
@@ -303,7 +299,8 @@ class HybridLLM: HybridLLMSpec {
|
|
|
303
299
|
toolExecutionTime: 0
|
|
304
300
|
)
|
|
305
301
|
|
|
306
|
-
log("
|
|
302
|
+
log("stream_done tokens=\(tokenCount) tps=\(String(format: "%.1f", tokensPerSecond)) result=\(result.count)chars")
|
|
303
|
+
log("stream_result_preview: \(result.prefix(300))")
|
|
307
304
|
return result
|
|
308
305
|
}
|
|
309
306
|
|
|
@@ -313,7 +310,9 @@ class HybridLLM: HybridLLMSpec {
|
|
|
313
310
|
let result = try await task.value
|
|
314
311
|
|
|
315
312
|
if self.manageHistory {
|
|
313
|
+
self.messageHistory.append(LLMMessage(role: "user", content: prompt))
|
|
316
314
|
self.messageHistory.append(LLMMessage(role: "assistant", content: result))
|
|
315
|
+
log("stream_history_updated count=\(self.messageHistory.count)")
|
|
317
316
|
}
|
|
318
317
|
|
|
319
318
|
return result
|
|
@@ -329,10 +328,6 @@ class HybridLLM: HybridLLMSpec {
|
|
|
329
328
|
}
|
|
330
329
|
|
|
331
330
|
return Promise.async { [self] in
|
|
332
|
-
if self.manageHistory {
|
|
333
|
-
self.messageHistory.append(LLMMessage(role: "user", content: prompt))
|
|
334
|
-
}
|
|
335
|
-
|
|
336
331
|
let task = Task<String, Error> {
|
|
337
332
|
let startTime = Date()
|
|
338
333
|
var firstTokenTime: Date?
|
|
@@ -389,6 +384,7 @@ class HybridLLM: HybridLLMSpec {
|
|
|
389
384
|
let result = try await task.value
|
|
390
385
|
|
|
391
386
|
if self.manageHistory {
|
|
387
|
+
self.messageHistory.append(LLMMessage(role: "user", content: prompt))
|
|
392
388
|
self.messageHistory.append(LLMMessage(role: "assistant", content: result))
|
|
393
389
|
}
|
|
394
390
|
|
|
@@ -403,11 +399,14 @@ class HybridLLM: HybridLLMSpec {
|
|
|
403
399
|
) -> [Chat.Message] {
|
|
404
400
|
var chat: [Chat.Message] = []
|
|
405
401
|
|
|
402
|
+
log("build_chat depth=\(depth) history=\(self.messageHistory.count) prompt=\(prompt.count)chars")
|
|
403
|
+
|
|
406
404
|
if !self.systemPrompt.isEmpty {
|
|
407
405
|
chat.append(.system(self.systemPrompt))
|
|
406
|
+
log(" [system] \(self.systemPrompt.prefix(80))...")
|
|
408
407
|
}
|
|
409
408
|
|
|
410
|
-
for msg in self.messageHistory {
|
|
409
|
+
for (i, msg) in self.messageHistory.enumerated() {
|
|
411
410
|
switch msg.role {
|
|
412
411
|
case "user": chat.append(.user(msg.content))
|
|
413
412
|
case "assistant": chat.append(.assistant(msg.content))
|
|
@@ -415,18 +414,22 @@ class HybridLLM: HybridLLMSpec {
|
|
|
415
414
|
case "tool": chat.append(.tool(msg.content))
|
|
416
415
|
default: break
|
|
417
416
|
}
|
|
417
|
+
log(" [\(i):\(msg.role)] \(msg.content.prefix(120))")
|
|
418
418
|
}
|
|
419
419
|
|
|
420
420
|
if depth == 0 {
|
|
421
421
|
chat.append(.user(prompt))
|
|
422
|
+
log(" [prompt] \(prompt.prefix(200))")
|
|
422
423
|
}
|
|
423
424
|
|
|
424
425
|
if let toolResults {
|
|
425
|
-
for result in toolResults {
|
|
426
|
+
for (i, result) in toolResults.enumerated() {
|
|
426
427
|
chat.append(.tool(result))
|
|
428
|
+
log(" [tool_result_\(i)] \(result.prefix(100))")
|
|
427
429
|
}
|
|
428
430
|
}
|
|
429
431
|
|
|
432
|
+
log("chat_built total=\(chat.count) messages")
|
|
430
433
|
return chat
|
|
431
434
|
}
|
|
432
435
|
|
|
@@ -460,6 +463,14 @@ class HybridLLM: HybridLLMSpec {
|
|
|
460
463
|
var output = ""
|
|
461
464
|
var thinkingMachine = ThinkingStateMachine()
|
|
462
465
|
var pendingToolCalls: [(id: String, tool: ToolDefinition, args: [String: Any], argsJson: String)] = []
|
|
466
|
+
var rawTokenLog = ""
|
|
467
|
+
|
|
468
|
+
let specialTokenPattern = try? NSRegularExpression(
|
|
469
|
+
pattern: "<\\|(?:im_end|im_start|endoftext|end|pad)\\|>",
|
|
470
|
+
options: []
|
|
471
|
+
)
|
|
472
|
+
|
|
473
|
+
log("perform_gen_events depth=\(depth) prompt=\(prompt.count)chars toolResults=\(toolResults?.count ?? 0)")
|
|
463
474
|
|
|
464
475
|
let chat = buildChatMessages(prompt: prompt, toolResults: toolResults, depth: depth)
|
|
465
476
|
let userInput = UserInput(
|
|
@@ -468,6 +479,7 @@ class HybridLLM: HybridLLMSpec {
|
|
|
468
479
|
)
|
|
469
480
|
|
|
470
481
|
let lmInput = try await container.prepare(input: userInput)
|
|
482
|
+
log("perform_gen_events input_prepared")
|
|
471
483
|
|
|
472
484
|
let stream = try await container.perform { context in
|
|
473
485
|
let parameters = GenerateParameters(maxTokens: 2048, temperature: 0.7)
|
|
@@ -478,30 +490,57 @@ class HybridLLM: HybridLLMSpec {
|
|
|
478
490
|
)
|
|
479
491
|
}
|
|
480
492
|
|
|
493
|
+
var chunkCount = 0
|
|
481
494
|
for await generation in stream {
|
|
482
|
-
if Task.isCancelled {
|
|
495
|
+
if Task.isCancelled {
|
|
496
|
+
log("perform_gen_events cancelled at chunk=\(chunkCount)")
|
|
497
|
+
break
|
|
498
|
+
}
|
|
483
499
|
|
|
484
500
|
switch generation {
|
|
485
501
|
case .chunk(let text):
|
|
502
|
+
chunkCount += 1
|
|
503
|
+
rawTokenLog += text
|
|
504
|
+
if chunkCount <= 20 || chunkCount % 50 == 0 {
|
|
505
|
+
log("raw_chunk_events[\(chunkCount)] \(text.debugDescription)")
|
|
506
|
+
}
|
|
507
|
+
|
|
486
508
|
let outputs = thinkingMachine.process(token: text)
|
|
487
509
|
|
|
488
510
|
for machineOutput in outputs {
|
|
489
511
|
switch machineOutput {
|
|
490
512
|
case .token(let token):
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
513
|
+
var cleaned = token
|
|
514
|
+
if let regex = specialTokenPattern {
|
|
515
|
+
let before = cleaned
|
|
516
|
+
cleaned = regex.stringByReplacingMatches(
|
|
517
|
+
in: cleaned,
|
|
518
|
+
range: NSRange(cleaned.startIndex..., in: cleaned),
|
|
519
|
+
withTemplate: ""
|
|
520
|
+
)
|
|
521
|
+
if before != cleaned {
|
|
522
|
+
log("stripped_special_events: \(before.debugDescription) -> \(cleaned.debugDescription)")
|
|
523
|
+
}
|
|
524
|
+
}
|
|
525
|
+
if !cleaned.isEmpty {
|
|
526
|
+
output += cleaned
|
|
527
|
+
emitter.emitToken(cleaned)
|
|
528
|
+
onTokenProcessed()
|
|
529
|
+
}
|
|
494
530
|
|
|
495
531
|
case .thinkingStart:
|
|
532
|
+
log("thinking_start_events at chunk=\(chunkCount)")
|
|
496
533
|
emitter.emitThinkingStart()
|
|
497
534
|
|
|
498
535
|
case .thinkingChunk(let chunk):
|
|
499
536
|
emitter.emitThinkingChunk(chunk)
|
|
500
537
|
|
|
501
538
|
case .thinkingEnd(let content):
|
|
539
|
+
log("thinking_end_events at chunk=\(chunkCount)")
|
|
502
540
|
emitter.emitThinkingEnd(content)
|
|
503
541
|
}
|
|
504
542
|
}
|
|
543
|
+
}
|
|
505
544
|
|
|
506
545
|
case .toolCall(let toolCall):
|
|
507
546
|
log("Tool call detected: \(toolCall.function.name)")
|
|
@@ -519,12 +558,15 @@ class HybridLLM: HybridLLMSpec {
|
|
|
519
558
|
pendingToolCalls.append((id: toolCallId, tool: tool, args: argsDict, argsJson: argsJson))
|
|
520
559
|
|
|
521
560
|
case .info(let info):
|
|
522
|
-
log("
|
|
561
|
+
log("gen_info_events chunks=\(chunkCount) genTokens=\(info.generationTokenCount) tps=\(String(format: "%.1f", info.tokensPerSecond))")
|
|
523
562
|
let generationTime = info.tokensPerSecond > 0 ? Double(info.generationTokenCount) / info.tokensPerSecond * 1000 : 0
|
|
524
563
|
onGenerationInfo(info.generationTokenCount, generationTime)
|
|
525
564
|
}
|
|
526
565
|
}
|
|
527
566
|
|
|
567
|
+
log("perform_gen_events_loop_done chunks=\(chunkCount) output=\(output.count)chars")
|
|
568
|
+
log("raw_output_events_first500: \(rawTokenLog.prefix(500))")
|
|
569
|
+
|
|
528
570
|
let flushOutputs = thinkingMachine.flush()
|
|
529
571
|
for machineOutput in flushOutputs {
|
|
530
572
|
switch machineOutput {
|
|
@@ -614,7 +656,16 @@ class HybridLLM: HybridLLMSpec {
|
|
|
614
656
|
}
|
|
615
657
|
|
|
616
658
|
var output = ""
|
|
659
|
+
var thinkingMachine = ThinkingStateMachine()
|
|
617
660
|
var pendingToolCalls: [(tool: ToolDefinition, args: [String: Any], argsJson: String)] = []
|
|
661
|
+
var rawTokenLog = ""
|
|
662
|
+
|
|
663
|
+
let specialTokenPattern = try? NSRegularExpression(
|
|
664
|
+
pattern: "<\\|(?:im_end|im_start|endoftext|end|pad)\\|>",
|
|
665
|
+
options: []
|
|
666
|
+
)
|
|
667
|
+
|
|
668
|
+
log("perform_gen depth=\(depth) prompt=\(prompt.count)chars toolResults=\(toolResults?.count ?? 0)")
|
|
618
669
|
|
|
619
670
|
let chat = buildChatMessages(prompt: prompt, toolResults: toolResults, depth: depth)
|
|
620
671
|
let userInput = UserInput(
|
|
@@ -623,6 +674,7 @@ class HybridLLM: HybridLLMSpec {
|
|
|
623
674
|
)
|
|
624
675
|
|
|
625
676
|
let lmInput = try await container.prepare(input: userInput)
|
|
677
|
+
log("perform_gen input_prepared")
|
|
626
678
|
|
|
627
679
|
let stream = try await container.perform { context in
|
|
628
680
|
let parameters = GenerateParameters(maxTokens: 2048, temperature: 0.7)
|
|
@@ -633,13 +685,55 @@ class HybridLLM: HybridLLMSpec {
|
|
|
633
685
|
)
|
|
634
686
|
}
|
|
635
687
|
|
|
688
|
+
var chunkCount = 0
|
|
636
689
|
for await generation in stream {
|
|
637
|
-
if Task.isCancelled {
|
|
690
|
+
if Task.isCancelled {
|
|
691
|
+
log("perform_gen cancelled at chunk=\(chunkCount)")
|
|
692
|
+
break
|
|
693
|
+
}
|
|
638
694
|
|
|
639
695
|
switch generation {
|
|
640
696
|
case .chunk(let text):
|
|
641
|
-
|
|
642
|
-
|
|
697
|
+
chunkCount += 1
|
|
698
|
+
rawTokenLog += text
|
|
699
|
+
if chunkCount <= 20 || chunkCount % 50 == 0 {
|
|
700
|
+
log("raw_chunk[\(chunkCount)] \(text.debugDescription)")
|
|
701
|
+
}
|
|
702
|
+
|
|
703
|
+
let outputs = thinkingMachine.process(token: text)
|
|
704
|
+
|
|
705
|
+
for machineOutput in outputs {
|
|
706
|
+
switch machineOutput {
|
|
707
|
+
case .token(let token):
|
|
708
|
+
var cleaned = token
|
|
709
|
+
if let regex = specialTokenPattern {
|
|
710
|
+
let before = cleaned
|
|
711
|
+
cleaned = regex.stringByReplacingMatches(
|
|
712
|
+
in: cleaned,
|
|
713
|
+
range: NSRange(cleaned.startIndex..., in: cleaned),
|
|
714
|
+
withTemplate: ""
|
|
715
|
+
)
|
|
716
|
+
if before != cleaned {
|
|
717
|
+
log("stripped_special: \(before.debugDescription) -> \(cleaned.debugDescription)")
|
|
718
|
+
}
|
|
719
|
+
}
|
|
720
|
+
if !cleaned.isEmpty {
|
|
721
|
+
output += cleaned
|
|
722
|
+
onToken(cleaned)
|
|
723
|
+
}
|
|
724
|
+
|
|
725
|
+
case .thinkingStart:
|
|
726
|
+
log("thinking_start at chunk=\(chunkCount)")
|
|
727
|
+
onToken("<think>")
|
|
728
|
+
|
|
729
|
+
case .thinkingChunk(let chunk):
|
|
730
|
+
onToken(chunk)
|
|
731
|
+
|
|
732
|
+
case .thinkingEnd:
|
|
733
|
+
log("thinking_end at chunk=\(chunkCount)")
|
|
734
|
+
onToken("</think>")
|
|
735
|
+
}
|
|
736
|
+
}
|
|
643
737
|
|
|
644
738
|
case .toolCall(let toolCall):
|
|
645
739
|
log("Tool call detected: \(toolCall.function.name)")
|
|
@@ -656,7 +750,38 @@ class HybridLLM: HybridLLMSpec {
|
|
|
656
750
|
onToolCall(toolCall.function.name, argsJson)
|
|
657
751
|
|
|
658
752
|
case .info(let info):
|
|
659
|
-
log("
|
|
753
|
+
log("gen_info chunks=\(chunkCount) genTokens=\(info.generationTokenCount) tps=\(String(format: "%.1f", info.tokensPerSecond))")
|
|
754
|
+
}
|
|
755
|
+
}
|
|
756
|
+
|
|
757
|
+
log("perform_gen_loop_done chunks=\(chunkCount) output=\(output.count)chars")
|
|
758
|
+
log("raw_output_first500: \(rawTokenLog.prefix(500))")
|
|
759
|
+
|
|
760
|
+
let flushOutputs = thinkingMachine.flush()
|
|
761
|
+
if !flushOutputs.isEmpty {
|
|
762
|
+
log("flush_outputs count=\(flushOutputs.count)")
|
|
763
|
+
}
|
|
764
|
+
for machineOutput in flushOutputs {
|
|
765
|
+
switch machineOutput {
|
|
766
|
+
case .token(let token):
|
|
767
|
+
var cleaned = token
|
|
768
|
+
if let regex = specialTokenPattern {
|
|
769
|
+
cleaned = regex.stringByReplacingMatches(
|
|
770
|
+
in: cleaned,
|
|
771
|
+
range: NSRange(cleaned.startIndex..., in: cleaned),
|
|
772
|
+
withTemplate: ""
|
|
773
|
+
)
|
|
774
|
+
}
|
|
775
|
+
if !cleaned.isEmpty {
|
|
776
|
+
output += cleaned
|
|
777
|
+
onToken(cleaned)
|
|
778
|
+
}
|
|
779
|
+
case .thinkingStart:
|
|
780
|
+
onToken("<think>")
|
|
781
|
+
case .thinkingChunk(let chunk):
|
|
782
|
+
onToken(chunk)
|
|
783
|
+
case .thinkingEnd:
|
|
784
|
+
onToken("</think>")
|
|
660
785
|
}
|
|
661
786
|
}
|
|
662
787
|
|
|
@@ -710,6 +835,7 @@ class HybridLLM: HybridLLMSpec {
|
|
|
710
835
|
return output + continuation
|
|
711
836
|
}
|
|
712
837
|
|
|
838
|
+
log("perform_gen_result output=\(output.count)chars preview: \(output.prefix(200))")
|
|
713
839
|
return output
|
|
714
840
|
}
|
|
715
841
|
|
|
@@ -793,10 +919,14 @@ class HybridLLM: HybridLLMSpec {
|
|
|
793
919
|
}
|
|
794
920
|
|
|
795
921
|
func clearHistory() throws {
|
|
922
|
+
log("clear_history before=\(messageHistory.count) messages")
|
|
923
|
+
for (i, msg) in messageHistory.enumerated() {
|
|
924
|
+
log(" clearing[\(i):\(msg.role)] \(msg.content.prefix(80))")
|
|
925
|
+
}
|
|
796
926
|
messageHistory = []
|
|
797
927
|
if let container = self.container {
|
|
798
928
|
self.session = ChatSession(container, instructions: self.systemPrompt)
|
|
799
929
|
}
|
|
800
|
-
log("
|
|
930
|
+
log("clear_history done session_reset")
|
|
801
931
|
}
|
|
802
932
|
}
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@inferrlm/react-native-mlx",
|
|
3
3
|
"description": "MLX Swift integration for React Native - InferrLM fork with enhanced features",
|
|
4
|
-
"version": "0.4.2-alpha.2",
|
|
4
|
+
"version": "0.4.2-alpha.4",
|
|
5
5
|
"main": "./lib/module/index.js",
|
|
6
6
|
"module": "./lib/module/index.js",
|
|
7
7
|
"types": "./lib/typescript/src/index.d.ts",
|