@inferrlm/react-native-mlx 0.4.2-alpha.3 → 0.4.2-alpha.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ios/Sources/HybridLLM.swift +74 -8
- package/package.json +1 -1
|
@@ -268,6 +268,9 @@ class HybridLLM: HybridLLMSpec {
|
|
|
268
268
|
var firstTokenTime: Date?
|
|
269
269
|
var tokenCount = 0
|
|
270
270
|
|
|
271
|
+
log("stream_start prompt=\(prompt.count)chars history=\(self.messageHistory.count) manageHistory=\(self.manageHistory)")
|
|
272
|
+
log("stream_prompt: \(prompt.prefix(300))")
|
|
273
|
+
|
|
271
274
|
let result = try await self.performGeneration(
|
|
272
275
|
container: container,
|
|
273
276
|
prompt: prompt,
|
|
@@ -296,7 +299,8 @@ class HybridLLM: HybridLLMSpec {
|
|
|
296
299
|
toolExecutionTime: 0
|
|
297
300
|
)
|
|
298
301
|
|
|
299
|
-
log("
|
|
302
|
+
log("stream_done tokens=\(tokenCount) tps=\(String(format: "%.1f", tokensPerSecond)) result=\(result.count)chars")
|
|
303
|
+
log("stream_result_preview: \(result.prefix(300))")
|
|
300
304
|
return result
|
|
301
305
|
}
|
|
302
306
|
|
|
@@ -308,6 +312,7 @@ class HybridLLM: HybridLLMSpec {
|
|
|
308
312
|
if self.manageHistory {
|
|
309
313
|
self.messageHistory.append(LLMMessage(role: "user", content: prompt))
|
|
310
314
|
self.messageHistory.append(LLMMessage(role: "assistant", content: result))
|
|
315
|
+
log("stream_history_updated count=\(self.messageHistory.count)")
|
|
311
316
|
}
|
|
312
317
|
|
|
313
318
|
return result
|
|
@@ -394,11 +399,14 @@ class HybridLLM: HybridLLMSpec {
|
|
|
394
399
|
) -> [Chat.Message] {
|
|
395
400
|
var chat: [Chat.Message] = []
|
|
396
401
|
|
|
402
|
+
log("build_chat depth=\(depth) history=\(self.messageHistory.count) prompt=\(prompt.count)chars")
|
|
403
|
+
|
|
397
404
|
if !self.systemPrompt.isEmpty {
|
|
398
405
|
chat.append(.system(self.systemPrompt))
|
|
406
|
+
log(" [system] \(self.systemPrompt.prefix(80))...")
|
|
399
407
|
}
|
|
400
408
|
|
|
401
|
-
for msg in self.messageHistory {
|
|
409
|
+
for (i, msg) in self.messageHistory.enumerated() {
|
|
402
410
|
switch msg.role {
|
|
403
411
|
case "user": chat.append(.user(msg.content))
|
|
404
412
|
case "assistant": chat.append(.assistant(msg.content))
|
|
@@ -406,18 +414,22 @@ class HybridLLM: HybridLLMSpec {
|
|
|
406
414
|
case "tool": chat.append(.tool(msg.content))
|
|
407
415
|
default: break
|
|
408
416
|
}
|
|
417
|
+
log(" [\(i):\(msg.role)] \(msg.content.prefix(120))")
|
|
409
418
|
}
|
|
410
419
|
|
|
411
420
|
if depth == 0 {
|
|
412
421
|
chat.append(.user(prompt))
|
|
422
|
+
log(" [prompt] \(prompt.prefix(200))")
|
|
413
423
|
}
|
|
414
424
|
|
|
415
425
|
if let toolResults {
|
|
416
|
-
for result in toolResults {
|
|
426
|
+
for (i, result) in toolResults.enumerated() {
|
|
417
427
|
chat.append(.tool(result))
|
|
428
|
+
log(" [tool_result_\(i)] \(result.prefix(100))")
|
|
418
429
|
}
|
|
419
430
|
}
|
|
420
431
|
|
|
432
|
+
log("chat_built total=\(chat.count) messages")
|
|
421
433
|
return chat
|
|
422
434
|
}
|
|
423
435
|
|
|
@@ -451,12 +463,15 @@ class HybridLLM: HybridLLMSpec {
|
|
|
451
463
|
var output = ""
|
|
452
464
|
var thinkingMachine = ThinkingStateMachine()
|
|
453
465
|
var pendingToolCalls: [(id: String, tool: ToolDefinition, args: [String: Any], argsJson: String)] = []
|
|
466
|
+
var rawTokenLog = ""
|
|
454
467
|
|
|
455
468
|
let specialTokenPattern = try? NSRegularExpression(
|
|
456
469
|
pattern: "<\\|(?:im_end|im_start|endoftext|end|pad)\\|>",
|
|
457
470
|
options: []
|
|
458
471
|
)
|
|
459
472
|
|
|
473
|
+
log("perform_gen_events depth=\(depth) prompt=\(prompt.count)chars toolResults=\(toolResults?.count ?? 0)")
|
|
474
|
+
|
|
460
475
|
let chat = buildChatMessages(prompt: prompt, toolResults: toolResults, depth: depth)
|
|
461
476
|
let userInput = UserInput(
|
|
462
477
|
chat: chat,
|
|
@@ -464,6 +479,7 @@ class HybridLLM: HybridLLMSpec {
|
|
|
464
479
|
)
|
|
465
480
|
|
|
466
481
|
let lmInput = try await container.prepare(input: userInput)
|
|
482
|
+
log("perform_gen_events input_prepared")
|
|
467
483
|
|
|
468
484
|
let stream = try await container.perform { context in
|
|
469
485
|
let parameters = GenerateParameters(maxTokens: 2048, temperature: 0.7)
|
|
@@ -474,11 +490,21 @@ class HybridLLM: HybridLLMSpec {
|
|
|
474
490
|
)
|
|
475
491
|
}
|
|
476
492
|
|
|
493
|
+
var chunkCount = 0
|
|
477
494
|
for await generation in stream {
|
|
478
|
-
if Task.isCancelled { break }
|
|
495
|
+
if Task.isCancelled {
|
|
496
|
+
log("perform_gen_events cancelled at chunk=\(chunkCount)")
|
|
497
|
+
break
|
|
498
|
+
}
|
|
479
499
|
|
|
480
500
|
switch generation {
|
|
481
501
|
case .chunk(let text):
|
|
502
|
+
chunkCount += 1
|
|
503
|
+
rawTokenLog += text
|
|
504
|
+
if chunkCount <= 20 || chunkCount % 50 == 0 {
|
|
505
|
+
log("raw_chunk_events[\(chunkCount)] \(text.debugDescription)")
|
|
506
|
+
}
|
|
507
|
+
|
|
482
508
|
let outputs = thinkingMachine.process(token: text)
|
|
483
509
|
|
|
484
510
|
for machineOutput in outputs {
|
|
@@ -486,11 +512,15 @@ class HybridLLM: HybridLLMSpec {
|
|
|
486
512
|
case .token(let token):
|
|
487
513
|
var cleaned = token
|
|
488
514
|
if let regex = specialTokenPattern {
|
|
515
|
+
let before = cleaned
|
|
489
516
|
cleaned = regex.stringByReplacingMatches(
|
|
490
517
|
in: cleaned,
|
|
491
518
|
range: NSRange(cleaned.startIndex..., in: cleaned),
|
|
492
519
|
withTemplate: ""
|
|
493
520
|
)
|
|
521
|
+
if before != cleaned {
|
|
522
|
+
log("stripped_special_events: \(before.debugDescription) -> \(cleaned.debugDescription)")
|
|
523
|
+
}
|
|
494
524
|
}
|
|
495
525
|
if !cleaned.isEmpty {
|
|
496
526
|
output += cleaned
|
|
@@ -499,12 +529,14 @@ class HybridLLM: HybridLLMSpec {
|
|
|
499
529
|
}
|
|
500
530
|
|
|
501
531
|
case .thinkingStart:
|
|
532
|
+
log("thinking_start_events at chunk=\(chunkCount)")
|
|
502
533
|
emitter.emitThinkingStart()
|
|
503
534
|
|
|
504
535
|
case .thinkingChunk(let chunk):
|
|
505
536
|
emitter.emitThinkingChunk(chunk)
|
|
506
537
|
|
|
507
538
|
case .thinkingEnd(let content):
|
|
539
|
+
log("thinking_end_events at chunk=\(chunkCount)")
|
|
508
540
|
emitter.emitThinkingEnd(content)
|
|
509
541
|
}
|
|
510
542
|
}
|
|
@@ -525,12 +557,15 @@ class HybridLLM: HybridLLMSpec {
|
|
|
525
557
|
pendingToolCalls.append((id: toolCallId, tool: tool, args: argsDict, argsJson: argsJson))
|
|
526
558
|
|
|
527
559
|
case .info(let info):
|
|
528
|
-
log("
|
|
560
|
+
log("gen_info_events chunks=\(chunkCount) genTokens=\(info.generationTokenCount) tps=\(String(format: "%.1f", info.tokensPerSecond))")
|
|
529
561
|
let generationTime = info.tokensPerSecond > 0 ? Double(info.generationTokenCount) / info.tokensPerSecond * 1000 : 0
|
|
530
562
|
onGenerationInfo(info.generationTokenCount, generationTime)
|
|
531
563
|
}
|
|
532
564
|
}
|
|
533
565
|
|
|
566
|
+
log("perform_gen_events_loop_done chunks=\(chunkCount) output=\(output.count)chars")
|
|
567
|
+
log("raw_output_events_first500: \(rawTokenLog.prefix(500))")
|
|
568
|
+
|
|
534
569
|
let flushOutputs = thinkingMachine.flush()
|
|
535
570
|
for machineOutput in flushOutputs {
|
|
536
571
|
switch machineOutput {
|
|
@@ -622,12 +657,15 @@ class HybridLLM: HybridLLMSpec {
|
|
|
622
657
|
var output = ""
|
|
623
658
|
var thinkingMachine = ThinkingStateMachine()
|
|
624
659
|
var pendingToolCalls: [(tool: ToolDefinition, args: [String: Any], argsJson: String)] = []
|
|
660
|
+
var rawTokenLog = ""
|
|
625
661
|
|
|
626
662
|
let specialTokenPattern = try? NSRegularExpression(
|
|
627
663
|
pattern: "<\\|(?:im_end|im_start|endoftext|end|pad)\\|>",
|
|
628
664
|
options: []
|
|
629
665
|
)
|
|
630
666
|
|
|
667
|
+
log("perform_gen depth=\(depth) prompt=\(prompt.count)chars toolResults=\(toolResults?.count ?? 0)")
|
|
668
|
+
|
|
631
669
|
let chat = buildChatMessages(prompt: prompt, toolResults: toolResults, depth: depth)
|
|
632
670
|
let userInput = UserInput(
|
|
633
671
|
chat: chat,
|
|
@@ -635,6 +673,7 @@ class HybridLLM: HybridLLMSpec {
|
|
|
635
673
|
)
|
|
636
674
|
|
|
637
675
|
let lmInput = try await container.prepare(input: userInput)
|
|
676
|
+
log("perform_gen input_prepared")
|
|
638
677
|
|
|
639
678
|
let stream = try await container.perform { context in
|
|
640
679
|
let parameters = GenerateParameters(maxTokens: 2048, temperature: 0.7)
|
|
@@ -645,11 +684,21 @@ class HybridLLM: HybridLLMSpec {
|
|
|
645
684
|
)
|
|
646
685
|
}
|
|
647
686
|
|
|
687
|
+
var chunkCount = 0
|
|
648
688
|
for await generation in stream {
|
|
649
|
-
if Task.isCancelled { break }
|
|
689
|
+
if Task.isCancelled {
|
|
690
|
+
log("perform_gen cancelled at chunk=\(chunkCount)")
|
|
691
|
+
break
|
|
692
|
+
}
|
|
650
693
|
|
|
651
694
|
switch generation {
|
|
652
695
|
case .chunk(let text):
|
|
696
|
+
chunkCount += 1
|
|
697
|
+
rawTokenLog += text
|
|
698
|
+
if chunkCount <= 20 || chunkCount % 50 == 0 {
|
|
699
|
+
log("raw_chunk[\(chunkCount)] \(text.debugDescription)")
|
|
700
|
+
}
|
|
701
|
+
|
|
653
702
|
let outputs = thinkingMachine.process(token: text)
|
|
654
703
|
|
|
655
704
|
for machineOutput in outputs {
|
|
@@ -657,11 +706,15 @@ class HybridLLM: HybridLLMSpec {
|
|
|
657
706
|
case .token(let token):
|
|
658
707
|
var cleaned = token
|
|
659
708
|
if let regex = specialTokenPattern {
|
|
709
|
+
let before = cleaned
|
|
660
710
|
cleaned = regex.stringByReplacingMatches(
|
|
661
711
|
in: cleaned,
|
|
662
712
|
range: NSRange(cleaned.startIndex..., in: cleaned),
|
|
663
713
|
withTemplate: ""
|
|
664
714
|
)
|
|
715
|
+
if before != cleaned {
|
|
716
|
+
log("stripped_special: \(before.debugDescription) -> \(cleaned.debugDescription)")
|
|
717
|
+
}
|
|
665
718
|
}
|
|
666
719
|
if !cleaned.isEmpty {
|
|
667
720
|
output += cleaned
|
|
@@ -669,12 +722,14 @@ class HybridLLM: HybridLLMSpec {
|
|
|
669
722
|
}
|
|
670
723
|
|
|
671
724
|
case .thinkingStart:
|
|
725
|
+
log("thinking_start at chunk=\(chunkCount)")
|
|
672
726
|
onToken("<think>")
|
|
673
727
|
|
|
674
728
|
case .thinkingChunk(let chunk):
|
|
675
729
|
onToken(chunk)
|
|
676
730
|
|
|
677
731
|
case .thinkingEnd:
|
|
732
|
+
log("thinking_end at chunk=\(chunkCount)")
|
|
678
733
|
onToken("</think>")
|
|
679
734
|
}
|
|
680
735
|
}
|
|
@@ -694,11 +749,17 @@ class HybridLLM: HybridLLMSpec {
|
|
|
694
749
|
onToolCall(toolCall.function.name, argsJson)
|
|
695
750
|
|
|
696
751
|
case .info(let info):
|
|
697
|
-
log("
|
|
752
|
+
log("gen_info chunks=\(chunkCount) genTokens=\(info.generationTokenCount) tps=\(String(format: "%.1f", info.tokensPerSecond))")
|
|
698
753
|
}
|
|
699
754
|
}
|
|
700
755
|
|
|
756
|
+
log("perform_gen_loop_done chunks=\(chunkCount) output=\(output.count)chars")
|
|
757
|
+
log("raw_output_first500: \(rawTokenLog.prefix(500))")
|
|
758
|
+
|
|
701
759
|
let flushOutputs = thinkingMachine.flush()
|
|
760
|
+
if !flushOutputs.isEmpty {
|
|
761
|
+
log("flush_outputs count=\(flushOutputs.count)")
|
|
762
|
+
}
|
|
702
763
|
for machineOutput in flushOutputs {
|
|
703
764
|
switch machineOutput {
|
|
704
765
|
case .token(let token):
|
|
@@ -773,6 +834,7 @@ class HybridLLM: HybridLLMSpec {
|
|
|
773
834
|
return output + continuation
|
|
774
835
|
}
|
|
775
836
|
|
|
837
|
+
log("perform_gen_result output=\(output.count)chars preview: \(output.prefix(200))")
|
|
776
838
|
return output
|
|
777
839
|
}
|
|
778
840
|
|
|
@@ -856,10 +918,14 @@ class HybridLLM: HybridLLMSpec {
|
|
|
856
918
|
}
|
|
857
919
|
|
|
858
920
|
func clearHistory() throws {
|
|
921
|
+
log("clear_history before=\(messageHistory.count) messages")
|
|
922
|
+
for (i, msg) in messageHistory.enumerated() {
|
|
923
|
+
log(" clearing[\(i):\(msg.role)] \(msg.content.prefix(80))")
|
|
924
|
+
}
|
|
859
925
|
messageHistory = []
|
|
860
926
|
if let container = self.container {
|
|
861
927
|
self.session = ChatSession(container, instructions: self.systemPrompt)
|
|
862
928
|
}
|
|
863
|
-
log("
|
|
929
|
+
log("clear_history done session_reset")
|
|
864
930
|
}
|
|
865
931
|
}
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@inferrlm/react-native-mlx",
|
|
3
3
|
"description": "MLX Swift integration for React Native - InferrLM fork with enhanced features",
|
|
4
|
-
"version": "0.4.2-alpha.3",
|
|
4
|
+
"version": "0.4.2-alpha.5",
|
|
5
5
|
"main": "./lib/module/index.js",
|
|
6
6
|
"module": "./lib/module/index.js",
|
|
7
7
|
"types": "./lib/typescript/src/index.d.ts",
|