@inferrlm/react-native-mlx 0.4.2-alpha.3 → 0.4.2-alpha.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ios/Sources/HybridLLM.swift +75 -8
- package/package.json +1 -1
|
@@ -268,6 +268,9 @@ class HybridLLM: HybridLLMSpec {
|
|
|
268
268
|
var firstTokenTime: Date?
|
|
269
269
|
var tokenCount = 0
|
|
270
270
|
|
|
271
|
+
log("stream_start prompt=\(prompt.count)chars history=\(self.messageHistory.count) manageHistory=\(self.manageHistory)")
|
|
272
|
+
log("stream_prompt: \(prompt.prefix(300))")
|
|
273
|
+
|
|
271
274
|
let result = try await self.performGeneration(
|
|
272
275
|
container: container,
|
|
273
276
|
prompt: prompt,
|
|
@@ -296,7 +299,8 @@ class HybridLLM: HybridLLMSpec {
|
|
|
296
299
|
toolExecutionTime: 0
|
|
297
300
|
)
|
|
298
301
|
|
|
299
|
-
log("
|
|
302
|
+
log("stream_done tokens=\(tokenCount) tps=\(String(format: "%.1f", tokensPerSecond)) result=\(result.count)chars")
|
|
303
|
+
log("stream_result_preview: \(result.prefix(300))")
|
|
300
304
|
return result
|
|
301
305
|
}
|
|
302
306
|
|
|
@@ -308,6 +312,7 @@ class HybridLLM: HybridLLMSpec {
|
|
|
308
312
|
if self.manageHistory {
|
|
309
313
|
self.messageHistory.append(LLMMessage(role: "user", content: prompt))
|
|
310
314
|
self.messageHistory.append(LLMMessage(role: "assistant", content: result))
|
|
315
|
+
log("stream_history_updated count=\(self.messageHistory.count)")
|
|
311
316
|
}
|
|
312
317
|
|
|
313
318
|
return result
|
|
@@ -394,11 +399,14 @@ class HybridLLM: HybridLLMSpec {
|
|
|
394
399
|
) -> [Chat.Message] {
|
|
395
400
|
var chat: [Chat.Message] = []
|
|
396
401
|
|
|
402
|
+
log("build_chat depth=\(depth) history=\(self.messageHistory.count) prompt=\(prompt.count)chars")
|
|
403
|
+
|
|
397
404
|
if !self.systemPrompt.isEmpty {
|
|
398
405
|
chat.append(.system(self.systemPrompt))
|
|
406
|
+
log(" [system] \(self.systemPrompt.prefix(80))...")
|
|
399
407
|
}
|
|
400
408
|
|
|
401
|
-
for msg in self.messageHistory {
|
|
409
|
+
for (i, msg) in self.messageHistory.enumerated() {
|
|
402
410
|
switch msg.role {
|
|
403
411
|
case "user": chat.append(.user(msg.content))
|
|
404
412
|
case "assistant": chat.append(.assistant(msg.content))
|
|
@@ -406,18 +414,22 @@ class HybridLLM: HybridLLMSpec {
|
|
|
406
414
|
case "tool": chat.append(.tool(msg.content))
|
|
407
415
|
default: break
|
|
408
416
|
}
|
|
417
|
+
log(" [\(i):\(msg.role)] \(msg.content.prefix(120))")
|
|
409
418
|
}
|
|
410
419
|
|
|
411
420
|
if depth == 0 {
|
|
412
421
|
chat.append(.user(prompt))
|
|
422
|
+
log(" [prompt] \(prompt.prefix(200))")
|
|
413
423
|
}
|
|
414
424
|
|
|
415
425
|
if let toolResults {
|
|
416
|
-
for result in toolResults {
|
|
426
|
+
for (i, result) in toolResults.enumerated() {
|
|
417
427
|
chat.append(.tool(result))
|
|
428
|
+
log(" [tool_result_\(i)] \(result.prefix(100))")
|
|
418
429
|
}
|
|
419
430
|
}
|
|
420
431
|
|
|
432
|
+
log("chat_built total=\(chat.count) messages")
|
|
421
433
|
return chat
|
|
422
434
|
}
|
|
423
435
|
|
|
@@ -451,12 +463,15 @@ class HybridLLM: HybridLLMSpec {
|
|
|
451
463
|
var output = ""
|
|
452
464
|
var thinkingMachine = ThinkingStateMachine()
|
|
453
465
|
var pendingToolCalls: [(id: String, tool: ToolDefinition, args: [String: Any], argsJson: String)] = []
|
|
466
|
+
var rawTokenLog = ""
|
|
454
467
|
|
|
455
468
|
let specialTokenPattern = try? NSRegularExpression(
|
|
456
469
|
pattern: "<\\|(?:im_end|im_start|endoftext|end|pad)\\|>",
|
|
457
470
|
options: []
|
|
458
471
|
)
|
|
459
472
|
|
|
473
|
+
log("perform_gen_events depth=\(depth) prompt=\(prompt.count)chars toolResults=\(toolResults?.count ?? 0)")
|
|
474
|
+
|
|
460
475
|
let chat = buildChatMessages(prompt: prompt, toolResults: toolResults, depth: depth)
|
|
461
476
|
let userInput = UserInput(
|
|
462
477
|
chat: chat,
|
|
@@ -464,6 +479,7 @@ class HybridLLM: HybridLLMSpec {
|
|
|
464
479
|
)
|
|
465
480
|
|
|
466
481
|
let lmInput = try await container.prepare(input: userInput)
|
|
482
|
+
log("perform_gen_events input_prepared")
|
|
467
483
|
|
|
468
484
|
let stream = try await container.perform { context in
|
|
469
485
|
let parameters = GenerateParameters(maxTokens: 2048, temperature: 0.7)
|
|
@@ -474,11 +490,21 @@ class HybridLLM: HybridLLMSpec {
|
|
|
474
490
|
)
|
|
475
491
|
}
|
|
476
492
|
|
|
493
|
+
var chunkCount = 0
|
|
477
494
|
for await generation in stream {
|
|
478
|
-
if Task.isCancelled {
|
|
495
|
+
if Task.isCancelled {
|
|
496
|
+
log("perform_gen_events cancelled at chunk=\(chunkCount)")
|
|
497
|
+
break
|
|
498
|
+
}
|
|
479
499
|
|
|
480
500
|
switch generation {
|
|
481
501
|
case .chunk(let text):
|
|
502
|
+
chunkCount += 1
|
|
503
|
+
rawTokenLog += text
|
|
504
|
+
if chunkCount <= 20 || chunkCount % 50 == 0 {
|
|
505
|
+
log("raw_chunk_events[\(chunkCount)] \(text.debugDescription)")
|
|
506
|
+
}
|
|
507
|
+
|
|
482
508
|
let outputs = thinkingMachine.process(token: text)
|
|
483
509
|
|
|
484
510
|
for machineOutput in outputs {
|
|
@@ -486,11 +512,15 @@ class HybridLLM: HybridLLMSpec {
|
|
|
486
512
|
case .token(let token):
|
|
487
513
|
var cleaned = token
|
|
488
514
|
if let regex = specialTokenPattern {
|
|
515
|
+
let before = cleaned
|
|
489
516
|
cleaned = regex.stringByReplacingMatches(
|
|
490
517
|
in: cleaned,
|
|
491
518
|
range: NSRange(cleaned.startIndex..., in: cleaned),
|
|
492
519
|
withTemplate: ""
|
|
493
520
|
)
|
|
521
|
+
if before != cleaned {
|
|
522
|
+
log("stripped_special_events: \(before.debugDescription) -> \(cleaned.debugDescription)")
|
|
523
|
+
}
|
|
494
524
|
}
|
|
495
525
|
if !cleaned.isEmpty {
|
|
496
526
|
output += cleaned
|
|
@@ -499,15 +529,18 @@ class HybridLLM: HybridLLMSpec {
|
|
|
499
529
|
}
|
|
500
530
|
|
|
501
531
|
case .thinkingStart:
|
|
532
|
+
log("thinking_start_events at chunk=\(chunkCount)")
|
|
502
533
|
emitter.emitThinkingStart()
|
|
503
534
|
|
|
504
535
|
case .thinkingChunk(let chunk):
|
|
505
536
|
emitter.emitThinkingChunk(chunk)
|
|
506
537
|
|
|
507
538
|
case .thinkingEnd(let content):
|
|
539
|
+
log("thinking_end_events at chunk=\(chunkCount)")
|
|
508
540
|
emitter.emitThinkingEnd(content)
|
|
509
541
|
}
|
|
510
542
|
}
|
|
543
|
+
}
|
|
511
544
|
|
|
512
545
|
case .toolCall(let toolCall):
|
|
513
546
|
log("Tool call detected: \(toolCall.function.name)")
|
|
@@ -525,12 +558,15 @@ class HybridLLM: HybridLLMSpec {
|
|
|
525
558
|
pendingToolCalls.append((id: toolCallId, tool: tool, args: argsDict, argsJson: argsJson))
|
|
526
559
|
|
|
527
560
|
case .info(let info):
|
|
528
|
-
log("
|
|
561
|
+
log("gen_info_events chunks=\(chunkCount) genTokens=\(info.generationTokenCount) tps=\(String(format: "%.1f", info.tokensPerSecond))")
|
|
529
562
|
let generationTime = info.tokensPerSecond > 0 ? Double(info.generationTokenCount) / info.tokensPerSecond * 1000 : 0
|
|
530
563
|
onGenerationInfo(info.generationTokenCount, generationTime)
|
|
531
564
|
}
|
|
532
565
|
}
|
|
533
566
|
|
|
567
|
+
log("perform_gen_events_loop_done chunks=\(chunkCount) output=\(output.count)chars")
|
|
568
|
+
log("raw_output_events_first500: \(rawTokenLog.prefix(500))")
|
|
569
|
+
|
|
534
570
|
let flushOutputs = thinkingMachine.flush()
|
|
535
571
|
for machineOutput in flushOutputs {
|
|
536
572
|
switch machineOutput {
|
|
@@ -622,12 +658,15 @@ class HybridLLM: HybridLLMSpec {
|
|
|
622
658
|
var output = ""
|
|
623
659
|
var thinkingMachine = ThinkingStateMachine()
|
|
624
660
|
var pendingToolCalls: [(tool: ToolDefinition, args: [String: Any], argsJson: String)] = []
|
|
661
|
+
var rawTokenLog = ""
|
|
625
662
|
|
|
626
663
|
let specialTokenPattern = try? NSRegularExpression(
|
|
627
664
|
pattern: "<\\|(?:im_end|im_start|endoftext|end|pad)\\|>",
|
|
628
665
|
options: []
|
|
629
666
|
)
|
|
630
667
|
|
|
668
|
+
log("perform_gen depth=\(depth) prompt=\(prompt.count)chars toolResults=\(toolResults?.count ?? 0)")
|
|
669
|
+
|
|
631
670
|
let chat = buildChatMessages(prompt: prompt, toolResults: toolResults, depth: depth)
|
|
632
671
|
let userInput = UserInput(
|
|
633
672
|
chat: chat,
|
|
@@ -635,6 +674,7 @@ class HybridLLM: HybridLLMSpec {
|
|
|
635
674
|
)
|
|
636
675
|
|
|
637
676
|
let lmInput = try await container.prepare(input: userInput)
|
|
677
|
+
log("perform_gen input_prepared")
|
|
638
678
|
|
|
639
679
|
let stream = try await container.perform { context in
|
|
640
680
|
let parameters = GenerateParameters(maxTokens: 2048, temperature: 0.7)
|
|
@@ -645,11 +685,21 @@ class HybridLLM: HybridLLMSpec {
|
|
|
645
685
|
)
|
|
646
686
|
}
|
|
647
687
|
|
|
688
|
+
var chunkCount = 0
|
|
648
689
|
for await generation in stream {
|
|
649
|
-
if Task.isCancelled {
|
|
690
|
+
if Task.isCancelled {
|
|
691
|
+
log("perform_gen cancelled at chunk=\(chunkCount)")
|
|
692
|
+
break
|
|
693
|
+
}
|
|
650
694
|
|
|
651
695
|
switch generation {
|
|
652
696
|
case .chunk(let text):
|
|
697
|
+
chunkCount += 1
|
|
698
|
+
rawTokenLog += text
|
|
699
|
+
if chunkCount <= 20 || chunkCount % 50 == 0 {
|
|
700
|
+
log("raw_chunk[\(chunkCount)] \(text.debugDescription)")
|
|
701
|
+
}
|
|
702
|
+
|
|
653
703
|
let outputs = thinkingMachine.process(token: text)
|
|
654
704
|
|
|
655
705
|
for machineOutput in outputs {
|
|
@@ -657,11 +707,15 @@ class HybridLLM: HybridLLMSpec {
|
|
|
657
707
|
case .token(let token):
|
|
658
708
|
var cleaned = token
|
|
659
709
|
if let regex = specialTokenPattern {
|
|
710
|
+
let before = cleaned
|
|
660
711
|
cleaned = regex.stringByReplacingMatches(
|
|
661
712
|
in: cleaned,
|
|
662
713
|
range: NSRange(cleaned.startIndex..., in: cleaned),
|
|
663
714
|
withTemplate: ""
|
|
664
715
|
)
|
|
716
|
+
if before != cleaned {
|
|
717
|
+
log("stripped_special: \(before.debugDescription) -> \(cleaned.debugDescription)")
|
|
718
|
+
}
|
|
665
719
|
}
|
|
666
720
|
if !cleaned.isEmpty {
|
|
667
721
|
output += cleaned
|
|
@@ -669,12 +723,14 @@ class HybridLLM: HybridLLMSpec {
|
|
|
669
723
|
}
|
|
670
724
|
|
|
671
725
|
case .thinkingStart:
|
|
726
|
+
log("thinking_start at chunk=\(chunkCount)")
|
|
672
727
|
onToken("<think>")
|
|
673
728
|
|
|
674
729
|
case .thinkingChunk(let chunk):
|
|
675
730
|
onToken(chunk)
|
|
676
731
|
|
|
677
732
|
case .thinkingEnd:
|
|
733
|
+
log("thinking_end at chunk=\(chunkCount)")
|
|
678
734
|
onToken("</think>")
|
|
679
735
|
}
|
|
680
736
|
}
|
|
@@ -694,11 +750,17 @@ class HybridLLM: HybridLLMSpec {
|
|
|
694
750
|
onToolCall(toolCall.function.name, argsJson)
|
|
695
751
|
|
|
696
752
|
case .info(let info):
|
|
697
|
-
log("
|
|
753
|
+
log("gen_info chunks=\(chunkCount) genTokens=\(info.generationTokenCount) tps=\(String(format: "%.1f", info.tokensPerSecond))")
|
|
698
754
|
}
|
|
699
755
|
}
|
|
700
756
|
|
|
757
|
+
log("perform_gen_loop_done chunks=\(chunkCount) output=\(output.count)chars")
|
|
758
|
+
log("raw_output_first500: \(rawTokenLog.prefix(500))")
|
|
759
|
+
|
|
701
760
|
let flushOutputs = thinkingMachine.flush()
|
|
761
|
+
if !flushOutputs.isEmpty {
|
|
762
|
+
log("flush_outputs count=\(flushOutputs.count)")
|
|
763
|
+
}
|
|
702
764
|
for machineOutput in flushOutputs {
|
|
703
765
|
switch machineOutput {
|
|
704
766
|
case .token(let token):
|
|
@@ -773,6 +835,7 @@ class HybridLLM: HybridLLMSpec {
|
|
|
773
835
|
return output + continuation
|
|
774
836
|
}
|
|
775
837
|
|
|
838
|
+
log("perform_gen_result output=\(output.count)chars preview: \(output.prefix(200))")
|
|
776
839
|
return output
|
|
777
840
|
}
|
|
778
841
|
|
|
@@ -856,10 +919,14 @@ class HybridLLM: HybridLLMSpec {
|
|
|
856
919
|
}
|
|
857
920
|
|
|
858
921
|
func clearHistory() throws {
|
|
922
|
+
log("clear_history before=\(messageHistory.count) messages")
|
|
923
|
+
for (i, msg) in messageHistory.enumerated() {
|
|
924
|
+
log(" clearing[\(i):\(msg.role)] \(msg.content.prefix(80))")
|
|
925
|
+
}
|
|
859
926
|
messageHistory = []
|
|
860
927
|
if let container = self.container {
|
|
861
928
|
self.session = ChatSession(container, instructions: self.systemPrompt)
|
|
862
929
|
}
|
|
863
|
-
log("
|
|
930
|
+
log("clear_history done session_reset")
|
|
864
931
|
}
|
|
865
932
|
}
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@inferrlm/react-native-mlx",
|
|
3
3
|
"description": "MLX Swift integration for React Native - InferrLM fork with enhanced features",
|
|
4
|
-
"version": "0.4.2-alpha.3",
|
|
4
|
+
"version": "0.4.2-alpha.4",
|
|
5
5
|
"main": "./lib/module/index.js",
|
|
6
6
|
"module": "./lib/module/index.js",
|
|
7
7
|
"types": "./lib/typescript/src/index.d.ts",
|