@inferrlm/react-native-mlx 0.4.2-alpha.3 → 0.4.2-alpha.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -268,6 +268,9 @@ class HybridLLM: HybridLLMSpec {
268
268
  var firstTokenTime: Date?
269
269
  var tokenCount = 0
270
270
 
271
+ log("stream_start prompt=\(prompt.count)chars history=\(self.messageHistory.count) manageHistory=\(self.manageHistory)")
272
+ log("stream_prompt: \(prompt.prefix(300))")
273
+
271
274
  let result = try await self.performGeneration(
272
275
  container: container,
273
276
  prompt: prompt,
@@ -296,7 +299,8 @@ class HybridLLM: HybridLLMSpec {
296
299
  toolExecutionTime: 0
297
300
  )
298
301
 
299
- log("Stream complete - \(tokenCount) tokens, \(String(format: "%.1f", tokensPerSecond)) tokens/s")
302
+ log("stream_done tokens=\(tokenCount) tps=\(String(format: "%.1f", tokensPerSecond)) result=\(result.count)chars")
303
+ log("stream_result_preview: \(result.prefix(300))")
300
304
  return result
301
305
  }
302
306
 
@@ -308,6 +312,7 @@ class HybridLLM: HybridLLMSpec {
308
312
  if self.manageHistory {
309
313
  self.messageHistory.append(LLMMessage(role: "user", content: prompt))
310
314
  self.messageHistory.append(LLMMessage(role: "assistant", content: result))
315
+ log("stream_history_updated count=\(self.messageHistory.count)")
311
316
  }
312
317
 
313
318
  return result
@@ -394,11 +399,14 @@ class HybridLLM: HybridLLMSpec {
394
399
  ) -> [Chat.Message] {
395
400
  var chat: [Chat.Message] = []
396
401
 
402
+ log("build_chat depth=\(depth) history=\(self.messageHistory.count) prompt=\(prompt.count)chars")
403
+
397
404
  if !self.systemPrompt.isEmpty {
398
405
  chat.append(.system(self.systemPrompt))
406
+ log(" [system] \(self.systemPrompt.prefix(80))...")
399
407
  }
400
408
 
401
- for msg in self.messageHistory {
409
+ for (i, msg) in self.messageHistory.enumerated() {
402
410
  switch msg.role {
403
411
  case "user": chat.append(.user(msg.content))
404
412
  case "assistant": chat.append(.assistant(msg.content))
@@ -406,18 +414,22 @@ class HybridLLM: HybridLLMSpec {
406
414
  case "tool": chat.append(.tool(msg.content))
407
415
  default: break
408
416
  }
417
+ log(" [\(i):\(msg.role)] \(msg.content.prefix(120))")
409
418
  }
410
419
 
411
420
  if depth == 0 {
412
421
  chat.append(.user(prompt))
422
+ log(" [prompt] \(prompt.prefix(200))")
413
423
  }
414
424
 
415
425
  if let toolResults {
416
- for result in toolResults {
426
+ for (i, result) in toolResults.enumerated() {
417
427
  chat.append(.tool(result))
428
+ log(" [tool_result_\(i)] \(result.prefix(100))")
418
429
  }
419
430
  }
420
431
 
432
+ log("chat_built total=\(chat.count) messages")
421
433
  return chat
422
434
  }
423
435
 
@@ -451,12 +463,15 @@ class HybridLLM: HybridLLMSpec {
451
463
  var output = ""
452
464
  var thinkingMachine = ThinkingStateMachine()
453
465
  var pendingToolCalls: [(id: String, tool: ToolDefinition, args: [String: Any], argsJson: String)] = []
466
+ var rawTokenLog = ""
454
467
 
455
468
  let specialTokenPattern = try? NSRegularExpression(
456
469
  pattern: "<\\|(?:im_end|im_start|endoftext|end|pad)\\|>",
457
470
  options: []
458
471
  )
459
472
 
473
+ log("perform_gen_events depth=\(depth) prompt=\(prompt.count)chars toolResults=\(toolResults?.count ?? 0)")
474
+
460
475
  let chat = buildChatMessages(prompt: prompt, toolResults: toolResults, depth: depth)
461
476
  let userInput = UserInput(
462
477
  chat: chat,
@@ -464,6 +479,7 @@ class HybridLLM: HybridLLMSpec {
464
479
  )
465
480
 
466
481
  let lmInput = try await container.prepare(input: userInput)
482
+ log("perform_gen_events input_prepared")
467
483
 
468
484
  let stream = try await container.perform { context in
469
485
  let parameters = GenerateParameters(maxTokens: 2048, temperature: 0.7)
@@ -474,11 +490,21 @@ class HybridLLM: HybridLLMSpec {
474
490
  )
475
491
  }
476
492
 
493
+ var chunkCount = 0
477
494
  for await generation in stream {
478
- if Task.isCancelled { break }
495
+ if Task.isCancelled {
496
+ log("perform_gen_events cancelled at chunk=\(chunkCount)")
497
+ break
498
+ }
479
499
 
480
500
  switch generation {
481
501
  case .chunk(let text):
502
+ chunkCount += 1
503
+ rawTokenLog += text
504
+ if chunkCount <= 20 || chunkCount % 50 == 0 {
505
+ log("raw_chunk_events[\(chunkCount)] \(text.debugDescription)")
506
+ }
507
+
482
508
  let outputs = thinkingMachine.process(token: text)
483
509
 
484
510
  for machineOutput in outputs {
@@ -486,11 +512,15 @@ class HybridLLM: HybridLLMSpec {
486
512
  case .token(let token):
487
513
  var cleaned = token
488
514
  if let regex = specialTokenPattern {
515
+ let before = cleaned
489
516
  cleaned = regex.stringByReplacingMatches(
490
517
  in: cleaned,
491
518
  range: NSRange(cleaned.startIndex..., in: cleaned),
492
519
  withTemplate: ""
493
520
  )
521
+ if before != cleaned {
522
+ log("stripped_special_events: \(before.debugDescription) -> \(cleaned.debugDescription)")
523
+ }
494
524
  }
495
525
  if !cleaned.isEmpty {
496
526
  output += cleaned
@@ -499,12 +529,14 @@ class HybridLLM: HybridLLMSpec {
499
529
  }
500
530
 
501
531
  case .thinkingStart:
532
+ log("thinking_start_events at chunk=\(chunkCount)")
502
533
  emitter.emitThinkingStart()
503
534
 
504
535
  case .thinkingChunk(let chunk):
505
536
  emitter.emitThinkingChunk(chunk)
506
537
 
507
538
  case .thinkingEnd(let content):
539
+ log("thinking_end_events at chunk=\(chunkCount)")
508
540
  emitter.emitThinkingEnd(content)
509
541
  }
510
542
  }
@@ -525,12 +557,15 @@ class HybridLLM: HybridLLMSpec {
525
557
  pendingToolCalls.append((id: toolCallId, tool: tool, args: argsDict, argsJson: argsJson))
526
558
 
527
559
  case .info(let info):
528
- log("Generation info: \(info.generationTokenCount) tokens, \(String(format: "%.1f", info.tokensPerSecond)) tokens/s")
560
+ log("gen_info_events chunks=\(chunkCount) genTokens=\(info.generationTokenCount) tps=\(String(format: "%.1f", info.tokensPerSecond))")
529
561
  let generationTime = info.tokensPerSecond > 0 ? Double(info.generationTokenCount) / info.tokensPerSecond * 1000 : 0
530
562
  onGenerationInfo(info.generationTokenCount, generationTime)
531
563
  }
532
564
  }
533
565
 
566
+ log("perform_gen_events_loop_done chunks=\(chunkCount) output=\(output.count)chars")
567
+ log("raw_output_events_first500: \(rawTokenLog.prefix(500))")
568
+
534
569
  let flushOutputs = thinkingMachine.flush()
535
570
  for machineOutput in flushOutputs {
536
571
  switch machineOutput {
@@ -622,12 +657,15 @@ class HybridLLM: HybridLLMSpec {
622
657
  var output = ""
623
658
  var thinkingMachine = ThinkingStateMachine()
624
659
  var pendingToolCalls: [(tool: ToolDefinition, args: [String: Any], argsJson: String)] = []
660
+ var rawTokenLog = ""
625
661
 
626
662
  let specialTokenPattern = try? NSRegularExpression(
627
663
  pattern: "<\\|(?:im_end|im_start|endoftext|end|pad)\\|>",
628
664
  options: []
629
665
  )
630
666
 
667
+ log("perform_gen depth=\(depth) prompt=\(prompt.count)chars toolResults=\(toolResults?.count ?? 0)")
668
+
631
669
  let chat = buildChatMessages(prompt: prompt, toolResults: toolResults, depth: depth)
632
670
  let userInput = UserInput(
633
671
  chat: chat,
@@ -635,6 +673,7 @@ class HybridLLM: HybridLLMSpec {
635
673
  )
636
674
 
637
675
  let lmInput = try await container.prepare(input: userInput)
676
+ log("perform_gen input_prepared")
638
677
 
639
678
  let stream = try await container.perform { context in
640
679
  let parameters = GenerateParameters(maxTokens: 2048, temperature: 0.7)
@@ -645,11 +684,21 @@ class HybridLLM: HybridLLMSpec {
645
684
  )
646
685
  }
647
686
 
687
+ var chunkCount = 0
648
688
  for await generation in stream {
649
- if Task.isCancelled { break }
689
+ if Task.isCancelled {
690
+ log("perform_gen cancelled at chunk=\(chunkCount)")
691
+ break
692
+ }
650
693
 
651
694
  switch generation {
652
695
  case .chunk(let text):
696
+ chunkCount += 1
697
+ rawTokenLog += text
698
+ if chunkCount <= 20 || chunkCount % 50 == 0 {
699
+ log("raw_chunk[\(chunkCount)] \(text.debugDescription)")
700
+ }
701
+
653
702
  let outputs = thinkingMachine.process(token: text)
654
703
 
655
704
  for machineOutput in outputs {
@@ -657,11 +706,15 @@ class HybridLLM: HybridLLMSpec {
657
706
  case .token(let token):
658
707
  var cleaned = token
659
708
  if let regex = specialTokenPattern {
709
+ let before = cleaned
660
710
  cleaned = regex.stringByReplacingMatches(
661
711
  in: cleaned,
662
712
  range: NSRange(cleaned.startIndex..., in: cleaned),
663
713
  withTemplate: ""
664
714
  )
715
+ if before != cleaned {
716
+ log("stripped_special: \(before.debugDescription) -> \(cleaned.debugDescription)")
717
+ }
665
718
  }
666
719
  if !cleaned.isEmpty {
667
720
  output += cleaned
@@ -669,12 +722,14 @@ class HybridLLM: HybridLLMSpec {
669
722
  }
670
723
 
671
724
  case .thinkingStart:
725
+ log("thinking_start at chunk=\(chunkCount)")
672
726
  onToken("<think>")
673
727
 
674
728
  case .thinkingChunk(let chunk):
675
729
  onToken(chunk)
676
730
 
677
731
  case .thinkingEnd:
732
+ log("thinking_end at chunk=\(chunkCount)")
678
733
  onToken("</think>")
679
734
  }
680
735
  }
@@ -694,11 +749,17 @@ class HybridLLM: HybridLLMSpec {
694
749
  onToolCall(toolCall.function.name, argsJson)
695
750
 
696
751
  case .info(let info):
697
- log("Generation info: \(info.generationTokenCount) tokens, \(String(format: "%.1f", info.tokensPerSecond)) tokens/s")
752
+ log("gen_info chunks=\(chunkCount) genTokens=\(info.generationTokenCount) tps=\(String(format: "%.1f", info.tokensPerSecond))")
698
753
  }
699
754
  }
700
755
 
756
+ log("perform_gen_loop_done chunks=\(chunkCount) output=\(output.count)chars")
757
+ log("raw_output_first500: \(rawTokenLog.prefix(500))")
758
+
701
759
  let flushOutputs = thinkingMachine.flush()
760
+ if !flushOutputs.isEmpty {
761
+ log("flush_outputs count=\(flushOutputs.count)")
762
+ }
702
763
  for machineOutput in flushOutputs {
703
764
  switch machineOutput {
704
765
  case .token(let token):
@@ -773,6 +834,7 @@ class HybridLLM: HybridLLMSpec {
773
834
  return output + continuation
774
835
  }
775
836
 
837
+ log("perform_gen_result output=\(output.count)chars preview: \(output.prefix(200))")
776
838
  return output
777
839
  }
778
840
 
@@ -856,10 +918,14 @@ class HybridLLM: HybridLLMSpec {
856
918
  }
857
919
 
858
920
  func clearHistory() throws {
921
+ log("clear_history before=\(messageHistory.count) messages")
922
+ for (i, msg) in messageHistory.enumerated() {
923
+ log(" clearing[\(i):\(msg.role)] \(msg.content.prefix(80))")
924
+ }
859
925
  messageHistory = []
860
926
  if let container = self.container {
861
927
  self.session = ChatSession(container, instructions: self.systemPrompt)
862
928
  }
863
- log("History and session cleared")
929
+ log("clear_history done session_reset")
864
930
  }
865
931
  }
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@inferrlm/react-native-mlx",
3
3
  "description": "MLX Swift integration for React Native - InferrLM fork with enhanced features",
4
- "version": "0.4.2-alpha.3",
4
+ "version": "0.4.2-alpha.5",
5
5
  "main": "./lib/module/index.js",
6
6
  "module": "./lib/module/index.js",
7
7
  "types": "./lib/typescript/src/index.d.ts",