@inferrlm/react-native-mlx 0.4.2-alpha.3 → 0.4.2-alpha.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -268,6 +268,9 @@ class HybridLLM: HybridLLMSpec {
268
268
  var firstTokenTime: Date?
269
269
  var tokenCount = 0
270
270
 
271
+ log("stream_start prompt=\(prompt.count)chars history=\(self.messageHistory.count) manageHistory=\(self.manageHistory)")
272
+ log("stream_prompt: \(prompt.prefix(300))")
273
+
271
274
  let result = try await self.performGeneration(
272
275
  container: container,
273
276
  prompt: prompt,
@@ -296,7 +299,8 @@ class HybridLLM: HybridLLMSpec {
296
299
  toolExecutionTime: 0
297
300
  )
298
301
 
299
- log("Stream complete - \(tokenCount) tokens, \(String(format: "%.1f", tokensPerSecond)) tokens/s")
302
+ log("stream_done tokens=\(tokenCount) tps=\(String(format: "%.1f", tokensPerSecond)) result=\(result.count)chars")
303
+ log("stream_result_preview: \(result.prefix(300))")
300
304
  return result
301
305
  }
302
306
 
@@ -308,6 +312,7 @@ class HybridLLM: HybridLLMSpec {
308
312
  if self.manageHistory {
309
313
  self.messageHistory.append(LLMMessage(role: "user", content: prompt))
310
314
  self.messageHistory.append(LLMMessage(role: "assistant", content: result))
315
+ log("stream_history_updated count=\(self.messageHistory.count)")
311
316
  }
312
317
 
313
318
  return result
@@ -394,11 +399,14 @@ class HybridLLM: HybridLLMSpec {
394
399
  ) -> [Chat.Message] {
395
400
  var chat: [Chat.Message] = []
396
401
 
402
+ log("build_chat depth=\(depth) history=\(self.messageHistory.count) prompt=\(prompt.count)chars")
403
+
397
404
  if !self.systemPrompt.isEmpty {
398
405
  chat.append(.system(self.systemPrompt))
406
+ log(" [system] \(self.systemPrompt.prefix(80))...")
399
407
  }
400
408
 
401
- for msg in self.messageHistory {
409
+ for (i, msg) in self.messageHistory.enumerated() {
402
410
  switch msg.role {
403
411
  case "user": chat.append(.user(msg.content))
404
412
  case "assistant": chat.append(.assistant(msg.content))
@@ -406,18 +414,22 @@ class HybridLLM: HybridLLMSpec {
406
414
  case "tool": chat.append(.tool(msg.content))
407
415
  default: break
408
416
  }
417
+ log(" [\(i):\(msg.role)] \(msg.content.prefix(120))")
409
418
  }
410
419
 
411
420
  if depth == 0 {
412
421
  chat.append(.user(prompt))
422
+ log(" [prompt] \(prompt.prefix(200))")
413
423
  }
414
424
 
415
425
  if let toolResults {
416
- for result in toolResults {
426
+ for (i, result) in toolResults.enumerated() {
417
427
  chat.append(.tool(result))
428
+ log(" [tool_result_\(i)] \(result.prefix(100))")
418
429
  }
419
430
  }
420
431
 
432
+ log("chat_built total=\(chat.count) messages")
421
433
  return chat
422
434
  }
423
435
 
@@ -451,12 +463,15 @@ class HybridLLM: HybridLLMSpec {
451
463
  var output = ""
452
464
  var thinkingMachine = ThinkingStateMachine()
453
465
  var pendingToolCalls: [(id: String, tool: ToolDefinition, args: [String: Any], argsJson: String)] = []
466
+ var rawTokenLog = ""
454
467
 
455
468
  let specialTokenPattern = try? NSRegularExpression(
456
469
  pattern: "<\\|(?:im_end|im_start|endoftext|end|pad)\\|>",
457
470
  options: []
458
471
  )
459
472
 
473
+ log("perform_gen_events depth=\(depth) prompt=\(prompt.count)chars toolResults=\(toolResults?.count ?? 0)")
474
+
460
475
  let chat = buildChatMessages(prompt: prompt, toolResults: toolResults, depth: depth)
461
476
  let userInput = UserInput(
462
477
  chat: chat,
@@ -464,6 +479,7 @@ class HybridLLM: HybridLLMSpec {
464
479
  )
465
480
 
466
481
  let lmInput = try await container.prepare(input: userInput)
482
+ log("perform_gen_events input_prepared")
467
483
 
468
484
  let stream = try await container.perform { context in
469
485
  let parameters = GenerateParameters(maxTokens: 2048, temperature: 0.7)
@@ -474,11 +490,21 @@ class HybridLLM: HybridLLMSpec {
474
490
  )
475
491
  }
476
492
 
493
+ var chunkCount = 0
477
494
  for await generation in stream {
478
- if Task.isCancelled { break }
495
+ if Task.isCancelled {
496
+ log("perform_gen_events cancelled at chunk=\(chunkCount)")
497
+ break
498
+ }
479
499
 
480
500
  switch generation {
481
501
  case .chunk(let text):
502
+ chunkCount += 1
503
+ rawTokenLog += text
504
+ if chunkCount <= 20 || chunkCount % 50 == 0 {
505
+ log("raw_chunk_events[\(chunkCount)] \(text.debugDescription)")
506
+ }
507
+
482
508
  let outputs = thinkingMachine.process(token: text)
483
509
 
484
510
  for machineOutput in outputs {
@@ -486,11 +512,15 @@ class HybridLLM: HybridLLMSpec {
486
512
  case .token(let token):
487
513
  var cleaned = token
488
514
  if let regex = specialTokenPattern {
515
+ let before = cleaned
489
516
  cleaned = regex.stringByReplacingMatches(
490
517
  in: cleaned,
491
518
  range: NSRange(cleaned.startIndex..., in: cleaned),
492
519
  withTemplate: ""
493
520
  )
521
+ if before != cleaned {
522
+ log("stripped_special_events: \(before.debugDescription) -> \(cleaned.debugDescription)")
523
+ }
494
524
  }
495
525
  if !cleaned.isEmpty {
496
526
  output += cleaned
@@ -499,15 +529,18 @@ class HybridLLM: HybridLLMSpec {
499
529
  }
500
530
 
501
531
  case .thinkingStart:
532
+ log("thinking_start_events at chunk=\(chunkCount)")
502
533
  emitter.emitThinkingStart()
503
534
 
504
535
  case .thinkingChunk(let chunk):
505
536
  emitter.emitThinkingChunk(chunk)
506
537
 
507
538
  case .thinkingEnd(let content):
539
+ log("thinking_end_events at chunk=\(chunkCount)")
508
540
  emitter.emitThinkingEnd(content)
509
541
  }
510
542
  }
543
+ }
511
544
 
512
545
  case .toolCall(let toolCall):
513
546
  log("Tool call detected: \(toolCall.function.name)")
@@ -525,12 +558,15 @@ class HybridLLM: HybridLLMSpec {
525
558
  pendingToolCalls.append((id: toolCallId, tool: tool, args: argsDict, argsJson: argsJson))
526
559
 
527
560
  case .info(let info):
528
- log("Generation info: \(info.generationTokenCount) tokens, \(String(format: "%.1f", info.tokensPerSecond)) tokens/s")
561
+ log("gen_info_events chunks=\(chunkCount) genTokens=\(info.generationTokenCount) tps=\(String(format: "%.1f", info.tokensPerSecond))")
529
562
  let generationTime = info.tokensPerSecond > 0 ? Double(info.generationTokenCount) / info.tokensPerSecond * 1000 : 0
530
563
  onGenerationInfo(info.generationTokenCount, generationTime)
531
564
  }
532
565
  }
533
566
 
567
+ log("perform_gen_events_loop_done chunks=\(chunkCount) output=\(output.count)chars")
568
+ log("raw_output_events_first500: \(rawTokenLog.prefix(500))")
569
+
534
570
  let flushOutputs = thinkingMachine.flush()
535
571
  for machineOutput in flushOutputs {
536
572
  switch machineOutput {
@@ -622,12 +658,15 @@ class HybridLLM: HybridLLMSpec {
622
658
  var output = ""
623
659
  var thinkingMachine = ThinkingStateMachine()
624
660
  var pendingToolCalls: [(tool: ToolDefinition, args: [String: Any], argsJson: String)] = []
661
+ var rawTokenLog = ""
625
662
 
626
663
  let specialTokenPattern = try? NSRegularExpression(
627
664
  pattern: "<\\|(?:im_end|im_start|endoftext|end|pad)\\|>",
628
665
  options: []
629
666
  )
630
667
 
668
+ log("perform_gen depth=\(depth) prompt=\(prompt.count)chars toolResults=\(toolResults?.count ?? 0)")
669
+
631
670
  let chat = buildChatMessages(prompt: prompt, toolResults: toolResults, depth: depth)
632
671
  let userInput = UserInput(
633
672
  chat: chat,
@@ -635,6 +674,7 @@ class HybridLLM: HybridLLMSpec {
635
674
  )
636
675
 
637
676
  let lmInput = try await container.prepare(input: userInput)
677
+ log("perform_gen input_prepared")
638
678
 
639
679
  let stream = try await container.perform { context in
640
680
  let parameters = GenerateParameters(maxTokens: 2048, temperature: 0.7)
@@ -645,11 +685,21 @@ class HybridLLM: HybridLLMSpec {
645
685
  )
646
686
  }
647
687
 
688
+ var chunkCount = 0
648
689
  for await generation in stream {
649
- if Task.isCancelled { break }
690
+ if Task.isCancelled {
691
+ log("perform_gen cancelled at chunk=\(chunkCount)")
692
+ break
693
+ }
650
694
 
651
695
  switch generation {
652
696
  case .chunk(let text):
697
+ chunkCount += 1
698
+ rawTokenLog += text
699
+ if chunkCount <= 20 || chunkCount % 50 == 0 {
700
+ log("raw_chunk[\(chunkCount)] \(text.debugDescription)")
701
+ }
702
+
653
703
  let outputs = thinkingMachine.process(token: text)
654
704
 
655
705
  for machineOutput in outputs {
@@ -657,11 +707,15 @@ class HybridLLM: HybridLLMSpec {
657
707
  case .token(let token):
658
708
  var cleaned = token
659
709
  if let regex = specialTokenPattern {
710
+ let before = cleaned
660
711
  cleaned = regex.stringByReplacingMatches(
661
712
  in: cleaned,
662
713
  range: NSRange(cleaned.startIndex..., in: cleaned),
663
714
  withTemplate: ""
664
715
  )
716
+ if before != cleaned {
717
+ log("stripped_special: \(before.debugDescription) -> \(cleaned.debugDescription)")
718
+ }
665
719
  }
666
720
  if !cleaned.isEmpty {
667
721
  output += cleaned
@@ -669,12 +723,14 @@ class HybridLLM: HybridLLMSpec {
669
723
  }
670
724
 
671
725
  case .thinkingStart:
726
+ log("thinking_start at chunk=\(chunkCount)")
672
727
  onToken("<think>")
673
728
 
674
729
  case .thinkingChunk(let chunk):
675
730
  onToken(chunk)
676
731
 
677
732
  case .thinkingEnd:
733
+ log("thinking_end at chunk=\(chunkCount)")
678
734
  onToken("</think>")
679
735
  }
680
736
  }
@@ -694,11 +750,17 @@ class HybridLLM: HybridLLMSpec {
694
750
  onToolCall(toolCall.function.name, argsJson)
695
751
 
696
752
  case .info(let info):
697
- log("Generation info: \(info.generationTokenCount) tokens, \(String(format: "%.1f", info.tokensPerSecond)) tokens/s")
753
+ log("gen_info chunks=\(chunkCount) genTokens=\(info.generationTokenCount) tps=\(String(format: "%.1f", info.tokensPerSecond))")
698
754
  }
699
755
  }
700
756
 
757
+ log("perform_gen_loop_done chunks=\(chunkCount) output=\(output.count)chars")
758
+ log("raw_output_first500: \(rawTokenLog.prefix(500))")
759
+
701
760
  let flushOutputs = thinkingMachine.flush()
761
+ if !flushOutputs.isEmpty {
762
+ log("flush_outputs count=\(flushOutputs.count)")
763
+ }
702
764
  for machineOutput in flushOutputs {
703
765
  switch machineOutput {
704
766
  case .token(let token):
@@ -773,6 +835,7 @@ class HybridLLM: HybridLLMSpec {
773
835
  return output + continuation
774
836
  }
775
837
 
838
+ log("perform_gen_result output=\(output.count)chars preview: \(output.prefix(200))")
776
839
  return output
777
840
  }
778
841
 
@@ -856,10 +919,14 @@ class HybridLLM: HybridLLMSpec {
856
919
  }
857
920
 
858
921
  func clearHistory() throws {
922
+ log("clear_history before=\(messageHistory.count) messages")
923
+ for (i, msg) in messageHistory.enumerated() {
924
+ log(" clearing[\(i):\(msg.role)] \(msg.content.prefix(80))")
925
+ }
859
926
  messageHistory = []
860
927
  if let container = self.container {
861
928
  self.session = ChatSession(container, instructions: self.systemPrompt)
862
929
  }
863
- log("History and session cleared")
930
+ log("clear_history done session_reset")
864
931
  }
865
932
  }
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@inferrlm/react-native-mlx",
3
3
  "description": "MLX Swift integration for React Native - InferrLM fork with enhanced features",
4
- "version": "0.4.2-alpha.3",
4
+ "version": "0.4.2-alpha.4",
5
5
  "main": "./lib/module/index.js",
6
6
  "module": "./lib/module/index.js",
7
7
  "types": "./lib/typescript/src/index.d.ts",