@inferrlm/react-native-mlx 0.4.2-alpha.2 → 0.4.2-alpha.4

This diff shows the changes between two publicly available versions of this package, each of which has been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the differences between the package versions exactly as they appear in the public registry.
@@ -230,10 +230,6 @@ class HybridLLM: HybridLLMSpec {
230
230
  }
231
231
 
232
232
  return Promise.async { [self] in
233
- if self.manageHistory {
234
- self.messageHistory.append(LLMMessage(role: "user", content: prompt))
235
- }
236
-
237
233
  let task = Task<String, Error> {
238
234
  log("Generating response for: \(prompt.prefix(50))...")
239
235
  let result = try await session.respond(to: prompt)
@@ -247,6 +243,7 @@ class HybridLLM: HybridLLMSpec {
247
243
  let result = try await task.value
248
244
 
249
245
  if self.manageHistory {
246
+ self.messageHistory.append(LLMMessage(role: "user", content: prompt))
250
247
  self.messageHistory.append(LLMMessage(role: "assistant", content: result))
251
248
  }
252
249
 
@@ -266,15 +263,14 @@ class HybridLLM: HybridLLMSpec {
266
263
  }
267
264
 
268
265
  return Promise.async { [self] in
269
- if self.manageHistory {
270
- self.messageHistory.append(LLMMessage(role: "user", content: prompt))
271
- }
272
-
273
266
  let task = Task<String, Error> {
274
267
  let startTime = Date()
275
268
  var firstTokenTime: Date?
276
269
  var tokenCount = 0
277
270
 
271
+ log("stream_start prompt=\(prompt.count)chars history=\(self.messageHistory.count) manageHistory=\(self.manageHistory)")
272
+ log("stream_prompt: \(prompt.prefix(300))")
273
+
278
274
  let result = try await self.performGeneration(
279
275
  container: container,
280
276
  prompt: prompt,
@@ -303,7 +299,8 @@ class HybridLLM: HybridLLMSpec {
303
299
  toolExecutionTime: 0
304
300
  )
305
301
 
306
- log("Stream complete - \(tokenCount) tokens, \(String(format: "%.1f", tokensPerSecond)) tokens/s")
302
+ log("stream_done tokens=\(tokenCount) tps=\(String(format: "%.1f", tokensPerSecond)) result=\(result.count)chars")
303
+ log("stream_result_preview: \(result.prefix(300))")
307
304
  return result
308
305
  }
309
306
 
@@ -313,7 +310,9 @@ class HybridLLM: HybridLLMSpec {
313
310
  let result = try await task.value
314
311
 
315
312
  if self.manageHistory {
313
+ self.messageHistory.append(LLMMessage(role: "user", content: prompt))
316
314
  self.messageHistory.append(LLMMessage(role: "assistant", content: result))
315
+ log("stream_history_updated count=\(self.messageHistory.count)")
317
316
  }
318
317
 
319
318
  return result
@@ -329,10 +328,6 @@ class HybridLLM: HybridLLMSpec {
329
328
  }
330
329
 
331
330
  return Promise.async { [self] in
332
- if self.manageHistory {
333
- self.messageHistory.append(LLMMessage(role: "user", content: prompt))
334
- }
335
-
336
331
  let task = Task<String, Error> {
337
332
  let startTime = Date()
338
333
  var firstTokenTime: Date?
@@ -389,6 +384,7 @@ class HybridLLM: HybridLLMSpec {
389
384
  let result = try await task.value
390
385
 
391
386
  if self.manageHistory {
387
+ self.messageHistory.append(LLMMessage(role: "user", content: prompt))
392
388
  self.messageHistory.append(LLMMessage(role: "assistant", content: result))
393
389
  }
394
390
 
@@ -403,11 +399,14 @@ class HybridLLM: HybridLLMSpec {
403
399
  ) -> [Chat.Message] {
404
400
  var chat: [Chat.Message] = []
405
401
 
402
+ log("build_chat depth=\(depth) history=\(self.messageHistory.count) prompt=\(prompt.count)chars")
403
+
406
404
  if !self.systemPrompt.isEmpty {
407
405
  chat.append(.system(self.systemPrompt))
406
+ log(" [system] \(self.systemPrompt.prefix(80))...")
408
407
  }
409
408
 
410
- for msg in self.messageHistory {
409
+ for (i, msg) in self.messageHistory.enumerated() {
411
410
  switch msg.role {
412
411
  case "user": chat.append(.user(msg.content))
413
412
  case "assistant": chat.append(.assistant(msg.content))
@@ -415,18 +414,22 @@ class HybridLLM: HybridLLMSpec {
415
414
  case "tool": chat.append(.tool(msg.content))
416
415
  default: break
417
416
  }
417
+ log(" [\(i):\(msg.role)] \(msg.content.prefix(120))")
418
418
  }
419
419
 
420
420
  if depth == 0 {
421
421
  chat.append(.user(prompt))
422
+ log(" [prompt] \(prompt.prefix(200))")
422
423
  }
423
424
 
424
425
  if let toolResults {
425
- for result in toolResults {
426
+ for (i, result) in toolResults.enumerated() {
426
427
  chat.append(.tool(result))
428
+ log(" [tool_result_\(i)] \(result.prefix(100))")
427
429
  }
428
430
  }
429
431
 
432
+ log("chat_built total=\(chat.count) messages")
430
433
  return chat
431
434
  }
432
435
 
@@ -460,6 +463,14 @@ class HybridLLM: HybridLLMSpec {
460
463
  var output = ""
461
464
  var thinkingMachine = ThinkingStateMachine()
462
465
  var pendingToolCalls: [(id: String, tool: ToolDefinition, args: [String: Any], argsJson: String)] = []
466
+ var rawTokenLog = ""
467
+
468
+ let specialTokenPattern = try? NSRegularExpression(
469
+ pattern: "<\\|(?:im_end|im_start|endoftext|end|pad)\\|>",
470
+ options: []
471
+ )
472
+
473
+ log("perform_gen_events depth=\(depth) prompt=\(prompt.count)chars toolResults=\(toolResults?.count ?? 0)")
463
474
 
464
475
  let chat = buildChatMessages(prompt: prompt, toolResults: toolResults, depth: depth)
465
476
  let userInput = UserInput(
@@ -468,6 +479,7 @@ class HybridLLM: HybridLLMSpec {
468
479
  )
469
480
 
470
481
  let lmInput = try await container.prepare(input: userInput)
482
+ log("perform_gen_events input_prepared")
471
483
 
472
484
  let stream = try await container.perform { context in
473
485
  let parameters = GenerateParameters(maxTokens: 2048, temperature: 0.7)
@@ -478,30 +490,57 @@ class HybridLLM: HybridLLMSpec {
478
490
  )
479
491
  }
480
492
 
493
+ var chunkCount = 0
481
494
  for await generation in stream {
482
- if Task.isCancelled { break }
495
+ if Task.isCancelled {
496
+ log("perform_gen_events cancelled at chunk=\(chunkCount)")
497
+ break
498
+ }
483
499
 
484
500
  switch generation {
485
501
  case .chunk(let text):
502
+ chunkCount += 1
503
+ rawTokenLog += text
504
+ if chunkCount <= 20 || chunkCount % 50 == 0 {
505
+ log("raw_chunk_events[\(chunkCount)] \(text.debugDescription)")
506
+ }
507
+
486
508
  let outputs = thinkingMachine.process(token: text)
487
509
 
488
510
  for machineOutput in outputs {
489
511
  switch machineOutput {
490
512
  case .token(let token):
491
- output += token
492
- emitter.emitToken(token)
493
- onTokenProcessed()
513
+ var cleaned = token
514
+ if let regex = specialTokenPattern {
515
+ let before = cleaned
516
+ cleaned = regex.stringByReplacingMatches(
517
+ in: cleaned,
518
+ range: NSRange(cleaned.startIndex..., in: cleaned),
519
+ withTemplate: ""
520
+ )
521
+ if before != cleaned {
522
+ log("stripped_special_events: \(before.debugDescription) -> \(cleaned.debugDescription)")
523
+ }
524
+ }
525
+ if !cleaned.isEmpty {
526
+ output += cleaned
527
+ emitter.emitToken(cleaned)
528
+ onTokenProcessed()
529
+ }
494
530
 
495
531
  case .thinkingStart:
532
+ log("thinking_start_events at chunk=\(chunkCount)")
496
533
  emitter.emitThinkingStart()
497
534
 
498
535
  case .thinkingChunk(let chunk):
499
536
  emitter.emitThinkingChunk(chunk)
500
537
 
501
538
  case .thinkingEnd(let content):
539
+ log("thinking_end_events at chunk=\(chunkCount)")
502
540
  emitter.emitThinkingEnd(content)
503
541
  }
504
542
  }
543
+ }
505
544
 
506
545
  case .toolCall(let toolCall):
507
546
  log("Tool call detected: \(toolCall.function.name)")
@@ -519,12 +558,15 @@ class HybridLLM: HybridLLMSpec {
519
558
  pendingToolCalls.append((id: toolCallId, tool: tool, args: argsDict, argsJson: argsJson))
520
559
 
521
560
  case .info(let info):
522
- log("Generation info: \(info.generationTokenCount) tokens, \(String(format: "%.1f", info.tokensPerSecond)) tokens/s")
561
+ log("gen_info_events chunks=\(chunkCount) genTokens=\(info.generationTokenCount) tps=\(String(format: "%.1f", info.tokensPerSecond))")
523
562
  let generationTime = info.tokensPerSecond > 0 ? Double(info.generationTokenCount) / info.tokensPerSecond * 1000 : 0
524
563
  onGenerationInfo(info.generationTokenCount, generationTime)
525
564
  }
526
565
  }
527
566
 
567
+ log("perform_gen_events_loop_done chunks=\(chunkCount) output=\(output.count)chars")
568
+ log("raw_output_events_first500: \(rawTokenLog.prefix(500))")
569
+
528
570
  let flushOutputs = thinkingMachine.flush()
529
571
  for machineOutput in flushOutputs {
530
572
  switch machineOutput {
@@ -614,7 +656,16 @@ class HybridLLM: HybridLLMSpec {
614
656
  }
615
657
 
616
658
  var output = ""
659
+ var thinkingMachine = ThinkingStateMachine()
617
660
  var pendingToolCalls: [(tool: ToolDefinition, args: [String: Any], argsJson: String)] = []
661
+ var rawTokenLog = ""
662
+
663
+ let specialTokenPattern = try? NSRegularExpression(
664
+ pattern: "<\\|(?:im_end|im_start|endoftext|end|pad)\\|>",
665
+ options: []
666
+ )
667
+
668
+ log("perform_gen depth=\(depth) prompt=\(prompt.count)chars toolResults=\(toolResults?.count ?? 0)")
618
669
 
619
670
  let chat = buildChatMessages(prompt: prompt, toolResults: toolResults, depth: depth)
620
671
  let userInput = UserInput(
@@ -623,6 +674,7 @@ class HybridLLM: HybridLLMSpec {
623
674
  )
624
675
 
625
676
  let lmInput = try await container.prepare(input: userInput)
677
+ log("perform_gen input_prepared")
626
678
 
627
679
  let stream = try await container.perform { context in
628
680
  let parameters = GenerateParameters(maxTokens: 2048, temperature: 0.7)
@@ -633,13 +685,55 @@ class HybridLLM: HybridLLMSpec {
633
685
  )
634
686
  }
635
687
 
688
+ var chunkCount = 0
636
689
  for await generation in stream {
637
- if Task.isCancelled { break }
690
+ if Task.isCancelled {
691
+ log("perform_gen cancelled at chunk=\(chunkCount)")
692
+ break
693
+ }
638
694
 
639
695
  switch generation {
640
696
  case .chunk(let text):
641
- output += text
642
- onToken(text)
697
+ chunkCount += 1
698
+ rawTokenLog += text
699
+ if chunkCount <= 20 || chunkCount % 50 == 0 {
700
+ log("raw_chunk[\(chunkCount)] \(text.debugDescription)")
701
+ }
702
+
703
+ let outputs = thinkingMachine.process(token: text)
704
+
705
+ for machineOutput in outputs {
706
+ switch machineOutput {
707
+ case .token(let token):
708
+ var cleaned = token
709
+ if let regex = specialTokenPattern {
710
+ let before = cleaned
711
+ cleaned = regex.stringByReplacingMatches(
712
+ in: cleaned,
713
+ range: NSRange(cleaned.startIndex..., in: cleaned),
714
+ withTemplate: ""
715
+ )
716
+ if before != cleaned {
717
+ log("stripped_special: \(before.debugDescription) -> \(cleaned.debugDescription)")
718
+ }
719
+ }
720
+ if !cleaned.isEmpty {
721
+ output += cleaned
722
+ onToken(cleaned)
723
+ }
724
+
725
+ case .thinkingStart:
726
+ log("thinking_start at chunk=\(chunkCount)")
727
+ onToken("<think>")
728
+
729
+ case .thinkingChunk(let chunk):
730
+ onToken(chunk)
731
+
732
+ case .thinkingEnd:
733
+ log("thinking_end at chunk=\(chunkCount)")
734
+ onToken("</think>")
735
+ }
736
+ }
643
737
 
644
738
  case .toolCall(let toolCall):
645
739
  log("Tool call detected: \(toolCall.function.name)")
@@ -656,7 +750,38 @@ class HybridLLM: HybridLLMSpec {
656
750
  onToolCall(toolCall.function.name, argsJson)
657
751
 
658
752
  case .info(let info):
659
- log("Generation info: \(info.generationTokenCount) tokens, \(String(format: "%.1f", info.tokensPerSecond)) tokens/s")
753
+ log("gen_info chunks=\(chunkCount) genTokens=\(info.generationTokenCount) tps=\(String(format: "%.1f", info.tokensPerSecond))")
754
+ }
755
+ }
756
+
757
+ log("perform_gen_loop_done chunks=\(chunkCount) output=\(output.count)chars")
758
+ log("raw_output_first500: \(rawTokenLog.prefix(500))")
759
+
760
+ let flushOutputs = thinkingMachine.flush()
761
+ if !flushOutputs.isEmpty {
762
+ log("flush_outputs count=\(flushOutputs.count)")
763
+ }
764
+ for machineOutput in flushOutputs {
765
+ switch machineOutput {
766
+ case .token(let token):
767
+ var cleaned = token
768
+ if let regex = specialTokenPattern {
769
+ cleaned = regex.stringByReplacingMatches(
770
+ in: cleaned,
771
+ range: NSRange(cleaned.startIndex..., in: cleaned),
772
+ withTemplate: ""
773
+ )
774
+ }
775
+ if !cleaned.isEmpty {
776
+ output += cleaned
777
+ onToken(cleaned)
778
+ }
779
+ case .thinkingStart:
780
+ onToken("<think>")
781
+ case .thinkingChunk(let chunk):
782
+ onToken(chunk)
783
+ case .thinkingEnd:
784
+ onToken("</think>")
660
785
  }
661
786
  }
662
787
 
@@ -710,6 +835,7 @@ class HybridLLM: HybridLLMSpec {
710
835
  return output + continuation
711
836
  }
712
837
 
838
+ log("perform_gen_result output=\(output.count)chars preview: \(output.prefix(200))")
713
839
  return output
714
840
  }
715
841
 
@@ -793,10 +919,14 @@ class HybridLLM: HybridLLMSpec {
793
919
  }
794
920
 
795
921
  func clearHistory() throws {
922
+ log("clear_history before=\(messageHistory.count) messages")
923
+ for (i, msg) in messageHistory.enumerated() {
924
+ log(" clearing[\(i):\(msg.role)] \(msg.content.prefix(80))")
925
+ }
796
926
  messageHistory = []
797
927
  if let container = self.container {
798
928
  self.session = ChatSession(container, instructions: self.systemPrompt)
799
929
  }
800
- log("History and session cleared")
930
+ log("clear_history done session_reset")
801
931
  }
802
932
  }
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@inferrlm/react-native-mlx",
3
3
  "description": "MLX Swift integration for React Native - InferrLM fork with enhanced features",
4
- "version": "0.4.2-alpha.2",
4
+ "version": "0.4.2-alpha.4",
5
5
  "main": "./lib/module/index.js",
6
6
  "module": "./lib/module/index.js",
7
7
  "types": "./lib/typescript/src/index.d.ts",