npm - @inferrlm/react-native-mlx - Versions diffs - 0.4.2-alpha.1 → 0.4.2-alpha.3 - Mend

@inferrlm/react-native-mlx 0.4.2-alpha.1 → 0.4.2-alpha.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (2)

package/ios/Sources/HybridLLM.swift +84 -18
package/package.json +1 -1

package/ios/Sources/HybridLLM.swift CHANGED Viewed

@@ -230,10 +230,6 @@ class HybridLLM: HybridLLMSpec {
         }
         return Promise.async { [self] in
-            if self.manageHistory {
-                self.messageHistory.append(LLMMessage(role: "user", content: prompt))
-            }
             let task = Task<String, Error> {
                 log("Generating response for: \(prompt.prefix(50))...")
                 let result = try await session.respond(to: prompt)
@@ -247,6 +243,7 @@ class HybridLLM: HybridLLMSpec {
             let result = try await task.value
             if self.manageHistory {
+                self.messageHistory.append(LLMMessage(role: "user", content: prompt))
                 self.messageHistory.append(LLMMessage(role: "assistant", content: result))
             }
@@ -266,10 +263,6 @@ class HybridLLM: HybridLLMSpec {
         }
         return Promise.async { [self] in
-            if self.manageHistory {
-                self.messageHistory.append(LLMMessage(role: "user", content: prompt))
-            }
             let task = Task<String, Error> {
                 let startTime = Date()
                 var firstTokenTime: Date?
@@ -313,6 +306,7 @@ class HybridLLM: HybridLLMSpec {
             let result = try await task.value
             if self.manageHistory {
+                self.messageHistory.append(LLMMessage(role: "user", content: prompt))
                 self.messageHistory.append(LLMMessage(role: "assistant", content: result))
             }
@@ -329,10 +323,6 @@ class HybridLLM: HybridLLMSpec {
         }
         return Promise.async { [self] in
-            if self.manageHistory {
-                self.messageHistory.append(LLMMessage(role: "user", content: prompt))
-            }
             let task = Task<String, Error> {
                 let startTime = Date()
                 var firstTokenTime: Date?
@@ -389,6 +379,7 @@ class HybridLLM: HybridLLMSpec {
             let result = try await task.value
             if self.manageHistory {
+                self.messageHistory.append(LLMMessage(role: "user", content: prompt))
                 self.messageHistory.append(LLMMessage(role: "assistant", content: result))
             }
@@ -461,6 +452,11 @@ class HybridLLM: HybridLLMSpec {
         var thinkingMachine = ThinkingStateMachine()
         var pendingToolCalls: [(id: String, tool: ToolDefinition, args: [String: Any], argsJson: String)] = []
+        let specialTokenPattern = try? NSRegularExpression(
+            pattern: "<\\|(?:im_end|im_start|endoftext|end|pad)\\|>",
+            options: []
+        )
         let chat = buildChatMessages(prompt: prompt, toolResults: toolResults, depth: depth)
         let userInput = UserInput(
             chat: chat,
@@ -488,9 +484,19 @@ class HybridLLM: HybridLLMSpec {
                 for machineOutput in outputs {
                     switch machineOutput {
                     case .token(let token):
-                        output += token
-                        emitter.emitToken(token)
-                        onTokenProcessed()
+                        var cleaned = token
+                        if let regex = specialTokenPattern {
+                            cleaned = regex.stringByReplacingMatches(
+                                in: cleaned,
+                                range: NSRange(cleaned.startIndex..., in: cleaned),
+                                withTemplate: ""
+                            )
+                        }
+                        if !cleaned.isEmpty {
+                            output += cleaned
+                            emitter.emitToken(cleaned)
+                            onTokenProcessed()
+                        }
                     case .thinkingStart:
                         emitter.emitThinkingStart()
@@ -614,8 +620,14 @@ class HybridLLM: HybridLLMSpec {
         }
         var output = ""
+        var thinkingMachine = ThinkingStateMachine()
         var pendingToolCalls: [(tool: ToolDefinition, args: [String: Any], argsJson: String)] = []
+        let specialTokenPattern = try? NSRegularExpression(
+            pattern: "<\\|(?:im_end|im_start|endoftext|end|pad)\\|>",
+            options: []
+        )
         let chat = buildChatMessages(prompt: prompt, toolResults: toolResults, depth: depth)
         let userInput = UserInput(
             chat: chat,
@@ -638,8 +650,34 @@ class HybridLLM: HybridLLMSpec {
             switch generation {
             case .chunk(let text):
-                output += text
-                onToken(text)
+                let outputs = thinkingMachine.process(token: text)
+                for machineOutput in outputs {
+                    switch machineOutput {
+                    case .token(let token):
+                        var cleaned = token
+                        if let regex = specialTokenPattern {
+                            cleaned = regex.stringByReplacingMatches(
+                                in: cleaned,
+                                range: NSRange(cleaned.startIndex..., in: cleaned),
+                                withTemplate: ""
+                            )
+                        }
+                        if !cleaned.isEmpty {
+                            output += cleaned
+                            onToken(cleaned)
+                        }
+                    case .thinkingStart:
+                        onToken("<think>")
+                    case .thinkingChunk(let chunk):
+                        onToken(chunk)
+                    case .thinkingEnd:
+                        onToken("</think>")
+                    }
+                }
             case .toolCall(let toolCall):
                 log("Tool call detected: \(toolCall.function.name)")
@@ -660,6 +698,31 @@ class HybridLLM: HybridLLMSpec {
             }
         }
+        let flushOutputs = thinkingMachine.flush()
+        for machineOutput in flushOutputs {
+            switch machineOutput {
+            case .token(let token):
+                var cleaned = token
+                if let regex = specialTokenPattern {
+                    cleaned = regex.stringByReplacingMatches(
+                        in: cleaned,
+                        range: NSRange(cleaned.startIndex..., in: cleaned),
+                        withTemplate: ""
+                    )
+                }
+                if !cleaned.isEmpty {
+                    output += cleaned
+                    onToken(cleaned)
+                }
+            case .thinkingStart:
+                onToken("<think>")
+            case .thinkingChunk(let chunk):
+                onToken(chunk)
+            case .thinkingEnd:
+                onToken("</think>")
+            }
+        }
         if !pendingToolCalls.isEmpty {
             log("Executing \(pendingToolCalls.count) tool call(s)")
@@ -794,6 +857,9 @@ class HybridLLM: HybridLLMSpec {
     func clearHistory() throws {
         messageHistory = []
-        log("Message history cleared")
+        if let container = self.container {
+            self.session = ChatSession(container, instructions: self.systemPrompt)
+        }
+        log("History and session cleared")
     }
 }

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "@inferrlm/react-native-mlx",
   "description": "MLX Swift integration for React Native - InferrLM fork with enhanced features",
-  "version": "0.4.2-alpha.1",
+  "version": "0.4.2-alpha.3",
   "main": "./lib/module/index.js",
   "module": "./lib/module/index.js",
   "types": "./lib/typescript/src/index.d.ts",