@inferrlm/react-native-mlx 0.4.2-alpha.0 → 0.4.2-alpha.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ios/Sources/HybridLLM.swift +287 -47
- package/lib/module/llm.js +18 -0
- package/lib/module/llm.js.map +1 -1
- package/lib/typescript/src/llm.d.ts +3 -0
- package/lib/typescript/src/llm.d.ts.map +1 -1
- package/lib/typescript/src/specs/LLM.nitro.d.ts +6 -0
- package/lib/typescript/src/specs/LLM.nitro.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/llm.ts +24 -0
- package/src/specs/LLM.nitro.ts +6 -0
|
@@ -29,7 +29,11 @@ class HybridLLM: HybridLLMSpec {
|
|
|
29
29
|
var modelId: String = ""
|
|
30
30
|
var debug: Bool = false
|
|
31
31
|
var systemPrompt: String = "You are a helpful assistant."
|
|
32
|
+
var maxTokens: Int = 2048
|
|
33
|
+
var temperature: Float = 0.7
|
|
34
|
+
var enableThinking: Bool = true
|
|
32
35
|
var additionalContext: LLMMessage = LLMMessage()
|
|
36
|
+
private var lastInputContainedThinkTag = false
|
|
33
37
|
|
|
34
38
|
private func log(_ message: String) {
|
|
35
39
|
if debug {
|
|
@@ -72,16 +76,27 @@ class HybridLLM: HybridLLMSpec {
|
|
|
72
76
|
let json = try? JSONSerialization.jsonObject(with: data) as? [String: Any]
|
|
73
77
|
else { return [] }
|
|
74
78
|
|
|
79
|
+
var allIds = Set<Int>()
|
|
80
|
+
|
|
75
81
|
if let ids = extractEosIds(from: json) {
|
|
76
|
-
|
|
82
|
+
allIds.formUnion(ids)
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
for key in ["text_config", "language_config", "llm_config"] {
|
|
86
|
+
if let nested = json[key] as? [String: Any],
|
|
87
|
+
let ids = extractEosIds(from: nested) {
|
|
88
|
+
allIds.formUnion(ids)
|
|
89
|
+
}
|
|
77
90
|
}
|
|
78
91
|
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
92
|
+
let genConfigURL = modelDir.appendingPathComponent("generation_config.json")
|
|
93
|
+
if let genData = try? Data(contentsOf: genConfigURL),
|
|
94
|
+
let genJson = try? JSONSerialization.jsonObject(with: genData) as? [String: Any],
|
|
95
|
+
let ids = extractEosIds(from: genJson) {
|
|
96
|
+
allIds = ids
|
|
82
97
|
}
|
|
83
98
|
|
|
84
|
-
return
|
|
99
|
+
return allIds
|
|
85
100
|
}
|
|
86
101
|
|
|
87
102
|
private func extractEosIds(from dict: [String: Any]) -> Set<Int>? {
|
|
@@ -157,16 +172,27 @@ class HybridLLM: HybridLLMSpec {
|
|
|
157
172
|
mlx-swift-lm only reads top-level eos_token_id from config.json.
|
|
158
173
|
Models like Qwen3.5 nest it inside text_config, leaving the stop
|
|
159
174
|
set empty. Parse it ourselves and patch the container.
|
|
175
|
+
Also add common chat stop tokens as extraEOSTokens.
|
|
160
176
|
*/
|
|
161
177
|
let containerEos = await loadedContainer.configuration.eosTokenIds
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
178
|
+
let containerExtra = await loadedContainer.configuration.extraEOSTokens
|
|
179
|
+
log("EOS state after load - ids: \(containerEos), extra: \(containerExtra)")
|
|
180
|
+
|
|
181
|
+
let parsed = self.parseEosTokenIds(from: modelDir)
|
|
182
|
+
let chatStopTokens: Set<String> = ["<|endoftext|>", "<|im_end|>", "<|im_start|>"]
|
|
183
|
+
let needsIdPatch = containerEos.isEmpty && !parsed.isEmpty
|
|
184
|
+
let missingExtra = chatStopTokens.subtracting(containerExtra)
|
|
185
|
+
|
|
186
|
+
if needsIdPatch || !missingExtra.isEmpty {
|
|
187
|
+
await loadedContainer.update { ctx in
|
|
188
|
+
if needsIdPatch {
|
|
167
189
|
ctx.configuration.eosTokenIds = parsed
|
|
168
190
|
}
|
|
191
|
+
ctx.configuration.extraEOSTokens.formUnion(chatStopTokens)
|
|
169
192
|
}
|
|
193
|
+
let updated = await loadedContainer.configuration.eosTokenIds
|
|
194
|
+
let updatedExtra = await loadedContainer.configuration.extraEOSTokens
|
|
195
|
+
log("EOS patched - ids: \(updated), extra: \(updatedExtra)")
|
|
170
196
|
}
|
|
171
197
|
|
|
172
198
|
let memoryAfterContainer = self.getMemoryUsage()
|
|
@@ -208,10 +234,6 @@ class HybridLLM: HybridLLMSpec {
|
|
|
208
234
|
}
|
|
209
235
|
|
|
210
236
|
return Promise.async { [self] in
|
|
211
|
-
if self.manageHistory {
|
|
212
|
-
self.messageHistory.append(LLMMessage(role: "user", content: prompt))
|
|
213
|
-
}
|
|
214
|
-
|
|
215
237
|
let task = Task<String, Error> {
|
|
216
238
|
log("Generating response for: \(prompt.prefix(50))...")
|
|
217
239
|
let result = try await session.respond(to: prompt)
|
|
@@ -225,6 +247,7 @@ class HybridLLM: HybridLLMSpec {
|
|
|
225
247
|
let result = try await task.value
|
|
226
248
|
|
|
227
249
|
if self.manageHistory {
|
|
250
|
+
self.messageHistory.append(LLMMessage(role: "user", content: prompt))
|
|
228
251
|
self.messageHistory.append(LLMMessage(role: "assistant", content: result))
|
|
229
252
|
}
|
|
230
253
|
|
|
@@ -243,16 +266,21 @@ class HybridLLM: HybridLLMSpec {
|
|
|
243
266
|
throw LLMError.notLoaded
|
|
244
267
|
}
|
|
245
268
|
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
269
|
+
if let prev = currentTask {
|
|
270
|
+
log("stream_cancelling_previous")
|
|
271
|
+
prev.cancel()
|
|
272
|
+
currentTask = nil
|
|
273
|
+
}
|
|
250
274
|
|
|
275
|
+
return Promise.async { [self] in
|
|
251
276
|
let task = Task<String, Error> {
|
|
252
277
|
let startTime = Date()
|
|
253
278
|
var firstTokenTime: Date?
|
|
254
279
|
var tokenCount = 0
|
|
255
280
|
|
|
281
|
+
log("stream_start prompt=\(prompt.count)chars history=\(self.messageHistory.count) manageHistory=\(self.manageHistory) maxTokens=\(self.maxTokens) temperature=\(self.temperature)")
|
|
282
|
+
log("stream_prompt: \(prompt)")
|
|
283
|
+
|
|
256
284
|
let result = try await self.performGeneration(
|
|
257
285
|
container: container,
|
|
258
286
|
prompt: prompt,
|
|
@@ -281,7 +309,8 @@ class HybridLLM: HybridLLMSpec {
|
|
|
281
309
|
toolExecutionTime: 0
|
|
282
310
|
)
|
|
283
311
|
|
|
284
|
-
log("
|
|
312
|
+
log("stream_done tokens=\(tokenCount) tps=\(String(format: "%.1f", tokensPerSecond)) result=\(result.count)chars")
|
|
313
|
+
log("stream_result: \(result)")
|
|
285
314
|
return result
|
|
286
315
|
}
|
|
287
316
|
|
|
@@ -291,7 +320,9 @@ class HybridLLM: HybridLLMSpec {
|
|
|
291
320
|
let result = try await task.value
|
|
292
321
|
|
|
293
322
|
if self.manageHistory {
|
|
323
|
+
self.messageHistory.append(LLMMessage(role: "user", content: prompt))
|
|
294
324
|
self.messageHistory.append(LLMMessage(role: "assistant", content: result))
|
|
325
|
+
log("stream_history_updated count=\(self.messageHistory.count)")
|
|
295
326
|
}
|
|
296
327
|
|
|
297
328
|
return result
|
|
@@ -306,11 +337,13 @@ class HybridLLM: HybridLLMSpec {
|
|
|
306
337
|
throw LLMError.notLoaded
|
|
307
338
|
}
|
|
308
339
|
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
340
|
+
if let prev = currentTask {
|
|
341
|
+
log("streamWithEvents_cancelling_previous")
|
|
342
|
+
prev.cancel()
|
|
343
|
+
currentTask = nil
|
|
344
|
+
}
|
|
313
345
|
|
|
346
|
+
return Promise.async { [self] in
|
|
314
347
|
let task = Task<String, Error> {
|
|
315
348
|
let startTime = Date()
|
|
316
349
|
var firstTokenTime: Date?
|
|
@@ -367,6 +400,7 @@ class HybridLLM: HybridLLMSpec {
|
|
|
367
400
|
let result = try await task.value
|
|
368
401
|
|
|
369
402
|
if self.manageHistory {
|
|
403
|
+
self.messageHistory.append(LLMMessage(role: "user", content: prompt))
|
|
370
404
|
self.messageHistory.append(LLMMessage(role: "assistant", content: result))
|
|
371
405
|
}
|
|
372
406
|
|
|
@@ -374,6 +408,8 @@ class HybridLLM: HybridLLMSpec {
|
|
|
374
408
|
}
|
|
375
409
|
}
|
|
376
410
|
|
|
411
|
+
private static let fallbackTemplate = "{%- for message in messages %}{{'<|im_start|>' + message['role'] + '\\n' + message['content'] + '<|im_end|>' + '\\n'}}{%- endfor %}{%- if add_generation_prompt %}{%- if enable_thinking is defined and enable_thinking is true %}{{ '<|im_start|>assistant\\n<think>\\n' }}{%- else %}{{ '<|im_start|>assistant\\n' }}{%- endif %}{%- endif %}"
|
|
412
|
+
|
|
377
413
|
private func buildChatMessages(
|
|
378
414
|
prompt: String,
|
|
379
415
|
toolResults: [String]?,
|
|
@@ -381,11 +417,14 @@ class HybridLLM: HybridLLMSpec {
|
|
|
381
417
|
) -> [Chat.Message] {
|
|
382
418
|
var chat: [Chat.Message] = []
|
|
383
419
|
|
|
420
|
+
log("build_chat depth=\(depth) history=\(self.messageHistory.count) prompt=\(prompt.count)chars")
|
|
421
|
+
|
|
384
422
|
if !self.systemPrompt.isEmpty {
|
|
385
423
|
chat.append(.system(self.systemPrompt))
|
|
424
|
+
log(" [system] \(self.systemPrompt)")
|
|
386
425
|
}
|
|
387
426
|
|
|
388
|
-
for msg in self.messageHistory {
|
|
427
|
+
for (i, msg) in self.messageHistory.enumerated() {
|
|
389
428
|
switch msg.role {
|
|
390
429
|
case "user": chat.append(.user(msg.content))
|
|
391
430
|
case "assistant": chat.append(.assistant(msg.content))
|
|
@@ -393,21 +432,71 @@ class HybridLLM: HybridLLMSpec {
|
|
|
393
432
|
case "tool": chat.append(.tool(msg.content))
|
|
394
433
|
default: break
|
|
395
434
|
}
|
|
435
|
+
log(" [\(i):\(msg.role)] \(msg.content)")
|
|
396
436
|
}
|
|
397
437
|
|
|
398
438
|
if depth == 0 {
|
|
399
439
|
chat.append(.user(prompt))
|
|
440
|
+
log(" [prompt] \(prompt)")
|
|
400
441
|
}
|
|
401
442
|
|
|
402
443
|
if let toolResults {
|
|
403
|
-
for result in toolResults {
|
|
444
|
+
for (i, result) in toolResults.enumerated() {
|
|
404
445
|
chat.append(.tool(result))
|
|
446
|
+
log(" [tool_result_\(i)] \(result)")
|
|
405
447
|
}
|
|
406
448
|
}
|
|
407
449
|
|
|
450
|
+
log("chat_built total=\(chat.count) messages")
|
|
408
451
|
return chat
|
|
409
452
|
}
|
|
410
453
|
|
|
454
|
+
/// Renders `chat` through the tokenizer's chat template and wraps the resulting
/// token ids in an `LMInput`.
///
/// The tokenizer's own template is tried first, with `toolSchemas` (when
/// non-empty) and an `enable_thinking` flag taken from `enableThinking`. If
/// that call throws, the same messages are rendered again with
/// `HybridLLM.fallbackTemplate`, without tools.
///
/// Side effect: updates `lastInputContainedThinkTag`. The flag is set only
/// when the rendered prompt *ends* with an open `<think>` tag (ignoring
/// trailing whitespace). A plain substring check would also fire when the
/// template emits an already-closed `<think>…</think>` block, or when recent
/// message text merely mentions `<think>`, and callers would then seed the
/// thinking state machine even though the model is about to produce a normal
/// answer.
///
/// - Parameters:
///   - container: Model container whose tokenizer renders the template.
///   - chat: Messages to render, in order.
/// - Returns: An `LMInput` built from the rendered token ids.
/// - Throws: Rethrows the error from the fallback template render, or any
///   error surfaced by `container.perform`.
private func prepareInput(
    container: ModelContainer,
    chat: [Chat.Message]
) async throws -> LMInput {
    let tools = !self.toolSchemas.isEmpty ? self.toolSchemas : nil
    let additionalCtx: [String: any Sendable] = ["enable_thinking": self.enableThinking]

    let messages: [[String: any Sendable]] = chat.map {
        ["role": $0.role.rawValue, "content": $0.content]
    }

    let tokens: [Int] = try await container.perform { (context: ModelContext) in
        // Shared bookkeeping for both render paths: log the token count and the
        // decoded tail of the prompt, then record whether the prompt leaves a
        // <think> block open. `prefix` is "" for the model template and
        // "fallback_" for the fallback, which keeps the log keys unchanged.
        let recordTail: ([Int], String) -> Void = { rendered, prefix in
            self.log("\(prefix)template_applied token_count=\(rendered.count)")
            let decoded = context.tokenizer.decode(tokens: Array(rendered.suffix(60)))
            self.log("\(prefix)input_tail_decoded: \(decoded)")
            let trimmedTail = decoded.trimmingCharacters(in: .whitespacesAndNewlines)
            self.lastInputContainedThinkTag = trimmedTail.hasSuffix("<think>")
        }

        do {
            let result = try context.tokenizer.applyChatTemplate(
                messages: messages,
                tools: tools,
                additionalContext: additionalCtx
            )
            recordTail(result, "")
            return result
        } catch {
            // The model's own template failed to render; fall back to the
            // built-in ChatML-style template rather than failing the request.
            self.log("template_error: \(error), retrying with fallback")
            let result = try context.tokenizer.applyChatTemplate(
                messages: messages,
                chatTemplate: .literal(HybridLLM.fallbackTemplate),
                addGenerationPrompt: true,
                truncation: false,
                maxLength: nil,
                tools: nil,
                additionalContext: additionalCtx
            )
            recordTail(result, "fallback_")
            return result
        }
    }

    return LMInput(tokens: MLXArray(tokens))
}
|
|
499
|
+
|
|
411
500
|
private func executeToolCall(
|
|
412
501
|
tool: ToolDefinition,
|
|
413
502
|
argsDict: [String: Any]
|
|
@@ -438,17 +527,34 @@ class HybridLLM: HybridLLMSpec {
|
|
|
438
527
|
var output = ""
|
|
439
528
|
var thinkingMachine = ThinkingStateMachine()
|
|
440
529
|
var pendingToolCalls: [(id: String, tool: ToolDefinition, args: [String: Any], argsJson: String)] = []
|
|
530
|
+
var rawTokenLog = ""
|
|
441
531
|
|
|
442
|
-
let
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
tools: !self.toolSchemas.isEmpty ? self.toolSchemas : nil
|
|
532
|
+
let specialTokenPattern = try? NSRegularExpression(
|
|
533
|
+
pattern: "<\\|(?:im_end|im_start|endoftext|end|pad)\\|>",
|
|
534
|
+
options: []
|
|
446
535
|
)
|
|
447
536
|
|
|
448
|
-
|
|
537
|
+
log("perform_gen_events depth=\(depth) prompt=\(prompt.count)chars toolResults=\(toolResults?.count ?? 0)")
|
|
538
|
+
|
|
539
|
+
let chat = buildChatMessages(prompt: prompt, toolResults: toolResults, depth: depth)
|
|
540
|
+
let lmInput = try await prepareInput(container: container, chat: chat)
|
|
541
|
+
log("perform_gen_events input_prepared messages=\(chat.count) maxTokens=\(self.maxTokens) temperature=\(self.temperature)")
|
|
542
|
+
|
|
543
|
+
if self.lastInputContainedThinkTag {
|
|
544
|
+
let seed = thinkingMachine.process(token: "<think>")
|
|
545
|
+
for seedOutput in seed {
|
|
546
|
+
switch seedOutput {
|
|
547
|
+
case .thinkingStart:
|
|
548
|
+
log("thinking_seeded_events")
|
|
549
|
+
emitter.emitThinkingStart()
|
|
550
|
+
default:
|
|
551
|
+
break
|
|
552
|
+
}
|
|
553
|
+
}
|
|
554
|
+
}
|
|
449
555
|
|
|
450
556
|
let stream = try await container.perform { context in
|
|
451
|
-
let parameters = GenerateParameters(maxTokens:
|
|
557
|
+
let parameters = GenerateParameters(maxTokens: self.maxTokens, temperature: Float(self.temperature))
|
|
452
558
|
return try MLXLMCommon.generate(
|
|
453
559
|
input: lmInput,
|
|
454
560
|
parameters: parameters,
|
|
@@ -456,27 +562,53 @@ class HybridLLM: HybridLLMSpec {
|
|
|
456
562
|
)
|
|
457
563
|
}
|
|
458
564
|
|
|
565
|
+
var chunkCount = 0
|
|
459
566
|
for await generation in stream {
|
|
460
|
-
if Task.isCancelled {
|
|
567
|
+
if Task.isCancelled {
|
|
568
|
+
log("perform_gen_events cancelled at chunk=\(chunkCount)")
|
|
569
|
+
break
|
|
570
|
+
}
|
|
461
571
|
|
|
462
572
|
switch generation {
|
|
463
573
|
case .chunk(let text):
|
|
574
|
+
chunkCount += 1
|
|
575
|
+
rawTokenLog += text
|
|
576
|
+
if chunkCount <= 20 || chunkCount % 50 == 0 {
|
|
577
|
+
log("raw_chunk_events[\(chunkCount)] \(text.debugDescription)")
|
|
578
|
+
}
|
|
579
|
+
|
|
464
580
|
let outputs = thinkingMachine.process(token: text)
|
|
465
581
|
|
|
466
582
|
for machineOutput in outputs {
|
|
467
583
|
switch machineOutput {
|
|
468
584
|
case .token(let token):
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
585
|
+
var cleaned = token
|
|
586
|
+
if let regex = specialTokenPattern {
|
|
587
|
+
let before = cleaned
|
|
588
|
+
cleaned = regex.stringByReplacingMatches(
|
|
589
|
+
in: cleaned,
|
|
590
|
+
range: NSRange(cleaned.startIndex..., in: cleaned),
|
|
591
|
+
withTemplate: ""
|
|
592
|
+
)
|
|
593
|
+
if before != cleaned {
|
|
594
|
+
log("stripped_special_events: \(before.debugDescription) -> \(cleaned.debugDescription)")
|
|
595
|
+
}
|
|
596
|
+
}
|
|
597
|
+
if !cleaned.isEmpty {
|
|
598
|
+
output += cleaned
|
|
599
|
+
emitter.emitToken(cleaned)
|
|
600
|
+
onTokenProcessed()
|
|
601
|
+
}
|
|
472
602
|
|
|
473
603
|
case .thinkingStart:
|
|
604
|
+
log("thinking_start_events at chunk=\(chunkCount)")
|
|
474
605
|
emitter.emitThinkingStart()
|
|
475
606
|
|
|
476
607
|
case .thinkingChunk(let chunk):
|
|
477
608
|
emitter.emitThinkingChunk(chunk)
|
|
478
609
|
|
|
479
610
|
case .thinkingEnd(let content):
|
|
611
|
+
log("thinking_end_events at chunk=\(chunkCount)")
|
|
480
612
|
emitter.emitThinkingEnd(content)
|
|
481
613
|
}
|
|
482
614
|
}
|
|
@@ -497,12 +629,15 @@ class HybridLLM: HybridLLMSpec {
|
|
|
497
629
|
pendingToolCalls.append((id: toolCallId, tool: tool, args: argsDict, argsJson: argsJson))
|
|
498
630
|
|
|
499
631
|
case .info(let info):
|
|
500
|
-
log("
|
|
632
|
+
log("gen_info_events chunks=\(chunkCount) genTokens=\(info.generationTokenCount) tps=\(String(format: "%.1f", info.tokensPerSecond))")
|
|
501
633
|
let generationTime = info.tokensPerSecond > 0 ? Double(info.generationTokenCount) / info.tokensPerSecond * 1000 : 0
|
|
502
634
|
onGenerationInfo(info.generationTokenCount, generationTime)
|
|
503
635
|
}
|
|
504
636
|
}
|
|
505
637
|
|
|
638
|
+
log("perform_gen_events_loop_done chunks=\(chunkCount) output=\(output.count)chars")
|
|
639
|
+
log("raw_output_events: \(rawTokenLog)")
|
|
640
|
+
|
|
506
641
|
let flushOutputs = thinkingMachine.flush()
|
|
507
642
|
for machineOutput in flushOutputs {
|
|
508
643
|
switch machineOutput {
|
|
@@ -592,18 +727,42 @@ class HybridLLM: HybridLLMSpec {
|
|
|
592
727
|
}
|
|
593
728
|
|
|
594
729
|
var output = ""
|
|
730
|
+
var thinkingMachine = ThinkingStateMachine()
|
|
595
731
|
var pendingToolCalls: [(tool: ToolDefinition, args: [String: Any], argsJson: String)] = []
|
|
732
|
+
var rawTokenLog = ""
|
|
596
733
|
|
|
597
|
-
let
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
tools: !self.toolSchemas.isEmpty ? self.toolSchemas : nil
|
|
734
|
+
let specialTokenPattern = try? NSRegularExpression(
|
|
735
|
+
pattern: "<\\|(?:im_end|im_start|endoftext|end|pad)\\|>",
|
|
736
|
+
options: []
|
|
601
737
|
)
|
|
602
738
|
|
|
603
|
-
|
|
739
|
+
log("perform_gen depth=\(depth) prompt=\(prompt.count)chars toolResults=\(toolResults?.count ?? 0)")
|
|
740
|
+
|
|
741
|
+
let chat = buildChatMessages(prompt: prompt, toolResults: toolResults, depth: depth)
|
|
742
|
+
let lmInput = try await prepareInput(container: container, chat: chat)
|
|
743
|
+
log("perform_gen input_prepared messages=\(chat.count) maxTokens=\(self.maxTokens) temperature=\(self.temperature)")
|
|
744
|
+
|
|
745
|
+
/*
|
|
746
|
+
When the chat template injects <think> at the end of the prompt,
|
|
747
|
+
the model generates thinking content directly — the opening tag
|
|
748
|
+
is NOT part of the generated stream. Seed the state machine so
|
|
749
|
+
the TS layer receives <think> and sets isThinking = true.
|
|
750
|
+
*/
|
|
751
|
+
if self.lastInputContainedThinkTag {
|
|
752
|
+
let seed = thinkingMachine.process(token: "<think>")
|
|
753
|
+
for seedOutput in seed {
|
|
754
|
+
switch seedOutput {
|
|
755
|
+
case .thinkingStart:
|
|
756
|
+
log("thinking_seeded")
|
|
757
|
+
onToken("<think>")
|
|
758
|
+
default:
|
|
759
|
+
break
|
|
760
|
+
}
|
|
761
|
+
}
|
|
762
|
+
}
|
|
604
763
|
|
|
605
764
|
let stream = try await container.perform { context in
|
|
606
|
-
let parameters = GenerateParameters(maxTokens:
|
|
765
|
+
let parameters = GenerateParameters(maxTokens: self.maxTokens, temperature: Float(self.temperature))
|
|
607
766
|
return try MLXLMCommon.generate(
|
|
608
767
|
input: lmInput,
|
|
609
768
|
parameters: parameters,
|
|
@@ -611,13 +770,55 @@ class HybridLLM: HybridLLMSpec {
|
|
|
611
770
|
)
|
|
612
771
|
}
|
|
613
772
|
|
|
773
|
+
var chunkCount = 0
|
|
614
774
|
for await generation in stream {
|
|
615
|
-
if Task.isCancelled {
|
|
775
|
+
if Task.isCancelled {
|
|
776
|
+
log("perform_gen cancelled at chunk=\(chunkCount)")
|
|
777
|
+
break
|
|
778
|
+
}
|
|
616
779
|
|
|
617
780
|
switch generation {
|
|
618
781
|
case .chunk(let text):
|
|
619
|
-
|
|
620
|
-
|
|
782
|
+
chunkCount += 1
|
|
783
|
+
rawTokenLog += text
|
|
784
|
+
if chunkCount <= 20 || chunkCount % 50 == 0 {
|
|
785
|
+
log("raw_chunk[\(chunkCount)] \(text.debugDescription)")
|
|
786
|
+
}
|
|
787
|
+
|
|
788
|
+
let outputs = thinkingMachine.process(token: text)
|
|
789
|
+
|
|
790
|
+
for machineOutput in outputs {
|
|
791
|
+
switch machineOutput {
|
|
792
|
+
case .token(let token):
|
|
793
|
+
var cleaned = token
|
|
794
|
+
if let regex = specialTokenPattern {
|
|
795
|
+
let before = cleaned
|
|
796
|
+
cleaned = regex.stringByReplacingMatches(
|
|
797
|
+
in: cleaned,
|
|
798
|
+
range: NSRange(cleaned.startIndex..., in: cleaned),
|
|
799
|
+
withTemplate: ""
|
|
800
|
+
)
|
|
801
|
+
if before != cleaned {
|
|
802
|
+
log("stripped_special: \(before.debugDescription) -> \(cleaned.debugDescription)")
|
|
803
|
+
}
|
|
804
|
+
}
|
|
805
|
+
if !cleaned.isEmpty {
|
|
806
|
+
output += cleaned
|
|
807
|
+
onToken(cleaned)
|
|
808
|
+
}
|
|
809
|
+
|
|
810
|
+
case .thinkingStart:
|
|
811
|
+
log("thinking_start at chunk=\(chunkCount)")
|
|
812
|
+
onToken("<think>")
|
|
813
|
+
|
|
814
|
+
case .thinkingChunk(let chunk):
|
|
815
|
+
onToken(chunk)
|
|
816
|
+
|
|
817
|
+
case .thinkingEnd:
|
|
818
|
+
log("thinking_end at chunk=\(chunkCount)")
|
|
819
|
+
onToken("</think>")
|
|
820
|
+
}
|
|
821
|
+
}
|
|
621
822
|
|
|
622
823
|
case .toolCall(let toolCall):
|
|
623
824
|
log("Tool call detected: \(toolCall.function.name)")
|
|
@@ -634,7 +835,38 @@ class HybridLLM: HybridLLMSpec {
|
|
|
634
835
|
onToolCall(toolCall.function.name, argsJson)
|
|
635
836
|
|
|
636
837
|
case .info(let info):
|
|
637
|
-
log("
|
|
838
|
+
log("gen_info chunks=\(chunkCount) genTokens=\(info.generationTokenCount) tps=\(String(format: "%.1f", info.tokensPerSecond))")
|
|
839
|
+
}
|
|
840
|
+
}
|
|
841
|
+
|
|
842
|
+
log("perform_gen_loop_done chunks=\(chunkCount) output=\(output.count)chars")
|
|
843
|
+
log("raw_output: \(rawTokenLog)")
|
|
844
|
+
|
|
845
|
+
let flushOutputs = thinkingMachine.flush()
|
|
846
|
+
if !flushOutputs.isEmpty {
|
|
847
|
+
log("flush_outputs count=\(flushOutputs.count)")
|
|
848
|
+
}
|
|
849
|
+
for machineOutput in flushOutputs {
|
|
850
|
+
switch machineOutput {
|
|
851
|
+
case .token(let token):
|
|
852
|
+
var cleaned = token
|
|
853
|
+
if let regex = specialTokenPattern {
|
|
854
|
+
cleaned = regex.stringByReplacingMatches(
|
|
855
|
+
in: cleaned,
|
|
856
|
+
range: NSRange(cleaned.startIndex..., in: cleaned),
|
|
857
|
+
withTemplate: ""
|
|
858
|
+
)
|
|
859
|
+
}
|
|
860
|
+
if !cleaned.isEmpty {
|
|
861
|
+
output += cleaned
|
|
862
|
+
onToken(cleaned)
|
|
863
|
+
}
|
|
864
|
+
case .thinkingStart:
|
|
865
|
+
onToken("<think>")
|
|
866
|
+
case .thinkingChunk(let chunk):
|
|
867
|
+
onToken(chunk)
|
|
868
|
+
case .thinkingEnd:
|
|
869
|
+
onToken("</think>")
|
|
638
870
|
}
|
|
639
871
|
}
|
|
640
872
|
|
|
@@ -688,6 +920,7 @@ class HybridLLM: HybridLLMSpec {
|
|
|
688
920
|
return output + continuation
|
|
689
921
|
}
|
|
690
922
|
|
|
923
|
+
log("perform_gen_result output=\(output.count)chars result: \(output)")
|
|
691
924
|
return output
|
|
692
925
|
}
|
|
693
926
|
|
|
@@ -771,7 +1004,14 @@ class HybridLLM: HybridLLMSpec {
|
|
|
771
1004
|
}
|
|
772
1005
|
|
|
773
1006
|
/// Empties the stored conversation history.
///
/// Every message is logged before it is discarded. When a model container is
/// present, `session` is replaced with a new `ChatSession` created from that
/// container and the current `systemPrompt`.
func clearHistory() throws {
    log("clear_history before=\(messageHistory.count) messages")
    messageHistory.enumerated().forEach { index, entry in
        log(" clearing[\(index):\(entry.role)] \(entry.content)")
    }

    messageHistory = []

    // Only rebuild the session when there is a container to build it from.
    if let activeContainer = container {
        session = ChatSession(activeContainer, instructions: systemPrompt)
    }
    log("clear_history done session_reset")
}
|
|
777
1017
|
}
|
package/lib/module/llm.js
CHANGED
|
@@ -188,6 +188,24 @@ export const LLM = {
|
|
|
188
188
|
},
|
|
189
189
|
set systemPrompt(value) {
|
|
190
190
|
getInstance().systemPrompt = value;
|
|
191
|
+
},
|
|
192
|
+
get maxTokens() {
|
|
193
|
+
return getInstance().maxTokens;
|
|
194
|
+
},
|
|
195
|
+
set maxTokens(value) {
|
|
196
|
+
getInstance().maxTokens = value;
|
|
197
|
+
},
|
|
198
|
+
get temperature() {
|
|
199
|
+
return getInstance().temperature;
|
|
200
|
+
},
|
|
201
|
+
set temperature(value) {
|
|
202
|
+
getInstance().temperature = value;
|
|
203
|
+
},
|
|
204
|
+
get enableThinking() {
|
|
205
|
+
return getInstance().enableThinking;
|
|
206
|
+
},
|
|
207
|
+
set enableThinking(value) {
|
|
208
|
+
getInstance().enableThinking = value;
|
|
191
209
|
}
|
|
192
210
|
};
|
|
193
211
|
//# sourceMappingURL=llm.js.map
|
package/lib/module/llm.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"names":["NitroModules","instance","getInstance","createHybridObject","LLM","load","modelId","options","generate","prompt","stream","onToken","onToolCall","accumulatedToolCalls","name","argsJson","args","JSON","parse","toolCall","arguments","push","allToolCalls","streamWithEvents","onEvent","eventJson","event","stop","unload","getLastGenerationStats","getHistory","clearHistory","isLoaded","isGenerating","debug","value","systemPrompt"],"sourceRoot":"../../src","sources":["llm.ts"],"mappings":";;AAAA,SAASA,YAAY,QAAQ,4BAA4B;AAUzD,IAAIC,QAAwB,GAAG,IAAI;AAiBnC,SAASC,WAAWA,CAAA,EAAY;EAC9B,IAAI,CAACD,QAAQ,EAAE;IACbA,QAAQ,GAAGD,YAAY,CAACG,kBAAkB,CAAU,KAAK,CAAC;EAC5D;EACA,OAAOF,QAAQ;AACjB;;AAEA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,OAAO,MAAMG,GAAG,GAAG;EACjB;AACF;AACA;AACA;AACA;EACEC,IAAIA,CAACC,OAAe,EAAEC,OAAuB,EAAiB;IAC5D,OAAOL,WAAW,CAAC,CAAC,CAACG,IAAI,CAACC,OAAO,EAAEC,OAAO,CAAC;EAC7C,CAAC;EAED;AACF;AACA;AACA;AACA;AACA;EACEC,QAAQA,CAACC,MAAc,EAAmB;IACxC,OAAOP,WAAW,CAAC,CAAC,CAACM,QAAQ,CAACC,MAAM,CAAC;EACvC,CAAC;EAED;AACF;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;EACEC,MAAMA,CACJD,MAAc,EACdE,OAAgC,EAChCC,UAA6C,EAC5B;IACjB,MAAMC,oBAAoC,GAAG,EAAE;IAE/C,OAAOX,WAAW,CAAC,CAAC,CAACQ,MAAM,CAACD,MAAM,EAAEE,OAAO,EAAE,CAACG,IAAY,EAAEC,QAAgB,KAAK;MAC/E,IAAIH,UAAU,EAAE;QACd,IAAI;UACF,MAAMI,IAAI,GAAGC,IAAI,CAACC,KAAK,CAACH,QAAQ,CAA4B;UAC5D,MAAMI,QAAQ,GAAG;YAAEL,IAAI;YAAEM,SAAS,EAAEJ;UAAK,CAAC;UAC1CH,oBAAoB,CAACQ,IAAI,CAACF,QAAQ,CAAC;UACnCP,UAAU,CAAC;YACTO,QAAQ;YACRG,YAAY,EAAE,CAAC,GAAGT,oBAAoB;UACxC,CAAC,CAAC;QACJ,CAAC,CAAC,MAAM;UACN,MAAMM,QAAQ,GAAG;YAAEL,IAAI;YAAEM,SAAS,EAAE,CAAC;UAAE,CAAC;UACxCP,oBAAoB,CAACQ,IAAI,CAACF,QAAQ,CAAC;UACnCP,UAAU,CAAC;YACTO,QAAQ;YACRG,YAAY,EAAE,CAAC,GAAGT,oBAAoB;UACxC,CAAC,CAAC;QACJ;MACF;IACF,CAAC,CAAC;EACJ,CAAC;EAED;AACF;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;EACEU,gBAAgBA,CAACd,MAAc,
EAAEe,OAAsB,EAAmB;IACxE,OAAOtB,WAAW,CAAC,CAAC,CAACqB,gBAAgB,CAACd,MAAM,EAAGgB,SAAiB,IAAK;MACnE,IAAI;QACF,MAAMC,KAAK,GAAGT,IAAI,CAACC,KAAK,CAACO,SAAS,CAAgB;QAClDD,OAAO,CAACE,KAAK,CAAC;MAChB,CAAC,CAAC,MAAM;QACN;MAAA;IAEJ,CAAC,CAAC;EACJ,CAAC;EAED;AACF;AACA;EACEC,IAAIA,CAAA,EAAS;IACXzB,WAAW,CAAC,CAAC,CAACyB,IAAI,CAAC,CAAC;EACtB,CAAC;EAED;AACF;AACA;AACA;EACEC,MAAMA,CAAA,EAAS;IACb1B,WAAW,CAAC,CAAC,CAAC0B,MAAM,CAAC,CAAC;EACxB,CAAC;EAED;AACF;AACA;AACA;EACEC,sBAAsBA,CAAA,EAAoB;IACxC,OAAO3B,WAAW,CAAC,CAAC,CAAC2B,sBAAsB,CAAC,CAAC;EAC/C,CAAC;EAED;AACF;AACA;AACA;EACEC,UAAUA,CAAA,EAAc;IACtB,OAAO5B,WAAW,CAAC,CAAC,CAAC4B,UAAU,CAAC,CAAC;EACnC,CAAC;EAED;AACF;AACA;EACEC,YAAYA,CAAA,EAAS;IACnB7B,WAAW,CAAC,CAAC,CAAC6B,YAAY,CAAC,CAAC;EAC9B,CAAC;EAED;EACA,IAAIC,QAAQA,CAAA,EAAY;IACtB,OAAO9B,WAAW,CAAC,CAAC,CAAC8B,QAAQ;EAC/B,CAAC;EAED;EACA,IAAIC,YAAYA,CAAA,EAAY;IAC1B,OAAO/B,WAAW,CAAC,CAAC,CAAC+B,YAAY;EACnC,CAAC;EAED;EACA,IAAI3B,OAAOA,CAAA,EAAW;IACpB,OAAOJ,WAAW,CAAC,CAAC,CAACI,OAAO;EAC9B,CAAC;EAED;EACA,IAAI4B,KAAKA,CAAA,EAAY;IACnB,OAAOhC,WAAW,CAAC,CAAC,CAACgC,KAAK;EAC5B,CAAC;EAED,IAAIA,KAAKA,CAACC,KAAc,EAAE;IACxBjC,WAAW,CAAC,CAAC,CAACgC,KAAK,GAAGC,KAAK;EAC7B,CAAC;EAED;AACF;AACA;AACA;AACA;EACE,IAAIC,YAAYA,CAAA,EAAW;IACzB,OAAOlC,WAAW,CAAC,CAAC,CAACkC,YAAY;EACnC,CAAC;EAED,IAAIA,YAAYA,CAACD,KAAa,EAAE;IAC9BjC,WAAW,CAAC,CAAC,CAACkC,YAAY,GAAGD,KAAK;EACpC;AACF,CAAC","ignoreList":[]}
|
|
1
|
+
{"version":3,"names":["NitroModules","instance","getInstance","createHybridObject","LLM","load","modelId","options","generate","prompt","stream","onToken","onToolCall","accumulatedToolCalls","name","argsJson","args","JSON","parse","toolCall","arguments","push","allToolCalls","streamWithEvents","onEvent","eventJson","event","stop","unload","getLastGenerationStats","getHistory","clearHistory","isLoaded","isGenerating","debug","value","systemPrompt","maxTokens","temperature","enableThinking"],"sourceRoot":"../../src","sources":["llm.ts"],"mappings":";;AAAA,SAASA,YAAY,QAAQ,4BAA4B;AAUzD,IAAIC,QAAwB,GAAG,IAAI;AAiBnC,SAASC,WAAWA,CAAA,EAAY;EAC9B,IAAI,CAACD,QAAQ,EAAE;IACbA,QAAQ,GAAGD,YAAY,CAACG,kBAAkB,CAAU,KAAK,CAAC;EAC5D;EACA,OAAOF,QAAQ;AACjB;;AAEA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,OAAO,MAAMG,GAAG,GAAG;EACjB;AACF;AACA;AACA;AACA;EACEC,IAAIA,CAACC,OAAe,EAAEC,OAAuB,EAAiB;IAC5D,OAAOL,WAAW,CAAC,CAAC,CAACG,IAAI,CAACC,OAAO,EAAEC,OAAO,CAAC;EAC7C,CAAC;EAED;AACF;AACA;AACA;AACA;AACA;EACEC,QAAQA,CAACC,MAAc,EAAmB;IACxC,OAAOP,WAAW,CAAC,CAAC,CAACM,QAAQ,CAACC,MAAM,CAAC;EACvC,CAAC;EAED;AACF;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;EACEC,MAAMA,CACJD,MAAc,EACdE,OAAgC,EAChCC,UAA6C,EAC5B;IACjB,MAAMC,oBAAoC,GAAG,EAAE;IAE/C,OAAOX,WAAW,CAAC,CAAC,CAACQ,MAAM,CAACD,MAAM,EAAEE,OAAO,EAAE,CAACG,IAAY,EAAEC,QAAgB,KAAK;MAC/E,IAAIH,UAAU,EAAE;QACd,IAAI;UACF,MAAMI,IAAI,GAAGC,IAAI,CAACC,KAAK,CAACH,QAAQ,CAA4B;UAC5D,MAAMI,QAAQ,GAAG;YAAEL,IAAI;YAAEM,SAAS,EAAEJ;UAAK,CAAC;UAC1CH,oBAAoB,CAACQ,IAAI,CAACF,QAAQ,CAAC;UACnCP,UAAU,CAAC;YACTO,QAAQ;YACRG,YAAY,EAAE,CAAC,GAAGT,oBAAoB;UACxC,CAAC,CAAC;QACJ,CAAC,CAAC,MAAM;UACN,MAAMM,QAAQ,GAAG;YAAEL,IAAI;YAAEM,SAAS,EAAE,CAAC;UAAE,CAAC;UACxCP,oBAAoB,CAACQ,IAAI,CAACF,QAAQ,CAAC;UACnCP,UAAU,CAAC;YACTO,QAAQ;YACRG,YAAY,EAAE,CAAC,GAAGT,oBAAoB;UACxC,CAAC,CAAC;QACJ;MACF;IACF,CAAC,CAAC;EACJ,CAAC;EAED;AACF;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AA
CA;AACA;AACA;AACA;EACEU,gBAAgBA,CAACd,MAAc,EAAEe,OAAsB,EAAmB;IACxE,OAAOtB,WAAW,CAAC,CAAC,CAACqB,gBAAgB,CAACd,MAAM,EAAGgB,SAAiB,IAAK;MACnE,IAAI;QACF,MAAMC,KAAK,GAAGT,IAAI,CAACC,KAAK,CAACO,SAAS,CAAgB;QAClDD,OAAO,CAACE,KAAK,CAAC;MAChB,CAAC,CAAC,MAAM;QACN;MAAA;IAEJ,CAAC,CAAC;EACJ,CAAC;EAED;AACF;AACA;EACEC,IAAIA,CAAA,EAAS;IACXzB,WAAW,CAAC,CAAC,CAACyB,IAAI,CAAC,CAAC;EACtB,CAAC;EAED;AACF;AACA;AACA;EACEC,MAAMA,CAAA,EAAS;IACb1B,WAAW,CAAC,CAAC,CAAC0B,MAAM,CAAC,CAAC;EACxB,CAAC;EAED;AACF;AACA;AACA;EACEC,sBAAsBA,CAAA,EAAoB;IACxC,OAAO3B,WAAW,CAAC,CAAC,CAAC2B,sBAAsB,CAAC,CAAC;EAC/C,CAAC;EAED;AACF;AACA;AACA;EACEC,UAAUA,CAAA,EAAc;IACtB,OAAO5B,WAAW,CAAC,CAAC,CAAC4B,UAAU,CAAC,CAAC;EACnC,CAAC;EAED;AACF;AACA;EACEC,YAAYA,CAAA,EAAS;IACnB7B,WAAW,CAAC,CAAC,CAAC6B,YAAY,CAAC,CAAC;EAC9B,CAAC;EAED;EACA,IAAIC,QAAQA,CAAA,EAAY;IACtB,OAAO9B,WAAW,CAAC,CAAC,CAAC8B,QAAQ;EAC/B,CAAC;EAED;EACA,IAAIC,YAAYA,CAAA,EAAY;IAC1B,OAAO/B,WAAW,CAAC,CAAC,CAAC+B,YAAY;EACnC,CAAC;EAED;EACA,IAAI3B,OAAOA,CAAA,EAAW;IACpB,OAAOJ,WAAW,CAAC,CAAC,CAACI,OAAO;EAC9B,CAAC;EAED;EACA,IAAI4B,KAAKA,CAAA,EAAY;IACnB,OAAOhC,WAAW,CAAC,CAAC,CAACgC,KAAK;EAC5B,CAAC;EAED,IAAIA,KAAKA,CAACC,KAAc,EAAE;IACxBjC,WAAW,CAAC,CAAC,CAACgC,KAAK,GAAGC,KAAK;EAC7B,CAAC;EAED;AACF;AACA;AACA;AACA;EACE,IAAIC,YAAYA,CAAA,EAAW;IACzB,OAAOlC,WAAW,CAAC,CAAC,CAACkC,YAAY;EACnC,CAAC;EAED,IAAIA,YAAYA,CAACD,KAAa,EAAE;IAC9BjC,WAAW,CAAC,CAAC,CAACkC,YAAY,GAAGD,KAAK;EACpC,CAAC;EAED,IAAIE,SAASA,CAAA,EAAW;IACtB,OAAOnC,WAAW,CAAC,CAAC,CAACmC,SAAS;EAChC,CAAC;EAED,IAAIA,SAASA,CAACF,KAAa,EAAE;IAC3BjC,WAAW,CAAC,CAAC,CAACmC,SAAS,GAAGF,KAAK;EACjC,CAAC;EAED,IAAIG,WAAWA,CAAA,EAAW;IACxB,OAAOpC,WAAW,CAAC,CAAC,CAACoC,WAAW;EAClC,CAAC;EAED,IAAIA,WAAWA,CAACH,KAAa,EAAE;IAC7BjC,WAAW,CAAC,CAAC,CAACoC,WAAW,GAAGH,KAAK;EACnC,CAAC;EAED,IAAII,cAAcA,CAAA,EAAY;IAC5B,OAAOrC,WAAW,CAAC,CAAC,CAACqC,cAAc;EACrC,CAAC;EAED,IAAIA,cAAcA,CAACJ,KAAc,EAAE;IACjCjC,WAAW,CAAC,CAAC,CAACqC,cAAc,GAAGJ,KAAK;EACtC;AACF,CAAC","ignoreList":[]}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"llm.d.ts","sourceRoot":"","sources":["../../../src/llm.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EACV,eAAe,EACf,cAAc,EAEd,WAAW,EACZ,MAAM,mBAAmB,CAAA;AAE1B,MAAM,MAAM,aAAa,GAAG,CAAC,KAAK,EAAE,WAAW,KAAK,IAAI,CAAA;AAIxD,MAAM,MAAM,OAAO,GAAG;IACpB,IAAI,EAAE,MAAM,GAAG,WAAW,GAAG,QAAQ,CAAA;IACrC,OAAO,EAAE,MAAM,CAAA;CAChB,CAAA;AAED,MAAM,MAAM,YAAY,GAAG;IACzB,IAAI,EAAE,MAAM,CAAA;IACZ,SAAS,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAA;CACnC,CAAA;AAED,MAAM,MAAM,cAAc,GAAG;IAC3B,QAAQ,EAAE,YAAY,CAAA;IACtB,YAAY,EAAE,YAAY,EAAE,CAAA;CAC7B,CAAA;AASD;;;;;;;;;;;;;;;;;;;;;GAqBG;AACH,eAAO,MAAM,GAAG;IACd;;;;OAIG;kBACW,MAAM,WAAW,cAAc,GAAG,OAAO,CAAC,IAAI,CAAC;IAI7D;;;;;OAKG;qBACc,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;IAIzC;;;;;;;;;OASG;mBAEO,MAAM,WACL,CAAC,KAAK,EAAE,MAAM,KAAK,IAAI,eACnB,CAAC,MAAM,EAAE,cAAc,KAAK,IAAI,GAC5C,OAAO,CAAC,MAAM,CAAC;IAyBlB;;;;;;;;;;;;;;;;;;;;;;;;;;;OA2BG;6BACsB,MAAM,WAAW,aAAa,GAAG,OAAO,CAAC,MAAM,CAAC;IAWzE;;OAEG;YACK,IAAI;IAIZ;;;OAGG;cACO,IAAI;IAId;;;OAGG;8BACuB,eAAe;IAIzC;;;OAGG;kBACW,OAAO,EAAE;IAIvB;;OAEG;oBACa,IAAI;IAIpB,mEAAmE;uBACnD,OAAO;IAIvB,gDAAgD;2BAC5B,OAAO;IAI3B,oEAAoE;sBACrD,MAAM;IAIrB,sCAAsC;WACzB,OAAO;IAQpB;;;;OAIG;kBACiB,MAAM;
|
|
1
|
+
{"version":3,"file":"llm.d.ts","sourceRoot":"","sources":["../../../src/llm.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EACV,eAAe,EACf,cAAc,EAEd,WAAW,EACZ,MAAM,mBAAmB,CAAA;AAE1B,MAAM,MAAM,aAAa,GAAG,CAAC,KAAK,EAAE,WAAW,KAAK,IAAI,CAAA;AAIxD,MAAM,MAAM,OAAO,GAAG;IACpB,IAAI,EAAE,MAAM,GAAG,WAAW,GAAG,QAAQ,CAAA;IACrC,OAAO,EAAE,MAAM,CAAA;CAChB,CAAA;AAED,MAAM,MAAM,YAAY,GAAG;IACzB,IAAI,EAAE,MAAM,CAAA;IACZ,SAAS,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAA;CACnC,CAAA;AAED,MAAM,MAAM,cAAc,GAAG;IAC3B,QAAQ,EAAE,YAAY,CAAA;IACtB,YAAY,EAAE,YAAY,EAAE,CAAA;CAC7B,CAAA;AASD;;;;;;;;;;;;;;;;;;;;;GAqBG;AACH,eAAO,MAAM,GAAG;IACd;;;;OAIG;kBACW,MAAM,WAAW,cAAc,GAAG,OAAO,CAAC,IAAI,CAAC;IAI7D;;;;;OAKG;qBACc,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;IAIzC;;;;;;;;;OASG;mBAEO,MAAM,WACL,CAAC,KAAK,EAAE,MAAM,KAAK,IAAI,eACnB,CAAC,MAAM,EAAE,cAAc,KAAK,IAAI,GAC5C,OAAO,CAAC,MAAM,CAAC;IAyBlB;;;;;;;;;;;;;;;;;;;;;;;;;;;OA2BG;6BACsB,MAAM,WAAW,aAAa,GAAG,OAAO,CAAC,MAAM,CAAC;IAWzE;;OAEG;YACK,IAAI;IAIZ;;;OAGG;cACO,IAAI;IAId;;;OAGG;8BACuB,eAAe;IAIzC;;;OAGG;kBACW,OAAO,EAAE;IAIvB;;OAEG;oBACa,IAAI;IAIpB,mEAAmE;uBACnD,OAAO;IAIvB,gDAAgD;2BAC5B,OAAO;IAI3B,oEAAoE;sBACrD,MAAM;IAIrB,sCAAsC;WACzB,OAAO;IAQpB;;;;OAIG;kBACiB,MAAM;eAQT,MAAM;iBAQJ,MAAM;oBAQH,OAAO;CAO9B,CAAA"}
|
|
@@ -151,6 +151,12 @@ export interface LLM extends HybridObject<{
|
|
|
151
151
|
debug: boolean;
|
|
152
152
|
/** System prompt used when loading the model */
|
|
153
153
|
systemPrompt: string;
|
|
154
|
+
/** Maximum number of tokens to generate */
|
|
155
|
+
maxTokens: number;
|
|
156
|
+
/** Sampling temperature (0 = deterministic, higher = more random) */
|
|
157
|
+
temperature: number;
|
|
158
|
+
/** Enable thinking mode for models that support it */
|
|
159
|
+
enableThinking: boolean;
|
|
154
160
|
}
|
|
155
161
|
/**
|
|
156
162
|
* Supported parameter types for tool definitions.
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"LLM.nitro.d.ts","sourceRoot":"","sources":["../../../../src/specs/LLM.nitro.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,EAAE,YAAY,EAAE,MAAM,4BAA4B,CAAA;AAEtE;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,UAAU,EAAE,MAAM,CAAA;IAClB,eAAe,EAAE,MAAM,CAAA;IACvB,gBAAgB,EAAE,MAAM,CAAA;IACxB,SAAS,EAAE,MAAM,CAAA;IACjB,iBAAiB,EAAE,MAAM,CAAA;CAC1B;AAED,MAAM,WAAW,oBAAoB;IACnC,IAAI,EAAE,kBAAkB,CAAA;IACxB,SAAS,EAAE,MAAM,CAAA;CAClB;AAED,MAAM,WAAW,UAAU;IACzB,IAAI,EAAE,OAAO,CAAA;IACb,KAAK,EAAE,MAAM,CAAA;CACd;AAED,MAAM,WAAW,kBAAkB;IACjC,IAAI,EAAE,gBAAgB,CAAA;IACtB,SAAS,EAAE,MAAM,CAAA;CAClB;AAED,MAAM,WAAW,kBAAkB;IACjC,IAAI,EAAE,gBAAgB,CAAA;IACtB,KAAK,EAAE,MAAM,CAAA;CACd;AAED,MAAM,WAAW,gBAAgB;IAC/B,IAAI,EAAE,cAAc,CAAA;IACpB,OAAO,EAAE,MAAM,CAAA;IACf,SAAS,EAAE,MAAM,CAAA;CAClB;AAED,MAAM,WAAW,kBAAkB;IACjC,IAAI,EAAE,iBAAiB,CAAA;IACvB,EAAE,EAAE,MAAM,CAAA;IACV,IAAI,EAAE,MAAM,CAAA;IACZ,SAAS,EAAE,MAAM,CAAA;CAClB;AAED,MAAM,WAAW,sBAAsB;IACrC,IAAI,EAAE,qBAAqB,CAAA;IAC3B,EAAE,EAAE,MAAM,CAAA;CACX;AAED,MAAM,WAAW,sBAAsB;IACrC,IAAI,EAAE,qBAAqB,CAAA;IAC3B,EAAE,EAAE,MAAM,CAAA;IACV,MAAM,EAAE,MAAM,CAAA;CACf;AAED,MAAM,WAAW,mBAAmB;IAClC,IAAI,EAAE,kBAAkB,CAAA;IACxB,EAAE,EAAE,MAAM,CAAA;IACV,KAAK,EAAE,MAAM,CAAA;CACd;AAED,MAAM,WAAW,kBAAkB;IACjC,IAAI,EAAE,gBAAgB,CAAA;IACtB,OAAO,EAAE,MAAM,CAAA;IACf,KAAK,EAAE,eAAe,CAAA;CACvB;AAED,MAAM,MAAM,WAAW,GACnB,oBAAoB,GACpB,UAAU,GACV,kBAAkB,GAClB,kBAAkB,GAClB,gBAAgB,GAChB,kBAAkB,GAClB,sBAAsB,GACtB,sBAAsB,GACtB,mBAAmB,GACnB,kBAAkB,CAAA;AAEtB,MAAM,WAAW,UAAU;IACzB,IAAI,EAAE,MAAM,CAAA;IACZ,OAAO,EAAE,MAAM,CAAA;CAChB;AAED;;GAEG;AACH,MAAM,WAAW,aAAa;IAC5B,IAAI,EAAE,MAAM,CAAA;IACZ,IAAI,EAAE,MAAM,CAAA;IACZ,WAAW,EAAE,MAAM,CAAA;IACnB,QAAQ,EAAE,OAAO,CAAA;CAClB;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,IAAI,EAAE,MAAM,CAAA;IACZ,WAAW,EAAE,MAAM,CAAA;IACnB,UAAU,EAAE,aAAa,EAAE,CAAA;IAC3B,OAAO,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,CAAA;CAC3C;AAED;GACG;AACH,MAAM,WAAW,cAAc;IAC7B,mDAAmD;IACnD,UAAU,CAAC,EAAE,CAAC,QAAQ,EAAE,MAAM,KAAK,IAAI,CAAA;IACvC,iDAAiD;IACjD,iBAAiB,CAAC,EAAE
,UAAU,EAAE,CAAA;IAChC,sDAAsD;IACtD,aAAa,CAAC,EAAE,OAAO,CAAA;IACvB,4CAA4C;IAC5C,KAAK,CAAC,EAAE,cAAc,EAAE,CAAA;CACzB;AAED;;;GAGG;AACH,MAAM,WAAW,GAAI,SAAQ,YAAY,CAAC;IAAE,GAAG,EAAE,OAAO,CAAA;CAAE,CAAC;IACzD;;;;OAIG;IACH,IAAI,CAAC,OAAO,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,cAAc,GAAG,OAAO,CAAC,IAAI,CAAC,CAAA;IAE9D;;;;OAIG;IACH,QAAQ,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAA;IAEzC;;;;;;;OAOG;IACH,MAAM,CACJ,MAAM,EAAE,MAAM,EACd,OAAO,EAAE,CAAC,KAAK,EAAE,MAAM,KAAK,IAAI,EAChC,UAAU,CAAC,EAAE,CAAC,QAAQ,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,KAAK,IAAI,GACpD,OAAO,CAAC,MAAM,CAAC,CAAA;IAElB,gBAAgB,CACd,MAAM,EAAE,MAAM,EACd,OAAO,EAAE,CAAC,SAAS,EAAE,MAAM,KAAK,IAAI,GACnC,OAAO,CAAC,MAAM,CAAC,CAAA;IAElB;;OAEG;IACH,IAAI,IAAI,IAAI,CAAA;IAEZ;;OAEG;IACH,MAAM,IAAI,IAAI,CAAA;IAEd;;;OAGG;IACH,sBAAsB,IAAI,eAAe,CAAA;IAEzC;;;OAGG;IACH,UAAU,IAAI,UAAU,EAAE,CAAA;IAE1B;;OAEG;IACH,YAAY,IAAI,IAAI,CAAA;IAEpB,0CAA0C;IAC1C,QAAQ,CAAC,QAAQ,EAAE,OAAO,CAAA;IAC1B,gDAAgD;IAChD,QAAQ,CAAC,YAAY,EAAE,OAAO,CAAA;IAC9B,2CAA2C;IAC3C,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAA;IAExB,2BAA2B;IAC3B,KAAK,EAAE,OAAO,CAAA;IACd,gDAAgD;IAChD,YAAY,EAAE,MAAM,CAAA;
|
|
1
|
+
{"version":3,"file":"LLM.nitro.d.ts","sourceRoot":"","sources":["../../../../src/specs/LLM.nitro.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,EAAE,YAAY,EAAE,MAAM,4BAA4B,CAAA;AAEtE;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,UAAU,EAAE,MAAM,CAAA;IAClB,eAAe,EAAE,MAAM,CAAA;IACvB,gBAAgB,EAAE,MAAM,CAAA;IACxB,SAAS,EAAE,MAAM,CAAA;IACjB,iBAAiB,EAAE,MAAM,CAAA;CAC1B;AAED,MAAM,WAAW,oBAAoB;IACnC,IAAI,EAAE,kBAAkB,CAAA;IACxB,SAAS,EAAE,MAAM,CAAA;CAClB;AAED,MAAM,WAAW,UAAU;IACzB,IAAI,EAAE,OAAO,CAAA;IACb,KAAK,EAAE,MAAM,CAAA;CACd;AAED,MAAM,WAAW,kBAAkB;IACjC,IAAI,EAAE,gBAAgB,CAAA;IACtB,SAAS,EAAE,MAAM,CAAA;CAClB;AAED,MAAM,WAAW,kBAAkB;IACjC,IAAI,EAAE,gBAAgB,CAAA;IACtB,KAAK,EAAE,MAAM,CAAA;CACd;AAED,MAAM,WAAW,gBAAgB;IAC/B,IAAI,EAAE,cAAc,CAAA;IACpB,OAAO,EAAE,MAAM,CAAA;IACf,SAAS,EAAE,MAAM,CAAA;CAClB;AAED,MAAM,WAAW,kBAAkB;IACjC,IAAI,EAAE,iBAAiB,CAAA;IACvB,EAAE,EAAE,MAAM,CAAA;IACV,IAAI,EAAE,MAAM,CAAA;IACZ,SAAS,EAAE,MAAM,CAAA;CAClB;AAED,MAAM,WAAW,sBAAsB;IACrC,IAAI,EAAE,qBAAqB,CAAA;IAC3B,EAAE,EAAE,MAAM,CAAA;CACX;AAED,MAAM,WAAW,sBAAsB;IACrC,IAAI,EAAE,qBAAqB,CAAA;IAC3B,EAAE,EAAE,MAAM,CAAA;IACV,MAAM,EAAE,MAAM,CAAA;CACf;AAED,MAAM,WAAW,mBAAmB;IAClC,IAAI,EAAE,kBAAkB,CAAA;IACxB,EAAE,EAAE,MAAM,CAAA;IACV,KAAK,EAAE,MAAM,CAAA;CACd;AAED,MAAM,WAAW,kBAAkB;IACjC,IAAI,EAAE,gBAAgB,CAAA;IACtB,OAAO,EAAE,MAAM,CAAA;IACf,KAAK,EAAE,eAAe,CAAA;CACvB;AAED,MAAM,MAAM,WAAW,GACnB,oBAAoB,GACpB,UAAU,GACV,kBAAkB,GAClB,kBAAkB,GAClB,gBAAgB,GAChB,kBAAkB,GAClB,sBAAsB,GACtB,sBAAsB,GACtB,mBAAmB,GACnB,kBAAkB,CAAA;AAEtB,MAAM,WAAW,UAAU;IACzB,IAAI,EAAE,MAAM,CAAA;IACZ,OAAO,EAAE,MAAM,CAAA;CAChB;AAED;;GAEG;AACH,MAAM,WAAW,aAAa;IAC5B,IAAI,EAAE,MAAM,CAAA;IACZ,IAAI,EAAE,MAAM,CAAA;IACZ,WAAW,EAAE,MAAM,CAAA;IACnB,QAAQ,EAAE,OAAO,CAAA;CAClB;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,IAAI,EAAE,MAAM,CAAA;IACZ,WAAW,EAAE,MAAM,CAAA;IACnB,UAAU,EAAE,aAAa,EAAE,CAAA;IAC3B,OAAO,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,CAAA;CAC3C;AAED;GACG;AACH,MAAM,WAAW,cAAc;IAC7B,mDAAmD;IACnD,UAAU,CAAC,EAAE,CAAC,QAAQ,EAAE,MAAM,KAAK,IAAI,CAAA;IACvC,iDAAiD;IACjD,iBAAiB,CAAC,EAAE
,UAAU,EAAE,CAAA;IAChC,sDAAsD;IACtD,aAAa,CAAC,EAAE,OAAO,CAAA;IACvB,4CAA4C;IAC5C,KAAK,CAAC,EAAE,cAAc,EAAE,CAAA;CACzB;AAED;;;GAGG;AACH,MAAM,WAAW,GAAI,SAAQ,YAAY,CAAC;IAAE,GAAG,EAAE,OAAO,CAAA;CAAE,CAAC;IACzD;;;;OAIG;IACH,IAAI,CAAC,OAAO,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,cAAc,GAAG,OAAO,CAAC,IAAI,CAAC,CAAA;IAE9D;;;;OAIG;IACH,QAAQ,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAA;IAEzC;;;;;;;OAOG;IACH,MAAM,CACJ,MAAM,EAAE,MAAM,EACd,OAAO,EAAE,CAAC,KAAK,EAAE,MAAM,KAAK,IAAI,EAChC,UAAU,CAAC,EAAE,CAAC,QAAQ,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,KAAK,IAAI,GACpD,OAAO,CAAC,MAAM,CAAC,CAAA;IAElB,gBAAgB,CACd,MAAM,EAAE,MAAM,EACd,OAAO,EAAE,CAAC,SAAS,EAAE,MAAM,KAAK,IAAI,GACnC,OAAO,CAAC,MAAM,CAAC,CAAA;IAElB;;OAEG;IACH,IAAI,IAAI,IAAI,CAAA;IAEZ;;OAEG;IACH,MAAM,IAAI,IAAI,CAAA;IAEd;;;OAGG;IACH,sBAAsB,IAAI,eAAe,CAAA;IAEzC;;;OAGG;IACH,UAAU,IAAI,UAAU,EAAE,CAAA;IAE1B;;OAEG;IACH,YAAY,IAAI,IAAI,CAAA;IAEpB,0CAA0C;IAC1C,QAAQ,CAAC,QAAQ,EAAE,OAAO,CAAA;IAC1B,gDAAgD;IAChD,QAAQ,CAAC,YAAY,EAAE,OAAO,CAAA;IAC9B,2CAA2C;IAC3C,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAA;IAExB,2BAA2B;IAC3B,KAAK,EAAE,OAAO,CAAA;IACd,gDAAgD;IAChD,YAAY,EAAE,MAAM,CAAA;IACpB,2CAA2C;IAC3C,SAAS,EAAE,MAAM,CAAA;IACjB,qEAAqE;IACrE,WAAW,EAAE,MAAM,CAAA;IACnB,sDAAsD;IACtD,cAAc,EAAE,OAAO,CAAA;CACxB;AAED;;;GAGG;AACH,MAAM,MAAM,iBAAiB,GAAG,QAAQ,GAAG,QAAQ,GAAG,SAAS,GAAG,OAAO,GAAG,QAAQ,CAAA"}
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@inferrlm/react-native-mlx",
|
|
3
3
|
"description": "MLX Swift integration for React Native - InferrLM fork with enhanced features",
|
|
4
|
-
"version": "0.4.2-alpha.0",
|
|
4
|
+
"version": "0.4.2-alpha.10",
|
|
5
5
|
"main": "./lib/module/index.js",
|
|
6
6
|
"module": "./lib/module/index.js",
|
|
7
7
|
"types": "./lib/typescript/src/index.d.ts",
|
package/src/llm.ts
CHANGED
|
@@ -226,4 +226,28 @@ export const LLM = {
|
|
|
226
226
|
set systemPrompt(value: string) {
|
|
227
227
|
getInstance().systemPrompt = value
|
|
228
228
|
},
|
|
229
|
+
|
|
230
|
+
get maxTokens(): number {
|
|
231
|
+
return getInstance().maxTokens
|
|
232
|
+
},
|
|
233
|
+
|
|
234
|
+
set maxTokens(value: number) {
|
|
235
|
+
getInstance().maxTokens = value
|
|
236
|
+
},
|
|
237
|
+
|
|
238
|
+
get temperature(): number {
|
|
239
|
+
return getInstance().temperature
|
|
240
|
+
},
|
|
241
|
+
|
|
242
|
+
set temperature(value: number) {
|
|
243
|
+
getInstance().temperature = value
|
|
244
|
+
},
|
|
245
|
+
|
|
246
|
+
get enableThinking(): boolean {
|
|
247
|
+
return getInstance().enableThinking
|
|
248
|
+
},
|
|
249
|
+
|
|
250
|
+
set enableThinking(value: boolean) {
|
|
251
|
+
getInstance().enableThinking = value
|
|
252
|
+
},
|
|
229
253
|
}
|
package/src/specs/LLM.nitro.ts
CHANGED
|
@@ -193,6 +193,12 @@ export interface LLM extends HybridObject<{ ios: 'swift' }> {
|
|
|
193
193
|
debug: boolean
|
|
194
194
|
/** System prompt used when loading the model */
|
|
195
195
|
systemPrompt: string
|
|
196
|
+
/** Maximum number of tokens to generate */
|
|
197
|
+
maxTokens: number
|
|
198
|
+
/** Sampling temperature (0 = deterministic, higher = more random) */
|
|
199
|
+
temperature: number
|
|
200
|
+
/** Enable thinking mode for models that support it */
|
|
201
|
+
enableThinking: boolean
|
|
196
202
|
}
|
|
197
203
|
|
|
198
204
|
/**
|