react-native-nitro-mlx 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ios/Sources/HybridLLM.swift +49 -37
- package/package.json +1 -1
|
@@ -17,6 +17,7 @@ class HybridLLM: HybridLLMSpec {
|
|
|
17
17
|
private var modelFactory: ModelFactory = LLMModelFactory.shared
|
|
18
18
|
private var manageHistory: Bool = false
|
|
19
19
|
private var messageHistory: [LLMMessage] = []
|
|
20
|
+
private var loadTask: Task<Void, Error>?
|
|
20
21
|
|
|
21
22
|
var isLoaded: Bool { session != nil }
|
|
22
23
|
var isGenerating: Bool { currentTask != nil }
|
|
@@ -61,50 +62,58 @@ class HybridLLM: HybridLLMSpec {
|
|
|
61
62
|
}
|
|
62
63
|
|
|
63
64
|
func load(modelId: String, options: LLMLoadOptions?) throws -> Promise<Void> {
|
|
65
|
+
self.loadTask?.cancel()
|
|
66
|
+
|
|
64
67
|
return Promise.async { [self] in
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
self.currentTask?.cancel()
|
|
68
|
-
self.currentTask = nil
|
|
69
|
-
self.session = nil
|
|
70
|
-
self.container = nil
|
|
71
|
-
MLX.GPU.clearCache()
|
|
72
|
-
|
|
73
|
-
let memoryAfterCleanup = self.getMemoryUsage()
|
|
74
|
-
let gpuAfterCleanup = self.getGPUMemoryUsage()
|
|
75
|
-
log("After cleanup - Host: \(memoryAfterCleanup), GPU: \(gpuAfterCleanup)")
|
|
76
|
-
|
|
77
|
-
let modelDir = await ModelDownloader.shared.getModelDirectory(modelId: modelId)
|
|
78
|
-
log("Loading from directory: \(modelDir.path)")
|
|
79
|
-
|
|
80
|
-
let config = ModelConfiguration(directory: modelDir)
|
|
81
|
-
let loadedContainer = try await modelFactory.loadContainer(
|
|
82
|
-
configuration: config
|
|
83
|
-
) { progress in
|
|
84
|
-
options?.onProgress?(progress.fractionCompleted)
|
|
85
|
-
}
|
|
68
|
+
let task = Task { @MainActor in
|
|
69
|
+
MLX.GPU.set(cacheLimit: 2000000)
|
|
86
70
|
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
71
|
+
self.currentTask?.cancel()
|
|
72
|
+
self.currentTask = nil
|
|
73
|
+
self.session = nil
|
|
74
|
+
self.container = nil
|
|
75
|
+
MLX.GPU.clearCache()
|
|
76
|
+
|
|
77
|
+
let memoryAfterCleanup = self.getMemoryUsage()
|
|
78
|
+
let gpuAfterCleanup = self.getGPUMemoryUsage()
|
|
79
|
+
log("After cleanup - Host: \(memoryAfterCleanup), GPU: \(gpuAfterCleanup)")
|
|
80
|
+
|
|
81
|
+
let modelDir = await ModelDownloader.shared.getModelDirectory(modelId: modelId)
|
|
82
|
+
log("Loading from directory: \(modelDir.path)")
|
|
83
|
+
|
|
84
|
+
let config = ModelConfiguration(directory: modelDir)
|
|
85
|
+
let loadedContainer = try await self.modelFactory.loadContainer(
|
|
86
|
+
configuration: config
|
|
87
|
+
) { progress in
|
|
88
|
+
options?.onProgress?(progress.fractionCompleted)
|
|
89
|
+
}
|
|
90
90
|
|
|
91
|
-
|
|
92
|
-
let additionalContextDict: [String: Any]? = if let messages = options?.additionalContext {
|
|
93
|
-
["messages": messages.map { ["role": $0.role, "content": $0.content] }]
|
|
94
|
-
} else {
|
|
95
|
-
nil
|
|
96
|
-
}
|
|
91
|
+
try Task.checkCancellation()
|
|
97
92
|
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
93
|
+
let memoryAfterContainer = self.getMemoryUsage()
|
|
94
|
+
let gpuAfterContainer = self.getGPUMemoryUsage()
|
|
95
|
+
log("Model loaded - Host: \(memoryAfterContainer), GPU: \(gpuAfterContainer)")
|
|
101
96
|
|
|
102
|
-
|
|
103
|
-
|
|
97
|
+
let additionalContextDict: [String: Any]? = if let messages = options?.additionalContext {
|
|
98
|
+
["messages": messages.map { ["role": $0.role, "content": $0.content] }]
|
|
99
|
+
} else {
|
|
100
|
+
nil
|
|
101
|
+
}
|
|
104
102
|
|
|
105
|
-
|
|
106
|
-
|
|
103
|
+
self.container = loadedContainer
|
|
104
|
+
self.session = ChatSession(loadedContainer, instructions: self.systemPrompt, additionalContext: additionalContextDict)
|
|
105
|
+
self.modelId = modelId
|
|
106
|
+
|
|
107
|
+
self.manageHistory = options?.manageHistory ?? false
|
|
108
|
+
self.messageHistory = options?.additionalContext ?? []
|
|
109
|
+
|
|
110
|
+
if self.manageHistory {
|
|
111
|
+
log("History management enabled with \(self.messageHistory.count) initial messages")
|
|
112
|
+
}
|
|
107
113
|
}
|
|
114
|
+
|
|
115
|
+
self.loadTask = task
|
|
116
|
+
try await task.value
|
|
108
117
|
}
|
|
109
118
|
}
|
|
110
119
|
|
|
@@ -211,6 +220,9 @@ class HybridLLM: HybridLLMSpec {
|
|
|
211
220
|
}
|
|
212
221
|
|
|
213
222
|
func unload() throws {
|
|
223
|
+
loadTask?.cancel()
|
|
224
|
+
loadTask = nil
|
|
225
|
+
|
|
214
226
|
let memoryBefore = getMemoryUsage()
|
|
215
227
|
let gpuBefore = getGPUMemoryUsage()
|
|
216
228
|
log("Before unload - Host: \(memoryBefore), GPU: \(gpuBefore)")
|