@dvai-bridge/ios-llama-core 4.0.0 → 4.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,158 +1,158 @@
1
- // Internal/PluginState.swift
2
- import Foundation
3
- #if !COCOAPODS
4
- import DVAILlamaCoreObjC
5
- #endif
6
- #if !COCOAPODS
7
- import DVAISharedCore
8
- #endif
9
-
10
- /// Owns the running state of the capacitor-llama plugin: the model bridge,
11
- /// the HTTP server, and the model metadata. All access is serialised through
12
- /// the actor isolation.
13
- public actor PluginState {
14
- private var server: HttpServer?
15
- private var bridge: LlamaCppBridge?
16
- private(set) var modelId: String = ""
17
- private(set) var isRunning: Bool = false
18
- private(set) var baseUrl: String?
19
- private(set) var port: Int?
20
-
21
- public init() {}
22
-
23
- /// Start the plugin: load model, bind server, install routes.
24
- /// - Returns dictionary suitable for Capacitor's `call.resolve(...)`.
25
- public func start(opts: [String: Any]) async throws -> [String: Any] {
26
- if isRunning { try await stopInternal() }
27
-
28
- guard let modelPath = opts["modelPath"] as? String, !modelPath.isEmpty else {
29
- throw NSError(
30
- domain: "DVAIBridgeLlama",
31
- code: 400,
32
- userInfo: [NSLocalizedDescriptionKey: "modelPath is required for llama backend"]
33
- )
34
- }
35
-
36
- let mmprojPath = opts["mmprojPath"] as? String
37
- let chatTemplate = opts["chatTemplate"] as? String
38
- let gpuLayers = opts["gpuLayers"] as? Int ?? 99
39
- let contextSize = opts["contextSize"] as? Int ?? 2048
40
- let threads = opts["threads"] as? Int ?? 4
41
- let embeddingMode = opts["embeddingMode"] as? Bool ?? false
42
- let httpBasePort = opts["httpBasePort"] as? Int ?? 38883
43
- let httpMaxPortAttempts = opts["httpMaxPortAttempts"] as? Int ?? 16
44
- let corsRaw = opts["corsOrigin"]
45
- let corsConfig = parseCors(corsRaw)
46
-
47
- // Load model via the ObjC++ bridge (real llama.cpp under the hood).
48
- let bridge = LlamaCppBridge()
49
- try bridge.loadModel(
50
- atPath: modelPath,
51
- mmprojPath: mmprojPath,
52
- gpuLayers: Int32(gpuLayers),
53
- contextSize: Int32(contextSize),
54
- threads: Int32(threads),
55
- embeddingMode: embeddingMode
56
- )
57
-
58
- // Phase 2A Pass 2: load mmproj (if provided) so multimodal handlers
59
- // can light up. A failed mmproj load is fatal for this start() call —
60
- // the caller asked for a multimodal model and we couldn't deliver.
61
- if let mmprojPath = mmprojPath, !mmprojPath.isEmpty {
62
- do {
63
- try bridge.loadMmproj(atPath: mmprojPath)
64
- } catch {
65
- bridge.unload()
66
- throw error
67
- }
68
- }
69
- let mmprojLoaded = bridge.isMmprojLoaded
70
- // Audio encoder support implies mmproj is loaded AND mtmd reports
71
- // an audio encoder is present in the projector.
72
- let modelHasAudioEncoder = mmprojLoaded && bridge.hasAudioEncoder()
73
-
74
- // Build handlers + context first; Hummingbird requires routes
75
- // to be registered at Application construction time, so the
76
- // installRoutes → tryBind order is mandatory. Phase 2A Pass 2:
77
- // real flags mirrored from the bridge state. embeddingMode
78
- // comes straight from the start opts so /v1/embeddings can
79
- // short-circuit when off. chatTemplate is an optional
80
- // Jinja-compatible override; nil/empty falls through to the
81
- // model's bundled `tokenizer.chat_template`.
82
- let handlers = LlamaHandlers(
83
- bridge: bridge,
84
- modelId: modelPath,
85
- mmprojLoaded: mmprojLoaded,
86
- modelHasAudioEncoder: modelHasAudioEncoder,
87
- embeddingMode: embeddingMode,
88
- chatTemplate: chatTemplate
89
- )
90
- let ctx = HandlerContext(modelId: modelPath, backendName: "llama")
91
- let server = HttpServer()
92
- await server.installRoutes(handlers: handlers, ctx: ctx, corsConfig: corsConfig)
93
-
94
- // Bind server (with port-fallback). If bind fails, release the
95
- // bridge so the loaded llama context doesn't leak until next
96
- // start().
97
- let port: Int
98
- do {
99
- port = try await server.tryBind(
100
- basePort: httpBasePort,
101
- maxAttempts: httpMaxPortAttempts,
102
- host: "127.0.0.1"
103
- )
104
- } catch {
105
- bridge.unload()
106
- throw error
107
- }
108
-
109
- self.bridge = bridge
110
- self.server = server
111
- self.modelId = modelPath
112
- self.port = port
113
- self.baseUrl = "http://127.0.0.1:\(port)/v1"
114
- self.isRunning = true
115
-
116
- return [
117
- "baseUrl": self.baseUrl!,
118
- "port": port,
119
- "backend": "llama",
120
- "modelId": modelPath,
121
- ]
122
- }
123
-
124
- /// Stop the plugin: release model, stop server.
125
- public func stop() async throws {
126
- try await stopInternal()
127
- }
128
-
129
- private func stopInternal() async throws {
130
- await server?.stop()
131
- bridge?.unload()
132
- server = nil
133
- bridge = nil
134
- modelId = ""
135
- baseUrl = nil
136
- port = nil
137
- isRunning = false
138
- }
139
-
140
- /// Snapshot of the current running state, suitable for Capacitor `call.resolve(...)`.
141
- public func statusInfo() -> [String: Any] {
142
- var dict: [String: Any] = ["running": isRunning]
143
- if let baseUrl = baseUrl { dict["baseUrl"] = baseUrl }
144
- if isRunning { dict["backend"] = "llama" }
145
- return dict
146
- }
147
-
148
- /// Parse the CORS option from the start opts dict.
149
- private func parseCors(_ raw: Any?) -> CORSConfig {
150
- if let s = raw as? String {
151
- return s == "*" ? .wildcard : .exact(s)
152
- }
153
- if let arr = raw as? [String] {
154
- return .allowlist(arr)
155
- }
156
- return .wildcard
157
- }
158
- }
1
+ // Internal/PluginState.swift
2
+ import Foundation
3
+ #if !COCOAPODS
4
+ import DVAILlamaCoreObjC
5
+ #endif
6
+ #if !COCOAPODS
7
+ import DVAISharedCore
8
+ #endif
9
+
10
+ /// Owns the running state of the capacitor-llama plugin: the model bridge,
11
+ /// the HTTP server, and the model metadata. All access is serialised through
12
+ /// the actor isolation.
13
+ public actor PluginState {
14
+ private var server: HttpServer?
15
+ private var bridge: LlamaCppBridge?
16
+ private(set) var modelId: String = ""
17
+ private(set) var isRunning: Bool = false
18
+ private(set) var baseUrl: String?
19
+ private(set) var port: Int?
20
+
21
+ public init() {}
22
+
23
+ /// Start the plugin: load model, bind server, install routes.
24
+ /// - Returns dictionary suitable for Capacitor's `call.resolve(...)`.
25
+ public func start(opts: [String: Any]) async throws -> [String: Any] {
26
+ if isRunning { try await stopInternal() }
27
+
28
+ guard let modelPath = opts["modelPath"] as? String, !modelPath.isEmpty else {
29
+ throw NSError(
30
+ domain: "DVAIBridgeLlama",
31
+ code: 400,
32
+ userInfo: [NSLocalizedDescriptionKey: "modelPath is required for llama backend"]
33
+ )
34
+ }
35
+
36
+ let mmprojPath = opts["mmprojPath"] as? String
37
+ let chatTemplate = opts["chatTemplate"] as? String
38
+ let gpuLayers = opts["gpuLayers"] as? Int ?? 99
39
+ let contextSize = opts["contextSize"] as? Int ?? 2048
40
+ let threads = opts["threads"] as? Int ?? 4
41
+ let embeddingMode = opts["embeddingMode"] as? Bool ?? false
42
+ let httpBasePort = opts["httpBasePort"] as? Int ?? 38883
43
+ let httpMaxPortAttempts = opts["httpMaxPortAttempts"] as? Int ?? 16
44
+ let corsRaw = opts["corsOrigin"]
45
+ let corsConfig = parseCors(corsRaw)
46
+
47
+ // Load model via the ObjC++ bridge (real llama.cpp under the hood).
48
+ let bridge = LlamaCppBridge()
49
+ try bridge.loadModel(
50
+ atPath: modelPath,
51
+ mmprojPath: mmprojPath,
52
+ gpuLayers: Int32(gpuLayers),
53
+ contextSize: Int32(contextSize),
54
+ threads: Int32(threads),
55
+ embeddingMode: embeddingMode
56
+ )
57
+
58
+ // Phase 2A Pass 2: load mmproj (if provided) so multimodal handlers
59
+ // can light up. A failed mmproj load is fatal for this start() call —
60
+ // the caller asked for a multimodal model and we couldn't deliver.
61
+ if let mmprojPath = mmprojPath, !mmprojPath.isEmpty {
62
+ do {
63
+ try bridge.loadMmproj(atPath: mmprojPath)
64
+ } catch {
65
+ bridge.unload()
66
+ throw error
67
+ }
68
+ }
69
+ let mmprojLoaded = bridge.isMmprojLoaded
70
+ // Audio encoder support implies mmproj is loaded AND mtmd reports
71
+ // an audio encoder is present in the projector.
72
+ let modelHasAudioEncoder = mmprojLoaded && bridge.hasAudioEncoder()
73
+
74
+ // Build handlers + context first; Hummingbird requires routes
75
+ // to be registered at Application construction time, so the
76
+ // installRoutes → tryBind order is mandatory. Phase 2A Pass 2:
77
+ // real flags mirrored from the bridge state. embeddingMode
78
+ // comes straight from the start opts so /v1/embeddings can
79
+ // short-circuit when off. chatTemplate is an optional
80
+ // Jinja-compatible override; nil/empty falls through to the
81
+ // model's bundled `tokenizer.chat_template`.
82
+ let handlers = LlamaHandlers(
83
+ bridge: bridge,
84
+ modelId: modelPath,
85
+ mmprojLoaded: mmprojLoaded,
86
+ modelHasAudioEncoder: modelHasAudioEncoder,
87
+ embeddingMode: embeddingMode,
88
+ chatTemplate: chatTemplate
89
+ )
90
+ let ctx = HandlerContext(modelId: modelPath, backendName: "llama")
91
+ let server = HttpServer()
92
+ await server.installRoutes(handlers: handlers, ctx: ctx, corsConfig: corsConfig)
93
+
94
+ // Bind server (with port-fallback). If bind fails, release the
95
+ // bridge so the loaded llama context doesn't leak until next
96
+ // start().
97
+ let port: Int
98
+ do {
99
+ port = try await server.tryBind(
100
+ basePort: httpBasePort,
101
+ maxAttempts: httpMaxPortAttempts,
102
+ host: "127.0.0.1"
103
+ )
104
+ } catch {
105
+ bridge.unload()
106
+ throw error
107
+ }
108
+
109
+ self.bridge = bridge
110
+ self.server = server
111
+ self.modelId = modelPath
112
+ self.port = port
113
+ self.baseUrl = "http://127.0.0.1:\(port)/v1"
114
+ self.isRunning = true
115
+
116
+ return [
117
+ "baseUrl": self.baseUrl!,
118
+ "port": port,
119
+ "backend": "llama",
120
+ "modelId": modelPath,
121
+ ]
122
+ }
123
+
124
+ /// Stop the plugin: release model, stop server.
125
+ public func stop() async throws {
126
+ try await stopInternal()
127
+ }
128
+
129
+ private func stopInternal() async throws {
130
+ await server?.stop()
131
+ bridge?.unload()
132
+ server = nil
133
+ bridge = nil
134
+ modelId = ""
135
+ baseUrl = nil
136
+ port = nil
137
+ isRunning = false
138
+ }
139
+
140
+ /// Snapshot of the current running state, suitable for Capacitor `call.resolve(...)`.
141
+ public func statusInfo() -> [String: Any] {
142
+ var dict: [String: Any] = ["running": isRunning]
143
+ if let baseUrl = baseUrl { dict["baseUrl"] = baseUrl }
144
+ if isRunning { dict["backend"] = "llama" }
145
+ return dict
146
+ }
147
+
148
+ /// Parse the CORS option from the start opts dict.
149
+ private func parseCors(_ raw: Any?) -> CORSConfig {
150
+ if let s = raw as? String {
151
+ return s == "*" ? .wildcard : .exact(s)
152
+ }
153
+ if let arr = raw as? [String] {
154
+ return .allowlist(arr)
155
+ }
156
+ return .wildcard
157
+ }
158
+ }