@dvai-bridge/ios-llama-core 4.0.0 → 4.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +341 -34
- package/Package.swift +71 -71
- package/ios/Sources/DVAILlamaCore/AudioDecoder.swift +112 -112
- package/ios/Sources/DVAILlamaCore/ContentPartsTranslator.swift +232 -232
- package/ios/Sources/DVAILlamaCore/ImageDecoder.swift +91 -91
- package/ios/Sources/DVAILlamaCore/LlamaCppBridgeProtocol.swift +59 -59
- package/ios/Sources/DVAILlamaCore/LlamaHandlers.swift +422 -422
- package/ios/Sources/DVAILlamaCore/ModelDownloader.swift +445 -445
- package/ios/Sources/DVAILlamaCore/PluginState.swift +158 -158
- package/ios/Sources/DVAILlamaCoreObjC/LlamaCppBridge.mm +649 -649
- package/ios/Sources/DVAILlamaCoreObjC/include/LlamaCppBridge.h +101 -101
- package/ios/Tests/DVAILlamaCoreTests/AudioDecoderTest.swift +46 -46
- package/ios/Tests/DVAILlamaCoreTests/ContentPartsTranslatorTest.swift +361 -361
- package/ios/Tests/DVAILlamaCoreTests/ImageDecoderTest.swift +139 -139
- package/ios/Tests/DVAILlamaCoreTests/LlamaCppBridgeTest.swift +131 -131
- package/ios/Tests/DVAILlamaCoreTests/LlamaHandlersTest.swift +515 -515
- package/ios/Tests/DVAILlamaCoreTests/ModelDownloaderTest.swift +89 -89
- package/ios/Tests/DVAILlamaCoreTests/PluginStateTest.swift +51 -51
- package/package.json +3 -3
- package/README.md +0 -199
|
@@ -1,101 +1,101 @@
|
|
|
1
|
-
#import <Foundation/Foundation.h>
|
|
2
|
-
|
|
3
|
-
NS_ASSUME_NONNULL_BEGIN
|
|
4
|
-
|
|
5
|
-
/// Objective-C++ wrapper around the llama.cpp C API, exposed to Swift through
/// the `DVAICapacitorLlamaObjC` module. One bridge instance owns the
/// `llama_model` and `llama_context` for the duration of a load/unload cycle.
///
/// NOTE(review): this header is published under `Sources/DVAILlamaCoreObjC/`;
/// confirm the module name quoted above is still the current one.
@interface LlamaCppBridge : NSObject

#pragma mark - State

/// NOTE(review): presumably YES between a successful load and the next
/// `unload` — confirm against the implementation.
@property (nonatomic, readonly, getter=isLoaded) BOOL loaded;

/// NOTE(review): presumably the path handed to the last successful load, or
/// nil when nothing is loaded — confirm against the implementation.
@property (nonatomic, readonly, copy, nullable) NSString *currentModelPath;

/// YES once a multimodal projector (mmproj) has been loaded for this bridge.
@property (nonatomic, readonly, getter=isMmprojLoaded) BOOL mmprojLoaded;

#pragma mark - Model lifecycle

- (instancetype)init;

/// Starts a load/unload cycle for the model file at `path`.
///
/// `embeddingMode` must be YES for `embedding:error:` to return meaningful
/// values. `mmprojPath` may be nil.
///
/// NOTE(review): the meaning of `gpuLayers`, `contextSize` and `threads` is
/// inferred from their names only — confirm against the implementation.
- (BOOL)loadModelAtPath:(NSString *)path
             mmprojPath:(nullable NSString *)mmprojPath
              gpuLayers:(int)gpuLayers
            contextSize:(int)contextSize
                threads:(int)threads
          embeddingMode:(BOOL)embeddingMode
                  error:(NSError **)error;

/// Ends the current load/unload cycle.
- (void)unload;

/// NOTE(review): which component's version this string reports is not stated
/// in this header — confirm against the implementation.
- (NSString *)versionString;

#pragma mark - Text completion and embeddings

/// Generates `maxTokens` tokens for `prompt` from the loaded model using
/// greedy sampling.
///
/// `temperature` and `topP` are part of the signature but are ignored for
/// now: this entry point was built with a deterministic greedy sampler
/// (Task 30). Honouring them through extra sampler-chain stages is planned
/// as Task 36.
- (nullable NSString *)completePrompt:(NSString *)prompt
                            maxTokens:(int)maxTokens
                          temperature:(float)temperature
                                 topP:(float)topP
                                error:(NSError **)error;

/// Produces the embedding vector for `text`.
///
/// The model has to have been loaded with `embeddingMode:YES`. Without it the
/// values returned are undefined / not meaningful; rejecting that case (the
/// 400 short-circuit) is the handler layer's job and happens before this call.
/// The result holds one float per dimension (length == llama_n_embd(model)),
/// each wrapped as an `NSNumber` double.
- (nullable NSArray<NSNumber *> *)embedding:(NSString *)text
                                      error:(NSError **)error;

#pragma mark - Multimodal projector

/// Loads a multimodal projector (mmproj). A projector is always paired with a
/// text model, so the main model must already be loaded. Phase 2A Pass 2 wires
/// the real `mtmd_init_from_file()` call.
///
/// `useGPU` maps to `mtmd_context_params.use_gpu`:
///  - `YES` for production (Metal-accelerated CLIP / vision-encoder pass on a
///    real iPhone). This is what the bridge uses by default.
///  - `NO` where Metal allocation cannot hold the projector's
///    position-embedding tensor — most commonly the iOS Simulator, where
///    `_xpc_shmem_create_with_prot` aborts on tensors >~ 60 MiB.
///
/// Returns NO on failure, with `error` populated.
- (BOOL)loadMmprojAtPath:(NSString *)mmprojPath
                  useGPU:(BOOL)useGPU
                   error:(NSError **)error;

/// Shorthand for the three-argument variant with `useGPU:YES`; this is the
/// one most callers want.
- (BOOL)loadMmprojAtPath:(NSString *)mmprojPath
                   error:(NSError **)error;

/// Unloads the multimodal projector and frees the underlying mtmd_context.
/// Idempotent: calling it with nothing loaded is safe.
- (void)unloadMmproj;

#pragma mark - Chat templating and multimodal completion

/// Renders `messages` into a prompt via `llama_chat_apply_template`.
///
/// When `templateOverride` is nil/empty, the model's bundled
/// `tokenizer.chat_template` (obtained via `llama_model_chat_template`) is
/// used. Each entry of `messages` is a
/// `@{ @"role": @"...", @"content": @"..." }` dictionary. The return value is
/// the rendered prompt string including role markers. Multimodal callers
/// should place `<__media__>` markers in the content fields beforehand, at the
/// positions where image/audio bytes will be spliced in.
- (nullable NSString *)applyChatTemplate:(nullable NSString *)templateOverride
                                messages:(NSArray<NSDictionary<NSString *, NSString *> *> *)messages
                            addAssistant:(BOOL)addAssistant
                                   error:(NSError **)error;

/// Runs a multimodal completion and returns the generated text.
///
/// `prompt` must contain N `<__media__>` markers, where N is the number of
/// entries in `media`. Each entry's bytes are auto-detected as image or audio
/// by `mtmd_helper_bitmap_init_from_buf` (magic bytes). Media must be supplied
/// in declaration order, i.e. the order in which the markers appear in
/// `prompt`. Tokenization / eval / decode failures are reported through
/// `error`.
- (nullable NSString *)completeMultimodalPrompt:(NSString *)prompt
                                          media:(NSArray<NSData *> *)mediaInOrder
                                      maxTokens:(int)maxTokens
                                    temperature:(float)temperature
                                           topP:(float)topP
                                          error:(NSError **)error;

/// YES when the loaded model declares an audio encoder (as reported by
/// `mtmd_support_audio()`). Always NO while no mmproj is loaded.
- (BOOL)hasAudioEncoder;

@end
|
|
100
|
-
|
|
101
|
-
NS_ASSUME_NONNULL_END
|
|
1
|
+
#import <Foundation/Foundation.h>
|
|
2
|
+
|
|
3
|
+
NS_ASSUME_NONNULL_BEGIN
|
|
4
|
+
|
|
5
|
+
/// Objective-C++ wrapper around the llama.cpp C API, exposed to Swift through
/// the `DVAICapacitorLlamaObjC` module. One bridge instance owns the
/// `llama_model` and `llama_context` for the duration of a load/unload cycle.
///
/// NOTE(review): this header is published under `Sources/DVAILlamaCoreObjC/`;
/// confirm the module name quoted above is still the current one.
@interface LlamaCppBridge : NSObject

#pragma mark - State

/// NOTE(review): presumably YES between a successful load and the next
/// `unload` — confirm against the implementation.
@property (nonatomic, readonly, getter=isLoaded) BOOL loaded;

/// NOTE(review): presumably the path handed to the last successful load, or
/// nil when nothing is loaded — confirm against the implementation.
@property (nonatomic, readonly, copy, nullable) NSString *currentModelPath;

/// YES once a multimodal projector (mmproj) has been loaded for this bridge.
@property (nonatomic, readonly, getter=isMmprojLoaded) BOOL mmprojLoaded;

#pragma mark - Model lifecycle

- (instancetype)init;

/// Starts a load/unload cycle for the model file at `path`.
///
/// `embeddingMode` must be YES for `embedding:error:` to return meaningful
/// values. `mmprojPath` may be nil.
///
/// NOTE(review): the meaning of `gpuLayers`, `contextSize` and `threads` is
/// inferred from their names only — confirm against the implementation.
- (BOOL)loadModelAtPath:(NSString *)path
             mmprojPath:(nullable NSString *)mmprojPath
              gpuLayers:(int)gpuLayers
            contextSize:(int)contextSize
                threads:(int)threads
          embeddingMode:(BOOL)embeddingMode
                  error:(NSError **)error;

/// Ends the current load/unload cycle.
- (void)unload;

/// NOTE(review): which component's version this string reports is not stated
/// in this header — confirm against the implementation.
- (NSString *)versionString;

#pragma mark - Text completion and embeddings

/// Generates `maxTokens` tokens for `prompt` from the loaded model using
/// greedy sampling.
///
/// `temperature` and `topP` are part of the signature but are ignored for
/// now: this entry point was built with a deterministic greedy sampler
/// (Task 30). Honouring them through extra sampler-chain stages is planned
/// as Task 36.
- (nullable NSString *)completePrompt:(NSString *)prompt
                            maxTokens:(int)maxTokens
                          temperature:(float)temperature
                                 topP:(float)topP
                                error:(NSError **)error;

/// Produces the embedding vector for `text`.
///
/// The model has to have been loaded with `embeddingMode:YES`. Without it the
/// values returned are undefined / not meaningful; rejecting that case (the
/// 400 short-circuit) is the handler layer's job and happens before this call.
/// The result holds one float per dimension (length == llama_n_embd(model)),
/// each wrapped as an `NSNumber` double.
- (nullable NSArray<NSNumber *> *)embedding:(NSString *)text
                                      error:(NSError **)error;

#pragma mark - Multimodal projector

/// Loads a multimodal projector (mmproj). A projector is always paired with a
/// text model, so the main model must already be loaded. Phase 2A Pass 2 wires
/// the real `mtmd_init_from_file()` call.
///
/// `useGPU` maps to `mtmd_context_params.use_gpu`:
///  - `YES` for production (Metal-accelerated CLIP / vision-encoder pass on a
///    real iPhone). This is what the bridge uses by default.
///  - `NO` where Metal allocation cannot hold the projector's
///    position-embedding tensor — most commonly the iOS Simulator, where
///    `_xpc_shmem_create_with_prot` aborts on tensors >~ 60 MiB.
///
/// Returns NO on failure, with `error` populated.
- (BOOL)loadMmprojAtPath:(NSString *)mmprojPath
                  useGPU:(BOOL)useGPU
                   error:(NSError **)error;

/// Shorthand for the three-argument variant with `useGPU:YES`; this is the
/// one most callers want.
- (BOOL)loadMmprojAtPath:(NSString *)mmprojPath
                   error:(NSError **)error;

/// Unloads the multimodal projector and frees the underlying mtmd_context.
/// Idempotent: calling it with nothing loaded is safe.
- (void)unloadMmproj;

#pragma mark - Chat templating and multimodal completion

/// Renders `messages` into a prompt via `llama_chat_apply_template`.
///
/// When `templateOverride` is nil/empty, the model's bundled
/// `tokenizer.chat_template` (obtained via `llama_model_chat_template`) is
/// used. Each entry of `messages` is a
/// `@{ @"role": @"...", @"content": @"..." }` dictionary. The return value is
/// the rendered prompt string including role markers. Multimodal callers
/// should place `<__media__>` markers in the content fields beforehand, at the
/// positions where image/audio bytes will be spliced in.
- (nullable NSString *)applyChatTemplate:(nullable NSString *)templateOverride
                                messages:(NSArray<NSDictionary<NSString *, NSString *> *> *)messages
                            addAssistant:(BOOL)addAssistant
                                   error:(NSError **)error;

/// Runs a multimodal completion and returns the generated text.
///
/// `prompt` must contain N `<__media__>` markers, where N is the number of
/// entries in `media`. Each entry's bytes are auto-detected as image or audio
/// by `mtmd_helper_bitmap_init_from_buf` (magic bytes). Media must be supplied
/// in declaration order, i.e. the order in which the markers appear in
/// `prompt`. Tokenization / eval / decode failures are reported through
/// `error`.
- (nullable NSString *)completeMultimodalPrompt:(NSString *)prompt
                                          media:(NSArray<NSData *> *)mediaInOrder
                                      maxTokens:(int)maxTokens
                                    temperature:(float)temperature
                                           topP:(float)topP
                                          error:(NSError **)error;

/// YES when the loaded model declares an audio encoder (as reported by
/// `mtmd_support_audio()`). Always NO while no mmproj is loaded.
- (BOOL)hasAudioEncoder;

@end
|
|
100
|
+
|
|
101
|
+
NS_ASSUME_NONNULL_END
|
|
@@ -1,46 +1,46 @@
|
|
|
1
|
-
import XCTest
|
|
2
|
-
@testable import DVAILlamaCore
|
|
3
|
-
|
|
4
|
-
/// Exercises `AudioDecoder.decode(data:format:)` with the `.pcm16`, `.wav` and
/// `.m4a` formats against the shared audio fixtures.
///
/// Fixtures are read from the repo-root `fixtures/audio/` directory, which is
/// located at runtime by walking up from this source file rather than through
/// the test bundle's resources.
final class AudioDecoderTest: XCTestCase {
    /// Decoding `.pcm16` input must yield a result whose `count` equals the
    /// input's byte count.
    func testPCM16PassThrough() async throws {
        let pcm = try Data(contentsOf: audioFixtureURL("pcm16-1s-16khz-mono.bin"))
        let result = try await AudioDecoder.decode(data: pcm, format: .pcm16)
        XCTAssertEqual(result.count, pcm.count)
    }

    /// Decoding the 1 s WAV fixture must yield roughly one second of output.
    func testWavToPCM() async throws {
        let wav = try Data(contentsOf: audioFixtureURL("wav-1s-16khz-mono.wav"))
        let result = try await AudioDecoder.decode(data: wav, format: .wav)
        // 1s @ 16kHz mono PCM16 = 32000 bytes; the bounds below allow about ±6%.
        XCTAssertGreaterThan(result.count, 30000)
        XCTAssertLessThan(result.count, 34000)
    }

    /// Decoding the 1 s M4A fixture must yield roughly one second of output,
    /// with a wider tolerance than the WAV case.
    func testM4AToPCM() async throws {
        let m4a = try Data(contentsOf: audioFixtureURL("m4a-1s.m4a"))
        let result = try await AudioDecoder.decode(data: m4a, format: .m4a)
        // M4A AAC 1s decodes to ~32000 bytes; AAC priming may shave a few hundred samples.
        XCTAssertGreaterThan(result.count, 25000)
        XCTAssertLessThan(result.count, 36000)
    }

    /// Walks up from this test source file until it finds the repo-root
    /// `fixtures/` directory. We don't bundle the fixtures into the SwiftPM
    /// test target's resources because they're shared with Android and Node
    /// tests.
    ///
    /// - Returns: The URL of the first `fixtures` directory found in this
    ///   file's directory or one of its ancestors.
    /// - Note: Traps via `fatalError` when the filesystem root is reached
    ///   without finding such a directory.
    private func fixturesURL() -> URL {
        // `#filePath` always expands to the full on-disk path of this file.
        // `#file` is slated to become the concise `Module/File.swift` form
        // (SE-0274), which would make the walk-up start from a relative path.
        var dir = URL(fileURLWithPath: #filePath).deletingLastPathComponent()
        while !FileManager.default.fileExists(atPath: dir.appendingPathComponent("fixtures").path) {
            let parent = dir.deletingLastPathComponent()
            // Reaching a directory that is its own parent means we hit the root.
            if parent.path == dir.path {
                fatalError("fixtures dir not found walking up from \(#filePath)")
            }
            dir = parent
        }
        return dir.appendingPathComponent("fixtures")
    }

    /// Resolves `name` inside the `audio` subdirectory of the fixtures directory.
    private func audioFixtureURL(_ name: String) -> URL {
        fixturesURL().appendingPathComponent("audio").appendingPathComponent(name)
    }
}
|
|
1
|
+
import XCTest
|
|
2
|
+
@testable import DVAILlamaCore
|
|
3
|
+
|
|
4
|
+
/// Exercises `AudioDecoder.decode(data:format:)` with the `.pcm16`, `.wav` and
/// `.m4a` formats against the shared audio fixtures.
///
/// Fixtures are read from the repo-root `fixtures/audio/` directory, which is
/// located at runtime by walking up from this source file rather than through
/// the test bundle's resources.
final class AudioDecoderTest: XCTestCase {
    /// Decoding `.pcm16` input must yield a result whose `count` equals the
    /// input's byte count.
    func testPCM16PassThrough() async throws {
        let pcm = try Data(contentsOf: audioFixtureURL("pcm16-1s-16khz-mono.bin"))
        let result = try await AudioDecoder.decode(data: pcm, format: .pcm16)
        XCTAssertEqual(result.count, pcm.count)
    }

    /// Decoding the 1 s WAV fixture must yield roughly one second of output.
    func testWavToPCM() async throws {
        let wav = try Data(contentsOf: audioFixtureURL("wav-1s-16khz-mono.wav"))
        let result = try await AudioDecoder.decode(data: wav, format: .wav)
        // 1s @ 16kHz mono PCM16 = 32000 bytes; the bounds below allow about ±6%.
        XCTAssertGreaterThan(result.count, 30000)
        XCTAssertLessThan(result.count, 34000)
    }

    /// Decoding the 1 s M4A fixture must yield roughly one second of output,
    /// with a wider tolerance than the WAV case.
    func testM4AToPCM() async throws {
        let m4a = try Data(contentsOf: audioFixtureURL("m4a-1s.m4a"))
        let result = try await AudioDecoder.decode(data: m4a, format: .m4a)
        // M4A AAC 1s decodes to ~32000 bytes; AAC priming may shave a few hundred samples.
        XCTAssertGreaterThan(result.count, 25000)
        XCTAssertLessThan(result.count, 36000)
    }

    /// Walks up from this test source file until it finds the repo-root
    /// `fixtures/` directory. We don't bundle the fixtures into the SwiftPM
    /// test target's resources because they're shared with Android and Node
    /// tests.
    ///
    /// - Returns: The URL of the first `fixtures` directory found in this
    ///   file's directory or one of its ancestors.
    /// - Note: Traps via `fatalError` when the filesystem root is reached
    ///   without finding such a directory.
    private func fixturesURL() -> URL {
        // `#filePath` always expands to the full on-disk path of this file.
        // `#file` is slated to become the concise `Module/File.swift` form
        // (SE-0274), which would make the walk-up start from a relative path.
        var dir = URL(fileURLWithPath: #filePath).deletingLastPathComponent()
        while !FileManager.default.fileExists(atPath: dir.appendingPathComponent("fixtures").path) {
            let parent = dir.deletingLastPathComponent()
            // Reaching a directory that is its own parent means we hit the root.
            if parent.path == dir.path {
                fatalError("fixtures dir not found walking up from \(#filePath)")
            }
            dir = parent
        }
        return dir.appendingPathComponent("fixtures")
    }

    /// Resolves `name` inside the `audio` subdirectory of the fixtures directory.
    private func audioFixtureURL(_ name: String) -> URL {
        fixturesURL().appendingPathComponent("audio").appendingPathComponent(name)
    }
}
|