llama-cpp-capacitor 0.0.1
This diff shows the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/LlamaCpp.podspec +17 -0
- package/Package.swift +28 -0
- package/README.md +574 -0
- package/android/build.gradle +58 -0
- package/android/src/main/AndroidManifest.xml +2 -0
- package/android/src/main/CMakeLists.txt +148 -0
- package/android/src/main/java/ai/annadata/plugin/capacitor/LlamaCpp.java +677 -0
- package/android/src/main/java/ai/annadata/plugin/capacitor/LlamaCppPlugin.java +482 -0
- package/android/src/main/jni-utils.h +139 -0
- package/android/src/main/jni.cpp +271 -0
- package/android/src/main/res/.gitkeep +0 -0
- package/dist/docs.json +5513 -0
- package/dist/esm/definitions.d.ts +653 -0
- package/dist/esm/definitions.js +2 -0
- package/dist/esm/definitions.js.map +1 -0
- package/dist/esm/index.d.ts +180 -0
- package/dist/esm/index.js +518 -0
- package/dist/esm/index.js.map +1 -0
- package/dist/plugin.cjs.js +531 -0
- package/dist/plugin.cjs.js.map +1 -0
- package/dist/plugin.js +534 -0
- package/dist/plugin.js.map +1 -0
- package/ios/Sources/LlamaCppPlugin/LlamaCpp.swift +596 -0
- package/ios/Sources/LlamaCppPlugin/LlamaCppPlugin.swift +514 -0
- package/ios/Tests/LlamaCppPluginTests/LlamaCppPluginTests.swift +15 -0
- package/package.json +108 -0
package/android/src/main/java/ai/annadata/plugin/capacitor/LlamaCpp.java
@@ -0,0 +1,677 @@
package ai.annadata.plugin.capacitor;

import android.util.Log;
import com.getcapacitor.JSObject;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.CompletableFuture;

// MARK: - Result Types
class LlamaResult<T> {
    private final T data;
    private final LlamaError error;
    private final boolean isSuccess;

    private LlamaResult(T data, LlamaError error, boolean isSuccess) {
        this.data = data;
        this.error = error;
        this.isSuccess = isSuccess;
    }

    public static <T> LlamaResult<T> success(T data) {
        return new LlamaResult<>(data, null, true);
    }

    public static <T> LlamaResult<T> failure(LlamaError error) {
        return new LlamaResult<>(null, error, false);
    }

    public boolean isSuccess() {
        return isSuccess;
    }

    public T getData() {
        return data;
    }

    public LlamaError getError() {
        return error;
    }
}

class LlamaError extends Exception {
    public LlamaError(String message) {
        super(message);
    }
}
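
// Hypothetical usage sketch (editor's illustration, not part of the published
// file): consuming the success/failure pattern above through the
// LlamaCallback<T> interface declared at the bottom of this file. The
// instance name and model path are assumptions.
//
//   LlamaCpp llama = new LlamaCpp();
//   llama.modelInfo("/models/model.gguf", new String[0], result -> {
//       if (result.isSuccess()) {
//           Log.i("Demo", "desc: " + result.getData().get("desc"));
//       } else {
//           Log.e("Demo", "modelInfo failed", result.getError());
//       }
//   });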

// MARK: - Context Management
class LlamaContext {
    private final int id;
    private LlamaModel model;
    private boolean isMultimodalEnabled = false;
    private boolean isVocoderEnabled = false;

    public LlamaContext(int id) {
        this.id = id;
    }

    public int getId() {
        return id;
    }

    public LlamaModel getModel() {
        return model;
    }

    public void setModel(LlamaModel model) {
        this.model = model;
    }

    public boolean isMultimodalEnabled() {
        return isMultimodalEnabled;
    }

    public void setMultimodalEnabled(boolean multimodalEnabled) {
        isMultimodalEnabled = multimodalEnabled;
    }

    public boolean isVocoderEnabled() {
        return isVocoderEnabled;
    }

    public void setVocoderEnabled(boolean vocoderEnabled) {
        isVocoderEnabled = vocoderEnabled;
    }
}

class LlamaModel {
    private final String path;
    private final String desc;
    private final int size;
    private final int nEmbd;
    private final int nParams;
    private final ChatTemplates chatTemplates;
    private final Map<String, Object> metadata;

    public LlamaModel(String path, String desc, int size, int nEmbd, int nParams, ChatTemplates chatTemplates, Map<String, Object> metadata) {
        this.path = path;
        this.desc = desc;
        this.size = size;
        this.nEmbd = nEmbd;
        this.nParams = nParams;
        this.chatTemplates = chatTemplates;
        this.metadata = metadata;
    }

    public String getPath() {
        return path;
    }

    public String getDesc() {
        return desc;
    }

    public int getSize() {
        return size;
    }

    public int getNEmbd() {
        return nEmbd;
    }

    public int getNParams() {
        return nParams;
    }

    public ChatTemplates getChatTemplates() {
        return chatTemplates;
    }

    public Map<String, Object> getMetadata() {
        return metadata;
    }
}

class ChatTemplates {
    private final boolean llamaChat;
    private final MinjaTemplates minja;

    public ChatTemplates(boolean llamaChat, MinjaTemplates minja) {
        this.llamaChat = llamaChat;
        this.minja = minja;
    }

    public boolean isLlamaChat() {
        return llamaChat;
    }

    public MinjaTemplates getMinja() {
        return minja;
    }
}

class MinjaTemplates {
    private final boolean default_;
    private final MinjaCaps defaultCaps;
    private final boolean toolUse;
    private final MinjaCaps toolUseCaps;

    public MinjaTemplates(boolean default_, MinjaCaps defaultCaps, boolean toolUse, MinjaCaps toolUseCaps) {
        this.default_ = default_;
        this.defaultCaps = defaultCaps;
        this.toolUse = toolUse;
        this.toolUseCaps = toolUseCaps;
    }

    public boolean isDefault() {
        return default_;
    }

    public MinjaCaps getDefaultCaps() {
        return defaultCaps;
    }

    public boolean isToolUse() {
        return toolUse;
    }

    public MinjaCaps getToolUseCaps() {
        return toolUseCaps;
    }
}

class MinjaCaps {
    private final boolean tools;
    private final boolean toolCalls;
    private final boolean toolResponses;
    private final boolean systemRole;
    private final boolean parallelToolCalls;
    private final boolean toolCallId;

    public MinjaCaps(boolean tools, boolean toolCalls, boolean toolResponses, boolean systemRole, boolean parallelToolCalls, boolean toolCallId) {
        this.tools = tools;
        this.toolCalls = toolCalls;
        this.toolResponses = toolResponses;
        this.systemRole = systemRole;
        this.parallelToolCalls = parallelToolCalls;
        this.toolCallId = toolCallId;
    }

    public boolean isTools() {
        return tools;
    }

    public boolean isToolCalls() {
        return toolCalls;
    }

    public boolean isToolResponses() {
        return toolResponses;
    }

    public boolean isSystemRole() {
        return systemRole;
    }

    public boolean isParallelToolCalls() {
        return parallelToolCalls;
    }

    public boolean isToolCallId() {
        return toolCallId;
    }
}

// MARK: - Main Implementation
public class LlamaCpp {
    private static final String TAG = "LlamaCpp";
    private final Map<Integer, LlamaContext> contexts = new HashMap<>();
    private int contextCounter = 0;
    private int contextLimit = 10;
    private boolean nativeLogEnabled = false;

    // MARK: - Core initialization and management

    public void toggleNativeLog(boolean enabled, LlamaCallback<Void> callback) {
        nativeLogEnabled = enabled;
        if (enabled) {
            Log.i(TAG, "Native logging enabled");
        } else {
            Log.i(TAG, "Native logging disabled");
        }
        callback.onResult(LlamaResult.success(null));
    }

    public void setContextLimit(int limit, LlamaCallback<Void> callback) {
        contextLimit = limit;
        Log.i(TAG, "Context limit set to " + limit);
        callback.onResult(LlamaResult.success(null));
    }

    public void modelInfo(String path, String[] skip, LlamaCallback<Map<String, Object>> callback) {
        // This would typically load model info from the GGUF file
        // For now, return a basic structure
        Map<String, Object> modelInfo = new HashMap<>();
        modelInfo.put("path", path);
        modelInfo.put("desc", "Sample model");
        modelInfo.put("size", 0);
        modelInfo.put("nEmbd", 0);
        modelInfo.put("nParams", 0);
        callback.onResult(LlamaResult.success(modelInfo));
    }

    public void initContext(int contextId, JSObject params, LlamaCallback<Map<String, Object>> callback) {
        // Check context limit
        if (contexts.size() >= contextLimit) {
            callback.onResult(LlamaResult.failure(new LlamaError("Context limit reached")));
            return;
        }

        // Extract parameters
        String modelPath = params.getString("model");
        if (modelPath == null) {
            callback.onResult(LlamaResult.failure(new LlamaError("Invalid parameters")));
            return;
        }

        // Create context
        LlamaContext context = new LlamaContext(contextId);

        // Create model info (this would typically load from GGUF file)
        MinjaCaps defaultCaps = new MinjaCaps(true, true, true, true, true, true);
        MinjaCaps toolUseCaps = new MinjaCaps(true, true, true, true, true, true);
        MinjaTemplates minja = new MinjaTemplates(true, defaultCaps, true, toolUseCaps);
        ChatTemplates chatTemplates = new ChatTemplates(true, minja);

        LlamaModel model = new LlamaModel(
            modelPath,
            "Sample model",
            0,
            0,
            0,
            chatTemplates,
            new HashMap<>()
        );

        context.setModel(model);
        contexts.put(contextId, context);

        // Return context info
        Map<String, Object> contextInfo = new HashMap<>();
        contextInfo.put("contextId", contextId);
        contextInfo.put("gpu", false);
        contextInfo.put("reasonNoGPU", "Not implemented");

        Map<String, Object> modelInfo = new HashMap<>();
        modelInfo.put("desc", model.getDesc());
        modelInfo.put("size", model.getSize());
        modelInfo.put("nEmbd", model.getNEmbd());
        modelInfo.put("nParams", model.getNParams());

        Map<String, Object> chatTemplatesInfo = new HashMap<>();
        chatTemplatesInfo.put("llamaChat", model.getChatTemplates().isLlamaChat());

        Map<String, Object> minjaInfo = new HashMap<>();
        minjaInfo.put("default", model.getChatTemplates().getMinja().isDefault());

        Map<String, Object> defaultCapsInfo = new HashMap<>();
        defaultCapsInfo.put("tools", model.getChatTemplates().getMinja().getDefaultCaps().isTools());
        defaultCapsInfo.put("toolCalls", model.getChatTemplates().getMinja().getDefaultCaps().isToolCalls());
        defaultCapsInfo.put("toolResponses", model.getChatTemplates().getMinja().getDefaultCaps().isToolResponses());
        defaultCapsInfo.put("systemRole", model.getChatTemplates().getMinja().getDefaultCaps().isSystemRole());
        defaultCapsInfo.put("parallelToolCalls", model.getChatTemplates().getMinja().getDefaultCaps().isParallelToolCalls());
        defaultCapsInfo.put("toolCallId", model.getChatTemplates().getMinja().getDefaultCaps().isToolCallId());

        Map<String, Object> toolUseCapsInfo = new HashMap<>();
        toolUseCapsInfo.put("tools", model.getChatTemplates().getMinja().getToolUseCaps().isTools());
        toolUseCapsInfo.put("toolCalls", model.getChatTemplates().getMinja().getToolUseCaps().isToolCalls());
        toolUseCapsInfo.put("toolResponses", model.getChatTemplates().getMinja().getToolUseCaps().isToolResponses());
        toolUseCapsInfo.put("systemRole", model.getChatTemplates().getMinja().getToolUseCaps().isSystemRole());
        toolUseCapsInfo.put("parallelToolCalls", model.getChatTemplates().getMinja().getToolUseCaps().isParallelToolCalls());
        toolUseCapsInfo.put("toolCallId", model.getChatTemplates().getMinja().getToolUseCaps().isToolCallId());

        minjaInfo.put("defaultCaps", defaultCapsInfo);
        minjaInfo.put("toolUse", model.getChatTemplates().getMinja().isToolUse());
        minjaInfo.put("toolUseCaps", toolUseCapsInfo);

        chatTemplatesInfo.put("minja", minjaInfo);
        modelInfo.put("chatTemplates", chatTemplatesInfo);
        modelInfo.put("metadata", model.getMetadata());
        modelInfo.put("isChatTemplateSupported", true);

        contextInfo.put("model", modelInfo);

        callback.onResult(LlamaResult.success(contextInfo));
    }
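
    // Hypothetical usage sketch (editor's illustration, not part of the
    // published file): driving initContext() above. JSObject comes from
    // com.getcapacitor, and the "model" key matches the
    // params.getString("model") read during init; the context id and path
    // are assumptions.
    //
    //   JSObject params = new JSObject();
    //   params.put("model", "/data/local/tmp/model.gguf");
    //   llama.initContext(1, params, result -> {
    //       if (result.isSuccess()) {
    //           Log.i("Demo", "gpu: " + result.getData().get("gpu"));
    //       }
    //   });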

    public void releaseContext(int contextId, LlamaCallback<Void> callback) {
        if (contexts.remove(contextId) == null) {
            callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
            return;
        }
        callback.onResult(LlamaResult.success(null));
    }

    public void releaseAllContexts(LlamaCallback<Void> callback) {
        contexts.clear();
        callback.onResult(LlamaResult.success(null));
    }

    // MARK: - Chat and completion

    public void getFormattedChat(int contextId, String messages, String chatTemplate, JSObject params, LlamaCallback<Map<String, Object>> callback) {
        LlamaContext context = contexts.get(contextId);
        if (context == null) {
            callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
            return;
        }

        // This would typically format the chat using the model's chat templates
        // For now, return a basic formatted chat
        Map<String, Object> formattedChat = new HashMap<>();
        formattedChat.put("type", "llama-chat");
        formattedChat.put("prompt", messages);
        formattedChat.put("has_media", false);
        formattedChat.put("media_paths", new String[0]);

        callback.onResult(LlamaResult.success(formattedChat));
    }

    public void completion(int contextId, JSObject params, LlamaCallback<Map<String, Object>> callback) {
        LlamaContext context = contexts.get(contextId);
        if (context == null) {
            callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
            return;
        }

        // This would typically perform the completion using llama.cpp
        // For now, return a basic completion result
        Map<String, Object> completionResult = new HashMap<>();
        completionResult.put("text", "Sample completion text");
        completionResult.put("reasoning_content", "");
        completionResult.put("tool_calls", new Object[0]);
        completionResult.put("content", "Sample completion text");
        completionResult.put("chat_format", 0);
        completionResult.put("tokens_predicted", 0);
        completionResult.put("tokens_evaluated", 0);
        completionResult.put("truncated", false);
        completionResult.put("stopped_eos", false);
        completionResult.put("stopped_word", "");
        completionResult.put("stopped_limit", 0);
        completionResult.put("stopping_word", "");
        completionResult.put("context_full", false);
        completionResult.put("interrupted", false);
        completionResult.put("tokens_cached", 0);

        Map<String, Object> timings = new HashMap<>();
        timings.put("prompt_n", 0);
        timings.put("prompt_ms", 0);
        timings.put("prompt_per_token_ms", 0);
        timings.put("prompt_per_second", 0);
        timings.put("predicted_n", 0);
        timings.put("predicted_ms", 0);
        timings.put("predicted_per_token_ms", 0);
        timings.put("predicted_per_second", 0);

        completionResult.put("timings", timings);

        callback.onResult(LlamaResult.success(completionResult));
    }

    public void stopCompletion(int contextId, LlamaCallback<Void> callback) {
        if (contexts.get(contextId) == null) {
            callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
            return;
        }

        // This would typically stop any ongoing completion
        callback.onResult(LlamaResult.success(null));
    }

    // MARK: - Session management

    public void loadSession(int contextId, String filepath, LlamaCallback<Map<String, Object>> callback) {
        if (contexts.get(contextId) == null) {
            callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
            return;
        }

        // This would typically load session from file
        Map<String, Object> sessionResult = new HashMap<>();
        sessionResult.put("tokens_loaded", 0);
        sessionResult.put("prompt", "");

        callback.onResult(LlamaResult.success(sessionResult));
    }

    public void saveSession(int contextId, String filepath, int size, LlamaCallback<Integer> callback) {
        if (contexts.get(contextId) == null) {
            callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
            return;
        }

        // This would typically save session to file
        callback.onResult(LlamaResult.success(0));
    }

    // MARK: - Tokenization

    public void tokenize(int contextId, String text, String[] imagePaths, LlamaCallback<Map<String, Object>> callback) {
        if (contexts.get(contextId) == null) {
            callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
            return;
        }

        // This would typically tokenize the text using the model's tokenizer
        Map<String, Object> tokenizeResult = new HashMap<>();
        tokenizeResult.put("tokens", new Integer[0]);
        tokenizeResult.put("has_images", false);
        tokenizeResult.put("bitmap_hashes", new Integer[0]);
        tokenizeResult.put("chunk_pos", new Integer[0]);
        tokenizeResult.put("chunk_pos_images", new Integer[0]);

        callback.onResult(LlamaResult.success(tokenizeResult));
    }

    public void detokenize(int contextId, Integer[] tokens, LlamaCallback<String> callback) {
        if (contexts.get(contextId) == null) {
            callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
            return;
        }

        // This would typically detokenize using the model's tokenizer
        callback.onResult(LlamaResult.success(""));
    }
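
    // Hypothetical usage sketch (editor's illustration, not part of the
    // published file): a tokenize/detokenize round trip through the stubs
    // above. tokenize() reports its tokens under the "tokens" key as an
    // Integer[], which detokenize() accepts; the context id is an assumption.
    //
    //   llama.tokenize(1, "Hello world", new String[0], result -> {
    //       Integer[] tokens = (Integer[]) result.getData().get("tokens");
    //       llama.detokenize(1, tokens, detok ->
    //           Log.i("Demo", "text: " + detok.getData()));
    //   });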

    // MARK: - Embeddings and reranking

    public void embedding(int contextId, String text, JSObject params, LlamaCallback<Map<String, Object>> callback) {
        if (contexts.get(contextId) == null) {
            callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
            return;
        }

        // This would typically generate embeddings
        Map<String, Object> embeddingResult = new HashMap<>();
        embeddingResult.put("embedding", new Double[0]);

        callback.onResult(LlamaResult.success(embeddingResult));
    }

    public void rerank(int contextId, String query, String[] documents, JSObject params, LlamaCallback<Map<String, Object>[]> callback) {
        if (contexts.get(contextId) == null) {
            callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
            return;
        }

        // This would typically perform reranking
        Map<String, Object>[] rerankResults = new Map[0];
        callback.onResult(LlamaResult.success(rerankResults));
    }

    // MARK: - Benchmarking

    public void bench(int contextId, int pp, int tg, int pl, int nr, LlamaCallback<String> callback) {
        if (contexts.get(contextId) == null) {
            callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
            return;
        }

        // This would typically run benchmarks
        String benchResult = "[]";
        callback.onResult(LlamaResult.success(benchResult));
    }

    // MARK: - LoRA adapters

    public void applyLoraAdapters(int contextId, JSObject[] loraAdapters, LlamaCallback<Void> callback) {
        if (contexts.get(contextId) == null) {
            callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
            return;
        }

        // This would typically apply LoRA adapters
        callback.onResult(LlamaResult.success(null));
    }

    public void removeLoraAdapters(int contextId, LlamaCallback<Void> callback) {
        if (contexts.get(contextId) == null) {
            callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
            return;
        }

        // This would typically remove LoRA adapters
        callback.onResult(LlamaResult.success(null));
    }

    public void getLoadedLoraAdapters(int contextId, LlamaCallback<Map<String, Object>[]> callback) {
        if (contexts.get(contextId) == null) {
            callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
            return;
        }

        // This would typically return loaded LoRA adapters
        Map<String, Object>[] adapters = new Map[0];
        callback.onResult(LlamaResult.success(adapters));
    }

    // MARK: - Multimodal methods

    public void initMultimodal(int contextId, String path, boolean useGpu, LlamaCallback<Boolean> callback) {
        LlamaContext context = contexts.get(contextId);
        if (context == null) {
            callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
            return;
        }

        context.setMultimodalEnabled(true);
        callback.onResult(LlamaResult.success(true));
    }

    public void isMultimodalEnabled(int contextId, LlamaCallback<Boolean> callback) {
        LlamaContext context = contexts.get(contextId);
        if (context == null) {
            callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
            return;
        }

        callback.onResult(LlamaResult.success(context.isMultimodalEnabled()));
    }

    public void getMultimodalSupport(int contextId, LlamaCallback<Map<String, Object>> callback) {
        if (contexts.get(contextId) == null) {
            callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
            return;
        }

        Map<String, Object> support = new HashMap<>();
        support.put("vision", true);
        support.put("audio", true);

        callback.onResult(LlamaResult.success(support));
    }

    public void releaseMultimodal(int contextId, LlamaCallback<Void> callback) {
        LlamaContext context = contexts.get(contextId);
        if (context == null) {
            callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
            return;
        }

        context.setMultimodalEnabled(false);
        callback.onResult(LlamaResult.success(null));
    }

    // MARK: - TTS methods

    public void initVocoder(int contextId, String path, Integer nBatch, LlamaCallback<Boolean> callback) {
        LlamaContext context = contexts.get(contextId);
        if (context == null) {
            callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
            return;
        }

        context.setVocoderEnabled(true);
        callback.onResult(LlamaResult.success(true));
    }

    public void isVocoderEnabled(int contextId, LlamaCallback<Boolean> callback) {
        LlamaContext context = contexts.get(contextId);
        if (context == null) {
            callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
            return;
        }

        callback.onResult(LlamaResult.success(context.isVocoderEnabled()));
    }

    public void getFormattedAudioCompletion(int contextId, String speakerJsonStr, String textToSpeak, LlamaCallback<Map<String, Object>> callback) {
        if (contexts.get(contextId) == null) {
            callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
            return;
        }

        Map<String, Object> audioCompletion = new HashMap<>();
        audioCompletion.put("prompt", "");
        audioCompletion.put("grammar", null);

        callback.onResult(LlamaResult.success(audioCompletion));
    }

    public void getAudioCompletionGuideTokens(int contextId, String textToSpeak, LlamaCallback<Integer[]> callback) {
        if (contexts.get(contextId) == null) {
            callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
            return;
        }

        Integer[] tokens = new Integer[0];
        callback.onResult(LlamaResult.success(tokens));
    }

    public void decodeAudioTokens(int contextId, Integer[] tokens, LlamaCallback<Integer[]> callback) {
        if (contexts.get(contextId) == null) {
            callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
            return;
        }

        Integer[] decodedTokens = new Integer[0];
        callback.onResult(LlamaResult.success(decodedTokens));
    }

    public void releaseVocoder(int contextId, LlamaCallback<Void> callback) {
        LlamaContext context = contexts.get(contextId);
        if (context == null) {
            callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
            return;
        }

        context.setVocoderEnabled(false);
        callback.onResult(LlamaResult.success(null));
    }

    // MARK: - Callback Interface
    public interface LlamaCallback<T> {
        void onResult(LlamaResult<T> result);
    }
}
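
The Capacitor bridge that consumes this class lives in LlamaCppPlugin.java (+482 above) and is not part of this hunk. As a rough sketch only — the `implementation` field, the "contextId" key, and the error handling below are assumptions, not the package's actual bridge code — a plugin method would adapt a PluginCall to the LlamaCallback interface along these lines, assuming the usual com.getcapacitor imports and a field `implementation = new LlamaCpp()`:

    @PluginMethod
    public void releaseContext(PluginCall call) {
        Integer contextId = call.getInt("contextId");
        if (contextId == null) {
            call.reject("contextId is required");
            return;
        }
        // Adapt the plugin call to the LlamaCallback<Void> interface
        implementation.releaseContext(contextId, result -> {
            if (result.isSuccess()) {
                call.resolve();
            } else {
                call.reject(result.getError().getMessage());
            }
        });
    }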