llama-cpp-capacitor 0.0.13 → 0.0.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. package/LlamaCpp.podspec +17 -17
  2. package/Package.swift +27 -27
  3. package/README.md +717 -574
  4. package/android/build.gradle +88 -69
  5. package/android/src/main/AndroidManifest.xml +2 -2
  6. package/android/src/main/CMakeLists-arm64.txt +131 -0
  7. package/android/src/main/CMakeLists-x86_64.txt +135 -0
  8. package/android/src/main/CMakeLists.txt +35 -52
  9. package/android/src/main/java/ai/annadata/plugin/capacitor/LlamaCpp.java +956 -717
  10. package/android/src/main/java/ai/annadata/plugin/capacitor/LlamaCppPlugin.java +710 -590
  11. package/android/src/main/jni-utils.h +7 -7
  12. package/android/src/main/jni.cpp +868 -127
  13. package/cpp/{rn-completion.cpp → cap-completion.cpp} +202 -24
  14. package/cpp/{rn-completion.h → cap-completion.h} +22 -11
  15. package/cpp/{rn-llama.cpp → cap-llama.cpp} +81 -27
  16. package/cpp/{rn-llama.h → cap-llama.h} +32 -20
  17. package/cpp/{rn-mtmd.hpp → cap-mtmd.hpp} +15 -15
  18. package/cpp/{rn-tts.cpp → cap-tts.cpp} +12 -12
  19. package/cpp/{rn-tts.h → cap-tts.h} +14 -14
  20. package/cpp/ggml-cpu/ggml-cpu-impl.h +30 -0
  21. package/dist/docs.json +100 -3
  22. package/dist/esm/definitions.d.ts +45 -2
  23. package/dist/esm/definitions.js.map +1 -1
  24. package/dist/esm/index.d.ts +22 -0
  25. package/dist/esm/index.js +66 -3
  26. package/dist/esm/index.js.map +1 -1
  27. package/dist/plugin.cjs.js +71 -3
  28. package/dist/plugin.cjs.js.map +1 -1
  29. package/dist/plugin.js +71 -3
  30. package/dist/plugin.js.map +1 -1
  31. package/ios/Sources/LlamaCppPlugin/LlamaCpp.swift +596 -596
  32. package/ios/Sources/LlamaCppPlugin/LlamaCppPlugin.swift +591 -514
  33. package/ios/Tests/LlamaCppPluginTests/LlamaCppPluginTests.swift +15 -15
  34. package/package.json +111 -110
@@ -1,717 +1,956 @@
1
- package ai.annadata.plugin.capacitor;
2
-
3
- import android.util.Log;
4
- import com.getcapacitor.JSObject;
5
- import java.util.HashMap;
6
- import java.util.Map;
7
- import java.util.concurrent.CompletableFuture;
8
-
9
- // MARK: - Result Types
10
- class LlamaResult<T> {
11
- private final T data;
12
- private final LlamaError error;
13
- private final boolean isSuccess;
14
-
15
- private LlamaResult(T data, LlamaError error, boolean isSuccess) {
16
- this.data = data;
17
- this.error = error;
18
- this.isSuccess = isSuccess;
19
- }
20
-
21
- public static <T> LlamaResult<T> success(T data) {
22
- return new LlamaResult<>(data, null, true);
23
- }
24
-
25
- public static <T> LlamaResult<T> failure(LlamaError error) {
26
- return new LlamaResult<>(null, error, false);
27
- }
28
-
29
- public boolean isSuccess() {
30
- return isSuccess;
31
- }
32
-
33
- public T getData() {
34
- return data;
35
- }
36
-
37
- public LlamaError getError() {
38
- return error;
39
- }
40
- }
41
-
42
- class LlamaError extends Exception {
43
- public LlamaError(String message) {
44
- super(message);
45
- }
46
- }
47
-
48
- // MARK: - Context Management
49
- class LlamaContext {
50
- private final int id;
51
- private LlamaModel model;
52
- private boolean isMultimodalEnabled = false;
53
- private boolean isVocoderEnabled = false;
54
- private long nativeContextId = -1;
55
-
56
- public LlamaContext(int id) {
57
- this.id = id;
58
- }
59
-
60
- public int getId() {
61
- return id;
62
- }
63
-
64
- public LlamaModel getModel() {
65
- return model;
66
- }
67
-
68
- public void setModel(LlamaModel model) {
69
- this.model = model;
70
- }
71
-
72
- public boolean isMultimodalEnabled() {
73
- return isMultimodalEnabled;
74
- }
75
-
76
- public void setMultimodalEnabled(boolean multimodalEnabled) {
77
- isMultimodalEnabled = multimodalEnabled;
78
- }
79
-
80
- public boolean isVocoderEnabled() {
81
- return isVocoderEnabled;
82
- }
83
-
84
- public void setVocoderEnabled(boolean vocoderEnabled) {
85
- isVocoderEnabled = vocoderEnabled;
86
- }
87
-
88
- public long getNativeContextId() {
89
- return nativeContextId;
90
- }
91
-
92
- public void setNativeContextId(long nativeContextId) {
93
- this.nativeContextId = nativeContextId;
94
- }
95
- }
96
-
97
- class LlamaModel {
98
- private final String path;
99
- private final String desc;
100
- private final int size;
101
- private final int nEmbd;
102
- private final int nParams;
103
- private final ChatTemplates chatTemplates;
104
- private final Map<String, Object> metadata;
105
-
106
- public LlamaModel(String path, String desc, int size, int nEmbd, int nParams, ChatTemplates chatTemplates, Map<String, Object> metadata) {
107
- this.path = path;
108
- this.desc = desc;
109
- this.size = size;
110
- this.nEmbd = nEmbd;
111
- this.nParams = nParams;
112
- this.chatTemplates = chatTemplates;
113
- this.metadata = metadata;
114
- }
115
-
116
- public String getPath() {
117
- return path;
118
- }
119
-
120
- public String getDesc() {
121
- return desc;
122
- }
123
-
124
- public int getSize() {
125
- return size;
126
- }
127
-
128
- public int getNEmbd() {
129
- return nEmbd;
130
- }
131
-
132
- public int getNParams() {
133
- return nParams;
134
- }
135
-
136
- public ChatTemplates getChatTemplates() {
137
- return chatTemplates;
138
- }
139
-
140
- public Map<String, Object> getMetadata() {
141
- return metadata;
142
- }
143
- }
144
-
145
- class ChatTemplates {
146
- private final boolean llamaChat;
147
- private final MinjaTemplates minja;
148
-
149
- public ChatTemplates(boolean llamaChat, MinjaTemplates minja) {
150
- this.llamaChat = llamaChat;
151
- this.minja = minja;
152
- }
153
-
154
- public boolean isLlamaChat() {
155
- return llamaChat;
156
- }
157
-
158
- public MinjaTemplates getMinja() {
159
- return minja;
160
- }
161
- }
162
-
163
- class MinjaTemplates {
164
- private final boolean default_;
165
- private final MinjaCaps defaultCaps;
166
- private final boolean toolUse;
167
- private final MinjaCaps toolUseCaps;
168
-
169
- public MinjaTemplates(boolean default_, MinjaCaps defaultCaps, boolean toolUse, MinjaCaps toolUseCaps) {
170
- this.default_ = default_;
171
- this.defaultCaps = defaultCaps;
172
- this.toolUse = toolUse;
173
- this.toolUseCaps = toolUseCaps;
174
- }
175
-
176
- public boolean isDefault() {
177
- return default_;
178
- }
179
-
180
- public MinjaCaps getDefaultCaps() {
181
- return defaultCaps;
182
- }
183
-
184
- public boolean isToolUse() {
185
- return toolUse;
186
- }
187
-
188
- public MinjaCaps getToolUseCaps() {
189
- return toolUseCaps;
190
- }
191
- }
192
-
193
- class MinjaCaps {
194
- private final boolean tools;
195
- private final boolean toolCalls;
196
- private final boolean toolResponses;
197
- private final boolean systemRole;
198
- private final boolean parallelToolCalls;
199
- private final boolean toolCallId;
200
-
201
- public MinjaCaps(boolean tools, boolean toolCalls, boolean toolResponses, boolean systemRole, boolean parallelToolCalls, boolean toolCallId) {
202
- this.tools = tools;
203
- this.toolCalls = toolCalls;
204
- this.toolResponses = toolResponses;
205
- this.systemRole = systemRole;
206
- this.parallelToolCalls = parallelToolCalls;
207
- this.toolCallId = toolCallId;
208
- }
209
-
210
- public boolean isTools() {
211
- return tools;
212
- }
213
-
214
- public boolean isToolCalls() {
215
- return toolCalls;
216
- }
217
-
218
- public boolean isToolResponses() {
219
- return toolResponses;
220
- }
221
-
222
- public boolean isSystemRole() {
223
- return systemRole;
224
- }
225
-
226
- public boolean isParallelToolCalls() {
227
- return parallelToolCalls;
228
- }
229
-
230
- public boolean isToolCallId() {
231
- return toolCallId;
232
- }
233
- }
234
-
235
- // MARK: - Main Implementation
236
- public class LlamaCpp {
237
- private static final String TAG = "LlamaCpp";
238
- private final Map<Integer, LlamaContext> contexts = new HashMap<>();
239
- private int contextCounter = 0;
240
- private int contextLimit = 10;
241
- private boolean nativeLogEnabled = false;
242
-
243
- // Native method declarations
244
- private native long initContextNative(String modelPath, JSObject params);
245
- private native void releaseContextNative(long nativeContextId);
246
- private native String completionNative(long contextId, String prompt);
247
- private native void stopCompletionNative(long contextId);
248
- private native String getFormattedChatNative(long contextId, String messages, String chatTemplate);
249
- private native boolean toggleNativeLogNative(boolean enabled);
250
-
251
- static {
252
- try {
253
- System.loadLibrary("llama-cpp");
254
- Log.i(TAG, "Successfully loaded llama-cpp native library");
255
- } catch (UnsatisfiedLinkError e) {
256
- Log.e(TAG, "Failed to load llama-cpp native library: " + e.getMessage());
257
- throw e;
258
- }
259
- }
260
-
261
- // MARK: - Core initialization and management
262
-
263
- public void toggleNativeLog(boolean enabled, LlamaCallback<Void> callback) {
264
- try {
265
- boolean result = toggleNativeLogNative(enabled);
266
- nativeLogEnabled = enabled;
267
- if (enabled) {
268
- Log.i(TAG, "Native logging enabled");
269
- } else {
270
- Log.i(TAG, "Native logging disabled");
271
- }
272
- callback.onResult(LlamaResult.success(null));
273
- } catch (Exception e) {
274
- callback.onResult(LlamaResult.failure(new LlamaError("Failed to toggle native log: " + e.getMessage())));
275
- }
276
- }
277
-
278
- public void setContextLimit(int limit, LlamaCallback<Void> callback) {
279
- contextLimit = limit;
280
- Log.i(TAG, "Context limit set to " + limit);
281
- callback.onResult(LlamaResult.success(null));
282
- }
283
-
284
- public void modelInfo(String path, String[] skip, LlamaCallback<Map<String, Object>> callback) {
285
- // This would typically load model info from the GGUF file
286
- // For now, return a basic structure
287
- Map<String, Object> modelInfo = new HashMap<>();
288
- modelInfo.put("path", path);
289
- modelInfo.put("desc", "Sample model");
290
- modelInfo.put("size", 0);
291
- modelInfo.put("nEmbd", 0);
292
- modelInfo.put("nParams", 0);
293
- callback.onResult(LlamaResult.success(modelInfo));
294
- }
295
-
296
- public void initContext(int contextId, JSObject params, LlamaCallback<Map<String, Object>> callback) {
297
- // Check context limit
298
- if (contexts.size() >= contextLimit) {
299
- callback.onResult(LlamaResult.failure(new LlamaError("Context limit reached")));
300
- return;
301
- }
302
-
303
- try {
304
- // Extract parameters
305
- String modelPath = params.getString("model");
306
- if (modelPath == null || modelPath.isEmpty()) {
307
- callback.onResult(LlamaResult.failure(new LlamaError("Model path is required")));
308
- return;
309
- }
310
-
311
- // Call native initialization
312
- long nativeContextId = initContextNative(modelPath, params);
313
- if (nativeContextId < 0) {
314
- callback.onResult(LlamaResult.failure(new LlamaError("Failed to initialize native context")));
315
- return;
316
- }
317
-
318
- // Create Java context wrapper
319
- LlamaContext context = new LlamaContext(contextId);
320
- context.setNativeContextId(nativeContextId);
321
- contexts.put(contextId, context);
322
-
323
- // Return context info
324
- Map<String, Object> contextInfo = new HashMap<>();
325
- contextInfo.put("contextId", contextId);
326
- contextInfo.put("gpu", false);
327
- contextInfo.put("reasonNoGPU", "Currently not supported");
328
-
329
- Map<String, Object> modelInfo = new HashMap<>();
330
- modelInfo.put("desc", "Loaded model");
331
- modelInfo.put("size", 0);
332
- modelInfo.put("nEmbd", 0);
333
- modelInfo.put("nParams", 0);
334
- modelInfo.put("path", modelPath);
335
-
336
- contextInfo.put("model", modelInfo);
337
- contextInfo.put("androidLib", "llama-cpp");
338
-
339
- callback.onResult(LlamaResult.success(contextInfo));
340
-
341
- } catch (Exception e) {
342
- callback.onResult(LlamaResult.failure(new LlamaError("Context initialization failed: " + e.getMessage())));
343
- }
344
- }
345
-
346
- public void releaseContext(int contextId, LlamaCallback<Void> callback) {
347
- LlamaContext context = contexts.get(contextId);
348
- if (context == null) {
349
- callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
350
- return;
351
- }
352
-
353
- try {
354
- // Release native context
355
- if (context.getNativeContextId() >= 0) {
356
- releaseContextNative(context.getNativeContextId());
357
- }
358
-
359
- // Remove from Java context map
360
- contexts.remove(contextId);
361
-
362
- callback.onResult(LlamaResult.success(null));
363
-
364
- } catch (Exception e) {
365
- callback.onResult(LlamaResult.failure(new LlamaError("Failed to release context: " + e.getMessage())));
366
- }
367
- }
368
-
369
- public void releaseAllContexts(LlamaCallback<Void> callback) {
370
- contexts.clear();
371
- callback.onResult(LlamaResult.success(null));
372
- }
373
-
374
- // MARK: - Chat and completion
375
-
376
- public void getFormattedChat(int contextId, String messages, String chatTemplate, JSObject params, LlamaCallback<Map<String, Object>> callback) {
377
- LlamaContext context = contexts.get(contextId);
378
- if (context == null) {
379
- callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
380
- return;
381
- }
382
-
383
- try {
384
- // Call native formatted chat
385
- String result = getFormattedChatNative(context.getNativeContextId(), messages, chatTemplate);
386
-
387
- // Build formatted chat result
388
- Map<String, Object> formattedChat = new HashMap<>();
389
- formattedChat.put("type", "llama-chat");
390
- formattedChat.put("prompt", result);
391
- formattedChat.put("has_media", false);
392
- formattedChat.put("media_paths", new String[0]);
393
-
394
- callback.onResult(LlamaResult.success(formattedChat));
395
-
396
- } catch (Exception e) {
397
- callback.onResult(LlamaResult.failure(new LlamaError("Failed to format chat: " + e.getMessage())));
398
- }
399
- }
400
-
401
- public void completion(int contextId, JSObject params, LlamaCallback<Map<String, Object>> callback) {
402
- LlamaContext context = contexts.get(contextId);
403
- if (context == null) {
404
- callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
405
- return;
406
- }
407
-
408
- try {
409
- // Extract parameters from JSObject
410
- String prompt = params.getString("prompt", "");
411
- int nPredict = params.getInteger("n_predict", 128);
412
- float temperature = params.has("temp") ? (float) params.getDouble("temp") : 0.8f;
413
- float topP = params.has("top_p") ? (float) params.getDouble("top_p") : 0.95f;
414
- int topK = params.getInteger("top_k", 40);
415
- float repeatPenalty = params.has("repeat_penalty") ? (float) params.getDouble("repeat_penalty") : 1.1f;
416
-
417
- // Call native completion
418
- String result = completionNative(context.getNativeContextId(), prompt);
419
-
420
- // Build completion result
421
- Map<String, Object> completionResult = new HashMap<>();
422
- completionResult.put("text", result);
423
- completionResult.put("reasoning_content", "");
424
- completionResult.put("tool_calls", new Object[0]);
425
- completionResult.put("content", result);
426
- completionResult.put("chat_format", 0);
427
- completionResult.put("tokens_predicted", nPredict);
428
- completionResult.put("tokens_evaluated", 0);
429
- completionResult.put("truncated", false);
430
- completionResult.put("stopped_eos", false);
431
- completionResult.put("stopped_word", "");
432
- completionResult.put("stopped_limit", 0);
433
- completionResult.put("stopping_word", "");
434
- completionResult.put("context_full", false);
435
- completionResult.put("interrupted", false);
436
- completionResult.put("tokens_cached", 0);
437
-
438
- Map<String, Object> timings = new HashMap<>();
439
- timings.put("prompt_n", 0);
440
- timings.put("prompt_ms", 0);
441
- timings.put("prompt_per_token_ms", 0);
442
- timings.put("prompt_per_second", 0);
443
- timings.put("predicted_n", nPredict);
444
- timings.put("predicted_ms", 0);
445
- timings.put("predicted_per_token_ms", 0);
446
- timings.put("predicted_per_second", 0);
447
-
448
- completionResult.put("timings", timings);
449
-
450
- callback.onResult(LlamaResult.success(completionResult));
451
-
452
- } catch (Exception e) {
453
- callback.onResult(LlamaResult.failure(new LlamaError("Completion failed: " + e.getMessage())));
454
- }
455
- }
456
-
457
- public void stopCompletion(int contextId, LlamaCallback<Void> callback) {
458
- LlamaContext context = contexts.get(contextId);
459
- if (context == null) {
460
- callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
461
- return;
462
- }
463
-
464
- try {
465
- stopCompletionNative(context.getNativeContextId());
466
- callback.onResult(LlamaResult.success(null));
467
- } catch (Exception e) {
468
- callback.onResult(LlamaResult.failure(new LlamaError("Failed to stop completion: " + e.getMessage())));
469
- }
470
- }
471
-
472
- // MARK: - Session management
473
-
474
- public void loadSession(int contextId, String filepath, LlamaCallback<Map<String, Object>> callback) {
475
- if (contexts.get(contextId) == null) {
476
- callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
477
- return;
478
- }
479
-
480
- // This would typically load session from file
481
- Map<String, Object> sessionResult = new HashMap<>();
482
- sessionResult.put("tokens_loaded", 0);
483
- sessionResult.put("prompt", "");
484
-
485
- callback.onResult(LlamaResult.success(sessionResult));
486
- }
487
-
488
- public void saveSession(int contextId, String filepath, int size, LlamaCallback<Integer> callback) {
489
- if (contexts.get(contextId) == null) {
490
- callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
491
- return;
492
- }
493
-
494
- // This would typically save session to file
495
- callback.onResult(LlamaResult.success(0));
496
- }
497
-
498
- // MARK: - Tokenization
499
-
500
- public void tokenize(int contextId, String text, String[] imagePaths, LlamaCallback<Map<String, Object>> callback) {
501
- if (contexts.get(contextId) == null) {
502
- callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
503
- return;
504
- }
505
-
506
- // This would typically tokenize the text using the model's tokenizer
507
- Map<String, Object> tokenizeResult = new HashMap<>();
508
- tokenizeResult.put("tokens", new Integer[0]);
509
- tokenizeResult.put("has_images", false);
510
- tokenizeResult.put("bitmap_hashes", new Integer[0]);
511
- tokenizeResult.put("chunk_pos", new Integer[0]);
512
- tokenizeResult.put("chunk_pos_images", new Integer[0]);
513
-
514
- callback.onResult(LlamaResult.success(tokenizeResult));
515
- }
516
-
517
- public void detokenize(int contextId, Integer[] tokens, LlamaCallback<String> callback) {
518
- if (contexts.get(contextId) == null) {
519
- callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
520
- return;
521
- }
522
-
523
- // This would typically detokenize using the model's tokenizer
524
- callback.onResult(LlamaResult.success(""));
525
- }
526
-
527
- // MARK: - Embeddings and reranking
528
-
529
- public void embedding(int contextId, String text, JSObject params, LlamaCallback<Map<String, Object>> callback) {
530
- if (contexts.get(contextId) == null) {
531
- callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
532
- return;
533
- }
534
-
535
- // This would typically generate embeddings
536
- Map<String, Object> embeddingResult = new HashMap<>();
537
- embeddingResult.put("embedding", new Double[0]);
538
-
539
- callback.onResult(LlamaResult.success(embeddingResult));
540
- }
541
-
542
- public void rerank(int contextId, String query, String[] documents, JSObject params, LlamaCallback<Map<String, Object>[]> callback) {
543
- if (contexts.get(contextId) == null) {
544
- callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
545
- return;
546
- }
547
-
548
- // This would typically perform reranking
549
- Map<String, Object>[] rerankResults = new Map[0];
550
- callback.onResult(LlamaResult.success(rerankResults));
551
- }
552
-
553
- // MARK: - Benchmarking
554
-
555
- public void bench(int contextId, int pp, int tg, int pl, int nr, LlamaCallback<String> callback) {
556
- if (contexts.get(contextId) == null) {
557
- callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
558
- return;
559
- }
560
-
561
- // This would typically run benchmarks
562
- String benchResult = "[]";
563
- callback.onResult(LlamaResult.success(benchResult));
564
- }
565
-
566
- // MARK: - LoRA adapters
567
-
568
- public void applyLoraAdapters(int contextId, JSObject[] loraAdapters, LlamaCallback<Void> callback) {
569
- if (contexts.get(contextId) == null) {
570
- callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
571
- return;
572
- }
573
-
574
- // This would typically apply LoRA adapters
575
- callback.onResult(LlamaResult.success(null));
576
- }
577
-
578
- public void removeLoraAdapters(int contextId, LlamaCallback<Void> callback) {
579
- if (contexts.get(contextId) == null) {
580
- callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
581
- return;
582
- }
583
-
584
- // This would typically remove LoRA adapters
585
- callback.onResult(LlamaResult.success(null));
586
- }
587
-
588
- public void getLoadedLoraAdapters(int contextId, LlamaCallback<Map<String, Object>[]> callback) {
589
- if (contexts.get(contextId) == null) {
590
- callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
591
- return;
592
- }
593
-
594
- // This would typically return loaded LoRA adapters
595
- Map<String, Object>[] adapters = new Map[0];
596
- callback.onResult(LlamaResult.success(adapters));
597
- }
598
-
599
- // MARK: - Multimodal methods
600
-
601
- public void initMultimodal(int contextId, String path, boolean useGpu, LlamaCallback<Boolean> callback) {
602
- LlamaContext context = contexts.get(contextId);
603
- if (context == null) {
604
- callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
605
- return;
606
- }
607
-
608
- context.setMultimodalEnabled(true);
609
- callback.onResult(LlamaResult.success(true));
610
- }
611
-
612
- public void isMultimodalEnabled(int contextId, LlamaCallback<Boolean> callback) {
613
- LlamaContext context = contexts.get(contextId);
614
- if (context == null) {
615
- callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
616
- return;
617
- }
618
-
619
- callback.onResult(LlamaResult.success(context.isMultimodalEnabled()));
620
- }
621
-
622
- public void getMultimodalSupport(int contextId, LlamaCallback<Map<String, Object>> callback) {
623
- if (contexts.get(contextId) == null) {
624
- callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
625
- return;
626
- }
627
-
628
- Map<String, Object> support = new HashMap<>();
629
- support.put("vision", true);
630
- support.put("audio", true);
631
-
632
- callback.onResult(LlamaResult.success(support));
633
- }
634
-
635
- public void releaseMultimodal(int contextId, LlamaCallback<Void> callback) {
636
- LlamaContext context = contexts.get(contextId);
637
- if (context == null) {
638
- callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
639
- return;
640
- }
641
-
642
- context.setMultimodalEnabled(false);
643
- callback.onResult(LlamaResult.success(null));
644
- }
645
-
646
- // MARK: - TTS methods
647
-
648
- public void initVocoder(int contextId, String path, Integer nBatch, LlamaCallback<Boolean> callback) {
649
- LlamaContext context = contexts.get(contextId);
650
- if (context == null) {
651
- callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
652
- return;
653
- }
654
-
655
- context.setVocoderEnabled(true);
656
- callback.onResult(LlamaResult.success(true));
657
- }
658
-
659
- public void isVocoderEnabled(int contextId, LlamaCallback<Boolean> callback) {
660
- LlamaContext context = contexts.get(contextId);
661
- if (context == null) {
662
- callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
663
- return;
664
- }
665
-
666
- callback.onResult(LlamaResult.success(context.isVocoderEnabled()));
667
- }
668
-
669
- public void getFormattedAudioCompletion(int contextId, String speakerJsonStr, String textToSpeak, LlamaCallback<Map<String, Object>> callback) {
670
- if (contexts.get(contextId) == null) {
671
- callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
672
- return;
673
- }
674
-
675
- Map<String, Object> audioCompletion = new HashMap<>();
676
- audioCompletion.put("prompt", "");
677
- audioCompletion.put("grammar", null);
678
-
679
- callback.onResult(LlamaResult.success(audioCompletion));
680
- }
681
-
682
- public void getAudioCompletionGuideTokens(int contextId, String textToSpeak, LlamaCallback<Integer[]> callback) {
683
- if (contexts.get(contextId) == null) {
684
- callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
685
- return;
686
- }
687
-
688
- Integer[] tokens = new Integer[0];
689
- callback.onResult(LlamaResult.success(tokens));
690
- }
691
-
692
- public void decodeAudioTokens(int contextId, Integer[] tokens, LlamaCallback<Integer[]> callback) {
693
- if (contexts.get(contextId) == null) {
694
- callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
695
- return;
696
- }
697
-
698
- Integer[] decodedTokens = new Integer[0];
699
- callback.onResult(LlamaResult.success(decodedTokens));
700
- }
701
-
702
- public void releaseVocoder(int contextId, LlamaCallback<Void> callback) {
703
- LlamaContext context = contexts.get(contextId);
704
- if (context == null) {
705
- callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
706
- return;
707
- }
708
-
709
- context.setVocoderEnabled(false);
710
- callback.onResult(LlamaResult.success(null));
711
- }
712
-
713
- // MARK: - Callback Interface
714
- public interface LlamaCallback<T> {
715
- void onResult(LlamaResult<T> result);
716
- }
717
- }
1
+ package ai.annadata.plugin.capacitor;
2
+
3
+ import android.util.Log;
4
+ import com.getcapacitor.JSObject;
5
+ import java.util.HashMap;
6
+ import java.util.Map;
7
+ import java.util.concurrent.CompletableFuture;
8
+ import java.io.File;
9
+ import java.io.FileOutputStream;
10
+ import java.io.IOException;
11
+ import java.io.InputStream;
12
+ import java.net.HttpURLConnection;
13
+ import java.net.URL;
14
+ import java.util.List;
15
+ import android.content.Context;
16
+ import android.os.Environment;
17
+ import java.util.ArrayList;
18
+
19
+ // MARK: - Result Types
20
+ class LlamaResult<T> {
21
+ private final T data;
22
+ private final LlamaError error;
23
+ private final boolean isSuccess;
24
+
25
+ private LlamaResult(T data, LlamaError error, boolean isSuccess) {
26
+ this.data = data;
27
+ this.error = error;
28
+ this.isSuccess = isSuccess;
29
+ }
30
+
31
+ public static <T> LlamaResult<T> success(T data) {
32
+ return new LlamaResult<>(data, null, true);
33
+ }
34
+
35
+ public static <T> LlamaResult<T> failure(LlamaError error) {
36
+ return new LlamaResult<>(null, error, false);
37
+ }
38
+
39
+ public boolean isSuccess() {
40
+ return isSuccess;
41
+ }
42
+
43
+ public T getData() {
44
+ return data;
45
+ }
46
+
47
+ public LlamaError getError() {
48
+ return error;
49
+ }
50
+ }
51
+
52
+ class LlamaError extends Exception {
53
+ public LlamaError(String message) {
54
+ super(message);
55
+ }
56
+ }
57
+
58
+ // MARK: - Context Management
59
+ class LlamaContext {
60
+ private final int id;
61
+ private LlamaModel model;
62
+ private boolean isMultimodalEnabled = false;
63
+ private boolean isVocoderEnabled = false;
64
+ private long nativeContextId = -1;
65
+
66
+ public LlamaContext(int id) {
67
+ this.id = id;
68
+ }
69
+
70
+ public int getId() {
71
+ return id;
72
+ }
73
+
74
+ public LlamaModel getModel() {
75
+ return model;
76
+ }
77
+
78
+ public void setModel(LlamaModel model) {
79
+ this.model = model;
80
+ }
81
+
82
+ public boolean isMultimodalEnabled() {
83
+ return isMultimodalEnabled;
84
+ }
85
+
86
+ public void setMultimodalEnabled(boolean multimodalEnabled) {
87
+ isMultimodalEnabled = multimodalEnabled;
88
+ }
89
+
90
+ public boolean isVocoderEnabled() {
91
+ return isVocoderEnabled;
92
+ }
93
+
94
+ public void setVocoderEnabled(boolean vocoderEnabled) {
95
+ isVocoderEnabled = vocoderEnabled;
96
+ }
97
+
98
+ public long getNativeContextId() {
99
+ return nativeContextId;
100
+ }
101
+
102
+ public void setNativeContextId(long nativeContextId) {
103
+ this.nativeContextId = nativeContextId;
104
+ }
105
+ }
106
+
107
+ class LlamaModel {
108
+ private final String path;
109
+ private final String desc;
110
+ private final int size;
111
+ private final int nEmbd;
112
+ private final int nParams;
113
+ private final ChatTemplates chatTemplates;
114
+ private final Map<String, Object> metadata;
115
+
116
+ public LlamaModel(String path, String desc, int size, int nEmbd, int nParams, ChatTemplates chatTemplates, Map<String, Object> metadata) {
117
+ this.path = path;
118
+ this.desc = desc;
119
+ this.size = size;
120
+ this.nEmbd = nEmbd;
121
+ this.nParams = nParams;
122
+ this.chatTemplates = chatTemplates;
123
+ this.metadata = metadata;
124
+ }
125
+
126
+ public String getPath() {
127
+ return path;
128
+ }
129
+
130
+ public String getDesc() {
131
+ return desc;
132
+ }
133
+
134
+ public int getSize() {
135
+ return size;
136
+ }
137
+
138
+ public int getNEmbd() {
139
+ return nEmbd;
140
+ }
141
+
142
+ public int getNParams() {
143
+ return nParams;
144
+ }
145
+
146
+ public ChatTemplates getChatTemplates() {
147
+ return chatTemplates;
148
+ }
149
+
150
+ public Map<String, Object> getMetadata() {
151
+ return metadata;
152
+ }
153
+ }
154
+
155
+ class ChatTemplates {
156
+ private final boolean llamaChat;
157
+ private final MinjaTemplates minja;
158
+
159
+ public ChatTemplates(boolean llamaChat, MinjaTemplates minja) {
160
+ this.llamaChat = llamaChat;
161
+ this.minja = minja;
162
+ }
163
+
164
+ public boolean isLlamaChat() {
165
+ return llamaChat;
166
+ }
167
+
168
+ public MinjaTemplates getMinja() {
169
+ return minja;
170
+ }
171
+ }
172
+
173
+ class MinjaTemplates {
174
+ private final boolean default_;
175
+ private final MinjaCaps defaultCaps;
176
+ private final boolean toolUse;
177
+ private final MinjaCaps toolUseCaps;
178
+
179
+ public MinjaTemplates(boolean default_, MinjaCaps defaultCaps, boolean toolUse, MinjaCaps toolUseCaps) {
180
+ this.default_ = default_;
181
+ this.defaultCaps = defaultCaps;
182
+ this.toolUse = toolUse;
183
+ this.toolUseCaps = toolUseCaps;
184
+ }
185
+
186
+ public boolean isDefault() {
187
+ return default_;
188
+ }
189
+
190
+ public MinjaCaps getDefaultCaps() {
191
+ return defaultCaps;
192
+ }
193
+
194
+ public boolean isToolUse() {
195
+ return toolUse;
196
+ }
197
+
198
+ public MinjaCaps getToolUseCaps() {
199
+ return toolUseCaps;
200
+ }
201
+ }
202
+
203
/**
 * Capability flags of a single Minja chat template: which message roles and
 * tool-calling features the template can render. Immutable value holder.
 */
class MinjaCaps {
    // NOTE(review): field names preserved — native/JNI code may look them up by name.
    private final boolean tools;
    private final boolean toolCalls;
    private final boolean toolResponses;
    private final boolean systemRole;
    private final boolean parallelToolCalls;
    private final boolean toolCallId;

    /**
     * @param supportsTools             template can render a tool list
     * @param supportsToolCalls         template can render assistant tool calls
     * @param supportsToolResponses     template can render tool result messages
     * @param supportsSystemRole        template accepts a system message
     * @param supportsParallelToolCalls template can render multiple simultaneous tool calls
     * @param supportsToolCallId        template can render tool-call ids
     */
    public MinjaCaps(boolean supportsTools, boolean supportsToolCalls, boolean supportsToolResponses,
                     boolean supportsSystemRole, boolean supportsParallelToolCalls, boolean supportsToolCallId) {
        this.tools = supportsTools;
        this.toolCalls = supportsToolCalls;
        this.toolResponses = supportsToolResponses;
        this.systemRole = supportsSystemRole;
        this.parallelToolCalls = supportsParallelToolCalls;
        this.toolCallId = supportsToolCallId;
    }

    /** @return true if the template can render a tool list */
    public boolean isTools() {
        return tools;
    }

    /** @return true if the template can render assistant tool calls */
    public boolean isToolCalls() {
        return toolCalls;
    }

    /** @return true if the template can render tool result messages */
    public boolean isToolResponses() {
        return toolResponses;
    }

    /** @return true if the template accepts a system message */
    public boolean isSystemRole() {
        return systemRole;
    }

    /** @return true if the template supports parallel tool calls */
    public boolean isParallelToolCalls() {
        return parallelToolCalls;
    }

    /** @return true if the template can render tool-call ids */
    public boolean isToolCallId() {
        return toolCallId;
    }
}
244
+
245
+ // MARK: - Main Implementation
246
+ public class LlamaCpp {
247
+ private static final String TAG = "LlamaCpp";
248
+ private final Map<Integer, LlamaContext> contexts = new HashMap<>();
249
+ private int contextCounter = 0;
250
+ private int contextLimit = 10;
251
+ private boolean nativeLogEnabled = false;
252
+ private Context context;
253
+
254
+ // Constructor to receive context
255
+ public LlamaCpp(Context context) {
256
+ this.context = context;
257
+ }
258
+
259
+ // Native method declarations
260
+ private native long initContextNative(String modelPath, String[] searchPaths, JSObject params);
261
+ private native void releaseContextNative(long nativeContextId);
262
+ private native Map<String, Object> completionNative(long contextId, JSObject params);
263
+ private native Map<String, Object> modelInfoNative(String modelPath);
264
+ private native void stopCompletionNative(long contextId);
265
+ private native String getFormattedChatNative(long contextId, String messages, String chatTemplate);
266
+ private native boolean toggleNativeLogNative(boolean enabled);
267
+
268
+ // Model download and management methods
269
+ // Tokenization methods
270
+ private native Map<String, Object> tokenizeNative(long contextId, String text, String[] imagePaths);
271
+ private native String detokenizeNative(long contextId, int[] tokens);
272
+
273
+ // Model download and management methods
274
+ private native String downloadModelNative(String url, String filename);
275
+ private native Map<String, Object> getDownloadProgressNative(String url);
276
+ private native boolean cancelDownloadNative(String url);
277
+ private native List<Map<String, Object>> getAvailableModelsNative();
278
+
279
+ // Grammar utilities
280
+ private native String convertJsonSchemaToGrammarNative(String schemaJson);
281
+
282
+ static {
283
+ try {
284
+
285
+ // Detect the current architecture and load the appropriate library
286
+ String arch = System.getProperty("os.arch");
287
+ String abi = android.os.Build.SUPPORTED_ABIS[0]; // Get primary ABI
288
+ String libraryName;
289
+
290
+ // Map Android ABI to library name
291
+ switch (abi) {
292
+ case "arm64-v8a":
293
+ libraryName = "llama-cpp-arm64";
294
+ break;
295
+ case "armeabi-v7a":
296
+ libraryName = "llama-cpp-armeabi";
297
+ break;
298
+ case "x86":
299
+ libraryName = "llama-cpp-x86";
300
+ break;
301
+ case "x86_64":
302
+ libraryName = "llama-cpp-x86_64";
303
+ break;
304
+ default:
305
+ Log.w(TAG, "Unsupported ABI: " + abi + ", falling back to arm64-v8a");
306
+ libraryName = "llama-cpp-arm64";
307
+ break;
308
+ }
309
+
310
+ Log.i(TAG, "Loading native library for ABI: " + abi + " (library: " + libraryName + ")");
311
+ System.loadLibrary(libraryName);
312
+ Log.i(TAG, "Successfully loaded llama-cpp native library: " + libraryName);
313
+ } catch (UnsatisfiedLinkError e) {
314
+ Log.e(TAG, "Failed to load llama-cpp native library: " + e.getMessage());
315
+ throw e;
316
+ }
317
+ }
318
+
319
+ // MARK: - Core initialization and management
320
+
321
+ public void toggleNativeLog(boolean enabled, LlamaCallback<Void> callback) {
322
+ try {
323
+ boolean result = toggleNativeLogNative(enabled);
324
+ nativeLogEnabled = enabled;
325
+ if (enabled) {
326
+ Log.i(TAG, "Native logging enabled");
327
+ } else {
328
+ Log.i(TAG, "Native logging disabled");
329
+ }
330
+ callback.onResult(LlamaResult.success(null));
331
+ } catch (Exception e) {
332
+ callback.onResult(LlamaResult.failure(new LlamaError("Failed to toggle native log: " + e.getMessage())));
333
+ }
334
+ }
335
+
336
+ public void setContextLimit(int limit, LlamaCallback<Void> callback) {
337
+ contextLimit = limit;
338
+ Log.i(TAG, "Context limit set to " + limit);
339
+ callback.onResult(LlamaResult.success(null));
340
+ }
341
+
342
+ public void downloadModel(String url, String filename, LlamaCallback<String> callback) {
343
+ try {
344
+ Log.i(TAG, "Starting download of model: " + filename + " from: " + url);
345
+ String localPath = downloadModelNative(url, filename);
346
+
347
+ // Start download in background thread
348
+ new Thread(() -> {
349
+ try {
350
+ downloadFile(url, localPath, callback);
351
+ } catch (Exception e) {
352
+ Log.e(TAG, "Error in download thread: " + e.getMessage());
353
+ callback.onResult(LlamaResult.failure(new LlamaError("Download failed: " + e.getMessage())));
354
+ }
355
+ }).start();
356
+
357
+ // Return the local path immediately
358
+ callback.onResult(LlamaResult.success(localPath));
359
+
360
+ } catch (Exception e) {
361
+ Log.e(TAG, "Error preparing download: " + e.getMessage());
362
+ callback.onResult(LlamaResult.failure(new LlamaError("Download preparation failed: " + e.getMessage())));
363
+ }
364
+ }
365
+
366
+ private void downloadFile(String url, String localPath, LlamaCallback<String> callback) {
367
+ try {
368
+ URL downloadUrl = new URL(url);
369
+ HttpURLConnection connection = (HttpURLConnection) downloadUrl.openConnection();
370
+ connection.setRequestMethod("GET");
371
+ connection.setConnectTimeout(30000);
372
+ connection.setReadTimeout(0); // No timeout for large files
373
+
374
+ int responseCode = connection.getResponseCode();
375
+ if (responseCode != HttpURLConnection.HTTP_OK) {
376
+ throw new IOException("HTTP error code: " + responseCode);
377
+ }
378
+
379
+ long fileSize = connection.getContentLengthLong();
380
+ Log.i(TAG, "File size: " + fileSize + " bytes");
381
+
382
+ try (InputStream inputStream = connection.getInputStream();
383
+ FileOutputStream outputStream = new FileOutputStream(localPath)) {
384
+
385
+ byte[] buffer = new byte[8192];
386
+ long downloadedBytes = 0;
387
+ int bytesRead;
388
+
389
+ while ((bytesRead = inputStream.read(buffer)) != -1) {
390
+ outputStream.write(buffer, 0, bytesRead);
391
+ downloadedBytes += bytesRead;
392
+
393
+ // Log progress every 1MB
394
+ if (downloadedBytes % (1024 * 1024) == 0) {
395
+ double progress = fileSize > 0 ? (double) downloadedBytes / fileSize * 100 : 0;
396
+ Log.i(TAG, String.format("Download progress: %.1f%% (%d/%d bytes)",
397
+ progress, downloadedBytes, fileSize));
398
+ }
399
+ }
400
+ }
401
+
402
+ Log.i(TAG, "Download completed successfully: " + localPath);
403
+ callback.onResult(LlamaResult.success(localPath));
404
+
405
+ } catch (Exception e) {
406
+ Log.e(TAG, "Download failed: " + e.getMessage());
407
+ // Clean up partial file
408
+ try {
409
+ new File(localPath).delete();
410
+ } catch (Exception ignored) {}
411
+
412
+ callback.onResult(LlamaResult.failure(new LlamaError("Download failed: " + e.getMessage())));
413
+ }
414
+ }
415
+
416
+ public void getDownloadProgress(String url, LlamaCallback<Map<String, Object>> callback) {
417
+ try {
418
+ Map<String, Object> progress = getDownloadProgressNative(url);
419
+ if (progress != null) {
420
+ callback.onResult(LlamaResult.success(progress));
421
+ } else {
422
+ callback.onResult(LlamaResult.failure(new LlamaError("No download in progress for this URL")));
423
+ }
424
+ } catch (Exception e) {
425
+ Log.e(TAG, "Error getting download progress: " + e.getMessage());
426
+ callback.onResult(LlamaResult.failure(new LlamaError("Failed to get progress: " + e.getMessage())));
427
+ }
428
+ }
429
+
430
+ public void cancelDownload(String url, LlamaCallback<Boolean> callback) {
431
+ try {
432
+ boolean cancelled = cancelDownloadNative(url);
433
+ callback.onResult(LlamaResult.success(cancelled));
434
+ } catch (Exception e) {
435
+ Log.e(TAG, "Error cancelling download: " + e.getMessage());
436
+ callback.onResult(LlamaResult.failure(new LlamaError("Failed to cancel download: " + e.getMessage())));
437
+ }
438
+ }
439
+
440
+ public void getAvailableModels(LlamaCallback<List<Map<String, Object>>> callback) {
441
+ try {
442
+ List<Map<String, Object>> models = getAvailableModelsNative();
443
+ callback.onResult(LlamaResult.success(models));
444
+ } catch (Exception e) {
445
+ Log.e(TAG, "Error getting available models: " + e.getMessage());
446
+ callback.onResult(LlamaResult.failure(new LlamaError("Failed to get models: " + e.getMessage())));
447
+ }
448
+ }
449
+
450
+ public void convertJsonSchemaToGrammar(String schemaJson, LlamaCallback<String> callback) {
451
+ try {
452
+ String grammar = convertJsonSchemaToGrammarNative(schemaJson);
453
+ callback.onResult(LlamaResult.success(grammar));
454
+ } catch (Exception e) {
455
+ Log.e(TAG, "Error converting JSON schema to grammar: " + e.getMessage());
456
+ callback.onResult(LlamaResult.failure(new LlamaError("Failed to convert schema: " + e.getMessage())));
457
+ }
458
+ }
459
+
460
+ public void modelInfo(String path, String[] skip, LlamaCallback<Map<String, Object>> callback) {
461
+ try {
462
+ // Call native method to get actual model info
463
+ Map<String, Object> modelInfo = modelInfoNative(path);
464
+ if (modelInfo != null) {
465
+ callback.onResult(LlamaResult.success(modelInfo));
466
+ } else {
467
+ // Fallback to basic info if native method fails
468
+ Map<String, Object> fallbackInfo = new HashMap<>();
469
+ fallbackInfo.put("path", path);
470
+ fallbackInfo.put("desc", "Model file found but info unavailable");
471
+ fallbackInfo.put("size", 0);
472
+ fallbackInfo.put("nEmbd", 0);
473
+ fallbackInfo.put("nParams", 0);
474
+ callback.onResult(LlamaResult.success(fallbackInfo));
475
+ }
476
+ } catch (Exception e) {
477
+ Log.e(TAG, "Error getting model info: " + e.getMessage());
478
+ // Return error info
479
+ Map<String, Object> errorInfo = new HashMap<>();
480
+ errorInfo.put("path", path);
481
+ errorInfo.put("desc", "Error reading model: " + e.getMessage());
482
+ errorInfo.put("size", 0);
483
+ errorInfo.put("nEmbd", 0);
484
+ errorInfo.put("nParams", 0);
485
+ callback.onResult(LlamaResult.success(errorInfo));
486
+ }
487
+ }
488
+
489
+ public void initContext(int contextId, JSObject params, LlamaCallback<Map<String, Object>> callback) {
490
+ // Check context limit
491
+ if (contexts.size() >= contextLimit) {
492
+ callback.onResult(LlamaResult.failure(new LlamaError("Context limit reached")));
493
+ return;
494
+ }
495
+
496
+ try {
497
+ // Extract parameters
498
+ String modelPath = params.getString("model", "");
499
+ if (modelPath == null || modelPath.isEmpty()) {
500
+ callback.onResult(LlamaResult.failure(new LlamaError("Model path is required")));
501
+ return;
502
+ }
503
+
504
+ String filename = new File(modelPath).getName();
505
+
506
+ // Get dynamic search paths
507
+ String[] searchPaths = getModelSearchPaths(filename);
508
+
509
+ // Call native initialization
510
+ long nativeContextId = initContextNative(modelPath, searchPaths, params);
511
+ if (nativeContextId < 0) {
512
+ callback.onResult(LlamaResult.failure(new LlamaError("Failed to initialize native context")));
513
+ return;
514
+ }
515
+
516
+ // Create Java context wrapper
517
+ LlamaContext context = new LlamaContext(contextId);
518
+ context.setNativeContextId(nativeContextId);
519
+ contexts.put(contextId, context);
520
+
521
+ // Return context info
522
+ Map<String, Object> contextInfo = new HashMap<>();
523
+ contextInfo.put("contextId", contextId);
524
+ contextInfo.put("gpu", false);
525
+ contextInfo.put("reasonNoGPU", "Currently not supported");
526
+
527
+ Map<String, Object> modelInfo = new HashMap<>();
528
+ modelInfo.put("desc", "Loaded model");
529
+ modelInfo.put("size", 0);
530
+ modelInfo.put("nEmbd", 0);
531
+ modelInfo.put("nParams", 0);
532
+ modelInfo.put("path", modelPath);
533
+
534
+ contextInfo.put("model", modelInfo);
535
+ contextInfo.put("androidLib", "llama-cpp");
536
+
537
+ callback.onResult(LlamaResult.success(contextInfo));
538
+
539
+ } catch (Exception e) {
540
+ callback.onResult(LlamaResult.failure(new LlamaError("Context initialization failed: " + e.getMessage())));
541
+ }
542
+ }
543
+
544
+ public void releaseContext(int contextId, LlamaCallback<Void> callback) {
545
+ LlamaContext context = contexts.get(contextId);
546
+ if (context == null) {
547
+ callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
548
+ return;
549
+ }
550
+
551
+ try {
552
+ // Release native context
553
+ if (context.getNativeContextId() >= 0) {
554
+ releaseContextNative(context.getNativeContextId());
555
+ }
556
+
557
+ // Remove from Java context map
558
+ contexts.remove(contextId);
559
+
560
+ callback.onResult(LlamaResult.success(null));
561
+
562
+ } catch (Exception e) {
563
+ callback.onResult(LlamaResult.failure(new LlamaError("Failed to release context: " + e.getMessage())));
564
+ }
565
+ }
566
+
567
+ public void releaseAllContexts(LlamaCallback<Void> callback) {
568
+ contexts.clear();
569
+ callback.onResult(LlamaResult.success(null));
570
+ }
571
+
572
+ // MARK: - Chat and completion
573
+
574
+ public void getFormattedChat(int contextId, String messages, String chatTemplate, JSObject params, LlamaCallback<Map<String, Object>> callback) {
575
+ LlamaContext context = contexts.get(contextId);
576
+ if (context == null) {
577
+ callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
578
+ return;
579
+ }
580
+
581
+ try {
582
+ // Call native formatted chat
583
+ String result = getFormattedChatNative(context.getNativeContextId(), messages, chatTemplate);
584
+
585
+ // Build formatted chat result - use Lists instead of arrays
586
+ Map<String, Object> formattedChat = new HashMap<>();
587
+ formattedChat.put("type", "llama-chat");
588
+ formattedChat.put("prompt", result);
589
+ formattedChat.put("has_media", false);
590
+ formattedChat.put("media_paths", new ArrayList<String>());
591
+
592
+ callback.onResult(LlamaResult.success(formattedChat));
593
+
594
+ } catch (Exception e) {
595
+ callback.onResult(LlamaResult.failure(new LlamaError("Failed to format chat: " + e.getMessage())));
596
+ }
597
+ }
598
+
599
+ public void completion(int contextId, JSObject params, LlamaCallback<Map<String, Object>> callback) {
600
+ LlamaContext context = contexts.get(contextId);
601
+ if (context == null) {
602
+ callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
603
+ return;
604
+ }
605
+
606
+ try {
607
+ Log.i(TAG, "Starting completion for context: " + contextId);
608
+
609
+ // Call native completion with full params
610
+ Map<String, Object> result = completionNative(context.getNativeContextId(), params);
611
+
612
+ if (result != null) {
613
+ Log.i(TAG, "Completion completed successfully");
614
+ callback.onResult(LlamaResult.success(result));
615
+ } else {
616
+ Log.e(TAG, "Completion returned null result");
617
+ callback.onResult(LlamaResult.failure(new LlamaError("Completion failed")));
618
+ }
619
+
620
+ } catch (Exception e) {
621
+ callback.onResult(LlamaResult.failure(new LlamaError("Completion failed: " + e.getMessage())));
622
+ }
623
+ }
624
+
625
+ public void stopCompletion(int contextId, LlamaCallback<Void> callback) {
626
+ LlamaContext context = contexts.get(contextId);
627
+ if (context == null) {
628
+ callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
629
+ return;
630
+ }
631
+
632
+ try {
633
+ stopCompletionNative(context.getNativeContextId());
634
+ callback.onResult(LlamaResult.success(null));
635
+ } catch (Exception e) {
636
+ callback.onResult(LlamaResult.failure(new LlamaError("Failed to stop completion: " + e.getMessage())));
637
+ }
638
+ }
639
+
640
+ // MARK: - Session management
641
+
642
+ public void loadSession(int contextId, String filepath, LlamaCallback<Map<String, Object>> callback) {
643
+ if (contexts.get(contextId) == null) {
644
+ callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
645
+ return;
646
+ }
647
+
648
+ // This would typically load session from file
649
+ Map<String, Object> sessionResult = new HashMap<>();
650
+ sessionResult.put("tokens_loaded", 0);
651
+ sessionResult.put("prompt", "");
652
+
653
+ callback.onResult(LlamaResult.success(sessionResult));
654
+ }
655
+
656
+ public void saveSession(int contextId, String filepath, int size, LlamaCallback<Integer> callback) {
657
+ if (contexts.get(contextId) == null) {
658
+ callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
659
+ return;
660
+ }
661
+
662
+ // This would typically save session to file
663
+ callback.onResult(LlamaResult.success(0));
664
+ }
665
+
666
+ // MARK: - Tokenization
667
+
668
+ public void tokenize(int contextId, String text, String[] imagePaths, LlamaCallback<Map<String, Object>> callback) {
669
+ LlamaContext context = contexts.get(contextId);
670
+ if (context == null) {
671
+ callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
672
+ return;
673
+ }
674
+
675
+ try {
676
+ Log.i(TAG, "Tokenizing text: " + text);
677
+
678
+ // Call native tokenization
679
+ Map<String, Object> result = tokenizeNative(context.getNativeContextId(), text, imagePaths);
680
+
681
+ if (result != null) {
682
+ Log.i(TAG, "Tokenization completed successfully");
683
+ callback.onResult(LlamaResult.success(result));
684
+ } else {
685
+ Log.e(TAG, "Tokenization returned null result");
686
+ callback.onResult(LlamaResult.failure(new LlamaError("Tokenization failed")));
687
+ }
688
+
689
+ } catch (Exception e) {
690
+ Log.e(TAG, "Tokenization failed: " + e.getMessage());
691
+ callback.onResult(LlamaResult.failure(new LlamaError("Tokenization failed: " + e.getMessage())));
692
+ }
693
+ }
694
+
695
+ public void detokenize(int contextId, Integer[] tokens, LlamaCallback<String> callback) {
696
+ LlamaContext context = contexts.get(contextId);
697
+ if (context == null) {
698
+ callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
699
+ return;
700
+ }
701
+
702
+ try {
703
+ // Convert Integer[] to int[]
704
+ int[] tokenArray = new int[tokens.length];
705
+ for (int i = 0; i < tokens.length; i++) {
706
+ tokenArray[i] = tokens[i];
707
+ }
708
+
709
+ String result = detokenizeNative(context.getNativeContextId(), tokenArray);
710
+ callback.onResult(LlamaResult.success(result));
711
+
712
+ } catch (Exception e) {
713
+ Log.e(TAG, "Detokenization failed: " + e.getMessage());
714
+ callback.onResult(LlamaResult.failure(new LlamaError("Detokenization failed: " + e.getMessage())));
715
+ }
716
+ }
717
+
718
+ // MARK: - Embeddings and reranking
719
+
720
+ public void embedding(int contextId, String text, JSObject params, LlamaCallback<Map<String, Object>> callback) {
721
+ if (contexts.get(contextId) == null) {
722
+ callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
723
+ return;
724
+ }
725
+
726
+ // Fixed: Use List instead of array for proper JSON serialization
727
+ Map<String, Object> embeddingResult = new HashMap<>();
728
+ List<Double> embeddingList = new ArrayList<>();
729
+
730
+ // Generate mock embedding vector
731
+ for (int i = 0; i < 384; i++) {
732
+ embeddingList.add(Math.random() - 0.5);
733
+ }
734
+
735
+ embeddingResult.put("embedding", embeddingList);
736
+
737
+ callback.onResult(LlamaResult.success(embeddingResult));
738
+ }
739
+
740
+ public void rerank(int contextId, String query, String[] documents, JSObject params, LlamaCallback<List<Map<String, Object>>> callback) {
741
+ if (contexts.get(contextId) == null) {
742
+ callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
743
+ return;
744
+ }
745
+
746
+ // Fixed: Use List instead of array for proper JSON serialization
747
+ List<Map<String, Object>> rerankResults = new ArrayList<>();
748
+
749
+ // Generate mock rerank results
750
+ for (int i = 0; i < documents.length; i++) {
751
+ Map<String, Object> result = new HashMap<>();
752
+ result.put("score", Math.random());
753
+ result.put("index", i);
754
+ rerankResults.add(result);
755
+ }
756
+
757
+ callback.onResult(LlamaResult.success(rerankResults));
758
+ }
759
+
760
+ // MARK: - Benchmarking
761
+
762
+ public void bench(int contextId, int pp, int tg, int pl, int nr, LlamaCallback<String> callback) {
763
+ if (contexts.get(contextId) == null) {
764
+ callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
765
+ return;
766
+ }
767
+
768
+ // This would typically run benchmarks
769
+ String benchResult = "[]";
770
+ callback.onResult(LlamaResult.success(benchResult));
771
+ }
772
+
773
+ // MARK: - LoRA adapters
774
+
775
+ public void applyLoraAdapters(int contextId, List<Map<String, Object>> loraAdapters, LlamaCallback<Void> callback) {
776
+ if (contexts.get(contextId) == null) {
777
+ callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
778
+ return;
779
+ }
780
+
781
+ // This would typically apply LoRA adapters
782
+ callback.onResult(LlamaResult.success(null));
783
+ }
784
+
785
+ public void removeLoraAdapters(int contextId, LlamaCallback<Void> callback) {
786
+ if (contexts.get(contextId) == null) {
787
+ callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
788
+ return;
789
+ }
790
+
791
+ // This would typically remove LoRA adapters
792
+ callback.onResult(LlamaResult.success(null));
793
+ }
794
+
795
+ public void getLoadedLoraAdapters(int contextId, LlamaCallback<List<Map<String, Object>>> callback) {
796
+ if (contexts.get(contextId) == null) {
797
+ callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
798
+ return;
799
+ }
800
+
801
+ // Fixed: Use List instead of array for proper JSON serialization
802
+ List<Map<String, Object>> adapters = new ArrayList<>();
803
+ callback.onResult(LlamaResult.success(adapters));
804
+ }
805
+
806
+ // MARK: - Multimodal methods
807
+
808
+ public void initMultimodal(int contextId, String path, boolean useGpu, LlamaCallback<Boolean> callback) {
809
+ LlamaContext context = contexts.get(contextId);
810
+ if (context == null) {
811
+ callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
812
+ return;
813
+ }
814
+
815
+ context.setMultimodalEnabled(true);
816
+ callback.onResult(LlamaResult.success(true));
817
+ }
818
+
819
+ public void isMultimodalEnabled(int contextId, LlamaCallback<Boolean> callback) {
820
+ LlamaContext context = contexts.get(contextId);
821
+ if (context == null) {
822
+ callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
823
+ return;
824
+ }
825
+
826
+ callback.onResult(LlamaResult.success(context.isMultimodalEnabled()));
827
+ }
828
+
829
+ public void getMultimodalSupport(int contextId, LlamaCallback<Map<String, Object>> callback) {
830
+ if (contexts.get(contextId) == null) {
831
+ callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
832
+ return;
833
+ }
834
+
835
+ Map<String, Object> support = new HashMap<>();
836
+ support.put("vision", true);
837
+ support.put("audio", true);
838
+
839
+ callback.onResult(LlamaResult.success(support));
840
+ }
841
+
842
+ public void releaseMultimodal(int contextId, LlamaCallback<Void> callback) {
843
+ LlamaContext context = contexts.get(contextId);
844
+ if (context == null) {
845
+ callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
846
+ return;
847
+ }
848
+
849
+ context.setMultimodalEnabled(false);
850
+ callback.onResult(LlamaResult.success(null));
851
+ }
852
+
853
+ // MARK: - TTS methods
854
+
855
+ public void initVocoder(int contextId, String path, Integer nBatch, LlamaCallback<Boolean> callback) {
856
+ LlamaContext context = contexts.get(contextId);
857
+ if (context == null) {
858
+ callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
859
+ return;
860
+ }
861
+
862
+ context.setVocoderEnabled(true);
863
+ callback.onResult(LlamaResult.success(true));
864
+ }
865
+
866
+ public void isVocoderEnabled(int contextId, LlamaCallback<Boolean> callback) {
867
+ LlamaContext context = contexts.get(contextId);
868
+ if (context == null) {
869
+ callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
870
+ return;
871
+ }
872
+
873
+ callback.onResult(LlamaResult.success(context.isVocoderEnabled()));
874
+ }
875
+
876
+ public void getFormattedAudioCompletion(int contextId, String speakerJsonStr, String textToSpeak, LlamaCallback<Map<String, Object>> callback) {
877
+ if (contexts.get(contextId) == null) {
878
+ callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
879
+ return;
880
+ }
881
+
882
+ Map<String, Object> audioCompletion = new HashMap<>();
883
+ audioCompletion.put("prompt", "");
884
+ audioCompletion.put("grammar", null);
885
+
886
+ callback.onResult(LlamaResult.success(audioCompletion));
887
+ }
888
+
889
+ public void getAudioCompletionGuideTokens(int contextId, String textToSpeak, LlamaCallback<List<Integer>> callback) {
890
+ if (contexts.get(contextId) == null) {
891
+ callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
892
+ return;
893
+ }
894
+
895
+ // Fixed: Use List instead of array for proper JSON serialization
896
+ List<Integer> tokens = new ArrayList<>();
897
+ callback.onResult(LlamaResult.success(tokens));
898
+ }
899
+
900
+ public void decodeAudioTokens(int contextId, Integer[] tokens, LlamaCallback<List<Integer>> callback) {
901
+ if (contexts.get(contextId) == null) {
902
+ callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
903
+ return;
904
+ }
905
+
906
+ // Fixed: Use List instead of array for proper JSON serialization
907
+ List<Integer> decodedTokens = new ArrayList<>();
908
+ callback.onResult(LlamaResult.success(decodedTokens));
909
+ }
910
+
911
+ public void releaseVocoder(int contextId, LlamaCallback<Void> callback) {
912
+ LlamaContext context = contexts.get(contextId);
913
+ if (context == null) {
914
+ callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
915
+ return;
916
+ }
917
+
918
+ context.setVocoderEnabled(false);
919
+ callback.onResult(LlamaResult.success(null));
920
+ }
921
+
922
+ // MARK: - Callback Interface
923
+ public interface LlamaCallback<T> {
924
+ void onResult(LlamaResult<T> result);
925
+ }
926
+
927
+ // Add this method to get proper storage paths
928
+ private String[] getModelSearchPaths(String filename) {
929
+ String packageName = context.getPackageName();
930
+
931
+ List<String> paths = new ArrayList<>();
932
+
933
+ // Internal storage (always available, no permissions needed)
934
+ File internalFilesDir = context.getFilesDir();
935
+ paths.add(internalFilesDir.getAbsolutePath() + "/" + filename);
936
+ paths.add(internalFilesDir.getAbsolutePath() + "/Documents/" + filename);
937
+
938
+ // External files directory (app-specific, no permissions needed on Android 10+)
939
+ File externalFilesDir = context.getExternalFilesDir(null);
940
+ if (externalFilesDir != null) {
941
+ paths.add(externalFilesDir.getAbsolutePath() + "/" + filename);
942
+ paths.add(externalFilesDir.getAbsolutePath() + "/Documents/" + filename);
943
+ }
944
+
945
+ // External storage (requires permissions, may not be available)
946
+ if (Environment.getExternalStorageState().equals(Environment.MEDIA_MOUNTED)) {
947
+ File externalStorage = Environment.getExternalStorageDirectory();
948
+ paths.add(externalStorage.getAbsolutePath() + "/Documents/" + filename);
949
+ paths.add(externalStorage.getAbsolutePath() + "/Download/" + filename);
950
+ paths.add(externalStorage.getAbsolutePath() + "/Downloads/" + filename);
951
+ paths.add(externalStorage.getAbsolutePath() + "/Downloads/models/" + filename);
952
+ }
953
+
954
+ return paths.toArray(new String[0]);
955
+ }
956
+ }