llama-cpp-capacitor 0.0.2 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -109,40 +109,42 @@ endfunction()
 
 # Build for different architectures
 if (ANDROID_ABI STREQUAL "arm64-v8a")
- build_library(llama-cpp-arm64-v8a "arm64" "-march=armv8-a")
+ build_library(llama-cpp-arm64-v8a "arm" "-march=armv8-a")
 elseif (ANDROID_ABI STREQUAL "armeabi-v7a")
 build_library(llama-cpp-armeabi-v7a "arm" "-march=armv7-a -mfpu=neon")
 elseif (ANDROID_ABI STREQUAL "x86")
- build_library(llama-cpp-x86 "generic" "-march=i686 -mtune=intel -mssse3 -mfpmath=sse -m32")
+ build_library(llama-cpp-x86 "x86" "-march=i686 -mtune=intel -mssse3 -mfpmath=sse -m32")
 elseif (ANDROID_ABI STREQUAL "x86_64")
- build_library(llama-cpp-x86_64 "generic" "-march=x86-64 -msse4.2 -mpopcnt -m64 -mtune=intel")
+ build_library(llama-cpp-x86_64 "x86" "-march=x86-64 -msse4.2 -mpopcnt -m64 -mtune=intel")
 endif()
 
- # Set compile definitions
- target_compile_definitions(llama-cpp-arm64-v8a PRIVATE
- -DNDEBUG
- -DO3
- -DLM_GGML_USE_CPU
- -DLM_GGML_CPU_GENERIC
- )
-
- target_compile_definitions(llama-cpp-armeabi-v7a PRIVATE
- -DNDEBUG
- -DO3
- -DLM_GGML_USE_CPU
- -DLM_GGML_CPU_GENERIC
- )
-
- target_compile_definitions(llama-cpp-x86 PRIVATE
- -DNDEBUG
- -DO3
- -DLM_GGML_USE_CPU
- -DLM_GGML_CPU_GENERIC
- )
-
- target_compile_definitions(llama-cpp-x86_64 PRIVATE
- -DNDEBUG
- -DO3
- -DLM_GGML_USE_CPU
- -DLM_GGML_CPU_GENERIC
- )
+ # Set compile definitions for the target that was actually built
+ if (ANDROID_ABI STREQUAL "arm64-v8a")
+ target_compile_definitions(llama-cpp-arm64-v8a PRIVATE
+ -DNDEBUG
+ -DO3
+ -DLM_GGML_USE_CPU
+ -DLM_GGML_CPU_GENERIC
+ )
+ elseif (ANDROID_ABI STREQUAL "armeabi-v7a")
+ target_compile_definitions(llama-cpp-armeabi-v7a PRIVATE
+ -DNDEBUG
+ -DO3
+ -DLM_GGML_USE_CPU
+ -DLM_GGML_CPU_GENERIC
+ )
+ elseif (ANDROID_ABI STREQUAL "x86")
+ target_compile_definitions(llama-cpp-x86 PRIVATE
+ -DNDEBUG
+ -DO3
+ -DLM_GGML_USE_CPU
+ -DLM_GGML_CPU_GENERIC
+ )
+ elseif (ANDROID_ABI STREQUAL "x86_64")
+ target_compile_definitions(llama-cpp-x86_64 PRIVATE
+ -DNDEBUG
+ -DO3
+ -DLM_GGML_USE_CPU
+ -DLM_GGML_CPU_GENERIC
+ )
+ endif()

@@ -51,6 +51,7 @@ class LlamaContext {
 private LlamaModel model;
 private boolean isMultimodalEnabled = false;
 private boolean isVocoderEnabled = false;
+ private long nativeContextId = -1;
 
 public LlamaContext(int id) {
 this.id = id;

@@ -83,6 +84,14 @@ class LlamaContext {
 public void setVocoderEnabled(boolean vocoderEnabled) {
 isVocoderEnabled = vocoderEnabled;
 }
+
+ public long getNativeContextId() {
+ return nativeContextId;
+ }
+
+ public void setNativeContextId(long nativeContextId) {
+ this.nativeContextId = nativeContextId;
+ }
 }
 
 class LlamaModel {

@@ -231,16 +240,33 @@ public class LlamaCpp {
 private int contextLimit = 10;
 private boolean nativeLogEnabled = false;
 
+ // Native method declarations
+ private native long initContextNative(String modelPath, JSObject params);
+ private native void releaseContextNative(long nativeContextId);
+ private native String completionNative(long contextId, String prompt);
+ private native void stopCompletionNative(long contextId);
+ private native String getFormattedChatNative(long contextId, String messages, String chatTemplate);
+ private native boolean toggleNativeLogNative(boolean enabled);
+
+ static {
+ System.loadLibrary("llama-cpp");
+ }
+
 // MARK: - Core initialization and management
 
 public void toggleNativeLog(boolean enabled, LlamaCallback<Void> callback) {
- nativeLogEnabled = enabled;
- if (enabled) {
- Log.i(TAG, "Native logging enabled");
- } else {
- Log.i(TAG, "Native logging disabled");
+ try {
+ boolean result = toggleNativeLogNative(enabled);
+ nativeLogEnabled = enabled;
+ if (enabled) {
+ Log.i(TAG, "Native logging enabled");
+ } else {
+ Log.i(TAG, "Native logging disabled");
+ }
+ callback.onResult(LlamaResult.success(null));
+ } catch (Exception e) {
+ callback.onResult(LlamaResult.failure(new LlamaError("Failed to toggle native log: " + e.getMessage())));
 }
- callback.onResult(LlamaResult.success(null));
 }
 
 public void setContextLimit(int limit, LlamaCallback<Void> callback) {

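The hunk above replaces the previous pure-Java stub in LlamaCpp with real JNI bindings: six native method declarations plus a static initializer that loads the llama-cpp shared library before any of them can run. Below is a minimal sketch of that wiring pattern; the class name and the defensive wrapper are illustrative additions, while the library name and the shape of toggleNativeLogNative come from the diff itself.

    // Sketch of the JNI wiring pattern adopted in 0.0.4 (illustrative class name;
    // only "llama-cpp" and the native-method shape are taken from the diff above).
    public final class NativeBridgeSketch {

        static {
            // Loads libllama-cpp.so packaged for the current ABI; throws
            // UnsatisfiedLinkError if that ABI was not built into the app.
            System.loadLibrary("llama-cpp");
        }

        // Same signature shape as toggleNativeLogNative(boolean) in the hunk above.
        private native boolean toggleNativeLogNative(boolean enabled);

        // Defensive wrapper in the same spirit as the reworked toggleNativeLog():
        // report native failures to the caller instead of crashing the bridge.
        public boolean tryToggleNativeLog(boolean enabled) {
            try {
                return toggleNativeLogNative(enabled);
            } catch (UnsatisfiedLinkError | RuntimeException e) {
                return false;
            }
        }
    }
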
@@ -268,89 +294,70 @@ public class LlamaCpp {
 return;
 }
 
- // Extract parameters
- String modelPath = params.getString("model");
- if (modelPath == null) {
- callback.onResult(LlamaResult.failure(new LlamaError("Invalid parameters")));
- return;
+ try {
+ // Extract parameters
+ String modelPath = params.getString("model");
+ if (modelPath == null || modelPath.isEmpty()) {
+ callback.onResult(LlamaResult.failure(new LlamaError("Model path is required")));
+ return;
+ }
+
+ // Call native initialization
+ long nativeContextId = initContextNative(modelPath, params);
+ if (nativeContextId < 0) {
+ callback.onResult(LlamaResult.failure(new LlamaError("Failed to initialize native context")));
+ return;
+ }
+
+ // Create Java context wrapper
+ LlamaContext context = new LlamaContext(contextId);
+ context.setNativeContextId(nativeContextId);
+ contexts.put(contextId, context);
+
+ // Return context info
+ Map<String, Object> contextInfo = new HashMap<>();
+ contextInfo.put("contextId", contextId);
+ contextInfo.put("gpu", false);
+ contextInfo.put("reasonNoGPU", "Currently not supported");
+
+ Map<String, Object> modelInfo = new HashMap<>();
+ modelInfo.put("desc", "Loaded model");
+ modelInfo.put("size", 0);
+ modelInfo.put("nEmbd", 0);
+ modelInfo.put("nParams", 0);
+ modelInfo.put("path", modelPath);
+
+ contextInfo.put("model", modelInfo);
+ contextInfo.put("androidLib", "llama-cpp");
+
+ callback.onResult(LlamaResult.success(contextInfo));
+
+ } catch (Exception e) {
+ callback.onResult(LlamaResult.failure(new LlamaError("Context initialization failed: " + e.getMessage())));
 }
-
- // Create context
- LlamaContext context = new LlamaContext(contextId);
-
- // Create model info (this would typically load from GGUF file)
- MinjaCaps defaultCaps = new MinjaCaps(true, true, true, true, true, true);
- MinjaCaps toolUseCaps = new MinjaCaps(true, true, true, true, true, true);
- MinjaTemplates minja = new MinjaTemplates(true, defaultCaps, true, toolUseCaps);
- ChatTemplates chatTemplates = new ChatTemplates(true, minja);
-
- LlamaModel model = new LlamaModel(
- modelPath,
- "Sample model",
- 0,
- 0,
- 0,
- chatTemplates,
- new HashMap<>()
- );
-
- context.setModel(model);
- contexts.put(contextId, context);
-
- // Return context info
- Map<String, Object> contextInfo = new HashMap<>();
- contextInfo.put("contextId", contextId);
- contextInfo.put("gpu", false);
- contextInfo.put("reasonNoGPU", "Not implemented");
-
- Map<String, Object> modelInfo = new HashMap<>();
- modelInfo.put("desc", model.getDesc());
- modelInfo.put("size", model.getSize());
- modelInfo.put("nEmbd", model.getNEmbd());
- modelInfo.put("nParams", model.getNParams());
-
- Map<String, Object> chatTemplatesInfo = new HashMap<>();
- chatTemplatesInfo.put("llamaChat", model.getChatTemplates().isLlamaChat());
-
- Map<String, Object> minjaInfo = new HashMap<>();
- minjaInfo.put("default", model.getChatTemplates().getMinja().isDefault());
-
- Map<String, Object> defaultCapsInfo = new HashMap<>();
- defaultCapsInfo.put("tools", model.getChatTemplates().getMinja().getDefaultCaps().isTools());
- defaultCapsInfo.put("toolCalls", model.getChatTemplates().getMinja().getDefaultCaps().isToolCalls());
- defaultCapsInfo.put("toolResponses", model.getChatTemplates().getMinja().getDefaultCaps().isToolResponses());
- defaultCapsInfo.put("systemRole", model.getChatTemplates().getMinja().getDefaultCaps().isSystemRole());
- defaultCapsInfo.put("parallelToolCalls", model.getChatTemplates().getMinja().getDefaultCaps().isParallelToolCalls());
- defaultCapsInfo.put("toolCallId", model.getChatTemplates().getMinja().getDefaultCaps().isToolCallId());
-
- Map<String, Object> toolUseCapsInfo = new HashMap<>();
- toolUseCapsInfo.put("tools", model.getChatTemplates().getMinja().getToolUseCaps().isTools());
- toolUseCapsInfo.put("toolCalls", model.getChatTemplates().getMinja().getToolUseCaps().isToolCalls());
- toolUseCapsInfo.put("toolResponses", model.getChatTemplates().getMinja().getToolUseCaps().isToolResponses());
- toolUseCapsInfo.put("systemRole", model.getChatTemplates().getMinja().getToolUseCaps().isSystemRole());
- toolUseCapsInfo.put("parallelToolCalls", model.getChatTemplates().getMinja().getToolUseCaps().isParallelToolCalls());
- toolUseCapsInfo.put("toolCallId", model.getChatTemplates().getMinja().getToolUseCaps().isToolCallId());
-
- minjaInfo.put("defaultCaps", defaultCapsInfo);
- minjaInfo.put("toolUse", model.getChatTemplates().getMinja().isToolUse());
- minjaInfo.put("toolUseCaps", toolUseCapsInfo);
-
- chatTemplatesInfo.put("minja", minjaInfo);
- modelInfo.put("chatTemplates", chatTemplatesInfo);
- modelInfo.put("metadata", model.getMetadata());
- modelInfo.put("isChatTemplateSupported", true);
-
- contextInfo.put("model", modelInfo);
-
- callback.onResult(LlamaResult.success(contextInfo));
 }
 
 public void releaseContext(int contextId, LlamaCallback<Void> callback) {
- if (contexts.remove(contextId) == null) {
+ LlamaContext context = contexts.get(contextId);
+ if (context == null) {
 callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
 return;
 }
- callback.onResult(LlamaResult.success(null));
+
+ try {
+ // Release native context
+ if (context.getNativeContextId() >= 0) {
+ releaseContextNative(context.getNativeContextId());
+ }
+
+ // Remove from Java context map
+ contexts.remove(contextId);
+
+ callback.onResult(LlamaResult.success(null));
+
+ } catch (Exception e) {
+ callback.onResult(LlamaResult.failure(new LlamaError("Failed to release context: " + e.getMessage())));
+ }
 }
 
 public void releaseAllContexts(LlamaCallback<Void> callback) {

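With this hunk, initContext delegates model loading to the native layer and only wraps the returned handle, while releaseContext frees that handle before dropping the Java-side entry. A hypothetical caller-side sketch follows, assuming the LlamaCallback/LlamaResult shapes visible in the hunk above; the model path and context id are placeholders, not values shipped by the package.

    import android.util.Log;
    import com.getcapacitor.JSObject;
    import java.util.Map;

    // Hypothetical usage of the reworked initContext/releaseContext.
    class InitContextUsageSketch {
        void run(LlamaCpp llamaCpp) {
            JSObject params = new JSObject();
            params.put("model", "/data/user/0/your.app/files/model.gguf"); // placeholder path

            llamaCpp.initContext(1, params, result -> {
                if (result.isSuccess()) {
                    Map<String, Object> info = result.getData();
                    // Keys populated above: "contextId", "gpu", "reasonNoGPU",
                    // "model" (desc/size/nEmbd/nParams/path) and "androidLib".
                    Log.i("LlamaCpp", "native context ready: " + info.get("contextId"));
                } else {
                    Log.e("LlamaCpp", result.getError().getMessage());
                }
            });

            // Releasing frees the native handle first, then removes the Java wrapper;
            // a second call reports "Context not found".
            llamaCpp.releaseContext(1, result -> { /* success or failure */ });
        }
    }
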
@@ -367,15 +374,22 @@ public class LlamaCpp {
 return;
 }
 
- // This would typically format the chat using the model's chat templates
- // For now, return a basic formatted chat
- Map<String, Object> formattedChat = new HashMap<>();
- formattedChat.put("type", "llama-chat");
- formattedChat.put("prompt", messages);
- formattedChat.put("has_media", false);
- formattedChat.put("media_paths", new String[0]);
-
- callback.onResult(LlamaResult.success(formattedChat));
+ try {
+ // Call native formatted chat
+ String result = getFormattedChatNative(context.getNativeContextId(), messages, chatTemplate);
+
+ // Build formatted chat result
+ Map<String, Object> formattedChat = new HashMap<>();
+ formattedChat.put("type", "llama-chat");
+ formattedChat.put("prompt", result);
+ formattedChat.put("has_media", false);
+ formattedChat.put("media_paths", new String[0]);
+
+ callback.onResult(LlamaResult.success(formattedChat));
+
+ } catch (Exception e) {
+ callback.onResult(LlamaResult.failure(new LlamaError("Failed to format chat: " + e.getMessage())));
+ }
 }
 
 public void completion(int contextId, JSObject params, LlamaCallback<Map<String, Object>> callback) {

@@ -385,48 +399,68 @@ public class LlamaCpp {
 return;
 }
 
- // This would typically perform the completion using llama.cpp
- // For now, return a basic completion result
- Map<String, Object> completionResult = new HashMap<>();
- completionResult.put("text", "Sample completion text");
- completionResult.put("reasoning_content", "");
- completionResult.put("tool_calls", new Object[0]);
- completionResult.put("content", "Sample completion text");
- completionResult.put("chat_format", 0);
- completionResult.put("tokens_predicted", 0);
- completionResult.put("tokens_evaluated", 0);
- completionResult.put("truncated", false);
- completionResult.put("stopped_eos", false);
- completionResult.put("stopped_word", "");
- completionResult.put("stopped_limit", 0);
- completionResult.put("stopping_word", "");
- completionResult.put("context_full", false);
- completionResult.put("interrupted", false);
- completionResult.put("tokens_cached", 0);
-
- Map<String, Object> timings = new HashMap<>();
- timings.put("prompt_n", 0);
- timings.put("prompt_ms", 0);
- timings.put("prompt_per_token_ms", 0);
- timings.put("prompt_per_second", 0);
- timings.put("predicted_n", 0);
- timings.put("predicted_ms", 0);
- timings.put("predicted_per_token_ms", 0);
- timings.put("predicted_per_second", 0);
-
- completionResult.put("timings", timings);
-
- callback.onResult(LlamaResult.success(completionResult));
+ try {
+ // Extract parameters from JSObject
+ String prompt = params.getString("prompt", "");
+ int nPredict = params.getInteger("n_predict", 128);
+ float temperature = params.has("temp") ? (float) params.getDouble("temp") : 0.8f;
+ float topP = params.has("top_p") ? (float) params.getDouble("top_p") : 0.95f;
+ int topK = params.getInteger("top_k", 40);
+ float repeatPenalty = params.has("repeat_penalty") ? (float) params.getDouble("repeat_penalty") : 1.1f;
+
+ // Call native completion
+ String result = completionNative(context.getNativeContextId(), prompt);
+
+ // Build completion result
+ Map<String, Object> completionResult = new HashMap<>();
+ completionResult.put("text", result);
+ completionResult.put("reasoning_content", "");
+ completionResult.put("tool_calls", new Object[0]);
+ completionResult.put("content", result);
+ completionResult.put("chat_format", 0);
+ completionResult.put("tokens_predicted", nPredict);
+ completionResult.put("tokens_evaluated", 0);
+ completionResult.put("truncated", false);
+ completionResult.put("stopped_eos", false);
+ completionResult.put("stopped_word", "");
+ completionResult.put("stopped_limit", 0);
+ completionResult.put("stopping_word", "");
+ completionResult.put("context_full", false);
+ completionResult.put("interrupted", false);
+ completionResult.put("tokens_cached", 0);
+
+ Map<String, Object> timings = new HashMap<>();
+ timings.put("prompt_n", 0);
+ timings.put("prompt_ms", 0);
+ timings.put("prompt_per_token_ms", 0);
+ timings.put("prompt_per_second", 0);
+ timings.put("predicted_n", nPredict);
+ timings.put("predicted_ms", 0);
+ timings.put("predicted_per_token_ms", 0);
+ timings.put("predicted_per_second", 0);
+
+ completionResult.put("timings", timings);
+
+ callback.onResult(LlamaResult.success(completionResult));
+
+ } catch (Exception e) {
+ callback.onResult(LlamaResult.failure(new LlamaError("Completion failed: " + e.getMessage())));
+ }
 }
 
 public void stopCompletion(int contextId, LlamaCallback<Void> callback) {
- if (contexts.get(contextId) == null) {
+ LlamaContext context = contexts.get(contextId);
+ if (context == null) {
 callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
 return;
 }
 
- // This would typically stop any ongoing completion
- callback.onResult(LlamaResult.success(null));
+ try {
+ stopCompletionNative(context.getNativeContextId());
+ callback.onResult(LlamaResult.success(null));
+ } catch (Exception e) {
+ callback.onResult(LlamaResult.failure(new LlamaError("Failed to stop completion: " + e.getMessage())));
+ }
 }
 
 // MARK: - Session management

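completion() now reads its sampling parameters from the incoming JSObject and forwards the prompt to completionNative; note that in this version only the prompt reaches the native call, while n_predict, temp, top_p, top_k and repeat_penalty are extracted and echoed back in the result map. A hypothetical usage sketch using exactly those keys, with placeholder values:

    import android.util.Log;
    import com.getcapacitor.JSObject;
    import java.util.Map;

    // Hypothetical call into the reworked completion(); keys match the ones the
    // hunk above reads, values are placeholders.
    class CompletionUsageSketch {
        void run(LlamaCpp llamaCpp) {
            JSObject params = new JSObject();
            params.put("prompt", "Write a haiku about the sea.");
            params.put("n_predict", 64);
            params.put("temp", 0.7);
            params.put("top_p", 0.9);
            params.put("top_k", 40);
            params.put("repeat_penalty", 1.1);

            llamaCpp.completion(1, params, result -> {
                if (result.isSuccess()) {
                    Map<String, Object> out = result.getData();
                    // "text" and "content" carry the generated string; the other
                    // fields ("timings", "tokens_predicted", ...) hold the defaults
                    // shown in the hunk above.
                    Log.i("LlamaCpp", String.valueOf(out.get("text")));
                } else {
                    Log.e("LlamaCpp", result.getError().getMessage());
                }
            });
        }
    }
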
@@ -1,10 +1,13 @@
 package ai.annadata.plugin.capacitor;
 
 import com.getcapacitor.JSObject;
+ import com.getcapacitor.JSArray;
 import com.getcapacitor.Plugin;
 import com.getcapacitor.PluginCall;
 import com.getcapacitor.PluginMethod;
 import com.getcapacitor.annotation.CapacitorPlugin;
+ import java.util.Map;
+ import org.json.JSONException;
 
 @CapacitorPlugin(name = "LlamaCpp")
 public class LlamaCppPlugin extends Plugin {

@@ -40,12 +43,27 @@ public class LlamaCppPlugin extends Plugin {
 @PluginMethod
 public void modelInfo(PluginCall call) {
 String path = call.getString("path", "");
- String[] skip = call.getArray("skip", String.class);
- if (skip == null) skip = new String[0];
+ JSArray skipArray = call.getArray("skip");
+ String[] skip = new String[0];
+ if (skipArray != null) {
+ skip = new String[skipArray.length()];
+ for (int i = 0; i < skipArray.length(); i++) {
+ try {
+ skip[i] = skipArray.getString(i);
+ } catch (JSONException e) {
+ skip[i] = "";
+ }
+ }
+ }
 
 implementation.modelInfo(path, skip, result -> {
 if (result.isSuccess()) {
- call.resolve(result.getData());
+ JSObject jsResult = new JSObject();
+ Map<String, Object> data = result.getData();
+ for (Map.Entry<String, Object> entry : data.entrySet()) {
+ jsResult.put(entry.getKey(), entry.getValue());
+ }
+ call.resolve(jsResult);
 } else {
 call.reject(result.getError().getMessage());
 }

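The same JSArray-to-array copy now appears in modelInfo above and again in tokenize, detokenize, rerank, decodeAudioTokens and applyLoraAdapters below. A possible helper that factors it out, keeping the same fallback values the plugin uses; this is a sketch and not part of the package:

    import com.getcapacitor.JSArray;
    import org.json.JSONException;

    // Possible refactor, not shipped: shared JSArray conversion helpers.
    final class JSArrayHelpers {
        private JSArrayHelpers() {}

        static String[] toStringArray(JSArray array) {
            if (array == null) return new String[0];
            String[] out = new String[array.length()];
            for (int i = 0; i < array.length(); i++) {
                try {
                    out[i] = array.getString(i);
                } catch (JSONException e) {
                    out[i] = ""; // same fallback as modelInfo/tokenize/rerank
                }
            }
            return out;
        }

        static Integer[] toIntegerArray(JSArray array) {
            if (array == null) return new Integer[0];
            Integer[] out = new Integer[array.length()];
            for (int i = 0; i < array.length(); i++) {
                try {
                    out[i] = array.getInt(i);
                } catch (JSONException e) {
                    out[i] = 0; // same fallback as detokenize/decodeAudioTokens
                }
            }
            return out;
        }
    }
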
@@ -59,7 +77,12 @@ public class LlamaCppPlugin extends Plugin {
 
 implementation.initContext(contextId, params, result -> {
 if (result.isSuccess()) {
- call.resolve(result.getData());
+ JSObject jsResult = new JSObject();
+ Map<String, Object> data = result.getData();
+ for (Map.Entry<String, Object> entry : data.entrySet()) {
+ jsResult.put(entry.getKey(), entry.getValue());
+ }
+ call.resolve(jsResult);
 } else {
 call.reject(result.getError().getMessage());
 }

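Because the implementation callbacks now hand back Map<String, Object> rather than JSObject, every resolve path copies the map into a JSObject by hand. A possible helper that collects that copy in one place; again a sketch, not part of the package:

    import com.getcapacitor.JSObject;
    import java.util.Map;

    // Possible refactor, not shipped: Map-to-JSObject conversion matching the
    // inlined loops used throughout this plugin class.
    final class JSObjectHelpers {
        private JSObjectHelpers() {}

        static JSObject fromMap(Map<String, Object> data) {
            JSObject js = new JSObject();
            if (data != null) {
                for (Map.Entry<String, Object> entry : data.entrySet()) {
                    // Nested maps (e.g. "model", "timings") are stored as-is,
                    // exactly as the plugin's inlined loops do.
                    js.put(entry.getKey(), entry.getValue());
                }
            }
            return js;
        }
    }
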
@@ -101,7 +124,12 @@ public class LlamaCppPlugin extends Plugin {
 
 implementation.getFormattedChat(contextId, messages, chatTemplate, params, result -> {
 if (result.isSuccess()) {
- call.resolve(result.getData());
+ JSObject jsResult = new JSObject();
+ Map<String, Object> data = result.getData();
+ for (Map.Entry<String, Object> entry : data.entrySet()) {
+ jsResult.put(entry.getKey(), entry.getValue());
+ }
+ call.resolve(jsResult);
 } else {
 call.reject(result.getError().getMessage());
 }

@@ -115,7 +143,12 @@ public class LlamaCppPlugin extends Plugin {
 
 implementation.completion(contextId, params, result -> {
 if (result.isSuccess()) {
- call.resolve(result.getData());
+ JSObject jsResult = new JSObject();
+ Map<String, Object> data = result.getData();
+ for (Map.Entry<String, Object> entry : data.entrySet()) {
+ jsResult.put(entry.getKey(), entry.getValue());
+ }
+ call.resolve(jsResult);
 } else {
 call.reject(result.getError().getMessage());
 }

@@ -140,11 +173,16 @@ public class LlamaCppPlugin extends Plugin {
 @PluginMethod
 public void loadSession(PluginCall call) {
 int contextId = call.getInt("contextId", 0);
- String filepath = call.getString("filepath", "");
+ String path = call.getString("path", "");
 
- implementation.loadSession(contextId, filepath, result -> {
+ implementation.loadSession(contextId, path, result -> {
 if (result.isSuccess()) {
- call.resolve(result.getData());
+ JSObject jsResult = new JSObject();
+ Map<String, Object> data = result.getData();
+ for (Map.Entry<String, Object> entry : data.entrySet()) {
+ jsResult.put(entry.getKey(), entry.getValue());
+ }
+ call.resolve(jsResult);
 } else {
 call.reject(result.getError().getMessage());
 }

@@ -154,10 +192,10 @@ public class LlamaCppPlugin extends Plugin {
 @PluginMethod
 public void saveSession(PluginCall call) {
 int contextId = call.getInt("contextId", 0);
- String filepath = call.getString("filepath", "");
+ String path = call.getString("path", "");
 int size = call.getInt("size", -1);
 
- implementation.saveSession(contextId, filepath, size, result -> {
+ implementation.saveSession(contextId, path, size, result -> {
 if (result.isSuccess()) {
 JSObject ret = new JSObject();
 ret.put("tokensSaved", result.getData());

@@ -174,12 +212,27 @@ public class LlamaCppPlugin extends Plugin {
 public void tokenize(PluginCall call) {
 int contextId = call.getInt("contextId", 0);
 String text = call.getString("text", "");
- String[] imagePaths = call.getArray("imagePaths", String.class);
- if (imagePaths == null) imagePaths = new String[0];
+ JSArray imagePathsArray = call.getArray("imagePaths");
+ String[] imagePaths = new String[0];
+ if (imagePathsArray != null) {
+ imagePaths = new String[imagePathsArray.length()];
+ for (int i = 0; i < imagePathsArray.length(); i++) {
+ try {
+ imagePaths[i] = imagePathsArray.getString(i);
+ } catch (JSONException e) {
+ imagePaths[i] = "";
+ }
+ }
+ }
 
 implementation.tokenize(contextId, text, imagePaths, result -> {
 if (result.isSuccess()) {
- call.resolve(result.getData());
+ JSObject jsResult = new JSObject();
+ Map<String, Object> data = result.getData();
+ for (Map.Entry<String, Object> entry : data.entrySet()) {
+ jsResult.put(entry.getKey(), entry.getValue());
+ }
+ call.resolve(jsResult);
 } else {
 call.reject(result.getError().getMessage());
 }

@@ -189,8 +242,18 @@ public class LlamaCppPlugin extends Plugin {
 @PluginMethod
 public void detokenize(PluginCall call) {
 int contextId = call.getInt("contextId", 0);
- Integer[] tokens = call.getArray("tokens", Integer.class);
- if (tokens == null) tokens = new Integer[0];
+ JSArray tokensArray = call.getArray("tokens");
+ Integer[] tokens = new Integer[0];
+ if (tokensArray != null) {
+ tokens = new Integer[tokensArray.length()];
+ for (int i = 0; i < tokensArray.length(); i++) {
+ try {
+ tokens[i] = tokensArray.getInt(i);
+ } catch (JSONException e) {
+ tokens[i] = 0;
+ }
+ }
+ }
 
 implementation.detokenize(contextId, tokens, result -> {
 if (result.isSuccess()) {

@@ -213,7 +276,12 @@ public class LlamaCppPlugin extends Plugin {
 
 implementation.embedding(contextId, text, params, result -> {
 if (result.isSuccess()) {
- call.resolve(result.getData());
+ JSObject jsResult = new JSObject();
+ Map<String, Object> data = result.getData();
+ for (Map.Entry<String, Object> entry : data.entrySet()) {
+ jsResult.put(entry.getKey(), entry.getValue());
+ }
+ call.resolve(jsResult);
 } else {
 call.reject(result.getError().getMessage());
 }

@@ -224,8 +292,18 @@ public class LlamaCppPlugin extends Plugin {
 public void rerank(PluginCall call) {
 int contextId = call.getInt("contextId", 0);
 String query = call.getString("query", "");
- String[] documents = call.getArray("documents", String.class);
- if (documents == null) documents = new String[0];
+ JSArray documentsArray = call.getArray("documents");
+ String[] documents = new String[0];
+ if (documentsArray != null) {
+ documents = new String[documentsArray.length()];
+ for (int i = 0; i < documentsArray.length(); i++) {
+ try {
+ documents[i] = documentsArray.getString(i);
+ } catch (JSONException e) {
+ documents[i] = "";
+ }
+ }
+ }
 JSObject params = call.getObject("params");
 
 implementation.rerank(contextId, query, documents, params, result -> {

@@ -265,8 +343,15 @@ public class LlamaCppPlugin extends Plugin {
 @PluginMethod
 public void applyLoraAdapters(PluginCall call) {
 int contextId = call.getInt("contextId", 0);
- JSObject[] loraAdapters = call.getArray("loraAdapters", JSObject.class);
- if (loraAdapters == null) loraAdapters = new JSObject[0];
+ JSArray loraAdaptersArray = call.getArray("loraAdapters");
+ JSObject[] loraAdapters = new JSObject[0];
+ if (loraAdaptersArray != null) {
+ loraAdapters = new JSObject[loraAdaptersArray.length()];
+ for (int i = 0; i < loraAdaptersArray.length(); i++) {
+ // For now, create empty JSObjects since the exact method is unclear
+ loraAdapters[i] = new JSObject();
+ }
+ }
 
 implementation.applyLoraAdapters(contextId, loraAdapters, result -> {
 if (result.isSuccess()) {

@@ -346,7 +431,9 @@ public class LlamaCppPlugin extends Plugin {
 
 implementation.getMultimodalSupport(contextId, result -> {
 if (result.isSuccess()) {
- call.resolve(result.getData());
+ JSObject ret = new JSObject();
+ ret.put("support", result.getData());
+ call.resolve(ret);
 } else {
 call.reject(result.getError().getMessage());
 }

@@ -409,7 +496,9 @@ public class LlamaCppPlugin extends Plugin {
 
 implementation.getFormattedAudioCompletion(contextId, speakerJsonStr, textToSpeak, result -> {
 if (result.isSuccess()) {
- call.resolve(result.getData());
+ JSObject ret = new JSObject();
+ ret.put("completion", result.getData());
+ call.resolve(ret);
 } else {
 call.reject(result.getError().getMessage());
 }

@@ -435,8 +524,18 @@ public class LlamaCppPlugin extends Plugin {
 @PluginMethod
 public void decodeAudioTokens(PluginCall call) {
 int contextId = call.getInt("contextId", 0);
- Integer[] tokens = call.getArray("tokens", Integer.class);
- if (tokens == null) tokens = new Integer[0];
+ JSArray tokensArray = call.getArray("tokens");
+ Integer[] tokens = new Integer[0];
+ if (tokensArray != null) {
+ tokens = new Integer[tokensArray.length()];
+ for (int i = 0; i < tokensArray.length(); i++) {
+ try {
+ tokens[i] = tokensArray.getInt(i);
+ } catch (JSONException e) {
+ tokens[i] = 0;
+ }
+ }
+ }
 
 implementation.decodeAudioTokens(contextId, tokens, result -> {
 if (result.isSuccess()) {

@@ -123,7 +123,7 @@ jclass find_class(JNIEnv* env, const char* name) {
 }
 
 // Global context storage
- static std::map<jlong, std::unique_ptr<llama_rn_context>> contexts;
+ static std::map<jlong, std::unique_ptr<rnllama::llama_rn_context>> contexts;
 static jlong next_context_id = 1;
 
 extern "C" {

@@ -136,15 +136,24 @@ Java_ai_annadata_plugin_capacitor_LlamaCpp_initContext(
 std::string model_path_str = jstring_to_string(env, model_path);
 
 // Create new context
- auto context = std::make_unique<llama_rn_context>();
+ auto context = std::make_unique<rnllama::llama_rn_context>();
 
- // Initialize common parameters (simplified)
+ // Initialize common parameters
 common_params cparams;
 cparams.model = model_path_str;
 cparams.n_ctx = 2048;
 cparams.n_batch = 512;
 cparams.n_threads = 4;
 cparams.n_gpu_layers = 0;
+ cparams.rope_freq_base = 10000.0f;
+ cparams.rope_freq_scale = 1.0f;
+ cparams.mul_mat_q = true;
+ cparams.f16_kv = true;
+ cparams.logits_all = false;
+ cparams.embedding = false;
+ cparams.use_mmap = true;
+ cparams.use_mlock = false;
+ cparams.numa = GGML_NUMA_STRATEGY_DISABLED;
 
 // Load model
 if (!context->loadModel(cparams)) {

@@ -195,8 +204,76 @@ Java_ai_annadata_plugin_capacitor_LlamaCpp_completion(
 
 std::string prompt_str = jstring_to_string(env, prompt);
 
- // Simplified completion (placeholder implementation)
- std::string result = "Generated text for: " + prompt_str;
+ // Get the context
+ rnllama::llama_rn_context* context = it->second.get();
+
+ // Initialize completion if not already done
+ if (!context->completion) {
+ context->completion = new rnllama::llama_rn_context_completion(context);
+ }
+
+ // Set up completion parameters
+ completion_params cparams;
+ cparams.prompt = prompt_str;
+ cparams.n_predict = 128;
+ cparams.n_keep = 0;
+ cparams.n_discard = -1;
+ cparams.n_probs = 0;
+ cparams.logit_bias.clear();
+ cparams.top_k = 40;
+ cparams.top_p = 0.95f;
+ cparams.tfs_z = 1.0f;
+ cparams.typical_p = 1.0f;
+ cparams.temp = 0.8f;
+ cparams.repeat_penalty = 1.1f;
+ cparams.repeat_last_n = 64;
+ cparams.frequency_penalty = 0.0f;
+ cparams.presence_penalty = 0.0f;
+ cparams.mirostat = 0;
+ cparams.mirostat_tau = 5.0f;
+ cparams.mirostat_eta = 0.1f;
+ cparams.penalize_nl = true;
+ cparams.grammar = "";
+ cparams.grammar_penalty.clear();
+ cparams.antiprompt.clear();
+ cparams.seed = -1;
+ cparams.ignore_eos = false;
+ cparams.stop_sequences.clear();
+ cparams.streaming = false;
+
+ // Perform completion
+ std::string result;
+ try {
+ // Tokenize the prompt
+ auto tokenize_result = context->tokenize(prompt_str, {});
+
+ // Set up completion
+ context->completion->rewind();
+ context->completion->beginCompletion();
+
+ // Process tokens
+ for (size_t i = 0; i < tokenize_result.tokens.size(); i++) {
+ llama_batch_add(&context->completion->embd, tokenize_result.tokens[i], i, {0}, false);
+ }
+
+ // Generate completion
+ std::string generated_text;
+ for (int i = 0; i < cparams.n_predict; i++) {
+ auto token_output = context->completion->nextToken();
+ if (token_output.tok == llama_token_eos(context->ctx)) {
+ break;
+ }
+
+ std::string token_text = rnllama::tokens_to_output_formatted_string(context->ctx, token_output.tok);
+ generated_text += token_text;
+ }
+
+ result = generated_text;
+
+ } catch (const std::exception& e) {
+ LOGE("Completion error: %s", e.what());
+ result = "Error during completion: " + std::string(e.what());
+ }
 
 LOGI("Completion for context %lld: %s", context_id, prompt_str.c_str());
 return string_to_jstring(env, result);

@@ -215,7 +292,10 @@ Java_ai_annadata_plugin_capacitor_LlamaCpp_stopCompletion(
 try {
 auto it = contexts.find(context_id);
 if (it != contexts.end()) {
- // Stop completion logic would go here
+ rnllama::llama_rn_context* context = it->second.get();
+ if (context->completion) {
+ context->completion->is_interrupted = true;
+ }
 LOGI("Stopped completion for context %lld", context_id);
 }
 } catch (const std::exception& e) {

@@ -238,8 +318,10 @@ Java_ai_annadata_plugin_capacitor_LlamaCpp_getFormattedChat(
 std::string messages_str = jstring_to_string(env, messages);
 std::string template_str = jstring_to_string(env, chat_template);
 
- // Simplified chat formatting (placeholder implementation)
- std::string result = "Formatted chat: " + messages_str;
+ rnllama::llama_rn_context* context = it->second.get();
+
+ // Format chat using the context's method
+ std::string result = context->getFormattedChat(messages_str, template_str);
 
 LOGI("Formatted chat for context %lld", context_id);
 return string_to_jstring(env, result);

@@ -256,7 +338,7 @@ Java_ai_annadata_plugin_capacitor_LlamaCpp_toggleNativeLog(
 JNIEnv* env, jobject thiz, jboolean enabled) {
 
 try {
- rnllama_verbose = jboolean_to_bool(enabled);
+ rnllama::rnllama_verbose = jboolean_to_bool(enabled);
 LOGI("Native logging %s", enabled ? "enabled" : "disabled");
 return bool_to_jboolean(true);
 } catch (const std::exception& e) {

package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
 "name": "llama-cpp-capacitor",
- "version": "0.0.2",
+ "version": "0.0.4",
 "description": "A native Capacitor plugin that embeds llama.cpp directly into mobile apps, enabling offline AI inference with comprehensive support for text generation, multimodal processing, TTS, LoRA adapters, and more.",
 "main": "dist/plugin.cjs.js",
 "module": "dist/esm/index.js",