llama-cpp-capacitor 0.0.3 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/android/build.gradle +11 -0
- package/android/src/main/CMakeLists.txt +4 -6
- package/android/src/main/java/ai/annadata/plugin/capacitor/LlamaCpp.java +166 -126
- package/android/src/main/java/ai/annadata/plugin/capacitor/LlamaCppPlugin.java +9 -0
- package/android/src/main/jni-utils.h +5 -5
- package/android/src/main/jni.cpp +30 -15
- package/package.json +1 -1
package/android/build.gradle
CHANGED

@@ -26,6 +26,17 @@ android {
         versionCode 1
         versionName "1.0"
         testInstrumentationRunner "androidx.test.runner.AndroidJUnitRunner"
+
+        ndk {
+            abiFilters 'arm64-v8a'
+        }
+    }
+
+    externalNativeBuild {
+        cmake {
+            path "src/main/CMakeLists.txt"
+            version "3.22.1"
+        }
     }
     buildTypes {
         release {
package/android/src/main/CMakeLists.txt
CHANGED

@@ -55,6 +55,7 @@ set(
     ${LLAMACPP_LIB_DIR}/llama.cpp
     ${LLAMACPP_LIB_DIR}/llama-model.cpp
     ${LLAMACPP_LIB_DIR}/llama-model-loader.cpp
+    ${LLAMACPP_LIB_DIR}/llama-model-saver.cpp
     ${LLAMACPP_LIB_DIR}/llama-kv-cache.cpp
     ${LLAMACPP_LIB_DIR}/llama-kv-cache-iswa.cpp
     ${LLAMACPP_LIB_DIR}/llama-memory-hybrid.cpp

@@ -85,12 +86,9 @@ set(
 find_library(LOG_LIB log)

 function(build_library target_name arch cpu_flags)
-
-
-
-        ${LLAMACPP_LIB_DIR}/ggml-cpu/arch/${arch}/repack.cpp
-    )
-    endif ()
+    set(SOURCE_FILES_ARCH "")
+    # For now, use generic implementation for all architectures
+    # This ensures we have all required functions

     add_library(
         ${target_name}
package/android/src/main/java/ai/annadata/plugin/capacitor/LlamaCpp.java
CHANGED

@@ -51,6 +51,7 @@ class LlamaContext {
     private LlamaModel model;
     private boolean isMultimodalEnabled = false;
     private boolean isVocoderEnabled = false;
+    private long nativeContextId = -1;

     public LlamaContext(int id) {
         this.id = id;
@@ -83,6 +84,14 @@ class LlamaContext {
     public void setVocoderEnabled(boolean vocoderEnabled) {
         isVocoderEnabled = vocoderEnabled;
     }
+
+    public long getNativeContextId() {
+        return nativeContextId;
+    }
+
+    public void setNativeContextId(long nativeContextId) {
+        this.nativeContextId = nativeContextId;
+    }
 }

 class LlamaModel {
@@ -231,16 +240,39 @@ public class LlamaCpp {
     private int contextLimit = 10;
     private boolean nativeLogEnabled = false;

+    // Native method declarations
+    private native long initContextNative(String modelPath, JSObject params);
+    private native void releaseContextNative(long nativeContextId);
+    private native String completionNative(long contextId, String prompt);
+    private native void stopCompletionNative(long contextId);
+    private native String getFormattedChatNative(long contextId, String messages, String chatTemplate);
+    private native boolean toggleNativeLogNative(boolean enabled);
+
+    static {
+        try {
+            System.loadLibrary("llama-cpp");
+            Log.i(TAG, "Successfully loaded llama-cpp native library");
+        } catch (UnsatisfiedLinkError e) {
+            Log.e(TAG, "Failed to load llama-cpp native library: " + e.getMessage());
+            throw e;
+        }
+    }
+
     // MARK: - Core initialization and management

     public void toggleNativeLog(boolean enabled, LlamaCallback<Void> callback) {
-
-
-
-
-
+        try {
+            boolean result = toggleNativeLogNative(enabled);
+            nativeLogEnabled = enabled;
+            if (enabled) {
+                Log.i(TAG, "Native logging enabled");
+            } else {
+                Log.i(TAG, "Native logging disabled");
+            }
+            callback.onResult(LlamaResult.success(null));
+        } catch (Exception e) {
+            callback.onResult(LlamaResult.failure(new LlamaError("Failed to toggle native log: " + e.getMessage())));
         }
-        callback.onResult(LlamaResult.success(null));
     }

     public void setContextLimit(int limit, LlamaCallback<Void> callback) {
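The Java layer now declares its JNI bridge methods directly on LlamaCpp and loads libllama-cpp.so in a static initializer, so a load failure is rethrown and surfaces the first time the class is used. A minimal caller sketch for the callback-based API follows; it assumes LlamaCallback<T> declares a single onResult(LlamaResult<T>) method and that LlamaResult exposes isSuccess() and getError(), details that are not shown in this diff.

    // Illustrative caller sketch (not part of the package). Assumes LlamaCallback<T>
    // is a single-method interface onResult(LlamaResult<T>) and that LlamaResult
    // exposes isSuccess()/getError(); neither is confirmed by this diff.
    void enableNativeLogging(LlamaCpp llama) {
        llama.toggleNativeLog(true, new LlamaCallback<Void>() {
            @Override
            public void onResult(LlamaResult<Void> result) {
                if (result.isSuccess()) {
                    Log.i("LlamaDemo", "native logging enabled");
                } else {
                    Log.e("LlamaDemo", "toggleNativeLog failed: " + result.getError());
                }
            }
        });
    }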
@@ -268,89 +300,70 @@ public class LlamaCpp {
             return;
         }

-
-
-
-
-
+        try {
+            // Extract parameters
+            String modelPath = params.getString("model");
+            if (modelPath == null || modelPath.isEmpty()) {
+                callback.onResult(LlamaResult.failure(new LlamaError("Model path is required")));
+                return;
+            }
+
+            // Call native initialization
+            long nativeContextId = initContextNative(modelPath, params);
+            if (nativeContextId < 0) {
+                callback.onResult(LlamaResult.failure(new LlamaError("Failed to initialize native context")));
+                return;
+            }
+
+            // Create Java context wrapper
+            LlamaContext context = new LlamaContext(contextId);
+            context.setNativeContextId(nativeContextId);
+            contexts.put(contextId, context);
+
+            // Return context info
+            Map<String, Object> contextInfo = new HashMap<>();
+            contextInfo.put("contextId", contextId);
+            contextInfo.put("gpu", false);
+            contextInfo.put("reasonNoGPU", "Currently not supported");
+
+            Map<String, Object> modelInfo = new HashMap<>();
+            modelInfo.put("desc", "Loaded model");
+            modelInfo.put("size", 0);
+            modelInfo.put("nEmbd", 0);
+            modelInfo.put("nParams", 0);
+            modelInfo.put("path", modelPath);
+
+            contextInfo.put("model", modelInfo);
+            contextInfo.put("androidLib", "llama-cpp");
+
+            callback.onResult(LlamaResult.success(contextInfo));
+
+        } catch (Exception e) {
+            callback.onResult(LlamaResult.failure(new LlamaError("Context initialization failed: " + e.getMessage())));
         }
-
-        // Create context
-        LlamaContext context = new LlamaContext(contextId);
-
-        // Create model info (this would typically load from GGUF file)
-        MinjaCaps defaultCaps = new MinjaCaps(true, true, true, true, true, true);
-        MinjaCaps toolUseCaps = new MinjaCaps(true, true, true, true, true, true);
-        MinjaTemplates minja = new MinjaTemplates(true, defaultCaps, true, toolUseCaps);
-        ChatTemplates chatTemplates = new ChatTemplates(true, minja);
-
-        LlamaModel model = new LlamaModel(
-            modelPath,
-            "Sample model",
-            0,
-            0,
-            0,
-            chatTemplates,
-            new HashMap<>()
-        );
-
-        context.setModel(model);
-        contexts.put(contextId, context);
-
-        // Return context info
-        Map<String, Object> contextInfo = new HashMap<>();
-        contextInfo.put("contextId", contextId);
-        contextInfo.put("gpu", false);
-        contextInfo.put("reasonNoGPU", "Not implemented");
-
-        Map<String, Object> modelInfo = new HashMap<>();
-        modelInfo.put("desc", model.getDesc());
-        modelInfo.put("size", model.getSize());
-        modelInfo.put("nEmbd", model.getNEmbd());
-        modelInfo.put("nParams", model.getNParams());
-
-        Map<String, Object> chatTemplatesInfo = new HashMap<>();
-        chatTemplatesInfo.put("llamaChat", model.getChatTemplates().isLlamaChat());
-
-        Map<String, Object> minjaInfo = new HashMap<>();
-        minjaInfo.put("default", model.getChatTemplates().getMinja().isDefault());
-
-        Map<String, Object> defaultCapsInfo = new HashMap<>();
-        defaultCapsInfo.put("tools", model.getChatTemplates().getMinja().getDefaultCaps().isTools());
-        defaultCapsInfo.put("toolCalls", model.getChatTemplates().getMinja().getDefaultCaps().isToolCalls());
-        defaultCapsInfo.put("toolResponses", model.getChatTemplates().getMinja().getDefaultCaps().isToolResponses());
-        defaultCapsInfo.put("systemRole", model.getChatTemplates().getMinja().getDefaultCaps().isSystemRole());
-        defaultCapsInfo.put("parallelToolCalls", model.getChatTemplates().getMinja().getDefaultCaps().isParallelToolCalls());
-        defaultCapsInfo.put("toolCallId", model.getChatTemplates().getMinja().getDefaultCaps().isToolCallId());
-
-        Map<String, Object> toolUseCapsInfo = new HashMap<>();
-        toolUseCapsInfo.put("tools", model.getChatTemplates().getMinja().getToolUseCaps().isTools());
-        toolUseCapsInfo.put("toolCalls", model.getChatTemplates().getMinja().getToolUseCaps().isToolCalls());
-        toolUseCapsInfo.put("toolResponses", model.getChatTemplates().getMinja().getToolUseCaps().isToolResponses());
-        toolUseCapsInfo.put("systemRole", model.getChatTemplates().getMinja().getToolUseCaps().isSystemRole());
-        toolUseCapsInfo.put("parallelToolCalls", model.getChatTemplates().getMinja().getToolUseCaps().isParallelToolCalls());
-        toolUseCapsInfo.put("toolCallId", model.getChatTemplates().getMinja().getToolUseCaps().isToolCallId());
-
-        minjaInfo.put("defaultCaps", defaultCapsInfo);
-        minjaInfo.put("toolUse", model.getChatTemplates().getMinja().isToolUse());
-        minjaInfo.put("toolUseCaps", toolUseCapsInfo);
-
-        chatTemplatesInfo.put("minja", minjaInfo);
-        modelInfo.put("chatTemplates", chatTemplatesInfo);
-        modelInfo.put("metadata", model.getMetadata());
-        modelInfo.put("isChatTemplateSupported", true);
-
-        contextInfo.put("model", modelInfo);
-
-        callback.onResult(LlamaResult.success(contextInfo));
     }

     public void releaseContext(int contextId, LlamaCallback<Void> callback) {
-
+        LlamaContext context = contexts.get(contextId);
+        if (context == null) {
             callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
             return;
         }
-
+
+        try {
+            // Release native context
+            if (context.getNativeContextId() >= 0) {
+                releaseContextNative(context.getNativeContextId());
+            }
+
+            // Remove from Java context map
+            contexts.remove(contextId);
+
+            callback.onResult(LlamaResult.success(null));
+
+        } catch (Exception e) {
+            callback.onResult(LlamaResult.failure(new LlamaError("Failed to release context: " + e.getMessage())));
+        }
     }

     public void releaseAllContexts(LlamaCallback<Void> callback) {
@@ -367,15 +380,22 @@ public class LlamaCpp {
             return;
         }

-
-
-
-
-
-
-
-
-
+        try {
+            // Call native formatted chat
+            String result = getFormattedChatNative(context.getNativeContextId(), messages, chatTemplate);
+
+            // Build formatted chat result
+            Map<String, Object> formattedChat = new HashMap<>();
+            formattedChat.put("type", "llama-chat");
+            formattedChat.put("prompt", result);
+            formattedChat.put("has_media", false);
+            formattedChat.put("media_paths", new String[0]);
+
+            callback.onResult(LlamaResult.success(formattedChat));
+
+        } catch (Exception e) {
+            callback.onResult(LlamaResult.failure(new LlamaError("Failed to format chat: " + e.getMessage())));
+        }
     }

     public void completion(int contextId, JSObject params, LlamaCallback<Map<String, Object>> callback) {
@@ -385,48 +405,68 @@ public class LlamaCpp {
             return;
         }

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        try {
+            // Extract parameters from JSObject
+            String prompt = params.getString("prompt", "");
+            int nPredict = params.getInteger("n_predict", 128);
+            float temperature = params.has("temp") ? (float) params.getDouble("temp") : 0.8f;
+            float topP = params.has("top_p") ? (float) params.getDouble("top_p") : 0.95f;
+            int topK = params.getInteger("top_k", 40);
+            float repeatPenalty = params.has("repeat_penalty") ? (float) params.getDouble("repeat_penalty") : 1.1f;
+
+            // Call native completion
+            String result = completionNative(context.getNativeContextId(), prompt);
+
+            // Build completion result
+            Map<String, Object> completionResult = new HashMap<>();
+            completionResult.put("text", result);
+            completionResult.put("reasoning_content", "");
+            completionResult.put("tool_calls", new Object[0]);
+            completionResult.put("content", result);
+            completionResult.put("chat_format", 0);
+            completionResult.put("tokens_predicted", nPredict);
+            completionResult.put("tokens_evaluated", 0);
+            completionResult.put("truncated", false);
+            completionResult.put("stopped_eos", false);
+            completionResult.put("stopped_word", "");
+            completionResult.put("stopped_limit", 0);
+            completionResult.put("stopping_word", "");
+            completionResult.put("context_full", false);
+            completionResult.put("interrupted", false);
+            completionResult.put("tokens_cached", 0);
+
+            Map<String, Object> timings = new HashMap<>();
+            timings.put("prompt_n", 0);
+            timings.put("prompt_ms", 0);
+            timings.put("prompt_per_token_ms", 0);
+            timings.put("prompt_per_second", 0);
+            timings.put("predicted_n", nPredict);
+            timings.put("predicted_ms", 0);
+            timings.put("predicted_per_token_ms", 0);
+            timings.put("predicted_per_second", 0);
+
+            completionResult.put("timings", timings);
+
+            callback.onResult(LlamaResult.success(completionResult));
+
+        } catch (Exception e) {
+            callback.onResult(LlamaResult.failure(new LlamaError("Completion failed: " + e.getMessage())));
+        }
     }

     public void stopCompletion(int contextId, LlamaCallback<Void> callback) {
-
+        LlamaContext context = contexts.get(contextId);
+        if (context == null) {
             callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
             return;
         }

-
-
+        try {
+            stopCompletionNative(context.getNativeContextId());
+            callback.onResult(LlamaResult.success(null));
+        } catch (Exception e) {
+            callback.onResult(LlamaResult.failure(new LlamaError("Failed to stop completion: " + e.getMessage())));
+        }
     }

     // MARK: - Session management
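With completion, releaseContext, stopCompletion and toggleNativeLog now routed through the native bridge, an end-to-end caller would look roughly like the sketch below. The initContext signature and the lambda-friendly shape of LlamaCallback are assumptions (only the other method signatures appear verbatim in these hunks), so treat it as illustrative rather than package code.

    // Illustrative end-to-end sketch (hypothetical helper, not package code).
    // Assumes initContext(int, JSObject, LlamaCallback<Map<String, Object>>) has this
    // shape and that LlamaCallback is a functional interface.
    void runOnce(LlamaCpp llama, String modelPath) {
        JSObject initParams = new JSObject();
        initParams.put("model", modelPath); // path to a GGUF file on device

        llama.initContext(1, initParams, initResult -> {
            JSObject completionParams = new JSObject();
            completionParams.put("prompt", "Hello from llama-cpp-capacitor");
            completionParams.put("n_predict", 64);

            llama.completion(1, completionParams, completionResult -> {
                // carries the map built in completion(): "text", "content", "timings", ...
                llama.releaseContext(1, releaseResult -> { /* native context freed */ });
            });
        });
    }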
package/android/src/main/java/ai/annadata/plugin/capacitor/LlamaCppPlugin.java
CHANGED

@@ -1,5 +1,6 @@
 package ai.annadata.plugin.capacitor;

+import android.util.Log;
 import com.getcapacitor.JSObject;
 import com.getcapacitor.JSArray;
 import com.getcapacitor.Plugin;

@@ -11,9 +12,16 @@ import org.json.JSONException;

 @CapacitorPlugin(name = "LlamaCpp")
 public class LlamaCppPlugin extends Plugin {
+    private static final String TAG = "LlamaCppPlugin";

     private LlamaCpp implementation = new LlamaCpp();

+    @Override
+    public void load() {
+        super.load();
+        Log.i(TAG, "LlamaCppPlugin loaded successfully");
+    }
+
     // MARK: - Core initialization and management

     @PluginMethod

@@ -72,6 +80,7 @@ public class LlamaCppPlugin extends Plugin {

     @PluginMethod
     public void initContext(PluginCall call) {
+        Log.i(TAG, "initContext called with contextId: " + call.getInt("contextId", 0));
         int contextId = call.getInt("contextId", 0);
         JSObject params = call.getObject("params", new JSObject());

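The plugin class remains a thin Capacitor-facing wrapper over the LlamaCpp implementation; the bodies of its @PluginMethod handlers are not part of this diff. Below is one plausible shape for such a bridge using the standard PluginCall resolve/reject API, with the LlamaResult accessors assumed rather than confirmed.

    // Hypothetical bridge sketch; the actual @PluginMethod bodies are not shown in this diff.
    // PluginCall.resolve()/reject() are standard Capacitor APIs; isSuccess()/getError()
    // on LlamaResult are assumptions.
    @PluginMethod
    public void stopCompletion(PluginCall call) {
        int contextId = call.getInt("contextId", 0);
        implementation.stopCompletion(contextId, new LlamaCallback<Void>() {
            @Override
            public void onResult(LlamaResult<Void> result) {
                if (result.isSuccess()) {
                    call.resolve();
                } else {
                    call.reject(String.valueOf(result.getError()));
                }
            }
        });
    }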
package/android/src/main/jni-utils.h
CHANGED

@@ -87,16 +87,16 @@ void set_static_field(JNIEnv* env, jclass clazz, jfieldID field, ...);
 jobject get_static_field(JNIEnv* env, jclass clazz, jfieldID field);

 // Convert llama_rn_context to jobject
-jobject llama_context_to_jobject(JNIEnv* env, const llama_rn_context* context);
+jobject llama_context_to_jobject(JNIEnv* env, const rnllama::llama_rn_context* context);

 // Convert jobject to llama_rn_context
-llama_rn_context* jobject_to_llama_context(JNIEnv* env, jobject obj);
+rnllama::llama_rn_context* jobject_to_llama_context(JNIEnv* env, jobject obj);

 // Convert completion result to jobject
-jobject completion_result_to_jobject(JNIEnv* env, const completion_token_output& result);
+jobject completion_result_to_jobject(JNIEnv* env, const rnllama::completion_token_output& result);

 // Convert jobject to completion parameters
-
+common_params jobject_to_completion_params(JNIEnv* env, jobject obj);

 // Convert chat parameters to jobject
 jobject chat_params_to_jobject(JNIEnv* env, const common_chat_params& params);

@@ -105,7 +105,7 @@ jobject chat_params_to_jobject(JNIEnv* env, const common_chat_params& params);
 common_chat_params jobject_to_chat_params(JNIEnv* env, jobject obj);

 // Convert tokenize result to jobject
-jobject tokenize_result_to_jobject(JNIEnv* env, const llama_rn_tokenize_result& result);
+jobject tokenize_result_to_jobject(JNIEnv* env, const rnllama::llama_rn_tokenize_result& result);

 // Convert embedding result to jobject
 jobject embedding_result_to_jobject(JNIEnv* env, const std::vector<float>& embedding);
package/android/src/main/jni.cpp
CHANGED

@@ -4,6 +4,11 @@
 #include <cstring>
 #include <memory>

+// Add missing symbol
+namespace rnllama {
+    bool rnllama_verbose = false;
+}
+
 #define LOG_TAG "LlamaCpp"
 #define LOGI(...) __android_log_print(ANDROID_LOG_INFO, LOG_TAG, __VA_ARGS__)
 #define LOGE(...) __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, __VA_ARGS__)

@@ -123,7 +128,7 @@ jclass find_class(JNIEnv* env, const char* name) {
 }

 // Global context storage
-static std::map<jlong, std::unique_ptr<llama_rn_context>> contexts;
+static std::map<jlong, std::unique_ptr<rnllama::llama_rn_context>> contexts;
 static jlong next_context_id = 1;

 extern "C" {

@@ -136,15 +141,19 @@ Java_ai_annadata_plugin_capacitor_LlamaCpp_initContext(
         std::string model_path_str = jstring_to_string(env, model_path);

         // Create new context
-        auto context = std::make_unique<llama_rn_context>();
+        auto context = std::make_unique<rnllama::llama_rn_context>();

-        // Initialize common parameters
+        // Initialize common parameters
         common_params cparams;
-        cparams.model = model_path_str;
+        cparams.model.path = model_path_str;
         cparams.n_ctx = 2048;
         cparams.n_batch = 512;
-        cparams.n_threads = 4;
         cparams.n_gpu_layers = 0;
+        cparams.rope_freq_base = 10000.0f;
+        cparams.rope_freq_scale = 1.0f;
+        cparams.use_mmap = true;
+        cparams.use_mlock = false;
+        cparams.numa = LM_GGML_NUMA_STRATEGY_DISABLED;

         // Load model
         if (!context->loadModel(cparams)) {

@@ -156,7 +165,7 @@ Java_ai_annadata_plugin_capacitor_LlamaCpp_initContext(
         jlong context_id = next_context_id++;
         contexts[context_id] = std::move(context);

-        LOGI("Initialized context %
+        LOGI("Initialized context %ld with model: %s", context_id, model_path_str.c_str());
         return context_id;

     } catch (const std::exception& e) {

@@ -174,7 +183,7 @@ Java_ai_annadata_plugin_capacitor_LlamaCpp_releaseContext(
         auto it = contexts.find(context_id);
         if (it != contexts.end()) {
             contexts.erase(it);
-            LOGI("Released context %
+            LOGI("Released context %ld", context_id);
         }
     } catch (const std::exception& e) {
         LOGE("Exception in releaseContext: %s", e.what());

@@ -195,10 +204,14 @@ Java_ai_annadata_plugin_capacitor_LlamaCpp_completion(

         std::string prompt_str = jstring_to_string(env, prompt);

-        //
-
+        // Get the context
+        rnllama::llama_rn_context* context = it->second.get();

-
+        // For now, return a simple completion
+        // In a full implementation, this would use the actual llama.cpp completion logic
+        std::string result = "Generated response for: " + prompt_str;
+
+        LOGI("Completion for context %ld: %s", context_id, prompt_str.c_str());
         return string_to_jstring(env, result);

     } catch (const std::exception& e) {

@@ -216,7 +229,7 @@ Java_ai_annadata_plugin_capacitor_LlamaCpp_stopCompletion(
         auto it = contexts.find(context_id);
         if (it != contexts.end()) {
             // Stop completion logic would go here
-            LOGI("Stopped completion for context %
+            LOGI("Stopped completion for context %ld", context_id);
         }
     } catch (const std::exception& e) {
         LOGE("Exception in stopCompletion: %s", e.what());

@@ -238,10 +251,12 @@ Java_ai_annadata_plugin_capacitor_LlamaCpp_getFormattedChat(
         std::string messages_str = jstring_to_string(env, messages);
         std::string template_str = jstring_to_string(env, chat_template);

-
-
+        rnllama::llama_rn_context* context = it->second.get();
+
+        // Format chat using the context's method
+        std::string result = context->getFormattedChat(messages_str, template_str);

-        LOGI("Formatted chat for context %
+        LOGI("Formatted chat for context %ld", context_id);
         return string_to_jstring(env, result);

     } catch (const std::exception& e) {

@@ -256,7 +271,7 @@ Java_ai_annadata_plugin_capacitor_LlamaCpp_toggleNativeLog(
     JNIEnv* env, jobject thiz, jboolean enabled) {

     try {
-        rnllama_verbose = jboolean_to_bool(enabled);
+        rnllama::rnllama_verbose = jboolean_to_bool(enabled);
         LOGI("Native logging %s", enabled ? "enabled" : "disabled");
         return bool_to_jboolean(true);
     } catch (const std::exception& e) {
package/package.json
CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "llama-cpp-capacitor",
-  "version": "0.0.3",
+  "version": "0.0.5",
   "description": "A native Capacitor plugin that embeds llama.cpp directly into mobile apps, enabling offline AI inference with comprehensive support for text generation, multimodal processing, TTS, LoRA adapters, and more.",
   "main": "dist/plugin.cjs.js",
   "module": "dist/esm/index.js",