llama-cpp-capacitor 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -51,6 +51,7 @@ class LlamaContext {
     private LlamaModel model;
     private boolean isMultimodalEnabled = false;
     private boolean isVocoderEnabled = false;
+    private long nativeContextId = -1;

     public LlamaContext(int id) {
         this.id = id;
@@ -83,6 +84,14 @@ class LlamaContext {
     public void setVocoderEnabled(boolean vocoderEnabled) {
         isVocoderEnabled = vocoderEnabled;
     }
+
+    public long getNativeContextId() {
+        return nativeContextId;
+    }
+
+    public void setNativeContextId(long nativeContextId) {
+        this.nativeContextId = nativeContextId;
+    }
 }

 class LlamaModel {
@@ -231,16 +240,33 @@ public class LlamaCpp {
     private int contextLimit = 10;
     private boolean nativeLogEnabled = false;

+    // Native method declarations
+    private native long initContextNative(String modelPath, JSObject params);
+    private native void releaseContextNative(long nativeContextId);
+    private native String completionNative(long contextId, String prompt);
+    private native void stopCompletionNative(long contextId);
+    private native String getFormattedChatNative(long contextId, String messages, String chatTemplate);
+    private native boolean toggleNativeLogNative(boolean enabled);
+
+    static {
+        System.loadLibrary("llama-cpp");
+    }
+
     // MARK: - Core initialization and management

     public void toggleNativeLog(boolean enabled, LlamaCallback<Void> callback) {
-
-
-
-
-
+        try {
+            boolean result = toggleNativeLogNative(enabled);
+            nativeLogEnabled = enabled;
+            if (enabled) {
+                Log.i(TAG, "Native logging enabled");
+            } else {
+                Log.i(TAG, "Native logging disabled");
+            }
+            callback.onResult(LlamaResult.success(null));
+        } catch (Exception e) {
+            callback.onResult(LlamaResult.failure(new LlamaError("Failed to toggle native log: " + e.getMessage())));
         }
-        callback.onResult(LlamaResult.success(null));
     }

     public void setContextLimit(int limit, LlamaCallback<Void> callback) {
@@ -268,89 +294,70 @@ public class LlamaCpp {
             return;
         }

-
-
-
-
-
+        try {
+            // Extract parameters
+            String modelPath = params.getString("model");
+            if (modelPath == null || modelPath.isEmpty()) {
+                callback.onResult(LlamaResult.failure(new LlamaError("Model path is required")));
+                return;
+            }
+
+            // Call native initialization
+            long nativeContextId = initContextNative(modelPath, params);
+            if (nativeContextId < 0) {
+                callback.onResult(LlamaResult.failure(new LlamaError("Failed to initialize native context")));
+                return;
+            }
+
+            // Create Java context wrapper
+            LlamaContext context = new LlamaContext(contextId);
+            context.setNativeContextId(nativeContextId);
+            contexts.put(contextId, context);
+
+            // Return context info
+            Map<String, Object> contextInfo = new HashMap<>();
+            contextInfo.put("contextId", contextId);
+            contextInfo.put("gpu", false);
+            contextInfo.put("reasonNoGPU", "Currently not supported");
+
+            Map<String, Object> modelInfo = new HashMap<>();
+            modelInfo.put("desc", "Loaded model");
+            modelInfo.put("size", 0);
+            modelInfo.put("nEmbd", 0);
+            modelInfo.put("nParams", 0);
+            modelInfo.put("path", modelPath);
+
+            contextInfo.put("model", modelInfo);
+            contextInfo.put("androidLib", "llama-cpp");
+
+            callback.onResult(LlamaResult.success(contextInfo));
+
+        } catch (Exception e) {
+            callback.onResult(LlamaResult.failure(new LlamaError("Context initialization failed: " + e.getMessage())));
         }
-
-        // Create context
-        LlamaContext context = new LlamaContext(contextId);
-
-        // Create model info (this would typically load from GGUF file)
-        MinjaCaps defaultCaps = new MinjaCaps(true, true, true, true, true, true);
-        MinjaCaps toolUseCaps = new MinjaCaps(true, true, true, true, true, true);
-        MinjaTemplates minja = new MinjaTemplates(true, defaultCaps, true, toolUseCaps);
-        ChatTemplates chatTemplates = new ChatTemplates(true, minja);
-
-        LlamaModel model = new LlamaModel(
-            modelPath,
-            "Sample model",
-            0,
-            0,
-            0,
-            chatTemplates,
-            new HashMap<>()
-        );
-
-        context.setModel(model);
-        contexts.put(contextId, context);
-
-        // Return context info
-        Map<String, Object> contextInfo = new HashMap<>();
-        contextInfo.put("contextId", contextId);
-        contextInfo.put("gpu", false);
-        contextInfo.put("reasonNoGPU", "Not implemented");
-
-        Map<String, Object> modelInfo = new HashMap<>();
-        modelInfo.put("desc", model.getDesc());
-        modelInfo.put("size", model.getSize());
-        modelInfo.put("nEmbd", model.getNEmbd());
-        modelInfo.put("nParams", model.getNParams());
-
-        Map<String, Object> chatTemplatesInfo = new HashMap<>();
-        chatTemplatesInfo.put("llamaChat", model.getChatTemplates().isLlamaChat());
-
-        Map<String, Object> minjaInfo = new HashMap<>();
-        minjaInfo.put("default", model.getChatTemplates().getMinja().isDefault());
-
-        Map<String, Object> defaultCapsInfo = new HashMap<>();
-        defaultCapsInfo.put("tools", model.getChatTemplates().getMinja().getDefaultCaps().isTools());
-        defaultCapsInfo.put("toolCalls", model.getChatTemplates().getMinja().getDefaultCaps().isToolCalls());
-        defaultCapsInfo.put("toolResponses", model.getChatTemplates().getMinja().getDefaultCaps().isToolResponses());
-        defaultCapsInfo.put("systemRole", model.getChatTemplates().getMinja().getDefaultCaps().isSystemRole());
-        defaultCapsInfo.put("parallelToolCalls", model.getChatTemplates().getMinja().getDefaultCaps().isParallelToolCalls());
-        defaultCapsInfo.put("toolCallId", model.getChatTemplates().getMinja().getDefaultCaps().isToolCallId());
-
-        Map<String, Object> toolUseCapsInfo = new HashMap<>();
-        toolUseCapsInfo.put("tools", model.getChatTemplates().getMinja().getToolUseCaps().isTools());
-        toolUseCapsInfo.put("toolCalls", model.getChatTemplates().getMinja().getToolUseCaps().isToolCalls());
-        toolUseCapsInfo.put("toolResponses", model.getChatTemplates().getMinja().getToolUseCaps().isToolResponses());
-        toolUseCapsInfo.put("systemRole", model.getChatTemplates().getMinja().getToolUseCaps().isSystemRole());
-        toolUseCapsInfo.put("parallelToolCalls", model.getChatTemplates().getMinja().getToolUseCaps().isParallelToolCalls());
-        toolUseCapsInfo.put("toolCallId", model.getChatTemplates().getMinja().getToolUseCaps().isToolCallId());
-
-        minjaInfo.put("defaultCaps", defaultCapsInfo);
-        minjaInfo.put("toolUse", model.getChatTemplates().getMinja().isToolUse());
-        minjaInfo.put("toolUseCaps", toolUseCapsInfo);
-
-        chatTemplatesInfo.put("minja", minjaInfo);
-        modelInfo.put("chatTemplates", chatTemplatesInfo);
-        modelInfo.put("metadata", model.getMetadata());
-        modelInfo.put("isChatTemplateSupported", true);
-
-        contextInfo.put("model", modelInfo);
-
-        callback.onResult(LlamaResult.success(contextInfo));
     }

     public void releaseContext(int contextId, LlamaCallback<Void> callback) {
-
+        LlamaContext context = contexts.get(contextId);
+        if (context == null) {
             callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
             return;
         }
-
+
+        try {
+            // Release native context
+            if (context.getNativeContextId() >= 0) {
+                releaseContextNative(context.getNativeContextId());
+            }
+
+            // Remove from Java context map
+            contexts.remove(contextId);
+
+            callback.onResult(LlamaResult.success(null));
+
+        } catch (Exception e) {
+            callback.onResult(LlamaResult.failure(new LlamaError("Failed to release context: " + e.getMessage())));
+        }
     }

     public void releaseAllContexts(LlamaCallback<Void> callback) {
@@ -367,15 +374,22 @@ public class LlamaCpp {
             return;
         }

-
-
-
-
-
-
-
-
-
+        try {
+            // Call native formatted chat
+            String result = getFormattedChatNative(context.getNativeContextId(), messages, chatTemplate);
+
+            // Build formatted chat result
+            Map<String, Object> formattedChat = new HashMap<>();
+            formattedChat.put("type", "llama-chat");
+            formattedChat.put("prompt", result);
+            formattedChat.put("has_media", false);
+            formattedChat.put("media_paths", new String[0]);
+
+            callback.onResult(LlamaResult.success(formattedChat));
+
+        } catch (Exception e) {
+            callback.onResult(LlamaResult.failure(new LlamaError("Failed to format chat: " + e.getMessage())));
+        }
     }

     public void completion(int contextId, JSObject params, LlamaCallback<Map<String, Object>> callback) {
@@ -385,48 +399,68 @@ public class LlamaCpp {
             return;
         }

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        try {
+            // Extract parameters from JSObject
+            String prompt = params.getString("prompt", "");
+            int nPredict = params.getInteger("n_predict", 128);
+            float temperature = params.has("temp") ? (float) params.getDouble("temp") : 0.8f;
+            float topP = params.has("top_p") ? (float) params.getDouble("top_p") : 0.95f;
+            int topK = params.getInteger("top_k", 40);
+            float repeatPenalty = params.has("repeat_penalty") ? (float) params.getDouble("repeat_penalty") : 1.1f;
+
+            // Call native completion
+            String result = completionNative(context.getNativeContextId(), prompt);
+
+            // Build completion result
+            Map<String, Object> completionResult = new HashMap<>();
+            completionResult.put("text", result);
+            completionResult.put("reasoning_content", "");
+            completionResult.put("tool_calls", new Object[0]);
+            completionResult.put("content", result);
+            completionResult.put("chat_format", 0);
+            completionResult.put("tokens_predicted", nPredict);
+            completionResult.put("tokens_evaluated", 0);
+            completionResult.put("truncated", false);
+            completionResult.put("stopped_eos", false);
+            completionResult.put("stopped_word", "");
+            completionResult.put("stopped_limit", 0);
+            completionResult.put("stopping_word", "");
+            completionResult.put("context_full", false);
+            completionResult.put("interrupted", false);
+            completionResult.put("tokens_cached", 0);
+
+            Map<String, Object> timings = new HashMap<>();
+            timings.put("prompt_n", 0);
+            timings.put("prompt_ms", 0);
+            timings.put("prompt_per_token_ms", 0);
+            timings.put("prompt_per_second", 0);
+            timings.put("predicted_n", nPredict);
+            timings.put("predicted_ms", 0);
+            timings.put("predicted_per_token_ms", 0);
+            timings.put("predicted_per_second", 0);
+
+            completionResult.put("timings", timings);
+
+            callback.onResult(LlamaResult.success(completionResult));
+
+        } catch (Exception e) {
+            callback.onResult(LlamaResult.failure(new LlamaError("Completion failed: " + e.getMessage())));
+        }
     }

     public void stopCompletion(int contextId, LlamaCallback<Void> callback) {
-
+        LlamaContext context = contexts.get(contextId);
+        if (context == null) {
             callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
             return;
         }

-
-
+        try {
+            stopCompletionNative(context.getNativeContextId());
+            callback.onResult(LlamaResult.success(null));
+        } catch (Exception e) {
+            callback.onResult(LlamaResult.failure(new LlamaError("Failed to stop completion: " + e.getMessage())));
+        }
     }

     // MARK: - Session management
package/android/src/main/jni.cpp
CHANGED

@@ -123,7 +123,7 @@ jclass find_class(JNIEnv* env, const char* name) {
 }

 // Global context storage
-static std::map<jlong, std::unique_ptr<llama_rn_context>> contexts;
+static std::map<jlong, std::unique_ptr<rnllama::llama_rn_context>> contexts;
 static jlong next_context_id = 1;

 extern "C" {
@@ -136,15 +136,24 @@ Java_ai_annadata_plugin_capacitor_LlamaCpp_initContext(
     std::string model_path_str = jstring_to_string(env, model_path);

     // Create new context
-    auto context = std::make_unique<llama_rn_context>();
+    auto context = std::make_unique<rnllama::llama_rn_context>();

-    // Initialize common parameters
+    // Initialize common parameters
     common_params cparams;
     cparams.model = model_path_str;
     cparams.n_ctx = 2048;
     cparams.n_batch = 512;
     cparams.n_threads = 4;
     cparams.n_gpu_layers = 0;
+    cparams.rope_freq_base = 10000.0f;
+    cparams.rope_freq_scale = 1.0f;
+    cparams.mul_mat_q = true;
+    cparams.f16_kv = true;
+    cparams.logits_all = false;
+    cparams.embedding = false;
+    cparams.use_mmap = true;
+    cparams.use_mlock = false;
+    cparams.numa = GGML_NUMA_STRATEGY_DISABLED;

     // Load model
     if (!context->loadModel(cparams)) {
@@ -195,8 +204,76 @@ Java_ai_annadata_plugin_capacitor_LlamaCpp_completion(

     std::string prompt_str = jstring_to_string(env, prompt);

-    //
-
+    // Get the context
+    rnllama::llama_rn_context* context = it->second.get();
+
+    // Initialize completion if not already done
+    if (!context->completion) {
+        context->completion = new rnllama::llama_rn_context_completion(context);
+    }
+
+    // Set up completion parameters
+    completion_params cparams;
+    cparams.prompt = prompt_str;
+    cparams.n_predict = 128;
+    cparams.n_keep = 0;
+    cparams.n_discard = -1;
+    cparams.n_probs = 0;
+    cparams.logit_bias.clear();
+    cparams.top_k = 40;
+    cparams.top_p = 0.95f;
+    cparams.tfs_z = 1.0f;
+    cparams.typical_p = 1.0f;
+    cparams.temp = 0.8f;
+    cparams.repeat_penalty = 1.1f;
+    cparams.repeat_last_n = 64;
+    cparams.frequency_penalty = 0.0f;
+    cparams.presence_penalty = 0.0f;
+    cparams.mirostat = 0;
+    cparams.mirostat_tau = 5.0f;
+    cparams.mirostat_eta = 0.1f;
+    cparams.penalize_nl = true;
+    cparams.grammar = "";
+    cparams.grammar_penalty.clear();
+    cparams.antiprompt.clear();
+    cparams.seed = -1;
+    cparams.ignore_eos = false;
+    cparams.stop_sequences.clear();
+    cparams.streaming = false;
+
+    // Perform completion
+    std::string result;
+    try {
+        // Tokenize the prompt
+        auto tokenize_result = context->tokenize(prompt_str, {});
+
+        // Set up completion
+        context->completion->rewind();
+        context->completion->beginCompletion();
+
+        // Process tokens
+        for (size_t i = 0; i < tokenize_result.tokens.size(); i++) {
+            llama_batch_add(&context->completion->embd, tokenize_result.tokens[i], i, {0}, false);
+        }
+
+        // Generate completion
+        std::string generated_text;
+        for (int i = 0; i < cparams.n_predict; i++) {
+            auto token_output = context->completion->nextToken();
+            if (token_output.tok == llama_token_eos(context->ctx)) {
+                break;
+            }
+
+            std::string token_text = rnllama::tokens_to_output_formatted_string(context->ctx, token_output.tok);
+            generated_text += token_text;
+        }
+
+        result = generated_text;
+
+    } catch (const std::exception& e) {
+        LOGE("Completion error: %s", e.what());
+        result = "Error during completion: " + std::string(e.what());
+    }

     LOGI("Completion for context %lld: %s", context_id, prompt_str.c_str());
     return string_to_jstring(env, result);
@@ -215,7 +292,10 @@ Java_ai_annadata_plugin_capacitor_LlamaCpp_stopCompletion(
     try {
         auto it = contexts.find(context_id);
         if (it != contexts.end()) {
-
+            rnllama::llama_rn_context* context = it->second.get();
+            if (context->completion) {
+                context->completion->is_interrupted = true;
+            }
             LOGI("Stopped completion for context %lld", context_id);
         }
     } catch (const std::exception& e) {
@@ -238,8 +318,10 @@ Java_ai_annadata_plugin_capacitor_LlamaCpp_getFormattedChat(
     std::string messages_str = jstring_to_string(env, messages);
     std::string template_str = jstring_to_string(env, chat_template);

-
-
+    rnllama::llama_rn_context* context = it->second.get();
+
+    // Format chat using the context's method
+    std::string result = context->getFormattedChat(messages_str, template_str);

     LOGI("Formatted chat for context %lld", context_id);
     return string_to_jstring(env, result);
@@ -256,7 +338,7 @@ Java_ai_annadata_plugin_capacitor_LlamaCpp_toggleNativeLog(
     JNIEnv* env, jobject thiz, jboolean enabled) {

     try {
-        rnllama_verbose = jboolean_to_bool(enabled);
+        rnllama::rnllama_verbose = jboolean_to_bool(enabled);
         LOGI("Native logging %s", enabled ? "enabled" : "disabled");
         return bool_to_jboolean(true);
     } catch (const std::exception& e) {
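The stopCompletion change above requests cancellation by setting is_interrupted on the running completion. For a cooperative flag like this to take effect, the token-generation loop has to poll it between iterations; the generation loop added in the completion hunk does not show such a check in this diff. A minimal, hypothetical sketch of the polling pattern (type and function names are illustrative, not taken from this package):

#include <atomic>
#include <string>

// Illustrative only: a generation loop that honors a cooperative stop flag in the
// spirit of the is_interrupted member set by the stopCompletion handler above.
struct CompletionState {
    std::atomic<bool> is_interrupted{false};  // set from another thread to request a stop
};

std::string generate(CompletionState& state, int n_predict) {
    std::string out;
    for (int i = 0; i < n_predict; ++i) {
        if (state.is_interrupted.load()) {
            break;  // stop between tokens rather than mid-decode
        }
        out += "<token>";  // stand-in for sampling and detokenizing the next token
    }
    return out;
}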
package/package.json
CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "llama-cpp-capacitor",
-  "version": "0.0.3",
+  "version": "0.0.4",
   "description": "A native Capacitor plugin that embeds llama.cpp directly into mobile apps, enabling offline AI inference with comprehensive support for text generation, multimodal processing, TTS, LoRA adapters, and more.",
   "main": "dist/plugin.cjs.js",
   "module": "dist/esm/index.js",