llama-cpp-capacitor 0.0.3 → 0.0.5

This diff shows the changes between publicly released versions of the package, as published to their public registries, and is provided for informational purposes only.
@@ -26,6 +26,17 @@ android {
         versionCode 1
         versionName "1.0"
         testInstrumentationRunner "androidx.test.runner.AndroidJUnitRunner"
+
+        ndk {
+            abiFilters 'arm64-v8a'
+        }
+    }
+
+    externalNativeBuild {
+        cmake {
+            path "src/main/CMakeLists.txt"
+            version "3.22.1"
+        }
     }
     buildTypes {
         release {
@@ -55,6 +55,7 @@ set(
     ${LLAMACPP_LIB_DIR}/llama.cpp
     ${LLAMACPP_LIB_DIR}/llama-model.cpp
     ${LLAMACPP_LIB_DIR}/llama-model-loader.cpp
+    ${LLAMACPP_LIB_DIR}/llama-model-saver.cpp
     ${LLAMACPP_LIB_DIR}/llama-kv-cache.cpp
     ${LLAMACPP_LIB_DIR}/llama-kv-cache-iswa.cpp
     ${LLAMACPP_LIB_DIR}/llama-memory-hybrid.cpp
@@ -85,12 +86,9 @@ set(
 find_library(LOG_LIB log)

 function(build_library target_name arch cpu_flags)
-    if (NOT ${arch} STREQUAL "generic")
-        set(SOURCE_FILES_ARCH
-            ${LLAMACPP_LIB_DIR}/ggml-cpu/arch/${arch}/quants.c
-            ${LLAMACPP_LIB_DIR}/ggml-cpu/arch/${arch}/repack.cpp
-        )
-    endif ()
+    set(SOURCE_FILES_ARCH "")
+    # For now, use generic implementation for all architectures
+    # This ensures we have all required functions

     add_library(
         ${target_name}
@@ -51,6 +51,7 @@ class LlamaContext {
     private LlamaModel model;
     private boolean isMultimodalEnabled = false;
     private boolean isVocoderEnabled = false;
+    private long nativeContextId = -1;

     public LlamaContext(int id) {
         this.id = id;
@@ -83,6 +84,14 @@ class LlamaContext {
     public void setVocoderEnabled(boolean vocoderEnabled) {
         isVocoderEnabled = vocoderEnabled;
     }
+
+    public long getNativeContextId() {
+        return nativeContextId;
+    }
+
+    public void setNativeContextId(long nativeContextId) {
+        this.nativeContextId = nativeContextId;
+    }
 }

 class LlamaModel {
@@ -231,16 +240,39 @@ public class LlamaCpp {
     private int contextLimit = 10;
     private boolean nativeLogEnabled = false;

+    // Native method declarations
+    private native long initContextNative(String modelPath, JSObject params);
+    private native void releaseContextNative(long nativeContextId);
+    private native String completionNative(long contextId, String prompt);
+    private native void stopCompletionNative(long contextId);
+    private native String getFormattedChatNative(long contextId, String messages, String chatTemplate);
+    private native boolean toggleNativeLogNative(boolean enabled);
+
+    static {
+        try {
+            System.loadLibrary("llama-cpp");
+            Log.i(TAG, "Successfully loaded llama-cpp native library");
+        } catch (UnsatisfiedLinkError e) {
+            Log.e(TAG, "Failed to load llama-cpp native library: " + e.getMessage());
+            throw e;
+        }
+    }
+
     // MARK: - Core initialization and management

     public void toggleNativeLog(boolean enabled, LlamaCallback<Void> callback) {
-        nativeLogEnabled = enabled;
-        if (enabled) {
-            Log.i(TAG, "Native logging enabled");
-        } else {
-            Log.i(TAG, "Native logging disabled");
+        try {
+            boolean result = toggleNativeLogNative(enabled);
+            nativeLogEnabled = enabled;
+            if (enabled) {
+                Log.i(TAG, "Native logging enabled");
+            } else {
+                Log.i(TAG, "Native logging disabled");
+            }
+            callback.onResult(LlamaResult.success(null));
+        } catch (Exception e) {
+            callback.onResult(LlamaResult.failure(new LlamaError("Failed to toggle native log: " + e.getMessage())));
         }
-        callback.onResult(LlamaResult.success(null));
     }

     public void setContextLimit(int limit, LlamaCallback<Void> callback) {
@@ -268,89 +300,70 @@ public class LlamaCpp {
             return;
         }

-        // Extract parameters
-        String modelPath = params.getString("model");
-        if (modelPath == null) {
-            callback.onResult(LlamaResult.failure(new LlamaError("Invalid parameters")));
-            return;
+        try {
+            // Extract parameters
+            String modelPath = params.getString("model");
+            if (modelPath == null || modelPath.isEmpty()) {
+                callback.onResult(LlamaResult.failure(new LlamaError("Model path is required")));
+                return;
+            }
+
+            // Call native initialization
+            long nativeContextId = initContextNative(modelPath, params);
+            if (nativeContextId < 0) {
+                callback.onResult(LlamaResult.failure(new LlamaError("Failed to initialize native context")));
+                return;
+            }
+
+            // Create Java context wrapper
+            LlamaContext context = new LlamaContext(contextId);
+            context.setNativeContextId(nativeContextId);
+            contexts.put(contextId, context);
+
+            // Return context info
+            Map<String, Object> contextInfo = new HashMap<>();
+            contextInfo.put("contextId", contextId);
+            contextInfo.put("gpu", false);
+            contextInfo.put("reasonNoGPU", "Currently not supported");
+
+            Map<String, Object> modelInfo = new HashMap<>();
+            modelInfo.put("desc", "Loaded model");
+            modelInfo.put("size", 0);
+            modelInfo.put("nEmbd", 0);
+            modelInfo.put("nParams", 0);
+            modelInfo.put("path", modelPath);
+
+            contextInfo.put("model", modelInfo);
+            contextInfo.put("androidLib", "llama-cpp");
+
+            callback.onResult(LlamaResult.success(contextInfo));
+
+        } catch (Exception e) {
+            callback.onResult(LlamaResult.failure(new LlamaError("Context initialization failed: " + e.getMessage())));
         }
-
-        // Create context
-        LlamaContext context = new LlamaContext(contextId);
-
-        // Create model info (this would typically load from GGUF file)
-        MinjaCaps defaultCaps = new MinjaCaps(true, true, true, true, true, true);
-        MinjaCaps toolUseCaps = new MinjaCaps(true, true, true, true, true, true);
-        MinjaTemplates minja = new MinjaTemplates(true, defaultCaps, true, toolUseCaps);
-        ChatTemplates chatTemplates = new ChatTemplates(true, minja);
-
-        LlamaModel model = new LlamaModel(
-            modelPath,
-            "Sample model",
-            0,
-            0,
-            0,
-            chatTemplates,
-            new HashMap<>()
-        );
-
-        context.setModel(model);
-        contexts.put(contextId, context);
-
-        // Return context info
-        Map<String, Object> contextInfo = new HashMap<>();
-        contextInfo.put("contextId", contextId);
-        contextInfo.put("gpu", false);
-        contextInfo.put("reasonNoGPU", "Not implemented");
-
-        Map<String, Object> modelInfo = new HashMap<>();
-        modelInfo.put("desc", model.getDesc());
-        modelInfo.put("size", model.getSize());
-        modelInfo.put("nEmbd", model.getNEmbd());
-        modelInfo.put("nParams", model.getNParams());
-
-        Map<String, Object> chatTemplatesInfo = new HashMap<>();
-        chatTemplatesInfo.put("llamaChat", model.getChatTemplates().isLlamaChat());
-
-        Map<String, Object> minjaInfo = new HashMap<>();
-        minjaInfo.put("default", model.getChatTemplates().getMinja().isDefault());
-
-        Map<String, Object> defaultCapsInfo = new HashMap<>();
-        defaultCapsInfo.put("tools", model.getChatTemplates().getMinja().getDefaultCaps().isTools());
-        defaultCapsInfo.put("toolCalls", model.getChatTemplates().getMinja().getDefaultCaps().isToolCalls());
-        defaultCapsInfo.put("toolResponses", model.getChatTemplates().getMinja().getDefaultCaps().isToolResponses());
-        defaultCapsInfo.put("systemRole", model.getChatTemplates().getMinja().getDefaultCaps().isSystemRole());
-        defaultCapsInfo.put("parallelToolCalls", model.getChatTemplates().getMinja().getDefaultCaps().isParallelToolCalls());
-        defaultCapsInfo.put("toolCallId", model.getChatTemplates().getMinja().getDefaultCaps().isToolCallId());
-
-        Map<String, Object> toolUseCapsInfo = new HashMap<>();
-        toolUseCapsInfo.put("tools", model.getChatTemplates().getMinja().getToolUseCaps().isTools());
-        toolUseCapsInfo.put("toolCalls", model.getChatTemplates().getMinja().getToolUseCaps().isToolCalls());
-        toolUseCapsInfo.put("toolResponses", model.getChatTemplates().getMinja().getToolUseCaps().isToolResponses());
-        toolUseCapsInfo.put("systemRole", model.getChatTemplates().getMinja().getToolUseCaps().isSystemRole());
-        toolUseCapsInfo.put("parallelToolCalls", model.getChatTemplates().getMinja().getToolUseCaps().isParallelToolCalls());
-        toolUseCapsInfo.put("toolCallId", model.getChatTemplates().getMinja().getToolUseCaps().isToolCallId());
-
-        minjaInfo.put("defaultCaps", defaultCapsInfo);
-        minjaInfo.put("toolUse", model.getChatTemplates().getMinja().isToolUse());
-        minjaInfo.put("toolUseCaps", toolUseCapsInfo);
-
-        chatTemplatesInfo.put("minja", minjaInfo);
-        modelInfo.put("chatTemplates", chatTemplatesInfo);
-        modelInfo.put("metadata", model.getMetadata());
-        modelInfo.put("isChatTemplateSupported", true);
-
-        contextInfo.put("model", modelInfo);
-
-        callback.onResult(LlamaResult.success(contextInfo));
     }

     public void releaseContext(int contextId, LlamaCallback<Void> callback) {
-        if (contexts.remove(contextId) == null) {
+        LlamaContext context = contexts.get(contextId);
+        if (context == null) {
             callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
             return;
         }
-        callback.onResult(LlamaResult.success(null));
+
+        try {
+            // Release native context
+            if (context.getNativeContextId() >= 0) {
+                releaseContextNative(context.getNativeContextId());
+            }
+
+            // Remove from Java context map
+            contexts.remove(contextId);
+
+            callback.onResult(LlamaResult.success(null));
+
+        } catch (Exception e) {
+            callback.onResult(LlamaResult.failure(new LlamaError("Failed to release context: " + e.getMessage())));
+        }
     }

     public void releaseAllContexts(LlamaCallback<Void> callback) {
@@ -367,15 +380,22 @@ public class LlamaCpp {
             return;
         }

-        // This would typically format the chat using the model's chat templates
-        // For now, return a basic formatted chat
-        Map<String, Object> formattedChat = new HashMap<>();
-        formattedChat.put("type", "llama-chat");
-        formattedChat.put("prompt", messages);
-        formattedChat.put("has_media", false);
-        formattedChat.put("media_paths", new String[0]);
-
-        callback.onResult(LlamaResult.success(formattedChat));
+        try {
+            // Call native formatted chat
+            String result = getFormattedChatNative(context.getNativeContextId(), messages, chatTemplate);
+
+            // Build formatted chat result
+            Map<String, Object> formattedChat = new HashMap<>();
+            formattedChat.put("type", "llama-chat");
+            formattedChat.put("prompt", result);
+            formattedChat.put("has_media", false);
+            formattedChat.put("media_paths", new String[0]);
+
+            callback.onResult(LlamaResult.success(formattedChat));
+
+        } catch (Exception e) {
+            callback.onResult(LlamaResult.failure(new LlamaError("Failed to format chat: " + e.getMessage())));
+        }
     }

     public void completion(int contextId, JSObject params, LlamaCallback<Map<String, Object>> callback) {
@@ -385,48 +405,68 @@ public class LlamaCpp {
             return;
         }

-        // This would typically perform the completion using llama.cpp
-        // For now, return a basic completion result
-        Map<String, Object> completionResult = new HashMap<>();
-        completionResult.put("text", "Sample completion text");
-        completionResult.put("reasoning_content", "");
-        completionResult.put("tool_calls", new Object[0]);
-        completionResult.put("content", "Sample completion text");
-        completionResult.put("chat_format", 0);
-        completionResult.put("tokens_predicted", 0);
-        completionResult.put("tokens_evaluated", 0);
-        completionResult.put("truncated", false);
-        completionResult.put("stopped_eos", false);
-        completionResult.put("stopped_word", "");
-        completionResult.put("stopped_limit", 0);
-        completionResult.put("stopping_word", "");
-        completionResult.put("context_full", false);
-        completionResult.put("interrupted", false);
-        completionResult.put("tokens_cached", 0);
-
-        Map<String, Object> timings = new HashMap<>();
-        timings.put("prompt_n", 0);
-        timings.put("prompt_ms", 0);
-        timings.put("prompt_per_token_ms", 0);
-        timings.put("prompt_per_second", 0);
-        timings.put("predicted_n", 0);
-        timings.put("predicted_ms", 0);
-        timings.put("predicted_per_token_ms", 0);
-        timings.put("predicted_per_second", 0);
-
-        completionResult.put("timings", timings);
-
-        callback.onResult(LlamaResult.success(completionResult));
+        try {
+            // Extract parameters from JSObject
+            String prompt = params.getString("prompt", "");
+            int nPredict = params.getInteger("n_predict", 128);
+            float temperature = params.has("temp") ? (float) params.getDouble("temp") : 0.8f;
+            float topP = params.has("top_p") ? (float) params.getDouble("top_p") : 0.95f;
+            int topK = params.getInteger("top_k", 40);
+            float repeatPenalty = params.has("repeat_penalty") ? (float) params.getDouble("repeat_penalty") : 1.1f;
+
+            // Call native completion
+            String result = completionNative(context.getNativeContextId(), prompt);
+
+            // Build completion result
+            Map<String, Object> completionResult = new HashMap<>();
+            completionResult.put("text", result);
+            completionResult.put("reasoning_content", "");
+            completionResult.put("tool_calls", new Object[0]);
+            completionResult.put("content", result);
+            completionResult.put("chat_format", 0);
+            completionResult.put("tokens_predicted", nPredict);
+            completionResult.put("tokens_evaluated", 0);
+            completionResult.put("truncated", false);
+            completionResult.put("stopped_eos", false);
+            completionResult.put("stopped_word", "");
+            completionResult.put("stopped_limit", 0);
+            completionResult.put("stopping_word", "");
+            completionResult.put("context_full", false);
+            completionResult.put("interrupted", false);
+            completionResult.put("tokens_cached", 0);
+
+            Map<String, Object> timings = new HashMap<>();
+            timings.put("prompt_n", 0);
+            timings.put("prompt_ms", 0);
+            timings.put("prompt_per_token_ms", 0);
+            timings.put("prompt_per_second", 0);
+            timings.put("predicted_n", nPredict);
+            timings.put("predicted_ms", 0);
+            timings.put("predicted_per_token_ms", 0);
+            timings.put("predicted_per_second", 0);
+
+            completionResult.put("timings", timings);
+
+            callback.onResult(LlamaResult.success(completionResult));
+
+        } catch (Exception e) {
+            callback.onResult(LlamaResult.failure(new LlamaError("Completion failed: " + e.getMessage())));
+        }
     }

     public void stopCompletion(int contextId, LlamaCallback<Void> callback) {
-        if (contexts.get(contextId) == null) {
+        LlamaContext context = contexts.get(contextId);
+        if (context == null) {
             callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
             return;
         }

-        // This would typically stop any ongoing completion
-        callback.onResult(LlamaResult.success(null));
+        try {
+            stopCompletionNative(context.getNativeContextId());
+            callback.onResult(LlamaResult.success(null));
+        } catch (Exception e) {
+            callback.onResult(LlamaResult.failure(new LlamaError("Failed to stop completion: " + e.getMessage())));
+        }
     }

     // MARK: - Session management
@@ -1,5 +1,6 @@
 package ai.annadata.plugin.capacitor;

+import android.util.Log;
 import com.getcapacitor.JSObject;
 import com.getcapacitor.JSArray;
 import com.getcapacitor.Plugin;
@@ -11,9 +12,16 @@ import org.json.JSONException;

 @CapacitorPlugin(name = "LlamaCpp")
 public class LlamaCppPlugin extends Plugin {
+    private static final String TAG = "LlamaCppPlugin";

     private LlamaCpp implementation = new LlamaCpp();

+    @Override
+    public void load() {
+        super.load();
+        Log.i(TAG, "LlamaCppPlugin loaded successfully");
+    }
+
     // MARK: - Core initialization and management

     @PluginMethod
@@ -72,6 +80,7 @@ public class LlamaCppPlugin extends Plugin {

     @PluginMethod
     public void initContext(PluginCall call) {
+        Log.i(TAG, "initContext called with contextId: " + call.getInt("contextId", 0));
         int contextId = call.getInt("contextId", 0);
         JSObject params = call.getObject("params", new JSObject());

@@ -87,16 +87,16 @@ void set_static_field(JNIEnv* env, jclass clazz, jfieldID field, ...);
 jobject get_static_field(JNIEnv* env, jclass clazz, jfieldID field);

 // Convert llama_rn_context to jobject
-jobject llama_context_to_jobject(JNIEnv* env, const llama_rn_context* context);
+jobject llama_context_to_jobject(JNIEnv* env, const rnllama::llama_rn_context* context);

 // Convert jobject to llama_rn_context
-llama_rn_context* jobject_to_llama_context(JNIEnv* env, jobject obj);
+rnllama::llama_rn_context* jobject_to_llama_context(JNIEnv* env, jobject obj);

 // Convert completion result to jobject
-jobject completion_result_to_jobject(JNIEnv* env, const completion_token_output& result);
+jobject completion_result_to_jobject(JNIEnv* env, const rnllama::completion_token_output& result);

 // Convert jobject to completion parameters
-completion_params jobject_to_completion_params(JNIEnv* env, jobject obj);
+common_params jobject_to_completion_params(JNIEnv* env, jobject obj);

 // Convert chat parameters to jobject
 jobject chat_params_to_jobject(JNIEnv* env, const common_chat_params& params);
@@ -105,7 +105,7 @@ jobject chat_params_to_jobject(JNIEnv* env, const common_chat_params& params);
 common_chat_params jobject_to_chat_params(JNIEnv* env, jobject obj);

 // Convert tokenize result to jobject
-jobject tokenize_result_to_jobject(JNIEnv* env, const llama_rn_tokenize_result& result);
+jobject tokenize_result_to_jobject(JNIEnv* env, const rnllama::llama_rn_tokenize_result& result);

 // Convert embedding result to jobject
 jobject embedding_result_to_jobject(JNIEnv* env, const std::vector<float>& embedding);
@@ -4,6 +4,11 @@
 #include <cstring>
 #include <memory>

+// Add missing symbol
+namespace rnllama {
+    bool rnllama_verbose = false;
+}
+
 #define LOG_TAG "LlamaCpp"
 #define LOGI(...) __android_log_print(ANDROID_LOG_INFO, LOG_TAG, __VA_ARGS__)
 #define LOGE(...) __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, __VA_ARGS__)
@@ -123,7 +128,7 @@ jclass find_class(JNIEnv* env, const char* name) {
 }

 // Global context storage
-static std::map<jlong, std::unique_ptr<llama_rn_context>> contexts;
+static std::map<jlong, std::unique_ptr<rnllama::llama_rn_context>> contexts;
 static jlong next_context_id = 1;

 extern "C" {
@@ -136,15 +141,19 @@ Java_ai_annadata_plugin_capacitor_LlamaCpp_initContext(
     std::string model_path_str = jstring_to_string(env, model_path);

     // Create new context
-    auto context = std::make_unique<llama_rn_context>();
+    auto context = std::make_unique<rnllama::llama_rn_context>();

-    // Initialize common parameters (simplified)
+    // Initialize common parameters
     common_params cparams;
-    cparams.model = model_path_str;
+    cparams.model.path = model_path_str;
     cparams.n_ctx = 2048;
     cparams.n_batch = 512;
-    cparams.n_threads = 4;
     cparams.n_gpu_layers = 0;
+    cparams.rope_freq_base = 10000.0f;
+    cparams.rope_freq_scale = 1.0f;
+    cparams.use_mmap = true;
+    cparams.use_mlock = false;
+    cparams.numa = LM_GGML_NUMA_STRATEGY_DISABLED;

     // Load model
     if (!context->loadModel(cparams)) {
@@ -156,7 +165,7 @@ Java_ai_annadata_plugin_capacitor_LlamaCpp_initContext(
     jlong context_id = next_context_id++;
     contexts[context_id] = std::move(context);

-    LOGI("Initialized context %lld with model: %s", context_id, model_path_str.c_str());
+    LOGI("Initialized context %ld with model: %s", context_id, model_path_str.c_str());
     return context_id;

 } catch (const std::exception& e) {
@@ -174,7 +183,7 @@ Java_ai_annadata_plugin_capacitor_LlamaCpp_releaseContext(
     auto it = contexts.find(context_id);
     if (it != contexts.end()) {
         contexts.erase(it);
-        LOGI("Released context %lld", context_id);
+        LOGI("Released context %ld", context_id);
     }
 } catch (const std::exception& e) {
     LOGE("Exception in releaseContext: %s", e.what());
@@ -195,10 +204,14 @@ Java_ai_annadata_plugin_capacitor_LlamaCpp_completion(

     std::string prompt_str = jstring_to_string(env, prompt);

-    // Simplified completion (placeholder implementation)
-    std::string result = "Generated text for: " + prompt_str;
+    // Get the context
+    rnllama::llama_rn_context* context = it->second.get();

-    LOGI("Completion for context %lld: %s", context_id, prompt_str.c_str());
+    // For now, return a simple completion
+    // In a full implementation, this would use the actual llama.cpp completion logic
+    std::string result = "Generated response for: " + prompt_str;
+
+    LOGI("Completion for context %ld: %s", context_id, prompt_str.c_str());
     return string_to_jstring(env, result);

 } catch (const std::exception& e) {
@@ -216,7 +229,7 @@ Java_ai_annadata_plugin_capacitor_LlamaCpp_stopCompletion(
     auto it = contexts.find(context_id);
     if (it != contexts.end()) {
         // Stop completion logic would go here
-        LOGI("Stopped completion for context %lld", context_id);
+        LOGI("Stopped completion for context %ld", context_id);
     }
 } catch (const std::exception& e) {
     LOGE("Exception in stopCompletion: %s", e.what());
@@ -238,10 +251,12 @@ Java_ai_annadata_plugin_capacitor_LlamaCpp_getFormattedChat(
     std::string messages_str = jstring_to_string(env, messages);
     std::string template_str = jstring_to_string(env, chat_template);

-    // Simplified chat formatting (placeholder implementation)
-    std::string result = "Formatted chat: " + messages_str;
+    rnllama::llama_rn_context* context = it->second.get();
+
+    // Format chat using the context's method
+    std::string result = context->getFormattedChat(messages_str, template_str);

-    LOGI("Formatted chat for context %lld", context_id);
+    LOGI("Formatted chat for context %ld", context_id);
     return string_to_jstring(env, result);

 } catch (const std::exception& e) {
@@ -256,7 +271,7 @@ Java_ai_annadata_plugin_capacitor_LlamaCpp_toggleNativeLog(
     JNIEnv* env, jobject thiz, jboolean enabled) {

     try {
-        rnllama_verbose = jboolean_to_bool(enabled);
+        rnllama::rnllama_verbose = jboolean_to_bool(enabled);
         LOGI("Native logging %s", enabled ? "enabled" : "disabled");
         return bool_to_jboolean(true);
     } catch (const std::exception& e) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "llama-cpp-capacitor",
-  "version": "0.0.3",
+  "version": "0.0.5",
   "description": "A native Capacitor plugin that embeds llama.cpp directly into mobile apps, enabling offline AI inference with comprehensive support for text generation, multimodal processing, TTS, LoRA adapters, and more.",
   "main": "dist/plugin.cjs.js",
   "module": "dist/esm/index.js",