llama-cpp-capacitor 0.0.8 → 0.0.10

@@ -134,7 +134,7 @@ static jlong next_context_id = 1;
 extern "C" {
 
 JNIEXPORT jlong JNICALL
-Java_ai_annadata_plugin_capacitor_LlamaCpp_initContext(
+Java_ai_annadata_plugin_capacitor_LlamaCpp_initContextNative(
     JNIEnv* env, jobject thiz, jstring model_path, jobject params) {
 
     try {
@@ -154,13 +154,82 @@ Java_ai_annadata_plugin_capacitor_LlamaCpp_initContext(
         cparams.use_mmap = true;
         cparams.use_mlock = false;
         cparams.numa = LM_GGML_NUMA_STRATEGY_DISABLED;
+        cparams.ctx_shift = false;
+        cparams.chat_template = "";
+        cparams.embedding = false;
+        cparams.cont_batching = false;
+        cparams.parallel = false;
+        cparams.grammar = "";
+        cparams.grammar_penalty.clear();
+        cparams.antiprompt.clear();
+        cparams.lora_adapter.clear();
+        cparams.lora_base = "";
+        cparams.mul_mat_q = true;
+        cparams.f16_kv = true;
+        cparams.logits_all = false;
+        cparams.vocab_only = false;
+        cparams.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED;
+        cparams.rope_scaling_factor = 0.0f;
+        cparams.rope_scaling_orig_ctx_len = 0;
+        cparams.yarn_ext_factor = -1.0f;
+        cparams.yarn_attn_factor = 1.0f;
+        cparams.yarn_beta_fast = 32.0f;
+        cparams.yarn_beta_slow = 1.0f;
+        cparams.yarn_orig_ctx = 0;
+        cparams.offload_kqv = true;
+        cparams.flash_attn = false;
+        cparams.flash_attn_kernel = false;
+        cparams.flash_attn_causal = true;
+        cparams.mmproj = "";
+        cparams.image = "";
+        cparams.export = "";
+        cparams.export_path = "";
+        cparams.seed = -1;
+        cparams.n_keep = 0;
+        cparams.n_discard = -1;
+        cparams.n_draft = 0;
+        cparams.n_chunks = -1;
+        cparams.n_parallel = 1;
+        cparams.n_sequences = 1;
+        cparams.p_accept = 0.5f;
+        cparams.p_split = 0.1f;
+        cparams.n_gqa = 8;
+        cparams.rms_norm_eps = 5e-6f;
+        cparams.model_alias = "unknown";
+        cparams.ubatch_size = 512;
+        cparams.ubatch_seq_len_max = 1;
 
         // Load model
-        if (!context->loadModel(cparams)) {
-            throw_java_exception(env, "java/lang/RuntimeException", "Failed to load model");
-            return -1;
+        LOGI("Attempting to load model from: %s", model_path_str.c_str());
+        LOGI("Model parameters: n_ctx=%d, n_batch=%d, n_gpu_layers=%d",
+             cparams.n_ctx, cparams.n_batch, cparams.n_gpu_layers);
+
+        // Try to load the model
+        bool load_success = context->loadModel(cparams);
+        if (!load_success) {
+            LOGE("Model loading failed for: %s", model_path_str.c_str());
+
+            // Try with minimal parameters as fallback
+            LOGI("Trying with minimal parameters...");
+            common_params minimal_params;
+            minimal_params.model.path = model_path_str;
+            minimal_params.n_ctx = 512;
+            minimal_params.n_batch = 256;
+            minimal_params.n_gpu_layers = 0;
+            minimal_params.use_mmap = true;
+            minimal_params.use_mlock = false;
+            minimal_params.numa = LM_GGML_NUMA_STRATEGY_DISABLED;
+
+            load_success = context->loadModel(minimal_params);
+            if (!load_success) {
+                LOGE("Model loading failed even with minimal parameters");
+                throw_java_exception(env, "java/lang/RuntimeException", "Failed to load model");
+                return -1;
+            }
         }
 
+        LOGI("Model loaded successfully: %s", model_path_str.c_str());
+
         // Store context
         jlong context_id = next_context_id++;
         contexts[context_id] = std::move(context);
@@ -176,7 +245,7 @@ Java_ai_annadata_plugin_capacitor_LlamaCpp_initContext(
 }
 
 JNIEXPORT void JNICALL
-Java_ai_annadata_plugin_capacitor_LlamaCpp_releaseContext(
+Java_ai_annadata_plugin_capacitor_LlamaCpp_releaseContextNative(
     JNIEnv* env, jobject thiz, jlong context_id) {
 
     try {
@@ -192,8 +261,8 @@ Java_ai_annadata_plugin_capacitor_LlamaCpp_releaseContext(
 }
 
 JNIEXPORT jstring JNICALL
-Java_ai_annadata_plugin_capacitor_LlamaCpp_completion(
-    JNIEnv* env, jobject thiz, jlong context_id, jstring prompt, jobject params) {
+Java_ai_annadata_plugin_capacitor_LlamaCpp_completionNative(
+    JNIEnv* env, jobject thiz, jlong context_id, jstring prompt) {
 
     try {
         auto it = contexts.find(context_id);
@@ -222,7 +291,7 @@ Java_ai_annadata_plugin_capacitor_LlamaCpp_completion(
 }
 
 JNIEXPORT void JNICALL
-Java_ai_annadata_plugin_capacitor_LlamaCpp_stopCompletion(
+Java_ai_annadata_plugin_capacitor_LlamaCpp_stopCompletionNative(
    JNIEnv* env, jobject thiz, jlong context_id) {
 
     try {
@@ -238,7 +307,7 @@ Java_ai_annadata_plugin_capacitor_LlamaCpp_stopCompletion(
 }
 
 JNIEXPORT jstring JNICALL
-Java_ai_annadata_plugin_capacitor_LlamaCpp_getFormattedChat(
+Java_ai_annadata_plugin_capacitor_LlamaCpp_getFormattedChatNative(
     JNIEnv* env, jobject thiz, jlong context_id, jstring messages, jstring chat_template) {
 
     try {
@@ -267,7 +336,7 @@ Java_ai_annadata_plugin_capacitor_LlamaCpp_getFormattedChat(
 }
 
 JNIEXPORT jboolean JNICALL
-Java_ai_annadata_plugin_capacitor_LlamaCpp_toggleNativeLog(
+Java_ai_annadata_plugin_capacitor_LlamaCpp_toggleNativeLogNative(
     JNIEnv* env, jobject thiz, jboolean enabled) {
 
     try {
@@ -281,6 +350,8 @@ Java_ai_annadata_plugin_capacitor_LlamaCpp_toggleNativeLog(
     }
 }
 
+
+
 } // extern "C"
 
 } // namespace jni_utils
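Note: every JNI entry point above gains a "Native" suffix, which implies a matching rename of the native method declarations on the Java side. The Java source is not part of this diff; the sketch below is reconstructed purely from the JNI naming convention (Java_<package>_<ClassName>_<methodName>) and the C parameter types, so the modifiers, parameter names, and loaded library name are assumptions rather than the plugin's actual code.

// Hypothetical Java counterparts inferred from the exported JNI symbols;
// jlong -> long, jstring -> String, jobject -> Object, jboolean -> boolean.
package ai.annadata.plugin.capacitor;

public class LlamaCpp {
    static {
        // Library name is an assumption; it must match the .so produced by the C++ build.
        System.loadLibrary("llama-cpp");
    }

    native long initContextNative(String modelPath, Object params);
    native void releaseContextNative(long contextId);
    native String completionNative(long contextId, String prompt);
    native void stopCompletionNative(long contextId);
    native String getFormattedChatNative(long contextId, String messages, String chatTemplate);
    native boolean toggleNativeLogNative(boolean enabled);
}

Because each C++ function receives a jobject thiz rather than a jclass, these must be instance methods; declaring them static on the Java side would change the expected JNI signature.
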
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "llama-cpp-capacitor",
-  "version": "0.0.8",
+  "version": "0.0.10",
   "description": "A native Capacitor plugin that embeds llama.cpp directly into mobile apps, enabling offline AI inference with comprehensive support for text generation, multimodal processing, TTS, LoRA adapters, and more.",
   "main": "dist/plugin.cjs.js",
   "module": "dist/esm/index.js",