llama-cpp-capacitor 0.0.12 → 0.0.13

This diff shows the published contents of two package versions as they appear in their respective public registries, and is provided for informational purposes only.
@@ -4,6 +4,8 @@
  #include <cstring>
  #include <memory>
  #include <fstream> // Added for file existence and size checks
+ #include <signal.h> // Added for signal handling
+ #include <sys/signal.h> // Added for sigaction
 
  // Add missing symbol
  namespace rnllama {
@@ -199,6 +201,25 @@ Java_ai_annadata_plugin_capacitor_LlamaCpp_initContextNative(
  throw_java_exception(env, "java/lang/RuntimeException", "Model file not found in any expected location");
  return -1;
  }
+
+ // Additional model validation
+ LOGI("Performing additional model validation...");
+ std::ifstream validation_file(full_model_path, std::ios::binary);
+ if (validation_file.good()) {
+ // Read first 8 bytes to check GGUF version
+ char header[8];
+ if (validation_file.read(header, 8)) {
+ uint32_t version = *reinterpret_cast<uint32_t*>(header + 4);
+ LOGI("GGUF version: %u", version);
+
+ // Check if version is reasonable (should be > 0 and < 1000)
+ if (version == 0 || version > 1000) {
+ LOGE("Suspicious GGUF version: %u", version);
+ LOGI("This might indicate a corrupted or incompatible model file");
+ }
+ }
+ validation_file.close();
+ }
 
  // Create new context
  auto context = std::make_unique<rnllama::llama_rn_context>();
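
Note on the validation hunk above: it reads bytes 4-7 of the file as the GGUF version but never checks bytes 0-3, which in a well-formed file hold the magic "GGUF", so a non-GGUF file can be reported as having a "suspicious version" rather than being rejected outright. The `*reinterpret_cast<uint32_t*>(header + 4)` is also an unaligned, aliasing type-pun. A minimal self-contained sketch of a stricter check, assuming a little-endian target (as on Android/iOS); the function name `looks_like_gguf` is illustrative, and the `< 1000` bound is carried over from the diff's own heuristic:

    #include <cstdint>
    #include <cstring>
    #include <fstream>
    #include <string>

    // Sketch: check the GGUF magic as well as the version field.
    bool looks_like_gguf(const std::string& path) {
        std::ifstream f(path, std::ios::binary);
        char header[8];
        if (!f.read(header, sizeof(header))) return false;      // too short to be GGUF
        if (std::memcmp(header, "GGUF", 4) != 0) return false;   // wrong magic
        uint32_t version;
        std::memcpy(&version, header + 4, sizeof(version));      // portable, no aliasing UB
        return version > 0 && version < 1000;                    // same heuristic as above
    }
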
@@ -264,9 +285,27 @@ Java_ai_annadata_plugin_capacitor_LlamaCpp_initContextNative(
  LOGI("Model parameters: n_ctx=%d, n_batch=%d, n_gpu_layers=%d",
  cparams.n_ctx, cparams.n_batch, cparams.n_gpu_layers);
 
- // Try to load the model with error handling
+ // Try to load the model with error handling and signal protection
  bool load_success = false;
+
+ // Set up signal handler to catch segmentation faults
+ struct sigaction old_action;
+ struct sigaction new_action;
+ new_action.sa_handler = [](int sig) {
+ LOGE("Segmentation fault caught during model loading");
+ // Restore default handler and re-raise signal
+ signal(sig, SIG_DFL);
+ raise(sig);
+ };
+ new_action.sa_flags = SA_RESETHAND;
+ sigemptyset(&new_action.sa_mask);
+
+ if (sigaction(SIGSEGV, &new_action, &old_action) == 0) {
+ LOGI("Signal handler installed for segmentation fault protection");
+ }
+
  try {
+ LOGI("Attempting to load model with standard parameters...");
  load_success = context->loadModel(cparams);
  } catch (const std::exception& e) {
  LOGE("Exception during model loading: %s", e.what());
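
Two observations on the handler installed above, with a distilled sketch. SA_RESETHAND already restores the default disposition before the handler runs, so the lambda's explicit `signal(sig, SIG_DFL)` is redundant (harmless, but redundant). More importantly, LOGE presumably wraps `__android_log_print`, which is not on POSIX's async-signal-safe list; only calls like `write`, `raise`, and `_exit` are strictly legal inside a handler. A minimal sketch of the same install/restore pattern using only async-signal-safe calls (names and message are illustrative):

    #include <signal.h>
    #include <unistd.h>

    extern "C" void segv_note(int sig) {
        const char msg[] = "SIGSEGV during model load\n";
        write(STDERR_FILENO, msg, sizeof(msg) - 1);  // async-signal-safe, unlike LOGE
        raise(sig);  // SA_RESETHAND reset the disposition to SIG_DFL on entry, so the
                     // re-raised signal terminates the process once the handler returns
    }

    void guarded_load() {
        struct sigaction new_action {};
        struct sigaction old_action {};
        new_action.sa_handler = segv_note;
        new_action.sa_flags = SA_RESETHAND;  // one-shot handler
        sigemptyset(&new_action.sa_mask);

        if (sigaction(SIGSEGV, &new_action, &old_action) == 0) {
            // ... risky work (the model load) would go here ...
            sigaction(SIGSEGV, &old_action, nullptr);  // always restore
        }
    }
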
@@ -276,77 +315,90 @@ Java_ai_annadata_plugin_capacitor_LlamaCpp_initContextNative(
  load_success = false;
  }
 
+ // Restore original signal handler
+ sigaction(SIGSEGV, &old_action, nullptr);
+
  if (!load_success) {
  LOGE("context->loadModel() returned false - model loading failed");
 
- // Try with minimal parameters as fallback
- LOGI("Trying with minimal parameters...");
- common_params minimal_params;
- minimal_params.model.path = full_model_path;
- minimal_params.n_ctx = 512;
- minimal_params.n_batch = 256;
- minimal_params.n_gpu_layers = 0;
- minimal_params.use_mmap = true;
- minimal_params.use_mlock = false;
- minimal_params.numa = LM_GGML_NUMA_STRATEGY_DISABLED;
- minimal_params.ctx_shift = false;
- minimal_params.chat_template = "";
- minimal_params.embedding = false;
- minimal_params.cont_batching = false;
- minimal_params.parallel = false;
- minimal_params.grammar = "";
- minimal_params.grammar_penalty.clear();
- minimal_params.antiprompt.clear();
- minimal_params.lora_adapter.clear();
- minimal_params.lora_base = "";
- minimal_params.mul_mat_q = true;
- minimal_params.f16_kv = true;
- minimal_params.logits_all = false;
- minimal_params.vocab_only = false;
- minimal_params.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED;
- minimal_params.rope_scaling_factor = 0.0f;
- minimal_params.rope_scaling_orig_ctx_len = 0;
- minimal_params.yarn_ext_factor = -1.0f;
- minimal_params.yarn_attn_factor = 1.0f;
- minimal_params.yarn_beta_fast = 32.0f;
- minimal_params.yarn_beta_slow = 1.0f;
- minimal_params.yarn_orig_ctx = 0;
- minimal_params.offload_kqv = true;
- minimal_params.flash_attn = false;
- minimal_params.flash_attn_kernel = false;
- minimal_params.flash_attn_causal = true;
- minimal_params.mmproj = "";
- minimal_params.image = "";
- minimal_params.export = "";
- minimal_params.export_path = "";
- minimal_params.seed = -1;
- minimal_params.n_keep = 0;
- minimal_params.n_discard = -1;
- minimal_params.n_draft = 0;
- minimal_params.n_chunks = -1;
- minimal_params.n_parallel = 1;
- minimal_params.n_sequences = 1;
- minimal_params.p_accept = 0.5f;
- minimal_params.p_split = 0.1f;
- minimal_params.n_gqa = 8;
- minimal_params.rms_norm_eps = 5e-6f;
- minimal_params.model_alias = "unknown";
- minimal_params.ubatch_size = 256;
- minimal_params.ubatch_seq_len_max = 1;
+ // Try with ultra-minimal parameters as fallback
+ LOGI("Trying with ultra-minimal parameters...");
+ common_params ultra_minimal_params;
+ ultra_minimal_params.model.path = full_model_path;
+ ultra_minimal_params.n_ctx = 256; // Very small context
+ ultra_minimal_params.n_batch = 128; // Very small batch
+ ultra_minimal_params.n_gpu_layers = 0;
+ ultra_minimal_params.use_mmap = false; // Disable mmap to avoid memory issues
+ ultra_minimal_params.use_mlock = false;
+ ultra_minimal_params.numa = LM_GGML_NUMA_STRATEGY_DISABLED;
+ ultra_minimal_params.ctx_shift = false;
+ ultra_minimal_params.chat_template = "";
+ ultra_minimal_params.embedding = false;
+ ultra_minimal_params.cont_batching = false;
+ ultra_minimal_params.parallel = false;
+ ultra_minimal_params.grammar = "";
+ ultra_minimal_params.grammar_penalty.clear();
+ ultra_minimal_params.antiprompt.clear();
+ ultra_minimal_params.lora_adapter.clear();
+ ultra_minimal_params.lora_base = "";
+ ultra_minimal_params.mul_mat_q = false; // Disable quantized matrix multiplication
+ ultra_minimal_params.f16_kv = false; // Disable f16 key-value cache
+ ultra_minimal_params.logits_all = false;
+ ultra_minimal_params.vocab_only = false;
+ ultra_minimal_params.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED;
+ ultra_minimal_params.rope_scaling_factor = 0.0f;
+ ultra_minimal_params.rope_scaling_orig_ctx_len = 0;
+ ultra_minimal_params.yarn_ext_factor = -1.0f;
+ ultra_minimal_params.yarn_attn_factor = 1.0f;
+ ultra_minimal_params.yarn_beta_fast = 32.0f;
+ ultra_minimal_params.yarn_beta_slow = 1.0f;
+ ultra_minimal_params.yarn_orig_ctx = 0;
+ ultra_minimal_params.offload_kqv = false; // Disable offloading
+ ultra_minimal_params.flash_attn = false;
+ ultra_minimal_params.flash_attn_kernel = false;
+ ultra_minimal_params.flash_attn_causal = true;
+ ultra_minimal_params.mmproj = "";
+ ultra_minimal_params.image = "";
+ ultra_minimal_params.export = "";
+ ultra_minimal_params.export_path = "";
+ ultra_minimal_params.seed = -1;
+ ultra_minimal_params.n_keep = 0;
+ ultra_minimal_params.n_discard = -1;
+ ultra_minimal_params.n_draft = 0;
+ ultra_minimal_params.n_chunks = -1;
+ ultra_minimal_params.n_parallel = 1;
+ ultra_minimal_params.n_sequences = 1;
+ ultra_minimal_params.p_accept = 0.5f;
+ ultra_minimal_params.p_split = 0.1f;
+ ultra_minimal_params.n_gqa = 8;
+ ultra_minimal_params.rms_norm_eps = 5e-6f;
+ ultra_minimal_params.model_alias = "unknown";
+ ultra_minimal_params.ubatch_size = 128;
+ ultra_minimal_params.ubatch_seq_len_max = 1;
+
+ // Set up signal handler again for ultra-minimal attempt
+ if (sigaction(SIGSEGV, &new_action, &old_action) == 0) {
+ LOGI("Signal handler reinstalled for ultra-minimal attempt");
+ }
 
  try {
- load_success = context->loadModel(minimal_params);
+ load_success = context->loadModel(ultra_minimal_params);
  } catch (const std::exception& e) {
- LOGE("Exception during minimal model loading: %s", e.what());
+ LOGE("Exception during ultra-minimal model loading: %s", e.what());
  load_success = false;
  } catch (...) {
- LOGE("Unknown exception during minimal model loading");
+ LOGE("Unknown exception during ultra-minimal model loading");
  load_success = false;
  }
 
+ // Restore original signal handler
+ sigaction(SIGSEGV, &old_action, nullptr);
+
  if (!load_success) {
- LOGE("Model loading failed even with minimal parameters");
- throw_java_exception(env, "java/lang/RuntimeException", "Failed to load model - possible model corruption or incompatibility");
+ LOGE("Model loading failed even with ultra-minimal parameters");
+ throw_java_exception(env, "java/lang/RuntimeException",
+ "Failed to load model - model appears to be corrupted or incompatible with this llama.cpp version. "
+ "Try downloading a fresh copy of the model file.");
  return -1;
  }
  }
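
Two notes on this hunk. First, several of the fallback fields (`mul_mat_q`, `f16_kv`, `n_gqa`, ...) do not appear in recent upstream llama.cpp `common_params`, and `export` is a reserved C++ keyword, so `ultra_minimal_params.export = "";` cannot compile as written; the new block reintroduces the same assignments, which suggests this fallback path targets a patched struct or is not actually compiled. Second, because the installed handler merely logs and re-raises, a genuine SIGSEGV still terminates the process: the "signal protection" buys a diagnostic log line, not recovery, and the ultra-minimal retry is only reached when loadModel returns or throws normally. Actually regaining control after a fault would need the classic, and perilous (the faulting library is left in an undefined state), sigsetjmp/siglongjmp pattern. A hedged sketch, with `risky_load` as a hypothetical stand-in for `context->loadModel(...)`:

    #include <setjmp.h>
    #include <signal.h>

    static sigjmp_buf load_env;

    extern "C" void segv_recover(int) {
        siglongjmp(load_env, 1);  // never return from the handler; jump back below
    }

    bool load_with_recovery(bool (*risky_load)()) {
        struct sigaction new_action {};
        struct sigaction old_action {};
        new_action.sa_handler = segv_recover;
        new_action.sa_flags = 0;
        sigemptyset(&new_action.sa_mask);
        if (sigaction(SIGSEGV, &new_action, &old_action) != 0) return risky_load();

        bool ok = false;
        if (sigsetjmp(load_env, 1) == 0) {   // nonzero savesigs: restore mask on jump
            ok = risky_load();               // a fault lands us in the else branch
        } else {
            ok = false;                      // arrived via siglongjmp from the handler
        }
        sigaction(SIGSEGV, &old_action, nullptr);
        return ok;
    }
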
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "llama-cpp-capacitor",
- "version": "0.0.12",
+ "version": "0.0.13",
  "description": "A native Capacitor plugin that embeds llama.cpp directly into mobile apps, enabling offline AI inference with comprehensive support for text generation, multimodal processing, TTS, LoRA adapters, and more.",
  "main": "dist/plugin.cjs.js",
  "module": "dist/esm/index.js",