llama-cpp-capacitor 0.0.12 → 0.0.13
- package/android/src/main/jni.cpp +112 -60
- package/package.json +1 -1
package/android/src/main/jni.cpp
CHANGED
@@ -4,6 +4,8 @@
 #include <cstring>
 #include <memory>
 #include <fstream> // Added for file existence and size checks
+#include <signal.h> // Added for signal handling
+#include <sys/signal.h> // Added for sigaction
 
 // Add missing symbol
 namespace rnllama {
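A note on the new includes: on bionic (Android's libc), as on glibc, <sys/signal.h> is only a compatibility wrapper that includes <signal.h>, so the second include is redundant but harmless; <signal.h> alone declares sigaction().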
@@ -199,6 +201,25 @@ Java_ai_annadata_plugin_capacitor_LlamaCpp_initContextNative(
         throw_java_exception(env, "java/lang/RuntimeException", "Model file not found in any expected location");
         return -1;
     }
+
+    // Additional model validation
+    LOGI("Performing additional model validation...");
+    std::ifstream validation_file(full_model_path, std::ios::binary);
+    if (validation_file.good()) {
+        // Read first 8 bytes to check GGUF version
+        char header[8];
+        if (validation_file.read(header, 8)) {
+            uint32_t version = *reinterpret_cast<uint32_t*>(header + 4);
+            LOGI("GGUF version: %u", version);
+
+            // Check if version is reasonable (should be > 0 and < 1000)
+            if (version == 0 || version > 1000) {
+                LOGE("Suspicious GGUF version: %u", version);
+                LOGI("This might indicate a corrupted or incompatible model file");
+            }
+        }
+        validation_file.close();
+    }
 
     // Create new context
     auto context = std::make_unique<rnllama::llama_rn_context>();
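For context on what this check reads: a GGUF file starts with the 4-byte magic "GGUF" followed by a little-endian uint32 format version (3 for current files). The validation above logs the version but never checks the magic, and the reinterpret_cast of header + 4 loads through a pointer whose 4-byte alignment a char buffer does not guarantee. A stricter, portable form of the same probe could look like the sketch below (looks_like_gguf is a hypothetical helper, not part of the package):

    #include <cstdint>
    #include <cstring>
    #include <fstream>
    #include <string>

    // Sketch: verify both the GGUF magic and the version field.
    static bool looks_like_gguf(const std::string& path) {
        std::ifstream f(path, std::ios::binary);
        char header[8];
        if (!f.read(header, sizeof(header))) return false;      // file too short
        if (std::memcmp(header, "GGUF", 4) != 0) return false;   // wrong magic
        uint32_t version;
        std::memcpy(&version, header + 4, sizeof(version));      // alignment-safe read
        return version > 0 && version < 1000;                    // same bound as above
    }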
@@ -264,9 +285,27 @@ Java_ai_annadata_plugin_capacitor_LlamaCpp_initContextNative(
     LOGI("Model parameters: n_ctx=%d, n_batch=%d, n_gpu_layers=%d",
          cparams.n_ctx, cparams.n_batch, cparams.n_gpu_layers);
 
-    // Try to load the model with error handling
+    // Try to load the model with error handling and signal protection
     bool load_success = false;
+
+    // Set up signal handler to catch segmentation faults
+    struct sigaction old_action;
+    struct sigaction new_action;
+    new_action.sa_handler = [](int sig) {
+        LOGE("Segmentation fault caught during model loading");
+        // Restore default handler and re-raise signal
+        signal(sig, SIG_DFL);
+        raise(sig);
+    };
+    new_action.sa_flags = SA_RESETHAND;
+    sigemptyset(&new_action.sa_mask);
+
+    if (sigaction(SIGSEGV, &new_action, &old_action) == 0) {
+        LOGI("Signal handler installed for segmentation fault protection");
+    }
+
     try {
+        LOGI("Attempting to load model with standard parameters...");
         load_success = context->loadModel(cparams);
     } catch (const std::exception& e) {
         LOGE("Exception during model loading: %s", e.what());
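Two properties of this handler are worth noting. A captureless C++ lambda converts to a plain function pointer, which is what makes the assignment to sa_handler legal. And because the handler re-raises the signal after restoring the default disposition (which SA_RESETHAND already arranges), it does not recover from the fault; it only adds a log line before the crash, and calling the logger from inside a signal handler is itself not async-signal-safe. The classic pattern that actually survives a synchronous fault is sigsetjmp/siglongjmp, sketched below with hypothetical names (segv_guard, run_guarded); note that resuming after a SIGSEGV is formally undefined behavior and only defensible for conservative probing like this:

    #include <setjmp.h>
    #include <signal.h>

    static sigjmp_buf segv_guard_jmp;              // jump target for the handler

    static void segv_guard(int) {
        siglongjmp(segv_guard_jmp, 1);             // unwind out of the faulting call
    }

    template <typename Fn>
    static bool run_guarded(Fn&& risky) {          // returns false if SIGSEGV fired
        struct sigaction sa{}, old{};
        sa.sa_handler = segv_guard;
        sigemptyset(&sa.sa_mask);
        if (sigaction(SIGSEGV, &sa, &old) != 0) return risky();

        bool ok = false;
        if (sigsetjmp(segv_guard_jmp, 1) == 0) {   // also saves the signal mask
            ok = risky();                          // e.g. context->loadModel(cparams)
        }                                          // else: the handler jumped here

        sigaction(SIGSEGV, &old, nullptr);         // always restore the old handler
        return ok;
    }

Used as run_guarded([&]{ return context->loadModel(cparams); }), a fault inside the load returns control with false instead of killing the process, at the cost of whatever state the fault left behind.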
@@ -276,77 +315,90 @@ Java_ai_annadata_plugin_capacitor_LlamaCpp_initContextNative(
         load_success = false;
     }
 
+    // Restore original signal handler
+    sigaction(SIGSEGV, &old_action, nullptr);
+
     if (!load_success) {
         LOGE("context->loadModel() returned false - model loading failed");
 
-        // Try with minimal parameters as fallback
-        LOGI("Trying with minimal parameters...");
-        common_params
-        … (old lines 285-335: content truncated in the source diff view)
+        // Try with ultra-minimal parameters as fallback
+        LOGI("Trying with ultra-minimal parameters...");
+        common_params ultra_minimal_params;
+        ultra_minimal_params.model.path = full_model_path;
+        ultra_minimal_params.n_ctx = 256; // Very small context
+        ultra_minimal_params.n_batch = 128; // Very small batch
+        ultra_minimal_params.n_gpu_layers = 0;
+        ultra_minimal_params.use_mmap = false; // Disable mmap to avoid memory issues
+        ultra_minimal_params.use_mlock = false;
+        ultra_minimal_params.numa = LM_GGML_NUMA_STRATEGY_DISABLED;
+        ultra_minimal_params.ctx_shift = false;
+        ultra_minimal_params.chat_template = "";
+        ultra_minimal_params.embedding = false;
+        ultra_minimal_params.cont_batching = false;
+        ultra_minimal_params.parallel = false;
+        ultra_minimal_params.grammar = "";
+        ultra_minimal_params.grammar_penalty.clear();
+        ultra_minimal_params.antiprompt.clear();
+        ultra_minimal_params.lora_adapter.clear();
+        ultra_minimal_params.lora_base = "";
+        ultra_minimal_params.mul_mat_q = false; // Disable quantized matrix multiplication
+        ultra_minimal_params.f16_kv = false; // Disable f16 key-value cache
+        ultra_minimal_params.logits_all = false;
+        ultra_minimal_params.vocab_only = false;
+        ultra_minimal_params.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED;
+        ultra_minimal_params.rope_scaling_factor = 0.0f;
+        ultra_minimal_params.rope_scaling_orig_ctx_len = 0;
+        ultra_minimal_params.yarn_ext_factor = -1.0f;
+        ultra_minimal_params.yarn_attn_factor = 1.0f;
+        ultra_minimal_params.yarn_beta_fast = 32.0f;
+        ultra_minimal_params.yarn_beta_slow = 1.0f;
+        ultra_minimal_params.yarn_orig_ctx = 0;
+        ultra_minimal_params.offload_kqv = false; // Disable offloading
+        ultra_minimal_params.flash_attn = false;
+        ultra_minimal_params.flash_attn_kernel = false;
+        ultra_minimal_params.flash_attn_causal = true;
+        ultra_minimal_params.mmproj = "";
+        ultra_minimal_params.image = "";
+        ultra_minimal_params.export = "";
+        ultra_minimal_params.export_path = "";
+        ultra_minimal_params.seed = -1;
+        ultra_minimal_params.n_keep = 0;
+        ultra_minimal_params.n_discard = -1;
+        ultra_minimal_params.n_draft = 0;
+        ultra_minimal_params.n_chunks = -1;
+        ultra_minimal_params.n_parallel = 1;
+        ultra_minimal_params.n_sequences = 1;
+        ultra_minimal_params.p_accept = 0.5f;
+        ultra_minimal_params.p_split = 0.1f;
+        ultra_minimal_params.n_gqa = 8;
+        ultra_minimal_params.rms_norm_eps = 5e-6f;
+        ultra_minimal_params.model_alias = "unknown";
+        ultra_minimal_params.ubatch_size = 128;
+        ultra_minimal_params.ubatch_seq_len_max = 1;
+
+        // Set up signal handler again for ultra-minimal attempt
+        if (sigaction(SIGSEGV, &new_action, &old_action) == 0) {
+            LOGI("Signal handler reinstalled for ultra-minimal attempt");
+        }
 
         try {
-            load_success = context->loadModel(
+            load_success = context->loadModel(ultra_minimal_params);
         } catch (const std::exception& e) {
-            LOGE("Exception during minimal model loading: %s", e.what());
+            LOGE("Exception during ultra-minimal model loading: %s", e.what());
             load_success = false;
         } catch (...) {
-            LOGE("Unknown exception during minimal model loading");
+            LOGE("Unknown exception during ultra-minimal model loading");
             load_success = false;
         }
 
+        // Restore original signal handler
+        sigaction(SIGSEGV, &old_action, nullptr);
+
         if (!load_success) {
-            LOGE("Model loading failed even with minimal parameters");
-            throw_java_exception(env, "java/lang/RuntimeException",
+            LOGE("Model loading failed even with ultra-minimal parameters");
+            throw_java_exception(env, "java/lang/RuntimeException",
+                "Failed to load model - model appears to be corrupted or incompatible with this llama.cpp version. "
+                "Try downloading a fresh copy of the model file.");
             return -1;
         }
     }
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "llama-cpp-capacitor",
-  "version": "0.0.12",
+  "version": "0.0.13",
   "description": "A native Capacitor plugin that embeds llama.cpp directly into mobile apps, enabling offline AI inference with comprehensive support for text generation, multimodal processing, TTS, LoRA adapters, and more.",
   "main": "dist/plugin.cjs.js",
   "module": "dist/esm/index.js",