llama-cpp-capacitor 0.0.10 → 0.0.13
This diff compares the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between versions as they appear in the public registry.
- package/android/src/main/jni.cpp +196 -21
- package/package.json +1 -1
package/android/src/main/jni.cpp
CHANGED
@@ -3,6 +3,9 @@
 #include <android/log.h>
 #include <cstring>
 #include <memory>
+#include <fstream>      // Added for file existence and size checks
+#include <signal.h>     // Added for signal handling
+#include <sys/signal.h> // Added for sigaction

 // Add missing symbol
 namespace rnllama {
@@ -139,13 +142,92 @@ Java_ai_annadata_plugin_capacitor_LlamaCpp_initContextNative(

     try {
         std::string model_path_str = jstring_to_string(env, model_path);
+        LOGI("Attempting to load model from path: %s", model_path_str.c_str());
+
+        // List all possible paths we should check
+        std::vector<std::string> paths_to_check = {
+            model_path_str,
+            "/data/data/ai.annadata.app/files/" + model_path_str,
+            "/data/data/ai.annadata.app/files/Documents/" + model_path_str,
+            "/storage/emulated/0/Android/data/ai.annadata.app/files/" + model_path_str,
+            "/storage/emulated/0/Android/data/ai.annadata.app/files/Documents/" + model_path_str,
+            "/storage/emulated/0/Documents/" + model_path_str
+        };
+
+        // Check each path and log what we find
+        std::string full_model_path;
+        bool file_found = false;

+        for (const auto& path : paths_to_check) {
+            LOGI("Checking path: %s", path.c_str());
+            std::ifstream file_check(path);
+            if (file_check.good()) {
+                file_check.seekg(0, std::ios::end);
+                std::streamsize file_size = file_check.tellg();
+                file_check.close();
+                LOGI("Found file at: %s, size: %ld bytes", path.c_str(), file_size);
+
+                // Validate file size
+                if (file_size < 1024 * 1024) { // Less than 1MB
+                    LOGE("Model file is too small, likely corrupted: %s", path.c_str());
+                    continue; // Try next path
+                }
+
+                // Check if it's a valid GGUF file by reading the magic number
+                std::ifstream magic_file(path, std::ios::binary);
+                if (magic_file.good()) {
+                    char magic[4];
+                    if (magic_file.read(magic, 4)) {
+                        if (magic[0] == 'G' && magic[1] == 'G' && magic[2] == 'U' && magic[3] == 'F') {
+                            LOGI("Valid GGUF file detected at: %s", path.c_str());
+                            full_model_path = path;
+                            file_found = true;
+                            break;
+                        } else {
+                            LOGI("File does not appear to be a GGUF file (magic: %c%c%c%c) at: %s",
+                                 magic[0], magic[1], magic[2], magic[3], path.c_str());
+                        }
+                    }
+                    magic_file.close();
+                }
+            } else {
+                LOGI("File not found at: %s", path.c_str());
+            }
+            file_check.close();
+        }
+
+        if (!file_found) {
+            LOGE("Model file not found in any of the checked paths");
+            throw_java_exception(env, "java/lang/RuntimeException", "Model file not found in any expected location");
+            return -1;
+        }
+
+        // Additional model validation
+        LOGI("Performing additional model validation...");
+        std::ifstream validation_file(full_model_path, std::ios::binary);
+        if (validation_file.good()) {
+            // Read first 8 bytes to check GGUF version
+            char header[8];
+            if (validation_file.read(header, 8)) {
+                uint32_t version = *reinterpret_cast<uint32_t*>(header + 4);
+                LOGI("GGUF version: %u", version);
+
+                // Check if version is reasonable (should be > 0 and < 1000)
+                if (version == 0 || version > 1000) {
+                    LOGE("Suspicious GGUF version: %u", version);
+                    LOGI("This might indicate a corrupted or incompatible model file");
+                }
+            }
+            validation_file.close();
+        }
+
         // Create new context
         auto context = std::make_unique<rnllama::llama_rn_context>();
+        LOGI("Created llama_rn_context");

         // Initialize common parameters
         common_params cparams;
-        cparams.model.path =
+        cparams.model.path = full_model_path;
         cparams.n_ctx = 2048;
         cparams.n_batch = 512;
         cparams.n_gpu_layers = 0;
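Note: the validation added in this hunk reduces to a single self-contained check. The sketch below mirrors it under the GGUF header layout the hunk itself assumes (4-byte magic "GGUF" followed by a little-endian uint32 format version); looks_like_gguf and its 1 MB floor are illustrative, not identifiers from the package.

#include <cstdint>
#include <cstring>
#include <fstream>
#include <string>

// Sketch: does `path` plausibly point at a loadable GGUF model?
static bool looks_like_gguf(const std::string& path) {
    std::ifstream f(path, std::ios::binary | std::ios::ate);
    if (!f.good()) return false;                 // missing or unreadable

    const std::streamsize size = f.tellg();      // opened at end, so tellg() == size
    if (size < 1024 * 1024) return false;        // under 1 MB: likely truncated

    f.seekg(0, std::ios::beg);
    char header[8];
    if (!f.read(header, sizeof(header))) return false;

    if (std::memcmp(header, "GGUF", 4) != 0) return false;  // wrong magic

    uint32_t version = 0;
    std::memcpy(&version, header + 4, sizeof(version));     // unaligned-safe read
    return version > 0 && version <= 1000;       // same sanity bound as the hunk
}

In the hunk this logic gates the candidate-path loop: the first path that passes becomes full_model_path. Reading the version via memcpy rather than reinterpret_cast also avoids the unaligned-access pitfall of the in-tree version.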
@@ -199,42 +281,135 @@ Java_ai_annadata_plugin_capacitor_LlamaCpp_initContextNative(
         cparams.ubatch_size = 512;
         cparams.ubatch_seq_len_max = 1;

-
-        LOGI("Attempting to load model from: %s", model_path_str.c_str());
+        LOGI("Initialized common parameters, attempting to load model from: %s", full_model_path.c_str());
         LOGI("Model parameters: n_ctx=%d, n_batch=%d, n_gpu_layers=%d",
              cparams.n_ctx, cparams.n_batch, cparams.n_gpu_layers);

-        // Try to load the model
-        bool load_success =
+        // Try to load the model with error handling and signal protection
+        bool load_success = false;
+
+        // Set up signal handler to catch segmentation faults
+        struct sigaction old_action;
+        struct sigaction new_action;
+        new_action.sa_handler = [](int sig) {
+            LOGE("Segmentation fault caught during model loading");
+            // Restore default handler and re-raise signal
+            signal(sig, SIG_DFL);
+            raise(sig);
+        };
+        new_action.sa_flags = SA_RESETHAND;
+        sigemptyset(&new_action.sa_mask);
+
+        if (sigaction(SIGSEGV, &new_action, &old_action) == 0) {
+            LOGI("Signal handler installed for segmentation fault protection");
+        }
+
+        try {
+            LOGI("Attempting to load model with standard parameters...");
+            load_success = context->loadModel(cparams);
+        } catch (const std::exception& e) {
+            LOGE("Exception during model loading: %s", e.what());
+            load_success = false;
+        } catch (...) {
+            LOGE("Unknown exception during model loading");
+            load_success = false;
+        }
+
+        // Restore original signal handler
+        sigaction(SIGSEGV, &old_action, nullptr);
+
         if (!load_success) {
-            LOGE("
+            LOGE("context->loadModel() returned false - model loading failed");
+
+            // Try with ultra-minimal parameters as fallback
+            LOGI("Trying with ultra-minimal parameters...");
+            common_params ultra_minimal_params;
+            ultra_minimal_params.model.path = full_model_path;
+            ultra_minimal_params.n_ctx = 256;   // Very small context
+            ultra_minimal_params.n_batch = 128; // Very small batch
+            ultra_minimal_params.n_gpu_layers = 0;
+            ultra_minimal_params.use_mmap = false; // Disable mmap to avoid memory issues
+            ultra_minimal_params.use_mlock = false;
+            ultra_minimal_params.numa = LM_GGML_NUMA_STRATEGY_DISABLED;
+            ultra_minimal_params.ctx_shift = false;
+            ultra_minimal_params.chat_template = "";
+            ultra_minimal_params.embedding = false;
+            ultra_minimal_params.cont_batching = false;
+            ultra_minimal_params.parallel = false;
+            ultra_minimal_params.grammar = "";
+            ultra_minimal_params.grammar_penalty.clear();
+            ultra_minimal_params.antiprompt.clear();
+            ultra_minimal_params.lora_adapter.clear();
+            ultra_minimal_params.lora_base = "";
+            ultra_minimal_params.mul_mat_q = false; // Disable quantized matrix multiplication
+            ultra_minimal_params.f16_kv = false;    // Disable f16 key-value cache
+            ultra_minimal_params.logits_all = false;
+            ultra_minimal_params.vocab_only = false;
+            ultra_minimal_params.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED;
+            ultra_minimal_params.rope_scaling_factor = 0.0f;
+            ultra_minimal_params.rope_scaling_orig_ctx_len = 0;
+            ultra_minimal_params.yarn_ext_factor = -1.0f;
+            ultra_minimal_params.yarn_attn_factor = 1.0f;
+            ultra_minimal_params.yarn_beta_fast = 32.0f;
+            ultra_minimal_params.yarn_beta_slow = 1.0f;
+            ultra_minimal_params.yarn_orig_ctx = 0;
+            ultra_minimal_params.offload_kqv = false; // Disable offloading
+            ultra_minimal_params.flash_attn = false;
+            ultra_minimal_params.flash_attn_kernel = false;
+            ultra_minimal_params.flash_attn_causal = true;
+            ultra_minimal_params.mmproj = "";
+            ultra_minimal_params.image = "";
+            ultra_minimal_params.export = "";
+            ultra_minimal_params.export_path = "";
+            ultra_minimal_params.seed = -1;
+            ultra_minimal_params.n_keep = 0;
+            ultra_minimal_params.n_discard = -1;
+            ultra_minimal_params.n_draft = 0;
+            ultra_minimal_params.n_chunks = -1;
+            ultra_minimal_params.n_parallel = 1;
+            ultra_minimal_params.n_sequences = 1;
+            ultra_minimal_params.p_accept = 0.5f;
+            ultra_minimal_params.p_split = 0.1f;
+            ultra_minimal_params.n_gqa = 8;
+            ultra_minimal_params.rms_norm_eps = 5e-6f;
+            ultra_minimal_params.model_alias = "unknown";
+            ultra_minimal_params.ubatch_size = 128;
+            ultra_minimal_params.ubatch_seq_len_max = 1;
+
+            // Set up signal handler again for ultra-minimal attempt
+            if (sigaction(SIGSEGV, &new_action, &old_action) == 0) {
+                LOGI("Signal handler reinstalled for ultra-minimal attempt");
+            }
+
+            try {
+                load_success = context->loadModel(ultra_minimal_params);
+            } catch (const std::exception& e) {
+                LOGE("Exception during ultra-minimal model loading: %s", e.what());
+                load_success = false;
+            } catch (...) {
+                LOGE("Unknown exception during ultra-minimal model loading");
+                load_success = false;
+            }

-            //
-
-            common_params minimal_params;
-            minimal_params.model.path = model_path_str;
-            minimal_params.n_ctx = 512;
-            minimal_params.n_batch = 256;
-            minimal_params.n_gpu_layers = 0;
-            minimal_params.use_mmap = true;
-            minimal_params.use_mlock = false;
-            minimal_params.numa = LM_GGML_NUMA_STRATEGY_DISABLED;
+            // Restore original signal handler
+            sigaction(SIGSEGV, &old_action, nullptr);

-            load_success = context->loadModel(minimal_params);
             if (!load_success) {
-                LOGE("Model loading failed even with minimal parameters");
-                throw_java_exception(env, "java/lang/RuntimeException",
+                LOGE("Model loading failed even with ultra-minimal parameters");
+                throw_java_exception(env, "java/lang/RuntimeException",
+                    "Failed to load model - model appears to be corrupted or incompatible with this llama.cpp version. "
+                    "Try downloading a fresh copy of the model file.");
                 return -1;
             }
         }

-        LOGI("Model loaded successfully
+        LOGI("Model loaded successfully!");

         // Store context
         jlong context_id = next_context_id++;
         contexts[context_id] = std::move(context);

-        LOGI("Initialized context %ld with model: %s", context_id,
+        LOGI("Initialized context %ld with model: %s", context_id, full_model_path.c_str());
         return context_id;

     } catch (const std::exception& e) {
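Note: the signal-handling changes in this hunk follow an install/guard/restore pattern around each loadModel() attempt. Below is a condensed sketch of that pattern, assuming POSIX signals as on Android; run_with_segv_logging is a hypothetical helper, not the plugin's API. It logs with write(2) because a SIGSEGV handler may only call async-signal-safe functions, a guarantee the LOGE macro used in the hunk does not carry.

#include <csignal>
#include <unistd.h>

namespace {

// Handler: emit a fixed message, then re-deliver the signal so the
// process still terminates with the default SIGSEGV action.
void segv_logger(int sig) {
    static const char msg[] = "SIGSEGV during model load\n";
    write(STDERR_FILENO, msg, sizeof(msg) - 1);  // async-signal-safe
    signal(sig, SIG_DFL);
    raise(sig);
}

// Run `risky` with the logging handler installed, restoring the previous
// disposition afterwards, the same bracketing the hunk performs inline.
template <typename Fn>
bool run_with_segv_logging(Fn&& risky) {
    struct sigaction guard {};
    struct sigaction previous {};
    guard.sa_handler = segv_logger;
    guard.sa_flags = SA_RESETHAND;           // one-shot, as in the hunk
    sigemptyset(&guard.sa_mask);

    sigaction(SIGSEGV, &guard, &previous);   // install
    const bool ok = risky();                 // e.g. context->loadModel(cparams)
    sigaction(SIGSEGV, &previous, nullptr);  // restore
    return ok;
}

}  // namespace

Worth noting about the design: because the handler re-raises with the default disposition, this pattern only adds a log line before the crash; it does not let loadModel() fail gracefully, which is why the hunk still pairs it with try/catch for C++ exceptions.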
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "llama-cpp-capacitor",
-  "version": "0.0.10",
+  "version": "0.0.13",
   "description": "A native Capacitor plugin that embeds llama.cpp directly into mobile apps, enabling offline AI inference with comprehensive support for text generation, multimodal processing, TTS, LoRA adapters, and more.",
   "main": "dist/plugin.cjs.js",
   "module": "dist/esm/index.js",