llama-cpp-capacitor 0.0.9 → 0.0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/android/src/main/jni.cpp +72 -3
- package/package.json +1 -1
package/android/src/main/jni.cpp
CHANGED

```diff
@@ -154,13 +154,82 @@ Java_ai_annadata_plugin_capacitor_LlamaCpp_initContextNative(
     cparams.use_mmap = true;
     cparams.use_mlock = false;
     cparams.numa = LM_GGML_NUMA_STRATEGY_DISABLED;
+    cparams.ctx_shift = false;
+    cparams.chat_template = "";
+    cparams.embedding = false;
+    cparams.cont_batching = false;
+    cparams.parallel = false;
+    cparams.grammar = "";
+    cparams.grammar_penalty.clear();
+    cparams.antiprompt.clear();
+    cparams.lora_adapter.clear();
+    cparams.lora_base = "";
+    cparams.mul_mat_q = true;
+    cparams.f16_kv = true;
+    cparams.logits_all = false;
+    cparams.vocab_only = false;
+    cparams.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED;
+    cparams.rope_scaling_factor = 0.0f;
+    cparams.rope_scaling_orig_ctx_len = 0;
+    cparams.yarn_ext_factor = -1.0f;
+    cparams.yarn_attn_factor = 1.0f;
+    cparams.yarn_beta_fast = 32.0f;
+    cparams.yarn_beta_slow = 1.0f;
+    cparams.yarn_orig_ctx = 0;
+    cparams.offload_kqv = true;
+    cparams.flash_attn = false;
+    cparams.flash_attn_kernel = false;
+    cparams.flash_attn_causal = true;
+    cparams.mmproj = "";
+    cparams.image = "";
+    cparams.export = "";
+    cparams.export_path = "";
+    cparams.seed = -1;
+    cparams.n_keep = 0;
+    cparams.n_discard = -1;
+    cparams.n_draft = 0;
+    cparams.n_chunks = -1;
+    cparams.n_parallel = 1;
+    cparams.n_sequences = 1;
+    cparams.p_accept = 0.5f;
+    cparams.p_split = 0.1f;
+    cparams.n_gqa = 8;
+    cparams.rms_norm_eps = 5e-6f;
+    cparams.model_alias = "unknown";
+    cparams.ubatch_size = 512;
+    cparams.ubatch_seq_len_max = 1;
 
     // Load model
-
-
-
+    LOGI("Attempting to load model from: %s", model_path_str.c_str());
+    LOGI("Model parameters: n_ctx=%d, n_batch=%d, n_gpu_layers=%d",
+         cparams.n_ctx, cparams.n_batch, cparams.n_gpu_layers);
+
+    // Try to load the model
+    bool load_success = context->loadModel(cparams);
+    if (!load_success) {
+        LOGE("Model loading failed for: %s", model_path_str.c_str());
+
+        // Try with minimal parameters as fallback
+        LOGI("Trying with minimal parameters...");
+        common_params minimal_params;
+        minimal_params.model.path = model_path_str;
+        minimal_params.n_ctx = 512;
+        minimal_params.n_batch = 256;
+        minimal_params.n_gpu_layers = 0;
+        minimal_params.use_mmap = true;
+        minimal_params.use_mlock = false;
+        minimal_params.numa = LM_GGML_NUMA_STRATEGY_DISABLED;
+
+        load_success = context->loadModel(minimal_params);
+        if (!load_success) {
+            LOGE("Model loading failed even with minimal parameters");
+            throw_java_exception(env, "java/lang/RuntimeException", "Failed to load model");
+            return -1;
+        }
+        }
     }
 
+    LOGI("Model loaded successfully: %s", model_path_str.c_str());
+
     // Store context
     jlong context_id = next_context_id++;
     contexts[context_id] = std::move(context);
```
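The jni.cpp change does two things: it explicitly initializes a long list of `common_params` fields to fixed defaults rather than relying on whatever the struct's constructor leaves behind, and it wraps `context->loadModel()` in a two-stage attempt, retrying with conservative settings (`n_ctx=512`, `n_batch=256`, `n_gpu_layers=0`, i.e. CPU-only) before raising a `RuntimeException` across JNI. Below is a minimal sketch of that retry pattern, lifted out of the JNI plumbing; `Config` and `try_load` are hypothetical stand-ins for `common_params` and the plugin's `loadModel()`, not its real API:

```cpp
// Sketch of the two-stage load introduced above, reduced to a
// self-contained shape. Config and try_load are hypothetical
// stand-ins; the real plugin throws a Java RuntimeException via
// throw_java_exception where this sketch returns std::nullopt.
#include <cstdio>
#include <optional>
#include <string>

struct Config {
    std::string model_path;
    int n_ctx;
    int n_batch;
    int n_gpu_layers; // 0 = CPU-only
};

// Stand-in loader: assume it returns false when the model cannot be
// loaded with the given settings (e.g. out of memory on device).
static bool try_load(const Config& cfg) {
    std::printf("loading %s (n_ctx=%d, n_batch=%d, n_gpu_layers=%d)\n",
                cfg.model_path.c_str(), cfg.n_ctx, cfg.n_batch,
                cfg.n_gpu_layers);
    return cfg.n_gpu_layers == 0; // pretend only the CPU-only config fits
}

static std::optional<Config> load_with_fallback(const std::string& path) {
    // First attempt: the fully configured parameters.
    Config full{path, 2048, 512, 99};
    if (try_load(full)) return full;

    // Fallback mirroring the diff's minimal_params: small context,
    // small batch, no GPU offload.
    Config minimal{path, 512, 256, 0};
    if (try_load(minimal)) return minimal;
    return std::nullopt;
}

int main() {
    if (!load_with_fallback("/path/to/model.gguf")) {
        std::fprintf(stderr, "model load failed even with minimal parameters\n");
        return 1;
    }
    return 0;
}
```

The fallback trades throughput for reliability: dropping GPU offload and shrinking the context and batch sizes sharply reduces the memory a load needs, which is presumably the on-device failure mode this release works around.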
package/package.json
CHANGED

```diff
@@ -1,6 +1,6 @@
 {
   "name": "llama-cpp-capacitor",
-  "version": "0.0.9",
+  "version": "0.0.10",
   "description": "A native Capacitor plugin that embeds llama.cpp directly into mobile apps, enabling offline AI inference with comprehensive support for text generation, multimodal processing, TTS, LoRA adapters, and more.",
   "main": "dist/plugin.cjs.js",
   "module": "dist/esm/index.js",
```
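Aside from the version bump, package.json is unchanged. To pick up the new fallback behavior, the usual Capacitor flow should apply: `npm install llama-cpp-capacitor@0.0.10`, then `npx cap sync` to propagate the updated native Android sources into the app project.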