llama-cpp-capacitor 0.0.8 → 0.0.10
This diff compares the contents of package versions as published to a supported public registry. It is provided for informational purposes only and reflects the changes between versions exactly as the packages appear in that registry.
- package/android/src/main/jni.cpp +81 -10
- package/package.json +1 -1
package/android/src/main/jni.cpp
CHANGED
@@ -134,7 +134,7 @@ static jlong next_context_id = 1;
 extern "C" {
 
 JNIEXPORT jlong JNICALL
-Java_ai_annadata_plugin_capacitor_LlamaCpp_initContext(
+Java_ai_annadata_plugin_capacitor_LlamaCpp_initContextNative(
     JNIEnv* env, jobject thiz, jstring model_path, jobject params) {
 
     try {
@@ -154,13 +154,82 @@ Java_ai_annadata_plugin_capacitor_LlamaCpp_initContext(
         cparams.use_mmap = true;
         cparams.use_mlock = false;
         cparams.numa = LM_GGML_NUMA_STRATEGY_DISABLED;
+        cparams.ctx_shift = false;
+        cparams.chat_template = "";
+        cparams.embedding = false;
+        cparams.cont_batching = false;
+        cparams.parallel = false;
+        cparams.grammar = "";
+        cparams.grammar_penalty.clear();
+        cparams.antiprompt.clear();
+        cparams.lora_adapter.clear();
+        cparams.lora_base = "";
+        cparams.mul_mat_q = true;
+        cparams.f16_kv = true;
+        cparams.logits_all = false;
+        cparams.vocab_only = false;
+        cparams.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED;
+        cparams.rope_scaling_factor = 0.0f;
+        cparams.rope_scaling_orig_ctx_len = 0;
+        cparams.yarn_ext_factor = -1.0f;
+        cparams.yarn_attn_factor = 1.0f;
+        cparams.yarn_beta_fast = 32.0f;
+        cparams.yarn_beta_slow = 1.0f;
+        cparams.yarn_orig_ctx = 0;
+        cparams.offload_kqv = true;
+        cparams.flash_attn = false;
+        cparams.flash_attn_kernel = false;
+        cparams.flash_attn_causal = true;
+        cparams.mmproj = "";
+        cparams.image = "";
+        cparams.export = "";
+        cparams.export_path = "";
+        cparams.seed = -1;
+        cparams.n_keep = 0;
+        cparams.n_discard = -1;
+        cparams.n_draft = 0;
+        cparams.n_chunks = -1;
+        cparams.n_parallel = 1;
+        cparams.n_sequences = 1;
+        cparams.p_accept = 0.5f;
+        cparams.p_split = 0.1f;
+        cparams.n_gqa = 8;
+        cparams.rms_norm_eps = 5e-6f;
+        cparams.model_alias = "unknown";
+        cparams.ubatch_size = 512;
+        cparams.ubatch_seq_len_max = 1;
 
         // Load model
-
-
-
+        LOGI("Attempting to load model from: %s", model_path_str.c_str());
+        LOGI("Model parameters: n_ctx=%d, n_batch=%d, n_gpu_layers=%d",
+             cparams.n_ctx, cparams.n_batch, cparams.n_gpu_layers);
+
+        // Try to load the model
+        bool load_success = context->loadModel(cparams);
+        if (!load_success) {
+            LOGE("Model loading failed for: %s", model_path_str.c_str());
+
+            // Try with minimal parameters as fallback
+            LOGI("Trying with minimal parameters...");
+            common_params minimal_params;
+            minimal_params.model.path = model_path_str;
+            minimal_params.n_ctx = 512;
+            minimal_params.n_batch = 256;
+            minimal_params.n_gpu_layers = 0;
+            minimal_params.use_mmap = true;
+            minimal_params.use_mlock = false;
+            minimal_params.numa = LM_GGML_NUMA_STRATEGY_DISABLED;
+
+            load_success = context->loadModel(minimal_params);
+            if (!load_success) {
+                LOGE("Model loading failed even with minimal parameters");
+                throw_java_exception(env, "java/lang/RuntimeException", "Failed to load model");
+                return -1;
+            }
         }
 
+        LOGI("Model loaded successfully: %s", model_path_str.c_str());
+
         // Store context
         jlong context_id = next_context_id++;
         contexts[context_id] = std::move(context);
@@ -176,7 +245,7 @@ Java_ai_annadata_plugin_capacitor_LlamaCpp_initContext(
 }
 
 JNIEXPORT void JNICALL
-Java_ai_annadata_plugin_capacitor_LlamaCpp_releaseContext(
+Java_ai_annadata_plugin_capacitor_LlamaCpp_releaseContextNative(
     JNIEnv* env, jobject thiz, jlong context_id) {
 
     try {
@@ -192,8 +261,8 @@ Java_ai_annadata_plugin_capacitor_LlamaCpp_releaseContext(
 }
 
 JNIEXPORT jstring JNICALL
-Java_ai_annadata_plugin_capacitor_LlamaCpp_completion(
-    JNIEnv* env, jobject thiz, jlong context_id, jstring prompt
+Java_ai_annadata_plugin_capacitor_LlamaCpp_completionNative(
+    JNIEnv* env, jobject thiz, jlong context_id, jstring prompt) {
 
     try {
         auto it = contexts.find(context_id);
@@ -222,7 +291,7 @@ Java_ai_annadata_plugin_capacitor_LlamaCpp_completion(
 }
 
 JNIEXPORT void JNICALL
-Java_ai_annadata_plugin_capacitor_LlamaCpp_stopCompletion(
+Java_ai_annadata_plugin_capacitor_LlamaCpp_stopCompletionNative(
     JNIEnv* env, jobject thiz, jlong context_id) {
 
     try {
@@ -238,7 +307,7 @@ Java_ai_annadata_plugin_capacitor_LlamaCpp_stopCompletion(
 }
 
 JNIEXPORT jstring JNICALL
-Java_ai_annadata_plugin_capacitor_LlamaCpp_getFormattedChat(
+Java_ai_annadata_plugin_capacitor_LlamaCpp_getFormattedChatNative(
     JNIEnv* env, jobject thiz, jlong context_id, jstring messages, jstring chat_template) {
 
     try {
@@ -267,7 +336,7 @@ Java_ai_annadata_plugin_capacitor_LlamaCpp_getFormattedChat(
 }
 
 JNIEXPORT jboolean JNICALL
-Java_ai_annadata_plugin_capacitor_LlamaCpp_toggleNativeLog(
+Java_ai_annadata_plugin_capacitor_LlamaCpp_toggleNativeLogNative(
     JNIEnv* env, jobject thiz, jboolean enabled) {
 
     try {
@@ -281,6 +350,8 @@ Java_ai_annadata_plugin_capacitor_LlamaCpp_toggleNativeLog(
     }
 }
 
+
+
 } // extern "C"
 
 } // namespace jni_utils
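Note: the Native suffix added to each export follows the JNI symbol convention Java_<package>_<class>_<method>, so these functions only resolve against a Java class ai.annadata.plugin.capacitor.LlamaCpp that declares native methods with matching names. Below is a minimal sketch of the Java-side declarations implied by the C signatures in this diff; the class shape, the library name, and the type of the params argument are assumptions, and only the method names and JNI type mappings (jlong -> long, jstring -> String, jobject -> Object, jboolean -> boolean) follow from the diff itself.

package ai.annadata.plugin.capacitor;

public class LlamaCpp {

    static {
        // Assumption: the library name must match the .so built from jni.cpp.
        System.loadLibrary("llama-cpp");
    }

    // Non-static declarations: each C function receives (JNIEnv*, jobject thiz, ...),
    // i.e. an instance reference rather than a jclass.
    public native long initContextNative(String modelPath, Object params);
    public native void releaseContextNative(long contextId);
    public native String completionNative(long contextId, String prompt);
    public native void stopCompletionNative(long contextId);
    public native String getFormattedChatNative(long contextId, String messages, String chatTemplate);
    public native boolean toggleNativeLogNative(boolean enabled);
}

Any caller still bound to the old names (initContext, completion, and so on) would fail at runtime with an UnsatisfiedLinkError, which is presumably why all six exports were renamed in the same patch.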
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "llama-cpp-capacitor",
|
|
3
|
-
"version": "0.0.
|
|
3
|
+
"version": "0.0.10",
|
|
4
4
|
"description": "A native Capacitor plugin that embeds llama.cpp directly into mobile apps, enabling offline AI inference with comprehensive support for text generation, multimodal processing, TTS, LoRA adapters, and more.",
|
|
5
5
|
"main": "dist/plugin.cjs.js",
|
|
6
6
|
"module": "dist/esm/index.js",
|