llama-cpp-capacitor 0.0.21 → 0.0.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,6 +4,7 @@ import android.util.Log;
  import com.getcapacitor.JSObject;
  import java.util.HashMap;
  import java.util.Map;
+ import java.util.Iterator;
  import java.util.concurrent.CompletableFuture;
  import java.io.File;
  import java.io.FileOutputStream;
@@ -637,6 +638,161 @@ public class LlamaCpp {
  }
  }

+ // MARK: - Chat-first methods (like llama-cli -sys)
+
+ public void chat(int contextId, String messagesJson, String system, String chatTemplate, JSObject params, LlamaCallback<Map<String, Object>> callback) {
+     LlamaContext context = contexts.get(contextId);
+     if (context == null) {
+         callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
+         return;
+     }
+
+     try {
+         Log.i(TAG, "Starting chat for context: " + contextId);
+
+         // Parse messages JSON
+         List<Map<String, Object>> messages = parseMessagesJson(messagesJson);
+
+         // Add system message if provided
+         if (system != null && !system.isEmpty()) {
+             Map<String, Object> systemMsg = new HashMap<>();
+             systemMsg.put("role", "system");
+             systemMsg.put("content", system);
+             messages.add(0, systemMsg); // Add system message at the beginning
+         }
+
+         // Convert messages to JSON string for getFormattedChat
+         String formattedMessages = convertMessagesToJson(messages);
+
+         // First, format the chat
+         String formattedPrompt = getFormattedChatNative(context.getNativeContextId(), formattedMessages, chatTemplate != null ? chatTemplate : "");
+
+         // Then run completion with the formatted prompt
+         JSObject completionParams = new JSObject();
+         completionParams.put("prompt", formattedPrompt);
+
+         // Copy other parameters from params
+         if (params != null) {
+             Iterator<String> keyIterator = params.keys();
+             while (keyIterator.hasNext()) {
+                 String key = keyIterator.next();
+                 if (!key.equals("prompt") && !key.equals("messages")) {
+                     completionParams.put(key, params.get(key));
+                 }
+             }
+         }
+
+         // Call native completion
+         Map<String, Object> result = completionNative(context.getNativeContextId(), completionParams);
+
+         if (result != null) {
+             Log.i(TAG, "Chat completed successfully");
+             callback.onResult(LlamaResult.success(result));
+         } else {
+             Log.e(TAG, "Chat returned null result");
+             callback.onResult(LlamaResult.failure(new LlamaError("Chat failed")));
+         }
+
+     } catch (Exception e) {
+         Log.e(TAG, "Chat failed: " + e.getMessage());
+         callback.onResult(LlamaResult.failure(new LlamaError("Chat failed: " + e.getMessage())));
+     }
+ }
+
+ public void chatWithSystem(int contextId, String system, String message, JSObject params, LlamaCallback<Map<String, Object>> callback) {
+     try {
+         // Create a simple message array
+         List<Map<String, Object>> messages = new ArrayList<>();
+         Map<String, Object> userMsg = new HashMap<>();
+         userMsg.put("role", "user");
+         userMsg.put("content", message);
+         messages.add(userMsg);
+
+         // Call the main chat method
+         chat(contextId, convertMessagesToJson(messages), system, null, params, callback);
+     } catch (Exception e) {
+         callback.onResult(LlamaResult.failure(new LlamaError("Chat with system failed: " + e.getMessage())));
+     }
+ }
+
+ public void generateText(int contextId, String prompt, JSObject params, LlamaCallback<Map<String, Object>> callback) {
+     LlamaContext context = contexts.get(contextId);
+     if (context == null) {
+         callback.onResult(LlamaResult.failure(new LlamaError("Context not found")));
+         return;
+     }
+
+     try {
+         Log.i(TAG, "Starting text generation for context: " + contextId);
+
+         // Create completion parameters
+         JSObject completionParams = new JSObject();
+         completionParams.put("prompt", prompt);
+
+         // Copy other parameters from params
+         if (params != null) {
+             Iterator<String> keyIterator = params.keys();
+             while (keyIterator.hasNext()) {
+                 String key = keyIterator.next();
+                 if (!key.equals("prompt") && !key.equals("messages")) {
+                     completionParams.put(key, params.get(key));
+                 }
+             }
+         }
+
+         // Call native completion
+         Map<String, Object> result = completionNative(context.getNativeContextId(), completionParams);
+
+         if (result != null) {
+             Log.i(TAG, "Text generation completed successfully");
+             callback.onResult(LlamaResult.success(result));
+         } else {
+             Log.e(TAG, "Text generation returned null result");
+             callback.onResult(LlamaResult.failure(new LlamaError("Text generation failed")));
+         }
+
+     } catch (Exception e) {
+         Log.e(TAG, "Text generation failed: " + e.getMessage());
+         callback.onResult(LlamaResult.failure(new LlamaError("Text generation failed: " + e.getMessage())));
+     }
+ }
+
+ // Helper methods for message handling
+ private List<Map<String, Object>> parseMessagesJson(String messagesJson) {
+     List<Map<String, Object>> messages = new ArrayList<>();
+     try {
+         // Parse JSON string to extract messages
+         org.json.JSONArray jsonArray = new org.json.JSONArray(messagesJson);
+         for (int i = 0; i < jsonArray.length(); i++) {
+             org.json.JSONObject jsonMessage = jsonArray.getJSONObject(i);
+             Map<String, Object> message = new HashMap<>();
+             message.put("role", jsonMessage.getString("role"));
+             message.put("content", jsonMessage.getString("content"));
+             messages.add(message);
+         }
+     } catch (Exception e) {
+         Log.e(TAG, "Error parsing messages JSON: " + e.getMessage());
+         // Return empty list on error
+     }
+     return messages;
+ }
+
+ private String convertMessagesToJson(List<Map<String, Object>> messages) {
+     try {
+         org.json.JSONArray jsonArray = new org.json.JSONArray();
+         for (Map<String, Object> message : messages) {
+             org.json.JSONObject jsonMessage = new org.json.JSONObject();
+             jsonMessage.put("role", message.get("role"));
+             jsonMessage.put("content", message.get("content"));
+             jsonArray.put(jsonMessage);
+         }
+         return jsonArray.toString();
+     } catch (Exception e) {
+         Log.e(TAG, "Error converting messages to JSON: " + e.getMessage());
+         return "[]"; // Return empty array on error
+     }
+ }
+
  // MARK: - Session management

  public void loadSession(int contextId, String filepath, LlamaCallback<Map<String, Object>> callback) {
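On the Android side, the new chat() method is essentially getFormattedChat followed by completion: the messages (plus an optional system message inserted at index 0) are rendered into a prompt with the context's chat template, "prompt" and "messages" are stripped from the caller's params, and everything else is passed straight to the native completion call. A hedged TypeScript sketch of that same flow, written against the plugin's pre-existing getFormattedChat and completion methods, is shown below; the `LlamaCpp` import name is an assumption for illustration, not something stated in this diff.

import { LlamaCpp } from 'llama-cpp-capacitor'; // assumed plugin export name

// Sketch only: roughly what chat() does natively, expressed with the
// already-existing plugin methods (format the messages, then run a completion).
async function chatViaFormatAndComplete(contextId: number): Promise<void> {
  const messages = JSON.stringify([
    { role: 'system', content: 'You are a helpful assistant.' },
    { role: 'user', content: 'Hello!' },
  ]);

  // getFormattedChat() may return a plain string or a JinjaFormattedChatResult.
  const formatted = await LlamaCpp.getFormattedChat({ contextId, messages });
  const prompt = typeof formatted === 'string' ? formatted : formatted.prompt;

  // chat() forwards every param except 'prompt'/'messages' to completion().
  const result = await LlamaCpp.completion({
    contextId,
    params: { prompt, n_predict: 128, emit_partial_completion: false },
  });
  console.log(result.content);
}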
@@ -178,6 +178,69 @@ public class LlamaCppPlugin extends Plugin {
  });
  }

+ // MARK: - Chat-first methods (like llama-cli -sys)
+
+ @PluginMethod
+ public void chat(PluginCall call) {
+     int contextId = call.getInt("contextId", 0);
+     JSArray messagesArray = call.getArray("messages", new JSArray());
+     String system = call.getString("system");
+     String chatTemplate = call.getString("chatTemplate");
+     JSObject params = call.getObject("params");
+
+     try {
+         // Convert JSArray to JSON string
+         String messagesJson = messagesArray.toString();
+
+         implementation.chat(contextId, messagesJson, system, chatTemplate, params, result -> {
+             if (result.isSuccess()) {
+                 Map<String, Object> data = result.getData();
+                 JSObject jsResult = convertMapToJSObject(data);
+                 call.resolve(jsResult);
+             } else {
+                 call.reject(result.getError().getMessage());
+             }
+         });
+     } catch (Exception e) {
+         call.reject("Failed to process chat request: " + e.getMessage());
+     }
+ }
+
+ @PluginMethod
+ public void chatWithSystem(PluginCall call) {
+     int contextId = call.getInt("contextId", 0);
+     String system = call.getString("system", "");
+     String message = call.getString("message", "");
+     JSObject params = call.getObject("params");
+
+     implementation.chatWithSystem(contextId, system, message, params, result -> {
+         if (result.isSuccess()) {
+             Map<String, Object> data = result.getData();
+             JSObject jsResult = convertMapToJSObject(data);
+             call.resolve(jsResult);
+         } else {
+             call.reject(result.getError().getMessage());
+         }
+     });
+ }
+
+ @PluginMethod
+ public void generateText(PluginCall call) {
+     int contextId = call.getInt("contextId", 0);
+     String prompt = call.getString("prompt", "");
+     JSObject params = call.getObject("params");
+
+     implementation.generateText(contextId, prompt, params, result -> {
+         if (result.isSuccess()) {
+             Map<String, Object> data = result.getData();
+             JSObject jsResult = convertMapToJSObject(data);
+             call.resolve(jsResult);
+         } else {
+             call.reject(result.getError().getMessage());
+         }
+     });
+ }
+
  // MARK: - Session management

  @PluginMethod
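These @PluginMethod handlers are what the Capacitor bridge exposes to JavaScript: chat() serializes the messages array to JSON before handing it to the implementation above, while chatWithSystem() and generateText() are thin pass-throughs. A hedged usage sketch from the TypeScript side follows; it assumes the plugin proxy is exported as `LlamaCpp` and that a context with id 0 was already created via initContext. The option keys mirror the call.getXxx("...") reads above.

import { LlamaCpp } from 'llama-cpp-capacitor'; // assumed plugin export name

// Sketch only: exercising the two convenience methods registered above.
async function quickDemo(): Promise<void> {
  const reply = await LlamaCpp.chatWithSystem({
    contextId: 0,
    system: 'You are a helpful assistant.', // like llama-cli -sys
    message: 'Summarize llama.cpp in one sentence.',
    params: { n_predict: 64, emit_partial_completion: false },
  });
  console.log(reply.text);

  const story = await LlamaCpp.generateText({
    contextId: 0,
    prompt: 'Once upon a time', // like llama-cli -p
    params: { n_predict: 32, temperature: 0.8, emit_partial_completion: false },
  });
  console.log(story.text);
}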
package/dist/docs.json CHANGED
@@ -137,6 +137,67 @@
  ],
  "slug": "completion"
  },
+ {
+   "name": "chat",
+   "signature": "(options: { contextId: number; messages: LlamaCppOAICompatibleMessage[]; system?: string; chatTemplate?: string; params?: Omit<NativeCompletionParams, 'prompt' | 'messages'>; }) => Promise<NativeCompletionResult>",
+   "parameters": [
+     {
+       "name": "options",
+       "docs": "",
+       "type": "{ contextId: number; messages: LlamaCppOAICompatibleMessage[]; system?: string | undefined; chatTemplate?: string | undefined; params?: Omit<NativeCompletionParams, 'prompt' | 'messages'> | undefined; }"
+     }
+   ],
+   "returns": "Promise<NativeCompletionResult>",
+   "tags": [],
+   "docs": "",
+   "complexTypes": [
+     "NativeCompletionResult",
+     "LlamaCppOAICompatibleMessage",
+     "Omit",
+     "NativeCompletionParams"
+   ],
+   "slug": "chat"
+ },
+ {
+   "name": "chatWithSystem",
+   "signature": "(options: { contextId: number; system: string; message: string; params?: Omit<NativeCompletionParams, 'prompt' | 'messages'>; }) => Promise<NativeCompletionResult>",
+   "parameters": [
+     {
+       "name": "options",
+       "docs": "",
+       "type": "{ contextId: number; system: string; message: string; params?: Omit<NativeCompletionParams, 'prompt' | 'messages'> | undefined; }"
+     }
+   ],
+   "returns": "Promise<NativeCompletionResult>",
+   "tags": [],
+   "docs": "",
+   "complexTypes": [
+     "NativeCompletionResult",
+     "Omit",
+     "NativeCompletionParams"
+   ],
+   "slug": "chatwithsystem"
+ },
+ {
+   "name": "generateText",
+   "signature": "(options: { contextId: number; prompt: string; params?: Omit<NativeCompletionParams, 'prompt' | 'messages'>; }) => Promise<NativeCompletionResult>",
+   "parameters": [
+     {
+       "name": "options",
+       "docs": "",
+       "type": "{ contextId: number; prompt: string; params?: Omit<NativeCompletionParams, 'prompt' | 'messages'> | undefined; }"
+     }
+   ],
+   "returns": "Promise<NativeCompletionResult>",
+   "tags": [],
+   "docs": "",
+   "complexTypes": [
+     "NativeCompletionResult",
+     "Omit",
+     "NativeCompletionParams"
+   ],
+   "slug": "generatetext"
+ },
  {
  "name": "stopCompletion",
  "signature": "(options: { contextId: number; }) => Promise<void>",
@@ -4904,6 +4965,68 @@
  }
  ]
  },
+ {
+   "name": "LlamaCppOAICompatibleMessage",
+   "slug": "llamacppoaicompatiblemessage",
+   "docs": "",
+   "tags": [],
+   "methods": [],
+   "properties": [
+     {
+       "name": "role",
+       "tags": [],
+       "docs": "",
+       "complexTypes": [],
+       "type": "string"
+     },
+     {
+       "name": "content",
+       "tags": [],
+       "docs": "",
+       "complexTypes": [
+         "LlamaCppMessagePart"
+       ],
+       "type": "string | LlamaCppMessagePart[] | undefined"
+     }
+   ]
+ },
+ {
+   "name": "LlamaCppMessagePart",
+   "slug": "llamacppmessagepart",
+   "docs": "",
+   "tags": [],
+   "methods": [],
+   "properties": [
+     {
+       "name": "type",
+       "tags": [],
+       "docs": "",
+       "complexTypes": [],
+       "type": "string"
+     },
+     {
+       "name": "text",
+       "tags": [],
+       "docs": "",
+       "complexTypes": [],
+       "type": "string | undefined"
+     },
+     {
+       "name": "image_url",
+       "tags": [],
+       "docs": "",
+       "complexTypes": [],
+       "type": "{ url?: string | undefined; } | undefined"
+     },
+     {
+       "name": "input_audio",
+       "tags": [],
+       "docs": "",
+       "complexTypes": [],
+       "type": "{ format: string; data?: string | undefined; url?: string | undefined; } | undefined"
+     }
+   ]
+ },
  {
  "name": "NativeSessionLoadResult",
  "slug": "nativesessionloadresult",
@@ -5604,6 +5727,51 @@
  ]
  }
  ]
+ },
+ {
+   "name": "Omit",
+   "slug": "omit",
+   "docs": "Construct a type with the properties of T except for those in type K.",
+   "types": [
+     {
+       "text": "Pick<T, Exclude<keyof T, K>>",
+       "complexTypes": [
+         "Pick",
+         "T",
+         "Exclude",
+         "K"
+       ]
+     }
+   ]
+ },
+ {
+   "name": "Pick",
+   "slug": "pick",
+   "docs": "From T, pick a set of properties whose keys are in the union K",
+   "types": [
+     {
+       "text": "{\r\n [P in K]: T[P];\r\n}",
+       "complexTypes": [
+         "K",
+         "T",
+         "P"
+       ]
+     }
+   ]
+ },
+ {
+   "name": "Exclude",
+   "slug": "exclude",
+   "docs": "Exclude from T those types that are assignable to U",
+   "types": [
+     {
+       "text": "T extends U ? never : T",
+       "complexTypes": [
+         "T",
+         "U"
+       ]
+     }
+   ]
  }
  ],
  "pluginConfigs": []
@@ -562,6 +562,24 @@ export interface LlamaCppPlugin {
  contextId: number;
  params: NativeCompletionParams;
  }): Promise<NativeCompletionResult>;
+ chat(options: {
+     contextId: number;
+     messages: LlamaCppOAICompatibleMessage[];
+     system?: string;
+     chatTemplate?: string;
+     params?: Omit<NativeCompletionParams, 'prompt' | 'messages'>;
+ }): Promise<NativeCompletionResult>;
+ chatWithSystem(options: {
+     contextId: number;
+     system: string;
+     message: string;
+     params?: Omit<NativeCompletionParams, 'prompt' | 'messages'>;
+ }): Promise<NativeCompletionResult>;
+ generateText(options: {
+     contextId: number;
+     prompt: string;
+     params?: Omit<NativeCompletionParams, 'prompt' | 'messages'>;
+ }): Promise<NativeCompletionResult>;
  stopCompletion(options: {
  contextId: number;
  }): Promise<void>;
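With these declarations, the new methods are fully typed: messages use LlamaCppOAICompatibleMessage, and params reuses NativeCompletionParams with 'prompt' and 'messages' removed, so all the existing sampling options carry over unchanged. Note that emit_partial_completion is a required (non-optional) field of NativeCompletionParams and is not removed by the Omit, so a strictly typed caller still has to supply it. A hedged, typed usage sketch follows; the `LlamaCpp` and type import paths are assumptions for illustration.

import { LlamaCpp } from 'llama-cpp-capacitor'; // assumed plugin export name
import type { LlamaCppOAICompatibleMessage } from 'llama-cpp-capacitor'; // assumed type export path

// Sketch only: a fully typed call to the new chat() declaration above.
async function askWithTypes(contextId: number): Promise<string> {
  const messages: LlamaCppOAICompatibleMessage[] = [
    { role: 'user', content: 'What does the -sys flag of llama-cli do?' },
  ];

  const result = await LlamaCpp.chat({
    contextId,
    messages,
    system: 'Answer briefly.', // optional system prompt, like llama-cli -sys
    params: {
      n_predict: 128,
      temperature: 0.7,
      // Required by NativeCompletionParams and not excluded by the Omit above.
      emit_partial_completion: false,
    },
  });

  return result.content; // NativeCompletionResult.content
}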
@@ -1 +1 @@
- {"version":3,"file":"definitions.js","sourceRoot":"","sources":["../../src/definitions.ts"], ...} (generated source map; the sourcesContent field embeds the full previous src/definitions.ts)
+ {"version":3,"file":"definitions.js","sourceRoot":"","sources":["../../src/definitions.ts"], ...} (regenerated source map; the embedded src/definitions.ts now also declares chat, chatWithSystem, and generateText under "// Chat-first methods (like llama-cli -sys)", matching the definitions.d.ts hunk above)
@@ -354,6 +354,84 @@ struct MinjaCaps {
354
354
  completion(.success(()))
355
355
  }
356
356
 
357
+ // MARK: - Chat-first methods (like llama-cli -sys)
358
+
359
+ func chat(contextId: Int, messages: [JSObject], system: String?, chatTemplate: String?, params: [String: Any]?, completion: @escaping (LlamaResult<[String: Any]>) -> Void) {
360
+ guard contexts[contextId] != nil else {
361
+ completion(.failure(.contextNotFound))
362
+ return
363
+ }
364
+
365
+ do {
366
+ // Convert JSObject messages to JSON string
367
+ let messagesData = try JSONSerialization.data(withJSONObject: messages)
368
+ let messagesJson = String(data: messagesData, encoding: .utf8) ?? "[]"
369
+
370
+ // Add system message if provided
371
+ var allMessages = messages
372
+ if let system = system, !system.isEmpty {
373
+ let systemMessage: [String: Any] = [
374
+ "role": "system",
375
+ "content": system
376
+ ]
377
+ allMessages.insert(JSObject(systemMessage), at: 0)
378
+ }
379
+
380
+ // Convert to JSON string for getFormattedChat
381
+ let allMessagesData = try JSONSerialization.data(withJSONObject: allMessages.map { $0.dictionary })
382
+ let allMessagesJson = String(data: allMessagesData, encoding: .utf8) ?? "[]"
383
+
384
+ // First, format the chat
385
+ getFormattedChat(contextId: contextId, messages: allMessagesJson, chatTemplate: chatTemplate, params: nil) { [weak self] result in
386
+ switch result {
387
+ case .success(let formattedResult):
388
+ // Extract the formatted prompt
389
+ let formattedPrompt = formattedResult["prompt"] as? String ?? ""
390
+
391
+ // Create completion parameters
392
+ var completionParams = params ?? [:]
393
+ completionParams["prompt"] = formattedPrompt
394
+
395
+ // Call completion with formatted prompt
396
+ self?.completion(contextId: contextId, params: completionParams, completion: completion)
397
+
398
+ case .failure(let error):
399
+ completion(.failure(error))
400
+ }
401
+ }
402
+
403
+ } catch {
404
+ completion(.failure(.contextNotFound)) // Use a more appropriate error
405
+ }
406
+ }
407
+
408
+ func chatWithSystem(contextId: Int, system: String, message: String, params: [String: Any]?, completion: @escaping (LlamaResult<[String: Any]>) -> Void) {
409
+ // Create a simple message array
410
+ let userMessage: [String: Any] = [
411
+ "role": "user",
412
+ "content": message
413
+ ]
414
+
415
+ let messages = [JSObject(userMessage)]
416
+
417
+ // Call the main chat method
418
+ chat(contextId: contextId, messages: messages, system: system, chatTemplate: nil, params: params, completion: completion)
419
+ }
420
+
421
+ func generateText(contextId: Int, prompt: String, params: [String: Any]?, completion: @escaping (LlamaResult<[String: Any]>) -> Void) {
422
+ guard contexts[contextId] != nil else {
423
+ completion(.failure(.contextNotFound))
424
+ return
425
+ }
426
+
427
+ // Create completion parameters
428
+ var completionParams = params ?? [:]
429
+ completionParams["prompt"] = prompt
430
+
431
+ // Call completion directly
432
+ completion(contextId: contextId, params: completionParams, completion: completion)
433
+ }
434
+
357
435
  // MARK: - Session management
358
436
 
359
437
  func loadSession(contextId: Int, filepath: String, completion: @escaping (LlamaResult<[String: Any]>) -> Void) {
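The native chat() added above is essentially a convenience wrapper: it prepends an optional system message, renders the chat template via getFormattedChat, and feeds the rendered prompt into the existing completion path. A rough TypeScript sketch of that same flow against the public API follows; the `prompt` field on the formatted result mirrors what the Swift code reads, while the `LlamaCpp` handle and the trimmed params object are assumptions for illustration.

// Rough equivalent of the native chat() flow; real callers can use LlamaCpp.chat() directly.
import type { LlamaCppPlugin, NativeCompletionParams } from 'llama-cpp-capacitor';

// Stand-in for the registered plugin handle (see the earlier sketch).
declare const LlamaCpp: LlamaCppPlugin;

async function chatViaFormatting(
  contextId: number,
  messages: Array<{ role: string; content: string }>,
  system?: string,
) {
  // 1) Optionally prepend the system prompt, as the native code does.
  const all = system ? [{ role: 'system', content: system }, ...messages] : messages;

  // 2) Render the chat template into a single prompt string.
  const formatted = await LlamaCpp.getFormattedChat({
    contextId,
    messages: JSON.stringify(all),
  });
  const prompt = typeof formatted === 'string' ? formatted : (formatted.prompt ?? '');

  // 3) Run a plain completion on the rendered prompt (other params omitted for brevity).
  return LlamaCpp.completion({
    contextId,
    params: { prompt } as NativeCompletionParams,
  });
}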
@@ -23,6 +23,11 @@ public class LlamaCppPlugin: CAPPlugin, CAPBridgedPlugin {
23
23
  CAPPluginMethod(name: "completion", returnType: CAPPluginReturnPromise),
24
24
  CAPPluginMethod(name: "stopCompletion", returnType: CAPPluginReturnPromise),
25
25
 
26
+ // Chat-first methods (like llama-cli -sys)
27
+ CAPPluginMethod(name: "chat", returnType: CAPPluginReturnPromise),
28
+ CAPPluginMethod(name: "chatWithSystem", returnType: CAPPluginReturnPromise),
29
+ CAPPluginMethod(name: "generateText", returnType: CAPPluginReturnPromise),
30
+
26
31
  // Session management
27
32
  CAPPluginMethod(name: "loadSession", returnType: CAPPluginReturnPromise),
28
33
  CAPPluginMethod(name: "saveSession", returnType: CAPPluginReturnPromise),
@@ -201,6 +206,56 @@ public class LlamaCppPlugin: CAPPlugin, CAPBridgedPlugin {
201
206
  }
202
207
  }
203
208
 
209
+ // MARK: - Chat-first methods (like llama-cli -sys)
210
+
211
+ @objc func chat(_ call: CAPPluginCall) {
212
+ let contextId = call.getInt("contextId") ?? 0
213
+ let messages = call.getArray("messages", JSObject.self) ?? []
214
+ let system = call.getString("system")
215
+ let chatTemplate = call.getString("chatTemplate")
216
+ let params = call.getObject("params")
217
+
218
+ implementation.chat(contextId: contextId, messages: messages, system: system, chatTemplate: chatTemplate, params: params) { result in
219
+ switch result {
220
+ case .success(let completionResult):
221
+ call.resolve(completionResult)
222
+ case .failure(let error):
223
+ call.reject(error.localizedDescription)
224
+ }
225
+ }
226
+ }
227
+
228
+ @objc func chatWithSystem(_ call: CAPPluginCall) {
229
+ let contextId = call.getInt("contextId") ?? 0
230
+ let system = call.getString("system") ?? ""
231
+ let message = call.getString("message") ?? ""
232
+ let params = call.getObject("params")
233
+
234
+ implementation.chatWithSystem(contextId: contextId, system: system, message: message, params: params) { result in
235
+ switch result {
236
+ case .success(let completionResult):
237
+ call.resolve(completionResult)
238
+ case .failure(let error):
239
+ call.reject(error.localizedDescription)
240
+ }
241
+ }
242
+ }
243
+
244
+ @objc func generateText(_ call: CAPPluginCall) {
245
+ let contextId = call.getInt("contextId") ?? 0
246
+ let prompt = call.getString("prompt") ?? ""
247
+ let params = call.getObject("params")
248
+
249
+ implementation.generateText(contextId: contextId, prompt: prompt, params: params) { result in
250
+ switch result {
251
+ case .success(let completionResult):
252
+ call.resolve(completionResult)
253
+ case .failure(let error):
254
+ call.reject(error.localizedDescription)
255
+ }
256
+ }
257
+ }
258
+
204
259
  // MARK: - Session management
205
260
 
206
261
  @objc func loadSession(_ call: CAPPluginCall) {
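The option keys read by these @objc bridge methods (contextId, messages, system, chatTemplate, params; system/message; prompt) line up with the TypeScript interface earlier in the diff. A small hedged sketch of calls that would exercise chat() and generateText(); the message text is invented and optional params are omitted.

// Hypothetical calls against the bridged methods; not part of the package diff.
import type { LlamaCppPlugin } from 'llama-cpp-capacitor';

// Stand-in for the registered plugin handle (see the earlier sketch).
declare const LlamaCpp: LlamaCppPlugin;

async function demo(contextId: number): Promise<void> {
  // Full chat-style call: messages, system, and chatTemplate map onto chat(_:) above.
  const chatResult = await LlamaCpp.chat({
    contextId,
    messages: [{ role: 'user', content: 'Summarize the plot of Hamlet.' }],
    system: 'You are a concise literary assistant.',
  });

  // One-shot raw prompt, like `llama-cli -p "..."`, via generateText(_:).
  const genResult = await LlamaCpp.generateText({
    contextId,
    prompt: 'Write a haiku about on-device inference.',
  });

  console.log(chatResult, genResult);
}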
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "llama-cpp-capacitor",
3
- "version": "0.0.21",
4
- "description": "A native Capacitor plugin that embeds llama.cpp directly into mobile apps, enabling offline AI inference with comprehensive support for text generation, multimodal processing, TTS, LoRA adapters, and more.",
3
+ "version": "0.0.22",
4
+ "description": "A native Capacitor plugin that embeds llama.cpp directly into mobile apps, enabling offline AI inference with chat-first API design. Supports both simple text generation and advanced chat conversations with system prompts, multimodal processing, TTS, LoRA adapters, and more.",
5
5
  "main": "dist/plugin.cjs.js",
6
6
  "type": "module",
7
7
  "module": "dist/esm/index.js",