llama-cpp-capacitor 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1 @@
+ {"version":3,"file":"definitions.js","sourceRoot":"","sources":["../../src/definitions.ts"],"names":[],"mappings":"","sourcesContent":["// Native parameter types that match llama.rn exactly\r\nexport interface NativeEmbeddingParams {\r\n embd_normalize?: number;\r\n}\r\n\r\nexport interface NativeContextParams {\r\n model: string;\r\n /**\r\n * Chat template to override the default one from the model.\r\n */\r\n chat_template?: string;\r\n\r\n is_model_asset?: boolean;\r\n use_progress_callback?: boolean;\r\n\r\n n_ctx?: number;\r\n n_batch?: number;\r\n n_ubatch?: number;\r\n\r\n n_threads?: number;\r\n\r\n /**\r\n * Number of layers to store in VRAM (Currently only for iOS)\r\n */\r\n n_gpu_layers?: number;\r\n /**\r\n * Skip GPU devices (iOS only)\r\n */\r\n no_gpu_devices?: boolean;\r\n\r\n /**\r\n * Enable flash attention, only recommended in GPU device (Experimental in llama.cpp)\r\n */\r\n flash_attn?: boolean;\r\n\r\n /**\r\n * KV cache data type for the K (Experimental in llama.cpp)\r\n */\r\n cache_type_k?: string;\r\n /**\r\n * KV cache data type for the V (Experimental in llama.cpp)\r\n */\r\n cache_type_v?: string;\r\n\r\n use_mlock?: boolean;\r\n use_mmap?: boolean;\r\n vocab_only?: boolean;\r\n\r\n /**\r\n * Single LoRA adapter path\r\n */\r\n lora?: string;\r\n /**\r\n * Single LoRA adapter scale\r\n */\r\n lora_scaled?: number;\r\n /**\r\n * LoRA adapter list\r\n */\r\n lora_list?: Array<{ path: string; scaled?: number }>;\r\n\r\n rope_freq_base?: number;\r\n rope_freq_scale?: number;\r\n\r\n pooling_type?: number;\r\n\r\n /**\r\n * Enable context shifting to handle prompts larger than context size\r\n */\r\n ctx_shift?: boolean;\r\n\r\n /**\r\n * Use a unified buffer across the input sequences when computing the attention.\r\n * Try to disable when n_seq_max > 1 for improved performance when the sequences do not share a large prefix.\r\n */\r\n kv_unified?: boolean;\r\n\r\n /**\r\n * Use full-size SWA cache (https://github.com/ggml-org/llama.cpp/pull/13194#issuecomment-2868343055)\r\n */\r\n swa_full?: boolean;\r\n\r\n /**\r\n * Number of layers to keep MoE weights on CPU\r\n */\r\n n_cpu_moe?: number;\r\n\r\n // Embedding params\r\n embedding?: boolean;\r\n embd_normalize?: number;\r\n}\r\n\r\nexport interface NativeCompletionParams {\r\n prompt: string;\r\n n_threads?: number;\r\n /**\r\n * Enable Jinja. Default: true if supported by the model\r\n */\r\n jinja?: boolean;\r\n /**\r\n * JSON schema for convert to grammar for structured JSON output.\r\n * It will be override by grammar if both are set.\r\n */\r\n json_schema?: string;\r\n /**\r\n * Set grammar for grammar-based sampling. Default: no grammar\r\n */\r\n grammar?: string;\r\n /**\r\n * Lazy grammar sampling, trigger by grammar_triggers. Default: false\r\n */\r\n grammar_lazy?: boolean;\r\n /**\r\n * Enable thinking if jinja is enabled. Default: true\r\n */\r\n enable_thinking?: boolean;\r\n /**\r\n * Force thinking to be open. Default: false\r\n */\r\n thinking_forced_open?: boolean;\r\n /**\r\n * Lazy grammar triggers. 
Default: []\r\n */\r\n grammar_triggers?: Array<{\r\n type: number;\r\n value: string;\r\n token: number;\r\n }>;\r\n preserved_tokens?: Array<string>;\r\n chat_format?: number;\r\n reasoning_format?: string;\r\n /**\r\n * Path to an image file to process before generating text.\r\n * When provided, the image will be processed and added to the context.\r\n * Requires multimodal support to be enabled via initMultimodal.\r\n */\r\n media_paths?: Array<string>;\r\n /**\r\n * Specify a JSON array of stopping strings.\r\n * These words will not be included in the completion, so make sure to add them to the prompt for the next iteration. Default: `[]`\r\n */\r\n stop?: Array<string>;\r\n /**\r\n * Set the maximum number of tokens to predict when generating text.\r\n * **Note:** May exceed the set limit slightly if the last token is a partial multibyte character.\r\n * When 0,no tokens will be generated but the prompt is evaluated into the cache. Default: `-1`, where `-1` is infinity.\r\n */\r\n n_predict?: number;\r\n /**\r\n * If greater than 0, the response also contains the probabilities of top N tokens for each generated token given the sampling settings.\r\n * Note that for temperature < 0 the tokens are sampled greedily but token probabilities are still being calculated via a simple softmax of the logits without considering any other sampler settings.\r\n * Default: `0`\r\n */\r\n n_probs?: number;\r\n /**\r\n * Limit the next token selection to the K most probable tokens. Default: `40`\r\n */\r\n top_k?: number;\r\n /**\r\n * Limit the next token selection to a subset of tokens with a cumulative probability above a threshold P. Default: `0.95`\r\n */\r\n top_p?: number;\r\n /**\r\n * The minimum probability for a token to be considered, relative to the probability of the most likely token. Default: `0.05`\r\n */\r\n min_p?: number;\r\n /**\r\n * Set the chance for token removal via XTC sampler. Default: `0.0`, which is disabled.\r\n */\r\n xtc_probability?: number;\r\n /**\r\n * Set a minimum probability threshold for tokens to be removed via XTC sampler. Default: `0.1` (> `0.5` disables XTC)\r\n */\r\n xtc_threshold?: number;\r\n /**\r\n * Enable locally typical sampling with parameter p. Default: `1.0`, which is disabled.\r\n */\r\n typical_p?: number;\r\n /**\r\n * Adjust the randomness of the generated text. Default: `0.8`\r\n */\r\n temperature?: number;\r\n /**\r\n * Last n tokens to consider for penalizing repetition. Default: `64`, where `0` is disabled and `-1` is ctx-size.\r\n */\r\n penalty_last_n?: number;\r\n /**\r\n * Control the repetition of token sequences in the generated text. Default: `1.0`\r\n */\r\n penalty_repeat?: number;\r\n /**\r\n * Repeat alpha frequency penalty. Default: `0.0`, which is disabled.\r\n */\r\n penalty_freq?: number;\r\n /**\r\n * Repeat alpha presence penalty. Default: `0.0`, which is disabled.\r\n */\r\n penalty_present?: number;\r\n /**\r\n * Enable Mirostat sampling, controlling perplexity during text generation. Default: `0`, where `0` is disabled, `1` is Mirostat, and `2` is Mirostat 2.0.\r\n */\r\n mirostat?: number;\r\n /**\r\n * Set the Mirostat target entropy, parameter tau. Default: `5.0`\r\n */\r\n mirostat_tau?: number;\r\n /**\r\n * Set the Mirostat learning rate, parameter eta. Default: `0.1`\r\n */\r\n mirostat_eta?: number;\r\n /**\r\n * Set the DRY (Don't Repeat Yourself) repetition penalty multiplier. Default: `0.0`, which is disabled.\r\n */\r\n dry_multiplier?: number;\r\n /**\r\n * Set the DRY repetition penalty base value. 
Default: `1.75`\r\n */\r\n dry_base?: number;\r\n /**\r\n * Tokens that extend repetition beyond this receive exponentially increasing penalty: multiplier * base ^ (length of repeating sequence before token - allowed length). Default: `2`\r\n */\r\n dry_allowed_length?: number;\r\n /**\r\n * How many tokens to scan for repetitions. Default: `-1`, where `0` is disabled and `-1` is context size.\r\n */\r\n dry_penalty_last_n?: number;\r\n /**\r\n * Specify an array of sequence breakers for DRY sampling. Only a JSON array of strings is accepted. Default: `['\\n', ':', '\"', '*']`\r\n */\r\n dry_sequence_breakers?: Array<string>;\r\n /**\r\n * Top n sigma sampling as described in academic paper \"Top-nσ: Not All Logits Are You Need\" https://arxiv.org/pdf/2411.07641. Default: `-1.0` (Disabled)\r\n */\r\n top_n_sigma?: number;\r\n\r\n /**\r\n * Ignore end of stream token and continue generating. Default: `false`\r\n */\r\n ignore_eos?: boolean;\r\n /**\r\n * Modify the likelihood of a token appearing in the generated text completion.\r\n * For example, use `\"logit_bias\": [[15043,1.0]]` to increase the likelihood of the token 'Hello', or `\"logit_bias\": [[15043,-1.0]]` to decrease its likelihood.\r\n * Setting the value to false, `\"logit_bias\": [[15043,false]]` ensures that the token `Hello` is never produced. The tokens can also be represented as strings,\r\n * e.g.`[[\"Hello, World!\",-0.5]]` will reduce the likelihood of all the individual tokens that represent the string `Hello, World!`, just like the `presence_penalty` does.\r\n * Default: `[]`\r\n */\r\n logit_bias?: Array<Array<number>>;\r\n /**\r\n * Set the random number generator (RNG) seed. Default: `-1`, which is a random seed.\r\n */\r\n seed?: number;\r\n\r\n /**\r\n * Guide tokens for the completion.\r\n * Help prevent hallucinations by forcing the TTS to use the correct words.\r\n * Default: `[]`\r\n */\r\n guide_tokens?: Array<number>;\r\n\r\n emit_partial_completion: boolean;\r\n}\r\n\r\nexport interface NativeCompletionTokenProbItem {\r\n tok_str: string;\r\n prob: number;\r\n}\r\n\r\nexport interface NativeCompletionTokenProb {\r\n content: string;\r\n probs: Array<NativeCompletionTokenProbItem>;\r\n}\r\n\r\nexport interface NativeCompletionResultTimings {\r\n prompt_n: number;\r\n prompt_ms: number;\r\n prompt_per_token_ms: number;\r\n prompt_per_second: number;\r\n predicted_n: number;\r\n predicted_ms: number;\r\n predicted_per_token_ms: number;\r\n predicted_per_second: number;\r\n}\r\n\r\nexport interface NativeCompletionResult {\r\n /**\r\n * Original text (Ignored reasoning_content / tool_calls)\r\n */\r\n text: string;\r\n /**\r\n * Reasoning content (parsed for reasoning model)\r\n */\r\n reasoning_content: string;\r\n /**\r\n * Tool calls\r\n */\r\n tool_calls: Array<{\r\n type: 'function';\r\n function: {\r\n name: string;\r\n arguments: string;\r\n };\r\n id?: string;\r\n }>;\r\n /**\r\n * Content text (Filtered text by reasoning_content / tool_calls)\r\n */\r\n content: string;\r\n\r\n chat_format: number;\r\n\r\n tokens_predicted: number;\r\n tokens_evaluated: number;\r\n truncated: boolean;\r\n stopped_eos: boolean;\r\n stopped_word: string;\r\n stopped_limit: number;\r\n stopping_word: string;\r\n context_full: boolean;\r\n interrupted: boolean;\r\n tokens_cached: number;\r\n timings: NativeCompletionResultTimings;\r\n\r\n completion_probabilities?: Array<NativeCompletionTokenProb>;\r\n audio_tokens?: Array<number>;\r\n}\r\n\r\nexport interface NativeTokenizeResult {\r\n tokens: Array<number>;\r\n /**\r\n 
* Whether the tokenization contains images\r\n */\r\n has_images: boolean;\r\n /**\r\n * Bitmap hashes of the images\r\n */\r\n bitmap_hashes: Array<number>;\r\n /**\r\n * Chunk positions of the text and images\r\n */\r\n chunk_pos: Array<number>;\r\n /**\r\n * Chunk positions of the images\r\n */\r\n chunk_pos_images: Array<number>;\r\n}\r\n\r\nexport interface NativeEmbeddingResult {\r\n embedding: Array<number>;\r\n}\r\n\r\nexport interface NativeLlamaContext {\r\n contextId: number;\r\n model: {\r\n desc: string;\r\n size: number;\r\n nEmbd: number;\r\n nParams: number;\r\n chatTemplates: {\r\n llamaChat: boolean; // Chat template in llama-chat.cpp\r\n minja: {\r\n // Chat template supported by minja.hpp\r\n default: boolean;\r\n defaultCaps: {\r\n tools: boolean;\r\n toolCalls: boolean;\r\n toolResponses: boolean;\r\n systemRole: boolean;\r\n parallelToolCalls: boolean;\r\n toolCallId: boolean;\r\n };\r\n toolUse: boolean;\r\n toolUseCaps: {\r\n tools: boolean;\r\n toolCalls: boolean;\r\n toolResponses: boolean;\r\n systemRole: boolean;\r\n parallelToolCalls: boolean;\r\n toolCallId: boolean;\r\n };\r\n };\r\n };\r\n metadata: Object;\r\n isChatTemplateSupported: boolean; // Deprecated\r\n };\r\n /**\r\n * Loaded library name for Android\r\n */\r\n androidLib?: string;\r\n gpu: boolean;\r\n reasonNoGPU: string;\r\n}\r\n\r\nexport interface NativeSessionLoadResult {\r\n tokens_loaded: number;\r\n prompt: string;\r\n}\r\n\r\nexport interface NativeLlamaMessagePart {\r\n type: 'text';\r\n text: string;\r\n}\r\n\r\nexport interface NativeLlamaChatMessage {\r\n role: string;\r\n content: string | Array<NativeLlamaMessagePart>;\r\n}\r\n\r\nexport interface FormattedChatResult {\r\n type: 'jinja' | 'llama-chat';\r\n prompt: string;\r\n has_media: boolean;\r\n media_paths?: Array<string>;\r\n}\r\n\r\nexport interface JinjaFormattedChatResult extends FormattedChatResult {\r\n chat_format?: number;\r\n grammar?: string;\r\n grammar_lazy?: boolean;\r\n grammar_triggers?: Array<{\r\n type: number;\r\n value: string;\r\n token: number;\r\n }>;\r\n thinking_forced_open?: boolean;\r\n preserved_tokens?: Array<string>;\r\n additional_stops?: Array<string>;\r\n}\r\n\r\nexport interface NativeImageProcessingResult {\r\n success: boolean;\r\n prompt: string;\r\n error?: string;\r\n}\r\n\r\nexport interface NativeRerankParams {\r\n normalize?: number;\r\n}\r\n\r\nexport interface NativeRerankResult {\r\n score: number;\r\n index: number;\r\n}\r\n\r\n// High-level types for the plugin interface\r\nexport interface LlamaCppMessagePart {\r\n type: string;\r\n text?: string;\r\n image_url?: {\r\n url?: string;\r\n };\r\n input_audio?: {\r\n format: string;\r\n data?: string;\r\n url?: string;\r\n };\r\n}\r\n\r\nexport interface LlamaCppOAICompatibleMessage {\r\n role: string;\r\n content?: string | LlamaCppMessagePart[];\r\n}\r\n\r\nexport interface ToolCall {\r\n type: 'function';\r\n id?: string;\r\n function: {\r\n name: string;\r\n arguments: string; // JSON string\r\n };\r\n}\r\n\r\nexport interface TokenData {\r\n token: string;\r\n completion_probabilities?: Array<NativeCompletionTokenProb>;\r\n // Parsed content from accumulated text\r\n content?: string;\r\n reasoning_content?: string;\r\n tool_calls?: Array<ToolCall>;\r\n accumulated_text?: string;\r\n}\r\n\r\nexport interface ContextParams extends Omit<\r\n NativeContextParams,\r\n 'cache_type_k' | 'cache_type_v' | 'pooling_type'\r\n> {\r\n cache_type_k?:\r\n | 'f16'\r\n | 'f32'\r\n | 'q8_0'\r\n | 'q4_0'\r\n | 'q4_1'\r\n | 'iq4_nl'\r\n | 
'q5_0'\r\n | 'q5_1';\r\n cache_type_v?:\r\n | 'f16'\r\n | 'f32'\r\n | 'q8_0'\r\n | 'q4_0'\r\n | 'q4_1'\r\n | 'iq4_nl'\r\n | 'q5_0'\r\n | 'q5_1';\r\n pooling_type?: 'none' | 'mean' | 'cls' | 'last' | 'rank';\r\n}\r\n\r\nexport interface EmbeddingParams extends NativeEmbeddingParams {}\r\n\r\nexport interface RerankParams {\r\n normalize?: number;\r\n}\r\n\r\nexport interface RerankResult {\r\n score: number;\r\n index: number;\r\n document?: string;\r\n}\r\n\r\nexport interface CompletionResponseFormat {\r\n type: 'text' | 'json_object' | 'json_schema';\r\n json_schema?: {\r\n strict?: boolean;\r\n schema: object;\r\n };\r\n schema?: object; // for json_object type\r\n}\r\n\r\nexport interface CompletionBaseParams {\r\n prompt?: string;\r\n messages?: LlamaCppOAICompatibleMessage[];\r\n chatTemplate?: string; // deprecated\r\n chat_template?: string;\r\n jinja?: boolean;\r\n tools?: object;\r\n parallel_tool_calls?: object;\r\n tool_choice?: string;\r\n response_format?: CompletionResponseFormat;\r\n media_paths?: string[];\r\n add_generation_prompt?: boolean;\r\n /*\r\n * Timestamp in seconds since epoch to apply to chat template's strftime_now\r\n */\r\n now?: string | number;\r\n chat_template_kwargs?: Record<string, string>;\r\n /**\r\n * Prefill text to be used for chat parsing (Generation Prompt + Content)\r\n * Used for if last assistant message is for prefill purpose\r\n */\r\n prefill_text?: string;\r\n}\r\n\r\nexport interface CompletionParams extends Omit<\r\n NativeCompletionParams,\r\n 'emit_partial_completion' | 'prompt'\r\n> {\r\n prompt?: string;\r\n messages?: LlamaCppOAICompatibleMessage[];\r\n chatTemplate?: string; // deprecated\r\n chat_template?: string;\r\n jinja?: boolean;\r\n tools?: object;\r\n parallel_tool_calls?: object;\r\n tool_choice?: string;\r\n response_format?: CompletionResponseFormat;\r\n media_paths?: string[];\r\n add_generation_prompt?: boolean;\r\n /*\r\n * Timestamp in seconds since epoch to apply to chat template's strftime_now\r\n */\r\n now?: string | number;\r\n chat_template_kwargs?: Record<string, string>;\r\n /**\r\n * Prefill text to be used for chat parsing (Generation Prompt + Content)\r\n * Used for if last assistant message is for prefill purpose\r\n */\r\n prefill_text?: string;\r\n}\r\n\r\nexport interface BenchResult {\r\n modelDesc: string;\r\n modelSize: number;\r\n modelNParams: number;\r\n ppAvg: number;\r\n ppStd: number;\r\n tgAvg: number;\r\n tgStd: number;\r\n}\r\n\r\n// Main plugin interface\r\nexport interface LlamaCppPlugin {\r\n // Core initialization and management\r\n toggleNativeLog(options: { enabled: boolean }): Promise<void>;\r\n setContextLimit(options: { limit: number }): Promise<void>;\r\n modelInfo(options: { path: string; skip?: string[] }): Promise<Object>;\r\n initContext(options: { contextId: number; params: NativeContextParams }): Promise<NativeLlamaContext>;\r\n releaseContext(options: { contextId: number }): Promise<void>;\r\n releaseAllContexts(): Promise<void>;\r\n\r\n // Chat and completion\r\n getFormattedChat(options: {\r\n contextId: number;\r\n messages: string;\r\n chatTemplate?: string;\r\n params?: {\r\n jinja?: boolean;\r\n json_schema?: string;\r\n tools?: string;\r\n parallel_tool_calls?: string;\r\n tool_choice?: string;\r\n enable_thinking?: boolean;\r\n add_generation_prompt?: boolean;\r\n now?: string;\r\n chat_template_kwargs?: string;\r\n };\r\n }): Promise<JinjaFormattedChatResult | string>;\r\n\r\n completion(options: {\r\n contextId: number;\r\n params: NativeCompletionParams;\r\n 
}): Promise<NativeCompletionResult>;\r\n\r\n stopCompletion(options: { contextId: number }): Promise<void>;\r\n\r\n // Session management\r\n loadSession(options: {\r\n contextId: number;\r\n filepath: string;\r\n }): Promise<NativeSessionLoadResult>;\r\n\r\n saveSession(options: {\r\n contextId: number;\r\n filepath: string;\r\n size: number;\r\n }): Promise<number>;\r\n\r\n // Tokenization\r\n tokenize(options: {\r\n contextId: number;\r\n text: string;\r\n imagePaths?: Array<string>;\r\n }): Promise<NativeTokenizeResult>;\r\n\r\n detokenize(options: {\r\n contextId: number;\r\n tokens: number[];\r\n }): Promise<string>;\r\n\r\n // Embeddings and reranking\r\n embedding(options: {\r\n contextId: number;\r\n text: string;\r\n params: NativeEmbeddingParams;\r\n }): Promise<NativeEmbeddingResult>;\r\n\r\n rerank(options: {\r\n contextId: number;\r\n query: string;\r\n documents: Array<string>;\r\n params?: NativeRerankParams;\r\n }): Promise<Array<NativeRerankResult>>;\r\n\r\n // Benchmarking\r\n bench(options: {\r\n contextId: number;\r\n pp: number;\r\n tg: number;\r\n pl: number;\r\n nr: number;\r\n }): Promise<string>;\r\n\r\n // LoRA adapters\r\n applyLoraAdapters(options: {\r\n contextId: number;\r\n loraAdapters: Array<{ path: string; scaled?: number }>;\r\n }): Promise<void>;\r\n\r\n removeLoraAdapters(options: { contextId: number }): Promise<void>;\r\n\r\n getLoadedLoraAdapters(options: {\r\n contextId: number;\r\n }): Promise<Array<{ path: string; scaled?: number }>>;\r\n\r\n // Multimodal methods\r\n initMultimodal(options: {\r\n contextId: number;\r\n params: {\r\n path: string;\r\n use_gpu: boolean;\r\n };\r\n }): Promise<boolean>;\r\n\r\n isMultimodalEnabled(options: {\r\n contextId: number;\r\n }): Promise<boolean>;\r\n\r\n getMultimodalSupport(options: {\r\n contextId: number;\r\n }): Promise<{\r\n vision: boolean;\r\n audio: boolean;\r\n }>;\r\n\r\n releaseMultimodal(options: {\r\n contextId: number;\r\n }): Promise<void>;\r\n\r\n // TTS methods\r\n initVocoder(options: {\r\n contextId: number;\r\n params: {\r\n path: string;\r\n n_batch?: number;\r\n };\r\n }): Promise<boolean>;\r\n\r\n isVocoderEnabled(options: { contextId: number }): Promise<boolean>;\r\n\r\n getFormattedAudioCompletion(options: {\r\n contextId: number;\r\n speakerJsonStr: string;\r\n textToSpeak: string;\r\n }): Promise<{\r\n prompt: string;\r\n grammar?: string;\r\n }>;\r\n\r\n getAudioCompletionGuideTokens(options: {\r\n contextId: number;\r\n textToSpeak: string;\r\n }): Promise<Array<number>>;\r\n\r\n decodeAudioTokens(options: {\r\n contextId: number;\r\n tokens: number[];\r\n }): Promise<Array<number>>;\r\n\r\n releaseVocoder(options: { contextId: number }): Promise<void>;\r\n\r\n // Events\r\n addListener(eventName: string, listenerFunc: (data: any) => void): Promise<void>;\r\n removeAllListeners(eventName: string): Promise<void>;\r\n}\r\n"]}
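The source map above embeds the package's src/definitions.ts, which declares the plugin's parameter and result types (context creation, sampling, structured output, embeddings, reranking). As a rough illustration of how those declarations compose, here is a minimal sketch of a typed context configuration and a structured-output completion request; the model path, schema, and sampler values are hypothetical placeholders, and the import assumes the package's main entry re-exports these types.

import type { ContextParams, CompletionParams } from 'llama-cpp-capacitor';

// Hypothetical context configuration; the model path is a placeholder.
const contextParams: ContextParams = {
  model: 'models/example.gguf',
  n_ctx: 4096,
  cache_type_k: 'q8_0', // narrowed to the declared union instead of a raw string
  cache_type_v: 'q8_0',
  pooling_type: 'none',
};

// Hypothetical structured-output request built from CompletionResponseFormat.
const completionParams: CompletionParams = {
  messages: [{ role: 'user', content: 'List three llama facts as JSON.' }],
  n_predict: 256,
  temperature: 0.8, // matches the documented default
  response_format: {
    type: 'json_schema',
    json_schema: {
      strict: true,
      schema: {
        type: 'object',
        properties: { facts: { type: 'array', items: { type: 'string' } } },
      },
    },
  },
};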
@@ -0,0 +1,180 @@
+ import type { NativeContextParams, NativeLlamaContext, NativeCompletionParams, NativeCompletionTokenProb, NativeCompletionResult, NativeTokenizeResult, NativeEmbeddingResult, NativeSessionLoadResult, NativeEmbeddingParams, NativeRerankParams, NativeRerankResult, NativeCompletionTokenProbItem, NativeCompletionResultTimings, JinjaFormattedChatResult, FormattedChatResult, NativeImageProcessingResult, LlamaCppMessagePart, LlamaCppOAICompatibleMessage, ContextParams, EmbeddingParams, RerankParams, RerankResult, CompletionResponseFormat, CompletionParams, BenchResult, LlamaCppPlugin } from './definitions';
+ export declare const LLAMACPP_MTMD_DEFAULT_MEDIA_MARKER = "<__media__>";
+ declare const LlamaCpp: LlamaCppPlugin;
+ export declare type RNLlamaMessagePart = LlamaCppMessagePart;
+ export declare type RNLlamaOAICompatibleMessage = LlamaCppOAICompatibleMessage;
+ export type { NativeContextParams, NativeLlamaContext, NativeCompletionParams, NativeCompletionTokenProb, NativeCompletionResult, NativeTokenizeResult, NativeEmbeddingResult, NativeSessionLoadResult, NativeEmbeddingParams, NativeRerankParams, NativeRerankResult, NativeCompletionTokenProbItem, NativeCompletionResultTimings, FormattedChatResult, JinjaFormattedChatResult, NativeImageProcessingResult, ContextParams, EmbeddingParams, RerankParams, RerankResult, CompletionResponseFormat, CompletionParams, BenchResult, };
+ export declare const RNLLAMA_MTMD_DEFAULT_MEDIA_MARKER = "<__media__>";
+ export declare type ToolCall = {
+     type: 'function';
+     id?: string;
+     function: {
+         name: string;
+         arguments: string;
+     };
+ };
+ export declare type TokenData = {
+     token: string;
+     completion_probabilities?: Array<NativeCompletionTokenProb>;
+     content?: string;
+     reasoning_content?: string;
+     tool_calls?: Array<ToolCall>;
+     accumulated_text?: string;
+ };
+ export declare class LlamaContext {
+     id: number;
+     gpu: boolean;
+     reasonNoGPU: string;
+     model: NativeLlamaContext['model'];
+     constructor({ contextId, gpu, reasonNoGPU, model }: NativeLlamaContext);
+     /**
+      * Load cached prompt & completion state from a file.
+      */
+     loadSession(filepath: string): Promise<NativeSessionLoadResult>;
+     /**
+      * Save current cached prompt & completion state to a file.
+      */
+     saveSession(filepath: string, options?: {
+         tokenSize: number;
+     }): Promise<number>;
+     isLlamaChatSupported(): boolean;
+     isJinjaSupported(): boolean;
+     getFormattedChat(messages: RNLlamaOAICompatibleMessage[], template?: string | null, params?: {
+         jinja?: boolean;
+         response_format?: CompletionResponseFormat;
+         tools?: object;
+         parallel_tool_calls?: object;
+         tool_choice?: string;
+         enable_thinking?: boolean;
+         add_generation_prompt?: boolean;
+         now?: string | number;
+         chat_template_kwargs?: Record<string, string>;
+     }): Promise<FormattedChatResult | JinjaFormattedChatResult>;
+     /**
+      * Generate a completion based on the provided parameters
+      * @param params Completion parameters including prompt or messages
+      * @param callback Optional callback for token-by-token streaming
+      * @returns Promise resolving to the completion result
+      *
+      * Note: For multimodal support, you can include an media_paths parameter.
+      * This will process the images and add them to the context before generating text.
+      * Multimodal support must be enabled via initMultimodal() first.
+      */
+     completion(params: CompletionParams, callback?: (data: TokenData) => void): Promise<NativeCompletionResult>;
+     stopCompletion(): Promise<void>;
+     /**
+      * Tokenize text or text with images
+      * @param text Text to tokenize
+      * @param params.media_paths Array of image paths to tokenize (if multimodal is enabled)
+      * @returns Promise resolving to the tokenize result
+      */
+     tokenize(text: string, { media_paths: mediaPaths, }?: {
+         media_paths?: string[];
+     }): Promise<NativeTokenizeResult>;
+     detokenize(tokens: number[]): Promise<string>;
+     embedding(text: string, params?: EmbeddingParams): Promise<NativeEmbeddingResult>;
+     /**
+      * Rerank documents based on relevance to a query
+      * @param query The query text to rank documents against
+      * @param documents Array of document texts to rank
+      * @param params Optional reranking parameters
+      * @returns Promise resolving to an array of ranking results with scores and indices
+      */
+     rerank(query: string, documents: string[], params?: RerankParams): Promise<RerankResult[]>;
+     bench(pp: number, tg: number, pl: number, nr: number): Promise<BenchResult>;
+     applyLoraAdapters(loraList: Array<{
+         path: string;
+         scaled?: number;
+     }>): Promise<void>;
+     removeLoraAdapters(): Promise<void>;
+     getLoadedLoraAdapters(): Promise<Array<{
+         path: string;
+         scaled?: number;
+     }>>;
+     /**
+      * Initialize multimodal support with a mmproj file
+      * @param params Parameters for multimodal support
+      * @param params.path Path to the multimodal projector file
+      * @param params.use_gpu Whether to use GPU
+      * @returns Promise resolving to true if initialization was successful
+      */
+     initMultimodal({ path, use_gpu: useGpu, }: {
+         path: string;
+         use_gpu?: boolean;
+     }): Promise<boolean>;
+     /**
+      * Check if multimodal support is enabled
+      * @returns Promise resolving to true if multimodal is enabled
+      */
+     isMultimodalEnabled(): Promise<boolean>;
+     /**
+      * Check multimodal support
+      * @returns Promise resolving to an object with vision and audio support
+      */
+     getMultimodalSupport(): Promise<{
+         vision: boolean;
+         audio: boolean;
+     }>;
+     /**
+      * Release multimodal support
+      * @returns Promise resolving to void
+      */
+     releaseMultimodal(): Promise<void>;
+     /**
+      * Initialize TTS support with a vocoder model
+      * @param params Parameters for TTS support
+      * @param params.path Path to the vocoder model
+      * @param params.n_batch Batch size for the vocoder model
+      * @returns Promise resolving to true if initialization was successful
+      */
+     initVocoder({ path, n_batch: nBatch }: {
+         path: string;
+         n_batch?: number;
+     }): Promise<boolean>;
+     /**
+      * Check if TTS support is enabled
+      * @returns Promise resolving to true if TTS is enabled
+      */
+     isVocoderEnabled(): Promise<boolean>;
+     /**
+      * Get a formatted audio completion prompt
+      * @param speakerJsonStr JSON string representing the speaker
+      * @param textToSpeak Text to speak
+      * @returns Promise resolving to the formatted audio completion result with prompt and grammar
+      */
+     getFormattedAudioCompletion(speaker: object | null, textToSpeak: string): Promise<{
+         prompt: string;
+         grammar?: string;
+     }>;
+     /**
+      * Get guide tokens for audio completion
+      * @param textToSpeak Text to speak
+      * @returns Promise resolving to the guide tokens
+      */
+     getAudioCompletionGuideTokens(textToSpeak: string): Promise<Array<number>>;
+     /**
+      * Decode audio tokens
+      * @param tokens Array of audio tokens
+      * @returns Promise resolving to the decoded audio tokens
+      */
+     decodeAudioTokens(tokens: number[]): Promise<Array<number>>;
+     /**
+      * Release TTS support
+      * @returns Promise resolving to void
+      */
+     releaseVocoder(): Promise<void>;
+     release(): Promise<void>;
+ }
+ export declare function toggleNativeLog(enabled: boolean): Promise<void>;
+ export declare function addNativeLogListener(listener: (level: string, text: string) => void): {
+     remove: () => void;
+ };
+ export declare function setContextLimit(limit: number): Promise<void>;
+ export declare function loadLlamaModelInfo(model: string): Promise<Object>;
+ export declare function initLlama({ model, is_model_asset: isModelAsset, pooling_type: poolingType, lora, lora_list: loraList, ...rest }: ContextParams, onProgress?: (progress: number) => void): Promise<LlamaContext>;
+ export declare function releaseAllLlama(): Promise<void>;
+ export declare const BuildInfo: {
+     number: string;
+     commit: string;
+ };
+ export { LlamaCpp };
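Taken together, these declarations suggest the intended runtime flow: initLlama creates a LlamaContext from ContextParams, completion generates text (optionally streaming TokenData through a callback), and release or releaseAllLlama frees the native context. A minimal sketch under those declarations follows; the model path and prompt are placeholders, error handling is omitted, and the import assumes the package's main entry exposes these functions.

import { initLlama, releaseAllLlama } from 'llama-cpp-capacitor';

async function runChat(): Promise<void> {
  // Hypothetical on-device model path (placeholder).
  const context = await initLlama(
    { model: 'models/example.gguf', n_ctx: 2048 },
    (progress) => console.log('load progress:', progress),
  );

  // Stream tokens through the optional callback; the resolved value carries
  // the final text, timings, and stop information.
  const result = await context.completion(
    {
      messages: [{ role: 'user', content: 'Write a haiku about the sea.' }],
      n_predict: 64,
    },
    (data) => console.log('token:', data.token),
  );

  console.log('text:', result.text);
  await releaseAllLlama();
}

The multimodal and TTS helpers (initMultimodal, initVocoder, and their companions) follow the same per-context pattern declared above: initialize against an existing context, use, then release.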