node-llama-cpp 3.15.1 → 3.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. package/dist/bindings/AddonTypes.d.ts +8 -1
  2. package/dist/bindings/getLlama.d.ts +1 -1
  3. package/dist/bindings/getLlama.js +1 -1
  4. package/dist/bindings/getLlama.js.map +1 -1
  5. package/dist/chatWrappers/generic/utils/extractFunctionCallSettingsFromJinjaTemplate.js +67 -8
  6. package/dist/chatWrappers/generic/utils/extractFunctionCallSettingsFromJinjaTemplate.js.map +1 -1
  7. package/dist/chatWrappers/generic/utils/extractSegmentSettingsFromTokenizerAndChatTemplate.js +2 -1
  8. package/dist/chatWrappers/generic/utils/extractSegmentSettingsFromTokenizerAndChatTemplate.js.map +1 -1
  9. package/dist/cli/commands/ChatCommand.d.ts +6 -0
  10. package/dist/cli/commands/ChatCommand.js +66 -3
  11. package/dist/cli/commands/ChatCommand.js.map +1 -1
  12. package/dist/cli/commands/CompleteCommand.d.ts +6 -0
  13. package/dist/cli/commands/CompleteCommand.js +66 -4
  14. package/dist/cli/commands/CompleteCommand.js.map +1 -1
  15. package/dist/cli/commands/InfillCommand.d.ts +6 -0
  16. package/dist/cli/commands/InfillCommand.js +66 -4
  17. package/dist/cli/commands/InfillCommand.js.map +1 -1
  18. package/dist/cli/utils/parseXtcArg.d.ts +5 -0
  19. package/dist/cli/utils/parseXtcArg.js +16 -0
  20. package/dist/cli/utils/parseXtcArg.js.map +1 -0
  21. package/dist/evaluator/LlamaChat/LlamaChat.d.ts +36 -1
  22. package/dist/evaluator/LlamaChat/LlamaChat.js +29 -10
  23. package/dist/evaluator/LlamaChat/LlamaChat.js.map +1 -1
  24. package/dist/evaluator/LlamaChatSession/LlamaChatSession.d.ts +83 -2
  25. package/dist/evaluator/LlamaChatSession/LlamaChatSession.js +11 -5
  26. package/dist/evaluator/LlamaChatSession/LlamaChatSession.js.map +1 -1
  27. package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.d.ts +2 -0
  28. package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.js.map +1 -1
  29. package/dist/evaluator/LlamaCompletion.d.ts +36 -3
  30. package/dist/evaluator/LlamaCompletion.js +7 -4
  31. package/dist/evaluator/LlamaCompletion.js.map +1 -1
  32. package/dist/evaluator/LlamaContext/LlamaContext.js +67 -35
  33. package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -1
  34. package/dist/evaluator/LlamaContext/LlamaSampler.js +8 -0
  35. package/dist/evaluator/LlamaContext/LlamaSampler.js.map +1 -1
  36. package/dist/evaluator/LlamaContext/tokenPredictors/DraftSequenceTokenPredictor.d.ts +1 -1
  37. package/dist/evaluator/LlamaContext/types.d.ts +113 -0
  38. package/dist/evaluator/LlamaModel/LlamaModel.d.ts +2 -2
  39. package/dist/evaluator/LlamaModel/LlamaModel.js +1 -1
  40. package/dist/evaluator/LlamaModel/LlamaModel.js.map +1 -1
  41. package/dist/gguf/insights/GgufInsights.js +4 -0
  42. package/dist/gguf/insights/GgufInsights.js.map +1 -1
  43. package/dist/gguf/types/GgufMetadataTypes.d.ts +5 -0
  44. package/dist/gguf/types/GgufMetadataTypes.js +5 -0
  45. package/dist/gguf/types/GgufMetadataTypes.js.map +1 -1
  46. package/dist/tsconfig.tsbuildinfo +1 -1
  47. package/dist/types.d.ts +51 -0
  48. package/dist/types.js.map +1 -1
  49. package/dist/utils/cmake.js +6 -3
  50. package/dist/utils/cmake.js.map +1 -1
  51. package/llama/addon/AddonContext.cpp +19 -5
  52. package/llama/addon/AddonContext.h +1 -1
  53. package/llama/addon/AddonSampler.cpp +158 -0
  54. package/llama/addon/AddonSampler.h +13 -1
  55. package/llama/addon/globals/getGpuInfo.cpp +1 -1
  56. package/llama/binariesGithubRelease.json +1 -1
  57. package/llama/gitRelease.bundle +0 -0
  58. package/llama/gpuInfo/vulkan-gpu-info.cpp +12 -5
  59. package/llama/llama.cpp.info.json +1 -1
  60. package/package.json +63 -62
  61. package/templates/packed/electron-typescript-react.json +1 -1
  62. package/templates/packed/node-typescript.json +1 -1
package/dist/cli/utils/parseXtcArg.d.ts
@@ -0,0 +1,5 @@
+ export declare function parseXtcArg(xtcString?: string): ParsedXtcArg | undefined;
+ export type ParsedXtcArg = {
+     probability: number;
+     threshold: number;
+ };
package/dist/cli/utils/parseXtcArg.js
@@ -0,0 +1,16 @@
+ const xtcArgFormat = /^(\d+|\d*\.\d+),(\d*|\d?\.\d+)$/;
+ export function parseXtcArg(xtcString) {
+     if (xtcString == null || xtcString === "")
+         return undefined;
+     const match = xtcString.match(xtcArgFormat);
+     if (match != null && match[1] != null && match[2] != null) {
+         const probability = parseFloat(match[1]);
+         const threshold = parseFloat(match[2]);
+         if (probability >= 0 && probability <= 1 && threshold >= 0 && threshold <= 1) {
+             return { probability, threshold };
+         }
+     }
+     throw new Error(`Invalid xtc argument: ${xtcString}. ` +
+         'Expected format: "probability,threshold" where probability and threshold are numbers between 0 and 1.');
+ }
+ //# sourceMappingURL=parseXtcArg.js.map
package/dist/cli/utils/parseXtcArg.js.map
@@ -0,0 +1 @@
+ {"version":3,"file":"parseXtcArg.js","sourceRoot":"","sources":["../../../src/cli/utils/parseXtcArg.ts"],"names":[],"mappings":"AAAA,MAAM,YAAY,GAAG,iCAAiC,CAAC;AAEvD,MAAM,UAAU,WAAW,CAAC,SAAkB;IAC1C,IAAI,SAAS,IAAI,IAAI,IAAI,SAAS,KAAK,EAAE;QACrC,OAAO,SAAS,CAAC;IAErB,MAAM,KAAK,GAAG,SAAS,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC;IAC5C,IAAI,KAAK,IAAI,IAAI,IAAI,KAAK,CAAC,CAAC,CAAC,IAAI,IAAI,IAAI,KAAK,CAAC,CAAC,CAAC,IAAI,IAAI,EAAE,CAAC;QACxD,MAAM,WAAW,GAAG,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;QACzC,MAAM,SAAS,GAAG,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;QAEvC,IAAI,WAAW,IAAI,CAAC,IAAI,WAAW,IAAI,CAAC,IAAI,SAAS,IAAI,CAAC,IAAI,SAAS,IAAI,CAAC,EAAE,CAAC;YAC3E,OAAO,EAAC,WAAW,EAAE,SAAS,EAAC,CAAC;QACpC,CAAC;IACL,CAAC;IAED,MAAM,IAAI,KAAK,CACX,yBAAyB,SAAS,IAAI;QACtC,uGAAuG,CAC1G,CAAC;AACN,CAAC"}
package/dist/evaluator/LlamaChat/LlamaChat.d.ts
@@ -1,7 +1,7 @@
  import { EventRelay } from "lifecycle-utils";
  import { ChatWrapper } from "../../ChatWrapper.js";
  import { LlamaContextSequence } from "../LlamaContext/LlamaContext.js";
- import { ChatHistoryItem, ChatModelFunctions, ChatModelSegmentType, LLamaContextualRepeatPenalty, Token, Tokenizer } from "../../types.js";
+ import { ChatHistoryItem, ChatModelFunctions, ChatModelSegmentType, LLamaContextualRepeatPenalty, Token, Tokenizer, LLamaContextualDryRepeatPenalty } from "../../types.js";
  import { GbnfJsonSchemaToType } from "../../utils/gbnfJson/types.js";
  import { LlamaGrammar } from "../LlamaGrammar.js";
  import { LlamaText, LlamaTextJSON } from "../../utils/LlamaText.js";
@@ -191,6 +191,28 @@ export type LLamaChatGenerateResponseOptions<Functions extends ChatModelFunction
       * Only relevant when using `temperature`.
       */
      seed?: number;
+     /**
+      * Exclude Top Choices (XTC) removes the top tokens from consideration and avoids more obvious and repetitive generations.
+      * Using it leads to more creative responses, but also to increased hallucinations.
+      *
+      * The `probability` value controls the chance that the top tokens will be removed in the next token generation step.
+      * The `threshold` value controls the minimum probability of a token for it to be removed.
+      *
+      * Start with `{probability: 0.5, threshold: 0.1}` and adjust from there.
+      *
+      * Disabled by default.
+      */
+     xtc?: {
+         /**
+          * A number between `0` and `1` representing the probability of applying Exclude Top Choices (XTC) at each token generation step.
+          */
+         probability: number;
+         /**
+          * A number between `0` and `1` representing the minimum probability
+          * of a token for it to be removed when applying Exclude Top Choices (XTC).
+          */
+         threshold: number;
+     };
      /**
       * Trim whitespace from the end of the generated text
       *
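The doc comment above describes XTC behaviorally. As a rough sketch of the sampling step it refers to (an illustration of the general XTC technique, not code from this package): with chance `probability`, every candidate token whose probability is at or above `threshold` is dropped except the least likely of them, so sampling falls through to less obvious choices.

```ts
// Illustrative sketch of an Exclude Top Choices (XTC) step; not this package's implementation.
// `candidates` is assumed to be sorted by probability, highest first.
function applyXtc(
    candidates: Array<{token: number, prob: number}>,
    probability: number,
    threshold: number,
    random: () => number = Math.random
) {
    if (random() >= probability)
        return candidates; // XTC is skipped for this generation step

    // the "top choices" are all tokens at or above the threshold
    const topChoices = candidates.filter((candidate) => candidate.prob >= threshold);
    if (topChoices.length < 2)
        return candidates; // nothing to exclude without emptying the distribution

    // drop every top choice except the least likely one
    const keptTopChoice = topChoices[topChoices.length - 1]!;
    return candidates.filter((candidate) => candidate.prob < threshold || candidate === keptTopChoice);
}
```

This is why raising `threshold` makes XTC trigger less often (fewer tokens qualify as "top choices"), while `probability` controls how often a qualifying step is actually modified.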
@@ -198,6 +220,17 @@ export type LLamaChatGenerateResponseOptions<Functions extends ChatModelFunction
       */
      trimWhitespaceSuffix?: boolean;
      repeatPenalty?: false | LLamaContextualRepeatPenalty;
+     /**
+      * DRY (Don't Repeat Yourself) penalty is a technique to reduce repetitions in the generated text
+      * by penalizing tokens based on recent token usage patterns.
+      *
+      * With the right choice of parameters, it makes it impossible for the model to
+      * repeat itself verbatim with the same tokens in the same order (the model can still repeat itself by
+      * using different tokens or by paraphrasing, but that is far less of an issue than broken-record looping).
+      *
+      * Disabled by default.
+      */
+     dryRepeatPenalty?: LLamaContextualDryRepeatPenalty;
      /**
       * Adjust the probability of tokens being generated.
       * Can be used to bias the model to generate tokens that you want it to lean towards,
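For context on what the option does: DRY, as popularized in the wider llama.cpp ecosystem, tracks how long a suffix of the generated text has already occurred earlier in the context and penalizes the token that would extend that repetition, with the penalty growing exponentially in the repetition length. A sketch of that commonly cited formula follows; the parameter names are illustrative and are not necessarily the fields of `LLamaContextualDryRepeatPenalty`, which this diff does not show:

```ts
// Illustrative DRY penalty formula (parameter names are assumptions, not this package's API).
function dryPenalty(
    matchLength: number,   // length of the repeated suffix the candidate token would extend
    multiplier: number,    // overall penalty strength, e.g. 0.8
    base: number,          // exponential growth factor, e.g. 1.75
    allowedLength: number  // repetitions up to this length go unpenalized, e.g. 2
): number {
    if (matchLength < allowedLength)
        return 0;

    // subtracted from the candidate token's logit before sampling;
    // longer verbatim repetitions are penalized exponentially harder
    return multiplier * Math.pow(base, matchLength - allowedLength);
}
```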
@@ -321,8 +354,10 @@ export type LLamaChatLoadAndCompleteUserMessageOptions<Functions extends ChatMod
      topK?: LLamaChatGenerateResponseOptions<Functions>["topK"];
      topP?: LLamaChatGenerateResponseOptions<Functions>["topP"];
      seed?: LLamaChatGenerateResponseOptions<Functions>["seed"];
+     xtc?: LLamaChatGenerateResponseOptions<Functions>["xtc"];
      trimWhitespaceSuffix?: LLamaChatGenerateResponseOptions<Functions>["trimWhitespaceSuffix"];
      repeatPenalty?: LLamaChatGenerateResponseOptions<Functions>["repeatPenalty"];
+     dryRepeatPenalty?: LLamaChatGenerateResponseOptions<Functions>["dryRepeatPenalty"];
      tokenBias?: LLamaChatGenerateResponseOptions<Functions>["tokenBias"];
      evaluationPriority?: LLamaChatGenerateResponseOptions<Functions>["evaluationPriority"];
      contextShift?: LLamaChatGenerateResponseOptions<Functions>["contextShift"];
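Putting the new options together, here is a minimal usage sketch, assuming they are surfaced on `LlamaChatSession.prompt()` the same way they are typed here (the `LlamaChatSession.d.ts` changes in the file list suggest so); the model path and parameter values are placeholders:

```ts
import {getLlama, LlamaChatSession} from "node-llama-cpp";

const llama = await getLlama();
const model = await llama.loadModel({modelPath: "path/to/model.gguf"}); // placeholder path
const context = await model.createContext();
const session = new LlamaChatSession({contextSequence: context.getSequence()});

const answer = await session.prompt("Write a short story about a lighthouse", {
    temperature: 0.9,
    // the doc comment above suggests starting with these XTC values and tuning from there
    xtc: {probability: 0.5, threshold: 0.1}
    // `dryRepeatPenalty` is also accepted alongside `repeatPenalty`; its fields come from
    // LLamaContextualDryRepeatPenalty, which is not shown in this diff
});
console.log(answer);
```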
package/dist/evaluator/LlamaChat/LlamaChat.js
@@ -80,7 +80,7 @@ export class LlamaChat {
          return this.sequence.model;
      }
      async generateResponse(history, options = {}) {
-         const { onTextChunk, onToken, onResponseChunk, onFunctionCallParamsChunk, budgets, signal, stopOnAbortSignal = false, maxTokens, temperature, minP, topK, topP, seed, grammar, trimWhitespaceSuffix = defaultTrimWhitespaceSuffix, repeatPenalty = {}, tokenBias, evaluationPriority = defaultEvaluationPriority, functions, onFunctionCall, documentFunctionParams, maxParallelFunctionCalls, contextShift = defaultContextShiftOptions, customStopTriggers, abortOnNonText = false, lastEvaluationContextWindow: { history: lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift = 0.5 } = {} } = options;
+         const { onTextChunk, onToken, onResponseChunk, onFunctionCallParamsChunk, budgets, signal, stopOnAbortSignal = false, maxTokens, temperature, minP, topK, topP, seed, xtc, grammar, trimWhitespaceSuffix = defaultTrimWhitespaceSuffix, repeatPenalty = {}, dryRepeatPenalty, tokenBias, evaluationPriority = defaultEvaluationPriority, functions, onFunctionCall, documentFunctionParams, maxParallelFunctionCalls, contextShift = defaultContextShiftOptions, customStopTriggers, abortOnNonText = false, lastEvaluationContextWindow: { history: lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift = 0.5 } = {} } = options;
          this.sequence.tokenPredictor?.updateInputTokens?.(this.model.tokenize(findLastUserMessageInChatHistory(history)?.text ?? ""));
          const generateResponseState = new GenerateResponseState(this, this._chatWrapper, history, {
              onTextChunk,
@@ -96,9 +96,11 @@
              topK,
              topP,
              seed,
+             xtc,
              grammar: grammar, // this is a workaround to allow passing both `functions` and `grammar`
              trimWhitespaceSuffix,
              repeatPenalty,
+             dryRepeatPenalty,
              tokenBias,
              evaluationPriority,
              functions,
@@ -202,7 +204,7 @@
          });
      }
      async loadChatAndCompleteUserMessage(history, options = {}) {
-         const { initialUserPrompt = "", stopOnAbortSignal = false, onTextChunk, onToken, signal, maxTokens = defaultMaxPreloadTokens(this.sequence), temperature, minP, topK, topP, seed, grammar, trimWhitespaceSuffix = defaultTrimWhitespaceSuffix, repeatPenalty = {}, tokenBias, evaluationPriority = defaultEvaluationPriority, functions, documentFunctionParams, contextShift = defaultContextShiftOptions, customStopTriggers, lastEvaluationContextWindow: { history: lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift = 0.8 } = {} } = options;
+         const { initialUserPrompt = "", stopOnAbortSignal = false, onTextChunk, onToken, signal, maxTokens = defaultMaxPreloadTokens(this.sequence), temperature, minP, topK, topP, seed, xtc, grammar, trimWhitespaceSuffix = defaultTrimWhitespaceSuffix, repeatPenalty = {}, dryRepeatPenalty, tokenBias, evaluationPriority = defaultEvaluationPriority, functions, documentFunctionParams, contextShift = defaultContextShiftOptions, customStopTriggers, lastEvaluationContextWindow: { history: lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift = 0.8 } = {} } = options;
          this.sequence.tokenPredictor?.updateInputTokens?.(this.model.tokenize((findLastModelMessageInChatHistory(history)?.response ?? [])
              .map((item) => {
              if (typeof item === "string")
@@ -227,9 +229,11 @@
              topK,
              topP,
              seed,
+             xtc,
              grammar: grammar, // this is a workaround to allow passing both `functions` and `grammar`
              trimWhitespaceSuffix,
              repeatPenalty,
+             dryRepeatPenalty,
              tokenBias,
              evaluationPriority,
              functions,
@@ -721,6 +725,7 @@ class GenerateResponseState
      topK;
      topP;
      seed;
+     xtc;
      grammar;
      trimWhitespaceSuffix;
      tokenBias;
@@ -737,6 +742,7 @@
      repeatPenaltyEnabled;
      resolvedContextShift;
      resolvedRepeatPenalty;
+     dryRepeatPenalty;
      grammarEvaluationState;
      functionNameGrammar;
      functionsGrammar;
@@ -798,7 +804,7 @@
      currentTokens = [];
      currentText = "";
      currentQueuedTokenRelease;
-     constructor(llamaChat, chatWrapper, history, { onTextChunk, onToken, onResponseChunk, onFunctionCallParamsChunk, budgets, signal, stopOnAbortSignal = false, maxTokens, temperature, minP, topK, topP, seed, grammar, trimWhitespaceSuffix = defaultTrimWhitespaceSuffix, repeatPenalty = {}, tokenBias, evaluationPriority = defaultEvaluationPriority, functions, onFunctionCall, documentFunctionParams, maxParallelFunctionCalls, contextShift = defaultContextShiftOptions, customStopTriggers, abortOnNonText, lastEvaluationContextWindow: { history: lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift = 0.5 } = {} } = {}) {
+     constructor(llamaChat, chatWrapper, history, { onTextChunk, onToken, onResponseChunk, onFunctionCallParamsChunk, budgets, signal, stopOnAbortSignal = false, maxTokens, temperature, minP, topK, topP, seed, xtc, grammar, trimWhitespaceSuffix = defaultTrimWhitespaceSuffix, repeatPenalty = {}, dryRepeatPenalty, tokenBias, evaluationPriority = defaultEvaluationPriority, functions, onFunctionCall, documentFunctionParams, maxParallelFunctionCalls, contextShift = defaultContextShiftOptions, customStopTriggers, abortOnNonText, lastEvaluationContextWindow: { history: lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift = 0.5 } = {} } = {}) {
          this.llamaChat = llamaChat;
          this.chatWrapper = chatWrapper;
          this.history = history;
@@ -815,6 +821,7 @@
          this.topK = topK;
          this.topP = topP;
          this.seed = seed;
+         this.xtc = xtc;
          this.grammar = grammar;
          this.trimWhitespaceSuffix = trimWhitespaceSuffix;
          this.tokenBias = tokenBias;
@@ -847,6 +854,7 @@
              lastTokens: repeatPenalty?.lastTokens ?? defaultRepeatPenaltyLastTokens
          };
          this.repeatPenaltyEnabled = this.resolvedRepeatPenalty.lastTokens > 0;
+         this.dryRepeatPenalty = dryRepeatPenalty;
          this.grammarEvaluationState = this.grammar != null
              ? new LlamaGrammarEvaluationState({ model: this.llamaChat.model, grammar: this.grammar })
              : undefined;
@@ -863,11 +871,16 @@
          if (this.grammar != null)
              StopGenerationDetector.resolveStopTriggers(this.grammar.stopGenerationTriggers, this.llamaChat.model.tokenizer)
                  .map((stopTrigger) => this.stopGenerationDetector.addStopTrigger(stopTrigger));
-         if (this.functions != null && Object.keys(this.functions).length > 0 && !this.abortOnNonText)
-             this.functionSyntaxStartDetector.addStopTrigger(StopGenerationDetector.resolveLlamaTextTrigger(LlamaText([
+         if (this.functions != null && Object.keys(this.functions).length > 0 && !this.abortOnNonText) {
+             for (const sectionPrefix of [
                  this.chatWrapper.settings.functions?.parallelism?.call?.sectionPrefix ?? "",
-                 this.chatWrapper.settings.functions.call.prefix
-             ]), this.llamaChat.model.tokenizer));
+                 ...(this.chatWrapper.settings.functions?.parallelism?.call.sectionPrefixAlternateMatches ?? [])
+             ])
+                 this.functionSyntaxStartDetector.addStopTrigger(StopGenerationDetector.resolveLlamaTextTrigger(LlamaText([
+                     sectionPrefix,
+                     this.chatWrapper.settings.functions.call.prefix
+                 ]), this.llamaChat.model.tokenizer));
+         }
          const segmentDefinitions = new Map();
          for (const segmentType of allSegmentTypes) {
              const segmentDefinition = getChatWrapperSegmentDefinition(this.chatWrapper.settings, segmentType);
@@ -889,10 +902,14 @@
                  : SegmentHandler.getSegmentTokenCounts(lastModelMessageFullResponse, this.llamaChat.model.tokenizer)
          });
          if (this.abortOnNonText) {
-             this.stopGenerationDetector.addStopTrigger(StopGenerationDetector.resolveLlamaTextTrigger(LlamaText([
+             for (const sectionPrefix of [
                  this.chatWrapper.settings.functions?.parallelism?.call?.sectionPrefix ?? "",
-                 this.chatWrapper.settings.functions.call.prefix
-             ]), this.llamaChat.model.tokenizer));
+                 ...(this.chatWrapper.settings.functions?.parallelism?.call.sectionPrefixAlternateMatches ?? [])
+             ])
+                 this.stopGenerationDetector.addStopTrigger(StopGenerationDetector.resolveLlamaTextTrigger(LlamaText([
+                     sectionPrefix,
+                     this.chatWrapper.settings.functions.call.prefix
+                 ]), this.llamaChat.model.tokenizer));
              for (const segmentType of allSegmentTypes) {
                  const segmentDefinition = getChatWrapperSegmentDefinition(this.chatWrapper.settings, segmentType);
                  if (segmentDefinition != null)
@@ -1765,6 +1782,7 @@
              topK: this.topK,
              topP: this.topP,
              seed: this.seed,
+             xtc: this.xtc,
              grammarEvaluationState: () => {
                  if (this.functionEvaluationMode !== false)
                      return this.functionsEvaluationState;
@@ -1777,6 +1795,7 @@
                  frequencyPenalty: this.resolvedRepeatPenalty.frequencyPenalty,
                  presencePenalty: this.resolvedRepeatPenalty.presencePenalty
              },
+             dryRepeatPenalty: this.dryRepeatPenalty,
              tokenBias: this.tokenBias,
              evaluationPriority: this.evaluationPriority,
              yieldEogToken: true