node-llama-cpp 3.15.1 → 3.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bindings/AddonTypes.d.ts +8 -1
- package/dist/bindings/getLlama.d.ts +1 -1
- package/dist/bindings/getLlama.js +1 -1
- package/dist/bindings/getLlama.js.map +1 -1
- package/dist/chatWrappers/generic/utils/extractFunctionCallSettingsFromJinjaTemplate.js +67 -8
- package/dist/chatWrappers/generic/utils/extractFunctionCallSettingsFromJinjaTemplate.js.map +1 -1
- package/dist/chatWrappers/generic/utils/extractSegmentSettingsFromTokenizerAndChatTemplate.js +2 -1
- package/dist/chatWrappers/generic/utils/extractSegmentSettingsFromTokenizerAndChatTemplate.js.map +1 -1
- package/dist/cli/commands/ChatCommand.d.ts +6 -0
- package/dist/cli/commands/ChatCommand.js +66 -3
- package/dist/cli/commands/ChatCommand.js.map +1 -1
- package/dist/cli/commands/CompleteCommand.d.ts +6 -0
- package/dist/cli/commands/CompleteCommand.js +66 -4
- package/dist/cli/commands/CompleteCommand.js.map +1 -1
- package/dist/cli/commands/InfillCommand.d.ts +6 -0
- package/dist/cli/commands/InfillCommand.js +66 -4
- package/dist/cli/commands/InfillCommand.js.map +1 -1
- package/dist/cli/utils/parseXtcArg.d.ts +5 -0
- package/dist/cli/utils/parseXtcArg.js +16 -0
- package/dist/cli/utils/parseXtcArg.js.map +1 -0
- package/dist/evaluator/LlamaChat/LlamaChat.d.ts +36 -1
- package/dist/evaluator/LlamaChat/LlamaChat.js +29 -10
- package/dist/evaluator/LlamaChat/LlamaChat.js.map +1 -1
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.d.ts +83 -2
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.js +11 -5
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.js.map +1 -1
- package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.d.ts +2 -0
- package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.js.map +1 -1
- package/dist/evaluator/LlamaCompletion.d.ts +36 -3
- package/dist/evaluator/LlamaCompletion.js +7 -4
- package/dist/evaluator/LlamaCompletion.js.map +1 -1
- package/dist/evaluator/LlamaContext/LlamaContext.js +67 -35
- package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -1
- package/dist/evaluator/LlamaContext/LlamaSampler.js +8 -0
- package/dist/evaluator/LlamaContext/LlamaSampler.js.map +1 -1
- package/dist/evaluator/LlamaContext/tokenPredictors/DraftSequenceTokenPredictor.d.ts +1 -1
- package/dist/evaluator/LlamaContext/types.d.ts +113 -0
- package/dist/evaluator/LlamaModel/LlamaModel.d.ts +2 -2
- package/dist/evaluator/LlamaModel/LlamaModel.js +1 -1
- package/dist/evaluator/LlamaModel/LlamaModel.js.map +1 -1
- package/dist/gguf/insights/GgufInsights.js +4 -0
- package/dist/gguf/insights/GgufInsights.js.map +1 -1
- package/dist/gguf/types/GgufMetadataTypes.d.ts +5 -0
- package/dist/gguf/types/GgufMetadataTypes.js +5 -0
- package/dist/gguf/types/GgufMetadataTypes.js.map +1 -1
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/dist/types.d.ts +51 -0
- package/dist/types.js.map +1 -1
- package/dist/utils/cmake.js +6 -3
- package/dist/utils/cmake.js.map +1 -1
- package/llama/addon/AddonContext.cpp +19 -5
- package/llama/addon/AddonContext.h +1 -1
- package/llama/addon/AddonSampler.cpp +158 -0
- package/llama/addon/AddonSampler.h +13 -1
- package/llama/addon/globals/getGpuInfo.cpp +1 -1
- package/llama/binariesGithubRelease.json +1 -1
- package/llama/gitRelease.bundle +0 -0
- package/llama/gpuInfo/vulkan-gpu-info.cpp +12 -5
- package/llama/llama.cpp.info.json +1 -1
- package/package.json +63 -62
- package/templates/packed/electron-typescript-react.json +1 -1
- package/templates/packed/node-typescript.json +1 -1
@@ -0,0 +1,16 @@
+const xtcArgFormat = /^(\d+|\d*\.\d+),(\d*|\d?\.\d+)$/;
+export function parseXtcArg(xtcString) {
+    if (xtcString == null || xtcString === "")
+        return undefined;
+    const match = xtcString.match(xtcArgFormat);
+    if (match != null && match[1] != null && match[2] != null) {
+        const probability = parseFloat(match[1]);
+        const threshold = parseFloat(match[2]);
+        if (probability >= 0 && probability <= 1 && threshold >= 0 && threshold <= 1) {
+            return { probability, threshold };
+        }
+    }
+    throw new Error(`Invalid xtc argument: ${xtcString}. ` +
+        'Expected format: "probability,threshold" where probability and threshold are numbers between 0 and 1.');
+}
+//# sourceMappingURL=parseXtcArg.js.map
@@ -0,0 +1 @@
+{"version":3,"file":"parseXtcArg.js","sourceRoot":"","sources":["../../../src/cli/utils/parseXtcArg.ts"],"names":[],"mappings":"AAAA,MAAM,YAAY,GAAG,iCAAiC,CAAC;AAEvD,MAAM,UAAU,WAAW,CAAC,SAAkB;IAC1C,IAAI,SAAS,IAAI,IAAI,IAAI,SAAS,KAAK,EAAE;QACrC,OAAO,SAAS,CAAC;IAErB,MAAM,KAAK,GAAG,SAAS,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC;IAC5C,IAAI,KAAK,IAAI,IAAI,IAAI,KAAK,CAAC,CAAC,CAAC,IAAI,IAAI,IAAI,KAAK,CAAC,CAAC,CAAC,IAAI,IAAI,EAAE,CAAC;QACxD,MAAM,WAAW,GAAG,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;QACzC,MAAM,SAAS,GAAG,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;QAEvC,IAAI,WAAW,IAAI,CAAC,IAAI,WAAW,IAAI,CAAC,IAAI,SAAS,IAAI,CAAC,IAAI,SAAS,IAAI,CAAC,EAAE,CAAC;YAC3E,OAAO,EAAC,WAAW,EAAE,SAAS,EAAC,CAAC;QACpC,CAAC;IACL,CAAC;IAED,MAAM,IAAI,KAAK,CACX,yBAAyB,SAAS,IAAI;QACtC,uGAAuG,CAC1G,CAAC;AACN,CAAC"}
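The new `parseXtcArg` helper above appears to back the XTC option added to the `chat`, `complete`, and `infill` CLI commands (each gains matching wiring per the file list; the exact flag name is not visible in this excerpt). It turns a `"probability,threshold"` string into the `{probability, threshold}` object used by the sampler options. A minimal usage sketch (the import path is illustrative):

```ts
// Hypothetical usage of the helper shown in the hunk above.
import {parseXtcArg} from "./parseXtcArg.js"; // illustrative import path

parseXtcArg("0.5,0.1"); // => {probability: 0.5, threshold: 0.1}
parseXtcArg("");        // => undefined — XTC stays disabled
parseXtcArg("2,0.1");   // throws: both values must be between 0 and 1
```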
@@ -1,7 +1,7 @@
 import { EventRelay } from "lifecycle-utils";
 import { ChatWrapper } from "../../ChatWrapper.js";
 import { LlamaContextSequence } from "../LlamaContext/LlamaContext.js";
-import { ChatHistoryItem, ChatModelFunctions, ChatModelSegmentType, LLamaContextualRepeatPenalty, Token, Tokenizer } from "../../types.js";
+import { ChatHistoryItem, ChatModelFunctions, ChatModelSegmentType, LLamaContextualRepeatPenalty, Token, Tokenizer, LLamaContextualDryRepeatPenalty } from "../../types.js";
 import { GbnfJsonSchemaToType } from "../../utils/gbnfJson/types.js";
 import { LlamaGrammar } from "../LlamaGrammar.js";
 import { LlamaText, LlamaTextJSON } from "../../utils/LlamaText.js";
@@ -191,6 +191,28 @@ export type LLamaChatGenerateResponseOptions<Functions extends ChatModelFunction
      * Only relevant when using `temperature`.
      */
     seed?: number;
+    /**
+     * Exclude Top Choices (XTC) removes the top tokens from consideration and avoids more obvious and repetitive generations.
+     * Using it leads to more creative responses, but also to increased hallucinations.
+     *
+     * The `probability` value controls the chance that the top tokens will be removed in the next token generation step.
+     * The `threshold` value control the minimum probability of a token for it to be removed.
+     *
+     * Start with `{probability: 0.5, threshold: 0.1}` and adjust from there.
+     *
+     * Disabled by default.
+     */
+    xtc?: {
+        /**
+         * A number between `0` and `1` representing the probability of applying Exclude Top Choices (XTC) at each token generation step.
+         */
+        probability: number;
+        /**
+         * A number between `0` and `1` representing the minimum probability
+         * of a token for it to be removed when applying Exclude Top Choices (XTC).
+         */
+        threshold: number;
+    };
     /**
      * Trim whitespace from the end of the generated text
      *
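The same `xtc` option is threaded into `LlamaChatSession` (its `.d.ts` grows by 83 lines in the file list above), so it should be usable directly from a session prompt. A minimal sketch, assuming the standard node-llama-cpp v3 setup and a placeholder model path:

```ts
import {getLlama, LlamaChatSession} from "node-llama-cpp";

const llama = await getLlama();
const model = await llama.loadModel({modelPath: "path/to/model.gguf"}); // placeholder path
const context = await model.createContext();
const session = new LlamaChatSession({contextSequence: context.getSequence()});

// XTC is disabled by default; the doc comment above suggests starting at probability 0.5, threshold 0.1
const answer = await session.prompt("Write a short story opening", {
    temperature: 0.8,
    xtc: {probability: 0.5, threshold: 0.1}
});
console.log(answer);
```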
@@ -198,6 +220,17 @@ export type LLamaChatGenerateResponseOptions<Functions extends ChatModelFunction
      */
     trimWhitespaceSuffix?: boolean;
     repeatPenalty?: false | LLamaContextualRepeatPenalty;
+    /**
+     * DRY (Don't Repeat Yourself) penalty is a technique to reduce repetitions in the generated text
+     * by penalizing tokens based on recent token usage patterns.
+     *
+     * With the right parameters choice, it makes it impossible for the model to
+     * repeat itself verbatim with the same tokens in the same order (the model can still repeat itself by
+     * using different tokens or by paraphrasing, but that is far less of an issue than a broken-record looping).
+     *
+     * Disabled by default.
+     */
+    dryRepeatPenalty?: LLamaContextualDryRepeatPenalty;
     /**
      * Adjust the probability of tokens being generated.
      * Can be used to bias the model to generate tokens that you want it to lean towards,
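The shape of `LLamaContextualDryRepeatPenalty` lives in `types.d.ts` (+51 lines, not shown in this excerpt). As background, DRY samplers in the llama.cpp ecosystem typically penalize the token that would extend a verbatim repetition of the recent context, with a penalty that grows exponentially in the length of the repeated sequence. A conceptual sketch of that formula (not this package's implementation; parameter names follow the common DRY convention):

```ts
// Conceptual DRY-style penalty: once the generated text extends a sequence that already
// occurred in the context beyond `allowedLength` tokens, the continuation token's logit is reduced.
function dryPenalty(matchLength: number, allowedLength: number, multiplier: number, base: number): number {
    if (matchLength <= allowedLength)
        return 0; // short overlaps are left alone
    return multiplier * Math.pow(base, matchLength - allowedLength);
}

// With commonly suggested values (multiplier 0.8, base 1.75, allowedLength 2),
// a 6-token verbatim repetition costs about 0.8 * 1.75^4 ≈ 7.5 in logit space.
dryPenalty(6, 2, 0.8, 1.75);
```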
@@ -321,8 +354,10 @@ export type LLamaChatLoadAndCompleteUserMessageOptions<Functions extends ChatMod
     topK?: LLamaChatGenerateResponseOptions<Functions>["topK"];
     topP?: LLamaChatGenerateResponseOptions<Functions>["topP"];
     seed?: LLamaChatGenerateResponseOptions<Functions>["seed"];
+    xtc?: LLamaChatGenerateResponseOptions<Functions>["xtc"];
     trimWhitespaceSuffix?: LLamaChatGenerateResponseOptions<Functions>["trimWhitespaceSuffix"];
     repeatPenalty?: LLamaChatGenerateResponseOptions<Functions>["repeatPenalty"];
+    dryRepeatPenalty?: LLamaChatGenerateResponseOptions<Functions>["dryRepeatPenalty"];
     tokenBias?: LLamaChatGenerateResponseOptions<Functions>["tokenBias"];
     evaluationPriority?: LLamaChatGenerateResponseOptions<Functions>["evaluationPriority"];
     contextShift?: LLamaChatGenerateResponseOptions<Functions>["contextShift"];
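The hunks above only thread `xtc` and `dryRepeatPenalty` through the public option types; the sampling itself happens in the native addon (`AddonSampler.cpp`, +158 lines in the file list). For orientation, an XTC step generally works like the following sketch, which illustrates the XTC technique in general rather than transcribing the addon code:

```ts
// Conceptual XTC step: with chance `probability`, drop every candidate whose probability is at
// least `threshold`, except the least likely of those candidates, so the obvious "top choices"
// are excluded while at least one viable token always survives.
function applyXtc(
    candidates: Array<{token: number, prob: number}>, // assumed sorted by descending probability
    probability: number,
    threshold: number,
    random: () => number = Math.random
): Array<{token: number, prob: number}> {
    if (random() >= probability)
        return candidates; // XTC is skipped for this token

    const topChoices = candidates.filter(({prob}) => prob >= threshold);
    if (topChoices.length < 2)
        return candidates; // nothing above the threshold worth excluding

    const keep = topChoices[topChoices.length - 1]; // the least likely "top choice" survives
    return candidates.filter((candidate) => candidate === keep || candidate.prob < threshold);
}
```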
@@ -80,7 +80,7 @@ export class LlamaChat {
         return this.sequence.model;
     }
     async generateResponse(history, options = {}) {
-        const { onTextChunk, onToken, onResponseChunk, onFunctionCallParamsChunk, budgets, signal, stopOnAbortSignal = false, maxTokens, temperature, minP, topK, topP, seed, grammar, trimWhitespaceSuffix = defaultTrimWhitespaceSuffix, repeatPenalty = {}, tokenBias, evaluationPriority = defaultEvaluationPriority, functions, onFunctionCall, documentFunctionParams, maxParallelFunctionCalls, contextShift = defaultContextShiftOptions, customStopTriggers, abortOnNonText = false, lastEvaluationContextWindow: { history: lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift = 0.5 } = {} } = options;
+        const { onTextChunk, onToken, onResponseChunk, onFunctionCallParamsChunk, budgets, signal, stopOnAbortSignal = false, maxTokens, temperature, minP, topK, topP, seed, xtc, grammar, trimWhitespaceSuffix = defaultTrimWhitespaceSuffix, repeatPenalty = {}, dryRepeatPenalty, tokenBias, evaluationPriority = defaultEvaluationPriority, functions, onFunctionCall, documentFunctionParams, maxParallelFunctionCalls, contextShift = defaultContextShiftOptions, customStopTriggers, abortOnNonText = false, lastEvaluationContextWindow: { history: lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift = 0.5 } = {} } = options;
         this.sequence.tokenPredictor?.updateInputTokens?.(this.model.tokenize(findLastUserMessageInChatHistory(history)?.text ?? ""));
         const generateResponseState = new GenerateResponseState(this, this._chatWrapper, history, {
             onTextChunk,
@@ -96,9 +96,11 @@
             topK,
             topP,
             seed,
+            xtc,
             grammar: grammar, // this is a workaround to allow passing both `functions` and `grammar`
             trimWhitespaceSuffix,
             repeatPenalty,
+            dryRepeatPenalty,
             tokenBias,
             evaluationPriority,
             functions,
@@ -202,7 +204,7 @@
         });
     }
     async loadChatAndCompleteUserMessage(history, options = {}) {
-        const { initialUserPrompt = "", stopOnAbortSignal = false, onTextChunk, onToken, signal, maxTokens = defaultMaxPreloadTokens(this.sequence), temperature, minP, topK, topP, seed, grammar, trimWhitespaceSuffix = defaultTrimWhitespaceSuffix, repeatPenalty = {}, tokenBias, evaluationPriority = defaultEvaluationPriority, functions, documentFunctionParams, contextShift = defaultContextShiftOptions, customStopTriggers, lastEvaluationContextWindow: { history: lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift = 0.8 } = {} } = options;
+        const { initialUserPrompt = "", stopOnAbortSignal = false, onTextChunk, onToken, signal, maxTokens = defaultMaxPreloadTokens(this.sequence), temperature, minP, topK, topP, seed, xtc, grammar, trimWhitespaceSuffix = defaultTrimWhitespaceSuffix, repeatPenalty = {}, dryRepeatPenalty, tokenBias, evaluationPriority = defaultEvaluationPriority, functions, documentFunctionParams, contextShift = defaultContextShiftOptions, customStopTriggers, lastEvaluationContextWindow: { history: lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift = 0.8 } = {} } = options;
         this.sequence.tokenPredictor?.updateInputTokens?.(this.model.tokenize((findLastModelMessageInChatHistory(history)?.response ?? [])
             .map((item) => {
             if (typeof item === "string")
@@ -227,9 +229,11 @@
             topK,
             topP,
             seed,
+            xtc,
             grammar: grammar, // this is a workaround to allow passing both `functions` and `grammar`
             trimWhitespaceSuffix,
             repeatPenalty,
+            dryRepeatPenalty,
             tokenBias,
             evaluationPriority,
             functions,
@@ -721,6 +725,7 @@ class GenerateResponseState {
     topK;
     topP;
     seed;
+    xtc;
     grammar;
     trimWhitespaceSuffix;
     tokenBias;
@@ -737,6 +742,7 @@
     repeatPenaltyEnabled;
     resolvedContextShift;
     resolvedRepeatPenalty;
+    dryRepeatPenalty;
     grammarEvaluationState;
     functionNameGrammar;
     functionsGrammar;
@@ -798,7 +804,7 @@
     currentTokens = [];
     currentText = "";
     currentQueuedTokenRelease;
-    constructor(llamaChat, chatWrapper, history, { onTextChunk, onToken, onResponseChunk, onFunctionCallParamsChunk, budgets, signal, stopOnAbortSignal = false, maxTokens, temperature, minP, topK, topP, seed, grammar, trimWhitespaceSuffix = defaultTrimWhitespaceSuffix, repeatPenalty = {}, tokenBias, evaluationPriority = defaultEvaluationPriority, functions, onFunctionCall, documentFunctionParams, maxParallelFunctionCalls, contextShift = defaultContextShiftOptions, customStopTriggers, abortOnNonText, lastEvaluationContextWindow: { history: lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift = 0.5 } = {} } = {}) {
+    constructor(llamaChat, chatWrapper, history, { onTextChunk, onToken, onResponseChunk, onFunctionCallParamsChunk, budgets, signal, stopOnAbortSignal = false, maxTokens, temperature, minP, topK, topP, seed, xtc, grammar, trimWhitespaceSuffix = defaultTrimWhitespaceSuffix, repeatPenalty = {}, dryRepeatPenalty, tokenBias, evaluationPriority = defaultEvaluationPriority, functions, onFunctionCall, documentFunctionParams, maxParallelFunctionCalls, contextShift = defaultContextShiftOptions, customStopTriggers, abortOnNonText, lastEvaluationContextWindow: { history: lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift = 0.5 } = {} } = {}) {
         this.llamaChat = llamaChat;
         this.chatWrapper = chatWrapper;
         this.history = history;
@@ -815,6 +821,7 @@
         this.topK = topK;
         this.topP = topP;
         this.seed = seed;
+        this.xtc = xtc;
         this.grammar = grammar;
         this.trimWhitespaceSuffix = trimWhitespaceSuffix;
         this.tokenBias = tokenBias;
@@ -847,6 +854,7 @@
             lastTokens: repeatPenalty?.lastTokens ?? defaultRepeatPenaltyLastTokens
         };
         this.repeatPenaltyEnabled = this.resolvedRepeatPenalty.lastTokens > 0;
+        this.dryRepeatPenalty = dryRepeatPenalty;
         this.grammarEvaluationState = this.grammar != null
             ? new LlamaGrammarEvaluationState({ model: this.llamaChat.model, grammar: this.grammar })
             : undefined;
@@ -863,11 +871,16 @@
         if (this.grammar != null)
             StopGenerationDetector.resolveStopTriggers(this.grammar.stopGenerationTriggers, this.llamaChat.model.tokenizer)
                 .map((stopTrigger) => this.stopGenerationDetector.addStopTrigger(stopTrigger));
-        if (this.functions != null && Object.keys(this.functions).length > 0 && !this.abortOnNonText)
-            this.functionSyntaxStartDetector.addStopTrigger(StopGenerationDetector.resolveLlamaTextTrigger(LlamaText([
+        if (this.functions != null && Object.keys(this.functions).length > 0 && !this.abortOnNonText) {
+            for (const sectionPrefix of [
                 this.chatWrapper.settings.functions?.parallelism?.call?.sectionPrefix ?? "",
-                this.chatWrapper.settings.functions.call.prefix
-            ]), this.llamaChat.model.tokenizer));
+                ...(this.chatWrapper.settings.functions?.parallelism?.call.sectionPrefixAlternateMatches ?? [])
+            ])
+                this.functionSyntaxStartDetector.addStopTrigger(StopGenerationDetector.resolveLlamaTextTrigger(LlamaText([
+                    sectionPrefix,
+                    this.chatWrapper.settings.functions.call.prefix
+                ]), this.llamaChat.model.tokenizer));
+        }
         const segmentDefinitions = new Map();
         for (const segmentType of allSegmentTypes) {
             const segmentDefinition = getChatWrapperSegmentDefinition(this.chatWrapper.settings, segmentType);
@@ -889,10 +902,14 @@
                 : SegmentHandler.getSegmentTokenCounts(lastModelMessageFullResponse, this.llamaChat.model.tokenizer)
         });
         if (this.abortOnNonText) {
-            this.stopGenerationDetector.addStopTrigger(StopGenerationDetector.resolveLlamaTextTrigger(LlamaText([
+            for (const sectionPrefix of [
                 this.chatWrapper.settings.functions?.parallelism?.call?.sectionPrefix ?? "",
-                this.chatWrapper.settings.functions.call.prefix
-            ]), this.llamaChat.model.tokenizer));
+                ...(this.chatWrapper.settings.functions?.parallelism?.call.sectionPrefixAlternateMatches ?? [])
+            ])
+                this.stopGenerationDetector.addStopTrigger(StopGenerationDetector.resolveLlamaTextTrigger(LlamaText([
+                    sectionPrefix,
+                    this.chatWrapper.settings.functions.call.prefix
+                ]), this.llamaChat.model.tokenizer));
             for (const segmentType of allSegmentTypes) {
                 const segmentDefinition = getChatWrapperSegmentDefinition(this.chatWrapper.settings, segmentType);
                 if (segmentDefinition != null)
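Both hunks above make the same change in two places: instead of registering a single function-call stop trigger built from the parallelism `sectionPrefix`, the code now loops over the main prefix plus every entry of the new `sectionPrefixAlternateMatches` list and registers a trigger for each. Conceptually, the set of trigger texts expands like this (a plain-string sketch; the real code builds `LlamaText` triggers and resolves them against the tokenizer, and the example prefixes are hypothetical):

```ts
// Sketch of which trigger texts get registered after this change.
function functionCallTriggerTexts(
    sectionPrefix: string,
    sectionPrefixAlternateMatches: string[],
    callPrefix: string
): string[] {
    return [sectionPrefix, ...sectionPrefixAlternateMatches]
        .map((prefix) => prefix + callPrefix);
}

// e.g. a chat wrapper whose parallel-call section prefix is "<tool_calls>" with an
// alternate match "<tool_call>" now stops on both forms followed by the call prefix.
functionCallTriggerTexts("<tool_calls>", ["<tool_call>"], "{\"name\": \"");
```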
@@ -1765,6 +1782,7 @@
             topK: this.topK,
             topP: this.topP,
             seed: this.seed,
+            xtc: this.xtc,
             grammarEvaluationState: () => {
                 if (this.functionEvaluationMode !== false)
                     return this.functionsEvaluationState;
@@ -1777,6 +1795,7 @@
                 frequencyPenalty: this.resolvedRepeatPenalty.frequencyPenalty,
                 presencePenalty: this.resolvedRepeatPenalty.presencePenalty
             },
+            dryRepeatPenalty: this.dryRepeatPenalty,
             tokenBias: this.tokenBias,
             evaluationPriority: this.evaluationPriority,
             yieldEogToken: true
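`LlamaCompletion` gains the same two options as well (+36 lines to its `.d.ts` in the file list), so plain completions can opt into them too. A minimal sketch assuming the standard node-llama-cpp v3 completion setup; `dryRepeatPenalty` takes a `LLamaContextualDryRepeatPenalty` object whose fields are defined in `types.d.ts` and are therefore omitted here:

```ts
import {getLlama, LlamaCompletion} from "node-llama-cpp";

const llama = await getLlama();
const model = await llama.loadModel({modelPath: "path/to/model.gguf"}); // placeholder path
const context = await model.createContext();
const completion = new LlamaCompletion({contextSequence: context.getSequence()});

const text = await completion.generateCompletion("Once upon a time", {
    maxTokens: 128,
    temperature: 0.8,
    xtc: {probability: 0.5, threshold: 0.1} // same shape as in the chat options above
});
console.log(text);
```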