node-llama-cpp 3.0.0-beta.16 → 3.0.0-beta.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -5
- package/dist/ChatWrapper.d.ts +1 -15
- package/dist/ChatWrapper.js +22 -32
- package/dist/ChatWrapper.js.map +1 -1
- package/dist/apiDocsOverrides.d.ts +1 -0
- package/dist/apiDocsOverrides.js +5 -0
- package/dist/apiDocsOverrides.js.map +1 -0
- package/dist/bindings/AddonTypes.d.ts +4 -1
- package/dist/bindings/getLlama.d.ts +5 -1
- package/dist/bindings/getLlama.js +11 -4
- package/dist/bindings/getLlama.js.map +1 -1
- package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.d.ts +3 -0
- package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.js +27 -0
- package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.js.map +1 -0
- package/dist/chatWrappers/FunctionaryChatWrapper.js +39 -40
- package/dist/chatWrappers/FunctionaryChatWrapper.js.map +1 -1
- package/dist/chatWrappers/{LlamaChatWrapper.d.ts → Llama2ChatWrapper.d.ts} +1 -1
- package/dist/chatWrappers/{LlamaChatWrapper.js → Llama2ChatWrapper.js} +3 -3
- package/dist/chatWrappers/Llama2ChatWrapper.js.map +1 -0
- package/dist/chatWrappers/Llama3ChatWrapper.d.ts +31 -0
- package/dist/chatWrappers/Llama3ChatWrapper.js +129 -0
- package/dist/chatWrappers/Llama3ChatWrapper.js.map +1 -0
- package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.d.ts +2 -2
- package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js +6 -2
- package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js.map +1 -1
- package/dist/chatWrappers/generic/TemplateChatWrapper.d.ts +3 -4
- package/dist/chatWrappers/generic/TemplateChatWrapper.js +1 -2
- package/dist/chatWrappers/generic/TemplateChatWrapper.js.map +1 -1
- package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.d.ts +42 -0
- package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.js +82 -0
- package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.js.map +1 -0
- package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js +3 -3
- package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js.map +1 -1
- package/dist/chatWrappers/utils/resolveChatWrapper.d.ts +6 -4
- package/dist/chatWrappers/utils/resolveChatWrapper.js +23 -17
- package/dist/chatWrappers/utils/resolveChatWrapper.js.map +1 -1
- package/dist/cli/cli.js +5 -3
- package/dist/cli/cli.js.map +1 -1
- package/dist/cli/commands/ChatCommand.d.ts +2 -0
- package/dist/cli/commands/ChatCommand.js +26 -9
- package/dist/cli/commands/ChatCommand.js.map +1 -1
- package/dist/cli/commands/CompleteCommand.d.ts +2 -0
- package/dist/cli/commands/CompleteCommand.js +24 -7
- package/dist/cli/commands/CompleteCommand.js.map +1 -1
- package/dist/cli/commands/DebugCommand.js +3 -5
- package/dist/cli/commands/DebugCommand.js.map +1 -1
- package/dist/cli/commands/DownloadCommand.d.ts +1 -1
- package/dist/cli/commands/DownloadCommand.js +2 -1
- package/dist/cli/commands/DownloadCommand.js.map +1 -1
- package/dist/cli/commands/InfillCommand.d.ts +2 -0
- package/dist/cli/commands/InfillCommand.js +24 -7
- package/dist/cli/commands/InfillCommand.js.map +1 -1
- package/dist/cli/commands/PullCommand.d.ts +12 -0
- package/dist/cli/commands/PullCommand.js +109 -0
- package/dist/cli/commands/PullCommand.js.map +1 -0
- package/dist/cli/commands/inspect/commands/InspectGgufCommand.d.ts +1 -0
- package/dist/cli/commands/inspect/commands/InspectGgufCommand.js +23 -11
- package/dist/cli/commands/inspect/commands/InspectGgufCommand.js.map +1 -1
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.d.ts +2 -0
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js +43 -11
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js.map +1 -1
- package/dist/cli/recommendedModels.js +61 -0
- package/dist/cli/recommendedModels.js.map +1 -1
- package/dist/cli/utils/printCommonInfoLines.js +4 -3
- package/dist/cli/utils/printCommonInfoLines.js.map +1 -1
- package/dist/cli/utils/resolveCommandGgufPath.d.ts +3 -1
- package/dist/cli/utils/resolveCommandGgufPath.js +44 -39
- package/dist/cli/utils/resolveCommandGgufPath.js.map +1 -1
- package/dist/cli/utils/resolveModelRecommendationFileOptions.d.ts +1 -1
- package/dist/evaluator/LlamaChat/LlamaChat.d.ts +18 -2
- package/dist/evaluator/LlamaChat/LlamaChat.js +271 -186
- package/dist/evaluator/LlamaChat/LlamaChat.js.map +1 -1
- package/dist/evaluator/LlamaChat/utils/FunctionCallGrammar.js +3 -1
- package/dist/evaluator/LlamaChat/utils/FunctionCallGrammar.js.map +1 -1
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.d.ts +22 -3
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.js +18 -7
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.js.map +1 -1
- package/dist/evaluator/LlamaCompletion.d.ts +2 -2
- package/dist/evaluator/LlamaCompletion.js +11 -13
- package/dist/evaluator/LlamaCompletion.js.map +1 -1
- package/dist/evaluator/LlamaContext/LlamaContext.d.ts +6 -11
- package/dist/evaluator/LlamaContext/LlamaContext.js +23 -16
- package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -1
- package/dist/evaluator/LlamaEmbeddingContext.d.ts +2 -10
- package/dist/evaluator/LlamaEmbeddingContext.js +10 -24
- package/dist/evaluator/LlamaEmbeddingContext.js.map +1 -1
- package/dist/evaluator/LlamaGrammar.d.ts +1 -1
- package/dist/evaluator/LlamaModel.d.ts +23 -3
- package/dist/evaluator/LlamaModel.js +32 -5
- package/dist/evaluator/LlamaModel.js.map +1 -1
- package/dist/evaluator/TokenBias.d.ts +1 -1
- package/dist/evaluator/TokenBias.js +3 -3
- package/dist/evaluator/TokenBias.js.map +1 -1
- package/dist/gguf/insights/GgufInsights.js +12 -12
- package/dist/gguf/insights/GgufInsights.js.map +1 -1
- package/dist/gguf/insights/utils/resolveContextContextSizeOption.js +27 -3
- package/dist/gguf/insights/utils/resolveContextContextSizeOption.js.map +1 -1
- package/dist/gguf/parser/parseGguf.js +5 -0
- package/dist/gguf/parser/parseGguf.js.map +1 -1
- package/dist/gguf/readGgufFileInfo.d.ts +5 -2
- package/dist/gguf/readGgufFileInfo.js +38 -10
- package/dist/gguf/readGgufFileInfo.js.map +1 -1
- package/dist/gguf/types/GgufFileInfoTypes.d.ts +32 -0
- package/dist/gguf/types/GgufFileInfoTypes.js.map +1 -1
- package/dist/gguf/types/GgufMetadataTypes.d.ts +4 -0
- package/dist/gguf/types/GgufMetadataTypes.js.map +1 -1
- package/dist/gguf/utils/getGgufMetadataArchitectureData.js +1 -1
- package/dist/gguf/utils/getGgufMetadataArchitectureData.js.map +1 -1
- package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.d.ts +2 -0
- package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.js +39 -0
- package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.js.map +1 -0
- package/dist/gguf/utils/resolveSplitGgufParts.d.ts +7 -0
- package/dist/gguf/utils/resolveSplitGgufParts.js +55 -0
- package/dist/gguf/utils/resolveSplitGgufParts.js.map +1 -0
- package/dist/index.d.ts +9 -5
- package/dist/index.js +7 -3
- package/dist/index.js.map +1 -1
- package/dist/types.d.ts +21 -1
- package/dist/types.js.map +1 -1
- package/dist/utils/LlamaText.d.ts +31 -21
- package/dist/utils/LlamaText.js +253 -223
- package/dist/utils/LlamaText.js.map +1 -1
- package/dist/utils/StopGenerationDetector.d.ts +1 -1
- package/dist/utils/StopGenerationDetector.js +21 -18
- package/dist/utils/StopGenerationDetector.js.map +1 -1
- package/dist/utils/TokenStreamRegulator.d.ts +4 -2
- package/dist/utils/TokenStreamRegulator.js +22 -4
- package/dist/utils/TokenStreamRegulator.js.map +1 -1
- package/dist/utils/createModelDownloader.d.ts +99 -0
- package/dist/utils/createModelDownloader.js +226 -0
- package/dist/utils/createModelDownloader.js.map +1 -0
- package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js +18 -8
- package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js.map +1 -1
- package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js +15 -11
- package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js.map +1 -1
- package/dist/utils/parseTextTemplate.d.ts +2 -2
- package/dist/utils/parseTextTemplate.js +2 -2
- package/dist/utils/runtime.d.ts +4 -0
- package/dist/utils/runtime.js +8 -0
- package/dist/utils/runtime.js.map +1 -0
- package/dist/utils/utilTypes.d.ts +3 -0
- package/dist/utils/utilTypes.js +2 -0
- package/dist/utils/utilTypes.js.map +1 -0
- package/llama/addon.cpp +67 -14
- package/llama/binariesGithubRelease.json +1 -1
- package/llama/gitRelease.bundle +0 -0
- package/llama/grammars/README.md +1 -1
- package/llama/llama.cpp.info.json +1 -1
- package/llamaBins/linux-arm64/_nlcBuildMetadata.json +1 -1
- package/llamaBins/linux-arm64/llama-addon.node +0 -0
- package/llamaBins/linux-armv7l/_nlcBuildMetadata.json +1 -1
- package/llamaBins/linux-armv7l/llama-addon.node +0 -0
- package/llamaBins/linux-x64/_nlcBuildMetadata.json +1 -1
- package/llamaBins/linux-x64/llama-addon.node +0 -0
- package/llamaBins/linux-x64-cuda/_nlcBuildMetadata.json +1 -1
- package/llamaBins/linux-x64-cuda/llama-addon.node +0 -0
- package/llamaBins/linux-x64-vulkan/_nlcBuildMetadata.json +1 -1
- package/llamaBins/linux-x64-vulkan/llama-addon.node +0 -0
- package/llamaBins/mac-arm64-metal/_nlcBuildMetadata.json +1 -1
- package/llamaBins/mac-arm64-metal/default.metallib +0 -0
- package/llamaBins/mac-arm64-metal/llama-addon.node +0 -0
- package/llamaBins/mac-x64/_nlcBuildMetadata.json +1 -1
- package/llamaBins/mac-x64/llama-addon.node +0 -0
- package/llamaBins/win-arm64/_nlcBuildMetadata.json +1 -1
- package/llamaBins/win-arm64/llama-addon.node +0 -0
- package/llamaBins/win-x64/_nlcBuildMetadata.json +1 -1
- package/llamaBins/win-x64/llama-addon.node +0 -0
- package/llamaBins/win-x64-cuda/_nlcBuildMetadata.json +1 -1
- package/llamaBins/win-x64-cuda/llama-addon.node +0 -0
- package/llamaBins/win-x64-vulkan/_nlcBuildMetadata.json +1 -1
- package/llamaBins/win-x64-vulkan/llama-addon.node +0 -0
- package/package.json +9 -5
- package/dist/chatWrappers/LlamaChatWrapper.js.map +0 -1
|
@@ -7,6 +7,7 @@ import { UNKNOWN_UNICODE_CHAR } from "../../consts.js";
|
|
|
7
7
|
import { getQueuedTokensBeforeStopTrigger } from "../../utils/getQueuedTokensBeforeStopTrigger.js";
|
|
8
8
|
import { resolveChatWrapper } from "../../chatWrappers/utils/resolveChatWrapper.js";
|
|
9
9
|
import { GeneralChatWrapper } from "../../chatWrappers/GeneralChatWrapper.js";
|
|
10
|
+
import { getConsoleLogPrefix } from "../../utils/getConsoleLogPrefix.js";
|
|
10
11
|
import { eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy } from "./utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js";
|
|
11
12
|
import { FunctionCallGrammar, LlamaFunctionCallValidationError } from "./utils/FunctionCallGrammar.js";
|
|
12
13
|
const defaultContextShiftOptions = {
|
|
@@ -36,7 +37,7 @@ export class LlamaChat {
|
|
|
36
37
|
bosString: contextSequence.model.tokens.bosString,
|
|
37
38
|
filename: contextSequence.model.filename,
|
|
38
39
|
fileInfo: contextSequence.model.fileInfo,
|
|
39
|
-
tokenizer: contextSequence.model.
|
|
40
|
+
tokenizer: contextSequence.model.tokenizer
|
|
40
41
|
}) ?? new GeneralChatWrapper())
|
|
41
42
|
: chatWrapper;
|
|
42
43
|
}
|
|
@@ -71,7 +72,7 @@ export class LlamaChat {
|
|
|
71
72
|
get model() {
|
|
72
73
|
return this.sequence.model;
|
|
73
74
|
}
|
|
74
|
-
async generateResponse(history, { onToken, signal, maxTokens, temperature, minP, topK, topP, grammar, trimWhitespaceSuffix = false, repeatPenalty = {}, tokenBias, evaluationPriority = 5, functions, documentFunctionParams, contextShift = defaultContextShiftOptions, lastEvaluationContextWindow: { history: lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift = 0.5 } = {} } = {}) {
|
|
75
|
+
async generateResponse(history, { onToken, signal, stopOnAbortSignal = false, maxTokens, temperature, minP, topK, topP, grammar, trimWhitespaceSuffix = false, repeatPenalty = {}, tokenBias, evaluationPriority = 5, functions, documentFunctionParams, contextShift = defaultContextShiftOptions, customStopTriggers, lastEvaluationContextWindow: { history: lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift = 0.5 } = {} } = {}) {
|
|
75
76
|
const functionsEnabled = (functions != null && Object.keys(functions).length > 0);
|
|
76
77
|
if (grammar != null && functionsEnabled)
|
|
77
78
|
throw new Error("Using both grammar and functions is not supported yet");
|
|
@@ -89,7 +90,6 @@ export class LlamaChat {
|
|
|
89
90
|
});
|
|
90
91
|
const model = this._sequence.model;
|
|
91
92
|
const context = this._sequence.context;
|
|
92
|
-
const eosToken = model.tokens.eos;
|
|
93
93
|
const resolvedContextShift = {
|
|
94
94
|
...defaultContextShiftOptions,
|
|
95
95
|
...removeNullFields(contextShift)
|
|
@@ -116,6 +116,7 @@ export class LlamaChat {
|
|
|
116
116
|
: undefined;
|
|
117
117
|
const streamRegulator = new TokenStreamRegulator();
|
|
118
118
|
const stopGenerationDetector = new StopGenerationDetector();
|
|
119
|
+
const customStopGenerationTriggersDetector = new StopGenerationDetector();
|
|
119
120
|
const functionSyntaxStartDetector = new StopGenerationDetector();
|
|
120
121
|
const functionSyntaxEndDetector = new StopGenerationDetector();
|
|
121
122
|
const disengageInitiallyEngagedFunctionMode = new StopGenerationDetector();
|
|
@@ -129,7 +130,7 @@ export class LlamaChat {
|
|
|
129
130
|
let lastContextWindowHistory = resolvedHistory;
|
|
130
131
|
let lastHistoryCompressionMetadata = resolvedContextShift.lastEvaluationMetadata;
|
|
131
132
|
const ensureNotAborted = () => {
|
|
132
|
-
if (signal?.aborted)
|
|
133
|
+
if (signal?.aborted && (!stopOnAbortSignal || res.length === 0))
|
|
133
134
|
throw signal.reason;
|
|
134
135
|
if (this._sequence == null)
|
|
135
136
|
throw new DisposedError();
|
|
@@ -200,8 +201,11 @@ export class LlamaChat {
|
|
|
200
201
|
}
|
|
201
202
|
}
|
|
202
203
|
};
|
|
204
|
+
if (customStopTriggers != null)
|
|
205
|
+
StopGenerationDetector.resolveStopTriggers(customStopTriggers, model.tokenizer)
|
|
206
|
+
.map((stopTrigger) => customStopGenerationTriggersDetector.addStopTrigger(stopTrigger));
|
|
203
207
|
if (grammar != null)
|
|
204
|
-
StopGenerationDetector.resolveStopTriggers(grammar.stopGenerationTriggers, model.
|
|
208
|
+
StopGenerationDetector.resolveStopTriggers(grammar.stopGenerationTriggers, model.tokenizer)
|
|
205
209
|
.map((stopTrigger) => stopGenerationDetector.addStopTrigger(stopTrigger));
|
|
206
210
|
if (functions != null && Object.keys(functions).length > 0)
|
|
207
211
|
functionSyntaxStartDetector.addStopTrigger([this._chatWrapper.settings.functions.call.prefix]);
|
|
@@ -214,7 +218,7 @@ export class LlamaChat {
|
|
|
214
218
|
resolvedHistory: getResolvedHistoryWithCurrentModelResponse(),
|
|
215
219
|
resolvedContextShift,
|
|
216
220
|
lastHistoryCompressionMetadata,
|
|
217
|
-
pendingTokensCount: pendingTokens.length + queuedChunkTokens.length,
|
|
221
|
+
pendingTokensCount: ignoredStartTextTokens.length + pendingTokens.length + queuedChunkTokens.length,
|
|
218
222
|
isFirstEvaluation,
|
|
219
223
|
chatWrapper: this._chatWrapper,
|
|
220
224
|
lastEvaluationContextWindowHistory,
|
|
@@ -226,11 +230,11 @@ export class LlamaChat {
|
|
|
226
230
|
});
|
|
227
231
|
ensureNotAborted();
|
|
228
232
|
if (generatedTokens === 0) {
|
|
229
|
-
StopGenerationDetector.resolveStopTriggers(ignoreStartText, model.
|
|
233
|
+
StopGenerationDetector.resolveStopTriggers(ignoreStartText, model.tokenizer)
|
|
230
234
|
.map((stopTrigger) => ignoreStartTextDetector.addStopTrigger(stopTrigger));
|
|
231
235
|
if (functionsEnabled) {
|
|
232
236
|
initiallyEngagedFunctionMode = functionCallInitiallyEngaged;
|
|
233
|
-
StopGenerationDetector.resolveStopTriggers(disengageInitiallyEngagedFunctionCall, model.
|
|
237
|
+
StopGenerationDetector.resolveStopTriggers(disengageInitiallyEngagedFunctionCall, model.tokenizer)
|
|
234
238
|
.map((stopTrigger) => disengageInitiallyEngagedFunctionMode.addStopTrigger(stopTrigger));
|
|
235
239
|
if (initiallyEngagedFunctionMode) {
|
|
236
240
|
inFunctionEvaluationMode = true;
|
|
@@ -247,10 +251,10 @@ export class LlamaChat {
|
|
|
247
251
|
lastContextWindowHistory = contextWindowHistory;
|
|
248
252
|
const contextWindowLastModelResponse = getLastTextModelResponseFromChatHistory(contextWindowHistory);
|
|
249
253
|
const contextWindowsRes = [];
|
|
250
|
-
StopGenerationDetector.resolveStopTriggers(stopGenerationTriggers, model.
|
|
254
|
+
StopGenerationDetector.resolveStopTriggers(stopGenerationTriggers, model.tokenizer)
|
|
251
255
|
.map((stopTrigger) => stopGenerationDetector.addStopTrigger(stopTrigger));
|
|
252
256
|
if (functionsGrammar != null)
|
|
253
|
-
StopGenerationDetector.resolveStopTriggers(functionsGrammar.stopGenerationTriggers, model.
|
|
257
|
+
StopGenerationDetector.resolveStopTriggers(functionsGrammar.stopGenerationTriggers, model.tokenizer)
|
|
254
258
|
.map((stopTrigger) => functionSyntaxEndDetector.addStopTrigger(stopTrigger));
|
|
255
259
|
let { firstDifferentIndex } = this._sequence.compareContextTokens(tokens);
|
|
256
260
|
// we need to decode at least one token to generate a response
|
|
@@ -279,193 +283,274 @@ export class LlamaChat {
|
|
|
279
283
|
},
|
|
280
284
|
tokenBias,
|
|
281
285
|
evaluationPriority,
|
|
282
|
-
|
|
286
|
+
yieldEogToken: true
|
|
283
287
|
}));
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
locksToReleaseOnValidGeneration.
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
288
|
+
try {
|
|
289
|
+
let currentIteration = await evaluationIterator.next();
|
|
290
|
+
while (currentIteration.done !== true) {
|
|
291
|
+
const token = currentIteration.value;
|
|
292
|
+
let replacementToken = undefined;
|
|
293
|
+
ensureNotAborted();
|
|
294
|
+
generatedTokens++;
|
|
295
|
+
const tokens = [token];
|
|
296
|
+
const text = model.detokenize([token]);
|
|
297
|
+
const queuedTokenRelease = streamRegulator.addChunk({ tokens, text });
|
|
298
|
+
if (initiallyEngagedFunctionMode)
|
|
299
|
+
disengageInitiallyEngagedFunctionMode.recordGeneration({ text, tokens, startNewChecks: generatedTokens === 1 });
|
|
300
|
+
if (text === UNKNOWN_UNICODE_CHAR || ((grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix) && text.trim() === "")) {
|
|
301
|
+
locksToReleaseOnValidGeneration.push(queuedTokenRelease.createTextIndexLock(0));
|
|
302
|
+
}
|
|
303
|
+
else {
|
|
304
|
+
while (locksToReleaseOnValidGeneration.length > 0)
|
|
305
|
+
locksToReleaseOnValidGeneration.shift().dispose();
|
|
306
|
+
}
|
|
307
|
+
functionSyntaxStartDetector.recordGeneration({ text, tokens, queuedTokenRelease });
|
|
308
|
+
if (initiallyEngagedFunctionMode && disengageInitiallyEngagedFunctionMode.hasTriggeredStops) {
|
|
309
|
+
initiallyEngagedFunctionMode = false;
|
|
310
|
+
let shouldStopFunctionEvaluationMode = !functionSyntaxStartDetector.hasTriggeredStops;
|
|
311
|
+
if (!shouldStopFunctionEvaluationMode && functionsEnabled && functionsGrammar != null) {
|
|
312
|
+
const functionCallText = model.detokenize([...functionCallTokens, ...tokens]);
|
|
313
|
+
try {
|
|
314
|
+
const functionName = functionsGrammar.parseFunctionNameFromPartialCall(functionCallText, {
|
|
315
|
+
enableInternalBuiltinFunctions: true,
|
|
316
|
+
initialFunctionCallEngaged: true
|
|
317
|
+
});
|
|
318
|
+
const internalBuiltinFunctions = this._chatWrapper.getInternalBuiltinFunctions({ initialFunctionCallEngaged: true });
|
|
319
|
+
if (internalBuiltinFunctions[functionName] != null) {
|
|
320
|
+
shouldStopFunctionEvaluationMode = true;
|
|
321
|
+
}
|
|
322
|
+
}
|
|
323
|
+
catch (err) {
|
|
324
|
+
if (!(err instanceof LlamaFunctionCallValidationError))
|
|
325
|
+
throw err;
|
|
313
326
|
}
|
|
314
327
|
}
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
328
|
+
if (shouldStopFunctionEvaluationMode) {
|
|
329
|
+
inFunctionEvaluationMode = false;
|
|
330
|
+
functionsGrammar = new FunctionCallGrammar(model._llama, functions, this._chatWrapper, false);
|
|
331
|
+
functionsEvaluationState = new LlamaGrammarEvaluationState({
|
|
332
|
+
grammar: functionsGrammar
|
|
333
|
+
});
|
|
334
|
+
functionCallTokens.length = 0;
|
|
335
|
+
while (functionCallTokenSyntaxLocks.length > 0)
|
|
336
|
+
functionCallTokenSyntaxLocks.shift().dispose();
|
|
337
|
+
functionSyntaxStartDetector.clearInProgressStops();
|
|
338
|
+
functionSyntaxStartDetector.clearTriggeredStops();
|
|
339
|
+
functionSyntaxEndDetector.clearInProgressStops();
|
|
340
|
+
functionSyntaxEndDetector.clearTriggeredStops();
|
|
318
341
|
}
|
|
319
342
|
}
|
|
320
|
-
if (
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
functionSyntaxStartDetector.
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
343
|
+
if (!inFunctionEvaluationMode && functionsEnabled && functionsGrammar != null &&
|
|
344
|
+
functionSyntaxStartDetector.hasTriggeredStops && functionsEvaluationState != null) {
|
|
345
|
+
inFunctionEvaluationMode = true;
|
|
346
|
+
functionCallTokenSyntaxLocks.push(queuedTokenRelease.createTextIndexLock(0));
|
|
347
|
+
stopGenerationDetector.clearTriggeredStops();
|
|
348
|
+
stopGenerationDetector.clearInProgressStops();
|
|
349
|
+
customStopGenerationTriggersDetector.clearTriggeredStops();
|
|
350
|
+
customStopGenerationTriggersDetector.clearInProgressStops();
|
|
351
|
+
pendingTokens.push(...streamRegulator.popFreeChunkTokens());
|
|
352
|
+
const triggeredStops = functionSyntaxStartDetector.getTriggeredStops();
|
|
353
|
+
const partiallyFreeTokens = streamRegulator.getPartiallyFreeChunk();
|
|
354
|
+
const queuedTokensBeforeStopTrigger = getQueuedTokensBeforeStopTrigger(triggeredStops, partiallyFreeTokens, model.tokenizer);
|
|
355
|
+
pendingTokens.push(...queuedTokensBeforeStopTrigger);
|
|
356
|
+
const [firstRemainingGenerationAfterStop] = triggeredStops
|
|
357
|
+
.map((stopTrigger) => stopTrigger.remainingGenerations)
|
|
358
|
+
.filter((remainingGenerations) => remainingGenerations.length > 0)
|
|
359
|
+
.flat(1);
|
|
360
|
+
const remainingTextAfterStop = (firstRemainingGenerationAfterStop == null || firstRemainingGenerationAfterStop.length === 0)
|
|
361
|
+
? ""
|
|
362
|
+
: typeof firstRemainingGenerationAfterStop === "string"
|
|
363
|
+
? firstRemainingGenerationAfterStop
|
|
364
|
+
: model.detokenize(firstRemainingGenerationAfterStop);
|
|
365
|
+
functionCallTokens.push(...model.tokenize(this._chatWrapper.settings.functions.call.prefix, false, "trimLeadingSpace"));
|
|
366
|
+
for (const functionCallToken of functionCallTokens)
|
|
367
|
+
context._acceptTokenOnGrammarEvaluationState(functionsEvaluationState, functionCallToken);
|
|
368
|
+
// these tokens have to be verified that they match the function calling syntax grammar before they can be accepted,
|
|
369
|
+
// or the context state should be modified to not include the incompatible tokens
|
|
370
|
+
const remainingTextTokens = model.tokenize(remainingTextAfterStop, false, "trimLeadingSpace");
|
|
371
|
+
let unfitTokens = [];
|
|
372
|
+
for (let i = 0; i < remainingTextTokens.length; i++) {
|
|
373
|
+
const remainingToken = remainingTextTokens[i];
|
|
374
|
+
const canBeNextToken = context._canBeNextTokenForGrammarEvaluationState(functionsEvaluationState, remainingToken);
|
|
375
|
+
if (!canBeNextToken) {
|
|
376
|
+
unfitTokens = remainingTextTokens.slice(i);
|
|
377
|
+
break;
|
|
378
|
+
}
|
|
379
|
+
context._acceptTokenOnGrammarEvaluationState(functionsEvaluationState, remainingToken);
|
|
380
|
+
functionCallTokens.push(remainingToken);
|
|
381
|
+
}
|
|
382
|
+
if (unfitTokens.length > 0) {
|
|
383
|
+
const unfitTokensText = model.detokenize(unfitTokens); // the current token text must end with it
|
|
384
|
+
const currentTokenText = queuedTokenRelease.text;
|
|
385
|
+
let replacementTokens;
|
|
386
|
+
if (!currentTokenText.endsWith(unfitTokensText)) {
|
|
387
|
+
console.warn(getConsoleLogPrefix() + "The current token text does not end with the unfit function call syntax tokens text");
|
|
388
|
+
replacementTokens = remainingTextTokens.slice(0, -unfitTokens.length);
|
|
389
|
+
}
|
|
390
|
+
else {
|
|
391
|
+
const newCurrentTokensText = currentTokenText.slice(0, -unfitTokensText.length);
|
|
392
|
+
replacementTokens = model.tokenize(newCurrentTokensText, false, "trimLeadingSpace");
|
|
393
|
+
}
|
|
394
|
+
if (replacementTokens.length > 0) {
|
|
395
|
+
replacementToken = replacementTokens[0];
|
|
396
|
+
queuedTokenRelease.modifyTokensAndText(replacementTokens, model.detokenize([replacementToken]));
|
|
397
|
+
}
|
|
398
|
+
}
|
|
333
399
|
}
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
functionCallTokenSyntaxLocks.push(queuedTokenRelease.createTextIndexLock(0));
|
|
339
|
-
stopGenerationDetector.clearTriggeredStops();
|
|
340
|
-
stopGenerationDetector.clearInProgressStops();
|
|
341
|
-
pendingTokens.push(...streamRegulator.popFreeChunkTokens());
|
|
342
|
-
const triggeredStops = functionSyntaxStartDetector.getTriggeredStops();
|
|
343
|
-
const partiallyFreeTokens = streamRegulator.getPartiallyFreeChunk();
|
|
344
|
-
const queuedTokensBeforeStopTrigger = getQueuedTokensBeforeStopTrigger(triggeredStops, partiallyFreeTokens, model.tokenize);
|
|
345
|
-
pendingTokens.push(...queuedTokensBeforeStopTrigger);
|
|
346
|
-
const [firstRemainingGenerationAfterStop] = triggeredStops
|
|
347
|
-
.map((stopTrigger) => stopTrigger.remainingGenerations)
|
|
348
|
-
.filter((remainingGenerations) => remainingGenerations.length > 0)
|
|
349
|
-
.flat(1);
|
|
350
|
-
const remainingTextAfterStop = (firstRemainingGenerationAfterStop == null || firstRemainingGenerationAfterStop.length === 0)
|
|
351
|
-
? ""
|
|
352
|
-
: typeof firstRemainingGenerationAfterStop === "string"
|
|
353
|
-
? firstRemainingGenerationAfterStop
|
|
354
|
-
: model.detokenize(firstRemainingGenerationAfterStop);
|
|
355
|
-
functionCallTokens.push(...model.tokenize(this._chatWrapper.settings.functions.call.prefix + remainingTextAfterStop, false, "trimLeadingSpace"));
|
|
356
|
-
for (const functionCallToken of functionCallTokens)
|
|
357
|
-
context._acceptTokenOnGrammarEvaluationState(functionsEvaluationState, functionCallToken);
|
|
358
|
-
}
|
|
359
|
-
else if (inFunctionEvaluationMode) {
|
|
360
|
-
functionCallTokens.push(...tokens);
|
|
361
|
-
functionCallTokenSyntaxLocks.push(queuedTokenRelease.createTextIndexLock(0));
|
|
362
|
-
functionSyntaxEndDetector.recordGeneration({ text, tokens, queuedTokenRelease });
|
|
363
|
-
}
|
|
364
|
-
if (inFunctionEvaluationMode && functionSyntaxEndDetector.hasTriggeredStops && functionsGrammar != null) {
|
|
365
|
-
const functionCallText = model.detokenize(functionCallTokens);
|
|
366
|
-
const functionCall = functionsGrammar.parseFunctionCall(functionCallText);
|
|
367
|
-
let modelResponse = model.detokenize(res);
|
|
368
|
-
let contextWindowModelResponse = model.detokenize(contextWindowsRes);
|
|
369
|
-
if (grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix) {
|
|
370
|
-
modelResponse = modelResponse.trimEnd();
|
|
371
|
-
contextWindowModelResponse = contextWindowModelResponse.trimEnd();
|
|
400
|
+
else if (inFunctionEvaluationMode) {
|
|
401
|
+
functionCallTokens.push(...tokens);
|
|
402
|
+
functionCallTokenSyntaxLocks.push(queuedTokenRelease.createTextIndexLock(0));
|
|
403
|
+
functionSyntaxEndDetector.recordGeneration({ text, tokens, queuedTokenRelease });
|
|
372
404
|
}
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
functionCall: functionCall,
|
|
382
|
-
metadata: {
|
|
383
|
-
stopReason: "functionCall"
|
|
405
|
+
if (inFunctionEvaluationMode && functionSyntaxEndDetector.hasTriggeredStops && functionsGrammar != null) {
|
|
406
|
+
const functionCallText = model.detokenize(functionCallTokens);
|
|
407
|
+
const functionCall = functionsGrammar.parseFunctionCall(functionCallText);
|
|
408
|
+
let modelResponse = model.detokenize(res);
|
|
409
|
+
let contextWindowModelResponse = model.detokenize(contextWindowsRes);
|
|
410
|
+
if (grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix) {
|
|
411
|
+
modelResponse = modelResponse.trimEnd();
|
|
412
|
+
contextWindowModelResponse = contextWindowModelResponse.trimEnd();
|
|
384
413
|
}
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
.filter((remainingGenerations) => remainingGenerations.length > 0)
|
|
399
|
-
.flat(1);
|
|
400
|
-
removeFoundStartIgnoreTextsFromPendingTokens();
|
|
401
|
-
if (pendingTokens.length > 0)
|
|
402
|
-
onToken?.(pendingTokens.slice());
|
|
403
|
-
res.push(...pendingTokens);
|
|
404
|
-
contextWindowsRes.push(...pendingTokens);
|
|
405
|
-
pendingTokens.length = 0;
|
|
406
|
-
let modelResponse = model.detokenize(res);
|
|
407
|
-
let contextWindowModelResponse = model.detokenize(contextWindowsRes);
|
|
408
|
-
if (grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix) {
|
|
409
|
-
modelResponse = modelResponse.trimEnd();
|
|
410
|
-
contextWindowModelResponse = contextWindowModelResponse.trimEnd();
|
|
414
|
+
return {
|
|
415
|
+
response: modelResponse,
|
|
416
|
+
lastEvaluation: {
|
|
417
|
+
contextWindow: setLastModelTextResponseInChatHistory(lastContextWindowHistory, contextWindowLastModelResponse + contextWindowModelResponse),
|
|
418
|
+
cleanHistory: setLastModelTextResponseInChatHistory(resolvedHistory, lastModelResponse + modelResponse),
|
|
419
|
+
contextShiftMetadata: lastHistoryCompressionMetadata
|
|
420
|
+
},
|
|
421
|
+
// prevent infinite TS type instantiation
|
|
422
|
+
functionCall: functionCall,
|
|
423
|
+
metadata: {
|
|
424
|
+
stopReason: "functionCall"
|
|
425
|
+
}
|
|
426
|
+
};
|
|
411
427
|
}
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
contextShiftMetadata: lastHistoryCompressionMetadata
|
|
418
|
-
},
|
|
419
|
-
metadata: {
|
|
420
|
-
remainingGenerationAfterStop: firstRemainingGenerationAfterStop,
|
|
421
|
-
stopReason: token === eosToken
|
|
422
|
-
? "eosToken"
|
|
423
|
-
: "stopGenerationTrigger"
|
|
424
|
-
}
|
|
425
|
-
};
|
|
426
|
-
}
|
|
427
|
-
const maxTokensTriggered = maxTokens != null && maxTokens > 0 && generatedTokens >= maxTokens;
|
|
428
|
-
if (res.length === 0) {
|
|
429
|
-
ignoreStartTextDetector.clearInProgressStops();
|
|
430
|
-
ignoreStartTextDetector.clearTriggeredStops();
|
|
431
|
-
ignoreStartTextDetector.recordGeneration({
|
|
432
|
-
text: model.detokenize(pendingTokens),
|
|
433
|
-
tokens: pendingTokens
|
|
434
|
-
});
|
|
435
|
-
}
|
|
436
|
-
if (pendingTokens.length > 0 && (maxTokensTriggered || !ignoreStartTextDetector.hasInProgressStops)) {
|
|
428
|
+
if (!inFunctionEvaluationMode) {
|
|
429
|
+
stopGenerationDetector.recordGeneration({ text, tokens, queuedTokenRelease });
|
|
430
|
+
customStopGenerationTriggersDetector.recordGeneration({ text, tokens, queuedTokenRelease });
|
|
431
|
+
}
|
|
432
|
+
pendingTokens.push(...streamRegulator.popFreeChunkTokens());
|
|
437
433
|
removeFoundStartIgnoreTextsFromPendingTokens();
|
|
438
|
-
if (
|
|
439
|
-
|
|
434
|
+
if (stopGenerationDetector.hasTriggeredStops || customStopGenerationTriggersDetector.hasTriggeredStops ||
|
|
435
|
+
model.isEogToken(token)) {
|
|
436
|
+
const triggeredStops = stopGenerationDetector.hasTriggeredStops
|
|
437
|
+
? stopGenerationDetector.getTriggeredStops()
|
|
438
|
+
: customStopGenerationTriggersDetector.getTriggeredStops();
|
|
439
|
+
const partiallyFreeTokens = streamRegulator.getPartiallyFreeChunk();
|
|
440
|
+
const queuedTokensBeforeStopTrigger = getQueuedTokensBeforeStopTrigger(triggeredStops, partiallyFreeTokens, model.tokenizer);
|
|
441
|
+
pendingTokens.push(...queuedTokensBeforeStopTrigger);
|
|
442
|
+
const [firstRemainingGenerationAfterStop] = triggeredStops
|
|
443
|
+
.map((stopTrigger) => stopTrigger.remainingGenerations)
|
|
444
|
+
.filter((remainingGenerations) => remainingGenerations.length > 0)
|
|
445
|
+
.flat(1);
|
|
446
|
+
removeFoundStartIgnoreTextsFromPendingTokens();
|
|
447
|
+
if (pendingTokens.length > 0)
|
|
448
|
+
onToken?.(pendingTokens.slice());
|
|
440
449
|
res.push(...pendingTokens);
|
|
441
450
|
contextWindowsRes.push(...pendingTokens);
|
|
442
451
|
pendingTokens.length = 0;
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
contextWindowModelResponse = contextWindowModelResponse.trimEnd();
|
|
451
|
-
}
|
|
452
|
-
return {
|
|
453
|
-
response: modelResponse,
|
|
454
|
-
lastEvaluation: {
|
|
452
|
+
let modelResponse = model.detokenize(res);
|
|
453
|
+
let contextWindowModelResponse = model.detokenize(contextWindowsRes);
|
|
454
|
+
if (grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix) {
|
|
455
|
+
modelResponse = modelResponse.trimEnd();
|
|
456
|
+
contextWindowModelResponse = contextWindowModelResponse.trimEnd();
|
|
457
|
+
}
|
|
458
|
+
const lastEvaluation = {
|
|
455
459
|
contextWindow: setLastModelTextResponseInChatHistory(lastContextWindowHistory, contextWindowLastModelResponse + contextWindowModelResponse),
|
|
456
460
|
cleanHistory: setLastModelTextResponseInChatHistory(resolvedHistory, lastModelResponse + modelResponse),
|
|
457
461
|
contextShiftMetadata: lastHistoryCompressionMetadata
|
|
458
|
-
}
|
|
459
|
-
|
|
460
|
-
|
|
462
|
+
};
|
|
463
|
+
const isEogToken = model.isEogToken(token);
|
|
464
|
+
if (isEogToken || stopGenerationDetector.hasTriggeredStops) {
|
|
465
|
+
return {
|
|
466
|
+
response: modelResponse,
|
|
467
|
+
lastEvaluation,
|
|
468
|
+
metadata: {
|
|
469
|
+
remainingGenerationAfterStop: firstRemainingGenerationAfterStop,
|
|
470
|
+
stopReason: isEogToken
|
|
471
|
+
? "eogToken"
|
|
472
|
+
: "stopGenerationTrigger"
|
|
473
|
+
}
|
|
474
|
+
};
|
|
461
475
|
}
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
476
|
+
return {
|
|
477
|
+
response: modelResponse,
|
|
478
|
+
lastEvaluation,
|
|
479
|
+
metadata: {
|
|
480
|
+
remainingGenerationAfterStop: firstRemainingGenerationAfterStop,
|
|
481
|
+
stopReason: "customStopTrigger",
|
|
482
|
+
customStopTrigger: triggeredStops[0].stopTrigger
|
|
483
|
+
}
|
|
484
|
+
};
|
|
485
|
+
}
|
|
486
|
+
const maxTokensTriggered = maxTokens != null && maxTokens > 0 && generatedTokens >= maxTokens;
|
|
487
|
+
if (res.length === 0) {
|
|
488
|
+
ignoreStartTextDetector.clearInProgressStops();
|
|
489
|
+
ignoreStartTextDetector.clearTriggeredStops();
|
|
490
|
+
ignoreStartTextDetector.recordGeneration({
|
|
491
|
+
text: model.detokenize(pendingTokens),
|
|
492
|
+
tokens: pendingTokens
|
|
493
|
+
});
|
|
494
|
+
}
|
|
495
|
+
if (pendingTokens.length > 0 && (maxTokensTriggered || !ignoreStartTextDetector.hasInProgressStops)) {
|
|
496
|
+
removeFoundStartIgnoreTextsFromPendingTokens();
|
|
497
|
+
if (pendingTokens.length > 0) {
|
|
498
|
+
onToken?.(pendingTokens.slice());
|
|
499
|
+
res.push(...pendingTokens);
|
|
500
|
+
contextWindowsRes.push(...pendingTokens);
|
|
501
|
+
pendingTokens.length = 0;
|
|
502
|
+
}
|
|
503
|
+
}
|
|
504
|
+
if (maxTokensTriggered) {
|
|
505
|
+
let modelResponse = model.detokenize(res);
|
|
506
|
+
let contextWindowModelResponse = model.detokenize(contextWindowsRes);
|
|
507
|
+
if (grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix) {
|
|
508
|
+
modelResponse = modelResponse.trimEnd();
|
|
509
|
+
contextWindowModelResponse = contextWindowModelResponse.trimEnd();
|
|
510
|
+
}
|
|
511
|
+
return {
|
|
512
|
+
response: modelResponse,
|
|
513
|
+
lastEvaluation: {
|
|
514
|
+
contextWindow: setLastModelTextResponseInChatHistory(lastContextWindowHistory, contextWindowLastModelResponse + contextWindowModelResponse),
|
|
515
|
+
cleanHistory: setLastModelTextResponseInChatHistory(resolvedHistory, lastModelResponse + modelResponse),
|
|
516
|
+
contextShiftMetadata: lastHistoryCompressionMetadata
|
|
517
|
+
},
|
|
518
|
+
metadata: {
|
|
519
|
+
stopReason: "maxTokens"
|
|
520
|
+
}
|
|
521
|
+
};
|
|
522
|
+
}
|
|
523
|
+
if (this._sequence.nextTokenIndex >= context.contextSize - 1) {
|
|
524
|
+
shouldContextShift = true;
|
|
525
|
+
break;
|
|
526
|
+
}
|
|
527
|
+
if (signal?.aborted && stopOnAbortSignal) {
|
|
528
|
+
if (res.length === 0)
|
|
529
|
+
throw signal.reason;
|
|
530
|
+
let modelResponse = model.detokenize(res);
|
|
531
|
+
let contextWindowModelResponse = model.detokenize(contextWindowsRes);
|
|
532
|
+
if (grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix) {
|
|
533
|
+
modelResponse = modelResponse.trimEnd();
|
|
534
|
+
contextWindowModelResponse = contextWindowModelResponse.trimEnd();
|
|
535
|
+
}
|
|
536
|
+
return {
|
|
537
|
+
response: modelResponse,
|
|
538
|
+
lastEvaluation: {
|
|
539
|
+
contextWindow: setLastModelTextResponseInChatHistory(lastContextWindowHistory, contextWindowLastModelResponse + contextWindowModelResponse),
|
|
540
|
+
cleanHistory: setLastModelTextResponseInChatHistory(resolvedHistory, lastModelResponse + modelResponse),
|
|
541
|
+
contextShiftMetadata: lastHistoryCompressionMetadata
|
|
542
|
+
},
|
|
543
|
+
metadata: {
|
|
544
|
+
stopReason: "abort"
|
|
545
|
+
}
|
|
546
|
+
};
|
|
547
|
+
}
|
|
548
|
+
currentIteration = await evaluationIterator.next(replacementToken);
|
|
467
549
|
}
|
|
468
550
|
}
|
|
551
|
+
finally {
|
|
552
|
+
await evaluationIterator.return();
|
|
553
|
+
}
|
|
469
554
|
isFirstEvaluation = false;
|
|
470
555
|
if (shouldContextShift)
|
|
471
556
|
continue;
|
|
@@ -593,7 +678,7 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
|
|
|
593
678
|
availableFunctions: functions,
|
|
594
679
|
documentFunctionParams
|
|
595
680
|
});
|
|
596
|
-
const tokens = contextText.tokenize(model.
|
|
681
|
+
const tokens = contextText.tokenize(model.tokenizer);
|
|
597
682
|
if (tokens.length + pendingTokensCount + minFreeContextTokens < context.contextSize) {
|
|
598
683
|
const { firstDifferentIndex } = sequence.compareContextTokens(tokens);
|
|
599
684
|
const existingEvaluationPercentage = firstDifferentIndex / tokens.length;
|
|
@@ -619,11 +704,11 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
|
|
|
619
704
|
: resolvedContextShift.size;
|
|
620
705
|
const { compressedHistory, metadata } = await compressHistoryToFitContextSize({
|
|
621
706
|
history: resolvedHistory,
|
|
622
|
-
contextShiftSize: Math.max(contextShiftSize,
|
|
707
|
+
contextShiftSize: Math.max(minFreeContextTokens, Math.min(contextShiftSize, context.contextSize - pendingTokensCount)) + pendingTokensCount,
|
|
623
708
|
contextShiftStrategy: resolvedContextShift.strategy,
|
|
624
709
|
contextShiftLastEvaluationMetadata: resolvedContextShift.lastEvaluationMetadata,
|
|
625
710
|
contextSize: context.contextSize,
|
|
626
|
-
tokenizer: model.
|
|
711
|
+
tokenizer: model.tokenizer,
|
|
627
712
|
chatWrapper: chatWrapper,
|
|
628
713
|
functions,
|
|
629
714
|
documentFunctionParams
|
|
@@ -635,7 +720,7 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
|
|
|
635
720
|
return {
|
|
636
721
|
history: compressedHistory,
|
|
637
722
|
stopGenerationTriggers,
|
|
638
|
-
tokens: contextText.tokenize(model.
|
|
723
|
+
tokens: contextText.tokenize(model.tokenizer),
|
|
639
724
|
newResolvedHistory: resolvedHistory,
|
|
640
725
|
newHistoryCompressionMetadata: metadata,
|
|
641
726
|
ignoreStartText: ignoreStartText ?? [],
|
|
@@ -648,7 +733,7 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
|
|
|
648
733
|
availableFunctions: functions,
|
|
649
734
|
documentFunctionParams
|
|
650
735
|
});
|
|
651
|
-
const tokens = contextText.tokenize(model.
|
|
736
|
+
const tokens = contextText.tokenize(model.tokenizer);
|
|
652
737
|
if (tokens.length + pendingTokensCount + minFreeContextTokens < context.contextSize)
|
|
653
738
|
return {
|
|
654
739
|
history: resolvedHistory,
|
|
@@ -666,11 +751,11 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
|
|
|
666
751
|
: resolvedContextShift.size)));
|
|
667
752
|
const { compressedHistory, metadata } = await compressHistoryToFitContextSize({
|
|
668
753
|
history: resolvedHistory,
|
|
669
|
-
contextShiftSize: Math.max(contextShiftSize,
|
|
754
|
+
contextShiftSize: Math.max(minFreeContextTokens, Math.min(contextShiftSize, context.contextSize - pendingTokensCount)) + pendingTokensCount,
|
|
670
755
|
contextShiftStrategy: resolvedContextShift.strategy,
|
|
671
756
|
contextShiftLastEvaluationMetadata: resolvedContextShift.lastEvaluationMetadata,
|
|
672
757
|
contextSize: context.contextSize,
|
|
673
|
-
tokenizer: model.
|
|
758
|
+
tokenizer: model.tokenizer,
|
|
674
759
|
chatWrapper: chatWrapper,
|
|
675
760
|
functions,
|
|
676
761
|
documentFunctionParams
|
|
@@ -682,7 +767,7 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
|
|
|
682
767
|
return {
|
|
683
768
|
history: compressedHistory,
|
|
684
769
|
stopGenerationTriggers,
|
|
685
|
-
tokens: contextText.tokenize(model.
|
|
770
|
+
tokens: contextText.tokenize(model.tokenizer),
|
|
686
771
|
newResolvedHistory: resolvedHistory,
|
|
687
772
|
newHistoryCompressionMetadata: metadata,
|
|
688
773
|
ignoreStartText: ignoreStartText ?? [],
|