node-llama-cpp 3.0.0-beta.17 → 3.0.0-beta.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -5
- package/dist/ChatWrapper.js +3 -3
- package/dist/ChatWrapper.js.map +1 -1
- package/dist/apiDocsOverrides.d.ts +1 -0
- package/dist/apiDocsOverrides.js +5 -0
- package/dist/apiDocsOverrides.js.map +1 -0
- package/dist/bindings/AddonTypes.d.ts +1 -0
- package/dist/bindings/getLlama.d.ts +5 -1
- package/dist/bindings/getLlama.js +11 -4
- package/dist/bindings/getLlama.js.map +1 -1
- package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.d.ts +3 -0
- package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.js +27 -0
- package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.js.map +1 -0
- package/dist/chatWrappers/generic/TemplateChatWrapper.d.ts +1 -2
- package/dist/chatWrappers/generic/TemplateChatWrapper.js +1 -2
- package/dist/chatWrappers/generic/TemplateChatWrapper.js.map +1 -1
- package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js +2 -2
- package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js.map +1 -1
- package/dist/cli/cli.js +5 -3
- package/dist/cli/cli.js.map +1 -1
- package/dist/cli/commands/DebugCommand.js +3 -5
- package/dist/cli/commands/DebugCommand.js.map +1 -1
- package/dist/cli/commands/DownloadCommand.d.ts +1 -1
- package/dist/cli/commands/DownloadCommand.js +2 -1
- package/dist/cli/commands/DownloadCommand.js.map +1 -1
- package/dist/cli/commands/PullCommand.d.ts +12 -0
- package/dist/cli/commands/PullCommand.js +109 -0
- package/dist/cli/commands/PullCommand.js.map +1 -0
- package/dist/cli/commands/inspect/commands/InspectGgufCommand.d.ts +1 -0
- package/dist/cli/commands/inspect/commands/InspectGgufCommand.js +23 -11
- package/dist/cli/commands/inspect/commands/InspectGgufCommand.js.map +1 -1
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js +2 -1
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js.map +1 -1
- package/dist/cli/recommendedModels.js +12 -20
- package/dist/cli/recommendedModels.js.map +1 -1
- package/dist/cli/utils/resolveCommandGgufPath.d.ts +3 -1
- package/dist/cli/utils/resolveCommandGgufPath.js +41 -97
- package/dist/cli/utils/resolveCommandGgufPath.js.map +1 -1
- package/dist/cli/utils/resolveModelRecommendationFileOptions.d.ts +2 -2
- package/dist/cli/utils/resolveModelRecommendationFileOptions.js +1 -4
- package/dist/cli/utils/resolveModelRecommendationFileOptions.js.map +1 -1
- package/dist/evaluator/LlamaChat/LlamaChat.d.ts +18 -2
- package/dist/evaluator/LlamaChat/LlamaChat.js +255 -205
- package/dist/evaluator/LlamaChat/LlamaChat.js.map +1 -1
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.d.ts +22 -3
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.js +18 -7
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.js.map +1 -1
- package/dist/evaluator/LlamaCompletion.js +1 -1
- package/dist/evaluator/LlamaCompletion.js.map +1 -1
- package/dist/evaluator/LlamaContext/LlamaContext.d.ts +2 -7
- package/dist/evaluator/LlamaContext/LlamaContext.js +12 -12
- package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -1
- package/dist/evaluator/LlamaEmbeddingContext.d.ts +2 -10
- package/dist/evaluator/LlamaEmbeddingContext.js +9 -23
- package/dist/evaluator/LlamaEmbeddingContext.js.map +1 -1
- package/dist/evaluator/LlamaGrammar.d.ts +1 -1
- package/dist/evaluator/LlamaModel.d.ts +9 -0
- package/dist/evaluator/LlamaModel.js +2 -1
- package/dist/evaluator/LlamaModel.js.map +1 -1
- package/dist/gguf/insights/GgufInsights.js +12 -12
- package/dist/gguf/insights/GgufInsights.js.map +1 -1
- package/dist/gguf/insights/utils/resolveContextContextSizeOption.js +27 -3
- package/dist/gguf/insights/utils/resolveContextContextSizeOption.js.map +1 -1
- package/dist/gguf/parser/parseGguf.js +5 -0
- package/dist/gguf/parser/parseGguf.js.map +1 -1
- package/dist/gguf/readGgufFileInfo.d.ts +5 -2
- package/dist/gguf/readGgufFileInfo.js +38 -10
- package/dist/gguf/readGgufFileInfo.js.map +1 -1
- package/dist/gguf/types/GgufFileInfoTypes.d.ts +32 -0
- package/dist/gguf/types/GgufFileInfoTypes.js.map +1 -1
- package/dist/gguf/utils/getGgufMetadataArchitectureData.js +1 -1
- package/dist/gguf/utils/getGgufMetadataArchitectureData.js.map +1 -1
- package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.d.ts +2 -0
- package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.js +39 -0
- package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.js.map +1 -0
- package/dist/gguf/utils/resolveSplitGgufParts.d.ts +7 -0
- package/dist/gguf/utils/resolveSplitGgufParts.js +55 -0
- package/dist/gguf/utils/resolveSplitGgufParts.js.map +1 -0
- package/dist/index.d.ts +4 -2
- package/dist/index.js +4 -2
- package/dist/index.js.map +1 -1
- package/dist/utils/LlamaText.d.ts +29 -20
- package/dist/utils/LlamaText.js +253 -243
- package/dist/utils/LlamaText.js.map +1 -1
- package/dist/utils/StopGenerationDetector.d.ts +1 -1
- package/dist/utils/StopGenerationDetector.js +2 -0
- package/dist/utils/StopGenerationDetector.js.map +1 -1
- package/dist/utils/createModelDownloader.d.ts +99 -0
- package/dist/utils/createModelDownloader.js +226 -0
- package/dist/utils/createModelDownloader.js.map +1 -0
- package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js +18 -8
- package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js.map +1 -1
- package/dist/utils/parseTextTemplate.d.ts +2 -2
- package/dist/utils/parseTextTemplate.js +2 -2
- package/dist/utils/runtime.d.ts +4 -0
- package/dist/utils/runtime.js +8 -0
- package/dist/utils/runtime.js.map +1 -0
- package/llama/addon.cpp +18 -7
- package/llama/binariesGithubRelease.json +1 -1
- package/llama/gitRelease.bundle +0 -0
- package/llama/grammars/README.md +1 -1
- package/llama/llama.cpp.info.json +1 -1
- package/llamaBins/linux-arm64/_nlcBuildMetadata.json +1 -1
- package/llamaBins/linux-arm64/llama-addon.node +0 -0
- package/llamaBins/linux-armv7l/_nlcBuildMetadata.json +1 -1
- package/llamaBins/linux-armv7l/llama-addon.node +0 -0
- package/llamaBins/linux-x64/_nlcBuildMetadata.json +1 -1
- package/llamaBins/linux-x64/llama-addon.node +0 -0
- package/llamaBins/linux-x64-cuda/_nlcBuildMetadata.json +1 -1
- package/llamaBins/linux-x64-cuda/llama-addon.node +0 -0
- package/llamaBins/linux-x64-vulkan/_nlcBuildMetadata.json +1 -1
- package/llamaBins/linux-x64-vulkan/llama-addon.node +0 -0
- package/llamaBins/mac-arm64-metal/_nlcBuildMetadata.json +1 -1
- package/llamaBins/mac-arm64-metal/default.metallib +0 -0
- package/llamaBins/mac-arm64-metal/llama-addon.node +0 -0
- package/llamaBins/mac-x64/_nlcBuildMetadata.json +1 -1
- package/llamaBins/mac-x64/llama-addon.node +0 -0
- package/llamaBins/win-arm64/_nlcBuildMetadata.json +1 -1
- package/llamaBins/win-arm64/llama-addon.node +0 -0
- package/llamaBins/win-x64/_nlcBuildMetadata.json +1 -1
- package/llamaBins/win-x64/llama-addon.node +0 -0
- package/llamaBins/win-x64-cuda/_nlcBuildMetadata.json +1 -1
- package/llamaBins/win-x64-cuda/llama-addon.node +0 -0
- package/llamaBins/win-x64-vulkan/_nlcBuildMetadata.json +1 -1
- package/llamaBins/win-x64-vulkan/llama-addon.node +0 -0
- package/package.json +8 -5
|
@@ -72,7 +72,7 @@ export class LlamaChat {
|
|
|
72
72
|
get model() {
|
|
73
73
|
return this.sequence.model;
|
|
74
74
|
}
|
|
75
|
-
async generateResponse(history, { onToken, signal, maxTokens, temperature, minP, topK, topP, grammar, trimWhitespaceSuffix = false, repeatPenalty = {}, tokenBias, evaluationPriority = 5, functions, documentFunctionParams, contextShift = defaultContextShiftOptions, lastEvaluationContextWindow: { history: lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift = 0.5 } = {} } = {}) {
|
|
75
|
+
async generateResponse(history, { onToken, signal, stopOnAbortSignal = false, maxTokens, temperature, minP, topK, topP, grammar, trimWhitespaceSuffix = false, repeatPenalty = {}, tokenBias, evaluationPriority = 5, functions, documentFunctionParams, contextShift = defaultContextShiftOptions, customStopTriggers, lastEvaluationContextWindow: { history: lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift = 0.5 } = {} } = {}) {
|
|
76
76
|
const functionsEnabled = (functions != null && Object.keys(functions).length > 0);
|
|
77
77
|
if (grammar != null && functionsEnabled)
|
|
78
78
|
throw new Error("Using both grammar and functions is not supported yet");
|
|
@@ -116,6 +116,7 @@ export class LlamaChat {
|
|
|
116
116
|
: undefined;
|
|
117
117
|
const streamRegulator = new TokenStreamRegulator();
|
|
118
118
|
const stopGenerationDetector = new StopGenerationDetector();
|
|
119
|
+
const customStopGenerationTriggersDetector = new StopGenerationDetector();
|
|
119
120
|
const functionSyntaxStartDetector = new StopGenerationDetector();
|
|
120
121
|
const functionSyntaxEndDetector = new StopGenerationDetector();
|
|
121
122
|
const disengageInitiallyEngagedFunctionMode = new StopGenerationDetector();
|
|
@@ -129,7 +130,7 @@ export class LlamaChat {
|
|
|
129
130
|
let lastContextWindowHistory = resolvedHistory;
|
|
130
131
|
let lastHistoryCompressionMetadata = resolvedContextShift.lastEvaluationMetadata;
|
|
131
132
|
const ensureNotAborted = () => {
|
|
132
|
-
if (signal?.aborted)
|
|
133
|
+
if (signal?.aborted && (!stopOnAbortSignal || res.length === 0))
|
|
133
134
|
throw signal.reason;
|
|
134
135
|
if (this._sequence == null)
|
|
135
136
|
throw new DisposedError();
|
|
@@ -200,6 +201,9 @@ export class LlamaChat {
|
|
|
200
201
|
}
|
|
201
202
|
}
|
|
202
203
|
};
|
|
204
|
+
if (customStopTriggers != null)
|
|
205
|
+
StopGenerationDetector.resolveStopTriggers(customStopTriggers, model.tokenizer)
|
|
206
|
+
.map((stopTrigger) => customStopGenerationTriggersDetector.addStopTrigger(stopTrigger));
|
|
203
207
|
if (grammar != null)
|
|
204
208
|
StopGenerationDetector.resolveStopTriggers(grammar.stopGenerationTriggers, model.tokenizer)
|
|
205
209
|
.map((stopTrigger) => stopGenerationDetector.addStopTrigger(stopTrigger));
|
|
@@ -214,7 +218,7 @@ export class LlamaChat {
|
|
|
214
218
|
resolvedHistory: getResolvedHistoryWithCurrentModelResponse(),
|
|
215
219
|
resolvedContextShift,
|
|
216
220
|
lastHistoryCompressionMetadata,
|
|
217
|
-
pendingTokensCount: pendingTokens.length + queuedChunkTokens.length,
|
|
221
|
+
pendingTokensCount: ignoredStartTextTokens.length + pendingTokens.length + queuedChunkTokens.length,
|
|
218
222
|
isFirstEvaluation,
|
|
219
223
|
chatWrapper: this._chatWrapper,
|
|
220
224
|
lastEvaluationContextWindowHistory,
|
|
@@ -281,225 +285,271 @@ export class LlamaChat {
|
|
|
281
285
|
evaluationPriority,
|
|
282
286
|
yieldEogToken: true
|
|
283
287
|
}));
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
else {
|
|
299
|
-
while (locksToReleaseOnValidGeneration.length > 0)
|
|
300
|
-
locksToReleaseOnValidGeneration.shift().dispose();
|
|
301
|
-
}
|
|
302
|
-
functionSyntaxStartDetector.recordGeneration({ text, tokens, queuedTokenRelease });
|
|
303
|
-
if (initiallyEngagedFunctionMode && disengageInitiallyEngagedFunctionMode.hasTriggeredStops) {
|
|
304
|
-
initiallyEngagedFunctionMode = false;
|
|
305
|
-
let shouldStopFunctionEvaluationMode = !functionSyntaxStartDetector.hasTriggeredStops;
|
|
306
|
-
if (!shouldStopFunctionEvaluationMode && functionsEnabled && functionsGrammar != null) {
|
|
307
|
-
const functionCallText = model.detokenize([...functionCallTokens, ...tokens]);
|
|
308
|
-
try {
|
|
309
|
-
const functionName = functionsGrammar.parseFunctionNameFromPartialCall(functionCallText, {
|
|
310
|
-
enableInternalBuiltinFunctions: true,
|
|
311
|
-
initialFunctionCallEngaged: true
|
|
312
|
-
});
|
|
313
|
-
const internalBuiltinFunctions = this._chatWrapper.getInternalBuiltinFunctions({ initialFunctionCallEngaged: true });
|
|
314
|
-
if (internalBuiltinFunctions[functionName] != null) {
|
|
315
|
-
shouldStopFunctionEvaluationMode = true;
|
|
316
|
-
}
|
|
317
|
-
}
|
|
318
|
-
catch (err) {
|
|
319
|
-
if (!(err instanceof LlamaFunctionCallValidationError))
|
|
320
|
-
throw err;
|
|
321
|
-
}
|
|
288
|
+
try {
|
|
289
|
+
let currentIteration = await evaluationIterator.next();
|
|
290
|
+
while (currentIteration.done !== true) {
|
|
291
|
+
const token = currentIteration.value;
|
|
292
|
+
let replacementToken = undefined;
|
|
293
|
+
ensureNotAborted();
|
|
294
|
+
generatedTokens++;
|
|
295
|
+
const tokens = [token];
|
|
296
|
+
const text = model.detokenize([token]);
|
|
297
|
+
const queuedTokenRelease = streamRegulator.addChunk({ tokens, text });
|
|
298
|
+
if (initiallyEngagedFunctionMode)
|
|
299
|
+
disengageInitiallyEngagedFunctionMode.recordGeneration({ text, tokens, startNewChecks: generatedTokens === 1 });
|
|
300
|
+
if (text === UNKNOWN_UNICODE_CHAR || ((grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix) && text.trim() === "")) {
|
|
301
|
+
locksToReleaseOnValidGeneration.push(queuedTokenRelease.createTextIndexLock(0));
|
|
322
302
|
}
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
functionsEvaluationState = new LlamaGrammarEvaluationState({
|
|
327
|
-
grammar: functionsGrammar
|
|
328
|
-
});
|
|
329
|
-
functionCallTokens.length = 0;
|
|
330
|
-
while (functionCallTokenSyntaxLocks.length > 0)
|
|
331
|
-
functionCallTokenSyntaxLocks.shift().dispose();
|
|
332
|
-
functionSyntaxStartDetector.clearInProgressStops();
|
|
333
|
-
functionSyntaxStartDetector.clearTriggeredStops();
|
|
334
|
-
functionSyntaxEndDetector.clearInProgressStops();
|
|
335
|
-
functionSyntaxEndDetector.clearTriggeredStops();
|
|
303
|
+
else {
|
|
304
|
+
while (locksToReleaseOnValidGeneration.length > 0)
|
|
305
|
+
locksToReleaseOnValidGeneration.shift().dispose();
|
|
336
306
|
}
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
: model.detokenize(firstRemainingGenerationAfterStop);
|
|
358
|
-
functionCallTokens.push(...model.tokenize(this._chatWrapper.settings.functions.call.prefix, false, "trimLeadingSpace"));
|
|
359
|
-
for (const functionCallToken of functionCallTokens)
|
|
360
|
-
context._acceptTokenOnGrammarEvaluationState(functionsEvaluationState, functionCallToken);
|
|
361
|
-
// these tokens have to be verified that they match the function calling syntax grammar before they can be accepted,
|
|
362
|
-
// or the context state should be modified to not include the incompatible tokens
|
|
363
|
-
const remainingTextTokens = model.tokenize(remainingTextAfterStop, false, "trimLeadingSpace");
|
|
364
|
-
let unfitTokens = [];
|
|
365
|
-
for (let i = 0; i < remainingTextTokens.length; i++) {
|
|
366
|
-
const remainingToken = remainingTextTokens[i];
|
|
367
|
-
const canBeNextToken = context._canBeNextTokenForGrammarEvaluationState(functionsEvaluationState, remainingToken);
|
|
368
|
-
if (!canBeNextToken) {
|
|
369
|
-
unfitTokens = remainingTextTokens.slice(i);
|
|
370
|
-
break;
|
|
307
|
+
functionSyntaxStartDetector.recordGeneration({ text, tokens, queuedTokenRelease });
|
|
308
|
+
if (initiallyEngagedFunctionMode && disengageInitiallyEngagedFunctionMode.hasTriggeredStops) {
|
|
309
|
+
initiallyEngagedFunctionMode = false;
|
|
310
|
+
let shouldStopFunctionEvaluationMode = !functionSyntaxStartDetector.hasTriggeredStops;
|
|
311
|
+
if (!shouldStopFunctionEvaluationMode && functionsEnabled && functionsGrammar != null) {
|
|
312
|
+
const functionCallText = model.detokenize([...functionCallTokens, ...tokens]);
|
|
313
|
+
try {
|
|
314
|
+
const functionName = functionsGrammar.parseFunctionNameFromPartialCall(functionCallText, {
|
|
315
|
+
enableInternalBuiltinFunctions: true,
|
|
316
|
+
initialFunctionCallEngaged: true
|
|
317
|
+
});
|
|
318
|
+
const internalBuiltinFunctions = this._chatWrapper.getInternalBuiltinFunctions({ initialFunctionCallEngaged: true });
|
|
319
|
+
if (internalBuiltinFunctions[functionName] != null) {
|
|
320
|
+
shouldStopFunctionEvaluationMode = true;
|
|
321
|
+
}
|
|
322
|
+
}
|
|
323
|
+
catch (err) {
|
|
324
|
+
if (!(err instanceof LlamaFunctionCallValidationError))
|
|
325
|
+
throw err;
|
|
326
|
+
}
|
|
371
327
|
}
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
328
|
+
if (shouldStopFunctionEvaluationMode) {
|
|
329
|
+
inFunctionEvaluationMode = false;
|
|
330
|
+
functionsGrammar = new FunctionCallGrammar(model._llama, functions, this._chatWrapper, false);
|
|
331
|
+
functionsEvaluationState = new LlamaGrammarEvaluationState({
|
|
332
|
+
grammar: functionsGrammar
|
|
333
|
+
});
|
|
334
|
+
functionCallTokens.length = 0;
|
|
335
|
+
while (functionCallTokenSyntaxLocks.length > 0)
|
|
336
|
+
functionCallTokenSyntaxLocks.shift().dispose();
|
|
337
|
+
functionSyntaxStartDetector.clearInProgressStops();
|
|
338
|
+
functionSyntaxStartDetector.clearTriggeredStops();
|
|
339
|
+
functionSyntaxEndDetector.clearInProgressStops();
|
|
340
|
+
functionSyntaxEndDetector.clearTriggeredStops();
|
|
382
341
|
}
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
342
|
+
}
|
|
343
|
+
if (!inFunctionEvaluationMode && functionsEnabled && functionsGrammar != null &&
|
|
344
|
+
functionSyntaxStartDetector.hasTriggeredStops && functionsEvaluationState != null) {
|
|
345
|
+
inFunctionEvaluationMode = true;
|
|
346
|
+
functionCallTokenSyntaxLocks.push(queuedTokenRelease.createTextIndexLock(0));
|
|
347
|
+
stopGenerationDetector.clearTriggeredStops();
|
|
348
|
+
stopGenerationDetector.clearInProgressStops();
|
|
349
|
+
customStopGenerationTriggersDetector.clearTriggeredStops();
|
|
350
|
+
customStopGenerationTriggersDetector.clearInProgressStops();
|
|
351
|
+
pendingTokens.push(...streamRegulator.popFreeChunkTokens());
|
|
352
|
+
const triggeredStops = functionSyntaxStartDetector.getTriggeredStops();
|
|
353
|
+
const partiallyFreeTokens = streamRegulator.getPartiallyFreeChunk();
|
|
354
|
+
const queuedTokensBeforeStopTrigger = getQueuedTokensBeforeStopTrigger(triggeredStops, partiallyFreeTokens, model.tokenizer);
|
|
355
|
+
pendingTokens.push(...queuedTokensBeforeStopTrigger);
|
|
356
|
+
const [firstRemainingGenerationAfterStop] = triggeredStops
|
|
357
|
+
.map((stopTrigger) => stopTrigger.remainingGenerations)
|
|
358
|
+
.filter((remainingGenerations) => remainingGenerations.length > 0)
|
|
359
|
+
.flat(1);
|
|
360
|
+
const remainingTextAfterStop = (firstRemainingGenerationAfterStop == null || firstRemainingGenerationAfterStop.length === 0)
|
|
361
|
+
? ""
|
|
362
|
+
: typeof firstRemainingGenerationAfterStop === "string"
|
|
363
|
+
? firstRemainingGenerationAfterStop
|
|
364
|
+
: model.detokenize(firstRemainingGenerationAfterStop);
|
|
365
|
+
functionCallTokens.push(...model.tokenize(this._chatWrapper.settings.functions.call.prefix, false, "trimLeadingSpace"));
|
|
366
|
+
for (const functionCallToken of functionCallTokens)
|
|
367
|
+
context._acceptTokenOnGrammarEvaluationState(functionsEvaluationState, functionCallToken);
|
|
368
|
+
// these tokens have to be verified that they match the function calling syntax grammar before they can be accepted,
|
|
369
|
+
// or the context state should be modified to not include the incompatible tokens
|
|
370
|
+
const remainingTextTokens = model.tokenize(remainingTextAfterStop, false, "trimLeadingSpace");
|
|
371
|
+
let unfitTokens = [];
|
|
372
|
+
for (let i = 0; i < remainingTextTokens.length; i++) {
|
|
373
|
+
const remainingToken = remainingTextTokens[i];
|
|
374
|
+
const canBeNextToken = context._canBeNextTokenForGrammarEvaluationState(functionsEvaluationState, remainingToken);
|
|
375
|
+
if (!canBeNextToken) {
|
|
376
|
+
unfitTokens = remainingTextTokens.slice(i);
|
|
377
|
+
break;
|
|
378
|
+
}
|
|
379
|
+
context._acceptTokenOnGrammarEvaluationState(functionsEvaluationState, remainingToken);
|
|
380
|
+
functionCallTokens.push(remainingToken);
|
|
386
381
|
}
|
|
387
|
-
if (
|
|
388
|
-
|
|
389
|
-
queuedTokenRelease.
|
|
382
|
+
if (unfitTokens.length > 0) {
|
|
383
|
+
const unfitTokensText = model.detokenize(unfitTokens); // the current token text must end with it
|
|
384
|
+
const currentTokenText = queuedTokenRelease.text;
|
|
385
|
+
let replacementTokens;
|
|
386
|
+
if (!currentTokenText.endsWith(unfitTokensText)) {
|
|
387
|
+
console.warn(getConsoleLogPrefix() + "The current token text does not end with the unfit function call syntax tokens text");
|
|
388
|
+
replacementTokens = remainingTextTokens.slice(0, -unfitTokens.length);
|
|
389
|
+
}
|
|
390
|
+
else {
|
|
391
|
+
const newCurrentTokensText = currentTokenText.slice(0, -unfitTokensText.length);
|
|
392
|
+
replacementTokens = model.tokenize(newCurrentTokensText, false, "trimLeadingSpace");
|
|
393
|
+
}
|
|
394
|
+
if (replacementTokens.length > 0) {
|
|
395
|
+
replacementToken = replacementTokens[0];
|
|
396
|
+
queuedTokenRelease.modifyTokensAndText(replacementTokens, model.detokenize([replacementToken]));
|
|
397
|
+
}
|
|
390
398
|
}
|
|
391
399
|
}
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
functionSyntaxEndDetector.recordGeneration({ text, tokens, queuedTokenRelease });
|
|
397
|
-
}
|
|
398
|
-
if (inFunctionEvaluationMode && functionSyntaxEndDetector.hasTriggeredStops && functionsGrammar != null) {
|
|
399
|
-
const functionCallText = model.detokenize(functionCallTokens);
|
|
400
|
-
const functionCall = functionsGrammar.parseFunctionCall(functionCallText);
|
|
401
|
-
let modelResponse = model.detokenize(res);
|
|
402
|
-
let contextWindowModelResponse = model.detokenize(contextWindowsRes);
|
|
403
|
-
if (grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix) {
|
|
404
|
-
modelResponse = modelResponse.trimEnd();
|
|
405
|
-
contextWindowModelResponse = contextWindowModelResponse.trimEnd();
|
|
400
|
+
else if (inFunctionEvaluationMode) {
|
|
401
|
+
functionCallTokens.push(...tokens);
|
|
402
|
+
functionCallTokenSyntaxLocks.push(queuedTokenRelease.createTextIndexLock(0));
|
|
403
|
+
functionSyntaxEndDetector.recordGeneration({ text, tokens, queuedTokenRelease });
|
|
406
404
|
}
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
functionCall: functionCall,
|
|
416
|
-
metadata: {
|
|
417
|
-
stopReason: "functionCall"
|
|
405
|
+
if (inFunctionEvaluationMode && functionSyntaxEndDetector.hasTriggeredStops && functionsGrammar != null) {
|
|
406
|
+
const functionCallText = model.detokenize(functionCallTokens);
|
|
407
|
+
const functionCall = functionsGrammar.parseFunctionCall(functionCallText);
|
|
408
|
+
let modelResponse = model.detokenize(res);
|
|
409
|
+
let contextWindowModelResponse = model.detokenize(contextWindowsRes);
|
|
410
|
+
if (grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix) {
|
|
411
|
+
modelResponse = modelResponse.trimEnd();
|
|
412
|
+
contextWindowModelResponse = contextWindowModelResponse.trimEnd();
|
|
418
413
|
}
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
.filter((remainingGenerations) => remainingGenerations.length > 0)
|
|
433
|
-
.flat(1);
|
|
434
|
-
removeFoundStartIgnoreTextsFromPendingTokens();
|
|
435
|
-
if (pendingTokens.length > 0)
|
|
436
|
-
onToken?.(pendingTokens.slice());
|
|
437
|
-
res.push(...pendingTokens);
|
|
438
|
-
contextWindowsRes.push(...pendingTokens);
|
|
439
|
-
pendingTokens.length = 0;
|
|
440
|
-
let modelResponse = model.detokenize(res);
|
|
441
|
-
let contextWindowModelResponse = model.detokenize(contextWindowsRes);
|
|
442
|
-
if (grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix) {
|
|
443
|
-
modelResponse = modelResponse.trimEnd();
|
|
444
|
-
contextWindowModelResponse = contextWindowModelResponse.trimEnd();
|
|
414
|
+
return {
|
|
415
|
+
response: modelResponse,
|
|
416
|
+
lastEvaluation: {
|
|
417
|
+
contextWindow: setLastModelTextResponseInChatHistory(lastContextWindowHistory, contextWindowLastModelResponse + contextWindowModelResponse),
|
|
418
|
+
cleanHistory: setLastModelTextResponseInChatHistory(resolvedHistory, lastModelResponse + modelResponse),
|
|
419
|
+
contextShiftMetadata: lastHistoryCompressionMetadata
|
|
420
|
+
},
|
|
421
|
+
// prevent infinite TS type instantiation
|
|
422
|
+
functionCall: functionCall,
|
|
423
|
+
metadata: {
|
|
424
|
+
stopReason: "functionCall"
|
|
425
|
+
}
|
|
426
|
+
};
|
|
445
427
|
}
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
contextShiftMetadata: lastHistoryCompressionMetadata
|
|
452
|
-
},
|
|
453
|
-
metadata: {
|
|
454
|
-
remainingGenerationAfterStop: firstRemainingGenerationAfterStop,
|
|
455
|
-
stopReason: model.isEogToken(token)
|
|
456
|
-
? "eogToken"
|
|
457
|
-
: "stopGenerationTrigger"
|
|
458
|
-
}
|
|
459
|
-
};
|
|
460
|
-
}
|
|
461
|
-
const maxTokensTriggered = maxTokens != null && maxTokens > 0 && generatedTokens >= maxTokens;
|
|
462
|
-
if (res.length === 0) {
|
|
463
|
-
ignoreStartTextDetector.clearInProgressStops();
|
|
464
|
-
ignoreStartTextDetector.clearTriggeredStops();
|
|
465
|
-
ignoreStartTextDetector.recordGeneration({
|
|
466
|
-
text: model.detokenize(pendingTokens),
|
|
467
|
-
tokens: pendingTokens
|
|
468
|
-
});
|
|
469
|
-
}
|
|
470
|
-
if (pendingTokens.length > 0 && (maxTokensTriggered || !ignoreStartTextDetector.hasInProgressStops)) {
|
|
428
|
+
if (!inFunctionEvaluationMode) {
|
|
429
|
+
stopGenerationDetector.recordGeneration({ text, tokens, queuedTokenRelease });
|
|
430
|
+
customStopGenerationTriggersDetector.recordGeneration({ text, tokens, queuedTokenRelease });
|
|
431
|
+
}
|
|
432
|
+
pendingTokens.push(...streamRegulator.popFreeChunkTokens());
|
|
471
433
|
removeFoundStartIgnoreTextsFromPendingTokens();
|
|
472
|
-
if (
|
|
473
|
-
|
|
434
|
+
if (stopGenerationDetector.hasTriggeredStops || customStopGenerationTriggersDetector.hasTriggeredStops ||
|
|
435
|
+
model.isEogToken(token)) {
|
|
436
|
+
const triggeredStops = stopGenerationDetector.hasTriggeredStops
|
|
437
|
+
? stopGenerationDetector.getTriggeredStops()
|
|
438
|
+
: customStopGenerationTriggersDetector.getTriggeredStops();
|
|
439
|
+
const partiallyFreeTokens = streamRegulator.getPartiallyFreeChunk();
|
|
440
|
+
const queuedTokensBeforeStopTrigger = getQueuedTokensBeforeStopTrigger(triggeredStops, partiallyFreeTokens, model.tokenizer);
|
|
441
|
+
pendingTokens.push(...queuedTokensBeforeStopTrigger);
|
|
442
|
+
const [firstRemainingGenerationAfterStop] = triggeredStops
|
|
443
|
+
.map((stopTrigger) => stopTrigger.remainingGenerations)
|
|
444
|
+
.filter((remainingGenerations) => remainingGenerations.length > 0)
|
|
445
|
+
.flat(1);
|
|
446
|
+
removeFoundStartIgnoreTextsFromPendingTokens();
|
|
447
|
+
if (pendingTokens.length > 0)
|
|
448
|
+
onToken?.(pendingTokens.slice());
|
|
474
449
|
res.push(...pendingTokens);
|
|
475
450
|
contextWindowsRes.push(...pendingTokens);
|
|
476
451
|
pendingTokens.length = 0;
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
contextWindowModelResponse = contextWindowModelResponse.trimEnd();
|
|
485
|
-
}
|
|
486
|
-
return {
|
|
487
|
-
response: modelResponse,
|
|
488
|
-
lastEvaluation: {
|
|
452
|
+
let modelResponse = model.detokenize(res);
|
|
453
|
+
let contextWindowModelResponse = model.detokenize(contextWindowsRes);
|
|
454
|
+
if (grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix) {
|
|
455
|
+
modelResponse = modelResponse.trimEnd();
|
|
456
|
+
contextWindowModelResponse = contextWindowModelResponse.trimEnd();
|
|
457
|
+
}
|
|
458
|
+
const lastEvaluation = {
|
|
489
459
|
contextWindow: setLastModelTextResponseInChatHistory(lastContextWindowHistory, contextWindowLastModelResponse + contextWindowModelResponse),
|
|
490
460
|
cleanHistory: setLastModelTextResponseInChatHistory(resolvedHistory, lastModelResponse + modelResponse),
|
|
491
461
|
contextShiftMetadata: lastHistoryCompressionMetadata
|
|
492
|
-
}
|
|
493
|
-
|
|
494
|
-
|
|
462
|
+
};
|
|
463
|
+
const isEogToken = model.isEogToken(token);
|
|
464
|
+
if (isEogToken || stopGenerationDetector.hasTriggeredStops) {
|
|
465
|
+
return {
|
|
466
|
+
response: modelResponse,
|
|
467
|
+
lastEvaluation,
|
|
468
|
+
metadata: {
|
|
469
|
+
remainingGenerationAfterStop: firstRemainingGenerationAfterStop,
|
|
470
|
+
stopReason: isEogToken
|
|
471
|
+
? "eogToken"
|
|
472
|
+
: "stopGenerationTrigger"
|
|
473
|
+
}
|
|
474
|
+
};
|
|
495
475
|
}
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
476
|
+
return {
|
|
477
|
+
response: modelResponse,
|
|
478
|
+
lastEvaluation,
|
|
479
|
+
metadata: {
|
|
480
|
+
remainingGenerationAfterStop: firstRemainingGenerationAfterStop,
|
|
481
|
+
stopReason: "customStopTrigger",
|
|
482
|
+
customStopTrigger: triggeredStops[0].stopTrigger
|
|
483
|
+
}
|
|
484
|
+
};
|
|
485
|
+
}
|
|
486
|
+
const maxTokensTriggered = maxTokens != null && maxTokens > 0 && generatedTokens >= maxTokens;
|
|
487
|
+
if (res.length === 0) {
|
|
488
|
+
ignoreStartTextDetector.clearInProgressStops();
|
|
489
|
+
ignoreStartTextDetector.clearTriggeredStops();
|
|
490
|
+
ignoreStartTextDetector.recordGeneration({
|
|
491
|
+
text: model.detokenize(pendingTokens),
|
|
492
|
+
tokens: pendingTokens
|
|
493
|
+
});
|
|
494
|
+
}
|
|
495
|
+
if (pendingTokens.length > 0 && (maxTokensTriggered || !ignoreStartTextDetector.hasInProgressStops)) {
|
|
496
|
+
removeFoundStartIgnoreTextsFromPendingTokens();
|
|
497
|
+
if (pendingTokens.length > 0) {
|
|
498
|
+
onToken?.(pendingTokens.slice());
|
|
499
|
+
res.push(...pendingTokens);
|
|
500
|
+
contextWindowsRes.push(...pendingTokens);
|
|
501
|
+
pendingTokens.length = 0;
|
|
502
|
+
}
|
|
503
|
+
}
|
|
504
|
+
if (maxTokensTriggered) {
|
|
505
|
+
let modelResponse = model.detokenize(res);
|
|
506
|
+
let contextWindowModelResponse = model.detokenize(contextWindowsRes);
|
|
507
|
+
if (grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix) {
|
|
508
|
+
modelResponse = modelResponse.trimEnd();
|
|
509
|
+
contextWindowModelResponse = contextWindowModelResponse.trimEnd();
|
|
510
|
+
}
|
|
511
|
+
return {
|
|
512
|
+
response: modelResponse,
|
|
513
|
+
lastEvaluation: {
|
|
514
|
+
contextWindow: setLastModelTextResponseInChatHistory(lastContextWindowHistory, contextWindowLastModelResponse + contextWindowModelResponse),
|
|
515
|
+
cleanHistory: setLastModelTextResponseInChatHistory(resolvedHistory, lastModelResponse + modelResponse),
|
|
516
|
+
contextShiftMetadata: lastHistoryCompressionMetadata
|
|
517
|
+
},
|
|
518
|
+
metadata: {
|
|
519
|
+
stopReason: "maxTokens"
|
|
520
|
+
}
|
|
521
|
+
};
|
|
522
|
+
}
|
|
523
|
+
if (this._sequence.nextTokenIndex >= context.contextSize - 1) {
|
|
524
|
+
shouldContextShift = true;
|
|
525
|
+
break;
|
|
526
|
+
}
|
|
527
|
+
if (signal?.aborted && stopOnAbortSignal) {
|
|
528
|
+
if (res.length === 0)
|
|
529
|
+
throw signal.reason;
|
|
530
|
+
let modelResponse = model.detokenize(res);
|
|
531
|
+
let contextWindowModelResponse = model.detokenize(contextWindowsRes);
|
|
532
|
+
if (grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix) {
|
|
533
|
+
modelResponse = modelResponse.trimEnd();
|
|
534
|
+
contextWindowModelResponse = contextWindowModelResponse.trimEnd();
|
|
535
|
+
}
|
|
536
|
+
return {
|
|
537
|
+
response: modelResponse,
|
|
538
|
+
lastEvaluation: {
|
|
539
|
+
contextWindow: setLastModelTextResponseInChatHistory(lastContextWindowHistory, contextWindowLastModelResponse + contextWindowModelResponse),
|
|
540
|
+
cleanHistory: setLastModelTextResponseInChatHistory(resolvedHistory, lastModelResponse + modelResponse),
|
|
541
|
+
contextShiftMetadata: lastHistoryCompressionMetadata
|
|
542
|
+
},
|
|
543
|
+
metadata: {
|
|
544
|
+
stopReason: "abort"
|
|
545
|
+
}
|
|
546
|
+
};
|
|
547
|
+
}
|
|
548
|
+
currentIteration = await evaluationIterator.next(replacementToken);
|
|
501
549
|
}
|
|
502
|
-
|
|
550
|
+
}
|
|
551
|
+
finally {
|
|
552
|
+
await evaluationIterator.return();
|
|
503
553
|
}
|
|
504
554
|
isFirstEvaluation = false;
|
|
505
555
|
if (shouldContextShift)
|
|
@@ -654,7 +704,7 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
|
|
|
654
704
|
: resolvedContextShift.size;
|
|
655
705
|
const { compressedHistory, metadata } = await compressHistoryToFitContextSize({
|
|
656
706
|
history: resolvedHistory,
|
|
657
|
-
contextShiftSize: Math.max(contextShiftSize,
|
|
707
|
+
contextShiftSize: Math.max(minFreeContextTokens, Math.min(contextShiftSize, context.contextSize - pendingTokensCount)) + pendingTokensCount,
|
|
658
708
|
contextShiftStrategy: resolvedContextShift.strategy,
|
|
659
709
|
contextShiftLastEvaluationMetadata: resolvedContextShift.lastEvaluationMetadata,
|
|
660
710
|
contextSize: context.contextSize,
|
|
@@ -701,7 +751,7 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
|
|
|
701
751
|
: resolvedContextShift.size)));
|
|
702
752
|
const { compressedHistory, metadata } = await compressHistoryToFitContextSize({
|
|
703
753
|
history: resolvedHistory,
|
|
704
|
-
contextShiftSize: Math.max(contextShiftSize,
|
|
754
|
+
contextShiftSize: Math.max(minFreeContextTokens, Math.min(contextShiftSize, context.contextSize - pendingTokensCount)) + pendingTokensCount,
|
|
705
755
|
contextShiftStrategy: resolvedContextShift.strategy,
|
|
706
756
|
contextShiftLastEvaluationMetadata: resolvedContextShift.lastEvaluationMetadata,
|
|
707
757
|
contextSize: context.contextSize,
|