node-llama-cpp 3.0.0-beta.17 → 3.0.0-beta.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -5
- package/dist/ChatWrapper.js +3 -3
- package/dist/ChatWrapper.js.map +1 -1
- package/dist/apiDocsOverrides.d.ts +1 -0
- package/dist/apiDocsOverrides.js +5 -0
- package/dist/apiDocsOverrides.js.map +1 -0
- package/dist/bindings/AddonTypes.d.ts +1 -0
- package/dist/bindings/Llama.d.ts +6 -0
- package/dist/bindings/Llama.js +11 -0
- package/dist/bindings/Llama.js.map +1 -1
- package/dist/bindings/getLlama.d.ts +6 -2
- package/dist/bindings/getLlama.js +11 -4
- package/dist/bindings/getLlama.js.map +1 -1
- package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.d.ts +3 -0
- package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.js +27 -0
- package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.js.map +1 -0
- package/dist/chatWrappers/generic/TemplateChatWrapper.d.ts +1 -2
- package/dist/chatWrappers/generic/TemplateChatWrapper.js +1 -2
- package/dist/chatWrappers/generic/TemplateChatWrapper.js.map +1 -1
- package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js +2 -2
- package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js.map +1 -1
- package/dist/cli/cli.js +5 -3
- package/dist/cli/cli.js.map +1 -1
- package/dist/cli/commands/DebugCommand.js +3 -5
- package/dist/cli/commands/DebugCommand.js.map +1 -1
- package/dist/cli/commands/DownloadCommand.d.ts +1 -1
- package/dist/cli/commands/DownloadCommand.js +2 -1
- package/dist/cli/commands/DownloadCommand.js.map +1 -1
- package/dist/cli/commands/PullCommand.d.ts +12 -0
- package/dist/cli/commands/PullCommand.js +109 -0
- package/dist/cli/commands/PullCommand.js.map +1 -0
- package/dist/cli/commands/inspect/commands/InspectGgufCommand.d.ts +1 -0
- package/dist/cli/commands/inspect/commands/InspectGgufCommand.js +23 -11
- package/dist/cli/commands/inspect/commands/InspectGgufCommand.js.map +1 -1
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js +2 -1
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js.map +1 -1
- package/dist/cli/recommendedModels.js +12 -20
- package/dist/cli/recommendedModels.js.map +1 -1
- package/dist/cli/utils/resolveCommandGgufPath.d.ts +3 -1
- package/dist/cli/utils/resolveCommandGgufPath.js +41 -97
- package/dist/cli/utils/resolveCommandGgufPath.js.map +1 -1
- package/dist/cli/utils/resolveModelRecommendationFileOptions.d.ts +2 -2
- package/dist/cli/utils/resolveModelRecommendationFileOptions.js +1 -4
- package/dist/cli/utils/resolveModelRecommendationFileOptions.js.map +1 -1
- package/dist/evaluator/LlamaChat/LlamaChat.d.ts +18 -2
- package/dist/evaluator/LlamaChat/LlamaChat.js +258 -205
- package/dist/evaluator/LlamaChat/LlamaChat.js.map +1 -1
- package/dist/evaluator/LlamaChat/utils/FunctionCallGrammar.js +1 -2
- package/dist/evaluator/LlamaChat/utils/FunctionCallGrammar.js.map +1 -1
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.d.ts +22 -3
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.js +18 -7
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.js.map +1 -1
- package/dist/evaluator/LlamaCompletion.js +2 -2
- package/dist/evaluator/LlamaCompletion.js.map +1 -1
- package/dist/evaluator/LlamaContext/LlamaContext.d.ts +2 -7
- package/dist/evaluator/LlamaContext/LlamaContext.js +12 -12
- package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -1
- package/dist/evaluator/LlamaEmbeddingContext.d.ts +2 -10
- package/dist/evaluator/LlamaEmbeddingContext.js +9 -23
- package/dist/evaluator/LlamaEmbeddingContext.js.map +1 -1
- package/dist/evaluator/LlamaGrammar.d.ts +3 -3
- package/dist/evaluator/LlamaGrammar.js +3 -3
- package/dist/evaluator/LlamaGrammar.js.map +1 -1
- package/dist/evaluator/LlamaJsonSchemaGrammar.js +2 -3
- package/dist/evaluator/LlamaJsonSchemaGrammar.js.map +1 -1
- package/dist/evaluator/LlamaModel.d.ts +9 -0
- package/dist/evaluator/LlamaModel.js +2 -1
- package/dist/evaluator/LlamaModel.js.map +1 -1
- package/dist/gguf/insights/GgufInsights.js +12 -12
- package/dist/gguf/insights/GgufInsights.js.map +1 -1
- package/dist/gguf/insights/utils/resolveContextContextSizeOption.js +27 -3
- package/dist/gguf/insights/utils/resolveContextContextSizeOption.js.map +1 -1
- package/dist/gguf/parser/parseGguf.js +5 -0
- package/dist/gguf/parser/parseGguf.js.map +1 -1
- package/dist/gguf/readGgufFileInfo.d.ts +5 -2
- package/dist/gguf/readGgufFileInfo.js +38 -10
- package/dist/gguf/readGgufFileInfo.js.map +1 -1
- package/dist/gguf/types/GgufFileInfoTypes.d.ts +32 -0
- package/dist/gguf/types/GgufFileInfoTypes.js.map +1 -1
- package/dist/gguf/utils/getGgufMetadataArchitectureData.js +1 -1
- package/dist/gguf/utils/getGgufMetadataArchitectureData.js.map +1 -1
- package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.d.ts +2 -0
- package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.js +39 -0
- package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.js.map +1 -0
- package/dist/gguf/utils/resolveSplitGgufParts.d.ts +7 -0
- package/dist/gguf/utils/resolveSplitGgufParts.js +55 -0
- package/dist/gguf/utils/resolveSplitGgufParts.js.map +1 -0
- package/dist/index.d.ts +6 -4
- package/dist/index.js +4 -2
- package/dist/index.js.map +1 -1
- package/dist/utils/LlamaText.d.ts +29 -20
- package/dist/utils/LlamaText.js +253 -243
- package/dist/utils/LlamaText.js.map +1 -1
- package/dist/utils/StopGenerationDetector.d.ts +1 -1
- package/dist/utils/StopGenerationDetector.js +2 -0
- package/dist/utils/StopGenerationDetector.js.map +1 -1
- package/dist/utils/TokenStreamRegulator.d.ts +4 -2
- package/dist/utils/TokenStreamRegulator.js +56 -4
- package/dist/utils/TokenStreamRegulator.js.map +1 -1
- package/dist/utils/createModelDownloader.d.ts +99 -0
- package/dist/utils/createModelDownloader.js +226 -0
- package/dist/utils/createModelDownloader.js.map +1 -0
- package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js +18 -8
- package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js.map +1 -1
- package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.d.ts +5 -0
- package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.js +11 -0
- package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.js.map +1 -0
- package/dist/utils/gbnfJson/terminals/GbnfArray.d.ts +3 -1
- package/dist/utils/gbnfJson/terminals/GbnfArray.js +10 -5
- package/dist/utils/gbnfJson/terminals/GbnfArray.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfObjectMap.d.ts +3 -1
- package/dist/utils/gbnfJson/terminals/GbnfObjectMap.js +9 -4
- package/dist/utils/gbnfJson/terminals/GbnfObjectMap.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfRepetition.d.ts +9 -0
- package/dist/utils/gbnfJson/terminals/GbnfRepetition.js +37 -0
- package/dist/utils/gbnfJson/terminals/GbnfRepetition.js.map +1 -0
- package/dist/utils/gbnfJson/terminals/GbnfString.js +23 -5
- package/dist/utils/gbnfJson/terminals/GbnfString.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfWhitespace.d.ts +6 -3
- package/dist/utils/gbnfJson/terminals/GbnfWhitespace.js +37 -9
- package/dist/utils/gbnfJson/terminals/GbnfWhitespace.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/gbnfConsts.d.ts +5 -4
- package/dist/utils/gbnfJson/terminals/gbnfConsts.js +14 -3
- package/dist/utils/gbnfJson/terminals/gbnfConsts.js.map +1 -1
- package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.d.ts +10 -0
- package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.js +15 -0
- package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.js.map +1 -0
- package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.d.ts +2 -1
- package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.js +6 -5
- package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.js.map +1 -1
- package/dist/utils/parseTextTemplate.d.ts +2 -2
- package/dist/utils/parseTextTemplate.js +2 -2
- package/dist/utils/runtime.d.ts +4 -0
- package/dist/utils/runtime.js +8 -0
- package/dist/utils/runtime.js.map +1 -0
- package/llama/addon.cpp +18 -7
- package/llama/binariesGithubRelease.json +1 -1
- package/llama/gitRelease.bundle +0 -0
- package/llama/grammars/README.md +1 -1
- package/llama/llama.cpp.info.json +1 -1
- package/llamaBins/linux-arm64/_nlcBuildMetadata.json +1 -1
- package/llamaBins/linux-arm64/llama-addon.node +0 -0
- package/llamaBins/linux-armv7l/_nlcBuildMetadata.json +1 -1
- package/llamaBins/linux-armv7l/llama-addon.node +0 -0
- package/llamaBins/linux-x64/_nlcBuildMetadata.json +1 -1
- package/llamaBins/linux-x64/llama-addon.node +0 -0
- package/llamaBins/linux-x64-cuda/_nlcBuildMetadata.json +1 -1
- package/llamaBins/linux-x64-cuda/llama-addon.node +0 -0
- package/llamaBins/linux-x64-vulkan/_nlcBuildMetadata.json +1 -1
- package/llamaBins/linux-x64-vulkan/llama-addon.node +0 -0
- package/llamaBins/mac-arm64-metal/_nlcBuildMetadata.json +1 -1
- package/llamaBins/mac-arm64-metal/default.metallib +0 -0
- package/llamaBins/mac-arm64-metal/llama-addon.node +0 -0
- package/llamaBins/mac-x64/_nlcBuildMetadata.json +1 -1
- package/llamaBins/mac-x64/llama-addon.node +0 -0
- package/llamaBins/win-arm64/_nlcBuildMetadata.json +1 -1
- package/llamaBins/win-arm64/llama-addon.node +0 -0
- package/llamaBins/win-x64/_nlcBuildMetadata.json +1 -1
- package/llamaBins/win-x64/llama-addon.node +0 -0
- package/llamaBins/win-x64-cuda/_nlcBuildMetadata.json +1 -1
- package/llamaBins/win-x64-cuda/llama-addon.node +0 -0
- package/llamaBins/win-x64-vulkan/_nlcBuildMetadata.json +1 -1
- package/llamaBins/win-x64-vulkan/llama-addon.node +0 -0
- package/package.json +11 -8
- package/dist/utils/getGbnfGrammarForGbnfJsonSchema.d.ts +0 -2
- package/dist/utils/getGbnfGrammarForGbnfJsonSchema.js +0 -9
- package/dist/utils/getGbnfGrammarForGbnfJsonSchema.js.map +0 -1
|
@@ -72,7 +72,7 @@ export class LlamaChat {
|
|
|
72
72
|
get model() {
|
|
73
73
|
return this.sequence.model;
|
|
74
74
|
}
|
|
75
|
-
async generateResponse(history, { onToken, signal, maxTokens, temperature, minP, topK, topP, grammar, trimWhitespaceSuffix = false, repeatPenalty = {}, tokenBias, evaluationPriority = 5, functions, documentFunctionParams, contextShift = defaultContextShiftOptions, lastEvaluationContextWindow: { history: lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift = 0.5 } = {} } = {}) {
|
|
75
|
+
async generateResponse(history, { onToken, signal, stopOnAbortSignal = false, maxTokens, temperature, minP, topK, topP, grammar, trimWhitespaceSuffix = false, repeatPenalty = {}, tokenBias, evaluationPriority = 5, functions, documentFunctionParams, contextShift = defaultContextShiftOptions, customStopTriggers, lastEvaluationContextWindow: { history: lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift = 0.5 } = {} } = {}) {
|
|
76
76
|
const functionsEnabled = (functions != null && Object.keys(functions).length > 0);
|
|
77
77
|
if (grammar != null && functionsEnabled)
|
|
78
78
|
throw new Error("Using both grammar and functions is not supported yet");
|
|
@@ -116,6 +116,7 @@ export class LlamaChat {
|
|
|
116
116
|
: undefined;
|
|
117
117
|
const streamRegulator = new TokenStreamRegulator();
|
|
118
118
|
const stopGenerationDetector = new StopGenerationDetector();
|
|
119
|
+
const customStopGenerationTriggersDetector = new StopGenerationDetector();
|
|
119
120
|
const functionSyntaxStartDetector = new StopGenerationDetector();
|
|
120
121
|
const functionSyntaxEndDetector = new StopGenerationDetector();
|
|
121
122
|
const disengageInitiallyEngagedFunctionMode = new StopGenerationDetector();
|
|
@@ -129,7 +130,7 @@ export class LlamaChat {
|
|
|
129
130
|
let lastContextWindowHistory = resolvedHistory;
|
|
130
131
|
let lastHistoryCompressionMetadata = resolvedContextShift.lastEvaluationMetadata;
|
|
131
132
|
const ensureNotAborted = () => {
|
|
132
|
-
if (signal?.aborted)
|
|
133
|
+
if (signal?.aborted && (!stopOnAbortSignal || res.length === 0))
|
|
133
134
|
throw signal.reason;
|
|
134
135
|
if (this._sequence == null)
|
|
135
136
|
throw new DisposedError();
|
|
@@ -200,6 +201,9 @@ export class LlamaChat {
|
|
|
200
201
|
}
|
|
201
202
|
}
|
|
202
203
|
};
|
|
204
|
+
if (customStopTriggers != null)
|
|
205
|
+
StopGenerationDetector.resolveStopTriggers(customStopTriggers, model.tokenizer)
|
|
206
|
+
.map((stopTrigger) => customStopGenerationTriggersDetector.addStopTrigger(stopTrigger));
|
|
203
207
|
if (grammar != null)
|
|
204
208
|
StopGenerationDetector.resolveStopTriggers(grammar.stopGenerationTriggers, model.tokenizer)
|
|
205
209
|
.map((stopTrigger) => stopGenerationDetector.addStopTrigger(stopTrigger));
|
|
@@ -214,7 +218,7 @@ export class LlamaChat {
|
|
|
214
218
|
resolvedHistory: getResolvedHistoryWithCurrentModelResponse(),
|
|
215
219
|
resolvedContextShift,
|
|
216
220
|
lastHistoryCompressionMetadata,
|
|
217
|
-
pendingTokensCount: pendingTokens.length + queuedChunkTokens.length,
|
|
221
|
+
pendingTokensCount: ignoredStartTextTokens.length + pendingTokens.length + queuedChunkTokens.length,
|
|
218
222
|
isFirstEvaluation,
|
|
219
223
|
chatWrapper: this._chatWrapper,
|
|
220
224
|
lastEvaluationContextWindowHistory,
|
|
@@ -281,225 +285,274 @@ export class LlamaChat {
|
|
|
281
285
|
evaluationPriority,
|
|
282
286
|
yieldEogToken: true
|
|
283
287
|
}));
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
else {
|
|
299
|
-
while (locksToReleaseOnValidGeneration.length > 0)
|
|
300
|
-
locksToReleaseOnValidGeneration.shift().dispose();
|
|
301
|
-
}
|
|
302
|
-
functionSyntaxStartDetector.recordGeneration({ text, tokens, queuedTokenRelease });
|
|
303
|
-
if (initiallyEngagedFunctionMode && disengageInitiallyEngagedFunctionMode.hasTriggeredStops) {
|
|
304
|
-
initiallyEngagedFunctionMode = false;
|
|
305
|
-
let shouldStopFunctionEvaluationMode = !functionSyntaxStartDetector.hasTriggeredStops;
|
|
306
|
-
if (!shouldStopFunctionEvaluationMode && functionsEnabled && functionsGrammar != null) {
|
|
307
|
-
const functionCallText = model.detokenize([...functionCallTokens, ...tokens]);
|
|
308
|
-
try {
|
|
309
|
-
const functionName = functionsGrammar.parseFunctionNameFromPartialCall(functionCallText, {
|
|
310
|
-
enableInternalBuiltinFunctions: true,
|
|
311
|
-
initialFunctionCallEngaged: true
|
|
312
|
-
});
|
|
313
|
-
const internalBuiltinFunctions = this._chatWrapper.getInternalBuiltinFunctions({ initialFunctionCallEngaged: true });
|
|
314
|
-
if (internalBuiltinFunctions[functionName] != null) {
|
|
315
|
-
shouldStopFunctionEvaluationMode = true;
|
|
316
|
-
}
|
|
317
|
-
}
|
|
318
|
-
catch (err) {
|
|
319
|
-
if (!(err instanceof LlamaFunctionCallValidationError))
|
|
320
|
-
throw err;
|
|
321
|
-
}
|
|
288
|
+
try {
|
|
289
|
+
let currentIteration = await evaluationIterator.next();
|
|
290
|
+
while (currentIteration.done !== true) {
|
|
291
|
+
const token = currentIteration.value;
|
|
292
|
+
let replacementToken = undefined;
|
|
293
|
+
ensureNotAborted();
|
|
294
|
+
generatedTokens++;
|
|
295
|
+
const tokens = [token];
|
|
296
|
+
const text = model.detokenize([token]);
|
|
297
|
+
const queuedTokenRelease = streamRegulator.addChunk({ tokens, text });
|
|
298
|
+
if (initiallyEngagedFunctionMode)
|
|
299
|
+
disengageInitiallyEngagedFunctionMode.recordGeneration({ text, tokens, startNewChecks: generatedTokens === 1 });
|
|
300
|
+
if (text === UNKNOWN_UNICODE_CHAR || ((grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix) && text.trim() === "")) {
|
|
301
|
+
locksToReleaseOnValidGeneration.push(queuedTokenRelease.createTextIndexLock(0));
|
|
322
302
|
}
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
functionsEvaluationState = new LlamaGrammarEvaluationState({
|
|
327
|
-
grammar: functionsGrammar
|
|
328
|
-
});
|
|
329
|
-
functionCallTokens.length = 0;
|
|
330
|
-
while (functionCallTokenSyntaxLocks.length > 0)
|
|
331
|
-
functionCallTokenSyntaxLocks.shift().dispose();
|
|
332
|
-
functionSyntaxStartDetector.clearInProgressStops();
|
|
333
|
-
functionSyntaxStartDetector.clearTriggeredStops();
|
|
334
|
-
functionSyntaxEndDetector.clearInProgressStops();
|
|
335
|
-
functionSyntaxEndDetector.clearTriggeredStops();
|
|
303
|
+
else {
|
|
304
|
+
while (locksToReleaseOnValidGeneration.length > 0)
|
|
305
|
+
locksToReleaseOnValidGeneration.shift().dispose();
|
|
336
306
|
}
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
: model.detokenize(firstRemainingGenerationAfterStop);
|
|
358
|
-
functionCallTokens.push(...model.tokenize(this._chatWrapper.settings.functions.call.prefix, false, "trimLeadingSpace"));
|
|
359
|
-
for (const functionCallToken of functionCallTokens)
|
|
360
|
-
context._acceptTokenOnGrammarEvaluationState(functionsEvaluationState, functionCallToken);
|
|
361
|
-
// these tokens have to be verified that they match the function calling syntax grammar before they can be accepted,
|
|
362
|
-
// or the context state should be modified to not include the incompatible tokens
|
|
363
|
-
const remainingTextTokens = model.tokenize(remainingTextAfterStop, false, "trimLeadingSpace");
|
|
364
|
-
let unfitTokens = [];
|
|
365
|
-
for (let i = 0; i < remainingTextTokens.length; i++) {
|
|
366
|
-
const remainingToken = remainingTextTokens[i];
|
|
367
|
-
const canBeNextToken = context._canBeNextTokenForGrammarEvaluationState(functionsEvaluationState, remainingToken);
|
|
368
|
-
if (!canBeNextToken) {
|
|
369
|
-
unfitTokens = remainingTextTokens.slice(i);
|
|
370
|
-
break;
|
|
307
|
+
functionSyntaxStartDetector.recordGeneration({ text, tokens, queuedTokenRelease });
|
|
308
|
+
if (initiallyEngagedFunctionMode && disengageInitiallyEngagedFunctionMode.hasTriggeredStops) {
|
|
309
|
+
initiallyEngagedFunctionMode = false;
|
|
310
|
+
let shouldStopFunctionEvaluationMode = !functionSyntaxStartDetector.hasTriggeredStops;
|
|
311
|
+
if (!shouldStopFunctionEvaluationMode && functionsEnabled && functionsGrammar != null) {
|
|
312
|
+
const functionCallText = model.detokenize([...functionCallTokens, ...tokens]);
|
|
313
|
+
try {
|
|
314
|
+
const functionName = functionsGrammar.parseFunctionNameFromPartialCall(functionCallText, {
|
|
315
|
+
enableInternalBuiltinFunctions: true,
|
|
316
|
+
initialFunctionCallEngaged: true
|
|
317
|
+
});
|
|
318
|
+
const internalBuiltinFunctions = this._chatWrapper.getInternalBuiltinFunctions({ initialFunctionCallEngaged: true });
|
|
319
|
+
if (internalBuiltinFunctions[functionName] != null) {
|
|
320
|
+
shouldStopFunctionEvaluationMode = true;
|
|
321
|
+
}
|
|
322
|
+
}
|
|
323
|
+
catch (err) {
|
|
324
|
+
if (!(err instanceof LlamaFunctionCallValidationError))
|
|
325
|
+
throw err;
|
|
326
|
+
}
|
|
371
327
|
}
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
328
|
+
if (shouldStopFunctionEvaluationMode) {
|
|
329
|
+
inFunctionEvaluationMode = false;
|
|
330
|
+
functionsGrammar = new FunctionCallGrammar(model._llama, functions, this._chatWrapper, false);
|
|
331
|
+
functionsEvaluationState = new LlamaGrammarEvaluationState({
|
|
332
|
+
grammar: functionsGrammar
|
|
333
|
+
});
|
|
334
|
+
functionCallTokens.length = 0;
|
|
335
|
+
while (functionCallTokenSyntaxLocks.length > 0)
|
|
336
|
+
functionCallTokenSyntaxLocks.shift().dispose();
|
|
337
|
+
functionSyntaxStartDetector.clearInProgressStops();
|
|
338
|
+
functionSyntaxStartDetector.clearTriggeredStops();
|
|
339
|
+
functionSyntaxEndDetector.clearInProgressStops();
|
|
340
|
+
functionSyntaxEndDetector.clearTriggeredStops();
|
|
382
341
|
}
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
342
|
+
}
|
|
343
|
+
if (!inFunctionEvaluationMode && functionsEnabled && functionsGrammar != null &&
|
|
344
|
+
functionSyntaxStartDetector.hasTriggeredStops && functionsEvaluationState != null) {
|
|
345
|
+
inFunctionEvaluationMode = true;
|
|
346
|
+
functionCallTokenSyntaxLocks.push(queuedTokenRelease.createTextIndexLock(0));
|
|
347
|
+
stopGenerationDetector.clearTriggeredStops();
|
|
348
|
+
stopGenerationDetector.clearInProgressStops();
|
|
349
|
+
customStopGenerationTriggersDetector.clearTriggeredStops();
|
|
350
|
+
customStopGenerationTriggersDetector.clearInProgressStops();
|
|
351
|
+
pendingTokens.push(...streamRegulator.popFreeChunkTokens());
|
|
352
|
+
const triggeredStops = functionSyntaxStartDetector.getTriggeredStops();
|
|
353
|
+
const partiallyFreeTokens = streamRegulator.getPartiallyFreeChunk(model.tokenizer);
|
|
354
|
+
const queuedTokensBeforeStopTrigger = getQueuedTokensBeforeStopTrigger(triggeredStops, partiallyFreeTokens, model.tokenizer);
|
|
355
|
+
pendingTokens.push(...queuedTokensBeforeStopTrigger);
|
|
356
|
+
const [firstRemainingGenerationAfterStop] = triggeredStops
|
|
357
|
+
.map((stopTrigger) => stopTrigger.remainingGenerations)
|
|
358
|
+
.filter((remainingGenerations) => remainingGenerations.length > 0)
|
|
359
|
+
.flat(1);
|
|
360
|
+
const remainingTextAfterStop = (firstRemainingGenerationAfterStop == null || firstRemainingGenerationAfterStop.length === 0)
|
|
361
|
+
? ""
|
|
362
|
+
: typeof firstRemainingGenerationAfterStop === "string"
|
|
363
|
+
? firstRemainingGenerationAfterStop
|
|
364
|
+
: model.detokenize(firstRemainingGenerationAfterStop);
|
|
365
|
+
functionCallTokens.push(...model.tokenize(this._chatWrapper.settings.functions.call.prefix, false, "trimLeadingSpace"));
|
|
366
|
+
for (const functionCallToken of functionCallTokens)
|
|
367
|
+
context._acceptTokenOnGrammarEvaluationState(functionsEvaluationState, functionCallToken);
|
|
368
|
+
// these tokens have to be verified that they match the function calling syntax grammar before they can be accepted,
|
|
369
|
+
// or the context state should be modified to not include the incompatible tokens
|
|
370
|
+
const remainingTextTokens = model.tokenize(remainingTextAfterStop, false, "trimLeadingSpace");
|
|
371
|
+
let unfitTokens = [];
|
|
372
|
+
for (let i = 0; i < remainingTextTokens.length; i++) {
|
|
373
|
+
const remainingToken = remainingTextTokens[i];
|
|
374
|
+
const canBeNextToken = context._canBeNextTokenForGrammarEvaluationState(functionsEvaluationState, remainingToken);
|
|
375
|
+
if (!canBeNextToken) {
|
|
376
|
+
unfitTokens = remainingTextTokens.slice(i);
|
|
377
|
+
break;
|
|
378
|
+
}
|
|
379
|
+
context._acceptTokenOnGrammarEvaluationState(functionsEvaluationState, remainingToken);
|
|
380
|
+
functionCallTokens.push(remainingToken);
|
|
386
381
|
}
|
|
387
|
-
if (
|
|
388
|
-
|
|
389
|
-
queuedTokenRelease.
|
|
382
|
+
if (unfitTokens.length > 0) {
|
|
383
|
+
const unfitTokensText = model.detokenize(unfitTokens); // the current token text must end with it
|
|
384
|
+
const currentTokenText = queuedTokenRelease.text;
|
|
385
|
+
let replacementTokens;
|
|
386
|
+
if (!currentTokenText.endsWith(unfitTokensText)) {
|
|
387
|
+
console.warn(getConsoleLogPrefix() + "The current token text does not end with the unfit function call syntax tokens text");
|
|
388
|
+
replacementTokens = remainingTextTokens.slice(0, -unfitTokens.length);
|
|
389
|
+
}
|
|
390
|
+
else {
|
|
391
|
+
const newCurrentTokensText = currentTokenText.slice(0, -unfitTokensText.length);
|
|
392
|
+
replacementTokens = model.tokenize(newCurrentTokensText, false, "trimLeadingSpace");
|
|
393
|
+
}
|
|
394
|
+
if (replacementTokens.length > 0) {
|
|
395
|
+
replacementToken = replacementTokens[0];
|
|
396
|
+
queuedTokenRelease.modifyTokensAndText(replacementTokens, model.detokenize([replacementToken]));
|
|
397
|
+
}
|
|
390
398
|
}
|
|
391
399
|
}
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
functionSyntaxEndDetector.recordGeneration({ text, tokens, queuedTokenRelease });
|
|
397
|
-
}
|
|
398
|
-
if (inFunctionEvaluationMode && functionSyntaxEndDetector.hasTriggeredStops && functionsGrammar != null) {
|
|
399
|
-
const functionCallText = model.detokenize(functionCallTokens);
|
|
400
|
-
const functionCall = functionsGrammar.parseFunctionCall(functionCallText);
|
|
401
|
-
let modelResponse = model.detokenize(res);
|
|
402
|
-
let contextWindowModelResponse = model.detokenize(contextWindowsRes);
|
|
403
|
-
if (grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix) {
|
|
404
|
-
modelResponse = modelResponse.trimEnd();
|
|
405
|
-
contextWindowModelResponse = contextWindowModelResponse.trimEnd();
|
|
400
|
+
else if (inFunctionEvaluationMode) {
|
|
401
|
+
functionCallTokens.push(...tokens);
|
|
402
|
+
functionCallTokenSyntaxLocks.push(queuedTokenRelease.createTextIndexLock(0));
|
|
403
|
+
functionSyntaxEndDetector.recordGeneration({ text, tokens, queuedTokenRelease });
|
|
406
404
|
}
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
functionCall: functionCall,
|
|
416
|
-
metadata: {
|
|
417
|
-
stopReason: "functionCall"
|
|
405
|
+
if (inFunctionEvaluationMode && functionSyntaxEndDetector.hasTriggeredStops && functionsGrammar != null) {
|
|
406
|
+
const functionCallText = model.detokenize(functionCallTokens);
|
|
407
|
+
const functionCall = functionsGrammar.parseFunctionCall(functionCallText);
|
|
408
|
+
let modelResponse = model.detokenize(res);
|
|
409
|
+
let contextWindowModelResponse = model.detokenize(contextWindowsRes);
|
|
410
|
+
if (grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix) {
|
|
411
|
+
modelResponse = modelResponse.trimEnd();
|
|
412
|
+
contextWindowModelResponse = contextWindowModelResponse.trimEnd();
|
|
418
413
|
}
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
.filter((remainingGenerations) => remainingGenerations.length > 0)
|
|
433
|
-
.flat(1);
|
|
434
|
-
removeFoundStartIgnoreTextsFromPendingTokens();
|
|
435
|
-
if (pendingTokens.length > 0)
|
|
436
|
-
onToken?.(pendingTokens.slice());
|
|
437
|
-
res.push(...pendingTokens);
|
|
438
|
-
contextWindowsRes.push(...pendingTokens);
|
|
439
|
-
pendingTokens.length = 0;
|
|
440
|
-
let modelResponse = model.detokenize(res);
|
|
441
|
-
let contextWindowModelResponse = model.detokenize(contextWindowsRes);
|
|
442
|
-
if (grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix) {
|
|
443
|
-
modelResponse = modelResponse.trimEnd();
|
|
444
|
-
contextWindowModelResponse = contextWindowModelResponse.trimEnd();
|
|
414
|
+
return {
|
|
415
|
+
response: modelResponse,
|
|
416
|
+
lastEvaluation: {
|
|
417
|
+
contextWindow: setLastModelTextResponseInChatHistory(lastContextWindowHistory, contextWindowLastModelResponse + contextWindowModelResponse),
|
|
418
|
+
cleanHistory: setLastModelTextResponseInChatHistory(resolvedHistory, lastModelResponse + modelResponse),
|
|
419
|
+
contextShiftMetadata: lastHistoryCompressionMetadata
|
|
420
|
+
},
|
|
421
|
+
// prevent infinite TS type instantiation
|
|
422
|
+
functionCall: functionCall,
|
|
423
|
+
metadata: {
|
|
424
|
+
stopReason: "functionCall"
|
|
425
|
+
}
|
|
426
|
+
};
|
|
445
427
|
}
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
contextShiftMetadata: lastHistoryCompressionMetadata
|
|
452
|
-
},
|
|
453
|
-
metadata: {
|
|
454
|
-
remainingGenerationAfterStop: firstRemainingGenerationAfterStop,
|
|
455
|
-
stopReason: model.isEogToken(token)
|
|
456
|
-
? "eogToken"
|
|
457
|
-
: "stopGenerationTrigger"
|
|
458
|
-
}
|
|
459
|
-
};
|
|
460
|
-
}
|
|
461
|
-
const maxTokensTriggered = maxTokens != null && maxTokens > 0 && generatedTokens >= maxTokens;
|
|
462
|
-
if (res.length === 0) {
|
|
463
|
-
ignoreStartTextDetector.clearInProgressStops();
|
|
464
|
-
ignoreStartTextDetector.clearTriggeredStops();
|
|
465
|
-
ignoreStartTextDetector.recordGeneration({
|
|
466
|
-
text: model.detokenize(pendingTokens),
|
|
467
|
-
tokens: pendingTokens
|
|
468
|
-
});
|
|
469
|
-
}
|
|
470
|
-
if (pendingTokens.length > 0 && (maxTokensTriggered || !ignoreStartTextDetector.hasInProgressStops)) {
|
|
428
|
+
if (!inFunctionEvaluationMode) {
|
|
429
|
+
stopGenerationDetector.recordGeneration({ text, tokens, queuedTokenRelease });
|
|
430
|
+
customStopGenerationTriggersDetector.recordGeneration({ text, tokens, queuedTokenRelease });
|
|
431
|
+
}
|
|
432
|
+
pendingTokens.push(...streamRegulator.popFreeChunkTokens());
|
|
471
433
|
removeFoundStartIgnoreTextsFromPendingTokens();
|
|
472
|
-
if (
|
|
473
|
-
|
|
434
|
+
if (stopGenerationDetector.hasTriggeredStops || customStopGenerationTriggersDetector.hasTriggeredStops ||
|
|
435
|
+
model.isEogToken(token)) {
|
|
436
|
+
stopGenerationDetector.clearInProgressStops();
|
|
437
|
+
customStopGenerationTriggersDetector.clearInProgressStops();
|
|
438
|
+
pendingTokens.push(...streamRegulator.popFreeChunkTokens());
|
|
439
|
+
const triggeredStops = stopGenerationDetector.hasTriggeredStops
|
|
440
|
+
? stopGenerationDetector.getTriggeredStops()
|
|
441
|
+
: customStopGenerationTriggersDetector.getTriggeredStops();
|
|
442
|
+
const partiallyFreeTokens = streamRegulator.getPartiallyFreeChunk(model.tokenizer);
|
|
443
|
+
const queuedTokensBeforeStopTrigger = getQueuedTokensBeforeStopTrigger(triggeredStops, partiallyFreeTokens, model.tokenizer);
|
|
444
|
+
pendingTokens.push(...queuedTokensBeforeStopTrigger);
|
|
445
|
+
const [firstRemainingGenerationAfterStop] = triggeredStops
|
|
446
|
+
.map((stopTrigger) => stopTrigger.remainingGenerations)
|
|
447
|
+
.filter((remainingGenerations) => remainingGenerations.length > 0)
|
|
448
|
+
.flat(1);
|
|
449
|
+
removeFoundStartIgnoreTextsFromPendingTokens();
|
|
450
|
+
if (pendingTokens.length > 0)
|
|
451
|
+
onToken?.(pendingTokens.slice());
|
|
474
452
|
res.push(...pendingTokens);
|
|
475
453
|
contextWindowsRes.push(...pendingTokens);
|
|
476
454
|
pendingTokens.length = 0;
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
contextWindowModelResponse = contextWindowModelResponse.trimEnd();
|
|
485
|
-
}
|
|
486
|
-
return {
|
|
487
|
-
response: modelResponse,
|
|
488
|
-
lastEvaluation: {
|
|
455
|
+
let modelResponse = model.detokenize(res);
|
|
456
|
+
let contextWindowModelResponse = model.detokenize(contextWindowsRes);
|
|
457
|
+
if (grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix) {
|
|
458
|
+
modelResponse = modelResponse.trimEnd();
|
|
459
|
+
contextWindowModelResponse = contextWindowModelResponse.trimEnd();
|
|
460
|
+
}
|
|
461
|
+
const lastEvaluation = {
|
|
489
462
|
contextWindow: setLastModelTextResponseInChatHistory(lastContextWindowHistory, contextWindowLastModelResponse + contextWindowModelResponse),
|
|
490
463
|
cleanHistory: setLastModelTextResponseInChatHistory(resolvedHistory, lastModelResponse + modelResponse),
|
|
491
464
|
contextShiftMetadata: lastHistoryCompressionMetadata
|
|
492
|
-
}
|
|
493
|
-
|
|
494
|
-
|
|
465
|
+
};
|
|
466
|
+
const isEogToken = model.isEogToken(token);
|
|
467
|
+
if (isEogToken || stopGenerationDetector.hasTriggeredStops) {
|
|
468
|
+
return {
|
|
469
|
+
response: modelResponse,
|
|
470
|
+
lastEvaluation,
|
|
471
|
+
metadata: {
|
|
472
|
+
remainingGenerationAfterStop: firstRemainingGenerationAfterStop,
|
|
473
|
+
stopReason: isEogToken
|
|
474
|
+
? "eogToken"
|
|
475
|
+
: "stopGenerationTrigger"
|
|
476
|
+
}
|
|
477
|
+
};
|
|
495
478
|
}
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
479
|
+
return {
|
|
480
|
+
response: modelResponse,
|
|
481
|
+
lastEvaluation,
|
|
482
|
+
metadata: {
|
|
483
|
+
remainingGenerationAfterStop: firstRemainingGenerationAfterStop,
|
|
484
|
+
stopReason: "customStopTrigger",
|
|
485
|
+
customStopTrigger: triggeredStops[0].stopTrigger
|
|
486
|
+
}
|
|
487
|
+
};
|
|
488
|
+
}
|
|
489
|
+
const maxTokensTriggered = maxTokens != null && maxTokens > 0 && generatedTokens >= maxTokens;
|
|
490
|
+
if (res.length === 0) {
|
|
491
|
+
ignoreStartTextDetector.clearInProgressStops();
|
|
492
|
+
ignoreStartTextDetector.clearTriggeredStops();
|
|
493
|
+
ignoreStartTextDetector.recordGeneration({
|
|
494
|
+
text: model.detokenize(pendingTokens),
|
|
495
|
+
tokens: pendingTokens
|
|
496
|
+
});
|
|
497
|
+
}
|
|
498
|
+
if (pendingTokens.length > 0 && (maxTokensTriggered || !ignoreStartTextDetector.hasInProgressStops)) {
|
|
499
|
+
removeFoundStartIgnoreTextsFromPendingTokens();
|
|
500
|
+
if (pendingTokens.length > 0) {
|
|
501
|
+
onToken?.(pendingTokens.slice());
|
|
502
|
+
res.push(...pendingTokens);
|
|
503
|
+
contextWindowsRes.push(...pendingTokens);
|
|
504
|
+
pendingTokens.length = 0;
|
|
505
|
+
}
|
|
506
|
+
}
|
|
507
|
+
if (maxTokensTriggered) {
|
|
508
|
+
let modelResponse = model.detokenize(res);
|
|
509
|
+
let contextWindowModelResponse = model.detokenize(contextWindowsRes);
|
|
510
|
+
if (grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix) {
|
|
511
|
+
modelResponse = modelResponse.trimEnd();
|
|
512
|
+
contextWindowModelResponse = contextWindowModelResponse.trimEnd();
|
|
513
|
+
}
|
|
514
|
+
return {
|
|
515
|
+
response: modelResponse,
|
|
516
|
+
lastEvaluation: {
|
|
517
|
+
contextWindow: setLastModelTextResponseInChatHistory(lastContextWindowHistory, contextWindowLastModelResponse + contextWindowModelResponse),
|
|
518
|
+
cleanHistory: setLastModelTextResponseInChatHistory(resolvedHistory, lastModelResponse + modelResponse),
|
|
519
|
+
contextShiftMetadata: lastHistoryCompressionMetadata
|
|
520
|
+
},
|
|
521
|
+
metadata: {
|
|
522
|
+
stopReason: "maxTokens"
|
|
523
|
+
}
|
|
524
|
+
};
|
|
525
|
+
}
|
|
526
|
+
if (this._sequence.nextTokenIndex >= context.contextSize - 1) {
|
|
527
|
+
shouldContextShift = true;
|
|
528
|
+
break;
|
|
529
|
+
}
|
|
530
|
+
if (signal?.aborted && stopOnAbortSignal) {
|
|
531
|
+
if (res.length === 0)
|
|
532
|
+
throw signal.reason;
|
|
533
|
+
let modelResponse = model.detokenize(res);
|
|
534
|
+
let contextWindowModelResponse = model.detokenize(contextWindowsRes);
|
|
535
|
+
if (grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix) {
|
|
536
|
+
modelResponse = modelResponse.trimEnd();
|
|
537
|
+
contextWindowModelResponse = contextWindowModelResponse.trimEnd();
|
|
538
|
+
}
|
|
539
|
+
return {
|
|
540
|
+
response: modelResponse,
|
|
541
|
+
lastEvaluation: {
|
|
542
|
+
contextWindow: setLastModelTextResponseInChatHistory(lastContextWindowHistory, contextWindowLastModelResponse + contextWindowModelResponse),
|
|
543
|
+
cleanHistory: setLastModelTextResponseInChatHistory(resolvedHistory, lastModelResponse + modelResponse),
|
|
544
|
+
contextShiftMetadata: lastHistoryCompressionMetadata
|
|
545
|
+
},
|
|
546
|
+
metadata: {
|
|
547
|
+
stopReason: "abort"
|
|
548
|
+
}
|
|
549
|
+
};
|
|
550
|
+
}
|
|
551
|
+
currentIteration = await evaluationIterator.next(replacementToken);
|
|
501
552
|
}
|
|
502
|
-
|
|
553
|
+
}
|
|
554
|
+
finally {
|
|
555
|
+
await evaluationIterator.return();
|
|
503
556
|
}
|
|
504
557
|
isFirstEvaluation = false;
|
|
505
558
|
if (shouldContextShift)
|
|
@@ -654,7 +707,7 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
|
|
|
654
707
|
: resolvedContextShift.size;
|
|
655
708
|
const { compressedHistory, metadata } = await compressHistoryToFitContextSize({
|
|
656
709
|
history: resolvedHistory,
|
|
657
|
-
contextShiftSize: Math.max(contextShiftSize,
|
|
710
|
+
contextShiftSize: Math.max(minFreeContextTokens, Math.min(contextShiftSize, context.contextSize - pendingTokensCount)) + pendingTokensCount,
|
|
658
711
|
contextShiftStrategy: resolvedContextShift.strategy,
|
|
659
712
|
contextShiftLastEvaluationMetadata: resolvedContextShift.lastEvaluationMetadata,
|
|
660
713
|
contextSize: context.contextSize,
|
|
@@ -701,7 +754,7 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
|
|
|
701
754
|
: resolvedContextShift.size)));
|
|
702
755
|
const { compressedHistory, metadata } = await compressHistoryToFitContextSize({
|
|
703
756
|
history: resolvedHistory,
|
|
704
|
-
contextShiftSize: Math.max(contextShiftSize,
|
|
757
|
+
contextShiftSize: Math.max(minFreeContextTokens, Math.min(contextShiftSize, context.contextSize - pendingTokensCount)) + pendingTokensCount,
|
|
705
758
|
contextShiftStrategy: resolvedContextShift.strategy,
|
|
706
759
|
contextShiftLastEvaluationMetadata: resolvedContextShift.lastEvaluationMetadata,
|
|
707
760
|
contextSize: context.contextSize,
|