node-llama-cpp 3.12.0 → 3.12.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bindings/Llama.js +16 -6
- package/dist/bindings/Llama.js.map +1 -1
- package/dist/bindings/getLlama.js +6 -2
- package/dist/bindings/getLlama.js.map +1 -1
- package/dist/bindings/types.d.ts +11 -2
- package/dist/bindings/types.js +11 -2
- package/dist/bindings/types.js.map +1 -1
- package/dist/bindings/utils/clearAllLocalBuilds.js +1 -1
- package/dist/bindings/utils/clearAllLocalBuilds.js.map +1 -1
- package/dist/bindings/utils/compileLLamaCpp.d.ts +1 -0
- package/dist/bindings/utils/compileLLamaCpp.js +49 -12
- package/dist/bindings/utils/compileLLamaCpp.js.map +1 -1
- package/dist/bindings/utils/detectAvailableComputeLayers.d.ts +4 -1
- package/dist/bindings/utils/detectAvailableComputeLayers.js +12 -6
- package/dist/bindings/utils/detectAvailableComputeLayers.js.map +1 -1
- package/dist/bindings/utils/getBuildFolderNameForBuildOptions.d.ts +1 -0
- package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js +19 -7
- package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js.map +1 -1
- package/dist/bindings/utils/testBindingBinary.d.ts +1 -1
- package/dist/bindings/utils/testBindingBinary.js +13 -5
- package/dist/bindings/utils/testBindingBinary.js.map +1 -1
- package/dist/chatWrappers/HarmonyChatWrapper.js +27 -15
- package/dist/chatWrappers/HarmonyChatWrapper.js.map +1 -1
- package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js +8 -2
- package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js.map +1 -1
- package/dist/cli/commands/inspect/commands/InspectGpuCommand.js +8 -4
- package/dist/cli/commands/inspect/commands/InspectGpuCommand.js.map +1 -1
- package/dist/evaluator/LlamaChat/LlamaChat.d.ts +14 -0
- package/dist/evaluator/LlamaChat/LlamaChat.js +126 -20
- package/dist/evaluator/LlamaChat/LlamaChat.js.map +1 -1
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.d.ts +53 -2
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.js +162 -47
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.js.map +1 -1
- package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.d.ts +1 -0
- package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.js.map +1 -1
- package/dist/evaluator/LlamaContext/LlamaContext.js +1 -1
- package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -1
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/dist/types.d.ts +6 -1
- package/dist/types.js +2 -1
- package/dist/types.js.map +1 -1
- package/llama/CMakeLists.txt +20 -0
- package/llama/addon/globals/getGpuInfo.cpp +8 -4
- package/llama/binariesGithubRelease.json +1 -1
- package/llama/cmake/addVariantSuffix.cmake +21 -0
- package/llama/gitRelease.bundle +0 -0
- package/llama/llama.cpp.info.json +1 -1
- package/package.json +24 -23
- package/templates/packed/electron-typescript-react.json +1 -1
|
@@ -252,7 +252,21 @@ export type LLamaChatGenerateResponseOptions<Functions extends ChatModelFunction
|
|
|
252
252
|
* Defaults to `Infinity`.
|
|
253
253
|
*/
|
|
254
254
|
thoughtTokens?: number;
|
|
255
|
+
/**
|
|
256
|
+
* Budget for comment tokens.
|
|
257
|
+
*
|
|
258
|
+
* Defaults to `Infinity`.
|
|
259
|
+
*/
|
|
260
|
+
commentTokens?: number;
|
|
255
261
|
};
|
|
262
|
+
/**
|
|
263
|
+
* Stop the generation when the model tries to generate a non-textual segment or call a function.
|
|
264
|
+
*
|
|
265
|
+
* Useful for generating completions in the form of a model response.
|
|
266
|
+
*
|
|
267
|
+
* Defaults to `false`.
|
|
268
|
+
*/
|
|
269
|
+
abortOnNonText?: boolean;
|
|
256
270
|
} & ({
|
|
257
271
|
grammar?: LlamaGrammar;
|
|
258
272
|
functions?: never;
|
|
@@ -80,7 +80,7 @@ export class LlamaChat {
|
|
|
80
80
|
return this.sequence.model;
|
|
81
81
|
}
|
|
82
82
|
async generateResponse(history, options = {}) {
|
|
83
|
-
const { onTextChunk, onToken, onResponseChunk, onFunctionCallParamsChunk, budgets, signal, stopOnAbortSignal = false, maxTokens, temperature, minP, topK, topP, seed, grammar, trimWhitespaceSuffix = defaultTrimWhitespaceSuffix, repeatPenalty = {}, tokenBias, evaluationPriority = defaultEvaluationPriority, functions, onFunctionCall, documentFunctionParams, maxParallelFunctionCalls, contextShift = defaultContextShiftOptions, customStopTriggers, lastEvaluationContextWindow: { history: lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift = 0.5 } = {} } = options;
|
|
83
|
+
const { onTextChunk, onToken, onResponseChunk, onFunctionCallParamsChunk, budgets, signal, stopOnAbortSignal = false, maxTokens, temperature, minP, topK, topP, seed, grammar, trimWhitespaceSuffix = defaultTrimWhitespaceSuffix, repeatPenalty = {}, tokenBias, evaluationPriority = defaultEvaluationPriority, functions, onFunctionCall, documentFunctionParams, maxParallelFunctionCalls, contextShift = defaultContextShiftOptions, customStopTriggers, abortOnNonText = false, lastEvaluationContextWindow: { history: lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift = 0.5 } = {} } = options;
|
|
84
84
|
this.sequence.tokenPredictor?.updateInputTokens?.(this.model.tokenize(findLastUserMessageInChatHistory(history)?.text ?? ""));
|
|
85
85
|
const generateResponseState = new GenerateResponseState(this, this._chatWrapper, history, {
|
|
86
86
|
onTextChunk,
|
|
@@ -107,12 +107,13 @@ export class LlamaChat {
|
|
|
107
107
|
maxParallelFunctionCalls,
|
|
108
108
|
contextShift,
|
|
109
109
|
customStopTriggers,
|
|
110
|
+
abortOnNonText,
|
|
110
111
|
lastEvaluationContextWindow: {
|
|
111
112
|
history: lastEvaluationContextWindowHistory,
|
|
112
113
|
minimumOverlapPercentageToPreventContextShift
|
|
113
114
|
}
|
|
114
115
|
});
|
|
115
|
-
if (generateResponseState.grammar != null && generateResponseState.functionsEnabled)
|
|
116
|
+
if (generateResponseState.grammar != null && generateResponseState.functionsEnabled && !abortOnNonText)
|
|
116
117
|
throw new Error("Using both grammar and functions is not supported yet");
|
|
117
118
|
return await withLock([this._chatLock, "evaluate"], signal, async () => {
|
|
118
119
|
try {
|
|
@@ -122,7 +123,6 @@ export class LlamaChat {
|
|
|
122
123
|
await generateResponseState.loadContextWindow(generateResponseState.getResolvedHistoryWithCurrentModelResponse(), generateResponseState.getContextWindowsHistoryWithCurrentModelResponse(), false, avoidReloadingHistory);
|
|
123
124
|
};
|
|
124
125
|
const loadContextWindowForFunctionCallingLoop = async () => loadContextWindow(true);
|
|
125
|
-
const loadContextWindowForBudgetTriggers = async () => loadContextWindow(false);
|
|
126
126
|
while (true) {
|
|
127
127
|
generateResponseState.startTokenLoop();
|
|
128
128
|
generateResponseState.handleRerender();
|
|
@@ -137,12 +137,15 @@ export class LlamaChat {
|
|
|
137
137
|
generateResponseState.initFunctions();
|
|
138
138
|
}
|
|
139
139
|
}
|
|
140
|
+
const abortRes = generateResponseState.handleAbortTrigger("model");
|
|
141
|
+
if (abortRes != null)
|
|
142
|
+
return abortRes;
|
|
140
143
|
if (shouldHandlePrefixTriggers) {
|
|
141
144
|
const handlePrefixTriggersRes = await generateResponseState.handlePrefixTriggers(loadContextWindowForFunctionCallingLoop);
|
|
142
145
|
if (handlePrefixTriggersRes != null)
|
|
143
146
|
return handlePrefixTriggersRes;
|
|
144
147
|
}
|
|
145
|
-
if (generateResponseState.functionEvaluationMode !== false) {
|
|
148
|
+
if (generateResponseState.functionEvaluationMode !== false && !generateResponseState.abortOnNonText) {
|
|
146
149
|
const functionsCallsRes = await generateResponseState.enterFunctionCallingLoop(loadContextWindowForFunctionCallingLoop);
|
|
147
150
|
if (functionsCallsRes != null)
|
|
148
151
|
return functionsCallsRes;
|
|
@@ -176,9 +179,9 @@ export class LlamaChat {
|
|
|
176
179
|
if (generateResponseState.handleShouldRerender() || generateResponseState.updateShouldContextShift())
|
|
177
180
|
break;
|
|
178
181
|
if (await generateResponseState.handleBudgetTriggers()) {
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
+
generateResponseState.shouldRerender = true;
|
|
183
|
+
generateResponseState.skipClosingResponseItemOnRerender = true;
|
|
184
|
+
break;
|
|
182
185
|
}
|
|
183
186
|
if (generateResponseState.handleShouldRerender() || generateResponseState.updateShouldContextShift())
|
|
184
187
|
break;
|
|
@@ -728,6 +731,7 @@ class GenerateResponseState {
|
|
|
728
731
|
maxParallelFunctionCalls;
|
|
729
732
|
contextShift;
|
|
730
733
|
customStopTriggers;
|
|
734
|
+
abortOnNonText;
|
|
731
735
|
minimumOverlapPercentageToPreventContextShift;
|
|
732
736
|
functionsEnabled;
|
|
733
737
|
repeatPenaltyEnabled;
|
|
@@ -771,6 +775,8 @@ class GenerateResponseState {
|
|
|
771
775
|
// context shift loop
|
|
772
776
|
shouldContextShift = false;
|
|
773
777
|
shouldRerender = false;
|
|
778
|
+
skipClosingResponseItemOnRerender = false;
|
|
779
|
+
shouldAbortBecauseOfNonText = false;
|
|
774
780
|
canAvoidReloadingHistory = false;
|
|
775
781
|
contextWindowTokens = [];
|
|
776
782
|
stopGenerationTriggers = [];
|
|
@@ -792,7 +798,7 @@ class GenerateResponseState {
|
|
|
792
798
|
currentTokens = [];
|
|
793
799
|
currentText = "";
|
|
794
800
|
currentQueuedTokenRelease;
|
|
795
|
-
constructor(llamaChat, chatWrapper, history, { onTextChunk, onToken, onResponseChunk, onFunctionCallParamsChunk, budgets, signal, stopOnAbortSignal = false, maxTokens, temperature, minP, topK, topP, seed, grammar, trimWhitespaceSuffix = defaultTrimWhitespaceSuffix, repeatPenalty = {}, tokenBias, evaluationPriority = defaultEvaluationPriority, functions, onFunctionCall, documentFunctionParams, maxParallelFunctionCalls, contextShift = defaultContextShiftOptions, customStopTriggers, lastEvaluationContextWindow: { history: lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift = 0.5 } = {} } = {}) {
|
|
801
|
+
constructor(llamaChat, chatWrapper, history, { onTextChunk, onToken, onResponseChunk, onFunctionCallParamsChunk, budgets, signal, stopOnAbortSignal = false, maxTokens, temperature, minP, topK, topP, seed, grammar, trimWhitespaceSuffix = defaultTrimWhitespaceSuffix, repeatPenalty = {}, tokenBias, evaluationPriority = defaultEvaluationPriority, functions, onFunctionCall, documentFunctionParams, maxParallelFunctionCalls, contextShift = defaultContextShiftOptions, customStopTriggers, abortOnNonText, lastEvaluationContextWindow: { history: lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift = 0.5 } = {} } = {}) {
|
|
796
802
|
this.llamaChat = llamaChat;
|
|
797
803
|
this.chatWrapper = chatWrapper;
|
|
798
804
|
this.history = history;
|
|
@@ -819,6 +825,7 @@ class GenerateResponseState {
|
|
|
819
825
|
this.maxParallelFunctionCalls = maxParallelFunctionCalls;
|
|
820
826
|
this.contextShift = contextShift;
|
|
821
827
|
this.customStopTriggers = customStopTriggers;
|
|
828
|
+
this.abortOnNonText = abortOnNonText ?? false;
|
|
822
829
|
this.minimumOverlapPercentageToPreventContextShift = minimumOverlapPercentageToPreventContextShift;
|
|
823
830
|
this.functionsEnabled = (this.functions != null && Object.keys(this.functions).length > 0);
|
|
824
831
|
if (this.signal?.aborted)
|
|
@@ -856,7 +863,7 @@ class GenerateResponseState {
|
|
|
856
863
|
if (this.grammar != null)
|
|
857
864
|
StopGenerationDetector.resolveStopTriggers(this.grammar.stopGenerationTriggers, this.llamaChat.model.tokenizer)
|
|
858
865
|
.map((stopTrigger) => this.stopGenerationDetector.addStopTrigger(stopTrigger));
|
|
859
|
-
if (this.functions != null && Object.keys(this.functions).length > 0)
|
|
866
|
+
if (this.functions != null && Object.keys(this.functions).length > 0 && !this.abortOnNonText)
|
|
860
867
|
this.functionSyntaxStartDetector.addStopTrigger(StopGenerationDetector.resolveLlamaTextTrigger(LlamaText([
|
|
861
868
|
this.chatWrapper.settings.functions?.parallelism?.call?.sectionPrefix ?? "",
|
|
862
869
|
this.chatWrapper.settings.functions.call.prefix
|
|
@@ -881,6 +888,17 @@ class GenerateResponseState {
|
|
|
881
888
|
? new Map()
|
|
882
889
|
: SegmentHandler.getSegmentTokenCounts(lastModelMessageFullResponse, this.llamaChat.model.tokenizer)
|
|
883
890
|
});
|
|
891
|
+
if (this.abortOnNonText) {
|
|
892
|
+
this.stopGenerationDetector.addStopTrigger(StopGenerationDetector.resolveLlamaTextTrigger(LlamaText([
|
|
893
|
+
this.chatWrapper.settings.functions?.parallelism?.call?.sectionPrefix ?? "",
|
|
894
|
+
this.chatWrapper.settings.functions.call.prefix
|
|
895
|
+
]), this.llamaChat.model.tokenizer));
|
|
896
|
+
for (const segmentType of allSegmentTypes) {
|
|
897
|
+
const segmentDefinition = getChatWrapperSegmentDefinition(this.chatWrapper.settings, segmentType);
|
|
898
|
+
if (segmentDefinition != null)
|
|
899
|
+
this.stopGenerationDetector.addStopTrigger(StopGenerationDetector.resolveLlamaTextTrigger(LlamaText(segmentDefinition.prefix), this.llamaChat.model.tokenizer));
|
|
900
|
+
}
|
|
901
|
+
}
|
|
884
902
|
this.getPenaltyTokens = this.getPenaltyTokens.bind(this);
|
|
885
903
|
}
|
|
886
904
|
async dispose() {
|
|
@@ -929,7 +947,10 @@ class GenerateResponseState {
|
|
|
929
947
|
});
|
|
930
948
|
if (!hadThoughtSegments)
|
|
931
949
|
return;
|
|
932
|
-
this.segmentHandler.openSegment("thought");
|
|
950
|
+
if (this.abortOnNonText)
|
|
951
|
+
this.shouldAbortBecauseOfNonText = true;
|
|
952
|
+
else
|
|
953
|
+
this.segmentHandler.openSegment("thought");
|
|
933
954
|
}
|
|
934
955
|
ensureNotAborted() {
|
|
935
956
|
if (this.signal?.aborted && (!this.stopOnAbortSignal || this.res.length === 0))
|
|
@@ -1033,10 +1054,12 @@ class GenerateResponseState {
|
|
|
1033
1054
|
if (this.shouldRerender) {
|
|
1034
1055
|
this.isRerender = true;
|
|
1035
1056
|
this.streamRegulator.reset();
|
|
1036
|
-
if (this.rerenderActions === "closeResponseItem" && this.segmentHandler.topOpenSegmentType != null
|
|
1057
|
+
if (this.rerenderActions === "closeResponseItem" && this.segmentHandler.topOpenSegmentType != null &&
|
|
1058
|
+
!this.skipClosingResponseItemOnRerender) {
|
|
1037
1059
|
this.segmentHandler.closeSegment(this.segmentHandler.topOpenSegmentType);
|
|
1038
1060
|
this.shouldRerender = false;
|
|
1039
1061
|
}
|
|
1062
|
+
this.skipClosingResponseItemOnRerender = false;
|
|
1040
1063
|
}
|
|
1041
1064
|
}
|
|
1042
1065
|
getContextWindowFunctionCallsTokens() {
|
|
@@ -1098,8 +1121,16 @@ class GenerateResponseState {
|
|
|
1098
1121
|
}
|
|
1099
1122
|
this.prefixTriggerDetectors.clear();
|
|
1100
1123
|
for (const trigger of prefixTriggers ?? []) {
|
|
1124
|
+
const segmentBudget = trigger.type === "segment"
|
|
1125
|
+
? this.getSegmentBudget(trigger.segmentType)
|
|
1126
|
+
: null;
|
|
1101
1127
|
if (trigger.type === "functionCall" && !this.functionsEnabled)
|
|
1102
1128
|
continue;
|
|
1129
|
+
else if (trigger.type === "segment" &&
|
|
1130
|
+
segmentBudget != null &&
|
|
1131
|
+
!this.segmentHandler.isSegmentTypeOpen(trigger.segmentType) &&
|
|
1132
|
+
this.segmentHandler.getSegmentTokensCount(trigger.segmentType) >= segmentBudget)
|
|
1133
|
+
continue;
|
|
1103
1134
|
const prefixDetector = new StopGenerationDetector();
|
|
1104
1135
|
StopGenerationDetector.resolveStopTriggers(trigger.triggers, this.llamaChat.model.tokenizer)
|
|
1105
1136
|
.forEach((stopTrigger) => prefixDetector.addStopTrigger(stopTrigger));
|
|
@@ -1114,8 +1145,16 @@ class GenerateResponseState {
|
|
|
1114
1145
|
}
|
|
1115
1146
|
}
|
|
1116
1147
|
this.noPrefixTrigger = noPrefixTrigger;
|
|
1148
|
+
const noPrefixTriggerSegmentBudget = noPrefixTrigger?.type === "segment"
|
|
1149
|
+
? this.getSegmentBudget(noPrefixTrigger.segmentType)
|
|
1150
|
+
: null;
|
|
1117
1151
|
if (this.noPrefixTrigger?.type === "functionCall" && !this.functionsEnabled)
|
|
1118
1152
|
this.noPrefixTrigger = undefined;
|
|
1153
|
+
else if (noPrefixTrigger?.type === "segment" &&
|
|
1154
|
+
noPrefixTriggerSegmentBudget != null &&
|
|
1155
|
+
!this.segmentHandler.isSegmentTypeOpen(noPrefixTrigger.segmentType) &&
|
|
1156
|
+
this.segmentHandler.getSegmentTokensCount(noPrefixTrigger.segmentType) >= noPrefixTriggerSegmentBudget)
|
|
1157
|
+
this.noPrefixTrigger = undefined;
|
|
1119
1158
|
this.rerenderTriggers = rerender?.triggers ?? [];
|
|
1120
1159
|
this.rerenderTriggerDetector.clearInProgressStops();
|
|
1121
1160
|
this.rerenderTriggerDetector.clearTriggeredStops();
|
|
@@ -1163,6 +1202,10 @@ class GenerateResponseState {
|
|
|
1163
1202
|
}
|
|
1164
1203
|
initFunctions() {
|
|
1165
1204
|
this.initiallyEngagedFunctionMode = this.functionCallInitiallyEngaged;
|
|
1205
|
+
if (this.initiallyEngagedFunctionMode && this.abortOnNonText) {
|
|
1206
|
+
this.shouldAbortBecauseOfNonText = true;
|
|
1207
|
+
return;
|
|
1208
|
+
}
|
|
1166
1209
|
if (this.initiallyEngagedFunctionMode) {
|
|
1167
1210
|
StopGenerationDetector.resolveStopTriggers(this.disengageInitiallyEngagedFunctionCall, this.llamaChat.model.tokenizer)
|
|
1168
1211
|
.map((stopTrigger) => this.disengageInitiallyEngagedFunctionMode.addStopTrigger(stopTrigger));
|
|
@@ -1193,6 +1236,13 @@ class GenerateResponseState {
|
|
|
1193
1236
|
await reloadTokens();
|
|
1194
1237
|
};
|
|
1195
1238
|
if (this.prefixTriggerDetectors.size === 0) {
|
|
1239
|
+
if (this.abortOnNonText && this.noPrefixTrigger != null && this.noPrefixTrigger.type !== "response") {
|
|
1240
|
+
this.shouldAbortBecauseOfNonText = true;
|
|
1241
|
+
const stopRes = this.handleAbortTrigger("model");
|
|
1242
|
+
if (stopRes != null)
|
|
1243
|
+
return stopRes;
|
|
1244
|
+
return undefined;
|
|
1245
|
+
}
|
|
1196
1246
|
if (this.noPrefixTrigger?.type === "functionCall" && this.chatWrapper.settings.functions != null) {
|
|
1197
1247
|
await injectTokens(this.noPrefixTrigger.inject, true);
|
|
1198
1248
|
this.functionEvaluationMode = "functionName";
|
|
@@ -1231,6 +1281,13 @@ class GenerateResponseState {
|
|
|
1231
1281
|
return this.llamaChat.model.tokenize(item, false, "trimLeadingSpace");
|
|
1232
1282
|
return [item];
|
|
1233
1283
|
});
|
|
1284
|
+
if (this.abortOnNonText && trigger.type !== "response") {
|
|
1285
|
+
this.shouldAbortBecauseOfNonText = true;
|
|
1286
|
+
const stopRes = this.handleAbortTrigger("model");
|
|
1287
|
+
if (stopRes != null)
|
|
1288
|
+
return stopRes;
|
|
1289
|
+
return undefined;
|
|
1290
|
+
}
|
|
1234
1291
|
this.streamRegulator.reset();
|
|
1235
1292
|
if (trigger.type === "segment") {
|
|
1236
1293
|
pushAll(this.prefixTriggerTokens, triggerTokens);
|
|
@@ -1263,6 +1320,13 @@ class GenerateResponseState {
|
|
|
1263
1320
|
this.prefixTriggerDetectors.delete(triggerDetector);
|
|
1264
1321
|
}
|
|
1265
1322
|
if (this.prefixTriggerDetectors.size === 0 && continueGeneration) {
|
|
1323
|
+
if (this.abortOnNonText && this.noPrefixTrigger != null && this.noPrefixTrigger.type !== "response") {
|
|
1324
|
+
this.shouldAbortBecauseOfNonText = true;
|
|
1325
|
+
const stopRes = this.handleAbortTrigger("model");
|
|
1326
|
+
if (stopRes != null)
|
|
1327
|
+
return stopRes;
|
|
1328
|
+
return undefined;
|
|
1329
|
+
}
|
|
1266
1330
|
this.streamRegulator.reset();
|
|
1267
1331
|
continueGeneration = false;
|
|
1268
1332
|
if (this.noPrefixTrigger?.type === "functionCall" && this.chatWrapper.settings.functions != null) {
|
|
@@ -1777,6 +1841,10 @@ class GenerateResponseState {
|
|
|
1777
1841
|
});
|
|
1778
1842
|
if (this.currentQueuedTokenRelease != null && this.functionEvaluationMode === false && this.functionsEnabled &&
|
|
1779
1843
|
this.functionSyntaxStartDetector.hasTriggeredStops) {
|
|
1844
|
+
if (this.abortOnNonText) {
|
|
1845
|
+
this.shouldAbortBecauseOfNonText = true;
|
|
1846
|
+
return;
|
|
1847
|
+
}
|
|
1780
1848
|
this.functionEvaluationMode = "functionName";
|
|
1781
1849
|
this.currentQueuedTokenRelease.createTextIndexLock(0);
|
|
1782
1850
|
this.stopGenerationDetector.clearTriggeredStops();
|
|
@@ -1918,21 +1986,37 @@ class GenerateResponseState {
|
|
|
1918
1986
|
}
|
|
1919
1987
|
async handleBudgetTriggers() {
|
|
1920
1988
|
let shouldReloadEvaluationState = false;
|
|
1921
|
-
|
|
1922
|
-
const hasBudgetTriggers = this.budgets != null && hasBudget(this.budgets.thoughtTokens);
|
|
1923
|
-
if (!hasBudgetTriggers)
|
|
1989
|
+
if (this.budgets == null)
|
|
1924
1990
|
return shouldReloadEvaluationState;
|
|
1925
|
-
|
|
1926
|
-
const
|
|
1927
|
-
if (
|
|
1928
|
-
|
|
1991
|
+
for (const segmentType of this.segmentHandler.getOpenSegmentStack().reverse()) {
|
|
1992
|
+
const budget = this.getSegmentBudget(segmentType);
|
|
1993
|
+
if (budget == null)
|
|
1994
|
+
continue;
|
|
1995
|
+
const usedSegmentTokens = this.segmentHandler.getSegmentTokensCount(segmentType);
|
|
1996
|
+
if (usedSegmentTokens >= budget) {
|
|
1997
|
+
this.segmentHandler.closeSegment(segmentType);
|
|
1929
1998
|
shouldReloadEvaluationState = true;
|
|
1930
1999
|
}
|
|
1931
2000
|
}
|
|
1932
2001
|
return shouldReloadEvaluationState;
|
|
1933
2002
|
}
|
|
2003
|
+
getSegmentBudget(segmentType) {
|
|
2004
|
+
const getBudget = (budget) => ((budget == null || budget === Infinity)
|
|
2005
|
+
? null
|
|
2006
|
+
: budget);
|
|
2007
|
+
if (this.budgets == null)
|
|
2008
|
+
return null;
|
|
2009
|
+
if (segmentType === "thought")
|
|
2010
|
+
return getBudget(this.budgets.thoughtTokens);
|
|
2011
|
+
else if (segmentType === "comment")
|
|
2012
|
+
return getBudget(this.budgets.commentTokens);
|
|
2013
|
+
void segmentType;
|
|
2014
|
+
return null;
|
|
2015
|
+
}
|
|
1934
2016
|
handleShouldRerender() {
|
|
1935
2017
|
this.shouldRerender = this.rerenderTriggerDetector.hasTriggeredStops;
|
|
2018
|
+
if (this.abortOnNonText && this.shouldRerender)
|
|
2019
|
+
this.shouldAbortBecauseOfNonText = true;
|
|
1936
2020
|
return this.shouldRerender;
|
|
1937
2021
|
}
|
|
1938
2022
|
updateShouldContextShift() {
|
|
@@ -1940,7 +2024,7 @@ class GenerateResponseState {
|
|
|
1940
2024
|
return this.shouldContextShift;
|
|
1941
2025
|
}
|
|
1942
2026
|
get shouldAbort() {
|
|
1943
|
-
return !!(this.signal?.aborted && this.stopOnAbortSignal);
|
|
2027
|
+
return !!(this.signal?.aborted && this.stopOnAbortSignal) || this.shouldAbortBecauseOfNonText;
|
|
1944
2028
|
}
|
|
1945
2029
|
handleAbortTrigger(lastHistoryItemType) {
|
|
1946
2030
|
if (this.shouldAbort && this.signal?.aborted && this.stopOnAbortSignal) {
|
|
@@ -1960,7 +2044,9 @@ class GenerateResponseState {
|
|
|
1960
2044
|
contextShiftMetadata: this.lastHistoryCompressionMetadata
|
|
1961
2045
|
},
|
|
1962
2046
|
metadata: {
|
|
1963
|
-
stopReason: "abort"
|
|
2047
|
+
stopReason: this.shouldAbortBecauseOfNonText
|
|
2048
|
+
? "eogToken"
|
|
2049
|
+
: "abort"
|
|
1964
2050
|
}
|
|
1965
2051
|
};
|
|
1966
2052
|
}
|
|
@@ -2083,6 +2169,26 @@ class SegmentHandler {
|
|
|
2083
2169
|
get topOpenSegmentType() {
|
|
2084
2170
|
return this._segmentsStack.at(-1);
|
|
2085
2171
|
}
|
|
2172
|
+
/**
|
|
2173
|
+
* The first segment in the stack is the topmost one, which will be the last to close.
|
|
2174
|
+
* ```
|
|
2175
|
+
* <segment1>
|
|
2176
|
+
* some text here
|
|
2177
|
+
* <segment2>
|
|
2178
|
+
* some text here
|
|
2179
|
+
* <segment3>
|
|
2180
|
+
* some text here
|
|
2181
|
+
* </segment3>
|
|
2182
|
+
* ```
|
|
2183
|
+
* In that example, the topmost segment is `segment1`, and the last open segment is `segment2` (which is the next one to close).
|
|
2184
|
+
* So in that example, this function will return:
|
|
2185
|
+
* ```
|
|
2186
|
+
* ["segment1", "segment2"]
|
|
2187
|
+
* ```
|
|
2188
|
+
*/
|
|
2189
|
+
getOpenSegmentStack() {
|
|
2190
|
+
return this._segmentsStack.slice(this._ownedSegmentsStackLength);
|
|
2191
|
+
}
|
|
2086
2192
|
_processTokens(tokens, text) {
|
|
2087
2193
|
const queuedTokenRelease = this._streamRegulator.addChunk({
|
|
2088
2194
|
tokens,
|