node-llama-cpp 3.11.0 → 3.12.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/bindings/Llama.d.ts +5 -1
- package/dist/bindings/Llama.js +11 -1
- package/dist/bindings/Llama.js.map +1 -1
- package/dist/bindings/types.d.ts +5 -2
- package/dist/bindings/types.js +16 -1
- package/dist/bindings/types.js.map +1 -1
- package/dist/chatWrappers/HarmonyChatWrapper.d.ts +78 -0
- package/dist/chatWrappers/HarmonyChatWrapper.js +539 -0
- package/dist/chatWrappers/HarmonyChatWrapper.js.map +1 -0
- package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js +8 -2
- package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js.map +1 -1
- package/dist/chatWrappers/utils/resolveChatWrapper.d.ts +4 -2
- package/dist/chatWrappers/utils/resolveChatWrapper.js +21 -6
- package/dist/chatWrappers/utils/resolveChatWrapper.js.map +1 -1
- package/dist/cli/commands/ChatCommand.d.ts +2 -1
- package/dist/cli/commands/ChatCommand.js +21 -7
- package/dist/cli/commands/ChatCommand.js.map +1 -1
- package/dist/cli/commands/CompleteCommand.d.ts +2 -1
- package/dist/cli/commands/CompleteCommand.js +21 -7
- package/dist/cli/commands/CompleteCommand.js.map +1 -1
- package/dist/cli/commands/InfillCommand.d.ts +2 -1
- package/dist/cli/commands/InfillCommand.js +21 -7
- package/dist/cli/commands/InfillCommand.js.map +1 -1
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.d.ts +1 -0
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js +16 -5
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js.map +1 -1
- package/dist/cli/recommendedModels.js +22 -0
- package/dist/cli/recommendedModels.js.map +1 -1
- package/dist/evaluator/LlamaChat/LlamaChat.d.ts +14 -0
- package/dist/evaluator/LlamaChat/LlamaChat.js +369 -48
- package/dist/evaluator/LlamaChat/LlamaChat.js.map +1 -1
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.d.ts +52 -2
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.js +162 -47
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.js.map +1 -1
- package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.d.ts +1 -0
- package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.js.map +1 -1
- package/dist/gguf/insights/GgufInsights.js +22 -3
- package/dist/gguf/insights/GgufInsights.js.map +1 -1
- package/dist/gguf/types/GgufMetadataTypes.d.ts +19 -2
- package/dist/gguf/types/GgufMetadataTypes.js +17 -0
- package/dist/gguf/types/GgufMetadataTypes.js.map +1 -1
- package/dist/gguf/types/GgufTensorInfoTypes.d.ts +2 -1
- package/dist/gguf/types/GgufTensorInfoTypes.js +1 -0
- package/dist/gguf/types/GgufTensorInfoTypes.js.map +1 -1
- package/dist/gguf/utils/getGgufFileTypeName.d.ts +1 -1
- package/dist/gguf/utils/ggufQuantNames.js +1 -0
- package/dist/gguf/utils/ggufQuantNames.js.map +1 -1
- package/dist/index.d.ts +3 -2
- package/dist/index.js +2 -1
- package/dist/index.js.map +1 -1
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/dist/types.d.ts +150 -3
- package/dist/types.js +2 -1
- package/dist/types.js.map +1 -1
- package/dist/utils/gbnfJson/types.d.ts +1 -1
- package/dist/utils/gbnfJson/types.js.map +1 -1
- package/dist/utils/getChatWrapperSegmentDefinition.js +2 -0
- package/dist/utils/getChatWrapperSegmentDefinition.js.map +1 -1
- package/llama/binariesGithubRelease.json +1 -1
- package/llama/gitRelease.bundle +0 -0
- package/llama/llama.cpp.info.json +1 -1
- package/package.json +18 -16
- package/templates/packed/electron-typescript-react.json +1 -1
|
@@ -80,7 +80,7 @@ export class LlamaChat {
|
|
|
80
80
|
return this.sequence.model;
|
|
81
81
|
}
|
|
82
82
|
async generateResponse(history, options = {}) {
|
|
83
|
-
const { onTextChunk, onToken, onResponseChunk, onFunctionCallParamsChunk, budgets, signal, stopOnAbortSignal = false, maxTokens, temperature, minP, topK, topP, seed, grammar, trimWhitespaceSuffix = defaultTrimWhitespaceSuffix, repeatPenalty = {}, tokenBias, evaluationPriority = defaultEvaluationPriority, functions, onFunctionCall, documentFunctionParams, maxParallelFunctionCalls, contextShift = defaultContextShiftOptions, customStopTriggers, lastEvaluationContextWindow: { history: lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift = 0.5 } = {} } = options;
|
|
83
|
+
const { onTextChunk, onToken, onResponseChunk, onFunctionCallParamsChunk, budgets, signal, stopOnAbortSignal = false, maxTokens, temperature, minP, topK, topP, seed, grammar, trimWhitespaceSuffix = defaultTrimWhitespaceSuffix, repeatPenalty = {}, tokenBias, evaluationPriority = defaultEvaluationPriority, functions, onFunctionCall, documentFunctionParams, maxParallelFunctionCalls, contextShift = defaultContextShiftOptions, customStopTriggers, abortOnNonText = false, lastEvaluationContextWindow: { history: lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift = 0.5 } = {} } = options;
|
|
84
84
|
this.sequence.tokenPredictor?.updateInputTokens?.(this.model.tokenize(findLastUserMessageInChatHistory(history)?.text ?? ""));
|
|
85
85
|
const generateResponseState = new GenerateResponseState(this, this._chatWrapper, history, {
|
|
86
86
|
onTextChunk,
|
|
@@ -107,12 +107,13 @@ export class LlamaChat {
|
|
|
107
107
|
maxParallelFunctionCalls,
|
|
108
108
|
contextShift,
|
|
109
109
|
customStopTriggers,
|
|
110
|
+
abortOnNonText,
|
|
110
111
|
lastEvaluationContextWindow: {
|
|
111
112
|
history: lastEvaluationContextWindowHistory,
|
|
112
113
|
minimumOverlapPercentageToPreventContextShift
|
|
113
114
|
}
|
|
114
115
|
});
|
|
115
|
-
if (generateResponseState.grammar != null && generateResponseState.functionsEnabled)
|
|
116
|
+
if (generateResponseState.grammar != null && generateResponseState.functionsEnabled && !abortOnNonText)
|
|
116
117
|
throw new Error("Using both grammar and functions is not supported yet");
|
|
117
118
|
return await withLock([this._chatLock, "evaluate"], signal, async () => {
|
|
118
119
|
try {
|
|
@@ -122,11 +123,13 @@ export class LlamaChat {
|
|
|
122
123
|
await generateResponseState.loadContextWindow(generateResponseState.getResolvedHistoryWithCurrentModelResponse(), generateResponseState.getContextWindowsHistoryWithCurrentModelResponse(), false, avoidReloadingHistory);
|
|
123
124
|
};
|
|
124
125
|
const loadContextWindowForFunctionCallingLoop = async () => loadContextWindow(true);
|
|
125
|
-
const loadContextWindowForBudgetTriggers = async () => loadContextWindow(false);
|
|
126
126
|
while (true) {
|
|
127
127
|
generateResponseState.startTokenLoop();
|
|
128
|
+
generateResponseState.handleRerender();
|
|
129
|
+
const shouldHandlePrefixTriggers = generateResponseState.isRerender;
|
|
128
130
|
generateResponseState.canAvoidReloadingHistory = false;
|
|
129
131
|
await loadContextWindow();
|
|
132
|
+
generateResponseState.isRerender = false;
|
|
130
133
|
generateResponseState.addStopGenerationTriggersFromChatWrapper();
|
|
131
134
|
if (generateResponseState.generatedTokens === 0) {
|
|
132
135
|
generateResponseState.addIgnoreStartTextTriggersFromChatWrapper();
|
|
@@ -134,7 +137,15 @@ export class LlamaChat {
|
|
|
134
137
|
generateResponseState.initFunctions();
|
|
135
138
|
}
|
|
136
139
|
}
|
|
137
|
-
if (generateResponseState.functionEvaluationMode !== false) {
|
|
140
|
+
const abortRes = generateResponseState.handleAbortTrigger("model");
|
|
141
|
+
if (abortRes != null)
|
|
142
|
+
return abortRes;
|
|
143
|
+
if (shouldHandlePrefixTriggers) {
|
|
144
|
+
const handlePrefixTriggersRes = await generateResponseState.handlePrefixTriggers(loadContextWindowForFunctionCallingLoop);
|
|
145
|
+
if (handlePrefixTriggersRes != null)
|
|
146
|
+
return handlePrefixTriggersRes;
|
|
147
|
+
}
|
|
148
|
+
if (generateResponseState.functionEvaluationMode !== false && !generateResponseState.abortOnNonText) {
|
|
138
149
|
const functionsCallsRes = await generateResponseState.enterFunctionCallingLoop(loadContextWindowForFunctionCallingLoop);
|
|
139
150
|
if (functionsCallsRes != null)
|
|
140
151
|
return functionsCallsRes;
|
|
@@ -165,21 +176,21 @@ export class LlamaChat {
|
|
|
165
176
|
const maxTokensTriggerRes = generateResponseState.handleMaxTokensTrigger("model");
|
|
166
177
|
if (maxTokensTriggerRes != null)
|
|
167
178
|
return maxTokensTriggerRes;
|
|
168
|
-
if (generateResponseState.updateShouldContextShift())
|
|
179
|
+
if (generateResponseState.handleShouldRerender() || generateResponseState.updateShouldContextShift())
|
|
169
180
|
break;
|
|
170
181
|
if (await generateResponseState.handleBudgetTriggers()) {
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
182
|
+
generateResponseState.shouldRerender = true;
|
|
183
|
+
generateResponseState.skipClosingResponseItemOnRerender = true;
|
|
184
|
+
break;
|
|
174
185
|
}
|
|
175
|
-
if (generateResponseState.updateShouldContextShift())
|
|
186
|
+
if (generateResponseState.handleShouldRerender() || generateResponseState.updateShouldContextShift())
|
|
176
187
|
break;
|
|
177
188
|
const abortRes = generateResponseState.handleAbortTrigger("model");
|
|
178
189
|
if (abortRes != null)
|
|
179
190
|
return abortRes;
|
|
180
191
|
}
|
|
181
192
|
generateResponseState.isFirstEvaluation = false;
|
|
182
|
-
if (generateResponseState.shouldContextShift)
|
|
193
|
+
if (generateResponseState.shouldRerender || generateResponseState.shouldContextShift)
|
|
183
194
|
continue;
|
|
184
195
|
break;
|
|
185
196
|
}
|
|
@@ -236,10 +247,12 @@ export class LlamaChat {
|
|
|
236
247
|
while (true) {
|
|
237
248
|
generateResponseState.startTokenLoop();
|
|
238
249
|
const { userTextSuffix } = await generateResponseState.loadContextWindow(mergeGeneratedResultWithChatHistory("user", generateResponseState.resolvedHistory, generateResponseState.segmentHandler.getModelResponseSegments()), mergeGeneratedResultWithChatHistory("user", generateResponseState.lastContextWindowHistory, generateResponseState.segmentHandler.getContextWindowModelResponseSegments()), true);
|
|
250
|
+
generateResponseState.isRerender = false;
|
|
239
251
|
generateResponseState.functionEvaluationMode = false;
|
|
240
252
|
generateResponseState.addStopGenerationTriggersFromChatWrapper();
|
|
241
253
|
if (userTextSuffix != null && userTextSuffix.values.length > 0)
|
|
242
254
|
generateResponseState.stopGenerationDetector.addStopTrigger(StopGenerationDetector.resolveLlamaTextTrigger(userTextSuffix, this.model.tokenizer));
|
|
255
|
+
generateResponseState.rerenderTriggers.forEach((trigger) => (generateResponseState.stopGenerationDetector.addStopTrigger(StopGenerationDetector.resolveLlamaTextTrigger(trigger, this.model.tokenizer))));
|
|
243
256
|
allSegmentTypes
|
|
244
257
|
.map((segmentType) => getChatWrapperSegmentDefinition(this._chatWrapper.settings, segmentType))
|
|
245
258
|
.filter((segmentDefinition) => segmentDefinition != null)
|
|
@@ -545,13 +558,13 @@ function generateContextTextThatEndsWithUserText(chatWrapper, options) {
|
|
|
545
558
|
`There might be an issue with the chat wrapper "${chatWrapper.wrapperName}" ` +
|
|
546
559
|
"where not all user messages are properly added to the the result LlamaText");
|
|
547
560
|
}
|
|
548
|
-
async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHistoryCompressionMetadata, pendingTokensCount = 0, isFirstEvaluation, chatWrapper, lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift, sequence, minFreeContextTokens = 1, functions, documentFunctionParams, endWithUserText }) {
|
|
561
|
+
async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHistoryCompressionMetadata, pendingTokensCount = 0, isFirstEvaluation, isRerender, chatWrapper, lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift, sequence, minFreeContextTokens = 1, functions, documentFunctionParams, endWithUserText }) {
|
|
549
562
|
if (sequence == null)
|
|
550
563
|
throw new DisposedError();
|
|
551
564
|
const model = sequence.model;
|
|
552
565
|
const context = sequence.context;
|
|
553
566
|
let removeRawFromHistory = false;
|
|
554
|
-
if (isFirstEvaluation && lastEvaluationContextWindowHistory != null && sequence.isLoadedToMemory) {
|
|
567
|
+
if ((isFirstEvaluation || isRerender) && lastEvaluationContextWindowHistory != null && sequence.isLoadedToMemory) {
|
|
555
568
|
const newContextWindow = lastEvaluationContextWindowHistory.slice();
|
|
556
569
|
if (endWithUserText) {
|
|
557
570
|
if (newContextWindow.length === 0 || newContextWindow[newContextWindow.length - 1].type !== "user")
|
|
@@ -565,7 +578,7 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
|
|
|
565
578
|
type: "model",
|
|
566
579
|
response: []
|
|
567
580
|
});
|
|
568
|
-
const { contextText, stopGenerationTriggers, ignoreStartText, functionCall, userTextSuffix } = generateContextText(endWithUserText, chatWrapper, {
|
|
581
|
+
const { contextText, stopGenerationTriggers, ignoreStartText, functionCall, userTextSuffix, prefixTriggers, noPrefixTrigger, rerender, detectFunctionCalls } = generateContextText(endWithUserText, chatWrapper, {
|
|
569
582
|
chatHistory: newContextWindow,
|
|
570
583
|
availableFunctions: functions,
|
|
571
584
|
documentFunctionParams
|
|
@@ -574,7 +587,7 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
|
|
|
574
587
|
if (tokens.length + pendingTokensCount + minFreeContextTokens < context.contextSize) {
|
|
575
588
|
const { firstDifferentIndex } = sequence.compareContextTokens(tokens);
|
|
576
589
|
const existingEvaluationPercentage = firstDifferentIndex / tokens.length;
|
|
577
|
-
if (existingEvaluationPercentage >= minimumOverlapPercentageToPreventContextShift)
|
|
590
|
+
if (isRerender || existingEvaluationPercentage >= minimumOverlapPercentageToPreventContextShift)
|
|
578
591
|
return {
|
|
579
592
|
history: newContextWindow,
|
|
580
593
|
stopGenerationTriggers,
|
|
@@ -584,7 +597,11 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
|
|
|
584
597
|
ignoreStartText: ignoreStartText ?? [],
|
|
585
598
|
functionCallInitiallyEngaged: functionCall?.initiallyEngaged ?? false,
|
|
586
599
|
disengageInitiallyEngagedFunctionCall: functionCall?.disengageInitiallyEngaged ?? [],
|
|
587
|
-
userTextSuffix
|
|
600
|
+
userTextSuffix,
|
|
601
|
+
prefixTriggers,
|
|
602
|
+
noPrefixTrigger,
|
|
603
|
+
rerender,
|
|
604
|
+
detectFunctionCalls
|
|
588
605
|
};
|
|
589
606
|
}
|
|
590
607
|
}
|
|
@@ -607,7 +624,7 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
|
|
|
607
624
|
functions,
|
|
608
625
|
documentFunctionParams
|
|
609
626
|
});
|
|
610
|
-
const { contextText, stopGenerationTriggers, ignoreStartText, functionCall, userTextSuffix } = generateContextText(endWithUserText, chatWrapper, {
|
|
627
|
+
const { contextText, stopGenerationTriggers, ignoreStartText, functionCall, userTextSuffix, prefixTriggers, noPrefixTrigger, rerender, detectFunctionCalls } = generateContextText(endWithUserText, chatWrapper, {
|
|
611
628
|
chatHistory: compressedHistory,
|
|
612
629
|
availableFunctions: functions,
|
|
613
630
|
documentFunctionParams
|
|
@@ -621,11 +638,15 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
|
|
|
621
638
|
ignoreStartText: ignoreStartText ?? [],
|
|
622
639
|
functionCallInitiallyEngaged: functionCall?.initiallyEngaged ?? false,
|
|
623
640
|
disengageInitiallyEngagedFunctionCall: functionCall?.disengageInitiallyEngaged ?? [],
|
|
624
|
-
userTextSuffix
|
|
641
|
+
userTextSuffix,
|
|
642
|
+
prefixTriggers,
|
|
643
|
+
noPrefixTrigger,
|
|
644
|
+
rerender,
|
|
645
|
+
detectFunctionCalls
|
|
625
646
|
};
|
|
626
647
|
}
|
|
627
648
|
{
|
|
628
|
-
const { contextText, stopGenerationTriggers, ignoreStartText, functionCall, userTextSuffix } = generateContextText(endWithUserText, chatWrapper, {
|
|
649
|
+
const { contextText, stopGenerationTriggers, ignoreStartText, functionCall, userTextSuffix, prefixTriggers, noPrefixTrigger, rerender, detectFunctionCalls } = generateContextText(endWithUserText, chatWrapper, {
|
|
629
650
|
chatHistory: resolvedHistory,
|
|
630
651
|
availableFunctions: functions,
|
|
631
652
|
documentFunctionParams
|
|
@@ -641,7 +662,11 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
|
|
|
641
662
|
ignoreStartText: ignoreStartText ?? [],
|
|
642
663
|
functionCallInitiallyEngaged: functionCall?.initiallyEngaged ?? false,
|
|
643
664
|
disengageInitiallyEngagedFunctionCall: functionCall?.disengageInitiallyEngaged ?? [],
|
|
644
|
-
userTextSuffix
|
|
665
|
+
userTextSuffix,
|
|
666
|
+
prefixTriggers,
|
|
667
|
+
noPrefixTrigger,
|
|
668
|
+
rerender,
|
|
669
|
+
detectFunctionCalls
|
|
645
670
|
};
|
|
646
671
|
}
|
|
647
672
|
const contextShiftSize = Math.min(context.contextSize, Math.max(1, Math.floor(resolvedContextShift.size instanceof Function
|
|
@@ -658,7 +683,7 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
|
|
|
658
683
|
functions,
|
|
659
684
|
documentFunctionParams
|
|
660
685
|
});
|
|
661
|
-
const { contextText, stopGenerationTriggers, ignoreStartText, functionCall, userTextSuffix } = generateContextText(endWithUserText, chatWrapper, {
|
|
686
|
+
const { contextText, stopGenerationTriggers, ignoreStartText, functionCall, userTextSuffix, prefixTriggers, noPrefixTrigger, rerender, detectFunctionCalls } = generateContextText(endWithUserText, chatWrapper, {
|
|
662
687
|
chatHistory: compressedHistory,
|
|
663
688
|
availableFunctions: functions,
|
|
664
689
|
documentFunctionParams
|
|
@@ -672,7 +697,11 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
|
|
|
672
697
|
ignoreStartText: ignoreStartText ?? [],
|
|
673
698
|
functionCallInitiallyEngaged: functionCall?.initiallyEngaged ?? false,
|
|
674
699
|
disengageInitiallyEngagedFunctionCall: functionCall?.disengageInitiallyEngaged ?? [],
|
|
675
|
-
userTextSuffix
|
|
700
|
+
userTextSuffix,
|
|
701
|
+
prefixTriggers,
|
|
702
|
+
noPrefixTrigger,
|
|
703
|
+
rerender,
|
|
704
|
+
detectFunctionCalls
|
|
676
705
|
};
|
|
677
706
|
}
|
|
678
707
|
class GenerateResponseState {
|
|
@@ -702,6 +731,7 @@ class GenerateResponseState {
|
|
|
702
731
|
maxParallelFunctionCalls;
|
|
703
732
|
contextShift;
|
|
704
733
|
customStopTriggers;
|
|
734
|
+
abortOnNonText;
|
|
705
735
|
minimumOverlapPercentageToPreventContextShift;
|
|
706
736
|
functionsEnabled;
|
|
707
737
|
repeatPenaltyEnabled;
|
|
@@ -711,6 +741,7 @@ class GenerateResponseState {
|
|
|
711
741
|
functionNameGrammar;
|
|
712
742
|
functionsGrammar;
|
|
713
743
|
functionsEvaluationState;
|
|
744
|
+
functionSyntaxStartDetectorEnabled = true;
|
|
714
745
|
streamRegulator = new TokenStreamRegulator();
|
|
715
746
|
stopGenerationDetector = new StopGenerationDetector();
|
|
716
747
|
customStopGenerationTriggersDetector = new StopGenerationDetector();
|
|
@@ -723,6 +754,7 @@ class GenerateResponseState {
|
|
|
723
754
|
res = [];
|
|
724
755
|
pendingTokens = [];
|
|
725
756
|
ignoredStartTextTokens = [];
|
|
757
|
+
prefixTriggerTokens = [];
|
|
726
758
|
resFunctionCalls = [];
|
|
727
759
|
segmentHandler;
|
|
728
760
|
pendingPartialTokens = [];
|
|
@@ -735,12 +767,16 @@ class GenerateResponseState {
|
|
|
735
767
|
releasedPartiallyFreeTokensBeforeFunctionCallStartSyntax = false;
|
|
736
768
|
generatedTokens = 0;
|
|
737
769
|
isFirstEvaluation = true;
|
|
770
|
+
isRerender = true; // first render is a rerender
|
|
738
771
|
initiallyEngagedFunctionMode = false;
|
|
739
772
|
lastContextWindowHistory;
|
|
740
773
|
lastHistoryCompressionMetadata;
|
|
741
774
|
restartEvaluationIterator = false;
|
|
742
775
|
// context shift loop
|
|
743
776
|
shouldContextShift = false;
|
|
777
|
+
shouldRerender = false;
|
|
778
|
+
skipClosingResponseItemOnRerender = false;
|
|
779
|
+
shouldAbortBecauseOfNonText = false;
|
|
744
780
|
canAvoidReloadingHistory = false;
|
|
745
781
|
contextWindowTokens = [];
|
|
746
782
|
stopGenerationTriggers = [];
|
|
@@ -748,6 +784,11 @@ class GenerateResponseState {
|
|
|
748
784
|
functionCallInitiallyEngaged = false;
|
|
749
785
|
disengageInitiallyEngagedFunctionCall = [];
|
|
750
786
|
userTextSuffix = undefined;
|
|
787
|
+
prefixTriggerDetectors = new Map();
|
|
788
|
+
noPrefixTrigger = undefined;
|
|
789
|
+
rerenderTriggers = [];
|
|
790
|
+
rerenderTriggerDetector = new StopGenerationDetector();
|
|
791
|
+
rerenderActions = undefined;
|
|
751
792
|
tokens = [];
|
|
752
793
|
// token evaluation loop
|
|
753
794
|
evaluationIterator;
|
|
@@ -757,7 +798,7 @@ class GenerateResponseState {
|
|
|
757
798
|
currentTokens = [];
|
|
758
799
|
currentText = "";
|
|
759
800
|
currentQueuedTokenRelease;
|
|
760
|
-
constructor(llamaChat, chatWrapper, history, { onTextChunk, onToken, onResponseChunk, onFunctionCallParamsChunk, budgets, signal, stopOnAbortSignal = false, maxTokens, temperature, minP, topK, topP, seed, grammar, trimWhitespaceSuffix = defaultTrimWhitespaceSuffix, repeatPenalty = {}, tokenBias, evaluationPriority = defaultEvaluationPriority, functions, onFunctionCall, documentFunctionParams, maxParallelFunctionCalls, contextShift = defaultContextShiftOptions, customStopTriggers, lastEvaluationContextWindow: { history: lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift = 0.5 } = {} } = {}) {
|
|
801
|
+
constructor(llamaChat, chatWrapper, history, { onTextChunk, onToken, onResponseChunk, onFunctionCallParamsChunk, budgets, signal, stopOnAbortSignal = false, maxTokens, temperature, minP, topK, topP, seed, grammar, trimWhitespaceSuffix = defaultTrimWhitespaceSuffix, repeatPenalty = {}, tokenBias, evaluationPriority = defaultEvaluationPriority, functions, onFunctionCall, documentFunctionParams, maxParallelFunctionCalls, contextShift = defaultContextShiftOptions, customStopTriggers, abortOnNonText, lastEvaluationContextWindow: { history: lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift = 0.5 } = {} } = {}) {
|
|
761
802
|
this.llamaChat = llamaChat;
|
|
762
803
|
this.chatWrapper = chatWrapper;
|
|
763
804
|
this.history = history;
|
|
@@ -784,6 +825,7 @@ class GenerateResponseState {
|
|
|
784
825
|
this.maxParallelFunctionCalls = maxParallelFunctionCalls;
|
|
785
826
|
this.contextShift = contextShift;
|
|
786
827
|
this.customStopTriggers = customStopTriggers;
|
|
828
|
+
this.abortOnNonText = abortOnNonText ?? false;
|
|
787
829
|
this.minimumOverlapPercentageToPreventContextShift = minimumOverlapPercentageToPreventContextShift;
|
|
788
830
|
this.functionsEnabled = (this.functions != null && Object.keys(this.functions).length > 0);
|
|
789
831
|
if (this.signal?.aborted)
|
|
@@ -821,7 +863,7 @@ class GenerateResponseState {
|
|
|
821
863
|
if (this.grammar != null)
|
|
822
864
|
StopGenerationDetector.resolveStopTriggers(this.grammar.stopGenerationTriggers, this.llamaChat.model.tokenizer)
|
|
823
865
|
.map((stopTrigger) => this.stopGenerationDetector.addStopTrigger(stopTrigger));
|
|
824
|
-
if (this.functions != null && Object.keys(this.functions).length > 0)
|
|
866
|
+
if (this.functions != null && Object.keys(this.functions).length > 0 && !this.abortOnNonText)
|
|
825
867
|
this.functionSyntaxStartDetector.addStopTrigger(StopGenerationDetector.resolveLlamaTextTrigger(LlamaText([
|
|
826
868
|
this.chatWrapper.settings.functions?.parallelism?.call?.sectionPrefix ?? "",
|
|
827
869
|
this.chatWrapper.settings.functions.call.prefix
|
|
@@ -846,6 +888,17 @@ class GenerateResponseState {
|
|
|
846
888
|
? new Map()
|
|
847
889
|
: SegmentHandler.getSegmentTokenCounts(lastModelMessageFullResponse, this.llamaChat.model.tokenizer)
|
|
848
890
|
});
|
|
891
|
+
if (this.abortOnNonText) {
|
|
892
|
+
this.stopGenerationDetector.addStopTrigger(StopGenerationDetector.resolveLlamaTextTrigger(LlamaText([
|
|
893
|
+
this.chatWrapper.settings.functions?.parallelism?.call?.sectionPrefix ?? "",
|
|
894
|
+
this.chatWrapper.settings.functions.call.prefix
|
|
895
|
+
]), this.llamaChat.model.tokenizer));
|
|
896
|
+
for (const segmentType of allSegmentTypes) {
|
|
897
|
+
const segmentDefinition = getChatWrapperSegmentDefinition(this.chatWrapper.settings, segmentType);
|
|
898
|
+
if (segmentDefinition != null)
|
|
899
|
+
this.stopGenerationDetector.addStopTrigger(StopGenerationDetector.resolveLlamaTextTrigger(LlamaText(segmentDefinition.prefix), this.llamaChat.model.tokenizer));
|
|
900
|
+
}
|
|
901
|
+
}
|
|
849
902
|
this.getPenaltyTokens = this.getPenaltyTokens.bind(this);
|
|
850
903
|
}
|
|
851
904
|
async dispose() {
|
|
@@ -894,7 +947,10 @@ class GenerateResponseState {
|
|
|
894
947
|
});
|
|
895
948
|
if (!hadThoughtSegments)
|
|
896
949
|
return;
|
|
897
|
-
this.segmentHandler.openSegment("thought");
|
|
950
|
+
if (this.abortOnNonText)
|
|
951
|
+
this.shouldAbortBecauseOfNonText = true;
|
|
952
|
+
else
|
|
953
|
+
this.segmentHandler.openSegment("thought");
|
|
898
954
|
}
|
|
899
955
|
ensureNotAborted() {
|
|
900
956
|
if (this.signal?.aborted && (!this.stopOnAbortSignal || this.res.length === 0))
|
|
@@ -930,7 +986,8 @@ class GenerateResponseState {
|
|
|
930
986
|
let mostExhaustiveTriggeredStopsLeftoverTokens = [];
|
|
931
987
|
const lastTokensForDetokenizer = resolveLastTokens([
|
|
932
988
|
this.contextWindowTokens,
|
|
933
|
-
this.ignoredStartTextTokens
|
|
989
|
+
this.ignoredStartTextTokens,
|
|
990
|
+
this.prefixTriggerTokens
|
|
934
991
|
]);
|
|
935
992
|
const pendingPartialTokens = [];
|
|
936
993
|
for (let i = 0; i < this.pendingTokens.length; i++) {
|
|
@@ -993,6 +1050,18 @@ class GenerateResponseState {
|
|
|
993
1050
|
this.ensureNotAborted();
|
|
994
1051
|
this.shouldContextShift = false;
|
|
995
1052
|
}
|
|
1053
|
+
handleRerender() {
|
|
1054
|
+
if (this.shouldRerender) {
|
|
1055
|
+
this.isRerender = true;
|
|
1056
|
+
this.streamRegulator.reset();
|
|
1057
|
+
if (this.rerenderActions === "closeResponseItem" && this.segmentHandler.topOpenSegmentType != null &&
|
|
1058
|
+
!this.skipClosingResponseItemOnRerender) {
|
|
1059
|
+
this.segmentHandler.closeSegment(this.segmentHandler.topOpenSegmentType);
|
|
1060
|
+
this.shouldRerender = false;
|
|
1061
|
+
}
|
|
1062
|
+
this.skipClosingResponseItemOnRerender = false;
|
|
1063
|
+
}
|
|
1064
|
+
}
|
|
996
1065
|
getContextWindowFunctionCallsTokens() {
|
|
997
1066
|
if (this.functionEvaluationMode === false)
|
|
998
1067
|
return [];
|
|
@@ -1019,14 +1088,15 @@ class GenerateResponseState {
|
|
|
1019
1088
|
async loadContextWindow(resolvedHistory, resolvedContextWindowsHistory, endWithUserText = false, avoidReloadingHistory = false) {
|
|
1020
1089
|
const queuedChunkTokens = this.streamRegulator.getAllQueuedChunkTokens();
|
|
1021
1090
|
const functionCallsTokens = this.getContextWindowFunctionCallsTokens();
|
|
1022
|
-
if (!avoidReloadingHistory || !this.canAvoidReloadingHistory || !this.llamaChat.sequence.isLoadedToMemory) {
|
|
1023
|
-
const { history: contextWindowHistory, stopGenerationTriggers, tokens: contextWindowTokens, removeRawFromHistory, newHistoryCompressionMetadata, ignoreStartText, functionCallInitiallyEngaged, disengageInitiallyEngagedFunctionCall, userTextSuffix } = await getContextWindow({
|
|
1091
|
+
if (!avoidReloadingHistory || !this.canAvoidReloadingHistory || this.isRerender || !this.llamaChat.sequence.isLoadedToMemory) {
|
|
1092
|
+
const { history: contextWindowHistory, stopGenerationTriggers, tokens: contextWindowTokens, removeRawFromHistory, newHistoryCompressionMetadata, ignoreStartText, functionCallInitiallyEngaged, disengageInitiallyEngagedFunctionCall, userTextSuffix, prefixTriggers, noPrefixTrigger, rerender, detectFunctionCalls } = await getContextWindow({
|
|
1024
1093
|
resolvedHistory: resolvedHistory,
|
|
1025
1094
|
resolvedContextShift: this.resolvedContextShift,
|
|
1026
1095
|
lastHistoryCompressionMetadata: this.lastHistoryCompressionMetadata,
|
|
1027
|
-
pendingTokensCount: this.pendingTokens.length + queuedChunkTokens.length + functionCallsTokens.length +
|
|
1028
|
-
this.pendingPartialTokens.length,
|
|
1096
|
+
pendingTokensCount: this.prefixTriggerTokens.length + this.pendingTokens.length + queuedChunkTokens.length +
|
|
1097
|
+
functionCallsTokens.length + this.pendingPartialTokens.length,
|
|
1029
1098
|
isFirstEvaluation: this.isFirstEvaluation,
|
|
1099
|
+
isRerender: this.isRerender,
|
|
1030
1100
|
chatWrapper: this.chatWrapper,
|
|
1031
1101
|
lastEvaluationContextWindowHistory: resolvedContextWindowsHistory,
|
|
1032
1102
|
minimumOverlapPercentageToPreventContextShift: this.minimumOverlapPercentageToPreventContextShift,
|
|
@@ -1043,6 +1113,61 @@ class GenerateResponseState {
|
|
|
1043
1113
|
this.functionCallInitiallyEngaged = functionCallInitiallyEngaged;
|
|
1044
1114
|
this.disengageInitiallyEngagedFunctionCall = disengageInitiallyEngagedFunctionCall;
|
|
1045
1115
|
this.userTextSuffix = userTextSuffix;
|
|
1116
|
+
if (this.isRerender) {
|
|
1117
|
+
this.prefixTriggerTokens.length = 0;
|
|
1118
|
+
for (const prefixDetector of this.prefixTriggerDetectors.keys()) {
|
|
1119
|
+
prefixDetector.clearInProgressStops();
|
|
1120
|
+
prefixDetector.clearTriggeredStops();
|
|
1121
|
+
}
|
|
1122
|
+
this.prefixTriggerDetectors.clear();
|
|
1123
|
+
for (const trigger of prefixTriggers ?? []) {
|
|
1124
|
+
const segmentBudget = trigger.type === "segment"
|
|
1125
|
+
? this.getSegmentBudget(trigger.segmentType)
|
|
1126
|
+
: null;
|
|
1127
|
+
if (trigger.type === "functionCall" && !this.functionsEnabled)
|
|
1128
|
+
continue;
|
|
1129
|
+
else if (trigger.type === "segment" &&
|
|
1130
|
+
segmentBudget != null &&
|
|
1131
|
+
!this.segmentHandler.isSegmentTypeOpen(trigger.segmentType) &&
|
|
1132
|
+
this.segmentHandler.getSegmentTokensCount(trigger.segmentType) >= segmentBudget)
|
|
1133
|
+
continue;
|
|
1134
|
+
const prefixDetector = new StopGenerationDetector();
|
|
1135
|
+
StopGenerationDetector.resolveStopTriggers(trigger.triggers, this.llamaChat.model.tokenizer)
|
|
1136
|
+
.forEach((stopTrigger) => prefixDetector.addStopTrigger(stopTrigger));
|
|
1137
|
+
this.prefixTriggerDetectors.set(prefixDetector, { inject: trigger.inject, trigger });
|
|
1138
|
+
const inject = trigger.inject;
|
|
1139
|
+
if (inject != null && inject.values.length > 0) {
|
|
1140
|
+
const fullPrefixDetector = new StopGenerationDetector();
|
|
1141
|
+
StopGenerationDetector
|
|
1142
|
+
.resolveStopTriggers(trigger.triggers.map((trigger) => LlamaText([trigger, inject])), this.llamaChat.model.tokenizer)
|
|
1143
|
+
.forEach((stopTrigger) => fullPrefixDetector.addStopTrigger(stopTrigger));
|
|
1144
|
+
this.prefixTriggerDetectors.set(fullPrefixDetector, { trigger });
|
|
1145
|
+
}
|
|
1146
|
+
}
|
|
1147
|
+
this.noPrefixTrigger = noPrefixTrigger;
|
|
1148
|
+
const noPrefixTriggerSegmentBudget = noPrefixTrigger?.type === "segment"
|
|
1149
|
+
? this.getSegmentBudget(noPrefixTrigger.segmentType)
|
|
1150
|
+
: null;
|
|
1151
|
+
if (this.noPrefixTrigger?.type === "functionCall" && !this.functionsEnabled)
|
|
1152
|
+
this.noPrefixTrigger = undefined;
|
|
1153
|
+
else if (noPrefixTrigger?.type === "segment" &&
|
|
1154
|
+
noPrefixTriggerSegmentBudget != null &&
|
|
1155
|
+
!this.segmentHandler.isSegmentTypeOpen(noPrefixTrigger.segmentType) &&
|
|
1156
|
+
this.segmentHandler.getSegmentTokensCount(noPrefixTrigger.segmentType) >= noPrefixTriggerSegmentBudget)
|
|
1157
|
+
this.noPrefixTrigger = undefined;
|
|
1158
|
+
this.rerenderTriggers = rerender?.triggers ?? [];
|
|
1159
|
+
this.rerenderTriggerDetector.clearInProgressStops();
|
|
1160
|
+
this.rerenderTriggerDetector.clearTriggeredStops();
|
|
1161
|
+
this.rerenderTriggerDetector = new StopGenerationDetector();
|
|
1162
|
+
this.rerenderActions = rerender?.action;
|
|
1163
|
+
this.functionSyntaxStartDetectorEnabled = detectFunctionCalls ?? true;
|
|
1164
|
+
if (!this.functionSyntaxStartDetectorEnabled)
|
|
1165
|
+
this.functionSyntaxStartDetector.clearInProgressStops();
|
|
1166
|
+
if (rerender?.triggers != null) {
|
|
1167
|
+
StopGenerationDetector.resolveStopTriggers(rerender.triggers, this.llamaChat.model.tokenizer)
|
|
1168
|
+
.map((stopTrigger) => this.rerenderTriggerDetector.addStopTrigger(stopTrigger));
|
|
1169
|
+
}
|
|
1170
|
+
}
|
|
1046
1171
|
this.lastHistoryCompressionMetadata = newHistoryCompressionMetadata;
|
|
1047
1172
|
this.lastContextWindowHistory = contextWindowHistory;
|
|
1048
1173
|
this.segmentHandler.resetContextWindow();
|
|
@@ -1055,6 +1180,7 @@ class GenerateResponseState {
|
|
|
1055
1180
|
this.tokens = [
|
|
1056
1181
|
...this.contextWindowTokens,
|
|
1057
1182
|
...this.ignoredStartTextTokens,
|
|
1183
|
+
...this.prefixTriggerTokens,
|
|
1058
1184
|
...this.pendingTokens,
|
|
1059
1185
|
...queuedChunkTokens,
|
|
1060
1186
|
...functionCallsTokens,
|
|
@@ -1076,6 +1202,10 @@ class GenerateResponseState {
|
|
|
1076
1202
|
}
|
|
1077
1203
|
initFunctions() {
|
|
1078
1204
|
this.initiallyEngagedFunctionMode = this.functionCallInitiallyEngaged;
|
|
1205
|
+
if (this.initiallyEngagedFunctionMode && this.abortOnNonText) {
|
|
1206
|
+
this.shouldAbortBecauseOfNonText = true;
|
|
1207
|
+
return;
|
|
1208
|
+
}
|
|
1079
1209
|
if (this.initiallyEngagedFunctionMode) {
|
|
1080
1210
|
StopGenerationDetector.resolveStopTriggers(this.disengageInitiallyEngagedFunctionCall, this.llamaChat.model.tokenizer)
|
|
1081
1211
|
.map((stopTrigger) => this.disengageInitiallyEngagedFunctionMode.addStopTrigger(stopTrigger));
|
|
@@ -1090,6 +1220,140 @@ class GenerateResponseState {
|
|
|
1090
1220
|
this.restartEvaluationIterator = true;
|
|
1091
1221
|
}
|
|
1092
1222
|
}
|
|
1223
|
+
async handlePrefixTriggers(loadContextWindow) {
|
|
1224
|
+
const reloadTokens = async () => {
|
|
1225
|
+
this.startTokenLoop();
|
|
1226
|
+
await loadContextWindow();
|
|
1227
|
+
};
|
|
1228
|
+
const injectTokens = async (text, alignStateTokens = false) => {
|
|
1229
|
+
if (text == null)
|
|
1230
|
+
return;
|
|
1231
|
+
const tokens = text.tokenize(this.llamaChat.model.tokenizer, "trimLeadingSpace");
|
|
1232
|
+
if (tokens.length === 0)
|
|
1233
|
+
return;
|
|
1234
|
+
pushAll(this.prefixTriggerTokens, tokens);
|
|
1235
|
+
if (alignStateTokens)
|
|
1236
|
+
await reloadTokens();
|
|
1237
|
+
};
|
|
1238
|
+
if (this.prefixTriggerDetectors.size === 0) {
|
|
1239
|
+
if (this.abortOnNonText && this.noPrefixTrigger != null && this.noPrefixTrigger.type !== "response") {
|
|
1240
|
+
this.shouldAbortBecauseOfNonText = true;
|
|
1241
|
+
const stopRes = this.handleAbortTrigger("model");
|
|
1242
|
+
if (stopRes != null)
|
|
1243
|
+
return stopRes;
|
|
1244
|
+
return undefined;
|
|
1245
|
+
}
|
|
1246
|
+
if (this.noPrefixTrigger?.type === "functionCall" && this.chatWrapper.settings.functions != null) {
|
|
1247
|
+
await injectTokens(this.noPrefixTrigger.inject, true);
|
|
1248
|
+
this.functionEvaluationMode = "functionName";
|
|
1249
|
+
}
|
|
1250
|
+
else if (this.noPrefixTrigger?.type === "segment") {
|
|
1251
|
+
await injectTokens(this.noPrefixTrigger.inject, true);
|
|
1252
|
+
this.segmentHandler.openSegment(this.noPrefixTrigger.segmentType);
|
|
1253
|
+
}
|
|
1254
|
+
else if (this.noPrefixTrigger?.type === "response")
|
|
1255
|
+
await injectTokens(this.noPrefixTrigger.inject, true);
|
|
1256
|
+
return undefined;
|
|
1257
|
+
}
|
|
1258
|
+
const generatedTokens = [];
|
|
1259
|
+
let isFirstToken = true;
|
|
1260
|
+
let continueGeneration = true;
|
|
1261
|
+
for await (const tokens of this.evaluateWithContextShift(loadContextWindow)) {
|
|
1262
|
+
pushAll(generatedTokens, tokens);
|
|
1263
|
+
for (const [triggerDetector, { trigger, inject }] of [...this.prefixTriggerDetectors.entries()]) {
|
|
1264
|
+
triggerDetector.recordGeneration({
|
|
1265
|
+
text: this.currentText,
|
|
1266
|
+
tokens: this.currentTokens,
|
|
1267
|
+
startNewChecks: isFirstToken,
|
|
1268
|
+
triggerMustStartWithGeneration: true
|
|
1269
|
+
});
|
|
1270
|
+
if (triggerDetector.hasTriggeredStops) {
|
|
1271
|
+
const { firstRemainingGenerationAfterStop, stopTrigger } = StopGenerationDetector.getFirstRemainingGenerationAfterStop(triggerDetector.getTriggeredStops());
|
|
1272
|
+
const remainingTokens = typeof firstRemainingGenerationAfterStop === "string"
|
|
1273
|
+
? firstRemainingGenerationAfterStop === ""
|
|
1274
|
+
? []
|
|
1275
|
+
: this.llamaChat.model.tokenize(firstRemainingGenerationAfterStop, false, "trimLeadingSpace")
|
|
1276
|
+
: (firstRemainingGenerationAfterStop ?? []);
|
|
1277
|
+
const triggerTokens = (stopTrigger == null || remainingTokens.length === 0)
|
|
1278
|
+
? generatedTokens
|
|
1279
|
+
: stopTrigger.flatMap((item) => {
|
|
1280
|
+
if (typeof item === "string")
|
|
1281
|
+
return this.llamaChat.model.tokenize(item, false, "trimLeadingSpace");
|
|
1282
|
+
return [item];
|
|
1283
|
+
});
|
|
1284
|
+
if (this.abortOnNonText && trigger.type !== "response") {
|
|
1285
|
+
this.shouldAbortBecauseOfNonText = true;
|
|
1286
|
+
const stopRes = this.handleAbortTrigger("model");
|
|
1287
|
+
if (stopRes != null)
|
|
1288
|
+
return stopRes;
|
|
1289
|
+
return undefined;
|
|
1290
|
+
}
|
|
1291
|
+
this.streamRegulator.reset();
|
|
1292
|
+
if (trigger.type === "segment") {
|
|
1293
|
+
pushAll(this.prefixTriggerTokens, triggerTokens);
|
|
1294
|
+
if (inject != null)
|
|
1295
|
+
await injectTokens(inject);
|
|
1296
|
+
await reloadTokens();
|
|
1297
|
+
this.segmentHandler.openSegment(trigger.segmentType);
|
|
1298
|
+
}
|
|
1299
|
+
else if (trigger.type === "response") {
|
|
1300
|
+
pushAll(this.prefixTriggerTokens, triggerTokens);
|
|
1301
|
+
if (inject != null)
|
|
1302
|
+
await injectTokens(inject);
|
|
1303
|
+
await reloadTokens();
|
|
1304
|
+
}
|
|
1305
|
+
else if (trigger.type === "functionCall") {
|
|
1306
|
+
if (trigger.replaceTrigger === false)
|
|
1307
|
+
pushAll(this.prefixTriggerTokens, triggerTokens);
|
|
1308
|
+
if (inject != null)
|
|
1309
|
+
await injectTokens(inject);
|
|
1310
|
+
await reloadTokens();
|
|
1311
|
+
this.functionEvaluationMode = "functionName";
|
|
1312
|
+
}
|
|
1313
|
+
else
|
|
1314
|
+
void trigger;
|
|
1315
|
+
this.prefixTriggerDetectors.clear();
|
|
1316
|
+
continueGeneration = false;
|
|
1317
|
+
break;
|
|
1318
|
+
}
|
|
1319
|
+
else if (!triggerDetector.hasInProgressStops)
|
|
1320
|
+
this.prefixTriggerDetectors.delete(triggerDetector);
|
|
1321
|
+
}
|
|
1322
|
+
if (this.prefixTriggerDetectors.size === 0 && continueGeneration) {
|
|
1323
|
+
if (this.abortOnNonText && this.noPrefixTrigger != null && this.noPrefixTrigger.type !== "response") {
|
|
1324
|
+
this.shouldAbortBecauseOfNonText = true;
|
|
1325
|
+
const stopRes = this.handleAbortTrigger("model");
|
|
1326
|
+
if (stopRes != null)
|
|
1327
|
+
return stopRes;
|
|
1328
|
+
return undefined;
|
|
1329
|
+
}
|
|
1330
|
+
this.streamRegulator.reset();
|
|
1331
|
+
continueGeneration = false;
|
|
1332
|
+
if (this.noPrefixTrigger?.type === "functionCall" && this.chatWrapper.settings.functions != null) {
|
|
1333
|
+
await injectTokens(this.noPrefixTrigger.inject, true);
|
|
1334
|
+
this.functionEvaluationMode = "functionName";
|
|
1335
|
+
}
|
|
1336
|
+
else if (this.noPrefixTrigger?.type === "segment") {
|
|
1337
|
+
await injectTokens(this.noPrefixTrigger.inject, true);
|
|
1338
|
+
this.segmentHandler.openSegment(this.noPrefixTrigger.segmentType);
|
|
1339
|
+
}
|
|
1340
|
+
else if (this.noPrefixTrigger?.type === "response")
|
|
1341
|
+
await injectTokens(this.noPrefixTrigger.inject, true);
|
|
1342
|
+
else
|
|
1343
|
+
this.streamRegulator.addChunk({
|
|
1344
|
+
tokens: generatedTokens,
|
|
1345
|
+
text: this.llamaChat.model.detokenize(generatedTokens, false, this.getLastTokens())
|
|
1346
|
+
});
|
|
1347
|
+
}
|
|
1348
|
+
isFirstToken = false;
|
|
1349
|
+
if (!continueGeneration)
|
|
1350
|
+
break;
|
|
1351
|
+
const stopRes = this.handleAbortTrigger("model") ?? this.handleMaxTokensTrigger("model");
|
|
1352
|
+
if (stopRes != null)
|
|
1353
|
+
return stopRes;
|
|
1354
|
+
}
|
|
1355
|
+
return undefined;
|
|
1356
|
+
}
|
|
1093
1357
|
async enterFunctionCallingLoop(loadContextWindow) {
|
|
1094
1358
|
if (!this.functionsEnabled) {
|
|
1095
1359
|
this.functionEvaluationMode = false;
|
|
@@ -1568,6 +1832,8 @@ class GenerateResponseState {
|
|
|
1568
1832
|
}
|
|
1569
1833
|
}
|
|
1570
1834
|
detectAndHandleFunctionStartSyntax() {
|
|
1835
|
+
if (!this.functionSyntaxStartDetectorEnabled)
|
|
1836
|
+
return;
|
|
1571
1837
|
this.functionSyntaxStartDetector.recordGeneration({
|
|
1572
1838
|
text: this.currentText,
|
|
1573
1839
|
tokens: this.currentTokens,
|
|
@@ -1575,6 +1841,10 @@ class GenerateResponseState {
|
|
|
1575
1841
|
});
|
|
1576
1842
|
if (this.currentQueuedTokenRelease != null && this.functionEvaluationMode === false && this.functionsEnabled &&
|
|
1577
1843
|
this.functionSyntaxStartDetector.hasTriggeredStops) {
|
|
1844
|
+
if (this.abortOnNonText) {
|
|
1845
|
+
this.shouldAbortBecauseOfNonText = true;
|
|
1846
|
+
return;
|
|
1847
|
+
}
|
|
1578
1848
|
this.functionEvaluationMode = "functionName";
|
|
1579
1849
|
this.currentQueuedTokenRelease.createTextIndexLock(0);
|
|
1580
1850
|
this.stopGenerationDetector.clearTriggeredStops();
|
|
@@ -1592,6 +1862,11 @@ class GenerateResponseState {
|
|
|
1592
1862
|
}
|
|
1593
1863
|
}
|
|
1594
1864
|
recordStopGenerationEvaluation() {
|
|
1865
|
+
this.rerenderTriggerDetector.recordGeneration({
|
|
1866
|
+
text: this.currentText,
|
|
1867
|
+
tokens: this.currentTokens,
|
|
1868
|
+
queuedTokenRelease: this.currentQueuedTokenRelease
|
|
1869
|
+
});
|
|
1595
1870
|
this.stopGenerationDetector.recordGeneration({
|
|
1596
1871
|
text: this.currentText,
|
|
1597
1872
|
tokens: this.currentTokens,
|
|
@@ -1609,8 +1884,10 @@ class GenerateResponseState {
|
|
|
1609
1884
|
pushAll(this.pendingTokens, this.streamRegulator.popFreeChunkTokens());
|
|
1610
1885
|
}
|
|
1611
1886
|
handleStopGenerationTrigger(lastHistoryItemType, forceStopReason) {
|
|
1612
|
-
|
|
1613
|
-
this.
|
|
1887
|
+
const detectedStopGenerationTrigger = this.stopGenerationDetector.hasTriggeredStops ||
|
|
1888
|
+
this.customStopGenerationTriggersDetector.hasTriggeredStops ||
|
|
1889
|
+
this.llamaChat.model.isEogToken(this.currentToken);
|
|
1890
|
+
if ((detectedStopGenerationTrigger && !this.rerenderTriggerDetector.hasTriggeredStops) || forceStopReason != null) {
|
|
1614
1891
|
this.stopGenerationDetector.clearInProgressStops();
|
|
1615
1892
|
this.customStopGenerationTriggersDetector.clearInProgressStops();
|
|
1616
1893
|
pushAll(this.pendingTokens, this.streamRegulator.popFreeChunkTokens());
|
|
@@ -1709,25 +1986,45 @@ class GenerateResponseState {
|
|
|
1709
1986
|
}
|
|
1710
1987
|
async handleBudgetTriggers() {
|
|
1711
1988
|
let shouldReloadEvaluationState = false;
|
|
1712
|
-
|
|
1713
|
-
const hasBudgetTriggers = this.budgets != null && hasBudget(this.budgets.thoughtTokens);
|
|
1714
|
-
if (!hasBudgetTriggers)
|
|
1989
|
+
if (this.budgets == null)
|
|
1715
1990
|
return shouldReloadEvaluationState;
|
|
1716
|
-
|
|
1717
|
-
const
|
|
1718
|
-
if (
|
|
1719
|
-
|
|
1991
|
+
for (const segmentType of this.segmentHandler.getOpenSegmentStack().reverse()) {
|
|
1992
|
+
const budget = this.getSegmentBudget(segmentType);
|
|
1993
|
+
if (budget == null)
|
|
1994
|
+
continue;
|
|
1995
|
+
const usedSegmentTokens = this.segmentHandler.getSegmentTokensCount(segmentType);
|
|
1996
|
+
if (usedSegmentTokens >= budget) {
|
|
1997
|
+
this.segmentHandler.closeSegment(segmentType);
|
|
1720
1998
|
shouldReloadEvaluationState = true;
|
|
1721
1999
|
}
|
|
1722
2000
|
}
|
|
1723
2001
|
return shouldReloadEvaluationState;
|
|
1724
2002
|
}
|
|
2003
|
+
getSegmentBudget(segmentType) {
|
|
2004
|
+
const getBudget = (budget) => ((budget == null || budget === Infinity)
|
|
2005
|
+
? null
|
|
2006
|
+
: budget);
|
|
2007
|
+
if (this.budgets == null)
|
|
2008
|
+
return null;
|
|
2009
|
+
if (segmentType === "thought")
|
|
2010
|
+
return getBudget(this.budgets.thoughtTokens);
|
|
2011
|
+
else if (segmentType === "comment")
|
|
2012
|
+
return getBudget(this.budgets.commentTokens);
|
|
2013
|
+
void segmentType;
|
|
2014
|
+
return null;
|
|
2015
|
+
}
|
|
2016
|
+
handleShouldRerender() {
|
|
2017
|
+
this.shouldRerender = this.rerenderTriggerDetector.hasTriggeredStops;
|
|
2018
|
+
if (this.abortOnNonText && this.shouldRerender)
|
|
2019
|
+
this.shouldAbortBecauseOfNonText = true;
|
|
2020
|
+
return this.shouldRerender;
|
|
2021
|
+
}
|
|
1725
2022
|
updateShouldContextShift() {
|
|
1726
2023
|
this.shouldContextShift = this.llamaChat.sequence.nextTokenIndex >= this.llamaChat.context.contextSize - 1;
|
|
1727
2024
|
return this.shouldContextShift;
|
|
1728
2025
|
}
|
|
1729
2026
|
get shouldAbort() {
|
|
1730
|
-
return !!(this.signal?.aborted && this.stopOnAbortSignal);
|
|
2027
|
+
return !!(this.signal?.aborted && this.stopOnAbortSignal) || this.shouldAbortBecauseOfNonText;
|
|
1731
2028
|
}
|
|
1732
2029
|
handleAbortTrigger(lastHistoryItemType) {
|
|
1733
2030
|
if (this.shouldAbort && this.signal?.aborted && this.stopOnAbortSignal) {
|
|
@@ -1747,7 +2044,9 @@ class GenerateResponseState {
|
|
|
1747
2044
|
contextShiftMetadata: this.lastHistoryCompressionMetadata
|
|
1748
2045
|
},
|
|
1749
2046
|
metadata: {
|
|
1750
|
-
stopReason:
|
|
2047
|
+
stopReason: this.shouldAbortBecauseOfNonText
|
|
2048
|
+
? "eogToken"
|
|
2049
|
+
: "abort"
|
|
1751
2050
|
}
|
|
1752
2051
|
};
|
|
1753
2052
|
}
|
|
@@ -1867,6 +2166,29 @@ class SegmentHandler {
|
|
|
1867
2166
|
isSegmentTypeOpen(type) {
|
|
1868
2167
|
return this._segmentsStackSet.has(type);
|
|
1869
2168
|
}
|
|
2169
|
+
get topOpenSegmentType() {
|
|
2170
|
+
return this._segmentsStack.at(-1);
|
|
2171
|
+
}
|
|
2172
|
+
/**
|
|
2173
|
+
* First segment in the stack is the top most that'll close last.
|
|
2174
|
+
* ```
|
|
2175
|
+
* <segment1>
|
|
2176
|
+
* some text here
|
|
2177
|
+
* <segment2>
|
|
2178
|
+
* some text here
|
|
2179
|
+
* <segment3>
|
|
2180
|
+
* some text here
|
|
2181
|
+
* </segment3>
|
|
2182
|
+
* ```
|
|
2183
|
+
* In that example, the top most segment is `segment1`, and the last open segment is `segment2` (which is the next one to close).
|
|
2184
|
+
* So in that example, this function will return:
|
|
2185
|
+
* ```
|
|
2186
|
+
* ["segment1", "segment2"]
|
|
2187
|
+
* ```
|
|
2188
|
+
*/
|
|
2189
|
+
getOpenSegmentStack() {
|
|
2190
|
+
return this._segmentsStack.slice(this._ownedSegmentsStackLength);
|
|
2191
|
+
}
|
|
1870
2192
|
_processTokens(tokens, text) {
|
|
1871
2193
|
const queuedTokenRelease = this._streamRegulator.addChunk({
|
|
1872
2194
|
tokens,
|
|
@@ -2065,17 +2387,16 @@ class SegmentHandler {
|
|
|
2065
2387
|
this.onResponseChunk?.({ type: undefined, segmentType: undefined, tokens: tokens.slice(), text });
|
|
2066
2388
|
}
|
|
2067
2389
|
else {
|
|
2068
|
-
|
|
2069
|
-
|
|
2070
|
-
|
|
2071
|
-
|
|
2390
|
+
const text = (this.onResponseChunk != null || this.onTextChunk != null)
|
|
2391
|
+
? this.model.detokenize(tokens, false, this._getTokenTrailFromResult())
|
|
2392
|
+
: "";
|
|
2393
|
+
if (lastSegment instanceof Array)
|
|
2072
2394
|
pushAll(lastSegment, tokens);
|
|
2073
|
-
this.onToken?.(tokens);
|
|
2074
|
-
this.onTextChunk?.(text);
|
|
2075
|
-
this.onResponseChunk?.({ type: undefined, segmentType: undefined, tokens, text });
|
|
2076
|
-
}
|
|
2077
2395
|
else
|
|
2078
2396
|
this._segments.push(tokens);
|
|
2397
|
+
this.onToken?.(tokens.slice());
|
|
2398
|
+
this.onTextChunk?.(text);
|
|
2399
|
+
this.onResponseChunk?.({ type: undefined, segmentType: undefined, tokens: tokens.slice(), text });
|
|
2079
2400
|
}
|
|
2080
2401
|
if (lastContextWindowSegment == null)
|
|
2081
2402
|
this._contextWindowSegments.push(tokens.slice());
|