node-llama-cpp 3.11.0 → 3.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/bindings/Llama.d.ts +5 -1
- package/dist/bindings/Llama.js +11 -1
- package/dist/bindings/Llama.js.map +1 -1
- package/dist/bindings/types.d.ts +5 -2
- package/dist/bindings/types.js +16 -1
- package/dist/bindings/types.js.map +1 -1
- package/dist/chatWrappers/HarmonyChatWrapper.d.ts +78 -0
- package/dist/chatWrappers/HarmonyChatWrapper.js +527 -0
- package/dist/chatWrappers/HarmonyChatWrapper.js.map +1 -0
- package/dist/chatWrappers/utils/resolveChatWrapper.d.ts +4 -2
- package/dist/chatWrappers/utils/resolveChatWrapper.js +21 -6
- package/dist/chatWrappers/utils/resolveChatWrapper.js.map +1 -1
- package/dist/cli/commands/ChatCommand.d.ts +2 -1
- package/dist/cli/commands/ChatCommand.js +21 -7
- package/dist/cli/commands/ChatCommand.js.map +1 -1
- package/dist/cli/commands/CompleteCommand.d.ts +2 -1
- package/dist/cli/commands/CompleteCommand.js +21 -7
- package/dist/cli/commands/CompleteCommand.js.map +1 -1
- package/dist/cli/commands/InfillCommand.d.ts +2 -1
- package/dist/cli/commands/InfillCommand.js +21 -7
- package/dist/cli/commands/InfillCommand.js.map +1 -1
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.d.ts +1 -0
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js +16 -5
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js.map +1 -1
- package/dist/cli/recommendedModels.js +22 -0
- package/dist/cli/recommendedModels.js.map +1 -1
- package/dist/evaluator/LlamaChat/LlamaChat.js +244 -29
- package/dist/evaluator/LlamaChat/LlamaChat.js.map +1 -1
- package/dist/gguf/insights/GgufInsights.js +22 -3
- package/dist/gguf/insights/GgufInsights.js.map +1 -1
- package/dist/gguf/types/GgufMetadataTypes.d.ts +19 -2
- package/dist/gguf/types/GgufMetadataTypes.js +17 -0
- package/dist/gguf/types/GgufMetadataTypes.js.map +1 -1
- package/dist/gguf/types/GgufTensorInfoTypes.d.ts +2 -1
- package/dist/gguf/types/GgufTensorInfoTypes.js +1 -0
- package/dist/gguf/types/GgufTensorInfoTypes.js.map +1 -1
- package/dist/gguf/utils/getGgufFileTypeName.d.ts +1 -1
- package/dist/gguf/utils/ggufQuantNames.js +1 -0
- package/dist/gguf/utils/ggufQuantNames.js.map +1 -1
- package/dist/index.d.ts +3 -2
- package/dist/index.js +2 -1
- package/dist/index.js.map +1 -1
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/dist/types.d.ts +144 -2
- package/dist/types.js.map +1 -1
- package/dist/utils/gbnfJson/types.d.ts +1 -1
- package/dist/utils/gbnfJson/types.js.map +1 -1
- package/dist/utils/getChatWrapperSegmentDefinition.js +2 -0
- package/dist/utils/getChatWrapperSegmentDefinition.js.map +1 -1
- package/llama/binariesGithubRelease.json +1 -1
- package/llama/gitRelease.bundle +0 -0
- package/llama/llama.cpp.info.json +1 -1
- package/package.json +18 -16
|
@@ -125,8 +125,11 @@ export class LlamaChat {
|
|
|
125
125
|
const loadContextWindowForBudgetTriggers = async () => loadContextWindow(false);
|
|
126
126
|
while (true) {
|
|
127
127
|
generateResponseState.startTokenLoop();
|
|
128
|
+
generateResponseState.handleRerender();
|
|
129
|
+
const shouldHandlePrefixTriggers = generateResponseState.isRerender;
|
|
128
130
|
generateResponseState.canAvoidReloadingHistory = false;
|
|
129
131
|
await loadContextWindow();
|
|
132
|
+
generateResponseState.isRerender = false;
|
|
130
133
|
generateResponseState.addStopGenerationTriggersFromChatWrapper();
|
|
131
134
|
if (generateResponseState.generatedTokens === 0) {
|
|
132
135
|
generateResponseState.addIgnoreStartTextTriggersFromChatWrapper();
|
|
@@ -134,6 +137,11 @@ export class LlamaChat {
|
|
|
134
137
|
generateResponseState.initFunctions();
|
|
135
138
|
}
|
|
136
139
|
}
|
|
140
|
+
if (shouldHandlePrefixTriggers) {
|
|
141
|
+
const handlePrefixTriggersRes = await generateResponseState.handlePrefixTriggers(loadContextWindowForFunctionCallingLoop);
|
|
142
|
+
if (handlePrefixTriggersRes != null)
|
|
143
|
+
return handlePrefixTriggersRes;
|
|
144
|
+
}
|
|
137
145
|
if (generateResponseState.functionEvaluationMode !== false) {
|
|
138
146
|
const functionsCallsRes = await generateResponseState.enterFunctionCallingLoop(loadContextWindowForFunctionCallingLoop);
|
|
139
147
|
if (functionsCallsRes != null)
|
|
@@ -165,21 +173,21 @@ export class LlamaChat {
|
|
|
165
173
|
const maxTokensTriggerRes = generateResponseState.handleMaxTokensTrigger("model");
|
|
166
174
|
if (maxTokensTriggerRes != null)
|
|
167
175
|
return maxTokensTriggerRes;
|
|
168
|
-
if (generateResponseState.updateShouldContextShift())
|
|
176
|
+
if (generateResponseState.handleShouldRerender() || generateResponseState.updateShouldContextShift())
|
|
169
177
|
break;
|
|
170
178
|
if (await generateResponseState.handleBudgetTriggers()) {
|
|
171
179
|
await loadContextWindowForBudgetTriggers();
|
|
172
180
|
await generateResponseState.alignCurrentSequenceStateWithCurrentTokens();
|
|
173
181
|
await generateResponseState.createNewEvaluationIterator();
|
|
174
182
|
}
|
|
175
|
-
if (generateResponseState.updateShouldContextShift())
|
|
183
|
+
if (generateResponseState.handleShouldRerender() || generateResponseState.updateShouldContextShift())
|
|
176
184
|
break;
|
|
177
185
|
const abortRes = generateResponseState.handleAbortTrigger("model");
|
|
178
186
|
if (abortRes != null)
|
|
179
187
|
return abortRes;
|
|
180
188
|
}
|
|
181
189
|
generateResponseState.isFirstEvaluation = false;
|
|
182
|
-
if (generateResponseState.shouldContextShift)
|
|
190
|
+
if (generateResponseState.shouldRerender || generateResponseState.shouldContextShift)
|
|
183
191
|
continue;
|
|
184
192
|
break;
|
|
185
193
|
}
|
|
@@ -236,10 +244,12 @@ export class LlamaChat {
|
|
|
236
244
|
while (true) {
|
|
237
245
|
generateResponseState.startTokenLoop();
|
|
238
246
|
const { userTextSuffix } = await generateResponseState.loadContextWindow(mergeGeneratedResultWithChatHistory("user", generateResponseState.resolvedHistory, generateResponseState.segmentHandler.getModelResponseSegments()), mergeGeneratedResultWithChatHistory("user", generateResponseState.lastContextWindowHistory, generateResponseState.segmentHandler.getContextWindowModelResponseSegments()), true);
|
|
247
|
+
generateResponseState.isRerender = false;
|
|
239
248
|
generateResponseState.functionEvaluationMode = false;
|
|
240
249
|
generateResponseState.addStopGenerationTriggersFromChatWrapper();
|
|
241
250
|
if (userTextSuffix != null && userTextSuffix.values.length > 0)
|
|
242
251
|
generateResponseState.stopGenerationDetector.addStopTrigger(StopGenerationDetector.resolveLlamaTextTrigger(userTextSuffix, this.model.tokenizer));
|
|
252
|
+
generateResponseState.rerenderTriggers.forEach((trigger) => (generateResponseState.stopGenerationDetector.addStopTrigger(StopGenerationDetector.resolveLlamaTextTrigger(trigger, this.model.tokenizer))));
|
|
243
253
|
allSegmentTypes
|
|
244
254
|
.map((segmentType) => getChatWrapperSegmentDefinition(this._chatWrapper.settings, segmentType))
|
|
245
255
|
.filter((segmentDefinition) => segmentDefinition != null)
|
|
@@ -545,13 +555,13 @@ function generateContextTextThatEndsWithUserText(chatWrapper, options) {
|
|
|
545
555
|
`There might be an issue with the chat wrapper "${chatWrapper.wrapperName}" ` +
|
|
546
556
|
"where not all user messages are properly added to the the result LlamaText");
|
|
547
557
|
}
|
|
548
|
-
async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHistoryCompressionMetadata, pendingTokensCount = 0, isFirstEvaluation, chatWrapper, lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift, sequence, minFreeContextTokens = 1, functions, documentFunctionParams, endWithUserText }) {
|
|
558
|
+
async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHistoryCompressionMetadata, pendingTokensCount = 0, isFirstEvaluation, isRerender, chatWrapper, lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift, sequence, minFreeContextTokens = 1, functions, documentFunctionParams, endWithUserText }) {
|
|
549
559
|
if (sequence == null)
|
|
550
560
|
throw new DisposedError();
|
|
551
561
|
const model = sequence.model;
|
|
552
562
|
const context = sequence.context;
|
|
553
563
|
let removeRawFromHistory = false;
|
|
554
|
-
if (isFirstEvaluation && lastEvaluationContextWindowHistory != null && sequence.isLoadedToMemory) {
|
|
564
|
+
if ((isFirstEvaluation || isRerender) && lastEvaluationContextWindowHistory != null && sequence.isLoadedToMemory) {
|
|
555
565
|
const newContextWindow = lastEvaluationContextWindowHistory.slice();
|
|
556
566
|
if (endWithUserText) {
|
|
557
567
|
if (newContextWindow.length === 0 || newContextWindow[newContextWindow.length - 1].type !== "user")
|
|
@@ -565,7 +575,7 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
|
|
|
565
575
|
type: "model",
|
|
566
576
|
response: []
|
|
567
577
|
});
|
|
568
|
-
const { contextText, stopGenerationTriggers, ignoreStartText, functionCall, userTextSuffix } = generateContextText(endWithUserText, chatWrapper, {
|
|
578
|
+
const { contextText, stopGenerationTriggers, ignoreStartText, functionCall, userTextSuffix, prefixTriggers, noPrefixTrigger, rerender, detectFunctionCalls } = generateContextText(endWithUserText, chatWrapper, {
|
|
569
579
|
chatHistory: newContextWindow,
|
|
570
580
|
availableFunctions: functions,
|
|
571
581
|
documentFunctionParams
|
|
@@ -574,7 +584,7 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
|
|
|
574
584
|
if (tokens.length + pendingTokensCount + minFreeContextTokens < context.contextSize) {
|
|
575
585
|
const { firstDifferentIndex } = sequence.compareContextTokens(tokens);
|
|
576
586
|
const existingEvaluationPercentage = firstDifferentIndex / tokens.length;
|
|
577
|
-
if (existingEvaluationPercentage >= minimumOverlapPercentageToPreventContextShift)
|
|
587
|
+
if (isRerender || existingEvaluationPercentage >= minimumOverlapPercentageToPreventContextShift)
|
|
578
588
|
return {
|
|
579
589
|
history: newContextWindow,
|
|
580
590
|
stopGenerationTriggers,
|
|
@@ -584,7 +594,11 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
|
|
|
584
594
|
ignoreStartText: ignoreStartText ?? [],
|
|
585
595
|
functionCallInitiallyEngaged: functionCall?.initiallyEngaged ?? false,
|
|
586
596
|
disengageInitiallyEngagedFunctionCall: functionCall?.disengageInitiallyEngaged ?? [],
|
|
587
|
-
userTextSuffix
|
|
597
|
+
userTextSuffix,
|
|
598
|
+
prefixTriggers,
|
|
599
|
+
noPrefixTrigger,
|
|
600
|
+
rerender,
|
|
601
|
+
detectFunctionCalls
|
|
588
602
|
};
|
|
589
603
|
}
|
|
590
604
|
}
|
|
@@ -607,7 +621,7 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
|
|
|
607
621
|
functions,
|
|
608
622
|
documentFunctionParams
|
|
609
623
|
});
|
|
610
|
-
const { contextText, stopGenerationTriggers, ignoreStartText, functionCall, userTextSuffix } = generateContextText(endWithUserText, chatWrapper, {
|
|
624
|
+
const { contextText, stopGenerationTriggers, ignoreStartText, functionCall, userTextSuffix, prefixTriggers, noPrefixTrigger, rerender, detectFunctionCalls } = generateContextText(endWithUserText, chatWrapper, {
|
|
611
625
|
chatHistory: compressedHistory,
|
|
612
626
|
availableFunctions: functions,
|
|
613
627
|
documentFunctionParams
|
|
@@ -621,11 +635,15 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
|
|
|
621
635
|
ignoreStartText: ignoreStartText ?? [],
|
|
622
636
|
functionCallInitiallyEngaged: functionCall?.initiallyEngaged ?? false,
|
|
623
637
|
disengageInitiallyEngagedFunctionCall: functionCall?.disengageInitiallyEngaged ?? [],
|
|
624
|
-
userTextSuffix
|
|
638
|
+
userTextSuffix,
|
|
639
|
+
prefixTriggers,
|
|
640
|
+
noPrefixTrigger,
|
|
641
|
+
rerender,
|
|
642
|
+
detectFunctionCalls
|
|
625
643
|
};
|
|
626
644
|
}
|
|
627
645
|
{
|
|
628
|
-
const { contextText, stopGenerationTriggers, ignoreStartText, functionCall, userTextSuffix } = generateContextText(endWithUserText, chatWrapper, {
|
|
646
|
+
const { contextText, stopGenerationTriggers, ignoreStartText, functionCall, userTextSuffix, prefixTriggers, noPrefixTrigger, rerender, detectFunctionCalls } = generateContextText(endWithUserText, chatWrapper, {
|
|
629
647
|
chatHistory: resolvedHistory,
|
|
630
648
|
availableFunctions: functions,
|
|
631
649
|
documentFunctionParams
|
|
@@ -641,7 +659,11 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
|
|
|
641
659
|
ignoreStartText: ignoreStartText ?? [],
|
|
642
660
|
functionCallInitiallyEngaged: functionCall?.initiallyEngaged ?? false,
|
|
643
661
|
disengageInitiallyEngagedFunctionCall: functionCall?.disengageInitiallyEngaged ?? [],
|
|
644
|
-
userTextSuffix
|
|
662
|
+
userTextSuffix,
|
|
663
|
+
prefixTriggers,
|
|
664
|
+
noPrefixTrigger,
|
|
665
|
+
rerender,
|
|
666
|
+
detectFunctionCalls
|
|
645
667
|
};
|
|
646
668
|
}
|
|
647
669
|
const contextShiftSize = Math.min(context.contextSize, Math.max(1, Math.floor(resolvedContextShift.size instanceof Function
|
|
@@ -658,7 +680,7 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
|
|
|
658
680
|
functions,
|
|
659
681
|
documentFunctionParams
|
|
660
682
|
});
|
|
661
|
-
const { contextText, stopGenerationTriggers, ignoreStartText, functionCall, userTextSuffix } = generateContextText(endWithUserText, chatWrapper, {
|
|
683
|
+
const { contextText, stopGenerationTriggers, ignoreStartText, functionCall, userTextSuffix, prefixTriggers, noPrefixTrigger, rerender, detectFunctionCalls } = generateContextText(endWithUserText, chatWrapper, {
|
|
662
684
|
chatHistory: compressedHistory,
|
|
663
685
|
availableFunctions: functions,
|
|
664
686
|
documentFunctionParams
|
|
@@ -672,7 +694,11 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
|
|
|
672
694
|
ignoreStartText: ignoreStartText ?? [],
|
|
673
695
|
functionCallInitiallyEngaged: functionCall?.initiallyEngaged ?? false,
|
|
674
696
|
disengageInitiallyEngagedFunctionCall: functionCall?.disengageInitiallyEngaged ?? [],
|
|
675
|
-
userTextSuffix
|
|
697
|
+
userTextSuffix,
|
|
698
|
+
prefixTriggers,
|
|
699
|
+
noPrefixTrigger,
|
|
700
|
+
rerender,
|
|
701
|
+
detectFunctionCalls
|
|
676
702
|
};
|
|
677
703
|
}
|
|
678
704
|
class GenerateResponseState {
|
|
@@ -711,6 +737,7 @@ class GenerateResponseState {
|
|
|
711
737
|
functionNameGrammar;
|
|
712
738
|
functionsGrammar;
|
|
713
739
|
functionsEvaluationState;
|
|
740
|
+
functionSyntaxStartDetectorEnabled = true;
|
|
714
741
|
streamRegulator = new TokenStreamRegulator();
|
|
715
742
|
stopGenerationDetector = new StopGenerationDetector();
|
|
716
743
|
customStopGenerationTriggersDetector = new StopGenerationDetector();
|
|
@@ -723,6 +750,7 @@ class GenerateResponseState {
|
|
|
723
750
|
res = [];
|
|
724
751
|
pendingTokens = [];
|
|
725
752
|
ignoredStartTextTokens = [];
|
|
753
|
+
prefixTriggerTokens = [];
|
|
726
754
|
resFunctionCalls = [];
|
|
727
755
|
segmentHandler;
|
|
728
756
|
pendingPartialTokens = [];
|
|
@@ -735,12 +763,14 @@ class GenerateResponseState {
|
|
|
735
763
|
releasedPartiallyFreeTokensBeforeFunctionCallStartSyntax = false;
|
|
736
764
|
generatedTokens = 0;
|
|
737
765
|
isFirstEvaluation = true;
|
|
766
|
+
isRerender = true; // first render is a rerender
|
|
738
767
|
initiallyEngagedFunctionMode = false;
|
|
739
768
|
lastContextWindowHistory;
|
|
740
769
|
lastHistoryCompressionMetadata;
|
|
741
770
|
restartEvaluationIterator = false;
|
|
742
771
|
// context shift loop
|
|
743
772
|
shouldContextShift = false;
|
|
773
|
+
shouldRerender = false;
|
|
744
774
|
canAvoidReloadingHistory = false;
|
|
745
775
|
contextWindowTokens = [];
|
|
746
776
|
stopGenerationTriggers = [];
|
|
@@ -748,6 +778,11 @@ class GenerateResponseState {
|
|
|
748
778
|
functionCallInitiallyEngaged = false;
|
|
749
779
|
disengageInitiallyEngagedFunctionCall = [];
|
|
750
780
|
userTextSuffix = undefined;
|
|
781
|
+
prefixTriggerDetectors = new Map();
|
|
782
|
+
noPrefixTrigger = undefined;
|
|
783
|
+
rerenderTriggers = [];
|
|
784
|
+
rerenderTriggerDetector = new StopGenerationDetector();
|
|
785
|
+
rerenderActions = undefined;
|
|
751
786
|
tokens = [];
|
|
752
787
|
// token evaluation loop
|
|
753
788
|
evaluationIterator;
|
|
@@ -930,7 +965,8 @@ class GenerateResponseState {
|
|
|
930
965
|
let mostExhaustiveTriggeredStopsLeftoverTokens = [];
|
|
931
966
|
const lastTokensForDetokenizer = resolveLastTokens([
|
|
932
967
|
this.contextWindowTokens,
|
|
933
|
-
this.ignoredStartTextTokens
|
|
968
|
+
this.ignoredStartTextTokens,
|
|
969
|
+
this.prefixTriggerTokens
|
|
934
970
|
]);
|
|
935
971
|
const pendingPartialTokens = [];
|
|
936
972
|
for (let i = 0; i < this.pendingTokens.length; i++) {
|
|
@@ -993,6 +1029,16 @@ class GenerateResponseState {
|
|
|
993
1029
|
this.ensureNotAborted();
|
|
994
1030
|
this.shouldContextShift = false;
|
|
995
1031
|
}
|
|
1032
|
+
handleRerender() {
|
|
1033
|
+
if (this.shouldRerender) {
|
|
1034
|
+
this.isRerender = true;
|
|
1035
|
+
this.streamRegulator.reset();
|
|
1036
|
+
if (this.rerenderActions === "closeResponseItem" && this.segmentHandler.topOpenSegmentType != null) {
|
|
1037
|
+
this.segmentHandler.closeSegment(this.segmentHandler.topOpenSegmentType);
|
|
1038
|
+
this.shouldRerender = false;
|
|
1039
|
+
}
|
|
1040
|
+
}
|
|
1041
|
+
}
|
|
996
1042
|
getContextWindowFunctionCallsTokens() {
|
|
997
1043
|
if (this.functionEvaluationMode === false)
|
|
998
1044
|
return [];
|
|
@@ -1019,14 +1065,15 @@ class GenerateResponseState {
|
|
|
1019
1065
|
async loadContextWindow(resolvedHistory, resolvedContextWindowsHistory, endWithUserText = false, avoidReloadingHistory = false) {
|
|
1020
1066
|
const queuedChunkTokens = this.streamRegulator.getAllQueuedChunkTokens();
|
|
1021
1067
|
const functionCallsTokens = this.getContextWindowFunctionCallsTokens();
|
|
1022
|
-
if (!avoidReloadingHistory || !this.canAvoidReloadingHistory || !this.llamaChat.sequence.isLoadedToMemory) {
|
|
1023
|
-
const { history: contextWindowHistory, stopGenerationTriggers, tokens: contextWindowTokens, removeRawFromHistory, newHistoryCompressionMetadata, ignoreStartText, functionCallInitiallyEngaged, disengageInitiallyEngagedFunctionCall, userTextSuffix } = await getContextWindow({
|
|
1068
|
+
if (!avoidReloadingHistory || !this.canAvoidReloadingHistory || this.isRerender || !this.llamaChat.sequence.isLoadedToMemory) {
|
|
1069
|
+
const { history: contextWindowHistory, stopGenerationTriggers, tokens: contextWindowTokens, removeRawFromHistory, newHistoryCompressionMetadata, ignoreStartText, functionCallInitiallyEngaged, disengageInitiallyEngagedFunctionCall, userTextSuffix, prefixTriggers, noPrefixTrigger, rerender, detectFunctionCalls } = await getContextWindow({
|
|
1024
1070
|
resolvedHistory: resolvedHistory,
|
|
1025
1071
|
resolvedContextShift: this.resolvedContextShift,
|
|
1026
1072
|
lastHistoryCompressionMetadata: this.lastHistoryCompressionMetadata,
|
|
1027
|
-
pendingTokensCount: this.
|
|
1028
|
-
this.pendingPartialTokens.length,
|
|
1073
|
+
pendingTokensCount: this.prefixTriggerTokens.length + this.pendingTokens.length + queuedChunkTokens.length +
|
|
1074
|
+
functionCallsTokens.length + this.pendingPartialTokens.length,
|
|
1029
1075
|
isFirstEvaluation: this.isFirstEvaluation,
|
|
1076
|
+
isRerender: this.isRerender,
|
|
1030
1077
|
chatWrapper: this.chatWrapper,
|
|
1031
1078
|
lastEvaluationContextWindowHistory: resolvedContextWindowsHistory,
|
|
1032
1079
|
minimumOverlapPercentageToPreventContextShift: this.minimumOverlapPercentageToPreventContextShift,
|
|
@@ -1043,6 +1090,45 @@ class GenerateResponseState {
|
|
|
1043
1090
|
this.functionCallInitiallyEngaged = functionCallInitiallyEngaged;
|
|
1044
1091
|
this.disengageInitiallyEngagedFunctionCall = disengageInitiallyEngagedFunctionCall;
|
|
1045
1092
|
this.userTextSuffix = userTextSuffix;
|
|
1093
|
+
if (this.isRerender) {
|
|
1094
|
+
this.prefixTriggerTokens.length = 0;
|
|
1095
|
+
for (const prefixDetector of this.prefixTriggerDetectors.keys()) {
|
|
1096
|
+
prefixDetector.clearInProgressStops();
|
|
1097
|
+
prefixDetector.clearTriggeredStops();
|
|
1098
|
+
}
|
|
1099
|
+
this.prefixTriggerDetectors.clear();
|
|
1100
|
+
for (const trigger of prefixTriggers ?? []) {
|
|
1101
|
+
if (trigger.type === "functionCall" && !this.functionsEnabled)
|
|
1102
|
+
continue;
|
|
1103
|
+
const prefixDetector = new StopGenerationDetector();
|
|
1104
|
+
StopGenerationDetector.resolveStopTriggers(trigger.triggers, this.llamaChat.model.tokenizer)
|
|
1105
|
+
.forEach((stopTrigger) => prefixDetector.addStopTrigger(stopTrigger));
|
|
1106
|
+
this.prefixTriggerDetectors.set(prefixDetector, { inject: trigger.inject, trigger });
|
|
1107
|
+
const inject = trigger.inject;
|
|
1108
|
+
if (inject != null && inject.values.length > 0) {
|
|
1109
|
+
const fullPrefixDetector = new StopGenerationDetector();
|
|
1110
|
+
StopGenerationDetector
|
|
1111
|
+
.resolveStopTriggers(trigger.triggers.map((trigger) => LlamaText([trigger, inject])), this.llamaChat.model.tokenizer)
|
|
1112
|
+
.forEach((stopTrigger) => fullPrefixDetector.addStopTrigger(stopTrigger));
|
|
1113
|
+
this.prefixTriggerDetectors.set(fullPrefixDetector, { trigger });
|
|
1114
|
+
}
|
|
1115
|
+
}
|
|
1116
|
+
this.noPrefixTrigger = noPrefixTrigger;
|
|
1117
|
+
if (this.noPrefixTrigger?.type === "functionCall" && !this.functionsEnabled)
|
|
1118
|
+
this.noPrefixTrigger = undefined;
|
|
1119
|
+
this.rerenderTriggers = rerender?.triggers ?? [];
|
|
1120
|
+
this.rerenderTriggerDetector.clearInProgressStops();
|
|
1121
|
+
this.rerenderTriggerDetector.clearTriggeredStops();
|
|
1122
|
+
this.rerenderTriggerDetector = new StopGenerationDetector();
|
|
1123
|
+
this.rerenderActions = rerender?.action;
|
|
1124
|
+
this.functionSyntaxStartDetectorEnabled = detectFunctionCalls ?? true;
|
|
1125
|
+
if (!this.functionSyntaxStartDetectorEnabled)
|
|
1126
|
+
this.functionSyntaxStartDetector.clearInProgressStops();
|
|
1127
|
+
if (rerender?.triggers != null) {
|
|
1128
|
+
StopGenerationDetector.resolveStopTriggers(rerender.triggers, this.llamaChat.model.tokenizer)
|
|
1129
|
+
.map((stopTrigger) => this.rerenderTriggerDetector.addStopTrigger(stopTrigger));
|
|
1130
|
+
}
|
|
1131
|
+
}
|
|
1046
1132
|
this.lastHistoryCompressionMetadata = newHistoryCompressionMetadata;
|
|
1047
1133
|
this.lastContextWindowHistory = contextWindowHistory;
|
|
1048
1134
|
this.segmentHandler.resetContextWindow();
|
|
@@ -1055,6 +1141,7 @@ class GenerateResponseState {
|
|
|
1055
1141
|
this.tokens = [
|
|
1056
1142
|
...this.contextWindowTokens,
|
|
1057
1143
|
...this.ignoredStartTextTokens,
|
|
1144
|
+
...this.prefixTriggerTokens,
|
|
1058
1145
|
...this.pendingTokens,
|
|
1059
1146
|
...queuedChunkTokens,
|
|
1060
1147
|
...functionCallsTokens,
|
|
@@ -1090,6 +1177,119 @@ class GenerateResponseState {
|
|
|
1090
1177
|
this.restartEvaluationIterator = true;
|
|
1091
1178
|
}
|
|
1092
1179
|
}
|
|
1180
|
+
async handlePrefixTriggers(loadContextWindow) {
|
|
1181
|
+
const reloadTokens = async () => {
|
|
1182
|
+
this.startTokenLoop();
|
|
1183
|
+
await loadContextWindow();
|
|
1184
|
+
};
|
|
1185
|
+
const injectTokens = async (text, alignStateTokens = false) => {
|
|
1186
|
+
if (text == null)
|
|
1187
|
+
return;
|
|
1188
|
+
const tokens = text.tokenize(this.llamaChat.model.tokenizer, "trimLeadingSpace");
|
|
1189
|
+
if (tokens.length === 0)
|
|
1190
|
+
return;
|
|
1191
|
+
pushAll(this.prefixTriggerTokens, tokens);
|
|
1192
|
+
if (alignStateTokens)
|
|
1193
|
+
await reloadTokens();
|
|
1194
|
+
};
|
|
1195
|
+
if (this.prefixTriggerDetectors.size === 0) {
|
|
1196
|
+
if (this.noPrefixTrigger?.type === "functionCall" && this.chatWrapper.settings.functions != null) {
|
|
1197
|
+
await injectTokens(this.noPrefixTrigger.inject, true);
|
|
1198
|
+
this.functionEvaluationMode = "functionName";
|
|
1199
|
+
}
|
|
1200
|
+
else if (this.noPrefixTrigger?.type === "segment") {
|
|
1201
|
+
await injectTokens(this.noPrefixTrigger.inject, true);
|
|
1202
|
+
this.segmentHandler.openSegment(this.noPrefixTrigger.segmentType);
|
|
1203
|
+
}
|
|
1204
|
+
else if (this.noPrefixTrigger?.type === "response")
|
|
1205
|
+
await injectTokens(this.noPrefixTrigger.inject, true);
|
|
1206
|
+
return undefined;
|
|
1207
|
+
}
|
|
1208
|
+
const generatedTokens = [];
|
|
1209
|
+
let isFirstToken = true;
|
|
1210
|
+
let continueGeneration = true;
|
|
1211
|
+
for await (const tokens of this.evaluateWithContextShift(loadContextWindow)) {
|
|
1212
|
+
pushAll(generatedTokens, tokens);
|
|
1213
|
+
for (const [triggerDetector, { trigger, inject }] of [...this.prefixTriggerDetectors.entries()]) {
|
|
1214
|
+
triggerDetector.recordGeneration({
|
|
1215
|
+
text: this.currentText,
|
|
1216
|
+
tokens: this.currentTokens,
|
|
1217
|
+
startNewChecks: isFirstToken,
|
|
1218
|
+
triggerMustStartWithGeneration: true
|
|
1219
|
+
});
|
|
1220
|
+
if (triggerDetector.hasTriggeredStops) {
|
|
1221
|
+
const { firstRemainingGenerationAfterStop, stopTrigger } = StopGenerationDetector.getFirstRemainingGenerationAfterStop(triggerDetector.getTriggeredStops());
|
|
1222
|
+
const remainingTokens = typeof firstRemainingGenerationAfterStop === "string"
|
|
1223
|
+
? firstRemainingGenerationAfterStop === ""
|
|
1224
|
+
? []
|
|
1225
|
+
: this.llamaChat.model.tokenize(firstRemainingGenerationAfterStop, false, "trimLeadingSpace")
|
|
1226
|
+
: (firstRemainingGenerationAfterStop ?? []);
|
|
1227
|
+
const triggerTokens = (stopTrigger == null || remainingTokens.length === 0)
|
|
1228
|
+
? generatedTokens
|
|
1229
|
+
: stopTrigger.flatMap((item) => {
|
|
1230
|
+
if (typeof item === "string")
|
|
1231
|
+
return this.llamaChat.model.tokenize(item, false, "trimLeadingSpace");
|
|
1232
|
+
return [item];
|
|
1233
|
+
});
|
|
1234
|
+
this.streamRegulator.reset();
|
|
1235
|
+
if (trigger.type === "segment") {
|
|
1236
|
+
pushAll(this.prefixTriggerTokens, triggerTokens);
|
|
1237
|
+
if (inject != null)
|
|
1238
|
+
await injectTokens(inject);
|
|
1239
|
+
await reloadTokens();
|
|
1240
|
+
this.segmentHandler.openSegment(trigger.segmentType);
|
|
1241
|
+
}
|
|
1242
|
+
else if (trigger.type === "response") {
|
|
1243
|
+
pushAll(this.prefixTriggerTokens, triggerTokens);
|
|
1244
|
+
if (inject != null)
|
|
1245
|
+
await injectTokens(inject);
|
|
1246
|
+
await reloadTokens();
|
|
1247
|
+
}
|
|
1248
|
+
else if (trigger.type === "functionCall") {
|
|
1249
|
+
if (trigger.replaceTrigger === false)
|
|
1250
|
+
pushAll(this.prefixTriggerTokens, triggerTokens);
|
|
1251
|
+
if (inject != null)
|
|
1252
|
+
await injectTokens(inject);
|
|
1253
|
+
await reloadTokens();
|
|
1254
|
+
this.functionEvaluationMode = "functionName";
|
|
1255
|
+
}
|
|
1256
|
+
else
|
|
1257
|
+
void trigger;
|
|
1258
|
+
this.prefixTriggerDetectors.clear();
|
|
1259
|
+
continueGeneration = false;
|
|
1260
|
+
break;
|
|
1261
|
+
}
|
|
1262
|
+
else if (!triggerDetector.hasInProgressStops)
|
|
1263
|
+
this.prefixTriggerDetectors.delete(triggerDetector);
|
|
1264
|
+
}
|
|
1265
|
+
if (this.prefixTriggerDetectors.size === 0 && continueGeneration) {
|
|
1266
|
+
this.streamRegulator.reset();
|
|
1267
|
+
continueGeneration = false;
|
|
1268
|
+
if (this.noPrefixTrigger?.type === "functionCall" && this.chatWrapper.settings.functions != null) {
|
|
1269
|
+
await injectTokens(this.noPrefixTrigger.inject, true);
|
|
1270
|
+
this.functionEvaluationMode = "functionName";
|
|
1271
|
+
}
|
|
1272
|
+
else if (this.noPrefixTrigger?.type === "segment") {
|
|
1273
|
+
await injectTokens(this.noPrefixTrigger.inject, true);
|
|
1274
|
+
this.segmentHandler.openSegment(this.noPrefixTrigger.segmentType);
|
|
1275
|
+
}
|
|
1276
|
+
else if (this.noPrefixTrigger?.type === "response")
|
|
1277
|
+
await injectTokens(this.noPrefixTrigger.inject, true);
|
|
1278
|
+
else
|
|
1279
|
+
this.streamRegulator.addChunk({
|
|
1280
|
+
tokens: generatedTokens,
|
|
1281
|
+
text: this.llamaChat.model.detokenize(generatedTokens, false, this.getLastTokens())
|
|
1282
|
+
});
|
|
1283
|
+
}
|
|
1284
|
+
isFirstToken = false;
|
|
1285
|
+
if (!continueGeneration)
|
|
1286
|
+
break;
|
|
1287
|
+
const stopRes = this.handleAbortTrigger("model") ?? this.handleMaxTokensTrigger("model");
|
|
1288
|
+
if (stopRes != null)
|
|
1289
|
+
return stopRes;
|
|
1290
|
+
}
|
|
1291
|
+
return undefined;
|
|
1292
|
+
}
|
|
1093
1293
|
async enterFunctionCallingLoop(loadContextWindow) {
|
|
1094
1294
|
if (!this.functionsEnabled) {
|
|
1095
1295
|
this.functionEvaluationMode = false;
|
|
@@ -1568,6 +1768,8 @@ class GenerateResponseState {
|
|
|
1568
1768
|
}
|
|
1569
1769
|
}
|
|
1570
1770
|
detectAndHandleFunctionStartSyntax() {
|
|
1771
|
+
if (!this.functionSyntaxStartDetectorEnabled)
|
|
1772
|
+
return;
|
|
1571
1773
|
this.functionSyntaxStartDetector.recordGeneration({
|
|
1572
1774
|
text: this.currentText,
|
|
1573
1775
|
tokens: this.currentTokens,
|
|
@@ -1592,6 +1794,11 @@ class GenerateResponseState {
|
|
|
1592
1794
|
}
|
|
1593
1795
|
}
|
|
1594
1796
|
recordStopGenerationEvaluation() {
|
|
1797
|
+
this.rerenderTriggerDetector.recordGeneration({
|
|
1798
|
+
text: this.currentText,
|
|
1799
|
+
tokens: this.currentTokens,
|
|
1800
|
+
queuedTokenRelease: this.currentQueuedTokenRelease
|
|
1801
|
+
});
|
|
1595
1802
|
this.stopGenerationDetector.recordGeneration({
|
|
1596
1803
|
text: this.currentText,
|
|
1597
1804
|
tokens: this.currentTokens,
|
|
@@ -1609,8 +1816,10 @@ class GenerateResponseState {
|
|
|
1609
1816
|
pushAll(this.pendingTokens, this.streamRegulator.popFreeChunkTokens());
|
|
1610
1817
|
}
|
|
1611
1818
|
handleStopGenerationTrigger(lastHistoryItemType, forceStopReason) {
|
|
1612
|
-
|
|
1613
|
-
this.
|
|
1819
|
+
const detectedStopGenerationTrigger = this.stopGenerationDetector.hasTriggeredStops ||
|
|
1820
|
+
this.customStopGenerationTriggersDetector.hasTriggeredStops ||
|
|
1821
|
+
this.llamaChat.model.isEogToken(this.currentToken);
|
|
1822
|
+
if ((detectedStopGenerationTrigger && !this.rerenderTriggerDetector.hasTriggeredStops) || forceStopReason != null) {
|
|
1614
1823
|
this.stopGenerationDetector.clearInProgressStops();
|
|
1615
1824
|
this.customStopGenerationTriggersDetector.clearInProgressStops();
|
|
1616
1825
|
pushAll(this.pendingTokens, this.streamRegulator.popFreeChunkTokens());
|
|
@@ -1722,6 +1931,10 @@ class GenerateResponseState {
|
|
|
1722
1931
|
}
|
|
1723
1932
|
return shouldReloadEvaluationState;
|
|
1724
1933
|
}
|
|
1934
|
+
handleShouldRerender() {
|
|
1935
|
+
this.shouldRerender = this.rerenderTriggerDetector.hasTriggeredStops;
|
|
1936
|
+
return this.shouldRerender;
|
|
1937
|
+
}
|
|
1725
1938
|
updateShouldContextShift() {
|
|
1726
1939
|
this.shouldContextShift = this.llamaChat.sequence.nextTokenIndex >= this.llamaChat.context.contextSize - 1;
|
|
1727
1940
|
return this.shouldContextShift;
|
|
@@ -1867,6 +2080,9 @@ class SegmentHandler {
|
|
|
1867
2080
|
isSegmentTypeOpen(type) {
|
|
1868
2081
|
return this._segmentsStackSet.has(type);
|
|
1869
2082
|
}
|
|
2083
|
+
get topOpenSegmentType() {
|
|
2084
|
+
return this._segmentsStack.at(-1);
|
|
2085
|
+
}
|
|
1870
2086
|
_processTokens(tokens, text) {
|
|
1871
2087
|
const queuedTokenRelease = this._streamRegulator.addChunk({
|
|
1872
2088
|
tokens,
|
|
@@ -2065,17 +2281,16 @@ class SegmentHandler {
|
|
|
2065
2281
|
this.onResponseChunk?.({ type: undefined, segmentType: undefined, tokens: tokens.slice(), text });
|
|
2066
2282
|
}
|
|
2067
2283
|
else {
|
|
2068
|
-
|
|
2069
|
-
|
|
2070
|
-
|
|
2071
|
-
|
|
2284
|
+
const text = (this.onResponseChunk != null || this.onTextChunk != null)
|
|
2285
|
+
? this.model.detokenize(tokens, false, this._getTokenTrailFromResult())
|
|
2286
|
+
: "";
|
|
2287
|
+
if (lastSegment instanceof Array)
|
|
2072
2288
|
pushAll(lastSegment, tokens);
|
|
2073
|
-
this.onToken?.(tokens);
|
|
2074
|
-
this.onTextChunk?.(text);
|
|
2075
|
-
this.onResponseChunk?.({ type: undefined, segmentType: undefined, tokens, text });
|
|
2076
|
-
}
|
|
2077
2289
|
else
|
|
2078
2290
|
this._segments.push(tokens);
|
|
2291
|
+
this.onToken?.(tokens.slice());
|
|
2292
|
+
this.onTextChunk?.(text);
|
|
2293
|
+
this.onResponseChunk?.({ type: undefined, segmentType: undefined, tokens: tokens.slice(), text });
|
|
2079
2294
|
}
|
|
2080
2295
|
if (lastContextWindowSegment == null)
|
|
2081
2296
|
this._contextWindowSegments.push(tokens.slice());
|