node-llama-cpp 3.5.0 → 3.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/dist/ChatWrapper.d.ts +3 -5
- package/dist/ChatWrapper.js +57 -5
- package/dist/ChatWrapper.js.map +1 -1
- package/dist/bindings/AddonTypes.d.ts +1 -1
- package/dist/bindings/Llama.js +2 -0
- package/dist/bindings/Llama.js.map +1 -1
- package/dist/bindings/utils/compileLLamaCpp.js +2 -0
- package/dist/bindings/utils/compileLLamaCpp.js.map +1 -1
- package/dist/chatWrappers/AlpacaChatWrapper.js.map +1 -1
- package/dist/chatWrappers/DeepSeekChatWrapper.d.ts +37 -0
- package/dist/chatWrappers/DeepSeekChatWrapper.js +294 -0
- package/dist/chatWrappers/DeepSeekChatWrapper.js.map +1 -0
- package/dist/chatWrappers/FalconChatWrapper.js.map +1 -1
- package/dist/chatWrappers/FunctionaryChatWrapper.js +40 -14
- package/dist/chatWrappers/FunctionaryChatWrapper.js.map +1 -1
- package/dist/chatWrappers/GeneralChatWrapper.js.map +1 -1
- package/dist/chatWrappers/Llama2ChatWrapper.js.map +1 -1
- package/dist/chatWrappers/Llama3_1ChatWrapper.d.ts +0 -3
- package/dist/chatWrappers/Llama3_1ChatWrapper.js +24 -13
- package/dist/chatWrappers/Llama3_1ChatWrapper.js.map +1 -1
- package/dist/chatWrappers/Llama3_2LightweightChatWrapper.js +22 -11
- package/dist/chatWrappers/Llama3_2LightweightChatWrapper.js.map +1 -1
- package/dist/chatWrappers/MistralChatWrapper.d.ts +2 -1
- package/dist/chatWrappers/MistralChatWrapper.js +39 -28
- package/dist/chatWrappers/MistralChatWrapper.js.map +1 -1
- package/dist/chatWrappers/QwenChatWrapper.d.ts +21 -0
- package/dist/chatWrappers/QwenChatWrapper.js +162 -0
- package/dist/chatWrappers/QwenChatWrapper.js.map +1 -0
- package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.d.ts +41 -3
- package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js +343 -126
- package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js.map +1 -1
- package/dist/chatWrappers/generic/TemplateChatWrapper.d.ts +17 -1
- package/dist/chatWrappers/generic/TemplateChatWrapper.js +10 -2
- package/dist/chatWrappers/generic/TemplateChatWrapper.js.map +1 -1
- package/dist/chatWrappers/generic/utils/UniqueIdGenerator.d.ts +7 -0
- package/dist/chatWrappers/generic/utils/UniqueIdGenerator.js +30 -0
- package/dist/chatWrappers/generic/utils/UniqueIdGenerator.js.map +1 -0
- package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.d.ts +5 -4
- package/dist/chatWrappers/generic/utils/extractFunctionCallSettingsFromJinjaTemplate.d.ts +19 -0
- package/dist/chatWrappers/generic/utils/extractFunctionCallSettingsFromJinjaTemplate.js +446 -0
- package/dist/chatWrappers/generic/utils/extractFunctionCallSettingsFromJinjaTemplate.js.map +1 -0
- package/dist/chatWrappers/generic/utils/extractSegmentSettingsFromTokenizerAndChatTemplate.d.ts +2 -0
- package/dist/chatWrappers/generic/utils/extractSegmentSettingsFromTokenizerAndChatTemplate.js +38 -0
- package/dist/chatWrappers/generic/utils/extractSegmentSettingsFromTokenizerAndChatTemplate.js.map +1 -0
- package/dist/chatWrappers/generic/utils/getFirstValidResult.d.ts +6 -0
- package/dist/chatWrappers/generic/utils/getFirstValidResult.js +19 -0
- package/dist/chatWrappers/generic/utils/getFirstValidResult.js.map +1 -0
- package/dist/chatWrappers/generic/utils/squashChatHistoryItems.d.ts +2 -0
- package/dist/chatWrappers/generic/utils/squashChatHistoryItems.js +35 -0
- package/dist/chatWrappers/generic/utils/squashChatHistoryItems.js.map +1 -0
- package/dist/chatWrappers/generic/utils/templateSegmentOptionsToChatWrapperSettings.d.ts +22 -0
- package/dist/chatWrappers/generic/utils/templateSegmentOptionsToChatWrapperSettings.js +28 -0
- package/dist/chatWrappers/generic/utils/templateSegmentOptionsToChatWrapperSettings.js.map +1 -0
- package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.d.ts +3 -0
- package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.js +25 -0
- package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.js.map +1 -1
- package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js +197 -30
- package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js.map +1 -1
- package/dist/chatWrappers/utils/resolveChatWrapper.d.ts +48 -3
- package/dist/chatWrappers/utils/resolveChatWrapper.js +15 -5
- package/dist/chatWrappers/utils/resolveChatWrapper.js.map +1 -1
- package/dist/cli/commands/ChatCommand.js +38 -7
- package/dist/cli/commands/ChatCommand.js.map +1 -1
- package/dist/cli/recommendedModels.js +93 -10
- package/dist/cli/recommendedModels.js.map +1 -1
- package/dist/cli/utils/resolveModelRecommendationFileOptions.d.ts +1 -1
- package/dist/config.d.ts +1 -1
- package/dist/config.js +1 -1
- package/dist/config.js.map +1 -1
- package/dist/evaluator/LlamaChat/LlamaChat.d.ts +87 -5
- package/dist/evaluator/LlamaChat/LlamaChat.js +781 -196
- package/dist/evaluator/LlamaChat/LlamaChat.js.map +1 -1
- package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js +55 -1
- package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js.map +1 -1
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.d.ts +22 -7
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.js +28 -8
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.js.map +1 -1
- package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.js +1 -1
- package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.js.map +1 -1
- package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.d.ts +1 -1
- package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.js.map +1 -1
- package/dist/evaluator/LlamaCompletion.js +61 -48
- package/dist/evaluator/LlamaCompletion.js.map +1 -1
- package/dist/evaluator/LlamaGrammar.d.ts +2 -2
- package/dist/evaluator/LlamaGrammar.js +5 -3
- package/dist/evaluator/LlamaGrammar.js.map +1 -1
- package/dist/evaluator/LlamaModel/LlamaModel.d.ts +3 -1
- package/dist/evaluator/LlamaModel/LlamaModel.js +4 -1
- package/dist/evaluator/LlamaModel/LlamaModel.js.map +1 -1
- package/dist/evaluator/LlamaRankingContext.js +1 -1
- package/dist/evaluator/LlamaRankingContext.js.map +1 -1
- package/dist/gguf/types/GgufMetadataTypes.d.ts +1 -1
- package/dist/gguf/types/GgufMetadataTypes.js.map +1 -1
- package/dist/index.d.ts +8 -5
- package/dist/index.js +4 -2
- package/dist/index.js.map +1 -1
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/dist/types.d.ts +40 -2
- package/dist/types.js +7 -1
- package/dist/types.js.map +1 -1
- package/dist/utils/LlamaText.js +8 -9
- package/dist/utils/LlamaText.js.map +1 -1
- package/dist/utils/OpenAIFormat.d.ts +177 -0
- package/dist/utils/OpenAIFormat.js +488 -0
- package/dist/utils/OpenAIFormat.js.map +1 -0
- package/dist/utils/TokenStreamRegulator.d.ts +2 -0
- package/dist/utils/TokenStreamRegulator.js +12 -0
- package/dist/utils/TokenStreamRegulator.js.map +1 -1
- package/dist/utils/getChatWrapperSegmentDefinition.d.ts +2 -0
- package/dist/utils/getChatWrapperSegmentDefinition.js +7 -0
- package/dist/utils/getChatWrapperSegmentDefinition.js.map +1 -0
- package/dist/utils/optionsMatrix.d.ts +58 -0
- package/dist/utils/optionsMatrix.js +97 -0
- package/dist/utils/optionsMatrix.js.map +1 -0
- package/dist/utils/parseModelUri.js +1 -1
- package/dist/utils/parseModelUri.js.map +1 -1
- package/dist/utils/resolveModelFile.js +2 -0
- package/dist/utils/resolveModelFile.js.map +1 -1
- package/llama/addon/AddonContext.cpp +11 -9
- package/llama/binariesGithubRelease.json +1 -1
- package/llama/gitRelease.bundle +0 -0
- package/llama/grammars/README.md +4 -4
- package/llama/llama.cpp.info.json +2 -2
- package/package.json +48 -45
- package/templates/packed/electron-typescript-react.json +1 -1
- package/templates/packed/node-typescript.json +1 -1
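
The largest change in this range is the rewritten generation pipeline in `package/dist/evaluator/LlamaChat/LlamaChat.js` (diffed below): response generation now routes through a `SegmentHandler` and accepts an `onResponseChunk` callback alongside `onToken`/`onTextChunk`, so segmented output (for example thought segments) and plain text can be streamed separately. The sketch below shows how such a callback might be consumed; it is an assumption-laden illustration, not code from this package — the option name `onResponseChunk` comes from the diff below, while its surfacing through `LlamaChatSession.prompt()` and the exact chunk fields (`type`, `segmentType`, `text`) are inferred from the file list above and may differ.

```ts
// Hypothetical usage sketch (assumptions: onResponseChunk is exposed on
// LlamaChatSession.prompt() options and chunks carry type/segmentType/text).
import {getLlama, LlamaChatSession} from "node-llama-cpp";

const llama = await getLlama();
const model = await llama.loadModel({modelPath: "path/to/model.gguf"});
const context = await model.createContext();
const session = new LlamaChatSession({contextSequence: context.getSequence()});

const answer = await session.prompt("Why is the sky blue?", {
    onResponseChunk(chunk) {
        // Route thought segments and regular text to different sinks.
        if (chunk.type === "segment" && chunk.segmentType === "thought")
            process.stderr.write(chunk.text);
        else
            process.stdout.write(chunk.text);
    }
});
console.log("\n---\n" + answer);
```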
package/dist/evaluator/LlamaChat/LlamaChat.js

@@ -1,4 +1,5 @@
 import { DisposeAggregator, DisposedError, EventRelay, withLock } from "lifecycle-utils";
+import { isChatModelResponseFunctionCall, isChatModelResponseSegment, allSegmentTypes } from "../../types.js";
 import { removeNullFields } from "../../utils/removeNullFields.js";
 import { LlamaGrammarEvaluationState } from "../LlamaGrammarEvaluationState.js";
 import { LlamaText, SpecialToken } from "../../utils/LlamaText.js";
@@ -11,6 +12,8 @@ import { safeEventCallback } from "../../utils/safeEventCallback.js";
 import { pushAll } from "../../utils/pushAll.js";
 import { resolveLastTokens } from "../../utils/resolveLastTokens.js";
 import { LlamaSampler } from "../LlamaContext/LlamaSampler.js";
+import { getChatWrapperSegmentDefinition } from "../../utils/getChatWrapperSegmentDefinition.js";
+import { jsonDumps } from "../../chatWrappers/utils/jsonDumps.js";
 import { eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy } from "./utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js";
 import { FunctionCallNameGrammar } from "./utils/FunctionCallNameGrammar.js";
 import { FunctionCallParamsGrammar } from "./utils/FunctionCallParamsGrammar.js";
@@ -76,11 +79,12 @@ export class LlamaChat {
         return this.sequence.model;
     }
     async generateResponse(history, options = {}) {
-        const { onTextChunk, onToken, signal, stopOnAbortSignal = false, maxTokens, temperature, minP, topK, topP, seed, grammar, trimWhitespaceSuffix = defaultTrimWhitespaceSuffix, repeatPenalty = {}, tokenBias, evaluationPriority = defaultEvaluationPriority, functions, onFunctionCall, documentFunctionParams, maxParallelFunctionCalls, contextShift = defaultContextShiftOptions, customStopTriggers, lastEvaluationContextWindow: { history: lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift = 0.5 } = {} } = options;
+        const { onTextChunk, onToken, onResponseChunk, signal, stopOnAbortSignal = false, maxTokens, temperature, minP, topK, topP, seed, grammar, trimWhitespaceSuffix = defaultTrimWhitespaceSuffix, repeatPenalty = {}, tokenBias, evaluationPriority = defaultEvaluationPriority, functions, onFunctionCall, documentFunctionParams, maxParallelFunctionCalls, contextShift = defaultContextShiftOptions, customStopTriggers, lastEvaluationContextWindow: { history: lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift = 0.5 } = {} } = options;
         this.sequence.tokenPredictor?.updateInputTokens?.(this.model.tokenize(findLastUserMessageInChatHistory(history)?.text ?? ""));
         const generateResponseState = new GenerateResponseState(this, this._chatWrapper, history, {
             onTextChunk,
             onToken,
+            onResponseChunk,
             signal,
             stopOnAbortSignal,
             maxTokens,
@@ -110,6 +114,7 @@
         return await withLock(this._chatLock, "evaluate", signal, async () => {
             try {
                 generateResponseState.ensureLastHistoryItemIsModel();
+                generateResponseState.ensureReopenedThoughtSegmentAfterFunctionCallsIfNeeded();
                 const loadContextWindow = async (avoidReloadingHistory = false) => {
                     await generateResponseState.loadContextWindow(generateResponseState.getResolvedHistoryWithCurrentModelResponse(), generateResponseState.getContextWindowsHistoryWithCurrentModelResponse(), false, avoidReloadingHistory);
                 };
@@ -134,23 +139,25 @@
                 await generateResponseState.alignCurrentSequenceStateWithCurrentTokens();
                 await generateResponseState.createNewEvaluationIterator();
                 while (await generateResponseState.iterateEvaluation()) {
-                    generateResponseState.
-
-
-                    generateResponseState.
-
-
-
-
+                    if (!generateResponseState.holdPartialTokensForNextEvaluation()) {
+                        generateResponseState.waitOnPartialCharactersOrWhiteSpaceTokens();
+                        generateResponseState.detectAndHandleFunctionStartSyntax();
+                        if (generateResponseState.functionEvaluationMode !== false) {
+                            generateResponseState.canAvoidReloadingHistory = false;
+                            generateResponseState.releasePartiallyFreeTokensBeforeFunctionCallStart();
+                            const functionsCallsRes = await generateResponseState.enterFunctionCallingLoop(loadContextWindowForFunctionCallingLoop);
+                            if (functionsCallsRes != null)
+                                return functionsCallsRes;
+                        }
+                        generateResponseState.recordStopGenerationEvaluation();
+                        generateResponseState.popStreamRegulatorFreeTokens();
+                        generateResponseState.removeFoundStartIgnoreTextsFromPendingTokens();
+                        const stopGenerationTriggerRes = generateResponseState.handleStopGenerationTrigger("model");
+                        if (stopGenerationTriggerRes != null)
+                            return stopGenerationTriggerRes;
+                        generateResponseState.spliceIgnoreStartTextDetectedTokens();
+                        generateResponseState.moveFreePendingTokensToRes();
                     }
-                    generateResponseState.recordStopGenerationEvaluation();
-                    generateResponseState.popStreamRegulatorFreeTokens();
-                    generateResponseState.removeFoundStartIgnoreTextsFromPendingTokens();
-                    const stopGenerationTriggerRes = generateResponseState.handleStopGenerationTrigger("model");
-                    if (stopGenerationTriggerRes != null)
-                        return stopGenerationTriggerRes;
-                    generateResponseState.spliceIgnoreStartTextDetectedTokens();
-                    generateResponseState.moveFreePendingTokensToRes();
                     const maxTokensTriggerRes = generateResponseState.handleMaxTokensTrigger("model");
                     if (maxTokensTriggerRes != null)
                         return maxTokensTriggerRes;
@@ -174,16 +181,20 @@
     }
     async loadChatAndCompleteUserMessage(history, options = {}) {
         const { initialUserPrompt = "", stopOnAbortSignal = false, onTextChunk, onToken, signal, maxTokens = Math.min(256, Math.ceil(this.context.contextSize / 2)), temperature, minP, topK, topP, seed, grammar, trimWhitespaceSuffix = defaultTrimWhitespaceSuffix, repeatPenalty = {}, tokenBias, evaluationPriority = defaultEvaluationPriority, functions, documentFunctionParams, contextShift = defaultContextShiftOptions, customStopTriggers, lastEvaluationContextWindow: { history: lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift = 0.8 } = {} } = options;
-        const lastEvaluationContextWindowHistoryItem = lastEvaluationContextWindowHistory == null
-            ? null
-            : lastEvaluationContextWindowHistory[lastEvaluationContextWindowHistory.length - 1];
-        const lastEvaluationContextWindowUserMessage = lastEvaluationContextWindowHistoryItem?.type === "user"
-            ? lastEvaluationContextWindowHistoryItem.text
-            : "";
         this.sequence.tokenPredictor?.updateInputTokens?.(this.model.tokenize((findLastModelMessageInChatHistory(history)?.response ?? [])
-            .
+            .map((item) => {
+                if (typeof item === "string")
+                    return item;
+                else if (isChatModelResponseFunctionCall(item))
+                    return null;
+                else if (isChatModelResponseSegment(item))
+                    return item.text;
+                void item;
+                return null;
+            })
+            .filter((item) => item != null)
             .join(" ")));
-        const generateResponseState = new GenerateResponseState(this, this._chatWrapper, history, {
+        const generateResponseState = new GenerateResponseState(this, this._chatWrapper, mergeGeneratedResultWithChatHistory("user", history, [initialUserPrompt]), {
             onTextChunk,
             onToken,
             signal,
@@ -204,27 +215,16 @@
             contextShift,
             customStopTriggers,
             lastEvaluationContextWindow: {
-                history: lastEvaluationContextWindowHistory
-                    ? undefined
-                    : setLastUserTextInChatHistory(lastEvaluationContextWindowHistory, lastEvaluationContextWindowUserMessage + initialUserPrompt),
+                history: mergeGeneratedResultWithChatHistory("user", lastEvaluationContextWindowHistory ?? history, [initialUserPrompt]),
                 minimumOverlapPercentageToPreventContextShift
             }
         });
        return await withLock(this._chatLock, "evaluate", signal, async () => {
            try {
                generateResponseState.ensureLastHistoryItemIsUser();
-                const getInitialUserMessage = (history) => {
-                    const lastResolvedHistoryItem = history[history.length - 1];
-                    if (lastResolvedHistoryItem?.type === "user")
-                        return lastResolvedHistoryItem.text;
-                    return "";
-                };
-                const initialUserMessage = getInitialUserMessage(generateResponseState.resolvedHistory);
-                const contextWindowInitialUserMessage = getInitialUserMessage(generateResponseState.lastContextWindowHistory);
                while (true) {
                    generateResponseState.startTokenLoop();
-                    const { userTextSuffix } = await generateResponseState.loadContextWindow(
-                        this.model.detokenize(generateResponseState.contextWindowsRes)), true);
+                    const { userTextSuffix } = await generateResponseState.loadContextWindow(mergeGeneratedResultWithChatHistory("user", generateResponseState.resolvedHistory, generateResponseState.segmentHandler.getModelResponseSegments()), mergeGeneratedResultWithChatHistory("user", generateResponseState.lastContextWindowHistory, generateResponseState.segmentHandler.getContextWindowModelResponseSegments()), true);
                    generateResponseState.functionEvaluationMode = false;
                    generateResponseState.addStopGenerationTriggersFromChatWrapper();
                    if (userTextSuffix != null && userTextSuffix.values.length > 0)
@@ -235,7 +235,7 @@
                        return {
                            completion: "",
                            lastEvaluation: {
-                                contextWindow:
+                                contextWindow: mergeGeneratedResultWithChatHistory("user", generateResponseState.lastContextWindowHistory, generateResponseState.segmentHandler.getContextWindowModelResponseSegments()),
                                contextShiftMetadata: generateResponseState.lastHistoryCompressionMetadata
                            },
                            metadata: {
@@ -245,28 +245,30 @@
                    }
                    await generateResponseState.createNewEvaluationIterator();
                    while (await generateResponseState.iterateEvaluation()) {
-                        generateResponseState.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+                        if (!generateResponseState.holdPartialTokensForNextEvaluation()) {
+                            generateResponseState.waitOnPartialCharactersOrWhiteSpaceTokens();
+                            generateResponseState.recordStopGenerationEvaluation();
+                            generateResponseState.popStreamRegulatorFreeTokens();
+                            const stopGenerationTriggerRes = generateResponseState.handleStopGenerationTrigger("user");
+                            if (stopGenerationTriggerRes != null)
+                                return {
+                                    completion: stopGenerationTriggerRes.response,
+                                    lastEvaluation: {
+                                        contextWindow: mergeGeneratedResultWithChatHistory("user", generateResponseState.lastContextWindowHistory, generateResponseState.segmentHandler.getContextWindowModelResponseSegments()),
+                                        contextShiftMetadata: stopGenerationTriggerRes.lastEvaluation.contextShiftMetadata
+                                    },
+                                    metadata: stopGenerationTriggerRes.metadata.stopReason === "customStopTrigger"
+                                        ? stopGenerationTriggerRes.metadata
+                                        : stopGenerationTriggerRes.metadata
+                                };
+                            generateResponseState.moveFreePendingTokensToRes(false);
+                        }
                        const maxTokensTriggerRes = generateResponseState.handleMaxTokensTrigger("user");
                        if (maxTokensTriggerRes != null)
                            return {
                                completion: maxTokensTriggerRes.response,
                                lastEvaluation: {
-                                    contextWindow:
+                                    contextWindow: mergeGeneratedResultWithChatHistory("user", generateResponseState.lastContextWindowHistory, generateResponseState.segmentHandler.getContextWindowModelResponseSegments()),
                                    contextShiftMetadata: maxTokensTriggerRes.lastEvaluation.contextShiftMetadata
                                },
                                metadata: maxTokensTriggerRes.metadata
@@ -278,7 +280,7 @@
                    return {
                        completion: abortRes.response,
                        lastEvaluation: {
-                            contextWindow:
+                            contextWindow: mergeGeneratedResultWithChatHistory("user", generateResponseState.lastContextWindowHistory, generateResponseState.segmentHandler.getContextWindowModelResponseSegments()),
                            contextShiftMetadata: abortRes.lastEvaluation.contextShiftMetadata
                        },
                        metadata: abortRes.metadata
@@ -303,11 +305,18 @@ function removeRawFromHistoryItem(historyItem) {
     newHistoryItem.response = newHistoryItem.response.map((item) => {
         if (typeof item === "string")
             return item;
-        else
+        else if (isChatModelResponseFunctionCall(item))
             return {
                 ...item,
                 rawCall: undefined
             };
+        else if (isChatModelResponseSegment(item))
+            return {
+                ...item,
+                raw: undefined
+            };
+        void item;
+        return item;
     });
     return newHistoryItem;
 }
@@ -370,42 +379,17 @@ async function compressHistoryToFitContextSize({ history, contextShiftSize, cont
         metadata
     };
 }
-function
-
-
-
-
-    if (modelResponse.length > 0 && typeof modelResponse[modelResponse.length - 1] === "string")
-        return modelResponse[modelResponse.length - 1];
-    return "";
+function getLastModelMessageFullResponseFromChatHistory(chatHistory) {
+    const lastModelResponseItem = chatHistory.at(-1);
+    if (lastModelResponseItem == null || lastModelResponseItem.type !== "model")
+        return [];
+    return lastModelResponseItem.response;
 }
 function getLastUserTextFromChatHistory(chatHistory) {
     if (chatHistory.length === 0 || chatHistory[chatHistory.length - 1].type !== "user")
         return "";
     return chatHistory[chatHistory.length - 1].text;
 }
-function setLastModelTextResponseInChatHistory(chatHistory, textResponse) {
-    const newChatHistory = chatHistory.slice();
-    if (newChatHistory.length === 0 || newChatHistory[newChatHistory.length - 1].type !== "model")
-        newChatHistory.push({
-            type: "model",
-            response: []
-        });
-    const lastModelResponseItem = newChatHistory[newChatHistory.length - 1];
-    const newLastModelResponseItem = { ...lastModelResponseItem };
-    newChatHistory[newChatHistory.length - 1] = newLastModelResponseItem;
-    const modelResponse = newLastModelResponseItem.response.slice();
-    newLastModelResponseItem.response = modelResponse;
-    if (modelResponse.length > 0 && typeof modelResponse[modelResponse.length - 1] === "string") {
-        if (textResponse === "")
-            modelResponse.pop();
-        else
-            modelResponse[modelResponse.length - 1] = textResponse;
-    }
-    else if (textResponse !== "")
-        modelResponse.push(textResponse);
-    return newChatHistory;
-}
 function setLastUserTextInChatHistory(chatHistory, userText) {
     const newChatHistory = chatHistory.slice();
     if (newChatHistory.length === 0 || newChatHistory[newChatHistory.length - 1].type !== "user")
@@ -419,11 +403,73 @@ function setLastUserTextInChatHistory(chatHistory, userText) {
     newLastUserItem.text = userText;
     return newChatHistory;
 }
-function
-    if (
-        return
-
-
+function mergeGeneratedResultWithChatHistory(itemType, chatHistory, generatedResult) {
+    if (generatedResult.length === 0 || (generatedResult.length === 1 && generatedResult[0] === ""))
+        return chatHistory;
+    const newChatHistory = chatHistory.slice();
+    if (itemType === "user") {
+        let lastUserItem = newChatHistory.at(-1);
+        if (lastUserItem?.type !== "user") {
+            lastUserItem = {
+                type: "user",
+                text: ""
+            };
+            newChatHistory.push(lastUserItem);
+        }
+        const newLastUserItem = { ...lastUserItem };
+        newChatHistory[newChatHistory.length - 1] = newLastUserItem;
+        newLastUserItem.text += generatedResult
+            .map((item) => {
+                if (typeof item === "string")
+                    return item;
+                return item.text;
+            })
+            .join("");
+        return newChatHistory;
+    }
+    else {
+        let lastModelItem = newChatHistory.at(-1);
+        if (lastModelItem?.type !== "model") {
+            lastModelItem = {
+                type: "model",
+                response: []
+            };
+            newChatHistory.push(lastModelItem);
+        }
+        const newLastModelItem = { ...lastModelItem };
+        newChatHistory[newChatHistory.length - 1] = newLastModelItem;
+        const modelResponse = newLastModelItem.response.slice();
+        newLastModelItem.response = modelResponse;
+        const firstGeneratedResultItem = generatedResult[0];
+        if (firstGeneratedResultItem == null)
+            return newChatHistory;
+        const lastModelResponseItem = modelResponse.at(-1);
+        if (typeof firstGeneratedResultItem === "string" && typeof lastModelResponseItem === "string") {
+            modelResponse[modelResponse.length - 1] = lastModelResponseItem + firstGeneratedResultItem;
+        }
+        else if (typeof firstGeneratedResultItem !== "string" && isChatModelResponseSegment(firstGeneratedResultItem) &&
+            typeof lastModelResponseItem !== "string" && isChatModelResponseSegment(lastModelResponseItem) &&
+            !lastModelResponseItem.ended && lastModelResponseItem.segmentType === firstGeneratedResultItem.segmentType) {
+            modelResponse[modelResponse.length - 1] = {
+                ...lastModelResponseItem,
+                ...firstGeneratedResultItem,
+                text: lastModelResponseItem.text + firstGeneratedResultItem.text,
+                ended: firstGeneratedResultItem.ended,
+                raw: (lastModelResponseItem.raw != null && firstGeneratedResultItem.raw != null)
+                    ? LlamaText([
+                        LlamaText.fromJSON(lastModelResponseItem.raw),
+                        LlamaText.fromJSON(firstGeneratedResultItem.raw)
+                    ]).toJSON()
+                    : undefined,
+                startTime: lastModelResponseItem.startTime,
+                endTime: firstGeneratedResultItem.endTime
+            };
+        }
+        else
+            modelResponse.push(firstGeneratedResultItem);
+        pushAll(modelResponse, generatedResult.slice(1));
+        return newChatHistory;
+    }
 }
 function findLastUserMessageInChatHistory(chatHistory) {
     for (let i = chatHistory.length - 1; i >= 0; i--) {
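
The removed `setLastModelTextResponseInChatHistory` could only overwrite the trailing text of the last model message; its replacement, `mergeGeneratedResultWithChatHistory` (added above), merges a whole list of generated items into the last user or model item, concatenating adjacent strings and joining unfinished segments of the same `segmentType`. The snippet below is a deliberately simplified, string-only re-implementation meant to illustrate the merge behavior; it is not the package's code and it ignores segments, `raw` data, and timestamps.

```ts
// Simplified illustration of the string-only merge case.
type SimpleHistoryItem =
    | {type: "user", text: string}
    | {type: "model", response: string[]};

function mergeModelText(history: SimpleHistoryItem[], generated: string[]): SimpleHistoryItem[] {
    if (generated.length === 0)
        return history;

    const newHistory = history.slice();
    let last = newHistory.at(-1);
    if (last?.type !== "model") {
        last = {type: "model", response: []};
        newHistory.push(last);
    }

    // Concatenate the first generated chunk onto a trailing string, then append the rest.
    const response = last.response.slice();
    if (typeof response.at(-1) === "string")
        response[response.length - 1] += generated[0];
    else
        response.push(generated[0]);
    response.push(...generated.slice(1));

    newHistory[newHistory.length - 1] = {...last, response};
    return newHistory;
}

const history: SimpleHistoryItem[] = [
    {type: "user", text: "Hi"},
    {type: "model", response: ["Hel"]}
];
console.log(mergeModelText(history, ["lo", " world"]));
// → [{type: "user", text: "Hi"}, {type: "model", response: ["Hello", " world"]}]
```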
@@ -486,6 +532,7 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
         throw new DisposedError();
     const model = sequence.model;
     const context = sequence.context;
+    let removeRawFromHistory = false;
     if (isFirstEvaluation && lastEvaluationContextWindowHistory != null && sequence.isLoadedToMemory) {
         const newContextWindow = lastEvaluationContextWindowHistory.slice();
         if (endWithUserText) {
@@ -514,7 +561,7 @@
                 history: newContextWindow,
                 stopGenerationTriggers,
                 tokens,
-
+                removeRawFromHistory,
                 newHistoryCompressionMetadata: lastHistoryCompressionMetadata,
                 ignoreStartText: ignoreStartText ?? [],
                 functionCallInitiallyEngaged: functionCall?.initiallyEngaged ?? false,
@@ -523,9 +570,10 @@
             };
         }
     }
-
-
-
+    removeRawFromHistory = !sequence.isLoadedToMemory;
+    resolvedHistory = removeRawFromHistory
+        ? resolvedHistory.map(removeRawFromHistoryItem)
+        : resolvedHistory.slice();
     if (resolvedContextShift.lastEvaluationMetadata != null) {
         const contextShiftSize = resolvedContextShift.size instanceof Function
             ? await resolvedContextShift.size(sequence)
@@ -550,7 +598,7 @@
             history: compressedHistory,
             stopGenerationTriggers,
             tokens: contextText.tokenize(model.tokenizer),
-
+            removeRawFromHistory,
             newHistoryCompressionMetadata: metadata,
             ignoreStartText: ignoreStartText ?? [],
             functionCallInitiallyEngaged: functionCall?.initiallyEngaged ?? false,
@@ -570,7 +618,7 @@
         history: resolvedHistory,
         stopGenerationTriggers,
         tokens,
-
+        removeRawFromHistory,
         newHistoryCompressionMetadata: lastHistoryCompressionMetadata,
         ignoreStartText: ignoreStartText ?? [],
         functionCallInitiallyEngaged: functionCall?.initiallyEngaged ?? false,
@@ -601,7 +649,7 @@
         history: compressedHistory,
         stopGenerationTriggers,
         tokens: contextText.tokenize(model.tokenizer),
-
+        removeRawFromHistory,
         newHistoryCompressionMetadata: metadata,
         ignoreStartText: ignoreStartText ?? [],
         functionCallInitiallyEngaged: functionCall?.initiallyEngaged ?? false,
@@ -615,6 +663,7 @@ class GenerateResponseState {
     history;
     onTextChunk;
     onToken;
+    onResponseChunk;
     signal;
     stopOnAbortSignal;
     maxTokens;
@@ -638,7 +687,6 @@
     repeatPenaltyEnabled;
     resolvedContextShift;
     resolvedRepeatPenalty;
-    lastModelResponse;
     grammarEvaluationState;
     functionNameGrammar;
     functionsGrammar;
@@ -651,10 +699,13 @@
     ignoreStartTextDetector = new StopGenerationDetector();
     locksToReleaseOnValidGeneration = [];
     resolvedHistory;
+    noRawInResolvedHistory;
     res = [];
     pendingTokens = [];
     ignoredStartTextTokens = [];
     resFunctionCalls = [];
+    segmentHandler;
+    pendingPartialTokens = [];
     functionEvaluationMode = false;
     currentFunctionCallPreviousText = LlamaText([]);
     currentFunctionCallCurrentPartTokens = [];
@@ -678,8 +729,6 @@
     disengageInitiallyEngagedFunctionCall = [];
     userTextSuffix = undefined;
     tokens = [];
-    contextWindowLastModelResponse = "";
-    contextWindowsRes = [];
     // token evaluation loop
     evaluationIterator;
     currentIteration;
@@ -688,12 +737,13 @@
     currentTokens = [];
     currentText = "";
     currentQueuedTokenRelease;
-    constructor(llamaChat, chatWrapper, history, { onTextChunk, onToken, signal, stopOnAbortSignal = false, maxTokens, temperature, minP, topK, topP, seed, grammar, trimWhitespaceSuffix = defaultTrimWhitespaceSuffix, repeatPenalty = {}, tokenBias, evaluationPriority = defaultEvaluationPriority, functions, onFunctionCall, documentFunctionParams, maxParallelFunctionCalls, contextShift = defaultContextShiftOptions, customStopTriggers, lastEvaluationContextWindow: { history: lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift = 0.5 } = {} } = {}) {
+    constructor(llamaChat, chatWrapper, history, { onTextChunk, onToken, onResponseChunk, signal, stopOnAbortSignal = false, maxTokens, temperature, minP, topK, topP, seed, grammar, trimWhitespaceSuffix = defaultTrimWhitespaceSuffix, repeatPenalty = {}, tokenBias, evaluationPriority = defaultEvaluationPriority, functions, onFunctionCall, documentFunctionParams, maxParallelFunctionCalls, contextShift = defaultContextShiftOptions, customStopTriggers, lastEvaluationContextWindow: { history: lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift = 0.5 } = {} } = {}) {
         this.llamaChat = llamaChat;
         this.chatWrapper = chatWrapper;
         this.history = history;
         this.onTextChunk = safeEventCallback(onTextChunk);
         this.onToken = safeEventCallback(onToken);
+        this.onResponseChunk = safeEventCallback(onResponseChunk);
         this.signal = signal;
         this.stopOnAbortSignal = stopOnAbortSignal;
         this.maxTokens = maxTokens;
@@ -718,9 +768,10 @@
             throw this.signal.reason;
         if (this.llamaChat.disposed)
             throw new DisposedError();
-        this.
-
-
+        this.noRawInResolvedHistory = !this.llamaChat.sequence.isLoadedToMemory;
+        this.resolvedHistory = this.noRawInResolvedHistory
+            ? this.history.map(removeRawFromHistoryItem)
+            : this.history.slice();
         this.resolvedContextShift = {
             ...defaultContextShiftOptions,
             ...removeNullFields(this.contextShift)
@@ -731,7 +782,6 @@
             ...(repeatPenalty ?? {}),
             lastTokens: repeatPenalty?.lastTokens ?? defaultRepeatPenaltyLastTokens
         };
-        this.lastModelResponse = getLastTextModelResponseFromChatHistory(this.resolvedHistory);
         this.repeatPenaltyEnabled = this.resolvedRepeatPenalty.lastTokens > 0;
         this.grammarEvaluationState = this.grammar != null
             ? new LlamaGrammarEvaluationState({ model: this.llamaChat.model, grammar: this.grammar })
@@ -742,7 +792,7 @@
         this.functionsGrammar = undefined;
         this.functionsEvaluationState = undefined;
         this.lastContextWindowHistory = lastEvaluationContextWindowHistory ?? this.resolvedHistory;
-        this.lastHistoryCompressionMetadata = this.resolvedContextShift;
+        this.lastHistoryCompressionMetadata = this.resolvedContextShift.lastEvaluationMetadata;
         if (this.customStopTriggers != null)
             StopGenerationDetector.resolveStopTriggers(this.customStopTriggers, this.llamaChat.model.tokenizer)
                 .map((stopTrigger) => this.customStopGenerationTriggersDetector.addStopTrigger(stopTrigger));
@@ -754,6 +804,22 @@
                 this.chatWrapper.settings.functions?.parallelism?.call?.sectionPrefix ?? "",
                 this.chatWrapper.settings.functions.call.prefix
             ]), this.llamaChat.model.tokenizer));
+        const segmentDefinitions = new Map();
+        for (const segmentType of allSegmentTypes) {
+            const segmentDefinition = getChatWrapperSegmentDefinition(this.chatWrapper.settings, segmentType);
+            if (segmentDefinition != null)
+                segmentDefinitions.set(segmentType, segmentDefinition);
+        }
+        this.segmentHandler = new SegmentHandler({
+            model: this.llamaChat.model,
+            onTextChunk: this.onTextChunk,
+            onToken: this.onToken,
+            onResponseChunk: this.onResponseChunk,
+            previousTokens: this.getLastTokens(),
+            closeAllSegments: this.chatWrapper.settings.segments?.closeAllSegments,
+            segmentDefinitions,
+            initialSegmentStack: SegmentHandler.getStackFromModelResponse(getLastModelMessageFullResponseFromChatHistory(this.resolvedHistory))
+        });
         this.getPenaltyTokens = this.getPenaltyTokens.bind(this);
     }
     async dispose() {
@@ -763,19 +829,47 @@
         await this.dispose();
     }
     ensureLastHistoryItemIsModel() {
-        if (this.resolvedHistory.
+        if (this.resolvedHistory.at(-1)?.type !== "model")
             this.resolvedHistory.push({
                 type: "model",
                 response: []
             });
     }
     ensureLastHistoryItemIsUser() {
-        if (this.resolvedHistory.
+        if (this.resolvedHistory.at(-1)?.type !== "user")
             this.resolvedHistory.push({
                 type: "user",
                 text: ""
             });
     }
+    ensureReopenedThoughtSegmentAfterFunctionCallsIfNeeded() {
+        if (this.chatWrapper.settings.segments?.thought?.reopenAfterFunctionCalls !== true)
+            return;
+        const lastModelResponseItem = this.resolvedHistory.at(-1);
+        if (lastModelResponseItem == null || lastModelResponseItem.type !== "model")
+            return;
+        const lastResponse = lastModelResponseItem.response.at(-1);
+        if (lastResponse == null)
+            return;
+        const lastResponseIsFunctionCall = typeof lastResponse !== "string" && lastResponse.type === "functionCall";
+        if (!lastResponseIsFunctionCall)
+            return;
+        const currentResponseSegmentsStack = SegmentHandler.getStackFromModelResponse(lastModelResponseItem.response);
+        if (currentResponseSegmentsStack.includes("thought"))
+            return;
+        const hadThoughtSegments = this.resolvedHistory.some((chatItem) => {
+            if (chatItem.type !== "model")
+                return false;
+            return chatItem.response.some((responseItem) => {
+                if (typeof responseItem === "string")
+                    return false;
+                return responseItem.type === "segment" && responseItem.segmentType === "thought";
+            });
+        });
+        if (!hadThoughtSegments)
+            return;
+        this.segmentHandler.openSegment("thought");
+    }
     ensureNotAborted() {
         if (this.signal?.aborted && (!this.stopOnAbortSignal || this.res.length === 0))
             throw this.signal.reason;
@@ -784,7 +878,7 @@
     }
     getPenaltyTokens() {
         if (this.llamaChat.disposed)
-
+            return [];
         let punishTokens = this.res.slice(-this.resolvedRepeatPenalty.lastTokens);
         if (this.resolvedRepeatPenalty.punishTokensFilter != null)
             punishTokens = this.resolvedRepeatPenalty.punishTokensFilter(punishTokens);
@@ -796,24 +890,10 @@
         return punishTokens;
     }
     getResolvedHistoryWithCurrentModelResponse() {
-
-            return this.resolvedHistory;
-        let modelResponse = this.llamaChat.model.detokenize(this.res);
-        if (this.grammar?.trimWhitespaceSuffix || this.trimWhitespaceSuffix)
-            modelResponse = modelResponse.trimEnd();
-        if (modelResponse === "")
-            return this.resolvedHistory;
-        return setLastModelTextResponseInChatHistory(this.resolvedHistory, this.lastModelResponse + modelResponse);
+        return mergeGeneratedResultWithChatHistory("model", this.resolvedHistory, this.segmentHandler.getModelResponseSegments());
     }
     getContextWindowsHistoryWithCurrentModelResponse() {
-
-            return this.lastContextWindowHistory;
-        let modelResponse = this.llamaChat.model.detokenize(this.contextWindowsRes);
-        if (this.grammar?.trimWhitespaceSuffix || this.trimWhitespaceSuffix)
-            modelResponse = modelResponse.trimEnd();
-        if (modelResponse === "")
-            return this.lastContextWindowHistory;
-        return setLastModelTextResponseInChatHistory(this.lastContextWindowHistory, this.contextWindowLastModelResponse + modelResponse);
+        return mergeGeneratedResultWithChatHistory("model", this.lastContextWindowHistory, this.segmentHandler.getContextWindowModelResponseSegments());
     }
     removeFoundStartIgnoreTextsFromPendingTokens(forceRemove = false) {
         if (!this.removedStartTextToIgnore && this.res.length === 0 && this.pendingTokens.length > 0 &&
@@ -826,14 +906,26 @@
                 this.contextWindowTokens,
                 this.ignoredStartTextTokens
             ]);
+            const pendingPartialTokens = [];
             for (let i = 0; i < this.pendingTokens.length; i++) {
+                const currentToken = this.pendingTokens[i];
+                const tokens = [...pendingPartialTokens, currentToken];
+                const text = this.llamaChat.model.detokenize(tokens, false, lastTokensForDetokenizer);
+                if (pendingPartialTokens.length === 0 &&
+                    text.endsWith(UNKNOWN_UNICODE_CHAR) &&
+                    !this.llamaChat.model.isSpecialToken(currentToken) &&
+                    !this.llamaChat.model.isEogToken(currentToken)) {
+                    pendingPartialTokens.length = 0;
+                    pushAll(pendingPartialTokens, tokens);
+                    continue;
+                }
                 this.ignoreStartTextDetector.recordGeneration({
-                    text: this.llamaChat.model.detokenize(
-                    tokens
+                    text: this.llamaChat.model.detokenize(tokens, false, lastTokensForDetokenizer),
+                    tokens,
                     startNewChecks: i === 0,
                     triggerMustStartWithGeneration: true
                 });
-                lastTokensForDetokenizer
+                pushAll(lastTokensForDetokenizer, tokens);
                 if (this.ignoreStartTextDetector.hasTriggeredStops) {
                     mostExhaustiveTriggeredStops = this.ignoreStartTextDetector.getTriggeredStops();
                     this.ignoreStartTextDetector.clearTriggeredStops();
@@ -902,11 +994,12 @@
         const queuedChunkTokens = this.streamRegulator.getAllQueuedChunkTokens();
         const functionCallsTokens = this.getContextWindowFunctionCallsTokens();
         if (!avoidReloadingHistory || !this.canAvoidReloadingHistory || !this.llamaChat.sequence.isLoadedToMemory) {
-            const { history: contextWindowHistory, stopGenerationTriggers, tokens: contextWindowTokens,
+            const { history: contextWindowHistory, stopGenerationTriggers, tokens: contextWindowTokens, removeRawFromHistory, newHistoryCompressionMetadata, ignoreStartText, functionCallInitiallyEngaged, disengageInitiallyEngagedFunctionCall, userTextSuffix } = await getContextWindow({
                 resolvedHistory: resolvedHistory,
                 resolvedContextShift: this.resolvedContextShift,
                 lastHistoryCompressionMetadata: this.lastHistoryCompressionMetadata,
-                pendingTokensCount: this.pendingTokens.length + queuedChunkTokens.length + functionCallsTokens.length
+                pendingTokensCount: this.pendingTokens.length + queuedChunkTokens.length + functionCallsTokens.length +
+                    this.pendingPartialTokens.length,
                 isFirstEvaluation: this.isFirstEvaluation,
                 chatWrapper: this.chatWrapper,
                 lastEvaluationContextWindowHistory: resolvedContextWindowsHistory,
@@ -924,19 +1017,22 @@
             this.functionCallInitiallyEngaged = functionCallInitiallyEngaged;
             this.disengageInitiallyEngagedFunctionCall = disengageInitiallyEngagedFunctionCall;
             this.userTextSuffix = userTextSuffix;
-            this.resolvedHistory = newResolvedHistory;
             this.lastHistoryCompressionMetadata = newHistoryCompressionMetadata;
             this.lastContextWindowHistory = contextWindowHistory;
-            this.
-            this.contextWindowsRes = [];
+            this.segmentHandler.resetContextWindow();
             this.canAvoidReloadingHistory = true;
+            if (removeRawFromHistory && !this.noRawInResolvedHistory) {
+                this.noRawInResolvedHistory = true;
+                this.resolvedHistory = this.resolvedHistory.map(removeRawFromHistoryItem);
+            }
         }
         this.tokens = [
             ...this.contextWindowTokens,
             ...this.ignoredStartTextTokens,
             ...this.pendingTokens,
             ...queuedChunkTokens,
-            ...functionCallsTokens
+            ...functionCallsTokens,
+            ...this.pendingPartialTokens
         ];
         if (avoidReloadingHistory && this.tokens.length >= this.llamaChat.sequence.context.contextSize - 1)
             return await this.loadContextWindow(resolvedHistory, resolvedContextWindowsHistory, endWithUserText, false);
@@ -1017,24 +1113,24 @@
                     pushAll(prefixDetectorRecordedTokens, tokens);
                 }
             }
-            for await (const
+            for await (const tokens of this.evaluateWithContextShift(loadContextWindow)) {
                 const stopGenerationTriggerRes = this.handleStopGenerationTrigger("model");
                 if (stopGenerationTriggerRes != null)
                     return stopGenerationTriggerRes;
-                this.currentFunctionCallCurrentPartTokens
+                pushAll(this.currentFunctionCallCurrentPartTokens, tokens);
                 this.disengageInitiallyEngagedFunctionMode.recordGeneration({
                     text: this.currentText,
                     tokens: this.currentTokens,
-                    startNewChecks: this.currentFunctionCallCurrentPartTokens.length ===
+                    startNewChecks: this.currentFunctionCallCurrentPartTokens.length === tokens.length,
                     triggerMustStartWithGeneration: true
                 });
                 if (prefixDetector.hasTriggeredStops)
-                    afterPrefixLeftoverTokens
+                    pushAll(afterPrefixLeftoverTokens, tokens);
                 else {
                     prefixDetector.recordGeneration({
                         text: this.currentText,
                         tokens: this.currentTokens,
-                        startNewChecks: this.currentFunctionCallCurrentPartTokens.length ===
+                        startNewChecks: this.currentFunctionCallCurrentPartTokens.length === tokens.length,
                         triggerMustStartWithGeneration: true
                     });
                     pushAll(prefixDetectorRecordedTokens, this.currentTokens);
@@ -1109,8 +1205,8 @@
                 }
             }
         }
-        for await (const
-            this.currentFunctionCallCurrentPartTokens
+        for await (const tokens of this.evaluateWithContextShift(loadContextWindow)) {
+            pushAll(this.currentFunctionCallCurrentPartTokens, tokens);
             functionNameGenerationDoneDetector.recordGeneration({
                 text: this.currentText,
                 tokens: this.currentTokens
@@ -1141,8 +1237,16 @@
         if (functionDefinition == null)
             throw new Error(`Function "${this.functionEvaluationFunctionName}" is not provided in the functions object`);
         else if (functionDefinition.params == null) {
-
-
+            const emptyCallParamsPlaceholder = this.chatWrapper.settings?.functions?.call?.emptyCallParamsPlaceholder;
+            if (emptyCallParamsPlaceholder !== undefined && emptyCallParamsPlaceholder !== "") {
+                params = structuredClone(emptyCallParamsPlaceholder);
+                paramsText = jsonDumps(params);
+                pushAll(this.currentFunctionCallCurrentPartTokens, this.llamaChat.model.tokenize(paramsText));
+            }
+            else {
+                params = undefined;
+                paramsText = "";
+            }
         }
         else {
             const functionParamsGenerationDoneDetector = new StopGenerationDetector();
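
The hunk above also adds a fallback for functions that declare no `params` schema: if the chat wrapper defines `functions.call.emptyCallParamsPlaceholder`, that placeholder is cloned and serialized with `jsonDumps` so the rendered call still carries a valid parameters section; otherwise the call is emitted without parameters. A rough sketch of that decision, with a hypothetical placeholder value (the setting name comes from the diff, the rest is illustrative):

```ts
// Sketch of the fallback logic added above; JSON.stringify stands in for the
// package's jsonDumps() helper, and {} is a hypothetical placeholder value.
function resolveEmptyCallParams(emptyCallParamsPlaceholder: unknown) {
    if (emptyCallParamsPlaceholder !== undefined && emptyCallParamsPlaceholder !== "") {
        const params = structuredClone(emptyCallParamsPlaceholder);
        return {params, paramsText: JSON.stringify(params)};
    }
    return {params: undefined, paramsText: ""};
}

console.log(resolveEmptyCallParams({}));        // { params: {}, paramsText: "{}" }
console.log(resolveEmptyCallParams(undefined)); // { params: undefined, paramsText: "" }
```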
@@ -1154,8 +1258,8 @@
             });
             StopGenerationDetector.resolveStopTriggers(this.functionsGrammar.stopGenerationTriggers, this.llamaChat.model.tokenizer)
                 .map((stopTrigger) => functionParamsGenerationDoneDetector.addStopTrigger(stopTrigger));
-            for await (const
-                this.currentFunctionCallCurrentPartTokens
+            for await (const tokens of this.evaluateWithContextShift(loadContextWindow)) {
+                pushAll(this.currentFunctionCallCurrentPartTokens, tokens);
                 functionParamsGenerationDoneDetector.recordGeneration({
                     text: this.currentText,
                     tokens: this.currentTokens
@@ -1213,8 +1317,8 @@
             LlamaText(new SpecialToken("EOT"))
         ], this.llamaChat.model.tokenizer)
             .map((stopTrigger) => sectionSuffixDetector.addStopTrigger(stopTrigger));
-        for await (const
-            this.currentFunctionCallCurrentPartTokens
+        for await (const tokens of this.evaluateWithContextShift(loadContextWindow)) {
+            pushAll(this.currentFunctionCallCurrentPartTokens, tokens);
             sectionSuffixDetector.recordGeneration({
                 text: this.currentText,
                 tokens: this.currentTokens,
@@ -1258,17 +1362,17 @@
     returnFunctionCallResults() {
         if (this.resFunctionCalls.length > 0) {
             this.releasePartiallyFreeTokensBeforeFunctionCallStart();
-
-
-
-            modelResponse = modelResponse.trimEnd();
-            contextWindowModelResponse = contextWindowModelResponse.trimEnd();
-        }
+            this.segmentHandler.onFinishedGeneration();
+            const trimWhitespaceSuffix = this.grammar?.trimWhitespaceSuffix || this.trimWhitespaceSuffix;
+            const responseSegments = this.segmentHandler.getModelResponseSegments(trimWhitespaceSuffix);
             return {
-                response:
+                response: responseSegments
+                    .filter((segment) => typeof segment === "string")
+                    .join(""),
+                fullResponse: responseSegments,
                 lastEvaluation: {
-                    contextWindow:
-                    cleanHistory:
+                    contextWindow: mergeGeneratedResultWithChatHistory("model", this.lastContextWindowHistory, this.segmentHandler.getContextWindowModelResponseSegments(trimWhitespaceSuffix)),
+                    cleanHistory: mergeGeneratedResultWithChatHistory("model", this.resolvedHistory, responseSegments),
                     contextShiftMetadata: this.lastHistoryCompressionMetadata
                 },
                 functionCalls: this.resFunctionCalls.map((functionCall) => {
@@ -1292,9 +1396,10 @@
         await this.alignCurrentSequenceStateWithCurrentTokens();
         await this.createNewEvaluationIterator();
         while (await this.iterateEvaluation()) {
-            if (this.
+            if (this.currentTokens.length === 0)
                 break;
-
+            if (!this.holdPartialTokensForNextEvaluation())
+                yield this.currentTokens;
             if (this.shouldAbort)
                 return;
             if (this.updateShouldContextShift())
@@ -1367,9 +1472,14 @@
         this.currentIterationReplacementToken = undefined;
         this.ensureNotAborted();
         this.generatedTokens++;
-        if (this.currentIteration != null && this.currentIteration?.done !== true) {
-            this.currentToken = this.currentIteration
-            this.currentTokens =
+        if ((this.currentIteration != null && this.currentIteration?.done !== true) || this.pendingPartialTokens.length !== 0) {
+            this.currentToken = this.currentIteration?.value ?? undefined;
+            this.currentTokens = this.currentToken != null
+                ? this.pendingPartialTokens.length === 0
+                    ? [this.currentToken]
+                    : [...this.pendingPartialTokens, this.currentToken]
+                : [...this.pendingPartialTokens];
+            this.pendingPartialTokens.length = 0;
             this.currentText = this.llamaChat.model.detokenize(this.currentTokens, false, this.getLastTokens());
             if (this.functionEvaluationMode === false)
                 this.currentQueuedTokenRelease = this.streamRegulator.addChunk({
@@ -1382,6 +1492,19 @@
         }
         return false;
     }
+    holdPartialTokensForNextEvaluation() {
+        if (this.pendingPartialTokens.length === 0 &&
+            this.currentText.endsWith(UNKNOWN_UNICODE_CHAR) &&
+            this.currentToken != null &&
+            !this.llamaChat.model.isSpecialToken(this.currentToken) &&
+            !this.llamaChat.model.isEogToken(this.currentToken)) {
+            this.pendingPartialTokens.length = 0;
+            pushAll(this.pendingPartialTokens, this.currentTokens);
+            this.streamRegulator.removeChunkIfLast(this.currentQueuedTokenRelease);
+            return true;
+        }
+        return false;
+    }
     waitOnPartialCharactersOrWhiteSpaceTokens() {
         if (this.currentText.endsWith(UNKNOWN_UNICODE_CHAR) || ((this.grammar?.trimWhitespaceSuffix || this.trimWhitespaceSuffix) && this.currentText?.trim() === "") || (this.currentText === "" && this.locksToReleaseOnValidGeneration.length > 0 &&
             !this.llamaChat.model.isSpecialToken(this.currentToken))) {
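
`holdPartialTokensForNextEvaluation` and the related `pendingPartialTokens` buffering (added above) exist because detokenizing a token whose bytes stop in the middle of a multi-byte UTF-8 character yields the replacement character (`UNKNOWN_UNICODE_CHAR`); such tokens are now held back and prepended to the next iteration instead of being streamed as garbage. The standalone snippet below demonstrates the underlying problem with a streaming `TextDecoder`; it is illustrative only and does not use the package's tokenizer.

```ts
// A multi-byte character split across two chunks decodes to "�" if each
// chunk is flushed on its own, which is what the hold-back logic avoids.
const bytes = new TextEncoder().encode("日"); // three bytes: e6 97 a5

const eager = new TextDecoder();
console.log(eager.decode(bytes.slice(0, 2))); // "�" — partial character

const buffered = new TextDecoder();
const first = buffered.decode(bytes.slice(0, 2), {stream: true}); // "" — held back
const second = buffered.decode(bytes.slice(2));                   // "日"
console.log(first + second);                                       // "日"
```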
@@ -1449,21 +1572,22 @@ class GenerateResponseState {
         const { firstRemainingGenerationAfterStop } = StopGenerationDetector.getFirstRemainingGenerationAfterStop(triggeredStops);
         this.removeFoundStartIgnoreTextsFromPendingTokens(true);
         this.pushPendingTokensAndCallOnToken();
-
-
-
-
-
-
+        this.segmentHandler.onFinishedGeneration();
+        const trimWhitespaceSuffix = this.grammar?.trimWhitespaceSuffix || this.trimWhitespaceSuffix;
+        const responseSegments = this.segmentHandler.getModelResponseSegments(trimWhitespaceSuffix);
+        const response = responseSegments
+            .filter((segment) => typeof segment === "string")
+            .join("");
         const lastEvaluation = {
-            contextWindow:
-            cleanHistory:
+            contextWindow: mergeGeneratedResultWithChatHistory(lastHistoryItemType, this.lastContextWindowHistory, this.segmentHandler.getContextWindowModelResponseSegments(trimWhitespaceSuffix)),
+            cleanHistory: mergeGeneratedResultWithChatHistory(lastHistoryItemType, this.resolvedHistory, responseSegments),
             contextShiftMetadata: this.lastHistoryCompressionMetadata
         };
         const isEogToken = this.llamaChat.model.isEogToken(this.currentToken);
         if (isEogToken || this.stopGenerationDetector.hasTriggeredStops) {
             return {
-                response
+                response,
+                fullResponse: responseSegments,
                 lastEvaluation,
                 metadata: {
                     remainingGenerationAfterStop: firstRemainingGenerationAfterStop,
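
With this change the stop-trigger path returns both `response` (the plain-text parts joined together) and `fullResponse` (the full segment list, whose entries can be plain strings or segment objects such as reasoning segments). An illustrative split of such a result, with `ResponseSegment` as a hypothetical shape mirroring what `getModelResponseSegments()` produces further below:

```ts
// Hypothetical shape based on the segment objects built in getModelResponseSegments() below.
type ResponseSegment =
    string
    | {type: "segment", segmentType: string, text: string, ended: boolean};

function splitFullResponse(fullResponse: ResponseSegment[]) {
    // Matches the `.filter(...).join("")` used for `response` in the hunk above.
    const response = fullResponse
        .filter((item): item is string => typeof item === "string")
        .join("");
    const segments = fullResponse.filter((item) => typeof item !== "string");
    return {response, segments};
}
```
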
@@ -1474,7 +1598,8 @@ class GenerateResponseState {
             };
         }
         return {
-            response
+            response,
+            fullResponse: responseSegments,
             lastEvaluation,
             metadata: {
                 remainingGenerationAfterStop: firstRemainingGenerationAfterStop,
@@ -1511,17 +1636,17 @@ class GenerateResponseState {
     }
     handleMaxTokensTrigger(lastHistoryItemType) {
         if (this.isMaxTokensTriggered()) {
-
-
-
-                modelResponse = modelResponse.trimEnd();
-                contextWindowModelResponse = contextWindowModelResponse.trimEnd();
-            }
+            this.segmentHandler.onFinishedGeneration();
+            const trimWhitespaceSuffix = this.grammar?.trimWhitespaceSuffix || this.trimWhitespaceSuffix;
+            const responseSegments = this.segmentHandler.getModelResponseSegments(trimWhitespaceSuffix);
             return {
-                response:
+                response: responseSegments
+                    .filter((segment) => typeof segment === "string")
+                    .join(""),
+                fullResponse: responseSegments,
                 lastEvaluation: {
-                    contextWindow:
-                    cleanHistory:
+                    contextWindow: mergeGeneratedResultWithChatHistory(lastHistoryItemType, this.lastContextWindowHistory, this.segmentHandler.getContextWindowModelResponseSegments(trimWhitespaceSuffix)),
+                    cleanHistory: mergeGeneratedResultWithChatHistory(lastHistoryItemType, this.resolvedHistory, responseSegments),
                     contextShiftMetadata: this.lastHistoryCompressionMetadata
                 },
                 metadata: {
@@ -1542,17 +1667,17 @@ class GenerateResponseState {
         if (this.shouldAbort && this.signal?.aborted && this.stopOnAbortSignal) {
             if (this.res.length === 0)
                 throw this.signal.reason;
-
-
-
-                modelResponse = modelResponse.trimEnd();
-                contextWindowModelResponse = contextWindowModelResponse.trimEnd();
-            }
+            this.segmentHandler.onFinishedGeneration();
+            const trimWhitespaceSuffix = this.grammar?.trimWhitespaceSuffix || this.trimWhitespaceSuffix;
+            const responseSegments = this.segmentHandler.getModelResponseSegments(trimWhitespaceSuffix);
             return {
-                response:
+                response: responseSegments
+                    .filter((segment) => typeof segment === "string")
+                    .join(""),
+                fullResponse: responseSegments,
                 lastEvaluation: {
-                    contextWindow:
-                    cleanHistory:
+                    contextWindow: mergeGeneratedResultWithChatHistory(lastHistoryItemType, this.lastContextWindowHistory, this.segmentHandler.getContextWindowModelResponseSegments(trimWhitespaceSuffix)),
+                    cleanHistory: mergeGeneratedResultWithChatHistory(lastHistoryItemType, this.resolvedHistory, responseSegments),
                     contextShiftMetadata: this.lastHistoryCompressionMetadata
                 },
                 metadata: {
@@ -1565,10 +1690,8 @@ class GenerateResponseState {
     pushPendingTokensAndCallOnToken() {
         if (this.pendingTokens.length === 0)
             return;
-        this.
-        this.onTextChunk?.(this.llamaChat.model.detokenize(this.pendingTokens, false, this.res));
+        this.segmentHandler.processTokens(this.pendingTokens);
         pushAll(this.res, this.pendingTokens);
-        pushAll(this.contextWindowsRes, this.pendingTokens);
         this.pendingTokens.length = 0;
     }
     getLastTokens(maxTokens = maxRecentDetokenizerTokens) {
@@ -1577,8 +1700,470 @@ class GenerateResponseState {
             this.ignoredStartTextTokens,
             this.pendingTokens,
             this.streamRegulator.getLastQueuedChunkTokens(maxTokens),
-            this.getContextWindowFunctionCallsTokens()
+            this.getContextWindowFunctionCallsTokens(),
+            this.pendingPartialTokens
         ], maxTokens);
     }
 }
+class SegmentHandler {
+    model;
+    onToken;
+    onTextChunk;
+    onResponseChunk;
+    _closeAllSegmentsDetector;
+    _segmentDetectors;
+    _segmentsStack = [];
+    _segmentsStackSet = new Set();
+    _ownedSegmentsStackLength = 0;
+    _segments = [];
+    _segmentsStartTokenTrail = [];
+    _contextWindowSegments = [];
+    _contextWindowStartTokenTrail = [];
+    _initialTokensTrail;
+    _tokensTrail;
+    _streamRegulator = new TokenStreamRegulator();
+    _segmentDefinitions;
+    constructor({ model, onTextChunk, onToken, onResponseChunk, segmentDefinitions, closeAllSegments, initialSegmentStack, previousTokens }) {
+        this.model = model;
+        this.onTextChunk = onTextChunk;
+        this.onToken = onToken;
+        this.onResponseChunk = onResponseChunk;
+        this._initialTokensTrail = previousTokens.slice(-maxRecentDetokenizerTokens);
+        this._segmentsStartTokenTrail = previousTokens.slice(-maxRecentDetokenizerTokens);
+        this._tokensTrail = previousTokens.slice(-maxRecentDetokenizerTokens);
+        this._closeAllSegmentsDetector = closeAllSegments != null
+            ? new StopGenerationDetector()
+                .addStopTrigger(StopGenerationDetector.resolveLlamaTextTrigger(LlamaText(closeAllSegments), this.model.tokenizer))
+            : undefined;
+        this._segmentDetectors = new Map();
+        this._segmentsStack = initialSegmentStack;
+        this._segmentsStackSet = new Set(initialSegmentStack);
+        this._ownedSegmentsStackLength = initialSegmentStack.length;
+        this._segmentDefinitions = segmentDefinitions;
+        for (const [segment, { prefix, suffix }] of segmentDefinitions.entries()) {
+            this._segmentDetectors.set(segment, {
+                prefix: new StopGenerationDetector()
+                    .addStopTrigger(StopGenerationDetector.resolveLlamaTextTrigger(LlamaText(prefix), this.model.tokenizer)),
+                suffix: suffix != null
+                    ? new StopGenerationDetector()
+                        .addStopTrigger(StopGenerationDetector.resolveLlamaTextTrigger(LlamaText(suffix), this.model.tokenizer))
+                    : undefined
+            });
+        }
+    }
+    processTokens(tokens) {
+        if (tokens.length === 0)
+            return;
+        let pendingTokens = [];
+        for (const token of tokens) {
+            pendingTokens.push(token);
+            const currentText = this.model.detokenize(pendingTokens, false, this._tokensTrail);
+            if (currentText.endsWith(UNKNOWN_UNICODE_CHAR))
+                continue;
+            pushAll(this._tokensTrail, pendingTokens);
+            this._processTokens(pendingTokens, currentText);
+            pendingTokens = [];
+        }
+    }
+    onFinishedGeneration() {
+        this._clearDetectors();
+        this._pushCurrentTokens(this._streamRegulator.popFreeChunkTokens());
+    }
+    resetContextWindow() {
+        this._contextWindowSegments.length = 0;
+        this._contextWindowStartTokenTrail.length = 0;
+        pushAll(this._contextWindowStartTokenTrail, this._getTokenTrailFromResult());
+    }
+    openSegment(type) {
+        const now = Date.now();
+        this._segmentsStack.push(type);
+        this._segmentsStackSet.add(type);
+        this._segments.push({ type, tokens: [], ended: false, start: true, startTime: now });
+        this._contextWindowSegments.push({ type, tokens: [], ended: false, start: true, startTime: now });
+        this.onResponseChunk?.({
+            type: "segment",
+            segmentType: type,
+            tokens: [],
+            text: "",
+            segmentStartTime: new Date(now)
+        });
+    }
+    _processTokens(tokens, text) {
+        const queuedTokenRelease = this._streamRegulator.addChunk({
+            tokens,
+            text
+        });
+        const currentType = this._segmentsStack.at(-1);
+        const handleDetector = (stopDetector, action, type) => {
+            if (stopDetector == null)
+                return false;
+            stopDetector.recordGeneration({
+                text,
+                tokens,
+                queuedTokenRelease
+            });
+            if (stopDetector.hasTriggeredStops) {
+                const [leftTokens, leftText] = this._handleTriggeredStopDetector(stopDetector);
+                if (action === "pop")
+                    this._closeSegment(type);
+                else if (action === "push") {
+                    this.openSegment(type);
+                }
+                else if (action === "reset") {
+                    const now = Date.now();
+                    while (this._segmentsStack.length > 0) {
+                        const segmentType = this._segmentsStack.pop();
+                        this._segmentsStackSet.delete(segmentType);
+                        const lastSegment = this._segments.at(-1);
+                        if (lastSegment != null && !(lastSegment instanceof Array) && lastSegment.type === segmentType) {
+                            lastSegment.ended = true;
+                            lastSegment.endTime = now;
+                            this.onResponseChunk?.({
+                                type: "segment",
+                                segmentType: segmentType,
+                                tokens: [],
+                                text: "",
+                                segmentStartTime: undefined,
+                                segmentEndTime: new Date(now)
+                            });
+                        }
+                        else {
+                            this._segments.push({ type: segmentType, tokens: [], ended: true, start: false, endTime: now });
+                            this.onResponseChunk?.({
+                                type: "segment",
+                                segmentType: segmentType,
+                                tokens: [],
+                                text: "",
+                                segmentStartTime: undefined,
+                                segmentEndTime: new Date(now)
+                            });
+                        }
+                        const lastContextWindowSegment = this._contextWindowSegments.at(-1);
+                        if (lastContextWindowSegment != null && !(lastContextWindowSegment instanceof Array) &&
+                            lastContextWindowSegment.type === segmentType)
+                            lastContextWindowSegment.ended = true;
+                        else
+                            this._contextWindowSegments.push({ type: segmentType, tokens: [], ended: true, start: false, endTime: now });
+                    }
+                    this._ownedSegmentsStackLength = 0;
+                }
+                if (leftTokens.length > 0)
+                    this._processTokens(leftTokens, leftText);
+                return true;
+            }
+            return false;
+        };
+        if (currentType != null) {
+            if (handleDetector(this._closeAllSegmentsDetector, "reset", currentType))
+                return;
+            if (handleDetector(this._segmentDetectors.get(currentType)?.suffix, "pop", currentType))
+                return;
+        }
+        else
+            this._closeAllSegmentsDetector?.clearInProgressStops();
+        for (const [type, { prefix, suffix }] of this._segmentDetectors.entries()) {
+            if (!this._segmentsStackSet.has(type)) {
+                if (handleDetector(prefix, "push", type))
+                    return;
+            }
+            else
+                prefix.clearInProgressStops();
+            if (this._segmentsStackSet.has(type)) {
+                // `currentType` suffix is already handled above
+                if (type === currentType && handleDetector(suffix, "pop", type))
+                    return;
+            }
+            else
+                suffix?.clearInProgressStops();
+        }
+        this._pushCurrentTokens(this._streamRegulator.popFreeChunkTokens());
+    }
+    _handleTriggeredStopDetector(stopDetector) {
+        this._clearDetectors(stopDetector);
+        stopDetector.clearInProgressStops();
+        const triggeredStops = stopDetector.getTriggeredStops();
+        const freeTokens = this._streamRegulator.popFreeChunkTokens();
+        const partiallyFreeTokens = this._streamRegulator.getPartiallyFreeChunk(this.model.tokenizer);
+        const queuedTokensBeforeStopTrigger = getQueuedTokensBeforeStopTrigger(triggeredStops, partiallyFreeTokens, this.model.tokenizer);
+        const { firstRemainingGenerationAfterStop } = StopGenerationDetector.getFirstRemainingGenerationAfterStop(triggeredStops);
+        const remainingTokens = typeof firstRemainingGenerationAfterStop === "string"
+            ? firstRemainingGenerationAfterStop === ""
+                ? []
+                : this.model.tokenize(firstRemainingGenerationAfterStop, false)
+            : (firstRemainingGenerationAfterStop ?? []);
+        const remainingText = typeof firstRemainingGenerationAfterStop === "string"
+            ? firstRemainingGenerationAfterStop
+            : this.model.detokenize(remainingTokens, false, queuedTokensBeforeStopTrigger.length === 0
+                ? this._getTokenTrailFromResult()
+                : queuedTokensBeforeStopTrigger);
+        this._pushCurrentTokens([...freeTokens, ...queuedTokensBeforeStopTrigger]);
+        stopDetector.clearTriggeredStops();
+        this._streamRegulator.reset();
+        return [remainingTokens, remainingText];
+    }
+    _closeSegment(type) {
+        if (type == null)
+            return;
+        const lastSegment = this._segments.at(-1);
+        const now = Date.now();
+        if (lastSegment != null && !(lastSegment instanceof Array) && lastSegment.type === type && this._segmentsStack.at(-1) === type) {
+            if (lastSegment.ended !== true) {
+                lastSegment.ended = true;
+                lastSegment.endTime = now;
+                this.onResponseChunk?.({
+                    type: "segment",
+                    segmentType: type,
+                    tokens: [],
+                    text: "",
+                    segmentStartTime: undefined,
+                    segmentEndTime: new Date(now)
+                });
+            }
+            this._segmentsStackSet.delete(this._segmentsStack.pop());
+            if (this._segmentsStack.length < this._ownedSegmentsStackLength)
+                this._ownedSegmentsStackLength = this._segmentsStack.length;
+            const lastContextWindowSegment = this._contextWindowSegments.at(-1);
+            if (lastContextWindowSegment != null && !(lastContextWindowSegment instanceof Array) &&
+                lastContextWindowSegment.type === type && this._segmentsStack.at(-1) === type) {
+                if (lastContextWindowSegment.ended !== true) {
+                    lastContextWindowSegment.ended = true;
+                    lastContextWindowSegment.endTime = now;
+                }
+            }
+            else
+                this._contextWindowSegments.push({ type, tokens: [], ended: true, start: false, endTime: now });
+            return;
+        }
+        const typeIndex = this._segmentsStack.lastIndexOf(type);
+        if (typeIndex < 0)
+            return;
+        for (let i = this._segmentsStack.length - 1; i >= typeIndex; i--) {
+            const segmentType = this._segmentsStack.pop();
+            this._segmentsStackSet.delete(segmentType);
+            if (this._segmentsStack.length < this._ownedSegmentsStackLength)
+                this._ownedSegmentsStackLength = this._segmentsStack.length;
+            this._segments.push({ type: segmentType, tokens: [], ended: true, start: false, endTime: now });
+            this._contextWindowSegments.push({ type: segmentType, tokens: [], ended: true, start: false, endTime: now });
+            this.onResponseChunk?.({
+                type: "segment",
+                segmentType: segmentType,
+                tokens: [],
+                text: "",
+                segmentStartTime: undefined,
+                segmentEndTime: new Date(now)
+            });
+        }
+    }
+    _clearDetectors(skipDetector) {
+        if (this._closeAllSegmentsDetector !== skipDetector) {
+            this._closeAllSegmentsDetector?.clearInProgressStops();
+            this._closeAllSegmentsDetector?.clearTriggeredStops();
+        }
+        for (const { prefix, suffix } of this._segmentDetectors.values()) {
+            if (prefix !== skipDetector) {
+                prefix.clearInProgressStops();
+                prefix.clearTriggeredStops();
+            }
+            if (suffix !== skipDetector) {
+                suffix?.clearInProgressStops();
+                suffix?.clearTriggeredStops();
+            }
+        }
+    }
+    _pushCurrentTokens(tokens) {
+        const lastSegment = this._segments.at(-1);
+        const lastContextWindowSegment = this._contextWindowSegments.at(-1);
+        const type = this._segmentsStack.at(-1);
+        if (type == null) {
+            if (lastSegment == null) {
+                const text = (this.onResponseChunk != null || this.onTextChunk != null)
+                    ? this.model.detokenize(tokens, false, this._getTokenTrailFromResult())
+                    : "";
+                this._segments.push(tokens);
+                this.onToken?.(tokens.slice());
+                this.onTextChunk?.(text);
+                this.onResponseChunk?.({ type: undefined, segmentType: undefined, tokens: tokens.slice(), text });
+            }
+            else {
+                if (lastSegment instanceof Array) {
+                    const text = (this.onResponseChunk != null || this.onTextChunk != null)
+                        ? this.model.detokenize(tokens, false, this._getTokenTrailFromResult())
+                        : "";
+                    pushAll(lastSegment, tokens);
+                    this.onToken?.(tokens);
+                    this.onTextChunk?.(text);
+                    this.onResponseChunk?.({ type: undefined, segmentType: undefined, tokens, text });
+                }
+                else
+                    this._segments.push(tokens);
+            }
+            if (lastContextWindowSegment == null)
+                this._contextWindowSegments.push(tokens.slice());
+            else {
+                if (lastContextWindowSegment instanceof Array)
+                    pushAll(lastContextWindowSegment, tokens);
+                else
+                    this._contextWindowSegments.push(tokens.slice());
+            }
+        }
+        else {
+            const now = Date.now();
+            if (lastSegment == null) {
+                const text = this.onResponseChunk != null
+                    ? this.model.detokenize(tokens, false, this._getTokenTrailFromResult())
+                    : "";
+                this._segments.push({
+                    type,
+                    tokens,
+                    ended: false,
+                    start: this._segmentsStack.length > this._ownedSegmentsStackLength,
+                    startTime: now
+                });
+                this.onResponseChunk?.({
+                    type: "segment",
+                    segmentType: type,
+                    tokens: tokens.slice(),
+                    text,
+                    segmentStartTime: new Date(now)
+                });
+            }
+            else {
+                const text = this.onResponseChunk != null
+                    ? this.model.detokenize(tokens, false, this._getTokenTrailFromResult())
+                    : "";
+                if (lastSegment instanceof Array || lastSegment.type !== type) {
+                    this._segments.push({
+                        type,
+                        tokens,
+                        ended: false,
+                        start: this._segmentsStack.length > this._ownedSegmentsStackLength,
+                        startTime: now
+                    });
+                    this.onResponseChunk?.({
+                        type: "segment",
+                        segmentType: type,
+                        tokens: tokens.slice(),
+                        text,
+                        segmentStartTime: new Date(now)
+                    });
+                }
+                else {
+                    pushAll(lastSegment.tokens, tokens);
+                    this.onResponseChunk?.({
+                        type: "segment",
+                        segmentType: type,
+                        tokens: tokens.slice(),
+                        text,
+                        segmentStartTime: undefined
+                    });
+                }
+            }
+            if (lastContextWindowSegment == null)
+                this._contextWindowSegments.push({
+                    type,
+                    tokens: tokens.slice(),
+                    ended: false,
+                    start: this._segmentsStack.length > this._ownedSegmentsStackLength,
+                    startTime: now
+                });
+            else {
+                if (lastContextWindowSegment instanceof Array || lastContextWindowSegment.type !== type)
+                    this._contextWindowSegments.push({
+                        type,
+                        tokens: tokens.slice(),
+                        ended: false,
+                        start: this._segmentsStack.length > this._ownedSegmentsStackLength,
+                        startTime: now
+                    });
+                else
+                    pushAll(lastContextWindowSegment.tokens, tokens);
+            }
+        }
+    }
+    _getTokenTrailFromResult() {
+        const res = [];
+        for (let i = this._segments.length - 1; i >= 0; i--) {
+            const segment = this._segments[i];
+            const segmentTokens = segment instanceof Array
+                ? segment
+                : segment.tokens;
+            for (let j = segmentTokens.length - 1; j >= 0; j--) {
+                res.unshift(segmentTokens[j]);
+                if (res.length >= maxRecentDetokenizerTokens)
+                    return res;
+            }
+        }
+        for (let i = this._initialTokensTrail.length - 1; i >= 0; i--) {
+            res.unshift(this._initialTokensTrail[i]);
+            if (res.length >= maxRecentDetokenizerTokens)
+                return res;
+        }
+        return res;
+    }
+    getModelResponseSegments(trimWhitespaceSuffix = false) {
+        return this._getModelResponseForSegments(this._segments, this._segmentsStartTokenTrail, trimWhitespaceSuffix);
+    }
+    getContextWindowModelResponseSegments(trimWhitespaceSuffix = false) {
+        return this._getModelResponseForSegments(this._contextWindowSegments, this._contextWindowStartTokenTrail, trimWhitespaceSuffix);
+    }
+    _getModelResponseForSegments(rawSegments, recentTokens, trimWhitespaceSuffix) {
+        let tokenTrail = resolveLastTokens([recentTokens]);
+        return rawSegments.map((rawSegment, index) => {
+            const isLast = index === rawSegments.length - 1;
+            if (rawSegment instanceof Array) {
+                let text = this.model.detokenize(rawSegment, false, tokenTrail);
+                if (isLast && trimWhitespaceSuffix)
+                    text = text.trimEnd();
+                tokenTrail = resolveLastTokens([tokenTrail, rawSegment]);
+                return text;
+            }
+            let text = this.model.detokenize(rawSegment.tokens, false, tokenTrail);
+            if (isLast && rawSegment.ended && trimWhitespaceSuffix)
+                text = text.trimEnd();
+            tokenTrail = resolveLastTokens([tokenTrail, rawSegment.tokens]);
+            const segmentDefinition = this._segmentDefinitions.get(rawSegment.type);
+            return {
+                type: "segment",
+                segmentType: rawSegment.type,
+                text,
+                ended: rawSegment.ended,
+                raw: segmentDefinition == null
+                    ? LlamaText([text]).toJSON()
+                    : LlamaText([
+                        rawSegment.start
+                            ? segmentDefinition.prefix
+                            : "",
+                        text,
+                        rawSegment.ended
+                            ? (segmentDefinition.suffix ?? "")
+                            : ""
+                    ]).toJSON(),
+                startTime: rawSegment.startTime != null
+                    ? new Date(rawSegment.startTime).toISOString()
+                    : undefined,
+                endTime: rawSegment.endTime != null
+                    ? new Date(rawSegment.endTime).toISOString()
+                    : undefined
+            };
+        });
+    }
+    static getStackFromModelResponse(modelResponse) {
+        const stack = [];
+        const stackSet = new Set();
+        for (const item of modelResponse) {
+            if (typeof item === "string" || isChatModelResponseFunctionCall(item))
+                continue;
+            void item.type;
+            if (item.ended && stack.at(-1) === item.segmentType) {
+                stack.pop();
+                stackSet.delete(item.segmentType);
+            }
+            else if (!item.ended && !stackSet.has(item.segmentType)) {
+                stack.push(item.segmentType);
+                stackSet.add(item.segmentType);
+            }
+        }
+        return stack;
+    }
+}
 //# sourceMappingURL=LlamaChat.js.map
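
The `SegmentHandler` added above reports generation progress through the `onResponseChunk` callback, emitting chunks that either carry plain response tokens (`segmentType: undefined`) or belong to a named segment (for example a reasoning segment), with optional start and end timestamps. A rough consumer-side sketch of collecting such chunks, using a `ResponseChunk` type assumed from the chunk objects constructed in this diff rather than taken from the library's public typings:

```ts
// Assumed chunk shape, based on the objects passed to onResponseChunk in this diff.
type ResponseChunk = {
    type: "segment" | undefined,
    segmentType: string | undefined,
    tokens: number[],
    text: string,
    segmentStartTime?: Date,
    segmentEndTime?: Date
};

// Collects plain response text and per-segment text separately as chunks stream in.
function createChunkCollector() {
    let mainText = "";
    const segmentTexts = new Map<string, string>();

    return {
        onResponseChunk(chunk: ResponseChunk) {
            if (chunk.type === "segment" && chunk.segmentType != null) {
                const existing = segmentTexts.get(chunk.segmentType) ?? "";
                segmentTexts.set(chunk.segmentType, existing + chunk.text);
            } else
                mainText += chunk.text;
        },
        get result() {
            return {mainText, segmentTexts};
        }
    };
}
```
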