node-llama-cpp 3.4.3 → 3.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/dist/ChatWrapper.js +45 -0
- package/dist/ChatWrapper.js.map +1 -1
- package/dist/bindings/utils/compileLLamaCpp.js +2 -0
- package/dist/bindings/utils/compileLLamaCpp.js.map +1 -1
- package/dist/chatWrappers/DeepSeekChatWrapper.d.ts +37 -0
- package/dist/chatWrappers/DeepSeekChatWrapper.js +294 -0
- package/dist/chatWrappers/DeepSeekChatWrapper.js.map +1 -0
- package/dist/chatWrappers/FunctionaryChatWrapper.js +39 -13
- package/dist/chatWrappers/FunctionaryChatWrapper.js.map +1 -1
- package/dist/chatWrappers/Llama3_1ChatWrapper.d.ts +0 -3
- package/dist/chatWrappers/Llama3_1ChatWrapper.js +5 -4
- package/dist/chatWrappers/Llama3_1ChatWrapper.js.map +1 -1
- package/dist/chatWrappers/Llama3_2LightweightChatWrapper.js +1 -1
- package/dist/chatWrappers/Llama3_2LightweightChatWrapper.js.map +1 -1
- package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.d.ts +10 -1
- package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js +8 -2
- package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js.map +1 -1
- package/dist/chatWrappers/generic/TemplateChatWrapper.d.ts +17 -1
- package/dist/chatWrappers/generic/TemplateChatWrapper.js +10 -2
- package/dist/chatWrappers/generic/TemplateChatWrapper.js.map +1 -1
- package/dist/chatWrappers/generic/utils/templateSegmentOptionsToChatWrapperSettings.d.ts +22 -0
- package/dist/chatWrappers/generic/utils/templateSegmentOptionsToChatWrapperSettings.js +28 -0
- package/dist/chatWrappers/generic/utils/templateSegmentOptionsToChatWrapperSettings.js.map +1 -0
- package/dist/chatWrappers/utils/resolveChatWrapper.d.ts +46 -3
- package/dist/chatWrappers/utils/resolveChatWrapper.js +6 -2
- package/dist/chatWrappers/utils/resolveChatWrapper.js.map +1 -1
- package/dist/cli/commands/ChatCommand.js +38 -7
- package/dist/cli/commands/ChatCommand.js.map +1 -1
- package/dist/cli/commands/PullCommand.js +2 -1
- package/dist/cli/commands/PullCommand.js.map +1 -1
- package/dist/cli/commands/inspect/commands/InspectEstimateCommand.js +18 -5
- package/dist/cli/commands/inspect/commands/InspectEstimateCommand.js.map +1 -1
- package/dist/cli/commands/inspect/commands/InspectGgufCommand.js +16 -5
- package/dist/cli/commands/inspect/commands/InspectGgufCommand.js.map +1 -1
- package/dist/cli/recommendedModels.js +137 -67
- package/dist/cli/recommendedModels.js.map +1 -1
- package/dist/cli/utils/resolveCommandGgufPath.d.ts +8 -0
- package/dist/cli/utils/resolveCommandGgufPath.js +45 -1
- package/dist/cli/utils/resolveCommandGgufPath.js.map +1 -1
- package/dist/cli/utils/resolveModelRecommendationFileOptions.d.ts +2 -2
- package/dist/cli/utils/resolveModelRecommendationFileOptions.js.map +1 -1
- package/dist/config.d.ts +1 -1
- package/dist/config.js +1 -1
- package/dist/config.js.map +1 -1
- package/dist/evaluator/LlamaChat/LlamaChat.d.ts +87 -5
- package/dist/evaluator/LlamaChat/LlamaChat.js +770 -194
- package/dist/evaluator/LlamaChat/LlamaChat.js.map +1 -1
- package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js +55 -1
- package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js.map +1 -1
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.d.ts +22 -7
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.js +28 -8
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.js.map +1 -1
- package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.js +1 -1
- package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.js.map +1 -1
- package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.d.ts +9 -2
- package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.js.map +1 -1
- package/dist/evaluator/LlamaCompletion.js +61 -48
- package/dist/evaluator/LlamaCompletion.js.map +1 -1
- package/dist/evaluator/LlamaGrammar.d.ts +2 -2
- package/dist/evaluator/LlamaGrammar.js +2 -2
- package/dist/evaluator/LlamaModel/LlamaModel.d.ts +1 -1
- package/dist/evaluator/LlamaModel/LlamaModel.js +1 -1
- package/dist/gguf/readGgufFileInfo.js +7 -4
- package/dist/gguf/readGgufFileInfo.js.map +1 -1
- package/dist/gguf/types/GgufMetadataTypes.d.ts +2 -2
- package/dist/gguf/types/GgufMetadataTypes.js +2 -2
- package/dist/gguf/types/GgufMetadataTypes.js.map +1 -1
- package/dist/gguf/utils/getGgufFileTypeName.d.ts +1 -1
- package/dist/gguf/utils/ggufQuantNames.d.ts +2 -0
- package/dist/gguf/utils/ggufQuantNames.js +40 -0
- package/dist/gguf/utils/ggufQuantNames.js.map +1 -0
- package/dist/gguf/utils/normalizeGgufDownloadUrl.js +1 -1
- package/dist/gguf/utils/normalizeGgufDownloadUrl.js.map +1 -1
- package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.js +1 -2
- package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.js.map +1 -1
- package/dist/index.d.ts +7 -5
- package/dist/index.js +3 -2
- package/dist/index.js.map +1 -1
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/dist/types.d.ts +32 -2
- package/dist/types.js +7 -1
- package/dist/types.js.map +1 -1
- package/dist/utils/LlamaText.js +8 -9
- package/dist/utils/LlamaText.js.map +1 -1
- package/dist/utils/TokenStreamRegulator.d.ts +2 -0
- package/dist/utils/TokenStreamRegulator.js +12 -0
- package/dist/utils/TokenStreamRegulator.js.map +1 -1
- package/dist/utils/createModelDownloader.d.ts +21 -4
- package/dist/utils/createModelDownloader.js +83 -36
- package/dist/utils/createModelDownloader.js.map +1 -1
- package/dist/utils/getChatWrapperSegmentDefinition.d.ts +2 -0
- package/dist/utils/getChatWrapperSegmentDefinition.js +7 -0
- package/dist/utils/getChatWrapperSegmentDefinition.js.map +1 -0
- package/dist/utils/modelFileAccesTokens.js +1 -1
- package/dist/utils/modelFileAccesTokens.js.map +1 -1
- package/dist/utils/parseModelFileName.d.ts +5 -0
- package/dist/utils/parseModelFileName.js +63 -4
- package/dist/utils/parseModelFileName.js.map +1 -1
- package/dist/utils/parseModelUri.d.ts +30 -2
- package/dist/utils/parseModelUri.js +199 -24
- package/dist/utils/parseModelUri.js.map +1 -1
- package/dist/utils/resolveModelDestination.d.ts +4 -3
- package/dist/utils/resolveModelDestination.js +25 -2
- package/dist/utils/resolveModelDestination.js.map +1 -1
- package/dist/utils/resolveModelFile.d.ts +1 -1
- package/dist/utils/resolveModelFile.js +61 -20
- package/dist/utils/resolveModelFile.js.map +1 -1
- package/llama/binariesGithubRelease.json +1 -1
- package/llama/gitRelease.bundle +0 -0
- package/llama/grammars/README.md +4 -4
- package/llama/llama.cpp.info.json +2 -2
- package/package.json +36 -35
- package/templates/packed/electron-typescript-react.json +1 -1
- package/templates/packed/node-typescript.json +1 -1
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { DisposeAggregator, DisposedError, EventRelay, withLock } from "lifecycle-utils";
|
|
2
|
+
import { isChatModelResponseFunctionCall, isChatModelResponseSegment, allSegmentTypes } from "../../types.js";
|
|
2
3
|
import { removeNullFields } from "../../utils/removeNullFields.js";
|
|
3
4
|
import { LlamaGrammarEvaluationState } from "../LlamaGrammarEvaluationState.js";
|
|
4
5
|
import { LlamaText, SpecialToken } from "../../utils/LlamaText.js";
|
|
@@ -11,6 +12,7 @@ import { safeEventCallback } from "../../utils/safeEventCallback.js";
|
|
|
11
12
|
import { pushAll } from "../../utils/pushAll.js";
|
|
12
13
|
import { resolveLastTokens } from "../../utils/resolveLastTokens.js";
|
|
13
14
|
import { LlamaSampler } from "../LlamaContext/LlamaSampler.js";
|
|
15
|
+
import { getChatWrapperSegmentDefinition } from "../../utils/getChatWrapperSegmentDefinition.js";
|
|
14
16
|
import { eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy } from "./utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js";
|
|
15
17
|
import { FunctionCallNameGrammar } from "./utils/FunctionCallNameGrammar.js";
|
|
16
18
|
import { FunctionCallParamsGrammar } from "./utils/FunctionCallParamsGrammar.js";
|
|
@@ -76,11 +78,12 @@ export class LlamaChat {
|
|
|
76
78
|
return this.sequence.model;
|
|
77
79
|
}
|
|
78
80
|
async generateResponse(history, options = {}) {
|
|
79
|
-
const { onTextChunk, onToken, signal, stopOnAbortSignal = false, maxTokens, temperature, minP, topK, topP, seed, grammar, trimWhitespaceSuffix = defaultTrimWhitespaceSuffix, repeatPenalty = {}, tokenBias, evaluationPriority = defaultEvaluationPriority, functions, onFunctionCall, documentFunctionParams, maxParallelFunctionCalls, contextShift = defaultContextShiftOptions, customStopTriggers, lastEvaluationContextWindow: { history: lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift = 0.5 } = {} } = options;
|
|
81
|
+
const { onTextChunk, onToken, onResponseChunk, signal, stopOnAbortSignal = false, maxTokens, temperature, minP, topK, topP, seed, grammar, trimWhitespaceSuffix = defaultTrimWhitespaceSuffix, repeatPenalty = {}, tokenBias, evaluationPriority = defaultEvaluationPriority, functions, onFunctionCall, documentFunctionParams, maxParallelFunctionCalls, contextShift = defaultContextShiftOptions, customStopTriggers, lastEvaluationContextWindow: { history: lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift = 0.5 } = {} } = options;
|
|
80
82
|
this.sequence.tokenPredictor?.updateInputTokens?.(this.model.tokenize(findLastUserMessageInChatHistory(history)?.text ?? ""));
|
|
81
83
|
const generateResponseState = new GenerateResponseState(this, this._chatWrapper, history, {
|
|
82
84
|
onTextChunk,
|
|
83
85
|
onToken,
|
|
86
|
+
onResponseChunk,
|
|
84
87
|
signal,
|
|
85
88
|
stopOnAbortSignal,
|
|
86
89
|
maxTokens,
|
|
@@ -110,6 +113,7 @@ export class LlamaChat {
|
|
|
110
113
|
return await withLock(this._chatLock, "evaluate", signal, async () => {
|
|
111
114
|
try {
|
|
112
115
|
generateResponseState.ensureLastHistoryItemIsModel();
|
|
116
|
+
generateResponseState.ensureReopenedThoughtSegmentAfterFunctionCallsIfNeeded();
|
|
113
117
|
const loadContextWindow = async (avoidReloadingHistory = false) => {
|
|
114
118
|
await generateResponseState.loadContextWindow(generateResponseState.getResolvedHistoryWithCurrentModelResponse(), generateResponseState.getContextWindowsHistoryWithCurrentModelResponse(), false, avoidReloadingHistory);
|
|
115
119
|
};
|
|
@@ -134,23 +138,25 @@ export class LlamaChat {
|
|
|
134
138
|
await generateResponseState.alignCurrentSequenceStateWithCurrentTokens();
|
|
135
139
|
await generateResponseState.createNewEvaluationIterator();
|
|
136
140
|
while (await generateResponseState.iterateEvaluation()) {
|
|
137
|
-
generateResponseState.
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
generateResponseState.
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
141
|
+
if (!generateResponseState.holdPartialTokensForNextEvaluation()) {
|
|
142
|
+
generateResponseState.waitOnPartialCharactersOrWhiteSpaceTokens();
|
|
143
|
+
generateResponseState.detectAndHandleFunctionStartSyntax();
|
|
144
|
+
if (generateResponseState.functionEvaluationMode !== false) {
|
|
145
|
+
generateResponseState.canAvoidReloadingHistory = false;
|
|
146
|
+
generateResponseState.releasePartiallyFreeTokensBeforeFunctionCallStart();
|
|
147
|
+
const functionsCallsRes = await generateResponseState.enterFunctionCallingLoop(loadContextWindowForFunctionCallingLoop);
|
|
148
|
+
if (functionsCallsRes != null)
|
|
149
|
+
return functionsCallsRes;
|
|
150
|
+
}
|
|
151
|
+
generateResponseState.recordStopGenerationEvaluation();
|
|
152
|
+
generateResponseState.popStreamRegulatorFreeTokens();
|
|
153
|
+
generateResponseState.removeFoundStartIgnoreTextsFromPendingTokens();
|
|
154
|
+
const stopGenerationTriggerRes = generateResponseState.handleStopGenerationTrigger("model");
|
|
155
|
+
if (stopGenerationTriggerRes != null)
|
|
156
|
+
return stopGenerationTriggerRes;
|
|
157
|
+
generateResponseState.spliceIgnoreStartTextDetectedTokens();
|
|
158
|
+
generateResponseState.moveFreePendingTokensToRes();
|
|
145
159
|
}
|
|
146
|
-
generateResponseState.recordStopGenerationEvaluation();
|
|
147
|
-
generateResponseState.popStreamRegulatorFreeTokens();
|
|
148
|
-
generateResponseState.removeFoundStartIgnoreTextsFromPendingTokens();
|
|
149
|
-
const stopGenerationTriggerRes = generateResponseState.handleStopGenerationTrigger("model");
|
|
150
|
-
if (stopGenerationTriggerRes != null)
|
|
151
|
-
return stopGenerationTriggerRes;
|
|
152
|
-
generateResponseState.spliceIgnoreStartTextDetectedTokens();
|
|
153
|
-
generateResponseState.moveFreePendingTokensToRes();
|
|
154
160
|
const maxTokensTriggerRes = generateResponseState.handleMaxTokensTrigger("model");
|
|
155
161
|
if (maxTokensTriggerRes != null)
|
|
156
162
|
return maxTokensTriggerRes;
|
|
@@ -174,16 +180,20 @@ export class LlamaChat {
|
|
|
174
180
|
}
|
|
175
181
|
async loadChatAndCompleteUserMessage(history, options = {}) {
|
|
176
182
|
const { initialUserPrompt = "", stopOnAbortSignal = false, onTextChunk, onToken, signal, maxTokens = Math.min(256, Math.ceil(this.context.contextSize / 2)), temperature, minP, topK, topP, seed, grammar, trimWhitespaceSuffix = defaultTrimWhitespaceSuffix, repeatPenalty = {}, tokenBias, evaluationPriority = defaultEvaluationPriority, functions, documentFunctionParams, contextShift = defaultContextShiftOptions, customStopTriggers, lastEvaluationContextWindow: { history: lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift = 0.8 } = {} } = options;
|
|
177
|
-
const lastEvaluationContextWindowHistoryItem = lastEvaluationContextWindowHistory == null
|
|
178
|
-
? null
|
|
179
|
-
: lastEvaluationContextWindowHistory[lastEvaluationContextWindowHistory.length - 1];
|
|
180
|
-
const lastEvaluationContextWindowUserMessage = lastEvaluationContextWindowHistoryItem?.type === "user"
|
|
181
|
-
? lastEvaluationContextWindowHistoryItem.text
|
|
182
|
-
: "";
|
|
183
183
|
this.sequence.tokenPredictor?.updateInputTokens?.(this.model.tokenize((findLastModelMessageInChatHistory(history)?.response ?? [])
|
|
184
|
-
.
|
|
184
|
+
.map((item) => {
|
|
185
|
+
if (typeof item === "string")
|
|
186
|
+
return item;
|
|
187
|
+
else if (isChatModelResponseFunctionCall(item))
|
|
188
|
+
return null;
|
|
189
|
+
else if (isChatModelResponseSegment(item))
|
|
190
|
+
return item.text;
|
|
191
|
+
void item;
|
|
192
|
+
return null;
|
|
193
|
+
})
|
|
194
|
+
.filter((item) => item != null)
|
|
185
195
|
.join(" ")));
|
|
186
|
-
const generateResponseState = new GenerateResponseState(this, this._chatWrapper, history, {
|
|
196
|
+
const generateResponseState = new GenerateResponseState(this, this._chatWrapper, mergeGeneratedResultWithChatHistory("user", history, [initialUserPrompt]), {
|
|
187
197
|
onTextChunk,
|
|
188
198
|
onToken,
|
|
189
199
|
signal,
|
|
@@ -204,27 +214,16 @@ export class LlamaChat {
|
|
|
204
214
|
contextShift,
|
|
205
215
|
customStopTriggers,
|
|
206
216
|
lastEvaluationContextWindow: {
|
|
207
|
-
history: lastEvaluationContextWindowHistory
|
|
208
|
-
? undefined
|
|
209
|
-
: setLastUserTextInChatHistory(lastEvaluationContextWindowHistory, lastEvaluationContextWindowUserMessage + initialUserPrompt),
|
|
217
|
+
history: mergeGeneratedResultWithChatHistory("user", lastEvaluationContextWindowHistory ?? history, [initialUserPrompt]),
|
|
210
218
|
minimumOverlapPercentageToPreventContextShift
|
|
211
219
|
}
|
|
212
220
|
});
|
|
213
221
|
return await withLock(this._chatLock, "evaluate", signal, async () => {
|
|
214
222
|
try {
|
|
215
223
|
generateResponseState.ensureLastHistoryItemIsUser();
|
|
216
|
-
const getInitialUserMessage = (history) => {
|
|
217
|
-
const lastResolvedHistoryItem = history[history.length - 1];
|
|
218
|
-
if (lastResolvedHistoryItem?.type === "user")
|
|
219
|
-
return lastResolvedHistoryItem.text;
|
|
220
|
-
return "";
|
|
221
|
-
};
|
|
222
|
-
const initialUserMessage = getInitialUserMessage(generateResponseState.resolvedHistory);
|
|
223
|
-
const contextWindowInitialUserMessage = getInitialUserMessage(generateResponseState.lastContextWindowHistory);
|
|
224
224
|
while (true) {
|
|
225
225
|
generateResponseState.startTokenLoop();
|
|
226
|
-
const { userTextSuffix } = await generateResponseState.loadContextWindow(
|
|
227
|
-
this.model.detokenize(generateResponseState.contextWindowsRes)), true);
|
|
226
|
+
const { userTextSuffix } = await generateResponseState.loadContextWindow(mergeGeneratedResultWithChatHistory("user", generateResponseState.resolvedHistory, generateResponseState.segmentHandler.getModelResponseSegments()), mergeGeneratedResultWithChatHistory("user", generateResponseState.lastContextWindowHistory, generateResponseState.segmentHandler.getContextWindowModelResponseSegments()), true);
|
|
228
227
|
generateResponseState.functionEvaluationMode = false;
|
|
229
228
|
generateResponseState.addStopGenerationTriggersFromChatWrapper();
|
|
230
229
|
if (userTextSuffix != null && userTextSuffix.values.length > 0)
|
|
@@ -235,7 +234,7 @@ export class LlamaChat {
|
|
|
235
234
|
return {
|
|
236
235
|
completion: "",
|
|
237
236
|
lastEvaluation: {
|
|
238
|
-
contextWindow:
|
|
237
|
+
contextWindow: mergeGeneratedResultWithChatHistory("user", generateResponseState.lastContextWindowHistory, generateResponseState.segmentHandler.getContextWindowModelResponseSegments()),
|
|
239
238
|
contextShiftMetadata: generateResponseState.lastHistoryCompressionMetadata
|
|
240
239
|
},
|
|
241
240
|
metadata: {
|
|
@@ -245,28 +244,30 @@ export class LlamaChat {
|
|
|
245
244
|
}
|
|
246
245
|
await generateResponseState.createNewEvaluationIterator();
|
|
247
246
|
while (await generateResponseState.iterateEvaluation()) {
|
|
248
|
-
generateResponseState.
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
247
|
+
if (!generateResponseState.holdPartialTokensForNextEvaluation()) {
|
|
248
|
+
generateResponseState.waitOnPartialCharactersOrWhiteSpaceTokens();
|
|
249
|
+
generateResponseState.recordStopGenerationEvaluation();
|
|
250
|
+
generateResponseState.popStreamRegulatorFreeTokens();
|
|
251
|
+
const stopGenerationTriggerRes = generateResponseState.handleStopGenerationTrigger("user");
|
|
252
|
+
if (stopGenerationTriggerRes != null)
|
|
253
|
+
return {
|
|
254
|
+
completion: stopGenerationTriggerRes.response,
|
|
255
|
+
lastEvaluation: {
|
|
256
|
+
contextWindow: mergeGeneratedResultWithChatHistory("user", generateResponseState.lastContextWindowHistory, generateResponseState.segmentHandler.getContextWindowModelResponseSegments()),
|
|
257
|
+
contextShiftMetadata: stopGenerationTriggerRes.lastEvaluation.contextShiftMetadata
|
|
258
|
+
},
|
|
259
|
+
metadata: stopGenerationTriggerRes.metadata.stopReason === "customStopTrigger"
|
|
260
|
+
? stopGenerationTriggerRes.metadata
|
|
261
|
+
: stopGenerationTriggerRes.metadata
|
|
262
|
+
};
|
|
263
|
+
generateResponseState.moveFreePendingTokensToRes(false);
|
|
264
|
+
}
|
|
264
265
|
const maxTokensTriggerRes = generateResponseState.handleMaxTokensTrigger("user");
|
|
265
266
|
if (maxTokensTriggerRes != null)
|
|
266
267
|
return {
|
|
267
268
|
completion: maxTokensTriggerRes.response,
|
|
268
269
|
lastEvaluation: {
|
|
269
|
-
contextWindow:
|
|
270
|
+
contextWindow: mergeGeneratedResultWithChatHistory("user", generateResponseState.lastContextWindowHistory, generateResponseState.segmentHandler.getContextWindowModelResponseSegments()),
|
|
270
271
|
contextShiftMetadata: maxTokensTriggerRes.lastEvaluation.contextShiftMetadata
|
|
271
272
|
},
|
|
272
273
|
metadata: maxTokensTriggerRes.metadata
|
|
@@ -278,7 +279,7 @@ export class LlamaChat {
|
|
|
278
279
|
return {
|
|
279
280
|
completion: abortRes.response,
|
|
280
281
|
lastEvaluation: {
|
|
281
|
-
contextWindow:
|
|
282
|
+
contextWindow: mergeGeneratedResultWithChatHistory("user", generateResponseState.lastContextWindowHistory, generateResponseState.segmentHandler.getContextWindowModelResponseSegments()),
|
|
282
283
|
contextShiftMetadata: abortRes.lastEvaluation.contextShiftMetadata
|
|
283
284
|
},
|
|
284
285
|
metadata: abortRes.metadata
|
|
@@ -303,11 +304,18 @@ function removeRawFromHistoryItem(historyItem) {
|
|
|
303
304
|
newHistoryItem.response = newHistoryItem.response.map((item) => {
|
|
304
305
|
if (typeof item === "string")
|
|
305
306
|
return item;
|
|
306
|
-
else
|
|
307
|
+
else if (isChatModelResponseFunctionCall(item))
|
|
307
308
|
return {
|
|
308
309
|
...item,
|
|
309
310
|
rawCall: undefined
|
|
310
311
|
};
|
|
312
|
+
else if (isChatModelResponseSegment(item))
|
|
313
|
+
return {
|
|
314
|
+
...item,
|
|
315
|
+
raw: undefined
|
|
316
|
+
};
|
|
317
|
+
void item;
|
|
318
|
+
return item;
|
|
311
319
|
});
|
|
312
320
|
return newHistoryItem;
|
|
313
321
|
}
|
|
@@ -370,42 +378,17 @@ async function compressHistoryToFitContextSize({ history, contextShiftSize, cont
|
|
|
370
378
|
metadata
|
|
371
379
|
};
|
|
372
380
|
}
|
|
373
|
-
function
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
if (modelResponse.length > 0 && typeof modelResponse[modelResponse.length - 1] === "string")
|
|
379
|
-
return modelResponse[modelResponse.length - 1];
|
|
380
|
-
return "";
|
|
381
|
+
function getLastModelMessageFullResponseFromChatHistory(chatHistory) {
|
|
382
|
+
const lastModelResponseItem = chatHistory.at(-1);
|
|
383
|
+
if (lastModelResponseItem == null || lastModelResponseItem.type !== "model")
|
|
384
|
+
return [];
|
|
385
|
+
return lastModelResponseItem.response;
|
|
381
386
|
}
|
|
382
387
|
function getLastUserTextFromChatHistory(chatHistory) {
|
|
383
388
|
if (chatHistory.length === 0 || chatHistory[chatHistory.length - 1].type !== "user")
|
|
384
389
|
return "";
|
|
385
390
|
return chatHistory[chatHistory.length - 1].text;
|
|
386
391
|
}
|
|
387
|
-
function setLastModelTextResponseInChatHistory(chatHistory, textResponse) {
|
|
388
|
-
const newChatHistory = chatHistory.slice();
|
|
389
|
-
if (newChatHistory.length === 0 || newChatHistory[newChatHistory.length - 1].type !== "model")
|
|
390
|
-
newChatHistory.push({
|
|
391
|
-
type: "model",
|
|
392
|
-
response: []
|
|
393
|
-
});
|
|
394
|
-
const lastModelResponseItem = newChatHistory[newChatHistory.length - 1];
|
|
395
|
-
const newLastModelResponseItem = { ...lastModelResponseItem };
|
|
396
|
-
newChatHistory[newChatHistory.length - 1] = newLastModelResponseItem;
|
|
397
|
-
const modelResponse = newLastModelResponseItem.response.slice();
|
|
398
|
-
newLastModelResponseItem.response = modelResponse;
|
|
399
|
-
if (modelResponse.length > 0 && typeof modelResponse[modelResponse.length - 1] === "string") {
|
|
400
|
-
if (textResponse === "")
|
|
401
|
-
modelResponse.pop();
|
|
402
|
-
else
|
|
403
|
-
modelResponse[modelResponse.length - 1] = textResponse;
|
|
404
|
-
}
|
|
405
|
-
else if (textResponse !== "")
|
|
406
|
-
modelResponse.push(textResponse);
|
|
407
|
-
return newChatHistory;
|
|
408
|
-
}
|
|
409
392
|
function setLastUserTextInChatHistory(chatHistory, userText) {
|
|
410
393
|
const newChatHistory = chatHistory.slice();
|
|
411
394
|
if (newChatHistory.length === 0 || newChatHistory[newChatHistory.length - 1].type !== "user")
|
|
@@ -419,11 +402,73 @@ function setLastUserTextInChatHistory(chatHistory, userText) {
|
|
|
419
402
|
newLastUserItem.text = userText;
|
|
420
403
|
return newChatHistory;
|
|
421
404
|
}
|
|
422
|
-
function
|
|
423
|
-
if (
|
|
424
|
-
return
|
|
425
|
-
|
|
426
|
-
|
|
405
|
+
function mergeGeneratedResultWithChatHistory(itemType, chatHistory, generatedResult) {
|
|
406
|
+
if (generatedResult.length === 0 || (generatedResult.length === 1 && generatedResult[0] === ""))
|
|
407
|
+
return chatHistory;
|
|
408
|
+
const newChatHistory = chatHistory.slice();
|
|
409
|
+
if (itemType === "user") {
|
|
410
|
+
let lastUserItem = newChatHistory.at(-1);
|
|
411
|
+
if (lastUserItem?.type !== "user") {
|
|
412
|
+
lastUserItem = {
|
|
413
|
+
type: "user",
|
|
414
|
+
text: ""
|
|
415
|
+
};
|
|
416
|
+
newChatHistory.push(lastUserItem);
|
|
417
|
+
}
|
|
418
|
+
const newLastUserItem = { ...lastUserItem };
|
|
419
|
+
newChatHistory[newChatHistory.length - 1] = newLastUserItem;
|
|
420
|
+
newLastUserItem.text += generatedResult
|
|
421
|
+
.map((item) => {
|
|
422
|
+
if (typeof item === "string")
|
|
423
|
+
return item;
|
|
424
|
+
return item.text;
|
|
425
|
+
})
|
|
426
|
+
.join("");
|
|
427
|
+
return newChatHistory;
|
|
428
|
+
}
|
|
429
|
+
else {
|
|
430
|
+
let lastModelItem = newChatHistory.at(-1);
|
|
431
|
+
if (lastModelItem?.type !== "model") {
|
|
432
|
+
lastModelItem = {
|
|
433
|
+
type: "model",
|
|
434
|
+
response: []
|
|
435
|
+
};
|
|
436
|
+
newChatHistory.push(lastModelItem);
|
|
437
|
+
}
|
|
438
|
+
const newLastModelItem = { ...lastModelItem };
|
|
439
|
+
newChatHistory[newChatHistory.length - 1] = newLastModelItem;
|
|
440
|
+
const modelResponse = newLastModelItem.response.slice();
|
|
441
|
+
newLastModelItem.response = modelResponse;
|
|
442
|
+
const firstGeneratedResultItem = generatedResult[0];
|
|
443
|
+
if (firstGeneratedResultItem == null)
|
|
444
|
+
return newChatHistory;
|
|
445
|
+
const lastModelResponseItem = modelResponse.at(-1);
|
|
446
|
+
if (typeof firstGeneratedResultItem === "string" && typeof lastModelResponseItem === "string") {
|
|
447
|
+
modelResponse[modelResponse.length - 1] = lastModelResponseItem + firstGeneratedResultItem;
|
|
448
|
+
}
|
|
449
|
+
else if (typeof firstGeneratedResultItem !== "string" && isChatModelResponseSegment(firstGeneratedResultItem) &&
|
|
450
|
+
typeof lastModelResponseItem !== "string" && isChatModelResponseSegment(lastModelResponseItem) &&
|
|
451
|
+
!lastModelResponseItem.ended && lastModelResponseItem.segmentType === firstGeneratedResultItem.segmentType) {
|
|
452
|
+
modelResponse[modelResponse.length - 1] = {
|
|
453
|
+
...lastModelResponseItem,
|
|
454
|
+
...firstGeneratedResultItem,
|
|
455
|
+
text: lastModelResponseItem.text + firstGeneratedResultItem.text,
|
|
456
|
+
ended: firstGeneratedResultItem.ended,
|
|
457
|
+
raw: (lastModelResponseItem.raw != null && firstGeneratedResultItem.raw != null)
|
|
458
|
+
? LlamaText([
|
|
459
|
+
LlamaText.fromJSON(lastModelResponseItem.raw),
|
|
460
|
+
LlamaText.fromJSON(firstGeneratedResultItem.raw)
|
|
461
|
+
]).toJSON()
|
|
462
|
+
: undefined,
|
|
463
|
+
startTime: lastModelResponseItem.startTime,
|
|
464
|
+
endTime: firstGeneratedResultItem.endTime
|
|
465
|
+
};
|
|
466
|
+
}
|
|
467
|
+
else
|
|
468
|
+
modelResponse.push(firstGeneratedResultItem);
|
|
469
|
+
pushAll(modelResponse, generatedResult.slice(1));
|
|
470
|
+
return newChatHistory;
|
|
471
|
+
}
|
|
427
472
|
}
|
|
428
473
|
function findLastUserMessageInChatHistory(chatHistory) {
|
|
429
474
|
for (let i = chatHistory.length - 1; i >= 0; i--) {
|
|
@@ -486,6 +531,7 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
|
|
|
486
531
|
throw new DisposedError();
|
|
487
532
|
const model = sequence.model;
|
|
488
533
|
const context = sequence.context;
|
|
534
|
+
let removeRawFromHistory = false;
|
|
489
535
|
if (isFirstEvaluation && lastEvaluationContextWindowHistory != null && sequence.isLoadedToMemory) {
|
|
490
536
|
const newContextWindow = lastEvaluationContextWindowHistory.slice();
|
|
491
537
|
if (endWithUserText) {
|
|
@@ -514,7 +560,7 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
|
|
|
514
560
|
history: newContextWindow,
|
|
515
561
|
stopGenerationTriggers,
|
|
516
562
|
tokens,
|
|
517
|
-
|
|
563
|
+
removeRawFromHistory,
|
|
518
564
|
newHistoryCompressionMetadata: lastHistoryCompressionMetadata,
|
|
519
565
|
ignoreStartText: ignoreStartText ?? [],
|
|
520
566
|
functionCallInitiallyEngaged: functionCall?.initiallyEngaged ?? false,
|
|
@@ -523,9 +569,10 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
|
|
|
523
569
|
};
|
|
524
570
|
}
|
|
525
571
|
}
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
572
|
+
removeRawFromHistory = !sequence.isLoadedToMemory;
|
|
573
|
+
resolvedHistory = removeRawFromHistory
|
|
574
|
+
? resolvedHistory.map(removeRawFromHistoryItem)
|
|
575
|
+
: resolvedHistory.slice();
|
|
529
576
|
if (resolvedContextShift.lastEvaluationMetadata != null) {
|
|
530
577
|
const contextShiftSize = resolvedContextShift.size instanceof Function
|
|
531
578
|
? await resolvedContextShift.size(sequence)
|
|
@@ -550,7 +597,7 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
|
|
|
550
597
|
history: compressedHistory,
|
|
551
598
|
stopGenerationTriggers,
|
|
552
599
|
tokens: contextText.tokenize(model.tokenizer),
|
|
553
|
-
|
|
600
|
+
removeRawFromHistory,
|
|
554
601
|
newHistoryCompressionMetadata: metadata,
|
|
555
602
|
ignoreStartText: ignoreStartText ?? [],
|
|
556
603
|
functionCallInitiallyEngaged: functionCall?.initiallyEngaged ?? false,
|
|
@@ -570,7 +617,7 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
|
|
|
570
617
|
history: resolvedHistory,
|
|
571
618
|
stopGenerationTriggers,
|
|
572
619
|
tokens,
|
|
573
|
-
|
|
620
|
+
removeRawFromHistory,
|
|
574
621
|
newHistoryCompressionMetadata: lastHistoryCompressionMetadata,
|
|
575
622
|
ignoreStartText: ignoreStartText ?? [],
|
|
576
623
|
functionCallInitiallyEngaged: functionCall?.initiallyEngaged ?? false,
|
|
@@ -601,7 +648,7 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
|
|
|
601
648
|
history: compressedHistory,
|
|
602
649
|
stopGenerationTriggers,
|
|
603
650
|
tokens: contextText.tokenize(model.tokenizer),
|
|
604
|
-
|
|
651
|
+
removeRawFromHistory,
|
|
605
652
|
newHistoryCompressionMetadata: metadata,
|
|
606
653
|
ignoreStartText: ignoreStartText ?? [],
|
|
607
654
|
functionCallInitiallyEngaged: functionCall?.initiallyEngaged ?? false,
|
|
@@ -615,6 +662,7 @@ class GenerateResponseState {
|
|
|
615
662
|
history;
|
|
616
663
|
onTextChunk;
|
|
617
664
|
onToken;
|
|
665
|
+
onResponseChunk;
|
|
618
666
|
signal;
|
|
619
667
|
stopOnAbortSignal;
|
|
620
668
|
maxTokens;
|
|
@@ -638,7 +686,6 @@ class GenerateResponseState {
|
|
|
638
686
|
repeatPenaltyEnabled;
|
|
639
687
|
resolvedContextShift;
|
|
640
688
|
resolvedRepeatPenalty;
|
|
641
|
-
lastModelResponse;
|
|
642
689
|
grammarEvaluationState;
|
|
643
690
|
functionNameGrammar;
|
|
644
691
|
functionsGrammar;
|
|
@@ -651,10 +698,13 @@ class GenerateResponseState {
|
|
|
651
698
|
ignoreStartTextDetector = new StopGenerationDetector();
|
|
652
699
|
locksToReleaseOnValidGeneration = [];
|
|
653
700
|
resolvedHistory;
|
|
701
|
+
noRawInResolvedHistory;
|
|
654
702
|
res = [];
|
|
655
703
|
pendingTokens = [];
|
|
656
704
|
ignoredStartTextTokens = [];
|
|
657
705
|
resFunctionCalls = [];
|
|
706
|
+
segmentHandler;
|
|
707
|
+
pendingPartialTokens = [];
|
|
658
708
|
functionEvaluationMode = false;
|
|
659
709
|
currentFunctionCallPreviousText = LlamaText([]);
|
|
660
710
|
currentFunctionCallCurrentPartTokens = [];
|
|
@@ -678,8 +728,6 @@ class GenerateResponseState {
|
|
|
678
728
|
disengageInitiallyEngagedFunctionCall = [];
|
|
679
729
|
userTextSuffix = undefined;
|
|
680
730
|
tokens = [];
|
|
681
|
-
contextWindowLastModelResponse = "";
|
|
682
|
-
contextWindowsRes = [];
|
|
683
731
|
// token evaluation loop
|
|
684
732
|
evaluationIterator;
|
|
685
733
|
currentIteration;
|
|
@@ -688,12 +736,13 @@ class GenerateResponseState {
|
|
|
688
736
|
currentTokens = [];
|
|
689
737
|
currentText = "";
|
|
690
738
|
currentQueuedTokenRelease;
|
|
691
|
-
constructor(llamaChat, chatWrapper, history, { onTextChunk, onToken, signal, stopOnAbortSignal = false, maxTokens, temperature, minP, topK, topP, seed, grammar, trimWhitespaceSuffix = defaultTrimWhitespaceSuffix, repeatPenalty = {}, tokenBias, evaluationPriority = defaultEvaluationPriority, functions, onFunctionCall, documentFunctionParams, maxParallelFunctionCalls, contextShift = defaultContextShiftOptions, customStopTriggers, lastEvaluationContextWindow: { history: lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift = 0.5 } = {} } = {}) {
|
|
739
|
+
constructor(llamaChat, chatWrapper, history, { onTextChunk, onToken, onResponseChunk, signal, stopOnAbortSignal = false, maxTokens, temperature, minP, topK, topP, seed, grammar, trimWhitespaceSuffix = defaultTrimWhitespaceSuffix, repeatPenalty = {}, tokenBias, evaluationPriority = defaultEvaluationPriority, functions, onFunctionCall, documentFunctionParams, maxParallelFunctionCalls, contextShift = defaultContextShiftOptions, customStopTriggers, lastEvaluationContextWindow: { history: lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift = 0.5 } = {} } = {}) {
|
|
692
740
|
this.llamaChat = llamaChat;
|
|
693
741
|
this.chatWrapper = chatWrapper;
|
|
694
742
|
this.history = history;
|
|
695
743
|
this.onTextChunk = safeEventCallback(onTextChunk);
|
|
696
744
|
this.onToken = safeEventCallback(onToken);
|
|
745
|
+
this.onResponseChunk = safeEventCallback(onResponseChunk);
|
|
697
746
|
this.signal = signal;
|
|
698
747
|
this.stopOnAbortSignal = stopOnAbortSignal;
|
|
699
748
|
this.maxTokens = maxTokens;
|
|
@@ -718,9 +767,10 @@ class GenerateResponseState {
|
|
|
718
767
|
throw this.signal.reason;
|
|
719
768
|
if (this.llamaChat.disposed)
|
|
720
769
|
throw new DisposedError();
|
|
721
|
-
this.
|
|
722
|
-
|
|
723
|
-
|
|
770
|
+
this.noRawInResolvedHistory = !this.llamaChat.sequence.isLoadedToMemory;
|
|
771
|
+
this.resolvedHistory = this.noRawInResolvedHistory
|
|
772
|
+
? this.history.map(removeRawFromHistoryItem)
|
|
773
|
+
: this.history.slice();
|
|
724
774
|
this.resolvedContextShift = {
|
|
725
775
|
...defaultContextShiftOptions,
|
|
726
776
|
...removeNullFields(this.contextShift)
|
|
@@ -731,7 +781,6 @@ class GenerateResponseState {
|
|
|
731
781
|
...(repeatPenalty ?? {}),
|
|
732
782
|
lastTokens: repeatPenalty?.lastTokens ?? defaultRepeatPenaltyLastTokens
|
|
733
783
|
};
|
|
734
|
-
this.lastModelResponse = getLastTextModelResponseFromChatHistory(this.resolvedHistory);
|
|
735
784
|
this.repeatPenaltyEnabled = this.resolvedRepeatPenalty.lastTokens > 0;
|
|
736
785
|
this.grammarEvaluationState = this.grammar != null
|
|
737
786
|
? new LlamaGrammarEvaluationState({ model: this.llamaChat.model, grammar: this.grammar })
|
|
@@ -742,7 +791,7 @@ class GenerateResponseState {
|
|
|
742
791
|
this.functionsGrammar = undefined;
|
|
743
792
|
this.functionsEvaluationState = undefined;
|
|
744
793
|
this.lastContextWindowHistory = lastEvaluationContextWindowHistory ?? this.resolvedHistory;
|
|
745
|
-
this.lastHistoryCompressionMetadata = this.resolvedContextShift;
|
|
794
|
+
this.lastHistoryCompressionMetadata = this.resolvedContextShift.lastEvaluationMetadata;
|
|
746
795
|
if (this.customStopTriggers != null)
|
|
747
796
|
StopGenerationDetector.resolveStopTriggers(this.customStopTriggers, this.llamaChat.model.tokenizer)
|
|
748
797
|
.map((stopTrigger) => this.customStopGenerationTriggersDetector.addStopTrigger(stopTrigger));
|
|
@@ -754,6 +803,22 @@ class GenerateResponseState {
|
|
|
754
803
|
this.chatWrapper.settings.functions?.parallelism?.call?.sectionPrefix ?? "",
|
|
755
804
|
this.chatWrapper.settings.functions.call.prefix
|
|
756
805
|
]), this.llamaChat.model.tokenizer));
|
|
806
|
+
const segmentDefinitions = new Map();
|
|
807
|
+
for (const segmentType of allSegmentTypes) {
|
|
808
|
+
const segmentDefinition = getChatWrapperSegmentDefinition(this.chatWrapper.settings, segmentType);
|
|
809
|
+
if (segmentDefinition != null)
|
|
810
|
+
segmentDefinitions.set(segmentType, segmentDefinition);
|
|
811
|
+
}
|
|
812
|
+
this.segmentHandler = new SegmentHandler({
|
|
813
|
+
model: this.llamaChat.model,
|
|
814
|
+
onTextChunk: this.onTextChunk,
|
|
815
|
+
onToken: this.onToken,
|
|
816
|
+
onResponseChunk: this.onResponseChunk,
|
|
817
|
+
previousTokens: this.getLastTokens(),
|
|
818
|
+
closeAllSegments: this.chatWrapper.settings.segments?.closeAllSegments,
|
|
819
|
+
segmentDefinitions,
|
|
820
|
+
initialSegmentStack: SegmentHandler.getStackFromModelResponse(getLastModelMessageFullResponseFromChatHistory(this.resolvedHistory))
|
|
821
|
+
});
|
|
757
822
|
this.getPenaltyTokens = this.getPenaltyTokens.bind(this);
|
|
758
823
|
}
|
|
759
824
|
async dispose() {
|
|
@@ -763,19 +828,47 @@ class GenerateResponseState {
|
|
|
763
828
|
await this.dispose();
|
|
764
829
|
}
|
|
765
830
|
ensureLastHistoryItemIsModel() {
|
|
766
|
-
if (this.resolvedHistory.
|
|
831
|
+
if (this.resolvedHistory.at(-1)?.type !== "model")
|
|
767
832
|
this.resolvedHistory.push({
|
|
768
833
|
type: "model",
|
|
769
834
|
response: []
|
|
770
835
|
});
|
|
771
836
|
}
|
|
772
837
|
ensureLastHistoryItemIsUser() {
|
|
773
|
-
if (this.resolvedHistory.
|
|
838
|
+
if (this.resolvedHistory.at(-1)?.type !== "user")
|
|
774
839
|
this.resolvedHistory.push({
|
|
775
840
|
type: "user",
|
|
776
841
|
text: ""
|
|
777
842
|
});
|
|
778
843
|
}
|
|
844
|
+
ensureReopenedThoughtSegmentAfterFunctionCallsIfNeeded() {
|
|
845
|
+
if (this.chatWrapper.settings.segments?.thought?.reopenAfterFunctionCalls !== true)
|
|
846
|
+
return;
|
|
847
|
+
const lastModelResponseItem = this.resolvedHistory.at(-1);
|
|
848
|
+
if (lastModelResponseItem == null || lastModelResponseItem.type !== "model")
|
|
849
|
+
return;
|
|
850
|
+
const lastResponse = lastModelResponseItem.response.at(-1);
|
|
851
|
+
if (lastResponse == null)
|
|
852
|
+
return;
|
|
853
|
+
const lastResponseIsFunctionCall = typeof lastResponse !== "string" && lastResponse.type === "functionCall";
|
|
854
|
+
if (!lastResponseIsFunctionCall)
|
|
855
|
+
return;
|
|
856
|
+
const currentResponseSegmentsStack = SegmentHandler.getStackFromModelResponse(lastModelResponseItem.response);
|
|
857
|
+
if (currentResponseSegmentsStack.includes("thought"))
|
|
858
|
+
return;
|
|
859
|
+
const hadThoughtSegments = this.resolvedHistory.some((chatItem) => {
|
|
860
|
+
if (chatItem.type !== "model")
|
|
861
|
+
return false;
|
|
862
|
+
return chatItem.response.some((responseItem) => {
|
|
863
|
+
if (typeof responseItem === "string")
|
|
864
|
+
return false;
|
|
865
|
+
return responseItem.type === "segment" && responseItem.segmentType === "thought";
|
|
866
|
+
});
|
|
867
|
+
});
|
|
868
|
+
if (!hadThoughtSegments)
|
|
869
|
+
return;
|
|
870
|
+
this.segmentHandler.openSegment("thought");
|
|
871
|
+
}
|
|
779
872
|
ensureNotAborted() {
|
|
780
873
|
if (this.signal?.aborted && (!this.stopOnAbortSignal || this.res.length === 0))
|
|
781
874
|
throw this.signal.reason;
|
|
@@ -784,7 +877,7 @@ class GenerateResponseState {
|
|
|
784
877
|
}
|
|
785
878
|
getPenaltyTokens() {
|
|
786
879
|
if (this.llamaChat.disposed)
|
|
787
|
-
|
|
880
|
+
return [];
|
|
788
881
|
let punishTokens = this.res.slice(-this.resolvedRepeatPenalty.lastTokens);
|
|
789
882
|
if (this.resolvedRepeatPenalty.punishTokensFilter != null)
|
|
790
883
|
punishTokens = this.resolvedRepeatPenalty.punishTokensFilter(punishTokens);
|
|
@@ -796,24 +889,10 @@ class GenerateResponseState {
|
|
|
796
889
|
return punishTokens;
|
|
797
890
|
}
|
|
798
891
|
getResolvedHistoryWithCurrentModelResponse() {
|
|
799
|
-
|
|
800
|
-
return this.resolvedHistory;
|
|
801
|
-
let modelResponse = this.llamaChat.model.detokenize(this.res);
|
|
802
|
-
if (this.grammar?.trimWhitespaceSuffix || this.trimWhitespaceSuffix)
|
|
803
|
-
modelResponse = modelResponse.trimEnd();
|
|
804
|
-
if (modelResponse === "")
|
|
805
|
-
return this.resolvedHistory;
|
|
806
|
-
return setLastModelTextResponseInChatHistory(this.resolvedHistory, this.lastModelResponse + modelResponse);
|
|
892
|
+
return mergeGeneratedResultWithChatHistory("model", this.resolvedHistory, this.segmentHandler.getModelResponseSegments());
|
|
807
893
|
}
|
|
808
894
|
getContextWindowsHistoryWithCurrentModelResponse() {
|
|
809
|
-
|
|
810
|
-
return this.lastContextWindowHistory;
|
|
811
|
-
let modelResponse = this.llamaChat.model.detokenize(this.contextWindowsRes);
|
|
812
|
-
if (this.grammar?.trimWhitespaceSuffix || this.trimWhitespaceSuffix)
|
|
813
|
-
modelResponse = modelResponse.trimEnd();
|
|
814
|
-
if (modelResponse === "")
|
|
815
|
-
return this.lastContextWindowHistory;
|
|
816
|
-
return setLastModelTextResponseInChatHistory(this.lastContextWindowHistory, this.contextWindowLastModelResponse + modelResponse);
|
|
895
|
+
return mergeGeneratedResultWithChatHistory("model", this.lastContextWindowHistory, this.segmentHandler.getContextWindowModelResponseSegments());
|
|
817
896
|
}
|
|
818
897
|
removeFoundStartIgnoreTextsFromPendingTokens(forceRemove = false) {
|
|
819
898
|
if (!this.removedStartTextToIgnore && this.res.length === 0 && this.pendingTokens.length > 0 &&
|
|
@@ -826,14 +905,26 @@ class GenerateResponseState {
|
|
|
826
905
|
this.contextWindowTokens,
|
|
827
906
|
this.ignoredStartTextTokens
|
|
828
907
|
]);
|
|
908
|
+
const pendingPartialTokens = [];
|
|
829
909
|
for (let i = 0; i < this.pendingTokens.length; i++) {
|
|
910
|
+
const currentToken = this.pendingTokens[i];
|
|
911
|
+
const tokens = [...pendingPartialTokens, currentToken];
|
|
912
|
+
const text = this.llamaChat.model.detokenize(tokens, false, lastTokensForDetokenizer);
|
|
913
|
+
if (pendingPartialTokens.length === 0 &&
|
|
914
|
+
text.endsWith(UNKNOWN_UNICODE_CHAR) &&
|
|
915
|
+
!this.llamaChat.model.isSpecialToken(currentToken) &&
|
|
916
|
+
!this.llamaChat.model.isEogToken(currentToken)) {
|
|
917
|
+
pendingPartialTokens.length = 0;
|
|
918
|
+
pushAll(pendingPartialTokens, tokens);
|
|
919
|
+
continue;
|
|
920
|
+
}
|
|
830
921
|
this.ignoreStartTextDetector.recordGeneration({
|
|
831
|
-
text: this.llamaChat.model.detokenize(
|
|
832
|
-
tokens
|
|
922
|
+
text: this.llamaChat.model.detokenize(tokens, false, lastTokensForDetokenizer),
|
|
923
|
+
tokens,
|
|
833
924
|
startNewChecks: i === 0,
|
|
834
925
|
triggerMustStartWithGeneration: true
|
|
835
926
|
});
|
|
836
|
-
lastTokensForDetokenizer
|
|
927
|
+
pushAll(lastTokensForDetokenizer, tokens);
|
|
837
928
|
if (this.ignoreStartTextDetector.hasTriggeredStops) {
|
|
838
929
|
mostExhaustiveTriggeredStops = this.ignoreStartTextDetector.getTriggeredStops();
|
|
839
930
|
this.ignoreStartTextDetector.clearTriggeredStops();
|
|
@@ -902,11 +993,12 @@ class GenerateResponseState {
|
|
|
902
993
|
const queuedChunkTokens = this.streamRegulator.getAllQueuedChunkTokens();
|
|
903
994
|
const functionCallsTokens = this.getContextWindowFunctionCallsTokens();
|
|
904
995
|
if (!avoidReloadingHistory || !this.canAvoidReloadingHistory || !this.llamaChat.sequence.isLoadedToMemory) {
|
|
905
|
-
const { history: contextWindowHistory, stopGenerationTriggers, tokens: contextWindowTokens,
|
|
996
|
+
const { history: contextWindowHistory, stopGenerationTriggers, tokens: contextWindowTokens, removeRawFromHistory, newHistoryCompressionMetadata, ignoreStartText, functionCallInitiallyEngaged, disengageInitiallyEngagedFunctionCall, userTextSuffix } = await getContextWindow({
|
|
906
997
|
resolvedHistory: resolvedHistory,
|
|
907
998
|
resolvedContextShift: this.resolvedContextShift,
|
|
908
999
|
lastHistoryCompressionMetadata: this.lastHistoryCompressionMetadata,
|
|
909
|
-
pendingTokensCount: this.pendingTokens.length + queuedChunkTokens.length + functionCallsTokens.length
|
|
1000
|
+
pendingTokensCount: this.pendingTokens.length + queuedChunkTokens.length + functionCallsTokens.length +
|
|
1001
|
+
this.pendingPartialTokens.length,
|
|
910
1002
|
isFirstEvaluation: this.isFirstEvaluation,
|
|
911
1003
|
chatWrapper: this.chatWrapper,
|
|
912
1004
|
lastEvaluationContextWindowHistory: resolvedContextWindowsHistory,
|
|
@@ -924,19 +1016,22 @@ class GenerateResponseState {
|
|
|
924
1016
|
this.functionCallInitiallyEngaged = functionCallInitiallyEngaged;
|
|
925
1017
|
this.disengageInitiallyEngagedFunctionCall = disengageInitiallyEngagedFunctionCall;
|
|
926
1018
|
this.userTextSuffix = userTextSuffix;
|
|
927
|
-
this.resolvedHistory = newResolvedHistory;
|
|
928
1019
|
this.lastHistoryCompressionMetadata = newHistoryCompressionMetadata;
|
|
929
1020
|
this.lastContextWindowHistory = contextWindowHistory;
|
|
930
|
-
this.
|
|
931
|
-
this.contextWindowsRes = [];
|
|
1021
|
+
this.segmentHandler.resetContextWindow();
|
|
932
1022
|
this.canAvoidReloadingHistory = true;
|
|
1023
|
+
if (removeRawFromHistory && !this.noRawInResolvedHistory) {
|
|
1024
|
+
this.noRawInResolvedHistory = true;
|
|
1025
|
+
this.resolvedHistory = this.resolvedHistory.map(removeRawFromHistoryItem);
|
|
1026
|
+
}
|
|
933
1027
|
}
|
|
934
1028
|
this.tokens = [
|
|
935
1029
|
...this.contextWindowTokens,
|
|
936
1030
|
...this.ignoredStartTextTokens,
|
|
937
1031
|
...this.pendingTokens,
|
|
938
1032
|
...queuedChunkTokens,
|
|
939
|
-
...functionCallsTokens
|
|
1033
|
+
...functionCallsTokens,
|
|
1034
|
+
...this.pendingPartialTokens
|
|
940
1035
|
];
|
|
941
1036
|
if (avoidReloadingHistory && this.tokens.length >= this.llamaChat.sequence.context.contextSize - 1)
|
|
942
1037
|
return await this.loadContextWindow(resolvedHistory, resolvedContextWindowsHistory, endWithUserText, false);
|
|
@@ -1017,24 +1112,24 @@ class GenerateResponseState {
|
|
|
1017
1112
|
pushAll(prefixDetectorRecordedTokens, tokens);
|
|
1018
1113
|
}
|
|
1019
1114
|
}
|
|
1020
|
-
for await (const
|
|
1115
|
+
for await (const tokens of this.evaluateWithContextShift(loadContextWindow)) {
|
|
1021
1116
|
const stopGenerationTriggerRes = this.handleStopGenerationTrigger("model");
|
|
1022
1117
|
if (stopGenerationTriggerRes != null)
|
|
1023
1118
|
return stopGenerationTriggerRes;
|
|
1024
|
-
this.currentFunctionCallCurrentPartTokens
|
|
1119
|
+
pushAll(this.currentFunctionCallCurrentPartTokens, tokens);
|
|
1025
1120
|
this.disengageInitiallyEngagedFunctionMode.recordGeneration({
|
|
1026
1121
|
text: this.currentText,
|
|
1027
1122
|
tokens: this.currentTokens,
|
|
1028
|
-
startNewChecks: this.currentFunctionCallCurrentPartTokens.length ===
|
|
1123
|
+
startNewChecks: this.currentFunctionCallCurrentPartTokens.length === tokens.length,
|
|
1029
1124
|
triggerMustStartWithGeneration: true
|
|
1030
1125
|
});
|
|
1031
1126
|
if (prefixDetector.hasTriggeredStops)
|
|
1032
|
-
afterPrefixLeftoverTokens
|
|
1127
|
+
pushAll(afterPrefixLeftoverTokens, tokens);
|
|
1033
1128
|
else {
|
|
1034
1129
|
prefixDetector.recordGeneration({
|
|
1035
1130
|
text: this.currentText,
|
|
1036
1131
|
tokens: this.currentTokens,
|
|
1037
|
-
startNewChecks: this.currentFunctionCallCurrentPartTokens.length ===
|
|
1132
|
+
startNewChecks: this.currentFunctionCallCurrentPartTokens.length === tokens.length,
|
|
1038
1133
|
triggerMustStartWithGeneration: true
|
|
1039
1134
|
});
|
|
1040
1135
|
pushAll(prefixDetectorRecordedTokens, this.currentTokens);
|
|
@@ -1109,8 +1204,8 @@ class GenerateResponseState {
|
|
|
1109
1204
|
}
|
|
1110
1205
|
}
|
|
1111
1206
|
}
|
|
1112
|
-
for await (const
|
|
1113
|
-
this.currentFunctionCallCurrentPartTokens
|
|
1207
|
+
for await (const tokens of this.evaluateWithContextShift(loadContextWindow)) {
|
|
1208
|
+
pushAll(this.currentFunctionCallCurrentPartTokens, tokens);
|
|
1114
1209
|
functionNameGenerationDoneDetector.recordGeneration({
|
|
1115
1210
|
text: this.currentText,
|
|
1116
1211
|
tokens: this.currentTokens
|
|
@@ -1154,8 +1249,8 @@ class GenerateResponseState {
|
|
|
1154
1249
|
});
|
|
1155
1250
|
StopGenerationDetector.resolveStopTriggers(this.functionsGrammar.stopGenerationTriggers, this.llamaChat.model.tokenizer)
|
|
1156
1251
|
.map((stopTrigger) => functionParamsGenerationDoneDetector.addStopTrigger(stopTrigger));
|
|
1157
|
-
for await (const
|
|
1158
|
-
this.currentFunctionCallCurrentPartTokens
|
|
1252
|
+
for await (const tokens of this.evaluateWithContextShift(loadContextWindow)) {
|
|
1253
|
+
pushAll(this.currentFunctionCallCurrentPartTokens, tokens);
|
|
1159
1254
|
functionParamsGenerationDoneDetector.recordGeneration({
|
|
1160
1255
|
text: this.currentText,
|
|
1161
1256
|
tokens: this.currentTokens
|
|
@@ -1213,8 +1308,8 @@ class GenerateResponseState {
|
|
|
1213
1308
|
LlamaText(new SpecialToken("EOT"))
|
|
1214
1309
|
], this.llamaChat.model.tokenizer)
|
|
1215
1310
|
.map((stopTrigger) => sectionSuffixDetector.addStopTrigger(stopTrigger));
|
|
1216
|
-
for await (const
|
|
1217
|
-
this.currentFunctionCallCurrentPartTokens
|
|
1311
|
+
for await (const tokens of this.evaluateWithContextShift(loadContextWindow)) {
|
|
1312
|
+
pushAll(this.currentFunctionCallCurrentPartTokens, tokens);
|
|
1218
1313
|
sectionSuffixDetector.recordGeneration({
|
|
1219
1314
|
text: this.currentText,
|
|
1220
1315
|
tokens: this.currentTokens,
|
|
@@ -1258,17 +1353,17 @@ class GenerateResponseState {
|
|
|
1258
1353
|
returnFunctionCallResults() {
|
|
1259
1354
|
if (this.resFunctionCalls.length > 0) {
|
|
1260
1355
|
this.releasePartiallyFreeTokensBeforeFunctionCallStart();
|
|
1261
|
-
|
|
1262
|
-
|
|
1263
|
-
|
|
1264
|
-
modelResponse = modelResponse.trimEnd();
|
|
1265
|
-
contextWindowModelResponse = contextWindowModelResponse.trimEnd();
|
|
1266
|
-
}
|
|
1356
|
+
this.segmentHandler.onFinishedGeneration();
|
|
1357
|
+
const trimWhitespaceSuffix = this.grammar?.trimWhitespaceSuffix || this.trimWhitespaceSuffix;
|
|
1358
|
+
const responseSegments = this.segmentHandler.getModelResponseSegments(trimWhitespaceSuffix);
|
|
1267
1359
|
return {
|
|
1268
|
-
response:
|
|
1360
|
+
response: responseSegments
|
|
1361
|
+
.filter((segment) => typeof segment === "string")
|
|
1362
|
+
.join(""),
|
|
1363
|
+
fullResponse: responseSegments,
|
|
1269
1364
|
lastEvaluation: {
|
|
1270
|
-
contextWindow:
|
|
1271
|
-
cleanHistory:
|
|
1365
|
+
contextWindow: mergeGeneratedResultWithChatHistory("model", this.lastContextWindowHistory, this.segmentHandler.getContextWindowModelResponseSegments(trimWhitespaceSuffix)),
|
|
1366
|
+
cleanHistory: mergeGeneratedResultWithChatHistory("model", this.resolvedHistory, responseSegments),
|
|
1272
1367
|
contextShiftMetadata: this.lastHistoryCompressionMetadata
|
|
1273
1368
|
},
|
|
1274
1369
|
functionCalls: this.resFunctionCalls.map((functionCall) => {
|
|
@@ -1292,9 +1387,10 @@ class GenerateResponseState {
|
|
|
1292
1387
|
await this.alignCurrentSequenceStateWithCurrentTokens();
|
|
1293
1388
|
await this.createNewEvaluationIterator();
|
|
1294
1389
|
while (await this.iterateEvaluation()) {
|
|
1295
|
-
if (this.
|
|
1390
|
+
if (this.currentTokens.length === 0)
|
|
1296
1391
|
break;
|
|
1297
|
-
|
|
1392
|
+
if (!this.holdPartialTokensForNextEvaluation())
|
|
1393
|
+
yield this.currentTokens;
|
|
1298
1394
|
if (this.shouldAbort)
|
|
1299
1395
|
return;
|
|
1300
1396
|
if (this.updateShouldContextShift())
|
|
@@ -1367,9 +1463,14 @@ class GenerateResponseState {
|
|
|
1367
1463
|
this.currentIterationReplacementToken = undefined;
|
|
1368
1464
|
this.ensureNotAborted();
|
|
1369
1465
|
this.generatedTokens++;
|
|
1370
|
-
if (this.currentIteration != null && this.currentIteration?.done !== true) {
|
|
1371
|
-
this.currentToken = this.currentIteration
|
|
1372
|
-
this.currentTokens =
|
|
1466
|
+
if ((this.currentIteration != null && this.currentIteration?.done !== true) || this.pendingPartialTokens.length !== 0) {
|
|
1467
|
+
this.currentToken = this.currentIteration?.value ?? undefined;
|
|
1468
|
+
this.currentTokens = this.currentToken != null
|
|
1469
|
+
? this.pendingPartialTokens.length === 0
|
|
1470
|
+
? [this.currentToken]
|
|
1471
|
+
: [...this.pendingPartialTokens, this.currentToken]
|
|
1472
|
+
: [...this.pendingPartialTokens];
|
|
1473
|
+
this.pendingPartialTokens.length = 0;
|
|
1373
1474
|
this.currentText = this.llamaChat.model.detokenize(this.currentTokens, false, this.getLastTokens());
|
|
1374
1475
|
if (this.functionEvaluationMode === false)
|
|
1375
1476
|
this.currentQueuedTokenRelease = this.streamRegulator.addChunk({
|
|
@@ -1382,6 +1483,19 @@ class GenerateResponseState {
|
|
|
1382
1483
|
}
|
|
1383
1484
|
return false;
|
|
1384
1485
|
}
|
|
1486
|
+
holdPartialTokensForNextEvaluation() {
|
|
1487
|
+
if (this.pendingPartialTokens.length === 0 &&
|
|
1488
|
+
this.currentText.endsWith(UNKNOWN_UNICODE_CHAR) &&
|
|
1489
|
+
this.currentToken != null &&
|
|
1490
|
+
!this.llamaChat.model.isSpecialToken(this.currentToken) &&
|
|
1491
|
+
!this.llamaChat.model.isEogToken(this.currentToken)) {
|
|
1492
|
+
this.pendingPartialTokens.length = 0;
|
|
1493
|
+
pushAll(this.pendingPartialTokens, this.currentTokens);
|
|
1494
|
+
this.streamRegulator.removeChunkIfLast(this.currentQueuedTokenRelease);
|
|
1495
|
+
return true;
|
|
1496
|
+
}
|
|
1497
|
+
return false;
|
|
1498
|
+
}
|
|
1385
1499
|
waitOnPartialCharactersOrWhiteSpaceTokens() {
|
|
1386
1500
|
if (this.currentText.endsWith(UNKNOWN_UNICODE_CHAR) || ((this.grammar?.trimWhitespaceSuffix || this.trimWhitespaceSuffix) && this.currentText?.trim() === "") || (this.currentText === "" && this.locksToReleaseOnValidGeneration.length > 0 &&
|
|
1387
1501
|
!this.llamaChat.model.isSpecialToken(this.currentToken))) {
|
|
@@ -1449,21 +1563,22 @@ class GenerateResponseState {
|
|
|
1449
1563
|
const { firstRemainingGenerationAfterStop } = StopGenerationDetector.getFirstRemainingGenerationAfterStop(triggeredStops);
|
|
1450
1564
|
this.removeFoundStartIgnoreTextsFromPendingTokens(true);
|
|
1451
1565
|
this.pushPendingTokensAndCallOnToken();
|
|
1452
|
-
|
|
1453
|
-
|
|
1454
|
-
|
|
1455
|
-
|
|
1456
|
-
|
|
1457
|
-
|
|
1566
|
+
this.segmentHandler.onFinishedGeneration();
|
|
1567
|
+
const trimWhitespaceSuffix = this.grammar?.trimWhitespaceSuffix || this.trimWhitespaceSuffix;
|
|
1568
|
+
const responseSegments = this.segmentHandler.getModelResponseSegments(trimWhitespaceSuffix);
|
|
1569
|
+
const response = responseSegments
|
|
1570
|
+
.filter((segment) => typeof segment === "string")
|
|
1571
|
+
.join("");
|
|
1458
1572
|
const lastEvaluation = {
|
|
1459
|
-
contextWindow:
|
|
1460
|
-
cleanHistory:
|
|
1573
|
+
contextWindow: mergeGeneratedResultWithChatHistory(lastHistoryItemType, this.lastContextWindowHistory, this.segmentHandler.getContextWindowModelResponseSegments(trimWhitespaceSuffix)),
|
|
1574
|
+
cleanHistory: mergeGeneratedResultWithChatHistory(lastHistoryItemType, this.resolvedHistory, responseSegments),
|
|
1461
1575
|
contextShiftMetadata: this.lastHistoryCompressionMetadata
|
|
1462
1576
|
};
|
|
1463
1577
|
const isEogToken = this.llamaChat.model.isEogToken(this.currentToken);
|
|
1464
1578
|
if (isEogToken || this.stopGenerationDetector.hasTriggeredStops) {
|
|
1465
1579
|
return {
|
|
1466
|
-
response
|
|
1580
|
+
response,
|
|
1581
|
+
fullResponse: responseSegments,
|
|
1467
1582
|
lastEvaluation,
|
|
1468
1583
|
metadata: {
|
|
1469
1584
|
remainingGenerationAfterStop: firstRemainingGenerationAfterStop,
|
|
@@ -1474,7 +1589,8 @@ class GenerateResponseState {
|
|
|
1474
1589
|
};
|
|
1475
1590
|
}
|
|
1476
1591
|
return {
|
|
1477
|
-
response
|
|
1592
|
+
response,
|
|
1593
|
+
fullResponse: responseSegments,
|
|
1478
1594
|
lastEvaluation,
|
|
1479
1595
|
metadata: {
|
|
1480
1596
|
remainingGenerationAfterStop: firstRemainingGenerationAfterStop,
|
|
@@ -1511,17 +1627,17 @@ class GenerateResponseState {
|
|
|
1511
1627
|
}
|
|
1512
1628
|
handleMaxTokensTrigger(lastHistoryItemType) {
|
|
1513
1629
|
if (this.isMaxTokensTriggered()) {
|
|
1514
|
-
|
|
1515
|
-
|
|
1516
|
-
|
|
1517
|
-
modelResponse = modelResponse.trimEnd();
|
|
1518
|
-
contextWindowModelResponse = contextWindowModelResponse.trimEnd();
|
|
1519
|
-
}
|
|
1630
|
+
this.segmentHandler.onFinishedGeneration();
|
|
1631
|
+
const trimWhitespaceSuffix = this.grammar?.trimWhitespaceSuffix || this.trimWhitespaceSuffix;
|
|
1632
|
+
const responseSegments = this.segmentHandler.getModelResponseSegments(trimWhitespaceSuffix);
|
|
1520
1633
|
return {
|
|
1521
|
-
response:
|
|
1634
|
+
response: responseSegments
|
|
1635
|
+
.filter((segment) => typeof segment === "string")
|
|
1636
|
+
.join(""),
|
|
1637
|
+
fullResponse: responseSegments,
|
|
1522
1638
|
lastEvaluation: {
|
|
1523
|
-
contextWindow:
|
|
1524
|
-
cleanHistory:
|
|
1639
|
+
contextWindow: mergeGeneratedResultWithChatHistory(lastHistoryItemType, this.lastContextWindowHistory, this.segmentHandler.getContextWindowModelResponseSegments(trimWhitespaceSuffix)),
|
|
1640
|
+
cleanHistory: mergeGeneratedResultWithChatHistory(lastHistoryItemType, this.resolvedHistory, responseSegments),
|
|
1525
1641
|
contextShiftMetadata: this.lastHistoryCompressionMetadata
|
|
1526
1642
|
},
|
|
1527
1643
|
metadata: {
|
|
@@ -1542,17 +1658,17 @@ class GenerateResponseState {
|
|
|
1542
1658
|
if (this.shouldAbort && this.signal?.aborted && this.stopOnAbortSignal) {
|
|
1543
1659
|
if (this.res.length === 0)
|
|
1544
1660
|
throw this.signal.reason;
|
|
1545
|
-
|
|
1546
|
-
|
|
1547
|
-
|
|
1548
|
-
modelResponse = modelResponse.trimEnd();
|
|
1549
|
-
contextWindowModelResponse = contextWindowModelResponse.trimEnd();
|
|
1550
|
-
}
|
|
1661
|
+
this.segmentHandler.onFinishedGeneration();
|
|
1662
|
+
const trimWhitespaceSuffix = this.grammar?.trimWhitespaceSuffix || this.trimWhitespaceSuffix;
|
|
1663
|
+
const responseSegments = this.segmentHandler.getModelResponseSegments(trimWhitespaceSuffix);
|
|
1551
1664
|
return {
|
|
1552
|
-
response:
|
|
1665
|
+
response: responseSegments
|
|
1666
|
+
.filter((segment) => typeof segment === "string")
|
|
1667
|
+
.join(""),
|
|
1668
|
+
fullResponse: responseSegments,
|
|
1553
1669
|
lastEvaluation: {
|
|
1554
|
-
contextWindow:
|
|
1555
|
-
cleanHistory:
|
|
1670
|
+
contextWindow: mergeGeneratedResultWithChatHistory(lastHistoryItemType, this.lastContextWindowHistory, this.segmentHandler.getContextWindowModelResponseSegments(trimWhitespaceSuffix)),
|
|
1671
|
+
cleanHistory: mergeGeneratedResultWithChatHistory(lastHistoryItemType, this.resolvedHistory, responseSegments),
|
|
1556
1672
|
contextShiftMetadata: this.lastHistoryCompressionMetadata
|
|
1557
1673
|
},
|
|
1558
1674
|
metadata: {
|
|
@@ -1565,10 +1681,8 @@ class GenerateResponseState {
|
|
|
1565
1681
|
pushPendingTokensAndCallOnToken() {
|
|
1566
1682
|
if (this.pendingTokens.length === 0)
|
|
1567
1683
|
return;
|
|
1568
|
-
this.
|
|
1569
|
-
this.onTextChunk?.(this.llamaChat.model.detokenize(this.pendingTokens, false, this.res));
|
|
1684
|
+
this.segmentHandler.processTokens(this.pendingTokens);
|
|
1570
1685
|
pushAll(this.res, this.pendingTokens);
|
|
1571
|
-
pushAll(this.contextWindowsRes, this.pendingTokens);
|
|
1572
1686
|
this.pendingTokens.length = 0;
|
|
1573
1687
|
}
|
|
1574
1688
|
getLastTokens(maxTokens = maxRecentDetokenizerTokens) {
|
|
@@ -1577,8 +1691,470 @@ class GenerateResponseState {
|
|
|
1577
1691
|
this.ignoredStartTextTokens,
|
|
1578
1692
|
this.pendingTokens,
|
|
1579
1693
|
this.streamRegulator.getLastQueuedChunkTokens(maxTokens),
|
|
1580
|
-
this.getContextWindowFunctionCallsTokens()
|
|
1694
|
+
this.getContextWindowFunctionCallsTokens(),
|
|
1695
|
+
this.pendingPartialTokens
|
|
1581
1696
|
], maxTokens);
|
|
1582
1697
|
}
|
|
1583
1698
|
}
|
|
1699
|
+
class SegmentHandler {
|
|
1700
|
+
model;
|
|
1701
|
+
onToken;
|
|
1702
|
+
onTextChunk;
|
|
1703
|
+
onResponseChunk;
|
|
1704
|
+
_closeAllSegmentsDetector;
|
|
1705
|
+
_segmentDetectors;
|
|
1706
|
+
_segmentsStack = [];
|
|
1707
|
+
_segmentsStackSet = new Set();
|
|
1708
|
+
_ownedSegmentsStackLength = 0;
|
|
1709
|
+
_segments = [];
|
|
1710
|
+
_segmentsStartTokenTrail = [];
|
|
1711
|
+
_contextWindowSegments = [];
|
|
1712
|
+
_contextWindowStartTokenTrail = [];
|
|
1713
|
+
_initialTokensTrail;
|
|
1714
|
+
_tokensTrail;
|
|
1715
|
+
_streamRegulator = new TokenStreamRegulator();
|
|
1716
|
+
_segmentDefinitions;
|
|
1717
|
+
constructor({ model, onTextChunk, onToken, onResponseChunk, segmentDefinitions, closeAllSegments, initialSegmentStack, previousTokens }) {
|
|
1718
|
+
this.model = model;
|
|
1719
|
+
this.onTextChunk = onTextChunk;
|
|
1720
|
+
this.onToken = onToken;
|
|
1721
|
+
this.onResponseChunk = onResponseChunk;
|
|
1722
|
+
this._initialTokensTrail = previousTokens.slice(-maxRecentDetokenizerTokens);
|
|
1723
|
+
this._segmentsStartTokenTrail = previousTokens.slice(-maxRecentDetokenizerTokens);
|
|
1724
|
+
this._tokensTrail = previousTokens.slice(-maxRecentDetokenizerTokens);
|
|
1725
|
+
this._closeAllSegmentsDetector = closeAllSegments != null
|
|
1726
|
+
? new StopGenerationDetector()
|
|
1727
|
+
.addStopTrigger(StopGenerationDetector.resolveLlamaTextTrigger(LlamaText(closeAllSegments), this.model.tokenizer))
|
|
1728
|
+
: undefined;
|
|
1729
|
+
this._segmentDetectors = new Map();
|
|
1730
|
+
this._segmentsStack = initialSegmentStack;
|
|
1731
|
+
this._segmentsStackSet = new Set(initialSegmentStack);
|
|
1732
|
+
this._ownedSegmentsStackLength = initialSegmentStack.length;
|
|
1733
|
+
this._segmentDefinitions = segmentDefinitions;
|
|
1734
|
+
for (const [segment, { prefix, suffix }] of segmentDefinitions.entries()) {
|
|
1735
|
+
this._segmentDetectors.set(segment, {
|
|
1736
|
+
prefix: new StopGenerationDetector()
|
|
1737
|
+
.addStopTrigger(StopGenerationDetector.resolveLlamaTextTrigger(LlamaText(prefix), this.model.tokenizer)),
|
|
1738
|
+
suffix: suffix != null
|
|
1739
|
+
? new StopGenerationDetector()
|
|
1740
|
+
.addStopTrigger(StopGenerationDetector.resolveLlamaTextTrigger(LlamaText(suffix), this.model.tokenizer))
|
|
1741
|
+
: undefined
|
|
1742
|
+
});
|
|
1743
|
+
}
|
|
1744
|
+
}
|
|
1745
|
+
processTokens(tokens) {
|
|
1746
|
+
if (tokens.length === 0)
|
|
1747
|
+
return;
|
|
1748
|
+
let pendingTokens = [];
|
|
1749
|
+
for (const token of tokens) {
|
|
1750
|
+
pendingTokens.push(token);
|
|
1751
|
+
const currentText = this.model.detokenize(pendingTokens, false, this._tokensTrail);
|
|
1752
|
+
if (currentText.endsWith(UNKNOWN_UNICODE_CHAR))
|
|
1753
|
+
continue;
|
|
1754
|
+
pushAll(this._tokensTrail, pendingTokens);
|
|
1755
|
+
this._processTokens(pendingTokens, currentText);
|
|
1756
|
+
pendingTokens = [];
|
|
1757
|
+
}
|
|
1758
|
+
}
|
|
1759
|
+
onFinishedGeneration() {
|
|
1760
|
+
this._clearDetectors();
|
|
1761
|
+
this._pushCurrentTokens(this._streamRegulator.popFreeChunkTokens());
|
|
1762
|
+
}
|
|
1763
|
+
resetContextWindow() {
|
|
1764
|
+
this._contextWindowSegments.length = 0;
|
|
1765
|
+
this._contextWindowStartTokenTrail.length = 0;
|
|
1766
|
+
pushAll(this._contextWindowStartTokenTrail, this._getTokenTrailFromResult());
|
|
1767
|
+
}
|
|
1768
|
+
openSegment(type) {
|
|
1769
|
+
const now = Date.now();
|
|
1770
|
+
this._segmentsStack.push(type);
|
|
1771
|
+
this._segmentsStackSet.add(type);
|
|
1772
|
+
this._segments.push({ type, tokens: [], ended: false, start: true, startTime: now });
|
|
1773
|
+
this._contextWindowSegments.push({ type, tokens: [], ended: false, start: true, startTime: now });
|
|
1774
|
+
this.onResponseChunk?.({
|
|
1775
|
+
type: "segment",
|
|
1776
|
+
segmentType: type,
|
|
1777
|
+
tokens: [],
|
|
1778
|
+
text: "",
|
|
1779
|
+
segmentStartTime: new Date(now)
|
|
1780
|
+
});
|
|
1781
|
+
}
|
|
1782
|
+
_processTokens(tokens, text) {
|
|
1783
|
+
const queuedTokenRelease = this._streamRegulator.addChunk({
|
|
1784
|
+
tokens,
|
|
1785
|
+
text
|
|
1786
|
+
});
|
|
1787
|
+
const currentType = this._segmentsStack.at(-1);
|
|
1788
|
+
const handleDetector = (stopDetector, action, type) => {
|
|
1789
|
+
if (stopDetector == null)
|
|
1790
|
+
return false;
|
|
1791
|
+
stopDetector.recordGeneration({
|
|
1792
|
+
text,
|
|
1793
|
+
tokens,
|
|
1794
|
+
queuedTokenRelease
|
|
1795
|
+
});
|
|
1796
|
+
if (stopDetector.hasTriggeredStops) {
|
|
1797
|
+
const [leftTokens, leftText] = this._handleTriggeredStopDetector(stopDetector);
|
|
1798
|
+
if (action === "pop")
|
|
1799
|
+
this._closeSegment(type);
|
|
1800
|
+
else if (action === "push") {
|
|
1801
|
+
this.openSegment(type);
|
|
1802
|
+
}
|
|
1803
|
+
else if (action === "reset") {
|
|
1804
|
+
const now = Date.now();
|
|
1805
|
+
while (this._segmentsStack.length > 0) {
|
|
1806
|
+
const segmentType = this._segmentsStack.pop();
|
|
1807
|
+
this._segmentsStackSet.delete(segmentType);
|
|
1808
|
+
const lastSegment = this._segments.at(-1);
|
|
1809
|
+
if (lastSegment != null && !(lastSegment instanceof Array) && lastSegment.type === segmentType) {
|
|
1810
|
+
lastSegment.ended = true;
|
|
1811
|
+
lastSegment.endTime = now;
|
|
1812
|
+
this.onResponseChunk?.({
|
|
1813
|
+
type: "segment",
|
|
1814
|
+
segmentType: segmentType,
|
|
1815
|
+
tokens: [],
|
|
1816
|
+
text: "",
|
|
1817
|
+
segmentStartTime: undefined,
|
|
1818
|
+
segmentEndTime: new Date(now)
|
|
1819
|
+
});
|
|
1820
|
+
}
|
|
1821
|
+
else {
|
|
1822
|
+
this._segments.push({ type: segmentType, tokens: [], ended: true, start: false, endTime: now });
|
|
1823
|
+
this.onResponseChunk?.({
|
|
1824
|
+
type: "segment",
|
|
1825
|
+
segmentType: segmentType,
|
|
1826
|
+
tokens: [],
|
|
1827
|
+
text: "",
|
|
1828
|
+
segmentStartTime: undefined,
|
|
1829
|
+
segmentEndTime: new Date(now)
|
|
1830
|
+
});
|
|
1831
|
+
}
|
|
1832
|
+
const lastContextWindowSegment = this._contextWindowSegments.at(-1);
|
|
1833
|
+
if (lastContextWindowSegment != null && !(lastContextWindowSegment instanceof Array) &&
|
|
1834
|
+
lastContextWindowSegment.type === segmentType)
|
|
1835
|
+
lastContextWindowSegment.ended = true;
|
|
1836
|
+
else
|
|
1837
|
+
this._contextWindowSegments.push({ type: segmentType, tokens: [], ended: true, start: false, endTime: now });
|
|
1838
|
+
}
|
|
1839
|
+
this._ownedSegmentsStackLength = 0;
|
|
1840
|
+
}
|
|
1841
|
+
if (leftTokens.length > 0)
|
|
1842
|
+
this._processTokens(leftTokens, leftText);
|
|
1843
|
+
return true;
|
|
1844
|
+
}
|
|
1845
|
+
return false;
|
|
1846
|
+
};
|
|
1847
|
+
if (currentType != null) {
|
|
1848
|
+
if (handleDetector(this._closeAllSegmentsDetector, "reset", currentType))
|
|
1849
|
+
return;
|
|
1850
|
+
if (handleDetector(this._segmentDetectors.get(currentType)?.suffix, "pop", currentType))
|
|
1851
|
+
return;
|
|
1852
|
+
}
|
|
1853
|
+
else
|
|
1854
|
+
this._closeAllSegmentsDetector?.clearInProgressStops();
|
|
1855
|
+
for (const [type, { prefix, suffix }] of this._segmentDetectors.entries()) {
|
|
1856
|
+
if (!this._segmentsStackSet.has(type)) {
|
|
1857
|
+
if (handleDetector(prefix, "push", type))
|
|
1858
|
+
return;
|
|
1859
|
+
}
|
|
1860
|
+
else
|
|
1861
|
+
prefix.clearInProgressStops();
|
|
1862
|
+
if (this._segmentsStackSet.has(type)) {
|
|
1863
|
+
// `currentType` suffix is already handled above
|
|
1864
|
+
if (type === currentType && handleDetector(suffix, "pop", type))
|
|
1865
|
+
return;
|
|
1866
|
+
}
|
|
1867
|
+
else
|
|
1868
|
+
suffix?.clearInProgressStops();
|
|
1869
|
+
}
|
|
1870
|
+
this._pushCurrentTokens(this._streamRegulator.popFreeChunkTokens());
|
|
1871
|
+
}
|
|
1872
|
+
_handleTriggeredStopDetector(stopDetector) {
|
|
1873
|
+
this._clearDetectors(stopDetector);
|
|
1874
|
+
stopDetector.clearInProgressStops();
|
|
1875
|
+
const triggeredStops = stopDetector.getTriggeredStops();
|
|
1876
|
+
const freeTokens = this._streamRegulator.popFreeChunkTokens();
|
|
1877
|
+
const partiallyFreeTokens = this._streamRegulator.getPartiallyFreeChunk(this.model.tokenizer);
|
|
1878
|
+
const queuedTokensBeforeStopTrigger = getQueuedTokensBeforeStopTrigger(triggeredStops, partiallyFreeTokens, this.model.tokenizer);
|
|
1879
|
+
const { firstRemainingGenerationAfterStop } = StopGenerationDetector.getFirstRemainingGenerationAfterStop(triggeredStops);
|
|
1880
|
+
const remainingTokens = typeof firstRemainingGenerationAfterStop === "string"
|
|
1881
|
+
? firstRemainingGenerationAfterStop === ""
|
|
1882
|
+
? []
|
|
1883
|
+
: this.model.tokenize(firstRemainingGenerationAfterStop, false)
|
|
1884
|
+
: (firstRemainingGenerationAfterStop ?? []);
|
|
1885
|
+
const remainingText = typeof firstRemainingGenerationAfterStop === "string"
|
|
1886
|
+
? firstRemainingGenerationAfterStop
|
|
1887
|
+
: this.model.detokenize(remainingTokens, false, queuedTokensBeforeStopTrigger.length === 0
|
|
1888
|
+
? this._getTokenTrailFromResult()
|
|
1889
|
+
: queuedTokensBeforeStopTrigger);
|
|
1890
|
+
this._pushCurrentTokens([...freeTokens, ...queuedTokensBeforeStopTrigger]);
|
|
1891
|
+
stopDetector.clearTriggeredStops();
|
|
1892
|
+
this._streamRegulator.reset();
|
|
1893
|
+
return [remainingTokens, remainingText];
|
|
1894
|
+
}
|
|
1895
|
+
_closeSegment(type) {
|
|
1896
|
+
if (type == null)
|
|
1897
|
+
return;
|
|
1898
|
+
const lastSegment = this._segments.at(-1);
|
|
1899
|
+
const now = Date.now();
|
|
1900
|
+
if (lastSegment != null && !(lastSegment instanceof Array) && lastSegment.type === type && this._segmentsStack.at(-1) === type) {
|
|
1901
|
+
if (lastSegment.ended !== true) {
|
|
1902
|
+
lastSegment.ended = true;
|
|
1903
|
+
lastSegment.endTime = now;
|
|
1904
|
+
this.onResponseChunk?.({
|
|
1905
|
+
type: "segment",
|
|
1906
|
+
segmentType: type,
|
|
1907
|
+
tokens: [],
|
|
1908
|
+
text: "",
|
|
1909
|
+
segmentStartTime: undefined,
|
|
1910
|
+
segmentEndTime: new Date(now)
|
|
1911
|
+
});
|
|
1912
|
+
}
|
|
1913
|
+
this._segmentsStackSet.delete(this._segmentsStack.pop());
|
|
1914
|
+
if (this._segmentsStack.length < this._ownedSegmentsStackLength)
|
|
1915
|
+
this._ownedSegmentsStackLength = this._segmentsStack.length;
|
|
1916
|
+
const lastContextWindowSegment = this._contextWindowSegments.at(-1);
|
|
1917
|
+
if (lastContextWindowSegment != null && !(lastContextWindowSegment instanceof Array) &&
|
|
1918
|
+
lastContextWindowSegment.type === type && this._segmentsStack.at(-1) === type) {
|
|
1919
|
+
if (lastContextWindowSegment.ended !== true) {
|
|
1920
|
+
lastContextWindowSegment.ended = true;
|
|
1921
|
+
lastContextWindowSegment.endTime = now;
|
|
1922
|
+
}
|
|
1923
|
+
}
|
|
1924
|
+
else
|
|
1925
|
+
this._contextWindowSegments.push({ type, tokens: [], ended: true, start: false, endTime: now });
|
|
1926
|
+
return;
|
|
1927
|
+
}
|
|
1928
|
+
const typeIndex = this._segmentsStack.lastIndexOf(type);
|
|
1929
|
+
if (typeIndex < 0)
|
|
1930
|
+
return;
|
|
1931
|
+
for (let i = this._segmentsStack.length - 1; i >= typeIndex; i--) {
|
|
1932
|
+
const segmentType = this._segmentsStack.pop();
|
|
1933
|
+
this._segmentsStackSet.delete(segmentType);
|
|
1934
|
+
if (this._segmentsStack.length < this._ownedSegmentsStackLength)
|
|
1935
|
+
this._ownedSegmentsStackLength = this._segmentsStack.length;
|
|
1936
|
+
this._segments.push({ type: segmentType, tokens: [], ended: true, start: false, endTime: now });
|
|
1937
|
+
this._contextWindowSegments.push({ type: segmentType, tokens: [], ended: true, start: false, endTime: now });
|
|
1938
|
+
this.onResponseChunk?.({
|
|
1939
|
+
type: "segment",
|
|
1940
|
+
segmentType: segmentType,
|
|
1941
|
+
tokens: [],
|
|
1942
|
+
text: "",
|
|
1943
|
+
segmentStartTime: undefined,
|
|
1944
|
+
segmentEndTime: new Date(now)
|
|
1945
|
+
});
|
|
1946
|
+
}
|
|
1947
|
+
}
|
|
1948
|
+
_clearDetectors(skipDetector) {
|
|
1949
|
+
if (this._closeAllSegmentsDetector !== skipDetector) {
|
|
1950
|
+
this._closeAllSegmentsDetector?.clearInProgressStops();
|
|
1951
|
+
this._closeAllSegmentsDetector?.clearTriggeredStops();
|
|
1952
|
+
}
|
|
1953
|
+
for (const { prefix, suffix } of this._segmentDetectors.values()) {
|
|
1954
|
+
if (prefix !== skipDetector) {
|
|
1955
|
+
prefix.clearInProgressStops();
|
|
1956
|
+
prefix.clearTriggeredStops();
|
|
1957
|
+
}
|
|
1958
|
+
if (suffix !== skipDetector) {
|
|
1959
|
+
suffix?.clearInProgressStops();
|
|
1960
|
+
suffix?.clearTriggeredStops();
|
|
1961
|
+
}
|
|
1962
|
+
}
|
|
1963
|
+
}
|
|
1964
|
+
_pushCurrentTokens(tokens) {
|
|
1965
|
+
const lastSegment = this._segments.at(-1);
|
|
1966
|
+
const lastContextWindowSegment = this._contextWindowSegments.at(-1);
|
|
1967
|
+
const type = this._segmentsStack.at(-1);
|
|
1968
|
+
if (type == null) {
|
|
1969
|
+
if (lastSegment == null) {
|
|
1970
|
+
const text = (this.onResponseChunk != null || this.onTextChunk != null)
|
|
1971
|
+
? this.model.detokenize(tokens, false, this._getTokenTrailFromResult())
|
|
1972
|
+
: "";
|
|
1973
|
+
this._segments.push(tokens);
|
|
1974
|
+
this.onToken?.(tokens.slice());
|
|
1975
|
+
this.onTextChunk?.(text);
|
|
1976
|
+
this.onResponseChunk?.({ type: undefined, segmentType: undefined, tokens: tokens.slice(), text });
|
|
1977
|
+
}
|
|
1978
|
+
else {
|
|
1979
|
+
if (lastSegment instanceof Array) {
|
|
1980
|
+
const text = (this.onResponseChunk != null || this.onTextChunk != null)
|
|
1981
|
+
? this.model.detokenize(tokens, false, this._getTokenTrailFromResult())
|
|
1982
|
+
: "";
|
|
1983
|
+
pushAll(lastSegment, tokens);
|
|
1984
|
+
this.onToken?.(tokens);
|
|
1985
|
+
this.onTextChunk?.(text);
|
|
1986
|
+
this.onResponseChunk?.({ type: undefined, segmentType: undefined, tokens, text });
|
|
1987
|
+
}
|
|
1988
|
+
else
|
|
1989
|
+
this._segments.push(tokens);
|
|
1990
|
+
}
|
|
1991
|
+
if (lastContextWindowSegment == null)
|
|
1992
|
+
this._contextWindowSegments.push(tokens.slice());
|
|
1993
|
+
else {
|
|
1994
|
+
if (lastContextWindowSegment instanceof Array)
|
|
1995
|
+
pushAll(lastContextWindowSegment, tokens);
|
|
1996
|
+
else
|
|
1997
|
+
this._contextWindowSegments.push(tokens.slice());
|
|
1998
|
+
}
|
|
1999
|
+
}
|
|
2000
|
+
else {
|
|
2001
|
+
const now = Date.now();
|
|
2002
|
+
if (lastSegment == null) {
|
|
2003
|
+
const text = this.onResponseChunk != null
|
|
2004
|
+
? this.model.detokenize(tokens, false, this._getTokenTrailFromResult())
|
|
2005
|
+
: "";
|
|
2006
|
+
this._segments.push({
|
|
2007
|
+
type,
|
|
2008
|
+
tokens,
|
|
2009
|
+
ended: false,
|
|
2010
|
+
start: this._segmentsStack.length > this._ownedSegmentsStackLength,
|
|
2011
|
+
startTime: now
|
|
2012
|
+
});
|
|
2013
|
+
this.onResponseChunk?.({
|
|
2014
|
+
type: "segment",
|
|
2015
|
+
segmentType: type,
|
|
2016
|
+
tokens: tokens.slice(),
|
|
2017
|
+
text,
|
|
2018
|
+
segmentStartTime: new Date(now)
|
|
2019
|
+
});
|
|
2020
|
+
}
|
|
2021
|
+
else {
|
|
2022
|
+
const text = this.onResponseChunk != null
|
|
2023
|
+
? this.model.detokenize(tokens, false, this._getTokenTrailFromResult())
|
|
2024
|
+
: "";
|
|
2025
|
+
if (lastSegment instanceof Array || lastSegment.type !== type) {
|
|
2026
|
+
this._segments.push({
|
|
2027
|
+
type,
|
|
2028
|
+
tokens,
|
|
2029
|
+
ended: false,
|
|
2030
|
+
start: this._segmentsStack.length > this._ownedSegmentsStackLength,
|
|
2031
|
+
startTime: now
|
|
2032
|
+
});
|
|
2033
|
+
this.onResponseChunk?.({
|
|
2034
|
+
type: "segment",
|
|
2035
|
+
segmentType: type,
|
|
2036
|
+
tokens: tokens.slice(),
|
|
2037
|
+
text,
|
|
2038
|
+
segmentStartTime: new Date(now)
|
|
2039
|
+
});
|
|
2040
|
+
}
|
|
2041
|
+
else {
|
|
2042
|
+
pushAll(lastSegment.tokens, tokens);
|
|
2043
|
+
this.onResponseChunk?.({
|
|
2044
|
+
type: "segment",
|
|
2045
|
+
segmentType: type,
|
|
2046
|
+
tokens: tokens.slice(),
|
|
2047
|
+
text,
|
|
2048
|
+
segmentStartTime: undefined
|
|
2049
|
+
});
|
|
2050
|
+
}
|
|
2051
|
+
}
|
|
2052
|
+
if (lastContextWindowSegment == null)
|
|
2053
|
+
this._contextWindowSegments.push({
|
|
2054
|
+
type,
|
|
2055
|
+
tokens: tokens.slice(),
|
|
2056
|
+
ended: false,
|
|
2057
|
+
start: this._segmentsStack.length > this._ownedSegmentsStackLength,
|
|
2058
|
+
startTime: now
|
|
2059
|
+
});
|
|
2060
|
+
else {
|
|
2061
|
+
if (lastContextWindowSegment instanceof Array || lastContextWindowSegment.type !== type)
|
|
2062
|
+
this._contextWindowSegments.push({
|
|
2063
|
+
type,
|
|
2064
|
+
tokens: tokens.slice(),
|
|
2065
|
+
ended: false,
|
|
2066
|
+
start: this._segmentsStack.length > this._ownedSegmentsStackLength,
|
|
2067
|
+
startTime: now
|
|
2068
|
+
});
|
|
2069
|
+
else
|
|
2070
|
+
pushAll(lastContextWindowSegment.tokens, tokens);
|
|
2071
|
+
}
|
|
2072
|
+
}
|
|
2073
|
+
}
|
|
2074
|
+
_getTokenTrailFromResult() {
|
|
2075
|
+
const res = [];
|
|
2076
|
+
for (let i = this._segments.length - 1; i >= 0; i--) {
|
|
2077
|
+
const segment = this._segments[i];
|
|
2078
|
+
const segmentTokens = segment instanceof Array
|
|
2079
|
+
? segment
|
|
2080
|
+
: segment.tokens;
|
|
2081
|
+
for (let j = segmentTokens.length - 1; j >= 0; j--) {
|
|
2082
|
+
res.unshift(segmentTokens[j]);
|
|
2083
|
+
if (res.length >= maxRecentDetokenizerTokens)
|
|
2084
|
+
return res;
|
|
2085
|
+
}
|
|
2086
|
+
}
|
|
2087
|
+
for (let i = this._initialTokensTrail.length - 1; i >= 0; i--) {
|
|
2088
|
+
res.unshift(this._initialTokensTrail[i]);
|
|
2089
|
+
if (res.length >= maxRecentDetokenizerTokens)
|
|
2090
|
+
return res;
|
|
2091
|
+
}
|
|
2092
|
+
return res;
|
|
2093
|
+
}
|
|
2094
|
+
getModelResponseSegments(trimWhitespaceSuffix = false) {
|
|
2095
|
+
return this._getModelResponseForSegments(this._segments, this._segmentsStartTokenTrail, trimWhitespaceSuffix);
|
|
2096
|
+
}
|
|
2097
|
+
getContextWindowModelResponseSegments(trimWhitespaceSuffix = false) {
|
|
2098
|
+
return this._getModelResponseForSegments(this._contextWindowSegments, this._contextWindowStartTokenTrail, trimWhitespaceSuffix);
|
|
2099
|
+
}
|
|
2100
|
+
_getModelResponseForSegments(rawSegments, recentTokens, trimWhitespaceSuffix) {
|
|
2101
|
+
let tokenTrail = resolveLastTokens([recentTokens]);
|
|
2102
|
+
return rawSegments.map((rawSegment, index) => {
|
|
2103
|
+
const isLast = index === rawSegments.length - 1;
|
|
2104
|
+
if (rawSegment instanceof Array) {
|
|
2105
|
+
let text = this.model.detokenize(rawSegment, false, tokenTrail);
|
|
2106
|
+
if (isLast && trimWhitespaceSuffix)
|
|
2107
|
+
text = text.trimEnd();
|
|
2108
|
+
tokenTrail = resolveLastTokens([tokenTrail, rawSegment]);
|
|
2109
|
+
return text;
|
|
2110
|
+
}
|
|
2111
|
+
let text = this.model.detokenize(rawSegment.tokens, false, tokenTrail);
|
|
2112
|
+
if (isLast && rawSegment.ended && trimWhitespaceSuffix)
|
|
2113
|
+
text = text.trimEnd();
|
|
2114
|
+
tokenTrail = resolveLastTokens([tokenTrail, rawSegment.tokens]);
|
|
2115
|
+
const segmentDefinition = this._segmentDefinitions.get(rawSegment.type);
|
|
2116
|
+
return {
|
|
2117
|
+
type: "segment",
|
|
2118
|
+
segmentType: rawSegment.type,
|
|
2119
|
+
text,
|
|
2120
|
+
ended: rawSegment.ended,
|
|
2121
|
+
raw: segmentDefinition == null
|
|
2122
|
+
? LlamaText([text]).toJSON()
|
|
2123
|
+
: LlamaText([
|
|
2124
|
+
rawSegment.start
|
|
2125
|
+
? segmentDefinition.prefix
|
|
2126
|
+
: "",
|
|
2127
|
+
text,
|
|
2128
|
+
rawSegment.ended
|
|
2129
|
+
? (segmentDefinition.suffix ?? "")
|
|
2130
|
+
: ""
|
|
2131
|
+
]).toJSON(),
|
|
2132
|
+
startTime: rawSegment.startTime != null
|
|
2133
|
+
? new Date(rawSegment.startTime).toISOString()
|
|
2134
|
+
: undefined,
|
|
2135
|
+
endTime: rawSegment.endTime != null
|
|
2136
|
+
? new Date(rawSegment.endTime).toISOString()
|
|
2137
|
+
: undefined
|
|
2138
|
+
};
|
|
2139
|
+
});
|
|
2140
|
+
}
|
|
2141
|
+
static getStackFromModelResponse(modelResponse) {
|
|
2142
|
+
const stack = [];
|
|
2143
|
+
const stackSet = new Set();
|
|
2144
|
+
for (const item of modelResponse) {
|
|
2145
|
+
if (typeof item === "string" || isChatModelResponseFunctionCall(item))
|
|
2146
|
+
continue;
|
|
2147
|
+
void item.type;
|
|
2148
|
+
if (item.ended && stack.at(-1) === item.segmentType) {
|
|
2149
|
+
stack.pop();
|
|
2150
|
+
stackSet.delete(item.segmentType);
|
|
2151
|
+
}
|
|
2152
|
+
else if (!item.ended && !stackSet.has(item.segmentType)) {
|
|
2153
|
+
stack.push(item.segmentType);
|
|
2154
|
+
stackSet.add(item.segmentType);
|
|
2155
|
+
}
|
|
2156
|
+
}
|
|
2157
|
+
return stack;
|
|
2158
|
+
}
|
|
2159
|
+
}
|
|
1584
2160
|
//# sourceMappingURL=LlamaChat.js.map
|