node-llama-cpp 3.5.0 → 3.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127)
  1. package/README.md +2 -2
  2. package/dist/ChatWrapper.d.ts +3 -5
  3. package/dist/ChatWrapper.js +57 -5
  4. package/dist/ChatWrapper.js.map +1 -1
  5. package/dist/bindings/AddonTypes.d.ts +1 -1
  6. package/dist/bindings/Llama.js +2 -0
  7. package/dist/bindings/Llama.js.map +1 -1
  8. package/dist/bindings/utils/compileLLamaCpp.js +2 -0
  9. package/dist/bindings/utils/compileLLamaCpp.js.map +1 -1
  10. package/dist/chatWrappers/AlpacaChatWrapper.js.map +1 -1
  11. package/dist/chatWrappers/DeepSeekChatWrapper.d.ts +37 -0
  12. package/dist/chatWrappers/DeepSeekChatWrapper.js +294 -0
  13. package/dist/chatWrappers/DeepSeekChatWrapper.js.map +1 -0
  14. package/dist/chatWrappers/FalconChatWrapper.js.map +1 -1
  15. package/dist/chatWrappers/FunctionaryChatWrapper.js +40 -14
  16. package/dist/chatWrappers/FunctionaryChatWrapper.js.map +1 -1
  17. package/dist/chatWrappers/GeneralChatWrapper.js.map +1 -1
  18. package/dist/chatWrappers/Llama2ChatWrapper.js.map +1 -1
  19. package/dist/chatWrappers/Llama3_1ChatWrapper.d.ts +0 -3
  20. package/dist/chatWrappers/Llama3_1ChatWrapper.js +24 -13
  21. package/dist/chatWrappers/Llama3_1ChatWrapper.js.map +1 -1
  22. package/dist/chatWrappers/Llama3_2LightweightChatWrapper.js +22 -11
  23. package/dist/chatWrappers/Llama3_2LightweightChatWrapper.js.map +1 -1
  24. package/dist/chatWrappers/MistralChatWrapper.d.ts +2 -1
  25. package/dist/chatWrappers/MistralChatWrapper.js +39 -28
  26. package/dist/chatWrappers/MistralChatWrapper.js.map +1 -1
  27. package/dist/chatWrappers/QwenChatWrapper.d.ts +21 -0
  28. package/dist/chatWrappers/QwenChatWrapper.js +162 -0
  29. package/dist/chatWrappers/QwenChatWrapper.js.map +1 -0
  30. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.d.ts +41 -3
  31. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js +343 -126
  32. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js.map +1 -1
  33. package/dist/chatWrappers/generic/TemplateChatWrapper.d.ts +17 -1
  34. package/dist/chatWrappers/generic/TemplateChatWrapper.js +10 -2
  35. package/dist/chatWrappers/generic/TemplateChatWrapper.js.map +1 -1
  36. package/dist/chatWrappers/generic/utils/UniqueIdGenerator.d.ts +7 -0
  37. package/dist/chatWrappers/generic/utils/UniqueIdGenerator.js +30 -0
  38. package/dist/chatWrappers/generic/utils/UniqueIdGenerator.js.map +1 -0
  39. package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.d.ts +5 -4
  40. package/dist/chatWrappers/generic/utils/extractFunctionCallSettingsFromJinjaTemplate.d.ts +19 -0
  41. package/dist/chatWrappers/generic/utils/extractFunctionCallSettingsFromJinjaTemplate.js +446 -0
  42. package/dist/chatWrappers/generic/utils/extractFunctionCallSettingsFromJinjaTemplate.js.map +1 -0
  43. package/dist/chatWrappers/generic/utils/extractSegmentSettingsFromTokenizerAndChatTemplate.d.ts +2 -0
  44. package/dist/chatWrappers/generic/utils/extractSegmentSettingsFromTokenizerAndChatTemplate.js +38 -0
  45. package/dist/chatWrappers/generic/utils/extractSegmentSettingsFromTokenizerAndChatTemplate.js.map +1 -0
  46. package/dist/chatWrappers/generic/utils/getFirstValidResult.d.ts +6 -0
  47. package/dist/chatWrappers/generic/utils/getFirstValidResult.js +19 -0
  48. package/dist/chatWrappers/generic/utils/getFirstValidResult.js.map +1 -0
  49. package/dist/chatWrappers/generic/utils/squashChatHistoryItems.d.ts +2 -0
  50. package/dist/chatWrappers/generic/utils/squashChatHistoryItems.js +35 -0
  51. package/dist/chatWrappers/generic/utils/squashChatHistoryItems.js.map +1 -0
  52. package/dist/chatWrappers/generic/utils/templateSegmentOptionsToChatWrapperSettings.d.ts +22 -0
  53. package/dist/chatWrappers/generic/utils/templateSegmentOptionsToChatWrapperSettings.js +28 -0
  54. package/dist/chatWrappers/generic/utils/templateSegmentOptionsToChatWrapperSettings.js.map +1 -0
  55. package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.d.ts +3 -0
  56. package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.js +25 -0
  57. package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.js.map +1 -1
  58. package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js +197 -30
  59. package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js.map +1 -1
  60. package/dist/chatWrappers/utils/resolveChatWrapper.d.ts +48 -3
  61. package/dist/chatWrappers/utils/resolveChatWrapper.js +15 -5
  62. package/dist/chatWrappers/utils/resolveChatWrapper.js.map +1 -1
  63. package/dist/cli/commands/ChatCommand.js +38 -7
  64. package/dist/cli/commands/ChatCommand.js.map +1 -1
  65. package/dist/cli/recommendedModels.js +93 -10
  66. package/dist/cli/recommendedModels.js.map +1 -1
  67. package/dist/cli/utils/resolveModelRecommendationFileOptions.d.ts +1 -1
  68. package/dist/config.d.ts +1 -1
  69. package/dist/config.js +1 -1
  70. package/dist/config.js.map +1 -1
  71. package/dist/evaluator/LlamaChat/LlamaChat.d.ts +87 -5
  72. package/dist/evaluator/LlamaChat/LlamaChat.js +781 -196
  73. package/dist/evaluator/LlamaChat/LlamaChat.js.map +1 -1
  74. package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js +55 -1
  75. package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js.map +1 -1
  76. package/dist/evaluator/LlamaChatSession/LlamaChatSession.d.ts +22 -7
  77. package/dist/evaluator/LlamaChatSession/LlamaChatSession.js +28 -8
  78. package/dist/evaluator/LlamaChatSession/LlamaChatSession.js.map +1 -1
  79. package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.js +1 -1
  80. package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.js.map +1 -1
  81. package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.d.ts +1 -1
  82. package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.js.map +1 -1
  83. package/dist/evaluator/LlamaCompletion.js +61 -48
  84. package/dist/evaluator/LlamaCompletion.js.map +1 -1
  85. package/dist/evaluator/LlamaGrammar.d.ts +2 -2
  86. package/dist/evaluator/LlamaGrammar.js +5 -3
  87. package/dist/evaluator/LlamaGrammar.js.map +1 -1
  88. package/dist/evaluator/LlamaModel/LlamaModel.d.ts +3 -1
  89. package/dist/evaluator/LlamaModel/LlamaModel.js +4 -1
  90. package/dist/evaluator/LlamaModel/LlamaModel.js.map +1 -1
  91. package/dist/evaluator/LlamaRankingContext.js +1 -1
  92. package/dist/evaluator/LlamaRankingContext.js.map +1 -1
  93. package/dist/gguf/types/GgufMetadataTypes.d.ts +1 -1
  94. package/dist/gguf/types/GgufMetadataTypes.js.map +1 -1
  95. package/dist/index.d.ts +8 -5
  96. package/dist/index.js +4 -2
  97. package/dist/index.js.map +1 -1
  98. package/dist/tsconfig.tsbuildinfo +1 -1
  99. package/dist/types.d.ts +40 -2
  100. package/dist/types.js +7 -1
  101. package/dist/types.js.map +1 -1
  102. package/dist/utils/LlamaText.js +8 -9
  103. package/dist/utils/LlamaText.js.map +1 -1
  104. package/dist/utils/OpenAIFormat.d.ts +177 -0
  105. package/dist/utils/OpenAIFormat.js +488 -0
  106. package/dist/utils/OpenAIFormat.js.map +1 -0
  107. package/dist/utils/TokenStreamRegulator.d.ts +2 -0
  108. package/dist/utils/TokenStreamRegulator.js +12 -0
  109. package/dist/utils/TokenStreamRegulator.js.map +1 -1
  110. package/dist/utils/getChatWrapperSegmentDefinition.d.ts +2 -0
  111. package/dist/utils/getChatWrapperSegmentDefinition.js +7 -0
  112. package/dist/utils/getChatWrapperSegmentDefinition.js.map +1 -0
  113. package/dist/utils/optionsMatrix.d.ts +58 -0
  114. package/dist/utils/optionsMatrix.js +97 -0
  115. package/dist/utils/optionsMatrix.js.map +1 -0
  116. package/dist/utils/parseModelUri.js +1 -1
  117. package/dist/utils/parseModelUri.js.map +1 -1
  118. package/dist/utils/resolveModelFile.js +2 -0
  119. package/dist/utils/resolveModelFile.js.map +1 -1
  120. package/llama/addon/AddonContext.cpp +11 -9
  121. package/llama/binariesGithubRelease.json +1 -1
  122. package/llama/gitRelease.bundle +0 -0
  123. package/llama/grammars/README.md +4 -4
  124. package/llama/llama.cpp.info.json +2 -2
  125. package/package.json +48 -45
  126. package/templates/packed/electron-typescript-react.json +1 -1
  127. package/templates/packed/node-typescript.json +1 -1
package/dist/evaluator/LlamaChat/LlamaChat.js
@@ -1,4 +1,5 @@
  import { DisposeAggregator, DisposedError, EventRelay, withLock } from "lifecycle-utils";
+ import { isChatModelResponseFunctionCall, isChatModelResponseSegment, allSegmentTypes } from "../../types.js";
  import { removeNullFields } from "../../utils/removeNullFields.js";
  import { LlamaGrammarEvaluationState } from "../LlamaGrammarEvaluationState.js";
  import { LlamaText, SpecialToken } from "../../utils/LlamaText.js";
@@ -11,6 +12,8 @@ import { safeEventCallback } from "../../utils/safeEventCallback.js";
  import { pushAll } from "../../utils/pushAll.js";
  import { resolveLastTokens } from "../../utils/resolveLastTokens.js";
  import { LlamaSampler } from "../LlamaContext/LlamaSampler.js";
+ import { getChatWrapperSegmentDefinition } from "../../utils/getChatWrapperSegmentDefinition.js";
+ import { jsonDumps } from "../../chatWrappers/utils/jsonDumps.js";
  import { eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy } from "./utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js";
  import { FunctionCallNameGrammar } from "./utils/FunctionCallNameGrammar.js";
  import { FunctionCallParamsGrammar } from "./utils/FunctionCallParamsGrammar.js";
@@ -76,11 +79,12 @@ export class LlamaChat {
  return this.sequence.model;
  }
  async generateResponse(history, options = {}) {
- const { onTextChunk, onToken, signal, stopOnAbortSignal = false, maxTokens, temperature, minP, topK, topP, seed, grammar, trimWhitespaceSuffix = defaultTrimWhitespaceSuffix, repeatPenalty = {}, tokenBias, evaluationPriority = defaultEvaluationPriority, functions, onFunctionCall, documentFunctionParams, maxParallelFunctionCalls, contextShift = defaultContextShiftOptions, customStopTriggers, lastEvaluationContextWindow: { history: lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift = 0.5 } = {} } = options;
+ const { onTextChunk, onToken, onResponseChunk, signal, stopOnAbortSignal = false, maxTokens, temperature, minP, topK, topP, seed, grammar, trimWhitespaceSuffix = defaultTrimWhitespaceSuffix, repeatPenalty = {}, tokenBias, evaluationPriority = defaultEvaluationPriority, functions, onFunctionCall, documentFunctionParams, maxParallelFunctionCalls, contextShift = defaultContextShiftOptions, customStopTriggers, lastEvaluationContextWindow: { history: lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift = 0.5 } = {} } = options;
  this.sequence.tokenPredictor?.updateInputTokens?.(this.model.tokenize(findLastUserMessageInChatHistory(history)?.text ?? ""));
  const generateResponseState = new GenerateResponseState(this, this._chatWrapper, history, {
  onTextChunk,
  onToken,
+ onResponseChunk,
  signal,
  stopOnAbortSignal,
  maxTokens,
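Note: the hunk above threads a new onResponseChunk callback through generateResponse alongside onToken and onTextChunk. A minimal consumption sketch in TypeScript; the chunk shape (type, segmentType, text) is an assumption inferred from the SegmentHandler wiring later in this diff, and the model path is a placeholder:

    import { getLlama, LlamaChat } from "node-llama-cpp";

    const llama = await getLlama();
    const model = await llama.loadModel({ modelPath: "model.gguf" }); // placeholder path
    const context = await model.createContext();
    const chat = new LlamaChat({ contextSequence: context.getSequence() });

    await chat.generateResponse([{ type: "user", text: "Why is the sky blue?" }], {
        onResponseChunk(chunk) {
            // Segment chunks (e.g. "thought" reasoning) arrive tagged separately
            // from plain response text
            if (chunk.type === "segment" && chunk.segmentType === "thought")
                process.stdout.write(`[thought] ${chunk.text}`);
            else
                process.stdout.write(chunk.text);
        }
    });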
@@ -110,6 +114,7 @@ export class LlamaChat {
  return await withLock(this._chatLock, "evaluate", signal, async () => {
  try {
  generateResponseState.ensureLastHistoryItemIsModel();
+ generateResponseState.ensureReopenedThoughtSegmentAfterFunctionCallsIfNeeded();
  const loadContextWindow = async (avoidReloadingHistory = false) => {
  await generateResponseState.loadContextWindow(generateResponseState.getResolvedHistoryWithCurrentModelResponse(), generateResponseState.getContextWindowsHistoryWithCurrentModelResponse(), false, avoidReloadingHistory);
  };
@@ -134,23 +139,25 @@ export class LlamaChat {
  await generateResponseState.alignCurrentSequenceStateWithCurrentTokens();
  await generateResponseState.createNewEvaluationIterator();
  while (await generateResponseState.iterateEvaluation()) {
- generateResponseState.waitOnPartialCharactersOrWhiteSpaceTokens();
- generateResponseState.detectAndHandleFunctionStartSyntax();
- if (generateResponseState.functionEvaluationMode !== false) {
- generateResponseState.canAvoidReloadingHistory = false;
- generateResponseState.releasePartiallyFreeTokensBeforeFunctionCallStart();
- const functionsCallsRes = await generateResponseState.enterFunctionCallingLoop(loadContextWindowForFunctionCallingLoop);
- if (functionsCallsRes != null)
- return functionsCallsRes;
+ if (!generateResponseState.holdPartialTokensForNextEvaluation()) {
+ generateResponseState.waitOnPartialCharactersOrWhiteSpaceTokens();
+ generateResponseState.detectAndHandleFunctionStartSyntax();
+ if (generateResponseState.functionEvaluationMode !== false) {
+ generateResponseState.canAvoidReloadingHistory = false;
+ generateResponseState.releasePartiallyFreeTokensBeforeFunctionCallStart();
+ const functionsCallsRes = await generateResponseState.enterFunctionCallingLoop(loadContextWindowForFunctionCallingLoop);
+ if (functionsCallsRes != null)
+ return functionsCallsRes;
+ }
+ generateResponseState.recordStopGenerationEvaluation();
+ generateResponseState.popStreamRegulatorFreeTokens();
+ generateResponseState.removeFoundStartIgnoreTextsFromPendingTokens();
+ const stopGenerationTriggerRes = generateResponseState.handleStopGenerationTrigger("model");
+ if (stopGenerationTriggerRes != null)
+ return stopGenerationTriggerRes;
+ generateResponseState.spliceIgnoreStartTextDetectedTokens();
+ generateResponseState.moveFreePendingTokensToRes();
  }
- generateResponseState.recordStopGenerationEvaluation();
- generateResponseState.popStreamRegulatorFreeTokens();
- generateResponseState.removeFoundStartIgnoreTextsFromPendingTokens();
- const stopGenerationTriggerRes = generateResponseState.handleStopGenerationTrigger("model");
- if (stopGenerationTriggerRes != null)
- return stopGenerationTriggerRes;
- generateResponseState.spliceIgnoreStartTextDetectedTokens();
- generateResponseState.moveFreePendingTokensToRes();
  const maxTokensTriggerRes = generateResponseState.handleMaxTokensTrigger("model");
  if (maxTokensTriggerRes != null)
  return maxTokensTriggerRes;
@@ -174,16 +181,20 @@ export class LlamaChat {
  }
  async loadChatAndCompleteUserMessage(history, options = {}) {
  const { initialUserPrompt = "", stopOnAbortSignal = false, onTextChunk, onToken, signal, maxTokens = Math.min(256, Math.ceil(this.context.contextSize / 2)), temperature, minP, topK, topP, seed, grammar, trimWhitespaceSuffix = defaultTrimWhitespaceSuffix, repeatPenalty = {}, tokenBias, evaluationPriority = defaultEvaluationPriority, functions, documentFunctionParams, contextShift = defaultContextShiftOptions, customStopTriggers, lastEvaluationContextWindow: { history: lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift = 0.8 } = {} } = options;
- const lastEvaluationContextWindowHistoryItem = lastEvaluationContextWindowHistory == null
- ? null
- : lastEvaluationContextWindowHistory[lastEvaluationContextWindowHistory.length - 1];
- const lastEvaluationContextWindowUserMessage = lastEvaluationContextWindowHistoryItem?.type === "user"
- ? lastEvaluationContextWindowHistoryItem.text
- : "";
  this.sequence.tokenPredictor?.updateInputTokens?.(this.model.tokenize((findLastModelMessageInChatHistory(history)?.response ?? [])
- .filter((item) => typeof item === "string")
+ .map((item) => {
+ if (typeof item === "string")
+ return item;
+ else if (isChatModelResponseFunctionCall(item))
+ return null;
+ else if (isChatModelResponseSegment(item))
+ return item.text;
+ void item;
+ return null;
+ })
+ .filter((item) => item != null)
  .join(" ")));
- const generateResponseState = new GenerateResponseState(this, this._chatWrapper, history, {
+ const generateResponseState = new GenerateResponseState(this, this._chatWrapper, mergeGeneratedResultWithChatHistory("user", history, [initialUserPrompt]), {
  onTextChunk,
  onToken,
  signal,
@@ -204,27 +215,16 @@ export class LlamaChat {
  contextShift,
  customStopTriggers,
  lastEvaluationContextWindow: {
- history: lastEvaluationContextWindowHistory == null
- ? undefined
- : setLastUserTextInChatHistory(lastEvaluationContextWindowHistory, lastEvaluationContextWindowUserMessage + initialUserPrompt),
+ history: mergeGeneratedResultWithChatHistory("user", lastEvaluationContextWindowHistory ?? history, [initialUserPrompt]),
  minimumOverlapPercentageToPreventContextShift
  }
  });
  return await withLock(this._chatLock, "evaluate", signal, async () => {
  try {
  generateResponseState.ensureLastHistoryItemIsUser();
- const getInitialUserMessage = (history) => {
- const lastResolvedHistoryItem = history[history.length - 1];
- if (lastResolvedHistoryItem?.type === "user")
- return lastResolvedHistoryItem.text;
- return "";
- };
- const initialUserMessage = getInitialUserMessage(generateResponseState.resolvedHistory);
- const contextWindowInitialUserMessage = getInitialUserMessage(generateResponseState.lastContextWindowHistory);
  while (true) {
  generateResponseState.startTokenLoop();
- const { userTextSuffix } = await generateResponseState.loadContextWindow(setLastUserTextInChatHistory(generateResponseState.resolvedHistory, initialUserMessage + initialUserPrompt + this.model.detokenize(generateResponseState.res)), setLastUserTextInChatHistory(generateResponseState.lastContextWindowHistory, contextWindowInitialUserMessage + initialUserPrompt +
- this.model.detokenize(generateResponseState.contextWindowsRes)), true);
+ const { userTextSuffix } = await generateResponseState.loadContextWindow(mergeGeneratedResultWithChatHistory("user", generateResponseState.resolvedHistory, generateResponseState.segmentHandler.getModelResponseSegments()), mergeGeneratedResultWithChatHistory("user", generateResponseState.lastContextWindowHistory, generateResponseState.segmentHandler.getContextWindowModelResponseSegments()), true);
  generateResponseState.functionEvaluationMode = false;
  generateResponseState.addStopGenerationTriggersFromChatWrapper();
  if (userTextSuffix != null && userTextSuffix.values.length > 0)
@@ -235,7 +235,7 @@ export class LlamaChat {
  return {
  completion: "",
  lastEvaluation: {
- contextWindow: setLastUserTextInChatHistory(generateResponseState.lastContextWindowHistory, initialUserMessage),
+ contextWindow: mergeGeneratedResultWithChatHistory("user", generateResponseState.lastContextWindowHistory, generateResponseState.segmentHandler.getContextWindowModelResponseSegments()),
  contextShiftMetadata: generateResponseState.lastHistoryCompressionMetadata
  },
  metadata: {
@@ -245,28 +245,30 @@ export class LlamaChat {
  }
  await generateResponseState.createNewEvaluationIterator();
  while (await generateResponseState.iterateEvaluation()) {
- generateResponseState.waitOnPartialCharactersOrWhiteSpaceTokens();
- generateResponseState.recordStopGenerationEvaluation();
- generateResponseState.popStreamRegulatorFreeTokens();
- const stopGenerationTriggerRes = generateResponseState.handleStopGenerationTrigger("user");
- if (stopGenerationTriggerRes != null)
- return {
- completion: stopGenerationTriggerRes.response,
- lastEvaluation: {
- contextWindow: setLastUserTextInChatHistory(generateResponseState.lastContextWindowHistory, initialUserMessage),
- contextShiftMetadata: stopGenerationTriggerRes.lastEvaluation.contextShiftMetadata
- },
- metadata: stopGenerationTriggerRes.metadata.stopReason === "customStopTrigger"
- ? stopGenerationTriggerRes.metadata
- : stopGenerationTriggerRes.metadata
- };
- generateResponseState.moveFreePendingTokensToRes(false);
+ if (!generateResponseState.holdPartialTokensForNextEvaluation()) {
+ generateResponseState.waitOnPartialCharactersOrWhiteSpaceTokens();
+ generateResponseState.recordStopGenerationEvaluation();
+ generateResponseState.popStreamRegulatorFreeTokens();
+ const stopGenerationTriggerRes = generateResponseState.handleStopGenerationTrigger("user");
+ if (stopGenerationTriggerRes != null)
+ return {
+ completion: stopGenerationTriggerRes.response,
+ lastEvaluation: {
+ contextWindow: mergeGeneratedResultWithChatHistory("user", generateResponseState.lastContextWindowHistory, generateResponseState.segmentHandler.getContextWindowModelResponseSegments()),
+ contextShiftMetadata: stopGenerationTriggerRes.lastEvaluation.contextShiftMetadata
+ },
+ metadata: stopGenerationTriggerRes.metadata.stopReason === "customStopTrigger"
+ ? stopGenerationTriggerRes.metadata
+ : stopGenerationTriggerRes.metadata
+ };
+ generateResponseState.moveFreePendingTokensToRes(false);
+ }
  const maxTokensTriggerRes = generateResponseState.handleMaxTokensTrigger("user");
  if (maxTokensTriggerRes != null)
  return {
  completion: maxTokensTriggerRes.response,
  lastEvaluation: {
- contextWindow: setLastUserTextInChatHistory(generateResponseState.lastContextWindowHistory, initialUserMessage),
+ contextWindow: mergeGeneratedResultWithChatHistory("user", generateResponseState.lastContextWindowHistory, generateResponseState.segmentHandler.getContextWindowModelResponseSegments()),
  contextShiftMetadata: maxTokensTriggerRes.lastEvaluation.contextShiftMetadata
  },
  metadata: maxTokensTriggerRes.metadata
@@ -278,7 +280,7 @@ export class LlamaChat {
  return {
  completion: abortRes.response,
  lastEvaluation: {
- contextWindow: setLastUserTextInChatHistory(generateResponseState.lastContextWindowHistory, initialUserMessage),
+ contextWindow: mergeGeneratedResultWithChatHistory("user", generateResponseState.lastContextWindowHistory, generateResponseState.segmentHandler.getContextWindowModelResponseSegments()),
  contextShiftMetadata: abortRes.lastEvaluation.contextShiftMetadata
  },
  metadata: abortRes.metadata
@@ -303,11 +305,18 @@ function removeRawFromHistoryItem(historyItem) {
  newHistoryItem.response = newHistoryItem.response.map((item) => {
  if (typeof item === "string")
  return item;
- else
+ else if (isChatModelResponseFunctionCall(item))
  return {
  ...item,
  rawCall: undefined
  };
+ else if (isChatModelResponseSegment(item))
+ return {
+ ...item,
+ raw: undefined
+ };
+ void item;
+ return item;
  });
  return newHistoryItem;
  }
@@ -370,42 +379,17 @@ async function compressHistoryToFitContextSize({ history, contextShiftSize, cont
  metadata
  };
  }
- function getLastTextModelResponseFromChatHistory(chatHistory) {
- if (chatHistory.length === 0 || chatHistory[chatHistory.length - 1].type !== "model")
- return "";
- const lastModelResponseItem = chatHistory[chatHistory.length - 1];
- const modelResponse = lastModelResponseItem.response;
- if (modelResponse.length > 0 && typeof modelResponse[modelResponse.length - 1] === "string")
- return modelResponse[modelResponse.length - 1];
- return "";
+ function getLastModelMessageFullResponseFromChatHistory(chatHistory) {
+ const lastModelResponseItem = chatHistory.at(-1);
+ if (lastModelResponseItem == null || lastModelResponseItem.type !== "model")
+ return [];
+ return lastModelResponseItem.response;
  }
  function getLastUserTextFromChatHistory(chatHistory) {
  if (chatHistory.length === 0 || chatHistory[chatHistory.length - 1].type !== "user")
  return "";
  return chatHistory[chatHistory.length - 1].text;
  }
- function setLastModelTextResponseInChatHistory(chatHistory, textResponse) {
- const newChatHistory = chatHistory.slice();
- if (newChatHistory.length === 0 || newChatHistory[newChatHistory.length - 1].type !== "model")
- newChatHistory.push({
- type: "model",
- response: []
- });
- const lastModelResponseItem = newChatHistory[newChatHistory.length - 1];
- const newLastModelResponseItem = { ...lastModelResponseItem };
- newChatHistory[newChatHistory.length - 1] = newLastModelResponseItem;
- const modelResponse = newLastModelResponseItem.response.slice();
- newLastModelResponseItem.response = modelResponse;
- if (modelResponse.length > 0 && typeof modelResponse[modelResponse.length - 1] === "string") {
- if (textResponse === "")
- modelResponse.pop();
- else
- modelResponse[modelResponse.length - 1] = textResponse;
- }
- else if (textResponse !== "")
- modelResponse.push(textResponse);
- return newChatHistory;
- }
  function setLastUserTextInChatHistory(chatHistory, userText) {
  const newChatHistory = chatHistory.slice();
  if (newChatHistory.length === 0 || newChatHistory[newChatHistory.length - 1].type !== "user")
@@ -419,11 +403,73 @@ function setLastUserTextInChatHistory(chatHistory, userText) {
  newLastUserItem.text = userText;
  return newChatHistory;
  }
- function setLastTextInChatHistory(itemType, chatHistory, text) {
- if (itemType === "user")
- return setLastUserTextInChatHistory(chatHistory, text);
- else
- return setLastModelTextResponseInChatHistory(chatHistory, text);
+ function mergeGeneratedResultWithChatHistory(itemType, chatHistory, generatedResult) {
+ if (generatedResult.length === 0 || (generatedResult.length === 1 && generatedResult[0] === ""))
+ return chatHistory;
+ const newChatHistory = chatHistory.slice();
+ if (itemType === "user") {
+ let lastUserItem = newChatHistory.at(-1);
+ if (lastUserItem?.type !== "user") {
+ lastUserItem = {
+ type: "user",
+ text: ""
+ };
+ newChatHistory.push(lastUserItem);
+ }
+ const newLastUserItem = { ...lastUserItem };
+ newChatHistory[newChatHistory.length - 1] = newLastUserItem;
+ newLastUserItem.text += generatedResult
+ .map((item) => {
+ if (typeof item === "string")
+ return item;
+ return item.text;
+ })
+ .join("");
+ return newChatHistory;
+ }
+ else {
+ let lastModelItem = newChatHistory.at(-1);
+ if (lastModelItem?.type !== "model") {
+ lastModelItem = {
+ type: "model",
+ response: []
+ };
+ newChatHistory.push(lastModelItem);
+ }
+ const newLastModelItem = { ...lastModelItem };
+ newChatHistory[newChatHistory.length - 1] = newLastModelItem;
+ const modelResponse = newLastModelItem.response.slice();
+ newLastModelItem.response = modelResponse;
+ const firstGeneratedResultItem = generatedResult[0];
+ if (firstGeneratedResultItem == null)
+ return newChatHistory;
+ const lastModelResponseItem = modelResponse.at(-1);
+ if (typeof firstGeneratedResultItem === "string" && typeof lastModelResponseItem === "string") {
+ modelResponse[modelResponse.length - 1] = lastModelResponseItem + firstGeneratedResultItem;
+ }
+ else if (typeof firstGeneratedResultItem !== "string" && isChatModelResponseSegment(firstGeneratedResultItem) &&
+ typeof lastModelResponseItem !== "string" && isChatModelResponseSegment(lastModelResponseItem) &&
+ !lastModelResponseItem.ended && lastModelResponseItem.segmentType === firstGeneratedResultItem.segmentType) {
+ modelResponse[modelResponse.length - 1] = {
+ ...lastModelResponseItem,
+ ...firstGeneratedResultItem,
+ text: lastModelResponseItem.text + firstGeneratedResultItem.text,
+ ended: firstGeneratedResultItem.ended,
+ raw: (lastModelResponseItem.raw != null && firstGeneratedResultItem.raw != null)
+ ? LlamaText([
+ LlamaText.fromJSON(lastModelResponseItem.raw),
+ LlamaText.fromJSON(firstGeneratedResultItem.raw)
+ ]).toJSON()
+ : undefined,
+ startTime: lastModelResponseItem.startTime,
+ endTime: firstGeneratedResultItem.endTime
+ };
+ }
+ else
+ modelResponse.push(firstGeneratedResultItem);
+ pushAll(modelResponse, generatedResult.slice(1));
+ return newChatHistory;
+ }
  }
  function findLastUserMessageInChatHistory(chatHistory) {
  for (let i = chatHistory.length - 1; i >= 0; i--) {
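Note: mergeGeneratedResultWithChatHistory above replaces the removed setLastModelTextResponseInChatHistory/setLastTextInChatHistory pair: generated output is now a list of strings and segment objects that gets merged into the last history item, concatenating adjacent strings and coalescing a continuation of an unended segment of the same segmentType. A worked TypeScript example with hypothetical values:

    // The history ends with an unended "thought" segment; the generated result
    // continues that segment and then appends plain response text.
    const history = [
        { type: "user", text: "Hi" },
        { type: "model", response: [
            { type: "segment", segmentType: "thought", text: "Let me th", ended: false }
        ] }
    ];
    const generated = [
        { type: "segment", segmentType: "thought", text: "ink...", ended: true },
        "Hello!"
    ];
    // mergeGeneratedResultWithChatHistory("model", history, generated) returns a
    // copy of the history whose last item becomes:
    // { type: "model", response: [
    //     { type: "segment", segmentType: "thought", text: "Let me think...", ended: true },
    //     "Hello!"
    // ] }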
@@ -486,6 +532,7 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
  throw new DisposedError();
  const model = sequence.model;
  const context = sequence.context;
+ let removeRawFromHistory = false;
  if (isFirstEvaluation && lastEvaluationContextWindowHistory != null && sequence.isLoadedToMemory) {
  const newContextWindow = lastEvaluationContextWindowHistory.slice();
  if (endWithUserText) {
@@ -514,7 +561,7 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
  history: newContextWindow,
  stopGenerationTriggers,
  tokens,
- newResolvedHistory: resolvedHistory,
+ removeRawFromHistory,
  newHistoryCompressionMetadata: lastHistoryCompressionMetadata,
  ignoreStartText: ignoreStartText ?? [],
  functionCallInitiallyEngaged: functionCall?.initiallyEngaged ?? false,
@@ -523,9 +570,10 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
  };
  }
  }
- resolvedHistory = sequence.isLoadedToMemory
- ? resolvedHistory.slice()
- : resolvedHistory.map(removeRawFromHistoryItem);
+ removeRawFromHistory = !sequence.isLoadedToMemory;
+ resolvedHistory = removeRawFromHistory
+ ? resolvedHistory.map(removeRawFromHistoryItem)
+ : resolvedHistory.slice();
  if (resolvedContextShift.lastEvaluationMetadata != null) {
  const contextShiftSize = resolvedContextShift.size instanceof Function
  ? await resolvedContextShift.size(sequence)
@@ -550,7 +598,7 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
  history: compressedHistory,
  stopGenerationTriggers,
  tokens: contextText.tokenize(model.tokenizer),
- newResolvedHistory: resolvedHistory,
+ removeRawFromHistory,
  newHistoryCompressionMetadata: metadata,
  ignoreStartText: ignoreStartText ?? [],
  functionCallInitiallyEngaged: functionCall?.initiallyEngaged ?? false,
@@ -570,7 +618,7 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
  history: resolvedHistory,
  stopGenerationTriggers,
  tokens,
- newResolvedHistory: resolvedHistory,
+ removeRawFromHistory,
  newHistoryCompressionMetadata: lastHistoryCompressionMetadata,
  ignoreStartText: ignoreStartText ?? [],
  functionCallInitiallyEngaged: functionCall?.initiallyEngaged ?? false,
@@ -601,7 +649,7 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
  history: compressedHistory,
  stopGenerationTriggers,
  tokens: contextText.tokenize(model.tokenizer),
- newResolvedHistory: resolvedHistory,
+ removeRawFromHistory,
  newHistoryCompressionMetadata: metadata,
  ignoreStartText: ignoreStartText ?? [],
  functionCallInitiallyEngaged: functionCall?.initiallyEngaged ?? false,
@@ -615,6 +663,7 @@ class GenerateResponseState {
  history;
  onTextChunk;
  onToken;
+ onResponseChunk;
  signal;
  stopOnAbortSignal;
  maxTokens;
@@ -638,7 +687,6 @@ class GenerateResponseState {
  repeatPenaltyEnabled;
  resolvedContextShift;
  resolvedRepeatPenalty;
- lastModelResponse;
  grammarEvaluationState;
  functionNameGrammar;
  functionsGrammar;
@@ -651,10 +699,13 @@ class GenerateResponseState {
  ignoreStartTextDetector = new StopGenerationDetector();
  locksToReleaseOnValidGeneration = [];
  resolvedHistory;
+ noRawInResolvedHistory;
  res = [];
  pendingTokens = [];
  ignoredStartTextTokens = [];
  resFunctionCalls = [];
+ segmentHandler;
+ pendingPartialTokens = [];
  functionEvaluationMode = false;
  currentFunctionCallPreviousText = LlamaText([]);
  currentFunctionCallCurrentPartTokens = [];
@@ -678,8 +729,6 @@ class GenerateResponseState {
  disengageInitiallyEngagedFunctionCall = [];
  userTextSuffix = undefined;
  tokens = [];
- contextWindowLastModelResponse = "";
- contextWindowsRes = [];
  // token evaluation loop
  evaluationIterator;
  currentIteration;
@@ -688,12 +737,13 @@ class GenerateResponseState {
  currentTokens = [];
  currentText = "";
  currentQueuedTokenRelease;
- constructor(llamaChat, chatWrapper, history, { onTextChunk, onToken, signal, stopOnAbortSignal = false, maxTokens, temperature, minP, topK, topP, seed, grammar, trimWhitespaceSuffix = defaultTrimWhitespaceSuffix, repeatPenalty = {}, tokenBias, evaluationPriority = defaultEvaluationPriority, functions, onFunctionCall, documentFunctionParams, maxParallelFunctionCalls, contextShift = defaultContextShiftOptions, customStopTriggers, lastEvaluationContextWindow: { history: lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift = 0.5 } = {} } = {}) {
+ constructor(llamaChat, chatWrapper, history, { onTextChunk, onToken, onResponseChunk, signal, stopOnAbortSignal = false, maxTokens, temperature, minP, topK, topP, seed, grammar, trimWhitespaceSuffix = defaultTrimWhitespaceSuffix, repeatPenalty = {}, tokenBias, evaluationPriority = defaultEvaluationPriority, functions, onFunctionCall, documentFunctionParams, maxParallelFunctionCalls, contextShift = defaultContextShiftOptions, customStopTriggers, lastEvaluationContextWindow: { history: lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift = 0.5 } = {} } = {}) {
  this.llamaChat = llamaChat;
  this.chatWrapper = chatWrapper;
  this.history = history;
  this.onTextChunk = safeEventCallback(onTextChunk);
  this.onToken = safeEventCallback(onToken);
+ this.onResponseChunk = safeEventCallback(onResponseChunk);
  this.signal = signal;
  this.stopOnAbortSignal = stopOnAbortSignal;
  this.maxTokens = maxTokens;
@@ -718,9 +768,10 @@ class GenerateResponseState {
  throw this.signal.reason;
  if (this.llamaChat.disposed)
  throw new DisposedError();
- this.resolvedHistory = this.llamaChat.sequence.isLoadedToMemory
- ? this.history.slice()
- : this.history.map(removeRawFromHistoryItem);
+ this.noRawInResolvedHistory = !this.llamaChat.sequence.isLoadedToMemory;
+ this.resolvedHistory = this.noRawInResolvedHistory
+ ? this.history.map(removeRawFromHistoryItem)
+ : this.history.slice();
  this.resolvedContextShift = {
  ...defaultContextShiftOptions,
  ...removeNullFields(this.contextShift)
@@ -731,7 +782,6 @@ class GenerateResponseState {
  ...(repeatPenalty ?? {}),
  lastTokens: repeatPenalty?.lastTokens ?? defaultRepeatPenaltyLastTokens
  };
- this.lastModelResponse = getLastTextModelResponseFromChatHistory(this.resolvedHistory);
  this.repeatPenaltyEnabled = this.resolvedRepeatPenalty.lastTokens > 0;
  this.grammarEvaluationState = this.grammar != null
  ? new LlamaGrammarEvaluationState({ model: this.llamaChat.model, grammar: this.grammar })
@@ -742,7 +792,7 @@ class GenerateResponseState {
  this.functionsGrammar = undefined;
  this.functionsEvaluationState = undefined;
  this.lastContextWindowHistory = lastEvaluationContextWindowHistory ?? this.resolvedHistory;
- this.lastHistoryCompressionMetadata = this.resolvedContextShift;
+ this.lastHistoryCompressionMetadata = this.resolvedContextShift.lastEvaluationMetadata;
  if (this.customStopTriggers != null)
  StopGenerationDetector.resolveStopTriggers(this.customStopTriggers, this.llamaChat.model.tokenizer)
  .map((stopTrigger) => this.customStopGenerationTriggersDetector.addStopTrigger(stopTrigger));
@@ -754,6 +804,22 @@ class GenerateResponseState {
  this.chatWrapper.settings.functions?.parallelism?.call?.sectionPrefix ?? "",
  this.chatWrapper.settings.functions.call.prefix
  ]), this.llamaChat.model.tokenizer));
+ const segmentDefinitions = new Map();
+ for (const segmentType of allSegmentTypes) {
+ const segmentDefinition = getChatWrapperSegmentDefinition(this.chatWrapper.settings, segmentType);
+ if (segmentDefinition != null)
+ segmentDefinitions.set(segmentType, segmentDefinition);
+ }
+ this.segmentHandler = new SegmentHandler({
+ model: this.llamaChat.model,
+ onTextChunk: this.onTextChunk,
+ onToken: this.onToken,
+ onResponseChunk: this.onResponseChunk,
+ previousTokens: this.getLastTokens(),
+ closeAllSegments: this.chatWrapper.settings.segments?.closeAllSegments,
+ segmentDefinitions,
+ initialSegmentStack: SegmentHandler.getStackFromModelResponse(getLastModelMessageFullResponseFromChatHistory(this.resolvedHistory))
+ });
  this.getPenaltyTokens = this.getPenaltyTokens.bind(this);
  }
  async dispose() {
@@ -763,19 +829,47 @@ class GenerateResponseState {
  await this.dispose();
  }
  ensureLastHistoryItemIsModel() {
- if (this.resolvedHistory.length === 0 || this.resolvedHistory[this.resolvedHistory.length - 1].type !== "model")
+ if (this.resolvedHistory.at(-1)?.type !== "model")
  this.resolvedHistory.push({
  type: "model",
  response: []
  });
  }
  ensureLastHistoryItemIsUser() {
- if (this.resolvedHistory.length === 0 || this.resolvedHistory[this.resolvedHistory.length - 1].type !== "user")
+ if (this.resolvedHistory.at(-1)?.type !== "user")
  this.resolvedHistory.push({
  type: "user",
  text: ""
  });
  }
+ ensureReopenedThoughtSegmentAfterFunctionCallsIfNeeded() {
+ if (this.chatWrapper.settings.segments?.thought?.reopenAfterFunctionCalls !== true)
+ return;
+ const lastModelResponseItem = this.resolvedHistory.at(-1);
+ if (lastModelResponseItem == null || lastModelResponseItem.type !== "model")
+ return;
+ const lastResponse = lastModelResponseItem.response.at(-1);
+ if (lastResponse == null)
+ return;
+ const lastResponseIsFunctionCall = typeof lastResponse !== "string" && lastResponse.type === "functionCall";
+ if (!lastResponseIsFunctionCall)
+ return;
+ const currentResponseSegmentsStack = SegmentHandler.getStackFromModelResponse(lastModelResponseItem.response);
+ if (currentResponseSegmentsStack.includes("thought"))
+ return;
+ const hadThoughtSegments = this.resolvedHistory.some((chatItem) => {
+ if (chatItem.type !== "model")
+ return false;
+ return chatItem.response.some((responseItem) => {
+ if (typeof responseItem === "string")
+ return false;
+ return responseItem.type === "segment" && responseItem.segmentType === "thought";
+ });
+ });
+ if (!hadThoughtSegments)
+ return;
+ this.segmentHandler.openSegment("thought");
+ }
  ensureNotAborted() {
  if (this.signal?.aborted && (!this.stopOnAbortSignal || this.res.length === 0))
  throw this.signal.reason;
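Note: ensureReopenedThoughtSegmentAfterFunctionCallsIfNeeded above is opt-in behavior for chat wrappers whose settings declare segments: { thought: { reopenAfterFunctionCalls: true } }. A hypothetical history that passes all of its guards (the pending model turn ends with a function call, no thought segment is still open on the segment stack, and an earlier response contained one):

    const resolvedHistory = [
        { type: "user", text: "What's the weather in Paris?" },
        { type: "model", response: [
            { type: "segment", segmentType: "thought", text: "I should call a tool.", ended: true },
            // hypothetical function call and result
            { type: "functionCall", name: "getWeather", params: { city: "Paris" }, result: { temperatureC: 18 } }
        ] }
    ];
    // All guards pass, so segmentHandler.openSegment("thought") runs and the
    // next evaluation resumes inside a reopened thought segment rather than in
    // plain response text.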
@@ -784,7 +878,7 @@ class GenerateResponseState {
  }
  getPenaltyTokens() {
  if (this.llamaChat.disposed)
- throw new DisposedError();
+ return [];
  let punishTokens = this.res.slice(-this.resolvedRepeatPenalty.lastTokens);
  if (this.resolvedRepeatPenalty.punishTokensFilter != null)
  punishTokens = this.resolvedRepeatPenalty.punishTokensFilter(punishTokens);
@@ -796,24 +890,10 @@ class GenerateResponseState {
  return punishTokens;
  }
  getResolvedHistoryWithCurrentModelResponse() {
- if (this.res.length === 0)
- return this.resolvedHistory;
- let modelResponse = this.llamaChat.model.detokenize(this.res);
- if (this.grammar?.trimWhitespaceSuffix || this.trimWhitespaceSuffix)
- modelResponse = modelResponse.trimEnd();
- if (modelResponse === "")
- return this.resolvedHistory;
- return setLastModelTextResponseInChatHistory(this.resolvedHistory, this.lastModelResponse + modelResponse);
+ return mergeGeneratedResultWithChatHistory("model", this.resolvedHistory, this.segmentHandler.getModelResponseSegments());
  }
  getContextWindowsHistoryWithCurrentModelResponse() {
- if (this.contextWindowsRes.length === 0)
- return this.lastContextWindowHistory;
- let modelResponse = this.llamaChat.model.detokenize(this.contextWindowsRes);
- if (this.grammar?.trimWhitespaceSuffix || this.trimWhitespaceSuffix)
- modelResponse = modelResponse.trimEnd();
- if (modelResponse === "")
- return this.lastContextWindowHistory;
- return setLastModelTextResponseInChatHistory(this.lastContextWindowHistory, this.contextWindowLastModelResponse + modelResponse);
+ return mergeGeneratedResultWithChatHistory("model", this.lastContextWindowHistory, this.segmentHandler.getContextWindowModelResponseSegments());
  }
  removeFoundStartIgnoreTextsFromPendingTokens(forceRemove = false) {
  if (!this.removedStartTextToIgnore && this.res.length === 0 && this.pendingTokens.length > 0 &&
@@ -826,14 +906,26 @@ class GenerateResponseState {
  this.contextWindowTokens,
  this.ignoredStartTextTokens
  ]);
+ const pendingPartialTokens = [];
  for (let i = 0; i < this.pendingTokens.length; i++) {
+ const currentToken = this.pendingTokens[i];
+ const tokens = [...pendingPartialTokens, currentToken];
+ const text = this.llamaChat.model.detokenize(tokens, false, lastTokensForDetokenizer);
+ if (pendingPartialTokens.length === 0 &&
+ text.endsWith(UNKNOWN_UNICODE_CHAR) &&
+ !this.llamaChat.model.isSpecialToken(currentToken) &&
+ !this.llamaChat.model.isEogToken(currentToken)) {
+ pendingPartialTokens.length = 0;
+ pushAll(pendingPartialTokens, tokens);
+ continue;
+ }
  this.ignoreStartTextDetector.recordGeneration({
- text: this.llamaChat.model.detokenize([this.pendingTokens[i]], false, lastTokensForDetokenizer),
- tokens: [this.pendingTokens[i]],
+ text: this.llamaChat.model.detokenize(tokens, false, lastTokensForDetokenizer),
+ tokens,
  startNewChecks: i === 0,
  triggerMustStartWithGeneration: true
  });
- lastTokensForDetokenizer.push(this.pendingTokens[i]);
+ pushAll(lastTokensForDetokenizer, tokens);
  if (this.ignoreStartTextDetector.hasTriggeredStops) {
  mostExhaustiveTriggeredStops = this.ignoreStartTextDetector.getTriggeredStops();
  this.ignoreStartTextDetector.clearTriggeredStops();
@@ -902,11 +994,12 @@ class GenerateResponseState {
  const queuedChunkTokens = this.streamRegulator.getAllQueuedChunkTokens();
  const functionCallsTokens = this.getContextWindowFunctionCallsTokens();
  if (!avoidReloadingHistory || !this.canAvoidReloadingHistory || !this.llamaChat.sequence.isLoadedToMemory) {
- const { history: contextWindowHistory, stopGenerationTriggers, tokens: contextWindowTokens, newResolvedHistory, newHistoryCompressionMetadata, ignoreStartText, functionCallInitiallyEngaged, disengageInitiallyEngagedFunctionCall, userTextSuffix } = await getContextWindow({
+ const { history: contextWindowHistory, stopGenerationTriggers, tokens: contextWindowTokens, removeRawFromHistory, newHistoryCompressionMetadata, ignoreStartText, functionCallInitiallyEngaged, disengageInitiallyEngagedFunctionCall, userTextSuffix } = await getContextWindow({
  resolvedHistory: resolvedHistory,
  resolvedContextShift: this.resolvedContextShift,
  lastHistoryCompressionMetadata: this.lastHistoryCompressionMetadata,
- pendingTokensCount: this.pendingTokens.length + queuedChunkTokens.length + functionCallsTokens.length,
+ pendingTokensCount: this.pendingTokens.length + queuedChunkTokens.length + functionCallsTokens.length +
+ this.pendingPartialTokens.length,
  isFirstEvaluation: this.isFirstEvaluation,
  chatWrapper: this.chatWrapper,
  lastEvaluationContextWindowHistory: resolvedContextWindowsHistory,
@@ -924,19 +1017,22 @@ class GenerateResponseState {
  this.functionCallInitiallyEngaged = functionCallInitiallyEngaged;
  this.disengageInitiallyEngagedFunctionCall = disengageInitiallyEngagedFunctionCall;
  this.userTextSuffix = userTextSuffix;
- this.resolvedHistory = newResolvedHistory;
  this.lastHistoryCompressionMetadata = newHistoryCompressionMetadata;
  this.lastContextWindowHistory = contextWindowHistory;
- this.contextWindowLastModelResponse = getLastTextModelResponseFromChatHistory(contextWindowHistory);
- this.contextWindowsRes = [];
+ this.segmentHandler.resetContextWindow();
  this.canAvoidReloadingHistory = true;
+ if (removeRawFromHistory && !this.noRawInResolvedHistory) {
+ this.noRawInResolvedHistory = true;
+ this.resolvedHistory = this.resolvedHistory.map(removeRawFromHistoryItem);
+ }
  }
  this.tokens = [
  ...this.contextWindowTokens,
  ...this.ignoredStartTextTokens,
  ...this.pendingTokens,
  ...queuedChunkTokens,
- ...functionCallsTokens
+ ...functionCallsTokens,
+ ...this.pendingPartialTokens
  ];
  if (avoidReloadingHistory && this.tokens.length >= this.llamaChat.sequence.context.contextSize - 1)
  return await this.loadContextWindow(resolvedHistory, resolvedContextWindowsHistory, endWithUserText, false);
@@ -1017,24 +1113,24 @@ class GenerateResponseState {
  pushAll(prefixDetectorRecordedTokens, tokens);
  }
  }
- for await (const token of this.evaluateWithContextShift(loadContextWindow)) {
+ for await (const tokens of this.evaluateWithContextShift(loadContextWindow)) {
  const stopGenerationTriggerRes = this.handleStopGenerationTrigger("model");
  if (stopGenerationTriggerRes != null)
  return stopGenerationTriggerRes;
- this.currentFunctionCallCurrentPartTokens.push(token);
+ pushAll(this.currentFunctionCallCurrentPartTokens, tokens);
  this.disengageInitiallyEngagedFunctionMode.recordGeneration({
  text: this.currentText,
  tokens: this.currentTokens,
- startNewChecks: this.currentFunctionCallCurrentPartTokens.length === 1,
+ startNewChecks: this.currentFunctionCallCurrentPartTokens.length === tokens.length,
  triggerMustStartWithGeneration: true
  });
  if (prefixDetector.hasTriggeredStops)
- afterPrefixLeftoverTokens.push(token);
+ pushAll(afterPrefixLeftoverTokens, tokens);
  else {
  prefixDetector.recordGeneration({
  text: this.currentText,
  tokens: this.currentTokens,
- startNewChecks: this.currentFunctionCallCurrentPartTokens.length === 1,
+ startNewChecks: this.currentFunctionCallCurrentPartTokens.length === tokens.length,
  triggerMustStartWithGeneration: true
  });
  pushAll(prefixDetectorRecordedTokens, this.currentTokens);
@@ -1109,8 +1205,8 @@ class GenerateResponseState {
  }
  }
  }
- for await (const token of this.evaluateWithContextShift(loadContextWindow)) {
- this.currentFunctionCallCurrentPartTokens.push(token);
+ for await (const tokens of this.evaluateWithContextShift(loadContextWindow)) {
+ pushAll(this.currentFunctionCallCurrentPartTokens, tokens);
  functionNameGenerationDoneDetector.recordGeneration({
  text: this.currentText,
  tokens: this.currentTokens
@@ -1141,8 +1237,16 @@ class GenerateResponseState {
  if (functionDefinition == null)
  throw new Error(`Function "${this.functionEvaluationFunctionName}" is not provided in the functions object`);
  else if (functionDefinition.params == null) {
- params = undefined;
- paramsText = "";
+ const emptyCallParamsPlaceholder = this.chatWrapper.settings?.functions?.call?.emptyCallParamsPlaceholder;
+ if (emptyCallParamsPlaceholder !== undefined && emptyCallParamsPlaceholder !== "") {
+ params = structuredClone(emptyCallParamsPlaceholder);
+ paramsText = jsonDumps(params);
+ pushAll(this.currentFunctionCallCurrentPartTokens, this.llamaChat.model.tokenize(paramsText));
+ }
+ else {
+ params = undefined;
+ paramsText = "";
+ }
  }
  else {
  const functionParamsGenerationDoneDetector = new StopGenerationDetector();
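Note: the emptyCallParamsPlaceholder branch above lets a chat wrapper substitute a placeholder params value when a called function declares no params (useful for templates that always render a JSON params object). A small illustration of that path; the placeholder value is hypothetical, and the real code serializes with jsonDumps rather than JSON.stringify:

    const emptyCallParamsPlaceholder = {}; // from chatWrapper.settings.functions.call
    const params = structuredClone(emptyCallParamsPlaceholder); // fresh copy per call
    const paramsText = JSON.stringify(params); // "{}"
    // The tokens of paramsText are then appended to
    // currentFunctionCallCurrentPartTokens so the sequence state matches the
    // rendered function call.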
@@ -1154,8 +1258,8 @@ class GenerateResponseState {
  });
  StopGenerationDetector.resolveStopTriggers(this.functionsGrammar.stopGenerationTriggers, this.llamaChat.model.tokenizer)
  .map((stopTrigger) => functionParamsGenerationDoneDetector.addStopTrigger(stopTrigger));
- for await (const token of this.evaluateWithContextShift(loadContextWindow)) {
- this.currentFunctionCallCurrentPartTokens.push(token);
+ for await (const tokens of this.evaluateWithContextShift(loadContextWindow)) {
+ pushAll(this.currentFunctionCallCurrentPartTokens, tokens);
  functionParamsGenerationDoneDetector.recordGeneration({
  text: this.currentText,
  tokens: this.currentTokens
@@ -1213,8 +1317,8 @@ class GenerateResponseState {
  LlamaText(new SpecialToken("EOT"))
  ], this.llamaChat.model.tokenizer)
  .map((stopTrigger) => sectionSuffixDetector.addStopTrigger(stopTrigger));
- for await (const token of this.evaluateWithContextShift(loadContextWindow)) {
- this.currentFunctionCallCurrentPartTokens.push(token);
+ for await (const tokens of this.evaluateWithContextShift(loadContextWindow)) {
+ pushAll(this.currentFunctionCallCurrentPartTokens, tokens);
  sectionSuffixDetector.recordGeneration({
  text: this.currentText,
  tokens: this.currentTokens,
@@ -1258,17 +1362,17 @@ class GenerateResponseState {
  returnFunctionCallResults() {
  if (this.resFunctionCalls.length > 0) {
  this.releasePartiallyFreeTokensBeforeFunctionCallStart();
- let modelResponse = this.llamaChat.model.detokenize(this.res);
- let contextWindowModelResponse = this.llamaChat.model.detokenize(this.contextWindowsRes);
- if (this.grammar?.trimWhitespaceSuffix || this.trimWhitespaceSuffix) {
- modelResponse = modelResponse.trimEnd();
- contextWindowModelResponse = contextWindowModelResponse.trimEnd();
- }
+ this.segmentHandler.onFinishedGeneration();
+ const trimWhitespaceSuffix = this.grammar?.trimWhitespaceSuffix || this.trimWhitespaceSuffix;
+ const responseSegments = this.segmentHandler.getModelResponseSegments(trimWhitespaceSuffix);
  return {
- response: modelResponse,
+ response: responseSegments
+ .filter((segment) => typeof segment === "string")
+ .join(""),
+ fullResponse: responseSegments,
  lastEvaluation: {
- contextWindow: setLastTextInChatHistory("model", this.lastContextWindowHistory, this.contextWindowLastModelResponse + contextWindowModelResponse),
- cleanHistory: setLastTextInChatHistory("model", this.resolvedHistory, this.lastModelResponse + modelResponse),
+ contextWindow: mergeGeneratedResultWithChatHistory("model", this.lastContextWindowHistory, this.segmentHandler.getContextWindowModelResponseSegments(trimWhitespaceSuffix)),
+ cleanHistory: mergeGeneratedResultWithChatHistory("model", this.resolvedHistory, responseSegments),
  contextShiftMetadata: this.lastHistoryCompressionMetadata
  },
  functionCalls: this.resFunctionCalls.map((functionCall) => {
@@ -1292,9 +1396,10 @@ class GenerateResponseState {
  await this.alignCurrentSequenceStateWithCurrentTokens();
  await this.createNewEvaluationIterator();
  while (await this.iterateEvaluation()) {
- if (this.currentToken == null)
+ if (this.currentTokens.length === 0)
  break;
- yield this.currentToken;
+ if (!this.holdPartialTokensForNextEvaluation())
+ yield this.currentTokens;
  if (this.shouldAbort)
  return;
  if (this.updateShouldContextShift())
@@ -1367,9 +1472,14 @@ class GenerateResponseState {
  this.currentIterationReplacementToken = undefined;
  this.ensureNotAborted();
  this.generatedTokens++;
- if (this.currentIteration != null && this.currentIteration?.done !== true) {
- this.currentToken = this.currentIteration.value;
- this.currentTokens = [this.currentToken];
+ if ((this.currentIteration != null && this.currentIteration?.done !== true) || this.pendingPartialTokens.length !== 0) {
+ this.currentToken = this.currentIteration?.value ?? undefined;
+ this.currentTokens = this.currentToken != null
+ ? this.pendingPartialTokens.length === 0
+ ? [this.currentToken]
+ : [...this.pendingPartialTokens, this.currentToken]
+ : [...this.pendingPartialTokens];
+ this.pendingPartialTokens.length = 0;
  this.currentText = this.llamaChat.model.detokenize(this.currentTokens, false, this.getLastTokens());
  if (this.functionEvaluationMode === false)
  this.currentQueuedTokenRelease = this.streamRegulator.addChunk({
@@ -1382,6 +1492,19 @@ class GenerateResponseState {
  }
  return false;
  }
+ holdPartialTokensForNextEvaluation() {
+ if (this.pendingPartialTokens.length === 0 &&
+ this.currentText.endsWith(UNKNOWN_UNICODE_CHAR) &&
+ this.currentToken != null &&
+ !this.llamaChat.model.isSpecialToken(this.currentToken) &&
+ !this.llamaChat.model.isEogToken(this.currentToken)) {
+ this.pendingPartialTokens.length = 0;
+ pushAll(this.pendingPartialTokens, this.currentTokens);
+ this.streamRegulator.removeChunkIfLast(this.currentQueuedTokenRelease);
+ return true;
+ }
+ return false;
+ }
  waitOnPartialCharactersOrWhiteSpaceTokens() {
  if (this.currentText.endsWith(UNKNOWN_UNICODE_CHAR) || ((this.grammar?.trimWhitespaceSuffix || this.trimWhitespaceSuffix) && this.currentText?.trim() === "") || (this.currentText === "" && this.locksToReleaseOnValidGeneration.length > 0 &&
  !this.llamaChat.model.isSpecialToken(this.currentToken))) {
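Note: holdPartialTokensForNextEvaluation above (together with the pendingPartialTokens buffering in removeFoundStartIgnoreTextsFromPendingTokens and iterateEvaluation) holds back tokens whose detokenized text ends in U+FFFD until the multi-byte character completes, instead of streaming a replacement character. A standalone TypeScript sketch of the underlying problem and the hold-back fix:

    // A multi-byte UTF-8 character split across two tokens decodes to U+FFFD
    // when each part is decoded in isolation:
    const bytes = new TextEncoder().encode("€"); // 3 bytes: 0xE2 0x82 0xAC
    console.log(new TextDecoder().decode(bytes.slice(0, 2))); // "�" (U+FFFD)

    // Holding the incomplete bytes until the sequence finishes (what the
    // pendingPartialTokens buffer achieves at the token level):
    const decoder = new TextDecoder("utf-8");
    let out = decoder.decode(bytes.slice(0, 2), { stream: true }); // "" (held back)
    out += decoder.decode(bytes.slice(2), { stream: true });       // "€"
    console.log(out); // "€"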
@@ -1449,21 +1572,22 @@ class GenerateResponseState {
         const { firstRemainingGenerationAfterStop } = StopGenerationDetector.getFirstRemainingGenerationAfterStop(triggeredStops);
         this.removeFoundStartIgnoreTextsFromPendingTokens(true);
         this.pushPendingTokensAndCallOnToken();
-        let modelResponse = this.llamaChat.model.detokenize(this.res);
-        let contextWindowModelResponse = this.llamaChat.model.detokenize(this.contextWindowsRes);
-        if (this.grammar?.trimWhitespaceSuffix || this.trimWhitespaceSuffix) {
-            modelResponse = modelResponse.trimEnd();
-            contextWindowModelResponse = contextWindowModelResponse.trimEnd();
-        }
+        this.segmentHandler.onFinishedGeneration();
+        const trimWhitespaceSuffix = this.grammar?.trimWhitespaceSuffix || this.trimWhitespaceSuffix;
+        const responseSegments = this.segmentHandler.getModelResponseSegments(trimWhitespaceSuffix);
+        const response = responseSegments
+            .filter((segment) => typeof segment === "string")
+            .join("");
         const lastEvaluation = {
-            contextWindow: setLastTextInChatHistory(lastHistoryItemType, this.lastContextWindowHistory, this.contextWindowLastModelResponse + contextWindowModelResponse),
-            cleanHistory: setLastTextInChatHistory(lastHistoryItemType, this.resolvedHistory, this.lastModelResponse + modelResponse),
+            contextWindow: mergeGeneratedResultWithChatHistory(lastHistoryItemType, this.lastContextWindowHistory, this.segmentHandler.getContextWindowModelResponseSegments(trimWhitespaceSuffix)),
+            cleanHistory: mergeGeneratedResultWithChatHistory(lastHistoryItemType, this.resolvedHistory, responseSegments),
             contextShiftMetadata: this.lastHistoryCompressionMetadata
         };
         const isEogToken = this.llamaChat.model.isEogToken(this.currentToken);
         if (isEogToken || this.stopGenerationDetector.hasTriggeredStops) {
             return {
-                response: modelResponse,
+                response,
+                fullResponse: responseSegments,
                 lastEvaluation,
                 metadata: {
                     remainingGenerationAfterStop: firstRemainingGenerationAfterStop,
@@ -1474,7 +1598,8 @@ class GenerateResponseState {
             };
         }
         return {
-            response: modelResponse,
+            response,
+            fullResponse: responseSegments,
            lastEvaluation,
            metadata: {
                remainingGenerationAfterStop: firstRemainingGenerationAfterStop,
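With segments in play, `response` is reduced to the plain-text parts while the new `fullResponse` keeps the typed segment objects. A sketch of that split, with assumed segment shapes:

```js
// Segment objects (e.g. a reasoning segment) are excluded from the
// plain-text response but preserved in `fullResponse`.
const responseSegments = [
    { type: "segment", segmentType: "thought", text: "reason first", ended: true },
    "Final answer."
];

const response = responseSegments
    .filter((segment) => typeof segment === "string")
    .join("");

console.log(response); // "Final answer."
```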
@@ -1511,17 +1636,17 @@ class GenerateResponseState {
     }
     handleMaxTokensTrigger(lastHistoryItemType) {
         if (this.isMaxTokensTriggered()) {
-            let modelResponse = this.llamaChat.model.detokenize(this.res);
-            let contextWindowModelResponse = this.llamaChat.model.detokenize(this.contextWindowsRes);
-            if (this.grammar?.trimWhitespaceSuffix || this.trimWhitespaceSuffix) {
-                modelResponse = modelResponse.trimEnd();
-                contextWindowModelResponse = contextWindowModelResponse.trimEnd();
-            }
+            this.segmentHandler.onFinishedGeneration();
+            const trimWhitespaceSuffix = this.grammar?.trimWhitespaceSuffix || this.trimWhitespaceSuffix;
+            const responseSegments = this.segmentHandler.getModelResponseSegments(trimWhitespaceSuffix);
             return {
-                response: modelResponse,
+                response: responseSegments
+                    .filter((segment) => typeof segment === "string")
+                    .join(""),
+                fullResponse: responseSegments,
                 lastEvaluation: {
-                    contextWindow: setLastTextInChatHistory(lastHistoryItemType, this.lastContextWindowHistory, this.contextWindowLastModelResponse + contextWindowModelResponse),
-                    cleanHistory: setLastTextInChatHistory(lastHistoryItemType, this.resolvedHistory, this.lastModelResponse + modelResponse),
+                    contextWindow: mergeGeneratedResultWithChatHistory(lastHistoryItemType, this.lastContextWindowHistory, this.segmentHandler.getContextWindowModelResponseSegments(trimWhitespaceSuffix)),
+                    cleanHistory: mergeGeneratedResultWithChatHistory(lastHistoryItemType, this.resolvedHistory, responseSegments),
                     contextShiftMetadata: this.lastHistoryCompressionMetadata
                 },
                 metadata: {
@@ -1542,17 +1667,17 @@ class GenerateResponseState {
         if (this.shouldAbort && this.signal?.aborted && this.stopOnAbortSignal) {
             if (this.res.length === 0)
                 throw this.signal.reason;
-            let modelResponse = this.llamaChat.model.detokenize(this.res);
-            let contextWindowModelResponse = this.llamaChat.model.detokenize(this.contextWindowsRes);
-            if (this.grammar?.trimWhitespaceSuffix || this.trimWhitespaceSuffix) {
-                modelResponse = modelResponse.trimEnd();
-                contextWindowModelResponse = contextWindowModelResponse.trimEnd();
-            }
+            this.segmentHandler.onFinishedGeneration();
+            const trimWhitespaceSuffix = this.grammar?.trimWhitespaceSuffix || this.trimWhitespaceSuffix;
+            const responseSegments = this.segmentHandler.getModelResponseSegments(trimWhitespaceSuffix);
             return {
-                response: modelResponse,
+                response: responseSegments
+                    .filter((segment) => typeof segment === "string")
+                    .join(""),
+                fullResponse: responseSegments,
                 lastEvaluation: {
-                    contextWindow: setLastTextInChatHistory(lastHistoryItemType, this.lastContextWindowHistory, this.contextWindowLastModelResponse + contextWindowModelResponse),
-                    cleanHistory: setLastTextInChatHistory(lastHistoryItemType, this.resolvedHistory, this.lastModelResponse + modelResponse),
+                    contextWindow: mergeGeneratedResultWithChatHistory(lastHistoryItemType, this.lastContextWindowHistory, this.segmentHandler.getContextWindowModelResponseSegments(trimWhitespaceSuffix)),
+                    cleanHistory: mergeGeneratedResultWithChatHistory(lastHistoryItemType, this.resolvedHistory, responseSegments),
                     contextShiftMetadata: this.lastHistoryCompressionMetadata
                 },
                 metadata: {
@@ -1565,10 +1690,8 @@ class GenerateResponseState {
     pushPendingTokensAndCallOnToken() {
         if (this.pendingTokens.length === 0)
             return;
-        this.onToken?.(this.pendingTokens.slice());
-        this.onTextChunk?.(this.llamaChat.model.detokenize(this.pendingTokens, false, this.res));
+        this.segmentHandler.processTokens(this.pendingTokens);
         pushAll(this.res, this.pendingTokens);
-        pushAll(this.contextWindowsRes, this.pendingTokens);
         this.pendingTokens.length = 0;
     }
     getLastTokens(maxTokens = maxRecentDetokenizerTokens) {
@@ -1577,8 +1700,470 @@ class GenerateResponseState {
             this.ignoredStartTextTokens,
             this.pendingTokens,
             this.streamRegulator.getLastQueuedChunkTokens(maxTokens),
-            this.getContextWindowFunctionCallsTokens()
+            this.getContextWindowFunctionCallsTokens(),
+            this.pendingPartialTokens
         ], maxTokens);
     }
 }
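The new `SegmentHandler` below watches the token stream for per-segment prefix/suffix triggers and routes chunks either into plain response text or into typed segments; the new DeepSeek and Qwen chat wrappers added in this release appear to be the intended consumers (e.g. for reasoning segments). A compact text-level sketch of the stack-based idea; the `<think>`/`</think>` markers are an assumption for illustration, and the real class operates on token streams via `StopGenerationDetector` triggers:

```js
// Simplified, text-only sketch of segment routing (not the real class).
function segmentText(text, definitions) {
    const segments = [];
    let rest = text;
    let current = null; // the segment currently being filled, if any
    while (rest.length > 0) {
        if (current == null) {
            // Look for the earliest segment prefix in the remaining text
            const hits = Object.entries(definitions)
                .map(([type, { prefix }]) => ({ type, index: rest.indexOf(prefix) }))
                .filter((hit) => hit.index >= 0)
                .sort((a, b) => a.index - b.index);
            if (hits.length === 0) {
                segments.push(rest); // plain response text
                break;
            }
            const { type, index } = hits[0];
            if (index > 0)
                segments.push(rest.slice(0, index));
            current = { type: "segment", segmentType: type, text: "", ended: false };
            segments.push(current);
            rest = rest.slice(index + definitions[type].prefix.length);
        } else {
            // Inside a segment: consume until its suffix closes it
            const suffix = definitions[current.segmentType].suffix;
            const end = rest.indexOf(suffix);
            if (end < 0) {
                current.text += rest; // segment left open (unended)
                break;
            }
            current.text += rest.slice(0, end);
            current.ended = true;
            rest = rest.slice(end + suffix.length);
            current = null;
        }
    }
    return segments;
}

console.log(segmentText("<think>plan the reply</think>Hello!", {
    thought: { prefix: "<think>", suffix: "</think>" }
}));
// [{ type: "segment", segmentType: "thought", text: "plan the reply", ended: true }, "Hello!"]
```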
+class SegmentHandler {
+    model;
+    onToken;
+    onTextChunk;
+    onResponseChunk;
+    _closeAllSegmentsDetector;
+    _segmentDetectors;
+    _segmentsStack = [];
+    _segmentsStackSet = new Set();
+    _ownedSegmentsStackLength = 0;
+    _segments = [];
+    _segmentsStartTokenTrail = [];
+    _contextWindowSegments = [];
+    _contextWindowStartTokenTrail = [];
+    _initialTokensTrail;
+    _tokensTrail;
+    _streamRegulator = new TokenStreamRegulator();
+    _segmentDefinitions;
+    constructor({ model, onTextChunk, onToken, onResponseChunk, segmentDefinitions, closeAllSegments, initialSegmentStack, previousTokens }) {
+        this.model = model;
+        this.onTextChunk = onTextChunk;
+        this.onToken = onToken;
+        this.onResponseChunk = onResponseChunk;
+        this._initialTokensTrail = previousTokens.slice(-maxRecentDetokenizerTokens);
+        this._segmentsStartTokenTrail = previousTokens.slice(-maxRecentDetokenizerTokens);
+        this._tokensTrail = previousTokens.slice(-maxRecentDetokenizerTokens);
+        this._closeAllSegmentsDetector = closeAllSegments != null
+            ? new StopGenerationDetector()
+                .addStopTrigger(StopGenerationDetector.resolveLlamaTextTrigger(LlamaText(closeAllSegments), this.model.tokenizer))
+            : undefined;
+        this._segmentDetectors = new Map();
+        this._segmentsStack = initialSegmentStack;
+        this._segmentsStackSet = new Set(initialSegmentStack);
+        this._ownedSegmentsStackLength = initialSegmentStack.length;
+        this._segmentDefinitions = segmentDefinitions;
+        for (const [segment, { prefix, suffix }] of segmentDefinitions.entries()) {
+            this._segmentDetectors.set(segment, {
+                prefix: new StopGenerationDetector()
+                    .addStopTrigger(StopGenerationDetector.resolveLlamaTextTrigger(LlamaText(prefix), this.model.tokenizer)),
+                suffix: suffix != null
+                    ? new StopGenerationDetector()
+                        .addStopTrigger(StopGenerationDetector.resolveLlamaTextTrigger(LlamaText(suffix), this.model.tokenizer))
+                    : undefined
+            });
+        }
+    }
+    processTokens(tokens) {
+        if (tokens.length === 0)
+            return;
+        let pendingTokens = [];
+        for (const token of tokens) {
+            pendingTokens.push(token);
+            const currentText = this.model.detokenize(pendingTokens, false, this._tokensTrail);
+            if (currentText.endsWith(UNKNOWN_UNICODE_CHAR))
+                continue;
+            pushAll(this._tokensTrail, pendingTokens);
+            this._processTokens(pendingTokens, currentText);
+            pendingTokens = [];
+        }
+    }
+    onFinishedGeneration() {
+        this._clearDetectors();
+        this._pushCurrentTokens(this._streamRegulator.popFreeChunkTokens());
+    }
+    resetContextWindow() {
+        this._contextWindowSegments.length = 0;
+        this._contextWindowStartTokenTrail.length = 0;
+        pushAll(this._contextWindowStartTokenTrail, this._getTokenTrailFromResult());
+    }
+    openSegment(type) {
+        const now = Date.now();
+        this._segmentsStack.push(type);
+        this._segmentsStackSet.add(type);
+        this._segments.push({ type, tokens: [], ended: false, start: true, startTime: now });
+        this._contextWindowSegments.push({ type, tokens: [], ended: false, start: true, startTime: now });
+        this.onResponseChunk?.({
+            type: "segment",
+            segmentType: type,
+            tokens: [],
+            text: "",
+            segmentStartTime: new Date(now)
+        });
+    }
+    _processTokens(tokens, text) {
+        const queuedTokenRelease = this._streamRegulator.addChunk({
+            tokens,
+            text
+        });
+        const currentType = this._segmentsStack.at(-1);
+        const handleDetector = (stopDetector, action, type) => {
+            if (stopDetector == null)
+                return false;
+            stopDetector.recordGeneration({
+                text,
+                tokens,
+                queuedTokenRelease
+            });
+            if (stopDetector.hasTriggeredStops) {
+                const [leftTokens, leftText] = this._handleTriggeredStopDetector(stopDetector);
+                if (action === "pop")
+                    this._closeSegment(type);
+                else if (action === "push") {
+                    this.openSegment(type);
+                }
+                else if (action === "reset") {
+                    const now = Date.now();
+                    while (this._segmentsStack.length > 0) {
+                        const segmentType = this._segmentsStack.pop();
+                        this._segmentsStackSet.delete(segmentType);
+                        const lastSegment = this._segments.at(-1);
+                        if (lastSegment != null && !(lastSegment instanceof Array) && lastSegment.type === segmentType) {
+                            lastSegment.ended = true;
+                            lastSegment.endTime = now;
+                            this.onResponseChunk?.({
+                                type: "segment",
+                                segmentType: segmentType,
+                                tokens: [],
+                                text: "",
+                                segmentStartTime: undefined,
+                                segmentEndTime: new Date(now)
+                            });
+                        }
+                        else {
+                            this._segments.push({ type: segmentType, tokens: [], ended: true, start: false, endTime: now });
+                            this.onResponseChunk?.({
+                                type: "segment",
+                                segmentType: segmentType,
+                                tokens: [],
+                                text: "",
+                                segmentStartTime: undefined,
+                                segmentEndTime: new Date(now)
+                            });
+                        }
+                        const lastContextWindowSegment = this._contextWindowSegments.at(-1);
+                        if (lastContextWindowSegment != null && !(lastContextWindowSegment instanceof Array) &&
+                            lastContextWindowSegment.type === segmentType)
+                            lastContextWindowSegment.ended = true;
+                        else
+                            this._contextWindowSegments.push({ type: segmentType, tokens: [], ended: true, start: false, endTime: now });
+                    }
+                    this._ownedSegmentsStackLength = 0;
+                }
+                if (leftTokens.length > 0)
+                    this._processTokens(leftTokens, leftText);
+                return true;
+            }
+            return false;
+        };
+        if (currentType != null) {
+            if (handleDetector(this._closeAllSegmentsDetector, "reset", currentType))
+                return;
+            if (handleDetector(this._segmentDetectors.get(currentType)?.suffix, "pop", currentType))
+                return;
+        }
+        else
+            this._closeAllSegmentsDetector?.clearInProgressStops();
+        for (const [type, { prefix, suffix }] of this._segmentDetectors.entries()) {
+            if (!this._segmentsStackSet.has(type)) {
+                if (handleDetector(prefix, "push", type))
+                    return;
+            }
+            else
+                prefix.clearInProgressStops();
+            if (this._segmentsStackSet.has(type)) {
+                // `currentType` suffix is already handled above
+                if (type === currentType && handleDetector(suffix, "pop", type))
+                    return;
+            }
+            else
+                suffix?.clearInProgressStops();
+        }
+        this._pushCurrentTokens(this._streamRegulator.popFreeChunkTokens());
+    }
+    _handleTriggeredStopDetector(stopDetector) {
+        this._clearDetectors(stopDetector);
+        stopDetector.clearInProgressStops();
+        const triggeredStops = stopDetector.getTriggeredStops();
+        const freeTokens = this._streamRegulator.popFreeChunkTokens();
+        const partiallyFreeTokens = this._streamRegulator.getPartiallyFreeChunk(this.model.tokenizer);
+        const queuedTokensBeforeStopTrigger = getQueuedTokensBeforeStopTrigger(triggeredStops, partiallyFreeTokens, this.model.tokenizer);
+        const { firstRemainingGenerationAfterStop } = StopGenerationDetector.getFirstRemainingGenerationAfterStop(triggeredStops);
+        const remainingTokens = typeof firstRemainingGenerationAfterStop === "string"
+            ? firstRemainingGenerationAfterStop === ""
+                ? []
+                : this.model.tokenize(firstRemainingGenerationAfterStop, false)
+            : (firstRemainingGenerationAfterStop ?? []);
+        const remainingText = typeof firstRemainingGenerationAfterStop === "string"
+            ? firstRemainingGenerationAfterStop
+            : this.model.detokenize(remainingTokens, false, queuedTokensBeforeStopTrigger.length === 0
+                ? this._getTokenTrailFromResult()
+                : queuedTokensBeforeStopTrigger);
+        this._pushCurrentTokens([...freeTokens, ...queuedTokensBeforeStopTrigger]);
+        stopDetector.clearTriggeredStops();
+        this._streamRegulator.reset();
+        return [remainingTokens, remainingText];
+    }
+    _closeSegment(type) {
+        if (type == null)
+            return;
+        const lastSegment = this._segments.at(-1);
+        const now = Date.now();
+        if (lastSegment != null && !(lastSegment instanceof Array) && lastSegment.type === type && this._segmentsStack.at(-1) === type) {
+            if (lastSegment.ended !== true) {
+                lastSegment.ended = true;
+                lastSegment.endTime = now;
+                this.onResponseChunk?.({
+                    type: "segment",
+                    segmentType: type,
+                    tokens: [],
+                    text: "",
+                    segmentStartTime: undefined,
+                    segmentEndTime: new Date(now)
+                });
+            }
+            this._segmentsStackSet.delete(this._segmentsStack.pop());
+            if (this._segmentsStack.length < this._ownedSegmentsStackLength)
+                this._ownedSegmentsStackLength = this._segmentsStack.length;
+            const lastContextWindowSegment = this._contextWindowSegments.at(-1);
+            if (lastContextWindowSegment != null && !(lastContextWindowSegment instanceof Array) &&
+                lastContextWindowSegment.type === type && this._segmentsStack.at(-1) === type) {
+                if (lastContextWindowSegment.ended !== true) {
+                    lastContextWindowSegment.ended = true;
+                    lastContextWindowSegment.endTime = now;
+                }
+            }
+            else
+                this._contextWindowSegments.push({ type, tokens: [], ended: true, start: false, endTime: now });
+            return;
+        }
+        const typeIndex = this._segmentsStack.lastIndexOf(type);
+        if (typeIndex < 0)
+            return;
+        for (let i = this._segmentsStack.length - 1; i >= typeIndex; i--) {
+            const segmentType = this._segmentsStack.pop();
+            this._segmentsStackSet.delete(segmentType);
+            if (this._segmentsStack.length < this._ownedSegmentsStackLength)
+                this._ownedSegmentsStackLength = this._segmentsStack.length;
+            this._segments.push({ type: segmentType, tokens: [], ended: true, start: false, endTime: now });
+            this._contextWindowSegments.push({ type: segmentType, tokens: [], ended: true, start: false, endTime: now });
+            this.onResponseChunk?.({
+                type: "segment",
+                segmentType: segmentType,
+                tokens: [],
+                text: "",
+                segmentStartTime: undefined,
+                segmentEndTime: new Date(now)
+            });
+        }
+    }
+    _clearDetectors(skipDetector) {
+        if (this._closeAllSegmentsDetector !== skipDetector) {
+            this._closeAllSegmentsDetector?.clearInProgressStops();
+            this._closeAllSegmentsDetector?.clearTriggeredStops();
+        }
+        for (const { prefix, suffix } of this._segmentDetectors.values()) {
+            if (prefix !== skipDetector) {
+                prefix.clearInProgressStops();
+                prefix.clearTriggeredStops();
+            }
+            if (suffix !== skipDetector) {
+                suffix?.clearInProgressStops();
+                suffix?.clearTriggeredStops();
+            }
+        }
+    }
+    _pushCurrentTokens(tokens) {
+        const lastSegment = this._segments.at(-1);
+        const lastContextWindowSegment = this._contextWindowSegments.at(-1);
+        const type = this._segmentsStack.at(-1);
+        if (type == null) {
+            if (lastSegment == null) {
+                const text = (this.onResponseChunk != null || this.onTextChunk != null)
+                    ? this.model.detokenize(tokens, false, this._getTokenTrailFromResult())
+                    : "";
+                this._segments.push(tokens);
+                this.onToken?.(tokens.slice());
+                this.onTextChunk?.(text);
+                this.onResponseChunk?.({ type: undefined, segmentType: undefined, tokens: tokens.slice(), text });
+            }
+            else {
+                if (lastSegment instanceof Array) {
+                    const text = (this.onResponseChunk != null || this.onTextChunk != null)
+                        ? this.model.detokenize(tokens, false, this._getTokenTrailFromResult())
+                        : "";
+                    pushAll(lastSegment, tokens);
+                    this.onToken?.(tokens);
+                    this.onTextChunk?.(text);
+                    this.onResponseChunk?.({ type: undefined, segmentType: undefined, tokens, text });
+                }
+                else
+                    this._segments.push(tokens);
+            }
+            if (lastContextWindowSegment == null)
+                this._contextWindowSegments.push(tokens.slice());
+            else {
+                if (lastContextWindowSegment instanceof Array)
+                    pushAll(lastContextWindowSegment, tokens);
+                else
+                    this._contextWindowSegments.push(tokens.slice());
+            }
+        }
+        else {
+            const now = Date.now();
+            if (lastSegment == null) {
+                const text = this.onResponseChunk != null
+                    ? this.model.detokenize(tokens, false, this._getTokenTrailFromResult())
+                    : "";
+                this._segments.push({
+                    type,
+                    tokens,
+                    ended: false,
+                    start: this._segmentsStack.length > this._ownedSegmentsStackLength,
+                    startTime: now
+                });
+                this.onResponseChunk?.({
+                    type: "segment",
+                    segmentType: type,
+                    tokens: tokens.slice(),
+                    text,
+                    segmentStartTime: new Date(now)
+                });
+            }
+            else {
+                const text = this.onResponseChunk != null
+                    ? this.model.detokenize(tokens, false, this._getTokenTrailFromResult())
+                    : "";
+                if (lastSegment instanceof Array || lastSegment.type !== type) {
+                    this._segments.push({
+                        type,
+                        tokens,
+                        ended: false,
+                        start: this._segmentsStack.length > this._ownedSegmentsStackLength,
+                        startTime: now
+                    });
+                    this.onResponseChunk?.({
+                        type: "segment",
+                        segmentType: type,
+                        tokens: tokens.slice(),
+                        text,
+                        segmentStartTime: new Date(now)
+                    });
+                }
+                else {
+                    pushAll(lastSegment.tokens, tokens);
+                    this.onResponseChunk?.({
+                        type: "segment",
+                        segmentType: type,
+                        tokens: tokens.slice(),
+                        text,
+                        segmentStartTime: undefined
+                    });
+                }
+            }
+            if (lastContextWindowSegment == null)
+                this._contextWindowSegments.push({
+                    type,
+                    tokens: tokens.slice(),
+                    ended: false,
+                    start: this._segmentsStack.length > this._ownedSegmentsStackLength,
+                    startTime: now
+                });
+            else {
+                if (lastContextWindowSegment instanceof Array || lastContextWindowSegment.type !== type)
+                    this._contextWindowSegments.push({
+                        type,
+                        tokens: tokens.slice(),
+                        ended: false,
+                        start: this._segmentsStack.length > this._ownedSegmentsStackLength,
+                        startTime: now
+                    });
+                else
+                    pushAll(lastContextWindowSegment.tokens, tokens);
+            }
+        }
+    }
+    _getTokenTrailFromResult() {
+        const res = [];
+        for (let i = this._segments.length - 1; i >= 0; i--) {
+            const segment = this._segments[i];
+            const segmentTokens = segment instanceof Array
+                ? segment
+                : segment.tokens;
+            for (let j = segmentTokens.length - 1; j >= 0; j--) {
+                res.unshift(segmentTokens[j]);
+                if (res.length >= maxRecentDetokenizerTokens)
+                    return res;
+            }
+        }
+        for (let i = this._initialTokensTrail.length - 1; i >= 0; i--) {
+            res.unshift(this._initialTokensTrail[i]);
+            if (res.length >= maxRecentDetokenizerTokens)
+                return res;
+        }
+        return res;
+    }
+    getModelResponseSegments(trimWhitespaceSuffix = false) {
+        return this._getModelResponseForSegments(this._segments, this._segmentsStartTokenTrail, trimWhitespaceSuffix);
+    }
+    getContextWindowModelResponseSegments(trimWhitespaceSuffix = false) {
+        return this._getModelResponseForSegments(this._contextWindowSegments, this._contextWindowStartTokenTrail, trimWhitespaceSuffix);
+    }
+    _getModelResponseForSegments(rawSegments, recentTokens, trimWhitespaceSuffix) {
+        let tokenTrail = resolveLastTokens([recentTokens]);
+        return rawSegments.map((rawSegment, index) => {
+            const isLast = index === rawSegments.length - 1;
+            if (rawSegment instanceof Array) {
+                let text = this.model.detokenize(rawSegment, false, tokenTrail);
+                if (isLast && trimWhitespaceSuffix)
+                    text = text.trimEnd();
+                tokenTrail = resolveLastTokens([tokenTrail, rawSegment]);
+                return text;
+            }
+            let text = this.model.detokenize(rawSegment.tokens, false, tokenTrail);
+            if (isLast && rawSegment.ended && trimWhitespaceSuffix)
+                text = text.trimEnd();
+            tokenTrail = resolveLastTokens([tokenTrail, rawSegment.tokens]);
+            const segmentDefinition = this._segmentDefinitions.get(rawSegment.type);
+            return {
+                type: "segment",
+                segmentType: rawSegment.type,
+                text,
+                ended: rawSegment.ended,
+                raw: segmentDefinition == null
+                    ? LlamaText([text]).toJSON()
+                    : LlamaText([
+                        rawSegment.start
+                            ? segmentDefinition.prefix
+                            : "",
+                        text,
+                        rawSegment.ended
+                            ? (segmentDefinition.suffix ?? "")
+                            : ""
+                    ]).toJSON(),
+                startTime: rawSegment.startTime != null
+                    ? new Date(rawSegment.startTime).toISOString()
+                    : undefined,
+                endTime: rawSegment.endTime != null
+                    ? new Date(rawSegment.endTime).toISOString()
+                    : undefined
+            };
+        });
+    }
+    static getStackFromModelResponse(modelResponse) {
+        const stack = [];
+        const stackSet = new Set();
+        for (const item of modelResponse) {
+            if (typeof item === "string" || isChatModelResponseFunctionCall(item))
+                continue;
+            void item.type;
+            if (item.ended && stack.at(-1) === item.segmentType) {
+                stack.pop();
+                stackSet.delete(item.segmentType);
+            }
+            else if (!item.ended && !stackSet.has(item.segmentType)) {
+                stack.push(item.segmentType);
+                stackSet.add(item.segmentType);
+            }
+        }
+        return stack;
+    }
+}
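`getStackFromModelResponse` replays a prior model response to recover which segment types are still open, which matters when generation resumes mid-segment. A hedged sketch of the same replay logic over simplified items (the real static method also skips function-call items):

```js
function getOpenSegmentStack(modelResponse) {
    const stack = [];
    const stackSet = new Set();
    for (const item of modelResponse) {
        if (typeof item === "string")
            continue; // plain text never affects the segment stack
        if (item.ended && stack.at(-1) === item.segmentType) {
            stack.pop(); // a closed segment pops its own type off the stack
            stackSet.delete(item.segmentType);
        } else if (!item.ended && !stackSet.has(item.segmentType)) {
            stack.push(item.segmentType); // an unended segment stays open
            stackSet.add(item.segmentType);
        }
    }
    return stack;
}

console.log(getOpenSegmentStack([
    { type: "segment", segmentType: "thought", text: "a", ended: false },
    "partial answer"
])); // ["thought"] — the thought segment was never closed
```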
 //# sourceMappingURL=LlamaChat.js.map