node-llama-cpp 3.0.0-beta.16 → 3.0.0-beta.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (173)
  1. package/README.md +8 -5
  2. package/dist/ChatWrapper.d.ts +1 -15
  3. package/dist/ChatWrapper.js +22 -32
  4. package/dist/ChatWrapper.js.map +1 -1
  5. package/dist/apiDocsOverrides.d.ts +1 -0
  6. package/dist/apiDocsOverrides.js +5 -0
  7. package/dist/apiDocsOverrides.js.map +1 -0
  8. package/dist/bindings/AddonTypes.d.ts +4 -1
  9. package/dist/bindings/getLlama.d.ts +5 -1
  10. package/dist/bindings/getLlama.js +11 -4
  11. package/dist/bindings/getLlama.js.map +1 -1
  12. package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.d.ts +3 -0
  13. package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.js +27 -0
  14. package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.js.map +1 -0
  15. package/dist/chatWrappers/FunctionaryChatWrapper.js +39 -40
  16. package/dist/chatWrappers/FunctionaryChatWrapper.js.map +1 -1
  17. package/dist/chatWrappers/{LlamaChatWrapper.d.ts → Llama2ChatWrapper.d.ts} +1 -1
  18. package/dist/chatWrappers/{LlamaChatWrapper.js → Llama2ChatWrapper.js} +3 -3
  19. package/dist/chatWrappers/Llama2ChatWrapper.js.map +1 -0
  20. package/dist/chatWrappers/Llama3ChatWrapper.d.ts +31 -0
  21. package/dist/chatWrappers/Llama3ChatWrapper.js +129 -0
  22. package/dist/chatWrappers/Llama3ChatWrapper.js.map +1 -0
  23. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.d.ts +2 -2
  24. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js +6 -2
  25. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js.map +1 -1
  26. package/dist/chatWrappers/generic/TemplateChatWrapper.d.ts +3 -4
  27. package/dist/chatWrappers/generic/TemplateChatWrapper.js +1 -2
  28. package/dist/chatWrappers/generic/TemplateChatWrapper.js.map +1 -1
  29. package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.d.ts +42 -0
  30. package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.js +82 -0
  31. package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.js.map +1 -0
  32. package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js +3 -3
  33. package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js.map +1 -1
  34. package/dist/chatWrappers/utils/resolveChatWrapper.d.ts +6 -4
  35. package/dist/chatWrappers/utils/resolveChatWrapper.js +23 -17
  36. package/dist/chatWrappers/utils/resolveChatWrapper.js.map +1 -1
  37. package/dist/cli/cli.js +5 -3
  38. package/dist/cli/cli.js.map +1 -1
  39. package/dist/cli/commands/ChatCommand.d.ts +2 -0
  40. package/dist/cli/commands/ChatCommand.js +26 -9
  41. package/dist/cli/commands/ChatCommand.js.map +1 -1
  42. package/dist/cli/commands/CompleteCommand.d.ts +2 -0
  43. package/dist/cli/commands/CompleteCommand.js +24 -7
  44. package/dist/cli/commands/CompleteCommand.js.map +1 -1
  45. package/dist/cli/commands/DebugCommand.js +3 -5
  46. package/dist/cli/commands/DebugCommand.js.map +1 -1
  47. package/dist/cli/commands/DownloadCommand.d.ts +1 -1
  48. package/dist/cli/commands/DownloadCommand.js +2 -1
  49. package/dist/cli/commands/DownloadCommand.js.map +1 -1
  50. package/dist/cli/commands/InfillCommand.d.ts +2 -0
  51. package/dist/cli/commands/InfillCommand.js +24 -7
  52. package/dist/cli/commands/InfillCommand.js.map +1 -1
  53. package/dist/cli/commands/PullCommand.d.ts +12 -0
  54. package/dist/cli/commands/PullCommand.js +109 -0
  55. package/dist/cli/commands/PullCommand.js.map +1 -0
  56. package/dist/cli/commands/inspect/commands/InspectGgufCommand.d.ts +1 -0
  57. package/dist/cli/commands/inspect/commands/InspectGgufCommand.js +23 -11
  58. package/dist/cli/commands/inspect/commands/InspectGgufCommand.js.map +1 -1
  59. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.d.ts +2 -0
  60. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js +43 -11
  61. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js.map +1 -1
  62. package/dist/cli/recommendedModels.js +61 -0
  63. package/dist/cli/recommendedModels.js.map +1 -1
  64. package/dist/cli/utils/printCommonInfoLines.js +4 -3
  65. package/dist/cli/utils/printCommonInfoLines.js.map +1 -1
  66. package/dist/cli/utils/resolveCommandGgufPath.d.ts +3 -1
  67. package/dist/cli/utils/resolveCommandGgufPath.js +44 -39
  68. package/dist/cli/utils/resolveCommandGgufPath.js.map +1 -1
  69. package/dist/cli/utils/resolveModelRecommendationFileOptions.d.ts +1 -1
  70. package/dist/evaluator/LlamaChat/LlamaChat.d.ts +18 -2
  71. package/dist/evaluator/LlamaChat/LlamaChat.js +271 -186
  72. package/dist/evaluator/LlamaChat/LlamaChat.js.map +1 -1
  73. package/dist/evaluator/LlamaChat/utils/FunctionCallGrammar.js +3 -1
  74. package/dist/evaluator/LlamaChat/utils/FunctionCallGrammar.js.map +1 -1
  75. package/dist/evaluator/LlamaChatSession/LlamaChatSession.d.ts +22 -3
  76. package/dist/evaluator/LlamaChatSession/LlamaChatSession.js +18 -7
  77. package/dist/evaluator/LlamaChatSession/LlamaChatSession.js.map +1 -1
  78. package/dist/evaluator/LlamaCompletion.d.ts +2 -2
  79. package/dist/evaluator/LlamaCompletion.js +11 -13
  80. package/dist/evaluator/LlamaCompletion.js.map +1 -1
  81. package/dist/evaluator/LlamaContext/LlamaContext.d.ts +6 -11
  82. package/dist/evaluator/LlamaContext/LlamaContext.js +23 -16
  83. package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -1
  84. package/dist/evaluator/LlamaEmbeddingContext.d.ts +2 -10
  85. package/dist/evaluator/LlamaEmbeddingContext.js +10 -24
  86. package/dist/evaluator/LlamaEmbeddingContext.js.map +1 -1
  87. package/dist/evaluator/LlamaGrammar.d.ts +1 -1
  88. package/dist/evaluator/LlamaModel.d.ts +23 -3
  89. package/dist/evaluator/LlamaModel.js +32 -5
  90. package/dist/evaluator/LlamaModel.js.map +1 -1
  91. package/dist/evaluator/TokenBias.d.ts +1 -1
  92. package/dist/evaluator/TokenBias.js +3 -3
  93. package/dist/evaluator/TokenBias.js.map +1 -1
  94. package/dist/gguf/insights/GgufInsights.js +12 -12
  95. package/dist/gguf/insights/GgufInsights.js.map +1 -1
  96. package/dist/gguf/insights/utils/resolveContextContextSizeOption.js +27 -3
  97. package/dist/gguf/insights/utils/resolveContextContextSizeOption.js.map +1 -1
  98. package/dist/gguf/parser/parseGguf.js +5 -0
  99. package/dist/gguf/parser/parseGguf.js.map +1 -1
  100. package/dist/gguf/readGgufFileInfo.d.ts +5 -2
  101. package/dist/gguf/readGgufFileInfo.js +38 -10
  102. package/dist/gguf/readGgufFileInfo.js.map +1 -1
  103. package/dist/gguf/types/GgufFileInfoTypes.d.ts +32 -0
  104. package/dist/gguf/types/GgufFileInfoTypes.js.map +1 -1
  105. package/dist/gguf/types/GgufMetadataTypes.d.ts +4 -0
  106. package/dist/gguf/types/GgufMetadataTypes.js.map +1 -1
  107. package/dist/gguf/utils/getGgufMetadataArchitectureData.js +1 -1
  108. package/dist/gguf/utils/getGgufMetadataArchitectureData.js.map +1 -1
  109. package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.d.ts +2 -0
  110. package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.js +39 -0
  111. package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.js.map +1 -0
  112. package/dist/gguf/utils/resolveSplitGgufParts.d.ts +7 -0
  113. package/dist/gguf/utils/resolveSplitGgufParts.js +55 -0
  114. package/dist/gguf/utils/resolveSplitGgufParts.js.map +1 -0
  115. package/dist/index.d.ts +9 -5
  116. package/dist/index.js +7 -3
  117. package/dist/index.js.map +1 -1
  118. package/dist/types.d.ts +21 -1
  119. package/dist/types.js.map +1 -1
  120. package/dist/utils/LlamaText.d.ts +31 -21
  121. package/dist/utils/LlamaText.js +253 -223
  122. package/dist/utils/LlamaText.js.map +1 -1
  123. package/dist/utils/StopGenerationDetector.d.ts +1 -1
  124. package/dist/utils/StopGenerationDetector.js +21 -18
  125. package/dist/utils/StopGenerationDetector.js.map +1 -1
  126. package/dist/utils/TokenStreamRegulator.d.ts +4 -2
  127. package/dist/utils/TokenStreamRegulator.js +22 -4
  128. package/dist/utils/TokenStreamRegulator.js.map +1 -1
  129. package/dist/utils/createModelDownloader.d.ts +99 -0
  130. package/dist/utils/createModelDownloader.js +226 -0
  131. package/dist/utils/createModelDownloader.js.map +1 -0
  132. package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js +18 -8
  133. package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js.map +1 -1
  134. package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js +15 -11
  135. package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js.map +1 -1
  136. package/dist/utils/parseTextTemplate.d.ts +2 -2
  137. package/dist/utils/parseTextTemplate.js +2 -2
  138. package/dist/utils/runtime.d.ts +4 -0
  139. package/dist/utils/runtime.js +8 -0
  140. package/dist/utils/runtime.js.map +1 -0
  141. package/dist/utils/utilTypes.d.ts +3 -0
  142. package/dist/utils/utilTypes.js +2 -0
  143. package/dist/utils/utilTypes.js.map +1 -0
  144. package/llama/addon.cpp +67 -14
  145. package/llama/binariesGithubRelease.json +1 -1
  146. package/llama/gitRelease.bundle +0 -0
  147. package/llama/grammars/README.md +1 -1
  148. package/llama/llama.cpp.info.json +1 -1
  149. package/llamaBins/linux-arm64/_nlcBuildMetadata.json +1 -1
  150. package/llamaBins/linux-arm64/llama-addon.node +0 -0
  151. package/llamaBins/linux-armv7l/_nlcBuildMetadata.json +1 -1
  152. package/llamaBins/linux-armv7l/llama-addon.node +0 -0
  153. package/llamaBins/linux-x64/_nlcBuildMetadata.json +1 -1
  154. package/llamaBins/linux-x64/llama-addon.node +0 -0
  155. package/llamaBins/linux-x64-cuda/_nlcBuildMetadata.json +1 -1
  156. package/llamaBins/linux-x64-cuda/llama-addon.node +0 -0
  157. package/llamaBins/linux-x64-vulkan/_nlcBuildMetadata.json +1 -1
  158. package/llamaBins/linux-x64-vulkan/llama-addon.node +0 -0
  159. package/llamaBins/mac-arm64-metal/_nlcBuildMetadata.json +1 -1
  160. package/llamaBins/mac-arm64-metal/default.metallib +0 -0
  161. package/llamaBins/mac-arm64-metal/llama-addon.node +0 -0
  162. package/llamaBins/mac-x64/_nlcBuildMetadata.json +1 -1
  163. package/llamaBins/mac-x64/llama-addon.node +0 -0
  164. package/llamaBins/win-arm64/_nlcBuildMetadata.json +1 -1
  165. package/llamaBins/win-arm64/llama-addon.node +0 -0
  166. package/llamaBins/win-x64/_nlcBuildMetadata.json +1 -1
  167. package/llamaBins/win-x64/llama-addon.node +0 -0
  168. package/llamaBins/win-x64-cuda/_nlcBuildMetadata.json +1 -1
  169. package/llamaBins/win-x64-cuda/llama-addon.node +0 -0
  170. package/llamaBins/win-x64-vulkan/_nlcBuildMetadata.json +1 -1
  171. package/llamaBins/win-x64-vulkan/llama-addon.node +0 -0
  172. package/package.json +9 -5
  173. package/dist/chatWrappers/LlamaChatWrapper.js.map +0 -1
@@ -7,6 +7,7 @@ import { UNKNOWN_UNICODE_CHAR } from "../../consts.js";
7
7
  import { getQueuedTokensBeforeStopTrigger } from "../../utils/getQueuedTokensBeforeStopTrigger.js";
8
8
  import { resolveChatWrapper } from "../../chatWrappers/utils/resolveChatWrapper.js";
9
9
  import { GeneralChatWrapper } from "../../chatWrappers/GeneralChatWrapper.js";
10
+ import { getConsoleLogPrefix } from "../../utils/getConsoleLogPrefix.js";
10
11
  import { eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy } from "./utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js";
11
12
  import { FunctionCallGrammar, LlamaFunctionCallValidationError } from "./utils/FunctionCallGrammar.js";
12
13
  const defaultContextShiftOptions = {
@@ -36,7 +37,7 @@ export class LlamaChat {
36
37
  bosString: contextSequence.model.tokens.bosString,
37
38
  filename: contextSequence.model.filename,
38
39
  fileInfo: contextSequence.model.fileInfo,
39
- tokenizer: contextSequence.model.tokenize
40
+ tokenizer: contextSequence.model.tokenizer
40
41
  }) ?? new GeneralChatWrapper())
41
42
  : chatWrapper;
42
43
  }
@@ -71,7 +72,7 @@ export class LlamaChat {
71
72
  get model() {
72
73
  return this.sequence.model;
73
74
  }
74
- async generateResponse(history, { onToken, signal, maxTokens, temperature, minP, topK, topP, grammar, trimWhitespaceSuffix = false, repeatPenalty = {}, tokenBias, evaluationPriority = 5, functions, documentFunctionParams, contextShift = defaultContextShiftOptions, lastEvaluationContextWindow: { history: lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift = 0.5 } = {} } = {}) {
75
+ async generateResponse(history, { onToken, signal, stopOnAbortSignal = false, maxTokens, temperature, minP, topK, topP, grammar, trimWhitespaceSuffix = false, repeatPenalty = {}, tokenBias, evaluationPriority = 5, functions, documentFunctionParams, contextShift = defaultContextShiftOptions, customStopTriggers, lastEvaluationContextWindow: { history: lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift = 0.5 } = {} } = {}) {
75
76
  const functionsEnabled = (functions != null && Object.keys(functions).length > 0);
76
77
  if (grammar != null && functionsEnabled)
77
78
  throw new Error("Using both grammar and functions is not supported yet");
@@ -89,7 +90,6 @@ export class LlamaChat {
89
90
  });
90
91
  const model = this._sequence.model;
91
92
  const context = this._sequence.context;
92
- const eosToken = model.tokens.eos;
93
93
  const resolvedContextShift = {
94
94
  ...defaultContextShiftOptions,
95
95
  ...removeNullFields(contextShift)
@@ -116,6 +116,7 @@ export class LlamaChat {
116
116
  : undefined;
117
117
  const streamRegulator = new TokenStreamRegulator();
118
118
  const stopGenerationDetector = new StopGenerationDetector();
119
+ const customStopGenerationTriggersDetector = new StopGenerationDetector();
119
120
  const functionSyntaxStartDetector = new StopGenerationDetector();
120
121
  const functionSyntaxEndDetector = new StopGenerationDetector();
121
122
  const disengageInitiallyEngagedFunctionMode = new StopGenerationDetector();
@@ -129,7 +130,7 @@ export class LlamaChat {
129
130
  let lastContextWindowHistory = resolvedHistory;
130
131
  let lastHistoryCompressionMetadata = resolvedContextShift.lastEvaluationMetadata;
131
132
  const ensureNotAborted = () => {
132
- if (signal?.aborted)
133
+ if (signal?.aborted && (!stopOnAbortSignal || res.length === 0))
133
134
  throw signal.reason;
134
135
  if (this._sequence == null)
135
136
  throw new DisposedError();
@@ -200,8 +201,11 @@ export class LlamaChat {
200
201
  }
201
202
  }
202
203
  };
204
+ if (customStopTriggers != null)
205
+ StopGenerationDetector.resolveStopTriggers(customStopTriggers, model.tokenizer)
206
+ .map((stopTrigger) => customStopGenerationTriggersDetector.addStopTrigger(stopTrigger));
203
207
  if (grammar != null)
204
- StopGenerationDetector.resolveStopTriggers(grammar.stopGenerationTriggers, model.tokenize)
208
+ StopGenerationDetector.resolveStopTriggers(grammar.stopGenerationTriggers, model.tokenizer)
205
209
  .map((stopTrigger) => stopGenerationDetector.addStopTrigger(stopTrigger));
206
210
  if (functions != null && Object.keys(functions).length > 0)
207
211
  functionSyntaxStartDetector.addStopTrigger([this._chatWrapper.settings.functions.call.prefix]);
@@ -214,7 +218,7 @@ export class LlamaChat {
214
218
  resolvedHistory: getResolvedHistoryWithCurrentModelResponse(),
215
219
  resolvedContextShift,
216
220
  lastHistoryCompressionMetadata,
217
- pendingTokensCount: pendingTokens.length + queuedChunkTokens.length,
221
+ pendingTokensCount: ignoredStartTextTokens.length + pendingTokens.length + queuedChunkTokens.length,
218
222
  isFirstEvaluation,
219
223
  chatWrapper: this._chatWrapper,
220
224
  lastEvaluationContextWindowHistory,
@@ -226,11 +230,11 @@ export class LlamaChat {
226
230
  });
227
231
  ensureNotAborted();
228
232
  if (generatedTokens === 0) {
229
- StopGenerationDetector.resolveStopTriggers(ignoreStartText, model.tokenize)
233
+ StopGenerationDetector.resolveStopTriggers(ignoreStartText, model.tokenizer)
230
234
  .map((stopTrigger) => ignoreStartTextDetector.addStopTrigger(stopTrigger));
231
235
  if (functionsEnabled) {
232
236
  initiallyEngagedFunctionMode = functionCallInitiallyEngaged;
233
- StopGenerationDetector.resolveStopTriggers(disengageInitiallyEngagedFunctionCall, model.tokenize)
237
+ StopGenerationDetector.resolveStopTriggers(disengageInitiallyEngagedFunctionCall, model.tokenizer)
234
238
  .map((stopTrigger) => disengageInitiallyEngagedFunctionMode.addStopTrigger(stopTrigger));
235
239
  if (initiallyEngagedFunctionMode) {
236
240
  inFunctionEvaluationMode = true;
@@ -247,10 +251,10 @@ export class LlamaChat {
247
251
  lastContextWindowHistory = contextWindowHistory;
248
252
  const contextWindowLastModelResponse = getLastTextModelResponseFromChatHistory(contextWindowHistory);
249
253
  const contextWindowsRes = [];
250
- StopGenerationDetector.resolveStopTriggers(stopGenerationTriggers, model.tokenize)
254
+ StopGenerationDetector.resolveStopTriggers(stopGenerationTriggers, model.tokenizer)
251
255
  .map((stopTrigger) => stopGenerationDetector.addStopTrigger(stopTrigger));
252
256
  if (functionsGrammar != null)
253
- StopGenerationDetector.resolveStopTriggers(functionsGrammar.stopGenerationTriggers, model.tokenize)
257
+ StopGenerationDetector.resolveStopTriggers(functionsGrammar.stopGenerationTriggers, model.tokenizer)
254
258
  .map((stopTrigger) => functionSyntaxEndDetector.addStopTrigger(stopTrigger));
255
259
  let { firstDifferentIndex } = this._sequence.compareContextTokens(tokens);
256
260
  // we need to decode at least one token to generate a response
@@ -279,193 +283,274 @@ export class LlamaChat {
279
283
  },
280
284
  tokenBias,
281
285
  evaluationPriority,
282
- yieldEosToken: true
286
+ yieldEogToken: true
283
287
  }));
284
- for await (const token of evaluationIterator) {
285
- ensureNotAborted();
286
- generatedTokens++;
287
- const tokens = [token];
288
- const text = model.detokenize([token]);
289
- const queuedTokenRelease = streamRegulator.addChunk({ tokens, text });
290
- if (initiallyEngagedFunctionMode)
291
- disengageInitiallyEngagedFunctionMode.recordGeneration({ text, tokens, startNewChecks: generatedTokens === 1 });
292
- if (text === UNKNOWN_UNICODE_CHAR || ((grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix) && text.trim() === "")) {
293
- locksToReleaseOnValidGeneration.push(queuedTokenRelease.createTextIndexLock(0));
294
- }
295
- else {
296
- while (locksToReleaseOnValidGeneration.length > 0)
297
- locksToReleaseOnValidGeneration.shift().dispose();
298
- }
299
- functionSyntaxStartDetector.recordGeneration({ text, tokens, queuedTokenRelease });
300
- if (initiallyEngagedFunctionMode && disengageInitiallyEngagedFunctionMode.hasTriggeredStops) {
301
- initiallyEngagedFunctionMode = false;
302
- let shouldStopFunctionEvaluationMode = !functionSyntaxStartDetector.hasTriggeredStops;
303
- if (!shouldStopFunctionEvaluationMode && functionsEnabled && functionsGrammar != null) {
304
- const functionCallText = model.detokenize([...functionCallTokens, ...tokens]);
305
- try {
306
- const functionName = functionsGrammar.parseFunctionNameFromPartialCall(functionCallText, {
307
- enableInternalBuiltinFunctions: true,
308
- initialFunctionCallEngaged: true
309
- });
310
- const internalBuiltinFunctions = this._chatWrapper.getInternalBuiltinFunctions({ initialFunctionCallEngaged: true });
311
- if (internalBuiltinFunctions[functionName] != null) {
312
- shouldStopFunctionEvaluationMode = true;
288
+ try {
289
+ let currentIteration = await evaluationIterator.next();
290
+ while (currentIteration.done !== true) {
291
+ const token = currentIteration.value;
292
+ let replacementToken = undefined;
293
+ ensureNotAborted();
294
+ generatedTokens++;
295
+ const tokens = [token];
296
+ const text = model.detokenize([token]);
297
+ const queuedTokenRelease = streamRegulator.addChunk({ tokens, text });
298
+ if (initiallyEngagedFunctionMode)
299
+ disengageInitiallyEngagedFunctionMode.recordGeneration({ text, tokens, startNewChecks: generatedTokens === 1 });
300
+ if (text === UNKNOWN_UNICODE_CHAR || ((grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix) && text.trim() === "")) {
301
+ locksToReleaseOnValidGeneration.push(queuedTokenRelease.createTextIndexLock(0));
302
+ }
303
+ else {
304
+ while (locksToReleaseOnValidGeneration.length > 0)
305
+ locksToReleaseOnValidGeneration.shift().dispose();
306
+ }
307
+ functionSyntaxStartDetector.recordGeneration({ text, tokens, queuedTokenRelease });
308
+ if (initiallyEngagedFunctionMode && disengageInitiallyEngagedFunctionMode.hasTriggeredStops) {
309
+ initiallyEngagedFunctionMode = false;
310
+ let shouldStopFunctionEvaluationMode = !functionSyntaxStartDetector.hasTriggeredStops;
311
+ if (!shouldStopFunctionEvaluationMode && functionsEnabled && functionsGrammar != null) {
312
+ const functionCallText = model.detokenize([...functionCallTokens, ...tokens]);
313
+ try {
314
+ const functionName = functionsGrammar.parseFunctionNameFromPartialCall(functionCallText, {
315
+ enableInternalBuiltinFunctions: true,
316
+ initialFunctionCallEngaged: true
317
+ });
318
+ const internalBuiltinFunctions = this._chatWrapper.getInternalBuiltinFunctions({ initialFunctionCallEngaged: true });
319
+ if (internalBuiltinFunctions[functionName] != null) {
320
+ shouldStopFunctionEvaluationMode = true;
321
+ }
322
+ }
323
+ catch (err) {
324
+ if (!(err instanceof LlamaFunctionCallValidationError))
325
+ throw err;
313
326
  }
314
327
  }
315
- catch (err) {
316
- if (!(err instanceof LlamaFunctionCallValidationError))
317
- throw err;
328
+ if (shouldStopFunctionEvaluationMode) {
329
+ inFunctionEvaluationMode = false;
330
+ functionsGrammar = new FunctionCallGrammar(model._llama, functions, this._chatWrapper, false);
331
+ functionsEvaluationState = new LlamaGrammarEvaluationState({
332
+ grammar: functionsGrammar
333
+ });
334
+ functionCallTokens.length = 0;
335
+ while (functionCallTokenSyntaxLocks.length > 0)
336
+ functionCallTokenSyntaxLocks.shift().dispose();
337
+ functionSyntaxStartDetector.clearInProgressStops();
338
+ functionSyntaxStartDetector.clearTriggeredStops();
339
+ functionSyntaxEndDetector.clearInProgressStops();
340
+ functionSyntaxEndDetector.clearTriggeredStops();
318
341
  }
319
342
  }
320
- if (shouldStopFunctionEvaluationMode) {
321
- inFunctionEvaluationMode = false;
322
- functionsGrammar = new FunctionCallGrammar(model._llama, functions, this._chatWrapper, false);
323
- functionsEvaluationState = new LlamaGrammarEvaluationState({
324
- grammar: functionsGrammar
325
- });
326
- functionCallTokens.length = 0;
327
- while (functionCallTokenSyntaxLocks.length > 0)
328
- functionCallTokenSyntaxLocks.shift().dispose();
329
- functionSyntaxStartDetector.clearInProgressStops();
330
- functionSyntaxStartDetector.clearTriggeredStops();
331
- functionSyntaxEndDetector.clearInProgressStops();
332
- functionSyntaxEndDetector.clearTriggeredStops();
343
+ if (!inFunctionEvaluationMode && functionsEnabled && functionsGrammar != null &&
344
+ functionSyntaxStartDetector.hasTriggeredStops && functionsEvaluationState != null) {
345
+ inFunctionEvaluationMode = true;
346
+ functionCallTokenSyntaxLocks.push(queuedTokenRelease.createTextIndexLock(0));
347
+ stopGenerationDetector.clearTriggeredStops();
348
+ stopGenerationDetector.clearInProgressStops();
349
+ customStopGenerationTriggersDetector.clearTriggeredStops();
350
+ customStopGenerationTriggersDetector.clearInProgressStops();
351
+ pendingTokens.push(...streamRegulator.popFreeChunkTokens());
352
+ const triggeredStops = functionSyntaxStartDetector.getTriggeredStops();
353
+ const partiallyFreeTokens = streamRegulator.getPartiallyFreeChunk();
354
+ const queuedTokensBeforeStopTrigger = getQueuedTokensBeforeStopTrigger(triggeredStops, partiallyFreeTokens, model.tokenizer);
355
+ pendingTokens.push(...queuedTokensBeforeStopTrigger);
356
+ const [firstRemainingGenerationAfterStop] = triggeredStops
357
+ .map((stopTrigger) => stopTrigger.remainingGenerations)
358
+ .filter((remainingGenerations) => remainingGenerations.length > 0)
359
+ .flat(1);
360
+ const remainingTextAfterStop = (firstRemainingGenerationAfterStop == null || firstRemainingGenerationAfterStop.length === 0)
361
+ ? ""
362
+ : typeof firstRemainingGenerationAfterStop === "string"
363
+ ? firstRemainingGenerationAfterStop
364
+ : model.detokenize(firstRemainingGenerationAfterStop);
365
+ functionCallTokens.push(...model.tokenize(this._chatWrapper.settings.functions.call.prefix, false, "trimLeadingSpace"));
366
+ for (const functionCallToken of functionCallTokens)
367
+ context._acceptTokenOnGrammarEvaluationState(functionsEvaluationState, functionCallToken);
368
+ // these tokens have to be verified that they match the function calling syntax grammar before they can be accepted,
369
+ // or the context state should be modified to not include the incompatible tokens
370
+ const remainingTextTokens = model.tokenize(remainingTextAfterStop, false, "trimLeadingSpace");
371
+ let unfitTokens = [];
372
+ for (let i = 0; i < remainingTextTokens.length; i++) {
373
+ const remainingToken = remainingTextTokens[i];
374
+ const canBeNextToken = context._canBeNextTokenForGrammarEvaluationState(functionsEvaluationState, remainingToken);
375
+ if (!canBeNextToken) {
376
+ unfitTokens = remainingTextTokens.slice(i);
377
+ break;
378
+ }
379
+ context._acceptTokenOnGrammarEvaluationState(functionsEvaluationState, remainingToken);
380
+ functionCallTokens.push(remainingToken);
381
+ }
382
+ if (unfitTokens.length > 0) {
383
+ const unfitTokensText = model.detokenize(unfitTokens); // the current token text must end with it
384
+ const currentTokenText = queuedTokenRelease.text;
385
+ let replacementTokens;
386
+ if (!currentTokenText.endsWith(unfitTokensText)) {
387
+ console.warn(getConsoleLogPrefix() + "The current token text does not end with the unfit function call syntax tokens text");
388
+ replacementTokens = remainingTextTokens.slice(0, -unfitTokens.length);
389
+ }
390
+ else {
391
+ const newCurrentTokensText = currentTokenText.slice(0, -unfitTokensText.length);
392
+ replacementTokens = model.tokenize(newCurrentTokensText, false, "trimLeadingSpace");
393
+ }
394
+ if (replacementTokens.length > 0) {
395
+ replacementToken = replacementTokens[0];
396
+ queuedTokenRelease.modifyTokensAndText(replacementTokens, model.detokenize([replacementToken]));
397
+ }
398
+ }
333
399
  }
334
- }
335
- if (!inFunctionEvaluationMode && functionsEnabled && functionsGrammar != null &&
336
- functionSyntaxStartDetector.hasTriggeredStops && functionsEvaluationState != null) {
337
- inFunctionEvaluationMode = true;
338
- functionCallTokenSyntaxLocks.push(queuedTokenRelease.createTextIndexLock(0));
339
- stopGenerationDetector.clearTriggeredStops();
340
- stopGenerationDetector.clearInProgressStops();
341
- pendingTokens.push(...streamRegulator.popFreeChunkTokens());
342
- const triggeredStops = functionSyntaxStartDetector.getTriggeredStops();
343
- const partiallyFreeTokens = streamRegulator.getPartiallyFreeChunk();
344
- const queuedTokensBeforeStopTrigger = getQueuedTokensBeforeStopTrigger(triggeredStops, partiallyFreeTokens, model.tokenize);
345
- pendingTokens.push(...queuedTokensBeforeStopTrigger);
346
- const [firstRemainingGenerationAfterStop] = triggeredStops
347
- .map((stopTrigger) => stopTrigger.remainingGenerations)
348
- .filter((remainingGenerations) => remainingGenerations.length > 0)
349
- .flat(1);
350
- const remainingTextAfterStop = (firstRemainingGenerationAfterStop == null || firstRemainingGenerationAfterStop.length === 0)
351
- ? ""
352
- : typeof firstRemainingGenerationAfterStop === "string"
353
- ? firstRemainingGenerationAfterStop
354
- : model.detokenize(firstRemainingGenerationAfterStop);
355
- functionCallTokens.push(...model.tokenize(this._chatWrapper.settings.functions.call.prefix + remainingTextAfterStop, false, "trimLeadingSpace"));
356
- for (const functionCallToken of functionCallTokens)
357
- context._acceptTokenOnGrammarEvaluationState(functionsEvaluationState, functionCallToken);
358
- }
359
- else if (inFunctionEvaluationMode) {
360
- functionCallTokens.push(...tokens);
361
- functionCallTokenSyntaxLocks.push(queuedTokenRelease.createTextIndexLock(0));
362
- functionSyntaxEndDetector.recordGeneration({ text, tokens, queuedTokenRelease });
363
- }
364
- if (inFunctionEvaluationMode && functionSyntaxEndDetector.hasTriggeredStops && functionsGrammar != null) {
365
- const functionCallText = model.detokenize(functionCallTokens);
366
- const functionCall = functionsGrammar.parseFunctionCall(functionCallText);
367
- let modelResponse = model.detokenize(res);
368
- let contextWindowModelResponse = model.detokenize(contextWindowsRes);
369
- if (grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix) {
370
- modelResponse = modelResponse.trimEnd();
371
- contextWindowModelResponse = contextWindowModelResponse.trimEnd();
400
+ else if (inFunctionEvaluationMode) {
401
+ functionCallTokens.push(...tokens);
402
+ functionCallTokenSyntaxLocks.push(queuedTokenRelease.createTextIndexLock(0));
403
+ functionSyntaxEndDetector.recordGeneration({ text, tokens, queuedTokenRelease });
372
404
  }
373
- return {
374
- response: modelResponse,
375
- lastEvaluation: {
376
- contextWindow: setLastModelTextResponseInChatHistory(lastContextWindowHistory, contextWindowLastModelResponse + contextWindowModelResponse),
377
- cleanHistory: setLastModelTextResponseInChatHistory(resolvedHistory, lastModelResponse + modelResponse),
378
- contextShiftMetadata: lastHistoryCompressionMetadata
379
- },
380
- // prevent infinite TS type instantiation
381
- functionCall: functionCall,
382
- metadata: {
383
- stopReason: "functionCall"
405
+ if (inFunctionEvaluationMode && functionSyntaxEndDetector.hasTriggeredStops && functionsGrammar != null) {
406
+ const functionCallText = model.detokenize(functionCallTokens);
407
+ const functionCall = functionsGrammar.parseFunctionCall(functionCallText);
408
+ let modelResponse = model.detokenize(res);
409
+ let contextWindowModelResponse = model.detokenize(contextWindowsRes);
410
+ if (grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix) {
411
+ modelResponse = modelResponse.trimEnd();
412
+ contextWindowModelResponse = contextWindowModelResponse.trimEnd();
384
413
  }
385
- };
386
- }
387
- if (!inFunctionEvaluationMode)
388
- stopGenerationDetector.recordGeneration({ text, tokens, queuedTokenRelease });
389
- pendingTokens.push(...streamRegulator.popFreeChunkTokens());
390
- removeFoundStartIgnoreTextsFromPendingTokens();
391
- if (stopGenerationDetector.hasTriggeredStops || token === eosToken) {
392
- const triggeredStops = stopGenerationDetector.getTriggeredStops();
393
- const partiallyFreeTokens = streamRegulator.getPartiallyFreeChunk();
394
- const queuedTokensBeforeStopTrigger = getQueuedTokensBeforeStopTrigger(triggeredStops, partiallyFreeTokens, model.tokenize);
395
- pendingTokens.push(...queuedTokensBeforeStopTrigger);
396
- const [firstRemainingGenerationAfterStop] = triggeredStops
397
- .map((stopTrigger) => stopTrigger.remainingGenerations)
398
- .filter((remainingGenerations) => remainingGenerations.length > 0)
399
- .flat(1);
400
- removeFoundStartIgnoreTextsFromPendingTokens();
401
- if (pendingTokens.length > 0)
402
- onToken?.(pendingTokens.slice());
403
- res.push(...pendingTokens);
404
- contextWindowsRes.push(...pendingTokens);
405
- pendingTokens.length = 0;
406
- let modelResponse = model.detokenize(res);
407
- let contextWindowModelResponse = model.detokenize(contextWindowsRes);
408
- if (grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix) {
409
- modelResponse = modelResponse.trimEnd();
410
- contextWindowModelResponse = contextWindowModelResponse.trimEnd();
414
+ return {
415
+ response: modelResponse,
416
+ lastEvaluation: {
417
+ contextWindow: setLastModelTextResponseInChatHistory(lastContextWindowHistory, contextWindowLastModelResponse + contextWindowModelResponse),
418
+ cleanHistory: setLastModelTextResponseInChatHistory(resolvedHistory, lastModelResponse + modelResponse),
419
+ contextShiftMetadata: lastHistoryCompressionMetadata
420
+ },
421
+ // prevent infinite TS type instantiation
422
+ functionCall: functionCall,
423
+ metadata: {
424
+ stopReason: "functionCall"
425
+ }
426
+ };
411
427
  }
412
- return {
413
- response: modelResponse,
414
- lastEvaluation: {
415
- contextWindow: setLastModelTextResponseInChatHistory(lastContextWindowHistory, contextWindowLastModelResponse + contextWindowModelResponse),
416
- cleanHistory: setLastModelTextResponseInChatHistory(resolvedHistory, lastModelResponse + modelResponse),
417
- contextShiftMetadata: lastHistoryCompressionMetadata
418
- },
419
- metadata: {
420
- remainingGenerationAfterStop: firstRemainingGenerationAfterStop,
421
- stopReason: token === eosToken
422
- ? "eosToken"
423
- : "stopGenerationTrigger"
424
- }
425
- };
426
- }
427
- const maxTokensTriggered = maxTokens != null && maxTokens > 0 && generatedTokens >= maxTokens;
428
- if (res.length === 0) {
429
- ignoreStartTextDetector.clearInProgressStops();
430
- ignoreStartTextDetector.clearTriggeredStops();
431
- ignoreStartTextDetector.recordGeneration({
432
- text: model.detokenize(pendingTokens),
433
- tokens: pendingTokens
434
- });
435
- }
436
- if (pendingTokens.length > 0 && (maxTokensTriggered || !ignoreStartTextDetector.hasInProgressStops)) {
428
+ if (!inFunctionEvaluationMode) {
429
+ stopGenerationDetector.recordGeneration({ text, tokens, queuedTokenRelease });
430
+ customStopGenerationTriggersDetector.recordGeneration({ text, tokens, queuedTokenRelease });
431
+ }
432
+ pendingTokens.push(...streamRegulator.popFreeChunkTokens());
437
433
  removeFoundStartIgnoreTextsFromPendingTokens();
438
- if (pendingTokens.length > 0) {
439
- onToken?.(pendingTokens.slice());
434
+ if (stopGenerationDetector.hasTriggeredStops || customStopGenerationTriggersDetector.hasTriggeredStops ||
435
+ model.isEogToken(token)) {
436
+ const triggeredStops = stopGenerationDetector.hasTriggeredStops
437
+ ? stopGenerationDetector.getTriggeredStops()
438
+ : customStopGenerationTriggersDetector.getTriggeredStops();
439
+ const partiallyFreeTokens = streamRegulator.getPartiallyFreeChunk();
440
+ const queuedTokensBeforeStopTrigger = getQueuedTokensBeforeStopTrigger(triggeredStops, partiallyFreeTokens, model.tokenizer);
441
+ pendingTokens.push(...queuedTokensBeforeStopTrigger);
442
+ const [firstRemainingGenerationAfterStop] = triggeredStops
443
+ .map((stopTrigger) => stopTrigger.remainingGenerations)
444
+ .filter((remainingGenerations) => remainingGenerations.length > 0)
445
+ .flat(1);
446
+ removeFoundStartIgnoreTextsFromPendingTokens();
447
+ if (pendingTokens.length > 0)
448
+ onToken?.(pendingTokens.slice());
440
449
  res.push(...pendingTokens);
441
450
  contextWindowsRes.push(...pendingTokens);
442
451
  pendingTokens.length = 0;
443
- }
444
- }
445
- if (maxTokensTriggered) {
446
- let modelResponse = model.detokenize(res);
447
- let contextWindowModelResponse = model.detokenize(contextWindowsRes);
448
- if (grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix) {
449
- modelResponse = modelResponse.trimEnd();
450
- contextWindowModelResponse = contextWindowModelResponse.trimEnd();
451
- }
452
- return {
453
- response: modelResponse,
454
- lastEvaluation: {
452
+ let modelResponse = model.detokenize(res);
453
+ let contextWindowModelResponse = model.detokenize(contextWindowsRes);
454
+ if (grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix) {
455
+ modelResponse = modelResponse.trimEnd();
456
+ contextWindowModelResponse = contextWindowModelResponse.trimEnd();
457
+ }
458
+ const lastEvaluation = {
455
459
  contextWindow: setLastModelTextResponseInChatHistory(lastContextWindowHistory, contextWindowLastModelResponse + contextWindowModelResponse),
456
460
  cleanHistory: setLastModelTextResponseInChatHistory(resolvedHistory, lastModelResponse + modelResponse),
457
461
  contextShiftMetadata: lastHistoryCompressionMetadata
458
- },
459
- metadata: {
460
- stopReason: "maxTokens"
462
+ };
463
+ const isEogToken = model.isEogToken(token);
464
+ if (isEogToken || stopGenerationDetector.hasTriggeredStops) {
465
+ return {
466
+ response: modelResponse,
467
+ lastEvaluation,
468
+ metadata: {
469
+ remainingGenerationAfterStop: firstRemainingGenerationAfterStop,
470
+ stopReason: isEogToken
471
+ ? "eogToken"
472
+ : "stopGenerationTrigger"
473
+ }
474
+ };
461
475
  }
462
- };
463
- }
464
- if (this._sequence.nextTokenIndex >= context.contextSize) {
465
- shouldContextShift = true;
466
- break;
476
+ return {
477
+ response: modelResponse,
478
+ lastEvaluation,
479
+ metadata: {
480
+ remainingGenerationAfterStop: firstRemainingGenerationAfterStop,
481
+ stopReason: "customStopTrigger",
482
+ customStopTrigger: triggeredStops[0].stopTrigger
483
+ }
484
+ };
485
+ }
486
+ const maxTokensTriggered = maxTokens != null && maxTokens > 0 && generatedTokens >= maxTokens;
487
+ if (res.length === 0) {
488
+ ignoreStartTextDetector.clearInProgressStops();
489
+ ignoreStartTextDetector.clearTriggeredStops();
490
+ ignoreStartTextDetector.recordGeneration({
491
+ text: model.detokenize(pendingTokens),
492
+ tokens: pendingTokens
493
+ });
494
+ }
495
+ if (pendingTokens.length > 0 && (maxTokensTriggered || !ignoreStartTextDetector.hasInProgressStops)) {
496
+ removeFoundStartIgnoreTextsFromPendingTokens();
497
+ if (pendingTokens.length > 0) {
498
+ onToken?.(pendingTokens.slice());
499
+ res.push(...pendingTokens);
500
+ contextWindowsRes.push(...pendingTokens);
501
+ pendingTokens.length = 0;
502
+ }
503
+ }
504
+ if (maxTokensTriggered) {
505
+ let modelResponse = model.detokenize(res);
506
+ let contextWindowModelResponse = model.detokenize(contextWindowsRes);
507
+ if (grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix) {
508
+ modelResponse = modelResponse.trimEnd();
509
+ contextWindowModelResponse = contextWindowModelResponse.trimEnd();
510
+ }
511
+ return {
512
+ response: modelResponse,
513
+ lastEvaluation: {
514
+ contextWindow: setLastModelTextResponseInChatHistory(lastContextWindowHistory, contextWindowLastModelResponse + contextWindowModelResponse),
515
+ cleanHistory: setLastModelTextResponseInChatHistory(resolvedHistory, lastModelResponse + modelResponse),
516
+ contextShiftMetadata: lastHistoryCompressionMetadata
517
+ },
518
+ metadata: {
519
+ stopReason: "maxTokens"
520
+ }
521
+ };
522
+ }
523
+ if (this._sequence.nextTokenIndex >= context.contextSize - 1) {
524
+ shouldContextShift = true;
525
+ break;
526
+ }
527
+ if (signal?.aborted && stopOnAbortSignal) {
528
+ if (res.length === 0)
529
+ throw signal.reason;
530
+ let modelResponse = model.detokenize(res);
531
+ let contextWindowModelResponse = model.detokenize(contextWindowsRes);
532
+ if (grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix) {
533
+ modelResponse = modelResponse.trimEnd();
534
+ contextWindowModelResponse = contextWindowModelResponse.trimEnd();
535
+ }
536
+ return {
537
+ response: modelResponse,
538
+ lastEvaluation: {
539
+ contextWindow: setLastModelTextResponseInChatHistory(lastContextWindowHistory, contextWindowLastModelResponse + contextWindowModelResponse),
540
+ cleanHistory: setLastModelTextResponseInChatHistory(resolvedHistory, lastModelResponse + modelResponse),
541
+ contextShiftMetadata: lastHistoryCompressionMetadata
542
+ },
543
+ metadata: {
544
+ stopReason: "abort"
545
+ }
546
+ };
547
+ }
548
+ currentIteration = await evaluationIterator.next(replacementToken);
467
549
  }
468
550
  }
551
+ finally {
552
+ await evaluationIterator.return();
553
+ }
469
554
  isFirstEvaluation = false;
470
555
  if (shouldContextShift)
471
556
  continue;
@@ -593,7 +678,7 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
593
678
  availableFunctions: functions,
594
679
  documentFunctionParams
595
680
  });
596
- const tokens = contextText.tokenize(model.tokenize);
681
+ const tokens = contextText.tokenize(model.tokenizer);
597
682
  if (tokens.length + pendingTokensCount + minFreeContextTokens < context.contextSize) {
598
683
  const { firstDifferentIndex } = sequence.compareContextTokens(tokens);
599
684
  const existingEvaluationPercentage = firstDifferentIndex / tokens.length;
@@ -619,11 +704,11 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
619
704
  : resolvedContextShift.size;
620
705
  const { compressedHistory, metadata } = await compressHistoryToFitContextSize({
621
706
  history: resolvedHistory,
622
- contextShiftSize: Math.max(contextShiftSize, minFreeContextTokens) + pendingTokensCount,
707
+ contextShiftSize: Math.max(minFreeContextTokens, Math.min(contextShiftSize, context.contextSize - pendingTokensCount)) + pendingTokensCount,
623
708
  contextShiftStrategy: resolvedContextShift.strategy,
624
709
  contextShiftLastEvaluationMetadata: resolvedContextShift.lastEvaluationMetadata,
625
710
  contextSize: context.contextSize,
626
- tokenizer: model.tokenize,
711
+ tokenizer: model.tokenizer,
627
712
  chatWrapper: chatWrapper,
628
713
  functions,
629
714
  documentFunctionParams
@@ -635,7 +720,7 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
635
720
  return {
636
721
  history: compressedHistory,
637
722
  stopGenerationTriggers,
638
- tokens: contextText.tokenize(model.tokenize),
723
+ tokens: contextText.tokenize(model.tokenizer),
639
724
  newResolvedHistory: resolvedHistory,
640
725
  newHistoryCompressionMetadata: metadata,
641
726
  ignoreStartText: ignoreStartText ?? [],
@@ -648,7 +733,7 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
648
733
  availableFunctions: functions,
649
734
  documentFunctionParams
650
735
  });
651
- const tokens = contextText.tokenize(model.tokenize);
736
+ const tokens = contextText.tokenize(model.tokenizer);
652
737
  if (tokens.length + pendingTokensCount + minFreeContextTokens < context.contextSize)
653
738
  return {
654
739
  history: resolvedHistory,
@@ -666,11 +751,11 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
666
751
  : resolvedContextShift.size)));
667
752
  const { compressedHistory, metadata } = await compressHistoryToFitContextSize({
668
753
  history: resolvedHistory,
669
- contextShiftSize: Math.max(contextShiftSize, minFreeContextTokens) + pendingTokensCount,
754
+ contextShiftSize: Math.max(minFreeContextTokens, Math.min(contextShiftSize, context.contextSize - pendingTokensCount)) + pendingTokensCount,
670
755
  contextShiftStrategy: resolvedContextShift.strategy,
671
756
  contextShiftLastEvaluationMetadata: resolvedContextShift.lastEvaluationMetadata,
672
757
  contextSize: context.contextSize,
673
- tokenizer: model.tokenize,
758
+ tokenizer: model.tokenizer,
674
759
  chatWrapper: chatWrapper,
675
760
  functions,
676
761
  documentFunctionParams
@@ -682,7 +767,7 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
682
767
  return {
683
768
  history: compressedHistory,
684
769
  stopGenerationTriggers,
685
- tokens: contextText.tokenize(model.tokenize),
770
+ tokens: contextText.tokenize(model.tokenizer),
686
771
  newResolvedHistory: resolvedHistory,
687
772
  newHistoryCompressionMetadata: metadata,
688
773
  ignoreStartText: ignoreStartText ?? [],