node-llama-cpp 3.0.0-beta.22 → 3.0.0-beta.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (276) hide show
  1. package/dist/ChatWrapper.d.ts +8 -26
  2. package/dist/ChatWrapper.js +100 -52
  3. package/dist/ChatWrapper.js.map +1 -1
  4. package/dist/{apiDocsOverrides.js → apiDocsIndex.js} +3 -1
  5. package/dist/apiDocsIndex.js.map +1 -0
  6. package/dist/bindings/AddonTypes.d.ts +1 -1
  7. package/dist/bindings/Llama.d.ts +1 -1
  8. package/dist/bindings/Llama.js +5 -1
  9. package/dist/bindings/Llama.js.map +1 -1
  10. package/dist/bindings/getLlama.js.map +1 -1
  11. package/dist/bindings/types.d.ts +1 -0
  12. package/dist/bindings/utils/asyncEvery.js.map +1 -1
  13. package/dist/bindings/utils/asyncSome.js.map +1 -1
  14. package/dist/bindings/utils/clearAllLocalBuilds.js.map +1 -1
  15. package/dist/bindings/utils/cloneLlamaCppRepo.js.map +1 -1
  16. package/dist/bindings/utils/compileLLamaCpp.d.ts +2 -0
  17. package/dist/bindings/utils/compileLLamaCpp.js +5 -1
  18. package/dist/bindings/utils/compileLLamaCpp.js.map +1 -1
  19. package/dist/bindings/utils/detectAvailableComputeLayers.js +2 -1
  20. package/dist/bindings/utils/detectAvailableComputeLayers.js.map +1 -1
  21. package/dist/bindings/utils/detectGlibc.js +3 -3
  22. package/dist/bindings/utils/detectGlibc.js.map +1 -1
  23. package/dist/bindings/utils/getBestComputeLayersAvailable.d.ts +1 -0
  24. package/dist/bindings/utils/getBestComputeLayersAvailable.js.map +1 -1
  25. package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js.map +1 -1
  26. package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.js.map +1 -1
  27. package/dist/bindings/utils/getGpuTypesToUseForOption.d.ts +1 -0
  28. package/dist/bindings/utils/getGpuTypesToUseForOption.js.map +1 -1
  29. package/dist/bindings/utils/getLinuxDistroInfo.js.map +1 -1
  30. package/dist/bindings/utils/getPlatform.js.map +1 -1
  31. package/dist/bindings/utils/getPlatformInfo.js.map +1 -1
  32. package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.js.map +1 -1
  33. package/dist/bindings/utils/lastBuildInfo.js.map +1 -1
  34. package/dist/bindings/utils/logBinaryUsageExampleToConsole.js.map +1 -1
  35. package/dist/bindings/utils/logDistroInstallInstruction.js.map +1 -1
  36. package/dist/bindings/utils/resolveCustomCmakeOptions.js +2 -0
  37. package/dist/bindings/utils/resolveCustomCmakeOptions.js.map +1 -1
  38. package/dist/bindings/utils/testBindingBinary.js.map +1 -1
  39. package/dist/bindings/utils/testCmakeBinary.d.ts +1 -0
  40. package/dist/chatWrappers/ChatMLChatWrapper.d.ts +2 -14
  41. package/dist/chatWrappers/ChatMLChatWrapper.js +10 -10
  42. package/dist/chatWrappers/ChatMLChatWrapper.js.map +1 -1
  43. package/dist/chatWrappers/FalconChatWrapper.d.ts +2 -9
  44. package/dist/chatWrappers/FalconChatWrapper.js +10 -10
  45. package/dist/chatWrappers/FalconChatWrapper.js.map +1 -1
  46. package/dist/chatWrappers/FunctionaryChatWrapper.d.ts +8 -32
  47. package/dist/chatWrappers/FunctionaryChatWrapper.js +274 -82
  48. package/dist/chatWrappers/FunctionaryChatWrapper.js.map +1 -1
  49. package/dist/chatWrappers/GemmaChatWrapper.d.ts +3 -14
  50. package/dist/chatWrappers/GemmaChatWrapper.js +20 -12
  51. package/dist/chatWrappers/GemmaChatWrapper.js.map +1 -1
  52. package/dist/chatWrappers/GeneralChatWrapper.d.ts +2 -9
  53. package/dist/chatWrappers/GeneralChatWrapper.js +10 -10
  54. package/dist/chatWrappers/GeneralChatWrapper.js.map +1 -1
  55. package/dist/chatWrappers/Llama2ChatWrapper.d.ts +2 -10
  56. package/dist/chatWrappers/Llama2ChatWrapper.js +9 -9
  57. package/dist/chatWrappers/Llama2ChatWrapper.js.map +1 -1
  58. package/dist/chatWrappers/Llama3ChatWrapper.d.ts +10 -25
  59. package/dist/chatWrappers/Llama3ChatWrapper.js +71 -26
  60. package/dist/chatWrappers/Llama3ChatWrapper.js.map +1 -1
  61. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.d.ts +6 -12
  62. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js +25 -13
  63. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js.map +1 -1
  64. package/dist/chatWrappers/generic/TemplateChatWrapper.d.ts +14 -24
  65. package/dist/chatWrappers/generic/TemplateChatWrapper.js +21 -21
  66. package/dist/chatWrappers/generic/TemplateChatWrapper.js.map +1 -1
  67. package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.d.ts +11 -21
  68. package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.js +1 -1
  69. package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.js.map +1 -1
  70. package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js +8 -4
  71. package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js.map +1 -1
  72. package/dist/chatWrappers/utils/resolveChatWrapper.js +30 -1
  73. package/dist/chatWrappers/utils/resolveChatWrapper.js.map +1 -1
  74. package/dist/cli/commands/BuildCommand.d.ts +6 -1
  75. package/dist/cli/commands/BuildCommand.js +14 -3
  76. package/dist/cli/commands/BuildCommand.js.map +1 -1
  77. package/dist/cli/commands/ChatCommand.js +1 -1
  78. package/dist/cli/commands/ChatCommand.js.map +1 -1
  79. package/dist/cli/commands/ClearCommand.js.map +1 -1
  80. package/dist/cli/commands/CompleteCommand.js.map +1 -1
  81. package/dist/cli/commands/DownloadCommand.d.ts +1 -0
  82. package/dist/cli/commands/DownloadCommand.js +1 -1
  83. package/dist/cli/commands/DownloadCommand.js.map +1 -1
  84. package/dist/cli/commands/InfillCommand.js.map +1 -1
  85. package/dist/cli/commands/InitCommand.js.map +1 -1
  86. package/dist/cli/commands/OnPostInstallCommand.js.map +1 -1
  87. package/dist/cli/commands/PullCommand.js.map +1 -1
  88. package/dist/cli/commands/inspect/commands/InspectGgufCommand.js.map +1 -1
  89. package/dist/cli/commands/inspect/commands/InspectGpuCommand.js.map +1 -1
  90. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js.map +1 -1
  91. package/dist/cli/recommendedModels.js +48 -27
  92. package/dist/cli/recommendedModels.js.map +1 -1
  93. package/dist/cli/utils/ConsoleInteraction.js.map +1 -1
  94. package/dist/cli/utils/ConsoleTable.js.map +1 -1
  95. package/dist/cli/utils/basicChooseFromListConsoleInteraction.js.map +1 -1
  96. package/dist/cli/utils/consolePromptQuestion.js.map +1 -1
  97. package/dist/cli/utils/getReadablePath.js +1 -1
  98. package/dist/cli/utils/getReadablePath.js.map +1 -1
  99. package/dist/cli/utils/interactivelyAskForModel.js.map +1 -1
  100. package/dist/cli/utils/printCommonInfoLines.js.map +1 -1
  101. package/dist/cli/utils/printInfoLine.js.map +1 -1
  102. package/dist/cli/utils/projectTemplates.js.map +1 -1
  103. package/dist/cli/utils/resolveCommandGgufPath.js.map +1 -1
  104. package/dist/cli/utils/resolveHeaderFlag.js.map +1 -1
  105. package/dist/cli/utils/splitAnsiToLines.js.map +1 -1
  106. package/dist/evaluator/LlamaChat/LlamaChat.d.ts +79 -8
  107. package/dist/evaluator/LlamaChat/LlamaChat.js +1212 -481
  108. package/dist/evaluator/LlamaChat/LlamaChat.js.map +1 -1
  109. package/dist/evaluator/LlamaChat/utils/FunctionCallNameGrammar.d.ts +11 -0
  110. package/dist/evaluator/LlamaChat/utils/FunctionCallNameGrammar.js +55 -0
  111. package/dist/evaluator/LlamaChat/utils/FunctionCallNameGrammar.js.map +1 -0
  112. package/dist/evaluator/LlamaChat/utils/FunctionCallParamsGrammar.d.ts +16 -0
  113. package/dist/evaluator/LlamaChat/utils/FunctionCallParamsGrammar.js +45 -0
  114. package/dist/evaluator/LlamaChat/utils/FunctionCallParamsGrammar.js.map +1 -0
  115. package/dist/evaluator/LlamaChat/utils/LlamaFunctionCallValidationError.d.ts +8 -0
  116. package/dist/evaluator/LlamaChat/utils/LlamaFunctionCallValidationError.js +12 -0
  117. package/dist/evaluator/LlamaChat/utils/LlamaFunctionCallValidationError.js.map +1 -0
  118. package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js +24 -16
  119. package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js.map +1 -1
  120. package/dist/evaluator/LlamaChatSession/LlamaChatSession.d.ts +110 -5
  121. package/dist/evaluator/LlamaChatSession/LlamaChatSession.js +226 -39
  122. package/dist/evaluator/LlamaChatSession/LlamaChatSession.js.map +1 -1
  123. package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.d.ts +39 -0
  124. package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.js +186 -0
  125. package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.js.map +1 -0
  126. package/dist/evaluator/LlamaCompletion.d.ts +17 -29
  127. package/dist/evaluator/LlamaCompletion.js +32 -19
  128. package/dist/evaluator/LlamaCompletion.js.map +1 -1
  129. package/dist/evaluator/LlamaContext/LlamaContext.d.ts +1 -2
  130. package/dist/evaluator/LlamaContext/LlamaContext.js +1 -1
  131. package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -1
  132. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/firstInFirstOutStrategy.js.map +1 -1
  133. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/maximumParallelismStrategy.js.map +1 -1
  134. package/dist/evaluator/LlamaEmbeddingContext.js.map +1 -1
  135. package/dist/evaluator/LlamaGrammar.d.ts +3 -3
  136. package/dist/evaluator/LlamaGrammar.js +1 -1
  137. package/dist/evaluator/LlamaGrammar.js.map +1 -1
  138. package/dist/evaluator/{LlamaModel.d.ts → LlamaModel/LlamaModel.d.ts} +20 -14
  139. package/dist/evaluator/{LlamaModel.js → LlamaModel/LlamaModel.js} +100 -18
  140. package/dist/evaluator/LlamaModel/LlamaModel.js.map +1 -0
  141. package/dist/evaluator/LlamaModel/utils/TokenAttributes.d.ts +29 -0
  142. package/dist/evaluator/LlamaModel/utils/TokenAttributes.js +65 -0
  143. package/dist/evaluator/LlamaModel/utils/TokenAttributes.js.map +1 -0
  144. package/dist/evaluator/TokenBias.d.ts +1 -1
  145. package/dist/evaluator/TokenMeter.js.map +1 -1
  146. package/dist/gguf/fileReaders/GgufFsFileReader.js +2 -1
  147. package/dist/gguf/fileReaders/GgufFsFileReader.js.map +1 -1
  148. package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js.map +1 -1
  149. package/dist/gguf/insights/GgufInsights.d.ts +6 -0
  150. package/dist/gguf/insights/GgufInsights.js +20 -0
  151. package/dist/gguf/insights/GgufInsights.js.map +1 -1
  152. package/dist/gguf/insights/GgufInsightsConfigurationResolver.d.ts +1 -1
  153. package/dist/gguf/insights/GgufInsightsConfigurationResolver.js +1 -1
  154. package/dist/gguf/insights/GgufInsightsConfigurationResolver.js.map +1 -1
  155. package/dist/gguf/insights/utils/resolveContextContextSizeOption.js.map +1 -1
  156. package/dist/gguf/insights/utils/resolveModelGpuLayersOption.d.ts +1 -1
  157. package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js.map +1 -1
  158. package/dist/gguf/insights/utils/scoreLevels.js.map +1 -1
  159. package/dist/gguf/parser/GgufV2Parser.js.map +1 -1
  160. package/dist/gguf/parser/parseGguf.js.map +1 -1
  161. package/dist/gguf/readGgufFileInfo.js.map +1 -1
  162. package/dist/gguf/types/GgufMetadataTypes.d.ts +2 -1
  163. package/dist/gguf/types/GgufMetadataTypes.js.map +1 -1
  164. package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js.map +1 -1
  165. package/dist/gguf/utils/getGgufFileTypeName.js.map +1 -1
  166. package/dist/gguf/utils/normalizeGgufDownloadUrl.js.map +1 -1
  167. package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.js.map +1 -1
  168. package/dist/gguf/utils/resolveSplitGgufParts.js.map +1 -1
  169. package/dist/index.d.ts +14 -10
  170. package/dist/index.js +5 -2
  171. package/dist/index.js.map +1 -1
  172. package/dist/types.d.ts +68 -8
  173. package/dist/types.js.map +1 -1
  174. package/dist/utils/DisposeGuard.js.map +1 -1
  175. package/dist/utils/LlamaText.d.ts +22 -3
  176. package/dist/utils/LlamaText.js +107 -7
  177. package/dist/utils/LlamaText.js.map +1 -1
  178. package/dist/utils/LruCache.d.ts +12 -0
  179. package/dist/utils/LruCache.js +44 -0
  180. package/dist/utils/LruCache.js.map +1 -0
  181. package/dist/utils/ReplHistory.js.map +1 -1
  182. package/dist/utils/StopGenerationDetector.d.ts +25 -9
  183. package/dist/utils/StopGenerationDetector.js +70 -4
  184. package/dist/utils/StopGenerationDetector.js.map +1 -1
  185. package/dist/utils/TokenStreamRegulator.d.ts +1 -0
  186. package/dist/utils/TokenStreamRegulator.js +3 -0
  187. package/dist/utils/TokenStreamRegulator.js.map +1 -1
  188. package/dist/utils/appendUserMessageToChatHistory.js.map +1 -1
  189. package/dist/utils/clearTempFolder.js.map +1 -1
  190. package/dist/utils/cmake.js.map +1 -1
  191. package/dist/utils/createModelDownloader.d.ts +6 -0
  192. package/dist/utils/createModelDownloader.js +5 -2
  193. package/dist/utils/createModelDownloader.js.map +1 -1
  194. package/dist/utils/findBestOption.js.map +1 -1
  195. package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.d.ts +1 -0
  196. package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js +5 -4
  197. package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js.map +1 -1
  198. package/dist/utils/gbnfJson/GbnfGrammarGenerator.js.map +1 -1
  199. package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.js +1 -1
  200. package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.js.map +1 -1
  201. package/dist/utils/gbnfJson/terminals/GbnfRepetition.js.map +1 -1
  202. package/dist/utils/gbnfJson/terminals/GbnfWhitespace.js.map +1 -1
  203. package/dist/utils/gbnfJson/terminals/gbnfConsts.d.ts +1 -1
  204. package/dist/utils/gbnfJson/types.d.ts +3 -0
  205. package/dist/utils/gbnfJson/types.js.map +1 -1
  206. package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.js.map +1 -1
  207. package/dist/utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.js.map +1 -1
  208. package/dist/utils/getGrammarsFolder.js.map +1 -1
  209. package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js.map +1 -1
  210. package/dist/utils/gitReleaseBundles.js.map +1 -1
  211. package/dist/utils/isUrl.js.map +1 -1
  212. package/dist/utils/parseModelFileName.js.map +1 -1
  213. package/dist/utils/parseTextTemplate.js.map +1 -1
  214. package/dist/utils/prettyPrintObject.js.map +1 -1
  215. package/dist/utils/removeNullFields.js.map +1 -1
  216. package/dist/utils/resolveGithubRelease.d.ts +1 -1
  217. package/dist/utils/resolveGithubRelease.js.map +1 -1
  218. package/dist/utils/safeEventCallback.d.ts +6 -0
  219. package/dist/utils/safeEventCallback.js +29 -0
  220. package/dist/utils/safeEventCallback.js.map +1 -0
  221. package/dist/utils/spawnCommand.js.map +1 -1
  222. package/dist/utils/truncateTextAndRoundToWords.d.ts +2 -0
  223. package/dist/utils/truncateTextAndRoundToWords.js +30 -0
  224. package/dist/utils/truncateTextAndRoundToWords.js.map +1 -1
  225. package/dist/utils/waitForLockfileRelease.js.map +1 -1
  226. package/dist/utils/withLockfile.js.map +1 -1
  227. package/dist/utils/withOra.js.map +1 -1
  228. package/dist/utils/withProgressLog.js.map +1 -1
  229. package/dist/utils/withStatusLogs.js.map +1 -1
  230. package/dist/utils/wrapAbortSignal.d.ts +2 -0
  231. package/dist/utils/wrapAbortSignal.js +9 -0
  232. package/dist/utils/wrapAbortSignal.js.map +1 -0
  233. package/llama/addon.cpp +9 -9
  234. package/llama/binariesGithubRelease.json +1 -1
  235. package/llama/gitRelease.bundle +0 -0
  236. package/llama/llama.cpp.info.json +1 -1
  237. package/llamaBins/linux-arm64/_nlcBuildMetadata.json +1 -1
  238. package/llamaBins/linux-arm64/llama-addon.node +0 -0
  239. package/llamaBins/linux-armv7l/_nlcBuildMetadata.json +1 -1
  240. package/llamaBins/linux-armv7l/llama-addon.node +0 -0
  241. package/llamaBins/linux-x64/_nlcBuildMetadata.json +1 -1
  242. package/llamaBins/linux-x64/llama-addon.node +0 -0
  243. package/llamaBins/linux-x64-cuda/_nlcBuildMetadata.json +1 -1
  244. package/llamaBins/linux-x64-cuda/llama-addon.node +0 -0
  245. package/llamaBins/linux-x64-vulkan/_nlcBuildMetadata.json +1 -1
  246. package/llamaBins/linux-x64-vulkan/llama-addon.node +0 -0
  247. package/llamaBins/mac-arm64-metal/_nlcBuildMetadata.json +1 -1
  248. package/llamaBins/mac-arm64-metal/default.metallib +0 -0
  249. package/llamaBins/mac-arm64-metal/llama-addon.node +0 -0
  250. package/llamaBins/mac-x64/_nlcBuildMetadata.json +1 -1
  251. package/llamaBins/mac-x64/llama-addon.node +0 -0
  252. package/llamaBins/win-arm64/_nlcBuildMetadata.json +1 -1
  253. package/llamaBins/win-arm64/llama-addon.exp +0 -0
  254. package/llamaBins/win-arm64/llama-addon.lib +0 -0
  255. package/llamaBins/win-arm64/llama-addon.node +0 -0
  256. package/llamaBins/win-x64/_nlcBuildMetadata.json +1 -1
  257. package/llamaBins/win-x64/llama-addon.exp +0 -0
  258. package/llamaBins/win-x64/llama-addon.lib +0 -0
  259. package/llamaBins/win-x64/llama-addon.node +0 -0
  260. package/llamaBins/win-x64-cuda/_nlcBuildMetadata.json +1 -1
  261. package/llamaBins/win-x64-cuda/llama-addon.exp +0 -0
  262. package/llamaBins/win-x64-cuda/llama-addon.lib +0 -0
  263. package/llamaBins/win-x64-cuda/llama-addon.node +0 -0
  264. package/llamaBins/win-x64-vulkan/_nlcBuildMetadata.json +1 -1
  265. package/llamaBins/win-x64-vulkan/llama-addon.exp +0 -0
  266. package/llamaBins/win-x64-vulkan/llama-addon.lib +0 -0
  267. package/llamaBins/win-x64-vulkan/llama-addon.node +0 -0
  268. package/package.json +32 -34
  269. package/templates/packed/electron-typescript-react.json +1 -1
  270. package/templates/packed/node-typescript.json +1 -1
  271. package/dist/apiDocsOverrides.js.map +0 -1
  272. package/dist/evaluator/LlamaChat/utils/FunctionCallGrammar.d.ts +0 -22
  273. package/dist/evaluator/LlamaChat/utils/FunctionCallGrammar.js +0 -122
  274. package/dist/evaluator/LlamaChat/utils/FunctionCallGrammar.js.map +0 -1
  275. package/dist/evaluator/LlamaModel.js.map +0 -1
  276. /package/dist/{apiDocsOverrides.d.ts → apiDocsIndex.d.ts} +0 -0
@@ -1,24 +1,30 @@
1
- import { DisposeAggregator, DisposedError, EventRelay } from "lifecycle-utils";
1
+ import { DisposeAggregator, DisposedError, EventRelay, withLock } from "lifecycle-utils";
2
2
  import { removeNullFields } from "../../utils/removeNullFields.js";
3
3
  import { LlamaGrammarEvaluationState } from "../LlamaGrammarEvaluationState.js";
4
+ import { LlamaText, SpecialToken } from "../../utils/LlamaText.js";
4
5
  import { StopGenerationDetector } from "../../utils/StopGenerationDetector.js";
5
6
  import { TokenStreamRegulator } from "../../utils/TokenStreamRegulator.js";
6
7
  import { UNKNOWN_UNICODE_CHAR } from "../../consts.js";
7
8
  import { getQueuedTokensBeforeStopTrigger } from "../../utils/getQueuedTokensBeforeStopTrigger.js";
8
9
  import { resolveChatWrapper } from "../../chatWrappers/utils/resolveChatWrapper.js";
9
10
  import { GeneralChatWrapper } from "../../chatWrappers/GeneralChatWrapper.js";
10
- import { getConsoleLogPrefix } from "../../utils/getConsoleLogPrefix.js";
11
+ import { safeEventCallback } from "../../utils/safeEventCallback.js";
11
12
  import { eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy } from "./utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js";
12
- import { FunctionCallGrammar, LlamaFunctionCallValidationError } from "./utils/FunctionCallGrammar.js";
13
+ import { FunctionCallNameGrammar } from "./utils/FunctionCallNameGrammar.js";
14
+ import { FunctionCallParamsGrammar } from "./utils/FunctionCallParamsGrammar.js";
13
15
  const defaultContextShiftOptions = {
14
16
  size: (sequence) => Math.max(1, Math.floor(sequence.context.contextSize / 10)),
15
17
  strategy: "eraseFirstResponseAndKeepFirstSystem",
16
18
  lastEvaluationMetadata: null
17
19
  };
20
+ const defaultRepeatPenaltyLastTokens = 64;
21
+ const defaultTrimWhitespaceSuffix = false;
22
+ const defaultEvaluationPriority = 5;
18
23
  export class LlamaChat {
19
24
  /** @internal */ _chatWrapper;
20
25
  /** @internal */ _disposeAggregator = new DisposeAggregator();
21
26
  /** @internal */ _autoDisposeSequence;
27
+ /** @internal */ _chatLock = {};
22
28
  /** @internal */ _sequence;
23
29
  onDispose = new EventRelay();
24
30
  constructor({ contextSequence, chatWrapper = "auto", autoDisposeSequence = true }) {
@@ -72,494 +78,215 @@ export class LlamaChat {
72
78
  get model() {
73
79
  return this.sequence.model;
74
80
  }
75
- async generateResponse(history, { onToken, signal, stopOnAbortSignal = false, maxTokens, temperature, minP, topK, topP, grammar, trimWhitespaceSuffix = false, repeatPenalty = {}, tokenBias, evaluationPriority = 5, functions, documentFunctionParams, contextShift = defaultContextShiftOptions, customStopTriggers, lastEvaluationContextWindow: { history: lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift = 0.5 } = {} } = {}) {
76
- const functionsEnabled = (functions != null && Object.keys(functions).length > 0);
77
- if (grammar != null && functionsEnabled)
78
- throw new Error("Using both grammar and functions is not supported yet");
79
- if (signal?.aborted)
80
- throw signal.reason;
81
- if (this._sequence == null)
82
- throw new DisposedError();
83
- let resolvedHistory = this._sequence.isLoadedToMemory
84
- ? history.slice()
85
- : history.map(removeRawFromHistoryItem);
86
- if (resolvedHistory.length === 0 || resolvedHistory[resolvedHistory.length - 1].type !== "model")
87
- resolvedHistory.push({
88
- type: "model",
89
- response: []
90
- });
91
- const model = this._sequence.model;
92
- const context = this._sequence.context;
93
- const resolvedContextShift = {
94
- ...defaultContextShiftOptions,
95
- ...removeNullFields(contextShift)
96
- };
97
- const { lastTokens: repeatPenaltyLastTokens = 64, punishTokensFilter, penalizeNewLine, penalty, frequencyPenalty, presencePenalty } = repeatPenalty === false
98
- ? { lastTokens: 0 }
99
- : repeatPenalty;
100
- const lastModelResponse = getLastTextModelResponseFromChatHistory(resolvedHistory);
101
- const res = [];
102
- const pendingTokens = [];
103
- let ignoredStartTextTokens = [];
104
- const functionCallTokens = [];
105
- const repeatPenaltyEnabled = repeatPenaltyLastTokens > 0;
106
- const grammarEvaluationState = grammar != null
107
- ? new LlamaGrammarEvaluationState({ grammar })
108
- : undefined;
109
- let functionsGrammar = functionsEnabled
110
- ? new FunctionCallGrammar(model._llama, functions, this._chatWrapper, false)
111
- : undefined;
112
- let functionsEvaluationState = (functionsEnabled && functionsGrammar != null)
113
- ? new LlamaGrammarEvaluationState({
114
- grammar: functionsGrammar
115
- })
116
- : undefined;
117
- const streamRegulator = new TokenStreamRegulator();
118
- const stopGenerationDetector = new StopGenerationDetector();
119
- const customStopGenerationTriggersDetector = new StopGenerationDetector();
120
- const functionSyntaxStartDetector = new StopGenerationDetector();
121
- const functionSyntaxEndDetector = new StopGenerationDetector();
122
- const disengageInitiallyEngagedFunctionMode = new StopGenerationDetector();
123
- const ignoreStartTextDetector = new StopGenerationDetector();
124
- const locksToReleaseOnValidGeneration = [];
125
- const functionCallTokenSyntaxLocks = [];
126
- let generatedTokens = 0;
127
- let isFirstEvaluation = true;
128
- let inFunctionEvaluationMode = false;
129
- let initiallyEngagedFunctionMode = false;
130
- let lastContextWindowHistory = resolvedHistory;
131
- let lastHistoryCompressionMetadata = resolvedContextShift.lastEvaluationMetadata;
132
- const ensureNotAborted = () => {
133
- if (signal?.aborted && (!stopOnAbortSignal || res.length === 0))
134
- throw signal.reason;
135
- if (this._sequence == null)
136
- throw new DisposedError();
137
- };
138
- const getPenaltyTokens = () => {
139
- if (this._sequence == null)
140
- throw new DisposedError();
141
- let punishTokens = res.slice(-repeatPenaltyLastTokens);
142
- if (punishTokensFilter != null)
143
- punishTokens = punishTokensFilter(punishTokens);
144
- if (penalizeNewLine == null || !penalizeNewLine) {
145
- const nlToken = model.tokens.nl;
146
- if (nlToken != null)
147
- punishTokens = punishTokens.filter(token => token !== nlToken);
81
+ async generateResponse(history, options = {}) {
82
+ const { onToken, signal, stopOnAbortSignal = false, maxTokens, temperature, minP, topK, topP, grammar, trimWhitespaceSuffix = defaultTrimWhitespaceSuffix, repeatPenalty = {}, tokenBias, evaluationPriority = defaultEvaluationPriority, functions, onFunctionCall, documentFunctionParams, maxParallelFunctionCalls, contextShift = defaultContextShiftOptions, customStopTriggers, lastEvaluationContextWindow: { history: lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift = 0.5 } = {} } = options;
83
+ const generateResponseState = new GenerateResponseState(this, this._chatWrapper, history, {
84
+ onToken,
85
+ signal,
86
+ stopOnAbortSignal,
87
+ maxTokens,
88
+ temperature,
89
+ minP,
90
+ topK,
91
+ topP,
92
+ grammar: grammar, // this is a workaround to allow passing both `functions` and `grammar`
93
+ trimWhitespaceSuffix,
94
+ repeatPenalty,
95
+ tokenBias,
96
+ evaluationPriority,
97
+ functions,
98
+ onFunctionCall,
99
+ documentFunctionParams,
100
+ maxParallelFunctionCalls,
101
+ contextShift,
102
+ customStopTriggers,
103
+ lastEvaluationContextWindow: {
104
+ history: lastEvaluationContextWindowHistory,
105
+ minimumOverlapPercentageToPreventContextShift
148
106
  }
149
- return punishTokens;
150
- };
151
- const getResolvedHistoryWithCurrentModelResponse = () => {
152
- if (res.length === 0)
153
- return resolvedHistory;
154
- let modelResponse = model.detokenize(res);
155
- if (grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix)
156
- modelResponse = modelResponse.trimEnd();
157
- if (modelResponse === "")
158
- return resolvedHistory;
159
- return setLastModelTextResponseInChatHistory(resolvedHistory, lastModelResponse + modelResponse);
160
- };
161
- const removeFoundStartIgnoreTextsFromPendingTokens = () => {
162
- if (res.length === 0 && pendingTokens.length > 0) {
163
- ignoreStartTextDetector.clearInProgressStops();
164
- ignoreStartTextDetector.clearTriggeredStops();
165
- let mostExhaustiveTriggeredStops = null;
166
- for (let i = 0; i < pendingTokens.length; i++) {
167
- ignoreStartTextDetector.recordGeneration({
168
- text: model.detokenize([pendingTokens[i]]),
169
- tokens: [pendingTokens[i]],
170
- startNewChecks: i === 0
171
- });
172
- if (ignoreStartTextDetector.hasTriggeredStops) {
173
- mostExhaustiveTriggeredStops = ignoreStartTextDetector.getTriggeredStops();
174
- ignoreStartTextDetector.clearTriggeredStops();
107
+ });
108
+ if (generateResponseState.grammar != null && generateResponseState.functionsEnabled)
109
+ throw new Error("Using both grammar and functions is not supported yet");
110
+ return await withLock(this._chatLock, "evaluate", signal, async () => {
111
+ try {
112
+ generateResponseState.ensureLastHistoryItemIsModel();
113
+ const loadContextWindow = async (avoidReloadingHistory = false) => {
114
+ await generateResponseState.loadContextWindow(generateResponseState.getResolvedHistoryWithCurrentModelResponse(), false, avoidReloadingHistory);
115
+ };
116
+ const loadContextWindowForFunctionCallingLoop = async () => loadContextWindow(true);
117
+ // eslint-disable-next-line no-constant-condition
118
+ while (true) {
119
+ generateResponseState.startTokenLoop();
120
+ generateResponseState.canAvoidReloadingHistory = false;
121
+ await loadContextWindow();
122
+ generateResponseState.addStopGenerationTriggersFromChatWrapper();
123
+ if (generateResponseState.generatedTokens === 0) {
124
+ generateResponseState.addIgnoreStartTextTriggersFromChatWrapper();
125
+ if (generateResponseState.functionsEnabled) {
126
+ generateResponseState.initFunctions();
127
+ }
175
128
  }
176
- else if (!ignoreStartTextDetector.hasInProgressStops)
177
- break;
178
- }
179
- if (mostExhaustiveTriggeredStops != null) {
180
- const [mostExhaustiveTriggeredStop] = mostExhaustiveTriggeredStops;
181
- if (mostExhaustiveTriggeredStop != null) {
182
- ignoredStartTextTokens = mostExhaustiveTriggeredStop.stopTrigger
183
- .map((stopTrigger) => {
184
- if (typeof stopTrigger === "string")
185
- return model.tokenize(stopTrigger, false, "trimLeadingSpace");
186
- else
187
- return [stopTrigger];
188
- })
189
- .flat(1);
190
- const newPendingTokens = mostExhaustiveTriggeredStop.remainingGenerations
191
- .map((generation) => {
192
- if (typeof generation === "string")
193
- return model.tokenize(generation, false, "trimLeadingSpace");
194
- else
195
- return generation;
196
- })
197
- .flat(1);
198
- pendingTokens.length = 0;
199
- pendingTokens.push(...newPendingTokens);
129
+ if (generateResponseState.functionEvaluationMode !== false) {
130
+ const functionsCallsRes = await generateResponseState.enterFunctionCallingLoop(loadContextWindowForFunctionCallingLoop);
131
+ if (functionsCallsRes != null)
132
+ return functionsCallsRes;
133
+ await loadContextWindowForFunctionCallingLoop();
200
134
  }
201
- }
202
- }
203
- };
204
- if (customStopTriggers != null)
205
- StopGenerationDetector.resolveStopTriggers(customStopTriggers, model.tokenizer)
206
- .map((stopTrigger) => customStopGenerationTriggersDetector.addStopTrigger(stopTrigger));
207
- if (grammar != null)
208
- StopGenerationDetector.resolveStopTriggers(grammar.stopGenerationTriggers, model.tokenizer)
209
- .map((stopTrigger) => stopGenerationDetector.addStopTrigger(stopTrigger));
210
- if (functions != null && Object.keys(functions).length > 0)
211
- functionSyntaxStartDetector.addStopTrigger([this._chatWrapper.settings.functions.call.prefix]);
212
- // eslint-disable-next-line no-constant-condition
213
- while (true) {
214
- ensureNotAborted();
215
- let shouldContextShift = false;
216
- const queuedChunkTokens = streamRegulator.getAllQueuedChunkTokens();
217
- const { history: contextWindowHistory, stopGenerationTriggers, tokens: contextWindowTokens, newResolvedHistory, newHistoryCompressionMetadata, ignoreStartText, functionCallInitiallyEngaged, disengageInitiallyEngagedFunctionCall } = await getContextWindow({
218
- resolvedHistory: getResolvedHistoryWithCurrentModelResponse(),
219
- resolvedContextShift,
220
- lastHistoryCompressionMetadata,
221
- pendingTokensCount: ignoredStartTextTokens.length + pendingTokens.length + queuedChunkTokens.length,
222
- isFirstEvaluation,
223
- chatWrapper: this._chatWrapper,
224
- lastEvaluationContextWindowHistory,
225
- minimumOverlapPercentageToPreventContextShift,
226
- sequence: this._sequence,
227
- minFreeContextTokens: 1,
228
- functions: functionsEnabled ? functions : undefined,
229
- documentFunctionParams
230
- });
231
- ensureNotAborted();
232
- if (generatedTokens === 0) {
233
- StopGenerationDetector.resolveStopTriggers(ignoreStartText, model.tokenizer)
234
- .map((stopTrigger) => ignoreStartTextDetector.addStopTrigger(stopTrigger));
235
- if (functionsEnabled) {
236
- initiallyEngagedFunctionMode = functionCallInitiallyEngaged;
237
- StopGenerationDetector.resolveStopTriggers(disengageInitiallyEngagedFunctionCall, model.tokenizer)
238
- .map((stopTrigger) => disengageInitiallyEngagedFunctionMode.addStopTrigger(stopTrigger));
239
- if (initiallyEngagedFunctionMode) {
240
- inFunctionEvaluationMode = true;
241
- functionsGrammar = new FunctionCallGrammar(model._llama, functions, this._chatWrapper, true);
242
- functionsEvaluationState = new LlamaGrammarEvaluationState({
243
- grammar: functionsGrammar
244
- });
135
+ await generateResponseState.alignCurrentSequenceStateWithCurrentTokens();
136
+ await generateResponseState.createNewEvaluationIterator();
137
+ while (await generateResponseState.iterateEvaluation()) {
138
+ generateResponseState.waitOnPartialCharactersOrWhiteSpaceTokens();
139
+ generateResponseState.detectAndHandleFunctionStartSyntax();
140
+ if (generateResponseState.functionEvaluationMode !== false) {
141
+ generateResponseState.canAvoidReloadingHistory = false;
142
+ generateResponseState.releasePartiallyFreeTokensBeforeFunctionCallStart();
143
+ const functionsCallsRes = await generateResponseState.enterFunctionCallingLoop(loadContextWindowForFunctionCallingLoop);
144
+ if (functionsCallsRes != null)
145
+ return functionsCallsRes;
146
+ }
147
+ generateResponseState.recordStopGenerationEvaluation();
148
+ generateResponseState.popStreamRegulatorFreeTokens();
149
+ generateResponseState.removeFoundStartIgnoreTextsFromPendingTokens();
150
+ const stopGenerationTriggerRes = generateResponseState.handleStopGenerationTrigger("model");
151
+ if (stopGenerationTriggerRes != null)
152
+ return stopGenerationTriggerRes;
153
+ generateResponseState.spliceIgnoreStartTextDetectedTokens();
154
+ generateResponseState.moveFreePendingTokensToRes();
155
+ const maxTokensTriggerRes = generateResponseState.handleMaxTokensTrigger("model");
156
+ if (maxTokensTriggerRes != null)
157
+ return maxTokensTriggerRes;
158
+ if (generateResponseState.updateShouldContextShift())
159
+ break;
160
+ const abortRes = generateResponseState.handleAbortTrigger("model");
161
+ if (abortRes != null)
162
+ return abortRes;
245
163
  }
164
+ generateResponseState.isFirstEvaluation = false;
165
+ if (generateResponseState.shouldContextShift)
166
+ continue;
167
+ break;
246
168
  }
169
+ throw new Error("The context size is too small to generate a response");
170
+ }
171
+ finally {
172
+ generateResponseState.dispose();
247
173
  }
248
- const tokens = [...contextWindowTokens, ...ignoredStartTextTokens, ...pendingTokens, ...queuedChunkTokens];
249
- resolvedHistory = newResolvedHistory;
250
- lastHistoryCompressionMetadata = newHistoryCompressionMetadata;
251
- lastContextWindowHistory = contextWindowHistory;
252
- const contextWindowLastModelResponse = getLastTextModelResponseFromChatHistory(contextWindowHistory);
253
- const contextWindowsRes = [];
254
- StopGenerationDetector.resolveStopTriggers(stopGenerationTriggers, model.tokenizer)
255
- .map((stopTrigger) => stopGenerationDetector.addStopTrigger(stopTrigger));
256
- if (functionsGrammar != null)
257
- StopGenerationDetector.resolveStopTriggers(functionsGrammar.stopGenerationTriggers, model.tokenizer)
258
- .map((stopTrigger) => functionSyntaxEndDetector.addStopTrigger(stopTrigger));
259
- let { firstDifferentIndex } = this._sequence.compareContextTokens(tokens);
260
- // we need to decode at least one token to generate a response
261
- if (firstDifferentIndex === tokens.length && firstDifferentIndex > 0)
262
- firstDifferentIndex -= 1;
263
- tokens.splice(0, firstDifferentIndex);
264
- if (firstDifferentIndex < this._sequence.nextTokenIndex) {
265
- await this._sequence.eraseContextTokenRanges([{
266
- start: firstDifferentIndex,
267
- end: this._sequence.nextTokenIndex
268
- }]);
269
- ensureNotAborted();
174
+ });
175
+ }
176
+ async loadChatAndCompleteUserMessage(history, options = {}) {
177
+ const { initialUserPrompt = "", stopOnAbortSignal = false, onToken, signal, maxTokens = Math.min(256, Math.ceil(this.context.contextSize / 2)), temperature, minP, topK, topP, grammar, trimWhitespaceSuffix = defaultTrimWhitespaceSuffix, repeatPenalty = {}, tokenBias, evaluationPriority = defaultEvaluationPriority, functions, documentFunctionParams, contextShift = defaultContextShiftOptions, customStopTriggers, lastEvaluationContextWindow: { history: lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift = 0.8 } = {} } = options;
178
+ const lastEvaluationContextWindowHistoryItem = lastEvaluationContextWindowHistory == null
179
+ ? null
180
+ : lastEvaluationContextWindowHistory[lastEvaluationContextWindowHistory.length - 1];
181
+ const lastEvaluationContextWindowUserMessage = lastEvaluationContextWindowHistoryItem?.type === "user"
182
+ ? lastEvaluationContextWindowHistoryItem.text
183
+ : "";
184
+ const generateResponseState = new GenerateResponseState(this, this._chatWrapper, history, {
185
+ onToken,
186
+ signal,
187
+ stopOnAbortSignal,
188
+ maxTokens,
189
+ temperature,
190
+ minP,
191
+ topK,
192
+ topP,
193
+ grammar: grammar, // this is a workaround to allow passing both `functions` and `grammar`
194
+ trimWhitespaceSuffix,
195
+ repeatPenalty,
196
+ tokenBias,
197
+ evaluationPriority,
198
+ functions,
199
+ documentFunctionParams,
200
+ contextShift,
201
+ customStopTriggers,
202
+ lastEvaluationContextWindow: {
203
+ history: lastEvaluationContextWindowHistory == null
204
+ ? undefined
205
+ : setLastUserTextInChatHistory(lastEvaluationContextWindowHistory, lastEvaluationContextWindowUserMessage + initialUserPrompt),
206
+ minimumOverlapPercentageToPreventContextShift
270
207
  }
271
- const evaluationIterator = this._sequence.evaluate(tokens, removeNullFields({
272
- temperature, minP, topK, topP,
273
- grammarEvaluationState: () => {
274
- if (inFunctionEvaluationMode)
275
- return functionsEvaluationState;
276
- return grammarEvaluationState;
277
- },
278
- repeatPenalty: !repeatPenaltyEnabled ? undefined : {
279
- punishTokens: getPenaltyTokens,
280
- penalty,
281
- frequencyPenalty,
282
- presencePenalty
283
- },
284
- tokenBias,
285
- evaluationPriority,
286
- yieldEogToken: true
287
- }));
208
+ });
209
+ return await withLock(this._chatLock, "evaluate", signal, async () => {
288
210
  try {
289
- let currentIteration = await evaluationIterator.next();
290
- while (currentIteration.done !== true) {
291
- const token = currentIteration.value;
292
- let replacementToken = undefined;
293
- ensureNotAborted();
294
- generatedTokens++;
295
- const tokens = [token];
296
- const text = model.detokenize([token]);
297
- const queuedTokenRelease = streamRegulator.addChunk({ tokens, text });
298
- if (initiallyEngagedFunctionMode)
299
- disengageInitiallyEngagedFunctionMode.recordGeneration({ text, tokens, startNewChecks: generatedTokens === 1 });
300
- if (text === UNKNOWN_UNICODE_CHAR || ((grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix) && text.trim() === "")) {
301
- locksToReleaseOnValidGeneration.push(queuedTokenRelease.createTextIndexLock(0));
302
- }
303
- else {
304
- while (locksToReleaseOnValidGeneration.length > 0)
305
- locksToReleaseOnValidGeneration.shift().dispose();
306
- }
307
- functionSyntaxStartDetector.recordGeneration({ text, tokens, queuedTokenRelease });
308
- if (initiallyEngagedFunctionMode && disengageInitiallyEngagedFunctionMode.hasTriggeredStops) {
309
- initiallyEngagedFunctionMode = false;
310
- let shouldStopFunctionEvaluationMode = !functionSyntaxStartDetector.hasTriggeredStops;
311
- if (!shouldStopFunctionEvaluationMode && functionsEnabled && functionsGrammar != null) {
312
- const functionCallText = model.detokenize([...functionCallTokens, ...tokens]);
313
- try {
314
- const functionName = functionsGrammar.parseFunctionNameFromPartialCall(functionCallText, {
315
- enableInternalBuiltinFunctions: true,
316
- initialFunctionCallEngaged: true
317
- });
318
- const internalBuiltinFunctions = this._chatWrapper.getInternalBuiltinFunctions({ initialFunctionCallEngaged: true });
319
- if (internalBuiltinFunctions[functionName] != null) {
320
- shouldStopFunctionEvaluationMode = true;
321
- }
322
- }
323
- catch (err) {
324
- if (!(err instanceof LlamaFunctionCallValidationError))
325
- throw err;
326
- }
327
- }
328
- if (shouldStopFunctionEvaluationMode) {
329
- inFunctionEvaluationMode = false;
330
- functionsGrammar = new FunctionCallGrammar(model._llama, functions, this._chatWrapper, false);
331
- functionsEvaluationState = new LlamaGrammarEvaluationState({
332
- grammar: functionsGrammar
333
- });
334
- functionCallTokens.length = 0;
335
- while (functionCallTokenSyntaxLocks.length > 0)
336
- functionCallTokenSyntaxLocks.shift().dispose();
337
- functionSyntaxStartDetector.clearInProgressStops();
338
- functionSyntaxStartDetector.clearTriggeredStops();
339
- functionSyntaxEndDetector.clearInProgressStops();
340
- functionSyntaxEndDetector.clearTriggeredStops();
341
- }
342
- }
343
- if (!inFunctionEvaluationMode && functionsEnabled && functionsGrammar != null &&
344
- functionSyntaxStartDetector.hasTriggeredStops && functionsEvaluationState != null) {
345
- inFunctionEvaluationMode = true;
346
- functionCallTokenSyntaxLocks.push(queuedTokenRelease.createTextIndexLock(0));
347
- stopGenerationDetector.clearTriggeredStops();
348
- stopGenerationDetector.clearInProgressStops();
349
- customStopGenerationTriggersDetector.clearTriggeredStops();
350
- customStopGenerationTriggersDetector.clearInProgressStops();
351
- pendingTokens.push(...streamRegulator.popFreeChunkTokens());
352
- const triggeredStops = functionSyntaxStartDetector.getTriggeredStops();
353
- const partiallyFreeTokens = streamRegulator.getPartiallyFreeChunk(model.tokenizer);
354
- const queuedTokensBeforeStopTrigger = getQueuedTokensBeforeStopTrigger(triggeredStops, partiallyFreeTokens, model.tokenizer);
355
- pendingTokens.push(...queuedTokensBeforeStopTrigger);
356
- const [firstRemainingGenerationAfterStop] = triggeredStops
357
- .map((stopTrigger) => stopTrigger.remainingGenerations)
358
- .filter((remainingGenerations) => remainingGenerations.length > 0)
359
- .flat(1);
360
- const remainingTextAfterStop = (firstRemainingGenerationAfterStop == null || firstRemainingGenerationAfterStop.length === 0)
361
- ? ""
362
- : typeof firstRemainingGenerationAfterStop === "string"
363
- ? firstRemainingGenerationAfterStop
364
- : model.detokenize(firstRemainingGenerationAfterStop);
365
- functionCallTokens.push(...model.tokenize(this._chatWrapper.settings.functions.call.prefix, false, "trimLeadingSpace"));
366
- for (const functionCallToken of functionCallTokens)
367
- context._acceptTokenOnGrammarEvaluationState(functionsEvaluationState, functionCallToken);
368
- // these tokens have to be verified that they match the function calling syntax grammar before they can be accepted,
369
- // or the context state should be modified to not include the incompatible tokens
370
- const remainingTextTokens = model.tokenize(remainingTextAfterStop, false, "trimLeadingSpace");
371
- let unfitTokens = [];
372
- for (let i = 0; i < remainingTextTokens.length; i++) {
373
- const remainingToken = remainingTextTokens[i];
374
- const canBeNextToken = context._canBeNextTokenForGrammarEvaluationState(functionsEvaluationState, remainingToken);
375
- if (!canBeNextToken) {
376
- unfitTokens = remainingTextTokens.slice(i);
377
- break;
378
- }
379
- context._acceptTokenOnGrammarEvaluationState(functionsEvaluationState, remainingToken);
380
- functionCallTokens.push(remainingToken);
381
- }
382
- if (unfitTokens.length > 0) {
383
- const unfitTokensText = model.detokenize(unfitTokens); // the current token text must end with it
384
- const currentTokenText = queuedTokenRelease.text;
385
- let replacementTokens;
386
- if (!currentTokenText.endsWith(unfitTokensText)) {
387
- console.warn(getConsoleLogPrefix() + "The current token text does not end with the unfit function call syntax tokens text");
388
- replacementTokens = remainingTextTokens.slice(0, -unfitTokens.length);
389
- }
390
- else {
391
- const newCurrentTokensText = currentTokenText.slice(0, -unfitTokensText.length);
392
- replacementTokens = model.tokenize(newCurrentTokensText, false, "trimLeadingSpace");
393
- }
394
- if (replacementTokens.length > 0) {
395
- replacementToken = replacementTokens[0];
396
- queuedTokenRelease.modifyTokensAndText(replacementTokens, model.detokenize([replacementToken]));
397
- }
398
- }
399
- }
400
- else if (inFunctionEvaluationMode) {
401
- functionCallTokens.push(...tokens);
402
- functionCallTokenSyntaxLocks.push(queuedTokenRelease.createTextIndexLock(0));
403
- functionSyntaxEndDetector.recordGeneration({ text, tokens, queuedTokenRelease });
404
- }
405
- if (inFunctionEvaluationMode && functionSyntaxEndDetector.hasTriggeredStops && functionsGrammar != null) {
406
- const functionCallText = model.detokenize(functionCallTokens);
407
- const functionCall = functionsGrammar.parseFunctionCall(functionCallText);
408
- let modelResponse = model.detokenize(res);
409
- let contextWindowModelResponse = model.detokenize(contextWindowsRes);
410
- if (grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix) {
411
- modelResponse = modelResponse.trimEnd();
412
- contextWindowModelResponse = contextWindowModelResponse.trimEnd();
413
- }
211
+ generateResponseState.ensureLastHistoryItemIsUser();
212
+ const lastResolvedHistoryItem = generateResponseState.resolvedHistory[generateResponseState.resolvedHistory.length - 1];
213
+ const initialUserMessage = lastResolvedHistoryItem?.type === "user"
214
+ ? lastResolvedHistoryItem.text
215
+ : "";
216
+ // eslint-disable-next-line no-constant-condition
217
+ while (true) {
218
+ generateResponseState.startTokenLoop();
219
+ const { userTextSuffix } = await generateResponseState.loadContextWindow(setLastUserTextInChatHistory(generateResponseState.resolvedHistory, initialUserMessage + initialUserPrompt + this.model.detokenize(generateResponseState.res)), true);
220
+ generateResponseState.functionEvaluationMode = false;
221
+ generateResponseState.addStopGenerationTriggersFromChatWrapper();
222
+ if (userTextSuffix != null && userTextSuffix.values.length > 0)
223
+ generateResponseState.stopGenerationDetector.addStopTrigger(StopGenerationDetector.resolveLlamaTextTrigger(userTextSuffix, this.model.tokenizer));
224
+ await generateResponseState.alignCurrentSequenceStateWithCurrentTokens();
225
+ if (generateResponseState.maxTokens === 0) {
226
+ await generateResponseState.evaluateWithoutGeneratingNewTokens();
414
227
  return {
415
- response: modelResponse,
228
+ completion: "",
416
229
  lastEvaluation: {
417
- contextWindow: setLastModelTextResponseInChatHistory(lastContextWindowHistory, contextWindowLastModelResponse + contextWindowModelResponse),
418
- cleanHistory: setLastModelTextResponseInChatHistory(resolvedHistory, lastModelResponse + modelResponse),
419
- contextShiftMetadata: lastHistoryCompressionMetadata
230
+ contextWindow: setLastUserTextInChatHistory(generateResponseState.lastContextWindowHistory, initialUserMessage),
231
+ contextShiftMetadata: generateResponseState.lastHistoryCompressionMetadata
420
232
  },
421
- // prevent infinite TS type instantiation
422
- functionCall: functionCall,
423
233
  metadata: {
424
- stopReason: "functionCall"
234
+ stopReason: "maxTokens"
425
235
  }
426
236
  };
427
237
  }
428
- if (!inFunctionEvaluationMode) {
429
- stopGenerationDetector.recordGeneration({ text, tokens, queuedTokenRelease });
430
- customStopGenerationTriggersDetector.recordGeneration({ text, tokens, queuedTokenRelease });
431
- }
432
- pendingTokens.push(...streamRegulator.popFreeChunkTokens());
433
- removeFoundStartIgnoreTextsFromPendingTokens();
434
- if (stopGenerationDetector.hasTriggeredStops || customStopGenerationTriggersDetector.hasTriggeredStops ||
435
- model.isEogToken(token)) {
436
- stopGenerationDetector.clearInProgressStops();
437
- customStopGenerationTriggersDetector.clearInProgressStops();
438
- pendingTokens.push(...streamRegulator.popFreeChunkTokens());
439
- const triggeredStops = stopGenerationDetector.hasTriggeredStops
440
- ? stopGenerationDetector.getTriggeredStops()
441
- : customStopGenerationTriggersDetector.getTriggeredStops();
442
- const partiallyFreeTokens = streamRegulator.getPartiallyFreeChunk(model.tokenizer);
443
- const queuedTokensBeforeStopTrigger = getQueuedTokensBeforeStopTrigger(triggeredStops, partiallyFreeTokens, model.tokenizer);
444
- pendingTokens.push(...queuedTokensBeforeStopTrigger);
445
- const [firstRemainingGenerationAfterStop] = triggeredStops
446
- .map((stopTrigger) => stopTrigger.remainingGenerations)
447
- .filter((remainingGenerations) => remainingGenerations.length > 0)
448
- .flat(1);
449
- removeFoundStartIgnoreTextsFromPendingTokens();
450
- if (pendingTokens.length > 0)
451
- onToken?.(pendingTokens.slice());
452
- res.push(...pendingTokens);
453
- contextWindowsRes.push(...pendingTokens);
454
- pendingTokens.length = 0;
455
- let modelResponse = model.detokenize(res);
456
- let contextWindowModelResponse = model.detokenize(contextWindowsRes);
457
- if (grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix) {
458
- modelResponse = modelResponse.trimEnd();
459
- contextWindowModelResponse = contextWindowModelResponse.trimEnd();
460
- }
461
- const lastEvaluation = {
462
- contextWindow: setLastModelTextResponseInChatHistory(lastContextWindowHistory, contextWindowLastModelResponse + contextWindowModelResponse),
463
- cleanHistory: setLastModelTextResponseInChatHistory(resolvedHistory, lastModelResponse + modelResponse),
464
- contextShiftMetadata: lastHistoryCompressionMetadata
465
- };
466
- const isEogToken = model.isEogToken(token);
467
- if (isEogToken || stopGenerationDetector.hasTriggeredStops) {
238
+ await generateResponseState.createNewEvaluationIterator();
239
+ while (await generateResponseState.iterateEvaluation()) {
240
+ generateResponseState.waitOnPartialCharactersOrWhiteSpaceTokens();
241
+ generateResponseState.recordStopGenerationEvaluation();
242
+ generateResponseState.popStreamRegulatorFreeTokens();
243
+ const stopGenerationTriggerRes = generateResponseState.handleStopGenerationTrigger("user");
244
+ if (stopGenerationTriggerRes != null)
468
245
  return {
469
- response: modelResponse,
470
- lastEvaluation,
471
- metadata: {
472
- remainingGenerationAfterStop: firstRemainingGenerationAfterStop,
473
- stopReason: isEogToken
474
- ? "eogToken"
475
- : "stopGenerationTrigger"
476
- }
246
+ completion: stopGenerationTriggerRes.response,
247
+ lastEvaluation: {
248
+ contextWindow: setLastUserTextInChatHistory(generateResponseState.lastContextWindowHistory, initialUserMessage),
249
+ contextShiftMetadata: stopGenerationTriggerRes.lastEvaluation.contextShiftMetadata
250
+ },
251
+ metadata: stopGenerationTriggerRes.metadata.stopReason === "customStopTrigger"
252
+ ? stopGenerationTriggerRes.metadata
253
+ : stopGenerationTriggerRes.metadata
254
+ };
255
+ generateResponseState.moveFreePendingTokensToRes(false);
256
+ const maxTokensTriggerRes = generateResponseState.handleMaxTokensTrigger("user");
257
+ if (maxTokensTriggerRes != null)
258
+ return {
259
+ completion: maxTokensTriggerRes.response,
260
+ lastEvaluation: {
261
+ contextWindow: setLastUserTextInChatHistory(generateResponseState.lastContextWindowHistory, initialUserMessage),
262
+ contextShiftMetadata: maxTokensTriggerRes.lastEvaluation.contextShiftMetadata
263
+ },
264
+ metadata: maxTokensTriggerRes.metadata
265
+ };
266
+ if (generateResponseState.updateShouldContextShift())
267
+ break;
268
+ const abortRes = generateResponseState.handleAbortTrigger("user");
269
+ if (abortRes != null)
270
+ return {
271
+ completion: abortRes.response,
272
+ lastEvaluation: {
273
+ contextWindow: setLastUserTextInChatHistory(generateResponseState.lastContextWindowHistory, initialUserMessage),
274
+ contextShiftMetadata: abortRes.lastEvaluation.contextShiftMetadata
275
+ },
276
+ metadata: abortRes.metadata
477
277
  };
478
- }
479
- return {
480
- response: modelResponse,
481
- lastEvaluation,
482
- metadata: {
483
- remainingGenerationAfterStop: firstRemainingGenerationAfterStop,
484
- stopReason: "customStopTrigger",
485
- customStopTrigger: triggeredStops[0].stopTrigger
486
- }
487
- };
488
- }
489
- const maxTokensTriggered = maxTokens != null && maxTokens > 0 && generatedTokens >= maxTokens;
490
- if (res.length === 0) {
491
- ignoreStartTextDetector.clearInProgressStops();
492
- ignoreStartTextDetector.clearTriggeredStops();
493
- ignoreStartTextDetector.recordGeneration({
494
- text: model.detokenize(pendingTokens),
495
- tokens: pendingTokens
496
- });
497
- }
498
- if (pendingTokens.length > 0 && (maxTokensTriggered || !ignoreStartTextDetector.hasInProgressStops)) {
499
- removeFoundStartIgnoreTextsFromPendingTokens();
500
- if (pendingTokens.length > 0) {
501
- onToken?.(pendingTokens.slice());
502
- res.push(...pendingTokens);
503
- contextWindowsRes.push(...pendingTokens);
504
- pendingTokens.length = 0;
505
- }
506
- }
507
- if (maxTokensTriggered) {
508
- let modelResponse = model.detokenize(res);
509
- let contextWindowModelResponse = model.detokenize(contextWindowsRes);
510
- if (grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix) {
511
- modelResponse = modelResponse.trimEnd();
512
- contextWindowModelResponse = contextWindowModelResponse.trimEnd();
513
- }
514
- return {
515
- response: modelResponse,
516
- lastEvaluation: {
517
- contextWindow: setLastModelTextResponseInChatHistory(lastContextWindowHistory, contextWindowLastModelResponse + contextWindowModelResponse),
518
- cleanHistory: setLastModelTextResponseInChatHistory(resolvedHistory, lastModelResponse + modelResponse),
519
- contextShiftMetadata: lastHistoryCompressionMetadata
520
- },
521
- metadata: {
522
- stopReason: "maxTokens"
523
- }
524
- };
525
- }
526
- if (this._sequence.nextTokenIndex >= context.contextSize - 1) {
527
- shouldContextShift = true;
528
- break;
529
- }
530
- if (signal?.aborted && stopOnAbortSignal) {
531
- if (res.length === 0)
532
- throw signal.reason;
533
- let modelResponse = model.detokenize(res);
534
- let contextWindowModelResponse = model.detokenize(contextWindowsRes);
535
- if (grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix) {
536
- modelResponse = modelResponse.trimEnd();
537
- contextWindowModelResponse = contextWindowModelResponse.trimEnd();
538
- }
539
- return {
540
- response: modelResponse,
541
- lastEvaluation: {
542
- contextWindow: setLastModelTextResponseInChatHistory(lastContextWindowHistory, contextWindowLastModelResponse + contextWindowModelResponse),
543
- cleanHistory: setLastModelTextResponseInChatHistory(resolvedHistory, lastModelResponse + modelResponse),
544
- contextShiftMetadata: lastHistoryCompressionMetadata
545
- },
546
- metadata: {
547
- stopReason: "abort"
548
- }
549
- };
550
278
  }
551
- currentIteration = await evaluationIterator.next(replacementToken);
279
+ generateResponseState.isFirstEvaluation = false;
280
+ if (generateResponseState.shouldContextShift)
281
+ continue;
282
+ break;
552
283
  }
284
+ throw new Error("The context size is too small to generate a completion");
553
285
  }
554
286
  finally {
555
- await evaluationIterator.return();
287
+ generateResponseState.dispose();
556
288
  }
557
- isFirstEvaluation = false;
558
- if (shouldContextShift)
559
- continue;
560
- break;
561
- }
562
- throw new Error("The context size is too small to generate a response");
289
+ });
563
290
  }
564
291
  }
565
292
  function removeRawFromHistoryItem(historyItem) {
@@ -571,7 +298,7 @@ function removeRawFromHistoryItem(historyItem) {
571
298
  else
572
299
  return {
573
300
  ...item,
574
- raw: undefined
301
+ rawCall: undefined
575
302
  };
576
303
  });
577
304
  return newHistoryItem;
@@ -580,7 +307,8 @@ function removeRawFromHistoryItem(historyItem) {
580
307
  }
581
308
  async function compressHistoryToFitContextSize({ history, contextShiftSize, contextShiftStrategy, contextShiftLastEvaluationMetadata, contextSize, tokenizer, chatWrapper, functions, documentFunctionParams }) {
582
309
  function checkIfHistoryFitsContext(history) {
583
- const { contextText } = chatWrapper.generateContextText(history, {
310
+ const { contextText } = chatWrapper.generateContextState({
311
+ chatHistory: history,
584
312
  availableFunctions: functions,
585
313
  documentFunctionParams
586
314
  });
@@ -643,6 +371,11 @@ function getLastTextModelResponseFromChatHistory(chatHistory) {
643
371
  return modelResponse[modelResponse.length - 1];
644
372
  return "";
645
373
  }
374
+ function getLastUserTextFromChatHistory(chatHistory) {
375
+ if (chatHistory.length === 0 || chatHistory[chatHistory.length - 1].type !== "user")
376
+ return "";
377
+ return chatHistory[chatHistory.length - 1].text;
378
+ }
646
379
  function setLastModelTextResponseInChatHistory(chatHistory, textResponse) {
647
380
  const newChatHistory = chatHistory.slice();
648
381
  if (newChatHistory.length === 0 || newChatHistory[newChatHistory.length - 1].type !== "model")
@@ -665,19 +398,86 @@ function setLastModelTextResponseInChatHistory(chatHistory, textResponse) {
665
398
  modelResponse.push(textResponse);
666
399
  return newChatHistory;
667
400
  }
668
- async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHistoryCompressionMetadata, pendingTokensCount = 0, isFirstEvaluation, chatWrapper, lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift, sequence, minFreeContextTokens = 1, functions, documentFunctionParams }) {
401
+ function setLastUserTextInChatHistory(chatHistory, userText) {
402
+ const newChatHistory = chatHistory.slice();
403
+ if (newChatHistory.length === 0 || newChatHistory[newChatHistory.length - 1].type !== "user")
404
+ newChatHistory.push({
405
+ type: "user",
406
+ text: ""
407
+ });
408
+ const lastUserItem = newChatHistory[newChatHistory.length - 1];
409
+ const newLastUserItem = { ...lastUserItem };
410
+ newChatHistory[newChatHistory.length - 1] = newLastUserItem;
411
+ newLastUserItem.text = userText;
412
+ return newChatHistory;
413
+ }
414
+ function setLastTextInChatHistory(itemType, chatHistory, text) {
415
+ if (itemType === "user")
416
+ return setLastUserTextInChatHistory(chatHistory, text);
417
+ else
418
+ return setLastModelTextResponseInChatHistory(chatHistory, text);
419
+ }
420
+ function generateContextText(endWithUserText, chatWrapper, options) {
421
+ if (endWithUserText)
422
+ return generateContextTextThatEndsWithUserText(chatWrapper, options);
423
+ return chatWrapper.generateContextState(options);
424
+ }
425
+ function generateContextTextThatEndsWithUserText(chatWrapper, options) {
426
+ const lastUserText = getLastUserTextFromChatHistory(options.chatHistory);
427
+ const randomId = "W" + (Math.random()
428
+ .toString(36)
429
+ .slice(2)) + "W";
430
+ const { contextText, ...rest } = chatWrapper.generateContextState({
431
+ ...options,
432
+ chatHistory: setLastUserTextInChatHistory(options.chatHistory, lastUserText + randomId)
433
+ });
434
+ let newContextText = contextText;
435
+ for (let i = 0; i < newContextText.values.length; i++) {
436
+ const item = newContextText.values[i];
437
+ if (typeof item !== "string")
438
+ continue;
439
+ const randomTextIndex = item.indexOf(randomId);
440
+ if (randomTextIndex < 0)
441
+ continue;
442
+ const newValue = item.slice(0, randomTextIndex);
443
+ newContextText = LlamaText([
444
+ ...newContextText.values.slice(0, i),
445
+ newValue
446
+ ]);
447
+ return {
448
+ contextText: newContextText,
449
+ userTextSuffix: LlamaText([
450
+ item.slice(randomTextIndex + randomId.length),
451
+ ...newContextText.values.slice(i + 1)
452
+ ]),
453
+ ...rest
454
+ };
455
+ }
456
+ throw new Error("The random ID was not found in the context text. " +
457
+ `There might be an issue with the chat wrapper "${chatWrapper.wrapperName}" ` +
458
+ "where not all user messages are properly added to the the result LlamaText");
459
+ }
460
+ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHistoryCompressionMetadata, pendingTokensCount = 0, isFirstEvaluation, chatWrapper, lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift, sequence, minFreeContextTokens = 1, functions, documentFunctionParams, endWithUserText }) {
669
461
  if (sequence == null)
670
462
  throw new DisposedError();
671
463
  const model = sequence.model;
672
464
  const context = sequence.context;
673
465
  if (isFirstEvaluation && lastEvaluationContextWindowHistory != null && sequence.isLoadedToMemory) {
674
466
  const newContextWindow = lastEvaluationContextWindowHistory.slice();
675
- if (newContextWindow.length === 0 || newContextWindow[newContextWindow.length - 1].type !== "model")
467
+ if (endWithUserText) {
468
+ if (newContextWindow.length === 0 || newContextWindow[newContextWindow.length - 1].type !== "user")
469
+ newContextWindow.push({
470
+ type: "user",
471
+ text: ""
472
+ });
473
+ }
474
+ else if (newContextWindow.length === 0 || newContextWindow[newContextWindow.length - 1].type !== "model")
676
475
  newContextWindow.push({
677
476
  type: "model",
678
477
  response: []
679
478
  });
680
- const { contextText, stopGenerationTriggers, ignoreStartText, functionCall } = chatWrapper.generateContextText(newContextWindow, {
479
+ const { contextText, stopGenerationTriggers, ignoreStartText, functionCall, userTextSuffix } = generateContextText(endWithUserText, chatWrapper, {
480
+ chatHistory: newContextWindow,
681
481
  availableFunctions: functions,
682
482
  documentFunctionParams
683
483
  });
@@ -694,7 +494,8 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
694
494
  newHistoryCompressionMetadata: lastHistoryCompressionMetadata,
695
495
  ignoreStartText: ignoreStartText ?? [],
696
496
  functionCallInitiallyEngaged: functionCall?.initiallyEngaged ?? false,
697
- disengageInitiallyEngagedFunctionCall: functionCall?.disengageInitiallyEngaged ?? []
497
+ disengageInitiallyEngagedFunctionCall: functionCall?.disengageInitiallyEngaged ?? [],
498
+ userTextSuffix
698
499
  };
699
500
  }
700
501
  }
@@ -716,7 +517,8 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
716
517
  functions,
717
518
  documentFunctionParams
718
519
  });
719
- const { contextText, stopGenerationTriggers, ignoreStartText, functionCall } = chatWrapper.generateContextText(compressedHistory, {
520
+ const { contextText, stopGenerationTriggers, ignoreStartText, functionCall, userTextSuffix } = generateContextText(endWithUserText, chatWrapper, {
521
+ chatHistory: compressedHistory,
720
522
  availableFunctions: functions,
721
523
  documentFunctionParams
722
524
  });
@@ -728,11 +530,13 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
728
530
  newHistoryCompressionMetadata: metadata,
729
531
  ignoreStartText: ignoreStartText ?? [],
730
532
  functionCallInitiallyEngaged: functionCall?.initiallyEngaged ?? false,
731
- disengageInitiallyEngagedFunctionCall: functionCall?.disengageInitiallyEngaged ?? []
533
+ disengageInitiallyEngagedFunctionCall: functionCall?.disengageInitiallyEngaged ?? [],
534
+ userTextSuffix
732
535
  };
733
536
  }
734
537
  {
735
- const { contextText, stopGenerationTriggers, ignoreStartText, functionCall } = chatWrapper.generateContextText(resolvedHistory, {
538
+ const { contextText, stopGenerationTriggers, ignoreStartText, functionCall, userTextSuffix } = generateContextText(endWithUserText, chatWrapper, {
539
+ chatHistory: resolvedHistory,
736
540
  availableFunctions: functions,
737
541
  documentFunctionParams
738
542
  });
@@ -746,7 +550,8 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
746
550
  newHistoryCompressionMetadata: lastHistoryCompressionMetadata,
747
551
  ignoreStartText: ignoreStartText ?? [],
748
552
  functionCallInitiallyEngaged: functionCall?.initiallyEngaged ?? false,
749
- disengageInitiallyEngagedFunctionCall: functionCall?.disengageInitiallyEngaged ?? []
553
+ disengageInitiallyEngagedFunctionCall: functionCall?.disengageInitiallyEngaged ?? [],
554
+ userTextSuffix
750
555
  };
751
556
  }
752
557
  const contextShiftSize = Math.min(context.contextSize, Math.max(1, Math.floor(resolvedContextShift.size instanceof Function
@@ -763,7 +568,8 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
763
568
  functions,
764
569
  documentFunctionParams
765
570
  });
766
- const { contextText, stopGenerationTriggers, ignoreStartText, functionCall } = chatWrapper.generateContextText(compressedHistory, {
571
+ const { contextText, stopGenerationTriggers, ignoreStartText, functionCall, userTextSuffix } = generateContextText(endWithUserText, chatWrapper, {
572
+ chatHistory: compressedHistory,
767
573
  availableFunctions: functions,
768
574
  documentFunctionParams
769
575
  });
@@ -775,7 +581,932 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
775
581
  newHistoryCompressionMetadata: metadata,
776
582
  ignoreStartText: ignoreStartText ?? [],
777
583
  functionCallInitiallyEngaged: functionCall?.initiallyEngaged ?? false,
778
- disengageInitiallyEngagedFunctionCall: functionCall?.disengageInitiallyEngaged ?? []
584
+ disengageInitiallyEngagedFunctionCall: functionCall?.disengageInitiallyEngaged ?? [],
585
+ userTextSuffix
779
586
  };
780
587
  }
588
+ class GenerateResponseState {
589
+ llamaChat;
590
+ chatWrapper;
591
+ history;
592
+ onToken;
593
+ signal;
594
+ stopOnAbortSignal;
595
+ maxTokens;
596
+ temperature;
597
+ minP;
598
+ topK;
599
+ topP;
600
+ grammar;
601
+ trimWhitespaceSuffix;
602
+ tokenBias;
603
+ evaluationPriority;
604
+ functions;
605
+ onFunctionCall;
606
+ documentFunctionParams;
607
+ maxParallelFunctionCalls;
608
+ contextShift;
609
+ customStopTriggers;
610
+ lastEvaluationContextWindowHistory;
611
+ minimumOverlapPercentageToPreventContextShift;
612
+ functionsEnabled;
613
+ repeatPenaltyEnabled;
614
+ resolvedContextShift;
615
+ resolvedRepeatPenalty;
616
+ lastModelResponse;
617
+ grammarEvaluationState;
618
+ functionNameGrammar;
619
+ functionsGrammar;
620
+ functionsEvaluationState;
621
+ streamRegulator = new TokenStreamRegulator();
622
+ stopGenerationDetector = new StopGenerationDetector();
623
+ customStopGenerationTriggersDetector = new StopGenerationDetector();
624
+ functionSyntaxStartDetector = new StopGenerationDetector();
625
+ disengageInitiallyEngagedFunctionMode = new StopGenerationDetector();
626
+ ignoreStartTextDetector = new StopGenerationDetector();
627
+ locksToReleaseOnValidGeneration = [];
628
+ resolvedHistory;
629
+ res = [];
630
+ pendingTokens = [];
631
+ ignoredStartTextTokens = [];
632
+ resFunctionCalls = [];
633
+ functionEvaluationMode = false;
634
+ currentFunctionCallPreviousText = LlamaText([]);
635
+ currentFunctionCallCurrentPartTokens = [];
636
+ functionEvaluationFunctionName = "";
637
+ currentFunctionCallPreviousPartLeftoverText = "";
638
+ removedStartTextToIgnore = false;
639
+ releasedPartiallyFreeTokensBeforeFunctionCallStartSyntax = false;
640
+ generatedTokens = 0;
641
+ isFirstEvaluation = true;
642
+ initiallyEngagedFunctionMode = false;
643
+ lastContextWindowHistory;
644
+ lastHistoryCompressionMetadata;
645
+ restartEvaluationIterator = false;
646
+ // context shift loop
647
+ shouldContextShift = false;
648
+ canAvoidReloadingHistory = false;
649
+ contextWindowTokens = [];
650
+ stopGenerationTriggers = [];
651
+ ignoreStartText = [];
652
+ functionCallInitiallyEngaged = false;
653
+ disengageInitiallyEngagedFunctionCall = [];
654
+ userTextSuffix = undefined;
655
+ tokens = [];
656
+ contextWindowLastModelResponse = "";
657
+ contextWindowsRes = [];
658
+ // token evaluation loop
659
+ evaluationIterator;
660
+ currentIteration;
661
+ currentIterationReplacementToken;
662
+ currentToken;
663
+ currentTokens = [];
664
+ currentText = "";
665
+ currentQueuedTokenRelease;
666
+ constructor(llamaChat, chatWrapper, history, { onToken, signal, stopOnAbortSignal = false, maxTokens, temperature, minP, topK, topP, grammar, trimWhitespaceSuffix = defaultTrimWhitespaceSuffix, repeatPenalty = {}, tokenBias, evaluationPriority = defaultEvaluationPriority, functions, onFunctionCall, documentFunctionParams, maxParallelFunctionCalls, contextShift = defaultContextShiftOptions, customStopTriggers, lastEvaluationContextWindow: { history: lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift = 0.5 } = {} } = {}) {
667
+ this.llamaChat = llamaChat;
668
+ this.chatWrapper = chatWrapper;
669
+ this.history = history;
670
+ this.onToken = safeEventCallback(onToken);
671
+ this.signal = signal;
672
+ this.stopOnAbortSignal = stopOnAbortSignal;
673
+ this.maxTokens = maxTokens;
674
+ this.temperature = temperature;
675
+ this.minP = minP;
676
+ this.topK = topK;
677
+ this.topP = topP;
678
+ this.grammar = grammar;
679
+ this.trimWhitespaceSuffix = trimWhitespaceSuffix;
680
+ this.tokenBias = tokenBias;
681
+ this.evaluationPriority = evaluationPriority;
682
+ this.functions = functions;
683
+ this.onFunctionCall = safeEventCallback(onFunctionCall);
684
+ this.documentFunctionParams = documentFunctionParams;
685
+ this.maxParallelFunctionCalls = maxParallelFunctionCalls;
686
+ this.contextShift = contextShift;
687
+ this.customStopTriggers = customStopTriggers;
688
+ this.lastEvaluationContextWindowHistory = lastEvaluationContextWindowHistory;
689
+ this.minimumOverlapPercentageToPreventContextShift = minimumOverlapPercentageToPreventContextShift;
690
+ this.functionsEnabled = (this.functions != null && Object.keys(this.functions).length > 0);
691
+ if (this.signal?.aborted)
692
+ throw this.signal.reason;
693
+ if (this.llamaChat.disposed)
694
+ throw new DisposedError();
695
+ this.resolvedHistory = this.llamaChat.sequence.isLoadedToMemory
696
+ ? this.history.slice()
697
+ : this.history.map(removeRawFromHistoryItem);
698
+ this.resolvedContextShift = {
699
+ ...defaultContextShiftOptions,
700
+ ...removeNullFields(this.contextShift)
701
+ };
702
+ this.resolvedRepeatPenalty = repeatPenalty === false
703
+ ? { lastTokens: 0 }
704
+ : {
705
+ ...(repeatPenalty ?? {}),
706
+ lastTokens: repeatPenalty?.lastTokens ?? defaultRepeatPenaltyLastTokens
707
+ };
708
+ this.lastModelResponse = getLastTextModelResponseFromChatHistory(this.resolvedHistory);
709
+ this.repeatPenaltyEnabled = this.resolvedRepeatPenalty.lastTokens > 0;
710
+ this.grammarEvaluationState = this.grammar != null
711
+ ? new LlamaGrammarEvaluationState({ grammar: this.grammar })
712
+ : undefined;
713
+ this.functionNameGrammar = this.functionsEnabled
714
+ ? new FunctionCallNameGrammar(this.llamaChat.model._llama, this.functions, this.chatWrapper)
715
+ : undefined;
716
+ this.functionsGrammar = undefined;
717
+ this.functionsEvaluationState = undefined;
718
+ this.lastContextWindowHistory = this.resolvedHistory;
719
+ this.lastHistoryCompressionMetadata = this.resolvedContextShift;
720
+ if (this.customStopTriggers != null)
721
+ StopGenerationDetector.resolveStopTriggers(this.customStopTriggers, this.llamaChat.model.tokenizer)
722
+ .map((stopTrigger) => this.customStopGenerationTriggersDetector.addStopTrigger(stopTrigger));
723
+ if (this.grammar != null)
724
+ StopGenerationDetector.resolveStopTriggers(this.grammar.stopGenerationTriggers, this.llamaChat.model.tokenizer)
725
+ .map((stopTrigger) => this.stopGenerationDetector.addStopTrigger(stopTrigger));
726
+ if (this.functions != null && Object.keys(this.functions).length > 0)
727
+ this.functionSyntaxStartDetector.addStopTrigger(StopGenerationDetector.resolveLlamaTextTrigger(LlamaText([
728
+ this.chatWrapper.settings.functions?.parallelism?.call?.sectionPrefix ?? "",
729
+ this.chatWrapper.settings.functions.call.prefix
730
+ ]), this.llamaChat.model.tokenizer));
731
+ this.getPenaltyTokens = this.getPenaltyTokens.bind(this);
732
+ }
733
+ dispose() {
734
+ }
735
+ [Symbol.dispose]() {
736
+ this.dispose();
737
+ }
738
+ ensureLastHistoryItemIsModel() {
739
+ if (this.resolvedHistory.length === 0 || this.resolvedHistory[this.resolvedHistory.length - 1].type !== "model")
740
+ this.resolvedHistory.push({
741
+ type: "model",
742
+ response: []
743
+ });
744
+ }
745
+ ensureLastHistoryItemIsUser() {
746
+ if (this.resolvedHistory.length === 0 || this.resolvedHistory[this.resolvedHistory.length - 1].type !== "user")
747
+ this.resolvedHistory.push({
748
+ type: "user",
749
+ text: ""
750
+ });
751
+ }
752
+ ensureNotAborted() {
753
+ if (this.signal?.aborted && (!this.stopOnAbortSignal || this.res.length === 0))
754
+ throw this.signal.reason;
755
+ if (this.llamaChat.disposed)
756
+ throw new DisposedError();
757
+ }
758
+ getPenaltyTokens() {
759
+ if (this.llamaChat.disposed)
760
+ throw new DisposedError();
761
+ let punishTokens = this.res.slice(-this.resolvedRepeatPenalty.lastTokens);
762
+ if (this.resolvedRepeatPenalty.punishTokensFilter != null)
763
+ punishTokens = this.resolvedRepeatPenalty.punishTokensFilter(punishTokens);
764
+ if (this.resolvedRepeatPenalty.penalizeNewLine == null || !this.resolvedRepeatPenalty.penalizeNewLine) {
765
+ const nlToken = this.llamaChat.model.tokens.nl;
766
+ if (nlToken != null)
767
+ punishTokens = punishTokens.filter(token => token !== nlToken);
768
+ }
769
+ return punishTokens;
770
+ }
771
+ getResolvedHistoryWithCurrentModelResponse() {
772
+ if (this.res.length === 0)
773
+ return this.resolvedHistory;
774
+ let modelResponse = this.llamaChat.model.detokenize(this.res);
775
+ if (this.grammar?.trimWhitespaceSuffix || this.trimWhitespaceSuffix)
776
+ modelResponse = modelResponse.trimEnd();
777
+ if (modelResponse === "")
778
+ return this.resolvedHistory;
779
+ return setLastModelTextResponseInChatHistory(this.resolvedHistory, this.lastModelResponse + modelResponse);
780
+ }
781
+ removeFoundStartIgnoreTextsFromPendingTokens(forceRemove = false) {
782
+ if (!this.removedStartTextToIgnore && this.res.length === 0 && this.pendingTokens.length > 0 &&
783
+ this.ignoreStartTextDetector.hasTriggeredStops && (forceRemove || !this.ignoreStartTextDetector.hasInProgressStops)) {
784
+ this.ignoreStartTextDetector.clearInProgressStops();
785
+ this.ignoreStartTextDetector.clearTriggeredStops();
786
+ let mostExhaustiveTriggeredStops = null;
787
+ let mostExhaustiveTriggeredStopsLeftoverTokens = [];
788
+ for (let i = 0; i < this.pendingTokens.length; i++) {
789
+ this.ignoreStartTextDetector.recordGeneration({
790
+ text: this.llamaChat.model.detokenize([this.pendingTokens[i]]),
791
+ tokens: [this.pendingTokens[i]],
792
+ startNewChecks: i === 0,
793
+ triggerMustStartWithGeneration: true
794
+ });
795
+ if (this.ignoreStartTextDetector.hasTriggeredStops) {
796
+ mostExhaustiveTriggeredStops = this.ignoreStartTextDetector.getTriggeredStops();
797
+ this.ignoreStartTextDetector.clearTriggeredStops();
798
+ mostExhaustiveTriggeredStopsLeftoverTokens = this.pendingTokens.slice(i + 1);
799
+ }
800
+ else if (!this.ignoreStartTextDetector.hasInProgressStops)
801
+ break;
802
+ }
803
+ if (mostExhaustiveTriggeredStops != null) {
804
+ const [mostExhaustiveTriggeredStop] = mostExhaustiveTriggeredStops;
805
+ if (mostExhaustiveTriggeredStop != null) {
806
+ this.ignoredStartTextTokens = mostExhaustiveTriggeredStop.stopTrigger
807
+ .map((stopTrigger) => {
808
+ if (typeof stopTrigger === "string")
809
+ return this.llamaChat.model.tokenize(stopTrigger, false, "trimLeadingSpace");
810
+ else
811
+ return [stopTrigger];
812
+ })
813
+ .flat(1);
814
+ const newPendingTokens = [
815
+ ...mostExhaustiveTriggeredStop.remainingGeneration,
816
+ mostExhaustiveTriggeredStopsLeftoverTokens
817
+ ]
818
+ .map((generation) => {
819
+ if (typeof generation === "string")
820
+ return this.llamaChat.model.tokenize(generation, false, "trimLeadingSpace");
821
+ else
822
+ return generation;
823
+ })
824
+ .flat(1);
825
+ this.pendingTokens.length = 0;
826
+ this.pendingTokens.push(...newPendingTokens);
827
+ this.removedStartTextToIgnore = true;
828
+ }
829
+ }
830
+ }
831
+ }
832
+ startTokenLoop() {
833
+ this.ensureNotAborted();
834
+ this.shouldContextShift = false;
835
+ }
836
+ getContextWindowFunctionCallsTokens() {
837
+ if (this.functionEvaluationMode === false)
838
+ return [];
839
+ else if (this.functionEvaluationMode === "prefixOrDisengage")
840
+ return [
841
+ ...LlamaText(this.currentFunctionCallPreviousText).tokenize(this.llamaChat.model.tokenizer, "trimLeadingSpace"),
842
+ ...this.currentFunctionCallCurrentPartTokens
843
+ ];
844
+ const text = [];
845
+ if (this.chatWrapper.settings.functions?.parallelism?.call?.sectionPrefix != null)
846
+ text.push(this.chatWrapper.settings.functions.parallelism.call.sectionPrefix);
847
+ for (let i = 0; i < this.resFunctionCalls.length; i++) {
848
+ const call = this.resFunctionCalls[i];
849
+ if (i > 0)
850
+ text.push(this.chatWrapper.settings.functions?.parallelism?.call?.betweenCalls ?? "");
851
+ text.push(call.raw);
852
+ }
853
+ text.push(this.currentFunctionCallPreviousText);
854
+ return [
855
+ ...LlamaText(text).tokenize(this.llamaChat.model.tokenizer, "trimLeadingSpace"),
856
+ ...this.currentFunctionCallCurrentPartTokens
857
+ ];
858
+ }
859
+ async loadContextWindow(resolvedHistory, endWithUserText = false, avoidReloadingHistory = false) {
860
+ const queuedChunkTokens = this.streamRegulator.getAllQueuedChunkTokens();
861
+ const functionCallsTokens = this.getContextWindowFunctionCallsTokens();
862
+ if (!avoidReloadingHistory || !this.canAvoidReloadingHistory || !this.llamaChat.sequence.isLoadedToMemory) {
863
+ const { history: contextWindowHistory, stopGenerationTriggers, tokens: contextWindowTokens, newResolvedHistory, newHistoryCompressionMetadata, ignoreStartText, functionCallInitiallyEngaged, disengageInitiallyEngagedFunctionCall, userTextSuffix } = await getContextWindow({
864
+ resolvedHistory: resolvedHistory,
865
+ resolvedContextShift: this.resolvedContextShift,
866
+ lastHistoryCompressionMetadata: this.lastHistoryCompressionMetadata,
867
+ pendingTokensCount: this.pendingTokens.length + queuedChunkTokens.length + functionCallsTokens.length,
868
+ isFirstEvaluation: this.isFirstEvaluation,
869
+ chatWrapper: this.chatWrapper,
870
+ lastEvaluationContextWindowHistory: this.lastEvaluationContextWindowHistory,
871
+ minimumOverlapPercentageToPreventContextShift: this.minimumOverlapPercentageToPreventContextShift,
872
+ sequence: this.llamaChat.sequence,
873
+ minFreeContextTokens: 1,
874
+ functions: this.functionsEnabled ? this.functions : undefined,
875
+ documentFunctionParams: this.documentFunctionParams,
876
+ endWithUserText
877
+ });
878
+ this.ensureNotAborted();
879
+ this.contextWindowTokens = contextWindowTokens;
880
+ this.stopGenerationTriggers = stopGenerationTriggers;
881
+ this.ignoreStartText = ignoreStartText;
882
+ this.functionCallInitiallyEngaged = functionCallInitiallyEngaged;
883
+ this.disengageInitiallyEngagedFunctionCall = disengageInitiallyEngagedFunctionCall;
884
+ this.userTextSuffix = userTextSuffix;
885
+ this.resolvedHistory = newResolvedHistory;
886
+ this.lastHistoryCompressionMetadata = newHistoryCompressionMetadata;
887
+ this.lastContextWindowHistory = contextWindowHistory;
888
+ this.contextWindowLastModelResponse = getLastTextModelResponseFromChatHistory(contextWindowHistory);
889
+ this.contextWindowsRes = [];
890
+ this.canAvoidReloadingHistory = true;
891
+ }
892
+ this.tokens = [
893
+ ...this.contextWindowTokens,
894
+ ...this.ignoredStartTextTokens,
895
+ ...this.pendingTokens,
896
+ ...queuedChunkTokens,
897
+ ...functionCallsTokens
898
+ ];
899
+ if (avoidReloadingHistory && this.tokens.length >= this.llamaChat.sequence.context.contextSize - 1)
900
+ return await this.loadContextWindow(resolvedHistory, endWithUserText, false);
901
+ return {
902
+ userTextSuffix: this.userTextSuffix
903
+ };
904
+ }
905
+ addIgnoreStartTextTriggersFromChatWrapper() {
906
+ StopGenerationDetector.resolveStopTriggers(this.ignoreStartText, this.llamaChat.model.tokenizer)
907
+ .map((stopTrigger) => this.ignoreStartTextDetector.addStopTrigger(stopTrigger));
908
+ }
909
+ addStopGenerationTriggersFromChatWrapper() {
910
+ StopGenerationDetector.resolveStopTriggers(this.stopGenerationTriggers, this.llamaChat.model.tokenizer)
911
+ .map((stopTrigger) => this.stopGenerationDetector.addStopTrigger(stopTrigger));
912
+ }
913
+ initFunctions() {
914
+ this.initiallyEngagedFunctionMode = this.functionCallInitiallyEngaged;
915
+ if (this.initiallyEngagedFunctionMode) {
916
+ StopGenerationDetector.resolveStopTriggers(this.disengageInitiallyEngagedFunctionCall, this.llamaChat.model.tokenizer)
917
+ .map((stopTrigger) => this.disengageInitiallyEngagedFunctionMode.addStopTrigger(stopTrigger));
918
+ if (this.disengageInitiallyEngagedFunctionMode.hasTriggers) {
919
+ this.functionEvaluationMode = "prefixOrDisengage";
920
+ this.functionsGrammar = undefined;
921
+ this.functionsEvaluationState = undefined;
922
+ }
923
+ else {
924
+ this.functionEvaluationMode = "functionName";
925
+ }
926
+ this.restartEvaluationIterator = true;
927
+ }
928
+ }
929
+ async enterFunctionCallingLoop(loadContextWindow) {
930
+ if (!this.functionsEnabled) {
931
+ this.functionEvaluationMode = false;
932
+ return undefined;
933
+ }
934
+ // eslint-disable-next-line no-constant-condition
935
+ while (true) {
936
+ if (this.functionEvaluationMode === "prefixOrDisengage") {
937
+ this.functionsGrammar = undefined;
938
+ this.functionsEvaluationState = undefined;
939
+ this.currentFunctionCallPreviousText = LlamaText([]);
940
+ this.currentFunctionCallCurrentPartTokens.length = 0;
941
+ const prefixTokens = LlamaText(this.chatWrapper.settings.functions.call.prefix)
942
+ .tokenize(this.llamaChat.model.tokenizer, "trimLeadingSpace");
943
+ const prefixDetector = new StopGenerationDetector();
944
+ const afterPrefixLeftoverTokens = [];
945
+ prefixDetector.addStopTrigger(StopGenerationDetector.resolveLlamaTextTrigger(LlamaText(this.chatWrapper.settings.functions.call.prefix), this.llamaChat.model.tokenizer));
946
+ for (const prefixToken of prefixTokens) {
947
+ const tokens = [prefixToken];
948
+ const text = this.llamaChat.model.detokenize(tokens);
949
+ const disregardedPossibilities = this.disengageInitiallyEngagedFunctionMode
950
+ .getDisregardedPossibilitiesCountForAGeneration({
951
+ text,
952
+ tokens,
953
+ startNewChecks: this.currentFunctionCallCurrentPartTokens.length === 0
954
+ });
955
+ if (disregardedPossibilities > 0)
956
+ break;
957
+ this.currentFunctionCallCurrentPartTokens.push(prefixToken);
958
+ this.disengageInitiallyEngagedFunctionMode.recordGeneration({
959
+ text: text,
960
+ tokens: tokens,
961
+ startNewChecks: this.currentFunctionCallCurrentPartTokens.length === 1,
962
+ triggerMustStartWithGeneration: true
963
+ });
964
+ if (prefixDetector.hasTriggeredStops)
965
+ afterPrefixLeftoverTokens.push(prefixToken);
966
+ else
967
+ prefixDetector.recordGeneration({
968
+ text: text,
969
+ tokens: tokens,
970
+ startNewChecks: this.currentFunctionCallCurrentPartTokens.length === 1,
971
+ triggerMustStartWithGeneration: true
972
+ });
973
+ }
974
+ for await (const token of this.evaluateWithContextShift(loadContextWindow)) {
975
+ const stopGenerationTriggerRes = this.handleStopGenerationTrigger("model");
976
+ if (stopGenerationTriggerRes != null)
977
+ return stopGenerationTriggerRes;
978
+ this.currentFunctionCallCurrentPartTokens.push(token);
979
+ this.disengageInitiallyEngagedFunctionMode.recordGeneration({
980
+ text: this.currentText,
981
+ tokens: this.currentTokens,
982
+ startNewChecks: this.currentFunctionCallCurrentPartTokens.length === 1,
983
+ triggerMustStartWithGeneration: true
984
+ });
985
+ if (prefixDetector.hasTriggeredStops)
986
+ afterPrefixLeftoverTokens.push(token);
987
+ else
988
+ prefixDetector.recordGeneration({
989
+ text: this.currentText,
990
+ tokens: this.currentTokens,
991
+ startNewChecks: this.currentFunctionCallCurrentPartTokens.length === 1,
992
+ triggerMustStartWithGeneration: true
993
+ });
994
+ if (this.disengageInitiallyEngagedFunctionMode.hasTriggeredStops ||
995
+ !this.disengageInitiallyEngagedFunctionMode.hasInProgressStops)
996
+ break;
997
+ }
998
+ const abortRes = this.handleAbortTrigger("model");
999
+ if (abortRes != null)
1000
+ return abortRes;
1001
+ if (this.disengageInitiallyEngagedFunctionMode.hasTriggeredStops) {
1002
+ for (const token of this.currentFunctionCallCurrentPartTokens) {
1003
+ this.currentToken = token;
1004
+ this.currentTokens = [this.currentToken];
1005
+ this.currentText = this.llamaChat.model.detokenize(this.currentTokens);
1006
+ this.currentQueuedTokenRelease = this.streamRegulator.addChunk({
1007
+ tokens: this.currentTokens,
1008
+ text: this.currentText
1009
+ });
1010
+ this.recordStopGenerationEvaluation();
1011
+ }
1012
+ this.currentFunctionCallCurrentPartTokens.length = 0;
1013
+ this.functionEvaluationMode = false;
1014
+ return undefined;
1015
+ }
1016
+ if (prefixDetector.hasTriggeredStops) {
1017
+ const triggeredStops = prefixDetector.getTriggeredStops();
1018
+ const firstRemainingGenerationAfterStop = StopGenerationDetector.getFirstRemainingGenerationAfterStop(triggeredStops);
1019
+ this.currentFunctionCallPreviousPartLeftoverText = StopGenerationDetector.detokenizeRemainingGeneration(firstRemainingGenerationAfterStop, this.llamaChat.model.detokenize) + this.llamaChat.model.detokenize(afterPrefixLeftoverTokens);
1020
+ }
1021
+ else
1022
+ this.currentFunctionCallPreviousPartLeftoverText = "";
1023
+ this.functionEvaluationMode = "functionName";
1024
+ this.currentFunctionCallCurrentPartTokens.length = 0;
1025
+ continue;
1026
+ }
1027
+ else if (this.functionEvaluationMode === "functionName") {
1028
+ const functionNameGenerationDoneDetector = new StopGenerationDetector();
1029
+ this.stopGenerationDetector.clearInProgressStops();
1030
+ this.customStopGenerationTriggersDetector.clearInProgressStops();
1031
+ this.currentFunctionCallPreviousText = LlamaText(this.chatWrapper.settings.functions.call.prefix);
1032
+ this.currentFunctionCallCurrentPartTokens.length = 0;
1033
+ const functionNameGrammar = this.functionNameGrammar ?? new FunctionCallNameGrammar(this.llamaChat.model._llama, this.functions, this.chatWrapper);
1034
+ this.functionsGrammar = functionNameGrammar;
1035
+ this.functionsEvaluationState = new LlamaGrammarEvaluationState({
1036
+ grammar: this.functionsGrammar
1037
+ });
1038
+ StopGenerationDetector.resolveStopTriggers(this.functionsGrammar.stopGenerationTriggers, this.llamaChat.model.tokenizer)
1039
+ .map((stopTrigger) => functionNameGenerationDoneDetector.addStopTrigger(stopTrigger));
1040
+ if (this.currentFunctionCallPreviousPartLeftoverText !== "") {
1041
+ const validFunctionNames = Object.keys(this.functions);
1042
+ const hasAnyFunctionStartWithLeftover = validFunctionNames.some((functionName) => functionName.startsWith(this.currentFunctionCallPreviousPartLeftoverText));
1043
+ if (hasAnyFunctionStartWithLeftover) {
1044
+ const leftoverTokens = this.llamaChat.model.tokenize(this.currentFunctionCallPreviousPartLeftoverText, false, "trimLeadingSpace");
1045
+ this.currentFunctionCallPreviousPartLeftoverText = "";
1046
+ for (const leftoverToken of leftoverTokens) {
1047
+ const canBeNextToken = this.llamaChat.context._canBeNextTokenForGrammarEvaluationState(this.functionsEvaluationState, leftoverToken);
1048
+ if (!canBeNextToken)
1049
+ break;
1050
+ this.llamaChat.context._acceptTokenOnGrammarEvaluationState(this.functionsEvaluationState, leftoverToken);
1051
+ this.currentFunctionCallCurrentPartTokens.push(leftoverToken);
1052
+ functionNameGenerationDoneDetector.recordGeneration({
1053
+ text: this.llamaChat.model.detokenize([leftoverToken]),
1054
+ tokens: [leftoverToken]
1055
+ });
1056
+ }
1057
+ }
1058
+ }
1059
+ for await (const token of this.evaluateWithContextShift(loadContextWindow)) {
1060
+ this.currentFunctionCallCurrentPartTokens.push(token);
1061
+ functionNameGenerationDoneDetector.recordGeneration({
1062
+ text: this.currentText,
1063
+ tokens: this.currentTokens
1064
+ });
1065
+ if (functionNameGenerationDoneDetector.hasTriggeredStops)
1066
+ break;
1067
+ }
1068
+ const abortRes = this.handleAbortTrigger("model");
1069
+ if (abortRes != null)
1070
+ return abortRes;
1071
+ const functionCallNameText = this.llamaChat.model.detokenize(this.currentFunctionCallCurrentPartTokens);
1072
+ const functionName = functionNameGrammar.parseFunctionName(functionCallNameText);
1073
+ this.functionEvaluationFunctionName = functionName;
1074
+ this.functionEvaluationMode = "params";
1075
+ continue;
1076
+ }
1077
+ else if (this.functionEvaluationMode === "params") {
1078
+ this.currentFunctionCallPreviousText = LlamaText([
1079
+ this.chatWrapper.settings.functions.call.prefix,
1080
+ this.functionEvaluationFunctionName,
1081
+ this.chatWrapper.settings.functions.call.paramsPrefix
1082
+ ]);
1083
+ this.currentFunctionCallCurrentPartTokens.length = 0;
1084
+ let params = undefined;
1085
+ let paramsText = "";
1086
+ const functionDefinition = this.functions[this.functionEvaluationFunctionName];
1087
+ if (functionDefinition == null)
1088
+ throw new Error(`Function "${this.functionEvaluationFunctionName}" is not provided in the functions object`);
1089
+ else if (functionDefinition.params == null) {
1090
+ params = undefined;
1091
+ paramsText = "";
1092
+ }
1093
+ else {
1094
+ const functionParamsGenerationDoneDetector = new StopGenerationDetector();
1095
+ const functionParamsGrammar = new FunctionCallParamsGrammar(this.llamaChat.model._llama, this.functions, this.chatWrapper, this.functionEvaluationFunctionName, functionDefinition.params);
1096
+ this.functionsGrammar = functionParamsGrammar;
1097
+ this.functionsEvaluationState = new LlamaGrammarEvaluationState({
1098
+ grammar: this.functionsGrammar
1099
+ });
1100
+ StopGenerationDetector.resolveStopTriggers(this.functionsGrammar.stopGenerationTriggers, this.llamaChat.model.tokenizer)
1101
+ .map((stopTrigger) => functionParamsGenerationDoneDetector.addStopTrigger(stopTrigger));
1102
+ for await (const token of this.evaluateWithContextShift(loadContextWindow)) {
1103
+ this.currentFunctionCallCurrentPartTokens.push(token);
1104
+ functionParamsGenerationDoneDetector.recordGeneration({
1105
+ text: this.currentText,
1106
+ tokens: this.currentTokens
1107
+ });
1108
+ if (functionParamsGenerationDoneDetector.hasTriggeredStops)
1109
+ break;
1110
+ }
1111
+ const abortRes = this.handleAbortTrigger("model");
1112
+ if (abortRes != null)
1113
+ return abortRes;
1114
+ const functionCallParamsText = this.llamaChat.model.detokenize(this.currentFunctionCallCurrentPartTokens);
1115
+ const parsedFunctionParams = functionParamsGrammar.parseParams(functionCallParamsText);
1116
+ params = parsedFunctionParams.params;
1117
+ paramsText = parsedFunctionParams.raw;
1118
+ }
1119
+ const functionCallText = LlamaText([
1120
+ this.chatWrapper.settings.functions.call.prefix,
1121
+ this.functionEvaluationFunctionName,
1122
+ this.chatWrapper.settings.functions.call.paramsPrefix,
1123
+ paramsText,
1124
+ this.chatWrapper.settings.functions.call.suffix
1125
+ ]);
1126
+ this.resFunctionCalls.push({
1127
+ functionName: this.functionEvaluationFunctionName,
1128
+ params,
1129
+ raw: functionCallText
1130
+ });
1131
+ this.onFunctionCall?.({
1132
+ functionName: this.functionEvaluationFunctionName,
1133
+ params: structuredClone(params),
1134
+ raw: functionCallText.toJSON()
1135
+ });
1136
+ this.currentFunctionCallPreviousText = LlamaText([]);
1137
+ this.currentFunctionCallCurrentPartTokens.length = 0;
1138
+ this.functionEvaluationFunctionName = "";
1139
+ if (this.chatWrapper.settings.functions.parallelism == null || (this.maxParallelFunctionCalls != null && this.maxParallelFunctionCalls <= this.resFunctionCalls.length)) {
1140
+ this.functionEvaluationMode = false;
1141
+ return this.returnFunctionCallResults();
1142
+ }
1143
+ this.functionEvaluationMode = "sectionSuffixOrBetweenCalls";
1144
+ continue;
1145
+ }
1146
+ else if (this.functionEvaluationMode === "sectionSuffixOrBetweenCalls") {
1147
+ const sectionSuffixDetector = new StopGenerationDetector();
1148
+ let isFirstToken = true;
1149
+ this.functionsGrammar = undefined;
1150
+ this.functionsEvaluationState = undefined;
1151
+ this.currentFunctionCallPreviousText = LlamaText([]);
1152
+ this.currentFunctionCallCurrentPartTokens.length = 0;
1153
+ StopGenerationDetector.resolveStopTriggers([
1154
+ ...(this.chatWrapper.settings.functions.parallelism?.call?.sectionSuffix != null
1155
+ ? [this.chatWrapper.settings.functions.parallelism?.call?.sectionSuffix]
1156
+ : []),
1157
+ LlamaText(new SpecialToken("EOS")),
1158
+ LlamaText(new SpecialToken("EOT"))
1159
+ ], this.llamaChat.model.tokenizer)
1160
+ .map((stopTrigger) => sectionSuffixDetector.addStopTrigger(stopTrigger));
1161
+ for await (const token of this.evaluateWithContextShift(loadContextWindow)) {
1162
+ this.currentFunctionCallCurrentPartTokens.push(token);
1163
+ sectionSuffixDetector.recordGeneration({
1164
+ text: this.currentText,
1165
+ tokens: this.currentTokens,
1166
+ startNewChecks: isFirstToken,
1167
+ triggerMustStartWithGeneration: true
1168
+ });
1169
+ isFirstToken = false;
1170
+ if (sectionSuffixDetector.hasTriggeredStops || !sectionSuffixDetector.hasInProgressStops)
1171
+ break;
1172
+ }
1173
+ const abortRes = this.handleAbortTrigger("model");
1174
+ if (abortRes != null)
1175
+ return abortRes;
1176
+ if (sectionSuffixDetector.hasTriggeredStops) {
1177
+ this.functionEvaluationMode = false;
1178
+ return this.returnFunctionCallResults();
1179
+ }
1180
+ this.functionEvaluationMode = "functionName";
1181
+ this.initiallyEngagedFunctionMode = false;
1182
+ continue;
1183
+ }
1184
+ break;
1185
+ }
1186
+ return undefined;
1187
+ }
1188
+ releasePartiallyFreeTokensBeforeFunctionCallStart() {
1189
+ if (this.releasedPartiallyFreeTokensBeforeFunctionCallStartSyntax)
1190
+ return;
1191
+ this.stopGenerationDetector.clearInProgressStops();
1192
+ this.customStopGenerationTriggersDetector.clearInProgressStops();
1193
+ this.pendingTokens.push(...this.streamRegulator.popFreeChunkTokens());
1194
+ const triggeredStops = this.functionSyntaxStartDetector.getTriggeredStops();
1195
+ const partiallyFreeTokens = this.streamRegulator.getPartiallyFreeChunk(this.llamaChat.model.tokenizer);
1196
+ const queuedTokensBeforeStopTrigger = getQueuedTokensBeforeStopTrigger(triggeredStops, partiallyFreeTokens, this.llamaChat.model.tokenizer);
1197
+ this.pendingTokens.push(...queuedTokensBeforeStopTrigger);
1198
+ this.removeFoundStartIgnoreTextsFromPendingTokens(true);
1199
+ if (this.pendingTokens.length > 0)
1200
+ this.onToken?.(this.pendingTokens.slice());
1201
+ this.res.push(...this.pendingTokens);
1202
+ this.contextWindowsRes.push(...this.pendingTokens);
1203
+ this.pendingTokens.length = 0;
1204
+ this.streamRegulator.clearQueue();
1205
+ this.releasedPartiallyFreeTokensBeforeFunctionCallStartSyntax = true;
1206
+ }
1207
    /**
     * Finalizes generation when function calls were collected (`resFunctionCalls` non-empty).
     * Flushes held-back tokens, detokenizes the accumulated response, optionally trims a
     * trailing-whitespace suffix, and returns a result object with `stopReason: "functionCalls"`.
     * Returns `undefined` when no function calls were collected.
     */
    returnFunctionCallResults() {
        if (this.resFunctionCalls.length > 0) {
            // release tokens that were queued while the function-call start syntax was being detected
            this.releasePartiallyFreeTokensBeforeFunctionCallStart();
            let modelResponse = this.llamaChat.model.detokenize(this.res);
            let contextWindowModelResponse = this.llamaChat.model.detokenize(this.contextWindowsRes);
            if (this.grammar?.trimWhitespaceSuffix || this.trimWhitespaceSuffix) {
                modelResponse = modelResponse.trimEnd();
                contextWindowModelResponse = contextWindowModelResponse.trimEnd();
            }
            return {
                response: modelResponse,
                lastEvaluation: {
                    // both histories get the generated text appended to their last "model" item
                    contextWindow: setLastTextInChatHistory("model", this.lastContextWindowHistory, this.contextWindowLastModelResponse + contextWindowModelResponse),
                    cleanHistory: setLastTextInChatHistory("model", this.resolvedHistory, this.lastModelResponse + modelResponse),
                    contextShiftMetadata: this.lastHistoryCompressionMetadata
                },
                functionCalls: this.resFunctionCalls.map((functionCall) => {
                    return {
                        functionName: functionCall.functionName,
                        params: functionCall.params,
                        raw: functionCall.raw.toJSON()
                    };
                }), // prevent infinite TS type instantiation
                metadata: {
                    stopReason: "functionCalls"
                }
            };
        }
        return undefined;
    }
1237
    /**
     * Main generation driver. Repeatedly builds a context window (via the provided
     * `loadContextWindow` callback), aligns the sequence's cached state with the current
     * tokens, and yields generated tokens one by one.
     *
     * The outer loop restarts (context shift) whenever the context fills up
     * (`updateShouldContextShift()`); it returns early on abort, and falling out of the
     * loop without yielding a usable result means the context was too small — hence the
     * trailing throw.
     *
     * @param {() => Promise<void>} loadContextWindow - prepares `this.tokens` for evaluation
     */
    async *evaluateWithContextShift(loadContextWindow) {
        while (true) {
            this.startTokenLoop();
            await loadContextWindow();
            await this.alignCurrentSequenceStateWithCurrentTokens();
            await this.createNewEvaluationIterator();
            while (await this.iterateEvaluation()) {
                if (this.currentToken == null)
                    break;
                yield this.currentToken;
                if (this.shouldAbort)
                    return;
                // context is full - break out to run a context shift
                if (this.updateShouldContextShift())
                    break;
                // something (e.g. function-call handling) asked for a fresh iterator
                if (this.restartEvaluationIterator) {
                    await this.createNewEvaluationIterator();
                }
            }
            this.isFirstEvaluation = false;
            if (this.shouldContextShift)
                continue;
            break;
        }
        throw new Error("The context size is too small to generate a response");
    }
1262
    /**
     * Aligns the sequence's cached context state with `this.tokens`:
     * finds the first index where the sequence's existing context tokens differ from the
     * current token list, drops the already-evaluated common prefix from `this.tokens`,
     * and erases the now-stale tail from the sequence's context.
     */
    async alignCurrentSequenceStateWithCurrentTokens() {
        let { firstDifferentIndex } = this.llamaChat.sequence.compareContextTokens(this.tokens);
        // we need to decode at least one token to generate a response
        if (firstDifferentIndex === this.tokens.length && firstDifferentIndex > 0)
            firstDifferentIndex -= 1;
        // keep only the tokens that still need to be evaluated
        this.tokens.splice(0, firstDifferentIndex);
        if (firstDifferentIndex < this.llamaChat.sequence.nextTokenIndex) {
            // the sequence has evaluated tokens beyond the divergence point - erase them
            await this.llamaChat.sequence.eraseContextTokenRanges([{
                    start: firstDifferentIndex,
                    end: this.llamaChat.sequence.nextTokenIndex
                }]);
            this.ensureNotAborted();
        }
    }
1276
    /**
     * Feeds `this.tokens` into the sequence without sampling any new tokens.
     * Any active evaluation iterator is closed first so the two evaluations don't overlap.
     */
    async evaluateWithoutGeneratingNewTokens() {
        if (this.evaluationIterator != null)
            await this.evaluationIterator.return();
        await this.llamaChat.sequence.evaluateWithoutGeneratingNewTokens(this.tokens, removeNullFields({
            evaluationPriority: this.evaluationPriority
        }));
    }
1283
    /**
     * Closes any previous evaluation iterator and starts a new one over `this.tokens`,
     * wiring in the sampling settings (temperature, minP, topK, topP), repeat penalty,
     * token bias, evaluation priority and grammar state.
     */
    async createNewEvaluationIterator() {
        if (this.evaluationIterator != null)
            await this.evaluationIterator.return();
        this.currentIterationReplacementToken = undefined;
        this.restartEvaluationIterator = false;
        this.evaluationIterator = this.llamaChat.sequence.evaluate(this.tokens, removeNullFields({
            temperature: this.temperature,
            minP: this.minP,
            topK: this.topK,
            topP: this.topP,
            // resolved lazily via a callback, so switching in/out of function evaluation
            // mode mid-generation picks up the matching grammar state
            grammarEvaluationState: () => {
                if (this.functionEvaluationMode !== false)
                    return this.functionsEvaluationState;
                return this.grammarEvaluationState;
            },
            repeatPenalty: !this.repeatPenaltyEnabled ? undefined : {
                punishTokens: this.getPenaltyTokens,
                penalty: this.resolvedRepeatPenalty.penalty,
                frequencyPenalty: this.resolvedRepeatPenalty.frequencyPenalty,
                presencePenalty: this.resolvedRepeatPenalty.presencePenalty
            },
            tokenBias: this.tokenBias,
            evaluationPriority: this.evaluationPriority,
            yieldEogToken: true // EOG tokens are yielded so stop handling can observe them
        }));
    }
1309
    /**
     * Advances the evaluation iterator by one step, recording the produced token,
     * its token list and detokenized text on the instance.
     * @returns {Promise<boolean>} `true` when a new token was produced, `false` when
     * the iterator is exhausted.
     */
    async iterateEvaluation() {
        // feed a replacement token (if one was set) back into the evaluator, then consume it
        this.currentIteration = await this.evaluationIterator?.next(this.currentIterationReplacementToken);
        this.currentIterationReplacementToken = undefined;
        this.ensureNotAborted();
        // NOTE(review): incremented even when the iterator reports `done` - confirm intended
        this.generatedTokens++;
        if (this.currentIteration != null && this.currentIteration?.done !== true) {
            this.currentToken = this.currentIteration.value;
            this.currentTokens = [this.currentToken];
            this.currentText = this.llamaChat.model.detokenize(this.currentTokens);
            // the stream regulator is only used for regular text generation,
            // not while a function call is being evaluated
            if (this.functionEvaluationMode === false)
                this.currentQueuedTokenRelease = this.streamRegulator.addChunk({
                    tokens: this.currentTokens,
                    text: this.currentText
                });
            else
                this.currentQueuedTokenRelease = undefined;
            return true;
        }
        return false;
    }
1329
+ waitOnPartialCharactersOrWhiteSpaceTokens() {
1330
+ if (this.currentText === UNKNOWN_UNICODE_CHAR || ((this.grammar?.trimWhitespaceSuffix || this.trimWhitespaceSuffix) && this.currentText?.trim() === "")) {
1331
+ if (this.currentQueuedTokenRelease != null)
1332
+ this.locksToReleaseOnValidGeneration.push(this.currentQueuedTokenRelease.createTextIndexLock(0));
1333
+ }
1334
+ else {
1335
+ while (this.locksToReleaseOnValidGeneration.length > 0)
1336
+ this.locksToReleaseOnValidGeneration.shift().dispose();
1337
+ }
1338
+ }
1339
    /**
     * Feeds the current chunk into the function-call start-syntax detector and, when the
     * start syntax has been fully matched (and functions are enabled), switches into
     * function-name evaluation mode: locks the current chunk, resets the stop detectors,
     * flushes the tokens generated before the trigger into `pendingTokens`, and stashes
     * any text generated after the trigger for the function-call parser.
     */
    detectAndHandleFunctionStartSyntax() {
        this.functionSyntaxStartDetector.recordGeneration({
            text: this.currentText,
            tokens: this.currentTokens,
            queuedTokenRelease: this.currentQueuedTokenRelease
        });
        if (this.currentQueuedTokenRelease != null && this.functionEvaluationMode === false && this.functionsEnabled &&
            this.functionSyntaxStartDetector.hasTriggeredStops) {
            // enter function-call parsing: the next tokens are the function name
            this.functionEvaluationMode = "functionName";
            this.currentQueuedTokenRelease.createTextIndexLock(0);
            // stop triggers accumulated so far no longer apply inside the function call
            this.stopGenerationDetector.clearTriggeredStops();
            this.stopGenerationDetector.clearInProgressStops();
            this.customStopGenerationTriggersDetector.clearTriggeredStops();
            this.customStopGenerationTriggersDetector.clearInProgressStops();
            this.pendingTokens.push(...this.streamRegulator.popFreeChunkTokens());
            const triggeredStops = this.functionSyntaxStartDetector.getTriggeredStops();
            const partiallyFreeTokens = this.streamRegulator.getPartiallyFreeChunk(this.llamaChat.model.tokenizer);
            // only tokens generated strictly before the start-syntax trigger count as response text
            const queuedTokensBeforeStopTrigger = getQueuedTokensBeforeStopTrigger(triggeredStops, partiallyFreeTokens, this.llamaChat.model.tokenizer);
            this.pendingTokens.push(...queuedTokensBeforeStopTrigger);
            const firstRemainingGenerationAfterStop = StopGenerationDetector.getFirstRemainingGenerationAfterStop(triggeredStops);
            const remainingTextAfterStop = StopGenerationDetector.detokenizeRemainingGeneration(firstRemainingGenerationAfterStop, this.llamaChat.model.detokenize);
            // text that came after the trigger belongs to the function call itself
            this.currentFunctionCallPreviousPartLeftoverText = remainingTextAfterStop;
        }
    }
1363
+ recordStopGenerationEvaluation() {
1364
+ this.stopGenerationDetector.recordGeneration({
1365
+ text: this.currentText,
1366
+ tokens: this.currentTokens,
1367
+ queuedTokenRelease: this.currentQueuedTokenRelease
1368
+ });
1369
+ this.customStopGenerationTriggersDetector.recordGeneration({
1370
+ text: this.currentText,
1371
+ tokens: this.currentTokens,
1372
+ queuedTokenRelease: this.currentQueuedTokenRelease
1373
+ });
1374
+ }
1375
+ popStreamRegulatorFreeTokens() {
1376
+ this.pendingTokens.push(...this.streamRegulator.popFreeChunkTokens());
1377
+ }
1378
    /**
     * Checks whether generation should end now: a built-in stop trigger fired, a custom
     * stop trigger fired, or the current token is an end-of-generation token.
     * If so, flushes the tokens generated before the trigger, builds the final response
     * and history snapshots, and returns a result object whose `stopReason` is
     * `"eogToken"`, `"stopGenerationTrigger"` or `"customStopTrigger"`.
     * Returns `undefined` when generation should continue.
     *
     * @param {"model"|string} lastHistoryItemType - type of the history item to write the text into
     */
    handleStopGenerationTrigger(lastHistoryItemType) {
        if (this.stopGenerationDetector.hasTriggeredStops || this.customStopGenerationTriggersDetector.hasTriggeredStops ||
            this.llamaChat.model.isEogToken(this.currentToken)) {
            this.stopGenerationDetector.clearInProgressStops();
            this.customStopGenerationTriggersDetector.clearInProgressStops();
            this.pendingTokens.push(...this.streamRegulator.popFreeChunkTokens());
            // built-in stop triggers take precedence over custom ones
            const triggeredStops = this.stopGenerationDetector.hasTriggeredStops
                ? this.stopGenerationDetector.getTriggeredStops()
                : this.customStopGenerationTriggersDetector.getTriggeredStops();
            const partiallyFreeTokens = this.streamRegulator.getPartiallyFreeChunk(this.llamaChat.model.tokenizer);
            // only tokens generated strictly before the stop trigger belong to the response
            const queuedTokensBeforeStopTrigger = getQueuedTokensBeforeStopTrigger(triggeredStops, partiallyFreeTokens, this.llamaChat.model.tokenizer);
            this.pendingTokens.push(...queuedTokensBeforeStopTrigger);
            const firstRemainingGenerationAfterStop = StopGenerationDetector.getFirstRemainingGenerationAfterStop(triggeredStops);
            this.removeFoundStartIgnoreTextsFromPendingTokens(true);
            // flush the final pending tokens to the token callback and the accumulators
            if (this.pendingTokens.length > 0)
                this.onToken?.(this.pendingTokens.slice());
            this.res.push(...this.pendingTokens);
            this.contextWindowsRes.push(...this.pendingTokens);
            this.pendingTokens.length = 0;
            let modelResponse = this.llamaChat.model.detokenize(this.res);
            let contextWindowModelResponse = this.llamaChat.model.detokenize(this.contextWindowsRes);
            if (this.grammar?.trimWhitespaceSuffix || this.trimWhitespaceSuffix) {
                modelResponse = modelResponse.trimEnd();
                contextWindowModelResponse = contextWindowModelResponse.trimEnd();
            }
            const lastEvaluation = {
                contextWindow: setLastTextInChatHistory(lastHistoryItemType, this.lastContextWindowHistory, this.contextWindowLastModelResponse + contextWindowModelResponse),
                cleanHistory: setLastTextInChatHistory(lastHistoryItemType, this.resolvedHistory, this.lastModelResponse + modelResponse),
                contextShiftMetadata: this.lastHistoryCompressionMetadata
            };
            const isEogToken = this.llamaChat.model.isEogToken(this.currentToken);
            if (isEogToken || this.stopGenerationDetector.hasTriggeredStops) {
                return {
                    response: modelResponse,
                    lastEvaluation,
                    metadata: {
                        remainingGenerationAfterStop: firstRemainingGenerationAfterStop,
                        stopReason: isEogToken
                            ? "eogToken"
                            : "stopGenerationTrigger"
                    }
                };
            }
            // only a custom stop trigger fired - report which one
            return {
                response: modelResponse,
                lastEvaluation,
                metadata: {
                    remainingGenerationAfterStop: firstRemainingGenerationAfterStop,
                    stopReason: "customStopTrigger",
                    customStopTrigger: triggeredStops[0].stopTrigger
                }
            };
        }
        return undefined;
    }
1433
+ spliceIgnoreStartTextDetectedTokens() {
1434
+ if (this.res.length === 0) {
1435
+ this.ignoreStartTextDetector.clearInProgressStops();
1436
+ this.ignoreStartTextDetector.clearTriggeredStops();
1437
+ this.ignoreStartTextDetector.recordGeneration({
1438
+ text: this.llamaChat.model.detokenize(this.pendingTokens),
1439
+ tokens: this.pendingTokens
1440
+ });
1441
+ }
1442
+ }
1443
+ isMaxTokensTriggered() {
1444
+ return this.maxTokens != null && this.maxTokens > 0 && this.generatedTokens >= this.maxTokens;
1445
+ }
1446
+ moveFreePendingTokensToRes(removeFoundStartIgnoreTextsFromPendingTokens = true) {
1447
+ if (this.pendingTokens.length > 0 && (this.isMaxTokensTriggered() || !this.ignoreStartTextDetector.hasInProgressStops)) {
1448
+ if (removeFoundStartIgnoreTextsFromPendingTokens)
1449
+ this.removeFoundStartIgnoreTextsFromPendingTokens();
1450
+ if (this.pendingTokens.length > 0) {
1451
+ this.onToken?.(this.pendingTokens.slice());
1452
+ this.res.push(...this.pendingTokens);
1453
+ this.contextWindowsRes.push(...this.pendingTokens);
1454
+ this.pendingTokens.length = 0;
1455
+ }
1456
+ }
1457
+ }
1458
+ handleMaxTokensTrigger(lastHistoryItemType) {
1459
+ if (this.isMaxTokensTriggered()) {
1460
+ let modelResponse = this.llamaChat.model.detokenize(this.res);
1461
+ let contextWindowModelResponse = this.llamaChat.model.detokenize(this.contextWindowsRes);
1462
+ if (this.grammar?.trimWhitespaceSuffix || this.trimWhitespaceSuffix) {
1463
+ modelResponse = modelResponse.trimEnd();
1464
+ contextWindowModelResponse = contextWindowModelResponse.trimEnd();
1465
+ }
1466
+ return {
1467
+ response: modelResponse,
1468
+ lastEvaluation: {
1469
+ contextWindow: setLastTextInChatHistory(lastHistoryItemType, this.lastContextWindowHistory, this.contextWindowLastModelResponse + contextWindowModelResponse),
1470
+ cleanHistory: setLastTextInChatHistory(lastHistoryItemType, this.resolvedHistory, this.lastModelResponse + modelResponse),
1471
+ contextShiftMetadata: this.lastHistoryCompressionMetadata
1472
+ },
1473
+ metadata: {
1474
+ stopReason: "maxTokens"
1475
+ }
1476
+ };
1477
+ }
1478
+ return undefined;
1479
+ }
1480
+ updateShouldContextShift() {
1481
+ this.shouldContextShift = this.llamaChat.sequence.nextTokenIndex >= this.llamaChat.context.contextSize - 1;
1482
+ return this.shouldContextShift;
1483
+ }
1484
+ get shouldAbort() {
1485
+ return !!(this.signal?.aborted && this.stopOnAbortSignal);
1486
+ }
1487
+ handleAbortTrigger(lastHistoryItemType) {
1488
+ if (this.shouldAbort && this.signal?.aborted && this.stopOnAbortSignal) {
1489
+ if (this.res.length === 0)
1490
+ throw this.signal.reason;
1491
+ let modelResponse = this.llamaChat.model.detokenize(this.res);
1492
+ let contextWindowModelResponse = this.llamaChat.model.detokenize(this.contextWindowsRes);
1493
+ if (this.grammar?.trimWhitespaceSuffix || this.trimWhitespaceSuffix) {
1494
+ modelResponse = modelResponse.trimEnd();
1495
+ contextWindowModelResponse = contextWindowModelResponse.trimEnd();
1496
+ }
1497
+ return {
1498
+ response: modelResponse,
1499
+ lastEvaluation: {
1500
+ contextWindow: setLastTextInChatHistory(lastHistoryItemType, this.lastContextWindowHistory, this.contextWindowLastModelResponse + contextWindowModelResponse),
1501
+ cleanHistory: setLastTextInChatHistory(lastHistoryItemType, this.resolvedHistory, this.lastModelResponse + modelResponse),
1502
+ contextShiftMetadata: this.lastHistoryCompressionMetadata
1503
+ },
1504
+ metadata: {
1505
+ stopReason: "abort"
1506
+ }
1507
+ };
1508
+ }
1509
+ return undefined;
1510
+ }
1511
+ }
781
1512
  //# sourceMappingURL=LlamaChat.js.map