node-llama-cpp 3.0.0-beta.8 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (685)
  1. package/README.md +42 -27
  2. package/bins/_linux-arm64.moved.txt +1 -0
  3. package/bins/_linux-armv7l.moved.txt +1 -0
  4. package/bins/_linux-x64-cuda.moved.txt +1 -0
  5. package/bins/_linux-x64-vulkan.moved.txt +1 -0
  6. package/bins/_linux-x64.moved.txt +1 -0
  7. package/bins/_mac-arm64-metal.moved.txt +1 -0
  8. package/bins/_mac-x64.moved.txt +1 -0
  9. package/bins/_win-arm64.moved.txt +1 -0
  10. package/bins/_win-x64-cuda.moved.txt +1 -0
  11. package/bins/_win-x64-vulkan.moved.txt +1 -0
  12. package/bins/_win-x64.moved.txt +1 -0
  13. package/dist/ChatWrapper.d.ts +19 -39
  14. package/dist/ChatWrapper.js +129 -72
  15. package/dist/ChatWrapper.js.map +1 -1
  16. package/dist/apiDocsIndex.d.ts +1 -0
  17. package/dist/apiDocsIndex.js +7 -0
  18. package/dist/apiDocsIndex.js.map +1 -0
  19. package/dist/bindings/AddonTypes.d.ts +88 -20
  20. package/dist/bindings/Llama.d.ts +43 -6
  21. package/dist/bindings/Llama.js +214 -40
  22. package/dist/bindings/Llama.js.map +1 -1
  23. package/dist/bindings/consts.d.ts +2 -0
  24. package/dist/bindings/consts.js +13 -0
  25. package/dist/bindings/consts.js.map +1 -0
  26. package/dist/bindings/getLlama.d.ts +123 -18
  27. package/dist/bindings/getLlama.js +288 -90
  28. package/dist/bindings/getLlama.js.map +1 -1
  29. package/dist/bindings/types.d.ts +29 -5
  30. package/dist/bindings/types.js +51 -2
  31. package/dist/bindings/types.js.map +1 -1
  32. package/dist/bindings/utils/MemoryOrchestrator.d.ts +21 -0
  33. package/dist/bindings/utils/MemoryOrchestrator.js +49 -0
  34. package/dist/bindings/utils/MemoryOrchestrator.js.map +1 -0
  35. package/dist/bindings/utils/asyncEvery.d.ts +5 -0
  36. package/dist/bindings/utils/asyncEvery.js +15 -0
  37. package/dist/bindings/utils/asyncEvery.js.map +1 -0
  38. package/dist/bindings/utils/asyncSome.d.ts +5 -0
  39. package/dist/bindings/utils/asyncSome.js +27 -0
  40. package/dist/bindings/utils/asyncSome.js.map +1 -0
  41. package/dist/bindings/utils/clearAllLocalBuilds.js.map +1 -1
  42. package/dist/bindings/utils/cloneLlamaCppRepo.d.ts +1 -1
  43. package/dist/bindings/utils/cloneLlamaCppRepo.js +39 -28
  44. package/dist/bindings/utils/cloneLlamaCppRepo.js.map +1 -1
  45. package/dist/bindings/utils/compileLLamaCpp.d.ts +11 -3
  46. package/dist/bindings/utils/compileLLamaCpp.js +250 -81
  47. package/dist/bindings/utils/compileLLamaCpp.js.map +1 -1
  48. package/dist/bindings/utils/detectAvailableComputeLayers.d.ts +14 -0
  49. package/dist/bindings/utils/detectAvailableComputeLayers.js +305 -0
  50. package/dist/bindings/utils/detectAvailableComputeLayers.js.map +1 -0
  51. package/dist/bindings/utils/detectGlibc.d.ts +4 -0
  52. package/dist/bindings/utils/detectGlibc.js +46 -0
  53. package/dist/bindings/utils/detectGlibc.js.map +1 -0
  54. package/dist/bindings/utils/getBestComputeLayersAvailable.d.ts +9 -0
  55. package/dist/bindings/utils/getBestComputeLayersAvailable.js +29 -0
  56. package/dist/bindings/utils/getBestComputeLayersAvailable.js.map +1 -0
  57. package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js +14 -6
  58. package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js.map +1 -1
  59. package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.js.map +1 -1
  60. package/dist/bindings/utils/getGpuTypesToUseForOption.d.ts +12 -0
  61. package/dist/bindings/utils/getGpuTypesToUseForOption.js +39 -0
  62. package/dist/bindings/utils/getGpuTypesToUseForOption.js.map +1 -0
  63. package/dist/bindings/utils/getLinuxDistroInfo.d.ts +9 -0
  64. package/dist/bindings/utils/getLinuxDistroInfo.js +46 -0
  65. package/dist/bindings/utils/getLinuxDistroInfo.js.map +1 -0
  66. package/dist/bindings/utils/getLlamaWithoutBackend.d.ts +5 -0
  67. package/dist/bindings/utils/getLlamaWithoutBackend.js +27 -0
  68. package/dist/bindings/utils/getLlamaWithoutBackend.js.map +1 -0
  69. package/dist/bindings/utils/getPlatform.js.map +1 -1
  70. package/dist/bindings/utils/getPlatformInfo.d.ts +5 -0
  71. package/dist/bindings/utils/getPlatformInfo.js +28 -0
  72. package/dist/bindings/utils/getPlatformInfo.js.map +1 -0
  73. package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.d.ts +3 -0
  74. package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.js +27 -0
  75. package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.js.map +1 -0
  76. package/dist/bindings/utils/hasFileInPath.d.ts +2 -0
  77. package/dist/bindings/utils/hasFileInPath.js +34 -0
  78. package/dist/bindings/utils/hasFileInPath.js.map +1 -0
  79. package/dist/bindings/utils/lastBuildInfo.js.map +1 -1
  80. package/dist/bindings/utils/logBinaryUsageExampleToConsole.d.ts +1 -1
  81. package/dist/bindings/utils/logBinaryUsageExampleToConsole.js +3 -9
  82. package/dist/bindings/utils/logBinaryUsageExampleToConsole.js.map +1 -1
  83. package/dist/bindings/utils/logDistroInstallInstruction.d.ts +14 -0
  84. package/dist/bindings/utils/logDistroInstallInstruction.js +48 -0
  85. package/dist/bindings/utils/logDistroInstallInstruction.js.map +1 -0
  86. package/dist/bindings/utils/resolveCustomCmakeOptions.js +26 -26
  87. package/dist/bindings/utils/resolveCustomCmakeOptions.js.map +1 -1
  88. package/dist/bindings/utils/testBindingBinary.d.ts +1 -0
  89. package/dist/bindings/utils/testBindingBinary.js +100 -0
  90. package/dist/bindings/utils/testBindingBinary.js.map +1 -0
  91. package/dist/bindings/utils/testCmakeBinary.d.ts +5 -0
  92. package/dist/bindings/utils/testCmakeBinary.js +32 -0
  93. package/dist/bindings/utils/testCmakeBinary.js.map +1 -0
  94. package/dist/chatWrappers/AlpacaChatWrapper.d.ts +2 -1
  95. package/dist/chatWrappers/AlpacaChatWrapper.js +10 -2
  96. package/dist/chatWrappers/AlpacaChatWrapper.js.map +1 -1
  97. package/dist/chatWrappers/ChatMLChatWrapper.d.ts +2 -14
  98. package/dist/chatWrappers/ChatMLChatWrapper.js +23 -21
  99. package/dist/chatWrappers/ChatMLChatWrapper.js.map +1 -1
  100. package/dist/chatWrappers/FalconChatWrapper.d.ts +4 -10
  101. package/dist/chatWrappers/FalconChatWrapper.js +39 -21
  102. package/dist/chatWrappers/FalconChatWrapper.js.map +1 -1
  103. package/dist/chatWrappers/FunctionaryChatWrapper.d.ts +8 -32
  104. package/dist/chatWrappers/FunctionaryChatWrapper.js +514 -118
  105. package/dist/chatWrappers/FunctionaryChatWrapper.js.map +1 -1
  106. package/dist/chatWrappers/GemmaChatWrapper.d.ts +7 -0
  107. package/dist/chatWrappers/GemmaChatWrapper.js +96 -0
  108. package/dist/chatWrappers/GemmaChatWrapper.js.map +1 -0
  109. package/dist/chatWrappers/GeneralChatWrapper.d.ts +4 -10
  110. package/dist/chatWrappers/GeneralChatWrapper.js +46 -22
  111. package/dist/chatWrappers/GeneralChatWrapper.js.map +1 -1
  112. package/dist/chatWrappers/Llama2ChatWrapper.d.ts +12 -0
  113. package/dist/chatWrappers/{LlamaChatWrapper.js → Llama2ChatWrapper.js} +37 -20
  114. package/dist/chatWrappers/Llama2ChatWrapper.js.map +1 -0
  115. package/dist/chatWrappers/Llama3ChatWrapper.d.ts +16 -0
  116. package/dist/chatWrappers/Llama3ChatWrapper.js +173 -0
  117. package/dist/chatWrappers/Llama3ChatWrapper.js.map +1 -0
  118. package/dist/chatWrappers/Llama3_1ChatWrapper.d.ts +35 -0
  119. package/dist/chatWrappers/Llama3_1ChatWrapper.js +277 -0
  120. package/dist/chatWrappers/Llama3_1ChatWrapper.js.map +1 -0
  121. package/dist/chatWrappers/MistralChatWrapper.d.ts +15 -0
  122. package/dist/chatWrappers/MistralChatWrapper.js +169 -0
  123. package/dist/chatWrappers/MistralChatWrapper.js.map +1 -0
  124. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.d.ts +100 -0
  125. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js +409 -0
  126. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js.map +1 -0
  127. package/dist/chatWrappers/generic/TemplateChatWrapper.d.ts +60 -0
  128. package/dist/chatWrappers/generic/TemplateChatWrapper.js +204 -0
  129. package/dist/chatWrappers/generic/TemplateChatWrapper.js.map +1 -0
  130. package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.d.ts +23 -0
  131. package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.js +45 -0
  132. package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.js.map +1 -0
  133. package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.d.ts +57 -0
  134. package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.js +119 -0
  135. package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.js.map +1 -0
  136. package/dist/chatWrappers/utils/chunkChatItems.d.ts +10 -0
  137. package/dist/chatWrappers/utils/chunkChatItems.js +44 -0
  138. package/dist/chatWrappers/utils/chunkChatItems.js.map +1 -0
  139. package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.d.ts +4 -0
  140. package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js +221 -0
  141. package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js.map +1 -0
  142. package/dist/chatWrappers/utils/jsonDumps.d.ts +7 -0
  143. package/dist/chatWrappers/utils/jsonDumps.js +18 -0
  144. package/dist/chatWrappers/utils/jsonDumps.js.map +1 -0
  145. package/dist/chatWrappers/utils/resolveChatWrapper.d.ts +95 -0
  146. package/dist/chatWrappers/utils/resolveChatWrapper.js +335 -0
  147. package/dist/chatWrappers/utils/resolveChatWrapper.js.map +1 -0
  148. package/dist/cli/cli.js +19 -11
  149. package/dist/cli/cli.js.map +1 -1
  150. package/dist/cli/commands/ChatCommand.d.ts +16 -7
  151. package/dist/cli/commands/ChatCommand.js +323 -191
  152. package/dist/cli/commands/ChatCommand.js.map +1 -1
  153. package/dist/cli/commands/CompleteCommand.d.ts +31 -0
  154. package/dist/cli/commands/CompleteCommand.js +402 -0
  155. package/dist/cli/commands/CompleteCommand.js.map +1 -0
  156. package/dist/cli/commands/DebugCommand.d.ts +7 -0
  157. package/dist/cli/commands/DebugCommand.js +54 -0
  158. package/dist/cli/commands/DebugCommand.js.map +1 -0
  159. package/dist/cli/commands/InfillCommand.d.ts +33 -0
  160. package/dist/cli/commands/InfillCommand.js +438 -0
  161. package/dist/cli/commands/InfillCommand.js.map +1 -0
  162. package/dist/cli/commands/InitCommand.d.ts +11 -0
  163. package/dist/cli/commands/InitCommand.js +195 -0
  164. package/dist/cli/commands/InitCommand.js.map +1 -0
  165. package/dist/cli/commands/OnPostInstallCommand.js +6 -2
  166. package/dist/cli/commands/OnPostInstallCommand.js.map +1 -1
  167. package/dist/cli/commands/PullCommand.d.ts +13 -0
  168. package/dist/cli/commands/PullCommand.js +158 -0
  169. package/dist/cli/commands/PullCommand.js.map +1 -0
  170. package/dist/cli/commands/inspect/InspectCommand.d.ts +4 -0
  171. package/dist/cli/commands/inspect/InspectCommand.js +21 -0
  172. package/dist/cli/commands/inspect/InspectCommand.js.map +1 -0
  173. package/dist/cli/commands/inspect/commands/InspectEstimateCommand.d.ts +12 -0
  174. package/dist/cli/commands/inspect/commands/InspectEstimateCommand.js +225 -0
  175. package/dist/cli/commands/inspect/commands/InspectEstimateCommand.js.map +1 -0
  176. package/dist/cli/commands/inspect/commands/InspectGgufCommand.d.ts +12 -0
  177. package/dist/cli/commands/inspect/commands/InspectGgufCommand.js +149 -0
  178. package/dist/cli/commands/inspect/commands/InspectGgufCommand.js.map +1 -0
  179. package/dist/cli/commands/inspect/commands/InspectGpuCommand.d.ts +4 -0
  180. package/dist/cli/commands/inspect/commands/InspectGpuCommand.js +202 -0
  181. package/dist/cli/commands/inspect/commands/InspectGpuCommand.js.map +1 -0
  182. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.d.ts +18 -0
  183. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js +629 -0
  184. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js.map +1 -0
  185. package/dist/cli/commands/source/SourceCommand.d.ts +4 -0
  186. package/dist/cli/commands/source/SourceCommand.js +19 -0
  187. package/dist/cli/commands/source/SourceCommand.js.map +1 -0
  188. package/dist/cli/commands/source/commands/BuildCommand.d.ts +16 -0
  189. package/dist/cli/commands/source/commands/BuildCommand.js +148 -0
  190. package/dist/cli/commands/source/commands/BuildCommand.js.map +1 -0
  191. package/dist/cli/commands/{ClearCommand.d.ts → source/commands/ClearCommand.d.ts} +1 -1
  192. package/dist/cli/commands/{ClearCommand.js → source/commands/ClearCommand.js} +11 -10
  193. package/dist/cli/commands/source/commands/ClearCommand.js.map +1 -0
  194. package/dist/cli/commands/{DownloadCommand.d.ts → source/commands/DownloadCommand.d.ts} +5 -4
  195. package/dist/cli/commands/source/commands/DownloadCommand.js +217 -0
  196. package/dist/cli/commands/source/commands/DownloadCommand.js.map +1 -0
  197. package/dist/cli/projectTemplates.d.ts +7 -0
  198. package/dist/cli/projectTemplates.js +10 -0
  199. package/dist/cli/projectTemplates.js.map +1 -0
  200. package/dist/cli/recommendedModels.d.ts +2 -0
  201. package/dist/cli/recommendedModels.js +585 -0
  202. package/dist/cli/recommendedModels.js.map +1 -0
  203. package/dist/cli/startCreateCli.d.ts +2 -0
  204. package/dist/cli/startCreateCli.js +26 -0
  205. package/dist/cli/startCreateCli.js.map +1 -0
  206. package/dist/cli/utils/ConsoleInteraction.d.ts +22 -0
  207. package/dist/cli/utils/ConsoleInteraction.js +122 -0
  208. package/dist/cli/utils/ConsoleInteraction.js.map +1 -0
  209. package/dist/cli/utils/ConsoleTable.d.ts +23 -0
  210. package/dist/cli/utils/ConsoleTable.js +86 -0
  211. package/dist/cli/utils/ConsoleTable.js.map +1 -0
  212. package/dist/cli/utils/basicChooseFromListConsoleInteraction.d.ts +13 -0
  213. package/dist/cli/utils/basicChooseFromListConsoleInteraction.js +111 -0
  214. package/dist/cli/utils/basicChooseFromListConsoleInteraction.js.map +1 -0
  215. package/dist/cli/utils/consolePromptQuestion.d.ts +6 -0
  216. package/dist/cli/utils/consolePromptQuestion.js +82 -0
  217. package/dist/cli/utils/consolePromptQuestion.js.map +1 -0
  218. package/dist/cli/utils/getReadablePath.d.ts +1 -0
  219. package/dist/cli/utils/getReadablePath.js +14 -0
  220. package/dist/cli/utils/getReadablePath.js.map +1 -0
  221. package/dist/cli/utils/interactivelyAskForModel.d.ts +8 -0
  222. package/dist/cli/utils/interactivelyAskForModel.js +450 -0
  223. package/dist/cli/utils/interactivelyAskForModel.js.map +1 -0
  224. package/dist/cli/utils/logUsedGpuTypeOption.d.ts +2 -0
  225. package/dist/cli/utils/logUsedGpuTypeOption.js +9 -0
  226. package/dist/cli/utils/logUsedGpuTypeOption.js.map +1 -0
  227. package/dist/cli/utils/printCommonInfoLines.d.ts +9 -0
  228. package/dist/cli/utils/printCommonInfoLines.js +82 -0
  229. package/dist/cli/utils/printCommonInfoLines.js.map +1 -0
  230. package/dist/cli/utils/printInfoLine.d.ts +12 -0
  231. package/dist/cli/utils/printInfoLine.js +54 -0
  232. package/dist/cli/utils/printInfoLine.js.map +1 -0
  233. package/dist/cli/utils/projectTemplates.d.ts +19 -0
  234. package/dist/cli/utils/projectTemplates.js +47 -0
  235. package/dist/cli/utils/projectTemplates.js.map +1 -0
  236. package/dist/cli/utils/renderModelCompatibilityPercentageWithColors.d.ts +6 -0
  237. package/dist/cli/utils/renderModelCompatibilityPercentageWithColors.js +14 -0
  238. package/dist/cli/utils/renderModelCompatibilityPercentageWithColors.js.map +1 -0
  239. package/dist/cli/utils/resolveCommandGgufPath.d.ts +5 -0
  240. package/dist/cli/utils/resolveCommandGgufPath.js +72 -0
  241. package/dist/cli/utils/resolveCommandGgufPath.js.map +1 -0
  242. package/dist/cli/utils/resolveHeaderFlag.d.ts +1 -0
  243. package/dist/cli/utils/resolveHeaderFlag.js +21 -0
  244. package/dist/cli/utils/resolveHeaderFlag.js.map +1 -0
  245. package/dist/cli/utils/resolveModelRecommendationFileOptions.d.ts +19 -0
  246. package/dist/cli/utils/resolveModelRecommendationFileOptions.js +7 -0
  247. package/dist/cli/utils/resolveModelRecommendationFileOptions.js.map +1 -0
  248. package/dist/cli/utils/splitAnsiToLines.d.ts +1 -0
  249. package/dist/cli/utils/splitAnsiToLines.js +32 -0
  250. package/dist/cli/utils/splitAnsiToLines.js.map +1 -0
  251. package/dist/cli/utils/withCliCommandDescriptionDocsUrl.d.ts +2 -0
  252. package/dist/cli/utils/withCliCommandDescriptionDocsUrl.js +23 -0
  253. package/dist/cli/utils/withCliCommandDescriptionDocsUrl.js.map +1 -0
  254. package/dist/commands.d.ts +4 -3
  255. package/dist/commands.js +6 -3
  256. package/dist/commands.js.map +1 -1
  257. package/dist/config.d.ts +35 -4
  258. package/dist/config.js +58 -17
  259. package/dist/config.js.map +1 -1
  260. package/dist/consts.d.ts +4 -0
  261. package/dist/consts.js +11 -0
  262. package/dist/consts.js.map +1 -0
  263. package/dist/evaluator/LlamaChat/LlamaChat.d.ts +151 -41
  264. package/dist/evaluator/LlamaChat/LlamaChat.js +1289 -437
  265. package/dist/evaluator/LlamaChat/LlamaChat.js.map +1 -1
  266. package/dist/evaluator/LlamaChat/utils/FunctionCallNameGrammar.d.ts +11 -0
  267. package/dist/evaluator/LlamaChat/utils/FunctionCallNameGrammar.js +55 -0
  268. package/dist/evaluator/LlamaChat/utils/FunctionCallNameGrammar.js.map +1 -0
  269. package/dist/evaluator/LlamaChat/utils/FunctionCallParamsGrammar.d.ts +16 -0
  270. package/dist/evaluator/LlamaChat/utils/FunctionCallParamsGrammar.js +45 -0
  271. package/dist/evaluator/LlamaChat/utils/FunctionCallParamsGrammar.js.map +1 -0
  272. package/dist/evaluator/LlamaChat/utils/LlamaFunctionCallValidationError.d.ts +8 -0
  273. package/dist/evaluator/LlamaChat/utils/LlamaFunctionCallValidationError.js +12 -0
  274. package/dist/evaluator/LlamaChat/utils/LlamaFunctionCallValidationError.js.map +1 -0
  275. package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js +27 -17
  276. package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js.map +1 -1
  277. package/dist/evaluator/LlamaChatSession/LlamaChatSession.d.ts +187 -13
  278. package/dist/evaluator/LlamaChatSession/LlamaChatSession.js +280 -53
  279. package/dist/evaluator/LlamaChatSession/LlamaChatSession.js.map +1 -1
  280. package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.d.ts +40 -0
  281. package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.js +186 -0
  282. package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.js.map +1 -0
  283. package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.d.ts +10 -2
  284. package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.js +8 -0
  285. package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.js.map +1 -1
  286. package/dist/evaluator/LlamaCompletion.d.ts +168 -0
  287. package/dist/evaluator/LlamaCompletion.js +470 -0
  288. package/dist/evaluator/LlamaCompletion.js.map +1 -0
  289. package/dist/evaluator/LlamaContext/LlamaContext.d.ts +63 -22
  290. package/dist/evaluator/LlamaContext/LlamaContext.js +503 -121
  291. package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -1
  292. package/dist/evaluator/LlamaContext/LlamaSampler.d.ts +1 -0
  293. package/dist/evaluator/LlamaContext/LlamaSampler.js +31 -0
  294. package/dist/evaluator/LlamaContext/LlamaSampler.js.map +1 -0
  295. package/dist/evaluator/LlamaContext/types.d.ts +177 -16
  296. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/firstInFirstOutStrategy.js.map +1 -0
  297. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/maximumParallelismStrategy.js.map +1 -0
  298. package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.d.ts +2 -0
  299. package/dist/evaluator/LlamaContext/utils/{resolveBatchItemsPrioritizingStrategy.js → resolveBatchItemsPrioritizationStrategy.js} +5 -5
  300. package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.js.map +1 -0
  301. package/dist/evaluator/LlamaEmbedding.d.ts +21 -0
  302. package/dist/evaluator/LlamaEmbedding.js +53 -0
  303. package/dist/evaluator/LlamaEmbedding.js.map +1 -0
  304. package/dist/evaluator/LlamaEmbeddingContext.d.ts +29 -19
  305. package/dist/evaluator/LlamaEmbeddingContext.js +36 -43
  306. package/dist/evaluator/LlamaEmbeddingContext.js.map +1 -1
  307. package/dist/evaluator/LlamaGrammar.d.ts +16 -13
  308. package/dist/evaluator/LlamaGrammar.js +17 -10
  309. package/dist/evaluator/LlamaGrammar.js.map +1 -1
  310. package/dist/evaluator/LlamaGrammarEvaluationState.d.ts +7 -3
  311. package/dist/evaluator/LlamaGrammarEvaluationState.js +8 -4
  312. package/dist/evaluator/LlamaGrammarEvaluationState.js.map +1 -1
  313. package/dist/evaluator/LlamaJsonSchemaGrammar.d.ts +3 -0
  314. package/dist/evaluator/LlamaJsonSchemaGrammar.js +5 -3
  315. package/dist/evaluator/LlamaJsonSchemaGrammar.js.map +1 -1
  316. package/dist/evaluator/LlamaModel/LlamaModel.d.ts +255 -0
  317. package/dist/evaluator/LlamaModel/LlamaModel.js +780 -0
  318. package/dist/evaluator/LlamaModel/LlamaModel.js.map +1 -0
  319. package/dist/evaluator/LlamaModel/utils/TokenAttributes.d.ts +29 -0
  320. package/dist/evaluator/LlamaModel/utils/TokenAttributes.js +65 -0
  321. package/dist/evaluator/LlamaModel/utils/TokenAttributes.js.map +1 -0
  322. package/dist/evaluator/TokenBias.d.ts +34 -0
  323. package/dist/evaluator/TokenBias.js +65 -0
  324. package/dist/evaluator/TokenBias.js.map +1 -0
  325. package/dist/evaluator/TokenMeter.d.ts +45 -0
  326. package/dist/evaluator/TokenMeter.js +74 -0
  327. package/dist/evaluator/TokenMeter.js.map +1 -0
  328. package/dist/gguf/consts.d.ts +4 -0
  329. package/dist/gguf/consts.js +12 -0
  330. package/dist/gguf/consts.js.map +1 -0
  331. package/dist/gguf/errors/InvalidGgufMagicError.d.ts +3 -0
  332. package/dist/gguf/errors/InvalidGgufMagicError.js +6 -0
  333. package/dist/gguf/errors/InvalidGgufMagicError.js.map +1 -0
  334. package/dist/gguf/errors/UnsupportedGgufValueTypeError.d.ts +4 -0
  335. package/dist/gguf/errors/UnsupportedGgufValueTypeError.js +9 -0
  336. package/dist/gguf/errors/UnsupportedGgufValueTypeError.js.map +1 -0
  337. package/dist/gguf/fileReaders/GgufFileReader.d.ts +36 -0
  338. package/dist/gguf/fileReaders/GgufFileReader.js +109 -0
  339. package/dist/gguf/fileReaders/GgufFileReader.js.map +1 -0
  340. package/dist/gguf/fileReaders/GgufFsFileReader.d.ts +16 -0
  341. package/dist/gguf/fileReaders/GgufFsFileReader.js +62 -0
  342. package/dist/gguf/fileReaders/GgufFsFileReader.js.map +1 -0
  343. package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.d.ts +25 -0
  344. package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js +92 -0
  345. package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js.map +1 -0
  346. package/dist/gguf/insights/GgufInsights.d.ts +50 -0
  347. package/dist/gguf/insights/GgufInsights.js +401 -0
  348. package/dist/gguf/insights/GgufInsights.js.map +1 -0
  349. package/dist/gguf/insights/GgufInsightsConfigurationResolver.d.ts +146 -0
  350. package/dist/gguf/insights/GgufInsightsConfigurationResolver.js +226 -0
  351. package/dist/gguf/insights/GgufInsightsConfigurationResolver.js.map +1 -0
  352. package/dist/gguf/insights/utils/resolveContextContextSizeOption.d.ts +19 -0
  353. package/dist/gguf/insights/utils/resolveContextContextSizeOption.js +78 -0
  354. package/dist/gguf/insights/utils/resolveContextContextSizeOption.js.map +1 -0
  355. package/dist/gguf/insights/utils/resolveModelGpuLayersOption.d.ts +15 -0
  356. package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js +183 -0
  357. package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js.map +1 -0
  358. package/dist/gguf/insights/utils/scoreLevels.d.ts +5 -0
  359. package/dist/gguf/insights/utils/scoreLevels.js +16 -0
  360. package/dist/gguf/insights/utils/scoreLevels.js.map +1 -0
  361. package/dist/gguf/parser/GgufV2Parser.d.ts +20 -0
  362. package/dist/gguf/parser/GgufV2Parser.js +156 -0
  363. package/dist/gguf/parser/GgufV2Parser.js.map +1 -0
  364. package/dist/gguf/parser/GgufV3Parser.d.ts +3 -0
  365. package/dist/gguf/parser/GgufV3Parser.js +4 -0
  366. package/dist/gguf/parser/GgufV3Parser.js.map +1 -0
  367. package/dist/gguf/parser/parseGguf.d.ts +8 -0
  368. package/dist/gguf/parser/parseGguf.js +61 -0
  369. package/dist/gguf/parser/parseGguf.js.map +1 -0
  370. package/dist/gguf/readGgufFileInfo.d.ts +45 -0
  371. package/dist/gguf/readGgufFileInfo.js +71 -0
  372. package/dist/gguf/readGgufFileInfo.js.map +1 -0
  373. package/dist/gguf/types/GgufFileInfoTypes.d.ts +84 -0
  374. package/dist/gguf/types/GgufFileInfoTypes.js +18 -0
  375. package/dist/gguf/types/GgufFileInfoTypes.js.map +1 -0
  376. package/dist/gguf/types/GgufMetadataTypes.d.ts +372 -0
  377. package/dist/gguf/types/GgufMetadataTypes.js +114 -0
  378. package/dist/gguf/types/GgufMetadataTypes.js.map +1 -0
  379. package/dist/gguf/types/GgufTensorInfoTypes.d.ts +37 -0
  380. package/dist/gguf/types/GgufTensorInfoTypes.js +33 -0
  381. package/dist/gguf/types/GgufTensorInfoTypes.js.map +1 -0
  382. package/dist/gguf/utils/GgufReadOffset.d.ts +6 -0
  383. package/dist/gguf/utils/GgufReadOffset.js +18 -0
  384. package/dist/gguf/utils/GgufReadOffset.js.map +1 -0
  385. package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.d.ts +6 -0
  386. package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js +76 -0
  387. package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js.map +1 -0
  388. package/dist/gguf/utils/getGgufFileTypeName.d.ts +4 -0
  389. package/dist/gguf/utils/getGgufFileTypeName.js +13 -0
  390. package/dist/gguf/utils/getGgufFileTypeName.js.map +1 -0
  391. package/dist/gguf/utils/getGgufMetadataArchitectureData.d.ts +3 -0
  392. package/dist/gguf/utils/getGgufMetadataArchitectureData.js +4 -0
  393. package/dist/gguf/utils/getGgufMetadataArchitectureData.js.map +1 -0
  394. package/dist/gguf/utils/normalizeGgufDownloadUrl.d.ts +1 -0
  395. package/dist/gguf/utils/normalizeGgufDownloadUrl.js +16 -0
  396. package/dist/gguf/utils/normalizeGgufDownloadUrl.js.map +1 -0
  397. package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.d.ts +2 -0
  398. package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.js +39 -0
  399. package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.js.map +1 -0
  400. package/dist/gguf/utils/resolveSplitGgufParts.d.ts +7 -0
  401. package/dist/gguf/utils/resolveSplitGgufParts.js +55 -0
  402. package/dist/gguf/utils/resolveSplitGgufParts.js.map +1 -0
  403. package/dist/index.d.ts +39 -14
  404. package/dist/index.js +29 -8
  405. package/dist/index.js.map +1 -1
  406. package/dist/state.d.ts +2 -0
  407. package/dist/state.js +7 -0
  408. package/dist/state.js.map +1 -1
  409. package/dist/tsconfig.tsbuildinfo +1 -0
  410. package/dist/types.d.ts +131 -5
  411. package/dist/types.js.map +1 -1
  412. package/dist/utils/DisposeGuard.d.ts +13 -0
  413. package/dist/utils/DisposeGuard.js +120 -0
  414. package/dist/utils/DisposeGuard.js.map +1 -0
  415. package/dist/utils/InsufficientMemoryError.d.ts +3 -0
  416. package/dist/utils/InsufficientMemoryError.js +6 -0
  417. package/dist/utils/InsufficientMemoryError.js.map +1 -0
  418. package/dist/utils/LlamaText.d.ts +73 -26
  419. package/dist/utils/LlamaText.js +475 -157
  420. package/dist/utils/LlamaText.js.map +1 -1
  421. package/dist/utils/LruCache.d.ts +12 -0
  422. package/dist/utils/LruCache.js +44 -0
  423. package/dist/utils/LruCache.js.map +1 -0
  424. package/dist/utils/OverridesObject.d.ts +7 -0
  425. package/dist/utils/OverridesObject.js +2 -0
  426. package/dist/utils/OverridesObject.js.map +1 -0
  427. package/dist/utils/ReplHistory.js +5 -1
  428. package/dist/utils/ReplHistory.js.map +1 -1
  429. package/dist/utils/StopGenerationDetector.d.ts +27 -8
  430. package/dist/utils/StopGenerationDetector.js +108 -22
  431. package/dist/utils/StopGenerationDetector.js.map +1 -1
  432. package/dist/utils/ThreadsSplitter.d.ts +32 -0
  433. package/dist/utils/ThreadsSplitter.js +177 -0
  434. package/dist/utils/ThreadsSplitter.js.map +1 -0
  435. package/dist/utils/TokenStreamRegulator.d.ts +10 -4
  436. package/dist/utils/TokenStreamRegulator.js +102 -10
  437. package/dist/utils/TokenStreamRegulator.js.map +1 -1
  438. package/dist/utils/UnsupportedError.d.ts +2 -0
  439. package/dist/utils/UnsupportedError.js +7 -0
  440. package/dist/utils/UnsupportedError.js.map +1 -0
  441. package/dist/utils/appendUserMessageToChatHistory.d.ts +4 -0
  442. package/dist/utils/appendUserMessageToChatHistory.js +4 -0
  443. package/dist/utils/appendUserMessageToChatHistory.js.map +1 -1
  444. package/dist/utils/clearTempFolder.js.map +1 -1
  445. package/dist/utils/cmake.js +23 -10
  446. package/dist/utils/cmake.js.map +1 -1
  447. package/dist/utils/compareTokens.d.ts +1 -1
  448. package/dist/utils/compareTokens.js.map +1 -1
  449. package/dist/utils/createModelDownloader.d.ts +199 -0
  450. package/dist/utils/createModelDownloader.js +405 -0
  451. package/dist/utils/createModelDownloader.js.map +1 -0
  452. package/dist/utils/findBestOption.d.ts +4 -0
  453. package/dist/utils/findBestOption.js +15 -0
  454. package/dist/utils/findBestOption.js.map +1 -0
  455. package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.d.ts +1 -0
  456. package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js +23 -12
  457. package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js.map +1 -1
  458. package/dist/utils/gbnfJson/GbnfGrammarGenerator.js.map +1 -1
  459. package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.d.ts +5 -0
  460. package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.js +11 -0
  461. package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.js.map +1 -0
  462. package/dist/utils/gbnfJson/terminals/GbnfArray.d.ts +3 -1
  463. package/dist/utils/gbnfJson/terminals/GbnfArray.js +10 -5
  464. package/dist/utils/gbnfJson/terminals/GbnfArray.js.map +1 -1
  465. package/dist/utils/gbnfJson/terminals/GbnfBoolean.d.ts +1 -1
  466. package/dist/utils/gbnfJson/terminals/GbnfBoolean.js.map +1 -1
  467. package/dist/utils/gbnfJson/terminals/GbnfBooleanValue.js.map +1 -1
  468. package/dist/utils/gbnfJson/terminals/GbnfGrammar.js.map +1 -1
  469. package/dist/utils/gbnfJson/terminals/GbnfNull.d.ts +1 -1
  470. package/dist/utils/gbnfJson/terminals/GbnfNull.js.map +1 -1
  471. package/dist/utils/gbnfJson/terminals/GbnfNumber.d.ts +1 -1
  472. package/dist/utils/gbnfJson/terminals/GbnfNumber.js.map +1 -1
  473. package/dist/utils/gbnfJson/terminals/GbnfNumberValue.js.map +1 -1
  474. package/dist/utils/gbnfJson/terminals/GbnfObjectMap.d.ts +3 -1
  475. package/dist/utils/gbnfJson/terminals/GbnfObjectMap.js +9 -4
  476. package/dist/utils/gbnfJson/terminals/GbnfObjectMap.js.map +1 -1
  477. package/dist/utils/gbnfJson/terminals/GbnfOr.js.map +1 -1
  478. package/dist/utils/gbnfJson/terminals/GbnfRepetition.d.ts +9 -0
  479. package/dist/utils/gbnfJson/terminals/GbnfRepetition.js +37 -0
  480. package/dist/utils/gbnfJson/terminals/GbnfRepetition.js.map +1 -0
  481. package/dist/utils/gbnfJson/terminals/GbnfString.d.ts +1 -1
  482. package/dist/utils/gbnfJson/terminals/GbnfString.js +23 -5
  483. package/dist/utils/gbnfJson/terminals/GbnfString.js.map +1 -1
  484. package/dist/utils/gbnfJson/terminals/GbnfStringValue.js.map +1 -1
  485. package/dist/utils/gbnfJson/terminals/GbnfVerbatimText.js.map +1 -1
  486. package/dist/utils/gbnfJson/terminals/GbnfWhitespace.d.ts +7 -4
  487. package/dist/utils/gbnfJson/terminals/GbnfWhitespace.js +37 -9
  488. package/dist/utils/gbnfJson/terminals/GbnfWhitespace.js.map +1 -1
  489. package/dist/utils/gbnfJson/terminals/gbnfConsts.d.ts +5 -4
  490. package/dist/utils/gbnfJson/terminals/gbnfConsts.js +14 -3
  491. package/dist/utils/gbnfJson/terminals/gbnfConsts.js.map +1 -1
  492. package/dist/utils/gbnfJson/types.d.ts +3 -0
  493. package/dist/utils/gbnfJson/types.js.map +1 -1
  494. package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.d.ts +10 -0
  495. package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.js +15 -0
  496. package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.js.map +1 -0
  497. package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.d.ts +2 -1
  498. package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.js +6 -5
  499. package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.js.map +1 -1
  500. package/dist/utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.js +3 -3
  501. package/dist/utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.js.map +1 -1
  502. package/dist/utils/getBuildDefaults.d.ts +1 -2
  503. package/dist/utils/getBuildDefaults.js +2 -3
  504. package/dist/utils/getBuildDefaults.js.map +1 -1
  505. package/dist/utils/getConsoleLogPrefix.d.ts +1 -1
  506. package/dist/utils/getConsoleLogPrefix.js +5 -4
  507. package/dist/utils/getConsoleLogPrefix.js.map +1 -1
  508. package/dist/utils/getGrammarsFolder.js +1 -1
  509. package/dist/utils/getGrammarsFolder.js.map +1 -1
  510. package/dist/utils/getQueuedTokensBeforeStopTrigger.d.ts +6 -0
  511. package/dist/utils/getQueuedTokensBeforeStopTrigger.js +22 -0
  512. package/dist/utils/getQueuedTokensBeforeStopTrigger.js.map +1 -0
  513. package/dist/utils/getReadableContextSize.d.ts +1 -0
  514. package/dist/utils/getReadableContextSize.js +7 -0
  515. package/dist/utils/getReadableContextSize.js.map +1 -0
  516. package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js +15 -11
  517. package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js.map +1 -1
  518. package/dist/utils/gitReleaseBundles.js +68 -1
  519. package/dist/utils/gitReleaseBundles.js.map +1 -1
  520. package/dist/utils/isToken.d.ts +2 -0
  521. package/dist/utils/isToken.js +4 -0
  522. package/dist/utils/isToken.js.map +1 -0
  523. package/dist/utils/isUrl.d.ts +1 -0
  524. package/dist/utils/isUrl.js +15 -0
  525. package/dist/utils/isUrl.js.map +1 -0
  526. package/dist/utils/mergeUnionTypes.d.ts +10 -0
  527. package/dist/utils/mergeUnionTypes.js +2 -0
  528. package/dist/utils/mergeUnionTypes.js.map +1 -0
  529. package/dist/utils/modelFileAccesTokens.d.ts +4 -0
  530. package/dist/utils/modelFileAccesTokens.js +40 -0
  531. package/dist/utils/modelFileAccesTokens.js.map +1 -0
  532. package/dist/utils/parseModelFileName.d.ts +1 -0
  533. package/dist/utils/parseModelFileName.js +6 -1
  534. package/dist/utils/parseModelFileName.js.map +1 -1
  535. package/dist/utils/parseTextTemplate.d.ts +66 -0
  536. package/dist/utils/parseTextTemplate.js +116 -0
  537. package/dist/utils/parseTextTemplate.js.map +1 -0
  538. package/dist/utils/prettyPrintObject.d.ts +10 -1
  539. package/dist/utils/prettyPrintObject.js +61 -15
  540. package/dist/utils/prettyPrintObject.js.map +1 -1
  541. package/dist/utils/pushAll.d.ts +6 -0
  542. package/dist/utils/pushAll.js +11 -0
  543. package/dist/utils/pushAll.js.map +1 -0
  544. package/dist/utils/removeNullFields.d.ts +2 -2
  545. package/dist/utils/removeNullFields.js.map +1 -1
  546. package/dist/utils/resolveGithubRelease.d.ts +2 -2
  547. package/dist/utils/resolveGithubRelease.js.map +1 -1
  548. package/dist/utils/resolveLastTokens.d.ts +2 -0
  549. package/dist/utils/resolveLastTokens.js +12 -0
  550. package/dist/utils/resolveLastTokens.js.map +1 -0
  551. package/dist/utils/runtime.d.ts +4 -0
  552. package/dist/utils/runtime.js +8 -0
  553. package/dist/utils/runtime.js.map +1 -0
  554. package/dist/utils/safeEventCallback.d.ts +6 -0
  555. package/dist/utils/safeEventCallback.js +29 -0
  556. package/dist/utils/safeEventCallback.js.map +1 -0
  557. package/dist/utils/spawnCommand.d.ts +11 -2
  558. package/dist/utils/spawnCommand.js +55 -7
  559. package/dist/utils/spawnCommand.js.map +1 -1
  560. package/dist/utils/tokenizeInput.d.ts +1 -1
  561. package/dist/utils/tokenizeInput.js +6 -3
  562. package/dist/utils/tokenizeInput.js.map +1 -1
  563. package/dist/utils/transformPromisable.d.ts +40 -0
  564. package/dist/utils/transformPromisable.js +64 -0
  565. package/dist/utils/transformPromisable.js.map +1 -0
  566. package/dist/utils/truncateTextAndRoundToWords.d.ts +2 -0
  567. package/dist/utils/truncateTextAndRoundToWords.js +32 -0
  568. package/dist/utils/truncateTextAndRoundToWords.js.map +1 -1
  569. package/dist/utils/utilTypes.d.ts +3 -0
  570. package/dist/utils/utilTypes.js +2 -0
  571. package/dist/utils/utilTypes.js.map +1 -0
  572. package/dist/utils/waitForLockfileRelease.js.map +1 -1
  573. package/dist/utils/withLockfile.js.map +1 -1
  574. package/dist/utils/withOra.d.ts +2 -0
  575. package/dist/utils/withOra.js +22 -6
  576. package/dist/utils/withOra.js.map +1 -1
  577. package/dist/utils/withProgressLog.d.ts +22 -0
  578. package/dist/utils/withProgressLog.js +211 -0
  579. package/dist/utils/withProgressLog.js.map +1 -0
  580. package/dist/utils/withStatusLogs.js +1 -1
  581. package/dist/utils/withStatusLogs.js.map +1 -1
  582. package/dist/utils/wrapAbortSignal.d.ts +1 -0
  583. package/dist/utils/wrapAbortSignal.js +9 -0
  584. package/dist/utils/wrapAbortSignal.js.map +1 -0
  585. package/llama/.clang-format +1 -2
  586. package/llama/CMakeLists.txt +134 -5
  587. package/llama/addon/AddonContext.cpp +629 -0
  588. package/llama/addon/AddonContext.h +52 -0
  589. package/llama/addon/AddonGrammar.cpp +39 -0
  590. package/llama/addon/AddonGrammar.h +19 -0
  591. package/llama/addon/AddonGrammarEvaluationState.cpp +25 -0
  592. package/llama/addon/AddonGrammarEvaluationState.h +17 -0
  593. package/llama/addon/AddonModel.cpp +672 -0
  594. package/llama/addon/AddonModel.h +61 -0
  595. package/llama/addon/AddonModelData.cpp +25 -0
  596. package/llama/addon/AddonModelData.h +15 -0
  597. package/llama/addon/AddonModelLora.cpp +105 -0
  598. package/llama/addon/AddonModelLora.h +28 -0
  599. package/llama/addon/AddonSampler.cpp +513 -0
  600. package/llama/addon/AddonSampler.h +65 -0
  601. package/llama/addon/RingBuffer.h +109 -0
  602. package/llama/addon/addon.cpp +223 -0
  603. package/llama/addon/addonGlobals.cpp +22 -0
  604. package/llama/addon/addonGlobals.h +12 -0
  605. package/llama/addon/globals/addonLog.cpp +136 -0
  606. package/llama/addon/globals/addonLog.h +21 -0
  607. package/llama/addon/globals/addonProgress.cpp +15 -0
  608. package/llama/addon/globals/addonProgress.h +15 -0
  609. package/llama/addon/globals/getGpuInfo.cpp +108 -0
  610. package/llama/addon/globals/getGpuInfo.h +6 -0
  611. package/llama/binariesGithubRelease.json +1 -1
  612. package/llama/gitRelease.bundle +0 -0
  613. package/llama/gpuInfo/cuda-gpu-info.cu +120 -0
  614. package/llama/gpuInfo/cuda-gpu-info.h +10 -0
  615. package/llama/gpuInfo/metal-gpu-info.h +8 -0
  616. package/llama/gpuInfo/metal-gpu-info.mm +30 -0
  617. package/llama/gpuInfo/vulkan-gpu-info.cpp +83 -0
  618. package/llama/gpuInfo/vulkan-gpu-info.h +9 -0
  619. package/llama/grammars/README.md +297 -6
  620. package/llama/grammars/json.gbnf +4 -4
  621. package/llama/grammars/json_arr.gbnf +4 -4
  622. package/llama/llama.cpp.info.json +1 -1
  623. package/llama/toolchains/win32.host-x64.target-arm64.cmake +41 -0
  624. package/package.json +109 -59
  625. package/templates/packed/electron-typescript-react.json +1 -0
  626. package/templates/packed/node-typescript.json +1 -0
  627. package/dist/AbortError.d.ts +0 -2
  628. package/dist/AbortError.js +0 -7
  629. package/dist/AbortError.js.map +0 -1
  630. package/dist/chatWrappers/LlamaChatWrapper.d.ts +0 -13
  631. package/dist/chatWrappers/LlamaChatWrapper.js.map +0 -1
  632. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.d.ts +0 -13
  633. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js +0 -57
  634. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js.map +0 -1
  635. package/dist/cli/commands/BuildCommand.d.ts +0 -11
  636. package/dist/cli/commands/BuildCommand.js +0 -106
  637. package/dist/cli/commands/BuildCommand.js.map +0 -1
  638. package/dist/cli/commands/ClearCommand.js.map +0 -1
  639. package/dist/cli/commands/DownloadCommand.js +0 -169
  640. package/dist/cli/commands/DownloadCommand.js.map +0 -1
  641. package/dist/evaluator/LlamaChat/utils/FunctionCallGrammar.d.ts +0 -22
  642. package/dist/evaluator/LlamaChat/utils/FunctionCallGrammar.js +0 -121
  643. package/dist/evaluator/LlamaChat/utils/FunctionCallGrammar.js.map +0 -1
  644. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.js.map +0 -1
  645. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.js.map +0 -1
  646. package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.d.ts +0 -2
  647. package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js.map +0 -1
  648. package/dist/evaluator/LlamaModel.d.ts +0 -120
  649. package/dist/evaluator/LlamaModel.js +0 -320
  650. package/dist/evaluator/LlamaModel.js.map +0 -1
  651. package/dist/utils/getGbnfGrammarForGbnfJsonSchema.d.ts +0 -2
  652. package/dist/utils/getGbnfGrammarForGbnfJsonSchema.js +0 -9
  653. package/dist/utils/getGbnfGrammarForGbnfJsonSchema.js.map +0 -1
  654. package/dist/utils/parseModelTypeDescription.d.ts +0 -6
  655. package/dist/utils/parseModelTypeDescription.js +0 -9
  656. package/dist/utils/parseModelTypeDescription.js.map +0 -1
  657. package/dist/utils/resolveChatWrapper.d.ts +0 -4
  658. package/dist/utils/resolveChatWrapper.js +0 -16
  659. package/dist/utils/resolveChatWrapper.js.map +0 -1
  660. package/llama/addon.cpp +0 -916
  661. package/llamaBins/linux-arm64/.buildMetadata.json +0 -1
  662. package/llamaBins/linux-arm64/llama-addon.node +0 -0
  663. package/llamaBins/linux-armv7l/.buildMetadata.json +0 -1
  664. package/llamaBins/linux-armv7l/llama-addon.node +0 -0
  665. package/llamaBins/linux-x64/.buildMetadata.json +0 -1
  666. package/llamaBins/linux-x64/llama-addon.node +0 -0
  667. package/llamaBins/linux-x64-cuda/.buildMetadata.json +0 -1
  668. package/llamaBins/linux-x64-cuda/llama-addon.node +0 -0
  669. package/llamaBins/mac-arm64-metal/.buildMetadata.json +0 -1
  670. package/llamaBins/mac-arm64-metal/ggml-metal.metal +0 -6119
  671. package/llamaBins/mac-arm64-metal/llama-addon.node +0 -0
  672. package/llamaBins/mac-x64/.buildMetadata.json +0 -1
  673. package/llamaBins/mac-x64/llama-addon.node +0 -0
  674. package/llamaBins/win-x64/.buildMetadata.json +0 -1
  675. package/llamaBins/win-x64/llama-addon.exp +0 -0
  676. package/llamaBins/win-x64/llama-addon.lib +0 -0
  677. package/llamaBins/win-x64/llama-addon.node +0 -0
  678. package/llamaBins/win-x64-cuda/.buildMetadata.json +0 -1
  679. package/llamaBins/win-x64-cuda/llama-addon.exp +0 -0
  680. package/llamaBins/win-x64-cuda/llama-addon.lib +0 -0
  681. package/llamaBins/win-x64-cuda/llama-addon.node +0 -0
  682. /package/dist/evaluator/LlamaContext/utils/{batchItemsPrioritizingStrategies → batchItemsPrioritizationStrategies}/firstInFirstOutStrategy.d.ts +0 -0
  683. /package/dist/evaluator/LlamaContext/utils/{batchItemsPrioritizingStrategies → batchItemsPrioritizationStrategies}/firstInFirstOutStrategy.js +0 -0
  684. /package/dist/evaluator/LlamaContext/utils/{batchItemsPrioritizingStrategies → batchItemsPrioritizationStrategies}/maximumParallelismStrategy.d.ts +0 -0
  685. /package/dist/evaluator/LlamaContext/utils/{batchItemsPrioritizingStrategies → batchItemsPrioritizationStrategies}/maximumParallelismStrategy.js +0 -0
@@ -1,25 +1,36 @@
1
- import { DisposeAggregator, DisposedError, EventRelay } from "lifecycle-utils";
2
- import { resolveChatWrapper } from "../../utils/resolveChatWrapper.js";
1
+ import { DisposeAggregator, DisposedError, EventRelay, withLock } from "lifecycle-utils";
3
2
  import { removeNullFields } from "../../utils/removeNullFields.js";
4
3
  import { LlamaGrammarEvaluationState } from "../LlamaGrammarEvaluationState.js";
5
- import { AbortError } from "../../AbortError.js";
4
+ import { LlamaText, SpecialToken } from "../../utils/LlamaText.js";
6
5
  import { StopGenerationDetector } from "../../utils/StopGenerationDetector.js";
7
6
  import { TokenStreamRegulator } from "../../utils/TokenStreamRegulator.js";
7
+ import { maxRecentDetokenizerTokens, UNKNOWN_UNICODE_CHAR } from "../../consts.js";
8
+ import { getQueuedTokensBeforeStopTrigger } from "../../utils/getQueuedTokensBeforeStopTrigger.js";
9
+ import { resolveChatWrapper } from "../../chatWrappers/utils/resolveChatWrapper.js";
10
+ import { GeneralChatWrapper } from "../../chatWrappers/GeneralChatWrapper.js";
11
+ import { safeEventCallback } from "../../utils/safeEventCallback.js";
12
+ import { pushAll } from "../../utils/pushAll.js";
13
+ import { resolveLastTokens } from "../../utils/resolveLastTokens.js";
14
+ import { LlamaSampler } from "../LlamaContext/LlamaSampler.js";
8
15
  import { eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy } from "./utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js";
9
- import { FunctionCallGrammar, LlamaFunctionCallValidationError } from "./utils/FunctionCallGrammar.js";
16
+ import { FunctionCallNameGrammar } from "./utils/FunctionCallNameGrammar.js";
17
+ import { FunctionCallParamsGrammar } from "./utils/FunctionCallParamsGrammar.js";
10
18
  const defaultContextShiftOptions = {
11
19
  size: (sequence) => Math.max(1, Math.floor(sequence.context.contextSize / 10)),
12
20
  strategy: "eraseFirstResponseAndKeepFirstSystem",
13
21
  lastEvaluationMetadata: null
14
22
  };
15
- const UNKNOWN_UNICODE_CHAR = "\ufffd";
23
+ const defaultRepeatPenaltyLastTokens = 64;
24
+ const defaultTrimWhitespaceSuffix = false;
25
+ const defaultEvaluationPriority = 5;
16
26
  export class LlamaChat {
17
27
  /** @internal */ _chatWrapper;
18
28
  /** @internal */ _disposeAggregator = new DisposeAggregator();
19
29
  /** @internal */ _autoDisposeSequence;
30
+ /** @internal */ _chatLock = {};
20
31
  /** @internal */ _sequence;
21
32
  onDispose = new EventRelay();
22
- constructor({ contextSequence, chatWrapper = "auto", autoDisposeSequence = true }) {
33
+ constructor({ contextSequence, chatWrapper = "auto", autoDisposeSequence = false }) {
23
34
  if (contextSequence == null)
24
35
  throw new Error("contextSequence cannot be null");
25
36
  if (contextSequence.disposed)
@@ -30,7 +41,14 @@ export class LlamaChat {
30
41
  this.dispose();
31
42
  }));
32
43
  this._disposeAggregator.add(this.onDispose.dispatchEvent);
33
- this._chatWrapper = resolveChatWrapper(chatWrapper, contextSequence.model);
44
+ this._chatWrapper = chatWrapper === "auto"
45
+ ? (resolveChatWrapper({
46
+ bosString: contextSequence.model.tokens.bosString,
47
+ filename: contextSequence.model.filename,
48
+ fileInfo: contextSequence.model.fileInfo,
49
+ tokenizer: contextSequence.model.tokenizer
50
+ }) ?? new GeneralChatWrapper())
51
+ : chatWrapper;
34
52
  }
35
53
  dispose({ disposeSequence = this._autoDisposeSequence } = {}) {
36
54
  if (this._sequence == null)
@@ -63,406 +81,219 @@ export class LlamaChat {
63
81
  get model() {
64
82
  return this.sequence.model;
65
83
  }
66
- async generateResponse(history, { onToken, signal, maxTokens, temperature, topK, topP, grammar, trimWhitespaceSuffix = false, repeatPenalty = {}, evaluationPriority = 5, functions, documentFunctionParams, contextShift = defaultContextShiftOptions, lastEvaluationContextWindow: { history: lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift = 0.5 } = {} } = {}) {
67
- const functionsEnabled = (functions != null && Object.keys(functions).length > 0);
68
- if (grammar != null && functionsEnabled)
69
- throw new Error("Using both grammar and functions is not supported yet");
70
- if (signal?.aborted)
71
- throw new AbortError();
72
- if (this._sequence == null)
73
- throw new DisposedError();
74
- let resolvedHistory = this._sequence.isLoadedToMemory
75
- ? history.slice()
76
- : history.map(removeRawFromHistoryItem);
77
- if (resolvedHistory.length === 0 || resolvedHistory[resolvedHistory.length - 1].type !== "model")
78
- resolvedHistory.push({
79
- type: "model",
80
- response: []
81
- });
82
- const model = this._sequence.model;
83
- const context = this._sequence.context;
84
- const eosToken = model.tokens.eos;
85
- const resolvedContextShift = {
86
- ...defaultContextShiftOptions,
87
- ...removeNullFields(contextShift)
88
- };
89
- const { lastTokens: repeatPenaltyLastTokens = 64, punishTokensFilter, penalizeNewLine, penalty, frequencyPenalty, presencePenalty } = repeatPenalty === false
90
- ? { lastTokens: 0 }
91
- : repeatPenalty;
92
- const lastModelResponse = getLastTextModelResponseFromChatHistory(resolvedHistory);
93
- const res = [];
94
- const pendingTokens = [];
95
- let ignoredStartTextTokens = [];
96
- const functionCallTokens = [];
97
- const repeatPenaltyEnabled = repeatPenaltyLastTokens > 0;
98
- const grammarEvaluationState = grammar != null
99
- ? new LlamaGrammarEvaluationState({ grammar })
100
- : undefined;
101
- let functionsGrammar = functionsEnabled
102
- ? new FunctionCallGrammar(model._llama, functions, this._chatWrapper, false)
103
- : undefined;
104
- let functionsEvaluationState = (functionsEnabled && functionsGrammar != null)
105
- ? new LlamaGrammarEvaluationState({
106
- grammar: functionsGrammar
107
- })
108
- : undefined;
109
- const streamRegulator = new TokenStreamRegulator();
110
- const stopGenerationDetector = new StopGenerationDetector();
111
- const functionSyntaxStartDetector = new StopGenerationDetector();
112
- const functionSyntaxEndDetector = new StopGenerationDetector();
113
- const disengageInitiallyEngagedFunctionMode = new StopGenerationDetector();
114
- const ignoreStartTextDetector = new StopGenerationDetector();
115
- const locksToReleaseOnValidGeneration = [];
116
- const functionCallTokenSyntaxLocks = [];
117
- let generatedTokens = 0;
118
- let isFirstEvaluation = true;
119
- let inFunctionEvaluationMode = false;
120
- let initiallyEngagedFunctionMode = false;
121
- let lastContextWindowHistory = resolvedHistory;
122
- let lastHistoryCompressionMetadata = resolvedContextShift.lastEvaluationMetadata;
123
- const ensureNotAborted = () => {
124
- if (signal?.aborted)
125
- throw new AbortError();
126
- if (this._sequence == null)
127
- throw new DisposedError();
128
- };
129
- const getPenaltyTokens = () => {
130
- if (this._sequence == null)
131
- throw new DisposedError();
132
- let punishTokens = res.slice(-repeatPenaltyLastTokens);
133
- if (punishTokensFilter != null)
134
- punishTokens = punishTokensFilter(punishTokens);
135
- if (!penalizeNewLine) {
136
- const nlToken = model.tokens.nl;
137
- if (nlToken != null)
138
- punishTokens = punishTokens.filter(token => token !== nlToken);
84
+ async generateResponse(history, options = {}) {
85
+ const { onTextChunk, onToken, signal, stopOnAbortSignal = false, maxTokens, temperature, minP, topK, topP, seed, grammar, trimWhitespaceSuffix = defaultTrimWhitespaceSuffix, repeatPenalty = {}, tokenBias, evaluationPriority = defaultEvaluationPriority, functions, onFunctionCall, documentFunctionParams, maxParallelFunctionCalls, contextShift = defaultContextShiftOptions, customStopTriggers, lastEvaluationContextWindow: { history: lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift = 0.5 } = {} } = options;
86
+ const generateResponseState = new GenerateResponseState(this, this._chatWrapper, history, {
87
+ onTextChunk,
88
+ onToken,
89
+ signal,
90
+ stopOnAbortSignal,
91
+ maxTokens,
92
+ temperature,
93
+ minP,
94
+ topK,
95
+ topP,
96
+ seed,
97
+ grammar: grammar, // this is a workaround to allow passing both `functions` and `grammar`
98
+ trimWhitespaceSuffix,
99
+ repeatPenalty,
100
+ tokenBias,
101
+ evaluationPriority,
102
+ functions,
103
+ onFunctionCall,
104
+ documentFunctionParams,
105
+ maxParallelFunctionCalls,
106
+ contextShift,
107
+ customStopTriggers,
108
+ lastEvaluationContextWindow: {
109
+ history: lastEvaluationContextWindowHistory,
110
+ minimumOverlapPercentageToPreventContextShift
139
111
  }
140
- return punishTokens;
141
- };
142
- const getResolvedHistoryWithCurrentModelResponse = () => {
143
- if (res.length === 0)
144
- return resolvedHistory;
145
- let modelResponse = model.detokenize(res);
146
- if (grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix)
147
- modelResponse = modelResponse.trimEnd();
148
- if (modelResponse === "")
149
- return resolvedHistory;
150
- return setLastModelTextResponseInChatHistory(resolvedHistory, lastModelResponse + modelResponse);
151
- };
152
- const removeFoundStartIgnoreTextsFromPendingTokens = () => {
153
- if (res.length === 0 && pendingTokens.length > 0) {
154
- ignoreStartTextDetector.clearInProgressStops();
155
- ignoreStartTextDetector.clearTriggeredStops();
156
- let mostExhaustiveTriggeredStops = null;
157
- for (let i = 0; i < pendingTokens.length; i++) {
158
- ignoreStartTextDetector.recordGeneration({
159
- text: model.detokenize([pendingTokens[i]]),
160
- tokens: [pendingTokens[i]],
161
- startNewChecks: i === 0
162
- });
163
- if (ignoreStartTextDetector.hasTriggeredStops) {
164
- mostExhaustiveTriggeredStops = ignoreStartTextDetector.getTriggeredStops();
165
- ignoreStartTextDetector.clearTriggeredStops();
112
+ });
113
+ if (generateResponseState.grammar != null && generateResponseState.functionsEnabled)
114
+ throw new Error("Using both grammar and functions is not supported yet");
115
+ return await withLock(this._chatLock, "evaluate", signal, async () => {
116
+ try {
117
+ generateResponseState.ensureLastHistoryItemIsModel();
118
+ const loadContextWindow = async (avoidReloadingHistory = false) => {
119
+ await generateResponseState.loadContextWindow(generateResponseState.getResolvedHistoryWithCurrentModelResponse(), false, avoidReloadingHistory);
120
+ };
121
+ const loadContextWindowForFunctionCallingLoop = async () => loadContextWindow(true);
122
+ // eslint-disable-next-line no-constant-condition
123
+ while (true) {
124
+ generateResponseState.startTokenLoop();
125
+ generateResponseState.canAvoidReloadingHistory = false;
126
+ await loadContextWindow();
127
+ generateResponseState.addStopGenerationTriggersFromChatWrapper();
128
+ if (generateResponseState.generatedTokens === 0) {
129
+ generateResponseState.addIgnoreStartTextTriggersFromChatWrapper();
130
+ if (generateResponseState.functionsEnabled) {
131
+ generateResponseState.initFunctions();
132
+ }
166
133
  }
167
- else if (!ignoreStartTextDetector.hasInProgressStops)
168
- break;
169
- }
170
- if (mostExhaustiveTriggeredStops != null) {
171
- const [mostExhaustiveTriggeredStop] = mostExhaustiveTriggeredStops;
172
- if (mostExhaustiveTriggeredStop != null) {
173
- ignoredStartTextTokens = mostExhaustiveTriggeredStop.stopTrigger
174
- .map((stopTrigger) => {
175
- if (typeof stopTrigger === "string")
176
- return model.tokenize(stopTrigger);
177
- else
178
- return [stopTrigger];
179
- })
180
- .flat(1);
181
- const newPendingTokens = mostExhaustiveTriggeredStop.remainingGenerations
182
- .map((generation) => {
183
- if (typeof generation === "string")
184
- return model.tokenize(generation);
185
- else
186
- return generation;
187
- })
188
- .flat(1);
189
- pendingTokens.length = 0;
190
- pendingTokens.push(...newPendingTokens);
134
+ if (generateResponseState.functionEvaluationMode !== false) {
135
+ const functionsCallsRes = await generateResponseState.enterFunctionCallingLoop(loadContextWindowForFunctionCallingLoop);
136
+ if (functionsCallsRes != null)
137
+ return functionsCallsRes;
138
+ await loadContextWindowForFunctionCallingLoop();
191
139
  }
192
- }
193
- }
194
- };
195
- if (grammar != null)
196
- StopGenerationDetector.resolveStopTriggers(grammar.stopGenerationTriggers, model.tokenize)
197
- .map((stopTrigger) => stopGenerationDetector.addStopTrigger(stopTrigger));
198
- if (functions != null && Object.keys(functions).length > 0)
199
- functionSyntaxStartDetector.addStopTrigger([this._chatWrapper.settings.functions.call.prefix]);
200
- // eslint-disable-next-line no-constant-condition
201
- while (true) {
202
- ensureNotAborted();
203
- let shouldContextShift = false;
204
- const queuedChunkTokens = streamRegulator.getAllQueuedChunkTokens();
205
- const { history: contextWindowHistory, stopGenerationTriggers, tokens: contextWindowTokens, newResolvedHistory, newHistoryCompressionMetadata, ignoreStartText, functionCallInitiallyEngaged, disengageInitiallyEngagedFunctionCall } = await getContextWindow({
206
- resolvedHistory: getResolvedHistoryWithCurrentModelResponse(),
207
- resolvedContextShift,
208
- lastHistoryCompressionMetadata,
209
- pendingTokensCount: pendingTokens.length + queuedChunkTokens.length,
210
- isFirstEvaluation,
211
- chatWrapper: this._chatWrapper,
212
- lastEvaluationContextWindowHistory,
213
- minimumOverlapPercentageToPreventContextShift,
214
- sequence: this._sequence,
215
- minFreeContextTokens: 1,
216
- functions: functionsEnabled ? functions : undefined,
217
- documentFunctionParams
218
- });
219
- ensureNotAborted();
220
- if (generatedTokens === 0) {
221
- StopGenerationDetector.resolveStopTriggers(ignoreStartText, model.tokenize)
222
- .map((stopTrigger) => ignoreStartTextDetector.addStopTrigger(stopTrigger));
223
- if (functionsEnabled) {
224
- initiallyEngagedFunctionMode = functionCallInitiallyEngaged;
225
- StopGenerationDetector.resolveStopTriggers(disengageInitiallyEngagedFunctionCall, model.tokenize)
226
- .map((stopTrigger) => disengageInitiallyEngagedFunctionMode.addStopTrigger(stopTrigger));
227
- if (initiallyEngagedFunctionMode) {
228
- inFunctionEvaluationMode = true;
229
- functionsGrammar = new FunctionCallGrammar(model._llama, functions, this._chatWrapper, true);
230
- functionsEvaluationState = new LlamaGrammarEvaluationState({
231
- grammar: functionsGrammar
232
- });
140
+ await generateResponseState.alignCurrentSequenceStateWithCurrentTokens();
141
+ await generateResponseState.createNewEvaluationIterator();
142
+ while (await generateResponseState.iterateEvaluation()) {
143
+ generateResponseState.waitOnPartialCharactersOrWhiteSpaceTokens();
144
+ generateResponseState.detectAndHandleFunctionStartSyntax();
145
+ if (generateResponseState.functionEvaluationMode !== false) {
146
+ generateResponseState.canAvoidReloadingHistory = false;
147
+ generateResponseState.releasePartiallyFreeTokensBeforeFunctionCallStart();
148
+ const functionsCallsRes = await generateResponseState.enterFunctionCallingLoop(loadContextWindowForFunctionCallingLoop);
149
+ if (functionsCallsRes != null)
150
+ return functionsCallsRes;
151
+ }
152
+ generateResponseState.recordStopGenerationEvaluation();
153
+ generateResponseState.popStreamRegulatorFreeTokens();
154
+ generateResponseState.removeFoundStartIgnoreTextsFromPendingTokens();
155
+ const stopGenerationTriggerRes = generateResponseState.handleStopGenerationTrigger("model");
156
+ if (stopGenerationTriggerRes != null)
157
+ return stopGenerationTriggerRes;
158
+ generateResponseState.spliceIgnoreStartTextDetectedTokens();
159
+ generateResponseState.moveFreePendingTokensToRes();
160
+ const maxTokensTriggerRes = generateResponseState.handleMaxTokensTrigger("model");
161
+ if (maxTokensTriggerRes != null)
162
+ return maxTokensTriggerRes;
163
+ if (generateResponseState.updateShouldContextShift())
164
+ break;
165
+ const abortRes = generateResponseState.handleAbortTrigger("model");
166
+ if (abortRes != null)
167
+ return abortRes;
233
168
  }
169
+ generateResponseState.isFirstEvaluation = false;
170
+ if (generateResponseState.shouldContextShift)
171
+ continue;
172
+ break;
234
173
  }
174
+ throw new Error("The context size is too small to generate a response");
235
175
  }
236
- const tokens = [...contextWindowTokens, ...ignoredStartTextTokens, ...pendingTokens, ...queuedChunkTokens];
237
- resolvedHistory = newResolvedHistory;
238
- lastHistoryCompressionMetadata = newHistoryCompressionMetadata;
239
- lastContextWindowHistory = contextWindowHistory;
240
- const contextWindowLastModelResponse = getLastTextModelResponseFromChatHistory(contextWindowHistory);
241
- const contextWindowsRes = [];
242
- StopGenerationDetector.resolveStopTriggers(stopGenerationTriggers, model.tokenize)
243
- .map((stopTrigger) => stopGenerationDetector.addStopTrigger(stopTrigger));
244
- if (functionsGrammar != null)
245
- StopGenerationDetector.resolveStopTriggers(functionsGrammar.stopGenerationTriggers, model.tokenize)
246
- .map((stopTrigger) => functionSyntaxEndDetector.addStopTrigger(stopTrigger));
247
- let { firstDifferentIndex } = this._sequence.compareContextTokens(tokens);
248
- // we need to decode at least one token to generate a response
249
- if (firstDifferentIndex === tokens.length && firstDifferentIndex > 0)
250
- firstDifferentIndex -= 1;
251
- tokens.splice(0, firstDifferentIndex);
252
- if (firstDifferentIndex < this._sequence.nextTokenIndex) {
253
- await this._sequence.eraseContextTokenRanges([{
254
- start: firstDifferentIndex,
255
- end: this._sequence.nextTokenIndex
256
- }]);
257
- ensureNotAborted();
176
+ finally {
177
+ await generateResponseState.dispose();
258
178
  }
259
- const evaluationIterator = this._sequence.evaluate(tokens, removeNullFields({
260
- temperature, topK, topP,
261
- grammarEvaluationState: () => {
262
- if (inFunctionEvaluationMode)
263
- return functionsEvaluationState;
264
- return grammarEvaluationState;
265
- },
266
- repeatPenalty: !repeatPenaltyEnabled ? undefined : {
267
- punishTokens: getPenaltyTokens,
268
- penalty,
269
- frequencyPenalty,
270
- presencePenalty
271
- },
272
- evaluationPriority,
273
- yieldEosToken: true
274
- }));
275
- for await (const token of evaluationIterator) {
276
- ensureNotAborted();
277
- generatedTokens++;
278
- const tokens = [token];
279
- const text = model.detokenize([token]);
280
- const queuedTokenRelease = streamRegulator.addChunk({ tokens, text });
281
- if (initiallyEngagedFunctionMode)
282
- disengageInitiallyEngagedFunctionMode.recordGeneration({ text, tokens, startNewChecks: generatedTokens === 1 });
283
- if (text === UNKNOWN_UNICODE_CHAR || ((grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix) && text.trim() === "")) {
284
- locksToReleaseOnValidGeneration.push(queuedTokenRelease.createTextIndexLock(0));
285
- }
286
- else {
287
- while (locksToReleaseOnValidGeneration.length > 0)
288
- locksToReleaseOnValidGeneration.shift().dispose();
289
- }
290
- functionSyntaxStartDetector.recordGeneration({ text, tokens, queuedTokenRelease });
291
- if (initiallyEngagedFunctionMode && disengageInitiallyEngagedFunctionMode.hasTriggeredStops) {
292
- initiallyEngagedFunctionMode = false;
293
- let shouldStopFunctionEvaluationMode = !functionSyntaxStartDetector.hasTriggeredStops;
294
- if (!shouldStopFunctionEvaluationMode && functionsEnabled && functionsGrammar != null) {
295
- const functionCallText = model.detokenize([...functionCallTokens, ...tokens]);
296
- try {
297
- const functionName = functionsGrammar.parseFunctionNameFromPartialCall(functionCallText, {
298
- enableInternalBuiltinFunctions: true,
299
- initialFunctionCallEngaged: true
300
- });
301
- const internalBuiltinFunctions = this._chatWrapper.getInternalBuiltinFunctions({ initialFunctionCallEngaged: true });
302
- if (internalBuiltinFunctions[functionName] != null) {
303
- shouldStopFunctionEvaluationMode = true;
179
+ });
180
+ }
181
+ async loadChatAndCompleteUserMessage(history, options = {}) {
182
+ const { initialUserPrompt = "", stopOnAbortSignal = false, onTextChunk, onToken, signal, maxTokens = Math.min(256, Math.ceil(this.context.contextSize / 2)), temperature, minP, topK, topP, seed, grammar, trimWhitespaceSuffix = defaultTrimWhitespaceSuffix, repeatPenalty = {}, tokenBias, evaluationPriority = defaultEvaluationPriority, functions, documentFunctionParams, contextShift = defaultContextShiftOptions, customStopTriggers, lastEvaluationContextWindow: { history: lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift = 0.8 } = {} } = options;
183
+ const lastEvaluationContextWindowHistoryItem = lastEvaluationContextWindowHistory == null
184
+ ? null
185
+ : lastEvaluationContextWindowHistory[lastEvaluationContextWindowHistory.length - 1];
186
+ const lastEvaluationContextWindowUserMessage = lastEvaluationContextWindowHistoryItem?.type === "user"
187
+ ? lastEvaluationContextWindowHistoryItem.text
188
+ : "";
189
+ const generateResponseState = new GenerateResponseState(this, this._chatWrapper, history, {
190
+ onTextChunk,
191
+ onToken,
192
+ signal,
193
+ stopOnAbortSignal,
194
+ maxTokens,
195
+ temperature,
196
+ minP,
197
+ topK,
198
+ topP,
199
+ seed,
200
+ grammar: grammar, // this is a workaround to allow passing both `functions` and `grammar`
201
+ trimWhitespaceSuffix,
202
+ repeatPenalty,
203
+ tokenBias,
204
+ evaluationPriority,
205
+ functions,
206
+ documentFunctionParams,
207
+ contextShift,
208
+ customStopTriggers,
209
+ lastEvaluationContextWindow: {
210
+ history: lastEvaluationContextWindowHistory == null
211
+ ? undefined
212
+ : setLastUserTextInChatHistory(lastEvaluationContextWindowHistory, lastEvaluationContextWindowUserMessage + initialUserPrompt),
213
+ minimumOverlapPercentageToPreventContextShift
214
+ }
215
+ });
216
+ return await withLock(this._chatLock, "evaluate", signal, async () => {
217
+ try {
218
+ generateResponseState.ensureLastHistoryItemIsUser();
219
+ const lastResolvedHistoryItem = generateResponseState.resolvedHistory[generateResponseState.resolvedHistory.length - 1];
220
+ const initialUserMessage = lastResolvedHistoryItem?.type === "user"
221
+ ? lastResolvedHistoryItem.text
222
+ : "";
223
+ // eslint-disable-next-line no-constant-condition
224
+ while (true) {
225
+ generateResponseState.startTokenLoop();
226
+ const { userTextSuffix } = await generateResponseState.loadContextWindow(setLastUserTextInChatHistory(generateResponseState.resolvedHistory, initialUserMessage + initialUserPrompt + this.model.detokenize(generateResponseState.res)), true);
227
+ generateResponseState.functionEvaluationMode = false;
228
+ generateResponseState.addStopGenerationTriggersFromChatWrapper();
229
+ if (userTextSuffix != null && userTextSuffix.values.length > 0)
230
+ generateResponseState.stopGenerationDetector.addStopTrigger(StopGenerationDetector.resolveLlamaTextTrigger(userTextSuffix, this.model.tokenizer));
231
+ await generateResponseState.alignCurrentSequenceStateWithCurrentTokens();
232
+ if (generateResponseState.maxTokens === 0) {
233
+ await generateResponseState.evaluateWithoutGeneratingNewTokens();
234
+ return {
235
+ completion: "",
236
+ lastEvaluation: {
237
+ contextWindow: setLastUserTextInChatHistory(generateResponseState.lastContextWindowHistory, initialUserMessage),
238
+ contextShiftMetadata: generateResponseState.lastHistoryCompressionMetadata
239
+ },
240
+ metadata: {
241
+ stopReason: "maxTokens"
304
242
  }
305
- }
306
- catch (err) {
307
- if (!(err instanceof LlamaFunctionCallValidationError))
308
- throw err;
309
- }
310
- }
311
- if (shouldStopFunctionEvaluationMode) {
312
- inFunctionEvaluationMode = false;
313
- functionsGrammar = new FunctionCallGrammar(model._llama, functions, this._chatWrapper, false);
314
- functionsEvaluationState = new LlamaGrammarEvaluationState({
315
- grammar: functionsGrammar
316
- });
317
- functionCallTokens.length = 0;
318
- while (functionCallTokenSyntaxLocks.length > 0)
319
- functionCallTokenSyntaxLocks.shift().dispose();
320
- functionSyntaxStartDetector.clearInProgressStops();
321
- functionSyntaxStartDetector.clearTriggeredStops();
322
- functionSyntaxEndDetector.clearInProgressStops();
323
- functionSyntaxEndDetector.clearTriggeredStops();
324
- }
325
- }
326
- if (!inFunctionEvaluationMode && functionsEnabled && functionsGrammar != null &&
327
- functionSyntaxStartDetector.hasTriggeredStops && functionsEvaluationState != null) {
328
- inFunctionEvaluationMode = true;
329
- functionCallTokenSyntaxLocks.push(queuedTokenRelease.createTextIndexLock(0));
330
- stopGenerationDetector.clearTriggeredStops();
331
- stopGenerationDetector.clearInProgressStops();
332
- pendingTokens.push(...streamRegulator.popFreeChunkTokens());
333
- const triggeredStops = functionSyntaxStartDetector.getTriggeredStops();
334
- const partiallyFreeTokens = streamRegulator.getPartiallyFreeChunk();
335
- const queuedTokensBeforeStopTrigger = getQueuedTokensBeforeStopTrigger(triggeredStops, partiallyFreeTokens, model.tokenize);
336
- pendingTokens.push(...queuedTokensBeforeStopTrigger);
337
- const [firstRemainingGenerationAfterStop] = triggeredStops
338
- .map((stopTrigger) => stopTrigger.remainingGenerations)
339
- .filter((remainingGenerations) => remainingGenerations.length > 0)
340
- .flat(1);
341
- const remainingTextAfterStop = (firstRemainingGenerationAfterStop == null || firstRemainingGenerationAfterStop.length === 0)
342
- ? ""
343
- : typeof firstRemainingGenerationAfterStop === "string"
344
- ? firstRemainingGenerationAfterStop
345
- : model.detokenize(firstRemainingGenerationAfterStop);
346
- functionCallTokens.push(...model.tokenize(this._chatWrapper.settings.functions.call.prefix + remainingTextAfterStop));
347
- for (const functionCallToken of functionCallTokens)
348
- context._acceptTokenOnGrammarEvaluationState(functionsEvaluationState, functionCallToken);
349
- }
350
- else if (inFunctionEvaluationMode) {
351
- functionCallTokens.push(...tokens);
352
- functionCallTokenSyntaxLocks.push(queuedTokenRelease.createTextIndexLock(0));
353
- functionSyntaxEndDetector.recordGeneration({ text, tokens, queuedTokenRelease });
354
- }
355
- if (inFunctionEvaluationMode && functionSyntaxEndDetector.hasTriggeredStops && functionsGrammar != null) {
356
- const functionCallText = model.detokenize(functionCallTokens);
357
- const functionCall = functionsGrammar.parseFunctionCall(functionCallText);
358
- let modelResponse = model.detokenize(res);
359
- let contextWindowModelResponse = model.detokenize(contextWindowsRes);
360
- if (grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix) {
361
- modelResponse = modelResponse.trimEnd();
362
- contextWindowModelResponse = contextWindowModelResponse.trimEnd();
363
- }
364
- return {
365
- response: modelResponse,
366
- lastEvaluation: {
367
- contextWindow: setLastModelTextResponseInChatHistory(lastContextWindowHistory, contextWindowLastModelResponse + contextWindowModelResponse),
368
- cleanHistory: setLastModelTextResponseInChatHistory(resolvedHistory, lastModelResponse + modelResponse),
369
- contextShiftMetadata: lastHistoryCompressionMetadata
370
- },
371
- // prevent infinite TS type instantiation
372
- functionCall: functionCall,
373
- metadata: {
374
- stopReason: "functionCall"
375
- }
376
- };
377
- }
378
- if (!inFunctionEvaluationMode)
379
- stopGenerationDetector.recordGeneration({ text, tokens, queuedTokenRelease });
380
- pendingTokens.push(...streamRegulator.popFreeChunkTokens());
381
- removeFoundStartIgnoreTextsFromPendingTokens();
382
- if (stopGenerationDetector.hasTriggeredStops || token === eosToken) {
383
- const triggeredStops = stopGenerationDetector.getTriggeredStops();
384
- const partiallyFreeTokens = streamRegulator.getPartiallyFreeChunk();
385
- const queuedTokensBeforeStopTrigger = getQueuedTokensBeforeStopTrigger(triggeredStops, partiallyFreeTokens, model.tokenize);
386
- pendingTokens.push(...queuedTokensBeforeStopTrigger);
387
- const [firstRemainingGenerationAfterStop] = triggeredStops
388
- .map((stopTrigger) => stopTrigger.remainingGenerations)
389
- .filter((remainingGenerations) => remainingGenerations.length > 0)
390
- .flat(1);
391
- removeFoundStartIgnoreTextsFromPendingTokens();
392
- if (pendingTokens.length > 0)
393
- onToken?.(pendingTokens.slice());
394
- res.push(...pendingTokens);
395
- contextWindowsRes.push(...pendingTokens);
396
- pendingTokens.length = 0;
397
- let modelResponse = model.detokenize(res);
398
- let contextWindowModelResponse = model.detokenize(contextWindowsRes);
399
- if (grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix) {
400
- modelResponse = modelResponse.trimEnd();
401
- contextWindowModelResponse = contextWindowModelResponse.trimEnd();
402
- }
403
- return {
404
- response: modelResponse,
405
- lastEvaluation: {
406
- contextWindow: setLastModelTextResponseInChatHistory(lastContextWindowHistory, contextWindowLastModelResponse + contextWindowModelResponse),
407
- cleanHistory: setLastModelTextResponseInChatHistory(resolvedHistory, lastModelResponse + modelResponse),
408
- contextShiftMetadata: lastHistoryCompressionMetadata
409
- },
410
- metadata: {
411
- remainingGenerationAfterStop: firstRemainingGenerationAfterStop,
412
- stopReason: token === eosToken
413
- ? "eosToken"
414
- : "stopGenerationTrigger"
415
- }
416
- };
417
- }
418
- const maxTokensTriggered = maxTokens != null && maxTokens > 0 && generatedTokens >= maxTokens;
419
- if (res.length === 0) {
420
- ignoreStartTextDetector.clearInProgressStops();
421
- ignoreStartTextDetector.clearTriggeredStops();
422
- ignoreStartTextDetector.recordGeneration({
423
- text: model.detokenize(pendingTokens),
424
- tokens: pendingTokens
425
- });
426
- }
427
- if (pendingTokens.length > 0 && (maxTokensTriggered || !ignoreStartTextDetector.hasInProgressStops)) {
428
- removeFoundStartIgnoreTextsFromPendingTokens();
429
- if (pendingTokens.length > 0) {
430
- onToken?.(pendingTokens.slice());
431
- res.push(...pendingTokens);
432
- contextWindowsRes.push(...pendingTokens);
433
- pendingTokens.length = 0;
243
+ };
434
244
  }
435
- }
436
- if (maxTokensTriggered) {
437
- let modelResponse = model.detokenize(res);
438
- let contextWindowModelResponse = model.detokenize(contextWindowsRes);
439
- if (grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix) {
440
- modelResponse = modelResponse.trimEnd();
441
- contextWindowModelResponse = contextWindowModelResponse.trimEnd();
245
+ await generateResponseState.createNewEvaluationIterator();
246
+ while (await generateResponseState.iterateEvaluation()) {
247
+ generateResponseState.waitOnPartialCharactersOrWhiteSpaceTokens();
248
+ generateResponseState.recordStopGenerationEvaluation();
249
+ generateResponseState.popStreamRegulatorFreeTokens();
250
+ const stopGenerationTriggerRes = generateResponseState.handleStopGenerationTrigger("user");
251
+ if (stopGenerationTriggerRes != null)
252
+ return {
253
+ completion: stopGenerationTriggerRes.response,
254
+ lastEvaluation: {
255
+ contextWindow: setLastUserTextInChatHistory(generateResponseState.lastContextWindowHistory, initialUserMessage),
256
+ contextShiftMetadata: stopGenerationTriggerRes.lastEvaluation.contextShiftMetadata
257
+ },
258
+ metadata: stopGenerationTriggerRes.metadata.stopReason === "customStopTrigger"
259
+ ? stopGenerationTriggerRes.metadata
260
+ : stopGenerationTriggerRes.metadata
261
+ };
262
+ generateResponseState.moveFreePendingTokensToRes(false);
263
+ const maxTokensTriggerRes = generateResponseState.handleMaxTokensTrigger("user");
264
+ if (maxTokensTriggerRes != null)
265
+ return {
266
+ completion: maxTokensTriggerRes.response,
267
+ lastEvaluation: {
268
+ contextWindow: setLastUserTextInChatHistory(generateResponseState.lastContextWindowHistory, initialUserMessage),
269
+ contextShiftMetadata: maxTokensTriggerRes.lastEvaluation.contextShiftMetadata
270
+ },
271
+ metadata: maxTokensTriggerRes.metadata
272
+ };
273
+ if (generateResponseState.updateShouldContextShift())
274
+ break;
275
+ const abortRes = generateResponseState.handleAbortTrigger("user");
276
+ if (abortRes != null)
277
+ return {
278
+ completion: abortRes.response,
279
+ lastEvaluation: {
280
+ contextWindow: setLastUserTextInChatHistory(generateResponseState.lastContextWindowHistory, initialUserMessage),
281
+ contextShiftMetadata: abortRes.lastEvaluation.contextShiftMetadata
282
+ },
283
+ metadata: abortRes.metadata
284
+ };
442
285
  }
443
- return {
444
- response: modelResponse,
445
- lastEvaluation: {
446
- contextWindow: setLastModelTextResponseInChatHistory(lastContextWindowHistory, contextWindowLastModelResponse + contextWindowModelResponse),
447
- cleanHistory: setLastModelTextResponseInChatHistory(resolvedHistory, lastModelResponse + modelResponse),
448
- contextShiftMetadata: lastHistoryCompressionMetadata
449
- },
450
- metadata: {
451
- stopReason: "maxTokens"
452
- }
453
- };
454
- }
455
- if (this._sequence.nextTokenIndex >= context.contextSize) {
456
- shouldContextShift = true;
286
+ generateResponseState.isFirstEvaluation = false;
287
+ if (generateResponseState.shouldContextShift)
288
+ continue;
457
289
  break;
458
290
  }
291
+ throw new Error("The context size is too small to generate a completion");
459
292
  }
460
- isFirstEvaluation = false;
461
- if (shouldContextShift)
462
- continue;
463
- break;
464
- }
465
- throw new Error("The context size is too small to generate a response");
293
+ finally {
294
+ await generateResponseState.dispose();
295
+ }
296
+ });
466
297
  }
467
298
  }
468
299
  function removeRawFromHistoryItem(historyItem) {
@@ -474,7 +305,7 @@ function removeRawFromHistoryItem(historyItem) {
474
305
  else
475
306
  return {
476
307
  ...item,
477
- raw: undefined
308
+ rawCall: undefined
478
309
  };
479
310
  });
480
311
  return newHistoryItem;
@@ -483,7 +314,8 @@ function removeRawFromHistoryItem(historyItem) {
483
314
  }
484
315
  async function compressHistoryToFitContextSize({ history, contextShiftSize, contextShiftStrategy, contextShiftLastEvaluationMetadata, contextSize, tokenizer, chatWrapper, functions, documentFunctionParams }) {
485
316
  function checkIfHistoryFitsContext(history) {
486
- const { contextText } = chatWrapper.generateContextText(history, {
317
+ const { contextText } = chatWrapper.generateContextState({
318
+ chatHistory: history,
487
319
  availableFunctions: functions,
488
320
  documentFunctionParams
489
321
  });
@@ -537,27 +369,6 @@ async function compressHistoryToFitContextSize({ history, contextShiftSize, cont
537
369
  metadata
538
370
  };
539
371
  }
540
- function getQueuedTokensBeforeStopTrigger(triggeredStops, partiallyFreeTokens, tokenizer) {
541
- if (partiallyFreeTokens.tokens.length === 0 && partiallyFreeTokens.text.length === 0)
542
- return [];
543
- else if (partiallyFreeTokens.tokens.length !== 0 && partiallyFreeTokens.text.length === 0)
544
- return partiallyFreeTokens.tokens;
545
- else if (partiallyFreeTokens.tokens.length === 0 && partiallyFreeTokens.text.length !== 0)
546
- return tokenizer(partiallyFreeTokens.text);
547
- const triggerThatStartsWithStringIndex = triggeredStops.findIndex((trigger) => trigger.stopTrigger.length > 0 && typeof trigger.stopTrigger[0] === "string");
548
- const triggerThatStartsWithTokenIndex = triggeredStops.findIndex((trigger) => trigger.stopTrigger.length > 0 && typeof trigger.stopTrigger[0] !== "string");
549
- if (triggerThatStartsWithTokenIndex > 0 && triggerThatStartsWithStringIndex < 0)
550
- return partiallyFreeTokens.tokens;
551
- else if (triggerThatStartsWithStringIndex > 0 && triggerThatStartsWithTokenIndex < 0)
552
- return tokenizer(partiallyFreeTokens.text);
553
- const stringTokens = tokenizer(partiallyFreeTokens.text);
554
- if (stringTokens.length === partiallyFreeTokens.tokens.length &&
555
- stringTokens.every((value, index) => value === partiallyFreeTokens.tokens[index]))
556
- return stringTokens;
557
- else if (triggerThatStartsWithStringIndex < triggerThatStartsWithTokenIndex)
558
- return stringTokens;
559
- return partiallyFreeTokens.tokens;
560
- }
561
372
  function getLastTextModelResponseFromChatHistory(chatHistory) {
562
373
  if (chatHistory.length === 0 || chatHistory[chatHistory.length - 1].type !== "model")
563
374
  return "";
@@ -567,6 +378,11 @@ function getLastTextModelResponseFromChatHistory(chatHistory) {
567
378
  return modelResponse[modelResponse.length - 1];
568
379
  return "";
569
380
  }
381
+ function getLastUserTextFromChatHistory(chatHistory) {
382
+ if (chatHistory.length === 0 || chatHistory[chatHistory.length - 1].type !== "user")
383
+ return "";
384
+ return chatHistory[chatHistory.length - 1].text;
385
+ }
570
386
  function setLastModelTextResponseInChatHistory(chatHistory, textResponse) {
571
387
  const newChatHistory = chatHistory.slice();
572
388
  if (newChatHistory.length === 0 || newChatHistory[newChatHistory.length - 1].type !== "model")
@@ -589,23 +405,90 @@ function setLastModelTextResponseInChatHistory(chatHistory, textResponse) {
589
405
  modelResponse.push(textResponse);
590
406
  return newChatHistory;
591
407
  }
592
- async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHistoryCompressionMetadata, pendingTokensCount = 0, isFirstEvaluation, chatWrapper, lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift, sequence, minFreeContextTokens = 1, functions, documentFunctionParams }) {
408
+ function setLastUserTextInChatHistory(chatHistory, userText) {
409
+ const newChatHistory = chatHistory.slice();
410
+ if (newChatHistory.length === 0 || newChatHistory[newChatHistory.length - 1].type !== "user")
411
+ newChatHistory.push({
412
+ type: "user",
413
+ text: ""
414
+ });
415
+ const lastUserItem = newChatHistory[newChatHistory.length - 1];
416
+ const newLastUserItem = { ...lastUserItem };
417
+ newChatHistory[newChatHistory.length - 1] = newLastUserItem;
418
+ newLastUserItem.text = userText;
419
+ return newChatHistory;
420
+ }
421
+ function setLastTextInChatHistory(itemType, chatHistory, text) {
422
+ if (itemType === "user")
423
+ return setLastUserTextInChatHistory(chatHistory, text);
424
+ else
425
+ return setLastModelTextResponseInChatHistory(chatHistory, text);
426
+ }
427
+ function generateContextText(endWithUserText, chatWrapper, options) {
428
+ if (endWithUserText)
429
+ return generateContextTextThatEndsWithUserText(chatWrapper, options);
430
+ return chatWrapper.generateContextState(options);
431
+ }
432
+ function generateContextTextThatEndsWithUserText(chatWrapper, options) {
433
+ const lastUserText = getLastUserTextFromChatHistory(options.chatHistory);
434
+ const randomId = "W" + (Math.random()
435
+ .toString(36)
436
+ .slice(2)) + "W";
437
+ const { contextText, ...rest } = chatWrapper.generateContextState({
438
+ ...options,
439
+ chatHistory: setLastUserTextInChatHistory(options.chatHistory, lastUserText + randomId)
440
+ });
441
+ let newContextText = contextText;
442
+ for (let i = 0; i < newContextText.values.length; i++) {
443
+ const item = newContextText.values[i];
444
+ if (typeof item !== "string")
445
+ continue;
446
+ const randomTextIndex = item.indexOf(randomId);
447
+ if (randomTextIndex < 0)
448
+ continue;
449
+ const newValue = item.slice(0, randomTextIndex);
450
+ newContextText = LlamaText([
451
+ ...newContextText.values.slice(0, i),
452
+ newValue
453
+ ]);
454
+ return {
455
+ contextText: newContextText,
456
+ userTextSuffix: LlamaText([
457
+ item.slice(randomTextIndex + randomId.length),
458
+ ...newContextText.values.slice(i + 1)
459
+ ]),
460
+ ...rest
461
+ };
462
+ }
463
+ throw new Error("The random ID was not found in the context text. " +
464
+ `There might be an issue with the chat wrapper "${chatWrapper.wrapperName}" ` +
465
+ "where not all user messages are properly added to the the result LlamaText");
466
+ }
467
+ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHistoryCompressionMetadata, pendingTokensCount = 0, isFirstEvaluation, chatWrapper, lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift, sequence, minFreeContextTokens = 1, functions, documentFunctionParams, endWithUserText }) {
593
468
  if (sequence == null)
594
469
  throw new DisposedError();
595
470
  const model = sequence.model;
596
471
  const context = sequence.context;
597
472
  if (isFirstEvaluation && lastEvaluationContextWindowHistory != null && sequence.isLoadedToMemory) {
598
473
  const newContextWindow = lastEvaluationContextWindowHistory.slice();
599
- if (newContextWindow.length === 0 || newContextWindow[newContextWindow.length - 1].type !== "model")
474
+ if (endWithUserText) {
475
+ if (newContextWindow.length === 0 || newContextWindow[newContextWindow.length - 1].type !== "user")
476
+ newContextWindow.push({
477
+ type: "user",
478
+ text: ""
479
+ });
480
+ }
481
+ else if (newContextWindow.length === 0 || newContextWindow[newContextWindow.length - 1].type !== "model")
600
482
  newContextWindow.push({
601
483
  type: "model",
602
484
  response: []
603
485
  });
604
- const { contextText, stopGenerationTriggers, ignoreStartText, functionCall } = chatWrapper.generateContextText(newContextWindow, {
486
+ const { contextText, stopGenerationTriggers, ignoreStartText, functionCall, userTextSuffix } = generateContextText(endWithUserText, chatWrapper, {
487
+ chatHistory: newContextWindow,
605
488
  availableFunctions: functions,
606
489
  documentFunctionParams
607
490
  });
608
- const tokens = contextText.tokenize(model.tokenize);
491
+ const tokens = contextText.tokenize(model.tokenizer);
609
492
  if (tokens.length + pendingTokensCount + minFreeContextTokens < context.contextSize) {
610
493
  const { firstDifferentIndex } = sequence.compareContextTokens(tokens);
611
494
  const existingEvaluationPercentage = firstDifferentIndex / tokens.length;
@@ -618,7 +501,8 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
618
501
  newHistoryCompressionMetadata: lastHistoryCompressionMetadata,
619
502
  ignoreStartText: ignoreStartText ?? [],
620
503
  functionCallInitiallyEngaged: functionCall?.initiallyEngaged ?? false,
621
- disengageInitiallyEngagedFunctionCall: functionCall?.disengageInitiallyEngaged ?? []
504
+ disengageInitiallyEngagedFunctionCall: functionCall?.disengageInitiallyEngaged ?? [],
505
+ userTextSuffix
622
506
  };
623
507
  }
624
508
  }
@@ -631,36 +515,39 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
631
515
  : resolvedContextShift.size;
632
516
  const { compressedHistory, metadata } = await compressHistoryToFitContextSize({
633
517
  history: resolvedHistory,
634
- contextShiftSize: Math.max(contextShiftSize, minFreeContextTokens) + pendingTokensCount,
518
+ contextShiftSize: Math.max(minFreeContextTokens, Math.min(contextShiftSize, context.contextSize - pendingTokensCount)) + pendingTokensCount,
635
519
  contextShiftStrategy: resolvedContextShift.strategy,
636
520
  contextShiftLastEvaluationMetadata: resolvedContextShift.lastEvaluationMetadata,
637
521
  contextSize: context.contextSize,
638
- tokenizer: model.tokenize,
522
+ tokenizer: model.tokenizer,
639
523
  chatWrapper: chatWrapper,
640
524
  functions,
641
525
  documentFunctionParams
642
526
  });
643
- const { contextText, stopGenerationTriggers, ignoreStartText, functionCall } = chatWrapper.generateContextText(compressedHistory, {
527
+ const { contextText, stopGenerationTriggers, ignoreStartText, functionCall, userTextSuffix } = generateContextText(endWithUserText, chatWrapper, {
528
+ chatHistory: compressedHistory,
644
529
  availableFunctions: functions,
645
530
  documentFunctionParams
646
531
  });
647
532
  return {
648
533
  history: compressedHistory,
649
534
  stopGenerationTriggers,
650
- tokens: contextText.tokenize(model.tokenize),
535
+ tokens: contextText.tokenize(model.tokenizer),
651
536
  newResolvedHistory: resolvedHistory,
652
537
  newHistoryCompressionMetadata: metadata,
653
538
  ignoreStartText: ignoreStartText ?? [],
654
539
  functionCallInitiallyEngaged: functionCall?.initiallyEngaged ?? false,
655
- disengageInitiallyEngagedFunctionCall: functionCall?.disengageInitiallyEngaged ?? []
540
+ disengageInitiallyEngagedFunctionCall: functionCall?.disengageInitiallyEngaged ?? [],
541
+ userTextSuffix
656
542
  };
657
543
  }
658
544
  {
659
- const { contextText, stopGenerationTriggers, ignoreStartText, functionCall } = chatWrapper.generateContextText(resolvedHistory, {
545
+ const { contextText, stopGenerationTriggers, ignoreStartText, functionCall, userTextSuffix } = generateContextText(endWithUserText, chatWrapper, {
546
+ chatHistory: resolvedHistory,
660
547
  availableFunctions: functions,
661
548
  documentFunctionParams
662
549
  });
663
- const tokens = contextText.tokenize(model.tokenize);
550
+ const tokens = contextText.tokenize(model.tokenizer);
664
551
  if (tokens.length + pendingTokensCount + minFreeContextTokens < context.contextSize)
665
552
  return {
666
553
  history: resolvedHistory,
@@ -670,36 +557,1001 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
670
557
  newHistoryCompressionMetadata: lastHistoryCompressionMetadata,
671
558
  ignoreStartText: ignoreStartText ?? [],
672
559
  functionCallInitiallyEngaged: functionCall?.initiallyEngaged ?? false,
673
- disengageInitiallyEngagedFunctionCall: functionCall?.disengageInitiallyEngaged ?? []
560
+ disengageInitiallyEngagedFunctionCall: functionCall?.disengageInitiallyEngaged ?? [],
561
+ userTextSuffix
674
562
  };
675
563
  }
676
- const contextShiftSize = resolvedContextShift.size instanceof Function
564
+ const contextShiftSize = Math.min(context.contextSize, Math.max(1, Math.floor(resolvedContextShift.size instanceof Function
677
565
  ? await resolvedContextShift.size(sequence)
678
- : resolvedContextShift.size;
566
+ : resolvedContextShift.size)));
679
567
  const { compressedHistory, metadata } = await compressHistoryToFitContextSize({
680
568
  history: resolvedHistory,
681
- contextShiftSize: Math.max(contextShiftSize, minFreeContextTokens) + pendingTokensCount,
569
+ contextShiftSize: Math.max(minFreeContextTokens, Math.min(contextShiftSize, context.contextSize - pendingTokensCount)) + pendingTokensCount,
682
570
  contextShiftStrategy: resolvedContextShift.strategy,
683
571
  contextShiftLastEvaluationMetadata: resolvedContextShift.lastEvaluationMetadata,
684
572
  contextSize: context.contextSize,
685
- tokenizer: model.tokenize,
573
+ tokenizer: model.tokenizer,
686
574
  chatWrapper: chatWrapper,
687
575
  functions,
688
576
  documentFunctionParams
689
577
  });
690
- const { contextText, stopGenerationTriggers, ignoreStartText, functionCall } = chatWrapper.generateContextText(compressedHistory, {
578
+ const { contextText, stopGenerationTriggers, ignoreStartText, functionCall, userTextSuffix } = generateContextText(endWithUserText, chatWrapper, {
579
+ chatHistory: compressedHistory,
691
580
  availableFunctions: functions,
692
581
  documentFunctionParams
693
582
  });
694
583
  return {
695
584
  history: compressedHistory,
696
585
  stopGenerationTriggers,
697
- tokens: contextText.tokenize(model.tokenize),
586
+ tokens: contextText.tokenize(model.tokenizer),
698
587
  newResolvedHistory: resolvedHistory,
699
588
  newHistoryCompressionMetadata: metadata,
700
589
  ignoreStartText: ignoreStartText ?? [],
701
590
  functionCallInitiallyEngaged: functionCall?.initiallyEngaged ?? false,
702
- disengageInitiallyEngagedFunctionCall: functionCall?.disengageInitiallyEngaged ?? []
591
+ disengageInitiallyEngagedFunctionCall: functionCall?.disengageInitiallyEngaged ?? [],
592
+ userTextSuffix
703
593
  };
704
594
  }
595
+ class GenerateResponseState {
596
+ llamaChat;
597
+ chatWrapper;
598
+ history;
599
+ onTextChunk;
600
+ onToken;
601
+ signal;
602
+ stopOnAbortSignal;
603
+ maxTokens;
604
+ temperature;
605
+ minP;
606
+ topK;
607
+ topP;
608
+ seed;
609
+ grammar;
610
+ trimWhitespaceSuffix;
611
+ tokenBias;
612
+ evaluationPriority;
613
+ functions;
614
+ onFunctionCall;
615
+ documentFunctionParams;
616
+ maxParallelFunctionCalls;
617
+ contextShift;
618
+ customStopTriggers;
619
+ lastEvaluationContextWindowHistory;
620
+ minimumOverlapPercentageToPreventContextShift;
621
+ functionsEnabled;
622
+ repeatPenaltyEnabled;
623
+ resolvedContextShift;
624
+ resolvedRepeatPenalty;
625
+ lastModelResponse;
626
+ grammarEvaluationState;
627
+ functionNameGrammar;
628
+ functionsGrammar;
629
+ functionsEvaluationState;
630
+ streamRegulator = new TokenStreamRegulator();
631
+ stopGenerationDetector = new StopGenerationDetector();
632
+ customStopGenerationTriggersDetector = new StopGenerationDetector();
633
+ functionSyntaxStartDetector = new StopGenerationDetector();
634
+ disengageInitiallyEngagedFunctionMode = new StopGenerationDetector();
635
+ ignoreStartTextDetector = new StopGenerationDetector();
636
+ locksToReleaseOnValidGeneration = [];
637
+ resolvedHistory;
638
+ res = [];
639
+ pendingTokens = [];
640
+ ignoredStartTextTokens = [];
641
+ resFunctionCalls = [];
642
+ functionEvaluationMode = false;
643
+ currentFunctionCallPreviousText = LlamaText([]);
644
+ currentFunctionCallCurrentPartTokens = [];
645
+ functionEvaluationFunctionName = "";
646
+ currentFunctionCallPreviousPartLeftoverText = "";
647
+ removedStartTextToIgnore = false;
648
+ releasedPartiallyFreeTokensBeforeFunctionCallStartSyntax = false;
649
+ generatedTokens = 0;
650
+ isFirstEvaluation = true;
651
+ initiallyEngagedFunctionMode = false;
652
+ lastContextWindowHistory;
653
+ lastHistoryCompressionMetadata;
654
+ restartEvaluationIterator = false;
655
+ // context shift loop
656
+ shouldContextShift = false;
657
+ canAvoidReloadingHistory = false;
658
+ contextWindowTokens = [];
659
+ stopGenerationTriggers = [];
660
+ ignoreStartText = [];
661
+ functionCallInitiallyEngaged = false;
662
+ disengageInitiallyEngagedFunctionCall = [];
663
+ userTextSuffix = undefined;
664
+ tokens = [];
665
+ contextWindowLastModelResponse = "";
666
+ contextWindowsRes = [];
667
+ // token evaluation loop
668
+ evaluationIterator;
669
+ currentIteration;
670
+ currentIterationReplacementToken;
671
+ currentToken;
672
+ currentTokens = [];
673
+ currentText = "";
674
+ currentQueuedTokenRelease;
675
/**
 * Captures all options for a single generation run and derives the resolved state
 * (history copy, repeat-penalty config, grammars, stop triggers) used by the token loop.
 *
 * @param llamaChat - owning LlamaChat instance (provides model, sequence, disposed flag)
 * @param chatWrapper - chat wrapper whose settings drive function-call syntax
 * @param history - chat history to continue from
 * @param options - generation options; `repeatPenalty === false` disables the penalty,
 *     `lastEvaluationContextWindow` carries state from a previous evaluation
 * @throws the abort reason when `signal` is already aborted; DisposedError when `llamaChat` is disposed
 */
constructor(llamaChat, chatWrapper, history, { onTextChunk, onToken, signal, stopOnAbortSignal = false, maxTokens, temperature, minP, topK, topP, seed, grammar, trimWhitespaceSuffix = defaultTrimWhitespaceSuffix, repeatPenalty = {}, tokenBias, evaluationPriority = defaultEvaluationPriority, functions, onFunctionCall, documentFunctionParams, maxParallelFunctionCalls, contextShift = defaultContextShiftOptions, customStopTriggers, lastEvaluationContextWindow: { history: lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift = 0.5 } = {} } = {}) {
    this.llamaChat = llamaChat;
    this.chatWrapper = chatWrapper;
    this.history = history;
    // Event callbacks are wrapped so user callback errors don't break the generation loop
    // (assumed from the `safeEventCallback` name — TODO confirm against its definition).
    this.onTextChunk = safeEventCallback(onTextChunk);
    this.onToken = safeEventCallback(onToken);
    this.signal = signal;
    this.stopOnAbortSignal = stopOnAbortSignal;
    this.maxTokens = maxTokens;
    this.temperature = temperature;
    this.minP = minP;
    this.topK = topK;
    this.topP = topP;
    this.seed = seed;
    this.grammar = grammar;
    this.trimWhitespaceSuffix = trimWhitespaceSuffix;
    this.tokenBias = tokenBias;
    this.evaluationPriority = evaluationPriority;
    this.functions = functions;
    this.onFunctionCall = safeEventCallback(onFunctionCall);
    this.documentFunctionParams = documentFunctionParams;
    this.maxParallelFunctionCalls = maxParallelFunctionCalls;
    this.contextShift = contextShift;
    this.customStopTriggers = customStopTriggers;
    this.lastEvaluationContextWindowHistory = lastEvaluationContextWindowHistory;
    this.minimumOverlapPercentageToPreventContextShift = minimumOverlapPercentageToPreventContextShift;
    // Function calling is active only when at least one function is provided.
    this.functionsEnabled = (this.functions != null && Object.keys(this.functions).length > 0);
    if (this.signal?.aborted)
        throw this.signal.reason;
    if (this.llamaChat.disposed)
        throw new DisposedError();
    // When the sequence state was evicted from memory, drop the raw token data from
    // the history copy so it gets re-tokenized from text.
    this.resolvedHistory = this.llamaChat.sequence.isLoadedToMemory
        ? this.history.slice()
        : this.history.map(removeRawFromHistoryItem);
    this.resolvedContextShift = {
        ...defaultContextShiftOptions,
        ...removeNullFields(this.contextShift)
    };
    // `repeatPenalty === false` disables the penalty by zeroing its token window;
    // otherwise missing `lastTokens` falls back to the default.
    this.resolvedRepeatPenalty = repeatPenalty === false
        ? { lastTokens: 0 }
        : {
            ...(repeatPenalty ?? {}),
            lastTokens: repeatPenalty?.lastTokens ?? defaultRepeatPenaltyLastTokens
        };
    this.lastModelResponse = getLastTextModelResponseFromChatHistory(this.resolvedHistory);
    this.repeatPenaltyEnabled = this.resolvedRepeatPenalty.lastTokens > 0;
    this.grammarEvaluationState = this.grammar != null
        ? new LlamaGrammarEvaluationState({ model: this.llamaChat.model, grammar: this.grammar })
        : undefined;
    this.functionNameGrammar = this.functionsEnabled
        ? new FunctionCallNameGrammar(this.llamaChat.model._llama, this.functions, this.chatWrapper)
        : undefined;
    this.functionsGrammar = undefined;
    this.functionsEvaluationState = undefined;
    this.lastContextWindowHistory = this.resolvedHistory;
    this.lastHistoryCompressionMetadata = this.resolvedContextShift;
    if (this.customStopTriggers != null)
        StopGenerationDetector.resolveStopTriggers(this.customStopTriggers, this.llamaChat.model.tokenizer)
            .map((stopTrigger) => this.customStopGenerationTriggersDetector.addStopTrigger(stopTrigger));
    if (this.grammar != null)
        StopGenerationDetector.resolveStopTriggers(this.grammar.stopGenerationTriggers, this.llamaChat.model.tokenizer)
            .map((stopTrigger) => this.stopGenerationDetector.addStopTrigger(stopTrigger));
    // Detect the chat wrapper's function-call opening syntax (optional parallelism
    // section prefix followed by the call prefix) in the generated stream.
    if (this.functions != null && Object.keys(this.functions).length > 0)
        this.functionSyntaxStartDetector.addStopTrigger(StopGenerationDetector.resolveLlamaTextTrigger(LlamaText([
            this.chatWrapper.settings.functions?.parallelism?.call?.sectionPrefix ?? "",
            this.chatWrapper.settings.functions.call.prefix
        ]), this.llamaChat.model.tokenizer));
    // Bound so it can be passed as the `punishTokens` callback to sequence.evaluate().
    this.getPenaltyTokens = this.getPenaltyTokens.bind(this);
}
744
+ async dispose() {
745
+ await this.evaluationIterator?.return();
746
+ }
747
+ async [Symbol.asyncDispose]() {
748
+ await this.dispose();
749
+ }
750
+ ensureLastHistoryItemIsModel() {
751
+ if (this.resolvedHistory.length === 0 || this.resolvedHistory[this.resolvedHistory.length - 1].type !== "model")
752
+ this.resolvedHistory.push({
753
+ type: "model",
754
+ response: []
755
+ });
756
+ }
757
+ ensureLastHistoryItemIsUser() {
758
+ if (this.resolvedHistory.length === 0 || this.resolvedHistory[this.resolvedHistory.length - 1].type !== "user")
759
+ this.resolvedHistory.push({
760
+ type: "user",
761
+ text: ""
762
+ });
763
+ }
764
+ ensureNotAborted() {
765
+ if (this.signal?.aborted && (!this.stopOnAbortSignal || this.res.length === 0))
766
+ throw this.signal.reason;
767
+ if (this.llamaChat.disposed)
768
+ throw new DisposedError();
769
+ }
770
+ getPenaltyTokens() {
771
+ if (this.llamaChat.disposed)
772
+ throw new DisposedError();
773
+ let punishTokens = this.res.slice(-this.resolvedRepeatPenalty.lastTokens);
774
+ if (this.resolvedRepeatPenalty.punishTokensFilter != null)
775
+ punishTokens = this.resolvedRepeatPenalty.punishTokensFilter(punishTokens);
776
+ if (this.resolvedRepeatPenalty.penalizeNewLine == null || !this.resolvedRepeatPenalty.penalizeNewLine) {
777
+ const nlToken = this.llamaChat.model.tokens.nl;
778
+ if (nlToken != null)
779
+ punishTokens = punishTokens.filter(token => token !== nlToken);
780
+ }
781
+ return punishTokens;
782
+ }
783
+ getResolvedHistoryWithCurrentModelResponse() {
784
+ if (this.res.length === 0)
785
+ return this.resolvedHistory;
786
+ let modelResponse = this.llamaChat.model.detokenize(this.res);
787
+ if (this.grammar?.trimWhitespaceSuffix || this.trimWhitespaceSuffix)
788
+ modelResponse = modelResponse.trimEnd();
789
+ if (modelResponse === "")
790
+ return this.resolvedHistory;
791
+ return setLastModelTextResponseInChatHistory(this.resolvedHistory, this.lastModelResponse + modelResponse);
792
+ }
793
/**
 * Strips chat-wrapper "ignore at start" text from the pending tokens before anything
 * is released to the caller. Runs only once (guarded by `removedStartTextToIgnore`),
 * only before any token was emitted (`res` empty), and only when the ignore-start
 * detector matched — by default waiting for in-progress matches to settle unless
 * `forceRemove` is set.
 */
removeFoundStartIgnoreTextsFromPendingTokens(forceRemove = false) {
    if (!this.removedStartTextToIgnore && this.res.length === 0 && this.pendingTokens.length > 0 &&
        this.ignoreStartTextDetector.hasTriggeredStops && (forceRemove || !this.ignoreStartTextDetector.hasInProgressStops)) {
        // Reset the detector, then replay the pending tokens one by one to find the
        // longest (most exhaustive) matched ignore-text prefix.
        this.ignoreStartTextDetector.clearInProgressStops();
        this.ignoreStartTextDetector.clearTriggeredStops();
        let mostExhaustiveTriggeredStops = null;
        let mostExhaustiveTriggeredStopsLeftoverTokens = [];
        // Detokenizer context so multi-token characters round-trip correctly.
        const lastTokensForDetokenizer = resolveLastTokens([
            this.contextWindowTokens,
            this.ignoredStartTextTokens
        ]);
        for (let i = 0; i < this.pendingTokens.length; i++) {
            this.ignoreStartTextDetector.recordGeneration({
                text: this.llamaChat.model.detokenize([this.pendingTokens[i]], false, lastTokensForDetokenizer),
                tokens: [this.pendingTokens[i]],
                startNewChecks: i === 0,
                triggerMustStartWithGeneration: true
            });
            lastTokensForDetokenizer.push(this.pendingTokens[i]);
            if (this.ignoreStartTextDetector.hasTriggeredStops) {
                // A longer match was found — remember it and the tokens that follow it.
                mostExhaustiveTriggeredStops = this.ignoreStartTextDetector.getTriggeredStops();
                this.ignoreStartTextDetector.clearTriggeredStops();
                mostExhaustiveTriggeredStopsLeftoverTokens = this.pendingTokens.slice(i + 1);
            }
            else if (!this.ignoreStartTextDetector.hasInProgressStops)
                break; // no match can grow any further
        }
        if (mostExhaustiveTriggeredStops != null) {
            const [mostExhaustiveTriggeredStop] = mostExhaustiveTriggeredStops;
            if (mostExhaustiveTriggeredStop != null) {
                // Record the exact tokens that made up the ignored start text.
                this.ignoredStartTextTokens = mostExhaustiveTriggeredStop.stopTrigger
                    .map((stopTrigger) => {
                    if (typeof stopTrigger === "string")
                        return this.llamaChat.model.tokenize(stopTrigger, false, "trimLeadingSpace");
                    else
                        return [stopTrigger];
                })
                    .flat(1);
                // Rebuild pendingTokens from whatever generation remains after the stop
                // plus the leftover tokens collected above.
                // NOTE(review): the leftover-token array is placed un-spread and relies
                // on the trailing .flat(1) to merge it — intentional, but easy to misread.
                const newPendingTokens = [
                    ...mostExhaustiveTriggeredStop.remainingGeneration,
                    mostExhaustiveTriggeredStopsLeftoverTokens
                ]
                    .map((generation) => {
                    if (typeof generation === "string")
                        return this.llamaChat.model.tokenize(generation, false, "trimLeadingSpace");
                    else
                        return generation;
                })
                    .flat(1);
                this.pendingTokens.length = 0;
                pushAll(this.pendingTokens, newPendingTokens);
                this.removedStartTextToIgnore = true;
            }
        }
    }
}
849
/**
 * Per-round setup for the token loop: bail out on abort/dispose and reset the
 * context-shift flag (recomputed during the loop via updateShouldContextShift()).
 */
startTokenLoop() {
    this.ensureNotAborted();
    this.shouldContextShift = false;
}
853
+ getContextWindowFunctionCallsTokens() {
854
+ if (this.functionEvaluationMode === false)
855
+ return [];
856
+ else if (this.functionEvaluationMode === "prefixOrDisengage")
857
+ return [
858
+ ...LlamaText(this.currentFunctionCallPreviousText).tokenize(this.llamaChat.model.tokenizer, "trimLeadingSpace"),
859
+ ...this.currentFunctionCallCurrentPartTokens
860
+ ];
861
+ const text = [];
862
+ if (this.chatWrapper.settings.functions?.parallelism?.call?.sectionPrefix != null)
863
+ text.push(this.chatWrapper.settings.functions.parallelism.call.sectionPrefix);
864
+ for (let i = 0; i < this.resFunctionCalls.length; i++) {
865
+ const call = this.resFunctionCalls[i];
866
+ if (i > 0)
867
+ text.push(this.chatWrapper.settings.functions?.parallelism?.call?.betweenCalls ?? "");
868
+ text.push(call.raw);
869
+ }
870
+ text.push(this.currentFunctionCallPreviousText);
871
+ return [
872
+ ...LlamaText(text).tokenize(this.llamaChat.model.tokenizer, "trimLeadingSpace"),
873
+ ...this.currentFunctionCallCurrentPartTokens
874
+ ];
875
+ }
876
/**
 * (Re)builds the evaluation token list: the context-window history tokens plus ignored
 * start text, pending tokens, queued stream chunks and in-progress function-call tokens.
 * The expensive getContextWindow() computation is skipped when `avoidReloadingHistory`
 * is set and a previously loaded window is still usable (`canAvoidReloadingHistory`)
 * and the sequence is still in memory. If the skipped path overflows the context size,
 * it recurses once with a forced reload.
 * @returns `{userTextSuffix}` — text to append after the user message, per the chat wrapper
 */
async loadContextWindow(resolvedHistory, endWithUserText = false, avoidReloadingHistory = false) {
    const queuedChunkTokens = this.streamRegulator.getAllQueuedChunkTokens();
    const functionCallsTokens = this.getContextWindowFunctionCallsTokens();
    if (!avoidReloadingHistory || !this.canAvoidReloadingHistory || !this.llamaChat.sequence.isLoadedToMemory) {
        // Compute a fresh context window, possibly compressing history (context shift).
        const { history: contextWindowHistory, stopGenerationTriggers, tokens: contextWindowTokens, newResolvedHistory, newHistoryCompressionMetadata, ignoreStartText, functionCallInitiallyEngaged, disengageInitiallyEngagedFunctionCall, userTextSuffix } = await getContextWindow({
            resolvedHistory: resolvedHistory,
            resolvedContextShift: this.resolvedContextShift,
            lastHistoryCompressionMetadata: this.lastHistoryCompressionMetadata,
            // Reserve room for everything already generated but not yet in the window.
            pendingTokensCount: this.pendingTokens.length + queuedChunkTokens.length + functionCallsTokens.length,
            isFirstEvaluation: this.isFirstEvaluation,
            chatWrapper: this.chatWrapper,
            lastEvaluationContextWindowHistory: this.lastEvaluationContextWindowHistory,
            minimumOverlapPercentageToPreventContextShift: this.minimumOverlapPercentageToPreventContextShift,
            sequence: this.llamaChat.sequence,
            minFreeContextTokens: 1,
            functions: this.functionsEnabled ? this.functions : undefined,
            documentFunctionParams: this.documentFunctionParams,
            endWithUserText
        });
        this.ensureNotAborted();
        this.contextWindowTokens = contextWindowTokens;
        this.stopGenerationTriggers = stopGenerationTriggers;
        this.ignoreStartText = ignoreStartText;
        this.functionCallInitiallyEngaged = functionCallInitiallyEngaged;
        this.disengageInitiallyEngagedFunctionCall = disengageInitiallyEngagedFunctionCall;
        this.userTextSuffix = userTextSuffix;
        this.resolvedHistory = newResolvedHistory;
        this.lastHistoryCompressionMetadata = newHistoryCompressionMetadata;
        this.lastContextWindowHistory = contextWindowHistory;
        this.contextWindowLastModelResponse = getLastTextModelResponseFromChatHistory(contextWindowHistory);
        // Tokens generated against the previous window no longer apply.
        this.contextWindowsRes = [];
        this.canAvoidReloadingHistory = true;
    }
    this.tokens = [
        ...this.contextWindowTokens,
        ...this.ignoredStartTextTokens,
        ...this.pendingTokens,
        ...queuedChunkTokens,
        ...functionCallsTokens
    ];
    // The cached window may have grown too large; force a real reload once.
    if (avoidReloadingHistory && this.tokens.length >= this.llamaChat.sequence.context.contextSize - 1)
        return await this.loadContextWindow(resolvedHistory, endWithUserText, false);
    return {
        userTextSuffix: this.userTextSuffix
    };
}
922
+ addIgnoreStartTextTriggersFromChatWrapper() {
923
+ StopGenerationDetector.resolveStopTriggers(this.ignoreStartText, this.llamaChat.model.tokenizer)
924
+ .map((stopTrigger) => this.ignoreStartTextDetector.addStopTrigger(stopTrigger));
925
+ }
926
+ addStopGenerationTriggersFromChatWrapper() {
927
+ StopGenerationDetector.resolveStopTriggers(this.stopGenerationTriggers, this.llamaChat.model.tokenizer)
928
+ .map((stopTrigger) => this.stopGenerationDetector.addStopTrigger(stopTrigger));
929
+ }
930
+ initFunctions() {
931
+ this.initiallyEngagedFunctionMode = this.functionCallInitiallyEngaged;
932
+ if (this.initiallyEngagedFunctionMode) {
933
+ StopGenerationDetector.resolveStopTriggers(this.disengageInitiallyEngagedFunctionCall, this.llamaChat.model.tokenizer)
934
+ .map((stopTrigger) => this.disengageInitiallyEngagedFunctionMode.addStopTrigger(stopTrigger));
935
+ if (this.disengageInitiallyEngagedFunctionMode.hasTriggers) {
936
+ this.functionEvaluationMode = "prefixOrDisengage";
937
+ this.functionsGrammar = undefined;
938
+ this.functionsEvaluationState = undefined;
939
+ }
940
+ else {
941
+ this.functionEvaluationMode = "functionName";
942
+ }
943
+ this.restartEvaluationIterator = true;
944
+ }
945
+ }
946
/**
 * State machine driving function-call generation. Cycles through modes:
 *   "prefixOrDisengage" -> decide whether the model continues the engaged call or backs out;
 *   "functionName"      -> generate the function name under a name grammar;
 *   "params"            -> generate the call parameters under a params grammar;
 *   "sectionSuffixOrBetweenCalls" -> detect either the end of the calls section or another call.
 * Returns a final generation result (stop/abort/function-call results) or undefined when
 * generation should continue as plain text.
 * @param loadContextWindow - async callback that (re)loads the context window before evaluation
 */
async enterFunctionCallingLoop(loadContextWindow) {
    if (!this.functionsEnabled) {
        this.functionEvaluationMode = false;
        return undefined;
    }
    // eslint-disable-next-line no-constant-condition
    while (true) {
        if (this.functionEvaluationMode === "prefixOrDisengage") {
            this.functionsGrammar = undefined;
            this.functionsEvaluationState = undefined;
            this.currentFunctionCallPreviousText = LlamaText([]);
            this.currentFunctionCallCurrentPartTokens.length = 0;
            const prefixTokens = LlamaText(this.chatWrapper.settings.functions.call.prefix)
                .tokenize(this.llamaChat.model.tokenizer, "trimLeadingSpace");
            // Detects whether the call prefix was fully produced.
            const prefixDetector = new StopGenerationDetector();
            const prefixDetectorRecordedTokens = [];
            const afterPrefixLeftoverTokens = [];
            prefixDetector.addStopTrigger(StopGenerationDetector.resolveLlamaTextTrigger(LlamaText(this.chatWrapper.settings.functions.call.prefix), this.llamaChat.model.tokenizer));
            const lastTokensForDetokenizer = this.streamRegulator.getLastQueuedChunkTokens();
            // First, feed the known prefix tokens through the disengage detector — stop
            // as soon as a prefix token would rule out a disengage match.
            for (const prefixToken of prefixTokens) {
                const tokens = [prefixToken];
                const text = this.llamaChat.model.detokenize(tokens, false, lastTokensForDetokenizer);
                pushAll(lastTokensForDetokenizer, tokens);
                const disregardedPossibilities = this.disengageInitiallyEngagedFunctionMode
                    .getDisregardedPossibilitiesCountForAGeneration({
                    text,
                    tokens,
                    startNewChecks: this.currentFunctionCallCurrentPartTokens.length === 0
                });
                if (disregardedPossibilities > 0)
                    break;
                this.currentFunctionCallCurrentPartTokens.push(prefixToken);
                this.disengageInitiallyEngagedFunctionMode.recordGeneration({
                    text: text,
                    tokens: tokens,
                    startNewChecks: this.currentFunctionCallCurrentPartTokens.length === 1,
                    triggerMustStartWithGeneration: true
                });
                if (prefixDetector.hasTriggeredStops)
                    afterPrefixLeftoverTokens.push(prefixToken);
                else {
                    prefixDetector.recordGeneration({
                        text: text,
                        tokens: tokens,
                        startNewChecks: this.currentFunctionCallCurrentPartTokens.length === 1,
                        triggerMustStartWithGeneration: true
                    });
                    pushAll(prefixDetectorRecordedTokens, tokens);
                }
            }
            // Then generate tokens until either the disengage text or the prefix resolves.
            for await (const token of this.evaluateWithContextShift(loadContextWindow)) {
                const stopGenerationTriggerRes = this.handleStopGenerationTrigger("model");
                if (stopGenerationTriggerRes != null)
                    return stopGenerationTriggerRes;
                this.currentFunctionCallCurrentPartTokens.push(token);
                this.disengageInitiallyEngagedFunctionMode.recordGeneration({
                    text: this.currentText,
                    tokens: this.currentTokens,
                    startNewChecks: this.currentFunctionCallCurrentPartTokens.length === 1,
                    triggerMustStartWithGeneration: true
                });
                if (prefixDetector.hasTriggeredStops)
                    afterPrefixLeftoverTokens.push(token);
                else {
                    prefixDetector.recordGeneration({
                        text: this.currentText,
                        tokens: this.currentTokens,
                        startNewChecks: this.currentFunctionCallCurrentPartTokens.length === 1,
                        triggerMustStartWithGeneration: true
                    });
                    pushAll(prefixDetectorRecordedTokens, this.currentTokens);
                }
                if (this.disengageInitiallyEngagedFunctionMode.hasTriggeredStops ||
                    !this.disengageInitiallyEngagedFunctionMode.hasInProgressStops)
                    break;
            }
            const abortRes = this.handleAbortTrigger("model");
            if (abortRes != null)
                return abortRes;
            // Disengage text was produced: the model backed out of the call — replay the
            // buffered tokens as regular streamed output and leave function mode.
            if (this.disengageInitiallyEngagedFunctionMode.hasTriggeredStops) {
                const lastTokensForDetokenizer = this.streamRegulator.getLastQueuedChunkTokens();
                for (const token of this.currentFunctionCallCurrentPartTokens) {
                    this.currentToken = token;
                    this.currentTokens = [this.currentToken];
                    this.currentText = this.llamaChat.model.detokenize(this.currentTokens, false, lastTokensForDetokenizer);
                    pushAll(lastTokensForDetokenizer, this.currentTokens);
                    this.currentQueuedTokenRelease = this.streamRegulator.addChunk({
                        tokens: this.currentTokens,
                        text: this.currentText
                    });
                    this.recordStopGenerationEvaluation();
                }
                this.currentFunctionCallCurrentPartTokens.length = 0;
                this.functionEvaluationMode = false;
                return undefined;
            }
            // Prefix matched: carry any text generated past the prefix into name parsing.
            if (prefixDetector.hasTriggeredStops) {
                const triggeredStops = prefixDetector.getTriggeredStops();
                const { firstRemainingGenerationAfterStop, stopTrigger } = StopGenerationDetector.getFirstRemainingGenerationAfterStop(triggeredStops);
                this.currentFunctionCallPreviousPartLeftoverText = StopGenerationDetector.detokenizeRemainingGeneration(firstRemainingGenerationAfterStop, stopTrigger, this.llamaChat.model.tokenizer) + this.llamaChat.model.detokenize(afterPrefixLeftoverTokens, false, prefixDetectorRecordedTokens);
            }
            else
                this.currentFunctionCallPreviousPartLeftoverText = "";
            this.functionEvaluationMode = "functionName";
            this.currentFunctionCallCurrentPartTokens.length = 0;
            continue;
        }
        else if (this.functionEvaluationMode === "functionName") {
            const functionNameGenerationDoneDetector = new StopGenerationDetector();
            this.stopGenerationDetector.clearInProgressStops();
            this.customStopGenerationTriggersDetector.clearInProgressStops();
            this.currentFunctionCallPreviousText = LlamaText(this.chatWrapper.settings.functions.call.prefix);
            this.currentFunctionCallCurrentPartTokens.length = 0;
            // Constrain generation to valid function names.
            const functionNameGrammar = this.functionNameGrammar ?? new FunctionCallNameGrammar(this.llamaChat.model._llama, this.functions, this.chatWrapper);
            this.functionsGrammar = functionNameGrammar;
            this.functionsEvaluationState = new LlamaGrammarEvaluationState({
                model: this.llamaChat.model,
                grammar: this.functionsGrammar
            });
            StopGenerationDetector.resolveStopTriggers(this.functionsGrammar.stopGenerationTriggers, this.llamaChat.model.tokenizer)
                .map((stopTrigger) => functionNameGenerationDoneDetector.addStopTrigger(stopTrigger));
            // Re-feed leftover text from the previous mode through the grammar, token by
            // token, as long as the grammar accepts it.
            if (this.currentFunctionCallPreviousPartLeftoverText !== "") {
                const validFunctionNames = Object.keys(this.functions);
                const hasAnyFunctionStartWithLeftover = validFunctionNames.some((functionName) => functionName.startsWith(this.currentFunctionCallPreviousPartLeftoverText));
                if (hasAnyFunctionStartWithLeftover) {
                    const leftoverTokens = this.llamaChat.model.tokenize(this.currentFunctionCallPreviousPartLeftoverText, false, "trimLeadingSpace");
                    this.currentFunctionCallPreviousPartLeftoverText = "";
                    const lastTokens = [];
                    for (const leftoverToken of leftoverTokens) {
                        const canBeNextToken = LlamaSampler._canBeNextTokenForGrammarEvaluationState(this.llamaChat.model._llama, this.functionsEvaluationState, leftoverToken);
                        if (!canBeNextToken)
                            break;
                        LlamaSampler._acceptTokenOnGrammarEvaluationState(this.llamaChat.model._llama, this.functionsEvaluationState, leftoverToken);
                        this.currentFunctionCallCurrentPartTokens.push(leftoverToken);
                        functionNameGenerationDoneDetector.recordGeneration({
                            text: this.llamaChat.model.detokenize([leftoverToken], false, lastTokens),
                            tokens: [leftoverToken]
                        });
                        lastTokens.push(leftoverToken);
                    }
                }
            }
            for await (const token of this.evaluateWithContextShift(loadContextWindow)) {
                this.currentFunctionCallCurrentPartTokens.push(token);
                functionNameGenerationDoneDetector.recordGeneration({
                    text: this.currentText,
                    tokens: this.currentTokens
                });
                if (functionNameGenerationDoneDetector.hasTriggeredStops)
                    break;
            }
            const abortRes = this.handleAbortTrigger("model");
            if (abortRes != null)
                return abortRes;
            const functionCallNameText = this.llamaChat.model.detokenize(this.currentFunctionCallCurrentPartTokens);
            const functionName = functionNameGrammar.parseFunctionName(functionCallNameText);
            this.functionEvaluationFunctionName = functionName;
            this.functionEvaluationMode = "params";
            continue;
        }
        else if (this.functionEvaluationMode === "params") {
            this.currentFunctionCallPreviousText = LlamaText([
                this.chatWrapper.settings.functions.call.prefix,
                this.functionEvaluationFunctionName,
                this.chatWrapper.settings.functions.call.paramsPrefix
            ]);
            const lastPartTokens = resolveLastTokens([this.currentFunctionCallCurrentPartTokens]);
            this.currentFunctionCallCurrentPartTokens.length = 0;
            let params = undefined;
            let paramsText = "";
            const functionDefinition = this.functions[this.functionEvaluationFunctionName];
            if (functionDefinition == null)
                throw new Error(`Function "${this.functionEvaluationFunctionName}" is not provided in the functions object`);
            else if (functionDefinition.params == null) {
                // Parameterless function: no params generation needed.
                params = undefined;
                paramsText = "";
            }
            else {
                // Constrain generation to the function's params schema.
                const functionParamsGenerationDoneDetector = new StopGenerationDetector();
                const functionParamsGrammar = new FunctionCallParamsGrammar(this.llamaChat.model._llama, this.functions, this.chatWrapper, this.functionEvaluationFunctionName, functionDefinition.params);
                this.functionsGrammar = functionParamsGrammar;
                this.functionsEvaluationState = new LlamaGrammarEvaluationState({
                    model: this.llamaChat.model,
                    grammar: this.functionsGrammar
                });
                StopGenerationDetector.resolveStopTriggers(this.functionsGrammar.stopGenerationTriggers, this.llamaChat.model.tokenizer)
                    .map((stopTrigger) => functionParamsGenerationDoneDetector.addStopTrigger(stopTrigger));
                for await (const token of this.evaluateWithContextShift(loadContextWindow)) {
                    this.currentFunctionCallCurrentPartTokens.push(token);
                    functionParamsGenerationDoneDetector.recordGeneration({
                        text: this.currentText,
                        tokens: this.currentTokens
                    });
                    if (functionParamsGenerationDoneDetector.hasTriggeredStops)
                        break;
                }
                const abortRes = this.handleAbortTrigger("model");
                if (abortRes != null)
                    return abortRes;
                const functionCallParamsText = this.llamaChat.model.detokenize(this.currentFunctionCallCurrentPartTokens, false, lastPartTokens);
                const parsedFunctionParams = functionParamsGrammar.parseParams(functionCallParamsText);
                params = parsedFunctionParams.params;
                paramsText = parsedFunctionParams.raw;
            }
            const functionCallText = LlamaText([
                this.chatWrapper.settings.functions.call.prefix,
                this.functionEvaluationFunctionName,
                this.chatWrapper.settings.functions.call.paramsPrefix,
                paramsText,
                this.chatWrapper.settings.functions.call.suffix
            ]);
            this.resFunctionCalls.push({
                functionName: this.functionEvaluationFunctionName,
                params,
                raw: functionCallText
            });
            // structuredClone protects internal state from callback mutation.
            this.onFunctionCall?.({
                functionName: this.functionEvaluationFunctionName,
                params: structuredClone(params),
                raw: functionCallText.toJSON()
            });
            this.currentFunctionCallPreviousText = LlamaText([]);
            this.currentFunctionCallCurrentPartTokens.length = 0;
            this.functionEvaluationFunctionName = "";
            // Stop collecting calls when parallel calls are unsupported or the cap is hit.
            if (this.chatWrapper.settings.functions.parallelism == null || (this.maxParallelFunctionCalls != null && this.maxParallelFunctionCalls <= this.resFunctionCalls.length)) {
                this.functionEvaluationMode = false;
                return this.returnFunctionCallResults();
            }
            this.functionEvaluationMode = "sectionSuffixOrBetweenCalls";
            continue;
        }
        else if (this.functionEvaluationMode === "sectionSuffixOrBetweenCalls") {
            // Decide between "calls section ended" (section suffix / EOS / EOT) and
            // "another call follows".
            const sectionSuffixDetector = new StopGenerationDetector();
            let isFirstToken = true;
            this.functionsGrammar = undefined;
            this.functionsEvaluationState = undefined;
            this.currentFunctionCallPreviousText = LlamaText([]);
            this.currentFunctionCallCurrentPartTokens.length = 0;
            StopGenerationDetector.resolveStopTriggers([
                ...(this.chatWrapper.settings.functions.parallelism?.call?.sectionSuffix != null
                    ? [this.chatWrapper.settings.functions.parallelism?.call?.sectionSuffix]
                    : []),
                LlamaText(new SpecialToken("EOS")),
                LlamaText(new SpecialToken("EOT"))
            ], this.llamaChat.model.tokenizer)
                .map((stopTrigger) => sectionSuffixDetector.addStopTrigger(stopTrigger));
            for await (const token of this.evaluateWithContextShift(loadContextWindow)) {
                this.currentFunctionCallCurrentPartTokens.push(token);
                sectionSuffixDetector.recordGeneration({
                    text: this.currentText,
                    tokens: this.currentTokens,
                    startNewChecks: isFirstToken,
                    triggerMustStartWithGeneration: true
                });
                isFirstToken = false;
                if (sectionSuffixDetector.hasTriggeredStops || !sectionSuffixDetector.hasInProgressStops)
                    break;
            }
            const abortRes = this.handleAbortTrigger("model");
            if (abortRes != null)
                return abortRes;
            if (sectionSuffixDetector.hasTriggeredStops) {
                this.functionEvaluationMode = false;
                return this.returnFunctionCallResults();
            }
            // No suffix — expect another call in the section.
            this.functionEvaluationMode = "functionName";
            this.initiallyEngagedFunctionMode = false;
            continue;
        }
        break;
    }
    return undefined;
}
1219
+ releasePartiallyFreeTokensBeforeFunctionCallStart() {
1220
+ if (this.releasedPartiallyFreeTokensBeforeFunctionCallStartSyntax)
1221
+ return;
1222
+ this.stopGenerationDetector.clearInProgressStops();
1223
+ this.customStopGenerationTriggersDetector.clearInProgressStops();
1224
+ pushAll(this.pendingTokens, this.streamRegulator.popFreeChunkTokens());
1225
+ const triggeredStops = this.functionSyntaxStartDetector.getTriggeredStops();
1226
+ const partiallyFreeTokens = this.streamRegulator.getPartiallyFreeChunk(this.llamaChat.model.tokenizer);
1227
+ const queuedTokensBeforeStopTrigger = getQueuedTokensBeforeStopTrigger(triggeredStops, partiallyFreeTokens, this.llamaChat.model.tokenizer);
1228
+ pushAll(this.pendingTokens, queuedTokensBeforeStopTrigger);
1229
+ this.removeFoundStartIgnoreTextsFromPendingTokens(true);
1230
+ this.pushPendingTokensAndCallOnToken();
1231
+ this.streamRegulator.clearQueue();
1232
+ this.releasedPartiallyFreeTokensBeforeFunctionCallStartSyntax = true;
1233
+ }
1234
+ returnFunctionCallResults() {
1235
+ if (this.resFunctionCalls.length > 0) {
1236
+ this.releasePartiallyFreeTokensBeforeFunctionCallStart();
1237
+ let modelResponse = this.llamaChat.model.detokenize(this.res);
1238
+ let contextWindowModelResponse = this.llamaChat.model.detokenize(this.contextWindowsRes);
1239
+ if (this.grammar?.trimWhitespaceSuffix || this.trimWhitespaceSuffix) {
1240
+ modelResponse = modelResponse.trimEnd();
1241
+ contextWindowModelResponse = contextWindowModelResponse.trimEnd();
1242
+ }
1243
+ return {
1244
+ response: modelResponse,
1245
+ lastEvaluation: {
1246
+ contextWindow: setLastTextInChatHistory("model", this.lastContextWindowHistory, this.contextWindowLastModelResponse + contextWindowModelResponse),
1247
+ cleanHistory: setLastTextInChatHistory("model", this.resolvedHistory, this.lastModelResponse + modelResponse),
1248
+ contextShiftMetadata: this.lastHistoryCompressionMetadata
1249
+ },
1250
+ functionCalls: this.resFunctionCalls.map((functionCall) => {
1251
+ return {
1252
+ functionName: functionCall.functionName,
1253
+ params: functionCall.params,
1254
+ raw: functionCall.raw.toJSON()
1255
+ };
1256
+ }), // prevent infinite TS type instantiation
1257
+ metadata: {
1258
+ stopReason: "functionCalls"
1259
+ }
1260
+ };
1261
+ }
1262
+ return undefined;
1263
+ }
1264
/**
 * Async generator yielding one token at a time, transparently performing a context
 * shift (reload + realign + fresh iterator) whenever the window fills up.
 * Exits via `return` on abort; callers normally stop consumption by breaking out of
 * their for-await loop (which closes this generator). If evaluation ends without a
 * pending context shift, the context is considered too small and an error is thrown.
 * @param loadContextWindow - async callback that (re)loads the context window
 */
async *evaluateWithContextShift(loadContextWindow) {
    while (true) {
        this.startTokenLoop();
        await loadContextWindow();
        await this.alignCurrentSequenceStateWithCurrentTokens();
        await this.createNewEvaluationIterator();
        while (await this.iterateEvaluation()) {
            if (this.currentToken == null)
                break;
            yield this.currentToken;
            if (this.shouldAbort)
                return;
            if (this.updateShouldContextShift())
                break; // window full — restart the outer loop to shift the context
            if (this.restartEvaluationIterator) {
                // Sampling configuration changed (e.g. initFunctions) — rebuild the iterator.
                await this.createNewEvaluationIterator();
            }
        }
        this.isFirstEvaluation = false;
        if (this.shouldContextShift)
            continue;
        break;
    }
    // Reached only when the iterator ends without yielding further tokens and no
    // context shift is pending.
    throw new Error("The context size is too small to generate a response");
}
1289
+ async alignCurrentSequenceStateWithCurrentTokens() {
1290
+ let { firstDifferentIndex } = this.llamaChat.sequence.compareContextTokens(this.tokens);
1291
+ // we need to decode at least one token to generate a response
1292
+ if (firstDifferentIndex === this.tokens.length && firstDifferentIndex > 0)
1293
+ firstDifferentIndex -= 1;
1294
+ this.tokens.splice(0, firstDifferentIndex);
1295
+ if (firstDifferentIndex < this.llamaChat.sequence.nextTokenIndex) {
1296
+ await this.llamaChat.sequence.eraseContextTokenRanges([{
1297
+ start: firstDifferentIndex,
1298
+ end: this.llamaChat.sequence.nextTokenIndex
1299
+ }]);
1300
+ this.ensureNotAborted();
1301
+ }
1302
+ }
1303
+ async evaluateWithoutGeneratingNewTokens() {
1304
+ if (this.evaluationIterator != null)
1305
+ await this.evaluationIterator.return();
1306
+ await this.llamaChat.sequence.evaluateWithoutGeneratingNewTokens(this.tokens, removeNullFields({
1307
+ evaluationPriority: this.evaluationPriority
1308
+ }));
1309
+ }
1310
+ async createNewEvaluationIterator() {
1311
+ if (this.evaluationIterator != null)
1312
+ await this.evaluationIterator.return();
1313
+ this.currentIterationReplacementToken = undefined;
1314
+ this.restartEvaluationIterator = false;
1315
+ this.evaluationIterator = this.llamaChat.sequence.evaluate(this.tokens, removeNullFields({
1316
+ temperature: this.temperature,
1317
+ minP: this.minP,
1318
+ topK: this.topK,
1319
+ topP: this.topP,
1320
+ seed: this.seed,
1321
+ grammarEvaluationState: () => {
1322
+ if (this.functionEvaluationMode !== false)
1323
+ return this.functionsEvaluationState;
1324
+ return this.grammarEvaluationState;
1325
+ },
1326
+ repeatPenalty: !this.repeatPenaltyEnabled ? undefined : {
1327
+ punishTokens: this.getPenaltyTokens,
1328
+ maxPunishTokens: this.resolvedRepeatPenalty.lastTokens,
1329
+ penalty: this.resolvedRepeatPenalty.penalty,
1330
+ frequencyPenalty: this.resolvedRepeatPenalty.frequencyPenalty,
1331
+ presencePenalty: this.resolvedRepeatPenalty.presencePenalty
1332
+ },
1333
+ tokenBias: this.tokenBias,
1334
+ evaluationPriority: this.evaluationPriority,
1335
+ yieldEogToken: true
1336
+ }));
1337
+ }
1338
+ async iterateEvaluation() {
1339
+ this.currentIteration = await this.evaluationIterator?.next(this.currentIterationReplacementToken);
1340
+ this.currentIterationReplacementToken = undefined;
1341
+ this.ensureNotAborted();
1342
+ this.generatedTokens++;
1343
+ if (this.currentIteration != null && this.currentIteration?.done !== true) {
1344
+ this.currentToken = this.currentIteration.value;
1345
+ this.currentTokens = [this.currentToken];
1346
+ this.currentText = this.llamaChat.model.detokenize(this.currentTokens, false, this.getLastTokens());
1347
+ if (this.functionEvaluationMode === false)
1348
+ this.currentQueuedTokenRelease = this.streamRegulator.addChunk({
1349
+ tokens: this.currentTokens,
1350
+ text: this.currentText
1351
+ });
1352
+ else
1353
+ this.currentQueuedTokenRelease = undefined;
1354
+ return true;
1355
+ }
1356
+ return false;
1357
+ }
1358
+ waitOnPartialCharactersOrWhiteSpaceTokens() {
1359
+ if (this.currentText.endsWith(UNKNOWN_UNICODE_CHAR) || ((this.grammar?.trimWhitespaceSuffix || this.trimWhitespaceSuffix) && this.currentText?.trim() === "") || (this.currentText === "" && this.locksToReleaseOnValidGeneration.length > 0 &&
1360
+ !this.llamaChat.model.isSpecialToken(this.currentToken))) {
1361
+ if (this.currentQueuedTokenRelease != null)
1362
+ this.locksToReleaseOnValidGeneration.push(this.currentQueuedTokenRelease.createTextIndexLock(0));
1363
+ }
1364
+ else {
1365
+ while (this.locksToReleaseOnValidGeneration.length > 0)
1366
+ this.locksToReleaseOnValidGeneration.shift().dispose();
1367
+ }
1368
+ }
1369
/**
 * Feeds the current generation chunk into the function-call-syntax detector and, when the
 * function-call start syntax has been fully matched, switches into function-name evaluation mode.
 * On a match it resets both stop detectors, flushes free stream tokens (minus the trigger itself)
 * into `pendingTokens`, and stores any text generated after the trigger as leftover for the
 * function-call parsing that follows.
 */
detectAndHandleFunctionStartSyntax() {
    this.functionSyntaxStartDetector.recordGeneration({
        text: this.currentText,
        tokens: this.currentTokens,
        queuedTokenRelease: this.currentQueuedTokenRelease
    });
    // only switch modes when functions are enabled, not already inside a function call,
    // and the start syntax fully matched on a chunk that is queued for release
    if (this.currentQueuedTokenRelease != null && this.functionEvaluationMode === false && this.functionsEnabled &&
        this.functionSyntaxStartDetector.hasTriggeredStops) {
        this.functionEvaluationMode = "functionName";
        // lock the chunk so the function-call syntax is not streamed to the user
        this.currentQueuedTokenRelease.createTextIndexLock(0);
        // the regular stop detectors must not fire on function-call syntax - reset them
        this.stopGenerationDetector.clearTriggeredStops();
        this.stopGenerationDetector.clearInProgressStops();
        this.customStopGenerationTriggersDetector.clearTriggeredStops();
        this.customStopGenerationTriggersDetector.clearInProgressStops();
        pushAll(this.pendingTokens, this.streamRegulator.popFreeChunkTokens());
        const triggeredStops = this.functionSyntaxStartDetector.getTriggeredStops();
        const partiallyFreeTokens = this.streamRegulator.getPartiallyFreeChunk(this.llamaChat.model.tokenizer);
        // keep only the tokens generated before the function-call trigger as user-visible output
        const queuedTokensBeforeStopTrigger = getQueuedTokensBeforeStopTrigger(triggeredStops, partiallyFreeTokens, this.llamaChat.model.tokenizer);
        pushAll(this.pendingTokens, queuedTokensBeforeStopTrigger);
        const { firstRemainingGenerationAfterStop, stopTrigger } = StopGenerationDetector.getFirstRemainingGenerationAfterStop(triggeredStops);
        const remainingTextAfterStop = StopGenerationDetector.detokenizeRemainingGeneration(firstRemainingGenerationAfterStop, stopTrigger, this.llamaChat.model.tokenizer);
        // text generated after the trigger belongs to the function call being parsed next
        this.currentFunctionCallPreviousPartLeftoverText = remainingTextAfterStop;
    }
}
1393
+ recordStopGenerationEvaluation() {
1394
+ this.stopGenerationDetector.recordGeneration({
1395
+ text: this.currentText,
1396
+ tokens: this.currentTokens,
1397
+ queuedTokenRelease: this.currentQueuedTokenRelease
1398
+ });
1399
+ this.customStopGenerationTriggersDetector.recordGeneration({
1400
+ text: this.currentText,
1401
+ tokens: this.currentTokens,
1402
+ queuedTokenRelease: this.currentQueuedTokenRelease
1403
+ });
1404
+ if (this.llamaChat.model.isEogToken(this.currentToken))
1405
+ this.currentQueuedTokenRelease?.createTokenIndexLock(0);
1406
+ }
1407
+ popStreamRegulatorFreeTokens() {
1408
+ pushAll(this.pendingTokens, this.streamRegulator.popFreeChunkTokens());
1409
+ }
1410
/**
 * Checks whether generation should stop - due to a built-in stop trigger, a custom stop
 * trigger, or an EOG token - and if so, finalizes the response and returns it together
 * with the updated chat histories and stop metadata.
 * @returns the finalized response object, or `undefined` when generation should continue
 */
handleStopGenerationTrigger(lastHistoryItemType) {
    if (this.stopGenerationDetector.hasTriggeredStops || this.customStopGenerationTriggersDetector.hasTriggeredStops ||
        this.llamaChat.model.isEogToken(this.currentToken)) {
        // discard partially-matched stops; only fully-triggered ones matter now
        this.stopGenerationDetector.clearInProgressStops();
        this.customStopGenerationTriggersDetector.clearInProgressStops();
        pushAll(this.pendingTokens, this.streamRegulator.popFreeChunkTokens());
        // built-in stop triggers take precedence over custom ones
        const triggeredStops = this.stopGenerationDetector.hasTriggeredStops
            ? this.stopGenerationDetector.getTriggeredStops()
            : this.customStopGenerationTriggersDetector.getTriggeredStops();
        const partiallyFreeTokens = this.streamRegulator.getPartiallyFreeChunk(this.llamaChat.model.tokenizer);
        // keep only the tokens generated before the stop trigger itself
        const queuedTokensBeforeStopTrigger = getQueuedTokensBeforeStopTrigger(triggeredStops, partiallyFreeTokens, this.llamaChat.model.tokenizer);
        pushAll(this.pendingTokens, queuedTokensBeforeStopTrigger);
        const { firstRemainingGenerationAfterStop } = StopGenerationDetector.getFirstRemainingGenerationAfterStop(triggeredStops);
        this.removeFoundStartIgnoreTextsFromPendingTokens(true);
        // flush everything still pending into the final response
        this.pushPendingTokensAndCallOnToken();
        let modelResponse = this.llamaChat.model.detokenize(this.res);
        let contextWindowModelResponse = this.llamaChat.model.detokenize(this.contextWindowsRes);
        if (this.grammar?.trimWhitespaceSuffix || this.trimWhitespaceSuffix) {
            modelResponse = modelResponse.trimEnd();
            contextWindowModelResponse = contextWindowModelResponse.trimEnd();
        }
        const lastEvaluation = {
            contextWindow: setLastTextInChatHistory(lastHistoryItemType, this.lastContextWindowHistory, this.contextWindowLastModelResponse + contextWindowModelResponse),
            cleanHistory: setLastTextInChatHistory(lastHistoryItemType, this.resolvedHistory, this.lastModelResponse + modelResponse),
            contextShiftMetadata: this.lastHistoryCompressionMetadata
        };
        const isEogToken = this.llamaChat.model.isEogToken(this.currentToken);
        if (isEogToken || this.stopGenerationDetector.hasTriggeredStops) {
            return {
                response: modelResponse,
                lastEvaluation,
                metadata: {
                    remainingGenerationAfterStop: firstRemainingGenerationAfterStop,
                    stopReason: isEogToken
                        ? "eogToken"
                        : "stopGenerationTrigger"
                }
            };
        }
        // reached only when a custom stop trigger fired (no built-in stop and no EOG token)
        return {
            response: modelResponse,
            lastEvaluation,
            metadata: {
                remainingGenerationAfterStop: firstRemainingGenerationAfterStop,
                stopReason: "customStopTrigger",
                customStopTrigger: triggeredStops[0].stopTrigger
            }
        };
    }
    return undefined;
}
1461
+ spliceIgnoreStartTextDetectedTokens() {
1462
+ if (this.res.length === 0) {
1463
+ this.ignoreStartTextDetector.clearInProgressStops();
1464
+ this.ignoreStartTextDetector.clearTriggeredStops();
1465
+ const lastTokensForDetokenizer = resolveLastTokens([
1466
+ this.contextWindowTokens,
1467
+ this.ignoredStartTextTokens
1468
+ ]);
1469
+ this.ignoreStartTextDetector.recordGeneration({
1470
+ text: this.llamaChat.model.detokenize(this.pendingTokens, false, lastTokensForDetokenizer),
1471
+ tokens: this.pendingTokens
1472
+ });
1473
+ }
1474
+ }
1475
+ isMaxTokensTriggered() {
1476
+ return this.maxTokens != null && this.maxTokens > 0 && this.generatedTokens >= this.maxTokens;
1477
+ }
1478
+ moveFreePendingTokensToRes(removeFoundStartIgnoreTextsFromPendingTokens = true) {
1479
+ if (this.pendingTokens.length > 0 && (this.isMaxTokensTriggered() || !this.ignoreStartTextDetector.hasInProgressStops)) {
1480
+ if (removeFoundStartIgnoreTextsFromPendingTokens)
1481
+ this.removeFoundStartIgnoreTextsFromPendingTokens();
1482
+ this.pushPendingTokensAndCallOnToken();
1483
+ }
1484
+ }
1485
+ handleMaxTokensTrigger(lastHistoryItemType) {
1486
+ if (this.isMaxTokensTriggered()) {
1487
+ let modelResponse = this.llamaChat.model.detokenize(this.res);
1488
+ let contextWindowModelResponse = this.llamaChat.model.detokenize(this.contextWindowsRes);
1489
+ if (this.grammar?.trimWhitespaceSuffix || this.trimWhitespaceSuffix) {
1490
+ modelResponse = modelResponse.trimEnd();
1491
+ contextWindowModelResponse = contextWindowModelResponse.trimEnd();
1492
+ }
1493
+ return {
1494
+ response: modelResponse,
1495
+ lastEvaluation: {
1496
+ contextWindow: setLastTextInChatHistory(lastHistoryItemType, this.lastContextWindowHistory, this.contextWindowLastModelResponse + contextWindowModelResponse),
1497
+ cleanHistory: setLastTextInChatHistory(lastHistoryItemType, this.resolvedHistory, this.lastModelResponse + modelResponse),
1498
+ contextShiftMetadata: this.lastHistoryCompressionMetadata
1499
+ },
1500
+ metadata: {
1501
+ stopReason: "maxTokens"
1502
+ }
1503
+ };
1504
+ }
1505
+ return undefined;
1506
+ }
1507
+ updateShouldContextShift() {
1508
+ this.shouldContextShift = this.llamaChat.sequence.nextTokenIndex >= this.llamaChat.context.contextSize - 1;
1509
+ return this.shouldContextShift;
1510
+ }
1511
+ get shouldAbort() {
1512
+ return !!(this.signal?.aborted && this.stopOnAbortSignal);
1513
+ }
1514
+ handleAbortTrigger(lastHistoryItemType) {
1515
+ if (this.shouldAbort && this.signal?.aborted && this.stopOnAbortSignal) {
1516
+ if (this.res.length === 0)
1517
+ throw this.signal.reason;
1518
+ let modelResponse = this.llamaChat.model.detokenize(this.res);
1519
+ let contextWindowModelResponse = this.llamaChat.model.detokenize(this.contextWindowsRes);
1520
+ if (this.grammar?.trimWhitespaceSuffix || this.trimWhitespaceSuffix) {
1521
+ modelResponse = modelResponse.trimEnd();
1522
+ contextWindowModelResponse = contextWindowModelResponse.trimEnd();
1523
+ }
1524
+ return {
1525
+ response: modelResponse,
1526
+ lastEvaluation: {
1527
+ contextWindow: setLastTextInChatHistory(lastHistoryItemType, this.lastContextWindowHistory, this.contextWindowLastModelResponse + contextWindowModelResponse),
1528
+ cleanHistory: setLastTextInChatHistory(lastHistoryItemType, this.resolvedHistory, this.lastModelResponse + modelResponse),
1529
+ contextShiftMetadata: this.lastHistoryCompressionMetadata
1530
+ },
1531
+ metadata: {
1532
+ stopReason: "abort"
1533
+ }
1534
+ };
1535
+ }
1536
+ return undefined;
1537
+ }
1538
+ pushPendingTokensAndCallOnToken() {
1539
+ if (this.pendingTokens.length === 0)
1540
+ return;
1541
+ this.onToken?.(this.pendingTokens.slice());
1542
+ this.onTextChunk?.(this.llamaChat.model.detokenize(this.pendingTokens, false, this.res));
1543
+ pushAll(this.res, this.pendingTokens);
1544
+ pushAll(this.contextWindowsRes, this.pendingTokens);
1545
+ this.pendingTokens.length = 0;
1546
+ }
1547
+ getLastTokens(maxTokens = maxRecentDetokenizerTokens) {
1548
+ return resolveLastTokens([
1549
+ this.contextWindowTokens,
1550
+ this.ignoredStartTextTokens,
1551
+ this.pendingTokens,
1552
+ this.streamRegulator.getLastQueuedChunkTokens(maxTokens),
1553
+ this.getContextWindowFunctionCallsTokens()
1554
+ ], maxTokens);
1555
+ }
1556
+ }
705
1557
  //# sourceMappingURL=LlamaChat.js.map