node-llama-cpp 3.0.0-beta.4 → 3.0.0-beta.40

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (749)
  1. package/README.md +14 -11
  2. package/bins/_linux-x64-cuda.moved.txt +1 -0
  3. package/bins/_win-x64-cuda.moved.txt +1 -0
  4. package/bins/linux-arm64/_nlcBuildMetadata.json +1 -0
  5. package/bins/linux-arm64/libggml.so +0 -0
  6. package/bins/linux-arm64/libllama.so +0 -0
  7. package/bins/linux-arm64/llama-addon.node +0 -0
  8. package/bins/linux-armv7l/_nlcBuildMetadata.json +1 -0
  9. package/bins/linux-armv7l/libggml.so +0 -0
  10. package/bins/linux-armv7l/libllama.so +0 -0
  11. package/bins/linux-armv7l/llama-addon.node +0 -0
  12. package/bins/linux-x64/_nlcBuildMetadata.json +1 -0
  13. package/bins/linux-x64/libggml.so +0 -0
  14. package/bins/linux-x64/libllama.so +0 -0
  15. package/bins/linux-x64/llama-addon.node +0 -0
  16. package/bins/linux-x64-vulkan/_nlcBuildMetadata.json +1 -0
  17. package/bins/linux-x64-vulkan/libggml.so +0 -0
  18. package/bins/linux-x64-vulkan/libllama.so +0 -0
  19. package/bins/linux-x64-vulkan/llama-addon.node +0 -0
  20. package/bins/linux-x64-vulkan/vulkan-shaders-gen +0 -0
  21. package/bins/mac-arm64-metal/_nlcBuildMetadata.json +1 -0
  22. package/bins/mac-arm64-metal/ggml-common.h +1833 -0
  23. package/bins/mac-arm64-metal/ggml-metal.metal +6168 -0
  24. package/bins/mac-arm64-metal/libggml.dylib +0 -0
  25. package/bins/mac-arm64-metal/libllama.dylib +0 -0
  26. package/bins/mac-arm64-metal/llama-addon.node +0 -0
  27. package/bins/mac-x64/_nlcBuildMetadata.json +1 -0
  28. package/bins/mac-x64/libggml.dylib +0 -0
  29. package/bins/mac-x64/libllama.dylib +0 -0
  30. package/bins/mac-x64/llama-addon.node +0 -0
  31. package/bins/win-arm64/_nlcBuildMetadata.json +1 -0
  32. package/bins/win-arm64/ggml.dll +0 -0
  33. package/bins/win-arm64/llama-addon.exp +0 -0
  34. package/bins/win-arm64/llama-addon.lib +0 -0
  35. package/bins/win-arm64/llama-addon.node +0 -0
  36. package/bins/win-arm64/llama.dll +0 -0
  37. package/bins/win-x64/_nlcBuildMetadata.json +1 -0
  38. package/bins/win-x64/ggml.dll +0 -0
  39. package/bins/win-x64/llama-addon.exp +0 -0
  40. package/bins/win-x64/llama-addon.lib +0 -0
  41. package/bins/win-x64/llama-addon.node +0 -0
  42. package/bins/win-x64/llama.dll +0 -0
  43. package/bins/win-x64-vulkan/_nlcBuildMetadata.json +1 -0
  44. package/bins/win-x64-vulkan/ggml.dll +0 -0
  45. package/bins/win-x64-vulkan/llama-addon.exp +0 -0
  46. package/bins/win-x64-vulkan/llama-addon.lib +0 -0
  47. package/bins/win-x64-vulkan/llama-addon.node +0 -0
  48. package/bins/win-x64-vulkan/llama.dll +0 -0
  49. package/bins/win-x64-vulkan/vulkan-shaders-gen.exe +0 -0
  50. package/dist/ChatWrapper.d.ts +9 -39
  51. package/dist/ChatWrapper.js +129 -72
  52. package/dist/ChatWrapper.js.map +1 -1
  53. package/dist/apiDocsIndex.d.ts +1 -0
  54. package/dist/apiDocsIndex.js +7 -0
  55. package/dist/apiDocsIndex.js.map +1 -0
  56. package/dist/{utils/getBin.d.ts → bindings/AddonTypes.d.ts} +66 -9
  57. package/dist/bindings/AddonTypes.js +2 -0
  58. package/dist/bindings/AddonTypes.js.map +1 -0
  59. package/dist/bindings/Llama.d.ts +47 -0
  60. package/dist/bindings/Llama.js +356 -0
  61. package/dist/bindings/Llama.js.map +1 -0
  62. package/dist/bindings/consts.d.ts +2 -0
  63. package/dist/bindings/consts.js +11 -0
  64. package/dist/bindings/consts.js.map +1 -0
  65. package/dist/bindings/getLlama.d.ts +152 -0
  66. package/dist/bindings/getLlama.js +403 -0
  67. package/dist/bindings/getLlama.js.map +1 -0
  68. package/dist/bindings/types.d.ts +57 -0
  69. package/dist/bindings/types.js +77 -0
  70. package/dist/bindings/types.js.map +1 -0
  71. package/dist/bindings/utils/MemoryOrchestrator.d.ts +21 -0
  72. package/dist/bindings/utils/MemoryOrchestrator.js +49 -0
  73. package/dist/bindings/utils/MemoryOrchestrator.js.map +1 -0
  74. package/dist/bindings/utils/NoBinaryFoundError.d.ts +2 -0
  75. package/dist/bindings/utils/NoBinaryFoundError.js +7 -0
  76. package/dist/bindings/utils/NoBinaryFoundError.js.map +1 -0
  77. package/dist/bindings/utils/asyncEvery.d.ts +5 -0
  78. package/dist/bindings/utils/asyncEvery.js +15 -0
  79. package/dist/bindings/utils/asyncEvery.js.map +1 -0
  80. package/dist/bindings/utils/asyncSome.d.ts +5 -0
  81. package/dist/bindings/utils/asyncSome.js +27 -0
  82. package/dist/bindings/utils/asyncSome.js.map +1 -0
  83. package/dist/{utils → bindings/utils}/binariesGithubRelease.js +1 -1
  84. package/dist/bindings/utils/binariesGithubRelease.js.map +1 -0
  85. package/dist/bindings/utils/clearAllLocalBuilds.d.ts +1 -0
  86. package/dist/bindings/utils/clearAllLocalBuilds.js +47 -0
  87. package/dist/bindings/utils/clearAllLocalBuilds.js.map +1 -0
  88. package/dist/bindings/utils/cloneLlamaCppRepo.d.ts +11 -0
  89. package/dist/bindings/utils/cloneLlamaCppRepo.js +166 -0
  90. package/dist/bindings/utils/cloneLlamaCppRepo.js.map +1 -0
  91. package/dist/bindings/utils/compileLLamaCpp.d.ts +21 -0
  92. package/dist/bindings/utils/compileLLamaCpp.js +288 -0
  93. package/dist/bindings/utils/compileLLamaCpp.js.map +1 -0
  94. package/dist/bindings/utils/detectAvailableComputeLayers.d.ts +14 -0
  95. package/dist/bindings/utils/detectAvailableComputeLayers.js +305 -0
  96. package/dist/bindings/utils/detectAvailableComputeLayers.js.map +1 -0
  97. package/dist/bindings/utils/detectGlibc.d.ts +4 -0
  98. package/dist/bindings/utils/detectGlibc.js +46 -0
  99. package/dist/bindings/utils/detectGlibc.js.map +1 -0
  100. package/dist/bindings/utils/getBestComputeLayersAvailable.d.ts +10 -0
  101. package/dist/bindings/utils/getBestComputeLayersAvailable.js +29 -0
  102. package/dist/bindings/utils/getBestComputeLayersAvailable.js.map +1 -0
  103. package/dist/bindings/utils/getBuildFolderNameForBuildOptions.d.ts +5 -0
  104. package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js +93 -0
  105. package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js.map +1 -0
  106. package/dist/bindings/utils/getCanUsePrebuiltBinaries.d.ts +1 -0
  107. package/dist/bindings/utils/getCanUsePrebuiltBinaries.js +8 -0
  108. package/dist/bindings/utils/getCanUsePrebuiltBinaries.js.map +1 -0
  109. package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.d.ts +2 -0
  110. package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.js +21 -0
  111. package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.js.map +1 -0
  112. package/dist/bindings/utils/getGpuTypesToUseForOption.d.ts +13 -0
  113. package/dist/bindings/utils/getGpuTypesToUseForOption.js +39 -0
  114. package/dist/bindings/utils/getGpuTypesToUseForOption.js.map +1 -0
  115. package/dist/bindings/utils/getLinuxDistroInfo.d.ts +9 -0
  116. package/dist/bindings/utils/getLinuxDistroInfo.js +46 -0
  117. package/dist/bindings/utils/getLinuxDistroInfo.js.map +1 -0
  118. package/dist/bindings/utils/getLlamaWithoutBackend.d.ts +5 -0
  119. package/dist/bindings/utils/getLlamaWithoutBackend.js +27 -0
  120. package/dist/bindings/utils/getLlamaWithoutBackend.js.map +1 -0
  121. package/dist/bindings/utils/getPlatform.d.ts +2 -0
  122. package/dist/bindings/utils/getPlatform.js +15 -0
  123. package/dist/bindings/utils/getPlatform.js.map +1 -0
  124. package/dist/bindings/utils/getPlatformInfo.d.ts +5 -0
  125. package/dist/bindings/utils/getPlatformInfo.js +28 -0
  126. package/dist/bindings/utils/getPlatformInfo.js.map +1 -0
  127. package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.d.ts +3 -0
  128. package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.js +27 -0
  129. package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.js.map +1 -0
  130. package/dist/bindings/utils/hasFileInPath.d.ts +2 -0
  131. package/dist/bindings/utils/hasFileInPath.js +34 -0
  132. package/dist/bindings/utils/hasFileInPath.js.map +1 -0
  133. package/dist/bindings/utils/lastBuildInfo.d.ts +6 -0
  134. package/dist/bindings/utils/lastBuildInfo.js +17 -0
  135. package/dist/bindings/utils/lastBuildInfo.js.map +1 -0
  136. package/dist/bindings/utils/logBinaryUsageExampleToConsole.d.ts +2 -0
  137. package/dist/bindings/utils/logBinaryUsageExampleToConsole.js +22 -0
  138. package/dist/bindings/utils/logBinaryUsageExampleToConsole.js.map +1 -0
  139. package/dist/bindings/utils/logDistroInstallInstruction.d.ts +14 -0
  140. package/dist/bindings/utils/logDistroInstallInstruction.js +48 -0
  141. package/dist/bindings/utils/logDistroInstallInstruction.js.map +1 -0
  142. package/dist/bindings/utils/resolveCustomCmakeOptions.d.ts +1 -0
  143. package/dist/bindings/utils/resolveCustomCmakeOptions.js +43 -0
  144. package/dist/bindings/utils/resolveCustomCmakeOptions.js.map +1 -0
  145. package/dist/bindings/utils/testBindingBinary.d.ts +1 -0
  146. package/dist/bindings/utils/testBindingBinary.js +100 -0
  147. package/dist/bindings/utils/testBindingBinary.js.map +1 -0
  148. package/dist/bindings/utils/testCmakeBinary.d.ts +6 -0
  149. package/dist/bindings/utils/testCmakeBinary.js +32 -0
  150. package/dist/bindings/utils/testCmakeBinary.js.map +1 -0
  151. package/dist/chatWrappers/AlpacaChatWrapper.d.ts +2 -1
  152. package/dist/chatWrappers/AlpacaChatWrapper.js +9 -2
  153. package/dist/chatWrappers/AlpacaChatWrapper.js.map +1 -1
  154. package/dist/chatWrappers/ChatMLChatWrapper.d.ts +2 -9
  155. package/dist/chatWrappers/ChatMLChatWrapper.js +23 -21
  156. package/dist/chatWrappers/ChatMLChatWrapper.js.map +1 -1
  157. package/dist/chatWrappers/FalconChatWrapper.d.ts +4 -10
  158. package/dist/chatWrappers/FalconChatWrapper.js +38 -21
  159. package/dist/chatWrappers/FalconChatWrapper.js.map +1 -1
  160. package/dist/chatWrappers/FunctionaryChatWrapper.d.ts +8 -32
  161. package/dist/chatWrappers/FunctionaryChatWrapper.js +326 -118
  162. package/dist/chatWrappers/FunctionaryChatWrapper.js.map +1 -1
  163. package/dist/chatWrappers/GemmaChatWrapper.d.ts +7 -0
  164. package/dist/chatWrappers/GemmaChatWrapper.js +96 -0
  165. package/dist/chatWrappers/GemmaChatWrapper.js.map +1 -0
  166. package/dist/chatWrappers/GeneralChatWrapper.d.ts +4 -10
  167. package/dist/chatWrappers/GeneralChatWrapper.js +45 -22
  168. package/dist/chatWrappers/GeneralChatWrapper.js.map +1 -1
  169. package/dist/chatWrappers/Llama2ChatWrapper.d.ts +12 -0
  170. package/dist/chatWrappers/{LlamaChatWrapper.js → Llama2ChatWrapper.js} +38 -20
  171. package/dist/chatWrappers/Llama2ChatWrapper.js.map +1 -0
  172. package/dist/chatWrappers/Llama3ChatWrapper.d.ts +16 -0
  173. package/dist/chatWrappers/Llama3ChatWrapper.js +173 -0
  174. package/dist/chatWrappers/Llama3ChatWrapper.js.map +1 -0
  175. package/dist/chatWrappers/Llama3_1ChatWrapper.d.ts +31 -0
  176. package/dist/chatWrappers/Llama3_1ChatWrapper.js +223 -0
  177. package/dist/chatWrappers/Llama3_1ChatWrapper.js.map +1 -0
  178. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.d.ts +76 -0
  179. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js +371 -0
  180. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js.map +1 -0
  181. package/dist/chatWrappers/generic/TemplateChatWrapper.d.ts +54 -0
  182. package/dist/chatWrappers/generic/TemplateChatWrapper.js +200 -0
  183. package/dist/chatWrappers/generic/TemplateChatWrapper.js.map +1 -0
  184. package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.d.ts +23 -0
  185. package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.js +45 -0
  186. package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.js.map +1 -0
  187. package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.d.ts +57 -0
  188. package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.js +119 -0
  189. package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.js.map +1 -0
  190. package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.d.ts +4 -0
  191. package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js +210 -0
  192. package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js.map +1 -0
  193. package/dist/chatWrappers/utils/jsonDumps.d.ts +7 -0
  194. package/dist/chatWrappers/utils/jsonDumps.js +18 -0
  195. package/dist/chatWrappers/utils/jsonDumps.js.map +1 -0
  196. package/dist/chatWrappers/utils/resolveChatWrapper.d.ts +71 -0
  197. package/dist/chatWrappers/utils/resolveChatWrapper.js +289 -0
  198. package/dist/chatWrappers/utils/resolveChatWrapper.js.map +1 -0
  199. package/dist/cli/cli.js +21 -7
  200. package/dist/cli/cli.js.map +1 -1
  201. package/dist/cli/commands/BuildCommand.d.ts +11 -4
  202. package/dist/cli/commands/BuildCommand.js +114 -41
  203. package/dist/cli/commands/BuildCommand.js.map +1 -1
  204. package/dist/cli/commands/ChatCommand.d.ts +19 -7
  205. package/dist/cli/commands/ChatCommand.js +306 -150
  206. package/dist/cli/commands/ChatCommand.js.map +1 -1
  207. package/dist/cli/commands/ClearCommand.d.ts +1 -1
  208. package/dist/cli/commands/ClearCommand.js +11 -12
  209. package/dist/cli/commands/ClearCommand.js.map +1 -1
  210. package/dist/cli/commands/CompleteCommand.d.ts +30 -0
  211. package/dist/cli/commands/CompleteCommand.js +374 -0
  212. package/dist/cli/commands/CompleteCommand.js.map +1 -0
  213. package/dist/cli/commands/DebugCommand.d.ts +7 -0
  214. package/dist/cli/commands/DebugCommand.js +54 -0
  215. package/dist/cli/commands/DebugCommand.js.map +1 -0
  216. package/dist/cli/commands/DownloadCommand.d.ts +7 -4
  217. package/dist/cli/commands/DownloadCommand.js +121 -70
  218. package/dist/cli/commands/DownloadCommand.js.map +1 -1
  219. package/dist/cli/commands/InfillCommand.d.ts +32 -0
  220. package/dist/cli/commands/InfillCommand.js +410 -0
  221. package/dist/cli/commands/InfillCommand.js.map +1 -0
  222. package/dist/cli/commands/InitCommand.d.ts +11 -0
  223. package/dist/cli/commands/InitCommand.js +195 -0
  224. package/dist/cli/commands/InitCommand.js.map +1 -0
  225. package/dist/cli/commands/OnPostInstallCommand.js +9 -10
  226. package/dist/cli/commands/OnPostInstallCommand.js.map +1 -1
  227. package/dist/cli/commands/PullCommand.d.ts +12 -0
  228. package/dist/cli/commands/PullCommand.js +117 -0
  229. package/dist/cli/commands/PullCommand.js.map +1 -0
  230. package/dist/cli/commands/inspect/InspectCommand.d.ts +4 -0
  231. package/dist/cli/commands/inspect/InspectCommand.js +19 -0
  232. package/dist/cli/commands/inspect/InspectCommand.js.map +1 -0
  233. package/dist/cli/commands/inspect/commands/InspectGgufCommand.d.ts +12 -0
  234. package/dist/cli/commands/inspect/commands/InspectGgufCommand.js +136 -0
  235. package/dist/cli/commands/inspect/commands/InspectGgufCommand.js.map +1 -0
  236. package/dist/cli/commands/inspect/commands/InspectGpuCommand.d.ts +4 -0
  237. package/dist/cli/commands/inspect/commands/InspectGpuCommand.js +180 -0
  238. package/dist/cli/commands/inspect/commands/InspectGpuCommand.js.map +1 -0
  239. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.d.ts +18 -0
  240. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js +626 -0
  241. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js.map +1 -0
  242. package/dist/cli/projectTemplates.d.ts +7 -0
  243. package/dist/cli/projectTemplates.js +10 -0
  244. package/dist/cli/projectTemplates.js.map +1 -0
  245. package/dist/cli/recommendedModels.d.ts +2 -0
  246. package/dist/cli/recommendedModels.js +376 -0
  247. package/dist/cli/recommendedModels.js.map +1 -0
  248. package/dist/cli/startCreateCli.d.ts +2 -0
  249. package/dist/cli/startCreateCli.js +26 -0
  250. package/dist/cli/startCreateCli.js.map +1 -0
  251. package/dist/cli/utils/ConsoleInteraction.d.ts +23 -0
  252. package/dist/cli/utils/ConsoleInteraction.js +122 -0
  253. package/dist/cli/utils/ConsoleInteraction.js.map +1 -0
  254. package/dist/cli/utils/ConsoleTable.d.ts +23 -0
  255. package/dist/cli/utils/ConsoleTable.js +86 -0
  256. package/dist/cli/utils/ConsoleTable.js.map +1 -0
  257. package/dist/cli/utils/basicChooseFromListConsoleInteraction.d.ts +13 -0
  258. package/dist/cli/utils/basicChooseFromListConsoleInteraction.js +111 -0
  259. package/dist/cli/utils/basicChooseFromListConsoleInteraction.js.map +1 -0
  260. package/dist/cli/utils/consolePromptQuestion.d.ts +6 -0
  261. package/dist/cli/utils/consolePromptQuestion.js +82 -0
  262. package/dist/cli/utils/consolePromptQuestion.js.map +1 -0
  263. package/dist/cli/utils/getReadablePath.d.ts +1 -0
  264. package/dist/cli/utils/getReadablePath.js +14 -0
  265. package/dist/cli/utils/getReadablePath.js.map +1 -0
  266. package/dist/cli/utils/interactivelyAskForModel.d.ts +8 -0
  267. package/dist/cli/utils/interactivelyAskForModel.js +461 -0
  268. package/dist/cli/utils/interactivelyAskForModel.js.map +1 -0
  269. package/dist/cli/utils/logUsedGpuTypeOption.d.ts +2 -0
  270. package/dist/cli/utils/logUsedGpuTypeOption.js +9 -0
  271. package/dist/cli/utils/logUsedGpuTypeOption.js.map +1 -0
  272. package/dist/cli/utils/printCommonInfoLines.d.ts +9 -0
  273. package/dist/cli/utils/printCommonInfoLines.js +79 -0
  274. package/dist/cli/utils/printCommonInfoLines.js.map +1 -0
  275. package/dist/cli/utils/printInfoLine.d.ts +12 -0
  276. package/dist/cli/utils/printInfoLine.js +54 -0
  277. package/dist/cli/utils/printInfoLine.js.map +1 -0
  278. package/dist/cli/utils/projectTemplates.d.ts +19 -0
  279. package/dist/cli/utils/projectTemplates.js +47 -0
  280. package/dist/cli/utils/projectTemplates.js.map +1 -0
  281. package/dist/cli/utils/resolveCommandGgufPath.d.ts +5 -0
  282. package/dist/cli/utils/resolveCommandGgufPath.js +72 -0
  283. package/dist/cli/utils/resolveCommandGgufPath.js.map +1 -0
  284. package/dist/cli/utils/resolveHeaderFlag.d.ts +1 -0
  285. package/dist/cli/utils/resolveHeaderFlag.js +21 -0
  286. package/dist/cli/utils/resolveHeaderFlag.js.map +1 -0
  287. package/dist/cli/utils/resolveModelRecommendationFileOptions.d.ts +19 -0
  288. package/dist/cli/utils/resolveModelRecommendationFileOptions.js +7 -0
  289. package/dist/cli/utils/resolveModelRecommendationFileOptions.js.map +1 -0
  290. package/dist/cli/utils/splitAnsiToLines.d.ts +1 -0
  291. package/dist/cli/utils/splitAnsiToLines.js +32 -0
  292. package/dist/cli/utils/splitAnsiToLines.js.map +1 -0
  293. package/dist/cli/utils/withCliCommandDescriptionDocsUrl.d.ts +2 -0
  294. package/dist/cli/utils/withCliCommandDescriptionDocsUrl.js +23 -0
  295. package/dist/cli/utils/withCliCommandDescriptionDocsUrl.js.map +1 -0
  296. package/dist/commands.d.ts +1 -0
  297. package/dist/commands.js +3 -0
  298. package/dist/commands.js.map +1 -1
  299. package/dist/config.d.ts +38 -5
  300. package/dist/config.js +61 -16
  301. package/dist/config.js.map +1 -1
  302. package/dist/consts.d.ts +4 -0
  303. package/dist/consts.js +11 -0
  304. package/dist/consts.js.map +1 -0
  305. package/dist/evaluator/LlamaChat/LlamaChat.d.ts +270 -0
  306. package/dist/evaluator/LlamaChat/LlamaChat.js +1544 -0
  307. package/dist/evaluator/LlamaChat/LlamaChat.js.map +1 -0
  308. package/dist/evaluator/LlamaChat/utils/FunctionCallNameGrammar.d.ts +11 -0
  309. package/dist/evaluator/LlamaChat/utils/FunctionCallNameGrammar.js +55 -0
  310. package/dist/evaluator/LlamaChat/utils/FunctionCallNameGrammar.js.map +1 -0
  311. package/dist/evaluator/LlamaChat/utils/FunctionCallParamsGrammar.d.ts +16 -0
  312. package/dist/evaluator/LlamaChat/utils/FunctionCallParamsGrammar.js +45 -0
  313. package/dist/evaluator/LlamaChat/utils/FunctionCallParamsGrammar.js.map +1 -0
  314. package/dist/evaluator/LlamaChat/utils/LlamaFunctionCallValidationError.d.ts +8 -0
  315. package/dist/evaluator/LlamaChat/utils/LlamaFunctionCallValidationError.js +12 -0
  316. package/dist/evaluator/LlamaChat/utils/LlamaFunctionCallValidationError.js.map +1 -0
  317. package/dist/{llamaEvaluator → evaluator}/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js +42 -16
  318. package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js.map +1 -0
  319. package/dist/evaluator/LlamaChatSession/LlamaChatSession.d.ts +310 -0
  320. package/dist/evaluator/LlamaChatSession/LlamaChatSession.js +425 -0
  321. package/dist/evaluator/LlamaChatSession/LlamaChatSession.js.map +1 -0
  322. package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.d.ts +39 -0
  323. package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.js +186 -0
  324. package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.js.map +1 -0
  325. package/dist/{llamaEvaluator → evaluator}/LlamaChatSession/utils/defineChatSessionFunction.d.ts +3 -0
  326. package/dist/{llamaEvaluator → evaluator}/LlamaChatSession/utils/defineChatSessionFunction.js +3 -0
  327. package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.js.map +1 -0
  328. package/dist/evaluator/LlamaCompletion.d.ts +154 -0
  329. package/dist/evaluator/LlamaCompletion.js +424 -0
  330. package/dist/evaluator/LlamaCompletion.js.map +1 -0
  331. package/dist/{llamaEvaluator → evaluator}/LlamaContext/LlamaContext.d.ts +42 -22
  332. package/dist/{llamaEvaluator → evaluator}/LlamaContext/LlamaContext.js +338 -81
  333. package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -0
  334. package/dist/evaluator/LlamaContext/types.d.ts +175 -0
  335. package/dist/evaluator/LlamaContext/types.js.map +1 -0
  336. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/firstInFirstOutStrategy.js.map +1 -0
  337. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/maximumParallelismStrategy.js.map +1 -0
  338. package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.d.ts +2 -0
  339. package/dist/{llamaEvaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js → evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.js} +4 -4
  340. package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.js.map +1 -0
  341. package/dist/evaluator/LlamaEmbeddingContext.d.ts +51 -0
  342. package/dist/evaluator/LlamaEmbeddingContext.js +73 -0
  343. package/dist/evaluator/LlamaEmbeddingContext.js.map +1 -0
  344. package/dist/evaluator/LlamaGrammar.d.ts +34 -0
  345. package/dist/{llamaEvaluator → evaluator}/LlamaGrammar.js +15 -12
  346. package/dist/evaluator/LlamaGrammar.js.map +1 -0
  347. package/dist/{llamaEvaluator → evaluator}/LlamaGrammarEvaluationState.js +4 -4
  348. package/dist/evaluator/LlamaGrammarEvaluationState.js.map +1 -0
  349. package/dist/{llamaEvaluator → evaluator}/LlamaJsonSchemaGrammar.d.ts +2 -1
  350. package/dist/{llamaEvaluator → evaluator}/LlamaJsonSchemaGrammar.js +3 -3
  351. package/dist/evaluator/LlamaJsonSchemaGrammar.js.map +1 -0
  352. package/dist/evaluator/LlamaModel/LlamaModel.d.ts +242 -0
  353. package/dist/evaluator/LlamaModel/LlamaModel.js +765 -0
  354. package/dist/evaluator/LlamaModel/LlamaModel.js.map +1 -0
  355. package/dist/evaluator/LlamaModel/utils/TokenAttributes.d.ts +29 -0
  356. package/dist/evaluator/LlamaModel/utils/TokenAttributes.js +65 -0
  357. package/dist/evaluator/LlamaModel/utils/TokenAttributes.js.map +1 -0
  358. package/dist/evaluator/TokenBias.d.ts +22 -0
  359. package/dist/evaluator/TokenBias.js +33 -0
  360. package/dist/evaluator/TokenBias.js.map +1 -0
  361. package/dist/evaluator/TokenMeter.d.ts +54 -0
  362. package/dist/evaluator/TokenMeter.js +86 -0
  363. package/dist/evaluator/TokenMeter.js.map +1 -0
  364. package/dist/gguf/consts.d.ts +4 -0
  365. package/dist/gguf/consts.js +12 -0
  366. package/dist/gguf/consts.js.map +1 -0
  367. package/dist/gguf/errors/InvalidGgufMagicError.d.ts +3 -0
  368. package/dist/gguf/errors/InvalidGgufMagicError.js +6 -0
  369. package/dist/gguf/errors/InvalidGgufMagicError.js.map +1 -0
  370. package/dist/gguf/errors/UnsupportedGgufValueTypeError.d.ts +4 -0
  371. package/dist/gguf/errors/UnsupportedGgufValueTypeError.js +9 -0
  372. package/dist/gguf/errors/UnsupportedGgufValueTypeError.js.map +1 -0
  373. package/dist/gguf/fileReaders/GgufFileReader.d.ts +37 -0
  374. package/dist/gguf/fileReaders/GgufFileReader.js +109 -0
  375. package/dist/gguf/fileReaders/GgufFileReader.js.map +1 -0
  376. package/dist/gguf/fileReaders/GgufFsFileReader.d.ts +18 -0
  377. package/dist/gguf/fileReaders/GgufFsFileReader.js +62 -0
  378. package/dist/gguf/fileReaders/GgufFsFileReader.js.map +1 -0
  379. package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.d.ts +23 -0
  380. package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js +79 -0
  381. package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js.map +1 -0
  382. package/dist/gguf/insights/GgufInsights.d.ts +50 -0
  383. package/dist/gguf/insights/GgufInsights.js +401 -0
  384. package/dist/gguf/insights/GgufInsights.js.map +1 -0
  385. package/dist/gguf/insights/GgufInsightsConfigurationResolver.d.ts +90 -0
  386. package/dist/gguf/insights/GgufInsightsConfigurationResolver.js +144 -0
  387. package/dist/gguf/insights/GgufInsightsConfigurationResolver.js.map +1 -0
  388. package/dist/gguf/insights/utils/resolveContextContextSizeOption.d.ts +19 -0
  389. package/dist/gguf/insights/utils/resolveContextContextSizeOption.js +78 -0
  390. package/dist/gguf/insights/utils/resolveContextContextSizeOption.js.map +1 -0
  391. package/dist/gguf/insights/utils/resolveModelGpuLayersOption.d.ts +15 -0
  392. package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js +183 -0
  393. package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js.map +1 -0
  394. package/dist/gguf/insights/utils/scoreLevels.d.ts +5 -0
  395. package/dist/gguf/insights/utils/scoreLevels.js +16 -0
  396. package/dist/gguf/insights/utils/scoreLevels.js.map +1 -0
  397. package/dist/gguf/parser/GgufV2Parser.d.ts +20 -0
  398. package/dist/gguf/parser/GgufV2Parser.js +156 -0
  399. package/dist/gguf/parser/GgufV2Parser.js.map +1 -0
  400. package/dist/gguf/parser/GgufV3Parser.d.ts +3 -0
  401. package/dist/gguf/parser/GgufV3Parser.js +4 -0
  402. package/dist/gguf/parser/GgufV3Parser.js.map +1 -0
  403. package/dist/gguf/parser/parseGguf.d.ts +8 -0
  404. package/dist/gguf/parser/parseGguf.js +61 -0
  405. package/dist/gguf/parser/parseGguf.js.map +1 -0
  406. package/dist/gguf/readGgufFileInfo.d.ts +33 -0
  407. package/dist/gguf/readGgufFileInfo.js +66 -0
  408. package/dist/gguf/readGgufFileInfo.js.map +1 -0
  409. package/dist/gguf/types/GgufFileInfoTypes.d.ts +84 -0
  410. package/dist/gguf/types/GgufFileInfoTypes.js +18 -0
  411. package/dist/gguf/types/GgufFileInfoTypes.js.map +1 -0
  412. package/dist/gguf/types/GgufMetadataTypes.d.ts +356 -0
  413. package/dist/gguf/types/GgufMetadataTypes.js +99 -0
  414. package/dist/gguf/types/GgufMetadataTypes.js.map +1 -0
  415. package/dist/gguf/types/GgufTensorInfoTypes.d.ts +37 -0
  416. package/dist/gguf/types/GgufTensorInfoTypes.js +33 -0
  417. package/dist/gguf/types/GgufTensorInfoTypes.js.map +1 -0
  418. package/dist/gguf/utils/GgufReadOffset.d.ts +6 -0
  419. package/dist/gguf/utils/GgufReadOffset.js +18 -0
  420. package/dist/gguf/utils/GgufReadOffset.js.map +1 -0
  421. package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.d.ts +6 -0
  422. package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js +74 -0
  423. package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js.map +1 -0
  424. package/dist/gguf/utils/getGgufFileTypeName.d.ts +4 -0
  425. package/dist/gguf/utils/getGgufFileTypeName.js +13 -0
  426. package/dist/gguf/utils/getGgufFileTypeName.js.map +1 -0
  427. package/dist/gguf/utils/getGgufMetadataArchitectureData.d.ts +3 -0
  428. package/dist/gguf/utils/getGgufMetadataArchitectureData.js +4 -0
  429. package/dist/gguf/utils/getGgufMetadataArchitectureData.js.map +1 -0
  430. package/dist/gguf/utils/normalizeGgufDownloadUrl.d.ts +1 -0
  431. package/dist/gguf/utils/normalizeGgufDownloadUrl.js +16 -0
  432. package/dist/gguf/utils/normalizeGgufDownloadUrl.js.map +1 -0
  433. package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.d.ts +2 -0
  434. package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.js +39 -0
  435. package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.js.map +1 -0
  436. package/dist/gguf/utils/resolveSplitGgufParts.d.ts +7 -0
  437. package/dist/gguf/utils/resolveSplitGgufParts.js +55 -0
  438. package/dist/gguf/utils/resolveSplitGgufParts.js.map +1 -0
  439. package/dist/index.d.ts +43 -18
  440. package/dist/index.js +38 -15
  441. package/dist/index.js.map +1 -1
  442. package/dist/state.d.ts +4 -0
  443. package/dist/state.js +14 -0
  444. package/dist/state.js.map +1 -1
  445. package/dist/types.d.ts +130 -5
  446. package/dist/types.js.map +1 -1
  447. package/dist/utils/DeepPartialObject.d.ts +3 -0
  448. package/dist/utils/DeepPartialObject.js +2 -0
  449. package/dist/utils/DeepPartialObject.js.map +1 -0
  450. package/dist/utils/DisposeGuard.d.ts +13 -0
  451. package/dist/utils/DisposeGuard.js +120 -0
  452. package/dist/utils/DisposeGuard.js.map +1 -0
  453. package/dist/utils/InsufficientMemoryError.d.ts +3 -0
  454. package/dist/utils/InsufficientMemoryError.js +6 -0
  455. package/dist/utils/InsufficientMemoryError.js.map +1 -0
  456. package/dist/utils/LlamaText.d.ts +70 -26
  457. package/dist/utils/LlamaText.js +472 -157
  458. package/dist/utils/LlamaText.js.map +1 -1
  459. package/dist/utils/LruCache.d.ts +12 -0
  460. package/dist/utils/LruCache.js +44 -0
  461. package/dist/utils/LruCache.js.map +1 -0
  462. package/dist/utils/ReplHistory.js +5 -1
  463. package/dist/utils/ReplHistory.js.map +1 -1
  464. package/dist/utils/StopGenerationDetector.d.ts +27 -8
  465. package/dist/utils/StopGenerationDetector.js +108 -22
  466. package/dist/utils/StopGenerationDetector.js.map +1 -1
  467. package/dist/utils/TokenStreamRegulator.d.ts +10 -4
  468. package/dist/utils/TokenStreamRegulator.js +102 -10
  469. package/dist/utils/TokenStreamRegulator.js.map +1 -1
  470. package/dist/utils/UnsupportedError.d.ts +2 -0
  471. package/dist/utils/UnsupportedError.js +7 -0
  472. package/dist/utils/UnsupportedError.js.map +1 -0
  473. package/dist/utils/appendUserMessageToChatHistory.js.map +1 -1
  474. package/dist/utils/clearTempFolder.js.map +1 -1
  475. package/dist/utils/cmake.js +38 -20
  476. package/dist/utils/cmake.js.map +1 -1
  477. package/dist/utils/createModelDownloader.d.ts +111 -0
  478. package/dist/utils/createModelDownloader.js +273 -0
  479. package/dist/utils/createModelDownloader.js.map +1 -0
  480. package/dist/utils/findBestOption.d.ts +4 -0
  481. package/dist/utils/findBestOption.js +15 -0
  482. package/dist/utils/findBestOption.js.map +1 -0
  483. package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.d.ts +1 -0
  484. package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js +23 -12
  485. package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js.map +1 -1
  486. package/dist/utils/gbnfJson/GbnfGrammarGenerator.js.map +1 -1
  487. package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.d.ts +5 -0
  488. package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.js +11 -0
  489. package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.js.map +1 -0
  490. package/dist/utils/gbnfJson/terminals/GbnfArray.d.ts +3 -1
  491. package/dist/utils/gbnfJson/terminals/GbnfArray.js +10 -5
  492. package/dist/utils/gbnfJson/terminals/GbnfArray.js.map +1 -1
  493. package/dist/utils/gbnfJson/terminals/GbnfBoolean.d.ts +1 -1
  494. package/dist/utils/gbnfJson/terminals/GbnfBoolean.js.map +1 -1
  495. package/dist/utils/gbnfJson/terminals/GbnfBooleanValue.js.map +1 -1
  496. package/dist/utils/gbnfJson/terminals/GbnfGrammar.js.map +1 -1
  497. package/dist/utils/gbnfJson/terminals/GbnfNull.d.ts +1 -1
  498. package/dist/utils/gbnfJson/terminals/GbnfNull.js.map +1 -1
  499. package/dist/utils/gbnfJson/terminals/GbnfNumber.d.ts +1 -1
  500. package/dist/utils/gbnfJson/terminals/GbnfNumber.js.map +1 -1
  501. package/dist/utils/gbnfJson/terminals/GbnfNumberValue.js.map +1 -1
  502. package/dist/utils/gbnfJson/terminals/GbnfObjectMap.d.ts +3 -1
  503. package/dist/utils/gbnfJson/terminals/GbnfObjectMap.js +9 -4
  504. package/dist/utils/gbnfJson/terminals/GbnfObjectMap.js.map +1 -1
  505. package/dist/utils/gbnfJson/terminals/GbnfOr.js.map +1 -1
  506. package/dist/utils/gbnfJson/terminals/GbnfRepetition.d.ts +9 -0
  507. package/dist/utils/gbnfJson/terminals/GbnfRepetition.js +37 -0
  508. package/dist/utils/gbnfJson/terminals/GbnfRepetition.js.map +1 -0
  509. package/dist/utils/gbnfJson/terminals/GbnfString.d.ts +1 -1
  510. package/dist/utils/gbnfJson/terminals/GbnfString.js +23 -5
  511. package/dist/utils/gbnfJson/terminals/GbnfString.js.map +1 -1
  512. package/dist/utils/gbnfJson/terminals/GbnfStringValue.js.map +1 -1
  513. package/dist/utils/gbnfJson/terminals/GbnfVerbatimText.js.map +1 -1
  514. package/dist/utils/gbnfJson/terminals/GbnfWhitespace.d.ts +7 -4
  515. package/dist/utils/gbnfJson/terminals/GbnfWhitespace.js +37 -9
  516. package/dist/utils/gbnfJson/terminals/GbnfWhitespace.js.map +1 -1
  517. package/dist/utils/gbnfJson/terminals/gbnfConsts.d.ts +5 -4
  518. package/dist/utils/gbnfJson/terminals/gbnfConsts.js +14 -3
  519. package/dist/utils/gbnfJson/terminals/gbnfConsts.js.map +1 -1
  520. package/dist/utils/gbnfJson/types.d.ts +3 -0
  521. package/dist/utils/gbnfJson/types.js.map +1 -1
  522. package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.d.ts +10 -0
  523. package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.js +15 -0
  524. package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.js.map +1 -0
  525. package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.d.ts +2 -1
  526. package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.js +6 -5
  527. package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.js.map +1 -1
  528. package/dist/utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.js +2 -2
  529. package/dist/utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.js.map +1 -1
  530. package/dist/utils/getBuildDefaults.d.ts +1 -2
  531. package/dist/utils/getBuildDefaults.js +2 -3
  532. package/dist/utils/getBuildDefaults.js.map +1 -1
  533. package/dist/utils/getConsoleLogPrefix.d.ts +1 -0
  534. package/dist/utils/getConsoleLogPrefix.js +10 -0
  535. package/dist/utils/getConsoleLogPrefix.js.map +1 -0
  536. package/dist/utils/getGrammarsFolder.d.ts +2 -1
  537. package/dist/utils/getGrammarsFolder.js +8 -7
  538. package/dist/utils/getGrammarsFolder.js.map +1 -1
  539. package/dist/utils/getModuleVersion.d.ts +1 -0
  540. package/dist/utils/getModuleVersion.js +13 -0
  541. package/dist/utils/getModuleVersion.js.map +1 -0
  542. package/dist/utils/getQueuedTokensBeforeStopTrigger.d.ts +6 -0
  543. package/dist/utils/getQueuedTokensBeforeStopTrigger.js +22 -0
  544. package/dist/utils/getQueuedTokensBeforeStopTrigger.js.map +1 -0
  545. package/dist/utils/getReadableContextSize.d.ts +1 -0
  546. package/dist/utils/getReadableContextSize.js +7 -0
  547. package/dist/utils/getReadableContextSize.js.map +1 -0
  548. package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js +15 -11
  549. package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js.map +1 -1
  550. package/dist/utils/gitReleaseBundles.js +73 -5
  551. package/dist/utils/gitReleaseBundles.js.map +1 -1
  552. package/dist/utils/hashString.d.ts +1 -0
  553. package/dist/utils/hashString.js +8 -0
  554. package/dist/utils/hashString.js.map +1 -0
  555. package/dist/utils/isLockfileActive.d.ts +4 -0
  556. package/dist/utils/isLockfileActive.js +12 -0
  557. package/dist/utils/isLockfileActive.js.map +1 -0
  558. package/dist/utils/isToken.d.ts +2 -0
  559. package/dist/utils/isToken.js +4 -0
  560. package/dist/utils/isToken.js.map +1 -0
  561. package/dist/utils/isUrl.d.ts +1 -0
  562. package/dist/utils/isUrl.js +15 -0
  563. package/dist/utils/isUrl.js.map +1 -0
  564. package/dist/utils/mergeUnionTypes.d.ts +10 -0
  565. package/dist/utils/mergeUnionTypes.js +2 -0
  566. package/dist/utils/mergeUnionTypes.js.map +1 -0
  567. package/dist/utils/parseModelFileName.d.ts +1 -0
  568. package/dist/utils/parseModelFileName.js +6 -1
  569. package/dist/utils/parseModelFileName.js.map +1 -1
  570. package/dist/utils/parseTextTemplate.d.ts +66 -0
  571. package/dist/utils/parseTextTemplate.js +116 -0
  572. package/dist/utils/parseTextTemplate.js.map +1 -0
  573. package/dist/utils/prettyPrintObject.d.ts +10 -0
  574. package/dist/utils/prettyPrintObject.js +84 -0
  575. package/dist/utils/prettyPrintObject.js.map +1 -0
  576. package/dist/utils/pushAll.d.ts +6 -0
  577. package/dist/utils/pushAll.js +11 -0
  578. package/dist/utils/pushAll.js.map +1 -0
  579. package/dist/utils/removeNullFields.d.ts +2 -1
  580. package/dist/utils/removeNullFields.js +8 -0
  581. package/dist/utils/removeNullFields.js.map +1 -1
  582. package/dist/utils/resolveGithubRelease.d.ts +2 -0
  583. package/dist/utils/resolveGithubRelease.js +36 -0
  584. package/dist/utils/resolveGithubRelease.js.map +1 -0
  585. package/dist/utils/resolveLastTokens.d.ts +2 -0
  586. package/dist/utils/resolveLastTokens.js +12 -0
  587. package/dist/utils/resolveLastTokens.js.map +1 -0
  588. package/dist/utils/runtime.d.ts +4 -0
  589. package/dist/utils/runtime.js +8 -0
  590. package/dist/utils/runtime.js.map +1 -0
  591. package/dist/utils/safeEventCallback.d.ts +6 -0
  592. package/dist/utils/safeEventCallback.js +29 -0
  593. package/dist/utils/safeEventCallback.js.map +1 -0
  594. package/dist/utils/spawnCommand.d.ts +11 -1
  595. package/dist/utils/spawnCommand.js +56 -6
  596. package/dist/utils/spawnCommand.js.map +1 -1
  597. package/dist/utils/tokenizeInput.d.ts +3 -0
  598. package/dist/utils/tokenizeInput.js +12 -0
  599. package/dist/utils/tokenizeInput.js.map +1 -0
  600. package/dist/utils/transformPromisable.d.ts +40 -0
  601. package/dist/utils/transformPromisable.js +64 -0
  602. package/dist/utils/transformPromisable.js.map +1 -0
  603. package/dist/utils/truncateTextAndRoundToWords.d.ts +2 -0
  604. package/dist/utils/truncateTextAndRoundToWords.js +30 -0
  605. package/dist/utils/truncateTextAndRoundToWords.js.map +1 -1
  606. package/dist/utils/utilTypes.d.ts +3 -0
  607. package/dist/utils/utilTypes.js +2 -0
  608. package/dist/utils/utilTypes.js.map +1 -0
  609. package/dist/utils/waitForLockfileRelease.d.ts +5 -0
  610. package/dist/utils/waitForLockfileRelease.js +20 -0
  611. package/dist/utils/waitForLockfileRelease.js.map +1 -0
  612. package/dist/utils/withLockfile.d.ts +7 -0
  613. package/dist/utils/withLockfile.js +44 -0
  614. package/dist/utils/withLockfile.js.map +1 -0
  615. package/dist/utils/withOra.d.ts +2 -0
  616. package/dist/utils/withOra.js +22 -6
  617. package/dist/utils/withOra.js.map +1 -1
  618. package/dist/utils/withProgressLog.d.ts +23 -0
  619. package/dist/utils/withProgressLog.js +211 -0
  620. package/dist/utils/withProgressLog.js.map +1 -0
  621. package/dist/utils/withStatusLogs.d.ts +2 -1
  622. package/dist/utils/withStatusLogs.js +12 -9
  623. package/dist/utils/withStatusLogs.js.map +1 -1
  624. package/dist/utils/wrapAbortSignal.d.ts +2 -0
  625. package/dist/utils/wrapAbortSignal.js +9 -0
  626. package/dist/utils/wrapAbortSignal.js.map +1 -0
  627. package/llama/.clang-format +1 -2
  628. package/llama/CMakeLists.txt +126 -5
  629. package/llama/addon/AddonContext.cpp +772 -0
  630. package/llama/addon/AddonContext.h +53 -0
  631. package/llama/addon/AddonGrammar.cpp +44 -0
  632. package/llama/addon/AddonGrammar.h +18 -0
  633. package/llama/addon/AddonGrammarEvaluationState.cpp +28 -0
  634. package/llama/addon/AddonGrammarEvaluationState.h +15 -0
  635. package/llama/addon/AddonModel.cpp +681 -0
  636. package/llama/addon/AddonModel.h +61 -0
  637. package/llama/addon/AddonModelData.cpp +25 -0
  638. package/llama/addon/AddonModelData.h +15 -0
  639. package/llama/addon/AddonModelLora.cpp +107 -0
  640. package/llama/addon/AddonModelLora.h +28 -0
  641. package/llama/addon/addon.cpp +216 -0
  642. package/llama/addon/addonGlobals.cpp +22 -0
  643. package/llama/addon/addonGlobals.h +12 -0
  644. package/llama/addon/globals/addonLog.cpp +135 -0
  645. package/llama/addon/globals/addonLog.h +21 -0
  646. package/llama/addon/globals/addonProgress.cpp +15 -0
  647. package/llama/addon/globals/addonProgress.h +15 -0
  648. package/llama/addon/globals/getGpuInfo.cpp +108 -0
  649. package/llama/addon/globals/getGpuInfo.h +6 -0
  650. package/llama/binariesGithubRelease.json +1 -1
  651. package/llama/gitRelease.bundle +0 -0
  652. package/llama/gpuInfo/cuda-gpu-info.cu +120 -0
  653. package/llama/gpuInfo/cuda-gpu-info.h +10 -0
  654. package/llama/gpuInfo/metal-gpu-info.h +8 -0
  655. package/llama/gpuInfo/metal-gpu-info.mm +30 -0
  656. package/llama/gpuInfo/vulkan-gpu-info.cpp +83 -0
  657. package/llama/gpuInfo/vulkan-gpu-info.h +9 -0
  658. package/llama/grammars/README.md +297 -6
  659. package/llama/grammars/json.gbnf +4 -4
  660. package/llama/grammars/json_arr.gbnf +4 -4
  661. package/llama/llama.cpp.info.json +4 -0
  662. package/llama/toolchains/win32.host-x64.target-arm64.cmake +41 -0
  663. package/package.json +85 -54
  664. package/templates/packed/electron-typescript-react.json +1 -0
  665. package/templates/packed/node-typescript.json +1 -0
  666. package/dist/AbortError.d.ts +0 -2
  667. package/dist/AbortError.js +0 -7
  668. package/dist/AbortError.js.map +0 -1
  669. package/dist/chatWrappers/LlamaChatWrapper.d.ts +0 -13
  670. package/dist/chatWrappers/LlamaChatWrapper.js.map +0 -1
  671. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.d.ts +0 -13
  672. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js +0 -57
  673. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js.map +0 -1
  674. package/dist/llamaEvaluator/LlamaBins.d.ts +0 -18
  675. package/dist/llamaEvaluator/LlamaBins.js +0 -5
  676. package/dist/llamaEvaluator/LlamaBins.js.map +0 -1
  677. package/dist/llamaEvaluator/LlamaChat/LlamaChat.d.ts +0 -175
  678. package/dist/llamaEvaluator/LlamaChat/LlamaChat.js +0 -704
  679. package/dist/llamaEvaluator/LlamaChat/LlamaChat.js.map +0 -1
  680. package/dist/llamaEvaluator/LlamaChat/utils/FunctionCallGrammar.d.ts +0 -21
  681. package/dist/llamaEvaluator/LlamaChat/utils/FunctionCallGrammar.js +0 -120
  682. package/dist/llamaEvaluator/LlamaChat/utils/FunctionCallGrammar.js.map +0 -1
  683. package/dist/llamaEvaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js.map +0 -1
  684. package/dist/llamaEvaluator/LlamaChatSession/LlamaChatSession.d.ts +0 -146
  685. package/dist/llamaEvaluator/LlamaChatSession/LlamaChatSession.js +0 -211
  686. package/dist/llamaEvaluator/LlamaChatSession/LlamaChatSession.js.map +0 -1
  687. package/dist/llamaEvaluator/LlamaChatSession/utils/defineChatSessionFunction.js.map +0 -1
  688. package/dist/llamaEvaluator/LlamaContext/LlamaContext.js.map +0 -1
  689. package/dist/llamaEvaluator/LlamaContext/types.d.ts +0 -82
  690. package/dist/llamaEvaluator/LlamaContext/types.js.map +0 -1
  691. package/dist/llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.js.map +0 -1
  692. package/dist/llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.js.map +0 -1
  693. package/dist/llamaEvaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.d.ts +0 -2
  694. package/dist/llamaEvaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js.map +0 -1
  695. package/dist/llamaEvaluator/LlamaEmbeddingContext.d.ts +0 -35
  696. package/dist/llamaEvaluator/LlamaEmbeddingContext.js +0 -73
  697. package/dist/llamaEvaluator/LlamaEmbeddingContext.js.map +0 -1
  698. package/dist/llamaEvaluator/LlamaGrammar.d.ts +0 -28
  699. package/dist/llamaEvaluator/LlamaGrammar.js.map +0 -1
  700. package/dist/llamaEvaluator/LlamaGrammarEvaluationState.js.map +0 -1
  701. package/dist/llamaEvaluator/LlamaJsonSchemaGrammar.js.map +0 -1
  702. package/dist/llamaEvaluator/LlamaModel.d.ts +0 -119
  703. package/dist/llamaEvaluator/LlamaModel.js +0 -322
  704. package/dist/llamaEvaluator/LlamaModel.js.map +0 -1
  705. package/dist/utils/binariesGithubRelease.js.map +0 -1
  706. package/dist/utils/clearLlamaBuild.d.ts +0 -1
  707. package/dist/utils/clearLlamaBuild.js +0 -12
  708. package/dist/utils/clearLlamaBuild.js.map +0 -1
  709. package/dist/utils/cloneLlamaCppRepo.d.ts +0 -2
  710. package/dist/utils/cloneLlamaCppRepo.js +0 -102
  711. package/dist/utils/cloneLlamaCppRepo.js.map +0 -1
  712. package/dist/utils/compileLLamaCpp.d.ts +0 -8
  713. package/dist/utils/compileLLamaCpp.js +0 -132
  714. package/dist/utils/compileLLamaCpp.js.map +0 -1
  715. package/dist/utils/getBin.js +0 -78
  716. package/dist/utils/getBin.js.map +0 -1
  717. package/dist/utils/getGbnfGrammarForGbnfJsonSchema.d.ts +0 -2
  718. package/dist/utils/getGbnfGrammarForGbnfJsonSchema.js +0 -9
  719. package/dist/utils/getGbnfGrammarForGbnfJsonSchema.js.map +0 -1
  720. package/dist/utils/getReleaseInfo.d.ts +0 -7
  721. package/dist/utils/getReleaseInfo.js +0 -30
  722. package/dist/utils/getReleaseInfo.js.map +0 -1
  723. package/dist/utils/parseModelTypeDescription.d.ts +0 -6
  724. package/dist/utils/parseModelTypeDescription.js +0 -9
  725. package/dist/utils/parseModelTypeDescription.js.map +0 -1
  726. package/dist/utils/resolveChatWrapper.d.ts +0 -4
  727. package/dist/utils/resolveChatWrapper.js +0 -16
  728. package/dist/utils/resolveChatWrapper.js.map +0 -1
  729. package/dist/utils/usedBinFlag.d.ts +0 -6
  730. package/dist/utils/usedBinFlag.js +0 -15
  731. package/dist/utils/usedBinFlag.js.map +0 -1
  732. package/llama/addon.cpp +0 -814
  733. package/llama/usedBin.json +0 -3
  734. package/llamaBins/linux-arm64/llama-addon.node +0 -0
  735. package/llamaBins/linux-armv7l/llama-addon.node +0 -0
  736. package/llamaBins/linux-x64/llama-addon.node +0 -0
  737. package/llamaBins/mac-arm64/llama-addon.node +0 -0
  738. package/llamaBins/mac-x64/llama-addon.node +0 -0
  739. package/llamaBins/win-x64/llama-addon.exp +0 -0
  740. package/llamaBins/win-x64/llama-addon.lib +0 -0
  741. package/llamaBins/win-x64/llama-addon.node +0 -0
  742. /package/dist/{utils → bindings/utils}/binariesGithubRelease.d.ts +0 -0
  743. /package/dist/{llamaEvaluator → evaluator}/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.d.ts +0 -0
  744. /package/dist/{llamaEvaluator → evaluator}/LlamaContext/types.js +0 -0
  745. /package/dist/{llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies → evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies}/firstInFirstOutStrategy.d.ts +0 -0
  746. /package/dist/{llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies → evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies}/firstInFirstOutStrategy.js +0 -0
  747. /package/dist/{llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies → evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies}/maximumParallelismStrategy.d.ts +0 -0
  748. /package/dist/{llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies → evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies}/maximumParallelismStrategy.js +0 -0
  749. /package/dist/{llamaEvaluator → evaluator}/LlamaGrammarEvaluationState.d.ts +0 -0
@@ -0,0 +1,1544 @@
1
+ import { DisposeAggregator, DisposedError, EventRelay, withLock } from "lifecycle-utils";
2
+ import { removeNullFields } from "../../utils/removeNullFields.js";
3
+ import { LlamaGrammarEvaluationState } from "../LlamaGrammarEvaluationState.js";
4
+ import { LlamaText, SpecialToken } from "../../utils/LlamaText.js";
5
+ import { StopGenerationDetector } from "../../utils/StopGenerationDetector.js";
6
+ import { TokenStreamRegulator } from "../../utils/TokenStreamRegulator.js";
7
+ import { maxRecentDetokenizerTokens, UNKNOWN_UNICODE_CHAR } from "../../consts.js";
8
+ import { getQueuedTokensBeforeStopTrigger } from "../../utils/getQueuedTokensBeforeStopTrigger.js";
9
+ import { resolveChatWrapper } from "../../chatWrappers/utils/resolveChatWrapper.js";
10
+ import { GeneralChatWrapper } from "../../chatWrappers/GeneralChatWrapper.js";
11
+ import { safeEventCallback } from "../../utils/safeEventCallback.js";
12
+ import { pushAll } from "../../utils/pushAll.js";
13
+ import { resolveLastTokens } from "../../utils/resolveLastTokens.js";
14
+ import { eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy } from "./utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js";
15
+ import { FunctionCallNameGrammar } from "./utils/FunctionCallNameGrammar.js";
16
+ import { FunctionCallParamsGrammar } from "./utils/FunctionCallParamsGrammar.js";
17
// Defaults applied when the caller does not supply explicit generation options.
const defaultContextShiftOptions = {
    // Shift by one tenth of the context, but never by less than a single token.
    size(sequence) {
        const tenthOfContext = Math.floor(sequence.context.contextSize / 10);
        return Math.max(1, tenthOfContext);
    },
    strategy: "eraseFirstResponseAndKeepFirstSystem",
    lastEvaluationMetadata: null
};
// How many recent tokens the repeat penalty considers by default.
const defaultRepeatPenaltyLastTokens = 64;
const defaultTrimWhitespaceSuffix = false;
const defaultEvaluationPriority = 5;
25
/**
 * Low-level chat driver: binds a chat wrapper to a context sequence and runs the
 * token-generation loops for model responses (`generateResponse`) and for
 * completing a partially-typed user message (`loadChatAndCompleteUserMessage`).
 *
 * NOTE(review): this is compiled `dist` output; `GenerateResponseState` and
 * `setLastUserTextInChatHistory` are defined later in this file.
 */
export class LlamaChat {
    /** @internal */ _chatWrapper;
    /** @internal */ _disposeAggregator = new DisposeAggregator();
    /** @internal */ _autoDisposeSequence;
    /** @internal */ _chatLock = {}; // scope object for `withLock` — serializes all evaluations on this chat
    /** @internal */ _sequence; // set to null on dispose; doubles as the "disposed" flag
    onDispose = new EventRelay();
    /**
     * @param contextSequence - sequence tokens are evaluated on; must be non-null and not disposed.
     * @param chatWrapper - a concrete wrapper, or "auto" to resolve one from the model's
     * metadata, falling back to `GeneralChatWrapper` when resolution fails.
     * @param autoDisposeSequence - when true, disposing this chat also disposes the sequence.
     */
    constructor({ contextSequence, chatWrapper = "auto", autoDisposeSequence = true }) {
        if (contextSequence == null)
            throw new Error("contextSequence cannot be null");
        if (contextSequence.disposed)
            throw new DisposedError();
        this._sequence = contextSequence;
        this._autoDisposeSequence = autoDisposeSequence;
        // If the underlying sequence is disposed externally, dispose this chat too.
        this._disposeAggregator.add(this._sequence.onDispose.createListener(() => {
            this.dispose();
        }));
        this._disposeAggregator.add(this.onDispose.dispatchEvent);
        this._chatWrapper = chatWrapper === "auto"
            ? (resolveChatWrapper({
                bosString: contextSequence.model.tokens.bosString,
                filename: contextSequence.model.filename,
                fileInfo: contextSequence.model.fileInfo,
                tokenizer: contextSequence.model.tokenizer
            }) ?? new GeneralChatWrapper())
            : chatWrapper;
    }
    /**
     * Disposes this chat. Idempotent: subsequent calls are no-ops.
     * @param disposeSequence - whether to also dispose the context sequence;
     * defaults to the `autoDisposeSequence` constructor option.
     */
    dispose({ disposeSequence = this._autoDisposeSequence } = {}) {
        if (this._sequence == null)
            return;
        if (disposeSequence)
            this._sequence.dispose();
        this._sequence = null;
        this._disposeAggregator.dispose();
    }
    /** @hidden */
    [Symbol.dispose]() {
        return this.dispose();
    }
    get disposed() {
        return this._sequence == null;
    }
    get chatWrapper() {
        if (this._sequence == null)
            throw new DisposedError();
        return this._chatWrapper;
    }
    get sequence() {
        if (this._sequence == null)
            throw new DisposedError();
        return this._sequence;
    }
    get context() {
        return this.sequence.context;
    }
    get model() {
        return this.sequence.model;
    }
    /**
     * Generates a model response for the given chat history.
     *
     * Runs under the chat lock so only one evaluation is in flight at a time.
     * The outer `while (true)` restarts the token loop after a context shift;
     * the inner loop streams tokens and checks stop/function/max-token/abort
     * triggers after every evaluated token. Returns the trigger result object
     * produced by the corresponding `handle*Trigger` call.
     * @throws when both `grammar` and functions are enabled, or when the
     * context is too small to generate anything.
     */
    async generateResponse(history, options = {}) {
        const { onTextChunk, onToken, signal, stopOnAbortSignal = false, maxTokens, temperature, minP, topK, topP, grammar, trimWhitespaceSuffix = defaultTrimWhitespaceSuffix, repeatPenalty = {}, tokenBias, evaluationPriority = defaultEvaluationPriority, functions, onFunctionCall, documentFunctionParams, maxParallelFunctionCalls, contextShift = defaultContextShiftOptions, customStopTriggers, lastEvaluationContextWindow: { history: lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift = 0.5 } = {} } = options;
        const generateResponseState = new GenerateResponseState(this, this._chatWrapper, history, {
            onTextChunk,
            onToken,
            signal,
            stopOnAbortSignal,
            maxTokens,
            temperature,
            minP,
            topK,
            topP,
            grammar: grammar, // this is a workaround to allow passing both `functions` and `grammar`
            trimWhitespaceSuffix,
            repeatPenalty,
            tokenBias,
            evaluationPriority,
            functions,
            onFunctionCall,
            documentFunctionParams,
            maxParallelFunctionCalls,
            contextShift,
            customStopTriggers,
            lastEvaluationContextWindow: {
                history: lastEvaluationContextWindowHistory,
                minimumOverlapPercentageToPreventContextShift
            }
        });
        if (generateResponseState.grammar != null && generateResponseState.functionsEnabled)
            throw new Error("Using both grammar and functions is not supported yet");
        return await withLock(this._chatLock, "evaluate", signal, async () => {
            try {
                generateResponseState.ensureLastHistoryItemIsModel();
                const loadContextWindow = async (avoidReloadingHistory = false) => {
                    await generateResponseState.loadContextWindow(generateResponseState.getResolvedHistoryWithCurrentModelResponse(), false, avoidReloadingHistory);
                };
                const loadContextWindowForFunctionCallingLoop = async () => loadContextWindow(true);
                // eslint-disable-next-line no-constant-condition
                while (true) {
                    generateResponseState.startTokenLoop();
                    generateResponseState.canAvoidReloadingHistory = false;
                    await loadContextWindow();
                    generateResponseState.addStopGenerationTriggersFromChatWrapper();
                    // First iteration only: register ignore-start triggers and set up functions.
                    if (generateResponseState.generatedTokens === 0) {
                        generateResponseState.addIgnoreStartTextTriggersFromChatWrapper();
                        if (generateResponseState.functionsEnabled) {
                            generateResponseState.initFunctions();
                        }
                    }
                    // Resume a function-calling loop left pending from a previous iteration.
                    if (generateResponseState.functionEvaluationMode !== false) {
                        const functionsCallsRes = await generateResponseState.enterFunctionCallingLoop(loadContextWindowForFunctionCallingLoop);
                        if (functionsCallsRes != null)
                            return functionsCallsRes;
                        await loadContextWindowForFunctionCallingLoop();
                    }
                    await generateResponseState.alignCurrentSequenceStateWithCurrentTokens();
                    await generateResponseState.createNewEvaluationIterator();
                    // Token-by-token streaming loop; order of these calls is significant.
                    while (await generateResponseState.iterateEvaluation()) {
                        generateResponseState.waitOnPartialCharactersOrWhiteSpaceTokens();
                        generateResponseState.detectAndHandleFunctionStartSyntax();
                        if (generateResponseState.functionEvaluationMode !== false) {
                            generateResponseState.canAvoidReloadingHistory = false;
                            generateResponseState.releasePartiallyFreeTokensBeforeFunctionCallStart();
                            const functionsCallsRes = await generateResponseState.enterFunctionCallingLoop(loadContextWindowForFunctionCallingLoop);
                            if (functionsCallsRes != null)
                                return functionsCallsRes;
                        }
                        generateResponseState.recordStopGenerationEvaluation();
                        generateResponseState.popStreamRegulatorFreeTokens();
                        generateResponseState.removeFoundStartIgnoreTextsFromPendingTokens();
                        const stopGenerationTriggerRes = generateResponseState.handleStopGenerationTrigger("model");
                        if (stopGenerationTriggerRes != null)
                            return stopGenerationTriggerRes;
                        generateResponseState.spliceIgnoreStartTextDetectedTokens();
                        generateResponseState.moveFreePendingTokensToRes();
                        const maxTokensTriggerRes = generateResponseState.handleMaxTokensTrigger("model");
                        if (maxTokensTriggerRes != null)
                            return maxTokensTriggerRes;
                        // A needed context shift breaks out to the outer loop to reload the window.
                        if (generateResponseState.updateShouldContextShift())
                            break;
                        const abortRes = generateResponseState.handleAbortTrigger("model");
                        if (abortRes != null)
                            return abortRes;
                    }
                    generateResponseState.isFirstEvaluation = false;
                    if (generateResponseState.shouldContextShift)
                        continue;
                    break;
                }
                // Reached only if the inner loop ended without any trigger firing.
                throw new Error("The context size is too small to generate a response");
            }
            finally {
                generateResponseState.dispose();
            }
        });
    }
    /**
     * Evaluates the chat history and generates a completion for the last user
     * message (autocomplete-style), rather than a model response.
     *
     * The last context-window user message is patched via
     * `setLastUserTextInChatHistory` so the completion continues from
     * `initialUserPrompt` plus what has been generated so far.
     * @returns `{ completion, lastEvaluation, metadata }` where `metadata.stopReason`
     * reflects which trigger ended generation.
     * @throws when the context is too small to generate a completion.
     */
    async loadChatAndCompleteUserMessage(history, options = {}) {
        const { initialUserPrompt = "", stopOnAbortSignal = false, onTextChunk, onToken, signal, maxTokens = Math.min(256, Math.ceil(this.context.contextSize / 2)), temperature, minP, topK, topP, grammar, trimWhitespaceSuffix = defaultTrimWhitespaceSuffix, repeatPenalty = {}, tokenBias, evaluationPriority = defaultEvaluationPriority, functions, documentFunctionParams, contextShift = defaultContextShiftOptions, customStopTriggers, lastEvaluationContextWindow: { history: lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift = 0.8 } = {} } = options;
        const lastEvaluationContextWindowHistoryItem = lastEvaluationContextWindowHistory == null
            ? null
            : lastEvaluationContextWindowHistory[lastEvaluationContextWindowHistory.length - 1];
        const lastEvaluationContextWindowUserMessage = lastEvaluationContextWindowHistoryItem?.type === "user"
            ? lastEvaluationContextWindowHistoryItem.text
            : "";
        const generateResponseState = new GenerateResponseState(this, this._chatWrapper, history, {
            onTextChunk,
            onToken,
            signal,
            stopOnAbortSignal,
            maxTokens,
            temperature,
            minP,
            topK,
            topP,
            grammar: grammar, // this is a workaround to allow passing both `functions` and `grammar`
            trimWhitespaceSuffix,
            repeatPenalty,
            tokenBias,
            evaluationPriority,
            functions,
            documentFunctionParams,
            contextShift,
            customStopTriggers,
            lastEvaluationContextWindow: {
                history: lastEvaluationContextWindowHistory == null
                    ? undefined
                    : setLastUserTextInChatHistory(lastEvaluationContextWindowHistory, lastEvaluationContextWindowUserMessage + initialUserPrompt),
                minimumOverlapPercentageToPreventContextShift
            }
        });
        return await withLock(this._chatLock, "evaluate", signal, async () => {
            try {
                generateResponseState.ensureLastHistoryItemIsUser();
                const lastResolvedHistoryItem = generateResponseState.resolvedHistory[generateResponseState.resolvedHistory.length - 1];
                const initialUserMessage = lastResolvedHistoryItem?.type === "user"
                    ? lastResolvedHistoryItem.text
                    : "";
                // eslint-disable-next-line no-constant-condition
                while (true) {
                    generateResponseState.startTokenLoop();
                    // Load the window with the user text extended by the prompt + tokens generated so far.
                    const { userTextSuffix } = await generateResponseState.loadContextWindow(setLastUserTextInChatHistory(generateResponseState.resolvedHistory, initialUserMessage + initialUserPrompt + this.model.detokenize(generateResponseState.res)), true);
                    generateResponseState.functionEvaluationMode = false;
                    generateResponseState.addStopGenerationTriggersFromChatWrapper();
                    // Stop when the wrapper's user-text suffix would begin (end of user turn).
                    if (userTextSuffix != null && userTextSuffix.values.length > 0)
                        generateResponseState.stopGenerationDetector.addStopTrigger(StopGenerationDetector.resolveLlamaTextTrigger(userTextSuffix, this.model.tokenizer));
                    await generateResponseState.alignCurrentSequenceStateWithCurrentTokens();
                    // maxTokens === 0 means "just evaluate the history, produce no completion".
                    if (generateResponseState.maxTokens === 0) {
                        await generateResponseState.evaluateWithoutGeneratingNewTokens();
                        return {
                            completion: "",
                            lastEvaluation: {
                                contextWindow: setLastUserTextInChatHistory(generateResponseState.lastContextWindowHistory, initialUserMessage),
                                contextShiftMetadata: generateResponseState.lastHistoryCompressionMetadata
                            },
                            metadata: {
                                stopReason: "maxTokens"
                            }
                        };
                    }
                    await generateResponseState.createNewEvaluationIterator();
                    while (await generateResponseState.iterateEvaluation()) {
                        generateResponseState.waitOnPartialCharactersOrWhiteSpaceTokens();
                        generateResponseState.recordStopGenerationEvaluation();
                        generateResponseState.popStreamRegulatorFreeTokens();
                        const stopGenerationTriggerRes = generateResponseState.handleStopGenerationTrigger("user");
                        if (stopGenerationTriggerRes != null)
                            return {
                                completion: stopGenerationTriggerRes.response,
                                lastEvaluation: {
                                    contextWindow: setLastUserTextInChatHistory(generateResponseState.lastContextWindowHistory, initialUserMessage),
                                    contextShiftMetadata: stopGenerationTriggerRes.lastEvaluation.contextShiftMetadata
                                },
                                // NOTE(review): both ternary branches are identical — an artifact of
                                // TypeScript type narrowing in the original source; the value is
                                // always `stopGenerationTriggerRes.metadata`.
                                metadata: stopGenerationTriggerRes.metadata.stopReason === "customStopTrigger"
                                    ? stopGenerationTriggerRes.metadata
                                    : stopGenerationTriggerRes.metadata
                            };
                        generateResponseState.moveFreePendingTokensToRes(false);
                        const maxTokensTriggerRes = generateResponseState.handleMaxTokensTrigger("user");
                        if (maxTokensTriggerRes != null)
                            return {
                                completion: maxTokensTriggerRes.response,
                                lastEvaluation: {
                                    contextWindow: setLastUserTextInChatHistory(generateResponseState.lastContextWindowHistory, initialUserMessage),
                                    contextShiftMetadata: maxTokensTriggerRes.lastEvaluation.contextShiftMetadata
                                },
                                metadata: maxTokensTriggerRes.metadata
                            };
                        if (generateResponseState.updateShouldContextShift())
                            break;
                        const abortRes = generateResponseState.handleAbortTrigger("user");
                        if (abortRes != null)
                            return {
                                completion: abortRes.response,
                                lastEvaluation: {
                                    contextWindow: setLastUserTextInChatHistory(generateResponseState.lastContextWindowHistory, initialUserMessage),
                                    contextShiftMetadata: abortRes.lastEvaluation.contextShiftMetadata
                                },
                                metadata: abortRes.metadata
                            };
                    }
                    generateResponseState.isFirstEvaluation = false;
                    if (generateResponseState.shouldContextShift)
                        continue;
                    break;
                }
                throw new Error("The context size is too small to generate a completion");
            }
            finally {
                generateResponseState.dispose();
            }
        });
    }
}
296
/**
 * Returns a copy of a chat history item with the `rawCall` field cleared on every
 * non-string entry of a model response.
 * Items that are not model responses are returned unchanged (same reference).
 */
function removeRawFromHistoryItem(historyItem) {
    // Only model responses carry `rawCall` data; other item types pass through as-is.
    if (historyItem.type !== "model")
        return historyItem;

    const strippedResponse = historyItem.response.map((responseItem) => (
        typeof responseItem === "string"
            ? responseItem
            : { ...responseItem, rawCall: undefined }
    ));
    return { ...historyItem, response: strippedResponse };
}
312
/**
 * Compresses a chat history so that its tokenized context state fits within
 * `contextSize - contextShiftSize` tokens.
 *
 * Tries, in order: the history as-is; the caller-provided strategy (when it is a
 * function); and finally the built-in "erase first response, keep first system"
 * strategy. A custom strategy that throws or returns an oversized history falls
 * back to the default one (with a console warning/error).
 *
 * @returns `{compressedHistory, metadata}`; `metadata` is `null` when no
 * compression was needed.
 * @throws when the shift size leaves no room, or when even the default strategy
 * cannot make the history fit.
 */
async function compressHistoryToFitContextSize({ history, contextShiftSize, contextShiftStrategy, contextShiftLastEvaluationMetadata, contextSize, tokenizer, chatWrapper, functions, documentFunctionParams }) {
    const maxAllowedTokens = contextSize - contextShiftSize;
    // Tokenize the full context state a candidate history would produce and
    // check it leaves room for the requested shift.
    const historyFitsContext = (candidateHistory) => {
        const { contextText } = chatWrapper.generateContextState({
            chatHistory: candidateHistory,
            availableFunctions: functions,
            documentFunctionParams
        });
        return contextText.tokenize(tokenizer).length <= maxAllowedTokens;
    };

    if (maxAllowedTokens <= 0)
        throw new Error(`The context size (${contextSize}) is too small to fit the context shift size (${contextShiftSize})`);

    if (historyFitsContext(history))
        return { compressedHistory: history, metadata: null };

    if (contextShiftStrategy instanceof Function) {
        try {
            const customResult = await contextShiftStrategy({
                chatHistory: history,
                maxTokensCount: maxAllowedTokens,
                tokenizer,
                chatWrapper,
                lastShiftMetadata: contextShiftLastEvaluationMetadata
            });
            if (historyFitsContext(customResult.chatHistory))
                return {
                    compressedHistory: customResult.chatHistory,
                    metadata: customResult.metadata
                };
            console.warn("The provided context shift strategy did not return a history that fits the context size. " +
                "Using the default strategy instead.");
        }
        catch (err) {
            // A failing custom strategy degrades gracefully to the default strategy.
            console.error("The provided context shift strategy threw an error. " +
                "Using the default strategy instead.", err);
        }
    }
    else if (contextShiftStrategy !== "eraseFirstResponseAndKeepFirstSystem")
        console.warn(`Unknown context shift strategy "${contextShiftStrategy}". ` +
            "Using the default strategy instead.");

    const fallbackResult = await eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy({
        chatHistory: history,
        maxTokensCount: maxAllowedTokens,
        tokenizer,
        chatWrapper,
        lastShiftMetadata: contextShiftLastEvaluationMetadata
    });
    if (!historyFitsContext(fallbackResult.chatHistory))
        throw new Error("The default context shift strategy did not return a history that fits the context size. " +
            "This may happen due to the system prompt being too long");
    return {
        compressedHistory: fallbackResult.chatHistory,
        metadata: fallbackResult.metadata
    };
}
369
/**
 * Returns the trailing text segment of the last model response in the history,
 * or `""` when the history is empty, doesn't end with a model item, or the
 * last response segment isn't a plain string.
 */
function getLastTextModelResponseFromChatHistory(chatHistory) {
    const lastItem = chatHistory[chatHistory.length - 1];
    if (lastItem == null || lastItem.type !== "model")
        return "";

    const lastResponsePart = lastItem.response[lastItem.response.length - 1];
    return typeof lastResponsePart === "string"
        ? lastResponsePart
        : "";
}
378
/**
 * Returns the text of the last history item when it is a user message,
 * otherwise `""`.
 */
function getLastUserTextFromChatHistory(chatHistory) {
    const lastItem = chatHistory[chatHistory.length - 1];
    return lastItem?.type === "user"
        ? lastItem.text
        : "";
}
383
/**
 * Returns a copy of the history where the trailing text segment of the last
 * model response is set to `textResponse`.
 *
 * An empty model item is appended first when the history doesn't end with one.
 * When the last response segment is a string it is replaced (or popped if
 * `textResponse` is `""`); otherwise a non-empty `textResponse` is appended as
 * a new segment. The input history and its items are never mutated.
 */
function setLastModelTextResponseInChatHistory(chatHistory, textResponse) {
    const result = [...chatHistory];
    if (result.length === 0 || result[result.length - 1].type !== "model")
        result.push({ type: "model", response: [] });

    const lastIndex = result.length - 1;
    const response = [...result[lastIndex].response];
    result[lastIndex] = { ...result[lastIndex], response };

    const endsWithText = response.length > 0 && typeof response[response.length - 1] === "string";
    if (endsWithText) {
        if (textResponse === "")
            response.pop();
        else
            response[response.length - 1] = textResponse;
    }
    else if (textResponse !== "")
        response.push(textResponse);

    return result;
}
405
/**
 * Returns a copy of the history where the last user message's text is set to
 * `userText`, appending a new user item first when the history doesn't end
 * with one. The input history and its items are never mutated.
 */
function setLastUserTextInChatHistory(chatHistory, userText) {
    const result = [...chatHistory];
    const lastItem = result[result.length - 1];
    if (lastItem == null || lastItem.type !== "user")
        result.push({ type: "user", text: "" });

    result[result.length - 1] = {
        ...result[result.length - 1],
        text: userText
    };
    return result;
}
418
/**
 * Dispatches to the user or model variant of the "set last text" helpers
 * based on `itemType` (`"user"` or anything else = model).
 */
function setLastTextInChatHistory(itemType, chatHistory, text) {
    return itemType === "user"
        ? setLastUserTextInChatHistory(chatHistory, text)
        : setLastModelTextResponseInChatHistory(chatHistory, text);
}
424
/**
 * Generates the rendered context state for a chat history, optionally forcing
 * the rendered text to end exactly at the last user message.
 */
function generateContextText(endWithUserText, chatWrapper, options) {
    return endWithUserText
        ? generateContextTextThatEndsWithUserText(chatWrapper, options)
        : chatWrapper.generateContextState(options);
}
429
/**
 * Generates a context state whose rendered text stops exactly at the end of
 * the last user message, splitting the chat wrapper's output into the part up
 * to and including the user text (`contextText`) and whatever the wrapper
 * renders after it (`userTextSuffix`).
 *
 * Works by appending a unique random marker to the last user message,
 * rendering the full context, then cutting the rendered values at the marker.
 *
 * @throws when the marker is not found in the rendered output, which means the
 * chat wrapper dropped or transformed the user message.
 */
function generateContextTextThatEndsWithUserText(chatWrapper, options) {
    const lastUserText = getLastUserTextFromChatHistory(options.chatHistory);
    // "W" delimiters around the base36 randomness make accidental collisions
    // with surrounding template text unlikely
    const randomId = "W" + (Math.random()
        .toString(36)
        .slice(2)) + "W";
    const { contextText, ...rest } = chatWrapper.generateContextState({
        ...options,
        chatHistory: setLastUserTextInChatHistory(options.chatHistory, lastUserText + randomId)
    });
    const values = contextText.values;
    for (let i = 0; i < values.length; i++) {
        const item = values[i];
        if (typeof item !== "string")
            continue;
        const randomTextIndex = item.indexOf(randomId);
        if (randomTextIndex < 0)
            continue;
        return {
            contextText: LlamaText([
                ...values.slice(0, i),
                item.slice(0, randomTextIndex)
            ]),
            // Bug fix: the suffix must be built from the ORIGINAL rendered
            // values. The previous code reassigned `newContextText` to the
            // truncated LlamaText (whose values end at index `i`) before
            // slicing `i + 1`, so the slice was always empty and every value
            // the wrapper rendered after the user text was dropped from the
            // suffix.
            userTextSuffix: LlamaText([
                item.slice(randomTextIndex + randomId.length),
                ...values.slice(i + 1)
            ]),
            ...rest
        };
    }
    throw new Error("The random ID was not found in the context text. " +
        `There might be an issue with the chat wrapper "${chatWrapper.wrapperName}" ` +
        "where not all user messages are properly added to the the result LlamaText");
}
464
/**
 * Builds the token window to evaluate next: picks (and if needed compresses)
 * a slice of the chat history that fits the sequence's context size.
 *
 * Resolution order:
 *  1. On the first evaluation, try to reuse the previous evaluation's context
 *     window when enough of it is already loaded in the sequence (avoids a
 *     context shift).
 *  2. When a previous context shift left metadata, compress again using it.
 *  3. Otherwise use the full history if it fits, else compress it.
 *
 * Returns the chosen window history, its tokens, stop triggers and
 * function-calling flags from the chat wrapper, plus the (possibly cloned)
 * resolved history and new compression metadata.
 *
 * @throws {DisposedError} when `sequence` is null (already disposed).
 */
async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHistoryCompressionMetadata, pendingTokensCount = 0, isFirstEvaluation, chatWrapper, lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift, sequence, minFreeContextTokens = 1, functions, documentFunctionParams, endWithUserText }) {
    if (sequence == null)
        throw new DisposedError();
    const model = sequence.model;
    const context = sequence.context;
    // Fast path: reuse the last evaluation's context window when the sequence
    // state still overlaps it enough.
    if (isFirstEvaluation && lastEvaluationContextWindowHistory != null && sequence.isLoadedToMemory) {
        const newContextWindow = lastEvaluationContextWindowHistory.slice();
        // make sure the window ends with the item type generation will append to
        if (endWithUserText) {
            if (newContextWindow.length === 0 || newContextWindow[newContextWindow.length - 1].type !== "user")
                newContextWindow.push({
                    type: "user",
                    text: ""
                });
        }
        else if (newContextWindow.length === 0 || newContextWindow[newContextWindow.length - 1].type !== "model")
            newContextWindow.push({
                type: "model",
                response: []
            });
        const { contextText, stopGenerationTriggers, ignoreStartText, functionCall, userTextSuffix } = generateContextText(endWithUserText, chatWrapper, {
            chatHistory: newContextWindow,
            availableFunctions: functions,
            documentFunctionParams
        });
        const tokens = contextText.tokenize(model.tokenizer);
        if (tokens.length + pendingTokensCount + minFreeContextTokens < context.contextSize) {
            // only reuse the window when enough of it is already evaluated in
            // the sequence to make skipping the shift worthwhile
            const { firstDifferentIndex } = sequence.compareContextTokens(tokens);
            const existingEvaluationPercentage = firstDifferentIndex / tokens.length;
            if (existingEvaluationPercentage >= minimumOverlapPercentageToPreventContextShift)
                return {
                    history: newContextWindow,
                    stopGenerationTriggers,
                    tokens,
                    newResolvedHistory: resolvedHistory,
                    newHistoryCompressionMetadata: lastHistoryCompressionMetadata,
                    ignoreStartText: ignoreStartText ?? [],
                    functionCallInitiallyEngaged: functionCall?.initiallyEngaged ?? false,
                    disengageInitiallyEngagedFunctionCall: functionCall?.disengageInitiallyEngaged ?? [],
                    userTextSuffix
                };
        }
    }
    // clone the history; drop raw function-call data when the sequence state
    // was unloaded (the raw calls can no longer be replayed verbatim)
    resolvedHistory = sequence.isLoadedToMemory
        ? resolvedHistory.slice()
        : resolvedHistory.map(removeRawFromHistoryItem);
    // A previous shift happened: compress again, carrying its metadata forward.
    if (resolvedContextShift.lastEvaluationMetadata != null) {
        const contextShiftSize = resolvedContextShift.size instanceof Function
            ? await resolvedContextShift.size(sequence)
            : resolvedContextShift.size;
        const { compressedHistory, metadata } = await compressHistoryToFitContextSize({
            history: resolvedHistory,
            contextShiftSize: Math.max(minFreeContextTokens, Math.min(contextShiftSize, context.contextSize - pendingTokensCount)) + pendingTokensCount,
            contextShiftStrategy: resolvedContextShift.strategy,
            contextShiftLastEvaluationMetadata: resolvedContextShift.lastEvaluationMetadata,
            contextSize: context.contextSize,
            tokenizer: model.tokenizer,
            chatWrapper: chatWrapper,
            functions,
            documentFunctionParams
        });
        const { contextText, stopGenerationTriggers, ignoreStartText, functionCall, userTextSuffix } = generateContextText(endWithUserText, chatWrapper, {
            chatHistory: compressedHistory,
            availableFunctions: functions,
            documentFunctionParams
        });
        return {
            history: compressedHistory,
            stopGenerationTriggers,
            tokens: contextText.tokenize(model.tokenizer),
            newResolvedHistory: resolvedHistory,
            newHistoryCompressionMetadata: metadata,
            ignoreStartText: ignoreStartText ?? [],
            functionCallInitiallyEngaged: functionCall?.initiallyEngaged ?? false,
            disengageInitiallyEngagedFunctionCall: functionCall?.disengageInitiallyEngaged ?? [],
            userTextSuffix
        };
    }
    // No previous shift: use the full history as-is when it fits.
    {
        const { contextText, stopGenerationTriggers, ignoreStartText, functionCall, userTextSuffix } = generateContextText(endWithUserText, chatWrapper, {
            chatHistory: resolvedHistory,
            availableFunctions: functions,
            documentFunctionParams
        });
        const tokens = contextText.tokenize(model.tokenizer);
        if (tokens.length + pendingTokensCount + minFreeContextTokens < context.contextSize)
            return {
                history: resolvedHistory,
                stopGenerationTriggers,
                tokens,
                newResolvedHistory: resolvedHistory,
                newHistoryCompressionMetadata: lastHistoryCompressionMetadata,
                ignoreStartText: ignoreStartText ?? [],
                functionCallInitiallyEngaged: functionCall?.initiallyEngaged ?? false,
                disengageInitiallyEngagedFunctionCall: functionCall?.disengageInitiallyEngaged ?? [],
                userTextSuffix
            };
    }
    // It doesn't fit: perform the first compression.
    // clamp the shift size to [1, contextSize]
    const contextShiftSize = Math.min(context.contextSize, Math.max(1, Math.floor(resolvedContextShift.size instanceof Function
        ? await resolvedContextShift.size(sequence)
        : resolvedContextShift.size)));
    const { compressedHistory, metadata } = await compressHistoryToFitContextSize({
        history: resolvedHistory,
        contextShiftSize: Math.max(minFreeContextTokens, Math.min(contextShiftSize, context.contextSize - pendingTokensCount)) + pendingTokensCount,
        contextShiftStrategy: resolvedContextShift.strategy,
        contextShiftLastEvaluationMetadata: resolvedContextShift.lastEvaluationMetadata,
        contextSize: context.contextSize,
        tokenizer: model.tokenizer,
        chatWrapper: chatWrapper,
        functions,
        documentFunctionParams
    });
    const { contextText, stopGenerationTriggers, ignoreStartText, functionCall, userTextSuffix } = generateContextText(endWithUserText, chatWrapper, {
        chatHistory: compressedHistory,
        availableFunctions: functions,
        documentFunctionParams
    });
    return {
        history: compressedHistory,
        stopGenerationTriggers,
        tokens: contextText.tokenize(model.tokenizer),
        newResolvedHistory: resolvedHistory,
        newHistoryCompressionMetadata: metadata,
        ignoreStartText: ignoreStartText ?? [],
        functionCallInitiallyEngaged: functionCall?.initiallyEngaged ?? false,
        disengageInitiallyEngagedFunctionCall: functionCall?.disengageInitiallyEngaged ?? [],
        userTextSuffix
    };
}
592
+ class GenerateResponseState {
593
    // --- dependencies and caller-supplied options (assigned in the constructor) ---
    llamaChat;
    chatWrapper;
    history;
    onTextChunk;
    onToken;
    signal;
    stopOnAbortSignal;
    maxTokens;
    temperature;
    minP;
    topK;
    topP;
    grammar;
    trimWhitespaceSuffix;
    tokenBias;
    evaluationPriority;
    functions;
    onFunctionCall;
    documentFunctionParams;
    maxParallelFunctionCalls;
    contextShift;
    customStopTriggers;
    lastEvaluationContextWindowHistory;
    minimumOverlapPercentageToPreventContextShift;
    // --- values derived from the options in the constructor ---
    functionsEnabled;
    repeatPenaltyEnabled;
    resolvedContextShift;
    resolvedRepeatPenalty;
    lastModelResponse;
    grammarEvaluationState;
    functionNameGrammar;
    functionsGrammar;
    functionsEvaluationState;
    // --- streaming and stop-trigger detection machinery ---
    streamRegulator = new TokenStreamRegulator();
    stopGenerationDetector = new StopGenerationDetector();
    customStopGenerationTriggersDetector = new StopGenerationDetector();
    functionSyntaxStartDetector = new StopGenerationDetector();
    disengageInitiallyEngagedFunctionMode = new StopGenerationDetector();
    ignoreStartTextDetector = new StopGenerationDetector();
    locksToReleaseOnValidGeneration = [];
    // --- generation state ---
    resolvedHistory;
    res = []; // all response tokens accepted so far
    pendingTokens = []; // generated tokens not yet released to the stream
    ignoredStartTextTokens = []; // tokens of matched "ignore at start" text
    resFunctionCalls = [];
    functionEvaluationMode = false; // false | "prefixOrDisengage" | "functionName" (modes seen in this file)
    currentFunctionCallPreviousText = LlamaText([]);
    currentFunctionCallCurrentPartTokens = [];
    functionEvaluationFunctionName = "";
    currentFunctionCallPreviousPartLeftoverText = "";
    removedStartTextToIgnore = false;
    releasedPartiallyFreeTokensBeforeFunctionCallStartSyntax = false;
    generatedTokens = 0;
    isFirstEvaluation = true;
    initiallyEngagedFunctionMode = false;
    lastContextWindowHistory;
    lastHistoryCompressionMetadata;
    restartEvaluationIterator = false;
    // context shift loop
    shouldContextShift = false;
    canAvoidReloadingHistory = false;
    contextWindowTokens = [];
    stopGenerationTriggers = [];
    ignoreStartText = [];
    functionCallInitiallyEngaged = false;
    disengageInitiallyEngagedFunctionCall = [];
    userTextSuffix = undefined;
    tokens = []; // full token window evaluated by the sequence
    contextWindowLastModelResponse = "";
    contextWindowsRes = [];
    // token evaluation loop
    evaluationIterator;
    currentIteration;
    currentIterationReplacementToken;
    currentToken;
    currentTokens = [];
    currentText = "";
    currentQueuedTokenRelease;
671
    /**
     * Captures the generation options, resolves defaults (context shift,
     * repeat penalty, overlap threshold), validates the abort/disposed state,
     * and wires up the stop-trigger detectors.
     *
     * @throws the abort reason when `signal` is already aborted.
     * @throws {DisposedError} when the parent LlamaChat is already disposed.
     */
    constructor(llamaChat, chatWrapper, history, { onTextChunk, onToken, signal, stopOnAbortSignal = false, maxTokens, temperature, minP, topK, topP, grammar, trimWhitespaceSuffix = defaultTrimWhitespaceSuffix, repeatPenalty = {}, tokenBias, evaluationPriority = defaultEvaluationPriority, functions, onFunctionCall, documentFunctionParams, maxParallelFunctionCalls, contextShift = defaultContextShiftOptions, customStopTriggers, lastEvaluationContextWindow: { history: lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift = 0.5 } = {} } = {}) {
        this.llamaChat = llamaChat;
        this.chatWrapper = chatWrapper;
        this.history = history;
        // event callbacks are wrapped so their errors can't break generation
        this.onTextChunk = safeEventCallback(onTextChunk);
        this.onToken = safeEventCallback(onToken);
        this.signal = signal;
        this.stopOnAbortSignal = stopOnAbortSignal;
        this.maxTokens = maxTokens;
        this.temperature = temperature;
        this.minP = minP;
        this.topK = topK;
        this.topP = topP;
        this.grammar = grammar;
        this.trimWhitespaceSuffix = trimWhitespaceSuffix;
        this.tokenBias = tokenBias;
        this.evaluationPriority = evaluationPriority;
        this.functions = functions;
        this.onFunctionCall = safeEventCallback(onFunctionCall);
        this.documentFunctionParams = documentFunctionParams;
        this.maxParallelFunctionCalls = maxParallelFunctionCalls;
        this.contextShift = contextShift;
        this.customStopTriggers = customStopTriggers;
        this.lastEvaluationContextWindowHistory = lastEvaluationContextWindowHistory;
        this.minimumOverlapPercentageToPreventContextShift = minimumOverlapPercentageToPreventContextShift;
        this.functionsEnabled = (this.functions != null && Object.keys(this.functions).length > 0);
        if (this.signal?.aborted)
            throw this.signal.reason;
        if (this.llamaChat.disposed)
            throw new DisposedError();
        // drop raw function-call data when the sequence state isn't in memory
        this.resolvedHistory = this.llamaChat.sequence.isLoadedToMemory
            ? this.history.slice()
            : this.history.map(removeRawFromHistoryItem);
        this.resolvedContextShift = {
            ...defaultContextShiftOptions,
            ...removeNullFields(this.contextShift)
        };
        // `repeatPenalty === false` disables the penalty entirely
        this.resolvedRepeatPenalty = repeatPenalty === false
            ? { lastTokens: 0 }
            : {
                ...(repeatPenalty ?? {}),
                lastTokens: repeatPenalty?.lastTokens ?? defaultRepeatPenaltyLastTokens
            };
        this.lastModelResponse = getLastTextModelResponseFromChatHistory(this.resolvedHistory);
        this.repeatPenaltyEnabled = this.resolvedRepeatPenalty.lastTokens > 0;
        this.grammarEvaluationState = this.grammar != null
            ? new LlamaGrammarEvaluationState({ grammar: this.grammar })
            : undefined;
        this.functionNameGrammar = this.functionsEnabled
            ? new FunctionCallNameGrammar(this.llamaChat.model._llama, this.functions, this.chatWrapper)
            : undefined;
        this.functionsGrammar = undefined;
        this.functionsEvaluationState = undefined;
        this.lastContextWindowHistory = this.resolvedHistory;
        // NOTE(review): seeded with the resolved context shift options object,
        // later replaced by strategy metadata — confirm this initial value is intentional
        this.lastHistoryCompressionMetadata = this.resolvedContextShift;
        if (this.customStopTriggers != null)
            StopGenerationDetector.resolveStopTriggers(this.customStopTriggers, this.llamaChat.model.tokenizer)
                .map((stopTrigger) => this.customStopGenerationTriggersDetector.addStopTrigger(stopTrigger));
        if (this.grammar != null)
            StopGenerationDetector.resolveStopTriggers(this.grammar.stopGenerationTriggers, this.llamaChat.model.tokenizer)
                .map((stopTrigger) => this.stopGenerationDetector.addStopTrigger(stopTrigger));
        // detect the start of a function call (with optional parallelism section prefix)
        if (this.functions != null && Object.keys(this.functions).length > 0)
            this.functionSyntaxStartDetector.addStopTrigger(StopGenerationDetector.resolveLlamaTextTrigger(LlamaText([
                this.chatWrapper.settings.functions?.parallelism?.call?.sectionPrefix ?? "",
                this.chatWrapper.settings.functions.call.prefix
            ]), this.llamaChat.model.tokenizer));
        // bound so it can be passed as a standalone callback without losing `this`
        this.getPenaltyTokens = this.getPenaltyTokens.bind(this);
    }
739
    // Currently holds no resources to release; kept so callers (and `using`
    // declarations via Symbol.dispose) have a stable disposal API.
    dispose() {
    }
    [Symbol.dispose]() {
        this.dispose();
    }
744
    // Appends an empty model response item when the resolved history doesn't
    // already end with one, so generated text has an item to land in.
    ensureLastHistoryItemIsModel() {
        if (this.resolvedHistory.length === 0 || this.resolvedHistory[this.resolvedHistory.length - 1].type !== "model")
            this.resolvedHistory.push({
                type: "model",
                response: []
            });
    }
751
    // Appends an empty user item when the resolved history doesn't already end
    // with one (used when generation should complete a user message).
    ensureLastHistoryItemIsUser() {
        if (this.resolvedHistory.length === 0 || this.resolvedHistory[this.resolvedHistory.length - 1].type !== "user")
            this.resolvedHistory.push({
                type: "user",
                text: ""
            });
    }
758
    // Throws the abort reason when the signal fired — unless `stopOnAbortSignal`
    // is set and some response was already generated, in which case generation
    // is allowed to finish gracefully. Also throws when the parent LlamaChat
    // was disposed.
    ensureNotAborted() {
        if (this.signal?.aborted && (!this.stopOnAbortSignal || this.res.length === 0))
            throw this.signal.reason;
        if (this.llamaChat.disposed)
            throw new DisposedError();
    }
764
    // Builds the token list the repeat penalty applies to: the last
    // `resolvedRepeatPenalty.lastTokens` generated tokens, optionally run
    // through `punishTokensFilter`, with newline tokens excluded unless
    // `penalizeNewLine` is explicitly truthy.
    getPenaltyTokens() {
        if (this.llamaChat.disposed)
            throw new DisposedError();
        let punishTokens = this.res.slice(-this.resolvedRepeatPenalty.lastTokens);
        if (this.resolvedRepeatPenalty.punishTokensFilter != null)
            punishTokens = this.resolvedRepeatPenalty.punishTokensFilter(punishTokens);
        if (this.resolvedRepeatPenalty.penalizeNewLine == null || !this.resolvedRepeatPenalty.penalizeNewLine) {
            const nlToken = this.llamaChat.model.tokens.nl;
            if (nlToken != null)
                punishTokens = punishTokens.filter(token => token !== nlToken);
        }
        return punishTokens;
    }
777
    // Returns the resolved history with the text generated so far (detokenized
    // from `res`, appended to the pre-existing last model response text).
    // Returns the history unchanged when nothing was generated, or when the
    // detokenized text becomes empty after optional whitespace-suffix trimming.
    getResolvedHistoryWithCurrentModelResponse() {
        if (this.res.length === 0)
            return this.resolvedHistory;
        let modelResponse = this.llamaChat.model.detokenize(this.res);
        if (this.grammar?.trimWhitespaceSuffix || this.trimWhitespaceSuffix)
            modelResponse = modelResponse.trimEnd();
        if (modelResponse === "")
            return this.resolvedHistory;
        return setLastModelTextResponseInChatHistory(this.resolvedHistory, this.lastModelResponse + modelResponse);
    }
787
    // Strips a matched "ignore at response start" text from the pending tokens.
    // Only runs once per response (`removedStartTextToIgnore`), before any token
    // was released (`res` empty), and only when the detector finished matching
    // (or `forceRemove` is set). Replays the pending tokens through the detector
    // to find the longest ("most exhaustive") matched trigger, records its
    // tokens in `ignoredStartTextTokens`, and rebuilds `pendingTokens` from the
    // trigger's leftover generation plus the tokens after the match.
    removeFoundStartIgnoreTextsFromPendingTokens(forceRemove = false) {
        if (!this.removedStartTextToIgnore && this.res.length === 0 && this.pendingTokens.length > 0 &&
            this.ignoreStartTextDetector.hasTriggeredStops && (forceRemove || !this.ignoreStartTextDetector.hasInProgressStops)) {
            // reset the detector so the replay below starts from a clean state
            this.ignoreStartTextDetector.clearInProgressStops();
            this.ignoreStartTextDetector.clearTriggeredStops();
            let mostExhaustiveTriggeredStops = null;
            let mostExhaustiveTriggeredStopsLeftoverTokens = [];
            const lastTokensForDetokenizer = resolveLastTokens([
                this.contextWindowTokens,
                this.ignoredStartTextTokens
            ]);
            // replay pending tokens one by one, keeping the latest (longest) match
            for (let i = 0; i < this.pendingTokens.length; i++) {
                this.ignoreStartTextDetector.recordGeneration({
                    text: this.llamaChat.model.detokenize([this.pendingTokens[i]], false, lastTokensForDetokenizer),
                    tokens: [this.pendingTokens[i]],
                    startNewChecks: i === 0,
                    triggerMustStartWithGeneration: true
                });
                lastTokensForDetokenizer.push(this.pendingTokens[i]);
                if (this.ignoreStartTextDetector.hasTriggeredStops) {
                    mostExhaustiveTriggeredStops = this.ignoreStartTextDetector.getTriggeredStops();
                    this.ignoreStartTextDetector.clearTriggeredStops();
                    mostExhaustiveTriggeredStopsLeftoverTokens = this.pendingTokens.slice(i + 1);
                }
                else if (!this.ignoreStartTextDetector.hasInProgressStops)
                    break; // no match can grow any further
            }
            if (mostExhaustiveTriggeredStops != null) {
                const [mostExhaustiveTriggeredStop] = mostExhaustiveTriggeredStops;
                if (mostExhaustiveTriggeredStop != null) {
                    // remember the matched trigger as tokens (string parts are tokenized)
                    this.ignoredStartTextTokens = mostExhaustiveTriggeredStop.stopTrigger
                        .map((stopTrigger) => {
                        if (typeof stopTrigger === "string")
                            return this.llamaChat.model.tokenize(stopTrigger, false, "trimLeadingSpace");
                        else
                            return [stopTrigger];
                    })
                        .flat(1);
                    // pending tokens become: what the trigger left over + tokens after the match
                    const newPendingTokens = [
                        ...mostExhaustiveTriggeredStop.remainingGeneration,
                        mostExhaustiveTriggeredStopsLeftoverTokens
                    ]
                        .map((generation) => {
                        if (typeof generation === "string")
                            return this.llamaChat.model.tokenize(generation, false, "trimLeadingSpace");
                        else
                            return generation;
                    })
                        .flat(1);
                    this.pendingTokens.length = 0;
                    pushAll(this.pendingTokens, newPendingTokens);
                    this.removedStartTextToIgnore = true;
                }
            }
        }
    }
843
    // Re-checks the abort/disposed state and resets the per-iteration
    // context-shift flag before each pass of the token generation loop.
    startTokenLoop() {
        this.ensureNotAborted();
        this.shouldContextShift = false;
    }
847
    // Returns the tokens of the function-call text accumulated so far, to be
    // counted as pending context. Empty when not in function-evaluation mode.
    // In "prefixOrDisengage" mode only the current prefix text/tokens matter;
    // otherwise the full call sequence is rebuilt: parallelism section prefix,
    // completed calls (joined by the between-calls separator), then the
    // in-progress call's previous text and current tokens.
    getContextWindowFunctionCallsTokens() {
        if (this.functionEvaluationMode === false)
            return [];
        else if (this.functionEvaluationMode === "prefixOrDisengage")
            return [
                ...LlamaText(this.currentFunctionCallPreviousText).tokenize(this.llamaChat.model.tokenizer, "trimLeadingSpace"),
                ...this.currentFunctionCallCurrentPartTokens
            ];
        const text = [];
        if (this.chatWrapper.settings.functions?.parallelism?.call?.sectionPrefix != null)
            text.push(this.chatWrapper.settings.functions.parallelism.call.sectionPrefix);
        for (let i = 0; i < this.resFunctionCalls.length; i++) {
            const call = this.resFunctionCalls[i];
            if (i > 0)
                text.push(this.chatWrapper.settings.functions?.parallelism?.call?.betweenCalls ?? "");
            text.push(call.raw);
        }
        text.push(this.currentFunctionCallPreviousText);
        return [
            ...LlamaText(text).tokenize(this.llamaChat.model.tokenizer, "trimLeadingSpace"),
            ...this.currentFunctionCallCurrentPartTokens
        ];
    }
870
    /**
     * Resolves the context window for the next evaluation and assembles
     * `this.tokens` (window tokens + ignored start text + pending + queued
     * stream chunks + in-progress function-call tokens).
     *
     * When `avoidReloadingHistory` is set and the previous window is still
     * usable (`canAvoidReloadingHistory` and the sequence is in memory), the
     * expensive `getContextWindow` call is skipped; if the reused window then
     * overflows the context size, it retries once with a full reload.
     *
     * @returns `{userTextSuffix}` — the wrapper text that follows the user
     * message, when the window was generated to end with user text.
     */
    async loadContextWindow(resolvedHistory, endWithUserText = false, avoidReloadingHistory = false) {
        const queuedChunkTokens = this.streamRegulator.getAllQueuedChunkTokens();
        const functionCallsTokens = this.getContextWindowFunctionCallsTokens();
        if (!avoidReloadingHistory || !this.canAvoidReloadingHistory || !this.llamaChat.sequence.isLoadedToMemory) {
            const { history: contextWindowHistory, stopGenerationTriggers, tokens: contextWindowTokens, newResolvedHistory, newHistoryCompressionMetadata, ignoreStartText, functionCallInitiallyEngaged, disengageInitiallyEngagedFunctionCall, userTextSuffix } = await getContextWindow({
                resolvedHistory: resolvedHistory,
                resolvedContextShift: this.resolvedContextShift,
                lastHistoryCompressionMetadata: this.lastHistoryCompressionMetadata,
                pendingTokensCount: this.pendingTokens.length + queuedChunkTokens.length + functionCallsTokens.length,
                isFirstEvaluation: this.isFirstEvaluation,
                chatWrapper: this.chatWrapper,
                lastEvaluationContextWindowHistory: this.lastEvaluationContextWindowHistory,
                minimumOverlapPercentageToPreventContextShift: this.minimumOverlapPercentageToPreventContextShift,
                sequence: this.llamaChat.sequence,
                minFreeContextTokens: 1,
                functions: this.functionsEnabled ? this.functions : undefined,
                documentFunctionParams: this.documentFunctionParams,
                endWithUserText
            });
            this.ensureNotAborted();
            // adopt everything the new window resolved
            this.contextWindowTokens = contextWindowTokens;
            this.stopGenerationTriggers = stopGenerationTriggers;
            this.ignoreStartText = ignoreStartText;
            this.functionCallInitiallyEngaged = functionCallInitiallyEngaged;
            this.disengageInitiallyEngagedFunctionCall = disengageInitiallyEngagedFunctionCall;
            this.userTextSuffix = userTextSuffix;
            this.resolvedHistory = newResolvedHistory;
            this.lastHistoryCompressionMetadata = newHistoryCompressionMetadata;
            this.lastContextWindowHistory = contextWindowHistory;
            this.contextWindowLastModelResponse = getLastTextModelResponseFromChatHistory(contextWindowHistory);
            this.contextWindowsRes = [];
            this.canAvoidReloadingHistory = true;
        }
        this.tokens = [
            ...this.contextWindowTokens,
            ...this.ignoredStartTextTokens,
            ...this.pendingTokens,
            ...queuedChunkTokens,
            ...functionCallsTokens
        ];
        // reused window overflowed — retry with a forced reload
        if (avoidReloadingHistory && this.tokens.length >= this.llamaChat.sequence.context.contextSize - 1)
            return await this.loadContextWindow(resolvedHistory, endWithUserText, false);
        return {
            userTextSuffix: this.userTextSuffix
        };
    }
916
    // Registers the chat wrapper's "ignore at response start" texts as stop
    // triggers on `ignoreStartTextDetector`.
    addIgnoreStartTextTriggersFromChatWrapper() {
        StopGenerationDetector.resolveStopTriggers(this.ignoreStartText, this.llamaChat.model.tokenizer)
            .map((stopTrigger) => this.ignoreStartTextDetector.addStopTrigger(stopTrigger));
    }
920
    // Registers the context window's stop-generation texts as stop triggers on
    // `stopGenerationDetector`.
    addStopGenerationTriggersFromChatWrapper() {
        StopGenerationDetector.resolveStopTriggers(this.stopGenerationTriggers, this.llamaChat.model.tokenizer)
            .map((stopTrigger) => this.stopGenerationDetector.addStopTrigger(stopTrigger));
    }
924
    // Sets up function-calling mode when the context window says a function
    // call is initially engaged: registers the disengage triggers, then enters
    // "prefixOrDisengage" mode when any such trigger exists (the model may
    // still opt out), or goes straight to "functionName" mode otherwise, and
    // requests a fresh evaluation iterator.
    initFunctions() {
        this.initiallyEngagedFunctionMode = this.functionCallInitiallyEngaged;
        if (this.initiallyEngagedFunctionMode) {
            StopGenerationDetector.resolveStopTriggers(this.disengageInitiallyEngagedFunctionCall, this.llamaChat.model.tokenizer)
                .map((stopTrigger) => this.disengageInitiallyEngagedFunctionMode.addStopTrigger(stopTrigger));
            if (this.disengageInitiallyEngagedFunctionMode.hasTriggers) {
                this.functionEvaluationMode = "prefixOrDisengage";
                this.functionsGrammar = undefined;
                this.functionsEvaluationState = undefined;
            }
            else {
                this.functionEvaluationMode = "functionName";
            }
            this.restartEvaluationIterator = true;
        }
    }
940
+ async enterFunctionCallingLoop(loadContextWindow) {
941
+ if (!this.functionsEnabled) {
942
+ this.functionEvaluationMode = false;
943
+ return undefined;
944
+ }
945
+ // eslint-disable-next-line no-constant-condition
946
+ while (true) {
947
+ if (this.functionEvaluationMode === "prefixOrDisengage") {
948
+ this.functionsGrammar = undefined;
949
+ this.functionsEvaluationState = undefined;
950
+ this.currentFunctionCallPreviousText = LlamaText([]);
951
+ this.currentFunctionCallCurrentPartTokens.length = 0;
952
+ const prefixTokens = LlamaText(this.chatWrapper.settings.functions.call.prefix)
953
+ .tokenize(this.llamaChat.model.tokenizer, "trimLeadingSpace");
954
+ const prefixDetector = new StopGenerationDetector();
955
+ const prefixDetectorRecordedTokens = [];
956
+ const afterPrefixLeftoverTokens = [];
957
+ prefixDetector.addStopTrigger(StopGenerationDetector.resolveLlamaTextTrigger(LlamaText(this.chatWrapper.settings.functions.call.prefix), this.llamaChat.model.tokenizer));
958
+ const lastTokensForDetokenizer = this.streamRegulator.getLastQueuedChunkTokens();
959
+ for (const prefixToken of prefixTokens) {
960
+ const tokens = [prefixToken];
961
+ const text = this.llamaChat.model.detokenize(tokens, false, lastTokensForDetokenizer);
962
+ pushAll(lastTokensForDetokenizer, tokens);
963
+ const disregardedPossibilities = this.disengageInitiallyEngagedFunctionMode
964
+ .getDisregardedPossibilitiesCountForAGeneration({
965
+ text,
966
+ tokens,
967
+ startNewChecks: this.currentFunctionCallCurrentPartTokens.length === 0
968
+ });
969
+ if (disregardedPossibilities > 0)
970
+ break;
971
+ this.currentFunctionCallCurrentPartTokens.push(prefixToken);
972
+ this.disengageInitiallyEngagedFunctionMode.recordGeneration({
973
+ text: text,
974
+ tokens: tokens,
975
+ startNewChecks: this.currentFunctionCallCurrentPartTokens.length === 1,
976
+ triggerMustStartWithGeneration: true
977
+ });
978
+ if (prefixDetector.hasTriggeredStops)
979
+ afterPrefixLeftoverTokens.push(prefixToken);
980
+ else {
981
+ prefixDetector.recordGeneration({
982
+ text: text,
983
+ tokens: tokens,
984
+ startNewChecks: this.currentFunctionCallCurrentPartTokens.length === 1,
985
+ triggerMustStartWithGeneration: true
986
+ });
987
+ pushAll(prefixDetectorRecordedTokens, tokens);
988
+ }
989
+ }
990
+ for await (const token of this.evaluateWithContextShift(loadContextWindow)) {
991
+ const stopGenerationTriggerRes = this.handleStopGenerationTrigger("model");
992
+ if (stopGenerationTriggerRes != null)
993
+ return stopGenerationTriggerRes;
994
+ this.currentFunctionCallCurrentPartTokens.push(token);
995
+ this.disengageInitiallyEngagedFunctionMode.recordGeneration({
996
+ text: this.currentText,
997
+ tokens: this.currentTokens,
998
+ startNewChecks: this.currentFunctionCallCurrentPartTokens.length === 1,
999
+ triggerMustStartWithGeneration: true
1000
+ });
1001
+ if (prefixDetector.hasTriggeredStops)
1002
+ afterPrefixLeftoverTokens.push(token);
1003
+ else {
1004
+ prefixDetector.recordGeneration({
1005
+ text: this.currentText,
1006
+ tokens: this.currentTokens,
1007
+ startNewChecks: this.currentFunctionCallCurrentPartTokens.length === 1,
1008
+ triggerMustStartWithGeneration: true
1009
+ });
1010
+ pushAll(prefixDetectorRecordedTokens, this.currentTokens);
1011
+ }
1012
+ if (this.disengageInitiallyEngagedFunctionMode.hasTriggeredStops ||
1013
+ !this.disengageInitiallyEngagedFunctionMode.hasInProgressStops)
1014
+ break;
1015
+ }
1016
+ const abortRes = this.handleAbortTrigger("model");
1017
+ if (abortRes != null)
1018
+ return abortRes;
1019
+ if (this.disengageInitiallyEngagedFunctionMode.hasTriggeredStops) {
1020
+ const lastTokensForDetokenizer = this.streamRegulator.getLastQueuedChunkTokens();
1021
+ for (const token of this.currentFunctionCallCurrentPartTokens) {
1022
+ this.currentToken = token;
1023
+ this.currentTokens = [this.currentToken];
1024
+ this.currentText = this.llamaChat.model.detokenize(this.currentTokens, false, lastTokensForDetokenizer);
1025
+ pushAll(lastTokensForDetokenizer, this.currentTokens);
1026
+ this.currentQueuedTokenRelease = this.streamRegulator.addChunk({
1027
+ tokens: this.currentTokens,
1028
+ text: this.currentText
1029
+ });
1030
+ this.recordStopGenerationEvaluation();
1031
+ }
1032
+ this.currentFunctionCallCurrentPartTokens.length = 0;
1033
+ this.functionEvaluationMode = false;
1034
+ return undefined;
1035
+ }
1036
+ if (prefixDetector.hasTriggeredStops) {
1037
+ const triggeredStops = prefixDetector.getTriggeredStops();
1038
+ const { firstRemainingGenerationAfterStop, stopTrigger } = StopGenerationDetector.getFirstRemainingGenerationAfterStop(triggeredStops);
1039
+ this.currentFunctionCallPreviousPartLeftoverText = StopGenerationDetector.detokenizeRemainingGeneration(firstRemainingGenerationAfterStop, stopTrigger, this.llamaChat.model.tokenizer) + this.llamaChat.model.detokenize(afterPrefixLeftoverTokens, false, prefixDetectorRecordedTokens);
1040
+ }
1041
+ else
1042
+ this.currentFunctionCallPreviousPartLeftoverText = "";
1043
+ this.functionEvaluationMode = "functionName";
1044
+ this.currentFunctionCallCurrentPartTokens.length = 0;
1045
+ continue;
1046
+ }
1047
+ else if (this.functionEvaluationMode === "functionName") {
1048
+ const functionNameGenerationDoneDetector = new StopGenerationDetector();
1049
+ this.stopGenerationDetector.clearInProgressStops();
1050
+ this.customStopGenerationTriggersDetector.clearInProgressStops();
1051
+ this.currentFunctionCallPreviousText = LlamaText(this.chatWrapper.settings.functions.call.prefix);
1052
+ this.currentFunctionCallCurrentPartTokens.length = 0;
1053
+ const functionNameGrammar = this.functionNameGrammar ?? new FunctionCallNameGrammar(this.llamaChat.model._llama, this.functions, this.chatWrapper);
1054
+ this.functionsGrammar = functionNameGrammar;
1055
+ this.functionsEvaluationState = new LlamaGrammarEvaluationState({
1056
+ grammar: this.functionsGrammar
1057
+ });
1058
+ StopGenerationDetector.resolveStopTriggers(this.functionsGrammar.stopGenerationTriggers, this.llamaChat.model.tokenizer)
1059
+ .map((stopTrigger) => functionNameGenerationDoneDetector.addStopTrigger(stopTrigger));
1060
+ if (this.currentFunctionCallPreviousPartLeftoverText !== "") {
1061
+ const validFunctionNames = Object.keys(this.functions);
1062
+ const hasAnyFunctionStartWithLeftover = validFunctionNames.some((functionName) => functionName.startsWith(this.currentFunctionCallPreviousPartLeftoverText));
1063
+ if (hasAnyFunctionStartWithLeftover) {
1064
+ const leftoverTokens = this.llamaChat.model.tokenize(this.currentFunctionCallPreviousPartLeftoverText, false, "trimLeadingSpace");
1065
+ this.currentFunctionCallPreviousPartLeftoverText = "";
1066
+ const lastTokens = [];
1067
+ for (const leftoverToken of leftoverTokens) {
1068
+ const canBeNextToken = this.llamaChat.context._canBeNextTokenForGrammarEvaluationState(this.functionsEvaluationState, leftoverToken);
1069
+ if (!canBeNextToken)
1070
+ break;
1071
+ this.llamaChat.context._acceptTokenOnGrammarEvaluationState(this.functionsEvaluationState, leftoverToken);
1072
+ this.currentFunctionCallCurrentPartTokens.push(leftoverToken);
1073
+ functionNameGenerationDoneDetector.recordGeneration({
1074
+ text: this.llamaChat.model.detokenize([leftoverToken], false, lastTokens),
1075
+ tokens: [leftoverToken]
1076
+ });
1077
+ lastTokens.push(leftoverToken);
1078
+ }
1079
+ }
1080
+ }
1081
+ for await (const token of this.evaluateWithContextShift(loadContextWindow)) {
1082
+ this.currentFunctionCallCurrentPartTokens.push(token);
1083
+ functionNameGenerationDoneDetector.recordGeneration({
1084
+ text: this.currentText,
1085
+ tokens: this.currentTokens
1086
+ });
1087
+ if (functionNameGenerationDoneDetector.hasTriggeredStops)
1088
+ break;
1089
+ }
1090
+ const abortRes = this.handleAbortTrigger("model");
1091
+ if (abortRes != null)
1092
+ return abortRes;
1093
+ const functionCallNameText = this.llamaChat.model.detokenize(this.currentFunctionCallCurrentPartTokens);
1094
+ const functionName = functionNameGrammar.parseFunctionName(functionCallNameText);
1095
+ this.functionEvaluationFunctionName = functionName;
1096
+ this.functionEvaluationMode = "params";
1097
+ continue;
1098
+ }
1099
+ else if (this.functionEvaluationMode === "params") {
1100
+ this.currentFunctionCallPreviousText = LlamaText([
1101
+ this.chatWrapper.settings.functions.call.prefix,
1102
+ this.functionEvaluationFunctionName,
1103
+ this.chatWrapper.settings.functions.call.paramsPrefix
1104
+ ]);
1105
+ const lastPartTokens = resolveLastTokens([this.currentFunctionCallCurrentPartTokens]);
1106
+ this.currentFunctionCallCurrentPartTokens.length = 0;
1107
+ let params = undefined;
1108
+ let paramsText = "";
1109
+ const functionDefinition = this.functions[this.functionEvaluationFunctionName];
1110
+ if (functionDefinition == null)
1111
+ throw new Error(`Function "${this.functionEvaluationFunctionName}" is not provided in the functions object`);
1112
+ else if (functionDefinition.params == null) {
1113
+ params = undefined;
1114
+ paramsText = "";
1115
+ }
1116
+ else {
1117
+ const functionParamsGenerationDoneDetector = new StopGenerationDetector();
1118
+ const functionParamsGrammar = new FunctionCallParamsGrammar(this.llamaChat.model._llama, this.functions, this.chatWrapper, this.functionEvaluationFunctionName, functionDefinition.params);
1119
+ this.functionsGrammar = functionParamsGrammar;
1120
+ this.functionsEvaluationState = new LlamaGrammarEvaluationState({
1121
+ grammar: this.functionsGrammar
1122
+ });
1123
+ StopGenerationDetector.resolveStopTriggers(this.functionsGrammar.stopGenerationTriggers, this.llamaChat.model.tokenizer)
1124
+ .map((stopTrigger) => functionParamsGenerationDoneDetector.addStopTrigger(stopTrigger));
1125
+ for await (const token of this.evaluateWithContextShift(loadContextWindow)) {
1126
+ this.currentFunctionCallCurrentPartTokens.push(token);
1127
+ functionParamsGenerationDoneDetector.recordGeneration({
1128
+ text: this.currentText,
1129
+ tokens: this.currentTokens
1130
+ });
1131
+ if (functionParamsGenerationDoneDetector.hasTriggeredStops)
1132
+ break;
1133
+ }
1134
+ const abortRes = this.handleAbortTrigger("model");
1135
+ if (abortRes != null)
1136
+ return abortRes;
1137
+ const functionCallParamsText = this.llamaChat.model.detokenize(this.currentFunctionCallCurrentPartTokens, false, lastPartTokens);
1138
+ const parsedFunctionParams = functionParamsGrammar.parseParams(functionCallParamsText);
1139
+ params = parsedFunctionParams.params;
1140
+ paramsText = parsedFunctionParams.raw;
1141
+ }
1142
+ const functionCallText = LlamaText([
1143
+ this.chatWrapper.settings.functions.call.prefix,
1144
+ this.functionEvaluationFunctionName,
1145
+ this.chatWrapper.settings.functions.call.paramsPrefix,
1146
+ paramsText,
1147
+ this.chatWrapper.settings.functions.call.suffix
1148
+ ]);
1149
+ this.resFunctionCalls.push({
1150
+ functionName: this.functionEvaluationFunctionName,
1151
+ params,
1152
+ raw: functionCallText
1153
+ });
1154
+ this.onFunctionCall?.({
1155
+ functionName: this.functionEvaluationFunctionName,
1156
+ params: structuredClone(params),
1157
+ raw: functionCallText.toJSON()
1158
+ });
1159
+ this.currentFunctionCallPreviousText = LlamaText([]);
1160
+ this.currentFunctionCallCurrentPartTokens.length = 0;
1161
+ this.functionEvaluationFunctionName = "";
1162
+ if (this.chatWrapper.settings.functions.parallelism == null || (this.maxParallelFunctionCalls != null && this.maxParallelFunctionCalls <= this.resFunctionCalls.length)) {
1163
+ this.functionEvaluationMode = false;
1164
+ return this.returnFunctionCallResults();
1165
+ }
1166
+ this.functionEvaluationMode = "sectionSuffixOrBetweenCalls";
1167
+ continue;
1168
+ }
1169
+ else if (this.functionEvaluationMode === "sectionSuffixOrBetweenCalls") {
1170
+ const sectionSuffixDetector = new StopGenerationDetector();
1171
+ let isFirstToken = true;
1172
+ this.functionsGrammar = undefined;
1173
+ this.functionsEvaluationState = undefined;
1174
+ this.currentFunctionCallPreviousText = LlamaText([]);
1175
+ this.currentFunctionCallCurrentPartTokens.length = 0;
1176
+ StopGenerationDetector.resolveStopTriggers([
1177
+ ...(this.chatWrapper.settings.functions.parallelism?.call?.sectionSuffix != null
1178
+ ? [this.chatWrapper.settings.functions.parallelism?.call?.sectionSuffix]
1179
+ : []),
1180
+ LlamaText(new SpecialToken("EOS")),
1181
+ LlamaText(new SpecialToken("EOT"))
1182
+ ], this.llamaChat.model.tokenizer)
1183
+ .map((stopTrigger) => sectionSuffixDetector.addStopTrigger(stopTrigger));
1184
+ for await (const token of this.evaluateWithContextShift(loadContextWindow)) {
1185
+ this.currentFunctionCallCurrentPartTokens.push(token);
1186
+ sectionSuffixDetector.recordGeneration({
1187
+ text: this.currentText,
1188
+ tokens: this.currentTokens,
1189
+ startNewChecks: isFirstToken,
1190
+ triggerMustStartWithGeneration: true
1191
+ });
1192
+ isFirstToken = false;
1193
+ if (sectionSuffixDetector.hasTriggeredStops || !sectionSuffixDetector.hasInProgressStops)
1194
+ break;
1195
+ }
1196
+ const abortRes = this.handleAbortTrigger("model");
1197
+ if (abortRes != null)
1198
+ return abortRes;
1199
+ if (sectionSuffixDetector.hasTriggeredStops) {
1200
+ this.functionEvaluationMode = false;
1201
+ return this.returnFunctionCallResults();
1202
+ }
1203
+ this.functionEvaluationMode = "functionName";
1204
+ this.initiallyEngagedFunctionMode = false;
1205
+ continue;
1206
+ }
1207
+ break;
1208
+ }
1209
+ return undefined;
1210
+ }
1211
releasePartiallyFreeTokensBeforeFunctionCallStart() {
    // Flushes into `pendingTokens` (and out to the consumer) everything generated
    // before the detected function-call start syntax, then clears the stream
    // regulator queue. Idempotent via the
    // `releasedPartiallyFreeTokensBeforeFunctionCallStartSyntax` flag.
    if (this.releasedPartiallyFreeTokensBeforeFunctionCallStartSyntax)
        return;
    // partial stop-trigger matches are no longer relevant once the function
    // syntax start has been confirmed
    this.stopGenerationDetector.clearInProgressStops();
    this.customStopGenerationTriggersDetector.clearInProgressStops();
    pushAll(this.pendingTokens, this.streamRegulator.popFreeChunkTokens());
    const triggeredStops = this.functionSyntaxStartDetector.getTriggeredStops();
    const partiallyFreeTokens = this.streamRegulator.getPartiallyFreeChunk(this.llamaChat.model.tokenizer);
    // keep only the queued tokens that precede the function-call start trigger
    const queuedTokensBeforeStopTrigger = getQueuedTokensBeforeStopTrigger(triggeredStops, partiallyFreeTokens, this.llamaChat.model.tokenizer);
    pushAll(this.pendingTokens, queuedTokensBeforeStopTrigger);
    this.removeFoundStartIgnoreTextsFromPendingTokens(true);
    this.pushPendingTokensAndCallOnToken();
    this.streamRegulator.clearQueue();
    this.releasedPartiallyFreeTokensBeforeFunctionCallStartSyntax = true;
}
1226
returnFunctionCallResults() {
    // Builds the final generation result when one or more function calls were
    // parsed; returns `undefined` when there are no function calls to report.
    if (this.resFunctionCalls.length > 0) {
        // make sure the text generated before the call syntax is emitted first
        this.releasePartiallyFreeTokensBeforeFunctionCallStart();
        let modelResponse = this.llamaChat.model.detokenize(this.res);
        let contextWindowModelResponse = this.llamaChat.model.detokenize(this.contextWindowsRes);
        if (this.grammar?.trimWhitespaceSuffix || this.trimWhitespaceSuffix) {
            modelResponse = modelResponse.trimEnd();
            contextWindowModelResponse = contextWindowModelResponse.trimEnd();
        }
        return {
            response: modelResponse,
            lastEvaluation: {
                contextWindow: setLastTextInChatHistory("model", this.lastContextWindowHistory, this.contextWindowLastModelResponse + contextWindowModelResponse),
                cleanHistory: setLastTextInChatHistory("model", this.resolvedHistory, this.lastModelResponse + modelResponse),
                contextShiftMetadata: this.lastHistoryCompressionMetadata
            },
            functionCalls: this.resFunctionCalls.map((functionCall) => {
                return {
                    functionName: functionCall.functionName,
                    params: functionCall.params,
                    raw: functionCall.raw.toJSON()
                };
            }), // prevent infinite TS type instantiation
            metadata: {
                stopReason: "functionCalls"
            }
        };
    }
    return undefined;
}
1256
async *evaluateWithContextShift(loadContextWindow) {
    // Yields generated tokens one at a time, transparently performing a
    // context shift (reloading the context window via `loadContextWindow`)
    // whenever the sequence runs out of context space.
    // Returns early on abort; callers are expected to `break` out when a stop
    // condition is met.
    while (true) {
        this.startTokenLoop();
        await loadContextWindow();
        await this.alignCurrentSequenceStateWithCurrentTokens();
        await this.createNewEvaluationIterator();
        while (await this.iterateEvaluation()) {
            if (this.currentToken == null)
                break;
            yield this.currentToken;
            if (this.shouldAbort)
                return;
            if (this.updateShouldContextShift())
                break;
            if (this.restartEvaluationIterator) {
                await this.createNewEvaluationIterator();
            }
        }
        this.isFirstEvaluation = false;
        if (this.shouldContextShift)
            continue;
        break;
    }
    // reached only when evaluation ended without an abort, a context shift, or
    // the caller breaking out of the generator
    throw new Error("The context size is too small to generate a response");
}
1281
async alignCurrentSequenceStateWithCurrentTokens() {
    // Trims `this.tokens` to the suffix that differs from the sequence's
    // current context state, and erases from the sequence any tokens past the
    // first point of divergence, so both sides agree before evaluation.
    let { firstDifferentIndex } = this.llamaChat.sequence.compareContextTokens(this.tokens);
    // we need to decode at least one token to generate a response
    if (firstDifferentIndex === this.tokens.length && firstDifferentIndex > 0)
        firstDifferentIndex -= 1;
    this.tokens.splice(0, firstDifferentIndex);
    if (firstDifferentIndex < this.llamaChat.sequence.nextTokenIndex) {
        await this.llamaChat.sequence.eraseContextTokenRanges([{
            start: firstDifferentIndex,
            end: this.llamaChat.sequence.nextTokenIndex
        }]);
        this.ensureNotAborted();
    }
}
1295
+ async evaluateWithoutGeneratingNewTokens() {
1296
+ if (this.evaluationIterator != null)
1297
+ await this.evaluationIterator.return();
1298
+ await this.llamaChat.sequence.evaluateWithoutGeneratingNewTokens(this.tokens, removeNullFields({
1299
+ evaluationPriority: this.evaluationPriority
1300
+ }));
1301
+ }
1302
async createNewEvaluationIterator() {
    // Disposes the previous evaluation iterator (if any) and starts a new one
    // over the current tokens with the configured sampling options.
    if (this.evaluationIterator != null)
        await this.evaluationIterator.return();
    this.currentIterationReplacementToken = undefined;
    this.restartEvaluationIterator = false;
    this.evaluationIterator = this.llamaChat.sequence.evaluate(this.tokens, removeNullFields({
        temperature: this.temperature,
        minP: this.minP,
        topK: this.topK,
        topP: this.topP,
        // resolved lazily via a callback, since the active grammar depends on
        // whether function-call evaluation is currently in progress
        grammarEvaluationState: () => {
            if (this.functionEvaluationMode !== false)
                return this.functionsEvaluationState;
            return this.grammarEvaluationState;
        },
        repeatPenalty: !this.repeatPenaltyEnabled ? undefined : {
            punishTokens: this.getPenaltyTokens,
            penalty: this.resolvedRepeatPenalty.penalty,
            frequencyPenalty: this.resolvedRepeatPenalty.frequencyPenalty,
            presencePenalty: this.resolvedRepeatPenalty.presencePenalty
        },
        tokenBias: this.tokenBias,
        evaluationPriority: this.evaluationPriority,
        // EOG tokens are yielded so stop handling can observe them
        yieldEogToken: true
    }));
}
1328
async iterateEvaluation() {
    // Advances the evaluation iterator by one token. Returns `true` when a
    // token was produced (stored in `currentToken`/`currentTokens`/`currentText`),
    // `false` when the iterator is exhausted.
    this.currentIteration = await this.evaluationIterator?.next(this.currentIterationReplacementToken);
    this.currentIterationReplacementToken = undefined;
    this.ensureNotAborted();
    // NOTE(review): incremented even when the iterator turns out to be done —
    // presumably intentional, but worth confirming against maxTokens accounting
    this.generatedTokens++;
    if (this.currentIteration != null && this.currentIteration?.done !== true) {
        this.currentToken = this.currentIteration.value;
        this.currentTokens = [this.currentToken];
        this.currentText = this.llamaChat.model.detokenize(this.currentTokens, false, this.getLastTokens());
        // while evaluating a function call, tokens bypass the stream regulator
        if (this.functionEvaluationMode === false)
            this.currentQueuedTokenRelease = this.streamRegulator.addChunk({
                tokens: this.currentTokens,
                text: this.currentText
            });
        else
            this.currentQueuedTokenRelease = undefined;
        return true;
    }
    return false;
}
1348
+ waitOnPartialCharactersOrWhiteSpaceTokens() {
1349
+ if (this.currentText === UNKNOWN_UNICODE_CHAR || ((this.grammar?.trimWhitespaceSuffix || this.trimWhitespaceSuffix) && this.currentText?.trim() === "")) {
1350
+ if (this.currentQueuedTokenRelease != null)
1351
+ this.locksToReleaseOnValidGeneration.push(this.currentQueuedTokenRelease.createTextIndexLock(0));
1352
+ }
1353
+ else {
1354
+ while (this.locksToReleaseOnValidGeneration.length > 0)
1355
+ this.locksToReleaseOnValidGeneration.shift().dispose();
1356
+ }
1357
+ }
1358
detectAndHandleFunctionStartSyntax() {
    // Feeds the current generation into the function-call start-syntax
    // detector; when the start syntax triggers (with functions enabled and not
    // already in function-evaluation mode), switches into "functionName"
    // evaluation mode and flushes the free text generated before the syntax.
    this.functionSyntaxStartDetector.recordGeneration({
        text: this.currentText,
        tokens: this.currentTokens,
        queuedTokenRelease: this.currentQueuedTokenRelease
    });
    if (this.currentQueuedTokenRelease != null && this.functionEvaluationMode === false && this.functionsEnabled &&
        this.functionSyntaxStartDetector.hasTriggeredStops) {
        this.functionEvaluationMode = "functionName";
        // prevent the current chunk's text from being released to the consumer
        this.currentQueuedTokenRelease.createTextIndexLock(0);
        // stop triggers found so far are superseded by the function-call syntax
        this.stopGenerationDetector.clearTriggeredStops();
        this.stopGenerationDetector.clearInProgressStops();
        this.customStopGenerationTriggersDetector.clearTriggeredStops();
        this.customStopGenerationTriggersDetector.clearInProgressStops();
        pushAll(this.pendingTokens, this.streamRegulator.popFreeChunkTokens());
        const triggeredStops = this.functionSyntaxStartDetector.getTriggeredStops();
        const partiallyFreeTokens = this.streamRegulator.getPartiallyFreeChunk(this.llamaChat.model.tokenizer);
        // keep only the queued tokens that precede the start-syntax trigger
        const queuedTokensBeforeStopTrigger = getQueuedTokensBeforeStopTrigger(triggeredStops, partiallyFreeTokens, this.llamaChat.model.tokenizer);
        pushAll(this.pendingTokens, queuedTokensBeforeStopTrigger);
        // text generated past the trigger carries over into function-name parsing
        const { firstRemainingGenerationAfterStop, stopTrigger } = StopGenerationDetector.getFirstRemainingGenerationAfterStop(triggeredStops);
        const remainingTextAfterStop = StopGenerationDetector.detokenizeRemainingGeneration(firstRemainingGenerationAfterStop, stopTrigger, this.llamaChat.model.tokenizer);
        this.currentFunctionCallPreviousPartLeftoverText = remainingTextAfterStop;
    }
}
1382
+ recordStopGenerationEvaluation() {
1383
+ this.stopGenerationDetector.recordGeneration({
1384
+ text: this.currentText,
1385
+ tokens: this.currentTokens,
1386
+ queuedTokenRelease: this.currentQueuedTokenRelease
1387
+ });
1388
+ this.customStopGenerationTriggersDetector.recordGeneration({
1389
+ text: this.currentText,
1390
+ tokens: this.currentTokens,
1391
+ queuedTokenRelease: this.currentQueuedTokenRelease
1392
+ });
1393
+ }
1394
popStreamRegulatorFreeTokens() {
    // Moves tokens the stream regulator has fully released into `pendingTokens`.
    pushAll(this.pendingTokens, this.streamRegulator.popFreeChunkTokens());
}
1397
handleStopGenerationTrigger(lastHistoryItemType) {
    // Builds the final result when generation should stop due to a stop
    // trigger (built-in or custom) or an end-of-generation token.
    // Returns `undefined` when no stop condition has triggered.
    if (this.stopGenerationDetector.hasTriggeredStops || this.customStopGenerationTriggersDetector.hasTriggeredStops ||
        this.llamaChat.model.isEogToken(this.currentToken)) {
        this.stopGenerationDetector.clearInProgressStops();
        this.customStopGenerationTriggersDetector.clearInProgressStops();
        pushAll(this.pendingTokens, this.streamRegulator.popFreeChunkTokens());
        // built-in stop triggers take precedence over custom ones
        const triggeredStops = this.stopGenerationDetector.hasTriggeredStops
            ? this.stopGenerationDetector.getTriggeredStops()
            : this.customStopGenerationTriggersDetector.getTriggeredStops();
        const partiallyFreeTokens = this.streamRegulator.getPartiallyFreeChunk(this.llamaChat.model.tokenizer);
        // only emit the queued tokens that precede the stop-trigger text
        const queuedTokensBeforeStopTrigger = getQueuedTokensBeforeStopTrigger(triggeredStops, partiallyFreeTokens, this.llamaChat.model.tokenizer);
        pushAll(this.pendingTokens, queuedTokensBeforeStopTrigger);
        const { firstRemainingGenerationAfterStop } = StopGenerationDetector.getFirstRemainingGenerationAfterStop(triggeredStops);
        this.removeFoundStartIgnoreTextsFromPendingTokens(true);
        this.pushPendingTokensAndCallOnToken();
        let modelResponse = this.llamaChat.model.detokenize(this.res);
        let contextWindowModelResponse = this.llamaChat.model.detokenize(this.contextWindowsRes);
        if (this.grammar?.trimWhitespaceSuffix || this.trimWhitespaceSuffix) {
            modelResponse = modelResponse.trimEnd();
            contextWindowModelResponse = contextWindowModelResponse.trimEnd();
        }
        const lastEvaluation = {
            contextWindow: setLastTextInChatHistory(lastHistoryItemType, this.lastContextWindowHistory, this.contextWindowLastModelResponse + contextWindowModelResponse),
            cleanHistory: setLastTextInChatHistory(lastHistoryItemType, this.resolvedHistory, this.lastModelResponse + modelResponse),
            contextShiftMetadata: this.lastHistoryCompressionMetadata
        };
        const isEogToken = this.llamaChat.model.isEogToken(this.currentToken);
        if (isEogToken || this.stopGenerationDetector.hasTriggeredStops) {
            return {
                response: modelResponse,
                lastEvaluation,
                metadata: {
                    remainingGenerationAfterStop: firstRemainingGenerationAfterStop,
                    stopReason: isEogToken
                        ? "eogToken"
                        : "stopGenerationTrigger"
                }
            };
        }
        // reached only when solely a custom stop trigger fired
        return {
            response: modelResponse,
            lastEvaluation,
            metadata: {
                remainingGenerationAfterStop: firstRemainingGenerationAfterStop,
                stopReason: "customStopTrigger",
                customStopTrigger: triggeredStops[0].stopTrigger
            }
        };
    }
    return undefined;
}
1448
spliceIgnoreStartTextDetectedTokens() {
    // When no response tokens have been emitted yet, resets the
    // ignore-start-text detector and re-feeds it the pending tokens, so
    // ignore-text matching restarts from a clean state.
    if (this.res.length === 0) {
        this.ignoreStartTextDetector.clearInProgressStops();
        this.ignoreStartTextDetector.clearTriggeredStops();
        const lastTokensForDetokenizer = resolveLastTokens([
            this.contextWindowTokens,
            this.ignoredStartTextTokens
        ]);
        this.ignoreStartTextDetector.recordGeneration({
            text: this.llamaChat.model.detokenize(this.pendingTokens, false, lastTokensForDetokenizer),
            tokens: this.pendingTokens
        });
    }
}
1462
+ isMaxTokensTriggered() {
1463
+ return this.maxTokens != null && this.maxTokens > 0 && this.generatedTokens >= this.maxTokens;
1464
+ }
1465
+ moveFreePendingTokensToRes(removeFoundStartIgnoreTextsFromPendingTokens = true) {
1466
+ if (this.pendingTokens.length > 0 && (this.isMaxTokensTriggered() || !this.ignoreStartTextDetector.hasInProgressStops)) {
1467
+ if (removeFoundStartIgnoreTextsFromPendingTokens)
1468
+ this.removeFoundStartIgnoreTextsFromPendingTokens();
1469
+ this.pushPendingTokensAndCallOnToken();
1470
+ }
1471
+ }
1472
+ handleMaxTokensTrigger(lastHistoryItemType) {
1473
+ if (this.isMaxTokensTriggered()) {
1474
+ let modelResponse = this.llamaChat.model.detokenize(this.res);
1475
+ let contextWindowModelResponse = this.llamaChat.model.detokenize(this.contextWindowsRes);
1476
+ if (this.grammar?.trimWhitespaceSuffix || this.trimWhitespaceSuffix) {
1477
+ modelResponse = modelResponse.trimEnd();
1478
+ contextWindowModelResponse = contextWindowModelResponse.trimEnd();
1479
+ }
1480
+ return {
1481
+ response: modelResponse,
1482
+ lastEvaluation: {
1483
+ contextWindow: setLastTextInChatHistory(lastHistoryItemType, this.lastContextWindowHistory, this.contextWindowLastModelResponse + contextWindowModelResponse),
1484
+ cleanHistory: setLastTextInChatHistory(lastHistoryItemType, this.resolvedHistory, this.lastModelResponse + modelResponse),
1485
+ contextShiftMetadata: this.lastHistoryCompressionMetadata
1486
+ },
1487
+ metadata: {
1488
+ stopReason: "maxTokens"
1489
+ }
1490
+ };
1491
+ }
1492
+ return undefined;
1493
+ }
1494
+ updateShouldContextShift() {
1495
+ this.shouldContextShift = this.llamaChat.sequence.nextTokenIndex >= this.llamaChat.context.contextSize - 1;
1496
+ return this.shouldContextShift;
1497
+ }
1498
+ get shouldAbort() {
1499
+ return !!(this.signal?.aborted && this.stopOnAbortSignal);
1500
+ }
1501
+ handleAbortTrigger(lastHistoryItemType) {
1502
+ if (this.shouldAbort && this.signal?.aborted && this.stopOnAbortSignal) {
1503
+ if (this.res.length === 0)
1504
+ throw this.signal.reason;
1505
+ let modelResponse = this.llamaChat.model.detokenize(this.res);
1506
+ let contextWindowModelResponse = this.llamaChat.model.detokenize(this.contextWindowsRes);
1507
+ if (this.grammar?.trimWhitespaceSuffix || this.trimWhitespaceSuffix) {
1508
+ modelResponse = modelResponse.trimEnd();
1509
+ contextWindowModelResponse = contextWindowModelResponse.trimEnd();
1510
+ }
1511
+ return {
1512
+ response: modelResponse,
1513
+ lastEvaluation: {
1514
+ contextWindow: setLastTextInChatHistory(lastHistoryItemType, this.lastContextWindowHistory, this.contextWindowLastModelResponse + contextWindowModelResponse),
1515
+ cleanHistory: setLastTextInChatHistory(lastHistoryItemType, this.resolvedHistory, this.lastModelResponse + modelResponse),
1516
+ contextShiftMetadata: this.lastHistoryCompressionMetadata
1517
+ },
1518
+ metadata: {
1519
+ stopReason: "abort"
1520
+ }
1521
+ };
1522
+ }
1523
+ return undefined;
1524
+ }
1525
pushPendingTokensAndCallOnToken() {
    // Emits the pending tokens to the consumer callbacks and appends them to
    // the accumulated response buffers.
    if (this.pendingTokens.length === 0)
        return;
    this.onToken?.(this.pendingTokens.slice());
    // `this.res` is used as the prior-token context for detokenization, so
    // this must run before the pending tokens are pushed into `this.res`
    this.onTextChunk?.(this.llamaChat.model.detokenize(this.pendingTokens, false, this.res));
    pushAll(this.res, this.pendingTokens);
    pushAll(this.contextWindowsRes, this.pendingTokens);
    this.pendingTokens.length = 0;
}
1534
getLastTokens(maxTokens = maxRecentDetokenizerTokens) {
    // Returns up to `maxTokens` of the most recently processed tokens,
    // gathered across the various buffers in generation order, for use as
    // detokenizer context.
    return resolveLastTokens([
        this.contextWindowTokens,
        this.ignoredStartTextTokens,
        this.pendingTokens,
        this.streamRegulator.getLastQueuedChunkTokens(maxTokens),
        this.getContextWindowFunctionCallsTokens()
    ], maxTokens);
}
1543
+ }
1544
+ //# sourceMappingURL=LlamaChat.js.map