node-llama-cpp 3.0.0-beta.3 → 3.0.0-beta.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (690)
  1. package/README.md +14 -11
  2. package/bins/linux-arm64/_nlcBuildMetadata.json +1 -0
  3. package/bins/linux-arm64/llama-addon.node +0 -0
  4. package/bins/linux-armv7l/_nlcBuildMetadata.json +1 -0
  5. package/bins/linux-armv7l/llama-addon.node +0 -0
  6. package/bins/linux-x64/_nlcBuildMetadata.json +1 -0
  7. package/bins/linux-x64/llama-addon.node +0 -0
  8. package/bins/linux-x64-cuda/_nlcBuildMetadata.json +1 -0
  9. package/bins/linux-x64-cuda/llama-addon.node +0 -0
  10. package/bins/linux-x64-vulkan/_nlcBuildMetadata.json +1 -0
  11. package/bins/linux-x64-vulkan/llama-addon.node +0 -0
  12. package/bins/mac-arm64-metal/_nlcBuildMetadata.json +1 -0
  13. package/bins/mac-arm64-metal/default.metallib +0 -0
  14. package/bins/mac-arm64-metal/llama-addon.node +0 -0
  15. package/bins/mac-x64/_nlcBuildMetadata.json +1 -0
  16. package/bins/mac-x64/llama-addon.node +0 -0
  17. package/bins/win-arm64/_nlcBuildMetadata.json +1 -0
  18. package/bins/win-arm64/llama-addon.exp +0 -0
  19. package/bins/win-arm64/llama-addon.lib +0 -0
  20. package/bins/win-arm64/llama-addon.node +0 -0
  21. package/bins/win-x64/_nlcBuildMetadata.json +1 -0
  22. package/bins/win-x64/llama-addon.exp +0 -0
  23. package/bins/win-x64/llama-addon.lib +0 -0
  24. package/bins/win-x64/llama-addon.node +0 -0
  25. package/bins/win-x64-cuda/_nlcBuildMetadata.json +1 -0
  26. package/bins/win-x64-cuda/llama-addon.exp +0 -0
  27. package/bins/win-x64-cuda/llama-addon.lib +0 -0
  28. package/bins/win-x64-cuda/llama-addon.node +0 -0
  29. package/bins/win-x64-vulkan/_nlcBuildMetadata.json +1 -0
  30. package/bins/win-x64-vulkan/llama-addon.exp +0 -0
  31. package/bins/win-x64-vulkan/llama-addon.lib +0 -0
  32. package/bins/win-x64-vulkan/llama-addon.node +0 -0
  33. package/dist/ChatWrapper.d.ts +8 -39
  34. package/dist/ChatWrapper.js +115 -72
  35. package/dist/ChatWrapper.js.map +1 -1
  36. package/dist/apiDocsIndex.d.ts +1 -0
  37. package/dist/apiDocsIndex.js +7 -0
  38. package/dist/apiDocsIndex.js.map +1 -0
  39. package/dist/{utils/getBin.d.ts → bindings/AddonTypes.d.ts} +54 -8
  40. package/dist/bindings/AddonTypes.js +2 -0
  41. package/dist/bindings/AddonTypes.js.map +1 -0
  42. package/dist/bindings/Llama.d.ts +47 -0
  43. package/dist/bindings/Llama.js +353 -0
  44. package/dist/bindings/Llama.js.map +1 -0
  45. package/dist/bindings/consts.d.ts +2 -0
  46. package/dist/bindings/consts.js +11 -0
  47. package/dist/bindings/consts.js.map +1 -0
  48. package/dist/bindings/getLlama.d.ts +148 -0
  49. package/dist/bindings/getLlama.js +401 -0
  50. package/dist/bindings/getLlama.js.map +1 -0
  51. package/dist/bindings/types.d.ts +56 -0
  52. package/dist/bindings/types.js +77 -0
  53. package/dist/bindings/types.js.map +1 -0
  54. package/dist/bindings/utils/MemoryOrchestrator.d.ts +21 -0
  55. package/dist/bindings/utils/MemoryOrchestrator.js +49 -0
  56. package/dist/bindings/utils/MemoryOrchestrator.js.map +1 -0
  57. package/dist/bindings/utils/NoBinaryFoundError.d.ts +2 -0
  58. package/dist/bindings/utils/NoBinaryFoundError.js +7 -0
  59. package/dist/bindings/utils/NoBinaryFoundError.js.map +1 -0
  60. package/dist/bindings/utils/asyncEvery.d.ts +5 -0
  61. package/dist/bindings/utils/asyncEvery.js +15 -0
  62. package/dist/bindings/utils/asyncEvery.js.map +1 -0
  63. package/dist/bindings/utils/asyncSome.d.ts +5 -0
  64. package/dist/bindings/utils/asyncSome.js +27 -0
  65. package/dist/bindings/utils/asyncSome.js.map +1 -0
  66. package/dist/{utils → bindings/utils}/binariesGithubRelease.js +1 -1
  67. package/dist/bindings/utils/binariesGithubRelease.js.map +1 -0
  68. package/dist/bindings/utils/clearAllLocalBuilds.d.ts +1 -0
  69. package/dist/bindings/utils/clearAllLocalBuilds.js +47 -0
  70. package/dist/bindings/utils/clearAllLocalBuilds.js.map +1 -0
  71. package/dist/bindings/utils/cloneLlamaCppRepo.d.ts +11 -0
  72. package/dist/bindings/utils/cloneLlamaCppRepo.js +166 -0
  73. package/dist/bindings/utils/cloneLlamaCppRepo.js.map +1 -0
  74. package/dist/bindings/utils/compileLLamaCpp.d.ts +17 -0
  75. package/dist/bindings/utils/compileLLamaCpp.js +226 -0
  76. package/dist/bindings/utils/compileLLamaCpp.js.map +1 -0
  77. package/dist/bindings/utils/detectAvailableComputeLayers.d.ts +14 -0
  78. package/dist/bindings/utils/detectAvailableComputeLayers.js +305 -0
  79. package/dist/bindings/utils/detectAvailableComputeLayers.js.map +1 -0
  80. package/dist/bindings/utils/detectGlibc.d.ts +4 -0
  81. package/dist/bindings/utils/detectGlibc.js +46 -0
  82. package/dist/bindings/utils/detectGlibc.js.map +1 -0
  83. package/dist/bindings/utils/getBestComputeLayersAvailable.d.ts +10 -0
  84. package/dist/bindings/utils/getBestComputeLayersAvailable.js +29 -0
  85. package/dist/bindings/utils/getBestComputeLayersAvailable.js.map +1 -0
  86. package/dist/bindings/utils/getBuildFolderNameForBuildOptions.d.ts +5 -0
  87. package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js +93 -0
  88. package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js.map +1 -0
  89. package/dist/bindings/utils/getCanUsePrebuiltBinaries.d.ts +1 -0
  90. package/dist/bindings/utils/getCanUsePrebuiltBinaries.js +8 -0
  91. package/dist/bindings/utils/getCanUsePrebuiltBinaries.js.map +1 -0
  92. package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.d.ts +2 -0
  93. package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.js +21 -0
  94. package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.js.map +1 -0
  95. package/dist/bindings/utils/getGpuTypesToUseForOption.d.ts +12 -0
  96. package/dist/bindings/utils/getGpuTypesToUseForOption.js +30 -0
  97. package/dist/bindings/utils/getGpuTypesToUseForOption.js.map +1 -0
  98. package/dist/bindings/utils/getLinuxDistroInfo.d.ts +9 -0
  99. package/dist/bindings/utils/getLinuxDistroInfo.js +46 -0
  100. package/dist/bindings/utils/getLinuxDistroInfo.js.map +1 -0
  101. package/dist/bindings/utils/getLlamaWithoutBackend.d.ts +5 -0
  102. package/dist/bindings/utils/getLlamaWithoutBackend.js +27 -0
  103. package/dist/bindings/utils/getLlamaWithoutBackend.js.map +1 -0
  104. package/dist/bindings/utils/getPlatform.d.ts +2 -0
  105. package/dist/bindings/utils/getPlatform.js +15 -0
  106. package/dist/bindings/utils/getPlatform.js.map +1 -0
  107. package/dist/bindings/utils/getPlatformInfo.d.ts +5 -0
  108. package/dist/bindings/utils/getPlatformInfo.js +28 -0
  109. package/dist/bindings/utils/getPlatformInfo.js.map +1 -0
  110. package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.d.ts +3 -0
  111. package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.js +27 -0
  112. package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.js.map +1 -0
  113. package/dist/bindings/utils/hasFileInPath.d.ts +2 -0
  114. package/dist/bindings/utils/hasFileInPath.js +34 -0
  115. package/dist/bindings/utils/hasFileInPath.js.map +1 -0
  116. package/dist/bindings/utils/lastBuildInfo.d.ts +6 -0
  117. package/dist/bindings/utils/lastBuildInfo.js +17 -0
  118. package/dist/bindings/utils/lastBuildInfo.js.map +1 -0
  119. package/dist/bindings/utils/logBinaryUsageExampleToConsole.d.ts +2 -0
  120. package/dist/bindings/utils/logBinaryUsageExampleToConsole.js +22 -0
  121. package/dist/bindings/utils/logBinaryUsageExampleToConsole.js.map +1 -0
  122. package/dist/bindings/utils/logDistroInstallInstruction.d.ts +14 -0
  123. package/dist/bindings/utils/logDistroInstallInstruction.js +48 -0
  124. package/dist/bindings/utils/logDistroInstallInstruction.js.map +1 -0
  125. package/dist/bindings/utils/resolveCustomCmakeOptions.d.ts +1 -0
  126. package/dist/bindings/utils/resolveCustomCmakeOptions.js +47 -0
  127. package/dist/bindings/utils/resolveCustomCmakeOptions.js.map +1 -0
  128. package/dist/bindings/utils/testBindingBinary.d.ts +1 -0
  129. package/dist/bindings/utils/testBindingBinary.js +100 -0
  130. package/dist/bindings/utils/testBindingBinary.js.map +1 -0
  131. package/dist/bindings/utils/testCmakeBinary.d.ts +6 -0
  132. package/dist/bindings/utils/testCmakeBinary.js +32 -0
  133. package/dist/bindings/utils/testCmakeBinary.js.map +1 -0
  134. package/dist/chatWrappers/AlpacaChatWrapper.d.ts +2 -1
  135. package/dist/chatWrappers/AlpacaChatWrapper.js +9 -2
  136. package/dist/chatWrappers/AlpacaChatWrapper.js.map +1 -1
  137. package/dist/chatWrappers/ChatMLChatWrapper.d.ts +2 -9
  138. package/dist/chatWrappers/ChatMLChatWrapper.js +23 -21
  139. package/dist/chatWrappers/ChatMLChatWrapper.js.map +1 -1
  140. package/dist/chatWrappers/FalconChatWrapper.d.ts +4 -10
  141. package/dist/chatWrappers/FalconChatWrapper.js +38 -21
  142. package/dist/chatWrappers/FalconChatWrapper.js.map +1 -1
  143. package/dist/chatWrappers/FunctionaryChatWrapper.d.ts +8 -32
  144. package/dist/chatWrappers/FunctionaryChatWrapper.js +323 -118
  145. package/dist/chatWrappers/FunctionaryChatWrapper.js.map +1 -1
  146. package/dist/chatWrappers/GemmaChatWrapper.d.ts +7 -0
  147. package/dist/chatWrappers/GemmaChatWrapper.js +96 -0
  148. package/dist/chatWrappers/GemmaChatWrapper.js.map +1 -0
  149. package/dist/chatWrappers/GeneralChatWrapper.d.ts +4 -10
  150. package/dist/chatWrappers/GeneralChatWrapper.js +45 -22
  151. package/dist/chatWrappers/GeneralChatWrapper.js.map +1 -1
  152. package/dist/chatWrappers/Llama2ChatWrapper.d.ts +12 -0
  153. package/dist/chatWrappers/{LlamaChatWrapper.js → Llama2ChatWrapper.js} +38 -20
  154. package/dist/chatWrappers/Llama2ChatWrapper.js.map +1 -0
  155. package/dist/chatWrappers/Llama3ChatWrapper.d.ts +16 -0
  156. package/dist/chatWrappers/Llama3ChatWrapper.js +174 -0
  157. package/dist/chatWrappers/Llama3ChatWrapper.js.map +1 -0
  158. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.d.ts +67 -0
  159. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js +371 -0
  160. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js.map +1 -0
  161. package/dist/chatWrappers/generic/TemplateChatWrapper.d.ts +54 -0
  162. package/dist/chatWrappers/generic/TemplateChatWrapper.js +200 -0
  163. package/dist/chatWrappers/generic/TemplateChatWrapper.js.map +1 -0
  164. package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.d.ts +23 -0
  165. package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.js +45 -0
  166. package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.js.map +1 -0
  167. package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.d.ts +42 -0
  168. package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.js +82 -0
  169. package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.js.map +1 -0
  170. package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.d.ts +4 -0
  171. package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js +210 -0
  172. package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js.map +1 -0
  173. package/dist/chatWrappers/utils/resolveChatWrapper.d.ts +69 -0
  174. package/dist/chatWrappers/utils/resolveChatWrapper.js +243 -0
  175. package/dist/chatWrappers/utils/resolveChatWrapper.js.map +1 -0
  176. package/dist/cli/cli.js +21 -7
  177. package/dist/cli/cli.js.map +1 -1
  178. package/dist/cli/commands/BuildCommand.d.ts +11 -4
  179. package/dist/cli/commands/BuildCommand.js +114 -41
  180. package/dist/cli/commands/BuildCommand.js.map +1 -1
  181. package/dist/cli/commands/ChatCommand.d.ts +18 -6
  182. package/dist/cli/commands/ChatCommand.js +299 -143
  183. package/dist/cli/commands/ChatCommand.js.map +1 -1
  184. package/dist/cli/commands/ClearCommand.d.ts +1 -1
  185. package/dist/cli/commands/ClearCommand.js +11 -12
  186. package/dist/cli/commands/ClearCommand.js.map +1 -1
  187. package/dist/cli/commands/CompleteCommand.d.ts +29 -0
  188. package/dist/cli/commands/CompleteCommand.js +365 -0
  189. package/dist/cli/commands/CompleteCommand.js.map +1 -0
  190. package/dist/cli/commands/DebugCommand.d.ts +7 -0
  191. package/dist/cli/commands/DebugCommand.js +54 -0
  192. package/dist/cli/commands/DebugCommand.js.map +1 -0
  193. package/dist/cli/commands/DownloadCommand.d.ts +7 -4
  194. package/dist/cli/commands/DownloadCommand.js +121 -70
  195. package/dist/cli/commands/DownloadCommand.js.map +1 -1
  196. package/dist/cli/commands/InfillCommand.d.ts +31 -0
  197. package/dist/cli/commands/InfillCommand.js +401 -0
  198. package/dist/cli/commands/InfillCommand.js.map +1 -0
  199. package/dist/cli/commands/InitCommand.d.ts +11 -0
  200. package/dist/cli/commands/InitCommand.js +195 -0
  201. package/dist/cli/commands/InitCommand.js.map +1 -0
  202. package/dist/cli/commands/OnPostInstallCommand.js +9 -10
  203. package/dist/cli/commands/OnPostInstallCommand.js.map +1 -1
  204. package/dist/cli/commands/PullCommand.d.ts +12 -0
  205. package/dist/cli/commands/PullCommand.js +117 -0
  206. package/dist/cli/commands/PullCommand.js.map +1 -0
  207. package/dist/cli/commands/inspect/InspectCommand.d.ts +4 -0
  208. package/dist/cli/commands/inspect/InspectCommand.js +19 -0
  209. package/dist/cli/commands/inspect/InspectCommand.js.map +1 -0
  210. package/dist/cli/commands/inspect/commands/InspectGgufCommand.d.ts +12 -0
  211. package/dist/cli/commands/inspect/commands/InspectGgufCommand.js +136 -0
  212. package/dist/cli/commands/inspect/commands/InspectGgufCommand.js.map +1 -0
  213. package/dist/cli/commands/inspect/commands/InspectGpuCommand.d.ts +4 -0
  214. package/dist/cli/commands/inspect/commands/InspectGpuCommand.js +164 -0
  215. package/dist/cli/commands/inspect/commands/InspectGpuCommand.js.map +1 -0
  216. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.d.ts +17 -0
  217. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js +613 -0
  218. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js.map +1 -0
  219. package/dist/cli/projectTemplates.d.ts +7 -0
  220. package/dist/cli/projectTemplates.js +10 -0
  221. package/dist/cli/projectTemplates.js.map +1 -0
  222. package/dist/cli/recommendedModels.d.ts +2 -0
  223. package/dist/cli/recommendedModels.js +363 -0
  224. package/dist/cli/recommendedModels.js.map +1 -0
  225. package/dist/cli/startCreateCli.d.ts +2 -0
  226. package/dist/cli/startCreateCli.js +26 -0
  227. package/dist/cli/startCreateCli.js.map +1 -0
  228. package/dist/cli/utils/ConsoleInteraction.d.ts +23 -0
  229. package/dist/cli/utils/ConsoleInteraction.js +122 -0
  230. package/dist/cli/utils/ConsoleInteraction.js.map +1 -0
  231. package/dist/cli/utils/ConsoleTable.d.ts +23 -0
  232. package/dist/cli/utils/ConsoleTable.js +86 -0
  233. package/dist/cli/utils/ConsoleTable.js.map +1 -0
  234. package/dist/cli/utils/basicChooseFromListConsoleInteraction.d.ts +13 -0
  235. package/dist/cli/utils/basicChooseFromListConsoleInteraction.js +111 -0
  236. package/dist/cli/utils/basicChooseFromListConsoleInteraction.js.map +1 -0
  237. package/dist/cli/utils/consolePromptQuestion.d.ts +6 -0
  238. package/dist/cli/utils/consolePromptQuestion.js +82 -0
  239. package/dist/cli/utils/consolePromptQuestion.js.map +1 -0
  240. package/dist/cli/utils/getReadablePath.d.ts +1 -0
  241. package/dist/cli/utils/getReadablePath.js +14 -0
  242. package/dist/cli/utils/getReadablePath.js.map +1 -0
  243. package/dist/cli/utils/interactivelyAskForModel.d.ts +7 -0
  244. package/dist/cli/utils/interactivelyAskForModel.js +451 -0
  245. package/dist/cli/utils/interactivelyAskForModel.js.map +1 -0
  246. package/dist/cli/utils/logUsedGpuTypeOption.d.ts +2 -0
  247. package/dist/cli/utils/logUsedGpuTypeOption.js +9 -0
  248. package/dist/cli/utils/logUsedGpuTypeOption.js.map +1 -0
  249. package/dist/cli/utils/printCommonInfoLines.d.ts +9 -0
  250. package/dist/cli/utils/printCommonInfoLines.js +75 -0
  251. package/dist/cli/utils/printCommonInfoLines.js.map +1 -0
  252. package/dist/cli/utils/printInfoLine.d.ts +12 -0
  253. package/dist/cli/utils/printInfoLine.js +54 -0
  254. package/dist/cli/utils/printInfoLine.js.map +1 -0
  255. package/dist/cli/utils/projectTemplates.d.ts +19 -0
  256. package/dist/cli/utils/projectTemplates.js +47 -0
  257. package/dist/cli/utils/projectTemplates.js.map +1 -0
  258. package/dist/cli/utils/resolveCommandGgufPath.d.ts +4 -0
  259. package/dist/cli/utils/resolveCommandGgufPath.js +71 -0
  260. package/dist/cli/utils/resolveCommandGgufPath.js.map +1 -0
  261. package/dist/cli/utils/resolveHeaderFlag.d.ts +1 -0
  262. package/dist/cli/utils/resolveHeaderFlag.js +21 -0
  263. package/dist/cli/utils/resolveHeaderFlag.js.map +1 -0
  264. package/dist/cli/utils/resolveModelRecommendationFileOptions.d.ts +19 -0
  265. package/dist/cli/utils/resolveModelRecommendationFileOptions.js +7 -0
  266. package/dist/cli/utils/resolveModelRecommendationFileOptions.js.map +1 -0
  267. package/dist/cli/utils/splitAnsiToLines.d.ts +1 -0
  268. package/dist/cli/utils/splitAnsiToLines.js +32 -0
  269. package/dist/cli/utils/splitAnsiToLines.js.map +1 -0
  270. package/dist/cli/utils/withCliCommandDescriptionDocsUrl.d.ts +2 -0
  271. package/dist/cli/utils/withCliCommandDescriptionDocsUrl.js +23 -0
  272. package/dist/cli/utils/withCliCommandDescriptionDocsUrl.js.map +1 -0
  273. package/dist/commands.d.ts +1 -0
  274. package/dist/commands.js +3 -0
  275. package/dist/commands.js.map +1 -1
  276. package/dist/config.d.ts +38 -5
  277. package/dist/config.js +61 -16
  278. package/dist/config.js.map +1 -1
  279. package/dist/consts.d.ts +3 -0
  280. package/dist/consts.js +10 -0
  281. package/dist/consts.js.map +1 -0
  282. package/dist/{llamaEvaluator → evaluator}/LlamaChat/LlamaChat.d.ts +112 -39
  283. package/dist/evaluator/LlamaChat/LlamaChat.js +1512 -0
  284. package/dist/evaluator/LlamaChat/LlamaChat.js.map +1 -0
  285. package/dist/evaluator/LlamaChat/utils/FunctionCallNameGrammar.d.ts +11 -0
  286. package/dist/evaluator/LlamaChat/utils/FunctionCallNameGrammar.js +55 -0
  287. package/dist/evaluator/LlamaChat/utils/FunctionCallNameGrammar.js.map +1 -0
  288. package/dist/evaluator/LlamaChat/utils/FunctionCallParamsGrammar.d.ts +16 -0
  289. package/dist/evaluator/LlamaChat/utils/FunctionCallParamsGrammar.js +45 -0
  290. package/dist/evaluator/LlamaChat/utils/FunctionCallParamsGrammar.js.map +1 -0
  291. package/dist/evaluator/LlamaChat/utils/LlamaFunctionCallValidationError.d.ts +8 -0
  292. package/dist/evaluator/LlamaChat/utils/LlamaFunctionCallValidationError.js +12 -0
  293. package/dist/evaluator/LlamaChat/utils/LlamaFunctionCallValidationError.js.map +1 -0
  294. package/dist/{llamaEvaluator → evaluator}/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js +42 -16
  295. package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js.map +1 -0
  296. package/dist/evaluator/LlamaChatSession/LlamaChatSession.d.ts +288 -0
  297. package/dist/evaluator/LlamaChatSession/LlamaChatSession.js +419 -0
  298. package/dist/evaluator/LlamaChatSession/LlamaChatSession.js.map +1 -0
  299. package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.d.ts +39 -0
  300. package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.js +186 -0
  301. package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.js.map +1 -0
  302. package/dist/{llamaEvaluator → evaluator}/LlamaChatSession/utils/defineChatSessionFunction.d.ts +3 -0
  303. package/dist/{llamaEvaluator → evaluator}/LlamaChatSession/utils/defineChatSessionFunction.js +3 -0
  304. package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.js.map +1 -0
  305. package/dist/evaluator/LlamaCompletion.d.ts +143 -0
  306. package/dist/evaluator/LlamaCompletion.js +418 -0
  307. package/dist/evaluator/LlamaCompletion.js.map +1 -0
  308. package/dist/{llamaEvaluator → evaluator}/LlamaContext/LlamaContext.d.ts +41 -21
  309. package/dist/{llamaEvaluator → evaluator}/LlamaContext/LlamaContext.js +270 -81
  310. package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -0
  311. package/dist/evaluator/LlamaContext/types.d.ts +140 -0
  312. package/dist/evaluator/LlamaContext/types.js.map +1 -0
  313. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/firstInFirstOutStrategy.js.map +1 -0
  314. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/maximumParallelismStrategy.js.map +1 -0
  315. package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.d.ts +2 -0
  316. package/dist/{llamaEvaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js → evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.js} +4 -4
  317. package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.js.map +1 -0
  318. package/dist/evaluator/LlamaEmbeddingContext.d.ts +51 -0
  319. package/dist/evaluator/LlamaEmbeddingContext.js +73 -0
  320. package/dist/evaluator/LlamaEmbeddingContext.js.map +1 -0
  321. package/dist/{llamaEvaluator → evaluator}/LlamaGrammar.d.ts +10 -7
  322. package/dist/{llamaEvaluator → evaluator}/LlamaGrammar.js +14 -11
  323. package/dist/evaluator/LlamaGrammar.js.map +1 -0
  324. package/dist/{llamaEvaluator → evaluator}/LlamaGrammarEvaluationState.js +4 -4
  325. package/dist/evaluator/LlamaGrammarEvaluationState.js.map +1 -0
  326. package/dist/{llamaEvaluator → evaluator}/LlamaJsonSchemaGrammar.d.ts +2 -1
  327. package/dist/{llamaEvaluator → evaluator}/LlamaJsonSchemaGrammar.js +3 -3
  328. package/dist/evaluator/LlamaJsonSchemaGrammar.js.map +1 -0
  329. package/dist/evaluator/LlamaModel/LlamaModel.d.ts +236 -0
  330. package/dist/evaluator/LlamaModel/LlamaModel.js +679 -0
  331. package/dist/evaluator/LlamaModel/LlamaModel.js.map +1 -0
  332. package/dist/evaluator/LlamaModel/utils/TokenAttributes.d.ts +29 -0
  333. package/dist/evaluator/LlamaModel/utils/TokenAttributes.js +65 -0
  334. package/dist/evaluator/LlamaModel/utils/TokenAttributes.js.map +1 -0
  335. package/dist/evaluator/TokenBias.d.ts +22 -0
  336. package/dist/evaluator/TokenBias.js +33 -0
  337. package/dist/evaluator/TokenBias.js.map +1 -0
  338. package/dist/evaluator/TokenMeter.d.ts +54 -0
  339. package/dist/evaluator/TokenMeter.js +86 -0
  340. package/dist/evaluator/TokenMeter.js.map +1 -0
  341. package/dist/gguf/consts.d.ts +3 -0
  342. package/dist/gguf/consts.js +8 -0
  343. package/dist/gguf/consts.js.map +1 -0
  344. package/dist/gguf/errors/InvalidGgufMagicError.d.ts +3 -0
  345. package/dist/gguf/errors/InvalidGgufMagicError.js +6 -0
  346. package/dist/gguf/errors/InvalidGgufMagicError.js.map +1 -0
  347. package/dist/gguf/errors/UnsupportedGgufValueTypeError.d.ts +4 -0
  348. package/dist/gguf/errors/UnsupportedGgufValueTypeError.js +9 -0
  349. package/dist/gguf/errors/UnsupportedGgufValueTypeError.js.map +1 -0
  350. package/dist/gguf/fileReaders/GgufFileReader.d.ts +33 -0
  351. package/dist/gguf/fileReaders/GgufFileReader.js +76 -0
  352. package/dist/gguf/fileReaders/GgufFileReader.js.map +1 -0
  353. package/dist/gguf/fileReaders/GgufFsFileReader.d.ts +17 -0
  354. package/dist/gguf/fileReaders/GgufFsFileReader.js +46 -0
  355. package/dist/gguf/fileReaders/GgufFsFileReader.js.map +1 -0
  356. package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.d.ts +22 -0
  357. package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js +63 -0
  358. package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js.map +1 -0
  359. package/dist/gguf/insights/GgufInsights.d.ts +48 -0
  360. package/dist/gguf/insights/GgufInsights.js +381 -0
  361. package/dist/gguf/insights/GgufInsights.js.map +1 -0
  362. package/dist/gguf/insights/GgufInsightsConfigurationResolver.d.ts +87 -0
  363. package/dist/gguf/insights/GgufInsightsConfigurationResolver.js +141 -0
  364. package/dist/gguf/insights/GgufInsightsConfigurationResolver.js.map +1 -0
  365. package/dist/gguf/insights/utils/resolveContextContextSizeOption.d.ts +18 -0
  366. package/dist/gguf/insights/utils/resolveContextContextSizeOption.js +76 -0
  367. package/dist/gguf/insights/utils/resolveContextContextSizeOption.js.map +1 -0
  368. package/dist/gguf/insights/utils/resolveModelGpuLayersOption.d.ts +14 -0
  369. package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js +177 -0
  370. package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js.map +1 -0
  371. package/dist/gguf/insights/utils/scoreLevels.d.ts +5 -0
  372. package/dist/gguf/insights/utils/scoreLevels.js +16 -0
  373. package/dist/gguf/insights/utils/scoreLevels.js.map +1 -0
  374. package/dist/gguf/parser/GgufV2Parser.d.ts +19 -0
  375. package/dist/gguf/parser/GgufV2Parser.js +115 -0
  376. package/dist/gguf/parser/GgufV2Parser.js.map +1 -0
  377. package/dist/gguf/parser/GgufV3Parser.d.ts +3 -0
  378. package/dist/gguf/parser/GgufV3Parser.js +4 -0
  379. package/dist/gguf/parser/GgufV3Parser.js.map +1 -0
  380. package/dist/gguf/parser/parseGguf.d.ts +8 -0
  381. package/dist/gguf/parser/parseGguf.js +63 -0
  382. package/dist/gguf/parser/parseGguf.js.map +1 -0
  383. package/dist/gguf/readGgufFileInfo.d.ts +33 -0
  384. package/dist/gguf/readGgufFileInfo.js +66 -0
  385. package/dist/gguf/readGgufFileInfo.js.map +1 -0
  386. package/dist/gguf/types/GgufFileInfoTypes.d.ts +84 -0
  387. package/dist/gguf/types/GgufFileInfoTypes.js +18 -0
  388. package/dist/gguf/types/GgufFileInfoTypes.js.map +1 -0
  389. package/dist/gguf/types/GgufMetadataTypes.d.ts +335 -0
  390. package/dist/gguf/types/GgufMetadataTypes.js +86 -0
  391. package/dist/gguf/types/GgufMetadataTypes.js.map +1 -0
  392. package/dist/gguf/types/GgufTensorInfoTypes.d.ts +37 -0
  393. package/dist/gguf/types/GgufTensorInfoTypes.js +33 -0
  394. package/dist/gguf/types/GgufTensorInfoTypes.js.map +1 -0
  395. package/dist/gguf/utils/GgufReadOffset.d.ts +6 -0
  396. package/dist/gguf/utils/GgufReadOffset.js +18 -0
  397. package/dist/gguf/utils/GgufReadOffset.js.map +1 -0
  398. package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.d.ts +5 -0
  399. package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js +38 -0
  400. package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js.map +1 -0
  401. package/dist/gguf/utils/getGgufFileTypeName.d.ts +4 -0
  402. package/dist/gguf/utils/getGgufFileTypeName.js +13 -0
  403. package/dist/gguf/utils/getGgufFileTypeName.js.map +1 -0
  404. package/dist/gguf/utils/getGgufMetadataArchitectureData.d.ts +3 -0
  405. package/dist/gguf/utils/getGgufMetadataArchitectureData.js +4 -0
  406. package/dist/gguf/utils/getGgufMetadataArchitectureData.js.map +1 -0
  407. package/dist/gguf/utils/normalizeGgufDownloadUrl.d.ts +1 -0
  408. package/dist/gguf/utils/normalizeGgufDownloadUrl.js +16 -0
  409. package/dist/gguf/utils/normalizeGgufDownloadUrl.js.map +1 -0
  410. package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.d.ts +2 -0
  411. package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.js +39 -0
  412. package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.js.map +1 -0
  413. package/dist/gguf/utils/resolveSplitGgufParts.d.ts +7 -0
  414. package/dist/gguf/utils/resolveSplitGgufParts.js +55 -0
  415. package/dist/gguf/utils/resolveSplitGgufParts.js.map +1 -0
  416. package/dist/index.d.ts +41 -18
  417. package/dist/index.js +36 -15
  418. package/dist/index.js.map +1 -1
  419. package/dist/state.d.ts +4 -0
  420. package/dist/state.js +14 -0
  421. package/dist/state.js.map +1 -1
  422. package/dist/types.d.ts +116 -5
  423. package/dist/types.js.map +1 -1
  424. package/dist/utils/DisposeGuard.d.ts +13 -0
  425. package/dist/utils/DisposeGuard.js +120 -0
  426. package/dist/utils/DisposeGuard.js.map +1 -0
  427. package/dist/utils/InsufficientMemoryError.d.ts +3 -0
  428. package/dist/utils/InsufficientMemoryError.js +6 -0
  429. package/dist/utils/InsufficientMemoryError.js.map +1 -0
  430. package/dist/utils/LlamaText.d.ts +70 -26
  431. package/dist/utils/LlamaText.js +469 -157
  432. package/dist/utils/LlamaText.js.map +1 -1
  433. package/dist/utils/LruCache.d.ts +12 -0
  434. package/dist/utils/LruCache.js +44 -0
  435. package/dist/utils/LruCache.js.map +1 -0
  436. package/dist/utils/ReplHistory.js.map +1 -1
  437. package/dist/utils/StopGenerationDetector.d.ts +25 -9
  438. package/dist/utils/StopGenerationDetector.js +93 -22
  439. package/dist/utils/StopGenerationDetector.js.map +1 -1
  440. package/dist/utils/TokenStreamRegulator.d.ts +9 -4
  441. package/dist/utils/TokenStreamRegulator.js +81 -8
  442. package/dist/utils/TokenStreamRegulator.js.map +1 -1
  443. package/dist/utils/UnsupportedError.d.ts +2 -0
  444. package/dist/utils/UnsupportedError.js +7 -0
  445. package/dist/utils/UnsupportedError.js.map +1 -0
  446. package/dist/utils/appendUserMessageToChatHistory.js.map +1 -1
  447. package/dist/utils/clearTempFolder.js.map +1 -1
  448. package/dist/utils/cmake.js +38 -20
  449. package/dist/utils/cmake.js.map +1 -1
  450. package/dist/utils/createModelDownloader.d.ts +108 -0
  451. package/dist/utils/createModelDownloader.js +231 -0
  452. package/dist/utils/createModelDownloader.js.map +1 -0
  453. package/dist/utils/findBestOption.d.ts +4 -0
  454. package/dist/utils/findBestOption.js +15 -0
  455. package/dist/utils/findBestOption.js.map +1 -0
  456. package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.d.ts +1 -0
  457. package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js +23 -12
  458. package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js.map +1 -1
  459. package/dist/utils/gbnfJson/GbnfGrammarGenerator.js.map +1 -1
  460. package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.d.ts +5 -0
  461. package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.js +11 -0
  462. package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.js.map +1 -0
  463. package/dist/utils/gbnfJson/terminals/GbnfArray.d.ts +3 -1
  464. package/dist/utils/gbnfJson/terminals/GbnfArray.js +10 -5
  465. package/dist/utils/gbnfJson/terminals/GbnfArray.js.map +1 -1
  466. package/dist/utils/gbnfJson/terminals/GbnfBoolean.d.ts +1 -1
  467. package/dist/utils/gbnfJson/terminals/GbnfBoolean.js.map +1 -1
  468. package/dist/utils/gbnfJson/terminals/GbnfBooleanValue.js.map +1 -1
  469. package/dist/utils/gbnfJson/terminals/GbnfGrammar.js.map +1 -1
  470. package/dist/utils/gbnfJson/terminals/GbnfNull.d.ts +1 -1
  471. package/dist/utils/gbnfJson/terminals/GbnfNull.js.map +1 -1
  472. package/dist/utils/gbnfJson/terminals/GbnfNumber.d.ts +1 -1
  473. package/dist/utils/gbnfJson/terminals/GbnfNumber.js.map +1 -1
  474. package/dist/utils/gbnfJson/terminals/GbnfNumberValue.js.map +1 -1
  475. package/dist/utils/gbnfJson/terminals/GbnfObjectMap.d.ts +3 -1
  476. package/dist/utils/gbnfJson/terminals/GbnfObjectMap.js +9 -4
  477. package/dist/utils/gbnfJson/terminals/GbnfObjectMap.js.map +1 -1
  478. package/dist/utils/gbnfJson/terminals/GbnfOr.js.map +1 -1
  479. package/dist/utils/gbnfJson/terminals/GbnfRepetition.d.ts +9 -0
  480. package/dist/utils/gbnfJson/terminals/GbnfRepetition.js +37 -0
  481. package/dist/utils/gbnfJson/terminals/GbnfRepetition.js.map +1 -0
  482. package/dist/utils/gbnfJson/terminals/GbnfString.d.ts +1 -1
  483. package/dist/utils/gbnfJson/terminals/GbnfString.js +23 -5
  484. package/dist/utils/gbnfJson/terminals/GbnfString.js.map +1 -1
  485. package/dist/utils/gbnfJson/terminals/GbnfStringValue.js.map +1 -1
  486. package/dist/utils/gbnfJson/terminals/GbnfVerbatimText.js.map +1 -1
  487. package/dist/utils/gbnfJson/terminals/GbnfWhitespace.d.ts +7 -4
  488. package/dist/utils/gbnfJson/terminals/GbnfWhitespace.js +37 -9
  489. package/dist/utils/gbnfJson/terminals/GbnfWhitespace.js.map +1 -1
  490. package/dist/utils/gbnfJson/terminals/gbnfConsts.d.ts +5 -4
  491. package/dist/utils/gbnfJson/terminals/gbnfConsts.js +14 -3
  492. package/dist/utils/gbnfJson/terminals/gbnfConsts.js.map +1 -1
  493. package/dist/utils/gbnfJson/types.d.ts +3 -0
  494. package/dist/utils/gbnfJson/types.js.map +1 -1
  495. package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.d.ts +10 -0
  496. package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.js +15 -0
  497. package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.js.map +1 -0
  498. package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.d.ts +2 -1
  499. package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.js +6 -5
  500. package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.js.map +1 -1
  501. package/dist/utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.js +2 -2
  502. package/dist/utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.js.map +1 -1
  503. package/dist/utils/getBuildDefaults.d.ts +1 -2
  504. package/dist/utils/getBuildDefaults.js +2 -3
  505. package/dist/utils/getBuildDefaults.js.map +1 -1
  506. package/dist/utils/getConsoleLogPrefix.d.ts +1 -0
  507. package/dist/utils/getConsoleLogPrefix.js +10 -0
  508. package/dist/utils/getConsoleLogPrefix.js.map +1 -0
  509. package/dist/utils/getGrammarsFolder.d.ts +2 -1
  510. package/dist/utils/getGrammarsFolder.js +8 -7
  511. package/dist/utils/getGrammarsFolder.js.map +1 -1
  512. package/dist/utils/getModuleVersion.d.ts +1 -0
  513. package/dist/utils/getModuleVersion.js +13 -0
  514. package/dist/utils/getModuleVersion.js.map +1 -0
  515. package/dist/utils/getQueuedTokensBeforeStopTrigger.d.ts +6 -0
  516. package/dist/utils/getQueuedTokensBeforeStopTrigger.js +22 -0
  517. package/dist/utils/getQueuedTokensBeforeStopTrigger.js.map +1 -0
  518. package/dist/utils/getReadableContextSize.d.ts +1 -0
  519. package/dist/utils/getReadableContextSize.js +7 -0
  520. package/dist/utils/getReadableContextSize.js.map +1 -0
  521. package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js +15 -11
  522. package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js.map +1 -1
  523. package/dist/utils/gitReleaseBundles.js +73 -5
  524. package/dist/utils/gitReleaseBundles.js.map +1 -1
  525. package/dist/utils/hashString.d.ts +1 -0
  526. package/dist/utils/hashString.js +8 -0
  527. package/dist/utils/hashString.js.map +1 -0
  528. package/dist/utils/isLockfileActive.d.ts +4 -0
  529. package/dist/utils/isLockfileActive.js +12 -0
  530. package/dist/utils/isLockfileActive.js.map +1 -0
  531. package/dist/utils/isToken.d.ts +2 -0
  532. package/dist/utils/isToken.js +4 -0
  533. package/dist/utils/isToken.js.map +1 -0
  534. package/dist/utils/isUrl.d.ts +1 -0
  535. package/dist/utils/isUrl.js +15 -0
  536. package/dist/utils/isUrl.js.map +1 -0
  537. package/dist/utils/mergeUnionTypes.d.ts +10 -0
  538. package/dist/utils/mergeUnionTypes.js +2 -0
  539. package/dist/utils/mergeUnionTypes.js.map +1 -0
  540. package/dist/utils/parseModelFileName.d.ts +1 -0
  541. package/dist/utils/parseModelFileName.js +6 -1
  542. package/dist/utils/parseModelFileName.js.map +1 -1
  543. package/dist/utils/parseTextTemplate.d.ts +66 -0
  544. package/dist/utils/parseTextTemplate.js +116 -0
  545. package/dist/utils/parseTextTemplate.js.map +1 -0
  546. package/dist/utils/prettyPrintObject.d.ts +10 -0
  547. package/dist/utils/prettyPrintObject.js +84 -0
  548. package/dist/utils/prettyPrintObject.js.map +1 -0
  549. package/dist/utils/removeNullFields.d.ts +2 -1
  550. package/dist/utils/removeNullFields.js +8 -0
  551. package/dist/utils/removeNullFields.js.map +1 -1
  552. package/dist/utils/resolveGithubRelease.d.ts +2 -0
  553. package/dist/utils/resolveGithubRelease.js +36 -0
  554. package/dist/utils/resolveGithubRelease.js.map +1 -0
  555. package/dist/utils/runtime.d.ts +4 -0
  556. package/dist/utils/runtime.js +8 -0
  557. package/dist/utils/runtime.js.map +1 -0
  558. package/dist/utils/safeEventCallback.d.ts +6 -0
  559. package/dist/utils/safeEventCallback.js +29 -0
  560. package/dist/utils/safeEventCallback.js.map +1 -0
  561. package/dist/utils/spawnCommand.d.ts +11 -1
  562. package/dist/utils/spawnCommand.js +56 -6
  563. package/dist/utils/spawnCommand.js.map +1 -1
  564. package/dist/utils/tokenizeInput.d.ts +3 -0
  565. package/dist/utils/tokenizeInput.js +12 -0
  566. package/dist/utils/tokenizeInput.js.map +1 -0
  567. package/dist/utils/truncateTextAndRoundToWords.d.ts +2 -0
  568. package/dist/utils/truncateTextAndRoundToWords.js +30 -0
  569. package/dist/utils/truncateTextAndRoundToWords.js.map +1 -1
  570. package/dist/utils/utilTypes.d.ts +3 -0
  571. package/dist/utils/utilTypes.js +2 -0
  572. package/dist/utils/utilTypes.js.map +1 -0
  573. package/dist/utils/waitForLockfileRelease.d.ts +5 -0
  574. package/dist/utils/waitForLockfileRelease.js +20 -0
  575. package/dist/utils/waitForLockfileRelease.js.map +1 -0
  576. package/dist/utils/withLockfile.d.ts +7 -0
  577. package/dist/utils/withLockfile.js +44 -0
  578. package/dist/utils/withLockfile.js.map +1 -0
  579. package/dist/utils/withOra.d.ts +2 -0
  580. package/dist/utils/withOra.js +22 -6
  581. package/dist/utils/withOra.js.map +1 -1
  582. package/dist/utils/withProgressLog.d.ts +23 -0
  583. package/dist/utils/withProgressLog.js +211 -0
  584. package/dist/utils/withProgressLog.js.map +1 -0
  585. package/dist/utils/withStatusLogs.d.ts +2 -1
  586. package/dist/utils/withStatusLogs.js +12 -9
  587. package/dist/utils/withStatusLogs.js.map +1 -1
  588. package/dist/utils/wrapAbortSignal.d.ts +2 -0
  589. package/dist/utils/wrapAbortSignal.js +9 -0
  590. package/dist/utils/wrapAbortSignal.js.map +1 -0
  591. package/llama/.clang-format +1 -2
  592. package/llama/CMakeLists.txt +115 -4
  593. package/llama/addon.cpp +1300 -97
  594. package/llama/binariesGithubRelease.json +1 -1
  595. package/llama/gitRelease.bundle +0 -0
  596. package/llama/gpuInfo/cuda-gpu-info.cu +120 -0
  597. package/llama/gpuInfo/cuda-gpu-info.h +10 -0
  598. package/llama/gpuInfo/metal-gpu-info.h +8 -0
  599. package/llama/gpuInfo/metal-gpu-info.mm +30 -0
  600. package/llama/gpuInfo/vulkan-gpu-info.cpp +83 -0
  601. package/llama/gpuInfo/vulkan-gpu-info.h +9 -0
  602. package/llama/grammars/README.md +58 -5
  603. package/llama/grammars/json.gbnf +4 -4
  604. package/llama/grammars/json_arr.gbnf +4 -4
  605. package/llama/llama.cpp.info.json +4 -0
  606. package/llama/toolchains/win32.host-x64.target-arm64.cmake +41 -0
  607. package/package.json +78 -53
  608. package/templates/packed/electron-typescript-react.json +1 -0
  609. package/templates/packed/node-typescript.json +1 -0
  610. package/dist/AbortError.d.ts +0 -2
  611. package/dist/AbortError.js +0 -7
  612. package/dist/AbortError.js.map +0 -1
  613. package/dist/chatWrappers/LlamaChatWrapper.d.ts +0 -13
  614. package/dist/chatWrappers/LlamaChatWrapper.js.map +0 -1
  615. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.d.ts +0 -13
  616. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js +0 -57
  617. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js.map +0 -1
  618. package/dist/llamaEvaluator/LlamaBins.d.ts +0 -18
  619. package/dist/llamaEvaluator/LlamaBins.js +0 -5
  620. package/dist/llamaEvaluator/LlamaBins.js.map +0 -1
  621. package/dist/llamaEvaluator/LlamaChat/LlamaChat.js +0 -704
  622. package/dist/llamaEvaluator/LlamaChat/LlamaChat.js.map +0 -1
  623. package/dist/llamaEvaluator/LlamaChat/utils/FunctionCallGrammar.d.ts +0 -21
  624. package/dist/llamaEvaluator/LlamaChat/utils/FunctionCallGrammar.js +0 -120
  625. package/dist/llamaEvaluator/LlamaChat/utils/FunctionCallGrammar.js.map +0 -1
  626. package/dist/llamaEvaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js.map +0 -1
  627. package/dist/llamaEvaluator/LlamaChatSession/LlamaChatSession.d.ts +0 -146
  628. package/dist/llamaEvaluator/LlamaChatSession/LlamaChatSession.js +0 -211
  629. package/dist/llamaEvaluator/LlamaChatSession/LlamaChatSession.js.map +0 -1
  630. package/dist/llamaEvaluator/LlamaChatSession/utils/defineChatSessionFunction.js.map +0 -1
  631. package/dist/llamaEvaluator/LlamaContext/LlamaContext.js.map +0 -1
  632. package/dist/llamaEvaluator/LlamaContext/types.d.ts +0 -82
  633. package/dist/llamaEvaluator/LlamaContext/types.js.map +0 -1
  634. package/dist/llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.js.map +0 -1
  635. package/dist/llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.js.map +0 -1
  636. package/dist/llamaEvaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.d.ts +0 -2
  637. package/dist/llamaEvaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js.map +0 -1
  638. package/dist/llamaEvaluator/LlamaEmbeddingContext.d.ts +0 -35
  639. package/dist/llamaEvaluator/LlamaEmbeddingContext.js +0 -73
  640. package/dist/llamaEvaluator/LlamaEmbeddingContext.js.map +0 -1
  641. package/dist/llamaEvaluator/LlamaGrammar.js.map +0 -1
  642. package/dist/llamaEvaluator/LlamaGrammarEvaluationState.js.map +0 -1
  643. package/dist/llamaEvaluator/LlamaJsonSchemaGrammar.js.map +0 -1
  644. package/dist/llamaEvaluator/LlamaModel.d.ts +0 -119
  645. package/dist/llamaEvaluator/LlamaModel.js +0 -322
  646. package/dist/llamaEvaluator/LlamaModel.js.map +0 -1
  647. package/dist/utils/binariesGithubRelease.js.map +0 -1
  648. package/dist/utils/clearLlamaBuild.d.ts +0 -1
  649. package/dist/utils/clearLlamaBuild.js +0 -12
  650. package/dist/utils/clearLlamaBuild.js.map +0 -1
  651. package/dist/utils/cloneLlamaCppRepo.d.ts +0 -2
  652. package/dist/utils/cloneLlamaCppRepo.js +0 -102
  653. package/dist/utils/cloneLlamaCppRepo.js.map +0 -1
  654. package/dist/utils/compileLLamaCpp.d.ts +0 -8
  655. package/dist/utils/compileLLamaCpp.js +0 -132
  656. package/dist/utils/compileLLamaCpp.js.map +0 -1
  657. package/dist/utils/getBin.js +0 -78
  658. package/dist/utils/getBin.js.map +0 -1
  659. package/dist/utils/getGbnfGrammarForGbnfJsonSchema.d.ts +0 -2
  660. package/dist/utils/getGbnfGrammarForGbnfJsonSchema.js +0 -9
  661. package/dist/utils/getGbnfGrammarForGbnfJsonSchema.js.map +0 -1
  662. package/dist/utils/getReleaseInfo.d.ts +0 -7
  663. package/dist/utils/getReleaseInfo.js +0 -30
  664. package/dist/utils/getReleaseInfo.js.map +0 -1
  665. package/dist/utils/parseModelTypeDescription.d.ts +0 -6
  666. package/dist/utils/parseModelTypeDescription.js +0 -9
  667. package/dist/utils/parseModelTypeDescription.js.map +0 -1
  668. package/dist/utils/resolveChatWrapper.d.ts +0 -4
  669. package/dist/utils/resolveChatWrapper.js +0 -16
  670. package/dist/utils/resolveChatWrapper.js.map +0 -1
  671. package/dist/utils/usedBinFlag.d.ts +0 -6
  672. package/dist/utils/usedBinFlag.js +0 -15
  673. package/dist/utils/usedBinFlag.js.map +0 -1
  674. package/llama/usedBin.json +0 -3
  675. package/llamaBins/linux-arm64/llama-addon.node +0 -0
  676. package/llamaBins/linux-armv7l/llama-addon.node +0 -0
  677. package/llamaBins/linux-x64/llama-addon.node +0 -0
  678. package/llamaBins/mac-arm64/llama-addon.node +0 -0
  679. package/llamaBins/mac-x64/llama-addon.node +0 -0
  680. package/llamaBins/win-x64/llama-addon.exp +0 -0
  681. package/llamaBins/win-x64/llama-addon.lib +0 -0
  682. package/llamaBins/win-x64/llama-addon.node +0 -0
  683. /package/dist/{utils → bindings/utils}/binariesGithubRelease.d.ts +0 -0
  684. /package/dist/{llamaEvaluator → evaluator}/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.d.ts +0 -0
  685. /package/dist/{llamaEvaluator → evaluator}/LlamaContext/types.js +0 -0
  686. /package/dist/{llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies → evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies}/firstInFirstOutStrategy.d.ts +0 -0
  687. /package/dist/{llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies → evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies}/firstInFirstOutStrategy.js +0 -0
  688. /package/dist/{llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies → evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies}/maximumParallelismStrategy.d.ts +0 -0
  689. /package/dist/{llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies → evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies}/maximumParallelismStrategy.js +0 -0
  690. /package/dist/{llamaEvaluator → evaluator}/LlamaGrammarEvaluationState.d.ts +0 -0
@@ -0,0 +1,1512 @@
1
+ import { DisposeAggregator, DisposedError, EventRelay, withLock } from "lifecycle-utils";
2
+ import { removeNullFields } from "../../utils/removeNullFields.js";
3
+ import { LlamaGrammarEvaluationState } from "../LlamaGrammarEvaluationState.js";
4
+ import { LlamaText, SpecialToken } from "../../utils/LlamaText.js";
5
+ import { StopGenerationDetector } from "../../utils/StopGenerationDetector.js";
6
+ import { TokenStreamRegulator } from "../../utils/TokenStreamRegulator.js";
7
+ import { UNKNOWN_UNICODE_CHAR } from "../../consts.js";
8
+ import { getQueuedTokensBeforeStopTrigger } from "../../utils/getQueuedTokensBeforeStopTrigger.js";
9
+ import { resolveChatWrapper } from "../../chatWrappers/utils/resolveChatWrapper.js";
10
+ import { GeneralChatWrapper } from "../../chatWrappers/GeneralChatWrapper.js";
11
+ import { safeEventCallback } from "../../utils/safeEventCallback.js";
12
+ import { eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy } from "./utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js";
13
+ import { FunctionCallNameGrammar } from "./utils/FunctionCallNameGrammar.js";
14
+ import { FunctionCallParamsGrammar } from "./utils/FunctionCallParamsGrammar.js";
15
// Fallback context-shift configuration used when the caller passes no `contextShift` option:
// shift by 10% of the sequence's context size (at least 1 token), using the built-in
// "eraseFirstResponseAndKeepFirstSystem" strategy, with no metadata from a previous shift.
const defaultContextShiftOptions = {
    size: (sequence) => Math.max(1, Math.floor(sequence.context.contextSize / 10)),
    strategy: "eraseFirstResponseAndKeepFirstSystem",
    lastEvaluationMetadata: null
};
// Default number of most recent tokens the repeat penalty applies to
// (not referenced in this chunk; presumably consumed by GenerateResponseState — TODO confirm).
const defaultRepeatPenaltyLastTokens = 64;
// By default, trailing whitespace in the generated output is kept.
const defaultTrimWhitespaceSuffix = false;
// Default evaluation priority forwarded to the generation state when the caller sets none.
const defaultEvaluationPriority = 5;
23
/**
 * Drives chat-style generation on top of a context sequence.
 *
 * Owns (optionally) the lifetime of the given `contextSequence`, resolves a chat
 * wrapper for the model when `chatWrapper` is `"auto"`, and serializes all
 * evaluation work through an internal lock so only one generation runs at a time.
 */
export class LlamaChat {
    /** @internal */ _chatWrapper;
    /** @internal */ _disposeAggregator = new DisposeAggregator();
    /** @internal */ _autoDisposeSequence;
    /** @internal */ _chatLock = {}; // lock scope object used with `withLock` to serialize evaluations
    /** @internal */ _sequence; // set to null once disposed
    onDispose = new EventRelay();
    /**
     * @param {object} options
     * @param {object} options.contextSequence - sequence to evaluate on; required.
     * @param {object|"auto"} [options.chatWrapper="auto"] - `"auto"` resolves a wrapper from the
     * model's metadata, falling back to `GeneralChatWrapper`.
     * @param {boolean} [options.autoDisposeSequence=true] - whether `dispose()` also disposes the sequence.
     * @throws {Error} when `contextSequence` is null.
     * @throws {DisposedError} when `contextSequence` is already disposed.
     */
    constructor({ contextSequence, chatWrapper = "auto", autoDisposeSequence = true }) {
        if (contextSequence == null)
            throw new Error("contextSequence cannot be null");
        if (contextSequence.disposed)
            throw new DisposedError();
        this._sequence = contextSequence;
        this._autoDisposeSequence = autoDisposeSequence;
        // Dispose this chat when the underlying sequence is disposed externally.
        this._disposeAggregator.add(this._sequence.onDispose.createListener(() => {
            this.dispose();
        }));
        this._disposeAggregator.add(this.onDispose.dispatchEvent);
        this._chatWrapper = chatWrapper === "auto"
            ? (resolveChatWrapper({
                bosString: contextSequence.model.tokens.bosString,
                filename: contextSequence.model.filename,
                fileInfo: contextSequence.model.fileInfo,
                tokenizer: contextSequence.model.tokenizer
            }) ?? new GeneralChatWrapper())
            : chatWrapper;
    }
    /**
     * Releases this chat. Idempotent: subsequent calls are no-ops.
     * @param {object} [options]
     * @param {boolean} [options.disposeSequence] - defaults to the `autoDisposeSequence`
     * value given at construction.
     */
    dispose({ disposeSequence = this._autoDisposeSequence } = {}) {
        if (this._sequence == null)
            return;
        if (disposeSequence)
            this._sequence.dispose();
        this._sequence = null;
        this._disposeAggregator.dispose();
    }
    /** @hidden */
    [Symbol.dispose]() {
        return this.dispose();
    }
    /** `true` once `dispose()` has run (or the sequence was disposed externally). */
    get disposed() {
        return this._sequence == null;
    }
    /** The resolved chat wrapper. @throws {DisposedError} after disposal. */
    get chatWrapper() {
        if (this._sequence == null)
            throw new DisposedError();
        return this._chatWrapper;
    }
    /** The underlying context sequence. @throws {DisposedError} after disposal. */
    get sequence() {
        if (this._sequence == null)
            throw new DisposedError();
        return this._sequence;
    }
    get context() {
        return this.sequence.context;
    }
    get model() {
        return this.sequence.model;
    }
    /**
     * Generates a model response for the given chat history.
     *
     * All work is delegated to a `GenerateResponseState` (defined elsewhere in this file)
     * and serialized through the chat lock. The outer `while (true)` loop restarts
     * generation after each context shift; the inner loop consumes one evaluation
     * iterator until a stop trigger, max-tokens, context-shift or abort condition fires.
     *
     * @throws {Error} when both `grammar` and functions are supplied, or when the
     * context is too small to generate anything.
     */
    async generateResponse(history, options = {}) {
        const {
            onToken, signal, stopOnAbortSignal = false, maxTokens, temperature, minP, topK, topP, grammar,
            trimWhitespaceSuffix = defaultTrimWhitespaceSuffix, repeatPenalty = {}, tokenBias,
            evaluationPriority = defaultEvaluationPriority, functions, onFunctionCall, documentFunctionParams,
            maxParallelFunctionCalls, contextShift = defaultContextShiftOptions, customStopTriggers,
            lastEvaluationContextWindow: {
                history: lastEvaluationContextWindowHistory,
                minimumOverlapPercentageToPreventContextShift = 0.5
            } = {}
        } = options;
        const generateResponseState = new GenerateResponseState(this, this._chatWrapper, history, {
            onToken,
            signal,
            stopOnAbortSignal,
            maxTokens,
            temperature,
            minP,
            topK,
            topP,
            grammar: grammar, // this is a workaround to allow passing both `functions` and `grammar`
            trimWhitespaceSuffix,
            repeatPenalty,
            tokenBias,
            evaluationPriority,
            functions,
            onFunctionCall,
            documentFunctionParams,
            maxParallelFunctionCalls,
            contextShift,
            customStopTriggers,
            lastEvaluationContextWindow: {
                history: lastEvaluationContextWindowHistory,
                minimumOverlapPercentageToPreventContextShift
            }
        });
        if (generateResponseState.grammar != null && generateResponseState.functionsEnabled)
            throw new Error("Using both grammar and functions is not supported yet");
        return await withLock(this._chatLock, "evaluate", signal, async () => {
            try {
                generateResponseState.ensureLastHistoryItemIsModel();
                const loadContextWindow = async (avoidReloadingHistory = false) => {
                    await generateResponseState.loadContextWindow(generateResponseState.getResolvedHistoryWithCurrentModelResponse(), false, avoidReloadingHistory);
                };
                const loadContextWindowForFunctionCallingLoop = async () => loadContextWindow(true);
                // Outer loop: one pass per context window; re-entered after a context shift.
                // eslint-disable-next-line no-constant-condition
                while (true) {
                    generateResponseState.startTokenLoop();
                    generateResponseState.canAvoidReloadingHistory = false;
                    await loadContextWindow();
                    generateResponseState.addStopGenerationTriggersFromChatWrapper();
                    // First-ever iteration: register start-text ignore triggers and function support.
                    if (generateResponseState.generatedTokens === 0) {
                        generateResponseState.addIgnoreStartTextTriggersFromChatWrapper();
                        if (generateResponseState.functionsEnabled) {
                            generateResponseState.initFunctions();
                        }
                    }
                    // Already mid-function-call (e.g. resumed after a shift): finish it first.
                    if (generateResponseState.functionEvaluationMode !== false) {
                        const functionsCallsRes = await generateResponseState.enterFunctionCallingLoop(loadContextWindowForFunctionCallingLoop);
                        if (functionsCallsRes != null)
                            return functionsCallsRes;
                        await loadContextWindowForFunctionCallingLoop();
                    }
                    await generateResponseState.alignCurrentSequenceStateWithCurrentTokens();
                    await generateResponseState.createNewEvaluationIterator();
                    // Inner loop: one generated token per iteration.
                    // NOTE: the call order below is significant — stop/function detection must
                    // run before tokens are released from the stream regulator.
                    while (await generateResponseState.iterateEvaluation()) {
                        generateResponseState.waitOnPartialCharactersOrWhiteSpaceTokens();
                        generateResponseState.detectAndHandleFunctionStartSyntax();
                        if (generateResponseState.functionEvaluationMode !== false) {
                            generateResponseState.canAvoidReloadingHistory = false;
                            generateResponseState.releasePartiallyFreeTokensBeforeFunctionCallStart();
                            const functionsCallsRes = await generateResponseState.enterFunctionCallingLoop(loadContextWindowForFunctionCallingLoop);
                            if (functionsCallsRes != null)
                                return functionsCallsRes;
                        }
                        generateResponseState.recordStopGenerationEvaluation();
                        generateResponseState.popStreamRegulatorFreeTokens();
                        generateResponseState.removeFoundStartIgnoreTextsFromPendingTokens();
                        const stopGenerationTriggerRes = generateResponseState.handleStopGenerationTrigger("model");
                        if (stopGenerationTriggerRes != null)
                            return stopGenerationTriggerRes;
                        generateResponseState.spliceIgnoreStartTextDetectedTokens();
                        generateResponseState.moveFreePendingTokensToRes();
                        const maxTokensTriggerRes = generateResponseState.handleMaxTokensTrigger("model");
                        if (maxTokensTriggerRes != null)
                            return maxTokensTriggerRes;
                        if (generateResponseState.updateShouldContextShift())
                            break;
                        const abortRes = generateResponseState.handleAbortTrigger("model");
                        if (abortRes != null)
                            return abortRes;
                    }
                    generateResponseState.isFirstEvaluation = false;
                    if (generateResponseState.shouldContextShift)
                        continue;
                    break;
                }
                // Reached only when the inner loop ends without any stop condition firing.
                throw new Error("The context size is too small to generate a response");
            }
            finally {
                generateResponseState.dispose();
            }
        });
    }
    /**
     * Evaluates the chat history and completes the last user message
     * (prompt completion rather than a model response).
     *
     * Returns `{completion, lastEvaluation, metadata}`. With `maxTokens: 0` it only
     * evaluates the existing history and returns an empty completion.
     *
     * @throws {Error} when the context is too small to generate a completion.
     */
    async loadChatAndCompleteUserMessage(history, options = {}) {
        const {
            initialUserPrompt = "", stopOnAbortSignal = false, onToken, signal,
            maxTokens = Math.min(256, Math.ceil(this.context.contextSize / 2)),
            temperature, minP, topK, topP, grammar,
            trimWhitespaceSuffix = defaultTrimWhitespaceSuffix, repeatPenalty = {}, tokenBias,
            evaluationPriority = defaultEvaluationPriority, functions, documentFunctionParams,
            contextShift = defaultContextShiftOptions, customStopTriggers,
            lastEvaluationContextWindow: {
                history: lastEvaluationContextWindowHistory,
                minimumOverlapPercentageToPreventContextShift = 0.8
            } = {}
        } = options;
        const lastEvaluationContextWindowHistoryItem = lastEvaluationContextWindowHistory == null
            ? null
            : lastEvaluationContextWindowHistory[lastEvaluationContextWindowHistory.length - 1];
        const lastEvaluationContextWindowUserMessage = lastEvaluationContextWindowHistoryItem?.type === "user"
            ? lastEvaluationContextWindowHistoryItem.text
            : "";
        const generateResponseState = new GenerateResponseState(this, this._chatWrapper, history, {
            onToken,
            signal,
            stopOnAbortSignal,
            maxTokens,
            temperature,
            minP,
            topK,
            topP,
            grammar: grammar, // this is a workaround to allow passing both `functions` and `grammar`
            trimWhitespaceSuffix,
            repeatPenalty,
            tokenBias,
            evaluationPriority,
            functions,
            documentFunctionParams,
            contextShift,
            customStopTriggers,
            lastEvaluationContextWindow: {
                // Seed the context window with the initial user prompt appended to the
                // last user message, so the completion continues from the right place.
                history: lastEvaluationContextWindowHistory == null
                    ? undefined
                    : setLastUserTextInChatHistory(lastEvaluationContextWindowHistory, lastEvaluationContextWindowUserMessage + initialUserPrompt),
                minimumOverlapPercentageToPreventContextShift
            }
        });
        return await withLock(this._chatLock, "evaluate", signal, async () => {
            try {
                generateResponseState.ensureLastHistoryItemIsUser();
                const lastResolvedHistoryItem = generateResponseState.resolvedHistory[generateResponseState.resolvedHistory.length - 1];
                const initialUserMessage = lastResolvedHistoryItem?.type === "user"
                    ? lastResolvedHistoryItem.text
                    : "";
                // Outer loop: one pass per context window; re-entered after a context shift.
                // eslint-disable-next-line no-constant-condition
                while (true) {
                    generateResponseState.startTokenLoop();
                    // Render a context that ends inside the user message, appending the
                    // text generated so far; `userTextSuffix` is the wrapper text that
                    // would follow the user message and acts as a stop trigger.
                    const { userTextSuffix } = await generateResponseState.loadContextWindow(setLastUserTextInChatHistory(generateResponseState.resolvedHistory, initialUserMessage + initialUserPrompt + this.model.detokenize(generateResponseState.res)), true);
                    generateResponseState.functionEvaluationMode = false;
                    generateResponseState.addStopGenerationTriggersFromChatWrapper();
                    if (userTextSuffix != null && userTextSuffix.values.length > 0)
                        generateResponseState.stopGenerationDetector.addStopTrigger(StopGenerationDetector.resolveLlamaTextTrigger(userTextSuffix, this.model.tokenizer));
                    await generateResponseState.alignCurrentSequenceStateWithCurrentTokens();
                    // maxTokens === 0: evaluate the history only, produce no completion.
                    if (generateResponseState.maxTokens === 0) {
                        await generateResponseState.evaluateWithoutGeneratingNewTokens();
                        return {
                            completion: "",
                            lastEvaluation: {
                                contextWindow: setLastUserTextInChatHistory(generateResponseState.lastContextWindowHistory, initialUserMessage),
                                contextShiftMetadata: generateResponseState.lastHistoryCompressionMetadata
                            },
                            metadata: {
                                stopReason: "maxTokens"
                            }
                        };
                    }
                    await generateResponseState.createNewEvaluationIterator();
                    while (await generateResponseState.iterateEvaluation()) {
                        generateResponseState.waitOnPartialCharactersOrWhiteSpaceTokens();
                        generateResponseState.recordStopGenerationEvaluation();
                        generateResponseState.popStreamRegulatorFreeTokens();
                        const stopGenerationTriggerRes = generateResponseState.handleStopGenerationTrigger("user");
                        if (stopGenerationTriggerRes != null)
                            return {
                                completion: stopGenerationTriggerRes.response,
                                lastEvaluation: {
                                    contextWindow: setLastUserTextInChatHistory(generateResponseState.lastContextWindowHistory, initialUserMessage),
                                    contextShiftMetadata: stopGenerationTriggerRes.lastEvaluation.contextShiftMetadata
                                },
                                // NOTE(review): both branches of this ternary are identical — a leftover
                                // of type narrowing in the TypeScript source; it has no runtime effect.
                                metadata: stopGenerationTriggerRes.metadata.stopReason === "customStopTrigger"
                                    ? stopGenerationTriggerRes.metadata
                                    : stopGenerationTriggerRes.metadata
                            };
                        generateResponseState.moveFreePendingTokensToRes(false);
                        const maxTokensTriggerRes = generateResponseState.handleMaxTokensTrigger("user");
                        if (maxTokensTriggerRes != null)
                            return {
                                completion: maxTokensTriggerRes.response,
                                lastEvaluation: {
                                    contextWindow: setLastUserTextInChatHistory(generateResponseState.lastContextWindowHistory, initialUserMessage),
                                    contextShiftMetadata: maxTokensTriggerRes.lastEvaluation.contextShiftMetadata
                                },
                                metadata: maxTokensTriggerRes.metadata
                            };
                        if (generateResponseState.updateShouldContextShift())
                            break;
                        const abortRes = generateResponseState.handleAbortTrigger("user");
                        if (abortRes != null)
                            return {
                                completion: abortRes.response,
                                lastEvaluation: {
                                    contextWindow: setLastUserTextInChatHistory(generateResponseState.lastContextWindowHistory, initialUserMessage),
                                    contextShiftMetadata: abortRes.lastEvaluation.contextShiftMetadata
                                },
                                metadata: abortRes.metadata
                            };
                    }
                    generateResponseState.isFirstEvaluation = false;
                    if (generateResponseState.shouldContextShift)
                        continue;
                    break;
                }
                throw new Error("The context size is too small to generate a completion");
            }
            finally {
                generateResponseState.dispose();
            }
        });
    }
}
292
// Returns a shallow copy of a "model" history item with the `rawCall` payload
// stripped from every non-string response entry. Items of any other type are
// returned unchanged (same reference). The input is never mutated.
function removeRawFromHistoryItem(historyItem) {
    if (historyItem.type !== "model")
        return historyItem;

    const sanitizedResponse = historyItem.response.map((entry) => (
        typeof entry === "string"
            ? entry
            : { ...entry, rawCall: undefined }
    ));

    return { ...historyItem, response: sanitizedResponse };
}
308
// Compresses a chat history until its rendered context text fits into
// `contextSize - contextShiftSize` tokens.
//
// Resolution order: return the history untouched if it already fits; otherwise
// run the caller-provided strategy function (falling back with a warning if it
// fails or its result does not fit); otherwise run the built-in
// "eraseFirstResponseAndKeepFirstSystem" strategy, which must succeed.
//
// Returns `{compressedHistory, metadata}` where `metadata` is the strategy's
// shift metadata (or `null` when no compression was needed).
// Throws when the shift size leaves no room, or when even the default strategy
// cannot make the history fit.
async function compressHistoryToFitContextSize({ history, contextShiftSize, contextShiftStrategy, contextShiftLastEvaluationMetadata, contextSize, tokenizer, chatWrapper, functions, documentFunctionParams }) {
    const maxTokensCount = contextSize - contextShiftSize;

    // A history "fits" when its rendered context text leaves room for the shift window.
    const fitsInContext = (chatHistory) => {
        const { contextText } = chatWrapper.generateContextState({
            chatHistory,
            availableFunctions: functions,
            documentFunctionParams
        });
        return contextText.tokenize(tokenizer).length <= maxTokensCount;
    };

    if (maxTokensCount <= 0)
        throw new Error(`The context size (${contextSize}) is too small to fit the context shift size (${contextShiftSize})`);

    if (fitsInContext(history))
        return {
            compressedHistory: history,
            metadata: null
        };

    if (contextShiftStrategy instanceof Function) {
        try {
            const { chatHistory, metadata } = await contextShiftStrategy({
                chatHistory: history,
                maxTokensCount,
                tokenizer,
                chatWrapper,
                lastShiftMetadata: contextShiftLastEvaluationMetadata
            });
            if (fitsInContext(chatHistory))
                return {
                    compressedHistory: chatHistory,
                    metadata
                };
            console.warn("The provided context shift strategy did not return a history that fits the context size. " +
                "Using the default strategy instead.");
        } catch (err) {
            console.error("The provided context shift strategy threw an error. " +
                "Using the default strategy instead.", err);
        }
    } else if (contextShiftStrategy !== "eraseFirstResponseAndKeepFirstSystem")
        console.warn(`Unknown context shift strategy "${contextShiftStrategy}". ` +
            "Using the default strategy instead.");

    const { chatHistory, metadata } = await eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy({
        chatHistory: history,
        maxTokensCount,
        tokenizer,
        chatWrapper,
        lastShiftMetadata: contextShiftLastEvaluationMetadata
    });

    if (!fitsInContext(chatHistory))
        throw new Error("The default context shift strategy did not return a history that fits the context size. " +
            "This may happen due to the system prompt being too long");

    return {
        compressedHistory: chatHistory,
        metadata
    };
}
365
// Returns the trailing text segment of the last "model" history item,
// or "" when the history is empty, does not end with a model item,
// or the model item's last response entry is not a plain string.
function getLastTextModelResponseFromChatHistory(chatHistory) {
    const lastItem = chatHistory[chatHistory.length - 1];
    if (lastItem == null || lastItem.type !== "model")
        return "";

    const { response } = lastItem;
    const lastResponsePart = response[response.length - 1];
    return typeof lastResponsePart === "string"
        ? lastResponsePart
        : "";
}
374
// Returns the text of the last history item when it is a "user" item, otherwise "".
function getLastUserTextFromChatHistory(chatHistory) {
    const lastItem = chatHistory[chatHistory.length - 1];
    return lastItem != null && lastItem.type === "user"
        ? lastItem.text
        : "";
}
379
// Returns a new history whose last item is a "model" item with its trailing text
// segment set to `textResponse` (appending a model item if needed).
// Setting "" removes a trailing text segment; non-string trailing entries
// (e.g. function calls) are never replaced — text is appended after them instead.
// The input history and its items are not mutated (copy-on-write).
function setLastModelTextResponseInChatHistory(chatHistory, textResponse) {
    const newChatHistory = chatHistory.slice();
    const endsWithModelItem = newChatHistory.length > 0 &&
        newChatHistory[newChatHistory.length - 1].type === "model";
    if (!endsWithModelItem)
        newChatHistory.push({
            type: "model",
            response: []
        });

    // Copy the item and its response array so the caller's objects stay untouched.
    const lastIndex = newChatHistory.length - 1;
    const updatedItem = { ...newChatHistory[lastIndex] };
    const response = updatedItem.response.slice();
    updatedItem.response = response;
    newChatHistory[lastIndex] = updatedItem;

    const trailingIsText = response.length > 0 &&
        typeof response[response.length - 1] === "string";
    if (trailingIsText) {
        if (textResponse === "")
            response.pop();
        else
            response[response.length - 1] = textResponse;
    } else if (textResponse !== "")
        response.push(textResponse);

    return newChatHistory;
}
401
// Returns a new history whose last item is a "user" item with `text` set to
// `userText`, appending a new user item when the history does not end with one.
// The input history and its items are not mutated (copy-on-write).
function setLastUserTextInChatHistory(chatHistory, userText) {
    const newChatHistory = chatHistory.slice();
    const endsWithUserItem = newChatHistory.length > 0 &&
        newChatHistory[newChatHistory.length - 1].type === "user";
    if (!endsWithUserItem)
        newChatHistory.push({
            type: "user",
            text: ""
        });

    const lastIndex = newChatHistory.length - 1;
    newChatHistory[lastIndex] = { ...newChatHistory[lastIndex], text: userText };
    return newChatHistory;
}
414
+ function setLastTextInChatHistory(itemType, chatHistory, text) {
415
+ if (itemType === "user")
416
+ return setLastUserTextInChatHistory(chatHistory, text);
417
+ else
418
+ return setLastModelTextResponseInChatHistory(chatHistory, text);
419
+ }
420
+ function generateContextText(endWithUserText, chatWrapper, options) {
421
+ if (endWithUserText)
422
+ return generateContextTextThatEndsWithUserText(chatWrapper, options);
423
+ return chatWrapper.generateContextState(options);
424
+ }
425
+ function generateContextTextThatEndsWithUserText(chatWrapper, options) {
426
+ const lastUserText = getLastUserTextFromChatHistory(options.chatHistory);
427
+ const randomId = "W" + (Math.random()
428
+ .toString(36)
429
+ .slice(2)) + "W";
430
+ const { contextText, ...rest } = chatWrapper.generateContextState({
431
+ ...options,
432
+ chatHistory: setLastUserTextInChatHistory(options.chatHistory, lastUserText + randomId)
433
+ });
434
+ let newContextText = contextText;
435
+ for (let i = 0; i < newContextText.values.length; i++) {
436
+ const item = newContextText.values[i];
437
+ if (typeof item !== "string")
438
+ continue;
439
+ const randomTextIndex = item.indexOf(randomId);
440
+ if (randomTextIndex < 0)
441
+ continue;
442
+ const newValue = item.slice(0, randomTextIndex);
443
+ newContextText = LlamaText([
444
+ ...newContextText.values.slice(0, i),
445
+ newValue
446
+ ]);
447
+ return {
448
+ contextText: newContextText,
449
+ userTextSuffix: LlamaText([
450
+ item.slice(randomTextIndex + randomId.length),
451
+ ...newContextText.values.slice(i + 1)
452
+ ]),
453
+ ...rest
454
+ };
455
+ }
456
+ throw new Error("The random ID was not found in the context text. " +
457
+ `There might be an issue with the chat wrapper "${chatWrapper.wrapperName}" ` +
458
+ "where not all user messages are properly added to the the result LlamaText");
459
+ }
460
+ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHistoryCompressionMetadata, pendingTokensCount = 0, isFirstEvaluation, chatWrapper, lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift, sequence, minFreeContextTokens = 1, functions, documentFunctionParams, endWithUserText }) {
461
+ if (sequence == null)
462
+ throw new DisposedError();
463
+ const model = sequence.model;
464
+ const context = sequence.context;
465
+ if (isFirstEvaluation && lastEvaluationContextWindowHistory != null && sequence.isLoadedToMemory) {
466
+ const newContextWindow = lastEvaluationContextWindowHistory.slice();
467
+ if (endWithUserText) {
468
+ if (newContextWindow.length === 0 || newContextWindow[newContextWindow.length - 1].type !== "user")
469
+ newContextWindow.push({
470
+ type: "user",
471
+ text: ""
472
+ });
473
+ }
474
+ else if (newContextWindow.length === 0 || newContextWindow[newContextWindow.length - 1].type !== "model")
475
+ newContextWindow.push({
476
+ type: "model",
477
+ response: []
478
+ });
479
+ const { contextText, stopGenerationTriggers, ignoreStartText, functionCall, userTextSuffix } = generateContextText(endWithUserText, chatWrapper, {
480
+ chatHistory: newContextWindow,
481
+ availableFunctions: functions,
482
+ documentFunctionParams
483
+ });
484
+ const tokens = contextText.tokenize(model.tokenizer);
485
+ if (tokens.length + pendingTokensCount + minFreeContextTokens < context.contextSize) {
486
+ const { firstDifferentIndex } = sequence.compareContextTokens(tokens);
487
+ const existingEvaluationPercentage = firstDifferentIndex / tokens.length;
488
+ if (existingEvaluationPercentage >= minimumOverlapPercentageToPreventContextShift)
489
+ return {
490
+ history: newContextWindow,
491
+ stopGenerationTriggers,
492
+ tokens,
493
+ newResolvedHistory: resolvedHistory,
494
+ newHistoryCompressionMetadata: lastHistoryCompressionMetadata,
495
+ ignoreStartText: ignoreStartText ?? [],
496
+ functionCallInitiallyEngaged: functionCall?.initiallyEngaged ?? false,
497
+ disengageInitiallyEngagedFunctionCall: functionCall?.disengageInitiallyEngaged ?? [],
498
+ userTextSuffix
499
+ };
500
+ }
501
+ }
502
+ resolvedHistory = sequence.isLoadedToMemory
503
+ ? resolvedHistory.slice()
504
+ : resolvedHistory.map(removeRawFromHistoryItem);
505
+ if (resolvedContextShift.lastEvaluationMetadata != null) {
506
+ const contextShiftSize = resolvedContextShift.size instanceof Function
507
+ ? await resolvedContextShift.size(sequence)
508
+ : resolvedContextShift.size;
509
+ const { compressedHistory, metadata } = await compressHistoryToFitContextSize({
510
+ history: resolvedHistory,
511
+ contextShiftSize: Math.max(minFreeContextTokens, Math.min(contextShiftSize, context.contextSize - pendingTokensCount)) + pendingTokensCount,
512
+ contextShiftStrategy: resolvedContextShift.strategy,
513
+ contextShiftLastEvaluationMetadata: resolvedContextShift.lastEvaluationMetadata,
514
+ contextSize: context.contextSize,
515
+ tokenizer: model.tokenizer,
516
+ chatWrapper: chatWrapper,
517
+ functions,
518
+ documentFunctionParams
519
+ });
520
+ const { contextText, stopGenerationTriggers, ignoreStartText, functionCall, userTextSuffix } = generateContextText(endWithUserText, chatWrapper, {
521
+ chatHistory: compressedHistory,
522
+ availableFunctions: functions,
523
+ documentFunctionParams
524
+ });
525
+ return {
526
+ history: compressedHistory,
527
+ stopGenerationTriggers,
528
+ tokens: contextText.tokenize(model.tokenizer),
529
+ newResolvedHistory: resolvedHistory,
530
+ newHistoryCompressionMetadata: metadata,
531
+ ignoreStartText: ignoreStartText ?? [],
532
+ functionCallInitiallyEngaged: functionCall?.initiallyEngaged ?? false,
533
+ disengageInitiallyEngagedFunctionCall: functionCall?.disengageInitiallyEngaged ?? [],
534
+ userTextSuffix
535
+ };
536
+ }
537
+ {
538
+ const { contextText, stopGenerationTriggers, ignoreStartText, functionCall, userTextSuffix } = generateContextText(endWithUserText, chatWrapper, {
539
+ chatHistory: resolvedHistory,
540
+ availableFunctions: functions,
541
+ documentFunctionParams
542
+ });
543
+ const tokens = contextText.tokenize(model.tokenizer);
544
+ if (tokens.length + pendingTokensCount + minFreeContextTokens < context.contextSize)
545
+ return {
546
+ history: resolvedHistory,
547
+ stopGenerationTriggers,
548
+ tokens,
549
+ newResolvedHistory: resolvedHistory,
550
+ newHistoryCompressionMetadata: lastHistoryCompressionMetadata,
551
+ ignoreStartText: ignoreStartText ?? [],
552
+ functionCallInitiallyEngaged: functionCall?.initiallyEngaged ?? false,
553
+ disengageInitiallyEngagedFunctionCall: functionCall?.disengageInitiallyEngaged ?? [],
554
+ userTextSuffix
555
+ };
556
+ }
557
+ const contextShiftSize = Math.min(context.contextSize, Math.max(1, Math.floor(resolvedContextShift.size instanceof Function
558
+ ? await resolvedContextShift.size(sequence)
559
+ : resolvedContextShift.size)));
560
+ const { compressedHistory, metadata } = await compressHistoryToFitContextSize({
561
+ history: resolvedHistory,
562
+ contextShiftSize: Math.max(minFreeContextTokens, Math.min(contextShiftSize, context.contextSize - pendingTokensCount)) + pendingTokensCount,
563
+ contextShiftStrategy: resolvedContextShift.strategy,
564
+ contextShiftLastEvaluationMetadata: resolvedContextShift.lastEvaluationMetadata,
565
+ contextSize: context.contextSize,
566
+ tokenizer: model.tokenizer,
567
+ chatWrapper: chatWrapper,
568
+ functions,
569
+ documentFunctionParams
570
+ });
571
+ const { contextText, stopGenerationTriggers, ignoreStartText, functionCall, userTextSuffix } = generateContextText(endWithUserText, chatWrapper, {
572
+ chatHistory: compressedHistory,
573
+ availableFunctions: functions,
574
+ documentFunctionParams
575
+ });
576
+ return {
577
+ history: compressedHistory,
578
+ stopGenerationTriggers,
579
+ tokens: contextText.tokenize(model.tokenizer),
580
+ newResolvedHistory: resolvedHistory,
581
+ newHistoryCompressionMetadata: metadata,
582
+ ignoreStartText: ignoreStartText ?? [],
583
+ functionCallInitiallyEngaged: functionCall?.initiallyEngaged ?? false,
584
+ disengageInitiallyEngagedFunctionCall: functionCall?.disengageInitiallyEngaged ?? [],
585
+ userTextSuffix
586
+ };
587
+ }
588
+ class GenerateResponseState {
589
+ llamaChat;
590
+ chatWrapper;
591
+ history;
592
+ onToken;
593
+ signal;
594
+ stopOnAbortSignal;
595
+ maxTokens;
596
+ temperature;
597
+ minP;
598
+ topK;
599
+ topP;
600
+ grammar;
601
+ trimWhitespaceSuffix;
602
+ tokenBias;
603
+ evaluationPriority;
604
+ functions;
605
+ onFunctionCall;
606
+ documentFunctionParams;
607
+ maxParallelFunctionCalls;
608
+ contextShift;
609
+ customStopTriggers;
610
+ lastEvaluationContextWindowHistory;
611
+ minimumOverlapPercentageToPreventContextShift;
612
+ functionsEnabled;
613
+ repeatPenaltyEnabled;
614
+ resolvedContextShift;
615
+ resolvedRepeatPenalty;
616
+ lastModelResponse;
617
+ grammarEvaluationState;
618
+ functionNameGrammar;
619
+ functionsGrammar;
620
+ functionsEvaluationState;
621
+ streamRegulator = new TokenStreamRegulator();
622
+ stopGenerationDetector = new StopGenerationDetector();
623
+ customStopGenerationTriggersDetector = new StopGenerationDetector();
624
+ functionSyntaxStartDetector = new StopGenerationDetector();
625
+ disengageInitiallyEngagedFunctionMode = new StopGenerationDetector();
626
+ ignoreStartTextDetector = new StopGenerationDetector();
627
+ locksToReleaseOnValidGeneration = [];
628
+ resolvedHistory;
629
+ res = [];
630
+ pendingTokens = [];
631
+ ignoredStartTextTokens = [];
632
+ resFunctionCalls = [];
633
+ functionEvaluationMode = false;
634
+ currentFunctionCallPreviousText = LlamaText([]);
635
+ currentFunctionCallCurrentPartTokens = [];
636
+ functionEvaluationFunctionName = "";
637
+ currentFunctionCallPreviousPartLeftoverText = "";
638
+ removedStartTextToIgnore = false;
639
+ releasedPartiallyFreeTokensBeforeFunctionCallStartSyntax = false;
640
+ generatedTokens = 0;
641
+ isFirstEvaluation = true;
642
+ initiallyEngagedFunctionMode = false;
643
+ lastContextWindowHistory;
644
+ lastHistoryCompressionMetadata;
645
+ restartEvaluationIterator = false;
646
+ // context shift loop
647
+ shouldContextShift = false;
648
+ canAvoidReloadingHistory = false;
649
+ contextWindowTokens = [];
650
+ stopGenerationTriggers = [];
651
+ ignoreStartText = [];
652
+ functionCallInitiallyEngaged = false;
653
+ disengageInitiallyEngagedFunctionCall = [];
654
+ userTextSuffix = undefined;
655
+ tokens = [];
656
+ contextWindowLastModelResponse = "";
657
+ contextWindowsRes = [];
658
+ // token evaluation loop
659
+ evaluationIterator;
660
+ currentIteration;
661
+ currentIterationReplacementToken;
662
+ currentToken;
663
+ currentTokens = [];
664
+ currentText = "";
665
+ currentQueuedTokenRelease;
666
+ constructor(llamaChat, chatWrapper, history, { onToken, signal, stopOnAbortSignal = false, maxTokens, temperature, minP, topK, topP, grammar, trimWhitespaceSuffix = defaultTrimWhitespaceSuffix, repeatPenalty = {}, tokenBias, evaluationPriority = defaultEvaluationPriority, functions, onFunctionCall, documentFunctionParams, maxParallelFunctionCalls, contextShift = defaultContextShiftOptions, customStopTriggers, lastEvaluationContextWindow: { history: lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift = 0.5 } = {} } = {}) {
667
+ this.llamaChat = llamaChat;
668
+ this.chatWrapper = chatWrapper;
669
+ this.history = history;
670
+ this.onToken = safeEventCallback(onToken);
671
+ this.signal = signal;
672
+ this.stopOnAbortSignal = stopOnAbortSignal;
673
+ this.maxTokens = maxTokens;
674
+ this.temperature = temperature;
675
+ this.minP = minP;
676
+ this.topK = topK;
677
+ this.topP = topP;
678
+ this.grammar = grammar;
679
+ this.trimWhitespaceSuffix = trimWhitespaceSuffix;
680
+ this.tokenBias = tokenBias;
681
+ this.evaluationPriority = evaluationPriority;
682
+ this.functions = functions;
683
+ this.onFunctionCall = safeEventCallback(onFunctionCall);
684
+ this.documentFunctionParams = documentFunctionParams;
685
+ this.maxParallelFunctionCalls = maxParallelFunctionCalls;
686
+ this.contextShift = contextShift;
687
+ this.customStopTriggers = customStopTriggers;
688
+ this.lastEvaluationContextWindowHistory = lastEvaluationContextWindowHistory;
689
+ this.minimumOverlapPercentageToPreventContextShift = minimumOverlapPercentageToPreventContextShift;
690
+ this.functionsEnabled = (this.functions != null && Object.keys(this.functions).length > 0);
691
+ if (this.signal?.aborted)
692
+ throw this.signal.reason;
693
+ if (this.llamaChat.disposed)
694
+ throw new DisposedError();
695
+ this.resolvedHistory = this.llamaChat.sequence.isLoadedToMemory
696
+ ? this.history.slice()
697
+ : this.history.map(removeRawFromHistoryItem);
698
+ this.resolvedContextShift = {
699
+ ...defaultContextShiftOptions,
700
+ ...removeNullFields(this.contextShift)
701
+ };
702
+ this.resolvedRepeatPenalty = repeatPenalty === false
703
+ ? { lastTokens: 0 }
704
+ : {
705
+ ...(repeatPenalty ?? {}),
706
+ lastTokens: repeatPenalty?.lastTokens ?? defaultRepeatPenaltyLastTokens
707
+ };
708
+ this.lastModelResponse = getLastTextModelResponseFromChatHistory(this.resolvedHistory);
709
+ this.repeatPenaltyEnabled = this.resolvedRepeatPenalty.lastTokens > 0;
710
+ this.grammarEvaluationState = this.grammar != null
711
+ ? new LlamaGrammarEvaluationState({ grammar: this.grammar })
712
+ : undefined;
713
+ this.functionNameGrammar = this.functionsEnabled
714
+ ? new FunctionCallNameGrammar(this.llamaChat.model._llama, this.functions, this.chatWrapper)
715
+ : undefined;
716
+ this.functionsGrammar = undefined;
717
+ this.functionsEvaluationState = undefined;
718
+ this.lastContextWindowHistory = this.resolvedHistory;
719
+ this.lastHistoryCompressionMetadata = this.resolvedContextShift;
720
+ if (this.customStopTriggers != null)
721
+ StopGenerationDetector.resolveStopTriggers(this.customStopTriggers, this.llamaChat.model.tokenizer)
722
+ .map((stopTrigger) => this.customStopGenerationTriggersDetector.addStopTrigger(stopTrigger));
723
+ if (this.grammar != null)
724
+ StopGenerationDetector.resolveStopTriggers(this.grammar.stopGenerationTriggers, this.llamaChat.model.tokenizer)
725
+ .map((stopTrigger) => this.stopGenerationDetector.addStopTrigger(stopTrigger));
726
+ if (this.functions != null && Object.keys(this.functions).length > 0)
727
+ this.functionSyntaxStartDetector.addStopTrigger(StopGenerationDetector.resolveLlamaTextTrigger(LlamaText([
728
+ this.chatWrapper.settings.functions?.parallelism?.call?.sectionPrefix ?? "",
729
+ this.chatWrapper.settings.functions.call.prefix
730
+ ]), this.llamaChat.model.tokenizer));
731
+ this.getPenaltyTokens = this.getPenaltyTokens.bind(this);
732
+ }
733
+ dispose() {
734
+ }
735
+ [Symbol.dispose]() {
736
+ this.dispose();
737
+ }
738
+ ensureLastHistoryItemIsModel() {
739
+ if (this.resolvedHistory.length === 0 || this.resolvedHistory[this.resolvedHistory.length - 1].type !== "model")
740
+ this.resolvedHistory.push({
741
+ type: "model",
742
+ response: []
743
+ });
744
+ }
745
+ ensureLastHistoryItemIsUser() {
746
+ if (this.resolvedHistory.length === 0 || this.resolvedHistory[this.resolvedHistory.length - 1].type !== "user")
747
+ this.resolvedHistory.push({
748
+ type: "user",
749
+ text: ""
750
+ });
751
+ }
752
+ ensureNotAborted() {
753
+ if (this.signal?.aborted && (!this.stopOnAbortSignal || this.res.length === 0))
754
+ throw this.signal.reason;
755
+ if (this.llamaChat.disposed)
756
+ throw new DisposedError();
757
+ }
758
+ getPenaltyTokens() {
759
+ if (this.llamaChat.disposed)
760
+ throw new DisposedError();
761
+ let punishTokens = this.res.slice(-this.resolvedRepeatPenalty.lastTokens);
762
+ if (this.resolvedRepeatPenalty.punishTokensFilter != null)
763
+ punishTokens = this.resolvedRepeatPenalty.punishTokensFilter(punishTokens);
764
+ if (this.resolvedRepeatPenalty.penalizeNewLine == null || !this.resolvedRepeatPenalty.penalizeNewLine) {
765
+ const nlToken = this.llamaChat.model.tokens.nl;
766
+ if (nlToken != null)
767
+ punishTokens = punishTokens.filter(token => token !== nlToken);
768
+ }
769
+ return punishTokens;
770
+ }
771
+ getResolvedHistoryWithCurrentModelResponse() {
772
+ if (this.res.length === 0)
773
+ return this.resolvedHistory;
774
+ let modelResponse = this.llamaChat.model.detokenize(this.res);
775
+ if (this.grammar?.trimWhitespaceSuffix || this.trimWhitespaceSuffix)
776
+ modelResponse = modelResponse.trimEnd();
777
+ if (modelResponse === "")
778
+ return this.resolvedHistory;
779
+ return setLastModelTextResponseInChatHistory(this.resolvedHistory, this.lastModelResponse + modelResponse);
780
+ }
781
+ removeFoundStartIgnoreTextsFromPendingTokens(forceRemove = false) {
782
+ if (!this.removedStartTextToIgnore && this.res.length === 0 && this.pendingTokens.length > 0 &&
783
+ this.ignoreStartTextDetector.hasTriggeredStops && (forceRemove || !this.ignoreStartTextDetector.hasInProgressStops)) {
784
+ this.ignoreStartTextDetector.clearInProgressStops();
785
+ this.ignoreStartTextDetector.clearTriggeredStops();
786
+ let mostExhaustiveTriggeredStops = null;
787
+ let mostExhaustiveTriggeredStopsLeftoverTokens = [];
788
+ for (let i = 0; i < this.pendingTokens.length; i++) {
789
+ this.ignoreStartTextDetector.recordGeneration({
790
+ text: this.llamaChat.model.detokenize([this.pendingTokens[i]]),
791
+ tokens: [this.pendingTokens[i]],
792
+ startNewChecks: i === 0,
793
+ triggerMustStartWithGeneration: true
794
+ });
795
+ if (this.ignoreStartTextDetector.hasTriggeredStops) {
796
+ mostExhaustiveTriggeredStops = this.ignoreStartTextDetector.getTriggeredStops();
797
+ this.ignoreStartTextDetector.clearTriggeredStops();
798
+ mostExhaustiveTriggeredStopsLeftoverTokens = this.pendingTokens.slice(i + 1);
799
+ }
800
+ else if (!this.ignoreStartTextDetector.hasInProgressStops)
801
+ break;
802
+ }
803
+ if (mostExhaustiveTriggeredStops != null) {
804
+ const [mostExhaustiveTriggeredStop] = mostExhaustiveTriggeredStops;
805
+ if (mostExhaustiveTriggeredStop != null) {
806
+ this.ignoredStartTextTokens = mostExhaustiveTriggeredStop.stopTrigger
807
+ .map((stopTrigger) => {
808
+ if (typeof stopTrigger === "string")
809
+ return this.llamaChat.model.tokenize(stopTrigger, false, "trimLeadingSpace");
810
+ else
811
+ return [stopTrigger];
812
+ })
813
+ .flat(1);
814
+ const newPendingTokens = [
815
+ ...mostExhaustiveTriggeredStop.remainingGeneration,
816
+ mostExhaustiveTriggeredStopsLeftoverTokens
817
+ ]
818
+ .map((generation) => {
819
+ if (typeof generation === "string")
820
+ return this.llamaChat.model.tokenize(generation, false, "trimLeadingSpace");
821
+ else
822
+ return generation;
823
+ })
824
+ .flat(1);
825
+ this.pendingTokens.length = 0;
826
+ this.pendingTokens.push(...newPendingTokens);
827
+ this.removedStartTextToIgnore = true;
828
+ }
829
+ }
830
+ }
831
+ }
832
+ startTokenLoop() {
833
+ this.ensureNotAborted();
834
+ this.shouldContextShift = false;
835
+ }
836
+ getContextWindowFunctionCallsTokens() {
837
+ if (this.functionEvaluationMode === false)
838
+ return [];
839
+ else if (this.functionEvaluationMode === "prefixOrDisengage")
840
+ return [
841
+ ...LlamaText(this.currentFunctionCallPreviousText).tokenize(this.llamaChat.model.tokenizer, "trimLeadingSpace"),
842
+ ...this.currentFunctionCallCurrentPartTokens
843
+ ];
844
+ const text = [];
845
+ if (this.chatWrapper.settings.functions?.parallelism?.call?.sectionPrefix != null)
846
+ text.push(this.chatWrapper.settings.functions.parallelism.call.sectionPrefix);
847
+ for (let i = 0; i < this.resFunctionCalls.length; i++) {
848
+ const call = this.resFunctionCalls[i];
849
+ if (i > 0)
850
+ text.push(this.chatWrapper.settings.functions?.parallelism?.call?.betweenCalls ?? "");
851
+ text.push(call.raw);
852
+ }
853
+ text.push(this.currentFunctionCallPreviousText);
854
+ return [
855
+ ...LlamaText(text).tokenize(this.llamaChat.model.tokenizer, "trimLeadingSpace"),
856
+ ...this.currentFunctionCallCurrentPartTokens
857
+ ];
858
+ }
859
+ async loadContextWindow(resolvedHistory, endWithUserText = false, avoidReloadingHistory = false) {
860
+ const queuedChunkTokens = this.streamRegulator.getAllQueuedChunkTokens();
861
+ const functionCallsTokens = this.getContextWindowFunctionCallsTokens();
862
+ if (!avoidReloadingHistory || !this.canAvoidReloadingHistory || !this.llamaChat.sequence.isLoadedToMemory) {
863
+ const { history: contextWindowHistory, stopGenerationTriggers, tokens: contextWindowTokens, newResolvedHistory, newHistoryCompressionMetadata, ignoreStartText, functionCallInitiallyEngaged, disengageInitiallyEngagedFunctionCall, userTextSuffix } = await getContextWindow({
864
+ resolvedHistory: resolvedHistory,
865
+ resolvedContextShift: this.resolvedContextShift,
866
+ lastHistoryCompressionMetadata: this.lastHistoryCompressionMetadata,
867
+ pendingTokensCount: this.pendingTokens.length + queuedChunkTokens.length + functionCallsTokens.length,
868
+ isFirstEvaluation: this.isFirstEvaluation,
869
+ chatWrapper: this.chatWrapper,
870
+ lastEvaluationContextWindowHistory: this.lastEvaluationContextWindowHistory,
871
+ minimumOverlapPercentageToPreventContextShift: this.minimumOverlapPercentageToPreventContextShift,
872
+ sequence: this.llamaChat.sequence,
873
+ minFreeContextTokens: 1,
874
+ functions: this.functionsEnabled ? this.functions : undefined,
875
+ documentFunctionParams: this.documentFunctionParams,
876
+ endWithUserText
877
+ });
878
+ this.ensureNotAborted();
879
+ this.contextWindowTokens = contextWindowTokens;
880
+ this.stopGenerationTriggers = stopGenerationTriggers;
881
+ this.ignoreStartText = ignoreStartText;
882
+ this.functionCallInitiallyEngaged = functionCallInitiallyEngaged;
883
+ this.disengageInitiallyEngagedFunctionCall = disengageInitiallyEngagedFunctionCall;
884
+ this.userTextSuffix = userTextSuffix;
885
+ this.resolvedHistory = newResolvedHistory;
886
+ this.lastHistoryCompressionMetadata = newHistoryCompressionMetadata;
887
+ this.lastContextWindowHistory = contextWindowHistory;
888
+ this.contextWindowLastModelResponse = getLastTextModelResponseFromChatHistory(contextWindowHistory);
889
+ this.contextWindowsRes = [];
890
+ this.canAvoidReloadingHistory = true;
891
+ }
892
+ this.tokens = [
893
+ ...this.contextWindowTokens,
894
+ ...this.ignoredStartTextTokens,
895
+ ...this.pendingTokens,
896
+ ...queuedChunkTokens,
897
+ ...functionCallsTokens
898
+ ];
899
+ if (avoidReloadingHistory && this.tokens.length >= this.llamaChat.sequence.context.contextSize - 1)
900
+ return await this.loadContextWindow(resolvedHistory, endWithUserText, false);
901
+ return {
902
+ userTextSuffix: this.userTextSuffix
903
+ };
904
+ }
905
+ addIgnoreStartTextTriggersFromChatWrapper() {
906
+ StopGenerationDetector.resolveStopTriggers(this.ignoreStartText, this.llamaChat.model.tokenizer)
907
+ .map((stopTrigger) => this.ignoreStartTextDetector.addStopTrigger(stopTrigger));
908
+ }
909
+ addStopGenerationTriggersFromChatWrapper() {
910
+ StopGenerationDetector.resolveStopTriggers(this.stopGenerationTriggers, this.llamaChat.model.tokenizer)
911
+ .map((stopTrigger) => this.stopGenerationDetector.addStopTrigger(stopTrigger));
912
+ }
913
+ initFunctions() {
914
+ this.initiallyEngagedFunctionMode = this.functionCallInitiallyEngaged;
915
+ if (this.initiallyEngagedFunctionMode) {
916
+ StopGenerationDetector.resolveStopTriggers(this.disengageInitiallyEngagedFunctionCall, this.llamaChat.model.tokenizer)
917
+ .map((stopTrigger) => this.disengageInitiallyEngagedFunctionMode.addStopTrigger(stopTrigger));
918
+ if (this.disengageInitiallyEngagedFunctionMode.hasTriggers) {
919
+ this.functionEvaluationMode = "prefixOrDisengage";
920
+ this.functionsGrammar = undefined;
921
+ this.functionsEvaluationState = undefined;
922
+ }
923
+ else {
924
+ this.functionEvaluationMode = "functionName";
925
+ }
926
+ this.restartEvaluationIterator = true;
927
+ }
928
+ }
929
+ async enterFunctionCallingLoop(loadContextWindow) {
930
+ if (!this.functionsEnabled) {
931
+ this.functionEvaluationMode = false;
932
+ return undefined;
933
+ }
934
+ // eslint-disable-next-line no-constant-condition
935
+ while (true) {
936
+ if (this.functionEvaluationMode === "prefixOrDisengage") {
937
+ this.functionsGrammar = undefined;
938
+ this.functionsEvaluationState = undefined;
939
+ this.currentFunctionCallPreviousText = LlamaText([]);
940
+ this.currentFunctionCallCurrentPartTokens.length = 0;
941
+ const prefixTokens = LlamaText(this.chatWrapper.settings.functions.call.prefix)
942
+ .tokenize(this.llamaChat.model.tokenizer, "trimLeadingSpace");
943
+ const prefixDetector = new StopGenerationDetector();
944
+ const afterPrefixLeftoverTokens = [];
945
+ prefixDetector.addStopTrigger(StopGenerationDetector.resolveLlamaTextTrigger(LlamaText(this.chatWrapper.settings.functions.call.prefix), this.llamaChat.model.tokenizer));
946
+ for (const prefixToken of prefixTokens) {
947
+ const tokens = [prefixToken];
948
+ const text = this.llamaChat.model.detokenize(tokens);
949
+ const disregardedPossibilities = this.disengageInitiallyEngagedFunctionMode
950
+ .getDisregardedPossibilitiesCountForAGeneration({
951
+ text,
952
+ tokens,
953
+ startNewChecks: this.currentFunctionCallCurrentPartTokens.length === 0
954
+ });
955
+ if (disregardedPossibilities > 0)
956
+ break;
957
+ this.currentFunctionCallCurrentPartTokens.push(prefixToken);
958
+ this.disengageInitiallyEngagedFunctionMode.recordGeneration({
959
+ text: text,
960
+ tokens: tokens,
961
+ startNewChecks: this.currentFunctionCallCurrentPartTokens.length === 1,
962
+ triggerMustStartWithGeneration: true
963
+ });
964
+ if (prefixDetector.hasTriggeredStops)
965
+ afterPrefixLeftoverTokens.push(prefixToken);
966
+ else
967
+ prefixDetector.recordGeneration({
968
+ text: text,
969
+ tokens: tokens,
970
+ startNewChecks: this.currentFunctionCallCurrentPartTokens.length === 1,
971
+ triggerMustStartWithGeneration: true
972
+ });
973
+ }
974
+ for await (const token of this.evaluateWithContextShift(loadContextWindow)) {
975
+ const stopGenerationTriggerRes = this.handleStopGenerationTrigger("model");
976
+ if (stopGenerationTriggerRes != null)
977
+ return stopGenerationTriggerRes;
978
+ this.currentFunctionCallCurrentPartTokens.push(token);
979
+ this.disengageInitiallyEngagedFunctionMode.recordGeneration({
980
+ text: this.currentText,
981
+ tokens: this.currentTokens,
982
+ startNewChecks: this.currentFunctionCallCurrentPartTokens.length === 1,
983
+ triggerMustStartWithGeneration: true
984
+ });
985
+ if (prefixDetector.hasTriggeredStops)
986
+ afterPrefixLeftoverTokens.push(token);
987
+ else
988
+ prefixDetector.recordGeneration({
989
+ text: this.currentText,
990
+ tokens: this.currentTokens,
991
+ startNewChecks: this.currentFunctionCallCurrentPartTokens.length === 1,
992
+ triggerMustStartWithGeneration: true
993
+ });
994
+ if (this.disengageInitiallyEngagedFunctionMode.hasTriggeredStops ||
995
+ !this.disengageInitiallyEngagedFunctionMode.hasInProgressStops)
996
+ break;
997
+ }
998
+ const abortRes = this.handleAbortTrigger("model");
999
+ if (abortRes != null)
1000
+ return abortRes;
1001
+ if (this.disengageInitiallyEngagedFunctionMode.hasTriggeredStops) {
1002
+ for (const token of this.currentFunctionCallCurrentPartTokens) {
1003
+ this.currentToken = token;
1004
+ this.currentTokens = [this.currentToken];
1005
+ this.currentText = this.llamaChat.model.detokenize(this.currentTokens);
1006
+ this.currentQueuedTokenRelease = this.streamRegulator.addChunk({
1007
+ tokens: this.currentTokens,
1008
+ text: this.currentText
1009
+ });
1010
+ this.recordStopGenerationEvaluation();
1011
+ }
1012
+ this.currentFunctionCallCurrentPartTokens.length = 0;
1013
+ this.functionEvaluationMode = false;
1014
+ return undefined;
1015
+ }
1016
+ if (prefixDetector.hasTriggeredStops) {
1017
+ const triggeredStops = prefixDetector.getTriggeredStops();
1018
+ const firstRemainingGenerationAfterStop = StopGenerationDetector.getFirstRemainingGenerationAfterStop(triggeredStops);
1019
+ this.currentFunctionCallPreviousPartLeftoverText = StopGenerationDetector.detokenizeRemainingGeneration(firstRemainingGenerationAfterStop, this.llamaChat.model.detokenize) + this.llamaChat.model.detokenize(afterPrefixLeftoverTokens);
1020
+ }
1021
+ else
1022
+ this.currentFunctionCallPreviousPartLeftoverText = "";
1023
+ this.functionEvaluationMode = "functionName";
1024
+ this.currentFunctionCallCurrentPartTokens.length = 0;
1025
+ continue;
1026
+ }
1027
+ else if (this.functionEvaluationMode === "functionName") {
1028
+ const functionNameGenerationDoneDetector = new StopGenerationDetector();
1029
+ this.stopGenerationDetector.clearInProgressStops();
1030
+ this.customStopGenerationTriggersDetector.clearInProgressStops();
1031
+ this.currentFunctionCallPreviousText = LlamaText(this.chatWrapper.settings.functions.call.prefix);
1032
+ this.currentFunctionCallCurrentPartTokens.length = 0;
1033
+ const functionNameGrammar = this.functionNameGrammar ?? new FunctionCallNameGrammar(this.llamaChat.model._llama, this.functions, this.chatWrapper);
1034
+ this.functionsGrammar = functionNameGrammar;
1035
+ this.functionsEvaluationState = new LlamaGrammarEvaluationState({
1036
+ grammar: this.functionsGrammar
1037
+ });
1038
+ StopGenerationDetector.resolveStopTriggers(this.functionsGrammar.stopGenerationTriggers, this.llamaChat.model.tokenizer)
1039
+ .map((stopTrigger) => functionNameGenerationDoneDetector.addStopTrigger(stopTrigger));
1040
+ if (this.currentFunctionCallPreviousPartLeftoverText !== "") {
1041
+ const validFunctionNames = Object.keys(this.functions);
1042
+ const hasAnyFunctionStartWithLeftover = validFunctionNames.some((functionName) => functionName.startsWith(this.currentFunctionCallPreviousPartLeftoverText));
1043
+ if (hasAnyFunctionStartWithLeftover) {
1044
+ const leftoverTokens = this.llamaChat.model.tokenize(this.currentFunctionCallPreviousPartLeftoverText, false, "trimLeadingSpace");
1045
+ this.currentFunctionCallPreviousPartLeftoverText = "";
1046
+ for (const leftoverToken of leftoverTokens) {
1047
+ const canBeNextToken = this.llamaChat.context._canBeNextTokenForGrammarEvaluationState(this.functionsEvaluationState, leftoverToken);
1048
+ if (!canBeNextToken)
1049
+ break;
1050
+ this.llamaChat.context._acceptTokenOnGrammarEvaluationState(this.functionsEvaluationState, leftoverToken);
1051
+ this.currentFunctionCallCurrentPartTokens.push(leftoverToken);
1052
+ functionNameGenerationDoneDetector.recordGeneration({
1053
+ text: this.llamaChat.model.detokenize([leftoverToken]),
1054
+ tokens: [leftoverToken]
1055
+ });
1056
+ }
1057
+ }
1058
+ }
1059
+ for await (const token of this.evaluateWithContextShift(loadContextWindow)) {
1060
+ this.currentFunctionCallCurrentPartTokens.push(token);
1061
+ functionNameGenerationDoneDetector.recordGeneration({
1062
+ text: this.currentText,
1063
+ tokens: this.currentTokens
1064
+ });
1065
+ if (functionNameGenerationDoneDetector.hasTriggeredStops)
1066
+ break;
1067
+ }
1068
+ const abortRes = this.handleAbortTrigger("model");
1069
+ if (abortRes != null)
1070
+ return abortRes;
1071
+ const functionCallNameText = this.llamaChat.model.detokenize(this.currentFunctionCallCurrentPartTokens);
1072
+ const functionName = functionNameGrammar.parseFunctionName(functionCallNameText);
1073
+ this.functionEvaluationFunctionName = functionName;
1074
+ this.functionEvaluationMode = "params";
1075
+ continue;
1076
+ }
1077
+ else if (this.functionEvaluationMode === "params") {
1078
+ this.currentFunctionCallPreviousText = LlamaText([
1079
+ this.chatWrapper.settings.functions.call.prefix,
1080
+ this.functionEvaluationFunctionName,
1081
+ this.chatWrapper.settings.functions.call.paramsPrefix
1082
+ ]);
1083
+ this.currentFunctionCallCurrentPartTokens.length = 0;
1084
+ let params = undefined;
1085
+ let paramsText = "";
1086
+ const functionDefinition = this.functions[this.functionEvaluationFunctionName];
1087
+ if (functionDefinition == null)
1088
+ throw new Error(`Function "${this.functionEvaluationFunctionName}" is not provided in the functions object`);
1089
+ else if (functionDefinition.params == null) {
1090
+ params = undefined;
1091
+ paramsText = "";
1092
+ }
1093
+ else {
1094
+ const functionParamsGenerationDoneDetector = new StopGenerationDetector();
1095
+ const functionParamsGrammar = new FunctionCallParamsGrammar(this.llamaChat.model._llama, this.functions, this.chatWrapper, this.functionEvaluationFunctionName, functionDefinition.params);
1096
+ this.functionsGrammar = functionParamsGrammar;
1097
+ this.functionsEvaluationState = new LlamaGrammarEvaluationState({
1098
+ grammar: this.functionsGrammar
1099
+ });
1100
+ StopGenerationDetector.resolveStopTriggers(this.functionsGrammar.stopGenerationTriggers, this.llamaChat.model.tokenizer)
1101
+ .map((stopTrigger) => functionParamsGenerationDoneDetector.addStopTrigger(stopTrigger));
1102
+ for await (const token of this.evaluateWithContextShift(loadContextWindow)) {
1103
+ this.currentFunctionCallCurrentPartTokens.push(token);
1104
+ functionParamsGenerationDoneDetector.recordGeneration({
1105
+ text: this.currentText,
1106
+ tokens: this.currentTokens
1107
+ });
1108
+ if (functionParamsGenerationDoneDetector.hasTriggeredStops)
1109
+ break;
1110
+ }
1111
+ const abortRes = this.handleAbortTrigger("model");
1112
+ if (abortRes != null)
1113
+ return abortRes;
1114
+ const functionCallParamsText = this.llamaChat.model.detokenize(this.currentFunctionCallCurrentPartTokens);
1115
+ const parsedFunctionParams = functionParamsGrammar.parseParams(functionCallParamsText);
1116
+ params = parsedFunctionParams.params;
1117
+ paramsText = parsedFunctionParams.raw;
1118
+ }
1119
+ const functionCallText = LlamaText([
1120
+ this.chatWrapper.settings.functions.call.prefix,
1121
+ this.functionEvaluationFunctionName,
1122
+ this.chatWrapper.settings.functions.call.paramsPrefix,
1123
+ paramsText,
1124
+ this.chatWrapper.settings.functions.call.suffix
1125
+ ]);
1126
+ this.resFunctionCalls.push({
1127
+ functionName: this.functionEvaluationFunctionName,
1128
+ params,
1129
+ raw: functionCallText
1130
+ });
1131
+ this.onFunctionCall?.({
1132
+ functionName: this.functionEvaluationFunctionName,
1133
+ params: structuredClone(params),
1134
+ raw: functionCallText.toJSON()
1135
+ });
1136
+ this.currentFunctionCallPreviousText = LlamaText([]);
1137
+ this.currentFunctionCallCurrentPartTokens.length = 0;
1138
+ this.functionEvaluationFunctionName = "";
1139
+ if (this.chatWrapper.settings.functions.parallelism == null || (this.maxParallelFunctionCalls != null && this.maxParallelFunctionCalls <= this.resFunctionCalls.length)) {
1140
+ this.functionEvaluationMode = false;
1141
+ return this.returnFunctionCallResults();
1142
+ }
1143
+ this.functionEvaluationMode = "sectionSuffixOrBetweenCalls";
1144
+ continue;
1145
+ }
1146
+ else if (this.functionEvaluationMode === "sectionSuffixOrBetweenCalls") {
1147
+ const sectionSuffixDetector = new StopGenerationDetector();
1148
+ let isFirstToken = true;
1149
+ this.functionsGrammar = undefined;
1150
+ this.functionsEvaluationState = undefined;
1151
+ this.currentFunctionCallPreviousText = LlamaText([]);
1152
+ this.currentFunctionCallCurrentPartTokens.length = 0;
1153
+ StopGenerationDetector.resolveStopTriggers([
1154
+ ...(this.chatWrapper.settings.functions.parallelism?.call?.sectionSuffix != null
1155
+ ? [this.chatWrapper.settings.functions.parallelism?.call?.sectionSuffix]
1156
+ : []),
1157
+ LlamaText(new SpecialToken("EOS")),
1158
+ LlamaText(new SpecialToken("EOT"))
1159
+ ], this.llamaChat.model.tokenizer)
1160
+ .map((stopTrigger) => sectionSuffixDetector.addStopTrigger(stopTrigger));
1161
+ for await (const token of this.evaluateWithContextShift(loadContextWindow)) {
1162
+ this.currentFunctionCallCurrentPartTokens.push(token);
1163
+ sectionSuffixDetector.recordGeneration({
1164
+ text: this.currentText,
1165
+ tokens: this.currentTokens,
1166
+ startNewChecks: isFirstToken,
1167
+ triggerMustStartWithGeneration: true
1168
+ });
1169
+ isFirstToken = false;
1170
+ if (sectionSuffixDetector.hasTriggeredStops || !sectionSuffixDetector.hasInProgressStops)
1171
+ break;
1172
+ }
1173
+ const abortRes = this.handleAbortTrigger("model");
1174
+ if (abortRes != null)
1175
+ return abortRes;
1176
+ if (sectionSuffixDetector.hasTriggeredStops) {
1177
+ this.functionEvaluationMode = false;
1178
+ return this.returnFunctionCallResults();
1179
+ }
1180
+ this.functionEvaluationMode = "functionName";
1181
+ this.initiallyEngagedFunctionMode = false;
1182
+ continue;
1183
+ }
1184
+ break;
1185
+ }
1186
+ return undefined;
1187
+ }
1188
/**
 * Flushes all response tokens generated before the detected function-call
 * start syntax to the caller, so the free-text part of the response is
 * released exactly once before function-call evaluation takes over.
 */
releasePartiallyFreeTokensBeforeFunctionCallStart() {
    // Idempotent: only the first call after detecting the call syntax does work.
    if (this.releasedPartiallyFreeTokensBeforeFunctionCallStartSyntax)
        return;
    this.stopGenerationDetector.clearInProgressStops();
    this.customStopGenerationTriggersDetector.clearInProgressStops();
    this.pendingTokens.push(...this.streamRegulator.popFreeChunkTokens());
    // Recover the tokens that precede the function-syntax stop trigger from the
    // partially-free chunk, so no response text before the call start is lost.
    const triggeredStops = this.functionSyntaxStartDetector.getTriggeredStops();
    const partiallyFreeTokens = this.streamRegulator.getPartiallyFreeChunk(this.llamaChat.model.tokenizer);
    const queuedTokensBeforeStopTrigger = getQueuedTokensBeforeStopTrigger(triggeredStops, partiallyFreeTokens, this.llamaChat.model.tokenizer);
    this.pendingTokens.push(...queuedTokensBeforeStopTrigger);
    this.removeFoundStartIgnoreTextsFromPendingTokens(true);
    if (this.pendingTokens.length > 0)
        this.onToken?.(this.pendingTokens.slice());
    this.res.push(...this.pendingTokens);
    this.contextWindowsRes.push(...this.pendingTokens);
    this.pendingTokens.length = 0;
    this.streamRegulator.clearQueue();
    this.releasedPartiallyFreeTokensBeforeFunctionCallStartSyntax = true;
}
1207
+ returnFunctionCallResults() {
1208
+ if (this.resFunctionCalls.length > 0) {
1209
+ this.releasePartiallyFreeTokensBeforeFunctionCallStart();
1210
+ let modelResponse = this.llamaChat.model.detokenize(this.res);
1211
+ let contextWindowModelResponse = this.llamaChat.model.detokenize(this.contextWindowsRes);
1212
+ if (this.grammar?.trimWhitespaceSuffix || this.trimWhitespaceSuffix) {
1213
+ modelResponse = modelResponse.trimEnd();
1214
+ contextWindowModelResponse = contextWindowModelResponse.trimEnd();
1215
+ }
1216
+ return {
1217
+ response: modelResponse,
1218
+ lastEvaluation: {
1219
+ contextWindow: setLastTextInChatHistory("model", this.lastContextWindowHistory, this.contextWindowLastModelResponse + contextWindowModelResponse),
1220
+ cleanHistory: setLastTextInChatHistory("model", this.resolvedHistory, this.lastModelResponse + modelResponse),
1221
+ contextShiftMetadata: this.lastHistoryCompressionMetadata
1222
+ },
1223
+ functionCalls: this.resFunctionCalls.map((functionCall) => {
1224
+ return {
1225
+ functionName: functionCall.functionName,
1226
+ params: functionCall.params,
1227
+ raw: functionCall.raw.toJSON()
1228
+ };
1229
+ }), // prevent infinite TS type instantiation
1230
+ metadata: {
1231
+ stopReason: "functionCalls"
1232
+ }
1233
+ };
1234
+ }
1235
+ return undefined;
1236
+ }
1237
/**
 * Evaluates tokens in a loop, yielding each generated token, and performs a
 * context shift (reloading the context window via `loadContextWindow`)
 * whenever the sequence runs out of room.
 * Throws when the loop exits without the caller breaking out, aborting,
 * or a context shift being possible.
 */
async *evaluateWithContextShift(loadContextWindow) {
    while (true) {
        this.startTokenLoop();
        await loadContextWindow();
        await this.alignCurrentSequenceStateWithCurrentTokens();
        await this.createNewEvaluationIterator();
        while (await this.iterateEvaluation()) {
            if (this.currentToken == null)
                break;
            yield this.currentToken;
            if (this.shouldAbort)
                return;
            // Leave the inner loop and re-enter the outer one to shift context.
            if (this.updateShouldContextShift())
                break;
            if (this.restartEvaluationIterator) {
                await this.createNewEvaluationIterator();
            }
        }
        this.isFirstEvaluation = false;
        if (this.shouldContextShift)
            continue;
        break;
    }
    // Reached only when evaluation ended without a token being generated and
    // without a context shift - the context cannot fit a response.
    throw new Error("The context size is too small to generate a response");
}
1262
+ async alignCurrentSequenceStateWithCurrentTokens() {
1263
+ let { firstDifferentIndex } = this.llamaChat.sequence.compareContextTokens(this.tokens);
1264
+ // we need to decode at least one token to generate a response
1265
+ if (firstDifferentIndex === this.tokens.length && firstDifferentIndex > 0)
1266
+ firstDifferentIndex -= 1;
1267
+ this.tokens.splice(0, firstDifferentIndex);
1268
+ if (firstDifferentIndex < this.llamaChat.sequence.nextTokenIndex) {
1269
+ await this.llamaChat.sequence.eraseContextTokenRanges([{
1270
+ start: firstDifferentIndex,
1271
+ end: this.llamaChat.sequence.nextTokenIndex
1272
+ }]);
1273
+ this.ensureNotAborted();
1274
+ }
1275
+ }
1276
+ async evaluateWithoutGeneratingNewTokens() {
1277
+ if (this.evaluationIterator != null)
1278
+ await this.evaluationIterator.return();
1279
+ await this.llamaChat.sequence.evaluateWithoutGeneratingNewTokens(this.tokens, removeNullFields({
1280
+ evaluationPriority: this.evaluationPriority
1281
+ }));
1282
+ }
1283
/**
 * Disposes the previous evaluation iterator (if any) and starts a new one
 * over the current tokens, wiring in the sampling options and grammar state.
 */
async createNewEvaluationIterator() {
    if (this.evaluationIterator != null)
        await this.evaluationIterator.return();
    this.currentIterationReplacementToken = undefined;
    this.restartEvaluationIterator = false;
    this.evaluationIterator = this.llamaChat.sequence.evaluate(this.tokens, removeNullFields({
        temperature: this.temperature,
        minP: this.minP,
        topK: this.topK,
        topP: this.topP,
        // Resolved lazily per token: function-call evaluation uses its own
        // grammar state instead of the user-provided one.
        grammarEvaluationState: () => {
            if (this.functionEvaluationMode !== false)
                return this.functionsEvaluationState;
            return this.grammarEvaluationState;
        },
        repeatPenalty: !this.repeatPenaltyEnabled ? undefined : {
            punishTokens: this.getPenaltyTokens,
            penalty: this.resolvedRepeatPenalty.penalty,
            frequencyPenalty: this.resolvedRepeatPenalty.frequencyPenalty,
            presencePenalty: this.resolvedRepeatPenalty.presencePenalty
        },
        tokenBias: this.tokenBias,
        evaluationPriority: this.evaluationPriority,
        // Yield end-of-generation tokens too, so stop handling can see them.
        yieldEogToken: true
    }));
}
1309
/**
 * Advances the evaluation iterator by one token and updates the
 * current-token bookkeeping.
 * Returns `true` when a new token was produced, `false` when iteration is done.
 */
async iterateEvaluation() {
    // A replacement token (if set) is fed back into the evaluator and
    // consumed exactly once.
    this.currentIteration = await this.evaluationIterator?.next(this.currentIterationReplacementToken);
    this.currentIterationReplacementToken = undefined;
    this.ensureNotAborted();
    this.generatedTokens++;
    if (this.currentIteration != null && this.currentIteration?.done !== true) {
        this.currentToken = this.currentIteration.value;
        this.currentTokens = [this.currentToken];
        this.currentText = this.llamaChat.model.detokenize(this.currentTokens);
        // During function-call evaluation, tokens bypass the stream regulator;
        // only free-text generation is queued for regulated release.
        if (this.functionEvaluationMode === false)
            this.currentQueuedTokenRelease = this.streamRegulator.addChunk({
                tokens: this.currentTokens,
                text: this.currentText
            });
        else
            this.currentQueuedTokenRelease = undefined;
        return true;
    }
    return false;
}
1329
+ waitOnPartialCharactersOrWhiteSpaceTokens() {
1330
+ if (this.currentText === UNKNOWN_UNICODE_CHAR || ((this.grammar?.trimWhitespaceSuffix || this.trimWhitespaceSuffix) && this.currentText?.trim() === "")) {
1331
+ if (this.currentQueuedTokenRelease != null)
1332
+ this.locksToReleaseOnValidGeneration.push(this.currentQueuedTokenRelease.createTextIndexLock(0));
1333
+ }
1334
+ else {
1335
+ while (this.locksToReleaseOnValidGeneration.length > 0)
1336
+ this.locksToReleaseOnValidGeneration.shift().dispose();
1337
+ }
1338
+ }
1339
/**
 * Feeds the current token into the function-call-syntax detector and, when
 * the call-start syntax is fully matched, switches the evaluation loop into
 * function-name mode and collects the free tokens preceding the match.
 */
detectAndHandleFunctionStartSyntax() {
    this.functionSyntaxStartDetector.recordGeneration({
        text: this.currentText,
        tokens: this.currentTokens,
        queuedTokenRelease: this.currentQueuedTokenRelease
    });
    if (this.currentQueuedTokenRelease != null && this.functionEvaluationMode === false && this.functionsEnabled &&
        this.functionSyntaxStartDetector.hasTriggeredStops) {
        this.functionEvaluationMode = "functionName";
        // Lock the current chunk so it isn't streamed out as response text.
        this.currentQueuedTokenRelease.createTextIndexLock(0);
        this.stopGenerationDetector.clearTriggeredStops();
        this.stopGenerationDetector.clearInProgressStops();
        this.customStopGenerationTriggersDetector.clearTriggeredStops();
        this.customStopGenerationTriggersDetector.clearInProgressStops();
        this.pendingTokens.push(...this.streamRegulator.popFreeChunkTokens());
        // Recover the tokens preceding the call-start trigger - they are still
        // part of the free-text response.
        const triggeredStops = this.functionSyntaxStartDetector.getTriggeredStops();
        const partiallyFreeTokens = this.streamRegulator.getPartiallyFreeChunk(this.llamaChat.model.tokenizer);
        const queuedTokensBeforeStopTrigger = getQueuedTokensBeforeStopTrigger(triggeredStops, partiallyFreeTokens, this.llamaChat.model.tokenizer);
        this.pendingTokens.push(...queuedTokensBeforeStopTrigger);
        // Text generated after the trigger belongs to the function call itself;
        // keep it for the function-name evaluation step.
        const firstRemainingGenerationAfterStop = StopGenerationDetector.getFirstRemainingGenerationAfterStop(triggeredStops);
        const remainingTextAfterStop = StopGenerationDetector.detokenizeRemainingGeneration(firstRemainingGenerationAfterStop, this.llamaChat.model.detokenize);
        this.currentFunctionCallPreviousPartLeftoverText = remainingTextAfterStop;
    }
}
1363
+ recordStopGenerationEvaluation() {
1364
+ this.stopGenerationDetector.recordGeneration({
1365
+ text: this.currentText,
1366
+ tokens: this.currentTokens,
1367
+ queuedTokenRelease: this.currentQueuedTokenRelease
1368
+ });
1369
+ this.customStopGenerationTriggersDetector.recordGeneration({
1370
+ text: this.currentText,
1371
+ tokens: this.currentTokens,
1372
+ queuedTokenRelease: this.currentQueuedTokenRelease
1373
+ });
1374
+ }
1375
+ popStreamRegulatorFreeTokens() {
1376
+ this.pendingTokens.push(...this.streamRegulator.popFreeChunkTokens());
1377
+ }
1378
/**
 * When a stop condition fired (built-in stop trigger, custom stop trigger,
 * or an end-of-generation token), flushes the remaining free tokens and
 * builds the final generation result.
 * Returns `undefined` when generation should continue.
 */
handleStopGenerationTrigger(lastHistoryItemType) {
    if (this.stopGenerationDetector.hasTriggeredStops || this.customStopGenerationTriggersDetector.hasTriggeredStops ||
        this.llamaChat.model.isEogToken(this.currentToken)) {
        this.stopGenerationDetector.clearInProgressStops();
        this.customStopGenerationTriggersDetector.clearInProgressStops();
        this.pendingTokens.push(...this.streamRegulator.popFreeChunkTokens());
        // Built-in stop triggers take precedence over custom ones.
        const triggeredStops = this.stopGenerationDetector.hasTriggeredStops
            ? this.stopGenerationDetector.getTriggeredStops()
            : this.customStopGenerationTriggersDetector.getTriggeredStops();
        const partiallyFreeTokens = this.streamRegulator.getPartiallyFreeChunk(this.llamaChat.model.tokenizer);
        // Release the tokens that precede the stop trigger - they belong to
        // the response; the trigger text itself does not.
        const queuedTokensBeforeStopTrigger = getQueuedTokensBeforeStopTrigger(triggeredStops, partiallyFreeTokens, this.llamaChat.model.tokenizer);
        this.pendingTokens.push(...queuedTokensBeforeStopTrigger);
        const firstRemainingGenerationAfterStop = StopGenerationDetector.getFirstRemainingGenerationAfterStop(triggeredStops);
        this.removeFoundStartIgnoreTextsFromPendingTokens(true);
        if (this.pendingTokens.length > 0)
            this.onToken?.(this.pendingTokens.slice());
        this.res.push(...this.pendingTokens);
        this.contextWindowsRes.push(...this.pendingTokens);
        this.pendingTokens.length = 0;
        let modelResponse = this.llamaChat.model.detokenize(this.res);
        let contextWindowModelResponse = this.llamaChat.model.detokenize(this.contextWindowsRes);
        if (this.grammar?.trimWhitespaceSuffix || this.trimWhitespaceSuffix) {
            modelResponse = modelResponse.trimEnd();
            contextWindowModelResponse = contextWindowModelResponse.trimEnd();
        }
        const lastEvaluation = {
            contextWindow: setLastTextInChatHistory(lastHistoryItemType, this.lastContextWindowHistory, this.contextWindowLastModelResponse + contextWindowModelResponse),
            cleanHistory: setLastTextInChatHistory(lastHistoryItemType, this.resolvedHistory, this.lastModelResponse + modelResponse),
            contextShiftMetadata: this.lastHistoryCompressionMetadata
        };
        const isEogToken = this.llamaChat.model.isEogToken(this.currentToken);
        if (isEogToken || this.stopGenerationDetector.hasTriggeredStops) {
            return {
                response: modelResponse,
                lastEvaluation,
                metadata: {
                    remainingGenerationAfterStop: firstRemainingGenerationAfterStop,
                    stopReason: isEogToken
                        ? "eogToken"
                        : "stopGenerationTrigger"
                }
            };
        }
        // Only a custom stop trigger fired - report which one in the metadata.
        return {
            response: modelResponse,
            lastEvaluation,
            metadata: {
                remainingGenerationAfterStop: firstRemainingGenerationAfterStop,
                stopReason: "customStopTrigger",
                customStopTrigger: triggeredStops[0].stopTrigger
            }
        };
    }
    return undefined;
}
1433
+ spliceIgnoreStartTextDetectedTokens() {
1434
+ if (this.res.length === 0) {
1435
+ this.ignoreStartTextDetector.clearInProgressStops();
1436
+ this.ignoreStartTextDetector.clearTriggeredStops();
1437
+ this.ignoreStartTextDetector.recordGeneration({
1438
+ text: this.llamaChat.model.detokenize(this.pendingTokens),
1439
+ tokens: this.pendingTokens
1440
+ });
1441
+ }
1442
+ }
1443
+ isMaxTokensTriggered() {
1444
+ return this.maxTokens != null && this.maxTokens > 0 && this.generatedTokens >= this.maxTokens;
1445
+ }
1446
+ moveFreePendingTokensToRes(removeFoundStartIgnoreTextsFromPendingTokens = true) {
1447
+ if (this.pendingTokens.length > 0 && (this.isMaxTokensTriggered() || !this.ignoreStartTextDetector.hasInProgressStops)) {
1448
+ if (removeFoundStartIgnoreTextsFromPendingTokens)
1449
+ this.removeFoundStartIgnoreTextsFromPendingTokens();
1450
+ if (this.pendingTokens.length > 0) {
1451
+ this.onToken?.(this.pendingTokens.slice());
1452
+ this.res.push(...this.pendingTokens);
1453
+ this.contextWindowsRes.push(...this.pendingTokens);
1454
+ this.pendingTokens.length = 0;
1455
+ }
1456
+ }
1457
+ }
1458
+ handleMaxTokensTrigger(lastHistoryItemType) {
1459
+ if (this.isMaxTokensTriggered()) {
1460
+ let modelResponse = this.llamaChat.model.detokenize(this.res);
1461
+ let contextWindowModelResponse = this.llamaChat.model.detokenize(this.contextWindowsRes);
1462
+ if (this.grammar?.trimWhitespaceSuffix || this.trimWhitespaceSuffix) {
1463
+ modelResponse = modelResponse.trimEnd();
1464
+ contextWindowModelResponse = contextWindowModelResponse.trimEnd();
1465
+ }
1466
+ return {
1467
+ response: modelResponse,
1468
+ lastEvaluation: {
1469
+ contextWindow: setLastTextInChatHistory(lastHistoryItemType, this.lastContextWindowHistory, this.contextWindowLastModelResponse + contextWindowModelResponse),
1470
+ cleanHistory: setLastTextInChatHistory(lastHistoryItemType, this.resolvedHistory, this.lastModelResponse + modelResponse),
1471
+ contextShiftMetadata: this.lastHistoryCompressionMetadata
1472
+ },
1473
+ metadata: {
1474
+ stopReason: "maxTokens"
1475
+ }
1476
+ };
1477
+ }
1478
+ return undefined;
1479
+ }
1480
+ updateShouldContextShift() {
1481
+ this.shouldContextShift = this.llamaChat.sequence.nextTokenIndex >= this.llamaChat.context.contextSize - 1;
1482
+ return this.shouldContextShift;
1483
+ }
1484
+ get shouldAbort() {
1485
+ return !!(this.signal?.aborted && this.stopOnAbortSignal);
1486
+ }
1487
+ handleAbortTrigger(lastHistoryItemType) {
1488
+ if (this.shouldAbort && this.signal?.aborted && this.stopOnAbortSignal) {
1489
+ if (this.res.length === 0)
1490
+ throw this.signal.reason;
1491
+ let modelResponse = this.llamaChat.model.detokenize(this.res);
1492
+ let contextWindowModelResponse = this.llamaChat.model.detokenize(this.contextWindowsRes);
1493
+ if (this.grammar?.trimWhitespaceSuffix || this.trimWhitespaceSuffix) {
1494
+ modelResponse = modelResponse.trimEnd();
1495
+ contextWindowModelResponse = contextWindowModelResponse.trimEnd();
1496
+ }
1497
+ return {
1498
+ response: modelResponse,
1499
+ lastEvaluation: {
1500
+ contextWindow: setLastTextInChatHistory(lastHistoryItemType, this.lastContextWindowHistory, this.contextWindowLastModelResponse + contextWindowModelResponse),
1501
+ cleanHistory: setLastTextInChatHistory(lastHistoryItemType, this.resolvedHistory, this.lastModelResponse + modelResponse),
1502
+ contextShiftMetadata: this.lastHistoryCompressionMetadata
1503
+ },
1504
+ metadata: {
1505
+ stopReason: "abort"
1506
+ }
1507
+ };
1508
+ }
1509
+ return undefined;
1510
+ }
1511
+ }
1512
+ //# sourceMappingURL=LlamaChat.js.map