node-llama-cpp 3.0.0-beta.1 → 3.0.0-beta.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (354) hide show
  1. package/README.md +2 -0
  2. package/dist/ChatWrapper.d.ts +49 -0
  3. package/dist/ChatWrapper.js +120 -0
  4. package/dist/ChatWrapper.js.map +1 -0
  5. package/dist/{utils/getBin.d.ts → bindings/AddonTypes.d.ts} +14 -4
  6. package/dist/bindings/AddonTypes.js +2 -0
  7. package/dist/bindings/AddonTypes.js.map +1 -0
  8. package/dist/bindings/Llama.d.ts +23 -0
  9. package/dist/bindings/Llama.js +225 -0
  10. package/dist/bindings/Llama.js.map +1 -0
  11. package/dist/bindings/getLlama.d.ts +103 -0
  12. package/dist/bindings/getLlama.js +228 -0
  13. package/dist/bindings/getLlama.js.map +1 -0
  14. package/dist/bindings/types.d.ts +33 -0
  15. package/dist/bindings/types.js +30 -0
  16. package/dist/bindings/types.js.map +1 -0
  17. package/dist/bindings/utils/NoBinaryFoundError.d.ts +2 -0
  18. package/dist/bindings/utils/NoBinaryFoundError.js +7 -0
  19. package/dist/bindings/utils/NoBinaryFoundError.js.map +1 -0
  20. package/dist/{utils → bindings/utils}/binariesGithubRelease.js +1 -1
  21. package/dist/bindings/utils/binariesGithubRelease.js.map +1 -0
  22. package/dist/bindings/utils/clearAllLocalBuilds.d.ts +1 -0
  23. package/dist/bindings/utils/clearAllLocalBuilds.js +47 -0
  24. package/dist/bindings/utils/clearAllLocalBuilds.js.map +1 -0
  25. package/dist/bindings/utils/cloneLlamaCppRepo.d.ts +11 -0
  26. package/dist/bindings/utils/cloneLlamaCppRepo.js +155 -0
  27. package/dist/bindings/utils/cloneLlamaCppRepo.js.map +1 -0
  28. package/dist/bindings/utils/compileLLamaCpp.d.ts +12 -0
  29. package/dist/bindings/utils/compileLLamaCpp.js +157 -0
  30. package/dist/bindings/utils/compileLLamaCpp.js.map +1 -0
  31. package/dist/bindings/utils/getBuildFolderNameForBuildOptions.d.ts +5 -0
  32. package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js +85 -0
  33. package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js.map +1 -0
  34. package/dist/bindings/utils/getCanUsePrebuiltBinaries.d.ts +1 -0
  35. package/dist/bindings/utils/getCanUsePrebuiltBinaries.js +8 -0
  36. package/dist/bindings/utils/getCanUsePrebuiltBinaries.js.map +1 -0
  37. package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.d.ts +2 -0
  38. package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.js +21 -0
  39. package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.js.map +1 -0
  40. package/dist/bindings/utils/getPlatform.d.ts +2 -0
  41. package/dist/bindings/utils/getPlatform.js +15 -0
  42. package/dist/bindings/utils/getPlatform.js.map +1 -0
  43. package/dist/bindings/utils/lastBuildInfo.d.ts +6 -0
  44. package/dist/bindings/utils/lastBuildInfo.js +17 -0
  45. package/dist/bindings/utils/lastBuildInfo.js.map +1 -0
  46. package/dist/bindings/utils/logBinaryUsageExampleToConsole.d.ts +2 -0
  47. package/dist/bindings/utils/logBinaryUsageExampleToConsole.js +28 -0
  48. package/dist/bindings/utils/logBinaryUsageExampleToConsole.js.map +1 -0
  49. package/dist/bindings/utils/resolveChatWrapperBasedOnWrapperTypeName.d.ts +26 -0
  50. package/dist/bindings/utils/resolveChatWrapperBasedOnWrapperTypeName.js +43 -0
  51. package/dist/bindings/utils/resolveChatWrapperBasedOnWrapperTypeName.js.map +1 -0
  52. package/dist/bindings/utils/resolveCustomCmakeOptions.d.ts +1 -0
  53. package/dist/bindings/utils/resolveCustomCmakeOptions.js +43 -0
  54. package/dist/bindings/utils/resolveCustomCmakeOptions.js.map +1 -0
  55. package/dist/chatWrappers/AlpacaChatWrapper.d.ts +12 -0
  56. package/dist/chatWrappers/AlpacaChatWrapper.js +21 -0
  57. package/dist/chatWrappers/AlpacaChatWrapper.js.map +1 -0
  58. package/dist/chatWrappers/ChatMLChatWrapper.d.ts +18 -0
  59. package/dist/chatWrappers/ChatMLChatWrapper.js +83 -0
  60. package/dist/chatWrappers/ChatMLChatWrapper.js.map +1 -0
  61. package/dist/chatWrappers/EmptyChatWrapper.d.ts +4 -0
  62. package/dist/chatWrappers/EmptyChatWrapper.js +5 -0
  63. package/dist/chatWrappers/EmptyChatWrapper.js.map +1 -0
  64. package/dist/chatWrappers/FalconChatWrapper.d.ts +21 -0
  65. package/dist/chatWrappers/FalconChatWrapper.js +104 -0
  66. package/dist/chatWrappers/FalconChatWrapper.js.map +1 -0
  67. package/dist/chatWrappers/FunctionaryChatWrapper.d.ts +41 -0
  68. package/dist/chatWrappers/FunctionaryChatWrapper.js +200 -0
  69. package/dist/chatWrappers/FunctionaryChatWrapper.js.map +1 -0
  70. package/dist/chatWrappers/GeneralChatWrapper.d.ts +21 -0
  71. package/dist/chatWrappers/GeneralChatWrapper.js +112 -0
  72. package/dist/chatWrappers/GeneralChatWrapper.js.map +1 -0
  73. package/dist/chatWrappers/LlamaChatWrapper.d.ts +13 -0
  74. package/dist/chatWrappers/LlamaChatWrapper.js +78 -0
  75. package/dist/chatWrappers/LlamaChatWrapper.js.map +1 -0
  76. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.d.ts +5 -5
  77. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js +28 -17
  78. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js.map +1 -1
  79. package/dist/cli/cli.js +8 -0
  80. package/dist/cli/cli.js.map +1 -1
  81. package/dist/cli/commands/BuildCommand.d.ts +2 -1
  82. package/dist/cli/commands/BuildCommand.js +50 -10
  83. package/dist/cli/commands/BuildCommand.js.map +1 -1
  84. package/dist/cli/commands/ChatCommand.d.ts +11 -4
  85. package/dist/cli/commands/ChatCommand.js +138 -64
  86. package/dist/cli/commands/ChatCommand.js.map +1 -1
  87. package/dist/cli/commands/ClearCommand.js +4 -6
  88. package/dist/cli/commands/ClearCommand.js.map +1 -1
  89. package/dist/cli/commands/CompleteCommand.d.ts +25 -0
  90. package/dist/cli/commands/CompleteCommand.js +278 -0
  91. package/dist/cli/commands/CompleteCommand.js.map +1 -0
  92. package/dist/cli/commands/DebugCommand.d.ts +7 -0
  93. package/dist/cli/commands/DebugCommand.js +59 -0
  94. package/dist/cli/commands/DebugCommand.js.map +1 -0
  95. package/dist/cli/commands/DownloadCommand.d.ts +2 -1
  96. package/dist/cli/commands/DownloadCommand.js +47 -40
  97. package/dist/cli/commands/DownloadCommand.js.map +1 -1
  98. package/dist/cli/commands/InfillCommand.d.ts +27 -0
  99. package/dist/cli/commands/InfillCommand.js +316 -0
  100. package/dist/cli/commands/InfillCommand.js.map +1 -0
  101. package/dist/cli/commands/OnPostInstallCommand.js +7 -10
  102. package/dist/cli/commands/OnPostInstallCommand.js.map +1 -1
  103. package/dist/config.d.ts +10 -3
  104. package/dist/config.js +18 -7
  105. package/dist/config.js.map +1 -1
  106. package/dist/consts.d.ts +1 -0
  107. package/dist/consts.js +2 -0
  108. package/dist/consts.js.map +1 -0
  109. package/dist/evaluator/LlamaChat/LlamaChat.d.ts +154 -0
  110. package/dist/evaluator/LlamaChat/LlamaChat.js +684 -0
  111. package/dist/evaluator/LlamaChat/LlamaChat.js.map +1 -0
  112. package/dist/evaluator/LlamaChat/utils/FunctionCallGrammar.d.ts +22 -0
  113. package/dist/evaluator/LlamaChat/utils/FunctionCallGrammar.js +121 -0
  114. package/dist/evaluator/LlamaChat/utils/FunctionCallGrammar.js.map +1 -0
  115. package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.d.ts +16 -0
  116. package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js +135 -0
  117. package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js.map +1 -0
  118. package/dist/{llamaEvaluator → evaluator/LlamaChatSession}/LlamaChatSession.d.ts +59 -25
  119. package/dist/evaluator/LlamaChatSession/LlamaChatSession.js +219 -0
  120. package/dist/evaluator/LlamaChatSession/LlamaChatSession.js.map +1 -0
  121. package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.d.ts +7 -0
  122. package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.js +8 -0
  123. package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.js.map +1 -0
  124. package/dist/evaluator/LlamaCompletion.d.ts +148 -0
  125. package/dist/evaluator/LlamaCompletion.js +402 -0
  126. package/dist/evaluator/LlamaCompletion.js.map +1 -0
  127. package/dist/{llamaEvaluator → evaluator}/LlamaContext/LlamaContext.d.ts +20 -23
  128. package/dist/{llamaEvaluator → evaluator}/LlamaContext/LlamaContext.js +77 -107
  129. package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -0
  130. package/dist/{llamaEvaluator → evaluator}/LlamaContext/types.d.ts +6 -14
  131. package/dist/evaluator/LlamaContext/types.js.map +1 -0
  132. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.js.map +1 -0
  133. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.js.map +1 -0
  134. package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js.map +1 -0
  135. package/dist/evaluator/LlamaEmbeddingContext.d.ts +37 -0
  136. package/dist/evaluator/LlamaEmbeddingContext.js +78 -0
  137. package/dist/evaluator/LlamaEmbeddingContext.js.map +1 -0
  138. package/dist/evaluator/LlamaGrammar.d.ts +30 -0
  139. package/dist/{llamaEvaluator → evaluator}/LlamaGrammar.js +14 -18
  140. package/dist/evaluator/LlamaGrammar.js.map +1 -0
  141. package/dist/{llamaEvaluator → evaluator}/LlamaGrammarEvaluationState.js +4 -4
  142. package/dist/evaluator/LlamaGrammarEvaluationState.js.map +1 -0
  143. package/dist/{llamaEvaluator → evaluator}/LlamaJsonSchemaGrammar.d.ts +2 -1
  144. package/dist/{llamaEvaluator → evaluator}/LlamaJsonSchemaGrammar.js +4 -2
  145. package/dist/evaluator/LlamaJsonSchemaGrammar.js.map +1 -0
  146. package/dist/{llamaEvaluator → evaluator}/LlamaModel.d.ts +24 -6
  147. package/dist/{llamaEvaluator → evaluator}/LlamaModel.js +51 -12
  148. package/dist/evaluator/LlamaModel.js.map +1 -0
  149. package/dist/index.d.ts +30 -17
  150. package/dist/index.js +29 -15
  151. package/dist/index.js.map +1 -1
  152. package/dist/state.d.ts +2 -0
  153. package/dist/state.js +7 -0
  154. package/dist/state.js.map +1 -1
  155. package/dist/types.d.ts +72 -3
  156. package/dist/types.js +5 -1
  157. package/dist/types.js.map +1 -1
  158. package/dist/utils/LlamaText.d.ts +42 -0
  159. package/dist/utils/LlamaText.js +207 -0
  160. package/dist/utils/LlamaText.js.map +1 -0
  161. package/dist/utils/StopGenerationDetector.d.ts +28 -0
  162. package/dist/utils/StopGenerationDetector.js +205 -0
  163. package/dist/utils/StopGenerationDetector.js.map +1 -0
  164. package/dist/utils/TokenStreamRegulator.d.ts +30 -0
  165. package/dist/utils/TokenStreamRegulator.js +96 -0
  166. package/dist/utils/TokenStreamRegulator.js.map +1 -0
  167. package/dist/utils/UnsupportedError.d.ts +2 -0
  168. package/dist/utils/UnsupportedError.js +7 -0
  169. package/dist/utils/UnsupportedError.js.map +1 -0
  170. package/dist/utils/appendUserMessageToChatHistory.d.ts +2 -0
  171. package/dist/utils/appendUserMessageToChatHistory.js +18 -0
  172. package/dist/utils/appendUserMessageToChatHistory.js.map +1 -0
  173. package/dist/utils/cmake.js +16 -11
  174. package/dist/utils/cmake.js.map +1 -1
  175. package/dist/utils/compareTokens.d.ts +2 -0
  176. package/dist/utils/compareTokens.js +4 -0
  177. package/dist/utils/compareTokens.js.map +1 -0
  178. package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.d.ts +18 -0
  179. package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js +61 -0
  180. package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js.map +1 -0
  181. package/dist/utils/gbnfJson/GbnfGrammarGenerator.d.ts +1 -0
  182. package/dist/utils/gbnfJson/GbnfGrammarGenerator.js +17 -0
  183. package/dist/utils/gbnfJson/GbnfGrammarGenerator.js.map +1 -1
  184. package/dist/utils/gbnfJson/GbnfTerminal.d.ts +1 -1
  185. package/dist/utils/gbnfJson/GbnfTerminal.js.map +1 -1
  186. package/dist/utils/gbnfJson/terminals/GbnfVerbatimText.d.ts +6 -0
  187. package/dist/utils/gbnfJson/terminals/GbnfVerbatimText.js +21 -0
  188. package/dist/utils/gbnfJson/terminals/GbnfVerbatimText.js.map +1 -0
  189. package/dist/utils/gbnfJson/types.d.ts +1 -1
  190. package/dist/utils/gbnfJson/types.js.map +1 -1
  191. package/dist/utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.d.ts +1 -0
  192. package/dist/utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.js.map +1 -1
  193. package/dist/utils/getConsoleLogPrefix.d.ts +1 -0
  194. package/dist/utils/getConsoleLogPrefix.js +9 -0
  195. package/dist/utils/getConsoleLogPrefix.js.map +1 -0
  196. package/dist/utils/getGbnfGrammarForGbnfJsonSchema.js +1 -15
  197. package/dist/utils/getGbnfGrammarForGbnfJsonSchema.js.map +1 -1
  198. package/dist/utils/getGrammarsFolder.d.ts +2 -1
  199. package/dist/utils/getGrammarsFolder.js +8 -7
  200. package/dist/utils/getGrammarsFolder.js.map +1 -1
  201. package/dist/utils/getModuleVersion.d.ts +1 -0
  202. package/dist/utils/getModuleVersion.js +13 -0
  203. package/dist/utils/getModuleVersion.js.map +1 -0
  204. package/dist/utils/getQueuedTokensBeforeStopTrigger.d.ts +6 -0
  205. package/dist/utils/getQueuedTokensBeforeStopTrigger.js +22 -0
  206. package/dist/utils/getQueuedTokensBeforeStopTrigger.js.map +1 -0
  207. package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.d.ts +2 -0
  208. package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js +49 -0
  209. package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js.map +1 -0
  210. package/dist/utils/gitReleaseBundles.js +6 -5
  211. package/dist/utils/gitReleaseBundles.js.map +1 -1
  212. package/dist/utils/hashString.d.ts +1 -0
  213. package/dist/utils/hashString.js +8 -0
  214. package/dist/utils/hashString.js.map +1 -0
  215. package/dist/utils/isLockfileActive.d.ts +4 -0
  216. package/dist/utils/isLockfileActive.js +12 -0
  217. package/dist/utils/isLockfileActive.js.map +1 -0
  218. package/dist/utils/parseModelTypeDescription.d.ts +1 -1
  219. package/dist/utils/prettyPrintObject.d.ts +1 -0
  220. package/dist/utils/prettyPrintObject.js +40 -0
  221. package/dist/utils/prettyPrintObject.js.map +1 -0
  222. package/dist/utils/removeNullFields.d.ts +1 -0
  223. package/dist/utils/removeNullFields.js +8 -0
  224. package/dist/utils/removeNullFields.js.map +1 -1
  225. package/dist/utils/resolveChatWrapper.d.ts +4 -0
  226. package/dist/utils/resolveChatWrapper.js +16 -0
  227. package/dist/utils/resolveChatWrapper.js.map +1 -0
  228. package/dist/utils/resolveGithubRelease.d.ts +2 -0
  229. package/dist/utils/resolveGithubRelease.js +36 -0
  230. package/dist/utils/resolveGithubRelease.js.map +1 -0
  231. package/dist/utils/spawnCommand.d.ts +1 -1
  232. package/dist/utils/spawnCommand.js +4 -2
  233. package/dist/utils/spawnCommand.js.map +1 -1
  234. package/dist/utils/tokenizeInput.d.ts +3 -0
  235. package/dist/utils/tokenizeInput.js +9 -0
  236. package/dist/utils/tokenizeInput.js.map +1 -0
  237. package/dist/utils/truncateTextAndRoundToWords.d.ts +8 -0
  238. package/dist/utils/truncateTextAndRoundToWords.js +27 -0
  239. package/dist/utils/truncateTextAndRoundToWords.js.map +1 -0
  240. package/dist/utils/waitForLockfileRelease.d.ts +5 -0
  241. package/dist/utils/waitForLockfileRelease.js +20 -0
  242. package/dist/utils/waitForLockfileRelease.js.map +1 -0
  243. package/dist/utils/withLockfile.d.ts +7 -0
  244. package/dist/utils/withLockfile.js +44 -0
  245. package/dist/utils/withLockfile.js.map +1 -0
  246. package/dist/utils/withOra.js +11 -1
  247. package/dist/utils/withOra.js.map +1 -1
  248. package/dist/utils/withStatusLogs.d.ts +2 -1
  249. package/dist/utils/withStatusLogs.js +11 -8
  250. package/dist/utils/withStatusLogs.js.map +1 -1
  251. package/llama/.clang-format +1 -2
  252. package/llama/CMakeLists.txt +87 -2
  253. package/llama/addon.cpp +319 -31
  254. package/llama/binariesGithubRelease.json +1 -1
  255. package/llama/gitRelease.bundle +0 -0
  256. package/llama/gpuInfo/cuda-gpu-info.cu +99 -0
  257. package/llama/gpuInfo/cuda-gpu-info.h +7 -0
  258. package/llama/gpuInfo/metal-gpu-info.h +5 -0
  259. package/llama/gpuInfo/metal-gpu-info.mm +17 -0
  260. package/llama/llama.cpp.info.json +4 -0
  261. package/llamaBins/linux-arm64/.buildMetadata.json +1 -0
  262. package/llamaBins/linux-arm64/llama-addon.node +0 -0
  263. package/llamaBins/linux-armv7l/.buildMetadata.json +1 -0
  264. package/llamaBins/linux-armv7l/llama-addon.node +0 -0
  265. package/llamaBins/linux-x64/.buildMetadata.json +1 -0
  266. package/llamaBins/linux-x64/llama-addon.node +0 -0
  267. package/llamaBins/linux-x64-cuda/.buildMetadata.json +1 -0
  268. package/llamaBins/linux-x64-cuda/llama-addon.node +0 -0
  269. package/llamaBins/mac-arm64-metal/.buildMetadata.json +1 -0
  270. package/llamaBins/mac-arm64-metal/ggml-metal.metal +6491 -0
  271. package/llamaBins/mac-arm64-metal/llama-addon.node +0 -0
  272. package/llamaBins/mac-x64/.buildMetadata.json +1 -0
  273. package/llamaBins/mac-x64/llama-addon.node +0 -0
  274. package/llamaBins/win-x64/.buildMetadata.json +1 -0
  275. package/llamaBins/win-x64/llama-addon.exp +0 -0
  276. package/llamaBins/win-x64/llama-addon.node +0 -0
  277. package/llamaBins/win-x64-cuda/.buildMetadata.json +1 -0
  278. package/llamaBins/win-x64-cuda/llama-addon.exp +0 -0
  279. package/llamaBins/win-x64-cuda/llama-addon.lib +0 -0
  280. package/llamaBins/win-x64-cuda/llama-addon.node +0 -0
  281. package/package.json +37 -15
  282. package/dist/AbortError.d.ts +0 -2
  283. package/dist/AbortError.js +0 -7
  284. package/dist/AbortError.js.map +0 -1
  285. package/dist/ChatPromptWrapper.d.ts +0 -11
  286. package/dist/ChatPromptWrapper.js +0 -20
  287. package/dist/ChatPromptWrapper.js.map +0 -1
  288. package/dist/chatWrappers/ChatMLChatPromptWrapper.d.ts +0 -12
  289. package/dist/chatWrappers/ChatMLChatPromptWrapper.js +0 -22
  290. package/dist/chatWrappers/ChatMLChatPromptWrapper.js.map +0 -1
  291. package/dist/chatWrappers/EmptyChatPromptWrapper.d.ts +0 -4
  292. package/dist/chatWrappers/EmptyChatPromptWrapper.js +0 -5
  293. package/dist/chatWrappers/EmptyChatPromptWrapper.js.map +0 -1
  294. package/dist/chatWrappers/FalconChatPromptWrapper.d.ts +0 -19
  295. package/dist/chatWrappers/FalconChatPromptWrapper.js +0 -33
  296. package/dist/chatWrappers/FalconChatPromptWrapper.js.map +0 -1
  297. package/dist/chatWrappers/GeneralChatPromptWrapper.d.ts +0 -19
  298. package/dist/chatWrappers/GeneralChatPromptWrapper.js +0 -38
  299. package/dist/chatWrappers/GeneralChatPromptWrapper.js.map +0 -1
  300. package/dist/chatWrappers/LlamaChatPromptWrapper.d.ts +0 -12
  301. package/dist/chatWrappers/LlamaChatPromptWrapper.js +0 -23
  302. package/dist/chatWrappers/LlamaChatPromptWrapper.js.map +0 -1
  303. package/dist/chatWrappers/generateContextTextFromConversationHistory.d.ts +0 -15
  304. package/dist/chatWrappers/generateContextTextFromConversationHistory.js +0 -39
  305. package/dist/chatWrappers/generateContextTextFromConversationHistory.js.map +0 -1
  306. package/dist/llamaEvaluator/LlamaBins.d.ts +0 -19
  307. package/dist/llamaEvaluator/LlamaBins.js +0 -5
  308. package/dist/llamaEvaluator/LlamaBins.js.map +0 -1
  309. package/dist/llamaEvaluator/LlamaChatSession.js +0 -290
  310. package/dist/llamaEvaluator/LlamaChatSession.js.map +0 -1
  311. package/dist/llamaEvaluator/LlamaContext/LlamaContext.js.map +0 -1
  312. package/dist/llamaEvaluator/LlamaContext/types.js.map +0 -1
  313. package/dist/llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.js.map +0 -1
  314. package/dist/llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.js.map +0 -1
  315. package/dist/llamaEvaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js.map +0 -1
  316. package/dist/llamaEvaluator/LlamaGrammar.d.ts +0 -32
  317. package/dist/llamaEvaluator/LlamaGrammar.js.map +0 -1
  318. package/dist/llamaEvaluator/LlamaGrammarEvaluationState.js.map +0 -1
  319. package/dist/llamaEvaluator/LlamaJsonSchemaGrammar.js.map +0 -1
  320. package/dist/llamaEvaluator/LlamaModel.js.map +0 -1
  321. package/dist/utils/binariesGithubRelease.js.map +0 -1
  322. package/dist/utils/clearLlamaBuild.d.ts +0 -1
  323. package/dist/utils/clearLlamaBuild.js +0 -12
  324. package/dist/utils/clearLlamaBuild.js.map +0 -1
  325. package/dist/utils/cloneLlamaCppRepo.d.ts +0 -2
  326. package/dist/utils/cloneLlamaCppRepo.js +0 -102
  327. package/dist/utils/cloneLlamaCppRepo.js.map +0 -1
  328. package/dist/utils/compileLLamaCpp.d.ts +0 -8
  329. package/dist/utils/compileLLamaCpp.js +0 -127
  330. package/dist/utils/compileLLamaCpp.js.map +0 -1
  331. package/dist/utils/getBin.js +0 -78
  332. package/dist/utils/getBin.js.map +0 -1
  333. package/dist/utils/getReleaseInfo.d.ts +0 -7
  334. package/dist/utils/getReleaseInfo.js +0 -30
  335. package/dist/utils/getReleaseInfo.js.map +0 -1
  336. package/dist/utils/getTextCompletion.d.ts +0 -3
  337. package/dist/utils/getTextCompletion.js +0 -12
  338. package/dist/utils/getTextCompletion.js.map +0 -1
  339. package/dist/utils/usedBinFlag.d.ts +0 -6
  340. package/dist/utils/usedBinFlag.js +0 -15
  341. package/dist/utils/usedBinFlag.js.map +0 -1
  342. package/llama/usedBin.json +0 -3
  343. package/llamaBins/mac-arm64/ggml-metal.metal +0 -2929
  344. package/llamaBins/mac-arm64/llama-addon.node +0 -0
  345. package/llamaBins/mac-x64/ggml-metal.metal +0 -2929
  346. /package/dist/{utils → bindings/utils}/binariesGithubRelease.d.ts +0 -0
  347. /package/dist/{llamaEvaluator → evaluator}/LlamaContext/types.js +0 -0
  348. /package/dist/{llamaEvaluator → evaluator}/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.d.ts +0 -0
  349. /package/dist/{llamaEvaluator → evaluator}/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.js +0 -0
  350. /package/dist/{llamaEvaluator → evaluator}/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.d.ts +0 -0
  351. /package/dist/{llamaEvaluator → evaluator}/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.js +0 -0
  352. /package/dist/{llamaEvaluator → evaluator}/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.d.ts +0 -0
  353. /package/dist/{llamaEvaluator → evaluator}/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js +0 -0
  354. /package/dist/{llamaEvaluator → evaluator}/LlamaGrammarEvaluationState.d.ts +0 -0
@@ -0,0 +1,684 @@
1
+ import { DisposeAggregator, DisposedError, EventRelay } from "lifecycle-utils";
2
+ import { resolveChatWrapper } from "../../utils/resolveChatWrapper.js";
3
+ import { removeNullFields } from "../../utils/removeNullFields.js";
4
+ import { LlamaGrammarEvaluationState } from "../LlamaGrammarEvaluationState.js";
5
+ import { StopGenerationDetector } from "../../utils/StopGenerationDetector.js";
6
+ import { TokenStreamRegulator } from "../../utils/TokenStreamRegulator.js";
7
+ import { UNKNOWN_UNICODE_CHAR } from "../../consts.js";
8
+ import { getQueuedTokensBeforeStopTrigger } from "../../utils/getQueuedTokensBeforeStopTrigger.js";
9
+ import { eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy } from "./utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js";
10
+ import { FunctionCallGrammar, LlamaFunctionCallValidationError } from "./utils/FunctionCallGrammar.js";
11
+ const defaultContextShiftOptions = {
12
+ size: (sequence) => Math.max(1, Math.floor(sequence.context.contextSize / 10)),
13
+ strategy: "eraseFirstResponseAndKeepFirstSystem",
14
+ lastEvaluationMetadata: null
15
+ };
16
+ export class LlamaChat {
17
+ /** @internal */ _chatWrapper;
18
+ /** @internal */ _disposeAggregator = new DisposeAggregator();
19
+ /** @internal */ _autoDisposeSequence;
20
+ /** @internal */ _sequence;
21
+ onDispose = new EventRelay();
22
+ constructor({ contextSequence, chatWrapper = "auto", autoDisposeSequence = true }) {
23
+ if (contextSequence == null)
24
+ throw new Error("contextSequence cannot be null");
25
+ if (contextSequence.disposed)
26
+ throw new DisposedError();
27
+ this._sequence = contextSequence;
28
+ this._autoDisposeSequence = autoDisposeSequence;
29
+ this._disposeAggregator.add(this._sequence.onDispose.createListener(() => {
30
+ this.dispose();
31
+ }));
32
+ this._disposeAggregator.add(this.onDispose.dispatchEvent);
33
+ this._chatWrapper = resolveChatWrapper(chatWrapper, contextSequence.model);
34
+ }
35
+ dispose({ disposeSequence = this._autoDisposeSequence } = {}) {
36
+ if (this._sequence == null)
37
+ return;
38
+ if (disposeSequence)
39
+ this._sequence.dispose();
40
+ this._sequence = null;
41
+ this._disposeAggregator.dispose();
42
+ }
43
+ /** @hidden */
44
+ [Symbol.dispose]() {
45
+ return this.dispose();
46
+ }
47
+ get disposed() {
48
+ return this._sequence == null;
49
+ }
50
+ get chatWrapper() {
51
+ if (this._sequence == null)
52
+ throw new DisposedError();
53
+ return this._chatWrapper;
54
+ }
55
+ get sequence() {
56
+ if (this._sequence == null)
57
+ throw new DisposedError();
58
+ return this._sequence;
59
+ }
60
+ get context() {
61
+ return this.sequence.context;
62
+ }
63
+ get model() {
64
+ return this.sequence.model;
65
+ }
66
+ async generateResponse(history, { onToken, signal, maxTokens, temperature, minP, topK, topP, grammar, trimWhitespaceSuffix = false, repeatPenalty = {}, evaluationPriority = 5, functions, documentFunctionParams, contextShift = defaultContextShiftOptions, lastEvaluationContextWindow: { history: lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift = 0.5 } = {} } = {}) {
67
+ const functionsEnabled = (functions != null && Object.keys(functions).length > 0);
68
+ if (grammar != null && functionsEnabled)
69
+ throw new Error("Using both grammar and functions is not supported yet");
70
+ if (signal?.aborted)
71
+ throw signal.reason;
72
+ if (this._sequence == null)
73
+ throw new DisposedError();
74
+ let resolvedHistory = this._sequence.isLoadedToMemory
75
+ ? history.slice()
76
+ : history.map(removeRawFromHistoryItem);
77
+ if (resolvedHistory.length === 0 || resolvedHistory[resolvedHistory.length - 1].type !== "model")
78
+ resolvedHistory.push({
79
+ type: "model",
80
+ response: []
81
+ });
82
+ const model = this._sequence.model;
83
+ const context = this._sequence.context;
84
+ const eosToken = model.tokens.eos;
85
+ const resolvedContextShift = {
86
+ ...defaultContextShiftOptions,
87
+ ...removeNullFields(contextShift)
88
+ };
89
+ const { lastTokens: repeatPenaltyLastTokens = 64, punishTokensFilter, penalizeNewLine, penalty, frequencyPenalty, presencePenalty } = repeatPenalty === false
90
+ ? { lastTokens: 0 }
91
+ : repeatPenalty;
92
+ const lastModelResponse = getLastTextModelResponseFromChatHistory(resolvedHistory);
93
+ const res = [];
94
+ const pendingTokens = [];
95
+ let ignoredStartTextTokens = [];
96
+ const functionCallTokens = [];
97
+ const repeatPenaltyEnabled = repeatPenaltyLastTokens > 0;
98
+ const grammarEvaluationState = grammar != null
99
+ ? new LlamaGrammarEvaluationState({ grammar })
100
+ : undefined;
101
+ let functionsGrammar = functionsEnabled
102
+ ? new FunctionCallGrammar(model._llama, functions, this._chatWrapper, false)
103
+ : undefined;
104
+ let functionsEvaluationState = (functionsEnabled && functionsGrammar != null)
105
+ ? new LlamaGrammarEvaluationState({
106
+ grammar: functionsGrammar
107
+ })
108
+ : undefined;
109
+ const streamRegulator = new TokenStreamRegulator();
110
+ const stopGenerationDetector = new StopGenerationDetector();
111
+ const functionSyntaxStartDetector = new StopGenerationDetector();
112
+ const functionSyntaxEndDetector = new StopGenerationDetector();
113
+ const disengageInitiallyEngagedFunctionMode = new StopGenerationDetector();
114
+ const ignoreStartTextDetector = new StopGenerationDetector();
115
+ const locksToReleaseOnValidGeneration = [];
116
+ const functionCallTokenSyntaxLocks = [];
117
+ let generatedTokens = 0;
118
+ let isFirstEvaluation = true;
119
+ let inFunctionEvaluationMode = false;
120
+ let initiallyEngagedFunctionMode = false;
121
+ let lastContextWindowHistory = resolvedHistory;
122
+ let lastHistoryCompressionMetadata = resolvedContextShift.lastEvaluationMetadata;
123
+ const ensureNotAborted = () => {
124
+ if (signal?.aborted)
125
+ throw signal.reason;
126
+ if (this._sequence == null)
127
+ throw new DisposedError();
128
+ };
129
+ const getPenaltyTokens = () => {
130
+ if (this._sequence == null)
131
+ throw new DisposedError();
132
+ let punishTokens = res.slice(-repeatPenaltyLastTokens);
133
+ if (punishTokensFilter != null)
134
+ punishTokens = punishTokensFilter(punishTokens);
135
+ if (penalizeNewLine == null || !penalizeNewLine) {
136
+ const nlToken = model.tokens.nl;
137
+ if (nlToken != null)
138
+ punishTokens = punishTokens.filter(token => token !== nlToken);
139
+ }
140
+ return punishTokens;
141
+ };
142
+ const getResolvedHistoryWithCurrentModelResponse = () => {
143
+ if (res.length === 0)
144
+ return resolvedHistory;
145
+ let modelResponse = model.detokenize(res);
146
+ if (grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix)
147
+ modelResponse = modelResponse.trimEnd();
148
+ if (modelResponse === "")
149
+ return resolvedHistory;
150
+ return setLastModelTextResponseInChatHistory(resolvedHistory, lastModelResponse + modelResponse);
151
+ };
152
+ const removeFoundStartIgnoreTextsFromPendingTokens = () => {
153
+ if (res.length === 0 && pendingTokens.length > 0) {
154
+ ignoreStartTextDetector.clearInProgressStops();
155
+ ignoreStartTextDetector.clearTriggeredStops();
156
+ let mostExhaustiveTriggeredStops = null;
157
+ for (let i = 0; i < pendingTokens.length; i++) {
158
+ ignoreStartTextDetector.recordGeneration({
159
+ text: model.detokenize([pendingTokens[i]]),
160
+ tokens: [pendingTokens[i]],
161
+ startNewChecks: i === 0
162
+ });
163
+ if (ignoreStartTextDetector.hasTriggeredStops) {
164
+ mostExhaustiveTriggeredStops = ignoreStartTextDetector.getTriggeredStops();
165
+ ignoreStartTextDetector.clearTriggeredStops();
166
+ }
167
+ else if (!ignoreStartTextDetector.hasInProgressStops)
168
+ break;
169
+ }
170
+ if (mostExhaustiveTriggeredStops != null) {
171
+ const [mostExhaustiveTriggeredStop] = mostExhaustiveTriggeredStops;
172
+ if (mostExhaustiveTriggeredStop != null) {
173
+ ignoredStartTextTokens = mostExhaustiveTriggeredStop.stopTrigger
174
+ .map((stopTrigger) => {
175
+ if (typeof stopTrigger === "string")
176
+ return model.tokenize(stopTrigger);
177
+ else
178
+ return [stopTrigger];
179
+ })
180
+ .flat(1);
181
+ const newPendingTokens = mostExhaustiveTriggeredStop.remainingGenerations
182
+ .map((generation) => {
183
+ if (typeof generation === "string")
184
+ return model.tokenize(generation);
185
+ else
186
+ return generation;
187
+ })
188
+ .flat(1);
189
+ pendingTokens.length = 0;
190
+ pendingTokens.push(...newPendingTokens);
191
+ }
192
+ }
193
+ }
194
+ };
195
+ if (grammar != null)
196
+ StopGenerationDetector.resolveStopTriggers(grammar.stopGenerationTriggers, model.tokenize)
197
+ .map((stopTrigger) => stopGenerationDetector.addStopTrigger(stopTrigger));
198
+ if (functions != null && Object.keys(functions).length > 0)
199
+ functionSyntaxStartDetector.addStopTrigger([this._chatWrapper.settings.functions.call.prefix]);
200
+ // eslint-disable-next-line no-constant-condition
201
+ while (true) {
202
+ ensureNotAborted();
203
+ let shouldContextShift = false;
204
+ const queuedChunkTokens = streamRegulator.getAllQueuedChunkTokens();
205
+ const { history: contextWindowHistory, stopGenerationTriggers, tokens: contextWindowTokens, newResolvedHistory, newHistoryCompressionMetadata, ignoreStartText, functionCallInitiallyEngaged, disengageInitiallyEngagedFunctionCall } = await getContextWindow({
206
+ resolvedHistory: getResolvedHistoryWithCurrentModelResponse(),
207
+ resolvedContextShift,
208
+ lastHistoryCompressionMetadata,
209
+ pendingTokensCount: pendingTokens.length + queuedChunkTokens.length,
210
+ isFirstEvaluation,
211
+ chatWrapper: this._chatWrapper,
212
+ lastEvaluationContextWindowHistory,
213
+ minimumOverlapPercentageToPreventContextShift,
214
+ sequence: this._sequence,
215
+ minFreeContextTokens: 1,
216
+ functions: functionsEnabled ? functions : undefined,
217
+ documentFunctionParams
218
+ });
219
+ ensureNotAborted();
220
+ if (generatedTokens === 0) {
221
+ StopGenerationDetector.resolveStopTriggers(ignoreStartText, model.tokenize)
222
+ .map((stopTrigger) => ignoreStartTextDetector.addStopTrigger(stopTrigger));
223
+ if (functionsEnabled) {
224
+ initiallyEngagedFunctionMode = functionCallInitiallyEngaged;
225
+ StopGenerationDetector.resolveStopTriggers(disengageInitiallyEngagedFunctionCall, model.tokenize)
226
+ .map((stopTrigger) => disengageInitiallyEngagedFunctionMode.addStopTrigger(stopTrigger));
227
+ if (initiallyEngagedFunctionMode) {
228
+ inFunctionEvaluationMode = true;
229
+ functionsGrammar = new FunctionCallGrammar(model._llama, functions, this._chatWrapper, true);
230
+ functionsEvaluationState = new LlamaGrammarEvaluationState({
231
+ grammar: functionsGrammar
232
+ });
233
+ }
234
+ }
235
+ }
236
+ const tokens = [...contextWindowTokens, ...ignoredStartTextTokens, ...pendingTokens, ...queuedChunkTokens];
237
+ resolvedHistory = newResolvedHistory;
238
+ lastHistoryCompressionMetadata = newHistoryCompressionMetadata;
239
+ lastContextWindowHistory = contextWindowHistory;
240
+ const contextWindowLastModelResponse = getLastTextModelResponseFromChatHistory(contextWindowHistory);
241
+ const contextWindowsRes = [];
242
+ StopGenerationDetector.resolveStopTriggers(stopGenerationTriggers, model.tokenize)
243
+ .map((stopTrigger) => stopGenerationDetector.addStopTrigger(stopTrigger));
244
+ if (functionsGrammar != null)
245
+ StopGenerationDetector.resolveStopTriggers(functionsGrammar.stopGenerationTriggers, model.tokenize)
246
+ .map((stopTrigger) => functionSyntaxEndDetector.addStopTrigger(stopTrigger));
247
+ let { firstDifferentIndex } = this._sequence.compareContextTokens(tokens);
248
+ // we need to decode at least one token to generate a response
249
+ if (firstDifferentIndex === tokens.length && firstDifferentIndex > 0)
250
+ firstDifferentIndex -= 1;
251
+ tokens.splice(0, firstDifferentIndex);
252
+ if (firstDifferentIndex < this._sequence.nextTokenIndex) {
253
+ await this._sequence.eraseContextTokenRanges([{
254
+ start: firstDifferentIndex,
255
+ end: this._sequence.nextTokenIndex
256
+ }]);
257
+ ensureNotAborted();
258
+ }
259
+ const evaluationIterator = this._sequence.evaluate(tokens, removeNullFields({
260
+ temperature, minP, topK, topP,
261
+ grammarEvaluationState: () => {
262
+ if (inFunctionEvaluationMode)
263
+ return functionsEvaluationState;
264
+ return grammarEvaluationState;
265
+ },
266
+ repeatPenalty: !repeatPenaltyEnabled ? undefined : {
267
+ punishTokens: getPenaltyTokens,
268
+ penalty,
269
+ frequencyPenalty,
270
+ presencePenalty
271
+ },
272
+ evaluationPriority,
273
+ yieldEosToken: true
274
+ }));
275
+ for await (const token of evaluationIterator) {
276
+ ensureNotAborted();
277
+ generatedTokens++;
278
+ const tokens = [token];
279
+ const text = model.detokenize([token]);
280
+ const queuedTokenRelease = streamRegulator.addChunk({ tokens, text });
281
+ if (initiallyEngagedFunctionMode)
282
+ disengageInitiallyEngagedFunctionMode.recordGeneration({ text, tokens, startNewChecks: generatedTokens === 1 });
283
+ if (text === UNKNOWN_UNICODE_CHAR || ((grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix) && text.trim() === "")) {
284
+ locksToReleaseOnValidGeneration.push(queuedTokenRelease.createTextIndexLock(0));
285
+ }
286
+ else {
287
+ while (locksToReleaseOnValidGeneration.length > 0)
288
+ locksToReleaseOnValidGeneration.shift().dispose();
289
+ }
290
+ functionSyntaxStartDetector.recordGeneration({ text, tokens, queuedTokenRelease });
291
+ if (initiallyEngagedFunctionMode && disengageInitiallyEngagedFunctionMode.hasTriggeredStops) {
292
+ initiallyEngagedFunctionMode = false;
293
+ let shouldStopFunctionEvaluationMode = !functionSyntaxStartDetector.hasTriggeredStops;
294
+ if (!shouldStopFunctionEvaluationMode && functionsEnabled && functionsGrammar != null) {
295
+ const functionCallText = model.detokenize([...functionCallTokens, ...tokens]);
296
+ try {
297
+ const functionName = functionsGrammar.parseFunctionNameFromPartialCall(functionCallText, {
298
+ enableInternalBuiltinFunctions: true,
299
+ initialFunctionCallEngaged: true
300
+ });
301
+ const internalBuiltinFunctions = this._chatWrapper.getInternalBuiltinFunctions({ initialFunctionCallEngaged: true });
302
+ if (internalBuiltinFunctions[functionName] != null) {
303
+ shouldStopFunctionEvaluationMode = true;
304
+ }
305
+ }
306
+ catch (err) {
307
+ if (!(err instanceof LlamaFunctionCallValidationError))
308
+ throw err;
309
+ }
310
+ }
311
+ if (shouldStopFunctionEvaluationMode) {
312
+ inFunctionEvaluationMode = false;
313
+ functionsGrammar = new FunctionCallGrammar(model._llama, functions, this._chatWrapper, false);
314
+ functionsEvaluationState = new LlamaGrammarEvaluationState({
315
+ grammar: functionsGrammar
316
+ });
317
+ functionCallTokens.length = 0;
318
+ while (functionCallTokenSyntaxLocks.length > 0)
319
+ functionCallTokenSyntaxLocks.shift().dispose();
320
+ functionSyntaxStartDetector.clearInProgressStops();
321
+ functionSyntaxStartDetector.clearTriggeredStops();
322
+ functionSyntaxEndDetector.clearInProgressStops();
323
+ functionSyntaxEndDetector.clearTriggeredStops();
324
+ }
325
+ }
326
+ if (!inFunctionEvaluationMode && functionsEnabled && functionsGrammar != null &&
327
+ functionSyntaxStartDetector.hasTriggeredStops && functionsEvaluationState != null) {
328
+ inFunctionEvaluationMode = true;
329
+ functionCallTokenSyntaxLocks.push(queuedTokenRelease.createTextIndexLock(0));
330
+ stopGenerationDetector.clearTriggeredStops();
331
+ stopGenerationDetector.clearInProgressStops();
332
+ pendingTokens.push(...streamRegulator.popFreeChunkTokens());
333
+ const triggeredStops = functionSyntaxStartDetector.getTriggeredStops();
334
+ const partiallyFreeTokens = streamRegulator.getPartiallyFreeChunk();
335
+ const queuedTokensBeforeStopTrigger = getQueuedTokensBeforeStopTrigger(triggeredStops, partiallyFreeTokens, model.tokenize);
336
+ pendingTokens.push(...queuedTokensBeforeStopTrigger);
337
+ const [firstRemainingGenerationAfterStop] = triggeredStops
338
+ .map((stopTrigger) => stopTrigger.remainingGenerations)
339
+ .filter((remainingGenerations) => remainingGenerations.length > 0)
340
+ .flat(1);
341
+ const remainingTextAfterStop = (firstRemainingGenerationAfterStop == null || firstRemainingGenerationAfterStop.length === 0)
342
+ ? ""
343
+ : typeof firstRemainingGenerationAfterStop === "string"
344
+ ? firstRemainingGenerationAfterStop
345
+ : model.detokenize(firstRemainingGenerationAfterStop);
346
+ functionCallTokens.push(...model.tokenize(this._chatWrapper.settings.functions.call.prefix + remainingTextAfterStop));
347
+ for (const functionCallToken of functionCallTokens)
348
+ context._acceptTokenOnGrammarEvaluationState(functionsEvaluationState, functionCallToken);
349
+ }
350
+ else if (inFunctionEvaluationMode) {
351
+ functionCallTokens.push(...tokens);
352
+ functionCallTokenSyntaxLocks.push(queuedTokenRelease.createTextIndexLock(0));
353
+ functionSyntaxEndDetector.recordGeneration({ text, tokens, queuedTokenRelease });
354
+ }
355
+ if (inFunctionEvaluationMode && functionSyntaxEndDetector.hasTriggeredStops && functionsGrammar != null) {
356
+ const functionCallText = model.detokenize(functionCallTokens);
357
+ const functionCall = functionsGrammar.parseFunctionCall(functionCallText);
358
+ let modelResponse = model.detokenize(res);
359
+ let contextWindowModelResponse = model.detokenize(contextWindowsRes);
360
+ if (grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix) {
361
+ modelResponse = modelResponse.trimEnd();
362
+ contextWindowModelResponse = contextWindowModelResponse.trimEnd();
363
+ }
364
+ return {
365
+ response: modelResponse,
366
+ lastEvaluation: {
367
+ contextWindow: setLastModelTextResponseInChatHistory(lastContextWindowHistory, contextWindowLastModelResponse + contextWindowModelResponse),
368
+ cleanHistory: setLastModelTextResponseInChatHistory(resolvedHistory, lastModelResponse + modelResponse),
369
+ contextShiftMetadata: lastHistoryCompressionMetadata
370
+ },
371
+ // prevent infinite TS type instantiation
372
+ functionCall: functionCall,
373
+ metadata: {
374
+ stopReason: "functionCall"
375
+ }
376
+ };
377
+ }
378
+ if (!inFunctionEvaluationMode)
379
+ stopGenerationDetector.recordGeneration({ text, tokens, queuedTokenRelease });
380
+ pendingTokens.push(...streamRegulator.popFreeChunkTokens());
381
+ removeFoundStartIgnoreTextsFromPendingTokens();
382
+ if (stopGenerationDetector.hasTriggeredStops || token === eosToken) {
383
+ const triggeredStops = stopGenerationDetector.getTriggeredStops();
384
+ const partiallyFreeTokens = streamRegulator.getPartiallyFreeChunk();
385
+ const queuedTokensBeforeStopTrigger = getQueuedTokensBeforeStopTrigger(triggeredStops, partiallyFreeTokens, model.tokenize);
386
+ pendingTokens.push(...queuedTokensBeforeStopTrigger);
387
+ const [firstRemainingGenerationAfterStop] = triggeredStops
388
+ .map((stopTrigger) => stopTrigger.remainingGenerations)
389
+ .filter((remainingGenerations) => remainingGenerations.length > 0)
390
+ .flat(1);
391
+ removeFoundStartIgnoreTextsFromPendingTokens();
392
+ if (pendingTokens.length > 0)
393
+ onToken?.(pendingTokens.slice());
394
+ res.push(...pendingTokens);
395
+ contextWindowsRes.push(...pendingTokens);
396
+ pendingTokens.length = 0;
397
+ let modelResponse = model.detokenize(res);
398
+ let contextWindowModelResponse = model.detokenize(contextWindowsRes);
399
+ if (grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix) {
400
+ modelResponse = modelResponse.trimEnd();
401
+ contextWindowModelResponse = contextWindowModelResponse.trimEnd();
402
+ }
403
+ return {
404
+ response: modelResponse,
405
+ lastEvaluation: {
406
+ contextWindow: setLastModelTextResponseInChatHistory(lastContextWindowHistory, contextWindowLastModelResponse + contextWindowModelResponse),
407
+ cleanHistory: setLastModelTextResponseInChatHistory(resolvedHistory, lastModelResponse + modelResponse),
408
+ contextShiftMetadata: lastHistoryCompressionMetadata
409
+ },
410
+ metadata: {
411
+ remainingGenerationAfterStop: firstRemainingGenerationAfterStop,
412
+ stopReason: token === eosToken
413
+ ? "eosToken"
414
+ : "stopGenerationTrigger"
415
+ }
416
+ };
417
+ }
418
+ const maxTokensTriggered = maxTokens != null && maxTokens > 0 && generatedTokens >= maxTokens;
419
+ if (res.length === 0) {
420
+ ignoreStartTextDetector.clearInProgressStops();
421
+ ignoreStartTextDetector.clearTriggeredStops();
422
+ ignoreStartTextDetector.recordGeneration({
423
+ text: model.detokenize(pendingTokens),
424
+ tokens: pendingTokens
425
+ });
426
+ }
427
+ if (pendingTokens.length > 0 && (maxTokensTriggered || !ignoreStartTextDetector.hasInProgressStops)) {
428
+ removeFoundStartIgnoreTextsFromPendingTokens();
429
+ if (pendingTokens.length > 0) {
430
+ onToken?.(pendingTokens.slice());
431
+ res.push(...pendingTokens);
432
+ contextWindowsRes.push(...pendingTokens);
433
+ pendingTokens.length = 0;
434
+ }
435
+ }
436
+ if (maxTokensTriggered) {
437
+ let modelResponse = model.detokenize(res);
438
+ let contextWindowModelResponse = model.detokenize(contextWindowsRes);
439
+ if (grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix) {
440
+ modelResponse = modelResponse.trimEnd();
441
+ contextWindowModelResponse = contextWindowModelResponse.trimEnd();
442
+ }
443
+ return {
444
+ response: modelResponse,
445
+ lastEvaluation: {
446
+ contextWindow: setLastModelTextResponseInChatHistory(lastContextWindowHistory, contextWindowLastModelResponse + contextWindowModelResponse),
447
+ cleanHistory: setLastModelTextResponseInChatHistory(resolvedHistory, lastModelResponse + modelResponse),
448
+ contextShiftMetadata: lastHistoryCompressionMetadata
449
+ },
450
+ metadata: {
451
+ stopReason: "maxTokens"
452
+ }
453
+ };
454
+ }
455
+ if (this._sequence.nextTokenIndex >= context.contextSize) {
456
+ shouldContextShift = true;
457
+ break;
458
+ }
459
+ }
460
+ isFirstEvaluation = false;
461
+ if (shouldContextShift)
462
+ continue;
463
+ break;
464
+ }
465
+ throw new Error("The context size is too small to generate a response");
466
+ }
467
+ }
468
// Returns a copy of a chat history item with the `raw` field cleared on every
// non-string segment of a model response. Non-model items (and the original
// item's own segments) are left untouched; non-model items are returned as-is.
function removeRawFromHistoryItem(historyItem) {
    if (historyItem.type !== "model")
        return historyItem;

    const strippedResponse = historyItem.response.map((segment) => (
        typeof segment === "string"
            ? segment
            : {
                ...segment,
                raw: undefined
            }
    ));
    return {
        ...historyItem,
        response: strippedResponse
    };
}
484
// Shrinks a chat history until its rendered context text fits within
// `contextSize - contextShiftSize` tokens. A user-supplied strategy function
// is tried first (when given); on failure or a non-fitting result it falls
// back to the built-in "eraseFirstResponseAndKeepFirstSystem" strategy.
// Returns `{compressedHistory, metadata}`; `metadata` is `null` when no
// compression was needed. Throws when even the shift size alone cannot fit,
// or when the default strategy still cannot make the history fit.
async function compressHistoryToFitContextSize({ history, contextShiftSize, contextShiftStrategy, contextShiftLastEvaluationMetadata, contextSize, tokenizer, chatWrapper, functions, documentFunctionParams }) {
    const maxTokensCount = contextSize - contextShiftSize;

    // Renders a candidate history through the chat wrapper and checks whether
    // its token count leaves enough free room for the context shift.
    const fitsContext = (candidateHistory) => {
        const { contextText } = chatWrapper.generateContextText(candidateHistory, {
            availableFunctions: functions,
            documentFunctionParams
        });
        return contextText.tokenize(tokenizer).length <= maxTokensCount;
    };

    if (maxTokensCount <= 0)
        throw new Error(`The context size (${contextSize}) is too small to fit the context shift size (${contextShiftSize})`);

    if (fitsContext(history))
        return {
            compressedHistory: history,
            metadata: null
        };

    if (contextShiftStrategy instanceof Function) {
        try {
            const { chatHistory, metadata } = await contextShiftStrategy({
                chatHistory: history,
                maxTokensCount,
                tokenizer,
                chatWrapper,
                lastShiftMetadata: contextShiftLastEvaluationMetadata
            });
            if (fitsContext(chatHistory))
                return {
                    compressedHistory: chatHistory,
                    metadata
                };
            console.warn("The provided context shift strategy did not return a history that fits the context size. " +
                "Using the default strategy instead.");
        }
        catch (err) {
            // Best-effort: a broken custom strategy must not abort generation.
            console.error("The provided context shift strategy threw an error. " +
                "Using the default strategy instead.", err);
        }
    }
    else if (contextShiftStrategy !== "eraseFirstResponseAndKeepFirstSystem")
        console.warn(`Unknown context shift strategy "${contextShiftStrategy}". ` +
            "Using the default strategy instead.");

    const { chatHistory, metadata } = await eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy({
        chatHistory: history,
        maxTokensCount,
        tokenizer,
        chatWrapper,
        lastShiftMetadata: contextShiftLastEvaluationMetadata
    });
    if (!fitsContext(chatHistory))
        throw new Error("The default context shift strategy did not return a history that fits the context size. " +
            "This may happen due to the system prompt being too long");

    return {
        compressedHistory: chatHistory,
        metadata
    };
}
540
// Extracts the trailing text segment of the final model message in the chat
// history. Returns "" when the history is empty, doesn't end with a model
// message, or that message's last response segment isn't a string.
function getLastTextModelResponseFromChatHistory(chatHistory) {
    const lastItem = chatHistory.at(-1);
    if (lastItem?.type !== "model")
        return "";

    const lastSegment = lastItem.response.at(-1);
    return typeof lastSegment === "string"
        ? lastSegment
        : "";
}
549
// Returns a copy of the chat history whose final model message has its
// trailing text segment replaced with `textResponse` — appending a model
// message and/or a new segment as needed, and dropping the trailing segment
// when "" is given. The input history and its items are never mutated.
function setLastModelTextResponseInChatHistory(chatHistory, textResponse) {
    const updatedHistory = [...chatHistory];

    // Ensure the history ends with a model message to write into.
    if (updatedHistory.at(-1)?.type !== "model")
        updatedHistory.push({
            type: "model",
            response: []
        });

    // Copy-on-write: replace the last item with a clone owning its own
    // response array before editing it.
    const response = [...updatedHistory.at(-1).response];
    updatedHistory[updatedHistory.length - 1] = {
        ...updatedHistory.at(-1),
        response
    };

    const endsWithText = response.length > 0 && typeof response.at(-1) === "string";
    if (endsWithText) {
        if (textResponse === "")
            response.pop();
        else
            response[response.length - 1] = textResponse;
    }
    else if (textResponse !== "")
        response.push(textResponse);

    return updatedHistory;
}
571
// Builds the token window to evaluate next. Resolution order:
//  1. On the first evaluation, reuse the previous context window verbatim when
//     enough of it already overlaps the sequence's evaluated state.
//  2. If a context shift already happened (lastEvaluationMetadata present),
//     keep compressing from that metadata.
//  3. Otherwise use the history unchanged when it fits the context.
//  4. Otherwise perform the first context shift with a clamped shift size.
// All paths return the same result shape (history, triggers, tokens, updated
// history/metadata, and function-call engagement info).
async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHistoryCompressionMetadata, pendingTokensCount = 0, isFirstEvaluation, chatWrapper, lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift, sequence, minFreeContextTokens = 1, functions, documentFunctionParams }) {
    if (sequence == null)
        throw new DisposedError();

    const model = sequence.model;
    const context = sequence.context;

    // Renders a history through the chat wrapper into context text + triggers.
    const renderHistory = (history) => chatWrapper.generateContextText(history, {
        availableFunctions: functions,
        documentFunctionParams
    });

    // Assembles the common result shape shared by every exit path.
    const buildResult = ({ history, stopGenerationTriggers, tokens, newResolvedHistory, newHistoryCompressionMetadata, ignoreStartText, functionCall }) => ({
        history,
        stopGenerationTriggers,
        tokens,
        newResolvedHistory,
        newHistoryCompressionMetadata,
        ignoreStartText: ignoreStartText ?? [],
        functionCallInitiallyEngaged: functionCall?.initiallyEngaged ?? false,
        disengageInitiallyEngagedFunctionCall: functionCall?.disengageInitiallyEngaged ?? []
    });

    if (isFirstEvaluation && lastEvaluationContextWindowHistory != null && sequence.isLoadedToMemory) {
        const newContextWindow = lastEvaluationContextWindowHistory.slice();
        if (newContextWindow.at(-1)?.type !== "model")
            newContextWindow.push({
                type: "model",
                response: []
            });
        const { contextText, stopGenerationTriggers, ignoreStartText, functionCall } = renderHistory(newContextWindow);
        const tokens = contextText.tokenize(model.tokenize);
        if (tokens.length + pendingTokensCount + minFreeContextTokens < context.contextSize) {
            const { firstDifferentIndex } = sequence.compareContextTokens(tokens);
            // Reuse only when enough of the window is already evaluated to
            // make skipping a context shift worthwhile.
            const existingEvaluationPercentage = firstDifferentIndex / tokens.length;
            if (existingEvaluationPercentage >= minimumOverlapPercentageToPreventContextShift)
                return buildResult({
                    history: newContextWindow,
                    stopGenerationTriggers,
                    tokens,
                    newResolvedHistory: resolvedHistory,
                    newHistoryCompressionMetadata: lastHistoryCompressionMetadata,
                    ignoreStartText,
                    functionCall
                });
        }
    }

    // When the sequence state is unloaded, `raw` segments can no longer be
    // trusted, so strip them; otherwise just defensively copy the history.
    resolvedHistory = sequence.isLoadedToMemory
        ? resolvedHistory.slice()
        : resolvedHistory.map(removeRawFromHistoryItem);

    // The configured shift size may be a constant or a per-sequence function.
    const resolveShiftSize = async () => (resolvedContextShift.size instanceof Function
        ? await resolvedContextShift.size(sequence)
        : resolvedContextShift.size);

    // Compresses the history to fit, then renders the compressed result.
    const compressAndBuild = async (contextShiftSize) => {
        const { compressedHistory, metadata } = await compressHistoryToFitContextSize({
            history: resolvedHistory,
            contextShiftSize: Math.max(contextShiftSize, minFreeContextTokens) + pendingTokensCount,
            contextShiftStrategy: resolvedContextShift.strategy,
            contextShiftLastEvaluationMetadata: resolvedContextShift.lastEvaluationMetadata,
            contextSize: context.contextSize,
            tokenizer: model.tokenize,
            chatWrapper: chatWrapper,
            functions,
            documentFunctionParams
        });
        const { contextText, stopGenerationTriggers, ignoreStartText, functionCall } = renderHistory(compressedHistory);
        return buildResult({
            history: compressedHistory,
            stopGenerationTriggers,
            tokens: contextText.tokenize(model.tokenize),
            newResolvedHistory: resolvedHistory,
            newHistoryCompressionMetadata: metadata,
            ignoreStartText,
            functionCall
        });
    };

    if (resolvedContextShift.lastEvaluationMetadata != null)
        return await compressAndBuild(await resolveShiftSize());

    {
        const { contextText, stopGenerationTriggers, ignoreStartText, functionCall } = renderHistory(resolvedHistory);
        const tokens = contextText.tokenize(model.tokenize);
        if (tokens.length + pendingTokensCount + minFreeContextTokens < context.contextSize)
            return buildResult({
                history: resolvedHistory,
                stopGenerationTriggers,
                tokens,
                newResolvedHistory: resolvedHistory,
                newHistoryCompressionMetadata: lastHistoryCompressionMetadata,
                ignoreStartText,
                functionCall
            });
    }

    // First context shift: clamp the shift size into [1, contextSize].
    const contextShiftSize = Math.min(context.contextSize, Math.max(1, Math.floor(await resolveShiftSize())));
    return await compressAndBuild(contextShiftSize);
}
684
+ //# sourceMappingURL=LlamaChat.js.map